AstGen: implement integers bigger than u64

Also get rid of the `optional_type_from_ptr_elem` instruction, along with the `small_str` union field and its `addSmallStr` helper.
2026-02-13 12:59:04 +00:00 · 2021-04-22 23:47:31 -07:00 · 2021-04-22 23:47:31 -07:00 · 715abe8ebe
commit 715abe8ebe
parent 2d290d6f82
4 changed files with 95 additions and 66 deletions
--- a/src/AstGen.zig
+++ b/src/AstGen.zig
@ -1735,6 +1735,7 @@ fn unusedResultExpr(gz: *GenZir, scope: *Scope, statement: ast.Node.Index) Inner
            .func,
            .func_inferred,
            .int,
+            .int_big,
            .float,
            .float128,
            .intcast,
@ -1762,7 +1763,6 @@ fn unusedResultExpr(gz: *GenZir, scope: *Scope, statement: ast.Node.Index) Inner
            .typeof_elem,
            .xor,
            .optional_type,
-            .optional_type_from_ptr_elem,
            .optional_payload_safe,
            .optional_payload_unsafe,
            .optional_payload_safe_ptr,
@ -3874,18 +3874,9 @@ fn orelseCatchExpr(
    block_scope.setBreakResultLoc(rl);
    defer block_scope.instructions.deinit(astgen.gpa);

-    // TODO get rid of optional_type_from_ptr_elem
    const operand_rl: ResultLoc = switch (block_scope.break_result_loc) {
        .ref => .ref,
-        .discard, .none, .none_or_ref, .block_ptr, .inferred_ptr => .none,
-        .ty => |elem_ty| blk: {
-            const wrapped_ty = try block_scope.addUnNode(.optional_type, elem_ty, node);
-            break :blk .{ .ty = wrapped_ty };
-        },
-        .ptr => |ptr_ty| blk: {
-            const wrapped_ty = try block_scope.addUnNode(.optional_type_from_ptr_elem, ptr_ty, node);
-            break :blk .{ .ty = wrapped_ty };
-        },
+        else => .none,
    };
    block_scope.break_count += 1;
    // This could be a pointer or value depending on the `operand_rl` parameter.
@ -5755,10 +5746,37 @@ fn integerLiteral(
            else => try gz.addInt(small_int),
        };
        return rvalue(gz, scope, rl, result, node);
-    } else |err| {
-        assert(err != error.InvalidCharacter);
-        return gz.astgen.failNode(node, "TODO implement int literals that don't fit in a u64", .{});
+    } else |err| switch (err) {
+        error.InvalidCharacter => unreachable, // Caught by the parser.
+        error.Overflow => {},
    }
+
+    var base: u8 = 10;
+    var non_prefixed: []const u8 = prefixed_bytes;
+    if (mem.startsWith(u8, prefixed_bytes, "0x")) {
+        base = 16;
+        non_prefixed = prefixed_bytes[2..];
+    } else if (mem.startsWith(u8, prefixed_bytes, "0o")) {
+        base = 8;
+        non_prefixed = prefixed_bytes[2..];
+    } else if (mem.startsWith(u8, prefixed_bytes, "0b")) {
+        base = 2;
+        non_prefixed = prefixed_bytes[2..];
+    }
+
+    const gpa = astgen.gpa;
+    var big_int = try std.math.big.int.Managed.init(gpa);
+    defer big_int.deinit();
+    big_int.setString(base, non_prefixed) catch |err| switch (err) {
+        error.InvalidCharacter => unreachable, // caught by parser
+        error.InvalidBase => unreachable, // we only pass 10, 16, 8, 2, see above
+        error.OutOfMemory => return error.OutOfMemory,
+    };
+
+    const limbs = big_int.limbs[0..big_int.len()];
+    assert(big_int.isPositive());
+    const result = try gz.addIntBig(limbs);
+    return rvalue(gz, scope, rl, result, node);
 }

 fn floatLiteral(
--- a/src/Module.zig
+++ b/src/Module.zig
@ -1423,6 +1423,26 @@ pub const Scope = struct {
            });
        }

+        pub fn addIntBig(gz: *GenZir, limbs: []const std.math.big.Limb) !Zir.Inst.Ref { // Emits an `int_big` instruction carrying `limbs`.
+            const astgen = gz.astgen;
+            const gpa = astgen.gpa;
+            try gz.instructions.ensureUnusedCapacity(gpa, 1); // Reserve everything up front so the appends below cannot fail.
+            try astgen.instructions.ensureUnusedCapacity(gpa, 1);
+            try astgen.string_bytes.ensureUnusedCapacity(gpa, @sizeOf(std.math.big.Limb) * limbs.len); // Limbs are stored as raw bytes.
+
+            const new_index = @intCast(Zir.Inst.Index, astgen.instructions.len);
+            astgen.instructions.appendAssumeCapacity(.{
+                .tag = .int_big,
+                .data = .{ .str = .{
+                    .start = @intCast(u32, astgen.string_bytes.items.len), // Offset at which the limb bytes are appended below.
+                    .len = @intCast(u32, limbs.len), // Counted in limbs, not bytes.
+                } },
+            });
+            gz.instructions.appendAssumeCapacity(new_index);
+            astgen.string_bytes.appendSliceAssumeCapacity(mem.sliceAsBytes(limbs)); // Limb memory reinterpreted as bytes, no conversion.
+            return gz.indexToRef(new_index);
+        }
+
        pub fn addFloat(gz: *GenZir, number: f32, src_node: ast.Node.Index) !Zir.Inst.Ref {
            return gz.add(.{
                .tag = .float,
@ -1683,22 +1703,6 @@ pub const Scope = struct {
            return gz.indexToRef(new_index);
        }

-        /// Asserts that `str` is 8 or fewer bytes.
-        pub fn addSmallStr(
-            gz: *GenZir,
-            tag: Zir.Inst.Tag,
-            str: []const u8,
-        ) !Zir.Inst.Ref {
-            var buf: [9]u8 = undefined;
-            mem.copy(u8, &buf, str);
-            buf[str.len] = 0;
-
-            return gz.add(.{
-                .tag = tag,
-                .data = .{ .small_str = .{ .bytes = buf[0..8].* } },
-            });
-        }
-
        /// Note that this returns a `Zir.Inst.Index` not a ref.
        /// Does *not* append the block instruction to the scope.
        /// Leaves the `payload_index` field undefined.
--- a/src/Sema.zig
+++ b/src/Sema.zig
@ -200,6 +200,7 @@ pub fn analyzeBody(
            .import                       => try sema.zirImport(block, inst),
            .indexable_ptr_len            => try sema.zirIndexablePtrLen(block, inst),
            .int                          => try sema.zirInt(block, inst),
+            .int_big                      => try sema.zirIntBig(block, inst),
            .float                        => try sema.zirFloat(block, inst),
            .float128                     => try sema.zirFloat128(block, inst),
            .int_type                     => try sema.zirIntType(block, inst),
@ -219,7 +220,6 @@ pub fn analyzeBody(
            .optional_payload_unsafe      => try sema.zirOptionalPayload(block, inst, false),
            .optional_payload_unsafe_ptr  => try sema.zirOptionalPayloadPtr(block, inst, false),
            .optional_type                => try sema.zirOptionalType(block, inst),
-            .optional_type_from_ptr_elem  => try sema.zirOptionalTypeFromPtrElem(block, inst),
            .param_type                   => try sema.zirParamType(block, inst),
            .ptr_type                     => try sema.zirPtrType(block, inst),
            .ptr_type_simple              => try sema.zirPtrTypeSimple(block, inst),
@ -1479,6 +1479,23 @@ fn zirInt(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) InnerError!*In
    return sema.mod.constIntUnsigned(sema.arena, .unneeded, Type.initTag(.comptime_int), int);
 }

+fn zirIntBig(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) InnerError!*Inst { // Builds a `comptime_int` constant from an `int_big` instruction.
+    const tracy = trace(@src());
+    defer tracy.end();
+
+    const arena = sema.arena;
+    const int = sema.code.instructions.items(.data)[inst].str;
+    const byte_count = int.len * @sizeOf(std.math.big.Limb); // `int.len` counts limbs; convert to a byte length.
+    const limb_bytes = sema.code.string_bytes[int.start..][0..byte_count];
+    const limbs = try arena.alloc(std.math.big.Limb, int.len); // Arena-owned limb slice that backs the resulting value.
+    mem.copy(u8, mem.sliceAsBytes(limbs), limb_bytes); // Byte-wise copy out of `string_bytes` into the limb slice.
+
+    return sema.mod.constInst(arena, .unneeded, .{
+        .ty = Type.initTag(.comptime_int),
+        .val = try Value.Tag.int_big_positive.create(arena, limbs), // The limbs are tagged as a positive value.
+    });
+}
+
 fn zirFloat(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) InnerError!*Inst {
    const arena = sema.arena;
    const inst_data = sema.code.instructions.items(.data)[inst].float;
@ -2120,18 +2137,6 @@ fn zirOptionalType(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) Inner
    return sema.mod.constType(sema.arena, src, opt_type);
 }

-fn zirOptionalTypeFromPtrElem(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) InnerError!*Inst {
-    const tracy = trace(@src());
-    defer tracy.end();
-
-    const inst_data = sema.code.instructions.items(.data)[inst].un_node;
-    const ptr = try sema.resolveInst(inst_data.operand);
-    const elem_ty = ptr.ty.elemType();
-    const opt_ty = try sema.mod.optionalType(sema.arena, elem_ty);
-
-    return sema.mod.constType(sema.arena, inst_data.src(), opt_ty);
-}
-
 fn zirElemType(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) InnerError!*Inst {
    const inst_data = sema.code.instructions.items(.data)[inst].un_node;
    const src = inst_data.src();
--- a/src/Zir.zig
+++ b/src/Zir.zig
@ -374,8 +374,10 @@ pub const Inst = struct {
        /// Implements the `@import` builtin.
        /// Uses the `str_tok` field.
        import,
-        /// Integer literal that fits in a u64. Uses the int union value.
+        /// Integer literal that fits in a u64. Uses the `int` union field.
        int,
+        /// Arbitrary sized integer literal. Uses the `str` union field.
+        int_big,
        /// A float literal that fits in a f32. Uses the float union value.
        float,
        /// A float literal that fits in a f128. Uses the `pl_node` union value.
@ -540,10 +542,6 @@ pub const Inst = struct {
        /// Create an optional type '?T'
        /// Uses the `un_node` field.
        optional_type,
-        /// Create an optional type '?T'. The operand is a pointer value. The optional type will
-        /// be the type of the pointer element, wrapped in an optional.
-        /// Uses the `un_node` field.
-        optional_type_from_ptr_elem,
        /// ?T => T with safety.
        /// Given an optional value, returns the payload value, with a safety check that
        /// the value is non-null. Used for `orelse`, `if` and `while`.
@ -1030,6 +1028,7 @@ pub const Inst = struct {
                .func_inferred,
                .has_decl,
                .int,
+                .int_big,
                .float,
                .float128,
                .intcast,
@ -1061,7 +1060,6 @@ pub const Inst = struct {
                .typeof_elem,
                .xor,
                .optional_type,
-                .optional_type_from_ptr_elem,
                .optional_payload_safe,
                .optional_payload_unsafe,
                .optional_payload_safe_ptr,
@ -1700,17 +1698,6 @@ pub const Inst = struct {
                return code.string_bytes[self.start..][0..self.len];
            }
        },
-        /// Strings 8 or fewer bytes which may not contain null bytes.
-        small_str: struct {
-            bytes: [8]u8,
-
-            pub fn get(self: @This()) []const u8 {
-                const end = for (self.bytes) |byte, i| {
-                    if (byte == 0) break i;
-                } else self.bytes.len;
-                return self.bytes[0..end];
-            }
-        },
        str_tok: struct {
            /// Offset into `string_bytes`. Null-terminated.
            start: u32,
@ -2324,7 +2311,6 @@ const Writer = struct {
            .ret_node,
            .resolve_inferred_alloc,
            .optional_type,
-            .optional_type_from_ptr_elem,
            .optional_payload_safe,
            .optional_payload_unsafe,
            .optional_payload_safe_ptr,
@ -2405,6 +2391,7 @@ const Writer = struct {
            .ptr_type_simple => try self.writePtrTypeSimple(stream, inst),
            .ptr_type => try self.writePtrType(stream, inst),
            .int => try self.writeInt(stream, inst),
+            .int_big => try self.writeIntBig(stream, inst),
            .float => try self.writeFloat(stream, inst),
            .float128 => try self.writeFloat128(stream, inst),
            .str => try self.writeStr(stream, inst),
@ -2710,15 +2697,30 @@ const Writer = struct {
        try stream.writeAll("TODO)");
    }

-    fn writeInt(
-        self: *Writer,
-        stream: anytype,
-        inst: Inst.Index,
-    ) (@TypeOf(stream).Error || error{OutOfMemory})!void {
+    fn writeInt(self: *Writer, stream: anytype, inst: Inst.Index) !void {
        const inst_data = self.code.instructions.items(.data)[inst].int;
        try stream.print("{d})", .{inst_data});
    }

+    fn writeIntBig(self: *Writer, stream: anytype, inst: Inst.Index) !void { // Prints the value of an `int_big` instruction, then ")".
+        const inst_data = self.code.instructions.items(.data)[inst].str;
+        const byte_count = inst_data.len * @sizeOf(std.math.big.Limb); // `inst_data.len` counts limbs; convert to a byte length.
+        const limb_bytes = self.code.string_bytes[inst_data.start..][0..byte_count];
+        // limb_bytes is not aligned properly; we must allocate and copy the bytes
+        // into a Limb-aligned buffer before they can be read as limbs.
+        const limbs = try self.gpa.alloc(std.math.big.Limb, inst_data.len);
+        defer self.gpa.free(limbs);
+
+        mem.copy(u8, mem.sliceAsBytes(limbs), limb_bytes);
+        const big_int: std.math.big.int.Const = .{
+            .limbs = limbs,
+            .positive = true,
+        };
+        const as_string = try big_int.toStringAlloc(self.gpa, 10, false); // Base 10 rendering; freed by the defer below.
+        defer self.gpa.free(as_string);
+        try stream.print("{s})", .{as_string});
+    }
+
    fn writeFloat(self: *Writer, stream: anytype, inst: Inst.Index) !void {
        const inst_data = self.code.instructions.items(.data)[inst].float;
        const src = inst_data.src();