stage2: Move BlockData out of ir.Inst.Block

This commit is contained in:
joachimschmidt557 2021-05-09 15:20:08 +08:00 committed by Andrew Kelley
parent 1c636e2564
commit e8236551ab
5 changed files with 62 additions and 63 deletions

View File

@ -395,8 +395,6 @@ pub const Inst = struct {
base: Inst,
body: Body,
/// This memory is reserved for codegen code to do whatever it needs to here.
codegen: codegen.BlockData = .{},
pub fn operandCount(self: *const Block) usize {
return 0;

View File

@ -22,37 +22,6 @@ const RegisterManager = @import("register_manager.zig").RegisterManager;
const X8664Encoder = @import("codegen/x86_64.zig").Encoder;
/// The codegen-related data that is stored in `ir.Inst.Block` instructions.
pub const BlockData = struct {
relocs: std.ArrayListUnmanaged(Reloc) = undefined,
/// The first break instruction encounters `null` here and chooses a
/// machine code value for the block result, populating this field.
/// Following break instructions encounter that value and use it for
/// the location to store their block results.
mcv: AnyMCValue = undefined,
};
/// Architecture-independent MCValue. Here, we have a type that is the same size as
/// the architecture-specific MCValue. Next to the declaration of MCValue is a
/// comptime assert that makes sure we guessed correctly about the size. This only
/// exists so that we can bitcast an arch-independent field to and from the real MCValue.
pub const AnyMCValue = extern struct {
a: usize,
b: u64,
};
pub const Reloc = union(enum) {
/// The value is an offset into the `Function` `code` from the beginning.
/// To perform the reloc, write 32-bit signed little-endian integer
/// which is a relative jump, based on the address following the reloc.
rel32: usize,
/// A branch in the ARM instruction set
arm_branch: struct {
pos: usize,
cond: @import("codegen/arm.zig").Condition,
},
};
pub const Result = union(enum) {
/// The `code` parameter passed to `generateSymbol` has the value appended.
appended: void,
@ -317,6 +286,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
/// across each runtime branch upon joining.
branch_stack: *std.ArrayList(Branch),
blocks: std.AutoHashMapUnmanaged(*ir.Inst.Block, BlockData) = .{},
register_manager: RegisterManager(Self, Register, &callee_preserved_regs) = .{},
/// Maps offset to what is stored there.
stack: std.AutoHashMapUnmanaged(u32, StackAllocation) = .{},
@ -415,6 +386,27 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
size: u32,
};
const BlockData = struct {
relocs: std.ArrayListUnmanaged(Reloc),
/// The first break instruction encounters `null` here and chooses a
/// machine code value for the block result, populating this field.
/// Following break instructions encounter that value and use it for
/// the location to store their block results.
mcv: MCValue,
};
const Reloc = union(enum) {
/// The value is an offset into the `Function` `code` from the beginning.
/// To perform the reloc, write 32-bit signed little-endian integer
/// which is a relative jump, based on the address following the reloc.
rel32: usize,
/// A branch in the ARM instruction set
arm_branch: struct {
pos: usize,
cond: @import("codegen/arm.zig").Condition,
},
};
const Self = @This();
fn generateSymbol(
@ -463,6 +455,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
.end_di_column = module_fn.rbrace_column,
};
defer function.stack.deinit(bin_file.allocator);
defer function.blocks.deinit(bin_file.allocator);
defer function.exitlude_jump_relocs.deinit(bin_file.allocator);
var call_info = function.resolveCallingConventionValues(src_loc.lazy, fn_type) catch |err| switch (err) {
@ -3025,7 +3018,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
}
fn genBlock(self: *Self, inst: *ir.Inst.Block) !MCValue {
inst.codegen = .{
try self.blocks.putNoClobber(self.gpa, inst, .{
// A block is a setup to be able to jump to the end.
.relocs = .{},
// It also acts as a receptacle for break operands.
@ -3033,15 +3026,16 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// break instruction will choose a MCValue for the block result and overwrite
// this field. Following break instructions will use that MCValue to put their
// block results.
.mcv = @bitCast(AnyMCValue, MCValue{ .none = {} }),
};
defer inst.codegen.relocs.deinit(self.gpa);
.mcv = MCValue{ .none = {} },
});
const block_data = &self.blocks.getEntry(inst).?.value;
defer block_data.relocs.deinit(self.gpa);
try self.genBody(inst.body);
for (inst.codegen.relocs.items) |reloc| try self.performReloc(inst.base.src, reloc);
for (block_data.relocs.items) |reloc| try self.performReloc(inst.base.src, reloc);
return @bitCast(MCValue, inst.codegen.mcv);
return @bitCast(MCValue, block_data.mcv);
}
fn genSwitch(self: *Self, inst: *ir.Inst.SwitchBr) !MCValue {
@ -3115,11 +3109,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
}
fn br(self: *Self, src: LazySrcLoc, block: *ir.Inst.Block, operand: *ir.Inst) !MCValue {
const block_data = &self.blocks.getEntry(block).?.value;
if (operand.ty.hasCodeGenBits()) {
const operand_mcv = try self.resolveInst(operand);
const block_mcv = @bitCast(MCValue, block.codegen.mcv);
const block_mcv = block_data.mcv;
if (block_mcv == .none) {
block.codegen.mcv = @bitCast(AnyMCValue, operand_mcv);
block_data.mcv = operand_mcv;
} else {
try self.setRegOrMem(src, block.base.ty, block_mcv, operand_mcv);
}
@ -3128,8 +3124,10 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
}
fn brVoid(self: *Self, src: LazySrcLoc, block: *ir.Inst.Block) !MCValue {
const block_data = &self.blocks.getEntry(block).?.value;
// Emit a jump with a relocation. It will be patched up after the block ends.
try block.codegen.relocs.ensureCapacity(self.gpa, block.codegen.relocs.items.len + 1);
try block_data.relocs.ensureCapacity(self.gpa, block_data.relocs.items.len + 1);
switch (arch) {
.i386, .x86_64 => {
@ -3138,11 +3136,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
try self.code.resize(self.code.items.len + 5);
self.code.items[self.code.items.len - 5] = 0xe9; // jmp rel32
// Leave the jump offset undefined
block.codegen.relocs.appendAssumeCapacity(.{ .rel32 = self.code.items.len - 4 });
block_data.relocs.appendAssumeCapacity(.{ .rel32 = self.code.items.len - 4 });
},
.arm, .armeb => {
try self.code.resize(self.code.items.len + 4);
block.codegen.relocs.appendAssumeCapacity(.{
block_data.relocs.appendAssumeCapacity(.{
.arm_branch = .{
.pos = self.code.items.len - 4,
.cond = .al,

View File

@ -33,6 +33,11 @@ pub const CValue = union(enum) {
decl_ref: *Decl,
};
const BlockData = struct {
block_id: usize,
result: CValue,
};
pub const CValueMap = std.AutoHashMap(*Inst, CValue);
pub const TypedefMap = std.HashMap(Type, struct { name: []const u8, rendered: []u8 }, Type.hash, Type.eql, std.hash_map.default_max_load_percentage);
@ -83,6 +88,7 @@ pub const Object = struct {
gpa: *mem.Allocator,
code: std.ArrayList(u8),
value_map: CValueMap,
blocks: std.AutoHashMapUnmanaged(*ir.Inst.Block, BlockData) = .{},
next_arg_index: usize = 0,
next_local_index: usize = 0,
next_block_index: usize = 0,
@ -939,8 +945,6 @@ fn genBlock(o: *Object, inst: *Inst.Block) !CValue {
o.next_block_index += 1;
const writer = o.writer();
// store the block id in relocs.capacity as it is not used for anything else in the C backend.
inst.codegen.relocs.capacity = block_id;
const result = if (inst.base.ty.tag() != .void and !inst.base.isUnused()) blk: {
// allocate a location for the result
const local = try o.allocLocal(inst.base.ty, .Mut);
@ -948,7 +952,11 @@ fn genBlock(o: *Object, inst: *Inst.Block) !CValue {
break :blk local;
} else CValue{ .none = {} };
inst.codegen.mcv = @bitCast(@import("../codegen.zig").AnyMCValue, result);
try o.blocks.putNoClobber(o.gpa, inst, .{
.block_id = block_id,
.result = result,
});
try genBody(o, inst.body);
try o.indent_writer.insertNewline();
// label must be followed by an expression, add an empty one.
@ -957,7 +965,7 @@ fn genBlock(o: *Object, inst: *Inst.Block) !CValue {
}
fn genBr(o: *Object, inst: *Inst.Br) !CValue {
const result = @bitCast(CValue, inst.block.codegen.mcv);
const result = o.blocks.get(inst.block).?.result;
const writer = o.writer();
// If result is .none then the value of the block is unused.
@ -973,7 +981,7 @@ fn genBr(o: *Object, inst: *Inst.Br) !CValue {
}
fn genBrVoid(o: *Object, block: *Inst.Block) !CValue {
try o.writer().print("goto zig_block_{d};\n", .{block.codegen.relocs.capacity});
try o.writer().print("goto zig_block_{d};\n", .{o.blocks.get(block).?.block_id});
return CValue.none;
}

View File

@ -14,7 +14,6 @@ const Inst = ir.Inst;
const Type = @import("../type.zig").Type;
const Value = @import("../value.zig").Value;
const Compilation = @import("../Compilation.zig");
const AnyMCValue = @import("../codegen.zig").AnyMCValue;
const LazySrcLoc = Module.LazySrcLoc;
const link = @import("../link.zig");
const TypedValue = @import("../TypedValue.zig");
@ -29,8 +28,6 @@ const WValue = union(enum) {
constant: *Inst,
/// Offset position in the list of bytecode instructions
code_offset: usize,
/// The label of the block, used by breaks to find its relative distance
block_idx: u32,
/// Used for variables that create multiple locals on the stack when allocated
/// such as structs and optionals.
multi_value: u32,
@ -492,6 +489,8 @@ pub const Context = struct {
gpa: *mem.Allocator,
/// Table to save `WValue`'s generated by an `Inst`
values: ValueTable,
/// Mapping from *Inst.Block to block ids
blocks: std.AutoArrayHashMapUnmanaged(*Inst.Block, u32) = .{},
/// `bytes` contains the wasm bytecode belonging to the 'code' section.
code: ArrayList(u8),
/// Contains the generated function type bytecode for the current function
@ -521,6 +520,7 @@ pub const Context = struct {
pub fn deinit(self: *Context) void {
self.values.deinit(self.gpa);
self.blocks.deinit(self.gpa);
self.locals.deinit(self.gpa);
self.* = undefined;
}
@ -590,7 +590,6 @@ pub const Context = struct {
fn emitWValue(self: *Context, val: WValue) InnerError!void {
const writer = self.code.writer();
switch (val) {
.block_idx => unreachable, // block_idx cannot be referenced
.multi_value => unreachable, // multi_value can never be written directly, and must be accessed individually
.none, .code_offset => {}, // no-op
.local => |idx| {
@ -968,13 +967,9 @@ pub const Context = struct {
const block_ty = try self.genBlockType(block.base.src, block.base.ty);
try self.startBlock(.block, block_ty, null);
block.codegen = .{
// we don't use relocs, so using `relocs` is illegal behaviour.
.relocs = undefined,
// Here we set the current block idx, so breaks know the depth to jump
// to when breaking out.
.mcv = @bitCast(AnyMCValue, WValue{ .block_idx = self.block_depth }),
};
// Here we set the current block idx, so breaks know the depth to jump
// to when breaking out.
try self.blocks.putNoClobber(self.gpa, block, self.block_depth);
try self.genBody(block.body);
try self.endBlock();
@ -1091,10 +1086,9 @@ pub const Context = struct {
try self.emitWValue(operand);
}
// every block contains a `WValue` with its block index.
// We map every block to its block index.
// We then determine how far we have to jump to it by subtracting it from current block depth
const wvalue = @bitCast(WValue, br.block.codegen.mcv);
const idx: u32 = self.block_depth - wvalue.block_idx;
const idx: u32 = self.block_depth - self.blocks.get(br.block).?;
const writer = self.code.writer();
try writer.writeByte(wasm.opcode(.br));
try leb.writeULEB128(writer, idx);

View File

@ -125,6 +125,7 @@ pub fn updateDecl(self: *C, module: *Module, decl: *Module.Decl) !void {
object.indent_writer = .{ .underlying_writer = object.code.writer() };
defer {
object.value_map.deinit();
object.blocks.deinit(module.gpa);
object.code.deinit();
object.dg.fwd_decl.deinit();
var it = object.dg.typedefs.iterator();