riscv: totally rewrite how we do loads and stores

this commit is a little too large to document fully, however the main gist of it is this: - finish the `genInlineMemcpy` implementation - rename `setValue` to `genCopy` as I agree with jacob that it's a better name - add in `genVarDbgInfo` for a better gdb experience - follow the x86_64's method for genCall, as the procedure is very similar for us - add `airSliceLen` as it's trivial - change up the `airAddWithOverflow` implementation a bit - make sure to not spill if the elem_ty is 0 size - correctly follow the RISC-V calling convention and spill the used callee saved registers in the prologue and restore them in the epilogue - add `address`, `deref`, and `offset` helper functions for MCValue. I must say I love these, they make the code very readable and super verbose :) - fix a `register_manager.zig` issue where when using the last register in the set, the value would overflow at comptime. this was happening because we were adding to `max_id` before subtracting from it.
2026-02-21 16:54:52 +00:00 · 2024-03-28 15:59:28 -07:00 · 2024-03-28 15:59:28 -07:00 · 350ad90cee
commit 350ad90cee
parent cbf62bd6dc
7 changed files with 748 additions and 311 deletions
--- a/lib/std/builtin.zig
+++ b/lib/std/builtin.zig
@ -775,14 +775,14 @@ pub fn default_panic(msg: []const u8, error_return_trace: ?*StackTrace, ret_addr
    }

    if (builtin.zig_backend == .stage2_riscv64) {
-        asm volatile ("ecall"
-            :
-            : [number] "{a7}" (64),
-              [arg1] "{a0}" (1),
-              [arg2] "{a1}" (@intFromPtr(msg.ptr)),
-              [arg3] "{a2}" (msg.len),
-            : "rcx", "r11", "memory"
-        );
+        // asm volatile ("ecall"
+        //     :
+        //     : [number] "{a7}" (64),
+        //       [arg1] "{a0}" (1),
+        //       [arg2] "{a1}" (@intFromPtr(msg.ptr)),
+        //       [arg3] "{a2}" (msg.len),
+        //     : "rcx", "r11", "memory"
+        // );
        std.posix.exit(127);
    }

--- a/src/arch/riscv64/CodeGen.zig
+++ b/src/arch/riscv64/CodeGen.zig
--- a/src/arch/riscv64/Emit.zig
+++ b/src/arch/riscv64/Emit.zig
@ -1,19 +1,6 @@
 //! This file contains the functionality for lowering RISCV64 MIR into
 //! machine code

-const Emit = @This();
-const std = @import("std");
-const math = std.math;
-const Mir = @import("Mir.zig");
-const bits = @import("bits.zig");
-const link = @import("../../link.zig");
-const Module = @import("../../Module.zig");
-const ErrorMsg = Module.ErrorMsg;
-const assert = std.debug.assert;
-const Instruction = bits.Instruction;
-const Register = bits.Register;
-const DebugInfoOutput = @import("../../codegen.zig").DebugInfoOutput;
-
 mir: Mir,
 bin_file: *link.File,
 debug_output: DebugInfoOutput,
@ -22,12 +9,17 @@ err_msg: ?*ErrorMsg = null,
 src_loc: Module.SrcLoc,
 code: *std.ArrayList(u8),

+/// List of registers to save in the prologue.
+save_reg_list: Mir.RegisterList,
+
 prev_di_line: u32,
 prev_di_column: u32,
 /// Relative to the beginning of `code`.
 prev_di_pc: usize,
+
 /// Function's stack size. Used for backpatching.
 stack_size: u32,
+
 /// For backward branches: stores the code offset of the target
 /// instruction
 ///
@ -212,7 +204,7 @@ fn mirRType(emit: *Emit, inst: Mir.Inst.Index) !void {
            // rs1 != rs2

            try emit.writeInstruction(Instruction.xor(rd, rs1, rs2));
-            try emit.writeInstruction(Instruction.sltu(rd, .x0, rd)); // snez
+            try emit.writeInstruction(Instruction.sltu(rd, .zero, rd)); // snez
        },
        .cmp_lt => {
            // rd = 1 if rs1 < rs2
@ -368,17 +360,20 @@ fn mirPsuedo(emit: *Emit, inst: Mir.Inst.Index) !void {
                return emit.fail("TODO: mirPsuedo support larger stack sizes", .{});
            };

-            // Decrement sp by num s registers + local var space
+            // Decrement sp by (num s registers * 8) + local var space
            try emit.writeInstruction(Instruction.addi(.sp, .sp, -stack_size));

            // Spill ra
-            try emit.writeInstruction(Instruction.sd(.ra, stack_size - 8, .sp));
+            try emit.writeInstruction(Instruction.sd(.ra, 0, .sp));

-            // Spill s0
-            try emit.writeInstruction(Instruction.sd(.s0, stack_size - 16, .sp));
-
-            // Setup s0
-            try emit.writeInstruction(Instruction.addi(.s0, .sp, stack_size));
+            // Spill callee saved registers.
+            var s_reg_iter = emit.save_reg_list.iterator(.{});
+            var i: i12 = 8;
+            while (s_reg_iter.next()) |reg_i| {
+                const reg = abi.callee_preserved_regs[reg_i];
+                try emit.writeInstruction(Instruction.sd(reg, i, .sp));
+                i += 8;
+            }
        },
        .psuedo_epilogue => {
            const stack_size: i12 = math.cast(i12, emit.stack_size) orelse {
@ -386,10 +381,16 @@ fn mirPsuedo(emit: *Emit, inst: Mir.Inst.Index) !void {
            };

            // Restore ra
-            try emit.writeInstruction(Instruction.ld(.ra, stack_size - 8, .sp));
+            try emit.writeInstruction(Instruction.ld(.ra, 0, .sp));

-            // Restore s0
-            try emit.writeInstruction(Instruction.ld(.s0, stack_size - 16, .sp));
+            // Restore spilled callee saved registers
+            var s_reg_iter = emit.save_reg_list.iterator(.{});
+            var i: i12 = 8;
+            while (s_reg_iter.next()) |reg_i| {
+                const reg = abi.callee_preserved_regs[reg_i];
+                try emit.writeInstruction(Instruction.ld(reg, i, .sp));
+                i += 8;
+            }

            // Increment sp back to previous value
            try emit.writeInstruction(Instruction.addi(.sp, .sp, stack_size));
@ -408,8 +409,11 @@ fn mirRR(emit: *Emit, inst: Mir.Inst.Index) !void {
    const tag = emit.mir.instructions.items(.tag)[inst];
    const rr = emit.mir.instructions.items(.data)[inst].rr;

+    const rd = rr.rd;
+    const rs = rr.rs;
+
    switch (tag) {
-        .mv => try emit.writeInstruction(Instruction.addi(rr.rd, rr.rs, 0)),
+        .mv => try emit.writeInstruction(Instruction.addi(rd, rs, 0)),
        else => unreachable,
    }
 }
@ -435,7 +439,6 @@ fn mirNop(emit: *Emit, inst: Mir.Inst.Index) !void {
 }

 fn mirLoadSymbol(emit: *Emit, inst: Mir.Inst.Index) !void {
-    // const tag = emit.mir.instructions.items(.tag)[inst];
    const payload = emit.mir.instructions.items(.data)[inst].payload;
    const data = emit.mir.extraData(Mir.LoadSymbolPayload, payload).data;
    const reg = @as(Register, @enumFromInt(data.register));
@ -523,20 +526,19 @@ fn instructionSize(emit: *Emit, inst: Mir.Inst.Index) usize {
        .dbg_prologue_end,
        => 0,

-        .psuedo_prologue,
-        => 16,
-
-        .psuedo_epilogue,
-        .abs,
-        => 12,
-
        .cmp_eq,
        .cmp_neq,
        .cmp_imm_eq,
        .cmp_gte,
        .load_symbol,
+        .abs,
        => 8,

+        .psuedo_epilogue, .psuedo_prologue => size: {
+            const count = emit.save_reg_list.count() * 4;
+            break :size count + 8;
+        },
+
        else => 4,
    };
 }
@ -547,25 +549,17 @@ fn lowerMir(emit: *Emit) !void {
    const mir_tags = emit.mir.instructions.items(.tag);
    const mir_datas = emit.mir.instructions.items(.data);

+    const proglogue_size: u32 = @intCast(emit.save_reg_list.size());
+    emit.stack_size += proglogue_size;
+
    for (mir_tags, 0..) |tag, index| {
        const inst: u32 = @intCast(index);

        if (isStore(tag) or isLoad(tag)) {
            const data = mir_datas[inst].i_type;
-            // TODO: probably create a psuedo instruction for s0 loads/stores instead of this.
-            if (data.rs1 == .s0) {
+            if (data.rs1 == .sp) {
                const offset = mir_datas[inst].i_type.imm12;
-
-                // sp + 32 (aka s0)
-                // ra -- previous ra spilled
-                // s0 -- previous s0 spilled
-                // --- this is -16(s0)
-
-                // TODO: this "+ 8" is completely arbiratary as the largest possible store
-                // we don't want to actually use it. instead we need to calculate the difference
-                // between the first and second stack store and use it instead.
-
-                mir_datas[inst].i_type.imm12 = -(16 + offset + 8);
+                mir_datas[inst].i_type.imm12 = offset + @as(i12, @intCast(proglogue_size)) + 8;
            }
        }

@ -584,3 +578,17 @@ fn lowerMir(emit: *Emit) !void {
        current_code_offset += emit.instructionSize(inst);
    }
 }
+
+const Emit = @This();
+const std = @import("std");
+const math = std.math;
+const Mir = @import("Mir.zig");
+const bits = @import("bits.zig");
+const abi = @import("abi.zig");
+const link = @import("../../link.zig");
+const Module = @import("../../Module.zig");
+const ErrorMsg = Module.ErrorMsg;
+const assert = std.debug.assert;
+const Instruction = bits.Instruction;
+const Register = bits.Register;
+const DebugInfoOutput = @import("../../codegen.zig").DebugInfoOutput;
--- a/src/arch/riscv64/Mir.zig
+++ b/src/arch/riscv64/Mir.zig
@ -6,14 +6,6 @@
 //! The main purpose of MIR is to postpone the assignment of offsets until Isel,
 //! so that, for example, the smaller encodings of jump instructions can be used.

-const Mir = @This();
-const std = @import("std");
-const builtin = @import("builtin");
-const assert = std.debug.assert;
-
-const bits = @import("bits.zig");
-const Register = bits.Register;
-
 instructions: std.MultiArrayList(Inst).Slice,
 /// The meaning of this data is determined by `Inst.Tag` value.
 extra: []const u32,
@ -58,7 +50,7 @@ pub const Inst = struct {
        /// Jumps. Uses `inst` payload.
        j,

-        /// Immediate and, uses i_type payload
+        /// Immediate AND, uses i_type payload
        andi,

        // NOTE: Maybe create a special data for compares that includes the ops
@ -219,15 +211,6 @@ pub const Inst = struct {
        },
    };

-    const CompareOp = enum {
-        eq,
-        neq,
-        gt,
-        gte,
-        lt,
-        lte,
-    };
-
    // Make sure we don't accidentally make instructions bigger than expected.
    // Note that in Debug builds, Zig is allowed to insert a secret field for safety checks.
    // comptime {
@ -268,3 +251,49 @@ pub const LoadSymbolPayload = struct {
    atom_index: u32,
    sym_index: u32,
 };
+
+/// Used in conjunction with payload to transfer a list of used registers in a compact manner.
+pub const RegisterList = struct {
+    bitset: BitSet = BitSet.initEmpty(),
+
+    const BitSet = IntegerBitSet(32);
+    const Self = @This();
+
+    fn getIndexForReg(registers: []const Register, reg: Register) BitSet.MaskInt {
+        for (registers, 0..) |cpreg, i| {
+            if (reg.id() == cpreg.id()) return @intCast(i);
+        }
+        unreachable; // register not in input register list!
+    }
+
+    pub fn push(self: *Self, registers: []const Register, reg: Register) void {
+        const index = getIndexForReg(registers, reg);
+        self.bitset.set(index);
+    }
+
+    pub fn isSet(self: Self, registers: []const Register, reg: Register) bool {
+        const index = getIndexForReg(registers, reg);
+        return self.bitset.isSet(index);
+    }
+
+    pub fn iterator(self: Self, comptime options: std.bit_set.IteratorOptions) BitSet.Iterator(options) {
+        return self.bitset.iterator(options);
+    }
+
+    pub fn count(self: Self) u32 {
+        return @intCast(self.bitset.count());
+    }
+
+    pub fn size(self: Self) u32 {
+        return @intCast(self.bitset.count() * 8);
+    }
+};
+
+const Mir = @This();
+const std = @import("std");
+const builtin = @import("builtin");
+const assert = std.debug.assert;
+
+const bits = @import("bits.zig");
+const Register = bits.Register;
+const IntegerBitSet = std.bit_set.IntegerBitSet;
--- a/src/arch/riscv64/abi.zig
+++ b/src/arch/riscv64/abi.zig
@ -92,15 +92,18 @@ pub fn classifyType(ty: Type, mod: *Module) Class {
 }

 pub const callee_preserved_regs = [_]Register{
-    // NOTE: we use s0 as a psuedo stack pointer, so it's not included.
-    .s1, .s2, .s3, .s4, .s5, .s6, .s7, .s8, .s9, .s10, .s11,
+    .s0, .s1, .s2, .s3, .s4, .s5, .s6, .s7, .s8, .s9, .s10, .s11,
 };

 pub const function_arg_regs = [_]Register{
    .a0, .a1, .a2, .a3, .a4, .a5, .a6, .a7,
 };

-const allocatable_registers = callee_preserved_regs ++ function_arg_regs;
+pub const temporary_regs = [_]Register{
+    .t0, .t1, .t2, .t3, .t4, .t5, .t6,
+};
+
+const allocatable_registers = callee_preserved_regs ++ function_arg_regs ++ temporary_regs;
 pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_registers);

 // Register classes
@ -123,4 +126,13 @@ pub const RegisterClass = struct {
        }, true);
        break :blk set;
    };
+
+    pub const tp: RegisterBitSet = blk: {
+        var set = RegisterBitSet.initEmpty();
+        set.setRangeValue(.{
+            .start = callee_preserved_regs.len + function_arg_regs.len,
+            .end = callee_preserved_regs.len + function_arg_regs.len + temporary_regs.len,
+        }, true);
+        break :blk set;
+    };
 };
--- a/src/arch/riscv64/bits.zig
+++ b/src/arch/riscv64/bits.zig
@ -404,14 +404,14 @@ pub const Register = enum(u6) {
    t3, t4, t5, t6, // caller saved
    // zig fmt: on

-    /// Returns the unique 4-bit ID of this register which is used in
+    /// Returns the unique 5-bit ID of this register which is used in
    /// the machine code
    pub fn id(self: Register) u5 {
        return @as(u5, @truncate(@intFromEnum(self)));
    }

    pub fn dwarfLocOp(reg: Register) u8 {
-        return @as(u8, reg.id()) + DW.OP.reg0;
+        return @as(u8, reg.id());
    }
 };

--- a/src/register_manager.zig
+++ b/src/register_manager.zig
@ -102,7 +102,7 @@ pub fn RegisterManager(
            }

            const OptionalIndex = std.math.IntFittingRange(0, set.len);
-            comptime var map = [1]OptionalIndex{set.len} ** (max_id + 1 - min_id);
+            comptime var map = [1]OptionalIndex{set.len} ** (max_id - min_id + 1);
            inline for (set, 0..) |elem, elem_index| map[comptime elem.id() - min_id] = elem_index;

            const id_index = reg.id() -% min_id;