From 423bef4dfc635a3ca0144cac95384984857a8519 Mon Sep 17 00:00:00 2001 From: joachimschmidt557 Date: Fri, 29 Jul 2022 00:55:00 +0200 Subject: [PATCH 1/8] stage2 AArch64: Fix struct_field_val for register_with_overflow Now mirrors the behavior of the native ARM backend --- src/arch/aarch64/CodeGen.zig | 38 +++++++++++++++++------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index a8bafee4f8..b68ae283b5 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -3016,29 +3016,27 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { break :result MCValue{ .memory = addr + struct_field_offset }; }, .register_with_overflow => |rwo| { - switch (index) { - 0 => { - // get wrapped value: return register - break :result MCValue{ .register = rwo.reg }; - }, - 1 => { - // TODO return special MCValue condition flags - // get overflow bit: set register to C flag - // resp. V flag - const raw_dest_reg = try self.register_manager.allocReg(null, gp); - const dest_reg = raw_dest_reg.to32(); + const reg_lock = self.register_manager.lockRegAssumeUnused(rwo.reg); + defer self.register_manager.unlockReg(reg_lock); - _ = try self.addInst(.{ - .tag = .cset, - .data = .{ .r_cond = .{ - .rd = dest_reg, - .cond = rwo.flag, - } }, - }); + const field: MCValue = switch (index) { + // get wrapped value: return register + 0 => MCValue{ .register = rwo.reg }, + + // get overflow bit: return C or V flag + 1 => MCValue{ .condition_flags = rwo.flag }, - break :result MCValue{ .register = dest_reg }; - }, else => unreachable, + }; + + if (self.reuseOperand(inst, operand, 0, field)) { + break :result field; + } else { + // Copy to new register + const dest_reg = try self.register_manager.allocReg(null, gp); + try self.genSetReg(struct_ty.structFieldType(index), dest_reg, field); + + break :result MCValue{ .register = dest_reg }; } }, else => return self.fail("TODO implement codegen struct_field_val for {}", .{mcv}), From cf3aaceed9f2a9e1872bdd8b2cccecd1766e2419 Mon Sep 17 00:00:00 2001 From: joachimschmidt557 Date: Sat, 30 Jul 2022 23:17:15 +0200 Subject: [PATCH 2/8] stage2 AArch64: introduce MCValue.stack_argument_offset This new MCValue union member shares the same semantics as the MCValue type of the same name in the ARM backend. --- src/arch/aarch64/CodeGen.zig | 210 +++++++++++++++++++++++++++++++++-- src/arch/aarch64/Emit.zig | 64 +++++++++++ src/arch/aarch64/Mir.zig | 10 ++ 3 files changed, 275 insertions(+), 9 deletions(-) diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index b68ae283b5..63be9a2220 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -166,10 +166,12 @@ const MCValue = union(enum) { /// the type is u1) or true (if the type in bool) iff the /// specified condition is true. condition_flags: Condition, + /// The value is a function argument passed via the stack. 
+ stack_argument_offset: u32, fn isMemory(mcv: MCValue) bool { return switch (mcv) { - .memory, .stack_offset => true, + .memory, .stack_offset, .stack_argument_offset => true, else => false, }; } @@ -192,6 +194,7 @@ const MCValue = union(enum) { .condition_flags, .ptr_stack_offset, .undef, + .stack_argument_offset, => false, .register, @@ -337,6 +340,7 @@ pub fn generate( .prev_di_line = module_fn.lbrace_line, .prev_di_column = module_fn.lbrace_column, .stack_size = mem.alignForwardGeneric(u32, function.max_end_stack, function.stack_align), + .prologue_stack_space = call_info.stack_byte_count + function.saved_regs_stack_space, }; defer emit.deinit(); @@ -2726,6 +2730,7 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo }, .memory, .stack_offset, + .stack_argument_offset, .got_load, .direct_load, => { @@ -2927,6 +2932,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type }, .memory, .stack_offset, + .stack_argument_offset, .got_load, .direct_load, => { @@ -3009,6 +3015,9 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { switch (mcv) { .dead, .unreach => unreachable, + .stack_argument_offset => |off| { + break :result MCValue{ .stack_argument_offset = off - struct_field_offset }; + }, .stack_offset => |off| { break :result MCValue{ .stack_offset = off - struct_field_offset }; }, @@ -3152,12 +3161,12 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. try self.register_manager.getReg(reg, null); try self.genSetReg(arg_ty, reg, arg_mcv); }, - .stack_offset => { - return self.fail("TODO implement calling with parameters in memory", .{}); - }, - .ptr_stack_offset => { - return self.fail("TODO implement calling with MCValue.ptr_stack_offset arg", .{}); - }, + .stack_offset => unreachable, + .stack_argument_offset => |offset| try self.genSetStackArgument( + arg_ty, + info.stack_byte_count - offset, + arg_mcv, + ), else => unreachable, } } @@ -3884,7 +3893,7 @@ fn br(self: *Self, block: Air.Inst.Index, operand: Air.Inst.Ref) !void { block_data.mcv = switch (operand_mcv) { .none, .dead, .unreach => unreachable, .register, .stack_offset, .memory => operand_mcv, - .immediate, .condition_flags => blk: { + .immediate, .stack_argument_offset, .condition_flags => blk: { const new_mcv = try self.allocRegOrMem(block, true); try self.setRegOrMem(self.air.typeOfIndex(block), new_mcv, operand_mcv); break :blk new_mcv; @@ -4126,6 +4135,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro .got_load, .direct_load, .memory, + .stack_argument_offset, .stack_offset, => { switch (mcv) { @@ -4328,6 +4338,188 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void else => unreachable, } }, + .stack_argument_offset => |off| { + const abi_size = ty.abiSize(self.target.*); + + switch (abi_size) { + 1, 2, 4, 8 => { + const tag: Mir.Inst.Tag = switch (abi_size) { + 1 => if (ty.isSignedInt()) Mir.Inst.Tag.ldrsb_stack_argument else .ldrb_stack_argument, + 2 => if (ty.isSignedInt()) Mir.Inst.Tag.ldrsh_stack_argument else .ldrh_stack_argument, + 4, 8 => .ldr_stack_argument, + else => unreachable, // unexpected abi size + }; + + _ = try self.addInst(.{ + .tag = tag, + .data = .{ .load_store_stack = .{ + .rt = reg, + .offset = @intCast(u32, off), + } }, + }); + }, + 3, 5, 6, 7 => return self.fail("TODO implement genSetReg types size {}", .{abi_size}), + else => unreachable, + } + }, + } +} + +fn genSetStackArgument(self: *Self, ty: Type, stack_offset: u32, 
mcv: MCValue) InnerError!void { + const abi_size = @intCast(u32, ty.abiSize(self.target.*)); + switch (mcv) { + .dead => unreachable, + .none, .unreach => return, + .undef => { + if (!self.wantSafety()) + return; // The already existing value will do just fine. + // TODO Upgrade this to a memset call when we have that available. + switch (ty.abiSize(self.target.*)) { + 1 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaa }), + 2 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaa }), + 4 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaaaaaa }), + 8 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaaaaaaaaaaaaaa }), + else => return self.fail("TODO implement memset", .{}), + } + }, + .register => |reg| { + switch (abi_size) { + 1, 2, 4, 8 => { + const tag: Mir.Inst.Tag = switch (abi_size) { + 1 => .strb_immediate, + 2 => .strh_immediate, + 4, 8 => .str_immediate, + else => unreachable, // unexpected abi size + }; + const rt = registerAlias(reg, abi_size); + const offset = switch (abi_size) { + 1 => blk: { + if (math.cast(u12, stack_offset)) |imm| { + break :blk Instruction.LoadStoreOffset.imm(imm); + } else { + return self.fail("TODO genSetStackArgument byte with larger offset", .{}); + } + }, + 2 => blk: { + assert(std.mem.isAlignedGeneric(u32, stack_offset, 2)); // misaligned stack entry + if (math.cast(u12, @divExact(stack_offset, 2))) |imm| { + break :blk Instruction.LoadStoreOffset.imm(imm); + } else { + return self.fail("TODO getSetStackArgument halfword with larger offset", .{}); + } + }, + 4, 8 => blk: { + const alignment = abi_size; + assert(std.mem.isAlignedGeneric(u32, stack_offset, alignment)); // misaligned stack entry + if (math.cast(u12, @divExact(stack_offset, alignment))) |imm| { + break :blk Instruction.LoadStoreOffset.imm(imm); + } else { + return self.fail("TODO genSetStackArgument with larger offset", .{}); + } + }, + else => unreachable, + }; + + _ = try self.addInst(.{ + .tag = tag, + .data = .{ .load_store_register_immediate = .{ + .rt = rt, + .rn = .sp, + .offset = offset.immediate, + } }, + }); + }, + else => return self.fail("TODO genSetStackArgument other types abi_size={}", .{abi_size}), + } + }, + .register_with_overflow => { + return self.fail("TODO implement genSetStack {}", .{mcv}); + }, + .got_load, + .direct_load, + .memory, + .stack_argument_offset, + .stack_offset, + => { + if (abi_size <= 4) { + const reg = try self.copyToTmpRegister(ty, mcv); + return self.genSetStackArgument(ty, stack_offset, MCValue{ .register = reg }); + } else { + var ptr_ty_payload: Type.Payload.ElemType = .{ + .base = .{ .tag = .single_mut_pointer }, + .data = ty, + }; + const ptr_ty = Type.initPayload(&ptr_ty_payload.base); + + // TODO call extern memcpy + const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null }, gp); + const regs_locks = self.register_manager.lockRegsAssumeUnused(5, regs); + defer for (regs_locks) |reg| { + self.register_manager.unlockReg(reg); + }; + + const src_reg = regs[0]; + const dst_reg = regs[1]; + const len_reg = regs[2]; + const count_reg = regs[3]; + const tmp_reg = regs[4]; + + switch (mcv) { + .stack_offset => |off| { + // sub src_reg, fp, #off + try self.genSetReg(ptr_ty, src_reg, .{ .ptr_stack_offset = off }); + }, + .memory => |addr| try self.genSetReg(ptr_ty, src_reg, .{ .immediate = @intCast(u32, addr) }), + .got_load, + .direct_load, + => |sym_index| { + const tag: Mir.Inst.Tag = switch (mcv) { + .got_load => .load_memory_ptr_got, + .direct_load => 
.load_memory_ptr_direct, + else => unreachable, + }; + const mod = self.bin_file.options.module.?; + _ = try self.addInst(.{ + .tag = tag, + .data = .{ + .payload = try self.addExtra(Mir.LoadMemoryPie{ + .register = @enumToInt(src_reg), + .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index, + .sym_index = sym_index, + }), + }, + }); + }, + .stack_argument_offset => return self.fail("TODO load {}", .{mcv}), + else => unreachable, + } + + // add dst_reg, sp, #stack_offset + _ = try self.addInst(.{ + .tag = .add_immediate, + .data = .{ .rr_imm12_sh = .{ + .rd = dst_reg, + .rn = .sp, + .imm12 = math.cast(u12, stack_offset) orelse { + return self.fail("TODO load: set reg to stack offset with all possible offsets", .{}); + }, + } }, + }); + + // mov len, #abi_size + try self.genSetReg(Type.usize, len_reg, .{ .immediate = abi_size }); + + // memcpy(src, dst, len) + try self.genInlineMemcpy(src_reg, dst_reg, len_reg, count_reg, tmp_reg); + } + }, + .condition_flags, + .immediate, + .ptr_stack_offset, + => { + const reg = try self.copyToTmpRegister(ty, mcv); + return self.genSetStackArgument(ty, stack_offset, MCValue{ .register = reg }); + }, } } @@ -4835,8 +5027,8 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { } } - result.args[i] = .{ .stack_offset = nsaa }; nsaa += param_size; + result.args[i] = .{ .stack_argument_offset = nsaa }; } } diff --git a/src/arch/aarch64/Emit.zig b/src/arch/aarch64/Emit.zig index 47a0c08893..9320138f65 100644 --- a/src/arch/aarch64/Emit.zig +++ b/src/arch/aarch64/Emit.zig @@ -27,14 +27,21 @@ code: *std.ArrayList(u8), prev_di_line: u32, prev_di_column: u32, + /// Relative to the beginning of `code`. prev_di_pc: usize, +/// The amount of stack space consumed by all stack arguments as well +/// as the saved callee-saved registers +prologue_stack_space: u32, + /// The branch type of every branch branch_types: std.AutoHashMapUnmanaged(Mir.Inst.Index, BranchType) = .{}, + /// For every forward branch, maps the target instruction to a list of /// branches which branch to this target instruction branch_forward_origins: std.AutoHashMapUnmanaged(Mir.Inst.Index, std.ArrayListUnmanaged(Mir.Inst.Index)) = .{}, + /// For backward branches: stores the code offset of the target /// instruction /// @@ -42,6 +49,8 @@ branch_forward_origins: std.AutoHashMapUnmanaged(Mir.Inst.Index, std.ArrayListUn /// instruction code_offset_mapping: std.AutoHashMapUnmanaged(Mir.Inst.Index, usize) = .{}, +/// The final stack frame size of the function (already aligned to the +/// respective stack alignment). Does not include prologue stack space. 
stack_size: u32, const InnerError = error{ @@ -148,6 +157,12 @@ pub fn emitMir( .strb_stack => try emit.mirLoadStoreStack(inst), .strh_stack => try emit.mirLoadStoreStack(inst), + .ldr_stack_argument => try emit.mirLoadStackArgument(inst), + .ldrb_stack_argument => try emit.mirLoadStackArgument(inst), + .ldrh_stack_argument => try emit.mirLoadStackArgument(inst), + .ldrsb_stack_argument => try emit.mirLoadStackArgument(inst), + .ldrsh_stack_argument => try emit.mirLoadStackArgument(inst), + .ldr_register => try emit.mirLoadStoreRegisterRegister(inst), .ldrb_register => try emit.mirLoadStoreRegisterRegister(inst), .ldrh_register => try emit.mirLoadStoreRegisterRegister(inst), @@ -920,6 +935,55 @@ fn mirLoadStoreRegisterPair(emit: *Emit, inst: Mir.Inst.Index) !void { } } +fn mirLoadStackArgument(emit: *Emit, inst: Mir.Inst.Index) !void { + const tag = emit.mir.instructions.items(.tag)[inst]; + const load_store_stack = emit.mir.instructions.items(.data)[inst].load_store_stack; + const rt = load_store_stack.rt; + + const raw_offset = emit.stack_size + emit.prologue_stack_space - load_store_stack.offset; + const offset = switch (tag) { + .ldrb_stack_argument, .ldrsb_stack_argument => blk: { + if (math.cast(u12, raw_offset)) |imm| { + break :blk Instruction.LoadStoreOffset.imm(imm); + } else { + return emit.fail("TODO load stack argument byte with larger offset", .{}); + } + }, + .ldrh_stack_argument, .ldrsh_stack_argument => blk: { + assert(std.mem.isAlignedGeneric(u32, raw_offset, 2)); // misaligned stack entry + if (math.cast(u12, @divExact(raw_offset, 2))) |imm| { + break :blk Instruction.LoadStoreOffset.imm(imm); + } else { + return emit.fail("TODO load stack argument halfword with larger offset", .{}); + } + }, + .ldr_stack_argument => blk: { + const alignment: u32 = switch (rt.size()) { + 32 => 4, + 64 => 8, + else => unreachable, + }; + + assert(std.mem.isAlignedGeneric(u32, raw_offset, alignment)); // misaligned stack entry + if (math.cast(u12, @divExact(raw_offset, alignment))) |imm| { + break :blk Instruction.LoadStoreOffset.imm(imm); + } else { + return emit.fail("TODO load stack argument with larger offset", .{}); + } + }, + else => unreachable, + }; + + switch (tag) { + .ldr_stack_argument => try emit.writeInstruction(Instruction.ldr(rt, .sp, offset)), + .ldrb_stack_argument => try emit.writeInstruction(Instruction.ldrb(rt, .sp, offset)), + .ldrh_stack_argument => try emit.writeInstruction(Instruction.ldrh(rt, .sp, offset)), + .ldrsb_stack_argument => try emit.writeInstruction(Instruction.ldrsb(rt, .sp, offset)), + .ldrsh_stack_argument => try emit.writeInstruction(Instruction.ldrsh(rt, .sp, offset)), + else => unreachable, + } +} + fn mirLoadStoreStack(emit: *Emit, inst: Mir.Inst.Index) !void { const tag = emit.mir.instructions.items(.tag)[inst]; const load_store_stack = emit.mir.instructions.items(.data)[inst].load_store_stack; diff --git a/src/arch/aarch64/Mir.zig b/src/arch/aarch64/Mir.zig index 2fef069f7a..6242026b66 100644 --- a/src/arch/aarch64/Mir.zig +++ b/src/arch/aarch64/Mir.zig @@ -94,18 +94,24 @@ pub const Inst = struct { ldp, /// Pseudo-instruction: Load from stack ldr_stack, + /// Pseudo-instruction: Load from stack argument + ldr_stack_argument, /// Load Register (immediate) ldr_immediate, /// Load Register (register) ldr_register, /// Pseudo-instruction: Load byte from stack ldrb_stack, + /// Pseudo-instruction: Load byte from stack argument + ldrb_stack_argument, /// Load Register Byte (immediate) ldrb_immediate, /// Load Register Byte (register) ldrb_register, 
/// Pseudo-instruction: Load halfword from stack ldrh_stack, + /// Pseudo-instruction: Load halfword from stack argument + ldrh_stack_argument, /// Load Register Halfword (immediate) ldrh_immediate, /// Load Register Halfword (register) @@ -114,10 +120,14 @@ pub const Inst = struct { ldrsb_immediate, /// Pseudo-instruction: Load signed byte from stack ldrsb_stack, + /// Pseudo-instruction: Load signed byte from stack argument + ldrsb_stack_argument, /// Load Register Signed Halfword (immediate) ldrsh_immediate, /// Pseudo-instruction: Load signed halfword from stack ldrsh_stack, + /// Pseudo-instruction: Load signed halfword from stack argument + ldrsh_stack_argument, /// Load Register Signed Word (immediate) ldrsw_immediate, /// Logical Shift Left (immediate) From 65b3c27f2457f3d957d83edf13e20e41e84f6dd4 Mon Sep 17 00:00:00 2001 From: joachimschmidt557 Date: Tue, 2 Aug 2022 21:04:54 +0200 Subject: [PATCH 3/8] stage2 AArch64: all arguments passed via stack from now on Only in the Undefined calling convention, not in other calling conventions --- src/arch/aarch64/CodeGen.zig | 142 +++++++++++++++++++++-------------- src/arch/aarch64/Emit.zig | 65 +++++++++------- src/arch/aarch64/Mir.zig | 2 + 3 files changed, 128 insertions(+), 81 deletions(-) diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index 63be9a2220..f3a6ad84ed 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -340,7 +340,7 @@ pub fn generate( .prev_di_line = module_fn.lbrace_line, .prev_di_column = module_fn.lbrace_column, .stack_size = mem.alignForwardGeneric(u32, function.max_end_stack, function.stack_align), - .prologue_stack_space = call_info.stack_byte_count + function.saved_regs_stack_space, + .saved_regs_stack_space = function.saved_regs_stack_space, }; defer emit.deinit(); @@ -2317,6 +2317,9 @@ fn errUnionErr(self: *Self, error_union_mcv: MCValue, error_union_ty: Type) !MCV const err_offset = @intCast(u32, errUnionErrorOffset(payload_ty, self.target.*)); switch (error_union_mcv) { .register => return self.fail("TODO errUnionErr for registers", .{}), + .stack_argument_offset => |off| { + return MCValue{ .stack_argument_offset = off + err_offset }; + }, .stack_offset => |off| { return MCValue{ .stack_offset = off - err_offset }; }, @@ -2351,6 +2354,9 @@ fn errUnionPayload(self: *Self, error_union_mcv: MCValue, error_union_ty: Type) const payload_offset = @intCast(u32, errUnionPayloadOffset(payload_ty, self.target.*)); switch (error_union_mcv) { .register => return self.fail("TODO errUnionPayload for registers", .{}), + .stack_argument_offset => |off| { + return MCValue{ .stack_argument_offset = off + payload_offset }; + }, .stack_offset => |off| { return MCValue{ .stack_offset = off - payload_offset }; }, @@ -3016,7 +3022,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { switch (mcv) { .dead, .unreach => unreachable, .stack_argument_offset => |off| { - break :result MCValue{ .stack_argument_offset = off - struct_field_offset }; + break :result MCValue{ .stack_argument_offset = off + struct_field_offset }; }, .stack_offset => |off| { break :result MCValue{ .stack_offset = off - struct_field_offset }; @@ -3150,6 +3156,9 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. 
// saving compare flags may require a new caller-saved register try self.spillCompareFlagsIfOccupied(); + // Make space for the arguments passed via the stack + self.max_end_stack += info.stack_byte_count; + for (info.args) |mc_arg, arg_i| { const arg = args[arg_i]; const arg_ty = self.air.typeOf(arg); @@ -3164,7 +3173,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. .stack_offset => unreachable, .stack_argument_offset => |offset| try self.genSetStackArgument( arg_ty, - info.stack_byte_count - offset, + offset, arg_mcv, ), else => unreachable, @@ -3642,40 +3651,14 @@ fn isNonNull(self: *Self, operand: MCValue) !MCValue { fn isErr(self: *Self, ty: Type, operand: MCValue) !MCValue { const error_type = ty.errorUnionSet(); - const payload_type = ty.errorUnionPayload(); + const error_int_type = Type.initTag(.u16); if (error_type.errorSetIsEmpty()) { return MCValue{ .immediate = 0 }; // always false } - const err_off = errUnionErrorOffset(payload_type, self.target.*); - switch (operand) { - .stack_offset => |off| { - const offset = off - @intCast(u32, err_off); - const tmp_reg = try self.copyToTmpRegister(Type.anyerror, .{ .stack_offset = offset }); - _ = try self.addInst(.{ - .tag = .cmp_immediate, - .data = .{ .r_imm12_sh = .{ - .rn = tmp_reg, - .imm12 = 0, - } }, - }); - }, - .register => |reg| { - if (err_off > 0 or payload_type.hasRuntimeBitsIgnoreComptime()) { - return self.fail("TODO implement isErr for register operand with payload bits", .{}); - } - _ = try self.addInst(.{ - .tag = .cmp_immediate, - .data = .{ .r_imm12_sh = .{ - .rn = reg, - .imm12 = 0, - } }, - }); - }, - else => return self.fail("TODO implement isErr for {}", .{operand}), - } - + const error_mcv = try self.errUnionErr(operand, ty); + _ = try self.binOp(.cmp_eq, error_mcv, .{ .immediate = 0 }, error_int_type, error_int_type, null); return MCValue{ .condition_flags = .hi }; } @@ -4174,6 +4157,15 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro // sub src_reg, fp, #off try self.genSetReg(ptr_ty, src_reg, .{ .ptr_stack_offset = off }); }, + .stack_argument_offset => |off| { + _ = try self.addInst(.{ + .tag = .ldr_ptr_stack_argument, + .data = .{ .load_store_stack = .{ + .rt = src_reg, + .offset = off, + } }, + }); + }, .memory => |addr| try self.genSetReg(Type.usize, src_reg, .{ .immediate = addr }), .got_load, .direct_load, @@ -4433,7 +4425,7 @@ fn genSetStackArgument(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) I } }, .register_with_overflow => { - return self.fail("TODO implement genSetStack {}", .{mcv}); + return self.fail("TODO implement genSetStackArgument {}", .{mcv}); }, .got_load, .direct_load, @@ -4469,6 +4461,15 @@ fn genSetStackArgument(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) I // sub src_reg, fp, #off try self.genSetReg(ptr_ty, src_reg, .{ .ptr_stack_offset = off }); }, + .stack_argument_offset => |off| { + _ = try self.addInst(.{ + .tag = .ldr_ptr_stack_argument, + .data = .{ .load_store_stack = .{ + .rt = src_reg, + .offset = off, + } }, + }); + }, .memory => |addr| try self.genSetReg(ptr_ty, src_reg, .{ .immediate = @intCast(u32, addr) }), .got_load, .direct_load, @@ -4490,7 +4491,6 @@ fn genSetStackArgument(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) I }, }); }, - .stack_argument_offset => return self.fail("TODO load {}", .{mcv}), else => unreachable, } @@ -4989,11 +4989,27 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { result.stack_align = 1; return result; }, - 
.Unspecified, .C => { + .C => { // ARM64 Procedure Call Standard var ncrn: usize = 0; // Next Core Register Number var nsaa: u32 = 0; // Next stacked argument address + if (ret_ty.zigTypeTag() == .NoReturn) { + result.return_value = .{ .unreach = {} }; + } else if (!ret_ty.hasRuntimeBitsIgnoreComptime() and !ret_ty.isError()) { + result.return_value = .{ .none = {} }; + } else { + const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*)); + if (ret_ty_size == 0) { + assert(ret_ty.isError()); + result.return_value = .{ .immediate = 0 }; + } else if (ret_ty_size <= 8) { + result.return_value = .{ .register = registerAlias(c_abi_int_return_regs[0], ret_ty_size) }; + } else { + return self.fail("TODO support more return types for ARM backend", .{}); + } + } + for (param_types) |ty, i| { const param_size = @intCast(u32, ty.abiSize(self.target.*)); if (param_size == 0) { @@ -5027,36 +5043,52 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { } } - nsaa += param_size; result.args[i] = .{ .stack_argument_offset = nsaa }; + nsaa += param_size; } } result.stack_byte_count = nsaa; result.stack_align = 16; }, + .Unspecified => { + if (ret_ty.zigTypeTag() == .NoReturn) { + result.return_value = .{ .unreach = {} }; + } else if (!ret_ty.hasRuntimeBitsIgnoreComptime() and !ret_ty.isError()) { + result.return_value = .{ .none = {} }; + } else { + const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*)); + if (ret_ty_size == 0) { + assert(ret_ty.isError()); + result.return_value = .{ .immediate = 0 }; + } else if (ret_ty_size <= 8) { + result.return_value = .{ .register = registerAlias(c_abi_int_return_regs[0], ret_ty_size) }; + } else { + return self.fail("TODO support more return types for ARM backend", .{}); + } + } + + var stack_offset: u32 = 0; + + for (param_types) |ty, i| { + if (ty.abiSize(self.target.*) > 0) { + const param_size = @intCast(u32, ty.abiSize(self.target.*)); + const param_alignment = ty.abiAlignment(self.target.*); + + stack_offset = std.mem.alignForwardGeneric(u32, stack_offset, param_alignment); + result.args[i] = .{ .stack_argument_offset = stack_offset }; + stack_offset += param_size; + } else { + result.args[i] = .{ .none = {} }; + } + } + + result.stack_byte_count = stack_offset; + result.stack_align = 16; + }, else => return self.fail("TODO implement function parameters for {} on aarch64", .{cc}), } - if (ret_ty.zigTypeTag() == .NoReturn) { - result.return_value = .{ .unreach = {} }; - } else if (!ret_ty.hasRuntimeBitsIgnoreComptime() and !ret_ty.isError()) { - result.return_value = .{ .none = {} }; - } else switch (cc) { - .Naked => unreachable, - .Unspecified, .C => { - const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*)); - if (ret_ty_size == 0) { - assert(ret_ty.isError()); - result.return_value = .{ .immediate = 0 }; - } else if (ret_ty_size <= 8) { - result.return_value = .{ .register = registerAlias(c_abi_int_return_regs[0], ret_ty_size) }; - } else { - return self.fail("TODO support more return types for ARM backend", .{}); - } - }, - else => return self.fail("TODO implement function return values for {}", .{cc}), - } return result; } diff --git a/src/arch/aarch64/Emit.zig b/src/arch/aarch64/Emit.zig index 9320138f65..1ca198ccd8 100644 --- a/src/arch/aarch64/Emit.zig +++ b/src/arch/aarch64/Emit.zig @@ -31,9 +31,9 @@ prev_di_column: u32, /// Relative to the beginning of `code`. 
prev_di_pc: usize, -/// The amount of stack space consumed by all stack arguments as well -/// as the saved callee-saved registers -prologue_stack_space: u32, +/// The amount of stack space consumed by the saved callee-saved +/// registers in bytes +saved_regs_stack_space: u32, /// The branch type of every branch branch_types: std.AutoHashMapUnmanaged(Mir.Inst.Index, BranchType) = .{}, @@ -158,6 +158,7 @@ pub fn emitMir( .strh_stack => try emit.mirLoadStoreStack(inst), .ldr_stack_argument => try emit.mirLoadStackArgument(inst), + .ldr_ptr_stack_argument => try emit.mirLoadStackArgument(inst), .ldrb_stack_argument => try emit.mirLoadStackArgument(inst), .ldrh_stack_argument => try emit.mirLoadStackArgument(inst), .ldrsb_stack_argument => try emit.mirLoadStackArgument(inst), @@ -940,24 +941,42 @@ fn mirLoadStackArgument(emit: *Emit, inst: Mir.Inst.Index) !void { const load_store_stack = emit.mir.instructions.items(.data)[inst].load_store_stack; const rt = load_store_stack.rt; - const raw_offset = emit.stack_size + emit.prologue_stack_space - load_store_stack.offset; - const offset = switch (tag) { - .ldrb_stack_argument, .ldrsb_stack_argument => blk: { - if (math.cast(u12, raw_offset)) |imm| { - break :blk Instruction.LoadStoreOffset.imm(imm); - } else { + const raw_offset = emit.stack_size + emit.saved_regs_stack_space + load_store_stack.offset; + switch (tag) { + .ldr_ptr_stack_argument => { + const offset = if (math.cast(u12, raw_offset)) |imm| imm else { + return emit.fail("TODO load stack argument ptr with larger offset", .{}); + }; + + switch (tag) { + .ldr_ptr_stack_argument => try emit.writeInstruction(Instruction.add(rt, .sp, offset, false)), + else => unreachable, + } + }, + .ldrb_stack_argument, .ldrsb_stack_argument => { + const offset = if (math.cast(u12, raw_offset)) |imm| Instruction.LoadStoreOffset.imm(imm) else { return emit.fail("TODO load stack argument byte with larger offset", .{}); + }; + + switch (tag) { + .ldrb_stack_argument => try emit.writeInstruction(Instruction.ldrb(rt, .sp, offset)), + .ldrsb_stack_argument => try emit.writeInstruction(Instruction.ldrsb(rt, .sp, offset)), + else => unreachable, } }, - .ldrh_stack_argument, .ldrsh_stack_argument => blk: { + .ldrh_stack_argument, .ldrsh_stack_argument => { assert(std.mem.isAlignedGeneric(u32, raw_offset, 2)); // misaligned stack entry - if (math.cast(u12, @divExact(raw_offset, 2))) |imm| { - break :blk Instruction.LoadStoreOffset.imm(imm); - } else { + const offset = if (math.cast(u12, @divExact(raw_offset, 2))) |imm| Instruction.LoadStoreOffset.imm(imm) else { return emit.fail("TODO load stack argument halfword with larger offset", .{}); + }; + + switch (tag) { + .ldrh_stack_argument => try emit.writeInstruction(Instruction.ldrh(rt, .sp, offset)), + .ldrsh_stack_argument => try emit.writeInstruction(Instruction.ldrsh(rt, .sp, offset)), + else => unreachable, } }, - .ldr_stack_argument => blk: { + .ldr_stack_argument => { const alignment: u32 = switch (rt.size()) { 32 => 4, 64 => 8, @@ -965,22 +984,16 @@ fn mirLoadStackArgument(emit: *Emit, inst: Mir.Inst.Index) !void { }; assert(std.mem.isAlignedGeneric(u32, raw_offset, alignment)); // misaligned stack entry - if (math.cast(u12, @divExact(raw_offset, alignment))) |imm| { - break :blk Instruction.LoadStoreOffset.imm(imm); - } else { + const offset = if (math.cast(u12, @divExact(raw_offset, alignment))) |imm| Instruction.LoadStoreOffset.imm(imm) else { return emit.fail("TODO load stack argument with larger offset", .{}); + }; + + switch (tag) { + 
.ldr_stack_argument => try emit.writeInstruction(Instruction.ldr(rt, .sp, offset)), + else => unreachable, } }, else => unreachable, - }; - - switch (tag) { - .ldr_stack_argument => try emit.writeInstruction(Instruction.ldr(rt, .sp, offset)), - .ldrb_stack_argument => try emit.writeInstruction(Instruction.ldrb(rt, .sp, offset)), - .ldrh_stack_argument => try emit.writeInstruction(Instruction.ldrh(rt, .sp, offset)), - .ldrsb_stack_argument => try emit.writeInstruction(Instruction.ldrsb(rt, .sp, offset)), - .ldrsh_stack_argument => try emit.writeInstruction(Instruction.ldrsh(rt, .sp, offset)), - else => unreachable, } } diff --git a/src/arch/aarch64/Mir.zig b/src/arch/aarch64/Mir.zig index 6242026b66..c4d6af9db4 100644 --- a/src/arch/aarch64/Mir.zig +++ b/src/arch/aarch64/Mir.zig @@ -92,6 +92,8 @@ pub const Inst = struct { load_memory_ptr_direct, /// Load Pair of Registers ldp, + /// Pseudo-instruction: Load pointer to stack argument + ldr_ptr_stack_argument, /// Pseudo-instruction: Load from stack ldr_stack, /// Pseudo-instruction: Load from stack argument From 02738228f24b807c45f9ae7b3b1f3657668aed27 Mon Sep 17 00:00:00 2001 From: joachimschmidt557 Date: Thu, 4 Aug 2022 09:33:04 +0200 Subject: [PATCH 4/8] stage2 AArch64: support returning values by reference also adds some more support for slices passed as stack arguments --- src/arch/aarch64/CodeGen.zig | 153 ++++++++++++++++++++++++++++------- 1 file changed, 124 insertions(+), 29 deletions(-) diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index f3a6ad84ed..c5b71657aa 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -418,6 +418,23 @@ fn gen(self: *Self) !void { // sub sp, sp, #reloc const backpatch_reloc = try self.addNop(); + if (self.ret_mcv == .stack_offset) { + // The address of where to store the return value is in x0 + // (or w0 when pointer size is 32 bits). As this register + // might get overwritten along the way, save the address + // to the stack. 
+ const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bytes = @divExact(ptr_bits, 8); + const ret_ptr_reg = registerAlias(.x0, ptr_bytes); + + const stack_offset = mem.alignForwardGeneric(u32, self.next_stack_offset, ptr_bytes) + ptr_bytes; + self.next_stack_offset = stack_offset; + self.max_end_stack = @maximum(self.max_end_stack, self.next_stack_offset); + + try self.genSetStack(Type.usize, stack_offset, MCValue{ .register = ret_ptr_reg }); + self.ret_mcv = MCValue{ .stack_offset = stack_offset }; + } + _ = try self.addInst(.{ .tag = .dbg_prologue_end, .data = .{ .nop = {} }, @@ -2446,21 +2463,28 @@ fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } +fn slicePtr(mcv: MCValue) MCValue { + switch (mcv) { + .dead, .unreach, .none => unreachable, + .register => unreachable, // a slice doesn't fit in one register + .stack_argument_offset => |off| { + return MCValue{ .stack_argument_offset = off }; + }, + .stack_offset => |off| { + return MCValue{ .stack_offset = off }; + }, + .memory => |addr| { + return MCValue{ .memory = addr }; + }, + else => unreachable, // invalid MCValue for a slice + } +} + fn airSlicePtr(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const mcv = try self.resolveInst(ty_op.operand); - switch (mcv) { - .dead, .unreach, .none => unreachable, - .register => unreachable, // a slice doesn't fit in one register - .stack_offset => |off| { - break :result MCValue{ .stack_offset = off }; - }, - .memory => |addr| { - break :result MCValue{ .memory = addr }; - }, - else => return self.fail("TODO implement slice_len for {}", .{mcv}), - } + break :result slicePtr(mcv); }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } @@ -2474,6 +2498,9 @@ fn airSliceLen(self: *Self, inst: Air.Inst.Index) !void { switch (mcv) { .dead, .unreach, .none => unreachable, .register => unreachable, // a slice doesn't fit in one register + .stack_argument_offset => |off| { + break :result MCValue{ .stack_argument_offset = off + ptr_bytes }; + }, .stack_offset => |off| { break :result MCValue{ .stack_offset = off - ptr_bytes }; }, @@ -2524,6 +2551,9 @@ fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void { if (!is_volatile and self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none }); const result: MCValue = result: { + const slice_ty = self.air.typeOf(bin_op.lhs); + const elem_ty = slice_ty.childType(); + const elem_size = elem_ty.abiSize(self.target.*); const slice_mcv = try self.resolveInst(bin_op.lhs); // TODO optimize for the case where the index is a constant, @@ -2531,10 +2561,6 @@ fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void { const index_mcv = try self.resolveInst(bin_op.rhs); const index_is_register = index_mcv == .register; - const slice_ty = self.air.typeOf(bin_op.lhs); - const elem_ty = slice_ty.childType(); - const elem_size = elem_ty.abiSize(self.target.*); - var buf: Type.SlicePtrFieldTypeBuffer = undefined; const slice_ptr_field_type = slice_ty.slicePtrFieldType(&buf); @@ -2544,15 +2570,17 @@ fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void { null; defer if (index_lock) |reg| self.register_manager.unlockReg(reg); - const base_mcv: MCValue = switch (slice_mcv) { - .stack_offset => |off| .{ .register = try self.copyToTmpRegister(slice_ptr_field_type, .{ 
.stack_offset = off }) }, - else => return self.fail("TODO slice_elem_val when slice is {}", .{slice_mcv}), - }; - const base_lock = self.register_manager.lockRegAssumeUnused(base_mcv.register); - defer self.register_manager.unlockReg(base_lock); + const base_mcv = slicePtr(slice_mcv); switch (elem_size) { else => { + const base_reg = switch (base_mcv) { + .register => |r| r, + else => try self.copyToTmpRegister(slice_ptr_field_type, base_mcv), + }; + const base_reg_lock = self.register_manager.lockRegAssumeUnused(base_reg); + defer self.register_manager.unlockReg(base_reg_lock); + const dest = try self.allocRegOrMem(inst, true); const addr = try self.binOp(.ptr_add, base_mcv, index_mcv, slice_ptr_field_type, Type.usize, null); try self.load(dest, addr, slice_ptr_field_type); @@ -2567,7 +2595,16 @@ fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void { fn airSliceElemPtr(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; - const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement slice_elem_ptr for {}", .{self.target.cpu.arch}); + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const slice_mcv = try self.resolveInst(extra.lhs); + const index_mcv = try self.resolveInst(extra.rhs); + const base_mcv = slicePtr(slice_mcv); + + const slice_ty = self.air.typeOf(extra.lhs); + + const addr = try self.binOp(.ptr_add, base_mcv, index_mcv, slice_ty, Type.usize, null); + break :result addr; + }; return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, .none }); } @@ -3156,6 +3193,28 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. 
// saving compare flags may require a new caller-saved register try self.spillCompareFlagsIfOccupied(); + if (info.return_value == .stack_offset) { + log.debug("airCall: return by reference", .{}); + const ret_ty = fn_ty.fnReturnType(); + const ret_abi_size = @intCast(u32, ret_ty.abiSize(self.target.*)); + const ret_abi_align = @intCast(u32, ret_ty.abiAlignment(self.target.*)); + const stack_offset = try self.allocMem(inst, ret_abi_size, ret_abi_align); + + const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bytes = @divExact(ptr_bits, 8); + const ret_ptr_reg = registerAlias(.x0, ptr_bytes); + + var ptr_ty_payload: Type.Payload.ElemType = .{ + .base = .{ .tag = .single_mut_pointer }, + .data = ret_ty, + }; + const ptr_ty = Type.initPayload(&ptr_ty_payload.base); + try self.register_manager.getReg(ret_ptr_reg, null); + try self.genSetReg(ptr_ty, ret_ptr_reg, .{ .ptr_stack_offset = stack_offset }); + + info.return_value = .{ .stack_offset = stack_offset }; + } + // Make space for the arguments passed via the stack self.max_end_stack += info.stack_byte_count; @@ -3319,8 +3378,15 @@ fn airRet(self: *Self, inst: Air.Inst.Index) !void { }, .stack_offset => { // Return result by reference - // TODO - return self.fail("TODO implement airRet for {}", .{self.ret_mcv}); + // + // self.ret_mcv is an address to where this function + // should store its result into + var ptr_ty_payload: Type.Payload.ElemType = .{ + .base = .{ .tag = .single_mut_pointer }, + .data = ret_ty, + }; + const ptr_ty = Type.initPayload(&ptr_ty_payload.base); + try self.store(self.ret_mcv, operand, ptr_ty, ret_ty); }, else => unreachable, } @@ -3346,10 +3412,34 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { }, .stack_offset => { // Return result by reference - // TODO - return self.fail("TODO implement airRetLoad for {}", .{self.ret_mcv}); + // + // self.ret_mcv is an address to where this function + // should store its result into + // + // If the operand is a ret_ptr instruction, we are done + // here. Else we need to load the result from the location + // pointed to by the operand and store it to the result + // location. 
+ const op_inst = Air.refToIndex(un_op).?; + if (self.air.instructions.items(.tag)[op_inst] != .ret_ptr) { + const abi_size = @intCast(u32, ret_ty.abiSize(self.target.*)); + const abi_align = ret_ty.abiAlignment(self.target.*); + + // This is essentially allocMem without the + // instruction tracking + if (abi_align > self.stack_align) + self.stack_align = abi_align; + // TODO find a free slot instead of always appending + const offset = mem.alignForwardGeneric(u32, self.next_stack_offset, abi_align) + abi_size; + self.next_stack_offset = offset; + self.max_end_stack = @maximum(self.max_end_stack, self.next_stack_offset); + + const tmp_mcv = MCValue{ .stack_offset = offset }; + try self.load(tmp_mcv, ptr, ptr_ty); + try self.store(self.ret_mcv, tmp_mcv, ptr_ty, ret_ty); + } }, - else => unreachable, + else => unreachable, // invalid return result } try self.exitlude_jump_relocs.append(self.gpa, try self.addNop()); @@ -5062,9 +5152,14 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { assert(ret_ty.isError()); result.return_value = .{ .immediate = 0 }; } else if (ret_ty_size <= 8) { - result.return_value = .{ .register = registerAlias(c_abi_int_return_regs[0], ret_ty_size) }; + result.return_value = .{ .register = registerAlias(.x0, ret_ty_size) }; } else { - return self.fail("TODO support more return types for ARM backend", .{}); + // The result is returned by reference, not by + // value. This means that x0 (or w0 when pointer + // size is 32 bits) will contain the address of + // where this function should write the result + // into. + result.return_value = .{ .stack_offset = 0 }; } } From dcb236acf432c1d772b2f7d65074735eeae4c4c2 Mon Sep 17 00:00:00 2001 From: joachimschmidt557 Date: Thu, 4 Aug 2022 17:51:15 +0200 Subject: [PATCH 5/8] stage2 AArch64: memcpy support in store and more complete intcast --- src/arch/aarch64/CodeGen.zig | 80 +++++++++++++++++++++++++++++++----- 1 file changed, 70 insertions(+), 10 deletions(-) diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index c5b71657aa..c30b8d97b3 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -1029,17 +1029,37 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { if (self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ ty_op.operand, .none, .none }); - const operand_ty = self.air.typeOf(ty_op.operand); - const operand = try self.resolveInst(ty_op.operand); - const info_a = operand_ty.intInfo(self.target.*); - const info_b = self.air.typeOfIndex(inst).intInfo(self.target.*); - if (info_a.signedness != info_b.signedness) - return self.fail("TODO gen intcast sign safety in semantic analysis", .{}); + const operand = ty_op.operand; + const operand_mcv = try self.resolveInst(operand); + const operand_ty = self.air.typeOf(operand); + const operand_info = operand_ty.intInfo(self.target.*); - if (info_a.bits == info_b.bits) - return self.finishAir(inst, operand, .{ ty_op.operand, .none, .none }); + const dest_ty = self.air.typeOfIndex(inst); + const dest_info = dest_ty.intInfo(self.target.*); - return self.fail("TODO implement intCast for {}", .{self.target.cpu.arch}); + const result: MCValue = result: { + const operand_lock: ?RegisterLock = switch (operand_mcv) { + .register => |reg| self.register_manager.lockRegAssumeUnused(reg), + else => null, + }; + defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); + + if (dest_info.bits > operand_info.bits) { + const dest_mcv = try self.allocRegOrMem(inst, true); + try 
self.setRegOrMem(self.air.typeOfIndex(inst), dest_mcv, operand_mcv); + break :result dest_mcv; + } else { + if (self.reuseOperand(inst, operand, 0, operand_mcv)) { + break :result operand_mcv; + } else { + const dest_mcv = try self.allocRegOrMem(inst, true); + try self.setRegOrMem(self.air.typeOfIndex(inst), dest_mcv, operand_mcv); + break :result dest_mcv; + } + } + }; + + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } fn truncRegister( @@ -1065,6 +1085,8 @@ fn truncRegister( }); }, 32, 64 => { + assert(dest_reg.size() == operand_reg.size()); + _ = try self.addInst(.{ .tag = .mov_register, .data = .{ .rr = .{ @@ -2955,6 +2977,8 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type defer if (addr_reg_lock) |reg| self.register_manager.unlockReg(reg); switch (value) { + .dead => unreachable, + .undef => unreachable, .register => |value_reg| { try self.genStrRegister(value_reg, addr_reg, value_ty); }, @@ -2968,7 +2992,41 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type try self.genSetReg(value_ty, tmp_reg, value); try self.store(ptr, .{ .register = tmp_reg }, ptr_ty, value_ty); } else { - return self.fail("TODO implement memcpy", .{}); + const regs = try self.register_manager.allocRegs(4, .{ null, null, null, null }, gp); + const regs_locks = self.register_manager.lockRegsAssumeUnused(4, regs); + defer for (regs_locks) |reg| { + self.register_manager.unlockReg(reg); + }; + + const src_reg = addr_reg; + const dst_reg = regs[0]; + const len_reg = regs[1]; + const count_reg = regs[2]; + const tmp_reg = regs[3]; + + switch (value) { + .stack_offset => |off| { + // sub src_reg, fp, #off + try self.genSetReg(ptr_ty, src_reg, .{ .ptr_stack_offset = off }); + }, + .memory => |addr| try self.genSetReg(Type.usize, src_reg, .{ .immediate = @intCast(u32, addr) }), + .stack_argument_offset => |off| { + _ = try self.addInst(.{ + .tag = .ldr_ptr_stack_argument, + .data = .{ .load_store_stack = .{ + .rt = src_reg, + .offset = off, + } }, + }); + }, + else => return self.fail("TODO store {} to register", .{value}), + } + + // mov len, #abi_size + try self.genSetReg(Type.usize, len_reg, .{ .immediate = abi_size }); + + // memcpy(src, dst, len) + try self.genInlineMemcpy(src_reg, dst_reg, len_reg, count_reg, tmp_reg); } }, } @@ -4359,6 +4417,8 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void } }, .register => |src_reg| { + assert(src_reg.size() == reg.size()); + // If the registers are the same, nothing to do. 
if (src_reg.id() == reg.id()) return; From 8b24c783c5bd417f84beeb2f3736a78c3f595d22 Mon Sep 17 00:00:00 2001 From: joachimschmidt557 Date: Thu, 4 Aug 2022 21:05:11 +0200 Subject: [PATCH 6/8] stage2 AArch64: implement basic integer division --- src/arch/aarch64/CodeGen.zig | 130 ++++++++++++++++++++++++----------- src/arch/aarch64/Emit.zig | 12 ++-- src/arch/aarch64/Mir.zig | 4 ++ src/arch/aarch64/bits.zig | 8 +++ 4 files changed, 109 insertions(+), 45 deletions(-) diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index c30b8d97b3..37c3721709 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -561,33 +561,38 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { switch (air_tags[inst]) { // zig fmt: off - .add => try self.airBinOp(inst, .add), - .addwrap => try self.airBinOp(inst, .addwrap), - .sub => try self.airBinOp(inst, .sub), - .subwrap => try self.airBinOp(inst, .subwrap), - .mul => try self.airBinOp(inst, .mul), - .mulwrap => try self.airBinOp(inst, .mulwrap), - .shl => try self.airBinOp(inst, .shl), - .shl_exact => try self.airBinOp(inst, .shl_exact), - .bool_and => try self.airBinOp(inst, .bool_and), - .bool_or => try self.airBinOp(inst, .bool_or), - .bit_and => try self.airBinOp(inst, .bit_and), - .bit_or => try self.airBinOp(inst, .bit_or), - .xor => try self.airBinOp(inst, .xor), - .shr => try self.airBinOp(inst, .shr), - .shr_exact => try self.airBinOp(inst, .shr_exact), + .add => try self.airBinOp(inst, .add), + .addwrap => try self.airBinOp(inst, .addwrap), + .sub => try self.airBinOp(inst, .sub), + .subwrap => try self.airBinOp(inst, .subwrap), + .mul => try self.airBinOp(inst, .mul), + .mulwrap => try self.airBinOp(inst, .mulwrap), + .shl => try self.airBinOp(inst, .shl), + .shl_exact => try self.airBinOp(inst, .shl_exact), + .bool_and => try self.airBinOp(inst, .bool_and), + .bool_or => try self.airBinOp(inst, .bool_or), + .bit_and => try self.airBinOp(inst, .bit_and), + .bit_or => try self.airBinOp(inst, .bit_or), + .xor => try self.airBinOp(inst, .xor), + .shr => try self.airBinOp(inst, .shr), + .shr_exact => try self.airBinOp(inst, .shr_exact), + .div_float => try self.airBinOp(inst, .div_float), + .div_trunc => try self.airBinOp(inst, .div_trunc), + .div_floor => try self.airBinOp(inst, .div_floor), + .div_exact => try self.airBinOp(inst, .div_exact), + .rem => try self.airBinOp(inst, .rem), + .mod => try self.airBinOp(inst, .mod), - .ptr_add => try self.airPtrArithmetic(inst, .ptr_add), - .ptr_sub => try self.airPtrArithmetic(inst, .ptr_sub), + .ptr_add => try self.airPtrArithmetic(inst, .ptr_add), + .ptr_sub => try self.airPtrArithmetic(inst, .ptr_sub), + + .min => try self.airMin(inst), + .max => try self.airMax(inst), .add_sat => try self.airAddSat(inst), .sub_sat => try self.airSubSat(inst), .mul_sat => try self.airMulSat(inst), - .rem => try self.airRem(inst), - .mod => try self.airMod(inst), .shl_sat => try self.airShlSat(inst), - .min => try self.airMin(inst), - .max => try self.airMax(inst), .slice => try self.airSlice(inst), .sqrt, @@ -612,8 +617,6 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .mul_with_overflow => try self.airMulWithOverflow(inst), .shl_with_overflow => try self.airShlWithOverflow(inst), - .div_float, .div_trunc, .div_floor, .div_exact => try self.airDiv(inst), - .cmp_lt => try self.airCmp(inst, .lt), .cmp_lte => try self.airCmp(inst, .lte), .cmp_eq => try self.airCmp(inst, .eq), @@ -1391,6 +1394,8 @@ fn binOpRegister( .lsl_register, 
.asr_register, .lsr_register, + .sdiv, + .udiv, => .{ .rrr = .{ .rd = dest_reg, .rn = lhs_reg, @@ -1629,6 +1634,67 @@ fn binOp( else => unreachable, } }, + .div_float => { + switch (lhs_ty.zigTypeTag()) { + .Float => return self.fail("TODO div_float", .{}), + .Vector => return self.fail("TODO div_float on vectors", .{}), + else => unreachable, + } + }, + .div_trunc, .div_floor, .div_exact => { + switch (lhs_ty.zigTypeTag()) { + .Float => return self.fail("TODO div on floats", .{}), + .Vector => return self.fail("TODO div on vectors", .{}), + .Int => { + assert(lhs_ty.eql(rhs_ty, mod)); + const int_info = lhs_ty.intInfo(self.target.*); + if (int_info.bits <= 64) { + switch (int_info.signedness) { + .signed => { + switch (tag) { + .div_trunc, .div_exact => { + // TODO optimize integer division by constants + return try self.binOpRegister(.sdiv, lhs, rhs, lhs_ty, rhs_ty, metadata); + }, + .div_floor => return self.fail("TODO div_floor on signed integers", .{}), + else => unreachable, + } + }, + .unsigned => { + // TODO optimize integer division by constants + return try self.binOpRegister(.udiv, lhs, rhs, lhs_ty, rhs_ty, metadata); + }, + } + } else { + return self.fail("TODO integer division for ints with bits > 64", .{}); + } + }, + else => unreachable, + } + }, + .rem, .mod => { + switch (lhs_ty.zigTypeTag()) { + .Float => return self.fail("TODO rem/mod on floats", .{}), + .Vector => return self.fail("TODO rem/mod on vectors", .{}), + .Int => { + assert(lhs_ty.eql(rhs_ty, mod)); + const int_info = lhs_ty.intInfo(self.target.*); + if (int_info.bits <= 32) { + switch (int_info.signedness) { + .signed => { + return self.fail("TODO rem/mod on signed integers", .{}); + }, + .unsigned => { + return self.fail("TODO rem/mod on unsigned integers", .{}); + }, + } + } else { + return self.fail("TODO rem/mod for integers with bits > 64", .{}); + } + }, + else => unreachable, + } + }, .addwrap, .subwrap, .mulwrap, @@ -2300,24 +2366,6 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, .none }); } -fn airDiv(self: *Self, inst: Air.Inst.Index) !void { - const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement div for {}", .{self.target.cpu.arch}); - return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -fn airRem(self: *Self, inst: Air.Inst.Index) !void { - const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement rem for {}", .{self.target.cpu.arch}); - return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -fn airMod(self: *Self, inst: Air.Inst.Index) !void { - const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement mod for {}", .{self.target.cpu.arch}); - return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); -} - fn airShlSat(self: *Self, inst: Air.Inst.Index) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch}); diff --git a/src/arch/aarch64/Emit.zig b/src/arch/aarch64/Emit.zig index 1ca198ccd8..ba85730276 100644 --- a/src/arch/aarch64/Emit.zig +++ 
b/src/arch/aarch64/Emit.zig @@ -91,9 +91,11 @@ pub fn emitMir( .sub_immediate => try emit.mirAddSubtractImmediate(inst), .subs_immediate => try emit.mirAddSubtractImmediate(inst), - .asr_register => try emit.mirShiftRegister(inst), - .lsl_register => try emit.mirShiftRegister(inst), - .lsr_register => try emit.mirShiftRegister(inst), + .asr_register => try emit.mirDataProcessing2Source(inst), + .lsl_register => try emit.mirDataProcessing2Source(inst), + .lsr_register => try emit.mirDataProcessing2Source(inst), + .sdiv => try emit.mirDataProcessing2Source(inst), + .udiv => try emit.mirDataProcessing2Source(inst), .asr_immediate => try emit.mirShiftImmediate(inst), .lsl_immediate => try emit.mirShiftImmediate(inst), @@ -520,7 +522,7 @@ fn mirAddSubtractImmediate(emit: *Emit, inst: Mir.Inst.Index) !void { } } -fn mirShiftRegister(emit: *Emit, inst: Mir.Inst.Index) !void { +fn mirDataProcessing2Source(emit: *Emit, inst: Mir.Inst.Index) !void { const tag = emit.mir.instructions.items(.tag)[inst]; const rrr = emit.mir.instructions.items(.data)[inst].rrr; const rd = rrr.rd; @@ -531,6 +533,8 @@ fn mirShiftRegister(emit: *Emit, inst: Mir.Inst.Index) !void { .asr_register => try emit.writeInstruction(Instruction.asrRegister(rd, rn, rm)), .lsl_register => try emit.writeInstruction(Instruction.lslRegister(rd, rn, rm)), .lsr_register => try emit.writeInstruction(Instruction.lsrRegister(rd, rn, rm)), + .sdiv => try emit.writeInstruction(Instruction.sdiv(rd, rn, rm)), + .udiv => try emit.writeInstruction(Instruction.udiv(rd, rn, rm)), else => unreachable, } } diff --git a/src/arch/aarch64/Mir.zig b/src/arch/aarch64/Mir.zig index c4d6af9db4..d1ba38a779 100644 --- a/src/arch/aarch64/Mir.zig +++ b/src/arch/aarch64/Mir.zig @@ -164,6 +164,8 @@ pub const Inst = struct { ret, /// Signed bitfield extract sbfx, + /// Signed divide + sdiv, /// Signed multiply high smulh, /// Signed multiply long @@ -212,6 +214,8 @@ pub const Inst = struct { tst_immediate, /// Unsigned bitfield extract ubfx, + /// Unsigned divide + udiv, /// Unsigned multiply high umulh, /// Unsigned multiply long diff --git a/src/arch/aarch64/bits.zig b/src/arch/aarch64/bits.zig index a3f5fbac51..ad45661b70 100644 --- a/src/arch/aarch64/bits.zig +++ b/src/arch/aarch64/bits.zig @@ -1698,6 +1698,14 @@ pub const Instruction = union(enum) { // Data processing (2 source) + pub fn udiv(rd: Register, rn: Register, rm: Register) Instruction { + return dataProcessing2Source(0b0, 0b000010, rd, rn, rm); + } + + pub fn sdiv(rd: Register, rn: Register, rm: Register) Instruction { + return dataProcessing2Source(0b0, 0b000011, rd, rn, rm); + } + pub fn lslv(rd: Register, rn: Register, rm: Register) Instruction { return dataProcessing2Source(0b0, 0b001000, rd, rn, rm); } From 508b90fcfa4749b50618f947e2c3573edcf29713 Mon Sep 17 00:00:00 2001 From: joachimschmidt557 Date: Fri, 5 Aug 2022 15:22:53 +0200 Subject: [PATCH 7/8] stage2 AArch64: implement basic integer rem/mod --- src/arch/aarch64/CodeGen.zig | 85 ++++++++++++++++++++++++++++++------ src/arch/aarch64/Emit.zig | 30 ++++++++++--- src/arch/aarch64/Mir.zig | 11 +++++ 3 files changed, 107 insertions(+), 19 deletions(-) diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index 37c3721709..4a08a91976 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -1038,6 +1038,7 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { const operand_info = operand_ty.intInfo(self.target.*); const dest_ty = self.air.typeOfIndex(inst); + const dest_abi_size = 
dest_ty.abiSize(self.target.*); const dest_info = dest_ty.intInfo(self.target.*); const result: MCValue = result: { @@ -1047,16 +1048,21 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { }; defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); + const truncated: MCValue = switch (operand_mcv) { + .register => |r| MCValue{ .register = registerAlias(r, dest_abi_size) }, + else => operand_mcv, + }; + if (dest_info.bits > operand_info.bits) { const dest_mcv = try self.allocRegOrMem(inst, true); - try self.setRegOrMem(self.air.typeOfIndex(inst), dest_mcv, operand_mcv); + try self.setRegOrMem(self.air.typeOfIndex(inst), dest_mcv, truncated); break :result dest_mcv; } else { - if (self.reuseOperand(inst, operand, 0, operand_mcv)) { - break :result operand_mcv; + if (self.reuseOperand(inst, operand, 0, truncated)) { + break :result truncated; } else { const dest_mcv = try self.allocRegOrMem(inst, true); - try self.setRegOrMem(self.air.typeOfIndex(inst), dest_mcv, operand_mcv); + try self.setRegOrMem(self.air.typeOfIndex(inst), dest_mcv, truncated); break :result dest_mcv; } } @@ -1145,7 +1151,7 @@ fn trunc( return MCValue{ .register = dest_reg }; } else { - return self.fail("TODO: truncate to ints > 32 bits", .{}); + return self.fail("TODO: truncate to ints > 64 bits", .{}); } } @@ -1679,14 +1685,67 @@ fn binOp( .Int => { assert(lhs_ty.eql(rhs_ty, mod)); const int_info = lhs_ty.intInfo(self.target.*); - if (int_info.bits <= 32) { - switch (int_info.signedness) { - .signed => { - return self.fail("TODO rem/mod on signed integers", .{}); - }, - .unsigned => { - return self.fail("TODO rem/mod on unsigned integers", .{}); - }, + if (int_info.bits <= 64) { + if (int_info.signedness == .signed and tag == .mod) { + return self.fail("TODO mod on signed integers", .{}); + } else { + const lhs_is_register = lhs == .register; + const rhs_is_register = rhs == .register; + + const lhs_lock: ?RegisterLock = if (lhs_is_register) + self.register_manager.lockReg(lhs.register) + else + null; + defer if (lhs_lock) |reg| self.register_manager.unlockReg(reg); + + const lhs_reg = if (lhs_is_register) + lhs.register + else + try self.register_manager.allocReg(null, gp); + const new_lhs_lock = self.register_manager.lockReg(lhs_reg); + defer if (new_lhs_lock) |reg| self.register_manager.unlockReg(reg); + + const rhs_reg = if (rhs_is_register) + rhs.register + else + try self.register_manager.allocReg(null, gp); + const new_rhs_lock = self.register_manager.lockReg(rhs_reg); + defer if (new_rhs_lock) |reg| self.register_manager.unlockReg(reg); + + const dest_regs = try self.register_manager.allocRegs(2, .{ null, null }, gp); + const dest_regs_locks = self.register_manager.lockRegsAssumeUnused(2, dest_regs); + defer for (dest_regs_locks) |reg| { + self.register_manager.unlockReg(reg); + }; + const quotient_reg = dest_regs[0]; + const remainder_reg = dest_regs[1]; + + if (!lhs_is_register) try self.genSetReg(lhs_ty, lhs_reg, lhs); + if (!rhs_is_register) try self.genSetReg(rhs_ty, rhs_reg, rhs); + + _ = try self.addInst(.{ + .tag = switch (int_info.signedness) { + .signed => .sdiv, + .unsigned => .udiv, + }, + .data = .{ .rrr = .{ + .rd = quotient_reg, + .rn = lhs_reg, + .rm = rhs_reg, + } }, + }); + + _ = try self.addInst(.{ + .tag = .msub, + .data = .{ .rrrr = .{ + .rd = remainder_reg, + .rn = quotient_reg, + .rm = rhs_reg, + .ra = lhs_reg, + } }, + }); + + return MCValue{ .register = remainder_reg }; } } else { return self.fail("TODO rem/mod for integers with bits > 64", .{}); diff --git 
a/src/arch/aarch64/Emit.zig b/src/arch/aarch64/Emit.zig index ba85730276..8abc083a1e 100644 --- a/src/arch/aarch64/Emit.zig +++ b/src/arch/aarch64/Emit.zig @@ -190,6 +190,7 @@ pub fn emitMir( .movk => try emit.mirMoveWideImmediate(inst), .movz => try emit.mirMoveWideImmediate(inst), + .msub => try emit.mirDataProcessing3Source(inst), .mul => try emit.mirDataProcessing3Source(inst), .smulh => try emit.mirDataProcessing3Source(inst), .smull => try emit.mirDataProcessing3Source(inst), @@ -1140,14 +1141,31 @@ fn mirMoveWideImmediate(emit: *Emit, inst: Mir.Inst.Index) !void { fn mirDataProcessing3Source(emit: *Emit, inst: Mir.Inst.Index) !void { const tag = emit.mir.instructions.items(.tag)[inst]; - const rrr = emit.mir.instructions.items(.data)[inst].rrr; switch (tag) { - .mul => try emit.writeInstruction(Instruction.mul(rrr.rd, rrr.rn, rrr.rm)), - .smulh => try emit.writeInstruction(Instruction.smulh(rrr.rd, rrr.rn, rrr.rm)), - .smull => try emit.writeInstruction(Instruction.smull(rrr.rd, rrr.rn, rrr.rm)), - .umulh => try emit.writeInstruction(Instruction.umulh(rrr.rd, rrr.rn, rrr.rm)), - .umull => try emit.writeInstruction(Instruction.umull(rrr.rd, rrr.rn, rrr.rm)), + .mul, + .smulh, + .smull, + .umulh, + .umull, + => { + const rrr = emit.mir.instructions.items(.data)[inst].rrr; + switch (tag) { + .mul => try emit.writeInstruction(Instruction.mul(rrr.rd, rrr.rn, rrr.rm)), + .smulh => try emit.writeInstruction(Instruction.smulh(rrr.rd, rrr.rn, rrr.rm)), + .smull => try emit.writeInstruction(Instruction.smull(rrr.rd, rrr.rn, rrr.rm)), + .umulh => try emit.writeInstruction(Instruction.umulh(rrr.rd, rrr.rn, rrr.rm)), + .umull => try emit.writeInstruction(Instruction.umull(rrr.rd, rrr.rn, rrr.rm)), + else => unreachable, + } + }, + .msub => { + const rrrr = emit.mir.instructions.items(.data)[inst].rrrr; + switch (tag) { + .msub => try emit.writeInstruction(Instruction.msub(rrrr.rd, rrrr.rn, rrrr.rm, rrrr.ra)), + else => unreachable, + } + }, else => unreachable, } } diff --git a/src/arch/aarch64/Mir.zig b/src/arch/aarch64/Mir.zig index d1ba38a779..00537e0e38 100644 --- a/src/arch/aarch64/Mir.zig +++ b/src/arch/aarch64/Mir.zig @@ -148,6 +148,8 @@ pub const Inst = struct { movk, /// Move wide with zero movz, + /// Multiply-subtract + msub, /// Multiply mul, /// Bitwise NOT @@ -446,6 +448,15 @@ pub const Inst = struct { rn: Register, offset: bits.Instruction.LoadStorePairOffset, }, + /// Four registers + /// + /// Used by e.g. msub + rrrr: struct { + rd: Register, + rn: Register, + rm: Register, + ra: Register, + }, /// Debug info: line and column /// /// Used by e.g. 
dbg_line From f46c80b267396d02b5008bf8c426e0eb886a05d2 Mon Sep 17 00:00:00 2001 From: joachimschmidt557 Date: Fri, 5 Aug 2022 19:59:26 +0200 Subject: [PATCH 8/8] stage2 AArch64: improve correctness of register aliases Also implements ptr_elem_ptr --- src/arch/aarch64/CodeGen.zig | 83 ++++++++++++++++++++++++++++++------ 1 file changed, 69 insertions(+), 14 deletions(-) diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index 4a08a91976..ba53c2e757 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -1314,6 +1314,9 @@ fn binOpRegister( const lhs_is_register = lhs == .register; const rhs_is_register = rhs == .register; + if (lhs_is_register) assert(lhs.register == registerAlias(lhs.register, lhs_ty.abiSize(self.target.*))); + if (rhs_is_register) assert(rhs.register == registerAlias(rhs.register, rhs_ty.abiSize(self.target.*))); + const lhs_lock: ?RegisterLock = if (lhs_is_register) self.register_manager.lockReg(lhs.register) else @@ -1343,13 +1346,22 @@ fn binOpRegister( const new_lhs_lock = self.register_manager.lockReg(lhs_reg); defer if (new_lhs_lock) |reg| self.register_manager.unlockReg(reg); - const rhs_reg = if (rhs_is_register) rhs.register else blk: { + const rhs_reg = if (rhs_is_register) + // lhs is almost always equal to rhs, except in shifts. In + // order to guarantee that registers will have equal sizes, we + // use the register alias of rhs corresponding to the size of + // lhs. + registerAlias(rhs.register, lhs_ty.abiSize(self.target.*)) + else blk: { const track_inst: ?Air.Inst.Index = if (metadata) |md| inst: { break :inst Air.refToIndex(md.rhs).?; } else null; const raw_reg = try self.register_manager.allocReg(track_inst, gp); - const reg = registerAlias(raw_reg, rhs_ty.abiAlignment(self.target.*)); + + // Here, we deliberately use lhs as lhs and rhs may differ in + // the case of shifts. See comment above. 
+ const reg = registerAlias(raw_reg, lhs_ty.abiSize(self.target.*)); if (track_inst) |inst| branch.inst_table.putAssumeCapacity(inst, .{ .register = reg }); @@ -1458,6 +1470,8 @@ fn binOpImmediate( ) !MCValue { const lhs_is_register = lhs == .register; + if (lhs_is_register) assert(lhs.register == registerAlias(lhs.register, lhs_ty.abiSize(self.target.*))); + const lhs_lock: ?RegisterLock = if (lhs_is_register) self.register_manager.lockReg(lhs.register) else @@ -1698,21 +1712,52 @@ fn binOp( null; defer if (lhs_lock) |reg| self.register_manager.unlockReg(reg); - const lhs_reg = if (lhs_is_register) - lhs.register + const rhs_lock: ?RegisterLock = if (rhs_is_register) + self.register_manager.lockReg(rhs.register) else - try self.register_manager.allocReg(null, gp); + null; + defer if (rhs_lock) |reg| self.register_manager.unlockReg(reg); + + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + + const lhs_reg = if (lhs_is_register) lhs.register else blk: { + const track_inst: ?Air.Inst.Index = if (metadata) |md| inst: { + break :inst Air.refToIndex(md.lhs).?; + } else null; + + const raw_reg = try self.register_manager.allocReg(track_inst, gp); + const reg = registerAlias(raw_reg, lhs_ty.abiSize(self.target.*)); + + if (track_inst) |inst| branch.inst_table.putAssumeCapacity(inst, .{ .register = reg }); + + break :blk reg; + }; const new_lhs_lock = self.register_manager.lockReg(lhs_reg); defer if (new_lhs_lock) |reg| self.register_manager.unlockReg(reg); - const rhs_reg = if (rhs_is_register) - rhs.register - else - try self.register_manager.allocReg(null, gp); + const rhs_reg = if (rhs_is_register) rhs.register else blk: { + const track_inst: ?Air.Inst.Index = if (metadata) |md| inst: { + break :inst Air.refToIndex(md.rhs).?; + } else null; + + const raw_reg = try self.register_manager.allocReg(track_inst, gp); + const reg = registerAlias(raw_reg, rhs_ty.abiAlignment(self.target.*)); + + if (track_inst) |inst| branch.inst_table.putAssumeCapacity(inst, .{ .register = reg }); + + break :blk reg; + }; const new_rhs_lock = self.register_manager.lockReg(rhs_reg); defer if (new_rhs_lock) |reg| self.register_manager.unlockReg(reg); - const dest_regs = try self.register_manager.allocRegs(2, .{ null, null }, gp); + const dest_regs: [2]Register = blk: { + const raw_regs = try self.register_manager.allocRegs(2, .{ null, null }, gp); + const abi_size = lhs_ty.abiSize(self.target.*); + break :blk .{ + registerAlias(raw_regs[0], abi_size), + registerAlias(raw_regs[1], abi_size), + }; + }; const dest_regs_locks = self.register_manager.lockRegsAssumeUnused(2, dest_regs); defer for (dest_regs_locks) |reg| { self.register_manager.unlockReg(reg); @@ -2037,7 +2082,7 @@ fn airOverflow(self: *Self, inst: Air.Inst.Index) !void { try self.truncRegister(dest_reg, truncated_reg, int_info.signedness, int_info.bits); // cmp dest, truncated - _ = try self.binOp(.cmp_eq, dest, .{ .register = truncated_reg }, Type.usize, Type.usize, null); + _ = try self.binOp(.cmp_eq, dest, .{ .register = truncated_reg }, lhs_ty, lhs_ty, null); try self.genSetStack(lhs_ty, stack_offset, .{ .register = truncated_reg }); try self.genSetStack(Type.initTag(.u1), stack_offset - overflow_bit_offset, .{ .condition_flags = .ne }); @@ -2753,7 +2798,15 @@ fn airPtrElemVal(self: *Self, inst: Air.Inst.Index) !void { fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; - const result: MCValue = 
if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement ptr_elem_ptr for {}", .{self.target.cpu.arch}); + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const ptr_mcv = try self.resolveInst(extra.lhs); + const index_mcv = try self.resolveInst(extra.rhs); + + const ptr_ty = self.air.typeOf(extra.lhs); + + const addr = try self.binOp(.ptr_add, ptr_mcv, index_mcv, ptr_ty, Type.usize, null); + break :result addr; + }; return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, .none }); } @@ -3219,6 +3272,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const mcv = try self.resolveInst(operand); const struct_ty = self.air.typeOf(operand); + const struct_field_ty = struct_ty.structFieldType(index); const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(index, self.target.*)); switch (mcv) { @@ -3250,8 +3304,9 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { break :result field; } else { // Copy to new register - const dest_reg = try self.register_manager.allocReg(null, gp); - try self.genSetReg(struct_ty.structFieldType(index), dest_reg, field); + const raw_dest_reg = try self.register_manager.allocReg(null, gp); + const dest_reg = registerAlias(raw_dest_reg, struct_field_ty.abiSize(self.target.*)); + try self.genSetReg(struct_field_ty, dest_reg, field); break :result MCValue{ .register = dest_reg }; }
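Note on the rem/mod lowering introduced in PATCH 7/8: the emitted sdiv/udiv + msub pair relies on the identity remainder = lhs - (lhs / rhs) * rhs, since AArch64 MSUB computes rd = ra - rn * rm and the division truncates toward zero. The Zig sketch below only illustrates that identity; it is not part of the patches, and the helper name remViaDivMsub is made up for the example.

const std = @import("std");

fn remViaDivMsub(lhs: i64, rhs: i64) i64 {
    // sdiv/udiv truncate toward zero, matching @divTrunc
    const quotient = @divTrunc(lhs, rhs);
    // msub remainder_reg, quotient_reg, rhs_reg, lhs_reg  =>  lhs - quotient * rhs
    return lhs - quotient * rhs;
}

test "rem lowering identity" {
    try std.testing.expectEqual(@rem(@as(i64, 7), 3), remViaDivMsub(7, 3));
    try std.testing.expectEqual(@rem(@as(i64, -7), 3), remViaDivMsub(-7, 3));
}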