From 117f9f69e79a628347c9fdb22e7ee8618de143c5 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Tue, 7 Jun 2022 11:16:08 +0200
Subject: [PATCH 1/7] x64: simplify saving registers to stack in prologue

---
 src/arch/x86_64/CodeGen.zig | 144 ++++++++++++++++++++----------------
 src/arch/x86_64/Emit.zig    |  45 ++++++-----
 src/arch/x86_64/Mir.zig     |  67 +++++++++++++----
 src/register_manager.zig    |   3 +-
 4 files changed, 154 insertions(+), 105 deletions(-)

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index 6211a8cae8..0d4aae2688 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -409,13 +409,11 @@ fn gen(self: *Self) InnerError!void {
             // The address where to store the return value for the caller is in `.rdi`
             // register which the callee is free to clobber. Therefore, we purposely
             // spill it to stack immediately.
-            const ptr_ty = Type.usize;
-            const abi_size = @intCast(u32, ptr_ty.abiSize(self.target.*));
-            const abi_align = ptr_ty.abiAlignment(self.target.*);
-            const stack_offset = mem.alignForwardGeneric(u32, self.next_stack_offset + abi_size, abi_align);
+            const stack_offset = mem.alignForwardGeneric(u32, self.next_stack_offset + 8, 8);
             self.next_stack_offset = stack_offset;
             self.max_end_stack = @maximum(self.max_end_stack, self.next_stack_offset);
-            try self.genSetStack(ptr_ty, @intCast(i32, stack_offset), MCValue{ .register = .rdi }, .{});
+
+            try self.genSetStack(Type.usize, @intCast(i32, stack_offset), MCValue{ .register = .rdi }, .{});
             self.ret_mcv = MCValue{ .stack_offset = @intCast(i32, stack_offset) };
             log.debug("gen: spilling .rdi to stack at offset {}", .{stack_offset});
         }
@@ -426,11 +424,11 @@ fn gen(self: *Self) InnerError!void {
             .data = undefined,
         });

-        // push the callee_preserved_regs that were used
-        const backpatch_push_callee_preserved_regs_i = try self.addInst(.{
-            .tag = .push_regs_from_callee_preserved_regs,
-            .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rbp }),
-            .data = .{ .payload = undefined }, // to be backpatched
+        // Push callee-preserved regs that are actually in use.
+        const backpatch_push_callee_preserved_regs = try self.addInst(.{
+            .tag = .nop,
+            .ops = undefined,
+            .data = undefined,
         });

         try self.genBody(self.air.getMainBody());
@@ -446,31 +444,21 @@ fn gen(self: *Self) InnerError!void {
             self.mir_instructions.items(.data)[jmp_reloc].inst = @intCast(u32, self.mir_instructions.len);
         }

-        // calculate the data for callee_preserved_regs to be pushed and popped
-        const callee_preserved_regs_payload = blk: {
-            var data = Mir.RegsToPushOrPop{
-                .regs = 0,
-                .disp = mem.alignForwardGeneric(u32, self.next_stack_offset, 8),
-            };
-            var disp = data.disp + 8;
-            inline for (callee_preserved_regs) |reg, i| {
-                if (self.register_manager.isRegAllocated(reg)) {
-                    data.regs |= 1 << @intCast(u5, i);
-                    self.max_end_stack += 8;
-                    disp += 8;
-                }
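Review note: the `.nop` placeholder above is the key trick in this patch. When the prologue is generated, the set of callee-preserved registers the function will actually use is still unknown, so an instruction slot is reserved now and overwritten later (see the `backpatch_*` handling further down, and `.nop => {}` in Emit.zig, which makes an un-patched placeholder cost nothing). A minimal, self-contained sketch of the reserve-then-backpatch pattern — hypothetical `Inst` type, not the actual Mir API:

    const std = @import("std");

    const Tag = enum { nop, push_regs };
    const Inst = struct { tag: Tag, payload: u32 = 0 };

    test "reserve a placeholder, then backpatch it" {
        var insts = std.ArrayList(Inst).init(std.testing.allocator);
        defer insts.deinit();

        // Pass 1: reserve a slot; the register set is not known yet.
        const backpatch_index = insts.items.len;
        try insts.append(.{ .tag = .nop });

        // ... generate the function body, allocating registers as we go ...

        // Pass 2: overwrite the placeholder now that usage is known.
        insts.items[backpatch_index] = .{ .tag = .push_regs, .payload = 0b1001 };
        try std.testing.expectEqual(Tag.push_regs, insts.items[backpatch_index].tag);
    }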
+        // Create list of registers to save in the prologue.
+        // TODO handle register classes
+        var reg_list: Mir.RegisterList(Register, &callee_preserved_regs) = .{};
+        inline for (callee_preserved_regs) |reg| {
+            if (self.register_manager.isRegAllocated(reg)) {
+                reg_list.push(reg);
             }
-            break :blk try self.addExtra(data);
-        };
+        }
+        const saved_regs_stack_space: u32 = reg_list.count() * 8;

-        const data = self.mir_instructions.items(.data);
-        // backpatch the push instruction
-        data[backpatch_push_callee_preserved_regs_i].payload = callee_preserved_regs_payload;
-        // pop the callee_preserved_regs
-        _ = try self.addInst(.{
-            .tag = .pop_regs_from_callee_preserved_regs,
-            .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rbp }),
-            .data = .{ .payload = callee_preserved_regs_payload },
+        // Pop saved callee-preserved regs.
+        const backpatch_pop_callee_preserved_regs = try self.addInst(.{
+            .tag = .nop,
+            .ops = undefined,
+            .data = undefined,
         });

         _ = try self.addInst(.{
@@ -502,9 +490,11 @@ fn gen(self: *Self) InnerError!void {
         if (self.max_end_stack > math.maxInt(i32)) {
             return self.failSymbol("too much stack used in call parameters", .{});
         }
-        // TODO we should reuse this mechanism to align the stack when calling any function even if
-        // we do not pass any args on the stack BUT we still push regs to stack with `push` inst.
-        const aligned_stack_end = @intCast(u32, mem.alignForward(self.max_end_stack, self.stack_align));
+
+        const aligned_stack_end = @intCast(
+            u32,
+            mem.alignForward(self.max_end_stack + saved_regs_stack_space, self.stack_align),
+        );
         if (aligned_stack_end > 0) {
             self.mir_instructions.set(backpatch_stack_sub, .{
                 .tag = .sub,
@@ -516,6 +506,21 @@ fn gen(self: *Self) InnerError!void {
                 .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rsp }),
                 .data = .{ .imm = aligned_stack_end },
             });
+
+            const save_reg_list = try self.addExtra(Mir.SaveRegisterList{
+                .register_list = reg_list.asInt(),
+                .stack_end = aligned_stack_end,
+            });
+            self.mir_instructions.set(backpatch_push_callee_preserved_regs, .{
+                .tag = .push_regs,
+                .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rbp }),
+                .data = .{ .payload = save_reg_list },
+            });
+            self.mir_instructions.set(backpatch_pop_callee_preserved_regs, .{
+                .tag = .pop_regs,
+                .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rbp }),
+                .data = .{ .payload = save_reg_list },
+            });
         }
     } else {
         _ = try self.addInst(.{
@@ -907,6 +912,39 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue {
     return MCValue{ .stack_offset = @intCast(i32, stack_offset) };
 }

+const State = struct {
+    next_stack_offset: u32,
+    registers: abi.RegisterManager.TrackedRegisters,
+    free_registers: abi.RegisterManager.RegisterBitSet,
+    eflags_inst: ?Air.Inst.Index,
+    stack: std.AutoHashMapUnmanaged(u32, StackAllocation),
+
+    fn deinit(state: *State, gpa: Allocator) void {
+        state.stack.deinit(gpa);
+    }
+};
+
+fn captureState(self: *Self) !State {
+    return State{
+        .next_stack_offset = self.next_stack_offset,
+        .registers = self.register_manager.registers,
+        .free_registers = self.register_manager.free_registers,
+        .eflags_inst = self.eflags_inst,
+        .stack = try self.stack.clone(self.gpa),
+    };
+}
+
+fn revertState(self: *Self, state: State) void {
+    self.register_manager.registers = state.registers;
+    self.eflags_inst = state.eflags_inst;
+
+    self.stack.deinit(self.gpa);
+    self.stack = state.stack;
+
+    self.next_stack_offset = state.next_stack_offset;
+    self.register_manager.free_registers = state.free_registers;
+}
+
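Review note: `captureState`/`revertState` exist so that every arm of a conditional is generated from an identical allocation state — mutations made while emitting the "then" branch must not leak into the "else" branch (see `airCondBr`/`airSwitch` below). One subtlety: `revertState` frees the current `self.stack` and adopts `state.stack`, so a `State` that has been reverted must not also be `deinit`ed. A toy model of the snapshot/restore flow, with a plain bitmask standing in for the real `RegisterManager` state:

    const std = @import("std");

    test "each branch starts from the captured state" {
        var free_registers: u8 = 0b1111; // all four toy registers free

        const saved = free_registers; // captureState()
        free_registers &= 0b0011; // "then" branch allocates two registers
        free_registers = saved; // revertState(): "else" branch sees the original state
        try std.testing.expectEqual(@as(u8, 0b1111), free_registers);
    }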
log.debug("spilling {d} to stack mcv {any}", .{ inst, stack_mcv }); @@ -4503,12 +4541,7 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { } // Capture the state of register and stack allocation state so that we can revert to it. - const parent_next_stack_offset = self.next_stack_offset; - const parent_free_registers = self.register_manager.free_registers; - const parent_eflags_inst = self.eflags_inst; - var parent_stack = try self.stack.clone(self.gpa); - defer parent_stack.deinit(self.gpa); - const parent_registers = self.register_manager.registers; + const saved_state = try self.captureState(); try self.branch_stack.append(.{}); errdefer { @@ -4526,17 +4559,10 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { var saved_then_branch = self.branch_stack.pop(); defer saved_then_branch.deinit(self.gpa); - self.register_manager.registers = parent_registers; - self.eflags_inst = parent_eflags_inst; - - self.stack.deinit(self.gpa); - self.stack = parent_stack; - parent_stack = .{}; - - self.next_stack_offset = parent_next_stack_offset; - self.register_manager.free_registers = parent_free_registers; + self.revertState(saved_state); try self.performReloc(reloc); + const else_branch = self.branch_stack.addOneAssumeCapacity(); else_branch.* = .{}; @@ -5021,12 +5047,7 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { } // Capture the state of register and stack allocation state so that we can revert to it. - const parent_next_stack_offset = self.next_stack_offset; - const parent_free_registers = self.register_manager.free_registers; - const parent_eflags_inst = self.eflags_inst; - var parent_stack = try self.stack.clone(self.gpa); - defer parent_stack.deinit(self.gpa); - const parent_registers = self.register_manager.registers; + const saved_state = try self.captureState(); try self.branch_stack.append(.{}); errdefer { @@ -5044,14 +5065,7 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { var saved_case_branch = self.branch_stack.pop(); defer saved_case_branch.deinit(self.gpa); - self.register_manager.registers = parent_registers; - self.eflags_inst = parent_eflags_inst; - self.stack.deinit(self.gpa); - self.stack = parent_stack; - parent_stack = .{}; - - self.next_stack_offset = parent_next_stack_offset; - self.register_manager.free_registers = parent_free_registers; + self.revertState(saved_state); for (relocs) |reloc| { try self.performReloc(reloc); diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 654775cc21..cc5b54fb55 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -169,7 +169,7 @@ pub fn lowerMir(emit: *Emit) InnerError!void { .@"test" => try emit.mirTest(inst), .interrupt => try emit.mirInterrupt(inst), - .nop => try emit.mirNop(), + .nop => {}, // just skip it // SSE instructions .mov_f64_sse => try emit.mirMovFloatSse(.movsd, inst), @@ -198,8 +198,8 @@ pub fn lowerMir(emit: *Emit) InnerError!void { .dbg_prologue_end => try emit.mirDbgPrologueEnd(inst), .dbg_epilogue_begin => try emit.mirDbgEpilogueBegin(inst), - .push_regs_from_callee_preserved_regs => try emit.mirPushPopRegsFromCalleePreservedRegs(.push, inst), - .pop_regs_from_callee_preserved_regs => try emit.mirPushPopRegsFromCalleePreservedRegs(.pop, inst), + .push_regs => try emit.mirPushPopRegisterList(.push, inst), + .pop_regs => try emit.mirPushPopRegisterList(.pop, inst), else => { return emit.fail("Implement MIR->Emit lowering for x86_64 for pseudo-inst: {s}", .{tag}); @@ -246,10 +246,6 @@ fn mirInterrupt(emit: *Emit, inst: Mir.Inst.Index) 
     }
 }

-fn mirNop(emit: *Emit) InnerError!void {
-    return lowerToZoEnc(.nop, emit.code);
-}
-
 fn mirSyscall(emit: *Emit) InnerError!void {
     return lowerToZoEnc(.syscall, emit.code);
 }
@@ -283,26 +279,27 @@ fn mirPushPop(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
     }
 }

-fn mirPushPopRegsFromCalleePreservedRegs(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
+fn mirPushPopRegisterList(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
     const ops = emit.mir.instructions.items(.ops)[inst].decode();
     const payload = emit.mir.instructions.items(.data)[inst].payload;
-    const data = emit.mir.extraData(Mir.RegsToPushOrPop, payload).data;
-    const regs = data.regs;
-    var disp: u32 = data.disp + 8;
-    for (abi.callee_preserved_regs) |reg, i| {
-        if ((regs >> @intCast(u5, i)) & 1 == 0) continue;
-        if (tag == .push) {
-            try lowerToMrEnc(.mov, RegisterOrMemory.mem(.qword_ptr, .{
-                .disp = @bitCast(u32, -@intCast(i32, disp)),
-                .base = ops.reg1,
-            }), reg.to64(), emit.code);
-        } else {
-            try lowerToRmEnc(.mov, reg.to64(), RegisterOrMemory.mem(.qword_ptr, .{
-                .disp = @bitCast(u32, -@intCast(i32, disp)),
-                .base = ops.reg1,
-            }), emit.code);
+    const save_reg_list = emit.mir.extraData(Mir.SaveRegisterList, payload).data;
+    const reg_list = Mir.RegisterList(Register, &abi.callee_preserved_regs).fromInt(save_reg_list.register_list);
+    var disp: i32 = -@intCast(i32, save_reg_list.stack_end);
+    inline for (abi.callee_preserved_regs) |reg| {
+        if (reg_list.isSet(reg)) {
+            switch (tag) {
+                .push => try lowerToMrEnc(.mov, RegisterOrMemory.mem(.qword_ptr, .{
+                    .disp = @bitCast(u32, disp),
+                    .base = ops.reg1,
+                }), reg, emit.code),
+                .pop => try lowerToRmEnc(.mov, reg, RegisterOrMemory.mem(.qword_ptr, .{
+                    .disp = @bitCast(u32, disp),
+                    .base = ops.reg1,
+                }), emit.code),
+                else => unreachable,
+            }
+            disp += 8;
         }
-        disp += 8;
     }
 }
diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig
index 0918d67f3c..74b0ca0d12 100644
--- a/src/arch/x86_64/Mir.zig
+++ b/src/arch/x86_64/Mir.zig
@@ -14,6 +14,7 @@ const assert = std.debug.assert;
 const bits = @import("bits.zig");
 const Air = @import("../../Air.zig");
 const CodeGen = @import("CodeGen.zig");
+const IntegerBitSet = std.bit_set.IntegerBitSet;
 const Register = bits.Register;

 instructions: std.MultiArrayList(Inst).Slice,
@@ -379,19 +380,13 @@ pub const Inst = struct {
         /// update debug line
         dbg_line,

-        /// push registers from the callee_preserved_regs
-        /// data is the bitfield of which regs to push
-        /// for example on x86_64, the callee_preserved_regs are [_]Register{ .rcx, .rsi, .rdi, .r8, .r9, .r10, .r11 };
-        /// so to push rcx and r8 one would make data 0b00000000_00000000_00000000_00001001 (the first and fourth bits are set)
-        /// ops is unused
-        push_regs_from_callee_preserved_regs,
+        /// push registers
+        /// Uses `payload` field with `SaveRegisterList` as payload.
+        push_regs,

-        /// pop registers from the callee_preserved_regs
-        /// data is the bitfield of which regs to pop
-        /// for example on x86_64, the callee_preserved_regs are [_]Register{ .rcx, .rsi, .rdi, .r8, .r9, .r10, .r11 };
-        /// so to pop rcx and r8 one would make data 0b00000000_00000000_00000000_00001001 (the first and fourth bits are set)
-        /// ops is unused
-        pop_regs_from_callee_preserved_regs,
+        /// pop registers
+        /// Uses `payload` field with `SaveRegisterList` as payload.
+        pop_regs,
     };

     /// The position of an MIR instruction within the `Mir` instructions array.
     pub const Index = u32;
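Review note: in `mirPushPopRegisterList` above, `disp` starts at `-stack_end` and grows by 8 for each register whose bit is set, so the saved registers occupy the very bottom of the frame, growing upward from `reg1 - stack_end` (`reg1` is `.rbp` at both call sites in CodeGen.zig). A small check of the displacement arithmetic, assuming `stack_end == 32` and two registers in the list:

    const std = @import("std");

    test "saved registers sit at the bottom of the frame" {
        const stack_end: i32 = 32;
        var disp: i32 = -stack_end;
        var disps: [2]i32 = undefined; // e.g. [rbp - 32] and [rbp - 24]
        for (disps) |*d| {
            d.* = disp;
            disp += 8;
        }
        try std.testing.expectEqual([2]i32{ -32, -24 }, disps);
    }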
@@ -471,9 +466,51 @@ pub const Inst = struct {
     }
 };

-pub const RegsToPushOrPop = struct {
-    regs: u32,
-    disp: u32,
+pub fn RegisterList(comptime Reg: type, comptime registers: []const Reg) type {
+    assert(registers.len <= @bitSizeOf(u32));
+    return struct {
+        bitset: RegBitSet = RegBitSet.initEmpty(),
+
+        const RegBitSet = IntegerBitSet(registers.len);
+        const Self = @This();
+
+        fn getIndexForReg(reg: Reg) RegBitSet.MaskInt {
+            inline for (registers) |cpreg, i| {
+                if (reg.id() == cpreg.id()) return i;
+            }
+            unreachable; // register not in input register list!
+        }
+
+        pub fn push(self: *Self, reg: Reg) void {
+            const index = getIndexForReg(reg);
+            self.bitset.set(index);
+        }
+
+        pub fn isSet(self: Self, reg: Reg) bool {
+            const index = getIndexForReg(reg);
+            return self.bitset.isSet(index);
+        }
+
+        pub fn asInt(self: Self) u32 {
+            return self.bitset.mask;
+        }
+
+        pub fn fromInt(mask: u32) Self {
+            return .{
+                .bitset = RegBitSet{ .mask = @intCast(RegBitSet.MaskInt, mask) },
+            };
+        }
+
+        pub fn count(self: Self) u32 {
+            return @intCast(u32, self.bitset.count());
+        }
+    };
+}
+
+pub const SaveRegisterList = struct {
+    /// Use `RegisterList` to populate.
+    register_list: u32,
+    stack_end: u32,
 };

 pub const ImmPair = struct {
diff --git a/src/register_manager.zig b/src/register_manager.zig
index 64f6609a9b..0bd9ef62e9 100644
--- a/src/register_manager.zig
+++ b/src/register_manager.zig
@@ -39,7 +39,7 @@ pub fn RegisterManager(
         /// register is free), the value in that slot is undefined.
         ///
         /// The key must be canonical register.
-        registers: [tracked_registers.len]Air.Inst.Index = undefined,
+        registers: TrackedRegisters = undefined,
         /// Tracks which registers are free (in which case the
         /// corresponding bit is set to 1)
         free_registers: RegisterBitSet = RegisterBitSet.initFull(),
@@ -51,6 +51,7 @@ pub fn RegisterManager(

         const Self = @This();

+        pub const TrackedRegisters = [tracked_registers.len]Air.Inst.Index;
         pub const RegisterBitSet = StaticBitSet(tracked_registers.len);

         fn getFunction(self: *Self) *Function {
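Review note: a usage sketch for the new `Mir.RegisterList` — a sketch only, assuming the type above is in scope; `Reg` is a hypothetical stand-in for `bits.Register`, since the generic only needs an `id()` method. Pushing `.rcx` and `.r8` from the callee-preserved set `{ rcx, rsi, rdi, r8, r9, r10, r11 }` sets bits 0 and 3, matching the `0b...1001` example in the old tag documentation:

    const std = @import("std");

    const Reg = enum(u8) {
        rcx, rsi, rdi, r8, r9, r10, r11,

        fn id(self: Reg) u8 {
            return @enumToInt(self);
        }
    };

    const regs = [_]Reg{ .rcx, .rsi, .rdi, .r8, .r9, .r10, .r11 };

    test "RegisterList round-trips through its u32 mask" {
        var list: RegisterList(Reg, &regs) = .{};
        list.push(.rcx);
        list.push(.r8);
        try std.testing.expectEqual(@as(u32, 0b1001), list.asInt());

        const restored = RegisterList(Reg, &regs).fromInt(list.asInt());
        try std.testing.expect(restored.isSet(.rcx));
        try std.testing.expect(restored.isSet(.r8));
        try std.testing.expectEqual(@as(u32, 2), restored.count());
    }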
From fc015231ad4e2e449e848fd08dd003efd049ad43 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Tue, 7 Jun 2022 18:04:58 +0200
Subject: [PATCH 2/7] x64: account for non-pow-two stores via register deref

In this case, we need to proceed rather carefully to avoid writing the
containing register's width rather than the precise number of bytes.

---
 src/arch/x86_64/CodeGen.zig | 49 ++++++++++++++++++++++++++++++-------
 1 file changed, 40 insertions(+), 9 deletions(-)

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index 0d4aae2688..be41f99b56 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -2732,15 +2732,46 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
                 }
             },
             .register => |src_reg| {
-                _ = try self.addInst(.{
-                    .tag = .mov,
-                    .ops = Mir.Inst.Ops.encode(.{
-                        .reg1 = reg.to64(),
-                        .reg2 = registerAlias(src_reg, @intCast(u32, abi_size)),
-                        .flags = 0b10,
-                    }),
-                    .data = .{ .imm = 0 },
-                });
+                const src_reg_lock = self.register_manager.lockReg(src_reg);
+                defer if (src_reg_lock) |lock| self.register_manager.unlockReg(lock);
+
+                // TODO common code-path with genSetStack, refactor!
+                if (!math.isPowerOfTwo(abi_size)) {
+                    const tmp_reg = try self.copyToTmpRegister(value_ty, value);
+
+                    var next_offset: i32 = 0;
+                    var remainder = abi_size;
+                    while (remainder > 0) {
+                        const nearest_power_of_two = @as(u6, 1) << math.log2_int(u3, @intCast(u3, remainder));
+
+                        _ = try self.addInst(.{
+                            .tag = .mov,
+                            .ops = Mir.Inst.Ops.encode(.{
+                                .reg1 = reg.to64(),
+                                .reg2 = registerAlias(tmp_reg, nearest_power_of_two),
+                                .flags = 0b10,
+                            }),
+                            .data = .{ .imm = @bitCast(u32, -next_offset) },
+                        });
+
+                        if (nearest_power_of_two > 1) {
+                            try self.genShiftBinOpMir(.shr, value_ty, tmp_reg, .{ .immediate = nearest_power_of_two * 8 });
+                        }
+
+                        remainder -= nearest_power_of_two;
+                        next_offset -= nearest_power_of_two;
+                    }
+                } else {
+                    _ = try self.addInst(.{
+                        .tag = .mov,
+                        .ops = Mir.Inst.Ops.encode(.{
+                            .reg1 = reg.to64(),
+                            .reg2 = registerAlias(src_reg, @intCast(u32, abi_size)),
+                            .flags = 0b10,
+                        }),
+                        .data = .{ .imm = 0 },
+                    });
+                }
             },
             .got_load,
             .direct_load,
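Review note on the hunk above: when `abi_size` is not a power of two, the value is written out in decreasing power-of-two chunks, and the temporary register is shifted right after each store so the next chunk lands in its low bits. A 7-byte store through `reg` becomes, in effect: a 4-byte `mov` at +0, `shr tmp, 32`, a 2-byte `mov` at +4, `shr tmp, 16`, and a final 1-byte `mov` at +6. The chunk arithmetic, extracted into a runnable check (with widened integer types; the compiler uses `u3`/`u6` because sizes on this path never exceed 8):

    const std = @import("std");
    const math = std.math;

    test "a 7-byte store splits into power-of-two chunks" {
        var remainder: u32 = 7;
        var chunks: [3]u32 = undefined;
        var i: usize = 0;
        while (remainder > 0) : (i += 1) {
            // Same arithmetic as `nearest_power_of_two` in the loop above.
            const chunk = @as(u32, 1) << math.log2_int(u32, remainder);
            chunks[i] = chunk;
            remainder -= chunk;
        }
        try std.testing.expectEqual([3]u32{ 4, 2, 1 }, chunks);
    }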
From 0c727604540df7dbff4f57e4599fa01e01b36695 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Tue, 7 Jun 2022 18:10:28 +0200
Subject: [PATCH 3/7] x64: optimise element offset calculation if dealing with immediates

If `index` MCValue is actually an immediate, we can calculate the offset
directly at "comptime" rather than at runtime.

---
 src/arch/x86_64/CodeGen.zig | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index be41f99b56..5c81233845 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -2100,8 +2100,22 @@ fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void {
 }

 fn elemOffset(self: *Self, index_ty: Type, index: MCValue, elem_size: u64) !Register {
-    const reg = try self.copyToTmpRegister(index_ty, index);
-    try self.genIntMulComplexOpMir(index_ty, .{ .register = reg }, .{ .immediate = elem_size });
+    const reg: Register = blk: {
+        switch (index) {
+            .immediate => |imm| {
+                // Optimisation: if index MCValue is an immediate, we can multiply in `comptime`
+                // and set the register directly to the scaled offset as an immediate.
+                const reg = try self.register_manager.allocReg(null, gp);
+                try self.genSetReg(index_ty, reg, .{ .immediate = imm * elem_size });
+                break :blk reg;
+            },
+            else => {
+                const reg = try self.copyToTmpRegister(index_ty, index);
+                try self.genIntMulComplexOpMir(index_ty, .{ .register = reg }, .{ .immediate = elem_size });
+                break :blk reg;
+            },
+        }
+    };
     return reg;
 }

From a8bce8f14b5a2a3a6b5e069f3b434fd9430d9a8e Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Tue, 7 Jun 2022 18:12:45 +0200
Subject: [PATCH 4/7] x64: pass behavior test bugs/1381

---
 test/behavior/bugs/1381.zig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/behavior/bugs/1381.zig b/test/behavior/bugs/1381.zig
index 242c852795..ae80132cc1 100644
--- a/test/behavior/bugs/1381.zig
+++ b/test/behavior/bugs/1381.zig
@@ -14,7 +14,6 @@ const A = union(enum) {
 test "union that needs padding bytes inside an array" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;

     var as = [_]A{
         A{ .B = B{ .D = 1 } },
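Review note: the effect of patch 3 is constant folding of the scaled index — for `elems[3]` with an 8-byte element type, the backend can now emit a single `mov reg, 24` instead of materializing the index and issuing a runtime multiply via `genIntMulComplexOpMir`. The folded arithmetic is simply:

    const std = @import("std");

    test "constant index folds into a single immediate" {
        // Before: mov reg, 3 ; imul reg, 8   (in effect)
        // After:  mov reg, 24
        const index: u64 = 3;
        const elem_size: u64 = 8;
        try std.testing.expectEqual(@as(u64, 24), index * elem_size);
    }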
From 03068ce6a67d2cf83954606dc96329b85bd4be1a Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Tue, 7 Jun 2022 18:30:59 +0200
Subject: [PATCH 5/7] x64: clean up store helper

---
 src/arch/x86_64/CodeGen.zig | 51 +++++++------------------------------
 1 file changed, 9 insertions(+), 42 deletions(-)

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index 5c81233845..839063e0e4 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -2730,15 +2730,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
                     // movabs does not support indirect register addressing
                     // so we need an extra register and an extra mov.
                     const tmp_reg = try self.copyToTmpRegister(value_ty, value);
-                    _ = try self.addInst(.{
-                        .tag = .mov,
-                        .ops = Mir.Inst.Ops.encode(.{
-                            .reg1 = reg.to64(),
-                            .reg2 = tmp_reg.to64(),
-                            .flags = 0b10,
-                        }),
-                        .data = .{ .imm = 0 },
-                    });
+                    return self.store(ptr, .{ .register = tmp_reg }, ptr_ty, value_ty);
                 },
                 else => {
                     return self.fail("TODO implement set pointee with immediate of ABI size {d}", .{abi_size});
@@ -2835,6 +2827,8 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
                 .data = .{ .imm = 0 },
             });

+            const new_ptr = MCValue{ .register = addr_reg.to64() };
+
             switch (value) {
                 .immediate => |imm| {
                     if (abi_size > 8) {
@@ -2873,16 +2867,8 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
                         .data = .{ .payload = payload },
                     });
                 },
-                .register => |reg| {
-                    _ = try self.addInst(.{
-                        .tag = .mov,
-                        .ops = Mir.Inst.Ops.encode(.{
-                            .reg1 = addr_reg.to64(),
-                            .reg2 = reg,
-                            .flags = 0b10,
-                        }),
-                        .data = .{ .imm = 0 },
-                    });
+                .register => {
+                    return self.store(new_ptr, value, ptr_ty, value_ty);
                 },
                 .got_load,
                 .direct_load,
@@ -2904,37 +2890,18 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
                         }),
                         .data = .{ .imm = 0 },
                     });
-                    _ = try self.addInst(.{
-                        .tag = .mov,
-                        .ops = Mir.Inst.Ops.encode(.{
-                            .reg1 = addr_reg.to64(),
-                            .reg2 = tmp_reg,
-                            .flags = 0b10,
-                        }),
-                        .data = .{ .imm = 0 },
-                    });
-                    return;
+                    return self.store(new_ptr, .{ .register = tmp_reg }, ptr_ty, value_ty);
                 }

-                try self.genInlineMemcpy(.{ .register = addr_reg.to64() }, value, .{ .immediate = abi_size }, .{});
+                try self.genInlineMemcpy(new_ptr, value, .{ .immediate = abi_size }, .{});
             },
             .stack_offset => {
                 if (abi_size <= 8) {
-                    // TODO this should really be a recursive call
                     const tmp_reg = try self.copyToTmpRegister(value_ty, value);
-                    _ = try self.addInst(.{
-                        .tag = .mov,
-                        .ops = Mir.Inst.Ops.encode(.{
-                            .reg1 = addr_reg.to64(),
-                            .reg2 = tmp_reg,
-                            .flags = 0b10,
-                        }),
-                        .data = .{ .imm = 0 },
-                    });
-                    return;
+                    return self.store(new_ptr, .{ .register = tmp_reg }, ptr_ty, value_ty);
                 }

-                try self.genInlineMemcpy(.{ .register = addr_reg.to64() }, value, .{ .immediate = abi_size }, .{});
+                try self.genInlineMemcpy(new_ptr, value, .{ .immediate = abi_size }, .{});
             },
             else => return self.fail("TODO implement storing {} to MCValue.memory", .{value}),
         }
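Review note: the cleanup in patch 5 rewrites each hand-rolled `mov [addr], tmp` sequence as a recursive `self.store(...)` call on a freshly materialized register value. The recursion is well-founded: every recursive call passes a `.register` value, and the `.register` arm emits the move directly rather than recursing again. A toy model of that shape:

    const std = @import("std");

    const Value = union(enum) { register: u64, immediate: u64 };

    // Non-register values are first lowered into a register, then the same
    // function handles the register base case with a single direct move.
    fn storeValue(dst: *u64, value: Value) void {
        switch (value) {
            .register => |r| dst.* = r, // base case: one mov
            .immediate => |imm| storeValue(dst, Value{ .register = imm }),
        }
    }

    test "store recursion bottoms out at the register case" {
        var slot: u64 = 0;
        storeValue(&slot, Value{ .immediate = 42 });
        try std.testing.expectEqual(@as(u64, 42), slot);
    }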
From 76ad7af4d87982ef62876df2a2df7bbb2ac312e8 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Tue, 7 Jun 2022 19:10:26 +0200
Subject: [PATCH 6/7] x64: pull common codepath between store and genSetStack into a helper

---
 src/arch/x86_64/CodeGen.zig | 142 ++++++++++++++++--------------------
 1 file changed, 63 insertions(+), 79 deletions(-)

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index 839063e0e4..f4444d56a6 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -2738,46 +2738,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
                 }
             },
             .register => |src_reg| {
-                const src_reg_lock = self.register_manager.lockReg(src_reg);
-                defer if (src_reg_lock) |lock| self.register_manager.unlockReg(lock);
-
-                // TODO common code-path with genSetStack, refactor!
-                if (!math.isPowerOfTwo(abi_size)) {
-                    const tmp_reg = try self.copyToTmpRegister(value_ty, value);
-
-                    var next_offset: i32 = 0;
-                    var remainder = abi_size;
-                    while (remainder > 0) {
-                        const nearest_power_of_two = @as(u6, 1) << math.log2_int(u3, @intCast(u3, remainder));
-
-                        _ = try self.addInst(.{
-                            .tag = .mov,
-                            .ops = Mir.Inst.Ops.encode(.{
-                                .reg1 = reg.to64(),
-                                .reg2 = registerAlias(tmp_reg, nearest_power_of_two),
-                                .flags = 0b10,
-                            }),
-                            .data = .{ .imm = @bitCast(u32, -next_offset) },
-                        });
-
-                        if (nearest_power_of_two > 1) {
-                            try self.genShiftBinOpMir(.shr, value_ty, tmp_reg, .{ .immediate = nearest_power_of_two * 8 });
-                        }
-
-                        remainder -= nearest_power_of_two;
-                        next_offset -= nearest_power_of_two;
-                    }
-                } else {
-                    _ = try self.addInst(.{
-                        .tag = .mov,
-                        .ops = Mir.Inst.Ops.encode(.{
-                            .reg1 = reg.to64(),
-                            .reg2 = registerAlias(src_reg, @intCast(u32, abi_size)),
-                            .flags = 0b10,
-                        }),
-                        .data = .{ .imm = 0 },
-                    });
-                }
+                try self.genInlineMemcpyRegisterRegister(value_ty, reg, src_reg, 0);
             },
             .got_load,
             .direct_load,
@@ -5638,45 +5599,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl
                 return self.fail("TODO genSetStack for register for type float with no intrinsics", .{});
             },
             else => {
-                if (!math.isPowerOfTwo(abi_size)) {
-                    const reg_lock = self.register_manager.lockReg(reg);
-                    defer if (reg_lock) |lock| self.register_manager.unlockReg(lock);
-
-                    const tmp_reg = try self.copyToTmpRegister(ty, mcv);
-
-                    var next_offset = stack_offset;
-                    var remainder = abi_size;
-                    while (remainder > 0) {
-                        const nearest_power_of_two = @as(u6, 1) << math.log2_int(u3, @intCast(u3, remainder));
-
-                        _ = try self.addInst(.{
-                            .tag = .mov,
-                            .ops = Mir.Inst.Ops.encode(.{
-                                .reg1 = base_reg,
-                                .reg2 = registerAlias(tmp_reg, nearest_power_of_two),
-                                .flags = 0b10,
-                            }),
-                            .data = .{ .imm = @bitCast(u32, -next_offset) },
-                        });
-
-                        if (nearest_power_of_two > 1) {
-                            try self.genShiftBinOpMir(.shr, ty, tmp_reg, .{ .immediate = nearest_power_of_two * 8 });
-                        }
-
-                        remainder -= nearest_power_of_two;
-                        next_offset -= nearest_power_of_two;
-                    }
-                } else {
-                    _ = try self.addInst(.{
-                        .tag = .mov,
-                        .ops = Mir.Inst.Ops.encode(.{
-                            .reg1 = base_reg,
-                            .reg2 = registerAlias(reg, @intCast(u32, abi_size)),
-                            .flags = 0b10,
-                        }),
-                        .data = .{ .imm = @bitCast(u32, -stack_offset) },
-                    });
-                }
+                try self.genInlineMemcpyRegisterRegister(ty, base_reg, reg, stack_offset);
             },
         }
     },
@@ -5711,6 +5634,67 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl
     }
 }

+/// Like `genInlineMemcpy` but copies value from a register to an address via dereferencing
+/// of destination register.
+/// Boils down to MOV r/m64, r64.
+fn genInlineMemcpyRegisterRegister(
+    self: *Self,
+    ty: Type,
+    dst_reg: Register,
+    src_reg: Register,
+    offset: i32,
+) InnerError!void {
+    assert(dst_reg.size() == 64);
+
+    const dst_reg_lock = self.register_manager.lockReg(dst_reg);
+    defer if (dst_reg_lock) |lock| self.register_manager.unlockReg(lock);
+
+    const src_reg_lock = self.register_manager.lockReg(src_reg);
+    defer if (src_reg_lock) |lock| self.register_manager.unlockReg(lock);
+
+    const abi_size = @intCast(u32, ty.abiSize(self.target.*));
+
+    // TODO common code-path with genSetStack, refactor!
+    if (!math.isPowerOfTwo(abi_size)) {
+        const tmp_reg = try self.copyToTmpRegister(ty, .{ .register = src_reg });
+
+        var next_offset = offset;
+        var remainder = abi_size;
+        while (remainder > 0) {
+            const nearest_power_of_two = @as(u6, 1) << math.log2_int(u3, @intCast(u3, remainder));
+
+            _ = try self.addInst(.{
+                .tag = .mov,
+                .ops = Mir.Inst.Ops.encode(.{
+                    .reg1 = dst_reg,
+                    .reg2 = registerAlias(tmp_reg, nearest_power_of_two),
+                    .flags = 0b10,
+                }),
+                .data = .{ .imm = @bitCast(u32, -next_offset) },
+            });

+            if (nearest_power_of_two > 1) {
+                try self.genShiftBinOpMir(.shr, ty, tmp_reg, .{
+                    .immediate = nearest_power_of_two * 8,
+                });
+            }
+
+            remainder -= nearest_power_of_two;
+            next_offset -= nearest_power_of_two;
+        }
+    } else {
+        _ = try self.addInst(.{
+            .tag = .mov,
+            .ops = Mir.Inst.Ops.encode(.{
+                .reg1 = dst_reg,
+                .reg2 = registerAlias(src_reg, @intCast(u32, abi_size)),
+                .flags = 0b10,
+            }),
+            .data = .{ .imm = @bitCast(u32, -offset) },
+        });
+    }
+}
+
 const InlineMemcpyOpts = struct {
     source_stack_base: ?Register = null,
     dest_stack_base: ?Register = null,
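Review note: after patch 6, the two former copies of the chunked-store loop reduce to single calls that differ only in destination base and displacement — `store` writes through the pointer register at displacement 0, while `genSetStack` writes `stack_offset` bytes below the stack base. The mapping, shown as Zig comments for orientation (compiler-internal calls, not runnable on their own):

    // store(), value in src_reg, pointer in reg:
    //     try self.genInlineMemcpyRegisterRegister(value_ty, reg, src_reg, 0);
    //
    // genSetStack(), value in reg, slot at [base_reg - stack_offset]:
    //     try self.genInlineMemcpyRegisterRegister(ty, base_reg, reg, stack_offset);
    //
    // In both cases `offset` is a positive displacement below the destination
    // base, and successive chunks land at [dst_reg - offset],
    // [dst_reg - offset + chunk], and so on.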
From 27dad11ef166d5834be1166c86289c8e37aa0471 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Tue, 7 Jun 2022 21:05:11 +0200
Subject: [PATCH 7/7] x64: remove outdated TODO comment

---
 src/arch/x86_64/CodeGen.zig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index f4444d56a6..5bda84c98c 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -5654,7 +5654,6 @@ fn genInlineMemcpyRegisterRegister(
     const abi_size = @intCast(u32, ty.abiSize(self.target.*));

-    // TODO common code-path with genSetStack, refactor!
     if (!math.isPowerOfTwo(abi_size)) {
         const tmp_reg = try self.copyToTmpRegister(ty, .{ .register = src_reg });
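A closing check on the helper the series converges on (the patch above only drops its now-stale TODO): for a 3-byte value stored through a register, i.e. `offset == 0`, `genInlineMemcpyRegisterRegister` emits, in effect, `mov word ptr [dst], tmpw`, then `shr tmp, 16`, then `mov byte ptr [dst + 2], tmpb`. The chunk/displacement sequence, verified with widened types:

    const std = @import("std");
    const math = std.math;

    test "3-byte store: chunk sizes and displacements" {
        var remainder: u32 = 3;
        var next_offset: i32 = 0;
        var step: usize = 0;
        const expected = [_]struct { size: u32, disp: i32 }{
            .{ .size = 2, .disp = 0 },
            .{ .size = 1, .disp = 2 },
        };
        while (remainder > 0) : (step += 1) {
            const chunk = @as(u32, 1) << math.log2_int(u32, remainder);
            try std.testing.expectEqual(expected[step].size, chunk);
            try std.testing.expectEqual(expected[step].disp, -next_offset);
            remainder -= chunk;
            next_offset -= @intCast(i32, chunk);
        }
    }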