From 3b22ce82643f2e547c85bf4b17477bc27133eb42 Mon Sep 17 00:00:00 2001
From: Jacob Young
Date: Thu, 6 Apr 2023 20:21:54 -0400
Subject: [PATCH] x86_64: fix atomic loop implementation

---
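Notes on the change, with a sketch of the semantics involved:

The cmov_abi_size renames below clamp the CMOV operand width to at
least 2 bytes because CMOVcc has no byte-sized encoding; 1-byte values
must use the 16-bit register alias.

atomicOp now allocates its own destination and temporary registers and
returns the result as an MCValue instead of writing through a
caller-supplied dst_reg. Its .loop strategy lowers @atomicRmw to a
lock cmpxchg retry loop: load the current value into rax, compute the
updated value in a temporary register, attempt the exchange, and jump
back on failure; the old value left in rax is the result. A minimal
source-level sketch of that loop for a fetch-add (fetchAdd is a
hypothetical helper for illustration, not code from this patch):

    fn fetchAdd(ptr: *u64, operand: u64) u64 {
        // Read the current value, then race to publish old +% operand.
        var old = @atomicLoad(u64, ptr, .SeqCst);
        while (@cmpxchgWeak(u64, ptr, old, old +% operand, .SeqCst, .SeqCst)) |current| {
            old = current; // lost the race; retry with the fresh value
        }
        return old; // @atomicRmw returns the previous value
    }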
 src/arch/x86_64/CodeGen.zig | 137 ++++++++++++++++++++----------------
 test/behavior/atomics.zig   |   1 -
 test/behavior/pointers.zig  |   1 +
 3 files changed, 79 insertions(+), 60 deletions(-)

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index dab2f36140..69c3a37111 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -1734,10 +1734,10 @@ fn airAddSat(self: *Self, inst: Air.Inst.Index) !void {
         };
         try self.genBinOpMir(.add, ty, dst_mcv, rhs_mcv);
 
-        const abi_size = @intCast(u32, @max(ty.abiSize(self.target.*), 2));
+        const cmov_abi_size = @max(@intCast(u32, ty.abiSize(self.target.*)), 2);
         try self.asmCmovccRegisterRegister(
-            registerAlias(dst_reg, abi_size),
-            registerAlias(limit_reg, abi_size),
+            registerAlias(dst_reg, cmov_abi_size),
+            registerAlias(limit_reg, cmov_abi_size),
             cc,
         );
         break :result dst_mcv;
@@ -1785,10 +1785,10 @@ fn airSubSat(self: *Self, inst: Air.Inst.Index) !void {
         };
         try self.genBinOpMir(.sub, ty, dst_mcv, rhs_mcv);
 
-        const abi_size = @intCast(u32, @max(ty.abiSize(self.target.*), 2));
+        const cmov_abi_size = @max(@intCast(u32, ty.abiSize(self.target.*)), 2);
         try self.asmCmovccRegisterRegister(
-            registerAlias(dst_reg, abi_size),
-            registerAlias(limit_reg, abi_size),
+            registerAlias(dst_reg, cmov_abi_size),
+            registerAlias(limit_reg, cmov_abi_size),
             cc,
         );
         break :result dst_mcv;
@@ -1841,10 +1841,10 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void {
         };
 
         const dst_mcv = try self.genMulDivBinOp(.mul, inst, ty, ty, lhs_mcv, rhs_mcv);
-        const abi_size = @intCast(u32, @max(ty.abiSize(self.target.*), 2));
+        const cmov_abi_size = @max(@intCast(u32, ty.abiSize(self.target.*)), 2);
         try self.asmCmovccRegisterRegister(
-            registerAlias(dst_mcv.register, abi_size),
-            registerAlias(limit_reg, abi_size),
+            registerAlias(dst_mcv.register, cmov_abi_size),
+            registerAlias(limit_reg, cmov_abi_size),
             cc,
         );
         break :result dst_mcv;
@@ -3102,10 +3102,10 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void {
             try self.copyToRegisterWithInstTracking(inst, dst_ty, .{ .immediate = src_bits });
         try self.genBinOpMir(.bsr, src_ty, dst_mcv, mat_src_mcv);
 
-        const dst_abi_size = @intCast(u32, @max(dst_ty.abiSize(self.target.*), 2));
+        const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2);
         try self.asmCmovccRegisterRegister(
-            registerAlias(dst_reg, dst_abi_size),
-            registerAlias(width_mcv.register, dst_abi_size),
+            registerAlias(dst_reg, cmov_abi_size),
+            registerAlias(width_mcv.register, cmov_abi_size),
             .z,
         );
 
@@ -3162,10 +3162,10 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void {
         const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits });
         try self.genBinOpMir(.bsf, src_ty, dst_mcv, mat_src_mcv);
 
-        const abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2);
+        const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2);
         try self.asmCmovccRegisterRegister(
-            registerAlias(dst_reg, abi_size),
-            registerAlias(width_reg, abi_size),
+            registerAlias(dst_reg, cmov_abi_size),
+            registerAlias(width_reg, cmov_abi_size),
             .z,
         );
         break :result dst_mcv;
@@ -4766,7 +4766,7 @@ fn genBinOp(
                 },
             };
 
-            const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*));
+            const cmov_abi_size = @max(@intCast(u32, lhs_ty.abiSize(self.target.*)), 2);
             const tmp_reg = switch (dst_mcv) {
                 .register => |reg| reg,
                 else => try self.copyToTmpRegister(lhs_ty, dst_mcv),
@@ -4784,13 +4784,13 @@ fn genBinOp(
                 .ptr_stack_offset,
                 => unreachable,
                 .register => |src_reg| try self.asmCmovccRegisterRegister(
-                    registerAlias(tmp_reg, abi_size),
-                    registerAlias(src_reg, abi_size),
+                    registerAlias(tmp_reg, cmov_abi_size),
+                    registerAlias(src_reg, cmov_abi_size),
                     cc,
                 ),
                 .stack_offset => |off| try self.asmCmovccRegisterMemory(
-                    registerAlias(tmp_reg, abi_size),
-                    Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
+                    registerAlias(tmp_reg, cmov_abi_size),
+                    Memory.sib(Memory.PtrSize.fromSize(cmov_abi_size), .{
                         .base = .rbp,
                         .disp = -off,
                     }),
@@ -4803,8 +4803,8 @@ fn genBinOp(
                     try self.loadMemPtrIntoRegister(addr_reg, Type.usize, mat_src_mcv);
 
                     try self.asmCmovccRegisterMemory(
-                        registerAlias(tmp_reg, abi_size),
-                        Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = addr_reg }),
+                        registerAlias(tmp_reg, cmov_abi_size),
+                        Memory.sib(Memory.PtrSize.fromSize(cmov_abi_size), .{ .base = addr_reg }),
                         cc,
                     );
                 },
@@ -7727,7 +7727,6 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void {
 
 fn atomicOp(
     self: *Self,
-    dst_reg: Register,
     ptr_mcv: MCValue,
     val_mcv: MCValue,
     ptr_ty: Type,
@@ -7735,11 +7734,7 @@ fn atomicOp(
     unused: bool,
     rmw_op: ?std.builtin.AtomicRmwOp,
     order: std.builtin.AtomicOrder,
-) InnerError!void {
-    const dst_mcv = MCValue{ .register = dst_reg };
-    const dst_lock = self.register_manager.lockReg(dst_reg);
-    defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
+) InnerError!MCValue {
     const ptr_lock = switch (ptr_mcv) {
         .register => |reg| self.register_manager.lockReg(reg),
         else => null,
@@ -7794,9 +7789,14 @@ fn atomicOp(
                 .SeqCst => .xchg,
             };
 
+            const dst_reg = try self.register_manager.allocReg(null, gp);
+            const dst_mcv = MCValue{ .register = dst_reg };
+            const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
+            defer self.register_manager.unlockReg(dst_lock);
+
             try self.genSetReg(val_ty, dst_reg, val_mcv);
             if (rmw_op == std.builtin.AtomicRmwOp.Sub and tag == .xadd) {
-                try self.genUnOpMir(.neg, val_ty, .{ .register = dst_reg });
+                try self.genUnOpMir(.neg, val_ty, dst_mcv);
             }
             _ = try self.addInst(.{ .tag = tag, .ops = switch (tag) {
                 .mov, .xchg => .mr_sib,
@@ -7806,25 +7806,31 @@ fn atomicOp(
                 .r = registerAlias(dst_reg, val_abi_size),
                 .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)),
             } } });
+
+            return if (unused) .none else dst_mcv;
         },
-        .loop => _ = try self.asmJccReloc(if (val_abi_size <= 8) loop: {
-            try self.genSetReg(val_ty, dst_reg, val_mcv);
+        .loop => _ = if (val_abi_size <= 8) {
+            const tmp_reg = try self.register_manager.allocReg(null, gp);
+            const tmp_mcv = MCValue{ .register = tmp_reg };
+            const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+            defer self.register_manager.unlockReg(tmp_lock);
+
             try self.asmRegisterMemory(.mov, registerAlias(.rax, val_abi_size), ptr_mem);
             const loop = @intCast(u32, self.mir_instructions.len);
             if (rmw_op != std.builtin.AtomicRmwOp.Xchg) {
-                try self.genSetReg(val_ty, dst_reg, .{ .register = .rax });
+                try self.genSetReg(val_ty, tmp_reg, .{ .register = .rax });
             }
             if (rmw_op) |op| switch (op) {
-                .Xchg => try self.genSetReg(val_ty, dst_reg, val_mcv),
-                .Add => try self.genBinOpMir(.add, val_ty, dst_mcv, val_mcv),
-                .Sub => try self.genBinOpMir(.sub, val_ty, dst_mcv, val_mcv),
-                .And => try self.genBinOpMir(.@"and", val_ty, dst_mcv, val_mcv),
+                .Xchg => try self.genSetReg(val_ty, tmp_reg, val_mcv),
+                .Add => try self.genBinOpMir(.add, val_ty, tmp_mcv, val_mcv),
+                .Sub => try self.genBinOpMir(.sub, val_ty, tmp_mcv, val_mcv),
+                .And => try self.genBinOpMir(.@"and", val_ty, tmp_mcv, val_mcv),
                 .Nand => {
-                    try self.genBinOpMir(.@"and", val_ty, dst_mcv, val_mcv);
-                    try self.genUnOpMir(.not, val_ty, dst_mcv);
+                    try self.genBinOpMir(.@"and", val_ty, tmp_mcv, val_mcv);
+                    try self.genUnOpMir(.not, val_ty, tmp_mcv);
                 },
-                .Or => try self.genBinOpMir(.@"or", val_ty, dst_mcv, val_mcv),
-                .Xor => try self.genBinOpMir(.xor, val_ty, dst_mcv, val_mcv),
+                .Or => try self.genBinOpMir(.@"or", val_ty, tmp_mcv, val_mcv),
+                .Xor => try self.genBinOpMir(.xor, val_ty, tmp_mcv, val_mcv),
                 .Min, .Max => {
                     const cc: Condition = switch (if (val_ty.isAbiInt())
                         val_ty.intInfo(self.target.*).signedness
@@ -7842,17 +7848,18 @@ fn atomicOp(
                         },
                     };
 
-                    try self.genBinOpMir(.cmp, val_ty, dst_mcv, val_mcv);
+                    try self.genBinOpMir(.cmp, val_ty, tmp_mcv, val_mcv);
+                    const cmov_abi_size = @max(val_abi_size, 2);
                     switch (val_mcv) {
                         .register => |val_reg| try self.asmCmovccRegisterRegister(
-                            registerAlias(dst_reg, val_abi_size),
-                            registerAlias(val_reg, val_abi_size),
+                            registerAlias(tmp_reg, cmov_abi_size),
+                            registerAlias(val_reg, cmov_abi_size),
                             cc,
                         ),
                         .stack_offset => |val_off| try self.asmCmovccRegisterMemory(
-                            registerAlias(dst_reg, val_abi_size),
+                            registerAlias(tmp_reg, cmov_abi_size),
                             Memory.sib(
-                                Memory.PtrSize.fromSize(val_abi_size),
+                                Memory.PtrSize.fromSize(cmov_abi_size),
                                 .{ .base = .rbp, .disp = -val_off },
                             ),
                             cc,
@@ -7860,8 +7867,8 @@ fn atomicOp(
                         else => {
                             const val_reg = try self.copyToTmpRegister(val_ty, val_mcv);
                             try self.asmCmovccRegisterRegister(
-                                registerAlias(dst_reg, val_abi_size),
-                                registerAlias(val_reg, val_abi_size),
+                                registerAlias(tmp_reg, cmov_abi_size),
+                                registerAlias(val_reg, cmov_abi_size),
                                 cc,
                             );
                         },
@@ -7869,11 +7876,12 @@ fn atomicOp(
                 },
             };
             _ = try self.addInst(.{ .tag = .cmpxchg, .ops = .lock_mr_sib, .data = .{ .rx = .{
-                .r = registerAlias(dst_reg, val_abi_size),
+                .r = registerAlias(tmp_reg, val_abi_size),
                 .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)),
             } } });
-            break :loop loop;
-        } else loop: {
+            _ = try self.asmJccReloc(loop, .ne);
+            return if (unused) .none else .{ .register = .rax };
+        } else {
             try self.asmRegisterMemory(.mov, .rax, Memory.sib(.qword, .{
                 .base = ptr_mem.sib.base,
                 .scale_index = ptr_mem.sib.scale_index,
@@ -7939,8 +7947,22 @@ fn atomicOp(
            _ = try self.addInst(.{ .tag = .cmpxchgb, .ops = .lock_m_sib, .data = .{
                 .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)),
             } });
-            break :loop loop;
-        }, .ne),
+            _ = try self.asmJccReloc(loop, .ne);
+
+            if (unused) return .none;
+            const dst_mcv = try self.allocTempRegOrMem(val_ty, false);
+            try self.asmMemoryRegister(
+                .mov,
+                Memory.sib(.qword, .{ .base = .rbp, .disp = 0 - dst_mcv.stack_offset }),
+                .rax,
+            );
+            try self.asmMemoryRegister(
+                .mov,
+                Memory.sib(.qword, .{ .base = .rbp, .disp = 8 - dst_mcv.stack_offset }),
+                .rdx,
+            );
+            return dst_mcv;
+        },
         .libcall => return self.fail("TODO implement x86 atomic libcall", .{}),
     }
 }
@@ -7954,7 +7976,6 @@ fn airAtomicRmw(self: *Self, inst: Air.Inst.Index) !void {
     defer for (regs_lock) |lock| self.register_manager.unlockReg(lock);
 
     const unused = self.liveness.isUnused(inst);
-    const dst_reg = try self.register_manager.allocReg(if (unused) null else inst, gp);
 
     const ptr_ty = self.air.typeOf(pl_op.operand);
     const ptr_mcv = try self.resolveInst(pl_op.operand);
@@ -7962,8 +7983,8 @@ fn airAtomicRmw(self: *Self, inst: Air.Inst.Index) !void {
     const val_ty = self.air.typeOf(extra.operand);
     const val_mcv = try self.resolveInst(extra.operand);
 
-    try self.atomicOp(dst_reg, ptr_mcv, val_mcv, ptr_ty, val_ty, unused, extra.op(), extra.ordering());
-    const result: MCValue = if (unused) .dead else .{ .register = dst_reg };
+    const result =
+        try self.atomicOp(ptr_mcv, val_mcv, ptr_ty, val_ty, unused, extra.op(), extra.ordering());
     return self.finishAir(inst, result, .{ pl_op.operand, extra.operand, .none });
 }
 
@@ -7996,16 +8017,14 @@ fn airAtomicLoad(self: *Self, inst: Air.Inst.Index) !void {
 fn airAtomicStore(self: *Self, inst: Air.Inst.Index, order: std.builtin.AtomicOrder) !void {
     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
 
-    const dst_reg = try self.register_manager.allocReg(null, gp);
-
     const ptr_ty = self.air.typeOf(bin_op.lhs);
     const ptr_mcv = try self.resolveInst(bin_op.lhs);
 
     const val_ty = self.air.typeOf(bin_op.rhs);
     const val_mcv = try self.resolveInst(bin_op.rhs);
 
-    try self.atomicOp(dst_reg, ptr_mcv, val_mcv, ptr_ty, val_ty, true, null, order);
-    return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
+    const result = try self.atomicOp(ptr_mcv, val_mcv, ptr_ty, val_ty, true, null, order);
+    return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }
 
 fn airMemset(self: *Self, inst: Air.Inst.Index) !void {
diff --git a/test/behavior/atomics.zig b/test/behavior/atomics.zig
index e6000cd848..04dd2661bb 100644
--- a/test/behavior/atomics.zig
+++ b/test/behavior/atomics.zig
@@ -232,7 +232,6 @@ fn testAtomicRmwFloat() !void {
 
 test "atomicrmw with ints" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
diff --git a/test/behavior/pointers.zig b/test/behavior/pointers.zig
index 626a1a7eb6..a03ad66bd2 100644
--- a/test/behavior/pointers.zig
+++ b/test/behavior/pointers.zig
@@ -506,6 +506,7 @@ test "ptrToInt on a generic function" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
 
     const S = struct {
         fn generic(i: anytype) @TypeOf(i) {