From 0165187cd07b26f15c2ca7e021747d0989d1956b Mon Sep 17 00:00:00 2001
From: Jacob Young
Date: Wed, 5 Apr 2023 02:18:18 -0400
Subject: [PATCH 1/4] x86_64: fix some of the mass confusion about the meaning of `MCValue`

---
 src/arch/x86_64/CodeGen.zig | 202 ++++++++++++++++++++++--------------
 test/behavior/array.zig     |   1 -
 test/behavior/bugs/718.zig  |   1 -
 test/behavior/bugs/7325.zig |   1 -
 4 files changed, 126 insertions(+), 79 deletions(-)

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index cff63fa6e1..dab2f36140 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -3569,7 +3569,12 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo
                     return self.genSetStack(elem_ty, off, MCValue{ .register = tmp_reg }, .{});
                 }
 
-                try self.genInlineMemcpy(dst_mcv, ptr, .{ .immediate = abi_size }, .{});
+                try self.genInlineMemcpy(
+                    .{ .ptr_stack_offset = off },
+                    ptr,
+                    .{ .immediate = abi_size },
+                    .{},
+                );
             },
             else => return self.fail("TODO implement loading from register into {}", .{dst_mcv}),
         }
@@ -3745,22 +3750,47 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
                         -@intCast(i32, overflow_bit_offset),
                     );
                 },
-                .linker_load, .memory, .stack_offset => if (abi_size <= 8) {
+                .memory, .linker_load => if (abi_size <= 8) {
                     const tmp_reg = try self.copyToTmpRegister(value_ty, value);
+                    const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+                    defer self.register_manager.unlockReg(tmp_lock);
+
+                    try self.store(ptr, .{ .register = tmp_reg }, ptr_ty, value_ty);
+                } else {
+                    const addr_reg = try self.register_manager.allocReg(null, gp);
+                    const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
+                    defer self.register_manager.unlockReg(addr_lock);
+
+                    try self.loadMemPtrIntoRegister(addr_reg, Type.usize, value);
+                    try self.genInlineMemcpy(
+                        ptr,
+                        .{ .register = addr_reg },
+                        .{ .immediate = abi_size },
+                        .{},
+                    );
+                },
+                .stack_offset => |off| if (abi_size <= 8) {
+                    const tmp_reg = try self.copyToTmpRegister(value_ty, value);
+                    const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+                    defer self.register_manager.unlockReg(tmp_lock);
+
                     try self.store(ptr, .{ .register = tmp_reg }, ptr_ty, value_ty);
                 } else try self.genInlineMemcpy(
-                    .{ .stack_offset = 0 },
-                    value,
+                    ptr,
+                    .{ .ptr_stack_offset = off },
                     .{ .immediate = abi_size },
-                    .{ .source_stack_base = .rbp, .dest_stack_base = reg.to64() },
+                    .{},
                 ),
                 .ptr_stack_offset => {
                     const tmp_reg = try self.copyToTmpRegister(value_ty, value);
+                    const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+                    defer self.register_manager.unlockReg(tmp_lock);
+
                     try self.store(ptr, .{ .register = tmp_reg }, ptr_ty, value_ty);
                 },
             }
         },
-        .linker_load, .memory => {
+        .memory, .linker_load => {
             const value_lock: ?RegisterLock = switch (value) {
                 .register => |reg| self.register_manager.lockReg(reg),
                 else => null,
             };
@@ -5530,14 +5560,7 @@ fn airRet(self: *Self, inst: Air.Inst.Index) !void {
             assert(ret_ty.isError());
         },
         .stack_offset => {
-            const reg = try self.copyToTmpRegister(Type.usize, self.ret_mcv);
-            const reg_lock = self.register_manager.lockRegAssumeUnused(reg);
-            defer self.register_manager.unlockReg(reg_lock);
-
-            try self.genSetStack(ret_ty, 0, operand, .{
-                .source_stack_base = .rbp,
-                .dest_stack_base = reg,
-            });
+            try self.store(self.ret_mcv, operand, Type.usize, ret_ty);
        },
        else => {
            try self.setRegOrMem(ret_ty, self.ret_mcv, operand);
@@ -5556,6 +5579,7 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void {
     const ptr = try self.resolveInst(un_op);
     const ptr_ty = self.air.typeOf(un_op);
     const elem_ty = ptr_ty.elemType();
+    const abi_size = elem_ty.abiSize(self.target.*);
     switch (self.ret_mcv) {
         .immediate => {
             assert(elem_ty.isError());
@@ -5565,10 +5589,7 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void {
             const reg_lock = self.register_manager.lockRegAssumeUnused(reg);
             defer self.register_manager.unlockReg(reg_lock);
 
-            try self.genInlineMemcpy(.{ .stack_offset = 0 }, ptr, .{ .immediate = elem_ty.abiSize(self.target.*) }, .{
-                .source_stack_base = .rbp,
-                .dest_stack_base = reg,
-            });
+            try self.genInlineMemcpy(.{ .register = reg }, ptr, .{ .immediate = abi_size }, .{});
         },
         else => {
             try self.load(self.ret_mcv, ptr, ptr_ty);
@@ -6838,7 +6859,7 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE
                 return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg });
             }
             try self.genInlineMemset(
-                .{ .stack_offset = stack_offset },
+                .{ .ptr_stack_offset = stack_offset },
                 .{ .immediate = 0xaa },
                 .{ .immediate = abi_size },
                 .{ .dest_stack_base = .rsp },
@@ -6877,10 +6898,17 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE
                 return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg });
             }
 
-            try self.genInlineMemcpy(.{ .stack_offset = stack_offset }, mcv, .{ .immediate = abi_size }, .{
-                .source_stack_base = .rbp,
-                .dest_stack_base = .rsp,
-            });
+            const addr_reg = try self.register_manager.allocReg(null, gp);
+            const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
+            defer self.register_manager.unlockReg(addr_lock);
+
+            try self.loadMemPtrIntoRegister(addr_reg, Type.usize, mcv);
+            try self.genInlineMemcpy(
+                .{ .ptr_stack_offset = stack_offset },
+                .{ .register = addr_reg },
+                .{ .immediate = abi_size },
+                .{ .dest_stack_base = .rsp },
+            );
         },
         .register => |reg| {
             switch (ty.zigTypeTag()) {
@@ -6920,16 +6948,18 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE
             const reg = try self.copyToTmpRegister(ty, mcv);
             return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg });
         },
-        .stack_offset => {
+        .stack_offset => |mcv_off| {
             if (abi_size <= 8) {
                 const reg = try self.copyToTmpRegister(ty, mcv);
                 return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg });
             }
 
-            try self.genInlineMemcpy(.{ .stack_offset = stack_offset }, mcv, .{ .immediate = abi_size }, .{
-                .source_stack_base = .rbp,
-                .dest_stack_base = .rsp,
-            });
+            try self.genInlineMemcpy(
+                .{ .ptr_stack_offset = stack_offset },
+                .{ .ptr_stack_offset = mcv_off },
+                .{ .immediate = abi_size },
+                .{ .dest_stack_base = .rsp },
+            );
         },
     }
 }
@@ -6963,7 +6993,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl
                 opts,
             ),
             else => |x| return self.genInlineMemset(
-                .{ .stack_offset = stack_offset },
+                .{ .ptr_stack_offset = stack_offset },
                 .{ .immediate = 0xaa },
                 .{ .immediate = x },
                 opts,
@@ -7072,22 +7102,40 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl
                 },
             }
         },
-        .memory, .linker_load, .stack_offset, .ptr_stack_offset => {
-            switch (mcv) {
-                else => unreachable,
-                .memory, .linker_load, .ptr_stack_offset => {},
-                .stack_offset => |src_off| if (stack_offset == src_off) {
-                    // Copy stack variable to itself; nothing to do.
-                    return;
-                },
-            }
+        .memory, .linker_load => if (abi_size <= 8) {
+            const reg = try self.copyToTmpRegister(ty, mcv);
+            return self.genSetStack(ty, stack_offset, MCValue{ .register = reg }, opts);
+        } else {
+            const addr_reg = try self.register_manager.allocReg(null, gp);
+            const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
+            defer self.register_manager.unlockReg(addr_lock);
 
-            if (abi_size <= 8) {
-                const reg = try self.copyToTmpRegister(ty, mcv);
-                return self.genSetStack(ty, stack_offset, MCValue{ .register = reg }, opts);
-            }
+            try self.loadMemPtrIntoRegister(addr_reg, Type.usize, mcv);
+            try self.genInlineMemcpy(
+                .{ .ptr_stack_offset = stack_offset },
+                .{ .register = addr_reg },
+                .{ .immediate = abi_size },
+                .{},
+            );
+        },
+        .stack_offset => |off| if (abi_size <= 8) {
+            const tmp_reg = try self.copyToTmpRegister(ty, mcv);
+            const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+            defer self.register_manager.unlockReg(tmp_lock);
 
-            try self.genInlineMemcpy(.{ .stack_offset = stack_offset }, mcv, .{ .immediate = abi_size }, opts);
+            try self.genSetStack(ty, stack_offset, .{ .register = tmp_reg }, opts);
+        } else try self.genInlineMemcpy(
+            .{ .ptr_stack_offset = stack_offset },
+            .{ .ptr_stack_offset = off },
+            .{ .immediate = abi_size },
+            .{},
+        ),
+        .ptr_stack_offset => {
+            const tmp_reg = try self.copyToTmpRegister(ty, mcv);
+            const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+            defer self.register_manager.unlockReg(tmp_lock);
+
+            try self.genSetStack(ty, stack_offset, .{ .register = tmp_reg }, opts);
         },
     }
 }
@@ -7134,10 +7182,14 @@ fn genInlineMemcpyRegisterRegister(
             next_offset -= nearest_power_of_two;
         }
     } else {
-        try self.asmMemoryRegister(.mov, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
-            .base = dst_reg,
-            .disp = -offset,
-        }), registerAlias(src_reg, abi_size));
+        try self.asmMemoryRegister(
+            switch (src_reg.class()) {
+                .general_purpose, .segment => .mov,
+                .floating_point => .movss,
+            },
+            Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = dst_reg, .disp = -offset }),
+            registerAlias(src_reg, abi_size),
+        );
     }
 }
@@ -7170,9 +7222,15 @@ fn genInlineMemcpy(
     switch (dst_ptr) {
         .memory, .linker_load => {
             try self.loadMemPtrIntoRegister(.rdi, Type.usize, dst_ptr);
+            // Load the pointer, which is stored in memory
+            try self.asmRegisterMemory(.mov, .rdi, Memory.sib(.qword, .{ .base = .rdi }));
         },
-        .ptr_stack_offset, .stack_offset => |off| {
-            try self.asmRegisterMemory(.lea, .rdi, Memory.sib(.qword, .{
+        .stack_offset, .ptr_stack_offset => |off| {
+            try self.asmRegisterMemory(switch (dst_ptr) {
+                .stack_offset => .mov,
+                .ptr_stack_offset => .lea,
+                else => unreachable,
+            }, .rdi, Memory.sib(.qword, .{
                 .base = opts.dest_stack_base orelse .rbp,
                 .disp = -off,
             }));
@@ -7192,9 +7250,15 @@ fn genInlineMemcpy(
     switch (src_ptr) {
         .memory, .linker_load => {
             try self.loadMemPtrIntoRegister(.rsi, Type.usize, src_ptr);
+            // Load the pointer, which is stored in memory
+            try self.asmRegisterMemory(.mov, .rsi, Memory.sib(.qword, .{ .base = .rsi }));
         },
-        .ptr_stack_offset, .stack_offset => |off| {
-            try self.asmRegisterMemory(.lea, .rsi, Memory.sib(.qword, .{
+        .stack_offset, .ptr_stack_offset => |off| {
+            try self.asmRegisterMemory(switch (src_ptr) {
+                .stack_offset => .mov,
+                .ptr_stack_offset => .lea,
+                else => unreachable,
+            }, .rsi, Memory.sib(.qword, .{
                 .base = opts.source_stack_base orelse .rbp,
                 .disp = -off,
             }));
@@ -7237,9 +7301,15 @@ fn genInlineMemset(
     switch (dst_ptr) {
         .memory, .linker_load => {
             try self.loadMemPtrIntoRegister(.rdi, Type.usize, dst_ptr);
+            // Load the pointer, which is stored in memory
+            try self.asmRegisterMemory(.mov, .rdi, Memory.sib(.qword, .{ .base = .rdi }));
         },
-        .ptr_stack_offset, .stack_offset => |off| {
-            try self.asmRegisterMemory(.lea, .rdi, Memory.sib(.qword, .{
+        .stack_offset, .ptr_stack_offset => |off| {
+            try self.asmRegisterMemory(switch (dst_ptr) {
+                .stack_offset => .mov,
+                .ptr_stack_offset => .lea,
+                else => unreachable,
+            }, .rdi, Memory.sib(.qword, .{
                 .base = opts.dest_stack_base orelse .rbp,
                 .disp = -off,
             }));
@@ -7979,7 +8049,6 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void {
     };
     defer if (dst_ptr_lock) |lock| self.register_manager.unlockReg(lock);
 
-    const src_ty = self.air.typeOf(extra.lhs);
     const src_ptr = try self.resolveInst(extra.lhs);
     const src_ptr_lock: ?RegisterLock = switch (src_ptr) {
         .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
@@ -7994,25 +8063,7 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void {
     };
     defer if (len_lock) |lock| self.register_manager.unlockReg(lock);
 
-    // TODO Is this the only condition for pointer dereference for memcpy?
-    const src: MCValue = blk: {
-        switch (src_ptr) {
-            .linker_load, .memory => {
-                const reg = try self.register_manager.allocReg(null, gp);
-                try self.loadMemPtrIntoRegister(reg, src_ty, src_ptr);
-                try self.asmRegisterMemory(.mov, reg, Memory.sib(.qword, .{ .base = reg }));
-                break :blk MCValue{ .register = reg };
-            },
-            else => break :blk src_ptr,
-        }
-    };
-    const src_lock: ?RegisterLock = switch (src) {
-        .register => |reg| self.register_manager.lockReg(reg),
-        else => null,
-    };
-    defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
-
-    try self.genInlineMemcpy(dst_ptr, src, len, .{});
+    try self.genInlineMemcpy(dst_ptr, src_ptr, len, .{});
 
     return self.finishAir(inst, .none, .{ pl_op.operand, extra.lhs, extra.rhs });
 }
@@ -8156,11 +8207,10 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
         switch (result_ty.zigTypeTag()) {
             .Struct => {
                 const stack_offset = @intCast(i32, try self.allocMem(inst, abi_size, abi_align));
-                const dst_mcv = MCValue{ .stack_offset = stack_offset };
 
                 if (result_ty.containerLayout() == .Packed) {
                     const struct_obj = result_ty.castTag(.@"struct").?.data;
                     try self.genInlineMemset(
-                        dst_mcv,
+                        .{ .ptr_stack_offset = stack_offset },
                         .{ .immediate = 0 },
                         .{ .immediate = abi_size },
                         .{},
@@ -8236,7 +8286,7 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
                     const elem_mcv = try self.resolveInst(elem);
                     try self.genSetStack(elem_ty, stack_offset - elem_off, elem_mcv, .{});
                 }
-                break :res dst_mcv;
+                break :res .{ .stack_offset = stack_offset };
             },
             .Array => {
                 const stack_offset = @intCast(i32, try self.allocMem(inst, abi_size, abi_align));
diff --git a/test/behavior/array.zig b/test/behavior/array.zig
index b2d9816c18..88a6ab947d 100644
--- a/test/behavior/array.zig
+++ b/test/behavior/array.zig
@@ -190,7 +190,6 @@ test "nested arrays of strings" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
 
     const array_of_strings = [_][]const u8{ "hello", "this", "is", "my", "thing" };
     for (array_of_strings, 0..) |s, i| {
diff --git a/test/behavior/bugs/718.zig b/test/behavior/bugs/718.zig
index f038675def..3e87c6bb37 100644
--- a/test/behavior/bugs/718.zig
+++ b/test/behavior/bugs/718.zig
@@ -12,7 +12,6 @@ var keys: Keys = undefined;
 test "zero keys with @memset" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
 
     @memset(@ptrCast([*]u8, &keys), 0, @sizeOf(@TypeOf(keys)));
diff --git a/test/behavior/bugs/7325.zig b/test/behavior/bugs/7325.zig
index 592f9cc80f..23550a512d 100644
--- a/test/behavior/bugs/7325.zig
+++ b/test/behavior/bugs/7325.zig
@@ -79,7 +79,6 @@ fn genExpression(expr: Expression) !ExpressionResult {
 
 test {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO

From 3b22ce82643f2e547c85bf4b17477bc27133eb42 Mon Sep 17 00:00:00 2001
From: Jacob Young
Date: Thu, 6 Apr 2023 20:21:54 -0400
Subject: [PATCH 2/4] x86_64: fix atomic loop implementation

---
 src/arch/x86_64/CodeGen.zig | 137 ++++++++++++++++++++----------------
 test/behavior/atomics.zig   |   1 -
 test/behavior/pointers.zig  |   1 +
 3 files changed, 79 insertions(+), 60 deletions(-)

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index dab2f36140..69c3a37111 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -1734,10 +1734,10 @@ fn airAddSat(self: *Self, inst: Air.Inst.Index) !void {
         };
         try self.genBinOpMir(.add, ty, dst_mcv, rhs_mcv);
 
-        const abi_size = @intCast(u32, @max(ty.abiSize(self.target.*), 2));
+        const cmov_abi_size = @max(@intCast(u32, ty.abiSize(self.target.*)), 2);
         try self.asmCmovccRegisterRegister(
-            registerAlias(dst_reg, abi_size),
-            registerAlias(limit_reg, abi_size),
+            registerAlias(dst_reg, cmov_abi_size),
+            registerAlias(limit_reg, cmov_abi_size),
             cc,
         );
         break :result dst_mcv;
@@ -1785,10 +1785,10 @@ fn airSubSat(self: *Self, inst: Air.Inst.Index) !void {
         };
         try self.genBinOpMir(.sub, ty, dst_mcv, rhs_mcv);
 
-        const abi_size = @intCast(u32, @max(ty.abiSize(self.target.*), 2));
+        const cmov_abi_size = @max(@intCast(u32, ty.abiSize(self.target.*)), 2);
         try self.asmCmovccRegisterRegister(
-            registerAlias(dst_reg, abi_size),
-            registerAlias(limit_reg, abi_size),
+            registerAlias(dst_reg, cmov_abi_size),
+            registerAlias(limit_reg, cmov_abi_size),
             cc,
         );
         break :result dst_mcv;
@@ -1841,10 +1841,10 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void {
         };
         const dst_mcv = try self.genMulDivBinOp(.mul, inst, ty, ty, lhs_mcv, rhs_mcv);
 
-        const abi_size = @intCast(u32, @max(ty.abiSize(self.target.*), 2));
+        const cmov_abi_size = @max(@intCast(u32, ty.abiSize(self.target.*)), 2);
         try self.asmCmovccRegisterRegister(
-            registerAlias(dst_mcv.register, abi_size),
-            registerAlias(limit_reg, abi_size),
+            registerAlias(dst_mcv.register, cmov_abi_size),
+            registerAlias(limit_reg, cmov_abi_size),
             cc,
         );
         break :result dst_mcv;
@@ -3102,10 +3102,10 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void {
             try self.copyToRegisterWithInstTracking(inst, dst_ty, .{ .immediate = src_bits });
         try self.genBinOpMir(.bsr, src_ty, dst_mcv, mat_src_mcv);
 
-        const dst_abi_size = @intCast(u32, @max(dst_ty.abiSize(self.target.*), 2));
+        const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2);
         try self.asmCmovccRegisterRegister(
-            registerAlias(dst_reg, dst_abi_size),
-            registerAlias(width_mcv.register, dst_abi_size),
+            registerAlias(dst_reg, cmov_abi_size),
+            registerAlias(width_mcv.register, cmov_abi_size),
             .z,
         );
@@ -3162,10 +3162,10 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void {
         const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits });
         try self.genBinOpMir(.bsf, src_ty, dst_mcv, mat_src_mcv);
 
-        const abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2);
+        const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2);
         try self.asmCmovccRegisterRegister(
-            registerAlias(dst_reg, abi_size),
-            registerAlias(width_reg, abi_size),
+            registerAlias(dst_reg, cmov_abi_size),
+            registerAlias(width_reg, cmov_abi_size),
             .z,
         );
         break :result dst_mcv;
@@ -4766,7 +4766,7 @@ fn genBinOp(
                 },
             };
 
-            const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*));
+            const cmov_abi_size = @max(@intCast(u32, lhs_ty.abiSize(self.target.*)), 2);
             const tmp_reg = switch (dst_mcv) {
                 .register => |reg| reg,
                 else => try self.copyToTmpRegister(lhs_ty, dst_mcv),
@@ -4784,13 +4784,13 @@ fn genBinOp(
                 .ptr_stack_offset,
                 => unreachable,
                 .register => |src_reg| try self.asmCmovccRegisterRegister(
-                    registerAlias(tmp_reg, abi_size),
-                    registerAlias(src_reg, abi_size),
+                    registerAlias(tmp_reg, cmov_abi_size),
+                    registerAlias(src_reg, cmov_abi_size),
                     cc,
                 ),
                 .stack_offset => |off| try self.asmCmovccRegisterMemory(
-                    registerAlias(tmp_reg, abi_size),
-                    Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
+                    registerAlias(tmp_reg, cmov_abi_size),
+                    Memory.sib(Memory.PtrSize.fromSize(cmov_abi_size), .{
                         .base = .rbp,
                         .disp = -off,
                     }),
@@ -4803,8 +4803,8 @@ fn genBinOp(
                     try self.loadMemPtrIntoRegister(addr_reg, Type.usize, mat_src_mcv);
                     try self.asmCmovccRegisterMemory(
-                        registerAlias(tmp_reg, abi_size),
-                        Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = addr_reg }),
+                        registerAlias(tmp_reg, cmov_abi_size),
+                        Memory.sib(Memory.PtrSize.fromSize(cmov_abi_size), .{ .base = addr_reg }),
                         cc,
                     );
                 },
@@ -7727,7 +7727,6 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void {
 
 fn atomicOp(
     self: *Self,
-    dst_reg: Register,
     ptr_mcv: MCValue,
     val_mcv: MCValue,
     ptr_ty: Type,
@@ -7735,11 +7734,7 @@ fn atomicOp(
     unused: bool,
     rmw_op: ?std.builtin.AtomicRmwOp,
     order: std.builtin.AtomicOrder,
-) InnerError!void {
-    const dst_mcv = MCValue{ .register = dst_reg };
-    const dst_lock = self.register_manager.lockReg(dst_reg);
-    defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
+) InnerError!MCValue {
     const ptr_lock = switch (ptr_mcv) {
         .register => |reg| self.register_manager.lockReg(reg),
         else => null,
@@ -7794,9 +7789,14 @@ fn atomicOp(
                 .SeqCst => .xchg,
             };
 
+            const dst_reg = try self.register_manager.allocReg(null, gp);
+            const dst_mcv = MCValue{ .register = dst_reg };
+            const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
+            defer self.register_manager.unlockReg(dst_lock);
+
             try self.genSetReg(val_ty, dst_reg, val_mcv);
             if (rmw_op == std.builtin.AtomicRmwOp.Sub and tag == .xadd) {
-                try self.genUnOpMir(.neg, val_ty, .{ .register = dst_reg });
+                try self.genUnOpMir(.neg, val_ty, dst_mcv);
             }
             _ = try self.addInst(.{ .tag = tag, .ops = switch (tag) {
                 .mov, .xchg => .mr_sib,
@@ -7806,25 +7806,31 @@ fn atomicOp(
                 .r = registerAlias(dst_reg, val_abi_size),
                 .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)),
             } } });
+
+            return if (unused) .none else dst_mcv;
         },
-        .loop => _ = try self.asmJccReloc(if (val_abi_size <= 8) loop: {
-            try self.genSetReg(val_ty, dst_reg, val_mcv);
+        .loop => _ = if (val_abi_size <= 8) {
+            const tmp_reg = try self.register_manager.allocReg(null, gp);
+            const tmp_mcv = MCValue{ .register = tmp_reg };
+            const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+            defer self.register_manager.unlockReg(tmp_lock);
+
             try self.asmRegisterMemory(.mov, registerAlias(.rax, val_abi_size), ptr_mem);
             const loop = @intCast(u32, self.mir_instructions.len);
             if (rmw_op != std.builtin.AtomicRmwOp.Xchg) {
-                try self.genSetReg(val_ty, dst_reg, .{ .register = .rax });
+                try self.genSetReg(val_ty, tmp_reg, .{ .register = .rax });
             }
             if (rmw_op) |op| switch (op) {
-                .Xchg => try self.genSetReg(val_ty, dst_reg, val_mcv),
-                .Add => try self.genBinOpMir(.add, val_ty, dst_mcv, val_mcv),
-                .Sub => try self.genBinOpMir(.sub, val_ty, dst_mcv, val_mcv),
-                .And => try self.genBinOpMir(.@"and", val_ty, dst_mcv, val_mcv),
+                .Xchg => try self.genSetReg(val_ty, tmp_reg, val_mcv),
+                .Add => try self.genBinOpMir(.add, val_ty, tmp_mcv, val_mcv),
+                .Sub => try self.genBinOpMir(.sub, val_ty, tmp_mcv, val_mcv),
+                .And => try self.genBinOpMir(.@"and", val_ty, tmp_mcv, val_mcv),
                 .Nand => {
-                    try self.genBinOpMir(.@"and", val_ty, dst_mcv, val_mcv);
-                    try self.genUnOpMir(.not, val_ty, dst_mcv);
+                    try self.genBinOpMir(.@"and", val_ty, tmp_mcv, val_mcv);
+                    try self.genUnOpMir(.not, val_ty, tmp_mcv);
                 },
-                .Or => try self.genBinOpMir(.@"or", val_ty, dst_mcv, val_mcv),
-                .Xor => try self.genBinOpMir(.xor, val_ty, dst_mcv, val_mcv),
+                .Or => try self.genBinOpMir(.@"or", val_ty, tmp_mcv, val_mcv),
+                .Xor => try self.genBinOpMir(.xor, val_ty, tmp_mcv, val_mcv),
                 .Min, .Max => {
                     const cc: Condition = switch (if (val_ty.isAbiInt())
                         val_ty.intInfo(self.target.*).signedness
@@ -7842,17 +7848,18 @@ fn atomicOp(
                         },
                     };
 
-                    try self.genBinOpMir(.cmp, val_ty, dst_mcv, val_mcv);
+                    try self.genBinOpMir(.cmp, val_ty, tmp_mcv, val_mcv);
+                    const cmov_abi_size = @max(val_abi_size, 2);
                     switch (val_mcv) {
                         .register => |val_reg| try self.asmCmovccRegisterRegister(
-                            registerAlias(dst_reg, val_abi_size),
-                            registerAlias(val_reg, val_abi_size),
+                            registerAlias(tmp_reg, cmov_abi_size),
+                            registerAlias(val_reg, cmov_abi_size),
                             cc,
                         ),
                         .stack_offset => |val_off| try self.asmCmovccRegisterMemory(
-                            registerAlias(dst_reg, val_abi_size),
+                            registerAlias(tmp_reg, cmov_abi_size),
                             Memory.sib(
-                                Memory.PtrSize.fromSize(val_abi_size),
+                                Memory.PtrSize.fromSize(cmov_abi_size),
                                 .{ .base = .rbp, .disp = -val_off },
                             ),
                             cc,
                         ),
                         else => {
                             const val_reg = try self.copyToTmpRegister(val_ty, val_mcv);
                             try self.asmCmovccRegisterRegister(
-                                registerAlias(dst_reg, val_abi_size),
-                                registerAlias(val_reg, val_abi_size),
+                                registerAlias(tmp_reg, cmov_abi_size),
+                                registerAlias(val_reg, cmov_abi_size),
                                 cc,
                             );
                         },
                     }
                 },
             };
             _ = try self.addInst(.{ .tag = .cmpxchg, .ops = .lock_mr_sib, .data = .{ .rx = .{
-                .r = registerAlias(dst_reg, val_abi_size),
+                .r = registerAlias(tmp_reg, val_abi_size),
                 .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)),
             } } });
-            break :loop loop;
-        } else loop: {
+            _ = try self.asmJccReloc(loop, .ne);
+            return if (unused) .none else .{ .register = .rax };
+        } else {
             try self.asmRegisterMemory(.mov, .rax, Memory.sib(.qword, .{
                 .base = ptr_mem.sib.base,
                 .scale_index = ptr_mem.sib.scale_index,
@@ -7939,8 +7947,22 @@ fn atomicOp(
             _ = try self.addInst(.{ .tag = .cmpxchgb, .ops = .lock_m_sib, .data = .{
                 .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)),
             } });
-            break :loop loop;
-        }, .ne),
+            _ = try self.asmJccReloc(loop, .ne);
+
+            if (unused) return .none;
+            const dst_mcv = try self.allocTempRegOrMem(val_ty, false);
+            try self.asmMemoryRegister(
+                .mov,
+                Memory.sib(.qword, .{ .base = .rbp, .disp = 0 - dst_mcv.stack_offset }),
+                .rax,
+            );
+            try self.asmMemoryRegister(
+                .mov,
+                Memory.sib(.qword, .{ .base = .rbp, .disp = 8 - dst_mcv.stack_offset }),
+                .rdx,
+            );
+            return dst_mcv;
+        },
         .libcall => return self.fail("TODO implement x86 atomic libcall", .{}),
     }
 }
@@ -7954,7 +7976,6 @@ fn airAtomicRmw(self: *Self, inst: Air.Inst.Index) !void {
     defer for (regs_lock) |lock| self.register_manager.unlockReg(lock);
 
     const unused = self.liveness.isUnused(inst);
-    const dst_reg = try self.register_manager.allocReg(if (unused) null else inst, gp);
 
     const ptr_ty = self.air.typeOf(pl_op.operand);
     const ptr_mcv = try self.resolveInst(pl_op.operand);
@@ -7962,8 +7983,8 @@ fn airAtomicRmw(self: *Self, inst: Air.Inst.Index) !void {
     const val_ty = self.air.typeOf(extra.operand);
     const val_mcv = try self.resolveInst(extra.operand);
 
-    try self.atomicOp(dst_reg, ptr_mcv, val_mcv, ptr_ty, val_ty, unused, extra.op(), extra.ordering());
-    const result: MCValue = if (unused) .dead else .{ .register = dst_reg };
+    const result =
+        try self.atomicOp(ptr_mcv, val_mcv, ptr_ty, val_ty, unused, extra.op(), extra.ordering());
     return self.finishAir(inst, result, .{ pl_op.operand, extra.operand, .none });
 }
@@ -7996,16 +8017,14 @@ fn airAtomicLoad(self: *Self, inst: Air.Inst.Index) !void {
 fn airAtomicStore(self: *Self, inst: Air.Inst.Index, order: std.builtin.AtomicOrder) !void {
     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
 
-    const dst_reg = try self.register_manager.allocReg(null, gp);
-
     const ptr_ty = self.air.typeOf(bin_op.lhs);
     const ptr_mcv = try self.resolveInst(bin_op.lhs);
 
     const val_ty = self.air.typeOf(bin_op.rhs);
     const val_mcv = try self.resolveInst(bin_op.rhs);
 
-    try self.atomicOp(dst_reg, ptr_mcv, val_mcv, ptr_ty, val_ty, true, null, order);
-    return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
+    const result = try self.atomicOp(ptr_mcv, val_mcv, ptr_ty, val_ty, true, null, order);
+    return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }
 
 fn airMemset(self: *Self, inst: Air.Inst.Index) !void {
diff --git a/test/behavior/atomics.zig b/test/behavior/atomics.zig
index e6000cd848..04dd2661bb 100644
--- a/test/behavior/atomics.zig
+++ b/test/behavior/atomics.zig
@@ -232,7 +232,6 @@ fn testAtomicRmwFloat() !void {
 
 test "atomicrmw with ints" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
diff --git a/test/behavior/pointers.zig b/test/behavior/pointers.zig
index 626a1a7eb6..a03ad66bd2 100644
--- a/test/behavior/pointers.zig
+++ b/test/behavior/pointers.zig
@@ -506,6 +506,7 @@ test "ptrToInt on a generic function" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
 
     const S = struct {
         fn generic(i: anytype) @TypeOf(i) {

From caa3d6a4f4413c1cace517b073476780168f24cf Mon Sep 17 00:00:00 2001
From: Jacob Young
Date: Fri, 7 Apr 2023 00:45:14 -0400
Subject: [PATCH 3/4] x86_64: fix constant pointers to zero-bit types

These non-dereferenceable pointers still need to have the correct alignment
and non-null-ness.
---
 src/arch/x86_64/CodeGen.zig | 15 ++++++++++++---
 src/codegen.zig             | 14 +++++++++-----
 test/behavior/pointers.zig  |  1 -
 3 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index 69c3a37111..63ba74da1c 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -1080,7 +1080,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .constant => unreachable, // excluded from function bodies
             .const_ty => unreachable, // excluded from function bodies
 
-            .unreach => self.finishAirBookkeeping(),
+            .unreach => if (self.wantSafety()) try self.airTrap() else self.finishAirBookkeeping(),
 
             .optional_payload => try self.airOptionalPayload(inst),
             .optional_payload_ptr => try self.airOptionalPayloadPtr(inst),
@@ -6273,6 +6273,15 @@ fn airBlock(self: *Self, inst: Air.Inst.Index) !void {
     const block_data = self.blocks.getPtr(inst).?;
     const target_branch = self.branch_stack.pop();
+
+    log.debug("airBlock: %{d}", .{inst});
+    log.debug("Upper branches:", .{});
+    for (self.branch_stack.items) |bs| {
+        log.debug("{}", .{bs.fmtDebug()});
+    }
+    log.debug("Block branch: {}", .{block_data.branch.fmtDebug()});
+    log.debug("Target branch: {}", .{target_branch.fmtDebug()});
+
     try self.canonicaliseBranches(true, &block_data.branch, &target_branch, false, false);
 
     for (block_data.relocs.items) |reloc| try self.performReloc(reloc);
@@ -6444,7 +6453,7 @@ fn canonicaliseBranches(
             // If integer overflow occurs, the question is: why wasn't the instruction marked dead?
             break :blk self.getResolvedInstValue(target_key).?.*;
         };
-        log.debug("consolidating target_entry {d} {}=>{}", .{ target_key, target_value, canon_mcv });
+        log.debug("consolidating target_entry %{d} {}=>{}", .{ target_key, target_value, canon_mcv });
         // TODO handle the case where the destination stack offset / register has something
         // going on there.
         assert(!hazard_map.contains(target_value));
@@ -6466,7 +6475,7 @@ fn canonicaliseBranches(
        const parent_mcv = if (canon_value != .dead) self.getResolvedInstValue(canon_key).?.* else undefined;
        if (canon_value != .dead) {
-            log.debug("consolidating canon_entry {d} {}=>{}", .{ canon_key, parent_mcv, canon_value });
+            log.debug("consolidating canon_entry %{d} {}=>{}", .{ canon_key, parent_mcv, canon_value });
            // TODO handle the case where the destination stack offset / register has something
            // going on there.
            assert(!hazard_map.contains(parent_mcv));
diff --git a/src/codegen.zig b/src/codegen.zig
index 57c33ad524..b322d336cd 100644
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -966,7 +966,7 @@ fn genDeclRef(
     const module = bin_file.options.module.?;
     const decl = module.declPtr(decl_index);
 
-    if (decl.ty.zigTypeTag() != .Fn and !decl.ty.hasRuntimeBitsIgnoreComptime()) {
+    if (!decl.ty.isFnOrHasRuntimeBitsIgnoreComptime()) {
         const imm: u64 = switch (ptr_bytes) {
             1 => 0xaa,
             2 => 0xaaaa,
@@ -978,10 +978,14 @@ fn genDeclRef(
     }
 
     // TODO this feels clunky. Perhaps we should check for it in `genTypedValue`?
-    if (tv.ty.zigTypeTag() == .Pointer) blk: {
-        if (tv.ty.castPtrToFn()) |_| break :blk;
-        if (!tv.ty.elemType2().hasRuntimeBits()) {
-            return GenResult.mcv(.none);
+    if (tv.ty.castPtrToFn()) |fn_ty| {
+        if (fn_ty.fnInfo().is_generic) {
+            return GenResult.mcv(.{ .immediate = fn_ty.abiAlignment(target) });
+        }
+    } else if (tv.ty.zigTypeTag() == .Pointer) {
+        const elem_ty = tv.ty.elemType2();
+        if (!elem_ty.hasRuntimeBits()) {
+            return GenResult.mcv(.{ .immediate = elem_ty.abiAlignment(target) });
         }
     }
diff --git a/test/behavior/pointers.zig b/test/behavior/pointers.zig
index a03ad66bd2..626a1a7eb6 100644
--- a/test/behavior/pointers.zig
+++ b/test/behavior/pointers.zig
@@ -506,7 +506,6 @@ test "ptrToInt on a generic function" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
 
     const S = struct {
         fn generic(i: anytype) @TypeOf(i) {

From e72c41a32af41c15f4f73b46c902c01ed838462f Mon Sep 17 00:00:00 2001
From: Jacob Young
Date: Thu, 13 Apr 2023 04:06:17 -0400
Subject: [PATCH 4/4] x86_64: fix clz miscompile

---
 src/arch/x86_64/CodeGen.zig | 42 ++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index 63ba74da1c..7344af9673 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -3083,7 +3083,7 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void {
         };
         defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);
 
-        const dst_reg = try self.register_manager.allocReg(null, gp);
+        const dst_reg = try self.register_manager.allocReg(inst, gp);
         const dst_mcv = MCValue{ .register = dst_reg };
         const dst_lock = self.register_manager.lockReg(dst_reg);
         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
@@ -3098,19 +3098,37 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void {
         }
 
         const src_bits = src_ty.bitSize(self.target.*);
-        const width_mcv =
-            try self.copyToRegisterWithInstTracking(inst, dst_ty, .{ .immediate = src_bits });
-        try self.genBinOpMir(.bsr, src_ty, dst_mcv, mat_src_mcv);
+        if (math.isPowerOfTwo(src_bits)) {
+            const imm_reg = try self.copyToTmpRegister(dst_ty, .{
+                .immediate = src_bits ^ (src_bits - 1),
+            });
+            try self.genBinOpMir(.bsr, src_ty, dst_mcv, mat_src_mcv);
 
-        const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2);
-        try self.asmCmovccRegisterRegister(
-            registerAlias(dst_reg, cmov_abi_size),
-            registerAlias(width_mcv.register, cmov_abi_size),
-            .z,
-        );
+            const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2);
+            try self.asmCmovccRegisterRegister(
+                registerAlias(dst_reg, cmov_abi_size),
+                registerAlias(imm_reg, cmov_abi_size),
+                .z,
+            );
 
-        try self.genBinOpMir(.sub, dst_ty, width_mcv, dst_mcv);
-        break :result width_mcv;
+            try self.genBinOpMir(.xor, dst_ty, dst_mcv, .{ .immediate = src_bits - 1 });
+        } else {
+            const imm_reg = try self.copyToTmpRegister(dst_ty, .{
+                .immediate = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - self.regBitSize(dst_ty)),
+            });
+            try self.genBinOpMir(.bsr, src_ty, dst_mcv, mat_src_mcv);
+
+            const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2);
+            try self.asmCmovccRegisterRegister(
+                registerAlias(imm_reg, cmov_abi_size),
+                registerAlias(dst_reg, cmov_abi_size),
+                .nz,
+            );
+
+            try self.genSetReg(dst_ty, dst_reg, .{ .immediate = src_bits - 1 });
+            try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .register = imm_reg });
+        }
+        break :result dst_mcv;
     };
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
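
Note on the clz lowering in PATCH 4: for a power-of-two source width N, the emitted
sequence relies on two identities: (N - 1) - bsr(x) == (N - 1) ^ bsr(x) for x != 0
(no borrows occur because N - 1 is an all-ones mask), and (N ^ (N - 1)) ^ (N - 1) == N,
which handles x == 0 through the cmovz. The following userland Zig is a minimal sketch
of those semantics for N = 64; the helper name `clzViaBsr` is hypothetical and exists
nowhere in the backend, which emits bsr + cmovz + xor directly:

    const std = @import("std");

    // Models the power-of-two branch of the airClz lowering: bsr + cmovz + xor.
    fn clzViaBsr(x: u64) u64 {
        const src_bits: u64 = 64;
        // bsr sets ZF when x == 0 (its result is then undefined); the cmovz
        // substitutes src_bits ^ (src_bits - 1), which is 127 for 64 bits.
        const bsr_result: u64 = if (x == 0)
            src_bits ^ (src_bits - 1)
        else
            63 - @clz(x); // index of the most significant set bit
        // x != 0: bsr_result <= 63, so the xor computes 63 - bsr_result.
        // x == 0: 127 ^ 63 == 64, matching the defined result of @clz(0).
        return bsr_result ^ (src_bits - 1);
    }

    test "clzViaBsr matches @clz" {
        try std.testing.expectEqual(@as(u64, 64), clzViaBsr(0));
        try std.testing.expectEqual(@as(u64, 63), clzViaBsr(1));
        try std.testing.expectEqual(@as(u64, 0), clzViaBsr(1 << 63));
        try std.testing.expectEqual(@as(u64, 8), clzViaBsr(0x00ff_0000_0000_0000));
    }

The xor trick only applies when N is a power of two, because only then is N - 1 an
all-ones mask; for other widths the patch instead materializes N - 1 with genSetReg
and subtracts, which costs one extra instruction.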