From 941d3a2bb1213b04399fe772aaf88c35d043af01 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sun, 4 Feb 2024 21:05:00 +0100 Subject: [PATCH] x86_64: fix miscompilations on baseline --- src/arch/x86_64/CodeGen.zig | 71 ++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 8e5f316aba..212a30bbf4 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -5653,10 +5653,10 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + const wide_ty = if (src_bits <= 8) Type.u16 else src_ty; if (self.hasFeature(.bmi)) { if (src_bits <= 64) { const extra_bits = self.regExtraBits(src_ty) + @as(u64, if (src_bits <= 8) 8 else 0); - const wide_ty = if (src_bits <= 8) Type.u16 else src_ty; const masked_mcv = if (extra_bits > 0) masked: { const tmp_mcv = tmp: { if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0)) @@ -5718,7 +5718,7 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { defer self.register_manager.unlockReg(wide_lock); try self.truncateRegister(src_ty, wide_reg); - try self.genBinOpMir(.{ ._, .bsf }, Type.u16, dst_mcv, .{ .register = wide_reg }); + try self.genBinOpMir(.{ ._, .bsf }, wide_ty, dst_mcv, .{ .register = wide_reg }); } else try self.genBinOpMir(.{ ._, .bsf }, src_ty, dst_mcv, mat_src_mcv); const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(mod))), 2); @@ -5890,6 +5890,7 @@ fn genByteSwap( ) !MCValue { const mod = self.bin_file.comp.module.?; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const have_movbe = self.hasFeature(.movbe); if (src_ty.zigTypeTag(mod) == .Vector) return self.fail( "TODO implement genByteSwap for {}", @@ -5935,46 +5936,42 @@ fn genByteSwap( const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs); defer for (dst_locks) |lock| self.register_manager.unlockReg(lock); - if (src_mcv.isMemory()) { - try self.asmRegisterMemory( - .{ ._, .movbe }, - dst_regs[0], - try src_mcv.address().offset(8).deref().mem(self, .qword), - ); - try self.asmRegisterMemory(.{ ._, .movbe }, dst_regs[1], try src_mcv.mem(self, .qword)); - } else for (dst_regs, src_mcv.register_pair) |dst_reg, src_reg| { - try self.asmRegisterRegister(.{ ._, .mov }, dst_reg.to64(), src_reg.to64()); - try self.asmRegister(.{ ._, .bswap }, dst_reg.to64()); + for (dst_regs, 0..) |dst_reg, limb_index| { + if (src_mcv.isMemory()) { + try self.asmRegisterMemory( + .{ ._, if (have_movbe) .movbe else .mov }, + dst_reg.to64(), + try src_mcv.address().offset(@intCast(limb_index * 8)).deref().mem(self, .qword), + ); + if (!have_movbe) try self.asmRegister(.{ ._, .bswap }, dst_reg.to64()); + } else { + try self.asmRegisterRegister( + .{ ._, .mov }, + dst_reg.to64(), + src_mcv.register_pair[limb_index].to64(), + ); + try self.asmRegister(.{ ._, .bswap }, dst_reg.to64()); + } } - return .{ .register_pair = dst_regs }; + return .{ .register_pair = .{ dst_regs[1], dst_regs[0] } }; }, } - if (src_mcv.isRegister()) { - const dst_mcv: MCValue = if (mem_ok) - try self.allocRegOrMem(inst, true) - else - .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.gp) }; - if (dst_mcv.isRegister()) { - const dst_lock = self.register_manager.lockRegAssumeUnused(dst_mcv.register); - defer self.register_manager.unlockReg(dst_lock); + const dst_mcv: MCValue = if (mem_ok and have_movbe and src_mcv.isRegister()) + try self.allocRegOrMem(inst, true) + else + .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.gp) }; + if (dst_mcv.getReg()) |dst_reg| { + const dst_lock = self.register_manager.lockRegAssumeUnused(dst_mcv.register); + defer self.register_manager.unlockReg(dst_lock); - try self.genSetReg(dst_mcv.register, src_ty, src_mcv); - switch (abi_size) { - else => unreachable, - 2 => try self.genBinOpMir(.{ ._l, .ro }, src_ty, dst_mcv, .{ .immediate = 8 }), - 3...8 => try self.genUnOpMir(.{ ._, .bswap }, src_ty, dst_mcv), - } - } else try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv); - return dst_mcv; - } - - const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp); - const dst_mcv = MCValue{ .register = dst_reg }; - const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); - defer self.register_manager.unlockReg(dst_lock); - - try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv); + try self.genSetReg(dst_reg, src_ty, src_mcv); + switch (abi_size) { + else => unreachable, + 2 => try self.genBinOpMir(.{ ._l, .ro }, src_ty, dst_mcv, .{ .immediate = 8 }), + 3...8 => try self.genUnOpMir(.{ ._, .bswap }, src_ty, dst_mcv), + } + } else try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv); return dst_mcv; }