From 1a261917ce41efb49fe41ea0c6d9083212c17797 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Thu, 4 May 2023 03:36:04 -0400 Subject: [PATCH] x86_64: implement `@ctz` and `@clz` for `u128` --- src/arch/x86_64/CodeGen.zig | 93 ++++++++++++++++++++++++++++--------- test/behavior/bugs/2114.zig | 3 +- test/behavior/math.zig | 3 +- 3 files changed, 74 insertions(+), 25 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index fbed0bcf96..55b18985da 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -3798,19 +3798,38 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { const dst_reg = try self.register_manager.allocReg(inst, gp); const dst_mcv = MCValue{ .register = dst_reg }; - const dst_lock = self.register_manager.lockReg(dst_reg); - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); + defer self.register_manager.unlockReg(dst_lock); + const src_bits = src_ty.bitSize(self.target.*); if (Target.x86.featureSetHas(self.target.cpu.features, .lzcnt)) { - try self.genBinOpMir(.lzcnt, src_ty, dst_mcv, mat_src_mcv); - const extra_bits = self.regExtraBits(src_ty); - if (extra_bits > 0) { - try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .immediate = extra_bits }); - } + if (src_bits <= 64) { + try self.genBinOpMir(.lzcnt, src_ty, dst_mcv, mat_src_mcv); + + const extra_bits = self.regExtraBits(src_ty); + if (extra_bits > 0) { + try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .immediate = extra_bits }); + } + } else if (src_bits <= 128) { + const tmp_reg = try self.register_manager.allocReg(null, gp); + const tmp_mcv = MCValue{ .register = tmp_reg }; + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + try self.genBinOpMir(.lzcnt, Type.u64, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.add, dst_ty, dst_mcv, .{ .immediate = 64 }); + try self.genBinOpMir(.lzcnt, Type.u64, tmp_mcv, mat_src_mcv.address().offset(8).deref()); + try self.asmCmovccRegisterRegister(dst_reg.to32(), tmp_reg.to32(), .nc); + + if (src_bits < 128) { + try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .immediate = 128 - src_bits }); + } + } else return self.fail("TODO airClz of {}", .{src_ty.fmt(self.bin_file.options.module.?)}); break :result dst_mcv; } - const src_bits = src_ty.bitSize(self.target.*); + if (src_bits > 64) + return self.fail("TODO airClz of {}", .{src_ty.fmt(self.bin_file.options.module.?)}); if (math.isPowerOfTwo(src_bits)) { const imm_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits ^ (src_bits - 1), @@ -3870,24 +3889,52 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); if (Target.x86.featureSetHas(self.target.cpu.features, .bmi)) { - const extra_bits = self.regExtraBits(src_ty); - const masked_mcv = if (extra_bits > 0) masked: { - const mask_mcv = MCValue{ - .immediate = ((@as(u64, 1) << @intCast(u6, extra_bits)) - 1) << - @intCast(u6, src_bits), - }; - const tmp_mcv = tmp: { - if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0)) break :tmp src_mcv; - try self.genSetReg(dst_reg, src_ty, src_mcv); - break :tmp dst_mcv; - }; - try self.genBinOpMir(.@"or", src_ty, tmp_mcv, mask_mcv); - break :masked tmp_mcv; - } else mat_src_mcv; - try self.genBinOpMir(.tzcnt, src_ty, dst_mcv, masked_mcv); + if (src_bits <= 64) { + const extra_bits = self.regExtraBits(src_ty); + const masked_mcv = if (extra_bits > 0) masked: { + const tmp_mcv = tmp: { + if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0)) + break :tmp src_mcv; + try self.genSetReg(dst_reg, src_ty, src_mcv); + break :tmp dst_mcv; + }; + try self.genBinOpMir( + .@"or", + src_ty, + tmp_mcv, + .{ .immediate = (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - extra_bits)) << + @intCast(u6, src_bits) }, + ); + break :masked tmp_mcv; + } else mat_src_mcv; + try self.genBinOpMir(.tzcnt, src_ty, dst_mcv, masked_mcv); + } else if (src_bits <= 128) { + const tmp_reg = try self.register_manager.allocReg(null, gp); + const tmp_mcv = MCValue{ .register = tmp_reg }; + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + const masked_mcv = if (src_bits < 128) masked: { + try self.genCopy(Type.u64, dst_mcv, mat_src_mcv.address().offset(8).deref()); + try self.genBinOpMir( + .@"or", + Type.u64, + dst_mcv, + .{ .immediate = @as(u64, math.maxInt(u64)) << @intCast(u6, src_bits - 64) }, + ); + break :masked dst_mcv; + } else mat_src_mcv.address().offset(8).deref(); + try self.genBinOpMir(.tzcnt, Type.u64, dst_mcv, masked_mcv); + try self.genBinOpMir(.add, dst_ty, dst_mcv, .{ .immediate = 64 }); + try self.genBinOpMir(.tzcnt, Type.u64, tmp_mcv, mat_src_mcv); + try self.asmCmovccRegisterRegister(dst_reg.to32(), tmp_reg.to32(), .nc); + } else return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)}); break :result dst_mcv; } + if (src_bits > 64) + return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)}); + const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits }); try self.genBinOpMir(.bsf, src_ty, dst_mcv, mat_src_mcv); diff --git a/test/behavior/bugs/2114.zig b/test/behavior/bugs/2114.zig index f92728eff6..3ad4a97b80 100644 --- a/test/behavior/bugs/2114.zig +++ b/test/behavior/bugs/2114.zig @@ -9,7 +9,8 @@ fn ctz(x: anytype) usize { test "fixed" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .bmi)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/math.zig b/test/behavior/math.zig index f9c9f43927..0362bd3a2b 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -77,7 +77,8 @@ fn testClz() !void { } test "@clz big ints" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .lzcnt)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO