mirror of
https://github.com/ziglang/zig.git
synced 2026-01-03 03:53:20 +00:00
x86_64: implement @popCount for older processors
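This fixes the behavior tests when compiled for the baseline CPU model, which does not have the `popcnt` feature and previously hit the "TODO implement airPopcount" failure.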
This commit is contained in:
parent
f95faac5ae
commit
958c8e1ce9
@ -2771,29 +2771,112 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void {
|
||||
|
||||
fn airPopcount(self: *Self, inst: Air.Inst.Index) !void {
|
||||
const ty_op = self.air.instructions.items(.data)[inst].ty_op;
|
||||
const result = result: {
|
||||
const result: MCValue = result: {
|
||||
if (self.liveness.isUnused(inst)) break :result .dead;
|
||||
|
||||
const op_ty = self.air.typeOf(ty_op.operand);
|
||||
const src_ty = self.air.typeOf(ty_op.operand);
|
||||
const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*));
|
||||
const src_mcv = try self.resolveInst(ty_op.operand);
|
||||
|
||||
if (Target.x86.featureSetHas(self.target.cpu.features, .popcnt)) {
|
||||
const op_mcv = try self.resolveInst(ty_op.operand);
|
||||
const mat_op_mcv = switch (op_mcv) {
|
||||
.immediate => MCValue{ .register = try self.copyToTmpRegister(op_ty, op_mcv) },
|
||||
else => op_mcv,
|
||||
const mat_src_mcv = switch (src_mcv) {
|
||||
.immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
|
||||
else => src_mcv,
|
||||
};
|
||||
const mat_op_lock = switch (mat_op_mcv) {
|
||||
const mat_src_lock = switch (mat_src_mcv) {
|
||||
.register => |reg| self.register_manager.lockReg(reg),
|
||||
else => null,
|
||||
};
|
||||
defer if (mat_op_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
const dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, gp) };
|
||||
try self.genBinOpMir(.popcnt, op_ty, dst_mcv, mat_op_mcv);
|
||||
const dst_mcv: MCValue = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
|
||||
src_mcv
|
||||
else
|
||||
.{ .register = try self.register_manager.allocReg(inst, gp) };
|
||||
|
||||
const popcnt_ty = if (src_abi_size > 1) src_ty else Type.u16;
|
||||
try self.genBinOpMir(.popcnt, popcnt_ty, dst_mcv, mat_src_mcv);
|
||||
break :result dst_mcv;
|
||||
}
|
||||
|
||||
return self.fail("TODO implement airPopcount for {}", .{op_ty.fmt(self.bin_file.options.module.?)});
|
||||
const mask = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - src_abi_size * 8);
|
||||
const imm_0_1 = Immediate.u(mask / 0b1_1);
|
||||
const imm_00_11 = Immediate.u(mask / 0b01_01);
|
||||
const imm_0000_1111 = Immediate.u(mask / 0b0001_0001);
|
||||
const imm_0000_0001 = Immediate.u(mask / 0b1111_1111);
|
||||
|
||||
const tmp_reg = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
|
||||
src_mcv.register
|
||||
else
|
||||
try self.copyToTmpRegister(src_ty, src_mcv);
|
||||
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
|
||||
defer self.register_manager.unlockReg(tmp_lock);
|
||||
|
||||
const dst_reg = try self.register_manager.allocReg(inst, gp);
|
||||
const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
|
||||
defer self.register_manager.unlockReg(dst_lock);
|
||||
|
||||
{
|
||||
const dst = registerAlias(dst_reg, src_abi_size);
|
||||
const tmp = registerAlias(tmp_reg, src_abi_size);
|
||||
const imm = if (src_abi_size > 4)
|
||||
try self.register_manager.allocReg(null, gp)
|
||||
else
|
||||
undefined;
|
||||
|
||||
// tmp = operand
|
||||
try self.asmRegisterRegister(.mov, dst, tmp);
|
||||
// dst = operand
|
||||
try self.asmRegisterImmediate(.shr, tmp, Immediate.u(1));
|
||||
// tmp = operand >> 1
|
||||
if (src_abi_size > 4) {
|
||||
try self.asmRegisterImmediate(.mov, imm, imm_0_1);
|
||||
try self.asmRegisterRegister(.@"and", tmp, imm);
|
||||
} else try self.asmRegisterImmediate(.@"and", tmp, imm_0_1);
|
||||
// tmp = (operand >> 1) & 0x55...55
|
||||
try self.asmRegisterRegister(.sub, dst, tmp);
|
||||
// dst = temp1 = operand - ((operand >> 1) & 0x55...55)
|
||||
try self.asmRegisterRegister(.mov, tmp, dst);
|
||||
// tmp = temp1
|
||||
try self.asmRegisterImmediate(.shr, dst, Immediate.u(2));
|
||||
// dst = temp1 >> 2
|
||||
if (src_abi_size > 4) {
|
||||
try self.asmRegisterImmediate(.mov, imm, imm_00_11);
|
||||
try self.asmRegisterRegister(.@"and", tmp, imm);
|
||||
try self.asmRegisterRegister(.@"and", dst, imm);
|
||||
} else {
|
||||
try self.asmRegisterImmediate(.@"and", tmp, imm_00_11);
|
||||
try self.asmRegisterImmediate(.@"and", dst, imm_00_11);
|
||||
}
|
||||
// tmp = temp1 & 0x33...33
|
||||
// dst = (temp1 >> 2) & 0x33...33
|
||||
try self.asmRegisterRegister(.add, tmp, dst);
|
||||
// tmp = temp2 = (temp1 & 0x33...33) + ((temp1 >> 2) & 0x33...33)
|
||||
try self.asmRegisterRegister(.mov, dst, tmp);
|
||||
// dst = temp2
|
||||
try self.asmRegisterImmediate(.shr, tmp, Immediate.u(4));
|
||||
// tmp = temp2 >> 4
|
||||
try self.asmRegisterRegister(.add, dst, tmp);
|
||||
// dst = temp2 + (temp2 >> 4)
|
||||
if (src_abi_size > 4) {
|
||||
try self.asmRegisterImmediate(.mov, imm, imm_0000_1111);
|
||||
try self.asmRegisterImmediate(.mov, tmp, imm_0000_0001);
|
||||
try self.asmRegisterRegister(.@"and", dst, imm);
|
||||
try self.asmRegisterRegister(.imul, dst, tmp);
|
||||
} else {
|
||||
try self.asmRegisterImmediate(.@"and", dst, imm_0000_1111);
|
||||
if (src_abi_size > 1) {
|
||||
try self.asmRegisterRegisterImmediate(.imul, dst, dst, imm_0000_0001);
|
||||
}
|
||||
}
|
||||
// dst = temp3 = (temp2 + (temp2 >> 4)) & 0x0f...0f
|
||||
// dst = temp3 * 0x01...01 (the multiply is skipped for 1-byte operands, where temp3 is already the count)
|
||||
if (src_abi_size > 1) {
|
||||
try self.asmRegisterImmediate(.shr, dst, Immediate.u((src_abi_size - 1) * 8));
|
||||
}
|
||||
// dst = (temp3 * 0x01...01) >> (bits - 8)
|
||||
}
|
||||
break :result .{ .register = dst_reg };
|
||||
};
|
||||
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
|
||||
}
|
||||
|
||||
@ -476,6 +476,7 @@ pub const Memory = union(enum) {
|
||||
base: ?Register = null,
|
||||
scale_index: ?ScaleIndex = null,
|
||||
}) Memory {
|
||||
if (args.scale_index) |si| assert(std.math.isPowerOfTwo(si.scale));
|
||||
return .{ .sib = .{
|
||||
.base = args.base,
|
||||
.disp = args.disp,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user