x86_64: implement @abs for some integer vector types

This commit is contained in:
Jacob Young 2023-10-07 22:04:21 -04:00
parent 24d76500d2
commit b8f00ae337
4 changed files with 112 additions and 34 deletions

View File

@ -5385,46 +5385,104 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void {
// Lowers an AIR `abs` instruction: looks up the operand type through `ty_op`,
// computes an MCValue holding the result, and reports it via `finishAir`.
// Float operands (scalar or vector) are delegated to `floatSign`.
// NOTE(review): this hunk is rendered without +/- markers, so removed and added
// lines appear back to back (the `scalar_ty` dispatch vs. the `result:` block,
// the two `neg` lines, the two cmov switches). Presumably the `result:` block
// form is the post-commit code — confirm against the repository before editing.
const mod = self.bin_file.options.module.?;
const ty_op = self.air.instructions.items(.data)[inst].ty_op;
const ty = self.typeOf(ty_op.operand);
const scalar_ty = ty.scalarType(mod);
switch (scalar_ty.zigTypeTag(mod)) {
.Int => if (ty.zigTypeTag(mod) == .Vector) {
return self.fail("TODO implement airAbs for {}", .{ty.fmt(mod)});
} else {
if (ty.abiSize(mod) > 8) {
return self.fail("TODO implement abs for integer abi sizes larger than 8", .{});
}
const src_mcv = try self.resolveInst(ty_op.operand);
const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
// The labeled block yields the result MCValue. The scalar integer arm breaks out
// of it directly; the vector integer arm only selects an instruction tag and
// falls through to the shared emission code after the switch.
const result: MCValue = result: {
// `mir_tag` is the instruction tag to emit, or null when nothing was selected;
// a null is turned into a "TODO implement airAbs" failure by the `orelse` below.
const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag(mod)) {
else => null,
.Int => {
// Scalar integers are only handled up to an ABI size of 8 bytes.
if (ty.abiSize(mod) > 8) {
return self.fail("TODO implement abs for integer abi sizes larger than 8", .{});
}
const src_mcv = try self.resolveInst(ty_op.operand);
const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
// Negate the register copy of the operand in place.
try self.genUnOpMir(.{ ._, .neg }, ty, dst_mcv);
try self.genUnOpMir(.{ ._, .neg }, ty, dst_mcv);
// The cmov operand size is the ABI size clamped up to at least 2 bytes
// (presumably because cmov has no byte-sized form — confirm). The original
// operand is then conditionally moved (condition `.l`) over the negated copy.
// Operands that are neither a register nor one of the listed memory forms are
// first copied to a temporary register.
const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(mod))), 2);
switch (src_mcv) {
.register => |val_reg| try self.asmCmovccRegisterRegister(
registerAlias(dst_mcv.register, cmov_abi_size),
registerAlias(val_reg, cmov_abi_size),
.l,
),
.memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
registerAlias(dst_mcv.register, cmov_abi_size),
src_mcv.mem(Memory.PtrSize.fromSize(cmov_abi_size)),
.l,
),
else => {
const val_reg = try self.copyToTmpRegister(ty, src_mcv);
try self.asmCmovccRegisterRegister(
const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(mod))), 2);
switch (src_mcv) {
.register => |val_reg| try self.asmCmovccRegisterRegister(
registerAlias(dst_mcv.register, cmov_abi_size),
registerAlias(val_reg, cmov_abi_size),
.l,
);
),
.memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
registerAlias(dst_mcv.register, cmov_abi_size),
src_mcv.mem(Memory.PtrSize.fromSize(cmov_abi_size)),
.l,
),
else => {
const val_reg = try self.copyToTmpRegister(ty, src_mcv);
try self.asmCmovccRegisterRegister(
registerAlias(dst_mcv.register, cmov_abi_size),
registerAlias(val_reg, cmov_abi_size),
.l,
);
},
}
break :result dst_mcv;
},
.Float => return self.floatSign(inst, ty_op.operand, ty),
// Integer vectors: choose the `.abs` tag by element width (8/16/32 bits map to
// the `_b`/`_w`/`_d` variants) and by vector length. For each width, the first
// length range (up to 128 bits of elements) uses the `vp_` form when AVX is
// available, else the `p_` form when SSSE3 is available; the second range (up
// to 256 bits) requires AVX2. Every other combination yields null.
.Vector => switch (ty.childType(mod).zigTypeTag(mod)) {
else => null,
.Int => switch (ty.childType(mod).intInfo(mod).bits) {
else => null,
8 => switch (ty.vectorLen(mod)) {
else => null,
1...16 => if (self.hasFeature(.avx))
.{ .vp_b, .abs }
else if (self.hasFeature(.ssse3))
.{ .p_b, .abs }
else
null,
17...32 => if (self.hasFeature(.avx2)) .{ .vp_b, .abs } else null,
},
16 => switch (ty.vectorLen(mod)) {
else => null,
1...8 => if (self.hasFeature(.avx))
.{ .vp_w, .abs }
else if (self.hasFeature(.ssse3))
.{ .p_w, .abs }
else
null,
9...16 => if (self.hasFeature(.avx2)) .{ .vp_w, .abs } else null,
},
32 => switch (ty.vectorLen(mod)) {
else => null,
1...4 => if (self.hasFeature(.avx))
.{ .vp_d, .abs }
else if (self.hasFeature(.ssse3))
.{ .p_d, .abs }
else
null,
5...8 => if (self.hasFeature(.avx2)) .{ .vp_d, .abs } else null,
},
},
}
return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
},
.Float => return self.floatSign(inst, ty_op.operand, ty),
else => unreachable,
}
.Float => return self.floatSign(inst, ty_op.operand, ty),
},
}) orelse return self.fail("TODO implement airAbs for {}", .{ty.fmt(mod)});
// Shared emission for the selected vector instruction. The destination is the
// operand's own register when the operand is a register and `reuseOperand`
// permits it; otherwise a register of the class chosen for `ty` is allocated.
const abi_size: u32 = @intCast(ty.abiSize(mod));
const src_mcv = try self.resolveInst(ty_op.operand);
const dst_reg = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
src_mcv.getReg().?
else
try self.register_manager.allocReg(inst, self.regClassForType(ty));
const dst_alias = registerAlias(dst_reg, abi_size);
// A memory operand is used directly as the instruction's source. Otherwise the
// source is the operand's register, or a temporary register the operand is
// copied into when it is neither memory nor a register.
if (src_mcv.isMemory()) try self.asmRegisterMemory(
mir_tag,
dst_alias,
src_mcv.mem(self.memPtrSize(ty)),
) else try self.asmRegisterRegister(
mir_tag,
dst_alias,
registerAlias(if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(ty, src_mcv), abi_size),
);
break :result .{ .register = dst_reg };
};
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {

View File

@ -316,6 +316,8 @@ pub const Mnemonic = enum {
xorpd,
// SSE3
movddup, movshdup, movsldup,
// SSSE3
pabsb, pabsd, pabsw,
// SSE4.1
blendpd, blendps, blendvpd, blendvps,
extractps,
@ -353,6 +355,7 @@ pub const Mnemonic = enum {
vmovupd, vmovups,
vmulpd, vmulps, vmulsd, vmulss,
vorpd, vorps,
vpabsb, vpabsd, vpabsw,
vpackssdw, vpacksswb, vpackusdw, vpackuswb,
vpaddb, vpaddd, vpaddq, vpaddsb, vpaddsw, vpaddusb, vpaddusw, vpaddw,
vpand, vpandn,
@ -750,6 +753,7 @@ pub const Feature = enum {
sse2,
sse3,
sse4_1,
ssse3,
x87,
};

View File

@ -1108,6 +1108,14 @@ pub const table = [_]Entry{
.{ .movsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .none, .sse3 },
// SSSE3
.{ .pabsb, .rm, &.{ .mm, .mm_m64 }, &.{ 0x0f, 0x38, 0x1c }, 0, .none, .ssse3 },
.{ .pabsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .none, .ssse3 },
.{ .pabsd, .rm, &.{ .mm, .mm_m64 }, &.{ 0x0f, 0x38, 0x1e }, 0, .none, .ssse3 },
.{ .pabsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .none, .ssse3 },
.{ .pabsw, .rm, &.{ .mm, .mm_m64 }, &.{ 0x0f, 0x38, 0x1d }, 0, .none, .ssse3 },
.{ .pabsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .none, .ssse3 },
// SSE4.1
.{ .blendpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .none, .sse4_1 },
@ -1368,6 +1376,10 @@ pub const table = [_]Entry{
.{ .vorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .vex_128_wig, .avx },
.{ .vorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x56 }, 0, .vex_256_wig, .avx },
.{ .vpabsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .vex_128_wig, .avx },
.{ .vpabsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .vex_128_wig, .avx },
.{ .vpabsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .vex_128_wig, .avx },
.{ .vpacksswb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x63 }, 0, .vex_128_wig, .avx },
.{ .vpackssdw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6b }, 0, .vex_128_wig, .avx },
@ -1537,6 +1549,10 @@ pub const table = [_]Entry{
.{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
.{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
.{ .vpabsb, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .vex_256_wig, .avx2 },
.{ .vpabsd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .vex_256_wig, .avx2 },
.{ .vpabsw, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .vex_256_wig, .avx2 },
.{ .vpacksswb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x63 }, 0, .vex_256_wig, .avx2 },
.{ .vpackssdw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6b }, 0, .vex_256_wig, .avx2 },

View File

@ -280,7 +280,7 @@ test "@abs float vectors" {
try testAbsFloatVectors(f16, 16);
try comptime testAbsFloatVectors(f16, 17);
try testAbsFloatVectors(f32, 17);
try testAbsFloatVectors(f32, 1);
try comptime testAbsFloatVectors(f32, 1);
try testAbsFloatVectors(f32, 1);
try comptime testAbsFloatVectors(f32, 2);