x86_64: implement binary operations for f16 and f16 vectors
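The f16 lowering strategy: widen the half-precision operands to f32 with vcvtph2ps, do the arithmetic at single precision, and narrow the result back with vcvtps2ph, so all of these paths gate on the F16C target feature. As a minimal sketch (my example, not part of the commit), this is the kind of code the self-hosted x86_64 backend can now compile on an F16C-capable CPU:

const std = @import("std");
const expect = std.testing.expect;

test "f16 binary ops, scalar and vector" {
    var a: f16 = 1.5;
    var b: f16 = 0.25;
    _ = .{ &a, &b }; // keep the operands runtime-known
    try expect(a + b == 1.75);
    try expect(a / b == 6.0);
    try expect(@min(a, b) == 0.25);

    var u: @Vector(4, f16) = .{ 1, 2, 3, 4 };
    var v: @Vector(4, f16) = .{ 4, 3, 2, 1 };
    _ = .{ &u, &v };
    const sum = u + v;
    try expect(sum[0] == 5 and sum[3] == 5);
}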

Jacob Young 2023-05-07 20:42:46 -04:00
parent f8708e2c4d
commit 6778da4516
7 changed files with 354 additions and 46 deletions

View File

@ -4497,14 +4497,15 @@ fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void {
const tag = self.air.instructions.items(.tag)[inst];
try self.genBinOpMir(switch (ty_bits) {
// No point using an extra prefix byte for *pd which performs the same operation.
32, 64 => switch (tag) {
16, 32, 64, 128 => switch (tag) {
.neg => .xorps,
.fabs => .andnps,
else => unreachable,
},
else => return self.fail("TODO implement airFloatSign for {}", .{
80 => return self.fail("TODO implement airFloatSign for {}", .{
ty.fmt(self.bin_file.options.module.?),
}),
else => unreachable,
}, vec_ty, dst_mcv, sign_mcv);
return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
}
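Context for the airFloatSign hunk above: negation and @fabs only touch the sign bit, so one bitwise xorps/andnps against a sign mask handles every float width, which is why a single arm now covers 16, 32, 64, and 128 bits and only f80 remains a TODO. The same trick written in Zig, as a sketch (hypothetical helpers, current @bitCast syntax):

fn negF16(x: f16) f16 {
    const bits: u16 = @bitCast(x);
    return @bitCast(bits ^ 0x8000); // flip only the sign bit
}

fn fabsF16(x: f16) f16 {
    const bits: u16 = @bitCast(x);
    return @bitCast(bits & 0x7fff); // clear only the sign bit
}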
@ -6112,9 +6113,53 @@ fn genBinOp(
return dst_mcv;
}
const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size);
const mir_tag = if (@as(?Mir.Inst.Tag, switch (lhs_ty.zigTypeTag()) {
else => unreachable,
.Float => switch (lhs_ty.floatBits(self.target.*)) {
16 => if (self.hasFeature(.f16c)) {
const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
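// Lane 0 of dst already holds the lhs f16; the next instruction places the
// rhs f16 in lane 1, so a single vcvtph2ps widens both to f32.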
if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
.vpinsrw,
dst_reg,
dst_reg,
src_mcv.mem(.word),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
.vpunpcklwd,
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
try self.asmRegisterRegister(.vcvtph2ps, dst_reg, dst_reg);
try self.asmRegisterRegister(.vmovshdup, tmp_reg, dst_reg);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .vaddss,
.sub => .vsubss,
.div_float, .div_trunc, .div_floor, .div_exact => .vdivss,
.max => .vmaxss,
.min => .vminss,
else => unreachable,
},
dst_reg,
dst_reg,
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
.vcvtps2ph,
dst_reg,
dst_reg,
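// imm8 = 0b100: bit 2 set selects rounding by the current MXCSR mode.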
Immediate.u(0b1_00),
);
return dst_mcv;
} else null,
32 => switch (air_tag) {
.add => if (self.hasFeature(.avx)) .vaddss else .addss,
.sub => if (self.hasFeature(.avx)) .vsubss else .subss,
@ -6141,12 +6186,178 @@ fn genBinOp(
.min => if (self.hasFeature(.avx)) .vminsd else .minsd,
else => unreachable,
},
16, 80, 128 => null,
80, 128 => null,
else => unreachable,
},
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
else => null,
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
16 => if (self.hasFeature(.f16c)) switch (lhs_ty.vectorLen()) {
1 => {
const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
.vpinsrw,
dst_reg,
dst_reg,
src_mcv.mem(.word),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
.vpunpcklwd,
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
try self.asmRegisterRegister(.vcvtph2ps, dst_reg, dst_reg);
try self.asmRegisterRegister(.vmovshdup, tmp_reg, dst_reg);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .vaddss,
.sub => .vsubss,
.div_float, .div_trunc, .div_floor, .div_exact => .vdivss,
.max => .vmaxss,
.min => .vminss,
else => unreachable,
},
dst_reg,
dst_reg,
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
.vcvtps2ph,
dst_reg,
dst_reg,
Immediate.u(0b1_00),
);
return dst_mcv;
},
2 => {
const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
.vpinsrd,
dst_reg,
src_mcv.mem(.dword),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
.vunpcklps,
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
try self.asmRegisterRegister(.vcvtph2ps, dst_reg, dst_reg);
try self.asmRegisterRegisterRegister(.vmovhlps, tmp_reg, dst_reg, dst_reg);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .vaddps,
.sub => .vsubps,
.div_float, .div_trunc, .div_floor, .div_exact => .vdivps,
.max => .vmaxps,
.min => .vminps,
else => unreachable,
},
dst_reg,
dst_reg,
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
.vcvtps2ph,
dst_reg,
dst_reg,
Immediate.u(0b1_00),
);
return dst_mcv;
},
3...4 => {
const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
try self.asmRegisterRegister(.vcvtph2ps, dst_reg, dst_reg);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
.vcvtph2ps,
tmp_reg,
src_mcv.mem(.qword),
) else try self.asmRegisterRegister(
.vcvtph2ps,
tmp_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .vaddps,
.sub => .vsubps,
.div_float, .div_trunc, .div_floor, .div_exact => .vdivps,
.max => .vmaxps,
.min => .vminps,
else => unreachable,
},
dst_reg,
dst_reg,
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
.vcvtps2ph,
dst_reg,
dst_reg,
Immediate.u(0b1_00),
);
return dst_mcv;
},
5...8 => {
const tmp_reg = (try self.register_manager.allocReg(null, sse)).to256();
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
try self.asmRegisterRegister(.vcvtph2ps, dst_reg.to256(), dst_reg);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
.vcvtph2ps,
tmp_reg,
src_mcv.mem(.xword),
) else try self.asmRegisterRegister(
.vcvtph2ps,
tmp_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .vaddps,
.sub => .vsubps,
.div_float, .div_trunc, .div_floor, .div_exact => .vdivps,
.max => .vmaxps,
.min => .vminps,
else => unreachable,
},
dst_reg.to256(),
dst_reg.to256(),
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
.vcvtps2ph,
dst_reg,
dst_reg.to256(),
Immediate.u(0b1_00),
);
return dst_mcv;
},
else => null,
} else null,
32 => switch (lhs_ty.vectorLen()) {
1 => switch (air_tag) {
.add => if (self.hasFeature(.avx)) .vaddss else .addss,
@ -6223,14 +6434,13 @@ fn genBinOp(
} else null,
else => null,
},
16, 80, 128 => null,
80, 128 => null,
else => unreachable,
},
},
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
});
const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size);
if (self.hasFeature(.avx)) {
const src1_alias =
if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
@ -7139,21 +7349,21 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
const tmp2_lock = self.register_manager.lockRegAssumeUnused(tmp2_reg);
defer self.register_manager.unlockReg(tmp2_lock);
if (src_mcv.isRegister())
try self.asmRegisterRegisterRegister(
.vpunpcklwd,
tmp1_reg,
dst_reg.to128(),
src_mcv.getReg().?.to128(),
)
else
try self.asmRegisterRegisterMemoryImmediate(
.vpinsrw,
tmp1_reg,
dst_reg.to128(),
src_mcv.mem(.word),
Immediate.u(1),
);
if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
.vpinsrw,
tmp1_reg,
dst_reg.to128(),
src_mcv.mem(.word),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
.vpunpcklwd,
tmp1_reg,
dst_reg.to128(),
(if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(ty, src_mcv)).to128(),
);
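// tmp1 now holds lhs (lane 0) and rhs (lane 1) as f16: widen both to f32,
// split the rhs out into tmp2, and compare as f32.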
try self.asmRegisterRegister(.vcvtph2ps, tmp1_reg, tmp1_reg);
try self.asmRegisterRegister(.vmovshdup, tmp2_reg, tmp1_reg);
try self.genBinOpMir(.ucomiss, ty, tmp1_mcv, tmp2_mcv);
@ -8139,7 +8349,16 @@ fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.Tag {
},
.Vector => switch (ty.childType().zigTypeTag()) {
.Float => switch (ty.childType().floatBits(self.target.*)) {
16 => unreachable, // needs special handling
16 => switch (ty.vectorLen()) {
1 => unreachable, // needs special handling
2 => return if (self.hasFeature(.avx)) .vmovss else .movss,
3...4 => return if (self.hasFeature(.avx)) .vmovsd else .movsd,
5...8 => return if (self.hasFeature(.avx))
if (aligned) .vmovaps else .vmovups
else if (aligned) .movaps else .movups,
9...16 => if (self.hasFeature(.avx)) return if (aligned) .vmovaps else .vmovups,
else => {},
},
32 => switch (ty.vectorLen()) {
1 => return if (self.hasFeature(.avx)) .vmovss else .movss,
2...4 => return if (self.hasFeature(.avx))

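The movMirTag arms added above choose a move by total vector size: an N-element f16 vector occupies 2N bytes, so 2 elements move as one 32-bit movss, 3-4 elements as one 64-bit movsd (the ABI size rounds up to 8 bytes), 5-8 elements as one 16-byte movaps/movups, and 9-16 elements need a 32-byte move that only AVX provides. A sketch of that selection (hypothetical helper, mnemonics as strings purely for illustration):

fn f16VectorMove(len: u32, has_avx: bool, aligned: bool) ?[]const u8 {
    return switch (len) {
        2 => if (has_avx) "vmovss" else "movss", // 4 bytes
        3, 4 => if (has_avx) "vmovsd" else "movsd", // up to 8 bytes
        5...8 => if (has_avx) // 16 bytes: one XMM register
            (if (aligned) "vmovaps" else "vmovups")
        else if (aligned) "movaps" else "movups",
        9...16 => if (has_avx) // 32 bytes: one YMM register, AVX only
            (if (aligned) "vmovaps" else "vmovups")
        else
            null,
        else => null,
    };
}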
View File

@ -270,7 +270,7 @@ pub const Mnemonic = enum {
divps, divss,
maxps, maxss,
minps, minss,
movaps, movss, movups,
movaps, movhlps, movss, movups,
mulps, mulss,
orps,
pextrw, pinsrw,
@ -303,6 +303,8 @@ pub const Mnemonic = enum {
// SSE3
movddup, movshdup, movsldup,
// SSE4.1
pextrb, pextrd, pextrq,
pinsrb, pinsrd, pinsrq,
roundpd, roundps, roundsd, roundss,
// AVX
vaddpd, vaddps, vaddsd, vaddss,
@ -311,13 +313,14 @@ pub const Mnemonic = enum {
vmaxpd, vmaxps, vmaxsd, vmaxss,
vminpd, vminps, vminsd, vminss,
vmovapd, vmovaps,
vmovddup,
vmovddup, vmovhlps,
vmovsd,
vmovshdup, vmovsldup,
vmovss,
vmovupd, vmovups,
vmulpd, vmulps, vmulsd, vmulss,
vpextrw, vpinsrw,
vpextrb, vpextrd, vpextrq, vpextrw,
vpinsrb, vpinsrd, vpinsrq, vpinsrw,
vpshufhw, vpshuflw,
vpsrld, vpsrlq, vpsrlw,
vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd,
@ -359,7 +362,7 @@ pub const Op = enum {
cl,
r8, r16, r32, r64,
rm8, rm16, rm32, rm64,
r32_m16, r64_m16,
r32_m8, r32_m16, r64_m16,
m8, m16, m32, m64, m80, m128, m256,
rel8, rel16, rel32,
m,
@ -444,7 +447,7 @@ pub const Op = enum {
pub fn immBitSize(op: Op) u64 {
return switch (op) {
.none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable,
.al, .cl, .r8, .rm8 => unreachable,
.al, .cl, .r8, .rm8, .r32_m8 => unreachable,
.ax, .r16, .rm16 => unreachable,
.eax, .r32, .rm32, .r32_m16 => unreachable,
.rax, .r64, .rm64, .r64_m16 => unreachable,
@ -467,7 +470,7 @@ pub const Op = enum {
.m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable,
.al, .cl, .r8, .rm8 => 8,
.ax, .r16, .rm16 => 16,
.eax, .r32, .rm32, .r32_m16 => 32,
.eax, .r32, .rm32, .r32_m8, .r32_m16 => 32,
.rax, .r64, .rm64, .r64_m16 => 64,
.xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128,
.ymm, .ymm_m256 => 256,
@ -480,7 +483,7 @@ pub const Op = enum {
.unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable,
.rel8, .rel16, .rel32 => unreachable,
.al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64, .xmm, .ymm => unreachable,
.m8, .rm8 => 8,
.m8, .rm8, .r32_m8 => 8,
.m16, .rm16, .r32_m16, .r64_m16 => 16,
.m32, .rm32, .xmm_m32 => 32,
.m64, .rm64, .xmm_m64 => 64,
@ -509,7 +512,7 @@ pub const Op = enum {
.al, .ax, .eax, .rax,
.r8, .r16, .r32, .r64,
.rm8, .rm16, .rm32, .rm64,
.r32_m16, .r64_m16,
.r32_m8, .r32_m16, .r64_m16,
.xmm, .xmm_m32, .xmm_m64, .xmm_m128,
.ymm, .ymm_m256,
=> true,
@ -535,7 +538,7 @@ pub const Op = enum {
// zig fmt: off
return switch (op) {
.rm8, .rm16, .rm32, .rm64,
.r32_m16, .r64_m16,
.r32_m8, .r32_m16, .r64_m16,
.m8, .m16, .m32, .m64, .m80, .m128, .m256,
.m,
.xmm_m32, .xmm_m64, .xmm_m128,
@ -559,7 +562,7 @@ pub const Op = enum {
.al, .ax, .eax, .rax, .cl => .general_purpose,
.r8, .r16, .r32, .r64 => .general_purpose,
.rm8, .rm16, .rm32, .rm64 => .general_purpose,
.r32_m16, .r64_m16 => .general_purpose,
.r32_m8, .r32_m16, .r64_m16 => .general_purpose,
.sreg => .segment,
.xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .floating_point,
.ymm, .ymm_m256 => .floating_point,
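The new r32_m8 class exists because byte extract/insert (pextrb/pinsrb and their VEX forms) pair a 32-bit register operand with an 8-bit memory operand, so neither rm8 nor rm32 describes both; the hooks above accordingly reject it for immediates, report 32 bits for the register form, and 8 bits for the memory form. A usage sketch (assuming the two size hooks shown above are named regBitSize and memBitSize, which this excerpt does not confirm):

comptime {
    // r32_m8: register form is 32 bits wide, memory form is 8 bits.
    std.debug.assert(Op.r32_m8.regBitSize() == 32);
    std.debug.assert(Op.r32_m8.memBitSize() == 8);
}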

View File

@ -137,6 +137,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.minps,
.minss,
.movaps,
.movhlps,
.movss,
.movups,
.mulps,
@ -149,6 +150,8 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.subps,
.subss,
.ucomiss,
.unpckhps,
.unpcklps,
.xorps,
.addpd,
@ -187,12 +190,20 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.subpd,
.subsd,
.ucomisd,
.unpckhpd,
.unpcklpd,
.xorpd,
.movddup,
.movshdup,
.movsldup,
.pextrb,
.pextrd,
.pextrq,
.pinsrb,
.pinsrd,
.pinsrq,
.roundpd,
.roundps,
.roundsd,
@ -221,6 +232,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.vmovapd,
.vmovaps,
.vmovddup,
.vmovhlps,
.vmovsd,
.vmovshdup,
.vmovsldup,
@ -231,7 +243,13 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.vmulps,
.vmulsd,
.vmulss,
.vpextrb,
.vpextrd,
.vpextrq,
.vpextrw,
.vpinsrb,
.vpinsrd,
.vpinsrq,
.vpinsrw,
.vpshufhw,
.vpshuflw,
@ -258,6 +276,10 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.vsubps,
.vsubsd,
.vsubss,
.vunpckhpd,
.vunpckhps,
.vunpcklpd,
.vunpcklps,
.vcvtph2ps,
.vcvtps2ph,

View File

@ -192,6 +192,8 @@ pub const Inst = struct {
minss,
/// Move aligned packed single-precision floating-point values
movaps,
/// Move packed single-precision floating-point values high to low
movhlps,
/// Move scalar single-precision floating-point value
movss,
/// Move unaligned packed single-precision floating-point values
@ -216,6 +218,10 @@ pub const Inst = struct {
subss,
/// Unordered compare scalar single-precision floating-point values
ucomiss,
/// Unpack and interleave high packed single-precision floating-point values
unpckhps,
/// Unpack and interleave low packed single-precision floating-point values
unpcklps,
/// Bitwise logical xor of packed single precision floating-point values
xorps,
@ -291,6 +297,10 @@ pub const Inst = struct {
subsd,
/// Unordered compare scalar double-precision floating-point values
ucomisd,
/// Unpack and interleave high packed double-precision floating-point values
unpckhpd,
/// Unpack and interleave low packed double-precision floating-point values
unpcklpd,
/// Bitwise logical xor of packed double precision floating-point values
xorpd,
@ -301,6 +311,18 @@ pub const Inst = struct {
/// Replicate single floating-point values
movsldup,
/// Extract byte
pextrb,
/// Extract doubleword
pextrd,
/// Extract quadword
pextrq,
/// Insert byte
pinsrb,
/// Insert doubleword
pinsrd,
/// Insert quadword
pinsrq,
/// Round packed double-precision floating-point values
roundpd,
/// Round packed single-precision floating-point values
@ -354,6 +376,8 @@ pub const Inst = struct {
vmovapd,
/// Move aligned packed single-precision floating-point values
vmovaps,
/// Replicate double floating-point values
vmovddup,
/// Move packed single-precision floating-point values high to low
vmovhlps,
/// Move or merge scalar double-precision floating-point value
@ -376,8 +400,20 @@ pub const Inst = struct {
vmulsd,
/// Multiply scalar single-precision floating-point values
vmulss,
/// Extract byte
vpextrb,
/// Extract doubleword
vpextrd,
/// Extract quadword
vpextrq,
/// Extract word
vpextrw,
/// Insert byte
vpinsrb,
/// Insert doubleword
vpinsrd,
/// Insert quadword
vpinsrq,
/// Insert word
vpinsrw,
/// Shuffle packed high words
@ -430,6 +466,14 @@ pub const Inst = struct {
vsubsd,
/// Subtract scalar single-precision floating-point values
vsubss,
/// Unpack and interleave high packed double-precision floating-point values
vunpckhpd,
/// Unpack and interleave high packed single-precision floating-point values
vunpckhps,
/// Unpack and interleave low packed double-precision floating-point values
vunpcklpd,
/// Unpack and interleave low packed single-precision floating-point values
vunpcklps,
/// Convert 16-bit floating-point values to single-precision floating-point values
vcvtph2ps,

View File

@ -865,6 +865,8 @@ pub const table = [_]Entry{
.{ .movaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .none, .sse },
.{ .movaps, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x29 }, 0, .none, .sse },
.{ .movhlps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .none, .sse },
.{ .movss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .none, .sse },
.{ .movss, .mr, &.{ .xmm_m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .none, .sse },
@ -988,8 +990,16 @@ pub const table = [_]Entry{
.{ .movsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .none, .sse3 },
// SSE4.1
.{ .pextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .none, .sse4_1 },
.{ .pextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .none, .sse4_1 },
.{ .pextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .long, .sse4_1 },
.{ .pextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .none, .sse4_1 },
.{ .pinsrb, .rmi, &.{ .xmm, .r32_m8, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x20 }, 0, .none, .sse4_1 },
.{ .pinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .none, .sse4_1 },
.{ .pinsrq, .rmi, &.{ .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .long, .sse4_1 },
.{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 },
.{ .roundps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .none, .sse4_1 },
@ -1062,6 +1072,8 @@ pub const table = [_]Entry{
.{ .vmovddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
.{ .vmovddup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_256_wig, .avx },
.{ .vmovhlps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
.{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
.{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
.{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx },
@ -1098,9 +1110,17 @@ pub const table = [_]Entry{
.{ .vmulss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .vex_lig_wig, .avx },
.{ .vpextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .vex_128_w0, .avx },
.{ .vpextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w0, .avx },
.{ .vpextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w1, .avx },
.{ .vpextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .vex_128_wig, .avx },
.{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_wig, .avx },
.{ .vpinsrb, .rmi, &.{ .xmm, .r32_m8, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x20 }, 0, .vex_128_w0, .avx },
.{ .vpinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w0, .avx },
.{ .vpinsrq, .rmi, &.{ .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w1, .avx },
.{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx },
.{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128_wig, .avx },
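For orientation, each Entry row reads by position (my labeling, inferred from how the table is used rather than stated in this excerpt): mnemonic, operand-encoding form, operand list, opcode bytes, ModRM opcode extension, prefix/VEX mode, and required CPU feature. One of the new rows, annotated:

// mnemonic  enc    operands                           opcode                  ext  mode          feature
.{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx },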

View File

@ -8,6 +8,8 @@ const has_f80_rt = switch (builtin.cpu.arch) {
.x86_64, .x86 => true,
else => false,
};
const no_x86_64_hardware_f16_support = builtin.zig_backend == .stage2_x86_64 and
!std.Target.x86.featureSetHas(builtin.cpu.features, .f16c);
const epsilon_16 = 0.001;
const epsilon = 0.000001;
@ -52,8 +54,7 @@ fn testFloatComparisons() !void {
}
test "different sized float comparisons" {
if (builtin.zig_backend == .stage2_x86_64 and
!comptime std.Target.x86.featureSetHas(builtin.cpu.features, .f16c)) return error.SkipZigTest; // TODO
if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@ -152,7 +153,7 @@ fn testSqrtWithVectors() !void {
}
test "more @sqrt f16 tests" {
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@ -202,7 +203,7 @@ fn testSqrtLegacy(comptime T: type, x: T) !void {
}
test "@sin" {
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@ -241,7 +242,7 @@ fn testSinWithVectors() !void {
}
test "@cos" {
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@ -280,7 +281,7 @@ fn testCosWithVectors() !void {
}
test "@exp" {
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@ -318,7 +319,7 @@ fn testExpWithVectors() !void {
}
test "@exp2" {
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@ -403,7 +404,7 @@ test "@log with @vectors" {
}
test "@log2" {
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@ -445,7 +446,7 @@ fn testLog2WithVectors() !void {
}
test "@log10" {
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@ -881,7 +882,7 @@ fn testTruncLegacy(comptime T: type, x: T) !void {
}
test "negation f16" {
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@ -1040,7 +1041,6 @@ test "comptime_float zero divided by zero produces zero" {
}
test "nan negation f16" {
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO

View File

@ -2,11 +2,11 @@ const std = @import("std");
const builtin = @import("builtin");
const expect = std.testing.expect;
const stage2_x86_64_without_hardware_fma_support = builtin.zig_backend == .stage2_x86_64 and
const no_x86_64_hardware_fma_support = builtin.zig_backend == .stage2_x86_64 and
!std.Target.x86.featureSetHas(builtin.cpu.features, .fma);
test "@mulAdd" {
if (stage2_x86_64_without_hardware_fma_support) return error.SkipZigTest; // TODO
if (no_x86_64_hardware_fma_support) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@ -120,7 +120,7 @@ fn vector32() !void {
test "vector f32" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (stage2_x86_64_without_hardware_fma_support) return error.SkipZigTest; // TODO
if (no_x86_64_hardware_fma_support) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@ -143,7 +143,7 @@ fn vector64() !void {
test "vector f64" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (stage2_x86_64_without_hardware_fma_support) return error.SkipZigTest; // TODO
if (no_x86_64_hardware_fma_support) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO