x86_64: fix float min/max behavior

Jacob Young 2023-05-15 20:17:06 -04:00
parent b9d2e0e308
commit 403c2d91be
7 changed files with 393 additions and 40 deletions
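
The new lowering keeps minss/maxss (or their packed/AVX forms) for the common case, then emits an unordered compare of the right-hand operand followed by a blend (vblendvps/vblendvpd under AVX, blendvps/blendvpd under SSE4.1, or a cmp/and/andn/or select otherwise), so a NaN on the right-hand side no longer leaks through as the result. A minimal behavior sketch, assuming fmin/fmax-style NaN handling, which is what the new select sequence implements; this test is not part of the commit:

const std = @import("std");

test "@min/@max ignore a single NaN operand" {
    // Runtime values so the x86_64 backend, not comptime evaluation, does the work.
    var nan: f32 = std.math.nan(f32);
    var one: f32 = 1.0;
    try std.testing.expectEqual(one, @max(nan, one));
    try std.testing.expectEqual(one, @max(one, nan));
    try std.testing.expectEqual(one, @min(nan, one));
    try std.testing.expectEqual(one, @min(one, nan));
    // Only an all-NaN input still yields NaN.
    try std.testing.expect(std.math.isNan(@max(nan, nan)));
}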

View File

@@ -1271,6 +1271,27 @@ fn asmRegisterRegisterRegister(
});
}
fn asmRegisterRegisterRegisterRegister(
self: *Self,
tag: Mir.Inst.FixedTag,
reg1: Register,
reg2: Register,
reg3: Register,
reg4: Register,
) !void {
_ = try self.addInst(.{
.tag = tag[1],
.ops = .rrrr,
.data = .{ .rrrr = .{
.fixes = tag[0],
.r1 = reg1,
.r2 = reg2,
.r3 = reg3,
.r4 = reg4,
} },
});
}
fn asmRegisterRegisterRegisterImmediate(
self: *Self,
tag: Mir.Inst.FixedTag,
@@ -6224,12 +6245,26 @@ fn genBinOp(
lhs_air: Air.Inst.Ref,
rhs_air: Air.Inst.Ref,
) !MCValue {
const lhs_mcv = try self.resolveInst(lhs_air);
const rhs_mcv = try self.resolveInst(rhs_air);
const lhs_ty = self.air.typeOf(lhs_air);
const rhs_ty = self.air.typeOf(rhs_air);
const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*));
const maybe_mask_reg = switch (air_tag) {
else => null,
.max, .min => if (lhs_ty.scalarType().isRuntimeFloat()) registerAlias(
if (!self.hasFeature(.avx) and self.hasFeature(.sse4_1)) mask: {
try self.register_manager.getReg(.xmm0, null);
break :mask .xmm0;
} else try self.register_manager.allocReg(null, sse),
abi_size,
) else null,
};
const mask_lock =
if (maybe_mask_reg) |mask_reg| self.register_manager.lockRegAssumeUnused(mask_reg) else null;
defer if (mask_lock) |lock| self.register_manager.unlockReg(lock);
const lhs_mcv = try self.resolveInst(lhs_air);
const rhs_mcv = try self.resolveInst(rhs_air);
switch (lhs_mcv) {
.immediate => |imm| switch (imm) {
0 => switch (air_tag) {
@@ -6300,7 +6335,16 @@ fn genBinOp(
};
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
const src_mcv = if (flipped) lhs_mcv else rhs_mcv;
const unmat_src_mcv = if (flipped) lhs_mcv else rhs_mcv;
const src_mcv: MCValue = if (maybe_mask_reg) |mask_reg|
if (self.hasFeature(.avx) and unmat_src_mcv.isRegister() and maybe_inst != null and
self.liveness.operandDies(maybe_inst.?, if (flipped) 0 else 1)) unmat_src_mcv else src: {
try self.genSetReg(mask_reg, rhs_ty, unmat_src_mcv);
break :src .{ .register = mask_reg };
}
else
unmat_src_mcv;
if (!vec_op) {
switch (air_tag) {
.add,
@@ -7009,18 +7053,26 @@ fn genBinOp(
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
});
const lhs_copy_reg = if (maybe_mask_reg) |_| registerAlias(
if (copied_to_dst) try self.copyToTmpRegister(lhs_ty, dst_mcv) else lhs_mcv.getReg().?,
abi_size,
) else null;
const lhs_copy_lock = if (lhs_copy_reg) |reg| self.register_manager.lockReg(reg) else null;
defer if (lhs_copy_lock) |lock| self.register_manager.unlockReg(lock);
if (self.hasFeature(.avx)) {
const src1_alias =
const lhs_reg =
if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
mir_tag,
dst_reg,
src1_alias,
lhs_reg,
src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
) else try self.asmRegisterRegisterRegister(
mir_tag,
dst_reg,
src1_alias,
lhs_reg,
registerAlias(if (src_mcv.isRegister())
src_mcv.getReg().?
else
@@ -7041,9 +7093,10 @@ fn genBinOp(
try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
);
}
switch (air_tag) {
.add, .addwrap, .sub, .subwrap, .mul, .mulwrap, .div_float, .div_exact => {},
.div_trunc, .div_floor => try self.genRound(
.div_trunc, .div_floor => if (self.hasFeature(.sse4_1)) try self.genRound(
lhs_ty,
dst_reg,
.{ .register = dst_reg },
@@ -7052,11 +7105,240 @@ fn genBinOp(
.div_floor => 0b1_0_01,
else => unreachable,
},
),
) else return self.fail("TODO implement genBinOp for {s} {} without sse4_1 feature", .{
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
.bit_and, .bit_or, .xor => {},
.max, .min => {}, // TODO: unordered select
.max, .min => if (maybe_mask_reg) |mask_reg| if (self.hasFeature(.avx)) {
const rhs_copy_reg = registerAlias(src_mcv.getReg().?, abi_size);
try self.asmRegisterRegisterRegisterImmediate(
if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
.Float => switch (lhs_ty.floatBits(self.target.*)) {
32 => .{ .v_ss, .cmp },
64 => .{ .v_sd, .cmp },
16, 80, 128 => null,
else => unreachable,
},
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
32 => switch (lhs_ty.vectorLen()) {
1 => .{ .v_ss, .cmp },
2...8 => .{ .v_ps, .cmp },
else => null,
},
64 => switch (lhs_ty.vectorLen()) {
1 => .{ .v_sd, .cmp },
2...4 => .{ .v_pd, .cmp },
else => null,
},
16, 80, 128 => null,
else => unreachable,
},
else => unreachable,
},
else => unreachable,
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
mask_reg,
rhs_copy_reg,
rhs_copy_reg,
Immediate.u(3), // unord
);
try self.asmRegisterRegisterRegisterRegister(
if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
.Float => switch (lhs_ty.floatBits(self.target.*)) {
32 => .{ .v_ps, .blendv },
64 => .{ .v_pd, .blendv },
16, 80, 128 => null,
else => unreachable,
},
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
32 => switch (lhs_ty.vectorLen()) {
1...8 => .{ .v_ps, .blendv },
else => null,
},
64 => switch (lhs_ty.vectorLen()) {
1...4 => .{ .v_pd, .blendv },
else => null,
},
16, 80, 128 => null,
else => unreachable,
},
else => unreachable,
},
else => unreachable,
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
dst_reg,
dst_reg,
lhs_copy_reg.?,
mask_reg,
);
} else {
const has_blend = self.hasFeature(.sse4_1);
try self.asmRegisterRegisterImmediate(
if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
.Float => switch (lhs_ty.floatBits(self.target.*)) {
32 => .{ ._ss, .cmp },
64 => .{ ._sd, .cmp },
16, 80, 128 => null,
else => unreachable,
},
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
32 => switch (lhs_ty.vectorLen()) {
1 => .{ ._ss, .cmp },
2...4 => .{ ._ps, .cmp },
else => null,
},
64 => switch (lhs_ty.vectorLen()) {
1 => .{ ._sd, .cmp },
2 => .{ ._pd, .cmp },
else => null,
},
16, 80, 128 => null,
else => unreachable,
},
else => unreachable,
},
else => unreachable,
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
mask_reg,
mask_reg,
Immediate.u(if (has_blend) 3 else 7), // unord, ord
);
if (has_blend) try self.asmRegisterRegisterRegister(
if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
.Float => switch (lhs_ty.floatBits(self.target.*)) {
32 => .{ ._ps, .blendv },
64 => .{ ._pd, .blendv },
16, 80, 128 => null,
else => unreachable,
},
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
32 => switch (lhs_ty.vectorLen()) {
1...4 => .{ ._ps, .blendv },
else => null,
},
64 => switch (lhs_ty.vectorLen()) {
1...2 => .{ ._pd, .blendv },
else => null,
},
16, 80, 128 => null,
else => unreachable,
},
else => unreachable,
},
else => unreachable,
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
dst_reg,
lhs_copy_reg.?,
mask_reg,
) else {
try self.asmRegisterRegister(
if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
.Float => switch (lhs_ty.floatBits(self.target.*)) {
32 => .{ ._ps, .@"and" },
64 => .{ ._pd, .@"and" },
16, 80, 128 => null,
else => unreachable,
},
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
32 => switch (lhs_ty.vectorLen()) {
1...4 => .{ ._ps, .@"and" },
else => null,
},
64 => switch (lhs_ty.vectorLen()) {
1...2 => .{ ._pd, .@"and" },
else => null,
},
16, 80, 128 => null,
else => unreachable,
},
else => unreachable,
},
else => unreachable,
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
dst_reg,
mask_reg,
);
try self.asmRegisterRegister(
if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
.Float => switch (lhs_ty.floatBits(self.target.*)) {
32 => .{ ._ps, .andn },
64 => .{ ._pd, .andn },
16, 80, 128 => null,
else => unreachable,
},
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
32 => switch (lhs_ty.vectorLen()) {
1...4 => .{ ._ps, .andn },
else => null,
},
64 => switch (lhs_ty.vectorLen()) {
1...2 => .{ ._pd, .andn },
else => null,
},
16, 80, 128 => null,
else => unreachable,
},
else => unreachable,
},
else => unreachable,
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
mask_reg,
lhs_copy_reg.?,
);
try self.asmRegisterRegister(
if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
.Float => switch (lhs_ty.floatBits(self.target.*)) {
32 => .{ ._ps, .@"or" },
64 => .{ ._pd, .@"or" },
16, 80, 128 => null,
else => unreachable,
},
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
32 => switch (lhs_ty.vectorLen()) {
1...4 => .{ ._ps, .@"or" },
else => null,
},
64 => switch (lhs_ty.vectorLen()) {
1...2 => .{ ._pd, .@"or" },
else => null,
},
16, 80, 128 => null,
else => unreachable,
},
else => unreachable,
},
else => unreachable,
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
}),
dst_reg,
mask_reg,
);
}
},
else => unreachable,
}
return dst_mcv;
}
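
For reference, the pre-SSE4.1 path above builds the select out of cmp/and/andn/or: it computes an "ordered" mask from the right-hand operand (all ones unless rhs is NaN), keeps the minps/maxps result where that mask is set, and rescues the left-hand copy where it is not. A scalar model of that merge, written in the same two-argument builtin style as the surrounding code (an illustration, not code from this commit):

const std = @import("std");

// mask    = (rhs == rhs) ? all-ones : 0   -> cmpss/cmpps with the "ord" predicate (7)
// kept    = mask & bits(min/max result)   -> andps/andpd
// rescued = ~mask & bits(lhs copy)        -> andnps/andnpd
// result  = kept | rescued                -> orps/orpd
fn selectIgnoringRhsNan(min_or_max_result: f32, lhs: f32, rhs: f32) f32 {
    const mask: u32 = if (rhs == rhs) 0xffff_ffff else 0;
    const kept = @bitCast(u32, min_or_max_result) & mask;
    const rescued = ~mask & @bitCast(u32, lhs);
    return @bitCast(f32, kept | rescued);
}

test "scalar model rescues lhs when rhs is NaN" {
    const nan = std.math.nan(f32);
    // minss(2.0, nan) yields nan; the select replaces it with lhs.
    try std.testing.expectEqual(@as(f32, 2.0), selectIgnoringRhsNan(nan, 2.0, nan));
}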
@@ -9282,7 +9564,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null,
else => null,
},
.Float => switch (ty.floatBits(self.target.*)) {
.Float => switch (ty.scalarType().floatBits(self.target.*)) {
16, 128 => switch (abi_size) {
2...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },

View File

@@ -178,7 +178,7 @@ pub fn format(
try writer.print("+{s} ", .{tag});
},
.m, .mi, .m1, .mc, .vmi => try writer.print("/{d} ", .{encoding.modRmExt()}),
.mr, .rm, .rmi, .mri, .mrc, .rvm, .rvmi, .mvr => try writer.writeAll("/r "),
.mr, .rm, .rmi, .mri, .mrc, .rm0, .rvm, .rvmr, .rvmi, .mvr => try writer.writeAll("/r "),
}
switch (encoding.data.op_en) {
@@ -202,7 +202,8 @@ pub fn format(
};
try writer.print("{s} ", .{tag});
},
.np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rvm, .mvr => {},
.rvmr => try writer.writeAll("/is4 "),
.np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rm0, .rvm, .mvr => {},
}
try writer.print("{s} ", .{@tagName(encoding.mnemonic)});
@@ -270,7 +271,7 @@ pub const Mnemonic = enum {
addps, addss,
andps,
andnps,
cmpss,
cmpps, cmpss,
cvtpi2ps, cvtps2pi, cvtsi2ss, cvtss2si, cvttps2pi, cvttss2si,
divps, divss,
maxps, maxss,
@@ -290,7 +291,7 @@ pub const Mnemonic = enum {
addpd, addsd,
andpd,
andnpd,
//cmpsd,
cmppd, //cmpsd,
cvtdq2pd, cvtdq2ps, cvtpd2dq, cvtpd2pi, cvtpd2ps, cvtpi2pd,
cvtps2dq, cvtps2pd, cvtsd2si, cvtsd2ss, cvtsi2sd, cvtss2sd,
cvttpd2dq, cvttpd2pi, cvttps2dq, cvttsd2si,
@@ -315,6 +316,7 @@ pub const Mnemonic = enum {
// SSE3
movddup, movshdup, movsldup,
// SSE4.1
blendpd, blendps, blendvpd, blendvps,
extractps,
insertps,
pextrb, pextrd, pextrq,
@@ -325,7 +327,9 @@ pub const Mnemonic = enum {
// AVX
vaddpd, vaddps, vaddsd, vaddss,
vandnpd, vandnps, vandpd, vandps,
vblendpd, vblendps, vblendvpd, vblendvps,
vbroadcastf128, vbroadcastsd, vbroadcastss,
vcmppd, vcmpps, vcmpsd, vcmpss,
vcvtdq2pd, vcvtdq2ps, vcvtpd2dq, vcvtpd2ps,
vcvtps2dq, vcvtps2pd, vcvtsd2si, vcvtsd2ss,
vcvtsi2sd, vcvtsi2ss, vcvtss2sd, vcvtss2si,
@@ -385,7 +389,7 @@ pub const OpEn = enum {
fd, td,
m1, mc, mi, mr, rm,
rmi, mri, mrc,
vmi, rvm, rvmi, mvr,
rm0, vmi, rvm, rvmr, rvmi, mvr,
// zig fmt: on
};
@@ -407,7 +411,7 @@ pub const Op = enum {
moffs,
sreg,
st, mm, mm_m64,
xmm, xmm_m32, xmm_m64, xmm_m128,
xmm0, xmm, xmm_m32, xmm_m64, xmm_m128,
ymm, ymm_m256,
// zig fmt: on
@@ -436,7 +440,9 @@ pub const Op = enum {
.segment => .sreg,
.x87 => .st,
.mmx => .mm,
.sse => switch (reg.bitSize()) {
.sse => if (reg == .xmm0)
.xmm0
else switch (reg.bitSize()) {
128 => .xmm,
256 => .ymm,
else => unreachable,
@@ -494,7 +500,7 @@ pub const Op = enum {
.eax, .r32, .rm32, .r32_m16 => unreachable,
.rax, .r64, .rm64, .r64_m16 => unreachable,
.st, .mm, .mm_m64 => unreachable,
.xmm, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable,
.xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable,
.ymm, .ymm_m256 => unreachable,
.m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable,
.unity => 1,
@@ -516,7 +522,7 @@ pub const Op = enum {
.eax, .r32, .rm32, .r32_m8, .r32_m16 => 32,
.rax, .r64, .rm64, .r64_m16, .mm, .mm_m64 => 64,
.st => 80,
.xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128,
.xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128,
.ymm, .ymm_m256 => 256,
};
}
@@ -526,7 +532,8 @@ pub const Op = enum {
.none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable,
.unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable,
.rel8, .rel16, .rel32 => unreachable,
.al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64, .st, .mm, .xmm, .ymm => unreachable,
.al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64 => unreachable,
.st, .mm, .xmm0, .xmm, .ymm => unreachable,
.m8, .rm8, .r32_m8 => 8,
.m16, .rm16, .r32_m16, .r64_m16 => 16,
.m32, .rm32, .xmm_m32 => 32,
@@ -558,7 +565,7 @@ pub const Op = enum {
.rm8, .rm16, .rm32, .rm64,
.r32_m8, .r32_m16, .r64_m16,
.st, .mm, .mm_m64,
.xmm, .xmm_m32, .xmm_m64, .xmm_m128,
.xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128,
.ymm, .ymm_m256,
=> true,
else => false,
@@ -612,7 +619,7 @@ pub const Op = enum {
.sreg => .segment,
.st => .x87,
.mm, .mm_m64 => .mmx,
.xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .sse,
.xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .sse,
.ymm, .ymm_m256 => .sse,
};
}
@@ -629,7 +636,7 @@ pub const Op = enum {
else => {
if (op.isRegister() and target.isRegister()) {
return switch (target) {
.cl, .al, .ax, .eax, .rax => op == target,
.cl, .al, .ax, .eax, .rax, .xmm0 => op == target,
else => op.class() == target.class() and op.regBitSize() == target.regBitSize(),
};
}

View File

@@ -377,6 +377,7 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
.r => inst.data.r.fixes,
.rr => inst.data.rr.fixes,
.rrr => inst.data.rrr.fixes,
.rrrr => inst.data.rrrr.fixes,
.rrri => inst.data.rrri.fixes,
.rri_s, .rri_u => inst.data.rri.fixes,
.ri_s, .ri_u => inst.data.ri.fixes,
@@ -430,6 +431,12 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
.{ .reg = inst.data.rrr.r2 },
.{ .reg = inst.data.rrr.r3 },
},
.rrrr => &.{
.{ .reg = inst.data.rrrr.r1 },
.{ .reg = inst.data.rrrr.r2 },
.{ .reg = inst.data.rrrr.r3 },
.{ .reg = inst.data.rrrr.r4 },
},
.rrri => &.{
.{ .reg = inst.data.rrri.r1 },
.{ .reg = inst.data.rrri.r2 },

View File

@@ -596,6 +596,16 @@ pub const Inst = struct {
/// Replicate single floating-point values
movsldup,
/// Blend packed single-precision floating-point values
/// Blend scalar single-precision floating-point values
/// Blend packed double-precision floating-point values
/// Blend scalar double-precision floating-point values
blend,
/// Variable blend packed single-precision floating-point values
/// Variable blend scalar single-precision floating-point values
/// Variable blend packed double-precision floating-point values
/// Variable blend scalar double-precision floating-point values
blendv,
/// Extract packed floating-point values
extract,
/// Insert scalar single-precision floating-point value
@@ -651,6 +661,9 @@ pub const Inst = struct {
/// Register, register, register operands.
/// Uses `rrr` payload.
rrr,
/// Register, register, register, register operands.
/// Uses `rrrr` payload.
rrrr,
/// Register, register, register, immediate (byte) operands.
/// Uses `rrri` payload.
rrri,
@@ -870,6 +883,13 @@ pub const Inst = struct {
r2: Register,
r3: Register,
},
rrrr: struct {
fixes: Fixes = ._,
r1: Register,
r2: Register,
r3: Register,
r4: Register,
},
rrri: struct {
fixes: Fixes = ._,
r1: Register,

View File

@@ -226,8 +226,8 @@ pub const Instruction = struct {
else => {
const mem_op = switch (data.op_en) {
.m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0],
.rm, .rmi, .vmi => inst.ops[1],
.rvm, .rvmi => inst.ops[2],
.rm, .rmi, .rm0, .vmi => inst.ops[1],
.rvm, .rvmr, .rvmi => inst.ops[2],
else => unreachable,
};
switch (mem_op) {
@@ -235,7 +235,7 @@ pub const Instruction = struct {
const rm = switch (data.op_en) {
.m, .mi, .m1, .mc, .vmi => enc.modRmExt(),
.mr, .mri, .mrc => inst.ops[1].reg.lowEnc(),
.rm, .rmi, .rvm, .rvmi => inst.ops[0].reg.lowEnc(),
.rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0].reg.lowEnc(),
.mvr => inst.ops[2].reg.lowEnc(),
else => unreachable,
};
@@ -245,7 +245,7 @@ pub const Instruction = struct {
const op = switch (data.op_en) {
.m, .mi, .m1, .mc, .vmi => .none,
.mr, .mri, .mrc => inst.ops[1],
.rm, .rmi, .rvm, .rvmi => inst.ops[0],
.rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0],
.mvr => inst.ops[2],
else => unreachable,
};
@@ -257,6 +257,7 @@ pub const Instruction = struct {
switch (data.op_en) {
.mi => try encodeImm(inst.ops[1].imm, data.ops[1], encoder),
.rmi, .mri, .vmi => try encodeImm(inst.ops[2].imm, data.ops[2], encoder),
.rvmr => try encoder.imm8(@as(u8, inst.ops[3].reg.enc()) << 4),
.rvmi => try encodeImm(inst.ops[3].imm, data.ops[3], encoder),
else => {},
}
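
The rvmr encoding added here is the VEX /is4 form used by vblendvps/vblendvpd: the fourth register operand is not carried in ModRM or VEX.vvvv but in the high nibble of a trailing immediate byte, which is what the encoder.imm8(... << 4) call above emits. A hypothetical standalone check of that byte layout (not part of the commit):

const std = @import("std");

test "/is4 byte carries the fourth register in its high nibble" {
    // Assumed example: xmm7 has register encoding 7, so its /is4 byte is 0x70.
    const fourth_reg_enc: u8 = 7;
    try std.testing.expectEqual(@as(u8, 0x70), fourth_reg_enc << 4);
}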
@@ -298,7 +299,7 @@ pub const Instruction = struct {
.i, .zi, .o, .oi, .d, .np => null,
.fd => inst.ops[1].mem.base().reg,
.td => inst.ops[0].mem.base().reg,
.rm, .rmi => if (inst.ops[1].isSegmentRegister())
.rm, .rmi, .rm0 => if (inst.ops[1].isSegmentRegister())
switch (inst.ops[1]) {
.reg => |reg| reg,
.mem => |mem| mem.base().reg,
@@ -314,7 +315,7 @@ pub const Instruction = struct {
}
else
null,
.vmi, .rvm, .rvmi, .mvr => unreachable,
.vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable,
};
if (segment_override) |seg| {
legacy.setSegmentOverride(seg);
@@ -333,23 +334,23 @@ pub const Instruction = struct {
switch (op_en) {
.np, .i, .zi, .fd, .td, .d => {},
.o, .oi => rex.b = inst.ops[0].reg.isExtended(),
.m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc => {
.m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0 => {
const r_op = switch (op_en) {
.rm, .rmi => inst.ops[0],
.rm, .rmi, .rm0 => inst.ops[0],
.mr, .mri, .mrc => inst.ops[1],
else => .none,
};
rex.r = r_op.isBaseExtended();
const b_x_op = switch (op_en) {
.rm, .rmi => inst.ops[1],
.rm, .rmi, .rm0 => inst.ops[1],
.m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.ops[0],
else => unreachable,
};
rex.b = b_x_op.isBaseExtended();
rex.x = b_x_op.isIndexExtended();
},
.vmi, .rvm, .rvmi, .mvr => unreachable,
.vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable,
}
try encoder.rex(rex);
@@ -367,9 +368,9 @@ pub const Instruction = struct {
switch (op_en) {
.np, .i, .zi, .fd, .td, .d => {},
.o, .oi => vex.b = inst.ops[0].reg.isExtended(),
.m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .vmi, .rvm, .rvmi, .mvr => {
.m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr => {
const r_op = switch (op_en) {
.rm, .rmi, .rvm, .rvmi => inst.ops[0],
.rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0],
.mr, .mri, .mrc => inst.ops[1],
.mvr => inst.ops[2],
.m, .mi, .m1, .mc, .vmi => .none,
@@ -378,9 +379,9 @@ pub const Instruction = struct {
vex.r = r_op.isBaseExtended();
const b_x_op = switch (op_en) {
.rm, .rmi, .vmi => inst.ops[1],
.rm, .rmi, .rm0, .vmi => inst.ops[1],
.m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0],
.rvm, .rvmi => inst.ops[2],
.rvm, .rvmr, .rvmi => inst.ops[2],
else => unreachable,
};
vex.b = b_x_op.isBaseExtended();
@@ -408,7 +409,7 @@ pub const Instruction = struct {
switch (op_en) {
else => {},
.vmi => vex.v = inst.ops[0].reg,
.rvm, .rvmi => vex.v = inst.ops[1].reg,
.rvm, .rvmr, .rvmi => vex.v = inst.ops[1].reg,
}
try encoder.vex(vex);

View File

@@ -846,6 +846,8 @@ pub const table = [_]Entry{
.{ .andps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .none, .sse },
.{ .cmpps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc2 }, 0, .none, .sse },
.{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .none, .sse },
.{ .cvtpi2ps, .rm, &.{ .xmm, .mm_m64 }, &.{ 0x0f, 0x2a }, 0, .none, .sse },
@@ -917,6 +919,8 @@ pub const table = [_]Entry{
.{ .andpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .none, .sse2 },
.{ .cmppd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc2 }, 0, .none, .sse2 },
.{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .none, .sse2 },
.{ .cvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .none, .sse2 },
@@ -1085,6 +1089,14 @@ pub const table = [_]Entry{
.{ .movsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .none, .sse3 },
// SSE4.1
.{ .blendpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .none, .sse4_1 },
.{ .blendps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0c }, 0, .none, .sse4_1 },
.{ .blendvpd, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x15 }, 0, .none, .sse4_1 },
.{ .blendvps, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x14 }, 0, .none, .sse4_1 },
.{ .extractps, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x17 }, 0, .none, .sse4_1 },
.{ .insertps, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .none, .sse4_1 },
@@ -1146,11 +1158,33 @@ pub const table = [_]Entry{
.{ .vandps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .vex_128_wig, .avx },
.{ .vandps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x54 }, 0, .vex_256_wig, .avx },
.{ .vblendpd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .vex_128_wig, .avx },
.{ .vblendpd, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .vex_256_wig, .avx },
.{ .vblendps, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0c }, 0, .vex_128_wig, .avx },
.{ .vblendps, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0c }, 0, .vex_256_wig, .avx },
.{ .vblendvpd, .rvmr, &.{ .xmm, .xmm, .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x3a, 0x4b }, 0, .vex_128_w0, .avx },
.{ .vblendvpd, .rvmr, &.{ .ymm, .ymm, .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x3a, 0x4b }, 0, .vex_256_w0, .avx },
.{ .vblendvps, .rvmr, &.{ .xmm, .xmm, .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x3a, 0x4a }, 0, .vex_128_w0, .avx },
.{ .vblendvps, .rvmr, &.{ .ymm, .ymm, .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x3a, 0x4a }, 0, .vex_256_w0, .avx },
.{ .vbroadcastss, .rm, &.{ .xmm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx },
.{ .vbroadcastss, .rm, &.{ .ymm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx },
.{ .vbroadcastsd, .rm, &.{ .ymm, .m64 }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx },
.{ .vbroadcastf128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x1a }, 0, .vex_256_w0, .avx },
.{ .vcmppd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc2 }, 0, .vex_128_wig, .avx },
.{ .vcmppd, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0xc2 }, 0, .vex_256_wig, .avx },
.{ .vcmpps, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc2 }, 0, .vex_128_wig, .avx },
.{ .vcmpps, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x0f, 0xc2 }, 0, .vex_256_wig, .avx },
.{ .vcmpsd, .rvmi, &.{ .xmm, .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .vex_lig_wig, .avx },
.{ .vcmpss, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .vex_lig_wig, .avx },
.{ .vcvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx },
.{ .vcvtdq2pd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx },

View File

@@ -24,7 +24,8 @@ test "@max" {
test "@max on vectors" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64 and
!comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -72,7 +73,8 @@ test "@min" {
test "@min for vectors" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64 and
!comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO