mirror of
https://github.com/ziglang/zig.git
synced 2025-12-27 00:23:22 +00:00
x86_64: implement @splat
This commit is contained in:
parent
1336619979
commit
c23e80e671
@ -8561,7 +8561,8 @@ fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag {
|
||||
},
|
||||
32 => switch (ty.vectorLen()) {
|
||||
1 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov },
|
||||
2...4 => return if (self.hasFeature(.avx))
|
||||
2 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov },
|
||||
3...4 => return if (self.hasFeature(.avx))
|
||||
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
|
||||
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
|
||||
5...8 => if (self.hasFeature(.avx))
|
||||
@ -8577,6 +8578,14 @@ fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag {
|
||||
return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
|
||||
else => {},
|
||||
},
|
||||
128 => switch (ty.vectorLen()) {
|
||||
1 => return if (self.hasFeature(.avx))
|
||||
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
|
||||
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
|
||||
2 => if (self.hasFeature(.avx))
|
||||
return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
|
||||
else => {},
|
||||
},
|
||||
else => {},
|
||||
},
|
||||
else => {},
|
||||
@ -9939,9 +9948,200 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void {
|
||||
|
||||
/// Lowers the `splat` AIR instruction by broadcasting the scalar operand into
/// every lane of the result vector.
///
/// Only float element types of 32, 64 and 128 bits are lowered here. Any other
/// element type, an unsupported vector length, or a missing CPU feature falls
/// through to the "TODO implement airSplat" failure at the end of the function.
fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
    const ty_op = self.air.instructions.items(.data)[inst].ty_op;
    const vector_ty = self.air.typeOfIndex(inst);
    const dst_rc = regClassForType(vector_ty);
    const scalar_ty = vector_ty.scalarType();

    const src_mcv = try self.resolveInst(ty_op.operand);
    const result: MCValue = result: {
        switch (scalar_ty.zigTypeTag()) {
            else => {},
            .Float => switch (scalar_ty.floatBits(self.target.*)) {
                32 => switch (vector_ty.vectorLen()) {
                    1 => {
                        // A one-element vector is just the scalar itself.
                        if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        try self.genSetReg(dst_reg, scalar_ty, src_mcv);
                        break :result .{ .register = dst_reg };
                    },
                    2...4 => {
                        if (self.hasFeature(.avx)) {
                            const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                            if (src_mcv.isMemory()) try self.asmRegisterMemory(
                                .{ .v_ss, .broadcast },
                                dst_reg.to128(),
                                src_mcv.mem(.dword),
                            ) else {
                                const src_reg = if (src_mcv.isRegister())
                                    src_mcv.getReg().?
                                else
                                    try self.copyToTmpRegister(scalar_ty, src_mcv);
                                // Shuffle with an all-zero selector: every
                                // destination lane takes source lane 0.
                                try self.asmRegisterRegisterRegisterImmediate(
                                    .{ .v_ps, .shuf },
                                    dst_reg.to128(),
                                    src_reg.to128(),
                                    src_reg.to128(),
                                    Immediate.u(0),
                                );
                            }
                            break :result .{ .register = dst_reg };
                        } else {
                            // The non-VEX shuffle is destructive, so the
                            // scalar must already live in the destination.
                            const dst_mcv = if (src_mcv.isRegister() and
                                self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
                                src_mcv
                            else
                                try self.copyToRegisterWithInstTracking(inst, scalar_ty, src_mcv);
                            const dst_reg = dst_mcv.getReg().?;
                            try self.asmRegisterRegisterImmediate(
                                .{ ._ps, .shuf },
                                dst_reg.to128(),
                                dst_reg.to128(),
                                Immediate.u(0),
                            );
                            break :result dst_mcv;
                        }
                    },
                    5...8 => if (self.hasFeature(.avx)) {
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        if (src_mcv.isMemory()) try self.asmRegisterMemory(
                            .{ .v_ss, .broadcast },
                            dst_reg.to256(),
                            src_mcv.mem(.dword),
                        ) else {
                            const src_reg = if (src_mcv.isRegister())
                                src_mcv.getReg().?
                            else
                                try self.copyToTmpRegister(scalar_ty, src_mcv);
                            if (self.hasFeature(.avx2)) try self.asmRegisterRegister(
                                .{ .v_ss, .broadcast },
                                dst_reg.to256(),
                                src_reg.to128(),
                            ) else {
                                // Without AVX2 there is no register-source
                                // broadcast: fill the low 128 bits first, then
                                // copy them into the upper half.
                                try self.asmRegisterRegisterRegisterImmediate(
                                    .{ .v_ps, .shuf },
                                    dst_reg.to128(),
                                    src_reg.to128(),
                                    src_reg.to128(),
                                    Immediate.u(0),
                                );
                                try self.asmRegisterRegisterRegisterImmediate(
                                    .{ .v_f128, .insert },
                                    dst_reg.to256(),
                                    dst_reg.to256(),
                                    dst_reg.to128(),
                                    Immediate.u(1),
                                );
                            }
                        }
                        break :result .{ .register = dst_reg };
                    },
                    else => {},
                },
                64 => switch (vector_ty.vectorLen()) {
                    1 => {
                        if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        try self.genSetReg(dst_reg, scalar_ty, src_mcv);
                        break :result .{ .register = dst_reg };
                    },
                    2 => {
                        if (self.hasFeature(.sse3)) {
                            const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                            if (src_mcv.isMemory()) try self.asmRegisterMemory(
                                if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
                                dst_reg.to128(),
                                src_mcv.mem(.qword),
                            ) else try self.asmRegisterRegister(
                                if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
                                dst_reg.to128(),
                                (if (src_mcv.isRegister())
                                    src_mcv.getReg().?
                                else
                                    try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(),
                            );
                            break :result .{ .register = dst_reg };
                        } else {
                            // movlhps only writes the upper 64 bits of its
                            // destination, so the scalar has to be in the low
                            // lane of the destination register beforehand.
                            // Place it there (reusing the operand register
                            // when possible), then duplicate low -> high.
                            const dst_mcv = if (src_mcv.isRegister() and
                                self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
                                src_mcv
                            else
                                try self.copyToRegisterWithInstTracking(inst, scalar_ty, src_mcv);
                            const dst_reg = dst_mcv.getReg().?;
                            try self.asmRegisterRegister(
                                .{ ._ps, .movlh },
                                dst_reg.to128(),
                                dst_reg.to128(),
                            );
                            break :result dst_mcv;
                        }
                    },
                    3...4 => if (self.hasFeature(.avx)) {
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        if (src_mcv.isMemory()) try self.asmRegisterMemory(
                            .{ .v_sd, .broadcast },
                            dst_reg.to256(),
                            src_mcv.mem(.qword),
                        ) else {
                            const src_reg = if (src_mcv.isRegister())
                                src_mcv.getReg().?
                            else
                                try self.copyToTmpRegister(scalar_ty, src_mcv);
                            if (self.hasFeature(.avx2)) try self.asmRegisterRegister(
                                .{ .v_sd, .broadcast },
                                dst_reg.to256(),
                                src_reg.to128(),
                            ) else {
                                // Same two-step fallback as the f32 case:
                                // duplicate within the low 128 bits, then
                                // mirror them into the upper half.
                                try self.asmRegisterRegister(
                                    .{ .v_, .movddup },
                                    dst_reg.to128(),
                                    src_reg.to128(),
                                );
                                try self.asmRegisterRegisterRegisterImmediate(
                                    .{ .v_f128, .insert },
                                    dst_reg.to256(),
                                    dst_reg.to256(),
                                    dst_reg.to128(),
                                    Immediate.u(1),
                                );
                            }
                        }
                        break :result .{ .register = dst_reg };
                    },
                    else => {},
                },
                128 => switch (vector_ty.vectorLen()) {
                    1 => {
                        if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        try self.genSetReg(dst_reg, scalar_ty, src_mcv);
                        break :result .{ .register = dst_reg };
                    },
                    2 => if (self.hasFeature(.avx)) {
                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
                        if (src_mcv.isMemory()) try self.asmRegisterMemory(
                            .{ .v_f128, .broadcast },
                            dst_reg.to256(),
                            src_mcv.mem(.xword),
                        ) else {
                            const src_reg = if (src_mcv.isRegister())
                                src_mcv.getReg().?
                            else
                                try self.copyToTmpRegister(scalar_ty, src_mcv);
                            // The low half comes from the source register;
                            // the upper half is a copy of that same low half.
                            try self.asmRegisterRegisterRegisterImmediate(
                                .{ .v_f128, .insert },
                                dst_reg.to256(),
                                src_reg.to256(),
                                src_reg.to128(),
                                Immediate.u(1),
                            );
                        }
                        break :result .{ .register = dst_reg };
                    },
                    else => {},
                },
                // f16 and f80 splats are not lowered yet.
                16, 80 => {},
                else => unreachable,
            },
        }
        return self.fail("TODO implement airSplat for {}", .{
            vector_ty.fmt(self.bin_file.options.module.?),
        });
    };
    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
|
||||
|
||||
fn airSelect(self: *Self, inst: Air.Inst.Index) !void {
|
||||
|
||||
@ -270,10 +270,12 @@ pub const Mnemonic = enum {
|
||||
divps, divss,
|
||||
maxps, maxss,
|
||||
minps, minss,
|
||||
movaps, movhlps, movss, movups,
|
||||
movaps, movhlps, movlhps,
|
||||
movss, movups,
|
||||
mulps, mulss,
|
||||
orps,
|
||||
pextrw, pinsrw,
|
||||
shufps,
|
||||
sqrtps, sqrtss,
|
||||
subps, subss,
|
||||
ucomiss,
|
||||
@ -296,6 +298,7 @@ pub const Mnemonic = enum {
|
||||
psrld, psrlq, psrlw,
|
||||
punpckhbw, punpckhdq, punpckhqdq, punpckhwd,
|
||||
punpcklbw, punpckldq, punpcklqdq, punpcklwd,
|
||||
shufpd,
|
||||
sqrtpd, sqrtsd,
|
||||
subpd, subsd,
|
||||
ucomisd,
|
||||
@ -303,17 +306,22 @@ pub const Mnemonic = enum {
|
||||
// SSE3
|
||||
movddup, movshdup, movsldup,
|
||||
// SSE4.1
|
||||
extractps,
|
||||
insertps,
|
||||
pextrb, pextrd, pextrq,
|
||||
pinsrb, pinsrd, pinsrq,
|
||||
roundpd, roundps, roundsd, roundss,
|
||||
// AVX
|
||||
vaddpd, vaddps, vaddsd, vaddss,
|
||||
vbroadcastf128, vbroadcastsd, vbroadcastss,
|
||||
vcvtsd2ss, vcvtsi2sd, vcvtsi2ss, vcvtss2sd,
|
||||
vdivpd, vdivps, vdivsd, vdivss,
|
||||
vextractf128, vextractps,
|
||||
vinsertf128, vinsertps,
|
||||
vmaxpd, vmaxps, vmaxsd, vmaxss,
|
||||
vminpd, vminps, vminsd, vminss,
|
||||
vmovapd, vmovaps,
|
||||
vmovddup, vmovhlps,
|
||||
vmovddup, vmovhlps, vmovlhps,
|
||||
vmovsd,
|
||||
vmovshdup, vmovsldup,
|
||||
vmovss,
|
||||
@ -326,6 +334,7 @@ pub const Mnemonic = enum {
|
||||
vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd,
|
||||
vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd,
|
||||
vroundpd, vroundps, vroundsd, vroundss,
|
||||
vshufpd, vshufps,
|
||||
vsqrtpd, vsqrtps, vsqrtsd, vsqrtss,
|
||||
vsubpd, vsubps, vsubsd, vsubss,
|
||||
// F16C
|
||||
|
||||
@ -300,6 +300,8 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
|
||||
else
|
||||
.none,
|
||||
}, mnemonic: {
|
||||
@setEvalBranchQuota(2_000);
|
||||
|
||||
comptime var max_len = 0;
|
||||
inline for (@typeInfo(Mnemonic).Enum.fields) |field| max_len = @max(field.name.len, max_len);
|
||||
var buf: [max_len]u8 = undefined;
|
||||
|
||||
@ -256,6 +256,8 @@ pub const Inst = struct {
|
||||
v_sd,
|
||||
/// VEX-Encoded ___ Packed Double-Precision Values
|
||||
v_pd,
|
||||
/// VEX-Encoded ___ 128-Bits Of Floating-Point Data
|
||||
v_f128,
|
||||
|
||||
/// Mask ___ Byte
|
||||
k_b,
|
||||
@ -454,6 +456,8 @@ pub const Inst = struct {
|
||||
mova,
|
||||
/// Move packed single-precision floating-point values high to low
|
||||
movhl,
|
||||
/// Move packed single-precision floating-point values low to high
|
||||
movlh,
|
||||
/// Move unaligned packed single-precision floating-point values
|
||||
/// Move unaligned packed double-precision floating-point values
|
||||
movu,
|
||||
@ -488,6 +492,9 @@ pub const Inst = struct {
|
||||
cvtsi2sd,
|
||||
/// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
|
||||
cvtss2sd,
|
||||
/// Packed interleave shuffle of quadruplets of single-precision floating-point values
|
||||
/// Packed interleave shuffle of pairs of double-precision floating-point values
|
||||
shuf,
|
||||
/// Shuffle packed high words
|
||||
shufh,
|
||||
/// Shuffle packed low words
|
||||
@ -520,12 +527,20 @@ pub const Inst = struct {
|
||||
/// Replicate single floating-point values
|
||||
movsldup,
|
||||
|
||||
/// Extract packed floating-point values
|
||||
extract,
|
||||
/// Insert scalar single-precision floating-point value
|
||||
/// Insert packed floating-point values
|
||||
insert,
|
||||
/// Round packed single-precision floating-point values
|
||||
/// Round scalar single-precision floating-point value
|
||||
/// Round packed double-precision floating-point values
|
||||
/// Round scalar double-precision floating-point value
|
||||
round,
|
||||
|
||||
/// Load with broadcast floating-point data
|
||||
broadcast,
|
||||
|
||||
/// Convert 16-bit floating-point values to single-precision floating-point values
|
||||
cvtph2ps,
|
||||
/// Convert single-precision floating-point values to 16-bit floating-point values
|
||||
|
||||
@ -867,6 +867,8 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .movhlps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .none, .sse },
|
||||
|
||||
.{ .movlhps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .none, .sse },
|
||||
|
||||
.{ .movss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .none, .sse },
|
||||
.{ .movss, .mr, &.{ .xmm_m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .none, .sse },
|
||||
|
||||
@ -879,14 +881,16 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .orps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .none, .sse },
|
||||
|
||||
.{ .subps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .none, .sse },
|
||||
|
||||
.{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .none, .sse },
|
||||
.{ .shufps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .none, .sse },
|
||||
|
||||
.{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .none, .sse },
|
||||
|
||||
.{ .sqrtss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .none, .sse },
|
||||
|
||||
.{ .subps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .none, .sse },
|
||||
|
||||
.{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .none, .sse },
|
||||
|
||||
.{ .ucomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2e }, 0, .none, .sse },
|
||||
|
||||
.{ .xorps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .none, .sse },
|
||||
@ -967,6 +971,8 @@ pub const table = [_]Entry{
|
||||
.{ .punpckldq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .none, .sse2 },
|
||||
.{ .punpcklqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .none, .sse2 },
|
||||
|
||||
.{ .shufpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .none, .sse2 },
|
||||
@ -990,6 +996,10 @@ pub const table = [_]Entry{
|
||||
.{ .movsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .none, .sse3 },
|
||||
|
||||
// SSE4.1
|
||||
.{ .extractps, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x17 }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .insertps, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .pextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .none, .sse4_1 },
|
||||
.{ .pextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .none, .sse4_1 },
|
||||
.{ .pextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .long, .sse4_1 },
|
||||
@ -1019,6 +1029,11 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .vaddss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vbroadcastss, .rm, &.{ .xmm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx },
|
||||
.{ .vbroadcastss, .rm, &.{ .ymm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx },
|
||||
.{ .vbroadcastsd, .rm, &.{ .ymm, .m64 }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx },
|
||||
.{ .vbroadcastf128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x1a }, 0, .vex_256_w0, .avx },
|
||||
|
||||
.{ .vcvtsd2ss, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx },
|
||||
@ -1039,6 +1054,14 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .vdivss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vextractf128, .mri, &.{ .xmm_m128, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x19 }, 0, .vex_256_w0, .avx },
|
||||
|
||||
.{ .vextractps, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x17 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vinsertf128, .rvmi, &.{ .ymm, .ymm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x18 }, 0, .vex_256_w0, .avx },
|
||||
|
||||
.{ .vinsertps, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vmaxpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmaxpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_256_wig, .avx },
|
||||
|
||||
@ -1074,6 +1097,8 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .vmovhlps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vmovlhps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
|
||||
.{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
|
||||
.{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx },
|
||||
@ -1150,6 +1175,12 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .vroundss, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vshufpd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vshufpd, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vshufps, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vshufps, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vsqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vsqrtpd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
@ -1201,6 +1232,10 @@ pub const table = [_]Entry{
|
||||
.{ .vfmadd231ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_lig_w0, .fma },
|
||||
|
||||
// AVX2
|
||||
.{ .vbroadcastss, .rm, &.{ .xmm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx2 },
|
||||
.{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
|
||||
.{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
|
||||
|
||||
.{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpsrlw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_256_wig, .avx2 },
|
||||
.{ .vpsrld, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user