From 05580b9453e4ae2d9b62fe4178651937d8b73989 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sun, 7 May 2023 03:14:31 -0400 Subject: [PATCH] x86_64: implement float cast from `f16` to `f64` --- src/arch/x86_64/CodeGen.zig | 95 ++++++++++++----- src/arch/x86_64/Encoding.zig | 163 +++++++++++++++++------------ src/arch/x86_64/Lower.zig | 4 + src/arch/x86_64/Mir.zig | 8 ++ src/arch/x86_64/encoder.zig | 33 ++---- src/arch/x86_64/encodings.zig | 189 ++++++++++++++++++---------------- test/behavior/floatop.zig | 3 +- 7 files changed, 288 insertions(+), 207 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 3e47ef63f6..38497400f2 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2287,26 +2287,46 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { src_mcv else try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); - const dst_lock = self.register_manager.lockReg(dst_mcv.register); + const dst_reg = dst_mcv.getReg().?.to128(); + const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - if (src_bits == 32 and dst_bits == 16 and self.hasFeature(.f16c)) - try self.asmRegisterRegisterImmediate( - .vcvtps2ph, - dst_mcv.register, - if (src_mcv.isRegister()) src_mcv.getReg().? else src_reg: { - const src_reg = dst_mcv.register; - try self.genSetReg(src_reg, src_ty, src_mcv); - break :src_reg src_reg; + if (dst_bits == 16 and self.hasFeature(.f16c)) { + switch (src_bits) { + 32 => { + const mat_src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv); + try self.asmRegisterRegisterImmediate( + .vcvtps2ph, + dst_reg, + mat_src_reg.to128(), + Immediate.u(0b1_00), + ); }, - Immediate.u(0b1_00), - ) - else if (src_bits == 64 and dst_bits == 32) - try self.genBinOpMir(.cvtsd2ss, src_ty, dst_mcv, src_mcv) - else - return self.fail("TODO implement airFptrunc from {} to {}", .{ - src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), - }); + else => return self.fail("TODO implement airFptrunc from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }), + } + } else if (src_bits == 64 and dst_bits == 32) { + if (self.hasFeature(.avx)) if (src_mcv.isRegister()) try self.asmRegisterRegisterRegister( + .vcvtsd2ss, + dst_reg, + dst_reg, + src_mcv.getReg().?.to128(), + ) else try self.asmRegisterRegisterMemory( + .vcvtsd2ss, + dst_reg, + dst_reg, + src_mcv.mem(.qword), + ) else if (src_mcv.isRegister()) + try self.asmRegisterRegister(.cvtsd2ss, dst_reg, src_mcv.getReg().?.to128()) + else + try self.asmRegisterMemory(.cvtsd2ss, dst_reg, src_mcv.mem(.qword)); + } else return self.fail("TODO implement airFptrunc from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }); return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } @@ -2322,22 +2342,41 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { src_mcv else try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); - const dst_lock = self.register_manager.lockReg(dst_mcv.register); + const dst_reg = dst_mcv.getReg().?.to128(); + const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - try self.genBinOpMir( - if (src_bits == 16 and dst_bits == 32 and self.hasFeature(.f16c)) - .vcvtph2ps - else if (src_bits == 32 and dst_bits == 64) - .cvtss2sd + if (src_bits == 16 and self.hasFeature(.f16c)) { + const mat_src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? else - return self.fail("TODO implement airFpext from {} to {}", .{ + try self.copyToTmpRegister(src_ty, src_mcv); + try self.asmRegisterRegister(.vcvtph2ps, dst_reg, mat_src_reg.to128()); + switch (dst_bits) { + 32 => {}, + 64 => try self.asmRegisterRegisterRegister(.vcvtss2sd, dst_reg, dst_reg, dst_reg), + else => return self.fail("TODO implement airFpext from {} to {}", .{ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }), - src_ty, - dst_mcv, - src_mcv, - ); + } + } else if (src_bits == 32 and dst_bits == 64) { + if (self.hasFeature(.avx)) if (src_mcv.isRegister()) try self.asmRegisterRegisterRegister( + .vcvtss2sd, + dst_reg, + dst_reg, + src_mcv.getReg().?.to128(), + ) else try self.asmRegisterRegisterMemory( + .vcvtss2sd, + dst_reg, + dst_reg, + src_mcv.mem(.dword), + ) else if (src_mcv.isRegister()) + try self.asmRegisterRegister(.cvtss2sd, dst_reg, src_mcv.getReg().?.to128()) + else + try self.asmRegisterMemory(.cvtss2sd, dst_reg, src_mcv.mem(.dword)); + } else return self.fail("TODO implement airFpext from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }); return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 1fd1112aaf..bd6e70c975 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -89,30 +89,13 @@ pub fn findByOpcode(opc: []const u8, prefixes: struct { if (modrm_ext) |ext| if (ext != data.modrm_ext) continue; if (!std.mem.eql(u8, opc, enc.opcode())) continue; if (prefixes.rex.w) { - switch (data.mode) { - .none, .short, .rex, .rex_short, .vex_128, .vex_256 => continue, - .long, .vex_128_long, .vex_256_long => {}, - } + if (!data.mode.isLong()) continue; } else if (prefixes.rex.present and !prefixes.rex.isSet()) { - switch (data.mode) { - .rex, .rex_short => {}, - else => continue, - } + if (!data.mode.isRex()) continue; } else if (prefixes.legacy.prefix_66) { - switch (data.mode) { - .short, .rex_short => {}, - .none, .rex, .vex_128, .vex_256 => continue, - .long, .vex_128_long, .vex_256_long => continue, - } + if (!data.mode.isShort()) continue; } else { - switch (data.mode) { - .none => switch (data.mode) { - .short, .rex_short => continue, - .none, .rex, .vex_128, .vex_256 => {}, - .long, .vex_128_long, .vex_256_long => {}, - }, - else => continue, - } + if (data.mode.isShort()) continue; } return enc; }; @@ -148,50 +131,39 @@ pub fn format( _ = fmt; var opc = encoding.opcode(); - switch (encoding.data.mode) { - else => {}, - .long => try writer.writeAll("REX.W + "), - .vex_128, .vex_128_long, .vex_256, .vex_256_long => { - try writer.writeAll("VEX."); + if (encoding.data.mode.isVex()) { + try writer.writeAll("VEX."); - switch (encoding.data.mode) { - .vex_128, .vex_128_long => try writer.writeAll("128"), - .vex_256, .vex_256_long => try writer.writeAll("256"), - else => unreachable, - } + try writer.writeAll(switch (encoding.data.mode) { + .vex_128_w0, .vex_128_w1, .vex_128_wig => "128", + .vex_256_w0, .vex_256_w1, .vex_256_wig => "256", + .vex_lig_w0, .vex_lig_w1, .vex_lig_wig => "LIG", + .vex_lz_w0, .vex_lz_w1, .vex_lz_wig => "LZ", + else => unreachable, + }); - switch (opc[0]) { - else => {}, - 0x66, 0xf3, 0xf2 => { - try writer.print(".{X:0>2}", .{opc[0]}); - opc = opc[1..]; - }, - } + switch (opc[0]) { + else => {}, + 0x66, 0xf3, 0xf2 => { + try writer.print(".{X:0>2}", .{opc[0]}); + opc = opc[1..]; + }, + } - try writer.print(".{X:0>2}", .{opc[0]}); - opc = opc[1..]; + try writer.print(".{}", .{std.fmt.fmtSliceHexUpper(opc[0 .. opc.len - 1])}); + opc = opc[opc.len - 1 ..]; - switch (opc[0]) { - else => {}, - 0x38, 0x3A => { - try writer.print("{X:0>2}", .{opc[0]}); - opc = opc[1..]; - }, - } + try writer.writeAll(".W"); + try writer.writeAll(switch (encoding.data.mode) { + .vex_128_w0, .vex_256_w0, .vex_lig_w0, .vex_lz_w0 => "0", + .vex_128_w1, .vex_256_w1, .vex_lig_w1, .vex_lz_w1 => "1", + .vex_128_wig, .vex_256_wig, .vex_lig_wig, .vex_lz_wig => "IG", + else => unreachable, + }); - try writer.writeByte('.'); - try writer.writeAll(switch (encoding.data.mode) { - .vex_128, .vex_256 => "W0", - .vex_128_long, .vex_256_long => "W1", - else => unreachable, - }); - try writer.writeByte(' '); - }, - } - - for (opc) |byte| { - try writer.print("{x:0>2} ", .{byte}); - } + try writer.writeByte(' '); + } else if (encoding.data.mode.isLong()) try writer.writeAll("REX.W + "); + for (opc) |byte| try writer.print("{x:0>2} ", .{byte}); switch (encoding.data.op_en) { .np, .fd, .td, .i, .zi, .d => {}, @@ -332,6 +304,7 @@ pub const Mnemonic = enum { // SSE4.1 roundsd, roundss, // AVX + vcvtsd2ss, vcvtsi2sd, vcvtsi2ss, vcvtss2sd, vmovapd, vmovaps, vmovddup, vmovsd, @@ -629,20 +602,74 @@ pub const Op = enum { }; pub const Mode = enum { + // zig fmt: off none, - short, - long, - rex, - rex_short, - vex_128, - vex_128_long, - vex_256, - vex_256_long, + short, long, + rex, rex_short, + vex_128_w0, vex_128_w1, vex_128_wig, + vex_256_w0, vex_256_w1, vex_256_wig, + vex_lig_w0, vex_lig_w1, vex_lig_wig, + vex_lz_w0, vex_lz_w1, vex_lz_wig, + // zig fmt: on + + pub fn isShort(mode: Mode) bool { + return switch (mode) { + .short, .rex_short => true, + else => false, + }; + } + + pub fn isLong(mode: Mode) bool { + return switch (mode) { + .long, + .vex_128_w1, + .vex_256_w1, + .vex_lig_w1, + .vex_lz_w1, + => true, + else => false, + }; + } + + pub fn isRex(mode: Mode) bool { + return switch (mode) { + else => false, + .rex, .rex_short => true, + }; + } + + pub fn isVex(mode: Mode) bool { + return switch (mode) { + // zig fmt: off + else => false, + .vex_128_w0, .vex_128_w1, .vex_128_wig, + .vex_256_w0, .vex_256_w1, .vex_256_wig, + .vex_lig_w0, .vex_lig_w1, .vex_lig_wig, + .vex_lz_w0, .vex_lz_w1, .vex_lz_wig, + => true, + // zig fmt: on + }; + } + + pub fn isVecLong(mode: Mode) bool { + return switch (mode) { + // zig fmt: off + else => unreachable, + .vex_128_w0, .vex_128_w1, .vex_128_wig, + .vex_lig_w0, .vex_lig_w1, .vex_lig_wig, + .vex_lz_w0, .vex_lz_w1, .vex_lz_wig, + => false, + .vex_256_w0, .vex_256_w1, .vex_256_wig, + => true, + // zig fmt: on + }; + } }; pub const Feature = enum { none, avx, + avx2, f16c, fma, sse, diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index a246a97d4b..40a5ccdb10 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -184,6 +184,10 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .roundsd, .roundss, + .vcvtsd2ss, + .vcvtsi2sd, + .vcvtsi2ss, + .vcvtss2sd, .vmovapd, .vmovaps, .vmovddup, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index de7f2cff53..cb1a578bb6 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -282,6 +282,14 @@ pub const Inst = struct { /// Round scalar single-precision floating-point values roundss, + /// Convert scalar double-precision floating-point value to scalar single-precision floating-point value + vcvtsd2ss, + /// Convert doubleword integer to scalar double-precision floating-point value + vcvtsi2sd, + /// Convert doubleword integer to scalar single-precision floating-point value + vcvtsi2ss, + /// Convert scalar single-precision floating-point value to scalar double-precision floating-point value + vcvtss2sd, /// Move aligned packed double-precision floating-point values vmovapd, /// Move aligned packed single-precision floating-point values diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig index fa6ce676cb..0ce875240d 100644 --- a/src/arch/x86_64/encoder.zig +++ b/src/arch/x86_64/encoder.zig @@ -206,18 +206,15 @@ pub const Instruction = struct { const enc = inst.encoding; const data = enc.data; - switch (data.mode) { - .none, .short, .long, .rex, .rex_short => { - try inst.encodeLegacyPrefixes(encoder); - try inst.encodeMandatoryPrefix(encoder); - try inst.encodeRexPrefix(encoder); - try inst.encodeOpcode(encoder); - }, - .vex_128, .vex_128_long, .vex_256, .vex_256_long => { - try inst.encodeVexPrefix(encoder); - const opc = inst.encoding.opcode(); - try encoder.opcode_1byte(opc[opc.len - 1]); - }, + if (data.mode.isVex()) { + try inst.encodeVexPrefix(encoder); + const opc = inst.encoding.opcode(); + try encoder.opcode_1byte(opc[opc.len - 1]); + } else { + try inst.encodeLegacyPrefixes(encoder); + try inst.encodeMandatoryPrefix(encoder); + try inst.encodeRexPrefix(encoder); + try inst.encodeOpcode(encoder); } switch (data.op_en) { @@ -365,11 +362,7 @@ pub const Instruction = struct { var vex = Vex{}; - vex.w = switch (inst.encoding.data.mode) { - .vex_128, .vex_256 => false, - .vex_128_long, .vex_256_long => true, - else => unreachable, - }; + vex.w = inst.encoding.data.mode.isLong(); switch (op_en) { .np, .i, .zi, .fd, .td, .d => {}, @@ -395,11 +388,7 @@ pub const Instruction = struct { }, } - vex.l = switch (inst.encoding.data.mode) { - .vex_128, .vex_128_long => false, - .vex_256, .vex_256_long => true, - else => unreachable, - }; + vex.l = inst.encoding.data.mode.isVecLong(); vex.p = if (mand_pre) |mand| switch (mand) { 0x66 => .@"66", diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 5096ca5627..5e4dc2f04b 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -918,7 +918,6 @@ pub const table = [_]Entry{ .{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .none, .sse2 }, .{ .pextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .none, .sse2 }, - .{ .pextrw, .rmi, &.{ .r64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .long, .sse2 }, .{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 }, @@ -926,31 +925,23 @@ pub const table = [_]Entry{ .{ .pshuflw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf2, 0x0f, 0x70 }, 0, .none, .sse2 }, + .{ .psrlw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .none, .sse2 }, + .{ .psrlw, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .none, .sse2 }, .{ .psrld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .none, .sse2 }, .{ .psrld, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .none, .sse2 }, - .{ .psrlq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .none, .sse2 }, .{ .psrlq, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .none, .sse2 }, - .{ .psrlw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .none, .sse2 }, - .{ .psrlw, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .none, .sse2 }, - - .{ .punpckhbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .none, .sse2 }, - - .{ .punpckhdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .none, .sse2 }, - + .{ .punpckhbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .none, .sse2 }, + .{ .punpckhwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .none, .sse2 }, + .{ .punpckhdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .none, .sse2 }, .{ .punpckhqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6d }, 0, .none, .sse2 }, - .{ .punpckhwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .none, .sse2 }, - - .{ .punpcklbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x60 }, 0, .none, .sse2 }, - - .{ .punpckldq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .none, .sse2 }, - + .{ .punpcklbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x60 }, 0, .none, .sse2 }, + .{ .punpcklwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x61 }, 0, .none, .sse2 }, + .{ .punpckldq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .none, .sse2 }, .{ .punpcklqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .none, .sse2 }, - .{ .punpcklwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x61 }, 0, .none, .sse2 }, - .{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .none, .sse2 }, .{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .none, .sse2 }, @@ -972,106 +963,128 @@ pub const table = [_]Entry{ // SSE4.1 .{ .pextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .none, .sse4_1 }, - .{ .pextrw, .mri, &.{ .r64_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .long, .sse4_1 }, .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .none, .sse4_1 }, .{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .none, .sse4_1 }, // AVX - .{ .vmovapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_128, .avx }, - .{ .vmovapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_128, .avx }, - .{ .vmovapd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_256, .avx }, - .{ .vmovapd, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_256, .avx }, + .{ .vcvtsd2ss, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx }, - .{ .vmovaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .vex_128, .avx }, - .{ .vmovaps, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x29 }, 0, .vex_128, .avx }, - .{ .vmovaps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x28 }, 0, .vex_256, .avx }, - .{ .vmovaps, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x29 }, 0, .vex_256, .avx }, + .{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx }, + .{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx }, - .{ .vmovddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128, .avx }, + .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx }, + .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx }, - .{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_128, .avx }, - .{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_128, .avx }, - .{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_128, .avx }, - .{ .vmovsd, .mr, &.{ .m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_128, .avx }, + .{ .vcvtss2sd, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx }, - .{ .vmovshdup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .vex_128, .avx }, + .{ .vmovapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_128_wig, .avx }, + .{ .vmovapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_128_wig, .avx }, + .{ .vmovapd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_256_wig, .avx }, + .{ .vmovapd, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_256_wig, .avx }, - .{ .vmovsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .vex_128, .avx }, + .{ .vmovaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .vex_128_wig, .avx }, + .{ .vmovaps, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x29 }, 0, .vex_128_wig, .avx }, + .{ .vmovaps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x28 }, 0, .vex_256_wig, .avx }, + .{ .vmovaps, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x29 }, 0, .vex_256_wig, .avx }, - .{ .vmovss, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_128, .avx }, - .{ .vmovss, .rm, &.{ .xmm, .m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_128, .avx }, - .{ .vmovss, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_128, .avx }, - .{ .vmovss, .mr, &.{ .m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_128, .avx }, + .{ .vmovddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, + .{ .vmovddup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_256_wig, .avx }, - .{ .vmovupd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_128, .avx }, - .{ .vmovupd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_128, .avx }, - .{ .vmovupd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_256, .avx }, - .{ .vmovupd, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_256, .avx }, + .{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, + .{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, + .{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, + .{ .vmovsd, .mr, &.{ .m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, - .{ .vmovups, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .vex_128, .avx }, - .{ .vmovups, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x11 }, 0, .vex_128, .avx }, - .{ .vmovups, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x10 }, 0, .vex_256, .avx }, - .{ .vmovups, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x11 }, 0, .vex_256, .avx }, + .{ .vmovshdup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .vex_128_wig, .avx }, + .{ .vmovshdup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .vex_256_wig, .avx }, - .{ .vpextrw, .mri, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128, .avx }, - .{ .vpextrw, .mri, &.{ .r64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128_long, .avx }, - .{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128, .avx }, - .{ .vpextrw, .mri, &.{ .r64_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_long, .avx }, + .{ .vmovsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, + .{ .vmovsldup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .vex_256_wig, .avx }, - .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128, .avx }, + .{ .vmovss, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, + .{ .vmovss, .rm, &.{ .xmm, .m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, + .{ .vmovss, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, + .{ .vmovss, .mr, &.{ .m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, - .{ .vpsrld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_128, .avx }, - .{ .vpsrld, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .vex_128, .avx }, + .{ .vmovupd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_128_wig, .avx }, + .{ .vmovupd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_128_wig, .avx }, + .{ .vmovupd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_256_wig, .avx }, + .{ .vmovupd, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_256_wig, .avx }, - .{ .vpsrlq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_128, .avx }, - .{ .vpsrlq, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_128, .avx }, + .{ .vmovups, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .vex_128_wig, .avx }, + .{ .vmovups, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x11 }, 0, .vex_128_wig, .avx }, + .{ .vmovups, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x10 }, 0, .vex_256_wig, .avx }, + .{ .vmovups, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x11 }, 0, .vex_256_wig, .avx }, - .{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128, .avx }, - .{ .vpsrlw, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_128, .avx }, + .{ .vpextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128_wig, .avx }, + .{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_wig, .avx }, - .{ .vpunpckhbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_128, .avx }, + .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx }, - .{ .vpunpckhdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_128, .avx }, + .{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128_wig, .avx }, + .{ .vpsrlw, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_128_wig, .avx }, + .{ .vpsrld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_128_wig, .avx }, + .{ .vpsrld, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .vex_128_wig, .avx }, + .{ .vpsrlq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_128_wig, .avx }, + .{ .vpsrlq, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_128_wig, .avx }, - .{ .vpunpckhqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6d }, 0, .vex_128, .avx }, + .{ .vpunpckhbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_128_wig, .avx }, + .{ .vpunpckhwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_128_wig, .avx }, + .{ .vpunpckhdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_128_wig, .avx }, + .{ .vpunpckhqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6d }, 0, .vex_128_wig, .avx }, - .{ .vpunpckhwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_128, .avx }, - - .{ .vpunpcklbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x60 }, 0, .vex_128, .avx }, - - .{ .vpunpckldq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128, .avx }, - - .{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_128, .avx }, - - .{ .vpunpcklwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_128, .avx }, + .{ .vpunpcklbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x60 }, 0, .vex_128_wig, .avx }, + .{ .vpunpcklwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_128_wig, .avx }, + .{ .vpunpckldq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128_wig, .avx }, + .{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_128_wig, .avx }, // F16C - .{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128, .f16c }, + .{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c }, + .{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c }, - .{ .vcvtps2ph, .mri, &.{ .xmm_m64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x1d }, 0, .vex_128, .f16c }, + .{ .vcvtps2ph, .mri, &.{ .xmm_m64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x1d }, 0, .vex_128_w0, .f16c }, + .{ .vcvtps2ph, .mri, &.{ .xmm_m128, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x1d }, 0, .vex_256_w0, .f16c }, // FMA - .{ .vfmadd132pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_128_long, .fma }, - .{ .vfmadd132pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_256_long, .fma }, - .{ .vfmadd213pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_128_long, .fma }, - .{ .vfmadd213pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_256_long, .fma }, - .{ .vfmadd231pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_128_long, .fma }, - .{ .vfmadd231pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_256_long, .fma }, + .{ .vfmadd132pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_128_w1, .fma }, + .{ .vfmadd213pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_128_w1, .fma }, + .{ .vfmadd231pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_128_w1, .fma }, + .{ .vfmadd132pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_256_w1, .fma }, + .{ .vfmadd213pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_256_w1, .fma }, + .{ .vfmadd231pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_256_w1, .fma }, - .{ .vfmadd132ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_128, .fma }, - .{ .vfmadd132ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_256, .fma }, - .{ .vfmadd213ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_128, .fma }, - .{ .vfmadd213ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_256, .fma }, - .{ .vfmadd231ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_128, .fma }, - .{ .vfmadd231ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_256, .fma }, + .{ .vfmadd132ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_128_w0, .fma }, + .{ .vfmadd213ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_128_w0, .fma }, + .{ .vfmadd231ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_128_w0, .fma }, + .{ .vfmadd132ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_256_w0, .fma }, + .{ .vfmadd213ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_256_w0, .fma }, + .{ .vfmadd231ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_256_w0, .fma }, - .{ .vfmadd132sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x99 }, 0, .vex_128_long, .fma }, - .{ .vfmadd213sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0xa9 }, 0, .vex_128_long, .fma }, - .{ .vfmadd231sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_128_long, .fma }, + .{ .vfmadd132sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x99 }, 0, .vex_lig_w1, .fma }, + .{ .vfmadd213sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0xa9 }, 0, .vex_lig_w1, .fma }, + .{ .vfmadd231sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_lig_w1, .fma }, - .{ .vfmadd132ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x99 }, 0, .vex_128, .fma }, - .{ .vfmadd213ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xa9 }, 0, .vex_128, .fma }, - .{ .vfmadd231ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_128, .fma }, + .{ .vfmadd132ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x99 }, 0, .vex_lig_w0, .fma }, + .{ .vfmadd213ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xa9 }, 0, .vex_lig_w0, .fma }, + .{ .vfmadd231ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_lig_w0, .fma }, + + // AVX2 + .{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 }, + .{ .vpsrlw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_256_wig, .avx2 }, + .{ .vpsrld, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_256_wig, .avx2 }, + .{ .vpsrld, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .vex_256_wig, .avx2 }, + .{ .vpsrlq, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_256_wig, .avx2 }, + .{ .vpsrlq, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_256_wig, .avx2 }, + + .{ .vpunpckhbw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpckhwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpckhdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpckhqdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6d }, 0, .vex_256_wig, .avx2 }, + + .{ .vpunpcklbw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x60 }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpcklwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpckldq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpcklqdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_256_wig, .avx2 }, }; // zig fmt: on diff --git a/test/behavior/floatop.zig b/test/behavior/floatop.zig index b98d782da1..ec24407d9f 100644 --- a/test/behavior/floatop.zig +++ b/test/behavior/floatop.zig @@ -52,7 +52,8 @@ fn testFloatComparisons() !void { } test "different sized float comparisons" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .f16c)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO