From 9bea854dc2af293cba1d000b31f6e82d9c431285 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Thu, 4 May 2023 03:36:28 -0400 Subject: [PATCH 01/20] x86_64: implement `@floor`, `@ceil`, and `@trunc` --- src/arch/x86_64/CodeGen.zig | 44 ++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 97e672b71f..fbed0bcf96 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1480,12 +1480,12 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .log, .log2, .log10, - .floor, - .ceil, .round, - .trunc_float, => try self.airUnaryMath(inst), + .floor => try self.airRound(inst, Immediate.u(0b1_0_01)), + .ceil => try self.airRound(inst, Immediate.u(0b1_0_10)), + .trunc_float => try self.airRound(inst, Immediate.u(0b1_0_11)), .sqrt => try self.airSqrt(inst), .neg, .fabs => try self.airFloatSign(inst), @@ -4258,6 +4258,44 @@ fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); } +fn airRound(self: *Self, inst: Air.Inst.Index, mode: Immediate) !void { + const un_op = self.air.instructions.items(.data)[inst].un_op; + const ty = self.air.typeOf(un_op); + + if (!Target.x86.featureSetHas(self.target.cpu.features, .sse4_1)) + return self.fail("TODO implement airRound without sse4_1 feature", .{}); + + const src_mcv = try self.resolveInst(un_op); + const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv)) + src_mcv + else + try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); + + const mir_tag: Mir.Inst.Tag = switch (ty.zigTypeTag()) { + .Float => switch (ty.floatBits(self.target.*)) { + 32 => .roundss, + 64 => .roundsd, + else => return self.fail("TODO implement airRound for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), + }, + else => return self.fail("TODO implement airRound for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), + }; + assert(dst_mcv.isRegister()); + if (src_mcv.isRegister()) + try self.asmRegisterRegisterImmediate(mir_tag, dst_mcv.getReg().?, src_mcv.getReg().?, mode) + else + try self.asmRegisterMemoryImmediate( + mir_tag, + dst_mcv.getReg().?, + src_mcv.mem(Memory.PtrSize.fromSize(@intCast(u32, ty.abiSize(self.target.*)))), + mode, + ); + return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); +} + fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const ty = self.air.typeOf(un_op); From 1a261917ce41efb49fe41ea0c6d9083212c17797 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Thu, 4 May 2023 03:36:04 -0400 Subject: [PATCH 02/20] x86_64: implement `@ctz` and `@clz` for `u128` --- src/arch/x86_64/CodeGen.zig | 93 ++++++++++++++++++++++++++++--------- test/behavior/bugs/2114.zig | 3 +- test/behavior/math.zig | 3 +- 3 files changed, 74 insertions(+), 25 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index fbed0bcf96..55b18985da 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -3798,19 +3798,38 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { const dst_reg = try self.register_manager.allocReg(inst, gp); const dst_mcv = MCValue{ .register = dst_reg }; - const dst_lock = self.register_manager.lockReg(dst_reg); - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); + defer 
self.register_manager.unlockReg(dst_lock); + const src_bits = src_ty.bitSize(self.target.*); if (Target.x86.featureSetHas(self.target.cpu.features, .lzcnt)) { - try self.genBinOpMir(.lzcnt, src_ty, dst_mcv, mat_src_mcv); - const extra_bits = self.regExtraBits(src_ty); - if (extra_bits > 0) { - try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .immediate = extra_bits }); - } + if (src_bits <= 64) { + try self.genBinOpMir(.lzcnt, src_ty, dst_mcv, mat_src_mcv); + + const extra_bits = self.regExtraBits(src_ty); + if (extra_bits > 0) { + try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .immediate = extra_bits }); + } + } else if (src_bits <= 128) { + const tmp_reg = try self.register_manager.allocReg(null, gp); + const tmp_mcv = MCValue{ .register = tmp_reg }; + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + try self.genBinOpMir(.lzcnt, Type.u64, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.add, dst_ty, dst_mcv, .{ .immediate = 64 }); + try self.genBinOpMir(.lzcnt, Type.u64, tmp_mcv, mat_src_mcv.address().offset(8).deref()); + try self.asmCmovccRegisterRegister(dst_reg.to32(), tmp_reg.to32(), .nc); + + if (src_bits < 128) { + try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .immediate = 128 - src_bits }); + } + } else return self.fail("TODO airClz of {}", .{src_ty.fmt(self.bin_file.options.module.?)}); break :result dst_mcv; } - const src_bits = src_ty.bitSize(self.target.*); + if (src_bits > 64) + return self.fail("TODO airClz of {}", .{src_ty.fmt(self.bin_file.options.module.?)}); if (math.isPowerOfTwo(src_bits)) { const imm_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits ^ (src_bits - 1), @@ -3870,24 +3889,52 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); if (Target.x86.featureSetHas(self.target.cpu.features, .bmi)) { - const extra_bits = self.regExtraBits(src_ty); - const masked_mcv = if (extra_bits > 0) masked: { - const mask_mcv = MCValue{ - .immediate = ((@as(u64, 1) << @intCast(u6, extra_bits)) - 1) << - @intCast(u6, src_bits), - }; - const tmp_mcv = tmp: { - if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0)) break :tmp src_mcv; - try self.genSetReg(dst_reg, src_ty, src_mcv); - break :tmp dst_mcv; - }; - try self.genBinOpMir(.@"or", src_ty, tmp_mcv, mask_mcv); - break :masked tmp_mcv; - } else mat_src_mcv; - try self.genBinOpMir(.tzcnt, src_ty, dst_mcv, masked_mcv); + if (src_bits <= 64) { + const extra_bits = self.regExtraBits(src_ty); + const masked_mcv = if (extra_bits > 0) masked: { + const tmp_mcv = tmp: { + if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0)) + break :tmp src_mcv; + try self.genSetReg(dst_reg, src_ty, src_mcv); + break :tmp dst_mcv; + }; + try self.genBinOpMir( + .@"or", + src_ty, + tmp_mcv, + .{ .immediate = (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - extra_bits)) << + @intCast(u6, src_bits) }, + ); + break :masked tmp_mcv; + } else mat_src_mcv; + try self.genBinOpMir(.tzcnt, src_ty, dst_mcv, masked_mcv); + } else if (src_bits <= 128) { + const tmp_reg = try self.register_manager.allocReg(null, gp); + const tmp_mcv = MCValue{ .register = tmp_reg }; + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + const masked_mcv = if (src_bits < 128) masked: { + try self.genCopy(Type.u64, dst_mcv, mat_src_mcv.address().offset(8).deref()); + try self.genBinOpMir( + .@"or", + Type.u64, + dst_mcv, + .{ .immediate 
= @as(u64, math.maxInt(u64)) << @intCast(u6, src_bits - 64) }, + ); + break :masked dst_mcv; + } else mat_src_mcv.address().offset(8).deref(); + try self.genBinOpMir(.tzcnt, Type.u64, dst_mcv, masked_mcv); + try self.genBinOpMir(.add, dst_ty, dst_mcv, .{ .immediate = 64 }); + try self.genBinOpMir(.tzcnt, Type.u64, tmp_mcv, mat_src_mcv); + try self.asmCmovccRegisterRegister(dst_reg.to32(), tmp_reg.to32(), .nc); + } else return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)}); break :result dst_mcv; } + if (src_bits > 64) + return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)}); + const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits }); try self.genBinOpMir(.bsf, src_ty, dst_mcv, mat_src_mcv); diff --git a/test/behavior/bugs/2114.zig b/test/behavior/bugs/2114.zig index f92728eff6..3ad4a97b80 100644 --- a/test/behavior/bugs/2114.zig +++ b/test/behavior/bugs/2114.zig @@ -9,7 +9,8 @@ fn ctz(x: anytype) usize { test "fixed" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .bmi)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/math.zig b/test/behavior/math.zig index f9c9f43927..0362bd3a2b 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -77,7 +77,8 @@ fn testClz() !void { } test "@clz big ints" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .lzcnt)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO From 32ab930f1d39c374265ae14f1de9d837dcd7f650 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Fri, 5 May 2023 01:32:39 -0400 Subject: [PATCH 03/20] x86_64: implement f16 conversions when supported --- src/arch/x86_64/CodeGen.zig | 66 +- src/arch/x86_64/Encoding.zig | 95 +- src/arch/x86_64/Lower.zig | 3 + src/arch/x86_64/Mir.zig | 5 + src/arch/x86_64/encoder.zig | 160 +++- src/arch/x86_64/encodings.zig | 1606 +++++++++++++++++---------------- test/behavior/vector.zig | 3 +- 7 files changed, 1081 insertions(+), 857 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 55b18985da..b7fd81db68 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2172,12 +2172,9 @@ fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void { fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const dst_ty = self.air.typeOfIndex(inst); + const dst_bits = dst_ty.floatBits(self.target.*); const src_ty = self.air.typeOf(ty_op.operand); - if (dst_ty.floatBits(self.target.*) != 32 or src_ty.floatBits(self.target.*) != 64 or - !Target.x86.featureSetHas(self.target.cpu.features, .sse2)) - return self.fail("TODO implement airFptrunc from {} to {}", .{ - src_ty.fmt(self.bin_file.options.module.?), 
dst_ty.fmt(self.bin_file.options.module.?), - }); + const src_bits = src_ty.floatBits(self.target.*); const src_mcv = try self.resolveInst(ty_op.operand); const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) @@ -2187,19 +2184,32 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { const dst_lock = self.register_manager.lockReg(dst_mcv.register); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - try self.genBinOpMir(.cvtsd2ss, src_ty, dst_mcv, src_mcv); + if (src_bits == 32 and dst_bits == 16 and self.hasFeature(.f16c)) + try self.asmRegisterRegisterImmediate( + .vcvtps2ph, + dst_mcv.register, + if (src_mcv.isRegister()) src_mcv.getReg().? else src_reg: { + const src_reg = dst_mcv.register; + try self.genSetReg(src_reg, src_ty, src_mcv); + break :src_reg src_reg; + }, + Immediate.u(0b1_00), + ) + else if (src_bits == 64 and dst_bits == 32) + try self.genBinOpMir(.cvtsd2ss, src_ty, dst_mcv, src_mcv) + else + return self.fail("TODO implement airFptrunc from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }); return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } fn airFpext(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const dst_ty = self.air.typeOfIndex(inst); + const dst_bits = dst_ty.floatBits(self.target.*); const src_ty = self.air.typeOf(ty_op.operand); - if (dst_ty.floatBits(self.target.*) != 64 or src_ty.floatBits(self.target.*) != 32 or - !Target.x86.featureSetHas(self.target.cpu.features, .sse2)) - return self.fail("TODO implement airFpext from {} to {}", .{ - src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), - }); + const src_bits = src_ty.floatBits(self.target.*); const src_mcv = try self.resolveInst(ty_op.operand); const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) @@ -2209,7 +2219,19 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { const dst_lock = self.register_manager.lockReg(dst_mcv.register); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - try self.genBinOpMir(.cvtss2sd, src_ty, dst_mcv, src_mcv); + try self.genBinOpMir( + if (src_bits == 16 and dst_bits == 32 and self.hasFeature(.f16c)) + .vcvtph2ps + else if (src_bits == 32 and dst_bits == 64) + .cvtss2sd + else + return self.fail("TODO implement airFpext from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }), + src_ty, + dst_mcv, + src_mcv, + ); return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } @@ -3802,7 +3824,7 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { defer self.register_manager.unlockReg(dst_lock); const src_bits = src_ty.bitSize(self.target.*); - if (Target.x86.featureSetHas(self.target.cpu.features, .lzcnt)) { + if (self.hasFeature(.lzcnt)) { if (src_bits <= 64) { try self.genBinOpMir(.lzcnt, src_ty, dst_mcv, mat_src_mcv); @@ -3888,7 +3910,7 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - if (Target.x86.featureSetHas(self.target.cpu.features, .bmi)) { + if (self.hasFeature(.bmi)) { if (src_bits <= 64) { const extra_bits = self.regExtraBits(src_ty); const masked_mcv = if (extra_bits > 0) masked: { @@ -3956,7 +3978,7 @@ fn airPopcount(self: *Self, inst: Air.Inst.Index) !void { 
const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*)); const src_mcv = try self.resolveInst(ty_op.operand); - if (Target.x86.featureSetHas(self.target.cpu.features, .popcnt)) { + if (self.hasFeature(.popcnt)) { const mat_src_mcv = switch (src_mcv) { .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) }, else => src_mcv, @@ -4309,7 +4331,7 @@ fn airRound(self: *Self, inst: Air.Inst.Index, mode: Immediate) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const ty = self.air.typeOf(un_op); - if (!Target.x86.featureSetHas(self.target.cpu.features, .sse4_1)) + if (!self.hasFeature(.sse4_1)) return self.fail("TODO implement airRound without sse4_1 feature", .{}); const src_mcv = try self.resolveInst(un_op); @@ -5712,7 +5734,7 @@ fn genBinOp( => {}, .div_trunc, .div_floor, - => if (Target.x86.featureSetHas(self.target.cpu.features, .sse4_1)) { + => if (self.hasFeature(.sse4_1)) { const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*)); const dst_alias = registerAlias(dst_mcv.register, abi_size); try self.asmRegisterRegisterImmediate(switch (lhs_ty.floatBits(self.target.*)) { @@ -9593,3 +9615,13 @@ fn regBitSize(self: *Self, ty: Type) u64 { fn regExtraBits(self: *Self, ty: Type) u64 { return self.regBitSize(ty) - ty.bitSize(self.target.*); } + +fn hasFeature(self: *Self, feature: Target.x86.Feature) bool { + return Target.x86.featureSetHas(self.target.cpu.features, feature); +} +fn hasAnyFeatures(self: *Self, features: anytype) bool { + return Target.x86.featureSetHasAny(self.target.cpu.features, features); +} +fn hasAllFeatures(self: *Self, features: anytype) bool { + return Target.x86.featureSetHasAll(self.target.cpu.features, features); +} diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 944fe85458..05c48ecddf 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -23,6 +23,7 @@ const Data = struct { opc: [7]u8, modrm_ext: u3, mode: Mode, + feature: Feature, }; pub fn findByMnemonic( @@ -58,7 +59,7 @@ pub fn findByMnemonic( next: for (mnemonic_to_encodings_map[@enumToInt(mnemonic)]) |data| { switch (data.mode) { .rex => if (!rex_required) continue, - .long, .sse_long, .sse2_long => {}, + .long => {}, else => if (rex_required) continue, } for (input_ops, data.ops) |input_op, data_op| @@ -136,22 +137,20 @@ pub fn modRmExt(encoding: Encoding) u3 { } pub fn operandBitSize(encoding: Encoding) u64 { - switch (encoding.data.mode) { - .short => return 16, - .long, .sse_long, .sse2_long => return 64, - else => {}, - } - const bit_size: u64 = switch (encoding.data.op_en) { - .np => switch (encoding.data.ops[0]) { - .o16 => 16, - .o32 => 32, - .o64 => 64, - else => 32, + return switch (encoding.data.mode) { + .short => 16, + .long => 64, + else => switch (encoding.data.op_en) { + .np => switch (encoding.data.ops[0]) { + .o16 => 16, + .o32 => 32, + .o64 => 64, + else => 32, + }, + .td => encoding.data.ops[1].bitSize(), + else => encoding.data.ops[0].bitSize(), }, - .td => encoding.data.ops[1].bitSize(), - else => encoding.data.ops[0].bitSize(), }; - return bit_size; } pub fn format( @@ -162,12 +161,50 @@ pub fn format( ) !void { _ = options; _ = fmt; + + var opc = encoding.opcode(); switch (encoding.data.mode) { - .long, .sse_long, .sse2_long => try writer.writeAll("REX.W + "), else => {}, + .long => try writer.writeAll("REX.W + "), + .vex_128, .vex_128_long, .vex_256, .vex_256_long => { + try writer.writeAll("VEX."); + + switch (encoding.data.mode) { + .vex_128, .vex_128_long => try 
writer.writeAll("128"), + .vex_256, .vex_256_long => try writer.writeAll("256"), + else => unreachable, + } + + switch (opc[0]) { + else => {}, + 0x66, 0xf3, 0xf2 => { + try writer.print(".{X:0>2}", .{opc[0]}); + opc = opc[1..]; + }, + } + + try writer.print(".{X:0>2}", .{opc[0]}); + opc = opc[1..]; + + switch (opc[0]) { + else => {}, + 0x38, 0x3A => { + try writer.print("{X:0>2}", .{opc[0]}); + opc = opc[1..]; + }, + } + + try writer.writeByte('.'); + try writer.writeAll(switch (encoding.data.mode) { + .vex_128, .vex_256 => "W0", + .vex_128_long, .vex_256_long => "W1", + else => unreachable, + }); + try writer.writeByte(' '); + }, } - for (encoding.opcode()) |byte| { + for (opc) |byte| { try writer.print("{x:0>2} ", .{byte}); } @@ -184,15 +221,16 @@ pub fn format( try writer.print("+{s} ", .{tag}); }, .m, .mi, .m1, .mc => try writer.print("/{d} ", .{encoding.modRmExt()}), - .mr, .rm, .rmi, .mri, .mrc => try writer.writeAll("/r "), + .mr, .rm, .rmi, .mri, .mrc, .rrm, .rrmi => try writer.writeAll("/r "), } switch (encoding.data.op_en) { - .i, .d, .zi, .oi, .mi, .rmi, .mri => { + .i, .d, .zi, .oi, .mi, .rmi, .mri, .rrmi => { const op = switch (encoding.data.op_en) { .i, .d => encoding.data.ops[0], .zi, .oi, .mi => encoding.data.ops[1], .rmi, .mri => encoding.data.ops[2], + .rrmi => encoding.data.ops[3], else => unreachable, }; const tag = switch (op) { @@ -207,7 +245,7 @@ pub fn format( }; try writer.print("{s} ", .{tag}); }, - .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc => {}, + .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rrm => {}, } try writer.print("{s} ", .{@tagName(encoding.mnemonic)}); @@ -305,6 +343,8 @@ pub const Mnemonic = enum { // SSE4.1 roundss, roundsd, + // F16C + vcvtph2ps, vcvtps2ph, // zig fmt: on }; @@ -317,6 +357,7 @@ pub const OpEn = enum { fd, td, m1, mc, mi, mr, rm, rmi, mri, mrc, + rrm, rrmi, // zig fmt: on }; @@ -549,14 +590,21 @@ pub const Op = enum { pub const Mode = enum { none, short, - fpu, rex, long, + vex_128, + vex_128_long, + vex_256, + vex_256_long, +}; + +pub const Feature = enum { + none, + f16c, sse, - sse_long, sse2, - sse2_long, sse4_1, + x87, }; fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Operand) usize { @@ -593,6 +641,7 @@ const mnemonic_to_encodings_map = init: { .opc = undefined, .modrm_ext = entry[4], .mode = entry[5], + .feature = entry[6], }; // TODO: use `@memcpy` for these. When I did that, I got a false positive // compile error for this copy happening at compile time. 
diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 4289cfaf2a..9571f50e7c 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -133,6 +133,9 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction { .subsd, .ucomisd, .xorpd, + + .vcvtph2ps, + .vcvtps2ph, => try lower.mirGeneric(inst), .cmps, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 6b2db1b696..c4e19fdc0e 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -247,6 +247,11 @@ pub const Inst = struct { /// Bitwise logical xor of packed double precision floating-point values xorpd, + /// Convert 16-bit floating-point values to single-precision floating-point values + vcvtph2ps, + /// Convert single-precision floating-point values to 16-bit floating-point values + vcvtps2ph, + /// Compare string operands cmps, /// Load string diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig index 4c900697f5..94f4eb56d5 100644 --- a/src/arch/x86_64/encoder.zig +++ b/src/arch/x86_64/encoder.zig @@ -209,10 +209,19 @@ pub const Instruction = struct { const enc = inst.encoding; const data = enc.data; - try inst.encodeLegacyPrefixes(encoder); - try inst.encodeMandatoryPrefix(encoder); - try inst.encodeRexPrefix(encoder); - try inst.encodeOpcode(encoder); + switch (data.mode) { + .none, .short, .rex, .long => { + try inst.encodeLegacyPrefixes(encoder); + try inst.encodeMandatoryPrefix(encoder); + try inst.encodeRexPrefix(encoder); + try inst.encodeOpcode(encoder); + }, + .vex_128, .vex_128_long, .vex_256, .vex_256_long => { + try inst.encodeVexPrefix(encoder); + const opc = inst.encoding.opcode(); + try encoder.opcode_1byte(opc[opc.len - 1]); + }, + } switch (data.op_en) { .np, .o => {}, @@ -309,6 +318,7 @@ pub const Instruction = struct { } else null, + .rrm, .rrmi => unreachable, }; if (segment_override) |seg| { legacy.setSegmentOverride(seg); @@ -322,10 +332,7 @@ pub const Instruction = struct { var rex = Rex{}; rex.present = inst.encoding.data.mode == .rex; - switch (inst.encoding.data.mode) { - .long, .sse_long, .sse2_long => rex.w = true, - else => {}, - } + rex.w = inst.encoding.data.mode == .long; switch (op_en) { .np, .i, .zi, .fd, .td, .d => {}, @@ -346,11 +353,76 @@ pub const Instruction = struct { rex.b = b_x_op.isBaseExtended(); rex.x = b_x_op.isIndexExtended(); }, + .rrm, .rrmi => unreachable, } try encoder.rex(rex); } + fn encodeVexPrefix(inst: Instruction, encoder: anytype) !void { + const op_en = inst.encoding.data.op_en; + const opc = inst.encoding.opcode(); + const mand_pre = inst.encoding.mandatoryPrefix(); + + var vex = Vex{}; + + vex.w = switch (inst.encoding.data.mode) { + .vex_128, .vex_256 => false, + .vex_128_long, .vex_256_long => true, + else => unreachable, + }; + + switch (op_en) { + .np, .i, .zi, .fd, .td, .d => {}, + .o, .oi => vex.b = inst.ops[0].reg.isExtended(), + .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rrm, .rrmi => { + const r_op = switch (op_en) { + .rm, .rmi, .rrm, .rrmi => inst.ops[0], + .mr, .mri, .mrc => inst.ops[1], + else => .none, + }; + vex.r = r_op.isBaseExtended(); + + const b_x_op = switch (op_en) { + .rm, .rmi => inst.ops[1], + .m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.ops[0], + .rrm, .rrmi => inst.ops[2], + else => unreachable, + }; + vex.b = b_x_op.isBaseExtended(); + vex.x = b_x_op.isIndexExtended(); + }, + } + + vex.l = switch (inst.encoding.data.mode) { + .vex_128, .vex_128_long => false, + .vex_256, .vex_256_long => true, + else => unreachable, + }; + + vex.p = if (mand_pre) 
|mand| switch (mand) { + 0x66 => .@"66", + 0xf2 => .f2, + 0xf3 => .f3, + else => unreachable, + } else .none; + + const leading: usize = if (mand_pre) |_| 1 else 0; + assert(opc[leading] == 0x0f); + vex.m = switch (opc[leading + 1]) { + else => .@"0f", + 0x38 => .@"0f38", + 0x3a => .@"0f3a", + }; + + switch (op_en) { + else => {}, + .rrm, .rrmi => vex.v = inst.ops[1].reg, + } + + try encoder.vex(vex); + } + fn encodeMandatoryPrefix(inst: Instruction, encoder: anytype) !void { const prefix = inst.encoding.mandatoryPrefix() orelse return; try encoder.opcode_1byte(prefix); @@ -562,17 +634,48 @@ fn Encoder(comptime T: type, comptime opts: Options) type { /// or one of reg, index, r/m, base, or opcode-reg might be extended. /// /// See struct `Rex` for a description of each field. - pub fn rex(self: Self, byte: Rex) !void { - if (!byte.present and !byte.isSet()) return; + pub fn rex(self: Self, fields: Rex) !void { + if (!fields.present and !fields.isSet()) return; - var value: u8 = 0b0100_0000; + var byte: u8 = 0b0100_0000; - if (byte.w) value |= 0b1000; - if (byte.r) value |= 0b0100; - if (byte.x) value |= 0b0010; - if (byte.b) value |= 0b0001; + if (fields.w) byte |= 0b1000; + if (fields.r) byte |= 0b0100; + if (fields.x) byte |= 0b0010; + if (fields.b) byte |= 0b0001; - try self.writer.writeByte(value); + try self.writer.writeByte(byte); + } + + /// Encodes a VEX prefix given all the fields + /// + /// See struct `Vex` for a description of each field. + pub fn vex(self: Self, fields: Vex) !void { + if (fields.is3Byte()) { + try self.writer.writeByte(0b1100_0100); + + try self.writer.writeByte( + @as(u8, ~@boolToInt(fields.r)) << 7 | + @as(u8, ~@boolToInt(fields.x)) << 6 | + @as(u8, ~@boolToInt(fields.b)) << 5 | + @as(u8, @enumToInt(fields.m)) << 0, + ); + + try self.writer.writeByte( + @as(u8, @boolToInt(fields.w)) << 7 | + @as(u8, ~fields.v.enc()) << 3 | + @as(u8, @boolToInt(fields.l)) << 2 | + @as(u8, @enumToInt(fields.p)) << 0, + ); + } else { + try self.writer.writeByte(0b1100_0101); + try self.writer.writeByte( + @as(u8, ~@boolToInt(fields.r)) << 7 | + @as(u8, ~fields.v.enc()) << 3 | + @as(u8, @boolToInt(fields.l)) << 2 | + @as(u8, @enumToInt(fields.p)) << 0, + ); + } } // ------ @@ -848,6 +951,31 @@ pub const Rex = struct { } }; +pub const Vex = struct { + w: bool = false, + r: bool = false, + x: bool = false, + b: bool = false, + l: bool = false, + p: enum(u2) { + none = 0b00, + @"66" = 0b01, + f3 = 0b10, + f2 = 0b11, + } = .none, + m: enum(u5) { + @"0f" = 0b0_0001, + @"0f38" = 0b0_0010, + @"0f3a" = 0b0_0011, + _, + } = .@"0f", + v: Register = .ymm0, + + pub fn is3Byte(vex: Vex) bool { + return vex.w or vex.x or vex.b or vex.m != .@"0f"; + } +}; + // Tests fn expectEqualHexStrings(expected: []const u8, given: []const u8, assembly: []const u8) !void { assert(expected.len > 0); diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index f87a110e99..52b8cc29d6 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -3,933 +3,939 @@ const Mnemonic = Encoding.Mnemonic; const OpEn = Encoding.OpEn; const Op = Encoding.Op; const Mode = Encoding.Mode; +const Feature = Encoding.Feature; const modrm_ext = u3; -pub const Entry = struct { Mnemonic, OpEn, []const Op, []const u8, modrm_ext, Mode }; +pub const Entry = struct { Mnemonic, OpEn, []const Op, []const u8, modrm_ext, Mode, Feature }; // TODO move this into a .zon file when Zig is capable of importing .zon files // zig fmt: off pub const table = [_]Entry{ // General-purpose - .{ .adc, .zi, &.{ 
.al, .imm8 }, &.{ 0x14 }, 0, .none }, - .{ .adc, .zi, &.{ .ax, .imm16 }, &.{ 0x15 }, 0, .none }, - .{ .adc, .zi, &.{ .eax, .imm32 }, &.{ 0x15 }, 0, .none }, - .{ .adc, .zi, &.{ .rax, .imm32s }, &.{ 0x15 }, 0, .long }, - .{ .adc, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 2, .none }, - .{ .adc, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 2, .rex }, - .{ .adc, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 2, .none }, - .{ .adc, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 2, .none }, - .{ .adc, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 2, .long }, - .{ .adc, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 2, .none }, - .{ .adc, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 2, .none }, - .{ .adc, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 2, .long }, - .{ .adc, .mr, &.{ .rm8, .r8 }, &.{ 0x10 }, 0, .none }, - .{ .adc, .mr, &.{ .rm8, .r8 }, &.{ 0x10 }, 0, .rex }, - .{ .adc, .mr, &.{ .rm16, .r16 }, &.{ 0x11 }, 0, .none }, - .{ .adc, .mr, &.{ .rm32, .r32 }, &.{ 0x11 }, 0, .none }, - .{ .adc, .mr, &.{ .rm64, .r64 }, &.{ 0x11 }, 0, .long }, - .{ .adc, .rm, &.{ .r8, .rm8 }, &.{ 0x12 }, 0, .none }, - .{ .adc, .rm, &.{ .r8, .rm8 }, &.{ 0x12 }, 0, .rex }, - .{ .adc, .rm, &.{ .r16, .rm16 }, &.{ 0x13 }, 0, .none }, - .{ .adc, .rm, &.{ .r32, .rm32 }, &.{ 0x13 }, 0, .none }, - .{ .adc, .rm, &.{ .r64, .rm64 }, &.{ 0x13 }, 0, .long }, + .{ .adc, .zi, &.{ .al, .imm8 }, &.{ 0x14 }, 0, .none, .none }, + .{ .adc, .zi, &.{ .ax, .imm16 }, &.{ 0x15 }, 0, .none, .none }, + .{ .adc, .zi, &.{ .eax, .imm32 }, &.{ 0x15 }, 0, .none, .none }, + .{ .adc, .zi, &.{ .rax, .imm32s }, &.{ 0x15 }, 0, .long, .none }, + .{ .adc, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 2, .none, .none }, + .{ .adc, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 2, .rex, .none }, + .{ .adc, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 2, .none, .none }, + .{ .adc, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 2, .none, .none }, + .{ .adc, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 2, .long, .none }, + .{ .adc, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 2, .none, .none }, + .{ .adc, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 2, .none, .none }, + .{ .adc, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 2, .long, .none }, + .{ .adc, .mr, &.{ .rm8, .r8 }, &.{ 0x10 }, 0, .none, .none }, + .{ .adc, .mr, &.{ .rm8, .r8 }, &.{ 0x10 }, 0, .rex, .none }, + .{ .adc, .mr, &.{ .rm16, .r16 }, &.{ 0x11 }, 0, .none, .none }, + .{ .adc, .mr, &.{ .rm32, .r32 }, &.{ 0x11 }, 0, .none, .none }, + .{ .adc, .mr, &.{ .rm64, .r64 }, &.{ 0x11 }, 0, .long, .none }, + .{ .adc, .rm, &.{ .r8, .rm8 }, &.{ 0x12 }, 0, .none, .none }, + .{ .adc, .rm, &.{ .r8, .rm8 }, &.{ 0x12 }, 0, .rex, .none }, + .{ .adc, .rm, &.{ .r16, .rm16 }, &.{ 0x13 }, 0, .none, .none }, + .{ .adc, .rm, &.{ .r32, .rm32 }, &.{ 0x13 }, 0, .none, .none }, + .{ .adc, .rm, &.{ .r64, .rm64 }, &.{ 0x13 }, 0, .long, .none }, - .{ .add, .zi, &.{ .al, .imm8 }, &.{ 0x04 }, 0, .none }, - .{ .add, .zi, &.{ .ax, .imm16 }, &.{ 0x05 }, 0, .none }, - .{ .add, .zi, &.{ .eax, .imm32 }, &.{ 0x05 }, 0, .none }, - .{ .add, .zi, &.{ .rax, .imm32s }, &.{ 0x05 }, 0, .long }, - .{ .add, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 0, .none }, - .{ .add, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 0, .rex }, - .{ .add, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 0, .none }, - .{ .add, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 0, .none }, - .{ .add, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 0, .long }, - .{ .add, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 0, .none }, - .{ .add, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 0, .none }, - .{ .add, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 0, .long }, - .{ .add, .mr, &.{ .rm8, .r8 }, &.{ 0x00 }, 0, .none }, - .{ .add, .mr, &.{ .rm8, .r8 }, &.{ 
0x00 }, 0, .rex }, - .{ .add, .mr, &.{ .rm16, .r16 }, &.{ 0x01 }, 0, .none }, - .{ .add, .mr, &.{ .rm32, .r32 }, &.{ 0x01 }, 0, .none }, - .{ .add, .mr, &.{ .rm64, .r64 }, &.{ 0x01 }, 0, .long }, - .{ .add, .rm, &.{ .r8, .rm8 }, &.{ 0x02 }, 0, .none }, - .{ .add, .rm, &.{ .r8, .rm8 }, &.{ 0x02 }, 0, .rex }, - .{ .add, .rm, &.{ .r16, .rm16 }, &.{ 0x03 }, 0, .none }, - .{ .add, .rm, &.{ .r32, .rm32 }, &.{ 0x03 }, 0, .none }, - .{ .add, .rm, &.{ .r64, .rm64 }, &.{ 0x03 }, 0, .long }, + .{ .add, .zi, &.{ .al, .imm8 }, &.{ 0x04 }, 0, .none, .none }, + .{ .add, .zi, &.{ .ax, .imm16 }, &.{ 0x05 }, 0, .none, .none }, + .{ .add, .zi, &.{ .eax, .imm32 }, &.{ 0x05 }, 0, .none, .none }, + .{ .add, .zi, &.{ .rax, .imm32s }, &.{ 0x05 }, 0, .long, .none }, + .{ .add, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 0, .none, .none }, + .{ .add, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 0, .rex, .none }, + .{ .add, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 0, .none, .none }, + .{ .add, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 0, .none, .none }, + .{ .add, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 0, .long, .none }, + .{ .add, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 0, .none, .none }, + .{ .add, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 0, .none, .none }, + .{ .add, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 0, .long, .none }, + .{ .add, .mr, &.{ .rm8, .r8 }, &.{ 0x00 }, 0, .none, .none }, + .{ .add, .mr, &.{ .rm8, .r8 }, &.{ 0x00 }, 0, .rex, .none }, + .{ .add, .mr, &.{ .rm16, .r16 }, &.{ 0x01 }, 0, .none, .none }, + .{ .add, .mr, &.{ .rm32, .r32 }, &.{ 0x01 }, 0, .none, .none }, + .{ .add, .mr, &.{ .rm64, .r64 }, &.{ 0x01 }, 0, .long, .none }, + .{ .add, .rm, &.{ .r8, .rm8 }, &.{ 0x02 }, 0, .none, .none }, + .{ .add, .rm, &.{ .r8, .rm8 }, &.{ 0x02 }, 0, .rex, .none }, + .{ .add, .rm, &.{ .r16, .rm16 }, &.{ 0x03 }, 0, .none, .none }, + .{ .add, .rm, &.{ .r32, .rm32 }, &.{ 0x03 }, 0, .none, .none }, + .{ .add, .rm, &.{ .r64, .rm64 }, &.{ 0x03 }, 0, .long, .none }, - .{ .@"and", .zi, &.{ .al, .imm8 }, &.{ 0x24 }, 0, .none }, - .{ .@"and", .zi, &.{ .ax, .imm16 }, &.{ 0x25 }, 0, .none }, - .{ .@"and", .zi, &.{ .eax, .imm32 }, &.{ 0x25 }, 0, .none }, - .{ .@"and", .zi, &.{ .rax, .imm32s }, &.{ 0x25 }, 0, .long }, - .{ .@"and", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 4, .none }, - .{ .@"and", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 4, .rex }, - .{ .@"and", .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 4, .none }, - .{ .@"and", .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 4, .none }, - .{ .@"and", .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 4, .long }, - .{ .@"and", .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 4, .none }, - .{ .@"and", .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 4, .none }, - .{ .@"and", .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 4, .long }, - .{ .@"and", .mr, &.{ .rm8, .r8 }, &.{ 0x20 }, 0, .none }, - .{ .@"and", .mr, &.{ .rm8, .r8 }, &.{ 0x20 }, 0, .rex }, - .{ .@"and", .mr, &.{ .rm16, .r16 }, &.{ 0x21 }, 0, .none }, - .{ .@"and", .mr, &.{ .rm32, .r32 }, &.{ 0x21 }, 0, .none }, - .{ .@"and", .mr, &.{ .rm64, .r64 }, &.{ 0x21 }, 0, .long }, - .{ .@"and", .rm, &.{ .r8, .rm8 }, &.{ 0x22 }, 0, .none }, - .{ .@"and", .rm, &.{ .r8, .rm8 }, &.{ 0x22 }, 0, .rex }, - .{ .@"and", .rm, &.{ .r16, .rm16 }, &.{ 0x23 }, 0, .none }, - .{ .@"and", .rm, &.{ .r32, .rm32 }, &.{ 0x23 }, 0, .none }, - .{ .@"and", .rm, &.{ .r64, .rm64 }, &.{ 0x23 }, 0, .long }, + .{ .@"and", .zi, &.{ .al, .imm8 }, &.{ 0x24 }, 0, .none, .none }, + .{ .@"and", .zi, &.{ .ax, .imm16 }, &.{ 0x25 }, 0, .none, .none }, + .{ .@"and", .zi, &.{ .eax, .imm32 }, &.{ 0x25 }, 0, .none, .none }, + .{ .@"and", .zi, &.{ .rax, .imm32s 
}, &.{ 0x25 }, 0, .long, .none }, + .{ .@"and", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 4, .none, .none }, + .{ .@"and", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 4, .rex, .none }, + .{ .@"and", .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 4, .none, .none }, + .{ .@"and", .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 4, .none, .none }, + .{ .@"and", .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 4, .long, .none }, + .{ .@"and", .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 4, .none, .none }, + .{ .@"and", .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 4, .none, .none }, + .{ .@"and", .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 4, .long, .none }, + .{ .@"and", .mr, &.{ .rm8, .r8 }, &.{ 0x20 }, 0, .none, .none }, + .{ .@"and", .mr, &.{ .rm8, .r8 }, &.{ 0x20 }, 0, .rex, .none }, + .{ .@"and", .mr, &.{ .rm16, .r16 }, &.{ 0x21 }, 0, .none, .none }, + .{ .@"and", .mr, &.{ .rm32, .r32 }, &.{ 0x21 }, 0, .none, .none }, + .{ .@"and", .mr, &.{ .rm64, .r64 }, &.{ 0x21 }, 0, .long, .none }, + .{ .@"and", .rm, &.{ .r8, .rm8 }, &.{ 0x22 }, 0, .none, .none }, + .{ .@"and", .rm, &.{ .r8, .rm8 }, &.{ 0x22 }, 0, .rex, .none }, + .{ .@"and", .rm, &.{ .r16, .rm16 }, &.{ 0x23 }, 0, .none, .none }, + .{ .@"and", .rm, &.{ .r32, .rm32 }, &.{ 0x23 }, 0, .none, .none }, + .{ .@"and", .rm, &.{ .r64, .rm64 }, &.{ 0x23 }, 0, .long, .none }, - .{ .bsf, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0xbc }, 0, .none }, - .{ .bsf, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xbc }, 0, .none }, - .{ .bsf, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0xbc }, 0, .long }, + .{ .bsf, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0xbc }, 0, .none, .none }, + .{ .bsf, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xbc }, 0, .none, .none }, + .{ .bsf, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0xbc }, 0, .long, .none }, - .{ .bsr, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0xbd }, 0, .none }, - .{ .bsr, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xbd }, 0, .none }, - .{ .bsr, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0xbd }, 0, .long }, + .{ .bsr, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0xbd }, 0, .none, .none }, + .{ .bsr, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xbd }, 0, .none, .none }, + .{ .bsr, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0xbd }, 0, .long, .none }, - .{ .bswap, .o, &.{ .r32 }, &.{ 0x0f, 0xc8 }, 0, .none }, - .{ .bswap, .o, &.{ .r64 }, &.{ 0x0f, 0xc8 }, 0, .long }, + .{ .bswap, .o, &.{ .r32 }, &.{ 0x0f, 0xc8 }, 0, .none, .none }, + .{ .bswap, .o, &.{ .r64 }, &.{ 0x0f, 0xc8 }, 0, .long, .none }, - .{ .bt, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xa3 }, 0, .none }, - .{ .bt, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xa3 }, 0, .none }, - .{ .bt, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xa3 }, 0, .long }, - .{ .bt, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 4, .none }, - .{ .bt, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 4, .none }, - .{ .bt, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 4, .long }, + .{ .bt, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xa3 }, 0, .none, .none }, + .{ .bt, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xa3 }, 0, .none, .none }, + .{ .bt, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xa3 }, 0, .long, .none }, + .{ .bt, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 4, .none, .none }, + .{ .bt, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 4, .none, .none }, + .{ .bt, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 4, .long, .none }, - .{ .btc, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xbb }, 0, .none }, - .{ .btc, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xbb }, 0, .none }, - .{ .btc, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xbb }, 0, .long }, - .{ .btc, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 7, .none }, - .{ .btc, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 7, .none }, - .{ .btc, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 7, 
.long }, + .{ .btc, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xbb }, 0, .none, .none }, + .{ .btc, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xbb }, 0, .none, .none }, + .{ .btc, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xbb }, 0, .long, .none }, + .{ .btc, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 7, .none, .none }, + .{ .btc, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 7, .none, .none }, + .{ .btc, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 7, .long, .none }, - .{ .btr, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xb3 }, 0, .none }, - .{ .btr, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xb3 }, 0, .none }, - .{ .btr, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xb3 }, 0, .long }, - .{ .btr, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 6, .none }, - .{ .btr, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 6, .none }, - .{ .btr, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 6, .long }, + .{ .btr, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xb3 }, 0, .none, .none }, + .{ .btr, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xb3 }, 0, .none, .none }, + .{ .btr, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xb3 }, 0, .long, .none }, + .{ .btr, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 6, .none, .none }, + .{ .btr, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 6, .none, .none }, + .{ .btr, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 6, .long, .none }, - .{ .bts, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xab }, 0, .none }, - .{ .bts, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xab }, 0, .none }, - .{ .bts, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xab }, 0, .long }, - .{ .bts, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 5, .none }, - .{ .bts, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 5, .none }, - .{ .bts, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 5, .long }, + .{ .bts, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xab }, 0, .none, .none }, + .{ .bts, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xab }, 0, .none, .none }, + .{ .bts, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xab }, 0, .long, .none }, + .{ .bts, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 5, .none, .none }, + .{ .bts, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 5, .none, .none }, + .{ .bts, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 5, .long, .none }, // This is M encoding according to Intel, but D makes more sense here. 
- .{ .call, .d, &.{ .rel32 }, &.{ 0xe8 }, 0, .none }, - .{ .call, .m, &.{ .rm64 }, &.{ 0xff }, 2, .none }, + .{ .call, .d, &.{ .rel32 }, &.{ 0xe8 }, 0, .none, .none }, + .{ .call, .m, &.{ .rm64 }, &.{ 0xff }, 2, .none, .none }, - .{ .cbw, .np, &.{ .o16 }, &.{ 0x98 }, 0, .none }, - .{ .cwde, .np, &.{ .o32 }, &.{ 0x98 }, 0, .none }, - .{ .cdqe, .np, &.{ .o64 }, &.{ 0x98 }, 0, .long }, + .{ .cbw, .np, &.{ .o16 }, &.{ 0x98 }, 0, .none, .none }, + .{ .cwde, .np, &.{ .o32 }, &.{ 0x98 }, 0, .none, .none }, + .{ .cdqe, .np, &.{ .o64 }, &.{ 0x98 }, 0, .long, .none }, - .{ .cwd, .np, &.{ .o16 }, &.{ 0x99 }, 0, .none }, - .{ .cdq, .np, &.{ .o32 }, &.{ 0x99 }, 0, .none }, - .{ .cqo, .np, &.{ .o64 }, &.{ 0x99 }, 0, .long }, + .{ .cwd, .np, &.{ .o16 }, &.{ 0x99 }, 0, .none, .none }, + .{ .cdq, .np, &.{ .o32 }, &.{ 0x99 }, 0, .none, .none }, + .{ .cqo, .np, &.{ .o64 }, &.{ 0x99 }, 0, .long, .none }, - .{ .cmova, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .none }, - .{ .cmova, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none }, - .{ .cmova, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long }, - .{ .cmovae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .none }, - .{ .cmovae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none }, - .{ .cmovae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long }, - .{ .cmovb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .none }, - .{ .cmovb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none }, - .{ .cmovb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long }, - .{ .cmovbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .none }, - .{ .cmovbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none }, - .{ .cmovbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long }, - .{ .cmovc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .none }, - .{ .cmovc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none }, - .{ .cmovc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long }, - .{ .cmove, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .none }, - .{ .cmove, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none }, - .{ .cmove, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long }, - .{ .cmovg, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .none }, - .{ .cmovg, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none }, - .{ .cmovg, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long }, - .{ .cmovge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .none }, - .{ .cmovge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none }, - .{ .cmovge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long }, - .{ .cmovl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .none }, - .{ .cmovl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none }, - .{ .cmovl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long }, - .{ .cmovle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .none }, - .{ .cmovle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none }, - .{ .cmovle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long }, - .{ .cmovna, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .none }, - .{ .cmovna, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none }, - .{ .cmovna, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long }, - .{ .cmovnae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .none }, - .{ .cmovnae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none }, - .{ .cmovnae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long }, - .{ .cmovnb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .none }, - .{ .cmovnb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none }, - .{ .cmovnb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long }, - .{ .cmovnbe, .rm, 
&.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .none }, - .{ .cmovnbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none }, - .{ .cmovnbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long }, - .{ .cmovnc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .none }, - .{ .cmovnc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none }, - .{ .cmovnc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long }, - .{ .cmovne, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .none }, - .{ .cmovne, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none }, - .{ .cmovne, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long }, - .{ .cmovng, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .none }, - .{ .cmovng, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none }, - .{ .cmovng, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long }, - .{ .cmovnge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .none }, - .{ .cmovnge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none }, - .{ .cmovnge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long }, - .{ .cmovnl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .none }, - .{ .cmovnl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none }, - .{ .cmovnl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long }, - .{ .cmovnle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .none }, - .{ .cmovnle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none }, - .{ .cmovnle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long }, - .{ .cmovno, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x41 }, 0, .none }, - .{ .cmovno, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x41 }, 0, .none }, - .{ .cmovno, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x41 }, 0, .long }, - .{ .cmovnp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .none }, - .{ .cmovnp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none }, - .{ .cmovnp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long }, - .{ .cmovns, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x49 }, 0, .none }, - .{ .cmovns, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x49 }, 0, .none }, - .{ .cmovns, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x49 }, 0, .long }, - .{ .cmovnz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .none }, - .{ .cmovnz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none }, - .{ .cmovnz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long }, - .{ .cmovo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x40 }, 0, .none }, - .{ .cmovo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x40 }, 0, .none }, - .{ .cmovo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x40 }, 0, .long }, - .{ .cmovp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .none }, - .{ .cmovp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none }, - .{ .cmovp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long }, - .{ .cmovpe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .none }, - .{ .cmovpe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none }, - .{ .cmovpe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long }, - .{ .cmovpo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .none }, - .{ .cmovpo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none }, - .{ .cmovpo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long }, - .{ .cmovs, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x48 }, 0, .none }, - .{ .cmovs, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x48 }, 0, .none }, - .{ .cmovs, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x48 }, 0, .long }, - .{ .cmovz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .none }, - .{ .cmovz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none }, - .{ .cmovz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long }, + .{ .cmova, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .none, .none }, + .{ .cmova, .rm, &.{ .r32, .rm32 
}, &.{ 0x0f, 0x47 }, 0, .none, .none }, + .{ .cmova, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .none }, + .{ .cmovae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, + .{ .cmovae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, + .{ .cmovae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, + .{ .cmovb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, + .{ .cmovb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, + .{ .cmovb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, + .{ .cmovbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .none, .none }, + .{ .cmovbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .none }, + .{ .cmovbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .none }, + .{ .cmovc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, + .{ .cmovc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, + .{ .cmovc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, + .{ .cmove, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .none, .none }, + .{ .cmove, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .none }, + .{ .cmove, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .none }, + .{ .cmovg, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .none, .none }, + .{ .cmovg, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .none }, + .{ .cmovg, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .none }, + .{ .cmovge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .none, .none }, + .{ .cmovge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .none }, + .{ .cmovge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .none }, + .{ .cmovl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .none, .none }, + .{ .cmovl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .none }, + .{ .cmovl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .none }, + .{ .cmovle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .none, .none }, + .{ .cmovle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .none }, + .{ .cmovle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .none }, + .{ .cmovna, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .none, .none }, + .{ .cmovna, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .none }, + .{ .cmovna, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .none }, + .{ .cmovnae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, + .{ .cmovnae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, + .{ .cmovnae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, + .{ .cmovnb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, + .{ .cmovnb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, + .{ .cmovnb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, + .{ .cmovnbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .none, .none }, + .{ .cmovnbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .none }, + .{ .cmovnbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .none }, + .{ .cmovnc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, + .{ .cmovnc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, + .{ .cmovnc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, + .{ .cmovne, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .none, .none }, + .{ .cmovne, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .none }, + .{ .cmovne, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .none }, + .{ .cmovng, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .none, .none }, 
+ .{ .cmovng, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .none }, + .{ .cmovng, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .none }, + .{ .cmovnge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .none, .none }, + .{ .cmovnge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .none }, + .{ .cmovnge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .none }, + .{ .cmovnl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .none, .none }, + .{ .cmovnl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .none }, + .{ .cmovnl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .none }, + .{ .cmovnle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .none, .none }, + .{ .cmovnle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .none }, + .{ .cmovnle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .none }, + .{ .cmovno, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x41 }, 0, .none, .none }, + .{ .cmovno, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x41 }, 0, .none, .none }, + .{ .cmovno, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x41 }, 0, .long, .none }, + .{ .cmovnp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .none, .none }, + .{ .cmovnp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .none }, + .{ .cmovnp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .none }, + .{ .cmovns, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x49 }, 0, .none, .none }, + .{ .cmovns, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x49 }, 0, .none, .none }, + .{ .cmovns, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x49 }, 0, .long, .none }, + .{ .cmovnz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .none, .none }, + .{ .cmovnz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .none }, + .{ .cmovnz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .none }, + .{ .cmovo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x40 }, 0, .none, .none }, + .{ .cmovo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x40 }, 0, .none, .none }, + .{ .cmovo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x40 }, 0, .long, .none }, + .{ .cmovp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .none, .none }, + .{ .cmovp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .none }, + .{ .cmovp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .none }, + .{ .cmovpe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .none, .none }, + .{ .cmovpe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .none }, + .{ .cmovpe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .none }, + .{ .cmovpo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .none, .none }, + .{ .cmovpo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .none }, + .{ .cmovpo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .none }, + .{ .cmovs, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x48 }, 0, .none, .none }, + .{ .cmovs, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x48 }, 0, .none, .none }, + .{ .cmovs, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x48 }, 0, .long, .none }, + .{ .cmovz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .none, .none }, + .{ .cmovz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .none }, + .{ .cmovz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .none }, - .{ .cmp, .zi, &.{ .al, .imm8 }, &.{ 0x3c }, 0, .none }, - .{ .cmp, .zi, &.{ .ax, .imm16 }, &.{ 0x3d }, 0, .none }, - .{ .cmp, .zi, &.{ .eax, .imm32 }, &.{ 0x3d }, 0, .none }, - .{ .cmp, .zi, &.{ .rax, .imm32s }, &.{ 0x3d }, 0, .long }, - .{ .cmp, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 7, .none }, - .{ .cmp, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 7, .rex }, - .{ .cmp, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 7, .none }, - .{ .cmp, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 7, .none }, - .{ 
.cmp, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 7, .long }, - .{ .cmp, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 7, .none }, - .{ .cmp, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 7, .none }, - .{ .cmp, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 7, .long }, - .{ .cmp, .mr, &.{ .rm8, .r8 }, &.{ 0x38 }, 0, .none }, - .{ .cmp, .mr, &.{ .rm8, .r8 }, &.{ 0x38 }, 0, .rex }, - .{ .cmp, .mr, &.{ .rm16, .r16 }, &.{ 0x39 }, 0, .none }, - .{ .cmp, .mr, &.{ .rm32, .r32 }, &.{ 0x39 }, 0, .none }, - .{ .cmp, .mr, &.{ .rm64, .r64 }, &.{ 0x39 }, 0, .long }, - .{ .cmp, .rm, &.{ .r8, .rm8 }, &.{ 0x3a }, 0, .none }, - .{ .cmp, .rm, &.{ .r8, .rm8 }, &.{ 0x3a }, 0, .rex }, - .{ .cmp, .rm, &.{ .r16, .rm16 }, &.{ 0x3b }, 0, .none }, - .{ .cmp, .rm, &.{ .r32, .rm32 }, &.{ 0x3b }, 0, .none }, - .{ .cmp, .rm, &.{ .r64, .rm64 }, &.{ 0x3b }, 0, .long }, + .{ .cmp, .zi, &.{ .al, .imm8 }, &.{ 0x3c }, 0, .none, .none }, + .{ .cmp, .zi, &.{ .ax, .imm16 }, &.{ 0x3d }, 0, .none, .none }, + .{ .cmp, .zi, &.{ .eax, .imm32 }, &.{ 0x3d }, 0, .none, .none }, + .{ .cmp, .zi, &.{ .rax, .imm32s }, &.{ 0x3d }, 0, .long, .none }, + .{ .cmp, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 7, .none, .none }, + .{ .cmp, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 7, .rex, .none }, + .{ .cmp, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 7, .none, .none }, + .{ .cmp, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 7, .none, .none }, + .{ .cmp, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 7, .long, .none }, + .{ .cmp, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 7, .none, .none }, + .{ .cmp, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 7, .none, .none }, + .{ .cmp, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 7, .long, .none }, + .{ .cmp, .mr, &.{ .rm8, .r8 }, &.{ 0x38 }, 0, .none, .none }, + .{ .cmp, .mr, &.{ .rm8, .r8 }, &.{ 0x38 }, 0, .rex, .none }, + .{ .cmp, .mr, &.{ .rm16, .r16 }, &.{ 0x39 }, 0, .none, .none }, + .{ .cmp, .mr, &.{ .rm32, .r32 }, &.{ 0x39 }, 0, .none, .none }, + .{ .cmp, .mr, &.{ .rm64, .r64 }, &.{ 0x39 }, 0, .long, .none }, + .{ .cmp, .rm, &.{ .r8, .rm8 }, &.{ 0x3a }, 0, .none, .none }, + .{ .cmp, .rm, &.{ .r8, .rm8 }, &.{ 0x3a }, 0, .rex, .none }, + .{ .cmp, .rm, &.{ .r16, .rm16 }, &.{ 0x3b }, 0, .none, .none }, + .{ .cmp, .rm, &.{ .r32, .rm32 }, &.{ 0x3b }, 0, .none, .none }, + .{ .cmp, .rm, &.{ .r64, .rm64 }, &.{ 0x3b }, 0, .long, .none }, - .{ .cmps, .np, &.{ .m8, .m8 }, &.{ 0xa6 }, 0, .none }, - .{ .cmps, .np, &.{ .m16, .m16 }, &.{ 0xa7 }, 0, .none }, - .{ .cmps, .np, &.{ .m32, .m32 }, &.{ 0xa7 }, 0, .none }, - .{ .cmps, .np, &.{ .m64, .m64 }, &.{ 0xa7 }, 0, .long }, + .{ .cmps, .np, &.{ .m8, .m8 }, &.{ 0xa6 }, 0, .none, .none }, + .{ .cmps, .np, &.{ .m16, .m16 }, &.{ 0xa7 }, 0, .none, .none }, + .{ .cmps, .np, &.{ .m32, .m32 }, &.{ 0xa7 }, 0, .none, .none }, + .{ .cmps, .np, &.{ .m64, .m64 }, &.{ 0xa7 }, 0, .long, .none }, - .{ .cmpsb, .np, &.{}, &.{ 0xa6 }, 0, .none }, - .{ .cmpsw, .np, &.{}, &.{ 0xa7 }, 0, .short }, - .{ .cmpsd, .np, &.{}, &.{ 0xa7 }, 0, .none }, - .{ .cmpsq, .np, &.{}, &.{ 0xa7 }, 0, .long }, + .{ .cmpsb, .np, &.{}, &.{ 0xa6 }, 0, .none, .none }, + .{ .cmpsw, .np, &.{}, &.{ 0xa7 }, 0, .short, .none }, + .{ .cmpsd, .np, &.{}, &.{ 0xa7 }, 0, .none, .none }, + .{ .cmpsq, .np, &.{}, &.{ 0xa7 }, 0, .long, .none }, - .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .none }, - .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .rex }, - .{ .cmpxchg, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xb1 }, 0, .none }, - .{ .cmpxchg, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xb1 }, 0, .none }, - .{ .cmpxchg, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xb1 }, 0, .long }, + .{ .cmpxchg, .mr, &.{ 
.rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .none, .none }, + .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .rex, .none }, + .{ .cmpxchg, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xb1 }, 0, .none, .none }, + .{ .cmpxchg, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xb1 }, 0, .none, .none }, + .{ .cmpxchg, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xb1 }, 0, .long, .none }, - .{ .cmpxchg8b , .m, &.{ .m64 }, &.{ 0x0f, 0xc7 }, 1, .none }, - .{ .cmpxchg16b, .m, &.{ .m128 }, &.{ 0x0f, 0xc7 }, 1, .long }, + .{ .cmpxchg8b, .m, &.{ .m64 }, &.{ 0x0f, 0xc7 }, 1, .none, .none }, + .{ .cmpxchg16b, .m, &.{ .m128 }, &.{ 0x0f, 0xc7 }, 1, .long, .none }, - .{ .div, .m, &.{ .rm8 }, &.{ 0xf6 }, 6, .none }, - .{ .div, .m, &.{ .rm8 }, &.{ 0xf6 }, 6, .rex }, - .{ .div, .m, &.{ .rm16 }, &.{ 0xf7 }, 6, .none }, - .{ .div, .m, &.{ .rm32 }, &.{ 0xf7 }, 6, .none }, - .{ .div, .m, &.{ .rm64 }, &.{ 0xf7 }, 6, .long }, + .{ .div, .m, &.{ .rm8 }, &.{ 0xf6 }, 6, .none, .none }, + .{ .div, .m, &.{ .rm8 }, &.{ 0xf6 }, 6, .rex, .none }, + .{ .div, .m, &.{ .rm16 }, &.{ 0xf7 }, 6, .none, .none }, + .{ .div, .m, &.{ .rm32 }, &.{ 0xf7 }, 6, .none, .none }, + .{ .div, .m, &.{ .rm64 }, &.{ 0xf7 }, 6, .long, .none }, - .{ .fisttp, .m, &.{ .m16 }, &.{ 0xdf }, 1, .fpu }, - .{ .fisttp, .m, &.{ .m32 }, &.{ 0xdb }, 1, .fpu }, - .{ .fisttp, .m, &.{ .m64 }, &.{ 0xdd }, 1, .fpu }, + .{ .fisttp, .m, &.{ .m16 }, &.{ 0xdf }, 1, .none, .x87 }, + .{ .fisttp, .m, &.{ .m32 }, &.{ 0xdb }, 1, .none, .x87 }, + .{ .fisttp, .m, &.{ .m64 }, &.{ 0xdd }, 1, .none, .x87 }, - .{ .fld, .m, &.{ .m32 }, &.{ 0xd9 }, 0, .fpu }, - .{ .fld, .m, &.{ .m64 }, &.{ 0xdd }, 0, .fpu }, - .{ .fld, .m, &.{ .m80 }, &.{ 0xdb }, 5, .fpu }, + .{ .fld, .m, &.{ .m32 }, &.{ 0xd9 }, 0, .none, .x87 }, + .{ .fld, .m, &.{ .m64 }, &.{ 0xdd }, 0, .none, .x87 }, + .{ .fld, .m, &.{ .m80 }, &.{ 0xdb }, 5, .none, .x87 }, - .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .none }, - .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .rex }, - .{ .idiv, .m, &.{ .rm16 }, &.{ 0xf7 }, 7, .none }, - .{ .idiv, .m, &.{ .rm32 }, &.{ 0xf7 }, 7, .none }, - .{ .idiv, .m, &.{ .rm64 }, &.{ 0xf7 }, 7, .long }, + .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .none, .none }, + .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .rex, .none }, + .{ .idiv, .m, &.{ .rm16 }, &.{ 0xf7 }, 7, .none, .none }, + .{ .idiv, .m, &.{ .rm32 }, &.{ 0xf7 }, 7, .none, .none }, + .{ .idiv, .m, &.{ .rm64 }, &.{ 0xf7 }, 7, .long, .none }, - .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .none }, - .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .rex }, - .{ .imul, .m, &.{ .rm16, }, &.{ 0xf7 }, 5, .none }, - .{ .imul, .m, &.{ .rm32, }, &.{ 0xf7 }, 5, .none }, - .{ .imul, .m, &.{ .rm64, }, &.{ 0xf7 }, 5, .long }, - .{ .imul, .rm, &.{ .r16, .rm16, }, &.{ 0x0f, 0xaf }, 0, .none }, - .{ .imul, .rm, &.{ .r32, .rm32, }, &.{ 0x0f, 0xaf }, 0, .none }, - .{ .imul, .rm, &.{ .r64, .rm64, }, &.{ 0x0f, 0xaf }, 0, .long }, - .{ .imul, .rmi, &.{ .r16, .rm16, .imm8s }, &.{ 0x6b }, 0, .none }, - .{ .imul, .rmi, &.{ .r32, .rm32, .imm8s }, &.{ 0x6b }, 0, .none }, - .{ .imul, .rmi, &.{ .r64, .rm64, .imm8s }, &.{ 0x6b }, 0, .long }, - .{ .imul, .rmi, &.{ .r16, .rm16, .imm16 }, &.{ 0x69 }, 0, .none }, - .{ .imul, .rmi, &.{ .r32, .rm32, .imm32 }, &.{ 0x69 }, 0, .none }, - .{ .imul, .rmi, &.{ .r64, .rm64, .imm32 }, &.{ 0x69 }, 0, .long }, + .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .none, .none }, + .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .rex, .none }, + .{ .imul, .m, &.{ .rm16, }, &.{ 0xf7 }, 5, .none, .none }, + .{ .imul, .m, &.{ .rm32, }, &.{ 0xf7 }, 5, .none, .none }, + .{ .imul, .m, &.{ .rm64, }, &.{ 0xf7 }, 5, 
.long, .none }, + .{ .imul, .rm, &.{ .r16, .rm16, }, &.{ 0x0f, 0xaf }, 0, .none, .none }, + .{ .imul, .rm, &.{ .r32, .rm32, }, &.{ 0x0f, 0xaf }, 0, .none, .none }, + .{ .imul, .rm, &.{ .r64, .rm64, }, &.{ 0x0f, 0xaf }, 0, .long, .none }, + .{ .imul, .rmi, &.{ .r16, .rm16, .imm8s }, &.{ 0x6b }, 0, .none, .none }, + .{ .imul, .rmi, &.{ .r32, .rm32, .imm8s }, &.{ 0x6b }, 0, .none, .none }, + .{ .imul, .rmi, &.{ .r64, .rm64, .imm8s }, &.{ 0x6b }, 0, .long, .none }, + .{ .imul, .rmi, &.{ .r16, .rm16, .imm16 }, &.{ 0x69 }, 0, .none, .none }, + .{ .imul, .rmi, &.{ .r32, .rm32, .imm32 }, &.{ 0x69 }, 0, .none, .none }, + .{ .imul, .rmi, &.{ .r64, .rm64, .imm32 }, &.{ 0x69 }, 0, .long, .none }, - .{ .int3, .np, &.{}, &.{ 0xcc }, 0, .none }, + .{ .int3, .np, &.{}, &.{ 0xcc }, 0, .none, .none }, - .{ .ja, .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none }, - .{ .jae, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none }, - .{ .jb, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none }, - .{ .jbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none }, - .{ .jc, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none }, - .{ .jrcxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .none }, - .{ .je, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none }, - .{ .jg, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none }, - .{ .jge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none }, - .{ .jl, .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none }, - .{ .jle, .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none }, - .{ .jna, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none }, - .{ .jnae, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none }, - .{ .jnb, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none }, - .{ .jnbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none }, - .{ .jnc, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none }, - .{ .jne, .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none }, - .{ .jng, .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none }, - .{ .jnge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none }, - .{ .jnl, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none }, - .{ .jnle, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none }, - .{ .jno, .d, &.{ .rel32 }, &.{ 0x0f, 0x81 }, 0, .none }, - .{ .jnp, .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none }, - .{ .jns, .d, &.{ .rel32 }, &.{ 0x0f, 0x89 }, 0, .none }, - .{ .jnz, .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none }, - .{ .jo, .d, &.{ .rel32 }, &.{ 0x0f, 0x80 }, 0, .none }, - .{ .jp, .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none }, - .{ .jpe, .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none }, - .{ .jpo, .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none }, - .{ .js, .d, &.{ .rel32 }, &.{ 0x0f, 0x88 }, 0, .none }, - .{ .jz, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none }, + .{ .ja, .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none, .none }, + .{ .jae, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none }, + .{ .jb, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none }, + .{ .jbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none, .none }, + .{ .jc, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none }, + .{ .jrcxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .none, .none }, + .{ .je, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none, .none }, + .{ .jg, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none, .none }, + .{ .jge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none, .none }, + .{ .jl, .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none, .none }, + .{ .jle, .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none, .none }, + .{ .jna, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none, .none }, + .{ .jnae, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none }, + .{ .jnb, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none }, + .{ .jnbe, .d, &.{ 
.rel32 }, &.{ 0x0f, 0x87 }, 0, .none, .none }, + .{ .jnc, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none }, + .{ .jne, .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none, .none }, + .{ .jng, .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none, .none }, + .{ .jnge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none, .none }, + .{ .jnl, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none, .none }, + .{ .jnle, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none, .none }, + .{ .jno, .d, &.{ .rel32 }, &.{ 0x0f, 0x81 }, 0, .none, .none }, + .{ .jnp, .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none, .none }, + .{ .jns, .d, &.{ .rel32 }, &.{ 0x0f, 0x89 }, 0, .none, .none }, + .{ .jnz, .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none, .none }, + .{ .jo, .d, &.{ .rel32 }, &.{ 0x0f, 0x80 }, 0, .none, .none }, + .{ .jp, .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none, .none }, + .{ .jpe, .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none, .none }, + .{ .jpo, .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none, .none }, + .{ .js, .d, &.{ .rel32 }, &.{ 0x0f, 0x88 }, 0, .none, .none }, + .{ .jz, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none, .none }, - .{ .jmp, .d, &.{ .rel32 }, &.{ 0xe9 }, 0, .none }, - .{ .jmp, .m, &.{ .rm64 }, &.{ 0xff }, 4, .none }, + .{ .jmp, .d, &.{ .rel32 }, &.{ 0xe9 }, 0, .none, .none }, + .{ .jmp, .m, &.{ .rm64 }, &.{ 0xff }, 4, .none, .none }, - .{ .lea, .rm, &.{ .r16, .m }, &.{ 0x8d }, 0, .none }, - .{ .lea, .rm, &.{ .r32, .m }, &.{ 0x8d }, 0, .none }, - .{ .lea, .rm, &.{ .r64, .m }, &.{ 0x8d }, 0, .long }, + .{ .lea, .rm, &.{ .r16, .m }, &.{ 0x8d }, 0, .none, .none }, + .{ .lea, .rm, &.{ .r32, .m }, &.{ 0x8d }, 0, .none, .none }, + .{ .lea, .rm, &.{ .r64, .m }, &.{ 0x8d }, 0, .long, .none }, - .{ .lfence, .np, &.{}, &.{ 0x0f, 0xae, 0xe8 }, 0, .none }, + .{ .lfence, .np, &.{}, &.{ 0x0f, 0xae, 0xe8 }, 0, .none, .none }, - .{ .lods, .np, &.{ .m8 }, &.{ 0xac }, 0, .none }, - .{ .lods, .np, &.{ .m16 }, &.{ 0xad }, 0, .none }, - .{ .lods, .np, &.{ .m32 }, &.{ 0xad }, 0, .none }, - .{ .lods, .np, &.{ .m64 }, &.{ 0xad }, 0, .long }, + .{ .lods, .np, &.{ .m8 }, &.{ 0xac }, 0, .none, .none }, + .{ .lods, .np, &.{ .m16 }, &.{ 0xad }, 0, .none, .none }, + .{ .lods, .np, &.{ .m32 }, &.{ 0xad }, 0, .none, .none }, + .{ .lods, .np, &.{ .m64 }, &.{ 0xad }, 0, .long, .none }, - .{ .lodsb, .np, &.{}, &.{ 0xac }, 0, .none }, - .{ .lodsw, .np, &.{}, &.{ 0xad }, 0, .short }, - .{ .lodsd, .np, &.{}, &.{ 0xad }, 0, .none }, - .{ .lodsq, .np, &.{}, &.{ 0xad }, 0, .long }, + .{ .lodsb, .np, &.{}, &.{ 0xac }, 0, .none, .none }, + .{ .lodsw, .np, &.{}, &.{ 0xad }, 0, .short, .none }, + .{ .lodsd, .np, &.{}, &.{ 0xad }, 0, .none, .none }, + .{ .lodsq, .np, &.{}, &.{ 0xad }, 0, .long, .none }, - .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none }, - .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none }, - .{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long }, + .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .none }, + .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .none }, + .{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long, .none }, - .{ .mfence, .np, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none }, + .{ .mfence, .np, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none, .none }, - .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .none }, - .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .rex }, - .{ .mov, .mr, &.{ .rm16, .r16 }, &.{ 0x89 }, 0, .none }, - .{ .mov, .mr, &.{ .rm32, .r32 }, &.{ 0x89 }, 0, .none }, - .{ .mov, .mr, &.{ .rm64, .r64 }, &.{ 0x89 }, 0, .long }, - 
.{ .mov, .rm, &.{ .r8, .rm8 }, &.{ 0x8a }, 0, .none }, - .{ .mov, .rm, &.{ .r8, .rm8 }, &.{ 0x8a }, 0, .rex }, - .{ .mov, .rm, &.{ .r16, .rm16 }, &.{ 0x8b }, 0, .none }, - .{ .mov, .rm, &.{ .r32, .rm32 }, &.{ 0x8b }, 0, .none }, - .{ .mov, .rm, &.{ .r64, .rm64 }, &.{ 0x8b }, 0, .long }, - .{ .mov, .mr, &.{ .rm16, .sreg }, &.{ 0x8c }, 0, .none }, - .{ .mov, .mr, &.{ .rm64, .sreg }, &.{ 0x8c }, 0, .long }, - .{ .mov, .rm, &.{ .sreg, .rm16 }, &.{ 0x8e }, 0, .none }, - .{ .mov, .rm, &.{ .sreg, .rm64 }, &.{ 0x8e }, 0, .long }, - .{ .mov, .fd, &.{ .al, .moffs }, &.{ 0xa0 }, 0, .none }, - .{ .mov, .fd, &.{ .ax, .moffs }, &.{ 0xa1 }, 0, .none }, - .{ .mov, .fd, &.{ .eax, .moffs }, &.{ 0xa1 }, 0, .none }, - .{ .mov, .fd, &.{ .rax, .moffs }, &.{ 0xa1 }, 0, .long }, - .{ .mov, .td, &.{ .moffs, .al }, &.{ 0xa2 }, 0, .none }, - .{ .mov, .td, &.{ .moffs, .ax }, &.{ 0xa3 }, 0, .none }, - .{ .mov, .td, &.{ .moffs, .eax }, &.{ 0xa3 }, 0, .none }, - .{ .mov, .td, &.{ .moffs, .rax }, &.{ 0xa3 }, 0, .long }, - .{ .mov, .oi, &.{ .r8, .imm8 }, &.{ 0xb0 }, 0, .none }, - .{ .mov, .oi, &.{ .r8, .imm8 }, &.{ 0xb0 }, 0, .rex }, - .{ .mov, .oi, &.{ .r16, .imm16 }, &.{ 0xb8 }, 0, .none }, - .{ .mov, .oi, &.{ .r32, .imm32 }, &.{ 0xb8 }, 0, .none }, - .{ .mov, .oi, &.{ .r64, .imm64 }, &.{ 0xb8 }, 0, .long }, - .{ .mov, .mi, &.{ .rm8, .imm8 }, &.{ 0xc6 }, 0, .none }, - .{ .mov, .mi, &.{ .rm8, .imm8 }, &.{ 0xc6 }, 0, .rex }, - .{ .mov, .mi, &.{ .rm16, .imm16 }, &.{ 0xc7 }, 0, .none }, - .{ .mov, .mi, &.{ .rm32, .imm32 }, &.{ 0xc7 }, 0, .none }, - .{ .mov, .mi, &.{ .rm64, .imm32s }, &.{ 0xc7 }, 0, .long }, + .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .none, .none }, + .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .rex, .none }, + .{ .mov, .mr, &.{ .rm16, .r16 }, &.{ 0x89 }, 0, .none, .none }, + .{ .mov, .mr, &.{ .rm32, .r32 }, &.{ 0x89 }, 0, .none, .none }, + .{ .mov, .mr, &.{ .rm64, .r64 }, &.{ 0x89 }, 0, .long, .none }, + .{ .mov, .rm, &.{ .r8, .rm8 }, &.{ 0x8a }, 0, .none, .none }, + .{ .mov, .rm, &.{ .r8, .rm8 }, &.{ 0x8a }, 0, .rex, .none }, + .{ .mov, .rm, &.{ .r16, .rm16 }, &.{ 0x8b }, 0, .none, .none }, + .{ .mov, .rm, &.{ .r32, .rm32 }, &.{ 0x8b }, 0, .none, .none }, + .{ .mov, .rm, &.{ .r64, .rm64 }, &.{ 0x8b }, 0, .long, .none }, + .{ .mov, .mr, &.{ .rm16, .sreg }, &.{ 0x8c }, 0, .none, .none }, + .{ .mov, .mr, &.{ .rm64, .sreg }, &.{ 0x8c }, 0, .long, .none }, + .{ .mov, .rm, &.{ .sreg, .rm16 }, &.{ 0x8e }, 0, .none, .none }, + .{ .mov, .rm, &.{ .sreg, .rm64 }, &.{ 0x8e }, 0, .long, .none }, + .{ .mov, .fd, &.{ .al, .moffs }, &.{ 0xa0 }, 0, .none, .none }, + .{ .mov, .fd, &.{ .ax, .moffs }, &.{ 0xa1 }, 0, .none, .none }, + .{ .mov, .fd, &.{ .eax, .moffs }, &.{ 0xa1 }, 0, .none, .none }, + .{ .mov, .fd, &.{ .rax, .moffs }, &.{ 0xa1 }, 0, .long, .none }, + .{ .mov, .td, &.{ .moffs, .al }, &.{ 0xa2 }, 0, .none, .none }, + .{ .mov, .td, &.{ .moffs, .ax }, &.{ 0xa3 }, 0, .none, .none }, + .{ .mov, .td, &.{ .moffs, .eax }, &.{ 0xa3 }, 0, .none, .none }, + .{ .mov, .td, &.{ .moffs, .rax }, &.{ 0xa3 }, 0, .long, .none }, + .{ .mov, .oi, &.{ .r8, .imm8 }, &.{ 0xb0 }, 0, .none, .none }, + .{ .mov, .oi, &.{ .r8, .imm8 }, &.{ 0xb0 }, 0, .rex, .none }, + .{ .mov, .oi, &.{ .r16, .imm16 }, &.{ 0xb8 }, 0, .none, .none }, + .{ .mov, .oi, &.{ .r32, .imm32 }, &.{ 0xb8 }, 0, .none, .none }, + .{ .mov, .oi, &.{ .r64, .imm64 }, &.{ 0xb8 }, 0, .long, .none }, + .{ .mov, .mi, &.{ .rm8, .imm8 }, &.{ 0xc6 }, 0, .none, .none }, + .{ .mov, .mi, &.{ .rm8, .imm8 }, &.{ 0xc6 }, 0, .rex, .none }, + .{ .mov, .mi, &.{ .rm16, .imm16 }, &.{ 
0xc7 }, 0, .none, .none }, + .{ .mov, .mi, &.{ .rm32, .imm32 }, &.{ 0xc7 }, 0, .none, .none }, + .{ .mov, .mi, &.{ .rm64, .imm32s }, &.{ 0xc7 }, 0, .long, .none }, - .{ .movbe, .rm, &.{ .r16, .m16 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none }, - .{ .movbe, .rm, &.{ .r32, .m32 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none }, - .{ .movbe, .rm, &.{ .r64, .m64 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .long }, - .{ .movbe, .mr, &.{ .m16, .r16 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none }, - .{ .movbe, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none }, - .{ .movbe, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .long }, + .{ .movbe, .rm, &.{ .r16, .m16 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none, .none }, + .{ .movbe, .rm, &.{ .r32, .m32 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none, .none }, + .{ .movbe, .rm, &.{ .r64, .m64 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .long, .none }, + .{ .movbe, .mr, &.{ .m16, .r16 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none, .none }, + .{ .movbe, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none, .none }, + .{ .movbe, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .long, .none }, - .{ .movs, .np, &.{ .m8, .m8 }, &.{ 0xa4 }, 0, .none }, - .{ .movs, .np, &.{ .m16, .m16 }, &.{ 0xa5 }, 0, .none }, - .{ .movs, .np, &.{ .m32, .m32 }, &.{ 0xa5 }, 0, .none }, - .{ .movs, .np, &.{ .m64, .m64 }, &.{ 0xa5 }, 0, .long }, + .{ .movs, .np, &.{ .m8, .m8 }, &.{ 0xa4 }, 0, .none, .none }, + .{ .movs, .np, &.{ .m16, .m16 }, &.{ 0xa5 }, 0, .none, .none }, + .{ .movs, .np, &.{ .m32, .m32 }, &.{ 0xa5 }, 0, .none, .none }, + .{ .movs, .np, &.{ .m64, .m64 }, &.{ 0xa5 }, 0, .long, .none }, - .{ .movsb, .np, &.{}, &.{ 0xa4 }, 0, .none }, - .{ .movsw, .np, &.{}, &.{ 0xa5 }, 0, .short }, - .{ .movsd, .np, &.{}, &.{ 0xa5 }, 0, .none }, - .{ .movsq, .np, &.{}, &.{ 0xa5 }, 0, .long }, + .{ .movsb, .np, &.{}, &.{ 0xa4 }, 0, .none, .none }, + .{ .movsw, .np, &.{}, &.{ 0xa5 }, 0, .short, .none }, + .{ .movsd, .np, &.{}, &.{ 0xa5 }, 0, .none, .none }, + .{ .movsq, .np, &.{}, &.{ 0xa5 }, 0, .long, .none }, - .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .none }, - .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .rex }, - .{ .movsx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xbe }, 0, .none }, - .{ .movsx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xbe }, 0, .rex }, - .{ .movsx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xbe }, 0, .long }, - .{ .movsx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xbf }, 0, .none }, - .{ .movsx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xbf }, 0, .long }, + .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .none, .none }, + .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .rex, .none }, + .{ .movsx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xbe }, 0, .none, .none }, + .{ .movsx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xbe }, 0, .rex, .none }, + .{ .movsx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xbe }, 0, .long, .none }, + .{ .movsx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xbf }, 0, .none, .none }, + .{ .movsx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xbf }, 0, .long, .none }, // This instruction is discouraged. 
- .{ .movsxd, .rm, &.{ .r32, .rm32 }, &.{ 0x63 }, 0, .none }, - .{ .movsxd, .rm, &.{ .r64, .rm32 }, &.{ 0x63 }, 0, .long }, + .{ .movsxd, .rm, &.{ .r32, .rm32 }, &.{ 0x63 }, 0, .none, .none }, + .{ .movsxd, .rm, &.{ .r64, .rm32 }, &.{ 0x63 }, 0, .long, .none }, - .{ .movzx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .none }, - .{ .movzx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .none }, - .{ .movzx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .long }, - .{ .movzx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .none }, - .{ .movzx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .long }, + .{ .movzx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .none, .none }, + .{ .movzx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .none, .none }, + .{ .movzx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .long, .none }, + .{ .movzx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .none, .none }, + .{ .movzx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .long, .none }, - .{ .mul, .m, &.{ .rm8 }, &.{ 0xf6 }, 4, .none }, - .{ .mul, .m, &.{ .rm8 }, &.{ 0xf6 }, 4, .rex }, - .{ .mul, .m, &.{ .rm16 }, &.{ 0xf7 }, 4, .none }, - .{ .mul, .m, &.{ .rm32 }, &.{ 0xf7 }, 4, .none }, - .{ .mul, .m, &.{ .rm64 }, &.{ 0xf7 }, 4, .long }, + .{ .mul, .m, &.{ .rm8 }, &.{ 0xf6 }, 4, .none, .none }, + .{ .mul, .m, &.{ .rm8 }, &.{ 0xf6 }, 4, .rex, .none }, + .{ .mul, .m, &.{ .rm16 }, &.{ 0xf7 }, 4, .none, .none }, + .{ .mul, .m, &.{ .rm32 }, &.{ 0xf7 }, 4, .none, .none }, + .{ .mul, .m, &.{ .rm64 }, &.{ 0xf7 }, 4, .long, .none }, - .{ .neg, .m, &.{ .rm8 }, &.{ 0xf6 }, 3, .none }, - .{ .neg, .m, &.{ .rm8 }, &.{ 0xf6 }, 3, .rex }, - .{ .neg, .m, &.{ .rm16 }, &.{ 0xf7 }, 3, .none }, - .{ .neg, .m, &.{ .rm32 }, &.{ 0xf7 }, 3, .none }, - .{ .neg, .m, &.{ .rm64 }, &.{ 0xf7 }, 3, .long }, + .{ .neg, .m, &.{ .rm8 }, &.{ 0xf6 }, 3, .none, .none }, + .{ .neg, .m, &.{ .rm8 }, &.{ 0xf6 }, 3, .rex, .none }, + .{ .neg, .m, &.{ .rm16 }, &.{ 0xf7 }, 3, .none, .none }, + .{ .neg, .m, &.{ .rm32 }, &.{ 0xf7 }, 3, .none, .none }, + .{ .neg, .m, &.{ .rm64 }, &.{ 0xf7 }, 3, .long, .none }, - .{ .nop, .np, &.{}, &.{ 0x90 }, 0, .none }, + .{ .nop, .np, &.{}, &.{ 0x90 }, 0, .none, .none }, - .{ .not, .m, &.{ .rm8 }, &.{ 0xf6 }, 2, .none }, - .{ .not, .m, &.{ .rm8 }, &.{ 0xf6 }, 2, .rex }, - .{ .not, .m, &.{ .rm16 }, &.{ 0xf7 }, 2, .none }, - .{ .not, .m, &.{ .rm32 }, &.{ 0xf7 }, 2, .none }, - .{ .not, .m, &.{ .rm64 }, &.{ 0xf7 }, 2, .long }, + .{ .not, .m, &.{ .rm8 }, &.{ 0xf6 }, 2, .none, .none }, + .{ .not, .m, &.{ .rm8 }, &.{ 0xf6 }, 2, .rex, .none }, + .{ .not, .m, &.{ .rm16 }, &.{ 0xf7 }, 2, .none, .none }, + .{ .not, .m, &.{ .rm32 }, &.{ 0xf7 }, 2, .none, .none }, + .{ .not, .m, &.{ .rm64 }, &.{ 0xf7 }, 2, .long, .none }, - .{ .@"or", .zi, &.{ .al, .imm8 }, &.{ 0x0c }, 0, .none }, - .{ .@"or", .zi, &.{ .ax, .imm16 }, &.{ 0x0d }, 0, .none }, - .{ .@"or", .zi, &.{ .eax, .imm32 }, &.{ 0x0d }, 0, .none }, - .{ .@"or", .zi, &.{ .rax, .imm32s }, &.{ 0x0d }, 0, .long }, - .{ .@"or", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 1, .none }, - .{ .@"or", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 1, .rex }, - .{ .@"or", .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 1, .none }, - .{ .@"or", .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 1, .none }, - .{ .@"or", .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 1, .long }, - .{ .@"or", .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 1, .none }, - .{ .@"or", .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 1, .none }, - .{ .@"or", .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 1, .long }, - .{ .@"or", .mr, &.{ .rm8, .r8 }, &.{ 0x08 }, 0, .none }, - .{ .@"or", .mr, &.{ .rm8, .r8 }, &.{ 
0x08 }, 0, .rex }, - .{ .@"or", .mr, &.{ .rm16, .r16 }, &.{ 0x09 }, 0, .none }, - .{ .@"or", .mr, &.{ .rm32, .r32 }, &.{ 0x09 }, 0, .none }, - .{ .@"or", .mr, &.{ .rm64, .r64 }, &.{ 0x09 }, 0, .long }, - .{ .@"or", .rm, &.{ .r8, .rm8 }, &.{ 0x0a }, 0, .none }, - .{ .@"or", .rm, &.{ .r8, .rm8 }, &.{ 0x0a }, 0, .rex }, - .{ .@"or", .rm, &.{ .r16, .rm16 }, &.{ 0x0b }, 0, .none }, - .{ .@"or", .rm, &.{ .r32, .rm32 }, &.{ 0x0b }, 0, .none }, - .{ .@"or", .rm, &.{ .r64, .rm64 }, &.{ 0x0b }, 0, .long }, + .{ .@"or", .zi, &.{ .al, .imm8 }, &.{ 0x0c }, 0, .none, .none }, + .{ .@"or", .zi, &.{ .ax, .imm16 }, &.{ 0x0d }, 0, .none, .none }, + .{ .@"or", .zi, &.{ .eax, .imm32 }, &.{ 0x0d }, 0, .none, .none }, + .{ .@"or", .zi, &.{ .rax, .imm32s }, &.{ 0x0d }, 0, .long, .none }, + .{ .@"or", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 1, .none, .none }, + .{ .@"or", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 1, .rex, .none }, + .{ .@"or", .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 1, .none, .none }, + .{ .@"or", .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 1, .none, .none }, + .{ .@"or", .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 1, .long, .none }, + .{ .@"or", .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 1, .none, .none }, + .{ .@"or", .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 1, .none, .none }, + .{ .@"or", .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 1, .long, .none }, + .{ .@"or", .mr, &.{ .rm8, .r8 }, &.{ 0x08 }, 0, .none, .none }, + .{ .@"or", .mr, &.{ .rm8, .r8 }, &.{ 0x08 }, 0, .rex, .none }, + .{ .@"or", .mr, &.{ .rm16, .r16 }, &.{ 0x09 }, 0, .none, .none }, + .{ .@"or", .mr, &.{ .rm32, .r32 }, &.{ 0x09 }, 0, .none, .none }, + .{ .@"or", .mr, &.{ .rm64, .r64 }, &.{ 0x09 }, 0, .long, .none }, + .{ .@"or", .rm, &.{ .r8, .rm8 }, &.{ 0x0a }, 0, .none, .none }, + .{ .@"or", .rm, &.{ .r8, .rm8 }, &.{ 0x0a }, 0, .rex, .none }, + .{ .@"or", .rm, &.{ .r16, .rm16 }, &.{ 0x0b }, 0, .none, .none }, + .{ .@"or", .rm, &.{ .r32, .rm32 }, &.{ 0x0b }, 0, .none, .none }, + .{ .@"or", .rm, &.{ .r64, .rm64 }, &.{ 0x0b }, 0, .long, .none }, - .{ .pop, .o, &.{ .r16 }, &.{ 0x58 }, 0, .none }, - .{ .pop, .o, &.{ .r64 }, &.{ 0x58 }, 0, .none }, - .{ .pop, .m, &.{ .rm16 }, &.{ 0x8f }, 0, .none }, - .{ .pop, .m, &.{ .rm64 }, &.{ 0x8f }, 0, .none }, + .{ .pop, .o, &.{ .r16 }, &.{ 0x58 }, 0, .none, .none }, + .{ .pop, .o, &.{ .r64 }, &.{ 0x58 }, 0, .none, .none }, + .{ .pop, .m, &.{ .rm16 }, &.{ 0x8f }, 0, .none, .none }, + .{ .pop, .m, &.{ .rm64 }, &.{ 0x8f }, 0, .none, .none }, - .{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none }, - .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none }, - .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long }, + .{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .none }, + .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .none }, + .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .none }, - .{ .push, .o, &.{ .r16 }, &.{ 0x50 }, 0, .none }, - .{ .push, .o, &.{ .r64 }, &.{ 0x50 }, 0, .none }, - .{ .push, .m, &.{ .rm16 }, &.{ 0xff }, 6, .none }, - .{ .push, .m, &.{ .rm64 }, &.{ 0xff }, 6, .none }, - .{ .push, .i, &.{ .imm8 }, &.{ 0x6a }, 0, .none }, - .{ .push, .i, &.{ .imm16 }, &.{ 0x68 }, 0, .none }, - .{ .push, .i, &.{ .imm32 }, &.{ 0x68 }, 0, .none }, + .{ .push, .o, &.{ .r16 }, &.{ 0x50 }, 0, .none, .none }, + .{ .push, .o, &.{ .r64 }, &.{ 0x50 }, 0, .none, .none }, + .{ .push, .m, &.{ .rm16 }, &.{ 0xff }, 6, .none, .none }, + .{ .push, .m, &.{ .rm64 }, &.{ 0xff }, 6, .none, .none }, + .{ .push, .i, &.{ .imm8 }, &.{ 0x6a 
}, 0, .none, .none }, + .{ .push, .i, &.{ .imm16 }, &.{ 0x68 }, 0, .none, .none }, + .{ .push, .i, &.{ .imm32 }, &.{ 0x68 }, 0, .none, .none }, - .{ .ret, .np, &.{}, &.{ 0xc3 }, 0, .none }, + .{ .ret, .np, &.{}, &.{ 0xc3 }, 0, .none, .none }, - .{ .rcl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 2, .none }, - .{ .rcl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 2, .rex }, - .{ .rcl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 2, .none }, - .{ .rcl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 2, .rex }, - .{ .rcl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 2, .none }, - .{ .rcl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 2, .rex }, - .{ .rcl, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 2, .none }, - .{ .rcl, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 2, .none }, - .{ .rcl, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 2, .none }, - .{ .rcl, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 2, .none }, - .{ .rcl, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 2, .long }, - .{ .rcl, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 2, .none }, - .{ .rcl, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 2, .long }, - .{ .rcl, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 2, .none }, - .{ .rcl, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 2, .long }, + .{ .rcl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 2, .none, .none }, + .{ .rcl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 2, .rex, .none }, + .{ .rcl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 2, .none, .none }, + .{ .rcl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 2, .rex, .none }, + .{ .rcl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 2, .none, .none }, + .{ .rcl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 2, .rex, .none }, + .{ .rcl, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 2, .none, .none }, + .{ .rcl, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 2, .none, .none }, + .{ .rcl, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 2, .none, .none }, + .{ .rcl, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 2, .none, .none }, + .{ .rcl, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 2, .long, .none }, + .{ .rcl, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 2, .none, .none }, + .{ .rcl, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 2, .long, .none }, + .{ .rcl, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 2, .none, .none }, + .{ .rcl, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 2, .long, .none }, - .{ .rcr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 3, .none }, - .{ .rcr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 3, .rex }, - .{ .rcr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 3, .none }, - .{ .rcr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 3, .rex }, - .{ .rcr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 3, .none }, - .{ .rcr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 3, .rex }, - .{ .rcr, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 3, .none }, - .{ .rcr, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 3, .none }, - .{ .rcr, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 3, .none }, - .{ .rcr, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 3, .none }, - .{ .rcr, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 3, .long }, - .{ .rcr, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 3, .none }, - .{ .rcr, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 3, .long }, - .{ .rcr, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 3, .none }, - .{ .rcr, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 3, .long }, + .{ .rcr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 3, .none, .none }, + .{ .rcr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 3, .rex, .none }, + .{ .rcr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 3, .none, .none }, + .{ .rcr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 3, .rex, .none }, + .{ .rcr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 3, .none, .none }, + .{ .rcr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 3, .rex, .none }, + .{ .rcr, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 3, .none, .none }, + .{ .rcr, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 3, .none, .none }, + .{ .rcr, .mi, &.{ 
.rm16, .imm8 }, &.{ 0xc1 }, 3, .none, .none }, + .{ .rcr, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 3, .none, .none }, + .{ .rcr, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 3, .long, .none }, + .{ .rcr, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 3, .none, .none }, + .{ .rcr, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 3, .long, .none }, + .{ .rcr, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 3, .none, .none }, + .{ .rcr, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 3, .long, .none }, - .{ .rol, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 0, .none }, - .{ .rol, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 0, .rex }, - .{ .rol, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 0, .none }, - .{ .rol, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 0, .rex }, - .{ .rol, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 0, .none }, - .{ .rol, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 0, .rex }, - .{ .rol, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 0, .none }, - .{ .rol, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 0, .none }, - .{ .rol, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 0, .none }, - .{ .rol, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 0, .none }, - .{ .rol, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 0, .long }, - .{ .rol, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 0, .none }, - .{ .rol, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 0, .long }, - .{ .rol, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 0, .none }, - .{ .rol, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 0, .long }, + .{ .rol, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 0, .none, .none }, + .{ .rol, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 0, .rex, .none }, + .{ .rol, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 0, .none, .none }, + .{ .rol, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 0, .rex, .none }, + .{ .rol, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 0, .none, .none }, + .{ .rol, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 0, .rex, .none }, + .{ .rol, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 0, .none, .none }, + .{ .rol, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 0, .none, .none }, + .{ .rol, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 0, .none, .none }, + .{ .rol, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 0, .none, .none }, + .{ .rol, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 0, .long, .none }, + .{ .rol, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 0, .none, .none }, + .{ .rol, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 0, .long, .none }, + .{ .rol, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 0, .none, .none }, + .{ .rol, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 0, .long, .none }, - .{ .ror, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 1, .none }, - .{ .ror, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 1, .rex }, - .{ .ror, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 1, .none }, - .{ .ror, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 1, .rex }, - .{ .ror, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 1, .none }, - .{ .ror, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 1, .rex }, - .{ .ror, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 1, .none }, - .{ .ror, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 1, .none }, - .{ .ror, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 1, .none }, - .{ .ror, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 1, .none }, - .{ .ror, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 1, .long }, - .{ .ror, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 1, .none }, - .{ .ror, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 1, .long }, - .{ .ror, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 1, .none }, - .{ .ror, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 1, .long }, + .{ .ror, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 1, .none, .none }, + .{ .ror, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 1, .rex, .none }, + .{ .ror, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 1, .none, .none }, + .{ .ror, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 1, .rex, .none }, + .{ .ror, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 1, .none, .none }, + .{ .ror, .mi, 
&.{ .rm8, .imm8 }, &.{ 0xc0 }, 1, .rex, .none }, + .{ .ror, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 1, .none, .none }, + .{ .ror, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 1, .none, .none }, + .{ .ror, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 1, .none, .none }, + .{ .ror, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 1, .none, .none }, + .{ .ror, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 1, .long, .none }, + .{ .ror, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 1, .none, .none }, + .{ .ror, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 1, .long, .none }, + .{ .ror, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 1, .none, .none }, + .{ .ror, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 1, .long, .none }, - .{ .sal, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none }, - .{ .sal, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex }, - .{ .sal, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .none }, - .{ .sal, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 4, .none }, - .{ .sal, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 4, .long }, - .{ .sal, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .none }, - .{ .sal, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .rex }, - .{ .sal, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 4, .none }, - .{ .sal, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 4, .none }, - .{ .sal, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 4, .long }, - .{ .sal, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .none }, - .{ .sal, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .rex }, - .{ .sal, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 4, .none }, - .{ .sal, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 4, .none }, - .{ .sal, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 4, .long }, + .{ .sal, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none, .none }, + .{ .sal, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex, .none }, + .{ .sal, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .none, .none }, + .{ .sal, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 4, .none, .none }, + .{ .sal, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 4, .long, .none }, + .{ .sal, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .none, .none }, + .{ .sal, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .rex, .none }, + .{ .sal, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 4, .none, .none }, + .{ .sal, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 4, .none, .none }, + .{ .sal, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 4, .long, .none }, + .{ .sal, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .none, .none }, + .{ .sal, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .rex, .none }, + .{ .sal, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 4, .none, .none }, + .{ .sal, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 4, .none, .none }, + .{ .sal, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 4, .long, .none }, - .{ .sar, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 7, .none }, - .{ .sar, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 7, .rex }, - .{ .sar, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 7, .none }, - .{ .sar, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 7, .none }, - .{ .sar, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 7, .long }, - .{ .sar, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 7, .none }, - .{ .sar, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 7, .rex }, - .{ .sar, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 7, .none }, - .{ .sar, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 7, .none }, - .{ .sar, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 7, .long }, - .{ .sar, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 7, .none }, - .{ .sar, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 7, .rex }, - .{ .sar, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 7, .none }, - .{ .sar, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 7, .none }, - .{ .sar, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 7, .long }, + .{ .sar, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 7, .none, .none }, + .{ .sar, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 7, .rex, .none }, + .{ 
.sar, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 7, .none, .none }, + .{ .sar, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 7, .none, .none }, + .{ .sar, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 7, .long, .none }, + .{ .sar, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 7, .none, .none }, + .{ .sar, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 7, .rex, .none }, + .{ .sar, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 7, .none, .none }, + .{ .sar, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 7, .none, .none }, + .{ .sar, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 7, .long, .none }, + .{ .sar, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 7, .none, .none }, + .{ .sar, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 7, .rex, .none }, + .{ .sar, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 7, .none, .none }, + .{ .sar, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 7, .none, .none }, + .{ .sar, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 7, .long, .none }, - .{ .sbb, .zi, &.{ .al, .imm8 }, &.{ 0x1c }, 0, .none }, - .{ .sbb, .zi, &.{ .ax, .imm16 }, &.{ 0x1d }, 0, .none }, - .{ .sbb, .zi, &.{ .eax, .imm32 }, &.{ 0x1d }, 0, .none }, - .{ .sbb, .zi, &.{ .rax, .imm32s }, &.{ 0x1d }, 0, .long }, - .{ .sbb, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 3, .none }, - .{ .sbb, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 3, .rex }, - .{ .sbb, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 3, .none }, - .{ .sbb, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 3, .none }, - .{ .sbb, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 3, .long }, - .{ .sbb, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 3, .none }, - .{ .sbb, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 3, .none }, - .{ .sbb, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 3, .long }, - .{ .sbb, .mr, &.{ .rm8, .r8 }, &.{ 0x18 }, 0, .none }, - .{ .sbb, .mr, &.{ .rm8, .r8 }, &.{ 0x18 }, 0, .rex }, - .{ .sbb, .mr, &.{ .rm16, .r16 }, &.{ 0x19 }, 0, .none }, - .{ .sbb, .mr, &.{ .rm32, .r32 }, &.{ 0x19 }, 0, .none }, - .{ .sbb, .mr, &.{ .rm64, .r64 }, &.{ 0x19 }, 0, .long }, - .{ .sbb, .rm, &.{ .r8, .rm8 }, &.{ 0x1a }, 0, .none }, - .{ .sbb, .rm, &.{ .r8, .rm8 }, &.{ 0x1a }, 0, .rex }, - .{ .sbb, .rm, &.{ .r16, .rm16 }, &.{ 0x1b }, 0, .none }, - .{ .sbb, .rm, &.{ .r32, .rm32 }, &.{ 0x1b }, 0, .none }, - .{ .sbb, .rm, &.{ .r64, .rm64 }, &.{ 0x1b }, 0, .long }, + .{ .sbb, .zi, &.{ .al, .imm8 }, &.{ 0x1c }, 0, .none, .none }, + .{ .sbb, .zi, &.{ .ax, .imm16 }, &.{ 0x1d }, 0, .none, .none }, + .{ .sbb, .zi, &.{ .eax, .imm32 }, &.{ 0x1d }, 0, .none, .none }, + .{ .sbb, .zi, &.{ .rax, .imm32s }, &.{ 0x1d }, 0, .long, .none }, + .{ .sbb, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 3, .none, .none }, + .{ .sbb, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 3, .rex, .none }, + .{ .sbb, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 3, .none, .none }, + .{ .sbb, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 3, .none, .none }, + .{ .sbb, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 3, .long, .none }, + .{ .sbb, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 3, .none, .none }, + .{ .sbb, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 3, .none, .none }, + .{ .sbb, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 3, .long, .none }, + .{ .sbb, .mr, &.{ .rm8, .r8 }, &.{ 0x18 }, 0, .none, .none }, + .{ .sbb, .mr, &.{ .rm8, .r8 }, &.{ 0x18 }, 0, .rex, .none }, + .{ .sbb, .mr, &.{ .rm16, .r16 }, &.{ 0x19 }, 0, .none, .none }, + .{ .sbb, .mr, &.{ .rm32, .r32 }, &.{ 0x19 }, 0, .none, .none }, + .{ .sbb, .mr, &.{ .rm64, .r64 }, &.{ 0x19 }, 0, .long, .none }, + .{ .sbb, .rm, &.{ .r8, .rm8 }, &.{ 0x1a }, 0, .none, .none }, + .{ .sbb, .rm, &.{ .r8, .rm8 }, &.{ 0x1a }, 0, .rex, .none }, + .{ .sbb, .rm, &.{ .r16, .rm16 }, &.{ 0x1b }, 0, .none, .none }, + .{ .sbb, .rm, &.{ .r32, .rm32 }, &.{ 0x1b }, 0, .none, .none }, + .{ 
.sbb, .rm, &.{ .r64, .rm64 }, &.{ 0x1b }, 0, .long, .none }, - .{ .scas, .np, &.{ .m8 }, &.{ 0xae }, 0, .none }, - .{ .scas, .np, &.{ .m16 }, &.{ 0xaf }, 0, .none }, - .{ .scas, .np, &.{ .m32 }, &.{ 0xaf }, 0, .none }, - .{ .scas, .np, &.{ .m64 }, &.{ 0xaf }, 0, .long }, + .{ .scas, .np, &.{ .m8 }, &.{ 0xae }, 0, .none, .none }, + .{ .scas, .np, &.{ .m16 }, &.{ 0xaf }, 0, .none, .none }, + .{ .scas, .np, &.{ .m32 }, &.{ 0xaf }, 0, .none, .none }, + .{ .scas, .np, &.{ .m64 }, &.{ 0xaf }, 0, .long, .none }, - .{ .scasb, .np, &.{}, &.{ 0xae }, 0, .none }, - .{ .scasw, .np, &.{}, &.{ 0xaf }, 0, .short }, - .{ .scasd, .np, &.{}, &.{ 0xaf }, 0, .none }, - .{ .scasq, .np, &.{}, &.{ 0xaf }, 0, .long }, + .{ .scasb, .np, &.{}, &.{ 0xae }, 0, .none, .none }, + .{ .scasw, .np, &.{}, &.{ 0xaf }, 0, .short, .none }, + .{ .scasd, .np, &.{}, &.{ 0xaf }, 0, .none, .none }, + .{ .scasq, .np, &.{}, &.{ 0xaf }, 0, .long, .none }, - .{ .seta, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .none }, - .{ .seta, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .rex }, - .{ .setae, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .none }, - .{ .setae, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .rex }, - .{ .setb, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .none }, - .{ .setb, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .rex }, - .{ .setbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .none }, - .{ .setbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .rex }, - .{ .setc, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .none }, - .{ .setc, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .rex }, - .{ .sete, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .none }, - .{ .sete, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .rex }, - .{ .setg, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .none }, - .{ .setg, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .rex }, - .{ .setge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .none }, - .{ .setge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .rex }, - .{ .setl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .none }, - .{ .setl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .rex }, - .{ .setle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .none }, - .{ .setle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .rex }, - .{ .setna, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .none }, - .{ .setna, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .rex }, - .{ .setnae, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .none }, - .{ .setnae, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .rex }, - .{ .setnb, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .none }, - .{ .setnb, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .rex }, - .{ .setnbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .none }, - .{ .setnbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .rex }, - .{ .setnc, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .none }, - .{ .setnc, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .rex }, - .{ .setne, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .none }, - .{ .setne, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .rex }, - .{ .setng, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .none }, - .{ .setng, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .rex }, - .{ .setnge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .none }, - .{ .setnge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .rex }, - .{ .setnl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .none }, - .{ .setnl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .rex }, - .{ .setnle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .none }, - .{ .setnle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .rex }, - .{ .setno, .m, &.{ .rm8 }, &.{ 0x0f, 0x91 }, 0, .none }, - .{ .setno, .m, &.{ .rm8 }, &.{ 0x0f, 0x91 }, 0, .rex }, - .{ .setnp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .none }, - .{ .setnp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .rex }, - .{ .setns, .m, &.{ .rm8 }, &.{ 0x0f, 0x99 }, 0, .none }, 
- .{ .setns, .m, &.{ .rm8 }, &.{ 0x0f, 0x99 }, 0, .rex }, - .{ .setnz, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .none }, - .{ .setnz, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .rex }, - .{ .seto, .m, &.{ .rm8 }, &.{ 0x0f, 0x90 }, 0, .none }, - .{ .seto, .m, &.{ .rm8 }, &.{ 0x0f, 0x90 }, 0, .rex }, - .{ .setp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .none }, - .{ .setp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .rex }, - .{ .setpe, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .none }, - .{ .setpe, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .rex }, - .{ .setpo, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .none }, - .{ .setpo, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .rex }, - .{ .sets, .m, &.{ .rm8 }, &.{ 0x0f, 0x98 }, 0, .none }, - .{ .sets, .m, &.{ .rm8 }, &.{ 0x0f, 0x98 }, 0, .rex }, - .{ .setz, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .none }, - .{ .setz, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .rex }, + .{ .seta, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .none, .none }, + .{ .seta, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .rex, .none }, + .{ .setae, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .none, .none }, + .{ .setae, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .rex, .none }, + .{ .setb, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .none, .none }, + .{ .setb, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .rex, .none }, + .{ .setbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .none, .none }, + .{ .setbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .rex, .none }, + .{ .setc, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .none, .none }, + .{ .setc, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .rex, .none }, + .{ .sete, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .none, .none }, + .{ .sete, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .rex, .none }, + .{ .setg, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .none, .none }, + .{ .setg, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .rex, .none }, + .{ .setge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .none, .none }, + .{ .setge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .rex, .none }, + .{ .setl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .none, .none }, + .{ .setl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .rex, .none }, + .{ .setle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .none, .none }, + .{ .setle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .rex, .none }, + .{ .setna, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .none, .none }, + .{ .setna, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .rex, .none }, + .{ .setnae, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .none, .none }, + .{ .setnae, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .rex, .none }, + .{ .setnb, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .none, .none }, + .{ .setnb, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .rex, .none }, + .{ .setnbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .none, .none }, + .{ .setnbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .rex, .none }, + .{ .setnc, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .none, .none }, + .{ .setnc, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .rex, .none }, + .{ .setne, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .none, .none }, + .{ .setne, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .rex, .none }, + .{ .setng, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .none, .none }, + .{ .setng, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .rex, .none }, + .{ .setnge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .none, .none }, + .{ .setnge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .rex, .none }, + .{ .setnl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .none, .none }, + .{ .setnl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .rex, .none }, + .{ .setnle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .none, .none }, + .{ .setnle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .rex, .none }, + .{ .setno, .m, &.{ .rm8 }, &.{ 0x0f, 0x91 }, 0, .none, .none }, + 
.{ .setno, .m, &.{ .rm8 }, &.{ 0x0f, 0x91 }, 0, .rex, .none }, + .{ .setnp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .none, .none }, + .{ .setnp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .rex, .none }, + .{ .setns, .m, &.{ .rm8 }, &.{ 0x0f, 0x99 }, 0, .none, .none }, + .{ .setns, .m, &.{ .rm8 }, &.{ 0x0f, 0x99 }, 0, .rex, .none }, + .{ .setnz, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .none, .none }, + .{ .setnz, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .rex, .none }, + .{ .seto, .m, &.{ .rm8 }, &.{ 0x0f, 0x90 }, 0, .none, .none }, + .{ .seto, .m, &.{ .rm8 }, &.{ 0x0f, 0x90 }, 0, .rex, .none }, + .{ .setp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .none, .none }, + .{ .setp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .rex, .none }, + .{ .setpe, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .none, .none }, + .{ .setpe, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .rex, .none }, + .{ .setpo, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .none, .none }, + .{ .setpo, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .rex, .none }, + .{ .sets, .m, &.{ .rm8 }, &.{ 0x0f, 0x98 }, 0, .none, .none }, + .{ .sets, .m, &.{ .rm8 }, &.{ 0x0f, 0x98 }, 0, .rex, .none }, + .{ .setz, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .none, .none }, + .{ .setz, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .rex, .none }, - .{ .sfence, .np, &.{}, &.{ 0x0f, 0xae, 0xf8 }, 0, .none }, + .{ .sfence, .np, &.{}, &.{ 0x0f, 0xae, 0xf8 }, 0, .none, .none }, - .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none }, - .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex }, - .{ .shl, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .none }, - .{ .shl, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 4, .none }, - .{ .shl, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 4, .long }, - .{ .shl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .none }, - .{ .shl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .rex }, - .{ .shl, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 4, .none }, - .{ .shl, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 4, .none }, - .{ .shl, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 4, .long }, - .{ .shl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .none }, - .{ .shl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .rex }, - .{ .shl, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 4, .none }, - .{ .shl, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 4, .none }, - .{ .shl, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 4, .long }, + .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none, .none }, + .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex, .none }, + .{ .shl, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .none, .none }, + .{ .shl, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 4, .none, .none }, + .{ .shl, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 4, .long, .none }, + .{ .shl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .none, .none }, + .{ .shl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .rex, .none }, + .{ .shl, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 4, .none, .none }, + .{ .shl, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 4, .none, .none }, + .{ .shl, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 4, .long, .none }, + .{ .shl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .none, .none }, + .{ .shl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .rex, .none }, + .{ .shl, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 4, .none, .none }, + .{ .shl, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 4, .none, .none }, + .{ .shl, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 4, .long, .none }, - .{ .shld, .mri, &.{ .rm16, .r16, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .none }, - .{ .shld, .mrc, &.{ .rm16, .r16, .cl }, &.{ 0x0f, 0xa5 }, 0, .none }, - .{ .shld, .mri, &.{ .rm32, .r32, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .none }, - .{ .shld, .mri, &.{ .rm64, .r64, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .long }, - .{ .shld, .mrc, 
&.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xa5 }, 0, .none }, - .{ .shld, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xa5 }, 0, .long }, + .{ .shld, .mri, &.{ .rm16, .r16, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .none, .none }, + .{ .shld, .mrc, &.{ .rm16, .r16, .cl }, &.{ 0x0f, 0xa5 }, 0, .none, .none }, + .{ .shld, .mri, &.{ .rm32, .r32, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .none, .none }, + .{ .shld, .mri, &.{ .rm64, .r64, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .long, .none }, + .{ .shld, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xa5 }, 0, .none, .none }, + .{ .shld, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xa5 }, 0, .long, .none }, - .{ .shr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 5, .none }, - .{ .shr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 5, .rex }, - .{ .shr, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 5, .none }, - .{ .shr, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 5, .none }, - .{ .shr, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 5, .long }, - .{ .shr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 5, .none }, - .{ .shr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 5, .rex }, - .{ .shr, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 5, .none }, - .{ .shr, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 5, .none }, - .{ .shr, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 5, .long }, - .{ .shr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 5, .none }, - .{ .shr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 5, .rex }, - .{ .shr, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 5, .none }, - .{ .shr, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 5, .none }, - .{ .shr, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 5, .long }, + .{ .shr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 5, .none, .none }, + .{ .shr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 5, .rex, .none }, + .{ .shr, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 5, .none, .none }, + .{ .shr, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 5, .none, .none }, + .{ .shr, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 5, .long, .none }, + .{ .shr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 5, .none, .none }, + .{ .shr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 5, .rex, .none }, + .{ .shr, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 5, .none, .none }, + .{ .shr, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 5, .none, .none }, + .{ .shr, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 5, .long, .none }, + .{ .shr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 5, .none, .none }, + .{ .shr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 5, .rex, .none }, + .{ .shr, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 5, .none, .none }, + .{ .shr, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 5, .none, .none }, + .{ .shr, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 5, .long, .none }, - .{ .shrd, .mri, &.{ .rm16, .r16, .imm8 }, &.{ 0x0f, 0xac }, 0, .none }, - .{ .shrd, .mrc, &.{ .rm16, .r16, .cl }, &.{ 0x0f, 0xad }, 0, .none }, - .{ .shrd, .mri, &.{ .rm32, .r32, .imm8 }, &.{ 0x0f, 0xac }, 0, .none }, - .{ .shrd, .mri, &.{ .rm64, .r64, .imm8 }, &.{ 0x0f, 0xac }, 0, .long }, - .{ .shrd, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xad }, 0, .none }, - .{ .shrd, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xad }, 0, .long }, + .{ .shrd, .mri, &.{ .rm16, .r16, .imm8 }, &.{ 0x0f, 0xac }, 0, .none, .none }, + .{ .shrd, .mrc, &.{ .rm16, .r16, .cl }, &.{ 0x0f, 0xad }, 0, .none, .none }, + .{ .shrd, .mri, &.{ .rm32, .r32, .imm8 }, &.{ 0x0f, 0xac }, 0, .none, .none }, + .{ .shrd, .mri, &.{ .rm64, .r64, .imm8 }, &.{ 0x0f, 0xac }, 0, .long, .none }, + .{ .shrd, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xad }, 0, .none, .none }, + .{ .shrd, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xad }, 0, .long, .none }, - .{ .stos, .np, &.{ .m8 }, &.{ 0xaa }, 0, .none }, - .{ .stos, .np, &.{ .m16 }, &.{ 0xab }, 0, .none }, - .{ .stos, .np, &.{ .m32 }, &.{ 0xab }, 0, 
.none },
- .{ .stos, .np, &.{ .m64 }, &.{ 0xab }, 0, .long },
+ .{ .stos, .np, &.{ .m8 }, &.{ 0xaa }, 0, .none, .none },
+ .{ .stos, .np, &.{ .m16 }, &.{ 0xab }, 0, .none, .none },
+ .{ .stos, .np, &.{ .m32 }, &.{ 0xab }, 0, .none, .none },
+ .{ .stos, .np, &.{ .m64 }, &.{ 0xab }, 0, .long, .none },
- .{ .stosb, .np, &.{}, &.{ 0xaa }, 0, .none },
- .{ .stosw, .np, &.{}, &.{ 0xab }, 0, .short },
- .{ .stosd, .np, &.{}, &.{ 0xab }, 0, .none },
- .{ .stosq, .np, &.{}, &.{ 0xab }, 0, .long },
+ .{ .stosb, .np, &.{}, &.{ 0xaa }, 0, .none, .none },
+ .{ .stosw, .np, &.{}, &.{ 0xab }, 0, .short, .none },
+ .{ .stosd, .np, &.{}, &.{ 0xab }, 0, .none, .none },
+ .{ .stosq, .np, &.{}, &.{ 0xab }, 0, .long, .none },
- .{ .sub, .zi, &.{ .al, .imm8 }, &.{ 0x2c }, 0, .none },
- .{ .sub, .zi, &.{ .ax, .imm16 }, &.{ 0x2d }, 0, .none },
- .{ .sub, .zi, &.{ .eax, .imm32 }, &.{ 0x2d }, 0, .none },
- .{ .sub, .zi, &.{ .rax, .imm32s }, &.{ 0x2d }, 0, .long },
- .{ .sub, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 5, .none },
- .{ .sub, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 5, .rex },
- .{ .sub, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 5, .none },
- .{ .sub, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 5, .none },
- .{ .sub, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 5, .long },
- .{ .sub, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 5, .none },
- .{ .sub, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 5, .none },
- .{ .sub, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 5, .long },
- .{ .sub, .mr, &.{ .rm8, .r8 }, &.{ 0x28 }, 0, .none },
- .{ .sub, .mr, &.{ .rm8, .r8 }, &.{ 0x28 }, 0, .rex },
- .{ .sub, .mr, &.{ .rm16, .r16 }, &.{ 0x29 }, 0, .none },
- .{ .sub, .mr, &.{ .rm32, .r32 }, &.{ 0x29 }, 0, .none },
- .{ .sub, .mr, &.{ .rm64, .r64 }, &.{ 0x29 }, 0, .long },
- .{ .sub, .rm, &.{ .r8, .rm8 }, &.{ 0x2a }, 0, .none },
- .{ .sub, .rm, &.{ .r8, .rm8 }, &.{ 0x2a }, 0, .rex },
- .{ .sub, .rm, &.{ .r16, .rm16 }, &.{ 0x2b }, 0, .none },
- .{ .sub, .rm, &.{ .r32, .rm32 }, &.{ 0x2b }, 0, .none },
- .{ .sub, .rm, &.{ .r64, .rm64 }, &.{ 0x2b }, 0, .long },
+ .{ .sub, .zi, &.{ .al, .imm8 }, &.{ 0x2c }, 0, .none, .none },
+ .{ .sub, .zi, &.{ .ax, .imm16 }, &.{ 0x2d }, 0, .none, .none },
+ .{ .sub, .zi, &.{ .eax, .imm32 }, &.{ 0x2d }, 0, .none, .none },
+ .{ .sub, .zi, &.{ .rax, .imm32s }, &.{ 0x2d }, 0, .long, .none },
+ .{ .sub, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 5, .none, .none },
+ .{ .sub, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 5, .rex, .none },
+ .{ .sub, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 5, .none, .none },
+ .{ .sub, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 5, .none, .none },
+ .{ .sub, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 5, .long, .none },
+ .{ .sub, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 5, .none, .none },
+ .{ .sub, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 5, .none, .none },
+ .{ .sub, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 5, .long, .none },
+ .{ .sub, .mr, &.{ .rm8, .r8 }, &.{ 0x28 }, 0, .none, .none },
+ .{ .sub, .mr, &.{ .rm8, .r8 }, &.{ 0x28 }, 0, .rex, .none },
+ .{ .sub, .mr, &.{ .rm16, .r16 }, &.{ 0x29 }, 0, .none, .none },
+ .{ .sub, .mr, &.{ .rm32, .r32 }, &.{ 0x29 }, 0, .none, .none },
+ .{ .sub, .mr, &.{ .rm64, .r64 }, &.{ 0x29 }, 0, .long, .none },
+ .{ .sub, .rm, &.{ .r8, .rm8 }, &.{ 0x2a }, 0, .none, .none },
+ .{ .sub, .rm, &.{ .r8, .rm8 }, &.{ 0x2a }, 0, .rex, .none },
+ .{ .sub, .rm, &.{ .r16, .rm16 }, &.{ 0x2b }, 0, .none, .none },
+ .{ .sub, .rm, &.{ .r32, .rm32 }, &.{ 0x2b }, 0, .none, .none },
+ .{ .sub, .rm, &.{ .r64, .rm64 }, &.{ 0x2b }, 0, .long, .none },
- .{ .syscall, .np, &.{}, &.{ 0x0f, 0x05 }, 0, .none },
-
- .{ .@"test", .zi, &.{
.al, .imm8 }, &.{ 0xa8 }, 0, .none }, - .{ .@"test", .zi, &.{ .ax, .imm16 }, &.{ 0xa9 }, 0, .none }, - .{ .@"test", .zi, &.{ .eax, .imm32 }, &.{ 0xa9 }, 0, .none }, - .{ .@"test", .zi, &.{ .rax, .imm32s }, &.{ 0xa9 }, 0, .long }, - .{ .@"test", .mi, &.{ .rm8, .imm8 }, &.{ 0xf6 }, 0, .none }, - .{ .@"test", .mi, &.{ .rm8, .imm8 }, &.{ 0xf6 }, 0, .rex }, - .{ .@"test", .mi, &.{ .rm16, .imm16 }, &.{ 0xf7 }, 0, .none }, - .{ .@"test", .mi, &.{ .rm32, .imm32 }, &.{ 0xf7 }, 0, .none }, - .{ .@"test", .mi, &.{ .rm64, .imm32s }, &.{ 0xf7 }, 0, .long }, - .{ .@"test", .mr, &.{ .rm8, .r8 }, &.{ 0x84 }, 0, .none }, - .{ .@"test", .mr, &.{ .rm8, .r8 }, &.{ 0x84 }, 0, .rex }, - .{ .@"test", .mr, &.{ .rm16, .r16 }, &.{ 0x85 }, 0, .none }, - .{ .@"test", .mr, &.{ .rm32, .r32 }, &.{ 0x85 }, 0, .none }, - .{ .@"test", .mr, &.{ .rm64, .r64 }, &.{ 0x85 }, 0, .long }, + .{ .syscall, .np, &.{}, &.{ 0x0f, 0x05 }, 0, .none, .none }, - .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none }, - .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none }, - .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long }, + .{ .@"test", .zi, &.{ .al, .imm8 }, &.{ 0xa8 }, 0, .none, .none }, + .{ .@"test", .zi, &.{ .ax, .imm16 }, &.{ 0xa9 }, 0, .none, .none }, + .{ .@"test", .zi, &.{ .eax, .imm32 }, &.{ 0xa9 }, 0, .none, .none }, + .{ .@"test", .zi, &.{ .rax, .imm32s }, &.{ 0xa9 }, 0, .long, .none }, + .{ .@"test", .mi, &.{ .rm8, .imm8 }, &.{ 0xf6 }, 0, .none, .none }, + .{ .@"test", .mi, &.{ .rm8, .imm8 }, &.{ 0xf6 }, 0, .rex, .none }, + .{ .@"test", .mi, &.{ .rm16, .imm16 }, &.{ 0xf7 }, 0, .none, .none }, + .{ .@"test", .mi, &.{ .rm32, .imm32 }, &.{ 0xf7 }, 0, .none, .none }, + .{ .@"test", .mi, &.{ .rm64, .imm32s }, &.{ 0xf7 }, 0, .long, .none }, + .{ .@"test", .mr, &.{ .rm8, .r8 }, &.{ 0x84 }, 0, .none, .none }, + .{ .@"test", .mr, &.{ .rm8, .r8 }, &.{ 0x84 }, 0, .rex, .none }, + .{ .@"test", .mr, &.{ .rm16, .r16 }, &.{ 0x85 }, 0, .none, .none }, + .{ .@"test", .mr, &.{ .rm32, .r32 }, &.{ 0x85 }, 0, .none, .none }, + .{ .@"test", .mr, &.{ .rm64, .r64 }, &.{ 0x85 }, 0, .long, .none }, - .{ .ud2, .np, &.{}, &.{ 0x0f, 0x0b }, 0, .none }, + .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .none }, + .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .none }, + .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .none }, - .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .none }, - .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .rex }, - .{ .xadd, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xc1 }, 0, .none }, - .{ .xadd, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xc1 }, 0, .none }, - .{ .xadd, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xc1 }, 0, .long }, + .{ .ud2, .np, &.{}, &.{ 0x0f, 0x0b }, 0, .none, .none }, - .{ .xchg, .o, &.{ .ax, .r16 }, &.{ 0x90 }, 0, .none }, - .{ .xchg, .o, &.{ .r16, .ax }, &.{ 0x90 }, 0, .none }, - .{ .xchg, .o, &.{ .eax, .r32 }, &.{ 0x90 }, 0, .none }, - .{ .xchg, .o, &.{ .rax, .r64 }, &.{ 0x90 }, 0, .long }, - .{ .xchg, .o, &.{ .r32, .eax }, &.{ 0x90 }, 0, .none }, - .{ .xchg, .o, &.{ .r64, .rax }, &.{ 0x90 }, 0, .long }, - .{ .xchg, .mr, &.{ .rm8, .r8 }, &.{ 0x86 }, 0, .none }, - .{ .xchg, .mr, &.{ .rm8, .r8 }, &.{ 0x86 }, 0, .rex }, - .{ .xchg, .rm, &.{ .r8, .rm8 }, &.{ 0x86 }, 0, .none }, - .{ .xchg, .rm, &.{ .r8, .rm8 }, &.{ 0x86 }, 0, .rex }, - .{ .xchg, .mr, &.{ .rm16, .r16 }, &.{ 0x87 }, 0, .none }, - .{ .xchg, .rm, &.{ .r16, .rm16 }, &.{ 0x87 }, 0, .none }, - .{ .xchg, .mr, &.{ .rm32, .r32 }, &.{ 0x87 }, 0, .none 
}, - .{ .xchg, .mr, &.{ .rm64, .r64 }, &.{ 0x87 }, 0, .long }, - .{ .xchg, .rm, &.{ .r32, .rm32 }, &.{ 0x87 }, 0, .none }, - .{ .xchg, .rm, &.{ .r64, .rm64 }, &.{ 0x87 }, 0, .long }, + .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .none, .none }, + .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .rex, .none }, + .{ .xadd, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xc1 }, 0, .none, .none }, + .{ .xadd, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xc1 }, 0, .none, .none }, + .{ .xadd, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xc1 }, 0, .long, .none }, - .{ .xor, .zi, &.{ .al, .imm8 }, &.{ 0x34 }, 0, .none }, - .{ .xor, .zi, &.{ .ax, .imm16 }, &.{ 0x35 }, 0, .none }, - .{ .xor, .zi, &.{ .eax, .imm32 }, &.{ 0x35 }, 0, .none }, - .{ .xor, .zi, &.{ .rax, .imm32s }, &.{ 0x35 }, 0, .long }, - .{ .xor, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 6, .none }, - .{ .xor, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 6, .rex }, - .{ .xor, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 6, .none }, - .{ .xor, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 6, .none }, - .{ .xor, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 6, .long }, - .{ .xor, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 6, .none }, - .{ .xor, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 6, .none }, - .{ .xor, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 6, .long }, - .{ .xor, .mr, &.{ .rm8, .r8 }, &.{ 0x30 }, 0, .none }, - .{ .xor, .mr, &.{ .rm8, .r8 }, &.{ 0x30 }, 0, .rex }, - .{ .xor, .mr, &.{ .rm16, .r16 }, &.{ 0x31 }, 0, .none }, - .{ .xor, .mr, &.{ .rm32, .r32 }, &.{ 0x31 }, 0, .none }, - .{ .xor, .mr, &.{ .rm64, .r64 }, &.{ 0x31 }, 0, .long }, - .{ .xor, .rm, &.{ .r8, .rm8 }, &.{ 0x32 }, 0, .none }, - .{ .xor, .rm, &.{ .r8, .rm8 }, &.{ 0x32 }, 0, .rex }, - .{ .xor, .rm, &.{ .r16, .rm16 }, &.{ 0x33 }, 0, .none }, - .{ .xor, .rm, &.{ .r32, .rm32 }, &.{ 0x33 }, 0, .none }, - .{ .xor, .rm, &.{ .r64, .rm64 }, &.{ 0x33 }, 0, .long }, + .{ .xchg, .o, &.{ .ax, .r16 }, &.{ 0x90 }, 0, .none, .none }, + .{ .xchg, .o, &.{ .r16, .ax }, &.{ 0x90 }, 0, .none, .none }, + .{ .xchg, .o, &.{ .eax, .r32 }, &.{ 0x90 }, 0, .none, .none }, + .{ .xchg, .o, &.{ .rax, .r64 }, &.{ 0x90 }, 0, .long, .none }, + .{ .xchg, .o, &.{ .r32, .eax }, &.{ 0x90 }, 0, .none, .none }, + .{ .xchg, .o, &.{ .r64, .rax }, &.{ 0x90 }, 0, .long, .none }, + .{ .xchg, .mr, &.{ .rm8, .r8 }, &.{ 0x86 }, 0, .none, .none }, + .{ .xchg, .mr, &.{ .rm8, .r8 }, &.{ 0x86 }, 0, .rex, .none }, + .{ .xchg, .rm, &.{ .r8, .rm8 }, &.{ 0x86 }, 0, .none, .none }, + .{ .xchg, .rm, &.{ .r8, .rm8 }, &.{ 0x86 }, 0, .rex, .none }, + .{ .xchg, .mr, &.{ .rm16, .r16 }, &.{ 0x87 }, 0, .none, .none }, + .{ .xchg, .rm, &.{ .r16, .rm16 }, &.{ 0x87 }, 0, .none, .none }, + .{ .xchg, .mr, &.{ .rm32, .r32 }, &.{ 0x87 }, 0, .none, .none }, + .{ .xchg, .mr, &.{ .rm64, .r64 }, &.{ 0x87 }, 0, .long, .none }, + .{ .xchg, .rm, &.{ .r32, .rm32 }, &.{ 0x87 }, 0, .none, .none }, + .{ .xchg, .rm, &.{ .r64, .rm64 }, &.{ 0x87 }, 0, .long, .none }, + + .{ .xor, .zi, &.{ .al, .imm8 }, &.{ 0x34 }, 0, .none, .none }, + .{ .xor, .zi, &.{ .ax, .imm16 }, &.{ 0x35 }, 0, .none, .none }, + .{ .xor, .zi, &.{ .eax, .imm32 }, &.{ 0x35 }, 0, .none, .none }, + .{ .xor, .zi, &.{ .rax, .imm32s }, &.{ 0x35 }, 0, .long, .none }, + .{ .xor, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 6, .none, .none }, + .{ .xor, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 6, .rex, .none }, + .{ .xor, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 6, .none, .none }, + .{ .xor, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 6, .none, .none }, + .{ .xor, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 6, .long, .none }, + .{ .xor, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 6, 
.none, .none }, + .{ .xor, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 6, .none, .none }, + .{ .xor, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 6, .long, .none }, + .{ .xor, .mr, &.{ .rm8, .r8 }, &.{ 0x30 }, 0, .none, .none }, + .{ .xor, .mr, &.{ .rm8, .r8 }, &.{ 0x30 }, 0, .rex, .none }, + .{ .xor, .mr, &.{ .rm16, .r16 }, &.{ 0x31 }, 0, .none, .none }, + .{ .xor, .mr, &.{ .rm32, .r32 }, &.{ 0x31 }, 0, .none, .none }, + .{ .xor, .mr, &.{ .rm64, .r64 }, &.{ 0x31 }, 0, .long, .none }, + .{ .xor, .rm, &.{ .r8, .rm8 }, &.{ 0x32 }, 0, .none, .none }, + .{ .xor, .rm, &.{ .r8, .rm8 }, &.{ 0x32 }, 0, .rex, .none }, + .{ .xor, .rm, &.{ .r16, .rm16 }, &.{ 0x33 }, 0, .none, .none }, + .{ .xor, .rm, &.{ .r32, .rm32 }, &.{ 0x33 }, 0, .none, .none }, + .{ .xor, .rm, &.{ .r64, .rm64 }, &.{ 0x33 }, 0, .long, .none }, // SSE - .{ .addss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .sse }, + .{ .addss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .none, .sse }, - .{ .andnps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .sse }, + .{ .andnps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .none, .sse }, - .{ .andps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .sse }, + .{ .andps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .none, .sse }, - .{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .sse }, + .{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .none, .sse }, - .{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .sse }, - .{ .cvtsi2ss, .rm, &.{ .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .sse_long }, + .{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .none, .sse }, + .{ .cvtsi2ss, .rm, &.{ .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .long, .sse }, - .{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .sse }, + .{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .none, .sse }, - .{ .maxss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .sse }, + .{ .maxss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .none, .sse }, - .{ .minss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5d }, 0, .sse }, + .{ .minss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5d }, 0, .none, .sse }, - .{ .movaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .sse }, - .{ .movaps, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x29 }, 0, .sse }, + .{ .movaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .none, .sse }, + .{ .movaps, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x29 }, 0, .none, .sse }, - .{ .movss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .sse }, - .{ .movss, .mr, &.{ .xmm_m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .sse }, + .{ .movss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .none, .sse }, + .{ .movss, .mr, &.{ .xmm_m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .none, .sse }, - .{ .movups, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .sse }, - .{ .movups, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x11 }, 0, .sse }, + .{ .movups, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .none, .sse }, + .{ .movups, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x11 }, 0, .none, .sse }, - .{ .mulss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .sse }, + .{ .mulss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .none, .sse }, - .{ .orps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .sse }, + .{ .orps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .none, .sse }, - .{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .sse }, + .{ .subss, .rm, &.{ .xmm, .xmm_m32 }, 
&.{ 0xf3, 0x0f, 0x5c }, 0, .none, .sse }, - .{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .sse }, - .{ .sqrtss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .sse }, + .{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .none, .sse }, + .{ .sqrtss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .none, .sse }, - .{ .ucomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2e }, 0, .sse }, + .{ .ucomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2e }, 0, .none, .sse }, - .{ .xorps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .sse }, + .{ .xorps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .none, .sse }, // SSE2 - .{ .addsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .sse2 }, + .{ .addsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .none, .sse2 }, - .{ .andnpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .sse2 }, + .{ .andnpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .none, .sse2 }, - .{ .andpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .sse2 }, + .{ .andpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .none, .sse2 }, - .{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .sse2 }, + .{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .none, .sse2 }, - .{ .cvtsd2ss, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .sse2 }, + .{ .cvtsd2ss, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .none, .sse2 }, - .{ .cvtsi2sd, .rm, &.{ .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .sse2 }, - .{ .cvtsi2sd, .rm, &.{ .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .sse2_long }, + .{ .cvtsi2sd, .rm, &.{ .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .none, .sse2 }, + .{ .cvtsi2sd, .rm, &.{ .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .long, .sse2 }, - .{ .cvtss2sd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .sse2 }, + .{ .cvtss2sd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .none, .sse2 }, - .{ .divsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .sse2 }, + .{ .divsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .none, .sse2 }, - .{ .maxsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .sse2 }, + .{ .maxsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .none, .sse2 }, - .{ .minsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5d }, 0, .sse2 }, + .{ .minsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5d }, 0, .none, .sse2 }, - .{ .movapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .sse2 }, - .{ .movapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .sse2 }, + .{ .movapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .none, .sse2 }, + .{ .movapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .none, .sse2 }, - .{ .movd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .sse2 }, - .{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .sse2 }, + .{ .movd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .none, .sse2 }, + .{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .none, .sse2 }, - .{ .movq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .sse2_long }, - .{ .movq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .sse2_long }, + .{ .movq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .long, .sse2 }, + .{ .movq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .long, .sse2 }, - .{ .movq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .sse2 }, - .{ .movq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .sse2 }, + .{ .movq, .rm, &.{ 
.xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .none, .sse2 }, + .{ .movq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .none, .sse2 }, - .{ .movupd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .sse2 }, - .{ .movupd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x11 }, 0, .sse2 }, + .{ .movupd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .none, .sse2 }, + .{ .movupd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x11 }, 0, .none, .sse2 }, - .{ .mulsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x59 }, 0, .sse2 }, + .{ .mulsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x59 }, 0, .none, .sse2 }, - .{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .sse2 }, + .{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .none, .sse2 }, - .{ .pextrw, .mri, &.{ .r16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .sse2 }, + .{ .pextrw, .mri, &.{ .r16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .none, .sse2 }, - .{ .pinsrw, .rmi, &.{ .xmm, .rm16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .sse2 }, + .{ .pinsrw, .rmi, &.{ .xmm, .rm16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 }, - .{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .sse2 }, - .{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .sse2 }, + .{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .none, .sse2 }, + .{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .none, .sse2 }, - .{ .subsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .sse2 }, + .{ .subsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .none, .sse2 }, - .{ .movsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .sse2 }, - .{ .movsd, .mr, &.{ .xmm_m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .sse2 }, + .{ .movsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .none, .sse2 }, + .{ .movsd, .mr, &.{ .xmm_m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .none, .sse2 }, - .{ .ucomisd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x2e }, 0, .sse2 }, + .{ .ucomisd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x2e }, 0, .none, .sse2 }, - .{ .xorpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .sse2 }, + .{ .xorpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .none, .sse2 }, // SSE4.1 - .{ .pextrw, .mri, &.{ .rm16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .sse4_1 }, + .{ .pextrw, .mri, &.{ .rm16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .none, .sse4_1 }, - .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .sse4_1 }, - .{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .sse4_1 }, + .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .none, .sse4_1 }, + .{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .none, .sse4_1 }, + + // F16C + .{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128, .f16c }, + + .{ .vcvtps2ph, .mri, &.{ .xmm_m64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x1d }, 0, .vex_128, .f16c }, }; // zig fmt: on diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index b0e717d131..41b0bfc39b 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -168,7 +168,8 @@ test "array to vector" { test "array to vector with element type coercion" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime 
std.Target.x86.featureSetHas(builtin.cpu.features, .f16c)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO From ae588a09f2c2146ada0f914c7d279f69a0d79396 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Fri, 5 May 2023 22:16:13 -0400 Subject: [PATCH 04/20] x86_64: implement f16 cmp --- src/arch/x86_64/CodeGen.zig | 53 +- src/arch/x86_64/Encoding.zig | 163 +++-- src/arch/x86_64/Lower.zig | 42 +- src/arch/x86_64/Mir.zig | 80 +- src/arch/x86_64/encoder.zig | 49 +- src/arch/x86_64/encodings.zig | 1297 +++++++++++++++++---------------- 6 files changed, 957 insertions(+), 727 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index b7fd81db68..d24428467a 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -6737,26 +6737,43 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); const src_mcv = if (flipped) lhs_mcv else rhs_mcv; - try self.genBinOpMir(switch (ty.zigTypeTag()) { - else => .cmp, + switch (ty.zigTypeTag()) { + else => try self.genBinOpMir(.cmp, ty, dst_mcv, src_mcv), .Float => switch (ty.floatBits(self.target.*)) { - 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) - .ucomiss - else - return self.fail("TODO implement airCmp for {} without sse", .{ - ty.fmt(self.bin_file.options.module.?), - }), - 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) - .ucomisd - else - return self.fail("TODO implement airCmp for {} without sse2", .{ - ty.fmt(self.bin_file.options.module.?), - }), + 16 => if (self.hasFeature(.f16c)) { + const dst_reg = dst_mcv.getReg().?.to128(); + + const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128(); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + if (src_mcv.isRegister()) + try self.asmRegisterRegisterRegister( + .vpunpcklwd, + dst_reg, + dst_reg, + src_mcv.getReg().?.to128(), + ) + else + try self.asmRegisterMemoryImmediate( + .vpinsrw, + dst_reg, + src_mcv.mem(.word), + Immediate.u(1), + ); + try self.asmRegisterRegister(.vcvtph2ps, dst_reg, dst_reg); + try self.asmRegisterRegister(.vmovshdup, tmp_reg, dst_reg); + try self.genBinOpMir(.ucomiss, ty, dst_mcv, .{ .register = tmp_reg }); + } else return self.fail("TODO implement airCmp for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), + 32 => try self.genBinOpMir(.ucomiss, ty, dst_mcv, src_mcv), + 64 => try self.genBinOpMir(.ucomisd, ty, dst_mcv, src_mcv), else => return self.fail("TODO implement airCmp for {}", .{ ty.fmt(self.bin_file.options.module.?), }), }, - }, ty, dst_mcv, src_mcv); + } const signedness = if (ty.isAbiInt()) ty.intInfo(self.target.*).signedness else .unsigned; const result = MCValue{ @@ -7834,8 +7851,8 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr else switch (abi_size) { 2 => return try self.asmRegisterRegisterImmediate( if (dst_reg.class() == .floating_point) .pinsrw else .pextrw, - registerAlias(dst_reg, abi_size), - registerAlias(src_reg, abi_size), + registerAlias(dst_reg, 4), + registerAlias(src_reg, 4), Immediate.u(0), ), 4 => .movd, @@ -8045,7 +8062,7 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal try 
self.asmMemoryRegisterImmediate( .pextrw, dst_mem, - registerAlias(src_reg, abi_size), + src_reg.to128(), Immediate.u(0), ) else diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 05c48ecddf..ada1e891fb 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -58,9 +58,9 @@ pub fn findByMnemonic( var shortest_len: ?usize = null; next: for (mnemonic_to_encodings_map[@enumToInt(mnemonic)]) |data| { switch (data.mode) { - .rex => if (!rex_required) continue, - .long => {}, - else => if (rex_required) continue, + .none, .short => if (rex_required) continue, + .rex, .rex_short => if (!rex_required) continue, + else => {}, } for (input_ops, data.ops) |input_op, data_op| if (!input_op.isSubset(data_op)) continue :next; @@ -90,24 +90,26 @@ pub fn findByOpcode(opc: []const u8, prefixes: struct { if (!std.mem.eql(u8, opc, enc.opcode())) continue; if (prefixes.rex.w) { switch (data.mode) { - .short, .fpu, .sse, .sse2, .sse4_1, .none => continue, - .long, .sse_long, .sse2_long, .rex => {}, + .none, .short, .rex, .rex_short, .vex_128, .vex_256 => continue, + .long, .vex_128_long, .vex_256_long => {}, } } else if (prefixes.rex.present and !prefixes.rex.isSet()) { switch (data.mode) { - .rex => {}, + .rex, .rex_short => {}, else => continue, } } else if (prefixes.legacy.prefix_66) { - switch (enc.operandBitSize()) { - 16 => {}, - else => continue, + switch (data.mode) { + .short, .rex_short => {}, + .none, .rex, .vex_128, .vex_256 => continue, + .long, .vex_128_long, .vex_256_long => continue, } } else { switch (data.mode) { - .none => switch (enc.operandBitSize()) { - 16 => continue, - else => {}, + .none => switch (data.mode) { + .short, .rex_short => continue, + .none, .rex, .vex_128, .vex_256 => {}, + .long, .vex_128_long, .vex_256_long => {}, }, else => continue, } @@ -131,28 +133,11 @@ pub fn mandatoryPrefix(encoding: *const Encoding) ?u8 { pub fn modRmExt(encoding: Encoding) u3 { return switch (encoding.data.op_en) { - .m, .mi, .m1, .mc => encoding.data.modrm_ext, + .m, .mi, .m1, .mc, .vmi => encoding.data.modrm_ext, else => unreachable, }; } -pub fn operandBitSize(encoding: Encoding) u64 { - return switch (encoding.data.mode) { - .short => 16, - .long => 64, - else => switch (encoding.data.op_en) { - .np => switch (encoding.data.ops[0]) { - .o16 => 16, - .o32 => 32, - .o64 => 64, - else => 32, - }, - .td => encoding.data.ops[1].bitSize(), - else => encoding.data.ops[0].bitSize(), - }, - }; -} - pub fn format( encoding: Encoding, comptime fmt: []const u8, @@ -220,17 +205,17 @@ pub fn format( }; try writer.print("+{s} ", .{tag}); }, - .m, .mi, .m1, .mc => try writer.print("/{d} ", .{encoding.modRmExt()}), - .mr, .rm, .rmi, .mri, .mrc, .rrm, .rrmi => try writer.writeAll("/r "), + .m, .mi, .m1, .mc, .vmi => try writer.print("/{d} ", .{encoding.modRmExt()}), + .mr, .rm, .rmi, .mri, .mrc, .rvm, .rvmi => try writer.writeAll("/r "), } switch (encoding.data.op_en) { - .i, .d, .zi, .oi, .mi, .rmi, .mri, .rrmi => { + .i, .d, .zi, .oi, .mi, .rmi, .mri, .vmi, .rvmi => { const op = switch (encoding.data.op_en) { .i, .d => encoding.data.ops[0], .zi, .oi, .mi => encoding.data.ops[1], - .rmi, .mri => encoding.data.ops[2], - .rrmi => encoding.data.ops[3], + .rmi, .mri, .vmi => encoding.data.ops[2], + .rvmi => encoding.data.ops[3], else => unreachable, }; const tag = switch (op) { @@ -245,7 +230,7 @@ pub fn format( }; try writer.print("{s} ", .{tag}); }, - .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rrm => {}, + .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, 
.mrc, .rvm => {}, } try writer.print("{s} ", .{@tagName(encoding.mnemonic)}); @@ -315,8 +300,7 @@ pub const Mnemonic = enum { movaps, movss, movups, mulss, orps, - pextrw, - pinsrw, + pextrw, pinsrw, sqrtps, sqrtss, subss, @@ -335,14 +319,25 @@ pub const Mnemonic = enum { movupd, mulsd, orpd, - sqrtpd, - sqrtsd, + pshufhw, pshuflw, + psrld, psrlq, psrlw, + punpckhbw, punpckhdq, punpckhqdq, punpckhwd, + punpcklbw, punpckldq, punpcklqdq, punpcklwd, + sqrtpd, sqrtsd, subsd, ucomisd, xorpd, + // SSE3 + movddup, movshdup, movsldup, // SSE4.1 - roundss, - roundsd, + roundsd, roundss, + // AVX + vmovddup, vmovshdup, vmovsldup, + vpextrw, vpinsrw, + vpshufhw, vpshuflw, + vpsrld, vpsrlq, vpsrlw, + vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd, + vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd, // F16C vcvtph2ps, vcvtps2ph, // zig fmt: on @@ -357,7 +352,7 @@ pub const OpEn = enum { fd, td, m1, mc, mi, mr, rm, rmi, mri, mrc, - rrm, rrmi, + vmi, rvm, rvmi, // zig fmt: on }; @@ -372,6 +367,7 @@ pub const Op = enum { cl, r8, r16, r32, r64, rm8, rm16, rm32, rm64, + r32_m16, r64_m16, m8, m16, m32, m64, m80, m128, rel8, rel16, rel32, m, @@ -450,16 +446,49 @@ pub const Op = enum { } } - pub fn bitSize(op: Op) u64 { + pub fn immBitSize(op: Op) u64 { return switch (op) { .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, + .al, .cl, .r8, .rm8 => unreachable, + .ax, .r16, .rm16 => unreachable, + .eax, .r32, .rm32, .r32_m16 => unreachable, + .rax, .r64, .rm64, .r64_m16 => unreachable, + .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable, + .m8, .m16, .m32, .m64, .m80, .m128 => unreachable, .unity => 1, - .imm8, .imm8s, .al, .cl, .r8, .m8, .rm8, .rel8 => 8, - .imm16, .imm16s, .ax, .r16, .m16, .rm16, .rel16 => 16, - .imm32, .imm32s, .eax, .r32, .m32, .rm32, .rel32, .xmm_m32 => 32, - .imm64, .rax, .r64, .m64, .rm64, .xmm_m64 => 64, + .imm8, .imm8s, .rel8 => 8, + .imm16, .imm16s, .rel16 => 16, + .imm32, .imm32s, .rel32 => 32, + .imm64 => 64, + }; + } + + pub fn regBitSize(op: Op) u64 { + return switch (op) { + .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, + .unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable, + .rel8, .rel16, .rel32 => unreachable, + .m8, .m16, .m32, .m64, .m80, .m128 => unreachable, + .al, .cl, .r8, .rm8 => 8, + .ax, .r16, .rm16 => 16, + .eax, .r32, .rm32, .r32_m16 => 32, + .rax, .r64, .rm64, .r64_m16 => 64, + .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128, + }; + } + + pub fn memBitSize(op: Op) u64 { + return switch (op) { + .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, + .unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable, + .rel8, .rel16, .rel32 => unreachable, + .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64, .xmm => unreachable, + .m8, .rm8 => 8, + .m16, .rm16, .r32_m16, .r64_m16 => 16, + .m32, .rm32, .xmm_m32 => 32, + .m64, .rm64, .xmm_m64 => 64, .m80 => 80, - .m128, .xmm, .xmm_m128 => 128, + .m128, .xmm_m128 => 128, }; } @@ -482,6 +511,7 @@ pub const Op = enum { .al, .ax, .eax, .rax, .r8, .r16, .r32, .r64, .rm8, .rm16, .rm32, .rm64, + .r32_m16, .r64_m16, .xmm, .xmm_m32, .xmm_m64, .xmm_m128, => true, else => false, @@ -506,6 +536,7 @@ pub const Op = enum { // zig fmt: off return switch (op) { .rm8, .rm16, .rm32, .rm64, + .r32_m16, .r64_m16, .m8, .m16, .m32, .m64, .m80, .m128, .m, .xmm_m32, .xmm_m64, .xmm_m128, @@ -528,18 +559,12 @@ pub const Op = enum { .al, .ax, .eax, .rax, .cl => .general_purpose, .r8, .r16, .r32, .r64 => .general_purpose, .rm8, .rm16, .rm32, .rm64 => .general_purpose, + .r32_m16, 
.r64_m16 => .general_purpose, .sreg => .segment, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .floating_point, }; } - pub fn isFloatingPointRegister(op: Op) bool { - return switch (op) { - .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => true, - else => false, - }; - } - /// Given an operand `op` checks if `target` is a subset for the purposes of the encoding. pub fn isSubset(op: Op, target: Op) bool { switch (op) { @@ -553,30 +578,27 @@ pub const Op = enum { if (op.isRegister() and target.isRegister()) { return switch (target) { .cl, .al, .ax, .eax, .rax => op == target, - else => op.class() == target.class() and switch (target.class()) { - .floating_point => true, - else => op.bitSize() == target.bitSize(), - }, + else => op.class() == target.class() and op.regBitSize() == target.regBitSize(), }; } if (op.isMemory() and target.isMemory()) { switch (target) { .m => return true, - else => return op.bitSize() == target.bitSize(), + else => return op.memBitSize() == target.memBitSize(), } } if (op.isImmediate() and target.isImmediate()) { switch (target) { - .imm64 => if (op.bitSize() <= 64) return true, - .imm32s, .rel32 => if (op.bitSize() < 32 or (op.bitSize() == 32 and op.isSigned())) + .imm64 => if (op.immBitSize() <= 64) return true, + .imm32s, .rel32 => if (op.immBitSize() < 32 or (op.immBitSize() == 32 and op.isSigned())) return true, - .imm32 => if (op.bitSize() <= 32) return true, - .imm16s, .rel16 => if (op.bitSize() < 16 or (op.bitSize() == 16 and op.isSigned())) + .imm32 => if (op.immBitSize() <= 32) return true, + .imm16s, .rel16 => if (op.immBitSize() < 16 or (op.immBitSize() == 16 and op.isSigned())) return true, - .imm16 => if (op.bitSize() <= 16) return true, - .imm8s, .rel8 => if (op.bitSize() < 8 or (op.bitSize() == 8 and op.isSigned())) + .imm16 => if (op.immBitSize() <= 16) return true, + .imm8s, .rel8 => if (op.immBitSize() < 8 or (op.immBitSize() == 8 and op.isSigned())) return true, - .imm8 => if (op.bitSize() <= 8) return true, + .imm8 => if (op.immBitSize() <= 8) return true, else => {}, } return op == target; @@ -590,8 +612,9 @@ pub const Op = enum { pub const Mode = enum { none, short, - rex, long, + rex, + rex_short, vex_128, vex_128_long, vex_256, @@ -600,9 +623,11 @@ pub const Mode = enum { pub const Feature = enum { none, + avx, f16c, sse, sse2, + sse3, sse4_1, x87, }; diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 9571f50e7c..d9482d4b39 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -108,12 +108,12 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction { .orps, .pextrw, .pinsrw, - .roundss, .sqrtps, .sqrtss, .subss, .ucomiss, .xorps, + .addsd, .andnpd, .andpd, @@ -127,13 +127,51 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction { .movsd, .mulsd, .orpd, - .roundsd, + .pshufhw, + .pshuflw, + .psrld, + .psrlq, + .psrlw, + .punpckhbw, + .punpckhdq, + .punpckhqdq, + .punpckhwd, + .punpcklbw, + .punpckldq, + .punpcklqdq, + .punpcklwd, .sqrtpd, .sqrtsd, .subsd, .ucomisd, .xorpd, + .movddup, + .movshdup, + .movsldup, + + .roundsd, + .roundss, + + .vmovddup, + .vmovshdup, + .vmovsldup, + .vpextrw, + .vpinsrw, + .vpshufhw, + .vpshuflw, + .vpsrld, + .vpsrlq, + .vpsrlw, + .vpunpckhbw, + .vpunpckhdq, + .vpunpckhqdq, + .vpunpckhwd, + .vpunpcklbw, + .vpunpckldq, + .vpunpcklqdq, + .vpunpcklwd, + .vcvtph2ps, .vcvtps2ph, => try lower.mirGeneric(inst), diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index c4e19fdc0e..9e39d23bd4 100644 --- a/src/arch/x86_64/Mir.zig +++ 
b/src/arch/x86_64/Mir.zig @@ -196,8 +196,6 @@ pub const Inst = struct { pextrw, /// Insert word pinsrw, - /// Round scalar single-precision floating-point values - roundss, /// Square root of scalar single precision floating-point value sqrtps, /// Subtract scalar single-precision floating-point values @@ -208,6 +206,7 @@ pub const Inst = struct { ucomiss, /// Bitwise logical xor of packed single precision floating-point values xorps, + /// Add double precision floating point values addsd, /// Bitwise logical and not of packed double precision floating-point values @@ -234,8 +233,32 @@ pub const Inst = struct { mulsd, /// Bitwise logical or of packed double precision floating-point values orpd, - /// Round scalar double-precision floating-point values - roundsd, + /// Shuffle packed high words + pshufhw, + /// Shuffle packed low words + pshuflw, + /// Shift packed data right logical + psrld, + /// Shift packed data right logical + psrlq, + /// Shift packed data right logical + psrlw, + /// Unpack high data + punpckhbw, + /// Unpack high data + punpckhdq, + /// Unpack high data + punpckhqdq, + /// Unpack high data + punpckhwd, + /// Unpack low data + punpcklbw, + /// Unpack low data + punpckldq, + /// Unpack low data + punpcklqdq, + /// Unpack low data + punpcklwd, /// Square root of double precision floating-point values sqrtpd, /// Square root of scalar double precision floating-point value @@ -247,6 +270,55 @@ pub const Inst = struct { /// Bitwise logical xor of packed double precision floating-point values xorpd, + /// Replicate double floating-point values + movddup, + /// Replicate single floating-point values + movshdup, + /// Replicate single floating-point values + movsldup, + + /// Round scalar double-precision floating-point values + roundsd, + /// Round scalar single-precision floating-point values + roundss, + + /// Replicate double floating-point values + vmovddup, + /// Replicate single floating-point values + vmovshdup, + /// Replicate single floating-point values + vmovsldup, + /// Extract word + vpextrw, + /// Insert word + vpinsrw, + /// Shuffle packed high words + vpshufhw, + /// Shuffle packed low words + vpshuflw, + /// Shift packed data right logical + vpsrld, + /// Shift packed data right logical + vpsrlq, + /// Shift packed data right logical + vpsrlw, + /// Unpack high data + vpunpckhbw, + /// Unpack high data + vpunpckhdq, + /// Unpack high data + vpunpckhqdq, + /// Unpack high data + vpunpckhwd, + /// Unpack low data + vpunpcklbw, + /// Unpack low data + vpunpckldq, + /// Unpack low data + vpunpcklqdq, + /// Unpack low data + vpunpcklwd, + /// Convert 16-bit floating-point values to single-precision floating-point values vcvtph2ps, /// Convert single-precision floating-point values to 16-bit floating-point values diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig index 94f4eb56d5..495edb5f2a 100644 --- a/src/arch/x86_64/encoder.zig +++ b/src/arch/x86_64/encoder.zig @@ -151,15 +151,12 @@ pub const Instruction = struct { moffs.offset, }), }, - .imm => |imm| try writer.print("0x{x}", .{imm.asUnsigned(enc_op.bitSize())}), + .imm => |imm| try writer.print("0x{x}", .{imm.asUnsigned(enc_op.immBitSize())}), } } pub fn fmtPrint(op: Operand, enc_op: Encoding.Op) std.fmt.Formatter(fmt) { - return .{ .data = .{ - .op = op, - .enc_op = enc_op, - } }; + return .{ .data = .{ .op = op, .enc_op = enc_op } }; } }; @@ -210,7 +207,7 @@ pub const Instruction = struct { const data = enc.data; switch (data.mode) { - .none, .short, .rex, .long => { + .none, .short, 
.long, .rex, .rex_short => { try inst.encodeLegacyPrefixes(encoder); try inst.encodeMandatoryPrefix(encoder); try inst.encodeRexPrefix(encoder); @@ -232,15 +229,16 @@ pub const Instruction = struct { else => { const mem_op = switch (data.op_en) { .m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.ops[0], - .rm, .rmi => inst.ops[1], + .rm, .rmi, .vmi => inst.ops[1], + .rvm, .rvmi => inst.ops[2], else => unreachable, }; switch (mem_op) { .reg => |reg| { const rm = switch (data.op_en) { - .m, .mi, .m1, .mc => enc.modRmExt(), + .m, .mi, .m1, .mc, .vmi => enc.modRmExt(), .mr, .mri, .mrc => inst.ops[1].reg.lowEnc(), - .rm, .rmi => inst.ops[0].reg.lowEnc(), + .rm, .rmi, .rvm, .rvmi => inst.ops[0].reg.lowEnc(), else => unreachable, }; try encoder.modRm_direct(rm, reg.lowEnc()); @@ -259,7 +257,8 @@ pub const Instruction = struct { switch (data.op_en) { .mi => try encodeImm(inst.ops[1].imm, data.ops[1], encoder), - .rmi, .mri => try encodeImm(inst.ops[2].imm, data.ops[2], encoder), + .rmi, .mri, .vmi => try encodeImm(inst.ops[2].imm, data.ops[2], encoder), + .rvmi => try encodeImm(inst.ops[3].imm, data.ops[3], encoder), else => {}, } }, @@ -291,11 +290,9 @@ pub const Instruction = struct { .rep, .repe, .repz => legacy.prefix_f3 = true, } - if (data.mode == .none) { - const bit_size = enc.operandBitSize(); - if (bit_size == 16) { - legacy.set16BitOverride(); - } + switch (data.mode) { + .short, .rex_short => legacy.set16BitOverride(), + else => {}, } const segment_override: ?Register = switch (op_en) { @@ -318,7 +315,7 @@ pub const Instruction = struct { } else null, - .rrm, .rrmi => unreachable, + .vmi, .rvm, .rvmi => unreachable, }; if (segment_override) |seg| { legacy.setSegmentOverride(seg); @@ -353,7 +350,7 @@ pub const Instruction = struct { rex.b = b_x_op.isBaseExtended(); rex.x = b_x_op.isIndexExtended(); }, - .rrm, .rrmi => unreachable, + .vmi, .rvm, .rvmi => unreachable, } try encoder.rex(rex); @@ -375,18 +372,19 @@ pub const Instruction = struct { switch (op_en) { .np, .i, .zi, .fd, .td, .d => {}, .o, .oi => vex.b = inst.ops[0].reg.isExtended(), - .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rrm, .rrmi => { + .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .vmi, .rvm, .rvmi => { const r_op = switch (op_en) { - .rm, .rmi, .rrm, .rrmi => inst.ops[0], + .rm, .rmi, .rvm, .rvmi => inst.ops[0], .mr, .mri, .mrc => inst.ops[1], - else => .none, + .m, .mi, .m1, .mc, .vmi => .none, + else => unreachable, }; vex.r = r_op.isBaseExtended(); const b_x_op = switch (op_en) { - .rm, .rmi => inst.ops[1], + .rm, .rmi, .vmi => inst.ops[1], .m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.ops[0], - .rrm, .rrmi => inst.ops[2], + .rvm, .rvmi => inst.ops[2], else => unreachable, }; vex.b = b_x_op.isBaseExtended(); @@ -417,7 +415,8 @@ pub const Instruction = struct { switch (op_en) { else => {}, - .rrm, .rrmi => vex.v = inst.ops[1].reg, + .vmi => vex.v = inst.ops[0].reg, + .rvm, .rvmi => vex.v = inst.ops[1].reg, } try encoder.vex(vex); @@ -515,8 +514,8 @@ pub const Instruction = struct { } fn encodeImm(imm: Immediate, kind: Encoding.Op, encoder: anytype) !void { - const raw = imm.asUnsigned(kind.bitSize()); - switch (kind.bitSize()) { + const raw = imm.asUnsigned(kind.immBitSize()); + switch (kind.immBitSize()) { 8 => try encoder.imm8(@intCast(u8, raw)), 16 => try encoder.imm16(@intCast(u16, raw)), 32 => try encoder.imm32(@intCast(u32, raw)), diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 52b8cc29d6..5d2630e9a8 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ 
-13,264 +13,264 @@ pub const Entry = struct { Mnemonic, OpEn, []const Op, []const u8, modrm_ext, Mo // zig fmt: off pub const table = [_]Entry{ // General-purpose - .{ .adc, .zi, &.{ .al, .imm8 }, &.{ 0x14 }, 0, .none, .none }, - .{ .adc, .zi, &.{ .ax, .imm16 }, &.{ 0x15 }, 0, .none, .none }, - .{ .adc, .zi, &.{ .eax, .imm32 }, &.{ 0x15 }, 0, .none, .none }, - .{ .adc, .zi, &.{ .rax, .imm32s }, &.{ 0x15 }, 0, .long, .none }, - .{ .adc, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 2, .none, .none }, - .{ .adc, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 2, .rex, .none }, - .{ .adc, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 2, .none, .none }, - .{ .adc, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 2, .none, .none }, - .{ .adc, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 2, .long, .none }, - .{ .adc, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 2, .none, .none }, - .{ .adc, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 2, .none, .none }, - .{ .adc, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 2, .long, .none }, - .{ .adc, .mr, &.{ .rm8, .r8 }, &.{ 0x10 }, 0, .none, .none }, - .{ .adc, .mr, &.{ .rm8, .r8 }, &.{ 0x10 }, 0, .rex, .none }, - .{ .adc, .mr, &.{ .rm16, .r16 }, &.{ 0x11 }, 0, .none, .none }, - .{ .adc, .mr, &.{ .rm32, .r32 }, &.{ 0x11 }, 0, .none, .none }, - .{ .adc, .mr, &.{ .rm64, .r64 }, &.{ 0x11 }, 0, .long, .none }, - .{ .adc, .rm, &.{ .r8, .rm8 }, &.{ 0x12 }, 0, .none, .none }, - .{ .adc, .rm, &.{ .r8, .rm8 }, &.{ 0x12 }, 0, .rex, .none }, - .{ .adc, .rm, &.{ .r16, .rm16 }, &.{ 0x13 }, 0, .none, .none }, - .{ .adc, .rm, &.{ .r32, .rm32 }, &.{ 0x13 }, 0, .none, .none }, - .{ .adc, .rm, &.{ .r64, .rm64 }, &.{ 0x13 }, 0, .long, .none }, + .{ .adc, .zi, &.{ .al, .imm8 }, &.{ 0x14 }, 0, .none, .none }, + .{ .adc, .zi, &.{ .ax, .imm16 }, &.{ 0x15 }, 0, .short, .none }, + .{ .adc, .zi, &.{ .eax, .imm32 }, &.{ 0x15 }, 0, .none, .none }, + .{ .adc, .zi, &.{ .rax, .imm32s }, &.{ 0x15 }, 0, .long, .none }, + .{ .adc, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 2, .none, .none }, + .{ .adc, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 2, .rex, .none }, + .{ .adc, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 2, .short, .none }, + .{ .adc, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 2, .none, .none }, + .{ .adc, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 2, .long, .none }, + .{ .adc, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 2, .short, .none }, + .{ .adc, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 2, .none, .none }, + .{ .adc, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 2, .long, .none }, + .{ .adc, .mr, &.{ .rm8, .r8 }, &.{ 0x10 }, 0, .none, .none }, + .{ .adc, .mr, &.{ .rm8, .r8 }, &.{ 0x10 }, 0, .rex, .none }, + .{ .adc, .mr, &.{ .rm16, .r16 }, &.{ 0x11 }, 0, .short, .none }, + .{ .adc, .mr, &.{ .rm32, .r32 }, &.{ 0x11 }, 0, .none, .none }, + .{ .adc, .mr, &.{ .rm64, .r64 }, &.{ 0x11 }, 0, .long, .none }, + .{ .adc, .rm, &.{ .r8, .rm8 }, &.{ 0x12 }, 0, .none, .none }, + .{ .adc, .rm, &.{ .r8, .rm8 }, &.{ 0x12 }, 0, .rex, .none }, + .{ .adc, .rm, &.{ .r16, .rm16 }, &.{ 0x13 }, 0, .short, .none }, + .{ .adc, .rm, &.{ .r32, .rm32 }, &.{ 0x13 }, 0, .none, .none }, + .{ .adc, .rm, &.{ .r64, .rm64 }, &.{ 0x13 }, 0, .long, .none }, - .{ .add, .zi, &.{ .al, .imm8 }, &.{ 0x04 }, 0, .none, .none }, - .{ .add, .zi, &.{ .ax, .imm16 }, &.{ 0x05 }, 0, .none, .none }, - .{ .add, .zi, &.{ .eax, .imm32 }, &.{ 0x05 }, 0, .none, .none }, - .{ .add, .zi, &.{ .rax, .imm32s }, &.{ 0x05 }, 0, .long, .none }, - .{ .add, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 0, .none, .none }, - .{ .add, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 0, .rex, .none }, - .{ .add, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 0, .none, .none }, - .{ 
.add, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 0, .none, .none }, - .{ .add, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 0, .long, .none }, - .{ .add, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 0, .none, .none }, - .{ .add, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 0, .none, .none }, - .{ .add, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 0, .long, .none }, - .{ .add, .mr, &.{ .rm8, .r8 }, &.{ 0x00 }, 0, .none, .none }, - .{ .add, .mr, &.{ .rm8, .r8 }, &.{ 0x00 }, 0, .rex, .none }, - .{ .add, .mr, &.{ .rm16, .r16 }, &.{ 0x01 }, 0, .none, .none }, - .{ .add, .mr, &.{ .rm32, .r32 }, &.{ 0x01 }, 0, .none, .none }, - .{ .add, .mr, &.{ .rm64, .r64 }, &.{ 0x01 }, 0, .long, .none }, - .{ .add, .rm, &.{ .r8, .rm8 }, &.{ 0x02 }, 0, .none, .none }, - .{ .add, .rm, &.{ .r8, .rm8 }, &.{ 0x02 }, 0, .rex, .none }, - .{ .add, .rm, &.{ .r16, .rm16 }, &.{ 0x03 }, 0, .none, .none }, - .{ .add, .rm, &.{ .r32, .rm32 }, &.{ 0x03 }, 0, .none, .none }, - .{ .add, .rm, &.{ .r64, .rm64 }, &.{ 0x03 }, 0, .long, .none }, + .{ .add, .zi, &.{ .al, .imm8 }, &.{ 0x04 }, 0, .none, .none }, + .{ .add, .zi, &.{ .ax, .imm16 }, &.{ 0x05 }, 0, .short, .none }, + .{ .add, .zi, &.{ .eax, .imm32 }, &.{ 0x05 }, 0, .none, .none }, + .{ .add, .zi, &.{ .rax, .imm32s }, &.{ 0x05 }, 0, .long, .none }, + .{ .add, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 0, .none, .none }, + .{ .add, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 0, .rex, .none }, + .{ .add, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 0, .short, .none }, + .{ .add, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 0, .none, .none }, + .{ .add, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 0, .long, .none }, + .{ .add, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 0, .short, .none }, + .{ .add, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 0, .none, .none }, + .{ .add, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 0, .long, .none }, + .{ .add, .mr, &.{ .rm8, .r8 }, &.{ 0x00 }, 0, .none, .none }, + .{ .add, .mr, &.{ .rm8, .r8 }, &.{ 0x00 }, 0, .rex, .none }, + .{ .add, .mr, &.{ .rm16, .r16 }, &.{ 0x01 }, 0, .short, .none }, + .{ .add, .mr, &.{ .rm32, .r32 }, &.{ 0x01 }, 0, .none, .none }, + .{ .add, .mr, &.{ .rm64, .r64 }, &.{ 0x01 }, 0, .long, .none }, + .{ .add, .rm, &.{ .r8, .rm8 }, &.{ 0x02 }, 0, .none, .none }, + .{ .add, .rm, &.{ .r8, .rm8 }, &.{ 0x02 }, 0, .rex, .none }, + .{ .add, .rm, &.{ .r16, .rm16 }, &.{ 0x03 }, 0, .short, .none }, + .{ .add, .rm, &.{ .r32, .rm32 }, &.{ 0x03 }, 0, .none, .none }, + .{ .add, .rm, &.{ .r64, .rm64 }, &.{ 0x03 }, 0, .long, .none }, - .{ .@"and", .zi, &.{ .al, .imm8 }, &.{ 0x24 }, 0, .none, .none }, - .{ .@"and", .zi, &.{ .ax, .imm16 }, &.{ 0x25 }, 0, .none, .none }, - .{ .@"and", .zi, &.{ .eax, .imm32 }, &.{ 0x25 }, 0, .none, .none }, - .{ .@"and", .zi, &.{ .rax, .imm32s }, &.{ 0x25 }, 0, .long, .none }, - .{ .@"and", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 4, .none, .none }, - .{ .@"and", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 4, .rex, .none }, - .{ .@"and", .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 4, .none, .none }, - .{ .@"and", .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 4, .none, .none }, - .{ .@"and", .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 4, .long, .none }, - .{ .@"and", .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 4, .none, .none }, - .{ .@"and", .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 4, .none, .none }, - .{ .@"and", .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 4, .long, .none }, - .{ .@"and", .mr, &.{ .rm8, .r8 }, &.{ 0x20 }, 0, .none, .none }, - .{ .@"and", .mr, &.{ .rm8, .r8 }, &.{ 0x20 }, 0, .rex, .none }, - .{ .@"and", .mr, &.{ .rm16, .r16 }, &.{ 0x21 }, 0, .none, .none }, - .{ .@"and", .mr, &.{ .rm32, .r32 }, &.{ 0x21 }, 0, .none, 
.none }, - .{ .@"and", .mr, &.{ .rm64, .r64 }, &.{ 0x21 }, 0, .long, .none }, - .{ .@"and", .rm, &.{ .r8, .rm8 }, &.{ 0x22 }, 0, .none, .none }, - .{ .@"and", .rm, &.{ .r8, .rm8 }, &.{ 0x22 }, 0, .rex, .none }, - .{ .@"and", .rm, &.{ .r16, .rm16 }, &.{ 0x23 }, 0, .none, .none }, - .{ .@"and", .rm, &.{ .r32, .rm32 }, &.{ 0x23 }, 0, .none, .none }, - .{ .@"and", .rm, &.{ .r64, .rm64 }, &.{ 0x23 }, 0, .long, .none }, + .{ .@"and", .zi, &.{ .al, .imm8 }, &.{ 0x24 }, 0, .none, .none }, + .{ .@"and", .zi, &.{ .ax, .imm16 }, &.{ 0x25 }, 0, .short, .none }, + .{ .@"and", .zi, &.{ .eax, .imm32 }, &.{ 0x25 }, 0, .none, .none }, + .{ .@"and", .zi, &.{ .rax, .imm32s }, &.{ 0x25 }, 0, .long, .none }, + .{ .@"and", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 4, .none, .none }, + .{ .@"and", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 4, .rex, .none }, + .{ .@"and", .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 4, .short, .none }, + .{ .@"and", .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 4, .none, .none }, + .{ .@"and", .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 4, .long, .none }, + .{ .@"and", .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 4, .short, .none }, + .{ .@"and", .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 4, .none, .none }, + .{ .@"and", .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 4, .long, .none }, + .{ .@"and", .mr, &.{ .rm8, .r8 }, &.{ 0x20 }, 0, .none, .none }, + .{ .@"and", .mr, &.{ .rm8, .r8 }, &.{ 0x20 }, 0, .rex, .none }, + .{ .@"and", .mr, &.{ .rm16, .r16 }, &.{ 0x21 }, 0, .short, .none }, + .{ .@"and", .mr, &.{ .rm32, .r32 }, &.{ 0x21 }, 0, .none, .none }, + .{ .@"and", .mr, &.{ .rm64, .r64 }, &.{ 0x21 }, 0, .long, .none }, + .{ .@"and", .rm, &.{ .r8, .rm8 }, &.{ 0x22 }, 0, .none, .none }, + .{ .@"and", .rm, &.{ .r8, .rm8 }, &.{ 0x22 }, 0, .rex, .none }, + .{ .@"and", .rm, &.{ .r16, .rm16 }, &.{ 0x23 }, 0, .short, .none }, + .{ .@"and", .rm, &.{ .r32, .rm32 }, &.{ 0x23 }, 0, .none, .none }, + .{ .@"and", .rm, &.{ .r64, .rm64 }, &.{ 0x23 }, 0, .long, .none }, - .{ .bsf, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0xbc }, 0, .none, .none }, - .{ .bsf, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xbc }, 0, .none, .none }, - .{ .bsf, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0xbc }, 0, .long, .none }, + .{ .bsf, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0xbc }, 0, .short, .none }, + .{ .bsf, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xbc }, 0, .none, .none }, + .{ .bsf, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0xbc }, 0, .long, .none }, - .{ .bsr, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0xbd }, 0, .none, .none }, - .{ .bsr, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xbd }, 0, .none, .none }, - .{ .bsr, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0xbd }, 0, .long, .none }, + .{ .bsr, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0xbd }, 0, .short, .none }, + .{ .bsr, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xbd }, 0, .none, .none }, + .{ .bsr, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0xbd }, 0, .long, .none }, .{ .bswap, .o, &.{ .r32 }, &.{ 0x0f, 0xc8 }, 0, .none, .none }, .{ .bswap, .o, &.{ .r64 }, &.{ 0x0f, 0xc8 }, 0, .long, .none }, - .{ .bt, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xa3 }, 0, .none, .none }, - .{ .bt, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xa3 }, 0, .none, .none }, - .{ .bt, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xa3 }, 0, .long, .none }, - .{ .bt, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 4, .none, .none }, - .{ .bt, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 4, .none, .none }, - .{ .bt, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 4, .long, .none }, + .{ .bt, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xa3 }, 0, .short, .none }, + .{ .bt, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xa3 }, 0, .none, .none }, + .{ .bt, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 
0xa3 }, 0, .long, .none }, + .{ .bt, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 4, .short, .none }, + .{ .bt, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 4, .none, .none }, + .{ .bt, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 4, .long, .none }, - .{ .btc, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xbb }, 0, .none, .none }, - .{ .btc, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xbb }, 0, .none, .none }, - .{ .btc, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xbb }, 0, .long, .none }, - .{ .btc, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 7, .none, .none }, - .{ .btc, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 7, .none, .none }, - .{ .btc, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 7, .long, .none }, + .{ .btc, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xbb }, 0, .short, .none }, + .{ .btc, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xbb }, 0, .none, .none }, + .{ .btc, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xbb }, 0, .long, .none }, + .{ .btc, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 7, .short, .none }, + .{ .btc, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 7, .none, .none }, + .{ .btc, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 7, .long, .none }, - .{ .btr, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xb3 }, 0, .none, .none }, - .{ .btr, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xb3 }, 0, .none, .none }, - .{ .btr, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xb3 }, 0, .long, .none }, - .{ .btr, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 6, .none, .none }, - .{ .btr, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 6, .none, .none }, - .{ .btr, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 6, .long, .none }, + .{ .btr, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xb3 }, 0, .short, .none }, + .{ .btr, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xb3 }, 0, .none, .none }, + .{ .btr, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xb3 }, 0, .long, .none }, + .{ .btr, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 6, .short, .none }, + .{ .btr, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 6, .none, .none }, + .{ .btr, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 6, .long, .none }, - .{ .bts, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xab }, 0, .none, .none }, - .{ .bts, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xab }, 0, .none, .none }, - .{ .bts, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xab }, 0, .long, .none }, - .{ .bts, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 5, .none, .none }, - .{ .bts, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 5, .none, .none }, - .{ .bts, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 5, .long, .none }, + .{ .bts, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xab }, 0, .short, .none }, + .{ .bts, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xab }, 0, .none, .none }, + .{ .bts, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xab }, 0, .long, .none }, + .{ .bts, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 5, .short, .none }, + .{ .bts, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 5, .none, .none }, + .{ .bts, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 5, .long, .none }, // This is M encoding according to Intel, but D makes more sense here. 
.{ .call, .d, &.{ .rel32 }, &.{ 0xe8 }, 0, .none, .none }, .{ .call, .m, &.{ .rm64 }, &.{ 0xff }, 2, .none, .none }, - .{ .cbw, .np, &.{ .o16 }, &.{ 0x98 }, 0, .none, .none }, - .{ .cwde, .np, &.{ .o32 }, &.{ 0x98 }, 0, .none, .none }, - .{ .cdqe, .np, &.{ .o64 }, &.{ 0x98 }, 0, .long, .none }, + .{ .cbw, .np, &.{ .o16 }, &.{ 0x98 }, 0, .short, .none }, + .{ .cwde, .np, &.{ .o32 }, &.{ 0x98 }, 0, .none, .none }, + .{ .cdqe, .np, &.{ .o64 }, &.{ 0x98 }, 0, .long, .none }, - .{ .cwd, .np, &.{ .o16 }, &.{ 0x99 }, 0, .none, .none }, - .{ .cdq, .np, &.{ .o32 }, &.{ 0x99 }, 0, .none, .none }, - .{ .cqo, .np, &.{ .o64 }, &.{ 0x99 }, 0, .long, .none }, + .{ .cwd, .np, &.{ .o16 }, &.{ 0x99 }, 0, .short, .none }, + .{ .cdq, .np, &.{ .o32 }, &.{ 0x99 }, 0, .none, .none }, + .{ .cqo, .np, &.{ .o64 }, &.{ 0x99 }, 0, .long, .none }, - .{ .cmova, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .none, .none }, - .{ .cmova, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .none }, - .{ .cmova, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .none }, - .{ .cmovae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, - .{ .cmovae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, - .{ .cmovae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, - .{ .cmovb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, - .{ .cmovb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, - .{ .cmovb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, - .{ .cmovbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .none, .none }, - .{ .cmovbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .none }, - .{ .cmovbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .none }, - .{ .cmovc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, - .{ .cmovc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, - .{ .cmovc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, - .{ .cmove, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .none, .none }, - .{ .cmove, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .none }, - .{ .cmove, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .none }, - .{ .cmovg, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .none, .none }, - .{ .cmovg, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .none }, - .{ .cmovg, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .none }, - .{ .cmovge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .none, .none }, - .{ .cmovge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .none }, - .{ .cmovge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .none }, - .{ .cmovl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .none, .none }, - .{ .cmovl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .none }, - .{ .cmovl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .none }, - .{ .cmovle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .none, .none }, - .{ .cmovle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .none }, - .{ .cmovle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .none }, - .{ .cmovna, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .none, .none }, - .{ .cmovna, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .none }, - .{ .cmovna, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .none }, - .{ .cmovnae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, - .{ .cmovnae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, - .{ .cmovnae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, - .{ .cmovnb, .rm, &.{ .r16, .rm16 }, 
&.{ 0x0f, 0x43 }, 0, .none, .none }, - .{ .cmovnb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, - .{ .cmovnb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, - .{ .cmovnbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .none, .none }, - .{ .cmovnbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .none }, - .{ .cmovnbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .none }, - .{ .cmovnc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, - .{ .cmovnc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, - .{ .cmovnc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, - .{ .cmovne, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .none, .none }, - .{ .cmovne, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .none }, - .{ .cmovne, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .none }, - .{ .cmovng, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .none, .none }, - .{ .cmovng, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .none }, - .{ .cmovng, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .none }, - .{ .cmovnge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .none, .none }, - .{ .cmovnge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .none }, - .{ .cmovnge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .none }, - .{ .cmovnl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .none, .none }, - .{ .cmovnl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .none }, - .{ .cmovnl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .none }, - .{ .cmovnle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .none, .none }, - .{ .cmovnle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .none }, - .{ .cmovnle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .none }, - .{ .cmovno, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x41 }, 0, .none, .none }, - .{ .cmovno, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x41 }, 0, .none, .none }, - .{ .cmovno, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x41 }, 0, .long, .none }, - .{ .cmovnp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .none, .none }, - .{ .cmovnp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .none }, - .{ .cmovnp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .none }, - .{ .cmovns, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x49 }, 0, .none, .none }, - .{ .cmovns, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x49 }, 0, .none, .none }, - .{ .cmovns, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x49 }, 0, .long, .none }, - .{ .cmovnz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .none, .none }, - .{ .cmovnz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .none }, - .{ .cmovnz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .none }, - .{ .cmovo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x40 }, 0, .none, .none }, - .{ .cmovo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x40 }, 0, .none, .none }, - .{ .cmovo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x40 }, 0, .long, .none }, - .{ .cmovp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .none, .none }, - .{ .cmovp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .none }, - .{ .cmovp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .none }, - .{ .cmovpe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .none, .none }, - .{ .cmovpe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .none }, - .{ .cmovpe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .none }, - .{ .cmovpo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .none, .none }, - .{ .cmovpo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .none }, - .{ .cmovpo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, 
.long, .none }, - .{ .cmovs, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x48 }, 0, .none, .none }, - .{ .cmovs, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x48 }, 0, .none, .none }, - .{ .cmovs, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x48 }, 0, .long, .none }, - .{ .cmovz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .none, .none }, - .{ .cmovz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .none }, - .{ .cmovz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .none }, + .{ .cmova, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .none }, + .{ .cmova, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .none }, + .{ .cmova, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .none }, + .{ .cmovae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .none }, + .{ .cmovae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, + .{ .cmovae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, + .{ .cmovb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .none }, + .{ .cmovb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, + .{ .cmovb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, + .{ .cmovbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .short, .none }, + .{ .cmovbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .none }, + .{ .cmovbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .none }, + .{ .cmovc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .none }, + .{ .cmovc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, + .{ .cmovc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, + .{ .cmove, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .short, .none }, + .{ .cmove, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .none }, + .{ .cmove, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .none }, + .{ .cmovg, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .short, .none }, + .{ .cmovg, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .none }, + .{ .cmovg, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .none }, + .{ .cmovge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .short, .none }, + .{ .cmovge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .none }, + .{ .cmovge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .none }, + .{ .cmovl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .short, .none }, + .{ .cmovl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .none }, + .{ .cmovl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .none }, + .{ .cmovle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .short, .none }, + .{ .cmovle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .none }, + .{ .cmovle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .none }, + .{ .cmovna, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .short, .none }, + .{ .cmovna, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .none }, + .{ .cmovna, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .none }, + .{ .cmovnae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .none }, + .{ .cmovnae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, + .{ .cmovnae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, + .{ .cmovnb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .none }, + .{ .cmovnb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, + .{ .cmovnb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, + .{ .cmovnbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .none }, + .{ .cmovnbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .none }, + .{ .cmovnbe, 
.rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .none }, + .{ .cmovnc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .none }, + .{ .cmovnc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, + .{ .cmovnc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, + .{ .cmovne, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .short, .none }, + .{ .cmovne, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .none }, + .{ .cmovne, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .none }, + .{ .cmovng, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .short, .none }, + .{ .cmovng, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .none }, + .{ .cmovng, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .none }, + .{ .cmovnge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .short, .none }, + .{ .cmovnge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .none }, + .{ .cmovnge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .none }, + .{ .cmovnl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .short, .none }, + .{ .cmovnl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .none }, + .{ .cmovnl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .none }, + .{ .cmovnle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .short, .none }, + .{ .cmovnle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .none }, + .{ .cmovnle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .none }, + .{ .cmovno, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x41 }, 0, .short, .none }, + .{ .cmovno, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x41 }, 0, .none, .none }, + .{ .cmovno, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x41 }, 0, .long, .none }, + .{ .cmovnp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .short, .none }, + .{ .cmovnp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .none }, + .{ .cmovnp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .none }, + .{ .cmovns, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x49 }, 0, .short, .none }, + .{ .cmovns, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x49 }, 0, .none, .none }, + .{ .cmovns, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x49 }, 0, .long, .none }, + .{ .cmovnz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .short, .none }, + .{ .cmovnz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .none }, + .{ .cmovnz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .none }, + .{ .cmovo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x40 }, 0, .short, .none }, + .{ .cmovo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x40 }, 0, .none, .none }, + .{ .cmovo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x40 }, 0, .long, .none }, + .{ .cmovp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .short, .none }, + .{ .cmovp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .none }, + .{ .cmovp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .none }, + .{ .cmovpe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .short, .none }, + .{ .cmovpe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .none }, + .{ .cmovpe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .none }, + .{ .cmovpo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .short, .none }, + .{ .cmovpo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .none }, + .{ .cmovpo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .none }, + .{ .cmovs, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x48 }, 0, .short, .none }, + .{ .cmovs, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x48 }, 0, .none, .none }, + .{ .cmovs, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x48 }, 0, .long, .none }, + .{ .cmovz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .short, .none }, + .{ .cmovz, .rm, &.{ .r32, 
.rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .none }, + .{ .cmovz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .none }, - .{ .cmp, .zi, &.{ .al, .imm8 }, &.{ 0x3c }, 0, .none, .none }, - .{ .cmp, .zi, &.{ .ax, .imm16 }, &.{ 0x3d }, 0, .none, .none }, - .{ .cmp, .zi, &.{ .eax, .imm32 }, &.{ 0x3d }, 0, .none, .none }, - .{ .cmp, .zi, &.{ .rax, .imm32s }, &.{ 0x3d }, 0, .long, .none }, - .{ .cmp, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 7, .none, .none }, - .{ .cmp, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 7, .rex, .none }, - .{ .cmp, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 7, .none, .none }, - .{ .cmp, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 7, .none, .none }, - .{ .cmp, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 7, .long, .none }, - .{ .cmp, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 7, .none, .none }, - .{ .cmp, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 7, .none, .none }, - .{ .cmp, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 7, .long, .none }, - .{ .cmp, .mr, &.{ .rm8, .r8 }, &.{ 0x38 }, 0, .none, .none }, - .{ .cmp, .mr, &.{ .rm8, .r8 }, &.{ 0x38 }, 0, .rex, .none }, - .{ .cmp, .mr, &.{ .rm16, .r16 }, &.{ 0x39 }, 0, .none, .none }, - .{ .cmp, .mr, &.{ .rm32, .r32 }, &.{ 0x39 }, 0, .none, .none }, - .{ .cmp, .mr, &.{ .rm64, .r64 }, &.{ 0x39 }, 0, .long, .none }, - .{ .cmp, .rm, &.{ .r8, .rm8 }, &.{ 0x3a }, 0, .none, .none }, - .{ .cmp, .rm, &.{ .r8, .rm8 }, &.{ 0x3a }, 0, .rex, .none }, - .{ .cmp, .rm, &.{ .r16, .rm16 }, &.{ 0x3b }, 0, .none, .none }, - .{ .cmp, .rm, &.{ .r32, .rm32 }, &.{ 0x3b }, 0, .none, .none }, - .{ .cmp, .rm, &.{ .r64, .rm64 }, &.{ 0x3b }, 0, .long, .none }, + .{ .cmp, .zi, &.{ .al, .imm8 }, &.{ 0x3c }, 0, .none, .none }, + .{ .cmp, .zi, &.{ .ax, .imm16 }, &.{ 0x3d }, 0, .short, .none }, + .{ .cmp, .zi, &.{ .eax, .imm32 }, &.{ 0x3d }, 0, .none, .none }, + .{ .cmp, .zi, &.{ .rax, .imm32s }, &.{ 0x3d }, 0, .long, .none }, + .{ .cmp, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 7, .none, .none }, + .{ .cmp, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 7, .rex, .none }, + .{ .cmp, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 7, .short, .none }, + .{ .cmp, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 7, .none, .none }, + .{ .cmp, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 7, .long, .none }, + .{ .cmp, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 7, .short, .none }, + .{ .cmp, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 7, .none, .none }, + .{ .cmp, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 7, .long, .none }, + .{ .cmp, .mr, &.{ .rm8, .r8 }, &.{ 0x38 }, 0, .none, .none }, + .{ .cmp, .mr, &.{ .rm8, .r8 }, &.{ 0x38 }, 0, .rex, .none }, + .{ .cmp, .mr, &.{ .rm16, .r16 }, &.{ 0x39 }, 0, .short, .none }, + .{ .cmp, .mr, &.{ .rm32, .r32 }, &.{ 0x39 }, 0, .none, .none }, + .{ .cmp, .mr, &.{ .rm64, .r64 }, &.{ 0x39 }, 0, .long, .none }, + .{ .cmp, .rm, &.{ .r8, .rm8 }, &.{ 0x3a }, 0, .none, .none }, + .{ .cmp, .rm, &.{ .r8, .rm8 }, &.{ 0x3a }, 0, .rex, .none }, + .{ .cmp, .rm, &.{ .r16, .rm16 }, &.{ 0x3b }, 0, .short, .none }, + .{ .cmp, .rm, &.{ .r32, .rm32 }, &.{ 0x3b }, 0, .none, .none }, + .{ .cmp, .rm, &.{ .r64, .rm64 }, &.{ 0x3b }, 0, .long, .none }, - .{ .cmps, .np, &.{ .m8, .m8 }, &.{ 0xa6 }, 0, .none, .none }, - .{ .cmps, .np, &.{ .m16, .m16 }, &.{ 0xa7 }, 0, .none, .none }, - .{ .cmps, .np, &.{ .m32, .m32 }, &.{ 0xa7 }, 0, .none, .none }, - .{ .cmps, .np, &.{ .m64, .m64 }, &.{ 0xa7 }, 0, .long, .none }, + .{ .cmps, .np, &.{ .m8, .m8 }, &.{ 0xa6 }, 0, .none, .none }, + .{ .cmps, .np, &.{ .m16, .m16 }, &.{ 0xa7 }, 0, .short, .none }, + .{ .cmps, .np, &.{ .m32, .m32 }, &.{ 0xa7 }, 0, .none, .none }, + .{ .cmps, .np, &.{ .m64, .m64 }, &.{ 0xa7 }, 0, .long, 
.none }, .{ .cmpsb, .np, &.{}, &.{ 0xa6 }, 0, .none, .none }, .{ .cmpsw, .np, &.{}, &.{ 0xa7 }, 0, .short, .none }, .{ .cmpsd, .np, &.{}, &.{ 0xa7 }, 0, .none, .none }, .{ .cmpsq, .np, &.{}, &.{ 0xa7 }, 0, .long, .none }, - .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .none, .none }, - .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .rex, .none }, - .{ .cmpxchg, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xb1 }, 0, .none, .none }, - .{ .cmpxchg, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xb1 }, 0, .none, .none }, - .{ .cmpxchg, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xb1 }, 0, .long, .none }, + .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .none, .none }, + .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .rex, .none }, + .{ .cmpxchg, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xb1 }, 0, .short, .none }, + .{ .cmpxchg, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xb1 }, 0, .none, .none }, + .{ .cmpxchg, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xb1 }, 0, .long, .none }, .{ .cmpxchg8b, .m, &.{ .m64 }, &.{ 0x0f, 0xc7 }, 1, .none, .none }, .{ .cmpxchg16b, .m, &.{ .m128 }, &.{ 0x0f, 0xc7 }, 1, .long, .none }, - .{ .div, .m, &.{ .rm8 }, &.{ 0xf6 }, 6, .none, .none }, - .{ .div, .m, &.{ .rm8 }, &.{ 0xf6 }, 6, .rex, .none }, - .{ .div, .m, &.{ .rm16 }, &.{ 0xf7 }, 6, .none, .none }, - .{ .div, .m, &.{ .rm32 }, &.{ 0xf7 }, 6, .none, .none }, - .{ .div, .m, &.{ .rm64 }, &.{ 0xf7 }, 6, .long, .none }, + .{ .div, .m, &.{ .rm8 }, &.{ 0xf6 }, 6, .none, .none }, + .{ .div, .m, &.{ .rm8 }, &.{ 0xf6 }, 6, .rex, .none }, + .{ .div, .m, &.{ .rm16 }, &.{ 0xf7 }, 6, .short, .none }, + .{ .div, .m, &.{ .rm32 }, &.{ 0xf7 }, 6, .none, .none }, + .{ .div, .m, &.{ .rm64 }, &.{ 0xf7 }, 6, .long, .none }, .{ .fisttp, .m, &.{ .m16 }, &.{ 0xdf }, 1, .none, .x87 }, .{ .fisttp, .m, &.{ .m32 }, &.{ 0xdb }, 1, .none, .x87 }, @@ -280,26 +280,26 @@ pub const table = [_]Entry{ .{ .fld, .m, &.{ .m64 }, &.{ 0xdd }, 0, .none, .x87 }, .{ .fld, .m, &.{ .m80 }, &.{ 0xdb }, 5, .none, .x87 }, - .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .none, .none }, - .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .rex, .none }, - .{ .idiv, .m, &.{ .rm16 }, &.{ 0xf7 }, 7, .none, .none }, - .{ .idiv, .m, &.{ .rm32 }, &.{ 0xf7 }, 7, .none, .none }, - .{ .idiv, .m, &.{ .rm64 }, &.{ 0xf7 }, 7, .long, .none }, + .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .none, .none }, + .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .rex, .none }, + .{ .idiv, .m, &.{ .rm16 }, &.{ 0xf7 }, 7, .short, .none }, + .{ .idiv, .m, &.{ .rm32 }, &.{ 0xf7 }, 7, .none, .none }, + .{ .idiv, .m, &.{ .rm64 }, &.{ 0xf7 }, 7, .long, .none }, - .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .none, .none }, - .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .rex, .none }, - .{ .imul, .m, &.{ .rm16, }, &.{ 0xf7 }, 5, .none, .none }, - .{ .imul, .m, &.{ .rm32, }, &.{ 0xf7 }, 5, .none, .none }, - .{ .imul, .m, &.{ .rm64, }, &.{ 0xf7 }, 5, .long, .none }, - .{ .imul, .rm, &.{ .r16, .rm16, }, &.{ 0x0f, 0xaf }, 0, .none, .none }, - .{ .imul, .rm, &.{ .r32, .rm32, }, &.{ 0x0f, 0xaf }, 0, .none, .none }, - .{ .imul, .rm, &.{ .r64, .rm64, }, &.{ 0x0f, 0xaf }, 0, .long, .none }, - .{ .imul, .rmi, &.{ .r16, .rm16, .imm8s }, &.{ 0x6b }, 0, .none, .none }, - .{ .imul, .rmi, &.{ .r32, .rm32, .imm8s }, &.{ 0x6b }, 0, .none, .none }, - .{ .imul, .rmi, &.{ .r64, .rm64, .imm8s }, &.{ 0x6b }, 0, .long, .none }, - .{ .imul, .rmi, &.{ .r16, .rm16, .imm16 }, &.{ 0x69 }, 0, .none, .none }, - .{ .imul, .rmi, &.{ .r32, .rm32, .imm32 }, &.{ 0x69 }, 0, .none, .none }, - .{ .imul, .rmi, &.{ .r64, .rm64, .imm32 }, &.{ 0x69 }, 0, .long, .none }, + .{ 
.imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .none, .none }, + .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .rex, .none }, + .{ .imul, .m, &.{ .rm16, }, &.{ 0xf7 }, 5, .short, .none }, + .{ .imul, .m, &.{ .rm32, }, &.{ 0xf7 }, 5, .none, .none }, + .{ .imul, .m, &.{ .rm64, }, &.{ 0xf7 }, 5, .long, .none }, + .{ .imul, .rm, &.{ .r16, .rm16, }, &.{ 0x0f, 0xaf }, 0, .short, .none }, + .{ .imul, .rm, &.{ .r32, .rm32, }, &.{ 0x0f, 0xaf }, 0, .none, .none }, + .{ .imul, .rm, &.{ .r64, .rm64, }, &.{ 0x0f, 0xaf }, 0, .long, .none }, + .{ .imul, .rmi, &.{ .r16, .rm16, .imm8s }, &.{ 0x6b }, 0, .short, .none }, + .{ .imul, .rmi, &.{ .r32, .rm32, .imm8s }, &.{ 0x6b }, 0, .none, .none }, + .{ .imul, .rmi, &.{ .r64, .rm64, .imm8s }, &.{ 0x6b }, 0, .long, .none }, + .{ .imul, .rmi, &.{ .r16, .rm16, .imm16 }, &.{ 0x69 }, 0, .short, .none }, + .{ .imul, .rmi, &.{ .r32, .rm32, .imm32 }, &.{ 0x69 }, 0, .none, .none }, + .{ .imul, .rmi, &.{ .r64, .rm64, .imm32 }, &.{ 0x69 }, 0, .long, .none }, .{ .int3, .np, &.{}, &.{ 0xcc }, 0, .none, .none }, @@ -338,281 +338,283 @@ pub const table = [_]Entry{ .{ .jmp, .d, &.{ .rel32 }, &.{ 0xe9 }, 0, .none, .none }, .{ .jmp, .m, &.{ .rm64 }, &.{ 0xff }, 4, .none, .none }, - .{ .lea, .rm, &.{ .r16, .m }, &.{ 0x8d }, 0, .none, .none }, - .{ .lea, .rm, &.{ .r32, .m }, &.{ 0x8d }, 0, .none, .none }, - .{ .lea, .rm, &.{ .r64, .m }, &.{ 0x8d }, 0, .long, .none }, + .{ .lea, .rm, &.{ .r16, .m }, &.{ 0x8d }, 0, .short, .none }, + .{ .lea, .rm, &.{ .r32, .m }, &.{ 0x8d }, 0, .none, .none }, + .{ .lea, .rm, &.{ .r64, .m }, &.{ 0x8d }, 0, .long, .none }, .{ .lfence, .np, &.{}, &.{ 0x0f, 0xae, 0xe8 }, 0, .none, .none }, - .{ .lods, .np, &.{ .m8 }, &.{ 0xac }, 0, .none, .none }, - .{ .lods, .np, &.{ .m16 }, &.{ 0xad }, 0, .none, .none }, - .{ .lods, .np, &.{ .m32 }, &.{ 0xad }, 0, .none, .none }, - .{ .lods, .np, &.{ .m64 }, &.{ 0xad }, 0, .long, .none }, + .{ .lods, .np, &.{ .m8 }, &.{ 0xac }, 0, .none, .none }, + .{ .lods, .np, &.{ .m16 }, &.{ 0xad }, 0, .short, .none }, + .{ .lods, .np, &.{ .m32 }, &.{ 0xad }, 0, .none, .none }, + .{ .lods, .np, &.{ .m64 }, &.{ 0xad }, 0, .long, .none }, .{ .lodsb, .np, &.{}, &.{ 0xac }, 0, .none, .none }, .{ .lodsw, .np, &.{}, &.{ 0xad }, 0, .short, .none }, .{ .lodsd, .np, &.{}, &.{ 0xad }, 0, .none, .none }, .{ .lodsq, .np, &.{}, &.{ 0xad }, 0, .long, .none }, - .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .none }, - .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .none }, - .{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long, .none }, + .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .short, .none }, + .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .none }, + .{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long, .none }, .{ .mfence, .np, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none, .none }, - .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .none, .none }, - .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .rex, .none }, - .{ .mov, .mr, &.{ .rm16, .r16 }, &.{ 0x89 }, 0, .none, .none }, - .{ .mov, .mr, &.{ .rm32, .r32 }, &.{ 0x89 }, 0, .none, .none }, - .{ .mov, .mr, &.{ .rm64, .r64 }, &.{ 0x89 }, 0, .long, .none }, - .{ .mov, .rm, &.{ .r8, .rm8 }, &.{ 0x8a }, 0, .none, .none }, - .{ .mov, .rm, &.{ .r8, .rm8 }, &.{ 0x8a }, 0, .rex, .none }, - .{ .mov, .rm, &.{ .r16, .rm16 }, &.{ 0x8b }, 0, .none, .none }, - .{ .mov, .rm, &.{ .r32, .rm32 }, &.{ 0x8b }, 0, .none, .none }, - .{ .mov, .rm, &.{ .r64, .rm64 }, &.{ 0x8b }, 0, .long, .none }, - .{ .mov, .mr, &.{ 
.rm16, .sreg }, &.{ 0x8c }, 0, .none, .none }, - .{ .mov, .mr, &.{ .rm64, .sreg }, &.{ 0x8c }, 0, .long, .none }, - .{ .mov, .rm, &.{ .sreg, .rm16 }, &.{ 0x8e }, 0, .none, .none }, - .{ .mov, .rm, &.{ .sreg, .rm64 }, &.{ 0x8e }, 0, .long, .none }, - .{ .mov, .fd, &.{ .al, .moffs }, &.{ 0xa0 }, 0, .none, .none }, - .{ .mov, .fd, &.{ .ax, .moffs }, &.{ 0xa1 }, 0, .none, .none }, - .{ .mov, .fd, &.{ .eax, .moffs }, &.{ 0xa1 }, 0, .none, .none }, - .{ .mov, .fd, &.{ .rax, .moffs }, &.{ 0xa1 }, 0, .long, .none }, - .{ .mov, .td, &.{ .moffs, .al }, &.{ 0xa2 }, 0, .none, .none }, - .{ .mov, .td, &.{ .moffs, .ax }, &.{ 0xa3 }, 0, .none, .none }, - .{ .mov, .td, &.{ .moffs, .eax }, &.{ 0xa3 }, 0, .none, .none }, - .{ .mov, .td, &.{ .moffs, .rax }, &.{ 0xa3 }, 0, .long, .none }, - .{ .mov, .oi, &.{ .r8, .imm8 }, &.{ 0xb0 }, 0, .none, .none }, - .{ .mov, .oi, &.{ .r8, .imm8 }, &.{ 0xb0 }, 0, .rex, .none }, - .{ .mov, .oi, &.{ .r16, .imm16 }, &.{ 0xb8 }, 0, .none, .none }, - .{ .mov, .oi, &.{ .r32, .imm32 }, &.{ 0xb8 }, 0, .none, .none }, - .{ .mov, .oi, &.{ .r64, .imm64 }, &.{ 0xb8 }, 0, .long, .none }, - .{ .mov, .mi, &.{ .rm8, .imm8 }, &.{ 0xc6 }, 0, .none, .none }, - .{ .mov, .mi, &.{ .rm8, .imm8 }, &.{ 0xc6 }, 0, .rex, .none }, - .{ .mov, .mi, &.{ .rm16, .imm16 }, &.{ 0xc7 }, 0, .none, .none }, - .{ .mov, .mi, &.{ .rm32, .imm32 }, &.{ 0xc7 }, 0, .none, .none }, - .{ .mov, .mi, &.{ .rm64, .imm32s }, &.{ 0xc7 }, 0, .long, .none }, + .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .none, .none }, + .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .rex, .none }, + .{ .mov, .mr, &.{ .rm16, .r16 }, &.{ 0x89 }, 0, .short, .none }, + .{ .mov, .mr, &.{ .rm32, .r32 }, &.{ 0x89 }, 0, .none, .none }, + .{ .mov, .mr, &.{ .rm64, .r64 }, &.{ 0x89 }, 0, .long, .none }, + .{ .mov, .rm, &.{ .r8, .rm8 }, &.{ 0x8a }, 0, .none, .none }, + .{ .mov, .rm, &.{ .r8, .rm8 }, &.{ 0x8a }, 0, .rex, .none }, + .{ .mov, .rm, &.{ .r16, .rm16 }, &.{ 0x8b }, 0, .short, .none }, + .{ .mov, .rm, &.{ .r32, .rm32 }, &.{ 0x8b }, 0, .none, .none }, + .{ .mov, .rm, &.{ .r64, .rm64 }, &.{ 0x8b }, 0, .long, .none }, + .{ .mov, .mr, &.{ .rm16, .sreg }, &.{ 0x8c }, 0, .short, .none }, + .{ .mov, .mr, &.{ .r32_m16, .sreg }, &.{ 0x8c }, 0, .none, .none }, + .{ .mov, .mr, &.{ .r64_m16, .sreg }, &.{ 0x8c }, 0, .long, .none }, + .{ .mov, .rm, &.{ .sreg, .rm16 }, &.{ 0x8e }, 0, .short, .none }, + .{ .mov, .rm, &.{ .sreg, .r32_m16 }, &.{ 0x8e }, 0, .none, .none }, + .{ .mov, .rm, &.{ .sreg, .r64_m16 }, &.{ 0x8e }, 0, .long, .none }, + .{ .mov, .fd, &.{ .al, .moffs }, &.{ 0xa0 }, 0, .none, .none }, + .{ .mov, .fd, &.{ .ax, .moffs }, &.{ 0xa1 }, 0, .none, .none }, + .{ .mov, .fd, &.{ .eax, .moffs }, &.{ 0xa1 }, 0, .none, .none }, + .{ .mov, .fd, &.{ .rax, .moffs }, &.{ 0xa1 }, 0, .long, .none }, + .{ .mov, .td, &.{ .moffs, .al }, &.{ 0xa2 }, 0, .none, .none }, + .{ .mov, .td, &.{ .moffs, .ax }, &.{ 0xa3 }, 0, .none, .none }, + .{ .mov, .td, &.{ .moffs, .eax }, &.{ 0xa3 }, 0, .none, .none }, + .{ .mov, .td, &.{ .moffs, .rax }, &.{ 0xa3 }, 0, .long, .none }, + .{ .mov, .oi, &.{ .r8, .imm8 }, &.{ 0xb0 }, 0, .none, .none }, + .{ .mov, .oi, &.{ .r8, .imm8 }, &.{ 0xb0 }, 0, .rex, .none }, + .{ .mov, .oi, &.{ .r16, .imm16 }, &.{ 0xb8 }, 0, .short, .none }, + .{ .mov, .oi, &.{ .r32, .imm32 }, &.{ 0xb8 }, 0, .none, .none }, + .{ .mov, .oi, &.{ .r64, .imm64 }, &.{ 0xb8 }, 0, .long, .none }, + .{ .mov, .mi, &.{ .rm8, .imm8 }, &.{ 0xc6 }, 0, .none, .none }, + .{ .mov, .mi, &.{ .rm8, .imm8 }, &.{ 0xc6 }, 0, .rex, .none }, + .{ .mov, .mi, &.{ .rm16, .imm16 }, &.{ 0xc7 
}, 0, .short, .none }, + .{ .mov, .mi, &.{ .rm32, .imm32 }, &.{ 0xc7 }, 0, .none, .none }, + .{ .mov, .mi, &.{ .rm64, .imm32s }, &.{ 0xc7 }, 0, .long, .none }, - .{ .movbe, .rm, &.{ .r16, .m16 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none, .none }, - .{ .movbe, .rm, &.{ .r32, .m32 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none, .none }, - .{ .movbe, .rm, &.{ .r64, .m64 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .long, .none }, - .{ .movbe, .mr, &.{ .m16, .r16 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none, .none }, - .{ .movbe, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none, .none }, - .{ .movbe, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .long, .none }, + .{ .movbe, .rm, &.{ .r16, .m16 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .short, .none }, + .{ .movbe, .rm, &.{ .r32, .m32 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none, .none }, + .{ .movbe, .rm, &.{ .r64, .m64 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .long, .none }, + .{ .movbe, .mr, &.{ .m16, .r16 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .short, .none }, + .{ .movbe, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none, .none }, + .{ .movbe, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .long, .none }, - .{ .movs, .np, &.{ .m8, .m8 }, &.{ 0xa4 }, 0, .none, .none }, - .{ .movs, .np, &.{ .m16, .m16 }, &.{ 0xa5 }, 0, .none, .none }, - .{ .movs, .np, &.{ .m32, .m32 }, &.{ 0xa5 }, 0, .none, .none }, - .{ .movs, .np, &.{ .m64, .m64 }, &.{ 0xa5 }, 0, .long, .none }, + .{ .movs, .np, &.{ .m8, .m8 }, &.{ 0xa4 }, 0, .none, .none }, + .{ .movs, .np, &.{ .m16, .m16 }, &.{ 0xa5 }, 0, .short, .none }, + .{ .movs, .np, &.{ .m32, .m32 }, &.{ 0xa5 }, 0, .none, .none }, + .{ .movs, .np, &.{ .m64, .m64 }, &.{ 0xa5 }, 0, .long, .none }, .{ .movsb, .np, &.{}, &.{ 0xa4 }, 0, .none, .none }, .{ .movsw, .np, &.{}, &.{ 0xa5 }, 0, .short, .none }, .{ .movsd, .np, &.{}, &.{ 0xa5 }, 0, .none, .none }, .{ .movsq, .np, &.{}, &.{ 0xa5 }, 0, .long, .none }, - .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .none, .none }, - .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .rex, .none }, - .{ .movsx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xbe }, 0, .none, .none }, - .{ .movsx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xbe }, 0, .rex, .none }, - .{ .movsx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xbe }, 0, .long, .none }, - .{ .movsx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xbf }, 0, .none, .none }, - .{ .movsx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xbf }, 0, .long, .none }, + .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .short, .none }, + .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .rex_short, .none }, + .{ .movsx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xbe }, 0, .none, .none }, + .{ .movsx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xbe }, 0, .rex, .none }, + .{ .movsx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xbe }, 0, .long, .none }, + .{ .movsx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xbf }, 0, .none, .none }, + .{ .movsx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xbf }, 0, .long, .none }, // This instruction is discouraged. 
.{ .movsxd, .rm, &.{ .r32, .rm32 }, &.{ 0x63 }, 0, .none, .none }, .{ .movsxd, .rm, &.{ .r64, .rm32 }, &.{ 0x63 }, 0, .long, .none }, - .{ .movzx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .none, .none }, - .{ .movzx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .none, .none }, - .{ .movzx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .long, .none }, - .{ .movzx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .none, .none }, - .{ .movzx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .long, .none }, + .{ .movzx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .short, .none }, + .{ .movzx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .none, .none }, + .{ .movzx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .long, .none }, + .{ .movzx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .none, .none }, + .{ .movzx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .long, .none }, - .{ .mul, .m, &.{ .rm8 }, &.{ 0xf6 }, 4, .none, .none }, - .{ .mul, .m, &.{ .rm8 }, &.{ 0xf6 }, 4, .rex, .none }, - .{ .mul, .m, &.{ .rm16 }, &.{ 0xf7 }, 4, .none, .none }, - .{ .mul, .m, &.{ .rm32 }, &.{ 0xf7 }, 4, .none, .none }, - .{ .mul, .m, &.{ .rm64 }, &.{ 0xf7 }, 4, .long, .none }, + .{ .mul, .m, &.{ .rm8 }, &.{ 0xf6 }, 4, .none, .none }, + .{ .mul, .m, &.{ .rm8 }, &.{ 0xf6 }, 4, .rex, .none }, + .{ .mul, .m, &.{ .rm16 }, &.{ 0xf7 }, 4, .short, .none }, + .{ .mul, .m, &.{ .rm32 }, &.{ 0xf7 }, 4, .none, .none }, + .{ .mul, .m, &.{ .rm64 }, &.{ 0xf7 }, 4, .long, .none }, - .{ .neg, .m, &.{ .rm8 }, &.{ 0xf6 }, 3, .none, .none }, - .{ .neg, .m, &.{ .rm8 }, &.{ 0xf6 }, 3, .rex, .none }, - .{ .neg, .m, &.{ .rm16 }, &.{ 0xf7 }, 3, .none, .none }, - .{ .neg, .m, &.{ .rm32 }, &.{ 0xf7 }, 3, .none, .none }, - .{ .neg, .m, &.{ .rm64 }, &.{ 0xf7 }, 3, .long, .none }, + .{ .neg, .m, &.{ .rm8 }, &.{ 0xf6 }, 3, .none, .none }, + .{ .neg, .m, &.{ .rm8 }, &.{ 0xf6 }, 3, .rex, .none }, + .{ .neg, .m, &.{ .rm16 }, &.{ 0xf7 }, 3, .short, .none }, + .{ .neg, .m, &.{ .rm32 }, &.{ 0xf7 }, 3, .none, .none }, + .{ .neg, .m, &.{ .rm64 }, &.{ 0xf7 }, 3, .long, .none }, .{ .nop, .np, &.{}, &.{ 0x90 }, 0, .none, .none }, - .{ .not, .m, &.{ .rm8 }, &.{ 0xf6 }, 2, .none, .none }, - .{ .not, .m, &.{ .rm8 }, &.{ 0xf6 }, 2, .rex, .none }, - .{ .not, .m, &.{ .rm16 }, &.{ 0xf7 }, 2, .none, .none }, - .{ .not, .m, &.{ .rm32 }, &.{ 0xf7 }, 2, .none, .none }, - .{ .not, .m, &.{ .rm64 }, &.{ 0xf7 }, 2, .long, .none }, + .{ .not, .m, &.{ .rm8 }, &.{ 0xf6 }, 2, .none, .none }, + .{ .not, .m, &.{ .rm8 }, &.{ 0xf6 }, 2, .rex, .none }, + .{ .not, .m, &.{ .rm16 }, &.{ 0xf7 }, 2, .short, .none }, + .{ .not, .m, &.{ .rm32 }, &.{ 0xf7 }, 2, .none, .none }, + .{ .not, .m, &.{ .rm64 }, &.{ 0xf7 }, 2, .long, .none }, - .{ .@"or", .zi, &.{ .al, .imm8 }, &.{ 0x0c }, 0, .none, .none }, - .{ .@"or", .zi, &.{ .ax, .imm16 }, &.{ 0x0d }, 0, .none, .none }, - .{ .@"or", .zi, &.{ .eax, .imm32 }, &.{ 0x0d }, 0, .none, .none }, - .{ .@"or", .zi, &.{ .rax, .imm32s }, &.{ 0x0d }, 0, .long, .none }, - .{ .@"or", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 1, .none, .none }, - .{ .@"or", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 1, .rex, .none }, - .{ .@"or", .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 1, .none, .none }, - .{ .@"or", .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 1, .none, .none }, - .{ .@"or", .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 1, .long, .none }, - .{ .@"or", .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 1, .none, .none }, - .{ .@"or", .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 1, .none, .none }, - .{ .@"or", .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 1, .long, .none }, - .{ .@"or", .mr, &.{ .rm8, .r8 }, &.{ 0x08 }, 0, 
.none, .none }, - .{ .@"or", .mr, &.{ .rm8, .r8 }, &.{ 0x08 }, 0, .rex, .none }, - .{ .@"or", .mr, &.{ .rm16, .r16 }, &.{ 0x09 }, 0, .none, .none }, - .{ .@"or", .mr, &.{ .rm32, .r32 }, &.{ 0x09 }, 0, .none, .none }, - .{ .@"or", .mr, &.{ .rm64, .r64 }, &.{ 0x09 }, 0, .long, .none }, - .{ .@"or", .rm, &.{ .r8, .rm8 }, &.{ 0x0a }, 0, .none, .none }, - .{ .@"or", .rm, &.{ .r8, .rm8 }, &.{ 0x0a }, 0, .rex, .none }, - .{ .@"or", .rm, &.{ .r16, .rm16 }, &.{ 0x0b }, 0, .none, .none }, - .{ .@"or", .rm, &.{ .r32, .rm32 }, &.{ 0x0b }, 0, .none, .none }, - .{ .@"or", .rm, &.{ .r64, .rm64 }, &.{ 0x0b }, 0, .long, .none }, + .{ .@"or", .zi, &.{ .al, .imm8 }, &.{ 0x0c }, 0, .none, .none }, + .{ .@"or", .zi, &.{ .ax, .imm16 }, &.{ 0x0d }, 0, .short, .none }, + .{ .@"or", .zi, &.{ .eax, .imm32 }, &.{ 0x0d }, 0, .none, .none }, + .{ .@"or", .zi, &.{ .rax, .imm32s }, &.{ 0x0d }, 0, .long, .none }, + .{ .@"or", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 1, .none, .none }, + .{ .@"or", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 1, .rex, .none }, + .{ .@"or", .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 1, .short, .none }, + .{ .@"or", .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 1, .none, .none }, + .{ .@"or", .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 1, .long, .none }, + .{ .@"or", .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 1, .short, .none }, + .{ .@"or", .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 1, .none, .none }, + .{ .@"or", .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 1, .long, .none }, + .{ .@"or", .mr, &.{ .rm8, .r8 }, &.{ 0x08 }, 0, .none, .none }, + .{ .@"or", .mr, &.{ .rm8, .r8 }, &.{ 0x08 }, 0, .rex, .none }, + .{ .@"or", .mr, &.{ .rm16, .r16 }, &.{ 0x09 }, 0, .short, .none }, + .{ .@"or", .mr, &.{ .rm32, .r32 }, &.{ 0x09 }, 0, .none, .none }, + .{ .@"or", .mr, &.{ .rm64, .r64 }, &.{ 0x09 }, 0, .long, .none }, + .{ .@"or", .rm, &.{ .r8, .rm8 }, &.{ 0x0a }, 0, .none, .none }, + .{ .@"or", .rm, &.{ .r8, .rm8 }, &.{ 0x0a }, 0, .rex, .none }, + .{ .@"or", .rm, &.{ .r16, .rm16 }, &.{ 0x0b }, 0, .short, .none }, + .{ .@"or", .rm, &.{ .r32, .rm32 }, &.{ 0x0b }, 0, .none, .none }, + .{ .@"or", .rm, &.{ .r64, .rm64 }, &.{ 0x0b }, 0, .long, .none }, - .{ .pop, .o, &.{ .r16 }, &.{ 0x58 }, 0, .none, .none }, - .{ .pop, .o, &.{ .r64 }, &.{ 0x58 }, 0, .none, .none }, - .{ .pop, .m, &.{ .rm16 }, &.{ 0x8f }, 0, .none, .none }, - .{ .pop, .m, &.{ .rm64 }, &.{ 0x8f }, 0, .none, .none }, + .{ .pop, .o, &.{ .r16 }, &.{ 0x58 }, 0, .short, .none }, + .{ .pop, .o, &.{ .r64 }, &.{ 0x58 }, 0, .none, .none }, + .{ .pop, .m, &.{ .rm16 }, &.{ 0x8f }, 0, .short, .none }, + .{ .pop, .m, &.{ .rm64 }, &.{ 0x8f }, 0, .none, .none }, - .{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .none }, - .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .none }, - .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .none }, + .{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .short, .none }, + .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .none }, + .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .none }, - .{ .push, .o, &.{ .r16 }, &.{ 0x50 }, 0, .none, .none }, - .{ .push, .o, &.{ .r64 }, &.{ 0x50 }, 0, .none, .none }, - .{ .push, .m, &.{ .rm16 }, &.{ 0xff }, 6, .none, .none }, - .{ .push, .m, &.{ .rm64 }, &.{ 0xff }, 6, .none, .none }, - .{ .push, .i, &.{ .imm8 }, &.{ 0x6a }, 0, .none, .none }, - .{ .push, .i, &.{ .imm16 }, &.{ 0x68 }, 0, .none, .none }, - .{ .push, .i, &.{ .imm32 }, &.{ 0x68 }, 0, .none, .none }, + .{ .push, .o, &.{ .r16 }, &.{ 0x50 }, 0, .short, 
.none }, + .{ .push, .o, &.{ .r64 }, &.{ 0x50 }, 0, .none, .none }, + .{ .push, .m, &.{ .rm16 }, &.{ 0xff }, 6, .short, .none }, + .{ .push, .m, &.{ .rm64 }, &.{ 0xff }, 6, .none, .none }, + .{ .push, .i, &.{ .imm8 }, &.{ 0x6a }, 0, .none, .none }, + .{ .push, .i, &.{ .imm16 }, &.{ 0x68 }, 0, .short, .none }, + .{ .push, .i, &.{ .imm32 }, &.{ 0x68 }, 0, .none, .none }, .{ .ret, .np, &.{}, &.{ 0xc3 }, 0, .none, .none }, - .{ .rcl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 2, .none, .none }, - .{ .rcl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 2, .rex, .none }, - .{ .rcl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 2, .none, .none }, - .{ .rcl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 2, .rex, .none }, - .{ .rcl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 2, .none, .none }, - .{ .rcl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 2, .rex, .none }, - .{ .rcl, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 2, .none, .none }, - .{ .rcl, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 2, .none, .none }, - .{ .rcl, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 2, .none, .none }, - .{ .rcl, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 2, .none, .none }, - .{ .rcl, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 2, .long, .none }, - .{ .rcl, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 2, .none, .none }, - .{ .rcl, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 2, .long, .none }, - .{ .rcl, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 2, .none, .none }, - .{ .rcl, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 2, .long, .none }, + .{ .rcl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 2, .none, .none }, + .{ .rcl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 2, .rex, .none }, + .{ .rcl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 2, .none, .none }, + .{ .rcl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 2, .rex, .none }, + .{ .rcl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 2, .none, .none }, + .{ .rcl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 2, .rex, .none }, + .{ .rcl, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 2, .short, .none }, + .{ .rcl, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 2, .short, .none }, + .{ .rcl, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 2, .short, .none }, + .{ .rcl, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 2, .none, .none }, + .{ .rcl, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 2, .long, .none }, + .{ .rcl, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 2, .none, .none }, + .{ .rcl, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 2, .long, .none }, + .{ .rcl, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 2, .none, .none }, + .{ .rcl, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 2, .long, .none }, - .{ .rcr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 3, .none, .none }, - .{ .rcr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 3, .rex, .none }, - .{ .rcr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 3, .none, .none }, - .{ .rcr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 3, .rex, .none }, - .{ .rcr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 3, .none, .none }, - .{ .rcr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 3, .rex, .none }, - .{ .rcr, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 3, .none, .none }, - .{ .rcr, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 3, .none, .none }, - .{ .rcr, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 3, .none, .none }, - .{ .rcr, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 3, .none, .none }, - .{ .rcr, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 3, .long, .none }, - .{ .rcr, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 3, .none, .none }, - .{ .rcr, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 3, .long, .none }, - .{ .rcr, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 3, .none, .none }, - .{ .rcr, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 3, .long, .none }, + .{ .rcr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 3, .none, .none }, + .{ .rcr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 3, .rex, .none }, + .{ .rcr, .mc, &.{ 
.rm8, .cl }, &.{ 0xd2 }, 3, .none, .none }, + .{ .rcr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 3, .rex, .none }, + .{ .rcr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 3, .none, .none }, + .{ .rcr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 3, .rex, .none }, + .{ .rcr, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 3, .short, .none }, + .{ .rcr, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 3, .short, .none }, + .{ .rcr, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 3, .short, .none }, + .{ .rcr, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 3, .none, .none }, + .{ .rcr, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 3, .long, .none }, + .{ .rcr, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 3, .none, .none }, + .{ .rcr, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 3, .long, .none }, + .{ .rcr, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 3, .none, .none }, + .{ .rcr, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 3, .long, .none }, - .{ .rol, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 0, .none, .none }, - .{ .rol, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 0, .rex, .none }, - .{ .rol, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 0, .none, .none }, - .{ .rol, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 0, .rex, .none }, - .{ .rol, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 0, .none, .none }, - .{ .rol, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 0, .rex, .none }, - .{ .rol, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 0, .none, .none }, - .{ .rol, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 0, .none, .none }, - .{ .rol, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 0, .none, .none }, - .{ .rol, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 0, .none, .none }, - .{ .rol, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 0, .long, .none }, - .{ .rol, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 0, .none, .none }, - .{ .rol, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 0, .long, .none }, - .{ .rol, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 0, .none, .none }, - .{ .rol, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 0, .long, .none }, + .{ .rol, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 0, .none, .none }, + .{ .rol, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 0, .rex, .none }, + .{ .rol, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 0, .none, .none }, + .{ .rol, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 0, .rex, .none }, + .{ .rol, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 0, .none, .none }, + .{ .rol, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 0, .rex, .none }, + .{ .rol, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 0, .short, .none }, + .{ .rol, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 0, .short, .none }, + .{ .rol, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 0, .short, .none }, + .{ .rol, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 0, .none, .none }, + .{ .rol, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 0, .long, .none }, + .{ .rol, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 0, .none, .none }, + .{ .rol, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 0, .long, .none }, + .{ .rol, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 0, .none, .none }, + .{ .rol, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 0, .long, .none }, - .{ .ror, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 1, .none, .none }, - .{ .ror, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 1, .rex, .none }, - .{ .ror, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 1, .none, .none }, - .{ .ror, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 1, .rex, .none }, - .{ .ror, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 1, .none, .none }, - .{ .ror, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 1, .rex, .none }, - .{ .ror, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 1, .none, .none }, - .{ .ror, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 1, .none, .none }, - .{ .ror, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 1, .none, .none }, - .{ .ror, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 1, .none, .none }, - .{ .ror, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 1, .long, .none }, - 
.{ .ror, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 1, .none, .none }, - .{ .ror, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 1, .long, .none }, - .{ .ror, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 1, .none, .none }, - .{ .ror, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 1, .long, .none }, + .{ .ror, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 1, .none, .none }, + .{ .ror, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 1, .rex, .none }, + .{ .ror, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 1, .none, .none }, + .{ .ror, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 1, .rex, .none }, + .{ .ror, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 1, .none, .none }, + .{ .ror, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 1, .rex, .none }, + .{ .ror, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 1, .short, .none }, + .{ .ror, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 1, .short, .none }, + .{ .ror, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 1, .short, .none }, + .{ .ror, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 1, .none, .none }, + .{ .ror, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 1, .long, .none }, + .{ .ror, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 1, .none, .none }, + .{ .ror, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 1, .long, .none }, + .{ .ror, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 1, .none, .none }, + .{ .ror, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 1, .long, .none }, - .{ .sal, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none, .none }, - .{ .sal, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex, .none }, - .{ .sal, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .none, .none }, - .{ .sal, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 4, .none, .none }, - .{ .sal, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 4, .long, .none }, - .{ .sal, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .none, .none }, - .{ .sal, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .rex, .none }, - .{ .sal, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 4, .none, .none }, - .{ .sal, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 4, .none, .none }, - .{ .sal, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 4, .long, .none }, - .{ .sal, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .none, .none }, - .{ .sal, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .rex, .none }, - .{ .sal, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 4, .none, .none }, - .{ .sal, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 4, .none, .none }, - .{ .sal, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 4, .long, .none }, + .{ .sal, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none, .none }, + .{ .sal, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex, .none }, + .{ .sal, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .short, .none }, + .{ .sal, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 4, .none, .none }, + .{ .sal, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 4, .long, .none }, + .{ .sal, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .none, .none }, + .{ .sal, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .rex, .none }, + .{ .sal, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 4, .short, .none }, + .{ .sal, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 4, .none, .none }, + .{ .sal, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 4, .long, .none }, + .{ .sal, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .none, .none }, + .{ .sal, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .rex, .none }, + .{ .sal, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 4, .short, .none }, + .{ .sal, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 4, .none, .none }, + .{ .sal, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 4, .long, .none }, - .{ .sar, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 7, .none, .none }, - .{ .sar, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 7, .rex, .none }, - .{ .sar, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 7, .none, .none }, - .{ .sar, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 7, .none, .none }, - .{ .sar, .m1, &.{ .rm64, .unity }, &.{ 
0xd1 }, 7, .long, .none }, - .{ .sar, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 7, .none, .none }, - .{ .sar, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 7, .rex, .none }, - .{ .sar, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 7, .none, .none }, - .{ .sar, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 7, .none, .none }, - .{ .sar, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 7, .long, .none }, - .{ .sar, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 7, .none, .none }, - .{ .sar, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 7, .rex, .none }, - .{ .sar, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 7, .none, .none }, - .{ .sar, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 7, .none, .none }, - .{ .sar, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 7, .long, .none }, + .{ .sar, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 7, .none, .none }, + .{ .sar, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 7, .rex, .none }, + .{ .sar, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 7, .short, .none }, + .{ .sar, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 7, .none, .none }, + .{ .sar, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 7, .long, .none }, + .{ .sar, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 7, .none, .none }, + .{ .sar, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 7, .rex, .none }, + .{ .sar, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 7, .short, .none }, + .{ .sar, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 7, .none, .none }, + .{ .sar, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 7, .long, .none }, + .{ .sar, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 7, .none, .none }, + .{ .sar, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 7, .rex, .none }, + .{ .sar, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 7, .short, .none }, + .{ .sar, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 7, .none, .none }, + .{ .sar, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 7, .long, .none }, - .{ .sbb, .zi, &.{ .al, .imm8 }, &.{ 0x1c }, 0, .none, .none }, - .{ .sbb, .zi, &.{ .ax, .imm16 }, &.{ 0x1d }, 0, .none, .none }, - .{ .sbb, .zi, &.{ .eax, .imm32 }, &.{ 0x1d }, 0, .none, .none }, - .{ .sbb, .zi, &.{ .rax, .imm32s }, &.{ 0x1d }, 0, .long, .none }, - .{ .sbb, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 3, .none, .none }, - .{ .sbb, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 3, .rex, .none }, - .{ .sbb, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 3, .none, .none }, - .{ .sbb, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 3, .none, .none }, - .{ .sbb, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 3, .long, .none }, - .{ .sbb, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 3, .none, .none }, - .{ .sbb, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 3, .none, .none }, - .{ .sbb, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 3, .long, .none }, - .{ .sbb, .mr, &.{ .rm8, .r8 }, &.{ 0x18 }, 0, .none, .none }, - .{ .sbb, .mr, &.{ .rm8, .r8 }, &.{ 0x18 }, 0, .rex, .none }, - .{ .sbb, .mr, &.{ .rm16, .r16 }, &.{ 0x19 }, 0, .none, .none }, - .{ .sbb, .mr, &.{ .rm32, .r32 }, &.{ 0x19 }, 0, .none, .none }, - .{ .sbb, .mr, &.{ .rm64, .r64 }, &.{ 0x19 }, 0, .long, .none }, - .{ .sbb, .rm, &.{ .r8, .rm8 }, &.{ 0x1a }, 0, .none, .none }, - .{ .sbb, .rm, &.{ .r8, .rm8 }, &.{ 0x1a }, 0, .rex, .none }, - .{ .sbb, .rm, &.{ .r16, .rm16 }, &.{ 0x1b }, 0, .none, .none }, - .{ .sbb, .rm, &.{ .r32, .rm32 }, &.{ 0x1b }, 0, .none, .none }, - .{ .sbb, .rm, &.{ .r64, .rm64 }, &.{ 0x1b }, 0, .long, .none }, + .{ .sbb, .zi, &.{ .al, .imm8 }, &.{ 0x1c }, 0, .none, .none }, + .{ .sbb, .zi, &.{ .ax, .imm16 }, &.{ 0x1d }, 0, .short, .none }, + .{ .sbb, .zi, &.{ .eax, .imm32 }, &.{ 0x1d }, 0, .none, .none }, + .{ .sbb, .zi, &.{ .rax, .imm32s }, &.{ 0x1d }, 0, .long, .none }, + .{ .sbb, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 3, .none, .none }, + .{ .sbb, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 3, .rex, .none }, + .{ .sbb, .mi, &.{ 
.rm16, .imm16 }, &.{ 0x81 }, 3, .short, .none }, + .{ .sbb, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 3, .none, .none }, + .{ .sbb, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 3, .long, .none }, + .{ .sbb, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 3, .short, .none }, + .{ .sbb, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 3, .none, .none }, + .{ .sbb, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 3, .long, .none }, + .{ .sbb, .mr, &.{ .rm8, .r8 }, &.{ 0x18 }, 0, .none, .none }, + .{ .sbb, .mr, &.{ .rm8, .r8 }, &.{ 0x18 }, 0, .rex, .none }, + .{ .sbb, .mr, &.{ .rm16, .r16 }, &.{ 0x19 }, 0, .short, .none }, + .{ .sbb, .mr, &.{ .rm32, .r32 }, &.{ 0x19 }, 0, .none, .none }, + .{ .sbb, .mr, &.{ .rm64, .r64 }, &.{ 0x19 }, 0, .long, .none }, + .{ .sbb, .rm, &.{ .r8, .rm8 }, &.{ 0x1a }, 0, .none, .none }, + .{ .sbb, .rm, &.{ .r8, .rm8 }, &.{ 0x1a }, 0, .rex, .none }, + .{ .sbb, .rm, &.{ .r16, .rm16 }, &.{ 0x1b }, 0, .short, .none }, + .{ .sbb, .rm, &.{ .r32, .rm32 }, &.{ 0x1b }, 0, .none, .none }, + .{ .sbb, .rm, &.{ .r64, .rm64 }, &.{ 0x1b }, 0, .long, .none }, - .{ .scas, .np, &.{ .m8 }, &.{ 0xae }, 0, .none, .none }, - .{ .scas, .np, &.{ .m16 }, &.{ 0xaf }, 0, .none, .none }, - .{ .scas, .np, &.{ .m32 }, &.{ 0xaf }, 0, .none, .none }, - .{ .scas, .np, &.{ .m64 }, &.{ 0xaf }, 0, .long, .none }, + .{ .scas, .np, &.{ .m8 }, &.{ 0xae }, 0, .none, .none }, + .{ .scas, .np, &.{ .m16 }, &.{ 0xaf }, 0, .short, .none }, + .{ .scas, .np, &.{ .m32 }, &.{ 0xaf }, 0, .none, .none }, + .{ .scas, .np, &.{ .m64 }, &.{ 0xaf }, 0, .long, .none }, .{ .scasb, .np, &.{}, &.{ 0xae }, 0, .none, .none }, .{ .scasw, .np, &.{}, &.{ 0xaf }, 0, .short, .none }, @@ -682,153 +684,153 @@ pub const table = [_]Entry{ .{ .sfence, .np, &.{}, &.{ 0x0f, 0xae, 0xf8 }, 0, .none, .none }, - .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none, .none }, - .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex, .none }, - .{ .shl, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .none, .none }, - .{ .shl, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 4, .none, .none }, - .{ .shl, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 4, .long, .none }, - .{ .shl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .none, .none }, - .{ .shl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .rex, .none }, - .{ .shl, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 4, .none, .none }, - .{ .shl, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 4, .none, .none }, - .{ .shl, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 4, .long, .none }, - .{ .shl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .none, .none }, - .{ .shl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .rex, .none }, - .{ .shl, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 4, .none, .none }, - .{ .shl, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 4, .none, .none }, - .{ .shl, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 4, .long, .none }, + .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none, .none }, + .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex, .none }, + .{ .shl, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .short, .none }, + .{ .shl, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 4, .none, .none }, + .{ .shl, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 4, .long, .none }, + .{ .shl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .none, .none }, + .{ .shl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .rex, .none }, + .{ .shl, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 4, .short, .none }, + .{ .shl, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 4, .none, .none }, + .{ .shl, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 4, .long, .none }, + .{ .shl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .none, .none }, + .{ .shl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .rex, .none }, + .{ .shl, .mi, &.{ 
.rm16, .imm8 }, &.{ 0xc1 }, 4, .short, .none }, + .{ .shl, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 4, .none, .none }, + .{ .shl, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 4, .long, .none }, - .{ .shld, .mri, &.{ .rm16, .r16, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .none, .none }, - .{ .shld, .mrc, &.{ .rm16, .r16, .cl }, &.{ 0x0f, 0xa5 }, 0, .none, .none }, - .{ .shld, .mri, &.{ .rm32, .r32, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .none, .none }, - .{ .shld, .mri, &.{ .rm64, .r64, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .long, .none }, - .{ .shld, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xa5 }, 0, .none, .none }, - .{ .shld, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xa5 }, 0, .long, .none }, + .{ .shld, .mri, &.{ .rm16, .r16, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .short, .none }, + .{ .shld, .mrc, &.{ .rm16, .r16, .cl }, &.{ 0x0f, 0xa5 }, 0, .short, .none }, + .{ .shld, .mri, &.{ .rm32, .r32, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .none, .none }, + .{ .shld, .mri, &.{ .rm64, .r64, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .long, .none }, + .{ .shld, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xa5 }, 0, .none, .none }, + .{ .shld, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xa5 }, 0, .long, .none }, - .{ .shr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 5, .none, .none }, - .{ .shr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 5, .rex, .none }, - .{ .shr, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 5, .none, .none }, - .{ .shr, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 5, .none, .none }, - .{ .shr, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 5, .long, .none }, - .{ .shr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 5, .none, .none }, - .{ .shr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 5, .rex, .none }, - .{ .shr, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 5, .none, .none }, - .{ .shr, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 5, .none, .none }, - .{ .shr, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 5, .long, .none }, - .{ .shr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 5, .none, .none }, - .{ .shr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 5, .rex, .none }, - .{ .shr, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 5, .none, .none }, - .{ .shr, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 5, .none, .none }, - .{ .shr, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 5, .long, .none }, + .{ .shr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 5, .none, .none }, + .{ .shr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 5, .rex, .none }, + .{ .shr, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 5, .short, .none }, + .{ .shr, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 5, .none, .none }, + .{ .shr, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 5, .long, .none }, + .{ .shr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 5, .none, .none }, + .{ .shr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 5, .rex, .none }, + .{ .shr, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 5, .short, .none }, + .{ .shr, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 5, .none, .none }, + .{ .shr, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 5, .long, .none }, + .{ .shr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 5, .none, .none }, + .{ .shr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 5, .rex, .none }, + .{ .shr, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 5, .short, .none }, + .{ .shr, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 5, .none, .none }, + .{ .shr, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 5, .long, .none }, - .{ .shrd, .mri, &.{ .rm16, .r16, .imm8 }, &.{ 0x0f, 0xac }, 0, .none, .none }, - .{ .shrd, .mrc, &.{ .rm16, .r16, .cl }, &.{ 0x0f, 0xad }, 0, .none, .none }, - .{ .shrd, .mri, &.{ .rm32, .r32, .imm8 }, &.{ 0x0f, 0xac }, 0, .none, .none }, - .{ .shrd, .mri, &.{ .rm64, .r64, .imm8 }, &.{ 0x0f, 0xac }, 0, .long, .none }, - .{ .shrd, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xad }, 0, .none, .none }, - .{ .shrd, .mrc, &.{ 
.rm64, .r64, .cl }, &.{ 0x0f, 0xad }, 0, .long, .none }, + .{ .shrd, .mri, &.{ .rm16, .r16, .imm8 }, &.{ 0x0f, 0xac }, 0, .short, .none }, + .{ .shrd, .mrc, &.{ .rm16, .r16, .cl }, &.{ 0x0f, 0xad }, 0, .short, .none }, + .{ .shrd, .mri, &.{ .rm32, .r32, .imm8 }, &.{ 0x0f, 0xac }, 0, .none, .none }, + .{ .shrd, .mri, &.{ .rm64, .r64, .imm8 }, &.{ 0x0f, 0xac }, 0, .long, .none }, + .{ .shrd, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xad }, 0, .none, .none }, + .{ .shrd, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xad }, 0, .long, .none }, - .{ .stos, .np, &.{ .m8 }, &.{ 0xaa }, 0, .none, .none }, - .{ .stos, .np, &.{ .m16 }, &.{ 0xab }, 0, .none, .none }, - .{ .stos, .np, &.{ .m32 }, &.{ 0xab }, 0, .none, .none }, - .{ .stos, .np, &.{ .m64 }, &.{ 0xab }, 0, .long, .none }, + .{ .stos, .np, &.{ .m8 }, &.{ 0xaa }, 0, .none, .none }, + .{ .stos, .np, &.{ .m16 }, &.{ 0xab }, 0, .short, .none }, + .{ .stos, .np, &.{ .m32 }, &.{ 0xab }, 0, .none, .none }, + .{ .stos, .np, &.{ .m64 }, &.{ 0xab }, 0, .long, .none }, .{ .stosb, .np, &.{}, &.{ 0xaa }, 0, .none, .none }, .{ .stosw, .np, &.{}, &.{ 0xab }, 0, .short, .none }, .{ .stosd, .np, &.{}, &.{ 0xab }, 0, .none, .none }, .{ .stosq, .np, &.{}, &.{ 0xab }, 0, .long, .none }, - .{ .sub, .zi, &.{ .al, .imm8 }, &.{ 0x2c }, 0, .none, .none }, - .{ .sub, .zi, &.{ .ax, .imm16 }, &.{ 0x2d }, 0, .none, .none }, - .{ .sub, .zi, &.{ .eax, .imm32 }, &.{ 0x2d }, 0, .none, .none }, - .{ .sub, .zi, &.{ .rax, .imm32s }, &.{ 0x2d }, 0, .long, .none }, - .{ .sub, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 5, .none, .none }, - .{ .sub, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 5, .rex, .none }, - .{ .sub, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 5, .none, .none }, - .{ .sub, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 5, .none, .none }, - .{ .sub, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 5, .long, .none }, - .{ .sub, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 5, .none, .none }, - .{ .sub, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 5, .none, .none }, - .{ .sub, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 5, .long, .none }, - .{ .sub, .mr, &.{ .rm8, .r8 }, &.{ 0x28 }, 0, .none, .none }, - .{ .sub, .mr, &.{ .rm8, .r8 }, &.{ 0x28 }, 0, .rex, .none }, - .{ .sub, .mr, &.{ .rm16, .r16 }, &.{ 0x29 }, 0, .none, .none }, - .{ .sub, .mr, &.{ .rm32, .r32 }, &.{ 0x29 }, 0, .none, .none }, - .{ .sub, .mr, &.{ .rm64, .r64 }, &.{ 0x29 }, 0, .long, .none }, - .{ .sub, .rm, &.{ .r8, .rm8 }, &.{ 0x2a }, 0, .none, .none }, - .{ .sub, .rm, &.{ .r8, .rm8 }, &.{ 0x2a }, 0, .rex, .none }, - .{ .sub, .rm, &.{ .r16, .rm16 }, &.{ 0x2b }, 0, .none, .none }, - .{ .sub, .rm, &.{ .r32, .rm32 }, &.{ 0x2b }, 0, .none, .none }, - .{ .sub, .rm, &.{ .r64, .rm64 }, &.{ 0x2b }, 0, .long, .none }, + .{ .sub, .zi, &.{ .al, .imm8 }, &.{ 0x2c }, 0, .none, .none }, + .{ .sub, .zi, &.{ .ax, .imm16 }, &.{ 0x2d }, 0, .short, .none }, + .{ .sub, .zi, &.{ .eax, .imm32 }, &.{ 0x2d }, 0, .none, .none }, + .{ .sub, .zi, &.{ .rax, .imm32s }, &.{ 0x2d }, 0, .long, .none }, + .{ .sub, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 5, .none, .none }, + .{ .sub, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 5, .rex, .none }, + .{ .sub, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 5, .short, .none }, + .{ .sub, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 5, .none, .none }, + .{ .sub, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 5, .long, .none }, + .{ .sub, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 5, .short, .none }, + .{ .sub, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 5, .none, .none }, + .{ .sub, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 5, .long, .none }, + .{ .sub, .mr, &.{ .rm8, .r8 }, &.{ 0x28 }, 0, .none, 
.none }, + .{ .sub, .mr, &.{ .rm8, .r8 }, &.{ 0x28 }, 0, .rex, .none }, + .{ .sub, .mr, &.{ .rm16, .r16 }, &.{ 0x29 }, 0, .short, .none }, + .{ .sub, .mr, &.{ .rm32, .r32 }, &.{ 0x29 }, 0, .none, .none }, + .{ .sub, .mr, &.{ .rm64, .r64 }, &.{ 0x29 }, 0, .long, .none }, + .{ .sub, .rm, &.{ .r8, .rm8 }, &.{ 0x2a }, 0, .none, .none }, + .{ .sub, .rm, &.{ .r8, .rm8 }, &.{ 0x2a }, 0, .rex, .none }, + .{ .sub, .rm, &.{ .r16, .rm16 }, &.{ 0x2b }, 0, .short, .none }, + .{ .sub, .rm, &.{ .r32, .rm32 }, &.{ 0x2b }, 0, .none, .none }, + .{ .sub, .rm, &.{ .r64, .rm64 }, &.{ 0x2b }, 0, .long, .none }, .{ .syscall, .np, &.{}, &.{ 0x0f, 0x05 }, 0, .none, .none }, - .{ .@"test", .zi, &.{ .al, .imm8 }, &.{ 0xa8 }, 0, .none, .none }, - .{ .@"test", .zi, &.{ .ax, .imm16 }, &.{ 0xa9 }, 0, .none, .none }, - .{ .@"test", .zi, &.{ .eax, .imm32 }, &.{ 0xa9 }, 0, .none, .none }, - .{ .@"test", .zi, &.{ .rax, .imm32s }, &.{ 0xa9 }, 0, .long, .none }, - .{ .@"test", .mi, &.{ .rm8, .imm8 }, &.{ 0xf6 }, 0, .none, .none }, - .{ .@"test", .mi, &.{ .rm8, .imm8 }, &.{ 0xf6 }, 0, .rex, .none }, - .{ .@"test", .mi, &.{ .rm16, .imm16 }, &.{ 0xf7 }, 0, .none, .none }, - .{ .@"test", .mi, &.{ .rm32, .imm32 }, &.{ 0xf7 }, 0, .none, .none }, - .{ .@"test", .mi, &.{ .rm64, .imm32s }, &.{ 0xf7 }, 0, .long, .none }, - .{ .@"test", .mr, &.{ .rm8, .r8 }, &.{ 0x84 }, 0, .none, .none }, - .{ .@"test", .mr, &.{ .rm8, .r8 }, &.{ 0x84 }, 0, .rex, .none }, - .{ .@"test", .mr, &.{ .rm16, .r16 }, &.{ 0x85 }, 0, .none, .none }, - .{ .@"test", .mr, &.{ .rm32, .r32 }, &.{ 0x85 }, 0, .none, .none }, - .{ .@"test", .mr, &.{ .rm64, .r64 }, &.{ 0x85 }, 0, .long, .none }, + .{ .@"test", .zi, &.{ .al, .imm8 }, &.{ 0xa8 }, 0, .none, .none }, + .{ .@"test", .zi, &.{ .ax, .imm16 }, &.{ 0xa9 }, 0, .short, .none }, + .{ .@"test", .zi, &.{ .eax, .imm32 }, &.{ 0xa9 }, 0, .none, .none }, + .{ .@"test", .zi, &.{ .rax, .imm32s }, &.{ 0xa9 }, 0, .long, .none }, + .{ .@"test", .mi, &.{ .rm8, .imm8 }, &.{ 0xf6 }, 0, .none, .none }, + .{ .@"test", .mi, &.{ .rm8, .imm8 }, &.{ 0xf6 }, 0, .rex, .none }, + .{ .@"test", .mi, &.{ .rm16, .imm16 }, &.{ 0xf7 }, 0, .short, .none }, + .{ .@"test", .mi, &.{ .rm32, .imm32 }, &.{ 0xf7 }, 0, .none, .none }, + .{ .@"test", .mi, &.{ .rm64, .imm32s }, &.{ 0xf7 }, 0, .long, .none }, + .{ .@"test", .mr, &.{ .rm8, .r8 }, &.{ 0x84 }, 0, .none, .none }, + .{ .@"test", .mr, &.{ .rm8, .r8 }, &.{ 0x84 }, 0, .rex, .none }, + .{ .@"test", .mr, &.{ .rm16, .r16 }, &.{ 0x85 }, 0, .short, .none }, + .{ .@"test", .mr, &.{ .rm32, .r32 }, &.{ 0x85 }, 0, .none, .none }, + .{ .@"test", .mr, &.{ .rm64, .r64 }, &.{ 0x85 }, 0, .long, .none }, - .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .none }, - .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .none }, - .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .none }, + .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .none }, + .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .none }, + .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .none }, .{ .ud2, .np, &.{}, &.{ 0x0f, 0x0b }, 0, .none, .none }, - .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .none, .none }, - .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .rex, .none }, - .{ .xadd, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xc1 }, 0, .none, .none }, - .{ .xadd, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xc1 }, 0, .none, .none }, - .{ .xadd, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xc1 }, 0, .long, .none }, + .{ .xadd, .mr, &.{ .rm8, 
.r8 }, &.{ 0x0f, 0xc0 }, 0, .none, .none }, + .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .rex, .none }, + .{ .xadd, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xc1 }, 0, .short, .none }, + .{ .xadd, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xc1 }, 0, .none, .none }, + .{ .xadd, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xc1 }, 0, .long, .none }, - .{ .xchg, .o, &.{ .ax, .r16 }, &.{ 0x90 }, 0, .none, .none }, - .{ .xchg, .o, &.{ .r16, .ax }, &.{ 0x90 }, 0, .none, .none }, - .{ .xchg, .o, &.{ .eax, .r32 }, &.{ 0x90 }, 0, .none, .none }, - .{ .xchg, .o, &.{ .rax, .r64 }, &.{ 0x90 }, 0, .long, .none }, - .{ .xchg, .o, &.{ .r32, .eax }, &.{ 0x90 }, 0, .none, .none }, - .{ .xchg, .o, &.{ .r64, .rax }, &.{ 0x90 }, 0, .long, .none }, - .{ .xchg, .mr, &.{ .rm8, .r8 }, &.{ 0x86 }, 0, .none, .none }, - .{ .xchg, .mr, &.{ .rm8, .r8 }, &.{ 0x86 }, 0, .rex, .none }, - .{ .xchg, .rm, &.{ .r8, .rm8 }, &.{ 0x86 }, 0, .none, .none }, - .{ .xchg, .rm, &.{ .r8, .rm8 }, &.{ 0x86 }, 0, .rex, .none }, - .{ .xchg, .mr, &.{ .rm16, .r16 }, &.{ 0x87 }, 0, .none, .none }, - .{ .xchg, .rm, &.{ .r16, .rm16 }, &.{ 0x87 }, 0, .none, .none }, - .{ .xchg, .mr, &.{ .rm32, .r32 }, &.{ 0x87 }, 0, .none, .none }, - .{ .xchg, .mr, &.{ .rm64, .r64 }, &.{ 0x87 }, 0, .long, .none }, - .{ .xchg, .rm, &.{ .r32, .rm32 }, &.{ 0x87 }, 0, .none, .none }, - .{ .xchg, .rm, &.{ .r64, .rm64 }, &.{ 0x87 }, 0, .long, .none }, + .{ .xchg, .o, &.{ .ax, .r16 }, &.{ 0x90 }, 0, .short, .none }, + .{ .xchg, .o, &.{ .r16, .ax }, &.{ 0x90 }, 0, .short, .none }, + .{ .xchg, .o, &.{ .eax, .r32 }, &.{ 0x90 }, 0, .none, .none }, + .{ .xchg, .o, &.{ .rax, .r64 }, &.{ 0x90 }, 0, .long, .none }, + .{ .xchg, .o, &.{ .r32, .eax }, &.{ 0x90 }, 0, .none, .none }, + .{ .xchg, .o, &.{ .r64, .rax }, &.{ 0x90 }, 0, .long, .none }, + .{ .xchg, .mr, &.{ .rm8, .r8 }, &.{ 0x86 }, 0, .none, .none }, + .{ .xchg, .mr, &.{ .rm8, .r8 }, &.{ 0x86 }, 0, .rex, .none }, + .{ .xchg, .rm, &.{ .r8, .rm8 }, &.{ 0x86 }, 0, .none, .none }, + .{ .xchg, .rm, &.{ .r8, .rm8 }, &.{ 0x86 }, 0, .rex, .none }, + .{ .xchg, .mr, &.{ .rm16, .r16 }, &.{ 0x87 }, 0, .short, .none }, + .{ .xchg, .rm, &.{ .r16, .rm16 }, &.{ 0x87 }, 0, .short, .none }, + .{ .xchg, .mr, &.{ .rm32, .r32 }, &.{ 0x87 }, 0, .none, .none }, + .{ .xchg, .mr, &.{ .rm64, .r64 }, &.{ 0x87 }, 0, .long, .none }, + .{ .xchg, .rm, &.{ .r32, .rm32 }, &.{ 0x87 }, 0, .none, .none }, + .{ .xchg, .rm, &.{ .r64, .rm64 }, &.{ 0x87 }, 0, .long, .none }, - .{ .xor, .zi, &.{ .al, .imm8 }, &.{ 0x34 }, 0, .none, .none }, - .{ .xor, .zi, &.{ .ax, .imm16 }, &.{ 0x35 }, 0, .none, .none }, - .{ .xor, .zi, &.{ .eax, .imm32 }, &.{ 0x35 }, 0, .none, .none }, - .{ .xor, .zi, &.{ .rax, .imm32s }, &.{ 0x35 }, 0, .long, .none }, - .{ .xor, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 6, .none, .none }, - .{ .xor, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 6, .rex, .none }, - .{ .xor, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 6, .none, .none }, - .{ .xor, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 6, .none, .none }, - .{ .xor, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 6, .long, .none }, - .{ .xor, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 6, .none, .none }, - .{ .xor, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 6, .none, .none }, - .{ .xor, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 6, .long, .none }, - .{ .xor, .mr, &.{ .rm8, .r8 }, &.{ 0x30 }, 0, .none, .none }, - .{ .xor, .mr, &.{ .rm8, .r8 }, &.{ 0x30 }, 0, .rex, .none }, - .{ .xor, .mr, &.{ .rm16, .r16 }, &.{ 0x31 }, 0, .none, .none }, - .{ .xor, .mr, &.{ .rm32, .r32 }, &.{ 0x31 }, 0, .none, .none }, - .{ .xor, .mr, &.{ .rm64, .r64 }, &.{ 0x31 }, 0, 
.long, .none }, - .{ .xor, .rm, &.{ .r8, .rm8 }, &.{ 0x32 }, 0, .none, .none }, - .{ .xor, .rm, &.{ .r8, .rm8 }, &.{ 0x32 }, 0, .rex, .none }, - .{ .xor, .rm, &.{ .r16, .rm16 }, &.{ 0x33 }, 0, .none, .none }, - .{ .xor, .rm, &.{ .r32, .rm32 }, &.{ 0x33 }, 0, .none, .none }, - .{ .xor, .rm, &.{ .r64, .rm64 }, &.{ 0x33 }, 0, .long, .none }, + .{ .xor, .zi, &.{ .al, .imm8 }, &.{ 0x34 }, 0, .none, .none }, + .{ .xor, .zi, &.{ .ax, .imm16 }, &.{ 0x35 }, 0, .short, .none }, + .{ .xor, .zi, &.{ .eax, .imm32 }, &.{ 0x35 }, 0, .none, .none }, + .{ .xor, .zi, &.{ .rax, .imm32s }, &.{ 0x35 }, 0, .long, .none }, + .{ .xor, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 6, .none, .none }, + .{ .xor, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 6, .rex, .none }, + .{ .xor, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 6, .short, .none }, + .{ .xor, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 6, .none, .none }, + .{ .xor, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 6, .long, .none }, + .{ .xor, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 6, .short, .none }, + .{ .xor, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 6, .none, .none }, + .{ .xor, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 6, .long, .none }, + .{ .xor, .mr, &.{ .rm8, .r8 }, &.{ 0x30 }, 0, .none, .none }, + .{ .xor, .mr, &.{ .rm8, .r8 }, &.{ 0x30 }, 0, .rex, .none }, + .{ .xor, .mr, &.{ .rm16, .r16 }, &.{ 0x31 }, 0, .short, .none }, + .{ .xor, .mr, &.{ .rm32, .r32 }, &.{ 0x31 }, 0, .none, .none }, + .{ .xor, .mr, &.{ .rm64, .r64 }, &.{ 0x31 }, 0, .long, .none }, + .{ .xor, .rm, &.{ .r8, .rm8 }, &.{ 0x32 }, 0, .none, .none }, + .{ .xor, .rm, &.{ .r8, .rm8 }, &.{ 0x32 }, 0, .rex, .none }, + .{ .xor, .rm, &.{ .r16, .rm16 }, &.{ 0x33 }, 0, .short, .none }, + .{ .xor, .rm, &.{ .r32, .rm32 }, &.{ 0x33 }, 0, .none, .none }, + .{ .xor, .rm, &.{ .r64, .rm64 }, &.{ 0x33 }, 0, .long, .none }, // SSE .{ .addss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .none, .sse }, @@ -911,9 +913,39 @@ pub const table = [_]Entry{ .{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .none, .sse2 }, - .{ .pextrw, .mri, &.{ .r16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .none, .sse2 }, + .{ .pextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .none, .sse2 }, + .{ .pextrw, .rmi, &.{ .r64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .long, .sse2 }, - .{ .pinsrw, .rmi, &.{ .xmm, .rm16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 }, + .{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 }, + + .{ .pshufhw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .none, .sse2 }, + + .{ .pshuflw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf2, 0x0f, 0x70 }, 0, .none, .sse2 }, + + .{ .psrld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .none, .sse2 }, + .{ .psrld, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .none, .sse2 }, + + .{ .psrlq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .none, .sse2 }, + .{ .psrlq, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .none, .sse2 }, + + .{ .psrlw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .none, .sse2 }, + .{ .psrlw, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .none, .sse2 }, + + .{ .punpckhbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .none, .sse2 }, + + .{ .punpckhdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .none, .sse2 }, + + .{ .punpckhqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6d }, 0, .none, .sse2 }, + + .{ .punpckhwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .none, .sse2 }, + + .{ .punpcklbw, .rm, &.{ .xmm, 
.xmm_m128 }, &.{ 0x66, 0x0f, 0x60 }, 0, .none, .sse2 }, + + .{ .punpckldq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .none, .sse2 }, + + .{ .punpcklqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .none, .sse2 }, + + .{ .punpcklwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x61 }, 0, .none, .sse2 }, .{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .none, .sse2 }, .{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .none, .sse2 }, @@ -927,12 +959,59 @@ pub const table = [_]Entry{ .{ .xorpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .none, .sse2 }, + // SSE3 + .{ .movddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .none, .sse3 }, + + .{ .movshdup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .none, .sse3 }, + + .{ .movsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .none, .sse3 }, + // SSE4.1 - .{ .pextrw, .mri, &.{ .rm16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .none, .sse4_1 }, + .{ .pextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .none, .sse4_1 }, + .{ .pextrw, .mri, &.{ .r64_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .long, .sse4_1 }, .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .none, .sse4_1 }, .{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .none, .sse4_1 }, + // AVX + .{ .vmovddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128, .avx }, + + .{ .vmovshdup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .vex_128, .avx }, + + .{ .vmovsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .vex_128, .avx }, + + .{ .vpextrw, .mri, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128, .avx }, + .{ .vpextrw, .mri, &.{ .r64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128_long, .avx }, + .{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128, .avx }, + .{ .vpextrw, .mri, &.{ .r64_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_long, .avx }, + + .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128, .avx }, + + .{ .vpsrld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_128, .avx }, + .{ .vpsrld, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .vex_128, .avx }, + + .{ .vpsrlq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_128, .avx }, + .{ .vpsrlq, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_128, .avx }, + + .{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128, .avx }, + .{ .vpsrlw, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_128, .avx }, + + .{ .vpunpckhbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_128, .avx }, + + .{ .vpunpckhdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_128, .avx }, + + .{ .vpunpckhqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6d }, 0, .vex_128, .avx }, + + .{ .vpunpckhwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_128, .avx }, + + .{ .vpunpcklbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x60 }, 0, .vex_128, .avx }, + + .{ .vpunpckldq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128, .avx }, + + .{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_128, .avx }, + + .{ .vpunpcklwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_128, .avx }, + // F16C 
.{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128, .f16c }, From 5d4288c5f6c69bdd4cbd9b3580016828e38f087d Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sat, 6 May 2023 06:00:22 -0400 Subject: [PATCH 05/20] x86_64: fix unordered float equality --- src/arch/x86_64/CodeGen.zig | 421 ++++++++++++++++++----------- src/arch/x86_64/Emit.zig | 273 ++++++++++--------- src/arch/x86_64/Lower.zig | 220 +++++++++++---- src/arch/x86_64/Mir.zig | 34 ++- src/arch/x86_64/bits.zig | 9 + src/arch/x86_64/encoder.zig | 4 +- test/behavior/bugs/12891.zig | 6 - test/behavior/field_parent_ptr.zig | 1 - 8 files changed, 602 insertions(+), 366 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index d24428467a..87eceec347 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -205,16 +205,7 @@ pub const MCValue = union(enum) { fn isMemory(mcv: MCValue) bool { return switch (mcv) { - .memory, - .load_direct, - .lea_direct, - .load_got, - .lea_got, - .load_tlv, - .lea_tlv, - .load_frame, - .lea_frame, - => true, + .memory, .indirect, .load_frame => true, else => false, }; } @@ -937,7 +928,7 @@ fn formatWipMir( .target = data.self.target, .src_loc = data.self.src_loc, }; - for (lower.lowerMir(data.self.mir_instructions.get(data.inst)) catch |err| switch (err) { + for ((lower.lowerMir(data.inst) catch |err| switch (err) { error.LowerFail => { defer { lower.err_msg.?.deinit(data.self.gpa); @@ -955,7 +946,7 @@ fn formatWipMir( return; }, else => |e| return e, - }) |lower_inst| try writer.print(" | {}", .{lower_inst}); + }).insts) |lowered_inst| try writer.print(" | {}", .{lowered_inst}); } fn fmtWipMir(self: *Self, inst: Mir.Inst.Index) std.fmt.Formatter(formatWipMir) { return .{ .data = .{ .self = self, .inst = inst } }; @@ -1016,7 +1007,14 @@ fn asmSetccRegister(self: *Self, reg: Register, cc: bits.Condition) !void { _ = try self.addInst(.{ .tag = .setcc, .ops = .r_cc, - .data = .{ .r_cc = .{ .r = reg, .cc = cc } }, + .data = .{ .r_cc = .{ + .r = reg, + .scratch = if (cc == .z_and_np or cc == .nz_or_p) + (try self.register_manager.allocReg(null, gp)).to8() + else + .none, + .cc = cc, + } }, }); } @@ -1028,23 +1026,36 @@ fn asmSetccMemory(self: *Self, m: Memory, cc: bits.Condition) !void { .rip => .m_rip_cc, else => unreachable, }, - .data = .{ .x_cc = .{ .cc = cc, .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, + .data = .{ .x_cc = .{ + .scratch = if (cc == .z_and_np or cc == .nz_or_p) + (try self.register_manager.allocReg(null, gp)).to8() + else + .none, + .cc = cc, + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } +/// A `cc` of `.z_and_np` clobbers `reg2`! 
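+/// With a NaN operand, `ucomiss`/`ucomisd` set ZF, PF, and CF all to 1, so an
+/// ordered "equal" requires ZF=1 and PF=0 and its negation requires ZF=0 or
+/// PF=1 — neither maps onto a single x86 condition code. The pseudo conditions
+/// `.z_and_np` and `.nz_or_p` therefore lower to short multi-instruction
+/// sequences (see the `mirCmovcc`/`mirSetcc`/`mirJcc` changes in Lower.zig).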
fn asmCmovccRegisterRegister(self: *Self, reg1: Register, reg2: Register, cc: bits.Condition) !void { _ = try self.addInst(.{ .tag = .cmovcc, .ops = .rr_cc, - .data = .{ .rr_cc = .{ .r1 = reg1, .r2 = reg2, .cc = cc } }, + .data = .{ .rr_cc = .{ + .r1 = reg1, + .r2 = reg2, + .cc = cc, + } }, }); } fn asmCmovccRegisterMemory(self: *Self, reg: Register, m: Memory, cc: bits.Condition) !void { + assert(cc != .z_and_np); // not supported _ = try self.addInst(.{ .tag = .cmovcc, .ops = switch (m) { @@ -1052,11 +1063,15 @@ fn asmCmovccRegisterMemory(self: *Self, reg: Register, m: Memory, cc: bits.Condi .rip => .rm_rip_cc, else => unreachable, }, - .data = .{ .rx_cc = .{ .r = reg, .cc = cc, .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, + .data = .{ .rx_cc = .{ + .r = reg, + .cc = cc, + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } @@ -1131,10 +1146,13 @@ fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.Tag, reg: Register, imm: Imme .tag = tag, .ops = ops, .data = switch (ops) { - .ri_s, .ri_u => .{ .ri = .{ .r = reg, .i = switch (imm) { - .signed => |s| @bitCast(u32, s), - .unsigned => |u| @intCast(u32, u), - } } }, + .ri_s, .ri_u => .{ .ri = .{ + .r = reg, + .i = switch (imm) { + .signed => |s| @bitCast(u32, s), + .unsigned => |u| @intCast(u32, u), + }, + } }, .ri64 => .{ .rx = .{ .r = reg, .payload = try self.addExtra(Mir.Imm64.encode(imm.unsigned)), @@ -1171,10 +1189,14 @@ fn asmRegisterRegisterImmediate( .signed => .rri_s, .unsigned => .rri_u, }, - .data = .{ .rri = .{ .r1 = reg1, .r2 = reg2, .i = switch (imm) { - .signed => |s| @bitCast(u32, s), - .unsigned => |u| @intCast(u32, u), - } } }, + .data = .{ .rri = .{ + .r1 = reg1, + .r2 = reg2, + .i = switch (imm) { + .signed => |s| @bitCast(u32, s), + .unsigned => |u| @intCast(u32, u), + }, + } }, }); } @@ -1202,11 +1224,14 @@ fn asmRegisterMemory(self: *Self, tag: Mir.Inst.Tag, reg: Register, m: Memory) ! 
.rip => .rm_rip, else => unreachable, }, - .data = .{ .rx = .{ .r = reg, .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, + .data = .{ .rx = .{ + .r = reg, + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } @@ -1224,11 +1249,43 @@ fn asmRegisterMemoryImmediate( .rip => .rmi_rip, else => unreachable, }, - .data = .{ .rix = .{ .r = reg, .i = @intCast(u8, imm.unsigned), .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + .data = .{ .rix = .{ + .r = reg, + .i = @intCast(u8, imm.unsigned), + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, + }); +} + +fn asmRegisterRegisterMemoryImmediate( + self: *Self, + tag: Mir.Inst.Tag, + reg1: Register, + reg2: Register, + m: Memory, + imm: Immediate, +) !void { + _ = try self.addInst(.{ + .tag = tag, + .ops = switch (m) { + .sib => .rrmi_sib, + .rip => .rrmi_rip, else => unreachable, - } } }, + }, + .data = .{ .rrix = .{ + .r1 = reg1, + .r2 = reg2, + .i = @intCast(u8, imm.unsigned), + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } @@ -1240,11 +1297,14 @@ fn asmMemoryRegister(self: *Self, tag: Mir.Inst.Tag, m: Memory, reg: Register) ! .rip => .mr_rip, else => unreachable, }, - .data = .{ .rx = .{ .r = reg, .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, + .data = .{ .rx = .{ + .r = reg, + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } @@ -1262,14 +1322,17 @@ fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.Tag, m: Memory, imm: Immediate) }, else => unreachable, }, - .data = .{ .ix = .{ .i = switch (imm) { - .signed => |s| @bitCast(u32, s), - .unsigned => |u| @intCast(u32, u), - }, .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, + .data = .{ .ix = .{ + .i = switch (imm) { + .signed => |s| @bitCast(u32, s), + .unsigned => |u| @intCast(u32, u), + }, + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } @@ -6612,11 +6675,13 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier _ = try self.addInst(.{ .tag = .mov_linker, .ops = .import_reloc, - .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{ - .reg = @enumToInt(Register.rax), - .atom_index = atom_index, - .sym_index = sym_index, - }) }, + .data = .{ .rx = .{ + .r = .rax, + .payload = try self.addExtra(Mir.Reloc{ + .atom_index = atom_index, + .sym_index = sym_index, + }), + } }, }); try self.asmRegister(.call, .rax); } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { @@ -6695,8 +6760,6 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { const 
bin_op = self.air.instructions.items(.data)[inst].bin_op; const ty = self.air.typeOf(bin_op.lhs); - const ty_abi_size = ty.abiSize(self.target.*); - const can_reuse = ty_abi_size <= 8; try self.spillEflagsIfOccupied(); self.eflags_inst = inst; @@ -6715,69 +6778,93 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { }; defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); - const dst_mem_ok = !ty.isRuntimeFloat(); - var flipped = false; - const dst_mcv: MCValue = if (can_reuse and !lhs_mcv.isImmediate() and - (dst_mem_ok or lhs_mcv.isRegister()) and self.liveness.operandDies(inst, 0)) - lhs_mcv - else if (can_reuse and !rhs_mcv.isImmediate() and - (dst_mem_ok or rhs_mcv.isRegister()) and self.liveness.operandDies(inst, 1)) - dst: { - flipped = true; - break :dst rhs_mcv; - } else if (dst_mem_ok) dst: { - const dst_mcv = try self.allocTempRegOrMem(ty, true); - try self.genCopy(ty, dst_mcv, lhs_mcv); - break :dst dst_mcv; - } else .{ .register = try self.copyToTmpRegister(ty, lhs_mcv) }; - const dst_lock = switch (dst_mcv) { - .register => |reg| self.register_manager.lockReg(reg), - else => null, - }; - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - - const src_mcv = if (flipped) lhs_mcv else rhs_mcv; - switch (ty.zigTypeTag()) { - else => try self.genBinOpMir(.cmp, ty, dst_mcv, src_mcv), - .Float => switch (ty.floatBits(self.target.*)) { - 16 => if (self.hasFeature(.f16c)) { - const dst_reg = dst_mcv.getReg().?.to128(); - - const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128(); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - if (src_mcv.isRegister()) - try self.asmRegisterRegisterRegister( - .vpunpcklwd, - dst_reg, - dst_reg, - src_mcv.getReg().?.to128(), - ) - else - try self.asmRegisterMemoryImmediate( - .vpinsrw, - dst_reg, - src_mcv.mem(.word), - Immediate.u(1), - ); - try self.asmRegisterRegister(.vcvtph2ps, dst_reg, dst_reg); - try self.asmRegisterRegister(.vmovshdup, tmp_reg, dst_reg); - try self.genBinOpMir(.ucomiss, ty, dst_mcv, .{ .register = tmp_reg }); - } else return self.fail("TODO implement airCmp for {}", .{ - ty.fmt(self.bin_file.options.module.?), - }), - 32 => try self.genBinOpMir(.ucomiss, ty, dst_mcv, src_mcv), - 64 => try self.genBinOpMir(.ucomisd, ty, dst_mcv, src_mcv), - else => return self.fail("TODO implement airCmp for {}", .{ - ty.fmt(self.bin_file.options.module.?), - }), - }, - } - - const signedness = if (ty.isAbiInt()) ty.intInfo(self.target.*).signedness else .unsigned; const result = MCValue{ - .eflags = Condition.fromCompareOperator(signedness, if (flipped) op.reverse() else op), + .eflags = switch (ty.zigTypeTag()) { + else => result: { + var flipped = false; + const dst_mcv: MCValue = if (lhs_mcv.isRegister() or lhs_mcv.isMemory()) + lhs_mcv + else if (rhs_mcv.isRegister() or rhs_mcv.isMemory()) dst: { + flipped = true; + break :dst rhs_mcv; + } else .{ .register = try self.copyToTmpRegister(ty, lhs_mcv) }; + const dst_lock = switch (dst_mcv) { + .register => |reg| self.register_manager.lockReg(reg), + else => null, + }; + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + const src_mcv = if (flipped) lhs_mcv else rhs_mcv; + + try self.genBinOpMir(.cmp, ty, dst_mcv, src_mcv); + break :result Condition.fromCompareOperator( + if (ty.isAbiInt()) ty.intInfo(self.target.*).signedness else .unsigned, + if (flipped) op.reverse() else op, + ); + }, + .Float => result: { + const flipped = 
switch (op) { + .lt, .lte => true, + .eq, .gte, .gt, .neq => false, + }; + + const dst_mcv = if (flipped) rhs_mcv else lhs_mcv; + const dst_reg = if (dst_mcv.isRegister()) + dst_mcv.getReg().? + else + try self.copyToTmpRegister(ty, dst_mcv); + const dst_lock = self.register_manager.lockReg(dst_reg); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + const src_mcv = if (flipped) lhs_mcv else rhs_mcv; + + switch (ty.floatBits(self.target.*)) { + 16 => if (self.hasFeature(.f16c)) { + const tmp1_reg = (try self.register_manager.allocReg(null, sse)).to128(); + const tmp1_mcv = MCValue{ .register = tmp1_reg }; + const tmp1_lock = self.register_manager.lockRegAssumeUnused(tmp1_reg); + defer self.register_manager.unlockReg(tmp1_lock); + + const tmp2_reg = (try self.register_manager.allocReg(null, sse)).to128(); + const tmp2_mcv = MCValue{ .register = tmp2_reg }; + const tmp2_lock = self.register_manager.lockRegAssumeUnused(tmp2_reg); + defer self.register_manager.unlockReg(tmp2_lock); + + if (src_mcv.isRegister()) + try self.asmRegisterRegisterRegister( + .vpunpcklwd, + tmp1_reg, + dst_reg.to128(), + src_mcv.getReg().?.to128(), + ) + else + try self.asmRegisterRegisterMemoryImmediate( + .vpinsrw, + tmp1_reg, + dst_reg.to128(), + src_mcv.mem(.word), + Immediate.u(1), + ); + try self.asmRegisterRegister(.vcvtph2ps, tmp1_reg, tmp1_reg); + try self.asmRegisterRegister(.vmovshdup, tmp2_reg, tmp1_reg); + try self.genBinOpMir(.ucomiss, ty, tmp1_mcv, tmp2_mcv); + } else return self.fail("TODO implement airCmp for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), + 32 => try self.genBinOpMir(.ucomiss, ty, .{ .register = dst_reg }, src_mcv), + 64 => try self.genBinOpMir(.ucomisd, ty, .{ .register = dst_reg }, src_mcv), + else => return self.fail("TODO implement airCmp for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), + } + + break :result switch (if (flipped) op.reverse() else op) { + .lt, .lte => unreachable, // required to have been canonicalized to gt(e) + .gt => .a, + .gte => .ae, + .eq => .z_and_np, + .neq => .nz_or_p, + }; + }, + }, }; return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -7929,11 +8016,13 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr _ = try self.addInst(.{ .tag = .mov_linker, .ops = .direct_reloc, - .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{ - .reg = @enumToInt(dst_reg.to64()), - .atom_index = atom_index, - .sym_index = sym_index, - }) }, + .data = .{ .rx = .{ + .r = dst_reg.to64(), + .payload = try self.addExtra(Mir.Reloc{ + .atom_index = atom_index, + .sym_index = sym_index, + }), + } }, }); return; }, @@ -7975,11 +8064,13 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr .lea_got => .got_reloc, else => unreachable, }, - .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{ - .reg = @enumToInt(dst_reg.to64()), - .atom_index = atom_index, - .sym_index = sym_index, - }) }, + .data = .{ .rx = .{ + .r = dst_reg.to64(), + .payload = try self.addExtra(Mir.Reloc{ + .atom_index = atom_index, + .sym_index = sym_index, + }), + } }, }); }, .lea_tlv => |sym_index| { @@ -7988,11 +8079,13 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr _ = try self.addInst(.{ .tag = .lea_linker, .ops = .tlv_reloc, - .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{ - .reg = @enumToInt(Register.rdi), - .atom_index = atom_index, - .sym_index = sym_index, - }) }, + .data = .{ .rx = .{ + .r = .rdi, + .payload = 
try self.addExtra(Mir.Reloc{ + .atom_index = atom_index, + .sym_index = sym_index, + }), + } }, }); // TODO: spill registers before calling try self.asmMemory(.call, Memory.sib(.qword, .{ .base = .{ .reg = .rdi } })); @@ -8463,14 +8556,20 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void { try self.spillEflagsIfOccupied(); if (val_abi_size <= 8) { - _ = try self.addInst(.{ .tag = .cmpxchg, .ops = .lock_mr_sib, .data = .{ .rx = .{ - .r = registerAlias(new_reg.?, val_abi_size), - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } } }); + _ = try self.addInst(.{ + .tag = .cmpxchg, + .ops = .lock_mr_sib, + .data = .{ .rx = .{ + .r = registerAlias(new_reg.?, val_abi_size), + .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), + } }, + }); } else { - _ = try self.addInst(.{ .tag = .cmpxchgb, .ops = .lock_m_sib, .data = .{ - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } }); + _ = try self.addInst(.{ + .tag = .cmpxchgb, + .ops = .lock_m_sib, + .data = .{ .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)) }, + }); } const result: MCValue = result: { @@ -8571,14 +8670,18 @@ fn atomicOp( if (rmw_op == std.builtin.AtomicRmwOp.Sub and tag == .xadd) { try self.genUnOpMir(.neg, val_ty, dst_mcv); } - _ = try self.addInst(.{ .tag = tag, .ops = switch (tag) { - .mov, .xchg => .mr_sib, - .xadd, .add, .sub, .@"and", .@"or", .xor => .lock_mr_sib, - else => unreachable, - }, .data = .{ .rx = .{ - .r = registerAlias(dst_reg, val_abi_size), - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } } }); + _ = try self.addInst(.{ + .tag = tag, + .ops = switch (tag) { + .mov, .xchg => .mr_sib, + .xadd, .add, .sub, .@"and", .@"or", .xor => .lock_mr_sib, + else => unreachable, + }, + .data = .{ .rx = .{ + .r = registerAlias(dst_reg, val_abi_size), + .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), + } }, + }); return if (unused) .unreach else dst_mcv; }, @@ -8645,10 +8748,14 @@ fn atomicOp( } }, }; - _ = try self.addInst(.{ .tag = .cmpxchg, .ops = .lock_mr_sib, .data = .{ .rx = .{ - .r = registerAlias(tmp_reg, val_abi_size), - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } } }); + _ = try self.addInst(.{ + .tag = .cmpxchg, + .ops = .lock_mr_sib, + .data = .{ .rx = .{ + .r = registerAlias(tmp_reg, val_abi_size), + .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), + } }, + }); _ = try self.asmJccReloc(loop, .ne); return if (unused) .unreach else .{ .register = .rax }; } else { diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index c6c8f7995c..3574d52878 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -18,142 +18,149 @@ pub const Error = Lower.Error || error{ }; pub fn emitMir(emit: *Emit) Error!void { - for (0..emit.lower.mir.instructions.len) |i| { - const index = @intCast(Mir.Inst.Index, i); - const inst = emit.lower.mir.instructions.get(index); - - const start_offset = @intCast(u32, emit.code.items.len); - try emit.code_offset_mapping.putNoClobber(emit.lower.allocator, index, start_offset); - for (try emit.lower.lowerMir(inst)) |lower_inst| try lower_inst.encode(emit.code.writer(), .{}); - const end_offset = @intCast(u32, emit.code.items.len); - - switch (inst.tag) { - else => {}, - - .jmp_reloc => try emit.relocs.append(emit.lower.allocator, .{ - .source = start_offset, - .target = inst.data.inst, - .offset = end_offset - 4, - .length = 5, - }), - - .call_extern => if (emit.bin_file.cast(link.File.MachO)) |macho_file| { - // Add relocation to the decl. 
- const atom_index = macho_file.getAtomIndexForSymbol( - .{ .sym_index = inst.data.relocation.atom_index, .file = null }, - ).?; - const target = macho_file.getGlobalByIndex(inst.data.relocation.sym_index); - try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ - .type = .branch, + for (0..emit.lower.mir.instructions.len) |mir_i| { + const mir_index = @intCast(Mir.Inst.Index, mir_i); + try emit.code_offset_mapping.putNoClobber( + emit.lower.allocator, + mir_index, + @intCast(u32, emit.code.items.len), + ); + const lowered = try emit.lower.lowerMir(mir_index); + var lowered_relocs = lowered.relocs; + for (lowered.insts, 0..) |lowered_inst, lowered_index| { + const start_offset = @intCast(u32, emit.code.items.len); + try lowered_inst.encode(emit.code.writer(), .{}); + const end_offset = @intCast(u32, emit.code.items.len); + while (lowered_relocs.len > 0 and + lowered_relocs[0].lowered_inst_index == lowered_index) : ({ + lowered_relocs = lowered_relocs[1..]; + }) switch (lowered_relocs[0].target) { + .inst => |target| try emit.relocs.append(emit.lower.allocator, .{ + .source = start_offset, .target = target, .offset = end_offset - 4, - .addend = 0, - .pcrel = true, - .length = 2, - }); - } else if (emit.bin_file.cast(link.File.Coff)) |coff_file| { - // Add relocation to the decl. - const atom_index = coff_file.getAtomIndexForSymbol( - .{ .sym_index = inst.data.relocation.atom_index, .file = null }, - ).?; - const target = coff_file.getGlobalByIndex(inst.data.relocation.sym_index); - try link.File.Coff.Atom.addRelocation(coff_file, atom_index, .{ - .type = .direct, - .target = target, - .offset = end_offset - 4, - .addend = 0, - .pcrel = true, - .length = 2, - }); - } else return emit.fail("TODO implement {} for {}", .{ inst.tag, emit.bin_file.tag }), + .length = @intCast(u5, end_offset - start_offset), + }), + .@"extern" => |symbol| if (emit.bin_file.cast(link.File.MachO)) |macho_file| { + // Add relocation to the decl. + const atom_index = macho_file.getAtomIndexForSymbol( + .{ .sym_index = symbol.atom_index, .file = null }, + ).?; + const target = macho_file.getGlobalByIndex(symbol.sym_index); + try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ + .type = .branch, + .target = target, + .offset = end_offset - 4, + .addend = 0, + .pcrel = true, + .length = 2, + }); + } else if (emit.bin_file.cast(link.File.Coff)) |coff_file| { + // Add relocation to the decl. 
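+                        // (the fixup patches the 4-byte pc-relative field just
+                        // emitted, hence offset = end_offset - 4 below)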
+ const atom_index = coff_file.getAtomIndexForSymbol( + .{ .sym_index = symbol.atom_index, .file = null }, + ).?; + const target = coff_file.getGlobalByIndex(symbol.sym_index); + try link.File.Coff.Atom.addRelocation(coff_file, atom_index, .{ + .type = .direct, + .target = target, + .offset = end_offset - 4, + .addend = 0, + .pcrel = true, + .length = 2, + }); + } else return emit.fail("TODO implement extern reloc for {s}", .{ + @tagName(emit.bin_file.tag), + }), + .linker_got, + .linker_direct, + .linker_import, + .linker_tlv, + => |symbol| if (emit.bin_file.cast(link.File.MachO)) |macho_file| { + const atom_index = macho_file.getAtomIndexForSymbol(.{ + .sym_index = symbol.atom_index, + .file = null, + }).?; + try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ + .type = switch (lowered_relocs[0].target) { + .linker_got => .got, + .linker_direct => .signed, + .linker_tlv => .tlv, + else => unreachable, + }, + .target = .{ .sym_index = symbol.sym_index, .file = null }, + .offset = @intCast(u32, end_offset - 4), + .addend = 0, + .pcrel = true, + .length = 2, + }); + } else if (emit.bin_file.cast(link.File.Coff)) |coff_file| { + const atom_index = coff_file.getAtomIndexForSymbol(.{ + .sym_index = symbol.atom_index, + .file = null, + }).?; + try link.File.Coff.Atom.addRelocation(coff_file, atom_index, .{ + .type = switch (lowered_relocs[0].target) { + .linker_got => .got, + .linker_direct => .direct, + .linker_import => .import, + else => unreachable, + }, + .target = switch (lowered_relocs[0].target) { + .linker_got, + .linker_direct, + => .{ .sym_index = symbol.sym_index, .file = null }, + .linker_import => coff_file.getGlobalByIndex(symbol.sym_index), + else => unreachable, + }, + .offset = @intCast(u32, end_offset - 4), + .addend = 0, + .pcrel = true, + .length = 2, + }); + } else return emit.fail("TODO implement linker reloc for {s}", .{ + @tagName(emit.bin_file.tag), + }), + }; + } + std.debug.assert(lowered_relocs.len == 0); - .mov_linker, .lea_linker => if (emit.bin_file.cast(link.File.MachO)) |macho_file| { - const metadata = - emit.lower.mir.extraData(Mir.LeaRegisterReloc, inst.data.payload).data; - const atom_index = macho_file.getAtomIndexForSymbol(.{ - .sym_index = metadata.atom_index, - .file = null, - }).?; - try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ - .type = switch (inst.ops) { - .got_reloc => .got, - .direct_reloc => .signed, - .tlv_reloc => .tlv, - else => unreachable, - }, - .target = .{ .sym_index = metadata.sym_index, .file = null }, - .offset = @intCast(u32, end_offset - 4), - .addend = 0, - .pcrel = true, - .length = 2, - }); - } else if (emit.bin_file.cast(link.File.Coff)) |coff_file| { - const metadata = - emit.lower.mir.extraData(Mir.LeaRegisterReloc, inst.data.payload).data; - const atom_index = coff_file.getAtomIndexForSymbol(.{ - .sym_index = metadata.atom_index, - .file = null, - }).?; - try link.File.Coff.Atom.addRelocation(coff_file, atom_index, .{ - .type = switch (inst.ops) { - .got_reloc => .got, - .direct_reloc => .direct, - .import_reloc => .import, - else => unreachable, - }, - .target = switch (inst.ops) { - .got_reloc, - .direct_reloc, - => .{ .sym_index = metadata.sym_index, .file = null }, - .import_reloc => coff_file.getGlobalByIndex(metadata.sym_index), - else => unreachable, - }, - .offset = @intCast(u32, end_offset - 4), - .addend = 0, - .pcrel = true, - .length = 2, - }); - } else return emit.fail("TODO implement {} for {}", .{ inst.tag, emit.bin_file.tag }), - - .jcc => try 
emit.relocs.append(emit.lower.allocator, .{ - .source = start_offset, - .target = inst.data.inst_cc.inst, - .offset = end_offset - 4, - .length = 6, - }), - - .dbg_line => try emit.dbgAdvancePCAndLine( - inst.data.line_column.line, - inst.data.line_column.column, - ), - - .dbg_prologue_end => { - switch (emit.debug_output) { - .dwarf => |dw| { - try dw.setPrologueEnd(); - log.debug("mirDbgPrologueEnd (line={d}, col={d})", .{ - emit.prev_di_line, emit.prev_di_column, - }); - try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); - }, - .plan9 => {}, - .none => {}, - } - }, - - .dbg_epilogue_begin => { - switch (emit.debug_output) { - .dwarf => |dw| { - try dw.setEpilogueBegin(); - log.debug("mirDbgEpilogueBegin (line={d}, col={d})", .{ - emit.prev_di_line, emit.prev_di_column, - }); - try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); - }, - .plan9 => {}, - .none => {}, - } - }, + if (lowered.insts.len == 0) { + const mir_inst = emit.lower.mir.instructions.get(mir_index); + switch (mir_inst.tag) { + else => unreachable, + .dead => {}, + .dbg_line => try emit.dbgAdvancePCAndLine( + mir_inst.data.line_column.line, + mir_inst.data.line_column.column, + ), + .dbg_prologue_end => { + switch (emit.debug_output) { + .dwarf => |dw| { + try dw.setPrologueEnd(); + log.debug("mirDbgPrologueEnd (line={d}, col={d})", .{ + emit.prev_di_line, emit.prev_di_column, + }); + try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); + }, + .plan9 => {}, + .none => {}, + } + }, + .dbg_epilogue_begin => { + switch (emit.debug_output) { + .dwarf => |dw| { + try dw.setEpilogueBegin(); + log.debug("mirDbgEpilogueBegin (line={d}, col={d})", .{ + emit.prev_di_line, emit.prev_di_column, + }); + try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); + }, + .plan9 => {}, + .none => {}, + } + }, + } } } try emit.fixupRelocs(); diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index d9482d4b39..d82d5ec300 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -5,13 +5,22 @@ mir: Mir, target: *const std.Target, err_msg: ?*ErrorMsg = null, src_loc: Module.SrcLoc, -result: [ +result_insts_len: u8 = undefined, +result_relocs_len: u8 = undefined, +result_insts: [ std.mem.max(usize, &.{ - abi.Win64.callee_preserved_regs.len, - abi.SysV.callee_preserved_regs.len, + 2, // cmovcc: cmovcc \ cmovcc + 3, // setcc: setcc \ setcc \ logicop + 2, // jcc: jcc \ jcc + abi.Win64.callee_preserved_regs.len, // push_regs/pop_regs + abi.SysV.callee_preserved_regs.len, // push_regs/pop_regs }) ]Instruction = undefined, -result_len: usize = undefined, +result_relocs: [ + std.mem.max(usize, &.{ + 2, // jcc: jcc \ jcc + }) +]Reloc = undefined, pub const Error = error{ OutOfMemory, @@ -20,13 +29,35 @@ pub const Error = error{ CannotEncode, }; -/// The returned slice is overwritten by the next call to lowerMir. -pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction { - lower.result = undefined; - errdefer lower.result = undefined; - lower.result_len = 0; - defer lower.result_len = undefined; +pub const Reloc = struct { + lowered_inst_index: u8, + target: Target, + const Target = union(enum) { + inst: Mir.Inst.Index, + @"extern": Mir.Reloc, + linker_got: Mir.Reloc, + linker_direct: Mir.Reloc, + linker_import: Mir.Reloc, + linker_tlv: Mir.Reloc, + }; +}; + +/// The returned slice is overwritten by the next call to lowerMir. 
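+/// `insts` and `relocs` are slices of the fixed-size `result_insts` and
+/// `result_relocs` buffers above, which are sized for the worst-case expansion
+/// of a single MIR instruction, so no allocation is needed for the results.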
+pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { + insts: []const Instruction, + relocs: []const Reloc, +} { + lower.result_insts = undefined; + lower.result_relocs = undefined; + errdefer lower.result_insts = undefined; + errdefer lower.result_relocs = undefined; + lower.result_insts_len = 0; + lower.result_relocs_len = 0; + defer lower.result_insts_len = undefined; + defer lower.result_relocs_len = undefined; + + const inst = lower.mir.instructions.get(index); switch (inst.tag) { .adc, .add, @@ -185,22 +216,26 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction { .cmpxchgb => try lower.mirCmpxchgBytes(inst), - .jmp_reloc => try lower.emit(.none, .jmp, &.{.{ .imm = Immediate.s(0) }}), + .jmp_reloc => try lower.emitInstWithReloc(.none, .jmp, &.{ + .{ .imm = Immediate.s(0) }, + }, .{ .inst = inst.data.inst }), - .call_extern => try lower.emit(.none, .call, &.{.{ .imm = Immediate.s(0) }}), + .call_extern => try lower.emitInstWithReloc(.none, .call, &.{ + .{ .imm = Immediate.s(0) }, + }, .{ .@"extern" = inst.data.relocation }), - .lea_linker => try lower.mirLeaLinker(inst), - .mov_linker => try lower.mirMovLinker(inst), + .lea_linker => try lower.mirLinker(.lea, inst), + .mov_linker => try lower.mirLinker(.mov, inst), .mov_moffs => try lower.mirMovMoffs(inst), .movsx => try lower.mirMovsx(inst), .cmovcc => try lower.mirCmovcc(inst), .setcc => try lower.mirSetcc(inst), - .jcc => try lower.emit(.none, mnem_cc(.j, inst.data.inst_cc.cc), &.{.{ .imm = Immediate.s(0) }}), + .jcc => try lower.mirJcc(index, inst), - .push_regs => try lower.mirPushPopRegisterList(inst, .push), - .pop_regs => try lower.mirPushPopRegisterList(inst, .pop), + .push_regs => try lower.mirRegisterList(.push, inst), + .pop_regs => try lower.mirRegisterList(.pop, inst), .dbg_line, .dbg_prologue_end, @@ -209,7 +244,10 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction { => {}, } - return lower.result[0..lower.result_len]; + return .{ + .insts = lower.result_insts[0..lower.result_insts_len], + .relocs = lower.result_relocs[0..lower.result_relocs_len], + }; } pub fn fail(lower: *Lower, comptime format: []const u8, args: anytype) Error { @@ -221,7 +259,10 @@ pub fn fail(lower: *Lower, comptime format: []const u8, args: anytype) Error { fn mnem_cc(comptime base: @Type(.EnumLiteral), cc: bits.Condition) Mnemonic { return switch (cc) { - inline else => |c| @field(Mnemonic, @tagName(base) ++ @tagName(c)), + inline else => |c| if (@hasField(Mnemonic, @tagName(base) ++ @tagName(c))) + @field(Mnemonic, @tagName(base) ++ @tagName(c)) + else + unreachable, }; } @@ -247,6 +288,8 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { .rmi_rip, .mri_sib, .mri_rip, + .rrmi_sib, + .rrmi_rip, => Immediate.u(i), .ri64 => Immediate.u(lower.mir.extraData(Mir.Imm64, i).data.decode()), @@ -267,6 +310,7 @@ fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory { .mr_sib, .mrr_sib, .mri_sib, + .rrmi_sib, .lock_m_sib, .lock_mi_sib_u, .lock_mi_sib_s, @@ -283,6 +327,7 @@ fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory { .mr_rip, .mrr_rip, .mri_rip, + .rrmi_rip, .lock_m_rip, .lock_mi_rip_u, .lock_mi_rip_s, @@ -298,13 +343,28 @@ fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory { }); } -fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) Error!void { - lower.result[lower.result_len] = try Instruction.new(prefix, mnemonic, ops); - lower.result_len += 1; +fn emitInst(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: 
[]const Operand) Error!void { + lower.result_insts[lower.result_insts_len] = try Instruction.new(prefix, mnemonic, ops); + lower.result_insts_len += 1; +} + +fn emitInstWithReloc( + lower: *Lower, + prefix: Prefix, + mnemonic: Mnemonic, + ops: []const Operand, + target: Reloc.Target, +) Error!void { + lower.result_relocs[lower.result_relocs_len] = .{ + .lowered_inst_index = lower.result_insts_len, + .target = target, + }; + lower.result_relocs_len += 1; + try lower.emitInst(prefix, mnemonic, ops); } fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void { - try lower.emit(switch (inst.ops) { + try lower.emitInst(switch (inst.ops) { else => .none, .lock_m_sib, .lock_m_rip, @@ -389,13 +449,19 @@ fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void { .{ .reg = inst.data.rix.r }, .{ .imm = lower.imm(inst.ops, inst.data.rix.i) }, }, + .rrmi_sib, .rrmi_rip => &.{ + .{ .reg = inst.data.rrix.r1 }, + .{ .reg = inst.data.rrix.r2 }, + .{ .mem = lower.mem(inst.ops, inst.data.rrix.payload) }, + .{ .imm = lower.imm(inst.ops, inst.data.rrix.i) }, + }, else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), }); } fn mirString(lower: *Lower, inst: Mir.Inst) Error!void { switch (inst.ops) { - .string => try lower.emit(switch (inst.data.string.repeat) { + .string => try lower.emitInst(switch (inst.data.string.repeat) { inline else => |repeat| @field(Prefix, @tagName(repeat)), }, switch (inst.tag) { inline .cmps, .lods, .movs, .scas, .stos => |tag| switch (inst.data.string.width) { @@ -414,7 +480,7 @@ fn mirCmpxchgBytes(lower: *Lower, inst: Mir.Inst) Error!void { }, else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), }; - try lower.emit(switch (inst.ops) { + try lower.emitInst(switch (inst.ops) { .m_sib, .m_rip => .none, .lock_m_sib, .lock_m_rip => .lock, else => unreachable, @@ -426,7 +492,7 @@ fn mirCmpxchgBytes(lower: *Lower, inst: Mir.Inst) Error!void { } fn mirMovMoffs(lower: *Lower, inst: Mir.Inst) Error!void { - try lower.emit(switch (inst.ops) { + try lower.emitInst(switch (inst.ops) { .rax_moffs, .moffs_rax => .none, .lock_moffs_rax => .lock, else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), @@ -455,7 +521,7 @@ fn mirMovsx(lower: *Lower, inst: Mir.Inst) Error!void { }, else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), }; - try lower.emit(.none, switch (ops[0].bitSize()) { + try lower.emitInst(.none, switch (ops[0].bitSize()) { 32, 64 => switch (ops[1].bitSize()) { 32 => .movsxd, else => .movsx, @@ -465,32 +531,82 @@ fn mirMovsx(lower: *Lower, inst: Mir.Inst) Error!void { } fn mirCmovcc(lower: *Lower, inst: Mir.Inst) Error!void { - switch (inst.ops) { - .rr_cc => try lower.emit(.none, mnem_cc(.cmov, inst.data.rr_cc.cc), &.{ + const data: struct { cc: bits.Condition, ops: [2]Operand } = switch (inst.ops) { + .rr_cc => .{ .cc = inst.data.rr_cc.cc, .ops = .{ .{ .reg = inst.data.rr_cc.r1 }, .{ .reg = inst.data.rr_cc.r2 }, - }), - .rm_sib_cc, .rm_rip_cc => try lower.emit(.none, mnem_cc(.cmov, inst.data.rx_cc.cc), &.{ + } }, + .rm_sib_cc, .rm_rip_cc => .{ .cc = inst.data.rx_cc.cc, .ops = .{ .{ .reg = inst.data.rx_cc.r }, .{ .mem = lower.mem(inst.ops, inst.data.rx_cc.payload) }, - }), + } }, else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), + }; + switch (data.cc) { + else => |cc| try lower.emitInst(.none, mnem_cc(.cmov, cc), &data.ops), + .z_and_np => { + try lower.emitInst(.none, mnem_cc(.cmov, 
.nz), &.{ data.ops[1], data.ops[0] }); + try lower.emitInst(.none, mnem_cc(.cmov, .np), &data.ops); + }, + .nz_or_p => { + try lower.emitInst(.none, mnem_cc(.cmov, .nz), &data.ops); + try lower.emitInst(.none, mnem_cc(.cmov, .p), &data.ops); + }, } } fn mirSetcc(lower: *Lower, inst: Mir.Inst) Error!void { - switch (inst.ops) { - .r_cc => try lower.emit(.none, mnem_cc(.set, inst.data.r_cc.cc), &.{ + const data: struct { cc: bits.Condition, ops: [2]Operand } = switch (inst.ops) { + .r_cc => .{ .cc = inst.data.r_cc.cc, .ops = .{ .{ .reg = inst.data.r_cc.r }, - }), - .m_sib_cc, .m_rip_cc => try lower.emit(.none, mnem_cc(.set, inst.data.x_cc.cc), &.{ + .{ .reg = inst.data.r_cc.scratch }, + } }, + .m_sib_cc, .m_rip_cc => .{ .cc = inst.data.x_cc.cc, .ops = .{ .{ .mem = lower.mem(inst.ops, inst.data.x_cc.payload) }, - }), + .{ .reg = inst.data.x_cc.scratch }, + } }, else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), + }; + switch (data.cc) { + else => |cc| try lower.emitInst(.none, mnem_cc(.set, cc), data.ops[0..1]), + .z_and_np => { + try lower.emitInst(.none, mnem_cc(.set, .z), data.ops[0..1]); + try lower.emitInst(.none, mnem_cc(.set, .np), data.ops[1..2]); + try lower.emitInst(.none, .@"and", data.ops[0..2]); + }, + .nz_or_p => { + try lower.emitInst(.none, mnem_cc(.set, .nz), data.ops[0..1]); + try lower.emitInst(.none, mnem_cc(.set, .p), data.ops[1..2]); + try lower.emitInst(.none, .@"or", data.ops[0..2]); + }, } } -fn mirPushPopRegisterList(lower: *Lower, inst: Mir.Inst, comptime mnemonic: Mnemonic) Error!void { +fn mirJcc(lower: *Lower, index: Mir.Inst.Index, inst: Mir.Inst) Error!void { + switch (inst.data.inst_cc.cc) { + else => |cc| try lower.emitInstWithReloc(.none, mnem_cc(.j, cc), &.{ + .{ .imm = Immediate.s(0) }, + }, .{ .inst = inst.data.inst_cc.inst }), + .z_and_np => { + try lower.emitInstWithReloc(.none, mnem_cc(.j, .nz), &.{ + .{ .imm = Immediate.s(0) }, + }, .{ .inst = index + 1 }); + try lower.emitInstWithReloc(.none, mnem_cc(.j, .np), &.{ + .{ .imm = Immediate.s(0) }, + }, .{ .inst = inst.data.inst_cc.inst }); + }, + .nz_or_p => { + try lower.emitInstWithReloc(.none, mnem_cc(.j, .nz), &.{ + .{ .imm = Immediate.s(0) }, + }, .{ .inst = inst.data.inst_cc.inst }); + try lower.emitInstWithReloc(.none, mnem_cc(.j, .p), &.{ + .{ .imm = Immediate.s(0) }, + }, .{ .inst = inst.data.inst_cc.inst }); + }, + } +} + +fn mirRegisterList(lower: *Lower, comptime mnemonic: Mnemonic, inst: Mir.Inst) Error!void { const reg_list = Mir.RegisterList.fromInt(inst.data.payload); const callee_preserved_regs = abi.getCalleePreservedRegs(lower.target.*); var it = reg_list.iterator(.{ .direction = switch (mnemonic) { @@ -498,24 +614,20 @@ fn mirPushPopRegisterList(lower: *Lower, inst: Mir.Inst, comptime mnemonic: Mnem .pop => .forward, else => unreachable, } }); - while (it.next()) |i| try lower.emit(.none, mnemonic, &.{.{ .reg = callee_preserved_regs[i] }}); + while (it.next()) |i| try lower.emitInst(.none, mnemonic, &.{.{ .reg = callee_preserved_regs[i] }}); } -fn mirLeaLinker(lower: *Lower, inst: Mir.Inst) Error!void { - const metadata = lower.mir.extraData(Mir.LeaRegisterReloc, inst.data.payload).data; - const reg = @intToEnum(Register, metadata.reg); - try lower.emit(.none, .lea, &.{ - .{ .reg = reg }, - .{ .mem = Memory.rip(Memory.PtrSize.fromBitSize(reg.bitSize()), 0) }, - }); -} - -fn mirMovLinker(lower: *Lower, inst: Mir.Inst) Error!void { - const metadata = lower.mir.extraData(Mir.LeaRegisterReloc, inst.data.payload).data; - const reg = 
@intToEnum(Register, metadata.reg); - try lower.emit(.none, .mov, &.{ - .{ .reg = reg }, - .{ .mem = Memory.rip(Memory.PtrSize.fromBitSize(reg.bitSize()), 0) }, +fn mirLinker(lower: *Lower, mnemonic: Mnemonic, inst: Mir.Inst) Error!void { + const reloc = lower.mir.extraData(Mir.Reloc, inst.data.rx.payload).data; + try lower.emitInstWithReloc(.none, mnemonic, &.{ + .{ .reg = inst.data.rx.r }, + .{ .mem = Memory.rip(Memory.PtrSize.fromBitSize(inst.data.rx.r.bitSize()), 0) }, + }, switch (inst.ops) { + .got_reloc => .{ .linker_got = reloc }, + .direct_reloc => .{ .linker_direct = reloc }, + .import_reloc => .{ .linker_import = reloc }, + .tlv_reloc => .{ .linker_tlv = reloc }, + else => unreachable, }); } diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 9e39d23bd4..e261f6dc38 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -434,6 +434,12 @@ pub const Inst = struct { /// Register, memory (SIB), immediate (byte) operands. /// Uses `rix` payload with extra data of type `MemorySib`. rmi_sib, + /// Register, register, memory (RIP), immediate (byte) operands. + /// Uses `rrix` payload with extra data of type `MemoryRip`. + rrmi_rip, + /// Register, register, memory (SIB), immediate (byte) operands. + /// Uses `rrix` payload with extra data of type `MemorySib`. + rrmi_sib, /// Register, memory (RIP), immediate (byte) operands. /// Uses `rix` payload with extra data of type `MemoryRip`. rmi_rip, @@ -524,16 +530,16 @@ pub const Inst = struct { /// Uses `reloc` payload. reloc, /// Linker relocation - GOT indirection. - /// Uses `payload` payload with extra data of type `LeaRegisterReloc`. + /// Uses `rx` payload with extra data of type `Reloc`. got_reloc, /// Linker relocation - direct reference. - /// Uses `payload` payload with extra data of type `LeaRegisterReloc`. + /// Uses `rx` payload with extra data of type `Reloc`. direct_reloc, /// Linker relocation - imports table indirection (binding). - /// Uses `payload` payload with extra data of type `LeaRegisterReloc`. + /// Uses `rx` payload with extra data of type `Reloc`. import_reloc, /// Linker relocation - threadlocal variable via GOT indirection. - /// Uses `payload` payload with extra data of type `LeaRegisterReloc`. + /// Uses `rx` payload with extra data of type `Reloc`. tlv_reloc, }; @@ -567,12 +573,14 @@ pub const Inst = struct { }, /// Condition code (CC), followed by custom payload found in extra. x_cc: struct { + scratch: Register, cc: bits.Condition, payload: u32, }, /// Register with condition code (CC). r_cc: struct { r: Register, + scratch: Register, cc: bits.Condition, }, /// Register, register with condition code (CC). @@ -614,6 +622,13 @@ pub const Inst = struct { i: u8, payload: u32, }, + /// Register, register, byte immediate, followed by Custom payload found in extra. + rrix: struct { + r1: Register, + r2: Register, + i: u8, + payload: u32, + }, /// String instruction prefix and width. string: struct { repeat: bits.StringRepeat, @@ -622,12 +637,7 @@ pub const Inst = struct { /// Relocation for the linker where: /// * `atom_index` is the index of the source /// * `sym_index` is the index of the target - relocation: struct { - /// Index of the containing atom. - atom_index: u32, - /// Index into the linker's symbol table. - sym_index: u32, - }, + relocation: Reloc, /// Debug line and column position line_column: struct { line: u32, @@ -646,9 +656,7 @@ pub const Inst = struct { } }; -pub const LeaRegisterReloc = struct { - /// Destination register. 
- reg: u32, +pub const Reloc = struct { /// Index of the containing atom. atom_index: u32, /// Index into the linker's symbol table. diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index 5d06865566..77dc0cfb7c 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -72,6 +72,12 @@ pub const Condition = enum(u5) { /// zero z, + // Pseudo conditions + /// zero and not parity + z_and_np, + /// not zero or parity + nz_or_p, + /// Converts a std.math.CompareOperator into a condition flag, /// i.e. returns the condition that is true iff the result of the /// comparison is true. Assumes signed comparison @@ -143,6 +149,9 @@ pub const Condition = enum(u5) { .po => .pe, .s => .ns, .z => .nz, + + .z_and_np => .nz_or_p, + .nz_or_p => .z_and_np, }; } }; diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig index 495edb5f2a..47211591ec 100644 --- a/src/arch/x86_64/encoder.zig +++ b/src/arch/x86_64/encoder.zig @@ -245,9 +245,9 @@ pub const Instruction = struct { }, .mem => |mem| { const op = switch (data.op_en) { - .m, .mi, .m1, .mc => .none, + .m, .mi, .m1, .mc, .vmi => .none, .mr, .mri, .mrc => inst.ops[1], - .rm, .rmi => inst.ops[0], + .rm, .rmi, .rvm, .rvmi => inst.ops[0], else => unreachable, }; try encodeMemory(enc, mem, op, encoder); diff --git a/test/behavior/bugs/12891.zig b/test/behavior/bugs/12891.zig index e558783705..354d9e856e 100644 --- a/test/behavior/bugs/12891.zig +++ b/test/behavior/bugs/12891.zig @@ -29,7 +29,6 @@ test "inf >= 1" { test "isNan(nan * 1)" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO const nan_times_one = comptime std.math.nan(f64) * 1; try std.testing.expect(std.math.isNan(nan_times_one)); @@ -37,7 +36,6 @@ test "isNan(nan * 1)" { test "runtime isNan(nan * 1)" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO const nan_times_one = std.math.nan(f64) * 1; try std.testing.expect(std.math.isNan(nan_times_one)); @@ -45,7 +43,6 @@ test "runtime isNan(nan * 1)" { test "isNan(nan * 0)" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO const nan_times_zero = comptime std.math.nan(f64) * 0; try std.testing.expect(std.math.isNan(nan_times_zero)); @@ -55,7 +52,6 @@ test "isNan(nan * 0)" { test "isNan(inf * 0)" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO const inf_times_zero = comptime std.math.inf(f64) * 0; try std.testing.expect(std.math.isNan(inf_times_zero)); @@ -65,7 +61,6 @@ test "isNan(inf * 0)" { test "runtime isNan(nan * 0)" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO const nan_times_zero = std.math.nan(f64) * 0; try std.testing.expect(std.math.isNan(nan_times_zero)); @@ -75,7 +70,6 @@ test "runtime isNan(nan 
* 0)" { test "runtime isNan(inf * 0)" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO const inf_times_zero = std.math.inf(f64) * 0; try std.testing.expect(std.math.isNan(inf_times_zero)); diff --git a/test/behavior/field_parent_ptr.zig b/test/behavior/field_parent_ptr.zig index bf99fd1795..c56bcad0d2 100644 --- a/test/behavior/field_parent_ptr.zig +++ b/test/behavior/field_parent_ptr.zig @@ -2,7 +2,6 @@ const expect = @import("std").testing.expect; const builtin = @import("builtin"); test "@fieldParentPtr non-first field" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO try testParentFieldPtr(&foo.c); From 4b75352c78731f76cfeac0b5c78c03f232022096 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sat, 6 May 2023 16:05:52 -0400 Subject: [PATCH 06/20] x86_64: implement packed floating point fields --- src/arch/x86_64/CodeGen.zig | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 87eceec347..befd5be0fd 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -4922,7 +4922,14 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { } if (field_extra_bits > 0) try self.truncateRegister(field_ty, dst_reg); - break :result .{ .register = dst_reg }; + + const dst_mcv = MCValue{ .register = dst_reg }; + const dst_rc = regClassForType(field_ty); + if (dst_rc.eql(gp)) break :result dst_mcv; + + const result_reg = try self.register_manager.allocReg(inst, dst_rc); + try self.genSetReg(result_reg, field_ty, dst_mcv); + break :result .{ .register = result_reg }; }, .register => |reg| { const reg_lock = self.register_manager.lockRegAssumeUnused(reg); @@ -7896,7 +7903,8 @@ fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerError!void { const abi_size = @intCast(u32, ty.abiSize(self.target.*)); - if (abi_size > 8) return self.fail("genSetReg called with a value larger than one register", .{}); + if (abi_size * 8 > dst_reg.bitSize()) + return self.fail("genSetReg called with a value larger than dst_reg", .{}); switch (src_mcv) { .none, .unreach, From 0bd92da0e20058942497d3f2d572799f9710959c Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sat, 6 May 2023 16:24:41 -0400 Subject: [PATCH 07/20] target: fix typos in x86 feature descriptions --- lib/std/target/x86.zig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/std/target/x86.zig b/lib/std/target/x86.zig index c46367e755..bf3b8cb953 100644 --- a/lib/std/target/x86.zig +++ b/lib/std/target/x86.zig @@ -326,7 +326,7 @@ pub const all_features = blk: { }; result[@enumToInt(Feature.avx512ifma)] = .{ .llvm_name = "avx512ifma", - .description = "Enable AVX-512 Integer Fused Multiple-Add", + .description = "Enable AVX-512 Integer Fused Multiply-Add", .dependencies = featureSet(&[_]Feature{ .avx512f, }), @@ -599,14 +599,14 @@ pub const all_features = blk: { }; result[@enumToInt(Feature.fma)] = .{ .llvm_name = "fma", - .description = "Enable three-operand fused multiple-add", + .description = "Enable three-operand fused multiply-add", .dependencies = 
featureSet(&[_]Feature{ .avx, }), }; result[@enumToInt(Feature.fma4)] = .{ .llvm_name = "fma4", - .description = "Enable four-operand fused multiple-add", + .description = "Enable four-operand fused multiply-add", .dependencies = featureSet(&[_]Feature{ .avx, .sse4a, From 3a5e3c52e0f09112989a2a40345305bfe9508431 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sat, 6 May 2023 20:31:48 -0400 Subject: [PATCH 08/20] x86_64: implement `@mulAdd` --- src/arch/x86_64/CodeGen.zig | 169 +++++++++++++++++++++++++++++++++- src/arch/x86_64/Encoding.zig | 24 ++++- src/arch/x86_64/Lower.zig | 22 +++++ src/arch/x86_64/Mir.zig | 31 +++++++ src/arch/x86_64/bits.zig | 16 +++- src/arch/x86_64/encodings.zig | 23 +++++ test/behavior/muladd.zig | 6 +- 7 files changed, 277 insertions(+), 14 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index befd5be0fd..fffb814d7f 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1200,6 +1200,32 @@ fn asmRegisterRegisterImmediate( }); } +fn asmRegisterRegisterMemory( + self: *Self, + tag: Mir.Inst.Tag, + reg1: Register, + reg2: Register, + m: Memory, +) !void { + _ = try self.addInst(.{ + .tag = tag, + .ops = switch (m) { + .sib => .rrm_sib, + .rip => .rrm_rip, + else => unreachable, + }, + .data = .{ .rrx = .{ + .r1 = reg1, + .r2 = reg2, + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, + }); +} + fn asmMemory(self: *Self, tag: Mir.Inst.Tag, m: Memory) !void { _ = try self.addInst(.{ .tag = tag, @@ -9369,9 +9395,146 @@ fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void { fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { const pl_op = self.air.instructions.items(.data)[inst].pl_op; const extra = self.air.extraData(Air.Bin, pl_op.payload).data; - _ = extra; - return self.fail("TODO implement airMulAdd for x86_64", .{}); - //return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, pl_op.operand }); + const ty = self.air.typeOfIndex(inst); + + if (!self.hasFeature(.fma)) return self.fail("TODO implement airMulAdd for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }); + + const ops = [3]Air.Inst.Ref{ extra.lhs, extra.rhs, pl_op.operand }; + var mcvs: [3]MCValue = undefined; + var locks = [1]?RegisterManager.RegisterLock{null} ** 3; + defer for (locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock); + var order = [1]u2{0} ** 3; + var unused = std.StaticBitSet(3).initFull(); + for (ops, &mcvs, &locks, 0..) 
|op, *mcv, *lock, op_i| { + const op_index = @intCast(u2, op_i); + mcv.* = try self.resolveInst(op); + if (unused.isSet(0) and mcv.isRegister() and self.reuseOperand(inst, op, op_index, mcv.*)) { + order[op_index] = 1; + unused.unset(0); + } else if (unused.isSet(2) and mcv.isMemory()) { + order[op_index] = 3; + unused.unset(2); + } + switch (mcv.*) { + .register => |reg| lock.* = self.register_manager.lockReg(reg), + else => {}, + } + } + for (&order, &mcvs, &locks) |*mop_index, *mcv, *lock| { + if (mop_index.* != 0) continue; + mop_index.* = 1 + @intCast(u2, unused.toggleFirstSet().?); + if (mop_index.* > 1 and mcv.isRegister()) continue; + const reg = try self.copyToTmpRegister(ty, mcv.*); + mcv.* = .{ .register = reg }; + if (lock.*) |old_lock| self.register_manager.unlockReg(old_lock); + lock.* = self.register_manager.lockRegAssumeUnused(reg); + } + + const tag: ?Mir.Inst.Tag = + if (mem.eql(u2, &order, &.{ 1, 3, 2 }) or mem.eql(u2, &order, &.{ 3, 1, 2 })) + switch (ty.zigTypeTag()) { + .Float => switch (ty.floatBits(self.target.*)) { + 32 => .vfmadd132ss, + 64 => .vfmadd132sd, + else => null, + }, + .Vector => switch (ty.childType().zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 32 => switch (ty.vectorLen()) { + 1 => .vfmadd132ss, + 2...8 => .vfmadd132ps, + else => null, + }, + 64 => switch (ty.vectorLen()) { + 1 => .vfmadd132sd, + 2...4 => .vfmadd132pd, + else => null, + }, + else => null, + }, + else => null, + }, + else => unreachable, + } + else if (mem.eql(u2, &order, &.{ 2, 1, 3 }) or mem.eql(u2, &order, &.{ 1, 2, 3 })) + switch (ty.zigTypeTag()) { + .Float => switch (ty.floatBits(self.target.*)) { + 32 => .vfmadd213ss, + 64 => .vfmadd213sd, + else => null, + }, + .Vector => switch (ty.childType().zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 32 => switch (ty.vectorLen()) { + 1 => .vfmadd213ss, + 2...8 => .vfmadd213ps, + else => null, + }, + 64 => switch (ty.vectorLen()) { + 1 => .vfmadd213sd, + 2...4 => .vfmadd213pd, + else => null, + }, + else => null, + }, + else => null, + }, + else => unreachable, + } + else if (mem.eql(u2, &order, &.{ 2, 3, 1 }) or mem.eql(u2, &order, &.{ 3, 2, 1 })) + switch (ty.zigTypeTag()) { + .Float => switch (ty.floatBits(self.target.*)) { + 32 => .vfmadd231ss, + 64 => .vfmadd231sd, + else => null, + }, + .Vector => switch (ty.childType().zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 32 => switch (ty.vectorLen()) { + 1 => .vfmadd231ss, + 2...8 => .vfmadd231ps, + else => null, + }, + 64 => switch (ty.vectorLen()) { + 1 => .vfmadd231sd, + 2...4 => .vfmadd231pd, + else => null, + }, + else => null, + }, + else => null, + }, + else => null, + } + else + unreachable; + if (tag == null) return self.fail("TODO implement airMulAdd for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }); + + var mops: [3]MCValue = undefined; + for (order, mcvs) |mop_index, mcv| mops[mop_index - 1] = mcv; + + const abi_size = @intCast(u32, ty.abiSize(self.target.*)); + const mop1_reg = registerAlias(mops[0].getReg().?, abi_size); + const mop2_reg = registerAlias(mops[1].getReg().?, abi_size); + if (mops[2].isRegister()) + try self.asmRegisterRegisterRegister( + tag.?, + mop1_reg, + mop2_reg, + registerAlias(mops[2].getReg().?, abi_size), + ) + else + try self.asmRegisterRegisterMemory( + tag.?, + mop1_reg, + mop2_reg, + mops[2].mem(Memory.PtrSize.fromSize(abi_size)), + ); + return self.finishAir(inst, mops[0], ops); } fn resolveInst(self: *Self, ref: Air.Inst.Ref) 
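// The order[] permutation computed above exists because the three FMA forms
// differ only in which operand plays which role; the digits name the
// multiplicand, multiplier, and addend (scalar-double forms shown, the
// ss/ps/pd variants behave the same way):
//
//   vfmadd132sd a, b, c   ; a = a*c + b
//   vfmadd213sd a, b, c   ; a = b*a + c
//   vfmadd231sd a, b, c   ; a = b*c + a
//
// All of them lower from the same builtin, with a single rounding step:
//
//   const r = @mulAdd(f64, x, y, z); // computes x*y + z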
InnerError!MCValue { diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index ada1e891fb..94bfa63999 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -340,6 +340,11 @@ pub const Mnemonic = enum { vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd, // F16C vcvtph2ps, vcvtps2ph, + // FMA + vfmadd132pd, vfmadd213pd, vfmadd231pd, + vfmadd132ps, vfmadd213ps, vfmadd231ps, + vfmadd132sd, vfmadd213sd, vfmadd231sd, + vfmadd132ss, vfmadd213ss, vfmadd231ss, // zig fmt: on }; @@ -368,12 +373,13 @@ pub const Op = enum { r8, r16, r32, r64, rm8, rm16, rm32, rm64, r32_m16, r64_m16, - m8, m16, m32, m64, m80, m128, + m8, m16, m32, m64, m80, m128, m256, rel8, rel16, rel32, m, moffs, sreg, xmm, xmm_m32, xmm_m64, xmm_m128, + ymm, ymm_m256, // zig fmt: on pub fn fromOperand(operand: Instruction.Operand) Op { @@ -385,6 +391,7 @@ pub const Op = enum { .segment => return .sreg, .floating_point => return switch (reg.bitSize()) { 128 => .xmm, + 256 => .ymm, else => unreachable, }, .general_purpose => { @@ -418,6 +425,7 @@ pub const Op = enum { 64 => .m64, 80 => .m80, 128 => .m128, + 256 => .m256, else => unreachable, }; }, @@ -454,7 +462,8 @@ pub const Op = enum { .eax, .r32, .rm32, .r32_m16 => unreachable, .rax, .r64, .rm64, .r64_m16 => unreachable, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable, - .m8, .m16, .m32, .m64, .m80, .m128 => unreachable, + .ymm, .ymm_m256 => unreachable, + .m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable, .unity => 1, .imm8, .imm8s, .rel8 => 8, .imm16, .imm16s, .rel16 => 16, @@ -468,12 +477,13 @@ pub const Op = enum { .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, .unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable, .rel8, .rel16, .rel32 => unreachable, - .m8, .m16, .m32, .m64, .m80, .m128 => unreachable, + .m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable, .al, .cl, .r8, .rm8 => 8, .ax, .r16, .rm16 => 16, .eax, .r32, .rm32, .r32_m16 => 32, .rax, .r64, .rm64, .r64_m16 => 64, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128, + .ymm, .ymm_m256 => 256, }; } @@ -482,13 +492,14 @@ pub const Op = enum { .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, .unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable, .rel8, .rel16, .rel32 => unreachable, - .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64, .xmm => unreachable, + .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64, .xmm, .ymm => unreachable, .m8, .rm8 => 8, .m16, .rm16, .r32_m16, .r64_m16 => 16, .m32, .rm32, .xmm_m32 => 32, .m64, .rm64, .xmm_m64 => 64, .m80 => 80, .m128, .xmm_m128 => 128, + .m256, .ymm_m256 => 256, }; } @@ -513,6 +524,7 @@ pub const Op = enum { .rm8, .rm16, .rm32, .rm64, .r32_m16, .r64_m16, .xmm, .xmm_m32, .xmm_m64, .xmm_m128, + .ymm, .ymm_m256, => true, else => false, }; @@ -539,7 +551,7 @@ pub const Op = enum { .r32_m16, .r64_m16, .m8, .m16, .m32, .m64, .m80, .m128, .m, - .xmm_m32, .xmm_m64, .xmm_m128, + .xmm_m32, .xmm_m64, .xmm_m128, .ymm_m256, => true, else => false, }; @@ -562,6 +574,7 @@ pub const Op = enum { .r32_m16, .r64_m16 => .general_purpose, .sreg => .segment, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .floating_point, + .ymm, .ymm_m256 => .floating_point, }; } @@ -625,6 +638,7 @@ pub const Feature = enum { none, avx, f16c, + fma, sse, sse2, sse3, diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index d82d5ec300..a37f28c0c3 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -205,6 +205,19 @@ pub fn lowerMir(lower: *Lower, index: 
Mir.Inst.Index) Error!struct { .vcvtph2ps, .vcvtps2ph, + + .vfmadd132pd, + .vfmadd213pd, + .vfmadd231pd, + .vfmadd132ps, + .vfmadd213ps, + .vfmadd231ps, + .vfmadd132sd, + .vfmadd213sd, + .vfmadd231sd, + .vfmadd132ss, + .vfmadd213ss, + .vfmadd231ss, => try lower.mirGeneric(inst), .cmps, @@ -288,6 +301,8 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { .rmi_rip, .mri_sib, .mri_rip, + .rrm_sib, + .rrm_rip, .rrmi_sib, .rrmi_rip, => Immediate.u(i), @@ -310,6 +325,7 @@ fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory { .mr_sib, .mrr_sib, .mri_sib, + .rrm_sib, .rrmi_sib, .lock_m_sib, .lock_mi_sib_u, @@ -327,6 +343,7 @@ fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory { .mr_rip, .mrr_rip, .mri_rip, + .rrm_rip, .rrmi_rip, .lock_m_rip, .lock_mi_rip_u, @@ -449,6 +466,11 @@ fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void { .{ .reg = inst.data.rix.r }, .{ .imm = lower.imm(inst.ops, inst.data.rix.i) }, }, + .rrm_sib, .rrm_rip => &.{ + .{ .reg = inst.data.rrx.r1 }, + .{ .reg = inst.data.rrx.r2 }, + .{ .mem = lower.mem(inst.ops, inst.data.rrx.payload) }, + }, .rrmi_sib, .rrmi_rip => &.{ .{ .reg = inst.data.rrix.r1 }, .{ .reg = inst.data.rrix.r2 }, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index e261f6dc38..92a9a74fbb 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -324,6 +324,31 @@ pub const Inst = struct { /// Convert single-precision floating-point values to 16-bit floating-point values vcvtps2ph, + /// Fused multiply-add of packed double-precision floating-point values + vfmadd132pd, + /// Fused multiply-add of packed double-precision floating-point values + vfmadd213pd, + /// Fused multiply-add of packed double-precision floating-point values + vfmadd231pd, + /// Fused multiply-add of packed single-precision floating-point values + vfmadd132ps, + /// Fused multiply-add of packed single-precision floating-point values + vfmadd213ps, + /// Fused multiply-add of packed single-precision floating-point values + vfmadd231ps, + /// Fused multiply-add of scalar double-precision floating-point values + vfmadd132sd, + /// Fused multiply-add of scalar double-precision floating-point values + vfmadd213sd, + /// Fused multiply-add of scalar double-precision floating-point values + vfmadd231sd, + /// Fused multiply-add of scalar single-precision floating-point values + vfmadd132ss, + /// Fused multiply-add of scalar single-precision floating-point values + vfmadd213ss, + /// Fused multiply-add of scalar single-precision floating-point values + vfmadd231ss, + /// Compare string operands cmps, /// Load string @@ -434,6 +459,12 @@ pub const Inst = struct { /// Register, memory (SIB), immediate (byte) operands. /// Uses `rix` payload with extra data of type `MemorySib`. rmi_sib, + /// Register, register, memory (RIP). + /// Uses `rrix` payload with extra data of type `MemoryRip`. + rrm_rip, + /// Register, register, memory (SIB). + /// Uses `rrix` payload with extra data of type `MemorySib`. + rrm_sib, /// Register, register, memory (RIP), immediate (byte) operands. /// Uses `rrix` payload with extra data of type `MemoryRip`. 
rrmi_rip, diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index 77dc0cfb7c..b73a37d6cb 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -485,7 +485,9 @@ pub const Memory = union(enum) { dword, qword, tbyte, - dqword, + xword, + yword, + zword, pub fn fromSize(size: u32) PtrSize { return switch (size) { @@ -493,7 +495,9 @@ pub const Memory = union(enum) { 2...2 => .word, 3...4 => .dword, 5...8 => .qword, - 9...16 => .dqword, + 9...16 => .xword, + 17...32 => .yword, + 33...64 => .zword, else => unreachable, }; } @@ -505,7 +509,9 @@ pub const Memory = union(enum) { 32 => .dword, 64 => .qword, 80 => .tbyte, - 128 => .dqword, + 128 => .xword, + 256 => .yword, + 512 => .zword, else => unreachable, }; } @@ -517,7 +523,9 @@ pub const Memory = union(enum) { .dword => 32, .qword => 64, .tbyte => 80, - .dqword => 128, + .xword => 128, + .yword => 256, + .zword => 512, }; } }; diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 5d2630e9a8..dd05728e24 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -1016,5 +1016,28 @@ pub const table = [_]Entry{ .{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128, .f16c }, .{ .vcvtps2ph, .mri, &.{ .xmm_m64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x1d }, 0, .vex_128, .f16c }, + + // FMA + .{ .vfmadd132pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_128_long, .fma }, + .{ .vfmadd132pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_256_long, .fma }, + .{ .vfmadd213pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_128_long, .fma }, + .{ .vfmadd213pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_256_long, .fma }, + .{ .vfmadd231pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_128_long, .fma }, + .{ .vfmadd231pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_256_long, .fma }, + + .{ .vfmadd132ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_128, .fma }, + .{ .vfmadd132ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_256, .fma }, + .{ .vfmadd213ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_128, .fma }, + .{ .vfmadd213ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_256, .fma }, + .{ .vfmadd231ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_128, .fma }, + .{ .vfmadd231ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_256, .fma }, + + .{ .vfmadd132sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x99 }, 0, .vex_128_long, .fma }, + .{ .vfmadd213sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0xa9 }, 0, .vex_128_long, .fma }, + .{ .vfmadd231sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_128_long, .fma }, + + .{ .vfmadd132ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x99 }, 0, .vex_128, .fma }, + .{ .vfmadd213ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xa9 }, 0, .vex_128, .fma }, + .{ .vfmadd231ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_128, .fma }, }; // zig fmt: on diff --git a/test/behavior/muladd.zig b/test/behavior/muladd.zig index aa36c99784..8656dc4f45 100644 --- a/test/behavior/muladd.zig +++ b/test/behavior/muladd.zig @@ -1,8 +1,10 @@ +const std = @import("std"); const builtin = 
@import("builtin"); -const expect = @import("std").testing.expect; +const expect = std.testing.expect; test "@mulAdd" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .fma)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO From cba195c1170fff77c5210f023e019d72f13b9614 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sat, 6 May 2023 22:27:39 -0400 Subject: [PATCH 09/20] x86_64: implement some float and float vector movement This allows actually storing value of these supported types in registers, and not restricting them to stack slots. --- src/arch/x86_64/CodeGen.zig | 127 ++++++++++++++++++++++++++-------- src/arch/x86_64/Encoding.zig | 18 +++-- src/arch/x86_64/Lower.zig | 6 ++ src/arch/x86_64/Mir.zig | 12 ++++ src/arch/x86_64/encoder.zig | 13 ++-- src/arch/x86_64/encodings.zig | 30 ++++++++ test/behavior/math.zig | 3 +- test/behavior/muladd.zig | 10 +-- 8 files changed, 176 insertions(+), 43 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index fffb814d7f..3e47ef63f6 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2008,6 +2008,11 @@ fn computeFrameLayout(self: *Self) !FrameLayout { }; } +fn getFrameAddrAlignment(self: *Self, frame_addr: FrameAddr) u32 { + const alloc_align = @as(u32, 1) << self.frame_allocs.get(@enumToInt(frame_addr.index)).abi_align; + return @min(alloc_align, @bitCast(u32, frame_addr.off) & (alloc_align - 1)); +} + fn allocFrameIndex(self: *Self, alloc: FrameAlloc) !FrameIndex { const frame_allocs_slice = self.frame_allocs.slice(); const frame_size = frame_allocs_slice.items(.abi_size); @@ -2051,24 +2056,36 @@ fn allocTempRegOrMem(self: *Self, elem_ty: Type, reg_ok: bool) !MCValue { return self.allocRegOrMemAdvanced(elem_ty, null, reg_ok); } -fn allocRegOrMemAdvanced(self: *Self, elem_ty: Type, inst: ?Air.Inst.Index, reg_ok: bool) !MCValue { - const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) orelse { +fn allocRegOrMemAdvanced(self: *Self, ty: Type, inst: ?Air.Inst.Index, reg_ok: bool) !MCValue { + const abi_size = math.cast(u32, ty.abiSize(self.target.*)) orelse { const mod = self.bin_file.options.module.?; - return self.fail("type '{}' too big to fit into stack frame", .{elem_ty.fmt(mod)}); + return self.fail("type '{}' too big to fit into stack frame", .{ty.fmt(mod)}); }; - if (reg_ok) { - // Make sure the type can fit in a register before we try to allocate one. 
- const ptr_bits = self.target.cpu.arch.ptrBitWidth(); - const ptr_bytes: u64 = @divExact(ptr_bits, 8); - if (abi_size <= ptr_bytes) { - if (self.register_manager.tryAllocReg(inst, regClassForType(elem_ty))) |reg| { + if (reg_ok) need_mem: { + if (abi_size <= @as(u32, switch (ty.zigTypeTag()) { + .Float => switch (ty.floatBits(self.target.*)) { + 16, 32, 64, 128 => 16, + 80 => break :need_mem, + else => unreachable, + }, + .Vector => switch (ty.childType().zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 16, 32, 64 => if (self.hasFeature(.avx)) 32 else 16, + 80, 128 => break :need_mem, + else => unreachable, + }, + else => break :need_mem, + }, + else => 8, + })) { + if (self.register_manager.tryAllocReg(inst, regClassForType(ty))) |reg| { return MCValue{ .register = registerAlias(reg, abi_size) }; } } } - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(elem_ty, self.target.*)); + const frame_index = try self.allocFrameIndex(FrameAlloc.initType(ty, self.target.*)); return .{ .load_frame = .{ .index = frame_index } }; } @@ -4442,12 +4459,19 @@ fn airRound(self: *Self, inst: Air.Inst.Index, mode: Immediate) !void { }), }; assert(dst_mcv.isRegister()); + const abi_size = @intCast(u32, ty.abiSize(self.target.*)); + const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size); if (src_mcv.isRegister()) - try self.asmRegisterRegisterImmediate(mir_tag, dst_mcv.getReg().?, src_mcv.getReg().?, mode) + try self.asmRegisterRegisterImmediate( + mir_tag, + dst_reg, + registerAlias(src_mcv.getReg().?, abi_size), + mode, + ) else try self.asmRegisterMemoryImmediate( mir_tag, - dst_mcv.getReg().?, + dst_reg, src_mcv.mem(Memory.PtrSize.fromSize(@intCast(u32, ty.abiSize(self.target.*)))), mode, ); @@ -7847,19 +7871,43 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { return self.finishAirResult(inst, result); } -fn movMirTag(self: *Self, ty: Type) !Mir.Inst.Tag { - return switch (ty.zigTypeTag()) { - else => .mov, +fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.Tag { + switch (ty.zigTypeTag()) { + else => return .mov, .Float => switch (ty.floatBits(self.target.*)) { 16 => unreachable, // needs special handling - 32 => .movss, - 64 => .movsd, - 128 => .movaps, - else => return self.fail("TODO movMirTag from {}", .{ - ty.fmt(self.bin_file.options.module.?), - }), + 32 => return if (self.hasFeature(.avx)) .vmovss else .movss, + 64 => return if (self.hasFeature(.avx)) .vmovsd else .movsd, + 128 => return if (self.hasFeature(.avx)) + if (aligned) .vmovaps else .vmovups + else if (aligned) .movaps else .movups, + else => {}, }, - }; + .Vector => switch (ty.childType().zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 16 => unreachable, // needs special handling + 32 => switch (ty.vectorLen()) { + 1 => return if (self.hasFeature(.avx)) .vmovss else .movss, + 2...4 => return if (self.hasFeature(.avx)) + if (aligned) .vmovaps else .vmovups + else if (aligned) .movaps else .movups, + 5...8 => if (self.hasFeature(.avx)) return if (aligned) .vmovaps else .vmovups, + else => {}, + }, + 64 => switch (ty.vectorLen()) { + 1 => return if (self.hasFeature(.avx)) .vmovsd else .movsd, + 2 => return if (self.hasFeature(.avx)) + if (aligned) .vmovaps else .vmovups + else if (aligned) .movaps else .movups, + 3...4 => if (self.hasFeature(.avx)) return if (aligned) .vmovaps else .vmovups, + else => {}, + }, + else => {}, + }, + else => {}, + }, + } + return self.fail("TODO movMirTag for {}", .{ty.fmt(self.bin_file.options.module.?)}); 
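// The `aligned` flag matters because the legacy SSE aligned moves fault on
// misaligned addresses, while the unaligned forms accept any address (and
// cost the same on aligned data on recent CPUs); an illustrative pairing:
//
//   movaps xmm0, [mem] ; requires 16-byte-aligned mem, else #GP
//   movups xmm0, [mem] ; any alignment
//
// so callers such as genSetReg below only request the aligned form when the
// source's alignment is provably at least the type's ABI alignment.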
} fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void { @@ -8016,7 +8064,11 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr 0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }), else => .lea, }, - .indirect, .load_frame => try self.movMirTag(ty), + .indirect => try self.movMirTag(ty, false), + .load_frame => |frame_addr| try self.movMirTag( + ty, + self.getFrameAddrAlignment(frame_addr) >= ty.abiAlignment(self.target.*), + ), .lea_frame => .lea, else => unreachable, }, @@ -8040,7 +8092,11 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr ) else self.asmRegisterMemory( - try self.movMirTag(ty), + try self.movMirTag(ty, mem.isAlignedGeneric( + u32, + @bitCast(u32, small_addr), + ty.abiAlignment(self.target.*), + )), registerAlias(dst_reg, abi_size), src_mem, ); @@ -8080,7 +8136,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr ) else try self.asmRegisterMemory( - try self.movMirTag(ty), + try self.movMirTag(ty, false), registerAlias(dst_reg, abi_size), src_mem, ); @@ -8194,7 +8250,24 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal ) else try self.asmMemoryRegister( - try self.movMirTag(ty), + try self.movMirTag(ty, switch (base) { + .none => mem.isAlignedGeneric( + u32, + @bitCast(u32, disp), + ty.abiAlignment(self.target.*), + ), + .reg => |reg| switch (reg) { + .es, .cs, .ss, .ds => mem.isAlignedGeneric( + u32, + @bitCast(u32, disp), + ty.abiAlignment(self.target.*), + ), + else => false, + }, + .frame => |frame_index| self.getFrameAddrAlignment( + .{ .index = frame_index, .off = disp }, + ) >= ty.abiAlignment(self.target.*), + }), dst_mem, registerAlias(src_reg, abi_size), ); @@ -8415,7 +8488,7 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); const dest = try self.allocRegOrMem(inst, true); - try self.genCopy(self.air.typeOfIndex(inst), dest, operand); + try self.genCopy(if (!dest.isMemory() or operand.isMemory()) dst_ty else src_ty, dest, operand); break :result dest; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 94bfa63999..1fd1112aaf 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -206,7 +206,7 @@ pub fn format( try writer.print("+{s} ", .{tag}); }, .m, .mi, .m1, .mc, .vmi => try writer.print("/{d} ", .{encoding.modRmExt()}), - .mr, .rm, .rmi, .mri, .mrc, .rvm, .rvmi => try writer.writeAll("/r "), + .mr, .rm, .rmi, .mri, .mrc, .rvm, .rvmi, .mvr => try writer.writeAll("/r "), } switch (encoding.data.op_en) { @@ -230,7 +230,7 @@ pub fn format( }; try writer.print("{s} ", .{tag}); }, - .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rvm => {}, + .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rvm, .mvr => {}, } try writer.print("{s} ", .{@tagName(encoding.mnemonic)}); @@ -332,7 +332,12 @@ pub const Mnemonic = enum { // SSE4.1 roundsd, roundss, // AVX - vmovddup, vmovshdup, vmovsldup, + vmovapd, vmovaps, + vmovddup, + vmovsd, + vmovshdup, vmovsldup, + vmovss, + vmovupd, vmovups, vpextrw, vpinsrw, vpshufhw, vpshuflw, vpsrld, vpsrlq, vpsrlw, @@ -357,7 +362,7 @@ pub const OpEn = enum { fd, td, m1, mc, mi, mr, rm, rmi, mri, mrc, - vmi, rvm, rvmi, + vmi, rvm, rvmi, mvr, // zig fmt: on }; @@ -549,9 +554,10 @@ pub const Op = enum { return switch (op) { .rm8, .rm16, .rm32, .rm64, 
.r32_m16, .r64_m16, - .m8, .m16, .m32, .m64, .m80, .m128, + .m8, .m16, .m32, .m64, .m80, .m128, .m256, .m, - .xmm_m32, .xmm_m64, .xmm_m128, .ymm_m256, + .xmm_m32, .xmm_m64, .xmm_m128, + .ymm_m256, => true, else => false, }; diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index a37f28c0c3..a246a97d4b 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -184,9 +184,15 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .roundsd, .roundss, + .vmovapd, + .vmovaps, .vmovddup, + .vmovsd, .vmovshdup, .vmovsldup, + .vmovss, + .vmovupd, + .vmovups, .vpextrw, .vpinsrw, .vpshufhw, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 92a9a74fbb..de7f2cff53 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -282,12 +282,24 @@ pub const Inst = struct { /// Round scalar single-precision floating-point values roundss, + /// Move aligned packed double-precision floating-point values + vmovapd, + /// Move aligned packed single-precision floating-point values + vmovaps, /// Replicate double floating-point values vmovddup, + /// Move or merge scalar double-precision floating-point value + vmovsd, /// Replicate single floating-point values vmovshdup, /// Replicate single floating-point values vmovsldup, + /// Move or merge scalar single-precision floating-point value + vmovss, + /// Move unaligned packed double-precision floating-point values + vmovupd, + /// Move unaligned packed single-precision floating-point values + vmovups, /// Extract word vpextrw, /// Insert word diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig index 47211591ec..fa6ce676cb 100644 --- a/src/arch/x86_64/encoder.zig +++ b/src/arch/x86_64/encoder.zig @@ -228,7 +228,7 @@ pub const Instruction = struct { .td => try encoder.imm64(inst.ops[0].mem.moffs.offset), else => { const mem_op = switch (data.op_en) { - .m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.ops[0], + .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0], .rm, .rmi, .vmi => inst.ops[1], .rvm, .rvmi => inst.ops[2], else => unreachable, @@ -239,6 +239,7 @@ pub const Instruction = struct { .m, .mi, .m1, .mc, .vmi => enc.modRmExt(), .mr, .mri, .mrc => inst.ops[1].reg.lowEnc(), .rm, .rmi, .rvm, .rvmi => inst.ops[0].reg.lowEnc(), + .mvr => inst.ops[2].reg.lowEnc(), else => unreachable, }; try encoder.modRm_direct(rm, reg.lowEnc()); @@ -248,6 +249,7 @@ pub const Instruction = struct { .m, .mi, .m1, .mc, .vmi => .none, .mr, .mri, .mrc => inst.ops[1], .rm, .rmi, .rvm, .rvmi => inst.ops[0], + .mvr => inst.ops[2], else => unreachable, }; try encodeMemory(enc, mem, op, encoder); @@ -315,7 +317,7 @@ pub const Instruction = struct { } else null, - .vmi, .rvm, .rvmi => unreachable, + .vmi, .rvm, .rvmi, .mvr => unreachable, }; if (segment_override) |seg| { legacy.setSegmentOverride(seg); @@ -350,7 +352,7 @@ pub const Instruction = struct { rex.b = b_x_op.isBaseExtended(); rex.x = b_x_op.isIndexExtended(); }, - .vmi, .rvm, .rvmi => unreachable, + .vmi, .rvm, .rvmi, .mvr => unreachable, } try encoder.rex(rex); @@ -372,10 +374,11 @@ pub const Instruction = struct { switch (op_en) { .np, .i, .zi, .fd, .td, .d => {}, .o, .oi => vex.b = inst.ops[0].reg.isExtended(), - .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .vmi, .rvm, .rvmi => { + .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .vmi, .rvm, .rvmi, .mvr => { const r_op = switch (op_en) { .rm, .rmi, .rvm, .rvmi => inst.ops[0], .mr, .mri, .mrc => inst.ops[1], + .mvr => inst.ops[2], .m, .mi, .m1, .mc, .vmi => .none, else => unreachable, 
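// The new .mvr operand form handled in this switch is the store direction
// of the VEX merge moves: ModRM.rm encodes ops[0] and ModRM.reg encodes
// ops[2] (the mirror image of .rvm), with ops[1] presumably supplying
// VEX.vvvv. For example, with register operands:
//
//   vmovsd xmm1, xmm2, xmm3 ; xmm1[63:0] = xmm3[63:0], xmm1[127:64] = xmm2[127:64]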
}; @@ -383,7 +386,7 @@ pub const Instruction = struct { const b_x_op = switch (op_en) { .rm, .rmi, .vmi => inst.ops[1], - .m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.ops[0], + .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0], .rvm, .rvmi => inst.ops[2], else => unreachable, }; diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index dd05728e24..607a87b8d9 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -974,12 +974,42 @@ pub const table = [_]Entry{ .{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .none, .sse4_1 }, // AVX + .{ .vmovapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_128, .avx }, + .{ .vmovapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_128, .avx }, + .{ .vmovapd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_256, .avx }, + .{ .vmovapd, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_256, .avx }, + + .{ .vmovaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .vex_128, .avx }, + .{ .vmovaps, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x29 }, 0, .vex_128, .avx }, + .{ .vmovaps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x28 }, 0, .vex_256, .avx }, + .{ .vmovaps, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x29 }, 0, .vex_256, .avx }, + .{ .vmovddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128, .avx }, + .{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_128, .avx }, + .{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_128, .avx }, + .{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_128, .avx }, + .{ .vmovsd, .mr, &.{ .m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_128, .avx }, + .{ .vmovshdup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .vex_128, .avx }, .{ .vmovsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .vex_128, .avx }, + .{ .vmovss, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_128, .avx }, + .{ .vmovss, .rm, &.{ .xmm, .m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_128, .avx }, + .{ .vmovss, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_128, .avx }, + .{ .vmovss, .mr, &.{ .m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_128, .avx }, + + .{ .vmovupd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_128, .avx }, + .{ .vmovupd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_128, .avx }, + .{ .vmovupd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_256, .avx }, + .{ .vmovupd, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_256, .avx }, + + .{ .vmovups, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .vex_128, .avx }, + .{ .vmovups, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x11 }, 0, .vex_128, .avx }, + .{ .vmovups, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x10 }, 0, .vex_256, .avx }, + .{ .vmovups, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x11 }, 0, .vex_256, .avx }, + .{ .vpextrw, .mri, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128, .avx }, .{ .vpextrw, .mri, &.{ .r64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128_long, .avx }, .{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128, .avx }, diff --git a/test/behavior/math.zig b/test/behavior/math.zig index 0362bd3a2b..7e16111059 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -399,7 +399,8 @@ fn testBinaryNot128(comptime Type: type, x: Type) !void { test "division" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if 
(builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/muladd.zig b/test/behavior/muladd.zig index 8656dc4f45..bfb94de270 100644 --- a/test/behavior/muladd.zig +++ b/test/behavior/muladd.zig @@ -2,9 +2,11 @@ const std = @import("std"); const builtin = @import("builtin"); const expect = std.testing.expect; +const stage2_x86_64_without_hardware_fma_support = builtin.zig_backend == .stage2_x86_64 and + !std.Target.x86.featureSetHas(builtin.cpu.features, .fma); + test "@mulAdd" { - if (builtin.zig_backend == .stage2_x86_64 and - !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .fma)) return error.SkipZigTest; // TODO + if (stage2_x86_64_without_hardware_fma_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -118,7 +120,7 @@ fn vector32() !void { test "vector f32" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (stage2_x86_64_without_hardware_fma_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -141,7 +143,7 @@ fn vector64() !void { test "vector f64" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (stage2_x86_64_without_hardware_fma_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO From 406c4035435657aaefe6f8e96642d0db326c7989 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sat, 6 May 2023 23:45:36 -0400 Subject: [PATCH 10/20] x86_64: add missing `movsx` and `movzx` encodings --- src/arch/x86_64/encodings.zig | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 607a87b8d9..5096ca5627 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -418,17 +418,21 @@ pub const table = [_]Entry{ .{ .movsx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xbe }, 0, .rex, .none }, .{ .movsx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xbe }, 0, .long, .none }, .{ .movsx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xbf }, 0, .none, .none }, + .{ .movsx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xbf }, 0, .rex, .none }, .{ .movsx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xbf }, 0, .long, .none }, // This instruction is discouraged. 
.{ .movsxd, .rm, &.{ .r32, .rm32 }, &.{ 0x63 }, 0, .none, .none }, .{ .movsxd, .rm, &.{ .r64, .rm32 }, &.{ 0x63 }, 0, .long, .none }, - .{ .movzx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .short, .none }, - .{ .movzx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .none, .none }, - .{ .movzx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .long, .none }, - .{ .movzx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .none, .none }, - .{ .movzx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .long, .none }, + .{ .movzx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .short, .none }, + .{ .movzx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .rex_short, .none }, + .{ .movzx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .none, .none }, + .{ .movzx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .rex, .none }, + .{ .movzx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .long, .none }, + .{ .movzx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .none, .none }, + .{ .movzx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .rex, .none }, + .{ .movzx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .long, .none }, .{ .mul, .m, &.{ .rm8 }, &.{ 0xf6 }, 4, .none, .none }, .{ .mul, .m, &.{ .rm8 }, &.{ 0xf6 }, 4, .rex, .none }, From 1c53f0a6b0b1866663346c473da310203a317e90 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sun, 7 May 2023 00:27:30 -0400 Subject: [PATCH 11/20] Dwarf: workaround crash --- src/link/Dwarf.zig | 54 ++++++++++++++++++++++------------------- test/behavior/error.zig | 3 --- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index 7a008ca732..2ec0dedc6f 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -526,7 +526,7 @@ pub const DeclState = struct { .ErrorUnion => { const error_ty = ty.errorUnionSet(); const payload_ty = ty.errorUnionPayload(); - const payload_align = payload_ty.abiAlignment(target); + const payload_align = if (payload_ty.isNoReturn()) 0 else payload_ty.abiAlignment(target); const error_align = Type.anyerror.abiAlignment(target); const abi_size = ty.abiSize(target); const payload_off = if (error_align >= payload_align) Type.anyerror.abiSize(target) else 0; @@ -540,31 +540,35 @@ pub const DeclState = struct { const name = try ty.nameAllocArena(arena, module); try dbg_info_buffer.writer().print("{s}\x00", .{name}); - // DW.AT.member - try dbg_info_buffer.ensureUnusedCapacity(7); - dbg_info_buffer.appendAssumeCapacity(@enumToInt(AbbrevKind.struct_member)); - // DW.AT.name, DW.FORM.string - dbg_info_buffer.appendSliceAssumeCapacity("value"); - dbg_info_buffer.appendAssumeCapacity(0); - // DW.AT.type, DW.FORM.ref4 - var index = dbg_info_buffer.items.len; - try dbg_info_buffer.resize(index + 4); - try self.addTypeRelocGlobal(atom_index, payload_ty, @intCast(u32, index)); - // DW.AT.data_member_location, DW.FORM.sdata - try leb128.writeULEB128(dbg_info_buffer.writer(), payload_off); + if (!payload_ty.isNoReturn()) { + // DW.AT.member + try dbg_info_buffer.ensureUnusedCapacity(7); + dbg_info_buffer.appendAssumeCapacity(@enumToInt(AbbrevKind.struct_member)); + // DW.AT.name, DW.FORM.string + dbg_info_buffer.appendSliceAssumeCapacity("value"); + dbg_info_buffer.appendAssumeCapacity(0); + // DW.AT.type, DW.FORM.ref4 + const index = dbg_info_buffer.items.len; + try dbg_info_buffer.resize(index + 4); + try self.addTypeRelocGlobal(atom_index, payload_ty, @intCast(u32, index)); + // DW.AT.data_member_location, DW.FORM.sdata + try leb128.writeULEB128(dbg_info_buffer.writer(), payload_off); + } - // DW.AT.member - try 
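// Context for the workaround: an error union whose payload type is noreturn
// has no value that can ever be observed, e.g.
//
//   const T = error{Failed}!noreturn; // only the error side is reachable
//
// so abiAlignment() is no longer queried on the noreturn payload, and the
// DWARF entry for such a type emits only the "err" member, skipping the
// zero-sized "value" member.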
dbg_info_buffer.ensureUnusedCapacity(5); - dbg_info_buffer.appendAssumeCapacity(@enumToInt(AbbrevKind.struct_member)); - // DW.AT.name, DW.FORM.string - dbg_info_buffer.appendSliceAssumeCapacity("err"); - dbg_info_buffer.appendAssumeCapacity(0); - // DW.AT.type, DW.FORM.ref4 - index = dbg_info_buffer.items.len; - try dbg_info_buffer.resize(index + 4); - try self.addTypeRelocGlobal(atom_index, error_ty, @intCast(u32, index)); - // DW.AT.data_member_location, DW.FORM.sdata - try leb128.writeULEB128(dbg_info_buffer.writer(), error_off); + { + // DW.AT.member + try dbg_info_buffer.ensureUnusedCapacity(5); + dbg_info_buffer.appendAssumeCapacity(@enumToInt(AbbrevKind.struct_member)); + // DW.AT.name, DW.FORM.string + dbg_info_buffer.appendSliceAssumeCapacity("err"); + dbg_info_buffer.appendAssumeCapacity(0); + // DW.AT.type, DW.FORM.ref4 + const index = dbg_info_buffer.items.len; + try dbg_info_buffer.resize(index + 4); + try self.addTypeRelocGlobal(atom_index, error_ty, @intCast(u32, index)); + // DW.AT.data_member_location, DW.FORM.sdata + try leb128.writeULEB128(dbg_info_buffer.writer(), error_off); + } // DW.AT.structure_type delimit children try dbg_info_buffer.append(0); diff --git a/test/behavior/error.zig b/test/behavior/error.zig index 0cd9be05ca..91b5561d62 100644 --- a/test/behavior/error.zig +++ b/test/behavior/error.zig @@ -757,7 +757,6 @@ test "error union of noreturn used with if" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO NoReturn.a = 64; @@ -772,7 +771,6 @@ test "error union of noreturn used with try" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO NoReturn.a = 64; @@ -784,7 +782,6 @@ test "error union of noreturn used with catch" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO NoReturn.a = 64; From 05580b9453e4ae2d9b62fe4178651937d8b73989 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sun, 7 May 2023 03:14:31 -0400 Subject: [PATCH 12/20] x86_64: implement float cast from `f16` to `f64` --- src/arch/x86_64/CodeGen.zig | 95 ++++++++++++----- src/arch/x86_64/Encoding.zig | 163 +++++++++++++++++------------ src/arch/x86_64/Lower.zig | 4 + src/arch/x86_64/Mir.zig | 8 ++ src/arch/x86_64/encoder.zig | 33 ++---- src/arch/x86_64/encodings.zig | 189 ++++++++++++++++++---------------- test/behavior/floatop.zig | 3 +- 7 files changed, 288 insertions(+), 207 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 3e47ef63f6..38497400f2 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2287,26 +2287,46 @@ fn airFptrunc(self: 
*Self, inst: Air.Inst.Index) !void { src_mcv else try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); - const dst_lock = self.register_manager.lockReg(dst_mcv.register); + const dst_reg = dst_mcv.getReg().?.to128(); + const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - if (src_bits == 32 and dst_bits == 16 and self.hasFeature(.f16c)) - try self.asmRegisterRegisterImmediate( - .vcvtps2ph, - dst_mcv.register, - if (src_mcv.isRegister()) src_mcv.getReg().? else src_reg: { - const src_reg = dst_mcv.register; - try self.genSetReg(src_reg, src_ty, src_mcv); - break :src_reg src_reg; + if (dst_bits == 16 and self.hasFeature(.f16c)) { + switch (src_bits) { + 32 => { + const mat_src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv); + try self.asmRegisterRegisterImmediate( + .vcvtps2ph, + dst_reg, + mat_src_reg.to128(), + Immediate.u(0b1_00), + ); }, - Immediate.u(0b1_00), - ) - else if (src_bits == 64 and dst_bits == 32) - try self.genBinOpMir(.cvtsd2ss, src_ty, dst_mcv, src_mcv) - else - return self.fail("TODO implement airFptrunc from {} to {}", .{ - src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), - }); + else => return self.fail("TODO implement airFptrunc from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }), + } + } else if (src_bits == 64 and dst_bits == 32) { + if (self.hasFeature(.avx)) if (src_mcv.isRegister()) try self.asmRegisterRegisterRegister( + .vcvtsd2ss, + dst_reg, + dst_reg, + src_mcv.getReg().?.to128(), + ) else try self.asmRegisterRegisterMemory( + .vcvtsd2ss, + dst_reg, + dst_reg, + src_mcv.mem(.qword), + ) else if (src_mcv.isRegister()) + try self.asmRegisterRegister(.cvtsd2ss, dst_reg, src_mcv.getReg().?.to128()) + else + try self.asmRegisterMemory(.cvtsd2ss, dst_reg, src_mcv.mem(.qword)); + } else return self.fail("TODO implement airFptrunc from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }); return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } @@ -2322,22 +2342,41 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { src_mcv else try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); - const dst_lock = self.register_manager.lockReg(dst_mcv.register); + const dst_reg = dst_mcv.getReg().?.to128(); + const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - try self.genBinOpMir( - if (src_bits == 16 and dst_bits == 32 and self.hasFeature(.f16c)) - .vcvtph2ps - else if (src_bits == 32 and dst_bits == 64) - .cvtss2sd + if (src_bits == 16 and self.hasFeature(.f16c)) { + const mat_src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? 
else - return self.fail("TODO implement airFpext from {} to {}", .{ + try self.copyToTmpRegister(src_ty, src_mcv); + try self.asmRegisterRegister(.vcvtph2ps, dst_reg, mat_src_reg.to128()); + switch (dst_bits) { + 32 => {}, + 64 => try self.asmRegisterRegisterRegister(.vcvtss2sd, dst_reg, dst_reg, dst_reg), + else => return self.fail("TODO implement airFpext from {} to {}", .{ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }), - src_ty, - dst_mcv, - src_mcv, - ); + } + } else if (src_bits == 32 and dst_bits == 64) { + if (self.hasFeature(.avx)) if (src_mcv.isRegister()) try self.asmRegisterRegisterRegister( + .vcvtss2sd, + dst_reg, + dst_reg, + src_mcv.getReg().?.to128(), + ) else try self.asmRegisterRegisterMemory( + .vcvtss2sd, + dst_reg, + dst_reg, + src_mcv.mem(.dword), + ) else if (src_mcv.isRegister()) + try self.asmRegisterRegister(.cvtss2sd, dst_reg, src_mcv.getReg().?.to128()) + else + try self.asmRegisterMemory(.cvtss2sd, dst_reg, src_mcv.mem(.dword)); + } else return self.fail("TODO implement airFpext from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }); return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 1fd1112aaf..bd6e70c975 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -89,30 +89,13 @@ pub fn findByOpcode(opc: []const u8, prefixes: struct { if (modrm_ext) |ext| if (ext != data.modrm_ext) continue; if (!std.mem.eql(u8, opc, enc.opcode())) continue; if (prefixes.rex.w) { - switch (data.mode) { - .none, .short, .rex, .rex_short, .vex_128, .vex_256 => continue, - .long, .vex_128_long, .vex_256_long => {}, - } + if (!data.mode.isLong()) continue; } else if (prefixes.rex.present and !prefixes.rex.isSet()) { - switch (data.mode) { - .rex, .rex_short => {}, - else => continue, - } + if (!data.mode.isRex()) continue; } else if (prefixes.legacy.prefix_66) { - switch (data.mode) { - .short, .rex_short => {}, - .none, .rex, .vex_128, .vex_256 => continue, - .long, .vex_128_long, .vex_256_long => continue, - } + if (!data.mode.isShort()) continue; } else { - switch (data.mode) { - .none => switch (data.mode) { - .short, .rex_short => continue, - .none, .rex, .vex_128, .vex_256 => {}, - .long, .vex_128_long, .vex_256_long => {}, - }, - else => continue, - } + if (data.mode.isShort()) continue; } return enc; }; @@ -148,50 +131,39 @@ pub fn format( _ = fmt; var opc = encoding.opcode(); - switch (encoding.data.mode) { - else => {}, - .long => try writer.writeAll("REX.W + "), - .vex_128, .vex_128_long, .vex_256, .vex_256_long => { - try writer.writeAll("VEX."); + if (encoding.data.mode.isVex()) { + try writer.writeAll("VEX."); - switch (encoding.data.mode) { - .vex_128, .vex_128_long => try writer.writeAll("128"), - .vex_256, .vex_256_long => try writer.writeAll("256"), - else => unreachable, - } + try writer.writeAll(switch (encoding.data.mode) { + .vex_128_w0, .vex_128_w1, .vex_128_wig => "128", + .vex_256_w0, .vex_256_w1, .vex_256_wig => "256", + .vex_lig_w0, .vex_lig_w1, .vex_lig_wig => "LIG", + .vex_lz_w0, .vex_lz_w1, .vex_lz_wig => "LZ", + else => unreachable, + }); - switch (opc[0]) { - else => {}, - 0x66, 0xf3, 0xf2 => { - try writer.print(".{X:0>2}", .{opc[0]}); - opc = opc[1..]; - }, - } + switch (opc[0]) { + else => {}, + 0x66, 0xf3, 0xf2 => { + try writer.print(".{X:0>2}", .{opc[0]}); + opc = opc[1..]; + }, + } - try writer.print(".{X:0>2}", 
.{opc[0]}); - opc = opc[1..]; + try writer.print(".{}", .{std.fmt.fmtSliceHexUpper(opc[0 .. opc.len - 1])}); + opc = opc[opc.len - 1 ..]; - switch (opc[0]) { - else => {}, - 0x38, 0x3A => { - try writer.print("{X:0>2}", .{opc[0]}); - opc = opc[1..]; - }, - } + try writer.writeAll(".W"); + try writer.writeAll(switch (encoding.data.mode) { + .vex_128_w0, .vex_256_w0, .vex_lig_w0, .vex_lz_w0 => "0", + .vex_128_w1, .vex_256_w1, .vex_lig_w1, .vex_lz_w1 => "1", + .vex_128_wig, .vex_256_wig, .vex_lig_wig, .vex_lz_wig => "IG", + else => unreachable, + }); - try writer.writeByte('.'); - try writer.writeAll(switch (encoding.data.mode) { - .vex_128, .vex_256 => "W0", - .vex_128_long, .vex_256_long => "W1", - else => unreachable, - }); - try writer.writeByte(' '); - }, - } - - for (opc) |byte| { - try writer.print("{x:0>2} ", .{byte}); - } + try writer.writeByte(' '); + } else if (encoding.data.mode.isLong()) try writer.writeAll("REX.W + "); + for (opc) |byte| try writer.print("{x:0>2} ", .{byte}); switch (encoding.data.op_en) { .np, .fd, .td, .i, .zi, .d => {}, @@ -332,6 +304,7 @@ pub const Mnemonic = enum { // SSE4.1 roundsd, roundss, // AVX + vcvtsd2ss, vcvtsi2sd, vcvtsi2ss, vcvtss2sd, vmovapd, vmovaps, vmovddup, vmovsd, @@ -629,20 +602,74 @@ pub const Op = enum { }; pub const Mode = enum { + // zig fmt: off none, - short, - long, - rex, - rex_short, - vex_128, - vex_128_long, - vex_256, - vex_256_long, + short, long, + rex, rex_short, + vex_128_w0, vex_128_w1, vex_128_wig, + vex_256_w0, vex_256_w1, vex_256_wig, + vex_lig_w0, vex_lig_w1, vex_lig_wig, + vex_lz_w0, vex_lz_w1, vex_lz_wig, + // zig fmt: on + + pub fn isShort(mode: Mode) bool { + return switch (mode) { + .short, .rex_short => true, + else => false, + }; + } + + pub fn isLong(mode: Mode) bool { + return switch (mode) { + .long, + .vex_128_w1, + .vex_256_w1, + .vex_lig_w1, + .vex_lz_w1, + => true, + else => false, + }; + } + + pub fn isRex(mode: Mode) bool { + return switch (mode) { + else => false, + .rex, .rex_short => true, + }; + } + + pub fn isVex(mode: Mode) bool { + return switch (mode) { + // zig fmt: off + else => false, + .vex_128_w0, .vex_128_w1, .vex_128_wig, + .vex_256_w0, .vex_256_w1, .vex_256_wig, + .vex_lig_w0, .vex_lig_w1, .vex_lig_wig, + .vex_lz_w0, .vex_lz_w1, .vex_lz_wig, + => true, + // zig fmt: on + }; + } + + pub fn isVecLong(mode: Mode) bool { + return switch (mode) { + // zig fmt: off + else => unreachable, + .vex_128_w0, .vex_128_w1, .vex_128_wig, + .vex_lig_w0, .vex_lig_w1, .vex_lig_wig, + .vex_lz_w0, .vex_lz_w1, .vex_lz_wig, + => false, + .vex_256_w0, .vex_256_w1, .vex_256_wig, + => true, + // zig fmt: on + }; + } }; pub const Feature = enum { none, avx, + avx2, f16c, fma, sse, diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index a246a97d4b..40a5ccdb10 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -184,6 +184,10 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .roundsd, .roundss, + .vcvtsd2ss, + .vcvtsi2sd, + .vcvtsi2ss, + .vcvtss2sd, .vmovapd, .vmovaps, .vmovddup, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index de7f2cff53..cb1a578bb6 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -282,6 +282,14 @@ pub const Inst = struct { /// Round scalar single-precision floating-point values roundss, + /// Convert scalar double-precision floating-point value to scalar single-precision floating-point value + vcvtsd2ss, + /// Convert doubleword integer to scalar double-precision floating-point value + vcvtsi2sd, 
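// There is no direct f16 -> f64 conversion in F16C, so airFpext above chains
// two of these instructions: vcvtph2ps widens f16 to f32, then the new
// vcvtss2sd widens that result to f64 (sketch of the emitted pair):
//
//   vcvtph2ps xmm0, xmm1       ; f16 -> f32
//   vcvtss2sd xmm0, xmm0, xmm0 ; f32 -> f64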
+ /// Convert doubleword integer to scalar single-precision floating-point value + vcvtsi2ss, + /// Convert scalar single-precision floating-point value to scalar double-precision floating-point value + vcvtss2sd, /// Move aligned packed double-precision floating-point values vmovapd, /// Move aligned packed single-precision floating-point values diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig index fa6ce676cb..0ce875240d 100644 --- a/src/arch/x86_64/encoder.zig +++ b/src/arch/x86_64/encoder.zig @@ -206,18 +206,15 @@ pub const Instruction = struct { const enc = inst.encoding; const data = enc.data; - switch (data.mode) { - .none, .short, .long, .rex, .rex_short => { - try inst.encodeLegacyPrefixes(encoder); - try inst.encodeMandatoryPrefix(encoder); - try inst.encodeRexPrefix(encoder); - try inst.encodeOpcode(encoder); - }, - .vex_128, .vex_128_long, .vex_256, .vex_256_long => { - try inst.encodeVexPrefix(encoder); - const opc = inst.encoding.opcode(); - try encoder.opcode_1byte(opc[opc.len - 1]); - }, + if (data.mode.isVex()) { + try inst.encodeVexPrefix(encoder); + const opc = inst.encoding.opcode(); + try encoder.opcode_1byte(opc[opc.len - 1]); + } else { + try inst.encodeLegacyPrefixes(encoder); + try inst.encodeMandatoryPrefix(encoder); + try inst.encodeRexPrefix(encoder); + try inst.encodeOpcode(encoder); } switch (data.op_en) { @@ -365,11 +362,7 @@ pub const Instruction = struct { var vex = Vex{}; - vex.w = switch (inst.encoding.data.mode) { - .vex_128, .vex_256 => false, - .vex_128_long, .vex_256_long => true, - else => unreachable, - }; + vex.w = inst.encoding.data.mode.isLong(); switch (op_en) { .np, .i, .zi, .fd, .td, .d => {}, @@ -395,11 +388,7 @@ pub const Instruction = struct { }, } - vex.l = switch (inst.encoding.data.mode) { - .vex_128, .vex_128_long => false, - .vex_256, .vex_256_long => true, - else => unreachable, - }; + vex.l = inst.encoding.data.mode.isVecLong(); vex.p = if (mand_pre) |mand| switch (mand) { 0x66 => .@"66", diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 5096ca5627..5e4dc2f04b 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -918,7 +918,6 @@ pub const table = [_]Entry{ .{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .none, .sse2 }, .{ .pextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .none, .sse2 }, - .{ .pextrw, .rmi, &.{ .r64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .long, .sse2 }, .{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 }, @@ -926,31 +925,23 @@ pub const table = [_]Entry{ .{ .pshuflw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf2, 0x0f, 0x70 }, 0, .none, .sse2 }, + .{ .psrlw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .none, .sse2 }, + .{ .psrlw, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .none, .sse2 }, .{ .psrld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .none, .sse2 }, .{ .psrld, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .none, .sse2 }, - .{ .psrlq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .none, .sse2 }, .{ .psrlq, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .none, .sse2 }, - .{ .psrlw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .none, .sse2 }, - .{ .psrlw, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .none, .sse2 }, - - .{ .punpckhbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .none, .sse2 }, - - .{ .punpckhdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .none, .sse2 }, - + 
.{ .punpckhbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .none, .sse2 }, + .{ .punpckhwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .none, .sse2 }, + .{ .punpckhdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .none, .sse2 }, .{ .punpckhqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6d }, 0, .none, .sse2 }, - .{ .punpckhwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .none, .sse2 }, - - .{ .punpcklbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x60 }, 0, .none, .sse2 }, - - .{ .punpckldq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .none, .sse2 }, - + .{ .punpcklbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x60 }, 0, .none, .sse2 }, + .{ .punpcklwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x61 }, 0, .none, .sse2 }, + .{ .punpckldq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .none, .sse2 }, .{ .punpcklqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .none, .sse2 }, - .{ .punpcklwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x61 }, 0, .none, .sse2 }, - .{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .none, .sse2 }, .{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .none, .sse2 }, @@ -972,106 +963,128 @@ pub const table = [_]Entry{ // SSE4.1 .{ .pextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .none, .sse4_1 }, - .{ .pextrw, .mri, &.{ .r64_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .long, .sse4_1 }, .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .none, .sse4_1 }, .{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .none, .sse4_1 }, // AVX - .{ .vmovapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_128, .avx }, - .{ .vmovapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_128, .avx }, - .{ .vmovapd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_256, .avx }, - .{ .vmovapd, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_256, .avx }, + .{ .vcvtsd2ss, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx }, - .{ .vmovaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .vex_128, .avx }, - .{ .vmovaps, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x29 }, 0, .vex_128, .avx }, - .{ .vmovaps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x28 }, 0, .vex_256, .avx }, - .{ .vmovaps, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x29 }, 0, .vex_256, .avx }, + .{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx }, + .{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx }, - .{ .vmovddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128, .avx }, + .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx }, + .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx }, - .{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_128, .avx }, - .{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_128, .avx }, - .{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_128, .avx }, - .{ .vmovsd, .mr, &.{ .m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_128, .avx }, + .{ .vcvtss2sd, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx }, - .{ .vmovshdup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .vex_128, .avx }, + .{ .vmovapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, 
.vex_128_wig, .avx }, + .{ .vmovapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_128_wig, .avx }, + .{ .vmovapd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_256_wig, .avx }, + .{ .vmovapd, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_256_wig, .avx }, - .{ .vmovsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .vex_128, .avx }, + .{ .vmovaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .vex_128_wig, .avx }, + .{ .vmovaps, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x29 }, 0, .vex_128_wig, .avx }, + .{ .vmovaps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x28 }, 0, .vex_256_wig, .avx }, + .{ .vmovaps, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x29 }, 0, .vex_256_wig, .avx }, - .{ .vmovss, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_128, .avx }, - .{ .vmovss, .rm, &.{ .xmm, .m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_128, .avx }, - .{ .vmovss, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_128, .avx }, - .{ .vmovss, .mr, &.{ .m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_128, .avx }, + .{ .vmovddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, + .{ .vmovddup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_256_wig, .avx }, - .{ .vmovupd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_128, .avx }, - .{ .vmovupd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_128, .avx }, - .{ .vmovupd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_256, .avx }, - .{ .vmovupd, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_256, .avx }, + .{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, + .{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, + .{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, + .{ .vmovsd, .mr, &.{ .m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, - .{ .vmovups, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .vex_128, .avx }, - .{ .vmovups, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x11 }, 0, .vex_128, .avx }, - .{ .vmovups, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x10 }, 0, .vex_256, .avx }, - .{ .vmovups, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x11 }, 0, .vex_256, .avx }, + .{ .vmovshdup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .vex_128_wig, .avx }, + .{ .vmovshdup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .vex_256_wig, .avx }, - .{ .vpextrw, .mri, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128, .avx }, - .{ .vpextrw, .mri, &.{ .r64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128_long, .avx }, - .{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128, .avx }, - .{ .vpextrw, .mri, &.{ .r64_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_long, .avx }, + .{ .vmovsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, + .{ .vmovsldup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .vex_256_wig, .avx }, - .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128, .avx }, + .{ .vmovss, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, + .{ .vmovss, .rm, &.{ .xmm, .m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, + .{ .vmovss, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, + .{ .vmovss, .mr, &.{ .m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, - .{ .vpsrld, .rvm, &.{ 
.xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_128, .avx }, - .{ .vpsrld, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .vex_128, .avx }, + .{ .vmovupd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_128_wig, .avx }, + .{ .vmovupd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_128_wig, .avx }, + .{ .vmovupd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_256_wig, .avx }, + .{ .vmovupd, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_256_wig, .avx }, - .{ .vpsrlq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_128, .avx }, - .{ .vpsrlq, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_128, .avx }, + .{ .vmovups, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .vex_128_wig, .avx }, + .{ .vmovups, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x11 }, 0, .vex_128_wig, .avx }, + .{ .vmovups, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x10 }, 0, .vex_256_wig, .avx }, + .{ .vmovups, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x11 }, 0, .vex_256_wig, .avx }, - .{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128, .avx }, - .{ .vpsrlw, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_128, .avx }, + .{ .vpextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128_wig, .avx }, + .{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_wig, .avx }, - .{ .vpunpckhbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_128, .avx }, + .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx }, - .{ .vpunpckhdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_128, .avx }, + .{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128_wig, .avx }, + .{ .vpsrlw, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_128_wig, .avx }, + .{ .vpsrld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_128_wig, .avx }, + .{ .vpsrld, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .vex_128_wig, .avx }, + .{ .vpsrlq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_128_wig, .avx }, + .{ .vpsrlq, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_128_wig, .avx }, - .{ .vpunpckhqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6d }, 0, .vex_128, .avx }, + .{ .vpunpckhbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_128_wig, .avx }, + .{ .vpunpckhwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_128_wig, .avx }, + .{ .vpunpckhdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_128_wig, .avx }, + .{ .vpunpckhqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6d }, 0, .vex_128_wig, .avx }, - .{ .vpunpckhwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_128, .avx }, - - .{ .vpunpcklbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x60 }, 0, .vex_128, .avx }, - - .{ .vpunpckldq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128, .avx }, - - .{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_128, .avx }, - - .{ .vpunpcklwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_128, .avx }, + .{ .vpunpcklbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x60 }, 0, .vex_128_wig, .avx }, + .{ .vpunpcklwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_128_wig, .avx }, + .{ .vpunpckldq, .rvm, 
&.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128_wig, .avx }, + .{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_128_wig, .avx }, // F16C - .{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128, .f16c }, + .{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c }, + .{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c }, - .{ .vcvtps2ph, .mri, &.{ .xmm_m64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x1d }, 0, .vex_128, .f16c }, + .{ .vcvtps2ph, .mri, &.{ .xmm_m64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x1d }, 0, .vex_128_w0, .f16c }, + .{ .vcvtps2ph, .mri, &.{ .xmm_m128, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x1d }, 0, .vex_256_w0, .f16c }, // FMA - .{ .vfmadd132pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_128_long, .fma }, - .{ .vfmadd132pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_256_long, .fma }, - .{ .vfmadd213pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_128_long, .fma }, - .{ .vfmadd213pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_256_long, .fma }, - .{ .vfmadd231pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_128_long, .fma }, - .{ .vfmadd231pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_256_long, .fma }, + .{ .vfmadd132pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_128_w1, .fma }, + .{ .vfmadd213pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_128_w1, .fma }, + .{ .vfmadd231pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_128_w1, .fma }, + .{ .vfmadd132pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_256_w1, .fma }, + .{ .vfmadd213pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_256_w1, .fma }, + .{ .vfmadd231pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_256_w1, .fma }, - .{ .vfmadd132ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_128, .fma }, - .{ .vfmadd132ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_256, .fma }, - .{ .vfmadd213ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_128, .fma }, - .{ .vfmadd213ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_256, .fma }, - .{ .vfmadd231ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_128, .fma }, - .{ .vfmadd231ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_256, .fma }, + .{ .vfmadd132ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_128_w0, .fma }, + .{ .vfmadd213ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_128_w0, .fma }, + .{ .vfmadd231ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_128_w0, .fma }, + .{ .vfmadd132ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_256_w0, .fma }, + .{ .vfmadd213ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_256_w0, .fma }, + .{ .vfmadd231ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_256_w0, .fma }, - .{ .vfmadd132sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x99 }, 0, .vex_128_long, .fma }, - .{ .vfmadd213sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 
}, &.{ 0x66, 0x0f, 0x38, 0xa9 }, 0, .vex_128_long, .fma }, - .{ .vfmadd231sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_128_long, .fma }, + .{ .vfmadd132sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x99 }, 0, .vex_lig_w1, .fma }, + .{ .vfmadd213sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0xa9 }, 0, .vex_lig_w1, .fma }, + .{ .vfmadd231sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_lig_w1, .fma }, - .{ .vfmadd132ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x99 }, 0, .vex_128, .fma }, - .{ .vfmadd213ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xa9 }, 0, .vex_128, .fma }, - .{ .vfmadd231ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_128, .fma }, + .{ .vfmadd132ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x99 }, 0, .vex_lig_w0, .fma }, + .{ .vfmadd213ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xa9 }, 0, .vex_lig_w0, .fma }, + .{ .vfmadd231ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_lig_w0, .fma }, + + // AVX2 + .{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 }, + .{ .vpsrlw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_256_wig, .avx2 }, + .{ .vpsrld, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_256_wig, .avx2 }, + .{ .vpsrld, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .vex_256_wig, .avx2 }, + .{ .vpsrlq, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_256_wig, .avx2 }, + .{ .vpsrlq, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_256_wig, .avx2 }, + + .{ .vpunpckhbw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpckhwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpckhdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpckhqdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6d }, 0, .vex_256_wig, .avx2 }, + + .{ .vpunpcklbw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x60 }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpcklwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpckldq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpcklqdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_256_wig, .avx2 }, }; // zig fmt: on diff --git a/test/behavior/floatop.zig b/test/behavior/floatop.zig index b98d782da1..ec24407d9f 100644 --- a/test/behavior/floatop.zig +++ b/test/behavior/floatop.zig @@ -52,7 +52,8 @@ fn testFloatComparisons() !void { } test "different sized float comparisons" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .f16c)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO From 5c5da179fb930c9d8be9366a851eb4a36f4044f1 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sun, 7 May 2023 03:47:56 -0400 Subject: [PATCH 13/20] x86_64: implement `@sqrt` for vectors --- src/arch/x86_64/CodeGen.zig | 225 +++++++++++++++++++++------------- 
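 A minimal sketch of the behavior this patch targets (illustrative only, not
 from the diff below; it uses only language builtins and std.testing):

     const std = @import("std");

     test "@sqrt on a float vector" {
         const v: @Vector(4, f32) = .{ 1.0, 4.0, 9.0, 16.0 };
         try std.testing.expectEqual(@Vector(4, f32){ 1.0, 2.0, 3.0, 4.0 }, @sqrt(v));
     }

 With AVX a 4 x f32 @sqrt lowers to a single vsqrtps; without AVX the SSE
 sqrtps form covers 128-bit vectors, and wider vectors remain a TODO.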
src/arch/x86_64/Encoding.zig | 1 + src/arch/x86_64/Lower.zig | 4 + src/arch/x86_64/Mir.zig | 8 ++ src/arch/x86_64/encodings.zig | 18 ++- 5 files changed, 166 insertions(+), 90 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 38497400f2..19878bae17 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -4520,25 +4520,69 @@ fn airRound(self: *Self, inst: Air.Inst.Index, mode: Immediate) !void { fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const ty = self.air.typeOf(un_op); + const abi_size = @intCast(u32, ty.abiSize(self.target.*)); const src_mcv = try self.resolveInst(un_op); const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv)) src_mcv else try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); + const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size); + const dst_lock = self.register_manager.lockReg(dst_reg); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - try self.genBinOpMir(switch (ty.zigTypeTag()) { - .Float => switch (ty.floatBits(self.target.*)) { - 32 => .sqrtss, - 64 => .sqrtsd, - else => return self.fail("TODO implement airSqrt for {}", .{ - ty.fmt(self.bin_file.options.module.?), - }), + const tag = if (@as(?Mir.Inst.Tag, switch (ty.zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 32 => if (self.hasFeature(.avx)) .vsqrtss else .sqrtss, + 64 => if (self.hasFeature(.avx)) .vsqrtsd else .sqrtsd, + 16, 80, 128 => null, + else => unreachable, }, - else => return self.fail("TODO implement airSqrt for {}", .{ - ty.fmt(self.bin_file.options.module.?), - }), - }, ty, dst_mcv, src_mcv); + .Vector => switch (ty.childType().zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 32 => switch (ty.vectorLen()) { + 1 => if (self.hasFeature(.avx)) .vsqrtss else .sqrtss, + 2...4 => if (self.hasFeature(.avx)) .vsqrtps else .sqrtps, + 5...8 => if (self.hasFeature(.avx)) .vsqrtps else null, + else => null, + }, + 64 => switch (ty.vectorLen()) { + 1 => if (self.hasFeature(.avx)) .vsqrtsd else .sqrtsd, + 2 => if (self.hasFeature(.avx)) .vsqrtpd else .sqrtpd, + 3...4 => if (self.hasFeature(.avx)) .vsqrtpd else null, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement airSqrt for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }); + switch (tag) { + .vsqrtss, .vsqrtsd => if (src_mcv.isRegister()) try self.asmRegisterRegisterRegister( + tag, + dst_reg, + dst_reg, + registerAlias(src_mcv.getReg().?, abi_size), + ) else try self.asmRegisterRegisterMemory( + tag, + dst_reg, + dst_reg, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + ), + else => if (src_mcv.isRegister()) try self.asmRegisterRegister( + tag, + dst_reg, + registerAlias(src_mcv.getReg().?, abi_size), + ) else try self.asmRegisterMemory( + tag, + dst_reg, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + ), + } return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); } @@ -9544,85 +9588,92 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { lock.* = self.register_manager.lockRegAssumeUnused(reg); } - const tag: ?Mir.Inst.Tag = + const tag = if (@as( + ?Mir.Inst.Tag, if (mem.eql(u2, &order, &.{ 1, 3, 2 }) or mem.eql(u2, &order, &.{ 3, 1, 2 })) - switch (ty.zigTypeTag()) { - .Float => switch (ty.floatBits(self.target.*)) { - 32 => .vfmadd132ss, - 
64 => .vfmadd132sd, - else => null, - }, - .Vector => switch (ty.childType().zigTypeTag()) { - .Float => switch (ty.childType().floatBits(self.target.*)) { - 32 => switch (ty.vectorLen()) { - 1 => .vfmadd132ss, - 2...8 => .vfmadd132ps, - else => null, - }, - 64 => switch (ty.vectorLen()) { - 1 => .vfmadd132sd, - 2...4 => .vfmadd132pd, - else => null, - }, - else => null, + switch (ty.zigTypeTag()) { + .Float => switch (ty.floatBits(self.target.*)) { + 32 => .vfmadd132ss, + 64 => .vfmadd132sd, + 16, 80, 128 => null, + else => unreachable, }, - else => null, - }, - else => unreachable, - } - else if (mem.eql(u2, &order, &.{ 2, 1, 3 }) or mem.eql(u2, &order, &.{ 1, 2, 3 })) - switch (ty.zigTypeTag()) { - .Float => switch (ty.floatBits(self.target.*)) { - 32 => .vfmadd213ss, - 64 => .vfmadd213sd, - else => null, - }, - .Vector => switch (ty.childType().zigTypeTag()) { - .Float => switch (ty.childType().floatBits(self.target.*)) { - 32 => switch (ty.vectorLen()) { - 1 => .vfmadd213ss, - 2...8 => .vfmadd213ps, - else => null, + .Vector => switch (ty.childType().zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 32 => switch (ty.vectorLen()) { + 1 => .vfmadd132ss, + 2...8 => .vfmadd132ps, + else => null, + }, + 64 => switch (ty.vectorLen()) { + 1 => .vfmadd132sd, + 2...4 => .vfmadd132pd, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, }, - 64 => switch (ty.vectorLen()) { - 1 => .vfmadd213sd, - 2...4 => .vfmadd213pd, - else => null, - }, - else => null, + else => unreachable, }, - else => null, - }, - else => unreachable, - } - else if (mem.eql(u2, &order, &.{ 2, 3, 1 }) or mem.eql(u2, &order, &.{ 3, 2, 1 })) - switch (ty.zigTypeTag()) { - .Float => switch (ty.floatBits(self.target.*)) { - 32 => .vfmadd231ss, - 64 => .vfmadd231sd, - else => null, - }, - .Vector => switch (ty.childType().zigTypeTag()) { - .Float => switch (ty.childType().floatBits(self.target.*)) { - 32 => switch (ty.vectorLen()) { - 1 => .vfmadd231ss, - 2...8 => .vfmadd231ps, - else => null, - }, - 64 => switch (ty.vectorLen()) { - 1 => .vfmadd231sd, - 2...4 => .vfmadd231pd, - else => null, - }, - else => null, + else => unreachable, + } + else if (mem.eql(u2, &order, &.{ 2, 1, 3 }) or mem.eql(u2, &order, &.{ 1, 2, 3 })) + switch (ty.zigTypeTag()) { + .Float => switch (ty.floatBits(self.target.*)) { + 32 => .vfmadd213ss, + 64 => .vfmadd213sd, + 16, 80, 128 => null, + else => unreachable, }, - else => null, - }, - else => null, - } - else - unreachable; - if (tag == null) return self.fail("TODO implement airMulAdd for {}", .{ + .Vector => switch (ty.childType().zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 32 => switch (ty.vectorLen()) { + 1 => .vfmadd213ss, + 2...8 => .vfmadd213ps, + else => null, + }, + 64 => switch (ty.vectorLen()) { + 1 => .vfmadd213sd, + 2...4 => .vfmadd213pd, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + } + else if (mem.eql(u2, &order, &.{ 2, 3, 1 }) or mem.eql(u2, &order, &.{ 3, 2, 1 })) + switch (ty.zigTypeTag()) { + .Float => switch (ty.floatBits(self.target.*)) { + 32 => .vfmadd231ss, + 64 => .vfmadd231sd, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (ty.childType().zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 32 => switch (ty.vectorLen()) { + 1 => .vfmadd231ss, + 2...8 => .vfmadd231ps, + else => null, + }, + 64 => switch (ty.vectorLen()) { + 1 => .vfmadd231sd, + 2...4 => 
.vfmadd231pd, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + } + else + unreachable, + )) |tag| tag else return self.fail("TODO implement airMulAdd for {}", .{ ty.fmt(self.bin_file.options.module.?), }); @@ -9634,14 +9685,14 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { const mop2_reg = registerAlias(mops[1].getReg().?, abi_size); if (mops[2].isRegister()) try self.asmRegisterRegisterRegister( - tag.?, + tag, mop1_reg, mop2_reg, registerAlias(mops[2].getReg().?, abi_size), ) else try self.asmRegisterRegisterMemory( - tag.?, + tag, mop1_reg, mop2_reg, mops[2].mem(Memory.PtrSize.fromSize(abi_size)), diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index bd6e70c975..b242c98bdc 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -316,6 +316,7 @@ pub const Mnemonic = enum { vpsrld, vpsrlq, vpsrlw, vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd, vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd, + vsqrtpd, vsqrtps, vsqrtsd, vsqrtss, // F16C vcvtph2ps, vcvtps2ph, // FMA diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 40a5ccdb10..39ad2313e7 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -212,6 +212,10 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .vpunpckldq, .vpunpcklqdq, .vpunpcklwd, + .vsqrtpd, + .vsqrtps, + .vsqrtsd, + .vsqrtss, .vcvtph2ps, .vcvtps2ph, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index cb1a578bb6..b6df0fff09 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -338,6 +338,14 @@ pub const Inst = struct { vpunpcklqdq, /// Unpack low data vpunpcklwd, + /// Square root of packed double-precision floating-point value + vsqrtpd, + /// Square root of packed single-precision floating-point value + vsqrtps, + /// Square root of scalar double-precision floating-point value + vsqrtsd, + /// Square root of scalar single-precision floating-point value + vsqrtss, /// Convert 16-bit floating-point values to single-precision floating-point values vcvtph2ps, diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 5e4dc2f04b..49ebc344fd 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -869,8 +869,9 @@ pub const table = [_]Entry{ .{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .none, .sse }, - .{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .none, .sse }, - .{ .sqrtss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .none, .sse }, + .{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .none, .sse }, + + .{ .sqrtss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .none, .sse }, .{ .ucomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2e }, 0, .none, .sse }, @@ -943,7 +944,8 @@ pub const table = [_]Entry{ .{ .punpcklqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .none, .sse2 }, .{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .none, .sse2 }, - .{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .none, .sse2 }, + + .{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .none, .sse2 }, .{ .subsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .none, .sse2 }, @@ -1039,6 +1041,16 @@ pub const table = [_]Entry{ .{ .vpunpckldq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128_wig, .avx }, .{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, 
.vex_128_wig, .avx }, + .{ .vsqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_128_wig, .avx }, + .{ .vsqrtpd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_256_wig, .avx }, + + .{ .vsqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .vex_128_wig, .avx }, + .{ .vsqrtps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x51 }, 0, .vex_256_wig, .avx }, + + .{ .vsqrtsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f }, 0, .vex_lig_wig, .avx }, + + .{ .vsqrtss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f }, 0, .vex_lig_wig, .avx }, + // F16C .{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c }, .{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c }, From ea957c4cff77f045108863cb5552b3511cb455c1 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sun, 7 May 2023 05:01:37 -0400 Subject: [PATCH 14/20] x86_64: implement `@sqrt` for `f16` scalars and vectors --- src/arch/x86_64/CodeGen.zig | 156 +++++++++++++++++++++++----------- src/arch/x86_64/encodings.zig | 4 +- test/behavior/floatop.zig | 1 - 3 files changed, 109 insertions(+), 52 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 19878bae17..6337ad23f5 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -4531,59 +4531,117 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - const tag = if (@as(?Mir.Inst.Tag, switch (ty.zigTypeTag()) { - .Float => switch (ty.childType().floatBits(self.target.*)) { - 32 => if (self.hasFeature(.avx)) .vsqrtss else .sqrtss, - 64 => if (self.hasFeature(.avx)) .vsqrtsd else .sqrtsd, - 16, 80, 128 => null, - else => unreachable, - }, - .Vector => switch (ty.childType().zigTypeTag()) { - .Float => switch (ty.childType().floatBits(self.target.*)) { - 32 => switch (ty.vectorLen()) { - 1 => if (self.hasFeature(.avx)) .vsqrtss else .sqrtss, - 2...4 => if (self.hasFeature(.avx)) .vsqrtps else .sqrtps, - 5...8 => if (self.hasFeature(.avx)) .vsqrtps else null, - else => null, + const result: MCValue = result: { + const tag = if (@as(?Mir.Inst.Tag, switch (ty.zigTypeTag()) { + .Float => switch (ty.floatBits(self.target.*)) { + 16 => if (self.hasFeature(.f16c)) { + const mat_src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(ty, src_mcv); + try self.asmRegisterRegister(.vcvtph2ps, dst_reg, mat_src_reg.to128()); + try self.asmRegisterRegisterRegister(.vsqrtss, dst_reg, dst_reg, dst_reg); + try self.asmRegisterRegisterImmediate( + .vcvtps2ph, + dst_reg, + dst_reg, + Immediate.u(0b1_00), + ); + break :result dst_mcv; + } else null, + 32 => if (self.hasFeature(.avx)) .vsqrtss else .sqrtss, + 64 => if (self.hasFeature(.avx)) .vsqrtsd else .sqrtsd, + 80, 128 => null, + else => unreachable, + }, + .Vector => switch (ty.childType().zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen()) { + 1 => { + const mat_src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? 
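+                            // f16 has no hardware square root: with F16C the value
+                            // is widened to f32 (vcvtph2ps), square-rooted
+                            // (vsqrtss), and narrowed back to f16 (vcvtps2ph).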
+ else + try self.copyToTmpRegister(ty, src_mcv); + try self.asmRegisterRegister(.vcvtph2ps, dst_reg, mat_src_reg.to128()); + try self.asmRegisterRegisterRegister(.vsqrtss, dst_reg, dst_reg, dst_reg); + try self.asmRegisterRegisterImmediate( + .vcvtps2ph, + dst_reg, + dst_reg, + Immediate.u(0b1_00), + ); + break :result dst_mcv; + }, + 2...8 => { + const wide_reg = registerAlias(dst_reg, abi_size * 2); + if (src_mcv.isRegister()) try self.asmRegisterRegister( + .vcvtph2ps, + wide_reg, + src_mcv.getReg().?.to128(), + ) else try self.asmRegisterMemory( + .vcvtph2ps, + wide_reg, + src_mcv.mem(Memory.PtrSize.fromSize( + @intCast(u32, @divExact(wide_reg.bitSize(), 16)), + )), + ); + try self.asmRegisterRegister(.vsqrtps, wide_reg, wide_reg); + try self.asmRegisterRegisterImmediate( + .vcvtps2ph, + dst_reg, + wide_reg, + Immediate.u(0b1_00), + ); + break :result dst_mcv; + }, + else => null, + } else null, + 32 => switch (ty.vectorLen()) { + 1 => if (self.hasFeature(.avx)) .vsqrtss else .sqrtss, + 2...4 => if (self.hasFeature(.avx)) .vsqrtps else .sqrtps, + 5...8 => if (self.hasFeature(.avx)) .vsqrtps else null, + else => null, + }, + 64 => switch (ty.vectorLen()) { + 1 => if (self.hasFeature(.avx)) .vsqrtsd else .sqrtsd, + 2 => if (self.hasFeature(.avx)) .vsqrtpd else .sqrtpd, + 3...4 => if (self.hasFeature(.avx)) .vsqrtpd else null, + else => null, + }, + 80, 128 => null, + else => unreachable, }, - 64 => switch (ty.vectorLen()) { - 1 => if (self.hasFeature(.avx)) .vsqrtsd else .sqrtsd, - 2 => if (self.hasFeature(.avx)) .vsqrtpd else .sqrtpd, - 3...4 => if (self.hasFeature(.avx)) .vsqrtpd else null, - else => null, - }, - 16, 80, 128 => null, else => unreachable, }, else => unreachable, - }, - else => unreachable, - })) |tag| tag else return self.fail("TODO implement airSqrt for {}", .{ - ty.fmt(self.bin_file.options.module.?), - }); - switch (tag) { - .vsqrtss, .vsqrtsd => if (src_mcv.isRegister()) try self.asmRegisterRegisterRegister( - tag, - dst_reg, - dst_reg, - registerAlias(src_mcv.getReg().?, abi_size), - ) else try self.asmRegisterRegisterMemory( - tag, - dst_reg, - dst_reg, - src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), - ), - else => if (src_mcv.isRegister()) try self.asmRegisterRegister( - tag, - dst_reg, - registerAlias(src_mcv.getReg().?, abi_size), - ) else try self.asmRegisterMemory( - tag, - dst_reg, - src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), - ), - } - return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); + })) |tag| tag else return self.fail("TODO implement airSqrt for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }); + switch (tag) { + .vsqrtss, .vsqrtsd => if (src_mcv.isRegister()) try self.asmRegisterRegisterRegister( + tag, + dst_reg, + dst_reg, + registerAlias(src_mcv.getReg().?, abi_size), + ) else try self.asmRegisterRegisterMemory( + tag, + dst_reg, + dst_reg, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + ), + else => if (src_mcv.isRegister()) try self.asmRegisterRegister( + tag, + dst_reg, + registerAlias(src_mcv.getReg().?, abi_size), + ) else try self.asmRegisterMemory( + tag, + dst_reg, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + ), + } + break :result dst_mcv; + }; + return self.finishAir(inst, result, .{ un_op, .none, .none }); } fn airUnaryMath(self: *Self, inst: Air.Inst.Index) !void { diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 49ebc344fd..78bda4fc76 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -1047,9 +1047,9 @@ pub const table = [_]Entry{ .{ 
.vsqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .vex_128_wig, .avx }, .{ .vsqrtps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x51 }, 0, .vex_256_wig, .avx }, - .{ .vsqrtsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f }, 0, .vex_lig_wig, .avx }, + .{ .vsqrtsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .vex_lig_wig, .avx }, - .{ .vsqrtss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f }, 0, .vex_lig_wig, .avx }, + .{ .vsqrtss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .vex_lig_wig, .avx }, // F16C .{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c }, diff --git a/test/behavior/floatop.zig b/test/behavior/floatop.zig index ec24407d9f..3f407061f4 100644 --- a/test/behavior/floatop.zig +++ b/test/behavior/floatop.zig @@ -135,7 +135,6 @@ fn testSqrt() !void { test "@sqrt with vectors" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO From 057139fda575e0e6038b821256a45669cd70a073 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sun, 7 May 2023 09:06:12 -0400 Subject: [PATCH 15/20] x86_64: implement binary operations for float vectors --- src/arch/x86_64/CodeGen.zig | 664 ++++++++++++++++++++-------------- src/arch/x86_64/Encoding.zig | 34 +- src/arch/x86_64/Lower.zig | 49 +++ src/arch/x86_64/Mir.zig | 115 +++++- src/arch/x86_64/encodings.zig | 101 +++++- 5 files changed, 662 insertions(+), 301 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 6337ad23f5..8c6f14ec3a 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1176,6 +1176,21 @@ fn asmRegisterRegisterRegister( }); } +fn asmRegisterRegisterRegisterImmediate( + self: *Self, + tag: Mir.Inst.Tag, + reg1: Register, + reg2: Register, + reg3: Register, + imm: Immediate, +) !void { + _ = try self.addInst(.{ + .tag = tag, + .ops = .rrri, + .data = .{ .rrri = .{ .r1 = reg1, .r2 = reg2, .r3 = reg3, .i = @intCast(u8, imm.unsigned) } }, + }); +} + fn asmRegisterRegisterImmediate( self: *Self, tag: Mir.Inst.Tag, @@ -2310,20 +2325,31 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { }), } } else if (src_bits == 64 and dst_bits == 32) { - if (self.hasFeature(.avx)) if (src_mcv.isRegister()) try self.asmRegisterRegisterRegister( - .vcvtsd2ss, - dst_reg, - dst_reg, - src_mcv.getReg().?.to128(), - ) else try self.asmRegisterRegisterMemory( + if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( .vcvtsd2ss, dst_reg, dst_reg, src_mcv.mem(.qword), - ) else if (src_mcv.isRegister()) - try self.asmRegisterRegister(.cvtsd2ss, dst_reg, src_mcv.getReg().?.to128()) - else - try self.asmRegisterMemory(.cvtsd2ss, dst_reg, src_mcv.mem(.qword)); + ) else try self.asmRegisterRegisterRegister( + .vcvtsd2ss, + dst_reg, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv)).to128(), + ) else if (src_mcv.isMemory()) try self.asmRegisterMemory( + .cvtsd2ss, + dst_reg, + src_mcv.mem(.qword), + ) else try self.asmRegisterRegister( + .cvtsd2ss, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? 
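+                // the source is neither a register nor addressable memory
+                // (e.g. an immediate), so stage it in a scratch register first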
+ else + try self.copyToTmpRegister(src_ty, src_mcv)).to128(), + ); } else return self.fail("TODO implement airFptrunc from {} to {}", .{ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }); @@ -2360,20 +2386,31 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { }), } } else if (src_bits == 32 and dst_bits == 64) { - if (self.hasFeature(.avx)) if (src_mcv.isRegister()) try self.asmRegisterRegisterRegister( - .vcvtss2sd, - dst_reg, - dst_reg, - src_mcv.getReg().?.to128(), - ) else try self.asmRegisterRegisterMemory( + if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( .vcvtss2sd, dst_reg, dst_reg, src_mcv.mem(.dword), - ) else if (src_mcv.isRegister()) - try self.asmRegisterRegister(.cvtss2sd, dst_reg, src_mcv.getReg().?.to128()) - else - try self.asmRegisterMemory(.cvtss2sd, dst_reg, src_mcv.mem(.dword)); + ) else try self.asmRegisterRegisterRegister( + .vcvtss2sd, + dst_reg, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv)).to128(), + ) else if (src_mcv.isMemory()) try self.asmRegisterMemory( + .cvtss2sd, + dst_reg, + src_mcv.mem(.dword), + ) else try self.asmRegisterRegister( + .cvtss2sd, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv)).to128(), + ); } else return self.fail("TODO implement airFpext from {} to {}", .{ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }); @@ -4532,7 +4569,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); const result: MCValue = result: { - const tag = if (@as(?Mir.Inst.Tag, switch (ty.zigTypeTag()) { + const mir_tag = if (@as(?Mir.Inst.Tag, switch (ty.zigTypeTag()) { .Float => switch (ty.floatBits(self.target.*)) { 16 => if (self.hasFeature(.f16c)) { const mat_src_reg = if (src_mcv.isRegister()) @@ -4558,11 +4595,14 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { .Float => switch (ty.childType().floatBits(self.target.*)) { 16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen()) { 1 => { - const mat_src_reg = if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(ty, src_mcv); - try self.asmRegisterRegister(.vcvtph2ps, dst_reg, mat_src_reg.to128()); + try self.asmRegisterRegister( + .vcvtph2ps, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(ty, src_mcv)).to128(), + ); try self.asmRegisterRegisterRegister(.vsqrtss, dst_reg, dst_reg, dst_reg); try self.asmRegisterRegisterImmediate( .vcvtps2ph, @@ -4574,16 +4614,19 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { }, 2...8 => { const wide_reg = registerAlias(dst_reg, abi_size * 2); - if (src_mcv.isRegister()) try self.asmRegisterRegister( - .vcvtph2ps, - wide_reg, - src_mcv.getReg().?.to128(), - ) else try self.asmRegisterMemory( + if (src_mcv.isMemory()) try self.asmRegisterMemory( .vcvtph2ps, wide_reg, src_mcv.mem(Memory.PtrSize.fromSize( @intCast(u32, @divExact(wide_reg.bitSize(), 16)), )), + ) else try self.asmRegisterRegister( + .vcvtph2ps, + wide_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(ty, src_mcv)).to128(), ); try self.asmRegisterRegister(.vsqrtps, wide_reg, wide_reg); try self.asmRegisterRegisterImmediate( @@ -4617,26 +4660,32 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { })) |tag| tag else return self.fail("TODO implement airSqrt for {}", .{ ty.fmt(self.bin_file.options.module.?), }); - switch (tag) { - .vsqrtss, .vsqrtsd => if (src_mcv.isRegister()) try self.asmRegisterRegisterRegister( - tag, - dst_reg, - dst_reg, - registerAlias(src_mcv.getReg().?, abi_size), - ) else try self.asmRegisterRegisterMemory( - tag, + switch (mir_tag) { + .vsqrtss, .vsqrtsd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + mir_tag, dst_reg, dst_reg, src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + ) else try self.asmRegisterRegisterRegister( + mir_tag, + dst_reg, + dst_reg, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(ty, src_mcv), abi_size), ), - else => if (src_mcv.isRegister()) try self.asmRegisterRegister( - tag, - dst_reg, - registerAlias(src_mcv.getReg().?, abi_size), - ) else try self.asmRegisterMemory( - tag, + else => if (src_mcv.isMemory()) try self.asmRegisterMemory( + mir_tag, dst_reg, src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + ) else try self.asmRegisterRegister( + mir_tag, + dst_reg, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(ty, src_mcv), abi_size), ), } break :result dst_mcv; @@ -5800,25 +5849,22 @@ fn genMulDivBinOp( } } -/// Result is always a register. fn genBinOp( self: *Self, maybe_inst: ?Air.Inst.Index, - tag: Air.Inst.Tag, + air_tag: Air.Inst.Tag, lhs_air: Air.Inst.Ref, rhs_air: Air.Inst.Ref, ) !MCValue { - const lhs = try self.resolveInst(lhs_air); - const rhs = try self.resolveInst(rhs_air); + const lhs_mcv = try self.resolveInst(lhs_air); + const rhs_mcv = try self.resolveInst(rhs_air); const lhs_ty = self.air.typeOf(lhs_air); const rhs_ty = self.air.typeOf(rhs_air); - if (lhs_ty.zigTypeTag() == .Vector) { - return self.fail("TODO implement genBinOp for {}", .{lhs_ty.fmt(self.bin_file.options.module.?)}); - } + const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*)); - switch (lhs) { + switch (lhs_mcv) { .immediate => |imm| switch (imm) { - 0 => switch (tag) { + 0 => switch (air_tag) { .sub, .subwrap => return self.genUnOp(maybe_inst, .neg, rhs_air), else => {}, }, @@ -5827,9 +5873,10 @@ fn genBinOp( else => {}, } - const is_commutative = switch (tag) { + const is_commutative = switch (air_tag) { .add, .addwrap, + .mul, .bool_or, .bit_or, .bool_and, @@ -5841,48 +5888,42 @@ fn genBinOp( else => false, }; - const dst_mem_ok = switch (tag) { - .add, - .addwrap, - .sub, - .subwrap, - .mul, - .div_float, - .div_exact, - .div_trunc, - .div_floor, - => !lhs_ty.isRuntimeFloat(), - - else => true, + const vec_op = switch (lhs_ty.zigTypeTag()) { + else => false, + .Float, .Vector => true, }; - const lhs_lock: ?RegisterLock = switch (lhs) { + const lhs_lock: ?RegisterLock = switch (lhs_mcv) { .register => |reg| self.register_manager.lockRegAssumeUnused(reg), else => null, }; defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock); - const rhs_lock: ?RegisterLock = switch (rhs) { + const rhs_lock: ?RegisterLock = switch (rhs_mcv) { .register => |reg| self.register_manager.lockReg(reg), else => null, }; defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); - var flipped: bool = false; + var flipped = false; + var copied_to_dst = true; const dst_mcv: MCValue = dst: { if 
(maybe_inst) |inst| { - if ((dst_mem_ok or lhs.isRegister()) and self.reuseOperand(inst, lhs_air, 0, lhs)) { - break :dst lhs; + if ((!vec_op or lhs_mcv.isRegister()) and self.reuseOperand(inst, lhs_air, 0, lhs_mcv)) { + break :dst lhs_mcv; } - if (is_commutative and (dst_mem_ok or rhs.isRegister()) and - self.reuseOperand(inst, rhs_air, 1, rhs)) + if (is_commutative and (!vec_op or rhs_mcv.isRegister()) and + self.reuseOperand(inst, rhs_air, 1, rhs_mcv)) { flipped = true; - break :dst rhs; + break :dst rhs_mcv; } } const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true); - try self.genCopy(lhs_ty, dst_mcv, lhs); + if (vec_op and lhs_mcv.isRegister() and self.hasFeature(.avx)) + copied_to_dst = false + else + try self.genCopy(lhs_ty, dst_mcv, lhs_mcv); break :dst dst_mcv; }; const dst_lock: ?RegisterLock = switch (dst_mcv) { @@ -5891,160 +5932,47 @@ fn genBinOp( }; defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - const src_mcv = if (flipped) lhs else rhs; - switch (tag) { - .add, - .addwrap, - => try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) { - else => .add, - .Float => switch (lhs_ty.floatBits(self.target.*)) { - 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) - .addss - else - return self.fail("TODO implement genBinOp for {s} {} without sse", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) - .addsd - else - return self.fail("TODO implement genBinOp for {s} {} without sse2", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - else => return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), + const src_mcv = if (flipped) lhs_mcv else rhs_mcv; + if (!vec_op) { + switch (air_tag) { + .add, + .addwrap, + => try self.genBinOpMir(.add, lhs_ty, dst_mcv, src_mcv), + + .sub, + .subwrap, + => try self.genBinOpMir(.sub, lhs_ty, dst_mcv, src_mcv), + + .ptr_add, + .ptr_sub, + => { + const tmp_reg = try self.copyToTmpRegister(rhs_ty, src_mcv); + const tmp_mcv = MCValue{ .register = tmp_reg }; + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + const elem_size = lhs_ty.elemType2().abiSize(self.target.*); + try self.genIntMulComplexOpMir(rhs_ty, tmp_mcv, .{ .immediate = elem_size }); + try self.genBinOpMir(switch (air_tag) { + .ptr_add => .add, + .ptr_sub => .sub, + else => unreachable, + }, lhs_ty, dst_mcv, tmp_mcv); }, - }, lhs_ty, dst_mcv, src_mcv), - .sub, - .subwrap, - => try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) { - else => .sub, - .Float => switch (lhs_ty.floatBits(self.target.*)) { - 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) - .subss - else - return self.fail("TODO implement genBinOp for {s} {} without sse", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) - .subsd - else - return self.fail("TODO implement genBinOp for {s} {} without sse2", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - else => return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - }, - }, lhs_ty, dst_mcv, src_mcv), + .bool_or, + .bit_or, + => try self.genBinOpMir(.@"or", lhs_ty, dst_mcv, src_mcv), - .mul => try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) { - else => return self.fail("TODO implement 
genBinOp for {s} {}", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - .Float => switch (lhs_ty.floatBits(self.target.*)) { - 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) - .mulss - else - return self.fail("TODO implement genBinOp for {s} {} without sse", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) - .mulsd - else - return self.fail("TODO implement genBinOp for {s} {} without sse2", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - else => return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - }, - }, lhs_ty, dst_mcv, src_mcv), + .bool_and, + .bit_and, + => try self.genBinOpMir(.@"and", lhs_ty, dst_mcv, src_mcv), - .div_float, - .div_exact, - .div_trunc, - .div_floor, - => { - try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) { - else => return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - .Float => switch (lhs_ty.floatBits(self.target.*)) { - 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) - .divss - else - return self.fail("TODO implement genBinOp for {s} {} without sse", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) - .divsd - else - return self.fail("TODO implement genBinOp for {s} {} without sse2", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - else => return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - }, - }, lhs_ty, dst_mcv, src_mcv); - switch (tag) { - .div_float, - .div_exact, - => {}, - .div_trunc, - .div_floor, - => if (self.hasFeature(.sse4_1)) { - const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*)); - const dst_alias = registerAlias(dst_mcv.register, abi_size); - try self.asmRegisterRegisterImmediate(switch (lhs_ty.floatBits(self.target.*)) { - 32 => .roundss, - 64 => .roundsd, - else => unreachable, - }, dst_alias, dst_alias, Immediate.u(switch (tag) { - .div_trunc => 0b1_0_11, - .div_floor => 0b1_0_01, - else => unreachable, - })); - } else return self.fail("TODO implement genBinOp for {s} {} without sse4_1", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - else => unreachable, - } - }, + .xor => try self.genBinOpMir(.xor, lhs_ty, dst_mcv, src_mcv), - .ptr_add, - .ptr_sub, - => { - const tmp_reg = try self.copyToTmpRegister(rhs_ty, src_mcv); - const tmp_mcv = MCValue{ .register = tmp_reg }; - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - const elem_size = lhs_ty.elemType2().abiSize(self.target.*); - try self.genIntMulComplexOpMir(rhs_ty, tmp_mcv, .{ .immediate = elem_size }); - try self.genBinOpMir(switch (tag) { - .ptr_add => .add, - .ptr_sub => .sub, - else => unreachable, - }, lhs_ty, dst_mcv, tmp_mcv); - }, - - .bool_or, - .bit_or, - => try self.genBinOpMir(.@"or", lhs_ty, dst_mcv, src_mcv), - - .bool_and, - .bit_and, - => try self.genBinOpMir(.@"and", lhs_ty, dst_mcv, src_mcv), - - .xor => try self.genBinOpMir(.xor, lhs_ty, dst_mcv, src_mcv), - - .min, - .max, - => switch (lhs_ty.zigTypeTag()) { - .Int => { + .min, + .max, + => { const mat_src_mcv: MCValue = if (switch (src_mcv) { .immediate, .eflags, @@ -6070,12 +5998,12 @@ fn genBinOp( const 
int_info = lhs_ty.intInfo(self.target.*); const cc: Condition = switch (int_info.signedness) { - .unsigned => switch (tag) { + .unsigned => switch (air_tag) { .min => .a, .max => .b, else => unreachable, }, - .signed => switch (tag) { + .signed => switch (air_tag) { .min => .g, .max => .l, else => unreachable, @@ -6134,26 +6062,222 @@ fn genBinOp( } try self.genCopy(lhs_ty, dst_mcv, .{ .register = tmp_reg }); }, - .Float => try self.genBinOpMir(switch (lhs_ty.floatBits(self.target.*)) { - 32 => switch (tag) { - .min => .minss, - .max => .maxss, - else => unreachable, - }, - 64 => switch (tag) { - .min => .minsd, - .max => .maxsd, - else => unreachable, - }, - else => return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - }, lhs_ty, dst_mcv, src_mcv), - else => return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - }, + else => return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + } + return dst_mcv; + } + + const mir_tag = if (@as(?Mir.Inst.Tag, switch (lhs_ty.zigTypeTag()) { + else => unreachable, + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => switch (air_tag) { + .add => if (self.hasFeature(.avx)) .vaddss else .addss, + .sub => if (self.hasFeature(.avx)) .vsubss else .subss, + .mul => if (self.hasFeature(.avx)) .vmulss else .mulss, + .div_float, + .div_trunc, + .div_floor, + .div_exact, + => if (self.hasFeature(.avx)) .vdivss else .divss, + .max => if (self.hasFeature(.avx)) .vmaxss else .maxss, + .min => if (self.hasFeature(.avx)) .vminss else .minss, + else => unreachable, + }, + 64 => switch (air_tag) { + .add => if (self.hasFeature(.avx)) .vaddsd else .addsd, + .sub => if (self.hasFeature(.avx)) .vsubsd else .subsd, + .mul => if (self.hasFeature(.avx)) .vmulsd else .mulsd, + .div_float, + .div_trunc, + .div_floor, + .div_exact, + => if (self.hasFeature(.avx)) .vdivsd else .divsd, + .max => if (self.hasFeature(.avx)) .vmaxsd else .maxsd, + .min => if (self.hasFeature(.avx)) .vminsd else .minsd, + else => unreachable, + }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (lhs_ty.childType().zigTypeTag()) { + else => null, + .Float => switch (lhs_ty.childType().floatBits(self.target.*)) { + 32 => switch (lhs_ty.vectorLen()) { + 1 => switch (air_tag) { + .add => if (self.hasFeature(.avx)) .vaddss else .addss, + .sub => if (self.hasFeature(.avx)) .vsubss else .subss, + .mul => if (self.hasFeature(.avx)) .vmulss else .mulss, + .div_float, + .div_trunc, + .div_floor, + .div_exact, + => if (self.hasFeature(.avx)) .vdivss else .divss, + .max => if (self.hasFeature(.avx)) .vmaxss else .maxss, + .min => if (self.hasFeature(.avx)) .vminss else .minss, + else => unreachable, + }, + 2...4 => switch (air_tag) { + .add => if (self.hasFeature(.avx)) .vaddps else .addps, + .sub => if (self.hasFeature(.avx)) .vsubps else .subps, + .mul => if (self.hasFeature(.avx)) .vmulps else .mulps, + .div_float, + .div_trunc, + .div_floor, + .div_exact, + => if (self.hasFeature(.avx)) .vdivps else .divps, + .max => if (self.hasFeature(.avx)) .vmaxps else .maxps, + .min => if (self.hasFeature(.avx)) .vminps else .minps, + else => unreachable, + }, + 5...8 => if (self.hasFeature(.avx)) switch (air_tag) { + .add => .vaddps, + .sub => .vsubps, + .mul => .vmulps, + .div_float, .div_trunc, .div_floor, .div_exact => .vdivps, + .max => .vmaxps, + .min => .vminps, + 
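+                        // 5-8 f32 lanes span a 256-bit ymm register, so this
+                        // mapping exists only with AVX; without it the null
+                        // result becomes a TODO failure below.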
else => unreachable, + } else null, + else => null, + }, + 64 => switch (lhs_ty.vectorLen()) { + 1 => switch (air_tag) { + .add => if (self.hasFeature(.avx)) .vaddsd else .addsd, + .sub => if (self.hasFeature(.avx)) .vsubsd else .subsd, + .mul => if (self.hasFeature(.avx)) .vmulsd else .mulsd, + .div_float, + .div_trunc, + .div_floor, + .div_exact, + => if (self.hasFeature(.avx)) .vdivsd else .divsd, + .max => if (self.hasFeature(.avx)) .vmaxsd else .maxsd, + .min => if (self.hasFeature(.avx)) .vminsd else .minsd, + else => unreachable, + }, + 2 => switch (air_tag) { + .add => if (self.hasFeature(.avx)) .vaddpd else .addpd, + .sub => if (self.hasFeature(.avx)) .vsubpd else .subpd, + .mul => if (self.hasFeature(.avx)) .vmulpd else .mulpd, + .div_float, + .div_trunc, + .div_floor, + .div_exact, + => if (self.hasFeature(.avx)) .vdivpd else .divpd, + .max => if (self.hasFeature(.avx)) .vmaxpd else .maxpd, + .min => if (self.hasFeature(.avx)) .vminpd else .minpd, + else => unreachable, + }, + 3...4 => if (self.hasFeature(.avx)) switch (air_tag) { + .add => .vaddpd, + .sub => .vsubpd, + .mul => .vmulpd, + .div_float, .div_trunc, .div_floor, .div_exact => .vdivpd, + .max => .vmaxpd, + .min => .vminpd, + else => unreachable, + } else null, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + }, + })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }); + const dst_alias = registerAlias(dst_mcv.getReg().?, abi_size); + if (self.hasFeature(.avx)) { + const src1_alias = + if (copied_to_dst) dst_alias else registerAlias(lhs_mcv.getReg().?, abi_size); + if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + mir_tag, + dst_alias, + src1_alias, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + ) else try self.asmRegisterRegisterRegister( + mir_tag, + dst_alias, + src1_alias, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), + ); + } else { + assert(copied_to_dst); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + mir_tag, + dst_alias, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + ) else try self.asmRegisterRegister( + mir_tag, + dst_alias, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? 
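+                // a source that is neither a register nor memory (immediate,
+                // eflags, ...) is staged through a scratch register to feed
+                // the non-destructive three-operand AVX form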
+ else + try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), + ); + } + switch (air_tag) { + .add, .sub, .mul, .div_float, .div_exact => {}, + .div_trunc, .div_floor => if (self.hasFeature(.sse4_1)) { + const round_tag = if (@as(?Mir.Inst.Tag, switch (lhs_ty.zigTypeTag()) { + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => if (self.hasFeature(.avx)) .vroundss else .roundss, + 64 => if (self.hasFeature(.avx)) .vroundsd else .roundsd, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (lhs_ty.childType().zigTypeTag()) { + .Float => switch (lhs_ty.childType().floatBits(self.target.*)) { + 32 => switch (lhs_ty.vectorLen()) { + 1 => if (self.hasFeature(.avx)) .vroundss else .roundss, + 2...4 => if (self.hasFeature(.avx)) .vroundps else .roundps, + 5...8 => if (self.hasFeature(.avx)) .vroundps else null, + else => null, + }, + 64 => switch (lhs_ty.vectorLen()) { + 1 => if (self.hasFeature(.avx)) .vroundsd else .roundsd, + 2 => if (self.hasFeature(.avx)) .vroundpd else .roundpd, + 3...4 => if (self.hasFeature(.avx)) .vroundpd else null, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => null, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }); + const round_mode = Immediate.u(switch (air_tag) { + .div_trunc => 0b1_0_11, + .div_floor => 0b1_0_01, + else => unreachable, + }); + switch (round_tag) { + .vroundss, .vroundsd => try self.asmRegisterRegisterRegisterImmediate( + round_tag, + dst_alias, + dst_alias, + dst_alias, + round_mode, + ), + else => try self.asmRegisterRegisterImmediate( + round_tag, + dst_alias, + dst_alias, + round_mode, + ), + } + } else return self.fail("TODO implement genBinOp for {s} {} without sse4_1", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + .max, .min => {}, // TODO: unordered select else => unreachable, } return dst_mcv; @@ -6186,20 +6310,11 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s .register_overflow, .reserved_frame, => unreachable, - .register => |src_reg| switch (ty.zigTypeTag()) { - .Float => { - if (!Target.x86.featureSetHas(self.target.cpu.features, .sse)) - return self.fail("TODO genBinOpMir for {s} {} without sse", .{ - @tagName(mir_tag), ty.fmt(self.bin_file.options.module.?), - }); - return self.asmRegisterRegister(mir_tag, dst_reg.to128(), src_reg.to128()); - }, - else => try self.asmRegisterRegister( - mir_tag, - dst_alias, - registerAlias(src_reg, abi_size), - ), - }, + .register => |src_reg| try self.asmRegisterRegister( + mir_tag, + dst_alias, + registerAlias(src_reg, abi_size), + ), .immediate => |imm| switch (self.regBitSize(ty)) { 8 => try self.asmRegisterImmediate( mir_tag, @@ -9646,7 +9761,7 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { lock.* = self.register_manager.lockRegAssumeUnused(reg); } - const tag = if (@as( + const mir_tag = if (@as( ?Mir.Inst.Tag, if (mem.eql(u2, &order, &.{ 1, 3, 2 }) or mem.eql(u2, &order, &.{ 3, 1, 2 })) switch (ty.zigTypeTag()) { @@ -9741,20 +9856,17 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { const abi_size = @intCast(u32, ty.abiSize(self.target.*)); const mop1_reg = registerAlias(mops[0].getReg().?, abi_size); const mop2_reg = registerAlias(mops[1].getReg().?, abi_size); - if (mops[2].isRegister()) - try self.asmRegisterRegisterRegister( - tag, - mop1_reg, - mop2_reg, - registerAlias(mops[2].getReg().?, abi_size), - ) - else 
- try self.asmRegisterRegisterMemory( - tag, - mop1_reg, - mop2_reg, - mops[2].mem(Memory.PtrSize.fromSize(abi_size)), - ); + if (mops[2].isRegister()) try self.asmRegisterRegisterRegister( + mir_tag, + mop1_reg, + mop2_reg, + registerAlias(mops[2].getReg().?, abi_size), + ) else try self.asmRegisterRegisterMemory( + mir_tag, + mop1_reg, + mop2_reg, + mops[2].mem(Memory.PtrSize.fromSize(abi_size)), + ); return self.finishAir(inst, mops[0], ops); } diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index b242c98bdc..b8ccc9efba 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -262,61 +262,69 @@ pub const Mnemonic = enum { // MMX movd, // SSE - addss, + addps, addss, andps, andnps, cmpss, cvtsi2ss, - divss, - maxss, minss, + divps, divss, + maxps, maxss, + minps, minss, movaps, movss, movups, - mulss, + mulps, mulss, orps, pextrw, pinsrw, - sqrtps, - sqrtss, - subss, + sqrtps, sqrtss, + subps, subss, ucomiss, xorps, // SSE2 - addsd, + addpd, addsd, andpd, andnpd, //cmpsd, cvtsd2ss, cvtsi2sd, cvtss2sd, - divsd, - maxsd, minsd, + divpd, divsd, + maxpd, maxsd, + minpd, minsd, movapd, movq, //movd, movsd, movupd, - mulsd, + mulpd, mulsd, orpd, pshufhw, pshuflw, psrld, psrlq, psrlw, punpckhbw, punpckhdq, punpckhqdq, punpckhwd, punpcklbw, punpckldq, punpcklqdq, punpcklwd, sqrtpd, sqrtsd, - subsd, + subpd, subsd, ucomisd, xorpd, // SSE3 movddup, movshdup, movsldup, // SSE4.1 - roundsd, roundss, + roundpd, roundps, roundsd, roundss, // AVX + vaddpd, vaddps, vaddsd, vaddss, vcvtsd2ss, vcvtsi2sd, vcvtsi2ss, vcvtss2sd, + vdivpd, vdivps, vdivsd, vdivss, + vmaxpd, vmaxps, vmaxsd, vmaxss, + vminpd, vminps, vminsd, vminss, vmovapd, vmovaps, vmovddup, vmovsd, vmovshdup, vmovsldup, vmovss, vmovupd, vmovups, + vmulpd, vmulps, vmulsd, vmulss, vpextrw, vpinsrw, vpshufhw, vpshuflw, vpsrld, vpsrlq, vpsrlw, vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd, vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd, + vroundpd, vroundps, vroundsd, vroundss, vsqrtpd, vsqrtps, vsqrtsd, vsqrtss, + vsubpd, vsubps, vsubsd, vsubss, // F16C vcvtph2ps, vcvtps2ph, // FMA diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 39ad2313e7..2cfa25ac84 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -124,27 +124,34 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .xchg, .xor, + .addps, .addss, .andnps, .andps, .cmpss, .cvtsi2ss, + .divps, .divss, + .maxps, .maxss, + .minps, .minss, .movaps, .movss, .movups, + .mulps, .mulss, .orps, .pextrw, .pinsrw, .sqrtps, .sqrtss, + .subps, .subss, .ucomiss, .xorps, + .addpd, .addsd, .andnpd, .andpd, @@ -152,10 +159,14 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .cvtsd2ss, .cvtsi2sd, .cvtss2sd, + .divpd, .divsd, + .maxpd, .maxsd, + .minpd, .minsd, .movsd, + .mulpd, .mulsd, .orpd, .pshufhw, @@ -173,6 +184,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .punpcklwd, .sqrtpd, .sqrtsd, + .subpd, .subsd, .ucomisd, .xorpd, @@ -181,13 +193,31 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .movshdup, .movsldup, + .roundpd, + .roundps, .roundsd, .roundss, + .vaddpd, + .vaddps, + .vaddsd, + .vaddss, .vcvtsd2ss, .vcvtsi2sd, .vcvtsi2ss, .vcvtss2sd, + .vdivpd, + .vdivps, + .vdivsd, + .vdivss, + .vmaxpd, + .vmaxps, + .vmaxsd, + .vmaxss, + .vminpd, + .vminps, + .vminsd, + .vminss, .vmovapd, .vmovaps, .vmovddup, @@ -197,6 +227,10 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .vmovss, .vmovupd, 
.vmovups, + .vmulpd, + .vmulps, + .vmulsd, + .vmulss, .vpextrw, .vpinsrw, .vpshufhw, @@ -212,10 +246,18 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .vpunpckldq, .vpunpcklqdq, .vpunpcklwd, + .vroundpd, + .vroundps, + .vroundsd, + .vroundss, .vsqrtpd, .vsqrtps, .vsqrtsd, .vsqrtss, + .vsubpd, + .vsubps, + .vsubsd, + .vsubss, .vcvtph2ps, .vcvtps2ph, @@ -304,6 +346,7 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { .lock_mi_rip_s, => Immediate.s(@bitCast(i32, i)), + .rrri, .rri_u, .ri_u, .i_u, @@ -429,6 +472,12 @@ fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void { .{ .reg = inst.data.rrr.r2 }, .{ .reg = inst.data.rrr.r3 }, }, + .rrri => &.{ + .{ .reg = inst.data.rrri.r1 }, + .{ .reg = inst.data.rrri.r2 }, + .{ .reg = inst.data.rrri.r3 }, + .{ .imm = lower.imm(inst.ops, inst.data.rrri.i) }, + }, .ri_s, .ri_u => &.{ .{ .reg = inst.data.ri.r }, .{ .imm = lower.imm(inst.ops, inst.data.ri.i) }, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index b6df0fff09..c0450406cf 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -166,7 +166,9 @@ pub const Inst = struct { /// Logical exclusive-or xor, - /// Add single precision floating point values + /// Add packed single-precision floating-point values + addps, + /// Add scalar single-precision floating-point values addss, /// Bitwise logical and of packed single precision floating-point values andps, @@ -176,11 +178,17 @@ pub const Inst = struct { cmpss, /// Convert doubleword integer to scalar single-precision floating-point value cvtsi2ss, + /// Divide packed single-precision floating-point values + divps, /// Divide scalar single-precision floating-point values divss, - /// Return maximum single-precision floating-point value + /// Maximum of packed single-precision floating-point values + maxps, + /// Maximum of scalar single-precision floating-point values maxss, - /// Return minimum single-precision floating-point value + /// Minimum of packed single-precision floating-point values + minps, + /// Minimum of scalar single-precision floating-point values minss, /// Move aligned packed single-precision floating-point values movaps, @@ -188,6 +196,8 @@ pub const Inst = struct { movss, /// Move unaligned packed single-precision floating-point values movups, + /// Multiply packed single-precision floating-point values + mulps, /// Multiply scalar single-precision floating-point values mulss, /// Bitwise logical or of packed single precision floating-point values @@ -196,18 +206,22 @@ pub const Inst = struct { pextrw, /// Insert word pinsrw, - /// Square root of scalar single precision floating-point value + /// Square root of packed single-precision floating-point values sqrtps, - /// Subtract scalar single-precision floating-point values + /// Square root of scalar single-precision floating-point value sqrtss, - /// Square root of single precision floating-point values + /// Subtract packed single-precision floating-point values + subps, + /// Subtract scalar single-precision floating-point values subss, /// Unordered compare scalar single-precision floating-point values ucomiss, /// Bitwise logical xor of packed single precision floating-point values xorps, - /// Add double precision floating point values + /// Add packed double-precision floating-point values + addpd, + /// Add scalar double-precision floating-point values addsd, /// Bitwise logical and not of packed double precision floating-point values andnpd, @@ -221,14 +235,22 @@ pub const Inst = struct { cvtsi2sd, 
/// Convert scalar single-precision floating-point value to scalar double-precision floating-point value cvtss2sd, + /// Divide packed double-precision floating-point values + divpd, /// Divide scalar double-precision floating-point values divsd, - /// Return maximum double-precision floating-point value + /// Maximum of packed double-precision floating-point values + maxpd, + /// Maximum of scalar double-precision floating-point values maxsd, - /// Return minimum double-precision floating-point value + /// Minimum of packed double-precision floating-point values + minpd, + /// Minimum of scalar double-precision floating-point values minsd, /// Move scalar double-precision floating-point value movsd, + /// Multiply packed double-precision floating-point values + mulpd, /// Multiply scalar double-precision floating-point values mulsd, /// Bitwise logical or of packed double precision floating-point values @@ -263,6 +285,8 @@ pub const Inst = struct { sqrtpd, /// Square root of scalar double precision floating-point value sqrtsd, + /// Subtract packed double-precision floating-point values + subpd, /// Subtract scalar double-precision floating-point values subsd, /// Unordered compare scalar double-precision floating-point values @@ -277,11 +301,23 @@ pub const Inst = struct { /// Replicate single floating-point values movsldup, - /// Round scalar double-precision floating-point values + /// Round packed double-precision floating-point values + roundpd, + /// Round packed single-precision floating-point values + roundps, + /// Round scalar double-precision floating-point value roundsd, - /// Round scalar single-precision floating-point values + /// Round scalar single-precision floating-point value roundss, + /// Add packed double-precision floating-point values + vaddpd, + /// Add packed single-precision floating-point values + vaddps, + /// Add scalar double-precision floating-point values + vaddsd, + /// Add scalar single-precision floating-point values + vaddss, /// Convert scalar double-precision floating-point value to scalar single-precision floating-point value vcvtsd2ss, /// Convert doubleword integer to scalar double-precision floating-point value @@ -290,6 +326,30 @@ pub const Inst = struct { vcvtsi2ss, /// Convert scalar single-precision floating-point value to scalar double-precision floating-point value vcvtss2sd, + /// Divide packed double-precision floating-point values + vdivpd, + /// Divide packed single-precision floating-point values + vdivps, + /// Divide scalar double-precision floating-point values + vdivsd, + /// Divide scalar single-precision floating-point values + vdivss, + /// Maximum of packed double-precision floating-point values + vmaxpd, + /// Maximum of packed single-precision floating-point values + vmaxps, + /// Maximum of scalar double-precision floating-point values + vmaxsd, + /// Maximum of scalar single-precision floating-point values + vmaxss, + /// Minimum of packed double-precision floating-point values + vminpd, + /// Minimum of packed single-precision floating-point values + vminps, + /// Minimum of scalar double-precision floating-point values + vminsd, + /// Minimum of scalar single-precision floating-point values + vminss, /// Move aligned packed double-precision floating-point values vmovapd, /// Move aligned packed single-precision floating-point values @@ -308,6 +368,14 @@ pub const Inst = struct { vmovupd, /// Move unaligned packed single-precision floating-point values vmovups, + /// Multiply packed double-precision floating-point values 
+ vmulpd, + /// Multiply packed single-precision floating-point values + vmulps, + /// Multiply scalar double-precision floating-point values + vmulsd, + /// Multiply scalar single-precision floating-point values + vmulss, /// Extract word vpextrw, /// Insert word @@ -338,6 +406,14 @@ pub const Inst = struct { vpunpcklqdq, /// Unpack low data vpunpcklwd, + /// Round packed double-precision floating-point values + vroundpd, + /// Round packed single-precision floating-point values + vroundps, + /// Round scalar double-precision floating-point value + vroundsd, + /// Round scalar single-precision floating-point value + vroundss, /// Square root of packed double-precision floating-point value vsqrtpd, /// Square root of packed single-precision floating-point value @@ -346,6 +422,14 @@ pub const Inst = struct { vsqrtsd, /// Square root of scalar single-precision floating-point value vsqrtss, + /// Subtract packed double-precision floating-point values + vsubpd, + /// Subtract packed single-precision floating-point values + vsubps, + /// Subtract scalar double-precision floating-point values + vsubsd, + /// Subtract scalar single-precision floating-point values + vsubss, /// Convert 16-bit floating-point values to single-precision floating-point values vcvtph2ps, @@ -442,6 +526,9 @@ pub const Inst = struct { /// Register, register, register operands. /// Uses `rrr` payload. rrr, + /// Register, register, register, immediate (byte) operands. + /// Uses `rrri` payload. + rrri, /// Register, register, immediate (sign-extended) operands. /// Uses `rri` payload. rri_s, @@ -625,6 +712,12 @@ pub const Inst = struct { r2: Register, r3: Register, }, + rrri: struct { + r1: Register, + r2: Register, + r3: Register, + i: u8, + }, rri: struct { r1: Register, r2: Register, diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 78bda4fc76..c41f0ea4e7 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -837,6 +837,8 @@ pub const table = [_]Entry{ .{ .xor, .rm, &.{ .r64, .rm64 }, &.{ 0x33 }, 0, .long, .none }, // SSE + .{ .addps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x58 }, 0, .none, .sse }, + .{ .addss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .none, .sse }, .{ .andnps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .none, .sse }, @@ -848,10 +850,16 @@ pub const table = [_]Entry{ .{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .none, .sse }, .{ .cvtsi2ss, .rm, &.{ .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .long, .sse }, + .{ .divps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5e }, 0, .none, .sse }, + .{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .none, .sse }, + .{ .maxps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5f }, 0, .none, .sse }, + .{ .maxss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .none, .sse }, + .{ .minps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5d }, 0, .none, .sse }, + .{ .minss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5d }, 0, .none, .sse }, .{ .movaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .none, .sse }, @@ -863,10 +871,14 @@ pub const table = [_]Entry{ .{ .movups, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .none, .sse }, .{ .movups, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x11 }, 0, .none, .sse }, + .{ .mulps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x59 }, 0, .none, .sse }, + .{ .mulss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .none, .sse }, .{ .orps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .none, .sse }, + .{ .subps, .rm, &.{ .xmm, 
.xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .none, .sse }, + .{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .none, .sse }, .{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .none, .sse }, @@ -878,6 +890,8 @@ pub const table = [_]Entry{ .{ .xorps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .none, .sse }, // SSE2 + .{ .addpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .none, .sse2 }, + .{ .addsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .none, .sse2 }, .{ .andnpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .none, .sse2 }, @@ -893,10 +907,16 @@ pub const table = [_]Entry{ .{ .cvtss2sd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .none, .sse2 }, + .{ .divpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .none, .sse2 }, + .{ .divsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .none, .sse2 }, + .{ .maxpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .none, .sse2 }, + .{ .maxsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .none, .sse2 }, + .{ .minpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5d }, 0, .none, .sse2 }, + .{ .minsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5d }, 0, .none, .sse2 }, .{ .movapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .none, .sse2 }, @@ -914,6 +934,8 @@ pub const table = [_]Entry{ .{ .movupd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .none, .sse2 }, .{ .movupd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x11 }, 0, .none, .sse2 }, + .{ .mulpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x59 }, 0, .none, .sse2 }, + .{ .mulsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x59 }, 0, .none, .sse2 }, .{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .none, .sse2 }, @@ -947,6 +969,8 @@ pub const table = [_]Entry{ .{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .none, .sse2 }, + .{ .subpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5c }, 0, .none, .sse2 }, + .{ .subsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .none, .sse2 }, .{ .movsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .none, .sse2 }, @@ -966,10 +990,25 @@ pub const table = [_]Entry{ // SSE4.1 .{ .pextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .none, .sse4_1 }, - .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .none, .sse4_1 }, + .{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 }, + + .{ .roundps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .none, .sse4_1 }, + .{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .none, .sse4_1 }, + .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .none, .sse4_1 }, + // AVX + .{ .vaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_128_wig, .avx }, + .{ .vaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_256_wig, .avx }, + + .{ .vaddps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x58 }, 0, .vex_128_wig, .avx }, + .{ .vaddps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x58 }, 0, .vex_256_wig, .avx }, + + .{ .vaddsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx }, + + .{ .vaddss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx }, + .{ .vcvtsd2ss, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx }, .{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 
0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx }, @@ -980,6 +1019,36 @@ pub const table = [_]Entry{ .{ .vcvtss2sd, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx }, + .{ .vdivpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_128_wig, .avx }, + .{ .vdivpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_256_wig, .avx }, + + .{ .vdivps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5e }, 0, .vex_128_wig, .avx }, + .{ .vdivps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5e }, 0, .vex_256_wig, .avx }, + + .{ .vdivsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .vex_lig_wig, .avx }, + + .{ .vdivss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .vex_lig_wig, .avx }, + + .{ .vmaxpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_128_wig, .avx }, + .{ .vmaxpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_256_wig, .avx }, + + .{ .vmaxps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5f }, 0, .vex_128_wig, .avx }, + .{ .vmaxps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5f }, 0, .vex_256_wig, .avx }, + + .{ .vmaxsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .vex_lig_wig, .avx }, + + .{ .vmaxss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .vex_lig_wig, .avx }, + + .{ .vminpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5d }, 0, .vex_128_wig, .avx }, + .{ .vminpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5d }, 0, .vex_256_wig, .avx }, + + .{ .vminps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5d }, 0, .vex_128_wig, .avx }, + .{ .vminps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5d }, 0, .vex_256_wig, .avx }, + + .{ .vminsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5d }, 0, .vex_lig_wig, .avx }, + + .{ .vminss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5d }, 0, .vex_lig_wig, .avx }, + .{ .vmovapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_128_wig, .avx }, .{ .vmovapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_128_wig, .avx }, .{ .vmovapd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_256_wig, .avx }, @@ -1019,6 +1088,16 @@ pub const table = [_]Entry{ .{ .vmovups, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x10 }, 0, .vex_256_wig, .avx }, .{ .vmovups, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x11 }, 0, .vex_256_wig, .avx }, + .{ .vmulpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x59 }, 0, .vex_128_wig, .avx }, + .{ .vmulpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x59 }, 0, .vex_256_wig, .avx }, + + .{ .vmulps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x59 }, 0, .vex_128_wig, .avx }, + .{ .vmulps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x59 }, 0, .vex_256_wig, .avx }, + + .{ .vmulsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x59 }, 0, .vex_lig_wig, .avx }, + + .{ .vmulss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .vex_lig_wig, .avx }, + .{ .vpextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128_wig, .avx }, .{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_wig, .avx }, @@ -1041,6 +1120,16 @@ pub const table = [_]Entry{ .{ .vpunpckldq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128_wig, .avx }, .{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_128_wig, .avx }, + .{ .vroundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, 
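// Editorial note (a reading of the table layout, not stated in the patch):
// each Entry is { mnemonic, operand encoding, operand classes, opcode bytes,
// ModRM reg extension, encoding mode, required CPU feature }, so the
// .vroundpd row here describes `vroundpd xmm, xmm/m128, imm8`, encoded as
// VEX.128.66.0F3A.WIG 09 /r ib and gated on the `avx` feature.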
.vex_128_wig, .avx }, + .{ .vroundpd, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_256_wig, .avx }, + + .{ .vroundps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .vex_128_wig, .avx }, + .{ .vroundps, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .vex_256_wig, .avx }, + + .{ .vroundsd, .rvmi, &.{ .xmm, .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .vex_lig_wig, .avx }, + + .{ .vroundss, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .vex_lig_wig, .avx }, + .{ .vsqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_128_wig, .avx }, .{ .vsqrtpd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_256_wig, .avx }, @@ -1051,6 +1140,16 @@ pub const table = [_]Entry{ .{ .vsqrtss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .vex_lig_wig, .avx }, + .{ .vsubpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5c }, 0, .vex_128_wig, .avx }, + .{ .vsubpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5c }, 0, .vex_256_wig, .avx }, + + .{ .vsubps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .vex_128_wig, .avx }, + .{ .vsubps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5c }, 0, .vex_256_wig, .avx }, + + .{ .vsubsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .vex_lig_wig, .avx }, + + .{ .vsubss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .vex_lig_wig, .avx }, + // F16C .{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c }, .{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c }, From f8708e2c4d93eece5b3e131fd2d1b5b210806cd6 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sun, 7 May 2023 10:04:56 -0400 Subject: [PATCH 16/20] x86_64: implement `@floor`, `@ceil`, and `@trunc` for float vectors --- src/arch/x86_64/CodeGen.zig | 186 ++++++++++++++++++------------------ test/behavior/floatop.zig | 9 +- 2 files changed, 98 insertions(+), 97 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 8c6f14ec3a..3e2d418105 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1587,9 +1587,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .round, => try self.airUnaryMath(inst), - .floor => try self.airRound(inst, Immediate.u(0b1_0_01)), - .ceil => try self.airRound(inst, Immediate.u(0b1_0_10)), - .trunc_float => try self.airRound(inst, Immediate.u(0b1_0_11)), + .floor => try self.airRound(inst, 0b1_0_01), + .ceil => try self.airRound(inst, 0b1_0_10), + .trunc_float => try self.airRound(inst, 0b1_0_11), .sqrt => try self.airSqrt(inst), .neg, .fabs => try self.airFloatSign(inst), @@ -4509,51 +4509,93 @@ fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); } -fn airRound(self: *Self, inst: Air.Inst.Index, mode: Immediate) !void { +fn airRound(self: *Self, inst: Air.Inst.Index, mode: u4) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const ty = self.air.typeOf(un_op); - if (!self.hasFeature(.sse4_1)) - return self.fail("TODO implement airRound without sse4_1 feature", .{}); - const src_mcv = try self.resolveInst(un_op); const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv)) src_mcv else try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); - - const mir_tag: Mir.Inst.Tag = switch (ty.zigTypeTag()) { - .Float 
=> switch (ty.floatBits(self.target.*)) { - 32 => .roundss, - 64 => .roundsd, - else => return self.fail("TODO implement airRound for {}", .{ - ty.fmt(self.bin_file.options.module.?), - }), - }, - else => return self.fail("TODO implement airRound for {}", .{ - ty.fmt(self.bin_file.options.module.?), - }), - }; - assert(dst_mcv.isRegister()); - const abi_size = @intCast(u32, ty.abiSize(self.target.*)); - const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size); - if (src_mcv.isRegister()) - try self.asmRegisterRegisterImmediate( - mir_tag, - dst_reg, - registerAlias(src_mcv.getReg().?, abi_size), - mode, - ) - else - try self.asmRegisterMemoryImmediate( - mir_tag, - dst_reg, - src_mcv.mem(Memory.PtrSize.fromSize(@intCast(u32, ty.abiSize(self.target.*)))), - mode, - ); + const dst_reg = dst_mcv.getReg().?; + const dst_lock = self.register_manager.lockReg(dst_reg); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + try self.genRound(ty, dst_reg, src_mcv, mode); return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); } +fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4) !void { + if (!self.hasFeature(.sse4_1)) + return self.fail("TODO implement genRound without sse4_1 feature", .{}); + + const mir_tag = if (@as(?Mir.Inst.Tag, switch (ty.zigTypeTag()) { + .Float => switch (ty.floatBits(self.target.*)) { + 32 => if (self.hasFeature(.avx)) .vroundss else .roundss, + 64 => if (self.hasFeature(.avx)) .vroundsd else .roundsd, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (ty.childType().zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 32 => switch (ty.vectorLen()) { + 1 => if (self.hasFeature(.avx)) .vroundss else .roundss, + 2...4 => if (self.hasFeature(.avx)) .vroundps else .roundps, + 5...8 => if (self.hasFeature(.avx)) .vroundps else null, + else => null, + }, + 64 => switch (ty.vectorLen()) { + 1 => if (self.hasFeature(.avx)) .vroundsd else .roundsd, + 2 => if (self.hasFeature(.avx)) .vroundpd else .roundpd, + 3...4 => if (self.hasFeature(.avx)) .vroundpd else null, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => null, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement genRound for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }); + + const abi_size = @intCast(u32, ty.abiSize(self.target.*)); + const dst_alias = registerAlias(dst_reg, abi_size); + switch (mir_tag) { + .vroundss, .vroundsd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + mir_tag, + dst_alias, + dst_alias, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + Immediate.u(mode), + ) else try self.asmRegisterRegisterRegisterImmediate( + mir_tag, + dst_alias, + dst_alias, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(ty, src_mcv), abi_size), + Immediate.u(mode), + ), + else => if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + mir_tag, + dst_alias, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + Immediate.u(mode), + ) else try self.asmRegisterRegisterImmediate( + mir_tag, + dst_alias, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? 
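// Editorial note (per the Intel SDM description of the ROUNDSS/ROUNDPS
// immediate; not part of the patch): `mode` is the 4-bit rounding control,
// grouped here as 0b<suppress-inexact>_<RC source: 0 = immediate, 1 =
// MXCSR>_<rounding mode>, so 0b1_0_01 rounds toward -inf (@floor),
// 0b1_0_10 toward +inf (@ceil), and 0b1_0_11 toward zero (@trunc).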
+ else + try self.copyToTmpRegister(ty, src_mcv), abi_size), + Immediate.u(mode), + ), + } +} + fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const ty = self.air.typeOf(un_op); @@ -6188,18 +6230,18 @@ fn genBinOp( })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), }); - const dst_alias = registerAlias(dst_mcv.getReg().?, abi_size); + const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size); if (self.hasFeature(.avx)) { const src1_alias = - if (copied_to_dst) dst_alias else registerAlias(lhs_mcv.getReg().?, abi_size); + if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( mir_tag, - dst_alias, + dst_reg, src1_alias, src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), ) else try self.asmRegisterRegisterRegister( mir_tag, - dst_alias, + dst_reg, src1_alias, registerAlias(if (src_mcv.isRegister()) src_mcv.getReg().? @@ -6210,11 +6252,11 @@ fn genBinOp( assert(copied_to_dst); if (src_mcv.isMemory()) try self.asmRegisterMemory( mir_tag, - dst_alias, + dst_reg, src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), ) else try self.asmRegisterRegister( mir_tag, - dst_alias, + dst_reg, registerAlias(if (src_mcv.isRegister()) src_mcv.getReg().? else @@ -6223,60 +6265,16 @@ fn genBinOp( } switch (air_tag) { .add, .sub, .mul, .div_float, .div_exact => {}, - .div_trunc, .div_floor => if (self.hasFeature(.sse4_1)) { - const round_tag = if (@as(?Mir.Inst.Tag, switch (lhs_ty.zigTypeTag()) { - .Float => switch (lhs_ty.floatBits(self.target.*)) { - 32 => if (self.hasFeature(.avx)) .vroundss else .roundss, - 64 => if (self.hasFeature(.avx)) .vroundsd else .roundsd, - 16, 80, 128 => null, - else => unreachable, - }, - .Vector => switch (lhs_ty.childType().zigTypeTag()) { - .Float => switch (lhs_ty.childType().floatBits(self.target.*)) { - 32 => switch (lhs_ty.vectorLen()) { - 1 => if (self.hasFeature(.avx)) .vroundss else .roundss, - 2...4 => if (self.hasFeature(.avx)) .vroundps else .roundps, - 5...8 => if (self.hasFeature(.avx)) .vroundps else null, - else => null, - }, - 64 => switch (lhs_ty.vectorLen()) { - 1 => if (self.hasFeature(.avx)) .vroundsd else .roundsd, - 2 => if (self.hasFeature(.avx)) .vroundpd else .roundpd, - 3...4 => if (self.hasFeature(.avx)) .vroundpd else null, - else => null, - }, - 16, 80, 128 => null, - else => unreachable, - }, - else => null, - }, - else => unreachable, - })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), - }); - const round_mode = Immediate.u(switch (air_tag) { + .div_trunc, .div_floor => try self.genRound( + lhs_ty, + dst_reg, + .{ .register = dst_reg }, + switch (air_tag) { .div_trunc => 0b1_0_11, .div_floor => 0b1_0_01, else => unreachable, - }); - switch (round_tag) { - .vroundss, .vroundsd => try self.asmRegisterRegisterRegisterImmediate( - round_tag, - dst_alias, - dst_alias, - dst_alias, - round_mode, - ), - else => try self.asmRegisterRegisterImmediate( - round_tag, - dst_alias, - dst_alias, - round_mode, - ), - } - } else return self.fail("TODO implement genBinOp for {s} {} without sse4_1", .{ - @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), + }, + ), .max, .min => {}, // TODO: unordered select else => unreachable, } diff --git a/test/behavior/floatop.zig b/test/behavior/floatop.zig index 3f407061f4..3d46c267d3 
100644 --- a/test/behavior/floatop.zig +++ b/test/behavior/floatop.zig @@ -617,7 +617,8 @@ fn testFloor() !void { test "@floor with vectors" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO @@ -707,7 +708,8 @@ fn testCeil() !void { test "@ceil with vectors" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO @@ -797,7 +799,8 @@ fn testTrunc() !void { test "@trunc with vectors" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO From 6778da4516e68c271cb50fe9c252ab4084daf16b Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sun, 7 May 2023 20:42:46 -0400 Subject: [PATCH 17/20] x86_64: implement binary operations for `f16` and `f16` vectors --- src/arch/x86_64/CodeGen.zig | 261 +++++++++++++++++++++++++++++++--- src/arch/x86_64/Encoding.zig | 23 +-- src/arch/x86_64/Lower.zig | 22 +++ src/arch/x86_64/Mir.zig | 44 ++++++ src/arch/x86_64/encodings.zig | 20 +++ test/behavior/floatop.zig | 22 +-- test/behavior/muladd.zig | 8 +- 7 files changed, 354 insertions(+), 46 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 3e2d418105..154b909a21 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -4497,14 +4497,15 @@ fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void { const tag = self.air.instructions.items(.tag)[inst]; try self.genBinOpMir(switch (ty_bits) { // No point using an extra prefix byte for *pd which performs the same operation. 
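Editorial note, not part of the patch: IEEE-754 negation and absolute value
touch only the sign bit, so one bitwise instruction serves every float width,
which is why the hunk below widens the 32/64-bit case to 16/32/64/128 while
keeping the shorter *ps encodings. A minimal scalar sketch of the identity the
lowering relies on (illustration only; the helper name is hypothetical, and
the two-argument builtin style matches the compiler sources of this era):

    fn signBitSketch(x: f32) struct { neg: f32, abs: f32 } {
        const sign_mask: u32 = 0x8000_0000;
        const bits = @bitCast(u32, x);
        return .{
            // negation: flip the sign bit, which is what xorps against the mask does
            .neg = @bitCast(f32, bits ^ sign_mask),
            // @fabs: clear the sign bit, which is what andnps (~mask & value) does
            .abs = @bitCast(f32, bits & ~sign_mask),
        };
    }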
-        32, 64 => switch (tag) {
+        16, 32, 64, 128 => switch (tag) {
             .neg => .xorps,
             .fabs => .andnps,
             else => unreachable,
         },
-        else => return self.fail("TODO implement airFloatSign for {}", .{
+        80 => return self.fail("TODO implement airFloatSign for {}", .{
             ty.fmt(self.bin_file.options.module.?),
         }),
+        else => unreachable,
     }, vec_ty, dst_mcv, sign_mcv);
     return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
 }
@@ -6112,9 +6113,53 @@ fn genBinOp(
         return dst_mcv;
     }
+    const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size);
     const mir_tag = if (@as(?Mir.Inst.Tag, switch (lhs_ty.zigTypeTag()) {
         else => unreachable,
         .Float => switch (lhs_ty.floatBits(self.target.*)) {
+            16 => if (self.hasFeature(.f16c)) {
+                const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
+                const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+                defer self.register_manager.unlockReg(tmp_lock);
+
+                if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+                    .vpinsrw,
+                    dst_reg,
+                    dst_reg,
+                    src_mcv.mem(.word),
+                    Immediate.u(1),
+                ) else try self.asmRegisterRegisterRegister(
+                    .vpunpcklwd,
+                    dst_reg,
+                    dst_reg,
+                    (if (src_mcv.isRegister())
+                        src_mcv.getReg().?
+                    else
+                        try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
+                );
+                try self.asmRegisterRegister(.vcvtph2ps, dst_reg, dst_reg);
+                try self.asmRegisterRegister(.vmovshdup, tmp_reg, dst_reg);
+                try self.asmRegisterRegisterRegister(
+                    switch (air_tag) {
+                        .add => .vaddss,
+                        .sub => .vsubss,
+                        .mul => .vmulss,
+                        .div_float, .div_trunc, .div_floor, .div_exact => .vdivss,
+                        .max => .vmaxss,
+                        .min => .vminss,
+                        else => unreachable,
+                    },
+                    dst_reg,
+                    dst_reg,
+                    tmp_reg,
+                );
+                try self.asmRegisterRegisterImmediate(
+                    .vcvtps2ph,
+                    dst_reg,
+                    dst_reg,
+                    Immediate.u(0b1_00),
+                );
+                return dst_mcv;
+            } else null,
             32 => switch (air_tag) {
                 .add => if (self.hasFeature(.avx)) .vaddss else .addss,
                 .sub => if (self.hasFeature(.avx)) .vsubss else .subss,
@@ -6141,12 +6186,178 @@ fn genBinOp(
                 .min => if (self.hasFeature(.avx)) .vminsd else .minsd,
                 else => unreachable,
             },
-            16, 80, 128 => null,
+            80, 128 => null,
             else => unreachable,
         },
         .Vector => switch (lhs_ty.childType().zigTypeTag()) {
             else => null,
             .Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
+                16 => if (self.hasFeature(.f16c)) switch (lhs_ty.vectorLen()) {
+                    1 => {
+                        const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
+                        const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+                        defer self.register_manager.unlockReg(tmp_lock);
+
+                        if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+                            .vpinsrw,
+                            dst_reg,
+                            dst_reg,
+                            src_mcv.mem(.word),
+                            Immediate.u(1),
+                        ) else try self.asmRegisterRegisterRegister(
+                            .vpunpcklwd,
+                            dst_reg,
+                            dst_reg,
+                            (if (src_mcv.isRegister())
+                                src_mcv.getReg().?
+                            else
+                                try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
+                        );
+                        try self.asmRegisterRegister(.vcvtph2ps, dst_reg, dst_reg);
+                        try self.asmRegisterRegister(.vmovshdup, tmp_reg, dst_reg);
+                        try self.asmRegisterRegisterRegister(
+                            switch (air_tag) {
+                                .add => .vaddss,
+                                .sub => .vsubss,
+                                .mul => .vmulss,
+                                .div_float, .div_trunc, .div_floor, .div_exact => .vdivss,
+                                .max => .vmaxss,
+                                .min => .vminss,
+                                else => unreachable,
+                            },
+                            dst_reg,
+                            dst_reg,
+                            tmp_reg,
+                        );
+                        try self.asmRegisterRegisterImmediate(
+                            .vcvtps2ph,
+                            dst_reg,
+                            dst_reg,
+                            Immediate.u(0b1_00),
+                        );
+                        return dst_mcv;
+                    },
+                    2 => {
+                        const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
+                        const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+                        defer self.register_manager.unlockReg(tmp_lock);
+
+                        if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
+                            .vpinsrd,
+                            dst_reg,
+                            src_mcv.mem(.dword),
+                            Immediate.u(1),
+                        ) else try self.asmRegisterRegisterRegister(
+                            .vunpcklps,
+                            dst_reg,
+                            dst_reg,
+                            (if (src_mcv.isRegister())
+                                src_mcv.getReg().?
+                            else
+                                try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
+                        );
+                        try self.asmRegisterRegister(.vcvtph2ps, dst_reg, dst_reg);
+                        try self.asmRegisterRegisterRegister(.vmovhlps, tmp_reg, dst_reg, dst_reg);
+                        try self.asmRegisterRegisterRegister(
+                            switch (air_tag) {
+                                .add => .vaddps,
+                                .sub => .vsubps,
+                                .mul => .vmulps,
+                                .div_float, .div_trunc, .div_floor, .div_exact => .vdivps,
+                                .max => .vmaxps,
+                                .min => .vminps,
+                                else => unreachable,
+                            },
+                            dst_reg,
+                            dst_reg,
+                            tmp_reg,
+                        );
+                        try self.asmRegisterRegisterImmediate(
+                            .vcvtps2ph,
+                            dst_reg,
+                            dst_reg,
+                            Immediate.u(0b1_00),
+                        );
+                        return dst_mcv;
+                    },
+                    3...4 => {
+                        const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128();
+                        const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+                        defer self.register_manager.unlockReg(tmp_lock);
+
+                        try self.asmRegisterRegister(.vcvtph2ps, dst_reg, dst_reg);
+                        if (src_mcv.isMemory()) try self.asmRegisterMemory(
+                            .vcvtph2ps,
+                            tmp_reg,
+                            src_mcv.mem(.qword),
+                        ) else try self.asmRegisterRegister(
+                            .vcvtph2ps,
+                            tmp_reg,
+                            (if (src_mcv.isRegister())
+                                src_mcv.getReg().?
+                            else
+                                try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
+                        );
+                        try self.asmRegisterRegisterRegister(
+                            switch (air_tag) {
+                                .add => .vaddps,
+                                .sub => .vsubps,
+                                .mul => .vmulps,
+                                .div_float, .div_trunc, .div_floor, .div_exact => .vdivps,
+                                .max => .vmaxps,
+                                .min => .vminps,
+                                else => unreachable,
+                            },
+                            dst_reg,
+                            dst_reg,
+                            tmp_reg,
+                        );
+                        try self.asmRegisterRegisterImmediate(
+                            .vcvtps2ph,
+                            dst_reg,
+                            dst_reg,
+                            Immediate.u(0b1_00),
+                        );
+                        return dst_mcv;
+                    },
+                    5...8 => {
+                        const tmp_reg = (try self.register_manager.allocReg(null, sse)).to256();
+                        const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+                        defer self.register_manager.unlockReg(tmp_lock);
+
+                        try self.asmRegisterRegister(.vcvtph2ps, dst_reg.to256(), dst_reg);
+                        if (src_mcv.isMemory()) try self.asmRegisterMemory(
+                            .vcvtph2ps,
+                            tmp_reg,
+                            src_mcv.mem(.xword),
+                        ) else try self.asmRegisterRegister(
+                            .vcvtph2ps,
+                            tmp_reg,
+                            (if (src_mcv.isRegister())
+                                src_mcv.getReg().?
+                            else
+                                try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
+                        );
+                        try self.asmRegisterRegisterRegister(
+                            switch (air_tag) {
+                                .add => .vaddps,
+                                .sub => .vsubps,
+                                .mul => .vmulps,
+                                .div_float, .div_trunc, .div_floor, .div_exact => .vdivps,
+                                .max => .vmaxps,
+                                .min => .vminps,
+                                else => unreachable,
+                            },
+                            dst_reg.to256(),
+                            dst_reg.to256(),
+                            tmp_reg,
+                        );
+                        try self.asmRegisterRegisterImmediate(
+                            .vcvtps2ph,
+                            dst_reg,
+                            dst_reg.to256(),
+                            Immediate.u(0b1_00),
+                        );
+                        return dst_mcv;
+                    },
+                    else => null,
+                } else null,
                 32 => switch (lhs_ty.vectorLen()) {
                     1 => switch (air_tag) {
                         .add => if (self.hasFeature(.avx)) .vaddss else .addss,
@@ -6223,14 +6434,13 @@
                     } else null,
                     else => null,
                 },
-                16, 80, 128 => null,
+                80, 128 => null,
                 else => unreachable,
             },
         },
     })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
         @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
     });
-    const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size);
     if (self.hasFeature(.avx)) {
         const src1_alias =
             if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
@@ -7139,21 +7349,21 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
         const tmp2_lock = self.register_manager.lockRegAssumeUnused(tmp2_reg);
         defer self.register_manager.unlockReg(tmp2_lock);
-        if (src_mcv.isRegister())
-            try self.asmRegisterRegisterRegister(
-                .vpunpcklwd,
-                tmp1_reg,
-                dst_reg.to128(),
-                src_mcv.getReg().?.to128(),
-            )
-        else
-            try self.asmRegisterRegisterMemoryImmediate(
-                .vpinsrw,
-                tmp1_reg,
-                dst_reg.to128(),
-                src_mcv.mem(.word),
-                Immediate.u(1),
-            );
+        if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+            .vpinsrw,
+            tmp1_reg,
+            dst_reg.to128(),
+            src_mcv.mem(.word),
+            Immediate.u(1),
+        ) else try self.asmRegisterRegisterRegister(
+            .vpunpcklwd,
+            tmp1_reg,
+            dst_reg.to128(),
+            (if (src_mcv.isRegister())
+                src_mcv.getReg().?
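// Editorial note (not part of the patch): SSE/AVX provide no f16 arithmetic,
// so every f16 path in this commit follows the same shape: pack lhs and rhs
// into adjacent lanes (vpinsrw/vpunpcklwd/vunpcklps), widen with vcvtph2ps,
// operate in f32, and narrow back with vcvtps2ph. The vcvtps2ph immediate
// 0b1_00 sets bit 2, which selects the current MXCSR rounding mode instead
// of the immediate's own rounding-control bits.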
+ else + try self.copyToTmpRegister(ty, src_mcv)).to128(), + ); try self.asmRegisterRegister(.vcvtph2ps, tmp1_reg, tmp1_reg); try self.asmRegisterRegister(.vmovshdup, tmp2_reg, tmp1_reg); try self.genBinOpMir(.ucomiss, ty, tmp1_mcv, tmp2_mcv); @@ -8139,7 +8349,16 @@ fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.Tag { }, .Vector => switch (ty.childType().zigTypeTag()) { .Float => switch (ty.childType().floatBits(self.target.*)) { - 16 => unreachable, // needs special handling + 16 => switch (ty.vectorLen()) { + 1 => unreachable, // needs special handling + 2 => return if (self.hasFeature(.avx)) .vmovss else .movss, + 3...4 => return if (self.hasFeature(.avx)) .vmovsd else .movsd, + 5...8 => return if (self.hasFeature(.avx)) + if (aligned) .vmovaps else .vmovups + else if (aligned) .movaps else .movups, + 9...16 => if (self.hasFeature(.avx)) return if (aligned) .vmovaps else .vmovups, + else => {}, + }, 32 => switch (ty.vectorLen()) { 1 => return if (self.hasFeature(.avx)) .vmovss else .movss, 2...4 => return if (self.hasFeature(.avx)) diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index b8ccc9efba..3235b29358 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -270,7 +270,7 @@ pub const Mnemonic = enum { divps, divss, maxps, maxss, minps, minss, - movaps, movss, movups, + movaps, movhlps, movss, movups, mulps, mulss, orps, pextrw, pinsrw, @@ -303,6 +303,8 @@ pub const Mnemonic = enum { // SSE3 movddup, movshdup, movsldup, // SSE4.1 + pextrb, pextrd, pextrq, + pinsrb, pinsrd, pinsrq, roundpd, roundps, roundsd, roundss, // AVX vaddpd, vaddps, vaddsd, vaddss, @@ -311,13 +313,14 @@ pub const Mnemonic = enum { vmaxpd, vmaxps, vmaxsd, vmaxss, vminpd, vminps, vminsd, vminss, vmovapd, vmovaps, - vmovddup, + vmovddup, vmovhlps, vmovsd, vmovshdup, vmovsldup, vmovss, vmovupd, vmovups, vmulpd, vmulps, vmulsd, vmulss, - vpextrw, vpinsrw, + vpextrb, vpextrd, vpextrq, vpextrw, + vpinsrb, vpinsrd, vpinsrq, vpinsrw, vpshufhw, vpshuflw, vpsrld, vpsrlq, vpsrlw, vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd, @@ -359,7 +362,7 @@ pub const Op = enum { cl, r8, r16, r32, r64, rm8, rm16, rm32, rm64, - r32_m16, r64_m16, + r32_m8, r32_m16, r64_m16, m8, m16, m32, m64, m80, m128, m256, rel8, rel16, rel32, m, @@ -444,7 +447,7 @@ pub const Op = enum { pub fn immBitSize(op: Op) u64 { return switch (op) { .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, - .al, .cl, .r8, .rm8 => unreachable, + .al, .cl, .r8, .rm8, .r32_m8 => unreachable, .ax, .r16, .rm16 => unreachable, .eax, .r32, .rm32, .r32_m16 => unreachable, .rax, .r64, .rm64, .r64_m16 => unreachable, @@ -467,7 +470,7 @@ pub const Op = enum { .m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable, .al, .cl, .r8, .rm8 => 8, .ax, .r16, .rm16 => 16, - .eax, .r32, .rm32, .r32_m16 => 32, + .eax, .r32, .rm32, .r32_m8, .r32_m16 => 32, .rax, .r64, .rm64, .r64_m16 => 64, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128, .ymm, .ymm_m256 => 256, @@ -480,7 +483,7 @@ pub const Op = enum { .unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable, .rel8, .rel16, .rel32 => unreachable, .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64, .xmm, .ymm => unreachable, - .m8, .rm8 => 8, + .m8, .rm8, .r32_m8 => 8, .m16, .rm16, .r32_m16, .r64_m16 => 16, .m32, .rm32, .xmm_m32 => 32, .m64, .rm64, .xmm_m64 => 64, @@ -509,7 +512,7 @@ pub const Op = enum { .al, .ax, .eax, .rax, .r8, .r16, .r32, .r64, .rm8, .rm16, .rm32, .rm64, - .r32_m16, .r64_m16, + .r32_m8, .r32_m16, .r64_m16, .xmm, .xmm_m32, 
.xmm_m64, .xmm_m128, .ymm, .ymm_m256, => true, @@ -535,7 +538,7 @@ pub const Op = enum { // zig fmt: off return switch (op) { .rm8, .rm16, .rm32, .rm64, - .r32_m16, .r64_m16, + .r32_m8, .r32_m16, .r64_m16, .m8, .m16, .m32, .m64, .m80, .m128, .m256, .m, .xmm_m32, .xmm_m64, .xmm_m128, @@ -559,7 +562,7 @@ pub const Op = enum { .al, .ax, .eax, .rax, .cl => .general_purpose, .r8, .r16, .r32, .r64 => .general_purpose, .rm8, .rm16, .rm32, .rm64 => .general_purpose, - .r32_m16, .r64_m16 => .general_purpose, + .r32_m8, .r32_m16, .r64_m16 => .general_purpose, .sreg => .segment, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .floating_point, .ymm, .ymm_m256 => .floating_point, diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 2cfa25ac84..5c079f4768 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -137,6 +137,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .minps, .minss, .movaps, + .movhlps, .movss, .movups, .mulps, @@ -149,6 +150,8 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .subps, .subss, .ucomiss, + .unpckhps, + .unpcklps, .xorps, .addpd, @@ -187,12 +190,20 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .subpd, .subsd, .ucomisd, + .unpckhpd, + .unpcklpd, .xorpd, .movddup, .movshdup, .movsldup, + .pextrb, + .pextrd, + .pextrq, + .pinsrb, + .pinsrd, + .pinsrq, .roundpd, .roundps, .roundsd, @@ -221,6 +232,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .vmovapd, .vmovaps, .vmovddup, + .vmovhlps, .vmovsd, .vmovshdup, .vmovsldup, @@ -231,7 +243,13 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .vmulps, .vmulsd, .vmulss, + .vpextrb, + .vpextrd, + .vpextrq, .vpextrw, + .vpinsrb, + .vpinsrd, + .vpinsrq, .vpinsrw, .vpshufhw, .vpshuflw, @@ -258,6 +276,10 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .vsubps, .vsubsd, .vsubss, + .vunpckhpd, + .vunpckhps, + .vunpcklpd, + .vunpcklps, .vcvtph2ps, .vcvtps2ph, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index c0450406cf..442cfabebb 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -192,6 +192,8 @@ pub const Inst = struct { minss, /// Move aligned packed single-precision floating-point values movaps, + /// Move packed single-precision floating-point values high to low + movhlps, /// Move scalar single-precision floating-point value movss, /// Move unaligned packed single-precision floating-point values @@ -216,6 +218,10 @@ pub const Inst = struct { subss, /// Unordered compare scalar single-precision floating-point values ucomiss, + /// Unpack and interleave high packed single-precision floating-point values + unpckhps, + /// Unpack and interleave low packed single-precision floating-point values + unpcklps, /// Bitwise logical xor of packed single precision floating-point values xorps, @@ -291,6 +297,10 @@ pub const Inst = struct { subsd, /// Unordered compare scalar double-precision floating-point values ucomisd, + /// Unpack and interleave high packed double-precision floating-point values + unpckhpd, + /// Unpack and interleave low packed double-precision floating-point values + unpcklpd, /// Bitwise logical xor of packed double precision floating-point values xorpd, @@ -301,6 +311,18 @@ pub const Inst = struct { /// Replicate single floating-point values movsldup, + /// Extract Byte + pextrb, + /// Extract Doubleword + pextrd, + /// Extract Quadword + pextrq, + /// Insert Byte + pinsrb, + /// Insert Doubleword + pinsrd, + /// 
Insert Quadword + pinsrq, /// Round packed double-precision floating-point values roundpd, /// Round packed single-precision floating-point values @@ -354,6 +376,8 @@ pub const Inst = struct { vmovapd, /// Move aligned packed single-precision floating-point values vmovaps, + /// Move packed single-precision floating-point values high to low + vmovhlps, /// Replicate double floating-point values vmovddup, /// Move or merge scalar double-precision floating-point value @@ -376,8 +400,20 @@ pub const Inst = struct { vmulsd, /// Multiply scalar single-precision floating-point values vmulss, + /// Extract Byte + vpextrb, + /// Extract Doubleword + vpextrd, + /// Extract Quadword + vpextrq, /// Extract word vpextrw, + /// Insert Byte + vpinsrb, + /// Insert Doubleword + vpinsrd, + /// Insert Quadword + vpinsrq, /// Insert word vpinsrw, /// Shuffle packed high words @@ -430,6 +466,14 @@ pub const Inst = struct { vsubsd, /// Subtract scalar single-precision floating-point values vsubss, + /// Unpack and interleave high packed double-precision floating-point values + vunpckhpd, + /// Unpack and interleave high packed single-precision floating-point values + vunpckhps, + /// Unpack and interleave low packed double-precision floating-point values + vunpcklpd, + /// Unpack and interleave low packed single-precision floating-point values + vunpcklps, /// Convert 16-bit floating-point values to single-precision floating-point values vcvtph2ps, diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index c41f0ea4e7..2b9d530c1e 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -865,6 +865,8 @@ pub const table = [_]Entry{ .{ .movaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .none, .sse }, .{ .movaps, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x29 }, 0, .none, .sse }, + .{ .movhlps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .none, .sse }, + .{ .movss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .none, .sse }, .{ .movss, .mr, &.{ .xmm_m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .none, .sse }, @@ -988,8 +990,16 @@ pub const table = [_]Entry{ .{ .movsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .none, .sse3 }, // SSE4.1 + .{ .pextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .none, .sse4_1 }, + .{ .pextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .none, .sse4_1 }, + .{ .pextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .long, .sse4_1 }, + .{ .pextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .none, .sse4_1 }, + .{ .pinsrb, .rmi, &.{ .xmm, .r32_m8, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x20 }, 0, .none, .sse4_1 }, + .{ .pinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .none, .sse4_1 }, + .{ .pinsrq, .rmi, &.{ .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .long, .sse4_1 }, + .{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 }, .{ .roundps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .none, .sse4_1 }, @@ -1062,6 +1072,8 @@ pub const table = [_]Entry{ .{ .vmovddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, .{ .vmovddup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_256_wig, .avx }, + .{ .vmovhlps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, + .{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, .{ .vmovsd, .rm, &.{ .xmm, .m64 }, 
&.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, .{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, @@ -1098,9 +1110,17 @@ pub const table = [_]Entry{ .{ .vmulss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .vex_lig_wig, .avx }, + .{ .vpextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .vex_128_w0, .avx }, + .{ .vpextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w0, .avx }, + .{ .vpextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w1, .avx }, + .{ .vpextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128_wig, .avx }, .{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_wig, .avx }, + .{ .vpinsrb, .rmi, &.{ .xmm, .r32_m8, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x20 }, 0, .vex_128_w0, .avx }, + .{ .vpinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w0, .avx }, + .{ .vpinsrq, .rmi, &.{ .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w1, .avx }, + .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx }, .{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128_wig, .avx }, diff --git a/test/behavior/floatop.zig b/test/behavior/floatop.zig index 3d46c267d3..242c8dabe5 100644 --- a/test/behavior/floatop.zig +++ b/test/behavior/floatop.zig @@ -8,6 +8,8 @@ const has_f80_rt = switch (builtin.cpu.arch) { .x86_64, .x86 => true, else => false, }; +const no_x86_64_hardware_f16_support = builtin.zig_backend == .stage2_x86_64 and + !std.Target.x86.featureSetHas(builtin.cpu.features, .f16c); const epsilon_16 = 0.001; const epsilon = 0.000001; @@ -52,8 +54,7 @@ fn testFloatComparisons() !void { } test "different sized float comparisons" { - if (builtin.zig_backend == .stage2_x86_64 and - !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .f16c)) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -152,7 +153,7 @@ fn testSqrtWithVectors() !void { } test "more @sqrt f16 tests" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -202,7 +203,7 @@ fn testSqrtLegacy(comptime T: type, x: T) !void { } test "@sin" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -241,7 +242,7 @@ fn testSinWithVectors() !void { } test "@cos" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if 
(builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -280,7 +281,7 @@ fn testCosWithVectors() !void { } test "@exp" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -318,7 +319,7 @@ fn testExpWithVectors() !void { } test "@exp2" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -403,7 +404,7 @@ test "@log with @vectors" { } test "@log2" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -445,7 +446,7 @@ fn testLog2WithVectors() !void { } test "@log10" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -881,7 +882,7 @@ fn testTruncLegacy(comptime T: type, x: T) !void { } test "negation f16" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -1040,7 +1041,6 @@ test "comptime_float zero divided by zero produces zero" { } test "nan negation f16" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/muladd.zig b/test/behavior/muladd.zig index bfb94de270..199f117e7b 100644 --- a/test/behavior/muladd.zig +++ b/test/behavior/muladd.zig @@ -2,11 +2,11 @@ const std = @import("std"); const builtin = @import("builtin"); const expect = std.testing.expect; -const stage2_x86_64_without_hardware_fma_support = builtin.zig_backend == .stage2_x86_64 and +const no_x86_64_hardware_fma_support = builtin.zig_backend == .stage2_x86_64 and !std.Target.x86.featureSetHas(builtin.cpu.features, .fma); test "@mulAdd" { - if (stage2_x86_64_without_hardware_fma_support) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_fma_support) return error.SkipZigTest; // TODO if 
(builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -120,7 +120,7 @@ fn vector32() !void { test "vector f32" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (stage2_x86_64_without_hardware_fma_support) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_fma_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -143,7 +143,7 @@ fn vector64() !void { test "vector f64" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (stage2_x86_64_without_hardware_fma_support) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_fma_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO From 6c14eb2863c7c00f809c5e447ceb8186b55f2eef Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 8 May 2023 06:50:18 -0400 Subject: [PATCH 18/20] x86_64: optimize mir tag usage This moves all pseudo-instructions to a single `Mir.Inst.Tag` tag and prepares to start coalescing similar mnemonics. 239 tags left in use. --- src/arch/x86_64/CodeGen.zig | 427 +++++++++++++-------- src/arch/x86_64/Emit.zig | 65 ++-- src/arch/x86_64/Encoding.zig | 2 +- src/arch/x86_64/Lower.zig | 717 ++++++++++------------------------- src/arch/x86_64/Mir.zig | 517 ++++++++++++++++--------- src/arch/x86_64/bits.zig | 3 - 6 files changed, 853 insertions(+), 878 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 154b909a21..3ac05c95ac 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -973,14 +973,14 @@ fn addInst(self: *Self, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index { try self.mir_instructions.ensureUnusedCapacity(gpa, 1); const result_index = @intCast(Mir.Inst.Index, self.mir_instructions.len); self.mir_instructions.appendAssumeCapacity(inst); - switch (inst.tag) { - else => wip_mir_log.debug("{}", .{self.fmtWipMir(result_index)}), - .dbg_line, - .dbg_prologue_end, - .dbg_epilogue_begin, - .dead, - => {}, - } + if (inst.tag != .pseudo or switch (inst.ops) { + else => true, + .pseudo_dbg_prologue_end_none, + .pseudo_dbg_line_line_column, + .pseudo_dbg_epilogue_begin_none, + .pseudo_dead_none, + => false, + }) wip_mir_log.debug("{}", .{self.fmtWipMir(result_index)}); return result_index; } @@ -1003,98 +1003,166 @@ fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 { return result; } +/// A `cc` of `.z_and_np` clobbers `reg2`! +fn asmCmovccRegisterRegister(self: *Self, reg1: Register, reg2: Register, cc: bits.Condition) !void { + _ = try self.addInst(.{ + .tag = switch (cc) { + else => .cmov, + .z_and_np, .nz_or_p => .pseudo, + }, + .ops = switch (cc) { + else => .rr, + .z_and_np => .pseudo_cmov_z_and_np_rr, + .nz_or_p => .pseudo_cmov_nz_or_p_rr, + }, + .data = .{ .rr = .{ + .fixes = switch (cc) { + else => Mir.Inst.Fixes.fromCondition(cc), + .z_and_np, .nz_or_p => ._, + }, + .r1 = reg1, + .r2 = reg2, + } }, + }); +} + +/// A `cc` of `.z_and_np` is not supported by this encoding! 
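// Reviewer annotation (illustrative, not part of the patch): `.z_and_np` and
// `.nz_or_p` are the compound EFLAGS conditions produced by float compares
// via ucomiss/ucomisd (PF = 1 signals an unordered result), and neither has
// a single cmovcc encoding. They are therefore emitted as `.pseudo`
// instructions and expanded by Lower.zig (later in this diff) into two real
// cmovs. The register-register `.z_and_np` case expands to:
//
//     cmovnz r2, r1  // ZF == 0: overwrite r2 with r1 -- this is the clobber
//     cmovnp r1, r2  // PF == 0: r1 takes r2, the original r2 only when ZF == 1
//
// Net effect: r1 = old r2 iff ZF == 1 and PF == 0, hence the doc comment
// above warning that `.z_and_np` clobbers `reg2`.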
+fn asmCmovccRegisterMemory(self: *Self, reg: Register, m: Memory, cc: bits.Condition) !void { + _ = try self.addInst(.{ + .tag = switch (cc) { + else => .cmov, + .z_and_np => unreachable, + .nz_or_p => .pseudo, + }, + .ops = switch (cc) { + else => switch (m) { + .sib => .rm_sib, + .rip => .rm_rip, + else => unreachable, + }, + .z_and_np => unreachable, + .nz_or_p => switch (m) { + .sib => .pseudo_cmov_nz_or_p_rm_sib, + .rip => .pseudo_cmov_nz_or_p_rm_rip, + else => unreachable, + }, + }, + .data = .{ .rx = .{ + .fixes = switch (cc) { + else => Mir.Inst.Fixes.fromCondition(cc), + .z_and_np => unreachable, + .nz_or_p => ._, + }, + .r1 = reg, + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, + }); +} + fn asmSetccRegister(self: *Self, reg: Register, cc: bits.Condition) !void { _ = try self.addInst(.{ - .tag = .setcc, - .ops = .r_cc, - .data = .{ .r_cc = .{ - .r = reg, - .scratch = if (cc == .z_and_np or cc == .nz_or_p) - (try self.register_manager.allocReg(null, gp)).to8() - else - .none, - .cc = cc, - } }, + .tag = switch (cc) { + else => .set, + .z_and_np, .nz_or_p => .pseudo, + }, + .ops = switch (cc) { + else => .r, + .z_and_np => .pseudo_set_z_and_np_r, + .nz_or_p => .pseudo_set_nz_or_p_r, + }, + .data = switch (cc) { + else => .{ .r = .{ + .fixes = Mir.Inst.Fixes.fromCondition(cc), + .r1 = reg, + } }, + .z_and_np, .nz_or_p => .{ .r_scratch = .{ + .r1 = reg, + .scratch_reg = (try self.register_manager.allocReg(null, gp)).to8(), + } }, + }, }); } fn asmSetccMemory(self: *Self, m: Memory, cc: bits.Condition) !void { + const payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }; _ = try self.addInst(.{ - .tag = .setcc, - .ops = switch (m) { - .sib => .m_sib_cc, - .rip => .m_rip_cc, - else => unreachable, + .tag = switch (cc) { + else => .set, + .z_and_np, .nz_or_p => .pseudo, }, - .data = .{ .x_cc = .{ - .scratch = if (cc == .z_and_np or cc == .nz_or_p) - (try self.register_manager.allocReg(null, gp)).to8() - else - .none, - .cc = cc, - .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + .ops = switch (cc) { + else => switch (m) { + .sib => .m_sib, + .rip => .m_rip, else => unreachable, }, - } }, - }); -} - -/// A `cc` of `.z_and_np` clobbers `reg2`! 
-fn asmCmovccRegisterRegister(self: *Self, reg1: Register, reg2: Register, cc: bits.Condition) !void { - _ = try self.addInst(.{ - .tag = .cmovcc, - .ops = .rr_cc, - .data = .{ .rr_cc = .{ - .r1 = reg1, - .r2 = reg2, - .cc = cc, - } }, - }); -} - -fn asmCmovccRegisterMemory(self: *Self, reg: Register, m: Memory, cc: bits.Condition) !void { - assert(cc != .z_and_np); // not supported - _ = try self.addInst(.{ - .tag = .cmovcc, - .ops = switch (m) { - .sib => .rm_sib_cc, - .rip => .rm_rip_cc, - else => unreachable, - }, - .data = .{ .rx_cc = .{ - .r = reg, - .cc = cc, - .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + .z_and_np => switch (m) { + .sib => .pseudo_set_z_and_np_m_sib, + .rip => .pseudo_set_z_and_np_m_rip, else => unreachable, }, - } }, + .nz_or_p => switch (m) { + .sib => .pseudo_set_nz_or_p_m_sib, + .rip => .pseudo_set_nz_or_p_m_rip, + else => unreachable, + }, + }, + .data = switch (cc) { + else => .{ .x = .{ + .fixes = Mir.Inst.Fixes.fromCondition(cc), + .payload = payload, + } }, + .z_and_np, .nz_or_p => .{ .x_scratch = .{ + .scratch_reg = (try self.register_manager.allocReg(null, gp)).to8(), + .payload = payload, + } }, + }, }); } fn asmJmpReloc(self: *Self, target: Mir.Inst.Index) !Mir.Inst.Index { return self.addInst(.{ - .tag = .jmp_reloc, - .ops = undefined, - .data = .{ .inst = target }, + .tag = .jmp, + .ops = .inst, + .data = .{ .inst = .{ + .inst = target, + } }, }); } fn asmJccReloc(self: *Self, target: Mir.Inst.Index, cc: bits.Condition) !Mir.Inst.Index { return self.addInst(.{ - .tag = .jcc, - .ops = .inst_cc, - .data = .{ .inst_cc = .{ .inst = target, .cc = cc } }, + .tag = switch (cc) { + else => .j, + .z_and_np, .nz_or_p => .pseudo, + }, + .ops = switch (cc) { + else => .inst, + .z_and_np => .pseudo_j_z_and_np_inst, + .nz_or_p => .pseudo_j_nz_or_p_inst, + }, + .data = .{ .inst = .{ + .fixes = switch (cc) { + else => Mir.Inst.Fixes.fromCondition(cc), + .z_and_np, .nz_or_p => ._, + }, + .inst = target, + } }, }); } fn asmPlaceholder(self: *Self) !Mir.Inst.Index { return self.addInst(.{ - .tag = .dead, - .ops = undefined, + .tag = .pseudo, + .ops = .pseudo_dead_none, .data = undefined, }); } @@ -1107,11 +1175,19 @@ fn asmOpOnly(self: *Self, tag: Mir.Inst.Tag) !void { }); } +fn asmPseudo(self: *Self, ops: Mir.Inst.Ops) !void { + _ = try self.addInst(.{ + .tag = .pseudo, + .ops = ops, + .data = undefined, + }); +} + fn asmRegister(self: *Self, tag: Mir.Inst.Tag, reg: Register) !void { _ = try self.addInst(.{ .tag = tag, .ops = .r, - .data = .{ .r = reg }, + .data = .{ .r = .{ .r1 = reg } }, }); } @@ -1122,9 +1198,11 @@ fn asmImmediate(self: *Self, tag: Mir.Inst.Tag, imm: Immediate) !void { .signed => .i_s, .unsigned => .i_u, }, - .data = .{ .i = switch (imm) { - .signed => |s| @bitCast(u32, s), - .unsigned => |u| @intCast(u32, u), + .data = .{ .i = .{ + .i = switch (imm) { + .signed => |s| @bitCast(u32, s), + .unsigned => |u| @intCast(u32, u), + }, } }, }); } @@ -1147,14 +1225,14 @@ fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.Tag, reg: Register, imm: Imme .ops = ops, .data = switch (ops) { .ri_s, .ri_u => .{ .ri = .{ - .r = reg, + .r1 = reg, .i = switch (imm) { .signed => |s| @bitCast(u32, s), .unsigned => |u| @intCast(u32, u), }, } }, .ri64 => .{ .rx = .{ - .r = reg, + .r1 = reg, .payload = try self.addExtra(Mir.Imm64.encode(imm.unsigned)), } }, else => unreachable, @@ -1249,10 +1327,12 @@ fn asmMemory(self: *Self, tag: Mir.Inst.Tag, m: Memory) !void { .rip => .m_rip, else 
=> unreachable, }, - .data = .{ .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, + .data = .{ .x = .{ + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, } }, }); } @@ -1266,7 +1346,7 @@ fn asmRegisterMemory(self: *Self, tag: Mir.Inst.Tag, reg: Register, m: Memory) ! else => unreachable, }, .data = .{ .rx = .{ - .r = reg, + .r1 = reg, .payload = switch (m) { .sib => try self.addExtra(Mir.MemorySib.encode(m)), .rip => try self.addExtra(Mir.MemoryRip.encode(m)), @@ -1291,7 +1371,7 @@ fn asmRegisterMemoryImmediate( else => unreachable, }, .data = .{ .rix = .{ - .r = reg, + .r1 = reg, .i = @intCast(u8, imm.unsigned), .payload = switch (m) { .sib => try self.addExtra(Mir.MemorySib.encode(m)), @@ -1339,7 +1419,7 @@ fn asmMemoryRegister(self: *Self, tag: Mir.Inst.Tag, m: Memory, reg: Register) ! else => unreachable, }, .data = .{ .rx = .{ - .r = reg, + .r1 = reg, .payload = switch (m) { .sib => try self.addExtra(Mir.MemorySib.encode(m)), .rip => try self.addExtra(Mir.MemoryRip.encode(m)), @@ -1413,11 +1493,15 @@ fn asmMemoryRegisterImmediate( .rip => .mri_rip, else => unreachable, }, - .data = .{ .rix = .{ .r = reg, .i = @intCast(u8, imm.unsigned), .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, + .data = .{ .rix = .{ + .r1 = reg, + .i = @intCast(u8, imm.unsigned), + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } @@ -1450,7 +1534,7 @@ fn gen(self: *Self) InnerError!void { else => unreachable, } - try self.asmOpOnly(.dbg_prologue_end); + try self.asmPseudo(.pseudo_dbg_prologue_end_none); try self.genBody(self.air.getMainBody()); @@ -1462,11 +1546,11 @@ fn gen(self: *Self) InnerError!void { // } // Eliding the reloc will cause a miscompilation in this case. 
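// Annotation (illustrative, not part of the patch): with the reworked
// payloads, every local jump -- plain `jmp`, a conditional `j` carrying a
// `Fixes` condition, and the compound `pseudo_j_*` forms -- stores its
// target in the same `data.inst.inst` field. That is what lets the exitlude
// loop below (and `performReloc` later in this diff) backpatch all of them
// uniformly, roughly:
//
//     const reloc = try self.asmJmpReloc(undefined); // target patched later
//     // ... emit the rest of the body ...
//     self.mir_instructions.items(.data)[reloc].inst.inst = epilogue_index;
//
// (`epilogue_index` is a hypothetical stand-in for the instruction index of
// the epilogue.)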
for (self.exitlude_jump_relocs.items) |jmp_reloc| { - self.mir_instructions.items(.data)[jmp_reloc].inst = + self.mir_instructions.items(.data)[jmp_reloc].inst.inst = @intCast(u32, self.mir_instructions.len); } - try self.asmOpOnly(.dbg_epilogue_begin); + try self.asmPseudo(.pseudo_dbg_epilogue_begin_none); const backpatch_stack_dealloc = try self.asmPlaceholder(); const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder(); try self.asmRegister(.pop, .rbp); @@ -1480,46 +1564,54 @@ fn gen(self: *Self) InnerError!void { self.mir_instructions.set(backpatch_frame_align, .{ .tag = .@"and", .ops = .ri_s, - .data = .{ .ri = .{ .r = .rsp, .i = frame_layout.stack_mask } }, + .data = .{ .ri = .{ + .r1 = .rsp, + .i = frame_layout.stack_mask, + } }, }); } if (need_stack_adjust) { self.mir_instructions.set(backpatch_stack_alloc, .{ .tag = .sub, .ops = .ri_s, - .data = .{ .ri = .{ .r = .rsp, .i = frame_layout.stack_adjust } }, + .data = .{ .ri = .{ + .r1 = .rsp, + .i = frame_layout.stack_adjust, + } }, }); } if (need_frame_align or need_stack_adjust) { self.mir_instructions.set(backpatch_stack_dealloc, .{ .tag = .mov, .ops = .rr, - .data = .{ .rr = .{ .r1 = .rsp, .r2 = .rbp } }, + .data = .{ .rr = .{ + .r1 = .rsp, + .r2 = .rbp, + } }, }); } if (need_save_reg) { - const save_reg_list = frame_layout.save_reg_list.asInt(); self.mir_instructions.set(backpatch_push_callee_preserved_regs, .{ - .tag = .push_regs, - .ops = undefined, - .data = .{ .payload = save_reg_list }, + .tag = .pseudo, + .ops = .pseudo_push_reg_list, + .data = .{ .reg_list = frame_layout.save_reg_list }, }); self.mir_instructions.set(backpatch_pop_callee_preserved_regs, .{ - .tag = .pop_regs, - .ops = undefined, - .data = .{ .payload = save_reg_list }, + .tag = .pseudo, + .ops = .pseudo_pop_reg_list, + .data = .{ .reg_list = frame_layout.save_reg_list }, }); } } else { - try self.asmOpOnly(.dbg_prologue_end); + try self.asmPseudo(.pseudo_dbg_prologue_end_none); try self.genBody(self.air.getMainBody()); - try self.asmOpOnly(.dbg_epilogue_begin); + try self.asmPseudo(.pseudo_dbg_epilogue_begin_none); } // Drop them off at the rbrace. 
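// Annotation (illustrative, not part of the patch): debug-info directives
// are now ordinary `.pseudo` instructions whose `Ops` value both selects the
// behavior and names its payload (`pseudo_dbg_line_line_column` carries
// `line_column`). Consumers dispatch on `ops` rather than on dedicated tags;
// a minimal model of the Emit.zig handling found further down this diff,
// with `emitLine`/`emitReal` as hypothetical stand-ins for the real handlers:
//
//     switch (inst.tag) {
//         .pseudo => switch (inst.ops) {
//             .pseudo_dbg_line_line_column => try emitLine(inst.data.line_column),
//             .pseudo_dead_none => {}, // tombstone: skip entirely
//             else => unreachable,
//         },
//         else => try emitReal(inst),
//     }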
_ = try self.addInst(.{ - .tag = .dbg_line, - .ops = undefined, + .tag = .pseudo, + .ops = .pseudo_dbg_line_line_column, .data = .{ .line_column = .{ .line = self.end_di_line, .column = self.end_di_column, @@ -2446,11 +2538,11 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { .register => |dst_reg| { const min_abi_size = @min(dst_abi_size, src_abi_size); const tag: Mir.Inst.Tag = switch (signedness) { - .signed => .movsx, - .unsigned => if (min_abi_size > 2) .mov else .movzx, + .signed => if (min_abi_size >= 4) .movsxd else .movsx, + .unsigned => if (min_abi_size >= 4) .mov else .movzx, }; const dst_alias = switch (tag) { - .movsx => dst_reg.to64(), + .movsx, .movsxd => dst_reg.to64(), .mov, .movzx => if (min_abi_size > 4) dst_reg.to64() else dst_reg.to32(), else => unreachable, }; @@ -5247,7 +5339,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { const field_byte_size = @intCast(u32, field_ty.abiSize(self.target.*)); if (signedness == .signed and field_byte_size < 8) { try self.asmRegisterRegister( - .movsx, + if (field_byte_size >= 4) .movsxd else .movsx, dst_mcv.register, registerAlias(dst_mcv.register, field_byte_size), ); @@ -7194,10 +7286,10 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const atom_index = try self.owner.getSymbolIndex(self); const sym_index = try coff_file.getGlobalSymbol(decl_name, lib_name); _ = try self.addInst(.{ - .tag = .mov_linker, + .tag = .mov, .ops = .import_reloc, .data = .{ .rx = .{ - .r = .rax, + .r1 = .rax, .payload = try self.addExtra(Mir.Reloc{ .atom_index = atom_index, .sym_index = sym_index, @@ -7209,9 +7301,9 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const atom_index = try self.owner.getSymbolIndex(self); const sym_index = try macho_file.getGlobalSymbol(decl_name, lib_name); _ = try self.addInst(.{ - .tag = .call_extern, - .ops = undefined, - .data = .{ .relocation = .{ + .tag = .call, + .ops = .extern_fn_reloc, + .data = .{ .reloc = .{ .atom_index = atom_index, .sym_index = sym_index, } }, @@ -7489,8 +7581,8 @@ fn genTry( fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void { const dbg_stmt = self.air.instructions.items(.data)[inst].dbg_stmt; _ = try self.addInst(.{ - .tag = .dbg_line, - .ops = undefined, + .tag = .pseudo, + .ops = .pseudo_dbg_line_line_column, .data = .{ .line_column = .{ .line = dbg_stmt.line, .column = dbg_stmt.column, @@ -8021,14 +8113,14 @@ fn airSwitchBr(self: *Self, inst: Air.Inst.Index) !void { fn performReloc(self: *Self, reloc: Mir.Inst.Index) !void { const next_inst = @intCast(u32, self.mir_instructions.len); switch (self.mir_instructions.items(.tag)[reloc]) { - .jcc => { - self.mir_instructions.items(.data)[reloc].inst_cc.inst = next_inst; - }, - .jmp_reloc => { - self.mir_instructions.items(.data)[reloc].inst = next_inst; + .j, .jmp => {}, + .pseudo => switch (self.mir_instructions.items(.ops)[reloc]) { + .pseudo_j_z_and_np_inst, .pseudo_j_nz_or_p_inst => {}, + else => unreachable, }, else => unreachable, } + self.mir_instructions.items(.data)[reloc].inst.inst = next_inst; } fn airBr(self: *Self, inst: Air.Inst.Index) !void { @@ -8577,10 +8669,10 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr .load_direct => |sym_index| if (!ty.isRuntimeFloat()) { const atom_index = try self.owner.getSymbolIndex(self); _ = try self.addInst(.{ - .tag = .mov_linker, + .tag = .mov, .ops = .direct_reloc, .data = .{ .rx = .{ - .r = dst_reg.to64(), + .r1 = dst_reg.to64(), .payload = try 
self.addExtra(Mir.Reloc{ .atom_index = atom_index, .sym_index = sym_index, @@ -8618,8 +8710,8 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr const atom_index = try self.owner.getSymbolIndex(self); _ = try self.addInst(.{ .tag = switch (src_mcv) { - .lea_direct => .lea_linker, - .lea_got => .mov_linker, + .lea_direct => .lea, + .lea_got => .mov, else => unreachable, }, .ops = switch (src_mcv) { @@ -8628,7 +8720,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr else => unreachable, }, .data = .{ .rx = .{ - .r = dst_reg.to64(), + .r1 = dst_reg.to64(), .payload = try self.addExtra(Mir.Reloc{ .atom_index = atom_index, .sym_index = sym_index, @@ -8640,10 +8732,10 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr const atom_index = try self.owner.getSymbolIndex(self); if (self.bin_file.cast(link.File.MachO)) |_| { _ = try self.addInst(.{ - .tag = .lea_linker, + .tag = .lea, .ops = .tlv_reloc, .data = .{ .rx = .{ - .r = .rdi, + .r1 = .rdi, .payload = try self.addExtra(Mir.Reloc{ .atom_index = atom_index, .sym_index = sym_index, @@ -8847,9 +8939,9 @@ fn genInlineMemcpy(self: *Self, dst_ptr: MCValue, src_ptr: MCValue, len: MCValue try self.genSetReg(.rsi, Type.usize, src_ptr); try self.genSetReg(.rcx, Type.usize, len); _ = try self.addInst(.{ - .tag = .movs, - .ops = .string, - .data = .{ .string = .{ .repeat = .rep, .width = .b } }, + .tag = .mov, + .ops = .none, + .data = .{ .none = .{ .fixes = .@"rep _sb" } }, }); } @@ -8859,9 +8951,9 @@ fn genInlineMemset(self: *Self, dst_ptr: MCValue, value: MCValue, len: MCValue) try self.genSetReg(.al, Type.u8, value); try self.genSetReg(.rcx, Type.usize, len); _ = try self.addInst(.{ - .tag = .stos, - .ops = .string, - .data = .{ .string = .{ .repeat = .rep, .width = .b } }, + .tag = .sto, + .ops = .none, + .data = .{ .none = .{ .fixes = .@"rep _sb" } }, }); } @@ -9135,22 +9227,22 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void { defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock); try self.spillEflagsIfOccupied(); - if (val_abi_size <= 8) { - _ = try self.addInst(.{ - .tag = .cmpxchg, - .ops = .lock_mr_sib, - .data = .{ .rx = .{ - .r = registerAlias(new_reg.?, val_abi_size), - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } }, - }); - } else { - _ = try self.addInst(.{ - .tag = .cmpxchgb, - .ops = .lock_m_sib, - .data = .{ .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)) }, - }); - } + _ = try self.addInst(if (val_abi_size <= 8) .{ + .tag = .cmpxchg, + .ops = .mr_sib, + .data = .{ .rx = .{ + .fixes = .@"lock _", + .r1 = registerAlias(new_reg.?, val_abi_size), + .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), + } }, + } else .{ + .tag = .cmpxchg, + .ops = .m_sib, + .data = .{ .x = .{ + .fixes = .@"lock _16b", + .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), + } }, + }); const result: MCValue = result: { if (self.liveness.isUnused(inst)) break :result .unreach; @@ -9252,13 +9344,14 @@ fn atomicOp( } _ = try self.addInst(.{ .tag = tag, - .ops = switch (tag) { - .mov, .xchg => .mr_sib, - .xadd, .add, .sub, .@"and", .@"or", .xor => .lock_mr_sib, - else => unreachable, - }, + .ops = .mr_sib, .data = .{ .rx = .{ - .r = registerAlias(dst_reg, val_abi_size), + .fixes = switch (tag) { + .mov, .xchg => ._, + .xadd, .add, .sub, .@"and", .@"or", .xor => .@"lock _", + else => unreachable, + }, + .r1 = registerAlias(dst_reg, val_abi_size), .payload = try 
self.addExtra(Mir.MemorySib.encode(ptr_mem)), } }, }); @@ -9330,9 +9423,10 @@ fn atomicOp( }; _ = try self.addInst(.{ .tag = .cmpxchg, - .ops = .lock_mr_sib, + .ops = .mr_sib, .data = .{ .rx = .{ - .r = registerAlias(tmp_reg, val_abi_size), + .fixes = .@"lock _", + .r1 = registerAlias(tmp_reg, val_abi_size), .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), } }, }); @@ -9397,9 +9491,14 @@ fn atomicOp( val_ty.fmt(self.bin_file.options.module.?), @tagName(op), }), }; - _ = try self.addInst(.{ .tag = .cmpxchgb, .ops = .lock_m_sib, .data = .{ - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } }); + _ = try self.addInst(.{ + .tag = .cmpxchg, + .ops = .m_sib, + .data = .{ .x = .{ + .fixes = .@"lock _16b", + .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), + } }, + }); _ = try self.asmJccReloc(loop, .ne); if (unused) return .unreach; diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 3574d52878..506092ff17 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -41,7 +41,7 @@ pub fn emitMir(emit: *Emit) Error!void { .offset = end_offset - 4, .length = @intCast(u5, end_offset - start_offset), }), - .@"extern" => |symbol| if (emit.bin_file.cast(link.File.MachO)) |macho_file| { + .linker_extern_fn => |symbol| if (emit.bin_file.cast(link.File.MachO)) |macho_file| { // Add relocation to the decl. const atom_index = macho_file.getAtomIndexForSymbol( .{ .sym_index = symbol.atom_index, .file = null }, @@ -129,36 +129,39 @@ pub fn emitMir(emit: *Emit) Error!void { const mir_inst = emit.lower.mir.instructions.get(mir_index); switch (mir_inst.tag) { else => unreachable, - .dead => {}, - .dbg_line => try emit.dbgAdvancePCAndLine( - mir_inst.data.line_column.line, - mir_inst.data.line_column.column, - ), - .dbg_prologue_end => { - switch (emit.debug_output) { - .dwarf => |dw| { - try dw.setPrologueEnd(); - log.debug("mirDbgPrologueEnd (line={d}, col={d})", .{ - emit.prev_di_line, emit.prev_di_column, - }); - try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); - }, - .plan9 => {}, - .none => {}, - } - }, - .dbg_epilogue_begin => { - switch (emit.debug_output) { - .dwarf => |dw| { - try dw.setEpilogueBegin(); - log.debug("mirDbgEpilogueBegin (line={d}, col={d})", .{ - emit.prev_di_line, emit.prev_di_column, - }); - try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); - }, - .plan9 => {}, - .none => {}, - } + .pseudo => switch (mir_inst.ops) { + else => unreachable, + .pseudo_dbg_prologue_end_none => { + switch (emit.debug_output) { + .dwarf => |dw| { + try dw.setPrologueEnd(); + log.debug("mirDbgPrologueEnd (line={d}, col={d})", .{ + emit.prev_di_line, emit.prev_di_column, + }); + try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); + }, + .plan9 => {}, + .none => {}, + } + }, + .pseudo_dbg_line_line_column => try emit.dbgAdvancePCAndLine( + mir_inst.data.line_column.line, + mir_inst.data.line_column.column, + ), + .pseudo_dbg_epilogue_begin_none => { + switch (emit.debug_output) { + .dwarf => |dw| { + try dw.setEpilogueBegin(); + log.debug("mirDbgEpilogueBegin (line={d}, col={d})", .{ + emit.prev_di_line, emit.prev_di_column, + }); + try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); + }, + .plan9 => {}, + .none => {}, + } + }, + .pseudo_dead_none => {}, }, } } diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 3235b29358..b6b49e8939 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -705,7 +705,7 @@ fn 
estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op } const mnemonic_to_encodings_map = init: { - @setEvalBranchQuota(100_000); + @setEvalBranchQuota(20_000); const encodings = @import("encodings.zig"); var entries = encodings.table; std.sort.sort(encodings.Entry, &entries, {}, struct { diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 5c079f4768..2d7fa4b4fd 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -35,7 +35,7 @@ pub const Reloc = struct { const Target = union(enum) { inst: Mir.Inst.Index, - @"extern": Mir.Reloc, + linker_extern_fn: Mir.Reloc, linker_got: Mir.Reloc, linker_direct: Mir.Reloc, linker_import: Mir.Reloc, @@ -59,280 +59,119 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { const inst = lower.mir.instructions.get(index); switch (inst.tag) { - .adc, - .add, - .@"and", - .bsf, - .bsr, - .bswap, - .bt, - .btc, - .btr, - .bts, - .call, - .cbw, - .cwde, - .cdqe, - .cwd, - .cdq, - .cqo, - .cmp, - .cmpxchg, - .div, - .fisttp, - .fld, - .idiv, - .imul, - .int3, - .jmp, - .lea, - .lfence, - .lzcnt, - .mfence, - .mov, - .movbe, - .movd, - .movq, - .movzx, - .mul, - .neg, - .nop, - .not, - .@"or", - .pop, - .popcnt, - .push, - .rcl, - .rcr, - .ret, - .rol, - .ror, - .sal, - .sar, - .sbb, - .sfence, - .shl, - .shld, - .shr, - .shrd, - .sub, - .syscall, - .@"test", - .tzcnt, - .ud2, - .xadd, - .xchg, - .xor, + else => try lower.generic(inst), + .pseudo => switch (inst.ops) { + .pseudo_cmov_z_and_np_rr => { + try lower.emit(.none, .cmovnz, &.{ + .{ .reg = inst.data.rr.r2 }, + .{ .reg = inst.data.rr.r1 }, + }); + try lower.emit(.none, .cmovnp, &.{ + .{ .reg = inst.data.rr.r1 }, + .{ .reg = inst.data.rr.r2 }, + }); + }, + .pseudo_cmov_nz_or_p_rr => { + try lower.emit(.none, .cmovnz, &.{ + .{ .reg = inst.data.rr.r1 }, + .{ .reg = inst.data.rr.r2 }, + }); + try lower.emit(.none, .cmovp, &.{ + .{ .reg = inst.data.rr.r1 }, + .{ .reg = inst.data.rr.r2 }, + }); + }, + .pseudo_cmov_nz_or_p_rm_sib, + .pseudo_cmov_nz_or_p_rm_rip, + => { + try lower.emit(.none, .cmovnz, &.{ + .{ .reg = inst.data.rx.r1 }, + .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, + }); + try lower.emit(.none, .cmovp, &.{ + .{ .reg = inst.data.rx.r1 }, + .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, + }); + }, + .pseudo_set_z_and_np_r => { + try lower.emit(.none, .setz, &.{ + .{ .reg = inst.data.r_scratch.r1 }, + }); + try lower.emit(.none, .setnp, &.{ + .{ .reg = inst.data.r_scratch.scratch_reg }, + }); + try lower.emit(.none, .@"and", &.{ + .{ .reg = inst.data.r_scratch.r1 }, + .{ .reg = inst.data.r_scratch.scratch_reg }, + }); + }, + .pseudo_set_z_and_np_m_sib, + .pseudo_set_z_and_np_m_rip, + => { + try lower.emit(.none, .setz, &.{ + .{ .mem = lower.mem(inst.ops, inst.data.x_scratch.payload) }, + }); + try lower.emit(.none, .setnp, &.{ + .{ .reg = inst.data.x_scratch.scratch_reg }, + }); + try lower.emit(.none, .@"and", &.{ + .{ .mem = lower.mem(inst.ops, inst.data.x_scratch.payload) }, + .{ .reg = inst.data.x_scratch.scratch_reg }, + }); + }, + .pseudo_set_nz_or_p_r => { + try lower.emit(.none, .setnz, &.{ + .{ .reg = inst.data.r_scratch.r1 }, + }); + try lower.emit(.none, .setp, &.{ + .{ .reg = inst.data.r_scratch.scratch_reg }, + }); + try lower.emit(.none, .@"or", &.{ + .{ .reg = inst.data.r_scratch.r1 }, + .{ .reg = inst.data.r_scratch.scratch_reg }, + }); + }, + .pseudo_set_nz_or_p_m_sib, + .pseudo_set_nz_or_p_m_rip, + => { + try lower.emit(.none, .setnz, &.{ + .{ .mem = lower.mem(inst.ops, 
inst.data.x_scratch.payload) }, + }); + try lower.emit(.none, .setp, &.{ + .{ .reg = inst.data.x_scratch.scratch_reg }, + }); + try lower.emit(.none, .@"or", &.{ + .{ .mem = lower.mem(inst.ops, inst.data.x_scratch.payload) }, + .{ .reg = inst.data.x_scratch.scratch_reg }, + }); + }, + .pseudo_j_z_and_np_inst => { + try lower.emit(.none, .jnz, &.{ + .{ .imm = lower.reloc(.{ .inst = index + 1 }) }, + }); + try lower.emit(.none, .jnp, &.{ + .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }) }, + }); + }, + .pseudo_j_nz_or_p_inst => { + try lower.emit(.none, .jnz, &.{ + .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }) }, + }); + try lower.emit(.none, .jp, &.{ + .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }) }, + }); + }, - .addps, - .addss, - .andnps, - .andps, - .cmpss, - .cvtsi2ss, - .divps, - .divss, - .maxps, - .maxss, - .minps, - .minss, - .movaps, - .movhlps, - .movss, - .movups, - .mulps, - .mulss, - .orps, - .pextrw, - .pinsrw, - .sqrtps, - .sqrtss, - .subps, - .subss, - .ucomiss, - .unpckhps, - .unpcklps, - .xorps, + .pseudo_push_reg_list => try lower.pushPopRegList(.push, inst), + .pseudo_pop_reg_list => try lower.pushPopRegList(.pop, inst), - .addpd, - .addsd, - .andnpd, - .andpd, - .cmpsd, - .cvtsd2ss, - .cvtsi2sd, - .cvtss2sd, - .divpd, - .divsd, - .maxpd, - .maxsd, - .minpd, - .minsd, - .movsd, - .mulpd, - .mulsd, - .orpd, - .pshufhw, - .pshuflw, - .psrld, - .psrlq, - .psrlw, - .punpckhbw, - .punpckhdq, - .punpckhqdq, - .punpckhwd, - .punpcklbw, - .punpckldq, - .punpcklqdq, - .punpcklwd, - .sqrtpd, - .sqrtsd, - .subpd, - .subsd, - .ucomisd, - .unpckhpd, - .unpcklpd, - .xorpd, - - .movddup, - .movshdup, - .movsldup, - - .pextrb, - .pextrd, - .pextrq, - .pinsrb, - .pinsrd, - .pinsrq, - .roundpd, - .roundps, - .roundsd, - .roundss, - - .vaddpd, - .vaddps, - .vaddsd, - .vaddss, - .vcvtsd2ss, - .vcvtsi2sd, - .vcvtsi2ss, - .vcvtss2sd, - .vdivpd, - .vdivps, - .vdivsd, - .vdivss, - .vmaxpd, - .vmaxps, - .vmaxsd, - .vmaxss, - .vminpd, - .vminps, - .vminsd, - .vminss, - .vmovapd, - .vmovaps, - .vmovddup, - .vmovhlps, - .vmovsd, - .vmovshdup, - .vmovsldup, - .vmovss, - .vmovupd, - .vmovups, - .vmulpd, - .vmulps, - .vmulsd, - .vmulss, - .vpextrb, - .vpextrd, - .vpextrq, - .vpextrw, - .vpinsrb, - .vpinsrd, - .vpinsrq, - .vpinsrw, - .vpshufhw, - .vpshuflw, - .vpsrld, - .vpsrlq, - .vpsrlw, - .vpunpckhbw, - .vpunpckhdq, - .vpunpckhqdq, - .vpunpckhwd, - .vpunpcklbw, - .vpunpckldq, - .vpunpcklqdq, - .vpunpcklwd, - .vroundpd, - .vroundps, - .vroundsd, - .vroundss, - .vsqrtpd, - .vsqrtps, - .vsqrtsd, - .vsqrtss, - .vsubpd, - .vsubps, - .vsubsd, - .vsubss, - .vunpckhpd, - .vunpckhps, - .vunpcklpd, - .vunpcklps, - - .vcvtph2ps, - .vcvtps2ph, - - .vfmadd132pd, - .vfmadd213pd, - .vfmadd231pd, - .vfmadd132ps, - .vfmadd213ps, - .vfmadd231ps, - .vfmadd132sd, - .vfmadd213sd, - .vfmadd231sd, - .vfmadd132ss, - .vfmadd213ss, - .vfmadd231ss, - => try lower.mirGeneric(inst), - - .cmps, - .lods, - .movs, - .scas, - .stos, - => try lower.mirString(inst), - - .cmpxchgb => try lower.mirCmpxchgBytes(inst), - - .jmp_reloc => try lower.emitInstWithReloc(.none, .jmp, &.{ - .{ .imm = Immediate.s(0) }, - }, .{ .inst = inst.data.inst }), - - .call_extern => try lower.emitInstWithReloc(.none, .call, &.{ - .{ .imm = Immediate.s(0) }, - }, .{ .@"extern" = inst.data.relocation }), - - .lea_linker => try lower.mirLinker(.lea, inst), - .mov_linker => try lower.mirLinker(.mov, inst), - - .mov_moffs => try lower.mirMovMoffs(inst), - - .movsx => try lower.mirMovsx(inst), - .cmovcc => try lower.mirCmovcc(inst), 
- .setcc => try lower.mirSetcc(inst), - .jcc => try lower.mirJcc(index, inst), - - .push_regs => try lower.mirRegisterList(.push, inst), - .pop_regs => try lower.mirRegisterList(.pop, inst), - - .dbg_line, - .dbg_prologue_end, - .dbg_epilogue_begin, - .dead, - => {}, + .pseudo_dbg_prologue_end_none, + .pseudo_dbg_line_line_column, + .pseudo_dbg_epilogue_begin_none, + .pseudo_dead_none, + => {}, + else => unreachable, + }, } return .{ @@ -348,15 +187,6 @@ pub fn fail(lower: *Lower, comptime format: []const u8, args: anytype) Error { return error.LowerFail; } -fn mnem_cc(comptime base: @Type(.EnumLiteral), cc: bits.Condition) Mnemonic { - return switch (cc) { - inline else => |c| if (@hasField(Mnemonic, @tagName(base) ++ @tagName(c))) - @field(Mnemonic, @tagName(base) ++ @tagName(c)) - else - unreachable, - }; -} - fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { return switch (ops) { .rri_s, @@ -364,8 +194,6 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { .i_s, .mi_sib_s, .mi_rip_s, - .lock_mi_sib_s, - .lock_mi_rip_s, => Immediate.s(@bitCast(i32, i)), .rrri, @@ -374,8 +202,6 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { .i_u, .mi_sib_u, .mi_rip_u, - .lock_mi_sib_u, - .lock_mi_rip_u, .rmi_sib, .rmi_rip, .mri_sib, @@ -395,10 +221,8 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory { return lower.mir.resolveFrameLoc(switch (ops) { .rm_sib, - .rm_sib_cc, .rmi_sib, .m_sib, - .m_sib_cc, .mi_sib_u, .mi_sib_s, .mr_sib, @@ -406,17 +230,15 @@ fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory { .mri_sib, .rrm_sib, .rrmi_sib, - .lock_m_sib, - .lock_mi_sib_u, - .lock_mi_sib_s, - .lock_mr_sib, + + .pseudo_cmov_nz_or_p_rm_sib, + .pseudo_set_z_and_np_m_sib, + .pseudo_set_nz_or_p_m_sib, => lower.mir.extraData(Mir.MemorySib, payload).data.decode(), .rm_rip, - .rm_rip_cc, .rmi_rip, .m_rip, - .m_rip_cc, .mi_rip_u, .mi_rip_s, .mr_rip, @@ -424,66 +246,83 @@ fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory { .mri_rip, .rrm_rip, .rrmi_rip, - .lock_m_rip, - .lock_mi_rip_u, - .lock_mi_rip_s, - .lock_mr_rip, + + .pseudo_cmov_nz_or_p_rm_rip, + .pseudo_set_z_and_np_m_rip, + .pseudo_set_nz_or_p_m_rip, => lower.mir.extraData(Mir.MemoryRip, payload).data.decode(), .rax_moffs, .moffs_rax, - .lock_moffs_rax, => lower.mir.extraData(Mir.MemoryMoffs, payload).data.decode(), else => unreachable, }); } -fn emitInst(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) Error!void { - lower.result_insts[lower.result_insts_len] = try Instruction.new(prefix, mnemonic, ops); - lower.result_insts_len += 1; -} - -fn emitInstWithReloc( - lower: *Lower, - prefix: Prefix, - mnemonic: Mnemonic, - ops: []const Operand, - target: Reloc.Target, -) Error!void { +fn reloc(lower: *Lower, target: Reloc.Target) Immediate { lower.result_relocs[lower.result_relocs_len] = .{ .lowered_inst_index = lower.result_insts_len, .target = target, }; lower.result_relocs_len += 1; - try lower.emitInst(prefix, mnemonic, ops); + return Immediate.s(0); } -fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void { - try lower.emitInst(switch (inst.ops) { - else => .none, - .lock_m_sib, - .lock_m_rip, - .lock_mi_sib_u, - .lock_mi_rip_u, - .lock_mi_sib_s, - .lock_mi_rip_s, - .lock_mr_sib, - .lock_mr_rip, - .lock_moffs_rax, - => .lock, - }, switch (inst.tag) { - inline else => |tag| if (@hasField(Mnemonic, @tagName(tag))) - @field(Mnemonic, @tagName(tag)) +fn emit(lower: *Lower, prefix: Prefix, mnemonic: 
Mnemonic, ops: []const Operand) Error!void { + lower.result_insts[lower.result_insts_len] = try Instruction.new(prefix, mnemonic, ops); + lower.result_insts_len += 1; +} + +fn generic(lower: *Lower, inst: Mir.Inst) Error!void { + const fixes = switch (inst.ops) { + .none => inst.data.none.fixes, + .inst => inst.data.inst.fixes, + .i_s, .i_u => inst.data.i.fixes, + .r => inst.data.r.fixes, + .rr => inst.data.rr.fixes, + .rrr => inst.data.rrr.fixes, + .rrri => inst.data.rrri.fixes, + .rri_s, .rri_u => inst.data.rri.fixes, + .ri_s, .ri_u => inst.data.ri.fixes, + .ri64, .rm_sib, .rm_rip, .mr_sib, .mr_rip => inst.data.rx.fixes, + .mi_sib_u, .mi_rip_u, .mi_sib_s, .mi_rip_s => ._, + .mrr_sib, .mrr_rip, .rrm_sib, .rrm_rip => inst.data.rrx.fixes, + .rmi_sib, .rmi_rip, .mri_sib, .mri_rip => inst.data.rix.fixes, + .rrmi_sib, .rrmi_rip => inst.data.rrix.fixes, + .m_sib, .m_rip, .rax_moffs, .moffs_rax => inst.data.x.fixes, + .extern_fn_reloc, .got_reloc, .direct_reloc, .import_reloc, .tlv_reloc => ._, + else => return lower.fail("TODO lower .{s}", .{@tagName(inst.ops)}), + }; + try lower.emit(switch (fixes) { + inline else => |tag| comptime if (std.mem.indexOfScalar(u8, @tagName(tag), ' ')) |space| + @field(Prefix, @tagName(tag)[0..space]) else - unreachable, + .none, + }, mnemonic: { + comptime var max_len = 0; + inline for (@typeInfo(Mnemonic).Enum.fields) |field| max_len = @max(field.name.len, max_len); + var buf: [max_len]u8 = undefined; + + const fixes_name = @tagName(fixes); + const pattern = fixes_name[if (std.mem.indexOfScalar(u8, fixes_name, ' ')) |i| i + 1 else 0..]; + const wildcard_i = std.mem.indexOfScalar(u8, pattern, '_').?; + const parts = .{ pattern[0..wildcard_i], @tagName(inst.tag), pattern[wildcard_i + 1 ..] }; + const err_msg = "unsupported mnemonic: "; + const mnemonic = std.fmt.bufPrint(&buf, "{s}{s}{s}", parts) catch + return lower.fail(err_msg ++ "'{s}{s}{s}'", parts); + break :mnemonic std.meta.stringToEnum(Mnemonic, mnemonic) orelse + return lower.fail(err_msg ++ "'{s}'", .{mnemonic}); }, switch (inst.ops) { .none => &.{}, + .inst => &.{ + .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }) }, + }, .i_s, .i_u => &.{ - .{ .imm = lower.imm(inst.ops, inst.data.i) }, + .{ .imm = lower.imm(inst.ops, inst.data.i.i) }, }, .r => &.{ - .{ .reg = inst.data.r }, + .{ .reg = inst.data.r.r1 }, }, .rr => &.{ .{ .reg = inst.data.rr.r1 }, @@ -501,11 +340,11 @@ fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void { .{ .imm = lower.imm(inst.ops, inst.data.rrri.i) }, }, .ri_s, .ri_u => &.{ - .{ .reg = inst.data.ri.r }, + .{ .reg = inst.data.ri.r1 }, .{ .imm = lower.imm(inst.ops, inst.data.ri.i) }, }, .ri64 => &.{ - .{ .reg = inst.data.rx.r }, + .{ .reg = inst.data.rx.r1 }, .{ .imm = lower.imm(inst.ops, inst.data.rx.payload) }, }, .rri_s, .rri_u => &.{ @@ -513,33 +352,25 @@ fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void { .{ .reg = inst.data.rri.r2 }, .{ .imm = lower.imm(inst.ops, inst.data.rri.i) }, }, - .m_sib, .lock_m_sib, .m_rip, .lock_m_rip => &.{ - .{ .mem = lower.mem(inst.ops, inst.data.payload) }, + .m_sib, .m_rip => &.{ + .{ .mem = lower.mem(inst.ops, inst.data.x.payload) }, }, - .mi_sib_s, - .lock_mi_sib_s, - .mi_sib_u, - .lock_mi_sib_u, - .mi_rip_u, - .lock_mi_rip_u, - .mi_rip_s, - .lock_mi_rip_s, - => &.{ + .mi_sib_s, .mi_sib_u, .mi_rip_u, .mi_rip_s => &.{ .{ .mem = lower.mem(inst.ops, inst.data.ix.payload) }, .{ .imm = lower.imm(inst.ops, inst.data.ix.i) }, }, .rm_sib, .rm_rip => &.{ - .{ .reg = inst.data.rx.r }, + .{ .reg = inst.data.rx.r1 }, .{ .mem = 
lower.mem(inst.ops, inst.data.rx.payload) }, }, .rmi_sib, .rmi_rip => &.{ - .{ .reg = inst.data.rix.r }, + .{ .reg = inst.data.rix.r1 }, .{ .mem = lower.mem(inst.ops, inst.data.rix.payload) }, .{ .imm = lower.imm(inst.ops, inst.data.rix.i) }, }, - .mr_sib, .lock_mr_sib, .mr_rip, .lock_mr_rip => &.{ + .mr_sib, .mr_rip => &.{ .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, - .{ .reg = inst.data.rx.r }, + .{ .reg = inst.data.rx.r1 }, }, .mrr_sib, .mrr_rip => &.{ .{ .mem = lower.mem(inst.ops, inst.data.rrx.payload) }, @@ -548,7 +379,7 @@ fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void { }, .mri_sib, .mri_rip => &.{ .{ .mem = lower.mem(inst.ops, inst.data.rix.payload) }, - .{ .reg = inst.data.rix.r }, + .{ .reg = inst.data.rix.r1 }, .{ .imm = lower.imm(inst.ops, inst.data.rix.i) }, }, .rrm_sib, .rrm_rip => &.{ @@ -562,180 +393,46 @@ fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void { .{ .mem = lower.mem(inst.ops, inst.data.rrix.payload) }, .{ .imm = lower.imm(inst.ops, inst.data.rrix.i) }, }, - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - }); -} - -fn mirString(lower: *Lower, inst: Mir.Inst) Error!void { - switch (inst.ops) { - .string => try lower.emitInst(switch (inst.data.string.repeat) { - inline else => |repeat| @field(Prefix, @tagName(repeat)), - }, switch (inst.tag) { - inline .cmps, .lods, .movs, .scas, .stos => |tag| switch (inst.data.string.width) { - inline else => |width| @field(Mnemonic, @tagName(tag) ++ @tagName(width)), - }, - else => unreachable, - }, &.{}), - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - } -} - -fn mirCmpxchgBytes(lower: *Lower, inst: Mir.Inst) Error!void { - const ops: [1]Operand = switch (inst.ops) { - .m_sib, .lock_m_sib, .m_rip, .lock_m_rip => .{ - .{ .mem = lower.mem(inst.ops, inst.data.payload) }, - }, - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - }; - try lower.emitInst(switch (inst.ops) { - .m_sib, .m_rip => .none, - .lock_m_sib, .lock_m_rip => .lock, - else => unreachable, - }, switch (@divExact(ops[0].bitSize(), 8)) { - 8 => .cmpxchg8b, - 16 => .cmpxchg16b, - else => return lower.fail("invalid operand for {s}", .{@tagName(inst.tag)}), - }, &ops); -} - -fn mirMovMoffs(lower: *Lower, inst: Mir.Inst) Error!void { - try lower.emitInst(switch (inst.ops) { - .rax_moffs, .moffs_rax => .none, - .lock_moffs_rax => .lock, - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - }, .mov, switch (inst.ops) { .rax_moffs => &.{ .{ .reg = .rax }, - .{ .mem = lower.mem(inst.ops, inst.data.payload) }, + .{ .mem = lower.mem(inst.ops, inst.data.x.payload) }, }, - .moffs_rax, .lock_moffs_rax => &.{ - .{ .mem = lower.mem(inst.ops, inst.data.payload) }, + .moffs_rax => &.{ + .{ .mem = lower.mem(inst.ops, inst.data.x.payload) }, .{ .reg = .rax }, }, - else => unreachable, + .extern_fn_reloc => &.{ + .{ .imm = lower.reloc(.{ .linker_extern_fn = inst.data.reloc }) }, + }, + .got_reloc, .direct_reloc, .import_reloc, .tlv_reloc => ops: { + const reg = inst.data.rx.r1; + const extra = lower.mir.extraData(Mir.Reloc, inst.data.rx.payload).data; + _ = lower.reloc(switch (inst.ops) { + .got_reloc => .{ .linker_got = extra }, + .direct_reloc => .{ .linker_direct = extra }, + .import_reloc => .{ .linker_import = extra }, + .tlv_reloc => .{ .linker_tlv = extra }, + else => unreachable, + }); + break :ops &.{ + .{ .reg = reg }, + .{ .mem = 
Memory.rip(Memory.PtrSize.fromBitSize(reg.bitSize()), 0) }, + }; + }, + else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), }); } -fn mirMovsx(lower: *Lower, inst: Mir.Inst) Error!void { - const ops: [2]Operand = switch (inst.ops) { - .rr => .{ - .{ .reg = inst.data.rr.r1 }, - .{ .reg = inst.data.rr.r2 }, - }, - .rm_sib, .rm_rip => .{ - .{ .reg = inst.data.rx.r }, - .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, - }, - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - }; - try lower.emitInst(.none, switch (ops[0].bitSize()) { - 32, 64 => switch (ops[1].bitSize()) { - 32 => .movsxd, - else => .movsx, - }, - else => .movsx, - }, &ops); -} - -fn mirCmovcc(lower: *Lower, inst: Mir.Inst) Error!void { - const data: struct { cc: bits.Condition, ops: [2]Operand } = switch (inst.ops) { - .rr_cc => .{ .cc = inst.data.rr_cc.cc, .ops = .{ - .{ .reg = inst.data.rr_cc.r1 }, - .{ .reg = inst.data.rr_cc.r2 }, - } }, - .rm_sib_cc, .rm_rip_cc => .{ .cc = inst.data.rx_cc.cc, .ops = .{ - .{ .reg = inst.data.rx_cc.r }, - .{ .mem = lower.mem(inst.ops, inst.data.rx_cc.payload) }, - } }, - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - }; - switch (data.cc) { - else => |cc| try lower.emitInst(.none, mnem_cc(.cmov, cc), &data.ops), - .z_and_np => { - try lower.emitInst(.none, mnem_cc(.cmov, .nz), &.{ data.ops[1], data.ops[0] }); - try lower.emitInst(.none, mnem_cc(.cmov, .np), &data.ops); - }, - .nz_or_p => { - try lower.emitInst(.none, mnem_cc(.cmov, .nz), &data.ops); - try lower.emitInst(.none, mnem_cc(.cmov, .p), &data.ops); - }, - } -} - -fn mirSetcc(lower: *Lower, inst: Mir.Inst) Error!void { - const data: struct { cc: bits.Condition, ops: [2]Operand } = switch (inst.ops) { - .r_cc => .{ .cc = inst.data.r_cc.cc, .ops = .{ - .{ .reg = inst.data.r_cc.r }, - .{ .reg = inst.data.r_cc.scratch }, - } }, - .m_sib_cc, .m_rip_cc => .{ .cc = inst.data.x_cc.cc, .ops = .{ - .{ .mem = lower.mem(inst.ops, inst.data.x_cc.payload) }, - .{ .reg = inst.data.x_cc.scratch }, - } }, - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - }; - switch (data.cc) { - else => |cc| try lower.emitInst(.none, mnem_cc(.set, cc), data.ops[0..1]), - .z_and_np => { - try lower.emitInst(.none, mnem_cc(.set, .z), data.ops[0..1]); - try lower.emitInst(.none, mnem_cc(.set, .np), data.ops[1..2]); - try lower.emitInst(.none, .@"and", data.ops[0..2]); - }, - .nz_or_p => { - try lower.emitInst(.none, mnem_cc(.set, .nz), data.ops[0..1]); - try lower.emitInst(.none, mnem_cc(.set, .p), data.ops[1..2]); - try lower.emitInst(.none, .@"or", data.ops[0..2]); - }, - } -} - -fn mirJcc(lower: *Lower, index: Mir.Inst.Index, inst: Mir.Inst) Error!void { - switch (inst.data.inst_cc.cc) { - else => |cc| try lower.emitInstWithReloc(.none, mnem_cc(.j, cc), &.{ - .{ .imm = Immediate.s(0) }, - }, .{ .inst = inst.data.inst_cc.inst }), - .z_and_np => { - try lower.emitInstWithReloc(.none, mnem_cc(.j, .nz), &.{ - .{ .imm = Immediate.s(0) }, - }, .{ .inst = index + 1 }); - try lower.emitInstWithReloc(.none, mnem_cc(.j, .np), &.{ - .{ .imm = Immediate.s(0) }, - }, .{ .inst = inst.data.inst_cc.inst }); - }, - .nz_or_p => { - try lower.emitInstWithReloc(.none, mnem_cc(.j, .nz), &.{ - .{ .imm = Immediate.s(0) }, - }, .{ .inst = inst.data.inst_cc.inst }); - try lower.emitInstWithReloc(.none, mnem_cc(.j, .p), &.{ - .{ .imm = Immediate.s(0) }, - }, .{ .inst = inst.data.inst_cc.inst }); - }, - } 
-} - -fn mirRegisterList(lower: *Lower, comptime mnemonic: Mnemonic, inst: Mir.Inst) Error!void { - const reg_list = Mir.RegisterList.fromInt(inst.data.payload); +fn pushPopRegList(lower: *Lower, comptime mnemonic: Mnemonic, inst: Mir.Inst) Error!void { const callee_preserved_regs = abi.getCalleePreservedRegs(lower.target.*); - var it = reg_list.iterator(.{ .direction = switch (mnemonic) { + var it = inst.data.reg_list.iterator(.{ .direction = switch (mnemonic) { .push => .reverse, .pop => .forward, else => unreachable, } }); - while (it.next()) |i| try lower.emitInst(.none, mnemonic, &.{.{ .reg = callee_preserved_regs[i] }}); -} - -fn mirLinker(lower: *Lower, mnemonic: Mnemonic, inst: Mir.Inst) Error!void { - const reloc = lower.mir.extraData(Mir.Reloc, inst.data.rx.payload).data; - try lower.emitInstWithReloc(.none, mnemonic, &.{ - .{ .reg = inst.data.rx.r }, - .{ .mem = Memory.rip(Memory.PtrSize.fromBitSize(inst.data.rx.r.bitSize()), 0) }, - }, switch (inst.ops) { - .got_reloc => .{ .linker_got = reloc }, - .direct_reloc => .{ .linker_direct = reloc }, - .import_reloc => .{ .linker_import = reloc }, - .tlv_reloc => .{ .linker_tlv = reloc }, - else => unreachable, - }); + while (it.next()) |i| try lower.emit(.none, mnemonic, &.{.{ + .reg = callee_preserved_regs[i], + }}); } const abi = @import("abi.zig"); diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 442cfabebb..951a0c5d4d 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -32,6 +32,210 @@ pub const Inst = struct { pub const Index = u32; + pub const Fixes = enum(u8) { + /// ___ + @"_", + + /// ___ Above + _a, + /// ___ Above Or Equal + _ae, + /// ___ Below + _b, + /// ___ Below Or Equal + _be, + /// ___ Carry + _c, + /// ___ Equal + _e, + /// ___ Greater + _g, + /// ___ Greater Or Equal + _ge, + /// ___ Less + _l, + /// ___ Less Or Equal + _le, + /// ___ Not Above + _na, + /// ___ Not Above Or Equal + _nae, + /// ___ Not Below + _nb, + /// ___ Not Below Or Equal + _nbe, + /// ___ Not Carry + _nc, + /// ___ Not Equal + _ne, + /// ___ Not Greater + _ng, + /// ___ Not Greater Or Equal + _nge, + /// ___ Not Less + _nl, + /// ___ Not Less Or Equal + _nle, + /// ___ Not Overflow + _no, + /// ___ Not Parity + _np, + /// ___ Not Sign + _ns, + /// ___ Not Zero + _nz, + /// ___ Overflow + _o, + /// ___ Parity + _p, + /// ___ Parity Even + _pe, + /// ___ Parity Odd + _po, + /// ___ Sign + _s, + /// ___ Zero + _z, + + /// ___ String + //_s, + /// ___ String Byte + _sb, + /// ___ String Word + _sw, + /// ___ String Doubleword + _sd, + /// ___ String Quadword + _sq, + + /// Repeat ___ String + @"rep _s", + /// Repeat ___ String Byte + @"rep _sb", + /// Repeat ___ String Word + @"rep _sw", + /// Repeat ___ String Doubleword + @"rep _sd", + /// Repeat ___ String Quadword + @"rep _sq", + + /// Repeat Equal ___ String + @"repe _s", + /// Repeat Equal ___ String Byte + @"repe _sb", + /// Repeat Equal ___ String Word + @"repe _sw", + /// Repeat Equal ___ String Doubleword + @"repe _sd", + /// Repeat Equal ___ String Quadword + @"repe _sq", + + /// Repeat Not Equal ___ String + @"repne _s", + /// Repeat Not Equal ___ String Byte + @"repne _sb", + /// Repeat Not Equal ___ String Word + @"repne _sw", + /// Repeat Not Equal ___ String Doubleword + @"repne _sd", + /// Repeat Not Equal ___ String Quadword + @"repne _sq", + + /// Repeat Not Zero ___ String + @"repnz _s", + /// Repeat Not Zero ___ String Byte + @"repnz _sb", + /// Repeat Not Zero ___ String Word + @"repnz _sw", + /// Repeat Not Zero ___ String Doubleword + 
@"repnz _sd", + /// Repeat Not Zero ___ String Quadword + @"repnz _sq", + + /// Repeat Zero ___ String + @"repz _s", + /// Repeat Zero ___ String Byte + @"repz _sb", + /// Repeat Zero ___ String Word + @"repz _sw", + /// Repeat Zero ___ String Doubleword + @"repz _sd", + /// Repeat Zero ___ String Quadword + @"repz _sq", + + /// Locked ___ + @"lock _", + /// ___ 8 Bytes + _8b, + /// Locked ___ 8 Bytes + @"lock _8b", + /// ___ 16 Bytes + _16b, + /// Locked ___ 16 Bytes + @"lock _16b", + + /// Packed ___ + p_, + /// Packed ___ Byte + p_b, + /// Packed ___ Word + p_w, + /// Packed ___ Doubleword + p_d, + /// Packed ___ Quadword + p_q, + /// Packed ___ Double Quadword + p_dq, + + /// ___ Scalar Single-Precision Values + _ss, + /// ___ Packed Single-Precision Values + _ps, + /// ___ Scalar Double-Precision Values + //_sd, + /// ___ Packed Double-Precision Values + _pd, + + /// VEX-Encoded ___ + v_, + /// VEX-Encoded Packed ___ + vp_, + /// VEX-Encoded Packed ___ Byte + vp_b, + /// VEX-Encoded Packed ___ Word + vp_w, + /// VEX-Encoded Packed ___ Doubleword + vp_d, + /// VEX-Encoded Packed ___ Quadword + vp_q, + /// VEX-Encoded Packed ___ Double Quadword + vp_dq, + /// VEX-Encoded ___ Scalar Single-Precision Values + v_ss, + /// VEX-Encoded ___ Packed Single-Precision Values + v_ps, + /// VEX-Encoded ___ Scalar Double-Precision Values + v_sd, + /// VEX-Encoded ___ Packed Double-Precision Values + v_pd, + + /// Mask ___ Byte + k_b, + /// Mask ___ Word + k_w, + /// Mask ___ Doubleword + k_d, + /// Mask ___ Quadword + k_q, + + pub fn fromCondition(cc: bits.Condition) Fixes { + return switch (cc) { + inline else => |cc_tag| @field(Fixes, "_" ++ @tagName(cc_tag)), + .z_and_np, .nz_or_p => unreachable, + }; + } + }; + pub const Tag = enum(u8) { /// Add with carry adc, @@ -57,22 +261,24 @@ pub const Inst = struct { call, /// Convert byte to word cbw, - /// Convert word to doubleword - cwde, - /// Convert doubleword to quadword - cdqe, - /// Convert word to doubleword - cwd, /// Convert doubleword to quadword cdq, /// Convert doubleword to quadword - cqo, + cdqe, + /// Conditional move + cmov, /// Logical compare + /// Compare string cmp, /// Compare and exchange - cmpxchg, /// Compare and exchange bytes - cmpxchgb, + cmpxchg, + /// Convert doubleword to quadword + cqo, + /// Convert word to doubleword + cwd, + /// Convert word to doubleword + cwde, /// Unsigned division div, /// Store integer with truncation @@ -85,10 +291,14 @@ pub const Inst = struct { imul, /// int3, + /// Conditional jump + j, /// Jump jmp, /// Load effective address lea, + /// Load string + lod, /// Load fence lfence, /// Count the number of leading zero bits @@ -96,6 +306,7 @@ pub const Inst = struct { /// Memory fence mfence, /// Move + /// Move data from string to string mov, /// Move data after swapping bytes movbe, @@ -105,6 +316,8 @@ pub const Inst = struct { movq, /// Move with sign extension movsx, + /// Move with sign extension + movsxd, /// Move with zero extension movzx, /// Multiply @@ -139,6 +352,10 @@ pub const Inst = struct { sar, /// Integer subtraction with borrow sbb, + /// Scan string + sca, + /// Set byte on condition + set, /// Store fence sfence, /// Logical shift left @@ -151,6 +368,8 @@ pub const Inst = struct { shrd, /// Subtract sub, + /// Store string + sto, /// Syscall syscall, /// Test condition @@ -505,57 +724,10 @@ pub const Inst = struct { /// Fused multiply-add of scalar single-precision floating-point values vfmadd231ss, - /// Compare string operands - cmps, - /// Load string - lods, - /// Move 
data from string to string - movs, - /// Scan string - scas, - /// Store string - stos, - - /// Conditional move - cmovcc, - /// Conditional jump - jcc, - /// Set byte on condition - setcc, - - /// Mov absolute to/from memory wrt segment register to/from rax - mov_moffs, - - /// Jump with relocation to another local MIR instruction - /// Uses `inst` payload. - jmp_reloc, - - /// Call to an extern symbol via linker relocation. - /// Uses `relocation` payload. - call_extern, - - /// Load effective address of a symbol not yet allocated in VM. - lea_linker, - /// Move address of a symbol not yet allocated in VM. - mov_linker, - - /// End of prologue - dbg_prologue_end, - /// Start of epilogue - dbg_epilogue_begin, - /// Update debug line - /// Uses `line_column` payload containing the line and column. - dbg_line, - /// Push registers - /// Uses `payload` payload containing `RegisterList.asInt` directly. - push_regs, - /// Pop registers - /// Uses `payload` payload containing `RegisterList.asInt` directly. - pop_regs, - - /// Tombstone - /// Emitter should skip this instruction. - dead, + /// A pseudo instruction that requires special lowering. + /// This should be the only tag in this enum that doesn't + /// directly correspond to one or more instruction mnemonics. + pseudo, }; pub const Ops = enum(u8) { @@ -579,12 +751,6 @@ pub const Inst = struct { /// Register, register, immediate (unsigned) operands. /// Uses `rri` payload. rri_u, - /// Register with condition code (CC). - /// Uses `r_cc` payload. - r_cc, - /// Register, register with condition code (CC). - /// Uses `rr_cc` payload. - rr_cc, /// Register, immediate (sign-extended) operands. /// Uses `ri` payload. ri_s, @@ -609,12 +775,6 @@ pub const Inst = struct { /// Register, memory (RIP) operands. /// Uses `rx` payload. rm_rip, - /// Register, memory (SIB) operands with condition code (CC). - /// Uses `rx_cc` payload. - rm_sib_cc, - /// Register, memory (RIP) operands with condition code (CC). - /// Uses `rx_cc` payload. - rm_rip_cc, /// Register, memory (SIB), immediate (byte) operands. /// Uses `rix` payload with extra data of type `MemorySib`. rmi_sib, @@ -634,17 +794,11 @@ pub const Inst = struct { /// Uses `rix` payload with extra data of type `MemoryRip`. rmi_rip, /// Single memory (SIB) operand. - /// Uses `payload` with extra data of type `MemorySib`. + /// Uses `x` with extra data of type `MemorySib`. m_sib, /// Single memory (RIP) operand. - /// Uses `payload` with extra data of type `MemoryRip`. + /// Uses `x` with extra data of type `MemoryRip`. m_rip, - /// Single memory (SIB) operand with condition code (CC). - /// Uses `x_cc` with extra data of type `MemorySib`. - m_sib_cc, - /// Single memory (RIP) operand with condition code (CC). - /// Uses `x_cc` with extra data of type `MemoryRip`. - m_rip_cc, /// Memory (SIB), immediate (unsigned) operands. /// Uses `ix` payload with extra data of type `MemorySib`. mi_sib_u, @@ -676,49 +830,17 @@ pub const Inst = struct { /// Uses `rix` payload with extra data of type `MemoryRip`. mri_rip, /// Rax, Memory moffs. - /// Uses `payload` with extra data of type `MemoryMoffs`. + /// Uses `x` with extra data of type `MemoryMoffs`. rax_moffs, /// Memory moffs, rax. - /// Uses `payload` with extra data of type `MemoryMoffs`. + /// Uses `x` with extra data of type `MemoryMoffs`. moffs_rax, - /// Single memory (SIB) operand with lock prefix. - /// Uses `payload` with extra data of type `MemorySib`. - lock_m_sib, - /// Single memory (RIP) operand with lock prefix. 
- /// Uses `payload` with extra data of type `MemoryRip`. - lock_m_rip, - /// Memory (SIB), immediate (unsigned) operands with lock prefix. - /// Uses `xi` payload with extra data of type `MemorySib`. - lock_mi_sib_u, - /// Memory (RIP), immediate (unsigned) operands with lock prefix. - /// Uses `xi` payload with extra data of type `MemoryRip`. - lock_mi_rip_u, - /// Memory (SIB), immediate (sign-extend) operands with lock prefix. - /// Uses `xi` payload with extra data of type `MemorySib`. - lock_mi_sib_s, - /// Memory (RIP), immediate (sign-extend) operands with lock prefix. - /// Uses `xi` payload with extra data of type `MemoryRip`. - lock_mi_rip_s, - /// Memory (SIB), register operands with lock prefix. - /// Uses `rx` payload with extra data of type `MemorySib`. - lock_mr_sib, - /// Memory (RIP), register operands with lock prefix. - /// Uses `rx` payload with extra data of type `MemoryRip`. - lock_mr_rip, - /// Memory moffs, rax with lock prefix. - /// Uses `payload` with extra data of type `MemoryMoffs`. - lock_moffs_rax, /// References another Mir instruction directly. /// Uses `inst` payload. inst, - /// References another Mir instruction directly with condition code (CC). - /// Uses `inst_cc` payload. - inst_cc, - /// String repeat and width - /// Uses `string` payload. - string, + /// Linker relocation - external function. /// Uses `reloc` payload. - reloc, + extern_fn_reloc, /// Linker relocation - GOT indirection. /// Uses `rx` payload with extra data of type `Reloc`. got_reloc, @@ -731,74 +853,125 @@ pub const Inst = struct { /// Linker relocation - threadlocal variable via GOT indirection. /// Uses `rx` payload with extra data of type `Reloc`. tlv_reloc, + + // Pseudo instructions: + + /// Conditional move if zero flag set and parity flag not set + /// Clobbers the source operand! + /// Uses `rr` payload. + pseudo_cmov_z_and_np_rr, + /// Conditional move if zero flag not set or parity flag set + /// Uses `rr` payload. + pseudo_cmov_nz_or_p_rr, + /// Conditional move if zero flag not set or parity flag set + /// Uses `rx` payload. + pseudo_cmov_nz_or_p_rm_sib, + /// Conditional move if zero flag not set or parity flag set + /// Uses `rx` payload. + pseudo_cmov_nz_or_p_rm_rip, + /// Set byte if zero flag set and parity flag not set + /// Requires a scratch register! + /// Uses `r_scratch` payload. + pseudo_set_z_and_np_r, + /// Set byte if zero flag set and parity flag not set + /// Requires a scratch register! + /// Uses `x_scratch` payload. + pseudo_set_z_and_np_m_sib, + /// Set byte if zero flag set and parity flag not set + /// Requires a scratch register! + /// Uses `x_scratch` payload. + pseudo_set_z_and_np_m_rip, + /// Set byte if zero flag not set or parity flag set + /// Requires a scratch register! + /// Uses `r_scratch` payload. + pseudo_set_nz_or_p_r, + /// Set byte if zero flag not set or parity flag set + /// Requires a scratch register! + /// Uses `x_scratch` payload. + pseudo_set_nz_or_p_m_sib, + /// Set byte if zero flag not set or parity flag set + /// Requires a scratch register! + /// Uses `x_scratch` payload. + pseudo_set_nz_or_p_m_rip, + /// Jump if zero flag set and parity flag not set + /// Uses `inst` payload. + pseudo_j_z_and_np_inst, + /// Jump if zero flag not set or parity flag set + /// Uses `inst` payload. + pseudo_j_nz_or_p_inst, + + /// Push registers + /// Uses `reg_list` payload. + pseudo_push_reg_list, + /// Pop registers + /// Uses `reg_list` payload. 
+ pseudo_pop_reg_list, + + /// End of prologue + pseudo_dbg_prologue_end_none, + /// Update debug line + /// Uses `line_column` payload. + pseudo_dbg_line_line_column, + /// Start of epilogue + pseudo_dbg_epilogue_begin_none, + + /// Tombstone + /// Emitter should skip this instruction. + pseudo_dead_none, }; pub const Data = union { + none: struct { + fixes: Fixes = ._, + }, /// References another Mir instruction. - inst: Index, - /// Another instruction with condition code (CC). - /// Used by `jcc`. - inst_cc: struct { - /// Another instruction. + inst: struct { + fixes: Fixes = ._, inst: Index, - /// A condition code for use with EFLAGS register. - cc: bits.Condition, }, /// A 32-bit immediate value. - i: u32, - r: Register, + i: struct { + fixes: Fixes = ._, + i: u32, + }, + r: struct { + fixes: Fixes = ._, + r1: Register, + }, rr: struct { + fixes: Fixes = ._, r1: Register, r2: Register, }, rrr: struct { + fixes: Fixes = ._, r1: Register, r2: Register, r3: Register, }, rrri: struct { + fixes: Fixes = ._, r1: Register, r2: Register, r3: Register, i: u8, }, rri: struct { + fixes: Fixes = ._, r1: Register, r2: Register, i: u32, }, - /// Condition code (CC), followed by custom payload found in extra. - x_cc: struct { - scratch: Register, - cc: bits.Condition, - payload: u32, - }, - /// Register with condition code (CC). - r_cc: struct { - r: Register, - scratch: Register, - cc: bits.Condition, - }, - /// Register, register with condition code (CC). - rr_cc: struct { - r1: Register, - r2: Register, - cc: bits.Condition, - }, /// Register, immediate. ri: struct { - r: Register, + fixes: Fixes = ._, + r1: Register, i: u32, }, /// Register, followed by custom payload found in extra. rx: struct { - r: Register, - payload: u32, - }, - /// Register with condition code (CC), followed by custom payload found in extra. - rx_cc: struct { - r: Register, - cc: bits.Condition, + fixes: Fixes = ._, + r1: Register, payload: u32, }, /// Immediate, followed by Custom payload found in extra. @@ -808,39 +981,54 @@ pub const Inst = struct { }, /// Register, register, followed by Custom payload found in extra. rrx: struct { + fixes: Fixes = ._, r1: Register, r2: Register, payload: u32, }, /// Register, byte immediate, followed by Custom payload found in extra. rix: struct { - r: Register, + fixes: Fixes = ._, + r1: Register, i: u8, payload: u32, }, /// Register, register, byte immediate, followed by Custom payload found in extra. rrix: struct { + fixes: Fixes = ._, r1: Register, r2: Register, i: u8, payload: u32, }, - /// String instruction prefix and width. - string: struct { - repeat: bits.StringRepeat, - width: bits.StringWidth, + /// Register, scratch register + r_scratch: struct { + fixes: Fixes = ._, + r1: Register, + scratch_reg: Register, + }, + /// Scratch register, followed by Custom payload found in extra. + x_scratch: struct { + fixes: Fixes = ._, + scratch_reg: Register, + payload: u32, + }, + /// Custom payload found in extra. + x: struct { + fixes: Fixes = ._, + payload: u32, }, /// Relocation for the linker where: /// * `atom_index` is the index of the source /// * `sym_index` is the index of the target - relocation: Reloc, + reloc: Reloc, /// Debug line and column position line_column: struct { line: u32, column: u32, }, - /// Index into `extra`. Meaning of what can be found there is context-dependent. - payload: u32, + /// Register list + reg_list: RegisterList, }; // Make sure we don't accidentally make instructions bigger than expected. 
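
To make the compressed-mnemonic scheme above concrete: each `Fixes` value is an affix pattern whose `_` placeholder stands for a base `Tag` name, and the next patch in the series passes the pair around as a `Mir.Inst.FixedTag` tuple, so `.{ ._r, .sh }` denotes `shr`, `.{ .i_, .mul }` denotes `imul`, and `.{ .@"lock _", .cmpxchg }` denotes `lock cmpxchg`. Splitting the affix out this way lets whole mnemonic families share one `Tag` while keeping `Inst.Data` within the size expectations noted in the comment above. The sketch that follows is a reader's illustration of the naming rule only, under the assumption that it can be modeled as string splicing; the actual lowering works on enum values, and `spliceMnemonic` is an invented name, not a function from this series.

    const std = @import("std");

    /// Illustration only: splice the base mnemonic into the `_` placeholder
    /// of a Fixes affix, e.g. "_r" + "sh" -> "shr", "i_" + "mul" -> "imul",
    /// "lock _" + "cmpxchg" -> "lock cmpxchg".
    fn spliceMnemonic(buf: []u8, fixes: []const u8, tag: []const u8) ![]const u8 {
        const placeholder = std.mem.indexOfScalar(u8, fixes, '_') orelse
            return error.MissingPlaceholder;
        return std.fmt.bufPrint(buf, "{s}{s}{s}", .{
            fixes[0..placeholder], tag, fixes[placeholder + 1 ..],
        });
    }

    test "spliceMnemonic" {
        var buf: [32]u8 = undefined;
        try std.testing.expectEqualStrings("shr", try spliceMnemonic(&buf, "_r", "sh"));
        try std.testing.expectEqualStrings("imul", try spliceMnemonic(&buf, "i_", "mul"));
        try std.testing.expectEqualStrings("lock cmpxchg", try spliceMnemonic(&buf, "lock _", "cmpxchg"));
    }
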
@@ -852,6 +1040,7 @@ pub const Inst = struct { } }; +/// A linker symbol not yet allocated in VM. pub const Reloc = struct { /// Index of the containing atom. atom_index: u32, @@ -887,16 +1076,6 @@ pub const RegisterList = struct { return self.bitset.iterator(options); } - pub fn asInt(self: Self) u32 { - return self.bitset.mask; - } - - pub fn fromInt(mask: u32) Self { - return .{ - .bitset = BitSet{ .mask = @intCast(BitSet.MaskInt, mask) }, - }; - } - pub fn count(self: Self) u32 { return @intCast(u32, self.bitset.count()); } diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index b73a37d6cb..3343f280b9 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -6,9 +6,6 @@ const Allocator = std.mem.Allocator; const ArrayList = std.ArrayList; const DW = std.dwarf; -pub const StringRepeat = enum(u3) { none, rep, repe, repz, repne, repnz }; -pub const StringWidth = enum(u2) { b, w, d, q }; - /// EFLAGS condition codes pub const Condition = enum(u5) { /// above From ecb5feaf94bf49dc4c180f09c170223d6c1898b3 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 8 May 2023 06:51:05 -0400 Subject: [PATCH 19/20] x86_64: continue to optimize mir tag usage Migrate mnemonic literals to tuples that represent the compressed storage. 225 tags left in use, many tags left to compress. --- src/arch/x86_64/CodeGen.zig | 1489 +++++++++++++++++++---------------- src/arch/x86_64/Lower.zig | 9 +- src/arch/x86_64/Mir.zig | 100 ++- 3 files changed, 880 insertions(+), 718 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 3ac05c95ac..147be62e28 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1167,11 +1167,13 @@ fn asmPlaceholder(self: *Self) !Mir.Inst.Index { }); } -fn asmOpOnly(self: *Self, tag: Mir.Inst.Tag) !void { +fn asmOpOnly(self: *Self, tag: Mir.Inst.FixedTag) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = .none, - .data = undefined, + .data = .{ .none = .{ + .fixes = tag[0], + } }, }); } @@ -1183,22 +1185,26 @@ fn asmPseudo(self: *Self, ops: Mir.Inst.Ops) !void { }); } -fn asmRegister(self: *Self, tag: Mir.Inst.Tag, reg: Register) !void { +fn asmRegister(self: *Self, tag: Mir.Inst.FixedTag, reg: Register) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = .r, - .data = .{ .r = .{ .r1 = reg } }, + .data = .{ .r = .{ + .fixes = tag[0], + .r1 = reg, + } }, }); } -fn asmImmediate(self: *Self, tag: Mir.Inst.Tag, imm: Immediate) !void { +fn asmImmediate(self: *Self, tag: Mir.Inst.FixedTag, imm: Immediate) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (imm) { .signed => .i_s, .unsigned => .i_u, }, .data = .{ .i = .{ + .fixes = tag[0], .i = switch (imm) { .signed => |s| @bitCast(u32, s), .unsigned => |u| @intCast(u32, u), @@ -1207,24 +1213,29 @@ fn asmImmediate(self: *Self, tag: Mir.Inst.Tag, imm: Immediate) !void { }); } -fn asmRegisterRegister(self: *Self, tag: Mir.Inst.Tag, reg1: Register, reg2: Register) !void { +fn asmRegisterRegister(self: *Self, tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = .rr, - .data = .{ .rr = .{ .r1 = reg1, .r2 = reg2 } }, + .data = .{ .rr = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + } }, }); } -fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.Tag, reg: Register, imm: Immediate) !void { +fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, imm: Immediate) !void { const ops: Mir.Inst.Ops = switch (imm) { 
.signed => .ri_s, .unsigned => |u| if (math.cast(u32, u)) |_| .ri_u else .ri64, }; _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = ops, .data = switch (ops) { .ri_s, .ri_u => .{ .ri = .{ + .fixes = tag[0], .r1 = reg, .i = switch (imm) { .signed => |s| @bitCast(u32, s), @@ -1232,6 +1243,7 @@ fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.Tag, reg: Register, imm: Imme }, } }, .ri64 => .{ .rx = .{ + .fixes = tag[0], .r1 = reg, .payload = try self.addExtra(Mir.Imm64.encode(imm.unsigned)), } }, @@ -1242,47 +1254,59 @@ fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.Tag, reg: Register, imm: Imme fn asmRegisterRegisterRegister( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register, reg3: Register, ) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = .rrr, - .data = .{ .rrr = .{ .r1 = reg1, .r2 = reg2, .r3 = reg3 } }, + .data = .{ .rrr = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + .r3 = reg3, + } }, }); } fn asmRegisterRegisterRegisterImmediate( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register, reg3: Register, imm: Immediate, ) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = .rrri, - .data = .{ .rrri = .{ .r1 = reg1, .r2 = reg2, .r3 = reg3, .i = @intCast(u8, imm.unsigned) } }, + .data = .{ .rrri = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + .r3 = reg3, + .i = @intCast(u8, imm.unsigned), + } }, }); } fn asmRegisterRegisterImmediate( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register, imm: Immediate, ) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (imm) { .signed => .rri_s, .unsigned => .rri_u, }, .data = .{ .rri = .{ + .fixes = tag[0], .r1 = reg1, .r2 = reg2, .i = switch (imm) { @@ -1295,19 +1319,20 @@ fn asmRegisterRegisterImmediate( fn asmRegisterRegisterMemory( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register, m: Memory, ) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => .rrm_sib, .rip => .rrm_rip, else => unreachable, }, .data = .{ .rrx = .{ + .fixes = tag[0], .r1 = reg1, .r2 = reg2, .payload = switch (m) { @@ -1319,15 +1344,16 @@ fn asmRegisterRegisterMemory( }); } -fn asmMemory(self: *Self, tag: Mir.Inst.Tag, m: Memory) !void { +fn asmMemory(self: *Self, tag: Mir.Inst.FixedTag, m: Memory) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => .m_sib, .rip => .m_rip, else => unreachable, }, .data = .{ .x = .{ + .fixes = tag[0], .payload = switch (m) { .sib => try self.addExtra(Mir.MemorySib.encode(m)), .rip => try self.addExtra(Mir.MemoryRip.encode(m)), @@ -1337,15 +1363,16 @@ fn asmMemory(self: *Self, tag: Mir.Inst.Tag, m: Memory) !void { }); } -fn asmRegisterMemory(self: *Self, tag: Mir.Inst.Tag, reg: Register, m: Memory) !void { +fn asmRegisterMemory(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, m: Memory) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => .rm_sib, .rip => .rm_rip, else => unreachable, }, .data = .{ .rx = .{ + .fixes = tag[0], .r1 = reg, .payload = switch (m) { .sib => try self.addExtra(Mir.MemorySib.encode(m)), @@ -1358,19 +1385,20 @@ fn asmRegisterMemory(self: *Self, tag: Mir.Inst.Tag, reg: Register, m: Memory) ! 
fn asmRegisterMemoryImmediate( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, reg: Register, m: Memory, imm: Immediate, ) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => .rmi_sib, .rip => .rmi_rip, else => unreachable, }, .data = .{ .rix = .{ + .fixes = tag[0], .r1 = reg, .i = @intCast(u8, imm.unsigned), .payload = switch (m) { @@ -1384,20 +1412,21 @@ fn asmRegisterMemoryImmediate( fn asmRegisterRegisterMemoryImmediate( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register, m: Memory, imm: Immediate, ) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => .rrmi_sib, .rip => .rrmi_rip, else => unreachable, }, .data = .{ .rrix = .{ + .fixes = tag[0], .r1 = reg1, .r2 = reg2, .i = @intCast(u8, imm.unsigned), @@ -1410,15 +1439,16 @@ fn asmRegisterRegisterMemoryImmediate( }); } -fn asmMemoryRegister(self: *Self, tag: Mir.Inst.Tag, m: Memory, reg: Register) !void { +fn asmMemoryRegister(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, reg: Register) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => .mr_sib, .rip => .mr_rip, else => unreachable, }, .data = .{ .rx = .{ + .fixes = tag[0], .r1 = reg, .payload = switch (m) { .sib => try self.addExtra(Mir.MemorySib.encode(m)), @@ -1429,9 +1459,9 @@ fn asmMemoryRegister(self: *Self, tag: Mir.Inst.Tag, m: Memory, reg: Register) ! }); } -fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.Tag, m: Memory, imm: Immediate) !void { +fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, imm: Immediate) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => switch (imm) { .signed => .mi_sib_s, @@ -1443,11 +1473,39 @@ fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.Tag, m: Memory, imm: Immediate) }, else => unreachable, }, - .data = .{ .ix = .{ - .i = switch (imm) { + .data = .{ .x = .{ + .fixes = tag[0], + .payload = try self.addExtra(Mir.Imm32{ .imm = switch (imm) { .signed => |s| @bitCast(u32, s), .unsigned => |u| @intCast(u32, u), - }, + } }), + } }, + }); + _ = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }; +} + +fn asmMemoryRegisterRegister( + self: *Self, + tag: Mir.Inst.FixedTag, + m: Memory, + reg1: Register, + reg2: Register, +) !void { + _ = try self.addInst(.{ + .tag = tag[1], + .ops = switch (m) { + .sib => .mrr_sib, + .rip => .mrr_rip, + else => unreachable, + }, + .data = .{ .rrx = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, .payload = switch (m) { .sib => try self.addExtra(Mir.MemorySib.encode(m)), .rip => try self.addExtra(Mir.MemoryRip.encode(m)), @@ -1457,43 +1515,22 @@ fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.Tag, m: Memory, imm: Immediate) }); } -fn asmMemoryRegisterRegister( - self: *Self, - tag: Mir.Inst.Tag, - m: Memory, - reg1: Register, - reg2: Register, -) !void { - _ = try self.addInst(.{ - .tag = tag, - .ops = switch (m) { - .sib => .mrr_sib, - .rip => .mrr_rip, - else => unreachable, - }, - .data = .{ .rrx = .{ .r1 = reg1, .r2 = reg2, .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, - }); -} - fn asmMemoryRegisterImmediate( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, m: Memory, reg: Register, imm: Immediate, ) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], 
.ops = switch (m) { .sib => .mri_sib, .rip => .mri_rip, else => unreachable, }, .data = .{ .rix = .{ + .fixes = tag[0], .r1 = reg, .i = @intCast(u8, imm.unsigned), .payload = switch (m) { @@ -1508,9 +1545,9 @@ fn asmMemoryRegisterImmediate( fn gen(self: *Self) InnerError!void { const cc = self.fn_type.fnCallingConvention(); if (cc != .Naked) { - try self.asmRegister(.push, .rbp); + try self.asmRegister(.{ ._, .push }, .rbp); const backpatch_push_callee_preserved_regs = try self.asmPlaceholder(); - try self.asmRegisterRegister(.mov, .rbp, .rsp); + try self.asmRegisterRegister(.{ ._, .mov }, .rbp, .rsp); const backpatch_frame_align = try self.asmPlaceholder(); const backpatch_stack_alloc = try self.asmPlaceholder(); @@ -1553,8 +1590,8 @@ fn gen(self: *Self) InnerError!void { try self.asmPseudo(.pseudo_dbg_epilogue_begin_none); const backpatch_stack_dealloc = try self.asmPlaceholder(); const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder(); - try self.asmRegister(.pop, .rbp); - try self.asmOpOnly(.ret); + try self.asmRegister(.{ ._, .pop }, .rbp); + try self.asmOpOnly(.{ ._, .ret }); const frame_layout = try self.computeFrameLayout(); const need_frame_align = frame_layout.stack_mask != math.maxInt(u32); @@ -1927,7 +1964,7 @@ fn genLazy(self: *Self, lazy_sym: link.File.LazySymbol) InnerError!void { }; const tag_val = Value.initPayload(&tag_pl.base); const tag_mcv = try self.genTypedValue(.{ .ty = enum_ty, .val = tag_val }); - try self.genBinOpMir(.cmp, enum_ty, enum_mcv, tag_mcv); + try self.genBinOpMir(.{ ._, .cmp }, enum_ty, enum_mcv, tag_mcv); const skip_reloc = try self.asmJccReloc(undefined, .ne); try self.genSetMem( @@ -1947,7 +1984,7 @@ fn genLazy(self: *Self, lazy_sym: link.File.LazySymbol) InnerError!void { try self.airTrap(); for (exitlude_jump_relocs) |reloc| try self.performReloc(reloc); - try self.asmOpOnly(.ret); + try self.asmOpOnly(.{ ._, .ret }); }, else => return self.fail( "TODO implement {s} for {}", @@ -2406,7 +2443,7 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { else try self.copyToTmpRegister(src_ty, src_mcv); try self.asmRegisterRegisterImmediate( - .vcvtps2ph, + .{ ._, .vcvtps2ph }, dst_reg, mat_src_reg.to128(), Immediate.u(0b1_00), @@ -2418,12 +2455,12 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { } } else if (src_bits == 64 and dst_bits == 32) { if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( - .vcvtsd2ss, + .{ ._, .vcvtsd2ss }, dst_reg, dst_reg, src_mcv.mem(.qword), ) else try self.asmRegisterRegisterRegister( - .vcvtsd2ss, + .{ ._, .vcvtsd2ss }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ -2431,11 +2468,11 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { else try self.copyToTmpRegister(src_ty, src_mcv)).to128(), ) else if (src_mcv.isMemory()) try self.asmRegisterMemory( - .cvtsd2ss, + .{ ._, .cvtsd2ss }, dst_reg, src_mcv.mem(.qword), ) else try self.asmRegisterRegister( - .cvtsd2ss, + .{ ._, .cvtsd2ss }, dst_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? @@ -2469,22 +2506,22 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { src_mcv.getReg().? 
else try self.copyToTmpRegister(src_ty, src_mcv); - try self.asmRegisterRegister(.vcvtph2ps, dst_reg, mat_src_reg.to128()); + try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, mat_src_reg.to128()); switch (dst_bits) { 32 => {}, - 64 => try self.asmRegisterRegisterRegister(.vcvtss2sd, dst_reg, dst_reg, dst_reg), + 64 => try self.asmRegisterRegisterRegister(.{ ._, .vcvtss2sd }, dst_reg, dst_reg, dst_reg), else => return self.fail("TODO implement airFpext from {} to {}", .{ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }), } } else if (src_bits == 32 and dst_bits == 64) { if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( - .vcvtss2sd, + .{ ._, .vcvtss2sd }, dst_reg, dst_reg, src_mcv.mem(.dword), ) else try self.asmRegisterRegisterRegister( - .vcvtss2sd, + .{ ._, .vcvtss2sd }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ -2492,11 +2529,11 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { else try self.copyToTmpRegister(src_ty, src_mcv)).to128(), ) else if (src_mcv.isMemory()) try self.asmRegisterMemory( - .cvtss2sd, + .{ ._, .cvtss2sd }, dst_reg, src_mcv.mem(.dword), ) else try self.asmRegisterRegister( - .cvtss2sd, + .{ ._, .cvtss2sd }, dst_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? @@ -2537,12 +2574,12 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { switch (dst_mcv) { .register => |dst_reg| { const min_abi_size = @min(dst_abi_size, src_abi_size); - const tag: Mir.Inst.Tag = switch (signedness) { - .signed => if (min_abi_size >= 4) .movsxd else .movsx, - .unsigned => if (min_abi_size >= 4) .mov else .movzx, + const tag: Mir.Inst.FixedTag = switch (signedness) { + .signed => if (min_abi_size >= 4) .{ ._d, .movsx } else .{ ._, .movsx }, + .unsigned => if (min_abi_size >= 4) .{ ._, .mov } else .{ ._, .movzx }, }; - const dst_alias = switch (tag) { - .movsx, .movsxd => dst_reg.to64(), + const dst_alias = switch (tag[1]) { + .movsx => dst_reg.to64(), .mov, .movzx => if (min_abi_size > 4) dst_reg.to64() else dst_reg.to32(), else => unreachable, }; @@ -2570,14 +2607,24 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { try self.genCopy(min_ty, dst_mcv, src_mcv); const extra = dst_abi_size * 8 - dst_int_info.bits; if (extra > 0) { - try self.genShiftBinOpMir(switch (signedness) { - .signed => .sal, - .unsigned => .shl, - }, dst_ty, dst_mcv, .{ .immediate = extra }); - try self.genShiftBinOpMir(switch (signedness) { - .signed => .sar, - .unsigned => .shr, - }, dst_ty, dst_mcv, .{ .immediate = extra }); + try self.genShiftBinOpMir( + switch (signedness) { + .signed => .{ ._l, .sa }, + .unsigned => .{ ._l, .sh }, + }, + dst_ty, + dst_mcv, + .{ .immediate = extra }, + ); + try self.genShiftBinOpMir( + switch (signedness) { + .signed => .{ ._r, .sa }, + .unsigned => .{ ._r, .sh }, + }, + dst_ty, + dst_mcv, + .{ .immediate = extra }, + ); } }, } @@ -2762,8 +2809,8 @@ fn airAddSat(self: *Self, inst: Air.Inst.Index) !void { const reg_bits = self.regBitSize(ty); const cc: Condition = if (ty.isSignedInt()) cc: { try self.genSetReg(limit_reg, ty, dst_mcv); - try self.genShiftBinOpMir(.sar, ty, limit_mcv, .{ .immediate = reg_bits - 1 }); - try self.genBinOpMir(.xor, ty, limit_mcv, .{ + try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .{ .immediate = reg_bits - 1 }); + try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{ .immediate = (@as(u64, 1) << @intCast(u6, reg_bits - 1)) - 1, }); break :cc .o; @@ -2773,7 +2820,7 @@ fn airAddSat(self: *Self, inst: Air.Inst.Index) !void { }); 
break :cc .c; }; - try self.genBinOpMir(.sub, ty, dst_mcv, rhs_mcv); + try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, rhs_mcv); const cmov_abi_size = @max(@intCast(u32, ty.abiSize(self.target.*)), 2); try self.asmCmovccRegisterRegister( @@ -2864,9 +2911,9 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { const reg_bits = self.regBitSize(ty); const cc: Condition = if (ty.isSignedInt()) cc: { try self.genSetReg(limit_reg, ty, lhs_mcv); - try self.genBinOpMir(.xor, ty, limit_mcv, rhs_mcv); - try self.genShiftBinOpMir(.sar, ty, limit_mcv, .{ .immediate = reg_bits - 1 }); - try self.genBinOpMir(.xor, ty, limit_mcv, .{ + try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, rhs_mcv); + try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .{ .immediate = reg_bits - 1 }); + try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{ .immediate = (@as(u64, 1) << @intCast(u6, reg_bits - 1)) - 1, }); break :cc .o; @@ -2979,7 +3026,7 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { }; defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock); - try self.genBinOpMir(.cmp, lhs_ty, tmp_mcv, lhs); + try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, tmp_mcv, lhs); const cc = Condition.ne; const tuple_ty = self.air.typeOfIndex(inst); @@ -3066,12 +3113,17 @@ fn genSetFrameTruncatedOverflowCompare( src_mcv; try self.genSetReg(scratch_reg, hi_limb_ty, hi_limb_mcv); try self.truncateRegister(hi_limb_ty, scratch_reg); - try self.genBinOpMir(.cmp, hi_limb_ty, .{ .register = scratch_reg }, hi_limb_mcv); + try self.genBinOpMir(.{ ._, .cmp }, hi_limb_ty, .{ .register = scratch_reg }, hi_limb_mcv); const eq_reg = temp_regs[2]; if (overflow_cc) |_| { try self.asmSetccRegister(eq_reg.to8(), .ne); - try self.genBinOpMir(.@"or", Type.u8, .{ .register = overflow_reg }, .{ .register = eq_reg }); + try self.genBinOpMir( + .{ ._, .@"or" }, + Type.u8, + .{ .register = overflow_reg }, + .{ .register = eq_reg }, + ); } const payload_off = @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)); @@ -3200,28 +3252,25 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { /// Generates signed or unsigned integer multiplication/division. /// Clobbers .rax and .rdx registers. /// Quotient is saved in .rax and remainder in .rdx. 
-fn genIntMulDivOpMir( - self: *Self, - tag: Mir.Inst.Tag, - ty: Type, - lhs: MCValue, - rhs: MCValue, -) !void { +fn genIntMulDivOpMir(self: *Self, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue, rhs: MCValue) !void { const abi_size = @intCast(u32, ty.abiSize(self.target.*)); if (abi_size > 8) { return self.fail("TODO implement genIntMulDivOpMir for ABI size larger than 8", .{}); } try self.genSetReg(.rax, ty, lhs); - switch (tag) { + switch (tag[1]) { else => unreachable, - .mul, .imul => {}, - .div => try self.asmRegisterRegister(.xor, .edx, .edx), - .idiv => switch (self.regBitSize(ty)) { - 8 => try self.asmOpOnly(.cbw), - 16 => try self.asmOpOnly(.cwd), - 32 => try self.asmOpOnly(.cdq), - 64 => try self.asmOpOnly(.cqo), + .mul => {}, + .div => switch (tag[0]) { + ._ => try self.asmRegisterRegister(.{ ._, .xor }, .edx, .edx), + .i_ => switch (self.regBitSize(ty)) { + 8 => try self.asmOpOnly(.{ ._, .cbw }), + 16 => try self.asmOpOnly(.{ ._, .cwd }), + 32 => try self.asmOpOnly(.{ ._, .cdq }), + 64 => try self.asmOpOnly(.{ ._, .cqo }), + else => unreachable, + }, else => unreachable, }, } @@ -3259,23 +3308,28 @@ fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCVa const divisor_lock = self.register_manager.lockReg(divisor); defer if (divisor_lock) |lock| self.register_manager.unlockReg(lock); - try self.genIntMulDivOpMir(switch (int_info.signedness) { - .signed => .idiv, - .unsigned => .div, - }, ty, .{ .register = dividend }, .{ .register = divisor }); + try self.genIntMulDivOpMir( + switch (int_info.signedness) { + .signed => .{ .i_, .div }, + .unsigned => .{ ._, .div }, + }, + ty, + .{ .register = dividend }, + .{ .register = divisor }, + ); try self.asmRegisterRegister( - .xor, + .{ ._, .xor }, registerAlias(divisor, abi_size), registerAlias(dividend, abi_size), ); try self.asmRegisterImmediate( - .sar, + .{ ._r, .sa }, registerAlias(divisor, abi_size), Immediate.u(int_info.bits - 1), ); try self.asmRegisterRegister( - .@"test", + .{ ._, .@"test" }, registerAlias(.rdx, abi_size), registerAlias(.rdx, abi_size), ); @@ -3284,7 +3338,7 @@ fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCVa registerAlias(.rdx, abi_size), .z, ); - try self.genBinOpMir(.add, ty, .{ .register = divisor }, .{ .register = .rax }); + try self.genBinOpMir(.{ ._, .add }, ty, .{ .register = divisor }, .{ .register = .rax }); return MCValue{ .register = divisor }; } @@ -3406,7 +3460,12 @@ fn airUnwrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { const result = try self.copyToRegisterWithInstTracking(inst, err_union_ty, operand); if (err_off > 0) { const shift = @intCast(u6, err_off * 8); - try self.genShiftBinOpMir(.shr, err_union_ty, result, .{ .immediate = shift }); + try self.genShiftBinOpMir( + .{ ._r, .sh }, + err_union_ty, + result, + .{ .immediate = shift }, + ); } else { try self.truncateRegister(Type.anyerror, result.register); } @@ -3458,7 +3517,12 @@ fn genUnwrapErrorUnionPayloadMir( .{ .register = try self.copyToTmpRegister(err_union_ty, err_union) }; if (payload_off > 0) { const shift = @intCast(u6, payload_off * 8); - try self.genShiftBinOpMir(.shr, err_union_ty, result_mcv, .{ .immediate = shift }); + try self.genShiftBinOpMir( + .{ ._r, .sh }, + err_union_ty, + result_mcv, + .{ .immediate = shift }, + ); } else { try self.truncateRegister(payload_ty, result_mcv.register); } @@ -3495,7 +3559,7 @@ fn airUnwrapErrUnionErrPtr(self: *Self, inst: Air.Inst.Index) !void { const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, 
self.target.*)); const err_abi_size = @intCast(u32, err_ty.abiSize(self.target.*)); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, registerAlias(dst_reg, err_abi_size), Memory.sib(Memory.PtrSize.fromSize(err_abi_size), .{ .base = .{ .reg = src_reg }, @@ -3533,7 +3597,7 @@ fn airUnwrapErrUnionPayloadPtr(self: *Self, inst: Air.Inst.Index) !void { const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, self.target.*)); const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, registerAlias(dst_reg, dst_abi_size), Memory.sib(.qword, .{ .base = .{ .reg = src_reg }, .disp = pl_off }), ); @@ -3559,7 +3623,7 @@ fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, self.target.*)); const err_abi_size = @intCast(u32, err_ty.abiSize(self.target.*)); try self.asmMemoryImmediate( - .mov, + .{ ._, .mov }, Memory.sib(Memory.PtrSize.fromSize(err_abi_size), .{ .base = .{ .reg = src_reg }, .disp = err_off, @@ -3580,7 +3644,7 @@ fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, self.target.*)); const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, registerAlias(dst_reg, dst_abi_size), Memory.sib(.qword, .{ .base = .{ .reg = src_reg }, .disp = pl_off }), ); @@ -3631,13 +3695,13 @@ fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { else => unreachable, .register => |opt_reg| try self.asmRegisterImmediate( - .bts, + .{ ._s, .bt }, opt_reg, Immediate.u(@intCast(u6, pl_abi_size * 8)), ), .load_frame => |frame_addr| try self.asmMemoryImmediate( - .mov, + .{ ._, .mov }, Memory.sib(.byte, .{ .base = .{ .frame = frame_addr.index }, .disp = frame_addr.off + pl_abi_size, @@ -3749,7 +3813,7 @@ fn airPtrSliceLenPtr(self: *Self, inst: Air.Inst.Index) !void { const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, registerAlias(dst_reg, dst_abi_size), Memory.sib(.qword, .{ .base = .{ .reg = src_reg }, @@ -3823,7 +3887,7 @@ fn genSliceElemPtr(self: *Self, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue { try self.genSetReg(addr_reg, Type.usize, slice_mcv); // TODO we could allocate register here, but need to expect addr register and potentially // offset register. - try self.genBinOpMir(.add, slice_ptr_field_type, .{ .register = addr_reg }, .{ + try self.genBinOpMir(.{ ._, .add }, slice_ptr_field_type, .{ .register = addr_reg }, .{ .register = offset_reg, }); return MCValue{ .register = addr_reg.to64() }; @@ -3881,13 +3945,13 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { const frame_index = try self.allocFrameIndex(FrameAlloc.initType(array_ty, self.target.*)); try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array); try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, addr_reg, Memory.sib(.qword, .{ .base = .{ .frame = frame_index } }), ); }, .load_frame => |frame_addr| try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, addr_reg, Memory.sib(.qword, .{ .base = .{ .frame = frame_addr.index }, .disp = frame_addr.off }), ), @@ -3903,7 +3967,12 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { // TODO we could allocate register here, but need to expect addr register and potentially // offset register. 
const dst_mcv = try self.allocRegOrMem(inst, false); - try self.genBinOpMir(.add, Type.usize, .{ .register = addr_reg }, .{ .register = offset_reg }); + try self.genBinOpMir( + .{ ._, .add }, + Type.usize, + .{ .register = addr_reg }, + .{ .register = offset_reg }, + ); try self.genCopy(elem_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } }); return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); @@ -3937,7 +4006,11 @@ fn airPtrElemVal(self: *Self, inst: Air.Inst.Index) !void { try self.copyToTmpRegister(ptr_ty, ptr_mcv); const elem_ptr_lock = self.register_manager.lockRegAssumeUnused(elem_ptr_reg); defer self.register_manager.unlockReg(elem_ptr_lock); - try self.asmRegisterRegister(.add, elem_ptr_reg, offset_reg); + try self.asmRegisterRegister( + .{ ._, .add }, + elem_ptr_reg, + offset_reg, + ); const dst_mcv = try self.allocRegOrMem(inst, true); const dst_lock = switch (dst_mcv) { @@ -3977,7 +4050,7 @@ fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void { defer self.register_manager.unlockReg(offset_reg_lock); const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ptr_ty, ptr); - try self.genBinOpMir(.add, ptr_ty, dst_mcv, .{ .register = offset_reg }); + try self.genBinOpMir(.{ ._, .add }, ptr_ty, dst_mcv, .{ .register = offset_reg }); return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none }); } @@ -4010,7 +4083,12 @@ fn airSetUnionTag(self: *Self, inst: Air.Inst.Index) !void { const adjusted_ptr: MCValue = if (layout.payload_size > 0 and layout.tag_align < layout.payload_align) blk: { // TODO reusing the operand const reg = try self.copyToTmpRegister(ptr_union_ty, ptr); - try self.genBinOpMir(.add, ptr_union_ty, .{ .register = reg }, .{ .immediate = layout.payload_size }); + try self.genBinOpMir( + .{ ._, .add }, + ptr_union_ty, + .{ .register = reg }, + .{ .immediate = layout.payload_size }, + ); break :blk MCValue{ .register = reg }; } else ptr; @@ -4063,7 +4141,7 @@ fn airGetUnionTag(self: *Self, inst: Air.Inst.Index) !void { else 0; const result = try self.copyToRegisterWithInstTracking(inst, union_ty, operand); - try self.genShiftBinOpMir(.shr, Type.usize, result, .{ .immediate = shift }); + try self.genShiftBinOpMir(.{ ._r, .sh }, Type.usize, result, .{ .immediate = shift }); break :blk MCValue{ .register = registerAlias(result.register, @intCast(u32, layout.tag_size)), }; @@ -4100,11 +4178,11 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { const src_bits = src_ty.bitSize(self.target.*); if (self.hasFeature(.lzcnt)) { if (src_bits <= 64) { - try self.genBinOpMir(.lzcnt, src_ty, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._, .lzcnt }, src_ty, dst_mcv, mat_src_mcv); const extra_bits = self.regExtraBits(src_ty); if (extra_bits > 0) { - try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .immediate = extra_bits }); + try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .immediate = extra_bits }); } } else if (src_bits <= 128) { const tmp_reg = try self.register_manager.allocReg(null, gp); @@ -4112,13 +4190,23 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.genBinOpMir(.lzcnt, Type.u64, dst_mcv, mat_src_mcv); - try self.genBinOpMir(.add, dst_ty, dst_mcv, .{ .immediate = 64 }); - try self.genBinOpMir(.lzcnt, Type.u64, tmp_mcv, mat_src_mcv.address().offset(8).deref()); + try self.genBinOpMir(.{ ._, .lzcnt }, Type.u64, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, 
.{ .immediate = 64 }); + try self.genBinOpMir( + .{ ._, .lzcnt }, + Type.u64, + tmp_mcv, + mat_src_mcv.address().offset(8).deref(), + ); try self.asmCmovccRegisterRegister(dst_reg.to32(), tmp_reg.to32(), .nc); if (src_bits < 128) { - try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .immediate = 128 - src_bits }); + try self.genBinOpMir( + .{ ._, .sub }, + dst_ty, + dst_mcv, + .{ .immediate = 128 - src_bits }, + ); } } else return self.fail("TODO airClz of {}", .{src_ty.fmt(self.bin_file.options.module.?)}); break :result dst_mcv; @@ -4130,7 +4218,7 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { const imm_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits ^ (src_bits - 1), }); - try self.genBinOpMir(.bsr, src_ty, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv); const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2); try self.asmCmovccRegisterRegister( @@ -4139,12 +4227,12 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { .z, ); - try self.genBinOpMir(.xor, dst_ty, dst_mcv, .{ .immediate = src_bits - 1 }); + try self.genBinOpMir(.{ ._, .xor }, dst_ty, dst_mcv, .{ .immediate = src_bits - 1 }); } else { const imm_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - self.regBitSize(dst_ty)), }); - try self.genBinOpMir(.bsr, src_ty, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv); const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2); try self.asmCmovccRegisterRegister( @@ -4154,7 +4242,7 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { ); try self.genSetReg(dst_reg, dst_ty, .{ .immediate = src_bits - 1 }); - try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .register = imm_reg }); + try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .register = imm_reg }); } break :result dst_mcv; }; @@ -4195,7 +4283,7 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { break :tmp dst_mcv; }; try self.genBinOpMir( - .@"or", + .{ ._, .@"or" }, src_ty, tmp_mcv, .{ .immediate = (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - extra_bits)) << @@ -4203,7 +4291,7 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { ); break :masked tmp_mcv; } else mat_src_mcv; - try self.genBinOpMir(.tzcnt, src_ty, dst_mcv, masked_mcv); + try self.genBinOpMir(.{ ._, .tzcnt }, src_ty, dst_mcv, masked_mcv); } else if (src_bits <= 128) { const tmp_reg = try self.register_manager.allocReg(null, gp); const tmp_mcv = MCValue{ .register = tmp_reg }; @@ -4213,16 +4301,16 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { const masked_mcv = if (src_bits < 128) masked: { try self.genCopy(Type.u64, dst_mcv, mat_src_mcv.address().offset(8).deref()); try self.genBinOpMir( - .@"or", + .{ ._, .@"or" }, Type.u64, dst_mcv, .{ .immediate = @as(u64, math.maxInt(u64)) << @intCast(u6, src_bits - 64) }, ); break :masked dst_mcv; } else mat_src_mcv.address().offset(8).deref(); - try self.genBinOpMir(.tzcnt, Type.u64, dst_mcv, masked_mcv); - try self.genBinOpMir(.add, dst_ty, dst_mcv, .{ .immediate = 64 }); - try self.genBinOpMir(.tzcnt, Type.u64, tmp_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._, .tzcnt }, Type.u64, dst_mcv, masked_mcv); + try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 }); + try self.genBinOpMir(.{ ._, .tzcnt }, Type.u64, tmp_mcv, mat_src_mcv); try self.asmCmovccRegisterRegister(dst_reg.to32(), tmp_reg.to32(), .nc); } else return self.fail("TODO airCtz of {}", 
.{src_ty.fmt(self.bin_file.options.module.?)}); break :result dst_mcv; @@ -4232,7 +4320,7 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)}); const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits }); - try self.genBinOpMir(.bsf, src_ty, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._, .bsf }, src_ty, dst_mcv, mat_src_mcv); const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2); try self.asmCmovccRegisterRegister( @@ -4270,7 +4358,7 @@ fn airPopcount(self: *Self, inst: Air.Inst.Index) !void { .{ .register = try self.register_manager.allocReg(inst, gp) }; const popcnt_ty = if (src_abi_size > 1) src_ty else Type.u16; - try self.genBinOpMir(.popcnt, popcnt_ty, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._, .popcnt }, popcnt_ty, dst_mcv, mat_src_mcv); break :result dst_mcv; } @@ -4301,54 +4389,54 @@ fn airPopcount(self: *Self, inst: Air.Inst.Index) !void { undefined; // dst = operand - try self.asmRegisterRegister(.mov, tmp, dst); + try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); // tmp = operand - try self.asmRegisterImmediate(.shr, tmp, Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(1)); // tmp = operand >> 1 if (src_abi_size > 4) { - try self.asmRegisterImmediate(.mov, imm, imm_0_1); - try self.asmRegisterRegister(.@"and", tmp, imm); - } else try self.asmRegisterImmediate(.@"and", tmp, imm_0_1); + try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1); + try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); + } else try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1); // tmp = (operand >> 1) & 0x55...55 - try self.asmRegisterRegister(.sub, dst, tmp); + try self.asmRegisterRegister(.{ ._, .sub }, dst, tmp); // dst = temp1 = operand - ((operand >> 1) & 0x55...55) - try self.asmRegisterRegister(.mov, tmp, dst); + try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); // tmp = temp1 - try self.asmRegisterImmediate(.shr, dst, Immediate.u(2)); + try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(2)); // dst = temp1 >> 2 if (src_abi_size > 4) { - try self.asmRegisterImmediate(.mov, imm, imm_00_11); - try self.asmRegisterRegister(.@"and", tmp, imm); - try self.asmRegisterRegister(.@"and", dst, imm); + try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11); + try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); + try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); } else { - try self.asmRegisterImmediate(.@"and", tmp, imm_00_11); - try self.asmRegisterImmediate(.@"and", dst, imm_00_11); + try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11); + try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11); } // tmp = temp1 & 0x33...33 // dst = (temp1 >> 2) & 0x33...33 - try self.asmRegisterRegister(.add, tmp, dst); + try self.asmRegisterRegister(.{ ._, .add }, tmp, dst); // tmp = temp2 = (temp1 & 0x33...33) + ((temp1 >> 2) & 0x33...33) - try self.asmRegisterRegister(.mov, dst, tmp); + try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp); // dst = temp2 - try self.asmRegisterImmediate(.shr, tmp, Immediate.u(4)); + try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(4)); // tmp = temp2 >> 4 - try self.asmRegisterRegister(.add, dst, tmp); + try self.asmRegisterRegister(.{ ._, .add }, dst, tmp); // dst = temp2 + (temp2 >> 4) if (src_abi_size > 4) { - try self.asmRegisterImmediate(.mov, imm, imm_0000_1111); - try self.asmRegisterImmediate(.mov, tmp, 
imm_0000_0001); - try self.asmRegisterRegister(.@"and", dst, imm); - try self.asmRegisterRegister(.imul, dst, tmp); + try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111); + try self.asmRegisterImmediate(.{ ._, .mov }, tmp, imm_0000_0001); + try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); + try self.asmRegisterRegister(.{ .i_, .mul }, dst, tmp); } else { - try self.asmRegisterImmediate(.@"and", dst, imm_0000_1111); + try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111); if (src_abi_size > 1) { - try self.asmRegisterRegisterImmediate(.imul, dst, dst, imm_0000_0001); + try self.asmRegisterRegisterImmediate(.{ .i_, .mul }, dst, dst, imm_0000_0001); } } // dst = temp3 = (temp2 + (temp2 >> 4)) & 0x0f...0f // dst = temp3 * 0x01...01 if (src_abi_size > 1) { - try self.asmRegisterImmediate(.shr, dst, Immediate.u((src_abi_size - 1) * 8)); + try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u((src_abi_size - 1) * 8)); } // dst = (temp3 * 0x01...01) >> (bits - 8) } @@ -4377,11 +4465,11 @@ fn byteSwap(self: *Self, inst: Air.Inst.Index, src_ty: Type, src_mcv: MCValue, m 16 => if ((mem_ok or src_mcv.isRegister()) and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) { - try self.genBinOpMir(.rol, src_ty, src_mcv, .{ .immediate = 8 }); + try self.genBinOpMir(.{ ._l, .ro }, src_ty, src_mcv, .{ .immediate = 8 }); return src_mcv; }, 32, 64 => if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) { - try self.genUnOpMir(.bswap, src_ty, src_mcv); + try self.genUnOpMir(.{ ._, .bswap }, src_ty, src_mcv); return src_mcv; }, } @@ -4398,10 +4486,10 @@ fn byteSwap(self: *Self, inst: Air.Inst.Index, src_ty: Type, src_mcv: MCValue, m try self.genSetReg(dst_mcv.register, src_ty, src_mcv); switch (src_bits) { else => unreachable, - 16 => try self.genBinOpMir(.rol, src_ty, dst_mcv, .{ .immediate = 8 }), - 32, 64 => try self.genUnOpMir(.bswap, src_ty, dst_mcv), + 16 => try self.genBinOpMir(.{ ._l, .ro }, src_ty, dst_mcv, .{ .immediate = 8 }), + 32, 64 => try self.genUnOpMir(.{ ._, .bswap }, src_ty, dst_mcv), } - } else try self.genBinOpMir(.movbe, src_ty, dst_mcv, src_mcv); + } else try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv); return dst_mcv; } @@ -4410,7 +4498,7 @@ fn byteSwap(self: *Self, inst: Air.Inst.Index, src_ty: Type, src_mcv: MCValue, m const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); defer self.register_manager.unlockReg(dst_lock); - try self.genBinOpMir(.movbe, src_ty, dst_mcv, src_mcv); + try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv); return dst_mcv; } @@ -4424,7 +4512,7 @@ fn airByteSwap(self: *Self, inst: Air.Inst.Index) !void { switch (self.regExtraBits(src_ty)) { 0 => {}, else => |extra| try self.genBinOpMir( - if (src_ty.isSignedInt()) .sar else .shr, + if (src_ty.isSignedInt()) .{ ._r, .sa } else .{ ._r, .sh }, src_ty, dst_mcv, .{ .immediate = extra }, @@ -4464,40 +4552,40 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { const imm_0_1 = Immediate.u(mask / 0b1_1); // dst = temp1 = bswap(operand) - try self.asmRegisterRegister(.mov, tmp, dst); + try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); // tmp = temp1 - try self.asmRegisterImmediate(.shr, dst, Immediate.u(4)); + try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(4)); // dst = temp1 >> 4 if (src_abi_size > 4) { - try self.asmRegisterImmediate(.mov, imm, imm_0000_1111); - try self.asmRegisterRegister(.@"and", tmp, imm); - try self.asmRegisterRegister(.@"and", dst, imm); + try 
self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111); + try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); + try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); } else { - try self.asmRegisterImmediate(.@"and", tmp, imm_0000_1111); - try self.asmRegisterImmediate(.@"and", dst, imm_0000_1111); + try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0000_1111); + try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111); } // tmp = temp1 & 0x0F...0F // dst = (temp1 >> 4) & 0x0F...0F - try self.asmRegisterImmediate(.shl, tmp, Immediate.u(4)); + try self.asmRegisterImmediate(.{ ._l, .sh }, tmp, Immediate.u(4)); // tmp = (temp1 & 0x0F...0F) << 4 - try self.asmRegisterRegister(.@"or", dst, tmp); + try self.asmRegisterRegister(.{ ._, .@"or" }, dst, tmp); // dst = temp2 = ((temp1 >> 4) & 0x0F...0F) | ((temp1 & 0x0F...0F) << 4) - try self.asmRegisterRegister(.mov, tmp, dst); + try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); // tmp = temp2 - try self.asmRegisterImmediate(.shr, dst, Immediate.u(2)); + try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(2)); // dst = temp2 >> 2 if (src_abi_size > 4) { - try self.asmRegisterImmediate(.mov, imm, imm_00_11); - try self.asmRegisterRegister(.@"and", tmp, imm); - try self.asmRegisterRegister(.@"and", dst, imm); + try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11); + try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); + try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); } else { - try self.asmRegisterImmediate(.@"and", tmp, imm_00_11); - try self.asmRegisterImmediate(.@"and", dst, imm_00_11); + try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11); + try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11); } // tmp = temp2 & 0x33...33 // dst = (temp2 >> 2) & 0x33...33 try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, if (src_abi_size > 4) tmp.to64() else tmp.to32(), Memory.sib(.qword, .{ .base = .{ .reg = dst.to64() }, @@ -4505,22 +4593,22 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { }), ); // tmp = temp3 = ((temp2 >> 2) & 0x33...33) + ((temp2 & 0x33...33) << 2) - try self.asmRegisterRegister(.mov, dst, tmp); + try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp); // dst = temp3 - try self.asmRegisterImmediate(.shr, tmp, Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(1)); // tmp = temp3 >> 1 if (src_abi_size > 4) { - try self.asmRegisterImmediate(.mov, imm, imm_0_1); - try self.asmRegisterRegister(.@"and", dst, imm); - try self.asmRegisterRegister(.@"and", tmp, imm); + try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1); + try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); + try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); } else { - try self.asmRegisterImmediate(.@"and", dst, imm_0_1); - try self.asmRegisterImmediate(.@"and", tmp, imm_0_1); + try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0_1); + try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1); } // dst = temp3 & 0x55...55 // tmp = (temp3 >> 1) & 0x55...55 try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, if (src_abi_size > 4) dst.to64() else dst.to32(), Memory.sib(.qword, .{ .base = .{ .reg = tmp.to64() }, @@ -4533,7 +4621,7 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { switch (self.regExtraBits(src_ty)) { 0 => {}, else => |extra| try self.genBinOpMir( - if (src_ty.isSignedInt()) .sar else .shr, + if (src_ty.isSignedInt()) .{ ._r, .sa } else .{ ._r, .sh }, src_ty, dst_mcv, .{ .immediate = extra 
}, @@ -4590,8 +4678,8 @@ fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void { try self.genBinOpMir(switch (ty_bits) { // No point using an extra prefix byte for *pd which performs the same operation. 16, 32, 64, 128 => switch (tag) { - .neg => .xorps, - .fabs => .andnps, + .neg => .{ ._, .xorps }, + .fabs => .{ ._, .andnps }, else => unreachable, }, 80 => return self.fail("TODO implement airFloatSign for {}", .{ @@ -4622,25 +4710,25 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4 if (!self.hasFeature(.sse4_1)) return self.fail("TODO implement genRound without sse4_1 feature", .{}); - const mir_tag = if (@as(?Mir.Inst.Tag, switch (ty.zigTypeTag()) { + const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag()) { .Float => switch (ty.floatBits(self.target.*)) { - 32 => if (self.hasFeature(.avx)) .vroundss else .roundss, - 64 => if (self.hasFeature(.avx)) .vroundsd else .roundsd, + 32 => if (self.hasFeature(.avx)) .{ ._, .vroundss } else .{ ._, .roundss }, + 64 => if (self.hasFeature(.avx)) .{ ._, .vroundsd } else .{ ._, .roundsd }, 16, 80, 128 => null, else => unreachable, }, .Vector => switch (ty.childType().zigTypeTag()) { .Float => switch (ty.childType().floatBits(self.target.*)) { 32 => switch (ty.vectorLen()) { - 1 => if (self.hasFeature(.avx)) .vroundss else .roundss, - 2...4 => if (self.hasFeature(.avx)) .vroundps else .roundps, - 5...8 => if (self.hasFeature(.avx)) .vroundps else null, + 1 => if (self.hasFeature(.avx)) .{ ._, .vroundss } else .{ ._, .roundss }, + 2...4 => if (self.hasFeature(.avx)) .{ ._, .vroundps } else .{ ._, .roundps }, + 5...8 => if (self.hasFeature(.avx)) .{ ._, .vroundps } else null, else => null, }, 64 => switch (ty.vectorLen()) { - 1 => if (self.hasFeature(.avx)) .vroundsd else .roundsd, - 2 => if (self.hasFeature(.avx)) .vroundpd else .roundpd, - 3...4 => if (self.hasFeature(.avx)) .vroundpd else null, + 1 => if (self.hasFeature(.avx)) .{ ._, .vroundsd } else .{ ._, .roundsd }, + 2 => if (self.hasFeature(.avx)) .{ ._, .vroundpd } else .{ ._, .roundpd }, + 3...4 => if (self.hasFeature(.avx)) .{ ._, .vroundpd } else null, else => null, }, 16, 80, 128 => null, @@ -4655,7 +4743,7 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4 const abi_size = @intCast(u32, ty.abiSize(self.target.*)); const dst_alias = registerAlias(dst_reg, abi_size); - switch (mir_tag) { + switch (mir_tag[1]) { .vroundss, .vroundsd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( mir_tag, dst_alias, @@ -4704,25 +4792,25 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); const result: MCValue = result: { - const mir_tag = if (@as(?Mir.Inst.Tag, switch (ty.zigTypeTag()) { + const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag()) { .Float => switch (ty.floatBits(self.target.*)) { 16 => if (self.hasFeature(.f16c)) { const mat_src_reg = if (src_mcv.isRegister()) src_mcv.getReg().? 
else try self.copyToTmpRegister(ty, src_mcv); - try self.asmRegisterRegister(.vcvtph2ps, dst_reg, mat_src_reg.to128()); - try self.asmRegisterRegisterRegister(.vsqrtss, dst_reg, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, mat_src_reg.to128()); + try self.asmRegisterRegisterRegister(.{ ._, .vsqrtss }, dst_reg, dst_reg, dst_reg); try self.asmRegisterRegisterImmediate( - .vcvtps2ph, + .{ ._, .vcvtps2ph }, dst_reg, dst_reg, Immediate.u(0b1_00), ); break :result dst_mcv; } else null, - 32 => if (self.hasFeature(.avx)) .vsqrtss else .sqrtss, - 64 => if (self.hasFeature(.avx)) .vsqrtsd else .sqrtsd, + 32 => if (self.hasFeature(.avx)) .{ ._, .vsqrtss } else .{ ._, .sqrtss }, + 64 => if (self.hasFeature(.avx)) .{ ._, .vsqrtsd } else .{ ._, .sqrtsd }, 80, 128 => null, else => unreachable, }, @@ -4731,16 +4819,21 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { 16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen()) { 1 => { try self.asmRegisterRegister( - .vcvtph2ps, + .{ ._, .vcvtph2ps }, dst_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? else try self.copyToTmpRegister(ty, src_mcv)).to128(), ); - try self.asmRegisterRegisterRegister(.vsqrtss, dst_reg, dst_reg, dst_reg); + try self.asmRegisterRegisterRegister( + .{ ._, .vsqrtss }, + dst_reg, + dst_reg, + dst_reg, + ); try self.asmRegisterRegisterImmediate( - .vcvtps2ph, + .{ ._, .vcvtps2ph }, dst_reg, dst_reg, Immediate.u(0b1_00), @@ -4750,22 +4843,22 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { 2...8 => { const wide_reg = registerAlias(dst_reg, abi_size * 2); if (src_mcv.isMemory()) try self.asmRegisterMemory( - .vcvtph2ps, + .{ ._, .vcvtph2ps }, wide_reg, src_mcv.mem(Memory.PtrSize.fromSize( @intCast(u32, @divExact(wide_reg.bitSize(), 16)), )), ) else try self.asmRegisterRegister( - .vcvtph2ps, + .{ ._, .vcvtph2ps }, wide_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? 
else try self.copyToTmpRegister(ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.vsqrtps, wide_reg, wide_reg); + try self.asmRegisterRegister(.{ ._, .vsqrtps }, wide_reg, wide_reg); try self.asmRegisterRegisterImmediate( - .vcvtps2ph, + .{ ._, .vcvtps2ph }, dst_reg, wide_reg, Immediate.u(0b1_00), @@ -4775,15 +4868,15 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { else => null, } else null, 32 => switch (ty.vectorLen()) { - 1 => if (self.hasFeature(.avx)) .vsqrtss else .sqrtss, - 2...4 => if (self.hasFeature(.avx)) .vsqrtps else .sqrtps, - 5...8 => if (self.hasFeature(.avx)) .vsqrtps else null, + 1 => if (self.hasFeature(.avx)) .{ ._, .vsqrtss } else .{ ._, .sqrtss }, + 2...4 => if (self.hasFeature(.avx)) .{ ._, .vsqrtps } else .{ ._, .sqrtps }, + 5...8 => if (self.hasFeature(.avx)) .{ ._, .vsqrtps } else null, else => null, }, 64 => switch (ty.vectorLen()) { - 1 => if (self.hasFeature(.avx)) .vsqrtsd else .sqrtsd, - 2 => if (self.hasFeature(.avx)) .vsqrtpd else .sqrtpd, - 3...4 => if (self.hasFeature(.avx)) .vsqrtpd else null, + 1 => if (self.hasFeature(.avx)) .{ ._, .vsqrtsd } else .{ ._, .sqrtsd }, + 2 => if (self.hasFeature(.avx)) .{ ._, .vsqrtpd } else .{ ._, .sqrtpd }, + 3...4 => if (self.hasFeature(.avx)) .{ ._, .vsqrtpd } else null, else => null, }, 80, 128 => null, @@ -4795,7 +4888,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { })) |tag| tag else return self.fail("TODO implement airSqrt for {}", .{ ty.fmt(self.bin_file.options.module.?), }); - switch (mir_tag) { + switch (mir_tag[1]) { .vsqrtss, .vsqrtsd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( mir_tag, dst_reg, @@ -4911,14 +5004,14 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn if (load_abi_size <= 8) { const load_reg = registerAlias(dst_reg, load_abi_size); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, load_reg, Memory.sib(Memory.PtrSize.fromSize(load_abi_size), .{ .base = .{ .reg = ptr_reg }, .disp = val_byte_off, }), ); - try self.asmRegisterImmediate(.shr, load_reg, Immediate.u(val_bit_off)); + try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, Immediate.u(val_bit_off)); } else { const tmp_reg = registerAlias(try self.register_manager.allocReg(null, gp), val_abi_size); const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); @@ -4926,7 +5019,7 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn const dst_alias = registerAlias(dst_reg, val_abi_size); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, dst_alias, Memory.sib(Memory.PtrSize.fromSize(val_abi_size), .{ .base = .{ .reg = ptr_reg }, @@ -4934,14 +5027,19 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn }), ); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, tmp_reg, Memory.sib(Memory.PtrSize.fromSize(val_abi_size), .{ .base = .{ .reg = ptr_reg }, .disp = val_byte_off + 1, }), ); - try self.asmRegisterRegisterImmediate(.shrd, dst_alias, tmp_reg, Immediate.u(val_bit_off)); + try self.asmRegisterRegisterImmediate( + .{ ._rd, .sh }, + dst_alias, + tmp_reg, + Immediate.u(val_bit_off), + ); } if (val_extra_bits > 0) try self.truncateRegister(val_ty, dst_reg); @@ -5047,13 +5145,13 @@ fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) In const part_mask_not = part_mask ^ (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - limb_abi_bits)); if (limb_abi_size <= 4) { - try self.asmMemoryImmediate(.@"and", limb_mem, Immediate.u(part_mask_not)); + try 
self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, Immediate.u(part_mask_not)); } else if (math.cast(i32, @bitCast(i64, part_mask_not))) |small| { - try self.asmMemoryImmediate(.@"and", limb_mem, Immediate.s(small)); + try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, Immediate.s(small)); } else { const part_mask_reg = try self.register_manager.allocReg(null, gp); - try self.asmRegisterImmediate(.mov, part_mask_reg, Immediate.u(part_mask_not)); - try self.asmMemoryRegister(.@"and", limb_mem, part_mask_reg); + try self.asmRegisterImmediate(.{ ._, .mov }, part_mask_reg, Immediate.u(part_mask_not)); + try self.asmMemoryRegister(.{ ._, .@"and" }, limb_mem, part_mask_reg); } if (src_bit_size <= 64) { @@ -5064,14 +5162,26 @@ fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) In try self.genSetReg(tmp_reg, src_ty, src_mcv); switch (limb_i) { - 0 => try self.genShiftBinOpMir(.shl, src_ty, tmp_mcv, .{ .immediate = src_bit_off }), - 1 => try self.genShiftBinOpMir(.shr, src_ty, tmp_mcv, .{ - .immediate = limb_abi_bits - src_bit_off, - }), + 0 => try self.genShiftBinOpMir( + .{ ._l, .sh }, + src_ty, + tmp_mcv, + .{ .immediate = src_bit_off }, + ), + 1 => try self.genShiftBinOpMir( + .{ ._r, .sh }, + src_ty, + tmp_mcv, + .{ .immediate = limb_abi_bits - src_bit_off }, + ), else => unreachable, } - try self.genBinOpMir(.@"and", src_ty, tmp_mcv, .{ .immediate = part_mask }); - try self.asmMemoryRegister(.@"or", limb_mem, registerAlias(tmp_reg, limb_abi_size)); + try self.genBinOpMir(.{ ._, .@"and" }, src_ty, tmp_mcv, .{ .immediate = part_mask }); + try self.asmMemoryRegister( + .{ ._, .@"or" }, + limb_mem, + registerAlias(tmp_reg, limb_abi_size), + ); } else return self.fail("TODO: implement packed store of {}", .{ src_ty.fmt(self.bin_file.options.module.?), }); @@ -5171,7 +5281,7 @@ fn fieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, index: u32 .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, else => mcv, }); - try self.genBinOpMir(.add, Type.usize, dst_mcv, .{ .register = offset_reg }); + try self.genBinOpMir(.{ ._, .add }, Type.usize, dst_mcv, .{ .register = offset_reg }); break :result dst_mcv; }, .indirect => |reg_off| break :result .{ .indirect = .{ @@ -5255,14 +5365,14 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { if (load_abi_size <= 8) { const load_reg = registerAlias(dst_reg, load_abi_size); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, load_reg, Memory.sib(Memory.PtrSize.fromSize(load_abi_size), .{ .base = .{ .frame = frame_addr.index }, .disp = frame_addr.off + field_byte_off, }), ); - try self.asmRegisterImmediate(.shr, load_reg, Immediate.u(field_bit_off)); + try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, Immediate.u(field_bit_off)); } else { const tmp_reg = registerAlias( try self.register_manager.allocReg(null, gp), @@ -5273,7 +5383,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { const dst_alias = registerAlias(dst_reg, field_abi_size); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, dst_alias, Memory.sib(Memory.PtrSize.fromSize(field_abi_size), .{ .base = .{ .frame = frame_addr.index }, @@ -5281,7 +5391,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { }), ); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, tmp_reg, Memory.sib(Memory.PtrSize.fromSize(field_abi_size), .{ .base = .{ .frame = frame_addr.index }, @@ -5289,7 +5399,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { }), ); try self.asmRegisterRegisterImmediate( - .shrd, + 
.{ ._rd, .sh }, dst_alias, tmp_reg, Immediate.u(field_bit_off), @@ -5325,21 +5435,26 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { defer if (dst_mcv_lock) |lock| self.register_manager.unlockReg(lock); // Shift by struct_field_offset. - try self.genShiftBinOpMir(.shr, Type.usize, dst_mcv, .{ .immediate = field_off }); + try self.genShiftBinOpMir( + .{ ._r, .sh }, + Type.usize, + dst_mcv, + .{ .immediate = field_off }, + ); // Mask to field_bit_size bits const field_bit_size = field_ty.bitSize(self.target.*); const mask = ~@as(u64, 0) >> @intCast(u6, 64 - field_bit_size); const tmp_reg = try self.copyToTmpRegister(Type.usize, .{ .immediate = mask }); - try self.genBinOpMir(.@"and", Type.usize, dst_mcv, .{ .register = tmp_reg }); + try self.genBinOpMir(.{ ._, .@"and" }, Type.usize, dst_mcv, .{ .register = tmp_reg }); const signedness = if (field_ty.isAbiInt()) field_ty.intInfo(self.target.*).signedness else .unsigned; const field_byte_size = @intCast(u32, field_ty.abiSize(self.target.*)); if (signedness == .signed and field_byte_size < 8) { try self.asmRegisterRegister( - if (field_byte_size >= 4) .movsxd else .movsx, + if (field_byte_size >= 4) .{ ._d, .movsx } else .{ ._, .movsx }, dst_mcv.register, registerAlias(dst_mcv.register, field_byte_size), ); @@ -5451,17 +5566,17 @@ fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: if (limb_pl.base.tag == .int_unsigned and self.regExtraBits(limb_ty) > 0) { const mask = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - limb_pl.data); - try self.genBinOpMir(.xor, limb_ty, limb_mcv, .{ .immediate = mask }); - } else try self.genUnOpMir(.not, limb_ty, limb_mcv); + try self.genBinOpMir(.{ ._, .xor }, limb_ty, limb_mcv, .{ .immediate = mask }); + } else try self.genUnOpMir(.{ ._, .not }, limb_ty, limb_mcv); } }, - .neg => try self.genUnOpMir(.neg, src_ty, dst_mcv), + .neg => try self.genUnOpMir(.{ ._, .neg }, src_ty, dst_mcv), else => unreachable, } return dst_mcv; } -fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValue) !void { +fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: MCValue) !void { const abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); if (abi_size > 8) return self.fail("TODO implement {} for {}", .{ mir_tag, @@ -5504,7 +5619,7 @@ fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValue /// Clobbers .rcx for non-immediate shift value. 
fn genShiftBinOpMir( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, ty: Type, lhs_mcv: MCValue, shift_mcv: MCValue, @@ -5589,16 +5704,16 @@ fn genShiftBinOpMir( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - const info: struct { offsets: [2]i32, double_tag: Mir.Inst.Tag } = switch (tag) { - .shl, .sal => .{ .offsets = .{ 0, 8 }, .double_tag = .shld }, - .shr, .sar => .{ .offsets = .{ 8, 0 }, .double_tag = .shrd }, + const info: struct { offsets: [2]i32, double_tag: Mir.Inst.FixedTag } = switch (tag[0]) { + ._l => .{ .offsets = .{ 0, 8 }, .double_tag = .{ ._ld, .sh } }, + ._r => .{ .offsets = .{ 8, 0 }, .double_tag = .{ ._rd, .sh } }, else => unreachable, }; switch (lhs_mcv) { .load_frame => |dst_frame_addr| switch (rhs_mcv) { .immediate => |rhs_imm| if (rhs_imm == 0) {} else if (rhs_imm < 64) { try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, tmp_reg, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, @@ -5625,7 +5740,7 @@ fn genShiftBinOpMir( } else { assert(rhs_imm < 128); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, tmp_reg, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, @@ -5636,34 +5751,30 @@ fn genShiftBinOpMir( try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(rhs_imm - 64)); } try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, .disp = dst_frame_addr.off + info.offsets[1], }), tmp_reg, ); - switch (tag) { - .shl, .sal, .shr => { - try self.asmRegisterRegister(.xor, tmp_reg.to32(), tmp_reg.to32()); - try self.asmMemoryRegister( - .mov, - Memory.sib(.qword, .{ - .base = .{ .frame = dst_frame_addr.index }, - .disp = dst_frame_addr.off + info.offsets[0], - }), - tmp_reg, - ); - }, - .sar => try self.asmMemoryImmediate( - tag, + if (tag[0] == ._r and tag[1] == .sa) try self.asmMemoryImmediate( + tag, + Memory.sib(.qword, .{ + .base = .{ .frame = dst_frame_addr.index }, + .disp = dst_frame_addr.off + info.offsets[0], + }), + Immediate.u(63), + ) else { + try self.asmRegisterRegister(.{ ._, .xor }, tmp_reg.to32(), tmp_reg.to32()); + try self.asmMemoryRegister( + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, .disp = dst_frame_addr.off + info.offsets[0], }), - Immediate.u(63), - ), - else => unreachable, + tmp_reg, + ); } }, else => { @@ -5677,7 +5788,7 @@ fn genShiftBinOpMir( try self.genSetReg(.cl, Type.u8, rhs_mcv); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, first_reg, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, @@ -5685,32 +5796,28 @@ fn genShiftBinOpMir( }), ); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, second_reg, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, .disp = dst_frame_addr.off + info.offsets[1], }), ); - switch (tag) { - .shl, .sal, .shr => try self.asmRegisterRegister( - .xor, - tmp_reg.to32(), - tmp_reg.to32(), - ), - .sar => { - try self.asmRegisterRegister(.mov, tmp_reg, first_reg); - try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(63)); - }, - else => unreachable, - } + if (tag[0] == ._r and tag[1] == .sa) { + try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, first_reg); + try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(63)); + } else try self.asmRegisterRegister( + .{ ._, .xor }, + tmp_reg.to32(), + tmp_reg.to32(), + ); try self.asmRegisterRegisterRegister(info.double_tag, second_reg, first_reg, .cl); try self.asmRegisterRegister(tag, first_reg, .cl); - 
try self.asmRegisterImmediate(.cmp, .cl, Immediate.u(64)); + try self.asmRegisterImmediate(.{ ._, .cmp }, .cl, Immediate.u(64)); try self.asmCmovccRegisterRegister(second_reg, first_reg, .ae); try self.asmCmovccRegisterRegister(first_reg, tmp_reg, .ae); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, .disp = dst_frame_addr.off + info.offsets[1], @@ -5718,7 +5825,7 @@ fn genShiftBinOpMir( second_reg, ); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, .disp = dst_frame_addr.off + info.offsets[0], @@ -5743,7 +5850,7 @@ fn genShiftBinOpMir( /// Asserts .rcx is free. fn genShiftBinOp( self: *Self, - tag: Air.Inst.Tag, + air_tag: Air.Inst.Tag, maybe_inst: ?Air.Inst.Index, lhs_mcv: MCValue, rhs_mcv: MCValue, @@ -5788,14 +5895,14 @@ fn genShiftBinOp( }; const signedness = lhs_ty.intInfo(self.target.*).signedness; - try self.genShiftBinOpMir(switch (tag) { + try self.genShiftBinOpMir(switch (air_tag) { .shl, .shl_exact => switch (signedness) { - .signed => .sal, - .unsigned => .shl, + .signed => .{ ._l, .sa }, + .unsigned => .{ ._l, .sh }, }, .shr, .shr_exact => switch (signedness) { - .signed => .sar, - .unsigned => .shr, + .signed => .{ ._r, .sa }, + .unsigned => .{ ._r, .sh }, }, else => unreachable, }, lhs_ty, dst_mcv, rhs_mcv); @@ -5855,20 +5962,18 @@ fn genMulDivBinOp( try self.register_manager.getReg(.rax, track_inst_rax); try self.register_manager.getReg(.rdx, track_inst_rdx); - const mir_tag: Mir.Inst.Tag = switch (signedness) { + try self.genIntMulDivOpMir(switch (signedness) { .signed => switch (tag) { - .mul, .mulwrap => .imul, - .div_trunc, .div_exact, .rem => .idiv, + .mul, .mulwrap => .{ .i_, .mul }, + .div_trunc, .div_exact, .rem => .{ .i_, .div }, else => unreachable, }, .unsigned => switch (tag) { - .mul, .mulwrap => .mul, - .div_trunc, .div_exact, .rem => .div, + .mul, .mulwrap => .{ ._, .mul }, + .div_trunc, .div_exact, .rem => .{ ._, .div }, else => unreachable, }, - }; - - try self.genIntMulDivOpMir(mir_tag, ty, lhs, rhs); + }, ty, lhs, rhs); if (dst_abi_size <= 8) return .{ .register = registerAlias(switch (tag) { .mul, .mulwrap, .div_trunc, .div_exact => .rax, @@ -5878,7 +5983,7 @@ fn genMulDivBinOp( const dst_mcv = try self.allocRegOrMemAdvanced(dst_ty, maybe_inst, false); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .disp = dst_mcv.load_frame.off, @@ -5886,7 +5991,7 @@ fn genMulDivBinOp( .rax, ); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .disp = dst_mcv.load_frame.off + 8, @@ -5927,12 +6032,12 @@ fn genMulDivBinOp( try self.copyToRegisterWithInstTracking(inst, ty, lhs) else .{ .register = try self.copyToTmpRegister(ty, lhs) }; - try self.genBinOpMir(.sub, ty, result, div_floor); + try self.genBinOpMir(.{ ._, .sub }, ty, result, div_floor); return result; }, .unsigned => { - try self.genIntMulDivOpMir(.div, ty, lhs, rhs); + try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs, rhs); return .{ .register = registerAlias(.rdx, abi_size) }; }, } @@ -5974,7 +6079,7 @@ fn genMulDivBinOp( switch (signedness) { .signed => return try self.genInlineIntDivFloor(ty, lhs, actual_rhs), .unsigned => { - try self.genIntMulDivOpMir(.div, ty, lhs, actual_rhs); + try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs, actual_rhs); return .{ .register = registerAlias(.rax, abi_size) }; }, } @@ -6072,11 
+6177,11 @@ fn genBinOp( switch (air_tag) { .add, .addwrap, - => try self.genBinOpMir(.add, lhs_ty, dst_mcv, src_mcv), + => try self.genBinOpMir(.{ ._, .add }, lhs_ty, dst_mcv, src_mcv), .sub, .subwrap, - => try self.genBinOpMir(.sub, lhs_ty, dst_mcv, src_mcv), + => try self.genBinOpMir(.{ ._, .sub }, lhs_ty, dst_mcv, src_mcv), .ptr_add, .ptr_sub, @@ -6088,22 +6193,27 @@ fn genBinOp( const elem_size = lhs_ty.elemType2().abiSize(self.target.*); try self.genIntMulComplexOpMir(rhs_ty, tmp_mcv, .{ .immediate = elem_size }); - try self.genBinOpMir(switch (air_tag) { - .ptr_add => .add, - .ptr_sub => .sub, - else => unreachable, - }, lhs_ty, dst_mcv, tmp_mcv); + try self.genBinOpMir( + switch (air_tag) { + .ptr_add => .{ ._, .add }, + .ptr_sub => .{ ._, .sub }, + else => unreachable, + }, + lhs_ty, + dst_mcv, + tmp_mcv, + ); }, .bool_or, .bit_or, - => try self.genBinOpMir(.@"or", lhs_ty, dst_mcv, src_mcv), + => try self.genBinOpMir(.{ ._, .@"or" }, lhs_ty, dst_mcv, src_mcv), .bool_and, .bit_and, - => try self.genBinOpMir(.@"and", lhs_ty, dst_mcv, src_mcv), + => try self.genBinOpMir(.{ ._, .@"and" }, lhs_ty, dst_mcv, src_mcv), - .xor => try self.genBinOpMir(.xor, lhs_ty, dst_mcv, src_mcv), + .xor => try self.genBinOpMir(.{ ._, .xor }, lhs_ty, dst_mcv, src_mcv), .min, .max, @@ -6129,7 +6239,7 @@ fn genBinOp( }; defer if (mat_mcv_lock) |lock| self.register_manager.unlockReg(lock); - try self.genBinOpMir(.cmp, lhs_ty, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, dst_mcv, mat_src_mcv); const int_info = lhs_ty.intInfo(self.target.*); const cc: Condition = switch (int_info.signedness) { @@ -6206,7 +6316,7 @@ fn genBinOp( } const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size); - const mir_tag = if (@as(?Mir.Inst.Tag, switch (lhs_ty.zigTypeTag()) { + const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) { else => unreachable, .Float => switch (lhs_ty.floatBits(self.target.*)) { 16 => if (self.hasFeature(.f16c)) { @@ -6215,13 +6325,13 @@ fn genBinOp( defer self.register_manager.unlockReg(tmp_lock); if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( - .vpinsrw, + .{ ._, .vpinsrw }, dst_reg, dst_reg, src_mcv.mem(.word), Immediate.u(1), ) else try self.asmRegisterRegisterRegister( - .vpunpcklwd, + .{ ._, .vpunpcklwd }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ -6229,15 +6339,15 @@ fn genBinOp( else try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.vcvtph2ps, dst_reg, dst_reg); - try self.asmRegisterRegister(.vmovshdup, tmp_reg, dst_reg); + try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ ._, .vmovshdup }, tmp_reg, dst_reg); try self.asmRegisterRegisterRegister( switch (air_tag) { - .add => .vaddss, - .sub => .vsubss, - .div_float, .div_trunc, .div_floor, .div_exact => .vdivss, - .max => .vmaxss, - .min => .vmaxss, + .add => .{ ._, .vaddss }, + .sub => .{ ._, .vsubss }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivss }, + .max => .{ ._, .vmaxss }, + .min => .{ ._, .vmaxss }, else => unreachable, }, dst_reg, @@ -6245,7 +6355,7 @@ fn genBinOp( tmp_reg, ); try self.asmRegisterRegisterImmediate( - .vcvtps2ph, + .{ ._, .vcvtps2ph }, dst_reg, dst_reg, Immediate.u(0b1_00), @@ -6253,29 +6363,29 @@ fn genBinOp( return dst_mcv; } else null, 32 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .vaddss else .addss, - .sub => if (self.hasFeature(.avx)) .vsubss else .subss, - .mul => if (self.hasFeature(.avx)) .vmulss else .mulss, 
+ .add => if (self.hasFeature(.avx)) .{ ._, .vaddss } else .{ ._, .addss }, + .sub => if (self.hasFeature(.avx)) .{ ._, .vsubss } else .{ ._, .subss }, + .mul => if (self.hasFeature(.avx)) .{ ._, .vmulss } else .{ ._, .mulss }, .div_float, .div_trunc, .div_floor, .div_exact, - => if (self.hasFeature(.avx)) .vdivss else .divss, - .max => if (self.hasFeature(.avx)) .vmaxss else .maxss, - .min => if (self.hasFeature(.avx)) .vminss else .minss, + => if (self.hasFeature(.avx)) .{ ._, .vdivss } else .{ ._, .divss }, + .max => if (self.hasFeature(.avx)) .{ ._, .vmaxss } else .{ ._, .maxss }, + .min => if (self.hasFeature(.avx)) .{ ._, .vminss } else .{ ._, .minss }, else => unreachable, }, 64 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .vaddsd else .addsd, - .sub => if (self.hasFeature(.avx)) .vsubsd else .subsd, - .mul => if (self.hasFeature(.avx)) .vmulsd else .mulsd, + .add => if (self.hasFeature(.avx)) .{ ._, .vaddsd } else .{ ._, .addsd }, + .sub => if (self.hasFeature(.avx)) .{ ._, .vsubsd } else .{ ._, .subsd }, + .mul => if (self.hasFeature(.avx)) .{ ._, .vmulsd } else .{ ._, .mulsd }, .div_float, .div_trunc, .div_floor, .div_exact, - => if (self.hasFeature(.avx)) .vdivsd else .divsd, - .max => if (self.hasFeature(.avx)) .vmaxsd else .maxsd, - .min => if (self.hasFeature(.avx)) .vminsd else .minsd, + => if (self.hasFeature(.avx)) .{ ._, .vdivsd } else .{ ._, .divsd }, + .max => if (self.hasFeature(.avx)) .{ ._, .vmaxsd } else .{ ._, .maxsd }, + .min => if (self.hasFeature(.avx)) .{ ._, .vminsd } else .{ ._, .minsd }, else => unreachable, }, 80, 128 => null, @@ -6291,13 +6401,13 @@ fn genBinOp( defer self.register_manager.unlockReg(tmp_lock); if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( - .vpinsrw, + .{ ._, .vpinsrw }, dst_reg, dst_reg, src_mcv.mem(.word), Immediate.u(1), ) else try self.asmRegisterRegisterRegister( - .vpunpcklwd, + .{ ._, .vpunpcklwd }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ -6305,15 +6415,15 @@ fn genBinOp( else try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.vcvtph2ps, dst_reg, dst_reg); - try self.asmRegisterRegister(.vmovshdup, tmp_reg, dst_reg); + try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ ._, .vmovshdup }, tmp_reg, dst_reg); try self.asmRegisterRegisterRegister( switch (air_tag) { - .add => .vaddss, - .sub => .vsubss, - .div_float, .div_trunc, .div_floor, .div_exact => .vdivss, - .max => .vmaxss, - .min => .vmaxss, + .add => .{ ._, .vaddss }, + .sub => .{ ._, .vsubss }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivss }, + .max => .{ ._, .vmaxss }, + .min => .{ ._, .vmaxss }, else => unreachable, }, dst_reg, @@ -6321,7 +6431,7 @@ fn genBinOp( tmp_reg, ); try self.asmRegisterRegisterImmediate( - .vcvtps2ph, + .{ ._, .vcvtps2ph }, dst_reg, dst_reg, Immediate.u(0b1_00), @@ -6334,12 +6444,12 @@ fn genBinOp( defer self.register_manager.unlockReg(tmp_lock); if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( - .vpinsrd, + .{ ._, .vpinsrd }, dst_reg, src_mcv.mem(.dword), Immediate.u(1), ) else try self.asmRegisterRegisterRegister( - .vunpcklps, + .{ ._, .vunpcklps }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ -6347,15 +6457,20 @@ fn genBinOp( else try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.vcvtph2ps, dst_reg, dst_reg); - try self.asmRegisterRegisterRegister(.vmovhlps, tmp_reg, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, 
dst_reg, dst_reg); + try self.asmRegisterRegisterRegister( + .{ ._, .vmovhlps }, + tmp_reg, + dst_reg, + dst_reg, + ); try self.asmRegisterRegisterRegister( switch (air_tag) { - .add => .vaddps, - .sub => .vsubps, - .div_float, .div_trunc, .div_floor, .div_exact => .vdivps, - .max => .vmaxps, - .min => .vmaxps, + .add => .{ ._, .vaddps }, + .sub => .{ ._, .vsubps }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps }, + .max => .{ ._, .vmaxps }, + .min => .{ ._, .vmaxps }, else => unreachable, }, dst_reg, @@ -6363,7 +6478,7 @@ fn genBinOp( tmp_reg, ); try self.asmRegisterRegisterImmediate( - .vcvtps2ph, + .{ ._, .vcvtps2ph }, dst_reg, dst_reg, Immediate.u(0b1_00), @@ -6375,13 +6490,13 @@ fn genBinOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.asmRegisterRegister(.vcvtph2ps, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg); if (src_mcv.isMemory()) try self.asmRegisterMemory( - .vcvtph2ps, + .{ ._, .vcvtph2ps }, tmp_reg, src_mcv.mem(.qword), ) else try self.asmRegisterRegister( - .vcvtph2ps, + .{ ._, .vcvtph2ps }, tmp_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? @@ -6390,11 +6505,11 @@ fn genBinOp( ); try self.asmRegisterRegisterRegister( switch (air_tag) { - .add => .vaddps, - .sub => .vsubps, - .div_float, .div_trunc, .div_floor, .div_exact => .vdivps, - .max => .vmaxps, - .min => .vmaxps, + .add => .{ ._, .vaddps }, + .sub => .{ ._, .vsubps }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps }, + .max => .{ ._, .vmaxps }, + .min => .{ ._, .vmaxps }, else => unreachable, }, dst_reg, @@ -6402,7 +6517,7 @@ fn genBinOp( tmp_reg, ); try self.asmRegisterRegisterImmediate( - .vcvtps2ph, + .{ ._, .vcvtps2ph }, dst_reg, dst_reg, Immediate.u(0b1_00), @@ -6414,13 +6529,13 @@ fn genBinOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.asmRegisterRegister(.vcvtph2ps, dst_reg.to256(), dst_reg); + try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg.to256(), dst_reg); if (src_mcv.isMemory()) try self.asmRegisterMemory( - .vcvtph2ps, + .{ ._, .vcvtph2ps }, tmp_reg, src_mcv.mem(.xword), ) else try self.asmRegisterRegister( - .vcvtph2ps, + .{ ._, .vcvtph2ps }, tmp_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? 
@@ -6429,11 +6544,11 @@ fn genBinOp( ); try self.asmRegisterRegisterRegister( switch (air_tag) { - .add => .vaddps, - .sub => .vsubps, - .div_float, .div_trunc, .div_floor, .div_exact => .vdivps, - .max => .vmaxps, - .min => .vmaxps, + .add => .{ ._, .vaddps }, + .sub => .{ ._, .vsubps }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps }, + .max => .{ ._, .vmaxps }, + .min => .{ ._, .vmaxps }, else => unreachable, }, dst_reg.to256(), @@ -6441,7 +6556,7 @@ fn genBinOp( tmp_reg, ); try self.asmRegisterRegisterImmediate( - .vcvtps2ph, + .{ ._, .vcvtps2ph }, dst_reg, dst_reg.to256(), Immediate.u(0b1_00), @@ -6452,76 +6567,76 @@ fn genBinOp( } else null, 32 => switch (lhs_ty.vectorLen()) { 1 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .vaddss else .addss, - .sub => if (self.hasFeature(.avx)) .vsubss else .subss, - .mul => if (self.hasFeature(.avx)) .vmulss else .mulss, + .add => if (self.hasFeature(.avx)) .{ ._, .vaddss } else .{ ._, .addss }, + .sub => if (self.hasFeature(.avx)) .{ ._, .vsubss } else .{ ._, .subss }, + .mul => if (self.hasFeature(.avx)) .{ ._, .vmulss } else .{ ._, .mulss }, .div_float, .div_trunc, .div_floor, .div_exact, - => if (self.hasFeature(.avx)) .vdivss else .divss, - .max => if (self.hasFeature(.avx)) .vmaxss else .maxss, - .min => if (self.hasFeature(.avx)) .vminss else .minss, + => if (self.hasFeature(.avx)) .{ ._, .vdivss } else .{ ._, .divss }, + .max => if (self.hasFeature(.avx)) .{ ._, .vmaxss } else .{ ._, .maxss }, + .min => if (self.hasFeature(.avx)) .{ ._, .vminss } else .{ ._, .minss }, else => unreachable, }, 2...4 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .vaddps else .addps, - .sub => if (self.hasFeature(.avx)) .vsubps else .subps, - .mul => if (self.hasFeature(.avx)) .vmulps else .mulps, + .add => if (self.hasFeature(.avx)) .{ ._, .vaddps } else .{ ._, .addps }, + .sub => if (self.hasFeature(.avx)) .{ ._, .vsubps } else .{ ._, .subps }, + .mul => if (self.hasFeature(.avx)) .{ ._, .vmulps } else .{ ._, .mulps }, .div_float, .div_trunc, .div_floor, .div_exact, - => if (self.hasFeature(.avx)) .vdivps else .divps, - .max => if (self.hasFeature(.avx)) .vmaxps else .maxps, - .min => if (self.hasFeature(.avx)) .vminps else .minps, + => if (self.hasFeature(.avx)) .{ ._, .vdivps } else .{ ._, .divps }, + .max => if (self.hasFeature(.avx)) .{ ._, .vmaxps } else .{ ._, .maxps }, + .min => if (self.hasFeature(.avx)) .{ ._, .vminps } else .{ ._, .minps }, else => unreachable, }, 5...8 => if (self.hasFeature(.avx)) switch (air_tag) { - .add => .vaddps, - .sub => .vsubps, - .mul => .vmulps, - .div_float, .div_trunc, .div_floor, .div_exact => .vdivps, - .max => .vmaxps, - .min => .vminps, + .add => .{ ._, .vaddps }, + .sub => .{ ._, .vsubps }, + .mul => .{ ._, .vmulps }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps }, + .max => .{ ._, .vmaxps }, + .min => .{ ._, .vminps }, else => unreachable, } else null, else => null, }, 64 => switch (lhs_ty.vectorLen()) { 1 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .vaddsd else .addsd, - .sub => if (self.hasFeature(.avx)) .vsubsd else .subsd, - .mul => if (self.hasFeature(.avx)) .vmulsd else .mulsd, + .add => if (self.hasFeature(.avx)) .{ ._, .vaddsd } else .{ ._, .addsd }, + .sub => if (self.hasFeature(.avx)) .{ ._, .vsubsd } else .{ ._, .subsd }, + .mul => if (self.hasFeature(.avx)) .{ ._, .vmulsd } else .{ ._, .mulsd }, .div_float, .div_trunc, .div_floor, .div_exact, - => if (self.hasFeature(.avx)) .vdivsd else .divsd, - .max => if 
(self.hasFeature(.avx)) .vmaxsd else .maxsd, - .min => if (self.hasFeature(.avx)) .vminsd else .minsd, + => if (self.hasFeature(.avx)) .{ ._, .vdivsd } else .{ ._, .divsd }, + .max => if (self.hasFeature(.avx)) .{ ._, .vmaxsd } else .{ ._, .maxsd }, + .min => if (self.hasFeature(.avx)) .{ ._, .vminsd } else .{ ._, .minsd }, else => unreachable, }, 2 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .vaddpd else .addpd, - .sub => if (self.hasFeature(.avx)) .vsubpd else .subpd, - .mul => if (self.hasFeature(.avx)) .vmulpd else .mulpd, + .add => if (self.hasFeature(.avx)) .{ ._, .vaddpd } else .{ ._, .addpd }, + .sub => if (self.hasFeature(.avx)) .{ ._, .vsubpd } else .{ ._, .subpd }, + .mul => if (self.hasFeature(.avx)) .{ ._, .vmulpd } else .{ ._, .mulpd }, .div_float, .div_trunc, .div_floor, .div_exact, - => if (self.hasFeature(.avx)) .vdivpd else .divpd, - .max => if (self.hasFeature(.avx)) .vmaxpd else .maxpd, - .min => if (self.hasFeature(.avx)) .vminpd else .minpd, + => if (self.hasFeature(.avx)) .{ ._, .vdivpd } else .{ ._, .divpd }, + .max => if (self.hasFeature(.avx)) .{ ._, .vmaxpd } else .{ ._, .maxpd }, + .min => if (self.hasFeature(.avx)) .{ ._, .vminpd } else .{ ._, .minpd }, else => unreachable, }, 3...4 => if (self.hasFeature(.avx)) switch (air_tag) { - .add => .vaddpd, - .sub => .vsubpd, - .mul => .vmulpd, - .div_float, .div_trunc, .div_floor, .div_exact => .vdivpd, - .max => .vmaxpd, - .min => .vminpd, + .add => .{ ._, .vaddpd }, + .sub => .{ ._, .vsubpd }, + .mul => .{ ._, .vmulpd }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivpd }, + .max => .{ ._, .vmaxpd }, + .min => .{ ._, .vminpd }, else => unreachable, } else null, else => null, @@ -6583,7 +6698,13 @@ fn genBinOp( return dst_mcv; } -fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !void { +fn genBinOpMir( + self: *Self, + mir_tag: Mir.Inst.FixedTag, + ty: Type, + dst_mcv: MCValue, + src_mcv: MCValue, +) !void { const abi_size = @intCast(u32, ty.abiSize(self.target.*)); switch (dst_mcv) { .none, @@ -6788,14 +6909,14 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s }; var off: i32 = 0; while (off < abi_size) : (off += 8) { - const mir_limb_tag = switch (off) { + const mir_limb_tag: Mir.Inst.FixedTag = switch (off) { 0 => mir_tag, - else => switch (mir_tag) { - .add => .adc, - .sub, .cmp => .sbb, + else => switch (mir_tag[1]) { + .add => .{ ._, .adc }, + .sub, .cmp => .{ ._, .sbb }, .@"or", .@"and", .xor => mir_tag, else => return self.fail("TODO genBinOpMir implement large ABI for {s}", .{ - @tagName(mir_tag), + @tagName(mir_tag[1]), }), }, }; @@ -6967,14 +7088,14 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M .reserved_frame, => unreachable, .register => |src_reg| try self.asmRegisterRegister( - .imul, + .{ .i_, .mul }, dst_alias, registerAlias(src_reg, abi_size), ), .immediate => |imm| { if (math.cast(i32, imm)) |small| { try self.asmRegisterRegisterImmediate( - .imul, + .{ .i_, .mul }, dst_alias, dst_alias, Immediate.s(small), @@ -6994,19 +7115,19 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M .lea_tlv, .lea_frame, => try self.asmRegisterRegister( - .imul, + .{ .i_, .mul }, dst_alias, registerAlias(try self.copyToTmpRegister(dst_ty, src_mcv), abi_size), ), .memory, .indirect, .load_frame => try self.asmRegisterMemory( - .imul, + .{ .i_, .mul }, dst_alias, Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) { .memory => 
|addr| .{ .base = .{ .reg = .ds }, .disp = math.cast(i32, @bitCast(i64, addr)) orelse return self.asmRegisterRegister( - .imul, + .{ .i_, .mul }, dst_alias, registerAlias(try self.copyToTmpRegister(dst_ty, src_mcv), abi_size), ), @@ -7131,12 +7252,12 @@ fn genVarDbgInfo( } fn airTrap(self: *Self) !void { - try self.asmOpOnly(.ud2); + try self.asmOpOnly(.{ ._, .ud2 }); return self.finishAirBookkeeping(); } fn airBreakpoint(self: *Self) !void { - try self.asmOpOnly(.int3); + try self.asmOpOnly(.{ ._, .int3 }); return self.finishAirBookkeeping(); } @@ -7157,7 +7278,7 @@ fn airFence(self: *Self, inst: Air.Inst.Index) !void { switch (order) { .Unordered, .Monotonic => unreachable, .Acquire, .Release, .AcqRel => {}, - .SeqCst => try self.asmOpOnly(.mfence), + .SeqCst => try self.asmOpOnly(.{ ._, .mfence }), } return self.finishAirBookkeeping(); } @@ -7251,7 +7372,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const atom = elf_file.getAtom(atom_index); _ = try atom.getOrCreateOffsetTableEntry(elf_file); const got_addr = atom.getOffsetTableAddress(elf_file); - try self.asmMemory(.call, Memory.sib(.qword, .{ + try self.asmMemory(.{ ._, .call }, Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr), })); @@ -7259,12 +7380,12 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const atom = try coff_file.getOrCreateAtomForDecl(func.owner_decl); const sym_index = coff_file.getAtom(atom).getSymbolIndex().?; try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index }); - try self.asmRegister(.call, .rax); + try self.asmRegister(.{ ._, .call }, .rax); } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { const atom = try macho_file.getOrCreateAtomForDecl(func.owner_decl); const sym_index = macho_file.getAtom(atom).getSymbolIndex().?; try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index }); - try self.asmRegister(.call, .rax); + try self.asmRegister(.{ ._, .call }, .rax); } else if (self.bin_file.cast(link.File.Plan9)) |p9| { const decl_block_index = try p9.seeDecl(func.owner_decl); const decl_block = p9.getDeclBlock(decl_block_index); @@ -7273,7 +7394,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const got_addr = p9.bases.data; const got_index = decl_block.got_index.?; const fn_got_addr = got_addr + got_index * ptr_bytes; - try self.asmMemory(.call, Memory.sib(.qword, .{ + try self.asmMemory(.{ ._, .call }, Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, fn_got_addr), })); @@ -7296,7 +7417,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier }), } }, }); - try self.asmRegister(.call, .rax); + try self.asmRegister(.{ ._, .call }, .rax); } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { const atom_index = try self.owner.getSymbolIndex(self); const sym_index = try macho_file.getGlobalSymbol(decl_name, lib_name); @@ -7318,7 +7439,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier assert(ty.zigTypeTag() == .Pointer); const mcv = try self.resolveInst(callee); try self.genSetReg(.rax, Type.usize, mcv); - try self.asmRegister(.call, .rax); + try self.asmRegister(.{ ._, .call }, .rax); } var bt = self.liveness.iterateBigTomb(inst); @@ -7408,7 +7529,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); const src_mcv = if (flipped) lhs_mcv else rhs_mcv; - try 
self.genBinOpMir(.cmp, ty, dst_mcv, src_mcv); + try self.genBinOpMir(.{ ._, .cmp }, ty, dst_mcv, src_mcv); break :result Condition.fromCompareOperator( if (ty.isAbiInt()) ty.intInfo(self.target.*).signedness else .unsigned, if (flipped) op.reverse() else op, @@ -7442,13 +7563,13 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { defer self.register_manager.unlockReg(tmp2_lock); if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( - .vpinsrw, + .{ ._, .vpinsrw }, tmp1_reg, dst_reg.to128(), src_mcv.mem(.word), Immediate.u(1), ) else try self.asmRegisterRegisterRegister( - .vpunpcklwd, + .{ ._, .vpunpcklwd }, tmp1_reg, dst_reg.to128(), (if (src_mcv.isRegister()) @@ -7456,14 +7577,24 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { else try self.copyToTmpRegister(ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.vcvtph2ps, tmp1_reg, tmp1_reg); - try self.asmRegisterRegister(.vmovshdup, tmp2_reg, tmp1_reg); - try self.genBinOpMir(.ucomiss, ty, tmp1_mcv, tmp2_mcv); + try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, tmp1_reg, tmp1_reg); + try self.asmRegisterRegister(.{ ._, .vmovshdup }, tmp2_reg, tmp1_reg); + try self.genBinOpMir(.{ ._, .ucomiss }, ty, tmp1_mcv, tmp2_mcv); } else return self.fail("TODO implement airCmp for {}", .{ ty.fmt(self.bin_file.options.module.?), }), - 32 => try self.genBinOpMir(.ucomiss, ty, .{ .register = dst_reg }, src_mcv), - 64 => try self.genBinOpMir(.ucomisd, ty, .{ .register = dst_reg }, src_mcv), + 32 => try self.genBinOpMir( + .{ ._, .ucomiss }, + ty, + .{ .register = dst_reg }, + src_mcv, + ), + 64 => try self.genBinOpMir( + .{ ._, .ucomisd }, + ty, + .{ .register = dst_reg }, + src_mcv, + ), else => return self.fail("TODO implement airCmp for {}", .{ ty.fmt(self.bin_file.options.module.?), }), @@ -7507,7 +7638,7 @@ fn airCmpLtErrorsLen(self: *Self, inst: Air.Inst.Index) !void { else => try self.copyToTmpRegister(op_ty, op_mcv), }; try self.asmRegisterMemory( - .cmp, + .{ ._, .cmp }, registerAlias(dst_reg, op_abi_size), Memory.sib(Memory.PtrSize.fromSize(op_abi_size), .{ .base = .{ .reg = addr_reg } }), ); @@ -7627,7 +7758,7 @@ fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !u32 { }, .register => |reg| { try self.spillEflagsIfOccupied(); - try self.asmRegisterImmediate(.@"test", reg, Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .@"test" }, reg, Immediate.u(1)); return self.asmJccReloc(undefined, .e); }, .immediate, @@ -7730,13 +7861,13 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*)); const alias_reg = registerAlias(opt_reg, some_abi_size); assert(some_abi_size * 8 == alias_reg.bitSize()); - try self.asmRegisterRegister(.@"test", alias_reg, alias_reg); + try self.asmRegisterRegister(.{ ._, .@"test" }, alias_reg, alias_reg); return .{ .eflags = .z }; } assert(some_info.ty.tag() == .bool); const opt_abi_size = @intCast(u32, opt_ty.abiSize(self.target.*)); try self.asmRegisterImmediate( - .bt, + .{ ._, .bt }, registerAlias(opt_reg, opt_abi_size), Immediate.u(@intCast(u6, some_info.off * 8)), ); @@ -7755,7 +7886,7 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC try self.genSetReg(addr_reg, Type.usize, opt_mcv.address()); const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*)); try self.asmMemoryImmediate( - .cmp, + .{ ._, .cmp }, Memory.sib(Memory.PtrSize.fromSize(some_abi_size), .{ .base = .{ .reg = 
addr_reg }, .disp = some_info.off, @@ -7768,7 +7899,7 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC .indirect, .load_frame => { const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*)); try self.asmMemoryImmediate( - .cmp, + .{ ._, .cmp }, Memory.sib(Memory.PtrSize.fromSize(some_abi_size), switch (opt_mcv) { .indirect => |reg_off| .{ .base = .{ .reg = reg_off.reg }, @@ -7810,7 +7941,7 @@ fn isNullPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*)); try self.asmMemoryImmediate( - .cmp, + .{ ._, .cmp }, Memory.sib(Memory.PtrSize.fromSize(some_abi_size), .{ .base = .{ .reg = ptr_reg }, .disp = some_info.off, @@ -7841,14 +7972,24 @@ fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, ty: Type, operand: MCValue) ! const tmp_reg = try self.copyToTmpRegister(ty, operand); if (err_off > 0) { const shift = @intCast(u6, err_off * 8); - try self.genShiftBinOpMir(.shr, ty, .{ .register = tmp_reg }, .{ .immediate = shift }); + try self.genShiftBinOpMir( + .{ ._r, .sh }, + ty, + .{ .register = tmp_reg }, + .{ .immediate = shift }, + ); } else { try self.truncateRegister(Type.anyerror, tmp_reg); } - try self.genBinOpMir(.cmp, Type.anyerror, .{ .register = tmp_reg }, .{ .immediate = 0 }); + try self.genBinOpMir( + .{ ._, .cmp }, + Type.anyerror, + .{ .register = tmp_reg }, + .{ .immediate = 0 }, + ); }, .load_frame => |frame_addr| try self.genBinOpMir( - .cmp, + .{ ._, .cmp }, Type.anyerror, .{ .load_frame = .{ .index = frame_addr.index, @@ -8073,7 +8214,7 @@ fn airSwitchBr(self: *Self, inst: Air.Inst.Index) !void { try self.spillEflagsIfOccupied(); for (items, relocs, 0..) |item, *reloc, i| { const item_mcv = try self.resolveInst(item); - try self.genBinOpMir(.cmp, condition_ty, condition, item_mcv); + try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition, item_mcv); reloc.* = try self.asmJccReloc(undefined, if (i < relocs.len - 1) .e else .ne); } @@ -8284,7 +8425,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { .qword else null; - const mnem = mnem: { + const mnem_tag = Mir.Inst.FixedTag{ ._, mnem: { if (mnem_size) |_| { if (std.meta.stringToEnum(Mir.Inst.Tag, mnem_str[0 .. 
mnem_str.len - 1])) |mnem| { break :mnem mnem; @@ -8292,7 +8433,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { } break :mnem std.meta.stringToEnum(Mir.Inst.Tag, mnem_str) orelse return self.fail("Invalid mnemonic: '{s}'", .{mnem_str}); - }; + } }; var op_it = mem.tokenize(u8, mnem_it.rest(), ","); var ops = [1]encoder.Instruction.Operand{.none} ** 4; @@ -8343,51 +8484,51 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { } else if (op_it.next()) |op_str| return self.fail("Extra operand: '{s}'", .{op_str}); (switch (ops[0]) { - .none => self.asmOpOnly(mnem), + .none => self.asmOpOnly(mnem_tag), .reg => |reg0| switch (ops[1]) { - .none => self.asmRegister(mnem, reg0), + .none => self.asmRegister(mnem_tag, reg0), .reg => |reg1| switch (ops[2]) { - .none => self.asmRegisterRegister(mnem, reg1, reg0), + .none => self.asmRegisterRegister(mnem_tag, reg1, reg0), .reg => |reg2| switch (ops[3]) { - .none => self.asmRegisterRegisterRegister(mnem, reg2, reg1, reg0), + .none => self.asmRegisterRegisterRegister(mnem_tag, reg2, reg1, reg0), else => error.InvalidInstruction, }, .mem => |mem2| switch (ops[3]) { - .none => self.asmMemoryRegisterRegister(mnem, mem2, reg1, reg0), + .none => self.asmMemoryRegisterRegister(mnem_tag, mem2, reg1, reg0), else => error.InvalidInstruction, }, else => error.InvalidInstruction, }, .mem => |mem1| switch (ops[2]) { - .none => self.asmMemoryRegister(mnem, mem1, reg0), + .none => self.asmMemoryRegister(mnem_tag, mem1, reg0), else => error.InvalidInstruction, }, else => error.InvalidInstruction, }, .mem => |mem0| switch (ops[1]) { - .none => self.asmMemory(mnem, mem0), + .none => self.asmMemory(mnem_tag, mem0), .reg => |reg1| switch (ops[2]) { - .none => self.asmRegisterMemory(mnem, reg1, mem0), + .none => self.asmRegisterMemory(mnem_tag, reg1, mem0), else => error.InvalidInstruction, }, else => error.InvalidInstruction, }, .imm => |imm0| switch (ops[1]) { - .none => self.asmImmediate(mnem, imm0), + .none => self.asmImmediate(mnem_tag, imm0), .reg => |reg1| switch (ops[2]) { - .none => self.asmRegisterImmediate(mnem, reg1, imm0), + .none => self.asmRegisterImmediate(mnem_tag, reg1, imm0), .reg => |reg2| switch (ops[3]) { - .none => self.asmRegisterRegisterImmediate(mnem, reg2, reg1, imm0), + .none => self.asmRegisterRegisterImmediate(mnem_tag, reg2, reg1, imm0), else => error.InvalidInstruction, }, .mem => |mem2| switch (ops[3]) { - .none => self.asmMemoryRegisterImmediate(mnem, mem2, reg1, imm0), + .none => self.asmMemoryRegisterImmediate(mnem_tag, mem2, reg1, imm0), else => error.InvalidInstruction, }, else => error.InvalidInstruction, }, .mem => |mem1| switch (ops[2]) { - .none => self.asmMemoryImmediate(mnem, mem1, imm0), + .none => self.asmMemoryImmediate(mnem_tag, mem1, imm0), else => error.InvalidInstruction, }, else => error.InvalidInstruction, @@ -8396,7 +8537,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { error.InvalidInstruction => return self.fail( "Invalid instruction: '{s} {s} {s} {s} {s}'", .{ - @tagName(mnem), + @tagName(mnem_tag[1]), @tagName(ops[0]), @tagName(ops[1]), @tagName(ops[2]), @@ -8427,44 +8568,47 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { return self.finishAirResult(inst, result); } -fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.Tag { +fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag { switch (ty.zigTypeTag()) { - else => return .mov, + else => return .{ ._, .mov }, .Float => switch (ty.floatBits(self.target.*)) { 16 => unreachable, // needs special handling - 32 => 
return if (self.hasFeature(.avx)) .vmovss else .movss, - 64 => return if (self.hasFeature(.avx)) .vmovsd else .movsd, + 32 => return if (self.hasFeature(.avx)) .{ ._, .vmovss } else .{ ._, .movss }, + 64 => return if (self.hasFeature(.avx)) .{ ._, .vmovsd } else .{ ._, .movsd }, 128 => return if (self.hasFeature(.avx)) - if (aligned) .vmovaps else .vmovups - else if (aligned) .movaps else .movups, + if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups } + else if (aligned) .{ ._, .movaps } else .{ ._, .movups }, else => {}, }, .Vector => switch (ty.childType().zigTypeTag()) { .Float => switch (ty.childType().floatBits(self.target.*)) { 16 => switch (ty.vectorLen()) { 1 => unreachable, // needs special handling - 2 => return if (self.hasFeature(.avx)) .vmovss else .movss, - 3...4 => return if (self.hasFeature(.avx)) .vmovsd else .movsd, + 2 => return if (self.hasFeature(.avx)) .{ ._, .vmovss } else .{ ._, .movss }, + 3...4 => return if (self.hasFeature(.avx)) .{ ._, .vmovsd } else .{ ._, .movsd }, 5...8 => return if (self.hasFeature(.avx)) - if (aligned) .vmovaps else .vmovups - else if (aligned) .movaps else .movups, - 9...16 => if (self.hasFeature(.avx)) return if (aligned) .vmovaps else .vmovups, + if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups } + else if (aligned) .{ ._, .movaps } else .{ ._, .movups }, + 9...16 => if (self.hasFeature(.avx)) + return if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }, else => {}, }, 32 => switch (ty.vectorLen()) { - 1 => return if (self.hasFeature(.avx)) .vmovss else .movss, + 1 => return if (self.hasFeature(.avx)) .{ ._, .vmovss } else .{ ._, .movss }, 2...4 => return if (self.hasFeature(.avx)) - if (aligned) .vmovaps else .vmovups - else if (aligned) .movaps else .movups, - 5...8 => if (self.hasFeature(.avx)) return if (aligned) .vmovaps else .vmovups, + if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups } + else if (aligned) .{ ._, .movaps } else .{ ._, .movups }, + 5...8 => if (self.hasFeature(.avx)) + return if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }, else => {}, }, 64 => switch (ty.vectorLen()) { - 1 => return if (self.hasFeature(.avx)) .vmovsd else .movsd, + 1 => return if (self.hasFeature(.avx)) .{ ._, .vmovsd } else .{ ._, .movsd }, 2 => return if (self.hasFeature(.avx)) - if (aligned) .vmovaps else .vmovups - else if (aligned) .movaps else .movups, - 3...4 => if (self.hasFeature(.avx)) return if (aligned) .vmovaps else .vmovups, + if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups } + else if (aligned) .{ ._, .movaps } else .{ ._, .movups }, + 3...4 => if (self.hasFeature(.avx)) + return if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }, else => {}, }, else => {}, @@ -8558,19 +8702,19 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr if (imm == 0) { // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit // register is the fastest way to zero a register. - try self.asmRegisterRegister(.xor, dst_reg.to32(), dst_reg.to32()); + try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32()); } else if (abi_size > 4 and math.cast(u32, imm) != null) { // 32-bit moves zero-extend to 64-bit. 
- try self.asmRegisterImmediate(.mov, dst_reg.to32(), Immediate.u(imm)); + try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to32(), Immediate.u(imm)); } else if (abi_size <= 4 and @bitCast(i64, imm) < 0) { try self.asmRegisterImmediate( - .mov, + .{ ._, .mov }, registerAlias(dst_reg, abi_size), Immediate.s(@intCast(i32, @bitCast(i64, imm))), ); } else { try self.asmRegisterImmediate( - .mov, + .{ ._, .mov }, registerAlias(dst_reg, abi_size), Immediate.u(imm), ); @@ -8579,18 +8723,18 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr .register => |src_reg| if (dst_reg.id() != src_reg.id()) try self.asmRegisterRegister( if ((dst_reg.class() == .floating_point) == (src_reg.class() == .floating_point)) switch (ty.zigTypeTag()) { - else => .mov, - .Float, .Vector => .movaps, + else => .{ ._, .mov }, + .Float, .Vector => .{ ._, .movaps }, } else switch (abi_size) { 2 => return try self.asmRegisterRegisterImmediate( - if (dst_reg.class() == .floating_point) .pinsrw else .pextrw, + if (dst_reg.class() == .floating_point) .{ ._, .pinsrw } else .{ ._, .pextrw }, registerAlias(dst_reg, 4), registerAlias(src_reg, 4), Immediate.u(0), ), - 4 => .movd, - 8 => .movq, + 4 => .{ ._d, .mov }, + 8 => .{ ._q, .mov }, else => return self.fail( "unsupported register copy from {s} to {s}", .{ @tagName(src_reg), @tagName(dst_reg) }, @@ -8617,7 +8761,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr }); if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) try self.asmRegisterMemoryImmediate( - .pinsrw, + .{ ._, .pinsrw }, registerAlias(dst_reg, abi_size), src_mem, Immediate.u(0), @@ -8627,14 +8771,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr switch (src_mcv) { .register_offset => |reg_off| switch (reg_off.off) { 0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }), - else => .lea, + else => .{ ._, .lea }, }, .indirect => try self.movMirTag(ty, false), .load_frame => |frame_addr| try self.movMirTag( ty, self.getFrameAddrAlignment(frame_addr) >= ty.abiAlignment(self.target.*), ), - .lea_frame => .lea, + .lea_frame => .{ ._, .lea }, else => unreachable, }, registerAlias(dst_reg, abi_size), @@ -8650,7 +8794,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr }); return if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) self.asmRegisterMemoryImmediate( - .pinsrw, + .{ ._, .pinsrw }, registerAlias(dst_reg, abi_size), src_mem, Immediate.u(0), @@ -8694,7 +8838,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr }); if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) try self.asmRegisterMemoryImmediate( - .pinsrw, + .{ ._, .pinsrw }, registerAlias(dst_reg, abi_size), src_mem, Immediate.u(0), @@ -8743,7 +8887,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr } }, }); // TODO: spill registers before calling - try self.asmMemory(.call, Memory.sib(.qword, .{ .base = .{ .reg = .rdi } })); + try self.asmMemory(.{ ._, .call }, Memory.sib(.qword, .{ .base = .{ .reg = .rdi } })); try self.genSetReg(dst_reg.to64(), Type.usize, .{ .register = .rax }); } else return self.fail("TODO emit ptr to TLV sequence on {s}", .{ @tagName(self.bin_file.tag), @@ -8770,7 +8914,7 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal else Immediate.u(@intCast(u32, imm)); try self.asmMemoryImmediate( - .mov, + .{ ._, .mov }, 
Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }), immediate, ); @@ -8778,14 +8922,14 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal 3, 5...7 => unreachable, else => if (math.cast(i32, @bitCast(i64, imm))) |small| { try self.asmMemoryImmediate( - .mov, + .{ ._, .mov }, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }), Immediate.s(small), ); } else { var offset: i32 = 0; while (offset < abi_size) : (offset += 4) try self.asmMemoryImmediate( - .mov, + .{ ._, .mov }, Memory.sib(.dword, .{ .base = base, .disp = disp + offset }), if (ty.isSignedInt()) Immediate.s(@truncate( @@ -8808,7 +8952,7 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal ); if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) try self.asmMemoryRegisterImmediate( - .pextrw, + .{ ._, .pextrw }, dst_mem, src_reg.to128(), Immediate.u(0), @@ -8904,7 +9048,7 @@ fn genInlineMemcpyRegisterRegister( while (remainder > 0) { const nearest_power_of_two = @as(u6, 1) << math.log2_int(u3, @intCast(u3, remainder)); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(Memory.PtrSize.fromSize(nearest_power_of_two), .{ .base = dst_reg, .disp = -next_offset, @@ -8913,7 +9057,7 @@ fn genInlineMemcpyRegisterRegister( ); if (nearest_power_of_two > 1) { - try self.genShiftBinOpMir(.shr, ty, .{ .register = tmp_reg }, .{ + try self.genShiftBinOpMir(.{ ._r, .sh }, ty, .{ .register = tmp_reg }, .{ .immediate = nearest_power_of_two * 8, }); } @@ -8924,8 +9068,8 @@ fn genInlineMemcpyRegisterRegister( } else { try self.asmMemoryRegister( switch (src_reg.class()) { - .general_purpose, .segment => .mov, - .floating_point => .movss, + .general_purpose, .segment => .{ ._, .mov }, + .floating_point => .{ ._, .movss }, }, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = dst_reg, .disp = -offset }), registerAlias(src_reg, abi_size), @@ -8938,11 +9082,7 @@ fn genInlineMemcpy(self: *Self, dst_ptr: MCValue, src_ptr: MCValue, len: MCValue try self.genSetReg(.rdi, Type.usize, dst_ptr); try self.genSetReg(.rsi, Type.usize, src_ptr); try self.genSetReg(.rcx, Type.usize, len); - _ = try self.addInst(.{ - .tag = .mov, - .ops = .none, - .data = .{ .none = .{ .fixes = .@"rep _sb" } }, - }); + try self.asmOpOnly(.{ .@"rep _sb", .mov }); } fn genInlineMemset(self: *Self, dst_ptr: MCValue, value: MCValue, len: MCValue) InnerError!void { @@ -8950,11 +9090,7 @@ fn genInlineMemset(self: *Self, dst_ptr: MCValue, value: MCValue, len: MCValue) try self.genSetReg(.rdi, Type.usize, dst_ptr); try self.genSetReg(.al, Type.u8, value); try self.genSetReg(.rcx, Type.usize, len); - _ = try self.addInst(.{ - .tag = .sto, - .ops = .none, - .data = .{ .none = .{ .fixes = .@"rep _sb" } }, - }); + try self.asmOpOnly(.{ .@"rep _sb", .sto }); } fn genLazySymbolRef( @@ -8972,14 +9108,14 @@ fn genLazySymbolRef( const got_mem = Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr) }); switch (tag) { - .lea, .mov => try self.asmRegisterMemory(.mov, reg.to64(), got_mem), - .call => try self.asmMemory(.call, got_mem), + .lea, .mov => try self.asmRegisterMemory(.{ ._, .mov }, reg.to64(), got_mem), + .call => try self.asmMemory(.{ ._, .call }, got_mem), else => unreachable, } switch (tag) { .lea, .call => {}, .mov => try self.asmRegisterMemory( - tag, + .{ ._, tag }, reg.to64(), Memory.sib(.qword, .{ .base = .{ .reg = reg.to64() } }), ), @@ -8996,7 +9132,7 @@ fn genLazySymbolRef( } switch (tag) { .lea, .mov => {}, - .call 
=> try self.asmRegister(.call, reg), + .call => try self.asmRegister(.{ ._, .call }, reg), else => unreachable, } } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { @@ -9010,7 +9146,7 @@ fn genLazySymbolRef( } switch (tag) { .lea, .mov => {}, - .call => try self.asmRegister(.call, reg), + .call => try self.asmRegister(.{ ._, .call }, reg), else => unreachable, } } else { @@ -9115,13 +9251,13 @@ fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterRegister(switch (dst_ty.floatBits(self.target.*)) { 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) - .cvtsi2ss + .{ ._, .cvtsi2ss } else return self.fail("TODO implement airIntToFloat from {} to {} without sse", .{ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }), 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) - .cvtsi2sd + .{ ._, .cvtsi2sd } else return self.fail("TODO implement airIntToFloat from {} to {} without sse2", .{ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), @@ -9161,7 +9297,7 @@ fn airFloatToInt(self: *Self, inst: Air.Inst.Index) !void { }, }; try self.asmMemory( - .fld, + .{ .f_, .ld }, Memory.sib(Memory.PtrSize.fromSize(src_abi_size), .{ .base = .{ .frame = frame_addr.index }, .disp = frame_addr.off, @@ -9171,7 +9307,7 @@ fn airFloatToInt(self: *Self, inst: Air.Inst.Index) !void { // convert const stack_dst = try self.allocRegOrMem(inst, false); try self.asmMemory( - .fisttp, + .{ .f_p, .istt }, Memory.sib(Memory.PtrSize.fromSize(dst_abi_size), .{ .base = .{ .frame = stack_dst.load_frame.index }, .disp = stack_dst.load_frame.off, @@ -9227,22 +9363,11 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void { defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock); try self.spillEflagsIfOccupied(); - _ = try self.addInst(if (val_abi_size <= 8) .{ - .tag = .cmpxchg, - .ops = .mr_sib, - .data = .{ .rx = .{ - .fixes = .@"lock _", - .r1 = registerAlias(new_reg.?, val_abi_size), - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } }, - } else .{ - .tag = .cmpxchg, - .ops = .m_sib, - .data = .{ .x = .{ - .fixes = .@"lock _16b", - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } }, - }); + if (val_abi_size <= 8) try self.asmMemoryRegister( + .{ .@"lock _", .cmpxchg }, + ptr_mem, + registerAlias(new_reg.?, val_abi_size), + ) else try self.asmMemory(.{ .@"lock _16b", .cmpxchg }, ptr_mem); const result: MCValue = result: { if (self.liveness.isUnused(inst)) break :result .unreach; @@ -9340,21 +9465,17 @@ fn atomicOp( try self.genSetReg(dst_reg, val_ty, val_mcv); if (rmw_op == std.builtin.AtomicRmwOp.Sub and tag == .xadd) { - try self.genUnOpMir(.neg, val_ty, dst_mcv); + try self.genUnOpMir(.{ ._, .neg }, val_ty, dst_mcv); } - _ = try self.addInst(.{ - .tag = tag, - .ops = .mr_sib, - .data = .{ .rx = .{ - .fixes = switch (tag) { - .mov, .xchg => ._, - .xadd, .add, .sub, .@"and", .@"or", .xor => .@"lock _", - else => unreachable, - }, - .r1 = registerAlias(dst_reg, val_abi_size), - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } }, - }); + try self.asmMemoryRegister( + switch (tag) { + .mov, .xchg => .{ ._, tag }, + .xadd, .add, .sub, .@"and", .@"or", .xor => .{ .@"lock _", tag }, + else => unreachable, + }, + ptr_mem, + registerAlias(dst_reg, val_abi_size), + ); return if (unused) .unreach else dst_mcv; }, @@ -9364,22 +9485,22 @@ fn atomicOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer 
self.register_manager.unlockReg(tmp_lock); - try self.asmRegisterMemory(.mov, registerAlias(.rax, val_abi_size), ptr_mem); + try self.asmRegisterMemory(.{ ._, .mov }, registerAlias(.rax, val_abi_size), ptr_mem); const loop = @intCast(u32, self.mir_instructions.len); if (rmw_op != std.builtin.AtomicRmwOp.Xchg) { try self.genSetReg(tmp_reg, val_ty, .{ .register = .rax }); } if (rmw_op) |op| switch (op) { .Xchg => try self.genSetReg(tmp_reg, val_ty, val_mcv), - .Add => try self.genBinOpMir(.add, val_ty, tmp_mcv, val_mcv), - .Sub => try self.genBinOpMir(.sub, val_ty, tmp_mcv, val_mcv), - .And => try self.genBinOpMir(.@"and", val_ty, tmp_mcv, val_mcv), + .Add => try self.genBinOpMir(.{ ._, .add }, val_ty, tmp_mcv, val_mcv), + .Sub => try self.genBinOpMir(.{ ._, .sub }, val_ty, tmp_mcv, val_mcv), + .And => try self.genBinOpMir(.{ ._, .@"and" }, val_ty, tmp_mcv, val_mcv), .Nand => { - try self.genBinOpMir(.@"and", val_ty, tmp_mcv, val_mcv); - try self.genUnOpMir(.not, val_ty, tmp_mcv); + try self.genBinOpMir(.{ ._, .@"and" }, val_ty, tmp_mcv, val_mcv); + try self.genUnOpMir(.{ ._, .not }, val_ty, tmp_mcv); }, - .Or => try self.genBinOpMir(.@"or", val_ty, tmp_mcv, val_mcv), - .Xor => try self.genBinOpMir(.xor, val_ty, tmp_mcv, val_mcv), + .Or => try self.genBinOpMir(.{ ._, .@"or" }, val_ty, tmp_mcv, val_mcv), + .Xor => try self.genBinOpMir(.{ ._, .xor }, val_ty, tmp_mcv, val_mcv), .Min, .Max => { const cc: Condition = switch (if (val_ty.isAbiInt()) val_ty.intInfo(self.target.*).signedness @@ -9397,7 +9518,7 @@ fn atomicOp( }, }; - try self.genBinOpMir(.cmp, val_ty, tmp_mcv, val_mcv); + try self.genBinOpMir(.{ ._, .cmp }, val_ty, tmp_mcv, val_mcv); const cmov_abi_size = @max(val_abi_size, 2); switch (val_mcv) { .register => |val_reg| try self.asmCmovccRegisterRegister( @@ -9421,24 +9542,20 @@ fn atomicOp( } }, }; - _ = try self.addInst(.{ - .tag = .cmpxchg, - .ops = .mr_sib, - .data = .{ .rx = .{ - .fixes = .@"lock _", - .r1 = registerAlias(tmp_reg, val_abi_size), - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } }, - }); + try self.asmMemoryRegister( + .{ .@"lock _", .cmpxchg }, + ptr_mem, + registerAlias(tmp_reg, val_abi_size), + ); _ = try self.asmJccReloc(loop, .ne); return if (unused) .unreach else .{ .register = .rax }; } else { - try self.asmRegisterMemory(.mov, .rax, Memory.sib(.qword, .{ + try self.asmRegisterMemory(.{ ._, .mov }, .rax, Memory.sib(.qword, .{ .base = ptr_mem.sib.base, .scale_index = ptr_mem.scaleIndex(), .disp = ptr_mem.sib.disp + 0, })); - try self.asmRegisterMemory(.mov, .rdx, Memory.sib(.qword, .{ + try self.asmRegisterMemory(.{ ._, .mov }, .rdx, Memory.sib(.qword, .{ .base = ptr_mem.sib.base, .scale_index = ptr_mem.scaleIndex(), .disp = ptr_mem.sib.disp + 8, @@ -9453,58 +9570,51 @@ fn atomicOp( const val_lo_mem = val_mem_mcv.mem(.qword); const val_hi_mem = val_mem_mcv.address().offset(8).deref().mem(.qword); if (rmw_op != std.builtin.AtomicRmwOp.Xchg) { - try self.asmRegisterRegister(.mov, .rbx, .rax); - try self.asmRegisterRegister(.mov, .rcx, .rdx); + try self.asmRegisterRegister(.{ ._, .mov }, .rbx, .rax); + try self.asmRegisterRegister(.{ ._, .mov }, .rcx, .rdx); } if (rmw_op) |op| switch (op) { .Xchg => { - try self.asmRegisterMemory(.mov, .rbx, val_lo_mem); - try self.asmRegisterMemory(.mov, .rcx, val_hi_mem); + try self.asmRegisterMemory(.{ ._, .mov }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .mov }, .rcx, val_hi_mem); }, .Add => { - try self.asmRegisterMemory(.add, .rbx, val_lo_mem); - try self.asmRegisterMemory(.adc, .rcx, 
val_hi_mem); + try self.asmRegisterMemory(.{ ._, .add }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .adc }, .rcx, val_hi_mem); }, .Sub => { - try self.asmRegisterMemory(.sub, .rbx, val_lo_mem); - try self.asmRegisterMemory(.sbb, .rcx, val_hi_mem); + try self.asmRegisterMemory(.{ ._, .sub }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .sbb }, .rcx, val_hi_mem); }, .And => { - try self.asmRegisterMemory(.@"and", .rbx, val_lo_mem); - try self.asmRegisterMemory(.@"and", .rcx, val_hi_mem); + try self.asmRegisterMemory(.{ ._, .@"and" }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .@"and" }, .rcx, val_hi_mem); }, .Nand => { - try self.asmRegisterMemory(.@"and", .rbx, val_lo_mem); - try self.asmRegisterMemory(.@"and", .rcx, val_hi_mem); - try self.asmRegister(.not, .rbx); - try self.asmRegister(.not, .rcx); + try self.asmRegisterMemory(.{ ._, .@"and" }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .@"and" }, .rcx, val_hi_mem); + try self.asmRegister(.{ ._, .not }, .rbx); + try self.asmRegister(.{ ._, .not }, .rcx); }, .Or => { - try self.asmRegisterMemory(.@"or", .rbx, val_lo_mem); - try self.asmRegisterMemory(.@"or", .rcx, val_hi_mem); + try self.asmRegisterMemory(.{ ._, .@"or" }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .@"or" }, .rcx, val_hi_mem); }, .Xor => { - try self.asmRegisterMemory(.xor, .rbx, val_lo_mem); - try self.asmRegisterMemory(.xor, .rcx, val_hi_mem); + try self.asmRegisterMemory(.{ ._, .xor }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .xor }, .rcx, val_hi_mem); }, else => return self.fail("TODO implement x86 atomic loop for {} {s}", .{ val_ty.fmt(self.bin_file.options.module.?), @tagName(op), }), }; - _ = try self.addInst(.{ - .tag = .cmpxchg, - .ops = .m_sib, - .data = .{ .x = .{ - .fixes = .@"lock _16b", - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } }, - }); + try self.asmMemory(.{ .@"lock _16b", .cmpxchg }, ptr_mem); _ = try self.asmJccReloc(loop, .ne); if (unused) return .unreach; const dst_mcv = try self.allocTempRegOrMem(val_ty, false); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .disp = dst_mcv.load_frame.off + 0, @@ -9512,7 +9622,7 @@ fn atomicOp( .rax, ); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .disp = dst_mcv.load_frame.off + 8, @@ -9664,8 +9774,13 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { .off = elem_abi_size, } }); - try self.genBinOpMir(.sub, Type.usize, len_mcv, .{ .immediate = 1 }); - try self.asmRegisterRegisterImmediate(.imul, len_reg, len_reg, Immediate.u(elem_abi_size)); + try self.genBinOpMir(.{ ._, .sub }, Type.usize, len_mcv, .{ .immediate = 1 }); + try self.asmRegisterRegisterImmediate( + .{ .i_, .mul }, + len_reg, + len_reg, + Immediate.u(elem_abi_size), + ); try self.genInlineMemcpy(second_elem_ptr_mcv, ptr, len_mcv); try self.performReloc(skip_reloc); @@ -9803,7 +9918,7 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { try self.truncateRegister(err_ty, err_reg.to32()); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, start_reg.to32(), Memory.sib(.dword, .{ .base = .{ .reg = addr_reg.to64() }, @@ -9812,7 +9927,7 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { }), ); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, end_reg.to32(), Memory.sib(.dword, .{ .base = .{ .reg = addr_reg.to64() }, @@ -9820,9 +9935,9 @@ fn airErrorName(self: *Self, 
inst: Air.Inst.Index) !void { .disp = 8, }), ); - try self.asmRegisterRegister(.sub, end_reg.to32(), start_reg.to32()); + try self.asmRegisterRegister(.{ ._, .sub }, end_reg.to32(), start_reg.to32()); try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, start_reg.to64(), Memory.sib(.byte, .{ .base = .{ .reg = addr_reg.to64() }, @@ -9831,7 +9946,7 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { }), ); try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, end_reg.to32(), Memory.sib(.byte, .{ .base = .{ .reg = end_reg.to64() }, @@ -9841,7 +9956,7 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { const dst_mcv = try self.allocRegOrMem(inst, false); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .disp = dst_mcv.load_frame.off, @@ -9849,7 +9964,7 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { start_reg.to64(), ); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .disp = dst_mcv.load_frame.off + 8, @@ -9945,13 +10060,13 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { try self.truncateRegister(elem_ty, elem_reg); } if (elem_bit_off > 0) try self.genShiftBinOpMir( - .shl, + .{ ._l, .sh }, elem_ty, .{ .register = elem_reg }, .{ .immediate = elem_bit_off }, ); try self.genBinOpMir( - .@"or", + .{ ._, .@"or" }, elem_ty, .{ .load_frame = .{ .index = frame_index, .off = elem_byte_off } }, .{ .register = elem_reg }, @@ -9962,13 +10077,13 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { try self.truncateRegister(elem_ty, registerAlias(reg, elem_abi_size)); } try self.genShiftBinOpMir( - .shr, + .{ ._r, .sh }, elem_ty, .{ .register = reg }, .{ .immediate = elem_abi_bits - elem_bit_off }, ); try self.genBinOpMir( - .@"or", + .{ ._, .@"or" }, elem_ty, .{ .load_frame = .{ .index = frame_index, @@ -10078,25 +10193,25 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { } const mir_tag = if (@as( - ?Mir.Inst.Tag, + ?Mir.Inst.FixedTag, if (mem.eql(u2, &order, &.{ 1, 3, 2 }) or mem.eql(u2, &order, &.{ 3, 1, 2 })) switch (ty.zigTypeTag()) { .Float => switch (ty.floatBits(self.target.*)) { - 32 => .vfmadd132ss, - 64 => .vfmadd132sd, + 32 => .{ ._, .vfmadd132ss }, + 64 => .{ ._, .vfmadd132sd }, 16, 80, 128 => null, else => unreachable, }, .Vector => switch (ty.childType().zigTypeTag()) { .Float => switch (ty.childType().floatBits(self.target.*)) { 32 => switch (ty.vectorLen()) { - 1 => .vfmadd132ss, - 2...8 => .vfmadd132ps, + 1 => .{ ._, .vfmadd132ss }, + 2...8 => .{ ._, .vfmadd132ps }, else => null, }, 64 => switch (ty.vectorLen()) { - 1 => .vfmadd132sd, - 2...4 => .vfmadd132pd, + 1 => .{ ._, .vfmadd132sd }, + 2...4 => .{ ._, .vfmadd132pd }, else => null, }, 16, 80, 128 => null, @@ -10109,21 +10224,21 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { else if (mem.eql(u2, &order, &.{ 2, 1, 3 }) or mem.eql(u2, &order, &.{ 1, 2, 3 })) switch (ty.zigTypeTag()) { .Float => switch (ty.floatBits(self.target.*)) { - 32 => .vfmadd213ss, - 64 => .vfmadd213sd, + 32 => .{ ._, .vfmadd213ss }, + 64 => .{ ._, .vfmadd213sd }, 16, 80, 128 => null, else => unreachable, }, .Vector => switch (ty.childType().zigTypeTag()) { .Float => switch (ty.childType().floatBits(self.target.*)) { 32 => switch (ty.vectorLen()) { - 1 => .vfmadd213ss, - 2...8 => .vfmadd213ps, + 1 => .{ ._, .vfmadd213ss }, + 2...8 => .{ ._, .vfmadd213ps }, else => null, }, 64 => switch (ty.vectorLen()) { - 1 => .vfmadd213sd, - 2...4 
=> .vfmadd213pd, + 1 => .{ ._, .vfmadd213sd }, + 2...4 => .{ ._, .vfmadd213pd }, else => null, }, 16, 80, 128 => null, @@ -10136,21 +10251,21 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { else if (mem.eql(u2, &order, &.{ 2, 3, 1 }) or mem.eql(u2, &order, &.{ 3, 2, 1 })) switch (ty.zigTypeTag()) { .Float => switch (ty.floatBits(self.target.*)) { - 32 => .vfmadd231ss, - 64 => .vfmadd231sd, + 32 => .{ ._, .vfmadd231ss }, + 64 => .{ ._, .vfmadd231sd }, 16, 80, 128 => null, else => unreachable, }, .Vector => switch (ty.childType().zigTypeTag()) { .Float => switch (ty.childType().floatBits(self.target.*)) { 32 => switch (ty.vectorLen()) { - 1 => .vfmadd231ss, - 2...8 => .vfmadd231ps, + 1 => .{ ._, .vfmadd231ss }, + 2...8 => .{ ._, .vfmadd231ps }, else => null, }, 64 => switch (ty.vectorLen()) { - 1 => .vfmadd231sd, - 2...4 => .vfmadd231pd, + 1 => .{ ._, .vfmadd231sd }, + 2...4 => .{ ._, .vfmadd231pd }, else => null, }, 16, 80, 128 => null, @@ -10522,17 +10637,37 @@ fn truncateRegister(self: *Self, ty: Type, reg: Register) !void { switch (int_info.signedness) { .signed => { const shift = @intCast(u6, max_reg_bit_width - int_info.bits); - try self.genShiftBinOpMir(.sal, Type.isize, .{ .register = reg }, .{ .immediate = shift }); - try self.genShiftBinOpMir(.sar, Type.isize, .{ .register = reg }, .{ .immediate = shift }); + try self.genShiftBinOpMir( + .{ ._l, .sa }, + Type.isize, + .{ .register = reg }, + .{ .immediate = shift }, + ); + try self.genShiftBinOpMir( + .{ ._r, .sa }, + Type.isize, + .{ .register = reg }, + .{ .immediate = shift }, + ); }, .unsigned => { const shift = @intCast(u6, max_reg_bit_width - int_info.bits); const mask = (~@as(u64, 0)) >> shift; if (int_info.bits <= 32) { - try self.genBinOpMir(.@"and", Type.u32, .{ .register = reg }, .{ .immediate = mask }); + try self.genBinOpMir( + .{ ._, .@"and" }, + Type.u32, + .{ .register = reg }, + .{ .immediate = mask }, + ); } else { const tmp_reg = try self.copyToTmpRegister(Type.usize, .{ .immediate = mask }); - try self.genBinOpMir(.@"and", Type.usize, .{ .register = reg }, .{ .register = tmp_reg }); + try self.genBinOpMir( + .{ ._, .@"and" }, + Type.usize, + .{ .register = reg }, + .{ .register = tmp_reg }, + ); } }, } diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 2d7fa4b4fd..c32e7fc974 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -286,10 +286,10 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void { .rri_s, .rri_u => inst.data.rri.fixes, .ri_s, .ri_u => inst.data.ri.fixes, .ri64, .rm_sib, .rm_rip, .mr_sib, .mr_rip => inst.data.rx.fixes, - .mi_sib_u, .mi_rip_u, .mi_sib_s, .mi_rip_s => ._, .mrr_sib, .mrr_rip, .rrm_sib, .rrm_rip => inst.data.rrx.fixes, .rmi_sib, .rmi_rip, .mri_sib, .mri_rip => inst.data.rix.fixes, .rrmi_sib, .rrmi_rip => inst.data.rrix.fixes, + .mi_sib_u, .mi_rip_u, .mi_sib_s, .mi_rip_s => inst.data.x.fixes, .m_sib, .m_rip, .rax_moffs, .moffs_rax => inst.data.x.fixes, .extern_fn_reloc, .got_reloc, .direct_reloc, .import_reloc, .tlv_reloc => ._, else => return lower.fail("TODO lower .{s}", .{@tagName(inst.ops)}), @@ -356,8 +356,11 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void { .{ .mem = lower.mem(inst.ops, inst.data.x.payload) }, }, .mi_sib_s, .mi_sib_u, .mi_rip_u, .mi_rip_s => &.{ - .{ .mem = lower.mem(inst.ops, inst.data.ix.payload) }, - .{ .imm = lower.imm(inst.ops, inst.data.ix.i) }, + .{ .mem = lower.mem(inst.ops, inst.data.x.payload + 1) }, + .{ .imm = lower.imm( + inst.ops, + lower.mir.extraData(Mir.Imm32, 
inst.data.x.payload).data.imm, + ) }, }, .rm_sib, .rm_rip => &.{ .{ .reg = inst.data.rx.r1 }, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 951a0c5d4d..6b5e2bded7 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -36,6 +36,18 @@ pub const Inst = struct { /// ___ @"_", + /// Integer __ + i_, + + /// ___ Left + _l, + /// ___ Left Double + _ld, + /// ___ Right + _r, + /// ___ Right Double + _rd, + /// ___ Above _a, /// ___ Above Or Equal @@ -53,7 +65,7 @@ pub const Inst = struct { /// ___ Greater Or Equal _ge, /// ___ Less - _l, + //_l, /// ___ Less Or Equal _le, /// ___ Not Above @@ -97,6 +109,15 @@ pub const Inst = struct { /// ___ Zero _z, + /// ___ Byte + //_b, + /// ___ Word + _w, + /// ___ Doubleword + _d, + /// ___ QuadWord + _q, + /// ___ String //_s, /// ___ String Byte @@ -165,6 +186,18 @@ pub const Inst = struct { /// Locked ___ @"lock _", + /// ___ And Complement + //_c, + /// Locked ___ And Complement + @"lock _c", + /// ___ And Reset + //_r, + /// Locked ___ And Reset + @"lock _r", + /// ___ And Set + //_s, + /// Locked ___ And Set + @"lock _s", /// ___ 8 Bytes _8b, /// Locked ___ 8 Bytes @@ -174,6 +207,11 @@ pub const Inst = struct { /// Locked ___ 16 Bytes @"lock _16b", + /// Float ___ + f_, + /// Float ___ Pop + f_p, + /// Packed ___ p_, /// Packed ___ Byte @@ -250,13 +288,10 @@ pub const Inst = struct { /// Byte swap bswap, /// Bit test - bt, /// Bit test and complement - btc, /// Bit test and reset - btr, /// Bit test and set - bts, + bt, /// Call call, /// Convert byte to word @@ -280,21 +315,18 @@ pub const Inst = struct { /// Convert word to doubleword cwde, /// Unsigned division - div, - /// Store integer with truncation - fisttp, - /// Load floating-point value - fld, /// Signed division - idiv, - /// Signed multiplication - imul, + div, /// int3, + /// Store integer with truncation + istt, /// Conditional jump j, /// Jump jmp, + /// Load floating-point value + ld, /// Load effective address lea, /// Load string @@ -307,20 +339,17 @@ pub const Inst = struct { mfence, /// Move /// Move data from string to string + /// Move doubleword + /// Move quadword mov, /// Move data after swapping bytes movbe, - /// Move doubleword - movd, - /// Move quadword - movq, /// Move with sign extension movsx, - /// Move with sign extension - movsxd, /// Move with zero extension movzx, /// Multiply + /// Signed multiplication mul, /// Two's complement negation neg, @@ -337,19 +366,16 @@ pub const Inst = struct { /// Push push, /// Rotate left through carry - rcl, /// Rotate right through carry - rcr, + rc, /// Return ret, /// Rotate left - rol, /// Rotate right - ror, + ro, /// Arithmetic shift left - sal, /// Arithmetic shift right - sar, + sa, /// Integer subtraction with borrow sbb, /// Scan string @@ -359,13 +385,10 @@ pub const Inst = struct { /// Store fence sfence, /// Logical shift left - shl, /// Double precision shift left - shld, /// Logical shift right - shr, /// Double precision shift right - shrd, + sh, /// Subtract sub, /// Store string @@ -730,6 +753,8 @@ pub const Inst = struct { pseudo, }; + pub const FixedTag = struct { Fixes, Tag }; + pub const Ops = enum(u8) { /// No data associated with this instruction (only mnemonic is used). none, @@ -800,16 +825,16 @@ pub const Inst = struct { /// Uses `x` with extra data of type `MemoryRip`. m_rip, /// Memory (SIB), immediate (unsigned) operands. - /// Uses `ix` payload with extra data of type `MemorySib`. + /// Uses `x` payload with extra data of type `Imm32` followed by `MemorySib`. 
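// A note on the new encoding, mirroring the Lower.zig hunk above: the
// immediate now occupies `extra[payload]` as an `Imm32`, and the memory
// operand's encoding starts one word later, so the decode is simply
//
//     const imm = lower.mir.extraData(Mir.Imm32, inst.data.x.payload).data.imm;
//     const mem = lower.mem(inst.ops, inst.data.x.payload + 1); // skip the Imm32
//
// (sketch only; `imm` and `mem` are illustrative local names, not part of
// the patch).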
mi_sib_u, /// Memory (RIP), immediate (unsigned) operands. - /// Uses `ix` payload with extra data of type `MemoryRip`. + /// Uses `x` payload with extra data of type `Imm32` followed by `MemoryRip`. mi_rip_u, /// Memory (SIB), immediate (sign-extend) operands. - /// Uses `ix` payload with extra data of type `MemorySib`. + /// Uses `x` payload with extra data of type `Imm32` followed by `MemorySib`. mi_sib_s, /// Memory (RIP), immediate (sign-extend) operands. - /// Uses `ix` payload with extra data of type `MemoryRip`. + /// Uses `x` payload with extra data of type `Imm32` followed by `MemoryRip`. mi_rip_s, /// Memory (SIB), register operands. /// Uses `rx` payload with extra data of type `MemorySib`. @@ -974,11 +999,6 @@ pub const Inst = struct { r1: Register, payload: u32, }, - /// Immediate, followed by Custom payload found in extra. - ix: struct { - i: u32, - payload: u32, - }, /// Register, register, followed by Custom payload found in extra. rrx: struct { fixes: Fixes = ._, @@ -1081,6 +1101,10 @@ pub const RegisterList = struct { } }; +pub const Imm32 = struct { + imm: u32, +}; + pub const Imm64 = struct { msb: u32, lsb: u32, From 1f5aa7747f5710e281cd2190508ce562a4bfd35f Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 8 May 2023 07:35:31 -0400 Subject: [PATCH 20/20] x86_64: finish optimizing mir tag usage Final tag count is 95. --- src/arch/x86_64/CodeGen.zig | 368 ++++++++++++++++++------------------ src/arch/x86_64/Mir.zig | 364 ++++++++--------------------------- 2 files changed, 266 insertions(+), 466 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 147be62e28..2dc1cc8ee4 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2443,7 +2443,7 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { else try self.copyToTmpRegister(src_ty, src_mcv); try self.asmRegisterRegisterImmediate( - .{ ._, .vcvtps2ph }, + .{ .v_, .cvtps2ph }, dst_reg, mat_src_reg.to128(), Immediate.u(0b1_00), @@ -2455,12 +2455,12 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { } } else if (src_bits == 64 and dst_bits == 32) { if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( - .{ ._, .vcvtsd2ss }, + .{ .v_, .cvtsd2ss }, dst_reg, dst_reg, src_mcv.mem(.qword), ) else try self.asmRegisterRegisterRegister( - .{ ._, .vcvtsd2ss }, + .{ .v_, .cvtsd2ss }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ -2506,22 +2506,22 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { src_mcv.getReg().? 
else try self.copyToTmpRegister(src_ty, src_mcv); - try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, mat_src_reg.to128()); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128()); switch (dst_bits) { 32 => {}, - 64 => try self.asmRegisterRegisterRegister(.{ ._, .vcvtss2sd }, dst_reg, dst_reg, dst_reg), + 64 => try self.asmRegisterRegisterRegister(.{ .v_, .cvtss2sd }, dst_reg, dst_reg, dst_reg), else => return self.fail("TODO implement airFpext from {} to {}", .{ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }), } } else if (src_bits == 32 and dst_bits == 64) { if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( - .{ ._, .vcvtss2sd }, + .{ .v_, .cvtss2sd }, dst_reg, dst_reg, src_mcv.mem(.dword), ) else try self.asmRegisterRegisterRegister( - .{ ._, .vcvtss2sd }, + .{ .v_, .cvtss2sd }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ -4678,8 +4678,8 @@ fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void { try self.genBinOpMir(switch (ty_bits) { // No point using an extra prefix byte for *pd which performs the same operation. 16, 32, 64, 128 => switch (tag) { - .neg => .{ ._, .xorps }, - .fabs => .{ ._, .andnps }, + .neg => .{ ._ps, .xor }, + .fabs => .{ ._ps, .andn }, else => unreachable, }, 80 => return self.fail("TODO implement airFloatSign for {}", .{ @@ -4712,23 +4712,23 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4 const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag()) { .Float => switch (ty.floatBits(self.target.*)) { - 32 => if (self.hasFeature(.avx)) .{ ._, .vroundss } else .{ ._, .roundss }, - 64 => if (self.hasFeature(.avx)) .{ ._, .vroundsd } else .{ ._, .roundsd }, + 32 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round }, + 64 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round }, 16, 80, 128 => null, else => unreachable, }, .Vector => switch (ty.childType().zigTypeTag()) { .Float => switch (ty.childType().floatBits(self.target.*)) { 32 => switch (ty.vectorLen()) { - 1 => if (self.hasFeature(.avx)) .{ ._, .vroundss } else .{ ._, .roundss }, - 2...4 => if (self.hasFeature(.avx)) .{ ._, .vroundps } else .{ ._, .roundps }, - 5...8 => if (self.hasFeature(.avx)) .{ ._, .vroundps } else null, + 1 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round }, + 2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else .{ ._ps, .round }, + 5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else null, else => null, }, 64 => switch (ty.vectorLen()) { - 1 => if (self.hasFeature(.avx)) .{ ._, .vroundsd } else .{ ._, .roundsd }, - 2 => if (self.hasFeature(.avx)) .{ ._, .vroundpd } else .{ ._, .roundpd }, - 3...4 => if (self.hasFeature(.avx)) .{ ._, .vroundpd } else null, + 1 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round }, + 2 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else .{ ._pd, .round }, + 3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else null, else => null, }, 16, 80, 128 => null, @@ -4743,8 +4743,8 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4 const abi_size = @intCast(u32, ty.abiSize(self.target.*)); const dst_alias = registerAlias(dst_reg, abi_size); - switch (mir_tag[1]) { - .vroundss, .vroundsd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + switch (mir_tag[0]) { + .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( mir_tag, 
dst_alias, dst_alias, @@ -4799,18 +4799,18 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { src_mcv.getReg().? else try self.copyToTmpRegister(ty, src_mcv); - try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, mat_src_reg.to128()); - try self.asmRegisterRegisterRegister(.{ ._, .vsqrtss }, dst_reg, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128()); + try self.asmRegisterRegisterRegister(.{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg); try self.asmRegisterRegisterImmediate( - .{ ._, .vcvtps2ph }, + .{ .v_, .cvtps2ph }, dst_reg, dst_reg, Immediate.u(0b1_00), ); break :result dst_mcv; } else null, - 32 => if (self.hasFeature(.avx)) .{ ._, .vsqrtss } else .{ ._, .sqrtss }, - 64 => if (self.hasFeature(.avx)) .{ ._, .vsqrtsd } else .{ ._, .sqrtsd }, + 32 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt }, + 64 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt }, 80, 128 => null, else => unreachable, }, @@ -4819,7 +4819,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { 16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen()) { 1 => { try self.asmRegisterRegister( - .{ ._, .vcvtph2ps }, + .{ .v_, .cvtph2ps }, dst_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? @@ -4827,13 +4827,13 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { try self.copyToTmpRegister(ty, src_mcv)).to128(), ); try self.asmRegisterRegisterRegister( - .{ ._, .vsqrtss }, + .{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg, ); try self.asmRegisterRegisterImmediate( - .{ ._, .vcvtps2ph }, + .{ .v_, .cvtps2ph }, dst_reg, dst_reg, Immediate.u(0b1_00), @@ -4843,22 +4843,22 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { 2...8 => { const wide_reg = registerAlias(dst_reg, abi_size * 2); if (src_mcv.isMemory()) try self.asmRegisterMemory( - .{ ._, .vcvtph2ps }, + .{ .v_, .cvtph2ps }, wide_reg, src_mcv.mem(Memory.PtrSize.fromSize( @intCast(u32, @divExact(wide_reg.bitSize(), 16)), )), ) else try self.asmRegisterRegister( - .{ ._, .vcvtph2ps }, + .{ .v_, .cvtph2ps }, wide_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? 
else try self.copyToTmpRegister(ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.{ ._, .vsqrtps }, wide_reg, wide_reg); + try self.asmRegisterRegister(.{ .v_ps, .sqrt }, wide_reg, wide_reg); try self.asmRegisterRegisterImmediate( - .{ ._, .vcvtps2ph }, + .{ .v_, .cvtps2ph }, dst_reg, wide_reg, Immediate.u(0b1_00), @@ -4868,15 +4868,15 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { else => null, } else null, 32 => switch (ty.vectorLen()) { - 1 => if (self.hasFeature(.avx)) .{ ._, .vsqrtss } else .{ ._, .sqrtss }, - 2...4 => if (self.hasFeature(.avx)) .{ ._, .vsqrtps } else .{ ._, .sqrtps }, - 5...8 => if (self.hasFeature(.avx)) .{ ._, .vsqrtps } else null, + 1 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt }, + 2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else .{ ._ps, .sqrt }, + 5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else null, else => null, }, 64 => switch (ty.vectorLen()) { - 1 => if (self.hasFeature(.avx)) .{ ._, .vsqrtsd } else .{ ._, .sqrtsd }, - 2 => if (self.hasFeature(.avx)) .{ ._, .vsqrtpd } else .{ ._, .sqrtpd }, - 3...4 => if (self.hasFeature(.avx)) .{ ._, .vsqrtpd } else null, + 1 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt }, + 2 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else .{ ._pd, .sqrt }, + 3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else null, else => null, }, 80, 128 => null, @@ -4888,8 +4888,8 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { })) |tag| tag else return self.fail("TODO implement airSqrt for {}", .{ ty.fmt(self.bin_file.options.module.?), }); - switch (mir_tag[1]) { - .vsqrtss, .vsqrtsd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + switch (mir_tag[0]) { + .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( mir_tag, dst_reg, dst_reg, @@ -6325,13 +6325,13 @@ fn genBinOp( defer self.register_manager.unlockReg(tmp_lock); if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( - .{ ._, .vpinsrw }, + .{ .vp_w, .insr }, dst_reg, dst_reg, src_mcv.mem(.word), Immediate.u(1), ) else try self.asmRegisterRegisterRegister( - .{ ._, .vpunpcklwd }, + .{ .vp_, .unpcklwd }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ -6339,15 +6339,15 @@ fn genBinOp( else try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg); - try self.asmRegisterRegister(.{ ._, .vmovshdup }, tmp_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg); try self.asmRegisterRegisterRegister( switch (air_tag) { - .add => .{ ._, .vaddss }, - .sub => .{ ._, .vsubss }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivss }, - .max => .{ ._, .vmaxss }, - .min => .{ ._, .vmaxss }, + .add => .{ .v_ss, .add }, + .sub => .{ .v_ss, .sub }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div }, + .max => .{ .v_ss, .max }, + .min => .{ .v_ss, .max }, else => unreachable, }, dst_reg, @@ -6355,7 +6355,7 @@ fn genBinOp( tmp_reg, ); try self.asmRegisterRegisterImmediate( - .{ ._, .vcvtps2ph }, + .{ .v_, .cvtps2ph }, dst_reg, dst_reg, Immediate.u(0b1_00), @@ -6363,29 +6363,29 @@ fn genBinOp( return dst_mcv; } else null, 32 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .{ ._, .vaddss } else .{ ._, .addss }, - .sub => if (self.hasFeature(.avx)) .{ ._, .vsubss } else .{ ._, .subss }, - .mul => if (self.hasFeature(.avx)) .{ ._, .vmulss } else .{ 
._, .mulss }, + .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul }, .div_float, .div_trunc, .div_floor, .div_exact, - => if (self.hasFeature(.avx)) .{ ._, .vdivss } else .{ ._, .divss }, - .max => if (self.hasFeature(.avx)) .{ ._, .vmaxss } else .{ ._, .maxss }, - .min => if (self.hasFeature(.avx)) .{ ._, .vminss } else .{ ._, .minss }, + => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min }, else => unreachable, }, 64 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .{ ._, .vaddsd } else .{ ._, .addsd }, - .sub => if (self.hasFeature(.avx)) .{ ._, .vsubsd } else .{ ._, .subsd }, - .mul => if (self.hasFeature(.avx)) .{ ._, .vmulsd } else .{ ._, .mulsd }, + .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul }, .div_float, .div_trunc, .div_floor, .div_exact, - => if (self.hasFeature(.avx)) .{ ._, .vdivsd } else .{ ._, .divsd }, - .max => if (self.hasFeature(.avx)) .{ ._, .vmaxsd } else .{ ._, .maxsd }, - .min => if (self.hasFeature(.avx)) .{ ._, .vminsd } else .{ ._, .minsd }, + => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min }, else => unreachable, }, 80, 128 => null, @@ -6401,13 +6401,13 @@ fn genBinOp( defer self.register_manager.unlockReg(tmp_lock); if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( - .{ ._, .vpinsrw }, + .{ .vp_w, .insr }, dst_reg, dst_reg, src_mcv.mem(.word), Immediate.u(1), ) else try self.asmRegisterRegisterRegister( - .{ ._, .vpunpcklwd }, + .{ .vp_, .unpcklwd }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ -6415,15 +6415,15 @@ fn genBinOp( else try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg); - try self.asmRegisterRegister(.{ ._, .vmovshdup }, tmp_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg); try self.asmRegisterRegisterRegister( switch (air_tag) { - .add => .{ ._, .vaddss }, - .sub => .{ ._, .vsubss }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivss }, - .max => .{ ._, .vmaxss }, - .min => .{ ._, .vmaxss }, + .add => .{ .v_ss, .add }, + .sub => .{ .v_ss, .sub }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div }, + .max => .{ .v_ss, .max }, + .min => .{ .v_ss, .max }, else => unreachable, }, dst_reg, @@ -6431,7 +6431,7 @@ fn genBinOp( tmp_reg, ); try self.asmRegisterRegisterImmediate( - .{ ._, .vcvtps2ph }, + .{ .v_, .cvtps2ph }, dst_reg, dst_reg, Immediate.u(0b1_00), @@ -6444,12 +6444,12 @@ fn genBinOp( defer self.register_manager.unlockReg(tmp_lock); if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( - .{ ._, .vpinsrd }, + .{ .vp_d, .insr }, dst_reg, src_mcv.mem(.dword), Immediate.u(1), ) else try self.asmRegisterRegisterRegister( - .{ ._, .vunpcklps }, + .{ .v_ps, .unpckl }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ 
-6457,20 +6457,20 @@ fn genBinOp( else try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); try self.asmRegisterRegisterRegister( - .{ ._, .vmovhlps }, + .{ .v_ps, .movhl }, tmp_reg, dst_reg, dst_reg, ); try self.asmRegisterRegisterRegister( switch (air_tag) { - .add => .{ ._, .vaddps }, - .sub => .{ ._, .vsubps }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps }, - .max => .{ ._, .vmaxps }, - .min => .{ ._, .vmaxps }, + .add => .{ .v_ps, .add }, + .sub => .{ .v_ps, .sub }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .max => .{ .v_ps, .max }, + .min => .{ .v_ps, .max }, else => unreachable, }, dst_reg, @@ -6478,7 +6478,7 @@ fn genBinOp( tmp_reg, ); try self.asmRegisterRegisterImmediate( - .{ ._, .vcvtps2ph }, + .{ .v_, .cvtps2ph }, dst_reg, dst_reg, Immediate.u(0b1_00), @@ -6490,13 +6490,13 @@ fn genBinOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); if (src_mcv.isMemory()) try self.asmRegisterMemory( - .{ ._, .vcvtph2ps }, + .{ .v_, .cvtph2ps }, tmp_reg, src_mcv.mem(.qword), ) else try self.asmRegisterRegister( - .{ ._, .vcvtph2ps }, + .{ .v_, .cvtph2ps }, tmp_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? @@ -6505,11 +6505,11 @@ fn genBinOp( ); try self.asmRegisterRegisterRegister( switch (air_tag) { - .add => .{ ._, .vaddps }, - .sub => .{ ._, .vsubps }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps }, - .max => .{ ._, .vmaxps }, - .min => .{ ._, .vmaxps }, + .add => .{ .v_ps, .add }, + .sub => .{ .v_ps, .sub }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .max => .{ .v_ps, .max }, + .min => .{ .v_ps, .max }, else => unreachable, }, dst_reg, @@ -6517,7 +6517,7 @@ fn genBinOp( tmp_reg, ); try self.asmRegisterRegisterImmediate( - .{ ._, .vcvtps2ph }, + .{ .v_, .cvtps2ph }, dst_reg, dst_reg, Immediate.u(0b1_00), @@ -6529,13 +6529,13 @@ fn genBinOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg.to256(), dst_reg); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg.to256(), dst_reg); if (src_mcv.isMemory()) try self.asmRegisterMemory( - .{ ._, .vcvtph2ps }, + .{ .v_, .cvtph2ps }, tmp_reg, src_mcv.mem(.xword), ) else try self.asmRegisterRegister( - .{ ._, .vcvtph2ps }, + .{ .v_, .cvtph2ps }, tmp_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? 
@@ -6544,11 +6544,11 @@ fn genBinOp( ); try self.asmRegisterRegisterRegister( switch (air_tag) { - .add => .{ ._, .vaddps }, - .sub => .{ ._, .vsubps }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps }, - .max => .{ ._, .vmaxps }, - .min => .{ ._, .vmaxps }, + .add => .{ .v_ps, .add }, + .sub => .{ .v_ps, .sub }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .max => .{ .v_ps, .max }, + .min => .{ .v_ps, .max }, else => unreachable, }, dst_reg.to256(), @@ -6556,7 +6556,7 @@ fn genBinOp( tmp_reg, ); try self.asmRegisterRegisterImmediate( - .{ ._, .vcvtps2ph }, + .{ .v_, .cvtps2ph }, dst_reg, dst_reg.to256(), Immediate.u(0b1_00), @@ -6567,76 +6567,76 @@ fn genBinOp( } else null, 32 => switch (lhs_ty.vectorLen()) { 1 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .{ ._, .vaddss } else .{ ._, .addss }, - .sub => if (self.hasFeature(.avx)) .{ ._, .vsubss } else .{ ._, .subss }, - .mul => if (self.hasFeature(.avx)) .{ ._, .vmulss } else .{ ._, .mulss }, + .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul }, .div_float, .div_trunc, .div_floor, .div_exact, - => if (self.hasFeature(.avx)) .{ ._, .vdivss } else .{ ._, .divss }, - .max => if (self.hasFeature(.avx)) .{ ._, .vmaxss } else .{ ._, .maxss }, - .min => if (self.hasFeature(.avx)) .{ ._, .vminss } else .{ ._, .minss }, + => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min }, else => unreachable, }, 2...4 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .{ ._, .vaddps } else .{ ._, .addps }, - .sub => if (self.hasFeature(.avx)) .{ ._, .vsubps } else .{ ._, .subps }, - .mul => if (self.hasFeature(.avx)) .{ ._, .vmulps } else .{ ._, .mulps }, + .add => if (self.hasFeature(.avx)) .{ .v_ps, .add } else .{ ._ps, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_ps, .sub } else .{ ._ps, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_ps, .mul } else .{ ._ps, .mul }, .div_float, .div_trunc, .div_floor, .div_exact, - => if (self.hasFeature(.avx)) .{ ._, .vdivps } else .{ ._, .divps }, - .max => if (self.hasFeature(.avx)) .{ ._, .vmaxps } else .{ ._, .maxps }, - .min => if (self.hasFeature(.avx)) .{ ._, .vminps } else .{ ._, .minps }, + => if (self.hasFeature(.avx)) .{ .v_ps, .div } else .{ ._ps, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_ps, .max } else .{ ._ps, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_ps, .min } else .{ ._ps, .min }, else => unreachable, }, 5...8 => if (self.hasFeature(.avx)) switch (air_tag) { - .add => .{ ._, .vaddps }, - .sub => .{ ._, .vsubps }, - .mul => .{ ._, .vmulps }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps }, - .max => .{ ._, .vmaxps }, - .min => .{ ._, .vminps }, + .add => .{ .v_ps, .add }, + .sub => .{ .v_ps, .sub }, + .mul => .{ .v_ps, .mul }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .max => .{ .v_ps, .max }, + .min => .{ .v_ps, .min }, else => unreachable, } else null, else => null, }, 64 => switch (lhs_ty.vectorLen()) { 1 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .{ ._, .vaddsd } else .{ ._, .addsd }, - .sub => if (self.hasFeature(.avx)) .{ ._, .vsubsd } else .{ ._, .subsd }, - .mul => if (self.hasFeature(.avx)) .{ ._, 
.vmulsd } else .{ ._, .mulsd }, + .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul }, .div_float, .div_trunc, .div_floor, .div_exact, - => if (self.hasFeature(.avx)) .{ ._, .vdivsd } else .{ ._, .divsd }, - .max => if (self.hasFeature(.avx)) .{ ._, .vmaxsd } else .{ ._, .maxsd }, - .min => if (self.hasFeature(.avx)) .{ ._, .vminsd } else .{ ._, .minsd }, + => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min }, else => unreachable, }, 2 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .{ ._, .vaddpd } else .{ ._, .addpd }, - .sub => if (self.hasFeature(.avx)) .{ ._, .vsubpd } else .{ ._, .subpd }, - .mul => if (self.hasFeature(.avx)) .{ ._, .vmulpd } else .{ ._, .mulpd }, + .add => if (self.hasFeature(.avx)) .{ .v_pd, .add } else .{ ._pd, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_pd, .sub } else .{ ._pd, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_pd, .mul } else .{ ._pd, .mul }, .div_float, .div_trunc, .div_floor, .div_exact, - => if (self.hasFeature(.avx)) .{ ._, .vdivpd } else .{ ._, .divpd }, - .max => if (self.hasFeature(.avx)) .{ ._, .vmaxpd } else .{ ._, .maxpd }, - .min => if (self.hasFeature(.avx)) .{ ._, .vminpd } else .{ ._, .minpd }, + => if (self.hasFeature(.avx)) .{ .v_pd, .div } else .{ ._pd, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_pd, .max } else .{ ._pd, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_pd, .min } else .{ ._pd, .min }, else => unreachable, }, 3...4 => if (self.hasFeature(.avx)) switch (air_tag) { - .add => .{ ._, .vaddpd }, - .sub => .{ ._, .vsubpd }, - .mul => .{ ._, .vmulpd }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivpd }, - .max => .{ ._, .vmaxpd }, - .min => .{ ._, .vminpd }, + .add => .{ .v_pd, .add }, + .sub => .{ .v_pd, .sub }, + .mul => .{ .v_pd, .mul }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_pd, .div }, + .max => .{ .v_pd, .max }, + .min => .{ .v_pd, .min }, else => unreachable, } else null, else => null, @@ -7563,13 +7563,13 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { defer self.register_manager.unlockReg(tmp2_lock); if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( - .{ ._, .vpinsrw }, + .{ .vp_w, .insr }, tmp1_reg, dst_reg.to128(), src_mcv.mem(.word), Immediate.u(1), ) else try self.asmRegisterRegisterRegister( - .{ ._, .vpunpcklwd }, + .{ .vp_, .unpcklwd }, tmp1_reg, dst_reg.to128(), (if (src_mcv.isRegister()) @@ -7577,20 +7577,20 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { else try self.copyToTmpRegister(ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, tmp1_reg, tmp1_reg); - try self.asmRegisterRegister(.{ ._, .vmovshdup }, tmp2_reg, tmp1_reg); - try self.genBinOpMir(.{ ._, .ucomiss }, ty, tmp1_mcv, tmp2_mcv); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, tmp1_reg, tmp1_reg); + try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp2_reg, tmp1_reg); + try self.genBinOpMir(.{ ._ss, .ucomi }, ty, tmp1_mcv, tmp2_mcv); } else return self.fail("TODO implement airCmp for {}", .{ ty.fmt(self.bin_file.options.module.?), }), 32 => try self.genBinOpMir( - .{ ._, .ucomiss }, + .{ ._ss, .ucomi }, ty, .{ .register = 
dst_reg }, src_mcv, ), 64 => try self.genBinOpMir( - .{ ._, .ucomisd }, + .{ ._sd, .ucomi }, ty, .{ .register = dst_reg }, src_mcv, @@ -8573,42 +8573,42 @@ fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag { else => return .{ ._, .mov }, .Float => switch (ty.floatBits(self.target.*)) { 16 => unreachable, // needs special handling - 32 => return if (self.hasFeature(.avx)) .{ ._, .vmovss } else .{ ._, .movss }, - 64 => return if (self.hasFeature(.avx)) .{ ._, .vmovsd } else .{ ._, .movsd }, + 32 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov }, + 64 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov }, 128 => return if (self.hasFeature(.avx)) - if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups } - else if (aligned) .{ ._, .movaps } else .{ ._, .movups }, + if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, else => {}, }, .Vector => switch (ty.childType().zigTypeTag()) { .Float => switch (ty.childType().floatBits(self.target.*)) { 16 => switch (ty.vectorLen()) { 1 => unreachable, // needs special handling - 2 => return if (self.hasFeature(.avx)) .{ ._, .vmovss } else .{ ._, .movss }, - 3...4 => return if (self.hasFeature(.avx)) .{ ._, .vmovsd } else .{ ._, .movsd }, + 2 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov }, + 3...4 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov }, 5...8 => return if (self.hasFeature(.avx)) - if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups } - else if (aligned) .{ ._, .movaps } else .{ ._, .movups }, + if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, 9...16 => if (self.hasFeature(.avx)) - return if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }, + return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }, else => {}, }, 32 => switch (ty.vectorLen()) { - 1 => return if (self.hasFeature(.avx)) .{ ._, .vmovss } else .{ ._, .movss }, + 1 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov }, 2...4 => return if (self.hasFeature(.avx)) - if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups } - else if (aligned) .{ ._, .movaps } else .{ ._, .movups }, + if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, 5...8 => if (self.hasFeature(.avx)) - return if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }, + return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }, else => {}, }, 64 => switch (ty.vectorLen()) { - 1 => return if (self.hasFeature(.avx)) .{ ._, .vmovsd } else .{ ._, .movsd }, + 1 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov }, 2 => return if (self.hasFeature(.avx)) - if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups } - else if (aligned) .{ ._, .movaps } else .{ ._, .movups }, + if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, 3...4 => if (self.hasFeature(.avx)) - return if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }, + return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }, else => {}, }, else => {}, @@ -8724,11 +8724,11 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr if ((dst_reg.class() == .floating_point) == (src_reg.class() == .floating_point)) switch (ty.zigTypeTag()) { else => .{ ._, .mov }, - .Float, .Vector => .{ ._, .movaps }, + .Float, .Vector => .{ ._ps, .mova }, } 
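// When source and destination are in the same class (both general-purpose
// or both floating-point), a plain integer `mov` or SSE `movaps`
// (.{ ._ps, .mova }) moves the value directly. The `else` below handles
// the cross-class case: a 16-bit value has no direct GPR<->XMM move, so it
// round-trips through `pinsrw` (.{ .p_w, .insr }) or `pextrw`
// (.{ .p_w, .extr }).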
else switch (abi_size) { 2 => return try self.asmRegisterRegisterImmediate( - if (dst_reg.class() == .floating_point) .{ ._, .pinsrw } else .{ ._, .pextrw }, + if (dst_reg.class() == .floating_point) .{ .p_w, .insr } else .{ .p_w, .extr }, registerAlias(dst_reg, 4), registerAlias(src_reg, 4), Immediate.u(0), @@ -8761,7 +8761,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr }); if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) try self.asmRegisterMemoryImmediate( - .{ ._, .pinsrw }, + .{ .p_w, .insr }, registerAlias(dst_reg, abi_size), src_mem, Immediate.u(0), @@ -8794,7 +8794,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr }); return if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) self.asmRegisterMemoryImmediate( - .{ ._, .pinsrw }, + .{ .p_w, .insr }, registerAlias(dst_reg, abi_size), src_mem, Immediate.u(0), @@ -8838,7 +8838,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr }); if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) try self.asmRegisterMemoryImmediate( - .{ ._, .pinsrw }, + .{ .p_w, .insr }, registerAlias(dst_reg, abi_size), src_mem, Immediate.u(0), @@ -8952,7 +8952,7 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal ); if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) try self.asmMemoryRegisterImmediate( - .{ ._, .pextrw }, + .{ .p_w, .extr }, dst_mem, src_reg.to128(), Immediate.u(0), @@ -9069,7 +9069,7 @@ fn genInlineMemcpyRegisterRegister( try self.asmMemoryRegister( switch (src_reg.class()) { .general_purpose, .segment => .{ ._, .mov }, - .floating_point => .{ ._, .movss }, + .floating_point => .{ ._ss, .mov }, }, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = dst_reg, .disp = -offset }), registerAlias(src_reg, abi_size), @@ -10197,21 +10197,21 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { if (mem.eql(u2, &order, &.{ 1, 3, 2 }) or mem.eql(u2, &order, &.{ 3, 1, 2 })) switch (ty.zigTypeTag()) { .Float => switch (ty.floatBits(self.target.*)) { - 32 => .{ ._, .vfmadd132ss }, - 64 => .{ ._, .vfmadd132sd }, + 32 => .{ .v_ss, .fmadd132 }, + 64 => .{ .v_sd, .fmadd132 }, 16, 80, 128 => null, else => unreachable, }, .Vector => switch (ty.childType().zigTypeTag()) { .Float => switch (ty.childType().floatBits(self.target.*)) { 32 => switch (ty.vectorLen()) { - 1 => .{ ._, .vfmadd132ss }, - 2...8 => .{ ._, .vfmadd132ps }, + 1 => .{ .v_ss, .fmadd132 }, + 2...8 => .{ .v_ps, .fmadd132 }, else => null, }, 64 => switch (ty.vectorLen()) { - 1 => .{ ._, .vfmadd132sd }, - 2...4 => .{ ._, .vfmadd132pd }, + 1 => .{ .v_sd, .fmadd132 }, + 2...4 => .{ .v_pd, .fmadd132 }, else => null, }, 16, 80, 128 => null, @@ -10224,21 +10224,21 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { else if (mem.eql(u2, &order, &.{ 2, 1, 3 }) or mem.eql(u2, &order, &.{ 1, 2, 3 })) switch (ty.zigTypeTag()) { .Float => switch (ty.floatBits(self.target.*)) { - 32 => .{ ._, .vfmadd213ss }, - 64 => .{ ._, .vfmadd213sd }, + 32 => .{ .v_ss, .fmadd213 }, + 64 => .{ .v_sd, .fmadd213 }, 16, 80, 128 => null, else => unreachable, }, .Vector => switch (ty.childType().zigTypeTag()) { .Float => switch (ty.childType().floatBits(self.target.*)) { 32 => switch (ty.vectorLen()) { - 1 => .{ ._, .vfmadd213ss }, - 2...8 => .{ ._, .vfmadd213ps }, + 1 => .{ .v_ss, .fmadd213 }, + 2...8 => .{ .v_ps, .fmadd213 }, else => null, }, 64 => switch (ty.vectorLen()) { - 1 => .{ ._, .vfmadd213sd }, - 2...4 => .{ ._, 
.vfmadd213pd }, + 1 => .{ .v_sd, .fmadd213 }, + 2...4 => .{ .v_pd, .fmadd213 }, else => null, }, 16, 80, 128 => null, @@ -10251,21 +10251,21 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { else if (mem.eql(u2, &order, &.{ 2, 3, 1 }) or mem.eql(u2, &order, &.{ 3, 2, 1 })) switch (ty.zigTypeTag()) { .Float => switch (ty.floatBits(self.target.*)) { - 32 => .{ ._, .vfmadd231ss }, - 64 => .{ ._, .vfmadd231sd }, + 32 => .{ .v_ss, .fmadd231 }, + 64 => .{ .v_sd, .fmadd231 }, 16, 80, 128 => null, else => unreachable, }, .Vector => switch (ty.childType().zigTypeTag()) { .Float => switch (ty.childType().floatBits(self.target.*)) { 32 => switch (ty.vectorLen()) { - 1 => .{ ._, .vfmadd231ss }, - 2...8 => .{ ._, .vfmadd231ps }, + 1 => .{ .v_ss, .fmadd231 }, + 2...8 => .{ .v_ps, .fmadd231 }, else => null, }, 64 => switch (ty.vectorLen()) { - 1 => .{ ._, .vfmadd231sd }, - 2...4 => .{ ._, .vfmadd231pd }, + 1 => .{ .v_sd, .fmadd231 }, + 2...4 => .{ .v_pd, .fmadd231 }, else => null, }, 16, 80, 128 => null, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 6b5e2bded7..0a7b5597b3 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -278,8 +278,14 @@ pub const Inst = struct { /// Add with carry adc, /// Add + /// Add packed single-precision floating-point values + /// Add scalar single-precision floating-point values + /// Add packed double-precision floating-point values + /// Add scalar double-precision floating-point values add, /// Logical and + /// Bitwise logical and of packed single-precision floating-point values + /// Bitwise logical and of packed double-precision floating-point values @"and", /// Bit scan forward bsf, @@ -304,6 +310,8 @@ pub const Inst = struct { cmov, /// Logical compare /// Compare string + /// Compare scalar single-precision floating-point values + /// Compare scalar double-precision floating-point values cmp, /// Compare and exchange /// Compare and exchange bytes @@ -316,6 +324,10 @@ pub const Inst = struct { cwde, /// Unsigned division /// Signed division + /// Divide packed single-precision floating-point values + /// Divide scalar single-precision floating-point values + /// Divide packed double-precision floating-point values + /// Divide scalar double-precision floating-point values div, /// int3, @@ -339,6 +351,8 @@ pub const Inst = struct { mfence, /// Move /// Move data from string to string + /// Move scalar single-precision floating-point value + /// Move scalar double-precision floating-point value /// Move doubleword /// Move quadword mov, @@ -350,6 +364,10 @@ pub const Inst = struct { movzx, /// Multiply /// Signed multiplication + /// Multiply packed single-precision floating-point values + /// Multiply scalar single-precision floating-point values + /// Multiply packed double-precision floating-point values + /// Multiply scalar double-precision floating-point values mul, /// Two's complement negation neg, @@ -358,6 +376,8 @@ pub const Inst = struct { /// One's complement negation not, /// Logical or + /// Bitwise logical or of packed single-precision floating-point values + /// Bitwise logical or of packed double-precision floating-point values @"or", /// Pop pop, @@ -390,6 +410,10 @@ pub const Inst = struct { /// Double precision shift right sh, /// Subtract + /// Subtract packed single-precision floating-point values + /// Subtract scalar single-precision floating-point values + /// Subtract packed double-precision floating-point values + /// Subtract scalar double-precision floating-point values sub, /// Store 
diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig
index 6b5e2bded7..0a7b5597b3 100644
--- a/src/arch/x86_64/Mir.zig
+++ b/src/arch/x86_64/Mir.zig
@@ -278,8 +278,14 @@ pub const Inst = struct {
         /// Add with carry
         adc,
         /// Add
+        /// Add packed single-precision floating-point values
+        /// Add scalar single-precision floating-point values
+        /// Add packed double-precision floating-point values
+        /// Add scalar double-precision floating-point values
         add,
         /// Logical and
+        /// Bitwise logical and of packed single-precision floating-point values
+        /// Bitwise logical and of packed double-precision floating-point values
         @"and",
         /// Bit scan forward
         bsf,
@@ -304,6 +310,8 @@
         cmov,
         /// Logical compare
         /// Compare string
+        /// Compare scalar single-precision floating-point values
+        /// Compare scalar double-precision floating-point values
         cmp,
         /// Compare and exchange
         /// Compare and exchange bytes
@@ -316,6 +324,10 @@
         cwde,
         /// Unsigned division
         /// Signed division
+        /// Divide packed single-precision floating-point values
+        /// Divide scalar single-precision floating-point values
+        /// Divide packed double-precision floating-point values
+        /// Divide scalar double-precision floating-point values
         div,
         ///
         int3,
@@ -339,6 +351,8 @@
         mfence,
         /// Move
         /// Move data from string to string
+        /// Move scalar single-precision floating-point value
+        /// Move scalar double-precision floating-point value
         /// Move doubleword
         /// Move quadword
         mov,
@@ -350,6 +364,10 @@
         movzx,
         /// Multiply
         /// Signed multiplication
+        /// Multiply packed single-precision floating-point values
+        /// Multiply scalar single-precision floating-point values
+        /// Multiply packed double-precision floating-point values
+        /// Multiply scalar double-precision floating-point values
         mul,
         /// Two's complement negation
         neg,
@@ -358,6 +376,8 @@
         /// One's complement negation
         not,
         /// Logical or
+        /// Bitwise logical or of packed single-precision floating-point values
+        /// Bitwise logical or of packed double-precision floating-point values
         @"or",
         /// Pop
         pop,
@@ -390,6 +410,10 @@
         /// Double precision shift right
         sh,
         /// Subtract
+        /// Subtract packed single-precision floating-point values
+        /// Subtract scalar single-precision floating-point values
+        /// Subtract packed double-precision floating-point values
+        /// Subtract scalar double-precision floating-point values
         sub,
         /// Store string
         sto,
@@ -406,145 +430,88 @@
         /// Exchange register/memory with register
         xchg,
         /// Logical exclusive-or
+        /// Bitwise logical xor of packed single-precision floating-point values
+        /// Bitwise logical xor of packed double-precision floating-point values
         xor,
 
-        /// Add packed single-precision floating-point values
-        addps,
-        /// Add scalar single-precision floating-point values
-        addss,
-        /// Bitwise logical and of packed single precision floating-point values
-        andps,
-        /// Bitwise logical and not of packed single precision floating-point values
-        andnps,
-        /// Compare scalar single-precision floating-point values
-        cmpss,
+        /// Bitwise logical and not of packed single-precision floating-point values
+        /// Bitwise logical and not of packed double-precision floating-point values
+        andn,
         /// Convert doubleword integer to scalar single-precision floating-point value
         cvtsi2ss,
-        /// Divide packed single-precision floating-point values
-        divps,
-        /// Divide scalar single-precision floating-point values
-        divss,
         /// Maximum of packed single-precision floating-point values
-        maxps,
         /// Maximum of scalar single-precision floating-point values
-        maxss,
+        /// Maximum of packed double-precision floating-point values
+        /// Maximum of scalar double-precision floating-point values
+        max,
         /// Minimum of packed single-precision floating-point values
-        minps,
         /// Minimum of scalar single-precision floating-point values
-        minss,
+        /// Minimum of packed double-precision floating-point values
+        /// Minimum of scalar double-precision floating-point values
+        min,
         /// Move aligned packed single-precision floating-point values
-        movaps,
+        /// Move aligned packed double-precision floating-point values
+        mova,
         /// Move packed single-precision floating-point values high to low
-        movhlps,
-        /// Move scalar single-precision floating-point value
-        movss,
+        movhl,
         /// Move unaligned packed single-precision floating-point values
-        movups,
-        /// Multiply packed single-precision floating-point values
-        mulps,
-        /// Multiply scalar single-precision floating-point values
-        mulss,
-        /// Bitwise logical or of packed single precision floating-point values
-        orps,
+        /// Move unaligned packed double-precision floating-point values
+        movu,
+        /// Extract byte
         /// Extract word
-        pextrw,
+        /// Extract doubleword
+        /// Extract quadword
+        extr,
+        /// Insert byte
         /// Insert word
-        pinsrw,
+        /// Insert doubleword
+        /// Insert quadword
+        insr,
         /// Square root of packed single-precision floating-point values
-        sqrtps,
         /// Square root of scalar single-precision floating-point value
-        sqrtss,
-        /// Subtract packed single-precision floating-point values
-        subps,
-        /// Subtract scalar single-precision floating-point values
-        subss,
+        /// Square root of packed double-precision floating-point values
+        /// Square root of scalar double-precision floating-point value
+        sqrt,
         /// Unordered compare scalar single-precision floating-point values
-        ucomiss,
+        /// Unordered compare scalar double-precision floating-point values
+        ucomi,
         /// Unpack and interleave high packed single-precision floating-point values
-        unpckhps,
+        /// Unpack and interleave high packed double-precision floating-point values
+        unpckh,
         /// Unpack and interleave low packed single-precision floating-point values
-        unpcklps,
-        /// Bitwise logical xor of packed single precision floating-point values
-        xorps,
+        /// Unpack and interleave low packed double-precision floating-point values
+        unpckl,
 
-        /// Add packed double-precision floating-point values
-        addpd,
-        /// Add scalar double-precision floating-point values
-        addsd,
-        /// Bitwise logical and not of packed double precision floating-point values
-        andnpd,
-        /// Bitwise logical and of packed double precision floating-point values
-        andpd,
-        /// Compare scalar double-precision floating-point values
-        cmpsd,
         /// Convert scalar double-precision floating-point value to scalar single-precision floating-point value
         cvtsd2ss,
         /// Convert doubleword integer to scalar double-precision floating-point value
         cvtsi2sd,
         /// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
         cvtss2sd,
-        /// Divide packed double-precision floating-point values
-        divpd,
-        /// Divide scalar double-precision floating-point values
-        divsd,
-        /// Maximum of packed double-precision floating-point values
-        maxpd,
-        /// Maximum of scalar double-precision floating-point values
-        maxsd,
-        /// Minimum of packed double-precision floating-point values
-        minpd,
-        /// Minimum of scalar double-precision floating-point values
-        minsd,
-        /// Move scalar double-precision floating-point value
-        movsd,
-        /// Multiply packed double-precision floating-point values
-        mulpd,
-        /// Multiply scalar double-precision floating-point values
-        mulsd,
-        /// Bitwise logical or of packed double precision floating-point values
-        orpd,
         /// Shuffle packed high words
-        pshufhw,
+        shufh,
         /// Shuffle packed low words
-        pshuflw,
+        shufl,
         /// Shift packed data right logical
-        psrld,
         /// Shift packed data right logical
-        psrlq,
         /// Shift packed data right logical
-        psrlw,
+        srl,
         /// Unpack high data
-        punpckhbw,
+        unpckhbw,
         /// Unpack high data
-        punpckhdq,
+        unpckhdq,
         /// Unpack high data
-        punpckhqdq,
+        unpckhqdq,
         /// Unpack high data
-        punpckhwd,
+        unpckhwd,
         /// Unpack low data
-        punpcklbw,
+        unpcklbw,
         /// Unpack low data
-        punpckldq,
+        unpckldq,
         /// Unpack low data
-        punpcklqdq,
+        unpcklqdq,
         /// Unpack low data
-        punpcklwd,
-        /// Square root of double precision floating-point values
-        sqrtpd,
-        /// Square root of scalar double precision floating-point value
-        sqrtsd,
-        /// Subtract packed double-precision floating-point values
-        subpd,
-        /// Subtract scalar double-precision floating-point values
-        subsd,
-        /// Unordered compare scalar double-precision floating-point values
-        ucomisd,
-        /// Unpack and interleave high packed double-precision floating-point values
-        unpckhpd,
-        /// Unpack and interleave low packed double-precision floating-point values
-        unpcklpd,
-        /// Bitwise logical xor of packed double precision floating-point values
-        xorpd,
+        unpcklwd,
 
         /// Replicate double floating-point values
         movddup,
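The collapsed tags in this enum are only half of the encoding: the CodeGen.zig hunks above now pass an affix (`.v_ss`, `.v_pd`, `._`, and so on) alongside each tag. Assuming the `_` in an affix name marks where the tag is spliced back in when the final mnemonic is formed, the old names can be recovered mechanically; a minimal stand-alone sketch of that splicing (the `mnemonic` helper is invented for illustration and is not the emitter's actual code):

const std = @import("std");

// Assumed convention: "v_ss" means prefix "v" and suffix "ss", with the tag
// replacing the '_', so ("v_ss", "fmadd231") names "vfmadd231ss".
fn mnemonic(buf: []u8, affix: []const u8, tag: []const u8) ![]u8 {
    const slot = std.mem.indexOfScalar(u8, affix, '_') orelse return error.MissingSlot;
    return std.fmt.bufPrint(buf, "{s}{s}{s}", .{ affix[0..slot], tag, affix[slot + 1 ..] });
}

test "affix splicing recovers the old mnemonics" {
    var buf: [32]u8 = undefined;
    try std.testing.expectEqualStrings("vfmadd231ss", try mnemonic(&buf, "v_ss", "fmadd231"));
    try std.testing.expectEqualStrings("subpd", try mnemonic(&buf, "_pd", "sub"));
    try std.testing.expectEqualStrings("add", try mnemonic(&buf, "_", "add"));
}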
@@ -553,199 +520,32 @@
         /// Replicate single floating-point values
         movsldup,
-        /// Extract Byte
-        pextrb,
-        /// Extract Doubleword
-        pextrd,
-        /// Extract Quadword
-        pextrq,
-        /// Insert Byte
-        pinsrb,
-        /// Insert Doubleword
-        pinsrd,
-        /// Insert Quadword
-        pinsrq,
-        /// Round packed double-precision floating-point values
-        roundpd,
         /// Round packed single-precision floating-point values
-        roundps,
-        /// Round scalar double-precision floating-point value
-        roundsd,
         /// Round scalar single-precision floating-point value
-        roundss,
-
-        /// Add packed double-precision floating-point values
-        vaddpd,
-        /// Add packed single-precision floating-point values
-        vaddps,
-        /// Add scalar double-precision floating-point values
-        vaddsd,
-        /// Add scalar single-precision floating-point values
-        vaddss,
-        /// Convert scalar double-precision floating-point value to scalar single-precision floating-point value
-        vcvtsd2ss,
-        /// Convert doubleword integer to scalar double-precision floating-point value
-        vcvtsi2sd,
-        /// Convert doubleword integer to scalar single-precision floating-point value
-        vcvtsi2ss,
-        /// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
-        vcvtss2sd,
-        /// Divide packed double-precision floating-point values
-        vdivpd,
-        /// Divide packed single-precision floating-point values
-        vdivps,
-        /// Divide scalar double-precision floating-point values
-        vdivsd,
-        /// Divide scalar single-precision floating-point values
-        vdivss,
-        /// Maximum of packed double-precision floating-point values
-        vmaxpd,
-        /// Maximum of packed single-precision floating-point values
-        vmaxps,
-        /// Maximum of scalar double-precision floating-point values
-        vmaxsd,
-        /// Maximum of scalar single-precision floating-point values
-        vmaxss,
-        /// Minimum of packed double-precision floating-point values
-        vminpd,
-        /// Minimum of packed single-precision floating-point values
-        vminps,
-        /// Minimum of scalar double-precision floating-point values
-        vminsd,
-        /// Minimum of scalar single-precision floating-point values
-        vminss,
-        /// Move aligned packed double-precision floating-point values
-        vmovapd,
-        /// Move aligned packed single-precision floating-point values
-        vmovaps,
-        /// Move packed single-precision floating-point values high to low
-        vmovhlps,
-        /// Replicate double floating-point values
-        vmovddup,
-        /// Move or merge scalar double-precision floating-point value
-        vmovsd,
-        /// Replicate single floating-point values
-        vmovshdup,
-        /// Replicate single floating-point values
-        vmovsldup,
-        /// Move or merge scalar single-precision floating-point value
-        vmovss,
-        /// Move unaligned packed double-precision floating-point values
-        vmovupd,
-        /// Move unaligned packed single-precision floating-point values
-        vmovups,
-        /// Multiply packed double-precision floating-point values
-        vmulpd,
-        /// Multiply packed single-precision floating-point values
-        vmulps,
-        /// Multiply scalar double-precision floating-point values
-        vmulsd,
-        /// Multiply scalar single-precision floating-point values
-        vmulss,
-        /// Extract Byte
-        vpextrb,
-        /// Extract Doubleword
-        vpextrd,
-        /// Extract Quadword
-        vpextrq,
-        /// Extract word
-        vpextrw,
-        /// Insert Byte
-        vpinsrb,
-        /// Insert Doubleword
-        vpinsrd,
-        /// Insert Quadword
-        vpinsrq,
-        /// Insert word
-        vpinsrw,
-        /// Shuffle packed high words
-        vpshufhw,
-        /// Shuffle packed low words
-        vpshuflw,
-        /// Shift packed data right logical
-        vpsrld,
-        /// Shift packed data right logical
-        vpsrlq,
-        /// Shift packed data right logical
-        vpsrlw,
-        /// Unpack high data
-        vpunpckhbw,
-        /// Unpack high data
-        vpunpckhdq,
-        /// Unpack high data
-        vpunpckhqdq,
-        /// Unpack high data
-        vpunpckhwd,
-        /// Unpack low data
-        vpunpcklbw,
-        /// Unpack low data
-        vpunpckldq,
-        /// Unpack low data
-        vpunpcklqdq,
-        /// Unpack low data
-        vpunpcklwd,
         /// Round packed double-precision floating-point values
-        vroundpd,
-        /// Round packed single-precision floating-point values
-        vroundps,
         /// Round scalar double-precision floating-point value
-        vroundsd,
-        /// Round scalar single-precision floating-point value
-        vroundss,
-        /// Square root of packed double-precision floating-point value
-        vsqrtpd,
-        /// Square root of packed single-precision floating-point value
-        vsqrtps,
-        /// Square root of scalar double-precision floating-point value
-        vsqrtsd,
-        /// Square root of scalar single-precision floating-point value
-        vsqrtss,
-        /// Subtract packed double-precision floating-point values
-        vsubpd,
-        /// Subtract packed single-precision floating-point values
-        vsubps,
-        /// Subtract scalar double-precision floating-point values
-        vsubsd,
-        /// Subtract scalar single-precision floating-point values
-        vsubss,
-        /// Unpack and interleave high packed double-precision floating-point values
-        vunpckhpd,
-        /// Unpack and interleave high packed single-precision floating-point values
-        vunpckhps,
-        /// Unpack and interleave low packed double-precision floating-point values
-        vunpcklpd,
-        /// Unpack and interleave low packed single-precision floating-point values
-        vunpcklps,
+        round,
 
         /// Convert 16-bit floating-point values to single-precision floating-point values
-        vcvtph2ps,
+        cvtph2ps,
         /// Convert single-precision floating-point values to 16-bit floating-point values
-        vcvtps2ph,
+        cvtps2ph,
 
-        /// Fused multiply-add of packed double-precision floating-point values
-        vfmadd132pd,
-        /// Fused multiply-add of packed double-precision floating-point values
-        vfmadd213pd,
-        /// Fused multiply-add of packed double-precision floating-point values
-        vfmadd231pd,
         /// Fused multiply-add of packed single-precision floating-point values
-        vfmadd132ps,
+        /// Fused multiply-add of scalar single-precision floating-point values
+        /// Fused multiply-add of packed double-precision floating-point values
+        /// Fused multiply-add of scalar double-precision floating-point values
+        fmadd132,
         /// Fused multiply-add of packed single-precision floating-point values
-        vfmadd213ps,
+        /// Fused multiply-add of scalar single-precision floating-point values
+        /// Fused multiply-add of packed double-precision floating-point values
+        /// Fused multiply-add of scalar double-precision floating-point values
+        fmadd213,
         /// Fused multiply-add of packed single-precision floating-point values
-        vfmadd231ps,
-        /// Fused multiply-add of scalar double-precision floating-point values
-        vfmadd132sd,
-        /// Fused multiply-add of scalar double-precision floating-point values
-        vfmadd213sd,
-        /// Fused multiply-add of scalar double-precision floating-point values
-        vfmadd231sd,
         /// Fused multiply-add of scalar single-precision floating-point values
-        vfmadd132ss,
-        /// Fused multiply-add of scalar single-precision floating-point values
-        vfmadd213ss,
-        /// Fused multiply-add of scalar single-precision floating-point values
-        vfmadd231ss,
+        /// Fused multiply-add of packed double-precision floating-point values
+        /// Fused multiply-add of scalar double-precision floating-point values
+        fmadd231,
 
         /// A pseudo instruction that requires special lowering.
         /// This should be the only tag in this enum that doesn't