From 8ee80d61f6b2355f590de9722172b93a55c4f563 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Fri, 17 Jan 2025 22:38:21 -0500 Subject: [PATCH] x86_64: add a bunch of instruction encodings Closes #19773 --- lib/std/zig/system/x86.zig | 1 + src/arch/x86_64/CodeGen.zig | 450 +++++++++++-------- src/arch/x86_64/Disassembler.zig | 17 +- src/arch/x86_64/Encoding.zig | 281 ++++++++---- src/arch/x86_64/Lower.zig | 16 +- src/arch/x86_64/Mir.zig | 603 ++++++++++++++++++++++---- src/arch/x86_64/bits.zig | 27 +- src/arch/x86_64/encoder.zig | 42 +- src/arch/x86_64/encodings.zig | 723 ++++++++++++++++++++++++++----- 9 files changed, 1672 insertions(+), 488 deletions(-) diff --git a/lib/std/zig/system/x86.zig b/lib/std/zig/system/x86.zig index 2737c67d0c..d101743a22 100644 --- a/lib/std/zig/system/x86.zig +++ b/lib/std/zig/system/x86.zig @@ -419,6 +419,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void { // detecting features using the "-march=native" flag. // For more info, see X86 ISA docs. setFeature(cpu, .pconfig, bit(leaf.edx, 18)); + setFeature(cpu, .uintr, bit(leaf.edx, 5)); // TODO I feel unsure about this check. // It doesn't really seem to check for 7.1, just for 7. 
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index f30ecf34df..19c0acc2d4 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1375,6 +1375,14 @@ fn asmOps(self: *CodeGen, tag: Mir.Inst.FixedTag, ops: [4]Operand) !void { }, .imm => |imm0| switch (ops[1]) { .none => self.asmImmediate(tag, imm0), + .reg => |reg1| switch (ops[2]) { + .none => self.asmImmediateRegister(tag, imm0, reg1), + else => error.InvalidInstruction, + }, + .imm => |imm1| switch (ops[2]) { + .none => self.asmImmediateImmediate(tag, imm0, imm1), + else => error.InvalidInstruction, + }, else => error.InvalidInstruction, }, .inst => |inst0| switch (ops[1]) { @@ -1491,9 +1499,10 @@ fn asmSetccMemory(self: *CodeGen, cc: Condition, m: Memory) !void { fn asmJmpReloc(self: *CodeGen, target: Mir.Inst.Index) !Mir.Inst.Index { return self.addInst(.{ - .tag = .jmp, + .tag = .j, .ops = .inst, .data = .{ .inst = .{ + .fixes = ._mp, .inst = target, } }, }); @@ -1753,6 +1762,42 @@ fn asmImmediate(self: *CodeGen, tag: Mir.Inst.FixedTag, imm: Immediate) !void { }); } +fn asmImmediateRegister(self: *CodeGen, tag: Mir.Inst.FixedTag, imm: Immediate, reg: Register) !void { + _ = try self.addInst(.{ + .tag = tag[1], + .ops = .ir, + .data = .{ .ri = .{ + .fixes = tag[0], + .r1 = reg, + .i = @as(u8, switch (imm) { + .signed => |s| @bitCast(@as(i8, @intCast(s))), + .unsigned => |u| @intCast(u), + .reloc => unreachable, + }), + } }, + }); +} + +fn asmImmediateImmediate(self: *CodeGen, tag: Mir.Inst.FixedTag, imm1: Immediate, imm2: Immediate) !void { + _ = try self.addInst(.{ + .tag = tag[1], + .ops = .ii, + .data = .{ .ii = .{ + .fixes = tag[0], + .i1 = switch (imm1) { + .signed => |s| @bitCast(@as(i16, @intCast(s))), + .unsigned => |u| @intCast(u), + .reloc => unreachable, + }, + .i2 = switch (imm2) { + .signed => |s| @bitCast(@as(i8, @intCast(s))), + .unsigned => |u| @intCast(u), + .reloc => unreachable, + }, + } }, + }); +} + fn asmRegisterRegister(self: *CodeGen, tag: 
Mir.Inst.FixedTag, reg1: Register, reg2: Register) !void { _ = try self.addInst(.{ .tag = tag[1], @@ -4188,8 +4233,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { _ = try cg.asmJmpReloc(loop.target); }, .br => try cg.airBr(inst), - .trap => try cg.asmOpOnly(.{ ._, .ud2 }), - .breakpoint => try cg.asmOpOnly(.{ ._, .int3 }), + .trap => try cg.asmOpOnly(.{ ._2, .ud }), + .breakpoint => try cg.asmOpOnly(.{ ._3, .int }), .ret_addr => if (use_old) try cg.airRetAddr(inst) else { var slot = try cg.tempInit(.usize, .{ .load_frame = .{ .index = .ret_addr, @@ -4233,7 +4278,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{.{ .ref = .src0 }}, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .inc, .dst0b, ._, ._, ._ }, + .{ ._, ._c, .in, .dst0b, ._, ._, ._ }, } }, }, .{ .src_constraints = .{ .{ .exact_unsigned_int = 1 }, .any }, @@ -5643,7 +5688,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, } }, }, .{ @@ -5695,7 +5740,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, } }, }, .{ @@ -5747,7 +5792,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, 
._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, } }, }, .{ @@ -5799,7 +5844,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .lzcnt, .tmp1q, .tmp1q, ._, ._ }, .{ ._, ._, .sub, .tmp1b, .sia(64, .src0, .sub_bit_size), ._, ._ }, .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, } }, }, .{ @@ -5857,7 +5902,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, } }, }, .{ @@ -5915,7 +5960,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._c, .st, ._, ._, ._, ._ }, .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, } }, }, .{ @@ -5970,7 +6015,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, } }, }, .{ @@ -6028,7 +6073,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, 
._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, } }, }, .{ @@ -6086,7 +6131,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._c, .st, ._, ._, ._, ._ }, .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, } }, }, .{ @@ -6141,7 +6186,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, } }, }, .{ @@ -6199,7 +6244,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, } }, }, .{ @@ -6257,7 +6302,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._c, .st, ._, ._, ._, ._ }, .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, } }, }, .{ @@ -6312,7 +6357,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, - .{ ._, ._, .inc, 
.tmp0p, ._, ._, ._ }, + .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, } }, }, .{ @@ -6370,7 +6415,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, } }, }, .{ @@ -6428,7 +6473,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._c, .st, ._, ._, ._, ._ }, .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, } }, }, .{ @@ -6484,7 +6529,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, } }, }, .{ @@ -10094,7 +10139,7 @@ fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void { data_off += @intCast(tag_name_len + 1); } - try self.asmOpOnly(.{ ._, .ud2 }); + try self.asmOpOnly(.{ ._2, .ud }); for (epilogue_relocs) |reloc| self.performReloc(reloc); try self.asmOpOnly(.{ ._, .ret }); @@ -10373,7 +10418,7 @@ fn regClassForType(self: *CodeGen, ty: Type) Register.Class { fn regSetForRegClass(rc: Register.Class) RegisterManager.RegisterBitSet { return switch (rc) { .general_purpose => abi.RegisterClass.gp, - .segment, .ip => unreachable, + .segment, .ip, .cr, .dr => unreachable, .x87 => abi.RegisterClass.x87, .mmx => @panic("TODO"), .sse => abi.RegisterClass.sse, @@ 
-12195,8 +12240,8 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1)); try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[3].to32(), .u(1)); } else { - try self.asmRegister(.{ ._, .inc }, temp_regs[2].to32()); - try self.asmRegister(.{ ._, .inc }, temp_regs[3].to32()); + try self.asmRegister(.{ ._c, .in }, temp_regs[2].to32()); + try self.asmRegister(.{ ._c, .in }, temp_regs[3].to32()); } try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[3].to32(), .u(limb_len)); _ = try self.asmJccReloc(.b, inner_loop); @@ -12209,7 +12254,7 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { if (slow_inc) { try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1)); } else { - try self.asmRegister(.{ ._, .inc }, temp_regs[2].to32()); + try self.asmRegister(.{ ._c, .in }, temp_regs[2].to32()); } try self.asmMemoryImmediate(.{ ._, .cmp }, .{ .base = .{ .frame = lhs_mcv.load_frame.index }, @@ -12236,7 +12281,7 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { if (slow_inc) { try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1)); } else { - try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32()); + try self.asmRegister(.{ ._c, .in }, temp_regs[0].to32()); } try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[0].to32(), .u(limb_len)); _ = try self.asmJccReloc(.b, outer_loop); @@ -13938,7 +13983,7 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void { if (self.hasFeature(.slow_incdec)) { try self.asmRegisterImmediate(.{ ._, .sub }, index_reg.to32(), .u(1)); } else { - try self.asmRegister(.{ ._, .dec }, index_reg.to32()); + try self.asmRegister(.{ ._c, .de }, index_reg.to32()); } try self.asmMemoryImmediate(.{ ._, .cmp }, .{ .base = .{ .frame = src_frame_addr.index }, @@ -14133,7 +14178,7 @@ fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void { if (self.hasFeature(.slow_incdec)) { try 
self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), .u(1)); } else { - try self.asmRegister(.{ ._, .inc }, index_reg.to32()); + try self.asmRegister(.{ ._c, .in }, index_reg.to32()); } try self.asmRegisterImmediate(.{ ._, .cmp }, index_reg.to32(), .u(limbs_len)); const zero = try self.asmJccReloc(.nb, undefined); @@ -14535,8 +14580,8 @@ fn genByteSwap( try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1)); try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), .u(1)); } else { - try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32()); - try self.asmRegister(.{ ._, .dec }, temp_regs[1].to32()); + try self.asmRegister(.{ ._c, .in }, temp_regs[0].to32()); + try self.asmRegister(.{ ._c, .de }, temp_regs[1].to32()); } try self.asmRegisterRegister(.{ ._, .cmp }, temp_regs[0].to32(), temp_regs[1].to32()); _ = try self.asmJccReloc(.be, loop); @@ -15113,7 +15158,7 @@ fn airAbs(self: *CodeGen, inst: Air.Inst.Index) !void { if (self.hasFeature(.slow_incdec)) { try self.asmRegisterImmediate(.{ ._, .add }, tmp_regs[0].to32(), .u(1)); } else { - try self.asmRegister(.{ ._, .inc }, tmp_regs[0].to32()); + try self.asmRegister(.{ ._c, .in }, tmp_regs[0].to32()); } try self.asmRegisterImmediate(.{ ._, .cmp }, tmp_regs[0].to32(), .u(limb_len)); _ = try self.asmJccReloc(.b, neg_loop); @@ -16452,8 +16497,8 @@ fn genShiftBinOpMir( try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), .u(1)); try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[0].to32(), .u(1)); } else { - try self.asmRegister(.{ ._, .dec }, temp_regs[1].to32()); - try self.asmRegister(.{ ._, .dec }, temp_regs[0].to32()); + try self.asmRegister(.{ ._c, .de }, temp_regs[1].to32()); + try self.asmRegister(.{ ._c, .de }, temp_regs[0].to32()); } _ = try self.asmJccReloc(.nz, loop); }, @@ -16462,8 +16507,8 @@ fn genShiftBinOpMir( try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[1].to32(), .u(1)); try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), 
.u(1)); } else { - try self.asmRegister(.{ ._, .inc }, temp_regs[1].to32()); - try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32()); + try self.asmRegister(.{ ._c, .in }, temp_regs[1].to32()); + try self.asmRegister(.{ ._c, .in }, temp_regs[0].to32()); } try self.asmRegisterImmediate( .{ ._, .cmp }, @@ -16532,12 +16577,12 @@ fn genShiftBinOpMir( ._l => if (slow_inc_dec) { try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), .u(1)); } else { - try self.asmRegister(.{ ._, .dec }, temp_regs[1].to32()); + try self.asmRegister(.{ ._c, .de }, temp_regs[1].to32()); }, ._r => if (slow_inc_dec) { try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[1].to32(), .u(1)); } else { - try self.asmRegister(.{ ._, .inc }, temp_regs[1].to32()); + try self.asmRegister(.{ ._c, .in }, temp_regs[1].to32()); }, else => unreachable, } @@ -17163,8 +17208,8 @@ fn genMulDivBinOp( try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1)); try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[3].to32(), .u(1)); } else { - try self.asmRegister(.{ ._, .inc }, temp_regs[2].to32()); - try self.asmRegister(.{ ._, .inc }, temp_regs[3].to32()); + try self.asmRegister(.{ ._c, .in }, temp_regs[2].to32()); + try self.asmRegister(.{ ._c, .in }, temp_regs[3].to32()); } try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[3].to32(), .u(limb_len)); _ = try self.asmJccReloc(.b, inner_loop); @@ -17173,7 +17218,7 @@ fn genMulDivBinOp( if (slow_inc) { try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1)); } else { - try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32()); + try self.asmRegister(.{ ._c, .in }, temp_regs[0].to32()); } try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[0].to32(), .u(limb_len)); _ = try self.asmJccReloc(.b, outer_loop); @@ -19765,7 +19810,7 @@ fn airArg(self: *CodeGen, inst: Air.Inst.Index) !void { if (self.hasFeature(.slow_incdec)) { try self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), .u(1)); } else { - try 
self.asmRegister(.{ ._, .inc }, index_reg.to32()); + try self.asmRegister(.{ ._c, .in }, index_reg.to32()); } try self.asmRegisterImmediate( .{ ._, .cmp }, @@ -20042,7 +20087,7 @@ fn genCall(self: *CodeGen, info: union(enum) { if (self.hasFeature(.slow_incdec)) { try self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), .u(1)); } else { - try self.asmRegister(.{ ._, .inc }, index_reg.to32()); + try self.asmRegister(.{ ._c, .in }, index_reg.to32()); } try self.asmRegisterImmediate( .{ ._, .cmp }, @@ -21423,7 +21468,7 @@ fn lowerSwitchBr( defer if (condition_index_lock) |lock| self.register_manager.unlockReg(lock); try self.truncateRegister(condition_ty, condition_index_reg); const ptr_size = @divExact(self.target.ptrBitWidth(), 8); - try self.asmMemory(.{ ._, .jmp }, .{ + try self.asmMemory(.{ ._mp, .j }, .{ .base = .table, .mod = .{ .rm = .{ .size = .ptr, @@ -21720,7 +21765,7 @@ fn airSwitchDispatch(self: *CodeGen, inst: Air.Inst.Index) !void { defer if (condition_index_lock) |lock| self.register_manager.unlockReg(lock); try self.truncateRegister(condition_ty, condition_index_reg); const ptr_size = @divExact(self.target.ptrBitWidth(), 8); - try self.asmMemory(.{ ._, .jmp }, .{ + try self.asmMemory(.{ ._mp, .j }, .{ .base = .table, .mod = .{ .rm = .{ .size = .ptr, @@ -21777,7 +21822,7 @@ fn airSwitchDispatch(self: *CodeGen, inst: Air.Inst.Index) !void { fn performReloc(self: *CodeGen, reloc: Mir.Inst.Index) void { const next_inst: u32 = @intCast(self.mir_instructions.len); switch (self.mir_instructions.items(.tag)[reloc]) { - .j, .jmp => {}, + .j => {}, .pseudo => switch (self.mir_instructions.items(.ops)[reloc]) { .pseudo_j_z_and_np_inst, .pseudo_j_nz_or_p_inst => {}, else => unreachable, @@ -22149,65 +22194,52 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { prefix = .directive; } - var mnem_size: ?Memory.Size = if (prefix == .directive) - null - else if (std.mem.endsWith(u8, mnem_str, "b")) - .byte - else if (std.mem.endsWith(u8, mnem_str, "w")) - 
.word - else if (std.mem.endsWith(u8, mnem_str, "l")) - .dword - else if (std.mem.endsWith(u8, mnem_str, "q") and - (std.mem.indexOfScalar(u8, "vp", mnem_str[0]) == null or !std.mem.endsWith(u8, mnem_str, "dq"))) - .qword - else if (std.mem.endsWith(u8, mnem_str, "t")) - .tbyte - else - null; - const mnem_tag = while (true) break std.meta.stringToEnum( + var mnem_size: struct { + used: bool, + size: ?Memory.Size, + fn use(size: *@This()) ?Memory.Size { + size.used = true; + return size.size; + } + } = .{ + .used = false, + .size = if (prefix == .directive) + null + else if (std.mem.endsWith(u8, mnem_str, "b")) + .byte + else if (std.mem.endsWith(u8, mnem_str, "w")) + .word + else if (std.mem.endsWith(u8, mnem_str, "l")) + .dword + else if (std.mem.endsWith(u8, mnem_str, "q") and + (std.mem.indexOfScalar(u8, "vp", mnem_str[0]) == null or !std.mem.endsWith(u8, mnem_str, "dq"))) + .qword + else if (std.mem.endsWith(u8, mnem_str, "t")) + .tbyte + else + null, + }; + var mnem_tag = while (true) break std.meta.stringToEnum( encoder.Instruction.Mnemonic, - mnem_str[0 .. mnem_str.len - @intFromBool(mnem_size != null)], - ) orelse if (mnem_size) |_| { - mnem_size = null; + mnem_str[0 .. 
mnem_str.len - @intFromBool(mnem_size.size != null)], + ) orelse if (mnem_size.size) |_| { + mnem_size.size = null; continue; } else return self.fail("invalid mnemonic: '{s}'", .{mnem_str}); if (@as(?Memory.Size, switch (mnem_tag) { .clflush => .byte, + .fldcw, .fnstcw, .fstcw, .fnstsw, .fstsw => .word, .fldenv, .fnstenv, .fstenv => .none, + .frstor, .fsave, .fnsave, .fxrstor, .fxrstor64, .fxsave, .fxsave64 => .none, + .invlpg => .none, + .invpcid => .xword, .ldmxcsr, .stmxcsr, .vldmxcsr, .vstmxcsr => .dword, else => null, })) |fixed_mnem_size| { - if (mnem_size) |size| if (size != fixed_mnem_size) + if (mnem_size.size) |size| if (size != fixed_mnem_size) return self.fail("invalid size: '{s}'", .{mnem_str}); - mnem_size = fixed_mnem_size; + mnem_size.size = fixed_mnem_size; } - const mnem_name = @tagName(mnem_tag); - const mnem_fixed_tag: Mir.Inst.FixedTag = if (prefix == .directive) - .{ ._, .pseudo } - else for (std.enums.values(Mir.Inst.Fixes)) |fixes| { - const fixes_name = @tagName(fixes); - const space_i = std.mem.indexOfScalar(u8, fixes_name, ' '); - const fixes_prefix = if (space_i) |i| - std.meta.stringToEnum(encoder.Instruction.Prefix, fixes_name[0..i]).? - else - .none; - if (fixes_prefix != prefix) continue; - const pattern = fixes_name[if (space_i) |i| i + " ".len else 0..]; - const wildcard_i = std.mem.indexOfScalar(u8, pattern, '_').?; - const mnem_prefix = pattern[0..wildcard_i]; - const mnem_suffix = pattern[wildcard_i + "_".len ..]; - if (!std.mem.startsWith(u8, mnem_name, mnem_prefix)) continue; - if (!std.mem.endsWith(u8, mnem_name, mnem_suffix)) continue; - break .{ fixes, std.meta.stringToEnum( - Mir.Inst.Tag, - mnem_name[mnem_prefix.len .. 
mnem_name.len - mnem_suffix.len], - ) orelse continue }; - } else { - assert(prefix != .none); // no combination of fixes produced a known mnemonic - return self.fail("invalid prefix for mnemonic: '{s} {s}'", .{ - @tagName(prefix), mnem_name, - }); - }; var ops: [4]Operand = @splat(.none); var ops_len: usize = 0; @@ -22236,12 +22268,13 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { op.* = .{ .mem = .{ .base = .{ .reg = reg }, .mod = .{ .rm = .{ - .size = mnem_size orelse return self.fail("unknown size: '{s}'", .{op_str}), + .size = mnem_size.use() orelse + return self.fail("unknown size: '{s}'", .{op_str}), .disp = disp, } }, } }; } else { - if (mnem_size) |size| if (reg.bitSize() != size.bitSize(self.target)) + if (mnem_size.use()) |size| if (reg.bitSize() != size.bitSize(self.target)) return self.fail("invalid register size: '{s}'", .{op_str}); op.* = .{ .reg = reg }; } @@ -22260,14 +22293,17 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { else return self.fail("invalid modifier: '{s}'", .{modifier}), .register => |reg| if (std.mem.eql(u8, modifier, "")) - .{ .reg = reg } + .{ .reg = if (mnem_size.use()) |size| + registerAlias(reg, @intCast(@divExact(size.bitSize(self.target), 8))) + else + reg } else return self.fail("invalid modifier: '{s}'", .{modifier}), .memory => |addr| if (std.mem.eql(u8, modifier, "") or std.mem.eql(u8, modifier, "P")) .{ .mem = .{ .base = .{ .reg = .ds }, .mod = .{ .rm = .{ - .size = mnem_size orelse + .size = mnem_size.use() orelse return self.fail("unknown size: '{s}'", .{op_str}), .disp = @intCast(@as(i64, @bitCast(addr))), } }, @@ -22278,7 +22314,7 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { .{ .mem = .{ .base = .{ .reg = reg_off.reg }, .mod = .{ .rm = .{ - .size = mnem_size orelse + .size = mnem_size.use() orelse return self.fail("unknown size: '{s}'", .{op_str}), .disp = reg_off.off, } }, @@ -22289,7 +22325,7 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { .{ .mem = .{ .base = .{ 
.frame = frame_addr.index }, .mod = .{ .rm = .{ - .size = mnem_size orelse + .size = mnem_size.use() orelse return self.fail("unknown size: '{s}'", .{op_str}), .disp = frame_addr.off, } }, @@ -22307,21 +22343,12 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { else => return self.fail("invalid constraint: '{s}'", .{op_str}), }; } else if (std.mem.startsWith(u8, op_str, "$")) { - if (std.fmt.parseInt(i32, op_str["$".len..], 0)) |s| { - if (mnem_size) |size| { - const max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - (size.bitSize(self.target) - 1)); - if ((if (s < 0) ~s else s) > max) - return self.fail("invalid immediate size: '{s}'", .{op_str}); - } - op.* = .{ .imm = .s(s) }; - } else |_| if (std.fmt.parseInt(u64, op_str["$".len..], 0)) |u| { - if (mnem_size) |size| { - const max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - size.bitSize(self.target)); - if (u > max) - return self.fail("invalid immediate size: '{s}'", .{op_str}); - } - op.* = .{ .imm = .u(u) }; - } else |_| return self.fail("invalid immediate: '{s}'", .{op_str}); + op.* = if (std.fmt.parseInt(u64, op_str["$".len..], 0)) |u| + .{ .imm = .u(u) } + else |_| if (std.fmt.parseInt(i32, op_str["$".len..], 0)) |s| + .{ .imm = .s(s) } + else |_| + return self.fail("invalid immediate: '{s}'", .{op_str}); } else if (std.mem.endsWith(u8, op_str, ")")) { const open = std.mem.indexOfScalar(u8, op_str, '(') orelse return self.fail("invalid operand: '{s}'", .{op_str}); @@ -22348,49 +22375,47 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { else .@"1"; if (sib_it.next()) |_| return self.fail("invalid memory operand: '{s}'", .{op_str}); - op.* = .{ - .mem = .{ - .base = if (base_str.len > 0) - .{ .reg = parseRegName(base_str["%%".len..]) orelse - return self.fail("invalid base register: '{s}'", .{base_str}) } + op.* = if (std.mem.eql(u8, base_str, "%%dx") and index_str.len == 0) .{ .reg = .dx } else .{ .mem = .{ + .base = if (base_str.len > 0) + .{ .reg = 
parseRegName(base_str["%%".len..]) orelse + return self.fail("invalid base register: '{s}'", .{base_str}) } + else + .none, + .mod = .{ .rm = .{ + .size = mnem_size.use() orelse return self.fail("unknown size: '{s}'", .{op_str}), + .index = if (index_str.len > 0) + parseRegName(index_str["%%".len..]) orelse + return self.fail("invalid index register: '{s}'", .{op_str}) else .none, - .mod = .{ .rm = .{ - .size = mnem_size orelse return self.fail("unknown size: '{s}'", .{op_str}), - .index = if (index_str.len > 0) - parseRegName(index_str["%%".len..]) orelse - return self.fail("invalid index register: '{s}'", .{op_str}) + .scale = scale, + .disp = if (std.mem.startsWith(u8, op_str[0..open], "%[") and + std.mem.endsWith(u8, op_str[0..open], "]")) + disp: { + const colon = std.mem.indexOfScalarPos(u8, op_str[0..open], "%[".len, ':'); + const modifier = if (colon) |colon_pos| + op_str[colon_pos + ":".len .. open - "]".len] else - .none, - .scale = scale, - .disp = if (std.mem.startsWith(u8, op_str[0..open], "%[") and - std.mem.endsWith(u8, op_str[0..open], "]")) - disp: { - const colon = std.mem.indexOfScalarPos(u8, op_str[0..open], "%[".len, ':'); - const modifier = if (colon) |colon_pos| - op_str[colon_pos + ":".len .. open - "]".len] + ""; + break :disp switch (args.items[ + arg_map.get(op_str["%[".len .. colon orelse open - "]".len]) orelse + return self.fail("no matching constraint: '{s}'", .{op_str}) + ]) { + .immediate => |imm| if (std.mem.eql(u8, modifier, "") or + std.mem.eql(u8, modifier, "c")) + std.math.cast(i32, @as(i64, @bitCast(imm))) orelse + return self.fail("invalid displacement: '{s}'", .{op_str}) else - ""; - break :disp switch (args.items[ - arg_map.get(op_str["%[".len .. 
colon orelse open - "]".len]) orelse - return self.fail("no matching constraint: '{s}'", .{op_str}) - ]) { - .immediate => |imm| if (std.mem.eql(u8, modifier, "") or - std.mem.eql(u8, modifier, "c")) - std.math.cast(i32, @as(i64, @bitCast(imm))) orelse - return self.fail("invalid displacement: '{s}'", .{op_str}) - else - return self.fail("invalid modifier: '{s}'", .{modifier}), - else => return self.fail("invalid constraint: '{s}'", .{op_str}), - }; - } else if (open > 0) - std.fmt.parseInt(i32, op_str[0..open], 0) catch - return self.fail("invalid displacement: '{s}'", .{op_str}) - else - 0, - } }, - }, - }; + return self.fail("invalid modifier: '{s}'", .{modifier}), + else => return self.fail("invalid constraint: '{s}'", .{op_str}), + }; + } else if (open > 0) + std.fmt.parseInt(i32, op_str[0..open], 0) catch + return self.fail("invalid displacement: '{s}'", .{op_str}) + else + 0, + } }, + } }; } else if (Label.isValid(.reference, op_str)) { const anon = std.ascii.isDigit(op_str[0]); const label_gop = try labels.getOrPut(self.gpa, op_str[0..if (anon) 1 else op_str.len]); @@ -22410,6 +22435,51 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { // convert from att syntax to intel syntax std.mem.reverse(Operand, ops[0..ops_len]); + if (!mnem_size.used) if (mnem_size.size) |size| { + comptime var max_mnem_len: usize = 0; + inline for (@typeInfo(encoder.Instruction.Mnemonic).@"enum".fields) |mnem| + max_mnem_len = @max(mnem.name.len, max_mnem_len); + var intel_mnem_buf: [max_mnem_len + 1]u8 = undefined; + const intel_mnem_str = std.fmt.bufPrint(&intel_mnem_buf, "{s}{c}", .{ + @tagName(mnem_tag), + @as(u8, switch (size) { + .byte => 'b', + .word => 'w', + .dword => 'd', + .qword => 'q', + .tbyte => 't', + else => unreachable, + }), + }) catch unreachable; + if (std.meta.stringToEnum(encoder.Instruction.Mnemonic, intel_mnem_str)) |intel_mnem_tag| mnem_tag = intel_mnem_tag; + }; + const mnem_name = @tagName(mnem_tag); + const mnem_fixed_tag: Mir.Inst.FixedTag = 
if (prefix == .directive) + .{ ._, .pseudo } + else for (std.enums.values(Mir.Inst.Fixes)) |fixes| { + const fixes_name = @tagName(fixes); + const space_i = std.mem.indexOfScalar(u8, fixes_name, ' '); + const fixes_prefix = if (space_i) |i| + std.meta.stringToEnum(encoder.Instruction.Prefix, fixes_name[0..i]).? + else + .none; + if (fixes_prefix != prefix) continue; + const pattern = fixes_name[if (space_i) |i| i + " ".len else 0..]; + const wildcard_i = std.mem.indexOfScalar(u8, pattern, '_').?; + const mnem_prefix = pattern[0..wildcard_i]; + const mnem_suffix = pattern[wildcard_i + "_".len ..]; + if (!std.mem.startsWith(u8, mnem_name, mnem_prefix)) continue; + if (!std.mem.endsWith(u8, mnem_name, mnem_suffix)) continue; + break .{ fixes, std.meta.stringToEnum( + Mir.Inst.Tag, + mnem_name[mnem_prefix.len .. mnem_name.len - mnem_suffix.len], + ) orelse continue }; + } else { + assert(prefix != .none); // no combination of fixes produced a known mnemonic + return self.fail("invalid prefix for mnemonic: '{s} {s}'", .{ + @tagName(prefix), mnem_name, + }); + }; (if (prefix == .directive) switch (mnem_tag) { .@".cfi_def_cfa" => if (ops[0] == .reg and ops[1] == .imm and ops[2] == .none) @@ -22815,7 +22885,7 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool) else => {}, }, }, - .ip => {}, + .ip, .cr, .dr => {}, } return self.fail("TODO moveStrategy for {}", .{ty.fmt(pt)}); } @@ -22900,13 +22970,13 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C for (dst_regs, &hazard_regs, 1..) 
|dst_reg, src_reg, hazard_index| { const dst_id = dst_reg.id(); if (dst_id == src_reg.id()) continue; - var mir_tag: Mir.Inst.Tag = .mov; + var mir_tag: Mir.Inst.FixedTag = .{ ._, .mov }; for (hazard_regs[hazard_index..]) |*hazard_reg| { if (dst_id != hazard_reg.id()) continue; - mir_tag = .xchg; + mir_tag = .{ ._g, .xch }; hazard_reg.* = src_reg; } - try self.asmRegisterRegister(.{ ._, mir_tag }, dst_reg.to64(), src_reg.to64()); + try self.asmRegisterRegister(mir_tag, dst_reg.to64(), src_reg.to64()); } return; }, @@ -23025,7 +23095,7 @@ fn genSetReg( else => unreachable, }, .segment, .x87, .mmx, .sse => try self.genSetReg(dst_reg, ty, try self.genTypedValue(try pt.undefValue(ty)), opts), - .ip => unreachable, + .ip, .cr, .dr => unreachable, }, .eflags => |cc| try self.asmSetccRegister(cc, dst_reg.to8()), .immediate => |imm| { @@ -23063,7 +23133,7 @@ fn genSetReg( registerAlias(dst_reg, abi_size), src_reg, ), - .x87, .mmx, .ip => unreachable, + .x87, .mmx, .ip, .cr, .dr => unreachable, .sse => if (self.hasFeature(.sse2)) try self.asmRegisterRegister( switch (abi_size) { 1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov }, @@ -23092,7 +23162,7 @@ fn genSetReg( dst_reg, switch (src_reg.class()) { .general_purpose, .segment => registerAlias(src_reg, abi_size), - .x87, .mmx, .ip => unreachable, + .x87, .mmx, .ip, .cr, .dr => unreachable, .sse => try self.copyToTmpRegister(ty, src_mcv), }, ), @@ -23107,7 +23177,7 @@ fn genSetReg( }, else => unreachable, }, - .mmx, .sse, .ip => unreachable, + .mmx, .sse, .ip, .cr, .dr => unreachable, }, .mmx => unreachable, .sse => switch (src_reg.class()) { @@ -23126,7 +23196,7 @@ fn genSetReg( .{ .register = try self.copyToTmpRegister(ty, src_mcv) }, opts, ), - .x87, .mmx, .ip => unreachable, + .x87, .mmx, .ip, .cr, .dr => unreachable, .sse => try self.asmRegisterRegister( @as(?Mir.Inst.FixedTag, switch (ty.scalarType(zcu).zigTypeTag(zcu)) { else => switch (abi_size) { @@ -23153,7 +23223,7 @@ fn genSetReg( 
registerAlias(src_reg, abi_size), ), }, - .ip => unreachable, + .ip, .cr, .dr => unreachable, }, inline .register_pair, .register_triple, @@ -23294,7 +23364,7 @@ fn genSetReg( }); return; }, - .segment, .mmx, .ip => unreachable, + .segment, .mmx, .ip, .cr, .dr => unreachable, .x87, .sse => {}, }, .load_direct => |sym_index| switch (dst_reg.class()) { @@ -23309,7 +23379,7 @@ fn genSetReg( }); return; }, - .segment, .mmx, .ip => unreachable, + .segment, .mmx, .ip, .cr, .dr => unreachable, .x87, .sse => {}, }, .load_got, .load_tlv => {}, @@ -23456,7 +23526,7 @@ fn genSetMem( }; const src_alias = registerAlias(src_reg, abi_size); const src_size: u32 = @intCast(switch (src_alias.class()) { - .general_purpose, .segment, .x87, .ip => @divExact(src_alias.bitSize(), 8), + .general_purpose, .segment, .x87, .ip, .cr, .dr => @divExact(src_alias.bitSize(), 8), .mmx, .sse => abi_size, }); const src_align: InternPool.Alignment = .fromNonzeroByteUnits( @@ -24240,18 +24310,18 @@ fn atomicOp( }; switch (strat) { .lock => { - const tag: Mir.Inst.Tag = if (rmw_op) |op| switch (op) { - .Xchg => if (unused) .mov else .xchg, - .Add => if (unused) .add else .xadd, - .Sub => if (unused) .sub else .xadd, - .And => .@"and", - .Or => .@"or", - .Xor => .xor, + const mir_tag: Mir.Inst.FixedTag = if (rmw_op) |op| switch (op) { + .Xchg => if (unused) .{ ._, .mov } else .{ ._g, .xch }, + .Add => .{ .@"lock _", if (unused) .add else .xadd }, + .Sub => .{ .@"lock _", if (unused) .sub else .xadd }, + .And => .{ .@"lock _", .@"and" }, + .Or => .{ .@"lock _", .@"or" }, + .Xor => .{ .@"lock _", .xor }, else => unreachable, } else switch (order) { - .unordered, .monotonic, .release, .acq_rel => .mov, + .unordered, .monotonic, .release, .acq_rel => .{ ._, .mov }, .acquire => unreachable, - .seq_cst => .xchg, + .seq_cst => .{ ._g, .xch }, }; const dst_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); @@ -24260,18 +24330,10 @@ fn atomicOp( defer self.register_manager.unlockReg(dst_lock); 
try self.genSetReg(dst_reg, val_ty, val_mcv, .{}); - if (rmw_op == std.builtin.AtomicRmwOp.Sub and tag == .xadd) { + if (rmw_op == std.builtin.AtomicRmwOp.Sub and mir_tag[1] == .xadd) { try self.genUnOpMir(.{ ._, .neg }, val_ty, dst_mcv); } - try self.asmMemoryRegister( - switch (tag) { - .mov, .xchg => .{ ._, tag }, - .xadd, .add, .sub, .@"and", .@"or", .xor => .{ .@"lock _", tag }, - else => unreachable, - }, - ptr_mem, - registerAlias(dst_reg, val_abi_size), - ); + try self.asmMemoryRegister(mir_tag, ptr_mem, registerAlias(dst_reg, val_abi_size)); return if (unused) .unreach else dst_mcv; }, @@ -27599,7 +27661,7 @@ fn resolveCallingConventionValues( break :return_value .init(.{ .register = registerAlias(ret_gpr[0], ret_size) }) else if (ret_gpr.len >= 2 and ret_ty.isSliceAtRuntime(zcu)) break :return_value .init(.{ .register_pair = ret_gpr[0..2].* }), - .segment, .mmx, .ip => unreachable, + .segment, .mmx, .ip, .cr, .dr => unreachable, .x87 => break :return_value .init(.{ .register = .st0 }), .sse => if (ret_size <= self.vectorSize(.float)) break :return_value .init(.{ .register = registerAlias(abi.getCAbiSseReturnRegs(cc)[0], @max(ret_size, 16)), @@ -27634,7 +27696,7 @@ fn resolveCallingConventionValues( param_gpr = param_gpr[2..]; continue; }, - .segment, .mmx, .ip => unreachable, + .segment, .mmx, .ip, .cr, .dr => unreachable, .x87 => if (param_x87.len >= 1) { arg.* = .{ .register = param_x87[0] }; param_x87 = param_x87[1..]; @@ -27686,9 +27748,9 @@ fn failMsg(self: *CodeGen, msg: *Zcu.ErrorMsg) error{ OutOfMemory, CodegenFail } } fn parseRegName(name: []const u8) ?Register { - if (@hasDecl(Register, "parseRegName")) { - return Register.parseRegName(name); - } + if (std.mem.startsWith(u8, name, "db")) return @enumFromInt( + @intFromEnum(Register.dr0) + (std.fmt.parseInt(u4, name["db".len..], 0) catch return null), + ); return std.meta.stringToEnum(Register, name); } @@ -27733,6 +27795,14 @@ fn registerAlias(reg: Register, size_bytes: u32) Register { .rip else 
unreachable, + .cr => if (size_bytes <= 8) + reg + else + unreachable, + .dr => if (size_bytes <= 8) + reg + else + unreachable, }; } diff --git a/src/arch/x86_64/Disassembler.zig b/src/arch/x86_64/Disassembler.zig index e781a6cdc8..545f6c0e96 100644 --- a/src/arch/x86_64/Disassembler.zig +++ b/src/arch/x86_64/Disassembler.zig @@ -80,6 +80,21 @@ pub fn next(dis: *Disassembler) Error!?Instruction { .op2 = .{ .imm = imm }, }); }, + .ii => { + const imm1 = try dis.parseImm(enc.data.ops[0]); + const imm2 = try dis.parseImm(enc.data.ops[1]); + return inst(enc, .{ + .op1 = .{ .imm = imm1 }, + .op2 = .{ .imm = imm2 }, + }); + }, + .ia => { + const imm = try dis.parseImm(enc.data.ops[0]); + return inst(enc, .{ + .op1 = .{ .imm = imm }, + .op2 = .{ .reg = .eax }, + }); + }, .m, .mi, .m1, .mc => { const modrm = try dis.parseModRmByte(); const act_enc = Encoding.findByOpcode(enc.opcode(), .{ @@ -241,7 +256,7 @@ pub fn next(dis: *Disassembler) Error!?Instruction { .op3 = op3, }); }, - .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => unreachable, // TODO + .rm0, .vm, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => unreachable, // TODO } } diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 9be0f36eda..304bef6359 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -60,6 +60,32 @@ pub fn findByMnemonic( next: for (mnemonic_to_encodings_map[@intFromEnum(mnemonic)]) |data| { if (!switch (data.feature) { .none => true, + .@"32bit" => switch (target.cpu.arch) { + else => unreachable, + .x86 => true, + .x86_64 => false, + }, + .@"64bit" => switch (target.cpu.arch) { + else => unreachable, + .x86 => false, + .x86_64 => true, + }, + inline .@"invpcid 32bit", .@"rdpid 32bit" => |tag| switch (target.cpu.arch) { + else => unreachable, + .x86 => std.Target.x86.featureSetHas( + target.cpu.features, + @field(std.Target.x86.Feature, @tagName(tag)[0 .. 
@tagName(tag).len - " 32bit".len]), + ), + .x86_64 => false, + }, + inline .@"invpcid 64bit", .@"rdpid 64bit" => |tag| switch (target.cpu.arch) { + else => unreachable, + .x86 => false, + .x86_64 => std.Target.x86.featureSetHas( + target.cpu.features, + @field(std.Target.x86.Feature, @tagName(tag)[0 .. @tagName(tag).len - " 64bit".len]), + ), + }, inline else => |tag| has_features: { comptime var feature_it = std.mem.splitScalar(u8, @tagName(tag), ' '); comptime var features: []const std.Target.x86.Feature = &.{}; @@ -126,7 +152,7 @@ pub fn mandatoryPrefix(encoding: *const Encoding) ?u8 { pub fn modRmExt(encoding: Encoding) u3 { return switch (encoding.data.op_en) { - .m, .mi, .m1, .mc, .vmi => encoding.data.modrm_ext, + .ia, .m, .mi, .m1, .mc, .vm, .vmi => encoding.data.modrm_ext, else => unreachable, }; } @@ -176,7 +202,7 @@ pub fn format( for (opc) |byte| try writer.print("{x:0>2} ", .{byte}); switch (encoding.data.op_en) { - .z, .fd, .td, .i, .zi, .d => {}, + .z, .fd, .td, .i, .zi, .ii, .d => {}, .o, .zo, .oz, .oi => { const op = switch (encoding.data.op_en) { .o, .oz, .oi => encoding.data.ops[0], @@ -192,17 +218,24 @@ pub fn format( }; try writer.print("+{s} ", .{tag}); }, - .m, .mi, .m1, .mc, .vmi => try writer.print("/{d} ", .{encoding.modRmExt()}), + .ia, .m, .mi, .m1, .mc, .vm, .vmi => try writer.print("/{d} ", .{encoding.modRmExt()}), .mr, .rm, .rmi, .mri, .mrc, .rm0, .rvm, .rvmr, .rvmi, .mvr, .rmv => try writer.writeAll("/r "), } switch (encoding.data.op_en) { - .i, .d, .zi, .oi, .mi, .rmi, .mri, .vmi, .rvmi => { - const op = switch (encoding.data.op_en) { - .i, .d => encoding.data.ops[0], - .zi, .oi, .mi => encoding.data.ops[1], - .rmi, .mri, .vmi => encoding.data.ops[2], - .rvmi => encoding.data.ops[3], + .i, .d, .zi, .ii, .ia, .oi, .mi, .rmi, .mri, .vmi, .rvmi => for (0..2) |i| { + const op = switch (i) { + 0 => switch (encoding.data.op_en) { + .i, .ii, .ia, .d => encoding.data.ops[0], + .zi, .oi, .mi => encoding.data.ops[1], + .rmi, .mri, .vmi => 
encoding.data.ops[2], + .rvmi => encoding.data.ops[3], + else => unreachable, + }, + 1 => switch (encoding.data.op_en) { + .ii => encoding.data.ops[1], + else => break, + }, else => unreachable, }; const tag = switch (op) { @@ -218,13 +251,13 @@ pub fn format( try writer.print("{s} ", .{tag}); }, .rvmr => try writer.writeAll("/is4 "), - .z, .fd, .td, .o, .zo, .oz, .m, .m1, .mc, .mr, .rm, .mrc, .rm0, .rvm, .mvr, .rmv => {}, + .z, .fd, .td, .o, .zo, .oz, .m, .m1, .mc, .mr, .rm, .mrc, .rm0, .vm, .rvm, .mvr, .rmv => {}, } try writer.print("{s} ", .{@tagName(encoding.mnemonic)}); for (encoding.data.ops) |op| switch (op) { - .none, .o16, .o32, .o64 => break, + .none => break, else => try writer.print("{s} ", .{@tagName(op)}), }; @@ -253,48 +286,67 @@ pub const Mnemonic = enum { @".cfi_escape", // zig fmt: off // General-purpose - adc, add, @"and", - bsf, bsr, bswap, bt, btc, btr, bts, + aaa, aad, aam, aas, adc, add, @"and", arpl, + bound, bsf, bsr, bswap, bt, btc, btr, bts, call, cbw, cdq, cdqe, - clac, clc, cld, clflush, cli, clts, clui, + clac, clc, cld, cldemote, clflush, clflushopt, cli, clts, clui, clrssbsy, clwb, cmc, cmova, cmovae, cmovb, cmovbe, cmovc, cmove, cmovg, cmovge, cmovl, cmovle, cmovna, cmovnae, cmovnb, cmovnbe, cmovnc, cmovne, cmovng, cmovnge, cmovnl, cmovnle, cmovno, cmovnp, cmovns, cmovnz, cmovo, cmovp, cmovpe, cmovpo, cmovs, cmovz, - cmp, - cmps, cmpsb, cmpsd, cmpsq, cmpsw, - cmpxchg, cmpxchg8b, cmpxchg16b, + cmp, cmps, cmpsb, cmpsd, cmpsq, cmpsw, cmpxchg, cmpxchg8b, cmpxchg16b, cpuid, cqo, cwd, cwde, - dec, div, idiv, imul, inc, int3, - ja, jae, jb, jbe, jc, jrcxz, je, jg, jge, jl, jle, jna, jnae, jnb, jnbe, - jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, js, jz, - jmp, - lea, lfence, + daa, das, dec, div, + endbr32, endbr64, enqcmd, enqcmds, enter, + hlt, hreset, + idiv, imul, in, inc, incsspd, incsspq, ins, insb, insd, insw, + int, int1, int3, into, invd, invlpg, invpcid, iret, iretd, iretq, iretw, + ja, jae, jb, jbe, jc, 
jcxz, je, jecxz, jg, jge, jl, jle, jmp, jna, jnae, jnb, jnbe, + jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, jrcxz, js, jz, + lahf, lar, lea, leave, lfence, lgdt, lidt, lldt, lmsw, loop, loope, loopne, lods, lodsb, lodsd, lodsq, lodsw, - lzcnt, + lsl, ltr, lzcnt, mfence, mov, movbe, movs, movsb, movsd, movsq, movsw, movsx, movsxd, movzx, mul, neg, nop, not, - @"or", - pause, pop, popcnt, popfq, push, pushfq, - rcl, rcr, ret, rol, ror, rorx, - sal, sar, sarx, sbb, + @"or", out, outs, outsb, outsd, outsw, + pause, pop, popcnt, popf, popfd, popfq, push, pushfq, + rcl, rcr, + rdfsbase, rdgsbase, rdmsr, rdpid, rdpkru, rdpmc, rdrand, rdseed, rdssd, rdssq, rdtsc, rdtscp, + ret, rol, ror, rorx, rsm, + sahf, sal, sar, sarx, sbb, scas, scasb, scasd, scasq, scasw, + senduipi, serialize, shl, shld, shlx, shr, shrd, shrx, - stac, stc, std, sti, stui, - sub, syscall, + stac, stc, std, sti, str, stui, + sub, swapgs, syscall, sysenter, sysexit, sysret, seta, setae, setb, setbe, setc, sete, setg, setge, setl, setle, setna, setnae, setnb, setnbe, setnc, setne, setng, setnge, setnl, setnle, setno, setnp, setns, setnz, seto, setp, setpe, setpo, sets, setz, - sfence, + sfence, sidt, sldt, smsw, stos, stosb, stosd, stosq, stosw, - @"test", tzcnt, - ud2, - xadd, xchg, xgetbv, xor, + @"test", testui, tpause, + ud0, ud1, ud2, uiret, umonitor, umwait, + verr, verw, wrfsbase, wrgsbase, wrmsr, wrpkru, wrssd, wrssq, wrussd, wrussq, + xadd, xchg, xgetbv, xlat, xlatb, xor, // X87 - fabs, fchs, ffree, fisttp, fld, fldenv, fnstenv, fst, fstenv, fstp, + f2xm1, fabs, fadd, faddp, fbld, fbstp, fchs, fclex, + fcmovb, fcmovbe, fcmove, fcmovnb, fcmovnbe, fcmovne, fcmovnu, fcmovu, + fcom, fcomi, fcomip, fcomp, fcompp, fcos, + fdecstp, fdiv, fdivp, fdivr, fdivrp, ffree, + fiadd, ficom, ficomp, fidiv, fidivr, fild, fimul, fincstp, finit, + fist, fistp, fisttp, fisub, fisubr, + fld, fld1, fldcw, fldenv, fldl2e, fldl2t, fldlg2, fldln2, fldpi, fldz, + fmul, fmulp, + fnclex, fninit, 
fnop, fnsave, fnstcw, fnstenv, fnstsw, + fpatan, fprem, fprem1, fptan, frndint, frstor, + fsave, fscale, fsin, fsincos, fsqrt, + fst, fstcw, fstenv, fstp, fstsw, + fsub, fsubp, fsubr, fsubrp, + ftst, fucom, fucomi, fucomip, fucomp, fucompp, + fwait, fxam, fxch, fxtract, fyl2x, fyl2xp1, wait, // MMX - movd, movq, + emms, movd, movq, packssdw, packsswb, packuswb, paddb, paddd, paddq, paddsb, paddsw, paddusb, paddusw, paddw, pand, pandn, por, pxor, @@ -312,6 +364,7 @@ pub const Mnemonic = enum { cmpps, cmpss, cvtpi2ps, cvtps2pi, cvtsi2ss, cvtss2si, cvttps2pi, cvttss2si, divps, divss, + fxrstor, fxrstor64, fxsave, fxsave64, ldmxcsr, maxps, maxss, minps, minss, @@ -333,10 +386,12 @@ pub const Mnemonic = enum { andpd, andnpd, cmppd, //cmpsd, + comisd, comiss, cvtdq2pd, cvtdq2ps, cvtpd2dq, cvtpd2pi, cvtpd2ps, cvtpi2pd, cvtps2dq, cvtps2pd, cvtsd2si, cvtsd2ss, cvtsi2sd, cvtss2sd, cvttpd2dq, cvttpd2pi, cvttps2dq, cvttsd2si, divpd, divsd, + gf2p8affineinvqb, gf2p8affineqb, gf2p8mulb, maxpd, maxsd, minpd, minsd, movapd, @@ -357,11 +412,12 @@ pub const Mnemonic = enum { ucomisd, xorpd, // SSE3 - movddup, movshdup, movsldup, + addsubpd, addsubps, haddpd, haddps, lddqu, movddup, movshdup, movsldup, // SSSE3 pabsb, pabsd, pabsw, palignr, pshufb, // SSE4.1 blendpd, blendps, blendvpd, blendvps, + dppd, dpps, extractps, insertps, packusdw, @@ -376,28 +432,32 @@ pub const Mnemonic = enum { ptest, roundpd, roundps, roundsd, roundss, // SSE4.2 - pcmpgtq, + crc32, pcmpgtq, // PCLMUL pclmulqdq, // AES aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist, // SHA - sha256msg1, sha256msg2, sha256rnds2, + sha1rnds4, sha1nexte, sha1msg1, sha1msg2, sha256msg1, sha256msg2, sha256rnds2, // AVX - vaddpd, vaddps, vaddsd, vaddss, + andn, bextr, blsi, blsmsk, blsr, bzhi, tzcnt, + vaddpd, vaddps, vaddsd, vaddss, vaddsubpd, vaddsubps, vaesdec, vaesdeclast, vaesenc, vaesenclast, vaesimc, vaeskeygenassist, vandnpd, vandnps, vandpd, vandps, vblendpd, vblendps, vblendvpd, vblendvps, 
vbroadcastf128, vbroadcastsd, vbroadcastss, - vcmppd, vcmpps, vcmpsd, vcmpss, + vcmppd, vcmpps, vcmpsd, vcmpss, vcomisd, vcomiss, vcvtdq2pd, vcvtdq2ps, vcvtpd2dq, vcvtpd2ps, vcvtps2dq, vcvtps2pd, vcvtsd2si, vcvtsd2ss, vcvtsi2sd, vcvtsi2ss, vcvtss2sd, vcvtss2si, vcvttpd2dq, vcvttps2dq, vcvttsd2si, vcvttss2si, vdivpd, vdivps, vdivsd, vdivss, + vdppd, vdpps, vextractf128, vextractps, + vgf2p8affineinvqb, vgf2p8affineqb, vgf2p8mulb, + vhaddpd, vhaddps, vinsertf128, vinsertps, - vldmxcsr, + vlddqu, vldmxcsr, vmaxpd, vmaxps, vmaxsd, vmaxss, vminpd, vminps, vminsd, vminss, vmovapd, vmovaps, @@ -455,6 +515,12 @@ pub const Mnemonic = enum { // AVX2 vbroadcasti128, vpbroadcastb, vpbroadcastd, vpbroadcastq, vpbroadcastw, vextracti128, vinserti128, vpblendd, + // ADX + adcx, adox, + // AESKLE + aesdec128kl, aesdec256kl, aesenc128kl, aesenc256kl, encodekey128, encodekey256, loadiwkey, + // AESKLEWIDE_KL + aesdecwide128kl, aesdecwide256kl, aesencwide128kl, aesencwide256kl, // zig fmt: on }; @@ -462,24 +528,23 @@ pub const OpEn = enum { // zig fmt: off z, o, zo, oz, oi, - i, zi, + i, zi, ii, ia, d, m, fd, td, m1, mc, mi, mr, rm, rmi, mri, mrc, - rm0, vmi, rvm, rvmr, rvmi, mvr, rmv, + rm0, vm, vmi, rvm, rvmr, rvmi, mvr, rmv, // zig fmt: on }; pub const Op = enum { // zig fmt: off none, - o16, o32, o64, unity, imm8, imm16, imm32, imm64, imm8s, imm16s, imm32s, al, ax, eax, rax, - cl, + cl, dx, rip, eip, ip, r8, r16, r32, r64, rm8, rm16, rm32, rm64, @@ -489,9 +554,10 @@ pub const Op = enum { m, moffs, sreg, - st, mm, mm_m64, + st0, st, mm, mm_m64, xmm0, xmm, xmm_m8, xmm_m16, xmm_m32, xmm_m64, xmm_m128, ymm, ymm_m256, + cr, dr, // zig fmt: on pub fn fromOperand(operand: Instruction.Operand, target: *const std.Target) Op { @@ -499,32 +565,34 @@ pub const Op = enum { .none => .none, .reg => |reg| switch (reg.class()) { - .general_purpose => if (reg.to64() == .rax) - switch (reg) { - .al => .al, - .ax => .ax, - .eax => .eax, - .rax => .rax, + .general_purpose => switch (reg) { + .al => 
.al, + .ax => .ax, + .eax => .eax, + .rax => .rax, + .cl => .cl, + .dx => .dx, + else => switch (reg.bitSize()) { + 8 => .r8, + 16 => .r16, + 32 => .r32, + 64 => .r64, else => unreachable, - } - else if (reg == .cl) - .cl - else switch (reg.bitSize()) { - 8 => .r8, - 16 => .r16, - 32 => .r32, - 64 => .r64, - else => unreachable, + }, }, .segment => .sreg, - .x87 => .st, + .x87 => switch (reg) { + .st0 => .st0, + else => .st, + }, .mmx => .mm, - .sse => if (reg == .xmm0) - .xmm0 - else switch (reg.bitSize()) { - 128 => .xmm, - 256 => .ymm, - else => unreachable, + .sse => switch (reg) { + .xmm0 => .xmm0, + else => switch (reg.bitSize()) { + 128 => .xmm, + 256 => .ymm, + else => unreachable, + }, }, .ip => switch (reg) { .rip => .rip, @@ -532,6 +600,8 @@ pub const Op = enum { .ip => .ip, else => unreachable, }, + .cr => .cr, + .dr => .dr, }, .mem => |mem| switch (mem) { @@ -588,24 +658,27 @@ pub const Op = enum { .eax => .eax, .rax => .rax, .cl => .cl, + .dx => .dx, .rip => .rip, .eip => .eip, .ip => .ip, + .st0 => .st0, .xmm0 => .xmm0, }; } pub fn immBitSize(op: Op) u64 { return switch (op) { - .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, - .al, .cl, .rip, .eip, .ip, .r8, .rm8, .r32_m8 => unreachable, + .none, .moffs, .m, .sreg => unreachable, + .al, .cl, .dx, .rip, .eip, .ip, .r8, .rm8, .r32_m8 => unreachable, .ax, .r16, .rm16 => unreachable, .eax, .r32, .rm32, .r32_m16 => unreachable, .rax, .r64, .rm64, .r64_m16 => unreachable, - .st, .mm, .mm_m64 => unreachable, + .st0, .st, .mm, .mm_m64 => unreachable, .xmm0, .xmm, .xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable, .ymm, .ymm_m256 => unreachable, .m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable, + .cr, .dr => unreachable, .unity => 1, .imm8, .imm8s, .rel8 => 8, .imm16, .imm16s, .rel16 => 16, @@ -616,15 +689,15 @@ pub const Op = enum { pub fn regBitSize(op: Op) u64 { return switch (op) { - .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, + .none, .moffs, .m, .sreg => 
unreachable, .unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable, .rel8, .rel16, .rel32 => unreachable, .m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable, .al, .cl, .r8, .rm8 => 8, - .ax, .ip, .r16, .rm16 => 16, + .ax, .dx, .ip, .r16, .rm16 => 16, .eax, .eip, .r32, .rm32, .r32_m8, .r32_m16 => 32, - .rax, .rip, .r64, .rm64, .r64_m16, .mm, .mm_m64 => 64, - .st => 80, + .rax, .rip, .r64, .rm64, .r64_m16, .mm, .mm_m64, .cr, .dr => 64, + .st0, .st => 80, .xmm0, .xmm, .xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128 => 128, .ymm, .ymm_m256 => 256, }; @@ -632,11 +705,12 @@ pub const Op = enum { pub fn memBitSize(op: Op) u64 { return switch (op) { - .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, + .none, .moffs, .m, .sreg => unreachable, .unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable, .rel8, .rel16, .rel32 => unreachable, - .al, .cl, .r8, .ax, .ip, .r16, .eax, .eip, .r32, .rax, .rip, .r64 => unreachable, - .st, .mm, .xmm0, .xmm, .ymm => unreachable, + .al, .cl, .r8, .ax, .dx, .ip, .r16, .eax, .eip, .r32, .rax, .rip, .r64 => unreachable, + .st0, .st, .mm, .xmm0, .xmm, .ymm => unreachable, + .cr, .dr => unreachable, .m8, .rm8, .r32_m8, .xmm_m8 => 8, .m16, .rm16, .r32_m16, .r64_m16, .xmm_m16 => 16, .m32, .rm32, .xmm_m32 => 32, @@ -664,14 +738,15 @@ pub const Op = enum { // zig fmt: off return switch (op) { .al, .ax, .eax, .rax, - .cl, + .cl, .dx, .ip, .eip, .rip, .r8, .r16, .r32, .r64, .rm8, .rm16, .rm32, .rm64, .r32_m8, .r32_m16, .r64_m16, - .st, .mm, .mm_m64, + .st0, .st, .mm, .mm_m64, .xmm0, .xmm, .xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128, .ymm, .ymm_m256, + .cr, .dr, => true, else => false, }; @@ -717,33 +792,34 @@ pub const Op = enum { pub fn class(op: Op) bits.Register.Class { return switch (op) { else => unreachable, - .al, .ax, .eax, .rax, .cl => .general_purpose, + .al, .ax, .eax, .rax, .cl, .dx => .general_purpose, .r8, .r16, .r32, .r64 => .general_purpose, .rm8, .rm16, .rm32, .rm64 
=> .general_purpose, .r32_m8, .r32_m16, .r64_m16 => .general_purpose, .sreg => .segment, - .st => .x87, + .st0, .st => .x87, .mm, .mm_m64 => .mmx, .xmm0, .xmm, .xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128 => .sse, .ymm, .ymm_m256 => .sse, .rip, .eip, .ip => .ip, + .cr => .cr, + .dr => .dr, }; } /// Given an operand `op` checks if `target` is a subset for the purposes of the encoding. pub fn isSubset(op: Op, target: Op) bool { switch (op) { - .o16, .o32, .o64 => unreachable, .moffs, .sreg => return op == target, .none => switch (target) { - .o16, .o32, .o64, .none => return true, + .none => return true, else => return false, }, else => { if (op.isRegister() and target.isRegister()) { - return switch (target) { - .cl, .al, .ax, .eax, .rax, .xmm0 => op == target, - else => op.class() == target.class() and op.regBitSize() == target.regBitSize(), + return switch (target.toReg()) { + .none => op.class() == target.class() and op.regBitSize() == target.regBitSize(), + else => op == target, }; } if (op.isMemory() and target.isMemory()) { @@ -779,6 +855,7 @@ pub const Mode = enum { none, short, long, rex, rex_short, + wait, vex_128_w0, vex_128_w1, vex_128_wig, vex_256_w0, vex_256_w1, vex_256_wig, vex_lig_w0, vex_lig_w1, vex_lig_wig, @@ -841,20 +918,46 @@ pub const Mode = enum { pub const Feature = enum { none, + @"32bit", + @"64bit", + adx, aes, @"aes avx", avx, avx2, bmi, bmi2, + cldemote, + clflushopt, + clwb, cmov, + @"cmov x87", + crc32, + enqcmd, f16c, fma, + fsgsbase, + fxsr, + gfni, + @"gfni avx", + hreset, + @"invpcid 32bit", + @"invpcid 64bit", + kl, lzcnt, + mmx, movbe, pclmul, @"pclmul avx", + pku, popcnt, + rdrnd, + rdseed, + @"rdpid 32bit", + @"rdpid 64bit", + sahf, + serialize, + shstk, smap, sse, sse2, @@ -866,6 +969,8 @@ pub const Feature = enum { uintr, vaes, vpclmulqdq, + waitpkg, + widekl, x87, }; @@ -886,7 +991,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op } const mnemonic_to_encodings_map = init: { - 
@setEvalBranchQuota(5_000); + @setEvalBranchQuota(5_600); const mnemonic_count = @typeInfo(Mnemonic).@"enum".fields.len; var mnemonic_map: [mnemonic_count][]Data = @splat(&.{}); const encodings = @import("encodings.zig"); diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 55582100ea..15b83ccf23 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -359,6 +359,8 @@ pub fn imm(lower: *const Lower, ops: Mir.Inst.Ops, i: u32) Immediate { .pseudo_dbg_local_ai_s, => .s(@bitCast(i)), + .ii, + .ir, .rrri, .rri_u, .ri_u, @@ -548,17 +550,19 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) } fn generic(lower: *Lower, inst: Mir.Inst) Error!void { + @setEvalBranchQuota(2_400); const fixes = switch (inst.ops) { .none => inst.data.none.fixes, .inst => inst.data.inst.fixes, .i_s, .i_u => inst.data.i.fixes, + .ii => inst.data.ii.fixes, .r => inst.data.r.fixes, .rr => inst.data.rr.fixes, .rrr => inst.data.rrr.fixes, .rrrr => inst.data.rrrr.fixes, .rrri => inst.data.rrri.fixes, .rri_s, .rri_u => inst.data.rri.fixes, - .ri_s, .ri_u, .ri_64 => inst.data.ri.fixes, + .ri_s, .ri_u, .ri_64, .ir => inst.data.ri.fixes, .rm, .rmi_s, .mr => inst.data.rx.fixes, .mrr, .rrm, .rmr => inst.data.rrx.fixes, .rmi, .mri => inst.data.rix.fixes, @@ -575,8 +579,6 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void { else .none, }, mnemonic: { - @setEvalBranchQuota(2_000); - comptime var max_len = 0; inline for (@typeInfo(Mnemonic).@"enum".fields) |field| max_len = @max(field.name.len, max_len); var buf: [max_len]u8 = undefined; @@ -598,6 +600,14 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void { .i_s, .i_u => &.{ .{ .imm = lower.imm(inst.ops, inst.data.i.i) }, }, + .ii => &.{ + .{ .imm = lower.imm(inst.ops, inst.data.ii.i1) }, + .{ .imm = lower.imm(inst.ops, inst.data.ii.i2) }, + }, + .ir => &.{ + .{ .imm = lower.imm(inst.ops, inst.data.ri.i) }, + .{ .reg = inst.data.ri.r1 }, + }, .r => &.{ .{ .reg = inst.data.r.r1 }, 
}, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index ca9859bc37..2a72be53c9 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -23,8 +23,82 @@ pub const Inst = struct { /// ___ @"_", + /// ___ 0 + _0, + /// ___ 1 + _1, + /// ___ 2 + _2, + /// ___ 3 + _3, + /// ___ 4 + _4, + + /// System Call ___ + sys_, + + /// ___ crement Shadow Stack Pointer Doubleword + _csspd, + /// ___ crement Shadow Stack Pointer Quadword + _csspq, + /// ___ FS Segment Base + _fsbase, + /// ___ GS Segment Base + _gsbase, + /// ___ Model Specific Register + _msr, + /// ___ MXCSR + _mxcsr, + /// ___ Processor ID + _pid, + /// ___ Protection Key Rights For User Pages + _pkru, + /// ___ Performance-Monitoring Counters + _pmc, + /// ___ Random Number + _rand, + /// ___ Random Seed + _seed, + /// ___ Shadow Stack Pointer Doubleword + _sspd, + /// ___ Shadow Stack Pointer Quadword + _sspq, + /// ___ Time-Stamp Counter + _tsc, + /// ___ Time-Stamp Counter And Processor ID + _tscp, + /// VEX-Encoded ___ MXCSR + v_mxcsr, + + /// Interrupt ___ /// Integer ___ i_, + /// Interrupt ___ Word + i_w, + /// Interrupt ___ Doubleword + i_d, + /// Interrupt ___ Quadword + i_q, + /// User-Interrupt ___ + ui_, + + /// ___ mp + _mp, + /// ___ if CX register is 0 + _cxz, + /// ___ if ECX register is 0 + _ecxz, + /// ___ if RCX register is 0 + _rcxz, + + /// ___ Addition + _a, + /// ___ Subtraction + _s, + /// ___ Multiply + _m, + /// ___ Division + _d, /// ___ Left _l, @@ -33,6 +107,8 @@ pub const Inst = struct { /// ___ Left Without Affecting Flags _lx, /// ___ Right + /// ___ For Reading + /// ___ Register _r, /// ___ Right Double _rd, @@ -45,7 +121,7 @@ pub const Inst = struct { //_r, /// ___ Above - _a, + //_a, /// ___ Above Or Equal _ae, /// ___ Below @@ -102,7 +178,7 @@ pub const Inst = struct { /// ___ Parity Odd _po, /// ___ Sign - _s, + //_s, /// ___ Zero _z, /// ___ Alignment Check Flag @@ -111,15 +187,18 @@ pub const Inst = struct { //_d, /// ___ Interrupt Flag _i, +
/// ___ Task-Switched Flag In CR0 + _ts, /// ___ User Interrupt Flag _ui, /// ___ Byte //_b, /// ___ Word + /// ___ For Writing _w, /// ___ Doubleword - _d, + //_d, /// ___ QuadWord _q, @@ -214,8 +293,72 @@ pub const Inst = struct { /// Float ___ f_, + /// Float ___ +1.0 + /// Float ___ 1 + f_1, + /// Float ___ Below + f_b, + /// Float ___ Below Or Equal + f_be, + /// Float ___ Control Word + f_cw, + /// Float ___ Equal + f_e, + /// Float ___ Environment + f_env, + /// Float ___ log_2(e) + f_l2e, + /// Float ___ log_2(10) + f_l2t, + /// Float ___ log_10(2) + f_lg2, + /// Float ___ log_e(2) + f_ln2, + /// Float ___ Not Below + f_nb, + /// Float ___ Not Below Or Equal + f_nbe, + /// Float ___ Not Equal + f_ne, + /// Float ___ Not Unordered + f_nu, /// Float ___ Pop f_p, + /// Float ___ +1 + f_p1, + /// Float ___ π + f_pi, + /// Float ___ Pop Pop + f_pp, + /// Float ___ stack-top pointer + f_stp, + /// Float ___ Status Word + f_sw, + /// Float ___ Unordered + f_u, + /// Float ___ +0.0 + f_z, + /// Float BCD ___ + fb_, + /// Float BCD ___ Pop + fb_p, + /// Float And Integer ___ + fi_, + /// Float And Integer ___ Pop + fi_p, + /// Float No Wait ___ + fn_, + /// Float No Wait ___ Control Word + fn_cw, + /// Float No Wait ___ Environment + fn_env, + /// Float No Wait ___ status word + fn_sw, + + /// ___ in 32-bit and Compatibility Mode + _32, + /// ___ in 64-bit Mode + _64, /// Packed ___ p_, @@ -243,6 +386,24 @@ pub const Inst = struct { /// ___ Packed Double-Precision Values _pd, + /// ___ Internal Caches + //_d, + /// ___ TLB Entries + _lpg, + /// ___ Process-Context Identifier + _pcid, + + /// Load ___ + l_, + /// Memory ___ + m_, + /// Store ___ + s_, + /// Timed ___ + t_, + /// User Level Monitor ___ + um_, + /// VEX-Encoded ___ v_, /// VEX-Encoded ___ Byte @@ -282,6 +443,19 @@ pub const Inst = struct { /// VEX-Encoded ___ 128-Bits Of Floating-Point Data v_f128, + /// ___ 128-bit key with key locker + _128, + /// ___ 256-bit key with key locker + _256, + /// ___ 
with key locker using 128-bit key + _128kl, + /// ___ with key locker using 256-bit key + _256kl, + /// ___ with key locker on 8 blocks using 128-bit key + _wide128kl, + /// ___ with key locker on 8 blocks using 256-bit key + _wide256kl, + /// Mask ___ Byte k_b, /// Mask ___ Word @@ -300,6 +474,12 @@ pub const Inst = struct { }; pub const Tag = enum(u8) { + // General-purpose + /// ASCII adjust al after addition + /// ASCII adjust ax before division + /// ASCII adjust ax after multiply + /// ASCII adjust al after subtraction + aa, /// Add with carry adc, /// Add @@ -313,6 +493,8 @@ pub const Inst = struct { /// Bitwise logical and of packed single-precision floating-point values /// Bitwise logical and of packed double-precision floating-point values @"and", + /// Adjust RPL field of segment selector + arpl, /// Bit scan forward /// Bit scan reverse bs, @@ -324,6 +506,7 @@ pub const Inst = struct { /// Bit test and set bt, /// Call + /// Fast system call call, /// Convert byte to word cbw, @@ -331,12 +514,25 @@ pub const Inst = struct { cdq, /// Convert doubleword to quadword cdqe, + /// Clear AC flag in EFLAGS register /// Clear carry flag /// Clear direction flag /// Clear interrupt flag + /// Clear task-switched flag in CR0 + /// Clear user interrupt flag cl, + /// Cache line demote + cldemote, /// Flush cache line clflush, + /// Flush cache line optimized + clflushopt, + /// Clear busy flag in a supervisor shadow stack token + clrssbsy, + /// Cache line write back + clwb, + /// Complement carry flag + cmc, /// Conditional move cmov, /// Logical compare @@ -355,33 +551,79 @@ pub const Inst = struct { cwd, /// Convert word to doubleword cwde, + /// Decimal adjust AL after addition + /// Decimal adjust AL after subtraction + da, /// Decrement by 1 - dec, + /// Decrement shadow stack pointer + de, /// Unsigned division /// Signed division + /// Divide /// Divide packed single-precision floating-point values /// Divide scalar single-precision floating-point values 
/// Divide packed double-precision floating-point values /// Divide scalar double-precision floating-point values div, + /// Terminate an indirect branch in 32-bit and compatibility mode + /// Terminate an indirect branch in 64-bit mode + endbr, + /// Enqueue command + /// Enqueue command supervisor + enqcmd, + /// Make stack frame for procedure parameters + /// Fast system call + enter, + /// Fast return from fast system call + exit, + /// Load fence + /// Memory fence + /// Store fence + fence, + /// Halt + hlt, + /// History reset + hreset, + /// Input from port + /// Input from port to string /// Increment by 1 - inc, + /// Increment shadow stack pointer + in, /// Call to interrupt procedure - int3, + int, + /// Invalidate internal caches + /// Invalidate TLB entries + /// Invalidate process-context identifier + inv, /// Conditional jump - j, /// Jump - jmp, + j, + /// Load status flags into AH register + lahf, + /// Load access right byte + lar, /// Load effective address lea, + /// High level procedure exit + leave, + /// Load global descriptor table register + lgdt, + /// Load interrupt descriptor table register + lidt, + /// Load local descriptor table register + lldt, + /// Load machine status word + lmsw, /// Load string lod, - /// Load fence - lfence, + /// Loop according to ECX counter + loop, + /// Load segment limit + lsl, + /// Load task register + ltr, /// Count the number of leading zero bits lzcnt, - /// Memory fence - mfence, /// Move /// Move data from string to string /// Move scalar single-precision floating-point value @@ -407,6 +649,7 @@ pub const Inst = struct { /// Two's complement negation neg, /// No-op + /// No operation nop, /// One's complement negation not, @@ -414,39 +657,62 @@ pub const Inst = struct { /// Bitwise logical or of packed single-precision floating-point values /// Bitwise logical or of packed double-precision floating-point values @"or", + /// Output to port + /// Output string to port + out, /// Spin loop hint + ///
Timed pause pause, /// Pop pop, /// Return the count of number of bits set to 1 popcnt, /// Pop stack into EFLAGS register - popfq, + popf, /// Push push, /// Push EFLAGS register onto the stack - pushfq, + pushf, /// Rotate left through carry /// Rotate right through carry rc, + /// Read FS segment base + /// Read GS segment base + /// Read from model specific register + /// Read processor ID + /// Read protection key rights for user pages + /// Read performance-monitoring counters + /// Read random number + /// Read random seed + /// Read shadow stack pointer + /// Read time-stamp counter + /// Read time-stamp counter and processor ID + rd, /// Return + /// Return from fast system call + /// Interrupt return + /// User-interrupt return ret, /// Rotate left /// Rotate right /// Rotate right logical without affecting flags ro, + /// Resume from system management mode + rsm, /// Arithmetic shift left /// Arithmetic shift right /// Shift left arithmetic without affecting flags sa, + /// Store AH into flags + sahf, /// Integer subtraction with borrow sbb, /// Scan string sca, + /// Send user interprocessor interrupt + senduipi, /// Set byte on condition set, - /// Store fence - sfence, /// Logical shift left /// Double precision shift left /// Logical shift right @@ -454,6 +720,12 @@ pub const Inst = struct { /// Shift left logical without affecting flags /// Shift right logical without affecting flags sh, + /// Store interrupt descriptor table register + sidt, + /// Store local descriptor table register + sldt, + /// Store machine status word + smsw, /// Subtract /// Subtract packed integers /// Subtract packed single-precision floating-point values @@ -464,46 +736,128 @@ pub const Inst = struct { /// Set carry flag /// Set direction flag /// Set interrupt flag + /// Store binary coded decimal integer and pop /// Store floating-point value + /// Store integer + /// Store x87 FPU control word + /// Store x87 FPU environment + /// Store x87 FPU status word + /// Store 
MXCSR register state st, /// Store string sto, - /// Syscall - syscall, + /// Swap GS base register + swapgs, /// Test condition @"test", - /// Count the number of trailing zero bits - tzcnt, /// Undefined instruction - ud2, + ud, + /// User level set up monitor address + umonitor, + /// Verify a segment for reading + /// Verify a segment for writing + ver, + /// Write to model specific register + /// Write to model specific register + /// Write to model specific register + wr, /// Exchange and add xadd, /// Exchange register/memory with register - xchg, + /// Exchange register contents + xch, /// Get value of extended control register xgetbv, + /// Table look-up translation + xlat, /// Logical exclusive-or /// Bitwise logical xor of packed single-precision floating-point values /// Bitwise logical xor of packed double-precision floating-point values xor, + // X87 + /// Compute 2^x-1 + @"2xm1", /// Absolute value abs, /// Change sign chs, + /// Clear exceptions + clex, + /// Compare floating-point values + com, + /// Compare floating-point values and set EFLAGS + /// Compare scalar ordered single-precision floating-point values + /// Compare scalar ordered double-precision floating-point values + comi, + /// Cosine + cos, + /// Decrement stack-top pointer + decstp, + /// Reverse divide + divr, /// Free floating-point register free, - /// Store integer with truncation - istt, + /// Increment stack-top pointer + incstp, + /// Initialize floating-point unit + init, + /// Load binary coded decimal integer /// Load floating-point value - ld, + /// Load integer + /// Load constant + /// Load x87 FPU control word /// Load x87 FPU environment - ldenv, - /// Store x87 FPU environment - nstenv, - /// Store x87 FPU environment - stenv, + /// Load MXCSR register state + ld, + /// Partial arctangent + patan, + /// Partial remainder + prem, + /// Partial tangent + ptan, + /// Round to integer + rndint, + /// Restore x87 FPU state + rstor, + /// Store x87 FPU state + save, + /// 
Scale + scale, + /// Sine + sin, + /// Sine and cosine + sincos, + /// Square root + /// Square root of packed single-precision floating-point values + /// Square root of scalar single-precision floating-point value + /// Square root of packed double-precision floating-point values + /// Square root of scalar double-precision floating-point value + sqrt, + /// Store integer with truncation + stt, + /// Reverse subtract + subr, + /// Test + tst, + /// Unordered compare floating-point values + ucom, + /// Unordered compare floating-point values and set EFLAGS + /// Unordered compare scalar single-precision floating-point values + /// Unordered compare scalar double-precision floating-point values + ucomi, + /// Wait + /// User level monitor wait + wait, + /// Examine floating-point + xam, + /// Extract exponent and significand + xtract, + /// Compute y * log2x + /// Compute y * log2(x + 1) + yl2x, + // MMX /// Pack with signed saturation ackssw, /// Pack with signed saturation @@ -514,6 +868,7 @@ pub const Inst = struct { adds, /// Add packed unsigned integers with unsigned saturation addus, + /// Logical and not /// Bitwise logical and not of packed single-precision floating-point values /// Bitwise logical and not of packed double-precision floating-point values andn, @@ -521,18 +876,8 @@ pub const Inst = struct { cmpeq, /// Compare packed data for greater than cmpgt, - /// Maximum of packed signed integers - maxs, - /// Maximum of packed unsigned integers - maxu, - /// Minimum of packed signed integers - mins, - /// Minimum of packed unsigned integers - minu, - /// Move byte mask - /// Extract packed single precision floating-point sign mask - /// Extract packed double precision floating-point sign mask - movmsk, + /// Empty MMX technology state + emms, /// Multiply packed signed integers and store low result mull, /// Multiply packed signed integers and store high result @@ -547,12 +892,20 @@ pub const Inst = struct { subs, /// Subtract packed unsigned integers 
with unsigned saturation subus, + /// Unpack high data + unpckhbw, + /// Unpack high data + unpckhdq, + /// Unpack high data + unpckhwd, + /// Unpack low data + unpcklbw, + /// Unpack low data + unpckldq, + /// Unpack low data + unpcklwd, - /// Load MXCSR register - ldmxcsr, - /// Store MXCSR register state - stmxcsr, - + // SSE /// Convert packed doubleword integers to packed single-precision floating-point values /// Convert packed doubleword integers to packed double-precision floating-point values cvtpi2, @@ -567,17 +920,38 @@ pub const Inst = struct { cvttps2pi, /// Convert with truncation scalar single-precision floating-point value to doubleword integer cvttss2si, - + /// Extract byte + /// Extract word + /// Extract doubleword + /// Extract quadword + extr, + /// Restore x87 FPU, MMX, XMM, and MXCSR state + fxrstor, + /// Save x87 FPU, MMX technology, and MXCSR state + fxsave, + /// Insert byte + /// Insert word + /// Insert doubleword + /// Insert quadword + insr, /// Maximum of packed single-precision floating-point values /// Maximum of scalar single-precision floating-point values /// Maximum of packed double-precision floating-point values /// Maximum of scalar double-precision floating-point values max, + /// Maximum of packed signed integers + maxs, + /// Maximum of packed unsigned integers + maxu, /// Minimum of packed single-precision floating-point values /// Minimum of scalar single-precision floating-point values /// Minimum of packed double-precision floating-point values /// Minimum of scalar double-precision floating-point values min, + /// Minimum of packed signed integers + mins, + /// Minimum of packed unsigned integers + minu, /// Move aligned packed single-precision floating-point values /// Move aligned packed double-precision floating-point values mova, @@ -591,27 +965,18 @@ pub const Inst = struct { movl, /// Move packed single-precision floating-point values low to high movlh, + /// Move byte mask + /// Extract packed single 
precision floating-point sign mask + /// Extract packed double precision floating-point sign mask + movmsk, /// Move unaligned packed single-precision floating-point values /// Move unaligned packed double-precision floating-point values movu, - /// Extract byte - /// Extract word - /// Extract doubleword - /// Extract quadword - extr, - /// Insert byte - /// Insert word - /// Insert doubleword - /// Insert quadword - insr, - /// Square root of packed single-precision floating-point values - /// Square root of scalar single-precision floating-point value - /// Square root of packed double-precision floating-point values - /// Square root of scalar double-precision floating-point value - sqrt, - /// Unordered compare scalar single-precision floating-point values - /// Unordered compare scalar double-precision floating-point values - ucomi, + /// Packed interleave shuffle of quadruplets of single-precision floating-point values + /// Packed interleave shuffle of pairs of double-precision floating-point values + /// Shuffle packed doublewords + /// Shuffle packed words + shuf, /// Unpack and interleave high packed single-precision floating-point values /// Unpack and interleave high packed double-precision floating-point values unpckh, @@ -619,6 +984,7 @@ pub const Inst = struct { /// Unpack and interleave low packed double-precision floating-point values unpckl, + // SSE2 /// Convert packed doubleword integers to packed single-precision floating-point values /// Convert packed doubleword integers to packed double-precision floating-point values cvtdq2, @@ -646,32 +1012,28 @@ pub const Inst = struct { cvttps2dq, /// Convert with truncation scalar double-precision floating-point value to doubleword integer cvttsd2si, - /// Packed interleave shuffle of quadruplets of single-precision floating-point values - /// Packed interleave shuffle of pairs of double-precision floating-point values - /// Shuffle packed doublewords - /// Shuffle packed words - shuf, + /// Galois 
field affine transformation inverse + gf2p8affineinvq, + /// Galois field affine transformation + gf2p8affineq, + /// Galois field multiply bytes + gf2p8mul, /// Shuffle packed high words shufh, /// Shuffle packed low words shufl, /// Unpack high data - unpckhbw, - /// Unpack high data - unpckhdq, - /// Unpack high data unpckhqdq, - /// Unpack high data - unpckhwd, - /// Unpack low data - unpcklbw, - /// Unpack low data - unpckldq, /// Unpack low data unpcklqdq, - /// Unpack low data - unpcklwd, + // SSE3 + /// Packed single-precision floating-point add/subtract + /// Packed double-precision floating-point add/subtract + addsub, + /// Packed single-precision floating-point horizontal add + /// Packed double-precision floating-point horizontal add + hadd, /// Replicate double floating-point values movddup, /// Replicate single floating-point values @@ -679,9 +1041,11 @@ pub const Inst = struct { /// Replicate single floating-point values movsldup, + // SSSE3 /// Packed align right alignr, + // SSE4.1 /// Pack with unsigned saturation ackusd, /// Blend packed single-precision floating-point values @@ -694,6 +1058,9 @@ pub const Inst = struct { /// Variable blend packed double-precision floating-point values /// Variable blend scalar double-precision floating-point values blendv, + /// Dot product of packed single-precision floating-point values + /// Dot product of packed double-precision floating-point values + dp, /// Extract packed floating-point values /// Extract packed integer values extract, @@ -714,14 +1081,28 @@ pub const Inst = struct { /// Round scalar double-precision floating-point value round, + // SSE4.2 + /// Accumulate CRC32 value + crc32, + + // PCLMUL /// Carry-less multiplication quadword clmulq, + // AES /// Perform one round of an AES decryption flow + /// Perform ten rounds of AES decryption flow with key locker using 128-bit key + /// Perform ten rounds of AES decryption flow with key locker using 256-bit key + /// Perform ten rounds of AES 
decryption flow with key locker on 8 blocks using 128-bit key + /// Perform ten rounds of AES decryption flow with key locker on 8 blocks using 256-bit key aesdec, /// Perform last round of an AES decryption flow aesdeclast, /// Perform one round of an AES encryption flow + /// Perform ten rounds of AES encryption flow with key locker using 128-bit key + /// Perform ten rounds of AES encryption flow with key locker using 256-bit key + /// Perform ten rounds of AES encryption flow with key locker on 8 blocks using 128-bit key + /// Perform ten rounds of AES encryption flow with key locker on 8 blocks using 256-bit key aesenc, /// Perform last round of an AES encryption flow aesenclast, @@ -730,22 +1111,42 @@ pub const Inst = struct { /// AES round key generation assist aeskeygenassist, + // SHA + /// Perform four rounds of SHA1 operation + sha1rnds, + /// Calculate SHA1 state variable E after four rounds + sha1nexte, + /// Perform an intermediate calculation for the next four SHA1 message dwords + /// Perform a final calculation for the next four SHA1 message dwords + sha1msg, /// Perform an intermediate calculation for the next four SHA256 message dwords - sha256msg1, /// Perform a final calculation for the next four SHA256 message dwords - sha256msg2, + sha256msg, /// Perform two rounds of SHA256 operation - sha256rnds2, + sha256rnds, + // AVX + /// Bit field extract + bextr, + /// Extract lowest set isolated bit + /// Get mask up to lowest set bit + /// Reset lowest set bit + bls, /// Load with broadcast floating-point data /// Load integer and broadcast broadcast, + /// Zero high bits starting with specified bit position + bzhi, + /// Count the number of trailing zero bits + tzcnt, + // F16C /// Convert 16-bit floating-point values to single-precision floating-point values cvtph2, /// Convert single-precision floating-point values to 16-bit floating-point values cvtps2ph, + // FMA /// Fused multiply-add of packed single-precision floating-point values /// Fused 
multiply-add of scalar single-precision floating-point values /// Fused multiply-add of packed double-precision floating-point values @@ -762,6 +1163,19 @@ pub const Inst = struct { /// Fused multiply-add of scalar double-precision floating-point values fmadd231, + // ADX + /// Unsigned integer addition of two operands with carry flag + adcx, + /// Unsigned integer addition of two operands with overflow flag + adox, + + // AESKLE + /// Encode 128-bit key with key locker + /// Encode 256-bit key with key locker + encodekey, + /// Load internal wrapping key with key locker + loadiwkey, + /// A pseudo instruction that requires special lowering. /// This should be the only tag in this enum that doesn't /// directly correspond to one or more instruction mnemonics. @@ -804,11 +1218,17 @@ pub const Inst = struct { /// Uses `ri` payload with `i` index of extra data of type `Imm64`. ri_64, /// Immediate (sign-extended) operand. - /// Uses `imm` payload. + /// Uses `i` payload. i_s, /// Immediate (unsigned) operand. - /// Uses `imm` payload. + /// Uses `i` payload. i_u, + /// Immediate (word), immediate (byte) operands. + /// Uses `ii` payload. + ii, + /// Immediate (byte), register operands. + /// Uses `ri` payload. + ir, /// Relative displacement operand. /// Uses `reloc` payload. 
rel, @@ -1036,6 +1456,11 @@ pub const Inst = struct { fixes: Fixes = ._, i: u32, }, + ii: struct { + fixes: Fixes = ._, + i1: u16, + i2: u8, + }, r: struct { fixes: Fixes = ._, r1: Register, @@ -1244,7 +1669,7 @@ pub const Memory = struct { size: bits.Memory.Size, index: Register, scale: bits.Memory.Scale, - _: u15 = undefined, + _: u14 = undefined, }; pub fn encode(mem: bits.Memory) Memory { diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index 8f13620730..1ef9653386 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -177,7 +177,7 @@ pub const Condition = enum(u5) { } }; -pub const Register = enum(u7) { +pub const Register = enum(u8) { // zig fmt: off rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15, @@ -207,6 +207,12 @@ pub const Register = enum(u7) { rip, eip, ip, + cr0, cr1, cr2, cr3, cr4, cr5, cr6, cr7, + cr8, cr9, cr10, cr11, cr12, cr13, cr14, cr15, + + dr0, dr1, dr2, dr3, dr4, dr5, dr6, dr7, + dr8, dr9, dr10, dr11, dr12, dr13, dr14, dr15, + none, // zig fmt: on @@ -217,6 +223,8 @@ pub const Register = enum(u7) { mmx, sse, ip, + cr, + dr, }; pub fn class(reg: Register) Class { @@ -235,13 +243,15 @@ pub const Register = enum(u7) { @intFromEnum(Register.es) ... @intFromEnum(Register.gs) => .segment, @intFromEnum(Register.rip) ... @intFromEnum(Register.ip) => .ip, + @intFromEnum(Register.cr0) ... @intFromEnum(Register.cr15) => .cr, + @intFromEnum(Register.dr0) ... @intFromEnum(Register.dr15) => .dr, else => unreachable, // zig fmt: on }; } - pub fn id(reg: Register) u6 { + pub fn id(reg: Register) u7 { const base = switch (@intFromEnum(reg)) { // zig fmt: off @intFromEnum(Register.rax) ... @intFromEnum(Register.r15) => @intFromEnum(Register.rax), @@ -254,8 +264,9 @@ pub const Register = enum(u7) { @intFromEnum(Register.xmm0) ... @intFromEnum(Register.xmm15) => @intFromEnum(Register.xmm0) - 16, @intFromEnum(Register.mm0) ... 
@intFromEnum(Register.mm7) => @intFromEnum(Register.mm0) - 32, @intFromEnum(Register.st0) ... @intFromEnum(Register.st7) => @intFromEnum(Register.st0) - 40, - @intFromEnum(Register.es) ... @intFromEnum(Register.gs) => @intFromEnum(Register.es) - 48, + @intFromEnum(Register.cr0) ... @intFromEnum(Register.cr15) => @intFromEnum(Register.cr0) - 54, + @intFromEnum(Register.dr0) ... @intFromEnum(Register.dr15) => @intFromEnum(Register.dr0) - 70, else => unreachable, // zig fmt: on @@ -279,6 +290,9 @@ pub const Register = enum(u7) { @intFromEnum(Register.es) ... @intFromEnum(Register.gs) => 16, + @intFromEnum(Register.cr0) ... @intFromEnum(Register.cr15) => 64, + @intFromEnum(Register.dr0) ... @intFromEnum(Register.dr15) => 64, + else => unreachable, // zig fmt: on }; @@ -295,6 +309,9 @@ pub const Register = enum(u7) { @intFromEnum(Register.ymm8) ... @intFromEnum(Register.ymm15) => true, @intFromEnum(Register.xmm8) ... @intFromEnum(Register.xmm15) => true, + @intFromEnum(Register.cr8) ... @intFromEnum(Register.cr15) => true, + @intFromEnum(Register.dr8) ... @intFromEnum(Register.dr15) => true, + else => false, // zig fmt: on }; @@ -316,6 +333,9 @@ pub const Register = enum(u7) { @intFromEnum(Register.es) ... @intFromEnum(Register.gs) => @intFromEnum(Register.es), + @intFromEnum(Register.cr0) ... @intFromEnum(Register.cr15) => @intFromEnum(Register.cr0), + @intFromEnum(Register.dr0) ... 
@intFromEnum(Register.dr15) => @intFromEnum(Register.dr0), + else => unreachable, // zig fmt: on }; @@ -397,6 +417,7 @@ pub const Register = enum(u7) { .mmx => 41 + @as(u6, reg.enc()), .segment => 50 + @as(u6, reg.enc()), .ip => 16, + .cr, .dr => unreachable, }; } }; diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig index b7449c2146..ce61310406 100644 --- a/src/arch/x86_64/encoder.zig +++ b/src/arch/x86_64/encoder.zig @@ -389,6 +389,7 @@ pub const Instruction = struct { const enc = inst.encoding; const data = enc.data; + try inst.encodeWait(encoder); if (data.mode.isVex()) { try inst.encodeVexPrefix(encoder); const opc = inst.encoding.opcode(); @@ -404,19 +405,24 @@ pub const Instruction = struct { .z, .o, .zo, .oz => {}, .i, .d => try encodeImm(inst.ops[0].imm, data.ops[0], encoder), .zi, .oi => try encodeImm(inst.ops[1].imm, data.ops[1], encoder), + .ii => { + try encodeImm(inst.ops[0].imm, data.ops[0], encoder); + try encodeImm(inst.ops[1].imm, data.ops[1], encoder); + }, .fd => try encoder.imm64(inst.ops[1].mem.moffs.offset), .td => try encoder.imm64(inst.ops[0].mem.moffs.offset), else => { - const mem_op = switch (data.op_en) { + const mem_op: Operand = switch (data.op_en) { + .ia => .{ .reg = .eax }, .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0], - .rm, .rmi, .rm0, .vmi, .rmv => inst.ops[1], + .rm, .rmi, .rm0, .vm, .vmi, .rmv => inst.ops[1], .rvm, .rvmr, .rvmi => inst.ops[2], else => unreachable, }; switch (mem_op) { .reg => |reg| { const rm = switch (data.op_en) { - .m, .mi, .m1, .mc, .vmi => enc.modRmExt(), + .ia, .m, .mi, .m1, .mc, .vm, .vmi => enc.modRmExt(), .mr, .mri, .mrc => inst.ops[1].reg.lowEnc(), .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => inst.ops[0].reg.lowEnc(), .mvr => inst.ops[2].reg.lowEnc(), @@ -426,7 +432,7 @@ pub const Instruction = struct { }, .mem => |mem| { const op = switch (data.op_en) { - .m, .mi, .m1, .mc, .vmi => .none, + .m, .mi, .m1, .mc, .vm, .vmi => .none, .mr, .mri, .mrc => inst.ops[1], .rm, 
.rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => inst.ops[0], .mvr => inst.ops[2], @@ -438,6 +444,7 @@ pub const Instruction = struct { } switch (data.op_en) { + .ia => try encodeImm(inst.ops[0].imm, data.ops[0], encoder), .mi => try encodeImm(inst.ops[1].imm, data.ops[1], encoder), .rmi, .mri, .vmi => try encodeImm(inst.ops[2].imm, data.ops[2], encoder), .rvmr => try encoder.imm8(@as(u8, inst.ops[3].reg.enc()) << 4), @@ -460,6 +467,13 @@ pub const Instruction = struct { } } + fn encodeWait(inst: Instruction, encoder: anytype) !void { + switch (inst.encoding.data.mode) { + .wait => try encoder.opcode_1byte(0x9b), + else => {}, + } + } + fn encodeLegacyPrefixes(inst: Instruction, encoder: anytype) !void { const enc = inst.encoding; const data = enc.data; @@ -481,7 +495,7 @@ pub const Instruction = struct { } const segment_override: ?Register = switch (op_en) { - .z, .i, .zi, .o, .zo, .oz, .oi, .d => null, + .z, .i, .zi, .ii, .ia, .o, .zo, .oz, .oi, .d => null, .fd => inst.ops[1].mem.base().reg, .td => inst.ops[0].mem.base().reg, .rm, .rmi, .rm0 => if (inst.ops[1].isSegmentRegister()) @@ -500,7 +514,7 @@ pub const Instruction = struct { } else null, - .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => unreachable, + .vm, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => unreachable, }; if (segment_override) |seg| { legacy.setSegmentOverride(seg); @@ -517,7 +531,7 @@ pub const Instruction = struct { rex.w = inst.encoding.data.mode == .long; switch (op_en) { - .z, .i, .zi, .fd, .td, .d => {}, + .z, .i, .zi, .ii, .ia, .fd, .td, .d => {}, .o, .oz, .oi => rex.b = inst.ops[0].reg.isExtended(), .zo => rex.b = inst.ops[1].reg.isExtended(), .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .rmv => { @@ -536,7 +550,7 @@ pub const Instruction = struct { rex.b = b_x_op.isBaseExtended(); rex.x = b_x_op.isIndexExtended(); }, - .vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable, + .vm, .vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable, } try encoder.rex(rex); @@ -552,21 +566,19 @@ pub const Instruction = struct { 
vex.w = inst.encoding.data.mode.isLong(); switch (op_en) { - .z, .i, .zi, .fd, .td, .d => {}, - .o, .oz, .oi => vex.b = inst.ops[0].reg.isExtended(), - .zo => vex.b = inst.ops[1].reg.isExtended(), - .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => { + .z, .i, .zi, .ii, .ia, .fd, .td, .d, .o, .oz, .oi, .zo => unreachable, + .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .vm, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => { const r_op = switch (op_en) { .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => inst.ops[0], .mr, .mri, .mrc => inst.ops[1], .mvr => inst.ops[2], - .m, .mi, .m1, .mc, .vmi => .none, + .m, .mi, .m1, .mc, .vm, .vmi => .none, else => unreachable, }; vex.r = r_op.isBaseExtended(); const b_x_op = switch (op_en) { - .rm, .rmi, .rm0, .vmi, .rmv => inst.ops[1], + .rm, .rmi, .rm0, .vm, .vmi, .rmv => inst.ops[1], .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0], .rvm, .rvmr, .rvmi => inst.ops[2], else => unreachable, @@ -595,7 +607,7 @@ pub const Instruction = struct { switch (op_en) { else => {}, - .vmi => vex.v = inst.ops[0].reg, + .vm, .vmi => vex.v = inst.ops[0].reg, .rvm, .rvmr, .rvmi => vex.v = inst.ops[1].reg, .rmv => vex.v = inst.ops[2].reg, } diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index fcb500d0bf..389317d2ae 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -13,6 +13,16 @@ pub const Entry = struct { Mnemonic, OpEn, []const Op, []const u8, modrm_ext, Mo // zig fmt: off pub const table = [_]Entry{ // General-purpose + .{ .aaa, .z, &.{}, &.{ 0x37 }, 0, .none, .@"32bit" }, + + .{ .aad, .z, &.{ }, &.{ 0xd5, 0x0a }, 0, .none, .@"32bit" }, + .{ .aad, .i, &.{ .imm8 }, &.{ 0xd5 }, 0, .none, .@"32bit" }, + + .{ .aam, .z, &.{ }, &.{ 0xd4, 0x0a }, 0, .none, .@"32bit" }, + .{ .aam, .i, &.{ .imm8 }, &.{ 0xd4 }, 0, .none, .@"32bit" }, + + .{ .aas, .z, &.{}, &.{ 0x3f }, 0, .none, .@"32bit" }, + .{ .adc, .zi, &.{ .al, .imm8 }, &.{ 0x14 }, 0, .none,
.none }, .{ .adc, .zi, &.{ .ax, .imm16 }, &.{ 0x15 }, 0, .short, .none }, .{ .adc, .zi, &.{ .eax, .imm32 }, &.{ 0x15 }, 0, .none, .none }, @@ -82,6 +92,11 @@ pub const table = [_]Entry{ .{ .@"and", .rm, &.{ .r32, .rm32 }, &.{ 0x23 }, 0, .none, .none }, .{ .@"and", .rm, &.{ .r64, .rm64 }, &.{ 0x23 }, 0, .long, .none }, + .{ .arpl, .mr, &.{ .rm16, .r16 }, &.{ 0x63 }, 0, .none, .@"32bit" }, + + .{ .bound, .rm, &.{ .r16, .m }, &.{ 0x62 }, 0, .short, .@"32bit" }, + .{ .bound, .rm, &.{ .r32, .m }, &.{ 0x62 }, 0, .none, .@"32bit" }, + .{ .bsf, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0xbc }, 0, .short, .none }, .{ .bsf, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xbc }, 0, .none, .none }, .{ .bsf, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0xbc }, 0, .long, .none }, @@ -122,15 +137,12 @@ pub const table = [_]Entry{ .{ .bts, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 5, .long, .none }, .{ .call, .d, &.{ .rel32 }, &.{ 0xe8 }, 0, .none, .none }, - .{ .call, .m, &.{ .rm64 }, &.{ 0xff }, 2, .none, .none }, + .{ .call, .m, &.{ .rm32 }, &.{ 0xff }, 2, .none, .@"32bit" }, + .{ .call, .m, &.{ .rm64 }, &.{ 0xff }, 2, .none, .@"64bit" }, - .{ .cbw, .z, &.{ .o16 }, &.{ 0x98 }, 0, .short, .none }, - .{ .cwde, .z, &.{ .o32 }, &.{ 0x98 }, 0, .none, .none }, - .{ .cdqe, .z, &.{ .o64 }, &.{ 0x98 }, 0, .long, .none }, - - .{ .cwd, .z, &.{ .o16 }, &.{ 0x99 }, 0, .short, .none }, - .{ .cdq, .z, &.{ .o32 }, &.{ 0x99 }, 0, .none, .none }, - .{ .cqo, .z, &.{ .o64 }, &.{ 0x99 }, 0, .long, .none }, + .{ .cbw, .z, &.{}, &.{ 0x98 }, 0, .short, .none }, + .{ .cwde, .z, &.{}, &.{ 0x98 }, 0, .none, .none }, + .{ .cdqe, .z, &.{}, &.{ 0x98 }, 0, .long, .none }, .{ .clac, .z, &.{}, &.{ 0x0f, 0x01, 0xca }, 0, .none, .smap }, @@ -138,14 +150,24 @@ pub const table = [_]Entry{ .{ .cld, .z, &.{}, &.{ 0xfc }, 0, .none, .none }, + .{ .cldemote, .m, &.{ .m8 }, &.{ 0x0f, 0x1c }, 0, .none, .cldemote }, + .{ .clflush, .m, &.{ .m8 }, &.{ 0x0f, 0xae }, 7, .none, .none }, + .{ .clflushopt, .m, &.{ .m8 }, &.{ 0x66, 0x0f, 0xae }, 7, .none,
.clflushopt }, + .{ .cli, .z, &.{}, &.{ 0xfa }, 0, .none, .none }, + .{ .clrssbsy, .m, &.{ .m64 }, &.{ 0xf3, 0x0f, 0xae }, 6, .none, .shstk }, + .{ .clts, .z, &.{}, &.{ 0x0f, 0x06 }, 0, .none, .none }, .{ .clui, .z, &.{}, &.{ 0xf3, 0x0f, 0x01, 0xee }, 0, .none, .uintr }, + .{ .clwb, .m, &.{ .m8 }, &.{ 0x66, 0x0f, 0xae }, 6, .none, .clwb }, + + .{ .cmc, .z, &.{}, &.{ 0xf5 }, 0, .none, .none }, + .{ .cmova, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .cmov }, .{ .cmova, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .cmov }, .{ .cmova, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .cmov }, @@ -264,11 +286,10 @@ pub const table = [_]Entry{ .{ .cmps, .z, &.{ .m16, .m16 }, &.{ 0xa7 }, 0, .short, .none }, .{ .cmps, .z, &.{ .m32, .m32 }, &.{ 0xa7 }, 0, .none, .none }, .{ .cmps, .z, &.{ .m64, .m64 }, &.{ 0xa7 }, 0, .long, .none }, - - .{ .cmpsb, .z, &.{}, &.{ 0xa6 }, 0, .none, .none }, - .{ .cmpsw, .z, &.{}, &.{ 0xa7 }, 0, .short, .none }, - .{ .cmpsd, .z, &.{}, &.{ 0xa7 }, 0, .none, .none }, - .{ .cmpsq, .z, &.{}, &.{ 0xa7 }, 0, .long, .none }, + .{ .cmpsb, .z, &.{ }, &.{ 0xa6 }, 0, .none, .none }, + .{ .cmpsw, .z, &.{ }, &.{ 0xa7 }, 0, .short, .none }, + .{ .cmpsd, .z, &.{ }, &.{ 0xa7 }, 0, .none, .none }, + .{ .cmpsq, .z, &.{ }, &.{ 0xa7 }, 0, .long, .none }, .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .none, .none }, .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .rex, .none }, @@ -281,6 +302,14 @@ pub const table = [_]Entry{ .{ .cpuid, .z, &.{}, &.{ 0x0f, 0xa2 }, 0, .none, .none }, + .{ .cwd, .z, &.{}, &.{ 0x99 }, 0, .short, .none }, + .{ .cdq, .z, &.{}, &.{ 0x99 }, 0, .none, .none }, + .{ .cqo, .z, &.{}, &.{ 0x99 }, 0, .long, .none }, + + .{ .daa, .z, &.{}, &.{ 0x27 }, 0, .none, .@"32bit" }, + + .{ .das, .z, &.{}, &.{ 0x2f }, 0, .none, .@"32bit" }, + .{ .dec, .m, &.{ .rm8 }, &.{ 0xfe }, 1, .none, .none }, .{ .dec, .m, &.{ .rm8 }, &.{ 0xfe }, 1, .rex, .none }, .{ .dec, .m, &.{ .rm16 }, &.{ 0xff }, 1, .short, .none }, @@ -293,26
+322,50 @@ pub const table = [_]Entry{ .{ .div, .m, &.{ .rm32 }, &.{ 0xf7 }, 6, .none, .none }, .{ .div, .m, &.{ .rm64 }, &.{ 0xf7 }, 6, .long, .none }, + .{ .endbr32, .z, &.{}, &.{ 0xf3, 0x0f, 0x1e, 0xfb }, 0, .none, .none }, + + .{ .endbr64, .z, &.{}, &.{ 0xf3, 0x0f, 0x1e, 0xfa }, 0, .none, .none }, + + .{ .enqcmd, .rm, &.{ .r32, .m }, &.{ 0xf2, 0x0f, 0x38, 0xf8 }, 0, .none, .enqcmd }, + .{ .enqcmd, .rm, &.{ .r64, .m }, &.{ 0xf2, 0x0f, 0x38, 0xf8 }, 0, .none, .enqcmd }, + + .{ .enqcmds, .rm, &.{ .r32, .m }, &.{ 0xf3, 0x0f, 0x38, 0xf8 }, 0, .none, .enqcmd }, + .{ .enqcmds, .rm, &.{ .r64, .m }, &.{ 0xf3, 0x0f, 0x38, 0xf8 }, 0, .none, .enqcmd }, + + .{ .enter, .ii, &.{ .imm16, .imm8 }, &.{ 0xc8 }, 0, .none, .none }, + + .{ .hlt, .z, &.{}, &.{ 0xf4 }, 0, .none, .none }, + + .{ .hreset, .ia, &.{ .imm8 }, &.{ 0xf3, 0x0f, 0x3a, 0xf0 }, 0, .none, .hreset }, + .{ .hreset, .ia, &.{ .imm8, .eax }, &.{ 0xf3, 0x0f, 0x3a, 0xf0 }, 0, .none, .hreset }, + .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .none, .none }, .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .rex, .none }, .{ .idiv, .m, &.{ .rm16 }, &.{ 0xf7 }, 7, .short, .none }, .{ .idiv, .m, &.{ .rm32 }, &.{ 0xf7 }, 7, .none, .none }, .{ .idiv, .m, &.{ .rm64 }, &.{ 0xf7 }, 7, .long, .none }, - .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .none, .none }, - .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .rex, .none }, - .{ .imul, .m, &.{ .rm16, }, &.{ 0xf7 }, 5, .short, .none }, - .{ .imul, .m, &.{ .rm32, }, &.{ 0xf7 }, 5, .none, .none }, - .{ .imul, .m, &.{ .rm64, }, &.{ 0xf7 }, 5, .long, .none }, + .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .none, .none }, + .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .rex, .none }, + .{ .imul, .m, &.{ .rm16, }, &.{ 0xf7 }, 5, .short, .none }, + .{ .imul, .m, &.{ .rm32, }, &.{ 0xf7 }, 5, .none, .none }, + .{ .imul, .m, &.{ .rm64, }, &.{ 0xf7 }, 5, .long, .none }, .{ .imul, .rm, &.{ .r16, .rm16, }, &.{ 0x0f, 0xaf }, 0, .short, .none }, .{ .imul, .rm, &.{ .r32, .rm32, }, &.{ 0x0f, 0xaf }, 0, .none, .none }, .{ .imul, 
.rm, &.{ .r64, .rm64, }, &.{ 0x0f, 0xaf }, 0, .long, .none }, - .{ .imul, .rmi, &.{ .r16, .rm16, .imm8s }, &.{ 0x6b }, 0, .short, .none }, - .{ .imul, .rmi, &.{ .r32, .rm32, .imm8s }, &.{ 0x6b }, 0, .none, .none }, - .{ .imul, .rmi, &.{ .r64, .rm64, .imm8s }, &.{ 0x6b }, 0, .long, .none }, - .{ .imul, .rmi, &.{ .r16, .rm16, .imm16 }, &.{ 0x69 }, 0, .short, .none }, - .{ .imul, .rmi, &.{ .r32, .rm32, .imm32 }, &.{ 0x69 }, 0, .none, .none }, - .{ .imul, .rmi, &.{ .r64, .rm64, .imm32 }, &.{ 0x69 }, 0, .long, .none }, + .{ .imul, .rmi, &.{ .r16, .rm16, .imm8s }, &.{ 0x6b }, 0, .short, .none }, + .{ .imul, .rmi, &.{ .r32, .rm32, .imm8s }, &.{ 0x6b }, 0, .none, .none }, + .{ .imul, .rmi, &.{ .r64, .rm64, .imm8s }, &.{ 0x6b }, 0, .long, .none }, + .{ .imul, .rmi, &.{ .r16, .rm16, .imm16 }, &.{ 0x69 }, 0, .short, .none }, + .{ .imul, .rmi, &.{ .r32, .rm32, .imm32 }, &.{ 0x69 }, 0, .none, .none }, + .{ .imul, .rmi, &.{ .r64, .rm64, .imm32 }, &.{ 0x69 }, 0, .long, .none }, + + .{ .in, .zi, &.{ .al, .imm8 }, &.{ 0xe4 }, 0, .none, .none }, + .{ .in, .zi, &.{ .ax, .imm8 }, &.{ 0xe5 }, 0, .short, .none }, + .{ .in, .zi, &.{ .eax, .imm8 }, &.{ 0xe5 }, 0, .none, .none }, + .{ .in, .z, &.{ .al, .dx }, &.{ 0xec }, 0, .none, .none }, + .{ .in, .z, &.{ .ax, .dx }, &.{ 0xed }, 0, .short, .none }, + .{ .in, .z, &.{ .eax, .dx }, &.{ 0xed }, 0, .none, .none }, .{ .inc, .m, &.{ .rm8 }, &.{ 0xfe }, 0, .none, .none }, .{ .inc, .m, &.{ .rm8 }, &.{ 0xfe }, 0, .rex, .none }, @@ -320,58 +373,108 @@ pub const table = [_]Entry{ .{ .inc, .m, &.{ .rm32 }, &.{ 0xff }, 0, .none, .none }, .{ .inc, .m, &.{ .rm64 }, &.{ 0xff }, 0, .long, .none }, - .{ .int3, .z, &.{}, &.{ 0xcc }, 0, .none, .none }, + .{ .incsspd, .m, &.{ .r32 }, &.{ 0xf3, 0x0f, 0xae }, 5, .none, .shstk }, + .{ .incsspq, .m, &.{ .r64 }, &.{ 0xf3, 0x0f, 0xae }, 5, .long, .shstk }, - .{ .ja, .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none, .none }, - .{ .jae, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none }, - .{ .jb, .d, &.{ .rel32 }, 
&.{ 0x0f, 0x82 }, 0, .none, .none }, - .{ .jbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none, .none }, - .{ .jc, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none }, - .{ .jrcxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .none, .none }, - .{ .je, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none, .none }, - .{ .jg, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none, .none }, - .{ .jge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none, .none }, - .{ .jl, .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none, .none }, - .{ .jle, .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none, .none }, - .{ .jna, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none, .none }, - .{ .jnae, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none }, - .{ .jnb, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none }, - .{ .jnbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none, .none }, - .{ .jnc, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none }, - .{ .jne, .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none, .none }, - .{ .jng, .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none, .none }, - .{ .jnge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none, .none }, - .{ .jnl, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none, .none }, - .{ .jnle, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none, .none }, - .{ .jno, .d, &.{ .rel32 }, &.{ 0x0f, 0x81 }, 0, .none, .none }, - .{ .jnp, .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none, .none }, - .{ .jns, .d, &.{ .rel32 }, &.{ 0x0f, 0x89 }, 0, .none, .none }, - .{ .jnz, .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none, .none }, - .{ .jo, .d, &.{ .rel32 }, &.{ 0x0f, 0x80 }, 0, .none, .none }, - .{ .jp, .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none, .none }, - .{ .jpe, .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none, .none }, - .{ .jpo, .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none, .none }, - .{ .js, .d, &.{ .rel32 }, &.{ 0x0f, 0x88 }, 0, .none, .none }, - .{ .jz, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none, .none }, + .{ .ins, .z, &.{ .m8, .dx }, &.{ 0x6c }, 0, .none, .none }, + .{ .ins, .z, &.{ .m16, .dx }, &.{ 0x6d }, 0, .short, .none 
}, + .{ .ins, .z, &.{ .m32, .dx }, &.{ 0x6d }, 0, .none, .none }, + .{ .insb, .z, &.{ }, &.{ 0x6c }, 0, .none, .none }, + .{ .insw, .z, &.{ }, &.{ 0x6d }, 0, .short, .none }, + .{ .insd, .z, &.{ }, &.{ 0x6d }, 0, .none, .none }, + + .{ .int3, .z, &.{ }, &.{ 0xcc }, 0, .none, .none }, + .{ .int, .i, &.{ .imm8 }, &.{ 0xcd }, 0, .none, .none }, + .{ .into, .z, &.{ }, &.{ 0xce }, 0, .none, .@"32bit" }, + .{ .int1, .z, &.{ }, &.{ 0xf1 }, 0, .none, .none }, + + .{ .invd, .z, &.{}, &.{ 0x0f, 0x08 }, 0, .none, .none }, + + .{ .invlpg, .m, &.{ .m }, &.{ 0x0f, 0x01 }, 7, .none, .none }, + + .{ .invpcid, .rm, &.{ .r32, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x82 }, 0, .none, .@"invpcid 32bit" }, + .{ .invpcid, .rm, &.{ .r64, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x82 }, 0, .none, .@"invpcid 64bit" }, + + .{ .iretw, .z, &.{}, &.{ 0xcf }, 0, .short, .none }, + .{ .iretd, .z, &.{}, &.{ 0xcf }, 0, .none, .none }, + .{ .iret, .z, &.{}, &.{ 0xcf }, 0, .none, .none }, + .{ .iretq, .z, &.{}, &.{ 0xcf }, 0, .long, .none }, + + .{ .ja, .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none, .none }, + .{ .jae, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none }, + .{ .jb, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none }, + .{ .jbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none, .none }, + .{ .jc, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none }, + .{ .jcxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .short, .@"32bit" }, + .{ .jecxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .none, .@"32bit" }, + .{ .jrcxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .none, .@"64bit" }, + .{ .je, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none, .none }, + .{ .jg, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none, .none }, + .{ .jge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none, .none }, + .{ .jl, .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none, .none }, + .{ .jle, .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none, .none }, + .{ .jna, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none, .none }, + .{ .jnae, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none }, + .{ 
.jnb, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none }, + .{ .jnbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none, .none }, + .{ .jnc, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none }, + .{ .jne, .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none, .none }, + .{ .jng, .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none, .none }, + .{ .jnge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none, .none }, + .{ .jnl, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none, .none }, + .{ .jnle, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none, .none }, + .{ .jno, .d, &.{ .rel32 }, &.{ 0x0f, 0x81 }, 0, .none, .none }, + .{ .jnp, .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none, .none }, + .{ .jns, .d, &.{ .rel32 }, &.{ 0x0f, 0x89 }, 0, .none, .none }, + .{ .jnz, .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none, .none }, + .{ .jo, .d, &.{ .rel32 }, &.{ 0x0f, 0x80 }, 0, .none, .none }, + .{ .jp, .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none, .none }, + .{ .jpe, .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none, .none }, + .{ .jpo, .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none, .none }, + .{ .js, .d, &.{ .rel32 }, &.{ 0x0f, 0x88 }, 0, .none, .none }, + .{ .jz, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none, .none }, .{ .jmp, .d, &.{ .rel32 }, &.{ 0xe9 }, 0, .none, .none }, .{ .jmp, .m, &.{ .rm64 }, &.{ 0xff }, 4, .none, .none }, + .{ .lahf, .z, &.{}, &.{ 0x9f }, 0, .none, .sahf }, + + .{ .lar, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x02 }, 0, .none, .none }, + .{ .lar, .rm, &.{ .r32, .r32_m16 }, &.{ 0x0f, 0x02 }, 0, .none, .none }, + .{ .lea, .rm, &.{ .r16, .m }, &.{ 0x8d }, 0, .short, .none }, .{ .lea, .rm, &.{ .r32, .m }, &.{ 0x8d }, 0, .none, .none }, .{ .lea, .rm, &.{ .r64, .m }, &.{ 0x8d }, 0, .long, .none }, + .{ .leave, .z, &.{}, &.{ 0xc9 }, 0, .none, .none }, + .{ .lfence, .z, &.{}, &.{ 0x0f, 0xae, 0xe8 }, 0, .none, .none }, + .{ .lgdt, .m, &.{ .m }, &.{ 0x0f, 0x01 }, 2, .none, .none }, + .{ .lidt, .m, &.{ .m }, &.{ 0x0f, 0x01 }, 3, .none, .none }, + + .{ .lldt, .m, &.{ .rm16 }, &.{ 0x0f, 0x00 }, 2, .none, .none }, 
+ + .{ .lmsw, .m, &.{ .rm16 }, &.{ 0x0f, 0x01 }, 6, .none, .none }, + .{ .lods, .z, &.{ .m8 }, &.{ 0xac }, 0, .none, .none }, .{ .lods, .z, &.{ .m16 }, &.{ 0xad }, 0, .short, .none }, .{ .lods, .z, &.{ .m32 }, &.{ 0xad }, 0, .none, .none }, .{ .lods, .z, &.{ .m64 }, &.{ 0xad }, 0, .long, .none }, + .{ .lodsb, .z, &.{ }, &.{ 0xac }, 0, .none, .none }, + .{ .lodsw, .z, &.{ }, &.{ 0xad }, 0, .short, .none }, + .{ .lodsd, .z, &.{ }, &.{ 0xad }, 0, .none, .none }, + .{ .lodsq, .z, &.{ }, &.{ 0xad }, 0, .long, .none }, - .{ .lodsb, .z, &.{}, &.{ 0xac }, 0, .none, .none }, - .{ .lodsw, .z, &.{}, &.{ 0xad }, 0, .short, .none }, - .{ .lodsd, .z, &.{}, &.{ 0xad }, 0, .none, .none }, - .{ .lodsq, .z, &.{}, &.{ 0xad }, 0, .long, .none }, + .{ .loop, .d, &.{ .rel8 }, &.{ 0xe2 }, 0, .none, .none }, + .{ .loope, .d, &.{ .rel8 }, &.{ 0xe1 }, 0, .none, .none }, + .{ .loopne, .d, &.{ .rel8 }, &.{ 0xe0 }, 0, .none, .none }, + + .{ .lsl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x03 }, 0, .none, .none }, + .{ .lsl, .rm, &.{ .r32, .r32_m16 }, &.{ 0x0f, 0x03 }, 0, .none, .none }, + .{ .lsl, .rm, &.{ .r64, .r32_m16 }, &.{ 0x0f, 0x03 }, 0, .none, .none }, + + .{ .ltr, .m, &.{ .rm16 }, &.{ 0x0f, 0x00 }, 3, .none, .none }, .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .short, .lzcnt }, .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .lzcnt }, @@ -414,6 +517,16 @@ pub const table = [_]Entry{ .{ .mov, .mi, &.{ .rm32, .imm32 }, &.{ 0xc7 }, 0, .none, .none }, .{ .mov, .mi, &.{ .rm64, .imm32s }, &.{ 0xc7 }, 0, .long, .none }, + .{ .mov, .mr, &.{ .r32, .cr }, &.{ 0x0f, 0x20 }, 0, .none, .@"32bit" }, + .{ .mov, .mr, &.{ .r64, .cr }, &.{ 0x0f, 0x20 }, 0, .none, .@"64bit" }, + .{ .mov, .rm, &.{ .cr, .r32 }, &.{ 0x0f, 0x22 }, 0, .none, .@"32bit" }, + .{ .mov, .rm, &.{ .cr, .r64 }, &.{ 0x0f, 0x22 }, 0, .none, .@"64bit" }, + + .{ .mov, .mr, &.{ .r32, .dr }, &.{ 0x0f, 0x21 }, 0, .none, .@"32bit" }, + .{ .mov, .mr, &.{ .r64, .dr }, &.{ 0x0f, 0x21 }, 0, .none, .@"64bit" }, + 
.{ .mov, .rm, &.{ .dr, .r32 }, &.{ 0x0f, 0x23 }, 0, .none, .@"32bit" }, + .{ .mov, .rm, &.{ .dr, .r64 }, &.{ 0x0f, 0x23 }, 0, .none, .@"64bit" }, + .{ .movbe, .rm, &.{ .r16, .m16 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .short, .movbe }, .{ .movbe, .rm, &.{ .r32, .m32 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none, .movbe }, .{ .movbe, .rm, &.{ .r64, .m64 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .long, .movbe }, @@ -425,11 +538,10 @@ pub const table = [_]Entry{ .{ .movs, .z, &.{ .m16, .m16 }, &.{ 0xa5 }, 0, .short, .none }, .{ .movs, .z, &.{ .m32, .m32 }, &.{ 0xa5 }, 0, .none, .none }, .{ .movs, .z, &.{ .m64, .m64 }, &.{ 0xa5 }, 0, .long, .none }, - - .{ .movsb, .z, &.{}, &.{ 0xa4 }, 0, .none, .none }, - .{ .movsw, .z, &.{}, &.{ 0xa5 }, 0, .short, .none }, - .{ .movsd, .z, &.{}, &.{ 0xa5 }, 0, .none, .none }, - .{ .movsq, .z, &.{}, &.{ 0xa5 }, 0, .long, .none }, + .{ .movsb, .z, &.{ }, &.{ 0xa4 }, 0, .none, .none }, + .{ .movsw, .z, &.{ }, &.{ 0xa5 }, 0, .short, .none }, + .{ .movsd, .z, &.{ }, &.{ 0xa5 }, 0, .none, .none }, + .{ .movsq, .z, &.{ }, &.{ 0xa5 }, 0, .long, .none }, .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .short, .none }, .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .rex_short, .none }, @@ -441,8 +553,8 @@ pub const table = [_]Entry{ .{ .movsx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xbf }, 0, .long, .none }, // This instruction is discouraged. 
- .{ .movsxd, .rm, &.{ .r32, .rm32 }, &.{ 0x63 }, 0, .none, .none }, - .{ .movsxd, .rm, &.{ .r64, .rm32 }, &.{ 0x63 }, 0, .long, .none }, + .{ .movsxd, .rm, &.{ .r32, .rm32 }, &.{ 0x63 }, 0, .none, .@"64bit" }, + .{ .movsxd, .rm, &.{ .r64, .rm32 }, &.{ 0x63 }, 0, .long, .@"64bit" }, .{ .movzx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .short, .none }, .{ .movzx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .rex_short, .none }, @@ -496,6 +608,20 @@ pub const table = [_]Entry{ .{ .@"or", .rm, &.{ .r32, .rm32 }, &.{ 0x0b }, 0, .none, .none }, .{ .@"or", .rm, &.{ .r64, .rm64 }, &.{ 0x0b }, 0, .long, .none }, + .{ .out, .zi, &.{ .imm8, .al }, &.{ 0xe6 }, 0, .none, .none }, + .{ .out, .zi, &.{ .imm8, .ax }, &.{ 0xe7 }, 0, .short, .none }, + .{ .out, .zi, &.{ .imm8, .eax }, &.{ 0xe7 }, 0, .none, .none }, + .{ .out, .z, &.{ .dx, .al }, &.{ 0xee }, 0, .none, .none }, + .{ .out, .z, &.{ .dx, .ax }, &.{ 0xef }, 0, .short, .none }, + .{ .out, .z, &.{ .dx, .eax }, &.{ 0xef }, 0, .none, .none }, + + .{ .outs, .z, &.{ .dx, .m8 }, &.{ 0x6e }, 0, .none, .none }, + .{ .outs, .z, &.{ .dx, .m16 }, &.{ 0x6f }, 0, .short, .none }, + .{ .outs, .z, &.{ .dx, .m32 }, &.{ 0x6f }, 0, .none, .none }, + .{ .outsb, .z, &.{ }, &.{ 0x6e }, 0, .none, .none }, + .{ .outsw, .z, &.{ }, &.{ 0x6f }, 0, .short, .none }, + .{ .outsd, .z, &.{ }, &.{ 0x6f }, 0, .none, .none }, + .{ .pause, .z, &.{}, &.{ 0xf3, 0x90 }, 0, .none, .none }, .{ .pop, .o, &.{ .r16 }, &.{ 0x58 }, 0, .short, .none }, @@ -507,7 +633,9 @@ pub const table = [_]Entry{ .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .popcnt }, .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .popcnt }, - .{ .popfq, .z, &.{}, &.{ 0x9d }, 0, .none, .none }, + .{ .popf, .z, &.{}, &.{ 0x9d }, 0, .short, .none }, + .{ .popfd, .z, &.{}, &.{ 0x9d }, 0, .none, .@"32bit" }, + .{ .popfq, .z, &.{}, &.{ 0x9d }, 0, .none, .@"64bit" }, .{ .push, .o, &.{ .r16 }, &.{ 0x50 }, 0, .short, .none }, .{ .push, .o, &.{ .r64 }, &.{ 
0x50 }, 0, .none, .none }, @@ -553,6 +681,35 @@ pub const table = [_]Entry{ .{ .rcr, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 3, .none, .none }, .{ .rcr, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 3, .long, .none }, + .{ .rdfsbase, .m, &.{ .r32 }, &.{ 0xf3 ,0x0f, 0xae }, 0, .none, .fsgsbase }, + .{ .rdfsbase, .m, &.{ .r64 }, &.{ 0xf3 ,0x0f, 0xae }, 0, .long, .fsgsbase }, + .{ .rdgsbase, .m, &.{ .r32 }, &.{ 0xf3 ,0x0f, 0xae }, 1, .none, .fsgsbase }, + .{ .rdgsbase, .m, &.{ .r64 }, &.{ 0xf3 ,0x0f, 0xae }, 1, .long, .fsgsbase }, + + .{ .rdmsr, .z, &.{}, &.{ 0x0f, 0x32 }, 0, .none, .none }, + + .{ .rdpid, .m, &.{ .r32 }, &.{ 0xf3, 0x0f, 0xc7 }, 7, .none, .@"rdpid 32bit" }, + .{ .rdpid, .m, &.{ .r64 }, &.{ 0xf3, 0x0f, 0xc7 }, 7, .none, .@"rdpid 64bit" }, + + .{ .rdpkru, .z, &.{}, &.{ 0x0f, 0x01, 0xee }, 0, .none, .pku }, + + .{ .rdpmc, .z, &.{}, &.{ 0x0f, 0x33 }, 0, .none, .none }, + + .{ .rdrand, .m, &.{ .r16 }, &.{ 0x0f, 0xc7 }, 6, .short, .rdrnd }, + .{ .rdrand, .m, &.{ .r32 }, &.{ 0x0f, 0xc7 }, 6, .none, .rdrnd }, + .{ .rdrand, .m, &.{ .r64 }, &.{ 0x0f, 0xc7 }, 6, .long, .rdrnd }, + + .{ .rdseed, .m, &.{ .r16 }, &.{ 0x0f, 0xc7 }, 7, .short, .rdseed }, + .{ .rdseed, .m, &.{ .r32 }, &.{ 0x0f, 0xc7 }, 7, .none, .rdseed }, + .{ .rdseed, .m, &.{ .r64 }, &.{ 0x0f, 0xc7 }, 7, .long, .rdseed }, + + .{ .rdssd, .m, &.{ .r32 }, &.{ 0xf3, 0x0f, 0x1e }, 1, .none, .shstk }, + .{ .rdssq, .m, &.{ .r64 }, &.{ 0xf3, 0x0f, 0x1e }, 1, .long, .shstk }, + + .{ .rdtsc, .z, &.{}, &.{ 0x0f, 0x31 }, 0, .none, .none }, + + .{ .rdtscp, .z, &.{}, &.{ 0x0f, 0x01, 0xf9 }, 0, .none, .none }, + .{ .rol, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 0, .none, .none }, .{ .rol, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 0, .rex, .none }, .{ .rol, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 0, .none, .none }, @@ -585,6 +742,10 @@ pub const table = [_]Entry{ .{ .ror, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 1, .none, .none }, .{ .ror, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 1, .long, .none }, + .{ .rsm, .z, &.{}, &.{ 0x0f, 0xaa }, 0, .none, 
.none }, + + .{ .sahf, .z, &.{}, &.{ 0x9e }, 0, .none, .sahf }, + .{ .sal, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none, .none }, .{ .sal, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex, .none }, .{ .sal, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .short, .none }, @@ -644,11 +805,14 @@ pub const table = [_]Entry{ .{ .scas, .z, &.{ .m16 }, &.{ 0xaf }, 0, .short, .none }, .{ .scas, .z, &.{ .m32 }, &.{ 0xaf }, 0, .none, .none }, .{ .scas, .z, &.{ .m64 }, &.{ 0xaf }, 0, .long, .none }, + .{ .scasb, .z, &.{ }, &.{ 0xae }, 0, .none, .none }, + .{ .scasw, .z, &.{ }, &.{ 0xaf }, 0, .short, .none }, + .{ .scasd, .z, &.{ }, &.{ 0xaf }, 0, .none, .none }, + .{ .scasq, .z, &.{ }, &.{ 0xaf }, 0, .long, .none }, - .{ .scasb, .z, &.{}, &.{ 0xae }, 0, .none, .none }, - .{ .scasw, .z, &.{}, &.{ 0xaf }, 0, .short, .none }, - .{ .scasd, .z, &.{}, &.{ 0xaf }, 0, .none, .none }, - .{ .scasq, .z, &.{}, &.{ 0xaf }, 0, .long, .none }, + .{ .senduipi, .m, &.{ .r64 }, &.{ 0xf3, 0x0f, 0xc7 }, 6, .none, .uintr }, + + .{ .serialize, .z, &.{}, &.{ 0x0f, 0x01, 0xe8 }, 0, .none, .serialize }, .{ .seta, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .none, .none }, .{ .seta, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .rex, .none }, @@ -713,6 +877,14 @@ pub const table = [_]Entry{ .{ .sfence, .z, &.{}, &.{ 0x0f, 0xae, 0xf8 }, 0, .none, .none }, + .{ .sidt, .m, &.{ .m }, &.{ 0x0f, 0x01 }, 1, .none, .none }, + + .{ .sldt, .m, &.{ .rm16 }, &.{ 0x0f, 0x00 }, 0, .none, .none }, + + .{ .smsw, .m, &.{ .rm16 }, &.{ 0x0f, 0x01 }, 4, .short, .none }, + .{ .smsw, .m, &.{ .r32_m16 }, &.{ 0x0f, 0x01 }, 4, .none, .none }, + .{ .smsw, .m, &.{ .r64_m16 }, &.{ 0x0f, 0x01 }, 4, .long, .none }, + .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none, .none }, .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex, .none }, .{ .shl, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .short, .none }, @@ -767,17 +939,18 @@ pub const table = [_]Entry{ .{ .sti, .z, &.{}, &.{ 0xfb }, 0, .none, .none }, + .{ .str, .m, &.{ .rm16 }, &.{ 0x0f, 0x00 }, 1, 
.none, .none }, + .{ .stui, .z, &.{}, &.{ 0xf3, 0x0f, 0x01, 0xef }, 0, .none, .uintr }, .{ .stos, .z, &.{ .m8 }, &.{ 0xaa }, 0, .none, .none }, .{ .stos, .z, &.{ .m16 }, &.{ 0xab }, 0, .short, .none }, .{ .stos, .z, &.{ .m32 }, &.{ 0xab }, 0, .none, .none }, .{ .stos, .z, &.{ .m64 }, &.{ 0xab }, 0, .long, .none }, - - .{ .stosb, .z, &.{}, &.{ 0xaa }, 0, .none, .none }, - .{ .stosw, .z, &.{}, &.{ 0xab }, 0, .short, .none }, - .{ .stosd, .z, &.{}, &.{ 0xab }, 0, .none, .none }, - .{ .stosq, .z, &.{}, &.{ 0xab }, 0, .long, .none }, + .{ .stosb, .z, &.{ }, &.{ 0xaa }, 0, .none, .none }, + .{ .stosw, .z, &.{ }, &.{ 0xab }, 0, .short, .none }, + .{ .stosd, .z, &.{ }, &.{ 0xab }, 0, .none, .none }, + .{ .stosq, .z, &.{ }, &.{ 0xab }, 0, .long, .none }, .{ .sub, .zi, &.{ .al, .imm8 }, &.{ 0x2c }, 0, .none, .none }, .{ .sub, .zi, &.{ .ax, .imm16 }, &.{ 0x2d }, 0, .short, .none }, @@ -802,7 +975,17 @@ pub const table = [_]Entry{ .{ .sub, .rm, &.{ .r32, .rm32 }, &.{ 0x2b }, 0, .none, .none }, .{ .sub, .rm, &.{ .r64, .rm64 }, &.{ 0x2b }, 0, .long, .none }, - .{ .syscall, .z, &.{}, &.{ 0x0f, 0x05 }, 0, .none, .none }, + .{ .swapgs, .z, &.{}, &.{ 0x0f, 0x01, 0xf8 }, 0, .none, .@"64bit" }, + + .{ .syscall, .z, &.{}, &.{ 0x0f, 0x05 }, 0, .none, .@"64bit" }, + + .{ .sysenter, .z, &.{}, &.{ 0x0f, 0x34 }, 0, .none, .none }, + + .{ .sysexit, .z, &.{}, &.{ 0x0f, 0x35 }, 0, .none, .none }, + .{ .sysexit, .z, &.{}, &.{ 0x0f, 0x35 }, 0, .long, .none }, + + .{ .sysret, .z, &.{}, &.{ 0x0f, 0x07 }, 0, .none, .none }, + .{ .sysret, .z, &.{}, &.{ 0x0f, 0x07 }, 0, .long, .none }, .{ .@"test", .zi, &.{ .al, .imm8 }, &.{ 0xa8 }, 0, .none, .none }, .{ .@"test", .zi, &.{ .ax, .imm16 }, &.{ 0xa9 }, 0, .short, .none }, @@ -819,12 +1002,38 @@ pub const table = [_]Entry{ .{ .@"test", .mr, &.{ .rm32, .r32 }, &.{ 0x85 }, 0, .none, .none }, .{ .@"test", .mr, &.{ .rm64, .r64 }, &.{ 0x85 }, 0, .long, .none }, - .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .bmi }, - .{ .tzcnt, .rm, 
&.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .bmi }, - .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .bmi }, + .{ .testui, .z, &.{}, &.{ 0xf3, 0x0f, 0x01, 0xed }, 0, .none, .uintr }, + .{ .tpause, .m, &.{ .r32 }, &.{ 0x66, 0x0f, 0xae }, 6, .none, .waitpkg }, + + .{ .ud0, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xff }, 0, .none, .none }, + .{ .ud1, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xb9 }, 0, .none, .none }, .{ .ud2, .z, &.{}, &.{ 0x0f, 0x0b }, 0, .none, .none }, + .{ .uiret, .z, &.{}, &.{ 0xf3, 0x0f, 0x01, 0xec }, 0, .none, .uintr }, + + .{ .umonitor, .m, &.{ .r64 }, &.{ 0xf3, 0x0f, 0xae }, 6, .none, .waitpkg }, + + .{ .umwait, .m, &.{ .r32 }, &.{ 0xf2, 0x0f, 0xae }, 6, .none, .waitpkg }, + + .{ .verr, .m, &.{ .rm16 }, &.{ 0x0f, 0x00 }, 4, .none, .none }, + .{ .verw, .m, &.{ .rm16 }, &.{ 0x0f, 0x00 }, 5, .none, .none }, + + .{ .wrfsbase, .m, &.{ .r32 }, &.{ 0xf3 ,0x0f, 0xae }, 2, .none, .fsgsbase }, + .{ .wrfsbase, .m, &.{ .r64 }, &.{ 0xf3 ,0x0f, 0xae }, 2, .long, .fsgsbase }, + .{ .wrgsbase, .m, &.{ .r32 }, &.{ 0xf3 ,0x0f, 0xae }, 3, .none, .fsgsbase }, + .{ .wrgsbase, .m, &.{ .r64 }, &.{ 0xf3 ,0x0f, 0xae }, 3, .long, .fsgsbase }, + + .{ .wrmsr, .z, &.{}, &.{ 0x0f, 0x30 }, 0, .none, .none }, + + .{ .wrpkru, .z, &.{}, &.{ 0x0f, 0x01, 0xef }, 0, .none, .pku }, + + .{ .wrssd, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf6 }, 0, .none, .shstk }, + .{ .wrssq, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf6 }, 0, .long, .shstk }, + + .{ .wrussd, .mr, &.{ .m32, .r32 }, &.{ 0x66, 0x0f, 0x38, 0xf5 }, 0, .none, .shstk }, + .{ .wrussq, .mr, &.{ .m64, .r64 }, &.{ 0x66, 0x0f, 0x38, 0xf5 }, 0, .long, .shstk }, + .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .none, .none }, .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .rex, .none }, .{ .xadd, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xc1 }, 0, .short, .none }, @@ -850,6 +1059,11 @@ pub const table = [_]Entry{ .{ .xgetbv, .z, &.{}, &.{ 0x0f, 0x01, 0xd0 }, 0, .none, .none }, + .{ .xlat, .z, &.{ .m8 }, &.{ 
0xd7 }, 0, .none, .@"32bit" }, + .{ .xlat, .z, &.{ .m8 }, &.{ 0xd7 }, 0, .long, .@"64bit" }, + .{ .xlatb, .z, &.{ }, &.{ 0xd7 }, 0, .none, .@"32bit" }, + .{ .xlatb, .z, &.{ }, &.{ 0xd7 }, 0, .long, .@"64bit" }, + .{ .xor, .zi, &.{ .al, .imm8 }, &.{ 0x34 }, 0, .none, .none }, .{ .xor, .zi, &.{ .ax, .imm16 }, &.{ 0x35 }, 0, .short, .none }, .{ .xor, .zi, &.{ .eax, .imm32 }, &.{ 0x35 }, 0, .none, .none }, @@ -874,12 +1088,96 @@ pub const table = [_]Entry{ .{ .xor, .rm, &.{ .r64, .rm64 }, &.{ 0x33 }, 0, .long, .none }, // X87 + .{ .f2xm1, .z, &.{}, &.{ 0xd9, 0xf0 }, 0, .none, .x87 }, + .{ .fabs, .z, &.{}, &.{ 0xd9, 0xe1 }, 0, .none, .x87 }, + .{ .fadd, .m, &.{ .m32 }, &.{ 0xd8 }, 0, .none, .x87 }, + .{ .fadd, .m, &.{ .m64 }, &.{ 0xdc }, 0, .none, .x87 }, + .{ .fadd, .zo, &.{ .st0, .st }, &.{ 0xd8, 0xc0 }, 0, .none, .x87 }, + .{ .fadd, .oz, &.{ .st, .st0 }, &.{ 0xdc, 0xc0 }, 0, .none, .x87 }, + .{ .faddp, .oz, &.{ .st, .st0 }, &.{ 0xde, 0xc0 }, 0, .none, .x87 }, + .{ .faddp, .z, &.{ }, &.{ 0xde, 0xc1 }, 0, .none, .x87 }, + .{ .fiadd, .m, &.{ .m32 }, &.{ 0xda }, 0, .none, .x87 }, + .{ .fiadd, .m, &.{ .m16 }, &.{ 0xde }, 0, .none, .x87 }, + + .{ .fbld, .m, &.{ .m80 }, &.{ 0xdf }, 4, .none, .x87 }, + + .{ .fbstp, .m, &.{ .m80 }, &.{ 0xdf }, 6, .none, .x87 }, + .{ .fchs, .z, &.{}, &.{ 0xd9, 0xe0 }, 0, .none, .x87 }, + .{ .fclex, .z, &.{}, &.{ 0xdb, 0xe2 }, 0, .wait, .x87 }, + .{ .fnclex, .z, &.{}, &.{ 0xdb, 0xe2 }, 0, .none, .x87 }, + + .{ .fcmovb, .zo, &.{ .st0, .st }, &.{ 0xda, 0xc0 }, 0, .none, .@"cmov x87" }, + .{ .fcmove, .zo, &.{ .st0, .st }, &.{ 0xda, 0xc8 }, 0, .none, .@"cmov x87" }, + .{ .fcmovbe, .zo, &.{ .st0, .st }, &.{ 0xda, 0xd0 }, 0, .none, .@"cmov x87" }, + .{ .fcmovu, .zo, &.{ .st0, .st }, &.{ 0xda, 0xd8 }, 0, .none, .@"cmov x87" }, + .{ .fcmovnb, .zo, &.{ .st0, .st }, &.{ 0xdb, 0xc0 }, 0, .none, .@"cmov x87" }, + .{ .fcmovne, .zo, &.{ .st0, .st }, &.{ 0xdb, 0xc8 }, 0, .none, .@"cmov x87" }, + .{ .fcmovnbe, .zo, &.{ .st0, .st }, &.{ 0xdb, 0xd0 }, 0, .none, 
.@"cmov x87" }, + .{ .fcmovnu, .zo, &.{ .st0, .st }, &.{ 0xdb, 0xd8 }, 0, .none, .@"cmov x87" }, + + .{ .fcom, .m, &.{ .m32 }, &.{ 0xd8 }, 2, .none, .x87 }, + .{ .fcom, .m, &.{ .m64 }, &.{ 0xdc }, 2, .none, .x87 }, + .{ .fcom, .o, &.{ .st }, &.{ 0xd8, 0xd0 }, 0, .none, .x87 }, + .{ .fcom, .z, &.{ }, &.{ 0xd8, 0xd1 }, 0, .none, .x87 }, + .{ .fcomp, .m, &.{ .m32 }, &.{ 0xd8 }, 3, .none, .x87 }, + .{ .fcomp, .m, &.{ .m64 }, &.{ 0xdc }, 3, .none, .x87 }, + .{ .fcomp, .o, &.{ .st }, &.{ 0xd8, 0xd8 }, 0, .none, .x87 }, + .{ .fcomp, .z, &.{ }, &.{ 0xd8, 0xd9 }, 0, .none, .x87 }, + .{ .fcompp, .z, &.{ }, &.{ 0xde, 0xd9 }, 0, .none, .x87 }, + + .{ .fcomi, .zo, &.{ .st0, .st }, &.{ 0xdb, 0xf0 }, 0, .none, .x87 }, + .{ .fcomip, .zo, &.{ .st0, .st }, &.{ 0xdf, 0xf0 }, 0, .none, .x87 }, + .{ .fucomi, .zo, &.{ .st0, .st }, &.{ 0xdb, 0xe8 }, 0, .none, .x87 }, + .{ .fucomip, .zo, &.{ .st0, .st }, &.{ 0xdf, 0xe8 }, 0, .none, .x87 }, + + .{ .fcos, .z, &.{}, &.{ 0xd9, 0xff }, 0, .none, .x87 }, + + .{ .fdecstp, .z, &.{}, &.{ 0xd9, 0xf6 }, 0, .none, .x87 }, + + .{ .fdiv, .m, &.{ .m32 }, &.{ 0xd8 }, 6, .none, .x87 }, + .{ .fdiv, .m, &.{ .m64 }, &.{ 0xdc }, 6, .none, .x87 }, + .{ .fdiv, .zo, &.{ .st0, .st }, &.{ 0xd8, 0xf0 }, 0, .none, .x87 }, + .{ .fdiv, .oz, &.{ .st, .st0 }, &.{ 0xdc, 0xf8 }, 0, .none, .x87 }, + .{ .fdivp, .oz, &.{ .st, .st0 }, &.{ 0xde, 0xf8 }, 0, .none, .x87 }, + .{ .fdivp, .z, &.{ }, &.{ 0xde, 0xf9 }, 0, .none, .x87 }, + .{ .fidiv, .m, &.{ .m32 }, &.{ 0xda }, 6, .none, .x87 }, + .{ .fidiv, .m, &.{ .m16 }, &.{ 0xde }, 6, .none, .x87 }, + + .{ .fdivr, .m, &.{ .m32 }, &.{ 0xd8 }, 7, .none, .x87 }, + .{ .fdivr, .m, &.{ .m64 }, &.{ 0xdc }, 7, .none, .x87 }, + .{ .fdivr, .zo, &.{ .st0, .st }, &.{ 0xd8, 0xf8 }, 0, .none, .x87 }, + .{ .fdivr, .oz, &.{ .st, .st0 }, &.{ 0xdc, 0xf0 }, 0, .none, .x87 }, + .{ .fdivrp, .oz, &.{ .st, .st0 }, &.{ 0xde, 0xf0 }, 0, .none, .x87 }, + .{ .fdivrp, .z, &.{ }, &.{ 0xde, 0xf1 }, 0, .none, .x87 }, + .{ .fidivr, .m, &.{ .m32 }, &.{ 0xda }, 7, 
.none, .x87 }, + .{ .fidivr, .m, &.{ .m16 }, &.{ 0xde }, 7, .none, .x87 }, + .{ .ffree, .o, &.{ .st }, &.{ 0xdd, 0xc0 }, 0, .none, .x87 }, + .{ .ficom, .m, &.{ .m16 }, &.{ 0xde }, 2, .none, .x87 }, + .{ .ficom, .m, &.{ .m32 }, &.{ 0xda }, 2, .none, .x87 }, + .{ .ficomp, .m, &.{ .m16 }, &.{ 0xde }, 3, .none, .x87 }, + .{ .ficomp, .m, &.{ .m32 }, &.{ 0xda }, 3, .none, .x87 }, + + .{ .fild, .m, &.{ .m16 }, &.{ 0xdf }, 0, .none, .x87 }, + .{ .fild, .m, &.{ .m32 }, &.{ 0xdb }, 0, .none, .x87 }, + .{ .fild, .m, &.{ .m64 }, &.{ 0xdf }, 5, .none, .x87 }, + + .{ .fincstp, .z, &.{}, &.{ 0xd9, 0xf7 }, 0, .none, .x87 }, + + .{ .finit, .z, &.{}, &.{ 0xdb, 0xe3 }, 0, .wait, .x87 }, + .{ .fninit, .z, &.{}, &.{ 0xdb, 0xe3 }, 0, .none, .x87 }, + + .{ .fist, .m, &.{ .m16 }, &.{ 0xdf }, 2, .none, .x87 }, + .{ .fist, .m, &.{ .m32 }, &.{ 0xdb }, 2, .none, .x87 }, + .{ .fistp, .m, &.{ .m16 }, &.{ 0xdf }, 3, .none, .x87 }, + .{ .fistp, .m, &.{ .m32 }, &.{ 0xdb }, 3, .none, .x87 }, + .{ .fistp, .m, &.{ .m64 }, &.{ 0xdf }, 7, .none, .x87 }, + .{ .fisttp, .m, &.{ .m16 }, &.{ 0xdf }, 1, .none, .x87 }, .{ .fisttp, .m, &.{ .m32 }, &.{ 0xdb }, 1, .none, .x87 }, .{ .fisttp, .m, &.{ .m64 }, &.{ 0xdd }, 1, .none, .x87 }, @@ -889,8 +1187,52 @@ pub const table = [_]Entry{ .{ .fld, .m, &.{ .m80 }, &.{ 0xdb }, 5, .none, .x87 }, .{ .fld, .o, &.{ .st }, &.{ 0xd9, 0xc0 }, 0, .none, .x87 }, + .{ .fld1, .z, &.{}, &.{ 0xd9, 0xe8 }, 0, .none, .x87 }, + .{ .fldl2t, .z, &.{}, &.{ 0xd9, 0xe9 }, 0, .none, .x87 }, + .{ .fldl2e, .z, &.{}, &.{ 0xd9, 0xea }, 0, .none, .x87 }, + .{ .fldpi, .z, &.{}, &.{ 0xd9, 0xeb }, 0, .none, .x87 }, + .{ .fldlg2, .z, &.{}, &.{ 0xd9, 0xec }, 0, .none, .x87 }, + .{ .fldln2, .z, &.{}, &.{ 0xd9, 0xed }, 0, .none, .x87 }, + .{ .fldz, .z, &.{}, &.{ 0xd9, 0xee }, 0, .none, .x87 }, + + .{ .fldcw, .m, &.{ .m16 }, &.{ 0xd9 }, 5, .none, .x87 }, + .{ .fldenv, .m, &.{ .m }, &.{ 0xd9 }, 4, .none, .x87 }, + .{ .fmul, .m, &.{ .m32 }, &.{ 0xd8 }, 1, .none, .x87 }, + .{ .fmul, .m, &.{ .m64 }, &.{ 
0xdc }, 1, .none, .x87 }, + .{ .fmul, .zo, &.{ .st0, .st }, &.{ 0xd8, 0xc8 }, 0, .none, .x87 }, + .{ .fmul, .oz, &.{ .st, .st0 }, &.{ 0xdc, 0xc8 }, 0, .none, .x87 }, + .{ .fmulp, .oz, &.{ .st, .st0 }, &.{ 0xde, 0xc8 }, 0, .none, .x87 }, + .{ .fmulp, .z, &.{ }, &.{ 0xde, 0xc9 }, 0, .none, .x87 }, + .{ .fimul, .m, &.{ .m32 }, &.{ 0xda }, 1, .none, .x87 }, + .{ .fimul, .m, &.{ .m16 }, &.{ 0xde }, 1, .none, .x87 }, + + .{ .fnop, .z, &.{}, &.{ 0xd9, 0xd0 }, 0, .none, .x87 }, + + .{ .fpatan, .z, &.{}, &.{ 0xd9, 0xf3 }, 0, .none, .x87 }, + + .{ .fprem, .z, &.{}, &.{ 0xd9, 0xf8 }, 0, .none, .x87 }, + + .{ .fprem1, .z, &.{}, &.{ 0xd9, 0xf5 }, 0, .none, .x87 }, + + .{ .fptan, .z, &.{}, &.{ 0xd9, 0xf2 }, 0, .none, .x87 }, + + .{ .frndint, .z, &.{}, &.{ 0xd9, 0xfc }, 0, .none, .x87 }, + + .{ .frstor, .m, &.{ .m }, &.{ 0xdd }, 4, .none, .x87 }, + + .{ .fsave, .m, &.{ .m }, &.{ 0xdd }, 6, .wait, .x87 }, + .{ .fnsave, .m, &.{ .m }, &.{ 0xdd }, 6, .none, .x87 }, + + .{ .fscale, .z, &.{}, &.{ 0xd9, 0xfd }, 0, .none, .x87 }, + + .{ .fsin, .z, &.{}, &.{ 0xd9, 0xfe }, 0, .none, .x87 }, + + .{ .fsincos, .z, &.{}, &.{ 0xd9, 0xfb }, 0, .none, .x87 }, + + .{ .fsqrt, .z, &.{}, &.{ 0xd9, 0xfa }, 0, .none, .x87 }, + .{ .fst, .m, &.{ .m32 }, &.{ 0xd9 }, 2, .none, .x87 }, .{ .fst, .m, &.{ .m64 }, &.{ 0xdd }, 2, .none, .x87 }, .{ .fst, .o, &.{ .st }, &.{ 0xdd, 0xd0 }, 0, .none, .x87 }, @@ -899,8 +1241,59 @@ pub const table = [_]Entry{ .{ .fstp, .m, &.{ .m80 }, &.{ 0xdb }, 7, .none, .x87 }, .{ .fstp, .o, &.{ .st }, &.{ 0xdd, 0xd8 }, 0, .none, .x87 }, - .{ .fstenv, .m, &.{ .m }, &.{ 0x9b, 0xd9 }, 6, .none, .x87 }, - .{ .fnstenv, .m, &.{ .m }, &.{ 0xd9 }, 6, .none, .x87 }, + .{ .fstcw, .m, &.{ .m16 }, &.{ 0xd9 }, 7, .wait, .x87 }, + .{ .fnstcw, .m, &.{ .m16 }, &.{ 0xd9 }, 7, .none, .x87 }, + + .{ .fstenv, .m, &.{ .m }, &.{ 0xd9 }, 6, .wait, .x87 }, + .{ .fnstenv, .m, &.{ .m }, &.{ 0xd9 }, 6, .none, .x87 }, + + .{ .fstsw, .m, &.{ .m16 }, &.{ 0xdd }, 7, .wait, .x87 }, + .{ .fstsw, .m, &.{ .ax }, &.{ 
0xdf }, 4, .wait, .x87 }, + .{ .fnstsw, .m, &.{ .m16 }, &.{ 0xdd }, 7, .none, .x87 }, + .{ .fnstsw, .m, &.{ .ax }, &.{ 0xdf }, 4, .none, .x87 }, + + .{ .fsub, .m, &.{ .m32 }, &.{ 0xd8 }, 4, .none, .x87 }, + .{ .fsub, .m, &.{ .m64 }, &.{ 0xdc }, 4, .none, .x87 }, + .{ .fsub, .zo, &.{ .st0, .st }, &.{ 0xd8, 0xe0 }, 0, .none, .x87 }, + .{ .fsub, .oz, &.{ .st, .st0 }, &.{ 0xdc, 0xe8 }, 0, .none, .x87 }, + .{ .fsubp, .oz, &.{ .st, .st0 }, &.{ 0xde, 0xe8 }, 0, .none, .x87 }, + .{ .fsubp, .z, &.{ }, &.{ 0xde, 0xe9 }, 0, .none, .x87 }, + .{ .fisub, .m, &.{ .m32 }, &.{ 0xda }, 4, .none, .x87 }, + .{ .fisub, .m, &.{ .m16 }, &.{ 0xde }, 4, .none, .x87 }, + + .{ .fsubr, .m, &.{ .m32 }, &.{ 0xd8 }, 5, .none, .x87 }, + .{ .fsubr, .m, &.{ .m64 }, &.{ 0xdc }, 5, .none, .x87 }, + .{ .fsubr, .zo, &.{ .st0, .st }, &.{ 0xd8, 0xe8 }, 0, .none, .x87 }, + .{ .fsubr, .oz, &.{ .st, .st0 }, &.{ 0xdc, 0xe0 }, 0, .none, .x87 }, + .{ .fsubrp, .oz, &.{ .st, .st0 }, &.{ 0xde, 0xe0 }, 0, .none, .x87 }, + .{ .fsubrp, .z, &.{ }, &.{ 0xde, 0xe1 }, 0, .none, .x87 }, + .{ .fisubr, .m, &.{ .m32 }, &.{ 0xda }, 5, .none, .x87 }, + .{ .fisubr, .m, &.{ .m16 }, &.{ 0xde }, 5, .none, .x87 }, + + .{ .ftst, .z, &.{}, &.{ 0xd9, 0xe4 }, 0, .none, .x87 }, + + .{ .fucom, .o, &.{ .st }, &.{ 0xdd, 0xe0 }, 0, .none, .x87 }, + .{ .fucom, .z, &.{ }, &.{ 0xdd, 0xe1 }, 0, .none, .x87 }, + .{ .fucomp, .o, &.{ .st }, &.{ 0xdd, 0xe8 }, 0, .none, .x87 }, + .{ .fucomp, .z, &.{ }, &.{ 0xdd, 0xe9 }, 0, .none, .x87 }, + .{ .fucompp, .z, &.{ }, &.{ 0xda, 0xe9 }, 0, .none, .x87 }, + + .{ .fxam, .z, &.{}, &.{ 0xd9, 0xe5 }, 0, .none, .x87 }, + + .{ .fxch, .o, &.{ .st }, &.{ 0xd9, 0xc8 }, 0, .none, .x87 }, + .{ .fxch, .z, &.{ }, &.{ 0xd9, 0xc9 }, 0, .none, .x87 }, + + .{ .fxtract, .z, &.{}, &.{ 0xd9, 0xf4 }, 0, .none, .x87 }, + + .{ .fyl2x, .z, &.{}, &.{ 0xd9, 0xf1 }, 0, .none, .x87 }, + + .{ .fyl2xp1, .z, &.{}, &.{ 0xd9, 0xf9 }, 0, .none, .x87 }, + + .{ .wait, .z, &.{}, &.{ 0x9b }, 0, .none, .x87 }, + .{ .fwait, .z, &.{}, &.{ 0x9b 
}, 0, .none, .x87 }, + + // MMX + .{ .emms, .z, &.{}, &.{ 0x0f, 0x77 }, 0, .none, .mmx }, // SSE .{ .addps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x58 }, 0, .none, .sse }, @@ -915,6 +1308,8 @@ pub const table = [_]Entry{ .{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .none, .sse }, + .{ .comiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2f }, 0, .none, .sse }, + .{ .cvtpi2ps, .rm, &.{ .xmm, .mm_m64 }, &.{ 0x0f, 0x2a }, 0, .none, .sse }, .{ .cvtps2pi, .rm, &.{ .mm, .xmm_m64 }, &.{ 0x0f, 0x2d }, 0, .none, .sse }, @@ -934,6 +1329,12 @@ pub const table = [_]Entry{ .{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .none, .sse }, + .{ .fxrstor, .m, &.{ .m }, &.{ 0x0f, 0xae }, 1, .none, .fxsr }, + .{ .fxrstor64, .m, &.{ .m }, &.{ 0x0f, 0xae }, 1, .long, .fxsr }, + + .{ .fxsave, .m, &.{ .m }, &.{ 0x0f, 0xae }, 0, .none, .fxsr }, + .{ .fxsave64, .m, &.{ .m }, &.{ 0x0f, 0xae }, 0, .long, .fxsr }, + .{ .ldmxcsr, .m, &.{ .m32 }, &.{ 0x0f, 0xae }, 2, .none, .sse }, .{ .maxps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5f }, 0, .none, .sse }, @@ -1004,6 +1405,8 @@ pub const table = [_]Entry{ .{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .none, .sse2 }, + .{ .comisd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x2f }, 0, .none, .sse2 }, + .{ .cvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .none, .sse2 }, .{ .cvtdq2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5b }, 0, .none, .sse2 }, @@ -1043,6 +1446,12 @@ pub const table = [_]Entry{ .{ .divsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .none, .sse2 }, + .{ .gf2p8affineinvqb, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0xcf }, 0, .none, .gfni }, + + .{ .gf2p8affineqb, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0xce }, 0, .none, .gfni }, + + .{ .gf2p8mulb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xcf }, 0, .none, .gfni }, + .{ .maxpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .none, .sse2 }, .{ 
.maxsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .none, .sse2 }, @@ -1203,6 +1612,16 @@ pub const table = [_]Entry{ .{ .xorpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .none, .sse2 }, // SSE3 + .{ .addsubpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd0 }, 0, .none, .sse3 }, + + .{ .addsubps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf2, 0x0f, 0xd0 }, 0, .none, .sse3 }, + + .{ .haddpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x7c }, 0, .none, .sse3 }, + + .{ .haddps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf2, 0x0f, 0x7c }, 0, .none, .sse3 }, + + .{ .lddqu, .rm, &.{ .xmm, .m128 }, &.{ 0xf2, 0x0f, 0xf0 }, 0, .none, .sse3 }, + .{ .movddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .none, .sse3 }, .{ .movshdup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .none, .sse3 }, @@ -1226,20 +1645,24 @@ pub const table = [_]Entry{ .{ .blendps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0c }, 0, .none, .sse4_1 }, - .{ .blendvpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x15 }, 0, .none, .sse4_1 }, + .{ .blendvpd, .rm0, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x15 }, 0, .none, .sse4_1 }, .{ .blendvpd, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x15 }, 0, .none, .sse4_1 }, - .{ .blendvps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x14 }, 0, .none, .sse4_1 }, + .{ .blendvps, .rm0, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x14 }, 0, .none, .sse4_1 }, .{ .blendvps, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x14 }, 0, .none, .sse4_1 }, + .{ .dppd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x41 }, 0, .none, .sse4_1 }, + + .{ .dpps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x40 }, 0, .none, .sse4_1 }, + .{ .extractps, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x17 }, 0, .none, .sse4_1 }, .{ .insertps, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .none, .sse4_1 }, .{ .packusdw, .rm, &.{ .xmm, .xmm_m128 }, 
&.{ 0x66, 0x0f, 0x38, 0x2b }, 0, .none, .sse4_1 }, - .{ .pblendvb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x10 }, 0, .none, .sse4_1 }, - .{ .pblendvb, .rm, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x10 }, 0, .none, .sse4_1 }, + .{ .pblendvb, .rm0, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x10 }, 0, .none, .sse4_1 }, + .{ .pblendvb, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x10 }, 0, .none, .sse4_1 }, .{ .pblendw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0e }, 0, .none, .sse4_1 }, @@ -1296,6 +1719,13 @@ pub const table = [_]Entry{ .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .none, .sse4_1 }, // SSE4.2 + .{ .crc32, .rm, &.{ .r32, .rm8 }, &.{ 0xf2, 0x0f, 0x38, 0xf0 }, 0, .none, .crc32 }, + .{ .crc32, .rm, &.{ .r32, .rm8 }, &.{ 0xf2, 0x0f, 0x38, 0xf0 }, 0, .rex, .crc32 }, + .{ .crc32, .rm, &.{ .r32, .rm16 }, &.{ 0xf2, 0x0f, 0x38, 0xf1 }, 0, .short, .crc32 }, + .{ .crc32, .rm, &.{ .r32, .rm32 }, &.{ 0xf2, 0x0f, 0x38, 0xf1 }, 0, .none, .crc32 }, + .{ .crc32, .rm, &.{ .r64, .rm8 }, &.{ 0xf2, 0x0f, 0x38, 0xf0 }, 0, .long, .crc32 }, + .{ .crc32, .rm, &.{ .r64, .rm64 }, &.{ 0xf2, 0x0f, 0x38, 0xf1 }, 0, .long, .crc32 }, + .{ .pcmpgtq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x37 }, 0, .none, .sse4_2 }, // PCLMUL @@ -1315,14 +1745,40 @@ pub const table = [_]Entry{ .{ .aeskeygenassist, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0xdf }, 0, .none, .aes }, // SHA + .{ .sha1rnds4, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0x3a, 0xcc }, 0, .none, .sha }, + + .{ .sha1nexte, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xc8 }, 0, .none, .sha }, + + .{ .sha1msg1, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xc9 }, 0, .none, .sha }, + + .{ .sha1msg2, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xca }, 0, .none, .sha }, + + .{ .sha256rnds2, .rm0, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xcb }, 0, .none, .sha }, + .{ .sha256rnds2, .rm0, &.{ .xmm, .xmm_m128, .xmm0 
}, &.{ 0x0f, 0x38, 0xcb }, 0, .none, .sha }, + .{ .sha256msg1, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xcc }, 0, .none, .sha }, .{ .sha256msg2, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xcd }, 0, .none, .sha }, - .{ .sha256rnds2, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xcb }, 0, .none, .sha }, - .{ .sha256rnds2, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x0f, 0x38, 0xcb }, 0, .none, .sha }, - // AVX + .{ .andn, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w0, .bmi }, + .{ .andn, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w1, .bmi }, + + .{ .bextr, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi }, + .{ .bextr, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi }, + + .{ .blsi, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w0, .bmi }, + .{ .blsi, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w1, .bmi }, + + .{ .blsmsk, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w0, .bmi }, + .{ .blsmsk, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w1, .bmi }, + + .{ .blsr, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w0, .bmi }, + .{ .blsr, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w1, .bmi }, + + .{ .bzhi, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 }, + .{ .bzhi, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 }, + .{ .rorx, .rmi, &.{ .r32, .rm32, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w0, .bmi2 }, .{ .rorx, .rmi, &.{ .r64, .rm64, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w1, .bmi2 }, @@ -1333,6 +1789,10 @@ pub const table = [_]Entry{ .{ .shlx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 }, .{ .shrx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 }, + .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .bmi }, + .{ .tzcnt, .rm, &.{ 
.r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .bmi }, + .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .bmi }, + .{ .vaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_128_wig, .avx }, .{ .vaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_256_wig, .avx }, @@ -1343,6 +1803,12 @@ pub const table = [_]Entry{ .{ .vaddss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx }, + .{ .vaddsubpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd0 }, 0, .vex_128_wig, .avx }, + .{ .vaddsubpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd0 }, 0, .vex_256_wig, .avx }, + + .{ .vaddsubps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0xf2, 0x0f, 0xd0 }, 0, .vex_128_wig, .avx }, + .{ .vaddsubps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0xd0 }, 0, .vex_256_wig, .avx }, + .{ .vaesdec, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xde }, 0, .vex_128_wig, .@"aes avx" }, .{ .vaesdeclast, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xdf }, 0, .vex_128_wig, .@"aes avx" }, @@ -1394,6 +1860,10 @@ pub const table = [_]Entry{ .{ .vcmpss, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .vex_lig_wig, .avx }, + .{ .vcomisd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x2f }, 0, .vex_lig_wig, .avx }, + + .{ .vcomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2f }, 0, .vex_lig_wig, .avx }, + .{ .vcvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx }, .{ .vcvtdq2pd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx }, @@ -1440,6 +1910,11 @@ pub const table = [_]Entry{ .{ .vcvttss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .vex_lig_w0, .avx }, .{ .vcvttss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .vex_lig_w1, .avx }, + .{ .vdppd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x41 }, 0, .vex_128_wig, .avx }, + + .{ .vdpps, .rvmi, &.{ 
.xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x40 }, 0, .vex_128_wig, .avx }, + .{ .vdpps, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x40 }, 0, .vex_256_wig, .avx }, + .{ .vdivpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_128_wig, .avx }, .{ .vdivpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_256_wig, .avx }, @@ -1454,10 +1929,28 @@ pub const table = [_]Entry{ .{ .vextractps, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x17 }, 0, .vex_128_wig, .avx }, + .{ .vgf2p8affineinvqb, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0xcf }, 0, .vex_128_w1, .@"gfni avx" }, + .{ .vgf2p8affineinvqb, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0xcf }, 0, .vex_256_w1, .@"gfni avx" }, + + .{ .vgf2p8affineqb, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0xce }, 0, .vex_128_w1, .@"gfni avx" }, + .{ .vgf2p8affineqb, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0xce }, 0, .vex_256_w1, .@"gfni avx" }, + + .{ .vgf2p8mulb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xcf }, 0, .vex_128_w0, .@"gfni avx" }, + .{ .vgf2p8mulb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xcf }, 0, .vex_256_w0, .@"gfni avx" }, + + .{ .vhaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x7c }, 0, .vex_128_wig, .avx }, + .{ .vhaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x7c }, 0, .vex_256_wig, .avx }, + + .{ .vhaddps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0xf2, 0x0f, 0x7c }, 0, .vex_128_wig, .avx }, + .{ .vhaddps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0x7c }, 0, .vex_256_wig, .avx }, + .{ .vinsertf128, .rvmi, &.{ .ymm, .ymm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x18 }, 0, .vex_256_w0, .avx }, .{ .vinsertps, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .vex_128_wig, .avx }, + .{ .vlddqu, .rm, &.{ .xmm, .m128 }, &.{ 0xf2, 0x0f, 0xf0 }, 0, 
.vex_128_wig, .avx }, + .{ .vlddqu, .rm, &.{ .ymm, .m256 }, &.{ 0xf2, 0x0f, 0xf0 }, 0, .vex_256_wig, .avx }, + .{ .vldmxcsr, .m, &.{ .m32 }, &.{ 0x0f, 0xae }, 2, .vex_lz_wig, .avx }, .{ .vmaxpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_128_wig, .avx }, @@ -1821,15 +2314,6 @@ pub const table = [_]Entry{ // VPCLMULQDQ .{ .vpclmulqdq, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x44 }, 0, .vex_256_wig, .vpclmulqdq }, - // VAES - .{ .vaesdec, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xde }, 0, .vex_256_wig, .vaes }, - - .{ .vaesdeclast, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xdf }, 0, .vex_256_wig, .vaes }, - - .{ .vaesenc, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xdc }, 0, .vex_256_wig, .vaes }, - - .{ .vaesenclast, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xdd }, 0, .vex_256_wig, .vaes }, - // AVX2 .{ .vbroadcastss, .rm, &.{ .xmm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx2 }, .{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 }, @@ -1992,5 +2476,46 @@ pub const table = [_]Entry{ .{ .vpunpcklqdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_256_wig, .avx2 }, .{ .vpxor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xef }, 0, .vex_256_wig, .avx2 }, + + // ADX + .{ .adcx, .rm, &.{ .r32, .rm32 }, &.{ 0x66, 0x0f, 0x38, 0xf6 }, 0, .none, .adx }, + .{ .adcx, .rm, &.{ .r64, .rm64 }, &.{ 0x66, 0x0f, 0x38, 0xf6 }, 0, .long, .adx }, + + .{ .adox, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0x38, 0xf6 }, 0, .none, .adx }, + .{ .adox, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0x38, 0xf6 }, 0, .long, .adx }, + + // VAES + .{ .vaesdec, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xde }, 0, .vex_256_wig, .vaes }, + + .{ .vaesdeclast, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xdf }, 0, .vex_256_wig, .vaes }, + + .{ .vaesenc, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 
0x66, 0x0f, 0x38, 0xdc }, 0, .vex_256_wig, .vaes }, + + .{ .vaesenclast, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xdd }, 0, .vex_256_wig, .vaes }, + + // AESKLE + .{ .aesdec128kl, .rm, &.{ .xmm, .m }, &.{ 0xf3, 0x0f, 0x38, 0xdd }, 0, .none, .kl }, + + .{ .aesdec256kl, .rm, &.{ .xmm, .m }, &.{ 0xf3, 0x0f, 0x38, 0xdf }, 0, .none, .kl }, + + .{ .aesenc128kl, .rm, &.{ .xmm, .m }, &.{ 0xf3, 0x0f, 0x38, 0xdc }, 0, .none, .kl }, + + .{ .aesenc256kl, .rm, &.{ .xmm, .m }, &.{ 0xf3, 0x0f, 0x38, 0xde }, 0, .none, .kl }, + + .{ .encodekey128, .rm, &.{ .r32, .r32 }, &.{ 0xf3, 0x0f, 0x38, 0xfa }, 0, .none, .kl }, + + .{ .encodekey256, .rm, &.{ .r32, .r32 }, &.{ 0xf3, 0x0f, 0x38, 0xfb }, 0, .none, .kl }, + + .{ .loadiwkey, .rm, &.{ .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x38, 0xdc }, 0, .none, .kl }, + .{ .loadiwkey, .rm, &.{ .xmm, .xmm, .eax, .xmm0 }, &.{ 0xf3, 0x0f, 0x38, 0xdc }, 0, .none, .kl }, + + // AESKLEWIDE_KL + .{ .aesdecwide128kl, .m, &.{ .m }, &.{ 0xf3, 0x0f, 0x38, 0xd8 }, 1, .none, .widekl }, + + .{ .aesdecwide256kl, .m, &.{ .m }, &.{ 0xf3, 0x0f, 0x38, 0xd8 }, 3, .none, .widekl }, + + .{ .aesencwide128kl, .m, &.{ .m }, &.{ 0xf3, 0x0f, 0x38, 0xd8 }, 0, .none, .widekl }, + + .{ .aesencwide256kl, .m, &.{ .m }, &.{ 0xf3, 0x0f, 0x38, 0xd8 }, 2, .none, .widekl }, }; // zig fmt: on