x64: implement matching SSE instructions for generic cross-comp target

This commit is contained in:
Jakub Konka 2022-05-20 13:00:59 +02:00
parent 0e43d007c0
commit 274654d73e
4 changed files with 384 additions and 160 deletions

View File

@ -881,7 +881,7 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue {
switch (elem_ty.zigTypeTag()) { switch (elem_ty.zigTypeTag()) {
.Vector => return self.fail("TODO allocRegOrMem for Vector type", .{}), .Vector => return self.fail("TODO allocRegOrMem for Vector type", .{}),
.Float => { .Float => {
if (self.intrinsicsAllowed(elem_ty)) { if (intrinsicsAllowed(self.target.*, elem_ty)) {
const ptr_bytes: u64 = 32; const ptr_bytes: u64 = 32;
if (abi_size <= ptr_bytes) { if (abi_size <= ptr_bytes) {
if (self.register_manager.tryAllocReg(inst, sse)) |reg| { if (self.register_manager.tryAllocReg(inst, sse)) |reg| {
@ -970,7 +970,7 @@ pub fn spillRegisters(self: *Self, comptime count: comptime_int, registers: [cou
fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
const reg_class: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) { const reg_class: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) {
.Float => blk: { .Float => blk: {
if (self.intrinsicsAllowed(ty)) break :blk sse; if (intrinsicsAllowed(self.target.*, ty)) break :blk sse;
return self.fail("TODO copy {} to register", .{ty.fmtDebug()}); return self.fail("TODO copy {} to register", .{ty.fmtDebug()});
}, },
else => gp, else => gp,
@ -987,7 +987,7 @@ fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
fn copyToRegisterWithInstTracking(self: *Self, reg_owner: Air.Inst.Index, ty: Type, mcv: MCValue) !MCValue { fn copyToRegisterWithInstTracking(self: *Self, reg_owner: Air.Inst.Index, ty: Type, mcv: MCValue) !MCValue {
const reg_class: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) { const reg_class: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) {
.Float => blk: { .Float => blk: {
if (self.intrinsicsAllowed(ty)) break :blk sse; if (intrinsicsAllowed(self.target.*, ty)) break :blk sse;
return self.fail("TODO copy {} to register", .{ty.fmtDebug()}); return self.fail("TODO copy {} to register", .{ty.fmtDebug()});
}, },
else => gp, else => gp,
@ -3462,16 +3462,28 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu
}, },
.register => |src_reg| switch (dst_ty.zigTypeTag()) { .register => |src_reg| switch (dst_ty.zigTypeTag()) {
.Float => { .Float => {
if (self.intrinsicsAllowed(dst_ty)) { if (intrinsicsAllowed(self.target.*, dst_ty)) {
const actual_tag: Mir.Inst.Tag = switch (dst_ty.tag()) { const actual_tag: Mir.Inst.Tag = switch (dst_ty.tag()) {
.f32 => switch (mir_tag) { .f32 => switch (mir_tag) {
.add => Mir.Inst.Tag.add_f32_avx, .add => if (hasAvxSupport(self.target.*))
.cmp => Mir.Inst.Tag.cmp_f32_avx, Mir.Inst.Tag.add_f32_avx
else
Mir.Inst.Tag.add_f32_sse,
.cmp => if (hasAvxSupport(self.target.*))
Mir.Inst.Tag.cmp_f32_avx
else
Mir.Inst.Tag.cmp_f32_sse,
else => return self.fail("TODO genBinOpMir for f32 register-register with MIR tag {}", .{mir_tag}), else => return self.fail("TODO genBinOpMir for f32 register-register with MIR tag {}", .{mir_tag}),
}, },
.f64 => switch (mir_tag) { .f64 => switch (mir_tag) {
.add => Mir.Inst.Tag.add_f64_avx, .add => if (hasAvxSupport(self.target.*))
.cmp => Mir.Inst.Tag.cmp_f64_avx, Mir.Inst.Tag.add_f64_avx
else
Mir.Inst.Tag.add_f64_sse,
.cmp => if (hasAvxSupport(self.target.*))
Mir.Inst.Tag.cmp_f64_avx
else
Mir.Inst.Tag.cmp_f64_sse,
else => return self.fail("TODO genBinOpMir for f64 register-register with MIR tag {}", .{mir_tag}), else => return self.fail("TODO genBinOpMir for f64 register-register with MIR tag {}", .{mir_tag}),
}, },
else => return self.fail("TODO genBinOpMir for float register-register and type {}", .{dst_ty.fmtDebug()}), else => return self.fail("TODO genBinOpMir for float register-register and type {}", .{dst_ty.fmtDebug()}),
@ -5324,10 +5336,16 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE
.register => |reg| { .register => |reg| {
switch (ty.zigTypeTag()) { switch (ty.zigTypeTag()) {
.Float => { .Float => {
if (self.intrinsicsAllowed(ty)) { if (intrinsicsAllowed(self.target.*, ty)) {
const tag: Mir.Inst.Tag = switch (ty.tag()) { const tag: Mir.Inst.Tag = switch (ty.tag()) {
.f32 => .mov_f32_avx, .f32 => if (hasAvxSupport(self.target.*))
.f64 => .mov_f64_avx, Mir.Inst.Tag.mov_f32_avx
else
Mir.Inst.Tag.mov_f32_sse,
.f64 => if (hasAvxSupport(self.target.*))
Mir.Inst.Tag.mov_f64_avx
else
Mir.Inst.Tag.mov_f64_sse,
else => return self.fail("TODO genSetStackArg for register for type {}", .{ty.fmtDebug()}), else => return self.fail("TODO genSetStackArg for register for type {}", .{ty.fmtDebug()}),
}; };
_ = try self.addInst(.{ _ = try self.addInst(.{
@ -5508,10 +5526,16 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl
switch (ty.zigTypeTag()) { switch (ty.zigTypeTag()) {
.Float => { .Float => {
if (self.intrinsicsAllowed(ty)) { if (intrinsicsAllowed(self.target.*, ty)) {
const tag: Mir.Inst.Tag = switch (ty.tag()) { const tag: Mir.Inst.Tag = switch (ty.tag()) {
.f32 => .mov_f32_avx, .f32 => if (hasAvxSupport(self.target.*))
.f64 => .mov_f64_avx, Mir.Inst.Tag.mov_f32_avx
else
Mir.Inst.Tag.mov_f32_sse,
.f64 => if (hasAvxSupport(self.target.*))
Mir.Inst.Tag.mov_f64_avx
else
Mir.Inst.Tag.mov_f64_sse,
else => return self.fail("TODO genSetStack for register for type {}", .{ty.fmtDebug()}), else => return self.fail("TODO genSetStack for register for type {}", .{ty.fmtDebug()}),
}; };
_ = try self.addInst(.{ _ = try self.addInst(.{
@ -6032,10 +6056,16 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
}, },
}, },
.Float => { .Float => {
if (self.intrinsicsAllowed(ty)) { if (intrinsicsAllowed(self.target.*, ty)) {
const tag: Mir.Inst.Tag = switch (ty.tag()) { const tag: Mir.Inst.Tag = switch (ty.tag()) {
.f32 => .mov_f32_avx, .f32 => if (hasAvxSupport(self.target.*))
.f64 => .mov_f64_avx, Mir.Inst.Tag.mov_f32_avx
else
Mir.Inst.Tag.mov_f32_sse,
.f64 => if (hasAvxSupport(self.target.*))
Mir.Inst.Tag.mov_f64_avx
else
Mir.Inst.Tag.mov_f64_sse,
else => return self.fail("TODO genSetReg from register for {}", .{ty.fmtDebug()}), else => return self.fail("TODO genSetReg from register for {}", .{ty.fmtDebug()}),
}; };
_ = try self.addInst(.{ _ = try self.addInst(.{
@ -6072,10 +6102,16 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
const base_reg = try self.register_manager.allocReg(null, gp); const base_reg = try self.register_manager.allocReg(null, gp);
try self.loadMemPtrIntoRegister(base_reg, Type.usize, mcv); try self.loadMemPtrIntoRegister(base_reg, Type.usize, mcv);
if (self.intrinsicsAllowed(ty)) { if (intrinsicsAllowed(self.target.*, ty)) {
const tag: Mir.Inst.Tag = switch (ty.tag()) { const tag: Mir.Inst.Tag = switch (ty.tag()) {
.f32 => .mov_f32_avx, .f32 => if (hasAvxSupport(self.target.*))
.f64 => .mov_f64_avx, Mir.Inst.Tag.mov_f32_avx
else
Mir.Inst.Tag.mov_f32_sse,
.f64 => if (hasAvxSupport(self.target.*))
Mir.Inst.Tag.mov_f64_avx
else
Mir.Inst.Tag.mov_f64_sse,
else => return self.fail("TODO genSetReg from memory for {}", .{ty.fmtDebug()}), else => return self.fail("TODO genSetReg from memory for {}", .{ty.fmtDebug()}),
}; };
@ -6115,10 +6151,16 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
const base_reg = try self.register_manager.allocReg(null, gp); const base_reg = try self.register_manager.allocReg(null, gp);
try self.loadMemPtrIntoRegister(base_reg, Type.usize, mcv); try self.loadMemPtrIntoRegister(base_reg, Type.usize, mcv);
if (self.intrinsicsAllowed(ty)) { if (intrinsicsAllowed(self.target.*, ty)) {
const tag: Mir.Inst.Tag = switch (ty.tag()) { const tag: Mir.Inst.Tag = switch (ty.tag()) {
.f32 => .mov_f32_avx, .f32 => if (hasAvxSupport(self.target.*))
.f64 => .mov_f64_avx, Mir.Inst.Tag.mov_f32_avx
else
Mir.Inst.Tag.mov_f32_sse,
.f64 => if (hasAvxSupport(self.target.*))
Mir.Inst.Tag.mov_f64_avx
else
Mir.Inst.Tag.mov_f64_sse,
else => return self.fail("TODO genSetReg from memory for {}", .{ty.fmtDebug()}), else => return self.fail("TODO genSetReg from memory for {}", .{ty.fmtDebug()}),
}; };
@ -6230,10 +6272,16 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
}, },
}, },
.Float => { .Float => {
if (self.intrinsicsAllowed(ty)) { if (intrinsicsAllowed(self.target.*, ty)) {
const tag: Mir.Inst.Tag = switch (ty.tag()) { const tag: Mir.Inst.Tag = switch (ty.tag()) {
.f32 => .mov_f32_avx, .f32 => if (hasAvxSupport(self.target.*))
.f64 => .mov_f64_avx, Mir.Inst.Tag.mov_f32_avx
else
Mir.Inst.Tag.mov_f32_sse,
.f64 => if (hasAvxSupport(self.target.*))
Mir.Inst.Tag.mov_f64_avx
else
Mir.Inst.Tag.mov_f64_sse,
else => return self.fail("TODO genSetReg from stack offset for {}", .{ty.fmtDebug()}), else => return self.fail("TODO genSetReg from stack offset for {}", .{ty.fmtDebug()}),
}; };
_ = try self.addInst(.{ _ = try self.addInst(.{
@ -7046,11 +7094,15 @@ fn truncateRegister(self: *Self, ty: Type, reg: Register) !void {
} }
} }
fn intrinsicsAllowed(self: *Self, ty: Type) bool { fn intrinsicsAllowed(target: Target, ty: Type) bool {
return switch (ty.tag()) { return switch (ty.tag()) {
.f32, .f32,
.f64, .f64,
=> Target.x86.featureSetHasAny(self.target.cpu.features, .{ .avx, .avx2 }), => Target.x86.featureSetHasAny(target.cpu.features, .{ .sse2, .avx, .avx2 }),
else => unreachable, // TODO finish this off else => unreachable, // TODO finish this off
}; };
} }
/// Reports whether the CPU feature set of `target` contains AVX or AVX2.
/// Callers use this to pick between the `_avx` and `_sse` MIR tags.
fn hasAvxSupport(target: Target) bool {
    const features = target.cpu.features;
    return Target.x86.featureSetHasAny(features, .{ .avx, .avx2 });
}

View File

@ -182,6 +182,16 @@ pub fn lowerMir(emit: *Emit) InnerError!void {
.interrupt => try emit.mirInterrupt(inst), .interrupt => try emit.mirInterrupt(inst),
.nop => try emit.mirNop(), .nop => try emit.mirNop(),
// SSE instructions
.mov_f64_sse => try emit.mirMovFloatSse(.movsd, inst),
.mov_f32_sse => try emit.mirMovFloatSse(.movss, inst),
.add_f64_sse => try emit.mirAddFloatSse(.addsd, inst),
.add_f32_sse => try emit.mirAddFloatSse(.addss, inst),
.cmp_f64_sse => try emit.mirCmpFloatSse(.ucomisd, inst),
.cmp_f32_sse => try emit.mirCmpFloatSse(.ucomiss, inst),
// AVX instructions // AVX instructions
.mov_f64_avx => try emit.mirMovFloatAvx(.vmovsd, inst), .mov_f64_avx => try emit.mirMovFloatAvx(.vmovsd, inst),
.mov_f32_avx => try emit.mirMovFloatAvx(.vmovss, inst), .mov_f32_avx => try emit.mirMovFloatAvx(.vmovss, inst),
@ -536,6 +546,7 @@ fn mirArithMemImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
} }
inline fn setRexWRegister(reg: Register) bool { inline fn setRexWRegister(reg: Register) bool {
if (reg.size() > 64) return false;
if (reg.size() == 64) return true; if (reg.size() == 64) return true;
return switch (reg) { return switch (reg) {
.ah, .ch, .dh, .bh => true, .ah, .ch, .dh, .bh => true,
@ -963,11 +974,55 @@ fn mirLeaPie(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
} }
} }
// SSE instructions
/// Lowers a scalar float SSE move (`tag`) for the MIR instruction `inst`.
///
/// The decoded ops flags select the operand form:
///   0b00  reg1, [reg2 + imm]
///   0b01  [reg1 + imm], reg2
///   0b10  reg1, reg2
/// Any other flags value is reported through `emit.fail`.
fn mirMovFloatSse(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
    const ops = emit.mir.instructions.items(.ops)[inst].decode();
    switch (ops.flags) {
        0b00 => {
            // The displacement is only read in the memory forms, so it is
            // fetched inside the arm rather than up front.
            const disp = emit.mir.instructions.items(.data)[inst].imm;
            const ptr_size = Memory.PtrSize.new(ops.reg2.size());
            const src = RegisterOrMemory.mem(ptr_size, .{
                .disp = disp,
                .base = ops.reg2,
            });
            return lowerToRmEnc(tag, ops.reg1, src, emit.code);
        },
        0b01 => {
            const disp = emit.mir.instructions.items(.data)[inst].imm;
            const ptr_size = Memory.PtrSize.new(ops.reg1.size());
            const dst = RegisterOrMemory.mem(ptr_size, .{
                .disp = disp,
                .base = ops.reg1,
            });
            return lowerToMrEnc(tag, dst, ops.reg2, emit.code);
        },
        0b10 => {
            const src = RegisterOrMemory.reg(ops.reg2);
            return lowerToRmEnc(tag, ops.reg1, src, emit.code);
        },
        else => return emit.fail("TODO unused variant 0b{b} for {}", .{ ops.flags, tag }),
    }
}
/// Lowers a scalar float SSE addition (`tag`) for the MIR instruction `inst`.
///
/// Only the register-register form (ops flags 0b00: reg1, reg2) is handled;
/// every other flags value is reported through `emit.fail`.
fn mirAddFloatSse(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
    const ops = emit.mir.instructions.items(.ops)[inst].decode();
    if (ops.flags != 0b00) {
        return emit.fail("TODO unused variant 0b{b} for {}", .{ ops.flags, tag });
    }
    const src = RegisterOrMemory.reg(ops.reg2);
    return lowerToRmEnc(tag, ops.reg1, src, emit.code);
}
/// Lowers a scalar float SSE comparison (`tag`) for the MIR instruction `inst`.
///
/// Only the register-register form (ops flags 0b00: reg1, reg2) is handled;
/// every other flags value is reported through `emit.fail`.
fn mirCmpFloatSse(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
    const ops = emit.mir.instructions.items(.ops)[inst].decode();
    if (ops.flags != 0b00) {
        return emit.fail("TODO unused variant 0b{b} for {}", .{ ops.flags, tag });
    }
    const rhs = RegisterOrMemory.reg(ops.reg2);
    return lowerToRmEnc(tag, ops.reg1, rhs, emit.code);
}
// AVX instructions // AVX instructions
fn mirMovFloatAvx(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { fn mirMovFloatAvx(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
const ops = emit.mir.instructions.items(.ops)[inst].decode(); const ops = emit.mir.instructions.items(.ops)[inst].decode();
switch (ops.flags) { switch (ops.flags) {
0b00 => { 0b00 => {
const imm = emit.mir.instructions.items(.data)[inst].imm; const imm = emit.mir.instructions.items(.data)[inst].imm;
@ -986,24 +1041,22 @@ fn mirMovFloatAvx(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
0b10 => { 0b10 => {
return lowerToRvmEnc(tag, ops.reg1, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code); return lowerToRvmEnc(tag, ops.reg1, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code);
}, },
else => return emit.fail("TODO unused variant 0b{b} for mov_f64", .{ops.flags}), else => return emit.fail("TODO unused variant 0b{b} for {}", .{ ops.flags, tag }),
} }
} }
fn mirAddFloatAvx(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { fn mirAddFloatAvx(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
const ops = emit.mir.instructions.items(.ops)[inst].decode(); const ops = emit.mir.instructions.items(.ops)[inst].decode();
switch (ops.flags) { switch (ops.flags) {
0b00 => { 0b00 => {
return lowerToRvmEnc(tag, ops.reg1, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code); return lowerToRvmEnc(tag, ops.reg1, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code);
}, },
else => return emit.fail("TODO unused variant 0b{b} for mov_f64", .{ops.flags}), else => return emit.fail("TODO unused variant 0b{b} for {}", .{ ops.flags, tag }),
} }
} }
fn mirCmpFloatAvx(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { fn mirCmpFloatAvx(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
const ops = emit.mir.instructions.items(.ops)[inst].decode(); const ops = emit.mir.instructions.items(.ops)[inst].decode();
switch (ops.flags) { switch (ops.flags) {
0b00 => { 0b00 => {
return lowerToVmEnc(tag, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code); return lowerToVmEnc(tag, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code);
@ -1247,6 +1300,14 @@ const Tag = enum {
cmovng, cmovng,
cmovb, cmovb,
cmovnae, cmovnae,
movsd,
movss,
addsd,
addss,
cmpsd,
cmpss,
ucomisd,
ucomiss,
vmovsd, vmovsd,
vmovss, vmovss,
vaddsd, vaddsd,
@ -1256,6 +1317,22 @@ const Tag = enum {
vucomisd, vucomisd,
vucomiss, vucomiss,
/// Returns true when `tag` is one of the legacy (non-VEX) scalar SSE
/// mnemonics listed below, and false for every other tag.
fn isSse(tag: Tag) bool {
    switch (tag) {
        .movsd, .movss => return true,
        .addsd, .addss => return true,
        .cmpsd, .cmpss => return true,
        .ucomisd, .ucomiss => return true,
        else => return false,
    }
}
fn isAvx(tag: Tag) bool { fn isAvx(tag: Tag) bool {
return switch (tag) { return switch (tag) {
.vmovsd, .vmovsd,
@ -1369,190 +1446,256 @@ const Encoding = enum {
rvmi, rvmi,
}; };
const OpCode = union(enum) { const OpCode = struct {
one_byte: u8, bytes: [3]u8,
two_byte: struct { _1: u8, _2: u8 }, count: usize,
fn oneByte(opc: u8) OpCode { fn init(comptime in_bytes: []const u8) OpCode {
return .{ .one_byte = opc }; comptime assert(in_bytes.len <= 3);
comptime var bytes: [3]u8 = undefined;
inline for (in_bytes) |x, i| {
bytes[i] = x;
} }
return .{ .bytes = bytes, .count = in_bytes.len };
fn twoByte(opc1: u8, opc2: u8) OpCode {
return .{ .two_byte = .{ ._1 = opc1, ._2 = opc2 } };
} }
fn encode(opc: OpCode, encoder: Encoder) void { fn encode(opc: OpCode, encoder: Encoder) void {
switch (opc) { switch (opc.count) {
.one_byte => |v| encoder.opcode_1byte(v), 1 => encoder.opcode_1byte(opc.bytes[0]),
.two_byte => |v| encoder.opcode_2byte(v._1, v._2), 2 => encoder.opcode_2byte(opc.bytes[0], opc.bytes[1]),
3 => encoder.opcode_3byte(opc.bytes[0], opc.bytes[1], opc.bytes[2]),
else => unreachable,
} }
} }
fn encodeWithReg(opc: OpCode, encoder: Encoder, reg: Register) void { fn encodeWithReg(opc: OpCode, encoder: Encoder, reg: Register) void {
assert(opc == .one_byte); assert(opc.count == 1);
encoder.opcode_withReg(opc.one_byte, reg.lowEnc()); encoder.opcode_withReg(opc.bytes[0], reg.lowEnc());
} }
}; };
inline fn getOpCode(tag: Tag, enc: Encoding, is_one_byte: bool) OpCode { inline fn getOpCode(tag: Tag, enc: Encoding, is_one_byte: bool) OpCode {
// zig fmt: off
switch (enc) { switch (enc) {
.zo => return switch (tag) { .zo => return switch (tag) {
.ret_near => OpCode.oneByte(0xc3), .ret_near => OpCode.init(&.{0xc3}),
.ret_far => OpCode.oneByte(0xcb), .ret_far => OpCode.init(&.{0xcb}),
.int3 => OpCode.oneByte(0xcc), .int3 => OpCode.init(&.{0xcc}),
.nop => OpCode.oneByte(0x90), .nop => OpCode.init(&.{0x90}),
.syscall => OpCode.twoByte(0x0f, 0x05), .syscall => OpCode.init(&.{ 0x0f, 0x05 }),
.cbw => OpCode.oneByte(0x98), .cbw => OpCode.init(&.{0x98}),
.cwd, .cdq, .cqo => OpCode.oneByte(0x99), .cwd,
.cdq,
.cqo => OpCode.init(&.{0x99}),
else => unreachable, else => unreachable,
}, },
.d => return switch (tag) { .d => return switch (tag) {
.jmp_near => OpCode.oneByte(0xe9), .jmp_near => OpCode.init(&.{0xe9}),
.call_near => OpCode.oneByte(0xe8), .call_near => OpCode.init(&.{0xe8}),
.jo => if (is_one_byte) OpCode.oneByte(0x70) else OpCode.twoByte(0x0f, 0x80), .jo => if (is_one_byte) OpCode.init(&.{0x70}) else OpCode.init(&.{0x0f,0x80}),
.jno => if (is_one_byte) OpCode.oneByte(0x71) else OpCode.twoByte(0x0f, 0x81), .jno => if (is_one_byte) OpCode.init(&.{0x71}) else OpCode.init(&.{0x0f,0x81}),
.jb, .jc, .jnae => if (is_one_byte) OpCode.oneByte(0x72) else OpCode.twoByte(0x0f, 0x82), .jb,
.jnb, .jnc, .jae => if (is_one_byte) OpCode.oneByte(0x73) else OpCode.twoByte(0x0f, 0x83), .jc,
.je, .jz => if (is_one_byte) OpCode.oneByte(0x74) else OpCode.twoByte(0x0f, 0x84), .jnae => if (is_one_byte) OpCode.init(&.{0x72}) else OpCode.init(&.{0x0f,0x82}),
.jne, .jnz => if (is_one_byte) OpCode.oneByte(0x75) else OpCode.twoByte(0x0f, 0x85), .jnb,
.jna, .jbe => if (is_one_byte) OpCode.oneByte(0x76) else OpCode.twoByte(0x0f, 0x86), .jnc,
.jnbe, .ja => if (is_one_byte) OpCode.oneByte(0x77) else OpCode.twoByte(0x0f, 0x87), .jae => if (is_one_byte) OpCode.init(&.{0x73}) else OpCode.init(&.{0x0f,0x83}),
.js => if (is_one_byte) OpCode.oneByte(0x78) else OpCode.twoByte(0x0f, 0x88), .je,
.jns => if (is_one_byte) OpCode.oneByte(0x79) else OpCode.twoByte(0x0f, 0x89), .jz => if (is_one_byte) OpCode.init(&.{0x74}) else OpCode.init(&.{0x0f,0x84}),
.jpe, .jp => if (is_one_byte) OpCode.oneByte(0x7a) else OpCode.twoByte(0x0f, 0x8a), .jne,
.jpo, .jnp => if (is_one_byte) OpCode.oneByte(0x7b) else OpCode.twoByte(0x0f, 0x8b), .jnz => if (is_one_byte) OpCode.init(&.{0x75}) else OpCode.init(&.{0x0f,0x85}),
.jnge, .jl => if (is_one_byte) OpCode.oneByte(0x7c) else OpCode.twoByte(0x0f, 0x8c), .jna,
.jge, .jnl => if (is_one_byte) OpCode.oneByte(0x7d) else OpCode.twoByte(0x0f, 0x8d), .jbe => if (is_one_byte) OpCode.init(&.{0x76}) else OpCode.init(&.{0x0f,0x86}),
.jle, .jng => if (is_one_byte) OpCode.oneByte(0x7e) else OpCode.twoByte(0x0f, 0x8e), .jnbe,
.jg, .jnle => if (is_one_byte) OpCode.oneByte(0x7f) else OpCode.twoByte(0x0f, 0x8f), .ja => if (is_one_byte) OpCode.init(&.{0x77}) else OpCode.init(&.{0x0f,0x87}),
.js => if (is_one_byte) OpCode.init(&.{0x78}) else OpCode.init(&.{0x0f,0x88}),
.jns => if (is_one_byte) OpCode.init(&.{0x79}) else OpCode.init(&.{0x0f,0x89}),
.jpe,
.jp => if (is_one_byte) OpCode.init(&.{0x7a}) else OpCode.init(&.{0x0f,0x8a}),
.jpo,
.jnp => if (is_one_byte) OpCode.init(&.{0x7b}) else OpCode.init(&.{0x0f,0x8b}),
.jnge,
.jl => if (is_one_byte) OpCode.init(&.{0x7c}) else OpCode.init(&.{0x0f,0x8c}),
.jge,
.jnl => if (is_one_byte) OpCode.init(&.{0x7d}) else OpCode.init(&.{0x0f,0x8d}),
.jle,
.jng => if (is_one_byte) OpCode.init(&.{0x7e}) else OpCode.init(&.{0x0f,0x8e}),
.jg,
.jnle => if (is_one_byte) OpCode.init(&.{0x7f}) else OpCode.init(&.{0x0f,0x8f}),
else => unreachable, else => unreachable,
}, },
.m => return switch (tag) { .m => return switch (tag) {
.jmp_near, .call_near, .push => OpCode.oneByte(0xff), .jmp_near,
.pop => OpCode.oneByte(0x8f), .call_near,
.seto => OpCode.twoByte(0x0f, 0x90), .push => OpCode.init(&.{0xff}),
.setno => OpCode.twoByte(0x0f, 0x91), .pop => OpCode.init(&.{0x8f}),
.setb, .setc, .setnae => OpCode.twoByte(0x0f, 0x92), .seto => OpCode.init(&.{0x0f,0x90}),
.setnb, .setnc, .setae => OpCode.twoByte(0x0f, 0x93), .setno => OpCode.init(&.{0x0f,0x91}),
.sete, .setz => OpCode.twoByte(0x0f, 0x94), .setb,
.setne, .setnz => OpCode.twoByte(0x0f, 0x95), .setc,
.setbe, .setna => OpCode.twoByte(0x0f, 0x96), .setnae => OpCode.init(&.{0x0f,0x92}),
.seta, .setnbe => OpCode.twoByte(0x0f, 0x97), .setnb,
.sets => OpCode.twoByte(0x0f, 0x98), .setnc,
.setns => OpCode.twoByte(0x0f, 0x99), .setae => OpCode.init(&.{0x0f,0x93}),
.setp, .setpe => OpCode.twoByte(0x0f, 0x9a), .sete,
.setnp, .setop => OpCode.twoByte(0x0f, 0x9b), .setz => OpCode.init(&.{0x0f,0x94}),
.setl, .setnge => OpCode.twoByte(0x0f, 0x9c), .setne,
.setnl, .setge => OpCode.twoByte(0x0f, 0x9d), .setnz => OpCode.init(&.{0x0f,0x95}),
.setle, .setng => OpCode.twoByte(0x0f, 0x9e), .setbe,
.setnle, .setg => OpCode.twoByte(0x0f, 0x9f), .setna => OpCode.init(&.{0x0f,0x96}),
.idiv, .div, .imul, .mul => OpCode.oneByte(if (is_one_byte) 0xf6 else 0xf7), .seta,
.fisttp16 => OpCode.oneByte(0xdf), .setnbe => OpCode.init(&.{0x0f,0x97}),
.fisttp32 => OpCode.oneByte(0xdb), .sets => OpCode.init(&.{0x0f,0x98}),
.fisttp64 => OpCode.oneByte(0xdd), .setns => OpCode.init(&.{0x0f,0x99}),
.fld32 => OpCode.oneByte(0xd9), .setp,
.fld64 => OpCode.oneByte(0xdd), .setpe => OpCode.init(&.{0x0f,0x9a}),
.setnp,
.setop => OpCode.init(&.{0x0f,0x9b}),
.setl,
.setnge => OpCode.init(&.{0x0f,0x9c}),
.setnl,
.setge => OpCode.init(&.{0x0f,0x9d}),
.setle,
.setng => OpCode.init(&.{0x0f,0x9e}),
.setnle,
.setg => OpCode.init(&.{0x0f,0x9f}),
.idiv,
.div,
.imul,
.mul => if (is_one_byte) OpCode.init(&.{0xf6}) else OpCode.init(&.{0xf7}),
.fisttp16 => OpCode.init(&.{0xdf}),
.fisttp32 => OpCode.init(&.{0xdb}),
.fisttp64 => OpCode.init(&.{0xdd}),
.fld32 => OpCode.init(&.{0xd9}),
.fld64 => OpCode.init(&.{0xdd}),
else => unreachable, else => unreachable,
}, },
.o => return switch (tag) { .o => return switch (tag) {
.push => OpCode.oneByte(0x50), .push => OpCode.init(&.{0x50}),
.pop => OpCode.oneByte(0x58), .pop => OpCode.init(&.{0x58}),
else => unreachable, else => unreachable,
}, },
.i => return switch (tag) { .i => return switch (tag) {
.push => OpCode.oneByte(if (is_one_byte) 0x6a else 0x68), .push => if (is_one_byte) OpCode.init(&.{0x6a}) else OpCode.init(&.{0x68}),
.@"test" => OpCode.oneByte(if (is_one_byte) 0xa8 else 0xa9), .@"test" => if (is_one_byte) OpCode.init(&.{0xa8}) else OpCode.init(&.{0xa9}),
.ret_near => OpCode.oneByte(0xc2), .ret_near => OpCode.init(&.{0xc2}),
.ret_far => OpCode.oneByte(0xca), .ret_far => OpCode.init(&.{0xca}),
else => unreachable, else => unreachable,
}, },
.m1 => return switch (tag) { .m1 => return switch (tag) {
.shl, .sal, .shr, .sar => OpCode.oneByte(if (is_one_byte) 0xd0 else 0xd1), .shl, .sal,
.shr, .sar => if (is_one_byte) OpCode.init(&.{0xd0}) else OpCode.init(&.{0xd1}),
else => unreachable, else => unreachable,
}, },
.mc => return switch (tag) { .mc => return switch (tag) {
.shl, .sal, .shr, .sar => OpCode.oneByte(if (is_one_byte) 0xd2 else 0xd3), .shl, .sal,
.shr, .sar => if (is_one_byte) OpCode.init(&.{0xd2}) else OpCode.init(&.{0xd3}),
else => unreachable, else => unreachable,
}, },
.mi => return switch (tag) { .mi => return switch (tag) {
.adc, .add, .sub, .xor, .@"and", .@"or", .sbb, .cmp => OpCode.oneByte(if (is_one_byte) 0x80 else 0x81), .adc, .add,
.mov => OpCode.oneByte(if (is_one_byte) 0xc6 else 0xc7), .sub, .xor,
.@"test" => OpCode.oneByte(if (is_one_byte) 0xf6 else 0xf7), .@"and", .@"or",
.sbb, .cmp => if (is_one_byte) OpCode.init(&.{0x80}) else OpCode.init(&.{0x81}),
.mov => if (is_one_byte) OpCode.init(&.{0xc6}) else OpCode.init(&.{0xc7}),
.@"test" => if (is_one_byte) OpCode.init(&.{0xf6}) else OpCode.init(&.{0xf7}),
else => unreachable, else => unreachable,
}, },
.mi8 => return switch (tag) { .mi8 => return switch (tag) {
.adc, .add, .sub, .xor, .@"and", .@"or", .sbb, .cmp => OpCode.oneByte(0x83), .adc, .add,
.shl, .sal, .shr, .sar => OpCode.oneByte(if (is_one_byte) 0xc0 else 0xc1), .sub, .xor,
.@"and", .@"or",
.sbb, .cmp => OpCode.init(&.{0x83}),
.shl, .sal,
.shr, .sar => if (is_one_byte) OpCode.init(&.{0xc0}) else OpCode.init(&.{0xc1}),
else => unreachable, else => unreachable,
}, },
.mr => return switch (tag) { .mr => return switch (tag) {
.adc => OpCode.oneByte(if (is_one_byte) 0x10 else 0x11), .adc => if (is_one_byte) OpCode.init(&.{0x10}) else OpCode.init(&.{0x11}),
.add => OpCode.oneByte(if (is_one_byte) 0x00 else 0x01), .add => if (is_one_byte) OpCode.init(&.{0x00}) else OpCode.init(&.{0x01}),
.sub => OpCode.oneByte(if (is_one_byte) 0x28 else 0x29), .sub => if (is_one_byte) OpCode.init(&.{0x28}) else OpCode.init(&.{0x29}),
.xor => OpCode.oneByte(if (is_one_byte) 0x30 else 0x31), .xor => if (is_one_byte) OpCode.init(&.{0x30}) else OpCode.init(&.{0x31}),
.@"and" => OpCode.oneByte(if (is_one_byte) 0x20 else 0x21), .@"and" => if (is_one_byte) OpCode.init(&.{0x20}) else OpCode.init(&.{0x21}),
.@"or" => OpCode.oneByte(if (is_one_byte) 0x08 else 0x09), .@"or" => if (is_one_byte) OpCode.init(&.{0x08}) else OpCode.init(&.{0x09}),
.sbb => OpCode.oneByte(if (is_one_byte) 0x18 else 0x19), .sbb => if (is_one_byte) OpCode.init(&.{0x18}) else OpCode.init(&.{0x19}),
.cmp => OpCode.oneByte(if (is_one_byte) 0x38 else 0x39), .cmp => if (is_one_byte) OpCode.init(&.{0x38}) else OpCode.init(&.{0x39}),
.mov => OpCode.oneByte(if (is_one_byte) 0x88 else 0x89), .mov => if (is_one_byte) OpCode.init(&.{0x88}) else OpCode.init(&.{0x89}),
.@"test" => OpCode.oneByte(if (is_one_byte) 0x84 else 0x85), .@"test" => if (is_one_byte) OpCode.init(&.{0x84}) else OpCode.init(&.{0x85}),
.movsd => OpCode.init(&.{0xf2,0x0f,0x11}),
.movss => OpCode.init(&.{0xf3,0x0f,0x11}),
else => unreachable, else => unreachable,
}, },
.rm => return switch (tag) { .rm => return switch (tag) {
.adc => OpCode.oneByte(if (is_one_byte) 0x12 else 0x13), .adc => if (is_one_byte) OpCode.init(&.{0x12}) else OpCode.init(&.{0x13}),
.add => OpCode.oneByte(if (is_one_byte) 0x02 else 0x03), .add => if (is_one_byte) OpCode.init(&.{0x02}) else OpCode.init(&.{0x03}),
.sub => OpCode.oneByte(if (is_one_byte) 0x2a else 0x2b), .sub => if (is_one_byte) OpCode.init(&.{0x2a}) else OpCode.init(&.{0x2b}),
.xor => OpCode.oneByte(if (is_one_byte) 0x32 else 0x33), .xor => if (is_one_byte) OpCode.init(&.{0x32}) else OpCode.init(&.{0x33}),
.@"and" => OpCode.oneByte(if (is_one_byte) 0x22 else 0x23), .@"and" => if (is_one_byte) OpCode.init(&.{0x22}) else OpCode.init(&.{0x23}),
.@"or" => OpCode.oneByte(if (is_one_byte) 0x0a else 0x0b), .@"or" => if (is_one_byte) OpCode.init(&.{0x0a}) else OpCode.init(&.{0x0b}),
.sbb => OpCode.oneByte(if (is_one_byte) 0x1a else 0x1b), .sbb => if (is_one_byte) OpCode.init(&.{0x1a}) else OpCode.init(&.{0x1b}),
.cmp => OpCode.oneByte(if (is_one_byte) 0x3a else 0x3b), .cmp => if (is_one_byte) OpCode.init(&.{0x3a}) else OpCode.init(&.{0x3b}),
.mov => OpCode.oneByte(if (is_one_byte) 0x8a else 0x8b), .mov => if (is_one_byte) OpCode.init(&.{0x8a}) else OpCode.init(&.{0x8b}),
.movsx => OpCode.twoByte(0x0f, if (is_one_byte) 0xbe else 0xbf), .movsx => if (is_one_byte) OpCode.init(&.{0x0f,0xbe}) else OpCode.init(&.{0x0f,0xbf}),
.movsxd => OpCode.oneByte(0x63), .movsxd => OpCode.init(&.{0x63}),
.movzx => OpCode.twoByte(0x0f, if (is_one_byte) 0xb6 else 0xb7), .movzx => if (is_one_byte) OpCode.init(&.{0x0f,0xb6}) else OpCode.init(&.{0x0f,0xb7}),
.lea => OpCode.oneByte(if (is_one_byte) 0x8c else 0x8d), .lea => if (is_one_byte) OpCode.init(&.{0x8c}) else OpCode.init(&.{0x8d}),
.imul => OpCode.twoByte(0x0f, 0xaf), .imul => OpCode.init(&.{0x0f,0xaf}),
.cmove, .cmovz => OpCode.twoByte(0x0f, 0x44), .cmove,
.cmovb, .cmovnae => OpCode.twoByte(0x0f, 0x42), .cmovz => OpCode.init(&.{0x0f,0x44}),
.cmovl, .cmovng => OpCode.twoByte(0x0f, 0x4c), .cmovb,
.cmovnae => OpCode.init(&.{0x0f,0x42}),
.cmovl,
.cmovng => OpCode.init(&.{0x0f,0x4c}),
.movsd => OpCode.init(&.{0xf2,0x0f,0x10}),
.movss => OpCode.init(&.{0xf3,0x0f,0x10}),
.addsd => OpCode.init(&.{0xf2,0x0f,0x58}),
.addss => OpCode.init(&.{0xf3,0x0f,0x58}),
.ucomisd => OpCode.init(&.{0x66,0x0f,0x2e}),
.ucomiss => OpCode.init(&.{0x0f,0x2e}),
else => unreachable, else => unreachable,
}, },
.oi => return switch (tag) { .oi => return switch (tag) {
.mov => OpCode.oneByte(if (is_one_byte) 0xb0 else 0xb8), .mov => if (is_one_byte) OpCode.init(&.{0xb0}) else OpCode.init(&.{0xb8}),
else => unreachable, else => unreachable,
}, },
.fd => return switch (tag) { .fd => return switch (tag) {
.mov => OpCode.oneByte(if (is_one_byte) 0xa0 else 0xa1), .mov => if (is_one_byte) OpCode.init(&.{0xa0}) else OpCode.init(&.{0xa1}),
else => unreachable, else => unreachable,
}, },
.td => return switch (tag) { .td => return switch (tag) {
.mov => OpCode.oneByte(if (is_one_byte) 0xa2 else 0xa3), .mov => if (is_one_byte) OpCode.init(&.{0xa2}) else OpCode.init(&.{0xa3}),
else => unreachable, else => unreachable,
}, },
.rmi => return switch (tag) { .rmi => return switch (tag) {
.imul => OpCode.oneByte(if (is_one_byte) 0x6b else 0x69), .imul => if (is_one_byte) OpCode.init(&.{0x6b}) else OpCode.init(&.{0x69}),
else => unreachable, else => unreachable,
}, },
.mv => return switch (tag) { .mv => return switch (tag) {
.vmovsd, .vmovss => OpCode.oneByte(0x11), .vmovsd,
.vmovss => OpCode.init(&.{0x11}),
else => unreachable, else => unreachable,
}, },
.vm => return switch (tag) { .vm => return switch (tag) {
.vmovsd, .vmovss => OpCode.oneByte(0x10), .vmovsd,
.vucomisd, .vucomiss => OpCode.oneByte(0x2e), .vmovss => OpCode.init(&.{0x10}),
.vucomisd,
.vucomiss => OpCode.init(&.{0x2e}),
else => unreachable, else => unreachable,
}, },
.rvm => return switch (tag) { .rvm => return switch (tag) {
.vaddsd, .vaddss => OpCode.oneByte(0x58), .vaddsd,
.vmovsd, .vmovss => OpCode.oneByte(0x10), .vaddss => OpCode.init(&.{0x58}),
.vmovsd,
.vmovss => OpCode.init(&.{0x10}),
else => unreachable, else => unreachable,
}, },
.rvmi => return switch (tag) { .rvmi => return switch (tag) {
.vcmpsd, .vcmpss => OpCode.oneByte(0xc2), .vcmpsd,
.vcmpss => OpCode.init(&.{0xc2}),
else => unreachable, else => unreachable,
}, },
} }
// zig fmt: on
} }
inline fn getModRmExt(tag: Tag) u3 { inline fn getModRmExt(tag: Tag) u3 {

View File

@ -345,11 +345,29 @@ pub const Inst = struct {
/// Nop /// Nop
nop, nop,
/// AVX instructions /// SSE instructions
/// ops flags: form: /// ops flags: form:
/// 0b00 reg1, qword ptr [reg2 + imm32] /// 0b00 reg1, qword ptr [reg2 + imm32]
/// 0b01 qword ptr [reg1 + imm32], reg2 /// 0b01 qword ptr [reg1 + imm32], reg2
/// 0b10 reg1, reg2 /// 0b10 reg1, reg2
mov_f64_sse,
mov_f32_sse,
/// ops flags: form:
/// 0b00 reg1, reg2
add_f64_sse,
add_f32_sse,
/// ops flags: form:
/// 0b00 reg1, reg2
cmp_f64_sse,
cmp_f32_sse,
/// AVX instructions
/// ops flags: form:
/// 0b00 reg1, qword ptr [reg2 + imm32]
/// 0b01 qword ptr [reg1 + imm32], reg2
/// 0b10 reg1, reg1, reg2
mov_f64_avx, mov_f64_avx,
mov_f32_avx, mov_f32_avx,
@ -359,7 +377,7 @@ pub const Inst = struct {
add_f32_avx, add_f32_avx,
/// ops flags: form: /// ops flags: form:
/// /// 0b00 reg1, reg2
cmp_f64_avx, cmp_f64_avx,
cmp_f32_avx, cmp_f32_avx,

View File

@ -441,6 +441,17 @@ pub const Encoder = struct {
self.code.appendAssumeCapacity(opcode); self.code.appendAssumeCapacity(opcode);
} }
/// Emits a three-byte opcode sequence: `prefix_1`, `prefix_2`, then `opcode`,
/// in that order.
///
/// e.g. MOVSD is written as 0xf2 0x0f 0x10
///
/// encoder.opcode_3byte(0xf2, 0x0f, 0x10);
///
/// The bytes are appended with `appendAssumeCapacity`, so no capacity check
/// happens here.
/// NOTE(review): presumably the encoder was initialized with enough room for
/// these bytes — confirm at the call sites.
pub fn opcode_3byte(self: Self, prefix_1: u8, prefix_2: u8, opcode: u8) void {
    const sequence = [_]u8{ prefix_1, prefix_2, opcode };
    for (sequence) |byte| {
        self.code.appendAssumeCapacity(byte);
    }
}
/// Encodes a 1 byte opcode with a reg field /// Encodes a 1 byte opcode with a reg field
/// ///
/// Remember to add a REX prefix byte if reg is extended! /// Remember to add a REX prefix byte if reg is extended!