diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 8ce0d5884b..eeb4cab04f 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -881,7 +881,7 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue { switch (elem_ty.zigTypeTag()) { .Vector => return self.fail("TODO allocRegOrMem for Vector type", .{}), .Float => { - if (self.intrinsicsAllowed(elem_ty)) { + if (intrinsicsAllowed(self.target.*, elem_ty)) { const ptr_bytes: u64 = 32; if (abi_size <= ptr_bytes) { if (self.register_manager.tryAllocReg(inst, sse)) |reg| { @@ -970,7 +970,7 @@ pub fn spillRegisters(self: *Self, comptime count: comptime_int, registers: [cou fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { const reg_class: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) { .Float => blk: { - if (self.intrinsicsAllowed(ty)) break :blk sse; + if (intrinsicsAllowed(self.target.*, ty)) break :blk sse; return self.fail("TODO copy {} to register", .{ty.fmtDebug()}); }, else => gp, @@ -987,7 +987,7 @@ fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { fn copyToRegisterWithInstTracking(self: *Self, reg_owner: Air.Inst.Index, ty: Type, mcv: MCValue) !MCValue { const reg_class: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) { .Float => blk: { - if (self.intrinsicsAllowed(ty)) break :blk sse; + if (intrinsicsAllowed(self.target.*, ty)) break :blk sse; return self.fail("TODO copy {} to register", .{ty.fmtDebug()}); }, else => gp, @@ -3462,16 +3462,28 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu }, .register => |src_reg| switch (dst_ty.zigTypeTag()) { .Float => { - if (self.intrinsicsAllowed(dst_ty)) { + if (intrinsicsAllowed(self.target.*, dst_ty)) { const actual_tag: Mir.Inst.Tag = switch (dst_ty.tag()) { .f32 => switch (mir_tag) { - .add => Mir.Inst.Tag.add_f32_avx, - .cmp => Mir.Inst.Tag.cmp_f32_avx, + .add => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.add_f32_avx + else + Mir.Inst.Tag.add_f32_sse, + .cmp => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.cmp_f32_avx + else + Mir.Inst.Tag.cmp_f32_sse, else => return self.fail("TODO genBinOpMir for f32 register-register with MIR tag {}", .{mir_tag}), }, .f64 => switch (mir_tag) { - .add => Mir.Inst.Tag.add_f64_avx, - .cmp => Mir.Inst.Tag.cmp_f64_avx, + .add => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.add_f64_avx + else + Mir.Inst.Tag.add_f64_sse, + .cmp => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.cmp_f64_avx + else + Mir.Inst.Tag.cmp_f64_sse, else => return self.fail("TODO genBinOpMir for f64 register-register with MIR tag {}", .{mir_tag}), }, else => return self.fail("TODO genBinOpMir for float register-register and type {}", .{dst_ty.fmtDebug()}), @@ -5324,10 +5336,16 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE .register => |reg| { switch (ty.zigTypeTag()) { .Float => { - if (self.intrinsicsAllowed(ty)) { + if (intrinsicsAllowed(self.target.*, ty)) { const tag: Mir.Inst.Tag = switch (ty.tag()) { - .f32 => .mov_f32_avx, - .f64 => .mov_f64_avx, + .f32 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f32_avx + else + Mir.Inst.Tag.mov_f32_sse, + .f64 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f64_avx + else + Mir.Inst.Tag.mov_f64_sse, else => return self.fail("TODO genSetStackArg for register for type {}", .{ty.fmtDebug()}), }; _ = try self.addInst(.{ @@ -5508,10 +5526,16 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl switch (ty.zigTypeTag()) { .Float => { - if (self.intrinsicsAllowed(ty)) { + if (intrinsicsAllowed(self.target.*, ty)) { const tag: Mir.Inst.Tag = switch (ty.tag()) { - .f32 => .mov_f32_avx, - .f64 => .mov_f64_avx, + .f32 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f32_avx + else + Mir.Inst.Tag.mov_f32_sse, + .f64 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f64_avx + else + Mir.Inst.Tag.mov_f64_sse, else => return self.fail("TODO genSetStack for register for type {}", .{ty.fmtDebug()}), }; _ = try self.addInst(.{ @@ -6032,10 +6056,16 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void }, }, .Float => { - if (self.intrinsicsAllowed(ty)) { + if (intrinsicsAllowed(self.target.*, ty)) { const tag: Mir.Inst.Tag = switch (ty.tag()) { - .f32 => .mov_f32_avx, - .f64 => .mov_f64_avx, + .f32 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f32_avx + else + Mir.Inst.Tag.mov_f32_sse, + .f64 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f64_avx + else + Mir.Inst.Tag.mov_f64_sse, else => return self.fail("TODO genSetReg from register for {}", .{ty.fmtDebug()}), }; _ = try self.addInst(.{ @@ -6072,10 +6102,16 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void const base_reg = try self.register_manager.allocReg(null, gp); try self.loadMemPtrIntoRegister(base_reg, Type.usize, mcv); - if (self.intrinsicsAllowed(ty)) { + if (intrinsicsAllowed(self.target.*, ty)) { const tag: Mir.Inst.Tag = switch (ty.tag()) { - .f32 => .mov_f32_avx, - .f64 => .mov_f64_avx, + .f32 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f32_avx + else + Mir.Inst.Tag.mov_f32_sse, + .f64 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f64_avx + else + Mir.Inst.Tag.mov_f64_sse, else => return self.fail("TODO genSetReg from memory for {}", .{ty.fmtDebug()}), }; @@ -6115,10 +6151,16 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void const base_reg = try self.register_manager.allocReg(null, gp); try self.loadMemPtrIntoRegister(base_reg, Type.usize, mcv); - if (self.intrinsicsAllowed(ty)) { + if (intrinsicsAllowed(self.target.*, ty)) { const tag: Mir.Inst.Tag = switch (ty.tag()) { - .f32 => .mov_f32_avx, - .f64 => .mov_f64_avx, + .f32 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f32_avx + else + Mir.Inst.Tag.mov_f32_sse, + .f64 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f64_avx + else + Mir.Inst.Tag.mov_f64_sse, else => return self.fail("TODO genSetReg from memory for {}", .{ty.fmtDebug()}), }; @@ -6230,10 +6272,16 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void }, }, .Float => { - if (self.intrinsicsAllowed(ty)) { + if (intrinsicsAllowed(self.target.*, ty)) { const tag: Mir.Inst.Tag = switch (ty.tag()) { - .f32 => .mov_f32_avx, - .f64 => .mov_f64_avx, + .f32 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f32_avx + else + Mir.Inst.Tag.mov_f32_sse, + .f64 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f64_avx + else + Mir.Inst.Tag.mov_f64_sse, else => return self.fail("TODO genSetReg from stack offset for {}", .{ty.fmtDebug()}), }; _ = try self.addInst(.{ @@ -7046,11 +7094,15 @@ fn truncateRegister(self: *Self, ty: Type, reg: Register) !void { } } -fn intrinsicsAllowed(self: *Self, ty: Type) bool { +fn intrinsicsAllowed(target: Target, ty: Type) bool { return switch (ty.tag()) { .f32, .f64, - => Target.x86.featureSetHasAny(self.target.cpu.features, .{ .avx, .avx2 }), + => Target.x86.featureSetHasAny(target.cpu.features, .{ .sse2, .avx, .avx2 }), else => unreachable, // TODO finish this off }; } + +fn hasAvxSupport(target: Target) bool { + return Target.x86.featureSetHasAny(target.cpu.features, .{ .avx, .avx2 }); +} diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index fbcd8359f7..84955a8aac 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -182,6 +182,16 @@ pub fn lowerMir(emit: *Emit) InnerError!void { .interrupt => try emit.mirInterrupt(inst), .nop => try emit.mirNop(), + // SSE instructions + .mov_f64_sse => try emit.mirMovFloatSse(.movsd, inst), + .mov_f32_sse => try emit.mirMovFloatSse(.movss, inst), + + .add_f64_sse => try emit.mirAddFloatSse(.addsd, inst), + .add_f32_sse => try emit.mirAddFloatSse(.addss, inst), + + .cmp_f64_sse => try emit.mirCmpFloatSse(.ucomisd, inst), + .cmp_f32_sse => try emit.mirCmpFloatSse(.ucomiss, inst), + // AVX instructions .mov_f64_avx => try emit.mirMovFloatAvx(.vmovsd, inst), .mov_f32_avx => try emit.mirMovFloatAvx(.vmovss, inst), @@ -536,6 +546,7 @@ fn mirArithMemImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { } inline fn setRexWRegister(reg: Register) bool { + if (reg.size() > 64) return false; if (reg.size() == 64) return true; return switch (reg) { .ah, .ch, .dh, .bh => true, @@ -963,11 +974,55 @@ fn mirLeaPie(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { } } +// SSE instructions + +fn mirMovFloatSse(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { + const ops = emit.mir.instructions.items(.ops)[inst].decode(); + switch (ops.flags) { + 0b00 => { + const imm = emit.mir.instructions.items(.data)[inst].imm; + return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg2.size()), .{ + .disp = imm, + .base = ops.reg2, + }), emit.code); + }, + 0b01 => { + const imm = emit.mir.instructions.items(.data)[inst].imm; + return lowerToMrEnc(tag, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg1.size()), .{ + .disp = imm, + .base = ops.reg1, + }), ops.reg2, emit.code); + }, + 0b10 => { + return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code); + }, + else => return emit.fail("TODO unused variant 0b{b} for {}", .{ ops.flags, tag }), + } +} + +fn mirAddFloatSse(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { + const ops = emit.mir.instructions.items(.ops)[inst].decode(); + switch (ops.flags) { + 0b00 => { + return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code); + }, + else => return emit.fail("TODO unused variant 0b{b} for {}", .{ ops.flags, tag }), + } +} + +fn mirCmpFloatSse(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { + const ops = emit.mir.instructions.items(.ops)[inst].decode(); + switch (ops.flags) { + 0b00 => { + return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code); + }, + else => return emit.fail("TODO unused variant 0b{b} for {}", .{ ops.flags, tag }), + } +} // AVX instructions fn mirMovFloatAvx(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { const ops = emit.mir.instructions.items(.ops)[inst].decode(); - switch (ops.flags) { 0b00 => { const imm = emit.mir.instructions.items(.data)[inst].imm; @@ -986,24 +1041,22 @@ fn mirMovFloatAvx(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { 0b10 => { return lowerToRvmEnc(tag, ops.reg1, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code); }, - else => return emit.fail("TODO unused variant 0b{b} for mov_f64", .{ops.flags}), + else => return emit.fail("TODO unused variant 0b{b} for {}", .{ ops.flags, tag }), } } fn mirAddFloatAvx(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { const ops = emit.mir.instructions.items(.ops)[inst].decode(); - switch (ops.flags) { 0b00 => { return lowerToRvmEnc(tag, ops.reg1, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code); }, - else => return emit.fail("TODO unused variant 0b{b} for mov_f64", .{ops.flags}), + else => return emit.fail("TODO unused variant 0b{b} for {}", .{ ops.flags, tag }), } } fn mirCmpFloatAvx(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { const ops = emit.mir.instructions.items(.ops)[inst].decode(); - switch (ops.flags) { 0b00 => { return lowerToVmEnc(tag, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code); @@ -1247,6 +1300,14 @@ const Tag = enum { cmovng, cmovb, cmovnae, + movsd, + movss, + addsd, + addss, + cmpsd, + cmpss, + ucomisd, + ucomiss, vmovsd, vmovss, vaddsd, @@ -1256,6 +1317,22 @@ const Tag = enum { vucomisd, vucomiss, + fn isSse(tag: Tag) bool { + return switch (tag) { + .movsd, + .movss, + .addsd, + .addss, + .cmpsd, + .cmpss, + .ucomisd, + .ucomiss, + => true, + + else => false, + }; + } + fn isAvx(tag: Tag) bool { return switch (tag) { .vmovsd, @@ -1369,190 +1446,256 @@ const Encoding = enum { rvmi, }; -const OpCode = union(enum) { - one_byte: u8, - two_byte: struct { _1: u8, _2: u8 }, +const OpCode = struct { + bytes: [3]u8, + count: usize, - fn oneByte(opc: u8) OpCode { - return .{ .one_byte = opc }; - } - - fn twoByte(opc1: u8, opc2: u8) OpCode { - return .{ .two_byte = .{ ._1 = opc1, ._2 = opc2 } }; + fn init(comptime in_bytes: []const u8) OpCode { + comptime assert(in_bytes.len <= 3); + comptime var bytes: [3]u8 = undefined; + inline for (in_bytes) |x, i| { + bytes[i] = x; + } + return .{ .bytes = bytes, .count = in_bytes.len }; } fn encode(opc: OpCode, encoder: Encoder) void { - switch (opc) { - .one_byte => |v| encoder.opcode_1byte(v), - .two_byte => |v| encoder.opcode_2byte(v._1, v._2), + switch (opc.count) { + 1 => encoder.opcode_1byte(opc.bytes[0]), + 2 => encoder.opcode_2byte(opc.bytes[0], opc.bytes[1]), + 3 => encoder.opcode_3byte(opc.bytes[0], opc.bytes[1], opc.bytes[2]), + else => unreachable, } } fn encodeWithReg(opc: OpCode, encoder: Encoder, reg: Register) void { - assert(opc == .one_byte); - encoder.opcode_withReg(opc.one_byte, reg.lowEnc()); + assert(opc.count == 1); + encoder.opcode_withReg(opc.bytes[0], reg.lowEnc()); } }; inline fn getOpCode(tag: Tag, enc: Encoding, is_one_byte: bool) OpCode { + // zig fmt: off switch (enc) { .zo => return switch (tag) { - .ret_near => OpCode.oneByte(0xc3), - .ret_far => OpCode.oneByte(0xcb), - .int3 => OpCode.oneByte(0xcc), - .nop => OpCode.oneByte(0x90), - .syscall => OpCode.twoByte(0x0f, 0x05), - .cbw => OpCode.oneByte(0x98), - .cwd, .cdq, .cqo => OpCode.oneByte(0x99), - else => unreachable, + .ret_near => OpCode.init(&.{0xc3}), + .ret_far => OpCode.init(&.{0xcb}), + .int3 => OpCode.init(&.{0xcc}), + .nop => OpCode.init(&.{0x90}), + .syscall => OpCode.init(&.{ 0x0f, 0x05 }), + .cbw => OpCode.init(&.{0x98}), + .cwd, + .cdq, + .cqo => OpCode.init(&.{0x99}), + else => unreachable, }, .d => return switch (tag) { - .jmp_near => OpCode.oneByte(0xe9), - .call_near => OpCode.oneByte(0xe8), - .jo => if (is_one_byte) OpCode.oneByte(0x70) else OpCode.twoByte(0x0f, 0x80), - .jno => if (is_one_byte) OpCode.oneByte(0x71) else OpCode.twoByte(0x0f, 0x81), - .jb, .jc, .jnae => if (is_one_byte) OpCode.oneByte(0x72) else OpCode.twoByte(0x0f, 0x82), - .jnb, .jnc, .jae => if (is_one_byte) OpCode.oneByte(0x73) else OpCode.twoByte(0x0f, 0x83), - .je, .jz => if (is_one_byte) OpCode.oneByte(0x74) else OpCode.twoByte(0x0f, 0x84), - .jne, .jnz => if (is_one_byte) OpCode.oneByte(0x75) else OpCode.twoByte(0x0f, 0x85), - .jna, .jbe => if (is_one_byte) OpCode.oneByte(0x76) else OpCode.twoByte(0x0f, 0x86), - .jnbe, .ja => if (is_one_byte) OpCode.oneByte(0x77) else OpCode.twoByte(0x0f, 0x87), - .js => if (is_one_byte) OpCode.oneByte(0x78) else OpCode.twoByte(0x0f, 0x88), - .jns => if (is_one_byte) OpCode.oneByte(0x79) else OpCode.twoByte(0x0f, 0x89), - .jpe, .jp => if (is_one_byte) OpCode.oneByte(0x7a) else OpCode.twoByte(0x0f, 0x8a), - .jpo, .jnp => if (is_one_byte) OpCode.oneByte(0x7b) else OpCode.twoByte(0x0f, 0x8b), - .jnge, .jl => if (is_one_byte) OpCode.oneByte(0x7c) else OpCode.twoByte(0x0f, 0x8c), - .jge, .jnl => if (is_one_byte) OpCode.oneByte(0x7d) else OpCode.twoByte(0x0f, 0x8d), - .jle, .jng => if (is_one_byte) OpCode.oneByte(0x7e) else OpCode.twoByte(0x0f, 0x8e), - .jg, .jnle => if (is_one_byte) OpCode.oneByte(0x7f) else OpCode.twoByte(0x0f, 0x8f), - else => unreachable, + .jmp_near => OpCode.init(&.{0xe9}), + .call_near => OpCode.init(&.{0xe8}), + .jo => if (is_one_byte) OpCode.init(&.{0x70}) else OpCode.init(&.{0x0f,0x80}), + .jno => if (is_one_byte) OpCode.init(&.{0x71}) else OpCode.init(&.{0x0f,0x81}), + .jb, + .jc, + .jnae => if (is_one_byte) OpCode.init(&.{0x72}) else OpCode.init(&.{0x0f,0x82}), + .jnb, + .jnc, + .jae => if (is_one_byte) OpCode.init(&.{0x73}) else OpCode.init(&.{0x0f,0x83}), + .je, + .jz => if (is_one_byte) OpCode.init(&.{0x74}) else OpCode.init(&.{0x0f,0x84}), + .jne, + .jnz => if (is_one_byte) OpCode.init(&.{0x75}) else OpCode.init(&.{0x0f,0x85}), + .jna, + .jbe => if (is_one_byte) OpCode.init(&.{0x76}) else OpCode.init(&.{0x0f,0x86}), + .jnbe, + .ja => if (is_one_byte) OpCode.init(&.{0x77}) else OpCode.init(&.{0x0f,0x87}), + .js => if (is_one_byte) OpCode.init(&.{0x78}) else OpCode.init(&.{0x0f,0x88}), + .jns => if (is_one_byte) OpCode.init(&.{0x79}) else OpCode.init(&.{0x0f,0x89}), + .jpe, + .jp => if (is_one_byte) OpCode.init(&.{0x7a}) else OpCode.init(&.{0x0f,0x8a}), + .jpo, + .jnp => if (is_one_byte) OpCode.init(&.{0x7b}) else OpCode.init(&.{0x0f,0x8b}), + .jnge, + .jl => if (is_one_byte) OpCode.init(&.{0x7c}) else OpCode.init(&.{0x0f,0x8c}), + .jge, + .jnl => if (is_one_byte) OpCode.init(&.{0x7d}) else OpCode.init(&.{0x0f,0x8d}), + .jle, + .jng => if (is_one_byte) OpCode.init(&.{0x7e}) else OpCode.init(&.{0x0f,0x8e}), + .jg, + .jnle => if (is_one_byte) OpCode.init(&.{0x7f}) else OpCode.init(&.{0x0f,0x8f}), + else => unreachable, }, .m => return switch (tag) { - .jmp_near, .call_near, .push => OpCode.oneByte(0xff), - .pop => OpCode.oneByte(0x8f), - .seto => OpCode.twoByte(0x0f, 0x90), - .setno => OpCode.twoByte(0x0f, 0x91), - .setb, .setc, .setnae => OpCode.twoByte(0x0f, 0x92), - .setnb, .setnc, .setae => OpCode.twoByte(0x0f, 0x93), - .sete, .setz => OpCode.twoByte(0x0f, 0x94), - .setne, .setnz => OpCode.twoByte(0x0f, 0x95), - .setbe, .setna => OpCode.twoByte(0x0f, 0x96), - .seta, .setnbe => OpCode.twoByte(0x0f, 0x97), - .sets => OpCode.twoByte(0x0f, 0x98), - .setns => OpCode.twoByte(0x0f, 0x99), - .setp, .setpe => OpCode.twoByte(0x0f, 0x9a), - .setnp, .setop => OpCode.twoByte(0x0f, 0x9b), - .setl, .setnge => OpCode.twoByte(0x0f, 0x9c), - .setnl, .setge => OpCode.twoByte(0x0f, 0x9d), - .setle, .setng => OpCode.twoByte(0x0f, 0x9e), - .setnle, .setg => OpCode.twoByte(0x0f, 0x9f), - .idiv, .div, .imul, .mul => OpCode.oneByte(if (is_one_byte) 0xf6 else 0xf7), - .fisttp16 => OpCode.oneByte(0xdf), - .fisttp32 => OpCode.oneByte(0xdb), - .fisttp64 => OpCode.oneByte(0xdd), - .fld32 => OpCode.oneByte(0xd9), - .fld64 => OpCode.oneByte(0xdd), - else => unreachable, + .jmp_near, + .call_near, + .push => OpCode.init(&.{0xff}), + .pop => OpCode.init(&.{0x8f}), + .seto => OpCode.init(&.{0x0f,0x90}), + .setno => OpCode.init(&.{0x0f,0x91}), + .setb, + .setc, + .setnae => OpCode.init(&.{0x0f,0x92}), + .setnb, + .setnc, + .setae => OpCode.init(&.{0x0f,0x93}), + .sete, + .setz => OpCode.init(&.{0x0f,0x94}), + .setne, + .setnz => OpCode.init(&.{0x0f,0x95}), + .setbe, + .setna => OpCode.init(&.{0x0f,0x96}), + .seta, + .setnbe => OpCode.init(&.{0x0f,0x97}), + .sets => OpCode.init(&.{0x0f,0x98}), + .setns => OpCode.init(&.{0x0f,0x99}), + .setp, + .setpe => OpCode.init(&.{0x0f,0x9a}), + .setnp, + .setop => OpCode.init(&.{0x0f,0x9b}), + .setl, + .setnge => OpCode.init(&.{0x0f,0x9c}), + .setnl, + .setge => OpCode.init(&.{0x0f,0x9d}), + .setle, + .setng => OpCode.init(&.{0x0f,0x9e}), + .setnle, + .setg => OpCode.init(&.{0x0f,0x9f}), + .idiv, + .div, + .imul, + .mul => if (is_one_byte) OpCode.init(&.{0xf6}) else OpCode.init(&.{0xf7}), + .fisttp16 => OpCode.init(&.{0xdf}), + .fisttp32 => OpCode.init(&.{0xdb}), + .fisttp64 => OpCode.init(&.{0xdd}), + .fld32 => OpCode.init(&.{0xd9}), + .fld64 => OpCode.init(&.{0xdd}), + else => unreachable, }, .o => return switch (tag) { - .push => OpCode.oneByte(0x50), - .pop => OpCode.oneByte(0x58), - else => unreachable, + .push => OpCode.init(&.{0x50}), + .pop => OpCode.init(&.{0x58}), + else => unreachable, }, .i => return switch (tag) { - .push => OpCode.oneByte(if (is_one_byte) 0x6a else 0x68), - .@"test" => OpCode.oneByte(if (is_one_byte) 0xa8 else 0xa9), - .ret_near => OpCode.oneByte(0xc2), - .ret_far => OpCode.oneByte(0xca), - else => unreachable, + .push => if (is_one_byte) OpCode.init(&.{0x6a}) else OpCode.init(&.{0x68}), + .@"test" => if (is_one_byte) OpCode.init(&.{0xa8}) else OpCode.init(&.{0xa9}), + .ret_near => OpCode.init(&.{0xc2}), + .ret_far => OpCode.init(&.{0xca}), + else => unreachable, }, .m1 => return switch (tag) { - .shl, .sal, .shr, .sar => OpCode.oneByte(if (is_one_byte) 0xd0 else 0xd1), - else => unreachable, + .shl, .sal, + .shr, .sar => if (is_one_byte) OpCode.init(&.{0xd0}) else OpCode.init(&.{0xd1}), + else => unreachable, }, .mc => return switch (tag) { - .shl, .sal, .shr, .sar => OpCode.oneByte(if (is_one_byte) 0xd2 else 0xd3), - else => unreachable, + .shl, .sal, + .shr, .sar => if (is_one_byte) OpCode.init(&.{0xd2}) else OpCode.init(&.{0xd3}), + else => unreachable, }, .mi => return switch (tag) { - .adc, .add, .sub, .xor, .@"and", .@"or", .sbb, .cmp => OpCode.oneByte(if (is_one_byte) 0x80 else 0x81), - .mov => OpCode.oneByte(if (is_one_byte) 0xc6 else 0xc7), - .@"test" => OpCode.oneByte(if (is_one_byte) 0xf6 else 0xf7), - else => unreachable, + .adc, .add, + .sub, .xor, + .@"and", .@"or", + .sbb, .cmp => if (is_one_byte) OpCode.init(&.{0x80}) else OpCode.init(&.{0x81}), + .mov => if (is_one_byte) OpCode.init(&.{0xc6}) else OpCode.init(&.{0xc7}), + .@"test" => if (is_one_byte) OpCode.init(&.{0xf6}) else OpCode.init(&.{0xf7}), + else => unreachable, }, .mi8 => return switch (tag) { - .adc, .add, .sub, .xor, .@"and", .@"or", .sbb, .cmp => OpCode.oneByte(0x83), - .shl, .sal, .shr, .sar => OpCode.oneByte(if (is_one_byte) 0xc0 else 0xc1), - else => unreachable, + .adc, .add, + .sub, .xor, + .@"and", .@"or", + .sbb, .cmp => OpCode.init(&.{0x83}), + .shl, .sal, + .shr, .sar => if (is_one_byte) OpCode.init(&.{0xc0}) else OpCode.init(&.{0xc1}), + else => unreachable, }, .mr => return switch (tag) { - .adc => OpCode.oneByte(if (is_one_byte) 0x10 else 0x11), - .add => OpCode.oneByte(if (is_one_byte) 0x00 else 0x01), - .sub => OpCode.oneByte(if (is_one_byte) 0x28 else 0x29), - .xor => OpCode.oneByte(if (is_one_byte) 0x30 else 0x31), - .@"and" => OpCode.oneByte(if (is_one_byte) 0x20 else 0x21), - .@"or" => OpCode.oneByte(if (is_one_byte) 0x08 else 0x09), - .sbb => OpCode.oneByte(if (is_one_byte) 0x18 else 0x19), - .cmp => OpCode.oneByte(if (is_one_byte) 0x38 else 0x39), - .mov => OpCode.oneByte(if (is_one_byte) 0x88 else 0x89), - .@"test" => OpCode.oneByte(if (is_one_byte) 0x84 else 0x85), - else => unreachable, + .adc => if (is_one_byte) OpCode.init(&.{0x10}) else OpCode.init(&.{0x11}), + .add => if (is_one_byte) OpCode.init(&.{0x00}) else OpCode.init(&.{0x01}), + .sub => if (is_one_byte) OpCode.init(&.{0x28}) else OpCode.init(&.{0x29}), + .xor => if (is_one_byte) OpCode.init(&.{0x30}) else OpCode.init(&.{0x31}), + .@"and" => if (is_one_byte) OpCode.init(&.{0x20}) else OpCode.init(&.{0x21}), + .@"or" => if (is_one_byte) OpCode.init(&.{0x08}) else OpCode.init(&.{0x09}), + .sbb => if (is_one_byte) OpCode.init(&.{0x18}) else OpCode.init(&.{0x19}), + .cmp => if (is_one_byte) OpCode.init(&.{0x38}) else OpCode.init(&.{0x39}), + .mov => if (is_one_byte) OpCode.init(&.{0x88}) else OpCode.init(&.{0x89}), + .@"test" => if (is_one_byte) OpCode.init(&.{0x84}) else OpCode.init(&.{0x85}), + .movsd => OpCode.init(&.{0xf2,0x0f,0x11}), + .movss => OpCode.init(&.{0xf3,0x0f,0x11}), + else => unreachable, }, .rm => return switch (tag) { - .adc => OpCode.oneByte(if (is_one_byte) 0x12 else 0x13), - .add => OpCode.oneByte(if (is_one_byte) 0x02 else 0x03), - .sub => OpCode.oneByte(if (is_one_byte) 0x2a else 0x2b), - .xor => OpCode.oneByte(if (is_one_byte) 0x32 else 0x33), - .@"and" => OpCode.oneByte(if (is_one_byte) 0x22 else 0x23), - .@"or" => OpCode.oneByte(if (is_one_byte) 0x0a else 0x0b), - .sbb => OpCode.oneByte(if (is_one_byte) 0x1a else 0x1b), - .cmp => OpCode.oneByte(if (is_one_byte) 0x3a else 0x3b), - .mov => OpCode.oneByte(if (is_one_byte) 0x8a else 0x8b), - .movsx => OpCode.twoByte(0x0f, if (is_one_byte) 0xbe else 0xbf), - .movsxd => OpCode.oneByte(0x63), - .movzx => OpCode.twoByte(0x0f, if (is_one_byte) 0xb6 else 0xb7), - .lea => OpCode.oneByte(if (is_one_byte) 0x8c else 0x8d), - .imul => OpCode.twoByte(0x0f, 0xaf), - .cmove, .cmovz => OpCode.twoByte(0x0f, 0x44), - .cmovb, .cmovnae => OpCode.twoByte(0x0f, 0x42), - .cmovl, .cmovng => OpCode.twoByte(0x0f, 0x4c), + .adc => if (is_one_byte) OpCode.init(&.{0x12}) else OpCode.init(&.{0x13}), + .add => if (is_one_byte) OpCode.init(&.{0x02}) else OpCode.init(&.{0x03}), + .sub => if (is_one_byte) OpCode.init(&.{0x2a}) else OpCode.init(&.{0x2b}), + .xor => if (is_one_byte) OpCode.init(&.{0x32}) else OpCode.init(&.{0x33}), + .@"and" => if (is_one_byte) OpCode.init(&.{0x22}) else OpCode.init(&.{0x23}), + .@"or" => if (is_one_byte) OpCode.init(&.{0x0a}) else OpCode.init(&.{0x0b}), + .sbb => if (is_one_byte) OpCode.init(&.{0x1a}) else OpCode.init(&.{0x1b}), + .cmp => if (is_one_byte) OpCode.init(&.{0x3a}) else OpCode.init(&.{0x3b}), + .mov => if (is_one_byte) OpCode.init(&.{0x8a}) else OpCode.init(&.{0x8b}), + .movsx => if (is_one_byte) OpCode.init(&.{0x0f,0xbe}) else OpCode.init(&.{0x0f,0xbf}), + .movsxd => OpCode.init(&.{0x63}), + .movzx => if (is_one_byte) OpCode.init(&.{0x0f,0xb6}) else OpCode.init(&.{0x0f,0xb7}), + .lea => if (is_one_byte) OpCode.init(&.{0x8c}) else OpCode.init(&.{0x8d}), + .imul => OpCode.init(&.{0x0f,0xaf}), + .cmove, + .cmovz => OpCode.init(&.{0x0f,0x44}), + .cmovb, + .cmovnae => OpCode.init(&.{0x0f,0x42}), + .cmovl, + .cmovng => OpCode.init(&.{0x0f,0x4c}), + .movsd => OpCode.init(&.{0xf2,0x0f,0x10}), + .movss => OpCode.init(&.{0xf3,0x0f,0x10}), + .addsd => OpCode.init(&.{0xf2,0x0f,0x58}), + .addss => OpCode.init(&.{0xf3,0x0f,0x58}), + .ucomisd => OpCode.init(&.{0x66,0x0f,0x2e}), + .ucomiss => OpCode.init(&.{0x0f,0x2e}), else => unreachable, }, .oi => return switch (tag) { - .mov => OpCode.oneByte(if (is_one_byte) 0xb0 else 0xb8), + .mov => if (is_one_byte) OpCode.init(&.{0xb0}) else OpCode.init(&.{0xb8}), else => unreachable, }, .fd => return switch (tag) { - .mov => OpCode.oneByte(if (is_one_byte) 0xa0 else 0xa1), + .mov => if (is_one_byte) OpCode.init(&.{0xa0}) else OpCode.init(&.{0xa1}), else => unreachable, }, .td => return switch (tag) { - .mov => OpCode.oneByte(if (is_one_byte) 0xa2 else 0xa3), + .mov => if (is_one_byte) OpCode.init(&.{0xa2}) else OpCode.init(&.{0xa3}), else => unreachable, }, .rmi => return switch (tag) { - .imul => OpCode.oneByte(if (is_one_byte) 0x6b else 0x69), - else => unreachable, + .imul => if (is_one_byte) OpCode.init(&.{0x6b}) else OpCode.init(&.{0x69}), + else => unreachable, }, .mv => return switch (tag) { - .vmovsd, .vmovss => OpCode.oneByte(0x11), + .vmovsd, + .vmovss => OpCode.init(&.{0x11}), else => unreachable, }, .vm => return switch (tag) { - .vmovsd, .vmovss => OpCode.oneByte(0x10), - .vucomisd, .vucomiss => OpCode.oneByte(0x2e), + .vmovsd, + .vmovss => OpCode.init(&.{0x10}), + .vucomisd, + .vucomiss => OpCode.init(&.{0x2e}), else => unreachable, }, .rvm => return switch (tag) { - .vaddsd, .vaddss => OpCode.oneByte(0x58), - .vmovsd, .vmovss => OpCode.oneByte(0x10), + .vaddsd, + .vaddss => OpCode.init(&.{0x58}), + .vmovsd, + .vmovss => OpCode.init(&.{0x10}), else => unreachable, }, .rvmi => return switch (tag) { - .vcmpsd, .vcmpss => OpCode.oneByte(0xc2), - else => unreachable, + .vcmpsd, + .vcmpss => OpCode.init(&.{0xc2}), + else => unreachable, }, } + // zig fmt: on } inline fn getModRmExt(tag: Tag) u3 { diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 0f200d43e6..a35231a9b8 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -345,11 +345,29 @@ pub const Inst = struct { /// Nop nop, - /// AVX instructions + /// SSE instructions /// ops flags: form: /// 0b00 reg1, qword ptr [reg2 + imm32] /// 0b01 qword ptr [reg1 + imm32], reg2 /// 0b10 reg1, reg2 + mov_f64_sse, + mov_f32_sse, + + /// ops flags: form: + /// 0b00 reg1, reg2 + add_f64_sse, + add_f32_sse, + + /// ops flags: form: + /// 0b00 reg1, reg2 + cmp_f64_sse, + cmp_f32_sse, + + /// AVX instructions + /// ops flags: form: + /// 0b00 reg1, qword ptr [reg2 + imm32] + /// 0b01 qword ptr [reg1 + imm32], reg2 + /// 0b10 reg1, reg1, reg2 mov_f64_avx, mov_f32_avx, @@ -359,7 +377,7 @@ pub const Inst = struct { add_f32_avx, /// ops flags: form: - /// + /// 0b00 reg1, reg1, reg2 cmp_f64_avx, cmp_f32_avx, diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index 64a067d050..6429781516 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -441,6 +441,17 @@ pub const Encoder = struct { self.code.appendAssumeCapacity(opcode); } + /// Encodes a 3 byte opcode + /// + /// e.g. MOVSD has the opcode 0xf2 0x0f 0x10 + /// + /// encoder.opcode_3byte(0xf2, 0x0f, 0x10); + pub fn opcode_3byte(self: Self, prefix_1: u8, prefix_2: u8, opcode: u8) void { + self.code.appendAssumeCapacity(prefix_1); + self.code.appendAssumeCapacity(prefix_2); + self.code.appendAssumeCapacity(opcode); + } + /// Encodes a 1 byte opcode with a reg field /// /// Remember to add a REX prefix byte if reg is extended!