diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index 0993d352d7..f4f2b1e5e5 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -21,9 +21,6 @@ const DW = std.dwarf; const leb128 = std.leb; const log = std.log.scoped(.codegen); const build_options = @import("build_options"); -const RegisterManagerFn = @import("../../register_manager.zig").RegisterManager; -const RegisterManager = RegisterManagerFn(Self, Register, &callee_preserved_regs); -const RegisterLock = RegisterManager.RegisterLock; const GenerateSymbolError = @import("../../codegen.zig").GenerateSymbolError; const FnResult = @import("../../codegen.zig").FnResult; @@ -31,11 +28,14 @@ const DebugInfoOutput = @import("../../codegen.zig").DebugInfoOutput; const bits = @import("bits.zig"); const abi = @import("abi.zig"); +const RegisterManager = abi.RegisterManager; +const RegisterLock = RegisterManager.RegisterLock; const Register = bits.Register; const Instruction = bits.Instruction; const callee_preserved_regs = abi.callee_preserved_regs; const c_abi_int_param_regs = abi.c_abi_int_param_regs; const c_abi_int_return_regs = abi.c_abi_int_return_regs; +const gp = abi.RegisterClass.gp; const InnerError = error{ OutOfMemory, @@ -888,7 +888,7 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue { if (reg_ok) { // Make sure the type can fit in a register before we try to allocate one. if (abi_size <= 8) { - if (self.register_manager.tryAllocReg(inst)) |reg| { + if (self.register_manager.tryAllocReg(inst, gp)) |reg| { return MCValue{ .register = registerAlias(reg, abi_size) }; } } @@ -951,7 +951,7 @@ fn spillCompareFlagsIfOccupied(self: *Self) !void { /// allocated. A second call to `copyToTmpRegister` may return the same register. /// This can have a side effect of spilling instructions to the stack to free up a register. fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { - const raw_reg = try self.register_manager.allocReg(null); + const raw_reg = try self.register_manager.allocReg(null, gp); const reg = registerAlias(raw_reg, ty.abiSize(self.target.*)); try self.genSetReg(ty, reg, mcv); return reg; @@ -961,7 +961,7 @@ fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { /// `reg_owner` is the instruction that gets associated with the register in the register table. /// This can have a side effect of spilling instructions to the stack to free up a register. 
fn copyToNewRegister(self: *Self, reg_owner: Air.Inst.Index, mcv: MCValue) !MCValue { - const raw_reg = try self.register_manager.allocReg(reg_owner); + const raw_reg = try self.register_manager.allocReg(reg_owner, gp); const ty = self.air.typeOfIndex(reg_owner); const reg = registerAlias(raw_reg, ty.abiSize(self.target.*)); try self.genSetReg(self.air.typeOfIndex(reg_owner), reg, mcv); @@ -1074,11 +1074,11 @@ fn trunc( if (operand == .register and self.reuseOperand(inst, ty_op.operand, 0, operand)) { break :blk registerAlias(operand_reg, dest_ty.abiSize(self.target.*)); } else { - const raw_reg = try self.register_manager.allocReg(inst); + const raw_reg = try self.register_manager.allocReg(inst, gp); break :blk registerAlias(raw_reg, dest_ty.abiSize(self.target.*)); } } else blk: { - const raw_reg = try self.register_manager.allocReg(null); + const raw_reg = try self.register_manager.allocReg(null, gp); break :blk registerAlias(raw_reg, dest_ty.abiSize(self.target.*)); }; @@ -1160,7 +1160,7 @@ fn airNot(self: *Self, inst: Air.Inst.Index) !void { break :blk op_reg; } - const raw_reg = try self.register_manager.allocReg(null); + const raw_reg = try self.register_manager.allocReg(null, gp); break :blk raw_reg.to32(); }; @@ -1193,7 +1193,7 @@ fn airNot(self: *Self, inst: Air.Inst.Index) !void { break :blk op_reg; } - const raw_reg = try self.register_manager.allocReg(null); + const raw_reg = try self.register_manager.allocReg(null, gp); break :blk registerAlias(raw_reg, operand_ty.abiSize(self.target.*)); }; @@ -1293,7 +1293,7 @@ fn binOpRegister( break :inst Air.refToIndex(md.lhs).?; } else null; - const raw_reg = try self.register_manager.allocReg(track_inst); + const raw_reg = try self.register_manager.allocReg(track_inst, gp); const reg = registerAlias(raw_reg, lhs_ty.abiSize(self.target.*)); if (track_inst) |inst| branch.inst_table.putAssumeCapacity(inst, .{ .register = reg }); @@ -1308,7 +1308,7 @@ fn binOpRegister( break :inst Air.refToIndex(md.rhs).?; } else null; - const raw_reg = try self.register_manager.allocReg(track_inst); + const raw_reg = try self.register_manager.allocReg(track_inst, gp); const reg = registerAlias(raw_reg, rhs_ty.abiAlignment(self.target.*)); if (track_inst) |inst| branch.inst_table.putAssumeCapacity(inst, .{ .register = reg }); @@ -1326,11 +1326,11 @@ fn binOpRegister( } else if (rhs_is_register and self.reuseOperand(md.inst, md.rhs, 1, rhs)) { break :blk rhs_reg; } else { - const raw_reg = try self.register_manager.allocReg(md.inst); + const raw_reg = try self.register_manager.allocReg(md.inst, gp); break :blk registerAlias(raw_reg, lhs_ty.abiSize(self.target.*)); } } else blk: { - const raw_reg = try self.register_manager.allocReg(null); + const raw_reg = try self.register_manager.allocReg(null, gp); break :blk registerAlias(raw_reg, lhs_ty.abiSize(self.target.*)); }, }; @@ -1431,7 +1431,7 @@ fn binOpImmediate( ).?; } else null; - const raw_reg = try self.register_manager.allocReg(track_inst); + const raw_reg = try self.register_manager.allocReg(track_inst, gp); const reg = registerAlias(raw_reg, lhs_ty.abiSize(self.target.*)); if (track_inst) |inst| branch.inst_table.putAssumeCapacity(inst, .{ .register = reg }); @@ -1452,11 +1452,11 @@ fn binOpImmediate( )) { break :blk lhs_reg; } else { - const raw_reg = try self.register_manager.allocReg(md.inst); + const raw_reg = try self.register_manager.allocReg(md.inst, gp); break :blk registerAlias(raw_reg, lhs_ty.abiSize(self.target.*)); } } else blk: { - const raw_reg = try self.register_manager.allocReg(null); 
+ const raw_reg = try self.register_manager.allocReg(null, gp); break :blk registerAlias(raw_reg, lhs_ty.abiSize(self.target.*)); }, }; @@ -1872,7 +1872,7 @@ fn airOverflow(self: *Self, inst: Air.Inst.Index) !void { const dest_reg_lock = self.register_manager.lockRegAssumeUnused(dest_reg); defer self.register_manager.unlockReg(dest_reg_lock); - const raw_truncated_reg = try self.register_manager.allocReg(null); + const raw_truncated_reg = try self.register_manager.allocReg(null, gp); const truncated_reg = registerAlias(raw_truncated_reg, lhs_ty.abiSize(self.target.*)); const truncated_reg_lock = self.register_manager.lockRegAssumeUnused(truncated_reg); defer self.register_manager.unlockReg(truncated_reg_lock); @@ -1983,7 +1983,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { const dest_reg_lock = self.register_manager.lockRegAssumeUnused(dest_reg); defer self.register_manager.unlockReg(dest_reg_lock); - const truncated_reg = try self.register_manager.allocReg(null); + const truncated_reg = try self.register_manager.allocReg(null, gp); const truncated_reg_lock = self.register_manager.lockRegAssumeUnused(truncated_reg); defer self.register_manager.unlockReg(truncated_reg_lock); @@ -2048,7 +2048,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { defer if (rhs_lock) |reg| self.register_manager.unlockReg(reg); const lhs_reg = if (lhs_is_register) lhs.register else blk: { - const raw_reg = try self.register_manager.allocReg(null); + const raw_reg = try self.register_manager.allocReg(null, gp); const reg = registerAlias(raw_reg, lhs_ty.abiSize(self.target.*)); break :blk reg; }; @@ -2056,7 +2056,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { defer if (new_lhs_lock) |reg| self.register_manager.unlockReg(reg); const rhs_reg = if (rhs_is_register) rhs.register else blk: { - const raw_reg = try self.register_manager.allocReg(null); + const raw_reg = try self.register_manager.allocReg(null, gp); const reg = registerAlias(raw_reg, rhs_ty.abiAlignment(self.target.*)); break :blk reg; }; @@ -2067,7 +2067,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { if (!rhs_is_register) try self.genSetReg(rhs_ty, rhs_reg, rhs); const dest_reg = blk: { - const raw_reg = try self.register_manager.allocReg(null); + const raw_reg = try self.register_manager.allocReg(null, gp); const reg = registerAlias(raw_reg, lhs_ty.abiSize(self.target.*)); break :blk reg; }; @@ -2086,7 +2086,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { } }, }); - const dest_high_reg = try self.register_manager.allocReg(null); + const dest_high_reg = try self.register_manager.allocReg(null, gp); const dest_high_reg_lock = self.register_manager.lockRegAssumeUnused(dest_high_reg); defer self.register_manager.unlockReg(dest_high_reg_lock); @@ -2136,7 +2136,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { } }, .unsigned => { - const dest_high_reg = try self.register_manager.allocReg(null); + const dest_high_reg = try self.register_manager.allocReg(null, gp); const dest_high_reg_lock = self.register_manager.lockRegAssumeUnused(dest_high_reg); defer self.register_manager.unlockReg(dest_high_reg_lock); @@ -2192,7 +2192,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { }, } - const truncated_reg = try self.register_manager.allocReg(null); + const truncated_reg = try self.register_manager.allocReg(null, gp); const truncated_reg_lock = self.register_manager.lockRegAssumeUnused(truncated_reg); defer 
self.register_manager.unlockReg(truncated_reg_lock); @@ -2663,7 +2663,7 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo }, .stack_offset => |off| { if (elem_size <= 8) { - const raw_tmp_reg = try self.register_manager.allocReg(null); + const raw_tmp_reg = try self.register_manager.allocReg(null, gp); const tmp_reg = registerAlias(raw_tmp_reg, elem_size); const tmp_reg_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_reg_lock); @@ -2672,7 +2672,7 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo try self.genSetStack(elem_ty, off, MCValue{ .register = tmp_reg }); } else { // TODO optimize the register allocation - const regs = try self.register_manager.allocRegs(4, .{ null, null, null, null }); + const regs = try self.register_manager.allocRegs(4, .{ null, null, null, null }, gp); const regs_locks = self.register_manager.lockRegsAssumeUnused(4, regs); defer for (regs_locks) |reg| { self.register_manager.unlockReg(reg); @@ -2887,7 +2887,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type }, else => { if (abi_size <= 8) { - const raw_tmp_reg = try self.register_manager.allocReg(null); + const raw_tmp_reg = try self.register_manager.allocReg(null, gp); const tmp_reg = registerAlias(raw_tmp_reg, abi_size); const tmp_reg_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_reg_lock); @@ -3002,7 +3002,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { // TODO return special MCValue condition flags // get overflow bit: set register to C flag // resp. V flag - const raw_dest_reg = try self.register_manager.allocReg(null); + const raw_dest_reg = try self.register_manager.allocReg(null, gp); const dest_reg = raw_dest_reg.to32(); // C flag: cset reg, cs @@ -4065,7 +4065,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro const overflow_bit_ty = ty.structFieldType(1); const overflow_bit_offset = @intCast(u32, ty.structFieldOffset(1, self.target.*)); - const raw_cond_reg = try self.register_manager.allocReg(null); + const raw_cond_reg = try self.register_manager.allocReg(null, gp); const cond_reg = registerAlias( raw_cond_reg, @intCast(u32, overflow_bit_ty.abiSize(self.target.*)), @@ -4113,7 +4113,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro const ptr_ty = Type.initPayload(&ptr_ty_payload.base); // TODO call extern memcpy - const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null }); + const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null }, gp); const regs_locks = self.register_manager.lockRegsAssumeUnused(5, regs); defer for (regs_locks) |reg| { self.register_manager.unlockReg(reg); diff --git a/src/arch/aarch64/abi.zig b/src/arch/aarch64/abi.zig index 1c5225104a..89a3a6c21d 100644 --- a/src/arch/aarch64/abi.zig +++ b/src/arch/aarch64/abi.zig @@ -1,6 +1,8 @@ +const std = @import("std"); const builtin = @import("builtin"); const bits = @import("bits.zig"); const Register = bits.Register; +const RegisterManagerFn = @import("../../register_manager.zig").RegisterManager; const callee_preserved_regs_impl = if (builtin.os.tag.isDarwin()) struct { pub const callee_preserved_regs = [_]Register{ @@ -18,3 +20,21 @@ pub const callee_preserved_regs = callee_preserved_regs_impl.callee_preserved_re pub const c_abi_int_param_regs = [_]Register{ .x0, .x1, .x2, .x3, .x4, 
.x5, .x6, .x7 }; pub const c_abi_int_return_regs = [_]Register{ .x0, .x1, .x2, .x3, .x4, .x5, .x6, .x7 }; + +const allocatable_registers = callee_preserved_regs; +pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_registers); + +// Register classes +const RegisterBitSet = RegisterManager.RegisterBitSet; +pub const RegisterClass = struct { + pub const gp: RegisterBitSet = std.math.maxInt(RegisterBitSet); + // TODO uncomment once #11680 is fixed. + // pub const gp: RegisterBitSet = blk: { + // var set = RegisterBitSet.initEmpty(); + // set.setRangeValue(.{ + // .start = 0, + // .end = callee_preserved_regs.len, + // }, true); + // break :blk set; + // }; +}; diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index bfc7f687fa..4f121dd56e 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -21,9 +21,6 @@ const DW = std.dwarf; const leb128 = std.leb; const log = std.log.scoped(.codegen); const build_options = @import("build_options"); -const RegisterManagerFn = @import("../../register_manager.zig").RegisterManager; -const RegisterManager = RegisterManagerFn(Self, Register, &allocatable_registers); -const RegisterLock = RegisterManager.RegisterLock; const FnResult = @import("../../codegen.zig").FnResult; const GenerateSymbolError = @import("../../codegen.zig").GenerateSymbolError; @@ -31,14 +28,16 @@ const DebugInfoOutput = @import("../../codegen.zig").DebugInfoOutput; const bits = @import("bits.zig"); const abi = @import("abi.zig"); +const RegisterManager = abi.RegisterManager; +const RegisterLock = RegisterManager.RegisterLock; const Register = bits.Register; const Instruction = bits.Instruction; const Condition = bits.Condition; const callee_preserved_regs = abi.callee_preserved_regs; const caller_preserved_regs = abi.caller_preserved_regs; -const allocatable_registers = abi.allocatable_registers; const c_abi_int_param_regs = abi.c_abi_int_param_regs; const c_abi_int_return_regs = abi.c_abi_int_return_regs; +const gp = abi.RegisterClass.gp; const InnerError = error{ OutOfMemory, @@ -874,7 +873,7 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue { const ptr_bits = self.target.cpu.arch.ptrBitWidth(); const ptr_bytes: u64 = @divExact(ptr_bits, 8); if (abi_size <= ptr_bytes) { - if (self.register_manager.tryAllocReg(inst)) |reg| { + if (self.register_manager.tryAllocReg(inst, gp)) |reg| { return MCValue{ .register = reg }; } } @@ -939,7 +938,7 @@ fn spillCompareFlagsIfOccupied(self: *Self) !void { /// allocated. A second call to `copyToTmpRegister` may return the same register. /// This can have a side effect of spilling instructions to the stack to free up a register. fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { - const reg = try self.register_manager.allocReg(null); + const reg = try self.register_manager.allocReg(null, gp); try self.genSetReg(ty, reg, mcv); return reg; } @@ -948,7 +947,7 @@ fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { /// `reg_owner` is the instruction that gets associated with the register in the register table. /// This can have a side effect of spilling instructions to the stack to free up a register. 
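Each backend's abi.zig now owns both the RegisterManager instantiation and a RegisterClass namespace, with `gp` defined as a bitset over the allocatable register array. Below is a standalone sketch of the construction the TODO comments defer until #11680 is fixed, using a toy register set rather than the real `bits.zig` enums; on current Zig the comptime block compiles, and the interim `std.math.maxInt` form in the patch expresses the same "every allocatable register" set as an all-ones value:

```zig
const std = @import("std");

// Toy register set; the real backends use the Register enums from bits.zig.
const Register = enum { x0, x1, x2, x3 };
const allocatable_registers = [_]Register{ .x0, .x1, .x2, .x3 };

// One bit per allocatable register, mirroring RegisterManager.RegisterBitSet.
const RegisterBitSet = std.StaticBitSet(allocatable_registers.len);

// The construction the TODO comment defers: start from an empty set and
// mark the range of registers belonging to the class.
const gp: RegisterBitSet = blk: {
    var set = RegisterBitSet.initEmpty();
    set.setRangeValue(.{ .start = 0, .end = allocatable_registers.len }, true);
    break :blk set;
};

test "gp contains every allocatable register" {
    var i: usize = 0;
    while (i < allocatable_registers.len) : (i += 1) {
        try std.testing.expect(gp.isSet(i));
    }
}
```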
fn copyToNewRegister(self: *Self, reg_owner: Air.Inst.Index, mcv: MCValue) !MCValue { - const reg = try self.register_manager.allocReg(reg_owner); + const reg = try self.register_manager.allocReg(reg_owner, gp); try self.genSetReg(self.air.typeOfIndex(reg_owner), reg, mcv); return MCValue{ .register = reg }; } @@ -1065,9 +1064,9 @@ fn trunc( if (operand == .register and self.reuseOperand(inst, ty_op.operand, 0, operand)) { break :blk operand_reg; } else { - break :blk try self.register_manager.allocReg(inst); + break :blk try self.register_manager.allocReg(inst, gp); } - } else try self.register_manager.allocReg(null); + } else try self.register_manager.allocReg(null, gp); switch (info_b.bits) { 32 => { @@ -1153,7 +1152,7 @@ fn airNot(self: *Self, inst: Air.Inst.Index) !void { break :blk op_reg; } - break :blk try self.register_manager.allocReg(null); + break :blk try self.register_manager.allocReg(null, gp); }; _ = try self.addInst(.{ @@ -1183,7 +1182,7 @@ fn airNot(self: *Self, inst: Air.Inst.Index) !void { break :blk op_reg; } - break :blk try self.register_manager.allocReg(null); + break :blk try self.register_manager.allocReg(null, gp); }; _ = try self.addInst(.{ @@ -1254,9 +1253,9 @@ fn minMax( } else if (rhs_is_register and self.reuseOperand(inst, bin_op.rhs, 1, rhs)) { break :blk rhs_reg; } else { - break :blk try self.register_manager.allocReg(inst); + break :blk try self.register_manager.allocReg(inst, gp); } - } else try self.register_manager.allocReg(null); + } else try self.register_manager.allocReg(null, gp); // lhs == reg should have been checked by airMinMax // @@ -1438,7 +1437,7 @@ fn airOverflow(self: *Self, inst: Air.Inst.Index) !void { const dest_reg_lock = self.register_manager.lockRegAssumeUnused(dest_reg); defer self.register_manager.unlockReg(dest_reg_lock); - const truncated_reg = try self.register_manager.allocReg(null); + const truncated_reg = try self.register_manager.allocReg(null, gp); const truncated_reg_lock = self.register_manager.lockRegAssumeUnused(truncated_reg); defer self.register_manager.unlockReg(truncated_reg_lock); @@ -1543,7 +1542,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { const dest_reg_lock = self.register_manager.lockRegAssumeUnused(dest_reg); defer self.register_manager.unlockReg(dest_reg_lock); - const truncated_reg = try self.register_manager.allocReg(null); + const truncated_reg = try self.register_manager.allocReg(null, gp); const truncated_reg_lock = self.register_manager.lockRegAssumeUnused(truncated_reg); defer self.register_manager.unlockReg(truncated_reg_lock); @@ -1582,18 +1581,18 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { const lhs_reg = if (lhs_is_register) lhs.register else - try self.register_manager.allocReg(null); + try self.register_manager.allocReg(null, gp); const new_lhs_lock = self.register_manager.lockReg(lhs_reg); defer if (new_lhs_lock) |reg| self.register_manager.unlockReg(reg); const rhs_reg = if (rhs_is_register) rhs.register else - try self.register_manager.allocReg(null); + try self.register_manager.allocReg(null, gp); const new_rhs_lock = self.register_manager.lockReg(rhs_reg); defer if (new_rhs_lock) |reg| self.register_manager.unlockReg(reg); - const dest_regs = try self.register_manager.allocRegs(2, .{ null, null }); + const dest_regs = try self.register_manager.allocRegs(2, .{ null, null }, gp); const dest_regs_locks = self.register_manager.lockRegsAssumeUnused(2, dest_regs); defer for (dest_regs_locks) |reg| { self.register_manager.unlockReg(reg); @@ -1604,7 
+1603,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { if (!lhs_is_register) try self.genSetReg(lhs_ty, lhs_reg, lhs); if (!rhs_is_register) try self.genSetReg(rhs_ty, rhs_reg, rhs); - const truncated_reg = try self.register_manager.allocReg(null); + const truncated_reg = try self.register_manager.allocReg(null, gp); const truncated_reg_lock = self.register_manager.lockRegAssumeUnused(truncated_reg); defer self.register_manager.unlockReg(truncated_reg_lock); @@ -2026,7 +2025,7 @@ fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void { const base_reg_lock = self.register_manager.lockRegAssumeUnused(base_reg); defer self.register_manager.unlockReg(base_reg_lock); - const dst_reg = try self.register_manager.allocReg(inst); + const dst_reg = try self.register_manager.allocReg(inst, gp); const dst_mcv = MCValue{ .register = dst_reg }; const dst_reg_lock = self.register_manager.lockRegAssumeUnused(dst_reg); defer self.register_manager.unlockReg(dst_reg_lock); @@ -2234,7 +2233,7 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo }, .stack_offset => |off| { if (elem_size <= 4) { - const tmp_reg = try self.register_manager.allocReg(null); + const tmp_reg = try self.register_manager.allocReg(null, gp); const tmp_reg_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_reg_lock); @@ -2242,7 +2241,7 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo try self.genSetStack(elem_ty, off, MCValue{ .register = tmp_reg }); } else { // TODO optimize the register allocation - const regs = try self.register_manager.allocRegs(4, .{ null, null, null, null }); + const regs = try self.register_manager.allocRegs(4, .{ null, null, null, null }, gp); const regs_locks = self.register_manager.lockRegsAssumeUnused(4, regs); defer for (regs_locks) |reg_locked| { self.register_manager.unlockReg(reg_locked); @@ -2271,7 +2270,7 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo .stack_offset, .stack_argument_offset, => { - const reg = try self.register_manager.allocReg(null); + const reg = try self.register_manager.allocReg(null, gp); const reg_lock = self.register_manager.lockRegAssumeUnused(reg); defer self.register_manager.unlockReg(reg_lock); @@ -2338,14 +2337,14 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type }, else => { if (elem_size <= 4) { - const tmp_reg = try self.register_manager.allocReg(null); + const tmp_reg = try self.register_manager.allocReg(null, gp); const tmp_reg_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_reg_lock); try self.genSetReg(value_ty, tmp_reg, value); try self.store(ptr, .{ .register = tmp_reg }, ptr_ty, value_ty); } else { - const regs = try self.register_manager.allocRegs(4, .{ null, null, null, null }); + const regs = try self.register_manager.allocRegs(4, .{ null, null, null, null }, gp); const regs_locks = self.register_manager.lockRegsAssumeUnused(4, regs); defer for (regs_locks) |reg| { self.register_manager.unlockReg(reg); @@ -2487,7 +2486,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { 1 => { // get overflow bit: set register to C flag // resp. 
V flag - const dest_reg = try self.register_manager.allocReg(null); + const dest_reg = try self.register_manager.allocReg(null, gp); // mov reg, #0 _ = try self.addInst(.{ @@ -2567,7 +2566,7 @@ fn binOpRegister( break :inst Air.refToIndex(md.lhs).?; } else null; - const reg = try self.register_manager.allocReg(track_inst); + const reg = try self.register_manager.allocReg(track_inst, gp); if (track_inst) |inst| branch.inst_table.putAssumeCapacity(inst, .{ .register = reg }); @@ -2581,7 +2580,7 @@ fn binOpRegister( break :inst Air.refToIndex(md.rhs).?; } else null; - const reg = try self.register_manager.allocReg(track_inst); + const reg = try self.register_manager.allocReg(track_inst, gp); if (track_inst) |inst| branch.inst_table.putAssumeCapacity(inst, .{ .register = reg }); @@ -2598,9 +2597,9 @@ fn binOpRegister( } else if (rhs_is_register and self.reuseOperand(md.inst, md.rhs, 1, rhs)) { break :blk rhs_reg; } else { - break :blk try self.register_manager.allocReg(md.inst); + break :blk try self.register_manager.allocReg(md.inst, gp); } - } else try self.register_manager.allocReg(null), + } else try self.register_manager.allocReg(null, gp), }; if (!lhs_is_register) try self.genSetReg(lhs_ty, lhs_reg, lhs); @@ -2684,7 +2683,7 @@ fn binOpImmediate( ).?; } else null; - const reg = try self.register_manager.allocReg(track_inst); + const reg = try self.register_manager.allocReg(track_inst, gp); if (track_inst) |inst| branch.inst_table.putAssumeCapacity(inst, .{ .register = reg }); @@ -2704,9 +2703,9 @@ fn binOpImmediate( )) { break :blk lhs_reg; } else { - break :blk try self.register_manager.allocReg(md.inst); + break :blk try self.register_manager.allocReg(md.inst, gp); } - } else try self.register_manager.allocReg(null), + } else try self.register_manager.allocReg(null, gp), }; if (!lhs_is_register) try self.genSetReg(lhs_ty, lhs_reg, lhs); @@ -4363,7 +4362,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro const overflow_bit_ty = ty.structFieldType(1); const overflow_bit_offset = @intCast(u32, ty.structFieldOffset(1, self.target.*)); - const cond_reg = try self.register_manager.allocReg(null); + const cond_reg = try self.register_manager.allocReg(null, gp); // C flag: movcs reg, #1 // V flag: movvs reg, #1 @@ -4408,7 +4407,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro const ptr_ty = Type.initPayload(&ptr_ty_payload.base); // TODO call extern memcpy - const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null }); + const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null }, gp); const src_reg = regs[0]; const dst_reg = regs[1]; const len_reg = regs[2]; @@ -4782,7 +4781,7 @@ fn genSetStackArgument(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) I const ptr_ty = Type.initPayload(&ptr_ty_payload.base); // TODO call extern memcpy - const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null }); + const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null }, gp); const src_reg = regs[0]; const dst_reg = regs[1]; const len_reg = regs[2]; diff --git a/src/arch/arm/abi.zig b/src/arch/arm/abi.zig index 4073b92222..c76c3b0ea0 100644 --- a/src/arch/arm/abi.zig +++ b/src/arch/arm/abi.zig @@ -1,9 +1,28 @@ +const std = @import("std"); const bits = @import("bits.zig"); const Register = bits.Register; +const RegisterManagerFn = @import("../../register_manager.zig").RegisterManager; pub const callee_preserved_regs = 
[_]Register{ .r4, .r5, .r6, .r7, .r8, .r10 }; pub const caller_preserved_regs = [_]Register{ .r0, .r1, .r2, .r3 }; -pub const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs; pub const c_abi_int_param_regs = [_]Register{ .r0, .r1, .r2, .r3 }; pub const c_abi_int_return_regs = [_]Register{ .r0, .r1 }; + +const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs; +pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_registers); + +// Register classes +const RegisterBitSet = RegisterManager.RegisterBitSet; +pub const RegisterClass = struct { + pub const gp: RegisterBitSet = std.math.maxInt(RegisterBitSet); + // TODO uncomment once #11680 is fixed. + // pub const gp: RegisterBitSet = blk: { + // var set = RegisterBitSet.initEmpty(); + // set.setRangeValue(.{ + // .start = 0, + // .end = caller_preserved_regs.len + callee_preserved_regs.len, + // }, true); + // break :blk set; + // }; +}; diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index c2f9b2e36d..b713161053 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -21,9 +21,6 @@ const DW = std.dwarf; const leb128 = std.leb; const log = std.log.scoped(.codegen); const build_options = @import("build_options"); -const RegisterManagerFn = @import("../../register_manager.zig").RegisterManager; -const RegisterManager = RegisterManagerFn(Self, Register, &callee_preserved_regs); -const RegisterLock = RegisterManager.RegisterLock; const FnResult = @import("../../codegen.zig").FnResult; const GenerateSymbolError = @import("../../codegen.zig").GenerateSymbolError; @@ -32,8 +29,11 @@ const DebugInfoOutput = @import("../../codegen.zig").DebugInfoOutput; const bits = @import("bits.zig"); const abi = @import("abi.zig"); const Register = bits.Register; +const RegisterManager = abi.RegisterManager; +const RegisterLock = RegisterManager.RegisterLock; const Instruction = abi.Instruction; const callee_preserved_regs = abi.callee_preserved_regs; +const gp = abi.RegisterClass.gp; const InnerError = error{ OutOfMemory, @@ -803,7 +803,7 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue { const ptr_bits = self.target.cpu.arch.ptrBitWidth(); const ptr_bytes: u64 = @divExact(ptr_bits, 8); if (abi_size <= ptr_bytes) { - if (self.register_manager.tryAllocReg(inst)) |reg| { + if (self.register_manager.tryAllocReg(inst, gp)) |reg| { return MCValue{ .register = reg }; } } @@ -826,7 +826,7 @@ pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void /// allocated. A second call to `copyToTmpRegister` may return the same register. /// This can have a side effect of spilling instructions to the stack to free up a register. fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { - const reg = try self.register_manager.allocReg(null); + const reg = try self.register_manager.allocReg(null, gp); try self.genSetReg(ty, reg, mcv); return reg; } @@ -835,7 +835,7 @@ fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { /// `reg_owner` is the instruction that gets associated with the register in the register table. /// This can have a side effect of spilling instructions to the stack to free up a register. 
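Every `allocReg`/`tryAllocReg`/`allocRegs` call site in these hunks now passes a register class, so allocation can be confined to a subset of the allocatable registers. A toy model of that shape follows; `ToyRegisterManager` is hypothetical, and the real manager additionally tracks owning instructions and can spill to free a register:

```zig
const std = @import("std");

// A toy register file; the real backends use their bits.zig Register enums.
const Register = enum(u2) { r0, r1, r2, r3 };
const allocatable = [_]Register{ .r0, .r1, .r2, .r3 };
const RegisterBitSet = std.StaticBitSet(allocatable.len);

const ToyRegisterManager = struct {
    free: RegisterBitSet = RegisterBitSet.initFull(),

    /// Mirrors the shape of `tryAllocReg(inst, gp)` above: the class
    /// bitset narrows which registers are candidates for allocation.
    fn tryAllocReg(self: *ToyRegisterManager, class: RegisterBitSet) ?Register {
        const candidates = self.free.intersectWith(class);
        const index = candidates.findFirstSet() orelse return null;
        self.free.unset(index);
        return allocatable[index];
    }
};

test "allocation is confined to the requested class" {
    var rm = ToyRegisterManager{};
    // A class containing only r2 and r3.
    var class = RegisterBitSet.initEmpty();
    class.setRangeValue(.{ .start = 2, .end = 4 }, true);
    try std.testing.expectEqual(Register.r2, rm.tryAllocReg(class).?);
    try std.testing.expectEqual(Register.r3, rm.tryAllocReg(class).?);
    try std.testing.expect(rm.tryAllocReg(class) == null);
}
```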
fn copyToNewRegister(self: *Self, reg_owner: Air.Inst.Index, mcv: MCValue) !MCValue { - const reg = try self.register_manager.allocReg(reg_owner); + const reg = try self.register_manager.allocReg(reg_owner, gp); try self.genSetReg(self.air.typeOfIndex(reg_owner), reg, mcv); return MCValue{ .register = reg }; } @@ -958,7 +958,7 @@ fn binOpRegister( break :inst Air.refToIndex(bin_op.lhs).?; } else null; - const reg = try self.register_manager.allocReg(track_inst); + const reg = try self.register_manager.allocReg(track_inst, gp); if (track_inst) |inst| branch.inst_table.putAssumeCapacity(inst, .{ .register = reg }); @@ -973,7 +973,7 @@ fn binOpRegister( break :inst Air.refToIndex(bin_op.rhs).?; } else null; - const reg = try self.register_manager.allocReg(track_inst); + const reg = try self.register_manager.allocReg(track_inst, gp); if (track_inst) |inst| branch.inst_table.putAssumeCapacity(inst, .{ .register = reg }); @@ -990,9 +990,9 @@ fn binOpRegister( } else if (rhs_is_register and self.reuseOperand(inst, bin_op.rhs, 1, rhs)) { break :blk rhs_reg; } else { - break :blk try self.register_manager.allocReg(inst); + break :blk try self.register_manager.allocReg(inst, gp); } - } else try self.register_manager.allocReg(null); + } else try self.register_manager.allocReg(null, gp); if (!lhs_is_register) try self.genSetReg(lhs_ty, lhs_reg, lhs); if (!rhs_is_register) try self.genSetReg(rhs_ty, rhs_reg, rhs); @@ -1482,7 +1482,7 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo .memory, .stack_offset, => { - const reg = try self.register_manager.allocReg(null); + const reg = try self.register_manager.allocReg(null, gp); const reg_lock = self.register_manager.lockRegAssumeUnused(reg); defer self.register_manager.unlockReg(reg_lock); diff --git a/src/arch/riscv64/abi.zig b/src/arch/riscv64/abi.zig index dd0feeea49..30d3719a46 100644 --- a/src/arch/riscv64/abi.zig +++ b/src/arch/riscv64/abi.zig @@ -1,6 +1,26 @@ +const std = @import("std"); const bits = @import("bits.zig"); const Register = bits.Register; +const RegisterManagerFn = @import("../../register_manager.zig").RegisterManager; pub const callee_preserved_regs = [_]Register{ .s0, .s1, .s2, .s3, .s4, .s5, .s6, .s7, .s8, .s9, .s10, .s11, }; + +const allocatable_registers = callee_preserved_regs; +pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_registers); + +// Register classes +const RegisterBitSet = RegisterManager.RegisterBitSet; +pub const RegisterClass = struct { + pub const gp: RegisterBitSet = std.math.maxInt(RegisterBitSet); + // TODO uncomment once #11680 is fixed. 
+ // pub const gp: RegisterBitSet = blk: { + // var set = RegisterBitSet.initEmpty(); + // set.setRangeValue(.{ + // .start = 0, + // .end = callee_preserved_regs.len, + // }, true); + // break :blk set; + // }; +}; diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index aa679cac6d..ff066f78f2 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -21,9 +21,6 @@ const Type = @import("../../type.zig").Type; const GenerateSymbolError = @import("../../codegen.zig").GenerateSymbolError; const FnResult = @import("../../codegen.zig").FnResult; const DebugInfoOutput = @import("../../codegen.zig").DebugInfoOutput; -const RegisterManagerFn = @import("../../register_manager.zig").RegisterManager; -const RegisterManager = RegisterManagerFn(Self, Register, &abi.allocatable_regs); -const RegisterLock = RegisterManager.RegisterLock; const build_options = @import("build_options"); @@ -31,7 +28,10 @@ const bits = @import("bits.zig"); const abi = @import("abi.zig"); const Instruction = bits.Instruction; const ShiftWidth = Instruction.ShiftWidth; +const RegisterManager = abi.RegisterManager; +const RegisterLock = RegisterManager.RegisterLock; const Register = bits.Register; +const gp = abi.RegisterClass.gp; const Self = @This(); @@ -1613,7 +1613,7 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue { if (reg_ok) { // Make sure the type can fit in a register before we try to allocate one. if (abi_size <= 8) { - if (self.register_manager.tryAllocReg(inst)) |reg| { + if (self.register_manager.tryAllocReg(inst, gp)) |reg| { return MCValue{ .register = reg }; } } @@ -1854,7 +1854,7 @@ fn binOpImmediate( ).?; } else null; - const reg = try self.register_manager.allocReg(track_inst); + const reg = try self.register_manager.allocReg(track_inst, gp); if (track_inst) |inst| branch.inst_table.putAssumeCapacity(inst, .{ .register = reg }); @@ -1873,10 +1873,10 @@ fn binOpImmediate( )) { break :blk lhs_reg; } else { - break :blk try self.register_manager.allocReg(md.inst); + break :blk try self.register_manager.allocReg(md.inst, gp); } } else blk: { - break :blk try self.register_manager.allocReg(null); + break :blk try self.register_manager.allocReg(null, gp); }, }; @@ -1953,7 +1953,7 @@ fn binOpRegister( break :inst Air.refToIndex(md.lhs).?; } else null; - const reg = try self.register_manager.allocReg(track_inst); + const reg = try self.register_manager.allocReg(track_inst, gp); if (track_inst) |inst| branch.inst_table.putAssumeCapacity(inst, .{ .register = reg }); break :blk reg; @@ -1966,7 +1966,7 @@ fn binOpRegister( break :inst Air.refToIndex(md.rhs).?; } else null; - const reg = try self.register_manager.allocReg(track_inst); + const reg = try self.register_manager.allocReg(track_inst, gp); if (track_inst) |inst| branch.inst_table.putAssumeCapacity(inst, .{ .register = reg }); break :blk reg; @@ -1981,10 +1981,10 @@ fn binOpRegister( } else if (rhs_is_register and self.reuseOperand(md.inst, md.rhs, 1, rhs)) { break :blk rhs_reg; } else { - break :blk try self.register_manager.allocReg(md.inst); + break :blk try self.register_manager.allocReg(md.inst, gp); } } else blk: { - break :blk try self.register_manager.allocReg(null); + break :blk try self.register_manager.allocReg(null, gp); }, }; @@ -2077,7 +2077,7 @@ fn brVoid(self: *Self, block: Air.Inst.Index) !void { /// allocated. A second call to `copyToTmpRegister` may return the same register. /// This can have a side effect of spilling instructions to the stack to free up a register. 
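The lock discipline visible throughout these hunks (`lockRegAssumeUnused` right after allocation, then `defer unlockReg`) keeps nested allocations, e.g. inside `genSetReg`, from spilling or reusing a temporary that is still live. A reduced model of that pattern, with toy types rather than the real RegisterManager API:

```zig
const std = @import("std");

const ToyLock = struct { index: usize };

const ToyManager = struct {
    locked: [4]bool = .{false} ** 4,

    // Once a temporary is locked, a nested allocation cannot take it.
    fn lockRegAssumeUnused(self: *ToyManager, index: usize) ToyLock {
        std.debug.assert(!self.locked[index]);
        self.locked[index] = true;
        return .{ .index = index };
    }

    fn unlockReg(self: *ToyManager, lock: ToyLock) void {
        self.locked[lock.index] = false;
    }
};

test "defer releases the lock at scope exit" {
    var rm = ToyManager{};
    {
        const lock = rm.lockRegAssumeUnused(1);
        defer rm.unlockReg(lock);
        try std.testing.expect(rm.locked[1]);
    }
    try std.testing.expect(!rm.locked[1]);
}
```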
fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { - const reg = try self.register_manager.allocReg(null); + const reg = try self.register_manager.allocReg(null, gp); try self.genSetReg(ty, reg, mcv); return reg; } @@ -2364,7 +2364,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void }); } else { // Need to allocate a temporary register to load 64-bit immediates. - const tmp_reg = try self.register_manager.allocReg(null); + const tmp_reg = try self.register_manager.allocReg(null, gp); try self.genSetReg(ty, tmp_reg, .{ .immediate = @truncate(u32, x) }); try self.genSetReg(ty, reg, .{ .immediate = @truncate(u32, x >> 32) }); @@ -2478,7 +2478,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro }; const ptr_ty = Type.initPayload(&ptr_ty_payload.base); - const regs = try self.register_manager.allocRegs(4, .{ null, null, null, null }); + const regs = try self.register_manager.allocRegs(4, .{ null, null, null, null }, gp); const regs_locks = self.register_manager.lockRegsAssumeUnused(4, regs); defer for (regs_locks) |reg| { self.register_manager.unlockReg(reg); @@ -2717,14 +2717,14 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo }, .stack_offset => |off| { if (elem_size <= 8) { - const tmp_reg = try self.register_manager.allocReg(null); + const tmp_reg = try self.register_manager.allocReg(null, gp); const tmp_reg_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_reg_lock); try self.load(.{ .register = tmp_reg }, ptr, ptr_ty); try self.genSetStack(elem_ty, off, MCValue{ .register = tmp_reg }); } else { - const regs = try self.register_manager.allocRegs(3, .{ null, null, null }); + const regs = try self.register_manager.allocRegs(3, .{ null, null, null }, gp); const regs_locks = self.register_manager.lockRegsAssumeUnused(3, regs); defer for (regs_locks) |reg| { self.register_manager.unlockReg(reg); diff --git a/src/arch/sparc64/abi.zig b/src/arch/sparc64/abi.zig index a9001c7dc7..1c6d40941f 100644 --- a/src/arch/sparc64/abi.zig +++ b/src/arch/sparc64/abi.zig @@ -1,5 +1,7 @@ +const std = @import("std"); const bits = @import("bits.zig"); const Register = bits.Register; +const RegisterManagerFn = @import("../../register_manager.zig").RegisterManager; // SPARCv9 stack constants. // See: Registers and the Stack Frame, page 3P-8, SCD 2.4.1. @@ -21,7 +23,7 @@ pub const stack_save_area = 176; pub const caller_preserved_regs = [_]Register{ .o0, .o1, .o2, .o3, .o4, .o5, .g1, .g4, .g5 }; // Try to allocate i, l, o, then g sets of registers, in order of priority. -pub const allocatable_regs = [_]Register{ +const allocatable_regs = [_]Register{ // zig fmt: off .@"i0", .@"i1", .@"i2", .@"i3", .@"i4", .@"i5", .l0, .l1, .l2, .l3, .l4, .l5, .l6, .l7, @@ -35,3 +37,20 @@ pub const c_abi_int_param_regs_callee_view = [_]Register{ .@"i0", .@"i1", .@"i2" pub const c_abi_int_return_regs_caller_view = [_]Register{ .o0, .o1, .o2, .o3 }; pub const c_abi_int_return_regs_callee_view = [_]Register{ .@"i0", .@"i1", .@"i2", .@"i3" }; + +pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_regs); + +// Register classes +const RegisterBitSet = RegisterManager.RegisterBitSet; +pub const RegisterClass = struct { + pub const gp: RegisterBitSet = std.math.maxInt(RegisterBitSet); + // TODO uncomment once #11680 is fixed. 
+ // pub const gp: RegisterBitSet = blk: { + // var set = RegisterBitSet.initEmpty(); + // set.setRangeValue(.{ + // .start = 0, + // .end = allocatable_regs.len, + // }, true); + // break :blk set; + // }; +}; diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index f1455b0591..eeb4cab04f 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -21,9 +21,6 @@ const Emit = @import("Emit.zig"); const Liveness = @import("../../Liveness.zig"); const Mir = @import("Mir.zig"); const Module = @import("../../Module.zig"); -const RegisterManagerFn = @import("../../register_manager.zig").RegisterManager; -const RegisterManager = RegisterManagerFn(Self, Register, &allocatable_registers); -const RegisterLock = RegisterManager.RegisterLock; const Target = std.Target; const Type = @import("../../type.zig").Type; const TypedValue = @import("../../TypedValue.zig"); @@ -31,13 +28,19 @@ const Value = @import("../../value.zig").Value; const bits = @import("bits.zig"); const abi = @import("abi.zig"); -const Register = bits.Register; + const callee_preserved_regs = abi.callee_preserved_regs; const caller_preserved_regs = abi.caller_preserved_regs; -const allocatable_registers = abi.allocatable_registers; const c_abi_int_param_regs = abi.c_abi_int_param_regs; const c_abi_int_return_regs = abi.c_abi_int_return_regs; +const RegisterManager = abi.RegisterManager; +const RegisterLock = RegisterManager.RegisterLock; +const Register = bits.Register; + +const gp = abi.RegisterClass.gp; +const sse = abi.RegisterClass.sse; + const InnerError = error{ OutOfMemory, CodegenFail, @@ -119,12 +122,12 @@ pub const MCValue = union(enum) { /// A pointer-sized integer that fits in a register. /// If the type is a pointer, this is the pointer address in virtual address space. immediate: u64, - /// The value is in a target-specific register. + /// The value is in a GP register. register: Register, - /// The value is a tuple { wrapped, overflow } where wrapped value is stored in the register, + /// The value is a tuple { wrapped, overflow } where wrapped value is stored in the GP register, /// and the operation is an unsigned operation. register_overflow_unsigned: Register, - /// The value is a tuple { wrapped, overflow } where wrapped value is stored in the register, + /// The value is a tuple { wrapped, overflow } where wrapped value is stored in the GP register, /// and the operation is a signed operation. register_overflow_signed: Register, /// The value is in memory at a hard-coded address. 
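Alongside the register-class work, the x86_64 hunks below migrate call sites from constructing a temporary `Mir.Ops` and calling `.encode()` on it to the namespaced `Mir.Inst.Ops.encode(.{ ... })` form, which drops the parenthesized struct literal. A sketch of the two equivalent call shapes, with a hypothetical field layout standing in for the real encoding:

```zig
const std = @import("std");

// Hypothetical operand descriptor; the real Mir.Inst.Ops packs register
// and flag operands into a small integer in the MIR instruction stream.
const Ops = struct {
    reg1: u4 = 0,
    reg2: u4 = 0,
    flags: u2 = 0,

    // Old call shape: construct a value, then encode it.
    fn encodeSelf(self: Ops) u10 {
        return (@as(u10, self.reg1) << 6) |
            (@as(u10, self.reg2) << 2) |
            @as(u10, self.flags);
    }

    // New call shape: the anonymous struct literal is passed straight in,
    // so call sites read `Ops.encode(.{ .reg1 = 1 })` rather than
    // `(Ops{ .reg1 = 1 }).encode()`.
    fn encode(ops: Ops) u10 {
        return ops.encodeSelf();
    }
};

test "both call shapes produce the same encoding" {
    const old_style = (Ops{ .reg1 = 3, .flags = 0b01 }).encodeSelf();
    const new_style = Ops.encode(.{ .reg1 = 3, .flags = 0b01 });
    try std.testing.expectEqual(old_style, new_style);
}
```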
@@ -387,17 +390,15 @@ fn gen(self: *Self) InnerError!void { if (cc != .Naked) { _ = try self.addInst(.{ .tag = .push, - .ops = (Mir.Ops{ - .reg1 = .rbp, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rbp }), .data = undefined, // unused for push reg, }); _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rbp, .reg2 = .rsp, - }).encode(), + }), .data = undefined, }); // We want to subtract the aligned stack frame size from rsp here, but we don't @@ -434,9 +435,7 @@ fn gen(self: *Self) InnerError!void { // push the callee_preserved_regs that were used const backpatch_push_callee_preserved_regs_i = try self.addInst(.{ .tag = .push_regs_from_callee_preserved_regs, - .ops = (Mir.Ops{ - .reg1 = .rbp, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rbp }), .data = .{ .payload = undefined }, // to be backpatched }); @@ -476,9 +475,7 @@ fn gen(self: *Self) InnerError!void { // pop the callee_preserved_regs _ = try self.addInst(.{ .tag = .pop_regs_from_callee_preserved_regs, - .ops = (Mir.Ops{ - .reg1 = .rbp, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rbp }), .data = .{ .payload = callee_preserved_regs_payload }, }); @@ -497,17 +494,13 @@ fn gen(self: *Self) InnerError!void { _ = try self.addInst(.{ .tag = .pop, - .ops = (Mir.Ops{ - .reg1 = .rbp, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rbp }), .data = undefined, }); _ = try self.addInst(.{ .tag = .ret, - .ops = (Mir.Ops{ - .flags = 0b11, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .flags = 0b11 }), .data = undefined, }); @@ -521,16 +514,12 @@ fn gen(self: *Self) InnerError!void { if (aligned_stack_end > 0) { self.mir_instructions.set(backpatch_stack_sub, .{ .tag = .sub, - .ops = (Mir.Ops{ - .reg1 = .rsp, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rsp }), .data = .{ .imm = aligned_stack_end }, }); self.mir_instructions.set(backpatch_stack_add, .{ .tag = .add, - .ops = (Mir.Ops{ - .reg1 = .rsp, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rsp }), .data = .{ .imm = aligned_stack_end }, }); } @@ -889,13 +878,30 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue { self.stack_align = abi_align; if (reg_ok) { - // Make sure the type can fit in a register before we try to allocate one. - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); - const ptr_bytes: u64 = @divExact(ptr_bits, 8); - if (abi_size <= ptr_bytes) { - if (self.register_manager.tryAllocReg(inst)) |reg| { - return MCValue{ .register = registerAlias(reg, abi_size) }; - } + switch (elem_ty.zigTypeTag()) { + .Vector => return self.fail("TODO allocRegOrMem for Vector type", .{}), + .Float => { + if (intrinsicsAllowed(self.target.*, elem_ty)) { + const ptr_bytes: u64 = 32; + if (abi_size <= ptr_bytes) { + if (self.register_manager.tryAllocReg(inst, sse)) |reg| { + return MCValue{ .register = registerAlias(reg, abi_size) }; + } + } + } + + return self.fail("TODO allocRegOrMem for Float type without SSE/AVX support", .{}); + }, + else => { + // Make sure the type can fit in a register before we try to allocate one. 
+ const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bytes: u64 = @divExact(ptr_bits, 8); + if (abi_size <= ptr_bytes) { + if (self.register_manager.tryAllocReg(inst, gp)) |reg| { + return MCValue{ .register = registerAlias(reg, abi_size) }; + } + } + }, } } const stack_offset = try self.allocMem(inst, abi_size, abi_align); @@ -962,7 +968,14 @@ pub fn spillRegisters(self: *Self, comptime count: comptime_int, registers: [cou /// allocated. A second call to `copyToTmpRegister` may return the same register. /// This can have a side effect of spilling instructions to the stack to free up a register. fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { - const reg = try self.register_manager.allocReg(null); + const reg_class: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) { + .Float => blk: { + if (intrinsicsAllowed(self.target.*, ty)) break :blk sse; + return self.fail("TODO copy {} to register", .{ty.fmtDebug()}); + }, + else => gp, + }; + const reg: Register = try self.register_manager.allocReg(null, reg_class); try self.genSetReg(ty, reg, mcv); return reg; } @@ -972,7 +985,14 @@ fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { /// This can have a side effect of spilling instructions to the stack to free up a register. /// WARNING make sure that the allocated register matches the returned MCValue from an instruction! fn copyToRegisterWithInstTracking(self: *Self, reg_owner: Air.Inst.Index, ty: Type, mcv: MCValue) !MCValue { - const reg = try self.register_manager.allocReg(reg_owner); + const reg_class: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) { + .Float => blk: { + if (intrinsicsAllowed(self.target.*, ty)) break :blk sse; + return self.fail("TODO copy {} to register", .{ty.fmtDebug()}); + }, + else => gp, + }; + const reg: Register = try self.register_manager.allocReg(reg_owner, reg_class); try self.genSetReg(ty, reg, mcv); return MCValue{ .register = reg }; } @@ -1028,7 +1048,7 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { }; defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); - const reg = try self.register_manager.allocReg(inst); + const reg = try self.register_manager.allocReg(inst, gp); try self.genSetReg(dest_ty, reg, .{ .immediate = 0 }); try self.genSetReg(operand_ty, reg, operand); break :blk MCValue{ .register = reg }; @@ -1192,10 +1212,10 @@ fn airMin(self: *Self, inst: Air.Inst.Index) !void { const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ty, rhs_mcv); _ = try self.addInst(.{ .tag = if (signedness == .signed) .cond_mov_lt else .cond_mov_below, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = dst_mcv.register, .reg2 = lhs_reg, - }).encode(), + }), .data = undefined, }); @@ -1383,7 +1403,7 @@ fn genSetStackTruncatedOverflowCompare( .unsigned => ty, }; - const temp_regs = try self.register_manager.allocRegs(3, .{ null, null, null }); + const temp_regs = try self.register_manager.allocRegs(3, .{ null, null, null }, gp); const temp_regs_locks = self.register_manager.lockRegsAssumeUnused(3, temp_regs); defer for (temp_regs_locks) |rreg| { self.register_manager.unlockReg(rreg); @@ -1396,10 +1416,10 @@ fn genSetStackTruncatedOverflowCompare( }; _ = try self.addInst(.{ .tag = .cond_set_byte_overflow, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = overflow_reg.to8(), .flags = flags, - }).encode(), + }), .data = undefined, }); @@ -1416,10 +1436,7 @@ fn genSetStackTruncatedOverflowCompare( const eq_reg = temp_regs[2]; _ = try self.addInst(.{ 
.tag = .cond_set_byte_eq_ne, - .ops = (Mir.Ops{ - .reg1 = eq_reg.to8(), - .flags = 0b00, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = eq_reg.to8() }), .data = undefined, }); @@ -1565,19 +1582,17 @@ fn genIntMulDivOpMir( .signed => { _ = try self.addInst(.{ .tag = .cwd, - .ops = (Mir.Ops{ - .flags = 0b11, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .flags = 0b11 }), .data = undefined, }); }, .unsigned => { _ = try self.addInst(.{ .tag = .xor, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rdx, .reg2 = .rdx, - }).encode(), + }), .data = undefined, }); }, @@ -1596,16 +1611,14 @@ fn genIntMulDivOpMir( .register => |reg| { _ = try self.addInst(.{ .tag = tag, - .ops = (Mir.Ops{ - .reg1 = reg, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = reg }), .data = undefined, }); }, .stack_offset => |off| { _ = try self.addInst(.{ .tag = tag, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg2 = .rbp, .flags = switch (abi_size) { 1 => 0b00, @@ -1614,7 +1627,7 @@ fn genIntMulDivOpMir( 8 => 0b11, else => unreachable, }, - }).encode(), + }), .data = .{ .imm = @bitCast(u32, -off) }, }); }, @@ -1647,34 +1660,34 @@ fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCVa _ = try self.addInst(.{ .tag = .xor, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = divisor.to64(), .reg2 = dividend.to64(), - }).encode(), + }), .data = undefined, }); _ = try self.addInst(.{ .tag = .sar, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = divisor.to64(), .flags = 0b10, - }).encode(), + }), .data = .{ .imm = 63 }, }); _ = try self.addInst(.{ .tag = .@"test", - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rdx, .reg2 = .rdx, - }).encode(), + }), .data = undefined, }); _ = try self.addInst(.{ .tag = .cond_mov_eq, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = divisor.to64(), .reg2 = .rdx, - }).encode(), + }), .data = undefined, }); try self.genBinOpMir(.add, Type.isize, .{ .register = divisor }, .{ .register = .rax }); @@ -2052,17 +2065,17 @@ fn genSliceElemPtr(self: *Self, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue { const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg); defer self.register_manager.unlockReg(offset_reg_lock); - const addr_reg = try self.register_manager.allocReg(null); + const addr_reg = try self.register_manager.allocReg(null, gp); switch (slice_mcv) { .stack_offset => |off| { // mov reg, [rbp - 8] _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = addr_reg.to64(), .reg2 = .rbp, .flags = 0b01, - }).encode(), + }), .data = .{ .imm = @bitCast(u32, -@intCast(i32, off)) }, }); }, @@ -2131,7 +2144,7 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg); defer self.register_manager.unlockReg(offset_reg_lock); - const addr_reg = try self.register_manager.allocReg(null); + const addr_reg = try self.register_manager.allocReg(null, gp); switch (array) { .register => { const off = @intCast(i32, try self.allocMem( @@ -2143,10 +2156,10 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { // lea reg, [rbp] _ = try self.addInst(.{ .tag = .lea, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = addr_reg.to64(), .reg2 = .rbp, - }).encode(), + }), .data = .{ .imm = @bitCast(u32, -off) }, }); }, @@ -2154,10 +2167,10 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { // lea reg, [rbp] _ = try self.addInst(.{ .tag = .lea, - .ops = 
(Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = addr_reg.to64(), .reg2 = .rbp, - }).encode(), + }), .data = .{ .imm = @bitCast(u32, -off) }, }); }, @@ -2222,11 +2235,11 @@ fn airPtrElemVal(self: *Self, inst: Air.Inst.Index) !void { // mov dst_mcv, [dst_mcv] _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ - .flags = 0b01, + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(dst_mcv.register, @intCast(u32, elem_abi_size)), .reg2 = dst_mcv.register, - }).encode(), + .flags = 0b01, + }), .data = .{ .imm = 0 }, }); break :result .{ .register = registerAlias(dst_mcv.register, @intCast(u32, elem_abi_size)) }; @@ -2488,17 +2501,17 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo // mov dst_reg, [reg] _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(dst_reg, @intCast(u32, abi_size)), .reg2 = reg, .flags = 0b01, - }).encode(), + }), .data = .{ .imm = 0 }, }); }, .stack_offset => |off| { if (abi_size <= 8) { - const tmp_reg = try self.register_manager.allocReg(null); + const tmp_reg = try self.register_manager.allocReg(null, gp); try self.load(.{ .register = tmp_reg }, ptr, ptr_ty); return self.genSetStack(elem_ty, off, MCValue{ .register = tmp_reg }, .{}); } @@ -2559,10 +2572,10 @@ fn loadMemPtrIntoRegister(self: *Self, reg: Register, ptr_ty: Type, ptr: MCValue const fn_owner_decl = mod.declPtr(self.mod_fn.owner_decl); _ = try self.addInst(.{ .tag = .lea_pie, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(reg, abi_size), .flags = flags, - }).encode(), + }), .data = .{ .load_reloc = .{ .atom_index = fn_owner_decl.link.macho.local_sym_index, @@ -2623,7 +2636,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type }); _ = try self.addInst(.{ .tag = .mov_mem_imm, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = reg.to64(), .flags = switch (abi_size) { 1 => 0b00, @@ -2631,7 +2644,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type 4 => 0b10, else => unreachable, }, - }).encode(), + }), .data = .{ .payload = payload }, }); }, @@ -2645,11 +2658,11 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type const tmp_reg = try self.copyToTmpRegister(value_ty, value); _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = reg.to64(), .reg2 = tmp_reg.to64(), .flags = 0b10, - }).encode(), + }), .data = .{ .imm = 0 }, }); }, @@ -2661,11 +2674,11 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type .register => |src_reg| { _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = reg.to64(), .reg2 = registerAlias(src_reg, @intCast(u32, abi_size)), .flags = 0b10, - }).encode(), + }), .data = .{ .imm = 0 }, }); }, @@ -2699,7 +2712,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type }; defer if (value_lock) |lock| self.register_manager.unlockReg(lock); - const addr_reg = try self.register_manager.allocReg(null); + const addr_reg = try self.register_manager.allocReg(null, gp); const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg); defer self.register_manager.unlockReg(addr_reg_lock); @@ -2709,11 +2722,11 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type // mov reg, [reg] _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = addr_reg.to64(), .reg2 = 
addr_reg.to64(), .flags = 0b01, - }).encode(), + }), .data = .{ .imm = 0 }, }); @@ -2748,21 +2761,21 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type } _ = try self.addInst(.{ .tag = .mov_mem_imm, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = addr_reg.to64(), .flags = flags, - }).encode(), + }), .data = .{ .payload = payload }, }); }, .register => |reg| { _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = addr_reg.to64(), .reg2 = reg, .flags = 0b10, - }).encode(), + }), .data = .{ .imm = 0 }, }); }, @@ -2771,7 +2784,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type .memory, => { if (abi_size <= 8) { - const tmp_reg = try self.register_manager.allocReg(null); + const tmp_reg = try self.register_manager.allocReg(null, gp); const tmp_reg_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_reg_lock); @@ -2779,20 +2792,20 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = tmp_reg, .reg2 = tmp_reg, .flags = 0b01, - }).encode(), + }), .data = .{ .imm = 0 }, }); _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = addr_reg.to64(), .reg2 = tmp_reg, .flags = 0b10, - }).encode(), + }), .data = .{ .imm = 0 }, }); return; @@ -2806,11 +2819,11 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type const tmp_reg = try self.copyToTmpRegister(value_ty, value); _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = addr_reg.to64(), .reg2 = tmp_reg, .flags = 0b10, - }).encode(), + }), .data = .{ .imm = 0 }, }); return; @@ -2889,7 +2902,7 @@ fn structFieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, inde if (can_reuse_operand) { break :blk reg; } else { - const result_reg = try self.register_manager.allocReg(inst); + const result_reg = try self.register_manager.allocReg(inst, gp); try self.genSetReg(ptr_ty, result_reg, mcv); break :blk result_reg; } @@ -2967,10 +2980,10 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { if (signedness == .signed and field_size < 8) { _ = try self.addInst(.{ .tag = .mov_sign_extend, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = dst_mcv.register, .reg2 = registerAlias(dst_mcv.register, field_size), - }).encode(), + }), .data = undefined, }); } @@ -2990,7 +3003,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { const reg_lock = self.register_manager.lockRegAssumeUnused(reg); defer self.register_manager.unlockReg(reg_lock); - const dst_reg = try self.register_manager.allocReg(inst); + const dst_reg = try self.register_manager.allocReg(inst, gp); const flags: u2 = switch (mcv) { .register_overflow_unsigned => 0b10, .register_overflow_signed => 0b00, @@ -2998,10 +3011,10 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { }; _ = try self.addInst(.{ .tag = .cond_set_byte_overflow, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = dst_reg.to8(), .flags = flags, - }).encode(), + }), .data = undefined, }); break :result MCValue{ .register = dst_reg.to8() }; @@ -3042,10 +3055,7 @@ fn genShiftBinOpMir(self: *Self, tag: Mir.Inst.Tag, ty: Type, reg: Register, shi 1 => { _ = try self.addInst(.{ .tag = tag, - .ops = (Mir.Ops{ - .reg1 = registerAlias(reg, abi_size), - .flags = 0b00, - 
}).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(reg, abi_size) }), .data = undefined, }); return; @@ -3053,10 +3063,10 @@ fn genShiftBinOpMir(self: *Self, tag: Mir.Inst.Tag, ty: Type, reg: Register, shi else => { _ = try self.addInst(.{ .tag = tag, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(reg, abi_size), .flags = 0b10, - }).encode(), + }), .data = .{ .imm = @intCast(u8, imm) }, }); return; @@ -3074,10 +3084,10 @@ fn genShiftBinOpMir(self: *Self, tag: Mir.Inst.Tag, ty: Type, reg: Register, shi _ = try self.addInst(.{ .tag = tag, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(reg, abi_size), .flags = 0b01, - }).encode(), + }), .data = undefined, }); } @@ -3326,7 +3336,7 @@ fn genBinOp( const rhs = try self.resolveInst(rhs_air); const lhs_ty = self.air.typeOf(lhs_air); const rhs_ty = self.air.typeOf(rhs_air); - if (lhs_ty.zigTypeTag() == .Vector or lhs_ty.zigTypeTag() == .Float) { + if (lhs_ty.zigTypeTag() == .Vector) { return self.fail("TODO implement genBinOp for {}", .{lhs_ty.fmtDebug()}); } if (lhs_ty.abiSize(self.target.*) > 8) { @@ -3450,22 +3460,62 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu const reg = try self.copyToTmpRegister(dst_ty, src_mcv); return self.genBinOpMir(mir_tag, dst_ty, dst_mcv, .{ .register = reg }); }, - .register => |src_reg| { - _ = try self.addInst(.{ - .tag = mir_tag, - .ops = (Mir.Ops{ - .reg1 = registerAlias(dst_reg, abi_size), - .reg2 = registerAlias(src_reg, abi_size), - }).encode(), - .data = undefined, - }); + .register => |src_reg| switch (dst_ty.zigTypeTag()) { + .Float => { + if (intrinsicsAllowed(self.target.*, dst_ty)) { + const actual_tag: Mir.Inst.Tag = switch (dst_ty.tag()) { + .f32 => switch (mir_tag) { + .add => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.add_f32_avx + else + Mir.Inst.Tag.add_f32_sse, + .cmp => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.cmp_f32_avx + else + Mir.Inst.Tag.cmp_f32_sse, + else => return self.fail("TODO genBinOpMir for f32 register-register with MIR tag {}", .{mir_tag}), + }, + .f64 => switch (mir_tag) { + .add => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.add_f64_avx + else + Mir.Inst.Tag.add_f64_sse, + .cmp => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.cmp_f64_avx + else + Mir.Inst.Tag.cmp_f64_sse, + else => return self.fail("TODO genBinOpMir for f64 register-register with MIR tag {}", .{mir_tag}), + }, + else => return self.fail("TODO genBinOpMir for float register-register and type {}", .{dst_ty.fmtDebug()}), + }; + _ = try self.addInst(.{ + .tag = actual_tag, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = dst_reg.to128(), + .reg2 = src_reg.to128(), + }), + .data = undefined, + }); + return; + } + + return self.fail("TODO genBinOpMir for float register-register and no intrinsics", .{}); + }, + else => { + _ = try self.addInst(.{ + .tag = mir_tag, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = registerAlias(dst_reg, abi_size), + .reg2 = registerAlias(src_reg, abi_size), + }), + .data = undefined, + }); + }, }, .immediate => |imm| { _ = try self.addInst(.{ .tag = mir_tag, - .ops = (Mir.Ops{ - .reg1 = registerAlias(dst_reg, abi_size), - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(dst_reg, abi_size) }), .data = .{ .imm = @truncate(u32, imm) }, }); }, @@ -3488,11 +3538,11 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu } _ = try self.addInst(.{ .tag = mir_tag, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = 
registerAlias(dst_reg, abi_size), .reg2 = .rbp, .flags = 0b01, - }).encode(), + }), .data = .{ .imm = @bitCast(u32, -off) }, }); }, @@ -3515,11 +3565,11 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu .register => |src_reg| { _ = try self.addInst(.{ .tag = mir_tag, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rbp, .reg2 = registerAlias(src_reg, abi_size), .flags = 0b10, - }).encode(), + }), .data = .{ .imm = @bitCast(u32, -off) }, }); }, @@ -3546,10 +3596,10 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu }); _ = try self.addInst(.{ .tag = tag, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rbp, .flags = flags, - }).encode(), + }), .data = .{ .payload = payload }, }); }, @@ -3604,10 +3654,10 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M // register, register _ = try self.addInst(.{ .tag = .imul_complex, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(dst_reg, abi_size), .reg2 = registerAlias(src_reg, abi_size), - }).encode(), + }), .data = undefined, }); }, @@ -3617,11 +3667,11 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M if (math.minInt(i32) <= imm and imm <= math.maxInt(i32)) { _ = try self.addInst(.{ .tag = .imul_complex, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = dst_reg.to32(), .reg2 = dst_reg.to32(), .flags = 0b10, - }).encode(), + }), .data = .{ .imm = @truncate(u32, imm) }, }); } else { @@ -3633,11 +3683,11 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M .stack_offset => |off| { _ = try self.addInst(.{ .tag = .imul_complex, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(dst_reg, abi_size), .reg2 = .rbp, .flags = 0b01, - }).encode(), + }), .data = .{ .imm = @bitCast(u32, -off) }, }); }, @@ -3670,10 +3720,10 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M // register, register _ = try self.addInst(.{ .tag = .imul_complex, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(dst_reg, abi_size), .reg2 = registerAlias(src_reg, abi_size), - }).encode(), + }), .data = undefined, }); // copy dst_reg back out @@ -3780,9 +3830,7 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { fn airBreakpoint(self: *Self) !void { _ = try self.addInst(.{ .tag = .interrupt, - .ops = (Mir.Ops{ - .flags = 0b00, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{}), .data = undefined, }); return self.finishAirBookkeeping(); @@ -3883,9 +3931,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. // Adjust the stack _ = try self.addInst(.{ .tag = .sub, - .ops = (Mir.Ops{ - .reg1 = .rsp, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rsp }), .data = .{ .imm = info.stack_byte_count }, }); } @@ -3909,9 +3955,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. unreachable; _ = try self.addInst(.{ .tag = .call, - .ops = (Mir.Ops{ - .flags = 0b01, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .flags = 0b01 }), .data = .{ .imm = @truncate(u32, got_addr) }, }); } else if (func_value.castTag(.extern_fn)) |_| { @@ -3925,10 +3969,10 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. 
try self.genSetReg(Type.initTag(.usize), .rax, mcv); _ = try self.addInst(.{ .tag = .call, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax, .flags = 0b01, - }).encode(), + }), .data = undefined, }); } @@ -3943,10 +3987,10 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. // callq *%rax _ = try self.addInst(.{ .tag = .call, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax, .flags = 0b01, - }).encode(), + }), .data = undefined, }); } else if (func_value.castTag(.extern_fn)) |func_payload| { @@ -3978,10 +4022,10 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. try self.genSetReg(Type.initTag(.usize), .rax, mcv); _ = try self.addInst(.{ .tag = .call, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax, .flags = 0b01, - }).encode(), + }), .data = undefined, }); } @@ -3996,9 +4040,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. const fn_got_addr = got_addr + got_index * ptr_bytes; _ = try self.addInst(.{ .tag = .call, - .ops = (Mir.Ops{ - .flags = 0b01, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .flags = 0b01 }), .data = .{ .imm = @intCast(u32, fn_got_addr) }, }); } else return self.fail("TODO implement calling extern fn on plan9", .{}); @@ -4008,10 +4050,10 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. try self.genSetReg(Type.initTag(.usize), .rax, mcv); _ = try self.addInst(.{ .tag = .call, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax, .flags = 0b01, - }).encode(), + }), .data = undefined, }); } @@ -4021,9 +4063,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. // Readjust the stack _ = try self.addInst(.{ .tag = .add, - .ops = (Mir.Ops{ - .reg1 = .rsp, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rsp }), .data = .{ .imm = info.stack_byte_count }, }); } @@ -4081,9 +4121,7 @@ fn airRet(self: *Self, inst: Air.Inst.Index) !void { // which is available if the jump is 127 bytes or less forward. const jmp_reloc = try self.addInst(.{ .tag = .jmp, - .ops = (Mir.Ops{ - .flags = 0b00, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{}), .data = .{ .inst = undefined }, }); try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc); @@ -4116,9 +4154,7 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { // which is available if the jump is 127 bytes or less forward. const jmp_reloc = try self.addInst(.{ .tag = .jmp, - .ops = (Mir.Ops{ - .flags = 0b00, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{}), .data = .{ .inst = undefined }, }); try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc); @@ -4162,8 +4198,28 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { const dst_mcv = MCValue{ .register = dst_reg }; + const rhs_ty = self.air.typeOf(bin_op.rhs); // This instruction supports only signed 32-bit immediates at most. 
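// Editor's note (hedged, not compiler code): the hunk below adds a float
// branch here because x86 `cmp` takes at most a sign-extended 32-bit integer
// immediate and has no floating-point immediate form at all, so a float rhs
// must be materialized in an xmm-capable register (and locked) before the
// compare. The hypothetical helper below only illustrates the imm32 range
// check that `limitImmediateType` performs in the integer case.
const std = @import("std");

fn fitsInImm32(x: i64) bool {
    return x >= std.math.minInt(i32) and x <= std.math.maxInt(i32);
}

test "values outside imm32 must go through a register" {
    try std.testing.expect(fitsInImm32(-1));
    try std.testing.expect(!fitsInImm32(@as(i64, 1) << 40));
}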
- const src_mcv = try self.limitImmediateType(bin_op.rhs, i32); + const src_mcv: MCValue = blk: { + switch (rhs_ty.zigTypeTag()) { + .Float => { + const rhs = try self.resolveInst(bin_op.rhs); + const rhs_lock: ?RegisterLock = switch (rhs) { + .register => |reg| self.register_manager.lockRegAssumeUnused(reg), + else => null, + }; + defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); + const src_reg = try self.copyToTmpRegister(rhs_ty, rhs); + break :blk MCValue{ .register = src_reg }; + }, + else => break :blk try self.limitImmediateType(bin_op.rhs, i32), + } + }; + const src_lock: ?RegisterLock = switch (src_mcv) { + .register => |reg| self.register_manager.lockReg(reg), + else => null, + }; + defer if (src_lock) |lock| self.register_manager.unlockReg(lock); try self.genBinOpMir(.cmp, ty, dst_mcv, src_mcv); break :result switch (signedness) { @@ -4362,9 +4418,7 @@ fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !u32 { Mir.Inst.Tag.cond_jmp_greater_less; return self.addInst(.{ .tag = tag, - .ops = (Mir.Ops{ - .flags = flags, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .flags = flags }), .data = .{ .inst = undefined }, }); }, @@ -4372,17 +4426,12 @@ fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !u32 { try self.spillCompareFlagsIfOccupied(); _ = try self.addInst(.{ .tag = .@"test", - .ops = (Mir.Ops{ - .reg1 = reg, - .flags = 0b00, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = reg }), .data = .{ .imm = 1 }, }); return self.addInst(.{ .tag = .cond_jmp_eq_ne, - .ops = (Mir.Ops{ - .flags = 0b01, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .flags = 0b01 }), .data = .{ .inst = undefined }, }); }, @@ -4776,9 +4825,7 @@ fn airLoop(self: *Self, inst: Air.Inst.Index) !void { try self.genBody(body); _ = try self.addInst(.{ .tag = .jmp, - .ops = (Mir.Ops{ - .flags = 0b00, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{}), .data = .{ .inst = jmp_target }, }); return self.finishAirBookkeeping(); @@ -4829,19 +4876,17 @@ fn genCondSwitchMir(self: *Self, ty: Type, condition: MCValue, case: MCValue) !u .immediate => |imm| { _ = try self.addInst(.{ .tag = .xor, - .ops = (Mir.Ops{ - .reg1 = registerAlias(cond_reg, abi_size), - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(cond_reg, abi_size) }), .data = .{ .imm = @intCast(u32, imm) }, }); }, .register => |reg| { _ = try self.addInst(.{ .tag = .xor, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(cond_reg, abi_size), .reg2 = registerAlias(reg, abi_size), - }).encode(), + }), .data = undefined, }); }, @@ -4860,17 +4905,15 @@ fn genCondSwitchMir(self: *Self, ty: Type, condition: MCValue, case: MCValue) !u _ = try self.addInst(.{ .tag = .@"test", - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(cond_reg, abi_size), .reg2 = registerAlias(cond_reg, abi_size), - }).encode(), + }), .data = undefined, }); return self.addInst(.{ .tag = .cond_jmp_eq_ne, - .ops = (Mir.Ops{ - .flags = 0b00, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{}), .data = .{ .inst = undefined }, }); }, @@ -5036,9 +5079,7 @@ fn brVoid(self: *Self, block: Air.Inst.Index) !void { // Leave the jump offset undefined const jmp_reloc = try self.addInst(.{ .tag = .jmp, - .ops = (Mir.Ops{ - .flags = 0b00, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{}), .data = .{ .inst = undefined }, }); block_data.relocs.appendAssumeCapacity(jmp_reloc); @@ -5126,9 +5167,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { }; _ = try self.addInst(.{ .tag = .push, - .ops = (Mir.Ops{ - .flags = 0b10, - 
}).encode(), + .ops = Mir.Inst.Ops.encode(.{ .flags = 0b10 }), .data = .{ .imm = n }, }); } else if (mem.indexOf(u8, arg, "%%")) |l| { @@ -5137,9 +5176,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { return self.fail("unrecognized register: '{s}'", .{reg_name}); _ = try self.addInst(.{ .tag = .push, - .ops = (Mir.Ops{ - .reg1 = reg, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = reg }), .data = undefined, }); } else return self.fail("TODO more push operands", .{}); @@ -5151,9 +5188,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { return self.fail("unrecognized register: '{s}'", .{reg_name}); _ = try self.addInst(.{ .tag = .pop, - .ops = (Mir.Ops{ - .reg1 = reg, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = reg }), .data = undefined, }); } else return self.fail("TODO more pop operands", .{}); @@ -5265,7 +5300,7 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE }); _ = try self.addInst(.{ .tag = .mov_mem_imm, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rsp, .flags = switch (abi_size) { 1 => 0b00, @@ -5273,7 +5308,7 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE 4 => 0b10, else => unreachable, }, - }).encode(), + }), .data = .{ .payload = payload }, }); }, @@ -5299,15 +5334,50 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE }); }, .register => |reg| { - _ = try self.addInst(.{ - .tag = .mov, - .ops = (Mir.Ops{ - .reg1 = .rsp, - .reg2 = registerAlias(reg, @intCast(u32, abi_size)), - .flags = 0b10, - }).encode(), - .data = .{ .imm = @bitCast(u32, -stack_offset) }, - }); + switch (ty.zigTypeTag()) { + .Float => { + if (intrinsicsAllowed(self.target.*, ty)) { + const tag: Mir.Inst.Tag = switch (ty.tag()) { + .f32 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f32_avx + else + Mir.Inst.Tag.mov_f32_sse, + .f64 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f64_avx + else + Mir.Inst.Tag.mov_f64_sse, + else => return self.fail("TODO genSetStackArg for register for type {}", .{ty.fmtDebug()}), + }; + _ = try self.addInst(.{ + .tag = tag, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = switch (ty.tag()) { + .f32 => .esp, + .f64 => .rsp, + else => unreachable, + }, + .reg2 = reg.to128(), + .flags = 0b01, + }), + .data = .{ .imm = @bitCast(u32, -stack_offset) }, + }); + return; + } + + return self.fail("TODO genSetStackArg for register with no intrinsics", .{}); + }, + else => { + _ = try self.addInst(.{ + .tag = .mov, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = .rsp, + .reg2 = registerAlias(reg, @intCast(u32, abi_size)), + .flags = 0b10, + }), + .data = .{ .imm = @bitCast(u32, -stack_offset) }, + }); + }, + } }, .ptr_stack_offset => { const reg = try self.copyToTmpRegister(ty, mcv); @@ -5360,7 +5430,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl const overflow_bit_ty = ty.structFieldType(1); const overflow_bit_offset = ty.structFieldOffset(1, self.target.*); - const tmp_reg = try self.register_manager.allocReg(null); + const tmp_reg = try self.register_manager.allocReg(null, gp); const flags: u2 = switch (mcv) { .register_overflow_unsigned => 0b10, .register_overflow_signed => 0b00, @@ -5368,10 +5438,10 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl }; _ = try self.addInst(.{ .tag = .cond_set_byte_overflow, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = tmp_reg.to8(), .flags = flags, - }).encode(), + }), .data = undefined, }); @@ -5398,7 +5468,7 @@ fn 
genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl }); _ = try self.addInst(.{ .tag = .mov_mem_imm, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = base_reg, .flags = switch (abi_size) { 1 => 0b00, @@ -5406,7 +5476,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl 4 => 0b10, else => unreachable, }, - }).encode(), + }), .data = .{ .payload = payload }, }); }, @@ -5420,10 +5490,10 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl }); _ = try self.addInst(.{ .tag = .mov_mem_imm, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = base_reg, .flags = 0b10, - }).encode(), + }), .data = .{ .payload = payload }, }); } @@ -5434,10 +5504,10 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl }); _ = try self.addInst(.{ .tag = .mov_mem_imm, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = base_reg, .flags = 0b10, - }).encode(), + }), .data = .{ .payload = payload }, }); } @@ -5453,44 +5523,80 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl } const base_reg = opts.dest_stack_base orelse .rbp; - if (!math.isPowerOfTwo(abi_size)) { - const reg_lock = self.register_manager.lockReg(reg); - defer if (reg_lock) |lock| self.register_manager.unlockReg(lock); - const tmp_reg = try self.copyToTmpRegister(ty, mcv); - - var next_offset = stack_offset; - var remainder = abi_size; - while (remainder > 0) { - const nearest_power_of_two = @as(u6, 1) << math.log2_int(u3, @intCast(u3, remainder)); - - _ = try self.addInst(.{ - .tag = .mov, - .ops = (Mir.Ops{ - .reg1 = base_reg, - .reg2 = registerAlias(tmp_reg, nearest_power_of_two), - .flags = 0b10, - }).encode(), - .data = .{ .imm = @bitCast(u32, -next_offset) }, - }); - - if (nearest_power_of_two > 1) { - try self.genShiftBinOpMir(.shr, ty, tmp_reg, .{ .immediate = nearest_power_of_two * 8 }); + switch (ty.zigTypeTag()) { + .Float => { + if (intrinsicsAllowed(self.target.*, ty)) { + const tag: Mir.Inst.Tag = switch (ty.tag()) { + .f32 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f32_avx + else + Mir.Inst.Tag.mov_f32_sse, + .f64 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f64_avx + else + Mir.Inst.Tag.mov_f64_sse, + else => return self.fail("TODO genSetStack for register for type {}", .{ty.fmtDebug()}), + }; + _ = try self.addInst(.{ + .tag = tag, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = switch (ty.tag()) { + .f32 => base_reg.to32(), + .f64 => base_reg.to64(), + else => unreachable, + }, + .reg2 = reg.to128(), + .flags = 0b01, + }), + .data = .{ .imm = @bitCast(u32, -stack_offset) }, + }); + return; } - remainder -= nearest_power_of_two; - next_offset -= nearest_power_of_two; - } - } else { - _ = try self.addInst(.{ - .tag = .mov, - .ops = (Mir.Ops{ - .reg1 = base_reg, - .reg2 = registerAlias(reg, @intCast(u32, abi_size)), - .flags = 0b10, - }).encode(), - .data = .{ .imm = @bitCast(u32, -stack_offset) }, - }); + return self.fail("TODO genSetStack for register for type float with no intrinsics", .{}); + }, + else => { + if (!math.isPowerOfTwo(abi_size)) { + const reg_lock = self.register_manager.lockReg(reg); + defer if (reg_lock) |lock| self.register_manager.unlockReg(lock); + + const tmp_reg = try self.copyToTmpRegister(ty, mcv); + + var next_offset = stack_offset; + var remainder = abi_size; + while (remainder > 0) { + const nearest_power_of_two = @as(u6, 1) << math.log2_int(u3, @intCast(u3, remainder)); + + _ = try self.addInst(.{ + .tag = .mov, + 
.ops = Mir.Inst.Ops.encode(.{ + .reg1 = base_reg, + .reg2 = registerAlias(tmp_reg, nearest_power_of_two), + .flags = 0b10, + }), + .data = .{ .imm = @bitCast(u32, -next_offset) }, + }); + + if (nearest_power_of_two > 1) { + try self.genShiftBinOpMir(.shr, ty, tmp_reg, .{ .immediate = nearest_power_of_two * 8 }); + } + + remainder -= nearest_power_of_two; + next_offset -= nearest_power_of_two; + } + } else { + _ = try self.addInst(.{ + .tag = .mov, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = base_reg, + .reg2 = registerAlias(reg, @intCast(u32, abi_size)), + .flags = 0b10, + }), + .data = .{ .imm = @bitCast(u32, -stack_offset) }, + }); + } + }, } }, .memory, @@ -5558,7 +5664,7 @@ fn genInlineMemcpy( null; defer if (dsbase_lock) |lock| self.register_manager.unlockReg(lock); - const dst_addr_reg = try self.register_manager.allocReg(null); + const dst_addr_reg = try self.register_manager.allocReg(null, gp); switch (dst_ptr) { .memory, .got_load, @@ -5569,20 +5675,20 @@ fn genInlineMemcpy( .ptr_stack_offset, .stack_offset => |off| { _ = try self.addInst(.{ .tag = .lea, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = dst_addr_reg.to64(), .reg2 = opts.dest_stack_base orelse .rbp, - }).encode(), + }), .data = .{ .imm = @bitCast(u32, -off) }, }); }, .register => |reg| { _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(dst_addr_reg, @divExact(reg.size(), 8)), .reg2 = reg, - }).encode(), + }), .data = undefined, }); }, @@ -5593,7 +5699,7 @@ fn genInlineMemcpy( const dst_addr_reg_lock = self.register_manager.lockRegAssumeUnused(dst_addr_reg); defer self.register_manager.unlockReg(dst_addr_reg_lock); - const src_addr_reg = try self.register_manager.allocReg(null); + const src_addr_reg = try self.register_manager.allocReg(null, gp); switch (src_ptr) { .memory, .got_load, @@ -5604,20 +5710,20 @@ fn genInlineMemcpy( .ptr_stack_offset, .stack_offset => |off| { _ = try self.addInst(.{ .tag = .lea, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = src_addr_reg.to64(), .reg2 = opts.source_stack_base orelse .rbp, - }).encode(), + }), .data = .{ .imm = @bitCast(u32, -off) }, }); }, .register => |reg| { _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(src_addr_reg, @divExact(reg.size(), 8)), .reg2 = reg, - }).encode(), + }), .data = undefined, }); }, @@ -5628,7 +5734,7 @@ fn genInlineMemcpy( const src_addr_reg_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg); defer self.register_manager.unlockReg(src_addr_reg_lock); - const regs = try self.register_manager.allocRegs(2, .{ null, null }); + const regs = try self.register_manager.allocRegs(2, .{ null, null }, gp); const count_reg = regs[0].to64(); const tmp_reg = regs[1].to8(); @@ -5637,18 +5743,14 @@ fn genInlineMemcpy( // mov rcx, 0 _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ - .reg1 = .rcx, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rcx }), .data = .{ .imm = 0 }, }); // mov rax, 0 _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ - .reg1 = .rax, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), .data = .{ .imm = 0 }, }); @@ -5656,70 +5758,62 @@ fn genInlineMemcpy( // cmp count, 0 const loop_start = try self.addInst(.{ .tag = .cmp, - .ops = (Mir.Ops{ - .reg1 = count_reg, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = count_reg }), .data = .{ .imm = 0 }, }); // je end const loop_reloc = try self.addInst(.{ .tag = .cond_jmp_eq_ne, - .ops = (Mir.Ops{ .flags = 0b01 
}).encode(), + .ops = Mir.Inst.Ops.encode(.{ .flags = 0b01 }), .data = .{ .inst = undefined }, }); // mov tmp, [addr + rcx] _ = try self.addInst(.{ .tag = .mov_scale_src, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = tmp_reg.to8(), .reg2 = src_addr_reg, - }).encode(), + }), .data = .{ .imm = 0 }, }); // mov [stack_offset + rax], tmp _ = try self.addInst(.{ .tag = .mov_scale_dst, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = dst_addr_reg, .reg2 = tmp_reg.to8(), - }).encode(), + }), .data = .{ .imm = 0 }, }); // add rcx, 1 _ = try self.addInst(.{ .tag = .add, - .ops = (Mir.Ops{ - .reg1 = .rcx, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rcx }), .data = .{ .imm = 1 }, }); // add rax, 1 _ = try self.addInst(.{ .tag = .add, - .ops = (Mir.Ops{ - .reg1 = .rax, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), .data = .{ .imm = 1 }, }); // sub count, 1 _ = try self.addInst(.{ .tag = .sub, - .ops = (Mir.Ops{ - .reg1 = count_reg, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = count_reg }), .data = .{ .imm = 1 }, }); // jmp loop _ = try self.addInst(.{ .tag = .jmp, - .ops = (Mir.Ops{ .flags = 0b00 }).encode(), + .ops = Mir.Inst.Ops.encode(.{}), .data = .{ .inst = loop_start }, }); @@ -5740,7 +5834,7 @@ fn genInlineMemset( const rax_lock = self.register_manager.lockRegAssumeUnused(.rax); defer self.register_manager.unlockReg(rax_lock); - const addr_reg = try self.register_manager.allocReg(null); + const addr_reg = try self.register_manager.allocReg(null, gp); switch (dst_ptr) { .memory, .got_load, @@ -5751,20 +5845,20 @@ fn genInlineMemset( .ptr_stack_offset, .stack_offset => |off| { _ = try self.addInst(.{ .tag = .lea, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = addr_reg.to64(), .reg2 = opts.dest_stack_base orelse .rbp, - }).encode(), + }), .data = .{ .imm = @bitCast(u32, -off) }, }); }, .register => |reg| { _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(addr_reg, @divExact(reg.size(), 8)), .reg2 = reg, - }).encode(), + }), .data = undefined, }); }, @@ -5782,16 +5876,14 @@ fn genInlineMemset( // cmp rax, -1 const loop_start = try self.addInst(.{ .tag = .cmp, - .ops = (Mir.Ops{ - .reg1 = .rax, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), .data = .{ .imm = @bitCast(u32, @as(i32, -1)) }, }); // je end const loop_reloc = try self.addInst(.{ .tag = .cond_jmp_eq_ne, - .ops = (Mir.Ops{ .flags = 0b01 }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .flags = 0b01 }), .data = .{ .inst = undefined }, }); @@ -5807,9 +5899,7 @@ fn genInlineMemset( }); _ = try self.addInst(.{ .tag = .mov_mem_index_imm, - .ops = (Mir.Ops{ - .reg1 = addr_reg, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = addr_reg }), .data = .{ .payload = payload }, }); }, @@ -5819,16 +5909,14 @@ fn genInlineMemset( // sub rax, 1 _ = try self.addInst(.{ .tag = .sub, - .ops = (Mir.Ops{ - .reg1 = .rax, - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), .data = .{ .imm = 1 }, }); // jmp loop _ = try self.addInst(.{ .tag = .jmp, - .ops = (Mir.Ops{ .flags = 0b00 }).encode(), + .ops = Mir.Inst.Ops.encode(.{}), .data = .{ .inst = loop_start }, }); @@ -5849,10 +5937,10 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void } _ = try self.addInst(.{ .tag = .lea, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(reg, abi_size), .reg2 = .rbp, - }).encode(), + }), .data = .{ .imm = @bitCast(u32, -off) }, }); }, @@ -5889,10 +5977,10 @@ fn 
genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void }; _ = try self.addInst(.{ .tag = tag, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = reg.to8(), .flags = flags, - }).encode(), + }), .data = undefined, }); }, @@ -5902,10 +5990,10 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void if (x == 0) { _ = try self.addInst(.{ .tag = .xor, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = reg.to32(), .reg2 = reg.to32(), - }).encode(), + }), .data = undefined, }); return; @@ -5914,9 +6002,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void // Next best case: if we set the lower four bytes, the upper four will be zeroed. _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ - .reg1 = registerAlias(reg, abi_size), - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(reg, abi_size) }), .data = .{ .imm = @truncate(u32, x) }, }); return; @@ -5931,9 +6017,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void const payload = try self.addExtra(Mir.Imm64.encode(x)); _ = try self.addInst(.{ .tag = .movabs, - .ops = (Mir.Ops{ - .reg1 = reg.to64(), - }).encode(), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = reg.to64() }), .data = .{ .payload = payload }, }); }, @@ -5942,150 +6026,290 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void if (src_reg.id() == reg.id()) return; - if (ty.zigTypeTag() == .Int) blk: { - switch (ty.intInfo(self.target.*).signedness) { + switch (ty.zigTypeTag()) { + .Int => switch (ty.intInfo(self.target.*).signedness) { .signed => { - if (abi_size > 4) break :blk; - _ = try self.addInst(.{ - .tag = .mov_sign_extend, - .ops = (Mir.Ops{ - .reg1 = reg.to64(), - .reg2 = registerAlias(src_reg, abi_size), - }).encode(), - .data = undefined, - }); + if (abi_size <= 4) { + _ = try self.addInst(.{ + .tag = .mov_sign_extend, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = reg.to64(), + .reg2 = registerAlias(src_reg, abi_size), + }), + .data = undefined, + }); + return; + } }, .unsigned => { - if (abi_size > 2) break :blk; + if (abi_size <= 2) { + _ = try self.addInst(.{ + .tag = .mov_zero_extend, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = reg.to64(), + .reg2 = registerAlias(src_reg, abi_size), + }), + .data = undefined, + }); + return; + } + }, + }, + .Float => { + if (intrinsicsAllowed(self.target.*, ty)) { + const tag: Mir.Inst.Tag = switch (ty.tag()) { + .f32 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f32_avx + else + Mir.Inst.Tag.mov_f32_sse, + .f64 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f64_avx + else + Mir.Inst.Tag.mov_f64_sse, + else => return self.fail("TODO genSetReg from register for {}", .{ty.fmtDebug()}), + }; _ = try self.addInst(.{ - .tag = .mov_zero_extend, - .ops = (Mir.Ops{ - .reg1 = reg.to64(), - .reg2 = registerAlias(src_reg, abi_size), - }).encode(), + .tag = tag, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = reg.to128(), + .reg2 = src_reg.to128(), + .flags = 0b10, + }), .data = undefined, }); - }, - } - return; + return; + } + + return self.fail("TODO genSetReg from register for float with no intrinsics", .{}); + }, + else => {}, } _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(reg, abi_size), .reg2 = registerAlias(src_reg, abi_size), - }).encode(), + }), .data = undefined, }); }, .direct_load, .got_load, => { - try self.loadMemPtrIntoRegister(reg, Type.usize, mcv); - _ = try self.addInst(.{ - .tag = .mov, 
- .ops = (Mir.Ops{ - .reg1 = registerAlias(reg, abi_size), - .reg2 = reg.to64(), - .flags = 0b01, - }).encode(), - .data = .{ .imm = 0 }, - }); - }, - .memory => |x| { - if (x <= math.maxInt(i32)) { - // mov reg, [ds:imm32] - _ = try self.addInst(.{ - .tag = .mov, - .ops = (Mir.Ops{ - .reg1 = registerAlias(reg, abi_size), - .flags = 0b01, - }).encode(), - .data = .{ .imm = @truncate(u32, x) }, - }); - } else { - // If this is RAX, we can use a direct load. - // Otherwise, we need to load the address, then indirectly load the value. - if (reg.id() == 0) { - // movabs rax, ds:moffs64 - const payload = try self.addExtra(Mir.Imm64.encode(x)); - _ = try self.addInst(.{ - .tag = .movabs, - .ops = (Mir.Ops{ - .reg1 = .rax, - .flags = 0b01, // imm64 will become moffs64 - }).encode(), - .data = .{ .payload = payload }, - }); - } else { - // Rather than duplicate the logic used for the move, we just use a self-call with a new MCValue. - try self.genSetReg(ty, reg, MCValue{ .immediate = x }); + switch (ty.zigTypeTag()) { + .Float => { + const base_reg = try self.register_manager.allocReg(null, gp); + try self.loadMemPtrIntoRegister(base_reg, Type.usize, mcv); - // mov reg, [reg + 0x0] + if (intrinsicsAllowed(self.target.*, ty)) { + const tag: Mir.Inst.Tag = switch (ty.tag()) { + .f32 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f32_avx + else + Mir.Inst.Tag.mov_f32_sse, + .f64 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f64_avx + else + Mir.Inst.Tag.mov_f64_sse, + else => return self.fail("TODO genSetReg from memory for {}", .{ty.fmtDebug()}), + }; + + _ = try self.addInst(.{ + .tag = tag, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = reg.to128(), + .reg2 = switch (ty.tag()) { + .f32 => base_reg.to32(), + .f64 => base_reg.to64(), + else => unreachable, + }, + }), + .data = .{ .imm = 0 }, + }); + return; + } + + return self.fail("TODO genSetReg from memory for float with no intrinsics", .{}); + }, + else => { + try self.loadMemPtrIntoRegister(reg, Type.usize, mcv); _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(reg, abi_size), .reg2 = reg.to64(), .flags = 0b01, - }).encode(), + }), .data = .{ .imm = 0 }, }); - } + }, } }, + .memory => |x| switch (ty.zigTypeTag()) { + .Float => { + const base_reg = try self.register_manager.allocReg(null, gp); + try self.loadMemPtrIntoRegister(base_reg, Type.usize, mcv); + + if (intrinsicsAllowed(self.target.*, ty)) { + const tag: Mir.Inst.Tag = switch (ty.tag()) { + .f32 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f32_avx + else + Mir.Inst.Tag.mov_f32_sse, + .f64 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f64_avx + else + Mir.Inst.Tag.mov_f64_sse, + else => return self.fail("TODO genSetReg from memory for {}", .{ty.fmtDebug()}), + }; + + _ = try self.addInst(.{ + .tag = tag, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = reg.to128(), + .reg2 = switch (ty.tag()) { + .f32 => base_reg.to32(), + .f64 => base_reg.to64(), + else => unreachable, + }, + }), + .data = .{ .imm = 0 }, + }); + return; + } + + return self.fail("TODO genSetReg from memory for float with no intrinsics", .{}); + }, + else => { + if (x <= math.maxInt(i32)) { + // mov reg, [ds:imm32] + _ = try self.addInst(.{ + .tag = .mov, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = registerAlias(reg, abi_size), + .flags = 0b01, + }), + .data = .{ .imm = @truncate(u32, x) }, + }); + } else { + // If this is RAX, we can use a direct load. + // Otherwise, we need to load the address, then indirectly load the value. 
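// Editor's note (assumed encodings, for illustration): only the accumulator
// family (al/ax/eax/rax, register id 0) has the moffs forms of `mov`, which
// embed a full 64-bit absolute address in the instruction (FD: mov rax,
// [moffs64]; TD: mov [moffs64], rax). Any other destination register takes
// the two-step path below: materialize the address with an ordinary
// mov/movabs, then load through it with `mov reg, [reg]`.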
+ if (reg.id() == 0) { + // movabs rax, ds:moffs64 + const payload = try self.addExtra(Mir.Imm64.encode(x)); + _ = try self.addInst(.{ + .tag = .movabs, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = .rax, + .flags = 0b01, // imm64 will become moffs64 + }), + .data = .{ .payload = payload }, + }); + } else { + // Rather than duplicate the logic used for the move, we just use a self-call with a new MCValue. + try self.genSetReg(ty, reg, MCValue{ .immediate = x }); + + // mov reg, [reg + 0x0] + _ = try self.addInst(.{ + .tag = .mov, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = registerAlias(reg, abi_size), + .reg2 = reg.to64(), + .flags = 0b01, + }), + .data = .{ .imm = 0 }, + }); + } + } + }, + }, .stack_offset => |off| { if (off < std.math.minInt(i32) or off > std.math.maxInt(i32)) { return self.fail("stack offset too large", .{}); } - if (ty.zigTypeTag() == .Int) blk: { - switch (ty.intInfo(self.target.*).signedness) { + switch (ty.zigTypeTag()) { + .Int => switch (ty.intInfo(self.target.*).signedness) { .signed => { - const flags: u2 = switch (abi_size) { - 1 => 0b01, - 2 => 0b10, - 4 => 0b11, - else => break :blk, - }; - _ = try self.addInst(.{ - .tag = .mov_sign_extend, - .ops = (Mir.Ops{ - .reg1 = reg.to64(), - .reg2 = .rbp, - .flags = flags, - }).encode(), - .data = .{ .imm = @bitCast(u32, -off) }, - }); + if (abi_size <= 4) { + const flags: u2 = switch (abi_size) { + 1 => 0b01, + 2 => 0b10, + 4 => 0b11, + else => unreachable, + }; + _ = try self.addInst(.{ + .tag = .mov_sign_extend, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = reg.to64(), + .reg2 = .rbp, + .flags = flags, + }), + .data = .{ .imm = @bitCast(u32, -off) }, + }); + return; + } }, .unsigned => { - const flags: u2 = switch (abi_size) { - 1 => 0b01, - 2 => 0b10, - else => break :blk, + if (abi_size <= 2) { + const flags: u2 = switch (abi_size) { + 1 => 0b01, + 2 => 0b10, + else => unreachable, + }; + _ = try self.addInst(.{ + .tag = .mov_zero_extend, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = reg.to64(), + .reg2 = .rbp, + .flags = flags, + }), + .data = .{ .imm = @bitCast(u32, -off) }, + }); + return; + } + }, + }, + .Float => { + if (intrinsicsAllowed(self.target.*, ty)) { + const tag: Mir.Inst.Tag = switch (ty.tag()) { + .f32 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f32_avx + else + Mir.Inst.Tag.mov_f32_sse, + .f64 => if (hasAvxSupport(self.target.*)) + Mir.Inst.Tag.mov_f64_avx + else + Mir.Inst.Tag.mov_f64_sse, + else => return self.fail("TODO genSetReg from stack offset for {}", .{ty.fmtDebug()}), }; _ = try self.addInst(.{ - .tag = .mov_zero_extend, - .ops = (Mir.Ops{ - .reg1 = reg.to64(), - .reg2 = .rbp, - .flags = flags, - }).encode(), + .tag = tag, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = reg.to128(), + .reg2 = switch (ty.tag()) { + .f32 => .ebp, + .f64 => .rbp, + else => unreachable, + }, + }), .data = .{ .imm = @bitCast(u32, -off) }, }); - }, - } - return; + return; + } + return self.fail("TODO genSetReg from stack offset for float with no intrinsics", .{}); + }, + else => {}, } _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = registerAlias(reg, abi_size), .reg2 = .rbp, .flags = 0b01, - }).encode(), + }), .data = .{ .imm = @bitCast(u32, -off) }, }); }, @@ -6152,14 +6376,14 @@ fn airFloatToInt(self: *Self, inst: Air.Inst.Index) !void { }; _ = try self.addInst(.{ .tag = .fld, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = .rbp, .flags = switch (src_ty.abiSize(self.target.*)) { 4 => 0b01, 8 => 0b10, else => |size| return self.fail("TODO load ST(0) with 
abiSize={}", .{size}), }, - .reg1 = .rbp, - }).encode(), + }), .data = .{ .imm = @bitCast(u32, -stack_offset) }, }); @@ -6167,15 +6391,15 @@ fn airFloatToInt(self: *Self, inst: Air.Inst.Index) !void { const stack_dst = try self.allocRegOrMem(inst, false); _ = try self.addInst(.{ .tag = .fisttp, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = .rbp, .flags = switch (dst_ty.abiSize(self.target.*)) { 1...2 => 0b00, 3...4 => 0b01, 5...8 => 0b10, else => |size| return self.fail("TODO convert float with abiSize={}", .{size}), }, - .reg1 = .rbp, - }).encode(), + }), .data = .{ .imm = @bitCast(u32, -stack_dst.stack_offset) }, }); @@ -6267,15 +6491,15 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void { const src: MCValue = blk: { switch (src_ptr) { .got_load, .direct_load, .memory => { - const reg = try self.register_manager.allocReg(null); + const reg = try self.register_manager.allocReg(null, gp); try self.loadMemPtrIntoRegister(reg, src_ty, src_ptr); _ = try self.addInst(.{ .tag = .mov, - .ops = (Mir.Ops{ + .ops = Mir.Inst.Ops.encode(.{ .reg1 = reg, .reg2 = reg, .flags = 0b01, - }).encode(), + }), .data = .{ .imm = 0 }, }); break :blk MCValue{ .register = reg }; @@ -6839,9 +7063,11 @@ fn registerAlias(reg: Register, size_bytes: u32) Register { return reg.to32(); } else if (size_bytes <= 8) { return reg.to64(); - } else { - unreachable; // TODO handle floating-point registers - } + } else if (size_bytes <= 16) { + return reg.to128(); + } else if (size_bytes <= 32) { + return reg.to256(); + } else unreachable; } /// Truncates the value in the register in place. @@ -6867,3 +7093,16 @@ fn truncateRegister(self: *Self, ty: Type, reg: Register) !void { }, } } + +fn intrinsicsAllowed(target: Target, ty: Type) bool { + return switch (ty.tag()) { + .f32, + .f64, + => Target.x86.featureSetHasAny(target.cpu.features, .{ .sse2, .avx, .avx2 }), + else => unreachable, // TODO finish this off + }; +} + +fn hasAvxSupport(target: Target) bool { + return Target.x86.featureSetHasAny(target.cpu.features, .{ .avx, .avx2 }); +} diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 57100abc0f..84955a8aac 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -25,8 +25,8 @@ const MCValue = @import("CodeGen.zig").MCValue; const Mir = @import("Mir.zig"); const Module = @import("../../Module.zig"); const Instruction = bits.Instruction; -const Register = bits.Register; const Type = @import("../../type.zig").Type; +const Register = bits.Register; mir: Mir, bin_file: *link.File, @@ -67,6 +67,7 @@ pub fn lowerMir(emit: *Emit) InnerError!void { const inst = @intCast(u32, index); try emit.code_offset_mapping.putNoClobber(emit.bin_file.allocator, inst, emit.code.items.len); switch (tag) { + // GPR instructions .adc => try emit.mirArith(.adc, inst), .add => try emit.mirArith(.add, inst), .sub => try emit.mirArith(.sub, inst), @@ -181,6 +182,27 @@ pub fn lowerMir(emit: *Emit) InnerError!void { .interrupt => try emit.mirInterrupt(inst), .nop => try emit.mirNop(), + // SSE instructions + .mov_f64_sse => try emit.mirMovFloatSse(.movsd, inst), + .mov_f32_sse => try emit.mirMovFloatSse(.movss, inst), + + .add_f64_sse => try emit.mirAddFloatSse(.addsd, inst), + .add_f32_sse => try emit.mirAddFloatSse(.addss, inst), + + .cmp_f64_sse => try emit.mirCmpFloatSse(.ucomisd, inst), + .cmp_f32_sse => try emit.mirCmpFloatSse(.ucomiss, inst), + + // AVX instructions + .mov_f64_avx => try emit.mirMovFloatAvx(.vmovsd, inst), + .mov_f32_avx => try emit.mirMovFloatAvx(.vmovss, inst), + + 
.add_f64_avx => try emit.mirAddFloatAvx(.vaddsd, inst), + .add_f32_avx => try emit.mirAddFloatAvx(.vaddss, inst), + + .cmp_f64_avx => try emit.mirCmpFloatAvx(.vucomisd, inst), + .cmp_f32_avx => try emit.mirCmpFloatAvx(.vucomiss, inst), + + // Pseudo-instructions .call_extern => try emit.mirCallExtern(inst), .dbg_line => try emit.mirDbgLine(inst), @@ -228,7 +250,7 @@ fn fixupRelocs(emit: *Emit) InnerError!void { fn mirInterrupt(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const tag = emit.mir.instructions.items(.tag)[inst]; assert(tag == .interrupt); - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); switch (ops.flags) { 0b00 => return lowerToZoEnc(.int3, emit.code), else => return emit.fail("TODO handle variant 0b{b} of interrupt instruction", .{ops.flags}), @@ -244,7 +266,7 @@ fn mirSyscall(emit: *Emit) InnerError!void { } fn mirPushPop(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); switch (ops.flags) { 0b00 => { // PUSH/POP reg @@ -271,8 +293,9 @@ fn mirPushPop(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { 0b11 => unreachable, } } + fn mirPushPopRegsFromCalleePreservedRegs(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); const payload = emit.mir.instructions.items(.data)[inst].payload; const data = emit.mir.extraData(Mir.RegsToPushOrPop, payload).data; const regs = data.regs; @@ -295,7 +318,7 @@ fn mirPushPopRegsFromCalleePreservedRegs(emit: *Emit, tag: Tag, inst: Mir.Inst.I } fn mirJmpCall(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); switch (ops.flags) { 0b00 => { const target = emit.mir.instructions.items(.data)[inst].inst; @@ -324,7 +347,7 @@ fn mirJmpCall(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { 0b10 => { // JMP/CALL r/m64 const imm = emit.mir.instructions.items(.data)[inst].imm; - return lowerToMEnc(tag, RegisterOrMemory.mem(Memory.PtrSize.fromBits(ops.reg1.size()), .{ + return lowerToMEnc(tag, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg1.size()), .{ .disp = imm, .base = ops.reg1, }), emit.code); @@ -334,7 +357,7 @@ fn mirJmpCall(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { } fn mirCondJmp(emit: *Emit, mir_tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void { - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); const target = emit.mir.instructions.items(.data)[inst].inst; const tag = switch (mir_tag) { .cond_jmp_greater_less => switch (ops.flags) { @@ -366,7 +389,7 @@ fn mirCondJmp(emit: *Emit, mir_tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerErr } fn mirCondSetByte(emit: *Emit, mir_tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void { - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); const tag = switch (mir_tag) { .cond_set_byte_greater_less => switch (ops.flags) { 0b00 => Tag.setge, @@ -396,7 +419,7 @@ fn mirCondSetByte(emit: *Emit, mir_tag: Mir.Inst.Tag, inst: Mir.Inst.Index) Inne } fn mirCondMov(emit: *Emit, 
tag: Tag, inst: Mir.Inst.Index) InnerError!void { - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); if (ops.flags == 0b00) { return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code); } @@ -416,7 +439,7 @@ fn mirCondMov(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { fn mirTest(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const tag = emit.mir.instructions.items(.tag)[inst]; assert(tag == .@"test"); - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); switch (ops.flags) { 0b00 => { if (ops.reg2 == .none) { @@ -440,7 +463,7 @@ fn mirTest(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { fn mirRet(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const tag = emit.mir.instructions.items(.tag)[inst]; assert(tag == .ret); - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); switch (ops.flags) { 0b00 => { // RETF imm16 @@ -464,7 +487,7 @@ fn mirRet(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { } fn mirArith(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); switch (ops.flags) { 0b00 => { if (ops.reg2 == .none) { @@ -481,8 +504,8 @@ fn mirArith(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { // mov reg1, [reg2 + imm32] // RM const imm = emit.mir.instructions.items(.data)[inst].imm; - const src_reg: ?Register = if (ops.reg2 == .none) null else ops.reg2; - return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.mem(Memory.PtrSize.fromBits(ops.reg1.size()), .{ + const src_reg: ?Register = if (ops.reg2 != .none) ops.reg2 else null; + return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg1.size()), .{ .disp = imm, .base = src_reg, }), emit.code); @@ -494,7 +517,7 @@ fn mirArith(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { // mov [reg1 + imm32], reg2 // MR const imm = emit.mir.instructions.items(.data)[inst].imm; - return lowerToMrEnc(tag, RegisterOrMemory.mem(Memory.PtrSize.fromBits(ops.reg2.size()), .{ + return lowerToMrEnc(tag, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg2.size()), .{ .disp = imm, .base = ops.reg1, }), ops.reg2, emit.code); @@ -506,7 +529,7 @@ fn mirArith(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { } fn mirArithMemImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); assert(ops.reg2 == .none); const payload = emit.mir.instructions.items(.data)[inst].payload; const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data; @@ -523,14 +546,15 @@ fn mirArithMemImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { } inline fn setRexWRegister(reg: Register) bool { + if (reg.size() > 64) return false; if (reg.size() == 64) return true; return switch (reg) { - .ah, .bh, .ch, .dh => true, + .ah, .ch, .dh, .bh => true, else => false, }; } -inline fn immOpSize(u_imm: u32) u8 { +inline fn immOpSize(u_imm: u32) u6 { const imm = @bitCast(i32, u_imm); if (math.minInt(i8) <= imm and imm <= math.maxInt(i8)) { return 8; @@ -542,7 +566,7 @@ inline fn immOpSize(u_imm: u32) u8 { } fn mirArithScaleSrc(emit: *Emit, tag: Tag, 
inst: Mir.Inst.Index) InnerError!void { - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); const scale = ops.flags; const imm = emit.mir.instructions.items(.data)[inst].imm; // OP reg1, [reg2 + scale*rcx + imm32] @@ -550,7 +574,7 @@ fn mirArithScaleSrc(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void .scale = scale, .index = .rcx, }; - return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.mem(Memory.PtrSize.fromBits(ops.reg1.size()), .{ + return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg1.size()), .{ .disp = imm, .base = ops.reg2, .scale_index = scale_index, @@ -558,7 +582,7 @@ fn mirArithScaleSrc(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void } fn mirArithScaleDst(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); const scale = ops.flags; const imm = emit.mir.instructions.items(.data)[inst].imm; const scale_index = ScaleIndex{ @@ -574,7 +598,7 @@ fn mirArithScaleDst(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void }), imm, emit.code); } // OP [reg1 + scale*rax + imm32], reg2 - return lowerToMrEnc(tag, RegisterOrMemory.mem(Memory.PtrSize.fromBits(ops.reg2.size()), .{ + return lowerToMrEnc(tag, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg2.size()), .{ .disp = imm, .base = ops.reg1, .scale_index = scale_index, @@ -582,7 +606,7 @@ fn mirArithScaleDst(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void } fn mirArithScaleImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); const scale = ops.flags; const payload = emit.mir.instructions.items(.data)[inst].payload; const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data; @@ -599,7 +623,7 @@ fn mirArithScaleImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void } fn mirArithMemIndexImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); assert(ops.reg2 == .none); const payload = emit.mir.instructions.items(.data)[inst].payload; const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data; @@ -624,7 +648,7 @@ fn mirArithMemIndexImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!v fn mirMovSignExtend(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const mir_tag = emit.mir.instructions.items(.tag)[inst]; assert(mir_tag == .mov_sign_extend); - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); const imm = if (ops.flags != 0b00) emit.mir.instructions.items(.data)[inst].imm else undefined; switch (ops.flags) { 0b00 => { @@ -655,7 +679,7 @@ fn mirMovSignExtend(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { fn mirMovZeroExtend(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const mir_tag = emit.mir.instructions.items(.tag)[inst]; assert(mir_tag == .mov_zero_extend); - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); const imm = if (ops.flags != 0b00) emit.mir.instructions.items(.data)[inst].imm else undefined; switch (ops.flags) { 0b00 => { @@ -682,31 
+706,46 @@ fn mirMovZeroExtend(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { fn mirMovabs(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const tag = emit.mir.instructions.items(.tag)[inst]; assert(tag == .movabs); - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); - const imm: u64 = if (ops.reg1.size() == 64) blk: { - const payload = emit.mir.instructions.items(.data)[inst].payload; - const imm = emit.mir.extraData(Mir.Imm64, payload).data; - break :blk imm.decode(); - } else emit.mir.instructions.items(.data)[inst].imm; - if (ops.flags == 0b00) { - // movabs reg, imm64 - // OI - return lowerToOiEnc(.mov, ops.reg1, imm, emit.code); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); + switch (ops.flags) { + 0b00 => { + const imm: u64 = if (ops.reg1.size() == 64) blk: { + const payload = emit.mir.instructions.items(.data)[inst].payload; + const imm = emit.mir.extraData(Mir.Imm64, payload).data; + break :blk imm.decode(); + } else emit.mir.instructions.items(.data)[inst].imm; + // movabs reg, imm64 + // OI + return lowerToOiEnc(.mov, ops.reg1, imm, emit.code); + }, + 0b01 => { + if (ops.reg1 == .none) { + const imm: u64 = if (ops.reg2.size() == 64) blk: { + const payload = emit.mir.instructions.items(.data)[inst].payload; + const imm = emit.mir.extraData(Mir.Imm64, payload).data; + break :blk imm.decode(); + } else emit.mir.instructions.items(.data)[inst].imm; + // movabs moffs64, rax + // TD + return lowerToTdEnc(.mov, imm, ops.reg2, emit.code); + } + const imm: u64 = if (ops.reg1.size() == 64) blk: { + const payload = emit.mir.instructions.items(.data)[inst].payload; + const imm = emit.mir.extraData(Mir.Imm64, payload).data; + break :blk imm.decode(); + } else emit.mir.instructions.items(.data)[inst].imm; + // movabs rax, moffs64 + // FD + return lowerToFdEnc(.mov, ops.reg1, imm, emit.code); + }, + else => return emit.fail("TODO unused variant: movabs 0b{b}", .{ops.flags}), } - if (ops.reg1 == .none) { - // movabs moffs64, rax - // TD - return lowerToTdEnc(.mov, imm, ops.reg2, emit.code); - } - // movabs rax, moffs64 - // FD - return lowerToFdEnc(.mov, ops.reg1, imm, emit.code); } fn mirFisttp(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const tag = emit.mir.instructions.items(.tag)[inst]; assert(tag == .fisttp); - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); // the selecting between operand sizes for this particular `fisttp` instruction // is done via opcode instead of the usual prefixes. @@ -728,7 +767,7 @@ fn mirFisttp(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { fn mirFld(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const tag = emit.mir.instructions.items(.tag)[inst]; assert(tag == .fld); - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); // the selecting between operand sizes for this particular `fisttp` instruction // is done via opcode instead of the usual prefixes. 
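// Editor's note: the hunks in this file all swap the free-function call
// `Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst])` for the method
// call `emit.mir.instructions.items(.ops)[inst].decode()`, i.e. `ops` is now
// stored as a structured type that knows how to pack and unpack itself. A
// minimal self-contained sketch of that shape follows; the u16 layout and
// field widths are assumptions for illustration, not the real Mir
// definitions.
const OpsSketch = struct {
    encoded: u16,

    const Decoded = struct { reg1: u6, reg2: u6, flags: u2 };

    fn encode(fields: Decoded) OpsSketch {
        var raw: u16 = fields.reg1;
        raw |= @as(u16, fields.reg2) << 6;
        raw |= @as(u16, fields.flags) << 12;
        return .{ .encoded = raw };
    }

    fn decode(ops: OpsSketch) Decoded {
        return .{
            .reg1 = @truncate(u6, ops.encoded),
            .reg2 = @truncate(u6, ops.encoded >> 6),
            .flags = @truncate(u2, ops.encoded >> 12),
        };
    }
};

test "ops round-trips through encode and decode" {
    const std = @import("std");
    const ops = OpsSketch.encode(.{ .reg1 = 3, .reg2 = 5, .flags = 0b10 });
    try std.testing.expectEqual(@as(u2, 0b10), ops.decode().flags);
    try std.testing.expectEqual(@as(u6, 3), ops.decode().reg1);
}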
@@ -745,8 +784,9 @@ fn mirFld(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { }; return lowerToMEnc(opcode, .{ .memory = mem_or_reg }, emit.code); } + fn mirShift(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); switch (ops.flags) { 0b00 => { // sal reg1, 1 @@ -771,12 +811,11 @@ fn mirShift(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { } fn mirMulDiv(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); if (ops.reg1 != .none) { assert(ops.reg2 == .none); return lowerToMEnc(tag, RegisterOrMemory.reg(ops.reg1), emit.code); } - assert(ops.reg1 == .none); assert(ops.reg2 != .none); const imm = emit.mir.instructions.items(.data)[inst].imm; const ptr_size: Memory.PtrSize = switch (ops.flags) { @@ -794,14 +833,14 @@ fn mirMulDiv(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { fn mirIMulComplex(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const tag = emit.mir.instructions.items(.tag)[inst]; assert(tag == .imul_complex); - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); switch (ops.flags) { 0b00 => { return lowerToRmEnc(.imul, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code); }, 0b01 => { const imm = emit.mir.instructions.items(.data)[inst].imm; - const src_reg: ?Register = if (ops.reg2 == .none) null else ops.reg2; + const src_reg: ?Register = if (ops.reg2 != .none) ops.reg2 else null; return lowerToRmEnc(.imul, ops.reg1, RegisterOrMemory.mem(.qword_ptr, .{ .disp = imm, .base = src_reg, @@ -823,7 +862,7 @@ fn mirIMulComplex(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { } fn mirCwd(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); const tag: Tag = switch (ops.flags) { 0b00 => .cbw, 0b01 => .cwd, @@ -836,17 +875,17 @@ fn mirCwd(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { fn mirLea(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const tag = emit.mir.instructions.items(.tag)[inst]; assert(tag == .lea); - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); switch (ops.flags) { 0b00 => { // lea reg1, [reg2 + imm32] // RM const imm = emit.mir.instructions.items(.data)[inst].imm; - const src_reg: ?Register = if (ops.reg2 == .none) null else ops.reg2; + const src_reg: ?Register = if (ops.reg2 != .none) ops.reg2 else null; return lowerToRmEnc( .lea, ops.reg1, - RegisterOrMemory.mem(Memory.PtrSize.fromBits(ops.reg1.size()), .{ + RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg1.size()), .{ .disp = imm, .base = src_reg, }), @@ -860,7 +899,7 @@ fn mirLea(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { try lowerToRmEnc( .lea, ops.reg1, - RegisterOrMemory.rip(Memory.PtrSize.fromBits(ops.reg1.size()), 0), + RegisterOrMemory.rip(Memory.PtrSize.new(ops.reg1.size()), 0), emit.code, ); const end_offset = emit.code.items.len; @@ -873,7 +912,7 @@ fn mirLea(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { 0b10 => { // lea reg, [rbp + rcx + imm32] const imm = emit.mir.instructions.items(.data)[inst].imm; - const src_reg: ?Register = if (ops.reg2 == .none) 
null else ops.reg2; + const src_reg: ?Register = if (ops.reg2 != .none) ops.reg2 else null; const scale_index = ScaleIndex{ .scale = 0, .index = .rcx, @@ -881,7 +920,7 @@ fn mirLea(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { return lowerToRmEnc( .lea, ops.reg1, - RegisterOrMemory.mem(Memory.PtrSize.fromBits(ops.reg1.size()), .{ + RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg1.size()), .{ .disp = imm, .base = src_reg, .scale_index = scale_index, @@ -896,7 +935,7 @@ fn mirLea(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { fn mirLeaPie(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const tag = emit.mir.instructions.items(.tag)[inst]; assert(tag == .lea_pie); - const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const ops = emit.mir.instructions.items(.ops)[inst].decode(); const load_reloc = emit.mir.instructions.items(.data)[inst].load_reloc; // lea reg1, [rip + reloc] @@ -904,7 +943,7 @@ fn mirLeaPie(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { try lowerToRmEnc( .lea, ops.reg1, - RegisterOrMemory.rip(Memory.PtrSize.fromBits(ops.reg1.size()), 0), + RegisterOrMemory.rip(Memory.PtrSize.new(ops.reg1.size()), 0), emit.code, ); @@ -935,6 +974,99 @@ fn mirLeaPie(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { } } +// SSE instructions + +fn mirMovFloatSse(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { + const ops = emit.mir.instructions.items(.ops)[inst].decode(); + switch (ops.flags) { + 0b00 => { + const imm = emit.mir.instructions.items(.data)[inst].imm; + return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg2.size()), .{ + .disp = imm, + .base = ops.reg2, + }), emit.code); + }, + 0b01 => { + const imm = emit.mir.instructions.items(.data)[inst].imm; + return lowerToMrEnc(tag, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg1.size()), .{ + .disp = imm, + .base = ops.reg1, + }), ops.reg2, emit.code); + }, + 0b10 => { + return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code); + }, + else => return emit.fail("TODO unused variant 0b{b} for {}", .{ ops.flags, tag }), + } +} + +fn mirAddFloatSse(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { + const ops = emit.mir.instructions.items(.ops)[inst].decode(); + switch (ops.flags) { + 0b00 => { + return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code); + }, + else => return emit.fail("TODO unused variant 0b{b} for {}", .{ ops.flags, tag }), + } +} + +fn mirCmpFloatSse(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { + const ops = emit.mir.instructions.items(.ops)[inst].decode(); + switch (ops.flags) { + 0b00 => { + return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code); + }, + else => return emit.fail("TODO unused variant 0b{b} for {}", .{ ops.flags, tag }), + } +} +// AVX instructions + +fn mirMovFloatAvx(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { + const ops = emit.mir.instructions.items(.ops)[inst].decode(); + switch (ops.flags) { + 0b00 => { + const imm = emit.mir.instructions.items(.data)[inst].imm; + return lowerToVmEnc(tag, ops.reg1, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg2.size()), .{ + .disp = imm, + .base = ops.reg2, + }), emit.code); + }, + 0b01 => { + const imm = emit.mir.instructions.items(.data)[inst].imm; + return lowerToMvEnc(tag, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg1.size()), .{ + .disp = imm, + .base = ops.reg1, + }), ops.reg2, emit.code); + }, + 0b10 => { + return lowerToRvmEnc(tag, ops.reg1, ops.reg1, 
RegisterOrMemory.reg(ops.reg2), emit.code); + }, + else => return emit.fail("TODO unused variant 0b{b} for {}", .{ ops.flags, tag }), + } +} + +fn mirAddFloatAvx(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { + const ops = emit.mir.instructions.items(.ops)[inst].decode(); + switch (ops.flags) { + 0b00 => { + return lowerToRvmEnc(tag, ops.reg1, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code); + }, + else => return emit.fail("TODO unused variant 0b{b} for {}", .{ ops.flags, tag }), + } +} + +fn mirCmpFloatAvx(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { + const ops = emit.mir.instructions.items(.ops)[inst].decode(); + switch (ops.flags) { + 0b00 => { + return lowerToVmEnc(tag, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code); + }, + else => return emit.fail("TODO unused variant 0b{b} for {}", .{ ops.flags, tag }), + } +} + +// Pseudo-instructions + fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const tag = emit.mir.instructions.items(.tag)[inst]; assert(tag == .call_extern); @@ -1168,6 +1300,54 @@ const Tag = enum { cmovng, cmovb, cmovnae, + movsd, + movss, + addsd, + addss, + cmpsd, + cmpss, + ucomisd, + ucomiss, + vmovsd, + vmovss, + vaddsd, + vaddss, + vcmpsd, + vcmpss, + vucomisd, + vucomiss, + + fn isSse(tag: Tag) bool { + return switch (tag) { + .movsd, + .movss, + .addsd, + .addss, + .cmpsd, + .cmpss, + .ucomisd, + .ucomiss, + => true, + + else => false, + }; + } + + fn isAvx(tag: Tag) bool { + return switch (tag) { + .vmovsd, + .vmovss, + .vaddsd, + .vaddss, + .vcmpsd, + .vcmpss, + .vucomisd, + .vucomiss, + => true, + + else => false, + }; + } fn isSetCC(tag: Tag) bool { return switch (tag) { @@ -1252,177 +1432,273 @@ const Encoding = enum { /// OP r64, r/m64, imm32 rmi, + + /// OP xmm1, xmm2/m64 + vm, + + /// OP m64, xmm1 + mv, + + /// OP xmm1, xmm2, xmm3/m64 + rvm, + + /// OP xmm1, xmm2, xmm3/m64, imm8 + rvmi, }; -const OpCode = union(enum) { - one_byte: u8, - two_byte: struct { _1: u8, _2: u8 }, +const OpCode = struct { + bytes: [3]u8, + count: usize, - fn oneByte(opc: u8) OpCode { - return .{ .one_byte = opc }; - } - - fn twoByte(opc1: u8, opc2: u8) OpCode { - return .{ .two_byte = .{ ._1 = opc1, ._2 = opc2 } }; + fn init(comptime in_bytes: []const u8) OpCode { + comptime assert(in_bytes.len <= 3); + comptime var bytes: [3]u8 = undefined; + inline for (in_bytes) |x, i| { + bytes[i] = x; + } + return .{ .bytes = bytes, .count = in_bytes.len }; } fn encode(opc: OpCode, encoder: Encoder) void { - switch (opc) { - .one_byte => |v| encoder.opcode_1byte(v), - .two_byte => |v| encoder.opcode_2byte(v._1, v._2), + switch (opc.count) { + 1 => encoder.opcode_1byte(opc.bytes[0]), + 2 => encoder.opcode_2byte(opc.bytes[0], opc.bytes[1]), + 3 => encoder.opcode_3byte(opc.bytes[0], opc.bytes[1], opc.bytes[2]), + else => unreachable, } } fn encodeWithReg(opc: OpCode, encoder: Encoder, reg: Register) void { - assert(opc == .one_byte); - encoder.opcode_withReg(opc.one_byte, reg.lowId()); + assert(opc.count == 1); + encoder.opcode_withReg(opc.bytes[0], reg.lowEnc()); } }; -inline fn getOpCode(tag: Tag, enc: Encoding, is_one_byte: bool) ?OpCode { +inline fn getOpCode(tag: Tag, enc: Encoding, is_one_byte: bool) OpCode { + // zig fmt: off switch (enc) { .zo => return switch (tag) { - .ret_near => OpCode.oneByte(0xc3), - .ret_far => OpCode.oneByte(0xcb), - .int3 => OpCode.oneByte(0xcc), - .nop => OpCode.oneByte(0x90), - .syscall => OpCode.twoByte(0x0f, 0x05), - .cbw => OpCode.oneByte(0x98), - .cwd, .cdq, .cqo => OpCode.oneByte(0x99), - else
=> null, + .ret_near => OpCode.init(&.{0xc3}), + .ret_far => OpCode.init(&.{0xcb}), + .int3 => OpCode.init(&.{0xcc}), + .nop => OpCode.init(&.{0x90}), + .syscall => OpCode.init(&.{ 0x0f, 0x05 }), + .cbw => OpCode.init(&.{0x98}), + .cwd, + .cdq, + .cqo => OpCode.init(&.{0x99}), + else => unreachable, }, .d => return switch (tag) { - .jmp_near => OpCode.oneByte(0xe9), - .call_near => OpCode.oneByte(0xe8), - .jo => if (is_one_byte) OpCode.oneByte(0x70) else OpCode.twoByte(0x0f, 0x80), - .jno => if (is_one_byte) OpCode.oneByte(0x71) else OpCode.twoByte(0x0f, 0x81), - .jb, .jc, .jnae => if (is_one_byte) OpCode.oneByte(0x72) else OpCode.twoByte(0x0f, 0x82), - .jnb, .jnc, .jae => if (is_one_byte) OpCode.oneByte(0x73) else OpCode.twoByte(0x0f, 0x83), - .je, .jz => if (is_one_byte) OpCode.oneByte(0x74) else OpCode.twoByte(0x0f, 0x84), - .jne, .jnz => if (is_one_byte) OpCode.oneByte(0x75) else OpCode.twoByte(0x0f, 0x85), - .jna, .jbe => if (is_one_byte) OpCode.oneByte(0x76) else OpCode.twoByte(0x0f, 0x86), - .jnbe, .ja => if (is_one_byte) OpCode.oneByte(0x77) else OpCode.twoByte(0x0f, 0x87), - .js => if (is_one_byte) OpCode.oneByte(0x78) else OpCode.twoByte(0x0f, 0x88), - .jns => if (is_one_byte) OpCode.oneByte(0x79) else OpCode.twoByte(0x0f, 0x89), - .jpe, .jp => if (is_one_byte) OpCode.oneByte(0x7a) else OpCode.twoByte(0x0f, 0x8a), - .jpo, .jnp => if (is_one_byte) OpCode.oneByte(0x7b) else OpCode.twoByte(0x0f, 0x8b), - .jnge, .jl => if (is_one_byte) OpCode.oneByte(0x7c) else OpCode.twoByte(0x0f, 0x8c), - .jge, .jnl => if (is_one_byte) OpCode.oneByte(0x7d) else OpCode.twoByte(0x0f, 0x8d), - .jle, .jng => if (is_one_byte) OpCode.oneByte(0x7e) else OpCode.twoByte(0x0f, 0x8e), - .jg, .jnle => if (is_one_byte) OpCode.oneByte(0x7f) else OpCode.twoByte(0x0f, 0x8f), - else => null, + .jmp_near => OpCode.init(&.{0xe9}), + .call_near => OpCode.init(&.{0xe8}), + .jo => if (is_one_byte) OpCode.init(&.{0x70}) else OpCode.init(&.{0x0f,0x80}), + .jno => if (is_one_byte) OpCode.init(&.{0x71}) else OpCode.init(&.{0x0f,0x81}), + .jb, + .jc, + .jnae => if (is_one_byte) OpCode.init(&.{0x72}) else OpCode.init(&.{0x0f,0x82}), + .jnb, + .jnc, + .jae => if (is_one_byte) OpCode.init(&.{0x73}) else OpCode.init(&.{0x0f,0x83}), + .je, + .jz => if (is_one_byte) OpCode.init(&.{0x74}) else OpCode.init(&.{0x0f,0x84}), + .jne, + .jnz => if (is_one_byte) OpCode.init(&.{0x75}) else OpCode.init(&.{0x0f,0x85}), + .jna, + .jbe => if (is_one_byte) OpCode.init(&.{0x76}) else OpCode.init(&.{0x0f,0x86}), + .jnbe, + .ja => if (is_one_byte) OpCode.init(&.{0x77}) else OpCode.init(&.{0x0f,0x87}), + .js => if (is_one_byte) OpCode.init(&.{0x78}) else OpCode.init(&.{0x0f,0x88}), + .jns => if (is_one_byte) OpCode.init(&.{0x79}) else OpCode.init(&.{0x0f,0x89}), + .jpe, + .jp => if (is_one_byte) OpCode.init(&.{0x7a}) else OpCode.init(&.{0x0f,0x8a}), + .jpo, + .jnp => if (is_one_byte) OpCode.init(&.{0x7b}) else OpCode.init(&.{0x0f,0x8b}), + .jnge, + .jl => if (is_one_byte) OpCode.init(&.{0x7c}) else OpCode.init(&.{0x0f,0x8c}), + .jge, + .jnl => if (is_one_byte) OpCode.init(&.{0x7d}) else OpCode.init(&.{0x0f,0x8d}), + .jle, + .jng => if (is_one_byte) OpCode.init(&.{0x7e}) else OpCode.init(&.{0x0f,0x8e}), + .jg, + .jnle => if (is_one_byte) OpCode.init(&.{0x7f}) else OpCode.init(&.{0x0f,0x8f}), + else => unreachable, }, .m => return switch (tag) { - .jmp_near, .call_near, .push => OpCode.oneByte(0xff), - .pop => OpCode.oneByte(0x8f), - .seto => OpCode.twoByte(0x0f, 0x90), - .setno => OpCode.twoByte(0x0f, 0x91), - .setb, .setc, .setnae => 
OpCode.twoByte(0x0f, 0x92), - .setnb, .setnc, .setae => OpCode.twoByte(0x0f, 0x93), - .sete, .setz => OpCode.twoByte(0x0f, 0x94), - .setne, .setnz => OpCode.twoByte(0x0f, 0x95), - .setbe, .setna => OpCode.twoByte(0x0f, 0x96), - .seta, .setnbe => OpCode.twoByte(0x0f, 0x97), - .sets => OpCode.twoByte(0x0f, 0x98), - .setns => OpCode.twoByte(0x0f, 0x99), - .setp, .setpe => OpCode.twoByte(0x0f, 0x9a), - .setnp, .setop => OpCode.twoByte(0x0f, 0x9b), - .setl, .setnge => OpCode.twoByte(0x0f, 0x9c), - .setnl, .setge => OpCode.twoByte(0x0f, 0x9d), - .setle, .setng => OpCode.twoByte(0x0f, 0x9e), - .setnle, .setg => OpCode.twoByte(0x0f, 0x9f), - .idiv, .div, .imul, .mul => OpCode.oneByte(if (is_one_byte) 0xf6 else 0xf7), - .fisttp16 => OpCode.oneByte(0xdf), - .fisttp32 => OpCode.oneByte(0xdb), - .fisttp64 => OpCode.oneByte(0xdd), - .fld32 => OpCode.oneByte(0xd9), - .fld64 => OpCode.oneByte(0xdd), - else => null, + .jmp_near, + .call_near, + .push => OpCode.init(&.{0xff}), + .pop => OpCode.init(&.{0x8f}), + .seto => OpCode.init(&.{0x0f,0x90}), + .setno => OpCode.init(&.{0x0f,0x91}), + .setb, + .setc, + .setnae => OpCode.init(&.{0x0f,0x92}), + .setnb, + .setnc, + .setae => OpCode.init(&.{0x0f,0x93}), + .sete, + .setz => OpCode.init(&.{0x0f,0x94}), + .setne, + .setnz => OpCode.init(&.{0x0f,0x95}), + .setbe, + .setna => OpCode.init(&.{0x0f,0x96}), + .seta, + .setnbe => OpCode.init(&.{0x0f,0x97}), + .sets => OpCode.init(&.{0x0f,0x98}), + .setns => OpCode.init(&.{0x0f,0x99}), + .setp, + .setpe => OpCode.init(&.{0x0f,0x9a}), + .setnp, + .setop => OpCode.init(&.{0x0f,0x9b}), + .setl, + .setnge => OpCode.init(&.{0x0f,0x9c}), + .setnl, + .setge => OpCode.init(&.{0x0f,0x9d}), + .setle, + .setng => OpCode.init(&.{0x0f,0x9e}), + .setnle, + .setg => OpCode.init(&.{0x0f,0x9f}), + .idiv, + .div, + .imul, + .mul => if (is_one_byte) OpCode.init(&.{0xf6}) else OpCode.init(&.{0xf7}), + .fisttp16 => OpCode.init(&.{0xdf}), + .fisttp32 => OpCode.init(&.{0xdb}), + .fisttp64 => OpCode.init(&.{0xdd}), + .fld32 => OpCode.init(&.{0xd9}), + .fld64 => OpCode.init(&.{0xdd}), + else => unreachable, }, .o => return switch (tag) { - .push => OpCode.oneByte(0x50), - .pop => OpCode.oneByte(0x58), - else => null, + .push => OpCode.init(&.{0x50}), + .pop => OpCode.init(&.{0x58}), + else => unreachable, }, .i => return switch (tag) { - .push => OpCode.oneByte(if (is_one_byte) 0x6a else 0x68), - .@"test" => OpCode.oneByte(if (is_one_byte) 0xa8 else 0xa9), - .ret_near => OpCode.oneByte(0xc2), - .ret_far => OpCode.oneByte(0xca), - else => null, + .push => if (is_one_byte) OpCode.init(&.{0x6a}) else OpCode.init(&.{0x68}), + .@"test" => if (is_one_byte) OpCode.init(&.{0xa8}) else OpCode.init(&.{0xa9}), + .ret_near => OpCode.init(&.{0xc2}), + .ret_far => OpCode.init(&.{0xca}), + else => unreachable, }, .m1 => return switch (tag) { - .shl, .sal, .shr, .sar => OpCode.oneByte(if (is_one_byte) 0xd0 else 0xd1), - else => null, + .shl, .sal, + .shr, .sar => if (is_one_byte) OpCode.init(&.{0xd0}) else OpCode.init(&.{0xd1}), + else => unreachable, }, .mc => return switch (tag) { - .shl, .sal, .shr, .sar => OpCode.oneByte(if (is_one_byte) 0xd2 else 0xd3), - else => null, + .shl, .sal, + .shr, .sar => if (is_one_byte) OpCode.init(&.{0xd2}) else OpCode.init(&.{0xd3}), + else => unreachable, }, .mi => return switch (tag) { - .adc, .add, .sub, .xor, .@"and", .@"or", .sbb, .cmp => OpCode.oneByte(if (is_one_byte) 0x80 else 0x81), - .mov => OpCode.oneByte(if (is_one_byte) 0xc6 else 0xc7), - .@"test" => OpCode.oneByte(if (is_one_byte) 0xf6 else 0xf7), - else => 
null, + .adc, .add, + .sub, .xor, + .@"and", .@"or", + .sbb, .cmp => if (is_one_byte) OpCode.init(&.{0x80}) else OpCode.init(&.{0x81}), + .mov => if (is_one_byte) OpCode.init(&.{0xc6}) else OpCode.init(&.{0xc7}), + .@"test" => if (is_one_byte) OpCode.init(&.{0xf6}) else OpCode.init(&.{0xf7}), + else => unreachable, }, .mi8 => return switch (tag) { - .adc, .add, .sub, .xor, .@"and", .@"or", .sbb, .cmp => OpCode.oneByte(0x83), - .shl, .sal, .shr, .sar => OpCode.oneByte(if (is_one_byte) 0xc0 else 0xc1), - else => null, + .adc, .add, + .sub, .xor, + .@"and", .@"or", + .sbb, .cmp => OpCode.init(&.{0x83}), + .shl, .sal, + .shr, .sar => if (is_one_byte) OpCode.init(&.{0xc0}) else OpCode.init(&.{0xc1}), + else => unreachable, }, .mr => return switch (tag) { - .adc => OpCode.oneByte(if (is_one_byte) 0x10 else 0x11), - .add => OpCode.oneByte(if (is_one_byte) 0x00 else 0x01), - .sub => OpCode.oneByte(if (is_one_byte) 0x28 else 0x29), - .xor => OpCode.oneByte(if (is_one_byte) 0x30 else 0x31), - .@"and" => OpCode.oneByte(if (is_one_byte) 0x20 else 0x21), - .@"or" => OpCode.oneByte(if (is_one_byte) 0x08 else 0x09), - .sbb => OpCode.oneByte(if (is_one_byte) 0x18 else 0x19), - .cmp => OpCode.oneByte(if (is_one_byte) 0x38 else 0x39), - .mov => OpCode.oneByte(if (is_one_byte) 0x88 else 0x89), - .@"test" => OpCode.oneByte(if (is_one_byte) 0x84 else 0x85), - else => null, + .adc => if (is_one_byte) OpCode.init(&.{0x10}) else OpCode.init(&.{0x11}), + .add => if (is_one_byte) OpCode.init(&.{0x00}) else OpCode.init(&.{0x01}), + .sub => if (is_one_byte) OpCode.init(&.{0x28}) else OpCode.init(&.{0x29}), + .xor => if (is_one_byte) OpCode.init(&.{0x30}) else OpCode.init(&.{0x31}), + .@"and" => if (is_one_byte) OpCode.init(&.{0x20}) else OpCode.init(&.{0x21}), + .@"or" => if (is_one_byte) OpCode.init(&.{0x08}) else OpCode.init(&.{0x09}), + .sbb => if (is_one_byte) OpCode.init(&.{0x18}) else OpCode.init(&.{0x19}), + .cmp => if (is_one_byte) OpCode.init(&.{0x38}) else OpCode.init(&.{0x39}), + .mov => if (is_one_byte) OpCode.init(&.{0x88}) else OpCode.init(&.{0x89}), + .@"test" => if (is_one_byte) OpCode.init(&.{0x84}) else OpCode.init(&.{0x85}), + .movsd => OpCode.init(&.{0xf2,0x0f,0x11}), + .movss => OpCode.init(&.{0xf3,0x0f,0x11}), + else => unreachable, }, .rm => return switch (tag) { - .adc => OpCode.oneByte(if (is_one_byte) 0x12 else 0x13), - .add => OpCode.oneByte(if (is_one_byte) 0x02 else 0x03), - .sub => OpCode.oneByte(if (is_one_byte) 0x2a else 0x2b), - .xor => OpCode.oneByte(if (is_one_byte) 0x32 else 0x33), - .@"and" => OpCode.oneByte(if (is_one_byte) 0x22 else 0x23), - .@"or" => OpCode.oneByte(if (is_one_byte) 0x0a else 0x0b), - .sbb => OpCode.oneByte(if (is_one_byte) 0x1a else 0x1b), - .cmp => OpCode.oneByte(if (is_one_byte) 0x3a else 0x3b), - .mov => OpCode.oneByte(if (is_one_byte) 0x8a else 0x8b), - .movsx => OpCode.twoByte(0x0f, if (is_one_byte) 0xbe else 0xbf), - .movsxd => OpCode.oneByte(0x63), - .movzx => OpCode.twoByte(0x0f, if (is_one_byte) 0xb6 else 0xb7), - .lea => OpCode.oneByte(if (is_one_byte) 0x8c else 0x8d), - .imul => OpCode.twoByte(0x0f, 0xaf), - .cmove, .cmovz => OpCode.twoByte(0x0f, 0x44), - .cmovb, .cmovnae => OpCode.twoByte(0x0f, 0x42), - .cmovl, .cmovng => OpCode.twoByte(0x0f, 0x4c), - else => null, + .adc => if (is_one_byte) OpCode.init(&.{0x12}) else OpCode.init(&.{0x13}), + .add => if (is_one_byte) OpCode.init(&.{0x02}) else OpCode.init(&.{0x03}), + .sub => if (is_one_byte) OpCode.init(&.{0x2a}) else OpCode.init(&.{0x2b}), + .xor => if (is_one_byte) OpCode.init(&.{0x32}) else 
OpCode.init(&.{0x33}), + .@"and" => if (is_one_byte) OpCode.init(&.{0x22}) else OpCode.init(&.{0x23}), + .@"or" => if (is_one_byte) OpCode.init(&.{0x0a}) else OpCode.init(&.{0x0b}), + .sbb => if (is_one_byte) OpCode.init(&.{0x1a}) else OpCode.init(&.{0x1b}), + .cmp => if (is_one_byte) OpCode.init(&.{0x3a}) else OpCode.init(&.{0x3b}), + .mov => if (is_one_byte) OpCode.init(&.{0x8a}) else OpCode.init(&.{0x8b}), + .movsx => if (is_one_byte) OpCode.init(&.{0x0f,0xbe}) else OpCode.init(&.{0x0f,0xbf}), + .movsxd => OpCode.init(&.{0x63}), + .movzx => if (is_one_byte) OpCode.init(&.{0x0f,0xb6}) else OpCode.init(&.{0x0f,0xb7}), + .lea => if (is_one_byte) OpCode.init(&.{0x8c}) else OpCode.init(&.{0x8d}), + .imul => OpCode.init(&.{0x0f,0xaf}), + .cmove, + .cmovz => OpCode.init(&.{0x0f,0x44}), + .cmovb, + .cmovnae => OpCode.init(&.{0x0f,0x42}), + .cmovl, + .cmovng => OpCode.init(&.{0x0f,0x4c}), + .movsd => OpCode.init(&.{0xf2,0x0f,0x10}), + .movss => OpCode.init(&.{0xf3,0x0f,0x10}), + .addsd => OpCode.init(&.{0xf2,0x0f,0x58}), + .addss => OpCode.init(&.{0xf3,0x0f,0x58}), + .ucomisd => OpCode.init(&.{0x66,0x0f,0x2e}), + .ucomiss => OpCode.init(&.{0x0f,0x2e}), + else => unreachable, }, .oi => return switch (tag) { - .mov => OpCode.oneByte(if (is_one_byte) 0xb0 else 0xb8), - else => null, + .mov => if (is_one_byte) OpCode.init(&.{0xb0}) else OpCode.init(&.{0xb8}), + else => unreachable, }, .fd => return switch (tag) { - .mov => OpCode.oneByte(if (is_one_byte) 0xa0 else 0xa1), - else => null, + .mov => if (is_one_byte) OpCode.init(&.{0xa0}) else OpCode.init(&.{0xa1}), + else => unreachable, }, .td => return switch (tag) { - .mov => OpCode.oneByte(if (is_one_byte) 0xa2 else 0xa3), - else => null, + .mov => if (is_one_byte) OpCode.init(&.{0xa2}) else OpCode.init(&.{0xa3}), + else => unreachable, }, .rmi => return switch (tag) { - .imul => OpCode.oneByte(if (is_one_byte) 0x6b else 0x69), - else => null, + .imul => if (is_one_byte) OpCode.init(&.{0x6b}) else OpCode.init(&.{0x69}), + else => unreachable, + }, + .mv => return switch (tag) { + .vmovsd, + .vmovss => OpCode.init(&.{0x11}), + else => unreachable, + }, + .vm => return switch (tag) { + .vmovsd, + .vmovss => OpCode.init(&.{0x10}), + .vucomisd, + .vucomiss => OpCode.init(&.{0x2e}), + else => unreachable, + }, + .rvm => return switch (tag) { + .vaddsd, + .vaddss => OpCode.init(&.{0x58}), + .vmovsd, + .vmovss => OpCode.init(&.{0x10}), + else => unreachable, + }, + .rvmi => return switch (tag) { + .vcmpsd, + .vcmpss => OpCode.init(&.{0xc2}), + else => unreachable, }, } + // zig fmt: on } -inline fn getModRmExt(tag: Tag) ?u3 { +inline fn getModRmExt(tag: Tag) u3 { return switch (tag) { .adc => 0x2, .add => 0x0, @@ -1483,11 +1759,101 @@ inline fn getModRmExt(tag: Tag) ?u3 { .fisttp64 => 0x1, .fld32 => 0x0, .fld64 => 0x0, - else => null, + else => unreachable, }; } -const ScaleIndex = struct { +const VexEncoding = struct { + prefix: Encoder.Vex, + reg: ?enum { + ndd, + nds, + dds, + }, +}; + +inline fn getVexEncoding(tag: Tag, enc: Encoding) VexEncoding { + const desc: struct { + reg: enum { + none, + ndd, + nds, + dds, + } = .none, + len_256: bool = false, + wig: bool = false, + lig: bool = false, + lz: bool = false, + lead_opc: enum { + l_0f, + l_0f_3a, + l_0f_38, + } = .l_0f, + simd_prefix: enum { + none, + p_66, + p_f2, + p_f3, + } = .none, + } = blk: { + switch (enc) { + .mv => switch (tag) { + .vmovsd => break :blk .{ .lig = true, .simd_prefix = .p_f2, .wig = true }, + .vmovss => break :blk .{ .lig = true, .simd_prefix = .p_f3, .wig = true }, + else => 
unreachable, + }, + .vm => switch (tag) { + .vmovsd => break :blk .{ .lig = true, .simd_prefix = .p_f2, .wig = true }, + .vmovss => break :blk .{ .lig = true, .simd_prefix = .p_f3, .wig = true }, + .vucomisd => break :blk .{ .lig = true, .simd_prefix = .p_66, .wig = true }, + .vucomiss => break :blk .{ .lig = true, .wig = true }, + else => unreachable, + }, + .rvm => switch (tag) { + .vaddsd => break :blk .{ .reg = .nds, .lig = true, .simd_prefix = .p_f2, .wig = true }, + .vaddss => break :blk .{ .reg = .nds, .lig = true, .simd_prefix = .p_f3, .wig = true }, + .vmovsd => break :blk .{ .reg = .nds, .lig = true, .simd_prefix = .p_f2, .wig = true }, + .vmovss => break :blk .{ .reg = .nds, .lig = true, .simd_prefix = .p_f3, .wig = true }, + else => unreachable, + }, + .rvmi => switch (tag) { + .vcmpsd => break :blk .{ .reg = .nds, .lig = true, .simd_prefix = .p_f2, .wig = true }, + .vcmpss => break :blk .{ .reg = .nds, .lig = true, .simd_prefix = .p_f3, .wig = true }, + else => unreachable, + }, + else => unreachable, + } + }; + + var vex: Encoder.Vex = .{}; + + if (desc.len_256) vex.len_256(); + if (desc.wig) vex.wig(); + if (desc.lig) vex.lig(); + if (desc.lz) vex.lz(); + + switch (desc.lead_opc) { + .l_0f => {}, + .l_0f_3a => vex.lead_opc_0f_3a(), + .l_0f_38 => vex.lead_opc_0f_38(), + } + + switch (desc.simd_prefix) { + .none => {}, + .p_66 => vex.simd_prefix_66(), + .p_f2 => vex.simd_prefix_f2(), + .p_f3 => vex.simd_prefix_f3(), + } + + return VexEncoding{ .prefix = vex, .reg = switch (desc.reg) { + .none => null, + .nds => .nds, + .dds => .dds, + .ndd => .ndd, + } }; +} + +const ScaleIndex = packed struct { scale: u2, index: Register, }; @@ -1499,49 +1865,38 @@ const Memory = struct { ptr_size: PtrSize, scale_index: ?ScaleIndex = null, - const PtrSize = enum { - byte_ptr, - word_ptr, - dword_ptr, - qword_ptr, + const PtrSize = enum(u2) { + byte_ptr = 0b00, + word_ptr = 0b01, + dword_ptr = 0b10, + qword_ptr = 0b11, - fn fromBits(in_bits: u64) PtrSize { - return switch (in_bits) { - 8 => .byte_ptr, - 16 => .word_ptr, - 32 => .dword_ptr, - 64 => .qword_ptr, - else => unreachable, - }; + fn new(bit_size: u64) PtrSize { + return @intToEnum(PtrSize, math.log2_int(u4, @intCast(u4, @divExact(bit_size, 8)))); } /// Returns size in bits. 
fn size(ptr_size: PtrSize) u64 { - return switch (ptr_size) { - .byte_ptr => 8, - .word_ptr => 16, - .dword_ptr => 32, - .qword_ptr => 64, - }; + return 8 * (math.powi(u8, 2, @enumToInt(ptr_size)) catch unreachable); } }; fn encode(mem_op: Memory, encoder: Encoder, operand: u3) void { if (mem_op.base) |base| { - const dst = base.lowId(); + const dst = base.lowEnc(); const src = operand; if (dst == 4 or mem_op.scale_index != null) { if (mem_op.disp == 0 and dst != 5) { encoder.modRm_SIBDisp0(src); if (mem_op.scale_index) |si| { - encoder.sib_scaleIndexBase(si.scale, si.index.lowId(), dst); + encoder.sib_scaleIndexBase(si.scale, si.index.lowEnc(), dst); } else { encoder.sib_base(dst); } } else if (immOpSize(mem_op.disp) == 8) { encoder.modRm_SIBDisp8(src); if (mem_op.scale_index) |si| { - encoder.sib_scaleIndexBaseDisp8(si.scale, si.index.lowId(), dst); + encoder.sib_scaleIndexBaseDisp8(si.scale, si.index.lowEnc(), dst); } else { encoder.sib_baseDisp8(dst); } @@ -1549,7 +1904,7 @@ const Memory = struct { } else { encoder.modRm_SIBDisp32(src); if (mem_op.scale_index) |si| { - encoder.sib_scaleIndexBaseDisp32(si.scale, si.index.lowId(), dst); + encoder.sib_scaleIndexBaseDisp32(si.scale, si.index.lowEnc(), dst); } else { encoder.sib_baseDisp32(dst); } @@ -1572,7 +1927,7 @@ const Memory = struct { } else { encoder.modRm_SIBDisp0(operand); if (mem_op.scale_index) |si| { - encoder.sib_scaleIndexDisp32(si.scale, si.index.lowId()); + encoder.sib_scaleIndexDisp32(si.scale, si.index.lowEnc()); } else { encoder.sib_disp32(); } @@ -1581,6 +1936,7 @@ const Memory = struct { } } + /// Returns size in bits. fn size(memory: Memory) u64 { return memory.ptr_size.size(); } @@ -1629,6 +1985,7 @@ const RegisterOrMemory = union(enum) { }; } + /// Returns size in bits. fn size(reg_or_mem: RegisterOrMemory) u64 { return switch (reg_or_mem) { .register => |reg| reg.size(), @@ -1638,7 +1995,8 @@ const RegisterOrMemory = union(enum) { }; fn lowerToZoEnc(tag: Tag, code: *std.ArrayList(u8)) InnerError!void { - const opc = getOpCode(tag, .zo, false).?; + assert(!tag.isAvx()); + const opc = getOpCode(tag, .zo, false); const encoder = try Encoder.init(code, 2); switch (tag) { .cqo => { @@ -1652,14 +2010,15 @@ fn lowerToZoEnc(tag: Tag, code: *std.ArrayList(u8)) InnerError!void { } fn lowerToIEnc(tag: Tag, imm: u32, code: *std.ArrayList(u8)) InnerError!void { + assert(!tag.isAvx()); if (tag == .ret_far or tag == .ret_near) { const encoder = try Encoder.init(code, 3); - const opc = getOpCode(tag, .i, false).?; + const opc = getOpCode(tag, .i, false); opc.encode(encoder); encoder.imm16(@bitCast(i16, @truncate(u16, imm))); return; } - const opc = getOpCode(tag, .i, immOpSize(imm) == 8).?; + const opc = getOpCode(tag, .i, immOpSize(imm) == 8); const encoder = try Encoder.init(code, 5); if (immOpSize(imm) == 16) { encoder.prefix16BitMode(); @@ -1669,7 +2028,8 @@ fn lowerToIEnc(tag: Tag, imm: u32, code: *std.ArrayList(u8)) InnerError!void { } fn lowerToOEnc(tag: Tag, reg: Register, code: *std.ArrayList(u8)) InnerError!void { - const opc = getOpCode(tag, .o, false).?; + assert(!tag.isAvx()); + const opc = getOpCode(tag, .o, false); const encoder = try Encoder.init(code, 3); if (reg.size() == 16) { encoder.prefix16BitMode(); @@ -1682,15 +2042,17 @@ fn lowerToOEnc(tag: Tag, reg: Register, code: *std.ArrayList(u8)) InnerError!voi } fn lowerToDEnc(tag: Tag, imm: u32, code: *std.ArrayList(u8)) InnerError!void { - const opc = getOpCode(tag, .d, false).?; + assert(!tag.isAvx()); + const opc = getOpCode(tag, .d, false); const encoder = try 
Encoder.init(code, 6); opc.encode(encoder); encoder.imm32(@bitCast(i32, imm)); } fn lowerToMxEnc(tag: Tag, reg_or_mem: RegisterOrMemory, enc: Encoding, code: *std.ArrayList(u8)) InnerError!void { - const opc = getOpCode(tag, enc, reg_or_mem.size() == 8).?; - const modrm_ext = getModRmExt(tag).?; + assert(!tag.isAvx()); + const opc = getOpCode(tag, enc, reg_or_mem.size() == 8); + const modrm_ext = getModRmExt(tag); switch (reg_or_mem) { .register => |reg| { const encoder = try Encoder.init(code, 4); @@ -1703,7 +2065,7 @@ fn lowerToMxEnc(tag: Tag, reg_or_mem: RegisterOrMemory, enc: Encoding, code: *st .b = reg.isExtended(), }); opc.encode(encoder); - encoder.modRm_direct(modrm_ext, reg.lowId()); + encoder.modRm_direct(modrm_ext, reg.lowEnc()); }, .memory => |mem_op| { const encoder = try Encoder.init(code, 8); @@ -1744,10 +2106,8 @@ fn lowerToFdEnc(tag: Tag, reg: Register, moffs: u64, code: *std.ArrayList(u8)) I } fn lowerToTdFdEnc(tag: Tag, reg: Register, moffs: u64, code: *std.ArrayList(u8), td: bool) InnerError!void { - const opc = if (td) - getOpCode(tag, .td, reg.size() == 8).? - else - getOpCode(tag, .fd, reg.size() == 8).?; + assert(!tag.isAvx()); + const opc = if (td) getOpCode(tag, .td, reg.size() == 8) else getOpCode(tag, .fd, reg.size() == 8); const encoder = try Encoder.init(code, 10); if (reg.size() == 16) { encoder.prefix16BitMode(); @@ -1766,7 +2126,8 @@ fn lowerToTdFdEnc(tag: Tag, reg: Register, moffs: u64, code: *std.ArrayList(u8), } fn lowerToOiEnc(tag: Tag, reg: Register, imm: u64, code: *std.ArrayList(u8)) InnerError!void { - const opc = getOpCode(tag, .oi, reg.size() == 8).?; + assert(!tag.isAvx()); + const opc = getOpCode(tag, .oi, reg.size() == 8); const encoder = try Encoder.init(code, 10); if (reg.size() == 16) { encoder.prefix16BitMode(); @@ -1792,8 +2153,9 @@ fn lowerToMiXEnc( enc: Encoding, code: *std.ArrayList(u8), ) InnerError!void { - const modrm_ext = getModRmExt(tag).?; - const opc = getOpCode(tag, enc, reg_or_mem.size() == 8).?; + assert(!tag.isAvx()); + const modrm_ext = getModRmExt(tag); + const opc = getOpCode(tag, enc, reg_or_mem.size() == 8); switch (reg_or_mem) { .register => |dst_reg| { const encoder = try Encoder.init(code, 7); @@ -1808,7 +2170,7 @@ fn lowerToMiXEnc( .b = dst_reg.isExtended(), }); opc.encode(encoder); - encoder.modRm_direct(modrm_ext, dst_reg.lowId()); + encoder.modRm_direct(modrm_ext, dst_reg.lowEnc()); encodeImm(encoder, imm, if (enc == .mi8) 8 else dst_reg.size()); }, .memory => |dst_mem| { @@ -1847,10 +2209,11 @@ fn lowerToRmEnc( reg_or_mem: RegisterOrMemory, code: *std.ArrayList(u8), ) InnerError!void { - const opc = getOpCode(tag, .rm, reg.size() == 8 or reg_or_mem.size() == 8).?; + assert(!tag.isAvx()); + const opc = getOpCode(tag, .rm, reg.size() == 8 or reg_or_mem.size() == 8); switch (reg_or_mem) { .register => |src_reg| { - const encoder = try Encoder.init(code, 4); + const encoder = try Encoder.init(code, 5); if (reg.size() == 16) { encoder.prefix16BitMode(); } @@ -1860,7 +2223,7 @@ fn lowerToRmEnc( .b = src_reg.isExtended(), }); opc.encode(encoder); - encoder.modRm_direct(reg.lowId(), src_reg.lowId()); + encoder.modRm_direct(reg.lowEnc(), src_reg.lowEnc()); }, .memory => |src_mem| { const encoder = try Encoder.init(code, 9); @@ -1882,7 +2245,7 @@ fn lowerToRmEnc( }); } opc.encode(encoder); - src_mem.encode(encoder, reg.lowId()); + src_mem.encode(encoder, reg.lowEnc()); }, } } @@ -1893,7 +2256,8 @@ fn lowerToMrEnc( reg: Register, code: *std.ArrayList(u8), ) InnerError!void { - const opc = getOpCode(tag, .mr, reg.size() == 
8 or reg_or_mem.size() == 8).?; + assert(!tag.isAvx()); + const opc = getOpCode(tag, .mr, reg.size() == 8 or reg_or_mem.size() == 8); switch (reg_or_mem) { .register => |dst_reg| { const encoder = try Encoder.init(code, 4); @@ -1906,7 +2270,7 @@ fn lowerToMrEnc( .b = dst_reg.isExtended(), }); opc.encode(encoder); - encoder.modRm_direct(reg.lowId(), dst_reg.lowId()); + encoder.modRm_direct(reg.lowEnc(), dst_reg.lowEnc()); }, .memory => |dst_mem| { const encoder = try Encoder.init(code, 9); @@ -1926,7 +2290,7 @@ fn lowerToMrEnc( }); } opc.encode(encoder); - dst_mem.encode(encoder, reg.lowId()); + dst_mem.encode(encoder, reg.lowEnc()); }, } } @@ -1938,7 +2302,8 @@ fn lowerToRmiEnc( imm: u32, code: *std.ArrayList(u8), ) InnerError!void { - const opc = getOpCode(tag, .rmi, false).?; + assert(!tag.isAvx()); + const opc = getOpCode(tag, .rmi, false); const encoder = try Encoder.init(code, 13); if (reg.size() == 16) { encoder.prefix16BitMode(); @@ -1951,7 +2316,7 @@ fn lowerToRmiEnc( .b = src_reg.isExtended(), }); opc.encode(encoder); - encoder.modRm_direct(reg.lowId(), src_reg.lowId()); + encoder.modRm_direct(reg.lowEnc(), src_reg.lowEnc()); }, .memory => |src_mem| { if (src_mem.base) |base| { @@ -1969,12 +2334,165 @@ fn lowerToRmiEnc( }); } opc.encode(encoder); - src_mem.encode(encoder, reg.lowId()); + src_mem.encode(encoder, reg.lowEnc()); }, } encodeImm(encoder, imm, reg.size()); } +/// Also referred to as XM encoding in Intel manual. +fn lowerToVmEnc( + tag: Tag, + reg: Register, + reg_or_mem: RegisterOrMemory, + code: *std.ArrayList(u8), +) InnerError!void { + const opc = getOpCode(tag, .vm, false); + var enc = getVexEncoding(tag, .vm); + const vex = &enc.prefix; + switch (reg_or_mem) { + .register => |src_reg| { + const encoder = try Encoder.init(code, 5); + vex.rex(.{ + .r = reg.isExtended(), + .b = src_reg.isExtended(), + }); + encoder.vex(enc.prefix); + opc.encode(encoder); + encoder.modRm_direct(reg.lowEnc(), src_reg.lowEnc()); + }, + .memory => |src_mem| { + const encoder = try Encoder.init(code, 10); + if (src_mem.base) |base| { + vex.rex(.{ + .r = reg.isExtended(), + .b = base.isExtended(), + }); + } else { + vex.rex(.{ + .r = reg.isExtended(), + }); + } + encoder.vex(enc.prefix); + opc.encode(encoder); + src_mem.encode(encoder, reg.lowEnc()); + }, + } +} + +/// Usually referred to as MR encoding with V/V in Intel manual. 
+fn lowerToMvEnc( + tag: Tag, + reg_or_mem: RegisterOrMemory, + reg: Register, + code: *std.ArrayList(u8), +) InnerError!void { + const opc = getOpCode(tag, .mv, false); + var enc = getVexEncoding(tag, .mv); + const vex = &enc.prefix; + switch (reg_or_mem) { + .register => |dst_reg| { + const encoder = try Encoder.init(code, 5); + vex.rex(.{ + .r = reg.isExtended(), + .b = dst_reg.isExtended(), + }); + encoder.vex(enc.prefix); + opc.encode(encoder); + encoder.modRm_direct(reg.lowEnc(), dst_reg.lowEnc()); + }, + .memory => |dst_mem| { + const encoder = try Encoder.init(code, 10); + if (dst_mem.base) |base| { + vex.rex(.{ + .r = reg.isExtended(), + .b = base.isExtended(), + }); + } else { + vex.rex(.{ + .r = reg.isExtended(), + }); + } + encoder.vex(enc.prefix); + opc.encode(encoder); + dst_mem.encode(encoder, reg.lowEnc()); + }, + } +} + +fn lowerToRvmEnc( + tag: Tag, + reg1: Register, + reg2: Register, + reg_or_mem: RegisterOrMemory, + code: *std.ArrayList(u8), +) InnerError!void { + const opc = getOpCode(tag, .rvm, false); + var enc = getVexEncoding(tag, .rvm); + const vex = &enc.prefix; + switch (reg_or_mem) { + .register => |reg3| { + if (enc.reg) |vvvv| { + switch (vvvv) { + .nds => vex.reg(reg2.enc()), + else => unreachable, // TODO + } + } + const encoder = try Encoder.init(code, 5); + vex.rex(.{ + .r = reg1.isExtended(), + .b = reg3.isExtended(), + }); + encoder.vex(enc.prefix); + opc.encode(encoder); + encoder.modRm_direct(reg1.lowEnc(), reg3.lowEnc()); + }, + .memory => |dst_mem| { + _ = dst_mem; + unreachable; // TODO + }, + } +} + +fn lowerToRvmiEnc( + tag: Tag, + reg1: Register, + reg2: Register, + reg_or_mem: RegisterOrMemory, + imm: u32, + code: *std.ArrayList(u8), +) InnerError!void { + const opc = getOpCode(tag, .rvmi, false); + var enc = getVexEncoding(tag, .rvmi); + const vex = &enc.prefix; + const encoder: Encoder = blk: { + switch (reg_or_mem) { + .register => |reg3| { + if (enc.reg) |vvvv| { + switch (vvvv) { + .nds => vex.reg(reg2.enc()), + else => unreachable, // TODO + } + } + const encoder = try Encoder.init(code, 6); + vex.rex(.{ + .r = reg1.isExtended(), + .b = reg3.isExtended(), + }); + encoder.vex(enc.prefix); + opc.encode(encoder); + encoder.modRm_direct(reg1.lowEnc(), reg3.lowEnc()); + break :blk encoder; + }, + .memory => |dst_mem| { + _ = dst_mem; + unreachable; // TODO + }, + } + }; + encodeImm(encoder, imm, 8); // TODO +} + fn expectEqualHexStrings(expected: []const u8, given: []const u8, assembly: []const u8) !void { assert(expected.len > 0); if (mem.eql(u8, expected, given)) return; @@ -2369,3 +2887,34 @@ test "lower RMI encoding" { try lowerToRmiEnc(.imul, .r12w, RegisterOrMemory.reg(.r12w), 0x10, emit.code()); try expectEqualHexStrings("\x66\x45\x69\xE4\x10\x00", emit.lowered(), "imul r12w, r12w, 0x10"); } + +test "lower MV encoding" { + var emit = TestEmit.init(); + defer emit.deinit(); + try lowerToMvEnc(.vmovsd, RegisterOrMemory.rip(.qword_ptr, 0x10), .xmm1, emit.code()); + try expectEqualHexStrings( + "\xC5\xFB\x11\x0D\x10\x00\x00\x00", + emit.lowered(), + "vmovsd qword ptr [rip + 0x10], xmm1", + ); +} + +test "lower VM encoding" { + var emit = TestEmit.init(); + defer emit.deinit(); + try lowerToVmEnc(.vmovsd, .xmm1, RegisterOrMemory.rip(.qword_ptr, 0x10), emit.code()); + try expectEqualHexStrings( + "\xC5\xFB\x10\x0D\x10\x00\x00\x00", + emit.lowered(), + "vmovsd xmm1, qword ptr [rip + 0x10]", + ); +} + +test "lower to RVM encoding" { + var emit = TestEmit.init(); + defer emit.deinit(); + try lowerToRvmEnc(.vaddsd, .xmm0, .xmm1,
RegisterOrMemory.reg(.xmm2), emit.code()); + try expectEqualHexStrings("\xC5\xF3\x58\xC2", emit.lowered(), "vaddsd xmm0, xmm1, xmm2"); + try lowerToRvmEnc(.vaddsd, .xmm0, .xmm0, RegisterOrMemory.reg(.xmm1), emit.code()); + try expectEqualHexStrings("\xC5\xFB\x58\xC1", emit.lowered(), "vaddsd xmm0, xmm0, xmm1"); +} diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 30f4351cb0..a35231a9b8 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -22,11 +22,7 @@ extra: []const u32, pub const Inst = struct { tag: Tag, - /// This is 3 fields, and the meaning of each depends on `tag`. - /// reg1: Register - /// reg2: Register - /// flags: u2 - ops: u16, + ops: Ops, /// The meaning of this depends on `tag` and `ops`. data: Data, @@ -349,6 +345,42 @@ pub const Inst = struct { /// Nop nop, + /// SSE instructions + /// ops flags: form: + /// 0b00 reg1, qword ptr [reg2 + imm32] + /// 0b01 qword ptr [reg1 + imm32], reg2 + /// 0b10 reg1, reg2 + mov_f64_sse, + mov_f32_sse, + + /// ops flags: form: + /// 0b00 reg1, reg2 + add_f64_sse, + add_f32_sse, + + /// ops flags: form: + /// 0b00 reg1, reg2 + cmp_f64_sse, + cmp_f32_sse, + + /// AVX instructions + /// ops flags: form: + /// 0b00 reg1, qword ptr [reg2 + imm32] + /// 0b01 qword ptr [reg1 + imm32], reg2 + /// 0b10 reg1, reg1, reg2 + mov_f64_avx, + mov_f32_avx, + + /// ops flags: form: + /// 0b00 reg1, reg1, reg2 + add_f64_avx, + add_f32_avx, + + /// ops flags: form: + /// 0b00 reg1, reg1, reg2 + cmp_f64_avx, + cmp_f32_avx, + /// Pseudo-instructions /// call extern function /// Notes: @@ -381,6 +413,36 @@ pub const Inst = struct { /// The position of an MIR instruction within the `Mir` instructions array. pub const Index = u32; + pub const Ops = packed struct { + reg1: u7, + reg2: u7, + flags: u2, + + pub fn encode(vals: struct { + reg1: Register = .none, + reg2: Register = .none, + flags: u2 = 0b00, + }) Ops { + return .{ + .reg1 = @enumToInt(vals.reg1), + .reg2 = @enumToInt(vals.reg2), + .flags = vals.flags, + }; + } + + pub fn decode(ops: Ops) struct { + reg1: Register, + reg2: Register, + flags: u2, + } { + return .{ + .reg1 = @intToEnum(Register, ops.reg1), + .reg2 = @intToEnum(Register, ops.reg2), + .flags = ops.flags, + }; + } + }; + /// All instructions have a 4-byte payload, which is contained within /// this union. `Tag` determines which union field is active, as well as /// how to interpret the data within. 
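The packed `Ops` struct above replaces the hand-written shift/mask code (removed in the next hunk) with compiler-managed bit packing. A minimal round-trip sketch of that layout, using a stand-in `Reg` enum rather than the real `Register` so it compiles on its own:

```zig
const std = @import("std");

// Stand-in register enum; only the u7 tag width matters here.
const Reg = enum(u7) { rax = 0, rcx = 1, none = 96 };

// Same field layout as Mir.Inst.Ops: 7 + 7 + 2 = 16 bits.
const Ops = packed struct {
    reg1: u7,
    reg2: u7,
    flags: u2,
};

test "Ops is 16 bits and round-trips" {
    try std.testing.expectEqual(@as(usize, 16), @bitSizeOf(Ops));
    const ops = Ops{
        .reg1 = @enumToInt(Reg.rcx),
        .reg2 = @enumToInt(Reg.none),
        .flags = 0b10,
    };
    try std.testing.expectEqual(Reg.rcx, @intToEnum(Reg, ops.reg1));
    try std.testing.expectEqual(Reg.none, @intToEnum(Reg, ops.reg2));
    try std.testing.expectEqual(@as(u2, 0b10), ops.flags);
}
```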
@@ -450,31 +512,6 @@ pub const DbgLineColumn = struct { column: u32, }; -pub const Ops = struct { - reg1: Register = .none, - reg2: Register = .none, - flags: u2 = 0b00, - - pub fn encode(self: Ops) u16 { - var ops: u16 = 0; - ops |= @intCast(u16, @enumToInt(self.reg1)) << 9; - ops |= @intCast(u16, @enumToInt(self.reg2)) << 2; - ops |= self.flags; - return ops; - } - - pub fn decode(ops: u16) Ops { - const reg1 = @intToEnum(Register, @truncate(u7, ops >> 9)); - const reg2 = @intToEnum(Register, @truncate(u7, ops >> 2)); - const flags = @truncate(u2, ops); - return .{ - .reg1 = reg1, - .reg2 = reg2, - .flags = flags, - }; - } -}; - pub fn deinit(mir: *Mir, gpa: std.mem.Allocator) void { mir.instructions.deinit(gpa); gpa.free(mir.extra); diff --git a/src/arch/x86_64/abi.zig b/src/arch/x86_64/abi.zig index da2e3da394..77f28c11f4 100644 --- a/src/arch/x86_64/abi.zig +++ b/src/arch/x86_64/abi.zig @@ -3,6 +3,7 @@ const Type = @import("../../type.zig").Type; const Target = std.Target; const assert = std.debug.assert; const Register = @import("bits.zig").Register; +const RegisterManagerFn = @import("../../register_manager.zig").RegisterManager; pub const Class = enum { integer, sse, sseup, x87, x87up, complex_x87, memory, none }; @@ -378,6 +379,40 @@ pub const callee_preserved_regs = [_]Register{ .rbx, .r12, .r13, .r14, .r15 }; /// the caller relinquishes control to a subroutine via call instruction (or similar). /// In other words, these registers are free to use by the callee. pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11 }; -pub const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs; + pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx }; + +const sse_avx_regs = [_]Register{ + .ymm0, .ymm1, .ymm2, .ymm3, .ymm4, .ymm5, .ymm6, .ymm7, + .ymm8, .ymm9, .ymm10, .ymm11, .ymm12, .ymm13, .ymm14, .ymm15, +}; +const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs ++ sse_avx_regs; +pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_registers); + +// Register classes +const RegisterBitSet = RegisterManager.RegisterBitSet; +pub const RegisterClass = struct { + pub const gp: RegisterBitSet = @as(RegisterBitSet, std.math.maxInt(std.meta.Int( + .unsigned, + caller_preserved_regs.len + callee_preserved_regs.len, + ))); + pub const sse: RegisterBitSet = std.math.maxInt(RegisterBitSet) - gp; + // TODO uncomment once #11680 is fixed. + // pub const gp: RegisterBitSet = blk: { + // var set = RegisterBitSet.initEmpty(); + // set.setRangeValue(.{ + // .start = 0, + // .end = caller_preserved_regs.len + callee_preserved_regs.len, + // }, true); + // break :blk set; + // }; + // pub const sse: RegisterBitSet = blk: { + // var set = RegisterBitSet.initEmpty(); + // set.setRangeValue(.{ + // .start = caller_preserved_regs.len + callee_preserved_regs.len, + // .end = allocatable_registers.len, + // }, true); + // break :blk set; + // }; +}; diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index 02f032ab72..6429781516 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -8,7 +8,7 @@ const DW = std.dwarf; // zig fmt: off -/// Definitions of all of the x64 registers. The order is semantically meaningful. +/// Definitions of all of the general purpose x64 registers. The order is semantically meaningful. 
/// The registers are defined such that IDs go in descending order of 64-bit, /// 32-bit, 16-bit, and then 8-bit, and each set contains exactly sixteen /// registers. This results in some useful properties: @@ -43,17 +43,36 @@ pub const Register = enum(u7) { al, cl, dl, bl, ah, ch, dh, bh, r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b, - // Pseudo, used only for MIR to signify that the - // operand is not a register but an immediate, etc. + // 64-79, 256-bit registers. + // id is the int value itself (64-79), so it never collides with a gp id. + ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, + ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15, + + // 80-95, 128-bit registers. + // These have no id of their own; convert with to256() before calling id(). + xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, + xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, + + // Pseudo-value for MIR instructions. none, + pub fn id(self: Register) u7 { + return switch (@enumToInt(self)) { + 0...63 => @as(u7, @truncate(u4, @enumToInt(self))), + 64...79 => @enumToInt(self), + else => unreachable, + }; + } + /// Returns the bit-width of the register. - pub fn size(self: Register) u7 { + pub fn size(self: Register) u9 { return switch (@enumToInt(self)) { 0...15 => 64, 16...31 => 32, 32...47 => 16, - 48...64 => 8, + 48...63 => 8, + 64...79 => 256, + 80...95 => 128, else => unreachable, }; } @@ -72,33 +91,41 @@ /// an instruction (@see isExtended), and requires special handling. The /// lower three bits are often embedded directly in instructions (such as /// the B8 variant of moves), or used in R/M bytes. - pub fn id(self: Register) u4 { + pub fn enc(self: Register) u4 { return @truncate(u4, @enumToInt(self)); } - /// Like id, but only returns the lower 3 bits. - pub fn lowId(self: Register) u3 { + /// Like enc, but only returns the lower 3 bits. + pub fn lowEnc(self: Register) u3 { return @truncate(u3, @enumToInt(self)); } + pub fn to256(self: Register) Register { + return @intToEnum(Register, @as(u8, self.enc()) + 64); + } + + pub fn to128(self: Register) Register { + return @intToEnum(Register, @as(u8, self.enc()) + 80); + } + /// Convert from any register to its 64 bit alias. pub fn to64(self: Register) Register { - return @intToEnum(Register, self.id()); + return @intToEnum(Register, self.enc()); } /// Convert from any register to its 32 bit alias. pub fn to32(self: Register) Register { - return @intToEnum(Register, @as(u8, self.id()) + 16); + return @intToEnum(Register, @as(u8, self.enc()) + 16); } /// Convert from any register to its 16 bit alias. pub fn to16(self: Register) Register { - return @intToEnum(Register, @as(u8, self.id()) + 32); + return @intToEnum(Register, @as(u8, self.enc()) + 32); } /// Convert from any register to its 8 bit alias.
pub fn to8(self: Register) Register { - return @intToEnum(Register, @as(u8, self.id()) + 48); + return @intToEnum(Register, @as(u8, self.enc()) + 48); } pub fn dwarfLocOp(self: Register) u8 { @@ -251,6 +278,115 @@ pub const Encoder = struct { self.code.appendAssumeCapacity(0x66); } + pub const Vex = struct { + rex_prefix: Rex = .{}, + lead_opc: u5 = 0b0_0001, + register: u4 = 0b1111, + length: u1 = 0b0, + simd_prefix: u2 = 0b00, + wig_desc: bool = false, + lig_desc: bool = false, + lz_desc: bool = false, + + pub fn rex(self: *Vex, r: Rex) void { + self.rex_prefix = r; + } + + pub fn lead_opc_0f(self: *Vex) void { + self.lead_opc = 0b0_0001; + } + + pub fn lead_opc_0f_38(self: *Vex) void { + self.lead_opc = 0b0_0010; + } + + pub fn lead_opc_0f_3a(self: *Vex) void { + self.lead_opc = 0b0_0011; + } + + pub fn reg(self: *Vex, register: u4) void { + self.register = ~register; + } + + pub fn len_128(self: *Vex) void { + self.length = 0; + } + + pub fn len_256(self: *Vex) void { + assert(!self.lz_desc); + self.length = 1; + } + + pub fn simd_prefix_66(self: *Vex) void { + self.simd_prefix = 0b01; + } + + pub fn simd_prefix_f3(self: *Vex) void { + self.simd_prefix = 0b10; + } + + pub fn simd_prefix_f2(self: *Vex) void { + self.simd_prefix = 0b11; + } + + pub fn wig(self: *Vex) void { + self.wig_desc = true; + } + + pub fn lig(self: *Vex) void { + self.lig_desc = true; + } + + pub fn lz(self: *Vex) void { + self.lz_desc = true; + } + + pub fn write(self: Vex, writer: anytype) usize { + var buf: [3]u8 = .{0} ** 3; + const form_3byte: bool = blk: { + if (self.rex_prefix.w and !self.wig_desc) break :blk true; + if (self.rex_prefix.x or self.rex_prefix.b) break :blk true; + break :blk self.lead_opc != 0b0_0001; + }; + + if (self.lz_desc) { + assert(self.length == 0); + } + + if (form_3byte) { + // First byte + buf[0] = 0xc4; + // Second byte + const rxb_mask: u3 = @intCast(u3, @boolToInt(!self.rex_prefix.r)) << 2 | + @intCast(u2, @boolToInt(!self.rex_prefix.x)) << 1 | + @boolToInt(!self.rex_prefix.b); + buf[1] |= @intCast(u8, rxb_mask) << 5; + buf[1] |= self.lead_opc; + // Third byte + buf[2] |= @intCast(u8, @boolToInt(!self.rex_prefix.w)) << 7; + buf[2] |= @intCast(u7, self.register) << 3; + buf[2] |= @intCast(u3, self.length) << 2; + buf[2] |= self.simd_prefix; + } else { + // First byte + buf[0] = 0xc5; + // Second byte + buf[1] |= @intCast(u8, @boolToInt(!self.rex_prefix.r)) << 7; + buf[1] |= @intCast(u7, self.register) << 3; + buf[1] |= @intCast(u3, self.length) << 2; + buf[1] |= self.simd_prefix; + } + + const count: usize = if (form_3byte) 3 else 2; + _ = writer.writeAll(buf[0..count]) catch unreachable; + return count; + } + }; + + pub fn vex(self: Self, prefix: Vex) void { + _ = prefix.write(self.code.writer()); + } + /// From section 2.2.1.2 of the manual, REX is encoded as b0100WRXB pub const Rex = struct { /// Wide, enables 64-bit operation @@ -305,6 +441,17 @@ pub const Encoder = struct { self.code.appendAssumeCapacity(opcode); } + /// Encodes a 3 byte opcode + /// + /// e.g. MOVSD has the opcode 0xf2 0x0f 0x10 + /// + /// encoder.opcode_3byte(0xf2, 0x0f, 0x10); + pub fn opcode_3byte(self: Self, prefix_1: u8, prefix_2: u8, opcode: u8) void { + self.code.appendAssumeCapacity(prefix_1); + self.code.appendAssumeCapacity(prefix_2); + self.code.appendAssumeCapacity(opcode); + } + /// Encodes a 1 byte opcode with a reg field /// /// Remember to add a REX prefix byte if reg is extended! 
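To make the byte layout produced by `Vex.write` concrete, here is a hand-rolled sketch of the two-byte `0xC5` form only. The `vex2` helper is illustrative, not part of the Encoder; it assumes the same inverted R/vvvv storage convention that `Vex.reg` uses, and it reproduces the `C5 FB` prefix the `vmovsd` tests expect:

```zig
const std = @import("std");

// Two-byte VEX, per the Intel SDM: 0xC5, then R' vvvv L pp with R and vvvv
// stored inverted. `vvvv_enc` is the already-inverted field (0b1111 = unused).
fn vex2(r: bool, vvvv_enc: u4, l: u1, pp: u2) [2]u8 {
    var second: u8 = 0;
    second |= @as(u8, @boolToInt(!r)) << 7; // inverted REX.R
    second |= @as(u8, vvvv_enc) << 3; // inverted second-source register field
    second |= @as(u8, l) << 2; // vector length (0 = scalar/128-bit)
    second |= pp; // implied SIMD prefix (0b11 = 0xF2)
    return .{ 0xc5, second };
}

test "2-byte VEX prefix for vmovsd xmm1, m64" {
    // xmm1 is not extended (R clear), vvvv unused, LIG scalar, F2 prefix.
    const bytes = vex2(false, 0b1111, 0, 0b11);
    try std.testing.expectEqualSlices(u8, "\xC5\xFB", &bytes);
}
```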
@@ -543,7 +690,7 @@ pub const Encoder = struct { } }; -test "x86_64 Encoder helpers" { +test "Encoder helpers - general purpose registers" { var code = ArrayList(u8).init(testing.allocator); defer code.deinit(); @@ -560,8 +707,8 @@ test "x86_64 Encoder helpers" { }); encoder.opcode_2byte(0x0f, 0xaf); encoder.modRm_direct( - Register.eax.lowId(), - Register.edi.lowId(), + Register.eax.lowEnc(), + Register.edi.lowEnc(), ); try testing.expectEqualSlices(u8, &[_]u8{ 0x0f, 0xaf, 0xc7 }, code.items); @@ -580,8 +727,8 @@ test "x86_64 Encoder helpers" { }); encoder.opcode_1byte(0x89); encoder.modRm_direct( - Register.edi.lowId(), - Register.eax.lowId(), + Register.edi.lowEnc(), + Register.eax.lowEnc(), ); try testing.expectEqualSlices(u8, &[_]u8{ 0x89, 0xf8 }, code.items); @@ -607,7 +754,7 @@ test "x86_64 Encoder helpers" { encoder.opcode_1byte(0x81); encoder.modRm_direct( 0, - Register.rcx.lowId(), + Register.rcx.lowEnc(), ); encoder.imm32(2147483647); @@ -615,6 +762,86 @@ test "x86_64 Encoder helpers" { } } +test "Encoder helpers - Vex prefix" { + var buf: [3]u8 = undefined; + var stream = std.io.fixedBufferStream(&buf); + const writer = stream.writer(); + + { + var vex_prefix = Encoder.Vex{}; + vex_prefix.rex(.{ + .r = true, + }); + const nwritten = vex_prefix.write(writer); + try testing.expectEqualSlices(u8, &[_]u8{ 0xc5, 0x78 }, buf[0..nwritten]); + } + + { + stream.reset(); + var vex_prefix = Encoder.Vex{}; + vex_prefix.reg(Register.xmm15.enc()); + const nwritten = vex_prefix.write(writer); + try testing.expectEqualSlices(u8, &[_]u8{ 0xc5, 0x80 }, buf[0..nwritten]); + } + + { + stream.reset(); + var vex_prefix = Encoder.Vex{}; + vex_prefix.rex(.{ + .w = true, + .x = true, + }); + const nwritten = vex_prefix.write(writer); + try testing.expectEqualSlices(u8, &[_]u8{ 0xc4, 0b101_0_0001, 0b0_1111_0_00 }, buf[0..nwritten]); + } + + { + stream.reset(); + var vex_prefix = Encoder.Vex{}; + vex_prefix.rex(.{ + .w = true, + .r = true, + }); + vex_prefix.len_256(); + vex_prefix.lead_opc_0f(); + vex_prefix.simd_prefix_66(); + const nwritten = vex_prefix.write(writer); + try testing.expectEqualSlices(u8, &[_]u8{ 0xc4, 0b011_0_0001, 0b0_1111_1_01 }, buf[0..nwritten]); + } + + var code = ArrayList(u8).init(testing.allocator); + defer code.deinit(); + + { + // vmovapd xmm1, xmm2 + const encoder = try Encoder.init(&code, 4); + var vex = Encoder.Vex{}; + vex.simd_prefix_66(); + encoder.vex(vex); // use 64 bit operation + encoder.opcode_1byte(0x28); + encoder.modRm_direct(0, Register.xmm1.lowEnc()); + try testing.expectEqualSlices(u8, &[_]u8{ 0xC5, 0xF9, 0x28, 0xC1 }, code.items); + } + + { + try code.resize(0); + + // vmovhpd xmm13, xmm1, qword ptr [rip] + const encoder = try Encoder.init(&code, 9); + var vex = Encoder.Vex{}; + vex.len_128(); + vex.simd_prefix_66(); + vex.lead_opc_0f(); + vex.rex(.{ .r = true }); + vex.reg(Register.xmm1.enc()); + encoder.vex(vex); + encoder.opcode_1byte(0x16); + encoder.modRm_RIPDisp32(Register.xmm13.lowEnc()); + encoder.disp32(0); + try testing.expectEqualSlices(u8, &[_]u8{ 0xC5, 0x71, 0x16, 0x2D, 0x00, 0x00, 0x00, 0x00 }, code.items); + } +} + // TODO add these registers to the enum and populate dwarfLocOp // // Return Address register. This is stored in `0(%rsp, "")` and is not a physical register. 
// RA = (16, "RA"), diff --git a/src/register_manager.zig b/src/register_manager.zig index 2c0502e867..347c916769 100644 --- a/src/register_manager.zig +++ b/src/register_manager.zig @@ -41,28 +41,33 @@ pub fn RegisterManager( registers: [tracked_registers.len]Air.Inst.Index = undefined, /// Tracks which registers are free (in which case the /// corresponding bit is set to 1) - free_registers: FreeRegInt = math.maxInt(FreeRegInt), + free_registers: RegisterBitSet = math.maxInt(RegisterBitSet), /// Tracks all registers allocated in the course of this /// function - allocated_registers: FreeRegInt = 0, + allocated_registers: RegisterBitSet = 0, /// Tracks registers which are locked from being allocated - locked_registers: FreeRegInt = 0, + locked_registers: RegisterBitSet = 0, const Self = @This(); /// An integer whose bits represent all the registers and /// whether they are free. - const FreeRegInt = std.meta.Int(.unsigned, tracked_registers.len); - const ShiftInt = math.Log2Int(FreeRegInt); + pub const RegisterBitSet = std.meta.Int(.unsigned, tracked_registers.len); + const ShiftInt = math.Log2Int(RegisterBitSet); fn getFunction(self: *Self) *Function { return @fieldParentPtr(Function, "register_manager", self); } - fn getRegisterMask(reg: Register) ?FreeRegInt { + fn excludeRegister(reg: Register, register_class: RegisterBitSet) bool { + const mask = getRegisterMask(reg) orelse return true; + return mask & register_class == 0; + } + + fn getRegisterMask(reg: Register) ?RegisterBitSet { const index = indexOfRegIntoTracked(reg) orelse return null; const shift = @intCast(ShiftInt, index); - const mask = @as(FreeRegInt, 1) << shift; + const mask = @as(RegisterBitSet, 1) << shift; return mask; } @@ -81,7 +86,10 @@ pub fn RegisterManager( self.free_registers |= mask; } - pub fn indexOfReg(comptime registers: []const Register, reg: Register) ?std.math.IntFittingRange(0, registers.len - 1) { + pub fn indexOfReg( + comptime registers: []const Register, + reg: Register, + ) ?std.math.IntFittingRange(0, registers.len - 1) { inline for (tracked_registers) |cpreg, i| { if (reg.id() == cpreg.id()) return i; } @@ -180,17 +188,20 @@ pub fn RegisterManager( self: *Self, comptime count: comptime_int, insts: [count]?Air.Inst.Index, + register_class: RegisterBitSet, ) ?[count]Register { comptime assert(count > 0 and count <= tracked_registers.len); - const free_and_not_locked_registers = self.free_registers & ~self.locked_registers; - const free_and_not_locked_registers_count = @popCount(FreeRegInt, free_and_not_locked_registers); + const free_registers = self.free_registers & register_class; + const free_and_not_locked_registers = free_registers & ~self.locked_registers; + const free_and_not_locked_registers_count = @popCount(RegisterBitSet, free_and_not_locked_registers); if (free_and_not_locked_registers_count < count) return null; var regs: [count]Register = undefined; var i: usize = 0; for (tracked_registers) |reg| { if (i >= count) break; + if (excludeRegister(reg, register_class)) continue; if (self.isRegLocked(reg)) continue; if (!self.isRegFree(reg)) continue; @@ -216,8 +227,8 @@ pub fn RegisterManager( /// Allocates a register and optionally tracks it with a /// corresponding instruction. Returns `null` if all registers /// are allocated. 
- pub fn tryAllocReg(self: *Self, inst: ?Air.Inst.Index) ?Register { - return if (tryAllocRegs(self, 1, .{inst})) |regs| regs[0] else null; + pub fn tryAllocReg(self: *Self, inst: ?Air.Inst.Index, register_class: RegisterBitSet) ?Register { + return if (tryAllocRegs(self, 1, .{inst}, register_class)) |regs| regs[0] else null; } /// Allocates a specified number of registers, optionally @@ -227,12 +238,16 @@ pub fn RegisterManager( self: *Self, comptime count: comptime_int, insts: [count]?Air.Inst.Index, + register_class: RegisterBitSet, ) AllocateRegistersError![count]Register { comptime assert(count > 0 and count <= tracked_registers.len); - const locked_registers_count = @popCount(FreeRegInt, self.locked_registers); - if (count > tracked_registers.len - locked_registers_count) return error.OutOfRegisters; - const result = self.tryAllocRegs(count, insts) orelse blk: { + const available_registers_count = @popCount(RegisterBitSet, register_class); + const locked_registers = self.locked_registers & register_class; + const locked_registers_count = @popCount(RegisterBitSet, locked_registers); + if (count > available_registers_count - locked_registers_count) return error.OutOfRegisters; + + const result = self.tryAllocRegs(count, insts, register_class) orelse blk: { // We'll take over the first count registers. Spill // the instructions that were previously there to a // stack allocations. @@ -240,6 +255,7 @@ pub fn RegisterManager( var i: usize = 0; for (tracked_registers) |reg| { if (i >= count) break; + if (excludeRegister(reg, register_class)) continue; if (self.isRegLocked(reg)) continue; regs[i] = reg; @@ -275,8 +291,12 @@ pub fn RegisterManager( /// Allocates a register and optionally tracks it with a /// corresponding instruction. - pub fn allocReg(self: *Self, inst: ?Air.Inst.Index) AllocateRegistersError!Register { - return (try self.allocRegs(1, .{inst}))[0]; + pub fn allocReg( + self: *Self, + inst: ?Air.Inst.Index, + register_class: RegisterBitSet, + ) AllocateRegistersError!Register { + return (try self.allocRegs(1, .{inst}, register_class))[0]; } /// Spills the register if it is currently allocated. If a @@ -332,6 +352,334 @@ pub fn RegisterManager( }; } +// TODO delete current implementation of RegisterManager above, and uncomment the one +// below once #11680 is fixed: +// https://github.com/ziglang/zig/issues/11680 + +//pub fn RegisterManager( +// comptime Function: type, +// comptime Register: type, +// comptime tracked_registers: []const Register, +//) type { +// // architectures which do not have a concept of registers should +// // refrain from using RegisterManager +// assert(tracked_registers.len > 0); // see note above + +// return struct { +// /// Tracks the AIR instruction allocated to every register. If +// /// no instruction is allocated to a register (i.e. the +// /// register is free), the value in that slot is undefined. +// /// +// /// The key must be canonical register. 
+//        registers: [tracked_registers.len]Air.Inst.Index = undefined,
+//        /// Tracks which registers are free (in which case the
+//        /// corresponding bit is set to 1)
+//        free_registers: RegisterBitSet = RegisterBitSet.initFull(),
+//        /// Tracks all registers allocated in the course of this
+//        /// function
+//        allocated_registers: RegisterBitSet = RegisterBitSet.initEmpty(),
+//        /// Tracks registers which are locked from being allocated
+//        locked_registers: RegisterBitSet = RegisterBitSet.initEmpty(),
+
+//        const Self = @This();
+
+//        pub const RegisterBitSet = StaticBitSet(tracked_registers.len);
+
+//        fn getFunction(self: *Self) *Function {
+//            return @fieldParentPtr(Function, "register_manager", self);
+//        }
+
+//        fn excludeRegister(reg: Register, register_class: RegisterBitSet) bool {
+//            const index = indexOfRegIntoTracked(reg) orelse return true;
+//            return !register_class.isSet(index);
+//        }
+
+//        fn markRegAllocated(self: *Self, reg: Register) void {
+//            const index = indexOfRegIntoTracked(reg) orelse return;
+//            self.allocated_registers.set(index);
+//        }
+
+//        fn markRegUsed(self: *Self, reg: Register) void {
+//            const index = indexOfRegIntoTracked(reg) orelse return;
+//            self.free_registers.unset(index);
+//        }
+
+//        fn markRegFree(self: *Self, reg: Register) void {
+//            const index = indexOfRegIntoTracked(reg) orelse return;
+//            self.free_registers.set(index);
+//        }
+
+//        pub fn indexOfReg(
+//            comptime registers: []const Register,
+//            reg: Register,
+//        ) ?std.math.IntFittingRange(0, registers.len - 1) {
+//            inline for (tracked_registers) |cpreg, i| {
+//                if (reg.id() == cpreg.id()) return i;
+//            }
+//            return null;
+//        }
+
+//        pub fn indexOfRegIntoTracked(reg: Register) ?RegisterBitSet.ShiftInt {
+//            return indexOfReg(tracked_registers, reg);
+//        }
+
+//        /// Returns whether this register is free.
+//        ///
+//        /// Returns true when this register is not tracked
+//        pub fn isRegFree(self: Self, reg: Register) bool {
+//            const index = indexOfRegIntoTracked(reg) orelse return true;
+//            return self.free_registers.isSet(index);
+//        }
+
+//        /// Returns whether this register was allocated in the course
+//        /// of this function.
+//        ///
+//        /// Returns false when this register is not tracked
+//        pub fn isRegAllocated(self: Self, reg: Register) bool {
+//            const index = indexOfRegIntoTracked(reg) orelse return false;
+//            return self.allocated_registers.isSet(index);
+//        }
+
+//        /// Returns whether this register is locked
+//        ///
+//        /// Returns false when this register is not tracked
+//        pub fn isRegLocked(self: Self, reg: Register) bool {
+//            const index = indexOfRegIntoTracked(reg) orelse return false;
+//            return self.locked_registers.isSet(index);
+//        }
+
+//        pub const RegisterLock = struct {
+//            register: Register,
+//        };
+
+//        /// Prevents the register from being allocated until it is
+//        /// unlocked again.
+//        /// Returns `RegisterLock` if the register was not already
+//        /// locked, or `null` otherwise.
+//        /// Only the owner of the `RegisterLock` can unlock the
+//        /// register later.
+//        pub fn lockReg(self: *Self, reg: Register) ?RegisterLock {
+//            log.debug("locking {}", .{reg});
+//            if (self.isRegLocked(reg)) {
+//                log.debug("  register already locked", .{});
+//                return null;
+//            }
+//            const index = indexOfRegIntoTracked(reg) orelse return null;
+//            self.locked_registers.set(index);
+//            return RegisterLock{ .register = reg };
+//        }
+
+//        /// Like `lockReg` but asserts the register was unused,
+//        /// always returning a valid lock.
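+//        ///
+//        /// Sketch of the intended call pattern, adapted from the tests at
+//        /// the bottom of this file (illustrative only, not original docs):
+//        ///
+//        ///     const lock = function.register_manager.lockRegAssumeUnused(reg);
+//        ///     defer function.register_manager.unlockReg(lock);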
+//        pub fn lockRegAssumeUnused(self: *Self, reg: Register) RegisterLock {
+//            log.debug("locking asserting free {}", .{reg});
+//            assert(!self.isRegLocked(reg));
+//            const index = indexOfRegIntoTracked(reg) orelse unreachable;
+//            self.locked_registers.set(index);
+//            return RegisterLock{ .register = reg };
+//        }
+
+//        /// Like `lockRegAssumeUnused` but locks multiple registers.
+//        pub fn lockRegsAssumeUnused(
+//            self: *Self,
+//            comptime count: comptime_int,
+//            regs: [count]Register,
+//        ) [count]RegisterLock {
+//            var buf: [count]RegisterLock = undefined;
+//            for (regs) |reg, i| {
+//                buf[i] = self.lockRegAssumeUnused(reg);
+//            }
+//            return buf;
+//        }
+
+//        /// Unlocks the register allowing its re-allocation and re-use.
+//        /// Requires `RegisterLock` to unlock a register.
+//        /// Call `lockReg` to obtain the lock first.
+//        pub fn unlockReg(self: *Self, lock: RegisterLock) void {
+//            log.debug("unlocking {}", .{lock.register});
+//            const index = indexOfRegIntoTracked(lock.register) orelse return;
+//            self.locked_registers.unset(index);
+//        }
+
+//        /// Returns true when at least one register is locked
+//        pub fn lockedRegsExist(self: Self) bool {
+//            return self.locked_registers.count() > 0;
+//        }
+
+//        /// Allocates a specified number of registers, optionally
+//        /// tracking them. Returns `null` if not enough registers are
+//        /// free.
+//        pub fn tryAllocRegs(
+//            self: *Self,
+//            comptime count: comptime_int,
+//            insts: [count]?Air.Inst.Index,
+//            register_class: RegisterBitSet,
+//        ) ?[count]Register {
+//            comptime assert(count > 0 and count <= tracked_registers.len);
+
+//            var free_and_not_locked_registers = self.free_registers;
+//            free_and_not_locked_registers.setIntersection(register_class);
+
+//            var unlocked_registers = self.locked_registers;
+//            unlocked_registers.toggleAll();
+
+//            free_and_not_locked_registers.setIntersection(unlocked_registers);
+
+//            if (free_and_not_locked_registers.count() < count) return null;
+
+//            var regs: [count]Register = undefined;
+//            var i: usize = 0;
+//            for (tracked_registers) |reg| {
+//                if (i >= count) break;
+//                if (excludeRegister(reg, register_class)) continue;
+//                if (self.isRegLocked(reg)) continue;
+//                if (!self.isRegFree(reg)) continue;
+
+//                regs[i] = reg;
+//                i += 1;
+//            }
+//            assert(i == count);
+
+//            for (regs) |reg, j| {
+//                self.markRegAllocated(reg);
+
+//                if (insts[j]) |inst| {
+//                    // Track the register
+//                    const index = indexOfRegIntoTracked(reg).?; // indexOfReg() on a callee-preserved reg should never return null
+//                    self.registers[index] = inst;
+//                    self.markRegUsed(reg);
+//                }
+//            }
+
+//            return regs;
+//        }
+
+//        /// Allocates a register and optionally tracks it with a
+//        /// corresponding instruction. Returns `null` if all registers
+//        /// are allocated.
+//        pub fn tryAllocReg(self: *Self, inst: ?Air.Inst.Index, register_class: RegisterBitSet) ?Register {
+//            return if (tryAllocRegs(self, 1, .{inst}, register_class)) |regs| regs[0] else null;
+//        }
+
+//        /// Allocates a specified number of registers, optionally
+//        /// tracking them. Asserts that count is not
+//        /// larger than the total number of registers available.
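+//        ///
+//        /// Usage sketch adapted from the `allocRegs` tests at the bottom
+//        /// of this file (illustrative only):
+//        ///
+//        ///     const regs = try function.register_manager.allocRegs(2, .{ null, null }, reg_class);
+//        ///     // Both registers are now allocated but untracked, since no
+//        ///     // AIR instructions were passed alongside them.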
+//        pub fn allocRegs(
+//            self: *Self,
+//            comptime count: comptime_int,
+//            insts: [count]?Air.Inst.Index,
+//            register_class: RegisterBitSet,
+//        ) AllocateRegistersError![count]Register {
+//            comptime assert(count > 0 and count <= tracked_registers.len);
+
+//            var locked_registers = self.locked_registers;
+//            locked_registers.setIntersection(register_class);
+
+//            if (count > register_class.count() - locked_registers.count()) return error.OutOfRegisters;
+
+//            const result = self.tryAllocRegs(count, insts, register_class) orelse blk: {
+//                // We'll take over the first count registers. Spill
+//                // the instructions that were previously there to a
+//                // stack allocation.
+//                var regs: [count]Register = undefined;
+//                var i: usize = 0;
+//                for (tracked_registers) |reg| {
+//                    if (i >= count) break;
+//                    if (excludeRegister(reg, register_class)) continue;
+//                    if (self.isRegLocked(reg)) continue;
+
+//                    regs[i] = reg;
+//                    self.markRegAllocated(reg);
+//                    const index = indexOfRegIntoTracked(reg).?; // indexOfReg() on a callee-preserved reg should never return null
+//                    if (insts[i]) |inst| {
+//                        // Track the register
+//                        if (self.isRegFree(reg)) {
+//                            self.markRegUsed(reg);
+//                        } else {
+//                            const spilled_inst = self.registers[index];
+//                            try self.getFunction().spillInstruction(reg, spilled_inst);
+//                        }
+//                        self.registers[index] = inst;
+//                    } else {
+//                        // Don't track the register
+//                        if (!self.isRegFree(reg)) {
+//                            const spilled_inst = self.registers[index];
+//                            try self.getFunction().spillInstruction(reg, spilled_inst);
+//                            self.freeReg(reg);
+//                        }
+//                    }
+
+//                    i += 1;
+//                }
+
+//                break :blk regs;
+//            };
+
+//            log.debug("allocated registers {any} for insts {any}", .{ result, insts });
+//            return result;
+//        }
+
+//        /// Allocates a register and optionally tracks it with a
+//        /// corresponding instruction.
+//        pub fn allocReg(
+//            self: *Self,
+//            inst: ?Air.Inst.Index,
+//            register_class: RegisterBitSet,
+//        ) AllocateRegistersError!Register {
+//            return (try self.allocRegs(1, .{inst}, register_class))[0];
+//        }
+
+//        /// Spills the register if it is currently allocated. If a
+//        /// corresponding instruction is passed, will also track this
+//        /// register.
+//        pub fn getReg(self: *Self, reg: Register, inst: ?Air.Inst.Index) AllocateRegistersError!void {
+//            const index = indexOfRegIntoTracked(reg) orelse return;
+//            log.debug("getReg {} for inst {}", .{ reg, inst });
+//            self.markRegAllocated(reg);
+
+//            if (inst) |tracked_inst|
+//                if (!self.isRegFree(reg)) {
+//                    // Move the instruction that was previously there to a
+//                    // stack allocation.
+//                    const spilled_inst = self.registers[index];
+//                    self.registers[index] = tracked_inst;
+//                    try self.getFunction().spillInstruction(reg, spilled_inst);
+//                } else {
+//                    self.getRegAssumeFree(reg, tracked_inst);
+//                }
+//            else {
+//                if (!self.isRegFree(reg)) {
+//                    // Move the instruction that was previously there to a
+//                    // stack allocation.
+//                    const spilled_inst = self.registers[index];
+//                    try self.getFunction().spillInstruction(reg, spilled_inst);
+//                    self.freeReg(reg);
+//                }
+//            }
+//        }
+
+//        /// Allocates the specified register with the specified
+//        /// instruction. Asserts that the register is free and no
+//        /// spilling is necessary.
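+//        ///
+//        /// Hedged example (assumption, not upstream docs): reserving a
+//        /// known-free register for an instruction might look like:
+//        ///
+//        ///     function.register_manager.getRegAssumeFree(.r0, inst); // hypothetical register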
+//        pub fn getRegAssumeFree(self: *Self, reg: Register, inst: Air.Inst.Index) void {
+//            const index = indexOfRegIntoTracked(reg) orelse return;
+//            log.debug("getRegAssumeFree {} for inst {}", .{ reg, inst });
+//            self.markRegAllocated(reg);
+
+//            assert(self.isRegFree(reg));
+//            self.registers[index] = inst;
+//            self.markRegUsed(reg);
+//        }
+
+//        /// Marks the specified register as free
+//        pub fn freeReg(self: *Self, reg: Register) void {
+//            const index = indexOfRegIntoTracked(reg) orelse return;
+//            log.debug("freeing register {}", .{reg});
+
+//            self.registers[index] = undefined;
+//            self.markRegFree(reg);
+//        }
+//    };
+//}
+
 const MockRegister1 = enum(u2) {
     r0,
     r1,
@@ -361,11 +709,15 @@ const MockRegister2 = enum(u2) {
     r0,
     r1,
 
 fn MockFunction(comptime Register: type) type {
     return struct {
         allocator: Allocator,
-        register_manager: RegisterManager(Self, Register, &Register.allocatable_registers) = .{},
+        register_manager: RegisterManagerT = .{},
         spilled: std.ArrayListUnmanaged(Register) = .{},
 
         const Self = @This();
 
+        const RegisterManagerT = RegisterManager(Self, Register, &Register.allocatable_registers);
+
+        pub const reg_class: RegisterManagerT.RegisterBitSet = math.maxInt(RegisterManagerT.RegisterBitSet);
+
         pub fn deinit(self: *Self) void {
             self.spilled.deinit(self.allocator);
         }
@@ -410,10 +762,20 @@ test "tryAllocReg: no spilling" {
     defer function.deinit();
 
     const mock_instruction: Air.Inst.Index = 1;
+    const reg_class = MockFunction1.reg_class;
 
-    try expectEqual(@as(?MockRegister1, .r2), function.register_manager.tryAllocReg(mock_instruction));
-    try expectEqual(@as(?MockRegister1, .r3), function.register_manager.tryAllocReg(mock_instruction));
-    try expectEqual(@as(?MockRegister1, null), function.register_manager.tryAllocReg(mock_instruction));
+    try expectEqual(@as(?MockRegister1, .r2), function.register_manager.tryAllocReg(
+        mock_instruction,
+        reg_class,
+    ));
+    try expectEqual(@as(?MockRegister1, .r3), function.register_manager.tryAllocReg(
+        mock_instruction,
+        reg_class,
+    ));
+    try expectEqual(@as(?MockRegister1, null), function.register_manager.tryAllocReg(
+        mock_instruction,
+        reg_class,
+    ));
 
     try expect(function.register_manager.isRegAllocated(.r2));
     try expect(function.register_manager.isRegAllocated(.r3));
@@ -438,17 +800,30 @@ test "allocReg: spilling" {
     defer function.deinit();
 
     const mock_instruction: Air.Inst.Index = 1;
+    const reg_class = MockFunction1.reg_class;
 
-    try expectEqual(@as(?MockRegister1, .r2), try function.register_manager.allocReg(mock_instruction));
-    try expectEqual(@as(?MockRegister1, .r3), try function.register_manager.allocReg(mock_instruction));
+    try expectEqual(@as(?MockRegister1, .r2), try function.register_manager.allocReg(
+        mock_instruction,
+        reg_class,
+    ));
+    try expectEqual(@as(?MockRegister1, .r3), try function.register_manager.allocReg(
+        mock_instruction,
+        reg_class,
+    ));
 
     // Spill a register
-    try expectEqual(@as(?MockRegister1, .r2), try function.register_manager.allocReg(mock_instruction));
+    try expectEqual(@as(?MockRegister1, .r2), try function.register_manager.allocReg(
+        mock_instruction,
+        reg_class,
+    ));
     try expectEqualSlices(MockRegister1, &[_]MockRegister1{.r2}, function.spilled.items);
 
     // No spilling necessary
     function.register_manager.freeReg(.r3);
-    try expectEqual(@as(?MockRegister1, .r3), try function.register_manager.allocReg(mock_instruction));
+    try expectEqual(@as(?MockRegister1, .r3), try function.register_manager.allocReg(
+        mock_instruction,
+        reg_class,
+    ));
     try expectEqualSlices(MockRegister1, &[_]MockRegister1{.r2}, function.spilled.items);
 
     // Locked registers
     function.register_manager.freeReg(.r2);
     function.register_manager.freeReg(.r3);
     {
         const lock = function.register_manager.lockReg(.r2);
         defer if (lock) |reg| function.register_manager.unlockReg(reg);
 
-        try expectEqual(@as(?MockRegister1, .r3), try function.register_manager.allocReg(mock_instruction));
+        try expectEqual(@as(?MockRegister1, .r3), try function.register_manager.allocReg(
+            mock_instruction,
+            reg_class,
+        ));
     }
     try expect(!function.register_manager.lockedRegsExist());
 }
@@ -470,7 +848,13 @@ test "tryAllocRegs" {
     };
     defer function.deinit();
 
-    try expectEqual([_]MockRegister2{ .r0, .r1, .r2 }, function.register_manager.tryAllocRegs(3, .{ null, null, null }).?);
+    const reg_class = MockFunction2.reg_class;
+
+    try expectEqual([_]MockRegister2{ .r0, .r1, .r2 }, function.register_manager.tryAllocRegs(
+        3,
+        .{ null, null, null },
+        reg_class,
+    ).?);
 
     try expect(function.register_manager.isRegAllocated(.r0));
     try expect(function.register_manager.isRegAllocated(.r1));
@@ -485,7 +869,11 @@ test "tryAllocRegs" {
         const lock = function.register_manager.lockReg(.r1);
         defer if (lock) |reg| function.register_manager.unlockReg(reg);
 
-        try expectEqual([_]MockRegister2{ .r0, .r2, .r3 }, function.register_manager.tryAllocRegs(3, .{ null, null, null }).?);
+        try expectEqual([_]MockRegister2{ .r0, .r2, .r3 }, function.register_manager.tryAllocRegs(
+            3,
+            .{ null, null, null },
+            reg_class,
+        ).?);
     }
     try expect(!function.register_manager.lockedRegsExist());
@@ -505,6 +893,8 @@ test "allocRegs: normal usage" {
     };
     defer function.deinit();
 
+    const reg_class = MockFunction2.reg_class;
+
     {
         const result_reg: MockRegister2 = .r1;
 
         const lock = function.register_manager.lockReg(result_reg);
         defer if (lock) |reg| function.register_manager.unlockReg(reg);
-        const regs = try function.register_manager.allocRegs(2, .{ null, null });
+        const regs = try function.register_manager.allocRegs(2, .{ null, null }, reg_class);
         try function.genAdd(result_reg, regs[0], regs[1]);
     }
 }
@@ -539,6 +929,8 @@ test "allocRegs: selectively reducing register pressure" {
     };
     defer function.deinit();
 
+    const reg_class = MockFunction2.reg_class;
+
     {
         const result_reg: MockRegister2 = .r1;
 
         // Here, we don't defer unlock because we manually unlock
         // after genAdd
-        const regs = try function.register_manager.allocRegs(2, .{ null, null });
+        const regs = try function.register_manager.allocRegs(2, .{ null, null }, reg_class);
         try function.genAdd(result_reg, regs[0], regs[1]);
 
         function.register_manager.unlockReg(lock.?);
 
-        const extra_summand_reg = try function.register_manager.allocReg(null);
+        const extra_summand_reg = try function.register_manager.allocReg(null, reg_class);
         try function.genAdd(result_reg, result_reg, extra_summand_reg);
     }
 }
diff --git a/test/behavior/basic.zig b/test/behavior/basic.zig
index 10e48c6c7b..d62ba75dee 100644
--- a/test/behavior/basic.zig
+++ b/test/behavior/basic.zig
@@ -402,6 +402,7 @@ fn testPointerToVoidReturnType2() *const void {
 test "array 2D const double ptr" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
 
     const rect_2d_vertexes = [_][1]f32{
@@ -414,6 +415,7 @@ test "array 2D const double ptr" {
 test "array 2D const double ptr with offset" {
"array 2D const double ptr with offset" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; const rect_2d_vertexes = [_][2]f32{ @@ -426,6 +428,7 @@ test "array 2D const double ptr with offset" { test "array 3D const double ptr with offset" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO const rect_3d_vertexes = [_][2][2]f32{ diff --git a/test/behavior/math.zig b/test/behavior/math.zig index 60c71010d4..3b73d93c01 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -198,7 +198,6 @@ test "const number literal" { const ten = 10; test "float equality" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO diff --git a/test/behavior/union.zig b/test/behavior/union.zig index 8315ea8a22..7f17ff50c6 100644 --- a/test/behavior/union.zig +++ b/test/behavior/union.zig @@ -882,6 +882,7 @@ test "extern union doesn't trigger field check at comptime" { test "anonymous union literal syntax" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO const S = struct { const Number = union {