From 91d93b6395bf4a5718cffe18d4a9351b2ff06492 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Sun, 31 Oct 2021 13:01:00 +0100
Subject: [PATCH] stage2: move x86_64 codegen to arch/x86_64/CodeGen.zig

This mimics the steps taken for aarch64 and preps stage2 x86_64 for a
rewrite introducing MIR for this arch.
---
 src/arch/x86_64/CodeGen.zig | 3647 +++++++++++++++++++++++++++++++++++
 src/codegen.zig             | 1297 +------------
 2 files changed, 3663 insertions(+), 1281 deletions(-)
 create mode 100644 src/arch/x86_64/CodeGen.zig

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
new file mode 100644
index 0000000000..b163582135
--- /dev/null
+++ b/src/arch/x86_64/CodeGen.zig
@@ -0,0 +1,3647 @@
+const std = @import("std");
+const build_options = @import("build_options");
+const builtin = @import("builtin");
+const assert = std.debug.assert;
+const leb128 = std.leb;
+const link = @import("../../link.zig");
+const log = std.log.scoped(.codegen);
+const math = std.math;
+const mem = std.mem;
+const trace = @import("../../tracy.zig").trace;
+
+const Air = @import("../../Air.zig");
+const Allocator = mem.Allocator;
+const Compilation = @import("../../Compilation.zig");
+const DebugInfoOutput = @import("../../codegen.zig").DebugInfoOutput;
+const DW = std.dwarf;
+const Encoder = @import("bits.zig").Encoder;
+const ErrorMsg = Module.ErrorMsg;
+const FnResult = @import("../../codegen.zig").FnResult;
+const GenerateSymbolError = @import("../../codegen.zig").GenerateSymbolError;
+const Liveness = @import("../../Liveness.zig");
+const Module = @import("../../Module.zig");
+const RegisterManager = @import("../../register_manager.zig").RegisterManager;
+const Target = std.Target;
+const Type = @import("../../type.zig").Type;
+const TypedValue = @import("../../TypedValue.zig");
+const Value = @import("../../value.zig").Value;
+const Zir = @import("../../Zir.zig");
+
+// `Register` and `callee_preserved_regs` are used by the `register_manager`
+// field and the register handling below; they are assumed to live alongside
+// `Encoder` in bits.zig.
+const Register = @import("bits.zig").Register;
+const callee_preserved_regs = @import("bits.zig").callee_preserved_regs;
+
+const InnerError = error{
+    OutOfMemory,
+    CodegenFail,
+};
+
+arch: std.Target.Cpu.Arch,
+gpa: *Allocator,
+air: Air,
+liveness: Liveness,
+bin_file: *link.File,
+target: *const std.Target,
+mod_fn: *const Module.Fn,
+code: *std.ArrayList(u8),
+debug_output: DebugInfoOutput,
+err_msg: ?*ErrorMsg,
+args: []MCValue,
+ret_mcv: MCValue,
+fn_type: Type,
+arg_index: usize,
+src_loc: Module.SrcLoc,
+stack_align: u32,
+
+prev_di_line: u32,
+prev_di_column: u32,
+/// Line and column of the closing curly brace of the function body.
+end_di_line: u32,
+end_di_column: u32,
+/// Relative to the beginning of `code`.
+prev_di_pc: usize,
+
+/// The value is an offset into the `Function` `code` from the beginning.
+/// To perform the reloc, write 32-bit signed little-endian integer
+/// which is a relative jump, based on the address following the reloc.
+exitlude_jump_relocs: std.ArrayListUnmanaged(usize) = .{},
+
+/// Whenever there is a runtime branch, we push a Branch onto this stack,
+/// and pop it off when the runtime branch joins. This provides an "overlay"
+/// of the table of mappings from instructions to `MCValue` from within the branch.
+/// This way we can modify the `MCValue` for an instruction in different ways
+/// within different branches. Special consideration is needed when a branch
+/// joins with its parent, to make sure all instructions have the same MCValue
+/// across each runtime branch upon joining.
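+///
+/// Sketch of the intended use (instruction numbers hypothetical): while
+/// lowering `if (cond) { ... } else { ... }`, each runtime arm gets its own
+/// Branch. If the "then" arm moves `%5` into a register, only that arm's
+/// overlay records the new `MCValue`, and the join logic reconciles the two
+/// arms' views of `%5` back into the parent table.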
+branch_stack: *std.ArrayList(Branch), + +// Key is the block instruction +blocks: std.AutoHashMapUnmanaged(Air.Inst.Index, BlockData) = .{}, + +register_manager: RegisterManager(Self, Register, &callee_preserved_regs) = .{}, +/// Maps offset to what is stored there. +stack: std.AutoHashMapUnmanaged(u32, StackAllocation) = .{}, + +/// Offset from the stack base, representing the end of the stack frame. +max_end_stack: u32 = 0, +/// Represents the current end stack offset. If there is no existing slot +/// to place a new stack allocation, it goes here, and then bumps `max_end_stack`. +next_stack_offset: u32 = 0, + +/// Debug field, used to find bugs in the compiler. +air_bookkeeping: @TypeOf(air_bookkeeping_init) = air_bookkeeping_init, + +const air_bookkeeping_init = if (std.debug.runtime_safety) @as(usize, 0) else {}; + +const MCValue = union(enum) { + /// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc. + /// TODO Look into deleting this tag and using `dead` instead, since every use + /// of MCValue.none should be instead looking at the type and noticing it is 0 bits. + none, + /// Control flow will not allow this value to be observed. + unreach, + /// No more references to this value remain. + dead, + /// The value is undefined. + undef, + /// A pointer-sized integer that fits in a register. + /// If the type is a pointer, this is the pointer address in virtual address space. + immediate: u64, + /// The constant was emitted into the code, at this offset. + /// If the type is a pointer, it means the pointer address is embedded in the code. + embedded_in_code: usize, + /// The value is a pointer to a constant which was emitted into the code, at this offset. + ptr_embedded_in_code: usize, + /// The value is in a target-specific register. + register: Register, + /// The value is in memory at a hard-coded address. + /// If the type is a pointer, it means the pointer address is at this memory location. + memory: u64, + /// The value is one of the stack variables. + /// If the type is a pointer, it means the pointer address is in the stack at this offset. + stack_offset: u32, + /// The value is a pointer to one of the stack variables (payload is stack offset). + ptr_stack_offset: u32, + /// The value is in the compare flags assuming an unsigned operation, + /// with this operator applied on top of it. + compare_flags_unsigned: math.CompareOperator, + /// The value is in the compare flags assuming a signed operation, + /// with this operator applied on top of it. + compare_flags_signed: math.CompareOperator, + + fn isMemory(mcv: MCValue) bool { + return switch (mcv) { + .embedded_in_code, .memory, .stack_offset => true, + else => false, + }; + } + + fn isImmediate(mcv: MCValue) bool { + return switch (mcv) { + .immediate => true, + else => false, + }; + } + + fn isMutable(mcv: MCValue) bool { + return switch (mcv) { + .none => unreachable, + .unreach => unreachable, + .dead => unreachable, + + .immediate, + .embedded_in_code, + .memory, + .compare_flags_unsigned, + .compare_flags_signed, + .ptr_stack_offset, + .ptr_embedded_in_code, + .undef, + => false, + + .register, + .stack_offset, + => true, + }; + } +}; + +const Branch = struct { + inst_table: std.AutoArrayHashMapUnmanaged(Air.Inst.Index, MCValue) = .{}, + + fn deinit(self: *Branch, gpa: *Allocator) void { + self.inst_table.deinit(gpa); + self.* = undefined; + } +}; + +const StackAllocation = struct { + inst: Air.Inst.Index, + /// TODO do we need size? 
should be determined by inst.ty.abiSize() + size: u32, +}; + +const BlockData = struct { + relocs: std.ArrayListUnmanaged(Reloc), + /// The first break instruction encounters `null` here and chooses a + /// machine code value for the block result, populating this field. + /// Following break instructions encounter that value and use it for + /// the location to store their block results. + mcv: MCValue, +}; + +const Reloc = union(enum) { + /// The value is an offset into the `Function` `code` from the beginning. + /// To perform the reloc, write 32-bit signed little-endian integer + /// which is a relative jump, based on the address following the reloc. + rel32: usize, + /// A branch in the ARM instruction set + arm_branch: struct { + pos: usize, + cond: @import("../../arch/arm/bits.zig").Condition, + }, +}; + +const BigTomb = struct { + function: *Self, + inst: Air.Inst.Index, + tomb_bits: Liveness.Bpi, + big_tomb_bits: u32, + bit_index: usize, + + fn feed(bt: *BigTomb, op_ref: Air.Inst.Ref) void { + const this_bit_index = bt.bit_index; + bt.bit_index += 1; + + const op_int = @enumToInt(op_ref); + if (op_int < Air.Inst.Ref.typed_value_map.len) return; + const op_index = @intCast(Air.Inst.Index, op_int - Air.Inst.Ref.typed_value_map.len); + + if (this_bit_index < Liveness.bpi - 1) { + const dies = @truncate(u1, bt.tomb_bits >> @intCast(Liveness.OperandInt, this_bit_index)) != 0; + if (!dies) return; + } else { + const big_bit_index = @intCast(u5, this_bit_index - (Liveness.bpi - 1)); + const dies = @truncate(u1, bt.big_tomb_bits >> big_bit_index) != 0; + if (!dies) return; + } + bt.function.processDeath(op_index); + } + + fn finishAir(bt: *BigTomb, result: MCValue) void { + const is_used = !bt.function.liveness.isUnused(bt.inst); + if (is_used) { + log.debug("%{d} => {}", .{ bt.inst, result }); + const branch = &bt.function.branch_stack.items[bt.function.branch_stack.items.len - 1]; + branch.inst_table.putAssumeCapacityNoClobber(bt.inst, result); + } + bt.function.finishAirBookkeeping(); + } +}; + +const Self = @This(); + +pub fn generate( + arch: std.Target.Cpu.Arch, + bin_file: *link.File, + src_loc: Module.SrcLoc, + module_fn: *Module.Fn, + air: Air, + liveness: Liveness, + code: *std.ArrayList(u8), + debug_output: DebugInfoOutput, +) GenerateSymbolError!FnResult { + if (build_options.skip_non_native and builtin.cpu.arch != arch) { + @panic("Attempted to compile for architecture that was disabled by build configuration"); + } + + assert(module_fn.owner_decl.has_tv); + const fn_type = module_fn.owner_decl.ty; + + var branch_stack = std.ArrayList(Branch).init(bin_file.allocator); + defer { + assert(branch_stack.items.len == 1); + branch_stack.items[0].deinit(bin_file.allocator); + branch_stack.deinit(); + } + try branch_stack.append(.{}); + + var function = Self{ + .arch = arch, + .gpa = bin_file.allocator, + .air = air, + .liveness = liveness, + .target = &bin_file.options.target, + .bin_file = bin_file, + .mod_fn = module_fn, + .code = code, + .debug_output = debug_output, + .err_msg = null, + .args = undefined, // populated after `resolveCallingConventionValues` + .ret_mcv = undefined, // populated after `resolveCallingConventionValues` + .fn_type = fn_type, + .arg_index = 0, + .branch_stack = &branch_stack, + .src_loc = src_loc, + .stack_align = undefined, + .prev_di_pc = 0, + .prev_di_line = module_fn.lbrace_line, + .prev_di_column = module_fn.lbrace_column, + .end_di_line = module_fn.rbrace_line, + .end_di_column = module_fn.rbrace_column, + }; + defer 
function.stack.deinit(bin_file.allocator); + defer function.blocks.deinit(bin_file.allocator); + defer function.exitlude_jump_relocs.deinit(bin_file.allocator); + + var call_info = function.resolveCallingConventionValues(fn_type) catch |err| switch (err) { + error.CodegenFail => return FnResult{ .fail = function.err_msg.? }, + else => |e| return e, + }; + defer call_info.deinit(&function); + + function.args = call_info.args; + function.ret_mcv = call_info.return_value; + function.stack_align = call_info.stack_align; + function.max_end_stack = call_info.stack_byte_count; + + function.gen() catch |err| switch (err) { + error.CodegenFail => return FnResult{ .fail = function.err_msg.? }, + else => |e| return e, + }; + + if (function.err_msg) |em| { + return FnResult{ .fail = em }; + } else { + return FnResult{ .appended = {} }; + } +} + +fn gen(self: *Self) !void { + try self.code.ensureUnusedCapacity(11); + + const cc = self.fn_type.fnCallingConvention(); + if (cc != .Naked) { + // We want to subtract the aligned stack frame size from rsp here, but we don't + // yet know how big it will be, so we leave room for a 4-byte stack size. + // TODO During semantic analysis, check if there are no function calls. If there + // are none, here we can omit the part where we subtract and then add rsp. + self.code.appendSliceAssumeCapacity(&[_]u8{ + 0x55, // push rbp + 0x48, 0x89, 0xe5, // mov rbp, rsp + 0x48, 0x81, 0xec, // sub rsp, imm32 (with reloc) + }); + const reloc_index = self.code.items.len; + self.code.items.len += 4; + + try self.dbgSetPrologueEnd(); + try self.genBody(self.air.getMainBody()); + + const stack_end = self.max_end_stack; + if (stack_end > math.maxInt(i32)) + return self.failSymbol("too much stack used in call parameters", .{}); + const aligned_stack_end = mem.alignForward(stack_end, self.stack_align); + mem.writeIntLittle(u32, self.code.items[reloc_index..][0..4], @intCast(u32, aligned_stack_end)); + + if (self.code.items.len >= math.maxInt(i32)) { + return self.failSymbol("unable to perform relocation: jump too far", .{}); + } + if (self.exitlude_jump_relocs.items.len == 1) { + self.code.items.len -= 5; + } else for (self.exitlude_jump_relocs.items) |jmp_reloc| { + const amt = self.code.items.len - (jmp_reloc + 4); + const s32_amt = @intCast(i32, amt); + mem.writeIntLittle(i32, self.code.items[jmp_reloc..][0..4], s32_amt); + } + + // Important to be after the possible self.code.items.len -= 5 above. + try self.dbgSetEpilogueBegin(); + + try self.code.ensureUnusedCapacity(9); + // add rsp, x + if (aligned_stack_end > math.maxInt(i8)) { + // example: 48 81 c4 ff ff ff 7f add rsp,0x7fffffff + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x81, 0xc4 }); + const x = @intCast(u32, aligned_stack_end); + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x); + } else if (aligned_stack_end != 0) { + // example: 48 83 c4 7f add rsp,0x7f + const x = @intCast(u8, aligned_stack_end); + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x83, 0xc4, x }); + } + + self.code.appendSliceAssumeCapacity(&[_]u8{ + 0x5d, // pop rbp + 0xc3, // ret + }); + } else { + try self.dbgSetPrologueEnd(); + try self.genBody(self.air.getMainBody()); + try self.dbgSetEpilogueBegin(); + } + + // Drop them off at the rbrace. 
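+    // (The call below emits one final line-table entry at end_di_line and
+    // end_di_column so the epilogue is attributed to the closing brace.
+    // For reference, a hand-assembled illustration, not emitted verbatim, of
+    // what `gen` produces for a non-naked function with an aligned frame of
+    // 0x20 bytes:
+    //   55                    push rbp
+    //   48 89 e5              mov  rbp, rsp
+    //   48 81 ec 20 00 00 00  sub  rsp, 0x20  ; imm32 patched at reloc_index
+    //   ..                    function body
+    //   48 83 c4 20           add  rsp, 0x20  ; short form, 0x20 fits in imm8
+    //   5d                    pop  rbp
+    //   c3                    ret
+    // )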
+ try self.dbgAdvancePCAndLine(self.end_di_line, self.end_di_column); +} + +fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { + const air_tags = self.air.instructions.items(.tag); + + for (body) |inst| { + const old_air_bookkeeping = self.air_bookkeeping; + try self.ensureProcessDeathCapacity(Liveness.bpi); + + switch (air_tags[inst]) { + // zig fmt: off + .add, .ptr_add => try self.airAdd(inst), + .addwrap => try self.airAddWrap(inst), + .add_sat => try self.airAddSat(inst), + .sub, .ptr_sub => try self.airSub(inst), + .subwrap => try self.airSubWrap(inst), + .sub_sat => try self.airSubSat(inst), + .mul => try self.airMul(inst), + .mulwrap => try self.airMulWrap(inst), + .mul_sat => try self.airMulSat(inst), + .rem => try self.airRem(inst), + .mod => try self.airMod(inst), + .shl, .shl_exact => try self.airShl(inst), + .shl_sat => try self.airShlSat(inst), + .min => try self.airMin(inst), + .max => try self.airMax(inst), + .slice => try self.airSlice(inst), + + .div_float, .div_trunc, .div_floor, .div_exact => try self.airDiv(inst), + + .cmp_lt => try self.airCmp(inst, .lt), + .cmp_lte => try self.airCmp(inst, .lte), + .cmp_eq => try self.airCmp(inst, .eq), + .cmp_gte => try self.airCmp(inst, .gte), + .cmp_gt => try self.airCmp(inst, .gt), + .cmp_neq => try self.airCmp(inst, .neq), + + .bool_and => try self.airBoolOp(inst), + .bool_or => try self.airBoolOp(inst), + .bit_and => try self.airBitAnd(inst), + .bit_or => try self.airBitOr(inst), + .xor => try self.airXor(inst), + .shr => try self.airShr(inst), + + .alloc => try self.airAlloc(inst), + .ret_ptr => try self.airRetPtr(inst), + .arg => try self.airArg(inst), + .assembly => try self.airAsm(inst), + .bitcast => try self.airBitCast(inst), + .block => try self.airBlock(inst), + .br => try self.airBr(inst), + .breakpoint => try self.airBreakpoint(), + .fence => try self.airFence(), + .call => try self.airCall(inst), + .cond_br => try self.airCondBr(inst), + .dbg_stmt => try self.airDbgStmt(inst), + .fptrunc => try self.airFptrunc(inst), + .fpext => try self.airFpext(inst), + .intcast => try self.airIntCast(inst), + .trunc => try self.airTrunc(inst), + .bool_to_int => try self.airBoolToInt(inst), + .is_non_null => try self.airIsNonNull(inst), + .is_non_null_ptr => try self.airIsNonNullPtr(inst), + .is_null => try self.airIsNull(inst), + .is_null_ptr => try self.airIsNullPtr(inst), + .is_non_err => try self.airIsNonErr(inst), + .is_non_err_ptr => try self.airIsNonErrPtr(inst), + .is_err => try self.airIsErr(inst), + .is_err_ptr => try self.airIsErrPtr(inst), + .load => try self.airLoad(inst), + .loop => try self.airLoop(inst), + .not => try self.airNot(inst), + .ptrtoint => try self.airPtrToInt(inst), + .ret => try self.airRet(inst), + .ret_load => try self.airRetLoad(inst), + .store => try self.airStore(inst), + .struct_field_ptr=> try self.airStructFieldPtr(inst), + .struct_field_val=> try self.airStructFieldVal(inst), + .array_to_slice => try self.airArrayToSlice(inst), + .int_to_float => try self.airIntToFloat(inst), + .float_to_int => try self.airFloatToInt(inst), + .cmpxchg_strong => try self.airCmpxchg(inst), + .cmpxchg_weak => try self.airCmpxchg(inst), + .atomic_rmw => try self.airAtomicRmw(inst), + .atomic_load => try self.airAtomicLoad(inst), + .memcpy => try self.airMemcpy(inst), + .memset => try self.airMemset(inst), + .set_union_tag => try self.airSetUnionTag(inst), + .get_union_tag => try self.airGetUnionTag(inst), + .clz => try self.airClz(inst), + .ctz => try self.airCtz(inst), + .popcount => try 
self.airPopcount(inst), + + .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered), + .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic), + .atomic_store_release => try self.airAtomicStore(inst, .Release), + .atomic_store_seq_cst => try self.airAtomicStore(inst, .SeqCst), + + .struct_field_ptr_index_0 => try self.airStructFieldPtrIndex(inst, 0), + .struct_field_ptr_index_1 => try self.airStructFieldPtrIndex(inst, 1), + .struct_field_ptr_index_2 => try self.airStructFieldPtrIndex(inst, 2), + .struct_field_ptr_index_3 => try self.airStructFieldPtrIndex(inst, 3), + + .switch_br => try self.airSwitch(inst), + .slice_ptr => try self.airSlicePtr(inst), + .slice_len => try self.airSliceLen(inst), + + .ptr_slice_len_ptr => try self.airPtrSliceLenPtr(inst), + .ptr_slice_ptr_ptr => try self.airPtrSlicePtrPtr(inst), + + .array_elem_val => try self.airArrayElemVal(inst), + .slice_elem_val => try self.airSliceElemVal(inst), + .slice_elem_ptr => try self.airSliceElemPtr(inst), + .ptr_elem_val => try self.airPtrElemVal(inst), + .ptr_elem_ptr => try self.airPtrElemPtr(inst), + + .constant => unreachable, // excluded from function bodies + .const_ty => unreachable, // excluded from function bodies + .unreach => self.finishAirBookkeeping(), + + .optional_payload => try self.airOptionalPayload(inst), + .optional_payload_ptr => try self.airOptionalPayloadPtr(inst), + .unwrap_errunion_err => try self.airUnwrapErrErr(inst), + .unwrap_errunion_payload => try self.airUnwrapErrPayload(inst), + .unwrap_errunion_err_ptr => try self.airUnwrapErrErrPtr(inst), + .unwrap_errunion_payload_ptr=> try self.airUnwrapErrPayloadPtr(inst), + + .wrap_optional => try self.airWrapOptional(inst), + .wrap_errunion_payload => try self.airWrapErrUnionPayload(inst), + .wrap_errunion_err => try self.airWrapErrUnionErr(inst), + // zig fmt: on + } + if (std.debug.runtime_safety) { + if (self.air_bookkeeping < old_air_bookkeeping + 1) { + std.debug.panic("in codegen.zig, handling of AIR instruction %{d} ('{}') did not do proper bookkeeping. Look for a missing call to finishAir.", .{ inst, air_tags[inst] }); + } + } + } +} + +fn dbgSetPrologueEnd(self: *Self) InnerError!void { + switch (self.debug_output) { + .dwarf => |dbg_out| { + try dbg_out.dbg_line.append(DW.LNS.set_prologue_end); + try self.dbgAdvancePCAndLine(self.prev_di_line, self.prev_di_column); + }, + .plan9 => {}, + .none => {}, + } +} + +fn dbgSetEpilogueBegin(self: *Self) InnerError!void { + switch (self.debug_output) { + .dwarf => |dbg_out| { + try dbg_out.dbg_line.append(DW.LNS.set_epilogue_begin); + try self.dbgAdvancePCAndLine(self.prev_di_line, self.prev_di_column); + }, + .plan9 => {}, + .none => {}, + } +} + +fn dbgAdvancePCAndLine(self: *Self, line: u32, column: u32) InnerError!void { + const delta_line = @intCast(i32, line) - @intCast(i32, self.prev_di_line); + const delta_pc: usize = self.code.items.len - self.prev_di_pc; + switch (self.debug_output) { + .dwarf => |dbg_out| { + // TODO Look into using the DWARF special opcodes to compress this data. + // It lets you emit single-byte opcodes that add different numbers to + // both the PC and the line number at the same time. 
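+            // Illustrative example of the uncompressed encoding emitted below:
+            // for delta_pc = 7 and delta_line = 2 the line program appends
+            //   DW.LNS.advance_pc,   ULEB128(7),
+            //   DW.LNS.advance_line, SLEB128(2),
+            //   DW.LNS.copy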
+            try dbg_out.dbg_line.ensureUnusedCapacity(11);
+            dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.advance_pc);
+            leb128.writeULEB128(dbg_out.dbg_line.writer(), delta_pc) catch unreachable;
+            if (delta_line != 0) {
+                dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.advance_line);
+                leb128.writeILEB128(dbg_out.dbg_line.writer(), delta_line) catch unreachable;
+            }
+            dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.copy);
+            self.prev_di_line = line;
+            self.prev_di_column = column;
+            self.prev_di_pc = self.code.items.len;
+        },
+        .plan9 => |dbg_out| {
+            if (delta_pc <= 0) return; // only do this when the pc changes
+            // we have already checked the target in the linker to make sure it is compatible
+            const quant = @import("../../link/Plan9/aout.zig").getPCQuant(self.target.cpu.arch) catch unreachable;
+
+            // increase the line number
+            try @import("../../link/Plan9.zig").changeLine(dbg_out.dbg_line, delta_line);
+            // increase the pc
+            const d_pc_p9 = @intCast(i64, delta_pc) - quant;
+            if (d_pc_p9 > 0) {
+                // minus one because if it's the last one, we want to leave space to change the line, which is one quantum
+                try dbg_out.dbg_line.append(@intCast(u8, @divExact(d_pc_p9, quant) + 128) - quant);
+                if (dbg_out.pcop_change_index.*) |pci|
+                    dbg_out.dbg_line.items[pci] += 1;
+                dbg_out.pcop_change_index.* = @intCast(u32, dbg_out.dbg_line.items.len - 1);
+            } else if (d_pc_p9 == 0) {
+                // we don't need to do anything, because adding the quant does it for us
+            } else unreachable;
+            if (dbg_out.start_line.* == null)
+                dbg_out.start_line.* = self.prev_di_line;
+            dbg_out.end_line.* = line;
+            // we know the pc changed; see the early return above
+            self.prev_di_line = line;
+            self.prev_di_column = column;
+            self.prev_di_pc = self.code.items.len;
+        },
+        .none => {},
+    }
+}
+
+/// Asserts there is already capacity to insert into top branch inst_table.
+fn processDeath(self: *Self, inst: Air.Inst.Index) void {
+    const air_tags = self.air.instructions.items(.tag);
+    if (air_tags[inst] == .constant) return; // Constants are immortal.
+    // When editing this function, note that the logic must synchronize with `reuseOperand`.
+    const prev_value = self.getResolvedInstValue(inst);
+    const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
+    branch.inst_table.putAssumeCapacity(inst, .dead);
+    switch (prev_value) {
+        .register => |reg| {
+            const canon_reg = reg.to64();
+            self.register_manager.freeReg(canon_reg);
+        },
+        else => {}, // TODO process stack allocation death
+    }
+}
+
+/// Called when there are no operands, and the instruction is always unreferenced.
+fn finishAirBookkeeping(self: *Self) void { + if (std.debug.runtime_safety) { + self.air_bookkeeping += 1; + } +} + +fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Liveness.bpi - 1]Air.Inst.Ref) void { + var tomb_bits = self.liveness.getTombBits(inst); + for (operands) |op| { + const dies = @truncate(u1, tomb_bits) != 0; + tomb_bits >>= 1; + if (!dies) continue; + const op_int = @enumToInt(op); + if (op_int < Air.Inst.Ref.typed_value_map.len) continue; + const op_index = @intCast(Air.Inst.Index, op_int - Air.Inst.Ref.typed_value_map.len); + self.processDeath(op_index); + } + const is_used = @truncate(u1, tomb_bits) == 0; + if (is_used) { + log.debug("%{d} => {}", .{ inst, result }); + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + branch.inst_table.putAssumeCapacityNoClobber(inst, result); + + switch (result) { + .register => |reg| { + // In some cases (such as bitcast), an operand + // may be the same MCValue as the result. If + // that operand died and was a register, it + // was freed by processDeath. We have to + // "re-allocate" the register. + if (self.register_manager.isRegFree(reg)) { + self.register_manager.getRegAssumeFree(reg, inst); + } + }, + else => {}, + } + } + self.finishAirBookkeeping(); +} + +fn ensureProcessDeathCapacity(self: *Self, additional_count: usize) !void { + const table = &self.branch_stack.items[self.branch_stack.items.len - 1].inst_table; + try table.ensureUnusedCapacity(self.gpa, additional_count); +} + +/// Adds a Type to the .debug_info at the current position. The bytes will be populated later, +/// after codegen for this symbol is done. +fn addDbgInfoTypeReloc(self: *Self, ty: Type) !void { + switch (self.debug_output) { + .dwarf => |dbg_out| { + assert(ty.hasCodeGenBits()); + const index = dbg_out.dbg_info.items.len; + try dbg_out.dbg_info.resize(index + 4); // DW.AT.type, DW.FORM.ref4 + + const gop = try dbg_out.dbg_info_type_relocs.getOrPut(self.gpa, ty); + if (!gop.found_existing) { + gop.value_ptr.* = .{ + .off = undefined, + .relocs = .{}, + }; + } + try gop.value_ptr.relocs.append(self.gpa, @intCast(u32, index)); + }, + .plan9 => {}, + .none => {}, + } +} + +fn allocMem(self: *Self, inst: Air.Inst.Index, abi_size: u32, abi_align: u32) !u32 { + if (abi_align > self.stack_align) + self.stack_align = abi_align; + // TODO find a free slot instead of always appending + const offset = mem.alignForwardGeneric(u32, self.next_stack_offset, abi_align); + self.next_stack_offset = offset + abi_size; + if (self.next_stack_offset > self.max_end_stack) + self.max_end_stack = self.next_stack_offset; + try self.stack.putNoClobber(self.gpa, offset, .{ + .inst = inst, + .size = abi_size, + }); + return offset; +} + +/// Use a pointer instruction as the basis for allocating stack memory. 
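+/// Illustrative example (hypothetical values): for an `alloc` of a `u64` with
+/// `next_stack_offset == 4`, `allocMem` above rounds the offset up to 8,
+/// reserves bytes [8, 16) of the frame, and the caller gets back offset 8,
+/// which `airAlloc` then wraps as `MCValue{ .ptr_stack_offset = 8 }`.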
+fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !u32 { + const elem_ty = self.air.typeOfIndex(inst).elemType(); + const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) catch { + return self.fail("type '{}' too big to fit into stack frame", .{elem_ty}); + }; + // TODO swap this for inst.ty.ptrAlign + const abi_align = elem_ty.abiAlignment(self.target.*); + return self.allocMem(inst, abi_size, abi_align); +} + +fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue { + const elem_ty = self.air.typeOfIndex(inst); + const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) catch { + return self.fail("type '{}' too big to fit into stack frame", .{elem_ty}); + }; + const abi_align = elem_ty.abiAlignment(self.target.*); + if (abi_align > self.stack_align) + self.stack_align = abi_align; + + if (reg_ok) { + // Make sure the type can fit in a register before we try to allocate one. + const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bytes: u64 = @divExact(ptr_bits, 8); + if (abi_size <= ptr_bytes) { + if (self.register_manager.tryAllocReg(inst, &.{})) |reg| { + return MCValue{ .register = registerAlias(reg, abi_size) }; + } + } + } + const stack_offset = try self.allocMem(inst, abi_size, abi_align); + return MCValue{ .stack_offset = stack_offset }; +} + +pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void { + const stack_mcv = try self.allocRegOrMem(inst, false); + log.debug("spilling {d} to stack mcv {any}", .{ inst, stack_mcv }); + const reg_mcv = self.getResolvedInstValue(inst); + assert(reg == reg_mcv.register.to64()); + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + try branch.inst_table.put(self.gpa, inst, stack_mcv); + try self.genSetStack(self.air.typeOfIndex(inst), stack_mcv.stack_offset, reg_mcv); +} + +/// Copies a value to a register without tracking the register. The register is not considered +/// allocated. A second call to `copyToTmpRegister` may return the same register. +/// This can have a side effect of spilling instructions to the stack to free up a register. +fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { + const reg = try self.register_manager.allocReg(null, &.{}); + try self.genSetReg(ty, reg, mcv); + return reg; +} + +/// Allocates a new register and copies `mcv` into it. +/// `reg_owner` is the instruction that gets associated with the register in the register table. +/// This can have a side effect of spilling instructions to the stack to free up a register. 
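+/// (Used by `genX8664BinMath` below when both operands live in memory, since
+/// these x86 instructions allow at most one memory operand.)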
+fn copyToNewRegister(self: *Self, reg_owner: Air.Inst.Index, mcv: MCValue) !MCValue { + const reg = try self.register_manager.allocReg(reg_owner, &.{}); + try self.genSetReg(self.air.typeOfIndex(reg_owner), reg, mcv); + return MCValue{ .register = reg }; +} + +fn airAlloc(self: *Self, inst: Air.Inst.Index) !void { + const stack_offset = try self.allocMemPtr(inst); + return self.finishAir(inst, .{ .ptr_stack_offset = stack_offset }, .{ .none, .none, .none }); +} + +fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void { + const stack_offset = try self.allocMemPtr(inst); + return self.finishAir(inst, .{ .ptr_stack_offset = stack_offset }, .{ .none, .none, .none }); +} + +fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + _ = ty_op; + return self.fail("TODO implement airFptrunc for {}", .{self.target.cpu.arch}); + // return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airFpext(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + _ = ty_op; + return self.fail("TODO implement airFpext for {}", .{self.target.cpu.arch}); + // return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + if (self.liveness.isUnused(inst)) + return self.finishAir(inst, .dead, .{ ty_op.operand, .none, .none }); + + const operand_ty = self.air.typeOf(ty_op.operand); + const operand = try self.resolveInst(ty_op.operand); + const info_a = operand_ty.intInfo(self.target.*); + const info_b = self.air.typeOfIndex(inst).intInfo(self.target.*); + if (info_a.signedness != info_b.signedness) + return self.fail("TODO gen intcast sign safety in semantic analysis", .{}); + + if (info_a.bits == info_b.bits) + return self.finishAir(inst, operand, .{ ty_op.operand, .none, .none }); + + return self.fail("TODO implement intCast for {}", .{self.target.cpu.arch}); +} + +fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + if (self.liveness.isUnused(inst)) + return self.finishAir(inst, .dead, .{ ty_op.operand, .none, .none }); + + const operand = try self.resolveInst(ty_op.operand); + _ = operand; + return self.fail("TODO implement trunc for {}", .{self.target.cpu.arch}); +} + +fn airBoolToInt(self: *Self, inst: Air.Inst.Index) !void { + const un_op = self.air.instructions.items(.data)[inst].un_op; + const operand = try self.resolveInst(un_op); + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else operand; + return self.finishAir(inst, result, .{ un_op, .none, .none }); +} + +fn airNot(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const operand = try self.resolveInst(ty_op.operand); + switch (operand) { + .dead => unreachable, + .unreach => unreachable, + .compare_flags_unsigned => |op| { + const r = MCValue{ + .compare_flags_unsigned = switch (op) { + .gte => .lt, + .gt => .lte, + .neq => .eq, + .lt => .gte, + .lte => .gt, + .eq => .neq, + }, + }; + break :result r; + }, + .compare_flags_signed => |op| { + const r = MCValue{ + .compare_flags_signed = switch (op) { + .gte => .lt, + .gt => .lte, + .neq => .eq, + .lt => .gte, + .lte => .gt, + .eq => .neq, + }, + }; + break :result r; + }, + else => {}, + } + break :result try 
self.genX8664BinMath(inst, ty_op.operand, .bool_true); + }; + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airMin(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement min for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airMax(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement max for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airSlice(self: *Self, inst: Air.Inst.Index) !void { + const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; + const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement slice for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airAdd(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airAddWrap(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement addwrap for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airAddSat(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement add_sat for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airSub(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airSubWrap(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement subwrap for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airSubSat(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement sub_sat for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airMul(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + try 
self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airMulWrap(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement mulwrap for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement mul_sat for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airDiv(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement div for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airRem(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement rem for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airMod(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement mod for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airBitAnd(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airBitOr(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airXor(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement xor for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airShl(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement shl for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airShlSat(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch}); + return 
self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airShr(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement shr for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airOptionalPayload(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement .optional_payload for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airOptionalPayloadPtr(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement .optional_payload_ptr for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airUnwrapErrErr(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement unwrap error union error for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airUnwrapErrPayload(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement unwrap error union payload for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +// *(E!T) -> E +fn airUnwrapErrErrPtr(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement unwrap error union error ptr for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +// *(E!T) -> *T +fn airUnwrapErrPayloadPtr(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement unwrap error union payload ptr for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const optional_ty = self.air.typeOfIndex(inst); + + // Optional with a zero-bit payload type is just a boolean true + if (optional_ty.abiSize(self.target.*) == 1) + break :result MCValue{ .immediate = 1 }; + + return self.fail("TODO implement wrap optional for {}", .{self.target.cpu.arch}); + }; + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +/// T to E!T +fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return 
self.fail("TODO implement wrap errunion payload for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +/// E to E!T +fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement wrap errunion error for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airSlicePtr(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement slice_ptr for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airSliceLen(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement slice_len for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airPtrSliceLenPtr(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement ptr_slice_len_ptr for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement ptr_slice_ptr_ptr for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void { + const is_volatile = false; // TODO + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (!is_volatile and self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement slice_elem_val for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airSliceElemPtr(self: *Self, inst: Air.Inst.Index) !void { + const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; + const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement slice_elem_ptr for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, .none }); +} + +fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement array_elem_val for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airPtrElemVal(self: *Self, inst: Air.Inst.Index) !void { + const is_volatile = false; // TODO + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (!is_volatile and self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement ptr_elem_val for {}", 
.{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void { + const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; + const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement ptr_elem_ptr for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, .none }); +} + +fn airSetUnionTag(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + _ = bin_op; + return self.fail("TODO implement airSetUnionTag for {}", .{self.target.cpu.arch}); +} + +fn airGetUnionTag(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement airGetUnionTag for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airClz(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement airClz for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airCtz(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement airCtz for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airPopcount(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + return self.fail("TODO implement airPopcount for {}", .{self.target.cpu.arch}); + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn reuseOperand(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, op_index: Liveness.OperandInt, mcv: MCValue) bool { + if (!self.liveness.operandDies(inst, op_index)) + return false; + + switch (mcv) { + .register => |reg| { + // If it's in the registers table, need to associate the register with the + // new instruction. + if (reg.allocIndex()) |index| { + if (!self.register_manager.isRegFree(reg)) { + self.register_manager.registers[index] = inst; + } + } + log.debug("%{d} => {} (reused)", .{ inst, reg }); + }, + .stack_offset => |off| { + log.debug("%{d} => stack offset {d} (reused)", .{ inst, off }); + }, + else => return false, + } + + // Prevent the operand deaths processing code from deallocating it. + self.liveness.clearOperandDeath(inst, op_index); + + // That makes us responsible for doing the rest of the stuff that processDeath would have done. 
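+    // Illustrative example: for `%3 = add(%1, %2)` where `%1` lives in a
+    // register and dies at this use, the register is handed over to `%3`
+    // above, and `%1` is marked dead below, which is the same bookkeeping
+    // `processDeath` performs.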
+ const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + branch.inst_table.putAssumeCapacity(Air.refToIndex(operand).?, .dead); + + return true; +} + +fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!void { + const elem_ty = ptr_ty.elemType(); + switch (ptr) { + .none => unreachable, + .undef => unreachable, + .unreach => unreachable, + .dead => unreachable, + .compare_flags_unsigned => unreachable, + .compare_flags_signed => unreachable, + .immediate => |imm| try self.setRegOrMem(elem_ty, dst_mcv, .{ .memory = imm }), + .ptr_stack_offset => |off| try self.setRegOrMem(elem_ty, dst_mcv, .{ .stack_offset = off }), + .ptr_embedded_in_code => |off| { + try self.setRegOrMem(elem_ty, dst_mcv, .{ .embedded_in_code = off }); + }, + .embedded_in_code => { + return self.fail("TODO implement loading from MCValue.embedded_in_code", .{}); + }, + .register => { + return self.fail("TODO implement loading from MCValue.register for {}", .{self.target.cpu.arch}); + }, + .memory => |addr| { + const reg = try self.register_manager.allocReg(null, &.{}); + try self.genSetReg(ptr_ty, reg, .{ .memory = addr }); + try self.load(dst_mcv, .{ .register = reg }, ptr_ty); + }, + .stack_offset => { + return self.fail("TODO implement loading from MCValue.stack_offset", .{}); + }, + } +} + +fn airLoad(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const elem_ty = self.air.typeOfIndex(inst); + const result: MCValue = result: { + if (!elem_ty.hasCodeGenBits()) + break :result MCValue.none; + + const ptr = try self.resolveInst(ty_op.operand); + const is_volatile = self.air.typeOf(ty_op.operand).isVolatilePtr(); + if (self.liveness.isUnused(inst) and !is_volatile) + break :result MCValue.dead; + + const dst_mcv: MCValue = blk: { + if (self.reuseOperand(inst, ty_op.operand, 0, ptr)) { + // The MCValue that holds the pointer can be re-used as the value. 
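+                // (e.g. if the pointer is in %rax and dies here, `load` below
+                // simply overwrites %rax with the loaded value.)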
+                break :blk ptr;
+            } else {
+                break :blk try self.allocRegOrMem(inst, true);
+            }
+        };
+        try self.load(dst_mcv, ptr, self.air.typeOf(ty_op.operand));
+        break :result dst_mcv;
+    };
+    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+}
+
+fn airStore(self: *Self, inst: Air.Inst.Index) !void {
+    const bin_op = self.air.instructions.items(.data)[inst].bin_op;
+    const ptr = try self.resolveInst(bin_op.lhs);
+    const value = try self.resolveInst(bin_op.rhs);
+    const elem_ty = self.air.typeOf(bin_op.rhs);
+    switch (ptr) {
+        .none => unreachable,
+        .undef => unreachable,
+        .unreach => unreachable,
+        .dead => unreachable,
+        .compare_flags_unsigned => unreachable,
+        .compare_flags_signed => unreachable,
+        .immediate => |imm| {
+            try self.setRegOrMem(elem_ty, .{ .memory = imm }, value);
+        },
+        .ptr_stack_offset => |off| {
+            try self.genSetStack(elem_ty, off, value);
+        },
+        .ptr_embedded_in_code => |off| {
+            try self.setRegOrMem(elem_ty, .{ .embedded_in_code = off }, value);
+        },
+        .embedded_in_code => {
+            return self.fail("TODO implement storing to MCValue.embedded_in_code", .{});
+        },
+        .register => {
+            return self.fail("TODO implement storing to MCValue.register", .{});
+        },
+        .memory => {
+            return self.fail("TODO implement storing to MCValue.memory", .{});
+        },
+        .stack_offset => {
+            return self.fail("TODO implement storing to MCValue.stack_offset", .{});
+        },
+    }
+    return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none });
+}
+
+fn airStructFieldPtr(self: *Self, inst: Air.Inst.Index) !void {
+    const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
+    const extra = self.air.extraData(Air.StructField, ty_pl.payload).data;
+    return self.structFieldPtr(extra.struct_operand, ty_pl.ty, extra.field_index);
+}
+
+fn airStructFieldPtrIndex(self: *Self, inst: Air.Inst.Index, index: u8) !void {
+    const ty_op = self.air.instructions.items(.data)[inst].ty_op;
+    return self.structFieldPtr(ty_op.operand, ty_op.ty, index);
+}
+
+fn structFieldPtr(self: *Self, operand: Air.Inst.Ref, ty: Air.Inst.Ref, index: u32) !void {
+    _ = operand;
+    _ = ty;
+    _ = index;
+    return self.fail("TODO implement codegen struct_field_ptr", .{});
+    //return self.finishAir(inst, result, .{ extra.struct_ptr, .none, .none });
+}
+
+fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
+    const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
+    const extra = self.air.extraData(Air.StructField, ty_pl.payload).data;
+    _ = extra;
+    return self.fail("TODO implement codegen struct_field_val", .{});
+    //return self.finishAir(inst, result, .{ extra.struct_ptr, .none, .none });
+}
+
+/// Perform "binary" operators, excluding comparisons.
+/// Currently, the following ops are supported:
+/// ADD, SUB, XOR, OR, AND
+fn genX8664BinMath(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_rhs: Air.Inst.Ref) !MCValue {
+    // We'll handle these ops in three steps.
+    // 1) Prepare an output location (register or memory).
+    //    This location will be the location of the operand that dies (if one exists)
+    //    or just a temporary register (if one doesn't exist).
+    // 2) Perform the op with the other argument.
+    // 3) Sometimes, the output location is memory but the op doesn't support it.
+    //    In this case, copy that location to a register, then perform the op to that register instead.
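+    //
+    // Illustrative walk-through (hypothetical MCValues): for an `add` where
+    // lhs lives in %rcx and dies here and rhs is `.{ .immediate = 1 }`,
+    // step 1 reuses %rcx as the destination and step 2 lets
+    // genX8664BinMathCode emit `add rcx, 1` (bytes: 48 83 c1 01).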
+ // + // TODO: make this algorithm less bad + + try self.code.ensureUnusedCapacity(8); + + const lhs = try self.resolveInst(op_lhs); + const rhs = try self.resolveInst(op_rhs); + + // There are 2 operands, destination and source. + // Either one, but not both, can be a memory operand. + // Source operand can be an immediate, 8 bits or 32 bits. + // So, if either one of the operands dies with this instruction, we can use it + // as the result MCValue. + var dst_mcv: MCValue = undefined; + var src_mcv: MCValue = undefined; + var src_inst: Air.Inst.Ref = undefined; + if (self.reuseOperand(inst, op_lhs, 0, lhs)) { + // LHS dies; use it as the destination. + // Both operands cannot be memory. + src_inst = op_rhs; + if (lhs.isMemory() and rhs.isMemory()) { + dst_mcv = try self.copyToNewRegister(inst, lhs); + src_mcv = rhs; + } else { + dst_mcv = lhs; + src_mcv = rhs; + } + } else if (self.reuseOperand(inst, op_rhs, 1, rhs)) { + // RHS dies; use it as the destination. + // Both operands cannot be memory. + src_inst = op_lhs; + if (lhs.isMemory() and rhs.isMemory()) { + dst_mcv = try self.copyToNewRegister(inst, rhs); + src_mcv = lhs; + } else { + dst_mcv = rhs; + src_mcv = lhs; + } + } else { + if (lhs.isMemory()) { + dst_mcv = try self.copyToNewRegister(inst, lhs); + src_mcv = rhs; + src_inst = op_rhs; + } else { + dst_mcv = try self.copyToNewRegister(inst, rhs); + src_mcv = lhs; + src_inst = op_lhs; + } + } + // This instruction supports only signed 32-bit immediates at most. If the immediate + // value is larger than this, we put it in a register. + // A potential opportunity for future optimization here would be keeping track + // of the fact that the instruction is available both as an immediate + // and as a register. + switch (src_mcv) { + .immediate => |imm| { + if (imm > math.maxInt(u31)) { + src_mcv = MCValue{ .register = try self.copyToTmpRegister(Type.initTag(.u64), src_mcv) }; + } + }, + else => {}, + } + + // Now for step 2, we perform the actual op + const inst_ty = self.air.typeOfIndex(inst); + const air_tags = self.air.instructions.items(.tag); + switch (air_tags[inst]) { + // TODO: Generate wrapping and non-wrapping versions separately + .add, .addwrap => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 0, 0x00), + .bool_or, .bit_or => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 1, 0x08), + .bool_and, .bit_and => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 4, 0x20), + .sub, .subwrap => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 5, 0x28), + .xor, .not => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 6, 0x30), + + .mul, .mulwrap => try self.genX8664Imul(inst_ty, dst_mcv, src_mcv), + else => unreachable, + } + + return dst_mcv; +} + +/// Wrap over Instruction.encodeInto to translate errors +fn encodeX8664Instruction(self: *Self, inst: Instruction) !void { + inst.encodeInto(self.code) catch |err| { + if (err == error.OutOfMemory) + return error.OutOfMemory + else + return self.fail("Instruction.encodeInto failed because {s}", .{@errorName(err)}); + }; +} + +/// This function encodes a binary operation for x86_64 +/// intended for use with the following opcode ranges +/// because they share the same structure. +/// +/// Thus not all binary operations can be used here +/// -- multiplication needs to be done with imul, +/// which doesn't have as convenient an interface. 
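+///
+/// As a concrete instance of the "mr"-style encodings tabulated below
+/// (illustrative): `sub rbx, r12` takes the mr + 1 form (r/m64, r64) with
+/// mr = 0x28, and encodes as REX.WR (0x4c), opcode 0x29, ModRM 0xe3,
+/// i.e. the bytes `4c 29 e3`.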
+///
+/// "opx"-style instructions use the opcode extension field to indicate which instruction to execute:
+///
+/// opx = /0: add
+/// opx = /1: or
+/// opx = /2: adc
+/// opx = /3: sbb
+/// opx = /4: and
+/// opx = /5: sub
+/// opx = /6: xor
+/// opx = /7: cmp
+///
+/// opcode | operand shape
+/// --------+----------------------
+/// 80 /opx | *r/m8*, imm8
+/// 81 /opx | *r/m16/32/64*, imm16/32
+/// 83 /opx | *r/m16/32/64*, imm8
+///
+/// "mr"-style instructions use the low bits of opcode to indicate shape of instruction:
+///
+/// mr = 00: add
+/// mr = 08: or
+/// mr = 10: adc
+/// mr = 18: sbb
+/// mr = 20: and
+/// mr = 28: sub
+/// mr = 30: xor
+/// mr = 38: cmp
+///
+/// opcode | operand shape
+/// -------+-------------------------
+/// mr + 0 | *r/m8*, r8
+/// mr + 1 | *r/m16/32/64*, r16/32/64
+/// mr + 2 | *r8*, r/m8
+/// mr + 3 | *r16/32/64*, r/m16/32/64
+/// mr + 4 | *AL*, imm8
+/// mr + 5 | *rAX*, imm16/32
+///
+/// TODO: rotates and shifts share the same structure, so we can potentially implement them
+/// at a later date with very similar code.
+/// They have "opx"-style instructions, but no "mr"-style instructions.
+///
+/// opx = /0: rol
+/// opx = /1: ror
+/// opx = /2: rcl
+/// opx = /3: rcr
+/// opx = /4: shl/sal
+/// opx = /5: shr
+/// opx = /6: shl/sal (alias of /4)
+/// opx = /7: sar
+///
+/// opcode | operand shape
+/// --------+------------------
+/// c0 /opx | *r/m8*, imm8
+/// c1 /opx | *r/m16/32/64*, imm8
+/// d0 /opx | *r/m8*, 1
+/// d1 /opx | *r/m16/32/64*, 1
+/// d2 /opx | *r/m8*, CL (for context, CL is register 1)
+/// d3 /opx | *r/m16/32/64*, CL (for context, CL is register 1)
+fn genX8664BinMathCode(
+ self: *Self,
+ dst_ty: Type,
+ dst_mcv: MCValue,
+ src_mcv: MCValue,
+ opx: u3,
+ mr: u8,
+) !void {
+ switch (dst_mcv) {
+ .none => unreachable,
+ .undef => unreachable,
+ .dead, .unreach, .immediate => unreachable,
+ .compare_flags_unsigned => unreachable,
+ .compare_flags_signed => unreachable,
+ .ptr_stack_offset => unreachable,
+ .ptr_embedded_in_code => unreachable,
+ .register => |dst_reg| {
+ switch (src_mcv) {
+ .none => unreachable,
+ .undef => try self.genSetReg(dst_ty, dst_reg, .undef),
+ .dead, .unreach => unreachable,
+ .ptr_stack_offset => unreachable,
+ .ptr_embedded_in_code => unreachable,
+ .register => |src_reg| {
+ // for register, register use mr + 1
+ // addressing mode: *r/m16/32/64*, r16/32/64
+ const abi_size = dst_ty.abiSize(self.target.*);
+ const encoder = try Encoder.init(self.code, 3);
+ encoder.rex(.{
+ .w = abi_size == 8,
+ .r = src_reg.isExtended(),
+ .b = dst_reg.isExtended(),
+ });
+ encoder.opcode_1byte(mr + 1);
+ encoder.modRm_direct(
+ src_reg.low_id(),
+ dst_reg.low_id(),
+ );
+ },
+ .immediate => |imm| {
+ // register, immediate uses opcode 0x81 or 0x83 with the /opx extension:
+ // 81 /opx: r/m16/32/64, imm16/32
+ // 83 /opx: r/m16/32/64, imm8
+ const imm32 = @intCast(i32, imm); // Immediates that do not fit in 32 bits must be handled before calling genX8664BinMathCode.
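+ // Pick the shorter 0x83 (imm8) form when the immediate fits in a signed
+ // byte, otherwise the 0x81 (imm32) form; e.g. (assuming rax as dst_reg)
+ // `add rax, 4` -> 48 83 c0 04 vs `add rax, 0x1000` -> 48 81 c0 00 10 00 00.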
+ if (imm32 <= math.maxInt(i8)) { + const abi_size = dst_ty.abiSize(self.target.*); + const encoder = try Encoder.init(self.code, 4); + encoder.rex(.{ + .w = abi_size == 8, + .b = dst_reg.isExtended(), + }); + encoder.opcode_1byte(0x83); + encoder.modRm_direct( + opx, + dst_reg.low_id(), + ); + encoder.imm8(@intCast(i8, imm32)); + } else { + const abi_size = dst_ty.abiSize(self.target.*); + const encoder = try Encoder.init(self.code, 7); + encoder.rex(.{ + .w = abi_size == 8, + .b = dst_reg.isExtended(), + }); + encoder.opcode_1byte(0x81); + encoder.modRm_direct( + opx, + dst_reg.low_id(), + ); + encoder.imm32(@intCast(i32, imm32)); + } + }, + .embedded_in_code, .memory => { + return self.fail("TODO implement x86 ADD/SUB/CMP source memory", .{}); + }, + .stack_offset => |off| { + // register, indirect use mr + 3 + // addressing mode: *r16/32/64*, r/m16/32/64 + const abi_size = dst_ty.abiSize(self.target.*); + const adj_off = off + abi_size; + if (off > math.maxInt(i32)) { + return self.fail("stack offset too large", .{}); + } + const encoder = try Encoder.init(self.code, 7); + encoder.rex(.{ + .w = abi_size == 8, + .r = dst_reg.isExtended(), + }); + encoder.opcode_1byte(mr + 3); + if (adj_off <= std.math.maxInt(i8)) { + encoder.modRm_indirectDisp8( + dst_reg.low_id(), + Register.ebp.low_id(), + ); + encoder.disp8(-@intCast(i8, adj_off)); + } else { + encoder.modRm_indirectDisp32( + dst_reg.low_id(), + Register.ebp.low_id(), + ); + encoder.disp32(-@intCast(i32, adj_off)); + } + }, + .compare_flags_unsigned => { + return self.fail("TODO implement x86 ADD/SUB/CMP source compare flag (unsigned)", .{}); + }, + .compare_flags_signed => { + return self.fail("TODO implement x86 ADD/SUB/CMP source compare flag (signed)", .{}); + }, + } + }, + .stack_offset => |off| { + switch (src_mcv) { + .none => unreachable, + .undef => return self.genSetStack(dst_ty, off, .undef), + .dead, .unreach => unreachable, + .ptr_stack_offset => unreachable, + .ptr_embedded_in_code => unreachable, + .register => |src_reg| { + try self.genX8664ModRMRegToStack(dst_ty, off, src_reg, mr + 0x1); + }, + .immediate => |imm| { + _ = imm; + return self.fail("TODO implement x86 ADD/SUB/CMP source immediate", .{}); + }, + .embedded_in_code, .memory, .stack_offset => { + return self.fail("TODO implement x86 ADD/SUB/CMP source memory", .{}); + }, + .compare_flags_unsigned => { + return self.fail("TODO implement x86 ADD/SUB/CMP source compare flag (unsigned)", .{}); + }, + .compare_flags_signed => { + return self.fail("TODO implement x86 ADD/SUB/CMP source compare flag (signed)", .{}); + }, + } + }, + .embedded_in_code, .memory => { + return self.fail("TODO implement x86 ADD/SUB/CMP destination memory", .{}); + }, + } +} + +/// Performs integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv. 
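+/// For reference, the register/register form used below is `0F AF /r`;
+/// e.g. `imul rax, rcx` encodes as 48 0f af c1 (REX.W, 0F AF, ModRM 0xc1).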
+fn genX8664Imul( + self: *Self, + dst_ty: Type, + dst_mcv: MCValue, + src_mcv: MCValue, +) !void { + switch (dst_mcv) { + .none => unreachable, + .undef => unreachable, + .dead, .unreach, .immediate => unreachable, + .compare_flags_unsigned => unreachable, + .compare_flags_signed => unreachable, + .ptr_stack_offset => unreachable, + .ptr_embedded_in_code => unreachable, + .register => |dst_reg| { + switch (src_mcv) { + .none => unreachable, + .undef => try self.genSetReg(dst_ty, dst_reg, .undef), + .dead, .unreach => unreachable, + .ptr_stack_offset => unreachable, + .ptr_embedded_in_code => unreachable, + .register => |src_reg| { + // register, register + // + // Use the following imul opcode + // 0F AF /r: IMUL r32/64, r/m32/64 + const abi_size = dst_ty.abiSize(self.target.*); + const encoder = try Encoder.init(self.code, 4); + encoder.rex(.{ + .w = abi_size == 8, + .r = dst_reg.isExtended(), + .b = src_reg.isExtended(), + }); + encoder.opcode_2byte(0x0f, 0xaf); + encoder.modRm_direct( + dst_reg.low_id(), + src_reg.low_id(), + ); + }, + .immediate => |imm| { + // register, immediate: + // depends on size of immediate. + // + // immediate fits in i8: + // 6B /r ib: IMUL r32/64, r/m32/64, imm8 + // + // immediate fits in i32: + // 69 /r id: IMUL r32/64, r/m32/64, imm32 + // + // immediate is huge: + // split into 2 instructions + // 1) copy the 64 bit immediate into a tmp register + // 2) perform register,register mul + // 0F AF /r: IMUL r32/64, r/m32/64 + if (math.minInt(i8) <= imm and imm <= math.maxInt(i8)) { + const abi_size = dst_ty.abiSize(self.target.*); + const encoder = try Encoder.init(self.code, 4); + encoder.rex(.{ + .w = abi_size == 8, + .r = dst_reg.isExtended(), + .b = dst_reg.isExtended(), + }); + encoder.opcode_1byte(0x6B); + encoder.modRm_direct( + dst_reg.low_id(), + dst_reg.low_id(), + ); + encoder.imm8(@intCast(i8, imm)); + } else if (math.minInt(i32) <= imm and imm <= math.maxInt(i32)) { + const abi_size = dst_ty.abiSize(self.target.*); + const encoder = try Encoder.init(self.code, 7); + encoder.rex(.{ + .w = abi_size == 8, + .r = dst_reg.isExtended(), + .b = dst_reg.isExtended(), + }); + encoder.opcode_1byte(0x69); + encoder.modRm_direct( + dst_reg.low_id(), + dst_reg.low_id(), + ); + encoder.imm32(@intCast(i32, imm)); + } else { + const src_reg = try self.copyToTmpRegister(dst_ty, src_mcv); + return self.genX8664Imul(dst_ty, dst_mcv, MCValue{ .register = src_reg }); + } + }, + .embedded_in_code, .memory, .stack_offset => { + return self.fail("TODO implement x86 multiply source memory", .{}); + }, + .compare_flags_unsigned => { + return self.fail("TODO implement x86 multiply source compare flag (unsigned)", .{}); + }, + .compare_flags_signed => { + return self.fail("TODO implement x86 multiply source compare flag (signed)", .{}); + }, + } + }, + .stack_offset => |off| { + switch (src_mcv) { + .none => unreachable, + .undef => return self.genSetStack(dst_ty, off, .undef), + .dead, .unreach => unreachable, + .ptr_stack_offset => unreachable, + .ptr_embedded_in_code => unreachable, + .register => |src_reg| { + // copy dst to a register + const dst_reg = try self.copyToTmpRegister(dst_ty, dst_mcv); + // multiply into dst_reg + // register, register + // Use the following imul opcode + // 0F AF /r: IMUL r32/64, r/m32/64 + const abi_size = dst_ty.abiSize(self.target.*); + const encoder = try Encoder.init(self.code, 4); + encoder.rex(.{ + .w = abi_size == 8, + .r = dst_reg.isExtended(), + .b = src_reg.isExtended(), + }); + encoder.opcode_2byte(0x0f, 0xaf); + 
encoder.modRm_direct(
+ dst_reg.low_id(),
+ src_reg.low_id(),
+ );
+ // copy dst_reg back out
+ return self.genSetStack(dst_ty, off, MCValue{ .register = dst_reg });
+ },
+ .immediate => |imm| {
+ _ = imm;
+ return self.fail("TODO implement x86 multiply source immediate", .{});
+ },
+ .embedded_in_code, .memory, .stack_offset => {
+ return self.fail("TODO implement x86 multiply source memory", .{});
+ },
+ .compare_flags_unsigned => {
+ return self.fail("TODO implement x86 multiply source compare flag (unsigned)", .{});
+ },
+ .compare_flags_signed => {
+ return self.fail("TODO implement x86 multiply source compare flag (signed)", .{});
+ },
+ }
+ },
+ .embedded_in_code, .memory => {
+ return self.fail("TODO implement x86 multiply destination memory", .{});
+ },
+ }
+}
+
+fn genX8664ModRMRegToStack(self: *Self, ty: Type, off: u32, reg: Register, opcode: u8) !void {
+ const abi_size = ty.abiSize(self.target.*);
+ const adj_off = off + abi_size;
+ if (off > math.maxInt(i32)) {
+ return self.fail("stack offset too large", .{});
+ }
+
+ const i_adj_off = -@intCast(i32, adj_off);
+ const encoder = try Encoder.init(self.code, 7);
+ encoder.rex(.{
+ .w = abi_size == 8,
+ .r = reg.isExtended(),
+ });
+ encoder.opcode_1byte(opcode);
+ if (i_adj_off >= std.math.minInt(i8)) {
+ // example: 48 89 55 80 mov QWORD PTR [rbp-0x80],rdx
+ encoder.modRm_indirectDisp8(
+ reg.low_id(),
+ Register.ebp.low_id(),
+ );
+ encoder.disp8(@intCast(i8, i_adj_off));
+ } else {
+ // example: 48 89 95 7c ff ff ff mov QWORD PTR [rbp-0x84],rdx
+ encoder.modRm_indirectDisp32(
+ reg.low_id(),
+ Register.ebp.low_id(),
+ );
+ encoder.disp32(i_adj_off);
+ }
+}
+
+fn genArgDbgInfo(self: *Self, inst: Air.Inst.Index, mcv: MCValue) !void {
+ const ty_str = self.air.instructions.items(.data)[inst].ty_str;
+ const zir = &self.mod_fn.owner_decl.getFileScope().zir;
+ const name = zir.nullTerminatedString(ty_str.str);
+ const name_with_null = name.ptr[0 .. 
name.len + 1]; + const ty = self.air.getRefType(ty_str.ty); + + switch (mcv) { + .register => |reg| { + switch (self.debug_output) { + .dwarf => |dbg_out| { + try dbg_out.dbg_info.ensureUnusedCapacity(3); + dbg_out.dbg_info.appendAssumeCapacity(link.File.Elf.abbrev_parameter); + dbg_out.dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc + 1, // ULEB128 dwarf expression length + reg.dwarfLocOp(), + }); + try dbg_out.dbg_info.ensureUnusedCapacity(5 + name_with_null.len); + try self.addDbgInfoTypeReloc(ty); // DW.AT.type, DW.FORM.ref4 + dbg_out.dbg_info.appendSliceAssumeCapacity(name_with_null); // DW.AT.name, DW.FORM.string + }, + .plan9 => {}, + .none => {}, + } + }, + .stack_offset => { + switch (self.debug_output) { + .dwarf => {}, + .plan9 => {}, + .none => {}, + } + }, + else => {}, + } +} + +fn airArg(self: *Self, inst: Air.Inst.Index) !void { + const arg_index = self.arg_index; + self.arg_index += 1; + + const ty = self.air.typeOfIndex(inst); + _ = ty; + + const mcv = self.args[arg_index]; + try self.genArgDbgInfo(inst, mcv); + + if (self.liveness.isUnused(inst)) + return self.finishAirBookkeeping(); + + switch (mcv) { + .register => |reg| { + self.register_manager.getRegAssumeFree(reg.to64(), inst); + }, + else => {}, + } + + return self.finishAir(inst, mcv, .{ .none, .none, .none }); +} + +fn airBreakpoint(self: *Self) !void { + try self.code.append(0xcc); // int3 + return self.finishAirBookkeeping(); +} + +fn airFence(self: *Self) !void { + return self.fail("TODO implement fence() for {}", .{self.target.cpu.arch}); + //return self.finishAirBookkeeping(); +} + +fn airCall(self: *Self, inst: Air.Inst.Index) !void { + const pl_op = self.air.instructions.items(.data)[inst].pl_op; + const fn_ty = self.air.typeOf(pl_op.operand); + const callee = pl_op.operand; + const extra = self.air.extraData(Air.Call, pl_op.payload); + const args = @bitCast([]const Air.Inst.Ref, self.air.extra[extra.end..][0..extra.data.args_len]); + + var info = try self.resolveCallingConventionValues(fn_ty); + defer info.deinit(self); + + // Due to incremental compilation, how function calls are generated depends + // on linking. + if (self.bin_file.tag == link.File.Elf.base_tag or self.bin_file.tag == link.File.Coff.base_tag) { + for (info.args) |mc_arg, arg_i| { + const arg = args[arg_i]; + const arg_ty = self.air.typeOf(arg); + const arg_mcv = try self.resolveInst(args[arg_i]); + // Here we do not use setRegOrMem even though the logic is similar, because + // the function call will move the stack pointer, so the offsets are different. 
+ switch (mc_arg) { + .none => continue, + .register => |reg| { + try self.register_manager.getReg(reg, null); + try self.genSetReg(arg_ty, reg, arg_mcv); + }, + .stack_offset => |off| { + // Here we need to emit instructions like this: + // mov qword ptr [rsp + stack_offset], x + try self.genSetStack(arg_ty, off, arg_mcv); + }, + .ptr_stack_offset => { + return self.fail("TODO implement calling with MCValue.ptr_stack_offset arg", .{}); + }, + .ptr_embedded_in_code => { + return self.fail("TODO implement calling with MCValue.ptr_embedded_in_code arg", .{}); + }, + .undef => unreachable, + .immediate => unreachable, + .unreach => unreachable, + .dead => unreachable, + .embedded_in_code => unreachable, + .memory => unreachable, + .compare_flags_signed => unreachable, + .compare_flags_unsigned => unreachable, + } + } + + if (self.air.value(callee)) |func_value| { + if (func_value.castTag(.function)) |func_payload| { + const func = func_payload.data; + + const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bytes: u64 = @divExact(ptr_bits, 8); + const got_addr = if (self.bin_file.cast(link.File.Elf)) |elf_file| blk: { + const got = &elf_file.program_headers.items[elf_file.phdr_got_index.?]; + break :blk @intCast(u32, got.p_vaddr + func.owner_decl.link.elf.offset_table_index * ptr_bytes); + } else if (self.bin_file.cast(link.File.Coff)) |coff_file| + @intCast(u32, coff_file.offset_table_virtual_address + func.owner_decl.link.coff.offset_table_index * ptr_bytes) + else + unreachable; + + // ff 14 25 xx xx xx xx call [addr] + try self.code.ensureUnusedCapacity(7); + self.code.appendSliceAssumeCapacity(&[3]u8{ 0xff, 0x14, 0x25 }); + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), got_addr); + } else if (func_value.castTag(.extern_fn)) |_| { + return self.fail("TODO implement calling extern functions", .{}); + } else { + return self.fail("TODO implement calling bitcasted functions", .{}); + } + } else { + return self.fail("TODO implement calling runtime known function pointer", .{}); + } + } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { + for (info.args) |mc_arg, arg_i| { + const arg = args[arg_i]; + const arg_ty = self.air.typeOf(arg); + const arg_mcv = try self.resolveInst(args[arg_i]); + // Here we do not use setRegOrMem even though the logic is similar, because + // the function call will move the stack pointer, so the offsets are different. 
+ switch (mc_arg) {
+ .none => continue,
+ .register => |reg| {
+ // TODO prevent this macho if block from being generated for all archs
+ try self.register_manager.getReg(reg, null);
+ try self.genSetReg(arg_ty, reg, arg_mcv);
+ },
+ .stack_offset => {
+ // Here we need to emit instructions like this:
+ // mov qword ptr [rsp + stack_offset], x
+ return self.fail("TODO implement calling with parameters in memory", .{});
+ },
+ .ptr_stack_offset => {
+ return self.fail("TODO implement calling with MCValue.ptr_stack_offset arg", .{});
+ },
+ .ptr_embedded_in_code => {
+ return self.fail("TODO implement calling with MCValue.ptr_embedded_in_code arg", .{});
+ },
+ .undef => unreachable,
+ .immediate => unreachable,
+ .unreach => unreachable,
+ .dead => unreachable,
+ .embedded_in_code => unreachable,
+ .memory => unreachable,
+ .compare_flags_signed => unreachable,
+ .compare_flags_unsigned => unreachable,
+ }
+ }
+
+ if (self.air.value(callee)) |func_value| {
+ if (func_value.castTag(.function)) |func_payload| {
+ const func = func_payload.data;
+ // TODO I'm hacking my way through here by repurposing .memory to store
+ // the GOT target symbol index.
+ try self.genSetReg(Type.initTag(.u64), .rax, .{
+ .memory = func.owner_decl.link.macho.local_sym_index,
+ });
+ // callq *%rax
+ try self.code.ensureUnusedCapacity(2);
+ self.code.appendSliceAssumeCapacity(&[2]u8{ 0xff, 0xd0 });
+ } else if (func_value.castTag(.extern_fn)) |func_payload| {
+ const decl = func_payload.data;
+ const n_strx = try macho_file.addExternFn(mem.spanZ(decl.name));
+ const offset = blk: {
+ // callq
+ try self.code.ensureUnusedCapacity(5);
+ self.code.appendSliceAssumeCapacity(&[5]u8{ 0xe8, 0x0, 0x0, 0x0, 0x0 });
+ break :blk @intCast(u32, self.code.items.len) - 4;
+ };
+ // Add relocation to the decl.
+ try macho_file.active_decl.?.link.macho.relocs.append(self.bin_file.allocator, .{
+ .offset = offset,
+ .target = .{ .global = n_strx },
+ .addend = 0,
+ .subtractor = null,
+ .pcrel = true,
+ .length = 2,
+ .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_BRANCH),
+ });
+ } else {
+ return self.fail("TODO implement calling bitcasted functions", .{});
+ }
+ } else {
+ return self.fail("TODO implement calling runtime known function pointer", .{});
+ }
+ } else if (self.bin_file.cast(link.File.Plan9)) |p9| {
+ for (info.args) |mc_arg, arg_i| {
+ const arg = args[arg_i];
+ const arg_ty = self.air.typeOf(arg);
+ const arg_mcv = try self.resolveInst(args[arg_i]);
+ // Here we do not use setRegOrMem even though the logic is similar, because
+ // the function call will move the stack pointer, so the offsets are different.
+ switch (mc_arg) { + .none => continue, + .register => |reg| { + try self.register_manager.getReg(reg, null); + try self.genSetReg(arg_ty, reg, arg_mcv); + }, + .stack_offset => { + // Here we need to emit instructions like this: + // mov qword ptr [rsp + stack_offset], x + return self.fail("TODO implement calling with parameters in memory", .{}); + }, + .ptr_stack_offset => { + return self.fail("TODO implement calling with MCValue.ptr_stack_offset arg", .{}); + }, + .ptr_embedded_in_code => { + return self.fail("TODO implement calling with MCValue.ptr_embedded_in_code arg", .{}); + }, + .undef => unreachable, + .immediate => unreachable, + .unreach => unreachable, + .dead => unreachable, + .embedded_in_code => unreachable, + .memory => unreachable, + .compare_flags_signed => unreachable, + .compare_flags_unsigned => unreachable, + } + } + if (self.air.value(callee)) |func_value| { + if (func_value.castTag(.function)) |func_payload| { + try p9.seeDecl(func_payload.data.owner_decl); + const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bytes: u64 = @divExact(ptr_bits, 8); + const got_addr = p9.bases.data; + const got_index = func_payload.data.owner_decl.link.plan9.got_index.?; + // ff 14 25 xx xx xx xx call [addr] + try self.code.ensureUnusedCapacity(7); + self.code.appendSliceAssumeCapacity(&[3]u8{ 0xff, 0x14, 0x25 }); + const fn_got_addr = got_addr + got_index * ptr_bytes; + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), @intCast(u32, fn_got_addr)); + } else return self.fail("TODO implement calling extern fn on plan9", .{}); + } else { + return self.fail("TODO implement calling runtime known function pointer", .{}); + } + } else unreachable; + + const result: MCValue = result: { + switch (info.return_value) { + .register => |reg| { + if (Register.allocIndex(reg) == null) { + // Save function return value in a callee saved register + break :result try self.copyToNewRegister(inst, info.return_value); + } + }, + else => {}, + } + break :result info.return_value; + }; + + if (args.len <= Liveness.bpi - 2) { + var buf = [1]Air.Inst.Ref{.none} ** (Liveness.bpi - 1); + buf[0] = callee; + std.mem.copy(Air.Inst.Ref, buf[1..], args); + return self.finishAir(inst, result, buf); + } + var bt = try self.iterateBigTomb(inst, 1 + args.len); + bt.feed(callee); + for (args) |arg| { + bt.feed(arg); + } + return bt.finishAir(result); +} + +fn ret(self: *Self, mcv: MCValue) !void { + const ret_ty = self.fn_type.fnReturnType(); + try self.setRegOrMem(ret_ty, self.ret_mcv, mcv); + // TODO when implementing defer, this will need to jump to the appropriate defer expression. + // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction + // which is available if the jump is 127 bytes or less forward. 
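+ // (That 2-byte form would be `jmp rel8`, 0xeb ib; for now we always emit the
+ // 5-byte `jmp rel32`, 0xe9 id, and let the exitlude reloc patch in the offset.)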
+ try self.code.resize(self.code.items.len + 5); + self.code.items[self.code.items.len - 5] = 0xe9; // jmp rel32 + try self.exitlude_jump_relocs.append(self.gpa, self.code.items.len - 4); +} + +fn airRet(self: *Self, inst: Air.Inst.Index) !void { + const un_op = self.air.instructions.items(.data)[inst].un_op; + const operand = try self.resolveInst(un_op); + try self.ret(operand); + return self.finishAir(inst, .dead, .{ un_op, .none, .none }); +} + +fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { + const un_op = self.air.instructions.items(.data)[inst].un_op; + const ptr = try self.resolveInst(un_op); + _ = ptr; + return self.fail("TODO implement airRetLoad for {}", .{self.target.cpu.arch}); + //return self.finishAir(inst, .dead, .{ un_op, .none, .none }); +} + +fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + if (self.liveness.isUnused(inst)) + return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none }); + const ty = self.air.typeOf(bin_op.lhs); + assert(ty.eql(self.air.typeOf(bin_op.rhs))); + if (ty.zigTypeTag() == .ErrorSet) + return self.fail("TODO implement cmp for errors", .{}); + + const lhs = try self.resolveInst(bin_op.lhs); + const rhs = try self.resolveInst(bin_op.rhs); + const result: MCValue = result: { + try self.code.ensureUnusedCapacity(8); + + // There are 2 operands, destination and source. + // Either one, but not both, can be a memory operand. + // Source operand can be an immediate, 8 bits or 32 bits. + const dst_mcv = if (lhs.isImmediate() or (lhs.isMemory() and rhs.isMemory())) + try self.copyToNewRegister(inst, lhs) + else + lhs; + // This instruction supports only signed 32-bit immediates at most. + const src_mcv = try self.limitImmediateType(bin_op.rhs, i32); + + try self.genX8664BinMathCode(Type.initTag(.bool), dst_mcv, src_mcv, 7, 0x38); + break :result switch (ty.isSignedInt()) { + true => MCValue{ .compare_flags_signed = op }, + false => MCValue{ .compare_flags_unsigned = op }, + }; + }; + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void { + const dbg_stmt = self.air.instructions.items(.data)[inst].dbg_stmt; + try self.dbgAdvancePCAndLine(dbg_stmt.line, dbg_stmt.column); + return self.finishAirBookkeeping(); +} + +fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { + const pl_op = self.air.instructions.items(.data)[inst].pl_op; + const cond = try self.resolveInst(pl_op.operand); + const extra = self.air.extraData(Air.CondBr, pl_op.payload); + const then_body = self.air.extra[extra.end..][0..extra.data.then_body_len]; + const else_body = self.air.extra[extra.end + then_body.len ..][0..extra.data.else_body_len]; + const liveness_condbr = self.liveness.getCondBr(inst); + + const reloc: Reloc = reloc: { + try self.code.ensureUnusedCapacity(6); + + const opcode: u8 = switch (cond) { + .compare_flags_signed => |cmp_op| blk: { + // Here we map to the opposite opcode because the jump is to the false branch. + const opcode: u8 = switch (cmp_op) { + .gte => 0x8c, + .gt => 0x8e, + .neq => 0x84, + .lt => 0x8d, + .lte => 0x8f, + .eq => 0x85, + }; + break :blk opcode; + }, + .compare_flags_unsigned => |cmp_op| blk: { + // Here we map to the opposite opcode because the jump is to the false branch. 
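+ // E.g. an unsigned `.gte` condition maps to 0x82 (`jb`), since the false
+ // branch must be taken exactly when lhs is below rhs.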
+ const opcode: u8 = switch (cmp_op) {
+ .gte => 0x82,
+ .gt => 0x86,
+ .neq => 0x84,
+ .lt => 0x83,
+ .lte => 0x87,
+ .eq => 0x85,
+ };
+ break :blk opcode;
+ },
+ .register => |reg| blk: {
+ // test reg, 1
+ // TODO detect al, ax, eax
+ const encoder = try Encoder.init(self.code, 4);
+ encoder.rex(.{
+ // TODO audit this codegen: we force w = true here to make
+ // the value affect the big register
+ .w = true,
+ .b = reg.isExtended(),
+ });
+ encoder.opcode_1byte(0xf6);
+ encoder.modRm_direct(
+ 0,
+ reg.low_id(),
+ );
+ encoder.disp8(1);
+ break :blk 0x84;
+ },
+ else => return self.fail("TODO implement condbr {s} when condition is {s}", .{ self.target.cpu.arch, @tagName(cond) }),
+ };
+ self.code.appendSliceAssumeCapacity(&[_]u8{ 0x0f, opcode });
+ const reloc = Reloc{ .rel32 = self.code.items.len };
+ self.code.items.len += 4;
+ break :reloc reloc;
+ };
+
+ // Capture the register and stack allocation state so that we can revert to it.
+ const parent_next_stack_offset = self.next_stack_offset;
+ const parent_free_registers = self.register_manager.free_registers;
+ var parent_stack = try self.stack.clone(self.gpa);
+ defer parent_stack.deinit(self.gpa);
+ const parent_registers = self.register_manager.registers;
+
+ try self.branch_stack.append(.{});
+
+ try self.ensureProcessDeathCapacity(liveness_condbr.then_deaths.len);
+ for (liveness_condbr.then_deaths) |operand| {
+ self.processDeath(operand);
+ }
+ try self.genBody(then_body);
+
+ // Revert to the previous register and stack allocation state.
+
+ var saved_then_branch = self.branch_stack.pop();
+ defer saved_then_branch.deinit(self.gpa);
+
+ self.register_manager.registers = parent_registers;
+
+ self.stack.deinit(self.gpa);
+ self.stack = parent_stack;
+ parent_stack = .{};
+
+ self.next_stack_offset = parent_next_stack_offset;
+ self.register_manager.free_registers = parent_free_registers;
+
+ try self.performReloc(reloc);
+ const else_branch = self.branch_stack.addOneAssumeCapacity();
+ else_branch.* = .{};
+
+ try self.ensureProcessDeathCapacity(liveness_condbr.else_deaths.len);
+ for (liveness_condbr.else_deaths) |operand| {
+ self.processDeath(operand);
+ }
+ try self.genBody(else_body);
+
+ // At this point, each branch will possibly have conflicting values for where
+ // each instruction is stored. They agree, however, on which instructions are alive/dead.
+ // We use the first ("then") branch as canonical, and here emit
+ // instructions into the second ("else") branch to make it conform.
+ // We continue to respect the data structure semantic guarantees of the else_branch so
+ // that we can use all the code emitting abstractions. This is why at the bottom we
+ // assert that parent_branch.free_registers equals the saved_then_branch.free_registers
+ // rather than assigning it.
+ const parent_branch = &self.branch_stack.items[self.branch_stack.items.len - 2];
+ try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, else_branch.inst_table.count());
+
+ const else_slice = else_branch.inst_table.entries.slice();
+ const else_keys = else_slice.items(.key);
+ const else_values = else_slice.items(.value);
+ for (else_keys) |else_key, else_idx| {
+ const else_value = else_values[else_idx];
+ const canon_mcv = if (saved_then_branch.inst_table.fetchSwapRemove(else_key)) |then_entry| blk: {
+ // The instruction's MCValue is overridden in both branches.
+ parent_branch.inst_table.putAssumeCapacity(else_key, then_entry.value);
+ if (else_value == .dead) {
+ assert(then_entry.value == .dead);
+ continue;
+ }
+ break :blk then_entry.value;
+ } else blk: {
+ if (else_value == .dead)
+ continue;
+ // The instruction is only overridden in the else branch.
+ var i: usize = self.branch_stack.items.len - 2;
+ while (true) {
+ i -= 1; // If this overflows, the question is: why wasn't the instruction marked dead?
+ if (self.branch_stack.items[i].inst_table.get(else_key)) |mcv| {
+ assert(mcv != .dead);
+ break :blk mcv;
+ }
+ }
+ };
+ log.debug("consolidating else_entry {d} {}=>{}", .{ else_key, else_value, canon_mcv });
+ // TODO make sure the destination stack offset / register does not already have something
+ // going on there.
+ try self.setRegOrMem(self.air.typeOfIndex(else_key), canon_mcv, else_value);
+ // TODO track the new register / stack allocation
+ }
+ try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, saved_then_branch.inst_table.count());
+ const then_slice = saved_then_branch.inst_table.entries.slice();
+ const then_keys = then_slice.items(.key);
+ const then_values = then_slice.items(.value);
+ for (then_keys) |then_key, then_idx| {
+ const then_value = then_values[then_idx];
+ // We already deleted the items from this table that matched the else_branch.
+ // So these are all instructions that are only overridden in the then branch.
+ parent_branch.inst_table.putAssumeCapacity(then_key, then_value);
+ if (then_value == .dead)
+ continue;
+ const parent_mcv = blk: {
+ var i: usize = self.branch_stack.items.len - 2;
+ while (true) {
+ i -= 1;
+ if (self.branch_stack.items[i].inst_table.get(then_key)) |mcv| {
+ assert(mcv != .dead);
+ break :blk mcv;
+ }
+ }
+ };
+ log.debug("consolidating then_entry {d} {}=>{}", .{ then_key, parent_mcv, then_value });
+ // TODO make sure the destination stack offset / register does not already have something
+ // going on there.
+ try self.setRegOrMem(self.air.typeOfIndex(then_key), parent_mcv, then_value);
+ // TODO track the new register / stack allocation
+ }
+
+ self.branch_stack.pop().deinit(self.gpa);
+
+ return self.finishAir(inst, .unreach, .{ pl_op.operand, .none, .none });
+}
+
+fn isNull(self: *Self, operand: MCValue) !MCValue {
+ _ = operand;
+ // Here you can specialize this instruction if it makes sense to, otherwise the default
+ // will call isNonNull and invert the result.
+ return self.fail("TODO call isNonNull and invert the result", .{});
+}
+
+fn isNonNull(self: *Self, operand: MCValue) !MCValue {
+ _ = operand;
+ // Here you can specialize this instruction if it makes sense to, otherwise the default
+ // will call isNull and invert the result.
+ return self.fail("TODO call isNull and invert the result", .{});
+}
+
+fn isErr(self: *Self, operand: MCValue) !MCValue {
+ _ = operand;
+ // Here you can specialize this instruction if it makes sense to, otherwise the default
+ // will call isNonErr and invert the result.
+ return self.fail("TODO call isNonErr and invert the result", .{});
+}
+
+fn isNonErr(self: *Self, operand: MCValue) !MCValue {
+ _ = operand;
+ // Here you can specialize this instruction if it makes sense to, otherwise the default
+ // will call isErr and invert the result.
+ return self.fail("TODO call isErr and invert the result", .{}); +} + +fn airIsNull(self: *Self, inst: Air.Inst.Index) !void { + const un_op = self.air.instructions.items(.data)[inst].un_op; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const operand = try self.resolveInst(un_op); + break :result try self.isNull(operand); + }; + return self.finishAir(inst, result, .{ un_op, .none, .none }); +} + +fn airIsNullPtr(self: *Self, inst: Air.Inst.Index) !void { + const un_op = self.air.instructions.items(.data)[inst].un_op; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const operand_ptr = try self.resolveInst(un_op); + const operand: MCValue = blk: { + if (self.reuseOperand(inst, un_op, 0, operand_ptr)) { + // The MCValue that holds the pointer can be re-used as the value. + break :blk operand_ptr; + } else { + break :blk try self.allocRegOrMem(inst, true); + } + }; + try self.load(operand, operand_ptr, self.air.typeOf(un_op)); + break :result try self.isNull(operand); + }; + return self.finishAir(inst, result, .{ un_op, .none, .none }); +} + +fn airIsNonNull(self: *Self, inst: Air.Inst.Index) !void { + const un_op = self.air.instructions.items(.data)[inst].un_op; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const operand = try self.resolveInst(un_op); + break :result try self.isNonNull(operand); + }; + return self.finishAir(inst, result, .{ un_op, .none, .none }); +} + +fn airIsNonNullPtr(self: *Self, inst: Air.Inst.Index) !void { + const un_op = self.air.instructions.items(.data)[inst].un_op; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const operand_ptr = try self.resolveInst(un_op); + const operand: MCValue = blk: { + if (self.reuseOperand(inst, un_op, 0, operand_ptr)) { + // The MCValue that holds the pointer can be re-used as the value. + break :blk operand_ptr; + } else { + break :blk try self.allocRegOrMem(inst, true); + } + }; + try self.load(operand, operand_ptr, self.air.typeOf(un_op)); + break :result try self.isNonNull(operand); + }; + return self.finishAir(inst, result, .{ un_op, .none, .none }); +} + +fn airIsErr(self: *Self, inst: Air.Inst.Index) !void { + const un_op = self.air.instructions.items(.data)[inst].un_op; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const operand = try self.resolveInst(un_op); + break :result try self.isErr(operand); + }; + return self.finishAir(inst, result, .{ un_op, .none, .none }); +} + +fn airIsErrPtr(self: *Self, inst: Air.Inst.Index) !void { + const un_op = self.air.instructions.items(.data)[inst].un_op; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const operand_ptr = try self.resolveInst(un_op); + const operand: MCValue = blk: { + if (self.reuseOperand(inst, un_op, 0, operand_ptr)) { + // The MCValue that holds the pointer can be re-used as the value. 
+ break :blk operand_ptr; + } else { + break :blk try self.allocRegOrMem(inst, true); + } + }; + try self.load(operand, operand_ptr, self.air.typeOf(un_op)); + break :result try self.isErr(operand); + }; + return self.finishAir(inst, result, .{ un_op, .none, .none }); +} + +fn airIsNonErr(self: *Self, inst: Air.Inst.Index) !void { + const un_op = self.air.instructions.items(.data)[inst].un_op; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const operand = try self.resolveInst(un_op); + break :result try self.isNonErr(operand); + }; + return self.finishAir(inst, result, .{ un_op, .none, .none }); +} + +fn airIsNonErrPtr(self: *Self, inst: Air.Inst.Index) !void { + const un_op = self.air.instructions.items(.data)[inst].un_op; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const operand_ptr = try self.resolveInst(un_op); + const operand: MCValue = blk: { + if (self.reuseOperand(inst, un_op, 0, operand_ptr)) { + // The MCValue that holds the pointer can be re-used as the value. + break :blk operand_ptr; + } else { + break :blk try self.allocRegOrMem(inst, true); + } + }; + try self.load(operand, operand_ptr, self.air.typeOf(un_op)); + break :result try self.isNonErr(operand); + }; + return self.finishAir(inst, result, .{ un_op, .none, .none }); +} + +fn airLoop(self: *Self, inst: Air.Inst.Index) !void { + // A loop is a setup to be able to jump back to the beginning. + const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; + const loop = self.air.extraData(Air.Block, ty_pl.payload); + const body = self.air.extra[loop.end..][0..loop.data.body_len]; + const start_index = self.code.items.len; + try self.genBody(body); + try self.jump(start_index); + return self.finishAirBookkeeping(); +} + +/// Send control flow to the `index` of `self.code`. +fn jump(self: *Self, index: usize) !void { + try self.code.ensureUnusedCapacity(5); + if (math.cast(i8, @intCast(i32, index) - (@intCast(i32, self.code.items.len + 2)))) |delta| { + self.code.appendAssumeCapacity(0xeb); // jmp rel8 + self.code.appendAssumeCapacity(@bitCast(u8, delta)); + } else |_| { + const delta = @intCast(i32, index) - (@intCast(i32, self.code.items.len + 5)); + self.code.appendAssumeCapacity(0xe9); // jmp rel32 + mem.writeIntLittle(i32, self.code.addManyAsArrayAssumeCapacity(4), delta); + } +} + +fn airBlock(self: *Self, inst: Air.Inst.Index) !void { + try self.blocks.putNoClobber(self.gpa, inst, .{ + // A block is a setup to be able to jump to the end. + .relocs = .{}, + // It also acts as a receptacle for break operands. + // Here we use `MCValue.none` to represent a null value so that the first + // break instruction will choose a MCValue for the block result and overwrite + // this field. Following break instructions will use that MCValue to put their + // block results. 
+ .mcv = MCValue{ .none = {} },
+ });
+ const block_data = self.blocks.getPtr(inst).?;
+ defer block_data.relocs.deinit(self.gpa);
+
+ const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
+ const extra = self.air.extraData(Air.Block, ty_pl.payload);
+ const body = self.air.extra[extra.end..][0..extra.data.body_len];
+ try self.genBody(body);
+
+ for (block_data.relocs.items) |reloc| try self.performReloc(reloc);
+
+ const result = @bitCast(MCValue, block_data.mcv);
+ return self.finishAir(inst, result, .{ .none, .none, .none });
+}
+
+fn airSwitch(self: *Self, inst: Air.Inst.Index) !void {
+ const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+ const condition = pl_op.operand;
+ _ = condition;
+ return self.fail("TODO airSwitch for {}", .{self.target.cpu.arch});
+ // return self.finishAir(inst, .dead, .{ condition, .none, .none });
+}
+
+fn performReloc(self: *Self, reloc: Reloc) !void {
+ switch (reloc) {
+ .rel32 => |pos| {
+ const amt = self.code.items.len - (pos + 4);
+ // Here it would be tempting to test for amt == 0 and then elide the
+ // jump. However, that will cause a problem because other jumps may assume that they
+ // can jump to this code. Or maybe I didn't understand something when I was debugging.
+ // It could be worth another look. Anyway, that's why that isn't done here. Probably the
+ // best place to elide jumps will be in semantic analysis, by inlining blocks that
+ // only have 1 break instruction.
+ const s32_amt = math.cast(i32, amt) catch
+ return self.fail("unable to perform relocation: jump too far", .{});
+ mem.writeIntLittle(i32, self.code.items[pos..][0..4], s32_amt);
+ },
+ .arm_branch => unreachable,
+ }
+}
+
+fn airBr(self: *Self, inst: Air.Inst.Index) !void {
+ const branch = self.air.instructions.items(.data)[inst].br;
+ try self.br(branch.block_inst, branch.operand);
+ return self.finishAir(inst, .dead, .{ branch.operand, .none, .none });
+}
+
+fn airBoolOp(self: *Self, inst: Air.Inst.Index) !void {
+ const bin_op = self.air.instructions.items(.data)[inst].bin_op;
+ const air_tags = self.air.instructions.items(.tag);
+ const result: MCValue = if (self.liveness.isUnused(inst))
+ .dead
+ else switch (air_tags[inst]) {
+ // lhs AND rhs
+ .bool_and => try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs),
+ // lhs OR rhs
+ .bool_or => try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs),
+ else => unreachable, // Not a boolean operation
+ };
+ return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
+}
+
+fn br(self: *Self, block: Air.Inst.Index, operand: Air.Inst.Ref) !void {
+ const block_data = self.blocks.getPtr(block).?;
+
+ if (self.air.typeOf(operand).hasCodeGenBits()) {
+ const operand_mcv = try self.resolveInst(operand);
+ const block_mcv = block_data.mcv;
+ if (block_mcv == .none) {
+ block_data.mcv = operand_mcv;
+ } else {
+ try self.setRegOrMem(self.air.typeOfIndex(block), block_mcv, operand_mcv);
+ }
+ }
+ return self.brVoid(block);
+}
+
+fn brVoid(self: *Self, block: Air.Inst.Index) !void {
+ const block_data = self.blocks.getPtr(block).?;
+ // Emit a jump with a relocation. It will be patched up after the block ends.
+ try block_data.relocs.ensureUnusedCapacity(self.gpa, 1);
+ // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
+ // which is available if the jump is 127 bytes or less forward.
+ try self.code.resize(self.code.items.len + 5); + self.code.items[self.code.items.len - 5] = 0xe9; // jmp rel32 + // Leave the jump offset undefined + block_data.relocs.appendAssumeCapacity(.{ .rel32 = self.code.items.len - 4 }); +} + +fn airAsm(self: *Self, inst: Air.Inst.Index) !void { + const air_datas = self.air.instructions.items(.data); + const air_extra = self.air.extraData(Air.Asm, air_datas[inst].ty_pl.payload); + const zir = self.mod_fn.owner_decl.getFileScope().zir; + const extended = zir.instructions.items(.data)[air_extra.data.zir_index].extended; + const zir_extra = zir.extraData(Zir.Inst.Asm, extended.operand); + const asm_source = zir.nullTerminatedString(zir_extra.data.asm_source); + const outputs_len = @truncate(u5, extended.small); + const args_len = @truncate(u5, extended.small >> 5); + const clobbers_len = @truncate(u5, extended.small >> 10); + _ = clobbers_len; // TODO honor these + const is_volatile = @truncate(u1, extended.small >> 15) != 0; + const outputs = @bitCast([]const Air.Inst.Ref, self.air.extra[air_extra.end..][0..outputs_len]); + const args = @bitCast([]const Air.Inst.Ref, self.air.extra[air_extra.end + outputs.len ..][0..args_len]); + + if (outputs_len > 1) { + return self.fail("TODO implement codegen for asm with more than 1 output", .{}); + } + var extra_i: usize = zir_extra.end; + const output_constraint: ?[]const u8 = out: { + var i: usize = 0; + while (i < outputs_len) : (i += 1) { + const output = zir.extraData(Zir.Inst.Asm.Output, extra_i); + extra_i = output.end; + break :out zir.nullTerminatedString(output.data.constraint); + } + break :out null; + }; + + const dead = !is_volatile and self.liveness.isUnused(inst); + const result: MCValue = if (dead) + .dead + else result: { + for (args) |arg| { + const input = zir.extraData(Zir.Inst.Asm.Input, extra_i); + extra_i = input.end; + const constraint = zir.nullTerminatedString(input.data.constraint); + + if (constraint.len < 3 or constraint[0] != '{' or constraint[constraint.len - 1] != '}') { + return self.fail("unrecognized asm input constraint: '{s}'", .{constraint}); + } + const reg_name = constraint[1 .. 
constraint.len - 1]; + const reg = parseRegName(reg_name) orelse + return self.fail("unrecognized register: '{s}'", .{reg_name}); + + const arg_mcv = try self.resolveInst(arg); + try self.register_manager.getReg(reg, null); + try self.genSetReg(self.air.typeOf(arg), reg, arg_mcv); + } + + { + var iter = std.mem.tokenize(u8, asm_source, "\n\r"); + while (iter.next()) |ins| { + if (mem.eql(u8, ins, "syscall")) { + try self.code.appendSlice(&[_]u8{ 0x0f, 0x05 }); + } else if (mem.indexOf(u8, ins, "push")) |_| { + const arg = ins[4..]; + if (mem.indexOf(u8, arg, "$")) |l| { + const n = std.fmt.parseInt(u8, ins[4 + l + 1 ..], 10) catch return self.fail("TODO implement more inline asm int parsing", .{}); + try self.code.appendSlice(&.{ 0x6a, n }); + } else if (mem.indexOf(u8, arg, "%%")) |l| { + const reg_name = ins[4 + l + 2 ..]; + const reg = parseRegName(reg_name) orelse + return self.fail("unrecognized register: '{s}'", .{reg_name}); + const low_id: u8 = reg.low_id(); + if (reg.isExtended()) { + try self.code.appendSlice(&.{ 0x41, 0b1010000 | low_id }); + } else { + try self.code.append(0b1010000 | low_id); + } + } else return self.fail("TODO more push operands", .{}); + } else if (mem.indexOf(u8, ins, "pop")) |_| { + const arg = ins[3..]; + if (mem.indexOf(u8, arg, "%%")) |l| { + const reg_name = ins[3 + l + 2 ..]; + const reg = parseRegName(reg_name) orelse + return self.fail("unrecognized register: '{s}'", .{reg_name}); + const low_id: u8 = reg.low_id(); + if (reg.isExtended()) { + try self.code.appendSlice(&.{ 0x41, 0b1011000 | low_id }); + } else { + try self.code.append(0b1011000 | low_id); + } + } else return self.fail("TODO more pop operands", .{}); + } else { + return self.fail("TODO implement support for more x86 assembly instructions", .{}); + } + } + } + + if (output_constraint) |output| { + if (output.len < 4 or output[0] != '=' or output[1] != '{' or output[output.len - 1] != '}') { + return self.fail("unrecognized asm output constraint: '{s}'", .{output}); + } + const reg_name = output[2 .. output.len - 1]; + const reg = parseRegName(reg_name) orelse + return self.fail("unrecognized register: '{s}'", .{reg_name}); + break :result MCValue{ .register = reg }; + } else { + break :result MCValue{ .none = {} }; + } + }; + if (outputs.len + args.len <= Liveness.bpi - 1) { + var buf = [1]Air.Inst.Ref{.none} ** (Liveness.bpi - 1); + std.mem.copy(Air.Inst.Ref, &buf, outputs); + std.mem.copy(Air.Inst.Ref, buf[outputs.len..], args); + return self.finishAir(inst, result, buf); + } + var bt = try self.iterateBigTomb(inst, outputs.len + args.len); + for (outputs) |output| { + bt.feed(output); + } + for (args) |arg| { + bt.feed(arg); + } + return bt.finishAir(result); +} + +fn iterateBigTomb(self: *Self, inst: Air.Inst.Index, operand_count: usize) !BigTomb { + try self.ensureProcessDeathCapacity(operand_count + 1); + return BigTomb{ + .function = self, + .inst = inst, + .tomb_bits = self.liveness.getTombBits(inst), + .big_tomb_bits = self.liveness.special.get(inst) orelse 0, + .bit_index = 0, + }; +} + +/// Sets the value without any modifications to register allocation metadata or stack allocation metadata. 
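+/// The destination `loc` is assumed to have already been allocated by the caller; this only emits the copy.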
+fn setRegOrMem(self: *Self, ty: Type, loc: MCValue, val: MCValue) !void { + switch (loc) { + .none => return, + .register => |reg| return self.genSetReg(ty, reg, val), + .stack_offset => |off| return self.genSetStack(ty, off, val), + .memory => { + return self.fail("TODO implement setRegOrMem for memory", .{}); + }, + else => unreachable, + } +} + +fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerError!void { + switch (mcv) { + .dead => unreachable, + .ptr_stack_offset => unreachable, + .ptr_embedded_in_code => unreachable, + .unreach, .none => return, // Nothing to do. + .undef => { + if (!self.wantSafety()) + return; // The already existing value will do just fine. + // TODO Upgrade this to a memset call when we have that available. + switch (ty.abiSize(self.target.*)) { + 1 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaa }), + 2 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaa }), + 4 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaaaaaa }), + 8 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaaaaaaaaaaaaaa }), + else => return self.fail("TODO implement memset", .{}), + } + }, + .compare_flags_unsigned => |op| { + _ = op; + return self.fail("TODO implement set stack variable with compare flags value (unsigned)", .{}); + }, + .compare_flags_signed => |op| { + _ = op; + return self.fail("TODO implement set stack variable with compare flags value (signed)", .{}); + }, + .immediate => |x_big| { + const abi_size = ty.abiSize(self.target.*); + const adj_off = stack_offset + abi_size; + if (adj_off > 128) { + return self.fail("TODO implement set stack variable with large stack offset", .{}); + } + try self.code.ensureUnusedCapacity(8); + switch (abi_size) { + 1 => { + return self.fail("TODO implement set abi_size=1 stack variable with immediate", .{}); + }, + 2 => { + return self.fail("TODO implement set abi_size=2 stack variable with immediate", .{}); + }, + 4 => { + const x = @intCast(u32, x_big); + // We have a positive stack offset value but we want a twos complement negative + // offset from rbp, which is at the top of the stack frame. + const negative_offset = @intCast(i8, -@intCast(i32, adj_off)); + const twos_comp = @bitCast(u8, negative_offset); + // mov DWORD PTR [rbp+offset], immediate + self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp }); + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x); + }, + 8 => { + // We have a positive stack offset value but we want a twos complement negative + // offset from rbp, which is at the top of the stack frame. 
+ const negative_offset = @intCast(i8, -@intCast(i32, adj_off));
+ const twos_comp = @bitCast(u8, negative_offset);
+
+ // A 64-bit write to memory would take two movs anyway, so we instead
+ // just use two 32-bit writes and avoid a register allocation
+ try self.code.ensureUnusedCapacity(14);
+ var buf: [8]u8 = undefined;
+ mem.writeIntLittle(u64, &buf, x_big);
+
+ // mov DWORD PTR [rbp+offset+4], immediate
+ self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp + 4 });
+ self.code.appendSliceAssumeCapacity(buf[4..8]);
+
+ // mov DWORD PTR [rbp+offset], immediate
+ self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp });
+ self.code.appendSliceAssumeCapacity(buf[0..4]);
+ },
+ else => {
+ return self.fail("TODO implement set abi_size=large stack variable with immediate", .{});
+ },
+ }
+ },
+ .embedded_in_code => {
+ // TODO this and `.stack_offset` below need to get improved to support types greater than
+ // register size, and do general memcpy
+ const reg = try self.copyToTmpRegister(ty, mcv);
+ return self.genSetStack(ty, stack_offset, MCValue{ .register = reg });
+ },
+ .register => |reg| {
+ try self.genX8664ModRMRegToStack(ty, stack_offset, reg, 0x89);
+ },
+ .memory => |vaddr| {
+ _ = vaddr;
+ return self.fail("TODO implement set stack variable from memory vaddr", .{});
+ },
+ .stack_offset => |off| {
+ // TODO this and `.embedded_in_code` above need to get improved to support types greater than
+ // register size, and do general memcpy
+
+ if (stack_offset == off)
+ return; // Copy stack variable to itself; nothing to do.
+
+ const reg = try self.copyToTmpRegister(ty, mcv);
+ return self.genSetStack(ty, stack_offset, MCValue{ .register = reg });
+ },
+ }
+}
+
+fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void {
+ switch (mcv) {
+ .dead => unreachable,
+ .ptr_stack_offset => unreachable,
+ .ptr_embedded_in_code => unreachable,
+ .unreach, .none => return, // Nothing to do.
+ .undef => {
+ if (!self.wantSafety())
+ return; // The already existing value will do just fine.
+ // Write the debug undefined value.
+ switch (reg.size()) {
+ 8 => return self.genSetReg(ty, reg, .{ .immediate = 0xaa }),
+ 16 => return self.genSetReg(ty, reg, .{ .immediate = 0xaaaa }),
+ 32 => return self.genSetReg(ty, reg, .{ .immediate = 0xaaaaaaaa }),
+ 64 => return self.genSetReg(ty, reg, .{ .immediate = 0xaaaaaaaaaaaaaaaa }),
+ else => unreachable,
+ }
+ },
+ .compare_flags_unsigned => |op| {
+ const encoder = try Encoder.init(self.code, 7);
+ // TODO audit this codegen: we force w = true here to make
+ // the value affect the big register
+ encoder.rex(.{
+ .w = true,
+ .b = reg.isExtended(),
+ });
+ encoder.opcode_2byte(0x0f, switch (op) {
+ .gte => 0x93,
+ .gt => 0x97,
+ .neq => 0x95,
+ .lt => 0x92,
+ .lte => 0x96,
+ .eq => 0x94,
+ });
+ encoder.modRm_direct(
+ 0,
+ reg.low_id(),
+ );
+ },
+ .compare_flags_signed => |op| {
+ _ = op;
+ return self.fail("TODO set register with compare flags value (signed)", .{});
+ },
+ .immediate => |x| {
+ // 32-bit moves zero-extend to 64-bit, so XORing the 32-bit
+ // register is the fastest way to zero a register.
+ if (x == 0) {
+ // The encoding for `xor r32, r32` is `0x31 /r`.
+ const encoder = try Encoder.init(self.code, 3);
+
+ // If we're accessing e.g. r8d, we need to use a REX prefix before the actual operation. Since
+ // this is a 32-bit operation, the W flag is set to zero. X is also zero, as we're not using a SIB.
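+ // For example, zeroing r8d comes out as 45 31 c0 (`xor r8d, r8d`).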
+ // Both R and B are set, as we're extending, in effect, the register bits *and* the operand.
+ encoder.rex(.{
+ .r = reg.isExtended(),
+ .b = reg.isExtended(),
+ });
+ encoder.opcode_1byte(0x31);
+ // Section 3.1.1.1 of the Intel x64 Manual states that "/r indicates that the
+ // ModR/M byte of the instruction contains a register operand and an r/m operand."
+ encoder.modRm_direct(
+ reg.low_id(),
+ reg.low_id(),
+ );
+
+ return;
+ }
+ if (x <= math.maxInt(i32)) {
+ // Next best case: if we set the lower four bytes, the upper four will be zeroed.
+ //
+ // The encoding for `mov IMM32 -> REG` is (0xB8 + R) IMM.
+
+ const encoder = try Encoder.init(self.code, 6);
+ // Just as with XORing, we need a REX prefix. This time though, we only
+ // need the B bit set, as we're extending the opcode's register field,
+ // and there is no Mod R/M byte.
+ encoder.rex(.{
+ .b = reg.isExtended(),
+ });
+ encoder.opcode_withReg(0xB8, reg.low_id());
+
+ // no ModR/M byte
+
+ // IMM
+ encoder.imm32(@intCast(i32, x));
+ return;
+ }
+ // Worst case: we need to load the 64-bit register with the IMM. GNU's assembler calls
+ // this `movabs`, though this is officially just a different variant of the plain `mov`
+ // instruction.
+ //
+ // This encoding is, in fact, the *same* as the one used for 32-bit loads. The only
+ // difference is that we set REX.W before the instruction, which extends the load to
+ // 64-bit and uses the full bit-width of the register.
+ {
+ const encoder = try Encoder.init(self.code, 10);
+ encoder.rex(.{
+ .w = true,
+ .b = reg.isExtended(),
+ });
+ encoder.opcode_withReg(0xB8, reg.low_id());
+ encoder.imm64(x);
+ }
+ },
+ .embedded_in_code => |code_offset| {
+ // We need the offset from RIP in a signed i32 twos complement.
+ // The instruction is 7 bytes long and RIP points to the next instruction.
+
+ // 64-bit LEA is encoded as REX.W 8D /r.
+ const rip = self.code.items.len + 7;
+ const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip);
+ const offset = @intCast(i32, big_offset);
+ const encoder = try Encoder.init(self.code, 7);
+
+ // byte 1, always exists because w = true
+ encoder.rex(.{
+ .w = true,
+ .r = reg.isExtended(),
+ });
+ // byte 2
+ encoder.opcode_1byte(0x8D);
+ // byte 3
+ encoder.modRm_RIPDisp32(reg.low_id());
+ // bytes 4-7
+ encoder.disp32(offset);
+
+ // Double-check that we haven't made any math errors.
+ assert(rip == self.code.items.len);
+ },
+ .register => |src_reg| {
+ // If the registers are the same, nothing to do.
+ if (src_reg.id() == reg.id())
+ return;
+
+ // This is a variant of 8B /r.
+ const abi_size = ty.abiSize(self.target.*);
+ const encoder = try Encoder.init(self.code, 3);
+ encoder.rex(.{
+ .w = abi_size == 8,
+ .r = reg.isExtended(),
+ .b = src_reg.isExtended(),
+ });
+ encoder.opcode_1byte(0x8B);
+ encoder.modRm_direct(reg.low_id(), src_reg.low_id());
+ },
+ .memory => |x| {
+ if (self.bin_file.options.pie) {
+ // RIP-relative displacement to the entry in the GOT table.
+ const abi_size = ty.abiSize(self.target.*);
+ const encoder = try Encoder.init(self.code, 10);
+
+ // LEA reg, []
+
+ // We encode the instruction FIRST because prefixes may or may not appear.
+ // After we encode the instruction, we will know that the displacement bytes
+ // for [] will be at self.code.items.len - 4.
+ encoder.rex(.{ + .w = true, // force 64 bit because loading an address (to the GOT) + .r = reg.isExtended(), + }); + encoder.opcode_1byte(0x8D); + encoder.modRm_RIPDisp32(reg.low_id()); + encoder.disp32(0); + + const offset = @intCast(u32, self.code.items.len); + + if (self.bin_file.cast(link.File.MachO)) |macho_file| { + // TODO I think the reloc might be in the wrong place. + const decl = macho_file.active_decl.?; + // Load reloc for LEA instruction. + try decl.link.macho.relocs.append(self.bin_file.allocator, .{ + .offset = offset - 4, + .target = .{ .local = @intCast(u32, x) }, + .addend = 0, + .subtractor = null, + .pcrel = true, + .length = 2, + .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_GOT), + }); + } else { + return self.fail("TODO implement genSetReg for PIE GOT indirection on this platform", .{}); + } + + // MOV reg, [reg] + encoder.rex(.{ + .w = abi_size == 8, + .r = reg.isExtended(), + .b = reg.isExtended(), + }); + encoder.opcode_1byte(0x8B); + encoder.modRm_indirectDisp0(reg.low_id(), reg.low_id()); + } else if (x <= math.maxInt(i32)) { + // Moving from memory to a register is a variant of `8B /r`. + // Since we're using 64-bit moves, we require a REX. + // This variant also requires a SIB, as it would otherwise be RIP-relative. + // We want mode zero with the lower three bits set to four to indicate an SIB with no other displacement. + // The SIB must be 0x25, to indicate a disp32 with no scaled index. + // 0b00RRR100, where RRR is the lower three bits of the register ID. + // The instruction is thus eight bytes; REX 0x8B 0b00RRR100 0x25 followed by a four-byte disp32. + const abi_size = ty.abiSize(self.target.*); + const encoder = try Encoder.init(self.code, 8); + encoder.rex(.{ + .w = abi_size == 8, + .r = reg.isExtended(), + }); + encoder.opcode_1byte(0x8B); + // effective address = [SIB] + encoder.modRm_SIBDisp0(reg.low_id()); + // SIB = disp32 + encoder.sib_disp32(); + encoder.disp32(@intCast(i32, x)); + } else { + // If this is RAX, we can use a direct load; otherwise, we need to load the address, then indirectly load + // the value. + if (reg.id() == 0) { + // REX.W 0xA1 moffs64* + // moffs64* is a 64-bit offset "relative to segment base", which really just means the + // absolute address for all practical purposes. + + const encoder = try Encoder.init(self.code, 10); + encoder.rex(.{ + .w = true, + }); + encoder.opcode_1byte(0xA1); + encoder.writeIntLittle(u64, x); + } else { + // This requires two instructions; a move imm as used above, followed by an indirect load using the register + // as the address and the register as the destination. + // + // This cannot be used if the lower three bits of the id are equal to four or five, as there + // is no way to possibly encode it. This means that RSP, RBP, R12, and R13 cannot be used with + // this instruction. + const id3 = @truncate(u3, reg.id()); + assert(id3 != 4 and id3 != 5); + + // Rather than duplicate the logic used for the move, we just use a self-call with a new MCValue. + try self.genSetReg(ty, reg, MCValue{ .immediate = x }); + + // Now, the register contains the address of the value to load into it + // Currently, we're only allowing 64-bit registers, so we need the `REX.W 8B /r` variant. + // TODO: determine whether to allow other sized registers, and if so, handle them properly. 
+
+ // mov reg, [reg]
+ const abi_size = ty.abiSize(self.target.*);
+ const encoder = try Encoder.init(self.code, 3);
+ encoder.rex(.{
+ .w = abi_size == 8,
+ .r = reg.isExtended(),
+ .b = reg.isExtended(),
+ });
+ encoder.opcode_1byte(0x8B);
+ encoder.modRm_indirectDisp0(reg.low_id(), reg.low_id());
+ }
+ }
+ },
+ .stack_offset => |unadjusted_off| {
+ const abi_size = ty.abiSize(self.target.*);
+ const off = unadjusted_off + abi_size;
+ if (off < std.math.minInt(i32) or off > std.math.maxInt(i32)) {
+ return self.fail("stack offset too large", .{});
+ }
+ const ioff = -@intCast(i32, off);
+ const encoder = try Encoder.init(self.code, 3);
+ encoder.rex(.{
+ .w = abi_size == 8,
+ .r = reg.isExtended(),
+ });
+ encoder.opcode_1byte(0x8B);
+ if (std.math.minInt(i8) <= ioff and ioff <= std.math.maxInt(i8)) {
+ // Example: 48 8b 4d 7f mov rcx,QWORD PTR [rbp+0x7f]
+ encoder.modRm_indirectDisp8(reg.low_id(), Register.ebp.low_id());
+ encoder.disp8(@intCast(i8, ioff));
+ } else {
+ // Example: 48 8b 8d 80 00 00 00 mov rcx,QWORD PTR [rbp+0x80]
+ encoder.modRm_indirectDisp32(reg.low_id(), Register.ebp.low_id());
+ encoder.disp32(ioff);
+ }
+ },
+ }
+}
+
+fn airPtrToInt(self: *Self, inst: Air.Inst.Index) !void {
+ const un_op = self.air.instructions.items(.data)[inst].un_op;
+ const result = try self.resolveInst(un_op);
+ return self.finishAir(inst, result, .{ un_op, .none, .none });
+}
+
+fn airBitCast(self: *Self, inst: Air.Inst.Index) !void {
+ const ty_op = self.air.instructions.items(.data)[inst].ty_op;
+ const result = try self.resolveInst(ty_op.operand);
+ return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+}
+
+fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void {
+ const ty_op = self.air.instructions.items(.data)[inst].ty_op;
+ const result: MCValue = if (self.liveness.isUnused(inst))
+ .dead
+ else
+ return self.fail("TODO implement airArrayToSlice for {}", .{self.target.cpu.arch});
+ return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+}
+
+fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void {
+ const ty_op = self.air.instructions.items(.data)[inst].ty_op;
+ const result: MCValue = if (self.liveness.isUnused(inst))
+ .dead
+ else
+ return self.fail("TODO implement airIntToFloat for {}", .{self.target.cpu.arch});
+ return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+}
+
+fn airFloatToInt(self: *Self, inst: Air.Inst.Index) !void {
+ const ty_op = self.air.instructions.items(.data)[inst].ty_op;
+ const result: MCValue = if (self.liveness.isUnused(inst))
+ .dead
+ else
+ return self.fail("TODO implement airFloatToInt for {}", .{self.target.cpu.arch});
+ return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+}
+
+fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void {
+ const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
+ const extra = self.air.extraData(Air.Block, ty_pl.payload);
+ _ = ty_pl;
+ _ = extra;
+ return self.fail("TODO implement airCmpxchg for {}", .{self.target.cpu.arch});
+ // return self.finishAir(inst, result, .{ extra.ptr, extra.expected_value, extra.new_value });
+}
+
+fn airAtomicRmw(self: *Self, inst: Air.Inst.Index) !void {
+ _ = inst;
+ return self.fail("TODO implement airAtomicRmw for {}", .{self.target.cpu.arch});
+}
+
+fn airAtomicLoad(self: *Self, inst: Air.Inst.Index) !void {
+ _ = inst;
+ return self.fail("TODO implement airAtomicLoad for {}", .{self.target.cpu.arch});
+}
+
+fn airAtomicStore(self: *Self, inst: Air.Inst.Index, order: std.builtin.AtomicOrder) !void {
+ _ = inst;
+ _ = order;
+ return self.fail("TODO implement airAtomicStore for {}", .{self.target.cpu.arch});
+}
+
+fn airMemset(self: *Self, inst: Air.Inst.Index) !void {
+ _ = inst;
+ return self.fail("TODO implement airMemset for {}", .{self.target.cpu.arch});
+}
+
+fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void {
+ _ = inst;
+ return self.fail("TODO implement airMemcpy for {}", .{self.target.cpu.arch});
+}
+
+fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue {
+ // The first section of indexes corresponds to a set number of constant values.
+ const ref_int = @enumToInt(inst);
+ if (ref_int < Air.Inst.Ref.typed_value_map.len) {
+ const tv = Air.Inst.Ref.typed_value_map[ref_int];
+ if (!tv.ty.hasCodeGenBits()) {
+ return MCValue{ .none = {} };
+ }
+ return self.genTypedValue(tv);
+ }
+
+ // If the type has no codegen bits, no need to store it.
+ const inst_ty = self.air.typeOf(inst);
+ if (!inst_ty.hasCodeGenBits())
+ return MCValue{ .none = {} };
+
+ const inst_index = @intCast(Air.Inst.Index, ref_int - Air.Inst.Ref.typed_value_map.len);
+ switch (self.air.instructions.items(.tag)[inst_index]) {
+ .constant => {
+ // Constants have static lifetimes, so they are always memoized in the outermost table.
+ const branch = &self.branch_stack.items[0];
+ const gop = try branch.inst_table.getOrPut(self.gpa, inst_index);
+ if (!gop.found_existing) {
+ const ty_pl = self.air.instructions.items(.data)[inst_index].ty_pl;
+ gop.value_ptr.* = try self.genTypedValue(.{
+ .ty = inst_ty,
+ .val = self.air.values[ty_pl.payload],
+ });
+ }
+ return gop.value_ptr.*;
+ },
+ .const_ty => unreachable,
+ else => return self.getResolvedInstValue(inst_index),
+ }
+}
+
+fn getResolvedInstValue(self: *Self, inst: Air.Inst.Index) MCValue {
+ // Treat each stack item as a "layer" on top of the previous one.
+ var i: usize = self.branch_stack.items.len;
+ while (true) {
+ i -= 1;
+ if (self.branch_stack.items[i].inst_table.get(inst)) |mcv| {
+ assert(mcv != .dead);
+ return mcv;
+ }
+ }
+}
+
+/// If the MCValue is an immediate, and it does not fit within this type,
+/// we put it in a register.
+/// A potential opportunity for future optimization here would be keeping track
+/// of the fact that the instruction is available both as an immediate
+/// and as a register.
+fn limitImmediateType(self: *Self, operand: Air.Inst.Ref, comptime T: type) !MCValue {
+ const mcv = try self.resolveInst(operand);
+ const ti = @typeInfo(T).Int;
+ switch (mcv) {
+ .immediate => |imm| {
+ // This immediate is unsigned.
+ const U = std.meta.Int(.unsigned, ti.bits - @boolToInt(ti.signedness == .signed));
+ if (imm >= math.maxInt(U)) {
+ return MCValue{ .register = try self.copyToTmpRegister(Type.initTag(.usize), mcv) };
+ }
+ },
+ else => {},
+ }
+ return mcv;
+}
+
+fn genTypedValue(self: *Self, typed_value: TypedValue) InnerError!MCValue {
+ if (typed_value.val.isUndef())
+ return MCValue{ .undef = {} };
+ const ptr_bits = self.target.cpu.arch.ptrBitWidth();
+ const ptr_bytes: u64 = @divExact(ptr_bits, 8);
+ switch (typed_value.ty.zigTypeTag()) {
+ .Pointer => switch (typed_value.ty.ptrSize()) {
+ .Slice => {
+ var buf: Type.SlicePtrFieldTypeBuffer = undefined;
+ const ptr_type = typed_value.ty.slicePtrFieldType(&buf);
+ const ptr_mcv = try self.genTypedValue(.{ .ty = ptr_type, .val = typed_value.val });
+ const slice_len = typed_value.val.sliceLen();
+ // Codegen can't handle some kinds of indirection. If the wrong union field is accessed here it may mean
+ // the Sema code needs to use anonymous Decls or alloca instructions to store data.
+ const ptr_imm = ptr_mcv.memory;
+ _ = slice_len;
+ _ = ptr_imm;
+ // We need more general support for const data being stored in memory to make this work.
+ return self.fail("TODO codegen for const slices", .{});
+ },
+ else => {
+ if (typed_value.val.castTag(.decl_ref)) |payload| {
+ const decl = payload.data;
+ decl.alive = true;
+ if (self.bin_file.cast(link.File.Elf)) |elf_file| {
+ const got = &elf_file.program_headers.items[elf_file.phdr_got_index.?];
+ const got_addr = got.p_vaddr + decl.link.elf.offset_table_index * ptr_bytes;
+ return MCValue{ .memory = got_addr };
+ } else if (self.bin_file.cast(link.File.MachO)) |_| {
+ // TODO I'm hacking my way through here by repurposing .memory for storing
+ // the index of the GOT target symbol.
+ return MCValue{ .memory = decl.link.macho.local_sym_index };
+ } else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
+ const got_addr = coff_file.offset_table_virtual_address + decl.link.coff.offset_table_index * ptr_bytes;
+ return MCValue{ .memory = got_addr };
+ } else if (self.bin_file.cast(link.File.Plan9)) |p9| {
+ try p9.seeDecl(decl);
+ const got_addr = p9.bases.data + decl.link.plan9.got_index.? * ptr_bytes;
+ return MCValue{ .memory = got_addr };
+ } else {
+ return self.fail("TODO codegen non-ELF const Decl pointer", .{});
+ }
+ }
+ if (typed_value.val.tag() == .int_u64) {
+ return MCValue{ .immediate = typed_value.val.toUnsignedInt() };
+ }
+ return self.fail("TODO codegen more kinds of const pointers", .{});
+ },
+ },
+ .Int => {
+ const info = typed_value.ty.intInfo(self.target.*);
+ if (info.bits > ptr_bits or info.signedness == .signed) {
+ return self.fail("TODO const int bigger than ptr and signed int", .{});
+ }
+ return MCValue{ .immediate = typed_value.val.toUnsignedInt() };
+ },
+ .Bool => {
+ return MCValue{ .immediate = @boolToInt(typed_value.val.toBool()) };
+ },
+ .ComptimeInt => unreachable, // semantic analysis prevents this
+ .ComptimeFloat => unreachable, // semantic analysis prevents this
+ .Optional => {
+ if (typed_value.ty.isPtrLikeOptional()) {
+ if (typed_value.val.isNull())
+ return MCValue{ .immediate = 0 };
+
+ var buf: Type.Payload.ElemType = undefined;
+ return self.genTypedValue(.{
+ .ty = typed_value.ty.optionalChild(&buf),
+ .val = typed_value.val,
+ });
+ } else if (typed_value.ty.abiSize(self.target.*) == 1) {
+ return MCValue{ .immediate = @boolToInt(typed_value.val.isNull()) };
+ }
+ return self.fail("TODO non pointer optionals", .{});
+ },
+ .Enum => {
+ if (typed_value.val.castTag(.enum_field_index)) |field_index| {
+ switch (typed_value.ty.tag()) {
+ .enum_simple => {
+ return MCValue{ .immediate = field_index.data };
+ },
+ .enum_full, .enum_nonexhaustive => {
+ const enum_full = typed_value.ty.cast(Type.Payload.EnumFull).?.data;
+ if (enum_full.values.count() != 0) {
+ const tag_val = enum_full.values.keys()[field_index.data];
+ return self.genTypedValue(.{ .ty = enum_full.tag_ty, .val = tag_val });
+ } else {
+ return MCValue{ .immediate = field_index.data };
+ }
+ },
+ else => unreachable,
+ }
+ } else {
+ var int_tag_buffer: Type.Payload.Bits = undefined;
+ const int_tag_ty = typed_value.ty.intTagType(&int_tag_buffer);
+ return self.genTypedValue(.{ .ty = int_tag_ty, .val = typed_value.val });
+ }
+ },
+ .ErrorSet => {
+ switch (typed_value.val.tag()) {
+ .@"error" => {
+ const err_name = typed_value.val.castTag(.@"error").?.data.name;
+ const module = self.bin_file.options.module.?;
+ const global_error_set = module.global_error_set;
+ const error_index = global_error_set.get(err_name).?;
+ return MCValue{ .immediate = error_index };
+ },
+ else => {
+ // In this case we are rendering an error union which has a 0-bit payload.
+ return MCValue{ .immediate = 0 };
+ },
+ }
+ },
+ .ErrorUnion => {
+ const error_type = typed_value.ty.errorUnionSet();
+ const payload_type = typed_value.ty.errorUnionPayload();
+ const sub_val = typed_value.val.castTag(.eu_payload).?.data;
+
+ if (!payload_type.hasCodeGenBits()) {
+ // We use the error type directly as the type.
+ return self.genTypedValue(.{ .ty = error_type, .val = sub_val });
+ }
+
+ return self.fail("TODO implement error union const of type '{}'", .{typed_value.ty});
+ },
+ else => return self.fail("TODO implement const of type '{}'", .{typed_value.ty}),
+ }
+}
+
+const CallMCValues = struct {
+ args: []MCValue,
+ return_value: MCValue,
+ stack_byte_count: u32,
+ stack_align: u32,
+
+ fn deinit(self: *CallMCValues, func: *Self) void {
+ func.gpa.free(self.args);
+ self.* = undefined;
+ }
+};
+
+/// Caller must call `CallMCValues.deinit`.
+fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues {
+ const cc = fn_ty.fnCallingConvention();
+ const param_types = try self.gpa.alloc(Type, fn_ty.fnParamLen());
+ defer self.gpa.free(param_types);
+ fn_ty.fnParamTypes(param_types);
+ var result: CallMCValues = .{
+ .args = try self.gpa.alloc(MCValue, param_types.len),
+ // These undefined values must be populated before returning from this function.
+ .return_value = undefined,
+ .stack_byte_count = undefined,
+ .stack_align = undefined,
+ };
+ errdefer self.gpa.free(result.args);
+
+ const ret_ty = fn_ty.fnReturnType();
+
+ switch (cc) {
+ .Naked => {
+ assert(result.args.len == 0);
+ result.return_value = .{ .unreach = {} };
+ result.stack_byte_count = 0;
+ result.stack_align = 1;
+ return result;
+ },
+ .Unspecified, .C => {
+ var next_int_reg: usize = 0;
+ var next_stack_offset: u32 = 0;
+
+ for (param_types) |ty, i| {
+ if (!ty.hasCodeGenBits()) {
+ assert(cc != .C);
+ result.args[i] = .{ .none = {} };
+ continue;
+ }
+ const param_size = @intCast(u32, ty.abiSize(self.target.*));
+ const pass_in_reg = switch (ty.zigTypeTag()) {
+ .Bool => true,
+ .Int => param_size <= 8,
+ .Pointer => ty.ptrSize() != .Slice,
+ .Optional => ty.isPtrLikeOptional(),
+ else => false,
+ };
+ if (pass_in_reg) {
+ if (next_int_reg >= c_abi_int_param_regs.len) {
+ result.args[i] = .{ .stack_offset = next_stack_offset };
+ next_stack_offset += param_size;
+ } else {
+ const aliased_reg = registerAlias(
+ c_abi_int_param_regs[next_int_reg],
+ param_size,
+ );
+ result.args[i] = .{ .register = aliased_reg };
+ next_int_reg += 1;
+ }
+ } else {
+ // For simplicity of codegen, slices and other types are always pushed onto the stack.
+ // TODO: look into optimizing this by passing things as registers sometimes,
+ // such as ptr and len of slices as separate registers.
+ // TODO: also we need to honor the C ABI for relevant types rather than passing on
+ // the stack here.
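+ // As a hypothetical illustration: for `fn f(a: u64, s: []u8)` with cc = .C,
+ // `a` lands in the first integer parameter register, while `s` (a slice,
+ // 16 bytes on x86_64) takes this stack path, occupying stack_offset 0 and
+ // bumping next_stack_offset to 16.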
+ result.args[i] = .{ .stack_offset = next_stack_offset }; + next_stack_offset += param_size; + } + } + result.stack_byte_count = next_stack_offset; + result.stack_align = 16; + }, + else => return self.fail("TODO implement function parameters for {} on x86_64", .{cc}), + } + + if (ret_ty.zigTypeTag() == .NoReturn) { + result.return_value = .{ .unreach = {} }; + } else if (!ret_ty.hasCodeGenBits()) { + result.return_value = .{ .none = {} }; + } else switch (cc) { + .Naked => unreachable, + .Unspecified, .C => { + const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*)); + const aliased_reg = registerAlias(c_abi_int_return_regs[0], ret_ty_size); + result.return_value = .{ .register = aliased_reg }; + }, + else => return self.fail("TODO implement function return values for {}", .{cc}), + } + return result; +} + +/// TODO support scope overrides. Also note this logic is duplicated with `Module.wantSafety`. +fn wantSafety(self: *Self) bool { + return switch (self.bin_file.options.optimize_mode) { + .Debug => true, + .ReleaseSafe => true, + .ReleaseFast => false, + .ReleaseSmall => false, + }; +} + +fn fail(self: *Self, comptime format: []const u8, args: anytype) InnerError { + @setCold(true); + assert(self.err_msg == null); + self.err_msg = try ErrorMsg.create(self.bin_file.allocator, self.src_loc, format, args); + return error.CodegenFail; +} + +fn failSymbol(self: *Self, comptime format: []const u8, args: anytype) InnerError { + @setCold(true); + assert(self.err_msg == null); + self.err_msg = try ErrorMsg.create(self.bin_file.allocator, self.src_loc, format, args); + return error.CodegenFail; +} + +const Register = @import("bits.zig").Register; + +const Instruction = void; + +const Condition = void; + +const callee_preserved_regs = @import("bits.zig").callee_preserved_regs; + +const c_abi_int_param_regs = @import("bits.zig").c_abi_int_param_regs; + +const c_abi_int_return_regs = @import("bits.zig").c_abi_int_return_regs; + +fn parseRegName(name: []const u8) ?Register { + if (@hasDecl(Register, "parseRegName")) { + return Register.parseRegName(name); + } + return std.meta.stringToEnum(Register, name); +} + +fn registerAlias(reg: Register, size_bytes: u32) Register { + // For x86_64 we have to pick a smaller register alias depending on abi size. + switch (size_bytes) { + 1 => return reg.to8(), + 2 => return reg.to16(), + 4 => return reg.to32(), + 8 => return reg.to64(), + else => unreachable, + } +} diff --git a/src/codegen.zig b/src/codegen.zig index b219b76fc6..b8e4b72b1b 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -22,8 +22,6 @@ const log = std.log.scoped(.codegen); const build_options = @import("build_options"); const RegisterManager = @import("register_manager.zig").RegisterManager; -const X8664Encoder = @import("arch/x86_64/bits.zig").Encoder; - pub const FnResult = union(enum) { /// The `code` parameter passed to `generateSymbol` has the value appended. 
appended: void, @@ -118,7 +116,7 @@ pub fn generateFunction( //.thumb => return Function(.thumb).generate(bin_file, src_loc, func, air, liveness, code, debug_output), //.thumbeb => return Function(.thumbeb).generate(bin_file, src_loc, func, air, liveness, code, debug_output), //.i386 => return Function(.i386).generate(bin_file, src_loc, func, air, liveness, code, debug_output), - .x86_64 => return Function(.x86_64).generate(bin_file, src_loc, func, air, liveness, code, debug_output), + .x86_64 => return @import("arch/x86_64/CodeGen.zig").generate(.x86_64, bin_file, src_loc, func, air, liveness, code, debug_output), //.xcore => return Function(.xcore).generate(bin_file, src_loc, func, air, liveness, code, debug_output), //.nvptx => return Function(.nvptx).generate(bin_file, src_loc, func, air, liveness, code, debug_output), //.nvptx64 => return Function(.nvptx64).generate(bin_file, src_loc, func, air, liveness, code, debug_output), @@ -598,69 +596,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { fn gen(self: *Self) !void { switch (arch) { - .x86_64 => { - try self.code.ensureUnusedCapacity(11); - - const cc = self.fn_type.fnCallingConvention(); - if (cc != .Naked) { - // We want to subtract the aligned stack frame size from rsp here, but we don't - // yet know how big it will be, so we leave room for a 4-byte stack size. - // TODO During semantic analysis, check if there are no function calls. If there - // are none, here we can omit the part where we subtract and then add rsp. - self.code.appendSliceAssumeCapacity(&[_]u8{ - 0x55, // push rbp - 0x48, 0x89, 0xe5, // mov rbp, rsp - 0x48, 0x81, 0xec, // sub rsp, imm32 (with reloc) - }); - const reloc_index = self.code.items.len; - self.code.items.len += 4; - - try self.dbgSetPrologueEnd(); - try self.genBody(self.air.getMainBody()); - - const stack_end = self.max_end_stack; - if (stack_end > math.maxInt(i32)) - return self.failSymbol("too much stack used in call parameters", .{}); - const aligned_stack_end = mem.alignForward(stack_end, self.stack_align); - mem.writeIntLittle(u32, self.code.items[reloc_index..][0..4], @intCast(u32, aligned_stack_end)); - - if (self.code.items.len >= math.maxInt(i32)) { - return self.failSymbol("unable to perform relocation: jump too far", .{}); - } - if (self.exitlude_jump_relocs.items.len == 1) { - self.code.items.len -= 5; - } else for (self.exitlude_jump_relocs.items) |jmp_reloc| { - const amt = self.code.items.len - (jmp_reloc + 4); - const s32_amt = @intCast(i32, amt); - mem.writeIntLittle(i32, self.code.items[jmp_reloc..][0..4], s32_amt); - } - - // Important to be after the possible self.code.items.len -= 5 above. 
- try self.dbgSetEpilogueBegin(); - - try self.code.ensureUnusedCapacity(9); - // add rsp, x - if (aligned_stack_end > math.maxInt(i8)) { - // example: 48 81 c4 ff ff ff 7f add rsp,0x7fffffff - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x81, 0xc4 }); - const x = @intCast(u32, aligned_stack_end); - mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x); - } else if (aligned_stack_end != 0) { - // example: 48 83 c4 7f add rsp,0x7f - const x = @intCast(u8, aligned_stack_end); - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x83, 0xc4, x }); - } - - self.code.appendSliceAssumeCapacity(&[_]u8{ - 0x5d, // pop rbp - 0xc3, // ret - }); - } else { - try self.dbgSetPrologueEnd(); - try self.genBody(self.air.getMainBody()); - try self.dbgSetEpilogueBegin(); - } - }, .arm, .armeb => { const cc = self.fn_type.fnCallingConvention(); if (cc != .Naked) { @@ -969,8 +904,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { branch.inst_table.putAssumeCapacity(inst, .dead); switch (prev_value) { .register => |reg| { - const canon_reg = toCanonicalReg(reg); - self.register_manager.freeReg(canon_reg); + self.register_manager.freeReg(reg); }, else => {}, // TODO process stack allocation death } @@ -1086,7 +1020,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const ptr_bytes: u64 = @divExact(ptr_bits, 8); if (abi_size <= ptr_bytes) { if (self.register_manager.tryAllocReg(inst, &.{})) |reg| { - return MCValue{ .register = registerAlias(reg, abi_size) }; + return MCValue{ .register = reg }; } } } @@ -1098,7 +1032,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const stack_mcv = try self.allocRegOrMem(inst, false); log.debug("spilling {d} to stack mcv {any}", .{ inst, stack_mcv }); const reg_mcv = self.getResolvedInstValue(inst); - assert(reg == toCanonicalReg(reg_mcv.register)); + assert(reg == reg_mcv.register); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; try branch.inst_table.put(self.gpa, inst, stack_mcv); try self.genSetStack(self.air.typeOfIndex(inst), stack_mcv.stack_offset, reg_mcv); @@ -1226,9 +1160,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } switch (arch) { - .x86_64 => { - break :result try self.genX8664BinMath(inst, ty_op.operand, .bool_true); - }, .arm, .armeb => { break :result try self.genArmBinOp(inst, ty_op.operand, .bool_true, .not); }, @@ -1266,7 +1197,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { fn airAdd(self: *Self, inst: Air.Inst.Index) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else switch (arch) { - .x86_64 => try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs), .arm, .armeb => try self.genArmBinOp(inst, bin_op.lhs, bin_op.rhs, .add), else => return self.fail("TODO implement add for {}", .{self.target.cpu.arch}), }; @@ -1292,7 +1222,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { fn airSub(self: *Self, inst: Air.Inst.Index) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else switch (arch) { - .x86_64 => try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs), .arm, .armeb => try self.genArmBinOp(inst, bin_op.lhs, bin_op.rhs, .sub), else => return self.fail("TODO implement sub for {}", .{self.target.cpu.arch}), }; @@ -1318,7 +1247,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { fn airMul(self: *Self, inst: Air.Inst.Index) !void { const bin_op = 
self.air.instructions.items(.data)[inst].bin_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else switch (arch) { - .x86_64 => try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs), .arm, .armeb => try self.genArmMul(inst, bin_op.lhs, bin_op.rhs), else => return self.fail("TODO implement mul for {}", .{self.target.cpu.arch}), }; @@ -1369,7 +1297,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else switch (arch) { .arm, .armeb => try self.genArmBinOp(inst, bin_op.lhs, bin_op.rhs, .bit_and), - .x86_64 => try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs), else => return self.fail("TODO implement bitwise and for {}", .{self.target.cpu.arch}), }; return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); @@ -1379,7 +1306,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else switch (arch) { .arm, .armeb => try self.genArmBinOp(inst, bin_op.lhs, bin_op.rhs, .bit_or), - .x86_64 => try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs), else => return self.fail("TODO implement bitwise or for {}", .{self.target.cpu.arch}), }; return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); @@ -2088,496 +2014,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return dst_mcv; } - /// Perform "binary" operators, excluding comparisons. - /// Currently, the following ops are supported: - /// ADD, SUB, XOR, OR, AND - fn genX8664BinMath(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_rhs: Air.Inst.Ref) !MCValue { - // We'll handle these ops in two steps. - // 1) Prepare an output location (register or memory) - // This location will be the location of the operand that dies (if one exists) - // or just a temporary register (if one doesn't exist) - // 2) Perform the op with the other argument - // 3) Sometimes, the output location is memory but the op doesn't support it. - // In this case, copy that location to a register, then perform the op to that register instead. - // - // TODO: make this algorithm less bad - - try self.code.ensureUnusedCapacity(8); - - const lhs = try self.resolveInst(op_lhs); - const rhs = try self.resolveInst(op_rhs); - - // There are 2 operands, destination and source. - // Either one, but not both, can be a memory operand. - // Source operand can be an immediate, 8 bits or 32 bits. - // So, if either one of the operands dies with this instruction, we can use it - // as the result MCValue. - var dst_mcv: MCValue = undefined; - var src_mcv: MCValue = undefined; - var src_inst: Air.Inst.Ref = undefined; - if (self.reuseOperand(inst, op_lhs, 0, lhs)) { - // LHS dies; use it as the destination. - // Both operands cannot be memory. - src_inst = op_rhs; - if (lhs.isMemory() and rhs.isMemory()) { - dst_mcv = try self.copyToNewRegister(inst, lhs); - src_mcv = rhs; - } else { - dst_mcv = lhs; - src_mcv = rhs; - } - } else if (self.reuseOperand(inst, op_rhs, 1, rhs)) { - // RHS dies; use it as the destination. - // Both operands cannot be memory. 
- src_inst = op_lhs; - if (lhs.isMemory() and rhs.isMemory()) { - dst_mcv = try self.copyToNewRegister(inst, rhs); - src_mcv = lhs; - } else { - dst_mcv = rhs; - src_mcv = lhs; - } - } else { - if (lhs.isMemory()) { - dst_mcv = try self.copyToNewRegister(inst, lhs); - src_mcv = rhs; - src_inst = op_rhs; - } else { - dst_mcv = try self.copyToNewRegister(inst, rhs); - src_mcv = lhs; - src_inst = op_lhs; - } - } - // This instruction supports only signed 32-bit immediates at most. If the immediate - // value is larger than this, we put it in a register. - // A potential opportunity for future optimization here would be keeping track - // of the fact that the instruction is available both as an immediate - // and as a register. - switch (src_mcv) { - .immediate => |imm| { - if (imm > math.maxInt(u31)) { - src_mcv = MCValue{ .register = try self.copyToTmpRegister(Type.initTag(.u64), src_mcv) }; - } - }, - else => {}, - } - - // Now for step 2, we perform the actual op - const inst_ty = self.air.typeOfIndex(inst); - const air_tags = self.air.instructions.items(.tag); - switch (air_tags[inst]) { - // TODO: Generate wrapping and non-wrapping versions separately - .add, .addwrap => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 0, 0x00), - .bool_or, .bit_or => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 1, 0x08), - .bool_and, .bit_and => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 4, 0x20), - .sub, .subwrap => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 5, 0x28), - .xor, .not => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 6, 0x30), - - .mul, .mulwrap => try self.genX8664Imul(inst_ty, dst_mcv, src_mcv), - else => unreachable, - } - - return dst_mcv; - } - - /// Wrap over Instruction.encodeInto to translate errors - fn encodeX8664Instruction(self: *Self, inst: Instruction) !void { - inst.encodeInto(self.code) catch |err| { - if (err == error.OutOfMemory) - return error.OutOfMemory - else - return self.fail("Instruction.encodeInto failed because {s}", .{@errorName(err)}); - }; - } - - /// This function encodes a binary operation for x86_64 - /// intended for use with the following opcode ranges - /// because they share the same structure. - /// - /// Thus not all binary operations can be used here - /// -- multiplication needs to be done with imul, - /// which doesn't have as convenient an interface. - /// - /// "opx"-style instructions use the opcode extension field to indicate which instruction to execute: - /// - /// opx = /0: add - /// opx = /1: or - /// opx = /2: adc - /// opx = /3: sbb - /// opx = /4: and - /// opx = /5: sub - /// opx = /6: xor - /// opx = /7: cmp - /// - /// opcode | operand shape - /// --------+---------------------- - /// 80 /opx | *r/m8*, imm8 - /// 81 /opx | *r/m16/32/64*, imm16/32 - /// 83 /opx | *r/m16/32/64*, imm8 - /// - /// "mr"-style instructions use the low bits of opcode to indicate shape of instruction: - /// - /// mr = 00: add - /// mr = 08: or - /// mr = 10: adc - /// mr = 18: sbb - /// mr = 20: and - /// mr = 28: sub - /// mr = 30: xor - /// mr = 38: cmp - /// - /// opcode | operand shape - /// -------+------------------------- - /// mr + 0 | *r/m8*, r8 - /// mr + 1 | *r/m16/32/64*, r16/32/64 - /// mr + 2 | *r8*, r/m8 - /// mr + 3 | *r16/32/64*, r/m16/32/64 - /// mr + 4 | *AL*, imm8 - /// mr + 5 | *rAX*, imm16/32 - /// - /// TODO: rotates and shifts share the same structure, so we can potentially implement them - /// at a later date with very similar code. 
- /// They have "opx"-style instructions, but no "mr"-style instructions. - /// - /// opx = /0: rol, - /// opx = /1: ror, - /// opx = /2: rcl, - /// opx = /3: rcr, - /// opx = /4: shl sal, - /// opx = /5: shr, - /// opx = /6: sal shl, - /// opx = /7: sar, - /// - /// opcode | operand shape - /// --------+------------------ - /// c0 /opx | *r/m8*, imm8 - /// c1 /opx | *r/m16/32/64*, imm8 - /// d0 /opx | *r/m8*, 1 - /// d1 /opx | *r/m16/32/64*, 1 - /// d2 /opx | *r/m8*, CL (for context, CL is register 1) - /// d3 /opx | *r/m16/32/64*, CL (for context, CL is register 1) - fn genX8664BinMathCode( - self: *Self, - dst_ty: Type, - dst_mcv: MCValue, - src_mcv: MCValue, - opx: u3, - mr: u8, - ) !void { - switch (dst_mcv) { - .none => unreachable, - .undef => unreachable, - .dead, .unreach, .immediate => unreachable, - .compare_flags_unsigned => unreachable, - .compare_flags_signed => unreachable, - .ptr_stack_offset => unreachable, - .ptr_embedded_in_code => unreachable, - .register => |dst_reg| { - switch (src_mcv) { - .none => unreachable, - .undef => try self.genSetReg(dst_ty, dst_reg, .undef), - .dead, .unreach => unreachable, - .ptr_stack_offset => unreachable, - .ptr_embedded_in_code => unreachable, - .register => |src_reg| { - // for register, register use mr + 1 - // addressing mode: *r/m16/32/64*, r16/32/64 - const abi_size = dst_ty.abiSize(self.target.*); - const encoder = try X8664Encoder.init(self.code, 3); - encoder.rex(.{ - .w = abi_size == 8, - .r = src_reg.isExtended(), - .b = dst_reg.isExtended(), - }); - encoder.opcode_1byte(mr + 1); - encoder.modRm_direct( - src_reg.low_id(), - dst_reg.low_id(), - ); - }, - .immediate => |imm| { - // register, immediate use opx = 81 or 83 addressing modes: - // opx = 81: r/m16/32/64, imm16/32 - // opx = 83: r/m16/32/64, imm8 - const imm32 = @intCast(i32, imm); // This case must be handled before calling genX8664BinMathCode. 
- if (imm32 <= math.maxInt(i8)) { - const abi_size = dst_ty.abiSize(self.target.*); - const encoder = try X8664Encoder.init(self.code, 4); - encoder.rex(.{ - .w = abi_size == 8, - .b = dst_reg.isExtended(), - }); - encoder.opcode_1byte(0x83); - encoder.modRm_direct( - opx, - dst_reg.low_id(), - ); - encoder.imm8(@intCast(i8, imm32)); - } else { - const abi_size = dst_ty.abiSize(self.target.*); - const encoder = try X8664Encoder.init(self.code, 7); - encoder.rex(.{ - .w = abi_size == 8, - .b = dst_reg.isExtended(), - }); - encoder.opcode_1byte(0x81); - encoder.modRm_direct( - opx, - dst_reg.low_id(), - ); - encoder.imm32(@intCast(i32, imm32)); - } - }, - .embedded_in_code, .memory => { - return self.fail("TODO implement x86 ADD/SUB/CMP source memory", .{}); - }, - .stack_offset => |off| { - // register, indirect use mr + 3 - // addressing mode: *r16/32/64*, r/m16/32/64 - const abi_size = dst_ty.abiSize(self.target.*); - const adj_off = off + abi_size; - if (off > math.maxInt(i32)) { - return self.fail("stack offset too large", .{}); - } - const encoder = try X8664Encoder.init(self.code, 7); - encoder.rex(.{ - .w = abi_size == 8, - .r = dst_reg.isExtended(), - }); - encoder.opcode_1byte(mr + 3); - if (adj_off <= std.math.maxInt(i8)) { - encoder.modRm_indirectDisp8( - dst_reg.low_id(), - Register.ebp.low_id(), - ); - encoder.disp8(-@intCast(i8, adj_off)); - } else { - encoder.modRm_indirectDisp32( - dst_reg.low_id(), - Register.ebp.low_id(), - ); - encoder.disp32(-@intCast(i32, adj_off)); - } - }, - .compare_flags_unsigned => { - return self.fail("TODO implement x86 ADD/SUB/CMP source compare flag (unsigned)", .{}); - }, - .compare_flags_signed => { - return self.fail("TODO implement x86 ADD/SUB/CMP source compare flag (signed)", .{}); - }, - } - }, - .stack_offset => |off| { - switch (src_mcv) { - .none => unreachable, - .undef => return self.genSetStack(dst_ty, off, .undef), - .dead, .unreach => unreachable, - .ptr_stack_offset => unreachable, - .ptr_embedded_in_code => unreachable, - .register => |src_reg| { - try self.genX8664ModRMRegToStack(dst_ty, off, src_reg, mr + 0x1); - }, - .immediate => |imm| { - _ = imm; - return self.fail("TODO implement x86 ADD/SUB/CMP source immediate", .{}); - }, - .embedded_in_code, .memory, .stack_offset => { - return self.fail("TODO implement x86 ADD/SUB/CMP source memory", .{}); - }, - .compare_flags_unsigned => { - return self.fail("TODO implement x86 ADD/SUB/CMP source compare flag (unsigned)", .{}); - }, - .compare_flags_signed => { - return self.fail("TODO implement x86 ADD/SUB/CMP source compare flag (signed)", .{}); - }, - } - }, - .embedded_in_code, .memory => { - return self.fail("TODO implement x86 ADD/SUB/CMP destination memory", .{}); - }, - } - } - - /// Performs integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv. 
- fn genX8664Imul( - self: *Self, - dst_ty: Type, - dst_mcv: MCValue, - src_mcv: MCValue, - ) !void { - switch (dst_mcv) { - .none => unreachable, - .undef => unreachable, - .dead, .unreach, .immediate => unreachable, - .compare_flags_unsigned => unreachable, - .compare_flags_signed => unreachable, - .ptr_stack_offset => unreachable, - .ptr_embedded_in_code => unreachable, - .register => |dst_reg| { - switch (src_mcv) { - .none => unreachable, - .undef => try self.genSetReg(dst_ty, dst_reg, .undef), - .dead, .unreach => unreachable, - .ptr_stack_offset => unreachable, - .ptr_embedded_in_code => unreachable, - .register => |src_reg| { - // register, register - // - // Use the following imul opcode - // 0F AF /r: IMUL r32/64, r/m32/64 - const abi_size = dst_ty.abiSize(self.target.*); - const encoder = try X8664Encoder.init(self.code, 4); - encoder.rex(.{ - .w = abi_size == 8, - .r = dst_reg.isExtended(), - .b = src_reg.isExtended(), - }); - encoder.opcode_2byte(0x0f, 0xaf); - encoder.modRm_direct( - dst_reg.low_id(), - src_reg.low_id(), - ); - }, - .immediate => |imm| { - // register, immediate: - // depends on size of immediate. - // - // immediate fits in i8: - // 6B /r ib: IMUL r32/64, r/m32/64, imm8 - // - // immediate fits in i32: - // 69 /r id: IMUL r32/64, r/m32/64, imm32 - // - // immediate is huge: - // split into 2 instructions - // 1) copy the 64 bit immediate into a tmp register - // 2) perform register,register mul - // 0F AF /r: IMUL r32/64, r/m32/64 - if (math.minInt(i8) <= imm and imm <= math.maxInt(i8)) { - const abi_size = dst_ty.abiSize(self.target.*); - const encoder = try X8664Encoder.init(self.code, 4); - encoder.rex(.{ - .w = abi_size == 8, - .r = dst_reg.isExtended(), - .b = dst_reg.isExtended(), - }); - encoder.opcode_1byte(0x6B); - encoder.modRm_direct( - dst_reg.low_id(), - dst_reg.low_id(), - ); - encoder.imm8(@intCast(i8, imm)); - } else if (math.minInt(i32) <= imm and imm <= math.maxInt(i32)) { - const abi_size = dst_ty.abiSize(self.target.*); - const encoder = try X8664Encoder.init(self.code, 7); - encoder.rex(.{ - .w = abi_size == 8, - .r = dst_reg.isExtended(), - .b = dst_reg.isExtended(), - }); - encoder.opcode_1byte(0x69); - encoder.modRm_direct( - dst_reg.low_id(), - dst_reg.low_id(), - ); - encoder.imm32(@intCast(i32, imm)); - } else { - const src_reg = try self.copyToTmpRegister(dst_ty, src_mcv); - return self.genX8664Imul(dst_ty, dst_mcv, MCValue{ .register = src_reg }); - } - }, - .embedded_in_code, .memory, .stack_offset => { - return self.fail("TODO implement x86 multiply source memory", .{}); - }, - .compare_flags_unsigned => { - return self.fail("TODO implement x86 multiply source compare flag (unsigned)", .{}); - }, - .compare_flags_signed => { - return self.fail("TODO implement x86 multiply source compare flag (signed)", .{}); - }, - } - }, - .stack_offset => |off| { - switch (src_mcv) { - .none => unreachable, - .undef => return self.genSetStack(dst_ty, off, .undef), - .dead, .unreach => unreachable, - .ptr_stack_offset => unreachable, - .ptr_embedded_in_code => unreachable, - .register => |src_reg| { - // copy dst to a register - const dst_reg = try self.copyToTmpRegister(dst_ty, dst_mcv); - // multiply into dst_reg - // register, register - // Use the following imul opcode - // 0F AF /r: IMUL r32/64, r/m32/64 - const abi_size = dst_ty.abiSize(self.target.*); - const encoder = try X8664Encoder.init(self.code, 4); - encoder.rex(.{ - .w = abi_size == 8, - .r = dst_reg.isExtended(), - .b = src_reg.isExtended(), - }); - encoder.opcode_2byte(0x0f, 
0xaf); - encoder.modRm_direct( - dst_reg.low_id(), - src_reg.low_id(), - ); - // copy dst_reg back out - return self.genSetStack(dst_ty, off, MCValue{ .register = dst_reg }); - }, - .immediate => |imm| { - _ = imm; - return self.fail("TODO implement x86 multiply source immediate", .{}); - }, - .embedded_in_code, .memory, .stack_offset => { - return self.fail("TODO implement x86 multiply source memory", .{}); - }, - .compare_flags_unsigned => { - return self.fail("TODO implement x86 multiply source compare flag (unsigned)", .{}); - }, - .compare_flags_signed => { - return self.fail("TODO implement x86 multiply source compare flag (signed)", .{}); - }, - } - }, - .embedded_in_code, .memory => { - return self.fail("TODO implement x86 multiply destination memory", .{}); - }, - } - } - - fn genX8664ModRMRegToStack(self: *Self, ty: Type, off: u32, reg: Register, opcode: u8) !void { - const abi_size = ty.abiSize(self.target.*); - const adj_off = off + abi_size; - if (off > math.maxInt(i32)) { - return self.fail("stack offset too large", .{}); - } - - const i_adj_off = -@intCast(i32, adj_off); - const encoder = try X8664Encoder.init(self.code, 7); - encoder.rex(.{ - .w = abi_size == 8, - .r = reg.isExtended(), - }); - encoder.opcode_1byte(opcode); - if (i_adj_off < std.math.maxInt(i8)) { - // example: 48 89 55 7f mov QWORD PTR [rbp+0x7f],rdx - encoder.modRm_indirectDisp8( - reg.low_id(), - Register.ebp.low_id(), - ); - encoder.disp8(@intCast(i8, i_adj_off)); - } else { - // example: 48 89 95 80 00 00 00 mov QWORD PTR [rbp+0x80],rdx - encoder.modRm_indirectDisp32( - reg.low_id(), - Register.ebp.low_id(), - ); - encoder.disp32(i_adj_off); - } - } - fn genArgDbgInfo(self: *Self, inst: Air.Inst.Index, mcv: MCValue) !void { const ty_str = self.air.instructions.items(.data)[inst].ty_str; const zir = &self.mod_fn.owner_decl.getFileScope().zir; @@ -2674,7 +2110,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { switch (mcv) { .register => |reg| { - self.register_manager.getRegAssumeFree(toCanonicalReg(reg), inst); + self.register_manager.getRegAssumeFree(reg, inst); }, else => {}, } @@ -2684,7 +2120,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { fn airBreakpoint(self: *Self) !void { switch (arch) { - .i386, .x86_64 => { + .i386 => { try self.code.append(0xcc); // int3 }, .riscv64 => { @@ -2717,68 +2153,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // on linking. if (self.bin_file.tag == link.File.Elf.base_tag or self.bin_file.tag == link.File.Coff.base_tag) { switch (arch) { - .x86_64 => { - for (info.args) |mc_arg, arg_i| { - const arg = args[arg_i]; - const arg_ty = self.air.typeOf(arg); - const arg_mcv = try self.resolveInst(args[arg_i]); - // Here we do not use setRegOrMem even though the logic is similar, because - // the function call will move the stack pointer, so the offsets are different. 
- switch (mc_arg) { - .none => continue, - .register => |reg| { - try self.register_manager.getReg(reg, null); - try self.genSetReg(arg_ty, reg, arg_mcv); - }, - .stack_offset => |off| { - // Here we need to emit instructions like this: - // mov qword ptr [rsp + stack_offset], x - try self.genSetStack(arg_ty, off, arg_mcv); - }, - .ptr_stack_offset => { - return self.fail("TODO implement calling with MCValue.ptr_stack_offset arg", .{}); - }, - .ptr_embedded_in_code => { - return self.fail("TODO implement calling with MCValue.ptr_embedded_in_code arg", .{}); - }, - .undef => unreachable, - .immediate => unreachable, - .unreach => unreachable, - .dead => unreachable, - .embedded_in_code => unreachable, - .memory => unreachable, - .compare_flags_signed => unreachable, - .compare_flags_unsigned => unreachable, - } - } - - if (self.air.value(callee)) |func_value| { - if (func_value.castTag(.function)) |func_payload| { - const func = func_payload.data; - - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); - const ptr_bytes: u64 = @divExact(ptr_bits, 8); - const got_addr = if (self.bin_file.cast(link.File.Elf)) |elf_file| blk: { - const got = &elf_file.program_headers.items[elf_file.phdr_got_index.?]; - break :blk @intCast(u32, got.p_vaddr + func.owner_decl.link.elf.offset_table_index * ptr_bytes); - } else if (self.bin_file.cast(link.File.Coff)) |coff_file| - @intCast(u32, coff_file.offset_table_virtual_address + func.owner_decl.link.coff.offset_table_index * ptr_bytes) - else - unreachable; - - // ff 14 25 xx xx xx xx call [addr] - try self.code.ensureUnusedCapacity(7); - self.code.appendSliceAssumeCapacity(&[3]u8{ 0xff, 0x14, 0x25 }); - mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), got_addr); - } else if (func_value.castTag(.extern_fn)) |_| { - return self.fail("TODO implement calling extern functions", .{}); - } else { - return self.fail("TODO implement calling bitcasted functions", .{}); - } - } else { - return self.fail("TODO implement calling runtime known function pointer", .{}); - } - }, .riscv64 => { if (info.args.len > 0) return self.fail("TODO implement fn args for {}", .{self.target.cpu.arch}); @@ -2873,149 +2247,10 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }, else => return self.fail("TODO implement call for {}", .{self.target.cpu.arch}), } - } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { - for (info.args) |mc_arg, arg_i| { - const arg = args[arg_i]; - const arg_ty = self.air.typeOf(arg); - const arg_mcv = try self.resolveInst(args[arg_i]); - // Here we do not use setRegOrMem even though the logic is similar, because - // the function call will move the stack pointer, so the offsets are different. 
- switch (mc_arg) { - .none => continue, - .register => |reg| { - // TODO prevent this macho if block to be generated for all archs - switch (arch) { - .x86_64 => try self.register_manager.getReg(reg, null), - else => unreachable, - } - try self.genSetReg(arg_ty, reg, arg_mcv); - }, - .stack_offset => { - // Here we need to emit instructions like this: - // mov qword ptr [rsp + stack_offset], x - return self.fail("TODO implement calling with parameters in memory", .{}); - }, - .ptr_stack_offset => { - return self.fail("TODO implement calling with MCValue.ptr_stack_offset arg", .{}); - }, - .ptr_embedded_in_code => { - return self.fail("TODO implement calling with MCValue.ptr_embedded_in_code arg", .{}); - }, - .undef => unreachable, - .immediate => unreachable, - .unreach => unreachable, - .dead => unreachable, - .embedded_in_code => unreachable, - .memory => unreachable, - .compare_flags_signed => unreachable, - .compare_flags_unsigned => unreachable, - } - } - - if (self.air.value(callee)) |func_value| { - if (func_value.castTag(.function)) |func_payload| { - const func = func_payload.data; - // TODO I'm hacking my way through here by repurposing .memory for storing - // index to the GOT target symbol index. - switch (arch) { - .x86_64 => { - try self.genSetReg(Type.initTag(.u64), .rax, .{ - .memory = func.owner_decl.link.macho.local_sym_index, - }); - // callq *%rax - try self.code.ensureUnusedCapacity(2); - self.code.appendSliceAssumeCapacity(&[2]u8{ 0xff, 0xd0 }); - }, - else => unreachable, // unsupported architecture on MachO - } - } else if (func_value.castTag(.extern_fn)) |func_payload| { - const decl = func_payload.data; - const n_strx = try macho_file.addExternFn(mem.spanZ(decl.name)); - const offset = blk: { - switch (arch) { - .x86_64 => { - // callq - try self.code.ensureUnusedCapacity(5); - self.code.appendSliceAssumeCapacity(&[5]u8{ 0xe8, 0x0, 0x0, 0x0, 0x0 }); - break :blk @intCast(u32, self.code.items.len) - 4; - }, - else => unreachable, // unsupported architecture on MachO - } - }; - // Add relocation to the decl. - try macho_file.active_decl.?.link.macho.relocs.append(self.bin_file.allocator, .{ - .offset = offset, - .target = .{ .global = n_strx }, - .addend = 0, - .subtractor = null, - .pcrel = true, - .length = 2, - .@"type" = switch (arch) { - .x86_64 => @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), - else => unreachable, - }, - }); - } else { - return self.fail("TODO implement calling bitcasted functions", .{}); - } - } else { - return self.fail("TODO implement calling runtime known function pointer", .{}); - } - } else if (self.bin_file.cast(link.File.Plan9)) |p9| { - switch (arch) { - .x86_64 => { - for (info.args) |mc_arg, arg_i| { - const arg = args[arg_i]; - const arg_ty = self.air.typeOf(arg); - const arg_mcv = try self.resolveInst(args[arg_i]); - // Here we do not use setRegOrMem even though the logic is similar, because - // the function call will move the stack pointer, so the offsets are different. 
- switch (mc_arg) { - .none => continue, - .register => |reg| { - try self.register_manager.getReg(reg, null); - try self.genSetReg(arg_ty, reg, arg_mcv); - }, - .stack_offset => { - // Here we need to emit instructions like this: - // mov qword ptr [rsp + stack_offset], x - return self.fail("TODO implement calling with parameters in memory", .{}); - }, - .ptr_stack_offset => { - return self.fail("TODO implement calling with MCValue.ptr_stack_offset arg", .{}); - }, - .ptr_embedded_in_code => { - return self.fail("TODO implement calling with MCValue.ptr_embedded_in_code arg", .{}); - }, - .undef => unreachable, - .immediate => unreachable, - .unreach => unreachable, - .dead => unreachable, - .embedded_in_code => unreachable, - .memory => unreachable, - .compare_flags_signed => unreachable, - .compare_flags_unsigned => unreachable, - } - } - if (self.air.value(callee)) |func_value| { - if (func_value.castTag(.function)) |func_payload| { - try p9.seeDecl(func_payload.data.owner_decl); - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); - const ptr_bytes: u64 = @divExact(ptr_bits, 8); - const got_addr = p9.bases.data; - const got_index = func_payload.data.owner_decl.link.plan9.got_index.?; - // ff 14 25 xx xx xx xx call [addr] - try self.code.ensureUnusedCapacity(7); - self.code.appendSliceAssumeCapacity(&[3]u8{ 0xff, 0x14, 0x25 }); - const fn_got_addr = got_addr + got_index * ptr_bytes; - mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), @intCast(u32, fn_got_addr)); - } else return self.fail("TODO implement calling extern fn on plan9", .{}); - } else { - return self.fail("TODO implement calling runtime known function pointer", .{}); - } - }, - else => return self.fail("TODO implement call on plan9 for {}", .{self.target.cpu.arch}), - } + } else if (self.bin_file.cast(link.File.MachO)) |_| { + unreachable; // unsupported architecture for MachO + } else if (self.bin_file.cast(link.File.Plan9)) |_| { + return self.fail("TODO implement call on plan9 for {}", .{self.target.cpu.arch}); } else unreachable; const result: MCValue = result: { @@ -3052,14 +2287,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .i386 => { try self.code.append(0xc3); // ret }, - .x86_64 => { - // TODO when implementing defer, this will need to jump to the appropriate defer expression. - // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction - // which is available if the jump is 127 bytes or less forward. - try self.code.resize(self.code.items.len + 5); - self.code.items[self.code.items.len - 5] = 0xe9; // jmp rel32 - try self.exitlude_jump_relocs.append(self.gpa, self.code.items.len - 4); - }, .riscv64 => { mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.jalr(.zero, 0, .ra).toU32()); }, @@ -3099,25 +2326,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); const result: MCValue = switch (arch) { - .x86_64 => result: { - try self.code.ensureUnusedCapacity(8); - - // There are 2 operands, destination and source. - // Either one, but not both, can be a memory operand. - // Source operand can be an immediate, 8 bits or 32 bits. - const dst_mcv = if (lhs.isImmediate() or (lhs.isMemory() and rhs.isMemory())) - try self.copyToNewRegister(inst, lhs) - else - lhs; - // This instruction supports only signed 32-bit immediates at most. 
- const src_mcv = try self.limitImmediateType(bin_op.rhs, i32); - - try self.genX8664BinMathCode(Type.initTag(.bool), dst_mcv, src_mcv, 7, 0x38); - break :result switch (ty.isSignedInt()) { - true => MCValue{ .compare_flags_signed = op }, - false => MCValue{ .compare_flags_unsigned = op }, - }; - }, .arm, .armeb => result: { const lhs_is_register = lhs == .register; const rhs_is_register = rhs == .register; @@ -3183,7 +2391,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const liveness_condbr = self.liveness.getCondBr(inst); const reloc: Reloc = switch (arch) { - .i386, .x86_64 => reloc: { + .i386 => reloc: { try self.code.ensureUnusedCapacity(6); const opcode: u8 = switch (cond) { @@ -3214,7 +2422,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .register => |reg| blk: { // test reg, 1 // TODO detect al, ax, eax - const encoder = try X8664Encoder.init(self.code, 4); + const Encoder = @import("arch/x86_64/bits.zig").Encoder; + const encoder = try Encoder.init(self.code, 4); encoder.rex(.{ // TODO audit this codegen: we force w = true here to make // the value affect the big register @@ -3543,7 +2752,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { /// Send control flow to the `index` of `self.code`. fn jump(self: *Self, index: usize) !void { switch (arch) { - .i386, .x86_64 => { + .i386 => { try self.code.ensureUnusedCapacity(5); if (math.cast(i8, @intCast(i32, index) - (@intCast(i32, self.code.items.len + 2)))) |delta| { self.code.appendAssumeCapacity(0xeb); // jmp rel8 @@ -3639,13 +2848,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const air_tags = self.air.instructions.items(.tag); const result: MCValue = if (self.liveness.isUnused(inst)) .dead else switch (arch) { - .x86_64 => switch (air_tags[inst]) { - // lhs AND rhs - .bool_and => try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs), - // lhs OR rhs - .bool_or => try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs), - else => unreachable, // Not a boolean operation - }, .arm, .armeb => switch (air_tags[inst]) { .bool_and => try self.genArmBinOp(inst, bin_op.lhs, bin_op.rhs, .bool_and), .bool_or => try self.genArmBinOp(inst, bin_op.lhs, bin_op.rhs, .bool_or), @@ -3678,7 +2880,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { try block_data.relocs.ensureUnusedCapacity(self.gpa, 1); switch (arch) { - .i386, .x86_64 => { + .i386 => { // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction // which is available if the jump is 127 bytes or less forward. try self.code.resize(self.code.items.len + 5); @@ -3803,7 +3005,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { break :result MCValue{ .none = {} }; } }, - .x86_64, .i386 => result: { + .i386 => result: { for (args) |arg| { const input = zir.extraData(Zir.Inst.Asm.Input, extra_i); extra_i = input.end; @@ -3990,104 +3192,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.genSetStack(ty, stack_offset, MCValue{ .register = reg }); }, }, - .x86_64 => switch (mcv) { - .dead => unreachable, - .ptr_stack_offset => unreachable, - .ptr_embedded_in_code => unreachable, - .unreach, .none => return, // Nothing to do. - .undef => { - if (!self.wantSafety()) - return; // The already existing value will do just fine. - // TODO Upgrade this to a memset call when we have that available. 
- switch (ty.abiSize(self.target.*)) { - 1 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaa }), - 2 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaa }), - 4 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaaaaaa }), - 8 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaaaaaaaaaaaaaa }), - else => return self.fail("TODO implement memset", .{}), - } - }, - .compare_flags_unsigned => |op| { - _ = op; - return self.fail("TODO implement set stack variable with compare flags value (unsigned)", .{}); - }, - .compare_flags_signed => |op| { - _ = op; - return self.fail("TODO implement set stack variable with compare flags value (signed)", .{}); - }, - .immediate => |x_big| { - const abi_size = ty.abiSize(self.target.*); - const adj_off = stack_offset + abi_size; - if (adj_off > 128) { - return self.fail("TODO implement set stack variable with large stack offset", .{}); - } - try self.code.ensureUnusedCapacity(8); - switch (abi_size) { - 1 => { - return self.fail("TODO implement set abi_size=1 stack variable with immediate", .{}); - }, - 2 => { - return self.fail("TODO implement set abi_size=2 stack variable with immediate", .{}); - }, - 4 => { - const x = @intCast(u32, x_big); - // We have a positive stack offset value but we want a twos complement negative - // offset from rbp, which is at the top of the stack frame. - const negative_offset = @intCast(i8, -@intCast(i32, adj_off)); - const twos_comp = @bitCast(u8, negative_offset); - // mov DWORD PTR [rbp+offset], immediate - self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp }); - mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x); - }, - 8 => { - // We have a positive stack offset value but we want a twos complement negative - // offset from rbp, which is at the top of the stack frame. - const negative_offset = @intCast(i8, -@intCast(i32, adj_off)); - const twos_comp = @bitCast(u8, negative_offset); - - // 64 bit write to memory would take two mov's anyways so we - // insted just use two 32 bit writes to avoid register allocation - try self.code.ensureUnusedCapacity(14); - var buf: [8]u8 = undefined; - mem.writeIntLittle(u64, &buf, x_big); - - // mov DWORD PTR [rbp+offset+4], immediate - self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp + 4 }); - self.code.appendSliceAssumeCapacity(buf[4..8]); - - // mov DWORD PTR [rbp+offset], immediate - self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp }); - self.code.appendSliceAssumeCapacity(buf[0..4]); - }, - else => { - return self.fail("TODO implement set abi_size=large stack variable with immediate", .{}); - }, - } - }, - .embedded_in_code => { - // TODO this and `.stack_offset` below need to get improved to support types greater than - // register size, and do general memcpy - const reg = try self.copyToTmpRegister(ty, mcv); - return self.genSetStack(ty, stack_offset, MCValue{ .register = reg }); - }, - .register => |reg| { - try self.genX8664ModRMRegToStack(ty, stack_offset, reg, 0x89); - }, - .memory => |vaddr| { - _ = vaddr; - return self.fail("TODO implement set stack variable from memory vaddr", .{}); - }, - .stack_offset => |off| { - // TODO this and `.embedded_in_code` above need to get improved to support types greater than - // register size, and do general memcpy - - if (stack_offset == off) - return; // Copy stack variable to itself; nothing to do. 
- - const reg = try self.copyToTmpRegister(ty, mcv); - return self.genSetStack(ty, stack_offset, MCValue{ .register = reg }); - }, - }, else => return self.fail("TODO implement getSetStack for {}", .{self.target.cpu.arch}), } } @@ -4250,284 +3354,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }, else => return self.fail("TODO implement getSetReg for riscv64 {}", .{mcv}), }, - .x86_64 => switch (mcv) { - .dead => unreachable, - .ptr_stack_offset => unreachable, - .ptr_embedded_in_code => unreachable, - .unreach, .none => return, // Nothing to do. - .undef => { - if (!self.wantSafety()) - return; // The already existing value will do just fine. - // Write the debug undefined value. - switch (reg.size()) { - 8 => return self.genSetReg(ty, reg, .{ .immediate = 0xaa }), - 16 => return self.genSetReg(ty, reg, .{ .immediate = 0xaaaa }), - 32 => return self.genSetReg(ty, reg, .{ .immediate = 0xaaaaaaaa }), - 64 => return self.genSetReg(ty, reg, .{ .immediate = 0xaaaaaaaaaaaaaaaa }), - else => unreachable, - } - }, - .compare_flags_unsigned => |op| { - const encoder = try X8664Encoder.init(self.code, 7); - // TODO audit this codegen: we force w = true here to make - // the value affect the big register - encoder.rex(.{ - .w = true, - .b = reg.isExtended(), - }); - encoder.opcode_2byte(0x0f, switch (op) { - .gte => 0x93, - .gt => 0x97, - .neq => 0x95, - .lt => 0x92, - .lte => 0x96, - .eq => 0x94, - }); - encoder.modRm_direct( - 0, - reg.low_id(), - ); - }, - .compare_flags_signed => |op| { - _ = op; - return self.fail("TODO set register with compare flags value (signed)", .{}); - }, - .immediate => |x| { - // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit - // register is the fastest way to zero a register. - if (x == 0) { - // The encoding for `xor r32, r32` is `0x31 /r`. - const encoder = try X8664Encoder.init(self.code, 3); - - // If we're accessing e.g. r8d, we need to use a REX prefix before the actual operation. Since - // this is a 32-bit operation, the W flag is set to zero. X is also zero, as we're not using a SIB. - // Both R and B are set, as we're extending, in effect, the register bits *and* the operand. - encoder.rex(.{ - .r = reg.isExtended(), - .b = reg.isExtended(), - }); - encoder.opcode_1byte(0x31); - // Section 3.1.1.1 of the Intel x64 Manual states that "/r indicates that the - // ModR/M byte of the instruction contains a register operand and an r/m operand." - encoder.modRm_direct( - reg.low_id(), - reg.low_id(), - ); - - return; - } - if (x <= math.maxInt(i32)) { - // Next best case: if we set the lower four bytes, the upper four will be zeroed. - // - // The encoding for `mov IMM32 -> REG` is (0xB8 + R) IMM. - - const encoder = try X8664Encoder.init(self.code, 6); - // Just as with XORing, we need a REX prefix. This time though, we only - // need the B bit set, as we're extending the opcode's register field, - // and there is no Mod R/M byte. - encoder.rex(.{ - .b = reg.isExtended(), - }); - encoder.opcode_withReg(0xB8, reg.low_id()); - - // no ModR/M byte - - // IMM - encoder.imm32(@intCast(i32, x)); - return; - } - // Worst case: we need to load the 64-bit register with the IMM. GNU's assemblers calls - // this `movabs`, though this is officially just a different variant of the plain `mov` - // instruction. - // - // This encoding is, in fact, the *same* as the one used for 32-bit loads. The only - // difference is that we set REX.W before the instruction, which extends the load to - // 64-bit and uses the full bit-width of the register. 
- {
- const encoder = try X8664Encoder.init(self.code, 10);
- encoder.rex(.{
- .w = true,
- .b = reg.isExtended(),
- });
- encoder.opcode_withReg(0xB8, reg.low_id());
- encoder.imm64(x);
- }
- },
- .embedded_in_code => |code_offset| {
- // We need the offset from RIP in a signed i32 twos complement.
- // The instruction is 7 bytes long and RIP points to the next instruction.
-
- // 64-bit LEA is encoded as REX.W 8D /r.
- const rip = self.code.items.len + 7;
- const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip);
- const offset = @intCast(i32, big_offset);
- const encoder = try X8664Encoder.init(self.code, 7);
-
- // byte 1, always exists because w = true
- encoder.rex(.{
- .w = true,
- .r = reg.isExtended(),
- });
- // byte 2
- encoder.opcode_1byte(0x8D);
- // byte 3
- encoder.modRm_RIPDisp32(reg.low_id());
- // bytes 4-7
- encoder.disp32(offset);
-
- // Double-check that we haven't made any math errors.
- assert(rip == self.code.items.len);
- },
- .register => |src_reg| {
- // If the registers are the same, nothing to do.
- if (src_reg.id() == reg.id())
- return;
-
- // This is a variant of 8B /r.
- const abi_size = ty.abiSize(self.target.*);
- const encoder = try X8664Encoder.init(self.code, 3);
- encoder.rex(.{
- .w = abi_size == 8,
- .r = reg.isExtended(),
- .b = src_reg.isExtended(),
- });
- encoder.opcode_1byte(0x8B);
- encoder.modRm_direct(reg.low_id(), src_reg.low_id());
- },
- .memory => |x| {
- if (self.bin_file.options.pie) {
- // RIP-relative displacement to the entry in the GOT table.
- const abi_size = ty.abiSize(self.target.*);
- const encoder = try X8664Encoder.init(self.code, 10);
-
- // LEA reg, []
-
- // We encode the instruction FIRST because prefixes may or may not appear.
- // After we encode the instruction, we will know that the displacement bytes
- // for [] will be at self.code.items.len - 4.
- encoder.rex(.{
- .w = true, // force 64 bit because loading an address (to the GOT)
- .r = reg.isExtended(),
- });
- encoder.opcode_1byte(0x8D);
- encoder.modRm_RIPDisp32(reg.low_id());
- encoder.disp32(0);
-
- const offset = @intCast(u32, self.code.items.len);
-
- if (self.bin_file.cast(link.File.MachO)) |macho_file| {
- // TODO I think the reloc might be in the wrong place.
- const decl = macho_file.active_decl.?;
- // Load reloc for LEA instruction.
- try decl.link.macho.relocs.append(self.bin_file.allocator, .{
- .offset = offset - 4,
- .target = .{ .local = @intCast(u32, x) },
- .addend = 0,
- .subtractor = null,
- .pcrel = true,
- .length = 2,
- .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_GOT),
- });
- } else {
- return self.fail("TODO implement genSetReg for PIE GOT indirection on this platform", .{});
- }
-
- // MOV reg, [reg]
- encoder.rex(.{
- .w = abi_size == 8,
- .r = reg.isExtended(),
- .b = reg.isExtended(),
- });
- encoder.opcode_1byte(0x8B);
- encoder.modRm_indirectDisp0(reg.low_id(), reg.low_id());
- } else if (x <= math.maxInt(i32)) {
- // Moving from memory to a register is a variant of `8B /r`.
- // Since we're using 64-bit moves, we require a REX.
- // This variant also requires a SIB, as it would otherwise be RIP-relative.
- // We want mode zero with the lower three bits set to four to indicate an SIB with no other displacement.
- // The SIB must be 0x25, to indicate a disp32 with no scaled index.
- // 0b00RRR100, where RRR is the lower three bits of the register ID.
- // The instruction is thus eight bytes; REX 0x8B 0b00RRR100 0x25 followed by a four-byte disp32.
- const abi_size = ty.abiSize(self.target.*);
- const encoder = try X8664Encoder.init(self.code, 8);
- encoder.rex(.{
- .w = abi_size == 8,
- .r = reg.isExtended(),
- });
- encoder.opcode_1byte(0x8B);
- // effective address = [SIB]
- encoder.modRm_SIBDisp0(reg.low_id());
- // SIB = disp32
- encoder.sib_disp32();
- encoder.disp32(@intCast(i32, x));
- } else {
- // If this is RAX, we can use a direct load; otherwise, we need to load the address, then indirectly load
- // the value.
- if (reg.id() == 0) {
- // REX.W 0xA1 moffs64*
- // moffs64* is a 64-bit offset "relative to segment base", which really just means the
- // absolute address for all practical purposes.
-
- const encoder = try X8664Encoder.init(self.code, 10);
- encoder.rex(.{
- .w = true,
- });
- encoder.opcode_1byte(0xA1);
- encoder.writeIntLittle(u64, x);
- } else {
- // This requires two instructions: a move imm as used above, followed by an indirect load using the register
- // as the address and the register as the destination.
- //
- // This cannot be used if the lower three bits of the id are equal to four or five, as there
- // is no way to encode it. This means that RSP, RBP, R12, and R13 cannot be used with
- // this instruction.
- const id3 = @truncate(u3, reg.id());
- assert(id3 != 4 and id3 != 5);
-
- // Rather than duplicate the logic used for the move, we just use a self-call with a new MCValue.
- try self.genSetReg(ty, reg, MCValue{ .immediate = x });
-
- // Now, the register contains the address of the value to load into it.
- // Currently, we're only allowing 64-bit registers, so we need the `REX.W 8B /r` variant.
- // TODO: determine whether to allow other sized registers, and if so, handle them properly.
-
- // mov reg, [reg]
- const abi_size = ty.abiSize(self.target.*);
- const encoder = try X8664Encoder.init(self.code, 3);
- encoder.rex(.{
- .w = abi_size == 8,
- .r = reg.isExtended(),
- .b = reg.isExtended(),
- });
- encoder.opcode_1byte(0x8B);
- encoder.modRm_indirectDisp0(reg.low_id(), reg.low_id());
- }
- }
- },
- .stack_offset => |unadjusted_off| {
- const abi_size = ty.abiSize(self.target.*);
- const off = unadjusted_off + abi_size;
- if (off < std.math.minInt(i32) or off > std.math.maxInt(i32)) {
- return self.fail("stack offset too large", .{});
- }
- const ioff = -@intCast(i32, off);
- const encoder = try X8664Encoder.init(self.code, 3);
- encoder.rex(.{
- .w = abi_size == 8,
- .r = reg.isExtended(),
- });
- encoder.opcode_1byte(0x8B);
- if (std.math.minInt(i8) <= ioff and ioff <= std.math.maxInt(i8)) {
- // Example: 48 8b 4d 7f mov rcx,QWORD PTR [rbp+0x7f]
- encoder.modRm_indirectDisp8(reg.low_id(), Register.ebp.low_id());
- encoder.disp8(@intCast(i8, ioff));
- } else {
- // Example: 48 8b 8d 80 00 00 00 mov rcx,QWORD PTR [rbp+0x80]
- encoder.modRm_indirectDisp32(reg.low_id(), Register.ebp.low_id());
- encoder.disp32(ioff);
- }
- },
- },
 else => return self.fail("TODO implement getSetReg for {}", .{self.target.cpu.arch}),
 }
 }
@@ -4840,61 +3666,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
 const ret_ty = fn_ty.fnReturnType();
 switch (arch) {
- .x86_64 => {
- switch (cc) {
- .Naked => {
- assert(result.args.len == 0);
- result.return_value = .{ .unreach = {} };
- result.stack_byte_count = 0;
- result.stack_align = 1;
- return result;
- },
- .Unspecified, .C => {
- var next_int_reg: usize = 0;
- var next_stack_offset: u32 = 0;
-
- for (param_types) |ty, i| {
- if (!ty.hasCodeGenBits()) {
- assert(cc != .C);
- result.args[i] = .{ .none = {} };
- continue;
- }
- const param_size =
@intCast(u32, ty.abiSize(self.target.*));
- const pass_in_reg = switch (ty.zigTypeTag()) {
- .Bool => true,
- .Int => param_size <= 8,
- .Pointer => ty.ptrSize() != .Slice,
- .Optional => ty.isPtrLikeOptional(),
- else => false,
- };
- if (pass_in_reg) {
- if (next_int_reg >= c_abi_int_param_regs.len) {
- result.args[i] = .{ .stack_offset = next_stack_offset };
- next_stack_offset += param_size;
- } else {
- const aliased_reg = registerAlias(
- c_abi_int_param_regs[next_int_reg],
- param_size,
- );
- result.args[i] = .{ .register = aliased_reg };
- next_int_reg += 1;
- }
- } else {
- // For simplicity of codegen, slices and other types are always pushed onto the stack.
- // TODO: look into optimizing this by passing things as registers sometimes,
- // such as ptr and len of slices as separate registers.
- // TODO: we also need to honor the C ABI for relevant types rather than passing on
- // the stack here.
- result.args[i] = .{ .stack_offset = next_stack_offset };
- next_stack_offset += param_size;
- }
- }
- result.stack_byte_count = next_stack_offset;
- result.stack_align = 16;
- },
- else => return self.fail("TODO implement function parameters for {} on x86_64", .{cc}),
- }
- },
 .arm, .armeb => {
 switch (cc) {
 .Naked => {
@@ -4948,15 +3719,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
 } else if (!ret_ty.hasCodeGenBits()) {
 result.return_value = .{ .none = {} };
 } else switch (arch) {
- .x86_64 => switch (cc) {
- .Naked => unreachable,
- .Unspecified, .C => {
- const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*));
- const aliased_reg = registerAlias(c_abi_int_return_regs[0], ret_ty_size);
- result.return_value = .{ .register = aliased_reg };
- },
- else => return self.fail("TODO implement function return values for {}", .{cc}),
- },
 .arm, .armeb => switch (cc) {
 .Naked => unreachable,
 .Unspecified, .C => {
@@ -5000,7 +3762,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
 const Register = switch (arch) {
 .i386 => @import("arch/x86/bits.zig").Register,
- .x86_64 => @import("arch/x86_64/bits.zig").Register,
 .riscv64 => @import("arch/riscv64/bits.zig").Register,
 .arm, .armeb => @import("arch/arm/bits.zig").Register,
 else => enum {
@@ -5026,7 +3787,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
 const callee_preserved_regs = switch (arch) {
 .i386 => @import("arch/x86/bits.zig").callee_preserved_regs,
- .x86_64 => @import("arch/x86_64/bits.zig").callee_preserved_regs,
 .riscv64 => @import("arch/riscv64/bits.zig").callee_preserved_regs,
 .arm, .armeb => @import("arch/arm/bits.zig").callee_preserved_regs,
 else => [_]Register{},
@@ -5034,14 +3794,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
 const c_abi_int_param_regs = switch (arch) {
 .i386 => @import("arch/x86/bits.zig").c_abi_int_param_regs,
- .x86_64 => @import("arch/x86_64/bits.zig").c_abi_int_param_regs,
 .arm, .armeb => @import("arch/arm/bits.zig").c_abi_int_param_regs,
 else => [_]Register{},
 };
 const c_abi_int_return_regs = switch (arch) {
 .i386 => @import("arch/x86/bits.zig").c_abi_int_return_regs,
- .x86_64 => @import("arch/x86_64/bits.zig").c_abi_int_return_regs,
 .arm, .armeb => @import("arch/arm/bits.zig").c_abi_int_return_regs,
 else => [_]Register{},
 };
@@ -5052,28 +3810,5 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
 }
 return std.meta.stringToEnum(Register, name);
 }
-
- fn registerAlias(reg: Register, size_bytes: u32) Register {
- switch (arch) {
- // For x86_64 we have to pick a smaller register alias depending on abi size.
- .x86_64 => switch (size_bytes) { - 1 => return reg.to8(), - 2 => return reg.to16(), - 4 => return reg.to32(), - 8 => return reg.to64(), - else => unreachable, - }, - else => return reg, - } - } - - /// For most architectures this does nothing. For x86_64 it resolves any aliased registers - /// to the 64-bit wide ones. - fn toCanonicalReg(reg: Register) Register { - return switch (arch) { - .x86_64 => reg.to64(), - else => reg, - }; - } }; }
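
Editor's note (not part of the patch): the moved genSetReg logic builds each x86_64 instruction from the same four fields throughout: an optional REX prefix (0b0100WRXB), the opcode, a ModR/M byte, and displacement or immediate bytes. The following standalone Zig sketch hand-assembles the `mov rcx, QWORD PTR [rbp-0x8]` shape cited in the `.stack_offset` case, without the compiler's Encoder API; the register numbers and the -8 displacement are illustrative values, not taken from the patch.

    const std = @import("std");

    pub fn main() void {
        var buf: [4]u8 = undefined;
        // REX prefix 0b0100WRXB: W=1 selects the 64-bit operand size; R/X/B
        // stay 0 because rcx and rbp are not extended (r8..r15) registers.
        buf[0] = 0b0100_1000;
        // Opcode 8B /r: mov r64, r/m64.
        buf[1] = 0x8B;
        // ModR/M: mod=01 (indirect + disp8), reg=1 (rcx), r/m=5 (rbp).
        buf[2] = 0b01_000_000 | (1 << 3) | 5;
        // Two's-complement 8-bit displacement for -8.
        buf[3] = @bitCast(u8, @as(i8, -8));
        // Prints "48 8b 4d f8", matching the disp8 example in the comments above.
        for (buf) |byte| {
            std.debug.print("{x:0>2} ", .{byte});
        }
        std.debug.print("\n", .{});
    }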
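
Similarly, the `.immediate` branch of the removed genSetStack turns a positive offset from the frame base into a negative rbp-relative disp8 via two's complement before emitting `0xc7 0x45 disp8 imm32` (`mov DWORD PTR [rbp+disp8], imm32`). A minimal sketch of that arithmetic, with an assumed stack offset and immediate:

    const std = @import("std");
    const mem = std.mem;

    pub fn main() void {
        // Assumed example values: a 4-byte slot at stack offset 4.
        const stack_offset: u32 = 4;
        const abi_size: u32 = 4;
        const adj_off = stack_offset + abi_size; // must stay <= 128 to fit a disp8
        // Positive offset from the frame base becomes a negative disp8 from rbp.
        const negative_offset = @intCast(i8, -@intCast(i32, adj_off));
        const twos_comp = @bitCast(u8, negative_offset); // 0xf8 for -8
        var code: [7]u8 = undefined;
        // mov DWORD PTR [rbp-8], 0xdeadbeef => c7 45 f8 ef be ad de
        code[0] = 0xc7;
        code[1] = 0x45;
        code[2] = twos_comp;
        mem.writeIntLittle(u32, code[3..7], 0xdeadbeef);
        for (code) |byte| {
            std.debug.print("{x:0>2} ", .{byte});
        }
        std.debug.print("\n", .{});
    }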