From 3c7310f8cc164dbf6e6393392e8812c9f5b6a270 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 31 Oct 2021 23:08:53 +0100 Subject: [PATCH] stage2 x86_64: add MIR->Isel lowering step for x86_64 * incorporate Andrew's MIR draft as Mir.zig * add skeleton for Emit.zig module - Emit will lower MIR into machine code or textual ASM. * implement push * implement ret * implement mov r/m, r * implement sub r/m imm and sub r/m, r * put encoding common ops together - some ops share impl such as MOV and cmp so put them together and vary the actual opcode with modRM ext only. * implement pop * implement movabs - movabs being a special-case of mov not handled by general mov MIR instruction due to requirement to handle 64bit immediates. * store imm64 as a struct `Imm64{ msb: u32, lsb: u32 }` in extra data for use with for instance movabs inst * implement more mov variations * implement adc * implement add * implement sub * implement xor * implement and * implement or * implement sbb * implement cmp * implement lea - lea doesn't follow the scheme as other inst above. Similarly, I think bit shifts and rotates should be put in a separate basket too. 
* implement adc_scale_src * implement add_scale_src * implement sub_scale_src * implement xor_scale_src * implement and_scale_src * implement or_scale_src * implement sbb_scale_src * implement cmp_scale_src * implement adc_scale_dst * implement add_scale_dst * implement sub_scale_dst * implement xor_scale_dst * implement and_scale_dst * implement or_scale_dst * implement sbb_scale_dst * implement cmp_scale_dst * implement mov_scale_src * implement mov_scale_dst * implement adc_scale_imm * implement add_scale_imm * implement sub_scale_imm * implement xor_scale_imm * implement and_scale_imm * implement or_scale_imm * implement sbb_scale_imm * implement cmp_scale_imm * port bin math to MIR * backpatch stack size into prev MIR inst * implement Function.gen() (minus dbg info) * implement jmp/call [imm] - we can now call functions using indirect absolute addressing, or via registers. * port airRet to use MIR * port airLoop to use MIR * patch up performReloc to use inst indices * implement conditional jumps (without relocs) * implement set byte on condition * implement basic lea r64, [rip + imm] * implement calling externs * implement callq in PIE * implement lea RIP in PIE context * remove all refs to Encoder from CodeGen * implement basic imul ops * pass all Linux tests! 
* enable most of dbg info gen * generate arg dbg info in Emit --- src/arch/x86_64/CodeGen.zig | 1382 ++++++++++++++--------------------- src/arch/x86_64/Emit.zig | 1161 +++++++++++++++++++++++++++++ src/arch/x86_64/Mir.zig | 379 ++++++++++ src/arch/x86_64/bits.zig | 18 +- src/codegen.zig | 2 +- 5 files changed, 2120 insertions(+), 822 deletions(-) create mode 100644 src/arch/x86_64/Emit.zig create mode 100644 src/arch/x86_64/Mir.zig diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index b163582135..4089a2edd8 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -14,11 +14,12 @@ const Allocator = mem.Allocator; const Compilation = @import("../../Compilation.zig"); const DebugInfoOutput = @import("../../codegen.zig").DebugInfoOutput; const DW = std.dwarf; -const Encoder = @import("bits.zig").Encoder; +const Emit = @import("Emit.zig"); const ErrorMsg = Module.ErrorMsg; const FnResult = @import("../../codegen.zig").FnResult; const GenerateSymbolError = @import("../../codegen.zig").GenerateSymbolError; const Liveness = @import("../../Liveness.zig"); +const Mir = @import("Mir.zig"); const Module = @import("../../Module.zig"); const RegisterManager = @import("../../register_manager.zig").RegisterManager; const Target = std.Target; @@ -32,15 +33,12 @@ const InnerError = error{ CodegenFail, }; -arch: std.Target.Cpu.Arch, gpa: *Allocator, air: Air, liveness: Liveness, bin_file: *link.File, target: *const std.Target, mod_fn: *const Module.Fn, -code: *std.ArrayList(u8), -debug_output: DebugInfoOutput, err_msg: ?*ErrorMsg, args: []MCValue, ret_mcv: MCValue, @@ -49,18 +47,19 @@ arg_index: usize, src_loc: Module.SrcLoc, stack_align: u32, -prev_di_line: u32, -prev_di_column: u32, +/// MIR Instructions +mir_instructions: std.MultiArrayList(Mir.Inst) = .{}, +/// MIR extra data +mir_extra: std.ArrayListUnmanaged(u32) = .{}, + /// Byte offset within the source file of the ending curly. 
end_di_line: u32, end_di_column: u32, -/// Relative to the beginning of `code`. -prev_di_pc: usize, /// The value is an offset into the `Function` `code` from the beginning. /// To perform the reloc, write 32-bit signed little-endian integer /// which is a relative jump, based on the address following the reloc. -exitlude_jump_relocs: std.ArrayListUnmanaged(usize) = .{}, +exitlude_jump_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .{}, /// Whenever there is a runtime branch, we push a Branch onto this stack, /// and pop it off when the runtime branch joins. This provides an "overlay" @@ -89,7 +88,7 @@ air_bookkeeping: @TypeOf(air_bookkeeping_init) = air_bookkeeping_init, const air_bookkeeping_init = if (std.debug.runtime_safety) @as(usize, 0) else {}; -const MCValue = union(enum) { +pub const MCValue = union(enum) { /// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc. /// TODO Look into deleting this tag and using `dead` instead, since every use /// of MCValue.none should be instead looking at the type and noticing it is 0 bits. @@ -178,7 +177,7 @@ const StackAllocation = struct { }; const BlockData = struct { - relocs: std.ArrayListUnmanaged(Reloc), + relocs: std.ArrayListUnmanaged(Mir.Inst.Index), /// The first break instruction encounters `null` here and chooses a /// machine code value for the block result, populating this field. /// Following break instructions encounter that value and use it for @@ -186,18 +185,6 @@ const BlockData = struct { mcv: MCValue, }; -const Reloc = union(enum) { - /// The value is an offset into the `Function` `code` from the beginning. - /// To perform the reloc, write 32-bit signed little-endian integer - /// which is a relative jump, based on the address following the reloc. 
- rel32: usize, - /// A branch in the ARM instruction set - arm_branch: struct { - pos: usize, - cond: @import("../../arch/arm/bits.zig").Condition, - }, -}; - const BigTomb = struct { function: *Self, inst: Air.Inst.Index, @@ -238,7 +225,6 @@ const BigTomb = struct { const Self = @This(); pub fn generate( - arch: std.Target.Cpu.Arch, bin_file: *link.File, src_loc: Module.SrcLoc, module_fn: *Module.Fn, @@ -247,7 +233,7 @@ pub fn generate( code: *std.ArrayList(u8), debug_output: DebugInfoOutput, ) GenerateSymbolError!FnResult { - if (build_options.skip_non_native and builtin.cpu.arch != arch) { + if (build_options.skip_non_native and builtin.cpu.arch != bin_file.options.target.cpu.arch) { @panic("Attempted to compile for architecture that was disabled by build configuration"); } @@ -263,15 +249,12 @@ pub fn generate( try branch_stack.append(.{}); var function = Self{ - .arch = arch, .gpa = bin_file.allocator, .air = air, .liveness = liveness, .target = &bin_file.options.target, .bin_file = bin_file, .mod_fn = module_fn, - .code = code, - .debug_output = debug_output, .err_msg = null, .args = undefined, // populated after `resolveCallingConventionValues` .ret_mcv = undefined, // populated after `resolveCallingConventionValues` @@ -280,15 +263,14 @@ pub fn generate( .branch_stack = &branch_stack, .src_loc = src_loc, .stack_align = undefined, - .prev_di_pc = 0, - .prev_di_line = module_fn.lbrace_line, - .prev_di_column = module_fn.lbrace_column, .end_di_line = module_fn.rbrace_line, .end_di_column = module_fn.rbrace_column, }; defer function.stack.deinit(bin_file.allocator); defer function.blocks.deinit(bin_file.allocator); defer function.exitlude_jump_relocs.deinit(bin_file.allocator); + defer function.mir_instructions.deinit(bin_file.allocator); + defer function.mir_extra.deinit(bin_file.allocator); var call_info = function.resolveCallingConventionValues(fn_type) catch |err| switch (err) { error.CodegenFail => return FnResult{ .fail = function.err_msg.? 
}, @@ -306,6 +288,30 @@ pub fn generate( else => |e| return e, }; + var mir = Mir{ + .function = &function, + .instructions = function.mir_instructions.toOwnedSlice(), + .extra = function.mir_extra.toOwnedSlice(bin_file.allocator), + }; + defer mir.deinit(bin_file.allocator); + + var emit = Emit{ + .mir = mir, + .bin_file = bin_file, + .debug_output = debug_output, + .target = &bin_file.options.target, + .src_loc = src_loc, + .code = code, + .prev_di_pc = 0, + .prev_di_line = module_fn.lbrace_line, + .prev_di_column = module_fn.lbrace_column, + }; + defer emit.deinit(); + emit.emitMir() catch |err| switch (err) { + error.EmitFail => return FnResult{ .fail = emit.err_msg.? }, + else => |e| return e, + }; + if (function.err_msg) |em| { return FnResult{ .fail = em }; } else { @@ -313,71 +319,143 @@ pub fn generate( } } -fn gen(self: *Self) !void { - try self.code.ensureUnusedCapacity(11); +fn addInst(self: *Self, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index { + const gpa = self.gpa; + try self.mir_instructions.ensureUnusedCapacity(gpa, 1); + const result_index = @intCast(Air.Inst.Index, self.mir_instructions.len); + self.mir_instructions.appendAssumeCapacity(inst); + return result_index; +} +pub fn addExtra(self: *Self, extra: anytype) Allocator.Error!u32 { + const fields = std.meta.fields(@TypeOf(extra)); + try self.mir_extra.ensureUnusedCapacity(self.gpa, fields.len); + return self.addExtraAssumeCapacity(extra); +} + +pub fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 { + const fields = std.meta.fields(@TypeOf(extra)); + const result = @intCast(u32, self.mir_extra.items.len); + inline for (fields) |field| { + self.mir_extra.appendAssumeCapacity(switch (field.field_type) { + u32 => @field(extra, field.name), + i32 => @bitCast(u32, @field(extra, field.name)), + else => @compileError("bad field type"), + }); + } + return result; +} + +fn gen(self: *Self) InnerError!void { const cc = self.fn_type.fnCallingConvention(); if (cc != .Naked) { + _ = try 
self.addInst(.{ + .tag = .push, + .ops = (Mir.Ops{ + .reg1 = .rbp, + }).encode(), + .data = undefined, // unused for push reg, + }); + _ = try self.addInst(.{ + .tag = .mov, + .ops = (Mir.Ops{ + .reg1 = .rsp, + .reg2 = .rbp, + }).encode(), + .data = undefined, + }); // We want to subtract the aligned stack frame size from rsp here, but we don't // yet know how big it will be, so we leave room for a 4-byte stack size. // TODO During semantic analysis, check if there are no function calls. If there // are none, here we can omit the part where we subtract and then add rsp. - self.code.appendSliceAssumeCapacity(&[_]u8{ - 0x55, // push rbp - 0x48, 0x89, 0xe5, // mov rbp, rsp - 0x48, 0x81, 0xec, // sub rsp, imm32 (with reloc) + const backpatch_reloc = try self.addInst(.{ + .tag = .sub, + .ops = (Mir.Ops{ + .reg1 = .rsp, + }).encode(), + .data = .{ .imm = 0 }, + }); + + _ = try self.addInst(.{ + .tag = .dbg_prologue_end, + .ops = undefined, + .data = undefined, }); - const reloc_index = self.code.items.len; - self.code.items.len += 4; - try self.dbgSetPrologueEnd(); try self.genBody(self.air.getMainBody()); const stack_end = self.max_end_stack; - if (stack_end > math.maxInt(i32)) + if (stack_end > math.maxInt(i32)) { return self.failSymbol("too much stack used in call parameters", .{}); + } const aligned_stack_end = mem.alignForward(stack_end, self.stack_align); - mem.writeIntLittle(u32, self.code.items[reloc_index..][0..4], @intCast(u32, aligned_stack_end)); - - if (self.code.items.len >= math.maxInt(i32)) { - return self.failSymbol("unable to perform relocation: jump too far", .{}); + if (aligned_stack_end > 0) { + self.mir_instructions.items(.data)[backpatch_reloc].imm = @intCast(i32, aligned_stack_end); } + if (self.exitlude_jump_relocs.items.len == 1) { - self.code.items.len -= 5; + self.mir_instructions.len -= 1; } else for (self.exitlude_jump_relocs.items) |jmp_reloc| { - const amt = self.code.items.len - (jmp_reloc + 4); - const s32_amt = @intCast(i32, amt); - 
mem.writeIntLittle(i32, self.code.items[jmp_reloc..][0..4], s32_amt); + self.mir_instructions.items(.data)[jmp_reloc].inst = @intCast(u32, self.mir_instructions.len); } - // Important to be after the possible self.code.items.len -= 5 above. - try self.dbgSetEpilogueBegin(); + _ = try self.addInst(.{ + .tag = .dbg_epilogue_begin, + .ops = undefined, + .data = undefined, + }); - try self.code.ensureUnusedCapacity(9); - // add rsp, x - if (aligned_stack_end > math.maxInt(i8)) { - // example: 48 81 c4 ff ff ff 7f add rsp,0x7fffffff - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x81, 0xc4 }); - const x = @intCast(u32, aligned_stack_end); - mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x); - } else if (aligned_stack_end != 0) { - // example: 48 83 c4 7f add rsp,0x7f - const x = @intCast(u8, aligned_stack_end); - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x83, 0xc4, x }); + if (aligned_stack_end > 0) { + // add rsp, x + _ = try self.addInst(.{ + .tag = .add, + .ops = (Mir.Ops{ + .reg1 = .rsp, + }).encode(), + .data = .{ .imm = @intCast(i32, aligned_stack_end) }, + }); } - self.code.appendSliceAssumeCapacity(&[_]u8{ - 0x5d, // pop rbp - 0xc3, // ret + _ = try self.addInst(.{ + .tag = .pop, + .ops = (Mir.Ops{ + .reg1 = .rbp, + }).encode(), + .data = undefined, + }); + _ = try self.addInst(.{ + .tag = .ret, + .ops = (Mir.Ops{ + .flags = 0b11, + }).encode(), + .data = undefined, }); } else { - try self.dbgSetPrologueEnd(); + _ = try self.addInst(.{ + .tag = .dbg_prologue_end, + .ops = undefined, + .data = undefined, + }); + try self.genBody(self.air.getMainBody()); - try self.dbgSetEpilogueBegin(); + + _ = try self.addInst(.{ + .tag = .dbg_epilogue_begin, + .ops = undefined, + .data = undefined, + }); } // Drop them off at the rbrace. 
- try self.dbgAdvancePCAndLine(self.end_di_line, self.end_di_column); + const payload = try self.addExtra(Mir.DbgLineColumn{ + .line = self.end_di_line, + .column = self.end_di_column, + }); + _ = try self.addInst(.{ + .tag = .dbg_line, + .ops = undefined, + .data = .{ .payload = payload }, + }); } fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { @@ -518,79 +596,6 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { } } -fn dbgSetPrologueEnd(self: *Self) InnerError!void { - switch (self.debug_output) { - .dwarf => |dbg_out| { - try dbg_out.dbg_line.append(DW.LNS.set_prologue_end); - try self.dbgAdvancePCAndLine(self.prev_di_line, self.prev_di_column); - }, - .plan9 => {}, - .none => {}, - } -} - -fn dbgSetEpilogueBegin(self: *Self) InnerError!void { - switch (self.debug_output) { - .dwarf => |dbg_out| { - try dbg_out.dbg_line.append(DW.LNS.set_epilogue_begin); - try self.dbgAdvancePCAndLine(self.prev_di_line, self.prev_di_column); - }, - .plan9 => {}, - .none => {}, - } -} - -fn dbgAdvancePCAndLine(self: *Self, line: u32, column: u32) InnerError!void { - const delta_line = @intCast(i32, line) - @intCast(i32, self.prev_di_line); - const delta_pc: usize = self.code.items.len - self.prev_di_pc; - switch (self.debug_output) { - .dwarf => |dbg_out| { - // TODO Look into using the DWARF special opcodes to compress this data. - // It lets you emit single-byte opcodes that add different numbers to - // both the PC and the line number at the same time. 
- try dbg_out.dbg_line.ensureUnusedCapacity(11); - dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.advance_pc); - leb128.writeULEB128(dbg_out.dbg_line.writer(), delta_pc) catch unreachable; - if (delta_line != 0) { - dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.advance_line); - leb128.writeILEB128(dbg_out.dbg_line.writer(), delta_line) catch unreachable; - } - dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.copy); - self.prev_di_pc = self.code.items.len; - self.prev_di_line = line; - self.prev_di_column = column; - self.prev_di_pc = self.code.items.len; - }, - .plan9 => |dbg_out| { - if (delta_pc <= 0) return; // only do this when the pc changes - // we have already checked the target in the linker to make sure it is compatable - const quant = @import("../../link/Plan9/aout.zig").getPCQuant(self.target.cpu.arch) catch unreachable; - - // increasing the line number - try @import("../../link/Plan9.zig").changeLine(dbg_out.dbg_line, delta_line); - // increasing the pc - const d_pc_p9 = @intCast(i64, delta_pc) - quant; - if (d_pc_p9 > 0) { - // minus one because if its the last one, we want to leave space to change the line which is one quanta - try dbg_out.dbg_line.append(@intCast(u8, @divExact(d_pc_p9, quant) + 128) - quant); - if (dbg_out.pcop_change_index.*) |pci| - dbg_out.dbg_line.items[pci] += 1; - dbg_out.pcop_change_index.* = @intCast(u32, dbg_out.dbg_line.items.len - 1); - } else if (d_pc_p9 == 0) { - // we don't need to do anything, because adding the quant does it for us - } else unreachable; - if (dbg_out.start_line.* == null) - dbg_out.start_line.* = self.prev_di_line; - dbg_out.end_line.* = line; - // only do this if the pc changed - self.prev_di_line = line; - self.prev_di_column = column; - self.prev_di_pc = self.code.items.len; - }, - .none => {}, - } -} - /// Asserts there is already capacity to insert into top branch inst_table. 
fn processDeath(self: *Self, inst: Air.Inst.Index) void { const air_tags = self.air.instructions.items(.tag); @@ -654,29 +659,6 @@ fn ensureProcessDeathCapacity(self: *Self, additional_count: usize) !void { try table.ensureUnusedCapacity(self.gpa, additional_count); } -/// Adds a Type to the .debug_info at the current position. The bytes will be populated later, -/// after codegen for this symbol is done. -fn addDbgInfoTypeReloc(self: *Self, ty: Type) !void { - switch (self.debug_output) { - .dwarf => |dbg_out| { - assert(ty.hasCodeGenBits()); - const index = dbg_out.dbg_info.items.len; - try dbg_out.dbg_info.resize(index + 4); // DW.AT.type, DW.FORM.ref4 - - const gop = try dbg_out.dbg_info_type_relocs.getOrPut(self.gpa, ty); - if (!gop.found_existing) { - gop.value_ptr.* = .{ - .off = undefined, - .relocs = .{}, - }; - } - try gop.value_ptr.relocs.append(self.gpa, @intCast(u32, index)); - }, - .plan9 => {}, - .none => {}, - } -} - fn allocMem(self: *Self, inst: Air.Inst.Index, abi_size: u32, abi_align: u32) !u32 { if (abi_align > self.stack_align) self.stack_align = abi_align; @@ -848,7 +830,7 @@ fn airNot(self: *Self, inst: Air.Inst.Index) !void { }, else => {}, } - break :result try self.genX8664BinMath(inst, ty_op.operand, .bool_true); + break :result try self.genBinMathOp(inst, ty_op.operand, .bool_true); }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } @@ -886,7 +868,7 @@ fn airAdd(self: *Self, inst: Air.Inst.Index) !void { const result: MCValue = if (self.liveness.isUnused(inst)) .dead else - try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs); + try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs); return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -913,7 +895,7 @@ fn airSub(self: *Self, inst: Air.Inst.Index) !void { const result: MCValue = if (self.liveness.isUnused(inst)) .dead else - try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs); + try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs); 
return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -940,7 +922,7 @@ fn airMul(self: *Self, inst: Air.Inst.Index) !void { const result: MCValue = if (self.liveness.isUnused(inst)) .dead else - try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs); + try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs); return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -994,7 +976,7 @@ fn airBitAnd(self: *Self, inst: Air.Inst.Index) !void { const result: MCValue = if (self.liveness.isUnused(inst)) .dead else - try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs); + try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs); return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -1003,7 +985,7 @@ fn airBitOr(self: *Self, inst: Air.Inst.Index) !void { const result: MCValue = if (self.liveness.isUnused(inst)) .dead else - try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs); + try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs); return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -1415,7 +1397,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { /// Perform "binary" operators, excluding comparisons. /// Currently, the following ops are supported: /// ADD, SUB, XOR, OR, AND -fn genX8664BinMath(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_rhs: Air.Inst.Ref) !MCValue { +fn genBinMathOp(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_rhs: Air.Inst.Ref) !MCValue { // We'll handle these ops in two steps. // 1) Prepare an output location (register or memory) // This location will be the location of the operand that dies (if one exists) @@ -1425,9 +1407,6 @@ fn genX8664BinMath(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_r // In this case, copy that location to a register, then perform the op to that register instead. 
// // TODO: make this algorithm less bad - - try self.code.ensureUnusedCapacity(8); - const lhs = try self.resolveInst(op_lhs); const rhs = try self.resolveInst(op_rhs); @@ -1486,107 +1465,28 @@ fn genX8664BinMath(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_r else => {}, } - // Now for step 2, we perform the actual op - const inst_ty = self.air.typeOfIndex(inst); + // Now for step 2, we assign an MIR instruction + const dst_ty = self.air.typeOfIndex(inst); const air_tags = self.air.instructions.items(.tag); switch (air_tags[inst]) { - // TODO: Generate wrapping and non-wrapping versions separately - .add, .addwrap => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 0, 0x00), - .bool_or, .bit_or => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 1, 0x08), - .bool_and, .bit_and => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 4, 0x20), - .sub, .subwrap => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 5, 0x28), - .xor, .not => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 6, 0x30), - - .mul, .mulwrap => try self.genX8664Imul(inst_ty, dst_mcv, src_mcv), + .add, .addwrap => try self.genBinMathOpMir(.add, dst_ty, dst_mcv, src_mcv), + .bool_or, .bit_or => try self.genBinMathOpMir(.@"or", dst_ty, dst_mcv, src_mcv), + .bool_and, .bit_and => try self.genBinMathOpMir(.@"and", dst_ty, dst_mcv, src_mcv), + .sub, .subwrap => try self.genBinMathOpMir(.sub, dst_ty, dst_mcv, src_mcv), + .xor, .not => try self.genBinMathOpMir(.xor, dst_ty, dst_mcv, src_mcv), + .mul, .mulwrap => try self.genIMulOpMir(dst_ty, dst_mcv, src_mcv), else => unreachable, } return dst_mcv; } -/// Wrap over Instruction.encodeInto to translate errors -fn encodeX8664Instruction(self: *Self, inst: Instruction) !void { - inst.encodeInto(self.code) catch |err| { - if (err == error.OutOfMemory) - return error.OutOfMemory - else - return self.fail("Instruction.encodeInto failed because {s}", .{@errorName(err)}); - }; -} - -/// This function encodes a 
binary operation for x86_64 -/// intended for use with the following opcode ranges -/// because they share the same structure. -/// -/// Thus not all binary operations can be used here -/// -- multiplication needs to be done with imul, -/// which doesn't have as convenient an interface. -/// -/// "opx"-style instructions use the opcode extension field to indicate which instruction to execute: -/// -/// opx = /0: add -/// opx = /1: or -/// opx = /2: adc -/// opx = /3: sbb -/// opx = /4: and -/// opx = /5: sub -/// opx = /6: xor -/// opx = /7: cmp -/// -/// opcode | operand shape -/// --------+---------------------- -/// 80 /opx | *r/m8*, imm8 -/// 81 /opx | *r/m16/32/64*, imm16/32 -/// 83 /opx | *r/m16/32/64*, imm8 -/// -/// "mr"-style instructions use the low bits of opcode to indicate shape of instruction: -/// -/// mr = 00: add -/// mr = 08: or -/// mr = 10: adc -/// mr = 18: sbb -/// mr = 20: and -/// mr = 28: sub -/// mr = 30: xor -/// mr = 38: cmp -/// -/// opcode | operand shape -/// -------+------------------------- -/// mr + 0 | *r/m8*, r8 -/// mr + 1 | *r/m16/32/64*, r16/32/64 -/// mr + 2 | *r8*, r/m8 -/// mr + 3 | *r16/32/64*, r/m16/32/64 -/// mr + 4 | *AL*, imm8 -/// mr + 5 | *rAX*, imm16/32 -/// -/// TODO: rotates and shifts share the same structure, so we can potentially implement them -/// at a later date with very similar code. -/// They have "opx"-style instructions, but no "mr"-style instructions. 
-/// -/// opx = /0: rol, -/// opx = /1: ror, -/// opx = /2: rcl, -/// opx = /3: rcr, -/// opx = /4: shl sal, -/// opx = /5: shr, -/// opx = /6: sal shl, -/// opx = /7: sar, -/// -/// opcode | operand shape -/// --------+------------------ -/// c0 /opx | *r/m8*, imm8 -/// c1 /opx | *r/m16/32/64*, imm8 -/// d0 /opx | *r/m8*, 1 -/// d1 /opx | *r/m16/32/64*, 1 -/// d2 /opx | *r/m8*, CL (for context, CL is register 1) -/// d3 /opx | *r/m16/32/64*, CL (for context, CL is register 1) -fn genX8664BinMathCode( +fn genBinMathOpMir( self: *Self, + mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue, - opx: u3, - mr: u8, ) !void { switch (dst_mcv) { .none => unreachable, @@ -1604,84 +1504,43 @@ fn genX8664BinMathCode( .ptr_stack_offset => unreachable, .ptr_embedded_in_code => unreachable, .register => |src_reg| { - // for register, register use mr + 1 - // addressing mode: *r/m16/32/64*, r16/32/64 - const abi_size = dst_ty.abiSize(self.target.*); - const encoder = try Encoder.init(self.code, 3); - encoder.rex(.{ - .w = abi_size == 8, - .r = src_reg.isExtended(), - .b = dst_reg.isExtended(), + _ = try self.addInst(.{ + .tag = mir_tag, + .ops = (Mir.Ops{ + .reg1 = src_reg, + .reg2 = dst_reg, + .flags = 0b11, + }).encode(), + .data = undefined, }); - encoder.opcode_1byte(mr + 1); - encoder.modRm_direct( - src_reg.low_id(), - dst_reg.low_id(), - ); }, .immediate => |imm| { - // register, immediate use opx = 81 or 83 addressing modes: - // opx = 81: r/m16/32/64, imm16/32 - // opx = 83: r/m16/32/64, imm8 - const imm32 = @intCast(i32, imm); // This case must be handled before calling genX8664BinMathCode. 
- if (imm32 <= math.maxInt(i8)) { - const abi_size = dst_ty.abiSize(self.target.*); - const encoder = try Encoder.init(self.code, 4); - encoder.rex(.{ - .w = abi_size == 8, - .b = dst_reg.isExtended(), - }); - encoder.opcode_1byte(0x83); - encoder.modRm_direct( - opx, - dst_reg.low_id(), - ); - encoder.imm8(@intCast(i8, imm32)); - } else { - const abi_size = dst_ty.abiSize(self.target.*); - const encoder = try Encoder.init(self.code, 7); - encoder.rex(.{ - .w = abi_size == 8, - .b = dst_reg.isExtended(), - }); - encoder.opcode_1byte(0x81); - encoder.modRm_direct( - opx, - dst_reg.low_id(), - ); - encoder.imm32(@intCast(i32, imm32)); - } + _ = try self.addInst(.{ + .tag = mir_tag, + .ops = (Mir.Ops{ + .reg1 = dst_reg, + }).encode(), + .data = .{ .imm = @intCast(i32, imm) }, + }); }, .embedded_in_code, .memory => { return self.fail("TODO implement x86 ADD/SUB/CMP source memory", .{}); }, .stack_offset => |off| { - // register, indirect use mr + 3 - // addressing mode: *r16/32/64*, r/m16/32/64 - const abi_size = dst_ty.abiSize(self.target.*); - const adj_off = off + abi_size; if (off > math.maxInt(i32)) { return self.fail("stack offset too large", .{}); } - const encoder = try Encoder.init(self.code, 7); - encoder.rex(.{ - .w = abi_size == 8, - .r = dst_reg.isExtended(), + const abi_size = dst_ty.abiSize(self.target.*); + const adj_off = off + abi_size; + _ = try self.addInst(.{ + .tag = mir_tag, + .ops = (Mir.Ops{ + .reg1 = dst_reg, + .reg2 = .ebp, + .flags = 0b01, + }).encode(), + .data = .{ .imm = -@intCast(i32, adj_off) }, }); - encoder.opcode_1byte(mr + 3); - if (adj_off <= std.math.maxInt(i8)) { - encoder.modRm_indirectDisp8( - dst_reg.low_id(), - Register.ebp.low_id(), - ); - encoder.disp8(-@intCast(i8, adj_off)); - } else { - encoder.modRm_indirectDisp32( - dst_reg.low_id(), - Register.ebp.low_id(), - ); - encoder.disp32(-@intCast(i32, adj_off)); - } }, .compare_flags_unsigned => { return self.fail("TODO implement x86 ADD/SUB/CMP source compare flag 
(unsigned)", .{}); @@ -1699,7 +1558,20 @@ fn genX8664BinMathCode( .ptr_stack_offset => unreachable, .ptr_embedded_in_code => unreachable, .register => |src_reg| { - try self.genX8664ModRMRegToStack(dst_ty, off, src_reg, mr + 0x1); + if (off > math.maxInt(i32)) { + return self.fail("stack offset too large", .{}); + } + const abi_size = dst_ty.abiSize(self.target.*); + const adj_off = off + abi_size; + _ = try self.addInst(.{ + .tag = mir_tag, + .ops = (Mir.Ops{ + .reg1 = src_reg, + .reg2 = .ebp, + .flags = 0b10, + }).encode(), + .data = .{ .imm = -@intCast(i32, adj_off) }, + }); }, .immediate => |imm| { _ = imm; @@ -1722,13 +1594,8 @@ fn genX8664BinMathCode( } } -/// Performs integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv. -fn genX8664Imul( - self: *Self, - dst_ty: Type, - dst_mcv: MCValue, - src_mcv: MCValue, -) !void { +// Performs integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv. +fn genIMulOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !void { switch (dst_mcv) { .none => unreachable, .undef => unreachable, @@ -1746,68 +1613,30 @@ fn genX8664Imul( .ptr_embedded_in_code => unreachable, .register => |src_reg| { // register, register - // - // Use the following imul opcode - // 0F AF /r: IMUL r32/64, r/m32/64 - const abi_size = dst_ty.abiSize(self.target.*); - const encoder = try Encoder.init(self.code, 4); - encoder.rex(.{ - .w = abi_size == 8, - .r = dst_reg.isExtended(), - .b = src_reg.isExtended(), + _ = try self.addInst(.{ + .tag = .imul_complex, + .ops = (Mir.Ops{ + .reg1 = dst_reg, + .reg2 = src_reg, + }).encode(), + .data = undefined, }); - encoder.opcode_2byte(0x0f, 0xaf); - encoder.modRm_direct( - dst_reg.low_id(), - src_reg.low_id(), - ); }, .immediate => |imm| { - // register, immediate: - // depends on size of immediate. 
- // - // immediate fits in i8: - // 6B /r ib: IMUL r32/64, r/m32/64, imm8 - // - // immediate fits in i32: - // 69 /r id: IMUL r32/64, r/m32/64, imm32 - // - // immediate is huge: - // split into 2 instructions - // 1) copy the 64 bit immediate into a tmp register - // 2) perform register,register mul - // 0F AF /r: IMUL r32/64, r/m32/64 - if (math.minInt(i8) <= imm and imm <= math.maxInt(i8)) { - const abi_size = dst_ty.abiSize(self.target.*); - const encoder = try Encoder.init(self.code, 4); - encoder.rex(.{ - .w = abi_size == 8, - .r = dst_reg.isExtended(), - .b = dst_reg.isExtended(), + // register, immediate + if (imm <= math.maxInt(i32)) { + _ = try self.addInst(.{ + .tag = .imul_complex, + .ops = (Mir.Ops{ + .reg1 = dst_reg, + .reg2 = dst_reg, + .flags = 0b10, + }).encode(), + .data = .{ .imm = @intCast(i32, imm) }, }); - encoder.opcode_1byte(0x6B); - encoder.modRm_direct( - dst_reg.low_id(), - dst_reg.low_id(), - ); - encoder.imm8(@intCast(i8, imm)); - } else if (math.minInt(i32) <= imm and imm <= math.maxInt(i32)) { - const abi_size = dst_ty.abiSize(self.target.*); - const encoder = try Encoder.init(self.code, 7); - encoder.rex(.{ - .w = abi_size == 8, - .r = dst_reg.isExtended(), - .b = dst_reg.isExtended(), - }); - encoder.opcode_1byte(0x69); - encoder.modRm_direct( - dst_reg.low_id(), - dst_reg.low_id(), - ); - encoder.imm32(@intCast(i32, imm)); } else { const src_reg = try self.copyToTmpRegister(dst_ty, src_mcv); - return self.genX8664Imul(dst_ty, dst_mcv, MCValue{ .register = src_reg }); + return self.genIMulOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg }); } }, .embedded_in_code, .memory, .stack_offset => { @@ -1833,20 +1662,14 @@ fn genX8664Imul( const dst_reg = try self.copyToTmpRegister(dst_ty, dst_mcv); // multiply into dst_reg // register, register - // Use the following imul opcode - // 0F AF /r: IMUL r32/64, r/m32/64 - const abi_size = dst_ty.abiSize(self.target.*); - const encoder = try Encoder.init(self.code, 4); - encoder.rex(.{ - .w 
= abi_size == 8, - .r = dst_reg.isExtended(), - .b = src_reg.isExtended(), + _ = try self.addInst(.{ + .tag = .imul_complex, + .ops = (Mir.Ops{ + .reg1 = dst_reg, + .reg2 = src_reg, + }).encode(), + .data = undefined, }); - encoder.opcode_2byte(0x0f, 0xaf); - encoder.modRm_direct( - dst_reg.low_id(), - src_reg.low_id(), - ); // copy dst_reg back out return self.genSetStack(dst_ty, off, MCValue{ .register = dst_reg }); }, @@ -1871,73 +1694,6 @@ fn genX8664Imul( } } -fn genX8664ModRMRegToStack(self: *Self, ty: Type, off: u32, reg: Register, opcode: u8) !void { - const abi_size = ty.abiSize(self.target.*); - const adj_off = off + abi_size; - if (off > math.maxInt(i32)) { - return self.fail("stack offset too large", .{}); - } - - const i_adj_off = -@intCast(i32, adj_off); - const encoder = try Encoder.init(self.code, 7); - encoder.rex(.{ - .w = abi_size == 8, - .r = reg.isExtended(), - }); - encoder.opcode_1byte(opcode); - if (i_adj_off < std.math.maxInt(i8)) { - // example: 48 89 55 7f mov QWORD PTR [rbp+0x7f],rdx - encoder.modRm_indirectDisp8( - reg.low_id(), - Register.ebp.low_id(), - ); - encoder.disp8(@intCast(i8, i_adj_off)); - } else { - // example: 48 89 95 80 00 00 00 mov QWORD PTR [rbp+0x80],rdx - encoder.modRm_indirectDisp32( - reg.low_id(), - Register.ebp.low_id(), - ); - encoder.disp32(i_adj_off); - } -} - -fn genArgDbgInfo(self: *Self, inst: Air.Inst.Index, mcv: MCValue) !void { - const ty_str = self.air.instructions.items(.data)[inst].ty_str; - const zir = &self.mod_fn.owner_decl.getFileScope().zir; - const name = zir.nullTerminatedString(ty_str.str); - const name_with_null = name.ptr[0 .. 
name.len + 1]; - const ty = self.air.getRefType(ty_str.ty); - - switch (mcv) { - .register => |reg| { - switch (self.debug_output) { - .dwarf => |dbg_out| { - try dbg_out.dbg_info.ensureUnusedCapacity(3); - dbg_out.dbg_info.appendAssumeCapacity(link.File.Elf.abbrev_parameter); - dbg_out.dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc - 1, // ULEB128 dwarf expression length - reg.dwarfLocOp(), - }); - try dbg_out.dbg_info.ensureUnusedCapacity(5 + name_with_null.len); - try self.addDbgInfoTypeReloc(ty); // DW.AT.type, DW.FORM.ref4 - dbg_out.dbg_info.appendSliceAssumeCapacity(name_with_null); // DW.AT.name, DW.FORM.string - }, - .plan9 => {}, - .none => {}, - } - }, - .stack_offset => { - switch (self.debug_output) { - .dwarf => {}, - .plan9 => {}, - .none => {}, - } - }, - else => {}, - } -} - fn airArg(self: *Self, inst: Air.Inst.Index) !void { const arg_index = self.arg_index; self.arg_index += 1; @@ -1946,8 +1702,15 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { _ = ty; const mcv = self.args[arg_index]; - try self.genArgDbgInfo(inst, mcv); - + const payload = try self.addExtra(Mir.ArgDbgInfo{ + .air_inst = inst, + .arg_index = @intCast(u32, arg_index), // TODO can arg_index: u32? 
+ }); + _ = try self.addInst(.{ + .tag = .arg_dbg_info, + .ops = undefined, + .data = .{ .payload = payload }, + }); if (self.liveness.isUnused(inst)) return self.finishAirBookkeeping(); @@ -1962,7 +1725,11 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { } fn airBreakpoint(self: *Self) !void { - try self.code.append(0xcc); // int3 + _ = try self.addInst(.{ + .tag = .brk, + .ops = undefined, + .data = undefined, + }); return self.finishAirBookkeeping(); } @@ -2021,7 +1788,6 @@ fn airCall(self: *Self, inst: Air.Inst.Index) !void { if (self.air.value(callee)) |func_value| { if (func_value.castTag(.function)) |func_payload| { const func = func_payload.data; - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); const ptr_bytes: u64 = @divExact(ptr_bits, 8); const got_addr = if (self.bin_file.cast(link.File.Elf)) |elf_file| blk: { @@ -2031,11 +1797,13 @@ fn airCall(self: *Self, inst: Air.Inst.Index) !void { @intCast(u32, coff_file.offset_table_virtual_address + func.owner_decl.link.coff.offset_table_index * ptr_bytes) else unreachable; - - // ff 14 25 xx xx xx xx call [addr] - try self.code.ensureUnusedCapacity(7); - self.code.appendSliceAssumeCapacity(&[3]u8{ 0xff, 0x14, 0x25 }); - mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), got_addr); + _ = try self.addInst(.{ + .tag = .call, + .ops = (Mir.Ops{ + .flags = 0b01, + }).encode(), + .data = .{ .imm = @bitCast(i32, got_addr) }, + }); } else if (func_value.castTag(.extern_fn)) |_| { return self.fail("TODO implement calling extern functions", .{}); } else { @@ -2089,26 +1857,21 @@ fn airCall(self: *Self, inst: Air.Inst.Index) !void { .memory = func.owner_decl.link.macho.local_sym_index, }); // callq *%rax - try self.code.ensureUnusedCapacity(2); - self.code.appendSliceAssumeCapacity(&[2]u8{ 0xff, 0xd0 }); + _ = try self.addInst(.{ + .tag = .call, + .ops = (Mir.Ops{ + .reg1 = .rax, + .flags = 0b01, + }).encode(), + .data = undefined, + }); } else if (func_value.castTag(.extern_fn)) |func_payload| 
{ const decl = func_payload.data; const n_strx = try macho_file.addExternFn(mem.spanZ(decl.name)); - const offset = blk: { - // callq - try self.code.ensureUnusedCapacity(5); - self.code.appendSliceAssumeCapacity(&[5]u8{ 0xe8, 0x0, 0x0, 0x0, 0x0 }); - break :blk @intCast(u32, self.code.items.len) - 4; - }; - // Add relocation to the decl. - try macho_file.active_decl.?.link.macho.relocs.append(self.bin_file.allocator, .{ - .offset = offset, - .target = .{ .global = n_strx }, - .addend = 0, - .subtractor = null, - .pcrel = true, - .length = 2, - .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), + _ = try self.addInst(.{ + .tag = .call_extern, + .ops = undefined, + .data = .{ .extern_fn = n_strx }, }); } else { return self.fail("TODO implement calling bitcasted functions", .{}); @@ -2157,11 +1920,14 @@ fn airCall(self: *Self, inst: Air.Inst.Index) !void { const ptr_bytes: u64 = @divExact(ptr_bits, 8); const got_addr = p9.bases.data; const got_index = func_payload.data.owner_decl.link.plan9.got_index.?; - // ff 14 25 xx xx xx xx call [addr] - try self.code.ensureUnusedCapacity(7); - self.code.appendSliceAssumeCapacity(&[3]u8{ 0xff, 0x14, 0x25 }); const fn_got_addr = got_addr + got_index * ptr_bytes; - mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), @intCast(u32, fn_got_addr)); + _ = try self.addInst(.{ + .tag = .call, + .ops = (Mir.Ops{ + .flags = 0b01, + }).encode(), + .data = .{ .imm = @bitCast(i32, @intCast(u32, fn_got_addr)) }, + }); } else return self.fail("TODO implement calling extern fn on plan9", .{}); } else { return self.fail("TODO implement calling runtime known function pointer", .{}); @@ -2201,9 +1967,14 @@ fn ret(self: *Self, mcv: MCValue) !void { // TODO when implementing defer, this will need to jump to the appropriate defer expression. // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction // which is available if the jump is 127 bytes or less forward. 
- try self.code.resize(self.code.items.len + 5); - self.code.items[self.code.items.len - 5] = 0xe9; // jmp rel32 - try self.exitlude_jump_relocs.append(self.gpa, self.code.items.len - 4); + const jmp_reloc = try self.addInst(.{ + .tag = .jmp, + .ops = (Mir.Ops{ + .flags = 0b00, + }).encode(), + .data = .{ .inst = undefined }, + }); + try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc); } fn airRet(self: *Self, inst: Air.Inst.Index) !void { @@ -2233,8 +2004,6 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); const result: MCValue = result: { - try self.code.ensureUnusedCapacity(8); - // There are 2 operands, destination and source. // Either one, but not both, can be a memory operand. // Source operand can be an immediate, 8 bits or 32 bits. @@ -2245,7 +2014,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { // This instruction supports only signed 32-bit immediates at most. 
const src_mcv = try self.limitImmediateType(bin_op.rhs, i32); - try self.genX8664BinMathCode(Type.initTag(.bool), dst_mcv, src_mcv, 7, 0x38); + try self.genBinMathOpMir(.cmp, Type.initTag(.bool), dst_mcv, src_mcv); break :result switch (ty.isSignedInt()) { true => MCValue{ .compare_flags_signed = op }, false => MCValue{ .compare_flags_unsigned = op }, @@ -2256,7 +2025,15 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void { const dbg_stmt = self.air.instructions.items(.data)[inst].dbg_stmt; - try self.dbgAdvancePCAndLine(dbg_stmt.line, dbg_stmt.column); + const payload = try self.addExtra(Mir.DbgLineColumn{ + .line = dbg_stmt.line, + .column = dbg_stmt.column, + }); + _ = try self.addInst(.{ + .tag = .dbg_line, + .ops = undefined, + .data = .{ .payload = payload }, + }); return self.finishAirBookkeeping(); } @@ -2268,58 +2045,77 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { const else_body = self.air.extra[extra.end + then_body.len ..][0..extra.data.else_body_len]; const liveness_condbr = self.liveness.getCondBr(inst); - const reloc: Reloc = reloc: { - try self.code.ensureUnusedCapacity(6); - - const opcode: u8 = switch (cond) { - .compare_flags_signed => |cmp_op| blk: { - // Here we map to the opposite opcode because the jump is to the false branch. - const opcode: u8 = switch (cmp_op) { - .gte => 0x8c, - .gt => 0x8e, - .neq => 0x84, - .lt => 0x8d, - .lte => 0x8f, - .eq => 0x85, + const reloc: Mir.Inst.Index = reloc: { + switch (cond) { + .compare_flags_signed => |cmp_op| { + // Here we map the opposites since the jump is to the false branch. + const flags: u2 = switch (cmp_op) { + .gte => 0b10, + .gt => 0b11, + .neq => 0b01, + .lt => 0b00, + .lte => 0b01, + .eq => 0b00, }; - break :blk opcode; - }, - .compare_flags_unsigned => |cmp_op| blk: { - // Here we map to the opposite opcode because the jump is to the false branch. 
- const opcode: u8 = switch (cmp_op) { - .gte => 0x82, - .gt => 0x86, - .neq => 0x84, - .lt => 0x83, - .lte => 0x87, - .eq => 0x85, - }; - break :blk opcode; - }, - .register => |reg| blk: { - // test reg, 1 - // TODO detect al, ax, eax - const encoder = try Encoder.init(self.code, 4); - encoder.rex(.{ - // TODO audit this codegen: we force w = true here to make - // the value affect the big register - .w = true, - .b = reg.isExtended(), + const tag: Mir.Inst.Tag = if (cmp_op == .neq or cmp_op == .eq) + .cond_jmp_eq_ne + else + .cond_jmp_greater_less; + const reloc = try self.addInst(.{ + .tag = tag, + .ops = (Mir.Ops{ + .flags = flags, + }).encode(), + .data = .{ .inst = undefined }, }); - encoder.opcode_1byte(0xf6); - encoder.modRm_direct( - 0, - reg.low_id(), - ); - encoder.disp8(1); - break :blk 0x84; + break :reloc reloc; }, - else => return self.fail("TODO implement condbr {s} when condition is {s}", .{ self.target.cpu.arch, @tagName(cond) }), - }; - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x0f, opcode }); - const reloc = Reloc{ .rel32 = self.code.items.len }; - self.code.items.len += 4; - break :reloc reloc; + .compare_flags_unsigned => |cmp_op| { + // Here we map the opposites since the jump is to the false branch. 
+ const flags: u2 = switch (cmp_op) { + .gte => 0b10, + .gt => 0b11, + .neq => 0b01, + .lt => 0b00, + .lte => 0b01, + .eq => 0b00, + }; + const tag: Mir.Inst.Tag = if (cmp_op == .neq or cmp_op == .eq) + .cond_jmp_eq_ne + else + .cond_jmp_above_below; + const reloc = try self.addInst(.{ + .tag = tag, + .ops = (Mir.Ops{ + .flags = flags, + }).encode(), + .data = .{ .inst = undefined }, + }); + break :reloc reloc; + }, + .register => |reg| { + _ = try self.addInst(.{ + .tag = .@"test", + .ops = (Mir.Ops{ + .reg1 = reg, + .flags = 0b00, + }).encode(), + .data = .{ .imm = 1 }, + }); + const reloc = try self.addInst(.{ + .tag = .cond_jmp_eq_ne, + .ops = (Mir.Ops{ + .flags = 0b01, + }).encode(), + .data = .{ .inst = undefined }, + }); + break :reloc reloc; + }, + else => return self.fail("TODO implement condbr {s} when condition is {s}", .{ + self.target.cpu.arch, + @tagName(cond), + }), + } }; // Capture the state of register and stack allocation state so that we can revert to it. @@ -2578,25 +2374,18 @@ fn airLoop(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const loop = self.air.extraData(Air.Block, ty_pl.payload); const body = self.air.extra[loop.end..][0..loop.data.body_len]; - const start_index = self.code.items.len; + const jmp_target = @intCast(u32, self.mir_instructions.len); try self.genBody(body); - try self.jump(start_index); + _ = try self.addInst(.{ + .tag = .jmp, + .ops = (Mir.Ops{ + .flags = 0b00, + }).encode(), + .data = .{ .inst = jmp_target }, + }); return self.finishAirBookkeeping(); } -/// Send control flow to the `index` of `self.code`. 
-fn jump(self: *Self, index: usize) !void { - try self.code.ensureUnusedCapacity(5); - if (math.cast(i8, @intCast(i32, index) - (@intCast(i32, self.code.items.len + 2)))) |delta| { - self.code.appendAssumeCapacity(0xeb); // jmp rel8 - self.code.appendAssumeCapacity(@bitCast(u8, delta)); - } else |_| { - const delta = @intCast(i32, index) - (@intCast(i32, self.code.items.len + 5)); - self.code.appendAssumeCapacity(0xe9); // jmp rel32 - mem.writeIntLittle(i32, self.code.addManyAsArrayAssumeCapacity(4), delta); - } -} - fn airBlock(self: *Self, inst: Air.Inst.Index) !void { try self.blocks.putNoClobber(self.gpa, inst, .{ // A block is a setup to be able to jump to the end. @@ -2630,22 +2419,9 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { // return self.finishAir(inst, .dead, .{ condition, .none, .none }); } -fn performReloc(self: *Self, reloc: Reloc) !void { - switch (reloc) { - .rel32 => |pos| { - const amt = self.code.items.len - (pos + 4); - // Here it would be tempting to implement testing for amt == 0 and then elide the - // jump. However, that will cause a problem because other jumps may assume that they - // can jump to this code. Or maybe I didn't understand something when I was debugging. - // It could be worth another look. Anyway, that's why that isn't done here. Probably the - // best place to elide jumps will be in semantic analysis, by inlining blocks that only - // only have 1 break instruction. 
- const s32_amt = math.cast(i32, amt) catch - return self.fail("unable to perform relocation: jump too far", .{}); - mem.writeIntLittle(i32, self.code.items[pos..][0..4], s32_amt); - }, - .arm_branch => unreachable, - } +fn performReloc(self: *Self, reloc: Mir.Inst.Index) !void { + const next_inst = @intCast(u32, self.mir_instructions.len); + self.mir_instructions.items(.data)[reloc].inst = next_inst; } fn airBr(self: *Self, inst: Air.Inst.Index) !void { @@ -2661,9 +2437,9 @@ fn airBoolOp(self: *Self, inst: Air.Inst.Index) !void { .dead else switch (air_tags[inst]) { // lhs AND rhs - .bool_and => try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs), + .bool_and => try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs), // lhs OR rhs - .bool_or => try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs), + .bool_or => try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs), else => unreachable, // Not a boolean operation }; return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); @@ -2688,12 +2464,15 @@ fn brVoid(self: *Self, block: Air.Inst.Index) !void { const block_data = self.blocks.getPtr(block).?; // Emit a jump with a relocation. It will be patched up after the block ends. try block_data.relocs.ensureUnusedCapacity(self.gpa, 1); - // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction - // which is available if the jump is 127 bytes or less forward. 
- try self.code.resize(self.code.items.len + 5); - self.code.items[self.code.items.len - 5] = 0xe9; // jmp rel32 // Leave the jump offset undefined - block_data.relocs.appendAssumeCapacity(.{ .rel32 = self.code.items.len - 4 }); + const jmp_reloc = try self.addInst(.{ + .tag = .jmp, + .ops = (Mir.Ops{ + .flags = 0b00, + }).encode(), + .data = .{ .inst = undefined }, + }); + block_data.relocs.appendAssumeCapacity(jmp_reloc); } fn airAsm(self: *Self, inst: Air.Inst.Index) !void { @@ -2750,22 +2529,35 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { var iter = std.mem.tokenize(u8, asm_source, "\n\r"); while (iter.next()) |ins| { if (mem.eql(u8, ins, "syscall")) { - try self.code.appendSlice(&[_]u8{ 0x0f, 0x05 }); + _ = try self.addInst(.{ + .tag = .syscall, + .ops = undefined, + .data = undefined, + }); } else if (mem.indexOf(u8, ins, "push")) |_| { const arg = ins[4..]; if (mem.indexOf(u8, arg, "$")) |l| { - const n = std.fmt.parseInt(u8, ins[4 + l + 1 ..], 10) catch return self.fail("TODO implement more inline asm int parsing", .{}); - try self.code.appendSlice(&.{ 0x6a, n }); + const n = std.fmt.parseInt(u8, ins[4 + l + 1 ..], 10) catch { + return self.fail("TODO implement more inline asm int parsing", .{}); + }; + _ = try self.addInst(.{ + .tag = .push, + .ops = (Mir.Ops{ + .flags = 0b10, + }).encode(), + .data = .{ .imm = n }, + }); } else if (mem.indexOf(u8, arg, "%%")) |l| { const reg_name = ins[4 + l + 2 ..]; const reg = parseRegName(reg_name) orelse return self.fail("unrecognized register: '{s}'", .{reg_name}); - const low_id: u8 = reg.low_id(); - if (reg.isExtended()) { - try self.code.appendSlice(&.{ 0x41, 0b1010000 | low_id }); - } else { - try self.code.append(0b1010000 | low_id); - } + _ = try self.addInst(.{ + .tag = .push, + .ops = (Mir.Ops{ + .reg1 = reg, + }).encode(), + .data = undefined, + }); } else return self.fail("TODO more push operands", .{}); } else if (mem.indexOf(u8, ins, "pop")) |_| { const arg = ins[3..]; @@ -2773,12 +2565,13 @@ 
fn airAsm(self: *Self, inst: Air.Inst.Index) !void { const reg_name = ins[3 + l + 2 ..]; const reg = parseRegName(reg_name) orelse return self.fail("unrecognized register: '{s}'", .{reg_name}); - const low_id: u8 = reg.low_id(); - if (reg.isExtended()) { - try self.code.appendSlice(&.{ 0x41, 0b1011000 | low_id }); - } else { - try self.code.append(0b1011000 | low_id); - } + _ = try self.addInst(.{ + .tag = .pop, + .ops = (Mir.Ops{ + .reg1 = reg, + }).encode(), + .data = undefined, + }); } else return self.fail("TODO more pop operands", .{}); } else { return self.fail("TODO implement support for more x86 assembly instructions", .{}); @@ -2870,7 +2663,6 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro if (adj_off > 128) { return self.fail("TODO implement set stack variable with large stack offset", .{}); } - try self.code.ensureUnusedCapacity(8); switch (abi_size) { 1 => { return self.fail("TODO implement set abi_size=1 stack variable with immediate", .{}); @@ -2879,34 +2671,57 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro return self.fail("TODO implement set abi_size=2 stack variable with immediate", .{}); }, 4 => { - const x = @intCast(u32, x_big); // We have a positive stack offset value but we want a twos complement negative // offset from rbp, which is at the top of the stack frame. 
- const negative_offset = @intCast(i8, -@intCast(i32, adj_off)); - const twos_comp = @bitCast(u8, negative_offset); // mov DWORD PTR [rbp+offset], immediate - self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp }); - mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x); + const payload = try self.addExtra(Mir.ImmPair{ + .dest_off = -@intCast(i32, adj_off), + .operand = @bitCast(i32, @intCast(u32, x_big)), + }); + _ = try self.addInst(.{ + .tag = .mov, + .ops = (Mir.Ops{ + .reg1 = .rbp, + .flags = 0b11, + }).encode(), + .data = .{ .payload = payload }, + }); }, 8 => { // We have a positive stack offset value but we want a twos complement negative // offset from rbp, which is at the top of the stack frame. - const negative_offset = @intCast(i8, -@intCast(i32, adj_off)); - const twos_comp = @bitCast(u8, negative_offset); + const negative_offset = -@intCast(i32, adj_off); // 64 bit write to memory would take two mov's anyways so we // insted just use two 32 bit writes to avoid register allocation - try self.code.ensureUnusedCapacity(14); - var buf: [8]u8 = undefined; - mem.writeIntLittle(u64, &buf, x_big); - - // mov DWORD PTR [rbp+offset+4], immediate - self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp + 4 }); - self.code.appendSliceAssumeCapacity(buf[4..8]); - - // mov DWORD PTR [rbp+offset], immediate - self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp }); - self.code.appendSliceAssumeCapacity(buf[0..4]); + { + const payload = try self.addExtra(Mir.ImmPair{ + .dest_off = negative_offset + 4, + .operand = @bitCast(i32, @truncate(u32, x_big >> 32)), + }); + _ = try self.addInst(.{ + .tag = .mov, + .ops = (Mir.Ops{ + .reg1 = .rbp, + .flags = 0b11, + }).encode(), + .data = .{ .payload = payload }, + }); + } + { + const payload = try self.addExtra(Mir.ImmPair{ + .dest_off = negative_offset, + .operand = @bitCast(i32, @truncate(u32, x_big)), + }); + _ = try self.addInst(.{ + .tag = .mov, + .ops = (Mir.Ops{ + 
.reg1 = .rbp, + .flags = 0b11, + }).encode(), + .data = .{ .payload = payload }, + }); + } }, else => { return self.fail("TODO implement set abi_size=large stack variable with immediate", .{}); @@ -2920,7 +2735,20 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro return self.genSetStack(ty, stack_offset, MCValue{ .register = reg }); }, .register => |reg| { - try self.genX8664ModRMRegToStack(ty, stack_offset, reg, 0x89); + if (stack_offset > math.maxInt(i32)) { + return self.fail("stack offset too large", .{}); + } + const abi_size = ty.abiSize(self.target.*); + const adj_off = stack_offset + abi_size; + _ = try self.addInst(.{ + .tag = .mov, + .ops = (Mir.Ops{ + .reg1 = reg, + .reg2 = .ebp, + .flags = 0b10, + }).encode(), + .data = .{ .imm = -@intCast(i32, adj_off) }, + }); }, .memory => |vaddr| { _ = vaddr; @@ -2958,25 +2786,26 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void } }, .compare_flags_unsigned => |op| { - const encoder = try Encoder.init(self.code, 7); - // TODO audit this codegen: we force w = true here to make - // the value affect the big register - encoder.rex(.{ - .w = true, - .b = reg.isExtended(), + const tag: Mir.Inst.Tag = switch (op) { + .gte, .gt, .lt, .lte => .cond_set_byte_above_below, + .eq, .neq => .cond_set_byte_eq_ne, + }; + const flags: u2 = switch (op) { + .gte => 0b00, + .gt => 0b01, + .lt => 0b10, + .lte => 0b11, + .eq => 0b01, + .neq => 0b00, + }; + _ = try self.addInst(.{ + .tag = tag, + .ops = (Mir.Ops{ + .reg1 = reg, + .flags = flags, + }).encode(), + .data = undefined, }); - encoder.opcode_2byte(0x0f, switch (op) { - .gte => 0x93, - .gt => 0x97, - .neq => 0x95, - .lt => 0x92, - .lte => 0x96, - .eq => 0x94, - }); - encoder.modRm_direct( - 0, - reg.low_id(), - ); }, .compare_flags_signed => |op| { _ = op; @@ -2986,44 +2815,25 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void // 32-bit moves zero-extend to 64-bit, so xoring the 
32-bit // register is the fastest way to zero a register. if (x == 0) { - // The encoding for `xor r32, r32` is `0x31 /r`. - const encoder = try Encoder.init(self.code, 3); - - // If we're accessing e.g. r8d, we need to use a REX prefix before the actual operation. Since - // this is a 32-bit operation, the W flag is set to zero. X is also zero, as we're not using a SIB. - // Both R and B are set, as we're extending, in effect, the register bits *and* the operand. - encoder.rex(.{ - .r = reg.isExtended(), - .b = reg.isExtended(), + _ = try self.addInst(.{ + .tag = .xor, + .ops = (Mir.Ops{ + .reg1 = reg, + .reg2 = reg, + }).encode(), + .data = undefined, }); - encoder.opcode_1byte(0x31); - // Section 3.1.1.1 of the Intel x64 Manual states that "/r indicates that the - // ModR/M byte of the instruction contains a register operand and an r/m operand." - encoder.modRm_direct( - reg.low_id(), - reg.low_id(), - ); - return; } if (x <= math.maxInt(i32)) { // Next best case: if we set the lower four bytes, the upper four will be zeroed. - // - // The encoding for `mov IMM32 -> REG` is (0xB8 + R) IMM. - - const encoder = try Encoder.init(self.code, 6); - // Just as with XORing, we need a REX prefix. This time though, we only - // need the B bit set, as we're extending the opcode's register field, - // and there is no Mod R/M byte. - encoder.rex(.{ - .b = reg.isExtended(), + _ = try self.addInst(.{ + .tag = .mov, + .ops = (Mir.Ops{ + .reg1 = reg, + }).encode(), + .data = .{ .imm = @intCast(i32, x) }, }); - encoder.opcode_withReg(0xB8, reg.low_id()); - - // no ModR/M byte - - // IMM - encoder.imm32(@intCast(i32, x)); return; } // Worst case: we need to load the 64-bit register with the IMM. GNU's assemblers calls @@ -3033,137 +2843,87 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void // This encoding is, in fact, the *same* as the one used for 32-bit loads. 
The only // difference is that we set REX.W before the instruction, which extends the load to // 64-bit and uses the full bit-width of the register. - { - const encoder = try Encoder.init(self.code, 10); - encoder.rex(.{ - .w = true, - .b = reg.isExtended(), - }); - encoder.opcode_withReg(0xB8, reg.low_id()); - encoder.imm64(x); - } + const payload = try self.addExtra(Mir.Imm64.encode(x)); + _ = try self.addInst(.{ + .tag = .movabs, + .ops = (Mir.Ops{ + .reg1 = reg, + }).encode(), + .data = .{ .payload = payload }, + }); }, .embedded_in_code => |code_offset| { // We need the offset from RIP in a signed i32 twos complement. - // The instruction is 7 bytes long and RIP points to the next instruction. - - // 64-bit LEA is encoded as REX.W 8D /r. - const rip = self.code.items.len + 7; - const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip); - const offset = @intCast(i32, big_offset); - const encoder = try Encoder.init(self.code, 7); - - // byte 1, always exists because w = true - encoder.rex(.{ - .w = true, - .r = reg.isExtended(), + const payload = try self.addExtra(Mir.Imm64.encode(code_offset)); + _ = try self.addInst(.{ + .tag = .lea_rip, + .ops = (Mir.Ops{ + .reg1 = reg, + }).encode(), + .data = .{ .payload = payload }, }); - // byte 2 - encoder.opcode_1byte(0x8D); - // byte 3 - encoder.modRm_RIPDisp32(reg.low_id()); - // byte 4-7 - encoder.disp32(offset); - - // Double check that we haven't done any math errors - assert(rip == self.code.items.len); }, .register => |src_reg| { // If the registers are the same, nothing to do. if (src_reg.id() == reg.id()) return; - // This is a variant of 8B /r. 
- const abi_size = ty.abiSize(self.target.*); - const encoder = try Encoder.init(self.code, 3); - encoder.rex(.{ - .w = abi_size == 8, - .r = reg.isExtended(), - .b = src_reg.isExtended(), + _ = try self.addInst(.{ + .tag = .mov, + .ops = (Mir.Ops{ + .reg1 = reg, + .reg2 = src_reg, + .flags = 0b11, + }).encode(), + .data = undefined, }); - encoder.opcode_1byte(0x8B); - encoder.modRm_direct(reg.low_id(), src_reg.low_id()); }, .memory => |x| { + // TODO can we move this entire logic into Emit.zig like with aarch64? if (self.bin_file.options.pie) { - // RIP-relative displacement to the entry in the GOT table. - const abi_size = ty.abiSize(self.target.*); - const encoder = try Encoder.init(self.code, 10); - - // LEA reg, [] - - // We encode the instruction FIRST because prefixes may or may not appear. - // After we encode the instruction, we will know that the displacement bytes - // for [] will be at self.code.items.len - 4. - encoder.rex(.{ - .w = true, // force 64 bit because loading an address (to the GOT) - .r = reg.isExtended(), + // TODO we should flag up `x` as GOT symbol entry explicitly rather than as a hack. + _ = try self.addInst(.{ + .tag = .lea_rip, + .ops = (Mir.Ops{ + .reg1 = reg, + .flags = 0b01, + }).encode(), + .data = .{ .got_entry = @intCast(u32, x) }, }); - encoder.opcode_1byte(0x8D); - encoder.modRm_RIPDisp32(reg.low_id()); - encoder.disp32(0); - - const offset = @intCast(u32, self.code.items.len); - - if (self.bin_file.cast(link.File.MachO)) |macho_file| { - // TODO I think the reloc might be in the wrong place. - const decl = macho_file.active_decl.?; - // Load reloc for LEA instruction. 
- try decl.link.macho.relocs.append(self.bin_file.allocator, .{ - .offset = offset - 4, - .target = .{ .local = @intCast(u32, x) }, - .addend = 0, - .subtractor = null, - .pcrel = true, - .length = 2, - .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_GOT), - }); - } else { - return self.fail("TODO implement genSetReg for PIE GOT indirection on this platform", .{}); - } - // MOV reg, [reg] - encoder.rex(.{ - .w = abi_size == 8, - .r = reg.isExtended(), - .b = reg.isExtended(), + _ = try self.addInst(.{ + .tag = .mov, + .ops = (Mir.Ops{ + .reg1 = reg, + .reg2 = reg, + .flags = 0b01, + }).encode(), + .data = .{ .imm = 0 }, }); - encoder.opcode_1byte(0x8B); - encoder.modRm_indirectDisp0(reg.low_id(), reg.low_id()); } else if (x <= math.maxInt(i32)) { - // Moving from memory to a register is a variant of `8B /r`. - // Since we're using 64-bit moves, we require a REX. - // This variant also requires a SIB, as it would otherwise be RIP-relative. - // We want mode zero with the lower three bits set to four to indicate an SIB with no other displacement. - // The SIB must be 0x25, to indicate a disp32 with no scaled index. - // 0b00RRR100, where RRR is the lower three bits of the register ID. - // The instruction is thus eight bytes; REX 0x8B 0b00RRR100 0x25 followed by a four-byte disp32. - const abi_size = ty.abiSize(self.target.*); - const encoder = try Encoder.init(self.code, 8); - encoder.rex(.{ - .w = abi_size == 8, - .r = reg.isExtended(), + // mov reg, [ds:imm32] + _ = try self.addInst(.{ + .tag = .mov, + .ops = (Mir.Ops{ + .reg1 = reg, + .flags = 0b01, + }).encode(), + .data = .{ .imm = @intCast(i32, x) }, }); - encoder.opcode_1byte(0x8B); - // effective address = [SIB] - encoder.modRm_SIBDisp0(reg.low_id()); - // SIB = disp32 - encoder.sib_disp32(); - encoder.disp32(@intCast(i32, x)); } else { - // If this is RAX, we can use a direct load; otherwise, we need to load the address, then indirectly load - // the value. 
+ // If this is RAX, we can use a direct load. + // Otherwise, we need to load the address, then indirectly load the value. if (reg.id() == 0) { - // REX.W 0xA1 moffs64* - // moffs64* is a 64-bit offset "relative to segment base", which really just means the - // absolute address for all practical purposes. - - const encoder = try Encoder.init(self.code, 10); - encoder.rex(.{ - .w = true, + // movabs rax, ds:moffs64 + const payload = try self.addExtra(Mir.Imm64.encode(x)); + _ = try self.addInst(.{ + .tag = .movabs, + .ops = (Mir.Ops{ + .reg1 = .rax, + .flags = 0b01, // imm64 will become moffs64 + }).encode(), + .data = .{ .payload = payload }, }); - encoder.opcode_1byte(0xA1); - encoder.writeIntLittle(u64, x); } else { // This requires two instructions; a move imm as used above, followed by an indirect load using the register // as the address and the register as the destination. @@ -3181,16 +2941,16 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void // Currently, we're only allowing 64-bit registers, so we need the `REX.W 8B /r` variant. // TODO: determine whether to allow other sized registers, and if so, handle them properly. 
- // mov reg, [reg] - const abi_size = ty.abiSize(self.target.*); - const encoder = try Encoder.init(self.code, 3); - encoder.rex(.{ - .w = abi_size == 8, - .r = reg.isExtended(), - .b = reg.isExtended(), + // mov reg, [reg + 0x0] + _ = try self.addInst(.{ + .tag = .mov, + .ops = (Mir.Ops{ + .reg1 = reg, + .reg2 = reg, + .flags = 0b01, + }).encode(), + .data = .{ .imm = 0 }, }); - encoder.opcode_1byte(0x8B); - encoder.modRm_indirectDisp0(reg.low_id(), reg.low_id()); } } }, @@ -3201,21 +2961,15 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void return self.fail("stack offset too large", .{}); } const ioff = -@intCast(i32, off); - const encoder = try Encoder.init(self.code, 3); - encoder.rex(.{ - .w = abi_size == 8, - .r = reg.isExtended(), + _ = try self.addInst(.{ + .tag = .mov, + .ops = (Mir.Ops{ + .reg1 = reg, + .reg2 = .ebp, + .flags = 0b01, + }).encode(), + .data = .{ .imm = ioff }, }); - encoder.opcode_1byte(0x8B); - if (std.math.minInt(i8) <= ioff and ioff <= std.math.maxInt(i8)) { - // Example: 48 8b 4d 7f mov rcx,QWORD PTR [rbp+0x7f] - encoder.modRm_indirectDisp8(reg.low_id(), Register.ebp.low_id()); - encoder.disp8(@intCast(i8, ioff)); - } else { - // Example: 48 8b 8d 80 00 00 00 mov rcx,QWORD PTR [rbp+0x80] - encoder.modRm_indirectDisp32(reg.low_id(), Register.ebp.low_id()); - encoder.disp32(ioff); - } }, } } diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig new file mode 100644 index 0000000000..3c53e5f680 --- /dev/null +++ b/src/arch/x86_64/Emit.zig @@ -0,0 +1,1161 @@ +//! This file contains the functionality for lowering x86_64 MIR into +//! 
machine code + +const Emit = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const bits = @import("bits.zig"); +const leb128 = std.leb; +const link = @import("../../link.zig"); +const log = std.log.scoped(.codegen); +const math = std.math; +const mem = std.mem; + +const Air = @import("../../Air.zig"); +const DebugInfoOutput = @import("../../codegen.zig").DebugInfoOutput; +const DW = std.dwarf; +const Encoder = bits.Encoder; +const ErrorMsg = Module.ErrorMsg; +const MCValue = @import("CodeGen.zig").MCValue; +const Mir = @import("Mir.zig"); +const Module = @import("../../Module.zig"); +const Instruction = bits.Instruction; +const Register = bits.Register; +const Type = @import("../../type.zig").Type; + +mir: Mir, +bin_file: *link.File, +debug_output: DebugInfoOutput, +target: *const std.Target, +err_msg: ?*ErrorMsg = null, +src_loc: Module.SrcLoc, +code: *std.ArrayList(u8), + +prev_di_line: u32, +prev_di_column: u32, +/// Relative to the beginning of `code`. +prev_di_pc: usize, + +code_offset_mapping: std.AutoHashMapUnmanaged(Mir.Inst.Index, usize) = .{}, +relocs: std.ArrayListUnmanaged(Reloc) = .{}, + +const InnerError = error{ + OutOfMemory, + EmitFail, +}; + +const Reloc = struct { + /// Offset of the instruction. + source: u64, + /// Target of the relocation. + target: Mir.Inst.Index, + /// Offset of the relocation within the instruction. + offset: u64, + /// Length of the instruction. 
+ length: u5, +}; + +pub fn emitMir(emit: *Emit) InnerError!void { + const mir_tags = emit.mir.instructions.items(.tag); + + for (mir_tags) |tag, index| { + const inst = @intCast(u32, index); + try emit.code_offset_mapping.putNoClobber(emit.bin_file.allocator, inst, emit.code.items.len); + switch (tag) { + .adc => try emit.mirArith(.adc, inst), + .add => try emit.mirArith(.add, inst), + .sub => try emit.mirArith(.sub, inst), + .xor => try emit.mirArith(.xor, inst), + .@"and" => try emit.mirArith(.@"and", inst), + .@"or" => try emit.mirArith(.@"or", inst), + .sbb => try emit.mirArith(.sbb, inst), + .cmp => try emit.mirArith(.cmp, inst), + + .adc_scale_src => try emit.mirArithScaleSrc(.adc, inst), + .add_scale_src => try emit.mirArithScaleSrc(.add, inst), + .sub_scale_src => try emit.mirArithScaleSrc(.sub, inst), + .xor_scale_src => try emit.mirArithScaleSrc(.xor, inst), + .and_scale_src => try emit.mirArithScaleSrc(.@"and", inst), + .or_scale_src => try emit.mirArithScaleSrc(.@"or", inst), + .sbb_scale_src => try emit.mirArithScaleSrc(.sbb, inst), + .cmp_scale_src => try emit.mirArithScaleSrc(.cmp, inst), + + .adc_scale_dst => try emit.mirArithScaleDst(.adc, inst), + .add_scale_dst => try emit.mirArithScaleDst(.add, inst), + .sub_scale_dst => try emit.mirArithScaleDst(.sub, inst), + .xor_scale_dst => try emit.mirArithScaleDst(.xor, inst), + .and_scale_dst => try emit.mirArithScaleDst(.@"and", inst), + .or_scale_dst => try emit.mirArithScaleDst(.@"or", inst), + .sbb_scale_dst => try emit.mirArithScaleDst(.sbb, inst), + .cmp_scale_dst => try emit.mirArithScaleDst(.cmp, inst), + + .adc_scale_imm => try emit.mirArithScaleImm(.adc, inst), + .add_scale_imm => try emit.mirArithScaleImm(.add, inst), + .sub_scale_imm => try emit.mirArithScaleImm(.sub, inst), + .xor_scale_imm => try emit.mirArithScaleImm(.xor, inst), + .and_scale_imm => try emit.mirArithScaleImm(.@"and", inst), + .or_scale_imm => try emit.mirArithScaleImm(.@"or", inst), + .sbb_scale_imm => try 
emit.mirArithScaleImm(.sbb, inst), + .cmp_scale_imm => try emit.mirArithScaleImm(.cmp, inst), + + // Even though MOV is technically not an arithmetic op, + // its structure can be represented using the same set of + // opcode primitives. + .mov => try emit.mirArith(.mov, inst), + .mov_scale_src => try emit.mirArithScaleSrc(.mov, inst), + .mov_scale_dst => try emit.mirArithScaleDst(.mov, inst), + .mov_scale_imm => try emit.mirArithScaleImm(.mov, inst), + .movabs => try emit.mirMovabs(inst), + + .lea => try emit.mirLea(inst), + .lea_rip => try emit.mirLeaRip(inst), + + .imul_complex => try emit.mirIMulComplex(inst), + + .push => try emit.mirPushPop(.push, inst), + .pop => try emit.mirPushPop(.pop, inst), + + .jmp => try emit.mirJmpCall(.jmp, inst), + .call => try emit.mirJmpCall(.call, inst), + + .cond_jmp_greater_less => try emit.mirCondJmp(.cond_jmp_greater_less, inst), + .cond_jmp_above_below => try emit.mirCondJmp(.cond_jmp_above_below, inst), + .cond_jmp_eq_ne => try emit.mirCondJmp(.cond_jmp_eq_ne, inst), + + .cond_set_byte_greater_less => try emit.mirCondSetByte(.cond_set_byte_greater_less, inst), + .cond_set_byte_above_below => try emit.mirCondSetByte(.cond_set_byte_above_below, inst), + .cond_set_byte_eq_ne => try emit.mirCondSetByte(.cond_set_byte_eq_ne, inst), + + .ret => try emit.mirRet(inst), + + .syscall => try emit.mirSyscall(), + + .@"test" => try emit.mirTest(inst), + + .brk => try emit.mirBrk(), + + .call_extern => try emit.mirCallExtern(inst), + + .dbg_line => try emit.mirDbgLine(inst), + .dbg_prologue_end => try emit.mirDbgPrologueEnd(inst), + .dbg_epilogue_begin => try emit.mirDbgEpilogueBegin(inst), + .arg_dbg_info => try emit.mirArgDbgInfo(inst), + + else => { + return emit.fail("Implement MIR->Isel lowering for x86_64 for pseudo-inst: {s}", .{tag}); + }, + } + } + + try emit.fixupRelocs(); +} + +pub fn deinit(emit: *Emit) void { + emit.relocs.deinit(emit.bin_file.allocator); + emit.code_offset_mapping.deinit(emit.bin_file.allocator); + emit.* 
= undefined; +} + +fn fail(emit: *Emit, comptime format: []const u8, args: anytype) InnerError { + @setCold(true); + assert(emit.err_msg == null); + emit.err_msg = try ErrorMsg.create(emit.bin_file.allocator, emit.src_loc, format, args); + return error.EmitFail; +} + +fn fixupRelocs(emit: *Emit) InnerError!void { + // TODO this function currently assumes all relocs via JMP/CALL instructions are 32bit in size. + // This should be reversed like it is done in aarch64 MIR emit code: start with the smallest + // possible resolution, i.e., 8bit, and iteratively converge on the minimum required resolution + // until the entire decl is correctly emitted with all JMP/CALL instructions within range. + for (emit.relocs.items) |reloc| { + const target = emit.code_offset_mapping.get(reloc.target) orelse + return emit.fail("JMP/CALL relocation target not found!", .{}); + const disp = @intCast(i32, @intCast(i64, target) - @intCast(i64, reloc.source + reloc.length)); + mem.writeIntLittle(i32, emit.code.items[reloc.offset..][0..4], disp); + } +} + +fn mirBrk(emit: *Emit) InnerError!void { + const encoder = try Encoder.init(emit.code, 1); + encoder.opcode_1byte(0xcc); +} + +fn mirSyscall(emit: *Emit) InnerError!void { + const encoder = try Encoder.init(emit.code, 2); + encoder.opcode_2byte(0x0f, 0x05); +} + +fn mirPushPop(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void { + const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + switch (ops.flags) { + 0b00 => { + // PUSH/POP reg + const opc: u8 = switch (tag) { + .push => 0x50, + .pop => 0x58, + else => unreachable, + }; + const encoder = try Encoder.init(emit.code, 1); + encoder.opcode_withReg(opc, ops.reg1.lowId()); + }, + 0b01 => { + // PUSH/POP r/m64 + const imm = emit.mir.instructions.items(.data)[inst].imm; + const opc: u8 = switch (tag) { + .push => 0xff, + .pop => 0x8f, + else => unreachable, + }; + const modrm_ext: u3 = switch (tag) { + .push => 0x6, + .pop => 0x0, + else => unreachable, 
+ };
+ const encoder = try Encoder.init(emit.code, 6);
+ encoder.opcode_1byte(opc);
+ if (math.cast(i8, imm)) |imm_i8| {
+ encoder.modRm_indirectDisp8(modrm_ext, ops.reg1.lowId());
+ encoder.imm8(@intCast(i8, imm_i8));
+ } else |_| {
+ encoder.modRm_indirectDisp32(modrm_ext, ops.reg1.lowId());
+ encoder.imm32(imm);
+ }
+ },
+ 0b10 => {
+ // PUSH imm32
+ assert(tag == .push);
+ const imm = emit.mir.instructions.items(.data)[inst].imm;
+ const opc: u8 = if (imm <= math.maxInt(i8)) 0x6a else 0x68;
+ const encoder = try Encoder.init(emit.code, 2);
+ encoder.opcode_1byte(opc);
+ if (imm <= math.maxInt(i8)) {
+ encoder.imm8(@intCast(i8, imm));
+ } else if (imm <= math.maxInt(i16)) {
+ encoder.imm16(@intCast(i16, imm));
+ } else {
+ encoder.imm32(imm);
+ }
+ },
+ 0b11 => unreachable,
+ }
+}
+
+fn mirJmpCall(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void {
+ const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+ const flag = @truncate(u1, ops.flags);
+ if (flag == 0) {
+ const target = emit.mir.instructions.items(.data)[inst].inst;
+ const opc: u8 = switch (tag) {
+ .jmp => 0xe9,
+ .call => 0xe8,
+ else => unreachable,
+ };
+ const source = emit.code.items.len;
+ const encoder = try Encoder.init(emit.code, 5);
+ encoder.opcode_1byte(opc);
+ try emit.relocs.append(emit.bin_file.allocator, .{
+ .source = source,
+ .target = target,
+ .offset = emit.code.items.len,
+ .length = 5,
+ });
+ encoder.imm32(0x0);
+ return;
+ }
+ const modrm_ext: u3 = switch (tag) {
+ .jmp => 0x4,
+ .call => 0x2,
+ else => unreachable,
+ };
+ if (ops.reg1 == .none) {
+ // JMP/CALL [imm]
+ const imm = emit.mir.instructions.items(.data)[inst].imm;
+ const encoder = try Encoder.init(emit.code, 7);
+ encoder.opcode_1byte(0xff);
+ encoder.modRm_SIBDisp0(modrm_ext);
+ encoder.sib_disp32();
+ encoder.imm32(imm);
+ return;
+ }
+ // JMP/CALL reg
+ const encoder = try Encoder.init(emit.code, 2);
+ encoder.opcode_1byte(0xff);
+ encoder.modRm_direct(modrm_ext,
ops.reg1.lowId());
+}
+
+const CondType = enum {
+ /// greater than or equal
+ gte,
+
+ /// greater than
+ gt,
+
+ /// less than
+ lt,
+
+ /// less than or equal
+ lte,
+
+ /// above or equal
+ ae,
+
+ /// above
+ a,
+
+ /// below
+ b,
+
+ /// below or equal
+ be,
+
+ /// not equal
+ ne,
+
+ /// equal
+ eq,
+
+ fn fromTagAndFlags(tag: Mir.Inst.Tag, flags: u2) CondType {
+ return switch (tag) {
+ .cond_jmp_greater_less,
+ .cond_set_byte_greater_less,
+ => switch (flags) {
+ 0b00 => CondType.gte,
+ 0b01 => CondType.gt,
+ 0b10 => CondType.lt,
+ 0b11 => CondType.lte,
+ },
+ .cond_jmp_above_below,
+ .cond_set_byte_above_below,
+ => switch (flags) {
+ 0b00 => CondType.ae,
+ 0b01 => CondType.a,
+ 0b10 => CondType.b,
+ 0b11 => CondType.be,
+ },
+ .cond_jmp_eq_ne,
+ .cond_set_byte_eq_ne,
+ => switch (@truncate(u1, flags)) {
+ 0b0 => CondType.ne,
+ 0b1 => CondType.eq,
+ },
+ else => unreachable,
+ };
+ }
+};
+
+inline fn getCondOpCode(tag: Mir.Inst.Tag, cond: CondType) u8 {
+ switch (cond) {
+ .gte => return switch (tag) {
+ .cond_jmp_greater_less => 0x8d,
+ .cond_set_byte_greater_less => 0x9d,
+ else => unreachable,
+ },
+ .gt => return switch (tag) {
+ .cond_jmp_greater_less => 0x8f,
+ .cond_set_byte_greater_less => 0x9f,
+ else => unreachable,
+ },
+ .lt => return switch (tag) {
+ .cond_jmp_greater_less => 0x8c,
+ .cond_set_byte_greater_less => 0x9c,
+ else => unreachable,
+ },
+ .lte => return switch (tag) {
+ .cond_jmp_greater_less => 0x8e,
+ .cond_set_byte_greater_less => 0x9e,
+ else => unreachable,
+ },
+ .ae => return switch (tag) {
+ .cond_jmp_above_below => 0x83,
+ .cond_set_byte_above_below => 0x93,
+ else => unreachable,
+ },
+ .a => return switch (tag) {
+ .cond_jmp_above_below => 0x87,
+ .cond_set_byte_above_below => 0x97,
+ else => unreachable,
+ },
+ .b => return switch (tag) {
+ .cond_jmp_above_below => 0x82,
+ .cond_set_byte_above_below => 0x92,
+ else => unreachable,
+ },
+ .be => return switch (tag) {
+ .cond_jmp_above_below => 0x86,
+ .cond_set_byte_above_below => 0x96,
+ else => unreachable,
+ },
+ .eq => return switch (tag) {
+ .cond_jmp_eq_ne => 0x84,
+ .cond_set_byte_eq_ne => 0x94,
+ else => unreachable,
+ },
+ .ne => return switch (tag) {
+ .cond_jmp_eq_ne => 0x85,
+ .cond_set_byte_eq_ne => 0x95,
+ else => unreachable,
+ },
+ }
+}
+
+fn mirCondJmp(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void {
+ const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+ const target = emit.mir.instructions.items(.data)[inst].inst;
+ const cond = CondType.fromTagAndFlags(tag, ops.flags);
+ const opc = getCondOpCode(tag, cond);
+ const source = emit.code.items.len;
+ const encoder = try Encoder.init(emit.code, 6);
+ encoder.opcode_2byte(0x0f, opc);
+ try emit.relocs.append(emit.bin_file.allocator, .{
+ .source = source,
+ .target = target,
+ .offset = emit.code.items.len,
+ .length = 6,
+ });
+ encoder.imm32(0);
+}
+
+fn mirCondSetByte(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void {
+ const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+ const cond = CondType.fromTagAndFlags(tag, ops.flags);
+ const opc = getCondOpCode(tag, cond);
+ const encoder = try Encoder.init(emit.code, 4);
+ encoder.rex(.{
+ .w = true,
+ .b = ops.reg1.isExtended(),
+ });
+ encoder.opcode_2byte(0x0f, opc);
+ encoder.modRm_direct(0x0, ops.reg1.lowId());
+}
+
+fn mirTest(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
+ const tag = emit.mir.instructions.items(.tag)[inst];
+ assert(tag == .@"test");
+ const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+ switch (ops.flags) {
+ 0b00 => blk: {
+ if (ops.reg2 == .none) {
+ // TEST r/m64, imm32
+ const imm = emit.mir.instructions.items(.data)[inst].imm;
+ if (ops.reg1.to64() == .rax) {
+ // TODO reduce the size of the instruction if the immediate
+ // is smaller than 32 bits
+ const encoder = try Encoder.init(emit.code, 6);
+ encoder.rex(.{
+ .w = true,
+ });
+ encoder.opcode_1byte(0xa9);
+
encoder.imm32(imm); + break :blk; + } + const opc: u8 = if (ops.reg1.size() == 8) 0xf6 else 0xf7; + const encoder = try Encoder.init(emit.code, 7); + encoder.rex(.{ + .w = true, + .b = ops.reg1.isExtended(), + }); + encoder.opcode_1byte(opc); + encoder.modRm_direct(0, ops.reg1.lowId()); + encoder.imm8(@intCast(i8, imm)); + break :blk; + } + // TEST r/m64, r64 + return emit.fail("TODO TEST r/m64, r64", .{}); + }, + else => return emit.fail("TODO more TEST alternatives", .{}), + } +} + +fn mirRet(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { + const tag = emit.mir.instructions.items(.tag)[inst]; + assert(tag == .ret); + const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const encoder = try Encoder.init(emit.code, 3); + switch (ops.flags) { + 0b00 => { + // RETF imm16 + const imm = emit.mir.instructions.items(.data)[inst].imm; + encoder.opcode_1byte(0xca); + encoder.imm16(@intCast(i16, imm)); + }, + 0b01 => encoder.opcode_1byte(0xcb), // RETF + 0b10 => { + // RET imm16 + const imm = emit.mir.instructions.items(.data)[inst].imm; + encoder.opcode_1byte(0xc2); + encoder.imm16(@intCast(i16, imm)); + }, + 0b11 => encoder.opcode_1byte(0xc3), // RET + } +} + +const EncType = enum { + /// OP r/m64, imm32 + mi, + + /// OP r/m64, r64 + mr, + + /// OP r64, r/m64 + rm, +}; + +const OpCode = struct { + opc: u8, + /// Only used if `EncType == .mi`. 
+ modrm_ext: u3, +}; + +inline fn getArithOpCode(tag: Mir.Inst.Tag, enc: EncType) OpCode { + switch (enc) { + .mi => return switch (tag) { + .adc => .{ .opc = 0x81, .modrm_ext = 0x2 }, + .add => .{ .opc = 0x81, .modrm_ext = 0x0 }, + .sub => .{ .opc = 0x81, .modrm_ext = 0x5 }, + .xor => .{ .opc = 0x81, .modrm_ext = 0x6 }, + .@"and" => .{ .opc = 0x81, .modrm_ext = 0x4 }, + .@"or" => .{ .opc = 0x81, .modrm_ext = 0x1 }, + .sbb => .{ .opc = 0x81, .modrm_ext = 0x3 }, + .cmp => .{ .opc = 0x81, .modrm_ext = 0x7 }, + .mov => .{ .opc = 0xc7, .modrm_ext = 0x0 }, + else => unreachable, + }, + .mr => { + const opc: u8 = switch (tag) { + .adc => 0x11, + .add => 0x01, + .sub => 0x29, + .xor => 0x31, + .@"and" => 0x21, + .@"or" => 0x09, + .sbb => 0x19, + .cmp => 0x39, + .mov => 0x89, + else => unreachable, + }; + return .{ .opc = opc, .modrm_ext = undefined }; + }, + .rm => { + const opc: u8 = switch (tag) { + .adc => 0x13, + .add => 0x03, + .sub => 0x2b, + .xor => 0x33, + .@"and" => 0x23, + .@"or" => 0x0b, + .sbb => 0x1b, + .cmp => 0x3b, + .mov => 0x8b, + else => unreachable, + }; + return .{ .opc = opc, .modrm_ext = undefined }; + }, + } +} + +fn mirArith(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void { + const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + switch (ops.flags) { + 0b00 => blk: { + if (ops.reg2 == .none) { + // OP reg1, imm32 + // OP r/m64, imm32 + const imm = emit.mir.instructions.items(.data)[inst].imm; + const opcode = getArithOpCode(tag, .mi); + const encoder = try Encoder.init(emit.code, 7); + encoder.rex(.{ + .w = ops.reg1.size() == 64, + .b = ops.reg1.isExtended(), + }); + if (tag != .mov and imm <= math.maxInt(i8)) { + encoder.opcode_1byte(opcode.opc + 2); + encoder.modRm_direct(opcode.modrm_ext, ops.reg1.lowId()); + encoder.imm8(@intCast(i8, imm)); + } else { + encoder.opcode_1byte(opcode.opc); + encoder.modRm_direct(opcode.modrm_ext, ops.reg1.lowId()); + encoder.imm32(imm); + } + break :blk; + } + // OP reg1, reg2 
+ // OP r/m64, r64 + const opcode = getArithOpCode(tag, .mr); + const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc; + const encoder = try Encoder.init(emit.code, 3); + encoder.rex(.{ + .w = ops.reg1.size() == 64 and ops.reg2.size() == 64, + .r = ops.reg1.isExtended(), + .b = ops.reg2.isExtended(), + }); + encoder.opcode_1byte(opc); + encoder.modRm_direct(ops.reg1.lowId(), ops.reg2.lowId()); + }, + 0b01 => blk: { + const imm = emit.mir.instructions.items(.data)[inst].imm; + const opcode = getArithOpCode(tag, .rm); + const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc; + if (ops.reg2 == .none) { + // OP reg1, [imm32] + // OP r64, r/m64 + const encoder = try Encoder.init(emit.code, 8); + encoder.rex(.{ + .w = ops.reg1.size() == 64, + .b = ops.reg1.isExtended(), + }); + encoder.opcode_1byte(opc); + encoder.modRm_SIBDisp0(ops.reg1.lowId()); + encoder.sib_disp32(); + encoder.disp32(imm); + break :blk; + } + // OP reg1, [reg2 + imm32] + // OP r64, r/m64 + const encoder = try Encoder.init(emit.code, 7); + encoder.rex(.{ + .w = ops.reg1.size() == 64, + .r = ops.reg1.isExtended(), + .b = ops.reg2.isExtended(), + }); + encoder.opcode_1byte(opc); + if (imm <= math.maxInt(i8)) { + encoder.modRm_indirectDisp8(ops.reg1.lowId(), ops.reg2.lowId()); + encoder.disp8(@intCast(i8, imm)); + } else { + encoder.modRm_indirectDisp32(ops.reg1.lowId(), ops.reg2.lowId()); + encoder.disp32(imm); + } + }, + 0b10 => blk: { + if (ops.reg2 == .none) { + // OP [reg1 + 0], imm32 + // OP r/m64, imm32 + const imm = emit.mir.instructions.items(.data)[inst].imm; + const opcode = getArithOpCode(tag, .mi); + const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc; + const encoder = try Encoder.init(emit.code, 7); + encoder.rex(.{ + .w = ops.reg1.size() == 64, + .b = ops.reg1.isExtended(), + }); + encoder.opcode_1byte(opc); + encoder.modRm_indirectDisp0(opcode.modrm_ext, ops.reg1.lowId()); + if (imm <= math.maxInt(i8)) { + encoder.imm8(@intCast(i8, imm)); + } 
else if (imm <= math.maxInt(i16)) { + encoder.imm16(@intCast(i16, imm)); + } else { + encoder.imm32(imm); + } + break :blk; + } + // OP [reg1 + imm32], reg2 + // OP r/m64, r64 + const imm = emit.mir.instructions.items(.data)[inst].imm; + const opcode = getArithOpCode(tag, .mr); + const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc; + const encoder = try Encoder.init(emit.code, 7); + encoder.rex(.{ + .w = ops.reg2.size() == 64, + .r = ops.reg1.isExtended(), + .b = ops.reg2.isExtended(), + }); + encoder.opcode_1byte(opc); + if (imm <= math.maxInt(i8)) { + encoder.modRm_indirectDisp8(ops.reg1.lowId(), ops.reg2.lowId()); + encoder.disp8(@intCast(i8, imm)); + } else { + encoder.modRm_indirectDisp32(ops.reg1.lowId(), ops.reg2.lowId()); + encoder.disp32(imm); + } + }, + 0b11 => blk: { + if (ops.reg2 == .none) { + // OP [reg1 + imm32], imm32 + // OP r/m64, imm32 + const payload = emit.mir.instructions.items(.data)[inst].payload; + const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data; + const opcode = getArithOpCode(tag, .mi); + const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc; + const encoder = try Encoder.init(emit.code, 11); + encoder.rex(.{ + .w = false, + .b = ops.reg1.isExtended(), + }); + encoder.opcode_1byte(opc); + if (imm_pair.dest_off <= math.maxInt(i8)) { + encoder.modRm_indirectDisp8(opcode.modrm_ext, ops.reg1.lowId()); + encoder.disp8(@intCast(i8, imm_pair.dest_off)); + } else { + encoder.modRm_indirectDisp32(opcode.modrm_ext, ops.reg1.lowId()); + encoder.disp32(imm_pair.dest_off); + } + encoder.imm32(imm_pair.operand); + break :blk; + } + // TODO clearly mov doesn't belong here; for other, arithemtic ops, + // this is the same as 0b00. 
+ const opcode = getArithOpCode(tag, if (tag == .mov) .rm else .mr); + const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc; + const encoder = try Encoder.init(emit.code, 3); + encoder.rex(.{ + .w = ops.reg1.size() == 64 and ops.reg2.size() == 64, + .r = ops.reg1.isExtended(), + .b = ops.reg2.isExtended(), + }); + encoder.opcode_1byte(opc); + encoder.modRm_direct(ops.reg1.lowId(), ops.reg2.lowId()); + }, + } +} + +fn mirArithScaleSrc(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void { + const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const scale = ops.flags; + // OP reg1, [reg2 + scale*rcx + imm32] + const opcode = getArithOpCode(tag, .rm); + const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc; + const imm = emit.mir.instructions.items(.data)[inst].imm; + const encoder = try Encoder.init(emit.code, 8); + encoder.rex(.{ + .w = ops.reg1.size() == 64, + .r = ops.reg1.isExtended(), + .b = ops.reg2.isExtended(), + }); + encoder.opcode_1byte(opc); + if (imm <= math.maxInt(i8)) { + encoder.modRm_SIBDisp8(ops.reg1.lowId()); + encoder.sib_scaleIndexBaseDisp8(scale, Register.rcx.lowId(), ops.reg2.lowId()); + encoder.disp8(@intCast(i8, imm)); + } else { + encoder.modRm_SIBDisp32(ops.reg1.lowId()); + encoder.sib_scaleIndexBaseDisp32(scale, Register.rcx.lowId(), ops.reg2.lowId()); + encoder.disp32(imm); + } +} + +fn mirArithScaleDst(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void { + const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const scale = ops.flags; + const imm = emit.mir.instructions.items(.data)[inst].imm; + + if (ops.reg2 == .none) { + // OP [reg1 + scale*rax + 0], imm32 + const opcode = getArithOpCode(tag, .mi); + const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc; + const encoder = try Encoder.init(emit.code, 8); + encoder.rex(.{ + .w = ops.reg1.size() == 64, + .b = ops.reg1.isExtended(), + }); + encoder.opcode_1byte(opc); + 
encoder.modRm_SIBDisp0(opcode.modrm_ext); + encoder.sib_scaleIndexBase(scale, Register.rax.lowId(), ops.reg1.lowId()); + if (imm <= math.maxInt(i8)) { + encoder.imm8(@intCast(i8, imm)); + } else if (imm <= math.maxInt(i16)) { + encoder.imm16(@intCast(i16, imm)); + } else { + encoder.imm32(imm); + } + return; + } + + // OP [reg1 + scale*rax + imm32], reg2 + const opcode = getArithOpCode(tag, .mr); + const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc; + const encoder = try Encoder.init(emit.code, 8); + encoder.rex(.{ + .w = ops.reg1.size() == 64, + .r = ops.reg2.isExtended(), + .b = ops.reg1.isExtended(), + }); + encoder.opcode_1byte(opc); + if (imm <= math.maxInt(i8)) { + encoder.modRm_SIBDisp8(ops.reg2.lowId()); + encoder.sib_scaleIndexBaseDisp8(scale, Register.rax.lowId(), ops.reg1.lowId()); + encoder.disp8(@intCast(i8, imm)); + } else { + encoder.modRm_SIBDisp32(ops.reg2.lowId()); + encoder.sib_scaleIndexBaseDisp32(scale, Register.rax.lowId(), ops.reg1.lowId()); + encoder.disp32(imm); + } +} + +fn mirArithScaleImm(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void { + const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const scale = ops.flags; + const payload = emit.mir.instructions.items(.data)[inst].payload; + const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data; + const opcode = getArithOpCode(tag, .mi); + const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc; + const encoder = try Encoder.init(emit.code, 2); + encoder.rex(.{ + .w = ops.reg1.size() == 64, + .b = ops.reg1.isExtended(), + }); + encoder.opcode_1byte(opc); + if (imm_pair.dest_off <= math.maxInt(i8)) { + encoder.modRm_SIBDisp8(opcode.modrm_ext); + encoder.sib_scaleIndexBaseDisp8(scale, Register.rax.lowId(), ops.reg1.lowId()); + encoder.disp8(@intCast(i8, imm_pair.dest_off)); + } else { + encoder.modRm_SIBDisp32(opcode.modrm_ext); + encoder.sib_scaleIndexBaseDisp32(scale, Register.rax.lowId(), ops.reg1.lowId()); + 
encoder.disp32(imm_pair.dest_off); + } + encoder.imm32(imm_pair.operand); +} + +fn mirMovabs(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { + const tag = emit.mir.instructions.items(.tag)[inst]; + assert(tag == .movabs); + const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + + const encoder = try Encoder.init(emit.code, 10); + const is_64 = blk: { + if (ops.flags == 0b00) { + // movabs reg, imm64 + const opc: u8 = if (ops.reg1.size() == 8) 0xb0 else 0xb8; + if (ops.reg1.size() == 64) { + encoder.rex(.{ + .w = true, + .b = ops.reg1.isExtended(), + }); + encoder.opcode_withReg(opc, ops.reg1.lowId()); + break :blk true; + } + break :blk false; + } + if (ops.reg1 == .none) { + // movabs moffs64, rax + const opc: u8 = if (ops.reg2.size() == 8) 0xa2 else 0xa3; + encoder.rex(.{ + .w = ops.reg2.size() == 64, + }); + encoder.opcode_1byte(opc); + break :blk ops.reg2.size() == 64; + } else { + // movabs rax, moffs64 + const opc: u8 = if (ops.reg2.size() == 8) 0xa0 else 0xa1; + encoder.rex(.{ + .w = ops.reg1.size() == 64, + }); + encoder.opcode_1byte(opc); + break :blk ops.reg1.size() == 64; + } + }; + + if (is_64) { + const payload = emit.mir.instructions.items(.data)[inst].payload; + const imm64 = emit.mir.extraData(Mir.Imm64, payload).data; + encoder.imm64(imm64.decode()); + } else { + const imm = emit.mir.instructions.items(.data)[inst].imm; + if (imm <= math.maxInt(i8)) { + encoder.imm8(@intCast(i8, imm)); + } else if (imm <= math.maxInt(i16)) { + encoder.imm16(@intCast(i16, imm)); + } else { + encoder.imm32(imm); + } + } +} + +fn mirIMulComplex(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { + const tag = emit.mir.instructions.items(.tag)[inst]; + assert(tag == .imul_complex); + const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + switch (ops.flags) { + 0b00 => { + const encoder = try Encoder.init(emit.code, 4); + encoder.rex(.{ + .w = ops.reg1.size() == 64, + .r = ops.reg1.isExtended(), + .b = ops.reg2.isExtended(), + }); + 
encoder.opcode_2byte(0x0f, 0xaf);
+ encoder.modRm_direct(ops.reg1.lowId(), ops.reg2.lowId());
+ },
+ 0b10 => {
+ const imm = emit.mir.instructions.items(.data)[inst].imm;
+ const opc: u8 = if (imm <= math.maxInt(i8)) 0x6b else 0x69;
+ const encoder = try Encoder.init(emit.code, 7);
+ encoder.rex(.{
+ .w = ops.reg1.size() == 64,
+ .r = ops.reg1.isExtended(),
+ .b = ops.reg2.isExtended(),
+ });
+ encoder.opcode_1byte(opc);
+ encoder.modRm_direct(ops.reg1.lowId(), ops.reg2.lowId());
+ if (imm <= math.maxInt(i8)) {
+ encoder.imm8(@intCast(i8, imm));
+ } else if (imm <= math.maxInt(i16)) {
+ encoder.imm16(@intCast(i16, imm));
+ } else {
+ encoder.imm32(imm);
+ }
+ },
+ else => return emit.fail("TODO implement imul", .{}),
+ }
+}
+
+fn mirLea(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
+ const tag = emit.mir.instructions.items(.tag)[inst];
+ assert(tag == .lea);
+ const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+ assert(ops.flags == 0b01);
+ const imm = emit.mir.instructions.items(.data)[inst].imm;
+
+ if (imm == 0) {
+ const encoder = try Encoder.init(emit.code, 3);
+ encoder.rex(.{
+ .w = ops.reg1.size() == 64,
+ .r = ops.reg1.isExtended(),
+ .b = ops.reg2.isExtended(),
+ });
+ encoder.opcode_1byte(0x8d);
+ encoder.modRm_indirectDisp0(ops.reg1.lowId(), ops.reg2.lowId());
+ } else if (imm <= math.maxInt(i8)) {
+ const encoder = try Encoder.init(emit.code, 4);
+ encoder.rex(.{
+ .w = ops.reg1.size() == 64,
+ .r = ops.reg1.isExtended(),
+ .b = ops.reg2.isExtended(),
+ });
+ encoder.opcode_1byte(0x8d);
+ encoder.modRm_indirectDisp8(ops.reg1.lowId(), ops.reg2.lowId());
+ encoder.disp8(@intCast(i8, imm));
+ } else {
+ const encoder = try Encoder.init(emit.code, 7);
+ encoder.rex(.{
+ .w = ops.reg1.size() == 64,
+ .r = ops.reg1.isExtended(),
+ .b = ops.reg2.isExtended(),
+ });
+ encoder.opcode_1byte(0x8d);
+ encoder.modRm_indirectDisp32(ops.reg1.lowId(), ops.reg2.lowId());
+ encoder.disp32(imm);
+ }
+}
+
+fn mirLeaRip(emit: *Emit, inst:
Mir.Inst.Index) InnerError!void { + const tag = emit.mir.instructions.items(.tag)[inst]; + assert(tag == .lea_rip); + const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const start_offset = emit.code.items.len; + const encoder = try Encoder.init(emit.code, 7); + encoder.rex(.{ + .w = ops.reg1.size() == 64, + .r = ops.reg1.isExtended(), + }); + encoder.opcode_1byte(0x8d); + encoder.modRm_RIPDisp32(ops.reg1.lowId()); + const end_offset = emit.code.items.len; + if (@truncate(u1, ops.flags) == 0b0) { + const payload = emit.mir.instructions.items(.data)[inst].payload; + const imm = emit.mir.extraData(Mir.Imm64, payload).data.decode(); + encoder.disp32(@intCast(i32, @intCast(i64, imm) - @intCast(i64, end_offset - start_offset + 4))); + } else { + const got_entry = emit.mir.instructions.items(.data)[inst].got_entry; + encoder.disp32(0); + if (emit.bin_file.cast(link.File.MachO)) |macho_file| { + // TODO I think the reloc might be in the wrong place. + const decl = macho_file.active_decl.?; + try decl.link.macho.relocs.append(emit.bin_file.allocator, .{ + .offset = @intCast(u32, end_offset), + .target = .{ .local = got_entry }, + .addend = 0, + .subtractor = null, + .pcrel = true, + .length = 2, + .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_GOT), + }); + } else { + return emit.fail("TODO implement lea_rip for linking backends different than MachO", .{}); + } + } +} + +fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { + const tag = emit.mir.instructions.items(.tag)[inst]; + assert(tag == .call_extern); + const n_strx = emit.mir.instructions.items(.data)[inst].extern_fn; + const offset = blk: { + const offset = @intCast(u32, emit.code.items.len + 1); + // callq + const encoder = try Encoder.init(emit.code, 5); + encoder.opcode_1byte(0xe8); + encoder.imm32(0x0); + break :blk offset; + }; + if (emit.bin_file.cast(link.File.MachO)) |macho_file| { + // Add relocation to the decl. 
+ try macho_file.active_decl.?.link.macho.relocs.append(emit.bin_file.allocator, .{ + .offset = offset, + .target = .{ .global = n_strx }, + .addend = 0, + .subtractor = null, + .pcrel = true, + .length = 2, + .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), + }); + } else { + return emit.fail("TODO implement call_extern for linking backends different than MachO", .{}); + } +} + +fn mirDbgLine(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { + const tag = emit.mir.instructions.items(.tag)[inst]; + assert(tag == .dbg_line); + const payload = emit.mir.instructions.items(.data)[inst].payload; + const dbg_line_column = emit.mir.extraData(Mir.DbgLineColumn, payload).data; + try emit.dbgAdvancePCAndLine(dbg_line_column.line, dbg_line_column.column); +} + +fn dbgAdvancePCAndLine(emit: *Emit, line: u32, column: u32) InnerError!void { + const delta_line = @intCast(i32, line) - @intCast(i32, emit.prev_di_line); + const delta_pc: usize = emit.code.items.len - emit.prev_di_pc; + switch (emit.debug_output) { + .dwarf => |dbg_out| { + // TODO Look into using the DWARF special opcodes to compress this data. + // It lets you emit single-byte opcodes that add different numbers to + // both the PC and the line number at the same time. 
+ try dbg_out.dbg_line.ensureUnusedCapacity(11); + dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.advance_pc); + leb128.writeULEB128(dbg_out.dbg_line.writer(), delta_pc) catch unreachable; + if (delta_line != 0) { + dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.advance_line); + leb128.writeILEB128(dbg_out.dbg_line.writer(), delta_line) catch unreachable; + } + dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.copy); + emit.prev_di_pc = emit.code.items.len; + emit.prev_di_line = line; + emit.prev_di_column = column; + emit.prev_di_pc = emit.code.items.len; + }, + .plan9 => |dbg_out| { + if (delta_pc <= 0) return; // only do this when the pc changes + // we have already checked the target in the linker to make sure it is compatable + const quant = @import("../../link/Plan9/aout.zig").getPCQuant(emit.target.cpu.arch) catch unreachable; + + // increasing the line number + try @import("../../link/Plan9.zig").changeLine(dbg_out.dbg_line, delta_line); + // increasing the pc + const d_pc_p9 = @intCast(i64, delta_pc) - quant; + if (d_pc_p9 > 0) { + // minus one because if its the last one, we want to leave space to change the line which is one quanta + try dbg_out.dbg_line.append(@intCast(u8, @divExact(d_pc_p9, quant) + 128) - quant); + if (dbg_out.pcop_change_index.*) |pci| + dbg_out.dbg_line.items[pci] += 1; + dbg_out.pcop_change_index.* = @intCast(u32, dbg_out.dbg_line.items.len - 1); + } else if (d_pc_p9 == 0) { + // we don't need to do anything, because adding the quant does it for us + } else unreachable; + if (dbg_out.start_line.* == null) + dbg_out.start_line.* = emit.prev_di_line; + dbg_out.end_line.* = line; + // only do this if the pc changed + emit.prev_di_line = line; + emit.prev_di_column = column; + emit.prev_di_pc = emit.code.items.len; + }, + .none => {}, + } +} + +fn mirDbgPrologueEnd(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { + const tag = emit.mir.instructions.items(.tag)[inst]; + assert(tag == .dbg_prologue_end); + switch (emit.debug_output) { + 
.dwarf => |dbg_out| { + try dbg_out.dbg_line.append(DW.LNS.set_prologue_end); + try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); + }, + .plan9 => {}, + .none => {}, + } +} + +fn mirDbgEpilogueBegin(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { + const tag = emit.mir.instructions.items(.tag)[inst]; + assert(tag == .dbg_epilogue_begin); + switch (emit.debug_output) { + .dwarf => |dbg_out| { + try dbg_out.dbg_line.append(DW.LNS.set_epilogue_begin); + try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); + }, + .plan9 => {}, + .none => {}, + } +} + +fn mirArgDbgInfo(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { + const tag = emit.mir.instructions.items(.tag)[inst]; + assert(tag == .arg_dbg_info); + const payload = emit.mir.instructions.items(.data)[inst].payload; + const arg_dbg_info = emit.mir.extraData(Mir.ArgDbgInfo, payload).data; + const mcv = emit.mir.function.args[arg_dbg_info.arg_index]; + try emit.genArgDbgInfo(arg_dbg_info.air_inst, mcv); +} + +fn genArgDbgInfo(emit: *Emit, inst: Air.Inst.Index, mcv: MCValue) !void { + const ty_str = emit.mir.function.air.instructions.items(.data)[inst].ty_str; + const zir = &emit.mir.function.mod_fn.owner_decl.getFileScope().zir; + const name = zir.nullTerminatedString(ty_str.str); + const name_with_null = name.ptr[0 .. 
name.len + 1]; + const ty = emit.mir.function.air.getRefType(ty_str.ty); + + switch (mcv) { + .register => |reg| { + switch (emit.debug_output) { + .dwarf => |dbg_out| { + try dbg_out.dbg_info.ensureUnusedCapacity(3); + dbg_out.dbg_info.appendAssumeCapacity(link.File.Elf.abbrev_parameter); + dbg_out.dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc + 1, // ULEB128 dwarf expression length + reg.dwarfLocOp(), + }); + try dbg_out.dbg_info.ensureUnusedCapacity(5 + name_with_null.len); + try emit.addDbgInfoTypeReloc(ty); // DW.AT.type, DW.FORM.ref4 + dbg_out.dbg_info.appendSliceAssumeCapacity(name_with_null); // DW.AT.name, DW.FORM.string + }, + .plan9 => {}, + .none => {}, + } + }, + .stack_offset => { + switch (emit.debug_output) { + .dwarf => {}, + .plan9 => {}, + .none => {}, + } + }, + else => {}, + } +} + +/// Adds a Type to the .debug_info at the current position. The bytes will be populated later, +/// after codegen for this symbol is done. +fn addDbgInfoTypeReloc(emit: *Emit, ty: Type) !void { + switch (emit.debug_output) { + .dwarf => |dbg_out| { + assert(ty.hasCodeGenBits()); + const index = dbg_out.dbg_info.items.len; + try dbg_out.dbg_info.resize(index + 4); // DW.AT.type, DW.FORM.ref4 + + const gop = try dbg_out.dbg_info_type_relocs.getOrPut(emit.bin_file.allocator, ty); + if (!gop.found_existing) { + gop.value_ptr.* = .{ + .off = undefined, + .relocs = .{}, + }; + } + try gop.value_ptr.relocs.append(emit.bin_file.allocator, @intCast(u32, index)); + }, + .plan9 => {}, + .none => {}, + } +} diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig new file mode 100644 index 0000000000..0d83bfed7c --- /dev/null +++ b/src/arch/x86_64/Mir.zig @@ -0,0 +1,379 @@ +//! Machine Intermediate Representation. +//! This data is produced by x86_64 Codegen and consumed by x86_64 Isel. +//! These instructions have a 1:1 correspondence with machine code instructions +//! for the target. 
MIR can be lowered to source-annotated textual assembly code +//! instructions, or it can be lowered to machine code. +//! The main purpose of MIR is to postpone the assignment of offsets until Isel, +//! so that, for example, the smaller encodings of jump instructions can be used. + +const Mir = @This(); +const std = @import("std"); +const builtin = @import("builtin"); +const assert = std.debug.assert; + +const bits = @import("bits.zig"); +const Air = @import("../../Air.zig"); +const CodeGen = @import("CodeGen.zig"); +const Register = bits.Register; + +function: *const CodeGen, +instructions: std.MultiArrayList(Inst).Slice, +/// The meaning of this data is determined by `Inst.Tag` value. +extra: []const u32, + +pub const Inst = struct { + tag: Tag, + /// This is 3 fields, and the meaning of each depends on `tag`. + /// reg1: Register + /// reg2: Register + /// flags: u2 + ops: u16, + /// The meaning of this depends on `tag` and `ops`. + data: Data, + + pub const Tag = enum(u16) { + /// ops flags: form: + /// 0b00 reg1, reg2 + /// 0b00 reg1, imm32 + /// 0b01 reg1, [reg2 + imm32] + /// 0b01 reg1, [ds:imm32] + /// 0b10 [reg1 + imm32], reg2 + /// 0b10 [reg1 + 0], imm32 + /// 0b11 [reg1 + imm32], imm32 + /// Notes: + /// * If reg2 is `none` then it means Data field `imm` is used as the immediate. + /// * When two imm32 values are required, Data field `payload` points at `ImmPair`. + adc, + + /// form: reg1, [reg2 + scale*rcx + imm32] + /// ops flags scale + /// 0b00 1 + /// 0b01 2 + /// 0b10 4 + /// 0b11 8 + adc_scale_src, + + /// form: [reg1 + scale*rax + imm32], reg2 + /// form: [reg1 + scale*rax + 0], imm32 + /// ops flags scale + /// 0b00 1 + /// 0b01 2 + /// 0b10 4 + /// 0b11 8 + /// Notes: + /// * If reg2 is `none` then it means Data field `imm` is used as the immediate. 
+ adc_scale_dst, + + /// form: [reg1 + scale*rax + imm32], imm32 + /// ops flags scale + /// 0b00 1 + /// 0b01 2 + /// 0b10 4 + /// 0b11 8 + /// Notes: + /// * Data field `payload` points at `ImmPair`. + adc_scale_imm, + + // The following instructions all have the same encoding as `adc`. + + add, + add_scale_src, + add_scale_dst, + add_scale_imm, + sub, + sub_scale_src, + sub_scale_dst, + sub_scale_imm, + xor, + xor_scale_src, + xor_scale_dst, + xor_scale_imm, + @"and", + and_scale_src, + and_scale_dst, + and_scale_imm, + @"or", + or_scale_src, + or_scale_dst, + or_scale_imm, + rol, + rol_scale_src, + rol_scale_dst, + rol_scale_imm, + ror, + ror_scale_src, + ror_scale_dst, + ror_scale_imm, + rcl, + rcl_scale_src, + rcl_scale_dst, + rcl_scale_imm, + rcr, + rcr_scale_src, + rcr_scale_dst, + rcr_scale_imm, + shl, + shl_scale_src, + shl_scale_dst, + shl_scale_imm, + sal, + sal_scale_src, + sal_scale_dst, + sal_scale_imm, + shr, + shr_scale_src, + shr_scale_dst, + shr_scale_imm, + sar, + sar_scale_src, + sar_scale_dst, + sar_scale_imm, + sbb, + sbb_scale_src, + sbb_scale_dst, + sbb_scale_imm, + cmp, + cmp_scale_src, + cmp_scale_dst, + cmp_scale_imm, + mov, + mov_scale_src, + mov_scale_dst, + mov_scale_imm, + lea, + lea_scale_src, + lea_scale_dst, + lea_scale_imm, + + /// ops flags: form: + /// 0bX0 reg1 + /// 0bX1 [reg1 + imm32] + imul, + idiv, + + /// ops flags: form: + /// 0b00 reg1, reg2 + /// 0b01 reg1, [reg2 + imm32] + /// 0b01 reg1, [imm32] if reg2 is none + /// 0b10 reg1, reg2, imm32 + /// 0b11 reg1, [reg2 + imm32], imm32 + imul_complex, + + /// ops flags: form: + /// 0bX0 reg1, [rip + imm32] + /// 0bX1 reg1, [rip + reloc] + /// Notes: + /// * if flags are 0bX1, `Data` contains `got_entry` for linker to generate + /// valid relocation for. + /// TODO handle more cases + lea_rip, + + /// ops flags: form: + /// 0bX0 reg1, imm64 + /// 0bX1 rax, moffs64 + /// Notes: + /// * If reg1 is 64-bit, the immediate is 64-bit and stored + /// within extra data `Imm64`. 
+ /// * For 0bX1, reg1 (or reg2) need to be + /// a version of rax. If reg1 == .none, then reg2 == .rax, + /// or vice versa. + /// TODO handle scaling + movabs, + + /// ops flags: 0bX0: + /// - Uses the `inst` Data tag as the jump target. + /// - reg1 and reg2 are ignored. + /// ops flags: 0bX1: + /// - reg1 is the jump target, reg2 and data are ignored. + /// - if reg1 is none, [imm] + jmp, + call, + + /// ops flags: + /// 0b00 gte + /// 0b01 gt + /// 0b10 lt + /// 0b11 lte + cond_jmp_greater_less, + cond_set_byte_greater_less, + + /// ops flags: + /// 0b00 above or equal + /// 0b01 above + /// 0b10 below + /// 0b11 below or equal + cond_jmp_above_below, + cond_set_byte_above_below, + + /// ops flags: + /// 0bX0 ne + /// 0bX1 eq + cond_jmp_eq_ne, + cond_set_byte_eq_ne, + + /// ops flags: form: + /// 0b00 reg1 + /// 0b01 [reg1 + imm32] + /// 0b10 imm32 + /// Notes: + /// * If 0b10 is specified and the tag is push, pushes immediate onto the stack + /// using the mnemonic PUSH imm32. + push, + pop, + + /// ops flags: form: + /// 0b00 retf imm16 + /// 0b01 retf + /// 0b10 retn imm16 + /// 0b11 retn + ret, + + /// Fast system call + syscall, + + /// ops flags: form: + /// 0b00 reg1, reg2 + /// 0b00 reg1, imm32 + /// 0b01 reg1, [reg2 + imm32] + /// 0b01 reg1, [ds:imm32] + /// 0b10 [reg1 + imm32], reg2 + /// 0b10 [reg1 + 0], imm32 + /// 0b11 [reg1 + imm32], imm32 + /// Notes: + /// * If reg2 is `none` then it means Data field `imm` is used as the immediate. + /// * When two imm32 values are required, Data field `payload` points at `ImmPair`. + @"test", + + /// Breakpoint + brk, + + /// Pseudo-instructions + /// call extern function + /// Notes: + /// * target of the call is stored as `extern_fn` in `Data` union. 
+ call_extern, + + /// end of prologue + dbg_prologue_end, + + /// start of epilogue + dbg_epilogue_begin, + + /// update debug line + dbg_line, + + /// arg debug info + arg_dbg_info, + }; + + /// The position of an MIR instruction within the `Mir` instructions array. + pub const Index = u32; + + /// All instructions have a 4-byte payload, which is contained within + /// this union. `Tag` determines which union field is active, as well as + /// how to interpret the data within. + pub const Data = union { + /// Another instruction. + inst: Index, + /// A 32-bit immediate value. + imm: i32, + /// An extern function. + /// Index into the linker's string table. + extern_fn: u32, + /// Entry in the GOT table by index. + got_entry: u32, + /// Index into `extra`. Meaning of what can be found there is context-dependent. + payload: u32, + }; + + // Make sure we don't accidentally make instructions bigger than expected. + // Note that in Debug builds, Zig is allowed to insert a secret field for safety checks. 
+ comptime { + if (builtin.mode != .Debug) { + assert(@sizeOf(Inst) == 8); + } + } +}; + +pub const ImmPair = struct { + dest_off: i32, + operand: i32, +}; + +pub const Imm64 = struct { + msb: u32, + lsb: u32, + + pub fn encode(v: u64) Imm64 { + return .{ + .msb = @truncate(u32, v >> 32), + .lsb = @truncate(u32, v), + }; + } + + pub fn decode(imm: Imm64) u64 { + var res: u64 = 0; + res |= (@intCast(u64, imm.msb) << 32); + res |= @intCast(u64, imm.lsb); + return res; + } +}; + +pub const DbgLineColumn = struct { + line: u32, + column: u32, +}; + +pub const ArgDbgInfo = struct { + air_inst: Air.Inst.Index, + arg_index: u32, +}; + +pub fn deinit(mir: *Mir, gpa: *std.mem.Allocator) void { + mir.instructions.deinit(gpa); + gpa.free(mir.extra); + mir.* = undefined; +} + +pub const Ops = struct { + reg1: Register = .none, + reg2: Register = .none, + flags: u2 = 0b00, + + pub fn encode(self: Ops) u16 { + var ops: u16 = 0; + ops |= @intCast(u16, @enumToInt(self.reg1)) << 9; + ops |= @intCast(u16, @enumToInt(self.reg2)) << 2; + ops |= self.flags; + return ops; + } + + pub fn decode(ops: u16) Ops { + const reg1 = @intToEnum(Register, @truncate(u7, ops >> 9)); + const reg2 = @intToEnum(Register, @truncate(u7, ops >> 2)); + const flags = @truncate(u2, ops); + return .{ + .reg1 = reg1, + .reg2 = reg2, + .flags = flags, + }; + } +}; + +pub fn extraData(mir: Mir, comptime T: type, index: usize) struct { data: T, end: usize } { + const fields = std.meta.fields(T); + var i: usize = index; + var result: T = undefined; + inline for (fields) |field| { + @field(result, field.name) = switch (field.field_type) { + u32 => mir.extra[i], + i32 => @bitCast(i32, mir.extra[i]), + else => @compileError("bad field type"), + }; + i += 1; + } + return .{ + .data = result, + .end = i, + }; +} diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index 72a7468041..df221595f3 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -22,7 +22,7 @@ const DW = std.dwarf; /// /// 
The ID can be easily determined by figuring out what range the register is /// in, and then subtracting the base. -pub const Register = enum(u8) { +pub const Register = enum(u7) { // 0 through 15, 64-bit registers. 8-15 are extended. // id is just the int value. rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, @@ -43,6 +43,10 @@ pub const Register = enum(u8) { al, cl, dl, bl, ah, ch, dh, bh, r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b, + // Pseudo, used only for MIR to signify that the + // operand is not a register but an immediate, etc. + none, + /// Returns the bit-width of the register. pub fn size(self: Register) u7 { return switch (@enumToInt(self)) { @@ -73,7 +77,7 @@ pub const Register = enum(u8) { } /// Like id, but only returns the lower 3 bits. - pub fn low_id(self: Register) u3 { + pub fn lowId(self: Register) u3 { return @truncate(u3, @enumToInt(self)); } @@ -577,8 +581,8 @@ test "x86_64 Encoder helpers" { }); encoder.opcode_2byte(0x0f, 0xaf); encoder.modRm_direct( - Register.eax.low_id(), - Register.edi.low_id(), + Register.eax.lowId(), + Register.edi.lowId(), ); try testing.expectEqualSlices(u8, &[_]u8{ 0x0f, 0xaf, 0xc7 }, code.items); @@ -597,8 +601,8 @@ test "x86_64 Encoder helpers" { }); encoder.opcode_1byte(0x89); encoder.modRm_direct( - Register.edi.low_id(), - Register.eax.low_id(), + Register.edi.lowId(), + Register.eax.lowId(), ); try testing.expectEqualSlices(u8, &[_]u8{ 0x89, 0xf8 }, code.items); @@ -624,7 +628,7 @@ test "x86_64 Encoder helpers" { encoder.opcode_1byte(0x81); encoder.modRm_direct( 0, - Register.rcx.low_id(), + Register.rcx.lowId(), ); encoder.imm32(2147483647); diff --git a/src/codegen.zig b/src/codegen.zig index c16c3aca7a..0f13b43c36 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -117,7 +117,7 @@ pub fn generateFunction( //.thumb => return Function(.thumb).generate(bin_file, src_loc, func, air, liveness, code, debug_output), //.thumbeb => return Function(.thumbeb).generate(bin_file, src_loc, func, air, liveness, code, 
debug_output), //.i386 => return Function(.i386).generate(bin_file, src_loc, func, air, liveness, code, debug_output), - .x86_64 => return @import("arch/x86_64/CodeGen.zig").generate(.x86_64, bin_file, src_loc, func, air, liveness, code, debug_output), + .x86_64 => return @import("arch/x86_64/CodeGen.zig").generate(bin_file, src_loc, func, air, liveness, code, debug_output), //.xcore => return Function(.xcore).generate(bin_file, src_loc, func, air, liveness, code, debug_output), //.nvptx => return Function(.nvptx).generate(bin_file, src_loc, func, air, liveness, code, debug_output), //.nvptx64 => return Function(.nvptx64).generate(bin_file, src_loc, func, air, liveness, code, debug_output),