From ef9aeb6ac415348e16f04913839002929064c91e Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 17 Jul 2020 09:33:56 -0700 Subject: [PATCH 1/4] stage2: codegen: refactor to always have comptime arch --- src-self-hosted/codegen.zig | 2220 ++++++++++++++-------------- src-self-hosted/codegen/x86_64.zig | 5 +- 2 files changed, 1116 insertions(+), 1109 deletions(-) diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index e78ee28b5d..c259eb2595 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -11,6 +11,8 @@ const ErrorMsg = Module.ErrorMsg; const Target = std.Target; const Allocator = mem.Allocator; const trace = @import("tracy.zig").trace; +const x86_64 = @import("codegen/x86_64.zig"); +const x86 = @import("codegen/x86.zig"); /// The codegen-related data that is stored in `ir.Inst.Block` instructions. pub const BlockData = struct { @@ -32,67 +34,75 @@ pub const Result = union(enum) { fail: *Module.ErrorMsg, }; +pub const GenerateSymbolError = error{ + OutOfMemory, + /// A Decl that this symbol depends on had a semantic analysis failure. + AnalysisFail, +}; + pub fn generateSymbol( bin_file: *link.File.Elf, src: usize, typed_value: TypedValue, code: *std.ArrayList(u8), -) error{ - OutOfMemory, - /// A Decl that this symbol depends on had a semantic analysis failure. - AnalysisFail, -}!Result { +) GenerateSymbolError!Result { const tracy = trace(@src()); defer tracy.end(); switch (typed_value.ty.zigTypeTag()) { .Fn => { - const module_fn = typed_value.val.cast(Value.Payload.Function).?.func; - - const fn_type = module_fn.owner_decl.typed_value.most_recent.typed_value.ty; - const param_types = try bin_file.allocator.alloc(Type, fn_type.fnParamLen()); - defer bin_file.allocator.free(param_types); - fn_type.fnParamTypes(param_types); - var mc_args = try bin_file.allocator.alloc(MCValue, param_types.len); - defer bin_file.allocator.free(mc_args); - - var branch_stack = std.ArrayList(Function.Branch).init(bin_file.allocator); - defer { - assert(branch_stack.items.len == 1); - branch_stack.items[0].deinit(bin_file.allocator); - branch_stack.deinit(); - } - const branch = try branch_stack.addOne(); - branch.* = .{}; - - var function = Function{ - .gpa = bin_file.allocator, - .target = &bin_file.options.target, - .bin_file = bin_file, - .mod_fn = module_fn, - .code = code, - .err_msg = null, - .args = mc_args, - .arg_index = 0, - .branch_stack = &branch_stack, - .src = src, - }; - - const cc = fn_type.fnCallingConvention(); - branch.max_end_stack = function.resolveParameters(src, cc, param_types, mc_args) catch |err| switch (err) { - error.CodegenFail => return Result{ .fail = function.err_msg.? }, - else => |e| return e, - }; - - function.gen() catch |err| switch (err) { - error.CodegenFail => return Result{ .fail = function.err_msg.? 
}, - else => |e| return e, - }; - - if (function.err_msg) |em| { - return Result{ .fail = em }; - } else { - return Result{ .appended = {} }; + switch (bin_file.options.target.cpu.arch) { + .arm => return Function(.arm).generateSymbol(bin_file, src, typed_value, code), + .armeb => return Function(.armeb).generateSymbol(bin_file, src, typed_value, code), + .aarch64 => return Function(.aarch64).generateSymbol(bin_file, src, typed_value, code), + .aarch64_be => return Function(.aarch64_be).generateSymbol(bin_file, src, typed_value, code), + .aarch64_32 => return Function(.aarch64_32).generateSymbol(bin_file, src, typed_value, code), + .arc => return Function(.arc).generateSymbol(bin_file, src, typed_value, code), + .avr => return Function(.avr).generateSymbol(bin_file, src, typed_value, code), + .bpfel => return Function(.bpfel).generateSymbol(bin_file, src, typed_value, code), + .bpfeb => return Function(.bpfeb).generateSymbol(bin_file, src, typed_value, code), + .hexagon => return Function(.hexagon).generateSymbol(bin_file, src, typed_value, code), + .mips => return Function(.mips).generateSymbol(bin_file, src, typed_value, code), + .mipsel => return Function(.mipsel).generateSymbol(bin_file, src, typed_value, code), + .mips64 => return Function(.mips64).generateSymbol(bin_file, src, typed_value, code), + .mips64el => return Function(.mips64el).generateSymbol(bin_file, src, typed_value, code), + .msp430 => return Function(.msp430).generateSymbol(bin_file, src, typed_value, code), + .powerpc => return Function(.powerpc).generateSymbol(bin_file, src, typed_value, code), + .powerpc64 => return Function(.powerpc64).generateSymbol(bin_file, src, typed_value, code), + .powerpc64le => return Function(.powerpc64le).generateSymbol(bin_file, src, typed_value, code), + .r600 => return Function(.r600).generateSymbol(bin_file, src, typed_value, code), + .amdgcn => return Function(.amdgcn).generateSymbol(bin_file, src, typed_value, code), + .riscv32 => return Function(.riscv32).generateSymbol(bin_file, src, typed_value, code), + .riscv64 => return Function(.riscv64).generateSymbol(bin_file, src, typed_value, code), + .sparc => return Function(.sparc).generateSymbol(bin_file, src, typed_value, code), + .sparcv9 => return Function(.sparcv9).generateSymbol(bin_file, src, typed_value, code), + .sparcel => return Function(.sparcel).generateSymbol(bin_file, src, typed_value, code), + .s390x => return Function(.s390x).generateSymbol(bin_file, src, typed_value, code), + .tce => return Function(.tce).generateSymbol(bin_file, src, typed_value, code), + .tcele => return Function(.tcele).generateSymbol(bin_file, src, typed_value, code), + .thumb => return Function(.thumb).generateSymbol(bin_file, src, typed_value, code), + .thumbeb => return Function(.thumbeb).generateSymbol(bin_file, src, typed_value, code), + .i386 => return Function(.i386).generateSymbol(bin_file, src, typed_value, code), + .x86_64 => return Function(.x86_64).generateSymbol(bin_file, src, typed_value, code), + .xcore => return Function(.xcore).generateSymbol(bin_file, src, typed_value, code), + .nvptx => return Function(.nvptx).generateSymbol(bin_file, src, typed_value, code), + .nvptx64 => return Function(.nvptx64).generateSymbol(bin_file, src, typed_value, code), + .le32 => return Function(.le32).generateSymbol(bin_file, src, typed_value, code), + .le64 => return Function(.le64).generateSymbol(bin_file, src, typed_value, code), + .amdil => return Function(.amdil).generateSymbol(bin_file, src, typed_value, code), + .amdil64 => return 
Function(.amdil64).generateSymbol(bin_file, src, typed_value, code), + .hsail => return Function(.hsail).generateSymbol(bin_file, src, typed_value, code), + .hsail64 => return Function(.hsail64).generateSymbol(bin_file, src, typed_value, code), + .spir => return Function(.spir).generateSymbol(bin_file, src, typed_value, code), + .spir64 => return Function(.spir64).generateSymbol(bin_file, src, typed_value, code), + .kalimba => return Function(.kalimba).generateSymbol(bin_file, src, typed_value, code), + .shave => return Function(.shave).generateSymbol(bin_file, src, typed_value, code), + .lanai => return Function(.lanai).generateSymbol(bin_file, src, typed_value, code), + .wasm32 => return Function(.wasm32).generateSymbol(bin_file, src, typed_value, code), + .wasm64 => return Function(.wasm64).generateSymbol(bin_file, src, typed_value, code), + .renderscript32 => return Function(.renderscript32).generateSymbol(bin_file, src, typed_value, code), + .renderscript64 => return Function(.renderscript64).generateSymbol(bin_file, src, typed_value, code), + .ve => return Function(.ve).generateSymbol(bin_file, src, typed_value, code), } }, .Array => { @@ -189,1101 +199,1095 @@ const InnerError = error{ CodegenFail, }; -const MCValue = union(enum) { - /// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc. - none, - /// Control flow will not allow this value to be observed. - unreach, - /// No more references to this value remain. - dead, - /// A pointer-sized integer that fits in a register. - immediate: u64, - /// The constant was emitted into the code, at this offset. - embedded_in_code: usize, - /// The value is in a target-specific register. The value can - /// be @intToEnum casted to the respective Reg enum. - register: usize, - /// The value is in memory at a hard-coded address. - memory: u64, - /// The value is one of the stack variables. - stack_offset: u64, - /// The value is in the compare flags assuming an unsigned operation, - /// with this operator applied on top of it. - compare_flags_unsigned: std.math.CompareOperator, - /// The value is in the compare flags assuming a signed operation, - /// with this operator applied on top of it. - compare_flags_signed: std.math.CompareOperator, +fn Function(comptime arch: std.Target.Cpu.Arch) type { + return struct { + gpa: *Allocator, + bin_file: *link.File.Elf, + target: *const std.Target, + mod_fn: *const Module.Fn, + code: *std.ArrayList(u8), + err_msg: ?*ErrorMsg, + args: []MCValue, + arg_index: usize, + src: usize, - fn isMemory(mcv: MCValue) bool { - return switch (mcv) { - .embedded_in_code, .memory, .stack_offset => true, - else => false, - }; - } + /// Whenever there is a runtime branch, we push a Branch onto this stack, + /// and pop it off when the runtime branch joins. This provides an "overlay" + /// of the table of mappings from instructions to `MCValue` from within the branch. + /// This way we can modify the `MCValue` for an instruction in different ways + /// within different branches. Special consideration is needed when a branch + /// joins with its parent, to make sure all instructions have the same MCValue + /// across each runtime branch upon joining. + branch_stack: *std.ArrayList(Branch), - fn isImmediate(mcv: MCValue) bool { - return switch (mcv) { - .immediate => true, - else => false, - }; - } + const MCValue = union(enum) { + /// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc. + none, + /// Control flow will not allow this value to be observed. 
+ unreach, + /// No more references to this value remain. + dead, + /// A pointer-sized integer that fits in a register. + immediate: u64, + /// The constant was emitted into the code, at this offset. + embedded_in_code: usize, + /// The value is in a target-specific register. + register: Reg, + /// The value is in memory at a hard-coded address. + memory: u64, + /// The value is one of the stack variables. + stack_offset: u64, + /// The value is in the compare flags assuming an unsigned operation, + /// with this operator applied on top of it. + compare_flags_unsigned: std.math.CompareOperator, + /// The value is in the compare flags assuming a signed operation, + /// with this operator applied on top of it. + compare_flags_signed: std.math.CompareOperator, - fn isMutable(mcv: MCValue) bool { - return switch (mcv) { - .none => unreachable, - .unreach => unreachable, - .dead => unreachable, - - .immediate, - .embedded_in_code, - .memory, - .compare_flags_unsigned, - .compare_flags_signed, - => false, - - .register, - .stack_offset, - => true, - }; - } -}; - -const Function = struct { - gpa: *Allocator, - bin_file: *link.File.Elf, - target: *const std.Target, - mod_fn: *const Module.Fn, - code: *std.ArrayList(u8), - err_msg: ?*ErrorMsg, - args: []MCValue, - arg_index: usize, - src: usize, - - /// Whenever there is a runtime branch, we push a Branch onto this stack, - /// and pop it off when the runtime branch joins. This provides an "overlay" - /// of the table of mappings from instructions to `MCValue` from within the branch. - /// This way we can modify the `MCValue` for an instruction in different ways - /// within different branches. Special consideration is needed when a branch - /// joins with its parent, to make sure all instructions have the same MCValue - /// across each runtime branch upon joining. - branch_stack: *std.ArrayList(Branch), - - const Branch = struct { - inst_table: std.AutoHashMapUnmanaged(*ir.Inst, MCValue) = .{}, - - /// The key is an enum value of an arch-specific register. - registers: std.AutoHashMapUnmanaged(usize, RegisterAllocation) = .{}, - - /// Maps offset to what is stored there. - stack: std.AutoHashMapUnmanaged(usize, StackAllocation) = .{}, - /// Offset from the stack base, representing the end of the stack frame. - max_end_stack: u32 = 0, - /// Represents the current end stack offset. If there is no existing slot - /// to place a new stack allocation, it goes here, and then bumps `max_end_stack`. 
- next_stack_offset: u32 = 0, - - fn deinit(self: *Branch, gpa: *Allocator) void { - self.inst_table.deinit(gpa); - self.registers.deinit(gpa); - self.stack.deinit(gpa); - self.* = undefined; - } - }; - - const RegisterAllocation = struct { - inst: *ir.Inst, - }; - - const StackAllocation = struct { - inst: *ir.Inst, - size: u32, - }; - - fn gen(self: *Function) !void { - switch (self.target.cpu.arch) { - .arm => return self.genArch(.arm), - .armeb => return self.genArch(.armeb), - .aarch64 => return self.genArch(.aarch64), - .aarch64_be => return self.genArch(.aarch64_be), - .aarch64_32 => return self.genArch(.aarch64_32), - .arc => return self.genArch(.arc), - .avr => return self.genArch(.avr), - .bpfel => return self.genArch(.bpfel), - .bpfeb => return self.genArch(.bpfeb), - .hexagon => return self.genArch(.hexagon), - .mips => return self.genArch(.mips), - .mipsel => return self.genArch(.mipsel), - .mips64 => return self.genArch(.mips64), - .mips64el => return self.genArch(.mips64el), - .msp430 => return self.genArch(.msp430), - .powerpc => return self.genArch(.powerpc), - .powerpc64 => return self.genArch(.powerpc64), - .powerpc64le => return self.genArch(.powerpc64le), - .r600 => return self.genArch(.r600), - .amdgcn => return self.genArch(.amdgcn), - .riscv32 => return self.genArch(.riscv32), - .riscv64 => return self.genArch(.riscv64), - .sparc => return self.genArch(.sparc), - .sparcv9 => return self.genArch(.sparcv9), - .sparcel => return self.genArch(.sparcel), - .s390x => return self.genArch(.s390x), - .tce => return self.genArch(.tce), - .tcele => return self.genArch(.tcele), - .thumb => return self.genArch(.thumb), - .thumbeb => return self.genArch(.thumbeb), - .i386 => return self.genArch(.i386), - .x86_64 => return self.genArch(.x86_64), - .xcore => return self.genArch(.xcore), - .nvptx => return self.genArch(.nvptx), - .nvptx64 => return self.genArch(.nvptx64), - .le32 => return self.genArch(.le32), - .le64 => return self.genArch(.le64), - .amdil => return self.genArch(.amdil), - .amdil64 => return self.genArch(.amdil64), - .hsail => return self.genArch(.hsail), - .hsail64 => return self.genArch(.hsail64), - .spir => return self.genArch(.spir), - .spir64 => return self.genArch(.spir64), - .kalimba => return self.genArch(.kalimba), - .shave => return self.genArch(.shave), - .lanai => return self.genArch(.lanai), - .wasm32 => return self.genArch(.wasm32), - .wasm64 => return self.genArch(.wasm64), - .renderscript32 => return self.genArch(.renderscript32), - .renderscript64 => return self.genArch(.renderscript64), - .ve => return self.genArch(.ve), - } - } - - fn genArch(self: *Function, comptime arch: std.Target.Cpu.Arch) !void { - try self.code.ensureCapacity(self.code.items.len + 11); - - // push rbp - // mov rbp, rsp - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x55, 0x48, 0x89, 0xe5 }); - - // sub rsp, x - const stack_end = self.branch_stack.items[0].max_end_stack; - if (stack_end > std.math.maxInt(i32)) { - return self.fail(self.src, "too much stack used in call parameters", .{}); - } else if (stack_end > std.math.maxInt(i8)) { - // 48 83 ec xx sub rsp,0x10 - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x81, 0xec }); - const x = @intCast(u32, stack_end); - mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x); - } else if (stack_end != 0) { - // 48 81 ec xx xx xx xx sub rsp,0x80 - const x = @intCast(u8, stack_end); - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x83, 0xec, x }); - } - - try self.genBody(self.mod_fn.analysis.success, arch); - 
} - - fn genBody(self: *Function, body: ir.Body, comptime arch: std.Target.Cpu.Arch) InnerError!void { - const inst_table = &self.branch_stack.items[0].inst_table; - for (body.instructions) |inst| { - const new_inst = try self.genFuncInst(inst, arch); - try inst_table.putNoClobber(self.gpa, inst, new_inst); - } - } - - fn genFuncInst(self: *Function, inst: *ir.Inst, comptime arch: std.Target.Cpu.Arch) !MCValue { - switch (inst.tag) { - .add => return self.genAdd(inst.cast(ir.Inst.Add).?, arch), - .arg => return self.genArg(inst.cast(ir.Inst.Arg).?), - .assembly => return self.genAsm(inst.cast(ir.Inst.Assembly).?, arch), - .bitcast => return self.genBitCast(inst.cast(ir.Inst.BitCast).?), - .block => return self.genBlock(inst.cast(ir.Inst.Block).?, arch), - .br => return self.genBr(inst.cast(ir.Inst.Br).?, arch), - .breakpoint => return self.genBreakpoint(inst.src, arch), - .brvoid => return self.genBrVoid(inst.cast(ir.Inst.BrVoid).?, arch), - .call => return self.genCall(inst.cast(ir.Inst.Call).?, arch), - .cmp => return self.genCmp(inst.cast(ir.Inst.Cmp).?, arch), - .condbr => return self.genCondBr(inst.cast(ir.Inst.CondBr).?, arch), - .constant => unreachable, // excluded from function bodies - .isnonnull => return self.genIsNonNull(inst.cast(ir.Inst.IsNonNull).?, arch), - .isnull => return self.genIsNull(inst.cast(ir.Inst.IsNull).?, arch), - .ptrtoint => return self.genPtrToInt(inst.cast(ir.Inst.PtrToInt).?), - .ret => return self.genRet(inst.cast(ir.Inst.Ret).?, arch), - .retvoid => return self.genRetVoid(inst.cast(ir.Inst.RetVoid).?, arch), - .sub => return self.genSub(inst.cast(ir.Inst.Sub).?, arch), - .unreach => return MCValue{ .unreach = {} }, - .not => return self.genNot(inst.cast(ir.Inst.Not).?, arch), - } - } - - fn genNot(self: *Function, inst: *ir.Inst.Not, comptime arch: std.Target.Cpu.Arch) !MCValue { - // No side effects, so if it's unreferenced, do nothing. - if (inst.base.isUnused()) - return MCValue.dead; - const operand = try self.resolveInst(inst.args.operand); - switch (operand) { - .dead => unreachable, - .unreach => unreachable, - .compare_flags_unsigned => |op| return MCValue{ - .compare_flags_unsigned = switch (op) { - .gte => .lt, - .gt => .lte, - .neq => .eq, - .lt => .gte, - .lte => .gt, - .eq => .neq, - }, - }, - .compare_flags_signed => |op| return MCValue{ - .compare_flags_signed = switch (op) { - .gte => .lt, - .gt => .lte, - .neq => .eq, - .lt => .gte, - .lte => .gt, - .eq => .neq, - }, - }, - else => {}, - } - - switch (arch) { - .x86_64 => { - var imm = ir.Inst.Constant{ - .base = .{ - .tag = .constant, - .deaths = 0, - .ty = inst.args.operand.ty, - .src = inst.args.operand.src, - }, - .val = Value.initTag(.bool_true), + fn isMemory(mcv: MCValue) bool { + return switch (mcv) { + .embedded_in_code, .memory, .stack_offset => true, + else => false, }; - return try self.genX8664BinMath(&inst.base, inst.args.operand, &imm.base, 6, 0x30); - }, - else => return self.fail(inst.base.src, "TODO implement NOT for {}", .{self.target.cpu.arch}), - } - } - - fn genAdd(self: *Function, inst: *ir.Inst.Add, comptime arch: std.Target.Cpu.Arch) !MCValue { - // No side effects, so if it's unreferenced, do nothing. 
- if (inst.base.isUnused()) - return MCValue.dead; - switch (arch) { - .x86_64 => { - return try self.genX8664BinMath(&inst.base, inst.args.lhs, inst.args.rhs, 0, 0x00); - }, - else => return self.fail(inst.base.src, "TODO implement add for {}", .{self.target.cpu.arch}), - } - } - - fn genSub(self: *Function, inst: *ir.Inst.Sub, comptime arch: std.Target.Cpu.Arch) !MCValue { - // No side effects, so if it's unreferenced, do nothing. - if (inst.base.isUnused()) - return MCValue.dead; - switch (arch) { - .x86_64 => { - return try self.genX8664BinMath(&inst.base, inst.args.lhs, inst.args.rhs, 5, 0x28); - }, - else => return self.fail(inst.base.src, "TODO implement sub for {}", .{self.target.cpu.arch}), - } - } - - /// ADD, SUB, XOR, OR, AND - fn genX8664BinMath(self: *Function, inst: *ir.Inst, op_lhs: *ir.Inst, op_rhs: *ir.Inst, opx: u8, mr: u8) !MCValue { - try self.code.ensureCapacity(self.code.items.len + 8); - - const lhs = try self.resolveInst(op_lhs); - const rhs = try self.resolveInst(op_rhs); - - // There are 2 operands, destination and source. - // Either one, but not both, can be a memory operand. - // Source operand can be an immediate, 8 bits or 32 bits. - // So, if either one of the operands dies with this instruction, we can use it - // as the result MCValue. - var dst_mcv: MCValue = undefined; - var src_mcv: MCValue = undefined; - var src_inst: *ir.Inst = undefined; - if (inst.operandDies(0) and lhs.isMutable()) { - // LHS dies; use it as the destination. - // Both operands cannot be memory. - src_inst = op_rhs; - if (lhs.isMemory() and rhs.isMemory()) { - dst_mcv = try self.copyToNewRegister(op_lhs); - src_mcv = rhs; - } else { - dst_mcv = lhs; - src_mcv = rhs; } - } else if (inst.operandDies(1) and rhs.isMutable()) { - // RHS dies; use it as the destination. - // Both operands cannot be memory. - src_inst = op_lhs; - if (lhs.isMemory() and rhs.isMemory()) { - dst_mcv = try self.copyToNewRegister(op_rhs); - src_mcv = lhs; - } else { - dst_mcv = rhs; - src_mcv = lhs; + + fn isImmediate(mcv: MCValue) bool { + return switch (mcv) { + .immediate => true, + else => false, + }; } - } else { - if (lhs.isMemory()) { - dst_mcv = try self.copyToNewRegister(op_lhs); - src_mcv = rhs; - src_inst = op_rhs; - } else { - dst_mcv = try self.copyToNewRegister(op_rhs); - src_mcv = lhs; - src_inst = op_lhs; - } - } - // This instruction supports only signed 32-bit immediates at most. If the immediate - // value is larger than this, we put it in a register. - // A potential opportunity for future optimization here would be keeping track - // of the fact that the instruction is available both as an immediate - // and as a register. 
- switch (src_mcv) { - .immediate => |imm| { - if (imm > std.math.maxInt(u31)) { - src_mcv = try self.copyToNewRegister(src_inst); - } - }, - else => {}, - } - try self.genX8664BinMathCode(inst.src, dst_mcv, src_mcv, opx, mr); - - return dst_mcv; - } - - fn genX8664BinMathCode(self: *Function, src: usize, dst_mcv: MCValue, src_mcv: MCValue, opx: u8, mr: u8) !void { - switch (dst_mcv) { - .none => unreachable, - .dead, .unreach, .immediate => unreachable, - .compare_flags_unsigned => unreachable, - .compare_flags_signed => unreachable, - .register => |dst_reg_usize| { - const dst_reg = @intToEnum(Reg(.x86_64), @intCast(u8, dst_reg_usize)); - switch (src_mcv) { + fn isMutable(mcv: MCValue) bool { + return switch (mcv) { .none => unreachable, - .dead, .unreach => unreachable, - .register => |src_reg_usize| { - const src_reg = @intToEnum(Reg(.x86_64), @intCast(u8, src_reg_usize)); - self.rex(.{ .b = dst_reg.isExtended(), .r = src_reg.isExtended(), .w = dst_reg.size() == 64 }); - self.code.appendSliceAssumeCapacity(&[_]u8{ mr + 0x1, 0xC0 | (@as(u8, src_reg.id() & 0b111) << 3) | @as(u8, dst_reg.id() & 0b111) }); + .unreach => unreachable, + .dead => unreachable, + + .immediate, + .embedded_in_code, + .memory, + .compare_flags_unsigned, + .compare_flags_signed, + => false, + + .register, + .stack_offset, + => true, + }; + } + }; + + const Branch = struct { + inst_table: std.AutoHashMapUnmanaged(*ir.Inst, MCValue) = .{}, + + /// The key is an enum value of an arch-specific register. + registers: std.AutoHashMapUnmanaged(usize, RegisterAllocation) = .{}, + + /// Maps offset to what is stored there. + stack: std.AutoHashMapUnmanaged(usize, StackAllocation) = .{}, + /// Offset from the stack base, representing the end of the stack frame. + max_end_stack: u32 = 0, + /// Represents the current end stack offset. If there is no existing slot + /// to place a new stack allocation, it goes here, and then bumps `max_end_stack`. 
+ next_stack_offset: u32 = 0, + + fn deinit(self: *Branch, gpa: *Allocator) void { + self.inst_table.deinit(gpa); + self.registers.deinit(gpa); + self.stack.deinit(gpa); + self.* = undefined; + } + }; + + const RegisterAllocation = struct { + inst: *ir.Inst, + }; + + const StackAllocation = struct { + inst: *ir.Inst, + size: u32, + }; + + const Self = @This(); + + fn generateSymbol( + bin_file: *link.File.Elf, + src: usize, + typed_value: TypedValue, + code: *std.ArrayList(u8), + ) GenerateSymbolError!Result { + const module_fn = typed_value.val.cast(Value.Payload.Function).?.func; + + const fn_type = module_fn.owner_decl.typed_value.most_recent.typed_value.ty; + const param_types = try bin_file.allocator.alloc(Type, fn_type.fnParamLen()); + defer bin_file.allocator.free(param_types); + fn_type.fnParamTypes(param_types); + var mc_args = try bin_file.allocator.alloc(MCValue, param_types.len); + defer bin_file.allocator.free(mc_args); + + var branch_stack = std.ArrayList(Branch).init(bin_file.allocator); + defer { + assert(branch_stack.items.len == 1); + branch_stack.items[0].deinit(bin_file.allocator); + branch_stack.deinit(); + } + const branch = try branch_stack.addOne(); + branch.* = .{}; + + var function = Self{ + .gpa = bin_file.allocator, + .target = &bin_file.options.target, + .bin_file = bin_file, + .mod_fn = module_fn, + .code = code, + .err_msg = null, + .args = mc_args, + .arg_index = 0, + .branch_stack = &branch_stack, + .src = src, + }; + + const cc = fn_type.fnCallingConvention(); + branch.max_end_stack = function.resolveParameters(src, cc, param_types, mc_args) catch |err| switch (err) { + error.CodegenFail => return Result{ .fail = function.err_msg.? }, + else => |e| return e, + }; + + function.gen() catch |err| switch (err) { + error.CodegenFail => return Result{ .fail = function.err_msg.? 
}, + else => |e| return e, + }; + + if (function.err_msg) |em| { + return Result{ .fail = em }; + } else { + return Result{ .appended = {} }; + } + } + + fn gen(self: *Self) !void { + try self.code.ensureCapacity(self.code.items.len + 11); + + // push rbp + // mov rbp, rsp + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x55, 0x48, 0x89, 0xe5 }); + + // sub rsp, x + const stack_end = self.branch_stack.items[0].max_end_stack; + if (stack_end > std.math.maxInt(i32)) { + return self.fail(self.src, "too much stack used in call parameters", .{}); + } else if (stack_end > std.math.maxInt(i8)) { + // 48 83 ec xx sub rsp,0x10 + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x81, 0xec }); + const x = @intCast(u32, stack_end); + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x); + } else if (stack_end != 0) { + // 48 81 ec xx xx xx xx sub rsp,0x80 + const x = @intCast(u8, stack_end); + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x83, 0xec, x }); + } + + try self.genBody(self.mod_fn.analysis.success); + } + + fn genBody(self: *Self, body: ir.Body) InnerError!void { + const inst_table = &self.branch_stack.items[0].inst_table; + for (body.instructions) |inst| { + const new_inst = try self.genFuncInst(inst); + try inst_table.putNoClobber(self.gpa, inst, new_inst); + } + } + + fn genFuncInst(self: *Self, inst: *ir.Inst) !MCValue { + switch (inst.tag) { + .add => return self.genAdd(inst.cast(ir.Inst.Add).?), + .arg => return self.genArg(inst.cast(ir.Inst.Arg).?), + .assembly => return self.genAsm(inst.cast(ir.Inst.Assembly).?), + .bitcast => return self.genBitCast(inst.cast(ir.Inst.BitCast).?), + .block => return self.genBlock(inst.cast(ir.Inst.Block).?), + .br => return self.genBr(inst.cast(ir.Inst.Br).?), + .breakpoint => return self.genBreakpoint(inst.src), + .brvoid => return self.genBrVoid(inst.cast(ir.Inst.BrVoid).?), + .call => return self.genCall(inst.cast(ir.Inst.Call).?), + .cmp => return self.genCmp(inst.cast(ir.Inst.Cmp).?), + .condbr => return self.genCondBr(inst.cast(ir.Inst.CondBr).?), + .constant => unreachable, // excluded from function bodies + .isnonnull => return self.genIsNonNull(inst.cast(ir.Inst.IsNonNull).?), + .isnull => return self.genIsNull(inst.cast(ir.Inst.IsNull).?), + .ptrtoint => return self.genPtrToInt(inst.cast(ir.Inst.PtrToInt).?), + .ret => return self.genRet(inst.cast(ir.Inst.Ret).?), + .retvoid => return self.genRetVoid(inst.cast(ir.Inst.RetVoid).?), + .sub => return self.genSub(inst.cast(ir.Inst.Sub).?), + .unreach => return MCValue{ .unreach = {} }, + .not => return self.genNot(inst.cast(ir.Inst.Not).?), + } + } + + fn genNot(self: *Self, inst: *ir.Inst.Not) !MCValue { + // No side effects, so if it's unreferenced, do nothing. + if (inst.base.isUnused()) + return MCValue.dead; + const operand = try self.resolveInst(inst.args.operand); + switch (operand) { + .dead => unreachable, + .unreach => unreachable, + .compare_flags_unsigned => |op| return MCValue{ + .compare_flags_unsigned = switch (op) { + .gte => .lt, + .gt => .lte, + .neq => .eq, + .lt => .gte, + .lte => .gt, + .eq => .neq, }, - .immediate => |imm| { - const imm32 = @intCast(u31, imm); // This case must be handled before calling genX8664BinMathCode. 
- // 81 /opx id - if (imm32 <= std.math.maxInt(u7)) { - self.rex(.{ .b = dst_reg.isExtended(), .w = dst_reg.size() == 64 }); - self.code.appendSliceAssumeCapacity(&[_]u8{ - 0x83, - 0xC0 | (opx << 3) | @truncate(u3, dst_reg.id()), - @intCast(u8, imm32), - }); - } else { - self.rex(.{ .r = dst_reg.isExtended(), .w = dst_reg.size() == 64 }); - self.code.appendSliceAssumeCapacity(&[_]u8{ - 0x81, - 0xC0 | (opx << 3) | @truncate(u3, dst_reg.id()), - }); - std.mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), imm32); + }, + .compare_flags_signed => |op| return MCValue{ + .compare_flags_signed = switch (op) { + .gte => .lt, + .gt => .lte, + .neq => .eq, + .lt => .gte, + .lte => .gt, + .eq => .neq, + }, + }, + else => {}, + } + + switch (arch) { + .x86_64 => { + var imm = ir.Inst.Constant{ + .base = .{ + .tag = .constant, + .deaths = 0, + .ty = inst.args.operand.ty, + .src = inst.args.operand.src, + }, + .val = Value.initTag(.bool_true), + }; + return try self.genX8664BinMath(&inst.base, inst.args.operand, &imm.base, 6, 0x30); + }, + else => return self.fail(inst.base.src, "TODO implement NOT for {}", .{self.target.cpu.arch}), + } + } + + fn genAdd(self: *Self, inst: *ir.Inst.Add) !MCValue { + // No side effects, so if it's unreferenced, do nothing. + if (inst.base.isUnused()) + return MCValue.dead; + switch (arch) { + .x86_64 => { + return try self.genX8664BinMath(&inst.base, inst.args.lhs, inst.args.rhs, 0, 0x00); + }, + else => return self.fail(inst.base.src, "TODO implement add for {}", .{self.target.cpu.arch}), + } + } + + fn genSub(self: *Self, inst: *ir.Inst.Sub) !MCValue { + // No side effects, so if it's unreferenced, do nothing. + if (inst.base.isUnused()) + return MCValue.dead; + switch (arch) { + .x86_64 => { + return try self.genX8664BinMath(&inst.base, inst.args.lhs, inst.args.rhs, 5, 0x28); + }, + else => return self.fail(inst.base.src, "TODO implement sub for {}", .{self.target.cpu.arch}), + } + } + + /// ADD, SUB, XOR, OR, AND + fn genX8664BinMath(self: *Self, inst: *ir.Inst, op_lhs: *ir.Inst, op_rhs: *ir.Inst, opx: u8, mr: u8) !MCValue { + try self.code.ensureCapacity(self.code.items.len + 8); + + const lhs = try self.resolveInst(op_lhs); + const rhs = try self.resolveInst(op_rhs); + + // There are 2 operands, destination and source. + // Either one, but not both, can be a memory operand. + // Source operand can be an immediate, 8 bits or 32 bits. + // So, if either one of the operands dies with this instruction, we can use it + // as the result MCValue. + var dst_mcv: MCValue = undefined; + var src_mcv: MCValue = undefined; + var src_inst: *ir.Inst = undefined; + if (inst.operandDies(0) and lhs.isMutable()) { + // LHS dies; use it as the destination. + // Both operands cannot be memory. + src_inst = op_rhs; + if (lhs.isMemory() and rhs.isMemory()) { + dst_mcv = try self.moveToNewRegister(op_lhs); + src_mcv = rhs; + } else { + dst_mcv = lhs; + src_mcv = rhs; + } + } else if (inst.operandDies(1) and rhs.isMutable()) { + // RHS dies; use it as the destination. + // Both operands cannot be memory. + src_inst = op_lhs; + if (lhs.isMemory() and rhs.isMemory()) { + dst_mcv = try self.moveToNewRegister(op_rhs); + src_mcv = lhs; + } else { + dst_mcv = rhs; + src_mcv = lhs; + } + } else { + if (lhs.isMemory()) { + dst_mcv = try self.moveToNewRegister(op_lhs); + src_mcv = rhs; + src_inst = op_rhs; + } else { + dst_mcv = try self.moveToNewRegister(op_rhs); + src_mcv = lhs; + src_inst = op_lhs; + } + } + // This instruction supports only signed 32-bit immediates at most. 
If the immediate + // value is larger than this, we put it in a register. + // A potential opportunity for future optimization here would be keeping track + // of the fact that the instruction is available both as an immediate + // and as a register. + switch (src_mcv) { + .immediate => |imm| { + if (imm > std.math.maxInt(u31)) { + src_mcv = try self.moveToNewRegister(src_inst); + } + }, + else => {}, + } + + try self.genX8664BinMathCode(inst.src, dst_mcv, src_mcv, opx, mr); + + return dst_mcv; + } + + fn genX8664BinMathCode(self: *Self, src: usize, dst_mcv: MCValue, src_mcv: MCValue, opx: u8, mr: u8) !void { + switch (dst_mcv) { + .none => unreachable, + .dead, .unreach, .immediate => unreachable, + .compare_flags_unsigned => unreachable, + .compare_flags_signed => unreachable, + .register => |dst_reg| { + switch (src_mcv) { + .none => unreachable, + .dead, .unreach => unreachable, + .register => |src_reg| { + self.rex(.{ .b = dst_reg.isExtended(), .r = src_reg.isExtended(), .w = dst_reg.size() == 64 }); + self.code.appendSliceAssumeCapacity(&[_]u8{ mr + 0x1, 0xC0 | (@as(u8, src_reg.id() & 0b111) << 3) | @as(u8, dst_reg.id() & 0b111) }); + }, + .immediate => |imm| { + const imm32 = @intCast(u31, imm); // This case must be handled before calling genX8664BinMathCode. + // 81 /opx id + if (imm32 <= std.math.maxInt(u7)) { + self.rex(.{ .b = dst_reg.isExtended(), .w = dst_reg.size() == 64 }); + self.code.appendSliceAssumeCapacity(&[_]u8{ + 0x83, + 0xC0 | (opx << 3) | @truncate(u3, dst_reg.id()), + @intCast(u8, imm32), + }); + } else { + self.rex(.{ .r = dst_reg.isExtended(), .w = dst_reg.size() == 64 }); + self.code.appendSliceAssumeCapacity(&[_]u8{ + 0x81, + 0xC0 | (opx << 3) | @truncate(u3, dst_reg.id()), + }); + std.mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), imm32); + } + }, + .embedded_in_code, .memory, .stack_offset => { + return self.fail(src, "TODO implement x86 ADD/SUB/CMP source memory", .{}); + }, + .compare_flags_unsigned => { + return self.fail(src, "TODO implement x86 ADD/SUB/CMP source compare flag (unsigned)", .{}); + }, + .compare_flags_signed => { + return self.fail(src, "TODO implement x86 ADD/SUB/CMP source compare flag (signed)", .{}); + }, + } + }, + .embedded_in_code, .memory, .stack_offset => { + return self.fail(src, "TODO implement x86 ADD/SUB/CMP destination memory", .{}); + }, + } + } + + fn genArg(self: *Self, inst: *ir.Inst.Arg) !MCValue { + const i = self.arg_index; + self.arg_index += 1; + return self.args[i]; + } + + fn genBreakpoint(self: *Self, src: usize) !MCValue { + switch (arch) { + .i386, .x86_64 => { + try self.code.append(0xcc); // int3 + }, + else => return self.fail(src, "TODO implement @breakpoint() for {}", .{self.target.cpu.arch}), + } + return .none; + } + + fn genCall(self: *Self, inst: *ir.Inst.Call) !MCValue { + const fn_ty = inst.args.func.ty; + const cc = fn_ty.fnCallingConvention(); + const param_types = try self.gpa.alloc(Type, fn_ty.fnParamLen()); + defer self.gpa.free(param_types); + fn_ty.fnParamTypes(param_types); + var mc_args = try self.gpa.alloc(MCValue, param_types.len); + defer self.gpa.free(mc_args); + const stack_byte_count = try self.resolveParameters(inst.base.src, cc, param_types, mc_args); + + switch (arch) { + .x86_64 => { + for (mc_args) |mc_arg, arg_i| { + const arg = inst.args.args[arg_i]; + const arg_mcv = try self.resolveInst(inst.args.args[arg_i]); + switch (mc_arg) { + .none => continue, + .register => |reg| { + try self.genSetReg(arg.src, reg, arg_mcv); + // TODO interact with the register 
allocator to mark the instruction as moved. + }, + .stack_offset => { + // Here we need to emit instructions like this: + // mov qword ptr [rsp + stack_offset], x + return self.fail(inst.base.src, "TODO implement calling with parameters in memory", .{}); + }, + .immediate => unreachable, + .unreach => unreachable, + .dead => unreachable, + .embedded_in_code => unreachable, + .memory => unreachable, + .compare_flags_signed => unreachable, + .compare_flags_unsigned => unreachable, } - }, - .embedded_in_code, .memory, .stack_offset => { - return self.fail(src, "TODO implement x86 ADD/SUB/CMP source memory", .{}); - }, - .compare_flags_unsigned => { - return self.fail(src, "TODO implement x86 ADD/SUB/CMP source compare flag (unsigned)", .{}); - }, - .compare_flags_signed => { - return self.fail(src, "TODO implement x86 ADD/SUB/CMP source compare flag (signed)", .{}); - }, - } - }, - .embedded_in_code, .memory, .stack_offset => { - return self.fail(src, "TODO implement x86 ADD/SUB/CMP destination memory", .{}); - }, - } - } - - fn genArg(self: *Function, inst: *ir.Inst.Arg) !MCValue { - const i = self.arg_index; - self.arg_index += 1; - return self.args[i]; - } - - fn genBreakpoint(self: *Function, src: usize, comptime arch: std.Target.Cpu.Arch) !MCValue { - switch (arch) { - .i386, .x86_64 => { - try self.code.append(0xcc); // int3 - }, - else => return self.fail(src, "TODO implement @breakpoint() for {}", .{self.target.cpu.arch}), - } - return .none; - } - - fn genCall(self: *Function, inst: *ir.Inst.Call, comptime arch: std.Target.Cpu.Arch) !MCValue { - const fn_ty = inst.args.func.ty; - const cc = fn_ty.fnCallingConvention(); - const param_types = try self.gpa.alloc(Type, fn_ty.fnParamLen()); - defer self.gpa.free(param_types); - fn_ty.fnParamTypes(param_types); - var mc_args = try self.gpa.alloc(MCValue, param_types.len); - defer self.gpa.free(mc_args); - const stack_byte_count = try self.resolveParameters(inst.base.src, cc, param_types, mc_args); - - switch (arch) { - .x86_64 => { - for (mc_args) |mc_arg, arg_i| { - const arg = inst.args.args[arg_i]; - const arg_mcv = try self.resolveInst(inst.args.args[arg_i]); - switch (mc_arg) { - .none => continue, - .register => |reg| { - try self.genSetReg(arg.src, arch, @intToEnum(Reg(arch), @intCast(u8, reg)), arg_mcv); - // TODO interact with the register allocator to mark the instruction as moved. 
- }, - .stack_offset => { - // Here we need to emit instructions like this: - // mov qword ptr [rsp + stack_offset], x - return self.fail(inst.base.src, "TODO implement calling with parameters in memory", .{}); - }, - .immediate => unreachable, - .unreach => unreachable, - .dead => unreachable, - .embedded_in_code => unreachable, - .memory => unreachable, - .compare_flags_signed => unreachable, - .compare_flags_unsigned => unreachable, } - } - if (inst.args.func.cast(ir.Inst.Constant)) |func_inst| { - if (func_inst.val.cast(Value.Payload.Function)) |func_val| { - const func = func_val.func; - const got = &self.bin_file.program_headers.items[self.bin_file.phdr_got_index.?]; - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); - const ptr_bytes: u64 = @divExact(ptr_bits, 8); - const got_addr = @intCast(u32, got.p_vaddr + func.owner_decl.link.offset_table_index * ptr_bytes); - // ff 14 25 xx xx xx xx call [addr] - try self.code.ensureCapacity(self.code.items.len + 7); - self.code.appendSliceAssumeCapacity(&[3]u8{ 0xff, 0x14, 0x25 }); - mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), got_addr); + if (inst.args.func.cast(ir.Inst.Constant)) |func_inst| { + if (func_inst.val.cast(Value.Payload.Function)) |func_val| { + const func = func_val.func; + const got = &self.bin_file.program_headers.items[self.bin_file.phdr_got_index.?]; + const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bytes: u64 = @divExact(ptr_bits, 8); + const got_addr = @intCast(u32, got.p_vaddr + func.owner_decl.link.offset_table_index * ptr_bytes); + // ff 14 25 xx xx xx xx call [addr] + try self.code.ensureCapacity(self.code.items.len + 7); + self.code.appendSliceAssumeCapacity(&[3]u8{ 0xff, 0x14, 0x25 }); + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), got_addr); + } else { + return self.fail(inst.base.src, "TODO implement calling bitcasted functions", .{}); + } } else { - return self.fail(inst.base.src, "TODO implement calling bitcasted functions", .{}); + return self.fail(inst.base.src, "TODO implement calling runtime known function pointer", .{}); } - } else { - return self.fail(inst.base.src, "TODO implement calling runtime known function pointer", .{}); - } - }, - else => return self.fail(inst.base.src, "TODO implement call for {}", .{self.target.cpu.arch}), - } - - const return_type = fn_ty.fnReturnType(); - switch (return_type.zigTypeTag()) { - .Void => return MCValue{ .none = {} }, - .NoReturn => return MCValue{ .unreach = {} }, - else => return self.fail(inst.base.src, "TODO implement fn call with non-void return value", .{}), - } - } - - fn ret(self: *Function, src: usize, comptime arch: std.Target.Cpu.Arch, mcv: MCValue) !MCValue { - if (mcv != .none) { - return self.fail(src, "TODO implement return with non-void operand", .{}); - } - switch (arch) { - .i386 => { - try self.code.append(0xc3); // ret - }, - .x86_64 => { - try self.code.appendSlice(&[_]u8{ - 0x5d, // pop rbp - 0xc3, // ret - }); - }, - else => return self.fail(src, "TODO implement return for {}", .{self.target.cpu.arch}), - } - return .unreach; - } - - fn genRet(self: *Function, inst: *ir.Inst.Ret, comptime arch: std.Target.Cpu.Arch) !MCValue { - const operand = try self.resolveInst(inst.args.operand); - return self.ret(inst.base.src, arch, operand); - } - - fn genRetVoid(self: *Function, inst: *ir.Inst.RetVoid, comptime arch: std.Target.Cpu.Arch) !MCValue { - return self.ret(inst.base.src, arch, .none); - } - - fn genCmp(self: *Function, inst: *ir.Inst.Cmp, comptime arch: std.Target.Cpu.Arch) 
!MCValue { - // No side effects, so if it's unreferenced, do nothing. - if (inst.base.isUnused()) - return MCValue.dead; - switch (arch) { - .x86_64 => { - try self.code.ensureCapacity(self.code.items.len + 8); - - const lhs = try self.resolveInst(inst.args.lhs); - const rhs = try self.resolveInst(inst.args.rhs); - - // There are 2 operands, destination and source. - // Either one, but not both, can be a memory operand. - // Source operand can be an immediate, 8 bits or 32 bits. - const dst_mcv = if (lhs.isImmediate() or (lhs.isMemory() and rhs.isMemory())) - try self.copyToNewRegister(inst.args.lhs) - else - lhs; - // This instruction supports only signed 32-bit immediates at most. - const src_mcv = try self.limitImmediateType(inst.args.rhs, i32); - - try self.genX8664BinMathCode(inst.base.src, dst_mcv, src_mcv, 7, 0x38); - const info = inst.args.lhs.ty.intInfo(self.target.*); - if (info.signed) { - return MCValue{ .compare_flags_signed = inst.args.op }; - } else { - return MCValue{ .compare_flags_unsigned = inst.args.op }; - } - }, - else => return self.fail(inst.base.src, "TODO implement cmp for {}", .{self.target.cpu.arch}), - } - } - - fn genCondBr(self: *Function, inst: *ir.Inst.CondBr, comptime arch: std.Target.Cpu.Arch) !MCValue { - switch (arch) { - .x86_64 => { - try self.code.ensureCapacity(self.code.items.len + 6); - - const cond = try self.resolveInst(inst.args.condition); - switch (cond) { - .compare_flags_signed => |cmp_op| { - // Here we map to the opposite opcode because the jump is to the false branch. - const opcode: u8 = switch (cmp_op) { - .gte => 0x8c, - .gt => 0x8e, - .neq => 0x84, - .lt => 0x8d, - .lte => 0x8f, - .eq => 0x85, - }; - return self.genX86CondBr(inst, opcode, arch); - }, - .compare_flags_unsigned => |cmp_op| { - // Here we map to the opposite opcode because the jump is to the false branch. 
- const opcode: u8 = switch (cmp_op) { - .gte => 0x82, - .gt => 0x86, - .neq => 0x84, - .lt => 0x83, - .lte => 0x87, - .eq => 0x85, - }; - return self.genX86CondBr(inst, opcode, arch); - }, - .register => |reg_usize| { - const reg = @intToEnum(Reg(arch), @intCast(u8, reg_usize)); - // test reg, 1 - // TODO detect al, ax, eax - try self.code.ensureCapacity(self.code.items.len + 4); - self.rex(.{ .b = reg.isExtended(), .w = reg.size() == 64 }); - self.code.appendSliceAssumeCapacity(&[_]u8{ - 0xf6, - @as(u8, 0xC0) | (0 << 3) | @truncate(u3, reg.id()), - 0x01, - }); - return self.genX86CondBr(inst, 0x84, arch); - }, - else => return self.fail(inst.base.src, "TODO implement condbr {} when condition is {}", .{ self.target.cpu.arch, @tagName(cond) }), - } - }, - else => return self.fail(inst.base.src, "TODO implement condbr for {}", .{self.target.cpu.arch}), - } - } - - fn genX86CondBr(self: *Function, inst: *ir.Inst.CondBr, opcode: u8, comptime arch: std.Target.Cpu.Arch) !MCValue { - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x0f, opcode }); - const reloc = Reloc{ .rel32 = self.code.items.len }; - self.code.items.len += 4; - try self.genBody(inst.args.true_body, arch); - try self.performReloc(inst.base.src, reloc); - try self.genBody(inst.args.false_body, arch); - return MCValue.unreach; - } - - fn genIsNull(self: *Function, inst: *ir.Inst.IsNull, comptime arch: std.Target.Cpu.Arch) !MCValue { - switch (arch) { - else => return self.fail(inst.base.src, "TODO implement isnull for {}", .{self.target.cpu.arch}), - } - } - - fn genIsNonNull(self: *Function, inst: *ir.Inst.IsNonNull, comptime arch: std.Target.Cpu.Arch) !MCValue { - // Here you can specialize this instruction if it makes sense to, otherwise the default - // will call genIsNull and invert the result. - switch (arch) { - else => return self.fail(inst.base.src, "TODO call genIsNull and invert the result ", .{}), - } - } - - fn genBlock(self: *Function, inst: *ir.Inst.Block, comptime arch: std.Target.Cpu.Arch) !MCValue { - if (inst.base.ty.hasCodeGenBits()) { - return self.fail(inst.base.src, "TODO codegen Block with non-void type", .{}); - } - // A block is nothing but a setup to be able to jump to the end. - defer inst.codegen.relocs.deinit(self.gpa); - try self.genBody(inst.args.body, arch); - - for (inst.codegen.relocs.items) |reloc| try self.performReloc(inst.base.src, reloc); - - return MCValue.none; - } - - fn performReloc(self: *Function, src: usize, reloc: Reloc) !void { - switch (reloc) { - .rel32 => |pos| { - const amt = self.code.items.len - (pos + 4); - const s32_amt = std.math.cast(i32, amt) catch - return self.fail(src, "unable to perform relocation: jump too far", .{}); - mem.writeIntLittle(i32, self.code.items[pos..][0..4], s32_amt); - }, - } - } - - fn genBr(self: *Function, inst: *ir.Inst.Br, comptime arch: std.Target.Cpu.Arch) !MCValue { - if (!inst.args.operand.ty.hasCodeGenBits()) - return self.brVoid(inst.base.src, inst.args.block, arch); - - const operand = try self.resolveInst(inst.args.operand); - switch (arch) { - else => return self.fail(inst.base.src, "TODO implement br for {}", .{self.target.cpu.arch}), - } - } - - fn genBrVoid(self: *Function, inst: *ir.Inst.BrVoid, comptime arch: std.Target.Cpu.Arch) !MCValue { - return self.brVoid(inst.base.src, inst.args.block, arch); - } - - fn brVoid(self: *Function, src: usize, block: *ir.Inst.Block, comptime arch: std.Target.Cpu.Arch) !MCValue { - // Emit a jump with a relocation. It will be patched up after the block ends. 
- try block.codegen.relocs.ensureCapacity(self.gpa, block.codegen.relocs.items.len + 1); - - switch (arch) { - .i386, .x86_64 => { - // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction - // which is available if the jump is 127 bytes or less forward. - try self.code.resize(self.code.items.len + 5); - self.code.items[self.code.items.len - 5] = 0xe9; // jmp rel32 - // Leave the jump offset undefined - block.codegen.relocs.appendAssumeCapacity(.{ .rel32 = self.code.items.len - 4 }); - }, - else => return self.fail(src, "TODO implement brvoid for {}", .{self.target.cpu.arch}), - } - return .none; - } - - fn genAsm(self: *Function, inst: *ir.Inst.Assembly, comptime arch: Target.Cpu.Arch) !MCValue { - if (!inst.args.is_volatile and inst.base.isUnused()) - return MCValue.dead; - if (arch != .x86_64 and arch != .i386) { - return self.fail(inst.base.src, "TODO implement inline asm support for more architectures", .{}); - } - for (inst.args.inputs) |input, i| { - if (input.len < 3 or input[0] != '{' or input[input.len - 1] != '}') { - return self.fail(inst.base.src, "unrecognized asm input constraint: '{}'", .{input}); + }, + else => return self.fail(inst.base.src, "TODO implement call for {}", .{self.target.cpu.arch}), } - const reg_name = input[1 .. input.len - 1]; - const reg = parseRegName(arch, reg_name) orelse - return self.fail(inst.base.src, "unrecognized register: '{}'", .{reg_name}); - const arg = try self.resolveInst(inst.args.args[i]); - try self.genSetReg(inst.base.src, arch, reg, arg); - } - if (mem.eql(u8, inst.args.asm_source, "syscall")) { - try self.code.appendSlice(&[_]u8{ 0x0f, 0x05 }); - } else { - return self.fail(inst.base.src, "TODO implement support for more x86 assembly instructions", .{}); - } - - if (inst.args.output) |output| { - if (output.len < 4 or output[0] != '=' or output[1] != '{' or output[output.len - 1] != '}') { - return self.fail(inst.base.src, "unrecognized asm output constraint: '{}'", .{output}); + const return_type = fn_ty.fnReturnType(); + switch (return_type.zigTypeTag()) { + .Void => return MCValue{ .none = {} }, + .NoReturn => return MCValue{ .unreach = {} }, + else => return self.fail(inst.base.src, "TODO implement fn call with non-void return value", .{}), } - const reg_name = output[2 .. output.len - 1]; - const reg = parseRegName(arch, reg_name) orelse - return self.fail(inst.base.src, "unrecognized register: '{}'", .{reg_name}); - return MCValue{ .register = @enumToInt(reg) }; - } else { + } + + fn ret(self: *Self, src: usize, mcv: MCValue) !MCValue { + if (mcv != .none) { + return self.fail(src, "TODO implement return with non-void operand", .{}); + } + switch (arch) { + .i386 => { + try self.code.append(0xc3); // ret + }, + .x86_64 => { + try self.code.appendSlice(&[_]u8{ + 0x5d, // pop rbp + 0xc3, // ret + }); + }, + else => return self.fail(src, "TODO implement return for {}", .{self.target.cpu.arch}), + } + return .unreach; + } + + fn genRet(self: *Self, inst: *ir.Inst.Ret) !MCValue { + const operand = try self.resolveInst(inst.args.operand); + return self.ret(inst.base.src, operand); + } + + fn genRetVoid(self: *Self, inst: *ir.Inst.RetVoid) !MCValue { + return self.ret(inst.base.src, .none); + } + + fn genCmp(self: *Self, inst: *ir.Inst.Cmp) !MCValue { + // No side effects, so if it's unreferenced, do nothing. 
+ if (inst.base.isUnused()) + return MCValue.dead; + switch (arch) { + .x86_64 => { + try self.code.ensureCapacity(self.code.items.len + 8); + + const lhs = try self.resolveInst(inst.args.lhs); + const rhs = try self.resolveInst(inst.args.rhs); + + // There are 2 operands, destination and source. + // Either one, but not both, can be a memory operand. + // Source operand can be an immediate, 8 bits or 32 bits. + const dst_mcv = if (lhs.isImmediate() or (lhs.isMemory() and rhs.isMemory())) + try self.moveToNewRegister(inst.args.lhs) + else + lhs; + // This instruction supports only signed 32-bit immediates at most. + const src_mcv = try self.limitImmediateType(inst.args.rhs, i32); + + try self.genX8664BinMathCode(inst.base.src, dst_mcv, src_mcv, 7, 0x38); + const info = inst.args.lhs.ty.intInfo(self.target.*); + if (info.signed) { + return MCValue{ .compare_flags_signed = inst.args.op }; + } else { + return MCValue{ .compare_flags_unsigned = inst.args.op }; + } + }, + else => return self.fail(inst.base.src, "TODO implement cmp for {}", .{self.target.cpu.arch}), + } + } + + fn genCondBr(self: *Self, inst: *ir.Inst.CondBr) !MCValue { + switch (arch) { + .x86_64 => { + try self.code.ensureCapacity(self.code.items.len + 6); + + const cond = try self.resolveInst(inst.args.condition); + switch (cond) { + .compare_flags_signed => |cmp_op| { + // Here we map to the opposite opcode because the jump is to the false branch. + const opcode: u8 = switch (cmp_op) { + .gte => 0x8c, + .gt => 0x8e, + .neq => 0x84, + .lt => 0x8d, + .lte => 0x8f, + .eq => 0x85, + }; + return self.genX86CondBr(inst, opcode); + }, + .compare_flags_unsigned => |cmp_op| { + // Here we map to the opposite opcode because the jump is to the false branch. + const opcode: u8 = switch (cmp_op) { + .gte => 0x82, + .gt => 0x86, + .neq => 0x84, + .lt => 0x83, + .lte => 0x87, + .eq => 0x85, + }; + return self.genX86CondBr(inst, opcode); + }, + .register => |reg| { + // test reg, 1 + // TODO detect al, ax, eax + try self.code.ensureCapacity(self.code.items.len + 4); + self.rex(.{ .b = reg.isExtended(), .w = reg.size() == 64 }); + self.code.appendSliceAssumeCapacity(&[_]u8{ + 0xf6, + @as(u8, 0xC0) | (0 << 3) | @truncate(u3, reg.id()), + 0x01, + }); + return self.genX86CondBr(inst, 0x84); + }, + else => return self.fail(inst.base.src, "TODO implement condbr {} when condition is {}", .{ self.target.cpu.arch, @tagName(cond) }), + } + }, + else => return self.fail(inst.base.src, "TODO implement condbr for {}", .{self.target.cpu.arch}), + } + } + + fn genX86CondBr(self: *Self, inst: *ir.Inst.CondBr, opcode: u8) !MCValue { + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x0f, opcode }); + const reloc = Reloc{ .rel32 = self.code.items.len }; + self.code.items.len += 4; + try self.genBody(inst.args.true_body); + try self.performReloc(inst.base.src, reloc); + try self.genBody(inst.args.false_body); + return MCValue.unreach; + } + + fn genIsNull(self: *Self, inst: *ir.Inst.IsNull) !MCValue { + switch (arch) { + else => return self.fail(inst.base.src, "TODO implement isnull for {}", .{self.target.cpu.arch}), + } + } + + fn genIsNonNull(self: *Self, inst: *ir.Inst.IsNonNull) !MCValue { + // Here you can specialize this instruction if it makes sense to, otherwise the default + // will call genIsNull and invert the result. 
+ switch (arch) { + else => return self.fail(inst.base.src, "TODO call genIsNull and invert the result ", .{}), + } + } + + fn genBlock(self: *Self, inst: *ir.Inst.Block) !MCValue { + if (inst.base.ty.hasCodeGenBits()) { + return self.fail(inst.base.src, "TODO codegen Block with non-void type", .{}); + } + // A block is nothing but a setup to be able to jump to the end. + defer inst.codegen.relocs.deinit(self.gpa); + try self.genBody(inst.args.body); + + for (inst.codegen.relocs.items) |reloc| try self.performReloc(inst.base.src, reloc); + return MCValue.none; } - } - /// Encodes a REX prefix as specified, and appends it to the instruction - /// stream. This only modifies the instruction stream if at least one bit - /// is set true, which has a few implications: - /// - /// * The length of the instruction buffer will be modified *if* the - /// resulting REX is meaningful, but will remain the same if it is not. - /// * Deliberately inserting a "meaningless REX" requires explicit usage of - /// 0x40, and cannot be done via this function. - fn rex(self: *Function, arg: struct { b: bool = false, w: bool = false, x: bool = false, r: bool = false }) void { - // From section 2.2.1.2 of the manual, REX is encoded as b0100WRXB. - var value: u8 = 0x40; - if (arg.b) { - value |= 0x1; + fn performReloc(self: *Self, src: usize, reloc: Reloc) !void { + switch (reloc) { + .rel32 => |pos| { + const amt = self.code.items.len - (pos + 4); + const s32_amt = std.math.cast(i32, amt) catch + return self.fail(src, "unable to perform relocation: jump too far", .{}); + mem.writeIntLittle(i32, self.code.items[pos..][0..4], s32_amt); + }, + } } - if (arg.x) { - value |= 0x2; - } - if (arg.r) { - value |= 0x4; - } - if (arg.w) { - value |= 0x8; - } - if (value != 0x40) { - self.code.appendAssumeCapacity(value); - } - } - fn genSetReg(self: *Function, src: usize, comptime arch: Target.Cpu.Arch, reg: Reg(arch), mcv: MCValue) error{ CodegenFail, OutOfMemory }!void { - switch (arch) { - .x86_64 => switch (mcv) { - .dead => unreachable, - .none => unreachable, - .unreach => unreachable, - .compare_flags_unsigned => |op| { - try self.code.ensureCapacity(self.code.items.len + 3); - self.rex(.{ .b = reg.isExtended(), .w = reg.size() == 64 }); - const opcode: u8 = switch (op) { - .gte => 0x93, - .gt => 0x97, - .neq => 0x95, - .lt => 0x92, - .lte => 0x96, - .eq => 0x94, - }; - const id = @as(u8, reg.id() & 0b111); - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x0f, opcode, 0xC0 | id }); + fn genBr(self: *Self, inst: *ir.Inst.Br) !MCValue { + if (!inst.args.operand.ty.hasCodeGenBits()) + return self.brVoid(inst.base.src, inst.args.block); + + const operand = try self.resolveInst(inst.args.operand); + switch (arch) { + else => return self.fail(inst.base.src, "TODO implement br for {}", .{self.target.cpu.arch}), + } + } + + fn genBrVoid(self: *Self, inst: *ir.Inst.BrVoid) !MCValue { + return self.brVoid(inst.base.src, inst.args.block); + } + + fn brVoid(self: *Self, src: usize, block: *ir.Inst.Block) !MCValue { + // Emit a jump with a relocation. It will be patched up after the block ends. + try block.codegen.relocs.ensureCapacity(self.gpa, block.codegen.relocs.items.len + 1); + + switch (arch) { + .i386, .x86_64 => { + // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction + // which is available if the jump is 127 bytes or less forward. 
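+                    // For now we always emit the 5-byte form: 0xe9 followed by a little-endian
+                    // i32 displacement measured from the end of the instruction. The displacement
+                    // is left undefined here and patched later by performReloc.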
+ try self.code.resize(self.code.items.len + 5); + self.code.items[self.code.items.len - 5] = 0xe9; // jmp rel32 + // Leave the jump offset undefined + block.codegen.relocs.appendAssumeCapacity(.{ .rel32 = self.code.items.len - 4 }); }, - .compare_flags_signed => |op| { - return self.fail(src, "TODO set register with compare flags value (signed)", .{}); - }, - .immediate => |x| { - if (reg.size() != 64) { - return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); - } - // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit - // register is the fastest way to zero a register. - if (x == 0) { - // The encoding for `xor r32, r32` is `0x31 /r`. - // Section 3.1.1.1 of the Intel x64 Manual states that "/r indicates that the - // ModR/M byte of the instruction contains a register operand and an r/m operand." - // - // R/M bytes are composed of two bits for the mode, then three bits for the register, - // then three bits for the operand. Since we're zeroing a register, the two three-bit - // values will be identical, and the mode is three (the raw register value). - // - // If we're accessing e.g. r8d, we need to use a REX prefix before the actual operation. Since - // this is a 32-bit operation, the W flag is set to zero. X is also zero, as we're not using a SIB. - // Both R and B are set, as we're extending, in effect, the register bits *and* the operand. + else => return self.fail(src, "TODO implement brvoid for {}", .{self.target.cpu.arch}), + } + return .none; + } + + fn genAsm(self: *Self, inst: *ir.Inst.Assembly) !MCValue { + if (!inst.args.is_volatile and inst.base.isUnused()) + return MCValue.dead; + if (arch != .x86_64 and arch != .i386) { + return self.fail(inst.base.src, "TODO implement inline asm support for more architectures", .{}); + } + for (inst.args.inputs) |input, i| { + if (input.len < 3 or input[0] != '{' or input[input.len - 1] != '}') { + return self.fail(inst.base.src, "unrecognized asm input constraint: '{}'", .{input}); + } + const reg_name = input[1 .. input.len - 1]; + const reg = parseRegName(reg_name) orelse + return self.fail(inst.base.src, "unrecognized register: '{}'", .{reg_name}); + const arg = try self.resolveInst(inst.args.args[i]); + try self.genSetReg(inst.base.src, reg, arg); + } + + if (mem.eql(u8, inst.args.asm_source, "syscall")) { + try self.code.appendSlice(&[_]u8{ 0x0f, 0x05 }); + } else { + return self.fail(inst.base.src, "TODO implement support for more x86 assembly instructions", .{}); + } + + if (inst.args.output) |output| { + if (output.len < 4 or output[0] != '=' or output[1] != '{' or output[output.len - 1] != '}') { + return self.fail(inst.base.src, "unrecognized asm output constraint: '{}'", .{output}); + } + const reg_name = output[2 .. output.len - 1]; + const reg = parseRegName(reg_name) orelse + return self.fail(inst.base.src, "unrecognized register: '{}'", .{reg_name}); + return MCValue{ .register = reg }; + } else { + return MCValue.none; + } + } + + /// Encodes a REX prefix as specified, and appends it to the instruction + /// stream. This only modifies the instruction stream if at least one bit + /// is set true, which has a few implications: + /// + /// * The length of the instruction buffer will be modified *if* the + /// resulting REX is meaningful, but will remain the same if it is not. + /// * Deliberately inserting a "meaningless REX" requires explicit usage of + /// 0x40, and cannot be done via this function. 
+ fn rex(self: *Self, arg: struct { b: bool = false, w: bool = false, x: bool = false, r: bool = false }) void { + // From section 2.2.1.2 of the manual, REX is encoded as b0100WRXB. + var value: u8 = 0x40; + if (arg.b) { + value |= 0x1; + } + if (arg.x) { + value |= 0x2; + } + if (arg.r) { + value |= 0x4; + } + if (arg.w) { + value |= 0x8; + } + if (value != 0x40) { + self.code.appendAssumeCapacity(value); + } + } + + fn genSetReg(self: *Self, src: usize, reg: Reg, mcv: MCValue) error{ CodegenFail, OutOfMemory }!void { + switch (arch) { + .x86_64 => switch (mcv) { + .dead => unreachable, + .none => unreachable, + .unreach => unreachable, + .compare_flags_unsigned => |op| { try self.code.ensureCapacity(self.code.items.len + 3); - self.rex(.{ .r = reg.isExtended(), .b = reg.isExtended() }); + self.rex(.{ .b = reg.isExtended(), .w = reg.size() == 64 }); + const opcode: u8 = switch (op) { + .gte => 0x93, + .gt => 0x97, + .neq => 0x95, + .lt => 0x92, + .lte => 0x96, + .eq => 0x94, + }; const id = @as(u8, reg.id() & 0b111); - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x31, 0xC0 | id << 3 | id }); - return; - } - if (x <= std.math.maxInt(u32)) { - // Next best case: if we set the lower four bytes, the upper four will be zeroed. - // - // The encoding for `mov IMM32 -> REG` is (0xB8 + R) IMM. - if (reg.isExtended()) { - // Just as with XORing, we need a REX prefix. This time though, we only - // need the B bit set, as we're extending the opcode's register field, - // and there is no Mod R/M byte. - // - // Thus, we need b01000001, or 0x41. - try self.code.resize(self.code.items.len + 6); - self.code.items[self.code.items.len - 6] = 0x41; - } else { - try self.code.resize(self.code.items.len + 5); + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x0f, opcode, 0xC0 | id }); + }, + .compare_flags_signed => |op| { + return self.fail(src, "TODO set register with compare flags value (signed)", .{}); + }, + .immediate => |x| { + if (reg.size() != 64) { + return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); } - self.code.items[self.code.items.len - 5] = 0xB8 | @as(u8, reg.id() & 0b111); - const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; - mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); - return; - } - // Worst case: we need to load the 64-bit register with the IMM. GNU's assemblers calls - // this `movabs`, though this is officially just a different variant of the plain `mov` - // instruction. - // - // This encoding is, in fact, the *same* as the one used for 32-bit loads. The only - // difference is that we set REX.W before the instruction, which extends the load to - // 64-bit and uses the full bit-width of the register. - // - // Since we always need a REX here, let's just check if we also need to set REX.B. - // - // In this case, the encoding of the REX byte is 0b0100100B - try self.code.ensureCapacity(self.code.items.len + 10); - self.rex(.{ .w = true, .b = reg.isExtended() }); - self.code.items.len += 9; - self.code.items[self.code.items.len - 9] = 0xB8 | @as(u8, reg.id() & 0b111); - const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; - mem.writeIntLittle(u64, imm_ptr, x); - }, - .embedded_in_code => |code_offset| { - if (reg.size() != 64) { - return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); - } - // We need the offset from RIP in a signed i32 twos complement. - // The instruction is 7 bytes long and RIP points to the next instruction. 
- try self.code.ensureCapacity(self.code.items.len + 7); - // 64-bit LEA is encoded as REX.W 8D /r. If the register is extended, the REX byte is modified, - // but the operation size is unchanged. Since we're using a disp32, we want mode 0 and lower three - // bits as five. - // REX 0x8D 0b00RRR101, where RRR is the lower three bits of the id. - self.rex(.{ .w = true, .b = reg.isExtended() }); - self.code.items.len += 6; - const rip = self.code.items.len; - const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip); - const offset = @intCast(i32, big_offset); - self.code.items[self.code.items.len - 6] = 0x8D; - self.code.items[self.code.items.len - 5] = 0b101 | (@as(u8, reg.id() & 0b111) << 3); - const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; - mem.writeIntLittle(i32, imm_ptr, offset); - }, - .register => |r| { - if (reg.size() != 64) { - return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); - } - const src_reg = @intToEnum(Reg(arch), @intCast(u8, r)); - // This is a variant of 8B /r. Since we're using 64-bit moves, we require a REX. - // This is thus three bytes: REX 0x8B R/M. - // If the destination is extended, the R field must be 1. - // If the *source* is extended, the B field must be 1. - // Since the register is being accessed directly, the R/M mode is three. The reg field (the middle - // three bits) contain the destination, and the R/M field (the lower three bits) contain the source. - try self.code.ensureCapacity(self.code.items.len + 3); - self.rex(.{ .w = true, .r = reg.isExtended(), .b = src_reg.isExtended() }); - const R = 0xC0 | (@as(u8, reg.id() & 0b111) << 3) | @as(u8, src_reg.id() & 0b111); - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, R }); - }, - .memory => |x| { - if (reg.size() != 64) { - return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); - } - if (x <= std.math.maxInt(u32)) { - // Moving from memory to a register is a variant of `8B /r`. - // Since we're using 64-bit moves, we require a REX. - // This variant also requires a SIB, as it would otherwise be RIP-relative. - // We want mode zero with the lower three bits set to four to indicate an SIB with no other displacement. - // The SIB must be 0x25, to indicate a disp32 with no scaled index. - // 0b00RRR100, where RRR is the lower three bits of the register ID. - // The instruction is thus eight bytes; REX 0x8B 0b00RRR100 0x25 followed by a four-byte disp32. - try self.code.ensureCapacity(self.code.items.len + 8); - self.rex(.{ .w = true, .b = reg.isExtended() }); - self.code.appendSliceAssumeCapacity(&[_]u8{ - 0x8B, - 0x04 | (@as(u8, reg.id() & 0b111) << 3), // R - 0x25, - }); - mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), @intCast(u32, x)); - } else { - // If this is RAX, we can use a direct load; otherwise, we need to load the address, then indirectly load - // the value. - if (reg.id() == 0) { - // REX.W 0xA1 moffs64* - // moffs64* is a 64-bit offset "relative to segment base", which really just means the - // absolute address for all practical purposes. 
- try self.code.resize(self.code.items.len + 10); - // REX.W == 0x48 - self.code.items[self.code.items.len - 10] = 0x48; - self.code.items[self.code.items.len - 9] = 0xA1; - const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; - mem.writeIntLittle(u64, imm_ptr, x); - } else { - // This requires two instructions; a move imm as used above, followed by an indirect load using the register - // as the address and the register as the destination. + // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit + // register is the fastest way to zero a register. + if (x == 0) { + // The encoding for `xor r32, r32` is `0x31 /r`. + // Section 3.1.1.1 of the Intel x64 Manual states that "/r indicates that the + // ModR/M byte of the instruction contains a register operand and an r/m operand." // - // This cannot be used if the lower three bits of the id are equal to four or five, as there - // is no way to possibly encode it. This means that RSP, RBP, R12, and R13 cannot be used with - // this instruction. - const id3 = @truncate(u3, reg.id()); - std.debug.assert(id3 != 4 and id3 != 5); - - // Rather than duplicate the logic used for the move, we just use a self-call with a new MCValue. - try self.genSetReg(src, arch, reg, MCValue{ .immediate = x }); - - // Now, the register contains the address of the value to load into it - // Currently, we're only allowing 64-bit registers, so we need the `REX.W 8B /r` variant. - // TODO: determine whether to allow other sized registers, and if so, handle them properly. - // This operation requires three bytes: REX 0x8B R/M + // R/M bytes are composed of two bits for the mode, then three bits for the register, + // then three bits for the operand. Since we're zeroing a register, the two three-bit + // values will be identical, and the mode is three (the raw register value). + // + // If we're accessing e.g. r8d, we need to use a REX prefix before the actual operation. Since + // this is a 32-bit operation, the W flag is set to zero. X is also zero, as we're not using a SIB. + // Both R and B are set, as we're extending, in effect, the register bits *and* the operand. try self.code.ensureCapacity(self.code.items.len + 3); - // For this operation, we want R/M mode *zero* (use register indirectly), and the two register - // values must match. Thus, it's 00ABCABC where ABC is the lower three bits of the register ID. - // - // Furthermore, if this is an extended register, both B and R must be set in the REX byte, as *both* - // register operands need to be marked as extended. 
- self.rex(.{ .w = true, .b = reg.isExtended(), .r = reg.isExtended() }); - const RM = (@as(u8, reg.id() & 0b111) << 3) | @truncate(u3, reg.id()); - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, RM }); + self.rex(.{ .r = reg.isExtended(), .b = reg.isExtended() }); + const id = @as(u8, reg.id() & 0b111); + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x31, 0xC0 | id << 3 | id }); + return; } - } - }, - .stack_offset => |off| { - return self.fail(src, "TODO implement genSetReg for stack variables", .{}); - }, - }, - else => return self.fail(src, "TODO implement genSetReg for more architectures", .{}), - } - } - - fn genPtrToInt(self: *Function, inst: *ir.Inst.PtrToInt) !MCValue { - // no-op - return self.resolveInst(inst.args.ptr); - } - - fn genBitCast(self: *Function, inst: *ir.Inst.BitCast) !MCValue { - const operand = try self.resolveInst(inst.args.operand); - return operand; - } - - fn resolveInst(self: *Function, inst: *ir.Inst) !MCValue { - // Constants have static lifetimes, so they are always memoized in the outer most table. - if (inst.cast(ir.Inst.Constant)) |const_inst| { - const branch = &self.branch_stack.items[0]; - const gop = try branch.inst_table.getOrPut(self.gpa, inst); - if (!gop.found_existing) { - gop.entry.value = try self.genTypedValue(inst.src, .{ .ty = inst.ty, .val = const_inst.val }); - } - return gop.entry.value; - } - - // Treat each stack item as a "layer" on top of the previous one. - var i: usize = self.branch_stack.items.len; - while (true) { - i -= 1; - if (self.branch_stack.items[i].inst_table.get(inst)) |mcv| { - return mcv; - } - } - } - - fn copyToNewRegister(self: *Function, inst: *ir.Inst) !MCValue { - return self.fail(inst.src, "TODO implement copyToNewRegister", .{}); - } - - /// If the MCValue is an immediate, and it does not fit within this type, - /// we put it in a register. - /// A potential opportunity for future optimization here would be keeping track - /// of the fact that the instruction is available both as an immediate - /// and as a register. - fn limitImmediateType(self: *Function, inst: *ir.Inst, comptime T: type) !MCValue { - const mcv = try self.resolveInst(inst); - const ti = @typeInfo(T).Int; - switch (mcv) { - .immediate => |imm| { - // This immediate is unsigned. - const U = @Type(.{ - .Int = .{ - .bits = ti.bits - @boolToInt(ti.is_signed), - .is_signed = false, + if (x <= std.math.maxInt(u32)) { + // Next best case: if we set the lower four bytes, the upper four will be zeroed. + // + // The encoding for `mov IMM32 -> REG` is (0xB8 + R) IMM. + if (reg.isExtended()) { + // Just as with XORing, we need a REX prefix. This time though, we only + // need the B bit set, as we're extending the opcode's register field, + // and there is no Mod R/M byte. + // + // Thus, we need b01000001, or 0x41. + try self.code.resize(self.code.items.len + 6); + self.code.items[self.code.items.len - 6] = 0x41; + } else { + try self.code.resize(self.code.items.len + 5); + } + self.code.items[self.code.items.len - 5] = 0xB8 | @as(u8, reg.id() & 0b111); + const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; + mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); + return; + } + // Worst case: we need to load the 64-bit register with the IMM. GNU's assemblers calls + // this `movabs`, though this is officially just a different variant of the plain `mov` + // instruction. + // + // This encoding is, in fact, the *same* as the one used for 32-bit loads. 
The only + // difference is that we set REX.W before the instruction, which extends the load to + // 64-bit and uses the full bit-width of the register. + // + // Since we always need a REX here, let's just check if we also need to set REX.B. + // + // In this case, the encoding of the REX byte is 0b0100100B + try self.code.ensureCapacity(self.code.items.len + 10); + self.rex(.{ .w = true, .b = reg.isExtended() }); + self.code.items.len += 9; + self.code.items[self.code.items.len - 9] = 0xB8 | @as(u8, reg.id() & 0b111); + const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; + mem.writeIntLittle(u64, imm_ptr, x); }, - }); - if (imm >= std.math.maxInt(U)) { - return self.copyToNewRegister(inst); - } - }, - else => {}, - } - return mcv; - } - - fn genTypedValue(self: *Function, src: usize, typed_value: TypedValue) !MCValue { - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); - const ptr_bytes: u64 = @divExact(ptr_bits, 8); - switch (typed_value.ty.zigTypeTag()) { - .Pointer => { - if (typed_value.val.cast(Value.Payload.DeclRef)) |payload| { - const got = &self.bin_file.program_headers.items[self.bin_file.phdr_got_index.?]; - const decl = payload.decl; - const got_addr = got.p_vaddr + decl.link.offset_table_index * ptr_bytes; - return MCValue{ .memory = got_addr }; - } - return self.fail(src, "TODO codegen more kinds of const pointers", .{}); - }, - .Int => { - const info = typed_value.ty.intInfo(self.target.*); - if (info.bits > ptr_bits or info.signed) { - return self.fail(src, "TODO const int bigger than ptr and signed int", .{}); - } - return MCValue{ .immediate = typed_value.val.toUnsignedInt() }; - }, - .Bool => { - return MCValue{ .immediate = @boolToInt(typed_value.val.toBool()) }; - }, - .ComptimeInt => unreachable, // semantic analysis prevents this - .ComptimeFloat => unreachable, // semantic analysis prevents this - else => return self.fail(src, "TODO implement const of type '{}'", .{typed_value.ty}), - } - } - - fn resolveParameters( - self: *Function, - src: usize, - cc: std.builtin.CallingConvention, - param_types: []const Type, - results: []MCValue, - ) !u32 { - switch (self.target.cpu.arch) { - .x86_64 => { - switch (cc) { - .Naked => { - assert(results.len == 0); - return 0; + .embedded_in_code => |code_offset| { + if (reg.size() != 64) { + return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); + } + // We need the offset from RIP in a signed i32 twos complement. + // The instruction is 7 bytes long and RIP points to the next instruction. + try self.code.ensureCapacity(self.code.items.len + 7); + // 64-bit LEA is encoded as REX.W 8D /r. If the register is extended, the REX byte is modified, + // but the operation size is unchanged. Since we're using a disp32, we want mode 0 and lower three + // bits as five. + // REX 0x8D 0b00RRR101, where RRR is the lower three bits of the id. 
+ self.rex(.{ .w = true, .b = reg.isExtended() }); + self.code.items.len += 6; + const rip = self.code.items.len; + const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip); + const offset = @intCast(i32, big_offset); + self.code.items[self.code.items.len - 6] = 0x8D; + self.code.items[self.code.items.len - 5] = 0b101 | (@as(u8, reg.id() & 0b111) << 3); + const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; + mem.writeIntLittle(i32, imm_ptr, offset); }, - .Unspecified, .C => { - var next_int_reg: usize = 0; - var next_stack_offset: u32 = 0; + .register => |src_reg| { + if (reg.size() != 64) { + return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); + } + // This is a variant of 8B /r. Since we're using 64-bit moves, we require a REX. + // This is thus three bytes: REX 0x8B R/M. + // If the destination is extended, the R field must be 1. + // If the *source* is extended, the B field must be 1. + // Since the register is being accessed directly, the R/M mode is three. The reg field (the middle + // three bits) contain the destination, and the R/M field (the lower three bits) contain the source. + try self.code.ensureCapacity(self.code.items.len + 3); + self.rex(.{ .w = true, .r = reg.isExtended(), .b = src_reg.isExtended() }); + const R = 0xC0 | (@as(u8, reg.id() & 0b111) << 3) | @as(u8, src_reg.id() & 0b111); + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, R }); + }, + .memory => |x| { + if (reg.size() != 64) { + return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); + } + if (x <= std.math.maxInt(u32)) { + // Moving from memory to a register is a variant of `8B /r`. + // Since we're using 64-bit moves, we require a REX. + // This variant also requires a SIB, as it would otherwise be RIP-relative. + // We want mode zero with the lower three bits set to four to indicate an SIB with no other displacement. + // The SIB must be 0x25, to indicate a disp32 with no scaled index. + // 0b00RRR100, where RRR is the lower three bits of the register ID. + // The instruction is thus eight bytes; REX 0x8B 0b00RRR100 0x25 followed by a four-byte disp32. + try self.code.ensureCapacity(self.code.items.len + 8); + self.rex(.{ .w = true, .b = reg.isExtended() }); + self.code.appendSliceAssumeCapacity(&[_]u8{ + 0x8B, + 0x04 | (@as(u8, reg.id() & 0b111) << 3), // R + 0x25, + }); + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), @intCast(u32, x)); + } else { + // If this is RAX, we can use a direct load; otherwise, we need to load the address, then indirectly load + // the value. + if (reg.id() == 0) { + // REX.W 0xA1 moffs64* + // moffs64* is a 64-bit offset "relative to segment base", which really just means the + // absolute address for all practical purposes. + try self.code.resize(self.code.items.len + 10); + // REX.W == 0x48 + self.code.items[self.code.items.len - 10] = 0x48; + self.code.items[self.code.items.len - 9] = 0xA1; + const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; + mem.writeIntLittle(u64, imm_ptr, x); + } else { + // This requires two instructions; a move imm as used above, followed by an indirect load using the register + // as the address and the register as the destination. + // + // This cannot be used if the lower three bits of the id are equal to four or five, as there + // is no way to possibly encode it. This means that RSP, RBP, R12, and R13 cannot be used with + // this instruction. 
+ const id3 = @truncate(u3, reg.id()); + std.debug.assert(id3 != 4 and id3 != 5); - const integer_registers = [_]Reg(.x86_64){ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; - for (param_types) |ty, i| { - switch (ty.zigTypeTag()) { - .Bool, .Int => { - if (next_int_reg >= integer_registers.len) { - results[i] = .{ .stack_offset = next_stack_offset }; - next_stack_offset += @intCast(u32, ty.abiSize(self.target.*)); - } else { - results[i] = .{ .register = @enumToInt(integer_registers[next_int_reg]) }; - next_int_reg += 1; - } - }, - else => return self.fail(src, "TODO implement function parameters of type {}", .{@tagName(ty.zigTypeTag())}), + // Rather than duplicate the logic used for the move, we just use a self-call with a new MCValue. + try self.genSetReg(src, reg, MCValue{ .immediate = x }); + + // Now, the register contains the address of the value to load into it + // Currently, we're only allowing 64-bit registers, so we need the `REX.W 8B /r` variant. + // TODO: determine whether to allow other sized registers, and if so, handle them properly. + // This operation requires three bytes: REX 0x8B R/M + try self.code.ensureCapacity(self.code.items.len + 3); + // For this operation, we want R/M mode *zero* (use register indirectly), and the two register + // values must match. Thus, it's 00ABCABC where ABC is the lower three bits of the register ID. + // + // Furthermore, if this is an extended register, both B and R must be set in the REX byte, as *both* + // register operands need to be marked as extended. + self.rex(.{ .w = true, .b = reg.isExtended(), .r = reg.isExtended() }); + const RM = (@as(u8, reg.id() & 0b111) << 3) | @truncate(u3, reg.id()); + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, RM }); } } - return next_stack_offset; }, - else => return self.fail(src, "TODO implement function parameters for {}", .{cc}), - } - }, - else => return self.fail(src, "TODO implement C ABI support for {}", .{self.target.cpu.arch}), + .stack_offset => |off| { + return self.fail(src, "TODO implement genSetReg for stack variables", .{}); + }, + }, + else => return self.fail(src, "TODO implement genSetReg for more architectures", .{}), + } } - } - fn fail(self: *Function, src: usize, comptime format: []const u8, args: anytype) error{ CodegenFail, OutOfMemory } { - @setCold(true); - assert(self.err_msg == null); - self.err_msg = try ErrorMsg.create(self.bin_file.allocator, src, format, args); - return error.CodegenFail; - } -}; + fn genPtrToInt(self: *Self, inst: *ir.Inst.PtrToInt) !MCValue { + // no-op + return self.resolveInst(inst.args.ptr); + } -const x86_64 = @import("codegen/x86_64.zig"); -const x86 = @import("codegen/x86.zig"); + fn genBitCast(self: *Self, inst: *ir.Inst.BitCast) !MCValue { + const operand = try self.resolveInst(inst.args.operand); + return operand; + } -fn Reg(comptime arch: Target.Cpu.Arch) type { - return switch (arch) { - .i386 => x86.Register, - .x86_64 => x86_64.Register, - else => @compileError("TODO add more register enums"), + fn resolveInst(self: *Self, inst: *ir.Inst) !MCValue { + // Constants have static lifetimes, so they are always memoized in the outer most table. + if (inst.cast(ir.Inst.Constant)) |const_inst| { + const branch = &self.branch_stack.items[0]; + const gop = try branch.inst_table.getOrPut(self.gpa, inst); + if (!gop.found_existing) { + gop.entry.value = try self.genTypedValue(inst.src, .{ .ty = inst.ty, .val = const_inst.val }); + } + return gop.entry.value; + } + + // Treat each stack item as a "layer" on top of the previous one. 
+ var i: usize = self.branch_stack.items.len; + while (true) { + i -= 1; + if (self.branch_stack.items[i].inst_table.get(inst)) |mcv| { + return mcv; + } + } + } + + fn moveToNewRegister(self: *Self, inst: *ir.Inst) !MCValue { + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + return self.fail(inst.src, "TODO implement moveToNewRegister", .{}); + } + + /// If the MCValue is an immediate, and it does not fit within this type, + /// we put it in a register. + /// A potential opportunity for future optimization here would be keeping track + /// of the fact that the instruction is available both as an immediate + /// and as a register. + fn limitImmediateType(self: *Self, inst: *ir.Inst, comptime T: type) !MCValue { + const mcv = try self.resolveInst(inst); + const ti = @typeInfo(T).Int; + switch (mcv) { + .immediate => |imm| { + // This immediate is unsigned. + const U = @Type(.{ + .Int = .{ + .bits = ti.bits - @boolToInt(ti.is_signed), + .is_signed = false, + }, + }); + if (imm >= std.math.maxInt(U)) { + return self.moveToNewRegister(inst); + } + }, + else => {}, + } + return mcv; + } + + fn genTypedValue(self: *Self, src: usize, typed_value: TypedValue) !MCValue { + const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bytes: u64 = @divExact(ptr_bits, 8); + switch (typed_value.ty.zigTypeTag()) { + .Pointer => { + if (typed_value.val.cast(Value.Payload.DeclRef)) |payload| { + const got = &self.bin_file.program_headers.items[self.bin_file.phdr_got_index.?]; + const decl = payload.decl; + const got_addr = got.p_vaddr + decl.link.offset_table_index * ptr_bytes; + return MCValue{ .memory = got_addr }; + } + return self.fail(src, "TODO codegen more kinds of const pointers", .{}); + }, + .Int => { + const info = typed_value.ty.intInfo(self.target.*); + if (info.bits > ptr_bits or info.signed) { + return self.fail(src, "TODO const int bigger than ptr and signed int", .{}); + } + return MCValue{ .immediate = typed_value.val.toUnsignedInt() }; + }, + .Bool => { + return MCValue{ .immediate = @boolToInt(typed_value.val.toBool()) }; + }, + .ComptimeInt => unreachable, // semantic analysis prevents this + .ComptimeFloat => unreachable, // semantic analysis prevents this + else => return self.fail(src, "TODO implement const of type '{}'", .{typed_value.ty}), + } + } + + fn resolveParameters( + self: *Self, + src: usize, + cc: std.builtin.CallingConvention, + param_types: []const Type, + results: []MCValue, + ) !u32 { + switch (arch) { + .x86_64 => { + switch (cc) { + .Naked => { + assert(results.len == 0); + return 0; + }, + .Unspecified, .C => { + var next_int_reg: usize = 0; + var next_stack_offset: u32 = 0; + + const integer_registers = [_]Reg{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; + for (param_types) |ty, i| { + switch (ty.zigTypeTag()) { + .Bool, .Int => { + if (next_int_reg >= integer_registers.len) { + results[i] = .{ .stack_offset = next_stack_offset }; + next_stack_offset += @intCast(u32, ty.abiSize(self.target.*)); + } else { + results[i] = .{ .register = integer_registers[next_int_reg] }; + next_int_reg += 1; + } + }, + else => return self.fail(src, "TODO implement function parameters of type {}", .{@tagName(ty.zigTypeTag())}), + } + } + return next_stack_offset; + }, + else => return self.fail(src, "TODO implement function parameters for {}", .{cc}), + } + }, + else => return self.fail(src, "TODO implement C ABI support for {}", .{self.target.cpu.arch}), + } + } + + fn fail(self: *Self, src: usize, comptime format: []const u8, args: anytype) error{ 
CodegenFail, OutOfMemory } { + @setCold(true); + assert(self.err_msg == null); + self.err_msg = try ErrorMsg.create(self.bin_file.allocator, src, format, args); + return error.CodegenFail; + } + + const Reg = switch (arch) { + .i386 => x86.Register, + .x86_64 => x86_64.Register, + else => enum { dummy }, + }; + + fn parseRegName(name: []const u8) ?Reg { + return std.meta.stringToEnum(Reg, name); + } }; } - -fn parseRegName(comptime arch: Target.Cpu.Arch, name: []const u8) ?Reg(arch) { - return std.meta.stringToEnum(Reg(arch), name); -} diff --git a/src-self-hosted/codegen/x86_64.zig b/src-self-hosted/codegen/x86_64.zig index ddcbd5320e..df8895275c 100644 --- a/src-self-hosted/codegen/x86_64.zig +++ b/src-self-hosted/codegen/x86_64.zig @@ -67,4 +67,7 @@ pub const Register = enum(u8) { } }; -// zig fmt: on \ No newline at end of file +// zig fmt: on + +/// These registers belong to the called function. +pub const callee_preserved = [_]Register{ rax, rcx, rdx, rsi, rdi, r8, r9, r10, r11 }; From 896472c20e33c81a010b21a6f900e721a2cf0839 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 17 Jul 2020 15:51:15 -0700 Subject: [PATCH 2/4] stage2: implement register copying --- src-self-hosted/codegen.zig | 106 +++++++++++++++++++++-------- src-self-hosted/codegen/x86.zig | 14 ++++ src-self-hosted/codegen/x86_64.zig | 25 +++++-- test/stage2/compare_output.zig | 15 ++-- 4 files changed, 124 insertions(+), 36 deletions(-) diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index c259eb2595..6e1686fd3e 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -11,8 +11,6 @@ const ErrorMsg = Module.ErrorMsg; const Target = std.Target; const Allocator = mem.Allocator; const trace = @import("tracy.zig").trace; -const x86_64 = @import("codegen/x86_64.zig"); -const x86 = @import("codegen/x86.zig"); /// The codegen-related data that is stored in `ir.Inst.Block` instructions. pub const BlockData = struct { @@ -232,7 +230,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { /// The constant was emitted into the code, at this offset. embedded_in_code: usize, /// The value is in a target-specific register. - register: Reg, + register: Register, /// The value is in memory at a hard-coded address. memory: u64, /// The value is one of the stack variables. @@ -280,9 +278,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const Branch = struct { inst_table: std.AutoHashMapUnmanaged(*ir.Inst, MCValue) = .{}, - - /// The key is an enum value of an arch-specific register. - registers: std.AutoHashMapUnmanaged(usize, RegisterAllocation) = .{}, + registers: std.AutoHashMapUnmanaged(Register, RegisterAllocation) = .{}, + free_registers: FreeRegInt = std.math.maxInt(FreeRegInt), /// Maps offset to what is stored there. stack: std.AutoHashMapUnmanaged(usize, StackAllocation) = .{}, @@ -292,6 +289,20 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { /// to place a new stack allocation, it goes here, and then bumps `max_end_stack`. 
next_stack_offset: u32 = 0, + fn markRegUsed(self: *Branch, reg: Register) void { + const index = reg.allocIndex() orelse return; + const ShiftInt = std.math.Log2Int(FreeRegInt); + const shift = @intCast(ShiftInt, index); + self.free_registers &= ~(@as(FreeRegInt, 1) << shift); + } + + fn markRegFree(self: *Branch, reg: Register) void { + const index = reg.allocIndex() orelse return; + const ShiftInt = std.math.Log2Int(FreeRegInt); + const shift = @intCast(ShiftInt, index); + self.free_registers |= @as(FreeRegInt, 1) << shift; + } + fn deinit(self: *Branch, gpa: *Allocator) void { self.inst_table.deinit(gpa); self.registers.deinit(gpa); @@ -516,7 +527,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // Both operands cannot be memory. src_inst = op_rhs; if (lhs.isMemory() and rhs.isMemory()) { - dst_mcv = try self.moveToNewRegister(op_lhs); + dst_mcv = try self.copyToNewRegister(op_lhs); src_mcv = rhs; } else { dst_mcv = lhs; @@ -527,7 +538,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // Both operands cannot be memory. src_inst = op_lhs; if (lhs.isMemory() and rhs.isMemory()) { - dst_mcv = try self.moveToNewRegister(op_rhs); + dst_mcv = try self.copyToNewRegister(op_rhs); src_mcv = lhs; } else { dst_mcv = rhs; @@ -535,11 +546,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } } else { if (lhs.isMemory()) { - dst_mcv = try self.moveToNewRegister(op_lhs); + dst_mcv = try self.copyToNewRegister(op_lhs); src_mcv = rhs; src_inst = op_rhs; } else { - dst_mcv = try self.moveToNewRegister(op_rhs); + dst_mcv = try self.copyToNewRegister(op_rhs); src_mcv = lhs; src_inst = op_lhs; } @@ -552,7 +563,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { switch (src_mcv) { .immediate => |imm| { if (imm > std.math.maxInt(u31)) { - src_mcv = try self.moveToNewRegister(src_inst); + src_mcv = try self.copyToNewRegister(src_inst); } }, else => {}, @@ -614,9 +625,26 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } fn genArg(self: *Self, inst: *ir.Inst.Arg) !MCValue { - const i = self.arg_index; + if (FreeRegInt == u0) { + return self.fail(inst.base.src, "TODO implement Register enum for {}", .{self.target.cpu.arch}); + } + if (inst.base.isUnused()) + return MCValue.dead; + + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); + + const result = self.args[self.arg_index]; self.arg_index += 1; - return self.args[i]; + + switch (result) { + .register => |reg| { + branch.registers.putAssumeCapacityNoClobber(reg, .{ .inst = &inst.base }); + branch.markRegUsed(reg); + }, + else => {}, + } + return result; } fn genBreakpoint(self: *Self, src: usize) !MCValue { @@ -737,7 +765,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // Either one, but not both, can be a memory operand. // Source operand can be an immediate, 8 bits or 32 bits. const dst_mcv = if (lhs.isImmediate() or (lhs.isMemory() and rhs.isMemory())) - try self.moveToNewRegister(inst.args.lhs) + try self.copyToNewRegister(inst.args.lhs) else lhs; // This instruction supports only signed 32-bit immediates at most. 
@@ -949,7 +977,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } } - fn genSetReg(self: *Self, src: usize, reg: Reg, mcv: MCValue) error{ CodegenFail, OutOfMemory }!void { + fn genSetReg(self: *Self, src: usize, reg: Register, mcv: MCValue) error{ CodegenFail, OutOfMemory }!void { switch (arch) { .x86_64 => switch (mcv) { .dead => unreachable, @@ -1171,9 +1199,22 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } } - fn moveToNewRegister(self: *Self, inst: *ir.Inst) !MCValue { + /// Does not "move" the instruction. + fn copyToNewRegister(self: *Self, inst: *ir.Inst) !MCValue { const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - return self.fail(inst.src, "TODO implement moveToNewRegister", .{}); + try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); + try branch.inst_table.ensureCapacity(self.gpa, branch.inst_table.items().len + 1); + + const free_index = @ctz(FreeRegInt, branch.free_registers); + if (free_index >= callee_preserved_regs.len) + return self.fail(inst.src, "TODO implement spilling register to stack", .{}); + branch.free_registers &= ~(@as(FreeRegInt, 1) << free_index); + const reg = callee_preserved_regs[free_index]; + branch.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst }); + const old_mcv = branch.inst_table.get(inst).?; + const new_mcv: MCValue = .{ .register = reg }; + try self.genSetReg(inst.src, reg, old_mcv); + return new_mcv; } /// If the MCValue is an immediate, and it does not fit within this type, @@ -1194,7 +1235,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }, }); if (imm >= std.math.maxInt(U)) { - return self.moveToNewRegister(inst); + return self.copyToNewRegister(inst); } }, else => {}, @@ -1249,15 +1290,14 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { var next_int_reg: usize = 0; var next_stack_offset: u32 = 0; - const integer_registers = [_]Reg{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; for (param_types) |ty, i| { switch (ty.zigTypeTag()) { .Bool, .Int => { - if (next_int_reg >= integer_registers.len) { + if (next_int_reg >= c_abi_int_param_regs.len) { results[i] = .{ .stack_offset = next_stack_offset }; next_stack_offset += @intCast(u32, ty.abiSize(self.target.*)); } else { - results[i] = .{ .register = integer_registers[next_int_reg] }; + results[i] = .{ .register = c_abi_int_param_regs[next_int_reg] }; next_int_reg += 1; } }, @@ -1280,14 +1320,26 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return error.CodegenFail; } - const Reg = switch (arch) { - .i386 => x86.Register, - .x86_64 => x86_64.Register, - else => enum { dummy }, + usingnamespace switch (arch) { + .i386 => @import("codegen/x86.zig"), + .x86_64 => @import("codegen/x86_64.zig"), + else => struct { + pub const Register = enum { + dummy, + + pub fn allocIndex(self: Register) ?u4 { + return null; + } + }; + pub const callee_preserved_regs = [_]Register{}; + }, }; - fn parseRegName(name: []const u8) ?Reg { - return std.meta.stringToEnum(Reg, name); + /// An integer whose bits represent all the registers and whether they are free. 
+ const FreeRegInt = @Type(.{ .Int = .{ .is_signed = false, .bits = callee_preserved_regs.len } }); + + fn parseRegName(name: []const u8) ?Register { + return std.meta.stringToEnum(Register, name); } }; } diff --git a/src-self-hosted/codegen/x86.zig b/src-self-hosted/codegen/x86.zig index da0f4e722a..e0d0848bf5 100644 --- a/src-self-hosted/codegen/x86.zig +++ b/src-self-hosted/codegen/x86.zig @@ -25,6 +25,20 @@ pub const Register = enum(u8) { pub fn id(self: @This()) u3 { return @truncate(u3, @enumToInt(self)); } + + /// Returns the index into `callee_preserved_regs`. + pub fn allocIndex(self: Register) ?u4 { + return switch (self) { + .eax, .ax, .al => 0, + .ecx, .cx, .cl => 1, + .edx, .dx, .dl => 2, + .esi, .si => 3, + .edi, .di => 4, + else => null, + }; + } }; // zig fmt: on + +pub const callee_preserved_regs = [_]Register{ .eax, .ecx, .edx, .esi, .edi }; diff --git a/src-self-hosted/codegen/x86_64.zig b/src-self-hosted/codegen/x86_64.zig index df8895275c..2c0937d28d 100644 --- a/src-self-hosted/codegen/x86_64.zig +++ b/src-self-hosted/codegen/x86_64.zig @@ -38,7 +38,7 @@ pub const Register = enum(u8) { r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b, /// Returns the bit-width of the register. - pub fn size(self: @This()) u7 { + pub fn size(self: Register) u7 { return switch (@enumToInt(self)) { 0...15 => 64, 16...31 => 32, @@ -53,7 +53,7 @@ pub const Register = enum(u8) { /// other variant of access to those registers, such as r8b, r15d, and so /// on. This is needed because access to these registers requires special /// handling via the REX prefix, via the B or R bits, depending on context. - pub fn isExtended(self: @This()) bool { + pub fn isExtended(self: Register) bool { return @enumToInt(self) & 0x08 != 0; } @@ -62,12 +62,29 @@ pub const Register = enum(u8) { /// an instruction (@see isExtended), and requires special handling. The /// lower three bits are often embedded directly in instructions (such as /// the B8 variant of moves), or used in R/M bytes. - pub fn id(self: @This()) u4 { + pub fn id(self: Register) u4 { return @truncate(u4, @enumToInt(self)); } + + /// Returns the index into `callee_preserved_regs`. + pub fn allocIndex(self: Register) ?u4 { + return switch (self) { + .rax, .eax, .ax, .al => 0, + .rcx, .ecx, .cx, .cl => 1, + .rdx, .edx, .dx, .dl => 2, + .rsi, .esi, .si => 3, + .rdi, .edi, .di => 4, + .r8, .r8d, .r8w, .r8b => 5, + .r9, .r9d, .r9w, .r9b => 6, + .r10, .r10d, .r10w, .r10b => 7, + .r11, .r11d, .r11w, .r11b => 8, + else => null, + }; + } }; // zig fmt: on /// These registers belong to the called function. -pub const callee_preserved = [_]Register{ rax, rcx, rdx, rsi, rdi, r8, r9, r10, r11 }; +pub const callee_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11 }; +pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; diff --git a/test/stage2/compare_output.zig b/test/stage2/compare_output.zig index c4f85bfba4..4c8d23f3c6 100644 --- a/test/stage2/compare_output.zig +++ b/test/stage2/compare_output.zig @@ -169,9 +169,8 @@ pub fn addCases(ctx: *TestContext) !void { , "", ); - } - { - var case = ctx.exe("assert function", linux_x64); + + // Tests the assert() function. case.addCompareOutput( \\export fn _start() noreturn { \\ add(3, 4); @@ -199,15 +198,21 @@ pub fn addCases(ctx: *TestContext) !void { , "", ); + + // Tests copying a register. For the `c = a + b`, it has to + // preserve both a and b, because they are both used later. 
case.addCompareOutput( \\export fn _start() noreturn { - \\ add(100, 200); + \\ add(3, 4); \\ \\ exit(); \\} \\ \\fn add(a: u32, b: u32) void { - \\ assert(a + b == 300); + \\ const c = a + b; // 7 + \\ const d = a + c; // 10 + \\ const e = d + b; // 14 + \\ assert(e == 14); \\} \\ \\pub fn assert(ok: bool) void { From a8065a05a5bc3df4036f1d7abe0928901cf7f5df Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 17 Jul 2020 17:03:24 -0700 Subject: [PATCH 3/4] stage2: fix implementation of liveness operandDies() --- src-self-hosted/codegen.zig | 2 ++ src-self-hosted/ir.zig | 2 +- test/stage2/compare_output.zig | 36 ++++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 6e1686fd3e..2cc471a07d 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -407,6 +407,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { for (body.instructions) |inst| { const new_inst = try self.genFuncInst(inst); try inst_table.putNoClobber(self.gpa, inst, new_inst); + // TODO process operand deaths } } @@ -1194,6 +1195,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { while (true) { i -= 1; if (self.branch_stack.items[i].inst_table.get(inst)) |mcv| { + assert(mcv != .dead); return mcv; } } diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index a150957de0..9902bd70aa 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -38,7 +38,7 @@ pub const Inst = struct { pub fn operandDies(self: Inst, index: DeathsBitIndex) bool { assert(index < deaths_bits); - return @truncate(u1, self.deaths << index) != 0; + return @truncate(u1, self.deaths >> index) != 0; } pub fn specialOperandDeaths(self: Inst) bool { diff --git a/test/stage2/compare_output.zig b/test/stage2/compare_output.zig index 4c8d23f3c6..6a6772f935 100644 --- a/test/stage2/compare_output.zig +++ b/test/stage2/compare_output.zig @@ -231,5 +231,41 @@ pub fn addCases(ctx: *TestContext) !void { , "", ); + + // More stress on the liveness detection. 
+ case.addCompareOutput( + \\export fn _start() noreturn { + \\ add(3, 4); + \\ + \\ exit(); + \\} + \\ + \\fn add(a: u32, b: u32) void { + \\ const c = a + b; // 7 + \\ const d = a + c; // 10 + \\ const e = d + b; // 14 + \\ const f = d + e; // 24 + \\ const g = e + f; // 38 + \\ const h = f + g; // 62 + \\ const i = g + h; // 100 + \\ assert(i == 100); + \\} + \\ + \\pub fn assert(ok: bool) void { + \\ if (!ok) unreachable; // assertion failure + \\} + \\ + \\fn exit() noreturn { + \\ asm volatile ("syscall" + \\ : + \\ : [number] "{rax}" (231), + \\ [arg1] "{rdi}" (0) + \\ : "rcx", "r11", "memory" + \\ ); + \\ unreachable; + \\} + , + "", + ); } } From ef91b11295a549a8173c488d9fd5b3f69b419829 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 20 Jul 2020 13:11:07 -0700 Subject: [PATCH 4/4] stage2: register allocator processes operand deaths also rework the IR data structures --- src-self-hosted/Module.zig | 223 ++++++++++++++------ src-self-hosted/astgen.zig | 8 +- src-self-hosted/codegen.zig | 150 ++++++++------ src-self-hosted/codegen/c.zig | 21 +- src-self-hosted/ir.zig | 361 +++++++++++++++++++++------------ src-self-hosted/liveness.zig | 90 +++----- src-self-hosted/zir.zig | 345 ++++++++++++++----------------- test/stage2/compare_output.zig | 37 ++++ 8 files changed, 708 insertions(+), 527 deletions(-) diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig index 72e5f6cd63..25136b5289 100644 --- a/src-self-hosted/Module.zig +++ b/src-self-hosted/Module.zig @@ -1349,8 +1349,8 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool { fn analyzeBodyValueAsType(self: *Module, block_scope: *Scope.Block, body: zir.Module.Body) !Type { try self.analyzeBody(&block_scope.base, body); for (block_scope.instructions.items) |inst| { - if (inst.cast(Inst.Ret)) |ret| { - const val = try self.resolveConstValue(&block_scope.base, ret.args.operand); + if (inst.castTag(.ret)) |ret| { + const val = try self.resolveConstValue(&block_scope.base, ret.operand); return val.toType(); } else { return self.fail(&block_scope.base, inst.src, "unable to resolve comptime value", .{}); @@ -1938,16 +1938,132 @@ fn analyzeExport(self: *Module, scope: *Scope, src: usize, symbol_name: []const }; } -fn addNewInstArgs( +fn addNoOp( self: *Module, block: *Scope.Block, src: usize, ty: Type, - comptime T: type, - args: Inst.Args(T), + comptime tag: Inst.Tag, ) !*Inst { - const inst = try self.addNewInst(block, src, ty, T); - inst.args = args; + const inst = try block.arena.create(tag.Type()); + inst.* = .{ + .base = .{ + .tag = tag, + .ty = ty, + .src = src, + }, + }; + try block.instructions.append(self.gpa, &inst.base); + return &inst.base; +} + +fn addUnOp( + self: *Module, + block: *Scope.Block, + src: usize, + ty: Type, + tag: Inst.Tag, + operand: *Inst, +) !*Inst { + const inst = try block.arena.create(Inst.UnOp); + inst.* = .{ + .base = .{ + .tag = tag, + .ty = ty, + .src = src, + }, + .operand = operand, + }; + try block.instructions.append(self.gpa, &inst.base); + return &inst.base; +} + +fn addBinOp( + self: *Module, + block: *Scope.Block, + src: usize, + ty: Type, + tag: Inst.Tag, + lhs: *Inst, + rhs: *Inst, +) !*Inst { + const inst = try block.arena.create(Inst.BinOp); + inst.* = .{ + .base = .{ + .tag = tag, + .ty = ty, + .src = src, + }, + .lhs = lhs, + .rhs = rhs, + }; + try block.instructions.append(self.gpa, &inst.base); + return &inst.base; +} + +fn addBr( + self: *Module, + scope_block: *Scope.Block, + src: usize, + target_block: *Inst.Block, + operand: *Inst, +) !*Inst { + const 
inst = try scope_block.arena.create(Inst.Br); + inst.* = .{ + .base = .{ + .tag = .br, + .ty = Type.initTag(.noreturn), + .src = src, + }, + .operand = operand, + .block = target_block, + }; + try scope_block.instructions.append(self.gpa, &inst.base); + return &inst.base; +} + +fn addCondBr( + self: *Module, + block: *Scope.Block, + src: usize, + condition: *Inst, + then_body: ir.Body, + else_body: ir.Body, +) !*Inst { + const inst = try block.arena.create(Inst.CondBr); + inst.* = .{ + .base = .{ + .tag = .condbr, + .ty = Type.initTag(.noreturn), + .src = src, + }, + .condition = condition, + .then_body = then_body, + .else_body = else_body, + }; + try block.instructions.append(self.gpa, &inst.base); + return &inst.base; +} + +fn addCall( + self: *Module, + block: *Scope.Block, + src: usize, + ty: Type, + func: *Inst, + args: []const *Inst, +) !*Inst { + const inst = try block.arena.create(Inst.Call); + inst.* = .{ + .base = .{ + .tag = .call, + .ty = ty, + .src = src, + }, + .func = func, + .args = args, + }; + try block.instructions.append(self.gpa, &inst.base); return &inst.base; } @@ -2017,7 +2133,6 @@ fn addNewInst(self: *Module, block: *Scope.Block, src: usize, ty: Type, comptime .ty = ty, .src = src, }, - .args = undefined, }; try block.instructions.append(self.gpa, &inst.base); return inst; @@ -2269,7 +2384,7 @@ fn analyzeInstArg(self: *Module, scope: *Scope, inst: *zir.Inst.Arg) InnerError! }); } const param_type = fn_ty.fnParamType(param_index); - return self.addNewInstArgs(b, inst.base.src, param_type, Inst.Arg, {}); + return self.addNoOp(b, inst.base.src, param_type, .arg); } fn analyzeInstBlock(self: *Module, scope: *Scope, inst: *zir.Inst.Block) InnerError!*Inst { @@ -2285,7 +2400,7 @@ fn analyzeInstBlock(self: *Module, scope: *Scope, inst: *zir.Inst.Block) InnerEr .ty = undefined, // Set after analysis. .src = inst.base.src, }, - .args = undefined, + .body = undefined, }; var child_block: Scope.Block = .{ @@ -2316,13 +2431,13 @@ fn analyzeInstBlock(self: *Module, scope: *Scope, inst: *zir.Inst.Block) InnerEr // to emit a jump instruction to after the block when it encounters the break. 
try parent_block.instructions.append(self.gpa, &block_inst.base); block_inst.base.ty = try self.resolvePeerTypes(scope, label.results.items); - block_inst.args.body = .{ .instructions = try parent_block.arena.dupe(*Inst, child_block.instructions.items) }; + block_inst.body = .{ .instructions = try parent_block.arena.dupe(*Inst, child_block.instructions.items) }; return &block_inst.base; } fn analyzeInstBreakpoint(self: *Module, scope: *Scope, inst: *zir.Inst.Breakpoint) InnerError!*Inst { const b = try self.requireRuntimeBlock(scope, inst.base.src); - return self.addNewInstArgs(b, inst.base.src, Type.initTag(.void), Inst.Breakpoint, {}); + return self.addNoOp(b, inst.base.src, Type.initTag(.void), .breakpoint); } fn analyzeInstBreak(self: *Module, scope: *Scope, inst: *zir.Inst.Break) InnerError!*Inst { @@ -2350,10 +2465,7 @@ fn analyzeBreak( if (label.zir_block == zir_block) { try label.results.append(self.gpa, operand); const b = try self.requireRuntimeBlock(scope, src); - return self.addNewInstArgs(b, src, Type.initTag(.noreturn), Inst.Br, .{ - .block = label.block_inst, - .operand = operand, - }); + return self.addBr(b, src, label.block_inst, operand); } } opt_block = block.parent; @@ -2484,10 +2596,7 @@ fn analyzeInstCall(self: *Module, scope: *Scope, inst: *zir.Inst.Call) InnerErro } const b = try self.requireRuntimeBlock(scope, inst.base.src); - return self.addNewInstArgs(b, inst.base.src, Type.initTag(.void), Inst.Call, .{ - .func = func, - .args = casted_args, - }); + return self.addCall(b, inst.base.src, Type.initTag(.void), func, casted_args); } fn analyzeInstFn(self: *Module, scope: *Scope, fn_inst: *zir.Inst.Fn) InnerError!*Inst { @@ -2570,14 +2679,14 @@ fn analyzeInstAs(self: *Module, scope: *Scope, as: *zir.Inst.As) InnerError!*Ins } fn analyzeInstPtrToInt(self: *Module, scope: *Scope, ptrtoint: *zir.Inst.PtrToInt) InnerError!*Inst { - const ptr = try self.resolveInst(scope, ptrtoint.positionals.ptr); + const ptr = try self.resolveInst(scope, ptrtoint.positionals.operand); if (ptr.ty.zigTypeTag() != .Pointer) { - return self.fail(scope, ptrtoint.positionals.ptr.src, "expected pointer, found '{}'", .{ptr.ty}); + return self.fail(scope, ptrtoint.positionals.operand.src, "expected pointer, found '{}'", .{ptr.ty}); } // TODO handle known-pointer-address const b = try self.requireRuntimeBlock(scope, ptrtoint.base.src); const ty = Type.initTag(.usize); - return self.addNewInstArgs(b, ptrtoint.base.src, ty, Inst.PtrToInt, .{ .ptr = ptr }); + return self.addUnOp(b, ptrtoint.base.src, ty, .ptrtoint, ptr); } fn analyzeInstFieldPtr(self: *Module, scope: *Scope, fieldptr: *zir.Inst.FieldPtr) InnerError!*Inst { @@ -2734,10 +2843,7 @@ fn analyzeInstAdd(self: *Module, scope: *Scope, inst: *zir.Inst.Add) InnerError! 
} const b = try self.requireRuntimeBlock(scope, inst.base.src); - return self.addNewInstArgs(b, inst.base.src, lhs.ty, Inst.Add, .{ - .lhs = lhs, - .rhs = rhs, - }); + return self.addBinOp(b, inst.base.src, lhs.ty, .add, lhs, rhs); } return self.fail(scope, inst.base.src, "TODO analyze add for {} + {}", .{ lhs.ty.zigTypeTag(), rhs.ty.zigTypeTag() }); } @@ -2783,14 +2889,22 @@ fn analyzeInstAsm(self: *Module, scope: *Scope, assembly: *zir.Inst.Asm) InnerEr } const b = try self.requireRuntimeBlock(scope, assembly.base.src); - return self.addNewInstArgs(b, assembly.base.src, return_type, Inst.Assembly, .{ + const inst = try b.arena.create(Inst.Assembly); + inst.* = .{ + .base = .{ + .tag = .assembly, + .ty = return_type, + .src = assembly.base.src, + }, .asm_source = asm_source, .is_volatile = assembly.kw_args.@"volatile", .output = output, .inputs = inputs, .clobbers = clobbers, .args = args, - }); + }; + try b.instructions.append(self.gpa, &inst.base); + return &inst.base; } fn analyzeInstCmp(self: *Module, scope: *Scope, inst: *zir.Inst.Cmp) InnerError!*Inst { @@ -2818,15 +2932,12 @@ fn analyzeInstCmp(self: *Module, scope: *Scope, inst: *zir.Inst.Cmp) InnerError! return self.constBool(scope, inst.base.src, if (op == .eq) is_null else !is_null); } const b = try self.requireRuntimeBlock(scope, inst.base.src); - switch (op) { - .eq => return self.addNewInstArgs(b, inst.base.src, Type.initTag(.bool), Inst.IsNull, .{ - .operand = opt_operand, - }), - .neq => return self.addNewInstArgs(b, inst.base.src, Type.initTag(.bool), Inst.IsNonNull, .{ - .operand = opt_operand, - }), + const inst_tag: Inst.Tag = switch (op) { + .eq => .isnull, + .neq => .isnonnull, else => unreachable, - } + }; + return self.addUnOp(b, inst.base.src, Type.initTag(.bool), inst_tag, opt_operand); } else if (is_equality_cmp and ((lhs_ty_tag == .Null and rhs.ty.isCPtr()) or (rhs_ty_tag == .Null and lhs.ty.isCPtr()))) { @@ -2861,7 +2972,7 @@ fn analyzeInstBoolNot(self: *Module, scope: *Scope, inst: *zir.Inst.BoolNot) Inn return self.constBool(scope, inst.base.src, !val.toBool()); } const b = try self.requireRuntimeBlock(scope, inst.base.src); - return self.addNewInstArgs(b, inst.base.src, bool_type, Inst.Not, .{ .operand = operand }); + return self.addUnOp(b, inst.base.src, bool_type, .not, operand); } fn analyzeInstIsNull(self: *Module, scope: *Scope, inst: *zir.Inst.IsNull) InnerError!*Inst { @@ -2879,7 +2990,7 @@ fn analyzeInstCondBr(self: *Module, scope: *Scope, inst: *zir.Inst.CondBr) Inner const cond = try self.coerce(scope, Type.initTag(.bool), uncasted_cond); if (try self.resolveDefinedValue(scope, cond)) |cond_val| { - const body = if (cond_val.toBool()) &inst.positionals.true_body else &inst.positionals.false_body; + const body = if (cond_val.toBool()) &inst.positionals.then_body else &inst.positionals.else_body; try self.analyzeBody(scope, body.*); return self.constVoid(scope, inst.base.src); } @@ -2894,7 +3005,7 @@ fn analyzeInstCondBr(self: *Module, scope: *Scope, inst: *zir.Inst.CondBr) Inner .arena = parent_block.arena, }; defer true_block.instructions.deinit(self.gpa); - try self.analyzeBody(&true_block.base, inst.positionals.true_body); + try self.analyzeBody(&true_block.base, inst.positionals.then_body); var false_block: Scope.Block = .{ .parent = parent_block, @@ -2904,13 +3015,11 @@ fn analyzeInstCondBr(self: *Module, scope: *Scope, inst: *zir.Inst.CondBr) Inner .arena = parent_block.arena, }; defer false_block.instructions.deinit(self.gpa); - try self.analyzeBody(&false_block.base, 
inst.positionals.false_body); + try self.analyzeBody(&false_block.base, inst.positionals.else_body); - return self.addNewInstArgs(parent_block, inst.base.src, Type.initTag(.noreturn), Inst.CondBr, Inst.Args(Inst.CondBr){ - .condition = cond, - .true_body = .{ .instructions = try scope.arena().dupe(*Inst, true_block.instructions.items) }, - .false_body = .{ .instructions = try scope.arena().dupe(*Inst, false_block.instructions.items) }, - }); + const then_body: ir.Body = .{ .instructions = try scope.arena().dupe(*Inst, true_block.instructions.items) }; + const else_body: ir.Body = .{ .instructions = try scope.arena().dupe(*Inst, false_block.instructions.items) }; + return self.addCondBr(parent_block, inst.base.src, cond, then_body, else_body); } fn wantSafety(self: *Module, scope: *Scope) bool { @@ -2926,20 +3035,20 @@ fn analyzeInstUnreachable(self: *Module, scope: *Scope, unreach: *zir.Inst.Unrea const b = try self.requireRuntimeBlock(scope, unreach.base.src); if (self.wantSafety(scope)) { // TODO Once we have a panic function to call, call it here instead of this. - _ = try self.addNewInstArgs(b, unreach.base.src, Type.initTag(.void), Inst.Breakpoint, {}); + _ = try self.addNoOp(b, unreach.base.src, Type.initTag(.void), .breakpoint); } - return self.addNewInstArgs(b, unreach.base.src, Type.initTag(.noreturn), Inst.Unreach, {}); + return self.addNoOp(b, unreach.base.src, Type.initTag(.noreturn), .unreach); } fn analyzeInstRet(self: *Module, scope: *Scope, inst: *zir.Inst.Return) InnerError!*Inst { const operand = try self.resolveInst(scope, inst.positionals.operand); const b = try self.requireRuntimeBlock(scope, inst.base.src); - return self.addNewInstArgs(b, inst.base.src, Type.initTag(.noreturn), Inst.Ret, .{ .operand = operand }); + return self.addUnOp(b, inst.base.src, Type.initTag(.noreturn), .ret, operand); } fn analyzeInstRetVoid(self: *Module, scope: *Scope, inst: *zir.Inst.ReturnVoid) InnerError!*Inst { const b = try self.requireRuntimeBlock(scope, inst.base.src); - return self.addNewInstArgs(b, inst.base.src, Type.initTag(.noreturn), Inst.RetVoid, {}); + return self.addNoOp(b, inst.base.src, Type.initTag(.noreturn), .retvoid); } fn analyzeBody(self: *Module, scope: *Scope, body: zir.Module.Body) !void { @@ -3027,11 +3136,7 @@ fn cmpNumeric( }; const casted_lhs = try self.coerce(scope, dest_type, lhs); const casted_rhs = try self.coerce(scope, dest_type, rhs); - return self.addNewInstArgs(b, src, dest_type, Inst.Cmp, .{ - .lhs = casted_lhs, - .rhs = casted_rhs, - .op = op, - }); + return self.addBinOp(b, src, dest_type, Inst.Tag.fromCmpOp(op), casted_lhs, casted_rhs); } // For mixed unsigned integer sizes, implicit cast both operands to the larger integer. 
// For mixed signed and unsigned integers, implicit cast both operands to a signed @@ -3131,11 +3236,7 @@ fn cmpNumeric( const casted_lhs = try self.coerce(scope, dest_type, lhs); const casted_rhs = try self.coerce(scope, dest_type, rhs); - return self.addNewInstArgs(b, src, Type.initTag(.bool), Inst.Cmp, .{ - .lhs = casted_lhs, - .rhs = casted_rhs, - .op = op, - }); + return self.addBinOp(b, src, Type.initTag(.bool), Inst.Tag.fromCmpOp(op), casted_lhs, casted_rhs); } fn makeIntType(self: *Module, scope: *Scope, signed: bool, bits: u16) !Type { @@ -3236,7 +3337,7 @@ fn bitcast(self: *Module, scope: *Scope, dest_type: Type, inst: *Inst) !*Inst { } // TODO validate the type size and other compile errors const b = try self.requireRuntimeBlock(scope, inst.src); - return self.addNewInstArgs(b, inst.src, dest_type, Inst.BitCast, .{ .operand = inst }); + return self.addUnOp(b, inst.src, dest_type, .bitcast, inst); } fn coerceArrayPtrToSlice(self: *Module, scope: *Scope, dest_type: Type, inst: *Inst) !*Inst { diff --git a/src-self-hosted/astgen.zig b/src-self-hosted/astgen.zig index be70a724c2..813d4d8dca 100644 --- a/src-self-hosted/astgen.zig +++ b/src-self-hosted/astgen.zig @@ -173,8 +173,8 @@ fn ifExpr(mod: *Module, scope: *Scope, if_node: *ast.Node.If) InnerError!*zir.In const if_src = tree.token_locs[if_node.if_token].start; const condbr = try mod.addZIRInstSpecial(&block_scope.base, if_src, zir.Inst.CondBr, .{ .condition = cond, - .true_body = undefined, // populated below - .false_body = undefined, // populated below + .then_body = undefined, // populated below + .else_body = undefined, // populated below }, .{}); const block = try mod.addZIRInstBlock(scope, if_src, .{ @@ -196,7 +196,7 @@ fn ifExpr(mod: *Module, scope: *Scope, if_node: *ast.Node.If) InnerError!*zir.In .operand = then_result, }, .{}); } - condbr.positionals.true_body = .{ + condbr.positionals.then_body = .{ .instructions = try then_scope.arena.dupe(*zir.Inst, then_scope.instructions.items), }; @@ -225,7 +225,7 @@ fn ifExpr(mod: *Module, scope: *Scope, if_node: *ast.Node.If) InnerError!*zir.In .block = block, }, .{}); } - condbr.positionals.false_body = .{ + condbr.positionals.else_body = .{ .instructions = try else_scope.arena.dupe(*zir.Inst, else_scope.instructions.items), }; diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 2cc471a07d..d64c1824cf 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -290,6 +290,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { next_stack_offset: u32 = 0, fn markRegUsed(self: *Branch, reg: Register) void { + if (FreeRegInt == u0) return; const index = reg.allocIndex() orelse return; const ShiftInt = std.math.Log2Int(FreeRegInt); const shift = @intCast(ShiftInt, index); @@ -297,6 +298,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } fn markRegFree(self: *Branch, reg: Register) void { + if (FreeRegInt == u0) return; const index = reg.allocIndex() orelse return; const ShiftInt = std.math.Log2Int(FreeRegInt); const shift = @intCast(ShiftInt, index); @@ -407,40 +409,64 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { for (body.instructions) |inst| { const new_inst = try self.genFuncInst(inst); try inst_table.putNoClobber(self.gpa, inst, new_inst); - // TODO process operand deaths + + var i: ir.Inst.DeathsBitIndex = 0; + while (inst.getOperand(i)) |operand| : (i += 1) { + if (inst.operandDies(i)) + self.processDeath(operand); + } + } + } + + fn processDeath(self: *Self, inst: *ir.Inst) void { + const branch = 
&self.branch_stack.items[self.branch_stack.items.len - 1]; + const entry = branch.inst_table.getEntry(inst) orelse return; + const prev_value = entry.value; + entry.value = .dead; + switch (prev_value) { + .register => |reg| { + _ = branch.registers.remove(reg); + branch.markRegFree(reg); + }, + else => {}, // TODO process stack allocation death } } fn genFuncInst(self: *Self, inst: *ir.Inst) !MCValue { switch (inst.tag) { - .add => return self.genAdd(inst.cast(ir.Inst.Add).?), - .arg => return self.genArg(inst.cast(ir.Inst.Arg).?), - .assembly => return self.genAsm(inst.cast(ir.Inst.Assembly).?), - .bitcast => return self.genBitCast(inst.cast(ir.Inst.BitCast).?), - .block => return self.genBlock(inst.cast(ir.Inst.Block).?), - .br => return self.genBr(inst.cast(ir.Inst.Br).?), + .add => return self.genAdd(inst.castTag(.add).?), + .arg => return self.genArg(inst.castTag(.arg).?), + .assembly => return self.genAsm(inst.castTag(.assembly).?), + .bitcast => return self.genBitCast(inst.castTag(.bitcast).?), + .block => return self.genBlock(inst.castTag(.block).?), + .br => return self.genBr(inst.castTag(.br).?), .breakpoint => return self.genBreakpoint(inst.src), - .brvoid => return self.genBrVoid(inst.cast(ir.Inst.BrVoid).?), - .call => return self.genCall(inst.cast(ir.Inst.Call).?), - .cmp => return self.genCmp(inst.cast(ir.Inst.Cmp).?), - .condbr => return self.genCondBr(inst.cast(ir.Inst.CondBr).?), + .brvoid => return self.genBrVoid(inst.castTag(.brvoid).?), + .call => return self.genCall(inst.castTag(.call).?), + .cmp_lt => return self.genCmp(inst.castTag(.cmp_lt).?, .lt), + .cmp_lte => return self.genCmp(inst.castTag(.cmp_lte).?, .lte), + .cmp_eq => return self.genCmp(inst.castTag(.cmp_eq).?, .eq), + .cmp_gte => return self.genCmp(inst.castTag(.cmp_gte).?, .gte), + .cmp_gt => return self.genCmp(inst.castTag(.cmp_gt).?, .gt), + .cmp_neq => return self.genCmp(inst.castTag(.cmp_neq).?, .neq), + .condbr => return self.genCondBr(inst.castTag(.condbr).?), .constant => unreachable, // excluded from function bodies - .isnonnull => return self.genIsNonNull(inst.cast(ir.Inst.IsNonNull).?), - .isnull => return self.genIsNull(inst.cast(ir.Inst.IsNull).?), - .ptrtoint => return self.genPtrToInt(inst.cast(ir.Inst.PtrToInt).?), - .ret => return self.genRet(inst.cast(ir.Inst.Ret).?), - .retvoid => return self.genRetVoid(inst.cast(ir.Inst.RetVoid).?), - .sub => return self.genSub(inst.cast(ir.Inst.Sub).?), + .isnonnull => return self.genIsNonNull(inst.castTag(.isnonnull).?), + .isnull => return self.genIsNull(inst.castTag(.isnull).?), + .ptrtoint => return self.genPtrToInt(inst.castTag(.ptrtoint).?), + .ret => return self.genRet(inst.castTag(.ret).?), + .retvoid => return self.genRetVoid(inst.castTag(.retvoid).?), + .sub => return self.genSub(inst.castTag(.sub).?), .unreach => return MCValue{ .unreach = {} }, - .not => return self.genNot(inst.cast(ir.Inst.Not).?), + .not => return self.genNot(inst.castTag(.not).?), } } - fn genNot(self: *Self, inst: *ir.Inst.Not) !MCValue { + fn genNot(self: *Self, inst: *ir.Inst.UnOp) !MCValue { // No side effects, so if it's unreferenced, do nothing. 
if (inst.base.isUnused()) return MCValue.dead; - const operand = try self.resolveInst(inst.args.operand); + const operand = try self.resolveInst(inst.operand); switch (operand) { .dead => unreachable, .unreach => unreachable, @@ -473,36 +499,36 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .base = .{ .tag = .constant, .deaths = 0, - .ty = inst.args.operand.ty, - .src = inst.args.operand.src, + .ty = inst.operand.ty, + .src = inst.operand.src, }, .val = Value.initTag(.bool_true), }; - return try self.genX8664BinMath(&inst.base, inst.args.operand, &imm.base, 6, 0x30); + return try self.genX8664BinMath(&inst.base, inst.operand, &imm.base, 6, 0x30); }, else => return self.fail(inst.base.src, "TODO implement NOT for {}", .{self.target.cpu.arch}), } } - fn genAdd(self: *Self, inst: *ir.Inst.Add) !MCValue { + fn genAdd(self: *Self, inst: *ir.Inst.BinOp) !MCValue { // No side effects, so if it's unreferenced, do nothing. if (inst.base.isUnused()) return MCValue.dead; switch (arch) { .x86_64 => { - return try self.genX8664BinMath(&inst.base, inst.args.lhs, inst.args.rhs, 0, 0x00); + return try self.genX8664BinMath(&inst.base, inst.lhs, inst.rhs, 0, 0x00); }, else => return self.fail(inst.base.src, "TODO implement add for {}", .{self.target.cpu.arch}), } } - fn genSub(self: *Self, inst: *ir.Inst.Sub) !MCValue { + fn genSub(self: *Self, inst: *ir.Inst.BinOp) !MCValue { // No side effects, so if it's unreferenced, do nothing. if (inst.base.isUnused()) return MCValue.dead; switch (arch) { .x86_64 => { - return try self.genX8664BinMath(&inst.base, inst.args.lhs, inst.args.rhs, 5, 0x28); + return try self.genX8664BinMath(&inst.base, inst.lhs, inst.rhs, 5, 0x28); }, else => return self.fail(inst.base.src, "TODO implement sub for {}", .{self.target.cpu.arch}), } @@ -625,7 +651,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } } - fn genArg(self: *Self, inst: *ir.Inst.Arg) !MCValue { + fn genArg(self: *Self, inst: *ir.Inst.NoOp) !MCValue { if (FreeRegInt == u0) { return self.fail(inst.base.src, "TODO implement Register enum for {}", .{self.target.cpu.arch}); } @@ -659,7 +685,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } fn genCall(self: *Self, inst: *ir.Inst.Call) !MCValue { - const fn_ty = inst.args.func.ty; + const fn_ty = inst.func.ty; const cc = fn_ty.fnCallingConvention(); const param_types = try self.gpa.alloc(Type, fn_ty.fnParamLen()); defer self.gpa.free(param_types); @@ -671,8 +697,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { switch (arch) { .x86_64 => { for (mc_args) |mc_arg, arg_i| { - const arg = inst.args.args[arg_i]; - const arg_mcv = try self.resolveInst(inst.args.args[arg_i]); + const arg = inst.args[arg_i]; + const arg_mcv = try self.resolveInst(inst.args[arg_i]); switch (mc_arg) { .none => continue, .register => |reg| { @@ -694,7 +720,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } } - if (inst.args.func.cast(ir.Inst.Constant)) |func_inst| { + if (inst.func.cast(ir.Inst.Constant)) |func_inst| { if (func_inst.val.cast(Value.Payload.Function)) |func_val| { const func = func_val.func; const got = &self.bin_file.program_headers.items[self.bin_file.phdr_got_index.?]; @@ -742,16 +768,16 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return .unreach; } - fn genRet(self: *Self, inst: *ir.Inst.Ret) !MCValue { - const operand = try self.resolveInst(inst.args.operand); + fn genRet(self: *Self, inst: *ir.Inst.UnOp) !MCValue { + const operand = try self.resolveInst(inst.operand); return self.ret(inst.base.src, operand); } - 
fn genRetVoid(self: *Self, inst: *ir.Inst.RetVoid) !MCValue { + fn genRetVoid(self: *Self, inst: *ir.Inst.NoOp) !MCValue { return self.ret(inst.base.src, .none); } - fn genCmp(self: *Self, inst: *ir.Inst.Cmp) !MCValue { + fn genCmp(self: *Self, inst: *ir.Inst.BinOp, op: std.math.CompareOperator) !MCValue { // No side effects, so if it's unreferenced, do nothing. if (inst.base.isUnused()) return MCValue.dead; @@ -759,25 +785,25 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .x86_64 => { try self.code.ensureCapacity(self.code.items.len + 8); - const lhs = try self.resolveInst(inst.args.lhs); - const rhs = try self.resolveInst(inst.args.rhs); + const lhs = try self.resolveInst(inst.lhs); + const rhs = try self.resolveInst(inst.rhs); // There are 2 operands, destination and source. // Either one, but not both, can be a memory operand. // Source operand can be an immediate, 8 bits or 32 bits. const dst_mcv = if (lhs.isImmediate() or (lhs.isMemory() and rhs.isMemory())) - try self.copyToNewRegister(inst.args.lhs) + try self.copyToNewRegister(inst.lhs) else lhs; // This instruction supports only signed 32-bit immediates at most. - const src_mcv = try self.limitImmediateType(inst.args.rhs, i32); + const src_mcv = try self.limitImmediateType(inst.rhs, i32); try self.genX8664BinMathCode(inst.base.src, dst_mcv, src_mcv, 7, 0x38); - const info = inst.args.lhs.ty.intInfo(self.target.*); + const info = inst.lhs.ty.intInfo(self.target.*); if (info.signed) { - return MCValue{ .compare_flags_signed = inst.args.op }; + return MCValue{ .compare_flags_signed = op }; } else { - return MCValue{ .compare_flags_unsigned = inst.args.op }; + return MCValue{ .compare_flags_unsigned = op }; } }, else => return self.fail(inst.base.src, "TODO implement cmp for {}", .{self.target.cpu.arch}), @@ -789,7 +815,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .x86_64 => { try self.code.ensureCapacity(self.code.items.len + 6); - const cond = try self.resolveInst(inst.args.condition); + const cond = try self.resolveInst(inst.condition); switch (cond) { .compare_flags_signed => |cmp_op| { // Here we map to the opposite opcode because the jump is to the false branch. @@ -838,19 +864,19 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { self.code.appendSliceAssumeCapacity(&[_]u8{ 0x0f, opcode }); const reloc = Reloc{ .rel32 = self.code.items.len }; self.code.items.len += 4; - try self.genBody(inst.args.true_body); + try self.genBody(inst.then_body); try self.performReloc(inst.base.src, reloc); - try self.genBody(inst.args.false_body); + try self.genBody(inst.else_body); return MCValue.unreach; } - fn genIsNull(self: *Self, inst: *ir.Inst.IsNull) !MCValue { + fn genIsNull(self: *Self, inst: *ir.Inst.UnOp) !MCValue { switch (arch) { else => return self.fail(inst.base.src, "TODO implement isnull for {}", .{self.target.cpu.arch}), } } - fn genIsNonNull(self: *Self, inst: *ir.Inst.IsNonNull) !MCValue { + fn genIsNonNull(self: *Self, inst: *ir.Inst.UnOp) !MCValue { // Here you can specialize this instruction if it makes sense to, otherwise the default // will call genIsNull and invert the result. switch (arch) { @@ -864,7 +890,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } // A block is nothing but a setup to be able to jump to the end. 
defer inst.codegen.relocs.deinit(self.gpa); - try self.genBody(inst.args.body); + try self.genBody(inst.body); for (inst.codegen.relocs.items) |reloc| try self.performReloc(inst.base.src, reloc); @@ -883,17 +909,17 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } fn genBr(self: *Self, inst: *ir.Inst.Br) !MCValue { - if (!inst.args.operand.ty.hasCodeGenBits()) - return self.brVoid(inst.base.src, inst.args.block); + if (!inst.operand.ty.hasCodeGenBits()) + return self.brVoid(inst.base.src, inst.block); - const operand = try self.resolveInst(inst.args.operand); + const operand = try self.resolveInst(inst.operand); switch (arch) { else => return self.fail(inst.base.src, "TODO implement br for {}", .{self.target.cpu.arch}), } } fn genBrVoid(self: *Self, inst: *ir.Inst.BrVoid) !MCValue { - return self.brVoid(inst.base.src, inst.args.block); + return self.brVoid(inst.base.src, inst.block); } fn brVoid(self: *Self, src: usize, block: *ir.Inst.Block) !MCValue { @@ -915,29 +941,29 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } fn genAsm(self: *Self, inst: *ir.Inst.Assembly) !MCValue { - if (!inst.args.is_volatile and inst.base.isUnused()) + if (!inst.is_volatile and inst.base.isUnused()) return MCValue.dead; if (arch != .x86_64 and arch != .i386) { return self.fail(inst.base.src, "TODO implement inline asm support for more architectures", .{}); } - for (inst.args.inputs) |input, i| { + for (inst.inputs) |input, i| { if (input.len < 3 or input[0] != '{' or input[input.len - 1] != '}') { return self.fail(inst.base.src, "unrecognized asm input constraint: '{}'", .{input}); } const reg_name = input[1 .. input.len - 1]; const reg = parseRegName(reg_name) orelse return self.fail(inst.base.src, "unrecognized register: '{}'", .{reg_name}); - const arg = try self.resolveInst(inst.args.args[i]); + const arg = try self.resolveInst(inst.args[i]); try self.genSetReg(inst.base.src, reg, arg); } - if (mem.eql(u8, inst.args.asm_source, "syscall")) { + if (mem.eql(u8, inst.asm_source, "syscall")) { try self.code.appendSlice(&[_]u8{ 0x0f, 0x05 }); } else { return self.fail(inst.base.src, "TODO implement support for more x86 assembly instructions", .{}); } - if (inst.args.output) |output| { + if (inst.output) |output| { if (output.len < 4 or output[0] != '=' or output[1] != '{' or output[output.len - 1] != '}') { return self.fail(inst.base.src, "unrecognized asm output constraint: '{}'", .{output}); } @@ -1169,13 +1195,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } } - fn genPtrToInt(self: *Self, inst: *ir.Inst.PtrToInt) !MCValue { + fn genPtrToInt(self: *Self, inst: *ir.Inst.UnOp) !MCValue { // no-op - return self.resolveInst(inst.args.ptr); + return self.resolveInst(inst.operand); } - fn genBitCast(self: *Self, inst: *ir.Inst.BitCast) !MCValue { - const operand = try self.resolveInst(inst.args.operand); + fn genBitCast(self: *Self, inst: *ir.Inst.UnOp) !MCValue { + const operand = try self.resolveInst(inst.operand); return operand; } diff --git a/src-self-hosted/codegen/c.zig b/src-self-hosted/codegen/c.zig index ebc4ff7e1a..ed3a5f73b4 100644 --- a/src-self-hosted/codegen/c.zig +++ b/src-self-hosted/codegen/c.zig @@ -92,9 +92,9 @@ fn genFn(file: *C, decl: *Decl) !void { for (instructions) |inst| { try writer.writeAll("\n\t"); switch (inst.tag) { - .assembly => try genAsm(file, inst.cast(Inst.Assembly).?, decl), - .call => try genCall(file, inst.cast(Inst.Call).?, decl), - .ret => try genRet(file, inst.cast(Inst.Ret).?, decl, tv.ty.fnReturnType()), + .assembly => try 
genAsm(file, inst.castTag(.assembly).?, decl), + .call => try genCall(file, inst.castTag(.call).?, decl), + .ret => try genRet(file, inst.castTag(.ret).?, decl, tv.ty.fnReturnType()), .retvoid => try file.main.writer().print("return;", .{}), else => |e| return file.fail(decl.src(), "TODO implement C codegen for {}", .{e}), } @@ -105,9 +105,9 @@ fn genFn(file: *C, decl: *Decl) !void { try writer.writeAll("}\n\n"); } -fn genRet(file: *C, inst: *Inst.Ret, decl: *Decl, expected_return_type: Type) !void { +fn genRet(file: *C, inst: *Inst.UnOp, decl: *Decl, expected_return_type: Type) !void { const writer = file.main.writer(); - const ret_value = inst.args.operand; + const ret_value = inst.operand; const value = ret_value.value().?; if (expected_return_type.eql(ret_value.ty)) return file.fail(decl.src(), "TODO return {}", .{expected_return_type}) @@ -126,7 +126,7 @@ fn genRet(file: *C, inst: *Inst.Ret, decl: *Decl, expected_return_type: Type) !v fn genCall(file: *C, inst: *Inst.Call, decl: *Decl) !void { const writer = file.main.writer(); const header = file.header.writer(); - if (inst.args.func.cast(Inst.Constant)) |func_inst| { + if (inst.func.castTag(.constant)) |func_inst| { if (func_inst.val.cast(Value.Payload.Function)) |func_val| { const target = func_val.func.owner_decl; const target_ty = target.typed_value.most_recent.typed_value.ty; @@ -144,7 +144,7 @@ fn genCall(file: *C, inst: *Inst.Call, decl: *Decl) !void { } else { return file.fail(decl.src(), "TODO non-function call target?", .{}); } - if (inst.args.args.len != 0) { + if (inst.args.len != 0) { return file.fail(decl.src(), "TODO function arguments", .{}); } } else { @@ -152,14 +152,13 @@ fn genCall(file: *C, inst: *Inst.Call, decl: *Decl) !void { } } -fn genAsm(file: *C, inst: *Inst.Assembly, decl: *Decl) !void { - const as = inst.args; +fn genAsm(file: *C, as: *Inst.Assembly, decl: *Decl) !void { const writer = file.main.writer(); for (as.inputs) |i, index| { if (i[0] == '{' and i[i.len - 1] == '}') { const reg = i[1 .. i.len - 1]; const arg = as.args[index]; - if (arg.cast(Inst.Constant)) |c| { + if (arg.castTag(.constant)) |c| { if (c.val.tag() == .int_u64) { try writer.writeAll("register "); try renderType(file, writer, arg.ty, decl.src()); @@ -190,7 +189,7 @@ fn genAsm(file: *C, inst: *Inst.Assembly, decl: *Decl) !void { if (index > 0) { try writer.writeAll(", "); } - if (arg.cast(Inst.Constant)) |c| { + if (arg.castTag(.constant)) |c| { try writer.print("\"\"({}_constant)", .{reg}); } else { // This is blocked by the earlier test diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index 9902bd70aa..53a73dbf6c 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -55,7 +55,12 @@ pub const Inst = struct { breakpoint, brvoid, call, - cmp, + cmp_lt, + cmp_lte, + cmp_eq, + cmp_gte, + cmp_gt, + cmp_neq, condbr, constant, isnonnull, @@ -66,13 +71,80 @@ pub const Inst = struct { sub, unreach, not, + + /// There is one-to-one correspondence between tag and type for now, + /// but this will not always be the case. For example, binary operations + /// such as + and - will have different tags but the same type. 
+ pub fn Type(tag: Tag) type { + return switch (tag) { + .retvoid, + .unreach, + .arg, + .breakpoint, + => NoOp, + + .ret, + .bitcast, + .not, + .isnonnull, + .isnull, + .ptrtoint, + => UnOp, + + .add, + .sub, + .cmp_lt, + .cmp_lte, + .cmp_eq, + .cmp_gte, + .cmp_gt, + .cmp_neq, + => BinOp, + + .assembly => Assembly, + .block => Block, + .br => Br, + .brvoid => BrVoid, + .call => Call, + .condbr => CondBr, + .constant => Constant, + }; + } + + pub fn fromCmpOp(op: std.math.CompareOperator) Tag { + return switch (op) { + .lt => .cmp_lt, + .lte => .cmp_lte, + .eq => .cmp_eq, + .gte => .cmp_gte, + .gt => .cmp_gt, + .neq => .cmp_neq, + }; + } }; + /// Prefer `castTag` to this. pub fn cast(base: *Inst, comptime T: type) ?*T { - if (base.tag != T.base_tag) - return null; + if (@hasDecl(T, "base_tag")) { + return base.castTag(T.base_tag); + } + inline for (@typeInfo(Tag).Enum.fields) |field| { + const tag = @intToEnum(Tag, field.value); + if (base.tag == tag) { + if (T == tag.Type()) { + return @fieldParentPtr(T, "base", base); + } + return null; + } + } + unreachable; + } - return @fieldParentPtr(T, "base", base); + pub fn castTag(base: *Inst, comptime tag: Tag) ?*tag.Type() { + if (base.tag == tag) { + return @fieldParentPtr(tag.Type(), "base", base); + } + return null; } pub fn Args(comptime T: type) type { @@ -88,186 +160,219 @@ pub const Inst = struct { return inst.val; } - pub const Add = struct { - pub const base_tag = Tag.add; + pub fn cmpOperator(base: *Inst) ?std.math.CompareOperator { + return switch (base.tag) { + .cmp_lt => .lt, + .cmp_lte => .lte, + .cmp_eq => .eq, + .cmp_gte => .gte, + .cmp_gt => .gt, + .cmp_neq => .neq, + else => null, + }; + } + + pub fn operandCount(base: *Inst) usize { + inline for (@typeInfo(Tag).Enum.fields) |field| { + const tag = @intToEnum(Tag, field.value); + if (tag == base.tag) { + return @fieldParentPtr(tag.Type(), "base", base).operandCount(); + } + } + unreachable; + } + + pub fn getOperand(base: *Inst, index: usize) ?*Inst { + inline for (@typeInfo(Tag).Enum.fields) |field| { + const tag = @intToEnum(Tag, field.value); + if (tag == base.tag) { + return @fieldParentPtr(tag.Type(), "base", base).getOperand(index); + } + } + unreachable; + } + + pub const NoOp = struct { base: Inst, - args: struct { - lhs: *Inst, - rhs: *Inst, - }, + pub fn operandCount(self: *const NoOp) usize { + return 0; + } + pub fn getOperand(self: *const NoOp, index: usize) ?*Inst { + return null; + } }; - pub const Arg = struct { - pub const base_tag = Tag.arg; + pub const UnOp = struct { base: Inst, - args: void, + operand: *Inst, + + pub fn operandCount(self: *const UnOp) usize { + return 1; + } + pub fn getOperand(self: *const UnOp, index: usize) ?*Inst { + if (index == 0) + return self.operand; + return null; + } + }; + + pub const BinOp = struct { + base: Inst, + lhs: *Inst, + rhs: *Inst, + + pub fn operandCount(self: *const BinOp) usize { + return 2; + } + pub fn getOperand(self: *const BinOp, index: usize) ?*Inst { + var i = index; + + if (i < 1) + return self.lhs; + i -= 1; + + if (i < 1) + return self.rhs; + i -= 1; + + return null; + } }; pub const Assembly = struct { pub const base_tag = Tag.assembly; - base: Inst, - - args: struct { - asm_source: []const u8, - is_volatile: bool, - output: ?[]const u8, - inputs: []const []const u8, - clobbers: []const []const u8, - args: []const *Inst, - }, - }; - - pub const BitCast = struct { - pub const base_tag = Tag.bitcast; base: Inst, - args: struct { - operand: *Inst, - }, + asm_source: []const u8, + is_volatile: bool, +
output: ?[]const u8, + inputs: []const []const u8, + clobbers: []const []const u8, + args: []const *Inst, + + pub fn operandCount(self: *const Assembly) usize { + return self.args.len; + } + pub fn getOperand(self: *const Assembly, index: usize) ?*Inst { + if (index < self.args.len) + return self.args[index]; + return null; + } }; pub const Block = struct { pub const base_tag = Tag.block; + base: Inst, - args: struct { - body: Body, - }, + body: Body, /// This memory is reserved for codegen code to do whatever it needs to here. codegen: codegen.BlockData = .{}, + + pub fn operandCount(self: *const Block) usize { + return 0; + } + pub fn getOperand(self: *const Block, index: usize) ?*Inst { + return null; + } }; pub const Br = struct { pub const base_tag = Tag.br; - base: Inst, - args: struct { - block: *Block, - operand: *Inst, - }, - }; - pub const Breakpoint = struct { - pub const base_tag = Tag.breakpoint; base: Inst, - args: void, + block: *Block, + operand: *Inst, + + pub fn operandCount(self: *const Br) usize { + return 0; + } + pub fn getOperand(self: *const Br, index: usize) ?*Inst { + if (index == 0) + return self.operand; + return null; + } }; pub const BrVoid = struct { pub const base_tag = Tag.brvoid; + base: Inst, - args: struct { - block: *Block, - }, + block: *Block, + + pub fn operandCount(self: *const BrVoid) usize { + return 0; + } + pub fn getOperand(self: *const BrVoid, index: usize) ?*Inst { + return null; + } }; pub const Call = struct { pub const base_tag = Tag.call; - base: Inst, - args: struct { - func: *Inst, - args: []const *Inst, - }, - }; - - pub const Cmp = struct { - pub const base_tag = Tag.cmp; base: Inst, - args: struct { - lhs: *Inst, - op: std.math.CompareOperator, - rhs: *Inst, - }, + func: *Inst, + args: []const *Inst, + + pub fn operandCount(self: *const Call) usize { + return self.args.len + 1; + } + pub fn getOperand(self: *const Call, index: usize) ?*Inst { + var i = index; + + if (i < 1) + return self.func; + i -= 1; + + if (i < self.args.len) + return self.args[i]; + i -= self.args.len; + + return null; + } }; pub const CondBr = struct { pub const base_tag = Tag.condbr; base: Inst, - args: struct { - condition: *Inst, - true_body: Body, - false_body: Body, - }, + condition: *Inst, + then_body: Body, + else_body: Body, /// Set of instructions whose lifetimes end at the start of one of the branches. /// The `true` branch is first: `deaths[0..true_death_count]`. /// The `false` branch is next: `(deaths + true_death_count)[..false_death_count]`. 
deaths: [*]*Inst = undefined, true_death_count: u32 = 0, false_death_count: u32 = 0, - }; - pub const Not = struct { - pub const base_tag = Tag.not; + pub fn operandCount(self: *const CondBr) usize { + return 1; + } + pub fn getOperand(self: *const CondBr, index: usize) ?*Inst { + var i = index; - base: Inst, - args: struct { - operand: *Inst, - }, + if (i < 1) + return self.condition; + i -= 1; + + return null; + } }; pub const Constant = struct { pub const base_tag = Tag.constant; - base: Inst, + base: Inst, val: Value, - }; - pub const IsNonNull = struct { - pub const base_tag = Tag.isnonnull; - - base: Inst, - args: struct { - operand: *Inst, - }, - }; - - pub const IsNull = struct { - pub const base_tag = Tag.isnull; - - base: Inst, - args: struct { - operand: *Inst, - }, - }; - - pub const PtrToInt = struct { - pub const base_tag = Tag.ptrtoint; - - base: Inst, - args: struct { - ptr: *Inst, - }, - }; - - pub const Ret = struct { - pub const base_tag = Tag.ret; - base: Inst, - args: struct { - operand: *Inst, - }, - }; - - pub const RetVoid = struct { - pub const base_tag = Tag.retvoid; - base: Inst, - args: void, - }; - - pub const Sub = struct { - pub const base_tag = Tag.sub; - base: Inst, - - args: struct { - lhs: *Inst, - rhs: *Inst, - }, - }; - - pub const Unreach = struct { - pub const base_tag = Tag.unreach; - base: Inst, - args: void, + pub fn operandCount(self: *const Constant) usize { + return 0; + } + pub fn getOperand(self: *const Constant, index: usize) ?*Inst { + return null; + } }; }; diff --git a/src-self-hosted/liveness.zig b/src-self-hosted/liveness.zig index a06a4dd1d1..e8f80f30d5 100644 --- a/src-self-hosted/liveness.zig +++ b/src-self-hosted/liveness.zig @@ -25,53 +25,38 @@ fn analyzeWithTable(arena: *std.mem.Allocator, table: *std.AutoHashMap(*ir.Inst, while (i != 0) { i -= 1; const base = body.instructions[i]; - try analyzeInstGeneric(arena, table, base); + try analyzeInst(arena, table, base); } } -fn analyzeInstGeneric(arena: *std.mem.Allocator, table: *std.AutoHashMap(*ir.Inst, void), base: *ir.Inst) error{OutOfMemory}!void { - // Obtain the corresponding instruction type based on the tag type. - inline for (std.meta.declarations(ir.Inst)) |decl| { - switch (decl.data) { - .Type => |T| { - if (@typeInfo(T) == .Struct and @hasDecl(T, "base_tag")) { - if (T.base_tag == base.tag) { - return analyzeInst(arena, table, T, @fieldParentPtr(T, "base", base)); - } - } - }, - else => {}, - } - } - unreachable; -} - -fn analyzeInst(arena: *std.mem.Allocator, table: *std.AutoHashMap(*ir.Inst, void), comptime T: type, inst: *T) error{OutOfMemory}!void { - if (table.contains(&inst.base)) { - inst.base.deaths = 0; +fn analyzeInst(arena: *std.mem.Allocator, table: *std.AutoHashMap(*ir.Inst, void), base: *ir.Inst) error{OutOfMemory}!void { + if (table.contains(base)) { + base.deaths = 0; } else { // No tombstone for this instruction means it is never referenced, // and its birth marks its own death. Very metal 🤘 - inst.base.deaths = 1 << ir.Inst.unreferenced_bit_index; + base.deaths = 1 << ir.Inst.unreferenced_bit_index; } - switch (T) { - ir.Inst.Constant => return, - ir.Inst.Block => { - try analyzeWithTable(arena, table, inst.args.body); + switch (base.tag) { + .constant => return, + .block => { + const inst = base.castTag(.block).?; + try analyzeWithTable(arena, table, inst.body); // We let this continue so that it can possibly mark the block as // unreferenced below. 
}, - ir.Inst.CondBr => { + .condbr => { + const inst = base.castTag(.condbr).?; var true_table = std.AutoHashMap(*ir.Inst, void).init(table.allocator); defer true_table.deinit(); - try true_table.ensureCapacity(inst.args.true_body.instructions.len); - try analyzeWithTable(arena, &true_table, inst.args.true_body); + try true_table.ensureCapacity(inst.then_body.instructions.len); + try analyzeWithTable(arena, &true_table, inst.then_body); var false_table = std.AutoHashMap(*ir.Inst, void).init(table.allocator); defer false_table.deinit(); - try false_table.ensureCapacity(inst.args.false_body.instructions.len); - try analyzeWithTable(arena, &false_table, inst.args.false_body); + try false_table.ensureCapacity(inst.else_body.instructions.len); + try analyzeWithTable(arena, &false_table, inst.else_body); // Each death that occurs inside one branch, but not the other, needs // to be added as a death immediately upon entering the other branch. @@ -112,47 +97,22 @@ fn analyzeInst(arena: *std.mem.Allocator, table: *std.AutoHashMap(*ir.Inst, void // instruction, and the deaths flag for the CondBr instruction will indicate whether the // condition's lifetime ends immediately before entering any branch. }, - ir.Inst.Call => { - // Call instructions have a runtime-known number of operands so we have to handle them ourselves here. - const needed_bits = 1 + inst.args.args.len; - if (needed_bits <= ir.Inst.deaths_bits) { - var bit_i: ir.Inst.DeathsBitIndex = 0; - { - const prev = try table.fetchPut(inst.args.func, {}); - if (prev == null) inst.base.deaths |= @as(ir.Inst.DeathsInt, 1) << bit_i; - bit_i += 1; - } - for (inst.args.args) |arg| { - const prev = try table.fetchPut(arg, {}); - if (prev == null) inst.base.deaths |= @as(ir.Inst.DeathsInt, 1) << bit_i; - bit_i += 1; - } - } else { - @panic("Handle liveness analysis for function calls with many parameters"); - } - }, else => {}, } - const Args = ir.Inst.Args(T); - if (Args == void) { - return; - } - - comptime var arg_index: usize = 0; - inline for (std.meta.fields(Args)) |field| { - if (field.field_type == *ir.Inst) { - if (arg_index >= 6) { - @compileError("out of bits to mark deaths of operands"); - } - const prev = try table.fetchPut(@field(inst.args, field.name), {}); + const needed_bits = base.operandCount(); + if (needed_bits <= ir.Inst.deaths_bits) { + var bit_i: ir.Inst.DeathsBitIndex = 0; + while (base.getOperand(bit_i)) |operand| : (bit_i += 1) { + const prev = try table.fetchPut(operand, {}); if (prev == null) { // Death. 
- inst.base.deaths |= 1 << arg_index; + base.deaths |= @as(ir.Inst.DeathsInt, 1) << bit_i; } - arg_index += 1; } + } else { + @panic("Handle liveness analysis for instructions with many parameters"); } - std.log.debug(.liveness, "analyze {}: 0b{b}\n", .{ inst.base.tag, inst.base.deaths }); + std.log.debug(.liveness, "analyze {}: 0b{b}\n", .{ base.tag, base.deaths }); } diff --git a/src-self-hosted/zir.zig b/src-self-hosted/zir.zig index 5e8c966b94..514d08d6d4 100644 --- a/src-self-hosted/zir.zig +++ b/src-self-hosted/zir.zig @@ -337,7 +337,7 @@ pub const Inst = struct { base: Inst, positionals: struct { - ptr: *Inst, + operand: *Inst, }, kw_args: struct {}, }; @@ -629,8 +629,8 @@ pub const Inst = struct { positionals: struct { condition: *Inst, - true_body: Module.Body, - false_body: Module.Body, + then_body: Module.Body, + else_body: Module.Body, }, kw_args: struct {}, }; @@ -1615,7 +1615,7 @@ const EmitZIR = struct { } } - fn emitTrivial(self: *EmitZIR, src: usize, comptime T: type) Allocator.Error!*Inst { + fn emitNoOp(self: *EmitZIR, src: usize, comptime T: type) Allocator.Error!*Inst { const new_inst = try self.arena.allocator.create(T); new_inst.* = .{ .base = .{ @@ -1628,6 +1628,72 @@ const EmitZIR = struct { return &new_inst.base; } + fn emitCmp( + self: *EmitZIR, + src: usize, + new_body: ZirBody, + old_inst: *ir.Inst.BinOp, + op: std.math.CompareOperator, + ) Allocator.Error!*Inst { + const new_inst = try self.arena.allocator.create(Inst.Cmp); + new_inst.* = .{ + .base = .{ + .src = src, + .tag = Inst.Cmp.base_tag, + }, + .positionals = .{ + .lhs = try self.resolveInst(new_body, old_inst.lhs), + .rhs = try self.resolveInst(new_body, old_inst.rhs), + .op = op, + }, + .kw_args = .{}, + }; + return &new_inst.base; + } + + fn emitUnOp( + self: *EmitZIR, + src: usize, + new_body: ZirBody, + old_inst: *ir.Inst.UnOp, + comptime I: type, + ) Allocator.Error!*Inst { + const new_inst = try self.arena.allocator.create(I); + new_inst.* = .{ + .base = .{ + .src = src, + .tag = I.base_tag, + }, + .positionals = .{ + .operand = try self.resolveInst(new_body, old_inst.operand), + }, + .kw_args = .{}, + }; + return &new_inst.base; + } + + fn emitBinOp( + self: *EmitZIR, + src: usize, + new_body: ZirBody, + old_inst: *ir.Inst.BinOp, + comptime I: type, + ) Allocator.Error!*Inst { + const new_inst = try self.arena.allocator.create(I); + new_inst.* = .{ + .base = .{ + .src = src, + .tag = I.base_tag, + }, + .positionals = .{ + .lhs = try self.resolveInst(new_body, old_inst.lhs), + .rhs = try self.resolveInst(new_body, old_inst.rhs), + }, + .kw_args = .{}, + }; + return &new_inst.base; + } + fn emitBody( self: *EmitZIR, body: ir.Body, @@ -1640,69 +1706,48 @@ const EmitZIR = struct { }; for (body.instructions) |inst| { const new_inst = switch (inst.tag) { - .not => blk: { - const old_inst = inst.cast(ir.Inst.Not).?; - assert(inst.ty.zigTypeTag() == .Bool); - const new_inst = try self.arena.allocator.create(Inst.BoolNot); + .constant => unreachable, // excluded from function bodies + + .arg => try self.emitNoOp(inst.src, Inst.Arg), + .breakpoint => try self.emitNoOp(inst.src, Inst.Breakpoint), + .unreach => try self.emitNoOp(inst.src, Inst.Unreachable), + .retvoid => try self.emitNoOp(inst.src, Inst.ReturnVoid), + + .not => try self.emitUnOp(inst.src, new_body, inst.castTag(.not).?, Inst.BoolNot), + .ret => try self.emitUnOp(inst.src, new_body, inst.castTag(.ret).?, Inst.Return), + .ptrtoint => try self.emitUnOp(inst.src, new_body, inst.castTag(.ptrtoint).?, Inst.PtrToInt), + .isnull => try 
self.emitUnOp(inst.src, new_body, inst.castTag(.isnull).?, Inst.IsNull), + .isnonnull => try self.emitUnOp(inst.src, new_body, inst.castTag(.isnonnull).?, Inst.IsNonNull), + + .add => try self.emitBinOp(inst.src, new_body, inst.castTag(.add).?, Inst.Add), + .sub => try self.emitBinOp(inst.src, new_body, inst.castTag(.sub).?, Inst.Sub), + + .cmp_lt => try self.emitCmp(inst.src, new_body, inst.castTag(.cmp_lt).?, .lt), + .cmp_lte => try self.emitCmp(inst.src, new_body, inst.castTag(.cmp_lte).?, .lte), + .cmp_eq => try self.emitCmp(inst.src, new_body, inst.castTag(.cmp_eq).?, .eq), + .cmp_gte => try self.emitCmp(inst.src, new_body, inst.castTag(.cmp_gte).?, .gte), + .cmp_gt => try self.emitCmp(inst.src, new_body, inst.castTag(.cmp_gt).?, .gt), + .cmp_neq => try self.emitCmp(inst.src, new_body, inst.castTag(.cmp_neq).?, .neq), + + .bitcast => blk: { + const old_inst = inst.castTag(.bitcast).?; + const new_inst = try self.arena.allocator.create(Inst.BitCast); new_inst.* = .{ .base = .{ .src = inst.src, - .tag = Inst.BoolNot.base_tag, + .tag = Inst.BitCast.base_tag, }, .positionals = .{ - .operand = try self.resolveInst(new_body, old_inst.args.operand), + .dest_type = (try self.emitType(inst.src, inst.ty)).inst, + .operand = try self.resolveInst(new_body, old_inst.operand), }, .kw_args = .{}, }; break :blk &new_inst.base; }, - .add => blk: { - const old_inst = inst.cast(ir.Inst.Add).?; - const new_inst = try self.arena.allocator.create(Inst.Add); - new_inst.* = .{ - .base = .{ - .src = inst.src, - .tag = Inst.Add.base_tag, - }, - .positionals = .{ - .lhs = try self.resolveInst(new_body, old_inst.args.lhs), - .rhs = try self.resolveInst(new_body, old_inst.args.rhs), - }, - .kw_args = .{}, - }; - break :blk &new_inst.base; - }, - .sub => blk: { - const old_inst = inst.cast(ir.Inst.Sub).?; - const new_inst = try self.arena.allocator.create(Inst.Sub); - new_inst.* = .{ - .base = .{ - .src = inst.src, - .tag = Inst.Sub.base_tag, - }, - .positionals = .{ - .lhs = try self.resolveInst(new_body, old_inst.args.lhs), - .rhs = try self.resolveInst(new_body, old_inst.args.rhs), - }, - .kw_args = .{}, - }; - break :blk &new_inst.base; - }, - .arg => blk: { - const old_inst = inst.cast(ir.Inst.Arg).?; - const new_inst = try self.arena.allocator.create(Inst.Arg); - new_inst.* = .{ - .base = .{ - .src = inst.src, - .tag = Inst.Arg.base_tag, - }, - .positionals = .{}, - .kw_args = .{}, - }; - break :blk &new_inst.base; - }, + .block => blk: { - const old_inst = inst.cast(ir.Inst.Block).?; + const old_inst = inst.castTag(.block).?; const new_inst = try self.arena.allocator.create(Inst.Block); try self.block_table.put(old_inst, new_inst); @@ -1710,7 +1755,7 @@ const EmitZIR = struct { var block_body = std.ArrayList(*Inst).init(self.allocator); defer block_body.deinit(); - try self.emitBody(old_inst.args.body, inst_table, &block_body); + try self.emitBody(old_inst.body, inst_table, &block_body); new_inst.* = .{ .base = .{ @@ -1725,27 +1770,10 @@ const EmitZIR = struct { break :blk &new_inst.base; }, - .br => blk: { - const old_inst = inst.cast(ir.Inst.Br).?; - const new_block = self.block_table.get(old_inst.args.block).?; - const new_inst = try self.arena.allocator.create(Inst.Break); - new_inst.* = .{ - .base = .{ - .src = inst.src, - .tag = Inst.Break.base_tag, - }, - .positionals = .{ - .block = new_block, - .operand = try self.resolveInst(new_body, old_inst.args.operand), - }, - .kw_args = .{}, - }; - break :blk &new_inst.base; - }, - .breakpoint => try self.emitTrivial(inst.src, Inst.Breakpoint), + .brvoid => 
blk: { const old_inst = inst.cast(ir.Inst.BrVoid).?; - const new_block = self.block_table.get(old_inst.args.block).?; + const new_block = self.block_table.get(old_inst.block).?; const new_inst = try self.arena.allocator.create(Inst.BreakVoid); new_inst.* = .{ .base = .{ @@ -1759,13 +1787,32 @@ const EmitZIR = struct { }; break :blk &new_inst.base; }, + + .br => blk: { + const old_inst = inst.castTag(.br).?; + const new_block = self.block_table.get(old_inst.block).?; + const new_inst = try self.arena.allocator.create(Inst.Break); + new_inst.* = .{ + .base = .{ + .src = inst.src, + .tag = Inst.Break.base_tag, + }, + .positionals = .{ + .block = new_block, + .operand = try self.resolveInst(new_body, old_inst.operand), + }, + .kw_args = .{}, + }; + break :blk &new_inst.base; + }, + .call => blk: { - const old_inst = inst.cast(ir.Inst.Call).?; + const old_inst = inst.castTag(.call).?; const new_inst = try self.arena.allocator.create(Inst.Call); - const args = try self.arena.allocator.alloc(*Inst, old_inst.args.args.len); + const args = try self.arena.allocator.alloc(*Inst, old_inst.args.len); for (args) |*elem, i| { - elem.* = try self.resolveInst(new_body, old_inst.args.args[i]); + elem.* = try self.resolveInst(new_body, old_inst.args[i]); } new_inst.* = .{ .base = .{ @@ -1773,48 +1820,31 @@ const EmitZIR = struct { .tag = Inst.Call.base_tag, }, .positionals = .{ - .func = try self.resolveInst(new_body, old_inst.args.func), + .func = try self.resolveInst(new_body, old_inst.func), .args = args, }, .kw_args = .{}, }; break :blk &new_inst.base; }, - .unreach => try self.emitTrivial(inst.src, Inst.Unreachable), - .ret => blk: { - const old_inst = inst.cast(ir.Inst.Ret).?; - const new_inst = try self.arena.allocator.create(Inst.Return); - new_inst.* = .{ - .base = .{ - .src = inst.src, - .tag = Inst.Return.base_tag, - }, - .positionals = .{ - .operand = try self.resolveInst(new_body, old_inst.args.operand), - }, - .kw_args = .{}, - }; - break :blk &new_inst.base; - }, - .retvoid => try self.emitTrivial(inst.src, Inst.ReturnVoid), - .constant => unreachable, // excluded from function bodies + .assembly => blk: { - const old_inst = inst.cast(ir.Inst.Assembly).?; + const old_inst = inst.castTag(.assembly).?; const new_inst = try self.arena.allocator.create(Inst.Asm); - const inputs = try self.arena.allocator.alloc(*Inst, old_inst.args.inputs.len); + const inputs = try self.arena.allocator.alloc(*Inst, old_inst.inputs.len); for (inputs) |*elem, i| { - elem.* = (try self.emitStringLiteral(inst.src, old_inst.args.inputs[i])).inst; + elem.* = (try self.emitStringLiteral(inst.src, old_inst.inputs[i])).inst; } - const clobbers = try self.arena.allocator.alloc(*Inst, old_inst.args.clobbers.len); + const clobbers = try self.arena.allocator.alloc(*Inst, old_inst.clobbers.len); for (clobbers) |*elem, i| { - elem.* = (try self.emitStringLiteral(inst.src, old_inst.args.clobbers[i])).inst; + elem.* = (try self.emitStringLiteral(inst.src, old_inst.clobbers[i])).inst; } - const args = try self.arena.allocator.alloc(*Inst, old_inst.args.args.len); + const args = try self.arena.allocator.alloc(*Inst, old_inst.args.len); for (args) |*elem, i| { - elem.* = try self.resolveInst(new_body, old_inst.args.args[i]); + elem.* = try self.resolveInst(new_body, old_inst.args[i]); } new_inst.* = .{ @@ -1823,12 +1853,12 @@ const EmitZIR = struct { .tag = Inst.Asm.base_tag, }, .positionals = .{ - .asm_source = (try self.emitStringLiteral(inst.src, old_inst.args.asm_source)).inst, + .asm_source = (try self.emitStringLiteral(inst.src, 
old_inst.asm_source)).inst, .return_type = (try self.emitType(inst.src, inst.ty)).inst, }, .kw_args = .{ - .@"volatile" = old_inst.args.is_volatile, - .output = if (old_inst.args.output) |o| + .@"volatile" = old_inst.is_volatile, + .output = if (old_inst.output) |o| (try self.emitStringLiteral(inst.src, o)).inst else null, @@ -1839,65 +1869,18 @@ const EmitZIR = struct { }; break :blk &new_inst.base; }, - .ptrtoint => blk: { - const old_inst = inst.cast(ir.Inst.PtrToInt).?; - const new_inst = try self.arena.allocator.create(Inst.PtrToInt); - new_inst.* = .{ - .base = .{ - .src = inst.src, - .tag = Inst.PtrToInt.base_tag, - }, - .positionals = .{ - .ptr = try self.resolveInst(new_body, old_inst.args.ptr), - }, - .kw_args = .{}, - }; - break :blk &new_inst.base; - }, - .bitcast => blk: { - const old_inst = inst.cast(ir.Inst.BitCast).?; - const new_inst = try self.arena.allocator.create(Inst.BitCast); - new_inst.* = .{ - .base = .{ - .src = inst.src, - .tag = Inst.BitCast.base_tag, - }, - .positionals = .{ - .dest_type = (try self.emitType(inst.src, inst.ty)).inst, - .operand = try self.resolveInst(new_body, old_inst.args.operand), - }, - .kw_args = .{}, - }; - break :blk &new_inst.base; - }, - .cmp => blk: { - const old_inst = inst.cast(ir.Inst.Cmp).?; - const new_inst = try self.arena.allocator.create(Inst.Cmp); - new_inst.* = .{ - .base = .{ - .src = inst.src, - .tag = Inst.Cmp.base_tag, - }, - .positionals = .{ - .lhs = try self.resolveInst(new_body, old_inst.args.lhs), - .rhs = try self.resolveInst(new_body, old_inst.args.rhs), - .op = old_inst.args.op, - }, - .kw_args = .{}, - }; - break :blk &new_inst.base; - }, + .condbr => blk: { - const old_inst = inst.cast(ir.Inst.CondBr).?; + const old_inst = inst.castTag(.condbr).?; - var true_body = std.ArrayList(*Inst).init(self.allocator); - var false_body = std.ArrayList(*Inst).init(self.allocator); + var then_body = std.ArrayList(*Inst).init(self.allocator); + var else_body = std.ArrayList(*Inst).init(self.allocator); - defer true_body.deinit(); - defer false_body.deinit(); + defer then_body.deinit(); + defer else_body.deinit(); - try self.emitBody(old_inst.args.true_body, inst_table, &true_body); - try self.emitBody(old_inst.args.false_body, inst_table, &false_body); + try self.emitBody(old_inst.then_body, inst_table, &then_body); + try self.emitBody(old_inst.else_body, inst_table, &else_body); const new_inst = try self.arena.allocator.create(Inst.CondBr); new_inst.* = .{ @@ -1906,39 +1889,9 @@ const EmitZIR = struct { .tag = Inst.CondBr.base_tag, }, .positionals = .{ - .condition = try self.resolveInst(new_body, old_inst.args.condition), - .true_body = .{ .instructions = true_body.toOwnedSlice() }, - .false_body = .{ .instructions = false_body.toOwnedSlice() }, - }, - .kw_args = .{}, - }; - break :blk &new_inst.base; - }, - .isnull => blk: { - const old_inst = inst.cast(ir.Inst.IsNull).?; - const new_inst = try self.arena.allocator.create(Inst.IsNull); - new_inst.* = .{ - .base = .{ - .src = inst.src, - .tag = Inst.IsNull.base_tag, - }, - .positionals = .{ - .operand = try self.resolveInst(new_body, old_inst.args.operand), - }, - .kw_args = .{}, - }; - break :blk &new_inst.base; - }, - .isnonnull => blk: { - const old_inst = inst.cast(ir.Inst.IsNonNull).?; - const new_inst = try self.arena.allocator.create(Inst.IsNonNull); - new_inst.* = .{ - .base = .{ - .src = inst.src, - .tag = Inst.IsNonNull.base_tag, - }, - .positionals = .{ - .operand = try self.resolveInst(new_body, old_inst.args.operand), + .condition = try 
self.resolveInst(new_body, old_inst.condition), + .then_body = .{ .instructions = then_body.toOwnedSlice() }, + .else_body = .{ .instructions = else_body.toOwnedSlice() }, }, .kw_args = .{}, }; diff --git a/test/stage2/compare_output.zig b/test/stage2/compare_output.zig index 6a6772f935..d013573e30 100644 --- a/test/stage2/compare_output.zig +++ b/test/stage2/compare_output.zig @@ -267,5 +267,42 @@ pub fn addCases(ctx: *TestContext) !void { , "", ); + + // Requires a second move. The register allocator should figure out to re-use rax. + case.addCompareOutput( + \\export fn _start() noreturn { + \\ add(3, 4); + \\ + \\ exit(); + \\} + \\ + \\fn add(a: u32, b: u32) void { + \\ const c = a + b; // 7 + \\ const d = a + c; // 10 + \\ const e = d + b; // 14 + \\ const f = d + e; // 24 + \\ const g = e + f; // 38 + \\ const h = f + g; // 62 + \\ const i = g + h; // 100 + \\ const j = i + d; // 110 + \\ assert(j == 110); + \\} + \\ + \\pub fn assert(ok: bool) void { + \\ if (!ok) unreachable; // assertion failure + \\} + \\ + \\fn exit() noreturn { + \\ asm volatile ("syscall" + \\ : + \\ : [number] "{rax}" (231), + \\ [arg1] "{rdi}" (0) + \\ : "rcx", "r11", "memory" + \\ ); + \\ unreachable; + \\} + , + "", + ); } }
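
The core of the ir.zig change is the new instruction memory layout: per-instruction `args` structs are replaced by shared `NoOp`/`UnOp`/`BinOp` payloads, `Tag.Type()` maps each tag to its payload type, `castTag` replaces most uses of `cast`, and `operandCount`/`getOperand` give every pass one uniform way to walk operands — which is what the new death-processing loops in codegen's `genBody` and liveness's `analyzeInst` rely on. A rough, self-contained sketch of that shape (not part of the patch, written against the same mid-2020 stage2 Zig this branch targets, with made-up tags chosen only for illustration):

const std = @import("std");

// Miniature version of the layout: one shared Tag, a comptime tag -> payload-type
// mapping, and a generic operand iterator.
const Inst = struct {
    tag: Tag,

    const Tag = enum {
        retvoid,
        not,
        add,

        // Several tags can share one payload type, as the patch's doc comment notes.
        fn Type(tag: Tag) type {
            return switch (tag) {
                .retvoid => NoOp,
                .not => UnOp,
                .add => BinOp,
            };
        }
    };

    const NoOp = struct { base: Inst };
    const UnOp = struct { base: Inst, operand: *Inst };
    const BinOp = struct { base: Inst, lhs: *Inst, rhs: *Inst };

    fn castTag(base: *Inst, comptime tag: Tag) ?*tag.Type() {
        if (base.tag == tag) return @fieldParentPtr(tag.Type(), "base", base);
        return null;
    }

    // Hand-written dispatch here; the real Inst.getOperand does the same thing
    // with an `inline for` over the Tag fields.
    fn getOperand(base: *Inst, index: usize) ?*Inst {
        switch (base.tag) {
            .retvoid => return null,
            .not => return if (index == 0) base.castTag(.not).?.operand else null,
            .add => {
                const bin = base.castTag(.add).?;
                return switch (index) {
                    0 => bin.lhs,
                    1 => bin.rhs,
                    else => null,
                };
            },
        }
    }
};

pub fn main() void {
    var a = Inst.NoOp{ .base = .{ .tag = .retvoid } }; // stand-in operands
    var b = Inst.NoOp{ .base = .{ .tag = .retvoid } };
    var add = Inst.BinOp{ .base = .{ .tag = .add }, .lhs = &a.base, .rhs = &b.base };

    // The same loop shape genBody and analyzeInst use to visit operands in order.
    var i: usize = 0;
    while (add.base.getOperand(i)) |operand| : (i += 1) {
        std.debug.assert(operand.tag == .retvoid);
    }
    std.debug.assert(i == 2);
}

Consumers never switch on the concrete payload type for this: they index operands until `getOperand` returns null, and in codegen each index is paired with `operandDies(i)` so that `processDeath` can free the operand's register (stack-slot death is still marked TODO in the patch).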