From b68fa9970b5cf5bb5954da476cc8679512ce489b Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 24 Aug 2020 16:43:20 -0700 Subject: [PATCH 1/4] stage2 codegen: Rework genCondBr so that the arch-independent logic isn't buried and duplicated. --- src-self-hosted/astgen.zig | 3 ++- src-self-hosted/codegen.zig | 40 ++++++++++++++++++------------------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src-self-hosted/astgen.zig b/src-self-hosted/astgen.zig index 0d8e0dc874..23e8c9527a 100644 --- a/src-self-hosted/astgen.zig +++ b/src-self-hosted/astgen.zig @@ -13,7 +13,8 @@ const Scope = Module.Scope; const InnerError = Module.InnerError; pub const ResultLoc = union(enum) { - /// The expression is the right-hand side of assignment to `_`. + /// The expression is the right-hand side of assignment to `_`. Only the side-effects of the + /// expression should be generated. discard, /// The expression has an inferred type, and it will be evaluated as an rvalue. none, diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index cb12211206..6415fceecf 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -1540,14 +1540,15 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } fn genCondBr(self: *Self, inst: *ir.Inst.CondBr) !MCValue { - // TODO Rework this so that the arch-independent logic isn't buried and duplicated. - switch (arch) { - .x86_64 => { + const cond = try self.resolveInst(inst.condition); + + // TODO deal with liveness / deaths condbr's then_entry_deaths and else_entry_deaths + const reloc: Reloc = switch (arch) { + .i386, .x86_64 => reloc: { try self.code.ensureCapacity(self.code.items.len + 6); - const cond = try self.resolveInst(inst.condition); - switch (cond) { - .compare_flags_signed => |cmp_op| { + const opcode: u8 = switch (cond) { + .compare_flags_signed => |cmp_op| blk: { // Here we map to the opposite opcode because the jump is to the false branch. const opcode: u8 = switch (cmp_op) { .gte => 0x8c, @@ -1557,9 +1558,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .lte => 0x8f, .eq => 0x85, }; - return self.genX86CondBr(inst, opcode); + break :blk opcode; }, - .compare_flags_unsigned => |cmp_op| { + .compare_flags_unsigned => |cmp_op| blk: { // Here we map to the opposite opcode because the jump is to the false branch. 
const opcode: u8 = switch (cmp_op) { .gte => 0x82, @@ -1569,9 +1570,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .lte => 0x87, .eq => 0x85, }; - return self.genX86CondBr(inst, opcode); + break :blk opcode; }, - .register => |reg| { + .register => |reg| blk: { // test reg, 1 // TODO detect al, ax, eax try self.code.ensureCapacity(self.code.items.len + 4); @@ -1583,20 +1584,17 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { @as(u8, 0xC0) | (0 << 3) | @truncate(u3, reg.id()), 0x01, }); - return self.genX86CondBr(inst, 0x84); + break :blk 0x84; }, else => return self.fail(inst.base.src, "TODO implement condbr {} when condition is {}", .{ self.target.cpu.arch, @tagName(cond) }), - } + }; + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x0f, opcode }); + const reloc = Reloc{ .rel32 = self.code.items.len }; + self.code.items.len += 4; + break :reloc reloc; }, - else => return self.fail(inst.base.src, "TODO implement condbr for {}", .{self.target.cpu.arch}), - } - } - - fn genX86CondBr(self: *Self, inst: *ir.Inst.CondBr, opcode: u8) !MCValue { - // TODO deal with liveness / deaths condbr's then_entry_deaths and else_entry_deaths - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x0f, opcode }); - const reloc = Reloc{ .rel32 = self.code.items.len }; - self.code.items.len += 4; + else => return self.fail(inst.base.src, "TODO implement condbr {}", .{ self.target.cpu.arch }), + }; try self.genBody(inst.then_body); try self.performReloc(inst.base.src, reloc); try self.genBody(inst.else_body); From e97157f71c79257ab575598d142f3caa785a2a76 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 24 Aug 2020 23:09:12 -0700 Subject: [PATCH 2/4] stage2: codegen for conditional branching * Move branch-local register and stack allocation metadata to the function-local struct. Conditional branches clone this data in order to restore it after generating machine code for a branch. Branch-local data is now only the instruction table mapping *ir.Inst to MCValue. * Implement conditional branching - Process operand deaths - Handle register and stack allocation metadata * Avoid storing unreferenced or void typed instructions into the branch-local instruction table. * Fix integer types reporting the wrong value for hasCodeGenBits. * Remove the codegen optimization for eliding length-0 jumps. I need to reexamine how this works because it was causing invalid jumps to be emitted. --- src-self-hosted/codegen.zig | 337 +++++++++++++++++++++++------------ src-self-hosted/link/Elf.zig | 9 +- src-self-hosted/type.zig | 4 +- 3 files changed, 235 insertions(+), 115 deletions(-) diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 6415fceecf..298873e618 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -273,8 +273,22 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { /// across each runtime branch upon joining. branch_stack: *std.ArrayList(Branch), + /// The key must be canonical register. + registers: std.AutoHashMapUnmanaged(Register, *ir.Inst) = .{}, + free_registers: FreeRegInt = math.maxInt(FreeRegInt), + /// Maps offset to what is stored there. + stack: std.AutoHashMapUnmanaged(u32, StackAllocation) = .{}, + + /// Offset from the stack base, representing the end of the stack frame. + max_end_stack: u32 = 0, + /// Represents the current end stack offset. If there is no existing slot + /// to place a new stack allocation, it goes here, and then bumps `max_end_stack`. + next_stack_offset: u32 = 0, + const MCValue = union(enum) { /// No runtime bits. 
`void` types, empty structs, u0, enums with 1 tag, etc. + /// TODO Look into deleting this tag and using `dead` instead, since every use + /// of MCValue.none should be instead looking at the type and noticing it is 0 bits. none, /// Control flow will not allow this value to be observed. unreach, @@ -346,71 +360,55 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const Branch = struct { inst_table: std.AutoHashMapUnmanaged(*ir.Inst, MCValue) = .{}, - /// The key must be canonical register. - registers: std.AutoHashMapUnmanaged(Register, RegisterAllocation) = .{}, - free_registers: FreeRegInt = math.maxInt(FreeRegInt), - - /// Maps offset to what is stored there. - stack: std.AutoHashMapUnmanaged(u32, StackAllocation) = .{}, - /// Offset from the stack base, representing the end of the stack frame. - max_end_stack: u32 = 0, - /// Represents the current end stack offset. If there is no existing slot - /// to place a new stack allocation, it goes here, and then bumps `max_end_stack`. - next_stack_offset: u32 = 0, - - fn markRegUsed(self: *Branch, reg: Register) void { - if (FreeRegInt == u0) return; - const index = reg.allocIndex() orelse return; - const ShiftInt = math.Log2Int(FreeRegInt); - const shift = @intCast(ShiftInt, index); - self.free_registers &= ~(@as(FreeRegInt, 1) << shift); - } - - fn markRegFree(self: *Branch, reg: Register) void { - if (FreeRegInt == u0) return; - const index = reg.allocIndex() orelse return; - const ShiftInt = math.Log2Int(FreeRegInt); - const shift = @intCast(ShiftInt, index); - self.free_registers |= @as(FreeRegInt, 1) << shift; - } - - /// Before calling, must ensureCapacity + 1 on branch.registers. - /// Returns `null` if all registers are allocated. - fn allocReg(self: *Branch, inst: *ir.Inst) ?Register { - const free_index = @ctz(FreeRegInt, self.free_registers); - if (free_index >= callee_preserved_regs.len) { - return null; - } - self.free_registers &= ~(@as(FreeRegInt, 1) << free_index); - const reg = callee_preserved_regs[free_index]; - self.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst }); - log.debug("alloc {} => {*}", .{reg, inst}); - return reg; - } - - /// Does not track the register. - fn findUnusedReg(self: *Branch) ?Register { - const free_index = @ctz(FreeRegInt, self.free_registers); - if (free_index >= callee_preserved_regs.len) { - return null; - } - return callee_preserved_regs[free_index]; - } fn deinit(self: *Branch, gpa: *Allocator) void { self.inst_table.deinit(gpa); - self.registers.deinit(gpa); - self.stack.deinit(gpa); self.* = undefined; } }; - const RegisterAllocation = struct { - inst: *ir.Inst, - }; + fn markRegUsed(self: *Self, reg: Register) void { + if (FreeRegInt == u0) return; + const index = reg.allocIndex() orelse return; + const ShiftInt = math.Log2Int(FreeRegInt); + const shift = @intCast(ShiftInt, index); + self.free_registers &= ~(@as(FreeRegInt, 1) << shift); + } + + fn markRegFree(self: *Self, reg: Register) void { + if (FreeRegInt == u0) return; + const index = reg.allocIndex() orelse return; + const ShiftInt = math.Log2Int(FreeRegInt); + const shift = @intCast(ShiftInt, index); + self.free_registers |= @as(FreeRegInt, 1) << shift; + } + + /// Before calling, must ensureCapacity + 1 on self.registers. + /// Returns `null` if all registers are allocated. 
+ fn allocReg(self: *Self, inst: *ir.Inst) ?Register { + const free_index = @ctz(FreeRegInt, self.free_registers); + if (free_index >= callee_preserved_regs.len) { + return null; + } + self.free_registers &= ~(@as(FreeRegInt, 1) << free_index); + const reg = callee_preserved_regs[free_index]; + self.registers.putAssumeCapacityNoClobber(reg, inst); + log.debug("alloc {} => {*}", .{reg, inst}); + return reg; + } + + /// Does not track the register. + fn findUnusedReg(self: *Self) ?Register { + const free_index = @ctz(FreeRegInt, self.free_registers); + if (free_index >= callee_preserved_regs.len) { + return null; + } + return callee_preserved_regs[free_index]; + } const StackAllocation = struct { inst: *ir.Inst, + /// TODO do we need size? should be determined by inst.ty.abiSize() size: u32, }; @@ -435,8 +433,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { branch_stack.items[0].deinit(bin_file.allocator); branch_stack.deinit(); } - const branch = try branch_stack.addOne(); - branch.* = .{}; + try branch_stack.append(.{}); const src_data: struct {lbrace_src: usize, rbrace_src: usize, source: []const u8} = blk: { if (module_fn.owner_decl.scope.cast(Module.Scope.File)) |scope_file| { @@ -476,6 +473,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .rbrace_src = src_data.rbrace_src, .source = src_data.source, }; + defer function.registers.deinit(bin_file.allocator); + defer function.stack.deinit(bin_file.allocator); defer function.exitlude_jump_relocs.deinit(bin_file.allocator); var call_info = function.resolveCallingConventionValues(src, fn_type) catch |err| switch (err) { @@ -487,7 +486,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { function.args = call_info.args; function.ret_mcv = call_info.return_value; function.stack_align = call_info.stack_align; - branch.max_end_stack = call_info.stack_byte_count; + function.max_end_stack = call_info.stack_byte_count; function.gen() catch |err| switch (err) { error.CodegenFail => return Result{ .fail = function.err_msg.? }, @@ -523,7 +522,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { try self.dbgSetPrologueEnd(); try self.genBody(self.mod_fn.analysis.success); - const stack_end = self.branch_stack.items[0].max_end_stack; + const stack_end = self.max_end_stack; if (stack_end > math.maxInt(i32)) return self.fail(self.src, "too much stack used in call parameters", .{}); const aligned_stack_end = mem.alignForward(stack_end, self.stack_align); @@ -580,13 +579,15 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } fn genBody(self: *Self, body: ir.Body) InnerError!void { - const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - const inst_table = &branch.inst_table; for (body.instructions) |inst| { + try self.ensureProcessDeathCapacity(@popCount(@TypeOf(inst.deaths), inst.deaths)); + const mcv = try self.genFuncInst(inst); - log.debug("{*} => {}", .{inst, mcv}); - // TODO don't put void or dead things in here - try inst_table.putNoClobber(self.gpa, inst, mcv); + if (!inst.isUnused()) { + log.debug("{*} => {}", .{inst, mcv}); + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + try branch.inst_table.putNoClobber(self.gpa, inst, mcv); + } var i: ir.Inst.DeathsBitIndex = 0; while (inst.getOperand(i)) |operand| : (i += 1) { @@ -628,21 +629,27 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { self.dbg_line.appendAssumeCapacity(DW.LNS_copy); } + /// Asserts there is already capacity to insert into top branch inst_table. 
fn processDeath(self: *Self, inst: *ir.Inst) void { + if (inst.tag == .constant) return; // Constants are immortal. + const prev_value = self.getResolvedInstValue(inst); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - const entry = branch.inst_table.getEntry(inst) orelse return; - const prev_value = entry.value; - entry.value = .dead; + branch.inst_table.putAssumeCapacity(inst, .dead); switch (prev_value) { .register => |reg| { const canon_reg = toCanonicalReg(reg); - _ = branch.registers.remove(canon_reg); - branch.markRegFree(canon_reg); + _ = self.registers.remove(canon_reg); + self.markRegFree(canon_reg); }, else => {}, // TODO process stack allocation death } } + fn ensureProcessDeathCapacity(self: *Self, additional_count: usize) !void { + const table = &self.branch_stack.items[self.branch_stack.items.len - 1].inst_table; + try table.ensureCapacity(self.gpa, table.items().len + additional_count); + } + /// Adds a Type to the .debug_info at the current position. The bytes will be populated later, /// after codegen for this symbol is done. fn addDbgInfoTypeReloc(self: *Self, ty: Type) !void { @@ -705,13 +712,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { fn allocMem(self: *Self, inst: *ir.Inst, abi_size: u32, abi_align: u32) !u32 { if (abi_align > self.stack_align) self.stack_align = abi_align; - const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; // TODO find a free slot instead of always appending - const offset = mem.alignForwardGeneric(u32, branch.next_stack_offset, abi_align); - branch.next_stack_offset = offset + abi_size; - if (branch.next_stack_offset > branch.max_end_stack) - branch.max_end_stack = branch.next_stack_offset; - try branch.stack.putNoClobber(self.gpa, offset, .{ + const offset = mem.alignForwardGeneric(u32, self.next_stack_offset, abi_align); + self.next_stack_offset = offset + abi_size; + if (self.next_stack_offset > self.max_end_stack) + self.max_end_stack = self.next_stack_offset; + try self.stack.putNoClobber(self.gpa, offset, .{ .inst = inst, .size = abi_size, }); @@ -737,15 +743,14 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const abi_align = elem_ty.abiAlignment(self.target.*); if (abi_align > self.stack_align) self.stack_align = abi_align; - const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; if (reg_ok) { // Make sure the type can fit in a register before we try to allocate one. const ptr_bits = arch.ptrBitWidth(); const ptr_bytes: u64 = @divExact(ptr_bits, 8); if (abi_size <= ptr_bytes) { - try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); - if (branch.allocReg(inst)) |reg| { + try self.registers.ensureCapacity(self.gpa, self.registers.items().len + 1); + if (self.allocReg(inst)) |reg| { return MCValue{ .register = registerAlias(reg, abi_size) }; } } @@ -758,20 +763,18 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { /// allocated. A second call to `copyToTmpRegister` may return the same register. /// This can have a side effect of spilling instructions to the stack to free up a register. fn copyToTmpRegister(self: *Self, src: usize, mcv: MCValue) !Register { - const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - - const reg = branch.findUnusedReg() orelse b: { + const reg = self.findUnusedReg() orelse b: { // We'll take over the first register. Move the instruction that was previously // there to a stack allocation. 
const reg = callee_preserved_regs[0]; - const regs_entry = branch.registers.remove(reg).?; - const spilled_inst = regs_entry.value.inst; + const regs_entry = self.registers.remove(reg).?; + const spilled_inst = regs_entry.value; const stack_mcv = try self.allocRegOrMem(spilled_inst, false); - const inst_entry = branch.inst_table.getEntry(spilled_inst).?; - const reg_mcv = inst_entry.value; + const reg_mcv = self.getResolvedInstValue(spilled_inst); assert(reg == toCanonicalReg(reg_mcv.register)); - inst_entry.value = stack_mcv; + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + try branch.inst_table.put(self.gpa, spilled_inst, stack_mcv); try self.genSetStack(src, spilled_inst.ty, stack_mcv.stack_offset, reg_mcv); break :b reg; @@ -784,22 +787,21 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { /// `reg_owner` is the instruction that gets associated with the register in the register table. /// This can have a side effect of spilling instructions to the stack to free up a register. fn copyToNewRegister(self: *Self, reg_owner: *ir.Inst, mcv: MCValue) !MCValue { - const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); + try self.registers.ensureCapacity(self.gpa, self.registers.items().len + 1); - const reg = branch.allocReg(reg_owner) orelse b: { + const reg = self.allocReg(reg_owner) orelse b: { // We'll take over the first register. Move the instruction that was previously // there to a stack allocation. const reg = callee_preserved_regs[0]; - const regs_entry = branch.registers.getEntry(reg).?; - const spilled_inst = regs_entry.value.inst; - regs_entry.value = .{ .inst = reg_owner }; + const regs_entry = self.registers.getEntry(reg).?; + const spilled_inst = regs_entry.value; + regs_entry.value = reg_owner; const stack_mcv = try self.allocRegOrMem(spilled_inst, false); - const inst_entry = branch.inst_table.getEntry(spilled_inst).?; - const reg_mcv = inst_entry.value; + const reg_mcv = self.getResolvedInstValue(spilled_inst); assert(reg == toCanonicalReg(reg_mcv.register)); - inst_entry.value = stack_mcv; + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + try branch.inst_table.put(self.gpa, spilled_inst, stack_mcv); try self.genSetStack(reg_owner.src, spilled_inst.ty, stack_mcv.stack_offset, reg_mcv); break :b reg; @@ -934,9 +936,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .register => |reg| { // If it's in the registers table, need to associate the register with the // new instruction. 
- const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - if (branch.registers.getEntry(toCanonicalReg(reg))) |entry| { - entry.value = .{ .inst = inst }; + if (self.registers.getEntry(toCanonicalReg(reg))) |entry| { + entry.value = inst; } log.debug("reusing {} => {*}", .{reg, inst}); }, @@ -1231,8 +1232,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { if (inst.base.isUnused()) return MCValue.dead; - const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); + try self.registers.ensureCapacity(self.gpa, self.registers.items().len + 1); const result = self.args[self.arg_index]; self.arg_index += 1; @@ -1240,8 +1240,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const name_with_null = inst.name[0..mem.lenZ(inst.name) + 1]; switch (result) { .register => |reg| { - branch.registers.putAssumeCapacityNoClobber(toCanonicalReg(reg), .{ .inst = &inst.base }); - branch.markRegUsed(reg); + self.registers.putAssumeCapacityNoClobber(toCanonicalReg(reg), &inst.base); + self.markRegUsed(reg); try self.dbg_info.ensureCapacity(self.dbg_info.items.len + 8 + name_with_null.len); self.dbg_info.appendAssumeCapacity(link.File.Elf.abbrev_parameter); @@ -1536,13 +1536,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { fn genDbgStmt(self: *Self, inst: *ir.Inst.NoOp) !MCValue { try self.dbgAdvancePCAndLine(inst.base.src); - return MCValue.none; + assert(inst.base.isUnused()); + return MCValue.dead; } fn genCondBr(self: *Self, inst: *ir.Inst.CondBr) !MCValue { const cond = try self.resolveInst(inst.condition); - // TODO deal with liveness / deaths condbr's then_entry_deaths and else_entry_deaths const reloc: Reloc = switch (arch) { .i386, .x86_64 => reloc: { try self.code.ensureCapacity(self.code.items.len + 6); @@ -1595,9 +1595,117 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }, else => return self.fail(inst.base.src, "TODO implement condbr {}", .{ self.target.cpu.arch }), }; + + // Capture the state of register and stack allocation state so that we can revert to it. + const parent_next_stack_offset = self.next_stack_offset; + const parent_free_registers = self.free_registers; + var parent_stack = try self.stack.clone(self.gpa); + defer parent_stack.deinit(self.gpa); + var parent_registers = try self.registers.clone(self.gpa); + defer parent_registers.deinit(self.gpa); + + try self.branch_stack.append(.{}); + + const then_deaths = inst.thenDeaths(); + try self.ensureProcessDeathCapacity(then_deaths.len); + for (then_deaths) |operand| { + self.processDeath(operand); + } try self.genBody(inst.then_body); + + // Revert to the previous register and stack allocation state. 
+ + var saved_then_branch = self.branch_stack.pop(); + defer saved_then_branch.deinit(self.gpa); + + self.registers.deinit(self.gpa); + self.registers = parent_registers; + parent_registers = .{}; + + self.stack.deinit(self.gpa); + self.stack = parent_stack; + parent_stack = .{}; + + self.next_stack_offset = parent_next_stack_offset; + self.free_registers = parent_free_registers; + try self.performReloc(inst.base.src, reloc); + const else_branch = self.branch_stack.addOneAssumeCapacity(); + else_branch.* = .{}; + + const else_deaths = inst.elseDeaths(); + try self.ensureProcessDeathCapacity(else_deaths.len); + for (else_deaths) |operand| { + self.processDeath(operand); + } try self.genBody(inst.else_body); + + // At this point, each branch will possibly have conflicting values for where + // each instruction is stored. They agree, however, on which instructions are alive/dead. + // We use the first ("then") branch as canonical, and here emit + // instructions into the second ("else") branch to make it conform. + // We continue respect the data structure semantic guarantees of the else_branch so + // that we can use all the code emitting abstractions. This is why at the bottom we + // assert that parent_branch.free_registers equals the saved_then_branch.free_registers + // rather than assigning it. + const parent_branch = &self.branch_stack.items[self.branch_stack.items.len - 2]; + try parent_branch.inst_table.ensureCapacity(self.gpa, parent_branch.inst_table.items().len + + else_branch.inst_table.items().len); + for (else_branch.inst_table.items()) |else_entry| { + const canon_mcv = if (saved_then_branch.inst_table.remove(else_entry.key)) |then_entry| blk: { + // The instruction's MCValue is overridden in both branches. + parent_branch.inst_table.putAssumeCapacity(else_entry.key, then_entry.value); + if (else_entry.value == .dead) { + assert(then_entry.value == .dead); + continue; + } + break :blk then_entry.value; + } else blk: { + if (else_entry.value == .dead) + continue; + // The instruction is only overridden in the else branch. + var i: usize = self.branch_stack.items.len - 2; + while (true) { + i -= 1; + if (self.branch_stack.items[i].inst_table.get(else_entry.key)) |mcv| { + assert(mcv != .dead); + break :blk mcv; + } + } + }; + log.debug("consolidating else_entry {*} {}=>{}", .{else_entry.key, else_entry.value, canon_mcv}); + // TODO make sure the destination stack offset / register does not already have something + // going on there. + try self.setRegOrMem(inst.base.src, else_entry.key.ty, canon_mcv, else_entry.value); + // TODO track the new register / stack allocation + } + try parent_branch.inst_table.ensureCapacity(self.gpa, parent_branch.inst_table.items().len + + saved_then_branch.inst_table.items().len); + for (saved_then_branch.inst_table.items()) |then_entry| { + // We already deleted the items from this table that matched the else_branch. + // So these are all instructions that are only overridden in the then branch. 
+ parent_branch.inst_table.putAssumeCapacity(then_entry.key, then_entry.value); + if (then_entry.value == .dead) + continue; + const parent_mcv = blk: { + var i: usize = self.branch_stack.items.len - 2; + while (true) { + i -= 1; + if (self.branch_stack.items[i].inst_table.get(then_entry.key)) |mcv| { + assert(mcv != .dead); + break :blk mcv; + } + } + }; + log.debug("consolidating then_entry {*} {}=>{}", .{then_entry.key, parent_mcv, then_entry.value}); + // TODO make sure the destination stack offset / register does not already have something + // going on there. + try self.setRegOrMem(inst.base.src, then_entry.key.ty, parent_mcv, then_entry.value); + // TODO track the new register / stack allocation + } + + self.branch_stack.pop().deinit(self.gpa); + return MCValue.unreach; } @@ -1671,11 +1779,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { switch (reloc) { .rel32 => |pos| { const amt = self.code.items.len - (pos + 4); - // If it wouldn't jump at all, elide it. - if (amt == 0) { - self.code.items.len -= 5; - return; - } + // Here it would be tempting to implement testing for amt == 0 and then elide the + // jump. However, that will cause a problem because other jumps may assume that they + // can jump to this code. Or maybe I didn't understand something when I was debugging. + // It could be worth another look. Anyway, that's why that isn't done here. Probably the + // best place to elide jumps will be in semantic analysis, by inlining blocks that only + // only have 1 break instruction. const s32_amt = math.cast(i32, amt) catch return self.fail(src, "unable to perform relocation: jump too far", .{}); mem.writeIntLittle(i32, self.code.items[pos..][0..4], s32_amt); @@ -2280,8 +2389,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } fn resolveInst(self: *Self, inst: *ir.Inst) !MCValue { + // If the type has no codegen bits, no need to store it. + if (!inst.ty.hasCodeGenBits()) + return MCValue.none; + // Constants have static lifetimes, so they are always memoized in the outer most table. - if (inst.cast(ir.Inst.Constant)) |const_inst| { + if (inst.castTag(.constant)) |const_inst| { const branch = &self.branch_stack.items[0]; const gop = try branch.inst_table.getOrPut(self.gpa, inst); if (!gop.found_existing) { @@ -2290,6 +2403,10 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return gop.entry.value; } + return self.getResolvedInstValue(inst); + } + + fn getResolvedInstValue(self: *Self, inst: *ir.Inst) MCValue { // Treat each stack item as a "layer" on top of the previous one. var i: usize = self.branch_stack.items.len; while (true) { diff --git a/src-self-hosted/link/Elf.zig b/src-self-hosted/link/Elf.zig index 1d18344cbb..b67de55dd5 100644 --- a/src-self-hosted/link/Elf.zig +++ b/src-self-hosted/link/Elf.zig @@ -1640,9 +1640,12 @@ pub fn updateDecl(self: *Elf, module: *Module, decl: *Module.Decl) !void { else => false, }; if (is_fn) { - //if (mem.eql(u8, mem.spanZ(decl.name), "add")) { - // typed_value.val.cast(Value.Payload.Function).?.func.dump(module.*); - //} + { + //if (mem.eql(u8, mem.spanZ(decl.name), "add")) { + //} + std.debug.print("\n{}\n", .{decl.name}); + typed_value.val.cast(Value.Payload.Function).?.func.dump(module.*); + } // For functions we need to add a prologue to the debug line program. 
try dbg_line_buffer.ensureCapacity(26); diff --git a/src-self-hosted/type.zig b/src-self-hosted/type.zig index eb8fa2acd7..82079aa9f7 100644 --- a/src-self-hosted/type.zig +++ b/src-self-hosted/type.zig @@ -771,8 +771,8 @@ pub const Type = extern union { .array => self.elemType().hasCodeGenBits() and self.arrayLen() != 0, .array_u8 => self.arrayLen() != 0, .array_sentinel, .single_const_pointer, .single_mut_pointer, .many_const_pointer, .many_mut_pointer, .c_const_pointer, .c_mut_pointer, .const_slice, .mut_slice, .pointer => self.elemType().hasCodeGenBits(), - .int_signed => self.cast(Payload.IntSigned).?.bits == 0, - .int_unsigned => self.cast(Payload.IntUnsigned).?.bits == 0, + .int_signed => self.cast(Payload.IntSigned).?.bits != 0, + .int_unsigned => self.cast(Payload.IntUnsigned).?.bits != 0, .error_union => { const payload = self.cast(Payload.ErrorUnion).?; From 237d9a105d5eec82aefc63da8e844c47d8990eea Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 25 Aug 2020 21:31:03 -0700 Subject: [PATCH 3/4] stage2: support debug dumping zir as a build option So that it's not needed to manually comment and uncomment the debug code. --- build.zig | 2 ++ src-self-hosted/link/Elf.zig | 14 +++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/build.zig b/build.zig index 9c50025375..869fb3f208 100644 --- a/build.zig +++ b/build.zig @@ -83,6 +83,7 @@ pub fn build(b: *Builder) !void { } const log_scopes = b.option([]const []const u8, "log", "Which log scopes to enable") orelse &[0][]const u8{}; + const zir_dumps = b.option([]const []const u8, "dump-zir", "Which functions to dump ZIR for before codegen") orelse &[0][]const u8{}; const opt_version_string = b.option([]const u8, "version-string", "Override Zig version string. Default is to find out with git."); const version = if (opt_version_string) |version| version else v: { @@ -103,6 +104,7 @@ pub fn build(b: *Builder) !void { exe.addBuildOption([]const u8, "version", version); exe.addBuildOption([]const []const u8, "log_scopes", log_scopes); + exe.addBuildOption([]const []const u8, "zir_dumps", zir_dumps); exe.addBuildOption(bool, "enable_tracy", tracy != null); if (tracy) |tracy_path| { const client_cpp = fs.path.join( diff --git a/src-self-hosted/link/Elf.zig b/src-self-hosted/link/Elf.zig index b67de55dd5..f2a3218118 100644 --- a/src-self-hosted/link/Elf.zig +++ b/src-self-hosted/link/Elf.zig @@ -17,6 +17,7 @@ const Type = @import("../type.zig").Type; const link = @import("../link.zig"); const File = link.File; const Elf = @This(); +const build_options = @import("build_options"); const default_entry_addr = 0x8000000; @@ -1640,11 +1641,14 @@ pub fn updateDecl(self: *Elf, module: *Module, decl: *Module.Decl) !void { else => false, }; if (is_fn) { - { - //if (mem.eql(u8, mem.spanZ(decl.name), "add")) { - //} - std.debug.print("\n{}\n", .{decl.name}); - typed_value.val.cast(Value.Payload.Function).?.func.dump(module.*); + const zir_dumps = if (std.builtin.is_test) &[0][]const u8{} else build_options.zir_dumps; + if (zir_dumps.len != 0) { + for (zir_dumps) |fn_name| { + if (mem.eql(u8, mem.spanZ(decl.name), fn_name)) { + std.debug.print("\n{}\n", .{decl.name}); + typed_value.val.cast(Value.Payload.Function).?.func.dump(module.*); + } + } } // For functions we need to add a prologue to the debug line program. 
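Note on the register bookkeeping that the second patch moves from Branch onto the Function struct: it is a bitmask free list over callee_preserved_regs. free_registers keeps one bit per allocatable register, @ctz picks the lowest free slot, and markRegUsed/markRegFree toggle individual bits. The standalone sketch below shows the same scheme in isolation; the four register names, the u4 mask width, and the free-function form are invented for illustration (the real code sizes FreeRegInt from callee_preserved_regs.len and records the owning *ir.Inst in the registers map), and the builtin call style follows the Zig version in this tree.

const std = @import("std");
const math = std.math;

// Illustrative register set; codegen.zig derives this from callee_preserved_regs per arch.
const reg_names = [_][]const u8{ "rax", "rcx", "rdx", "rsi" };
const FreeRegInt = u4; // one bit per register, 1 means free
const ShiftInt = math.Log2Int(FreeRegInt);

var free_registers: FreeRegInt = math.maxInt(FreeRegInt); // all registers start out free

/// Claims the lowest-numbered free register, or returns null if none are free.
fn allocReg() ?usize {
    const free_index = @ctz(FreeRegInt, free_registers);
    if (free_index >= reg_names.len) return null; // @ctz yields the bit width when no bit is set
    const shift = @intCast(ShiftInt, free_index);
    free_registers &= ~(@as(FreeRegInt, 1) << shift);
    return free_index;
}

/// Returns a register to the free set.
fn markRegFree(index: usize) void {
    const shift = @intCast(ShiftInt, index);
    free_registers |= @as(FreeRegInt, 1) << shift;
}

pub fn main() void {
    std.debug.assert(allocReg().? == 0);
    std.debug.assert(allocReg().? == 1);
    markRegFree(0);
    std.debug.assert(allocReg().? == 0); // a freed slot is handed out again
}

Keeping the mask alongside the registers hash map is what lets allocReg find a candidate with a single @ctz instead of scanning the map, and it is also what makes the branch-state save/restore in genCondBr cheap: free_registers and next_stack_offset are plain integers that can be copied before the then-branch and reassigned afterwards. As for the third patch, the new dump-zir option sits next to the existing log-scopes option and is presumably passed the same way on the command line, so dumping a function's ZIR before codegen should look something like zig build -Ddump-zir=add (the function name here is only an example).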
From 0c5faa61aebca4215683d233dd52bf3a7a5d1db6 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 26 Aug 2020 01:00:04 -0700 Subject: [PATCH 4/4] stage2: codegen: fix reuseOperand not doing death bookkeeping --- src-self-hosted/codegen.zig | 7 ++++- src-self-hosted/zir.zig | 12 ++++++- test/stage2/test.zig | 62 +++++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 2 deletions(-) diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 298873e618..b282c2011b 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -632,6 +632,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { /// Asserts there is already capacity to insert into top branch inst_table. fn processDeath(self: *Self, inst: *ir.Inst) void { if (inst.tag == .constant) return; // Constants are immortal. + // When editing this function, note that the logic must synchronize with `reuseOperand`. const prev_value = self.getResolvedInstValue(inst); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; branch.inst_table.putAssumeCapacity(inst, .dead); @@ -951,6 +952,10 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // Prevent the operand deaths processing code from deallocating it. inst.clearOperandDeath(op_index); + // That makes us responsible for doing the rest of the stuff that processDeath would have done. + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + branch.inst_table.putAssumeCapacity(inst.getOperand(op_index).?, .dead); + return true; } @@ -1666,7 +1671,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // The instruction is only overridden in the else branch. var i: usize = self.branch_stack.items.len - 2; while (true) { - i -= 1; + i -= 1; // If this overflows, the question is: why wasn't the instruction marked dead? if (self.branch_stack.items[i].inst_table.get(else_entry.key)) |mcv| { assert(mcv != .dead); break :blk mcv; diff --git a/src-self-hosted/zir.zig b/src-self-hosted/zir.zig index 3557c88f4e..c552f28553 100644 --- a/src-self-hosted/zir.zig +++ b/src-self-hosted/zir.zig @@ -954,6 +954,7 @@ pub const Module = struct { pub const MetaData = struct { deaths: ir.Inst.DeathsInt, + addr: usize, }; pub const BodyMetaData = struct { @@ -1152,6 +1153,12 @@ const Writer = struct { try self.writeInstToStream(stream, inst); if (self.module.metadata.get(inst)) |metadata| { try stream.print(" ; deaths=0b{b}", .{metadata.deaths}); + // This is conditionally compiled in because addresses mess up the tests due + // to Address Space Layout Randomization. It's super useful when debugging + // codegen.zig though. + if (!std.builtin.is_test) { + try stream.print(" 0x{x}", .{metadata.addr}); + } } self.indent -= 2; try stream.writeByte('\n'); @@ -2417,7 +2424,10 @@ const EmitZIR = struct { .varptr => @panic("TODO"), }; - try self.metadata.put(new_inst, .{ .deaths = inst.deaths }); + try self.metadata.put(new_inst, .{ + .deaths = inst.deaths, + .addr = @ptrToInt(inst), + }); try instructions.append(new_inst); try inst_table.put(inst, new_inst); } diff --git a/test/stage2/test.zig b/test/stage2/test.zig index fee8886ddb..1a0d801ac3 100644 --- a/test/stage2/test.zig +++ b/test/stage2/test.zig @@ -694,6 +694,68 @@ pub fn addCases(ctx: *TestContext) !void { "", ); + // Reusing the registers of dead operands playing nicely with conditional branching. 
+ case.addCompareOutput( + \\export fn _start() noreturn { + \\ assert(add(3, 4) == 791); + \\ assert(add(4, 3) == 79); + \\ + \\ exit(); + \\} + \\ + \\fn add(a: u32, b: u32) u32 { + \\ const x: u32 = if (a < b) blk: { + \\ const c = a + b; // 7 + \\ const d = a + c; // 10 + \\ const e = d + b; // 14 + \\ const f = d + e; // 24 + \\ const g = e + f; // 38 + \\ const h = f + g; // 62 + \\ const i = g + h; // 100 + \\ const j = i + d; // 110 + \\ const k = i + j; // 210 + \\ const l = k + c; // 217 + \\ const m = l + d; // 227 + \\ const n = m + e; // 241 + \\ const o = n + f; // 265 + \\ const p = o + g; // 303 + \\ const q = p + h; // 365 + \\ const r = q + i; // 465 + \\ const s = r + j; // 575 + \\ const t = s + k; // 785 + \\ break :blk t; + \\ } else blk: { + \\ const t = b + b + a; // 10 + \\ const c = a + t; // 14 + \\ const d = c + t; // 24 + \\ const e = d + t; // 34 + \\ const f = e + t; // 44 + \\ const g = f + t; // 54 + \\ const h = c + g; // 68 + \\ break :blk h + b; // 71 + \\ }; + \\ const y = x + a; // 788, 75 + \\ const z = y + a; // 791, 79 + \\ return z; + \\} + \\ + \\pub fn assert(ok: bool) void { + \\ if (!ok) unreachable; // assertion failure + \\} + \\ + \\fn exit() noreturn { + \\ asm volatile ("syscall" + \\ : + \\ : [number] "{rax}" (231), + \\ [arg1] "{rdi}" (0) + \\ : "rcx", "r11", "memory" + \\ ); + \\ unreachable; + \\} + , + "", + ); + // Character literals and multiline strings. case.addCompareOutput( \\export fn _start() noreturn {