From b68fa9970b5cf5bb5954da476cc8679512ce489b Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 24 Aug 2020 16:43:20 -0700 Subject: [PATCH 1/4] stage2 codegen: Rework genCondBr so that the arch-independent logic isn't buried and duplicated. --- src-self-hosted/astgen.zig | 3 ++- src-self-hosted/codegen.zig | 40 ++++++++++++++++++------------------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src-self-hosted/astgen.zig b/src-self-hosted/astgen.zig index 0d8e0dc874..23e8c9527a 100644 --- a/src-self-hosted/astgen.zig +++ b/src-self-hosted/astgen.zig @@ -13,7 +13,8 @@ const Scope = Module.Scope; const InnerError = Module.InnerError; pub const ResultLoc = union(enum) { - /// The expression is the right-hand side of assignment to `_`. + /// The expression is the right-hand side of assignment to `_`. Only the side-effects of the + /// expression should be generated. discard, /// The expression has an inferred type, and it will be evaluated as an rvalue. none, diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index cb12211206..6415fceecf 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -1540,14 +1540,15 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } fn genCondBr(self: *Self, inst: *ir.Inst.CondBr) !MCValue { - // TODO Rework this so that the arch-independent logic isn't buried and duplicated. - switch (arch) { - .x86_64 => { + const cond = try self.resolveInst(inst.condition); + + // TODO deal with liveness / deaths condbr's then_entry_deaths and else_entry_deaths + const reloc: Reloc = switch (arch) { + .i386, .x86_64 => reloc: { try self.code.ensureCapacity(self.code.items.len + 6); - const cond = try self.resolveInst(inst.condition); - switch (cond) { - .compare_flags_signed => |cmp_op| { + const opcode: u8 = switch (cond) { + .compare_flags_signed => |cmp_op| blk: { // Here we map to the opposite opcode because the jump is to the false branch. const opcode: u8 = switch (cmp_op) { .gte => 0x8c, @@ -1557,9 +1558,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .lte => 0x8f, .eq => 0x85, }; - return self.genX86CondBr(inst, opcode); + break :blk opcode; }, - .compare_flags_unsigned => |cmp_op| { + .compare_flags_unsigned => |cmp_op| blk: { // Here we map to the opposite opcode because the jump is to the false branch. 
const opcode: u8 = switch (cmp_op) { .gte => 0x82, @@ -1569,9 +1570,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .lte => 0x87, .eq => 0x85, }; - return self.genX86CondBr(inst, opcode); + break :blk opcode; }, - .register => |reg| { + .register => |reg| blk: { // test reg, 1 // TODO detect al, ax, eax try self.code.ensureCapacity(self.code.items.len + 4); @@ -1583,20 +1584,17 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { @as(u8, 0xC0) | (0 << 3) | @truncate(u3, reg.id()), 0x01, }); - return self.genX86CondBr(inst, 0x84); + break :blk 0x84; }, else => return self.fail(inst.base.src, "TODO implement condbr {} when condition is {}", .{ self.target.cpu.arch, @tagName(cond) }), - } + }; + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x0f, opcode }); + const reloc = Reloc{ .rel32 = self.code.items.len }; + self.code.items.len += 4; + break :reloc reloc; }, - else => return self.fail(inst.base.src, "TODO implement condbr for {}", .{self.target.cpu.arch}), - } - } - - fn genX86CondBr(self: *Self, inst: *ir.Inst.CondBr, opcode: u8) !MCValue { - // TODO deal with liveness / deaths condbr's then_entry_deaths and else_entry_deaths - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x0f, opcode }); - const reloc = Reloc{ .rel32 = self.code.items.len }; - self.code.items.len += 4; + else => return self.fail(inst.base.src, "TODO implement condbr {}", .{ self.target.cpu.arch }), + }; try self.genBody(inst.then_body); try self.performReloc(inst.base.src, reloc); try self.genBody(inst.else_body); From e97157f71c79257ab575598d142f3caa785a2a76 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 24 Aug 2020 23:09:12 -0700 Subject: [PATCH 2/4] stage2: codegen for conditional branching * Move branch-local register and stack allocation metadata to the function-local struct. Conditional branches clone this data in order to restore it after generating machine code for a branch. Branch-local data is now only the instruction table mapping *ir.Inst to MCValue. * Implement conditional branching - Process operand deaths - Handle register and stack allocation metadata * Avoid storing unreferenced or void typed instructions into the branch-local instruction table. * Fix integer types reporting the wrong value for hasCodeGenBits. * Remove the codegen optimization for eliding length-0 jumps. I need to reexamine how this works because it was causing invalid jumps to be emitted. --- src-self-hosted/codegen.zig | 337 +++++++++++++++++++++++------------ src-self-hosted/link/Elf.zig | 9 +- src-self-hosted/type.zig | 4 +- 3 files changed, 235 insertions(+), 115 deletions(-) diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 6415fceecf..298873e618 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -273,8 +273,22 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { /// across each runtime branch upon joining. branch_stack: *std.ArrayList(Branch), + /// The key must be canonical register. + registers: std.AutoHashMapUnmanaged(Register, *ir.Inst) = .{}, + free_registers: FreeRegInt = math.maxInt(FreeRegInt), + /// Maps offset to what is stored there. + stack: std.AutoHashMapUnmanaged(u32, StackAllocation) = .{}, + + /// Offset from the stack base, representing the end of the stack frame. + max_end_stack: u32 = 0, + /// Represents the current end stack offset. If there is no existing slot + /// to place a new stack allocation, it goes here, and then bumps `max_end_stack`. + next_stack_offset: u32 = 0, + const MCValue = union(enum) { /// No runtime bits. 
`void` types, empty structs, u0, enums with 1 tag, etc. + /// TODO Look into deleting this tag and using `dead` instead, since every use + /// of MCValue.none should be instead looking at the type and noticing it is 0 bits. none, /// Control flow will not allow this value to be observed. unreach, @@ -346,71 +360,55 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const Branch = struct { inst_table: std.AutoHashMapUnmanaged(*ir.Inst, MCValue) = .{}, - /// The key must be canonical register. - registers: std.AutoHashMapUnmanaged(Register, RegisterAllocation) = .{}, - free_registers: FreeRegInt = math.maxInt(FreeRegInt), - - /// Maps offset to what is stored there. - stack: std.AutoHashMapUnmanaged(u32, StackAllocation) = .{}, - /// Offset from the stack base, representing the end of the stack frame. - max_end_stack: u32 = 0, - /// Represents the current end stack offset. If there is no existing slot - /// to place a new stack allocation, it goes here, and then bumps `max_end_stack`. - next_stack_offset: u32 = 0, - - fn markRegUsed(self: *Branch, reg: Register) void { - if (FreeRegInt == u0) return; - const index = reg.allocIndex() orelse return; - const ShiftInt = math.Log2Int(FreeRegInt); - const shift = @intCast(ShiftInt, index); - self.free_registers &= ~(@as(FreeRegInt, 1) << shift); - } - - fn markRegFree(self: *Branch, reg: Register) void { - if (FreeRegInt == u0) return; - const index = reg.allocIndex() orelse return; - const ShiftInt = math.Log2Int(FreeRegInt); - const shift = @intCast(ShiftInt, index); - self.free_registers |= @as(FreeRegInt, 1) << shift; - } - - /// Before calling, must ensureCapacity + 1 on branch.registers. - /// Returns `null` if all registers are allocated. - fn allocReg(self: *Branch, inst: *ir.Inst) ?Register { - const free_index = @ctz(FreeRegInt, self.free_registers); - if (free_index >= callee_preserved_regs.len) { - return null; - } - self.free_registers &= ~(@as(FreeRegInt, 1) << free_index); - const reg = callee_preserved_regs[free_index]; - self.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst }); - log.debug("alloc {} => {*}", .{reg, inst}); - return reg; - } - - /// Does not track the register. - fn findUnusedReg(self: *Branch) ?Register { - const free_index = @ctz(FreeRegInt, self.free_registers); - if (free_index >= callee_preserved_regs.len) { - return null; - } - return callee_preserved_regs[free_index]; - } fn deinit(self: *Branch, gpa: *Allocator) void { self.inst_table.deinit(gpa); - self.registers.deinit(gpa); - self.stack.deinit(gpa); self.* = undefined; } }; - const RegisterAllocation = struct { - inst: *ir.Inst, - }; + fn markRegUsed(self: *Self, reg: Register) void { + if (FreeRegInt == u0) return; + const index = reg.allocIndex() orelse return; + const ShiftInt = math.Log2Int(FreeRegInt); + const shift = @intCast(ShiftInt, index); + self.free_registers &= ~(@as(FreeRegInt, 1) << shift); + } + + fn markRegFree(self: *Self, reg: Register) void { + if (FreeRegInt == u0) return; + const index = reg.allocIndex() orelse return; + const ShiftInt = math.Log2Int(FreeRegInt); + const shift = @intCast(ShiftInt, index); + self.free_registers |= @as(FreeRegInt, 1) << shift; + } + + /// Before calling, must ensureCapacity + 1 on self.registers. + /// Returns `null` if all registers are allocated. 
+ fn allocReg(self: *Self, inst: *ir.Inst) ?Register { + const free_index = @ctz(FreeRegInt, self.free_registers); + if (free_index >= callee_preserved_regs.len) { + return null; + } + self.free_registers &= ~(@as(FreeRegInt, 1) << free_index); + const reg = callee_preserved_regs[free_index]; + self.registers.putAssumeCapacityNoClobber(reg, inst); + log.debug("alloc {} => {*}", .{reg, inst}); + return reg; + } + + /// Does not track the register. + fn findUnusedReg(self: *Self) ?Register { + const free_index = @ctz(FreeRegInt, self.free_registers); + if (free_index >= callee_preserved_regs.len) { + return null; + } + return callee_preserved_regs[free_index]; + } const StackAllocation = struct { inst: *ir.Inst, + /// TODO do we need size? should be determined by inst.ty.abiSize() size: u32, }; @@ -435,8 +433,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { branch_stack.items[0].deinit(bin_file.allocator); branch_stack.deinit(); } - const branch = try branch_stack.addOne(); - branch.* = .{}; + try branch_stack.append(.{}); const src_data: struct {lbrace_src: usize, rbrace_src: usize, source: []const u8} = blk: { if (module_fn.owner_decl.scope.cast(Module.Scope.File)) |scope_file| { @@ -476,6 +473,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .rbrace_src = src_data.rbrace_src, .source = src_data.source, }; + defer function.registers.deinit(bin_file.allocator); + defer function.stack.deinit(bin_file.allocator); defer function.exitlude_jump_relocs.deinit(bin_file.allocator); var call_info = function.resolveCallingConventionValues(src, fn_type) catch |err| switch (err) { @@ -487,7 +486,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { function.args = call_info.args; function.ret_mcv = call_info.return_value; function.stack_align = call_info.stack_align; - branch.max_end_stack = call_info.stack_byte_count; + function.max_end_stack = call_info.stack_byte_count; function.gen() catch |err| switch (err) { error.CodegenFail => return Result{ .fail = function.err_msg.? }, @@ -523,7 +522,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { try self.dbgSetPrologueEnd(); try self.genBody(self.mod_fn.analysis.success); - const stack_end = self.branch_stack.items[0].max_end_stack; + const stack_end = self.max_end_stack; if (stack_end > math.maxInt(i32)) return self.fail(self.src, "too much stack used in call parameters", .{}); const aligned_stack_end = mem.alignForward(stack_end, self.stack_align); @@ -580,13 +579,15 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } fn genBody(self: *Self, body: ir.Body) InnerError!void { - const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - const inst_table = &branch.inst_table; for (body.instructions) |inst| { + try self.ensureProcessDeathCapacity(@popCount(@TypeOf(inst.deaths), inst.deaths)); + const mcv = try self.genFuncInst(inst); - log.debug("{*} => {}", .{inst, mcv}); - // TODO don't put void or dead things in here - try inst_table.putNoClobber(self.gpa, inst, mcv); + if (!inst.isUnused()) { + log.debug("{*} => {}", .{inst, mcv}); + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + try branch.inst_table.putNoClobber(self.gpa, inst, mcv); + } var i: ir.Inst.DeathsBitIndex = 0; while (inst.getOperand(i)) |operand| : (i += 1) { @@ -628,21 +629,27 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { self.dbg_line.appendAssumeCapacity(DW.LNS_copy); } + /// Asserts there is already capacity to insert into top branch inst_table. 
fn processDeath(self: *Self, inst: *ir.Inst) void { + if (inst.tag == .constant) return; // Constants are immortal. + const prev_value = self.getResolvedInstValue(inst); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - const entry = branch.inst_table.getEntry(inst) orelse return; - const prev_value = entry.value; - entry.value = .dead; + branch.inst_table.putAssumeCapacity(inst, .dead); switch (prev_value) { .register => |reg| { const canon_reg = toCanonicalReg(reg); - _ = branch.registers.remove(canon_reg); - branch.markRegFree(canon_reg); + _ = self.registers.remove(canon_reg); + self.markRegFree(canon_reg); }, else => {}, // TODO process stack allocation death } } + fn ensureProcessDeathCapacity(self: *Self, additional_count: usize) !void { + const table = &self.branch_stack.items[self.branch_stack.items.len - 1].inst_table; + try table.ensureCapacity(self.gpa, table.items().len + additional_count); + } + /// Adds a Type to the .debug_info at the current position. The bytes will be populated later, /// after codegen for this symbol is done. fn addDbgInfoTypeReloc(self: *Self, ty: Type) !void { @@ -705,13 +712,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { fn allocMem(self: *Self, inst: *ir.Inst, abi_size: u32, abi_align: u32) !u32 { if (abi_align > self.stack_align) self.stack_align = abi_align; - const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; // TODO find a free slot instead of always appending - const offset = mem.alignForwardGeneric(u32, branch.next_stack_offset, abi_align); - branch.next_stack_offset = offset + abi_size; - if (branch.next_stack_offset > branch.max_end_stack) - branch.max_end_stack = branch.next_stack_offset; - try branch.stack.putNoClobber(self.gpa, offset, .{ + const offset = mem.alignForwardGeneric(u32, self.next_stack_offset, abi_align); + self.next_stack_offset = offset + abi_size; + if (self.next_stack_offset > self.max_end_stack) + self.max_end_stack = self.next_stack_offset; + try self.stack.putNoClobber(self.gpa, offset, .{ .inst = inst, .size = abi_size, }); @@ -737,15 +743,14 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const abi_align = elem_ty.abiAlignment(self.target.*); if (abi_align > self.stack_align) self.stack_align = abi_align; - const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; if (reg_ok) { // Make sure the type can fit in a register before we try to allocate one. const ptr_bits = arch.ptrBitWidth(); const ptr_bytes: u64 = @divExact(ptr_bits, 8); if (abi_size <= ptr_bytes) { - try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); - if (branch.allocReg(inst)) |reg| { + try self.registers.ensureCapacity(self.gpa, self.registers.items().len + 1); + if (self.allocReg(inst)) |reg| { return MCValue{ .register = registerAlias(reg, abi_size) }; } } @@ -758,20 +763,18 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { /// allocated. A second call to `copyToTmpRegister` may return the same register. /// This can have a side effect of spilling instructions to the stack to free up a register. fn copyToTmpRegister(self: *Self, src: usize, mcv: MCValue) !Register { - const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - - const reg = branch.findUnusedReg() orelse b: { + const reg = self.findUnusedReg() orelse b: { // We'll take over the first register. Move the instruction that was previously // there to a stack allocation. 
const reg = callee_preserved_regs[0]; - const regs_entry = branch.registers.remove(reg).?; - const spilled_inst = regs_entry.value.inst; + const regs_entry = self.registers.remove(reg).?; + const spilled_inst = regs_entry.value; const stack_mcv = try self.allocRegOrMem(spilled_inst, false); - const inst_entry = branch.inst_table.getEntry(spilled_inst).?; - const reg_mcv = inst_entry.value; + const reg_mcv = self.getResolvedInstValue(spilled_inst); assert(reg == toCanonicalReg(reg_mcv.register)); - inst_entry.value = stack_mcv; + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + try branch.inst_table.put(self.gpa, spilled_inst, stack_mcv); try self.genSetStack(src, spilled_inst.ty, stack_mcv.stack_offset, reg_mcv); break :b reg; @@ -784,22 +787,21 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { /// `reg_owner` is the instruction that gets associated with the register in the register table. /// This can have a side effect of spilling instructions to the stack to free up a register. fn copyToNewRegister(self: *Self, reg_owner: *ir.Inst, mcv: MCValue) !MCValue { - const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); + try self.registers.ensureCapacity(self.gpa, self.registers.items().len + 1); - const reg = branch.allocReg(reg_owner) orelse b: { + const reg = self.allocReg(reg_owner) orelse b: { // We'll take over the first register. Move the instruction that was previously // there to a stack allocation. const reg = callee_preserved_regs[0]; - const regs_entry = branch.registers.getEntry(reg).?; - const spilled_inst = regs_entry.value.inst; - regs_entry.value = .{ .inst = reg_owner }; + const regs_entry = self.registers.getEntry(reg).?; + const spilled_inst = regs_entry.value; + regs_entry.value = reg_owner; const stack_mcv = try self.allocRegOrMem(spilled_inst, false); - const inst_entry = branch.inst_table.getEntry(spilled_inst).?; - const reg_mcv = inst_entry.value; + const reg_mcv = self.getResolvedInstValue(spilled_inst); assert(reg == toCanonicalReg(reg_mcv.register)); - inst_entry.value = stack_mcv; + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + try branch.inst_table.put(self.gpa, spilled_inst, stack_mcv); try self.genSetStack(reg_owner.src, spilled_inst.ty, stack_mcv.stack_offset, reg_mcv); break :b reg; @@ -934,9 +936,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .register => |reg| { // If it's in the registers table, need to associate the register with the // new instruction. 
- const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - if (branch.registers.getEntry(toCanonicalReg(reg))) |entry| { - entry.value = .{ .inst = inst }; + if (self.registers.getEntry(toCanonicalReg(reg))) |entry| { + entry.value = inst; } log.debug("reusing {} => {*}", .{reg, inst}); }, @@ -1231,8 +1232,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { if (inst.base.isUnused()) return MCValue.dead; - const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); + try self.registers.ensureCapacity(self.gpa, self.registers.items().len + 1); const result = self.args[self.arg_index]; self.arg_index += 1; @@ -1240,8 +1240,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const name_with_null = inst.name[0..mem.lenZ(inst.name) + 1]; switch (result) { .register => |reg| { - branch.registers.putAssumeCapacityNoClobber(toCanonicalReg(reg), .{ .inst = &inst.base }); - branch.markRegUsed(reg); + self.registers.putAssumeCapacityNoClobber(toCanonicalReg(reg), &inst.base); + self.markRegUsed(reg); try self.dbg_info.ensureCapacity(self.dbg_info.items.len + 8 + name_with_null.len); self.dbg_info.appendAssumeCapacity(link.File.Elf.abbrev_parameter); @@ -1536,13 +1536,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { fn genDbgStmt(self: *Self, inst: *ir.Inst.NoOp) !MCValue { try self.dbgAdvancePCAndLine(inst.base.src); - return MCValue.none; + assert(inst.base.isUnused()); + return MCValue.dead; } fn genCondBr(self: *Self, inst: *ir.Inst.CondBr) !MCValue { const cond = try self.resolveInst(inst.condition); - // TODO deal with liveness / deaths condbr's then_entry_deaths and else_entry_deaths const reloc: Reloc = switch (arch) { .i386, .x86_64 => reloc: { try self.code.ensureCapacity(self.code.items.len + 6); @@ -1595,9 +1595,117 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }, else => return self.fail(inst.base.src, "TODO implement condbr {}", .{ self.target.cpu.arch }), }; + + // Capture the state of register and stack allocation state so that we can revert to it. + const parent_next_stack_offset = self.next_stack_offset; + const parent_free_registers = self.free_registers; + var parent_stack = try self.stack.clone(self.gpa); + defer parent_stack.deinit(self.gpa); + var parent_registers = try self.registers.clone(self.gpa); + defer parent_registers.deinit(self.gpa); + + try self.branch_stack.append(.{}); + + const then_deaths = inst.thenDeaths(); + try self.ensureProcessDeathCapacity(then_deaths.len); + for (then_deaths) |operand| { + self.processDeath(operand); + } try self.genBody(inst.then_body); + + // Revert to the previous register and stack allocation state. 
+ + var saved_then_branch = self.branch_stack.pop(); + defer saved_then_branch.deinit(self.gpa); + + self.registers.deinit(self.gpa); + self.registers = parent_registers; + parent_registers = .{}; + + self.stack.deinit(self.gpa); + self.stack = parent_stack; + parent_stack = .{}; + + self.next_stack_offset = parent_next_stack_offset; + self.free_registers = parent_free_registers; + try self.performReloc(inst.base.src, reloc); + const else_branch = self.branch_stack.addOneAssumeCapacity(); + else_branch.* = .{}; + + const else_deaths = inst.elseDeaths(); + try self.ensureProcessDeathCapacity(else_deaths.len); + for (else_deaths) |operand| { + self.processDeath(operand); + } try self.genBody(inst.else_body); + + // At this point, each branch will possibly have conflicting values for where + // each instruction is stored. They agree, however, on which instructions are alive/dead. + // We use the first ("then") branch as canonical, and here emit + // instructions into the second ("else") branch to make it conform. + // We continue respect the data structure semantic guarantees of the else_branch so + // that we can use all the code emitting abstractions. This is why at the bottom we + // assert that parent_branch.free_registers equals the saved_then_branch.free_registers + // rather than assigning it. + const parent_branch = &self.branch_stack.items[self.branch_stack.items.len - 2]; + try parent_branch.inst_table.ensureCapacity(self.gpa, parent_branch.inst_table.items().len + + else_branch.inst_table.items().len); + for (else_branch.inst_table.items()) |else_entry| { + const canon_mcv = if (saved_then_branch.inst_table.remove(else_entry.key)) |then_entry| blk: { + // The instruction's MCValue is overridden in both branches. + parent_branch.inst_table.putAssumeCapacity(else_entry.key, then_entry.value); + if (else_entry.value == .dead) { + assert(then_entry.value == .dead); + continue; + } + break :blk then_entry.value; + } else blk: { + if (else_entry.value == .dead) + continue; + // The instruction is only overridden in the else branch. + var i: usize = self.branch_stack.items.len - 2; + while (true) { + i -= 1; + if (self.branch_stack.items[i].inst_table.get(else_entry.key)) |mcv| { + assert(mcv != .dead); + break :blk mcv; + } + } + }; + log.debug("consolidating else_entry {*} {}=>{}", .{else_entry.key, else_entry.value, canon_mcv}); + // TODO make sure the destination stack offset / register does not already have something + // going on there. + try self.setRegOrMem(inst.base.src, else_entry.key.ty, canon_mcv, else_entry.value); + // TODO track the new register / stack allocation + } + try parent_branch.inst_table.ensureCapacity(self.gpa, parent_branch.inst_table.items().len + + saved_then_branch.inst_table.items().len); + for (saved_then_branch.inst_table.items()) |then_entry| { + // We already deleted the items from this table that matched the else_branch. + // So these are all instructions that are only overridden in the then branch. 
+ parent_branch.inst_table.putAssumeCapacity(then_entry.key, then_entry.value); + if (then_entry.value == .dead) + continue; + const parent_mcv = blk: { + var i: usize = self.branch_stack.items.len - 2; + while (true) { + i -= 1; + if (self.branch_stack.items[i].inst_table.get(then_entry.key)) |mcv| { + assert(mcv != .dead); + break :blk mcv; + } + } + }; + log.debug("consolidating then_entry {*} {}=>{}", .{then_entry.key, parent_mcv, then_entry.value}); + // TODO make sure the destination stack offset / register does not already have something + // going on there. + try self.setRegOrMem(inst.base.src, then_entry.key.ty, parent_mcv, then_entry.value); + // TODO track the new register / stack allocation + } + + self.branch_stack.pop().deinit(self.gpa); + return MCValue.unreach; } @@ -1671,11 +1779,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { switch (reloc) { .rel32 => |pos| { const amt = self.code.items.len - (pos + 4); - // If it wouldn't jump at all, elide it. - if (amt == 0) { - self.code.items.len -= 5; - return; - } + // Here it would be tempting to implement testing for amt == 0 and then elide the + // jump. However, that will cause a problem because other jumps may assume that they + // can jump to this code. Or maybe I didn't understand something when I was debugging. + // It could be worth another look. Anyway, that's why that isn't done here. Probably the + // best place to elide jumps will be in semantic analysis, by inlining blocks that only + // only have 1 break instruction. const s32_amt = math.cast(i32, amt) catch return self.fail(src, "unable to perform relocation: jump too far", .{}); mem.writeIntLittle(i32, self.code.items[pos..][0..4], s32_amt); @@ -2280,8 +2389,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } fn resolveInst(self: *Self, inst: *ir.Inst) !MCValue { + // If the type has no codegen bits, no need to store it. + if (!inst.ty.hasCodeGenBits()) + return MCValue.none; + // Constants have static lifetimes, so they are always memoized in the outer most table. - if (inst.cast(ir.Inst.Constant)) |const_inst| { + if (inst.castTag(.constant)) |const_inst| { const branch = &self.branch_stack.items[0]; const gop = try branch.inst_table.getOrPut(self.gpa, inst); if (!gop.found_existing) { @@ -2290,6 +2403,10 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return gop.entry.value; } + return self.getResolvedInstValue(inst); + } + + fn getResolvedInstValue(self: *Self, inst: *ir.Inst) MCValue { // Treat each stack item as a "layer" on top of the previous one. var i: usize = self.branch_stack.items.len; while (true) { diff --git a/src-self-hosted/link/Elf.zig b/src-self-hosted/link/Elf.zig index 1d18344cbb..b67de55dd5 100644 --- a/src-self-hosted/link/Elf.zig +++ b/src-self-hosted/link/Elf.zig @@ -1640,9 +1640,12 @@ pub fn updateDecl(self: *Elf, module: *Module, decl: *Module.Decl) !void { else => false, }; if (is_fn) { - //if (mem.eql(u8, mem.spanZ(decl.name), "add")) { - // typed_value.val.cast(Value.Payload.Function).?.func.dump(module.*); - //} + { + //if (mem.eql(u8, mem.spanZ(decl.name), "add")) { + //} + std.debug.print("\n{}\n", .{decl.name}); + typed_value.val.cast(Value.Payload.Function).?.func.dump(module.*); + } // For functions we need to add a prologue to the debug line program. 
try dbg_line_buffer.ensureCapacity(26); diff --git a/src-self-hosted/type.zig b/src-self-hosted/type.zig index eb8fa2acd7..82079aa9f7 100644 --- a/src-self-hosted/type.zig +++ b/src-self-hosted/type.zig @@ -771,8 +771,8 @@ pub const Type = extern union { .array => self.elemType().hasCodeGenBits() and self.arrayLen() != 0, .array_u8 => self.arrayLen() != 0, .array_sentinel, .single_const_pointer, .single_mut_pointer, .many_const_pointer, .many_mut_pointer, .c_const_pointer, .c_mut_pointer, .const_slice, .mut_slice, .pointer => self.elemType().hasCodeGenBits(), - .int_signed => self.cast(Payload.IntSigned).?.bits == 0, - .int_unsigned => self.cast(Payload.IntUnsigned).?.bits == 0, + .int_signed => self.cast(Payload.IntSigned).?.bits != 0, + .int_unsigned => self.cast(Payload.IntUnsigned).?.bits != 0, .error_union => { const payload = self.cast(Payload.ErrorUnion).?; From 237d9a105d5eec82aefc63da8e844c47d8990eea Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 25 Aug 2020 21:31:03 -0700 Subject: [PATCH 3/4] stage2: support debug dumping zir as a build option So that it's not needed to manually comment and uncomment the debug code. --- build.zig | 2 ++ src-self-hosted/link/Elf.zig | 14 +++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/build.zig b/build.zig index 9c50025375..869fb3f208 100644 --- a/build.zig +++ b/build.zig @@ -83,6 +83,7 @@ pub fn build(b: *Builder) !void { } const log_scopes = b.option([]const []const u8, "log", "Which log scopes to enable") orelse &[0][]const u8{}; + const zir_dumps = b.option([]const []const u8, "dump-zir", "Which functions to dump ZIR for before codegen") orelse &[0][]const u8{}; const opt_version_string = b.option([]const u8, "version-string", "Override Zig version string. Default is to find out with git."); const version = if (opt_version_string) |version| version else v: { @@ -103,6 +104,7 @@ pub fn build(b: *Builder) !void { exe.addBuildOption([]const u8, "version", version); exe.addBuildOption([]const []const u8, "log_scopes", log_scopes); + exe.addBuildOption([]const []const u8, "zir_dumps", zir_dumps); exe.addBuildOption(bool, "enable_tracy", tracy != null); if (tracy) |tracy_path| { const client_cpp = fs.path.join( diff --git a/src-self-hosted/link/Elf.zig b/src-self-hosted/link/Elf.zig index b67de55dd5..f2a3218118 100644 --- a/src-self-hosted/link/Elf.zig +++ b/src-self-hosted/link/Elf.zig @@ -17,6 +17,7 @@ const Type = @import("../type.zig").Type; const link = @import("../link.zig"); const File = link.File; const Elf = @This(); +const build_options = @import("build_options"); const default_entry_addr = 0x8000000; @@ -1640,11 +1641,14 @@ pub fn updateDecl(self: *Elf, module: *Module, decl: *Module.Decl) !void { else => false, }; if (is_fn) { - { - //if (mem.eql(u8, mem.spanZ(decl.name), "add")) { - //} - std.debug.print("\n{}\n", .{decl.name}); - typed_value.val.cast(Value.Payload.Function).?.func.dump(module.*); + const zir_dumps = if (std.builtin.is_test) &[0][]const u8{} else build_options.zir_dumps; + if (zir_dumps.len != 0) { + for (zir_dumps) |fn_name| { + if (mem.eql(u8, mem.spanZ(decl.name), fn_name)) { + std.debug.print("\n{}\n", .{decl.name}); + typed_value.val.cast(Value.Payload.Function).?.func.dump(module.*); + } + } } // For functions we need to add a prologue to the debug line program. 
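Note on the register bookkeeping that the second patch moves from Branch onto the Function struct: it is a bitmask free list over callee_preserved_regs. free_registers keeps one bit per allocatable register, @ctz picks the lowest free slot, and markRegUsed/markRegFree toggle individual bits. The standalone sketch below shows the same scheme in isolation; the four register names, the u4 mask width, and the free-function form are invented for illustration (the real code sizes FreeRegInt from callee_preserved_regs.len and records the owning *ir.Inst in the registers map), and the builtin call style follows the Zig version in this tree.

const std = @import("std");
const math = std.math;

// Illustrative register set; codegen.zig derives this from callee_preserved_regs per arch.
const reg_names = [_][]const u8{ "rax", "rcx", "rdx", "rsi" };
const FreeRegInt = u4; // one bit per register, 1 means free
const ShiftInt = math.Log2Int(FreeRegInt);

var free_registers: FreeRegInt = math.maxInt(FreeRegInt); // all registers start out free

/// Claims the lowest-numbered free register, or returns null if none are free.
fn allocReg() ?usize {
    const free_index = @ctz(FreeRegInt, free_registers);
    if (free_index >= reg_names.len) return null; // @ctz yields the bit width when no bit is set
    const shift = @intCast(ShiftInt, free_index);
    free_registers &= ~(@as(FreeRegInt, 1) << shift);
    return free_index;
}

/// Returns a register to the free set.
fn markRegFree(index: usize) void {
    const shift = @intCast(ShiftInt, index);
    free_registers |= @as(FreeRegInt, 1) << shift;
}

pub fn main() void {
    std.debug.assert(allocReg().? == 0);
    std.debug.assert(allocReg().? == 1);
    markRegFree(0);
    std.debug.assert(allocReg().? == 0); // a freed slot is handed out again
}

Keeping the mask alongside the registers hash map is what lets allocReg find a candidate with a single @ctz instead of scanning the map, and it is also what makes the branch-state save/restore in genCondBr cheap: free_registers and next_stack_offset are plain integers that can be copied before the then-branch and reassigned afterwards. As for the third patch, the new dump-zir option sits next to the existing log-scopes option and is presumably passed the same way on the command line, so dumping a function's ZIR before codegen should look something like zig build -Ddump-zir=add (the function name here is only an example).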
From 0c5faa61aebca4215683d233dd52bf3a7a5d1db6 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 26 Aug 2020 01:00:04 -0700 Subject: [PATCH 4/4] stage2: codegen: fix reuseOperand not doing death bookkeeping --- src-self-hosted/codegen.zig | 7 ++++- src-self-hosted/zir.zig | 12 ++++++- test/stage2/test.zig | 62 +++++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 2 deletions(-) diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 298873e618..b282c2011b 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -632,6 +632,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { /// Asserts there is already capacity to insert into top branch inst_table. fn processDeath(self: *Self, inst: *ir.Inst) void { if (inst.tag == .constant) return; // Constants are immortal. + // When editing this function, note that the logic must synchronize with `reuseOperand`. const prev_value = self.getResolvedInstValue(inst); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; branch.inst_table.putAssumeCapacity(inst, .dead); @@ -951,6 +952,10 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // Prevent the operand deaths processing code from deallocating it. inst.clearOperandDeath(op_index); + // That makes us responsible for doing the rest of the stuff that processDeath would have done. + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + branch.inst_table.putAssumeCapacity(inst.getOperand(op_index).?, .dead); + return true; } @@ -1666,7 +1671,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // The instruction is only overridden in the else branch. var i: usize = self.branch_stack.items.len - 2; while (true) { - i -= 1; + i -= 1; // If this overflows, the question is: why wasn't the instruction marked dead? if (self.branch_stack.items[i].inst_table.get(else_entry.key)) |mcv| { assert(mcv != .dead); break :blk mcv; diff --git a/src-self-hosted/zir.zig b/src-self-hosted/zir.zig index 3557c88f4e..c552f28553 100644 --- a/src-self-hosted/zir.zig +++ b/src-self-hosted/zir.zig @@ -954,6 +954,7 @@ pub const Module = struct { pub const MetaData = struct { deaths: ir.Inst.DeathsInt, + addr: usize, }; pub const BodyMetaData = struct { @@ -1152,6 +1153,12 @@ const Writer = struct { try self.writeInstToStream(stream, inst); if (self.module.metadata.get(inst)) |metadata| { try stream.print(" ; deaths=0b{b}", .{metadata.deaths}); + // This is conditionally compiled in because addresses mess up the tests due + // to Address Space Layout Randomization. It's super useful when debugging + // codegen.zig though. + if (!std.builtin.is_test) { + try stream.print(" 0x{x}", .{metadata.addr}); + } } self.indent -= 2; try stream.writeByte('\n'); @@ -2417,7 +2424,10 @@ const EmitZIR = struct { .varptr => @panic("TODO"), }; - try self.metadata.put(new_inst, .{ .deaths = inst.deaths }); + try self.metadata.put(new_inst, .{ + .deaths = inst.deaths, + .addr = @ptrToInt(inst), + }); try instructions.append(new_inst); try inst_table.put(inst, new_inst); } diff --git a/test/stage2/test.zig b/test/stage2/test.zig index fee8886ddb..1a0d801ac3 100644 --- a/test/stage2/test.zig +++ b/test/stage2/test.zig @@ -694,6 +694,68 @@ pub fn addCases(ctx: *TestContext) !void { "", ); + // Reusing the registers of dead operands playing nicely with conditional branching. 
+ case.addCompareOutput( + \\export fn _start() noreturn { + \\ assert(add(3, 4) == 791); + \\ assert(add(4, 3) == 79); + \\ + \\ exit(); + \\} + \\ + \\fn add(a: u32, b: u32) u32 { + \\ const x: u32 = if (a < b) blk: { + \\ const c = a + b; // 7 + \\ const d = a + c; // 10 + \\ const e = d + b; // 14 + \\ const f = d + e; // 24 + \\ const g = e + f; // 38 + \\ const h = f + g; // 62 + \\ const i = g + h; // 100 + \\ const j = i + d; // 110 + \\ const k = i + j; // 210 + \\ const l = k + c; // 217 + \\ const m = l + d; // 227 + \\ const n = m + e; // 241 + \\ const o = n + f; // 265 + \\ const p = o + g; // 303 + \\ const q = p + h; // 365 + \\ const r = q + i; // 465 + \\ const s = r + j; // 575 + \\ const t = s + k; // 785 + \\ break :blk t; + \\ } else blk: { + \\ const t = b + b + a; // 10 + \\ const c = a + t; // 14 + \\ const d = c + t; // 24 + \\ const e = d + t; // 34 + \\ const f = e + t; // 44 + \\ const g = f + t; // 54 + \\ const h = c + g; // 68 + \\ break :blk h + b; // 71 + \\ }; + \\ const y = x + a; // 788, 75 + \\ const z = y + a; // 791, 79 + \\ return z; + \\} + \\ + \\pub fn assert(ok: bool) void { + \\ if (!ok) unreachable; // assertion failure + \\} + \\ + \\fn exit() noreturn { + \\ asm volatile ("syscall" + \\ : + \\ : [number] "{rax}" (231), + \\ [arg1] "{rdi}" (0) + \\ : "rcx", "r11", "memory" + \\ ); + \\ unreachable; + \\} + , + "", + ); + // Character literals and multiline strings. case.addCompareOutput( \\export fn _start() noreturn {