Merge branch 'register-allocation'

Andrew Kelley 2020-07-08 21:03:28 -07:00
commit a489ea0b2f
14 changed files with 1400 additions and 443 deletions


@ -257,6 +257,24 @@ pub fn ArrayListAligned(comptime T: type, comptime alignment: ?u29) type {
return &self.items[self.items.len - 1];
}
/// Resize the array, adding `n` new elements, which have `undefined` values.
/// The return value is a pointer to an array of the newly allocated elements.
pub fn addManyAsArray(self: *Self, comptime n: usize) !*[n]T {
const prev_len = self.items.len;
try self.resize(self.items.len + n);
return self.items[prev_len..][0..n];
}
/// Resize the array, adding `n` new elements, which have `undefined` values.
/// The return value is a pointer to an array of the newly allocated elements.
/// Asserts that there is already space for the new elements without allocating more.
pub fn addManyAsArrayAssumeCapacity(self: *Self, comptime n: usize) *[n]T {
assert(self.items.len + n <= self.capacity);
const prev_len = self.items.len;
self.items.len += n;
return self.items[prev_len..][0..n];
}
/// Remove and return the last element from the list.
/// Asserts the list has at least one item.
pub fn pop(self: *Self) T {
@ -488,6 +506,24 @@ pub fn ArrayListAlignedUnmanaged(comptime T: type, comptime alignment: ?u29) typ
return &self.items[self.items.len - 1];
}
/// Resize the array, adding `n` new elements, which have `undefined` values.
/// The return value is a pointer to an array of the newly allocated elements.
pub fn addManyAsArray(self: *Self, allocator: *Allocator, comptime n: usize) !*[n]T {
const prev_len = self.items.len;
try self.resize(allocator, self.items.len + n);
return self.items[prev_len..][0..n];
}
/// Resize the array, adding `n` new elements, which have `undefined` values.
/// The return value is a pointer to an array of the newly allocated elements.
/// Asserts that there is already space for the new elements without allocating more.
pub fn addManyAsArrayAssumeCapacity(self: *Self, comptime n: usize) *[n]T {
assert(self.items.len + n <= self.capacity);
const prev_len = self.items.len;
self.items.len += n;
return self.items[prev_len..][0..n];
}
/// Remove and return the last element from the list.
/// Asserts the list has at least one item.
/// This operation does not invalidate any element pointers.
@ -727,3 +763,27 @@ test "std.ArrayList.writer" {
try writer.writeAll("efg");
testing.expectEqualSlices(u8, list.items, "abcdefg");
}
test "addManyAsArray" {
const a = std.testing.allocator;
{
var list = ArrayList(u8).init(a);
defer list.deinit();
(try list.addManyAsArray(4)).* = "aoeu".*;
try list.ensureCapacity(8);
list.addManyAsArrayAssumeCapacity(4).* = "asdf".*;
testing.expectEqualSlices(u8, list.items, "aoeuasdf");
}
{
var list = ArrayListUnmanaged(u8){};
defer list.deinit(a);
(try list.addManyAsArray(a, 4)).* = "aoeu".*;
try list.ensureCapacity(a, 8);
list.addManyAsArrayAssumeCapacity(4).* = "asdf".*;
testing.expectEqualSlices(u8, list.items, "aoeuasdf");
}
}
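A minimal usage sketch (illustrative, not part of this commit), assuming a `std` import is in scope: the `*[n]T` return type lets the result feed directly into `std.mem.writeIntLittle`, the same idiom codegen.zig uses with `addManyAsArrayAssumeCapacity` below.
test "addManyAsArray as a writeIntLittle destination" {
    var list = std.ArrayList(u8).init(std.testing.allocator);
    defer list.deinit();
    // `addManyAsArray(4)` returns a `*[4]u8` aimed at the freshly added, undefined bytes.
    std.mem.writeIntLittle(u32, try list.addManyAsArray(4), 0xDEADBEEF);
    std.testing.expectEqualSlices(u8, list.items, "\xEF\xBE\xAD\xDE");
}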


@ -15,6 +15,10 @@ pub fn AutoHashMap(comptime K: type, comptime V: type) type {
return HashMap(K, V, getAutoHashFn(K), getAutoEqlFn(K), autoEqlIsCheap(K));
}
pub fn AutoHashMapUnmanaged(comptime K: type, comptime V: type) type {
return HashMapUnmanaged(K, V, getAutoHashFn(K), getAutoEqlFn(K), autoEqlIsCheap(K));
}
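A minimal usage sketch (illustrative, not part of this commit), assuming a `std` import is in scope: unmanaged containers store no allocator, so every allocating call takes one explicitly, and so does `deinit`.
test "AutoHashMapUnmanaged takes an explicit allocator" {
    const gpa = std.testing.allocator;
    var map = std.AutoHashMapUnmanaged(u32, u32){};
    defer map.deinit(gpa);
    try map.putNoClobber(gpa, 1, 10);
    std.testing.expect(map.get(1).? == 10);
}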
/// Builtin hashmap for strings as keys.
pub fn StringHashMap(comptime V: type) type {
return HashMap([]const u8, V, hashString, eqlString, true);


@ -1047,19 +1047,14 @@ pub fn order(a: var, b: var) Order {
pub const CompareOperator = enum {
/// Less than (`<`)
lt,
/// Less than or equal (`<=`)
lte,
/// Equal (`==`)
eq,
/// Greater than or equal (`>=`)
gte,
/// Greater than (`>`)
gt,
/// Not equal (`!=`)
neq,
};


@ -21,6 +21,7 @@ pub fn main() anyerror!void {
for (test_fn_list) |test_fn, i| {
std.testing.base_allocator_instance.reset();
std.testing.log_level = .warn;
var test_node = root_node.start(test_fn.name, null);
test_node.activate();
@ -73,3 +74,14 @@ pub fn main() anyerror!void {
std.debug.warn("{} passed; {} skipped.\n", .{ ok_count, skip_count });
}
}
pub fn log(
comptime message_level: std.log.Level,
comptime scope: @Type(.EnumLiteral),
comptime format: []const u8,
args: var,
) void {
if (@enumToInt(message_level) <= @enumToInt(std.testing.log_level)) {
std.debug.print("[{}] ({}): " ++ format, .{@tagName(scope), @tagName(message_level)} ++ args);
}
}
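An illustrative sketch (not part of this commit) of how a test opts into more verbose output; `.my_scope` is an arbitrary placeholder scope and a `std` import is assumed.
test "temporarily verbose test" {
    // The test runner resets this to .warn before each test (see the loop above),
    // so raising it here affects only this test.
    std.testing.log_level = .debug;
    std.log.debug(.my_scope, "visible because log_level was raised\n", .{});
}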


@ -3,14 +3,16 @@ pub const ArrayListAligned = @import("array_list.zig").ArrayListAligned;
pub const ArrayListAlignedUnmanaged = @import("array_list.zig").ArrayListAlignedUnmanaged;
pub const ArrayListSentineled = @import("array_list_sentineled.zig").ArrayListSentineled;
pub const ArrayListUnmanaged = @import("array_list.zig").ArrayListUnmanaged;
pub const AutoHashMap = @import("hash_map.zig").AutoHashMap;
pub const AutoHashMap = hash_map.AutoHashMap;
pub const AutoHashMapUnmanaged = hash_map.AutoHashMapUnmanaged;
pub const BloomFilter = @import("bloom_filter.zig").BloomFilter;
pub const BufMap = @import("buf_map.zig").BufMap;
pub const BufSet = @import("buf_set.zig").BufSet;
pub const ChildProcess = @import("child_process.zig").ChildProcess;
pub const ComptimeStringMap = @import("comptime_string_map.zig").ComptimeStringMap;
pub const DynLib = @import("dynamic_library.zig").DynLib;
pub const HashMap = @import("hash_map.zig").HashMap;
pub const HashMap = hash_map.HashMap;
pub const HashMapUnmanaged = hash_map.HashMapUnmanaged;
pub const Mutex = @import("mutex.zig").Mutex;
pub const PackedIntArray = @import("packed_int_array.zig").PackedIntArray;
pub const PackedIntArrayEndian = @import("packed_int_array.zig").PackedIntArrayEndian;
@ -22,7 +24,7 @@ pub const ResetEvent = @import("reset_event.zig").ResetEvent;
pub const SegmentedList = @import("segmented_list.zig").SegmentedList;
pub const SinglyLinkedList = @import("linked_list.zig").SinglyLinkedList;
pub const SpinLock = @import("spinlock.zig").SpinLock;
pub const StringHashMap = @import("hash_map.zig").StringHashMap;
pub const StringHashMap = hash_map.StringHashMap;
pub const TailQueue = @import("linked_list.zig").TailQueue;
pub const Target = @import("target.zig").Target;
pub const Thread = @import("thread.zig").Thread;


@ -14,6 +14,9 @@ pub var failing_allocator_instance = FailingAllocator.init(&base_allocator_insta
pub var base_allocator_instance = std.mem.validationWrap(std.heap.ThreadSafeFixedBufferAllocator.init(allocator_mem[0..]));
var allocator_mem: [2 * 1024 * 1024]u8 = undefined;
/// TODO https://github.com/ziglang/zig/issues/5738
pub var log_level = std.log.Level.warn;
/// This function is intended to be used only in tests. It prints diagnostics to stderr
/// and then aborts when actual_error_union is not expected_error.
pub fn expectError(expected_error: anyerror, actual_error_union: var) void {


@ -959,6 +959,8 @@ pub const Node = struct {
};
/// The params are directly after the FnProto in memory.
/// TODO have a flags field for the optional nodes, and have them appended
/// before or after the parameters in memory.
pub const FnProto = struct {
base: Node = Node{ .id = .FnProto },
doc_comments: ?*DocComment,

File diff suppressed because it is too large.


@ -12,6 +12,18 @@ const Target = std.Target;
const Allocator = mem.Allocator;
const trace = @import("tracy.zig").trace;
/// The codegen-related data that is stored in `ir.Inst.Block` instructions.
pub const BlockData = struct {
relocs: std.ArrayListUnmanaged(Reloc) = .{},
};
pub const Reloc = union(enum) {
/// The value is an offset into the `Function` `code` from the beginning.
/// To perform the reloc, write a 32-bit signed little-endian integer
/// which is a relative jump, based on the address following the reloc.
rel32: usize,
};
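An illustrative worked example (not part of this commit); `rel32Displacement` is a hypothetical helper showing the arithmetic that `performReloc` applies further down.
// The displacement is relative to the address following the 4-byte placeholder:
// a placeholder at code offset 10 with a jump target at offset 40 stores
// 40 - (10 + 4) = 26.
fn rel32Displacement(placeholder_offset: usize, target_offset: usize) i32 {
    return @intCast(i32, @intCast(i64, target_offset) - @intCast(i64, placeholder_offset + 4));
}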
pub const Result = union(enum) {
/// The `code` parameter passed to `generateSymbol` has the value appended.
appended: void,
@ -46,7 +58,14 @@ pub fn generateSymbol(
var mc_args = try std.ArrayList(Function.MCValue).initCapacity(bin_file.allocator, param_types.len);
defer mc_args.deinit();
var next_stack_offset: u64 = 0;
var branch_stack = std.ArrayList(Function.Branch).init(bin_file.allocator);
defer {
assert(branch_stack.items.len == 1);
branch_stack.items[0].deinit(bin_file.allocator);
branch_stack.deinit();
}
const branch = try branch_stack.addOne();
branch.* = .{};
switch (fn_type.fnCallingConvention()) {
.Naked => assert(mc_args.items.len == 0),
@ -61,8 +80,8 @@ pub fn generateSymbol(
switch (param_type.zigTypeTag()) {
.Bool, .Int => {
if (next_int_reg >= integer_registers.len) {
try mc_args.append(.{ .stack_offset = next_stack_offset });
next_stack_offset += param_type.abiSize(bin_file.options.target);
try mc_args.append(.{ .stack_offset = branch.next_stack_offset });
branch.next_stack_offset += @intCast(u32, param_type.abiSize(bin_file.options.target));
} else {
try mc_args.append(.{ .register = @enumToInt(integer_registers[next_int_reg]) });
next_int_reg += 1;
@ -100,16 +119,17 @@ pub fn generateSymbol(
}
var function = Function{
.gpa = bin_file.allocator,
.target = &bin_file.options.target,
.bin_file = bin_file,
.mod_fn = module_fn,
.code = code,
.inst_table = std.AutoHashMap(*ir.Inst, Function.MCValue).init(bin_file.allocator),
.err_msg = null,
.args = mc_args.items,
.branch_stack = &branch_stack,
};
defer function.inst_table.deinit();
branch.max_end_stack = branch.next_stack_offset;
function.gen() catch |err| switch (err) {
error.CodegenFail => return Result{ .fail = function.err_msg.? },
else => |e| return e,
@ -210,18 +230,67 @@ pub fn generateSymbol(
}
}
const InnerError = error {
OutOfMemory,
CodegenFail,
};
const Function = struct {
gpa: *Allocator,
bin_file: *link.File.Elf,
target: *const std.Target,
mod_fn: *const Module.Fn,
code: *std.ArrayList(u8),
inst_table: std.AutoHashMap(*ir.Inst, MCValue),
err_msg: ?*ErrorMsg,
args: []MCValue,
/// Whenever there is a runtime branch, we push a Branch onto this stack,
/// and pop it off when the runtime branch joins. This provides an "overlay"
/// of the table of mappings from instructions to `MCValue` from within the branch.
/// This way we can modify the `MCValue` for an instruction in different ways
/// within different branches. Special consideration is needed when a branch
/// joins with its parent, to make sure all instructions have the same MCValue
/// across each runtime branch upon joining.
branch_stack: *std.ArrayList(Branch),
const Branch = struct {
inst_table: std.AutoHashMapUnmanaged(*ir.Inst, MCValue) = .{},
/// The key is an enum value of an arch-specific register.
registers: std.AutoHashMapUnmanaged(usize, RegisterAllocation) = .{},
/// Maps offset to what is stored there.
stack: std.AutoHashMapUnmanaged(usize, StackAllocation) = .{},
/// Offset from the stack base, representing the end of the stack frame.
max_end_stack: u32 = 0,
/// Represents the current end stack offset. If there is no existing slot
/// to place a new stack allocation, it goes here, and then bumps `max_end_stack`.
next_stack_offset: u32 = 0,
fn deinit(self: *Branch, gpa: *Allocator) void {
self.inst_table.deinit(gpa);
self.registers.deinit(gpa);
self.stack.deinit(gpa);
self.* = undefined;
}
};
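An illustrative condensation (not part of this commit) of the overlay lookup described above; `lookupOverlay` is a hypothetical helper, and the real logic lives in `resolveInst` further down.
fn lookupOverlay(branch_stack: *std.ArrayList(Branch), inst: *ir.Inst) ?MCValue {
    // The innermost branch wins; the outermost branch (index 0) holds constants and arguments.
    var i: usize = branch_stack.items.len;
    while (i != 0) {
        i -= 1;
        if (branch_stack.items[i].inst_table.get(inst)) |mcv| return mcv;
    }
    return null;
}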
const RegisterAllocation = struct {
inst: *ir.Inst,
};
const StackAllocation = struct {
inst: *ir.Inst,
size: u32,
};
const MCValue = union(enum) {
/// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc.
none,
/// Control flow will not allow this value to be observed.
unreach,
/// No more references to this value remain.
dead,
/// A pointer-sized integer that fits in a register.
immediate: u64,
/// The constant was emitted into the code, at this offset.
@ -233,6 +302,45 @@ const Function = struct {
memory: u64,
/// The value is one of the stack variables.
stack_offset: u64,
/// The value is in the compare flags assuming an unsigned operation,
/// with this operator applied on top of it.
compare_flags_unsigned: std.math.CompareOperator,
/// The value is in the compare flags assuming a signed operation,
/// with this operator applied on top of it.
compare_flags_signed: std.math.CompareOperator,
fn isMemory(mcv: MCValue) bool {
return switch (mcv) {
.embedded_in_code, .memory, .stack_offset => true,
else => false,
};
}
fn isImmediate(mcv: MCValue) bool {
return switch (mcv) {
.immediate => true,
else => false,
};
}
fn isMutable(mcv: MCValue) bool {
return switch (mcv) {
.none => unreachable,
.unreach => unreachable,
.dead => unreachable,
.immediate,
.embedded_in_code,
.memory,
.compare_flags_unsigned,
.compare_flags_signed,
=> false,
.register,
.stack_offset,
=> true,
};
}
};
fn gen(self: *Function) !void {
@ -292,9 +400,14 @@ const Function = struct {
}
fn genArch(self: *Function, comptime arch: std.Target.Cpu.Arch) !void {
for (self.mod_fn.analysis.success.instructions) |inst| {
return self.genBody(self.mod_fn.analysis.success, arch);
}
fn genBody(self: *Function, body: ir.Body, comptime arch: std.Target.Cpu.Arch) InnerError!void {
const inst_table = &self.branch_stack.items[0].inst_table;
for (body.instructions) |inst| {
const new_inst = try self.genFuncInst(inst, arch);
try self.inst_table.putNoClobber(inst, new_inst);
try inst_table.putNoClobber(self.gpa, inst, new_inst);
}
}
@ -302,42 +415,169 @@ const Function = struct {
switch (inst.tag) {
.add => return self.genAdd(inst.cast(ir.Inst.Add).?, arch),
.arg => return self.genArg(inst.cast(ir.Inst.Arg).?),
.block => return self.genBlock(inst.cast(ir.Inst.Block).?, arch),
.breakpoint => return self.genBreakpoint(inst.src, arch),
.call => return self.genCall(inst.cast(ir.Inst.Call).?, arch),
.unreach => return MCValue{ .unreach = {} },
.constant => unreachable, // excluded from function bodies
.assembly => return self.genAsm(inst.cast(ir.Inst.Assembly).?, arch),
.ptrtoint => return self.genPtrToInt(inst.cast(ir.Inst.PtrToInt).?),
.bitcast => return self.genBitCast(inst.cast(ir.Inst.BitCast).?),
.ret => return self.genRet(inst.cast(ir.Inst.Ret).?, arch),
.retvoid => return self.genRetVoid(inst.cast(ir.Inst.RetVoid).?, arch),
.block => return self.genBlock(inst.cast(ir.Inst.Block).?, arch),
.br => return self.genBr(inst.cast(ir.Inst.Br).?, arch),
.breakpoint => return self.genBreakpoint(inst.src, arch),
.brvoid => return self.genBrVoid(inst.cast(ir.Inst.BrVoid).?, arch),
.call => return self.genCall(inst.cast(ir.Inst.Call).?, arch),
.cmp => return self.genCmp(inst.cast(ir.Inst.Cmp).?, arch),
.condbr => return self.genCondBr(inst.cast(ir.Inst.CondBr).?, arch),
.isnull => return self.genIsNull(inst.cast(ir.Inst.IsNull).?, arch),
.constant => unreachable, // excluded from function bodies
.isnonnull => return self.genIsNonNull(inst.cast(ir.Inst.IsNonNull).?, arch),
.isnull => return self.genIsNull(inst.cast(ir.Inst.IsNull).?, arch),
.ptrtoint => return self.genPtrToInt(inst.cast(ir.Inst.PtrToInt).?),
.ret => return self.genRet(inst.cast(ir.Inst.Ret).?, arch),
.retvoid => return self.genRetVoid(inst.cast(ir.Inst.RetVoid).?, arch),
.sub => return self.genSub(inst.cast(ir.Inst.Sub).?, arch),
.unreach => return MCValue{ .unreach = {} },
}
}
fn genAdd(self: *Function, inst: *ir.Inst.Add, comptime arch: std.Target.Cpu.Arch) !MCValue {
const lhs = try self.resolveInst(inst.args.lhs);
const rhs = try self.resolveInst(inst.args.rhs);
// No side effects, so if it's unreferenced, do nothing.
if (inst.base.isUnused())
return MCValue.dead;
switch (arch) {
.i386, .x86_64 => {
// const lhs_reg = try self.instAsReg(lhs);
// const rhs_reg = try self.instAsReg(rhs);
// const result = try self.allocateReg();
// try self.code.append(??);
// lhs_reg.release();
// rhs_reg.release();
return self.fail(inst.base.src, "TODO implement register allocation", .{});
.x86_64 => {
return try self.genX8664BinMath(&inst.base, inst.args.lhs, inst.args.rhs, 0, 0x00);
},
else => return self.fail(inst.base.src, "TODO implement add for {}", .{self.target.cpu.arch}),
}
}
fn genSub(self: *Function, inst: *ir.Inst.Sub, comptime arch: std.Target.Cpu.Arch) !MCValue {
// No side effects, so if it's unreferenced, do nothing.
if (inst.base.isUnused())
return MCValue.dead;
switch (arch) {
.x86_64 => {
return try self.genX8664BinMath(&inst.base, inst.args.lhs, inst.args.rhs, 5, 0x28);
},
else => return self.fail(inst.base.src, "TODO implement sub for {}", .{self.target.cpu.arch}),
}
}
/// ADD, SUB
fn genX8664BinMath(self: *Function, inst: *ir.Inst, op_lhs: *ir.Inst, op_rhs: *ir.Inst, opx: u8, mr: u8) !MCValue {
try self.code.ensureCapacity(self.code.items.len + 8);
const lhs = try self.resolveInst(op_lhs);
const rhs = try self.resolveInst(op_rhs);
// There are 2 operands, destination and source.
// Either one, but not both, can be a memory operand.
// Source operand can be an immediate, 8 bits or 32 bits.
// So, if either one of the operands dies with this instruction, we can use it
// as the result MCValue.
var dst_mcv: MCValue = undefined;
var src_mcv: MCValue = undefined;
var src_inst: *ir.Inst = undefined;
if (inst.operandDies(0) and lhs.isMutable()) {
// LHS dies; use it as the destination.
// Both operands cannot be memory.
src_inst = op_rhs;
if (lhs.isMemory() and rhs.isMemory()) {
dst_mcv = try self.copyToNewRegister(op_lhs);
src_mcv = rhs;
} else {
dst_mcv = lhs;
src_mcv = rhs;
}
} else if (inst.operandDies(1) and rhs.isMutable()) {
// RHS dies; use it as the destination.
// Both operands cannot be memory.
src_inst = op_lhs;
if (lhs.isMemory() and rhs.isMemory()) {
dst_mcv = try self.copyToNewRegister(op_rhs);
src_mcv = lhs;
} else {
dst_mcv = rhs;
src_mcv = lhs;
}
} else {
if (lhs.isMemory()) {
dst_mcv = try self.copyToNewRegister(op_lhs);
src_mcv = rhs;
src_inst = op_rhs;
} else {
dst_mcv = try self.copyToNewRegister(op_rhs);
src_mcv = lhs;
src_inst = op_lhs;
}
}
// This instruction supports only signed 32-bit immediates at most. If the immediate
// value is larger than this, we put it in a register.
// A potential opportunity for future optimization here would be keeping track
// of the fact that the instruction is available both as an immediate
// and as a register.
switch (src_mcv) {
.immediate => |imm| {
if (imm > std.math.maxInt(u31)) {
src_mcv = try self.copyToNewRegister(src_inst);
}
},
else => {},
}
try self.genX8664BinMathCode(inst.src, dst_mcv, src_mcv, opx, mr);
return dst_mcv;
}
fn genX8664BinMathCode(self: *Function, src: usize, dst_mcv: MCValue, src_mcv: MCValue, opx: u8, mr: u8) !void {
switch (dst_mcv) {
.none => unreachable,
.dead, .unreach, .immediate => unreachable,
.compare_flags_unsigned => unreachable,
.compare_flags_signed => unreachable,
.register => |dst_reg_usize| {
const dst_reg = @intToEnum(Reg(.x86_64), @intCast(u8, dst_reg_usize));
switch (src_mcv) {
.none => unreachable,
.dead, .unreach => unreachable,
.register => |src_reg_usize| {
const src_reg = @intToEnum(Reg(.x86_64), @intCast(u8, src_reg_usize));
self.rex(.{ .b = dst_reg.isExtended(), .r = src_reg.isExtended(), .w = dst_reg.size() == 64 });
self.code.appendSliceAssumeCapacity(&[_]u8{ mr + 0x1, 0xC0 | (@as(u8, src_reg.id() & 0b111) << 3) | @as(u8, dst_reg.id() & 0b111) });
},
.immediate => |imm| {
const imm32 = @intCast(u31, imm); // This case must be handled before calling genX8664BinMathCode.
// 81 /opx id
if (imm32 <= std.math.maxInt(u7)) {
self.rex(.{ .b = dst_reg.isExtended(), .w = dst_reg.size() == 64 });
self.code.appendSliceAssumeCapacity(&[_]u8{
0x83,
0xC0 | (opx << 3) | @truncate(u3, dst_reg.id()),
@intCast(u8, imm32),
});
} else {
self.rex(.{ .r = dst_reg.isExtended(), .w = dst_reg.size() == 64 });
self.code.appendSliceAssumeCapacity(&[_]u8{
0x81,
0xC0 | (opx << 3) | @truncate(u3, dst_reg.id()),
});
std.mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), imm32);
}
},
.embedded_in_code, .memory, .stack_offset => {
return self.fail(src, "TODO implement x86 ADD/SUB/CMP source memory", .{});
},
.compare_flags_unsigned => {
return self.fail(src, "TODO implement x86 ADD/SUB/CMP source compare flag (unsigned)", .{});
},
.compare_flags_signed => {
return self.fail(src, "TODO implement x86 ADD/SUB/CMP source compare flag (signed)", .{});
},
}
},
.embedded_in_code, .memory, .stack_offset => {
return self.fail(src, "TODO implement x86 ADD/SUB/CMP destination memory", .{});
},
}
}
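An illustrative worked example (not part of this commit) of the byte arithmetic above for RAX (id 0) and RCX (id 1), assuming a `std` import is in scope.
//   add rax, rcx  ->  0x48 0x01 0xC8        (mr + 0x1; ModRM = 0xC0 | src << 3 | dst)
//   sub rax, 8    ->  0x48 0x83 0xE8 0x08   (0x83 /opx ib with opx = 5; imm fits in u7)
//   cmp rax, rcx  ->  0x48 0x39 0xC8        (opx = 7, mr = 0x38)
test "x86_64 ModRM arithmetic sketch" {
    const dst: u8 = 0; // rax
    const src: u8 = 1; // rcx
    std.testing.expectEqual(@as(u8, 0xC8), 0xC0 | (src << 3) | dst);
    const opx: u8 = 5; // sub
    std.testing.expectEqual(@as(u8, 0xE8), 0xC0 | (opx << 3) | dst);
}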
fn genArg(self: *Function, inst: *ir.Inst.Arg) !MCValue {
return self.args[inst.args.index];
}
@ -410,17 +650,86 @@ const Function = struct {
}
fn genCmp(self: *Function, inst: *ir.Inst.Cmp, comptime arch: std.Target.Cpu.Arch) !MCValue {
// No side effects, so if it's unreferenced, do nothing.
if (inst.base.isUnused())
return MCValue.dead;
switch (arch) {
.x86_64 => {
try self.code.ensureCapacity(self.code.items.len + 8);
const lhs = try self.resolveInst(inst.args.lhs);
const rhs = try self.resolveInst(inst.args.rhs);
// There are 2 operands, destination and source.
// Either one, but not both, can be a memory operand.
// Source operand can be an immediate, 8 bits or 32 bits.
const dst_mcv = if (lhs.isImmediate() or (lhs.isMemory() and rhs.isMemory()))
try self.copyToNewRegister(inst.args.lhs)
else
lhs;
// This instruction supports only signed 32-bit immediates at most.
const src_mcv = try self.limitImmediateType(inst.args.rhs, i32);
try self.genX8664BinMathCode(inst.base.src, dst_mcv, src_mcv, 7, 0x38);
const info = inst.args.lhs.ty.intInfo(self.target.*);
if (info.signed) {
return MCValue{.compare_flags_signed = inst.args.op};
} else {
return MCValue{.compare_flags_unsigned = inst.args.op};
}
},
else => return self.fail(inst.base.src, "TODO implement cmp for {}", .{self.target.cpu.arch}),
}
}
fn genCondBr(self: *Function, inst: *ir.Inst.CondBr, comptime arch: std.Target.Cpu.Arch) !MCValue {
switch (arch) {
.i386, .x86_64 => {
try self.code.ensureCapacity(self.code.items.len + 6);
const cond = try self.resolveInst(inst.args.condition);
switch (cond) {
.compare_flags_signed => |cmp_op| {
// Here we map to the opposite opcode because the jump is to the false branch.
const opcode: u8 = switch (cmp_op) {
.gte => 0x8c,
.gt => 0x8e,
.neq => 0x84,
.lt => 0x8d,
.lte => 0x8f,
.eq => 0x85,
};
return self.genX86CondBr(inst, opcode, arch);
},
.compare_flags_unsigned => |cmp_op| {
// Here we map to the opposite opcode because the jump is to the false branch.
const opcode: u8 = switch (cmp_op) {
.gte => 0x82,
.gt => 0x86,
.neq => 0x84,
.lt => 0x83,
.lte => 0x87,
.eq => 0x85,
};
return self.genX86CondBr(inst, opcode, arch);
},
else => return self.fail(inst.base.src, "TODO implement condbr {} when condition not already in the compare flags", .{self.target.cpu.arch}),
}
},
else => return self.fail(inst.base.src, "TODO implement condbr for {}", .{self.target.cpu.arch}),
}
}
fn genX86CondBr(self: *Function, inst: *ir.Inst.CondBr, opcode: u8, comptime arch: std.Target.Cpu.Arch) !MCValue {
self.code.appendSliceAssumeCapacity(&[_]u8{0x0f, opcode});
const reloc = Reloc{ .rel32 = self.code.items.len };
self.code.items.len += 4;
try self.genBody(inst.args.true_body, arch);
try self.performReloc(inst.base.src, reloc);
try self.genBody(inst.args.false_body, arch);
return MCValue.unreach;
}
fn genIsNull(self: *Function, inst: *ir.Inst.IsNull, comptime arch: std.Target.Cpu.Arch) !MCValue {
switch (arch) {
else => return self.fail(inst.base.src, "TODO implement isnull for {}", .{self.target.cpu.arch}),
@ -435,31 +744,54 @@ const Function = struct {
}
}
fn genRelativeFwdJump(self: *Function, src: usize, comptime arch: std.Target.Cpu.Arch, amount: u32) !void {
switch (arch) {
.i386, .x86_64 => {
// TODO x86 treats the operands as signed
if (amount <= std.math.maxInt(u8)) {
try self.code.resize(self.code.items.len + 2);
self.code.items[self.code.items.len - 2] = 0xeb;
self.code.items[self.code.items.len - 1] = @intCast(u8, amount);
} else {
try self.code.resize(self.code.items.len + 5);
self.code.items[self.code.items.len - 5] = 0xe9; // jmp rel32
const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4];
mem.writeIntLittle(u32, imm_ptr, amount);
}
fn genBlock(self: *Function, inst: *ir.Inst.Block, comptime arch: std.Target.Cpu.Arch) !MCValue {
if (inst.base.ty.hasCodeGenBits()) {
return self.fail(inst.base.src, "TODO codegen Block with non-void type", .{});
}
// A block is nothing but a setup to be able to jump to the end.
defer inst.codegen.relocs.deinit(self.gpa);
try self.genBody(inst.args.body, arch);
for (inst.codegen.relocs.items) |reloc| try self.performReloc(inst.base.src, reloc);
return MCValue.none;
}
fn performReloc(self: *Function, src: usize, reloc: Reloc) !void {
switch (reloc) {
.rel32 => |pos| {
const amt = self.code.items.len - (pos + 4);
const s32_amt = std.math.cast(i32, amt) catch
return self.fail(src, "unable to perform relocation: jump too far", .{});
mem.writeIntLittle(i32, self.code.items[pos..][0..4], s32_amt);
},
else => return self.fail(src, "TODO implement relative forward jump for {}", .{self.target.cpu.arch}),
}
}
fn genBlock(self: *Function, inst: *ir.Inst.Block, comptime arch: std.Target.Cpu.Arch) !MCValue {
fn genBr(self: *Function, inst: *ir.Inst.Br, comptime arch: std.Target.Cpu.Arch) !MCValue {
switch (arch) {
else => return self.fail(inst.base.src, "TODO implement codegen Block for {}", .{self.target.cpu.arch}),
else => return self.fail(inst.base.src, "TODO implement br for {}", .{self.target.cpu.arch}),
}
}
fn genBrVoid(self: *Function, inst: *ir.Inst.BrVoid, comptime arch: std.Target.Cpu.Arch) !MCValue {
// Emit a jump with a relocation. It will be patched up after the block ends.
try inst.args.block.codegen.relocs.ensureCapacity(self.gpa, inst.args.block.codegen.relocs.items.len + 1);
switch (arch) {
.i386, .x86_64 => {
// TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
// which is available if the jump is 127 bytes or less forward.
try self.code.resize(self.code.items.len + 5);
self.code.items[self.code.items.len - 5] = 0xe9; // jmp rel32
// Leave the jump offset undefined
inst.args.block.codegen.relocs.appendAssumeCapacity(.{ .rel32 = self.code.items.len - 4 });
},
else => return self.fail(inst.base.src, "TODO implement brvoid for {}", .{self.target.cpu.arch}),
}
return .none;
}
fn genAsm(self: *Function, inst: *ir.Inst.Assembly, comptime arch: Target.Cpu.Arch) !MCValue {
if (arch != .x86_64 and arch != .i386) {
return self.fail(inst.base.src, "TODO implement inline asm support for more architectures", .{});
@ -502,30 +834,38 @@ const Function = struct {
/// resulting REX is meaningful, but will remain the same if it is not.
/// * Deliberately inserting a "meaningless REX" requires explicit usage of
/// 0x40, and cannot be done via this function.
fn REX(self: *Function, arg: struct { B: bool = false, W: bool = false, X: bool = false, R: bool = false }) !void {
fn rex(self: *Function, arg: struct { b: bool = false, w: bool = false, x: bool = false, r: bool = false }) void {
// From section 2.2.1.2 of the manual, REX is encoded as b0100WRXB.
var value: u8 = 0x40;
if (arg.B) {
if (arg.b) {
value |= 0x1;
}
if (arg.X) {
if (arg.x) {
value |= 0x2;
}
if (arg.R) {
if (arg.r) {
value |= 0x4;
}
if (arg.W) {
if (arg.w) {
value |= 0x8;
}
if (value != 0x40) {
try self.code.append(value);
self.code.appendAssumeCapacity(value);
}
}
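An illustrative check (not part of this commit) of the 0b0100WRXB layout, assuming a `std` import is in scope: `rex(.{ .w = true })` appends 0x48, `rex(.{ .w = true, .b = true })` appends 0x49, and `rex(.{})` appends nothing because the value stays at the meaningless 0x40.
test "REX byte layout sketch" {
    std.testing.expectEqual(@as(u8, 0x48), 0x40 | 0x8); // W
    std.testing.expectEqual(@as(u8, 0x49), 0x40 | 0x8 | 0x1); // W and B
}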
fn genSetReg(self: *Function, src: usize, comptime arch: Target.Cpu.Arch, reg: Reg(arch), mcv: MCValue) error{ CodegenFail, OutOfMemory }!void {
switch (arch) {
.x86_64 => switch (mcv) {
.none, .unreach => unreachable,
.dead => unreachable,
.none => unreachable,
.unreach => unreachable,
.compare_flags_unsigned => |op| {
return self.fail(src, "TODO set register with compare flags value (unsigned)", .{});
},
.compare_flags_signed => |op| {
return self.fail(src, "TODO set register with compare flags value (signed)", .{});
},
.immediate => |x| {
if (reg.size() != 64) {
return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
@ -544,11 +884,11 @@ const Function = struct {
// If we're accessing e.g. r8d, we need to use a REX prefix before the actual operation. Since
// this is a 32-bit operation, the W flag is set to zero. X is also zero, as we're not using a SIB.
// Both R and B are set, as we're extending, in effect, the register bits *and* the operand.
try self.REX(.{ .R = reg.isExtended(), .B = reg.isExtended() });
try self.code.ensureCapacity(self.code.items.len + 3);
self.rex(.{ .r = reg.isExtended(), .b = reg.isExtended() });
const id = @as(u8, reg.id() & 0b111);
return self.code.appendSlice(&[_]u8{
0x31, 0xC0 | id << 3 | id,
});
self.code.appendSliceAssumeCapacity(&[_]u8{ 0x31, 0xC0 | id << 3 | id });
return;
}
if (x <= std.math.maxInt(u32)) {
// Next best case: if we set the lower four bytes, the upper four will be zeroed.
@ -581,9 +921,9 @@ const Function = struct {
// Since we always need a REX here, let's just check if we also need to set REX.B.
//
// In this case, the encoding of the REX byte is 0b0100100B
try self.REX(.{ .W = true, .B = reg.isExtended() });
try self.code.resize(self.code.items.len + 9);
try self.code.ensureCapacity(self.code.items.len + 10);
self.rex(.{ .w = true, .b = reg.isExtended() });
self.code.items.len += 9;
self.code.items[self.code.items.len - 9] = 0xB8 | @as(u8, reg.id() & 0b111);
const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8];
mem.writeIntLittle(u64, imm_ptr, x);
@ -594,13 +934,13 @@ const Function = struct {
}
// We need the offset from RIP in a signed i32 twos complement.
// The instruction is 7 bytes long and RIP points to the next instruction.
//
try self.code.ensureCapacity(self.code.items.len + 7);
// 64-bit LEA is encoded as REX.W 8D /r. If the register is extended, the REX byte is modified,
// but the operation size is unchanged. Since we're using a disp32, we want mode 0 and lower three
// bits as five.
// REX 0x8D 0b00RRR101, where RRR is the lower three bits of the id.
try self.REX(.{ .W = true, .B = reg.isExtended() });
try self.code.resize(self.code.items.len + 6);
self.rex(.{ .w = true, .b = reg.isExtended() });
self.code.items.len += 6;
const rip = self.code.items.len;
const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip);
const offset = @intCast(i32, big_offset);
@ -620,9 +960,10 @@ const Function = struct {
// If the *source* is extended, the B field must be 1.
// Since the register is being accessed directly, the R/M mode is three. The reg field (the middle
// three bits) contain the destination, and the R/M field (the lower three bits) contain the source.
try self.REX(.{ .W = true, .R = reg.isExtended(), .B = src_reg.isExtended() });
try self.code.ensureCapacity(self.code.items.len + 3);
self.rex(.{ .w = true, .r = reg.isExtended(), .b = src_reg.isExtended() });
const R = 0xC0 | (@as(u8, reg.id() & 0b111) << 3) | @as(u8, src_reg.id() & 0b111);
try self.code.appendSlice(&[_]u8{ 0x8B, R });
self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, R });
},
.memory => |x| {
if (reg.size() != 64) {
@ -636,14 +977,14 @@ const Function = struct {
// The SIB must be 0x25, to indicate a disp32 with no scaled index.
// 0b00RRR100, where RRR is the lower three bits of the register ID.
// The instruction is thus eight bytes; REX 0x8B 0b00RRR100 0x25 followed by a four-byte disp32.
try self.REX(.{ .W = true, .B = reg.isExtended() });
try self.code.resize(self.code.items.len + 7);
const r = 0x04 | (@as(u8, reg.id() & 0b111) << 3);
self.code.items[self.code.items.len - 7] = 0x8B;
self.code.items[self.code.items.len - 6] = r;
self.code.items[self.code.items.len - 5] = 0x25;
const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4];
mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x));
try self.code.ensureCapacity(self.code.items.len + 8);
self.rex(.{ .w = true, .b = reg.isExtended() });
self.code.appendSliceAssumeCapacity(&[_]u8{
0x8B,
0x04 | (@as(u8, reg.id() & 0b111) << 3), // R
0x25,
});
mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), @intCast(u32, x));
} else {
// If this is RAX, we can use a direct load; otherwise, we need to load the address, then indirectly load
// the value.
@ -674,15 +1015,15 @@ const Function = struct {
// Currently, we're only allowing 64-bit registers, so we need the `REX.W 8B /r` variant.
// TODO: determine whether to allow other sized registers, and if so, handle them properly.
// This operation requires three bytes: REX 0x8B R/M
//
try self.code.ensureCapacity(self.code.items.len + 3);
// For this operation, we want R/M mode *zero* (use register indirectly), and the two register
// values must match. Thus, it's 00ABCABC where ABC is the lower three bits of the register ID.
//
// Furthermore, if this is an extended register, both B and R must be set in the REX byte, as *both*
// register operands need to be marked as extended.
try self.REX(.{ .W = true, .B = reg.isExtended(), .R = reg.isExtended() });
self.rex(.{ .w = true, .b = reg.isExtended(), .r = reg.isExtended() });
const RM = (@as(u8, reg.id() & 0b111) << 3) | @truncate(u3, reg.id());
try self.code.appendSlice(&[_]u8{ 0x8B, RM });
self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, RM });
}
}
},
@ -705,22 +1046,58 @@ const Function = struct {
}
fn resolveInst(self: *Function, inst: *ir.Inst) !MCValue {
if (self.inst_table.get(inst)) |mcv| {
return mcv;
}
// Constants have static lifetimes, so they are always memoized in the outer most table.
if (inst.cast(ir.Inst.Constant)) |const_inst| {
const mcvalue = try self.genTypedValue(inst.src, .{ .ty = inst.ty, .val = const_inst.val });
try self.inst_table.putNoClobber(inst, mcvalue);
return mcvalue;
} else {
return self.inst_table.get(inst).?;
const branch = &self.branch_stack.items[0];
const gop = try branch.inst_table.getOrPut(self.gpa, inst);
if (!gop.found_existing) {
gop.entry.value = try self.genTypedValue(inst.src, .{ .ty = inst.ty, .val = const_inst.val });
}
return gop.entry.value;
}
// Treat each stack item as a "layer" on top of the previous one.
var i: usize = self.branch_stack.items.len;
while (true) {
i -= 1;
if (self.branch_stack.items[i].inst_table.get(inst)) |mcv| {
return mcv;
}
}
}
fn copyToNewRegister(self: *Function, inst: *ir.Inst) !MCValue {
return self.fail(inst.src, "TODO implement copyToNewRegister", .{});
}
/// If the MCValue is an immediate, and it does not fit within this type,
/// we put it in a register.
/// A potential opportunity for future optimization here would be keeping track
/// of the fact that the instruction is available both as an immediate
/// and as a register.
fn limitImmediateType(self: *Function, inst: *ir.Inst, comptime T: type) !MCValue {
const mcv = try self.resolveInst(inst);
const ti = @typeInfo(T).Int;
switch (mcv) {
.immediate => |imm| {
// This immediate is unsigned.
const U = @Type(.{ .Int = .{
.bits = ti.bits - @boolToInt(ti.is_signed),
.is_signed = false,
}});
if (imm >= std.math.maxInt(U)) {
return self.copyToNewRegister(inst);
}
},
else => {},
}
return mcv;
}
fn genTypedValue(self: *Function, src: usize, typed_value: TypedValue) !MCValue {
const ptr_bits = self.target.cpu.arch.ptrBitWidth();
const ptr_bytes: u64 = @divExact(ptr_bits, 8);
const allocator = self.code.allocator;
switch (typed_value.ty.zigTypeTag()) {
.Pointer => {
if (typed_value.val.cast(Value.Payload.DeclRef)) |payload| {
@ -747,7 +1124,7 @@ const Function = struct {
fn fail(self: *Function, src: usize, comptime format: []const u8, args: var) error{ CodegenFail, OutOfMemory } {
@setCold(true);
assert(self.err_msg == null);
self.err_msg = try ErrorMsg.create(self.code.allocator, src, format, args);
self.err_msg = try ErrorMsg.create(self.bin_file.allocator, src, format, args);
return error.CodegenFail;
}
};


@ -2,6 +2,8 @@ const std = @import("std");
const Value = @import("value.zig").Value;
const Type = @import("type.zig").Type;
const Module = @import("Module.zig");
const assert = std.debug.assert;
const codegen = @import("codegen.zig");
/// These are in-memory, analyzed instructions. See `zir.Inst` for the representation
/// of instructions that correspond to the ZIR text format.
@ -10,17 +12,43 @@ const Module = @import("Module.zig");
/// a memory location for the value to survive after a const instruction.
pub const Inst = struct {
tag: Tag,
/// Each bit represents the index of an `Inst` parameter in the `args` field.
/// If a bit is set, it marks the end of the lifetime of the corresponding
/// instruction parameter. For example, 0b000_00101 means that the first and
/// third `Inst` parameters' lifetimes end after this instruction, and will
/// not have any more following references.
/// The most significant bit being set means that the instruction itself is
/// never referenced, in other words its lifetime ends as soon as it finishes.
/// If bit 7 (0b1xxx_xxxx) is set, it means this instruction itself is unreferenced.
/// If bit 6 (0bx1xx_xxxx) is set, it means this is a special case and the
/// lifetimes of operands are encoded elsewhere.
deaths: u8 = undefined,
ty: Type,
/// Byte offset into the source.
src: usize,
pub fn isUnused(self: Inst) bool {
return (self.deaths & 0b1000_0000) != 0;
}
pub fn operandDies(self: Inst, index: u3) bool {
assert(index < 6);
return @truncate(u1, self.deaths >> index) != 0;
}
pub fn specialOperandDeaths(self: Inst) bool {
return (self.deaths & 0b0100_0000) != 0;
}
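An illustrative check (not part of this commit) of the bit layout documented above, assuming a `std` import is in scope.
test "deaths bit layout sketch" {
    const deaths: u8 = 0b1000_0101;
    // Operands 0 and 2 die at this instruction, operand 1 does not, and
    // bit 7 marks the instruction's own result as unreferenced.
    std.testing.expect(@truncate(u1, deaths >> 0) != 0);
    std.testing.expect(@truncate(u1, deaths >> 1) == 0);
    std.testing.expect(@truncate(u1, deaths >> 2) != 0);
    std.testing.expect((deaths & 0b1000_0000) != 0);
}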
pub const Tag = enum {
add,
arg,
assembly,
bitcast,
block,
br,
breakpoint,
brvoid,
call,
cmp,
condbr,
@ -30,6 +58,7 @@ pub const Inst = struct {
ptrtoint,
ret,
retvoid,
sub,
unreach,
/// Returns whether the instruction is one of the control flow "noreturn" types.
@ -43,14 +72,17 @@ pub const Inst = struct {
.bitcast,
.block,
.breakpoint,
.call,
.cmp,
.constant,
.isnonnull,
.isnull,
.ptrtoint,
.call,
.sub,
=> false,
.br,
.brvoid,
.condbr,
.ret,
.retvoid,
@ -128,6 +160,17 @@ pub const Inst = struct {
args: struct {
body: Body,
},
/// This memory is reserved for codegen code to do whatever it needs to here.
codegen: codegen.BlockData = .{},
};
pub const Br = struct {
pub const base_tag = Tag.br;
base: Inst,
args: struct {
block: *Block,
operand: *Inst,
},
};
pub const Breakpoint = struct {
@ -136,6 +179,14 @@ pub const Inst = struct {
args: void,
};
pub const BrVoid = struct {
pub const base_tag = Tag.brvoid;
base: Inst,
args: struct {
block: *Block,
},
};
pub const Call = struct {
pub const base_tag = Tag.call;
base: Inst,
@ -165,6 +216,12 @@ pub const Inst = struct {
true_body: Body,
false_body: Body,
},
/// Set of instructions whose lifetimes end at the start of one of the branches.
/// The `true` branch is first: `deaths[0..true_death_count]`.
/// The `false` branch is next: `(deaths + true_death_count)[..false_death_count]`.
deaths: [*]*Inst = undefined,
true_death_count: u32 = 0,
false_death_count: u32 = 0,
};
pub const Constant = struct {
@ -215,6 +272,16 @@ pub const Inst = struct {
args: void,
};
pub const Sub = struct {
pub const base_tag = Tag.sub;
base: Inst,
args: struct {
lhs: *Inst,
rhs: *Inst,
},
};
pub const Unreach = struct {
pub const base_tag = Tag.unreach;
base: Inst,


@ -206,6 +206,19 @@ pub const File = struct {
};
}
/// Must be called only after a successful call to `updateDecl`.
pub fn updateDeclExports(
base: *File,
module: *Module,
decl: *const Module.Decl,
exports: []const *Module.Export,
) !void {
switch (base.tag) {
.Elf => return @fieldParentPtr(Elf, "base", base).updateDeclExports(module, decl, exports),
.C => return {},
}
}
pub const Tag = enum {
Elf,
C,
@ -248,7 +261,7 @@ pub const File = struct {
pub fn updateDecl(self: *File.C, module: *Module, decl: *Module.Decl) !void {
cgen.generate(self, decl) catch |err| {
if (err == error.CGenFailure) {
try module.failed_decls.put(decl, self.error_msg);
try module.failed_decls.put(module.gpa, decl, self.error_msg);
}
return err;
};
@ -566,7 +579,7 @@ pub const File = struct {
const file_size = self.options.program_code_size_hint;
const p_align = 0x1000;
const off = self.findFreeSpace(file_size, p_align);
//std.log.debug(.link, "found PT_LOAD free space 0x{x} to 0x{x}\n", .{ off, off + file_size });
std.log.debug(.link, "found PT_LOAD free space 0x{x} to 0x{x}\n", .{ off, off + file_size });
try self.program_headers.append(self.allocator, .{
.p_type = elf.PT_LOAD,
.p_offset = off,
@ -587,7 +600,7 @@ pub const File = struct {
// page align.
const p_align = 0x1000;
const off = self.findFreeSpace(file_size, p_align);
//std.log.debug(.link, "found PT_LOAD free space 0x{x} to 0x{x}\n", .{ off, off + file_size });
std.log.debug(.link, "found PT_LOAD free space 0x{x} to 0x{x}\n", .{ off, off + file_size });
// TODO instead of hard coding the vaddr, make a function to find a vaddr to put things at.
// we'll need to re-use that function anyway, in case the GOT grows and overlaps something
// else in virtual memory.
@ -609,7 +622,7 @@ pub const File = struct {
assert(self.shstrtab.items.len == 0);
try self.shstrtab.append(self.allocator, 0); // need a 0 at position 0
const off = self.findFreeSpace(self.shstrtab.items.len, 1);
//std.log.debug(.link, "found shstrtab free space 0x{x} to 0x{x}\n", .{ off, off + self.shstrtab.items.len });
std.log.debug(.link, "found shstrtab free space 0x{x} to 0x{x}\n", .{ off, off + self.shstrtab.items.len });
try self.sections.append(self.allocator, .{
.sh_name = try self.makeString(".shstrtab"),
.sh_type = elf.SHT_STRTAB,
@ -667,7 +680,7 @@ pub const File = struct {
const each_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Sym) else @sizeOf(elf.Elf64_Sym);
const file_size = self.options.symbol_count_hint * each_size;
const off = self.findFreeSpace(file_size, min_align);
//std.log.debug(.link, "found symtab free space 0x{x} to 0x{x}\n", .{ off, off + file_size });
std.log.debug(.link, "found symtab free space 0x{x} to 0x{x}\n", .{ off, off + file_size });
try self.sections.append(self.allocator, .{
.sh_name = try self.makeString(".symtab"),
@ -783,7 +796,7 @@ pub const File = struct {
shstrtab_sect.sh_offset = self.findFreeSpace(needed_size, 1);
}
shstrtab_sect.sh_size = needed_size;
//std.log.debug(.link, "shstrtab start=0x{x} end=0x{x}\n", .{ shstrtab_sect.sh_offset, shstrtab_sect.sh_offset + needed_size });
std.log.debug(.link, "shstrtab start=0x{x} end=0x{x}\n", .{ shstrtab_sect.sh_offset, shstrtab_sect.sh_offset + needed_size });
try self.file.?.pwriteAll(self.shstrtab.items, shstrtab_sect.sh_offset);
if (!self.shdr_table_dirty) {
@ -829,7 +842,7 @@ pub const File = struct {
for (buf) |*shdr, i| {
shdr.* = self.sections.items[i];
//std.log.debug(.link, "writing section {}\n", .{shdr.*});
std.log.debug(.link, "writing section {}\n", .{shdr.*});
if (foreign_endian) {
bswapAllFields(elf.Elf64_Shdr, shdr);
}
@ -840,6 +853,7 @@ pub const File = struct {
self.shdr_table_dirty = false;
}
if (self.entry_addr == null and self.options.output_mode == .Exe) {
std.log.debug(.link, "no_entry_point_found = true\n", .{});
self.error_flags.no_entry_point_found = true;
} else {
self.error_flags.no_entry_point_found = false;
@ -1153,10 +1167,10 @@ pub const File = struct {
try self.offset_table_free_list.ensureCapacity(self.allocator, self.local_symbols.items.len);
if (self.local_symbol_free_list.popOrNull()) |i| {
//std.log.debug(.link, "reusing symbol index {} for {}\n", .{i, decl.name});
std.log.debug(.link, "reusing symbol index {} for {}\n", .{i, decl.name});
decl.link.local_sym_index = i;
} else {
//std.log.debug(.link, "allocating symbol index {} for {}\n", .{self.local_symbols.items.len, decl.name});
std.log.debug(.link, "allocating symbol index {} for {}\n", .{self.local_symbols.items.len, decl.name});
decl.link.local_sym_index = @intCast(u32, self.local_symbols.items.len);
_ = self.local_symbols.addOneAssumeCapacity();
}
@ -1204,7 +1218,7 @@ pub const File = struct {
.appended => code_buffer.items,
.fail => |em| {
decl.analysis = .codegen_failure;
try module.failed_decls.put(decl, em);
try module.failed_decls.put(module.gpa, decl, em);
return;
},
};
@ -1224,11 +1238,11 @@ pub const File = struct {
!mem.isAlignedGeneric(u64, local_sym.st_value, required_alignment);
if (need_realloc) {
const vaddr = try self.growTextBlock(&decl.link, code.len, required_alignment);
//std.log.debug(.link, "growing {} from 0x{x} to 0x{x}\n", .{ decl.name, local_sym.st_value, vaddr });
std.log.debug(.link, "growing {} from 0x{x} to 0x{x}\n", .{ decl.name, local_sym.st_value, vaddr });
if (vaddr != local_sym.st_value) {
local_sym.st_value = vaddr;
//std.log.debug(.link, " (writing new offset table entry)\n", .{});
std.log.debug(.link, " (writing new offset table entry)\n", .{});
self.offset_table.items[decl.link.offset_table_index] = vaddr;
try self.writeOffsetTableEntry(decl.link.offset_table_index);
}
@ -1246,7 +1260,7 @@ pub const File = struct {
const decl_name = mem.spanZ(decl.name);
const name_str_index = try self.makeString(decl_name);
const vaddr = try self.allocateTextBlock(&decl.link, code.len, required_alignment);
//std.log.debug(.link, "allocated text block for {} at 0x{x}\n", .{ decl_name, vaddr });
std.log.debug(.link, "allocated text block for {} at 0x{x}\n", .{ decl_name, vaddr });
errdefer self.freeTextBlock(&decl.link);
local_sym.* = .{
@ -1290,7 +1304,7 @@ pub const File = struct {
for (exports) |exp| {
if (exp.options.section) |section_name| {
if (!mem.eql(u8, section_name, ".text")) {
try module.failed_exports.ensureCapacity(module.failed_exports.items().len + 1);
try module.failed_exports.ensureCapacity(module.gpa, module.failed_exports.items().len + 1);
module.failed_exports.putAssumeCapacityNoClobber(
exp,
try Module.ErrorMsg.create(self.allocator, 0, "Unimplemented: ExportOptions.section", .{}),
@ -1308,7 +1322,7 @@ pub const File = struct {
},
.Weak => elf.STB_WEAK,
.LinkOnce => {
try module.failed_exports.ensureCapacity(module.failed_exports.items().len + 1);
try module.failed_exports.ensureCapacity(module.gpa, module.failed_exports.items().len + 1);
module.failed_exports.putAssumeCapacityNoClobber(
exp,
try Module.ErrorMsg.create(self.allocator, 0, "Unimplemented: GlobalLinkage.LinkOnce", .{}),


@ -0,0 +1,139 @@
const std = @import("std");
const ir = @import("ir.zig");
const trace = @import("tracy.zig").trace;
/// Perform Liveness Analysis over the `Body`. Each `Inst` will have its `deaths` field populated.
pub fn analyze(
/// Used for temporary storage during the analysis.
gpa: *std.mem.Allocator,
/// Used to tack on extra allocations in the same lifetime as the existing instructions.
arena: *std.mem.Allocator,
body: ir.Body,
) error{OutOfMemory}!void {
const tracy = trace(@src());
defer tracy.end();
var table = std.AutoHashMap(*ir.Inst, void).init(gpa);
defer table.deinit();
try table.ensureCapacity(body.instructions.len);
try analyzeWithTable(arena, &table, body);
}
fn analyzeWithTable(arena: *std.mem.Allocator, table: *std.AutoHashMap(*ir.Inst, void), body: ir.Body) error{OutOfMemory}!void {
var i: usize = body.instructions.len;
while (i != 0) {
i -= 1;
const base = body.instructions[i];
try analyzeInstGeneric(arena, table, base);
}
}
fn analyzeInstGeneric(arena: *std.mem.Allocator, table: *std.AutoHashMap(*ir.Inst, void), base: *ir.Inst) error{OutOfMemory}!void {
// Obtain the corresponding instruction type based on the tag type.
inline for (std.meta.declarations(ir.Inst)) |decl| {
switch (decl.data) {
.Type => |T| {
if (@hasDecl(T, "base_tag")) {
if (T.base_tag == base.tag) {
return analyzeInst(arena, table, T, @fieldParentPtr(T, "base", base));
}
}
},
else => {},
}
}
unreachable;
}
fn analyzeInst(arena: *std.mem.Allocator, table: *std.AutoHashMap(*ir.Inst, void), comptime T: type, inst: *T) error{OutOfMemory}!void {
inst.base.deaths = 0;
switch (T) {
ir.Inst.Constant => return,
ir.Inst.Block => {
try analyzeWithTable(arena, table, inst.args.body);
// We let this continue so that it can possibly mark the block as
// unreferenced below.
},
ir.Inst.CondBr => {
var true_table = std.AutoHashMap(*ir.Inst, void).init(table.allocator);
defer true_table.deinit();
try true_table.ensureCapacity(inst.args.true_body.instructions.len);
try analyzeWithTable(arena, &true_table, inst.args.true_body);
var false_table = std.AutoHashMap(*ir.Inst, void).init(table.allocator);
defer false_table.deinit();
try false_table.ensureCapacity(inst.args.false_body.instructions.len);
try analyzeWithTable(arena, &false_table, inst.args.false_body);
// Each death that occurs inside one branch, but not the other, needs
// to be added as a death immediately upon entering the other branch.
// During the iteration of the table, we additionally propagate the
// deaths to the parent table.
var true_entry_deaths = std.ArrayList(*ir.Inst).init(table.allocator);
defer true_entry_deaths.deinit();
var false_entry_deaths = std.ArrayList(*ir.Inst).init(table.allocator);
defer false_entry_deaths.deinit();
{
var it = false_table.iterator();
while (it.next()) |entry| {
const false_death = entry.key;
if (!true_table.contains(false_death)) {
try true_entry_deaths.append(false_death);
// Here we are only adding to the parent table if the following iteration
// would miss it.
try table.putNoClobber(false_death, {});
}
}
}
{
var it = true_table.iterator();
while (it.next()) |entry| {
const true_death = entry.key;
try table.putNoClobber(true_death, {});
if (!false_table.contains(true_death)) {
try false_entry_deaths.append(true_death);
}
}
}
inst.true_death_count = std.math.cast(@TypeOf(inst.true_death_count), true_entry_deaths.items.len) catch return error.OutOfMemory;
inst.false_death_count = std.math.cast(@TypeOf(inst.false_death_count), false_entry_deaths.items.len) catch return error.OutOfMemory;
const allocated_slice = try arena.alloc(*ir.Inst, true_entry_deaths.items.len + false_entry_deaths.items.len);
inst.deaths = allocated_slice.ptr;
// Continue on with the instruction analysis. The following code will find the condition
// instruction, and the deaths flag for the CondBr instruction will indicate whether the
// condition's lifetime ends immediately before entering any branch.
},
else => {},
}
if (!table.contains(&inst.base)) {
// No tombstone for this instruction means it is never referenced,
// and its birth marks its own death. Very metal 🤘
inst.base.deaths |= 1 << 7;
}
const Args = ir.Inst.Args(T);
if (Args == void) {
return;
}
comptime var arg_index: usize = 0;
inline for (std.meta.fields(Args)) |field| {
if (field.field_type == *ir.Inst) {
if (arg_index >= 6) {
@compileError("out of bits to mark deaths of operands");
}
const prev = try table.fetchPut(@field(inst.args, field.name), {});
if (prev == null) {
// Death.
inst.base.deaths |= 1 << arg_index;
}
arg_index += 1;
}
}
std.log.debug(.liveness, "analyze {}: 0b{b}\n", .{inst.base.tag, inst.base.deaths});
}


@ -50,7 +50,10 @@ pub fn log(
const scope_prefix = "(" ++ switch (scope) {
// Uncomment to hide logs
//.compiler,
.link => return,
.module,
.liveness,
.link,
=> return,
else => @tagName(scope),
} ++ "): ";
@ -510,7 +513,7 @@ fn updateModule(gpa: *Allocator, module: *Module, zir_out_path: ?[]const u8) !vo
const update_nanos = timer.read();
var errors = try module.getAllErrorsAlloc();
defer errors.deinit(module.allocator);
defer errors.deinit(module.gpa);
if (errors.list.len != 0) {
for (errors.list) |full_err_msg| {


@ -38,6 +38,8 @@ pub const Inst = struct {
arg,
/// A labeled block of code, which can return a value.
block,
/// Return a value from a `Block`.
@"break",
breakpoint,
/// Same as `break` but without an operand; the operand is assumed to be the void value.
breakvoid,
@ -57,6 +59,7 @@ pub const Inst = struct {
/// String Literal. Makes an anonymous Decl and then takes a pointer to it.
str,
int,
inttype,
ptrtoint,
fieldptr,
deref,
@ -73,6 +76,7 @@ pub const Inst = struct {
bitcast,
elemptr,
add,
sub,
cmp,
condbr,
isnull,
@ -83,6 +87,7 @@ pub const Inst = struct {
return switch (tag) {
.arg => Arg,
.block => Block,
.@"break" => Break,
.breakpoint => Breakpoint,
.breakvoid => BreakVoid,
.call => Call,
@ -94,6 +99,7 @@ pub const Inst = struct {
.@"const" => Const,
.str => Str,
.int => Int,
.inttype => IntType,
.ptrtoint => PtrToInt,
.fieldptr => FieldPtr,
.deref => Deref,
@ -110,6 +116,7 @@ pub const Inst = struct {
.bitcast => BitCast,
.elemptr => ElemPtr,
.add => Add,
.sub => Sub,
.cmp => Cmp,
.condbr => CondBr,
.isnull => IsNull,
@ -139,12 +146,22 @@ pub const Inst = struct {
base: Inst,
positionals: struct {
label: []const u8,
body: Module.Body,
},
kw_args: struct {},
};
pub const Break = struct {
pub const base_tag = Tag.@"break";
base: Inst,
positionals: struct {
block: *Block,
operand: *Inst,
},
kw_args: struct {},
};
pub const Breakpoint = struct {
pub const base_tag = Tag.breakpoint;
base: Inst,
@ -158,7 +175,7 @@ pub const Inst = struct {
base: Inst,
positionals: struct {
label: []const u8,
block: *Block,
},
kw_args: struct {},
};
@ -367,6 +384,17 @@ pub const Inst = struct {
},
};
pub const IntType = struct {
pub const base_tag = Tag.inttype;
base: Inst,
positionals: struct {
signed: *Inst,
bits: *Inst,
},
kw_args: struct {},
};
pub const Export = struct {
pub const base_tag = Tag.@"export";
base: Inst,
@ -512,6 +540,19 @@ pub const Inst = struct {
kw_args: struct {},
};
pub const Sub = struct {
pub const base_tag = Tag.sub;
base: Inst,
positionals: struct {
lhs: *Inst,
rhs: *Inst,
},
kw_args: struct {},
};
/// TODO get rid of the op positional arg and make that data part of
/// the base Inst tag.
pub const Cmp = struct {
pub const base_tag = Tag.cmp;
base: Inst,
@ -582,8 +623,6 @@ pub const Module = struct {
self.writeToStream(std.heap.page_allocator, std.io.getStdErr().outStream()) catch {};
}
const InstPtrTable = std.AutoHashMap(*Inst, struct { inst: *Inst, index: ?usize, name: []const u8 });
const DeclAndIndex = struct {
decl: *Decl,
index: usize,
@ -617,80 +656,100 @@ pub const Module = struct {
/// The allocator is used for temporary storage, but this function always returns
/// with no resources allocated.
pub fn writeToStream(self: Module, allocator: *Allocator, stream: var) !void {
// First, build a map of *Inst to @ or % indexes
var inst_table = InstPtrTable.init(allocator);
defer inst_table.deinit();
var write = Writer{
.module = &self,
.inst_table = InstPtrTable.init(allocator),
.block_table = std.AutoHashMap(*Inst.Block, []const u8).init(allocator),
.arena = std.heap.ArenaAllocator.init(allocator),
.indent = 2,
};
defer write.arena.deinit();
defer write.inst_table.deinit();
defer write.block_table.deinit();
try inst_table.ensureCapacity(self.decls.len);
// First, build a map of *Inst to @ or % indexes
try write.inst_table.ensureCapacity(self.decls.len);
for (self.decls) |decl, decl_i| {
try inst_table.putNoClobber(decl.inst, .{ .inst = decl.inst, .index = null, .name = decl.name });
try write.inst_table.putNoClobber(decl.inst, .{ .inst = decl.inst, .index = null, .name = decl.name });
if (decl.inst.cast(Inst.Fn)) |fn_inst| {
for (fn_inst.positionals.body.instructions) |inst, inst_i| {
try inst_table.putNoClobber(inst, .{ .inst = inst, .index = inst_i, .name = undefined });
try write.inst_table.putNoClobber(inst, .{ .inst = inst, .index = inst_i, .name = undefined });
}
}
}
for (self.decls) |decl, i| {
try stream.print("@{} ", .{decl.name});
try self.writeInstToStream(stream, decl.inst, &inst_table);
try write.writeInstToStream(stream, decl.inst);
try stream.writeByte('\n');
}
}
};
const InstPtrTable = std.AutoHashMap(*Inst, struct { inst: *Inst, index: ?usize, name: []const u8 });
const Writer = struct {
module: *const Module,
inst_table: InstPtrTable,
block_table: std.AutoHashMap(*Inst.Block, []const u8),
arena: std.heap.ArenaAllocator,
indent: usize,
fn writeInstToStream(
self: Module,
self: *Writer,
stream: var,
inst: *Inst,
inst_table: *const InstPtrTable,
) @TypeOf(stream).Error!void {
) (@TypeOf(stream).Error || error{OutOfMemory})!void {
// TODO I tried implementing this with an inline for loop and hit a compiler bug
switch (inst.tag) {
.arg => return self.writeInstToStreamGeneric(stream, .arg, inst, inst_table),
.block => return self.writeInstToStreamGeneric(stream, .block, inst, inst_table),
.breakpoint => return self.writeInstToStreamGeneric(stream, .breakpoint, inst, inst_table),
.breakvoid => return self.writeInstToStreamGeneric(stream, .breakvoid, inst, inst_table),
.call => return self.writeInstToStreamGeneric(stream, .call, inst, inst_table),
.declref => return self.writeInstToStreamGeneric(stream, .declref, inst, inst_table),
.declref_str => return self.writeInstToStreamGeneric(stream, .declref_str, inst, inst_table),
.declval => return self.writeInstToStreamGeneric(stream, .declval, inst, inst_table),
.declval_in_module => return self.writeInstToStreamGeneric(stream, .declval_in_module, inst, inst_table),
.compileerror => return self.writeInstToStreamGeneric(stream, .compileerror, inst, inst_table),
.@"const" => return self.writeInstToStreamGeneric(stream, .@"const", inst, inst_table),
.str => return self.writeInstToStreamGeneric(stream, .str, inst, inst_table),
.int => return self.writeInstToStreamGeneric(stream, .int, inst, inst_table),
.ptrtoint => return self.writeInstToStreamGeneric(stream, .ptrtoint, inst, inst_table),
.fieldptr => return self.writeInstToStreamGeneric(stream, .fieldptr, inst, inst_table),
.deref => return self.writeInstToStreamGeneric(stream, .deref, inst, inst_table),
.as => return self.writeInstToStreamGeneric(stream, .as, inst, inst_table),
.@"asm" => return self.writeInstToStreamGeneric(stream, .@"asm", inst, inst_table),
.@"unreachable" => return self.writeInstToStreamGeneric(stream, .@"unreachable", inst, inst_table),
.@"return" => return self.writeInstToStreamGeneric(stream, .@"return", inst, inst_table),
.returnvoid => return self.writeInstToStreamGeneric(stream, .returnvoid, inst, inst_table),
.@"fn" => return self.writeInstToStreamGeneric(stream, .@"fn", inst, inst_table),
.@"export" => return self.writeInstToStreamGeneric(stream, .@"export", inst, inst_table),
.primitive => return self.writeInstToStreamGeneric(stream, .primitive, inst, inst_table),
.fntype => return self.writeInstToStreamGeneric(stream, .fntype, inst, inst_table),
.intcast => return self.writeInstToStreamGeneric(stream, .intcast, inst, inst_table),
.bitcast => return self.writeInstToStreamGeneric(stream, .bitcast, inst, inst_table),
.elemptr => return self.writeInstToStreamGeneric(stream, .elemptr, inst, inst_table),
.add => return self.writeInstToStreamGeneric(stream, .add, inst, inst_table),
.cmp => return self.writeInstToStreamGeneric(stream, .cmp, inst, inst_table),
.condbr => return self.writeInstToStreamGeneric(stream, .condbr, inst, inst_table),
.isnull => return self.writeInstToStreamGeneric(stream, .isnull, inst, inst_table),
.isnonnull => return self.writeInstToStreamGeneric(stream, .isnonnull, inst, inst_table),
.arg => return self.writeInstToStreamGeneric(stream, .arg, inst),
.block => return self.writeInstToStreamGeneric(stream, .block, inst),
.@"break" => return self.writeInstToStreamGeneric(stream, .@"break", inst),
.breakpoint => return self.writeInstToStreamGeneric(stream, .breakpoint, inst),
.breakvoid => return self.writeInstToStreamGeneric(stream, .breakvoid, inst),
.call => return self.writeInstToStreamGeneric(stream, .call, inst),
.declref => return self.writeInstToStreamGeneric(stream, .declref, inst),
.declref_str => return self.writeInstToStreamGeneric(stream, .declref_str, inst),
.declval => return self.writeInstToStreamGeneric(stream, .declval, inst),
.declval_in_module => return self.writeInstToStreamGeneric(stream, .declval_in_module, inst),
.compileerror => return self.writeInstToStreamGeneric(stream, .compileerror, inst),
.@"const" => return self.writeInstToStreamGeneric(stream, .@"const", inst),
.str => return self.writeInstToStreamGeneric(stream, .str, inst),
.int => return self.writeInstToStreamGeneric(stream, .int, inst),
.inttype => return self.writeInstToStreamGeneric(stream, .inttype, inst),
.ptrtoint => return self.writeInstToStreamGeneric(stream, .ptrtoint, inst),
.fieldptr => return self.writeInstToStreamGeneric(stream, .fieldptr, inst),
.deref => return self.writeInstToStreamGeneric(stream, .deref, inst),
.as => return self.writeInstToStreamGeneric(stream, .as, inst),
.@"asm" => return self.writeInstToStreamGeneric(stream, .@"asm", inst),
.@"unreachable" => return self.writeInstToStreamGeneric(stream, .@"unreachable", inst),
.@"return" => return self.writeInstToStreamGeneric(stream, .@"return", inst),
.returnvoid => return self.writeInstToStreamGeneric(stream, .returnvoid, inst),
.@"fn" => return self.writeInstToStreamGeneric(stream, .@"fn", inst),
.@"export" => return self.writeInstToStreamGeneric(stream, .@"export", inst),
.primitive => return self.writeInstToStreamGeneric(stream, .primitive, inst),
.fntype => return self.writeInstToStreamGeneric(stream, .fntype, inst),
.intcast => return self.writeInstToStreamGeneric(stream, .intcast, inst),
.bitcast => return self.writeInstToStreamGeneric(stream, .bitcast, inst),
.elemptr => return self.writeInstToStreamGeneric(stream, .elemptr, inst),
.add => return self.writeInstToStreamGeneric(stream, .add, inst),
.sub => return self.writeInstToStreamGeneric(stream, .sub, inst),
.cmp => return self.writeInstToStreamGeneric(stream, .cmp, inst),
.condbr => return self.writeInstToStreamGeneric(stream, .condbr, inst),
.isnull => return self.writeInstToStreamGeneric(stream, .isnull, inst),
.isnonnull => return self.writeInstToStreamGeneric(stream, .isnonnull, inst),
}
}
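/// Writes one instruction of a comptime-known tag, rendering its positional
/// arguments followed by any keyword arguments that are set.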
fn writeInstToStreamGeneric(
self: Module,
self: *Writer,
stream: var,
comptime inst_tag: Inst.Tag,
base: *Inst,
inst_table: *const InstPtrTable,
) !void {
) (@TypeOf(stream).Error || error{OutOfMemory})!void {
const SpecificInst = Inst.TagToType(inst_tag);
const inst = @fieldParentPtr(SpecificInst, "base", base);
const Positionals = @TypeOf(inst.positionals);
@@ -700,7 +759,7 @@ pub const Module = struct {
if (i != 0) {
try stream.writeAll(", ");
}
try self.writeParamToStream(stream, @field(inst.positionals, arg_field.name), inst_table);
try self.writeParamToStream(stream, @field(inst.positionals, arg_field.name));
}
comptime var need_comma = pos_fields.len != 0;
@@ -710,13 +769,13 @@ pub const Module = struct {
if (@field(inst.kw_args, arg_field.name)) |non_optional| {
if (need_comma) try stream.writeAll(", ");
try stream.print("{}=", .{arg_field.name});
try self.writeParamToStream(stream, non_optional, inst_table);
try self.writeParamToStream(stream, non_optional);
need_comma = true;
}
} else {
if (need_comma) try stream.writeAll(", ");
try stream.print("{}=", .{arg_field.name});
try self.writeParamToStream(stream, @field(inst.kw_args, arg_field.name), inst_table);
try self.writeParamToStream(stream, @field(inst.kw_args, arg_field.name));
need_comma = true;
}
}
@@ -724,29 +783,37 @@ pub const Module = struct {
try stream.writeByte(')');
}
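/// Writes a single operand; the rendering is chosen from the operand's
/// compile-time type (enum tag, instruction reference, body, block label, ...).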
fn writeParamToStream(self: Module, stream: var, param: var, inst_table: *const InstPtrTable) !void {
fn writeParamToStream(self: *Writer, stream: var, param: var) !void {
if (@typeInfo(@TypeOf(param)) == .Enum) {
return stream.writeAll(@tagName(param));
}
switch (@TypeOf(param)) {
*Inst => return self.writeInstParamToStream(stream, param, inst_table),
*Inst => return self.writeInstParamToStream(stream, param),
[]*Inst => {
try stream.writeByte('[');
for (param) |inst, i| {
if (i != 0) {
try stream.writeAll(", ");
}
try self.writeInstParamToStream(stream, inst, inst_table);
try self.writeInstParamToStream(stream, inst);
}
try stream.writeByte(']');
},
Module.Body => {
try stream.writeAll("{\n");
for (param.instructions) |inst, i| {
try stream.print(" %{} ", .{i});
try self.writeInstToStream(stream, inst, inst_table);
try stream.writeByteNTimes(' ', self.indent);
try stream.print("%{} ", .{i});
if (inst.cast(Inst.Block)) |block| {
const name = try std.fmt.allocPrint(&self.arena.allocator, "label_{}", .{i});
try self.block_table.put(block, name);
}
self.indent += 2;
try self.writeInstToStream(stream, inst);
self.indent -= 2;
try stream.writeByte('\n');
}
try stream.writeByteNTimes(' ', self.indent - 2);
try stream.writeByte('}');
},
bool => return stream.writeByte("01"[@boolToInt(param)]),
@@ -754,12 +821,16 @@ pub const Module = struct {
BigIntConst, usize => return stream.print("{}", .{param}),
TypedValue => unreachable, // this is a special case
*IrModule.Decl => unreachable, // this is a special case
*Inst.Block => {
const name = self.block_table.get(param).?;
return std.zig.renderStringLiteral(name, stream);
},
else => |T| @compileError("unimplemented: rendering parameter of type " ++ @typeName(T)),
}
}
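/// Writes a reference to another instruction: `%index` for instructions inside
/// a function body, `@name` for decls.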
fn writeInstParamToStream(self: Module, stream: var, inst: *Inst, inst_table: *const InstPtrTable) !void {
if (inst_table.get(inst)) |info| {
fn writeInstParamToStream(self: *Writer, stream: var, inst: *Inst) !void {
if (self.inst_table.get(inst)) |info| {
if (info.index) |i| {
try stream.print("%{}", .{info.index});
} else {
@@ -789,7 +860,9 @@ pub fn parse(allocator: *Allocator, source: [:0]const u8) Allocator.Error!Module
.global_name_map = &global_name_map,
.decls = .{},
.unnamed_index = 0,
.block_table = std.StringHashMap(*Inst.Block).init(allocator),
};
defer parser.block_table.deinit();
errdefer parser.arena.deinit();
parser.parseRoot() catch |err| switch (err) {
@@ -815,6 +888,7 @@ const Parser = struct {
global_name_map: *std.StringHashMap(*Inst),
error_msg: ?ErrorMsg = null,
unnamed_index: usize,
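/// Labeled blocks parsed so far, so that later break operands can be resolved by name.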
block_table: std.StringHashMap(*Inst.Block),
const Body = struct {
instructions: std.ArrayList(*Inst),
@@ -1023,6 +1097,10 @@ const Parser = struct {
.tag = InstType.base_tag,
};
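// Remember named blocks so that break operands parsed later can refer back to them.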
if (InstType == Inst.Block) {
try self.block_table.put(inst_name, inst_specific);
}
if (@hasField(InstType, "ty")) {
inst_specific.ty = opt_type orelse {
return self.fail("instruction '" ++ fn_name ++ "' requires type", .{});
@@ -1128,6 +1206,10 @@ const Parser = struct {
},
TypedValue => return self.fail("'const' is a special instruction; not legal in ZIR text", .{}),
*IrModule.Decl => return self.fail("'declval_in_module' is a special instruction; not legal in ZIR text", .{}),
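// A block operand is written as a string literal naming a previously parsed block.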
*Inst.Block => {
const name = try self.parseStringLiteral();
return self.block_table.get(name).?;
},
else => @compileError("Unimplemented: ir parseParameterGeneric for type " ++ @typeName(T)),
}
return self.fail("TODO parse parameter {}", .{@typeName(T)});
@@ -1191,7 +1273,10 @@ pub fn emit(allocator: *Allocator, old_module: IrModule) !Module {
.next_auto_name = 0,
.names = std.StringHashMap(void).init(allocator),
.primitive_table = std.AutoHashMap(Inst.Primitive.Builtin, *Decl).init(allocator),
.indent = 0,
.block_table = std.AutoHashMap(*ir.Inst.Block, *Inst.Block).init(allocator),
};
defer ctx.block_table.deinit();
defer ctx.decls.deinit(allocator);
defer ctx.names.deinit();
defer ctx.primitive_table.deinit();
@@ -1213,6 +1298,8 @@ const EmitZIR = struct {
names: std.StringHashMap(void),
next_auto_name: usize,
primitive_table: std.AutoHashMap(Inst.Primitive.Builtin, *Decl),
indent: usize,
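/// Maps analyzed blocks to their emitted ZIR counterparts so that br/brvoid
/// instructions can find their target block.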
block_table: std.AutoHashMap(*ir.Inst.Block, *Inst.Block),
fn emit(self: *EmitZIR) !void {
// Put all the Decls in a list and sort them by name to avoid nondeterminism introduced
@@ -1542,6 +1629,22 @@ const EmitZIR = struct {
};
break :blk &new_inst.base;
},
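// `sub` mirrors the adjacent binary-operation cases: resolve both operands
// and carry over the source location.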
.sub => blk: {
const old_inst = inst.cast(ir.Inst.Sub).?;
const new_inst = try self.arena.allocator.create(Inst.Sub);
new_inst.* = .{
.base = .{
.src = inst.src,
.tag = Inst.Sub.base_tag,
},
.positionals = .{
.lhs = try self.resolveInst(new_body, old_inst.args.lhs),
.rhs = try self.resolveInst(new_body, old_inst.args.rhs),
},
.kw_args = .{},
};
break :blk &new_inst.base;
},
.arg => blk: {
const old_inst = inst.cast(ir.Inst.Arg).?;
const new_inst = try self.arena.allocator.create(Inst.Arg);
@@ -1559,6 +1662,8 @@ const EmitZIR = struct {
const old_inst = inst.cast(ir.Inst.Block).?;
const new_inst = try self.arena.allocator.create(Inst.Block);
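// Register the mapping before the body is emitted so that nested br/brvoid
// instructions can resolve this block.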
try self.block_table.put(old_inst, new_inst);
var block_body = std.ArrayList(*Inst).init(self.allocator);
defer block_body.deinit();
@@ -1570,14 +1675,47 @@ const EmitZIR = struct {
.tag = Inst.Block.base_tag,
},
.positionals = .{
.label = try self.autoName(),
.body = .{ .instructions = block_body.toOwnedSlice() },
},
.kw_args = .{},
};
break :blk &new_inst.base;
},
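// A `br` in the analyzed IR becomes a ZIR `break` targeting the previously
// emitted block.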
.br => blk: {
const old_inst = inst.cast(ir.Inst.Br).?;
const new_block = self.block_table.get(old_inst.args.block).?;
const new_inst = try self.arena.allocator.create(Inst.Break);
new_inst.* = .{
.base = .{
.src = inst.src,
.tag = Inst.Break.base_tag,
},
.positionals = .{
.block = new_block,
.operand = try self.resolveInst(new_body, old_inst.args.operand),
},
.kw_args = .{},
};
break :blk &new_inst.base;
},
.breakpoint => try self.emitTrivial(inst.src, Inst.Breakpoint),
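// Same as `br`, but with no operand.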
.brvoid => blk: {
const old_inst = inst.cast(ir.Inst.BrVoid).?;
const new_block = self.block_table.get(old_inst.args.block).?;
const new_inst = try self.arena.allocator.create(Inst.BreakVoid);
new_inst.* = .{
.base = .{
.src = inst.src,
.tag = Inst.BreakVoid.base_tag,
},
.positionals = .{
.block = new_block,
},
.kw_args = .{},
};
break :blk &new_inst.base;
},
.call => blk: {
const old_inst = inst.cast(ir.Inst.Call).?;
const new_inst = try self.arena.allocator.create(Inst.Call);
@@ -1765,7 +1903,7 @@ const EmitZIR = struct {
},
};
try instructions.append(new_inst);
try inst_table.putNoClobber(inst, new_inst);
try inst_table.put(inst, new_inst);
}
}
@@ -1829,6 +1967,26 @@ const EmitZIR = struct {
};
return self.emitUnnamedDecl(&fntype_inst.base);
},
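// Integer types become an `inttype` instruction whose operands are the
// signedness and bit count of the analyzed type.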
.Int => {
const info = ty.intInfo(self.old_module.target());
const signed = try self.emitPrimitive(src, if (info.signed) .@"true" else .@"false");
const bits_payload = try self.arena.allocator.create(Value.Payload.Int_u64);
bits_payload.* = .{ .int = info.bits };
const bits = try self.emitComptimeIntVal(src, Value.initPayload(&bits_payload.base));
const inttype_inst = try self.arena.allocator.create(Inst.IntType);
inttype_inst.* = .{
.base = .{
.src = src,
.tag = Inst.IntType.base_tag,
},
.positionals = .{
.signed = signed.inst,
.bits = bits.inst,
},
.kw_args = .{},
};
return self.emitUnnamedDecl(&inttype_inst.base);
},
else => std.debug.panic("TODO implement emitType for {}", .{ty}),
},
}