diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index 0745cd46c9..18583d1667 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -23,11 +23,14 @@ const FnResult = @import("../../codegen.zig").FnResult; const DebugInfoOutput = @import("../../codegen.zig").DebugInfoOutput; const RegisterManagerFn = @import("../../register_manager.zig").RegisterManager; const RegisterManager = RegisterManagerFn(Self, Register, &abi.allocatable_regs); +const RegisterLock = RegisterManager.RegisterLock; const build_options = @import("build_options"); const bits = @import("bits.zig"); const abi = @import("abi.zig"); +const Instruction = bits.Instruction; +const ShiftWidth = Instruction.ShiftWidth; const Register = bits.Register; const Self = @This(); @@ -90,6 +93,9 @@ register_manager: RegisterManager = .{}, /// Maps offset to what is stored there. stack: std.AutoHashMapUnmanaged(u32, StackAllocation) = .{}, +/// Tracks the current instruction allocated to the compare flags +compare_flags_inst: ?Air.Inst.Index = null, + /// Offset from the stack base, representing the end of the stack frame. max_end_stack: u32 = 0, /// Represents the current end stack offset. If there is no existing slot @@ -125,6 +131,12 @@ const MCValue = union(enum) { stack_offset: u32, /// The value is a pointer to one of the stack variables (payload is stack offset). ptr_stack_offset: u32, + /// The value is in the compare flags assuming an unsigned operation, + /// with this operator applied on top of it. + compare_flags_unsigned: math.CompareOperator, + /// The value is in the compare flags assuming a signed operation, + /// with this operator applied on top of it. + compare_flags_signed: math.CompareOperator, fn isMemory(mcv: MCValue) bool { return switch (mcv) { @@ -367,18 +379,31 @@ fn gen(self: *Self) !void { // exitlude jumps if (self.exitlude_jump_relocs.items.len > 0 and - self.exitlude_jump_relocs.items[self.exitlude_jump_relocs.items.len - 1] == self.mir_instructions.len - 2) + self.exitlude_jump_relocs.items[self.exitlude_jump_relocs.items.len - 1] == self.mir_instructions.len - 3) { // If the last Mir instruction (apart from the // dbg_epilogue_begin) is the last exitlude jump - // relocation (which would just jump one instruction + // relocation (which would just jump two instructions // further), it can be safely removed - self.mir_instructions.orderedRemove(self.exitlude_jump_relocs.pop()); + const index = self.exitlude_jump_relocs.pop(); + + // First, remove the delay slot, then remove + // the branch instruction itself. 
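+        // Removing the higher index first keeps `index` pointing at
+        // the branch for the second removal.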
+ self.mir_instructions.orderedRemove(index + 1); + self.mir_instructions.orderedRemove(index); } for (self.exitlude_jump_relocs.items) |jmp_reloc| { - _ = jmp_reloc; - return self.fail("TODO add branches in sparc64", .{}); + self.mir_instructions.set(jmp_reloc, .{ + .tag = .bpcc, + .data = .{ + .branch_predict_int = .{ + .ccr = .xcc, + .cond = .al, + .inst = @intCast(u32, self.mir_instructions.len), + }, + }, + }); } // Backpatch stack offset @@ -458,7 +483,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { switch (air_tags[inst]) { // zig fmt: off - .add, .ptr_add => @panic("TODO try self.airBinOp(inst)"), + .add, .ptr_add => try self.airBinOp(inst), .addwrap => @panic("TODO try self.airAddWrap(inst)"), .add_sat => @panic("TODO try self.airAddSat(inst)"), .sub, .ptr_sub => @panic("TODO try self.airBinOp(inst)"), @@ -498,12 +523,12 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .div_float, .div_trunc, .div_floor, .div_exact => try self.airDiv(inst), - .cmp_lt => @panic("TODO try self.airCmp(inst, .lt)"), - .cmp_lte => @panic("TODO try self.airCmp(inst, .lte)"), - .cmp_eq => @panic("TODO try self.airCmp(inst, .eq)"), - .cmp_gte => @panic("TODO try self.airCmp(inst, .gte)"), - .cmp_gt => @panic("TODO try self.airCmp(inst, .gt)"), - .cmp_neq => @panic("TODO try self.airCmp(inst, .neq)"), + .cmp_lt => try self.airCmp(inst, .lt), + .cmp_lte => try self.airCmp(inst, .lte), + .cmp_eq => try self.airCmp(inst, .eq), + .cmp_gte => try self.airCmp(inst, .gte), + .cmp_gt => try self.airCmp(inst, .gt), + .cmp_neq => try self.airCmp(inst, .neq), .cmp_vector => @panic("TODO try self.airCmpVector(inst)"), .cmp_lt_errors_len => @panic("TODO try self.airCmpLtErrorsLen(inst)"), @@ -514,18 +539,18 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .xor => @panic("TODO try self.airXor(inst)"), .shr, .shr_exact => @panic("TODO try self.airShr(inst)"), - .alloc => @panic("TODO try self.airAlloc(inst)"), + .alloc => try self.airAlloc(inst), .ret_ptr => try self.airRetPtr(inst), .arg => try self.airArg(inst), .assembly => try self.airAsm(inst), - .bitcast => @panic("TODO try self.airBitCast(inst)"), + .bitcast => try self.airBitCast(inst), .block => try self.airBlock(inst), - .br => @panic("TODO try self.airBr(inst)"), + .br => try self.airBr(inst), .breakpoint => try self.airBreakpoint(), .ret_addr => @panic("TODO try self.airRetAddr(inst)"), .frame_addr => @panic("TODO try self.airFrameAddress(inst)"), .fence => @panic("TODO try self.airFence()"), - .cond_br => @panic("TODO try self.airCondBr(inst)"), + .cond_br => try self.airCondBr(inst), .dbg_stmt => try self.airDbgStmt(inst), .fptrunc => @panic("TODO try self.airFptrunc(inst)"), .fpext => @panic("TODO try self.airFpext(inst)"), @@ -536,12 +561,12 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .is_non_null_ptr => @panic("TODO try self.airIsNonNullPtr(inst)"), .is_null => @panic("TODO try self.airIsNull(inst)"), .is_null_ptr => @panic("TODO try self.airIsNullPtr(inst)"), - .is_non_err => @panic("TODO try self.airIsNonErr(inst)"), + .is_non_err => try self.airIsNonErr(inst), .is_non_err_ptr => @panic("TODO try self.airIsNonErrPtr(inst)"), - .is_err => @panic("TODO try self.airIsErr(inst)"), + .is_err => try self.airIsErr(inst), .is_err_ptr => @panic("TODO try self.airIsErrPtr(inst)"), - .load => @panic("TODO try self.airLoad(inst)"), - .loop => @panic("TODO try self.airLoop(inst)"), + .load => try self.airLoad(inst), + .loop => try self.airLoop(inst), .not 
=> @panic("TODO try self.airNot(inst)"), .ptrtoint => @panic("TODO try self.airPtrToInt(inst)"), .ret => try self.airRet(inst), @@ -598,22 +623,22 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .atomic_store_release => @panic("TODO try self.airAtomicStore(inst, .Release)"), .atomic_store_seq_cst => @panic("TODO try self.airAtomicStore(inst, .SeqCst)"), - .struct_field_ptr_index_0 => @panic("TODO try self.airStructFieldPtrIndex(inst, 0)"), - .struct_field_ptr_index_1 => @panic("TODO try self.airStructFieldPtrIndex(inst, 1)"), - .struct_field_ptr_index_2 => @panic("TODO try self.airStructFieldPtrIndex(inst, 2)"), - .struct_field_ptr_index_3 => @panic("TODO try self.airStructFieldPtrIndex(inst, 3)"), + .struct_field_ptr_index_0 => try self.airStructFieldPtrIndex(inst, 0), + .struct_field_ptr_index_1 => try self.airStructFieldPtrIndex(inst, 1), + .struct_field_ptr_index_2 => try self.airStructFieldPtrIndex(inst, 2), + .struct_field_ptr_index_3 => try self.airStructFieldPtrIndex(inst, 3), .field_parent_ptr => @panic("TODO try self.airFieldParentPtr(inst)"), .switch_br => try self.airSwitch(inst), .slice_ptr => @panic("TODO try self.airSlicePtr(inst)"), - .slice_len => @panic("TODO try self.airSliceLen(inst)"), + .slice_len => try self.airSliceLen(inst), .ptr_slice_len_ptr => @panic("TODO try self.airPtrSliceLenPtr(inst)"), .ptr_slice_ptr_ptr => @panic("TODO try self.airPtrSlicePtrPtr(inst)"), .array_elem_val => @panic("TODO try self.airArrayElemVal(inst)"), - .slice_elem_val => @panic("TODO try self.airSliceElemVal(inst)"), + .slice_elem_val => try self.airSliceElemVal(inst), .slice_elem_ptr => @panic("TODO try self.airSliceElemPtr(inst)"), .ptr_elem_val => @panic("TODO try self.airPtrElemVal(inst)"), .ptr_elem_ptr => @panic("TODO try self.airPtrElemPtr(inst)"), @@ -625,8 +650,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .optional_payload => @panic("TODO try self.airOptionalPayload(inst)"), .optional_payload_ptr => @panic("TODO try self.airOptionalPayloadPtr(inst)"), .optional_payload_ptr_set => @panic("TODO try self.airOptionalPayloadPtrSet(inst)"), - .unwrap_errunion_err => @panic("TODO try self.airUnwrapErrErr(inst)"), - .unwrap_errunion_payload => @panic("TODO try self.airUnwrapErrPayload(inst)"), + .unwrap_errunion_err => try self.airUnwrapErrErr(inst), + .unwrap_errunion_payload => try self.airUnwrapErrPayload(inst), .unwrap_errunion_err_ptr => @panic("TODO try self.airUnwrapErrErrPtr(inst)"), .unwrap_errunion_payload_ptr=> @panic("TODO try self.airUnwrapErrPayloadPtr(inst)"), .errunion_payload_ptr_set => @panic("TODO try self.airErrUnionPayloadPtrSet(inst)"), @@ -648,6 +673,11 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { } } +fn airAlloc(self: *Self, inst: Air.Inst.Index) !void { + const stack_offset = try self.allocMemPtr(inst); + return self.finishAir(inst, .{ .ptr_stack_offset = stack_offset }, .{ .none, .none, .none }); +} + fn airAsm(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.Asm, ty_pl.payload); @@ -719,7 +749,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { .data = .{ .trap = .{ .is_imm = true, - .cond = 0b1000, // TODO need to look into changing this into an enum + .cond = .al, .rs2_or_imm = .{ .imm = 0x6d }, }, }, @@ -795,6 +825,27 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, mcv, .{ .none, .none, .none }); } +fn airBinOp(self: *Self, inst: 
Air.Inst.Index) !void { + const tag = self.air.instructions.items(.tag)[inst]; + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const lhs = try self.resolveInst(bin_op.lhs); + const rhs = try self.resolveInst(bin_op.rhs); + const lhs_ty = self.air.typeOf(bin_op.lhs); + const rhs_ty = self.air.typeOf(bin_op.rhs); + + const result: MCValue = if (self.liveness.isUnused(inst)) + .dead + else + try self.binOp(tag, inst, lhs, rhs, lhs_ty, rhs_ty); + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result = try self.resolveInst(ty_op.operand); + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + fn airBlock(self: *Self, inst: Air.Inst.Index) !void { try self.blocks.putNoClobber(self.gpa, inst, .{ // A block is a setup to be able to jump to the end. @@ -829,6 +880,12 @@ fn airBlock(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ .none, .none, .none }); } +fn airBr(self: *Self, inst: Air.Inst.Index) !void { + const branch = self.air.instructions.items(.data)[inst].br; + try self.br(branch.block_inst, branch.operand); + return self.finishAir(inst, .dead, .{ branch.operand, .none, .none }); +} + fn airBreakpoint(self: *Self) !void { // ta 0x01 _ = try self.addInst(.{ @@ -836,7 +893,7 @@ fn airBreakpoint(self: *Self) !void { .data = .{ .trap = .{ .is_imm = true, - .cond = 0b1000, // TODO need to look into changing this into an enum + .cond = .al, .rs2_or_imm = .{ .imm = 0x01 }, }, }, @@ -872,6 +929,8 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. .unreach => unreachable, .dead => unreachable, .memory => unreachable, + .compare_flags_signed => unreachable, + .compare_flags_unsigned => unreachable, .register => |reg| { try self.register_manager.getReg(reg, null); try self.genSetReg(arg_ty, reg, arg_mcv); @@ -960,6 +1019,252 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. @panic("TODO handle return value with BigTomb"); } +fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const lhs = try self.resolveInst(bin_op.lhs); + const rhs = try self.resolveInst(bin_op.rhs); + const lhs_ty = self.air.typeOf(bin_op.lhs); + + var int_buffer: Type.Payload.Bits = undefined; + const int_ty = switch (lhs_ty.zigTypeTag()) { + .Vector => unreachable, // Should be handled by cmp_vector? 
+ .Enum => lhs_ty.intTagType(&int_buffer), + .Int => lhs_ty, + .Bool => Type.initTag(.u1), + .Pointer => Type.usize, + .ErrorSet => Type.initTag(.u16), + .Optional => blk: { + var opt_buffer: Type.Payload.ElemType = undefined; + const payload_ty = lhs_ty.optionalChild(&opt_buffer); + if (!payload_ty.hasRuntimeBitsIgnoreComptime()) { + break :blk Type.initTag(.u1); + } else if (lhs_ty.isPtrLikeOptional()) { + break :blk Type.usize; + } else { + return self.fail("TODO SPARCv9 cmp non-pointer optionals", .{}); + } + }, + .Float => return self.fail("TODO SPARCv9 cmp floats", .{}), + else => unreachable, + }; + + const int_info = int_ty.intInfo(self.target.*); + if (int_info.bits <= 64) { + _ = try self.binOp(.cmp_eq, inst, lhs, rhs, int_ty, int_ty); + + try self.spillCompareFlagsIfOccupied(); + self.compare_flags_inst = inst; + + break :result switch (int_info.signedness) { + .signed => MCValue{ .compare_flags_signed = op }, + .unsigned => MCValue{ .compare_flags_unsigned = op }, + }; + } else { + return self.fail("TODO SPARCv9 cmp for ints > 64 bits", .{}); + } + }; + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { + const pl_op = self.air.instructions.items(.data)[inst].pl_op; + const cond = try self.resolveInst(pl_op.operand); + const extra = self.air.extraData(Air.CondBr, pl_op.payload); + const then_body = self.air.extra[extra.end..][0..extra.data.then_body_len]; + const else_body = self.air.extra[extra.end + then_body.len ..][0..extra.data.else_body_len]; + const liveness_condbr = self.liveness.getCondBr(inst); + + // Here we either emit a BPcc for branching on CCR content, + // or emit a BPr to branch on register content. + const reloc: Mir.Inst.Index = switch (cond) { + .compare_flags_signed, + .compare_flags_unsigned, + => try self.addInst(.{ + .tag = .bpcc, + .data = .{ + .branch_predict_int = .{ + .ccr = .xcc, + .cond = switch (cond) { + .compare_flags_signed => |cmp_op| blk: { + // Here we map to the opposite condition because the jump is to the false branch. + const condition = Instruction.ICondition.fromCompareOperatorSigned(cmp_op); + break :blk condition.negate(); + }, + .compare_flags_unsigned => |cmp_op| blk: { + // Here we map to the opposite condition because the jump is to the false branch. + const condition = Instruction.ICondition.fromCompareOperatorUnsigned(cmp_op); + break :blk condition.negate(); + }, + else => unreachable, + }, + .inst = undefined, // Will be filled by performReloc + }, + }, + }), + else => blk: { + const reg = switch (cond) { + .register => |r| r, + else => try self.copyToTmpRegister(Type.bool, cond), + }; + + break :blk try self.addInst(.{ + .tag = .bpr, + .data = .{ + .branch_predict_reg = .{ + .cond = .eq_zero, + .rs1 = reg, + .inst = undefined, // populated later through performReloc + }, + }, + }); + }, + }; + + // Regardless of the branch type that's emitted, we need to reserve + // a space for the delay slot. 
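+    // On SPARC, the instruction after a branch (its delay slot) is
+    // executed before control actually transfers, so a nop keeps the
+    // slot harmless until something useful can be hoisted into it.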
+    // TODO Find a way to fill this delay slot
+    _ = try self.addInst(.{
+        .tag = .nop,
+        .data = .{ .nop = {} },
+    });
+
+    // If the condition dies here in this condbr instruction, process
+    // that death now instead of later, as this has an effect on
+    // whether it needs to be spilled in the branches.
+    if (self.liveness.operandDies(inst, 0)) {
+        const op_int = @enumToInt(pl_op.operand);
+        if (op_int >= Air.Inst.Ref.typed_value_map.len) {
+            const op_index = @intCast(Air.Inst.Index, op_int - Air.Inst.Ref.typed_value_map.len);
+            self.processDeath(op_index);
+        }
+    }
+
+    // Capture the register and stack allocation state so that we can revert to it.
+    const parent_next_stack_offset = self.next_stack_offset;
+    const parent_free_registers = self.register_manager.free_registers;
+    var parent_stack = try self.stack.clone(self.gpa);
+    defer parent_stack.deinit(self.gpa);
+    const parent_registers = self.register_manager.registers;
+    const parent_compare_flags_inst = self.compare_flags_inst;
+
+    try self.branch_stack.append(.{});
+    errdefer {
+        _ = self.branch_stack.pop();
+    }
+
+    try self.ensureProcessDeathCapacity(liveness_condbr.then_deaths.len);
+    for (liveness_condbr.then_deaths) |operand| {
+        self.processDeath(operand);
+    }
+    try self.genBody(then_body);
+
+    // Revert to the previous register and stack allocation state.
+
+    var saved_then_branch = self.branch_stack.pop();
+    defer saved_then_branch.deinit(self.gpa);
+
+    self.register_manager.registers = parent_registers;
+    self.compare_flags_inst = parent_compare_flags_inst;
+
+    self.stack.deinit(self.gpa);
+    self.stack = parent_stack;
+    parent_stack = .{};
+
+    self.next_stack_offset = parent_next_stack_offset;
+    self.register_manager.free_registers = parent_free_registers;
+
+    try self.performReloc(reloc);
+    const else_branch = self.branch_stack.addOneAssumeCapacity();
+    else_branch.* = .{};
+
+    try self.ensureProcessDeathCapacity(liveness_condbr.else_deaths.len);
+    for (liveness_condbr.else_deaths) |operand| {
+        self.processDeath(operand);
+    }
+    try self.genBody(else_body);
+
+    // At this point, each branch will possibly have conflicting values for where
+    // each instruction is stored. They agree, however, on which instructions are alive/dead.
+    // We use the first ("then") branch as canonical, and here emit
+    // instructions into the second ("else") branch to make it conform.
+    // We continue to respect the data structure semantic guarantees of the else_branch so
+    // that we can use all the code emitting abstractions. This is why at the bottom we
+    // assert that parent_branch.free_registers equals the saved_then_branch.free_registers
+    // rather than assigning it.
+    const parent_branch = &self.branch_stack.items[self.branch_stack.items.len - 2];
+    try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, else_branch.inst_table.count());
+
+    const else_slice = else_branch.inst_table.entries.slice();
+    const else_keys = else_slice.items(.key);
+    const else_values = else_slice.items(.value);
+    for (else_keys) |else_key, else_idx| {
+        const else_value = else_values[else_idx];
+        const canon_mcv = if (saved_then_branch.inst_table.fetchSwapRemove(else_key)) |then_entry| blk: {
+            // The instruction's MCValue is overridden in both branches.
+            parent_branch.inst_table.putAssumeCapacity(else_key, then_entry.value);
+            if (else_value == .dead) {
+                assert(then_entry.value == .dead);
+                continue;
+            }
+            break :blk then_entry.value;
+        } else blk: {
+            if (else_value == .dead)
+                continue;
+            // The instruction is only overridden in the else branch.
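+            // Walk up the branch stack to find the MCValue this
+            // instruction had before the branches were generated.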
+ var i: usize = self.branch_stack.items.len - 2; + while (true) { + i -= 1; // If this overflows, the question is: why wasn't the instruction marked dead? + if (self.branch_stack.items[i].inst_table.get(else_key)) |mcv| { + assert(mcv != .dead); + break :blk mcv; + } + } + }; + log.debug("consolidating else_entry {d} {}=>{}", .{ else_key, else_value, canon_mcv }); + // TODO make sure the destination stack offset / register does not already have something + // going on there. + try self.setRegOrMem(self.air.typeOfIndex(else_key), canon_mcv, else_value); + // TODO track the new register / stack allocation + } + try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, saved_then_branch.inst_table.count()); + const then_slice = saved_then_branch.inst_table.entries.slice(); + const then_keys = then_slice.items(.key); + const then_values = then_slice.items(.value); + for (then_keys) |then_key, then_idx| { + const then_value = then_values[then_idx]; + // We already deleted the items from this table that matched the else_branch. + // So these are all instructions that are only overridden in the then branch. + parent_branch.inst_table.putAssumeCapacity(then_key, then_value); + if (then_value == .dead) + continue; + const parent_mcv = blk: { + var i: usize = self.branch_stack.items.len - 2; + while (true) { + i -= 1; + if (self.branch_stack.items[i].inst_table.get(then_key)) |mcv| { + assert(mcv != .dead); + break :blk mcv; + } + } + }; + log.debug("consolidating then_entry {d} {}=>{}", .{ then_key, parent_mcv, then_value }); + // TODO make sure the destination stack offset / register does not already have something + // going on there. + try self.setRegOrMem(self.air.typeOfIndex(then_key), parent_mcv, then_value); + // TODO track the new register / stack allocation + } + + { + var item = self.branch_stack.pop(); + item.deinit(self.gpa); + } + + // We already took care of pl_op.operand earlier, so we're going + // to pass .none here + return self.finishAir(inst, .unreach, .{ .none, .none, .none }); +} + fn airDbgBlock(self: *Self, inst: Air.Inst.Index) !void { // TODO emit debug info lexical block return self.finishAir(inst, .dead, .{ .none, .none, .none }); @@ -1004,6 +1309,67 @@ fn airDiv(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } +fn airIsErr(self: *Self, inst: Air.Inst.Index) !void { + const un_op = self.air.instructions.items(.data)[inst].un_op; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const operand = try self.resolveInst(un_op); + const ty = self.air.typeOf(un_op); + break :result try self.isErr(ty, operand); + }; + return self.finishAir(inst, result, .{ un_op, .none, .none }); +} + +fn airIsNonErr(self: *Self, inst: Air.Inst.Index) !void { + const un_op = self.air.instructions.items(.data)[inst].un_op; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const operand = try self.resolveInst(un_op); + const ty = self.air.typeOf(un_op); + break :result try self.isNonErr(ty, operand); + }; + return self.finishAir(inst, result, .{ un_op, .none, .none }); +} + +fn airLoad(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const elem_ty = self.air.typeOfIndex(inst); + const elem_size = elem_ty.abiSize(self.target.*); + const result: MCValue = result: { + if (!elem_ty.hasRuntimeBits()) + break :result MCValue.none; + + const ptr = try self.resolveInst(ty_op.operand); + const is_volatile = 
self.air.typeOf(ty_op.operand).isVolatilePtr(); + if (self.liveness.isUnused(inst) and !is_volatile) + break :result MCValue.dead; + + const dst_mcv: MCValue = blk: { + if (elem_size <= 8 and self.reuseOperand(inst, ty_op.operand, 0, ptr)) { + // The MCValue that holds the pointer can be re-used as the value. + break :blk switch (ptr) { + .register => |r| MCValue{ .register = r }, + else => ptr, + }; + } else { + break :blk try self.allocRegOrMem(inst, true); + } + }; + try self.load(dst_mcv, ptr, self.air.typeOf(ty_op.operand)); + break :result dst_mcv; + }; + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airLoop(self: *Self, inst: Air.Inst.Index) !void { + // A loop is a setup to be able to jump back to the beginning. + const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; + const loop = self.air.extraData(Air.Block, ty_pl.payload); + const body = self.air.extra[loop.end .. loop.end + loop.data.body_len]; + const start = @intCast(u32, self.mir_instructions.len); + try self.genBody(body); + try self.jump(start); + return self.finishAirBookkeeping(); +} + fn airRet(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const operand = try self.resolveInst(un_op); @@ -1024,11 +1390,87 @@ fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, .{ .ptr_stack_offset = stack_offset }, .{ .none, .none, .none }); } -fn airStore(self: *Self, inst: Air.Inst.Index) !void { - _ = self; - _ = inst; +fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void { + const is_volatile = false; // TODO + const bin_op = self.air.instructions.items(.data)[inst].bin_op; - return self.fail("TODO implement store for {}", .{self.target.cpu.arch}); + if (!is_volatile and self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none }); + const result: MCValue = result: { + const slice_mcv = try self.resolveInst(bin_op.lhs); + const index_mcv = try self.resolveInst(bin_op.rhs); + + const slice_ty = self.air.typeOf(bin_op.lhs); + const elem_ty = slice_ty.childType(); + const elem_size = elem_ty.abiSize(self.target.*); + + var buf: Type.SlicePtrFieldTypeBuffer = undefined; + const slice_ptr_field_type = slice_ty.slicePtrFieldType(&buf); + + const index_lock: ?RegisterLock = if (index_mcv == .register) + self.register_manager.lockRegAssumeUnused(index_mcv.register) + else + null; + defer if (index_lock) |reg| self.register_manager.unlockReg(reg); + + const base_mcv: MCValue = switch (slice_mcv) { + .stack_offset => |off| .{ .register = try self.copyToTmpRegister(slice_ptr_field_type, .{ .stack_offset = off }) }, + else => return self.fail("TODO slice_elem_val when slice is {}", .{slice_mcv}), + }; + const base_lock = self.register_manager.lockRegAssumeUnused(base_mcv.register); + defer self.register_manager.unlockReg(base_lock); + + switch (elem_size) { + else => { + // TODO skip the ptr_add emission entirely and use native addressing modes + // i.e sllx/mulx then R+R or scale immediate then R+I + const dest = try self.allocRegOrMem(inst, true); + const addr = try self.binOp(.ptr_add, null, base_mcv, index_mcv, slice_ptr_field_type, Type.usize); + try self.load(dest, addr, slice_ptr_field_type); + + break :result dest; + }, + } + }; + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airSliceLen(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if 
(self.liveness.isUnused(inst)) .dead else result: { + const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bytes = @divExact(ptr_bits, 8); + const mcv = try self.resolveInst(ty_op.operand); + switch (mcv) { + .dead, .unreach, .none => unreachable, + .register => unreachable, // a slice doesn't fit in one register + .stack_offset => |off| { + break :result MCValue{ .stack_offset = off - ptr_bytes }; + }, + .memory => |addr| { + break :result MCValue{ .memory = addr + ptr_bytes }; + }, + else => return self.fail("TODO implement slice_len for {}", .{mcv}), + } + }; + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airStore(self: *Self, inst: Air.Inst.Index) !void { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const ptr = try self.resolveInst(bin_op.lhs); + const value = try self.resolveInst(bin_op.rhs); + const ptr_ty = self.air.typeOf(bin_op.lhs); + const value_ty = self.air.typeOf(bin_op.rhs); + + try self.store(ptr, value, ptr_ty, value_ty); + + return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn airStructFieldPtrIndex(self: *Self, inst: Air.Inst.Index, index: u8) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result = try self.structFieldPtr(inst, ty_op.operand, index); + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { @@ -1038,6 +1480,31 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { return self.fail("TODO implement switch for {}", .{self.target.cpu.arch}); } +fn airUnwrapErrErr(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const error_union_ty = self.air.typeOf(ty_op.operand); + const payload_ty = error_union_ty.errorUnionPayload(); + const mcv = try self.resolveInst(ty_op.operand); + if (!payload_ty.hasRuntimeBits()) break :result mcv; + + return self.fail("TODO implement unwrap error union error for non-empty payloads", .{}); + }; + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airUnwrapErrPayload(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const error_union_ty = self.air.typeOf(ty_op.operand); + const payload_ty = error_union_ty.errorUnionPayload(); + if (!payload_ty.hasRuntimeBits()) break :result MCValue.none; + + return self.fail("TODO implement unwrap error union payload for non-empty payloads", .{}); + }; + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + // Common helper functions /// Adds a Type to the .debug_info at the current position. The bytes will be populated later, @@ -1126,6 +1593,459 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue { return MCValue{ .stack_offset = stack_offset }; } +/// For all your binary operation needs, this function will generate +/// the corresponding Mir instruction(s). Returns the location of the +/// result. +/// +/// If the binary operation itself happens to be an Air instruction, +/// pass the corresponding index in the inst parameter. That helps +/// this function do stuff like reusing operands. 
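+/// Pass null for maybe_inst when the result is only an intermediate
+/// value (an address computation, say) that no Air instruction should
+/// end up tracking.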
+/// +/// This function does not do any lowering to Mir itself, but instead +/// looks at the lhs and rhs and determines which kind of lowering +/// would be best suitable and then delegates the lowering to other +/// functions. +fn binOp( + self: *Self, + tag: Air.Inst.Tag, + maybe_inst: ?Air.Inst.Index, + lhs: MCValue, + rhs: MCValue, + lhs_ty: Type, + rhs_ty: Type, +) InnerError!MCValue { + const mod = self.bin_file.options.module.?; + switch (tag) { + .add, .cmp_eq => { + switch (lhs_ty.zigTypeTag()) { + .Float => return self.fail("TODO binary operations on floats", .{}), + .Vector => return self.fail("TODO binary operations on vectors", .{}), + .Int => { + assert(lhs_ty.eql(rhs_ty, mod)); + const int_info = lhs_ty.intInfo(self.target.*); + if (int_info.bits <= 64) { + // Only say yes if the operation is + // commutative, i.e. we can swap both of the + // operands + const lhs_immediate_ok = switch (tag) { + .add => lhs == .immediate and lhs.immediate <= std.math.maxInt(u12), + .sub, .cmp_eq => false, + else => unreachable, + }; + const rhs_immediate_ok = switch (tag) { + .add, + .sub, + .cmp_eq, + => rhs == .immediate and rhs.immediate <= std.math.maxInt(u12), + else => unreachable, + }; + + const mir_tag: Mir.Inst.Tag = switch (tag) { + .add => .add, + .cmp_eq => .subcc, + else => unreachable, + }; + + if (rhs_immediate_ok) { + return try self.binOpImmediate(mir_tag, maybe_inst, lhs, rhs, lhs_ty, false); + } else if (lhs_immediate_ok) { + // swap lhs and rhs + return try self.binOpImmediate(mir_tag, maybe_inst, rhs, lhs, rhs_ty, true); + } else { + // TODO convert large immediates to register before adding + return try self.binOpRegister(mir_tag, maybe_inst, lhs, rhs, lhs_ty, rhs_ty); + } + } else { + return self.fail("TODO binary operations on int with bits > 64", .{}); + } + }, + else => unreachable, + } + }, + + .mul => { + switch (lhs_ty.zigTypeTag()) { + .Vector => return self.fail("TODO binary operations on vectors", .{}), + .Int => { + assert(lhs_ty.eql(rhs_ty, mod)); + const int_info = lhs_ty.intInfo(self.target.*); + if (int_info.bits <= 64) { + // If LHS is immediate, then swap it with RHS. 
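+                    // mulx is commutative, so the swap is safe and the
+                    // immediate checks below only need to look at RHS.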
+ const lhs_is_imm = lhs == .immediate; + const new_lhs = if (lhs_is_imm) rhs else lhs; + const new_rhs = if (lhs_is_imm) lhs else rhs; + const new_lhs_ty = if (lhs_is_imm) rhs_ty else lhs_ty; + const new_rhs_ty = if (lhs_is_imm) lhs_ty else rhs_ty; + + // At this point, RHS might be an immediate + // If it's a power of two immediate then we emit an shl instead + // TODO add similar checks for LHS + if (new_rhs == .immediate and math.isPowerOfTwo(new_rhs.immediate)) { + return try self.binOp(.shl, maybe_inst, new_lhs, .{ .immediate = math.log2(new_rhs.immediate) }, new_lhs_ty, Type.usize); + } + + return try self.binOpRegister(.mulx, maybe_inst, new_lhs, new_rhs, new_lhs_ty, new_rhs_ty); + } else { + return self.fail("TODO binary operations on int with bits > 64", .{}); + } + }, + else => unreachable, + } + }, + + .ptr_add => { + switch (lhs_ty.zigTypeTag()) { + .Pointer => { + const ptr_ty = lhs_ty; + const elem_ty = switch (ptr_ty.ptrSize()) { + .One => ptr_ty.childType().childType(), // ptr to array, so get array element type + else => ptr_ty.childType(), + }; + const elem_size = elem_ty.abiSize(self.target.*); + + if (elem_size == 1) { + const base_tag: Mir.Inst.Tag = switch (tag) { + .ptr_add => .add, + else => unreachable, + }; + + return try self.binOpRegister(base_tag, maybe_inst, lhs, rhs, lhs_ty, rhs_ty); + } else { + // convert the offset into a byte offset by + // multiplying it with elem_size + + const offset = try self.binOp(.mul, null, rhs, .{ .immediate = elem_size }, Type.usize, Type.usize); + const addr = try self.binOp(tag, null, lhs, offset, Type.initTag(.manyptr_u8), Type.usize); + return addr; + } + }, + else => unreachable, + } + }, + + .shl => { + const base_tag: Air.Inst.Tag = switch (tag) { + .shl => .shl_exact, + else => unreachable, + }; + + // Generate a shl_exact/shr_exact + const result = try self.binOp(base_tag, maybe_inst, lhs, rhs, lhs_ty, rhs_ty); + + // Truncate if necessary + switch (tag) { + .shl => switch (lhs_ty.zigTypeTag()) { + .Vector => return self.fail("TODO binary operations on vectors", .{}), + .Int => { + const int_info = lhs_ty.intInfo(self.target.*); + if (int_info.bits <= 64) { + const result_reg = result.register; + try self.truncRegister(result_reg, result_reg, int_info.signedness, int_info.bits); + return result; + } else { + return self.fail("TODO binary operations on integers > u64/i64", .{}); + } + }, + else => unreachable, + }, + else => unreachable, + } + }, + + .shl_exact => { + switch (lhs_ty.zigTypeTag()) { + .Vector => return self.fail("TODO binary operations on vectors", .{}), + .Int => { + const int_info = lhs_ty.intInfo(self.target.*); + if (int_info.bits <= 64) { + const rhs_immediate_ok = rhs == .immediate; + + const mir_tag: Mir.Inst.Tag = switch (tag) { + .shl_exact => .sllx, + else => unreachable, + }; + + if (rhs_immediate_ok) { + return try self.binOpImmediate(mir_tag, maybe_inst, lhs, rhs, lhs_ty, false); + } else { + return try self.binOpRegister(mir_tag, maybe_inst, lhs, rhs, lhs_ty, rhs_ty); + } + } else { + return self.fail("TODO binary operations on int with bits > 64", .{}); + } + }, + else => unreachable, + } + }, + + else => return self.fail("TODO implement {} binOp for SPARCv9", .{tag}), + } +} + +/// Don't call this function directly. Use binOp instead. +/// +/// Calling this function signals an intention to generate a Mir +/// instruction of the form +/// +/// op dest, lhs, #rhs_imm +/// +/// Set lhs_and_rhs_swapped to true iff inst.bin_op.lhs corresponds to +/// rhs and vice versa. 
This parameter is only used when maybe_inst != +/// null. +/// +/// Asserts that generating an instruction of that form is possible. +fn binOpImmediate( + self: *Self, + mir_tag: Mir.Inst.Tag, + maybe_inst: ?Air.Inst.Index, + lhs: MCValue, + rhs: MCValue, + lhs_ty: Type, + lhs_and_rhs_swapped: bool, +) !MCValue { + const lhs_is_register = lhs == .register; + + const lhs_lock: ?RegisterLock = if (lhs_is_register) + self.register_manager.lockReg(lhs.register) + else + null; + defer if (lhs_lock) |reg| self.register_manager.unlockReg(reg); + + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + + const lhs_reg = if (lhs_is_register) lhs.register else blk: { + const track_inst: ?Air.Inst.Index = if (maybe_inst) |inst| inst: { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + break :inst Air.refToIndex( + if (lhs_and_rhs_swapped) bin_op.rhs else bin_op.lhs, + ).?; + } else null; + + const reg = try self.register_manager.allocReg(track_inst); + + if (track_inst) |inst| branch.inst_table.putAssumeCapacity(inst, .{ .register = reg }); + + break :blk reg; + }; + const new_lhs_lock = self.register_manager.lockReg(lhs_reg); + defer if (new_lhs_lock) |reg| self.register_manager.unlockReg(reg); + + const dest_reg = switch (mir_tag) { + else => if (maybe_inst) |inst| blk: { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + + if (lhs_is_register and self.reuseOperand( + inst, + if (lhs_and_rhs_swapped) bin_op.rhs else bin_op.lhs, + if (lhs_and_rhs_swapped) 1 else 0, + lhs, + )) { + break :blk lhs_reg; + } else { + break :blk try self.register_manager.allocReg(inst); + } + } else blk: { + break :blk try self.register_manager.allocReg(null); + }, + }; + + if (!lhs_is_register) try self.genSetReg(lhs_ty, lhs_reg, lhs); + + const mir_data: Mir.Inst.Data = switch (mir_tag) { + .add, + .mulx, + .subcc, + => .{ + .arithmetic_3op = .{ + .is_imm = true, + .rd = dest_reg, + .rs1 = lhs_reg, + .rs2_or_imm = .{ .imm = @intCast(i13, rhs.immediate) }, + }, + }, + .sllx => .{ + .shift = .{ + .is_imm = true, + .width = ShiftWidth.shift64, + .rd = dest_reg, + .rs1 = lhs_reg, + .rs2_or_imm = .{ .imm = @intCast(u6, rhs.immediate) }, + }, + }, + else => unreachable, + }; + + _ = try self.addInst(.{ + .tag = mir_tag, + .data = mir_data, + }); + + return MCValue{ .register = dest_reg }; +} + +/// Don't call this function directly. Use binOp instead. +/// +/// Calling this function signals an intention to generate a Mir +/// instruction of the form +/// +/// op dest, lhs, rhs +/// +/// Asserts that generating an instruction of that form is possible. 
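+///
+/// For example, with mir_tag == .add this corresponds to a SPARC
+/// instruction along the lines of:
+///
+///   add %lhs_reg, %rhs_reg, %dest_reg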
+fn binOpRegister( + self: *Self, + mir_tag: Mir.Inst.Tag, + maybe_inst: ?Air.Inst.Index, + lhs: MCValue, + rhs: MCValue, + lhs_ty: Type, + rhs_ty: Type, +) !MCValue { + const lhs_is_register = lhs == .register; + const rhs_is_register = rhs == .register; + + const lhs_lock: ?RegisterLock = if (lhs_is_register) + self.register_manager.lockReg(lhs.register) + else + null; + defer if (lhs_lock) |reg| self.register_manager.unlockReg(reg); + + const rhs_lock: ?RegisterLock = if (rhs_is_register) + self.register_manager.lockReg(rhs.register) + else + null; + defer if (rhs_lock) |reg| self.register_manager.unlockReg(reg); + + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + + const lhs_reg = if (lhs_is_register) lhs.register else blk: { + const track_inst: ?Air.Inst.Index = if (maybe_inst) |inst| inst: { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + break :inst Air.refToIndex(bin_op.lhs).?; + } else null; + + const reg = try self.register_manager.allocReg(track_inst); + if (track_inst) |inst| branch.inst_table.putAssumeCapacity(inst, .{ .register = reg }); + + break :blk reg; + }; + const new_lhs_lock = self.register_manager.lockReg(lhs_reg); + defer if (new_lhs_lock) |reg| self.register_manager.unlockReg(reg); + + const rhs_reg = if (rhs_is_register) rhs.register else blk: { + const track_inst: ?Air.Inst.Index = if (maybe_inst) |inst| inst: { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + break :inst Air.refToIndex(bin_op.rhs).?; + } else null; + + const reg = try self.register_manager.allocReg(track_inst); + if (track_inst) |inst| branch.inst_table.putAssumeCapacity(inst, .{ .register = reg }); + + break :blk reg; + }; + const new_rhs_lock = self.register_manager.lockReg(rhs_reg); + defer if (new_rhs_lock) |reg| self.register_manager.unlockReg(reg); + + const dest_reg = switch (mir_tag) { + else => if (maybe_inst) |inst| blk: { + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + + if (lhs_is_register and self.reuseOperand(inst, bin_op.lhs, 0, lhs)) { + break :blk lhs_reg; + } else if (rhs_is_register and self.reuseOperand(inst, bin_op.rhs, 1, rhs)) { + break :blk rhs_reg; + } else { + break :blk try self.register_manager.allocReg(inst); + } + } else blk: { + break :blk try self.register_manager.allocReg(null); + }, + }; + + if (!lhs_is_register) try self.genSetReg(lhs_ty, lhs_reg, lhs); + if (!rhs_is_register) try self.genSetReg(rhs_ty, rhs_reg, rhs); + + const mir_data: Mir.Inst.Data = switch (mir_tag) { + .add, + .mulx, + .subcc, + => .{ + .arithmetic_3op = .{ + .is_imm = false, + .rd = dest_reg, + .rs1 = lhs_reg, + .rs2_or_imm = .{ .rs2 = rhs_reg }, + }, + }, + .sllx => .{ + .shift = .{ + .is_imm = false, + .width = ShiftWidth.shift64, + .rd = dest_reg, + .rs1 = lhs_reg, + .rs2_or_imm = .{ .rs2 = rhs_reg }, + }, + }, + else => unreachable, + }; + + _ = try self.addInst(.{ + .tag = mir_tag, + .data = mir_data, + }); + + return MCValue{ .register = dest_reg }; +} + +fn br(self: *Self, block: Air.Inst.Index, operand: Air.Inst.Ref) !void { + const block_data = self.blocks.getPtr(block).?; + + if (self.air.typeOf(operand).hasRuntimeBits()) { + const operand_mcv = try self.resolveInst(operand); + const block_mcv = block_data.mcv; + if (block_mcv == .none) { + block_data.mcv = switch (operand_mcv) { + .none, .dead, .unreach => unreachable, + .register, .stack_offset, .memory => operand_mcv, + .immediate => blk: { + const new_mcv = try self.allocRegOrMem(block, true); + try 
self.setRegOrMem(self.air.typeOfIndex(block), new_mcv, operand_mcv); + break :blk new_mcv; + }, + else => return self.fail("TODO implement block_data.mcv = operand_mcv for {}", .{operand_mcv}), + }; + } else { + try self.setRegOrMem(self.air.typeOfIndex(block), block_mcv, operand_mcv); + } + } + return self.brVoid(block); +} + +fn brVoid(self: *Self, block: Air.Inst.Index) !void { + const block_data = self.blocks.getPtr(block).?; + + // Emit a jump with a relocation. It will be patched up after the block ends. + try block_data.relocs.ensureUnusedCapacity(self.gpa, 1); + + const br_index = try self.addInst(.{ + .tag = .bpcc, + .data = .{ + .branch_predict_int = .{ + .ccr = .xcc, + .cond = .al, + .inst = undefined, // Will be filled by performReloc + }, + }, + }); + + // TODO Find a way to fill this delay slot + _ = try self.addInst(.{ + .tag = .nop, + .data = .{ .nop = {} }, + }); + + block_data.relocs.appendAssumeCapacity(br_index); +} + /// Copies a value to a register without tracking the register. The register is not considered /// allocated. A second call to `copyToTmpRegister` may return the same register. /// This can have a side effect of spilling instructions to the stack to free up a register. @@ -1222,6 +2142,76 @@ fn genArgDbgInfo(self: *Self, inst: Air.Inst.Index, mcv: MCValue, arg_index: u32 } } +// TODO replace this to call to extern memcpy +fn genInlineMemcpy( + self: *Self, + src: Register, + dst: Register, + len: Register, + tmp: Register, +) !void { + // Here we assume that len > 0. + // Also we do the copy from end -> start address to save a register. + + // sub len, 1, len + _ = try self.addInst(.{ + .tag = .sub, + .data = .{ .arithmetic_3op = .{ + .is_imm = true, + .rs1 = len, + .rs2_or_imm = .{ .imm = 1 }, + .rd = len, + } }, + }); + + // loop: + // ldub [src + len], tmp + _ = try self.addInst(.{ + .tag = .ldub, + .data = .{ .arithmetic_3op = .{ + .is_imm = false, + .rs1 = src, + .rs2_or_imm = .{ .rs2 = len }, + .rd = tmp, + } }, + }); + + // stb tmp, [dst + len] + _ = try self.addInst(.{ + .tag = .stb, + .data = .{ .arithmetic_3op = .{ + .is_imm = false, + .rs1 = dst, + .rs2_or_imm = .{ .rs2 = len }, + .rd = tmp, + } }, + }); + + // brnz len, loop + _ = try self.addInst(.{ + .tag = .bpr, + .data = .{ .branch_predict_reg = .{ + .cond = .ne_zero, + .rs1 = len, + .inst = @intCast(u32, self.mir_instructions.len - 2), + } }, + }); + + // Delay slot: + // sub len, 1, len + _ = try self.addInst(.{ + .tag = .sub, + .data = .{ .arithmetic_3op = .{ + .is_imm = true, + .rs1 = len, + .rs2_or_imm = .{ .imm = 1 }, + .rd = len, + } }, + }); + + // end: +} + fn genLoad(self: *Self, value_reg: Register, addr_reg: Register, comptime off_type: type, off: off_type, abi_size: u64) !void { assert(off_type == Register or off_type == i13); @@ -1259,6 +2249,8 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void switch (mcv) { .dead => unreachable, .unreach, .none => return, // Nothing to do. + .compare_flags_signed => return self.fail("TODO: genSetReg for compare_flags_signed", .{}), + .compare_flags_unsigned => return self.fail("TODO: genSetReg for compare_flags_unsigned", .{}), .undef => { if (!self.wantSafety()) return; // The already existing value will do just fine. 
@@ -1426,6 +2418,8 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro else => return self.fail("TODO implement memset", .{}), } }, + .compare_flags_unsigned, + .compare_flags_signed, .immediate, .ptr_stack_offset, => { @@ -1438,7 +2432,47 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro return self.fail("TODO larger stack offsets", .{}); return self.genStore(reg, .sp, i13, simm13, abi_size); }, - .memory, .stack_offset => return self.fail("TODO implement memcpy", .{}), + .memory, .stack_offset => { + switch (mcv) { + .stack_offset => |off| { + if (stack_offset == off) + return; // Copy stack variable to itself; nothing to do. + }, + else => {}, + } + + if (abi_size <= 8) { + const reg = try self.copyToTmpRegister(ty, mcv); + return self.genSetStack(ty, stack_offset, MCValue{ .register = reg }); + } else { + var ptr_ty_payload: Type.Payload.ElemType = .{ + .base = .{ .tag = .single_mut_pointer }, + .data = ty, + }; + const ptr_ty = Type.initPayload(&ptr_ty_payload.base); + + const regs = try self.register_manager.allocRegs(4, .{ null, null, null, null }); + const regs_locks = self.register_manager.lockRegsAssumeUnused(4, regs); + defer for (regs_locks) |reg| { + self.register_manager.unlockReg(reg); + }; + + const src_reg = regs[0]; + const dst_reg = regs[1]; + const len_reg = regs[2]; + const tmp_reg = regs[3]; + + switch (mcv) { + .stack_offset => |off| try self.genSetReg(ptr_ty, src_reg, .{ .ptr_stack_offset = off }), + .memory => |addr| try self.genSetReg(Type.usize, src_reg, .{ .immediate = addr }), + else => unreachable, + } + + try self.genSetReg(ptr_ty, dst_reg, .{ .ptr_stack_offset = stack_offset }); + try self.genSetReg(Type.usize, len_reg, .{ .immediate = abi_size }); + try self.genInlineMemcpy(src_reg, dst_reg, len_reg, tmp_reg); + } + }, } } @@ -1504,6 +2538,34 @@ fn genTypedValue(self: *Self, typed_value: TypedValue) InnerError!MCValue { return self.fail("TODO implement int genTypedValue of > 64 bits", .{}); } }, + .ErrorSet => { + const err_name = typed_value.val.castTag(.@"error").?.data.name; + const module = self.bin_file.options.module.?; + const global_error_set = module.global_error_set; + const error_index = global_error_set.get(err_name).?; + return MCValue{ .immediate = error_index }; + }, + .ErrorUnion => { + const error_type = typed_value.ty.errorUnionSet(); + const payload_type = typed_value.ty.errorUnionPayload(); + + if (typed_value.val.castTag(.eu_payload)) |pl| { + if (!payload_type.hasRuntimeBits()) { + // We use the error type directly as the type. + return MCValue{ .immediate = 0 }; + } + + _ = pl; + return self.fail("TODO implement error union const of type '{}' (non-error)", .{typed_value.ty.fmtDebug()}); + } else { + if (!payload_type.hasRuntimeBits()) { + // We use the error type directly as the type. 
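+                // With a zero-bit payload, the error union is
+                // represented by just the error value.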
+ return self.genTypedValue(.{ .ty = error_type, .val = typed_value.val }); + } + + return self.fail("TODO implement error union const of type '{}' (error)", .{typed_value.ty.fmtDebug()}); + } + }, .ComptimeInt => unreachable, // semantic analysis prevents this .ComptimeFloat => unreachable, // semantic analysis prevents this else => return self.fail("TODO implement const of type '{}'", .{typed_value.ty.fmtDebug()}), @@ -1522,6 +2584,54 @@ fn getResolvedInstValue(self: *Self, inst: Air.Inst.Index) MCValue { } } +fn isErr(self: *Self, ty: Type, operand: MCValue) !MCValue { + const error_type = ty.errorUnionSet(); + const payload_type = ty.errorUnionPayload(); + + if (!error_type.hasRuntimeBits()) { + return MCValue{ .immediate = 0 }; // always false + } else if (!payload_type.hasRuntimeBits()) { + if (error_type.abiSize(self.target.*) <= 8) { + const reg_mcv: MCValue = switch (operand) { + .register => operand, + else => .{ .register = try self.copyToTmpRegister(error_type, operand) }, + }; + + _ = try self.addInst(.{ + .tag = .subcc, + .data = .{ .arithmetic_3op = .{ + .is_imm = true, + .rs1 = reg_mcv.register, + .rs2_or_imm = .{ .imm = 0 }, + .rd = .g0, + } }, + }); + + return MCValue{ .compare_flags_unsigned = .gt }; + } else { + return self.fail("TODO isErr for errors with size > 8", .{}); + } + } else { + return self.fail("TODO isErr for non-empty payloads", .{}); + } +} + +fn isNonErr(self: *Self, ty: Type, operand: MCValue) !MCValue { + // Call isErr, then negate the result. + const is_err_result = try self.isErr(ty, operand); + switch (is_err_result) { + .compare_flags_unsigned => |op| { + assert(op == .gt); + return MCValue{ .compare_flags_unsigned = .lte }; + }, + .immediate => |imm| { + assert(imm == 0); + return MCValue{ .immediate = 1 }; + }, + else => unreachable, + } +} + fn iterateBigTomb(self: *Self, inst: Air.Inst.Index, operand_count: usize) !BigTomb { try self.ensureProcessDeathCapacity(operand_count + 1); return BigTomb{ @@ -1533,6 +2643,88 @@ fn iterateBigTomb(self: *Self, inst: Air.Inst.Index, operand_count: usize) !BigT }; } +/// Send control flow to `inst`. 
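+/// `inst` here is an index into mir_instructions; the actual branch
+/// displacement is computed later, during emission.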
+fn jump(self: *Self, inst: Mir.Inst.Index) !void { + _ = try self.addInst(.{ + .tag = .bpcc, + .data = .{ + .branch_predict_int = .{ + .cond = .al, + .ccr = .xcc, + .inst = inst, + }, + }, + }); + + // TODO find out a way to fill this delay slot + _ = try self.addInst(.{ + .tag = .nop, + .data = .{ .nop = {} }, + }); +} + +fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!void { + const elem_ty = ptr_ty.elemType(); + const elem_size = elem_ty.abiSize(self.target.*); + + switch (ptr) { + .none => unreachable, + .undef => unreachable, + .unreach => unreachable, + .dead => unreachable, + .compare_flags_unsigned, + .compare_flags_signed, + => unreachable, // cannot hold an address + .immediate => |imm| try self.setRegOrMem(elem_ty, dst_mcv, .{ .memory = imm }), + .ptr_stack_offset => |off| try self.setRegOrMem(elem_ty, dst_mcv, .{ .stack_offset = off }), + .register => |addr_reg| { + const addr_reg_lock = self.register_manager.lockReg(addr_reg); + defer if (addr_reg_lock) |reg| self.register_manager.unlockReg(reg); + + switch (dst_mcv) { + .dead => unreachable, + .undef => unreachable, + .compare_flags_signed, .compare_flags_unsigned => unreachable, + .register => |dst_reg| { + try self.genLoad(dst_reg, addr_reg, i13, 0, elem_size); + }, + .stack_offset => |off| { + if (elem_size <= 8) { + const tmp_reg = try self.register_manager.allocReg(null); + const tmp_reg_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_reg_lock); + + try self.load(.{ .register = tmp_reg }, ptr, ptr_ty); + try self.genSetStack(elem_ty, off, MCValue{ .register = tmp_reg }); + } else { + const regs = try self.register_manager.allocRegs(3, .{ null, null, null }); + const regs_locks = self.register_manager.lockRegsAssumeUnused(3, regs); + defer for (regs_locks) |reg| { + self.register_manager.unlockReg(reg); + }; + + const src_reg = addr_reg; + const dst_reg = regs[0]; + const len_reg = regs[1]; + const tmp_reg = regs[2]; + + try self.genSetReg(ptr_ty, dst_reg, .{ .ptr_stack_offset = off }); + try self.genSetReg(Type.usize, len_reg, .{ .immediate = elem_size }); + try self.genInlineMemcpy(src_reg, dst_reg, len_reg, tmp_reg); + } + }, + else => return self.fail("TODO load from register into {}", .{dst_mcv}), + } + }, + .memory, + .stack_offset, + => { + const addr_reg = try self.copyToTmpRegister(ptr_ty, ptr); + try self.load(dst_mcv, .{ .register = addr_reg }, ptr_ty); + }, + } +} + fn lowerDeclRef(self: *Self, tv: TypedValue, decl_index: Module.Decl.Index) InnerError!MCValue { const ptr_bits = self.target.cpu.arch.ptrBitWidth(); const ptr_bytes: u64 = @divExact(ptr_bits, 8); @@ -1568,7 +2760,7 @@ fn parseRegName(name: []const u8) ?Register { fn performReloc(self: *Self, inst: Mir.Inst.Index) !void { const tag = self.mir_instructions.items(.tag)[inst]; switch (tag) { - .bpcc => self.mir_instructions.items(.data)[inst].branch_predict.inst = @intCast(Mir.Inst.Index, self.mir_instructions.len), + .bpcc => self.mir_instructions.items(.data)[inst].branch_predict_int.inst = @intCast(Mir.Inst.Index, self.mir_instructions.len), else => unreachable, } } @@ -1585,6 +2777,9 @@ fn processDeath(self: *Self, inst: Air.Inst.Index) void { .register => |reg| { self.register_manager.freeReg(reg); }, + .compare_flags_signed, .compare_flags_unsigned => { + self.compare_flags_inst = null; + }, else => {}, // TODO process stack allocation death } } @@ -1718,11 +2913,18 @@ fn ret(self: *Self, mcv: MCValue) !void { const ret_ty = self.fn_type.fnReturnType(); try 
self.setRegOrMem(ret_ty, self.ret_mcv, mcv); - // Just add space for an instruction, patch this later + // Just add space for a branch instruction, patch this later const index = try self.addInst(.{ .tag = .nop, .data = .{ .nop = {} }, }); + + // Reserve space for the delay slot too + // TODO find out a way to fill this + _ = try self.addInst(.{ + .tag = .nop, + .data = .{ .nop = {} }, + }); try self.exitlude_jump_relocs.append(self.gpa, index); } @@ -1770,6 +2972,29 @@ fn setRegOrMem(self: *Self, ty: Type, loc: MCValue, val: MCValue) !void { } } +/// Save the current instruction stored in the compare flags if +/// occupied +fn spillCompareFlagsIfOccupied(self: *Self) !void { + if (self.compare_flags_inst) |inst_to_save| { + const mcv = self.getResolvedInstValue(inst_to_save); + switch (mcv) { + .compare_flags_signed, + .compare_flags_unsigned, + => {}, + else => unreachable, // mcv doesn't occupy the compare flags + } + + const new_mcv = try self.allocRegOrMem(inst_to_save, true); + try self.setRegOrMem(self.air.typeOfIndex(inst_to_save), new_mcv, mcv); + log.debug("spilling {d} to mcv {any}", .{ inst_to_save, new_mcv }); + + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + try branch.inst_table.put(self.gpa, inst_to_save, new_mcv); + + self.compare_flags_inst = null; + } +} + pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void { const stack_mcv = try self.allocRegOrMem(inst, false); log.debug("spilling {d} to stack mcv {any}", .{ inst, stack_mcv }); @@ -1780,6 +3005,152 @@ pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void try self.genSetStack(self.air.typeOfIndex(inst), stack_mcv.stack_offset, reg_mcv); } +fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type) InnerError!void { + const abi_size = value_ty.abiSize(self.target.*); + + switch (ptr) { + .none => unreachable, + .undef => unreachable, + .unreach => unreachable, + .dead => unreachable, + .compare_flags_unsigned, + .compare_flags_signed, + => unreachable, // cannot hold an address + .immediate => |imm| { + try self.setRegOrMem(value_ty, .{ .memory = imm }, value); + }, + .ptr_stack_offset => |off| { + try self.genSetStack(value_ty, off, value); + }, + .register => |addr_reg| { + const addr_reg_lock = self.register_manager.lockReg(addr_reg); + defer if (addr_reg_lock) |reg| self.register_manager.unlockReg(reg); + + switch (value) { + .register => |value_reg| { + try self.genStore(value_reg, addr_reg, i13, 0, abi_size); + }, + else => { + return self.fail("TODO implement copying of memory", .{}); + }, + } + }, + .memory, + .stack_offset, + => { + const addr_reg = try self.copyToTmpRegister(ptr_ty, ptr); + try self.store(.{ .register = addr_reg }, value, ptr_ty, value_ty); + }, + } +} + +fn structFieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, index: u32) !MCValue { + return if (self.liveness.isUnused(inst)) .dead else result: { + const mcv = try self.resolveInst(operand); + const ptr_ty = self.air.typeOf(operand); + const struct_ty = ptr_ty.childType(); + const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(index, self.target.*)); + switch (mcv) { + .ptr_stack_offset => |off| { + break :result MCValue{ .ptr_stack_offset = off - struct_field_offset }; + }, + else => { + const offset_reg = try self.copyToTmpRegister(ptr_ty, .{ + .immediate = struct_field_offset, + }); + const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg); + defer 
self.register_manager.unlockReg(offset_reg_lock);
+
+                const addr_reg = try self.copyToTmpRegister(ptr_ty, mcv);
+                const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
+                defer self.register_manager.unlockReg(addr_reg_lock);
+
+                const dest = try self.binOp(
+                    .add,
+                    null,
+                    .{ .register = addr_reg },
+                    .{ .register = offset_reg },
+                    Type.usize,
+                    Type.usize,
+                );
+
+                break :result dest;
+            },
+        }
+    };
+}
+
+fn truncRegister(
+    self: *Self,
+    operand_reg: Register,
+    dest_reg: Register,
+    int_signedness: std.builtin.Signedness,
+    int_bits: u16,
+) !void {
+    switch (int_bits) {
+        1...31, 33...63 => {
+            // Truncate to int_bits by shifting the value all the way
+            // left, then shifting it back right by the same amount.
+            _ = try self.addInst(.{
+                .tag = .sllx,
+                .data = .{
+                    .shift = .{
+                        .is_imm = true,
+                        .width = ShiftWidth.shift64,
+                        .rd = dest_reg,
+                        .rs1 = operand_reg,
+                        .rs2_or_imm = .{ .imm = @intCast(u6, 64 - int_bits) },
+                    },
+                },
+            });
+            _ = try self.addInst(.{
+                .tag = switch (int_signedness) {
+                    .signed => .srax,
+                    .unsigned => .srlx,
+                },
+                .data = .{
+                    .shift = .{
+                        .is_imm = true,
+                        .width = ShiftWidth.shift64,
+                        .rd = dest_reg,
+                        .rs1 = dest_reg,
+                        .rs2_or_imm = .{ .imm = @intCast(u6, 64 - int_bits) },
+                    },
+                },
+            });
+        },
+        32 => {
+            _ = try self.addInst(.{
+                .tag = switch (int_signedness) {
+                    .signed => .sra,
+                    .unsigned => .srl,
+                },
+                .data = .{
+                    .shift = .{
+                        .is_imm = true,
+                        .width = ShiftWidth.shift32,
+                        .rd = dest_reg,
+                        .rs1 = operand_reg,
+                        .rs2_or_imm = .{ .imm = 0 },
+                    },
+                },
+            });
+        },
+        64 => {
+            _ = try self.addInst(.{
+                .tag = .@"or",
+                .data = .{
+                    .arithmetic_3op = .{
+                        .is_imm = false,
+                        .rd = dest_reg,
+                        .rs1 = .g0,
+                        .rs2_or_imm = .{ .rs2 = operand_reg },
+                    },
+                },
+            });
+        },
+        else => unreachable,
+    }
+}
+
 /// TODO support scope overrides. Also note this logic is duplicated with `Module.wantSafety`.
 fn wantSafety(self: *Self) bool {
     return switch (self.bin_file.options.optimize_mode) {
diff --git a/src/arch/sparc64/Emit.zig b/src/arch/sparc64/Emit.zig
index 81ae062c61..6f30f785c5 100644
--- a/src/arch/sparc64/Emit.zig
+++ b/src/arch/sparc64/Emit.zig
@@ -8,6 +8,7 @@ const link = @import("../../link.zig");
 const Module = @import("../../Module.zig");
 const ErrorMsg = Module.ErrorMsg;
 const Liveness = @import("../../Liveness.zig");
+const log = std.log.scoped(.sparcv9_emit);
 const DebugInfoOutput = @import("../../codegen.zig").DebugInfoOutput;
 const DW = std.dwarf;
 const leb128 = std.leb;
@@ -31,16 +32,44 @@ prev_di_column: u32,
 /// Relative to the beginning of `code`.
+
 /// TODO support scope overrides. Also note this logic is duplicated with `Module.wantSafety`.
 fn wantSafety(self: *Self) bool {
     return switch (self.bin_file.options.optimize_mode) {
diff --git a/src/arch/sparc64/Emit.zig b/src/arch/sparc64/Emit.zig
index 81ae062c61..6f30f785c5 100644
--- a/src/arch/sparc64/Emit.zig
+++ b/src/arch/sparc64/Emit.zig
@@ -8,6 +8,7 @@ const link = @import("../../link.zig");
 const Module = @import("../../Module.zig");
 const ErrorMsg = Module.ErrorMsg;
 const Liveness = @import("../../Liveness.zig");
+const log = std.log.scoped(.sparcv9_emit);
 const DebugInfoOutput = @import("../../codegen.zig").DebugInfoOutput;
 const DW = std.dwarf;
 const leb128 = std.leb;
@@ -31,16 +32,44 @@ prev_di_column: u32,
 /// Relative to the beginning of `code`.
 prev_di_pc: usize,
 
+/// The branch type of every branch instruction
+branch_types: std.AutoHashMapUnmanaged(Mir.Inst.Index, BranchType) = .{},
+/// For every forward branch, maps the target instruction to a list of
+/// branches which branch to this target instruction
+branch_forward_origins: std.AutoHashMapUnmanaged(Mir.Inst.Index, std.ArrayListUnmanaged(Mir.Inst.Index)) = .{},
+/// For backward branches: stores the code offset of the target
+/// instruction
+///
+/// For forward branches: stores the code offset of the branch
+/// instruction
+code_offset_mapping: std.AutoHashMapUnmanaged(Mir.Inst.Index, usize) = .{},
+
 const InnerError = error{
     OutOfMemory,
     EmitFail,
};
 
+const BranchType = enum {
+    bpcc,
+    bpr,
+
+    fn default(tag: Mir.Inst.Tag) BranchType {
+        return switch (tag) {
+            .bpcc => .bpcc,
+            .bpr => .bpr,
+            else => unreachable,
+        };
+    }
+};
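+
+// For instance (illustration), a bpcc MIR instruction starts out as
+// BranchType.bpcc, its smallest lowering; lowerBranches below recomputes
+// branch types until a fixed point, and optimalBranchType would pick a
+// wider lowering if the offset outgrew the encoding (currently a TODO).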
+
 pub fn emitMir(
     emit: *Emit,
 ) InnerError!void {
     const mir_tags = emit.mir.instructions.items(.tag);
 
+    // Convert absolute addresses into offsets and
+    // find the smallest lowerings for branch instructions
+    try emit.lowerBranches();
+
     // Emit machine code
     for (mir_tags) |tag, index| {
         const inst = @intCast(u32, index);
@@ -51,7 +80,8 @@ pub fn emitMir(
 
             .add => try emit.mirArithmetic3Op(inst),
 
-            .bpcc => @panic("TODO implement sparc64 bpcc"),
+            .bpr => try emit.mirConditionalBranch(inst),
+            .bpcc => try emit.mirConditionalBranch(inst),
 
             .call => @panic("TODO implement sparc64 call"),
 
@@ -64,6 +94,8 @@ pub fn emitMir(
 
             .@"or" => try emit.mirArithmetic3Op(inst),
 
+            .mulx => try emit.mirArithmetic3Op(inst),
+
             .nop => try emit.mirNop(),
 
             .@"return" => try emit.mirArithmetic2Op(inst),
@@ -73,7 +105,12 @@ pub fn emitMir(
 
             .sethi => try emit.mirSethi(inst),
 
+            .sll => @panic("TODO implement sparc64 sll"),
+            .srl => @panic("TODO implement sparc64 srl"),
+            .sra => @panic("TODO implement sparc64 sra"),
             .sllx => @panic("TODO implement sparc64 sllx"),
+            .srlx => @panic("TODO implement sparc64 srlx"),
+            .srax => @panic("TODO implement sparc64 srax"),
 
             .stb => try emit.mirArithmetic3Op(inst),
             .sth => try emit.mirArithmetic3Op(inst),
@@ -81,6 +118,7 @@ pub fn emitMir(
             .stx => try emit.mirArithmetic3Op(inst),
 
             .sub => try emit.mirArithmetic3Op(inst),
+            .subcc => try emit.mirArithmetic3Op(inst),
 
             .tcc => try emit.mirTrap(inst),
         }
@@ -88,6 +126,14 @@ pub fn emitMir(
 }
 
 pub fn deinit(emit: *Emit) void {
+    var iter = emit.branch_forward_origins.valueIterator();
+    while (iter.next()) |origin_list| {
+        origin_list.deinit(emit.bin_file.allocator);
+    }
+
+    emit.branch_types.deinit(emit.bin_file.allocator);
+    emit.branch_forward_origins.deinit(emit.bin_file.allocator);
+    emit.code_offset_mapping.deinit(emit.bin_file.allocator);
     emit.* = undefined;
 }
 
@@ -161,6 +207,7 @@ fn mirArithmetic3Op(emit: *Emit, inst: Mir.Inst.Index) !void {
             .lduw => try emit.writeInstruction(Instruction.lduw(i13, rs1, imm, rd)),
             .ldx => try emit.writeInstruction(Instruction.ldx(i13, rs1, imm, rd)),
             .@"or" => try emit.writeInstruction(Instruction.@"or"(i13, rs1, imm, rd)),
+            .mulx => try emit.writeInstruction(Instruction.mulx(i13, rs1, imm, rd)),
             .save => try emit.writeInstruction(Instruction.save(i13, rs1, imm, rd)),
             .restore => try emit.writeInstruction(Instruction.restore(i13, rs1, imm, rd)),
             .stb => try emit.writeInstruction(Instruction.stb(i13, rs1, imm, rd)),
@@ -168,6 +215,7 @@ fn mirArithmetic3Op(emit: *Emit, inst: Mir.Inst.Index) !void {
             .stw => try emit.writeInstruction(Instruction.stw(i13, rs1, imm, rd)),
             .stx => try emit.writeInstruction(Instruction.stx(i13, rs1, imm, rd)),
             .sub => try emit.writeInstruction(Instruction.sub(i13, rs1, imm, rd)),
+            .subcc => try emit.writeInstruction(Instruction.subcc(i13, rs1, imm, rd)),
             else => unreachable,
         }
     } else {
@@ -180,6 +228,7 @@ fn mirArithmetic3Op(emit: *Emit, inst: Mir.Inst.Index) !void {
             .lduw => try emit.writeInstruction(Instruction.lduw(Register, rs1, rs2, rd)),
             .ldx => try emit.writeInstruction(Instruction.ldx(Register, rs1, rs2, rd)),
             .@"or" => try emit.writeInstruction(Instruction.@"or"(Register, rs1, rs2, rd)),
+            .mulx => try emit.writeInstruction(Instruction.mulx(Register, rs1, rs2, rd)),
             .save => try emit.writeInstruction(Instruction.save(Register, rs1, rs2, rd)),
             .restore => try emit.writeInstruction(Instruction.restore(Register, rs1, rs2, rd)),
             .stb => try emit.writeInstruction(Instruction.stb(Register, rs1, rs2, rd)),
@@ -187,11 +236,56 @@ fn mirArithmetic3Op(emit: *Emit, inst: Mir.Inst.Index) !void {
             .stw => try emit.writeInstruction(Instruction.stw(Register, rs1, rs2, rd)),
             .stx => try emit.writeInstruction(Instruction.stx(Register, rs1, rs2, rd)),
             .sub => try emit.writeInstruction(Instruction.sub(Register, rs1, rs2, rd)),
+            .subcc => try emit.writeInstruction(Instruction.subcc(Register, rs1, rs2, rd)),
             else => unreachable,
         }
     }
 }
 
+fn mirConditionalBranch(emit: *Emit, inst: Mir.Inst.Index) !void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    const branch_type = emit.branch_types.get(inst).?;
+
+    switch (branch_type) {
+        .bpcc => switch (tag) {
+            .bpcc => {
+                const branch_predict_int = emit.mir.instructions.items(.data)[inst].branch_predict_int;
+                const offset = @intCast(i64, emit.code_offset_mapping.get(branch_predict_int.inst).?) - @intCast(i64, emit.code.items.len);
+                log.debug("mirConditionalBranch: {} offset={}", .{ inst, offset });
+
+                try emit.writeInstruction(
+                    Instruction.bpcc(
+                        branch_predict_int.cond,
+                        branch_predict_int.annul,
+                        branch_predict_int.pt,
+                        branch_predict_int.ccr,
+                        @intCast(i21, offset),
+                    ),
+                );
+            },
+            else => unreachable,
+        },
+        .bpr => switch (tag) {
+            .bpr => {
+                const branch_predict_reg = emit.mir.instructions.items(.data)[inst].branch_predict_reg;
+                const offset = @intCast(i64, emit.code_offset_mapping.get(branch_predict_reg.inst).?) - @intCast(i64, emit.code.items.len);
+                log.debug("mirConditionalBranch: {} offset={}", .{ inst, offset });
+
+                try emit.writeInstruction(
+                    Instruction.bpr(
+                        branch_predict_reg.cond,
+                        branch_predict_reg.annul,
+                        branch_predict_reg.pt,
+                        branch_predict_reg.rs1,
+                        @intCast(i18, offset),
+                    ),
+                );
+            },
+            else => unreachable,
+        },
+    }
+}
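+
+// Worked example (illustration): for a backward bpcc emitted at code
+// offset 0x80 whose target sits at offset 0x40, code_offset_mapping
+// yields 0x40 and the displacement is 0x40 - 0x80 = -0x40 bytes, which
+// is then handed to Instruction.bpcc as an i21.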
+
 fn mirNop(emit: *Emit) !void {
     try emit.writeInstruction(Instruction.nop());
 }
 
@@ -232,6 +326,16 @@ fn mirTrap(emit: *Emit, inst: Mir.Inst.Index) !void {
 
 // Common helper functions
 
+fn branchTarget(emit: *Emit, inst: Mir.Inst.Index) Mir.Inst.Index {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+
+    switch (tag) {
+        .bpcc => return emit.mir.instructions.items(.data)[inst].branch_predict_int.inst,
+        .bpr => return emit.mir.instructions.items(.data)[inst].branch_predict_reg.inst,
+        else => unreachable,
+    }
+}
+
 fn dbgAdvancePCAndLine(emit: *Emit, line: u32, column: u32) !void {
     const delta_line = @intCast(i32, line) - @intCast(i32, emit.prev_di_line);
     const delta_pc: usize = emit.code.items.len - emit.prev_di_pc;
@@ -264,6 +368,164 @@ fn fail(emit: *Emit, comptime format: []const u8, args: anytype) InnerError {
     return error.EmitFail;
 }
 
+fn instructionSize(emit: *Emit, inst: Mir.Inst.Index) usize {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+
+    switch (tag) {
+        .dbg_line,
+        .dbg_epilogue_begin,
+        .dbg_prologue_end,
+        => return 0,
+
+        // Currently, MIR instructions always map to a single machine instruction.
+        else => return 4,
+    }
+}
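+
+// Example (illustration): a MIR body of { dbg_line, add, bpcc } occupies
+// 0 + 4 + 4 = 8 bytes of code, since dbg_* pseudo-instructions emit
+// nothing and every real instruction is 4 bytes wide.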
+
+fn isBranch(tag: Mir.Inst.Tag) bool {
+    return switch (tag) {
+        .bpcc => true,
+        .bpr => true,
+        else => false,
+    };
+}
+
+fn lowerBranches(emit: *Emit) !void {
+    const mir_tags = emit.mir.instructions.items(.tag);
+    const allocator = emit.bin_file.allocator;
+
+    // First pass: Note down all branches and their target
+    // instructions, i.e. populate branch_types,
+    // branch_forward_origins, and code_offset_mapping
+    //
+    // TODO optimization opportunity: do this in codegen while
+    // generating MIR
+    for (mir_tags) |tag, index| {
+        const inst = @intCast(u32, index);
+        if (isBranch(tag)) {
+            const target_inst = emit.branchTarget(inst);
+
+            // Remember this branch instruction
+            try emit.branch_types.put(allocator, inst, BranchType.default(tag));
+
+            // Forward branches require some extra work: we only
+            // know their offset once we arrive at the target
+            // instruction. Therefore, we need to be able to
+            // access the branch instruction when we visit the
+            // target instruction in order to manipulate its type
+            // etc.
+            if (target_inst > inst) {
+                // Remember the branch instruction index
+                try emit.code_offset_mapping.put(allocator, inst, 0);
+
+                if (emit.branch_forward_origins.getPtr(target_inst)) |origin_list| {
+                    try origin_list.append(allocator, inst);
+                } else {
+                    var origin_list: std.ArrayListUnmanaged(Mir.Inst.Index) = .{};
+                    try origin_list.append(allocator, inst);
+                    try emit.branch_forward_origins.put(allocator, target_inst, origin_list);
+                }
+            }
+
+            // Remember the target instruction index so that we
+            // can update the real code offset in all future passes
+            //
+            // putNoClobber cannot be used here, as the entry may
+            // already exist when multiple branches branch to the
+            // same target instruction
+            try emit.code_offset_mapping.put(allocator, target_inst, 0);
+        }
+    }
+
+    // Further passes: Until all branches are lowered, iterate
+    // through all instructions and calculate new offsets and
+    // potentially new branch types
+    var all_branches_lowered = false;
+    while (!all_branches_lowered) {
+        all_branches_lowered = true;
+        var current_code_offset: usize = 0;
+
+        for (mir_tags) |tag, index| {
+            const inst = @intCast(u32, index);
+
+            // If this instruction is contained in the code offset
+            // mapping (i.e. it is a branch target or a forward
+            // branch itself), update its code offset
+            if (emit.code_offset_mapping.getPtr(inst)) |offset| {
+                offset.* = current_code_offset;
+            }
+
+            // If this instruction is a backward branch, calculate the
+            // offset, which may potentially update the branch type
+            if (isBranch(tag)) {
+                const target_inst = emit.branchTarget(inst);
+                if (target_inst < inst) {
+                    const target_offset = emit.code_offset_mapping.get(target_inst).?;
+                    const offset = @intCast(i64, target_offset) - @intCast(i64, current_code_offset);
+                    const branch_type = emit.branch_types.getPtr(inst).?;
+                    const optimal_branch_type = try emit.optimalBranchType(tag, offset);
+                    if (branch_type.* != optimal_branch_type) {
+                        branch_type.* = optimal_branch_type;
+                        all_branches_lowered = false;
+                    }
+
+                    log.debug("lowerBranches: branch {} has offset {}", .{ inst, offset });
+                }
+            }
+
+            // If this instruction is the target of one or more
+            // forward branches, calculate the offset, which may
+            // potentially update the branch type
+            if (emit.branch_forward_origins.get(inst)) |origin_list| {
+                for (origin_list.items) |forward_branch_inst| {
+                    const branch_tag = emit.mir.instructions.items(.tag)[forward_branch_inst];
+                    const forward_branch_inst_offset = emit.code_offset_mapping.get(forward_branch_inst).?;
+                    const offset = @intCast(i64, current_code_offset) - @intCast(i64, forward_branch_inst_offset);
+                    const branch_type = emit.branch_types.getPtr(forward_branch_inst).?;
+                    const optimal_branch_type = try emit.optimalBranchType(branch_tag, offset);
+                    if (branch_type.* != optimal_branch_type) {
+                        branch_type.* = optimal_branch_type;
+                        all_branches_lowered = false;
+                    }
+
+                    log.debug("lowerBranches: branch {} has offset {}", .{ forward_branch_inst, offset });
+                }
+            }
+
+            // Increment code offset
+            current_code_offset += emit.instructionSize(inst);
+        }
+    }
+}
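+
+// Worked example (illustration): for a forward bpcc at MIR index 2
+// targeting index 5, with every MIR instruction lowering to one machine
+// instruction, the branch sits at code offset 8 and its target at offset
+// 20, so the passes settle on a displacement of 20 - 8 = 12 bytes.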
+
+fn optimalBranchType(emit: *Emit, tag: Mir.Inst.Tag, offset: i64) !BranchType {
+    assert(offset & 0b11 == 0);
+
+    switch (tag) {
+        // TODO use the following strategy to implement long branches:
+        // - Negate the condition and target of the original instruction;
+        // - In the space immediately after the branch, load
+        //   the address of the original target, preferably in
+        //   a PC-relative way, into %o7; and
+        // - jmpl %o7 + %g0, %g0
+
+        .bpcc => {
+            if (std.math.cast(i21, offset)) |_| {
+                return BranchType.bpcc;
+            } else |_| {
+                return emit.fail("TODO support BPcc branches larger than +-1 MiB", .{});
+            }
+        },
+        .bpr => {
+            if (std.math.cast(i18, offset)) |_| {
+                return BranchType.bpr;
+            } else |_| {
+                return emit.fail("TODO support BPr branches larger than +-128 KiB", .{});
+            }
+        },
+        else => unreachable,
+    }
+}
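+
+// Range math (illustration): BPcc encodes a 19-bit word displacement,
+// i.e. 21 bits of byte offset, covering +-1 MiB; BPr encodes a 16-bit
+// word displacement, i.e. 18 bits of byte offset, covering +-128 KiB.
+// Hence the i21/i18 casts above.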
+
 fn writeInstruction(emit: *Emit, instruction: Instruction) !void {
     // SPARCv9 instructions are always arranged in BE regardless of the
     // endianness mode the CPU is running in (Section 3.1 of the ISA specification).
diff --git a/src/arch/sparc64/Mir.zig b/src/arch/sparc64/Mir.zig
index ef0be93f4c..441e151cea 100644
--- a/src/arch/sparc64/Mir.zig
+++ b/src/arch/sparc64/Mir.zig
@@ -43,8 +43,12 @@ pub const Inst = struct {
         // TODO add other operations.
         add,
 
+        /// A.3 Branch on Integer Register with Prediction (BPr)
+        /// This uses the branch_predict_reg field.
+        bpr,
+
         /// A.7 Branch on Integer Condition Codes with Prediction (BPcc)
-        /// This uses the branch_predict field.
+        /// This uses the branch_predict_int field.
         bpcc,
 
         /// A.8 Call and Link
@@ -70,6 +74,11 @@ pub const Inst = struct {
         // TODO add other operations.
         @"or",
 
+        /// A.37 Multiply and Divide (64-bit)
+        /// This uses the arithmetic_3op field.
+        // TODO add other operations.
+        mulx,
+
         /// A.40 No Operation
         /// This uses the nop field.
         nop,
@@ -89,8 +98,12 @@ pub const Inst = struct {
 
         /// A.49 Shift
         /// This uses the shift field.
-        // TODO add other operations.
+        sll,
+        srl,
+        sra,
         sllx,
+        srlx,
+        srax,
 
         /// A.54 Store Integer
         /// This uses the arithmetic_3op field.
@@ -106,10 +119,15 @@ pub const Inst = struct {
         /// This uses the arithmetic_3op field.
         // TODO add other operations.
         sub,
+        subcc,
 
         /// A.61 Trap on Integer Condition Codes (Tcc)
         /// This uses the trap field.
         tcc,
+
+        // TODO add synthetic instructions
+        // TODO add a cmp synthetic instruction to avoid wasting a register
+        // when comparing with subcc
     };
 
     /// The position of an MIR instruction within the `Mir` instructions array.
@@ -164,13 +182,23 @@ pub const Inst = struct {
             link: Register = .o7,
         },
 
-        /// Branch with prediction.
+        /// Branch with prediction, checking the integer condition codes
         /// Used by e.g. bpcc
-        branch_predict: struct {
+        branch_predict_int: struct {
             annul: bool = false,
             pt: bool = true,
             ccr: Instruction.CCR,
-            cond: Instruction.Condition,
+            cond: Instruction.ICondition,
             inst: Index,
         },
+
+        /// Branch with prediction, comparing a register's contents with zero
+        /// Used by e.g. bpr
+        branch_predict_reg: struct {
+            annul: bool = false,
+            pt: bool = true,
+            cond: Instruction.RCondition,
+            rs1: Register,
+            inst: Index,
+        },
 
@@ -191,7 +219,7 @@ pub const Inst = struct {
         /// if is_imm true then it uses the imm field of rs2_or_imm,
         /// otherwise it uses rs2 field.
         ///
-        /// Used by e.g. add, sub
+        /// Used by e.g. sllx
         shift: struct {
             is_imm: bool,
             width: Instruction.ShiftWidth,
@@ -210,7 +238,7 @@ pub const Inst = struct {
         /// Used by e.g. tcc
         trap: struct {
             is_imm: bool = true,
-            cond: Instruction.Condition,
+            cond: Instruction.ICondition,
             ccr: Instruction.CCR = .icc,
             rs1: Register = .g0,
             rs2_or_imm: union {
diff --git a/src/arch/sparc64/bits.zig b/src/arch/sparc64/bits.zig
index e66b24f617..f4226b49da 100644
--- a/src/arch/sparc64/bits.zig
+++ b/src/arch/sparc64/bits.zig
@@ -512,10 +512,172 @@ pub const Instruction = union(enum) {
         lookaside: bool = false,
     };
 
-    // TODO: Need to define an enum for `cond` values
-    // This is kinda challenging since the cond values have different meanings
-    // depending on whether it's operating on integer or FP CCR.
-    pub const Condition = u4;
+    // In SPARCv9, FP and integer comparison operations
+    // are encoded differently.
+
+    pub const FCondition = enum(u4) {
+        /// Branch Never
+        nv,
+        /// Branch on Not Equal
+        ne,
+        /// Branch on Less or Greater
+        lg,
+        /// Branch on Unordered or Less
+        ul,
+        /// Branch on Less
+        lt,
+        /// Branch on Unordered or Greater
+        ug,
+        /// Branch on Greater
+        gt,
+        /// Branch on Unordered
+        un,
+        /// Branch Always
+        al,
+        /// Branch on Equal
+        eq,
+        /// Branch on Unordered or Equal
+        ue,
+        /// Branch on Greater or Equal
+        ge,
+        /// Branch on Unordered or Greater or Equal
+        uge,
+        /// Branch on Less or Equal
+        le,
+        /// Branch on Unordered or Less or Equal
+        ule,
+        /// Branch on Ordered
+        ord,
+
+        /// Converts a std.math.CompareOperator into a condition flag,
+        /// i.e. returns the condition that is true iff the result of the
+        /// comparison is true.
+        pub fn fromCompareOperator(op: std.math.CompareOperator) FCondition {
+            return switch (op) {
+                .gte => .ge,
+                .gt => .gt,
+                .neq => .ne,
+                .lt => .lt,
+                .lte => .le,
+                .eq => .eq,
+            };
+        }
+
+        /// Returns the condition which is true iff the given condition is
+        /// false (if such a condition exists).
+        pub fn negate(cond: FCondition) FCondition {
+            return switch (cond) {
+                .eq => .ne,
+                .ne => .eq,
+                .ge => .ul,
+                .ul => .ge,
+                .le => .ug,
+                .ug => .le,
+                .lt => .uge,
+                .uge => .lt,
+                .gt => .ule,
+                .ule => .gt,
+                .ue => .lg,
+                .lg => .ue,
+                .ord => .un,
+                .un => .ord,
+                .al => unreachable,
+                .nv => unreachable,
+            };
+        }
+    };
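+
+    // Example (illustration): FCondition.fromCompareOperator(.lt) yields
+    // .lt, and FCondition.negate(.lt) yields .uge, since the negation of
+    // "less" over possibly-unordered FP operands is "unordered or greater
+    // or equal".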
+
+    pub const ICondition = enum(u4) {
+        /// Branch Never
+        nv,
+        /// Branch on Equal
+        eq,
+        /// Branch on Less or Equal
+        le,
+        /// Branch on Less
+        lt,
+        /// Branch on Less or Equal Unsigned
+        leu,
+        /// Branch on Carry Set (Less than, Unsigned)
+        cs,
+        /// Branch on Negative
+        neg,
+        /// Branch on Overflow Set
+        vs,
+        /// Branch Always
+        al,
+        /// Branch on Not Equal
+        ne,
+        /// Branch on Greater
+        gt,
+        /// Branch on Greater or Equal
+        ge,
+        /// Branch on Greater Unsigned
+        gu,
+        /// Branch on Carry Clear (Greater Than or Equal, Unsigned)
+        cc,
+        /// Branch on Positive
+        pos,
+        /// Branch on Overflow Clear
+        vc,
+
+        /// Converts a std.math.CompareOperator into a condition flag,
+        /// i.e. returns the condition that is true iff the result of the
+        /// comparison is true. Assumes signed comparison.
+        pub fn fromCompareOperatorSigned(op: std.math.CompareOperator) ICondition {
+            return switch (op) {
+                .gte => .ge,
+                .gt => .gt,
+                .neq => .ne,
+                .lt => .lt,
+                .lte => .le,
+                .eq => .eq,
+            };
+        }
+
+        /// Converts a std.math.CompareOperator into a condition flag,
+        /// i.e. returns the condition that is true iff the result of the
+        /// comparison is true. Assumes unsigned comparison.
+        pub fn fromCompareOperatorUnsigned(op: std.math.CompareOperator) ICondition {
+            return switch (op) {
+                .gte => .cc,
+                .gt => .gu,
+                .neq => .ne,
+                .lt => .cs,
+                .lte => .leu,
+                .eq => .eq,
+            };
+        }
+
+        /// Returns the condition which is true iff the given condition is
+        /// false (if such a condition exists).
+        pub fn negate(cond: ICondition) ICondition {
+            return switch (cond) {
+                .eq => .ne,
+                .ne => .eq,
+                .cs => .cc,
+                .cc => .cs,
+                .neg => .pos,
+                .pos => .neg,
+                .vs => .vc,
+                .vc => .vs,
+                .gu => .leu,
+                .leu => .gu,
+                .ge => .lt,
+                .lt => .ge,
+                .gt => .le,
+                .le => .gt,
+                .al => unreachable,
+                .nv => unreachable,
+            };
+        }
+    };
+
+    pub const Condition = packed union {
+        fcond: FCondition,
+        icond: ICondition,
+        encoded: u4,
+    };
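+
+    // Usage sketch (illustration): callers wrap the concrete flavor,
+    // e.g. `.{ .icond = .ne }` for an integer branch, and the format
+    // helpers below read the raw 4-bit pattern through `.encoded`.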
 
     pub fn toU32(self: Instruction) u32 {
         // TODO: Remove this once packed structs work.
@@ -593,7 +755,7 @@ pub const Instruction = union(enum) {
         return Instruction{
             .format_2b = .{
                 .a = @boolToInt(annul),
-                .cond = cond,
+                .cond = cond.encoded,
                 .op2 = op2,
                 .disp22 = udisp_truncated,
             },
@@ -614,7 +776,7 @@ pub const Instruction = union(enum) {
         return Instruction{
             .format_2c = .{
                 .a = @boolToInt(annul),
-                .cond = cond,
+                .cond = cond.encoded,
                 .op2 = op2,
                 .cc1 = ccr_cc1,
                 .cc0 = ccr_cc0,
@@ -895,7 +1057,7 @@ pub const Instruction = union(enum) {
                 .rd = rd.enc(),
                 .op3 = op3,
                 .cc2 = ccr_cc2,
-                .cond = cond,
+                .cond = cond.encoded,
                 .cc1 = ccr_cc1,
                 .cc0 = ccr_cc0,
                 .rs2 = rs2.enc(),
@@ -912,7 +1074,7 @@ pub const Instruction = union(enum) {
                 .rd = rd.enc(),
                 .op3 = op3,
                 .cc2 = ccr_cc2,
-                .cond = cond,
+                .cond = cond.encoded,
                 .cc1 = ccr_cc1,
                 .cc0 = ccr_cc0,
                 .simm11 = @bitCast(u11, imm),
@@ -960,7 +1122,7 @@ pub const Instruction = union(enum) {
             .format_4g = .{
                 .rd = rd.enc(),
                 .op3 = op3,
-                .cond = cond,
+                .cond = cond.encoded,
                 .opf_cc = opf_cc,
                 .opf_low = opf_low,
                 .rs2 = rs2.enc(),
@@ -979,6 +1141,14 @@ pub const Instruction = union(enum) {
         };
     }
 
+    pub fn bpcc(cond: ICondition, annul: bool, pt: bool, ccr: CCR, disp: i21) Instruction {
+        return format2c(0b001, .{ .icond = cond }, annul, pt, ccr, disp);
+    }
+
+    pub fn bpr(cond: RCondition, annul: bool, pt: bool, rs1: Register, disp: i18) Instruction {
+        return format2d(0b011, cond, annul, pt, rs1, disp);
+    }
+
     pub fn jmpl(comptime s2: type, rs1: Register, rs2: s2, rd: Register) Instruction {
         return switch (s2) {
             Register => format3a(0b10, 0b11_1000, rs1, rs2, rd),
@@ -1027,6 +1197,14 @@ pub const Instruction = union(enum) {
         };
     }
 
+    pub fn mulx(comptime s2: type, rs1: Register, rs2: s2, rd: Register) Instruction {
+        return switch (s2) {
+            Register => format3a(0b10, 0b00_1001, rs1, rs2, rd),
+            i13 => format3b(0b10, 0b00_1001, rs1, rs2, rd),
+            else => unreachable,
+        };
+    }
+
     pub fn nop() Instruction {
         return sethi(0, .g0);
     }
@@ -1099,11 +1277,19 @@ pub const Instruction = union(enum) {
         };
     }
 
-    pub fn trap(comptime s2: type, cond: Condition, ccr: CCR, rs1: Register, rs2: s2) Instruction {
+    pub fn subcc(comptime s2: type, rs1: Register, rs2: s2, rd: Register) Instruction {
+        return switch (s2) {
+            Register => format3a(0b10, 0b01_0100, rs1, rs2, rd),
+            i13 => format3b(0b10, 0b01_0100, rs1, rs2, rd),
+            else => unreachable,
+        };
+    }
+
+    pub fn trap(comptime s2: type, cond: ICondition, ccr: CCR, rs1: Register, rs2: s2) Instruction {
         // Tcc instructions abuse the rd field to store the conditionals.
         return switch (s2) {
-            Register => format4a(0b11_1010, ccr, rs1, rs2, @intToEnum(Register, cond)),
-            u7 => format4e(0b11_1010, ccr, rs1, @intToEnum(Register, cond), rs2),
+            Register => format4a(0b11_1010, ccr, rs1, rs2, @intToEnum(Register, @enumToInt(cond))),
+            u7 => format4e(0b11_1010, ccr, rs1, @intToEnum(Register, @enumToInt(cond)), rs2),
             else => unreachable,
         };
     }
@@ -1128,11 +1314,11 @@ test "Serialize formats" {
             .expected = 0b00_00000_100_0000000000000000000000,
         },
         .{
-            .inst = Instruction.format2b(6, 3, true, -4),
+            .inst = Instruction.format2b(6, .{ .icond = .lt }, true, -4),
            .expected = 0b00_1_0011_110_1111111111111111111111,
        },
        .{
-            .inst = Instruction.format2c(3, 0, false, true, .xcc, 8),
+            .inst = Instruction.format2c(3, .{ .icond = .nv }, false, true, .xcc, 8),
             .expected = 0b00_0_0000_011_1_0_1_0000000000000000010,
         },
        .{
@@ -1224,11 +1410,11 @@ test "Serialize formats" {
            .expected = 0b10_10010_001000_00000_1_1_0_11111111111,
        },
        .{
-            .inst = Instruction.format4c(8, 0, .xcc, .g0, .o1),
+            .inst = Instruction.format4c(8, .{ .icond = .nv }, .xcc, .g0, .o1),
            .expected = 0b10_01001_001000_1_0000_0_1_0_000000_00000,
        },
        .{
-            .inst = Instruction.format4d(8, 0, .xcc, 0, .l2),
+            .inst = Instruction.format4d(8, .{ .icond = .nv }, .xcc, 0, .l2),
             .expected = 0b10_10010_001000_1_0000_1_1_0_00000000000,
        },
        .{
@@ -1240,7 +1426,7 @@ test "Serialize formats" {
             .expected = 0b10_10010_001000_00000_0_001_00100_01001,
         },
         .{
-            .inst = Instruction.format4g(8, 4, 2, 0, .o1, .l2),
+            .inst = Instruction.format4g(8, 4, 2, .{ .icond = .nv }, .o1, .l2),
             .expected = 0b10_10010_001000_0_0000_010_000100_01001,
         },
     };