diff --git a/lib/std/Thread.zig b/lib/std/Thread.zig
index 91f7ff58c3..58a409c64e 100644
--- a/lib/std/Thread.zig
+++ b/lib/std/Thread.zig
@@ -505,8 +505,8 @@ const LinuxThreadImpl = struct {
         /// Ported over from musl libc's pthread detached implementation:
         /// https://github.com/ifduyue/musl/search?q=__unmapself
         fn freeAndExit(self: *ThreadCompletion) noreturn {
-            const unmap_and_exit: []const u8 = switch (target.cpu.arch) {
-                .i386 => (
+            switch (target.cpu.arch) {
+                .i386 => asm volatile (
                     \\  movl $91, %%eax
                     \\  movl %[ptr], %%ebx
                     \\  movl %[len], %%ecx
@@ -514,8 +514,12 @@ const LinuxThreadImpl = struct {
                     \\  movl $1, %%eax
                     \\  movl $0, %%ebx
                     \\  int $128
+                    :
+                    : [ptr] "r" (@ptrToInt(self.mapped.ptr)),
+                      [len] "r" (self.mapped.len)
+                    : "memory"
                 ),
-                .x86_64 => (
+                .x86_64 => asm volatile (
                     \\  movq $11, %%rax
                     \\  movq %[ptr], %%rbx
                     \\  movq %[len], %%rcx
@@ -523,8 +527,12 @@ const LinuxThreadImpl = struct {
                     \\  movq $60, %%rax
                     \\  movq $1, %%rdi
                     \\  syscall
+                    :
+                    : [ptr] "r" (@ptrToInt(self.mapped.ptr)),
+                      [len] "r" (self.mapped.len)
+                    : "memory"
                 ),
-                .arm, .armeb, .thumb, .thumbeb => (
+                .arm, .armeb, .thumb, .thumbeb => asm volatile (
                     \\  mov r7, #91
                     \\  mov r0, %[ptr]
                     \\  mov r1, %[len]
@@ -532,8 +540,12 @@ const LinuxThreadImpl = struct {
                     \\  mov r7, #1
                     \\  mov r0, #0
                     \\  svc 0
+                    :
+                    : [ptr] "r" (@ptrToInt(self.mapped.ptr)),
+                      [len] "r" (self.mapped.len)
+                    : "memory"
                 ),
-                .aarch64, .aarch64_be, .aarch64_32 => (
+                .aarch64, .aarch64_be, .aarch64_32 => asm volatile (
                     \\  mov x8, #215
                     \\  mov x0, %[ptr]
                     \\  mov x1, %[len]
@@ -541,8 +553,12 @@ const LinuxThreadImpl = struct {
                     \\  mov x8, #93
                     \\  mov x0, #0
                     \\  svc 0
+                    :
+                    : [ptr] "r" (@ptrToInt(self.mapped.ptr)),
+                      [len] "r" (self.mapped.len)
+                    : "memory"
                 ),
-                .mips, .mipsel => (
+                .mips, .mipsel => asm volatile (
                     \\  move $sp, $25
                     \\  li $2, 4091
                     \\  move $4, %[ptr]
@@ -551,8 +567,12 @@ const LinuxThreadImpl = struct {
                     \\  li $2, 4001
                     \\  li $4, 0
                     \\  syscall
+                    :
+                    : [ptr] "r" (@ptrToInt(self.mapped.ptr)),
+                      [len] "r" (self.mapped.len)
+                    : "memory"
                 ),
-                .mips64, .mips64el => (
+                .mips64, .mips64el => asm volatile (
                     \\  li $2, 4091
                     \\  move $4, %[ptr]
                     \\  move $5, %[len]
@@ -560,8 +580,12 @@ const LinuxThreadImpl = struct {
                     \\  li $2, 4001
                     \\  li $4, 0
                     \\  syscall
+                    :
+                    : [ptr] "r" (@ptrToInt(self.mapped.ptr)),
+                      [len] "r" (self.mapped.len)
+                    : "memory"
                 ),
-                .powerpc, .powerpcle, .powerpc64, .powerpc64le => (
+                .powerpc, .powerpcle, .powerpc64, .powerpc64le => asm volatile (
                     \\  li 0, 91
                     \\  mr %[ptr], 3
                     \\  mr %[len], 4
@@ -570,8 +594,12 @@ const LinuxThreadImpl = struct {
                     \\  li 3, 0
                     \\  sc
                     \\  blr
+                    :
+                    : [ptr] "r" (@ptrToInt(self.mapped.ptr)),
+                      [len] "r" (self.mapped.len)
+                    : "memory"
                 ),
-                .riscv64 => (
+                .riscv64 => asm volatile (
                     \\  li a7, 215
                     \\  mv a0, %[ptr]
                     \\  mv a1, %[len]
@@ -579,19 +607,13 @@ const LinuxThreadImpl = struct {
                     \\  li a7, 93
                     \\  mv a0, zero
                     \\  ecall
+                    :
+                    : [ptr] "r" (@ptrToInt(self.mapped.ptr)),
+                      [len] "r" (self.mapped.len)
+                    : "memory"
                 ),
-                else => |cpu_arch| {
-                    @compileLog("Unsupported linux arch ", cpu_arch);
-                },
-            };
-
-            asm volatile (unmap_and_exit
-                :
-                : [ptr] "r" (@ptrToInt(self.mapped.ptr)),
-                  [len] "r" (self.mapped.len)
-                : "memory"
-            );
-
+                else => |cpu_arch| @compileError("Unsupported linux arch: " ++ @tagName(cpu_arch)),
+            }
             unreachable;
         }
     };
diff --git a/lib/std/atomic.zig b/lib/std/atomic.zig
index 1944e5346b..42d57eb8fa 100644
--- a/lib/std/atomic.zig
+++ b/lib/std/atomic.zig
@@ -46,34 +46,38 @@ test "fence/compilerFence" {
 
 /// Signals to the processor that the caller is inside a busy-wait spin-loop.
 pub inline fn spinLoopHint() void {
-    const hint_instruction = switch (target.cpu.arch) {
-        // No-op instruction that can hint to save (or share with a hardware-thread) pipelining/power resources
+    switch (target.cpu.arch) {
+        // No-op instruction that can hint to save (or share with a hardware-thread)
+        // pipelining/power resources
         // https://software.intel.com/content/www/us/en/develop/articles/benefitting-power-and-performance-sleep-loops.html
-        .i386, .x86_64 => "pause",
+        .i386, .x86_64 => asm volatile ("pause" ::: "memory"),
 
         // No-op instruction that serves as a hardware-thread resource yield hint.
         // https://stackoverflow.com/a/7588941
-        .powerpc64, .powerpc64le => "or 27, 27, 27",
+        .powerpc64, .powerpc64le => asm volatile ("or 27, 27, 27" ::: "memory"),
 
-        // `isb` appears more reliable for releasing execution resources than `yield` on common aarch64 CPUs.
+        // `isb` appears more reliable for releasing execution resources than `yield`
+        // on common aarch64 CPUs.
         // https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8258604
         // https://bugs.mysql.com/bug.php?id=100664
-        .aarch64, .aarch64_be, .aarch64_32 => "isb",
+        .aarch64, .aarch64_be, .aarch64_32 => asm volatile ("isb" ::: "memory"),
 
         // `yield` was introduced in v6k but is also available on v6m.
         // https://www.keil.com/support/man/docs/armasm/armasm_dom1361289926796.htm
-        .arm, .armeb, .thumb, .thumbeb => blk: {
-            const can_yield = comptime std.Target.arm.featureSetHasAny(target.cpu.features, .{ .has_v6k, .has_v6m });
-            const instruction = if (can_yield) "yield" else "";
-            break :blk instruction;
+        .arm, .armeb, .thumb, .thumbeb => {
+            const can_yield = comptime std.Target.arm.featureSetHasAny(target.cpu.features, .{
+                .has_v6k, .has_v6m,
+            });
+            if (can_yield) {
+                asm volatile ("yield" ::: "memory");
+            } else {
+                asm volatile ("" ::: "memory");
+            }
         },
-
-        else => "",
-    };
-
-    // Memory barrier to prevent the compiler from optimizing away the spin-loop
-    // even if no hint_instruction was provided.
-    asm volatile (hint_instruction ::: "memory");
+        // Memory barrier to prevent the compiler from optimizing away the spin-loop
+        // even when the target has no dedicated hint instruction.
+        else => asm volatile ("" ::: "memory"),
+    }
 }
 
 test "spinLoopHint" {
diff --git a/lib/std/atomic/Atomic.zig b/lib/std/atomic/Atomic.zig
index 80fb1ae297..f4e3ebda9d 100644
--- a/lib/std/atomic/Atomic.zig
+++ b/lib/std/atomic/Atomic.zig
@@ -178,26 +178,78 @@ pub fn Atomic(comptime T: type) type {
             ) u1 {
                 // x86 supports dedicated bitwise instructions
                 if (comptime target.cpu.arch.isX86() and @sizeOf(T) >= 2 and @sizeOf(T) <= 8) {
-                    const instruction = switch (op) {
-                        .Set => "lock bts",
-                        .Reset => "lock btr",
-                        .Toggle => "lock btc",
-                    };
-
-                    const suffix = switch (@sizeOf(T)) {
-                        2 => "w",
-                        4 => "l",
-                        8 => "q",
+                    const old_bit: u8 = switch (@sizeOf(T)) {
+                        2 => switch (op) {
+                            .Set => asm volatile ("lock btsw %[bit], %[ptr]"
+                                // LLVM doesn't support u1 flag register return values
+                                : [result] "={@ccc}" (-> u8)
+                                : [ptr] "*p" (&self.value),
+                                  [bit] "X" (@as(T, bit))
+                                : "cc", "memory"
+                            ),
+                            .Reset => asm volatile ("lock btrw %[bit], %[ptr]"
+                                // LLVM doesn't support u1 flag register return values
+                                : [result] "={@ccc}" (-> u8)
+                                : [ptr] "*p" (&self.value),
+                                  [bit] "X" (@as(T, bit))
+                                : "cc", "memory"
+                            ),
+                            .Toggle => asm volatile ("lock btcw %[bit], %[ptr]"
+                                // LLVM doesn't support u1 flag register return values
+                                : [result] "={@ccc}" (-> u8)
+                                : [ptr] "*p" (&self.value),
+                                  [bit] "X" (@as(T, bit))
+                                : "cc", "memory"
+                            ),
+                        },
+                        4 => switch (op) {
+                            .Set => asm volatile ("lock btsl %[bit], %[ptr]"
+                                // LLVM doesn't support u1 flag register return values
+                                : [result] "={@ccc}" (-> u8)
+                                : [ptr] "*p" (&self.value),
+                                  [bit] "X" (@as(T, bit))
+                                : "cc", "memory"
+                            ),
+                            .Reset => asm volatile ("lock btrl %[bit], %[ptr]"
+                                // LLVM doesn't support u1 flag register return values
+                                : [result] "={@ccc}" (-> u8)
+                                : [ptr] "*p" (&self.value),
+                                  [bit] "X" (@as(T, bit))
+                                : "cc", "memory"
+                            ),
+                            .Toggle => asm volatile ("lock btcl %[bit], %[ptr]"
+                                // LLVM doesn't support u1 flag register return values
+                                : [result] "={@ccc}" (-> u8)
+                                : [ptr] "*p" (&self.value),
+                                  [bit] "X" (@as(T, bit))
+                                : "cc", "memory"
+                            ),
+                        },
+                        8 => switch (op) {
+                            .Set => asm volatile ("lock btsq %[bit], %[ptr]"
+                                // LLVM doesn't support u1 flag register return values
+                                : [result] "={@ccc}" (-> u8)
+                                : [ptr] "*p" (&self.value),
+                                  [bit] "X" (@as(T, bit))
+                                : "cc", "memory"
+                            ),
+                            .Reset => asm volatile ("lock btrq %[bit], %[ptr]"
+                                // LLVM doesn't support u1 flag register return values
+                                : [result] "={@ccc}" (-> u8)
+                                : [ptr] "*p" (&self.value),
+                                  [bit] "X" (@as(T, bit))
+                                : "cc", "memory"
+                            ),
+                            .Toggle => asm volatile ("lock btcq %[bit], %[ptr]"
+                                // LLVM doesn't support u1 flag register return values
+                                : [result] "={@ccc}" (-> u8)
+                                : [ptr] "*p" (&self.value),
+                                  [bit] "X" (@as(T, bit))
+                                : "cc", "memory"
+                            ),
+                        },
                         else => @compileError("Invalid atomic type " ++ @typeName(T)),
                     };
-
-                    const old_bit = asm volatile (instruction ++ suffix ++ " %[bit], %[ptr]"
-                        : [result] "={@ccc}" (-> u8) // LLVM doesn't support u1 flag register return values
-                        : [ptr] "*p" (&self.value),
-                          [bit] "X" (@as(T, bit))
-                        : "cc", "memory"
-                    );
-
                     return @intCast(u1, old_bit);
                 }
 
diff --git a/src/AstGen.zig b/src/AstGen.zig
index cbd918ecc7..31e7f040a2 100644
--- a/src/AstGen.zig
+++ b/src/AstGen.zig
@@ -6601,7 +6601,7 @@ fn asmExpr(
     const asm_source = switch (node_tags[full.ast.template]) {
         .string_literal => try astgen.strLitAsString(main_tokens[full.ast.template]),
         .multiline_string_literal => try astgen.strLitNodeAsString(full.ast.template),
-        else => return astgen.failNode(node, "assembly code must use string literal syntax", .{}),
+        else => return astgen.failNode(full.ast.template, "assembly code must use string literal syntax", .{}),
     };
 
     // See https://github.com/ziglang/zig/issues/215 and related issues discussing
diff --git a/src/Liveness.zig b/src/Liveness.zig
index 79fc0d7325..2c226122bf 100644
--- a/src/Liveness.zig
+++ b/src/Liveness.zig
@@ -24,6 +24,11 @@ const Log2Int = std.math.Log2Int;
 tomb_bits: []usize,
 /// Sparse table of specially handled instructions. The value is an index into the `extra`
 /// array. The meaning of the data depends on the AIR tag.
+///  * `cond_br` - points to a `CondBr` in `extra` at this index.
+///  * `switch_br` - points to a `SwitchBr` in `extra` at this index.
+///  * `asm`, `call` - the value is a set of bits holding the extra tomb bits of operands.
+///    The main tomb bits are still used for the first operands; the extra ones start at the
+///    lsb of the value here.
 special: std.AutoHashMapUnmanaged(Air.Inst.Index, u32),
 /// Auxilliary data. The way this data is interpreted is determined contextually.
 extra: []const u32,
@@ -67,6 +72,8 @@ pub fn analyze(gpa: *Allocator, air: Air, zir: Zir) Allocator.Error!Liveness {
     defer a.extra.deinit(gpa);
     defer a.table.deinit(gpa);
 
+    std.mem.set(usize, a.tomb_bits, 0);
+
     const main_body = air.getMainBody();
     try a.table.ensureTotalCapacity(gpa, @intCast(u32, main_body.len));
     try analyzeWithContext(&a, null, main_body);
@@ -103,7 +110,7 @@ pub fn clearOperandDeath(l: Liveness, inst: Air.Inst.Index, operand: OperandInt)
     const usize_index = (inst * bpi) / @bitSizeOf(usize);
     const mask = @as(usize, 1) <<
         @intCast(Log2Int(usize), (inst % (@bitSizeOf(usize) / bpi)) * bpi + operand);
-    l.tomb_bits[usize_index] |= mask;
+    l.tomb_bits[usize_index] &= ~mask;
 }
 
 /// Higher level API.
@@ -298,7 +305,17 @@ fn analyzeInst(
                 std.mem.copy(Air.Inst.Ref, buf[1..], args);
                 return trackOperands(a, new_set, inst, main_tomb, buf);
             }
-            @panic("TODO: liveness analysis for function call with greater than 2 args");
+            var extra_tombs: ExtraTombs = .{
+                .analysis = a,
+                .new_set = new_set,
+                .inst = inst,
+                .main_tomb = main_tomb,
+            };
+            try extra_tombs.feed(callee);
+            for (args) |arg| {
+                try extra_tombs.feed(arg);
+            }
+            return extra_tombs.finish();
         },
         .struct_field_ptr => {
             const extra = a.air.extraData(Air.StructField, inst_datas[inst].ty_pl.payload).data;
@@ -317,7 +334,19 @@ fn analyzeInst(
                 std.mem.copy(Air.Inst.Ref, buf[outputs.len..], args);
                 return trackOperands(a, new_set, inst, main_tomb, buf);
             }
-            @panic("TODO: liveness analysis for asm with greater than 3 args");
+            var extra_tombs: ExtraTombs = .{
+                .analysis = a,
+                .new_set = new_set,
+                .inst = inst,
+                .main_tomb = main_tomb,
+            };
+            for (outputs) |output| {
+                try extra_tombs.feed(output);
+            }
+            for (args) |arg| {
+                try extra_tombs.feed(arg);
+            }
+            return extra_tombs.finish();
         },
         .block => {
             const extra = a.air.extraData(Air.Block, inst_datas[inst].ty_pl.payload);
@@ -531,3 +560,40 @@ fn trackOperands(
     }
     a.storeTombBits(inst, tomb_bits);
 }
+
+const ExtraTombs = struct {
+    analysis: *Analysis,
+    new_set: ?*std.AutoHashMapUnmanaged(Air.Inst.Index, void),
+    inst: Air.Inst.Index,
+    main_tomb: bool,
+    bit_index: usize = 0,
+    tomb_bits: Bpi = 0,
+    big_tomb_bits: u32 = 0,
+
+    /// Accounts for the next operand of `inst`; call once per operand, in operand order.
+    fn feed(et: *ExtraTombs, op_ref: Air.Inst.Ref) !void {
+        const slot = et.bit_index;
+        assert(slot < 32); // TODO mechanism for when there are greater than 32 operands
+        et.bit_index += 1;
+        const ref_int = @enumToInt(op_ref);
+        if (ref_int < Air.Inst.Ref.typed_value_map.len) return;
+        const operand: Air.Inst.Index = ref_int - @intCast(u32, Air.Inst.Ref.typed_value_map.len);
+        const gpa = et.analysis.gpa;
+        // An operand already present in the table does not die at this instruction.
+        if ((try et.analysis.table.fetchPut(gpa, operand, {})) != null) return;
+        if (et.new_set) |set| try set.putNoClobber(gpa, operand, {});
+        // The top main tomb bit is left for `main_tomb`; later operands spill into the u32.
+        if (slot < bpi - 1) {
+            et.tomb_bits |= @as(Bpi, 1) << @intCast(OperandInt, slot);
+        } else {
+            et.big_tomb_bits |= @as(u32, 1) << @intCast(u5, slot - (bpi - 1));
+        }
+    }
+
+    /// Stores the main tomb bits for `inst` and records the overflow bits in `special`.
+    fn finish(et: *ExtraTombs) !void {
+        et.tomb_bits |= @as(Bpi, @boolToInt(et.main_tomb)) << (bpi - 1);
+        et.analysis.storeTombBits(et.inst, et.tomb_bits);
+        try et.analysis.special.put(et.analysis.gpa, et.inst, et.big_tomb_bits);
+    }
+};
diff --git a/src/Sema.zig b/src/Sema.zig
index 777619dc48..79f1ed0614 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -258,24 +258,24 @@ pub fn analyzeBody(
             .slice_sentinel               => try sema.zirSliceSentinel(block, inst),
             .slice_start                  => try sema.zirSliceStart(block, inst),
             .str                          => try sema.zirStr(block, inst),
-            //.switch_block                 => try sema.zirSwitchBlock(block, inst, false, .none),
-            //.switch_block_multi           => try sema.zirSwitchBlockMulti(block, inst, false, .none),
-            //.switch_block_else            => try sema.zirSwitchBlock(block, inst, false, .@"else"),
-            //.switch_block_else_multi      => try sema.zirSwitchBlockMulti(block, inst, false, .@"else"),
-            //.switch_block_under           => try sema.zirSwitchBlock(block, inst, false, .under),
-            //.switch_block_under_multi     => try sema.zirSwitchBlockMulti(block, inst, false, .under),
-            //.switch_block_ref             => try sema.zirSwitchBlock(block, inst, true, .none),
-            //.switch_block_ref_multi       => try sema.zirSwitchBlockMulti(block, inst, true, .none),
-            //.switch_block_ref_else        => try sema.zirSwitchBlock(block, inst, true, .@"else"),
-            //.switch_block_ref_else_multi  => try sema.zirSwitchBlockMulti(block, inst, true, .@"else"),
-            //.switch_block_ref_under       => try sema.zirSwitchBlock(block, inst, true, .under),
-            //.switch_block_ref_under_multi => try sema.zirSwitchBlockMulti(block, inst, true, .under),
-            //.switch_capture               => try sema.zirSwitchCapture(block, inst, false, false),
-            //.switch_capture_ref           => try sema.zirSwitchCapture(block, inst, false, true),
-            //.switch_capture_multi         => try sema.zirSwitchCapture(block, inst, true, false),
-            //.switch_capture_multi_ref     => try sema.zirSwitchCapture(block, inst, true, true),
-            //.switch_capture_else          => try sema.zirSwitchCaptureElse(block, inst, false),
-            //.switch_capture_else_ref      => try sema.zirSwitchCaptureElse(block, inst, true),
+            .switch_block                 => try sema.zirSwitchBlock(block, inst, false, .none),
+            .switch_block_multi           => try sema.zirSwitchBlockMulti(block, inst, false, .none),
+            .switch_block_else            => try sema.zirSwitchBlock(block, inst, false, .@"else"),
+            .switch_block_else_multi      => try sema.zirSwitchBlockMulti(block, inst, false, .@"else"),
+            .switch_block_under           => try sema.zirSwitchBlock(block, inst, false, .under),
+            .switch_block_under_multi     => try sema.zirSwitchBlockMulti(block, inst, false, .under),
+            .switch_block_ref             => try sema.zirSwitchBlock(block, inst, true, .none),
+            .switch_block_ref_multi       => try sema.zirSwitchBlockMulti(block, inst, true, .none),
+            .switch_block_ref_else        => try sema.zirSwitchBlock(block, inst, true, .@"else"),
+            .switch_block_ref_else_multi  => try sema.zirSwitchBlockMulti(block, inst, true, .@"else"),
+            .switch_block_ref_under       => try sema.zirSwitchBlock(block, inst, true, .under),
+            .switch_block_ref_under_multi => try sema.zirSwitchBlockMulti(block, inst, true, .under),
+            .switch_capture               => try sema.zirSwitchCapture(block, inst, false, false),
+            .switch_capture_ref           => try sema.zirSwitchCapture(block, inst, false, true),
+            .switch_capture_multi         => try sema.zirSwitchCapture(block, inst, true, false),
+            .switch_capture_multi_ref     => try sema.zirSwitchCapture(block, inst, true, true),
+            .switch_capture_else          => try sema.zirSwitchCaptureElse(block, inst, false),
+            .switch_capture_else_ref      => try sema.zirSwitchCaptureElse(block, inst, true),
             .type_info                    => try sema.zirTypeInfo(block, inst),
             .size_of                      => try sema.zirSizeOf(block, inst),
             .bit_size_of                  => try sema.zirBitSizeOf(block, inst),
@@ -534,7 +534,6 @@ pub fn analyzeBody(
                     return break_inst;
                 }
             },
-            else => |t| @panic(@tagName(t)),
         };
         if (sema.typeOf(air_inst).isNoReturn())
             return always_noreturn;
@@ -4110,8 +4109,8 @@ fn analyzeSwitch(
                 const body = sema.code.extra[extra_index..][0..body_len];
                 extra_index += body_len;
 
+                const item = sema.resolveInst(item_ref);
                 // Validation above ensured these will succeed.
-                const item = sema.resolveInst(item_ref) catch unreachable;
                 const item_val = sema.resolveConstValue(&child_block, .unneeded, item) catch unreachable;
                 if (operand_val.eql(item_val)) {
                     return sema.resolveBlockBody(block, src, &child_block, body, merges);
@@ -4132,9 +4131,9 @@ fn analyzeSwitch(
                 const body = sema.code.extra[extra_index + 2 * ranges_len ..][0..body_len];
 
                 for (items) |item_ref| {
+                    const item = sema.resolveInst(item_ref);
                     // Validation above ensured these will succeed.
-                    const item = sema.resolveInst(item_ref) catch unreachable;
-                    const item_val = sema.resolveConstValue(&child_block, item.src, item) catch unreachable;
+                    const item_val = sema.resolveConstValue(&child_block, .unneeded, item) catch unreachable;
                     if (operand_val.eql(item_val)) {
                         return sema.resolveBlockBody(block, src, &child_block, body, merges);
                     }
@@ -4171,156 +4170,157 @@ fn analyzeSwitch(
 
     // TODO when reworking AIR memory layout make multi cases get generated as cases,
     // not as part of the "else" block.
-    const cases = try sema.arena.alloc(Inst.SwitchBr.Case, scalar_cases_len);
+    return mod.fail(&block.base, src, "TODO rework runtime switch Sema", .{});
+    //const cases = try sema.arena.alloc(Inst.SwitchBr.Case, scalar_cases_len);
 
-    var case_block = child_block.makeSubBlock();
-    case_block.runtime_loop = null;
-    case_block.runtime_cond = operand.src;
-    case_block.runtime_index += 1;
-    defer case_block.instructions.deinit(gpa);
+    //var case_block = child_block.makeSubBlock();
+    //case_block.runtime_loop = null;
+    //case_block.runtime_cond = operand.src;
+    //case_block.runtime_index += 1;
+    //defer case_block.instructions.deinit(gpa);
 
-    var extra_index: usize = special.end;
+    //var extra_index: usize = special.end;
 
-    var scalar_i: usize = 0;
-    while (scalar_i < scalar_cases_len) : (scalar_i += 1) {
-        const item_ref = @intToEnum(Zir.Inst.Ref, sema.code.extra[extra_index]);
-        extra_index += 1;
-        const body_len = sema.code.extra[extra_index];
-        extra_index += 1;
-        const body = sema.code.extra[extra_index..][0..body_len];
-        extra_index += body_len;
+    //var scalar_i: usize = 0;
+    //while (scalar_i < scalar_cases_len) : (scalar_i += 1) {
+    //    const item_ref = @intToEnum(Zir.Inst.Ref, sema.code.extra[extra_index]);
+    //    extra_index += 1;
+    //    const body_len = sema.code.extra[extra_index];
+    //    extra_index += 1;
+    //    const body = sema.code.extra[extra_index..][0..body_len];
+    //    extra_index += body_len;
 
-        case_block.instructions.shrinkRetainingCapacity(0);
-        // We validate these above; these two calls are guaranteed to succeed.
-        const item = sema.resolveInst(item_ref) catch unreachable;
-        const item_val = sema.resolveConstValue(&case_block, .unneeded, item) catch unreachable;
+    //    case_block.instructions.shrinkRetainingCapacity(0);
+    //    const item = sema.resolveInst(item_ref);
+    //    // We validate these above; these two calls are guaranteed to succeed.
+    //    const item_val = sema.resolveConstValue(&case_block, .unneeded, item) catch unreachable;
 
-        _ = try sema.analyzeBody(&case_block, body);
+    //    _ = try sema.analyzeBody(&case_block, body);
 
-        cases[scalar_i] = .{
-            .item = item_val,
-            .body = .{ .instructions = try sema.arena.dupe(Air.Inst.Index, case_block.instructions.items) },
-        };
-    }
+    //    cases[scalar_i] = .{
+    //        .item = item_val,
+    //        .body = .{ .instructions = try sema.arena.dupe(Air.Inst.Index, case_block.instructions.items) },
+    //    };
+    //}
 
-    var first_else_body: Body = undefined;
-    var prev_condbr: ?*Inst.CondBr = null;
+    //var first_else_body: Body = undefined;
+    //var prev_condbr: ?*Inst.CondBr = null;
 
-    var multi_i: usize = 0;
-    while (multi_i < multi_cases_len) : (multi_i += 1) {
-        const items_len = sema.code.extra[extra_index];
-        extra_index += 1;
-        const ranges_len = sema.code.extra[extra_index];
-        extra_index += 1;
-        const body_len = sema.code.extra[extra_index];
-        extra_index += 1;
-        const items = sema.code.refSlice(extra_index, items_len);
-        extra_index += items_len;
+    //var multi_i: usize = 0;
+    //while (multi_i < multi_cases_len) : (multi_i += 1) {
+    //    const items_len = sema.code.extra[extra_index];
+    //    extra_index += 1;
+    //    const ranges_len = sema.code.extra[extra_index];
+    //    extra_index += 1;
+    //    const body_len = sema.code.extra[extra_index];
+    //    extra_index += 1;
+    //    const items = sema.code.refSlice(extra_index, items_len);
+    //    extra_index += items_len;
 
-        case_block.instructions.shrinkRetainingCapacity(0);
+    //    case_block.instructions.shrinkRetainingCapacity(0);
 
-        var any_ok: ?Air.Inst.Index = null;
+    //    var any_ok: ?Air.Inst.Index = null;
 
-        for (items) |item_ref| {
-            const item = sema.resolveInst(item_ref);
-            _ = try sema.resolveConstValue(&child_block, item.src, item);
+    //    for (items) |item_ref| {
+    //        const item = sema.resolveInst(item_ref);
+    //        _ = try sema.resolveConstValue(&child_block, item.src, item);
 
-            const cmp_ok = try case_block.addBinOp(.cmp_eq, operand, item);
-            if (any_ok) |some| {
-                any_ok = try case_block.addBinOp(.bool_or, some, cmp_ok);
-            } else {
-                any_ok = cmp_ok;
-            }
-        }
+    //        const cmp_ok = try case_block.addBinOp(.cmp_eq, operand, item);
+    //        if (any_ok) |some| {
+    //            any_ok = try case_block.addBinOp(.bool_or, some, cmp_ok);
+    //        } else {
+    //            any_ok = cmp_ok;
+    //        }
+    //    }
 
-        var range_i: usize = 0;
-        while (range_i < ranges_len) : (range_i += 1) {
-            const first_ref = @intToEnum(Zir.Inst.Ref, sema.code.extra[extra_index]);
-            extra_index += 1;
-            const last_ref = @intToEnum(Zir.Inst.Ref, sema.code.extra[extra_index]);
-            extra_index += 1;
+    //    var range_i: usize = 0;
+    //    while (range_i < ranges_len) : (range_i += 1) {
+    //        const first_ref = @intToEnum(Zir.Inst.Ref, sema.code.extra[extra_index]);
+    //        extra_index += 1;
+    //        const last_ref = @intToEnum(Zir.Inst.Ref, sema.code.extra[extra_index]);
+    //        extra_index += 1;
 
-            const item_first = sema.resolveInst(first_ref);
-            const item_last = sema.resolveInst(last_ref);
+    //        const item_first = sema.resolveInst(first_ref);
+    //        const item_last = sema.resolveInst(last_ref);
 
-            _ = try sema.resolveConstValue(&child_block, item_first.src, item_first);
-            _ = try sema.resolveConstValue(&child_block, item_last.src, item_last);
+    //        _ = try sema.resolveConstValue(&child_block, item_first.src, item_first);
+    //        _ = try sema.resolveConstValue(&child_block, item_last.src, item_last);
 
-            // operand >= first and operand <= last
-            const range_first_ok = try case_block.addBinOp(
-                .cmp_gte,
-                operand,
-                item_first,
-            );
-            const range_last_ok = try case_block.addBinOp(
-                .cmp_lte,
-                operand,
-                item_last,
-            );
-            const range_ok = try case_block.addBinOp(
-                .bool_and,
-                range_first_ok,
-                range_last_ok,
-            );
-            if (any_ok) |some| {
-                any_ok = try case_block.addBinOp(.bool_or, some, range_ok);
-            } else {
-                any_ok = range_ok;
-            }
-        }
+    //        // operand >= first and operand <= last
+    //        const range_first_ok = try case_block.addBinOp(
+    //            .cmp_gte,
+    //            operand,
+    //            item_first,
+    //        );
+    //        const range_last_ok = try case_block.addBinOp(
+    //            .cmp_lte,
+    //            operand,
+    //            item_last,
+    //        );
+    //        const range_ok = try case_block.addBinOp(
+    //            .bool_and,
+    //            range_first_ok,
+    //            range_last_ok,
+    //        );
+    //        if (any_ok) |some| {
+    //            any_ok = try case_block.addBinOp(.bool_or, some, range_ok);
+    //        } else {
+    //            any_ok = range_ok;
+    //        }
+    //    }
 
-        const new_condbr = try sema.arena.create(Inst.CondBr);
-        new_condbr.* = .{
-            .base = .{
-                .tag = .condbr,
-                .ty = Type.initTag(.noreturn),
-                .src = src,
-            },
-            .condition = any_ok.?,
-            .then_body = undefined,
-            .else_body = undefined,
-        };
-        try case_block.instructions.append(gpa, &new_condbr.base);
+    //    const new_condbr = try sema.arena.create(Inst.CondBr);
+    //    new_condbr.* = .{
+    //        .base = .{
+    //            .tag = .condbr,
+    //            .ty = Type.initTag(.noreturn),
+    //            .src = src,
+    //        },
+    //        .condition = any_ok.?,
+    //        .then_body = undefined,
+    //        .else_body = undefined,
+    //    };
+    //    try case_block.instructions.append(gpa, &new_condbr.base);
 
-        const cond_body: Body = .{
-            .instructions = try sema.arena.dupe(Air.Inst.Index, case_block.instructions.items),
-        };
+    //    const cond_body: Body = .{
+    //        .instructions = try sema.arena.dupe(Air.Inst.Index, case_block.instructions.items),
+    //    };
 
-        case_block.instructions.shrinkRetainingCapacity(0);
-        const body = sema.code.extra[extra_index..][0..body_len];
-        extra_index += body_len;
-        _ = try sema.analyzeBody(&case_block, body);
-        new_condbr.then_body = .{
-            .instructions = try sema.arena.dupe(Air.Inst.Index, case_block.instructions.items),
-        };
-        if (prev_condbr) |condbr| {
-            condbr.else_body = cond_body;
-        } else {
-            first_else_body = cond_body;
-        }
-        prev_condbr = new_condbr;
-    }
+    //    case_block.instructions.shrinkRetainingCapacity(0);
+    //    const body = sema.code.extra[extra_index..][0..body_len];
+    //    extra_index += body_len;
+    //    _ = try sema.analyzeBody(&case_block, body);
+    //    new_condbr.then_body = .{
+    //        .instructions = try sema.arena.dupe(Air.Inst.Index, case_block.instructions.items),
+    //    };
+    //    if (prev_condbr) |condbr| {
+    //        condbr.else_body = cond_body;
+    //    } else {
+    //        first_else_body = cond_body;
+    //    }
+    //    prev_condbr = new_condbr;
+    //}
 
-    const final_else_body: Body = blk: {
-        if (special.body.len != 0) {
-            case_block.instructions.shrinkRetainingCapacity(0);
-            _ = try sema.analyzeBody(&case_block, special.body);
-            const else_body: Body = .{
-                .instructions = try sema.arena.dupe(Air.Inst.Index, case_block.instructions.items),
-            };
-            if (prev_condbr) |condbr| {
-                condbr.else_body = else_body;
-                break :blk first_else_body;
-            } else {
-                break :blk else_body;
-            }
-        } else {
-            break :blk .{ .instructions = &.{} };
-        }
-    };
+    //const final_else_body: Body = blk: {
+    //    if (special.body.len != 0) {
+    //        case_block.instructions.shrinkRetainingCapacity(0);
+    //        _ = try sema.analyzeBody(&case_block, special.body);
+    //        const else_body: Body = .{
+    //            .instructions = try sema.arena.dupe(Air.Inst.Index, case_block.instructions.items),
+    //        };
+    //        if (prev_condbr) |condbr| {
+    //            condbr.else_body = else_body;
+    //            break :blk first_else_body;
+    //        } else {
+    //            break :blk else_body;
+    //        }
+    //    } else {
+    //        break :blk .{ .instructions = &.{} };
+    //    }
+    //};
 
-    _ = try child_block.addSwitchBr(src, operand, cases, final_else_body);
-    return sema.analyzeBlockBody(block, src, &child_block, merges);
+    //_ = try child_block.addSwitchBr(src, operand, cases, final_else_body);
+    //return sema.analyzeBlockBody(block, src, &child_block, merges);
 }
 
 fn resolveSwitchItemVal(
@@ -4332,16 +4332,17 @@ fn resolveSwitchItemVal(
     range_expand: Module.SwitchProngSrc.RangeExpand,
 ) CompileError!TypedValue {
     const item = sema.resolveInst(item_ref);
+    const item_ty = sema.typeOf(item);
     // Constructing a LazySrcLoc is costly because we only have the switch AST node.
     // Only if we know for sure we need to report a compile error do we resolve the
     // full source locations.
     if (sema.resolveConstValue(block, .unneeded, item)) |val| {
-        return TypedValue{ .ty = item.ty, .val = val };
+        return TypedValue{ .ty = item_ty, .val = val };
     } else |err| switch (err) {
         error.NeededSourceLocation => {
             const src = switch_prong_src.resolve(sema.gpa, block.src_decl, switch_node_offset, range_expand);
             return TypedValue{
-                .ty = item.ty,
+                .ty = item_ty,
                 .val = try sema.resolveConstValue(block, src, item),
             };
         },
diff --git a/src/codegen.zig b/src/codegen.zig
index bc22d7ec19..11a2603aac 100644
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -452,6 +452,43 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             },
         };
 
+        const BigTomb = struct {
+            function: *Self,
+            inst: Air.Inst.Index,
+            tomb_bits: Liveness.Bpi,
+            big_tomb_bits: u32,
+            bit_index: usize,
+
+            fn feed(bt: *BigTomb, op_ref: Air.Inst.Ref) void {
+                const this_bit_index = bt.bit_index;
+                bt.bit_index += 1;
+
+                const op_int = @enumToInt(op_ref);
+                if (op_int < Air.Inst.Ref.typed_value_map.len) return;
+                const op_index = @intCast(Air.Inst.Index, op_int - Air.Inst.Ref.typed_value_map.len);
+
+                if (this_bit_index < Liveness.bpi - 1) {
+                    const dies = @truncate(u1, bt.tomb_bits >> @intCast(Liveness.OperandInt, this_bit_index)) != 0;
+                    if (!dies) return;
+                } else {
+                    const big_bit_index = @intCast(u5, this_bit_index - (Liveness.bpi - 1));
+                    const dies = @truncate(u1, bt.big_tomb_bits >> big_bit_index) != 0;
+                    if (!dies) return;
+                }
+                bt.function.processDeath(op_index);
+            }
+
+            fn finishAir(bt: *BigTomb, result: MCValue) void {
+                const is_used = !bt.function.liveness.isUnused(bt.inst);
+                if (is_used) {
+                    log.debug("{} => {}", .{ bt.inst, result });
+                    const branch = &bt.function.branch_stack.items[bt.function.branch_stack.items.len - 1];
+                    branch.inst_table.putAssumeCapacityNoClobber(bt.inst, result);
+                }
+                bt.function.finishAirBookkeeping();
+            }
+        };
+
         const Self = @This();
 
         fn generate(
@@ -921,8 +958,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                 if (!dies) continue;
                 const op_int = @enumToInt(op);
                 if (op_int < Air.Inst.Ref.typed_value_map.len) continue;
-                const operand: Air.Inst.Index = op_int - @intCast(u32, Air.Inst.Ref.typed_value_map.len);
-                self.processDeath(operand);
+                const op_index = @intCast(Air.Inst.Index, op_int - Air.Inst.Ref.typed_value_map.len);
+                self.processDeath(op_index);
             }
             const is_used = @truncate(u1, tomb_bits) == 0;
             if (is_used) {
@@ -2739,7 +2776,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                 std.mem.copy(Air.Inst.Ref, buf[1..], args);
                 return self.finishAir(inst, result, buf);
             }
-            @panic("TODO: codegen for function call with greater than 2 args");
+            var bt = try self.iterateBigTomb(inst, 1 + args.len);
+            bt.feed(callee);
+            for (args) |arg| {
+                bt.feed(arg);
+            }
+            return bt.finishAir(result);
         }
 
         fn airRef(self: *Self, inst: Air.Inst.Index) !void {
@@ -3651,7 +3693,25 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                 std.mem.copy(Air.Inst.Ref, buf[outputs.len..], args);
                 return self.finishAir(inst, result, buf);
             }
-            @panic("TODO: codegen for asm with greater than 3 args");
+            var bt = try self.iterateBigTomb(inst, outputs.len + args.len);
+            for (outputs) |output| {
+                bt.feed(output);
+            }
+            for (args) |arg| {
+                bt.feed(arg);
+            }
+            return bt.finishAir(result);
+        }
+
+        fn iterateBigTomb(self: *Self, inst: Air.Inst.Index, operand_count: usize) !BigTomb {
+            try self.ensureProcessDeathCapacity(operand_count + 1);
+            return BigTomb{
+                .function = self,
+                .inst = inst,
+                .tomb_bits = self.liveness.getTombBits(inst),
+                .big_tomb_bits = self.liveness.special.get(inst) orelse 0,
+                .bit_index = 0,
+            };
         }
 
         /// Sets the value without any modifications to register allocation metadata or stack allocation metadata.
@@ -4492,7 +4552,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             // First section of indexes correspond to a set number of constant values.
             const ref_int = @enumToInt(inst);
             if (ref_int < Air.Inst.Ref.typed_value_map.len) {
-                return self.genTypedValue(Air.Inst.Ref.typed_value_map[ref_int]);
+                const tv = Air.Inst.Ref.typed_value_map[ref_int];
+                if (!tv.ty.hasCodeGenBits()) {
+                    return MCValue{ .none = {} };
+                }
+                return self.genTypedValue(tv);
             }
 
             // If the type has no codegen bits, no need to store it.
diff --git a/src/print_air.zig b/src/print_air.zig
index 44c170a078..76159d0796 100644
--- a/src/print_air.zig
+++ b/src/print_air.zig
@@ -89,7 +89,7 @@ const Writer = struct {
             if (w.liveness.isUnused(inst)) {
                 try s.writeAll(") unused\n");
             } else {
-                try s.writeAll("\n");
+                try s.writeAll(")\n");
             }
         }
     }