From 0395b35cee8d4082cc40b0dcd0298f797f42309d Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 14 Sep 2021 21:58:22 -0700 Subject: [PATCH] stage2: implement cmpxchg and improve comptime eval * Implement Sema for `@cmpxchgWeak` and `@cmpxchgStrong`. Both runtime and comptime codepaths are implemented. * Implement Codegen for LLVM backend and C backend. * Add LazySrcLoc.node_offset_builtin_call_argX 3...5 * Sema: rework comptime control flow. - `error.ComptimeReturn` is used to signal that a comptime function call has returned a result (stored in the Inlining struct). `analyzeCall` notices this and handles the result. - The ZIR instructions `break_inline`, `block_inline`, `condbr_inline` are now redundant and can be deleted. `break`, `block`, and `condbr` function equivalently inside a comptime scope. - The ZIR instructions `loop` and `repeat` are also modified to directly perform comptime control flow inside a comptime scope, skipping an unnecessary mechanism for analysis of runtime code. This makes Zig perform closer to an interpreter when evaluating comptime code. * Sema: zirRetErrValue looks at Sema.fn_ret_ty rather than sema.func for adding to the inferred error set. This fixes a bug for inlined/comptime function calls. * Implement ZIR printing for cmpxchg. * stage1: make cmpxchg respect --single-threaded - Our LLVM C++ API wrapper failed to expose this boolean flag before. * Fix AIR printing for struct fields showing incorrect liveness data. 
--- src/Air.zig | 23 +++ src/AstGen.zig | 11 +- src/Liveness.zig | 4 + src/Module.zig | 94 ++++++----- src/Sema.zig | 258 +++++++++++++++++++++++++++---- src/Zir.zig | 25 ++- src/codegen.zig | 13 ++ src/codegen/c.zig | 39 +++++ src/codegen/llvm.zig | 93 +++++++++++ src/codegen/llvm/bindings.zig | 39 +++++ src/link/C/zig.h | 12 ++ src/print_air.zig | 17 +- src/stage1/codegen.cpp | 2 +- src/target.zig | 69 +++++++++ src/type.zig | 29 ++++ src/zig_llvm.cpp | 23 +-- src/zig_llvm.h | 2 +- test/behavior/atomics.zig | 22 +++ test/behavior/atomics_stage1.zig | 22 --- 19 files changed, 682 insertions(+), 115 deletions(-) diff --git a/src/Air.zig b/src/Air.zig index bc939f6b04..29deb9a523 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -309,6 +309,10 @@ pub const Inst = struct { /// Given a pointer to an array, return a slice. /// Uses the `ty_op` field. array_to_slice, + /// Uses the `ty_pl` field with payload `Cmpxchg`. + cmpxchg_weak, + /// Uses the `ty_pl` field with payload `Cmpxchg`. + cmpxchg_strong, pub fn fromCmpOp(op: std.math.CompareOperator) Tag { return switch (op) { @@ -443,6 +447,23 @@ pub const Asm = struct { zir_index: u32, }; +pub const Cmpxchg = struct { + ptr: Inst.Ref, + expected_value: Inst.Ref, + new_value: Inst.Ref, + /// 0b00000000000000000000000000000XXX - success_order + /// 0b00000000000000000000000000XXX000 - failure_order + flags: u32, + + pub fn successOrder(self: Cmpxchg) std.builtin.AtomicOrder { + return @intToEnum(std.builtin.AtomicOrder, @truncate(u3, self.flags)); + } + + pub fn failureOrder(self: Cmpxchg) std.builtin.AtomicOrder { + return @intToEnum(std.builtin.AtomicOrder, @truncate(u3, self.flags >> 3)); + } +}; + pub fn getMainBody(air: Air) []const Air.Inst.Index { const body_index = air.extra[@enumToInt(ExtraIndex.main_block)]; const extra = air.extraData(Block, body_index); @@ -507,6 +528,8 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type { .struct_field_ptr, .struct_field_val, .ptr_elem_ptr, + .cmpxchg_weak, + 
.cmpxchg_strong, => return air.getRefType(datas[inst].ty_pl.ty), .not, diff --git a/src/AstGen.zig b/src/AstGen.zig index a8b7586f6e..b9d7d6f5be 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -7542,6 +7542,7 @@ fn cmpxchg( tag: Zir.Inst.Tag, ) InnerError!Zir.Inst.Ref { const int_type = try typeExpr(gz, scope, params[0]); + // TODO: allow this to be volatile const ptr_type = try gz.add(.{ .tag = .ptr_type_simple, .data = .{ .ptr_type_simple = .{ .is_allowzero = false, @@ -7553,11 +7554,11 @@ fn cmpxchg( } }); const result = try gz.addPlNode(tag, node, Zir.Inst.Cmpxchg{ // zig fmt: off - .ptr = try expr(gz, scope, .{ .ty = ptr_type }, params[1]), - .expected_value = try expr(gz, scope, .{ .ty = int_type }, params[2]), - .new_value = try expr(gz, scope, .{ .ty = int_type }, params[3]), - .success_order = try expr(gz, scope, .{ .ty = .atomic_order_type }, params[4]), - .fail_order = try expr(gz, scope, .{ .ty = .atomic_order_type }, params[5]), + .ptr = try expr(gz, scope, .{ .coerced_ty = ptr_type }, params[1]), + .expected_value = try expr(gz, scope, .{ .coerced_ty = int_type }, params[2]), + .new_value = try expr(gz, scope, .{ .coerced_ty = int_type }, params[3]), + .success_order = try expr(gz, scope, .{ .coerced_ty = .atomic_order_type }, params[4]), + .failure_order = try expr(gz, scope, .{ .coerced_ty = .atomic_order_type }, params[5]), // zig fmt: on }); return rvalue(gz, rl, result, node); diff --git a/src/Liveness.zig b/src/Liveness.zig index f33c44d592..5d8e3eed34 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -340,6 +340,10 @@ fn analyzeInst( const extra = a.air.extraData(Air.Bin, inst_datas[inst].ty_pl.payload).data; return trackOperands(a, new_set, inst, main_tomb, .{ extra.lhs, extra.rhs, .none }); }, + .cmpxchg_strong, .cmpxchg_weak => { + const extra = a.air.extraData(Air.Cmpxchg, inst_datas[inst].ty_pl.payload).data; + return trackOperands(a, new_set, inst, main_tomb, .{ extra.ptr, extra.expected_value, extra.new_value }); + }, .br => { 
const br = inst_datas[inst].br; return trackOperands(a, new_set, inst, main_tomb, .{ br.operand, .none, .none }); diff --git a/src/Module.zig b/src/Module.zig index 07b86c0d51..ec3bb2bbd3 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -1321,6 +1321,7 @@ pub const Scope = struct { /// It is shared among all the blocks in an inline or comptime called /// function. pub const Inlining = struct { + comptime_result: Air.Inst.Ref, merges: Merges, }; @@ -1643,36 +1644,12 @@ pub const SrcLoc = struct { const token_starts = tree.tokens.items(.start); return token_starts[tok_index]; }, - .node_offset_builtin_call_arg0 => |node_off| { - const tree = try src_loc.file_scope.getTree(gpa); - const node_datas = tree.nodes.items(.data); - const node_tags = tree.nodes.items(.tag); - const node = src_loc.declRelativeToNodeIndex(node_off); - const param = switch (node_tags[node]) { - .builtin_call_two, .builtin_call_two_comma => node_datas[node].lhs, - .builtin_call, .builtin_call_comma => tree.extra_data[node_datas[node].lhs], - else => unreachable, - }; - const main_tokens = tree.nodes.items(.main_token); - const tok_index = main_tokens[param]; - const token_starts = tree.tokens.items(.start); - return token_starts[tok_index]; - }, - .node_offset_builtin_call_arg1 => |node_off| { - const tree = try src_loc.file_scope.getTree(gpa); - const node_datas = tree.nodes.items(.data); - const node_tags = tree.nodes.items(.tag); - const node = src_loc.declRelativeToNodeIndex(node_off); - const param = switch (node_tags[node]) { - .builtin_call_two, .builtin_call_two_comma => node_datas[node].rhs, - .builtin_call, .builtin_call_comma => tree.extra_data[node_datas[node].lhs + 1], - else => unreachable, - }; - const main_tokens = tree.nodes.items(.main_token); - const tok_index = main_tokens[param]; - const token_starts = tree.tokens.items(.start); - return token_starts[tok_index]; - }, + .node_offset_builtin_call_arg0 => |n| return src_loc.byteOffsetBuiltinCallArg(gpa, n, 0), + 
.node_offset_builtin_call_arg1 => |n| return src_loc.byteOffsetBuiltinCallArg(gpa, n, 1), + .node_offset_builtin_call_arg2 => |n| return src_loc.byteOffsetBuiltinCallArg(gpa, n, 2), + .node_offset_builtin_call_arg3 => |n| return src_loc.byteOffsetBuiltinCallArg(gpa, n, 3), + .node_offset_builtin_call_arg4 => |n| return src_loc.byteOffsetBuiltinCallArg(gpa, n, 4), + .node_offset_builtin_call_arg5 => |n| return src_loc.byteOffsetBuiltinCallArg(gpa, n, 5), .node_offset_array_access_index => |node_off| { const tree = try src_loc.file_scope.getTree(gpa); const node_datas = tree.nodes.items(.data); @@ -1965,6 +1942,31 @@ pub const SrcLoc = struct { }, } } + + pub fn byteOffsetBuiltinCallArg( + src_loc: SrcLoc, + gpa: *Allocator, + node_off: i32, + arg_index: u32, + ) !u32 { + const tree = try src_loc.file_scope.getTree(gpa); + const node_datas = tree.nodes.items(.data); + const node_tags = tree.nodes.items(.tag); + const node = src_loc.declRelativeToNodeIndex(node_off); + const param = switch (node_tags[node]) { + .builtin_call_two, .builtin_call_two_comma => switch (arg_index) { + 0 => node_datas[node].lhs, + 1 => node_datas[node].rhs, + else => unreachable, + }, + .builtin_call, .builtin_call_comma => tree.extra_data[node_datas[node].lhs + arg_index], + else => unreachable, + }; + const main_tokens = tree.nodes.items(.main_token); + const tok_index = main_tokens[param]; + const token_starts = tree.tokens.items(.start); + return token_starts[tok_index]; + } }; /// Resolving a source location into a byte offset may require doing work @@ -2032,6 +2034,10 @@ pub const LazySrcLoc = union(enum) { node_offset_builtin_call_arg0: i32, /// Same as `node_offset_builtin_call_arg0` except arg index 1. 
node_offset_builtin_call_arg1: i32, + node_offset_builtin_call_arg2: i32, + node_offset_builtin_call_arg3: i32, + node_offset_builtin_call_arg4: i32, + node_offset_builtin_call_arg5: i32, /// The source location points to the index expression of an array access /// expression, found by taking this AST node index offset from the containing /// Decl AST node, which points to an array access AST node. Next, navigate @@ -2157,6 +2163,10 @@ pub const LazySrcLoc = union(enum) { .node_offset_for_cond, .node_offset_builtin_call_arg0, .node_offset_builtin_call_arg1, + .node_offset_builtin_call_arg2, + .node_offset_builtin_call_arg3, + .node_offset_builtin_call_arg4, + .node_offset_builtin_call_arg5, .node_offset_array_access_index, .node_offset_slice_sentinel, .node_offset_call_func, @@ -2205,6 +2215,10 @@ pub const LazySrcLoc = union(enum) { .node_offset_for_cond, .node_offset_builtin_call_arg0, .node_offset_builtin_call_arg1, + .node_offset_builtin_call_arg2, + .node_offset_builtin_call_arg3, + .node_offset_builtin_call_arg4, + .node_offset_builtin_call_arg5, .node_offset_array_access_index, .node_offset_slice_sentinel, .node_offset_call_func, @@ -2246,6 +2260,9 @@ pub const CompileError = error{ /// because the function is generic. This is only seen when analyzing the body of a param /// instruction. GenericPoison, + /// In a comptime scope, a return instruction was encountered. This error is only seen when + /// doing a comptime function call. 
+ ComptimeReturn, }; pub fn deinit(mod: *Module) void { @@ -3928,8 +3945,10 @@ pub fn analyzeFnBody(mod: *Module, decl: *Decl, func: *Fn) SemaError!Air { log.debug("set {s} to in_progress", .{decl.name}); _ = sema.analyzeBody(&inner_block, fn_info.body) catch |err| switch (err) { + // TODO make these unreachable instead of @panic error.NeededSourceLocation => @panic("zig compiler bug: NeededSourceLocation"), error.GenericPoison => @panic("zig compiler bug: GenericPoison"), + error.ComptimeReturn => @panic("zig compiler bug: ComptimeReturn"), else => |e| return e, }; @@ -4534,7 +4553,6 @@ pub const PeerTypeCandidateSrc = union(enum) { self: PeerTypeCandidateSrc, gpa: *Allocator, decl: *Decl, - candidates: usize, candidate_i: usize, ) ?LazySrcLoc { @setCold(true); @@ -4547,12 +4565,14 @@ pub const PeerTypeCandidateSrc = union(enum) { return candidate_srcs[candidate_i]; }, .typeof_builtin_call_node_offset => |node_offset| { - if (candidates <= 2) { - switch (candidate_i) { - 0 => return LazySrcLoc{ .node_offset_builtin_call_arg0 = node_offset }, - 1 => return LazySrcLoc{ .node_offset_builtin_call_arg1 = node_offset }, - else => unreachable, - } + switch (candidate_i) { + 0 => return LazySrcLoc{ .node_offset_builtin_call_arg0 = node_offset }, + 1 => return LazySrcLoc{ .node_offset_builtin_call_arg1 = node_offset }, + 2 => return LazySrcLoc{ .node_offset_builtin_call_arg2 = node_offset }, + 3 => return LazySrcLoc{ .node_offset_builtin_call_arg3 = node_offset }, + 4 => return LazySrcLoc{ .node_offset_builtin_call_arg4 = node_offset }, + 5 => return LazySrcLoc{ .node_offset_builtin_call_arg5 = node_offset }, + else => {}, } const tree = decl.namespace.file_scope.getTree(gpa) catch |err| { diff --git a/src/Sema.zig b/src/Sema.zig index ebfe0361d6..de0d0b7c88 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -159,7 +159,6 @@ pub fn analyzeBody( .bit_or => try sema.zirBitwise(block, inst, .bit_or), .bitcast => try sema.zirBitcast(block, inst), .bitcast_result_ptr => try 
sema.zirBitcastResultPtr(block, inst), - .block => try sema.zirBlock(block, inst), .suspend_block => try sema.zirSuspendBlock(block, inst), .bool_not => try sema.zirBoolNot(block, inst), .bool_br_and => try sema.zirBoolBr(block, inst, false), @@ -215,7 +214,6 @@ pub fn analyzeBody( .is_non_err_ptr => try sema.zirIsNonErrPtr(block, inst), .is_non_null => try sema.zirIsNonNull(block, inst), .is_non_null_ptr => try sema.zirIsNonNullPtr(block, inst), - .loop => try sema.zirLoop(block, inst), .merge_error_sets => try sema.zirMergeErrorSets(block, inst), .negate => try sema.zirNegate(block, inst, .sub), .negate_wrap => try sema.zirNegate(block, inst, .subwrap), @@ -308,8 +306,8 @@ pub fn analyzeBody( .shr_exact => try sema.zirShrExact(block, inst), .bit_offset_of => try sema.zirBitOffsetOf(block, inst), .offset_of => try sema.zirOffsetOf(block, inst), - .cmpxchg_strong => try sema.zirCmpxchg(block, inst), - .cmpxchg_weak => try sema.zirCmpxchg(block, inst), + .cmpxchg_strong => try sema.zirCmpxchg(block, inst, .cmpxchg_strong), + .cmpxchg_weak => try sema.zirCmpxchg(block, inst, .cmpxchg_weak), .splat => try sema.zirSplat(block, inst), .reduce => try sema.zirReduce(block, inst), .shuffle => try sema.zirShuffle(block, inst), @@ -364,16 +362,12 @@ pub fn analyzeBody( // Instructions that we know to *always* be noreturn based solely on their tag. // These functions match the return type of analyzeBody so that we can // tail call them here. 
- .break_inline => return inst, - .condbr => return sema.zirCondbr(block, inst), - .@"break" => return sema.zirBreak(block, inst), .compile_error => return sema.zirCompileError(block, inst), .ret_coerce => return sema.zirRetCoerce(block, inst), .ret_node => return sema.zirRetNode(block, inst), .ret_load => return sema.zirRetLoad(block, inst), .ret_err_value => return sema.zirRetErrValue(block, inst), .@"unreachable" => return sema.zirUnreachable(block, inst), - .repeat => return sema.zirRepeat(block, inst), .panic => return sema.zirPanic(block, inst), // zig fmt: on @@ -499,6 +493,28 @@ pub fn analyzeBody( }, // Special case instructions to handle comptime control flow. + .@"break" => { + if (block.is_comptime) { + return inst; // same as break_inline + } else { + return sema.zirBreak(block, inst); + } + }, + .break_inline => return inst, + .repeat => { + if (block.is_comptime) { + // Send comptime control flow back to the beginning of this block. + const src: LazySrcLoc = .{ .node_offset = datas[inst].node }; + try sema.emitBackwardBranch(block, src); + i = 0; + continue; + } else { + const src_node = sema.code.instructions.items(.data)[inst].node; + const src: LazySrcLoc = .{ .node_offset = src_node }; + try sema.requireRuntimeBlock(block, src); + return always_noreturn; + } + }, .repeat_inline => { // Send comptime control flow back to the beginning of this block. const src: LazySrcLoc = .{ .node_offset = datas[inst].node }; @@ -506,6 +522,34 @@ pub fn analyzeBody( i = 0; continue; }, + .loop => blk: { + if (!block.is_comptime) break :blk try sema.zirLoop(block, inst); + // Same as `block_inline`. 
TODO https://github.com/ziglang/zig/issues/8220 + const inst_data = datas[inst].pl_node; + const extra = sema.code.extraData(Zir.Inst.Block, inst_data.payload_index); + const inline_body = sema.code.extra[extra.end..][0..extra.data.body_len]; + const break_inst = try sema.analyzeBody(block, inline_body); + const break_data = datas[break_inst].@"break"; + if (inst == break_data.block_inst) { + break :blk sema.resolveInst(break_data.operand); + } else { + return break_inst; + } + }, + .block => blk: { + if (!block.is_comptime) break :blk try sema.zirBlock(block, inst); + // Same as `block_inline`. TODO https://github.com/ziglang/zig/issues/8220 + const inst_data = datas[inst].pl_node; + const extra = sema.code.extraData(Zir.Inst.Block, inst_data.payload_index); + const inline_body = sema.code.extra[extra.end..][0..extra.data.body_len]; + const break_inst = try sema.analyzeBody(block, inline_body); + const break_data = datas[break_inst].@"break"; + if (inst == break_data.block_inst) { + break :blk sema.resolveInst(break_data.operand); + } else { + return break_inst; + } + }, .block_inline => blk: { // Directly analyze the block body without introducing a new block. const inst_data = datas[inst].pl_node; @@ -519,6 +563,24 @@ pub fn analyzeBody( return break_inst; } }, + .condbr => blk: { + if (!block.is_comptime) return sema.zirCondbr(block, inst); + // Same as condbr_inline. 
TODO https://github.com/ziglang/zig/issues/8220 + const inst_data = datas[inst].pl_node; + const cond_src: LazySrcLoc = .{ .node_offset_if_cond = inst_data.src_node }; + const extra = sema.code.extraData(Zir.Inst.CondBr, inst_data.payload_index); + const then_body = sema.code.extra[extra.end..][0..extra.data.then_body_len]; + const else_body = sema.code.extra[extra.end + then_body.len ..][0..extra.data.else_body_len]; + const cond = try sema.resolveInstConst(block, cond_src, extra.data.condition); + const inline_body = if (cond.val.toBool()) then_body else else_body; + const break_inst = try sema.analyzeBody(block, inline_body); + const break_data = datas[break_inst].@"break"; + if (inst == break_data.block_inst) { + break :blk sema.resolveInst(break_data.operand); + } else { + return break_inst; + } + }, .condbr_inline => blk: { const inst_data = datas[inst].pl_node; const cond_src: LazySrcLoc = .{ .node_offset_if_cond = inst_data.src_node }; @@ -1933,16 +1995,6 @@ fn zirCompileLog( return Air.Inst.Ref.void_value; } -fn zirRepeat(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) CompileError!Zir.Inst.Index { - const tracy = trace(@src()); - defer tracy.end(); - - const src_node = sema.code.instructions.items(.data)[inst].node; - const src: LazySrcLoc = .{ .node_offset = src_node }; - try sema.requireRuntimeBlock(block, src); - return always_noreturn; -} - fn zirPanic(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) CompileError!Zir.Inst.Index { const inst_data = sema.code.instructions.items(.data)[inst].un_node; const src: LazySrcLoc = inst_data.src(); @@ -2003,7 +2055,6 @@ fn zirLoop(sema: *Sema, parent_block: *Scope.Block, inst: Zir.Inst.Index) Compil _ = try sema.analyzeBody(&loop_block, body); - // Loop repetition is implied so the last instruction may or may not be a noreturn instruction. 
try child_block.instructions.append(gpa, loop_inst); try sema.air_extra.ensureUnusedCapacity(gpa, @typeInfo(Air.Block).Struct.fields.len + @@ -2615,6 +2666,7 @@ fn analyzeCall( // This one is shared among sub-blocks within the same callee, but not // shared among the entire inline/comptime call stack. var inlining: Scope.Block.Inlining = .{ + .comptime_result = undefined, .merges = .{ .results = .{}, .br_list = .{}, @@ -2770,8 +2822,13 @@ fn analyzeCall( } } - _ = try sema.analyzeBody(&child_block, fn_info.body); - const result = try sema.analyzeBlockBody(block, call_src, &child_block, merges); + const result = result: { + _ = sema.analyzeBody(&child_block, fn_info.body) catch |err| switch (err) { + error.ComptimeReturn => break :result inlining.comptime_result, + else => |e| return e, + }; + break :result try sema.analyzeBlockBody(block, call_src, &child_block, merges); + }; if (is_comptime_call) { const result_val = try sema.resolveConstMaybeUndefVal(block, call_src, result); @@ -6662,9 +6719,9 @@ fn zirRetErrValue( const src = inst_data.src(); // Add the error tag to the inferred error set of the in-scope function. - if (sema.func) |func| { - if (func.getInferredErrorSet()) |map| { - _ = try map.getOrPut(sema.gpa, err_name); + if (sema.fn_ret_ty.zigTypeTag() == .ErrorUnion) { + if (sema.fn_ret_ty.errorUnionSet().castTag(.error_set_inferred)) |payload| { + _ = try payload.data.map.getOrPut(sema.gpa, err_name); } } // Return the error code from the function. @@ -6699,6 +6756,10 @@ fn zirRetNode(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) CompileErr const operand = sema.resolveInst(inst_data.operand); const src = inst_data.src(); + // TODO: we pass false here for the `need_coercion` boolean, but I'm pretty sure we need + // to remove this parameter entirely. Observe the problem by looking at the incorrect compile + // error that occurs when a behavior test case being executed at comptime fails, e.g. 
+ // `test { comptime foo(); } fn foo() { try expect(false); }` return sema.analyzeRet(block, operand, src, false); } @@ -6730,6 +6791,10 @@ fn analyzeRet( try sema.coerce(block, sema.fn_ret_ty, uncasted_operand, src); if (block.inlining) |inlining| { + if (block.is_comptime) { + inlining.comptime_result = operand; + return error.ComptimeReturn; + } // We are inlining a function call; rewrite the `ret` as a `break`. try inlining.merges.results.append(sema.gpa, operand); _ = try block.addBr(inlining.merges.block_inst, operand); @@ -7425,10 +7490,149 @@ fn zirOffsetOf(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) CompileEr return sema.mod.fail(&block.base, src, "TODO: Sema.zirOffsetOf", .{}); } -fn zirCmpxchg(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { +fn checkAtomicOperandType( + sema: *Sema, + block: *Scope.Block, + ty_src: LazySrcLoc, + ty: Type, +) CompileError!void { + var buffer: Type.Payload.Bits = undefined; + const target = sema.mod.getTarget(); + const max_atomic_bits = target_util.largestAtomicBits(target); + const int_ty = switch (ty.zigTypeTag()) { + .Int => ty, + .Enum => ty.enumTagType(&buffer), + .Float => { + const bit_count = ty.floatBits(target); + if (bit_count > max_atomic_bits) { + return sema.mod.fail( + &block.base, + ty_src, + "expected {d}-bit float type or smaller; found {d}-bit float type", + .{ max_atomic_bits, bit_count }, + ); + } + return; + }, + .Bool => return, // Will be treated as `u8`. 
+ else => return sema.mod.fail( + &block.base, + ty_src, + "expected bool, integer, float, enum, or pointer type; found {}", + .{ty}, + ), + }; + const bit_count = int_ty.intInfo(target).bits; + if (bit_count > max_atomic_bits) { + return sema.mod.fail( + &block.base, + ty_src, + "expected {d}-bit integer type or smaller; found {d}-bit integer type", + .{ max_atomic_bits, bit_count }, + ); + } +} + +fn resolveAtomicOrder( + sema: *Sema, + block: *Scope.Block, + src: LazySrcLoc, + zir_ref: Zir.Inst.Ref, +) CompileError!std.builtin.AtomicOrder { + const atomic_order_ty = try sema.getBuiltinType(block, src, "AtomicOrder"); + const air_ref = sema.resolveInst(zir_ref); + const coerced = try sema.coerce(block, atomic_order_ty, air_ref, src); + const val = try sema.resolveConstValue(block, src, coerced); + return val.toEnum(std.builtin.AtomicOrder); +} + +fn zirCmpxchg( + sema: *Sema, + block: *Scope.Block, + inst: Zir.Inst.Index, + air_tag: Air.Inst.Tag, +) CompileError!Air.Inst.Ref { + const mod = sema.mod; const inst_data = sema.code.instructions.items(.data)[inst].pl_node; + const extra = sema.code.extraData(Zir.Inst.Cmpxchg, inst_data.payload_index).data; const src = inst_data.src(); - return sema.mod.fail(&block.base, src, "TODO: Sema.zirCmpxchg", .{}); + // zig fmt: off + const elem_ty_src : LazySrcLoc = .{ .node_offset_builtin_call_arg0 = inst_data.src_node }; + const ptr_src : LazySrcLoc = .{ .node_offset_builtin_call_arg1 = inst_data.src_node }; + const expected_src : LazySrcLoc = .{ .node_offset_builtin_call_arg2 = inst_data.src_node }; + const new_value_src : LazySrcLoc = .{ .node_offset_builtin_call_arg3 = inst_data.src_node }; + const success_order_src: LazySrcLoc = .{ .node_offset_builtin_call_arg4 = inst_data.src_node }; + const failure_order_src: LazySrcLoc = .{ .node_offset_builtin_call_arg5 = inst_data.src_node }; + // zig fmt: on + const ptr = sema.resolveInst(extra.ptr); + const elem_ty = sema.typeOf(ptr).elemType(); + try 
sema.checkAtomicOperandType(block, elem_ty_src, elem_ty); + if (elem_ty.zigTypeTag() == .Float) { + return mod.fail( + &block.base, + elem_ty_src, + "expected bool, integer, enum, or pointer type; found '{}'", + .{elem_ty}, + ); + } + const expected_value = try sema.coerce(block, elem_ty, sema.resolveInst(extra.expected_value), expected_src); + const new_value = try sema.coerce(block, elem_ty, sema.resolveInst(extra.new_value), new_value_src); + const success_order = try sema.resolveAtomicOrder(block, success_order_src, extra.success_order); + const failure_order = try sema.resolveAtomicOrder(block, failure_order_src, extra.failure_order); + + if (@enumToInt(success_order) < @enumToInt(std.builtin.AtomicOrder.Monotonic)) { + return mod.fail(&block.base, success_order_src, "success atomic ordering must be Monotonic or stricter", .{}); + } + if (@enumToInt(failure_order) < @enumToInt(std.builtin.AtomicOrder.Monotonic)) { + return mod.fail(&block.base, failure_order_src, "failure atomic ordering must be Monotonic or stricter", .{}); + } + if (@enumToInt(failure_order) > @enumToInt(success_order)) { + return mod.fail(&block.base, failure_order_src, "failure atomic ordering must be no stricter than success", .{}); + } + if (failure_order == .Release or failure_order == .AcqRel) { + return mod.fail(&block.base, failure_order_src, "failure atomic ordering must not be Release or AcqRel", .{}); + } + + const result_ty = try Module.optionalType(sema.arena, elem_ty); + + // special case zero bit types + if ((try sema.typeHasOnePossibleValue(block, elem_ty_src, elem_ty)) != null) { + return sema.addConstant(result_ty, Value.initTag(.null_value)); + } + + const runtime_src = if (try sema.resolveDefinedValue(block, ptr_src, ptr)) |ptr_val| rs: { + if (try sema.resolveMaybeUndefVal(block, expected_src, expected_value)) |expected_val| { + if (try sema.resolveMaybeUndefVal(block, new_value_src, new_value)) |new_val| { + if (expected_val.isUndef() or new_val.isUndef()) { + return 
sema.addConstUndef(result_ty); + } + const stored_val = (try ptr_val.pointerDeref(sema.arena)) orelse break :rs ptr_src; + const result_val = if (stored_val.eql(expected_val, elem_ty)) blk: { + try sema.storePtr(block, src, ptr, new_value); + break :blk Value.initTag(.null_value); + } else try Value.Tag.opt_payload.create(sema.arena, stored_val); + + return sema.addConstant(result_ty, result_val); + } else break :rs new_value_src; + } else break :rs expected_src; + } else ptr_src; + + const flags: u32 = @as(u32, @enumToInt(success_order)) | + (@as(u32, @enumToInt(failure_order)) << 3); + + try sema.requireRuntimeBlock(block, runtime_src); + return block.addInst(.{ + .tag = air_tag, + .data = .{ .ty_pl = .{ + .ty = try sema.addType(result_ty), + .payload = try sema.addExtra(Air.Cmpxchg{ + .ptr = ptr, + .expected_value = expected_value, + .new_value = new_value, + .flags = flags, + }), + } }, + }); } fn zirSplat(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { @@ -9576,13 +9780,11 @@ fn resolvePeerTypes( const chosen_src = candidate_srcs.resolve( sema.gpa, block.src_decl, - instructions.len, chosen_i, ); const candidate_src = candidate_srcs.resolve( sema.gpa, block.src_decl, - instructions.len, candidate_i + 1, ); diff --git a/src/Zir.zig b/src/Zir.zig index dbc1442364..1f0e4e370b 100644 --- a/src/Zir.zig +++ b/src/Zir.zig @@ -2778,7 +2778,7 @@ pub const Inst = struct { expected_value: Ref, new_value: Ref, success_order: Ref, - fail_order: Ref, + failure_order: Ref, }; pub const AtomicRmw = struct { @@ -3054,8 +3054,6 @@ const Writer = struct { .array_init_ref, .array_init_anon_ref, .union_init_ptr, - .cmpxchg_strong, - .cmpxchg_weak, .shuffle, .select, .atomic_rmw, @@ -3072,6 +3070,10 @@ const Writer = struct { .struct_init_ref, => try self.writeStructInit(stream, inst), + .cmpxchg_strong, + .cmpxchg_weak, + => try self.writeCmpxchg(stream, inst), + .struct_init_anon, .struct_init_anon_ref, => try self.writeStructInitAnon(stream, 
inst), @@ -3474,6 +3476,23 @@ const Writer = struct { try self.writeSrc(stream, inst_data.src()); } + fn writeCmpxchg(self: *Writer, stream: anytype, inst: Inst.Index) !void { + const inst_data = self.code.instructions.items(.data)[inst].pl_node; + const extra = self.code.extraData(Inst.Cmpxchg, inst_data.payload_index).data; + + try self.writeInstRef(stream, extra.ptr); + try stream.writeAll(", "); + try self.writeInstRef(stream, extra.expected_value); + try stream.writeAll(", "); + try self.writeInstRef(stream, extra.new_value); + try stream.writeAll(", "); + try self.writeInstRef(stream, extra.success_order); + try stream.writeAll(", "); + try self.writeInstRef(stream, extra.failure_order); + try stream.writeAll(") "); + try self.writeSrc(stream, inst_data.src()); + } + fn writeStructInitAnon(self: *Writer, stream: anytype, inst: Inst.Index) !void { const inst_data = self.code.instructions.items(.data)[inst].pl_node; const extra = self.code.extraData(Inst.StructInitAnon, inst_data.payload_index); diff --git a/src/codegen.zig b/src/codegen.zig index 1995d8baa7..511d4c2301 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -857,6 +857,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .struct_field_ptr=> try self.airStructFieldPtr(inst), .struct_field_val=> try self.airStructFieldVal(inst), .array_to_slice => try self.airArrayToSlice(inst), + .cmpxchg_strong => try self.airCmpxchg(inst), + .cmpxchg_weak => try self.airCmpxchg(inst), .struct_field_ptr_index_0 => try self.airStructFieldPtrIndex(inst, 0), .struct_field_ptr_index_1 => try self.airStructFieldPtrIndex(inst, 1), @@ -4751,6 +4753,17 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } + fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void { + const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; + const extra = self.air.extraData(Air.Block, ty_pl.payload); + const result: MCValue = switch (arch) { + else => 
return self.fail("TODO implement airCmpxchg for {}", .{ + self.target.cpu.arch, + }), + }; + return self.finishAir(inst, result, .{ extra.ptr, extra.expected_value, extra.new_value }); + } + fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue { // First section of indexes correspond to a set number of constant values. const ref_int = @enumToInt(inst); diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 84bc6ceeb0..ff49b18f7b 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -911,6 +911,8 @@ fn genBody(o: *Object, body: []const Air.Inst.Index) error{ AnalysisFail, OutOfM .wrap_optional => try airWrapOptional(o, inst), .struct_field_ptr => try airStructFieldPtr(o, inst), .array_to_slice => try airArrayToSlice(o, inst), + .cmpxchg_weak => try airCmpxchg(o, inst, "weak"), + .cmpxchg_strong => try airCmpxchg(o, inst, "strong"), .struct_field_ptr_index_0 => try airStructFieldPtrIndex(o, inst, 0), .struct_field_ptr_index_1 => try airStructFieldPtrIndex(o, inst, 1), @@ -1878,6 +1880,43 @@ fn airArrayToSlice(o: *Object, inst: Air.Inst.Index) !CValue { return local; } +fn airCmpxchg(o: *Object, inst: Air.Inst.Index, flavor: [*:0]const u8) !CValue { + const ty_pl = o.air.instructions.items(.data)[inst].ty_pl; + const extra = o.air.extraData(Air.Cmpxchg, ty_pl.payload).data; + const inst_ty = o.air.typeOfIndex(inst); + const ptr = try o.resolveInst(extra.ptr); + const expected_value = try o.resolveInst(extra.expected_value); + const new_value = try o.resolveInst(extra.new_value); + const local = try o.allocLocal(inst_ty, .Const); + const writer = o.writer(); + + try writer.print(" = zig_cmpxchg_{s}(", .{flavor}); + try o.writeCValue(writer, ptr); + try writer.writeAll(", "); + try o.writeCValue(writer, expected_value); + try writer.writeAll(", "); + try o.writeCValue(writer, new_value); + try writer.writeAll(", "); + try writeMemoryOrder(writer, extra.successOrder()); + try writer.writeAll(", "); + try writeMemoryOrder(writer, extra.failureOrder()); 
+    try writer.writeAll(");\n");
+
+    return local;
+}
+
+fn writeMemoryOrder(w: anytype, order: std.builtin.AtomicOrder) !void {
+    const str = switch (order) {
+        .Unordered => "memory_order_relaxed",
+        .Monotonic => "memory_order_relaxed",
+        .Acquire => "memory_order_acquire",
+        .Release => "memory_order_release",
+        .AcqRel => "memory_order_acq_rel",
+        .SeqCst => "memory_order_seq_cst",
+    };
+    return w.writeAll(str);
+}
+
 fn IndentWriter(comptime UnderlyingWriter: type) type {
     return struct {
         const Self = @This();
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index d39262d62e..b28c371466 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -389,6 +389,7 @@ pub const Object = struct {
             .latest_alloca_inst = null,
             .llvm_func = llvm_func,
             .blocks = .{},
+            .single_threaded = module.comp.bin_file.options.single_threaded,
         };
         defer fg.deinit();
 
@@ -906,6 +907,31 @@ pub const DeclGen = struct {
         // TODO: improve this API, `addAttr(-1, attr_name)`
         self.addAttr(val, std.math.maxInt(llvm.AttributeIndex), attr_name);
     }
+
+    /// If the operand type of an atomic operation is not byte sized we need to
+    /// widen it before using it and then truncate the result.
+    /// RMW exchange of floating-point values is bitcasted to same-sized integer
+    /// types to work around a LLVM deficiency when targeting ARM/AArch64.
+ fn getAtomicAbiType(dg: *DeclGen, ty: Type, is_rmw_xchg: bool) ?*const llvm.Type { + const target = dg.module.getTarget(); + var buffer: Type.Payload.Bits = undefined; + const int_ty = switch (ty.zigTypeTag()) { + .Int => ty, + .Enum => ty.enumTagType(&buffer), + .Float => { + if (!is_rmw_xchg) return null; + return dg.context.intType(@intCast(c_uint, ty.abiSize(target) * 8)); + }, + .Bool => return dg.context.intType(8), + else => return null, + }; + const bit_count = int_ty.intInfo(target).bits; + if (!std.math.isPowerOfTwo(bit_count) or (bit_count % 8) != 0) { + return dg.context.intType(@intCast(c_uint, int_ty.abiSize(target) * 8)); + } else { + return null; + } + } }; pub const FuncGen = struct { @@ -940,6 +966,8 @@ pub const FuncGen = struct { break_vals: *BreakValues, }), + single_threaded: bool, + const BreakBasicBlocks = std.ArrayListUnmanaged(*const llvm.BasicBlock); const BreakValues = std.ArrayListUnmanaged(*const llvm.Value); @@ -1029,6 +1057,8 @@ pub const FuncGen = struct { .slice_ptr => try self.airSliceField(inst, 0), .slice_len => try self.airSliceField(inst, 1), .array_to_slice => try self.airArrayToSlice(inst), + .cmpxchg_weak => try self.airCmpxchg(inst, true), + .cmpxchg_strong => try self.airCmpxchg(inst, false), .struct_field_ptr => try self.airStructFieldPtr(inst), .struct_field_val => try self.airStructFieldVal(inst), @@ -1975,6 +2005,58 @@ pub const FuncGen = struct { return null; } + fn airCmpxchg(self: *FuncGen, inst: Air.Inst.Index, is_weak: bool) !?*const llvm.Value { + const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; + const extra = self.air.extraData(Air.Cmpxchg, ty_pl.payload).data; + var ptr = try self.resolveInst(extra.ptr); + var expected_value = try self.resolveInst(extra.expected_value); + var new_value = try self.resolveInst(extra.new_value); + const operand_ty = self.air.typeOf(extra.ptr).elemType(); + const opt_abi_ty = self.dg.getAtomicAbiType(operand_ty, false); + if (opt_abi_ty) |abi_ty| { + // operand 
needs widening and truncating + ptr = self.builder.buildBitCast(ptr, abi_ty.pointerType(0), ""); + if (operand_ty.isSignedInt()) { + expected_value = self.builder.buildSExt(expected_value, abi_ty, ""); + new_value = self.builder.buildSExt(new_value, abi_ty, ""); + } else { + expected_value = self.builder.buildZExt(expected_value, abi_ty, ""); + new_value = self.builder.buildZExt(new_value, abi_ty, ""); + } + } + const success_order = toLlvmAtomicOrdering(extra.successOrder()); + const failure_order = toLlvmAtomicOrdering(extra.failureOrder()); + const result = self.builder.buildCmpXchg( + ptr, + expected_value, + new_value, + success_order, + failure_order, + is_weak, + self.single_threaded, + ); + + const optional_ty = self.air.typeOfIndex(inst); + var buffer: Type.Payload.ElemType = undefined; + const child_ty = optional_ty.optionalChild(&buffer); + + var payload = self.builder.buildExtractValue(result, 0, ""); + if (opt_abi_ty != null) { + payload = self.builder.buildTrunc(payload, try self.dg.llvmType(operand_ty), ""); + } + const success_bit = self.builder.buildExtractValue(result, 1, ""); + + if (optional_ty.isPtrLikeOptional()) { + const child_llvm_ty = try self.dg.llvmType(child_ty); + return self.builder.buildSelect(success_bit, child_llvm_ty.constNull(), payload, ""); + } + + const optional_llvm_ty = try self.dg.llvmType(optional_ty); + const non_null_bit = self.builder.buildNot(success_bit, ""); + const partial = self.builder.buildInsertValue(optional_llvm_ty.getUndef(), payload, 0, ""); + return self.builder.buildInsertValue(partial, non_null_bit, 1, ""); + } + fn getIntrinsic(self: *FuncGen, name: []const u8) *const llvm.Value { const id = llvm.lookupIntrinsicID(name.ptr, name.len); assert(id != 0); @@ -2125,3 +2207,14 @@ fn initializeLLVMTarget(arch: std.Target.Cpu.Arch) void { .spirv64 => {}, } } + +fn toLlvmAtomicOrdering(atomic_order: std.builtin.AtomicOrder) llvm.AtomicOrdering { + return switch (atomic_order) { + .Unordered => .Unordered, + 
.Monotonic => .Monotonic,
+        .Acquire => .Acquire,
+        .Release => .Release,
+        .AcqRel => .AcquireRelease,
+        .SeqCst => .SequentiallyConsistent,
+    };
+}
diff --git a/src/codegen/llvm/bindings.zig b/src/codegen/llvm/bindings.zig
index e0283be236..b4bd91708d 100644
--- a/src/codegen/llvm/bindings.zig
+++ b/src/codegen/llvm/bindings.zig
@@ -298,6 +298,14 @@ pub const Builder = opaque {
         Name: [*:0]const u8,
     ) *const Value;
 
+    pub const buildSExt = LLVMBuildSExt;
+    extern fn LLVMBuildSExt(
+        *const Builder,
+        Val: *const Value,
+        DestTy: *const Type,
+        Name: [*:0]const u8,
+    ) *const Value;
+
     pub const buildCall = LLVMBuildCall;
     extern fn LLVMBuildCall(
         *const Builder,
@@ -493,6 +501,27 @@ pub const Builder = opaque {
         Index: c_uint,
         Name: [*:0]const u8,
     ) *const Value;
+
+    pub const buildCmpXchg = ZigLLVMBuildCmpXchg;
+    extern fn ZigLLVMBuildCmpXchg(
+        builder: *const Builder,
+        ptr: *const Value,
+        cmp: *const Value,
+        new_val: *const Value,
+        success_ordering: AtomicOrdering,
+        failure_ordering: AtomicOrdering,
+        is_weak: bool,
+        is_single_threaded: bool,
+    ) *const Value;
+
+    pub const buildSelect = LLVMBuildSelect;
+    extern fn LLVMBuildSelect(
+        *const Builder,
+        If: *const Value,
+        Then: *const Value,
+        Else: *const Value,
+        Name: [*:0]const u8,
+    ) *const Value;
 };
 
 pub const IntPredicate = enum(c_uint) {
@@ -854,3 +883,13 @@ pub const Linkage = enum(c_uint) {
     LinkerPrivate,
     LinkerPrivateWeak,
 };
+
+pub const AtomicOrdering = enum(c_uint) {
+    NotAtomic = 0,
+    Unordered = 1,
+    Monotonic = 2,
+    Acquire = 4,
+    Release = 5,
+    AcquireRelease = 6,
+    SequentiallyConsistent = 7,
+};
diff --git a/src/link/C/zig.h b/src/link/C/zig.h
index 232fb6bd0c..f3fb02b840 100644
--- a/src/link/C/zig.h
+++ b/src/link/C/zig.h
@@ -60,6 +60,18 @@
 #define zig_breakpoint() raise(SIGTRAP)
 #endif
 
+#if __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_ATOMICS__)
+#include <stdatomic.h>
+#define zig_cmpxchg_strong(obj, expected, desired, succ, fail) atomic_compare_exchange_strong_explicit(obj, expected, desired,
succ, fail) +#define zig_cmpxchg_weak(obj, expected, desired, succ, fail) atomic_compare_exchange_weak_explicit(obj, expected, desired, succ, fail) +#elif __GNUC__ +#define zig_cmpxchg_strong(obj, expected, desired, succ, fail) __sync_val_compare_and_swap(obj, expected, desired) +#define zig_cmpxchg_weak(obj, expected, desired, succ, fail) __sync_val_compare_and_swap(obj, expected, desired) +#else +#define zig_cmpxchg_strong(obj, expected, desired, succ, fail) zig_unimplemented() +#define zig_cmpxchg_weak(obj, expected, desired, succ, fail) zig_unimplemented() +#endif + #include #include #include diff --git a/src/print_air.zig b/src/print_air.zig index 4d9ce1bb36..11cf1b7baa 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -191,6 +191,7 @@ const Writer = struct { .br => try w.writeBr(s, inst), .cond_br => try w.writeCondBr(s, inst), .switch_br => try w.writeSwitchBr(s, inst), + .cmpxchg_weak, .cmpxchg_strong => try w.writeCmpxchg(s, inst), } } @@ -258,7 +259,21 @@ const Writer = struct { try w.writeOperand(s, inst, 0, extra.lhs); try s.writeAll(", "); - try w.writeOperand(s, inst, 0, extra.rhs); + try w.writeOperand(s, inst, 1, extra.rhs); + } + + fn writeCmpxchg(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void { + const ty_pl = w.air.instructions.items(.data)[inst].ty_pl; + const extra = w.air.extraData(Air.Cmpxchg, ty_pl.payload).data; + + try w.writeOperand(s, inst, 0, extra.ptr); + try s.writeAll(", "); + try w.writeOperand(s, inst, 1, extra.expected_value); + try s.writeAll(", "); + try w.writeOperand(s, inst, 2, extra.new_value); + try s.print(", {s}, {s}", .{ + @tagName(extra.successOrder()), @tagName(extra.failureOrder()), + }); } fn writeConstant(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void { diff --git a/src/stage1/codegen.cpp b/src/stage1/codegen.cpp index 614ed8e26c..3b22101df9 100644 --- a/src/stage1/codegen.cpp +++ b/src/stage1/codegen.cpp @@ -5723,7 +5723,7 @@ static LLVMValueRef 
ir_render_cmpxchg(CodeGen *g, Stage1Air *executable, Stage1A LLVMAtomicOrdering failure_order = to_LLVMAtomicOrdering(instruction->failure_order); LLVMValueRef result_val = ZigLLVMBuildCmpXchg(g->builder, ptr_val, cmp_val, new_val, - success_order, failure_order, instruction->is_weak); + success_order, failure_order, instruction->is_weak, g->is_single_threaded); ZigType *optional_type = instruction->base.value->type; assert(optional_type->id == ZigTypeIdOptional); diff --git a/src/target.zig b/src/target.zig index 25a133f03b..c9d7e1742b 100644 --- a/src/target.zig +++ b/src/target.zig @@ -475,3 +475,72 @@ pub fn clangAssemblerSupportsMcpuArg(target: std.Target) bool { pub fn needUnwindTables(target: std.Target) bool { return target.os.tag == .windows; } + +/// TODO this was ported from stage1 but it does not take into account CPU features, +/// which can affect this value. Audit this! +pub fn largestAtomicBits(target: std.Target) u32 { + return switch (target.cpu.arch) { + .avr, + .msp430, + .spu_2, + => 16, + + .arc, + .arm, + .armeb, + .hexagon, + .le32, + .mips, + .mipsel, + .nvptx, + .powerpc, + .powerpcle, + .r600, + .riscv32, + .sparc, + .sparcel, + .tce, + .tcele, + .thumb, + .thumbeb, + .i386, + .xcore, + .amdil, + .hsail, + .spir, + .kalimba, + .lanai, + .shave, + .wasm32, + .renderscript32, + .csky, + .spirv32, + => 32, + + .aarch64, + .aarch64_be, + .aarch64_32, + .amdgcn, + .bpfel, + .bpfeb, + .le64, + .mips64, + .mips64el, + .nvptx64, + .powerpc64, + .powerpc64le, + .riscv64, + .sparcv9, + .s390x, + .amdil64, + .hsail64, + .spir64, + .wasm64, + .renderscript64, + .ve, + .spirv64, + => 64, + + .x86_64 => 128, + }; +} diff --git a/src/type.zig b/src/type.zig index 4d6ff3be0f..2403893133 100644 --- a/src/type.zig +++ b/src/type.zig @@ -2886,6 +2886,35 @@ pub const Type = extern union { } } + /// Returns the integer tag type of the enum. 
+ pub fn enumTagType(ty: Type, buffer: *Payload.Bits) Type { + switch (ty.tag()) { + .enum_full, .enum_nonexhaustive => { + const enum_full = ty.cast(Payload.EnumFull).?.data; + return enum_full.tag_ty; + }, + .enum_simple => { + const enum_simple = ty.castTag(.enum_simple).?.data; + buffer.* = .{ + .base = .{ .tag = .int_unsigned }, + .data = std.math.log2_int_ceil(usize, enum_simple.fields.count()), + }; + return Type.initPayload(&buffer.base); + }, + .atomic_order, + .atomic_rmw_op, + .calling_convention, + .float_mode, + .reduce_op, + .call_options, + .export_options, + .extern_options, + => @panic("TODO resolve std.builtin types"), + + else => unreachable, + } + } + pub fn isNonexhaustiveEnum(ty: Type) bool { return switch (ty.tag()) { .enum_nonexhaustive => true, diff --git a/src/zig_llvm.cpp b/src/zig_llvm.cpp index 2089092c7c..e1ab74f423 100644 --- a/src/zig_llvm.cpp +++ b/src/zig_llvm.cpp @@ -1087,10 +1087,12 @@ static AtomicOrdering mapFromLLVMOrdering(LLVMAtomicOrdering Ordering) { LLVMValueRef ZigLLVMBuildCmpXchg(LLVMBuilderRef builder, LLVMValueRef ptr, LLVMValueRef cmp, LLVMValueRef new_val, LLVMAtomicOrdering success_ordering, - LLVMAtomicOrdering failure_ordering, bool is_weak) + LLVMAtomicOrdering failure_ordering, bool is_weak, bool is_single_threaded) { - AtomicCmpXchgInst *inst = unwrap(builder)->CreateAtomicCmpXchg(unwrap(ptr), unwrap(cmp), - unwrap(new_val), mapFromLLVMOrdering(success_ordering), mapFromLLVMOrdering(failure_ordering)); + AtomicCmpXchgInst *inst = unwrap(builder)->CreateAtomicCmpXchg(unwrap(ptr), + unwrap(cmp), unwrap(new_val), + mapFromLLVMOrdering(success_ordering), mapFromLLVMOrdering(failure_ordering), + is_single_threaded ? 
SyncScope::SingleThread : SyncScope::System); inst->setWeak(is_weak); return wrap(inst); } @@ -1308,19 +1310,6 @@ static AtomicRMWInst::BinOp toLLVMRMWBinOp(enum ZigLLVM_AtomicRMWBinOp BinOp) { } } -static AtomicOrdering toLLVMOrdering(LLVMAtomicOrdering Ordering) { - switch (Ordering) { - default: - case LLVMAtomicOrderingNotAtomic: return AtomicOrdering::NotAtomic; - case LLVMAtomicOrderingUnordered: return AtomicOrdering::Unordered; - case LLVMAtomicOrderingMonotonic: return AtomicOrdering::Monotonic; - case LLVMAtomicOrderingAcquire: return AtomicOrdering::Acquire; - case LLVMAtomicOrderingRelease: return AtomicOrdering::Release; - case LLVMAtomicOrderingAcquireRelease: return AtomicOrdering::AcquireRelease; - case LLVMAtomicOrderingSequentiallyConsistent: return AtomicOrdering::SequentiallyConsistent; - } -} - inline LLVMAttributeRef wrap(Attribute Attr) { return reinterpret_cast(Attr.getRawPointer()); } @@ -1335,7 +1324,7 @@ LLVMValueRef ZigLLVMBuildAtomicRMW(LLVMBuilderRef B, enum ZigLLVM_AtomicRMWBinOp { AtomicRMWInst::BinOp intop = toLLVMRMWBinOp(op); return wrap(unwrap(B)->CreateAtomicRMW(intop, unwrap(PTR), - unwrap(Val), toLLVMOrdering(ordering), + unwrap(Val), mapFromLLVMOrdering(ordering), singleThread ? 
SyncScope::SingleThread : SyncScope::System)); } diff --git a/src/zig_llvm.h b/src/zig_llvm.h index 91407b7f12..be279d86e1 100644 --- a/src/zig_llvm.h +++ b/src/zig_llvm.h @@ -148,7 +148,7 @@ ZIG_EXTERN_C LLVMValueRef ZigLLVMBuildSShlSat(LLVMBuilderRef builder, LLVMValueR ZIG_EXTERN_C LLVMValueRef ZigLLVMBuildCmpXchg(LLVMBuilderRef builder, LLVMValueRef ptr, LLVMValueRef cmp, LLVMValueRef new_val, LLVMAtomicOrdering success_ordering, - LLVMAtomicOrdering failure_ordering, bool is_weak); + LLVMAtomicOrdering failure_ordering, bool is_weak, bool is_single_threaded); ZIG_EXTERN_C LLVMValueRef ZigLLVMBuildNSWShl(LLVMBuilderRef builder, LLVMValueRef LHS, LLVMValueRef RHS, const char *name); diff --git a/test/behavior/atomics.zig b/test/behavior/atomics.zig index f6c94f7155..444ff56438 100644 --- a/test/behavior/atomics.zig +++ b/test/behavior/atomics.zig @@ -2,3 +2,25 @@ const std = @import("std"); const expect = std.testing.expect; const expectEqual = std.testing.expectEqual; const builtin = @import("builtin"); + +test "cmpxchg" { + try testCmpxchg(); + comptime try testCmpxchg(); +} + +fn testCmpxchg() !void { + var x: i32 = 1234; + if (@cmpxchgWeak(i32, &x, 99, 5678, .SeqCst, .SeqCst)) |x1| { + try expect(x1 == 1234); + } else { + @panic("cmpxchg should have failed"); + } + + while (@cmpxchgWeak(i32, &x, 1234, 5678, .SeqCst, .SeqCst)) |x1| { + try expect(x1 == 1234); + } + try expect(x == 5678); + + try expect(@cmpxchgStrong(i32, &x, 5678, 42, .SeqCst, .SeqCst) == null); + try expect(x == 42); +} diff --git a/test/behavior/atomics_stage1.zig b/test/behavior/atomics_stage1.zig index 18836931aa..6e754e30cd 100644 --- a/test/behavior/atomics_stage1.zig +++ b/test/behavior/atomics_stage1.zig @@ -3,28 +3,6 @@ const expect = std.testing.expect; const expectEqual = std.testing.expectEqual; const builtin = @import("builtin"); -test "cmpxchg" { - try testCmpxchg(); - comptime try testCmpxchg(); -} - -fn testCmpxchg() !void { - var x: i32 = 1234; - if (@cmpxchgWeak(i32, &x, 
99, 5678, .SeqCst, .SeqCst)) |x1| { - try expect(x1 == 1234); - } else { - @panic("cmpxchg should have failed"); - } - - while (@cmpxchgWeak(i32, &x, 1234, 5678, .SeqCst, .SeqCst)) |x1| { - try expect(x1 == 1234); - } - try expect(x == 5678); - - try expect(@cmpxchgStrong(i32, &x, 5678, 42, .SeqCst, .SeqCst) == null); - try expect(x == 42); -} - test "fence" { var x: i32 = 1234; @fence(.SeqCst);