diff --git a/src/Compilation.zig b/src/Compilation.zig index e51b3de1ad..64ec1ab0a8 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -43,7 +43,6 @@ const Air = @import("Air.zig"); const Builtin = @import("Builtin.zig"); const LlvmObject = @import("codegen/llvm.zig").Object; const dev = @import("dev.zig"); -const ThreadSafeQueue = @import("ThreadSafeQueue.zig").ThreadSafeQueue; pub const Config = @import("Compilation/Config.zig"); @@ -113,17 +112,7 @@ win32_resource_table: if (dev.env.supports(.win32_resource)) std.AutoArrayHashMa } = .{}, link_diags: link.Diags, -link_task_queue: ThreadSafeQueue(link.Task) = .empty, -/// Ensure only 1 simultaneous call to `flushTaskQueue`. -link_task_queue_safety: std.debug.SafetyLock = .{}, -/// If any tasks are queued up that depend on prelink being finished, they are moved -/// here until prelink finishes. -link_task_queue_postponed: std.ArrayListUnmanaged(link.Task) = .empty, -/// Initialized with how many link input tasks are expected. After this reaches zero -/// the linker will begin the prelink phase. -/// Initialized in the Compilation main thread before the pipeline; modified only in -/// the linker task thread. -remaining_prelink_tasks: u32, +link_task_queue: link.Queue = .empty, /// Set of work that can be represented by only flags to determine whether the /// work is queued or not. @@ -846,15 +835,24 @@ pub const RcIncludes = enum { }; const Job = union(enum) { - /// Corresponds to the task in `link.Task`. - /// Only needed for backends that haven't yet been updated to not race against Sema. + /// Given the generated AIR for a function, put it onto the code generation queue. + /// This `Job` exists (instead of the `link.ZcuTask` being directly queued) to ensure that + /// all types are resolved before the linker task is queued. + /// If the backend does not support `Zcu.Feature.separate_thread`, codegen and linking happen immediately. 
+ codegen_func: struct { + func: InternPool.Index, + /// The AIR emitted from analyzing `func`; owned by this `Job` in `gpa`. + air: Air, + }, + /// Queue a `link.ZcuTask` to emit this non-function `Nav` into the output binary. + /// This `Job` exists (instead of the `link.ZcuTask` being directly queued) to ensure that + /// all types are resolved before the linker task is queued. + /// If the backend does not support `Zcu.Feature.separate_thread`, the task is run immediately. link_nav: InternPool.Nav.Index, - /// Corresponds to the task in `link.Task`. - /// TODO: this is currently also responsible for performing codegen. - /// Only needed for backends that haven't yet been updated to not race against Sema. - link_func: link.Task.CodegenFunc, - /// Corresponds to the task in `link.Task`. - /// Only needed for backends that haven't yet been updated to not race against Sema. + /// Queue a `link.ZcuTask` to emit debug information for this container type. + /// This `Job` exists (instead of the `link.ZcuTask` being directly queued) to ensure that + /// all types are resolved before the linker task is queued. + /// If the backend does not support `Zcu.Feature.separate_thread`, the task is run immediately. link_type: InternPool.Index, update_line_number: InternPool.TrackedInst.Index, /// The `AnalUnit`, which is *not* a `func`, must be semantically analyzed. @@ -880,13 +878,13 @@ const Job = union(enum) { return switch (tag) { // Prioritize functions so that codegen can get to work on them on a // separate thread, while Sema goes back to its own work. 
- .resolve_type_fully, .analyze_func, .link_func => 0, + .resolve_type_fully, .analyze_func, .codegen_func => 0, else => 1, }; } comptime { // Job dependencies - assert(stage(.resolve_type_fully) <= stage(.link_func)); + assert(stage(.resolve_type_fully) <= stage(.codegen_func)); } }; @@ -2004,7 +2002,6 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil .file_system_inputs = options.file_system_inputs, .parent_whole_cache = options.parent_whole_cache, .link_diags = .init(gpa), - .remaining_prelink_tasks = 0, }; // Prevent some footguns by making the "any" fields of config reflect @@ -2213,7 +2210,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil }; comp.c_object_table.putAssumeCapacityNoClobber(c_object, {}); } - comp.remaining_prelink_tasks += @intCast(comp.c_object_table.count()); + comp.link_task_queue.pending_prelink_tasks += @intCast(comp.c_object_table.count()); // Add a `Win32Resource` for each `rc_source_files` and one for `manifest_file`. const win32_resource_count = @@ -2224,7 +2221,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil // Add this after adding logic to updateWin32Resource to pass the // result into link.loadInput. loadInput integration is not implemented // for Windows linking logic yet. 
- //comp.remaining_prelink_tasks += @intCast(win32_resource_count); + //comp.link_task_queue.pending_prelink_tasks += @intCast(win32_resource_count); for (options.rc_source_files) |rc_source_file| { const win32_resource = try gpa.create(Win32Resource); errdefer gpa.destroy(win32_resource); @@ -2275,78 +2272,76 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil const paths = try lci.resolveCrtPaths(arena, basenames, target); const fields = @typeInfo(@TypeOf(paths)).@"struct".fields; - try comp.link_task_queue.shared.ensureUnusedCapacity(gpa, fields.len + 1); + try comp.link_task_queue.queued_prelink.ensureUnusedCapacity(gpa, fields.len + 1); inline for (fields) |field| { if (@field(paths, field.name)) |path| { - comp.link_task_queue.shared.appendAssumeCapacity(.{ .load_object = path }); - comp.remaining_prelink_tasks += 1; + comp.link_task_queue.queued_prelink.appendAssumeCapacity(.{ .load_object = path }); } } // Loads the libraries provided by `target_util.libcFullLinkFlags(target)`. 
- comp.link_task_queue.shared.appendAssumeCapacity(.load_host_libc); - comp.remaining_prelink_tasks += 1; + comp.link_task_queue.queued_prelink.appendAssumeCapacity(.load_host_libc); } else if (target.isMuslLibC()) { if (!std.zig.target.canBuildLibC(target)) return error.LibCUnavailable; if (musl.needsCrt0(comp.config.output_mode, comp.config.link_mode, comp.config.pie)) |f| { comp.queued_jobs.musl_crt_file[@intFromEnum(f)] = true; - comp.remaining_prelink_tasks += 1; + comp.link_task_queue.pending_prelink_tasks += 1; } switch (comp.config.link_mode) { .static => comp.queued_jobs.musl_crt_file[@intFromEnum(musl.CrtFile.libc_a)] = true, .dynamic => comp.queued_jobs.musl_crt_file[@intFromEnum(musl.CrtFile.libc_so)] = true, } - comp.remaining_prelink_tasks += 1; + comp.link_task_queue.pending_prelink_tasks += 1; } else if (target.isGnuLibC()) { if (!std.zig.target.canBuildLibC(target)) return error.LibCUnavailable; if (glibc.needsCrt0(comp.config.output_mode)) |f| { comp.queued_jobs.glibc_crt_file[@intFromEnum(f)] = true; - comp.remaining_prelink_tasks += 1; + comp.link_task_queue.pending_prelink_tasks += 1; } comp.queued_jobs.glibc_shared_objects = true; - comp.remaining_prelink_tasks += glibc.sharedObjectsCount(&target); + comp.link_task_queue.pending_prelink_tasks += glibc.sharedObjectsCount(&target); comp.queued_jobs.glibc_crt_file[@intFromEnum(glibc.CrtFile.libc_nonshared_a)] = true; - comp.remaining_prelink_tasks += 1; + comp.link_task_queue.pending_prelink_tasks += 1; } else if (target.isFreeBSDLibC()) { if (!std.zig.target.canBuildLibC(target)) return error.LibCUnavailable; if (freebsd.needsCrt0(comp.config.output_mode)) |f| { comp.queued_jobs.freebsd_crt_file[@intFromEnum(f)] = true; - comp.remaining_prelink_tasks += 1; + comp.link_task_queue.pending_prelink_tasks += 1; } comp.queued_jobs.freebsd_shared_objects = true; - comp.remaining_prelink_tasks += freebsd.sharedObjectsCount(); + comp.link_task_queue.pending_prelink_tasks += freebsd.sharedObjectsCount(); 
} else if (target.isNetBSDLibC()) { if (!std.zig.target.canBuildLibC(target)) return error.LibCUnavailable; if (netbsd.needsCrt0(comp.config.output_mode)) |f| { comp.queued_jobs.netbsd_crt_file[@intFromEnum(f)] = true; - comp.remaining_prelink_tasks += 1; + comp.link_task_queue.pending_prelink_tasks += 1; } comp.queued_jobs.netbsd_shared_objects = true; - comp.remaining_prelink_tasks += netbsd.sharedObjectsCount(); + comp.link_task_queue.pending_prelink_tasks += netbsd.sharedObjectsCount(); } else if (target.isWasiLibC()) { if (!std.zig.target.canBuildLibC(target)) return error.LibCUnavailable; for (comp.wasi_emulated_libs) |crt_file| { comp.queued_jobs.wasi_libc_crt_file[@intFromEnum(crt_file)] = true; } - comp.remaining_prelink_tasks += @intCast(comp.wasi_emulated_libs.len); + comp.link_task_queue.pending_prelink_tasks += @intCast(comp.wasi_emulated_libs.len); comp.queued_jobs.wasi_libc_crt_file[@intFromEnum(wasi_libc.execModelCrtFile(comp.config.wasi_exec_model))] = true; comp.queued_jobs.wasi_libc_crt_file[@intFromEnum(wasi_libc.CrtFile.libc_a)] = true; - comp.remaining_prelink_tasks += 2; + comp.link_task_queue.pending_prelink_tasks += 2; } else if (target.isMinGW()) { if (!std.zig.target.canBuildLibC(target)) return error.LibCUnavailable; const main_crt_file: mingw.CrtFile = if (is_dyn_lib) .dllcrt2_o else .crt2_o; comp.queued_jobs.mingw_crt_file[@intFromEnum(main_crt_file)] = true; comp.queued_jobs.mingw_crt_file[@intFromEnum(mingw.CrtFile.libmingw32_lib)] = true; - comp.remaining_prelink_tasks += 2; + comp.link_task_queue.pending_prelink_tasks += 2; // When linking mingw-w64 there are some import libs we always need. 
try comp.windows_libs.ensureUnusedCapacity(gpa, mingw.always_link_libs.len); @@ -2360,7 +2355,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil target.isMinGW()) { comp.queued_jobs.zigc_lib = true; - comp.remaining_prelink_tasks += 1; + comp.link_task_queue.pending_prelink_tasks += 1; } } @@ -2377,53 +2372,53 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil } if (comp.wantBuildLibUnwindFromSource()) { comp.queued_jobs.libunwind = true; - comp.remaining_prelink_tasks += 1; + comp.link_task_queue.pending_prelink_tasks += 1; } if (build_options.have_llvm and is_exe_or_dyn_lib and comp.config.link_libcpp) { comp.queued_jobs.libcxx = true; comp.queued_jobs.libcxxabi = true; - comp.remaining_prelink_tasks += 2; + comp.link_task_queue.pending_prelink_tasks += 2; } if (build_options.have_llvm and is_exe_or_dyn_lib and comp.config.any_sanitize_thread) { comp.queued_jobs.libtsan = true; - comp.remaining_prelink_tasks += 1; + comp.link_task_queue.pending_prelink_tasks += 1; } if (can_build_compiler_rt) { if (comp.compiler_rt_strat == .lib) { log.debug("queuing a job to build compiler_rt_lib", .{}); comp.queued_jobs.compiler_rt_lib = true; - comp.remaining_prelink_tasks += 1; + comp.link_task_queue.pending_prelink_tasks += 1; } else if (comp.compiler_rt_strat == .obj) { log.debug("queuing a job to build compiler_rt_obj", .{}); // In this case we are making a static library, so we ask // for a compiler-rt object to put in it. 
comp.queued_jobs.compiler_rt_obj = true; - comp.remaining_prelink_tasks += 1; + comp.link_task_queue.pending_prelink_tasks += 1; } if (comp.ubsan_rt_strat == .lib) { log.debug("queuing a job to build ubsan_rt_lib", .{}); comp.queued_jobs.ubsan_rt_lib = true; - comp.remaining_prelink_tasks += 1; + comp.link_task_queue.pending_prelink_tasks += 1; } else if (comp.ubsan_rt_strat == .obj) { log.debug("queuing a job to build ubsan_rt_obj", .{}); comp.queued_jobs.ubsan_rt_obj = true; - comp.remaining_prelink_tasks += 1; + comp.link_task_queue.pending_prelink_tasks += 1; } if (is_exe_or_dyn_lib and comp.config.any_fuzz) { log.debug("queuing a job to build libfuzzer", .{}); comp.queued_jobs.fuzzer_lib = true; - comp.remaining_prelink_tasks += 1; + comp.link_task_queue.pending_prelink_tasks += 1; } } } - try comp.link_task_queue.shared.append(gpa, .load_explicitly_provided); - comp.remaining_prelink_tasks += 1; + try comp.link_task_queue.queued_prelink.append(gpa, .load_explicitly_provided); } - log.debug("total prelink tasks: {d}", .{comp.remaining_prelink_tasks}); + log.debug("queued prelink tasks: {d}", .{comp.link_task_queue.queued_prelink.items.len}); + log.debug("pending prelink tasks: {d}", .{comp.link_task_queue.pending_prelink_tasks}); return comp; } @@ -2431,6 +2426,10 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil pub fn destroy(comp: *Compilation) void { const gpa = comp.gpa; + // This needs to be destroyed first, because it might contain MIR which we only know + // how to interpret (which kind of MIR it is) from `comp.bin_file`. 
+ comp.link_task_queue.deinit(comp); + if (comp.bin_file) |lf| lf.destroy(); if (comp.zcu) |zcu| zcu.deinit(); comp.cache_use.deinit(); @@ -2512,8 +2511,6 @@ pub fn destroy(comp: *Compilation) void { comp.failed_win32_resources.deinit(gpa); comp.link_diags.deinit(); - comp.link_task_queue.deinit(gpa); - comp.link_task_queue_postponed.deinit(gpa); comp.clearMiscFailures(); @@ -4180,9 +4177,7 @@ fn performAllTheWorkInner( comp.link_task_wait_group.reset(); defer comp.link_task_wait_group.wait(); - if (comp.link_task_queue.start()) { - comp.thread_pool.spawnWgId(&comp.link_task_wait_group, link.flushTaskQueue, .{comp}); - } + comp.link_task_queue.start(comp); if (comp.docs_emit != null) { dev.check(.docs_emit); @@ -4498,7 +4493,7 @@ fn performAllTheWorkInner( comp.link_task_wait_group.wait(); comp.link_task_wait_group.reset(); std.log.scoped(.link).debug("finished waiting for link_task_wait_group", .{}); - if (comp.remaining_prelink_tasks > 0) { + if (comp.link_task_queue.pending_prelink_tasks > 0) { // Indicates an error occurred preventing prelink phase from completing. return; } @@ -4543,6 +4538,45 @@ pub fn queueJobs(comp: *Compilation, jobs: []const Job) !void { fn processOneJob(tid: usize, comp: *Compilation, job: Job) JobError!void { switch (job) { + .codegen_func => |func| { + const zcu = comp.zcu.?; + const gpa = zcu.gpa; + var air = func.air; + errdefer air.deinit(gpa); + if (!air.typesFullyResolved(zcu)) { + // Type resolution failed in a way which affects this function. This is a transitive + // failure, but it doesn't need recording, because this function semantically depends + // on the failed type, so when it is changed the function is updated. 
+ air.deinit(gpa); + return; + } + const pt: Zcu.PerThread = .activate(comp.zcu.?, @enumFromInt(tid)); + defer pt.deactivate(); + const shared_mir = try gpa.create(link.ZcuTask.LinkFunc.SharedMir); + shared_mir.* = .{ + .status = .init(.pending), + .value = undefined, + }; + if (comp.separateCodegenThreadOk()) { + // `workerZcuCodegen` takes ownership of `air`. + comp.thread_pool.spawnWgId(&comp.link_task_wait_group, workerZcuCodegen, .{ comp, func.func, air, shared_mir }); + comp.dispatchZcuLinkTask(tid, .{ .link_func = .{ + .func = func.func, + .mir = shared_mir, + .air = undefined, + } }); + } else { + const emit_needs_air = !zcu.backendSupportsFeature(.separate_thread); + pt.runCodegen(func.func, &air, shared_mir); + assert(shared_mir.status.load(.monotonic) != .pending); + comp.dispatchZcuLinkTask(tid, .{ .link_func = .{ + .func = func.func, + .mir = shared_mir, + .air = if (emit_needs_air) &air else undefined, + } }); + air.deinit(gpa); + } + }, .link_nav => |nav_index| { const zcu = comp.zcu.?; const nav = zcu.intern_pool.getNav(nav_index); @@ -4559,17 +4593,7 @@ fn processOneJob(tid: usize, comp: *Compilation, job: Job) JobError!void { // on the failed type, so when it is changed the `Nav` will be updated. return; } - comp.dispatchLinkTask(tid, .{ .link_nav = nav_index }); - }, - .link_func => |func| { - const zcu = comp.zcu.?; - if (!func.air.typesFullyResolved(zcu)) { - // Type resolution failed in a way which affects this function. This is a transitive - // failure, but it doesn't need recording, because this function semantically depends - // on the failed type, so when it is changed the function is updated. - return; - } - comp.dispatchLinkTask(tid, .{ .link_func = func }); + comp.dispatchZcuLinkTask(tid, .{ .link_nav = nav_index }); }, .link_type => |ty| { const zcu = comp.zcu.?; @@ -4580,10 +4604,10 @@ fn processOneJob(tid: usize, comp: *Compilation, job: Job) JobError!void { // on the failed type, so when that is changed, this type will be updated. 
return; } - comp.dispatchLinkTask(tid, .{ .link_type = ty }); + comp.dispatchZcuLinkTask(tid, .{ .link_type = ty }); }, .update_line_number => |ti| { - comp.dispatchLinkTask(tid, .{ .update_line_number = ti }); + comp.dispatchZcuLinkTask(tid, .{ .update_line_number = ti }); }, .analyze_func => |func| { const named_frame = tracy.namedFrame("analyze_func"); @@ -4675,18 +4699,7 @@ fn processOneJob(tid: usize, comp: *Compilation, job: Job) JobError!void { } } -/// The reason for the double-queue here is that the first queue ensures any -/// resolve_type_fully tasks are complete before this dispatch function is called. -fn dispatchLinkTask(comp: *Compilation, tid: usize, link_task: link.Task) void { - if (comp.separateCodegenThreadOk()) { - comp.queueLinkTasks(&.{link_task}); - } else { - assert(comp.remaining_prelink_tasks == 0); - link.doTask(comp, tid, link_task); - } -} - -fn separateCodegenThreadOk(comp: *const Compilation) bool { +pub fn separateCodegenThreadOk(comp: *const Compilation) bool { if (InternPool.single_threaded) return false; const zcu = comp.zcu orelse return true; return zcu.backendSupportsFeature(.separate_thread); @@ -5273,6 +5286,21 @@ pub const RtOptions = struct { allow_lto: bool = true, }; +fn workerZcuCodegen( + tid: usize, + comp: *Compilation, + func_index: InternPool.Index, + orig_air: Air, + out: *link.ZcuTask.LinkFunc.SharedMir, +) void { + var air = orig_air; + // We own `air` now, so we are responsbile for freeing it. 
+ defer air.deinit(comp.gpa); + const pt: Zcu.PerThread = .activate(comp.zcu.?, @enumFromInt(tid)); + defer pt.deactivate(); + pt.runCodegen(func_index, &air, out); +} + fn buildRt( comp: *Compilation, root_source_name: []const u8, @@ -5804,7 +5832,7 @@ fn updateCObject(comp: *Compilation, c_object: *CObject, c_obj_prog_node: std.Pr }, }; - comp.queueLinkTasks(&.{.{ .load_object = c_object.status.success.object_path }}); + comp.queuePrelinkTasks(&.{.{ .load_object = c_object.status.success.object_path }}); } fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32_resource_prog_node: std.Progress.Node) !void { @@ -7237,7 +7265,7 @@ fn buildOutputFromZig( assert(out.* == null); out.* = crt_file; - comp.queueLinkTaskMode(crt_file.full_object_path, &config); + comp.queuePrelinkTaskMode(crt_file.full_object_path, &config); } pub const CrtFileOptions = struct { @@ -7361,7 +7389,7 @@ pub fn build_crt_file( try comp.updateSubCompilation(sub_compilation, misc_task_tag, prog_node); const crt_file = try sub_compilation.toCrtFile(); - comp.queueLinkTaskMode(crt_file.full_object_path, &config); + comp.queuePrelinkTaskMode(crt_file.full_object_path, &config); { comp.mutex.lock(); @@ -7371,8 +7399,8 @@ pub fn build_crt_file( } } -pub fn queueLinkTaskMode(comp: *Compilation, path: Cache.Path, config: *const Compilation.Config) void { - comp.queueLinkTasks(switch (config.output_mode) { +pub fn queuePrelinkTaskMode(comp: *Compilation, path: Cache.Path, config: *const Compilation.Config) void { + comp.queuePrelinkTasks(switch (config.output_mode) { .Exe => unreachable, .Obj => &.{.{ .load_object = path }}, .Lib => &.{switch (config.link_mode) { @@ -7384,12 +7412,30 @@ pub fn queueLinkTaskMode(comp: *Compilation, path: Cache.Path, config: *const Co /// Only valid to call during `update`. Automatically handles queuing up a /// linker worker task if there is not already one. 
-pub fn queueLinkTasks(comp: *Compilation, tasks: []const link.Task) void { - if (comp.link_task_queue.enqueue(comp.gpa, tasks) catch |err| switch (err) { +pub fn queuePrelinkTasks(comp: *Compilation, tasks: []const link.PrelinkTask) void { + comp.link_task_queue.enqueuePrelink(comp, tasks) catch |err| switch (err) { error.OutOfMemory => return comp.setAllocFailure(), - }) { - comp.thread_pool.spawnWgId(&comp.link_task_wait_group, link.flushTaskQueue, .{comp}); + }; +} + +/// The reason for the double-queue here is that the first queue ensures any +/// resolve_type_fully tasks are complete before this dispatch function is called. +fn dispatchZcuLinkTask(comp: *Compilation, tid: usize, task: link.ZcuTask) void { + if (!comp.separateCodegenThreadOk()) { + assert(tid == 0); + if (task == .link_func) { + assert(task.link_func.mir.status.load(.monotonic) != .pending); + } + link.doZcuTask(comp, tid, task); + task.deinit(comp.zcu.?); + return; } + comp.link_task_queue.enqueueZcu(comp, task) catch |err| switch (err) { + error.OutOfMemory => { + task.deinit(comp.zcu.?); + comp.setAllocFailure(); + }, + }; } pub fn toCrtFile(comp: *Compilation) Allocator.Error!CrtFile { diff --git a/src/ThreadSafeQueue.zig b/src/ThreadSafeQueue.zig deleted file mode 100644 index 74bbdc418f..0000000000 --- a/src/ThreadSafeQueue.zig +++ /dev/null @@ -1,72 +0,0 @@ -const std = @import("std"); -const assert = std.debug.assert; -const Allocator = std.mem.Allocator; - -pub fn ThreadSafeQueue(comptime T: type) type { - return struct { - worker_owned: std.ArrayListUnmanaged(T), - /// Protected by `mutex`. 
- shared: std.ArrayListUnmanaged(T), - mutex: std.Thread.Mutex, - state: State, - - const Self = @This(); - - pub const State = enum { wait, run }; - - pub const empty: Self = .{ - .worker_owned = .empty, - .shared = .empty, - .mutex = .{}, - .state = .wait, - }; - - pub fn deinit(self: *Self, gpa: Allocator) void { - self.worker_owned.deinit(gpa); - self.shared.deinit(gpa); - self.* = undefined; - } - - /// Must be called from the worker thread. - pub fn check(self: *Self) ?[]T { - assert(self.worker_owned.items.len == 0); - { - self.mutex.lock(); - defer self.mutex.unlock(); - assert(self.state == .run); - if (self.shared.items.len == 0) { - self.state = .wait; - return null; - } - std.mem.swap(std.ArrayListUnmanaged(T), &self.worker_owned, &self.shared); - } - const result = self.worker_owned.items; - self.worker_owned.clearRetainingCapacity(); - return result; - } - - /// Adds items to the queue, returning true if and only if the worker - /// thread is waiting. Thread-safe. - /// Not safe to call from the worker thread. - pub fn enqueue(self: *Self, gpa: Allocator, items: []const T) error{OutOfMemory}!bool { - self.mutex.lock(); - defer self.mutex.unlock(); - try self.shared.appendSlice(gpa, items); - return switch (self.state) { - .run => false, - .wait => { - self.state = .run; - return true; - }, - }; - } - - /// Safe only to call exactly once when initially starting the worker. - pub fn start(self: *Self) bool { - assert(self.state == .wait); - if (self.shared.items.len == 0) return false; - self.state = .run; - return true; - } - }; -} diff --git a/src/Zcu.zig b/src/Zcu.zig index 6a6a74e260..91d2c0ffff 100644 --- a/src/Zcu.zig +++ b/src/Zcu.zig @@ -171,6 +171,8 @@ transitive_failed_analysis: std.AutoArrayHashMapUnmanaged(AnalUnit, void) = .emp /// This `Nav` succeeded analysis, but failed codegen. /// This may be a simple "value" `Nav`, or it may be a function. /// The ErrorMsg memory is owned by the `AnalUnit`, using Module's general purpose allocator. 
+/// While multiple threads are active (most of the time!), this is guarded by `zcu.comp.mutex`, as +/// codegen and linking run on a separate thread. failed_codegen: std.AutoArrayHashMapUnmanaged(InternPool.Nav.Index, *ErrorMsg) = .empty, failed_types: std.AutoArrayHashMapUnmanaged(InternPool.Index, *ErrorMsg) = .empty, /// Keep track of `@compileLog`s per `AnalUnit`. @@ -3817,7 +3819,36 @@ pub const Feature = enum { is_named_enum_value, error_set_has_value, field_reordering, - /// If the backend supports running from another thread. + /// In theory, backends are supposed to work like this: + /// + /// * The AIR emitted by `Sema` is converted into MIR by `codegen.generateFunction`. This pass + /// is "pure", in that it does not depend on or modify any external mutable state. + /// + /// * That MIR is sent to the linker, which calls `codegen.emitFunction` to convert the MIR to + /// finalized machine code. This process is permitted to query and modify linker state. + /// + /// * The linker stores the resulting machine code in the binary as needed. + /// + /// The first stage described above can run in parallel to the rest of the compiler, and even to + /// other code generation work; we can run as many codegen threads as we want in parallel because + /// of the fact that this pass is pure. Emit and link must be single-threaded, but are generally + /// very fast, so that isn't a problem. + /// + /// Unfortunately, some code generation implementations currently query and/or mutate linker state + /// or even (in the case of the LLVM backend) semantic analysis state. Such backends cannot be run + /// in parallel with each other, with linking, or (potentially) with semantic analysis. + /// + /// Additionally, some backends continue to need the AIR in the "emit" stage, despite this pass + /// operating on MIR. This complicates memory management under the threading model above. + /// + /// These are both **bugs** in backend implementations, left over from legacy code. 
However, they + /// are difficult to fix. So, this `Feature` currently guards correct threading of code generation: + /// + /// * With this feature enabled, the backend is threaded as described above. The "emit" stage does + /// not have access to AIR (it will be `undefined`; see `codegen.emitFunction`). + /// + /// * With this feature disabled, semantic analysis, code generation, and linking all occur on the + /// same thread, and the "emit" stage has access to AIR. separate_thread, }; @@ -4566,22 +4597,29 @@ pub fn codegenFail( comptime format: []const u8, args: anytype, ) CodegenFailError { - const gpa = zcu.gpa; - try zcu.failed_codegen.ensureUnusedCapacity(gpa, 1); - const msg = try Zcu.ErrorMsg.create(gpa, zcu.navSrcLoc(nav_index), format, args); - zcu.failed_codegen.putAssumeCapacityNoClobber(nav_index, msg); - return error.CodegenFail; + const msg = try Zcu.ErrorMsg.create(zcu.gpa, zcu.navSrcLoc(nav_index), format, args); + return zcu.codegenFailMsg(nav_index, msg); } +/// Takes ownership of `msg`, even on OOM. pub fn codegenFailMsg(zcu: *Zcu, nav_index: InternPool.Nav.Index, msg: *ErrorMsg) CodegenFailError { const gpa = zcu.gpa; { + zcu.comp.mutex.lock(); + defer zcu.comp.mutex.unlock(); errdefer msg.deinit(gpa); try zcu.failed_codegen.putNoClobber(gpa, nav_index, msg); } return error.CodegenFail; } +/// Asserts that `zcu.failed_codegen` contains the key `nav`, with the necessary lock held. 
+pub fn assertCodegenFailed(zcu: *Zcu, nav: InternPool.Nav.Index) void { + zcu.comp.mutex.lock(); + defer zcu.comp.mutex.unlock(); + assert(zcu.failed_codegen.contains(nav)); +} + pub fn codegenFailType( zcu: *Zcu, ty_index: InternPool.Index, diff --git a/src/Zcu/PerThread.zig b/src/Zcu/PerThread.zig index 137d93b82a..92f1adbf2a 100644 --- a/src/Zcu/PerThread.zig +++ b/src/Zcu/PerThread.zig @@ -27,6 +27,7 @@ const Type = @import("../Type.zig"); const Value = @import("../Value.zig"); const Zcu = @import("../Zcu.zig"); const Compilation = @import("../Compilation.zig"); +const codegen = @import("../codegen.zig"); const Zir = std.zig.Zir; const Zoir = std.zig.Zoir; const ZonGen = std.zig.ZonGen; @@ -1716,7 +1717,7 @@ fn analyzeFuncBody( } // This job depends on any resolve_type_fully jobs queued up before it. - try comp.queueJob(.{ .link_func = .{ + try comp.queueJob(.{ .codegen_func = .{ .func = func_index, .air = air, } }); @@ -1724,79 +1725,6 @@ fn analyzeFuncBody( return .{ .ies_outdated = ies_outdated }; } -/// Takes ownership of `air`, even on error. -/// If any types referenced by `air` are unresolved, marks the codegen as failed. 
-pub fn linkerUpdateFunc(pt: Zcu.PerThread, func_index: InternPool.Index, air: *Air) Allocator.Error!void { - const zcu = pt.zcu; - const gpa = zcu.gpa; - const ip = &zcu.intern_pool; - const comp = zcu.comp; - - const func = zcu.funcInfo(func_index); - const nav_index = func.owner_nav; - const nav = ip.getNav(nav_index); - - const codegen_prog_node = zcu.codegen_prog_node.start(nav.fqn.toSlice(ip), 0); - defer codegen_prog_node.end(); - - legalize: { - try air.legalize(pt, @import("../codegen.zig").legalizeFeatures(pt, nav_index) orelse break :legalize); - } - - var liveness = try Air.Liveness.analyze(zcu, air.*, ip); - defer liveness.deinit(gpa); - - if (build_options.enable_debug_extensions and comp.verbose_air) { - std.debug.print("# Begin Function AIR: {}:\n", .{nav.fqn.fmt(ip)}); - air.dump(pt, liveness); - std.debug.print("# End Function AIR: {}\n\n", .{nav.fqn.fmt(ip)}); - } - - if (std.debug.runtime_safety) { - var verify: Air.Liveness.Verify = .{ - .gpa = gpa, - .zcu = zcu, - .air = air.*, - .liveness = liveness, - .intern_pool = ip, - }; - defer verify.deinit(); - - verify.verify() catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - else => { - try zcu.failed_codegen.putNoClobber(gpa, nav_index, try Zcu.ErrorMsg.create( - gpa, - zcu.navSrcLoc(nav_index), - "invalid liveness: {s}", - .{@errorName(err)}, - )); - return; - }, - }; - } - - if (zcu.llvm_object) |llvm_object| { - llvm_object.updateFunc(pt, func_index, air.*, liveness) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - }; - } else if (comp.bin_file) |lf| { - lf.updateFunc(pt, func_index, air, liveness) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - error.CodegenFail => assert(zcu.failed_codegen.contains(nav_index)), - error.Overflow, error.RelocationNotByteAligned => { - try zcu.failed_codegen.putNoClobber(gpa, nav_index, try Zcu.ErrorMsg.create( - gpa, - zcu.navSrcLoc(nav_index), - "unable to codegen: {s}", - 
.{@errorName(err)}, - )); - // Not a retryable failure. - }, - }; - } -} - pub fn semaMod(pt: Zcu.PerThread, mod: *Module) !void { dev.check(.sema); const file_index = pt.zcu.module_roots.get(mod).?.unwrap().?; @@ -3449,7 +3377,7 @@ pub fn populateTestFunctions( } // The linker thread is not running, so we actually need to dispatch this task directly. - @import("../link.zig").doTask(zcu.comp, @intFromEnum(pt.tid), .{ .link_nav = nav_index }); + @import("../link.zig").doZcuTask(zcu.comp, @intFromEnum(pt.tid), .{ .link_nav = nav_index }); } } @@ -4442,3 +4370,87 @@ pub fn addDependency(pt: Zcu.PerThread, unit: AnalUnit, dependee: InternPool.Dep try info.deps.append(gpa, dependee); } } + +/// Performs code generation, which comes after `Sema` but before `link` in the pipeline. +/// This part of the pipeline is self-contained/"pure", so can be run in parallel with most +/// other code. This function is currently run either on the main thread, or on a separate +/// codegen thread, depending on whether the backend supports `Zcu.Feature.separate_thread`. 
+pub fn runCodegen(pt: Zcu.PerThread, func_index: InternPool.Index, air: *Air, out: *@import("../link.zig").ZcuTask.LinkFunc.SharedMir) void { + if (runCodegenInner(pt, func_index, air)) |mir| { + out.value = mir; + out.status.store(.ready, .release); + } else |err| switch (err) { + error.OutOfMemory => { + pt.zcu.comp.setAllocFailure(); + out.status.store(.failed, .monotonic); + }, + error.CodegenFail => { + pt.zcu.assertCodegenFailed(pt.zcu.funcInfo(func_index).owner_nav); + out.status.store(.failed, .monotonic); + }, + error.NoLinkFile => { + assert(pt.zcu.comp.bin_file == null); + out.status.store(.failed, .monotonic); + }, + } + pt.zcu.comp.link_task_queue.mirReady(pt.zcu.comp, out); +} +fn runCodegenInner(pt: Zcu.PerThread, func_index: InternPool.Index, air: *Air) error{ OutOfMemory, CodegenFail, NoLinkFile }!codegen.AnyMir { + const zcu = pt.zcu; + const gpa = zcu.gpa; + const ip = &zcu.intern_pool; + const comp = zcu.comp; + + const nav = zcu.funcInfo(func_index).owner_nav; + const fqn = ip.getNav(nav).fqn; + + const codegen_prog_node = zcu.codegen_prog_node.start(fqn.toSlice(ip), 0); + defer codegen_prog_node.end(); + + if (codegen.legalizeFeatures(pt, nav)) |features| { + try air.legalize(pt, features); + } + + var liveness: Air.Liveness = try .analyze(zcu, air.*, ip); + defer liveness.deinit(gpa); + + // TODO: surely writing to stderr from n threads simultaneously will work flawlessly + if (build_options.enable_debug_extensions and comp.verbose_air) { + std.debug.print("# Begin Function AIR: {}:\n", .{fqn.fmt(ip)}); + air.dump(pt, liveness); + std.debug.print("# End Function AIR: {}\n\n", .{fqn.fmt(ip)}); + } + + if (std.debug.runtime_safety) { + var verify: Air.Liveness.Verify = .{ + .gpa = gpa, + .zcu = zcu, + .air = air.*, + .liveness = liveness, + .intern_pool = ip, + }; + defer verify.deinit(); + + verify.verify() catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + else => return zcu.codegenFail(nav, "invalid liveness: 
{s}", .{@errorName(err)}), + }; + } + + // The LLVM backend is special, because we only need to do codegen. There is no equivalent to the + // "emit" step because LLVM does not support incremental linking. Our linker (LLD or self-hosted) + // will just see the ZCU object file which LLVM ultimately emits. + if (zcu.llvm_object) |llvm_object| { + return llvm_object.updateFunc(pt, func_index, air, &liveness); + } + + const lf = comp.bin_file orelse return error.NoLinkFile; + return codegen.generateFunction(lf, pt, zcu.navSrcLoc(nav), func_index, air, &liveness) catch |err| switch (err) { + error.OutOfMemory, + error.CodegenFail, + => |e| return e, + error.Overflow, + error.RelocationNotByteAligned, + => return zcu.codegenFail(nav, "unable to codegen: {s}", .{@errorName(err)}), + }; +} diff --git a/src/codegen.zig b/src/codegen.zig index a2de3e2d01..2c2524257c 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -85,16 +85,104 @@ pub fn legalizeFeatures(pt: Zcu.PerThread, nav_index: InternPool.Nav.Index) ?*co } } +/// Every code generation backend has a different MIR representation. However, we want to pass +/// MIR from codegen to the linker *regardless* of which backend is in use. So, we use this: a +/// union of all MIR types. The active tag is known from the backend in use; see `AnyMir.tag`. 
+pub const AnyMir = union { + aarch64: @import("arch/aarch64/Mir.zig"), + arm: @import("arch/arm/Mir.zig"), + powerpc: noreturn, //@import("arch/powerpc/Mir.zig"), + riscv64: @import("arch/riscv64/Mir.zig"), + sparc64: @import("arch/sparc64/Mir.zig"), + x86_64: @import("arch/x86_64/Mir.zig"), + wasm: @import("arch/wasm/Mir.zig"), + c: @import("codegen/c.zig").Mir, + + pub inline fn tag(comptime backend: std.builtin.CompilerBackend) []const u8 { + return switch (backend) { + .stage2_aarch64 => "aarch64", + .stage2_arm => "arm", + .stage2_powerpc => "powerpc", + .stage2_riscv64 => "riscv64", + .stage2_sparc64 => "sparc64", + .stage2_x86_64 => "x86_64", + .stage2_wasm => "wasm", + .stage2_c => "c", + else => unreachable, + }; + } + + pub fn deinit(mir: *AnyMir, zcu: *const Zcu) void { + const gpa = zcu.gpa; + const backend = target_util.zigBackend(zcu.root_mod.resolved_target.result, zcu.comp.config.use_llvm); + switch (backend) { + else => unreachable, + inline .stage2_aarch64, + .stage2_arm, + .stage2_powerpc, + .stage2_riscv64, + .stage2_sparc64, + .stage2_x86_64, + .stage2_c, + => |backend_ct| @field(mir, tag(backend_ct)).deinit(gpa), + } + } +}; + +/// Runs code generation for a function. This process converts the `Air` emitted by `Sema`, +/// alongside annotated `Liveness` data, to machine code in the form of MIR (see `AnyMir`). +/// +/// This is supposed to be a "pure" process, but some backends are currently buggy; see +/// `Zcu.Feature.separate_thread` for details. 
pub fn generateFunction( lf: *link.File, pt: Zcu.PerThread, src_loc: Zcu.LazySrcLoc, func_index: InternPool.Index, - air: Air, - liveness: Air.Liveness, + air: *const Air, + liveness: *const Air.Liveness, +) CodeGenError!AnyMir { + const zcu = pt.zcu; + const func = zcu.funcInfo(func_index); + const target = zcu.navFileScope(func.owner_nav).mod.?.resolved_target.result; + switch (target_util.zigBackend(target, false)) { + else => unreachable, + inline .stage2_aarch64, + .stage2_arm, + .stage2_powerpc, + .stage2_riscv64, + .stage2_sparc64, + .stage2_x86_64, + .stage2_c, + => |backend| { + dev.check(devFeatureForBackend(backend)); + const CodeGen = importBackend(backend); + const mir = try CodeGen.generate(lf, pt, src_loc, func_index, air, liveness); + return @unionInit(AnyMir, AnyMir.tag(backend), mir); + }, + } +} + +/// Converts the MIR returned by `generateFunction` to finalized machine code to be placed in +/// the output binary. This is called from linker implementations, and may query linker state. +/// +/// This function is not called for the C backend, as `link.C` directly understands its MIR. +/// +/// The `air` parameter is not supposed to exist, but some backends are currently buggy; see +/// `Zcu.Feature.separate_thread` for details. +pub fn emitFunction( + lf: *link.File, + pt: Zcu.PerThread, + src_loc: Zcu.LazySrcLoc, + func_index: InternPool.Index, + any_mir: *const AnyMir, code: *std.ArrayListUnmanaged(u8), debug_output: link.File.DebugInfoOutput, -) CodeGenError!void { + /// TODO: this parameter needs to be removed. We should not still hold AIR this late + /// in the pipeline. Any information needed to call emit must be stored in MIR. + /// This is `undefined` if the backend supports the `separate_thread` feature. 
+ air: *const Air, +) Allocator.Error!void { const zcu = pt.zcu; const func = zcu.funcInfo(func_index); const target = zcu.navFileScope(func.owner_nav).mod.?.resolved_target.result; @@ -108,7 +196,8 @@ pub fn generateFunction( .stage2_x86_64, => |backend| { dev.check(devFeatureForBackend(backend)); - return importBackend(backend).generate(lf, pt, src_loc, func_index, air, liveness, code, debug_output); + const mir = &@field(any_mir, AnyMir.tag(backend)); + return mir.emit(lf, pt, src_loc, func_index, code, debug_output, air); }, } } diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 3b8ab52982..f4952d4a58 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -3,6 +3,7 @@ const builtin = @import("builtin"); const assert = std.debug.assert; const mem = std.mem; const log = std.log.scoped(.c); +const Allocator = mem.Allocator; const dev = @import("../dev.zig"); const link = @import("../link.zig"); @@ -30,6 +31,35 @@ pub fn legalizeFeatures(_: *const std.Target) ?*const Air.Legalize.Features { }) else null; // we don't currently ask zig1 to use safe optimization modes } +/// For most backends, MIR is basically a sequence of machine code instructions, perhaps with some +/// "pseudo instructions" thrown in. For the C backend, it is instead the generated C code for a +/// single function. We also need to track some information to get merged into the global `link.C` +/// state, including: +/// * The UAVs used, so declarations can be emitted in `flush` +/// * The types used, so declarations can be emitted in `flush` +/// * The lazy functions used, so definitions can be emitted in `flush` +pub const Mir = struct { + /// This map contains all the UAVs we saw generating this function. + /// `link.C` will merge them into its `uavs`/`aligned_uavs` fields. + /// Key is the value of the UAV; value is the UAV's alignment, or + /// `.none` for natural alignment. The specified alignment is never + /// less than the natural alignment. 
+ uavs: std.AutoArrayHashMapUnmanaged(InternPool.Index, Alignment), + // These remaining fields are essentially just an owned version of `link.C.AvBlock`. + code: []u8, + fwd_decl: []u8, + ctype_pool: CType.Pool, + lazy_fns: LazyFnMap, + + pub fn deinit(mir: *Mir, gpa: Allocator) void { + mir.uavs.deinit(gpa); + gpa.free(mir.code); + gpa.free(mir.fwd_decl); + mir.ctype_pool.deinit(gpa); + mir.lazy_fns.deinit(gpa); + } +}; + pub const CType = @import("c/Type.zig"); pub const CValue = union(enum) { @@ -671,7 +701,7 @@ pub const Object = struct { /// This data is available both when outputting .c code and when outputting an .h file. pub const DeclGen = struct { - gpa: mem.Allocator, + gpa: Allocator, pt: Zcu.PerThread, mod: *Module, pass: Pass, @@ -682,10 +712,12 @@ pub const DeclGen = struct { error_msg: ?*Zcu.ErrorMsg, ctype_pool: CType.Pool, scratch: std.ArrayListUnmanaged(u32), - /// Keeps track of anonymous decls that need to be rendered before this - /// (named) Decl in the output C code. - uav_deps: std.AutoArrayHashMapUnmanaged(InternPool.Index, C.AvBlock), - aligned_uavs: std.AutoArrayHashMapUnmanaged(InternPool.Index, Alignment), + /// This map contains all the UAVs we saw generating this function. + /// `link.C` will merge them into its `uavs`/`aligned_uavs` fields. + /// Key is the value of the UAV; value is the UAV's alignment, or + /// `.none` for natural alignment. The specified alignment is never + /// less than the natural alignment. + uavs: std.AutoArrayHashMapUnmanaged(InternPool.Index, Alignment), pub const Pass = union(enum) { nav: InternPool.Nav.Index, @@ -753,21 +785,17 @@ pub const DeclGen = struct { // Indicate that the anon decl should be rendered to the output so that // our reference above is not undefined. 
const ptr_type = ip.indexToKey(uav.orig_ty).ptr_type; - const gop = try dg.uav_deps.getOrPut(dg.gpa, uav.val); - if (!gop.found_existing) gop.value_ptr.* = .{}; - - // Only insert an alignment entry if the alignment is greater than ABI - // alignment. If there is already an entry, keep the greater alignment. - const explicit_alignment = ptr_type.flags.alignment; - if (explicit_alignment != .none) { - const abi_alignment = Type.fromInterned(ptr_type.child).abiAlignment(zcu); - if (explicit_alignment.order(abi_alignment).compare(.gt)) { - const aligned_gop = try dg.aligned_uavs.getOrPut(dg.gpa, uav.val); - aligned_gop.value_ptr.* = if (aligned_gop.found_existing) - aligned_gop.value_ptr.maxStrict(explicit_alignment) - else - explicit_alignment; - } + const gop = try dg.uavs.getOrPut(dg.gpa, uav.val); + if (!gop.found_existing) gop.value_ptr.* = .none; + // If there is an explicit alignment, greater than the current one, use it. + // Note that we intentionally start at `.none`, so `gop.value_ptr.*` is never + // underaligned, so we don't need to worry about the `.none` case here. + if (ptr_type.flags.alignment != .none) { + // Resolve the current alignment so we can choose the bigger one. 
+ const cur_alignment: Alignment = if (gop.value_ptr.* == .none) abi: { + break :abi Type.fromInterned(ptr_type.child).abiAlignment(zcu); + } else gop.value_ptr.*; + gop.value_ptr.* = cur_alignment.maxStrict(ptr_type.flags.alignment); } } @@ -2895,7 +2923,79 @@ pub fn genLazyFn(o: *Object, lazy_ctype_pool: *const CType.Pool, lazy_fn: LazyFn } } -pub fn genFunc(f: *Function) !void { +pub fn generate( + lf: *link.File, + pt: Zcu.PerThread, + src_loc: Zcu.LazySrcLoc, + func_index: InternPool.Index, + air: *const Air, + liveness: *const Air.Liveness, +) @import("../codegen.zig").CodeGenError!Mir { + const zcu = pt.zcu; + const gpa = zcu.gpa; + + _ = src_loc; + assert(lf.tag == .c); + + const func = zcu.funcInfo(func_index); + + var function: Function = .{ + .value_map = .init(gpa), + .air = air.*, + .liveness = liveness.*, + .func_index = func_index, + .object = .{ + .dg = .{ + .gpa = gpa, + .pt = pt, + .mod = zcu.navFileScope(func.owner_nav).mod.?, + .error_msg = null, + .pass = .{ .nav = func.owner_nav }, + .is_naked_fn = Type.fromInterned(func.ty).fnCallingConvention(zcu) == .naked, + .expected_block = null, + .fwd_decl = .init(gpa), + .ctype_pool = .empty, + .scratch = .empty, + .uavs = .empty, + }, + .code = .init(gpa), + .indent_writer = undefined, // set later so we can get a pointer to object.code + }, + .lazy_fns = .empty, + }; + defer { + function.object.code.deinit(); + function.object.dg.fwd_decl.deinit(); + function.object.dg.ctype_pool.deinit(gpa); + function.object.dg.scratch.deinit(gpa); + function.object.dg.uavs.deinit(gpa); + function.deinit(); + } + try function.object.dg.ctype_pool.init(gpa); + function.object.indent_writer = .{ .underlying_writer = function.object.code.writer() }; + + genFunc(&function) catch |err| switch (err) { + error.AnalysisFail => return zcu.codegenFailMsg(func.owner_nav, function.object.dg.error_msg.?), + error.OutOfMemory => |e| return e, + }; + + var mir: Mir = .{ + .uavs = .empty, + .code = &.{}, + .fwd_decl = &.{}, + 
.ctype_pool = .empty, + .lazy_fns = .empty, + }; + errdefer mir.deinit(gpa); + mir.uavs = function.object.dg.uavs.move(); + mir.code = try function.object.code.toOwnedSlice(); + mir.fwd_decl = try function.object.dg.fwd_decl.toOwnedSlice(); + mir.ctype_pool = function.object.dg.ctype_pool.move(); + mir.lazy_fns = function.lazy_fns.move(); + return mir; +} + +fn genFunc(f: *Function) !void { const tracy = trace(@src()); defer tracy.end(); @@ -8482,7 +8582,7 @@ fn iterateBigTomb(f: *Function, inst: Air.Inst.Index) BigTomb { /// A naive clone of this map would create copies of the ArrayList which is /// stored in the values. This function additionally clones the values. -fn cloneFreeLocalsMap(gpa: mem.Allocator, map: *LocalsMap) !LocalsMap { +fn cloneFreeLocalsMap(gpa: Allocator, map: *LocalsMap) !LocalsMap { var cloned = try map.clone(gpa); const values = cloned.values(); var i: usize = 0; @@ -8499,7 +8599,7 @@ fn cloneFreeLocalsMap(gpa: mem.Allocator, map: *LocalsMap) !LocalsMap { return cloned; } -fn deinitFreeLocalsMap(gpa: mem.Allocator, map: *LocalsMap) void { +fn deinitFreeLocalsMap(gpa: Allocator, map: *LocalsMap) void { for (map.values()) |*value| { value.deinit(gpa); } diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 37c13c7211..e30e8f70a3 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -1121,8 +1121,8 @@ pub const Object = struct { o: *Object, pt: Zcu.PerThread, func_index: InternPool.Index, - air: Air, - liveness: Air.Liveness, + air: *const Air, + liveness: *const Air.Liveness, ) !void { assert(std.meta.eql(pt, o.pt)); const zcu = pt.zcu; @@ -1479,8 +1479,8 @@ pub const Object = struct { var fg: FuncGen = .{ .gpa = gpa, - .air = air, - .liveness = liveness, + .air = air.*, + .liveness = liveness.*, .ng = &ng, .wip = wip, .is_naked = fn_info.cc == .naked, @@ -1506,10 +1506,9 @@ pub const Object = struct { deinit_wip = false; fg.genBody(air.getMainBody(), .poi) catch |err| switch (err) { - error.CodegenFail => { - try 
zcu.failed_codegen.put(gpa, func.owner_nav, ng.err_msg.?); - ng.err_msg = null; - return; + error.CodegenFail => switch (zcu.codegenFailMsg(func.owner_nav, ng.err_msg.?)) { + error.CodegenFail => return, + error.OutOfMemory => |e| return e, }, else => |e| return e, }; @@ -1561,10 +1560,9 @@ pub const Object = struct { .err_msg = null, }; ng.genDecl() catch |err| switch (err) { - error.CodegenFail => { - try pt.zcu.failed_codegen.put(pt.zcu.gpa, nav_index, ng.err_msg.?); - ng.err_msg = null; - return; + error.CodegenFail => switch (pt.zcu.codegenFailMsg(nav_index, ng.err_msg.?)) { + error.CodegenFail => return, + error.OutOfMemory => |e| return e, }, else => |e| return e, }; diff --git a/src/codegen/spirv.zig b/src/codegen/spirv.zig index f83c6979ff..e6c06d9f20 100644 --- a/src/codegen/spirv.zig +++ b/src/codegen/spirv.zig @@ -230,8 +230,9 @@ pub const Object = struct { defer nav_gen.deinit(); nav_gen.genNav(do_codegen) catch |err| switch (err) { - error.CodegenFail => { - try zcu.failed_codegen.put(gpa, nav_index, nav_gen.error_msg.?); + error.CodegenFail => switch (zcu.codegenFailMsg(nav_index, nav_gen.error_msg.?)) { + error.CodegenFail => {}, + error.OutOfMemory => |e| return e, }, else => |other| { // There might be an error that happened *after* self.error_msg diff --git a/src/dev.zig b/src/dev.zig index 1dc8264ebc..2438ae6df7 100644 --- a/src/dev.zig +++ b/src/dev.zig @@ -25,6 +25,9 @@ pub const Env = enum { /// - `zig build-* -fno-emit-bin` sema, + /// - `zig build-* -ofmt=c` + cbe, + /// - sema /// - `zig build-* -fincremental -fno-llvm -fno-lld -target x86_64-linux --listen=-` @"x86_64-linux", @@ -144,6 +147,12 @@ pub const Env = enum { => true, else => Env.ast_gen.supports(feature), }, + .cbe => switch (feature) { + .c_backend, + .c_linker, + => true, + else => Env.sema.supports(feature), + }, .@"x86_64-linux" => switch (feature) { .build_command, .stdio_listen, diff --git a/src/libs/freebsd.zig b/src/libs/freebsd.zig index 47fef32773..98d4a42f91 100644 
--- a/src/libs/freebsd.zig +++ b/src/libs/freebsd.zig @@ -1004,7 +1004,7 @@ fn queueSharedObjects(comp: *Compilation, so_files: BuiltSharedObjects) void { } } - comp.queueLinkTasks(task_buffer[0..task_buffer_i]); + comp.queuePrelinkTasks(task_buffer[0..task_buffer_i]); } fn buildSharedLib( diff --git a/src/libs/glibc.zig b/src/libs/glibc.zig index cc781c5472..c1146d933d 100644 --- a/src/libs/glibc.zig +++ b/src/libs/glibc.zig @@ -1170,7 +1170,7 @@ fn queueSharedObjects(comp: *Compilation, so_files: BuiltSharedObjects) void { } } - comp.queueLinkTasks(task_buffer[0..task_buffer_i]); + comp.queuePrelinkTasks(task_buffer[0..task_buffer_i]); } fn buildSharedLib( diff --git a/src/libs/libcxx.zig b/src/libs/libcxx.zig index 17a7d3d29e..eb9f5df855 100644 --- a/src/libs/libcxx.zig +++ b/src/libs/libcxx.zig @@ -308,7 +308,7 @@ pub fn buildLibCxx(comp: *Compilation, prog_node: std.Progress.Node) BuildError! assert(comp.libcxx_static_lib == null); const crt_file = try sub_compilation.toCrtFile(); comp.libcxx_static_lib = crt_file; - comp.queueLinkTaskMode(crt_file.full_object_path, &config); + comp.queuePrelinkTaskMode(crt_file.full_object_path, &config); } pub fn buildLibCxxAbi(comp: *Compilation, prog_node: std.Progress.Node) BuildError!void { @@ -504,7 +504,7 @@ pub fn buildLibCxxAbi(comp: *Compilation, prog_node: std.Progress.Node) BuildErr assert(comp.libcxxabi_static_lib == null); const crt_file = try sub_compilation.toCrtFile(); comp.libcxxabi_static_lib = crt_file; - comp.queueLinkTaskMode(crt_file.full_object_path, &config); + comp.queuePrelinkTaskMode(crt_file.full_object_path, &config); } pub fn addCxxArgs( diff --git a/src/libs/libtsan.zig b/src/libs/libtsan.zig index 8a5ffd2eab..0c59d85bc5 100644 --- a/src/libs/libtsan.zig +++ b/src/libs/libtsan.zig @@ -325,7 +325,7 @@ pub fn buildTsan(comp: *Compilation, prog_node: std.Progress.Node) BuildError!vo }; const crt_file = try sub_compilation.toCrtFile(); - comp.queueLinkTaskMode(crt_file.full_object_path, &config); + 
comp.queuePrelinkTaskMode(crt_file.full_object_path, &config); assert(comp.tsan_lib == null); comp.tsan_lib = crt_file; } diff --git a/src/libs/libunwind.zig b/src/libs/libunwind.zig index 945689ebab..ccea649c17 100644 --- a/src/libs/libunwind.zig +++ b/src/libs/libunwind.zig @@ -195,7 +195,7 @@ pub fn buildStaticLib(comp: *Compilation, prog_node: std.Progress.Node) BuildErr }; const crt_file = try sub_compilation.toCrtFile(); - comp.queueLinkTaskMode(crt_file.full_object_path, &config); + comp.queuePrelinkTaskMode(crt_file.full_object_path, &config); assert(comp.libunwind_static_lib == null); comp.libunwind_static_lib = crt_file; } diff --git a/src/libs/musl.zig b/src/libs/musl.zig index d208b09827..21aeee98b5 100644 --- a/src/libs/musl.zig +++ b/src/libs/musl.zig @@ -278,7 +278,7 @@ pub fn buildCrtFile(comp: *Compilation, in_crt_file: CrtFile, prog_node: std.Pro errdefer comp.gpa.free(basename); const crt_file = try sub_compilation.toCrtFile(); - comp.queueLinkTaskMode(crt_file.full_object_path, &config); + comp.queuePrelinkTaskMode(crt_file.full_object_path, &config); { comp.mutex.lock(); defer comp.mutex.unlock(); diff --git a/src/libs/netbsd.zig b/src/libs/netbsd.zig index 718861bf5c..aab75cce49 100644 --- a/src/libs/netbsd.zig +++ b/src/libs/netbsd.zig @@ -669,7 +669,7 @@ fn queueSharedObjects(comp: *Compilation, so_files: BuiltSharedObjects) void { } } - comp.queueLinkTasks(task_buffer[0..task_buffer_i]); + comp.queuePrelinkTasks(task_buffer[0..task_buffer_i]); } fn buildSharedLib( diff --git a/src/link.zig b/src/link.zig index 4b4c3c611b..31fd0a4a4e 100644 --- a/src/link.zig +++ b/src/link.zig @@ -21,11 +21,11 @@ const Type = @import("Type.zig"); const Value = @import("Value.zig"); const Package = @import("Package.zig"); const dev = @import("dev.zig"); -const ThreadSafeQueue = @import("ThreadSafeQueue.zig").ThreadSafeQueue; const target_util = @import("target.zig"); const codegen = @import("codegen.zig"); pub const LdScript = @import("link/LdScript.zig"); 
+pub const Queue = @import("link/Queue.zig"); pub const Diags = struct { /// Stored here so that function definitions can distinguish between @@ -741,21 +741,26 @@ pub const File = struct { } /// May be called before or after updateExports for any given Decl. - /// TODO: currently `pub` because `Zcu.PerThread` is calling this. + /// The active tag of `mir` is determined by the backend used for the module this function is in. /// Never called when LLVM is codegenning the ZCU. - pub fn updateFunc( + fn updateFunc( base: *File, pt: Zcu.PerThread, func_index: InternPool.Index, - air: Air, - liveness: Air.Liveness, + /// This is owned by the caller, but the callee is permitted to mutate it provided + /// that `mir.deinit` remains legal for the caller. For instance, the callee can + /// take ownership of an embedded slice and replace it with `&.{}` in `mir`. + mir: *codegen.AnyMir, + /// This may be `undefined`; only pass it to `emitFunction`. + /// This parameter will eventually be removed. + maybe_undef_air: *const Air, ) UpdateNavError!void { assert(base.comp.zcu.?.llvm_object == null); switch (base.tag) { .lld => unreachable, inline else => |tag| { dev.check(tag.devFeature()); - return @as(*tag.Type(), @fieldParentPtr("base", base)).updateFunc(pt, func_index, air, liveness); + return @as(*tag.Type(), @fieldParentPtr("base", base)).updateFunc(pt, func_index, mir, maybe_undef_air); }, } } @@ -1213,40 +1218,7 @@ pub const File = struct { pub const Dwarf = @import("link/Dwarf.zig"); }; -/// Does all the tasks in the queue. Runs in exactly one separate thread -/// from the rest of compilation. All tasks performed here are -/// single-threaded with respect to one another. -pub fn flushTaskQueue(tid: usize, comp: *Compilation) void { - const diags = &comp.link_diags; - // As soon as check() is called, another `flushTaskQueue` call could occur, - // so the safety lock must go after the check. 
- while (comp.link_task_queue.check()) |tasks| { - comp.link_task_queue_safety.lock(); - defer comp.link_task_queue_safety.unlock(); - - if (comp.remaining_prelink_tasks > 0) { - comp.link_task_queue_postponed.ensureUnusedCapacity(comp.gpa, tasks.len) catch |err| switch (err) { - error.OutOfMemory => return diags.setAllocFailure(), - }; - } - - for (tasks) |task| doTask(comp, tid, task); - - if (comp.remaining_prelink_tasks == 0) { - if (comp.bin_file) |base| if (!base.post_prelink) { - base.prelink(comp.work_queue_progress_node) catch |err| switch (err) { - error.OutOfMemory => diags.setAllocFailure(), - error.LinkFailure => continue, - }; - base.post_prelink = true; - for (comp.link_task_queue_postponed.items) |task| doTask(comp, tid, task); - comp.link_task_queue_postponed.clearRetainingCapacity(); - }; - } - } -} - -pub const Task = union(enum) { +pub const PrelinkTask = union(enum) { /// Loads the objects, shared objects, and archives that are already /// known from the command line. load_explicitly_provided, @@ -1264,31 +1236,70 @@ pub const Task = union(enum) { /// Tells the linker to load an input which could be an object file, /// archive, or shared library. load_input: Input, - +}; +pub const ZcuTask = union(enum) { /// Write the constant value for a Decl to the output file. link_nav: InternPool.Nav.Index, /// Write the machine code for a function to the output file. 
- link_func: CodegenFunc, + link_func: LinkFunc, link_type: InternPool.Index, - update_line_number: InternPool.TrackedInst.Index, - - pub const CodegenFunc = struct { + pub fn deinit(task: ZcuTask, zcu: *const Zcu) void { + switch (task) { + .link_nav, + .link_type, + .update_line_number, + => {}, + .link_func => |link_func| { + switch (link_func.mir.status.load(.monotonic)) { + .pending => unreachable, // cannot deinit until MIR done + .failed => {}, // MIR not populated so doesn't need freeing + .ready => link_func.mir.value.deinit(zcu), + } + zcu.gpa.destroy(link_func.mir); + }, + } + } + pub const LinkFunc = struct { /// This will either be a non-generic `func_decl` or a `func_instance`. func: InternPool.Index, - /// This `Air` is owned by the `Job` and allocated with `gpa`. - /// It must be deinited when the job is processed. - air: Air, + /// This pointer is allocated into `gpa` and must be freed when the `ZcuTask` is processed. + /// The pointer is shared with the codegen worker, which will populate the MIR inside once + /// it has been generated. It's important that the `link_func` is queued at the same time as + /// the codegen job to ensure that the linker receives functions in a deterministic order, + /// allowing reproducible builds. + mir: *SharedMir, + /// This field exists only due to deficiencies in some codegen implementations; it should + /// be removed when the corresponding parameter of `CodeGen.emitFunction` can be removed. + /// This is `undefined` if `Zcu.Feature.separate_thread` is supported. + /// If this is defined, its memory is owned externally; do not `deinit` this `air`. + air: *const Air, + + pub const SharedMir = struct { + /// This is initially `.pending`. When `value` is populated, the codegen thread will set + /// this to `.ready`, and alert the queue if needed. It could also end up `.failed`. 
+ /// The action of storing a value (other than `.pending`) to this atomic transfers + ownership of memory associated with `value` to this `ZcuTask`. + status: std.atomic.Value(enum(u8) { + /// We are waiting on codegen to generate MIR (or die trying). + pending, + /// `value` is not populated and will not be populated. Just drop the task from the queue and move on. + failed, + /// `value` is populated with the MIR from the backend in use, which is not LLVM. + ready, + }), + /// This is `undefined` until `status` is set to `.ready`. Once populated, this MIR belongs + /// to the `ZcuTask`, and must be `deinit`ed when it is processed. Allocated into `gpa`. + value: codegen.AnyMir, + }; }; }; -pub fn doTask(comp: *Compilation, tid: usize, task: Task) void { +pub fn doPrelinkTask(comp: *Compilation, task: PrelinkTask) void { const diags = &comp.link_diags; + const base = comp.bin_file orelse return; switch (task) { .load_explicitly_provided => { - comp.remaining_prelink_tasks -= 1; - const base = comp.bin_file orelse return; - const prog_node = comp.work_queue_progress_node.start("Parse Linker Inputs", comp.link_inputs.len); defer prog_node.end(); for (comp.link_inputs) |input| { @@ -1306,9 +1317,6 @@ pub fn doTask(comp: *Compilation, tid: usize, task: Task) void { } }, .load_host_libc => { - comp.remaining_prelink_tasks -= 1; - const base = comp.bin_file orelse return; - const prog_node = comp.work_queue_progress_node.start("Linker Parse Host libc", 0); defer prog_node.end(); @@ -1368,8 +1376,6 @@ pub fn doTask(comp: *Compilation, tid: usize, task: Task) void { } }, .load_object => |path| { - comp.remaining_prelink_tasks -= 1; - const base = comp.bin_file orelse return; const prog_node = comp.work_queue_progress_node.start("Linker Parse Object", 0); defer prog_node.end(); base.openLoadObject(path) catch |err| switch (err) { @@ -1378,8 +1384,6 @@ pub fn doTask(comp: *Compilation, tid: usize, task: Task) void { }; }, .load_archive => |path| { - 
comp.remaining_prelink_tasks -= 1; - const base = comp.bin_file orelse return; const prog_node = comp.work_queue_progress_node.start("Linker Parse Archive", 0); defer prog_node.end(); base.openLoadArchive(path, null) catch |err| switch (err) { @@ -1388,8 +1392,6 @@ pub fn doTask(comp: *Compilation, tid: usize, task: Task) void { }; }, .load_dso => |path| { - comp.remaining_prelink_tasks -= 1; - const base = comp.bin_file orelse return; const prog_node = comp.work_queue_progress_node.start("Linker Parse Shared Library", 0); defer prog_node.end(); base.openLoadDso(path, .{ @@ -1401,8 +1403,6 @@ pub fn doTask(comp: *Compilation, tid: usize, task: Task) void { }; }, .load_input => |input| { - comp.remaining_prelink_tasks -= 1; - const base = comp.bin_file orelse return; const prog_node = comp.work_queue_progress_node.start("Linker Parse Input", 0); defer prog_node.end(); base.loadInput(input) catch |err| switch (err) { @@ -1416,11 +1416,12 @@ pub fn doTask(comp: *Compilation, tid: usize, task: Task) void { }, }; }, + } +} +pub fn doZcuTask(comp: *Compilation, tid: usize, task: ZcuTask) void { + const diags = &comp.link_diags; + switch (task) { .link_nav => |nav_index| { - if (comp.remaining_prelink_tasks != 0) { - comp.link_task_queue_postponed.appendAssumeCapacity(task); - return; - } const zcu = comp.zcu.?; const pt: Zcu.PerThread = .activate(zcu, @enumFromInt(tid)); defer pt.deactivate(); @@ -1431,39 +1432,43 @@ pub fn doTask(comp: *Compilation, tid: usize, task: Task) void { } else if (comp.bin_file) |lf| { lf.updateNav(pt, nav_index) catch |err| switch (err) { error.OutOfMemory => diags.setAllocFailure(), - error.CodegenFail => assert(zcu.failed_codegen.contains(nav_index)), + error.CodegenFail => zcu.assertCodegenFailed(nav_index), error.Overflow, error.RelocationNotByteAligned => { - zcu.failed_codegen.ensureUnusedCapacity(zcu.gpa, 1) catch return diags.setAllocFailure(); - const msg = Zcu.ErrorMsg.create( - zcu.gpa, - zcu.navSrcLoc(nav_index), - "unable to 
codegen: {s}", - .{@errorName(err)}, - ) catch return diags.setAllocFailure(); - zcu.failed_codegen.putAssumeCapacityNoClobber(nav_index, msg); + switch (zcu.codegenFail(nav_index, "unable to codegen: {s}", .{@errorName(err)})) { + error.CodegenFail => return, + error.OutOfMemory => return diags.setAllocFailure(), + } // Not a retryable failure. }, }; } }, .link_func => |func| { - if (comp.remaining_prelink_tasks != 0) { - comp.link_task_queue_postponed.appendAssumeCapacity(task); - return; - } - const pt: Zcu.PerThread = .activate(comp.zcu.?, @enumFromInt(tid)); + const zcu = comp.zcu.?; + const nav = zcu.funcInfo(func.func).owner_nav; + const pt: Zcu.PerThread = .activate(zcu, @enumFromInt(tid)); defer pt.deactivate(); - var air = func.air; - defer air.deinit(comp.gpa); - pt.linkerUpdateFunc(func.func, &air) catch |err| switch (err) { - error.OutOfMemory => diags.setAllocFailure(), - }; + assert(zcu.llvm_object == null); // LLVM codegen doesn't produce MIR + switch (func.mir.status.load(.monotonic)) { + .pending => unreachable, + .ready => {}, + .failed => return, + } + const mir = &func.mir.value; + if (comp.bin_file) |lf| { + lf.updateFunc(pt, func.func, mir, func.air) catch |err| switch (err) { + error.OutOfMemory => return diags.setAllocFailure(), + error.CodegenFail => return zcu.assertCodegenFailed(nav), + error.Overflow, error.RelocationNotByteAligned => { + switch (zcu.codegenFail(nav, "unable to codegen: {s}", .{@errorName(err)})) { + error.OutOfMemory => return diags.setAllocFailure(), + error.CodegenFail => return, + } + }, + }; + } }, .link_type => |ty| { - if (comp.remaining_prelink_tasks != 0) { - comp.link_task_queue_postponed.appendAssumeCapacity(task); - return; - } const zcu = comp.zcu.?; const pt: Zcu.PerThread = .activate(zcu, @enumFromInt(tid)); defer pt.deactivate(); @@ -1477,10 +1482,6 @@ pub fn doTask(comp: *Compilation, tid: usize, task: Task) void { } }, .update_line_number => |ti| { - if (comp.remaining_prelink_tasks != 0) { - 
comp.link_task_queue_postponed.appendAssumeCapacity(task); - return; - } const pt: Zcu.PerThread = .activate(comp.zcu.?, @enumFromInt(tid)); defer pt.deactivate(); if (pt.zcu.llvm_object == null) { diff --git a/src/link/C.zig b/src/link/C.zig index 34fc1d3775..417ebcdee6 100644 --- a/src/link/C.zig +++ b/src/link/C.zig @@ -18,6 +18,7 @@ const trace = @import("../tracy.zig").trace; const Type = @import("../Type.zig"); const Value = @import("../Value.zig"); const Air = @import("../Air.zig"); +const AnyMir = @import("../codegen.zig").AnyMir; pub const zig_h = "#include \"zig.h\"\n"; @@ -166,6 +167,9 @@ pub fn deinit(self: *C) void { self.uavs.deinit(gpa); self.aligned_uavs.deinit(gpa); + self.exported_navs.deinit(gpa); + self.exported_uavs.deinit(gpa); + self.string_bytes.deinit(gpa); self.fwd_decl_buf.deinit(gpa); self.code_buf.deinit(gpa); @@ -177,73 +181,28 @@ pub fn updateFunc( self: *C, pt: Zcu.PerThread, func_index: InternPool.Index, - air: Air, - liveness: Air.Liveness, + mir: *AnyMir, + /// This may be `undefined`; only pass it to `emitFunction`. + /// This parameter will eventually be removed. + maybe_undef_air: *const Air, ) link.File.UpdateNavError!void { + _ = maybe_undef_air; // It would be a bug to use this argument. 
+ const zcu = pt.zcu; const gpa = zcu.gpa; const func = zcu.funcInfo(func_index); + const gop = try self.navs.getOrPut(gpa, func.owner_nav); - if (!gop.found_existing) gop.value_ptr.* = .{}; - const ctype_pool = &gop.value_ptr.ctype_pool; - const lazy_fns = &gop.value_ptr.lazy_fns; - const fwd_decl = &self.fwd_decl_buf; - const code = &self.code_buf; - try ctype_pool.init(gpa); - ctype_pool.clearRetainingCapacity(); - lazy_fns.clearRetainingCapacity(); - fwd_decl.clearRetainingCapacity(); - code.clearRetainingCapacity(); - - var function: codegen.Function = .{ - .value_map = codegen.CValueMap.init(gpa), - .air = air, - .liveness = liveness, - .func_index = func_index, - .object = .{ - .dg = .{ - .gpa = gpa, - .pt = pt, - .mod = zcu.navFileScope(func.owner_nav).mod.?, - .error_msg = null, - .pass = .{ .nav = func.owner_nav }, - .is_naked_fn = Type.fromInterned(func.ty).fnCallingConvention(zcu) == .naked, - .expected_block = null, - .fwd_decl = fwd_decl.toManaged(gpa), - .ctype_pool = ctype_pool.*, - .scratch = .{}, - .uav_deps = self.uavs, - .aligned_uavs = self.aligned_uavs, - }, - .code = code.toManaged(gpa), - .indent_writer = undefined, // set later so we can get a pointer to object.code - }, - .lazy_fns = lazy_fns.*, + if (gop.found_existing) gop.value_ptr.deinit(gpa); + gop.value_ptr.* = .{ + .code = .empty, + .fwd_decl = .empty, + .ctype_pool = mir.c.ctype_pool.move(), + .lazy_fns = mir.c.lazy_fns.move(), }; - function.object.indent_writer = .{ .underlying_writer = function.object.code.writer() }; - defer { - self.uavs = function.object.dg.uav_deps; - self.aligned_uavs = function.object.dg.aligned_uavs; - fwd_decl.* = function.object.dg.fwd_decl.moveToUnmanaged(); - ctype_pool.* = function.object.dg.ctype_pool.move(); - ctype_pool.freeUnusedCapacity(gpa); - function.object.dg.scratch.deinit(gpa); - lazy_fns.* = function.lazy_fns.move(); - lazy_fns.shrinkAndFree(gpa, lazy_fns.count()); - code.* = function.object.code.moveToUnmanaged(); - function.deinit(); - } 
- - try zcu.failed_codegen.ensureUnusedCapacity(gpa, 1); - codegen.genFunc(&function) catch |err| switch (err) { - error.AnalysisFail => { - zcu.failed_codegen.putAssumeCapacityNoClobber(func.owner_nav, function.object.dg.error_msg.?); - return; - }, - else => |e| return e, - }; - gop.value_ptr.fwd_decl = try self.addString(function.object.dg.fwd_decl.items); - gop.value_ptr.code = try self.addString(function.object.code.items); + gop.value_ptr.code = try self.addString(mir.c.code); + gop.value_ptr.fwd_decl = try self.addString(mir.c.fwd_decl); + try self.addUavsFromCodegen(&mir.c.uavs); } fn updateUav(self: *C, pt: Zcu.PerThread, i: usize) !void { @@ -267,16 +226,14 @@ fn updateUav(self: *C, pt: Zcu.PerThread, i: usize) !void { .fwd_decl = fwd_decl.toManaged(gpa), .ctype_pool = codegen.CType.Pool.empty, .scratch = .{}, - .uav_deps = self.uavs, - .aligned_uavs = self.aligned_uavs, + .uavs = .empty, }, .code = code.toManaged(gpa), .indent_writer = undefined, // set later so we can get a pointer to object.code }; object.indent_writer = .{ .underlying_writer = object.code.writer() }; defer { - self.uavs = object.dg.uav_deps; - self.aligned_uavs = object.dg.aligned_uavs; + object.dg.uavs.deinit(gpa); fwd_decl.* = object.dg.fwd_decl.moveToUnmanaged(); object.dg.ctype_pool.deinit(object.dg.gpa); object.dg.scratch.deinit(gpa); @@ -295,8 +252,10 @@ fn updateUav(self: *C, pt: Zcu.PerThread, i: usize) !void { else => |e| return e, }; + try self.addUavsFromCodegen(&object.dg.uavs); + object.dg.ctype_pool.freeUnusedCapacity(gpa); - object.dg.uav_deps.values()[i] = .{ + self.uavs.values()[i] = .{ .code = try self.addString(object.code.items), .fwd_decl = try self.addString(object.dg.fwd_decl.items), .ctype_pool = object.dg.ctype_pool.move(), @@ -343,16 +302,14 @@ pub fn updateNav(self: *C, pt: Zcu.PerThread, nav_index: InternPool.Nav.Index) l .fwd_decl = fwd_decl.toManaged(gpa), .ctype_pool = ctype_pool.*, .scratch = .{}, - .uav_deps = self.uavs, - .aligned_uavs = 
self.aligned_uavs, + .uavs = .empty, }, .code = code.toManaged(gpa), .indent_writer = undefined, // set later so we can get a pointer to object.code }; object.indent_writer = .{ .underlying_writer = object.code.writer() }; defer { - self.uavs = object.dg.uav_deps; - self.aligned_uavs = object.dg.aligned_uavs; + object.dg.uavs.deinit(gpa); fwd_decl.* = object.dg.fwd_decl.moveToUnmanaged(); ctype_pool.* = object.dg.ctype_pool.move(); ctype_pool.freeUnusedCapacity(gpa); @@ -360,16 +317,16 @@ pub fn updateNav(self: *C, pt: Zcu.PerThread, nav_index: InternPool.Nav.Index) l code.* = object.code.moveToUnmanaged(); } - try zcu.failed_codegen.ensureUnusedCapacity(gpa, 1); codegen.genDecl(&object) catch |err| switch (err) { - error.AnalysisFail => { - zcu.failed_codegen.putAssumeCapacityNoClobber(nav_index, object.dg.error_msg.?); - return; + error.AnalysisFail => switch (zcu.codegenFailMsg(nav_index, object.dg.error_msg.?)) { + error.CodegenFail => return, + error.OutOfMemory => |e| return e, }, else => |e| return e, }; gop.value_ptr.code = try self.addString(object.code.items); gop.value_ptr.fwd_decl = try self.addString(object.dg.fwd_decl.items); + try self.addUavsFromCodegen(&object.dg.uavs); } pub fn updateLineNumber(self: *C, pt: Zcu.PerThread, ti_id: InternPool.TrackedInst.Index) !void { @@ -671,16 +628,14 @@ fn flushErrDecls(self: *C, pt: Zcu.PerThread, ctype_pool: *codegen.CType.Pool) F .fwd_decl = fwd_decl.toManaged(gpa), .ctype_pool = ctype_pool.*, .scratch = .{}, - .uav_deps = self.uavs, - .aligned_uavs = self.aligned_uavs, + .uavs = .empty, }, .code = code.toManaged(gpa), .indent_writer = undefined, // set later so we can get a pointer to object.code }; object.indent_writer = .{ .underlying_writer = object.code.writer() }; defer { - self.uavs = object.dg.uav_deps; - self.aligned_uavs = object.dg.aligned_uavs; + object.dg.uavs.deinit(gpa); fwd_decl.* = object.dg.fwd_decl.moveToUnmanaged(); ctype_pool.* = object.dg.ctype_pool.move(); 
ctype_pool.freeUnusedCapacity(gpa); @@ -692,6 +647,8 @@ fn flushErrDecls(self: *C, pt: Zcu.PerThread, ctype_pool: *codegen.CType.Pool) F error.AnalysisFail => unreachable, else => |e| return e, }; + + try self.addUavsFromCodegen(&object.dg.uavs); } fn flushLazyFn( @@ -719,8 +676,7 @@ fn flushLazyFn( .fwd_decl = fwd_decl.toManaged(gpa), .ctype_pool = ctype_pool.*, .scratch = .{}, - .uav_deps = .{}, - .aligned_uavs = .{}, + .uavs = .empty, }, .code = code.toManaged(gpa), .indent_writer = undefined, // set later so we can get a pointer to object.code @@ -729,8 +685,7 @@ fn flushLazyFn( defer { // If this assert trips just handle the anon_decl_deps the same as // `updateFunc()` does. - assert(object.dg.uav_deps.count() == 0); - assert(object.dg.aligned_uavs.count() == 0); + assert(object.dg.uavs.count() == 0); fwd_decl.* = object.dg.fwd_decl.moveToUnmanaged(); ctype_pool.* = object.dg.ctype_pool.move(); ctype_pool.freeUnusedCapacity(gpa); @@ -866,12 +821,10 @@ pub fn updateExports( .fwd_decl = fwd_decl.toManaged(gpa), .ctype_pool = decl_block.ctype_pool, .scratch = .{}, - .uav_deps = .{}, - .aligned_uavs = .{}, + .uavs = .empty, }; defer { - assert(dg.uav_deps.count() == 0); - assert(dg.aligned_uavs.count() == 0); + assert(dg.uavs.count() == 0); fwd_decl.* = dg.fwd_decl.moveToUnmanaged(); ctype_pool.* = dg.ctype_pool.move(); ctype_pool.freeUnusedCapacity(gpa); @@ -891,3 +844,21 @@ pub fn deleteExport( .uav => |uav| _ = self.exported_uavs.swapRemove(uav), } } + +fn addUavsFromCodegen(c: *C, uavs: *const std.AutoArrayHashMapUnmanaged(InternPool.Index, Alignment)) Allocator.Error!void { + const gpa = c.base.comp.gpa; + try c.uavs.ensureUnusedCapacity(gpa, uavs.count()); + try c.aligned_uavs.ensureUnusedCapacity(gpa, uavs.count()); + for (uavs.keys(), uavs.values()) |uav_val, uav_align| { + { + const gop = c.uavs.getOrPutAssumeCapacity(uav_val); + if (!gop.found_existing) gop.value_ptr.* = .{}; + } + if (uav_align != .none) { + const gop = 
c.aligned_uavs.getOrPutAssumeCapacity(uav_val); + gop.value_ptr.* = if (gop.found_existing) max: { + break :max gop.value_ptr.*.maxStrict(uav_align); + } else uav_align; + } + } +} diff --git a/src/link/Coff.zig b/src/link/Coff.zig index e7dcbcdf2a..9a040754ef 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -1079,7 +1079,7 @@ pub fn updateFunc( var code_buffer: std.ArrayListUnmanaged(u8) = .empty; defer code_buffer.deinit(gpa); - codegen.generateFunction( + try codegen.generateFunction( &coff.base, pt, zcu.navSrcLoc(nav_index), @@ -1088,20 +1088,7 @@ pub fn updateFunc( liveness, &code_buffer, .none, - ) catch |err| switch (err) { - error.CodegenFail => return error.CodegenFail, - error.OutOfMemory => return error.OutOfMemory, - error.Overflow, error.RelocationNotByteAligned => |e| { - try zcu.failed_codegen.putNoClobber(gpa, nav_index, try Zcu.ErrorMsg.create( - gpa, - zcu.navSrcLoc(nav_index), - "unable to codegen: {s}", - .{@errorName(e)}, - )); - try zcu.retryable_failures.append(zcu.gpa, AnalUnit.wrap(.{ .func = func_index })); - return error.CodegenFail; - }, - }; + ); try coff.updateNavCode(pt, nav_index, code_buffer.items, .FUNCTION); diff --git a/src/link/Elf.zig b/src/link/Elf.zig index 1702ef200c..34e04ad557 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -1691,13 +1691,13 @@ pub fn updateFunc( self: *Elf, pt: Zcu.PerThread, func_index: InternPool.Index, - air: Air, - liveness: Air.Liveness, + mir: *const codegen.AnyMir, + maybe_undef_air: *const Air, ) link.File.UpdateNavError!void { if (build_options.skip_non_native and builtin.object_format != .elf) { @panic("Attempted to compile for object format that was disabled by build configuration"); } - return self.zigObjectPtr().?.updateFunc(self, pt, func_index, air, liveness); + return self.zigObjectPtr().?.updateFunc(self, pt, func_index, mir, maybe_undef_air); } pub fn updateNav( diff --git a/src/link/Elf/ZigObject.zig b/src/link/Elf/ZigObject.zig index e377f3a9af..1a5ef4b408 100644 --- 
a/src/link/Elf/ZigObject.zig +++ b/src/link/Elf/ZigObject.zig @@ -1416,8 +1416,10 @@ pub fn updateFunc( elf_file: *Elf, pt: Zcu.PerThread, func_index: InternPool.Index, - air: Air, - liveness: Air.Liveness, + mir: *const codegen.AnyMir, + /// This may be `undefined`; only pass it to `emitFunction`. + /// This parameter will eventually be removed. + maybe_undef_air: *const Air, ) link.File.UpdateNavError!void { const tracy = trace(@src()); defer tracy.end(); @@ -1438,15 +1440,15 @@ pub fn updateFunc( var debug_wip_nav = if (self.dwarf) |*dwarf| try dwarf.initWipNav(pt, func.owner_nav, sym_index) else null; defer if (debug_wip_nav) |*wip_nav| wip_nav.deinit(); - try codegen.generateFunction( + try codegen.emitFunction( &elf_file.base, pt, zcu.navSrcLoc(func.owner_nav), func_index, - air, - liveness, + mir, &code_buffer, if (debug_wip_nav) |*dn| .{ .dwarf = dn } else .none, + maybe_undef_air, ); const code = code_buffer.items; diff --git a/src/link/Queue.zig b/src/link/Queue.zig new file mode 100644 index 0000000000..c73a0e9684 --- /dev/null +++ b/src/link/Queue.zig @@ -0,0 +1,234 @@ +//! Stores and manages the queue of link tasks. Each task is either a `PrelinkTask` or a `ZcuTask`. +//! +//! There must be at most one link thread (the thread processing these tasks) active at a time. If +//! `!comp.separateCodegenThreadOk()`, then ZCU tasks will be run on the main thread, bypassing this +//! queue entirely. +//! +//! All prelink tasks must be processed before any ZCU tasks are processed. After all prelink tasks +//! are run, but before any ZCU tasks are run, `prelink` must be called on the `link.File`. +//! +//! There will sometimes be a `ZcuTask` in the queue which is not yet ready because it depends on +//! MIR which has not yet been generated by any codegen thread. In this case, we must pause +//! processing of linker tasks until the MIR is ready. It would be incorrect to run any other link +//! tasks first, since this would make builds unreproducible. 
+ +mutex: std.Thread.Mutex, +/// Validates that only one `flushTaskQueue` thread is running at a time. +flush_safety: std.debug.SafetyLock, + +/// This is the number of prelink tasks which are expected but have not yet been enqueued. +/// Guarded by `mutex`. +pending_prelink_tasks: u32, + +/// Prelink tasks which have been enqueued and are not yet owned by the worker thread. +/// Allocated into `gpa`, guarded by `mutex`. +queued_prelink: std.ArrayListUnmanaged(PrelinkTask), +/// The worker thread moves items from `queued_prelink` into this array in order to process them. +/// Allocated into `gpa`, accessed only by the worker thread. +wip_prelink: std.ArrayListUnmanaged(PrelinkTask), + +/// Like `queued_prelink`, but for ZCU tasks. +/// Allocated into `gpa`, guarded by `mutex`. +queued_zcu: std.ArrayListUnmanaged(ZcuTask), +/// Like `wip_prelink`, but for ZCU tasks. +/// Allocated into `gpa`, accessed only by the worker thread. +wip_zcu: std.ArrayListUnmanaged(ZcuTask), + +/// When processing ZCU link tasks, we might have to block due to unpopulated MIR. When this +/// happens, some tasks in `wip_zcu` have been run, and some are still pending. This is the +/// index into `wip_zcu` which we have reached. +wip_zcu_idx: usize, + +/// Guarded by `mutex`. +state: union(enum) { + /// The link thread is currently running or queued to run. + running, + /// The link thread is not running or queued, because it has exhausted all immediately available + /// tasks. It should be spawned when more tasks are enqueued. If `pending_prelink_tasks` is not + /// zero, we are specifically waiting for prelink tasks. + finished, + /// The link thread is not running or queued, because it is waiting for this MIR to be populated. + /// Once codegen completes, it must call `mirReady` which will restart the link thread. + wait_for_mir: *ZcuTask.LinkFunc.SharedMir, +}, + +/// The initial `Queue` state, containing no tasks, expecting no prelink tasks, and with no running worker thread. 
+/// The `pending_prelink_tasks` and `queued_prelink` fields may be modified as needed before calling `start`.
+pub const empty: Queue = .{
+    .mutex = .{},
+    .flush_safety = .{},
+    .pending_prelink_tasks = 0,
+    .queued_prelink = .empty,
+    .wip_prelink = .empty,
+    .queued_zcu = .empty,
+    .wip_zcu = .empty,
+    .wip_zcu_idx = 0,
+    .state = .finished,
+};
+/// `comp` is needed to correctly deinit any pending `ZcuTask`s.
+pub fn deinit(q: *Queue, comp: *Compilation) void {
+    const gpa = comp.gpa;
+    for (q.queued_zcu.items) |t| t.deinit(comp.zcu.?);
+    for (q.wip_zcu.items[q.wip_zcu_idx..]) |t| t.deinit(comp.zcu.?);
+    q.queued_prelink.deinit(gpa);
+    q.wip_prelink.deinit(gpa);
+    q.queued_zcu.deinit(gpa);
+    q.wip_zcu.deinit(gpa);
+}
+
+/// This is expected to be called exactly once, after which the caller must not directly access
+/// `queued_prelink` or `pending_prelink_tasks` any longer. This will spawn the link thread if
+/// necessary.
+pub fn start(q: *Queue, comp: *Compilation) void {
+    assert(q.state == .finished);
+    assert(q.queued_zcu.items.len == 0);
+    if (q.queued_prelink.items.len != 0) {
+        q.state = .running;
+        comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp });
+    }
+}
+
+/// Called by codegen workers after they have populated a `ZcuTask.LinkFunc.SharedMir`. If the link
+/// thread was waiting for this MIR, it can resume.
+pub fn mirReady(q: *Queue, comp: *Compilation, mir: *ZcuTask.LinkFunc.SharedMir) void {
+    // We would like to assert that `mir` is not pending, but that would race with a worker thread
+    // potentially freeing it.
+    {
+        q.mutex.lock();
+        defer q.mutex.unlock();
+        switch (q.state) {
+            .finished => unreachable, // there's definitely a task queued
+            .running => return,
+            .wait_for_mir => |wait_for| if (wait_for != mir) return,
+        }
+        // We were waiting for `mir`, so we will restart the linker thread.
+ q.state = .running; + } + assert(mir.status.load(.monotonic) != .pending); + comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp }); +} + +/// Enqueues all prelink tasks in `tasks`. Asserts that they were expected, i.e. that `tasks.len` is +/// less than or equal to `q.pending_prelink_tasks`. Also asserts that `tasks.len` is not 0. +pub fn enqueuePrelink(q: *Queue, comp: *Compilation, tasks: []const PrelinkTask) Allocator.Error!void { + { + q.mutex.lock(); + defer q.mutex.unlock(); + try q.queued_prelink.appendSlice(comp.gpa, tasks); + q.pending_prelink_tasks -= @intCast(tasks.len); + switch (q.state) { + .wait_for_mir => unreachable, // we've not started zcu tasks yet + .running => return, + .finished => {}, + } + // Restart the linker thread, because it was waiting for a task + q.state = .running; + } + comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp }); +} + +pub fn enqueueZcu(q: *Queue, comp: *Compilation, task: ZcuTask) Allocator.Error!void { + assert(comp.separateCodegenThreadOk()); + { + q.mutex.lock(); + defer q.mutex.unlock(); + try q.queued_zcu.append(comp.gpa, task); + switch (q.state) { + .running, .wait_for_mir => return, + .finished => if (q.pending_prelink_tasks != 0) return, + } + // Restart the linker thread, unless it would immediately be blocked + if (task == .link_func and task.link_func.mir.status.load(.monotonic) == .pending) { + q.state = .{ .wait_for_mir = task.link_func.mir }; + return; + } + q.state = .running; + } + comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp }); +} + +fn flushTaskQueue(tid: usize, q: *Queue, comp: *Compilation) void { + q.flush_safety.lock(); + defer q.flush_safety.unlock(); + + if (std.debug.runtime_safety) { + q.mutex.lock(); + defer q.mutex.unlock(); + assert(q.state == .running); + } + prelink: while (true) { + assert(q.wip_prelink.items.len == 0); + { + q.mutex.lock(); + defer q.mutex.unlock(); + 
std.mem.swap(std.ArrayListUnmanaged(PrelinkTask), &q.queued_prelink, &q.wip_prelink); + if (q.wip_prelink.items.len == 0) { + if (q.pending_prelink_tasks == 0) { + break :prelink; // prelink is done + } else { + // We're expecting more prelink tasks so can't move on to ZCU tasks. + q.state = .finished; + return; + } + } + } + for (q.wip_prelink.items) |task| { + link.doPrelinkTask(comp, task); + } + q.wip_prelink.clearRetainingCapacity(); + } + + // We've finished the prelink tasks, so run prelink if necessary. + if (comp.bin_file) |lf| { + if (!lf.post_prelink) { + if (lf.prelink(comp.work_queue_progress_node)) |_| { + lf.post_prelink = true; + } else |err| switch (err) { + error.OutOfMemory => comp.link_diags.setAllocFailure(), + error.LinkFailure => {}, + } + } + } + + // Now we can run ZCU tasks. + while (true) { + if (q.wip_zcu.items.len == q.wip_zcu_idx) { + q.wip_zcu.clearRetainingCapacity(); + q.wip_zcu_idx = 0; + q.mutex.lock(); + defer q.mutex.unlock(); + std.mem.swap(std.ArrayListUnmanaged(ZcuTask), &q.queued_zcu, &q.wip_zcu); + if (q.wip_zcu.items.len == 0) { + // We've exhausted all available tasks. + q.state = .finished; + return; + } + } + const task = q.wip_zcu.items[q.wip_zcu_idx]; + // If the task is a `link_func`, we might have to stop until its MIR is populated. + pending: { + if (task != .link_func) break :pending; + const status_ptr = &task.link_func.mir.status; + // First check without the mutex to optimize for the common case where MIR is ready. + if (status_ptr.load(.monotonic) != .pending) break :pending; + q.mutex.lock(); + defer q.mutex.unlock(); + if (status_ptr.load(.monotonic) != .pending) break :pending; + // We will stop for now, and get restarted once this MIR is ready. 
+            q.state = .{ .wait_for_mir = task.link_func.mir };
+            return;
+        }
+        link.doZcuTask(comp, tid, task);
+        task.deinit(comp.zcu.?);
+        q.wip_zcu_idx += 1;
+    }
+}
+
+const std = @import("std");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+const Compilation = @import("../Compilation.zig");
+const link = @import("../link.zig");
+const PrelinkTask = link.PrelinkTask;
+const ZcuTask = link.ZcuTask;
+const Queue = @This();
diff --git a/src/target.zig b/src/target.zig
index 6172b5e7e9..01c6a6cbf0 100644
--- a/src/target.zig
+++ b/src/target.zig
@@ -850,7 +850,9 @@ pub inline fn backendSupportsFeature(backend: std.builtin.CompilerBackend, compt
         },
         .separate_thread => switch (backend) {
             .stage2_llvm => false,
+            // TODO: enable remaining backends once they support a separate linker thread.
+            .stage2_c => true,
+            else => false,
         },
     };
 }