zig/src/link/Queue.zig
Jacob Young f58200e3f2 Elf2: create a new linker from scratch
This iteration already has significantly better incremental support.

Closes #24110
2025-09-21 14:09:14 -07:00

345 lines
15 KiB
Zig

//! Stores and manages the queue of link tasks. Each task is either a `PrelinkTask` or a `ZcuTask`.
//!
//! There must be at most one link thread (the thread processing these tasks) active at a time. If
//! `!comp.separateCodegenThreadOk()`, then ZCU tasks will be run on the main thread, bypassing this
//! queue entirely.
//!
//! All prelink tasks must be processed before any ZCU tasks are processed. After all prelink tasks
//! are run, but before any ZCU tasks are run, `prelink` must be called on the `link.File`.
//!
//! There will sometimes be a `ZcuTask` in the queue which is not yet ready because it depends on
//! MIR which has not yet been generated by any codegen thread. In this case, we must pause
//! processing of linker tasks until the MIR is ready. It would be incorrect to run any other link
//! tasks first, since this would make builds unreproducible.
/// Protects every field below that is documented as "guarded by `mutex`".
mutex: std.Thread.Mutex,
/// Validates that only one `flushTaskQueue` thread is running at a time.
flush_safety: std.debug.SafetyLock,
/// This value is positive while there are still prelink tasks yet to be queued. Once they are
/// all queued, this value becomes 0, and ZCU tasks can be run. Guarded by `mutex`.
/// It is `undefined` in `empty` and is set to 1 by `start`; `startPrelinkItem` increments it and
/// `finishPrelinkItem` decrements it.
prelink_wait_count: u32,
/// Prelink tasks which have been enqueued and are not yet owned by the worker thread.
/// Allocated into `gpa`, guarded by `mutex`.
queued_prelink: std.ArrayList(PrelinkTask),
/// The worker thread moves items from `queued_prelink` into this array in order to process them.
/// Allocated into `gpa`, accessed only by the worker thread.
wip_prelink: std.ArrayList(PrelinkTask),
/// Like `queued_prelink`, but for ZCU tasks.
/// Allocated into `gpa`, guarded by `mutex`.
queued_zcu: std.ArrayList(ZcuTask),
/// Like `wip_prelink`, but for ZCU tasks.
/// Allocated into `gpa`, accessed only by the worker thread.
wip_zcu: std.ArrayList(ZcuTask),
/// When processing ZCU link tasks, we might have to block due to unpopulated MIR. When this
/// happens, some tasks in `wip_zcu` have been run, and some are still pending. This is the
/// index into `wip_zcu` which we have reached.
/// Tasks before this index have already been run and deinitialized by `flushTaskQueue`, which
/// is why `deinit` only cleans up `wip_zcu.items[wip_zcu_idx..]`.
wip_zcu_idx: usize,
/// The sum of all `air_bytes` for all currently-queued `ZcuTask.link_func` tasks. Because
/// MIR bytes are approximately proportional to AIR bytes, this acts to limit the amount of
/// AIR and MIR which is queued for codegen and link respectively, to prevent excessive
/// memory usage if analysis produces AIR faster than it can be processed by codegen/link.
/// The cap is `max_air_bytes_in_flight`.
/// A task's bytes are added by `enqueueZcu` and subtracted by `flushTaskQueue` once the task
/// has been run. Because `enqueueZcu` uses a saturating subtraction, a single task larger than
/// the cap is still admitted once nothing else is in flight.
/// Guarded by `mutex`.
air_bytes_in_flight: u32,
/// If nonzero, then a call to `enqueueZcu` is blocked waiting to add a `link_func` task with
/// this many AIR bytes, but cannot until `air_bytes_in_flight` is no greater than
/// `max_air_bytes_in_flight -| air_bytes_waiting`.
/// Guarded by `mutex`.
air_bytes_waiting: u32,
/// After setting `air_bytes_waiting`, `enqueueZcu` will wait on this condition (with `mutex`).
/// When `air_bytes_waiting` many bytes can be queued, this condition should be signaled.
air_bytes_cond: std.Thread.Condition,
/// What the link thread is currently doing. Code which enqueues work or completes MIR inspects
/// this to decide whether it must respawn `flushTaskQueue`.
/// Guarded by `mutex`.
state: union(enum) {
/// The link thread is currently running or queued to run.
running,
/// The link thread is not running or queued, because it has exhausted all immediately available
/// tasks. It should be spawned when more tasks are enqueued. If `prelink_wait_count` is not
/// zero, we are specifically waiting for prelink tasks.
finished,
/// The link thread is not running or queued, because it is waiting for this MIR to be populated.
/// Once codegen completes, it must call `mirReady` which will restart the link thread.
wait_for_mir: InternPool.Index,
},
/// Upper bound, in bytes, on `air_bytes_in_flight`.
///
/// MIR is typically around 20 times as large as the AIR it was generated from, and around 50
/// times as large in the worst observed case. Budgeting roughly 500 MiB of memory for AIR/MIR
/// and dividing by that worst-case 50x multiplier gives a limit of around 10 MiB of AIR.
const max_air_bytes_in_flight = 10 * (1 << 20);
/// A `Queue` holding no tasks and with no link thread running (`state` is `.finished`).
///
/// `prelink_wait_count` is deliberately left `undefined` here; `start` assigns it. Until `start`
/// is called, the caller may append directly to `queued_prelink`.
pub const empty: Queue = .{
    // Link thread status and prelink bookkeeping.
    .state = .finished,
    .prelink_wait_count = undefined, // assigned by `start`

    // Task lists.
    .queued_prelink = .empty,
    .wip_prelink = .empty,
    .queued_zcu = .empty,
    .wip_zcu = .empty,
    .wip_zcu_idx = 0,

    // AIR byte throttling.
    .air_bytes_in_flight = 0,
    .air_bytes_waiting = 0,
    .air_bytes_cond = .{},

    // Synchronization primitives.
    .mutex = .{},
    .flush_safety = .{},
};
/// Releases all memory owned by the queue.
///
/// `comp` supplies the allocator (`comp.gpa`) and the `Zcu` needed to deinit any `ZcuTask`s
/// which were queued but never run. Entries of `wip_zcu` before `wip_zcu_idx` are skipped,
/// since `flushTaskQueue` deinitializes each task right after running it.
pub fn deinit(q: *Queue, comp: *Compilation) void {
    // `comp.zcu.?` is only evaluated inside the loops, i.e. only if a pending task exists.
    const unprocessed_wip = q.wip_zcu.items[q.wip_zcu_idx..];
    for (q.queued_zcu.items) |task| task.deinit(comp.zcu.?);
    for (unprocessed_wip) |task| task.deinit(comp.zcu.?);

    const gpa = comp.gpa;
    q.queued_prelink.deinit(gpa);
    q.wip_prelink.deinit(gpa);
    q.queued_zcu.deinit(gpa);
    q.wip_zcu.deinit(gpa);
}
/// Arms the queue for processing. This is expected to be called exactly once; afterwards the
/// caller must no longer access `queued_prelink` directly.
///
/// Asserts that the link thread is not running and that no ZCU tasks are queued. If prelink
/// tasks were already placed in `queued_prelink`, the link thread is spawned to process them.
pub fn start(q: *Queue, comp: *Compilation) void {
    assert(q.state == .finished);
    assert(q.queued_zcu.items.len == 0);

    // The count begins at 1 so that the final, unpaired `finishPrelinkItem` call is what drops
    // it to 0. This cannot live in `empty`: the count would already have fallen to 0 on
    // successive incremental updates, yet each one still needs the initial 1.
    q.prelink_wait_count = 1;

    const nothing_queued = q.queued_prelink.items.len == 0;
    if (nothing_queued) return;

    q.state = .running;
    comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp });
}
/// Records that one more prelink item is outstanding. Each call must later be matched by a call
/// to `finishPrelinkItem`.
pub fn startPrelinkItem(q: *Queue) void {
    q.mutex.lock();
    defer q.mutex.unlock();

    // A count of zero would mean the prelink phase was already declared complete.
    const outstanding = q.prelink_wait_count;
    assert(outstanding > 0);
    q.prelink_wait_count = outstanding + 1;
}
/// Marks one prelink item as finished. This must be called exactly once more than
/// `startPrelinkItem`: the extra call signals that no further `startPrelinkItem` calls will be
/// made, so once every pending item has finished the queue can move on to ZCU tasks.
///
/// When the count reaches 0 while the link thread is idle, the link thread is respawned.
pub fn finishPrelinkItem(q: *Queue, comp: *Compilation) void {
    const respawn = respawn: {
        q.mutex.lock();
        defer q.mutex.unlock();

        q.prelink_wait_count -= 1;
        if (q.prelink_wait_count > 0) break :respawn false;

        // That was the last outstanding prelink item.
        switch (q.state) {
            .wait_for_mir => unreachable, // ZCU tasks have not been started yet
            .running => break :respawn false, // the running thread will observe the new count
            .finished => {},
        }
        assert(q.queued_prelink.items.len == 0);

        // The link thread must run again even if no ZCU tasks exist, so that
        // `link.File.prelink()` is guaranteed to be called.
        q.state = .running;
        break :respawn true;
    };
    if (respawn) {
        comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp });
    }
}
/// Notifies the queue that the MIR for `func_index` has been populated. Codegen workers call
/// this after filling in a `ZcuTask.LinkFunc.SharedMir`. If the link thread had stopped in
/// `.wait_for_mir` for exactly this function, it is respawned; otherwise this does nothing.
pub fn mirReady(q: *Queue, comp: *Compilation, func_index: InternPool.Index, mir: *ZcuTask.LinkFunc.SharedMir) void {
    // Asserting up front that `mir` is no longer pending would be nice, but it would race with
    // a worker thread which may be freeing it.
    const was_waiting_for_us = waiting: {
        q.mutex.lock();
        defer q.mutex.unlock();

        switch (q.state) {
            .finished, .running => break :waiting false,
            .wait_for_mir => |awaited| if (awaited != func_index) break :waiting false,
        }
        // The link thread stopped on this very MIR, so it is our job to restart it.
        q.state = .running;
        break :waiting true;
    };
    if (!was_waiting_for_us) return;

    assert(mir.status.load(.acquire) != .pending);
    comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp });
}
/// Appends every task in `tasks` to the prelink queue and respawns the link thread if it was
/// idle.
///
/// Asserts that prelink tasks are still expected, i.e. that `prelink_wait_count` is not yet 0.
/// Callers are expected to pass a non-empty `tasks`; note that this function does not itself
/// check the length.
///
/// Returns `error.OutOfMemory` if the queue could not grow; in that case nothing is spawned.
pub fn enqueuePrelink(q: *Queue, comp: *Compilation, tasks: []const PrelinkTask) Allocator.Error!void {
    const respawn = respawn: {
        q.mutex.lock();
        defer q.mutex.unlock();

        assert(q.prelink_wait_count > 0);
        try q.queued_prelink.appendSlice(comp.gpa, tasks);

        switch (q.state) {
            .wait_for_mir => unreachable, // ZCU tasks have not been started yet
            .running => break :respawn false, // the running thread will pick these up
            .finished => {},
        }
        // The link thread had run out of work; wake it up for the new tasks.
        q.state = .running;
        break :respawn true;
    };
    if (respawn) {
        comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp });
    }
}
/// Enqueues a single ZCU task and respawns the link thread if it was idle and is able to make
/// progress.
///
/// Asserts `comp.separateCodegenThreadOk()`.
///
/// For a `link_func` task this may block the calling thread until enough previously queued
/// `link_func` tasks have been linked that `air_bytes_in_flight` plus this task's `air_bytes`
/// fits under `max_air_bytes_in_flight`. Because the limit is computed with a saturating
/// subtraction, a task larger than the cap is admitted once nothing else is in flight.
///
/// Returns `error.OutOfMemory` if the queue could not grow. In that case the task has not been
/// queued and `air_bytes_in_flight` is left unchanged.
pub fn enqueueZcu(q: *Queue, comp: *Compilation, task: ZcuTask) Allocator.Error!void {
    assert(comp.separateCodegenThreadOk());
    {
        q.mutex.lock();
        defer q.mutex.unlock();
        // If this is a `link_func` task, we might need to wait for `air_bytes_in_flight` to fall.
        if (task == .link_func) {
            const max_in_flight = max_air_bytes_in_flight -| task.link_func.air_bytes;
            while (q.air_bytes_in_flight > max_in_flight) {
                q.air_bytes_waiting = task.link_func.air_bytes;
                q.air_bytes_cond.wait(&q.mutex);
                q.air_bytes_waiting = 0;
            }
        }
        // Reserve the slot before touching the byte accounting, so that an allocation failure
        // cannot leave bytes counted as in flight for a task which was never queued (nothing
        // would ever subtract them again). This must happen after the wait above: the link
        // thread may swap `queued_zcu` with another list while we are blocked on the condition.
        try q.queued_zcu.ensureUnusedCapacity(comp.gpa, 1);
        if (task == .link_func) q.air_bytes_in_flight += task.link_func.air_bytes;
        q.queued_zcu.appendAssumeCapacity(task);
        switch (q.state) {
            // A running thread will pick the task up by itself, and a thread waiting for MIR
            // will be restarted by `mirReady`.
            .running, .wait_for_mir => return,
            // ZCU tasks cannot run until the prelink phase is over; `finishPrelinkItem` restarts
            // the thread at that point.
            .finished => if (q.prelink_wait_count > 0) return,
        }
        // Restart the linker thread, unless it would immediately be blocked
        if (task == .link_func and task.link_func.mir.status.load(.acquire) == .pending) {
            q.state = .{ .wait_for_mir = task.link_func.func };
            return;
        }
        q.state = .running;
    }
    comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp });
}
/// Body of the link thread. This is the function handed to `comp.thread_pool.spawnWgId` by
/// `start`, `finishPrelinkItem`, `mirReady`, `enqueuePrelink` and `enqueueZcu`, each of which
/// sets `q.state` to `.running` before spawning it.
///
/// The work happens in three phases:
/// 1. Prelink tasks are moved from `queued_prelink` into `wip_prelink` and run with
///    `link.doPrelinkTask`, until none are queued and `prelink_wait_count` is 0.
/// 2. If `comp.bin_file` is set and its `post_prelink` flag is still false, `prelink` is called
///    on it; the flag is only set when that call succeeds.
/// 3. ZCU tasks are moved from `queued_zcu` into `wip_zcu` and run in order with
///    `link.doZcuTask`.
///
/// Whenever no queued task is available, `link.doIdleTask` is called instead, for as long as
/// `have_idle_tasks` stays true.
///
/// This function only returns after recording why in `q.state`: `.finished` when it ran out of
/// tasks (or must wait for more prelink tasks), or `.wait_for_mir` when the next ZCU task's MIR
/// is still pending. Errors from `link.doIdleTask` and `prelink` are not propagated;
/// `error.OutOfMemory` is recorded via `comp.link_diags.setAllocFailure()`.
///
/// `tid` is forwarded unchanged to `link.doIdleTask` and `link.doZcuTask`.
fn flushTaskQueue(tid: usize, q: *Queue, comp: *Compilation) void {
q.flush_safety.lock(); // every `return` site should unlock this before unlocking `q.mutex`
if (std.debug.runtime_safety) {
q.mutex.lock();
defer q.mutex.unlock();
assert(q.state == .running);
}
// Cleared once `link.doIdleTask` returns false or fails; set again after real tasks have run.
// NOTE(review): presumably a `true` result means more idle work may remain -- confirm against
// `link.doIdleTask`.
var have_idle_tasks = true;
// Phase 1: prelink tasks.
prelink: while (true) {
assert(q.wip_prelink.items.len == 0);
swap_queues: while (true) {
{
q.mutex.lock();
defer q.mutex.unlock();
// Take everything queued so far. `wip_prelink` is empty at this point, so `queued_prelink`
// ends up empty.
std.mem.swap(std.ArrayList(PrelinkTask), &q.queued_prelink, &q.wip_prelink);
if (q.wip_prelink.items.len > 0) break :swap_queues;
if (q.prelink_wait_count == 0) break :prelink; // prelink is done
if (!have_idle_tasks) {
// We're expecting more prelink tasks so can't move on to ZCU tasks.
q.state = .finished;
q.flush_safety.unlock();
return;
}
}
// Nothing is queued but more prelink tasks are expected: do idle work, then look again.
have_idle_tasks = link.doIdleTask(comp, tid) catch |err| switch (err) {
error.OutOfMemory => have_idle_tasks: {
comp.link_diags.setAllocFailure();
break :have_idle_tasks false;
},
error.LinkFailure => false,
};
}
for (q.wip_prelink.items) |task| {
link.doPrelinkTask(comp, task);
}
have_idle_tasks = true;
q.wip_prelink.clearRetainingCapacity();
}
// Phase 2: the prelink step itself.
// We've finished the prelink tasks, so run prelink if necessary.
if (comp.bin_file) |lf| {
if (!lf.post_prelink) {
if (lf.prelink()) |_| {
lf.post_prelink = true;
} else |err| switch (err) {
error.OutOfMemory => comp.link_diags.setAllocFailure(),
error.LinkFailure => {},
}
}
}
// Phase 3: ZCU tasks.
// Now we can run ZCU tasks.
while (true) {
// Refill `wip_zcu` only once every task in it has been run. After stopping for pending MIR,
// `wip_zcu_idx` still points at the unfinished task, so a respawned thread resumes there.
if (q.wip_zcu.items.len == q.wip_zcu_idx) swap_queues: {
q.wip_zcu.clearRetainingCapacity();
q.wip_zcu_idx = 0;
while (true) {
{
q.mutex.lock();
defer q.mutex.unlock();
// `wip_zcu` was just cleared, so `queued_zcu` ends up empty after the swap.
std.mem.swap(std.ArrayList(ZcuTask), &q.queued_zcu, &q.wip_zcu);
if (q.wip_zcu.items.len > 0) break :swap_queues;
if (!have_idle_tasks) {
// We've exhausted all available tasks.
q.state = .finished;
q.flush_safety.unlock();
return;
}
}
have_idle_tasks = link.doIdleTask(comp, tid) catch |err| switch (err) {
error.OutOfMemory => have_idle_tasks: {
comp.link_diags.setAllocFailure();
break :have_idle_tasks false;
},
error.LinkFailure => false,
};
}
}
const task = q.wip_zcu.items[q.wip_zcu_idx];
// If the task is a `link_func`, we might have to stop until its MIR is populated.
pending: {
if (task != .link_func) break :pending;
const status_ptr = &task.link_func.mir.status;
// Keep doing idle work while the MIR is pending, until idle work runs out.
while (true) {
// First check without the mutex to optimize for the common case where MIR is ready.
if (status_ptr.load(.acquire) != .pending) break :pending;
if (have_idle_tasks) have_idle_tasks = link.doIdleTask(comp, tid) catch |err| switch (err) {
error.OutOfMemory => have_idle_tasks: {
comp.link_diags.setAllocFailure();
break :have_idle_tasks false;
},
error.LinkFailure => false,
};
if (!have_idle_tasks) break;
}
// Check once more with the mutex held before committing to `.wait_for_mir`. `mirReady` reads
// `q.state` under the same mutex, so presumably this closes the window in which the MIR
// becomes ready without anyone restarting this thread.
q.mutex.lock();
defer q.mutex.unlock();
if (status_ptr.load(.acquire) != .pending) break :pending;
// We will stop for now, and get restarted once this MIR is ready.
q.state = .{ .wait_for_mir = task.link_func.func };
q.flush_safety.unlock();
return;
}
link.doZcuTask(comp, tid, task);
task.deinit(comp.zcu.?);
if (task == .link_func) {
// Decrease `air_bytes_in_flight`, since we've finished processing this MIR.
q.mutex.lock();
defer q.mutex.unlock();
q.air_bytes_in_flight -= task.link_func.air_bytes;
// Wake a blocked `enqueueZcu` only if its task now fits under the cap.
if (q.air_bytes_waiting != 0 and
q.air_bytes_in_flight <= max_air_bytes_in_flight -| q.air_bytes_waiting)
{
q.air_bytes_cond.signal();
}
}
// Only now is the task counted as done; `deinit` relies on this index to skip finished tasks.
q.wip_zcu_idx += 1;
have_idle_tasks = true;
}
}
const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const Compilation = @import("../Compilation.zig");
const InternPool = @import("../InternPool.zig");
const link = @import("../link.zig");
const PrelinkTask = link.PrelinkTask;
const ZcuTask = link.ZcuTask;
const Queue = @This();