mirror of https://github.com/ziglang/zig.git (synced 2025-12-06 06:13:07 +00:00)
Io.Threaded PoC reimplementation
This is a reimplementation of Io.Threaded that fixes the issues highlighted in the recent Zulip discussion. It's poorly tested, but it does successfully run to completion the litmus test example that I offered in the discussion.

This implementation has the following key design decisions:

- `t.cpu_count` is used as the thread pool size.
- `t.concurrency_limit` is used as the maximum number of "burst, one-shot" threads that can be spawned by `io.concurrent` past `t.cpu_count`.
- `t.available_thread_count` is the number of threads in the pool that are not currently busy with work (the bookkeeping happens in the worker function).
- `t.one_shot_thread_count` is the number of active threads that were spawned by `io.concurrent` past `t.cpu_count`.

In this implementation:

- `io.async` first tries to decrement `t.available_thread_count`. If there are no threads available, it tries to spawn a new one if possible; otherwise it runs the task immediately.
- `io.concurrent` first tries to use a thread in the pool, same as `io.async`, but on failure (no available threads and pool size limit reached) it tries to spawn a new one-shot thread. One-shot threads run a different main function that executes a single task, decrements the number of active one-shot threads, and then exits.

A relevant future improvement is to have one-shot threads stay alive for a few seconds (and potentially pick up a new task) to amortize spawning costs.
This commit is contained in:
parent d54fbc0123
commit 8eaebf5939
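Before the diff, here is the dispatch policy described in the commit message, condensed into a freestanding sketch. Everything in it (PoolState, dispatch, the field subset) is illustrative only, not the actual Io.Threaded API:

const std = @import("std");

/// What a submission does under this design (names are hypothetical).
const Dispatch = enum { use_idle_worker, spawn_pool_worker, spawn_one_shot, run_inline };

/// Illustrative subset of the Threaded bookkeeping fields.
const PoolState = struct {
    cpu_count: usize, // pool size limit; 0 means no limit
    concurrency_limit: usize, // one-shot limit past the pool; 0 means no limit
    pool_len: usize,
    available_thread_count: usize,
    one_shot_thread_count: usize,

    fn dispatch(s: PoolState, is_concurrent: bool) Dispatch {
        if (s.available_thread_count > 0) return .use_idle_worker;
        if (s.cpu_count == 0 or s.pool_len < s.cpu_count) return .spawn_pool_worker;
        if (is_concurrent and (s.concurrency_limit == 0 or
            s.one_shot_thread_count < s.concurrency_limit)) return .spawn_one_shot;
        // io.async runs the task immediately here; io.concurrent fails instead.
        return .run_inline;
    }
};

test "dispatch policy" {
    const s: PoolState = .{
        .cpu_count = 2,
        .concurrency_limit = 4,
        .pool_len = 2,
        .available_thread_count = 0,
        .one_shot_thread_count = 0,
    };
    try std.testing.expectEqual(Dispatch.spawn_one_shot, s.dispatch(true));
    try std.testing.expectEqual(Dispatch.run_inline, s.dispatch(false));
}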
@@ -24,8 +24,10 @@ run_queue: std.SinglyLinkedList = .{},
 join_requested: bool = false,
 threads: std.ArrayList(std.Thread),
 stack_size: usize,
-cpu_count: std.Thread.CpuCountError!usize,
-concurrent_count: usize,
+cpu_count: usize, // 0 means no limit
+concurrency_limit: usize, // 0 means no limit
+available_thread_count: usize = 0,
+one_shot_thread_count: usize = 0,
 
 wsa: if (is_windows) Wsa else struct {} = .{},
 
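Both new limit fields reuse the same sentinel convention, which every later check in this diff inlines. A micro-example of the convention (limitReached is a hypothetical helper, not part of the patch):

const std = @import("std");

/// "0 means no limit", as used by cpu_count and concurrency_limit above.
fn limitReached(current: usize, limit: usize) bool {
    return limit != 0 and current >= limit;
}

test limitReached {
    try std.testing.expect(!limitReached(1000, 0)); // 0 disables the limit
    try std.testing.expect(limitReached(4, 4));
    try std.testing.expect(!limitReached(3, 4));
}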
@@ -70,8 +72,6 @@ const Closure = struct {
     start: Start,
     node: std.SinglyLinkedList.Node = .{},
     cancel_tid: CancelId,
-    /// Whether this task bumps minimum number of threads in the pool.
-    is_concurrent: bool,
 
     const Start = *const fn (*Closure) void;
 
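With is_concurrent gone, Closure keeps only its start function and an intrusive queue node: the run queue links the `node` fields, and @fieldParentPtr recovers the owning closure. A self-contained demonstration of that pattern (Task is a stand-in type, not the real Closure):

const std = @import("std");

// Stand-in for Closure: payload plus an intrusive list node.
const Task = struct {
    value: u32,
    node: std.SinglyLinkedList.Node = .{},
};

test "intrusive node round-trip" {
    var task: Task = .{ .value = 42 };
    var run_queue: std.SinglyLinkedList = .{};
    run_queue.prepend(&task.node);

    const popped = run_queue.popFirst().?;
    const recovered: *Task = @fieldParentPtr("node", popped);
    try std.testing.expectEqual(@as(u32, 42), recovered.value);
}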
@@ -103,20 +103,20 @@ pub fn init(
     /// here.
     gpa: Allocator,
 ) Threaded {
+    assert(!builtin.single_threaded); // use 'init_single_threaded' instead
+
     var t: Threaded = .{
         .allocator = gpa,
         .threads = .empty,
         .stack_size = std.Thread.SpawnConfig.default_stack_size,
-        .cpu_count = std.Thread.getCpuCount(),
-        .concurrent_count = 0,
+        .cpu_count = std.Thread.getCpuCount() catch 0,
+        .concurrency_limit = 0,
         .old_sig_io = undefined,
         .old_sig_pipe = undefined,
         .have_signal_handler = false,
     };
 
-    if (t.cpu_count) |n| {
-        t.threads.ensureTotalCapacityPrecise(gpa, n - 1) catch {};
-    } else |_| {}
+    t.threads.ensureTotalCapacity(gpa, t.cpu_count) catch {};
 
     if (posix.Sigaction != void) {
         // This causes sending `posix.SIG.IO` to thread to interrupt blocking
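The error union is gone from the field: a failed CPU count probe now degrades to the 0 sentinel once at construction time, instead of being unwrapped at every dispatch site. The mapping in isolation:

const std = @import("std");

pub fn main() void {
    // Mirrors the init change above: any getCpuCount error collapses to 0,
    // which this implementation reads as "no pool size limit".
    const cpu_count = std.Thread.getCpuCount() catch 0;
    std.debug.print("pool size limit: {d} (0 = no limit)\n", .{cpu_count});
}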
@@ -145,7 +145,7 @@ pub const init_single_threaded: Threaded = .{
     .threads = .empty,
     .stack_size = std.Thread.SpawnConfig.default_stack_size,
     .cpu_count = 1,
-    .concurrent_count = 0,
+    .concurrency_limit = 0,
     .old_sig_io = undefined,
     .old_sig_pipe = undefined,
     .have_signal_handler = false,
@@ -184,18 +184,22 @@ fn worker(t: *Threaded) void {
         while (t.run_queue.popFirst()) |closure_node| {
             t.mutex.unlock();
             const closure: *Closure = @fieldParentPtr("node", closure_node);
-            const is_concurrent = closure.is_concurrent;
             closure.start(closure);
             t.mutex.lock();
-            if (is_concurrent) {
-                t.concurrent_count -= 1;
-            }
+            t.available_thread_count += 1;
         }
         if (t.join_requested) break;
         t.cond.wait(&t.mutex);
     }
 }
 
+fn oneShotWorker(t: *Threaded, closure: *Closure) void {
+    closure.start(closure);
+    t.mutex.lock();
+    defer t.mutex.unlock();
+    t.one_shot_thread_count -= 1;
+}
+
 pub fn io(t: *Threaded) Io {
     return .{
         .userdata = t,
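worker() parks on the condition variable between tasks and re-marks itself available after each one, while the new oneShotWorker() runs exactly one closure, decrements the counter under the mutex, and exits. The two shapes in a freestanding, runnable form (a plain counter stands in for the intrusive run queue and closures):

const std = @import("std");

const Pool = struct {
    mutex: std.Thread.Mutex = .{},
    cond: std.Thread.Condition = .{},
    pending: usize = 0,
    done: usize = 0,
    available_thread_count: usize = 0,
    one_shot_thread_count: usize = 0,
    join_requested: bool = false,

    // Pool worker: waits on the condition variable between tasks and counts
    // itself available again after each task, as in the diff's worker().
    fn worker(p: *Pool) void {
        p.mutex.lock();
        defer p.mutex.unlock();
        while (true) {
            while (p.pending > 0) {
                p.pending -= 1;
                p.mutex.unlock();
                // ... closure.start(closure) would run here, unlocked ...
                p.mutex.lock();
                p.done += 1;
                p.available_thread_count += 1;
            }
            if (p.join_requested) break;
            p.cond.wait(&p.mutex);
        }
    }

    // One-shot worker: exactly one task, then decrement and exit.
    fn oneShotWorker(p: *Pool) void {
        // ... closure.start(closure) would run here ...
        p.mutex.lock();
        defer p.mutex.unlock();
        p.done += 1;
        p.one_shot_thread_count -= 1;
    }
};

test "worker shapes" {
    var p: Pool = .{ .pending = 2, .one_shot_thread_count = 1 };
    const t1 = try std.Thread.spawn(.{}, Pool.worker, .{&p});
    p.mutex.lock();
    p.join_requested = true;
    p.mutex.unlock();
    p.cond.signal();
    t1.join();
    Pool.oneShotWorker(&p);
    try std.testing.expectEqual(@as(usize, 3), p.done);
    try std.testing.expectEqual(@as(usize, 0), p.one_shot_thread_count);
}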
@@ -432,7 +436,6 @@ const AsyncClosure = struct {
 
     fn init(
         gpa: Allocator,
-        mode: enum { async, concurrent },
         result_len: usize,
         result_alignment: std.mem.Alignment,
         context: []const u8,
@@ -454,10 +457,6 @@ const AsyncClosure = struct {
             .closure = .{
                 .cancel_tid = .none,
                 .start = start,
-                .is_concurrent = switch (mode) {
-                    .async => false,
-                    .concurrent => true,
-                },
             },
             .func = func,
             .context_alignment = context_alignment,
@@ -490,55 +489,51 @@ fn async(
     context_alignment: std.mem.Alignment,
     start: *const fn (context: *const anyopaque, result: *anyopaque) void,
 ) ?*Io.AnyFuture {
-    if (builtin.single_threaded) {
+    const t: *Threaded = @ptrCast(@alignCast(userdata));
+    if (t.cpu_count == 1) {
         start(context.ptr, result.ptr);
         return null;
     }
 
-    const t: *Threaded = @ptrCast(@alignCast(userdata));
-    const cpu_count = t.cpu_count catch {
-        return concurrent(userdata, result.len, result_alignment, context, context_alignment, start) catch {
-            start(context.ptr, result.ptr);
-            return null;
-        };
-    };
-
     const gpa = t.allocator;
-    const ac = AsyncClosure.init(gpa, .async, result.len, result_alignment, context, context_alignment, start) catch {
+    const ac = AsyncClosure.init(gpa, result.len, result_alignment, context, context_alignment, start) catch {
         start(context.ptr, result.ptr);
         return null;
     };
 
     t.mutex.lock();
 
-    const thread_capacity = cpu_count - 1 + t.concurrent_count;
-
-    t.threads.ensureTotalCapacityPrecise(gpa, thread_capacity) catch {
-        t.mutex.unlock();
-        ac.deinit(gpa);
-        start(context.ptr, result.ptr);
-        return null;
-    };
-
-    t.run_queue.prepend(&ac.closure.node);
-
-    if (t.threads.items.len < thread_capacity) {
-        const thread = std.Thread.spawn(.{ .stack_size = t.stack_size }, worker, .{t}) catch {
-            if (t.threads.items.len == 0) {
-                assert(t.run_queue.popFirst() == &ac.closure.node);
-                t.mutex.unlock();
-                ac.deinit(gpa);
-                start(context.ptr, result.ptr);
-                return null;
-            }
-            // Rely on other workers to do it.
-            t.mutex.unlock();
-            t.cond.signal();
-            return @ptrCast(ac);
-        };
-        t.threads.appendAssumeCapacity(thread);
-    }
+    if (t.available_thread_count == 0) {
+        if (t.cpu_count != 0 and t.threads.items.len >= t.cpu_count) {
+            t.mutex.unlock();
+            ac.deinit(gpa);
+            start(context.ptr, result.ptr);
+            return null;
+        }
+
+        t.threads.ensureUnusedCapacity(gpa, 1) catch {
+            t.mutex.unlock();
+            ac.deinit(gpa);
+            start(context.ptr, result.ptr);
+            return null;
+        };
+
+        const thread = std.Thread.spawn(
+            .{ .stack_size = t.stack_size },
+            worker,
+            .{t},
+        ) catch {
+            t.mutex.unlock();
+            ac.deinit(gpa);
+            start(context.ptr, result.ptr);
+            return null;
+        };
+
+        t.threads.appendAssumeCapacity(thread);
+    } else {
+        t.available_thread_count -= 1;
+    }
+
+    t.run_queue.prepend(&ac.closure.node);
 
     t.mutex.unlock();
     t.cond.signal();
     return @ptrCast(ac);
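Every failure path in the rewritten async() (closure allocation, list capacity, thread spawn) now converges on the same degradation: clean up, run the task on the caller's thread, return null. The shape in miniature, with a hypothetical tryHandOffToWorker standing in for the whole acquisition sequence:

const std = @import("std");

fn tryHandOffToWorker() error{NoWorker}!void {
    return error.NoWorker; // pretend every acquisition step failed
}

fn runTask(task: *const fn () void) void {
    tryHandOffToWorker() catch {
        // Inline fallback: blocks the caller, but the API never surfaces
        // an error, just like async() returning null above.
        task();
        return;
    };
}

pub fn main() void {
    runTask(&struct {
        fn hello() void {
            std.debug.print("ran inline\n", .{});
        }
    }.hello);
}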
@@ -555,38 +550,49 @@ fn concurrent(
     if (builtin.single_threaded) return error.ConcurrencyUnavailable;
 
     const t: *Threaded = @ptrCast(@alignCast(userdata));
-    const cpu_count = t.cpu_count catch 1;
 
     const gpa = t.allocator;
-    const ac = AsyncClosure.init(gpa, .concurrent, result_len, result_alignment, context, context_alignment, start) catch {
+    const ac = AsyncClosure.init(gpa, result_len, result_alignment, context, context_alignment, start) catch {
         return error.ConcurrencyUnavailable;
     };
+    errdefer ac.deinit(gpa);
 
     t.mutex.lock();
+    defer t.mutex.unlock();
 
-    t.concurrent_count += 1;
-    const thread_capacity = cpu_count - 1 + t.concurrent_count;
-
-    t.threads.ensureTotalCapacity(gpa, thread_capacity) catch {
-        t.mutex.unlock();
-        ac.deinit(gpa);
-        return error.ConcurrencyUnavailable;
-    };
-
-    t.run_queue.prepend(&ac.closure.node);
-
-    if (t.threads.items.len < thread_capacity) {
-        const thread = std.Thread.spawn(.{ .stack_size = t.stack_size }, worker, .{t}) catch {
-            assert(t.run_queue.popFirst() == &ac.closure.node);
-            t.mutex.unlock();
-            ac.deinit(gpa);
-            return error.ConcurrencyUnavailable;
-        };
-        t.threads.appendAssumeCapacity(thread);
-    }
-
-    t.mutex.unlock();
-    t.cond.signal();
+    // If there's an available thread, use it.
+    if (t.available_thread_count > 0) {
+        t.available_thread_count -= 1;
+        t.run_queue.prepend(&ac.closure.node);
+        t.cond.signal();
+        return @ptrCast(ac);
+    }
+
+    // If we can spawn a normal worker, spawn it and use it.
+    if (t.cpu_count == 0 or t.threads.items.len < t.cpu_count) {
+        t.threads.ensureUnusedCapacity(gpa, 1) catch return error.ConcurrencyUnavailable;
+
+        const thread = std.Thread.spawn(.{ .stack_size = t.stack_size }, worker, .{t}) catch
+            return error.ConcurrencyUnavailable;
+
+        t.threads.appendAssumeCapacity(thread);
+        t.run_queue.prepend(&ac.closure.node);
+        t.cond.signal();
+        return @ptrCast(ac);
+    }
+
+    // If we have a concurrency limit and we haven't hit it yet,
+    // spawn a new one-shot thread.
+    if (t.concurrency_limit != 0 and t.one_shot_thread_count >= t.concurrency_limit)
+        return error.ConcurrencyUnavailable;
+
+    t.one_shot_thread_count += 1;
+    errdefer t.one_shot_thread_count -= 1;
+
+    const thread = std.Thread.spawn(.{ .stack_size = t.stack_size }, oneShotWorker, .{ t, &ac.closure }) catch
+        return error.ConcurrencyUnavailable;
+    thread.detach();
+
     return @ptrCast(ac);
 }
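concurrent() also swaps the old per-path unlock-and-deinit blocks for one defer (the lock) plus errdefers (the closure, the counter). The rollback structure in isolation, with illustrative names only:

const std = @import("std");

const State = struct {
    mutex: std.Thread.Mutex = .{},
    one_shot_thread_count: usize = 0,
    concurrency_limit: usize = 0, // 0 means no limit
};

// Illustrative sketch of the cleanup structure: the defer releases the lock
// on every exit, while the errdefer rolls the counter back only if a later
// step (here, the elided spawn) fails.
fn claimOneShotSlot(s: *State) error{ConcurrencyUnavailable}!void {
    s.mutex.lock();
    defer s.mutex.unlock(); // runs on every path out of this function

    if (s.concurrency_limit != 0 and s.one_shot_thread_count >= s.concurrency_limit)
        return error.ConcurrencyUnavailable;

    s.one_shot_thread_count += 1;
    errdefer s.one_shot_thread_count -= 1; // only if a later step fails

    // ... std.Thread.spawn(.., oneShotWorker, ..) would go here ...
}

test claimOneShotSlot {
    var s: State = .{ .concurrency_limit = 1 };
    try claimOneShotSlot(&s);
    try std.testing.expectError(error.ConcurrencyUnavailable, claimOneShotSlot(&s));
}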
@@ -652,7 +658,6 @@ const GroupClosure = struct {
             .closure = .{
                 .cancel_tid = .none,
                 .start = start,
-                .is_concurrent = false,
             },
             .t = t,
             .group = group,
@@ -684,12 +689,9 @@ fn groupAsync(
     if (builtin.single_threaded) return start(group, context.ptr);
 
     const t: *Threaded = @ptrCast(@alignCast(userdata));
-    const cpu_count = t.cpu_count catch 1;
 
     const gpa = t.allocator;
-    const gc = GroupClosure.init(gpa, t, group, context, context_alignment, start) catch {
+    const gc = GroupClosure.init(gpa, t, group, context, context_alignment, start) catch
         return start(group, context.ptr);
-    };
 
     t.mutex.lock();
 
@@ -697,26 +699,36 @@ fn groupAsync(
     gc.node = .{ .next = @ptrCast(@alignCast(group.token)) };
     group.token = &gc.node;
 
-    const thread_capacity = cpu_count - 1 + t.concurrent_count;
-
-    t.threads.ensureTotalCapacityPrecise(gpa, thread_capacity) catch {
-        t.mutex.unlock();
-        gc.deinit(gpa);
-        return start(group, context.ptr);
-    };
-
-    t.run_queue.prepend(&gc.closure.node);
-
-    if (t.threads.items.len < thread_capacity) {
-        const thread = std.Thread.spawn(.{ .stack_size = t.stack_size }, worker, .{t}) catch {
-            assert(t.run_queue.popFirst() == &gc.closure.node);
-            t.mutex.unlock();
-            gc.deinit(gpa);
-            return start(group, context.ptr);
-        };
-        t.threads.appendAssumeCapacity(thread);
-    }
+    if (t.available_thread_count == 0) {
+        if (t.cpu_count != 0 and t.threads.items.len >= t.cpu_count) {
+            t.mutex.unlock();
+            gc.deinit(gpa);
+            return start(group, context.ptr);
+        }
+
+        t.threads.ensureUnusedCapacity(gpa, 1) catch {
+            t.mutex.unlock();
+            gc.deinit(gpa);
+            return start(group, context.ptr);
+        };
+
+        const thread = std.Thread.spawn(
+            .{ .stack_size = t.stack_size },
+            worker,
+            .{t},
+        ) catch {
+            t.mutex.unlock();
+            gc.deinit(gpa);
+            return start(group, context.ptr);
+        };
+
+        t.threads.appendAssumeCapacity(thread);
+    } else {
+        t.available_thread_count -= 1;
+    }
+
+    t.run_queue.prepend(&gc.closure.node);
 
     // This needs to be done before unlocking the mutex to avoid a race with
     // the associated task finishing.
     const group_state: *std.atomic.Value(usize) = @ptrCast(&group.state);