Merge pull request #20632 from jacobly0/codegen-thread

InternPool: enable separate codegen/linking thread
Jacob Young 2024-07-16 14:49:49 -04:00 committed by GitHub
commit 88bb0fd288
6 changed files with 116 additions and 65 deletions


@@ -669,14 +669,8 @@ fn appendTreeSymbol(symbol: TreeSymbol, buf: []u8, start_i: usize) usize {
fn clearWrittenWithEscapeCodes() anyerror!void {
if (!global_progress.need_clear) return;
var i: usize = 0;
const buf = global_progress.draw_buffer;
buf[i..][0..clear.len].* = clear.*;
i += clear.len;
global_progress.need_clear = false;
try write(buf[0..i]);
try write(clear);
}
/// U+25BA or


@@ -1050,15 +1050,16 @@ pub fn indexOfSentinel(comptime T: type, comptime sentinel: T, p: [*:sentinel]co
// as we don't read into a new page. This should be the case for most architectures
// which use paged memory, however should be confirmed before adding a new arch below.
.aarch64, .x86, .x86_64 => if (std.simd.suggestVectorLength(T)) |block_len| {
const block_size = @sizeOf(T) * block_len;
const Block = @Vector(block_len, T);
const mask: Block = @splat(sentinel);
comptime std.debug.assert(std.mem.page_size % @sizeOf(Block) == 0);
comptime std.debug.assert(std.mem.page_size % block_size == 0);
// First block may be unaligned
const start_addr = @intFromPtr(&p[i]);
const offset_in_page = start_addr & (std.mem.page_size - 1);
if (offset_in_page <= std.mem.page_size - @sizeOf(Block)) {
if (offset_in_page <= std.mem.page_size - block_size) {
// Will not read past the end of a page, full block.
const block: Block = p[i..][0..block_len].*;
const matches = block == mask;
@@ -1066,19 +1067,19 @@ pub fn indexOfSentinel(comptime T: type, comptime sentinel: T, p: [*:sentinel]co
return i + std.simd.firstTrue(matches).?;
}
i += (std.mem.alignForward(usize, start_addr, @alignOf(Block)) - start_addr) / @sizeOf(T);
i += @divExact(std.mem.alignForward(usize, start_addr, block_size) - start_addr, @sizeOf(T));
} else {
// Would read over a page boundary. Per-byte at a time until aligned or found.
// 0.39% chance this branch is taken for 4K pages at 16b block length.
//
// An alternate strategy is to read a full block (the last in the page) and
// mask the entries before the pointer.
while ((@intFromPtr(&p[i]) & (@alignOf(Block) - 1)) != 0) : (i += 1) {
while ((@intFromPtr(&p[i]) & (block_size - 1)) != 0) : (i += 1) {
if (p[i] == sentinel) return i;
}
}
std.debug.assert(std.mem.isAligned(@intFromPtr(&p[i]), @alignOf(Block)));
std.debug.assert(std.mem.isAligned(@intFromPtr(&p[i]), block_size));
while (true) {
const block: *const Block = @ptrCast(@alignCast(p[i..][0..block_len]));
const matches = block.* == mask;
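The change above replaces @alignOf(Block) and @sizeOf(Block) with a single block_size in the page-boundary check and in the alignment math; the two quantities need not agree for vector types, and the loop needs the size, not the ABI alignment. The two checks can be illustrated in isolation. The following is a standalone sketch, not part of this commit, with page_size, block_size, and the function names chosen for illustration:

    const std = @import("std");

    // Illustrative constants; the real code derives these from
    // std.simd.suggestVectorLength(T) and std.mem.page_size.
    const page_size: usize = 4096;
    const block_size: usize = 32; // e.g. @sizeOf(@Vector(16, u16))

    /// True when a block_size read starting at `addr` cannot cross a page boundary.
    fn fullBlockFitsInPage(addr: usize) bool {
        const offset_in_page = addr & (page_size - 1);
        return offset_in_page <= page_size - block_size;
    }

    /// Elements of type T to skip so the next read starts on a block_size boundary.
    fn elemsUntilBlockAligned(comptime T: type, addr: usize) usize {
        return @divExact(std.mem.alignForward(usize, addr, block_size) - addr, @sizeOf(T));
    }

    test "block reads stay within one page" {
        try std.testing.expect(fullBlockFitsInPage(page_size - block_size));
        try std.testing.expect(!fullBlockFitsInPage(page_size - block_size + 1));
        try std.testing.expectEqual(@as(usize, 15), elemsUntilBlockAligned(u16, page_size - block_size + 2));
    }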


@@ -10,6 +10,8 @@ shards: []Shard = &.{},
global_error_set: GlobalErrorSet = GlobalErrorSet.empty,
/// Cached number of active bits in a `tid`.
tid_width: if (single_threaded) u0 else std.math.Log2Int(u32) = 0,
/// Cached shift amount to put a `tid` in the top bits of a 30-bit value.
tid_shift_30: if (single_threaded) u0 else std.math.Log2Int(u32) = if (single_threaded) 0 else 31,
/// Cached shift amount to put a `tid` in the top bits of a 31-bit value.
tid_shift_31: if (single_threaded) u0 else std.math.Log2Int(u32) = if (single_threaded) 0 else 31,
/// Cached shift amount to put a `tid` in the top bits of a 32-bit value.
@@ -53,7 +55,7 @@ free_dep_entries: std.ArrayListUnmanaged(DepEntry.Index) = .{},
/// Whether a multi-threaded intern pool is useful.
/// Currently `false` until the intern pool is actually accessed
/// from multiple threads to reduce the cost of this data structure.
const want_multi_threaded = false;
const want_multi_threaded = true;
/// Whether a single-threaded intern pool impl is in use.
pub const single_threaded = builtin.single_threaded or !want_multi_threaded;
@@ -941,8 +943,12 @@ const Shard = struct {
return @atomicLoad(Value, &entry.value, .acquire);
}
fn release(entry: *Entry, value: Value) void {
assert(value != .none);
@atomicStore(Value, &entry.value, value, .release);
}
fn resetUnordered(entry: *Entry) void {
@atomicStore(Value, &entry.value, .none, .unordered);
}
};
};
}
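Here, release now asserts that a real index is being published, and the new resetUnordered lets the owning writer withdraw a tentatively published entry (used by the GetOrPutKey changes further down). A minimal standalone sketch of that publish/withdraw pattern; Slot and its values are illustrative, not the InternPool types:

    const std = @import("std");

    const Value = enum(u32) { none = std.math.maxInt(u32), _ };

    const Slot = struct {
        value: Value = .none,

        fn acquire(slot: *const Slot) Value {
            // Readers observe either .none or a fully written value.
            return @atomicLoad(Value, &slot.value, .acquire);
        }

        fn release(slot: *Slot, value: Value) void {
            std.debug.assert(value != .none);
            // Publishes the value; pairs with the acquire load above.
            @atomicStore(Value, &slot.value, value, .release);
        }

        fn resetUnordered(slot: *Slot) void {
            // Withdraws a tentative value; only the owning writer calls this,
            // so no ordering with other writes is needed.
            @atomicStore(Value, &slot.value, .none, .unordered);
        }
    };

    test "publish then withdraw" {
        var slot: Slot = .{};
        slot.release(@enumFromInt(42));
        try std.testing.expect(slot.acquire() == @as(Value, @enumFromInt(42)));
        slot.resetUnordered();
        try std.testing.expect(slot.acquire() == .none);
    }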
@@ -4089,8 +4095,8 @@ pub const Index = enum(u32) {
fn wrap(unwrapped: Unwrapped, ip: *const InternPool) Index {
assert(@intFromEnum(unwrapped.tid) <= ip.getTidMask());
assert(unwrapped.index <= ip.getIndexMask(u31));
return @enumFromInt(@as(u32, @intFromEnum(unwrapped.tid)) << ip.tid_shift_31 | unwrapped.index);
assert(unwrapped.index <= ip.getIndexMask(u30));
return @enumFromInt(@as(u32, @intFromEnum(unwrapped.tid)) << ip.tid_shift_30 | unwrapped.index);
}
pub fn getExtra(unwrapped: Unwrapped, ip: *const InternPool) Local.Extra {
@@ -4129,8 +4135,8 @@ pub const Index = enum(u32) {
.tid = .main,
.index = @intFromEnum(index),
} else .{
.tid = @enumFromInt(@intFromEnum(index) >> ip.tid_shift_31 & ip.getTidMask()),
.index = @intFromEnum(index) & ip.getIndexMask(u31),
.tid = @enumFromInt(@intFromEnum(index) >> ip.tid_shift_30 & ip.getTidMask()),
.index = @intFromEnum(index) & ip.getIndexMask(u30),
};
}
@@ -5820,6 +5826,7 @@ pub fn init(ip: *InternPool, gpa: Allocator, available_threads: usize) !void {
});
ip.tid_width = @intCast(std.math.log2_int_ceil(usize, used_threads));
ip.tid_shift_30 = if (single_threaded) 0 else 30 - ip.tid_width;
ip.tid_shift_31 = if (single_threaded) 0 else 31 - ip.tid_width;
ip.tid_shift_32 = if (single_threaded) 0 else ip.tid_shift_31 +| 1;
ip.shards = try gpa.alloc(Shard, @as(usize, 1) << ip.tid_width);
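init now also derives tid_shift_30: the thread id occupies the top tid_width bits of the 30-, 31-, or 32-bit index space, so for example 7 worker threads give tid_width = 3 and tid_shift_30 = 27. A standalone worked sketch of the 30-bit packing used by Index.Unwrapped.wrap above; the function names are illustrative, and the u5 shift type mirrors std.math.Log2Int(u32):

    const std = @import("std");

    fn tidWidth(used_threads: usize) u5 {
        return @intCast(std.math.log2_int_ceil(usize, used_threads));
    }

    /// Pack a thread id into the top bits of a 30-bit index space.
    fn wrap30(tid: u32, index: u32, tid_width: u5) u32 {
        const tid_shift_30: u5 = 30 - tid_width;
        std.debug.assert(index <= (@as(u32, 1) << tid_shift_30) - 1);
        return tid << tid_shift_30 | index;
    }

    fn unwrap30(packed_index: u32, tid_width: u5) struct { tid: u32, index: u32 } {
        const tid_shift_30: u5 = 30 - tid_width;
        const index_mask = (@as(u32, 1) << tid_shift_30) - 1;
        return .{
            .tid = packed_index >> tid_shift_30,
            .index = packed_index & index_mask,
        };
    }

    test "tid packing round-trips" {
        const width = tidWidth(7); // 3 bits for up to 8 threads
        const packed_index = wrap30(5, 1234, width);
        const unwrapped = unwrap30(packed_index, width);
        try std.testing.expectEqual(@as(u32, 5), unwrapped.tid);
        try std.testing.expectEqual(@as(u32, 1234), unwrapped.index);
    }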
@@ -6585,35 +6592,55 @@ const GetOrPutKey = union(enum) {
},
fn put(gop: *GetOrPutKey) Index {
return gop.putAt(0);
}
fn putAt(gop: *GetOrPutKey, offset: u32) Index {
switch (gop.*) {
.existing => unreachable,
.new => |info| {
.new => |*info| {
const index = Index.Unwrapped.wrap(.{
.tid = info.tid,
.index = info.ip.getLocal(info.tid).mutate.items.len - 1 - offset,
.index = info.ip.getLocal(info.tid).mutate.items.len - 1,
}, info.ip);
info.shard.shared.map.entries[info.map_index].release(index);
info.shard.mutate.map.len += 1;
info.shard.mutate.map.mutex.unlock();
gop.* = .{ .existing = index };
gop.putTentative(index);
gop.putFinal(index);
return index;
},
}
}
fn assign(gop: *GetOrPutKey, new_gop: GetOrPutKey) void {
gop.deinit();
gop.* = new_gop;
fn putTentative(gop: *GetOrPutKey, index: Index) void {
assert(index != .none);
switch (gop.*) {
.existing => unreachable,
.new => |*info| gop.new.shard.shared.map.entries[info.map_index].release(index),
}
}
fn putFinal(gop: *GetOrPutKey, index: Index) void {
assert(index != .none);
switch (gop.*) {
.existing => unreachable,
.new => |info| {
assert(info.shard.shared.map.entries[info.map_index].value == index);
info.shard.mutate.map.len += 1;
info.shard.mutate.map.mutex.unlock();
gop.* = .{ .existing = index };
},
}
}
fn cancel(gop: *GetOrPutKey) void {
switch (gop.*) {
.existing => {},
.new => |info| info.shard.mutate.map.mutex.unlock(),
}
gop.* = .{ .existing = undefined };
}
fn deinit(gop: *GetOrPutKey) void {
switch (gop.*) {
.existing => {},
.new => |info| info.shard.mutate.map.mutex.unlock(),
.new => |info| info.shard.shared.map.entries[info.map_index].resetUnordered(),
}
gop.cancel();
gop.* = undefined;
}
};
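put is now split into putTentative (publish the index into the shard map while still holding the shard mutex) and putFinal (check the published value, bump the map length, unlock, and mark the handle as existing), while deinit withdraws an unfinalized tentative entry via resetUnordered before unlocking, so a failed multi-entry insertion leaves no half-published indexes behind. The shape of that two-phase protocol, reduced to a toy mutex-protected slot; this is a sketch only, and Table and Handle are illustrative, not the real shard or GetOrPutKey types:

    const std = @import("std");

    const Index = enum(u32) { none = std.math.maxInt(u32), _ };

    const Table = struct {
        mutex: std.Thread.Mutex = .{},
        slot: Index = .none,
        len: u32 = 0,
    };

    const Handle = struct {
        table: *Table,
        finalized: bool = false,

        /// Lock the table and hand out a handle for a pending insertion.
        fn begin(table: *Table) Handle {
            table.mutex.lock();
            return .{ .table = table };
        }

        /// Publish a value while still holding the lock; may later be withdrawn.
        fn putTentative(h: *Handle, index: Index) void {
            std.debug.assert(index != .none);
            h.table.slot = index;
        }

        /// Commit the tentative value: bump the length and release the lock.
        fn putFinal(h: *Handle, index: Index) void {
            std.debug.assert(h.table.slot == index);
            h.table.len += 1;
            h.finalized = true;
            h.table.mutex.unlock();
        }

        /// On failure paths: withdraw anything tentative and release the lock.
        fn deinit(h: *Handle) void {
            if (!h.finalized) {
                h.table.slot = .none;
                h.table.mutex.unlock();
            }
            h.* = undefined;
        }
    };

    test "two-phase insertion" {
        var table: Table = .{};
        var h = Handle.begin(&table);
        defer h.deinit();
        h.putTentative(@enumFromInt(7));
        h.putFinal(@enumFromInt(7));
        try std.testing.expectEqual(@as(u32, 1), table.len);
    }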
@@ -6622,6 +6649,15 @@ fn getOrPutKey(
gpa: Allocator,
tid: Zcu.PerThread.Id,
key: Key,
) Allocator.Error!GetOrPutKey {
return ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, key, 0);
}
fn getOrPutKeyEnsuringAdditionalCapacity(
ip: *InternPool,
gpa: Allocator,
tid: Zcu.PerThread.Id,
key: Key,
additional_capacity: u32,
) Allocator.Error!GetOrPutKey {
const full_hash = key.hash64(ip);
const hash: u32 = @truncate(full_hash >> 32);
@@ -6657,11 +6693,16 @@ fn getOrPutKey(
}
}
const map_header = map.header().*;
if (shard.mutate.map.len >= map_header.capacity * 3 / 5) {
const required = shard.mutate.map.len + additional_capacity;
if (required >= map_header.capacity * 3 / 5) {
const arena_state = &ip.getLocal(tid).mutate.arena;
var arena = arena_state.promote(gpa);
defer arena_state.* = arena.state;
const new_map_capacity = map_header.capacity * 2;
var new_map_capacity = map_header.capacity;
while (true) {
new_map_capacity *= 2;
if (required < new_map_capacity * 3 / 5) break;
}
const new_map_buf = try arena.allocator().alignedAlloc(
u8,
Map.alignment,
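The load-factor check now counts entries that are about to be inserted (additional_capacity), and because several entries may be reserved at once, a single doubling may not be enough, hence the loop. A standalone worked example of the growth rule; the 3/5 load factor comes from the code above, the helper name is illustrative:

    const std = @import("std");

    /// Double `capacity` until `required` entries stay under a 3/5 load factor.
    fn grownCapacity(capacity: u32, required: u32) u32 {
        var new_capacity = capacity;
        while (true) {
            new_capacity *= 2;
            if (required < new_capacity * 3 / 5) break;
        }
        return new_capacity;
    }

    test "growth keeps the load factor under 3/5" {
        // One doubling is enough: 10 < 32 * 3 / 5 = 19.
        try std.testing.expectEqual(@as(u32, 32), grownCapacity(16, 10));
        // Reserving extra entries up front can force more than one doubling:
        // 20 >= 32 * 3 / 5 = 19, but 20 < 64 * 3 / 5 = 38.
        try std.testing.expectEqual(@as(u32, 64), grownCapacity(16, 20));
    }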
@@ -6730,10 +6771,11 @@ pub fn get(ip: *InternPool, gpa: Allocator, tid: Zcu.PerThread.Id, key: Key) All
assert(ptr_type.sentinel == .none or ip.typeOf(ptr_type.sentinel) == ptr_type.child);
if (ptr_type.flags.size == .Slice) {
gop.cancel();
var new_key = key;
new_key.ptr_type.flags.size = .Many;
const ptr_type_index = try ip.get(gpa, tid, new_key);
gop.assign(try ip.getOrPutKey(gpa, tid, key));
gop = try ip.getOrPutKey(gpa, tid, key);
try items.ensureUnusedCapacity(1);
items.appendAssumeCapacity(.{
@@ -6913,9 +6955,10 @@ pub fn get(ip: *InternPool, gpa: Allocator, tid: Zcu.PerThread.Id, key: Key) All
},
.anon_decl => |anon_decl| if (ptrsHaveSameAlignment(ip, ptr.ty, ptr_type, anon_decl.orig_ty)) item: {
if (ptr.ty != anon_decl.orig_ty) {
gop.cancel();
var new_key = key;
new_key.ptr.base_addr.anon_decl.orig_ty = ptr.ty;
gop.assign(try ip.getOrPutKey(gpa, tid, new_key));
gop = try ip.getOrPutKey(gpa, tid, new_key);
if (gop == .existing) return gop.existing;
}
break :item .{
@@ -6986,11 +7029,12 @@ pub fn get(ip: *InternPool, gpa: Allocator, tid: Zcu.PerThread.Id, key: Key) All
},
else => unreachable,
}
gop.cancel();
const index_index = try ip.get(gpa, tid, .{ .int = .{
.ty = .usize_type,
.storage = .{ .u64 = base_index.index },
} });
gop.assign(try ip.getOrPutKey(gpa, tid, key));
gop = try ip.getOrPutKey(gpa, tid, key);
try items.ensureUnusedCapacity(1);
items.appendAssumeCapacity(.{
.tag = switch (ptr.base_addr) {
@@ -7399,11 +7443,12 @@ pub fn get(ip: *InternPool, gpa: Allocator, tid: Zcu.PerThread.Id, key: Key) All
}
const elem = switch (aggregate.storage) {
.bytes => |bytes| elem: {
gop.cancel();
const elem = try ip.get(gpa, tid, .{ .int = .{
.ty = .u8_type,
.storage = .{ .u64 = bytes.at(0, ip) },
} });
gop.assign(try ip.getOrPutKey(gpa, tid, key));
gop = try ip.getOrPutKey(gpa, tid, key);
try items.ensureUnusedCapacity(1);
break :elem elem;
},
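In each of these call sites the handle is now cancel()ed, which releases the shard mutex, before the nested ip.get call that interns a helper value, and a fresh handle is acquired afterwards (replacing the removed assign helper). The shard mutex is not reentrant, so recursing into the pool while holding it could deadlock when the nested key hashes to the same shard. A much-simplified standalone sketch of the unlock, recurse, relock shape using a plain std.Thread.Mutex; the function names are illustrative:

    const std = @import("std");

    var table_mutex: std.Thread.Mutex = .{};

    fn nestedInsert() u32 {
        // Stands in for the recursive ip.get(...) call for a helper value.
        table_mutex.lock();
        defer table_mutex.unlock();
        return 42;
    }

    fn insertSliceLike() u32 {
        table_mutex.lock();
        // ... lookup misses; a helper entry is needed first ...
        // Release the lock before the nested insertion: std.Thread.Mutex is
        // not reentrant, so recursing while holding it would deadlock.
        table_mutex.unlock();
        const helper = nestedInsert();
        // Re-acquire and redo the lookup, since other threads may have
        // inserted in the meantime.
        table_mutex.lock();
        defer table_mutex.unlock();
        return helper + 1;
    }

    test "unlock before nested insertion" {
        try std.testing.expectEqual(@as(u32, 43), insertSliceLike());
    }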
@@ -8221,9 +8266,9 @@ pub fn getFuncDeclIes(
extra.mutate.len = prev_extra_len;
}
var func_gop = try ip.getOrPutKey(gpa, tid, .{
var func_gop = try ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, .{
.func = extraFuncDecl(tid, extra.list.*, func_decl_extra_index),
});
}, 3);
defer func_gop.deinit();
if (func_gop == .existing) {
// An existing function type was found; undo the additions to our two arrays.
@@ -8231,23 +8276,28 @@ pub fn getFuncDeclIes(
extra.mutate.len = prev_extra_len;
return func_gop.existing;
}
var error_union_type_gop = try ip.getOrPutKey(gpa, tid, .{ .error_union_type = .{
func_gop.putTentative(func_index);
var error_union_type_gop = try ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, .{ .error_union_type = .{
.error_set_type = error_set_type,
.payload_type = key.bare_return_type,
} });
} }, 2);
defer error_union_type_gop.deinit();
var error_set_type_gop = try ip.getOrPutKey(gpa, tid, .{
error_union_type_gop.putTentative(error_union_type);
var error_set_type_gop = try ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, .{
.inferred_error_set_type = func_index,
});
}, 1);
defer error_set_type_gop.deinit();
error_set_type_gop.putTentative(error_set_type);
var func_ty_gop = try ip.getOrPutKey(gpa, tid, .{
.func_type = extraFuncType(tid, extra.list.*, func_type_extra_index),
});
defer func_ty_gop.deinit();
assert(func_gop.putAt(3) == func_index);
assert(error_union_type_gop.putAt(2) == error_union_type);
assert(error_set_type_gop.putAt(1) == error_set_type);
assert(func_ty_gop.putAt(0) == func_ty);
func_ty_gop.putTentative(func_ty);
func_gop.putFinal(func_index);
error_union_type_gop.putFinal(error_union_type);
error_set_type_gop.putFinal(error_set_type);
func_ty_gop.putFinal(func_ty);
return func_index;
}
@@ -8506,9 +8556,9 @@ pub fn getFuncInstanceIes(
extra.mutate.len = prev_extra_len;
}
var func_gop = try ip.getOrPutKey(gpa, tid, .{
var func_gop = try ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, .{
.func = ip.extraFuncInstance(tid, extra.list.*, func_extra_index),
});
}, 3);
defer func_gop.deinit();
if (func_gop == .existing) {
// Hot path: undo the additions to our two arrays.
@@ -8516,19 +8566,23 @@ pub fn getFuncInstanceIes(
extra.mutate.len = prev_extra_len;
return func_gop.existing;
}
var error_union_type_gop = try ip.getOrPutKey(gpa, tid, .{ .error_union_type = .{
func_gop.putTentative(func_index);
var error_union_type_gop = try ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, .{ .error_union_type = .{
.error_set_type = error_set_type,
.payload_type = arg.bare_return_type,
} });
} }, 2);
defer error_union_type_gop.deinit();
var error_set_type_gop = try ip.getOrPutKey(gpa, tid, .{
error_union_type_gop.putTentative(error_union_type);
var error_set_type_gop = try ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, .{
.inferred_error_set_type = func_index,
});
}, 1);
defer error_set_type_gop.deinit();
error_set_type_gop.putTentative(error_set_type);
var func_ty_gop = try ip.getOrPutKey(gpa, tid, .{
.func_type = extraFuncType(tid, extra.list.*, func_type_extra_index),
});
defer func_ty_gop.deinit();
func_ty_gop.putTentative(func_ty);
try finishFuncInstance(
ip,
gpa,
@@ -8540,10 +8594,11 @@ pub fn getFuncInstanceIes(
arg.alignment,
arg.section,
);
assert(func_gop.putAt(3) == func_index);
assert(error_union_type_gop.putAt(2) == error_union_type);
assert(error_set_type_gop.putAt(1) == error_set_type);
assert(func_ty_gop.putAt(0) == func_ty);
func_gop.putFinal(func_index);
error_union_type_gop.putFinal(error_union_type);
error_set_type_gop.putFinal(error_set_type);
func_ty_gop.putFinal(func_ty);
return func_index;
}
@@ -10839,19 +10894,18 @@ pub fn getBackingDecl(ip: *const InternPool, val: Index) OptionalDeclIndex {
while (true) {
const unwrapped_base = base.unwrap(ip);
const base_item = unwrapped_base.getItem(ip);
const base_extra_items = unwrapped_base.getExtra(ip).view().items(.@"0");
switch (base_item.tag) {
.ptr_decl => return @enumFromInt(base_extra_items[
.ptr_decl => return @enumFromInt(unwrapped_base.getExtra(ip).view().items(.@"0")[
base_item.data + std.meta.fieldIndex(PtrDecl, "decl").?
]),
inline .ptr_eu_payload,
.ptr_opt_payload,
.ptr_elem,
.ptr_field,
=> |tag| base = @enumFromInt(base_extra_items[
=> |tag| base = @enumFromInt(unwrapped_base.getExtra(ip).view().items(.@"0")[
base_item.data + std.meta.fieldIndex(tag.Payload(), "base").?
]),
.ptr_slice => base = @enumFromInt(base_extra_items[
.ptr_slice => base = @enumFromInt(unwrapped_base.getExtra(ip).view().items(.@"0")[
base_item.data + std.meta.fieldIndex(PtrSlice, "ptr").?
]),
else => return .none,


@@ -3,7 +3,8 @@ zcu: *Zcu,
/// Dense, per-thread unique index.
tid: Id,
pub const Id = if (InternPool.single_threaded) enum { main } else enum(u8) { main, _ };
pub const IdBacking = u7;
pub const Id = if (InternPool.single_threaded) enum { main } else enum(IdBacking) { main, _ };
pub fn destroyDecl(pt: Zcu.PerThread, decl_index: Zcu.Decl.Index) void {
const zcu = pt.zcu;


@@ -3110,7 +3110,7 @@ fn buildOutputType(
var thread_pool: ThreadPool = undefined;
try thread_pool.init(.{
.allocator = gpa,
.n_jobs = @min(@max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(u8)),
.n_jobs = @min(@max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking)),
.track_ids = true,
});
defer thread_pool.deinit();
@@ -4964,7 +4964,7 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void {
var thread_pool: ThreadPool = undefined;
try thread_pool.init(.{
.allocator = gpa,
.n_jobs = @min(@max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(u8)),
.n_jobs = @min(@max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking)),
.track_ids = true,
});
defer thread_pool.deinit();
@@ -5402,7 +5402,7 @@ fn jitCmd(
var thread_pool: ThreadPool = undefined;
try thread_pool.init(.{
.allocator = gpa,
.n_jobs = @min(@max(std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(u8)),
.n_jobs = @min(@max(std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking)),
.track_ids = true,
});
defer thread_pool.deinit();
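All three thread pools now clamp n_jobs to std.math.maxInt(Zcu.PerThread.IdBacking) (127 for u7) rather than maxInt(u8), so every worker can be assigned a distinct PerThread.Id. A standalone sketch of the clamp; cpu_count and the helper name are illustrative:

    const std = @import("std");

    const IdBacking = u7; // from the new Zcu.PerThread.IdBacking

    /// Clamp the job count to [1, maxInt(IdBacking)] so every worker
    /// fits in a PerThread.Id.
    fn jobCount(requested: ?usize, cpu_count: usize) usize {
        return @min(@max(requested orelse cpu_count, 1), std.math.maxInt(IdBacking));
    }

    test "job count stays within the per-thread id space" {
        try std.testing.expectEqual(@as(usize, 8), jobCount(null, 8));
        try std.testing.expectEqual(@as(usize, 1), jobCount(0, 256));
        try std.testing.expectEqual(@as(usize, 127), jobCount(null, 256));
    }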


@@ -572,7 +572,8 @@ pub inline fn backendSupportsFeature(backend: std.builtin.CompilerBackend, compt
else => false,
},
.separate_thread => switch (backend) {
else => false,
.stage2_llvm => false,
else => true,
},
};
}