Merge pull request #20632 from jacobly0/codegen-thread

InternPool: enable separate codegen/linking thread
Jacob Young 2024-07-16 14:49:49 -04:00 committed by GitHub
commit 88bb0fd288
6 changed files with 116 additions and 65 deletions


@@ -669,14 +669,8 @@ fn appendTreeSymbol(symbol: TreeSymbol, buf: []u8, start_i: usize) usize {
fn clearWrittenWithEscapeCodes() anyerror!void {
if (!global_progress.need_clear) return;
var i: usize = 0;
const buf = global_progress.draw_buffer;
buf[i..][0..clear.len].* = clear.*;
i += clear.len;
global_progress.need_clear = false;
try write(buf[0..i]);
try write(clear);
}
/// U+25BA or


@@ -1050,15 +1050,16 @@ pub fn indexOfSentinel(comptime T: type, comptime sentinel: T, p: [*:sentinel]co
// as we don't read into a new page. This should be the case for most architectures
// which use paged memory, however should be confirmed before adding a new arch below.
.aarch64, .x86, .x86_64 => if (std.simd.suggestVectorLength(T)) |block_len| {
const block_size = @sizeOf(T) * block_len;
const Block = @Vector(block_len, T);
const mask: Block = @splat(sentinel);
comptime std.debug.assert(std.mem.page_size % @sizeOf(Block) == 0);
comptime std.debug.assert(std.mem.page_size % block_size == 0);
// First block may be unaligned
const start_addr = @intFromPtr(&p[i]);
const offset_in_page = start_addr & (std.mem.page_size - 1);
if (offset_in_page <= std.mem.page_size - @sizeOf(Block)) {
if (offset_in_page <= std.mem.page_size - block_size) {
// Will not read past the end of a page, full block.
const block: Block = p[i..][0..block_len].*;
const matches = block == mask;
@@ -1066,19 +1067,19 @@ pub fn indexOfSentinel(comptime T: type, comptime sentinel: T, p: [*:sentinel]co
return i + std.simd.firstTrue(matches).?;
}
i += (std.mem.alignForward(usize, start_addr, @alignOf(Block)) - start_addr) / @sizeOf(T);
i += @divExact(std.mem.alignForward(usize, start_addr, block_size) - start_addr, @sizeOf(T));
} else {
// Would read over a page boundary. Per-byte at a time until aligned or found.
// 0.39% chance this branch is taken for 4K pages at 16b block length.
//
// An alternate strategy is to read a full block (the last in the page) and
// mask the entries before the pointer.
while ((@intFromPtr(&p[i]) & (@alignOf(Block) - 1)) != 0) : (i += 1) {
while ((@intFromPtr(&p[i]) & (block_size - 1)) != 0) : (i += 1) {
if (p[i] == sentinel) return i;
}
}
std.debug.assert(std.mem.isAligned(@intFromPtr(&p[i]), @alignOf(Block)));
std.debug.assert(std.mem.isAligned(@intFromPtr(&p[i]), block_size));
while (true) {
const block: *const Block = @ptrCast(@alignCast(p[i..][0..block_len]));
const matches = block.* == mask;
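The change above replaces @alignOf(Block) and @sizeOf(Block) with a single block_size in the page-boundary check and in the alignment math; the two quantities need not agree for vector types, and the loop needs the size, not the ABI alignment. The two checks can be illustrated in isolation. The following is a standalone sketch, not part of this commit, with page_size, block_size, and the function names chosen for illustration:

    const std = @import("std");

    // Illustrative constants; the real code derives these from
    // std.simd.suggestVectorLength(T) and std.mem.page_size.
    const page_size: usize = 4096;
    const block_size: usize = 32; // e.g. @sizeOf(@Vector(16, u16))

    /// True when a block_size read starting at `addr` cannot cross a page boundary.
    fn fullBlockFitsInPage(addr: usize) bool {
        const offset_in_page = addr & (page_size - 1);
        return offset_in_page <= page_size - block_size;
    }

    /// Elements of type T to skip so the next read starts on a block_size boundary.
    fn elemsUntilBlockAligned(comptime T: type, addr: usize) usize {
        return @divExact(std.mem.alignForward(usize, addr, block_size) - addr, @sizeOf(T));
    }

    test "block reads stay within one page" {
        try std.testing.expect(fullBlockFitsInPage(page_size - block_size));
        try std.testing.expect(!fullBlockFitsInPage(page_size - block_size + 1));
        try std.testing.expectEqual(@as(usize, 15), elemsUntilBlockAligned(u16, page_size - block_size + 2));
    }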


@@ -10,6 +10,8 @@ shards: []Shard = &.{},
global_error_set: GlobalErrorSet = GlobalErrorSet.empty,
/// Cached number of active bits in a `tid`.
tid_width: if (single_threaded) u0 else std.math.Log2Int(u32) = 0,
/// Cached shift amount to put a `tid` in the top bits of a 30-bit value.
tid_shift_30: if (single_threaded) u0 else std.math.Log2Int(u32) = if (single_threaded) 0 else 31,
/// Cached shift amount to put a `tid` in the top bits of a 31-bit value.
tid_shift_31: if (single_threaded) u0 else std.math.Log2Int(u32) = if (single_threaded) 0 else 31,
/// Cached shift amount to put a `tid` in the top bits of a 32-bit value.
@@ -53,7 +55,7 @@ free_dep_entries: std.ArrayListUnmanaged(DepEntry.Index) = .{},
/// Whether a multi-threaded intern pool is useful.
/// Currently `false` until the intern pool is actually accessed
/// from multiple threads to reduce the cost of this data structure.
const want_multi_threaded = false;
const want_multi_threaded = true;
/// Whether a single-threaded intern pool impl is in use.
pub const single_threaded = builtin.single_threaded or !want_multi_threaded;
@@ -941,8 +943,12 @@ const Shard = struct {
return @atomicLoad(Value, &entry.value, .acquire);
}
fn release(entry: *Entry, value: Value) void {
assert(value != .none);
@atomicStore(Value, &entry.value, value, .release);
}
fn resetUnordered(entry: *Entry) void {
@atomicStore(Value, &entry.value, .none, .unordered);
}
};
};
}
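Here, release now asserts that a real index is being published, and the new resetUnordered lets the owning writer withdraw a tentatively published entry (used by the GetOrPutKey changes further down). A minimal standalone sketch of that publish/withdraw pattern; Slot and its values are illustrative, not the InternPool types:

    const std = @import("std");

    const Value = enum(u32) { none = std.math.maxInt(u32), _ };

    const Slot = struct {
        value: Value = .none,

        fn acquire(slot: *const Slot) Value {
            // Readers observe either .none or a fully written value.
            return @atomicLoad(Value, &slot.value, .acquire);
        }

        fn release(slot: *Slot, value: Value) void {
            std.debug.assert(value != .none);
            // Publishes the value; pairs with the acquire load above.
            @atomicStore(Value, &slot.value, value, .release);
        }

        fn resetUnordered(slot: *Slot) void {
            // Withdraws a tentative value; only the owning writer calls this,
            // so no ordering with other writes is needed.
            @atomicStore(Value, &slot.value, .none, .unordered);
        }
    };

    test "publish then withdraw" {
        var slot: Slot = .{};
        slot.release(@enumFromInt(42));
        try std.testing.expect(slot.acquire() == @as(Value, @enumFromInt(42)));
        slot.resetUnordered();
        try std.testing.expect(slot.acquire() == .none);
    }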
@@ -4089,8 +4095,8 @@ pub const Index = enum(u32) {
fn wrap(unwrapped: Unwrapped, ip: *const InternPool) Index {
assert(@intFromEnum(unwrapped.tid) <= ip.getTidMask());
assert(unwrapped.index <= ip.getIndexMask(u31));
return @enumFromInt(@as(u32, @intFromEnum(unwrapped.tid)) << ip.tid_shift_31 | unwrapped.index);
assert(unwrapped.index <= ip.getIndexMask(u30));
return @enumFromInt(@as(u32, @intFromEnum(unwrapped.tid)) << ip.tid_shift_30 | unwrapped.index);
}
pub fn getExtra(unwrapped: Unwrapped, ip: *const InternPool) Local.Extra {
@@ -4129,8 +4135,8 @@ pub const Index = enum(u32) {
.tid = .main,
.index = @intFromEnum(index),
} else .{
.tid = @enumFromInt(@intFromEnum(index) >> ip.tid_shift_31 & ip.getTidMask()),
.index = @intFromEnum(index) & ip.getIndexMask(u31),
.tid = @enumFromInt(@intFromEnum(index) >> ip.tid_shift_30 & ip.getTidMask()),
.index = @intFromEnum(index) & ip.getIndexMask(u30),
};
}
@@ -5820,6 +5826,7 @@ pub fn init(ip: *InternPool, gpa: Allocator, available_threads: usize) !void {
});
ip.tid_width = @intCast(std.math.log2_int_ceil(usize, used_threads));
ip.tid_shift_30 = if (single_threaded) 0 else 30 - ip.tid_width;
ip.tid_shift_31 = if (single_threaded) 0 else 31 - ip.tid_width;
ip.tid_shift_32 = if (single_threaded) 0 else ip.tid_shift_31 +| 1;
ip.shards = try gpa.alloc(Shard, @as(usize, 1) << ip.tid_width);
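init now also derives tid_shift_30: the thread id occupies the top tid_width bits of the 30-, 31-, or 32-bit index space, so for example 7 worker threads give tid_width = 3 and tid_shift_30 = 27. A standalone worked sketch of the 30-bit packing used by Index.Unwrapped.wrap above; the function names are illustrative, and the u5 shift type mirrors std.math.Log2Int(u32):

    const std = @import("std");

    fn tidWidth(used_threads: usize) u5 {
        return @intCast(std.math.log2_int_ceil(usize, used_threads));
    }

    /// Pack a thread id into the top bits of a 30-bit index space.
    fn wrap30(tid: u32, index: u32, tid_width: u5) u32 {
        const tid_shift_30: u5 = 30 - tid_width;
        std.debug.assert(index <= (@as(u32, 1) << tid_shift_30) - 1);
        return tid << tid_shift_30 | index;
    }

    fn unwrap30(packed_index: u32, tid_width: u5) struct { tid: u32, index: u32 } {
        const tid_shift_30: u5 = 30 - tid_width;
        const index_mask = (@as(u32, 1) << tid_shift_30) - 1;
        return .{
            .tid = packed_index >> tid_shift_30,
            .index = packed_index & index_mask,
        };
    }

    test "tid packing round-trips" {
        const width = tidWidth(7); // 3 bits for up to 8 threads
        const packed_index = wrap30(5, 1234, width);
        const unwrapped = unwrap30(packed_index, width);
        try std.testing.expectEqual(@as(u32, 5), unwrapped.tid);
        try std.testing.expectEqual(@as(u32, 1234), unwrapped.index);
    }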
@@ -6585,35 +6592,55 @@ const GetOrPutKey = union(enum) {
},
fn put(gop: *GetOrPutKey) Index {
return gop.putAt(0);
}
fn putAt(gop: *GetOrPutKey, offset: u32) Index {
switch (gop.*) {
.existing => unreachable,
.new => |info| {
.new => |*info| {
const index = Index.Unwrapped.wrap(.{
.tid = info.tid,
.index = info.ip.getLocal(info.tid).mutate.items.len - 1 - offset,
.index = info.ip.getLocal(info.tid).mutate.items.len - 1,
}, info.ip);
info.shard.shared.map.entries[info.map_index].release(index);
info.shard.mutate.map.len += 1;
info.shard.mutate.map.mutex.unlock();
gop.* = .{ .existing = index };
gop.putTentative(index);
gop.putFinal(index);
return index;
},
}
}
fn assign(gop: *GetOrPutKey, new_gop: GetOrPutKey) void {
gop.deinit();
gop.* = new_gop;
fn putTentative(gop: *GetOrPutKey, index: Index) void {
assert(index != .none);
switch (gop.*) {
.existing => unreachable,
.new => |*info| gop.new.shard.shared.map.entries[info.map_index].release(index),
}
}
fn putFinal(gop: *GetOrPutKey, index: Index) void {
assert(index != .none);
switch (gop.*) {
.existing => unreachable,
.new => |info| {
assert(info.shard.shared.map.entries[info.map_index].value == index);
info.shard.mutate.map.len += 1;
info.shard.mutate.map.mutex.unlock();
gop.* = .{ .existing = index };
},
}
}
fn cancel(gop: *GetOrPutKey) void {
switch (gop.*) {
.existing => {},
.new => |info| info.shard.mutate.map.mutex.unlock(),
}
gop.* = .{ .existing = undefined };
}
fn deinit(gop: *GetOrPutKey) void {
switch (gop.*) {
.existing => {},
.new => |info| info.shard.mutate.map.mutex.unlock(),
.new => |info| info.shard.shared.map.entries[info.map_index].resetUnordered(),
}
gop.cancel();
gop.* = undefined;
}
};
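put is now split into putTentative (publish the index into the shard map while still holding the shard mutex) and putFinal (check the published value, bump the map length, unlock, and mark the handle as existing), while deinit withdraws an unfinalized tentative entry via resetUnordered before unlocking, so a failed multi-entry insertion leaves no half-published indexes behind. The shape of that two-phase protocol, reduced to a toy mutex-protected slot; this is a sketch only, and Table and Handle are illustrative, not the real shard or GetOrPutKey types:

    const std = @import("std");

    const Index = enum(u32) { none = std.math.maxInt(u32), _ };

    const Table = struct {
        mutex: std.Thread.Mutex = .{},
        slot: Index = .none,
        len: u32 = 0,
    };

    const Handle = struct {
        table: *Table,
        finalized: bool = false,

        /// Lock the table and hand out a handle for a pending insertion.
        fn begin(table: *Table) Handle {
            table.mutex.lock();
            return .{ .table = table };
        }

        /// Publish a value while still holding the lock; may later be withdrawn.
        fn putTentative(h: *Handle, index: Index) void {
            std.debug.assert(index != .none);
            h.table.slot = index;
        }

        /// Commit the tentative value: bump the length and release the lock.
        fn putFinal(h: *Handle, index: Index) void {
            std.debug.assert(h.table.slot == index);
            h.table.len += 1;
            h.finalized = true;
            h.table.mutex.unlock();
        }

        /// On failure paths: withdraw anything tentative and release the lock.
        fn deinit(h: *Handle) void {
            if (!h.finalized) {
                h.table.slot = .none;
                h.table.mutex.unlock();
            }
            h.* = undefined;
        }
    };

    test "two-phase insertion" {
        var table: Table = .{};
        var h = Handle.begin(&table);
        defer h.deinit();
        h.putTentative(@enumFromInt(7));
        h.putFinal(@enumFromInt(7));
        try std.testing.expectEqual(@as(u32, 1), table.len);
    }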
@@ -6622,6 +6649,15 @@ fn getOrPutKey(
gpa: Allocator,
tid: Zcu.PerThread.Id,
key: Key,
) Allocator.Error!GetOrPutKey {
return ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, key, 0);
}
fn getOrPutKeyEnsuringAdditionalCapacity(
ip: *InternPool,
gpa: Allocator,
tid: Zcu.PerThread.Id,
key: Key,
additional_capacity: u32,
) Allocator.Error!GetOrPutKey {
const full_hash = key.hash64(ip);
const hash: u32 = @truncate(full_hash >> 32);
@@ -6657,11 +6693,16 @@ fn getOrPutKey(
}
}
const map_header = map.header().*;
if (shard.mutate.map.len >= map_header.capacity * 3 / 5) {
const required = shard.mutate.map.len + additional_capacity;
if (required >= map_header.capacity * 3 / 5) {
const arena_state = &ip.getLocal(tid).mutate.arena;
var arena = arena_state.promote(gpa);
defer arena_state.* = arena.state;
const new_map_capacity = map_header.capacity * 2;
var new_map_capacity = map_header.capacity;
while (true) {
new_map_capacity *= 2;
if (required < new_map_capacity * 3 / 5) break;
}
const new_map_buf = try arena.allocator().alignedAlloc(
u8,
Map.alignment,
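The load-factor check now counts entries that are about to be inserted (additional_capacity), and because several entries may be reserved at once, a single doubling may not be enough, hence the loop. A standalone worked example of the growth rule; the 3/5 load factor comes from the code above, the helper name is illustrative:

    const std = @import("std");

    /// Double `capacity` until `required` entries stay under a 3/5 load factor.
    fn grownCapacity(capacity: u32, required: u32) u32 {
        var new_capacity = capacity;
        while (true) {
            new_capacity *= 2;
            if (required < new_capacity * 3 / 5) break;
        }
        return new_capacity;
    }

    test "growth keeps the load factor under 3/5" {
        // One doubling is enough: 10 < 32 * 3 / 5 = 19.
        try std.testing.expectEqual(@as(u32, 32), grownCapacity(16, 10));
        // Reserving extra entries up front can force more than one doubling:
        // 20 >= 32 * 3 / 5 = 19, but 20 < 64 * 3 / 5 = 38.
        try std.testing.expectEqual(@as(u32, 64), grownCapacity(16, 20));
    }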
@@ -6730,10 +6771,11 @@ pub fn get(ip: *InternPool, gpa: Allocator, tid: Zcu.PerThread.Id, key: Key) All
assert(ptr_type.sentinel == .none or ip.typeOf(ptr_type.sentinel) == ptr_type.child);
if (ptr_type.flags.size == .Slice) {
gop.cancel();
var new_key = key;
new_key.ptr_type.flags.size = .Many;
const ptr_type_index = try ip.get(gpa, tid, new_key);
gop.assign(try ip.getOrPutKey(gpa, tid, key));
gop = try ip.getOrPutKey(gpa, tid, key);
try items.ensureUnusedCapacity(1);
items.appendAssumeCapacity(.{
@@ -6913,9 +6955,10 @@ pub fn get(ip: *InternPool, gpa: Allocator, tid: Zcu.PerThread.Id, key: Key) All
},
.anon_decl => |anon_decl| if (ptrsHaveSameAlignment(ip, ptr.ty, ptr_type, anon_decl.orig_ty)) item: {
if (ptr.ty != anon_decl.orig_ty) {
gop.cancel();
var new_key = key;
new_key.ptr.base_addr.anon_decl.orig_ty = ptr.ty;
gop.assign(try ip.getOrPutKey(gpa, tid, new_key));
gop = try ip.getOrPutKey(gpa, tid, new_key);
if (gop == .existing) return gop.existing;
}
break :item .{
@@ -6986,11 +7029,12 @@ pub fn get(ip: *InternPool, gpa: Allocator, tid: Zcu.PerThread.Id, key: Key) All
},
else => unreachable,
}
gop.cancel();
const index_index = try ip.get(gpa, tid, .{ .int = .{
.ty = .usize_type,
.storage = .{ .u64 = base_index.index },
} });
gop.assign(try ip.getOrPutKey(gpa, tid, key));
gop = try ip.getOrPutKey(gpa, tid, key);
try items.ensureUnusedCapacity(1);
items.appendAssumeCapacity(.{
.tag = switch (ptr.base_addr) {
@@ -7399,11 +7443,12 @@ pub fn get(ip: *InternPool, gpa: Allocator, tid: Zcu.PerThread.Id, key: Key) All
}
const elem = switch (aggregate.storage) {
.bytes => |bytes| elem: {
gop.cancel();
const elem = try ip.get(gpa, tid, .{ .int = .{
.ty = .u8_type,
.storage = .{ .u64 = bytes.at(0, ip) },
} });
gop.assign(try ip.getOrPutKey(gpa, tid, key));
gop = try ip.getOrPutKey(gpa, tid, key);
try items.ensureUnusedCapacity(1);
break :elem elem;
},
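In each of these call sites the handle is now cancel()ed, which releases the shard mutex, before the nested ip.get call that interns a helper value, and a fresh handle is acquired afterwards (replacing the removed assign helper). The shard mutex is not reentrant, so recursing into the pool while holding it could deadlock when the nested key hashes to the same shard. A much-simplified standalone sketch of the unlock, recurse, relock shape using a plain std.Thread.Mutex; the function names are illustrative:

    const std = @import("std");

    var table_mutex: std.Thread.Mutex = .{};

    fn nestedInsert() u32 {
        // Stands in for the recursive ip.get(...) call for a helper value.
        table_mutex.lock();
        defer table_mutex.unlock();
        return 42;
    }

    fn insertSliceLike() u32 {
        table_mutex.lock();
        // ... lookup misses; a helper entry is needed first ...
        // Release the lock before the nested insertion: std.Thread.Mutex is
        // not reentrant, so recursing while holding it would deadlock.
        table_mutex.unlock();
        const helper = nestedInsert();
        // Re-acquire and redo the lookup, since other threads may have
        // inserted in the meantime.
        table_mutex.lock();
        defer table_mutex.unlock();
        return helper + 1;
    }

    test "unlock before nested insertion" {
        try std.testing.expectEqual(@as(u32, 43), insertSliceLike());
    }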
@@ -8221,9 +8266,9 @@ pub fn getFuncDeclIes(
extra.mutate.len = prev_extra_len;
}
var func_gop = try ip.getOrPutKey(gpa, tid, .{
var func_gop = try ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, .{
.func = extraFuncDecl(tid, extra.list.*, func_decl_extra_index),
});
}, 3);
defer func_gop.deinit();
if (func_gop == .existing) {
// An existing function type was found; undo the additions to our two arrays.
@@ -8231,23 +8276,28 @@ pub fn getFuncDeclIes(
extra.mutate.len = prev_extra_len;
return func_gop.existing;
}
var error_union_type_gop = try ip.getOrPutKey(gpa, tid, .{ .error_union_type = .{
func_gop.putTentative(func_index);
var error_union_type_gop = try ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, .{ .error_union_type = .{
.error_set_type = error_set_type,
.payload_type = key.bare_return_type,
} });
} }, 2);
defer error_union_type_gop.deinit();
var error_set_type_gop = try ip.getOrPutKey(gpa, tid, .{
error_union_type_gop.putTentative(error_union_type);
var error_set_type_gop = try ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, .{
.inferred_error_set_type = func_index,
});
}, 1);
defer error_set_type_gop.deinit();
error_set_type_gop.putTentative(error_set_type);
var func_ty_gop = try ip.getOrPutKey(gpa, tid, .{
.func_type = extraFuncType(tid, extra.list.*, func_type_extra_index),
});
defer func_ty_gop.deinit();
assert(func_gop.putAt(3) == func_index);
assert(error_union_type_gop.putAt(2) == error_union_type);
assert(error_set_type_gop.putAt(1) == error_set_type);
assert(func_ty_gop.putAt(0) == func_ty);
func_ty_gop.putTentative(func_ty);
func_gop.putFinal(func_index);
error_union_type_gop.putFinal(error_union_type);
error_set_type_gop.putFinal(error_set_type);
func_ty_gop.putFinal(func_ty);
return func_index;
}
@@ -8506,9 +8556,9 @@ pub fn getFuncInstanceIes(
extra.mutate.len = prev_extra_len;
}
var func_gop = try ip.getOrPutKey(gpa, tid, .{
var func_gop = try ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, .{
.func = ip.extraFuncInstance(tid, extra.list.*, func_extra_index),
});
}, 3);
defer func_gop.deinit();
if (func_gop == .existing) {
// Hot path: undo the additions to our two arrays.
@@ -8516,19 +8566,23 @@ pub fn getFuncInstanceIes(
extra.mutate.len = prev_extra_len;
return func_gop.existing;
}
var error_union_type_gop = try ip.getOrPutKey(gpa, tid, .{ .error_union_type = .{
func_gop.putTentative(func_index);
var error_union_type_gop = try ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, .{ .error_union_type = .{
.error_set_type = error_set_type,
.payload_type = arg.bare_return_type,
} });
} }, 2);
defer error_union_type_gop.deinit();
var error_set_type_gop = try ip.getOrPutKey(gpa, tid, .{
error_union_type_gop.putTentative(error_union_type);
var error_set_type_gop = try ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, .{
.inferred_error_set_type = func_index,
});
}, 1);
defer error_set_type_gop.deinit();
error_set_type_gop.putTentative(error_set_type);
var func_ty_gop = try ip.getOrPutKey(gpa, tid, .{
.func_type = extraFuncType(tid, extra.list.*, func_type_extra_index),
});
defer func_ty_gop.deinit();
func_ty_gop.putTentative(func_ty);
try finishFuncInstance(
ip,
gpa,
@@ -8540,10 +8594,11 @@ pub fn getFuncInstanceIes(
arg.alignment,
arg.section,
);
assert(func_gop.putAt(3) == func_index);
assert(error_union_type_gop.putAt(2) == error_union_type);
assert(error_set_type_gop.putAt(1) == error_set_type);
assert(func_ty_gop.putAt(0) == func_ty);
func_gop.putFinal(func_index);
error_union_type_gop.putFinal(error_union_type);
error_set_type_gop.putFinal(error_set_type);
func_ty_gop.putFinal(func_ty);
return func_index;
}
@@ -10839,19 +10894,18 @@ pub fn getBackingDecl(ip: *const InternPool, val: Index) OptionalDeclIndex {
while (true) {
const unwrapped_base = base.unwrap(ip);
const base_item = unwrapped_base.getItem(ip);
const base_extra_items = unwrapped_base.getExtra(ip).view().items(.@"0");
switch (base_item.tag) {
.ptr_decl => return @enumFromInt(base_extra_items[
.ptr_decl => return @enumFromInt(unwrapped_base.getExtra(ip).view().items(.@"0")[
base_item.data + std.meta.fieldIndex(PtrDecl, "decl").?
]),
inline .ptr_eu_payload,
.ptr_opt_payload,
.ptr_elem,
.ptr_field,
=> |tag| base = @enumFromInt(base_extra_items[
=> |tag| base = @enumFromInt(unwrapped_base.getExtra(ip).view().items(.@"0")[
base_item.data + std.meta.fieldIndex(tag.Payload(), "base").?
]),
.ptr_slice => base = @enumFromInt(base_extra_items[
.ptr_slice => base = @enumFromInt(unwrapped_base.getExtra(ip).view().items(.@"0")[
base_item.data + std.meta.fieldIndex(PtrSlice, "ptr").?
]),
else => return .none,


@@ -3,7 +3,8 @@ zcu: *Zcu,
/// Dense, per-thread unique index.
tid: Id,
pub const Id = if (InternPool.single_threaded) enum { main } else enum(u8) { main, _ };
pub const IdBacking = u7;
pub const Id = if (InternPool.single_threaded) enum { main } else enum(IdBacking) { main, _ };
pub fn destroyDecl(pt: Zcu.PerThread, decl_index: Zcu.Decl.Index) void {
const zcu = pt.zcu;


@@ -3110,7 +3110,7 @@ fn buildOutputType(
var thread_pool: ThreadPool = undefined;
try thread_pool.init(.{
.allocator = gpa,
.n_jobs = @min(@max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(u8)),
.n_jobs = @min(@max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking)),
.track_ids = true,
});
defer thread_pool.deinit();
@@ -4964,7 +4964,7 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void {
var thread_pool: ThreadPool = undefined;
try thread_pool.init(.{
.allocator = gpa,
.n_jobs = @min(@max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(u8)),
.n_jobs = @min(@max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking)),
.track_ids = true,
});
defer thread_pool.deinit();
@@ -5402,7 +5402,7 @@ fn jitCmd(
var thread_pool: ThreadPool = undefined;
try thread_pool.init(.{
.allocator = gpa,
.n_jobs = @min(@max(std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(u8)),
.n_jobs = @min(@max(std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking)),
.track_ids = true,
});
defer thread_pool.deinit();
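All three thread pools now clamp n_jobs to std.math.maxInt(Zcu.PerThread.IdBacking) (127 for u7) rather than maxInt(u8), so every worker can be assigned a distinct PerThread.Id. A standalone sketch of the clamp; cpu_count and the helper name are illustrative:

    const std = @import("std");

    const IdBacking = u7; // from the new Zcu.PerThread.IdBacking

    /// Clamp the job count to [1, maxInt(IdBacking)] so every worker
    /// fits in a PerThread.Id.
    fn jobCount(requested: ?usize, cpu_count: usize) usize {
        return @min(@max(requested orelse cpu_count, 1), std.math.maxInt(IdBacking));
    }

    test "job count stays within the per-thread id space" {
        try std.testing.expectEqual(@as(usize, 8), jobCount(null, 8));
        try std.testing.expectEqual(@as(usize, 1), jobCount(0, 256));
        try std.testing.expectEqual(@as(usize, 127), jobCount(null, 256));
    }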


@@ -572,7 +572,8 @@ pub inline fn backendSupportsFeature(backend: std.builtin.CompilerBackend, compt
else => false,
},
.separate_thread => switch (backend) {
else => false,
.stage2_llvm => false,
else => true,
},
};
}