From 0fd52cdc5eb4b17e8066a06d8af761f934cf8808 Mon Sep 17 00:00:00 2001 From: mlugg Date: Fri, 2 Jun 2023 13:47:38 +0100 Subject: [PATCH] InternPool: avoid aggregate null bytes storage This is a workaround for InternPool currently not handling non-null-terminated strings. It avoids using the `bytes` storage for aggregates if there are any null bytes. In the future this should be changed so that the `bytes` storage can be used regardless of whether there are any null bytes. This is important for use cases such as `@embedFile`. However, this fixes a bug for now, and after this commit, stage2 self-hosts again. mlugg: stage5 passes all enabled behavior tests on my system. Commit message edited by Andrew Kelley --- src/InternPool.zig | 23 +++++++++++++++++++++-- src/Module.zig | 1 + 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/InternPool.zig b/src/InternPool.zig index 801e351b4e..2e592c4dd7 100644 --- a/src/InternPool.zig +++ b/src/InternPool.zig @@ -3951,6 +3951,15 @@ pub fn get(ip: *InternPool, gpa: Allocator, key: Key) Allocator.Error!Index { else => unreachable, }, } + // We can't dedup '0' bytes in the pool or it could add garbage to string_bytes. So + // if there are any 0 bytes, we have to skip the bytes case. Note that it's okay for + // our sentinel to be 0 since getOrPutTrailingString would add a 0 sentinel anyway. + for (ip.string_bytes.items[string_bytes_index..]) |x| { + if (x == 0) { + ip.string_bytes.shrinkRetainingCapacity(string_bytes_index); + break :bytes; + } + } if (sentinel != .none) ip.string_bytes.appendAssumeCapacity( @intCast(u8, ip.indexToKey(sentinel).int.storage.u64), ); @@ -3975,7 +3984,17 @@ pub fn get(ip: *InternPool, gpa: Allocator, key: Key) Allocator.Error!Index { .ty = aggregate.ty, }), }); - ip.extra.appendSliceAssumeCapacity(@ptrCast([]const u32, aggregate.storage.elems)); + switch (aggregate.storage) { + .bytes => |bytes| for (bytes) |b| { + const elem = try ip.get(gpa, .{ .int = .{ + .ty = .u8_type, + .storage = .{ .u64 = b }, + } }); + ip.extra.appendAssumeCapacity(@enumToInt(elem)); + }, + .elems => |elems| ip.extra.appendSliceAssumeCapacity(@ptrCast([]const u32, elems)), + .repeated_elem => |elem| ip.extra.appendNTimesAssumeCapacity(@enumToInt(elem), len), + } if (sentinel != .none) ip.extra.appendAssumeCapacity(@enumToInt(sentinel)); }, @@ -5203,7 +5222,7 @@ pub fn destroyFunc(ip: *InternPool, gpa: Allocator, index: Module.Fn.Index) void ip.funcPtr(index).* = undefined; ip.funcs_free_list.append(gpa, index) catch { // In order to keep `destroyFunc` a non-fallible function, we ignore memory - // allocation failures here, instead leaking the Union until garbage collection. + // allocation failures here, instead leaking the Fn until garbage collection. }; } diff --git a/src/Module.zig b/src/Module.zig index 5f28f4f069..9d58029cb5 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -337,6 +337,7 @@ pub const CaptureScope = struct { if (!self.failed()) { self.captures.deinit(gpa); } + gpa.destroy(self); } };