zig/lib/compiler/aro/aro/StringInterner.zig
Andrew Kelley 240d0b68f6 make aro-based translate-c lazily built from source
Part of #19063.

Primarily, this moves Aro from deps/ to lib/compiler/ so that it can be
lazily compiled from source. src/aro_translate_c.zig is moved to
lib/compiler/aro_translate_c.zig and some of Zig CLI logic moved to a
main() function there.

aro_translate_c.zig becomes the "common" import for clang-based
translate-c.

Not all of the compiler was able to be detangled from Aro, however, so
it still, for now, remains being compiled with the main compiler
sources due to the clang-based translate-c depending on it. Once
aro-based translate-c achieves feature parity with the clang-based
translate-c implementation, the clang-based one can be removed from Zig.

Aro made it unnecessarily difficult to depend on with these .def files
and all these Zig module requirements. I looked at the .def files and
made these observations:

- The canonical source is llvm .def files.
- Therefore there is an update process to sync with llvm that involves
  regenerating the .def files in Aro.
- Therefore you might as well just regenerate the .zig files directly
  and check those into Aro.
- Also with a small amount of tinkering, the file size on disk of these
  generated .zig files can be made many times smaller, without
  compromising type safety in the usage of the data.

This would make things much easier on Zig as downstream project,
particularly we could remove those pesky stubs when bootstrapping.

I have gone ahead with these changes since they unblock me and I will
have a chat with Vexu to see what he thinks.
2024-02-28 13:21:05 -07:00

84 lines
2.5 KiB
Zig
Vendored

const std = @import("std");
const mem = std.mem;
const Compilation = @import("Compilation.zig");
const StringToIdMap = std.StringHashMapUnmanaged(StringId);
pub const StringId = enum(u32) {
empty,
_,
};
pub const TypeMapper = struct {
const LookupSpeed = enum {
fast,
slow,
};
data: union(LookupSpeed) {
fast: []const []const u8,
slow: *const StringToIdMap,
},
pub fn lookup(self: TypeMapper, string_id: StringInterner.StringId) []const u8 {
if (string_id == .empty) return "";
switch (self.data) {
.fast => |arr| return arr[@intFromEnum(string_id)],
.slow => |map| {
var it = map.iterator();
while (it.next()) |entry| {
if (entry.value_ptr.* == string_id) return entry.key_ptr.*;
}
unreachable;
},
}
}
pub fn deinit(self: TypeMapper, allocator: mem.Allocator) void {
switch (self.data) {
.slow => {},
.fast => |arr| allocator.free(arr),
}
}
};
const StringInterner = @This();
string_table: StringToIdMap = .{},
next_id: StringId = @enumFromInt(@intFromEnum(StringId.empty) + 1),
pub fn deinit(self: *StringInterner, allocator: mem.Allocator) void {
self.string_table.deinit(allocator);
}
pub fn intern(comp: *Compilation, str: []const u8) !StringId {
return comp.string_interner.internExtra(comp.gpa, str);
}
pub fn internExtra(self: *StringInterner, allocator: mem.Allocator, str: []const u8) !StringId {
if (str.len == 0) return .empty;
const gop = try self.string_table.getOrPut(allocator, str);
if (gop.found_existing) return gop.value_ptr.*;
defer self.next_id = @enumFromInt(@intFromEnum(self.next_id) + 1);
gop.value_ptr.* = self.next_id;
return self.next_id;
}
/// deinit for the returned TypeMapper is a no-op and does not need to be called
pub fn getSlowTypeMapper(self: *const StringInterner) TypeMapper {
return TypeMapper{ .data = .{ .slow = &self.string_table } };
}
/// Caller must call `deinit` on the returned TypeMapper
pub fn getFastTypeMapper(self: *const StringInterner, allocator: mem.Allocator) !TypeMapper {
var strings = try allocator.alloc([]const u8, @intFromEnum(self.next_id));
var it = self.string_table.iterator();
strings[0] = "";
while (it.next()) |entry| {
strings[@intFromEnum(entry.value_ptr.*)] = entry.key_ptr.*;
}
return TypeMapper{ .data = .{ .fast = strings } };
}