Merge pull request #9684 from FnControlOption/astgen-string-table

AstGen: use string index as key for string table
This commit is contained in:
Andrew Kelley 2021-09-07 14:52:45 -04:00 committed by GitHub
commit a48e5af69d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 69 additions and 63 deletions

View File

@ -92,6 +92,34 @@ pub fn hashString(s: []const u8) u64 {
return std.hash.Wyhash.hash(0, s);
}
/// Hash map context for tables whose keys are `u32` offsets into a shared
/// byte buffer, where every stored offset is the start of a null-terminated
/// string. Keys are compared by offset and hashed by the string they point at.
pub const StringIndexContext = struct {
    /// Buffer that the `u32` keys index into.
    bytes: *std.ArrayListUnmanaged(u8),

    const Self = @This();

    /// Two keys are equal exactly when they are the same offset.
    pub fn eql(_: Self, a: u32, b: u32) bool {
        return a == b;
    }

    /// Hashes the null-terminated string that begins at offset `x` in `bytes`.
    pub fn hash(self: Self, x: u32) u64 {
        const start = @ptrCast([*:0]const u8, self.bytes.items.ptr) + x;
        return hashString(mem.spanZ(start));
    }
};
/// Adapter that allows a `[]const u8` to be used as the lookup key for a
/// table whose stored keys are `u32` offsets into `bytes`. The slice is hashed
/// directly and compared against the null-terminated string at the stored
/// offset.
pub const StringIndexAdapter = struct {
    /// Buffer that the stored `u32` keys index into.
    bytes: *std.ArrayListUnmanaged(u8),

    const Self = @This();

    /// Returns true when `a_slice` has the same bytes as the null-terminated
    /// string that begins at offset `b` in `bytes`.
    pub fn eql(self: Self, a_slice: []const u8, b: u32) bool {
        const stored = @ptrCast([*:0]const u8, self.bytes.items.ptr) + b;
        return mem.eql(u8, a_slice, mem.spanZ(stored));
    }

    /// Hashes the lookup slice itself; the buffer is not consulted.
    pub fn hash(_: Self, adapted_key: []const u8) u64 {
        return hashString(adapted_key);
    }
};
/// Deprecated; use `default_max_load_percentage` instead.
pub const DefaultMaxLoadPercentage = default_max_load_percentage;

View File

@ -7,6 +7,8 @@ const mem = std.mem;
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const ArrayListUnmanaged = std.ArrayListUnmanaged;
const StringIndexAdapter = std.hash_map.StringIndexAdapter;
const StringIndexContext = std.hash_map.StringIndexContext;
const Zir = @import("Zir.zig");
const trace = @import("tracy.zig").trace;
@ -30,7 +32,7 @@ source_column: u32 = 0,
/// Used for temporary allocations; freed after AstGen is complete.
/// The resulting ZIR code has no references to anything in this arena.
arena: *Allocator,
string_table: std.StringHashMapUnmanaged(u32) = .{},
string_table: std.HashMapUnmanaged(u32, void, StringIndexContext, std.hash_map.default_max_load_percentage) = .{},
compile_errors: ArrayListUnmanaged(Zir.Inst.CompileErrors.Item) = .{},
/// The topmost block of the current function.
fn_block: ?*GenZir = null,
@ -8781,16 +8783,16 @@ fn identAsString(astgen: *AstGen, ident_token: Ast.TokenIndex) !u32 {
const str_index = @intCast(u32, string_bytes.items.len);
try astgen.appendIdentStr(ident_token, string_bytes);
const key = string_bytes.items[str_index..];
const gop = try astgen.string_table.getOrPut(gpa, key);
const gop = try astgen.string_table.getOrPutContextAdapted(gpa, @as([]const u8, key), StringIndexAdapter{
.bytes = string_bytes,
}, StringIndexContext{
.bytes = string_bytes,
});
if (gop.found_existing) {
string_bytes.shrinkRetainingCapacity(str_index);
return gop.value_ptr.*;
return gop.key_ptr.*;
} else {
// We have to dupe the key into the arena, otherwise the memory
// becomes invalidated when string_bytes gets data appended.
// TODO https://github.com/ziglang/zig/issues/8528
gop.key_ptr.* = try astgen.arena.dupe(u8, key);
gop.value_ptr.* = str_index;
gop.key_ptr.* = str_index;
try string_bytes.append(gpa, 0);
return str_index;
}
@ -8805,19 +8807,19 @@ fn strLitAsString(astgen: *AstGen, str_lit_token: Ast.TokenIndex) !IndexSlice {
const token_bytes = astgen.tree.tokenSlice(str_lit_token);
try astgen.parseStrLit(str_lit_token, string_bytes, token_bytes, 0);
const key = string_bytes.items[str_index..];
const gop = try astgen.string_table.getOrPut(gpa, key);
const gop = try astgen.string_table.getOrPutContextAdapted(gpa, @as([]const u8, key), StringIndexAdapter{
.bytes = string_bytes,
}, StringIndexContext{
.bytes = string_bytes,
});
if (gop.found_existing) {
string_bytes.shrinkRetainingCapacity(str_index);
return IndexSlice{
.index = gop.value_ptr.*,
.index = gop.key_ptr.*,
.len = @intCast(u32, key.len),
};
} else {
// We have to dupe the key into the arena, otherwise the memory
// becomes invalidated when string_bytes gets data appended.
// TODO https://github.com/ziglang/zig/issues/8528
gop.key_ptr.* = try astgen.arena.dupe(u8, key);
gop.value_ptr.* = str_index;
gop.key_ptr.* = str_index;
// Still need a null byte because we are using the same table
// to look up null-terminated strings, so if we get a match, it has to
// be null-terminated for that to work.

View File

@ -37,6 +37,8 @@ const LlvmObject = @import("../codegen/llvm.zig").Object;
const LoadCommand = commands.LoadCommand;
const Module = @import("../Module.zig");
const SegmentCommand = commands.SegmentCommand;
const StringIndexAdapter = std.hash_map.StringIndexAdapter;
const StringIndexContext = std.hash_map.StringIndexContext;
pub const TextBlock = @import("MachO/TextBlock.zig");
const Trie = @import("MachO/Trie.zig");
@ -224,33 +226,6 @@ decls: std.AutoArrayHashMapUnmanaged(*Module.Decl, void) = .{},
/// somewhere else in the codegen.
active_decl: ?*Module.Decl = null,
/// Hash map context for tables keyed by `u32` offsets into `strtab`, where
/// each stored offset is the start of a null-terminated string. Keys are
/// compared by offset and hashed by the string they point at.
const StringIndexContext = struct {
    /// String table that the `u32` keys index into.
    strtab: *std.ArrayListUnmanaged(u8),

    /// Two keys are equal exactly when they are the same offset.
    pub fn eql(self: StringIndexContext, a: u32, b: u32) bool {
        _ = self;
        return a == b;
    }

    /// Hashes the null-terminated string that begins at offset `x` in `strtab`.
    pub fn hash(self: StringIndexContext, x: u32) u64 {
        const strtab_ptr = @ptrCast([*:0]const u8, self.strtab.items.ptr);
        return std.hash_map.hashString(mem.spanZ(strtab_ptr + x));
    }
};
/// Adapter that allows a `[]const u8` to be used as the lookup key for a
/// table whose stored keys are `u32` offsets into `strtab`. The slice is
/// hashed directly and compared against the null-terminated string at the
/// stored offset.
pub const StringSliceAdapter = struct {
    /// String table that the stored `u32` keys index into.
    strtab: *std.ArrayListUnmanaged(u8),

    /// Returns true when `a_slice` has the same bytes as the null-terminated
    /// string that begins at offset `b` in `strtab`.
    pub fn eql(self: StringSliceAdapter, a_slice: []const u8, b: u32) bool {
        const entry = @ptrCast([*:0]const u8, self.strtab.items.ptr) + b;
        return mem.eql(u8, a_slice, mem.spanZ(entry));
    }

    /// Hashes the lookup slice itself; the string table is not consulted.
    pub fn hash(_: StringSliceAdapter, adapted_key: []const u8) u64 {
        return std.hash_map.hashString(adapted_key);
    }
};
const SymbolWithLoc = struct {
// Table where the symbol can be found.
where: enum {
@ -938,8 +913,8 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void {
{
// Add dyld_stub_binder as the final GOT entry.
const n_strx = self.strtab_dir.getKeyAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{
.strtab = &self.strtab,
const n_strx = self.strtab_dir.getKeyAdapted(@as([]const u8, "dyld_stub_binder"), StringIndexAdapter{
.bytes = &self.strtab,
}) orelse unreachable;
const resolv = self.symbol_resolver.get(n_strx) orelse unreachable;
const got_index = @intCast(u32, self.got_entries.items.len);
@ -1966,8 +1941,8 @@ fn writeStubHelperCommon(self: *MachO) !void {
code[9] = 0xff;
code[10] = 0x25;
{
const n_strx = self.strtab_dir.getKeyAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{
.strtab = &self.strtab,
const n_strx = self.strtab_dir.getKeyAdapted(@as([]const u8, "dyld_stub_binder"), StringIndexAdapter{
.bytes = &self.strtab,
}) orelse unreachable;
const resolv = self.symbol_resolver.get(n_strx) orelse unreachable;
const got_index = self.got_entries_map.get(.{
@ -2017,8 +1992,8 @@ fn writeStubHelperCommon(self: *MachO) !void {
code[10] = 0xbf;
code[11] = 0xa9;
binder_blk_outer: {
const n_strx = self.strtab_dir.getKeyAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{
.strtab = &self.strtab,
const n_strx = self.strtab_dir.getKeyAdapted(@as([]const u8, "dyld_stub_binder"), StringIndexAdapter{
.bytes = &self.strtab,
}) orelse unreachable;
const resolv = self.symbol_resolver.get(n_strx) orelse unreachable;
const got_index = self.got_entries_map.get(.{
@ -2435,8 +2410,8 @@ fn resolveSymbols(self: *MachO) !void {
}
// Fourth pass, handle synthetic symbols and flag any undefined references.
if (self.strtab_dir.getKeyAdapted(@as([]const u8, "___dso_handle"), StringSliceAdapter{
.strtab = &self.strtab,
if (self.strtab_dir.getKeyAdapted(@as([]const u8, "___dso_handle"), StringIndexAdapter{
.bytes = &self.strtab,
})) |n_strx| blk: {
const resolv = self.symbol_resolver.getPtr(n_strx) orelse break :blk;
if (resolv.where != .undef) break :blk;
@ -2985,8 +2960,8 @@ fn setEntryPoint(self: *MachO) !void {
// TODO we should respect the -entry flag passed in by the user to set a custom
// entrypoint. For now, assume default of `_main`.
const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
const n_strx = self.strtab_dir.getKeyAdapted(@as([]const u8, "_main"), StringSliceAdapter{
.strtab = &self.strtab,
const n_strx = self.strtab_dir.getKeyAdapted(@as([]const u8, "_main"), StringIndexAdapter{
.bytes = &self.strtab,
}) orelse {
log.err("'_main' export not found", .{});
return error.MissingMainEntrypoint;
@ -4475,8 +4450,8 @@ pub fn populateMissingMetadata(self: *MachO) !void {
});
self.load_commands_dirty = true;
}
if (!self.strtab_dir.containsAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{
.strtab = &self.strtab,
if (!self.strtab_dir.containsAdapted(@as([]const u8, "dyld_stub_binder"), StringIndexAdapter{
.bytes = &self.strtab,
})) {
const import_sym_index = @intCast(u32, self.undefs.items.len);
const n_strx = try self.makeString("dyld_stub_binder");
@ -4616,8 +4591,8 @@ pub fn addExternFn(self: *MachO, name: []const u8) !u32 {
const sym_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{name});
defer self.base.allocator.free(sym_name);
if (self.strtab_dir.getKeyAdapted(@as([]const u8, sym_name), StringSliceAdapter{
.strtab = &self.strtab,
if (self.strtab_dir.getKeyAdapted(@as([]const u8, sym_name), StringIndexAdapter{
.bytes = &self.strtab,
})) |n_strx| {
const resolv = self.symbol_resolver.get(n_strx) orelse unreachable;
return resolv.where_index;
@ -5858,10 +5833,10 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) {
}
pub fn makeString(self: *MachO, string: []const u8) !u32 {
const gop = try self.strtab_dir.getOrPutContextAdapted(self.base.allocator, @as([]const u8, string), StringSliceAdapter{
.strtab = &self.strtab,
const gop = try self.strtab_dir.getOrPutContextAdapted(self.base.allocator, @as([]const u8, string), StringIndexAdapter{
.bytes = &self.strtab,
}, StringIndexContext{
.strtab = &self.strtab,
.bytes = &self.strtab,
});
if (gop.found_existing) {
const off = gop.key_ptr.*;

View File

@ -14,6 +14,7 @@ const Allocator = mem.Allocator;
const Arch = std.Target.Cpu.Arch;
const MachO = @import("../MachO.zig");
const Object = @import("Object.zig");
const StringIndexAdapter = std.hash_map.StringIndexAdapter;
/// Each decl always gets a local symbol with the fully qualified name.
/// The vaddr and size are found here directly.
@ -656,8 +657,8 @@ fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Reloc
parsed_rel.where = .local;
parsed_rel.where_index = where_index;
} else {
const n_strx = context.macho_file.strtab_dir.getKeyAdapted(@as([]const u8, sym_name), MachO.StringSliceAdapter{
.strtab = &context.macho_file.strtab,
const n_strx = context.macho_file.strtab_dir.getKeyAdapted(@as([]const u8, sym_name), StringIndexAdapter{
.bytes = &context.macho_file.strtab,
}) orelse unreachable;
const resolv = context.macho_file.symbol_resolver.get(n_strx) orelse unreachable;
switch (resolv.where) {
@ -717,8 +718,8 @@ pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: R
const where_index = context.object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable;
subtractor = where_index;
} else {
const n_strx = context.macho_file.strtab_dir.getKeyAdapted(@as([]const u8, sym_name), MachO.StringSliceAdapter{
.strtab = &context.macho_file.strtab,
const n_strx = context.macho_file.strtab_dir.getKeyAdapted(@as([]const u8, sym_name), StringIndexAdapter{
.bytes = &context.macho_file.strtab,
}) orelse unreachable;
const resolv = context.macho_file.symbol_resolver.get(n_strx) orelse unreachable;
assert(resolv.where == .global);