wasm-linker: Intern all symbol names

All symbol names, whether read from object files or generated from Zig code,
are now interned, and their offset into the string table is saved on the `Symbol` instead.

Besides interning, local symbols now also use a decl's fully qualified name.
When a decl/symbol is extern/to-be-imported, the name of the decl itself will be used for symbol resolving.
Similarly, symbols that will be exported will have their 'export name' set.
This commit is contained in:
Luuk de Gram 2022-02-27 21:15:33 +01:00
parent 49f01c0a0c
commit b1159ab7ae
4 changed files with 144 additions and 62 deletions

View File

@ -79,6 +79,8 @@ data_segments: std.StringArrayHashMapUnmanaged(u32) = .{},
/// A list of `types.Segment` which provide meta data
/// about a data symbol such as its name
segment_info: std.ArrayListUnmanaged(types.Segment) = .{},
/// Deduplicated string table for strings used by symbols, imports and exports.
string_table: StringTable = .{},
// Output sections
/// Output type section
@ -155,6 +157,79 @@ pub const SymbolLoc = struct {
}
return &wasm_bin.symbols.items[self.index];
}
/// Returns the name of the symbol this location refers to.
/// When the location was discarded in favour of another one, the chain of
/// replacements is followed and the name of the final location is returned.
/// The name is looked up in the string table of the owning object file when
/// `file` is set, and in the string table of `wasm_bin` itself otherwise.
pub fn getName(self: SymbolLoc, wasm_bin: *const Wasm) []const u8 {
    // Walk the replacement chain until we reach a location that is still live.
    var loc = self;
    while (wasm_bin.discarded.get(loc)) |replacement| {
        loc = replacement;
    }

    const object_index = loc.file orelse {
        // No object file: the symbol lives in the linker's own symbol list.
        const name_offset = wasm_bin.symbols.items[loc.index].name;
        return wasm_bin.string_table.get(name_offset);
    };

    const object = wasm_bin.objects.items[object_index];
    const name_offset = object.symtable[loc.index].name;
    return object.string_table.get(name_offset);
}
};
/// Generic string table that de-duplicates strings
/// and converts them into offsets instead.
pub const StringTable = struct {
    /// Set of offsets into `string_data`, used to de-duplicate strings.
    /// The keys are offsets only; the bytes used for hashing and comparing are
    /// read through the context/adapter, which is handed a pointer to `string_data`.
    /// The strings are stored as a contiguous array where each string is zero-terminated.
    string_table: std.HashMapUnmanaged(
        u32,
        void,
        std.hash_map.StringIndexContext,
        std.hash_map.default_max_load_percentage,
    ) = .{},

    /// Holds the actual data of the string table.
    string_data: std.ArrayListUnmanaged(u8) = .{},

    /// Accepts a string and searches for a corresponding string.
    /// When found, de-duplicates the string and returns the existing offset instead.
    /// When the string is not found in the `string_table`, a new entry will be inserted
    /// and the new offset to its data will be returned.
    ///
    /// Returns an error when allocating memory fails. In that case the table is
    /// left unmodified and remains valid.
    ///
    /// Inserting a new string may grow `string_data`, which invalidates any slice
    /// previously returned by `get`.
    pub fn put(self: *StringTable, allocator: Allocator, string: []const u8) !u32 {
        const adapter = std.hash_map.StringIndexAdapter{ .bytes = &self.string_data };

        // Look up first, so the hit path neither allocates nor mutates anything.
        if (self.string_table.getKeyAdapted(string, adapter)) |off| {
            log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off });
            return off;
        }

        // Reserve room for the bytes *before* creating the table slot. Reserving
        // afterwards would, on allocation failure, leave a slot behind whose key
        // was never initialized.
        try self.string_data.ensureUnusedCapacity(allocator, string.len + 1);
        const gop = try self.string_table.getOrPutContextAdapted(
            allocator,
            string,
            adapter,
            .{ .bytes = &self.string_data },
        );
        // The lookup above missed and nothing was inserted in between.
        assert(!gop.found_existing);

        const offset = @intCast(u32, self.string_data.items.len);
        log.debug("writing new string '{s}' at offset 0x{x}", .{ string, offset });
        self.string_data.appendSliceAssumeCapacity(string);
        self.string_data.appendAssumeCapacity(0);
        gop.key_ptr.* = offset;
        return offset;
    }

    /// From a given offset, returns its corresponding string value.
    /// Asserts offset does not exceed bounds.
    /// The returned slice points into `string_data` and ends at the first zero
    /// byte found at or after `off`, so a string that itself contains a zero byte
    /// is returned truncated.
    pub fn get(self: StringTable, off: u32) []const u8 {
        assert(off < self.string_data.items.len);
        // Scan the bounded remainder of the buffer rather than an unbounded pointer.
        return mem.sliceTo(self.string_data.items[off..], 0);
    }

    /// Frees all resources of the string table. Any references pointing
    /// to the strings will be invalid.
    pub fn deinit(self: *StringTable, allocator: Allocator) void {
        self.string_data.deinit(allocator);
        self.string_table.deinit(allocator);
        self.* = undefined;
    }
};
pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Options) !*Wasm {
@ -177,7 +252,7 @@ pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Option
// As sym_index '0' is reserved, we use it for our stack pointer symbol
const symbol = try wasm_bin.symbols.addOne(allocator);
symbol.* = .{
.name = "__stack_pointer",
.name = try wasm_bin.string_table.put(allocator, "__stack_pointer"),
.tag = .global,
.flags = 0,
.index = 0,
@ -268,12 +343,12 @@ fn resolveSymbolsInObject(self: *Wasm, object_index: u16) !void {
.file = object_index,
.index = sym_index,
};
const sym_name = std.mem.sliceTo(symbol.name, 0);
const sym_name = object.string_table.get(symbol.name);
if (symbol.isLocal()) {
if (symbol.isUndefined()) {
log.err("Local symbols are not allowed to reference imports", .{});
log.err(" symbol '{s}' defined in '{s}'", .{ symbol.name, object.name });
log.err(" symbol '{s}' defined in '{s}'", .{ sym_name, object.name });
return error.undefinedLocal;
}
try self.resolved_symbols.putNoClobber(self.base.allocator, location, {});
@ -299,7 +374,7 @@ fn resolveSymbolsInObject(self: *Wasm, object_index: u16) !void {
if (!existing_sym.isUndefined()) {
if (!symbol.isUndefined()) {
log.err("symbol '{s}' defined multiple times", .{existing_sym.name});
log.err("symbol '{s}' defined multiple times", .{sym_name});
log.err(" first definition in '{s}'", .{existing_file_path});
log.err(" next definition in '{s}'", .{object.name});
return error.SymbolCollision;
@ -309,7 +384,7 @@ fn resolveSymbolsInObject(self: *Wasm, object_index: u16) !void {
}
// simply overwrite with the new symbol
log.debug("Overwriting symbol '{s}'", .{symbol.name});
log.debug("Overwriting symbol '{s}'", .{sym_name});
log.debug(" old definition in '{s}'", .{existing_file_path});
log.debug(" new definition in '{s}'", .{object.name});
try self.discarded.putNoClobber(self.base.allocator, maybe_existing.value_ptr.*, location);
@ -328,12 +403,7 @@ pub fn deinit(self: *Wasm) void {
var decl_it = self.decls.keyIterator();
while (decl_it.next()) |decl_ptr| {
const decl = decl_ptr.*;
const atom: *Atom = &decl.link.wasm;
for (atom.locals.items) |local| {
gpa.free(mem.sliceTo(self.symbols.items[local.sym_index].name, 0));
}
decl.link.wasm.deinit(gpa);
decl_ptr.*.link.wasm.deinit(gpa);
}
for (self.func_types.items) |*func_type| {
@ -374,6 +444,8 @@ pub fn deinit(self: *Wasm) void {
self.function_table.deinit(gpa);
self.tables.deinit(gpa);
self.exports.deinit(gpa);
self.string_table.deinit(gpa);
}
pub fn allocateDeclIndexes(self: *Wasm, decl: *Module.Decl) !void {
@ -498,7 +570,10 @@ fn finishUpdateDecl(self: *Wasm, decl: *Module.Decl, code: []const u8) !void {
atom.size = @intCast(u32, code.len);
atom.alignment = decl.ty.abiAlignment(self.base.options.target);
const symbol = &self.symbols.items[atom.sym_index];
symbol.name = decl.name;
const full_name = try decl.getFullyQualifiedName(self.base.allocator);
defer self.base.allocator.free(full_name);
symbol.name = try self.string_table.put(self.base.allocator, full_name);
try atom.code.appendSlice(self.base.allocator, code);
}
@ -511,8 +586,9 @@ pub fn lowerUnnamedConst(self: *Wasm, decl: *Module.Decl, tv: TypedValue) !u32 {
// Create and initialize a new local symbol and atom
const local_index = decl.link.wasm.locals.items.len;
const name = try std.fmt.allocPrintZ(self.base.allocator, "__unnamed_{s}_{d}", .{ decl.name, local_index });
defer self.base.allocator.free(name);
var symbol: Symbol = .{
.name = name,
.name = try self.string_table.put(self.base.allocator, name),
.flags = 0,
.tag = .data,
.index = undefined,
@ -615,7 +691,7 @@ pub fn deleteExport(self: *Wasm, exp: Export) void {
const sym_index = exp.sym_index orelse return;
const loc: SymbolLoc = .{ .file = null, .index = sym_index };
const symbol = loc.getSymbol(self);
const symbol_name = mem.sliceTo(symbol.name, 0);
const symbol_name = self.string_table.get(symbol.name);
log.debug("Deleting export for decl '{s}'", .{symbol_name});
if (self.export_names.fetchRemove(loc)) |kv| {
assert(self.globals.remove(kv.value));
@ -656,7 +732,7 @@ pub fn updateDeclExports(
// are strong symbols, we have a linker error.
// In the other case we replace one with the other.
if (!exp_is_weak and !existing_sym.isWeak()) {
try module.failed_exports.putNoClobber(module.gpa, exp, try Module.ErrorMsg.create(
try module.failed_exports.put(module.gpa, exp, try Module.ErrorMsg.create(
module.gpa,
decl.srcLoc(),
\\LinkError: symbol '{s}' defined multiple times
@ -665,6 +741,7 @@ pub fn updateDeclExports(
,
.{ exp.options.name, self.name, self.name },
));
continue;
} else if (exp_is_weak) {
continue; // to-be-exported symbol is weak, so we keep the existing symbol
} else {
@ -697,7 +774,7 @@ pub fn updateDeclExports(
},
}
// Ensure the symbol will be exported using the given name
if (!mem.eql(u8, exp.options.name, mem.sliceTo(exp.exported_decl.name, 0))) {
if (!mem.eql(u8, exp.options.name, sym_loc.getName(self))) {
try self.export_names.put(self.base.allocator, sym_loc, exp.options.name);
}
@ -725,7 +802,6 @@ pub fn freeDecl(self: *Wasm, decl: *Module.Decl) void {
for (atom.locals.items) |local_atom| {
const local_symbol = &self.symbols.items[local_atom.sym_index];
local_symbol.tag = .dead; // also for any local symbol
self.base.allocator.free(mem.sliceTo(local_symbol.name, 0));
self.symbols_free_list.append(self.base.allocator, local_atom.sym_index) catch {};
assert(self.resolved_symbols.swapRemove(local_atom.symbolLoc()));
}
@ -755,14 +831,15 @@ fn mapFunctionTable(self: *Wasm) void {
}
fn addOrUpdateImport(self: *Wasm, decl: *Module.Decl) !void {
// For the import name itself, we use the decl's name, rather than the fully qualified name
const decl_name = mem.sliceTo(decl.name, 0);
const symbol_index = decl.link.wasm.sym_index;
const symbol: *Symbol = &self.symbols.items[symbol_index];
symbol.name = decl.name;
symbol.setUndefined(true);
symbol.setGlobal(true);
try self.globals.putNoClobber(
self.base.allocator,
mem.sliceTo(symbol.name, 0),
decl_name,
.{ .file = null, .index = symbol_index },
);
try self.resolved_symbols.put(self.base.allocator, .{ .file = null, .index = symbol_index }, {});
@ -776,7 +853,7 @@ fn addOrUpdateImport(self: *Wasm, decl: *Module.Decl) !void {
if (!gop.found_existing) {
gop.value_ptr.* = .{
.module_name = module_name,
.name = mem.sliceTo(symbol.name, 0),
.name = decl_name,
.kind = .{ .function = decl.fn_link.wasm.type_index },
};
}
@ -815,7 +892,7 @@ fn parseAtom(self: *Wasm, atom: *Atom, kind: Kind) !void {
// TODO: Add mutables global decls to .bss section instead
const segment_name = try std.mem.concat(self.base.allocator, u8, &.{
".rodata.",
std.mem.span(symbol.name),
self.string_table.get(symbol.name),
});
errdefer self.base.allocator.free(segment_name);
const segment_info: types.Segment = .{
@ -886,7 +963,7 @@ fn allocateAtoms(self: *Wasm) !void {
atom.offset = offset;
const symbol_loc = atom.symbolLoc();
log.debug("Atom '{s}' allocated from 0x{x:0>8} to 0x{x:0>8} size={d}", .{
symbol_loc.getSymbol(self).name,
symbol_loc.getName(self),
offset,
offset + atom.size,
atom.size,
@ -906,7 +983,7 @@ fn setupImports(self: *Wasm) !void {
// remove an import if it was resolved
if (self.imports.remove(discarded.*)) {
log.debug("Removed symbol '{s}' as an import", .{
discarded.getSymbol(self).name,
discarded.getName(self),
});
}
}
@ -923,7 +1000,7 @@ fn setupImports(self: *Wasm) !void {
continue;
}
log.debug("Symbol '{s}' will be imported from the host", .{symbol.name});
log.debug("Symbol '{s}' will be imported from the host", .{symbol_loc.getName(self)});
const import = self.objects.items[symbol_loc.file.?].findImport(symbol.tag.externalType(), symbol.index);
// TODO: De-duplicate imports
try self.imports.putNoClobber(self.base.allocator, symbol_loc, import);
@ -1036,12 +1113,12 @@ fn mergeTypes(self: *Wasm) !void {
}
if (symbol.isUndefined()) {
log.debug("Adding type from extern function '{s}'", .{symbol.name});
log.debug("Adding type from extern function '{s}'", .{sym_loc.getName(self)});
const import: *wasm.Import = self.imports.getPtr(sym_loc).?;
const original_type = object.func_types[import.kind.function];
import.kind.function = try self.putOrGetFuncType(original_type);
} else {
log.debug("Adding type from function '{s}'", .{symbol.name});
log.debug("Adding type from function '{s}'", .{sym_loc.getName(self)});
const func = &self.functions.items[symbol.index - self.imported_functions_count];
func.type_index = try self.putOrGetFuncType(object.func_types[func.type_index]);
}
@ -1057,13 +1134,14 @@ fn setupExports(self: *Wasm) !void {
const symbol = sym_loc.getSymbol(self);
if (!symbol.isExported()) continue;
const export_name = if (self.export_names.get(sym_loc)) |name| name else mem.sliceTo(symbol.name, 0);
const sym_name = sym_loc.getName(self);
const export_name = if (self.export_names.get(sym_loc)) |name| name else sym_name;
const exp: wasm.Export = .{
.name = export_name,
.kind = symbol.tag.externalType(),
.index = symbol.index,
};
log.debug("Exporting symbol '{s}' as '{s}' at index: ({d})", .{ symbol.name, exp.name, exp.index });
log.debug("Exporting symbol '{s}' as '{s}' at index: ({d})", .{ sym_name, exp.name, exp.index });
try self.exports.append(self.base.allocator, exp);
}
@ -1670,8 +1748,8 @@ fn emitNameSection(self: *Wasm, file: fs.File, arena: Allocator) !void {
for (self.resolved_symbols.keys()) |sym_loc| {
const symbol = sym_loc.getSymbol(self).*;
switch (symbol.tag) {
.function => funcs.appendAssumeCapacity(.{ .index = symbol.index, .name = mem.sliceTo(symbol.name, 0) }),
.global => globals.appendAssumeCapacity(.{ .index = symbol.index, .name = mem.sliceTo(symbol.name, 0) }),
.function => funcs.appendAssumeCapacity(.{ .index = symbol.index, .name = sym_loc.getName(self) }),
.global => globals.appendAssumeCapacity(.{ .index = symbol.index, .name = sym_loc.getName(self) }),
else => {},
}
}
@ -2275,11 +2353,11 @@ fn emitSymbolTable(self: *Wasm, file: fs.File, arena: Allocator, symbol_table: *
try leb.writeULEB128(writer, @enumToInt(symbol.tag));
try leb.writeULEB128(writer, symbol.flags);
const sym_name = if (self.export_names.get(sym_loc)) |exp_name| exp_name else sym_loc.getName(self);
switch (symbol.tag) {
.data => {
const name = mem.sliceTo(symbol.name, 0);
try leb.writeULEB128(writer, @intCast(u32, name.len));
try writer.writeAll(name);
try leb.writeULEB128(writer, @intCast(u32, sym_name.len));
try writer.writeAll(sym_name);
if (symbol.isDefined()) {
try leb.writeULEB128(writer, symbol.index);
@ -2294,9 +2372,8 @@ fn emitSymbolTable(self: *Wasm, file: fs.File, arena: Allocator, symbol_table: *
else => {
try leb.writeULEB128(writer, symbol.index);
if (symbol.isDefined()) {
const name = mem.sliceTo(symbol.name, 0);
try leb.writeULEB128(writer, @intCast(u32, name.len));
try writer.writeAll(name);
try leb.writeULEB128(writer, @intCast(u32, sym_name.len));
try writer.writeAll(sym_name);
}
},
}

View File

@ -103,17 +103,17 @@ pub fn symbolLoc(self: Atom) Wasm.SymbolLoc {
/// at the calculated offset.
pub fn resolveRelocs(self: *Atom, wasm_bin: *const Wasm) !void {
if (self.relocs.items.len == 0) return;
const symbol = self.symbolLoc().getSymbol(wasm_bin).*;
const symbol_name = self.symbolLoc().getName(wasm_bin);
log.debug("Resolving relocs in atom '{s}' count({d})", .{
symbol.name,
symbol_name,
self.relocs.items.len,
});
for (self.relocs.items) |reloc| {
const value = try self.relocationValue(reloc, wasm_bin);
log.debug("Relocating '{s}' referenced in '{s}' offset=0x{x:0>8} value={d}", .{
(Wasm.SymbolLoc{ .file = self.file, .index = reloc.index }).getSymbol(wasm_bin).name,
symbol.name,
(Wasm.SymbolLoc{ .file = self.file, .index = reloc.index }).getName(wasm_bin),
symbol_name,
reloc.offset,
value,
});

View File

@ -59,6 +59,10 @@ comdat_info: []const types.Comdat = &.{},
/// Represents non-synthetic sections that can essentially be mem-cpy'd into place
/// after performing relocations.
relocatable_data: []const RelocatableData = &.{},
/// String table for all strings required by the object file, such as symbol names,
/// import name, module name and export names. Each string will be deduplicated
/// and returns an offset into the table.
string_table: Wasm.StringTable = .{},
/// Represents a single item within a section (depending on its `type`)
const RelocatableData = struct {
@ -142,9 +146,6 @@ pub fn deinit(self: *Object, gpa: Allocator) void {
gpa.free(val);
}
self.relocations.deinit(gpa);
for (self.symtable) |symbol| {
gpa.free(std.mem.sliceTo(symbol.name, 0));
}
gpa.free(self.symtable);
gpa.free(self.comdat_info);
gpa.free(self.init_funcs);
@ -156,6 +157,7 @@ pub fn deinit(self: *Object, gpa: Allocator) void {
gpa.free(rel_data.data[0..rel_data.size]);
}
gpa.free(self.relocatable_data);
self.string_table.deinit(gpa);
self.* = undefined;
}
@ -228,7 +230,7 @@ fn checkLegacyIndirectFunctionTable(self: *Object, gpa: Allocator) !?Symbol {
var table_symbol: Symbol = .{
.flags = 0,
.name = try gpa.dupeZ(u8, table_import.name),
.name = try self.string_table.put(gpa, table_import.name),
.tag = .table,
.index = 0,
};
@ -666,7 +668,7 @@ fn Parser(comptime ReaderType: type) type {
symbol.* = try self.parseSymbol(gpa, reader);
log.debug("Found symbol: type({s}) name({s}) flags(0b{b:0>8})", .{
@tagName(symbol.tag),
symbol.name,
self.object.string_table.get(symbol.name),
symbol.flags,
});
}
@ -699,10 +701,10 @@ fn Parser(comptime ReaderType: type) type {
switch (tag) {
.data => {
const name_len = try leb.readULEB128(u32, reader);
const name = try gpa.allocSentinel(u8, name_len, 0);
errdefer gpa.free(name);
const name = try gpa.alloc(u8, name_len);
defer gpa.free(name);
try reader.readNoEof(name);
symbol.name = name;
symbol.name = try self.object.string_table.put(gpa, name);
// Data symbols only have the following fields if the symbol is defined
if (symbol.isDefined()) {
@ -714,7 +716,7 @@ fn Parser(comptime ReaderType: type) type {
},
.section => {
symbol.index = try leb.readULEB128(u32, reader);
symbol.name = @tagName(symbol.tag);
symbol.name = try self.object.string_table.put(gpa, @tagName(symbol.tag));
},
else => {
symbol.index = try leb.readULEB128(u32, reader);
@ -727,12 +729,12 @@ fn Parser(comptime ReaderType: type) type {
const explicit_name = symbol.hasFlag(.WASM_SYM_EXPLICIT_NAME);
if (!(is_undefined and !explicit_name)) {
const name_len = try leb.readULEB128(u32, reader);
const name = try gpa.allocSentinel(u8, name_len, 0);
errdefer gpa.free(name);
const name = try gpa.alloc(u8, name_len);
defer gpa.free(name);
try reader.readNoEof(name);
symbol.name = name;
symbol.name = try self.object.string_table.put(gpa, name);
} else {
symbol.name = try gpa.dupeZ(u8, maybe_import.?.name);
symbol.name = try self.object.string_table.put(gpa, maybe_import.?.name);
}
},
}
@ -882,7 +884,7 @@ pub fn parseIntoAtoms(self: *Object, gpa: Allocator, object_index: u16, wasm_bin
} else {
try wasm_bin.atoms.putNoClobber(gpa, final_index, atom);
}
log.debug("Parsed into atom: '{s}'", .{self.symtable[atom.sym_index].name});
log.debug("Parsed into atom: '{s}'", .{self.string_table.get(self.symtable[atom.sym_index].name)});
}
}

View File

@ -1,5 +1,8 @@
//! Wasm symbols describing its kind,
//! name and its properties.
//! Represents a wasm symbol. Containing all of its properties,
//! as well as providing helper methods to determine its functionality
//! and how it will/must be linked.
//! The name of the symbol can be found by providing the offset, found
//! on the `name` field, to a string table in the wasm binary or object file.
const Symbol = @This();
const std = @import("std");
@ -8,15 +11,15 @@ const types = @import("types.zig");
/// Bitfield containing flags for a symbol
/// Can contain any of the flags defined in `Flag`
flags: u32,
/// Symbol name, when undefined this will be taken from the import.
name: [*:0]const u8,
/// An union that represents both the type of symbol
/// as well as the data it holds.
tag: Tag,
/// Symbol name, when the symbol is undefined the name will be taken from the import.
/// Note: This is an index into the string table.
name: u32,
/// Index into the list of objects based on set `tag`
/// NOTE: This will be set to `undefined` when `tag` is `data`
/// and the symbol is undefined.
index: u32,
/// Represents the kind of the symbol, such as a function or global.
tag: Tag,
pub const Tag = enum {
function,
@ -164,7 +167,7 @@ pub fn format(self: Symbol, comptime fmt: []const u8, options: std.fmt.FormatOpt
const binding: []const u8 = if (self.isLocal()) "local" else "global";
try writer.print(
"{c} binding={s} visible={s} id={d} name={s}",
"{c} binding={s} visible={s} id={d} name_offset={d}",
.{ kind_fmt, binding, visible, self.index, self.name },
);
}