From 2339b25fd42ccd136660b9e4575aab2bb85b1163 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Mon, 9 Jan 2023 19:22:55 +0100 Subject: [PATCH 1/5] wasm-linker: discard symbol when both undefined During symbol resolution when both symbols are undefined, we must discard the new symbol with a reference to the existing symbol. This ensures the original symbol remains undefined. This fixes symbol resolution when linking with WASI-libC. --- src/link/Wasm.zig | 55 ++++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index d62d5adb25..377d526249 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -600,27 +600,34 @@ fn resolveSymbolsInObject(wasm: *Wasm, object_index: u16) !void { } if (existing_sym.isUndefined() and symbol.isUndefined()) { - const existing_name = if (existing_loc.file) |file_index| blk: { - const obj = wasm.objects.items[file_index]; - const name_index = obj.findImport(symbol.tag.externalType(), existing_sym.index).module_name; - break :blk obj.string_table.get(name_index); - } else blk: { - const name_index = wasm.imports.get(existing_loc).?.module_name; - break :blk wasm.string_table.get(name_index); - }; + // only verify module/import name for function symbols + if (symbol.tag == .function) { + const existing_name = if (existing_loc.file) |file_index| blk: { + const obj = wasm.objects.items[file_index]; + const name_index = obj.findImport(symbol.tag.externalType(), existing_sym.index).module_name; + break :blk obj.string_table.get(name_index); + } else blk: { + const name_index = wasm.imports.get(existing_loc).?.module_name; + break :blk wasm.string_table.get(name_index); + }; - const module_index = object.findImport(symbol.tag.externalType(), symbol.index).module_name; - const module_name = object.string_table.get(module_index); - if (!mem.eql(u8, existing_name, module_name)) { - log.err("symbol '{s}' module name mismatch. 
Expected '{s}', but found '{s}'", .{ - sym_name, - existing_name, - module_name, - }); - log.err(" first definition in '{s}'", .{existing_file_path}); - log.err(" next definition in '{s}'", .{object.name}); - return error.ModuleNameMismatch; + const module_index = object.findImport(symbol.tag.externalType(), symbol.index).module_name; + const module_name = object.string_table.get(module_index); + if (!mem.eql(u8, existing_name, module_name)) { + log.err("symbol '{s}' module name mismatch. Expected '{s}', but found '{s}'", .{ + sym_name, + existing_name, + module_name, + }); + log.err(" first definition in '{s}'", .{existing_file_path}); + log.err(" next definition in '{s}'", .{object.name}); + return error.ModuleNameMismatch; + } } + + // both undefined so skip overwriting existing symbol and discard the new symbol + try wasm.discarded.put(wasm.base.allocator, location, existing_loc); + continue; } if (existing_sym.tag == .global) { @@ -646,8 +653,10 @@ fn resolveSymbolsInObject(wasm: *Wasm, object_index: u16) !void { } } - // when both symbols are weak, we skip overwriting - if (existing_sym.isWeak() and symbol.isWeak()) { + // when both symbols are weak, we skip overwriting unless the existing + // symbol is undefined and the new one isn't, in which case we *do* overwrite it. 
+ if (existing_sym.isWeak() and symbol.isWeak()) blk: { + if (existing_sym.isUndefined() and !symbol.isUndefined()) break :blk; try wasm.discarded.put(wasm.base.allocator, location, existing_loc); continue; } @@ -1935,7 +1944,9 @@ fn setupStart(wasm: *Wasm) !void { return error.MissingSymbol; }; - const symbol_loc = wasm.globals.get(symbol_name_offset).?; + const symbol_loc = wasm.globals.get(symbol_name_offset) orelse { + log.err("Entry symbol '{s}' not found", .{entry_name}); + }; const symbol = symbol_loc.getSymbol(wasm); if (symbol.tag != .function) { log.err("Entry symbol '{s}' is not a function", .{entry_name}); From 1072f82acbe976222851bdd357f52dd9659d73d3 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Mon, 9 Jan 2023 21:20:16 +0100 Subject: [PATCH 2/5] wasm-linker: Fix symbol name on undefined symbol When emitting errors for undefined symbols, rather than unconditionally always using the name from an import, we must verify it's a symbol type that could have such an import. e.g. undefined data symbols do not have a corresponding import. For this reason we must use the regular name. 
--- src/link/Wasm.zig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 377d526249..12bb90ea64 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -824,7 +824,9 @@ fn checkUndefinedSymbols(wasm: *const Wasm) !void { } else wasm.name; const import_name = if (undef.file) |file_index| name: { const obj = wasm.objects.items[file_index]; - const name_index = obj.findImport(symbol.tag.externalType(), symbol.index).name; + const name_index = if (symbol.tag == .function) name_index: { + break :name_index obj.findImport(symbol.tag.externalType(), symbol.index).name; + } else symbol.name; break :name obj.string_table.get(name_index); } else wasm.string_table.get(wasm.imports.get(undef).?.name); log.err("could not resolve undefined symbol '{s}'", .{import_name}); From f8d1efd99ab0ff9ae49a17b437814f4fe329e83b Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Wed, 11 Jan 2023 07:03:15 +0100 Subject: [PATCH 3/5] wasm-linker: implement __wasm_call_ctors symbol This implements the `__wasm_call_ctors` symbol. This symbol is automatically referenced by libc to initialize its constructors. We first retrieve all constructors from each object file, and then create a function body that calls each constructor based on its priority. Constructors are not allowed to have any parameters, but are allowed to have a return type. When a return type does exist, we simply drop its value from the stack after calling the constructor to ensure we pass the stack validator. 
--- src/link.zig | 1 + src/link/Wasm.zig | 189 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 180 insertions(+), 10 deletions(-) diff --git a/src/link.zig b/src/link.zig index 15bff217f0..2fb85cb482 100644 --- a/src/link.zig +++ b/src/link.zig @@ -716,6 +716,7 @@ pub const File = struct { InvalidFeatureSet, InvalidFormat, InvalidIndex, + InvalidInitFunc, InvalidMagicByte, InvalidWasmVersion, LLDCrashed, diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 12bb90ea64..3f8aa2c62f 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -118,6 +118,9 @@ memories: std.wasm.Memory = .{ .limits = .{ .min = 0, .max = null } }, tables: std.ArrayListUnmanaged(std.wasm.Table) = .{}, /// Output export section exports: std.ArrayListUnmanaged(types.Export) = .{}, +/// List of initialization functions. These must be called in order of priority +/// by the (synthetic) __wasm_call_ctors function. +init_funcs: std.ArrayListUnmanaged(InitFuncLoc) = .{}, /// Indirect function table, used to call function pointers /// When this is non-zero, we must emit a table entry, @@ -238,6 +241,34 @@ pub const SymbolLoc = struct { } }; +/// Contains the location of the function symbol, as well as +/// the priority itself of the initialization function. +pub const InitFuncLoc = struct { + /// object file index in the list of objects. + /// Unlike `SymbolLoc` this cannot be `null` as we never define + /// our own ctors. + file: u16, + /// Symbol index within the corresponding object file. + index: u32, + /// The priority in which the constructor must be called. + priority: u32, + + /// From a given `InitFuncLoc` returns the corresponding function symbol + fn getSymbol(loc: InitFuncLoc, wasm: *const Wasm) *Symbol { + return getSymbolLoc(loc).getSymbol(wasm); + } + + /// Turns the given `InitFuncLoc` into a `SymbolLoc` + fn getSymbolLoc(loc: InitFuncLoc) SymbolLoc { + return .{ .file = loc.file, .index = loc.index }; + } + + /// Returns true when `lhs` has a higher priority (i.e. 
value closer to 0) than `rhs`. + fn lessThan(ctx: void, lhs: InitFuncLoc, rhs: InitFuncLoc) bool { + _ = ctx; + return lhs.priority < rhs.priority; + } +}; /// Generic string table that duplicates strings /// and converts them into offsets instead. pub const StringTable = struct { @@ -393,6 +424,16 @@ pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Option } } + // create __wasm_call_ctors + { + const loc = try wasm_bin.createSyntheticSymbol("__wasm_call_ctors", .function); + const symbol = loc.getSymbol(wasm_bin); + symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); + // we do not know the function index until after we merged all sections. + // Therefore we set `symbol.index` and create its corresponding references + // at the end during `initializeCallCtorsFunction`. + } + if (!options.strip and options.module != null) { wasm_bin.dwarf = Dwarf.init(allocator, &wasm_bin.base, options.target); try wasm_bin.initDebugSections(); @@ -896,6 +937,7 @@ pub fn deinit(wasm: *Wasm) void { wasm.wasm_globals.deinit(gpa); wasm.function_table.deinit(gpa); wasm.tables.deinit(gpa); + wasm.init_funcs.deinit(gpa); wasm.exports.deinit(gpa); wasm.string_table.deinit(gpa); @@ -1698,6 +1740,130 @@ fn sortDataSegments(wasm: *Wasm) !void { wasm.data_segments = new_mapping; } +/// Obtains all initfuncs from each object file, verifies its function signature, +/// and then appends it to our final `init_funcs` list. +/// After all functions have been inserted, the functions will be ordered based +/// on their priority. +/// NOTE: This function must be called before we merged any other section. +/// This is because all init funcs in the object files contain references to the +/// original functions and their types. We need to know the type to verify it doesn't +/// contain any parameters. 
+fn setupInitFunctions(wasm: *Wasm) !void { + for (wasm.objects.items) |object, file_index| { + try wasm.init_funcs.ensureUnusedCapacity(wasm.base.allocator, object.init_funcs.len); + for (object.init_funcs) |init_func| { + const symbol = object.symtable[init_func.symbol_index]; + const ty: std.wasm.Type = if (symbol.isUndefined()) ty: { + const imp: types.Import = object.findImport(.function, symbol.index); + break :ty object.func_types[imp.kind.function]; + } else ty: { + const func_index = symbol.index - object.importedCountByKind(.function); + const func = object.functions[func_index]; + break :ty object.func_types[func.type_index]; + }; + if (ty.params.len != 0) { + log.err("constructor functions cannot take arguments: '{s}'", .{object.string_table.get(symbol.name)}); + return error.InvalidInitFunc; + } + log.debug("appended init func '{s}'\n", .{object.string_table.get(symbol.name)}); + wasm.init_funcs.appendAssumeCapacity(.{ + .index = init_func.symbol_index, + .file = @intCast(u16, file_index), + .priority = init_func.priority, + }); + } + } + + // sort the initfunctions based on their priority + std.sort.sort(InitFuncLoc, wasm.init_funcs.items, {}, InitFuncLoc.lessThan); +} + +/// Creates a function body for the `__wasm_call_ctors` symbol. +/// Loops over all constructors found in `init_funcs` and calls them +/// respectively based on their priority which was sorted by `setupInitFunctions`. +/// NOTE: This function must be called after we merged all sections to ensure the +/// references to the function stored in the symbol have been finalized so we end +/// up calling the resolved function. +fn initializeCallCtorsFunction(wasm: *Wasm) !void { + // No code to emit, so also no ctors to call + if (wasm.code_section_index == null) { + // Make sure to remove it from the resolved symbols so we do not emit + // it within any section. TODO: Remove this once we implement garbage collection. 
+ const loc = wasm.globals.get(wasm.string_table.getOffset("__wasm_call_ctors").?).?; + std.debug.assert(wasm.resolved_symbols.swapRemove(loc)); + return; + } + + var function_body = std.ArrayList(u8).init(wasm.base.allocator); + defer function_body.deinit(); + const writer = function_body.writer(); + + // Create the function body + { + // Write locals count (we have none) + try leb.writeULEB128(writer, @as(u32, 0)); + + // call constructors + for (wasm.init_funcs.items) |init_func_loc| { + const symbol = init_func_loc.getSymbol(wasm); + if (symbol.isUndefined()) { + std.debug.print("Undefined symbol '{s}'\n", .{wasm.string_table.get(symbol.name)}); + } + std.debug.print("Symbol: {s}\n", .{init_func_loc.getSymbolLoc().getName(wasm)}); + std.debug.assert(wasm.resolved_symbols.contains(init_func_loc.getSymbolLoc().finalLoc(wasm))); + const func = wasm.functions.values()[symbol.index - wasm.imported_functions_count]; + const ty = wasm.func_types.items[func.type_index]; + + // Call function by its function index + try writer.writeByte(std.wasm.opcode(.call)); + try leb.writeULEB128(writer, symbol.index); + + // drop all returned values from the stack as __wasm_call_ctors has no return value + for (ty.returns) |_| { + try writer.writeByte(std.wasm.opcode(.drop)); + } + } + + // End function body + try writer.writeByte(std.wasm.opcode(.end)); + } + + const loc = wasm.globals.get(wasm.string_table.getOffset("__wasm_call_ctors").?).?; + const symbol = loc.getSymbol(wasm); + // create type (() -> nil) as we do not have any parameters or return value. 
+ const ty_index = try wasm.putOrGetFuncType(.{ .params = &[_]std.wasm.Valtype{}, .returns = &[_]std.wasm.Valtype{} }); + // create function with above type + const func_index = wasm.imported_functions_count + @intCast(u32, wasm.functions.count()); + try wasm.functions.putNoClobber( + wasm.base.allocator, + .{ .file = null, .index = func_index }, + .{ .type_index = ty_index }, + ); + symbol.index = func_index; + + // create the atom that will be output into the final binary + const atom = try wasm.base.allocator.create(Atom); + errdefer wasm.base.allocator.destroy(atom); + atom.* = .{ + .size = @intCast(u32, function_body.items.len), + .offset = 0, + .sym_index = loc.index, + .file = null, + .alignment = 1, + .next = null, + .prev = null, + .code = function_body.moveToUnmanaged(), + .dbg_info_atom = undefined, + }; + try wasm.managed_atoms.append(wasm.base.allocator, atom); + try wasm.appendAtomAtIndex(wasm.code_section_index.?, atom); + try wasm.symbol_atom.putNoClobber(wasm.base.allocator, loc, atom); + + // `allocateAtoms` has already been called, set the atom's offset manually. + // This is fine to do manually as we insert the atom at the very end. 
+ atom.offset = atom.prev.?.offset + atom.prev.?.size; +} + fn setupImports(wasm: *Wasm) !void { log.debug("Merging imports", .{}); var discarded_it = wasm.discarded.keyIterator(); @@ -1870,16 +2036,17 @@ fn setupExports(wasm: *Wasm) !void { const force_exp_names = wasm.base.options.export_symbol_names; if (force_exp_names.len > 0) { - var failed_exports = try std.ArrayList([]const u8).initCapacity(wasm.base.allocator, force_exp_names.len); - defer failed_exports.deinit(); + var failed_exports = false; for (force_exp_names) |exp_name| { const name_index = wasm.string_table.getOffset(exp_name) orelse { - failed_exports.appendAssumeCapacity(exp_name); + log.err("could not export '{s}', symbol not found", .{exp_name}); + failed_exports = true; continue; }; const loc = wasm.globals.get(name_index) orelse { - failed_exports.appendAssumeCapacity(exp_name); + log.err("could not export '{s}', symbol not found", .{exp_name}); + failed_exports = true; continue; }; @@ -1887,10 +2054,7 @@ fn setupExports(wasm: *Wasm) !void { symbol.setFlag(.WASM_SYM_EXPORTED); } - if (failed_exports.items.len > 0) { - for (failed_exports.items) |exp_name| { - log.err("could not export '{s}', symbol not found", .{exp_name}); - } + if (failed_exports) { return error.MissingSymbol; } } @@ -1948,6 +2112,7 @@ fn setupStart(wasm: *Wasm) !void { const symbol_loc = wasm.globals.get(symbol_name_offset) orelse { log.err("Entry symbol '{s}' not found", .{entry_name}); + return error.MissingSymbol; }; const symbol = symbol_loc.getSymbol(wasm); if (symbol.tag != .function) { @@ -2503,6 +2668,7 @@ fn linkWithZld(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) l try wasm.resolveSymbolsInArchives(); try wasm.checkUndefinedSymbols(); + try wasm.setupInitFunctions(); try wasm.setupStart(); try wasm.setupImports(); @@ -2515,6 +2681,7 @@ fn linkWithZld(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) l wasm.mapFunctionTable(); try wasm.mergeSections(); try wasm.mergeTypes(); + try 
wasm.initializeCallCtorsFunction(); try wasm.setupExports(); try wasm.writeToFile(enabled_features, emit_features_count, arena); @@ -2587,6 +2754,7 @@ pub fn flushModule(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod // When we finish/error we reset the state of the linker // So we can rebuild the binary file on each incremental update defer wasm.resetState(); + try wasm.setupInitFunctions(); try wasm.setupStart(); try wasm.setupImports(); if (wasm.base.options.module) |mod| { @@ -2629,6 +2797,7 @@ pub fn flushModule(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod wasm.mapFunctionTable(); try wasm.mergeSections(); try wasm.mergeTypes(); + try wasm.initializeCallCtorsFunction(); try wasm.setupExports(); try wasm.writeToFile(enabled_features, emit_features_count, arena); } @@ -3909,8 +4078,8 @@ pub fn getTypeIndex(wasm: *const Wasm, func_type: std.wasm.Type) ?u32 { return null; } -/// Searches for an a matching function signature, when not found -/// a new entry will be made. The index of the existing/new signature will be returned. +/// Searches for a matching function signature. When no matching signature is found, +/// a new entry will be made. The value returned is the index of the type within `wasm.func_types`. pub fn putOrGetFuncType(wasm: *Wasm, func_type: std.wasm.Type) !u32 { if (wasm.getTypeIndex(func_type)) |index| { return index; From c77ca9174976a9fb8769276ce913e761d66af1de Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Thu, 12 Jan 2023 09:43:31 +0100 Subject: [PATCH 4/5] wasm-linker: implement `__heap_base` symbol When any object file provides an undefined reference to the __heap_base symbol, we create a new defined symbol for it. During setupMemory we set the virtual address of this symbol so it can be used for relocations. This symbol represents where the heap starts and allocators can use this value for their allocations when they need to determine where the heap lives. 
--- src/link/Wasm.zig | 112 ++++++++++++++++++++++++++++------------ src/link/Wasm/types.zig | 1 + 2 files changed, 79 insertions(+), 34 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 3f8aa2c62f..4abf12a2de 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -475,7 +475,7 @@ fn createSyntheticSymbol(wasm: *Wasm, name: []const u8, tag: Symbol.Tag) !Symbol .index = undefined, }); try wasm.resolved_symbols.putNoClobber(wasm.base.allocator, loc, {}); - try wasm.globals.putNoClobber(wasm.base.allocator, name_offset, loc); + try wasm.globals.put(wasm.base.allocator, name_offset, loc); return loc; } /// Initializes symbols and atoms for the debug sections @@ -851,6 +851,35 @@ fn validateFeatures( to_emit.* = allowed; } +/// Creates synthetic linker-symbols, but only if they are being referenced from +/// any object file. For instance, the `__heap_base` symbol will only be created, +/// if one or multiple undefined references exist. When none exist, the symbol will +/// not be created, ensuring we don't unnecessarily emit unreferenced symbols. +fn resolveLazySymbols(wasm: *Wasm) !void { + if (wasm.undefs.fetchSwapRemove("__heap_base")) |kv| { + const loc = try wasm.createSyntheticSymbol("__heap_base", .data); + try wasm.discarded.putNoClobber(wasm.base.allocator, kv.value, loc); + _ = wasm.resolved_symbols.swapRemove(loc); // we don't want to emit this symbol, only use it for relocations. + + const atom = try wasm.base.allocator.create(Atom); + errdefer wasm.base.allocator.destroy(atom); + try wasm.managed_atoms.append(wasm.base.allocator, atom); + atom.* = Atom.empty; + atom.sym_index = loc.index; + atom.alignment = 1; + + try wasm.parseAtom(atom, .{ .data = .synthetic }); + try wasm.symbol_atom.putNoClobber(wasm.base.allocator, loc, atom); + } +} + +// Tries to find a global symbol by its name. Returns null when not found, +/// and its location when it is found. 
+fn findGlobalSymbol(wasm: *Wasm, name: []const u8) ?SymbolLoc { + const offset = wasm.string_table.getOffset(name) orelse return null; + return wasm.globals.get(offset); +} + fn checkUndefinedSymbols(wasm: *const Wasm) !void { if (wasm.base.options.output_mode == .Obj) return; if (wasm.base.options.import_symbols) return; @@ -1458,14 +1487,13 @@ fn mapFunctionTable(wasm: *Wasm) void { } if (wasm.base.options.import_table or wasm.base.options.output_mode == .Obj) { - const sym_loc = wasm.globals.get(wasm.string_table.getOffset("__indirect_function_table").?).?; + const sym_loc = wasm.findGlobalSymbol("__indirect_function_table").?; const import = wasm.imports.getPtr(sym_loc).?; import.kind.table.limits.min = index - 1; // we start at index 1. } else if (index > 1) { log.debug("Appending indirect function table", .{}); - const offset = wasm.string_table.getOffset("__indirect_function_table").?; - const sym_with_loc = wasm.globals.get(offset).?; - const symbol = sym_with_loc.getSymbol(wasm); + const sym_loc = wasm.findGlobalSymbol("__indirect_function_table").?; + const symbol = sym_loc.getSymbol(wasm); const table = &wasm.tables.items[symbol.index - wasm.imported_tables_count]; table.limits = .{ .min = index, .max = index }; } @@ -1544,6 +1572,7 @@ const Kind = union(enum) { read_only, uninitialized, initialized, + synthetic, }, function: FnData, @@ -1554,6 +1583,7 @@ const Kind = union(enum) { .read_only => return ".rodata.", .uninitialized => return ".bss.", .initialized => return ".data.", + .synthetic => return ".synthetic", } } }; @@ -1690,9 +1720,14 @@ fn allocateAtoms(wasm: *Wasm) !void { var offset: u32 = 0; while (true) { const symbol_loc = atom.symbolLoc(); - if (!wasm.resolved_symbols.contains(symbol_loc)) { - atom = atom.next orelse break; - continue; + if (wasm.code_section_index) |index| { + if (index == entry.key_ptr.*) { + if (!wasm.resolved_symbols.contains(symbol_loc)) { + // only allocate resolved function bodies. 
+ atom = atom.next orelse break; + continue; + } + } } offset = std.mem.alignForwardGeneric(u32, offset, atom.alignment); atom.offset = offset; @@ -1727,6 +1762,7 @@ fn sortDataSegments(wasm: *Wasm) !void { if (mem.startsWith(u8, name, ".rodata")) return 0; if (mem.startsWith(u8, name, ".data")) return 1; if (mem.startsWith(u8, name, ".text")) return 2; + if (mem.startsWith(u8, name, ".synthetic")) return 100; // always at end return 3; } }; @@ -1789,7 +1825,7 @@ fn initializeCallCtorsFunction(wasm: *Wasm) !void { if (wasm.code_section_index == null) { // Make sure to remove it from the resolved symbols so we do not emit // it within any section. TODO: Remove this once we implement garbage collection. - const loc = wasm.globals.get(wasm.string_table.getOffset("__wasm_call_ctors").?).?; + const loc = wasm.findGlobalSymbol("__wasm_call_ctors").?; std.debug.assert(wasm.resolved_symbols.swapRemove(loc)); return; } @@ -1806,11 +1842,6 @@ fn initializeCallCtorsFunction(wasm: *Wasm) !void { // call constructors for (wasm.init_funcs.items) |init_func_loc| { const symbol = init_func_loc.getSymbol(wasm); - if (symbol.isUndefined()) { - std.debug.print("Undefined symbol '{s}'\n", .{wasm.string_table.get(symbol.name)}); - } - std.debug.print("Symbol: {s}\n", .{init_func_loc.getSymbolLoc().getName(wasm)}); - std.debug.assert(wasm.resolved_symbols.contains(init_func_loc.getSymbolLoc().finalLoc(wasm))); const func = wasm.functions.values()[symbol.index - wasm.imported_functions_count]; const ty = wasm.func_types.items[func.type_index]; @@ -1828,7 +1859,7 @@ fn initializeCallCtorsFunction(wasm: *Wasm) !void { try writer.writeByte(std.wasm.opcode(.end)); } - const loc = wasm.globals.get(wasm.string_table.getOffset("__wasm_call_ctors").?).?; + const loc = wasm.findGlobalSymbol("__wasm_call_ctors").?; const symbol = loc.getSymbol(wasm); // create type (() -> nil) as we do not have any parameters or return value. 
const ty_index = try wasm.putOrGetFuncType(.{ .params = &[_]std.wasm.Valtype{}, .returns = &[_]std.wasm.Valtype{} }); @@ -2039,12 +2070,7 @@ fn setupExports(wasm: *Wasm) !void { var failed_exports = false; for (force_exp_names) |exp_name| { - const name_index = wasm.string_table.getOffset(exp_name) orelse { - log.err("could not export '{s}', symbol not found", .{exp_name}); - failed_exports = true; - continue; - }; - const loc = wasm.globals.get(name_index) orelse { + const loc = wasm.findGlobalSymbol(exp_name) orelse { log.err("could not export '{s}', symbol not found", .{exp_name}); failed_exports = true; continue; @@ -2100,7 +2126,7 @@ fn setupExports(wasm: *Wasm) !void { fn setupStart(wasm: *Wasm) !void { const entry_name = wasm.base.options.entry orelse "_start"; - const symbol_name_offset = wasm.string_table.getOffset(entry_name) orelse { + const symbol_loc = wasm.findGlobalSymbol(entry_name) orelse { if (wasm.base.options.output_mode == .Exe) { if (wasm.base.options.wasi_exec_model == .reactor) return; // Not required for reactors } else { @@ -2110,10 +2136,6 @@ fn setupStart(wasm: *Wasm) !void { return error.MissingSymbol; }; - const symbol_loc = wasm.globals.get(symbol_name_offset) orelse { - log.err("Entry symbol '{s}' not found", .{entry_name}); - return error.MissingSymbol; - }; const symbol = symbol_loc.getSymbol(wasm); if (symbol.tag != .function) { log.err("Entry symbol '{s}' is not a function", .{entry_name}); @@ -2133,6 +2155,8 @@ fn setupMemory(wasm: *Wasm) !void { // Use the user-provided stack size or else we use 1MB by default const stack_size = wasm.base.options.stack_size_override orelse page_size * 16; const stack_alignment = 16; // wasm's stack alignment as specified by tool-convention + const heap_alignment = 16; // wasm's heap alignment as specified by tool-convention + // Always place the stack at the start by default // unless the user specified the global-base flag var place_stack_first = true; @@ -2151,8 +2175,13 @@ fn 
setupMemory(wasm: *Wasm) !void { } var offset: u32 = @intCast(u32, memory_ptr); - for (wasm.data_segments.values()) |segment_index| { - const segment = &wasm.segments.items[segment_index]; + var data_seg_it = wasm.data_segments.iterator(); + while (data_seg_it.next()) |entry| { + if (mem.eql(u8, entry.key_ptr.*, ".synthetic")) { + // do not update synthetic segments as they are not part of the output + continue; + } + const segment = &wasm.segments.items[entry.value_ptr.*]; memory_ptr = std.mem.alignForwardGeneric(u64, memory_ptr, segment.alignment); memory_ptr += segment.size; segment.offset = offset; @@ -2165,6 +2194,16 @@ fn setupMemory(wasm: *Wasm) !void { wasm.wasm_globals.items[0].init.i32_const = @bitCast(i32, @intCast(u32, memory_ptr)); } + // One of the linked object files has a reference to the __heap_base symbol. + // We must set its virtual address so it can be used in relocations. + if (wasm.findGlobalSymbol("__heap_base")) |loc| { + const segment_index = wasm.data_segments.get(".synthetic").?; + const segment = &wasm.segments.items[segment_index]; + segment.offset = 0; // for simplicity we store the entire VA into atom's offset. 
+ const atom = wasm.symbol_atom.get(loc).?; + atom.offset = @intCast(u32, mem.alignForwardGeneric(u64, memory_ptr, heap_alignment)); + } + // Setup the max amount of pages // For now we only support wasm32 by setting the maximum allowed memory size 2^32-1 const max_memory_allowed: u64 = (1 << 32) - 1; @@ -2666,6 +2705,7 @@ fn linkWithZld(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) l var enabled_features: [@typeInfo(types.Feature.Tag).Enum.fields.len]bool = undefined; try wasm.validateFeatures(&enabled_features, &emit_features_count); try wasm.resolveSymbolsInArchives(); + try wasm.resolveLazySymbols(); try wasm.checkUndefinedSymbols(); try wasm.setupInitFunctions(); @@ -2749,6 +2789,7 @@ pub fn flushModule(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod var enabled_features: [@typeInfo(types.Feature.Tag).Enum.fields.len]bool = undefined; try wasm.validateFeatures(&enabled_features, &emit_features_count); try wasm.resolveSymbolsInArchives(); + try wasm.resolveLazySymbols(); try wasm.checkUndefinedSymbols(); // When we finish/error we reset the state of the linker @@ -2992,7 +3033,7 @@ fn writeToFile( if (wasm.function_table.count() > 0) { const header_offset = try reserveVecSectionHeader(&binary_bytes); - const table_loc = wasm.globals.get(wasm.string_table.getOffset("__indirect_function_table").?).?; + const table_loc = wasm.findGlobalSymbol("__indirect_function_table").?; const table_sym = table_loc.getSymbol(wasm); var flags: u32 = if (table_sym.index == 0) 0x0 else 0x02; // passive with implicit 0-index table or set table index manually @@ -3031,10 +3072,12 @@ fn writeToFile( defer sorted_atoms.deinit(); while (true) { - if (!is_obj) { - atom.resolveRelocs(wasm); + if (wasm.resolved_symbols.contains(atom.symbolLoc())) { + if (!is_obj) { + atom.resolveRelocs(wasm); + } + sorted_atoms.appendAssumeCapacity(atom); } - sorted_atoms.appendAssumeCapacity(atom); atom = atom.next orelse break; } @@ -3075,10 +3118,11 @@ fn writeToFile( // 
do not output 'bss' section unless we import memory and therefore // want to guarantee the data is zero initialized if (!import_memory and std.mem.eql(u8, entry.key_ptr.*, ".bss")) continue; - segment_count += 1; const atom_index = entry.value_ptr.*; - var atom: *Atom = wasm.atoms.getPtr(atom_index).?.*.getFirst(); const segment = wasm.segments.items[atom_index]; + if (segment.size == 0) continue; // do not emit empty segments + segment_count += 1; + var atom: *Atom = wasm.atoms.getPtr(atom_index).?.*.getFirst(); // flag and index to memory section (currently, there can only be 1 memory section in wasm) try leb.writeULEB128(binary_writer, @as(u32, 0)); diff --git a/src/link/Wasm/types.zig b/src/link/Wasm/types.zig index a46fad4e53..964ba04ba0 100644 --- a/src/link/Wasm/types.zig +++ b/src/link/Wasm/types.zig @@ -129,6 +129,7 @@ pub const Segment = struct { /// file or binary. When `merge_segments` is true, this will return the /// short name. i.e. ".rodata". When false, it returns the entire name instead. pub fn outputName(self: Segment, merge_segments: bool) []const u8 { + if (std.mem.startsWith(u8, self.name, ".synthetic")) return ".synthetic"; // always merge if (!merge_segments) return self.name; if (std.mem.startsWith(u8, self.name, ".rodata.")) { return ".rodata"; From 5468684456b13b6465c4fcd50c072e5d5c8536a3 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Fri, 13 Jan 2023 06:31:00 +0100 Subject: [PATCH 5/5] wasm-linker: implement the __heap_end symbol When any of the object files references the __heap_end symbol, we will create it as a synthetic symbol. The symbol only exists within the linker and will not be emitted within the binary as it's solely used for relocations. The symbol represents where the heap ends, so allocators can determine whether to allocate a new page or not. 
--- src/link/Wasm.zig | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 4abf12a2de..7154cd7bc1 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -871,6 +871,22 @@ fn resolveLazySymbols(wasm: *Wasm) !void { try wasm.parseAtom(atom, .{ .data = .synthetic }); try wasm.symbol_atom.putNoClobber(wasm.base.allocator, loc, atom); } + + if (wasm.undefs.fetchSwapRemove("__heap_end")) |kv| { + const loc = try wasm.createSyntheticSymbol("__heap_end", .data); + try wasm.discarded.putNoClobber(wasm.base.allocator, kv.value, loc); + _ = wasm.resolved_symbols.swapRemove(loc); + + const atom = try wasm.base.allocator.create(Atom); + errdefer wasm.base.allocator.destroy(atom); + try wasm.managed_atoms.append(wasm.base.allocator, atom); + atom.* = Atom.empty; + atom.sym_index = loc.index; + atom.alignment = 1; + + try wasm.parseAtom(atom, .{ .data = .synthetic }); + try wasm.symbol_atom.putNoClobber(wasm.base.allocator, loc, atom); + } } // Tries to find a global symbol by its name. 
Returns null when not found, @@ -892,14 +908,8 @@ fn checkUndefinedSymbols(wasm: *const Wasm) !void { const file_name = if (undef.file) |file_index| name: { break :name wasm.objects.items[file_index].name; } else wasm.name; - const import_name = if (undef.file) |file_index| name: { - const obj = wasm.objects.items[file_index]; - const name_index = if (symbol.tag == .function) name_index: { - break :name_index obj.findImport(symbol.tag.externalType(), symbol.index).name; - } else symbol.name; - break :name obj.string_table.get(name_index); - } else wasm.string_table.get(wasm.imports.get(undef).?.name); - log.err("could not resolve undefined symbol '{s}'", .{import_name}); + const symbol_name = undef.getName(wasm); + log.err("could not resolve undefined symbol '{s}'", .{symbol_name}); log.err(" defined in '{s}'", .{file_name}); } } @@ -2223,12 +2233,20 @@ fn setupMemory(wasm: *Wasm) !void { } memory_ptr = initial_memory; } - + memory_ptr = mem.alignForwardGeneric(u64, memory_ptr, std.wasm.page_size); // In case we do not import memory, but define it ourselves, // set the minimum amount of pages on the memory section. 
- wasm.memories.limits.min = @intCast(u32, std.mem.alignForwardGeneric(u64, memory_ptr, page_size) / page_size); + wasm.memories.limits.min = @intCast(u32, memory_ptr / page_size); log.debug("Total memory pages: {d}", .{wasm.memories.limits.min}); + if (wasm.findGlobalSymbol("__heap_end")) |loc| { + const segment_index = wasm.data_segments.get(".synthetic").?; + const segment = &wasm.segments.items[segment_index]; + segment.offset = 0; + const atom = wasm.symbol_atom.get(loc).?; + atom.offset = @intCast(u32, memory_ptr); + } + if (wasm.base.options.max_memory) |max_memory| { if (!std.mem.isAlignedGeneric(u64, max_memory, page_size)) { log.err("Maximum memory must be {d}-byte aligned", .{page_size}); @@ -3392,6 +3410,8 @@ fn emitNameSection(wasm: *Wasm, binary_bytes: *std.ArrayList(u8), arena: std.mem // bss section is not emitted when this condition holds true, so we also // do not output a name for it. if (!wasm.base.options.import_memory and std.mem.eql(u8, key, ".bss")) continue; + // Synthetic segments are not emitted + if (std.mem.eql(u8, key, ".synthetic")) continue; segments.appendAssumeCapacity(.{ .index = data_segment_index, .name = key }); data_segment_index += 1; }