From f3626eb81662fb7519d09e14bed6e4d958c73e43 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Mon, 6 Nov 2023 17:09:42 +0100 Subject: [PATCH 01/10] wasm-link: ensure TLS global when resolved When a linked object contains references to the __tls_base symbol, we lazily create this symbol. However, we wouldn't create the corresponding Wasm global. This meant its address wasn't set correctly, and it also failed to be output into the `Names` section. --- src/link/Wasm.zig | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 82f9f9f20d..ceed13620a 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -1242,6 +1242,14 @@ fn resolveLazySymbols(wasm: *Wasm) !void { if (wasm.undefs.fetchSwapRemove(name_offset)) |kv| { const loc = try wasm.createSyntheticSymbolOffset(name_offset, .global); try wasm.discarded.putNoClobber(wasm.base.allocator, kv.value, loc); + _ = wasm.resolved_symbols.swapRemove(kv.value); + const symbol = loc.getSymbol(wasm); + symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); + symbol.index = @intCast(wasm.imported_globals_count + wasm.wasm_globals.items.len); + try wasm.wasm_globals.append(wasm.base.allocator, .{ + .global_type = .{ .valtype = .i32, .mutable = true }, + .init = .{ .i32_const = undefined }, + }); } } } From 589aef153709a3c1e0b1ee8af4bb4d710d46b792 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Mon, 13 Nov 2023 15:25:17 +0100 Subject: [PATCH 02/10] wasm-linker: mark symbols and their references Symbols which are exported to the host, or contain the `NO_STRIP` flag, will be marked. All symbols which are referenced by this symbol are marked likewise. We achieve this by parsing all relocations of a symbol, and then marking the symbol it points to within the relocation. 
--- src/link/Wasm.zig | 39 ++++++++++++++++++++++++++++++++++++++- src/link/Wasm/Symbol.zig | 16 ++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index ceed13620a..f90db83e9f 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -3439,12 +3439,13 @@ fn linkWithZld(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) l try wasm.setupInitFunctions(); try wasm.setupStart(); - try wasm.setupImports(); for (wasm.objects.items, 0..) |*object, object_index| { try object.parseIntoAtoms(gpa, @as(u16, @intCast(object_index)), wasm); } + wasm.markReferences(); + try wasm.setupImports(); try wasm.allocateAtoms(); try wasm.setupMemory(); wasm.allocateVirtualAddresses(); @@ -3529,6 +3530,7 @@ pub fn flushModule(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod try wasm.setupInitFunctions(); try wasm.setupErrorsLen(); try wasm.setupStart(); + wasm.markReferences(); try wasm.setupImports(); if (wasm.base.options.module) |mod| { var decl_it = wasm.decls.iterator(); @@ -5026,3 +5028,38 @@ pub fn storeDeclType(wasm: *Wasm, decl_index: InternPool.DeclIndex, func_type: s try wasm.atom_types.put(wasm.base.allocator, atom_index, index); return index; } + +/// Verifies all resolved symbols and checks whether itself needs to be marked alive, +/// as well as any of its references. +fn markReferences(wasm: *Wasm) void { + const tracy = trace(@src()); + defer tracy.end(); + for (wasm.resolved_symbols.keys()) |sym_loc| { + const sym = sym_loc.getSymbol(wasm); + if (sym.isExported(wasm.base.options.rdynamic) or sym.isNoStrip()) { + wasm.mark(sym_loc); + } + } +} + +/// Marks a symbol as 'alive' recursively so itself and any references it contains to +/// other symbols will not be omit from the binary. +fn mark(wasm: *Wasm, loc: SymbolLoc) void { + const symbol = loc.getSymbol(wasm); + if (symbol.isAlive()) { + // Symbol is already marked alive, including its references. 
+ // This means we can skip it so we don't end up marking the same symbols + // multiple times. + return; + } + symbol.mark(); + + if (wasm.symbol_atom.get(loc)) |atom_index| { + const atom = wasm.getAtom(atom_index); + const relocations: []const types.Relocation = atom.relocs.items; + for (relocations) |reloc| { + const target_loc: SymbolLoc = .{ .index = reloc.index, .file = loc.file }; + wasm.mark(target_loc.finalLoc(wasm)); + } + } +} diff --git a/src/link/Wasm/Symbol.zig b/src/link/Wasm/Symbol.zig index d15e86a666..b4507f9e14 100644 --- a/src/link/Wasm/Symbol.zig +++ b/src/link/Wasm/Symbol.zig @@ -79,6 +79,9 @@ pub const Flag = enum(u32) { WASM_SYM_NO_STRIP = 0x80, /// Indicates a symbol is TLS WASM_SYM_TLS = 0x100, + /// Zig specific flag. Uses the most significant bit of the flag to annotate whether a symbol is + /// alive or not. Dead symbols are allowed to be garbage collected. + alive = 0x80000000, }; /// Verifies if the given symbol should be imported from the @@ -92,6 +95,19 @@ pub fn requiresImport(symbol: Symbol) bool { return true; } +/// Marks a symbol as 'alive', ensuring the garbage collector will not collect the trash. +pub fn mark(symbol: *Symbol) void { + symbol.flags |= @intFromEnum(Flag.alive); +} + +pub fn isAlive(symbol: Symbol) bool { + return symbol.flags & @intFromEnum(Flag.alive) != 0; +} + +pub fn isDead(symbol: Symbol) bool { + return symbol.flags & @intFromEnum(Flag.alive) == 0; +} + pub fn isTLS(symbol: Symbol) bool { return symbol.flags & @intFromEnum(Flag.WASM_SYM_TLS) != 0; } From c986c6c90a5746cb671177e486a77bd78a5946b0 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Tue, 14 Nov 2023 19:58:52 +0100 Subject: [PATCH 03/10] wasm-linker: do not merge unreferenced symbols When a symbol is unreferenced and therefore garbage-collected, we do not merge its specific section into the final binary. 
--- src/link/Wasm.zig | 65 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 18 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index f90db83e9f..91896dea02 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -1981,10 +1981,16 @@ pub fn addTableFunction(wasm: *Wasm, symbol_index: u32) !void { /// Starts at offset 1, where the value `0` represents an unresolved function pointer /// or null-pointer fn mapFunctionTable(wasm: *Wasm) void { - var it = wasm.function_table.valueIterator(); + var it = wasm.function_table.iterator(); var index: u32 = 1; - while (it.next()) |value_ptr| : (index += 1) { - value_ptr.* = index; + while (it.next()) |entry| { + const symbol = entry.key_ptr.*.getSymbol(wasm); + if (symbol.isAlive()) { + entry.value_ptr.* = index; + index += 1; + } else { + wasm.function_table.removeByPtr(entry.key_ptr); + } } if (wasm.base.options.import_table or wasm.base.options.output_mode == .Obj) { @@ -2242,14 +2248,23 @@ fn allocateAtoms(wasm: *Wasm) !void { while (true) { const atom = wasm.getAtomPtr(atom_index); const symbol_loc = atom.symbolLoc(); - if (wasm.code_section_index) |index| { - if (index == entry.key_ptr.*) { - if (!wasm.resolved_symbols.contains(symbol_loc)) { - // only allocate resolved function body's. - atom_index = atom.prev orelse break; - continue; - } + const sym = symbol_loc.getSymbol(wasm); + if (sym.isDead()) { + // Dead symbols must be unlinked from the linked-list to prevent them + // from being emit into the binary. 
+ if (atom.prev) |prev_index| { + const prev = wasm.getAtomPtr(prev_index); + prev.next = atom.next; } + atom_index = atom.next orelse { + atom.prev = null; + break; + }; + const next = wasm.getAtomPtr(atom_index); + next.prev = atom.prev; + atom.prev = null; + atom.next = null; + continue; } offset = @intCast(atom.alignment.forward(offset)); atom.offset = offset; @@ -2358,11 +2373,17 @@ fn setupInitFunctions(wasm: *Wasm) !void { .file = @as(u16, @intCast(file_index)), .priority = init_func.priority, }); + try wasm.mark(.{ .index = init_func.symbol_index, .file = @intCast(file_index) }); } } // sort the initfunctions based on their priority mem.sort(InitFuncLoc, wasm.init_funcs.items, {}, InitFuncLoc.lessThan); + + if (wasm.init_funcs.items.len > 0) { + const loc = wasm.findGlobalSymbol("__wasm_call_ctors").?; + try wasm.mark(loc); + } } /// Generates an atom containing the global error set' size. @@ -2463,6 +2484,9 @@ fn createSyntheticFunction( const loc = wasm.findGlobalSymbol(symbol_name) orelse try wasm.createSyntheticSymbol(symbol_name, .function); const symbol = loc.getSymbol(wasm); + if (symbol.isDead()) { + return; + } const ty_index = try wasm.putOrGetFuncType(func_ty); // create function with above type const func_index = wasm.imported_functions_count + @as(u32, @intCast(wasm.functions.count())); @@ -2628,10 +2652,10 @@ fn setupImports(wasm: *Wasm) !void { } const symbol = symbol_loc.getSymbol(wasm); - if (std.mem.eql(u8, symbol_loc.getName(wasm), "__indirect_function_table")) { - continue; - } - if (!symbol.requiresImport()) { + if (symbol.isDead() or + !symbol.requiresImport() or + std.mem.eql(u8, symbol_loc.getName(wasm), "__indirect_function_table")) + { continue; } @@ -2697,7 +2721,11 @@ fn mergeSections(wasm: *Wasm) !void { const object = &wasm.objects.items[sym_loc.file.?]; const symbol = &object.symtable[sym_loc.index]; - if (symbol.isUndefined() or (symbol.tag != .function and symbol.tag != .global and symbol.tag != .table)) { + + if 
(symbol.isDead() or + symbol.isUndefined() or + (symbol.tag != .function and symbol.tag != .global and symbol.tag != .table)) + { // Skip undefined symbols as they go in the `import` section // Also skip symbols that do not need to have a section merged. continue; @@ -2753,8 +2781,8 @@ fn mergeTypes(wasm: *Wasm) !void { } const object = wasm.objects.items[sym_loc.file.?]; const symbol = object.symtable[sym_loc.index]; - if (symbol.tag != .function) { - // Only functions have types + if (symbol.tag != .function or symbol.isDead()) { + // Only functions have types. Only retrieve the type of referenced functions. continue; } @@ -3823,7 +3851,8 @@ fn writeToFile( try leb.writeULEB128(binary_writer, @as(u32, @intCast(wasm.function_table.count()))); var symbol_it = wasm.function_table.keyIterator(); while (symbol_it.next()) |symbol_loc_ptr| { - try leb.writeULEB128(binary_writer, symbol_loc_ptr.*.getSymbol(wasm).index); + const sym = symbol_loc_ptr.*.getSymbol(wasm); + try leb.writeULEB128(binary_writer, sym.index); } try writeVecSectionHeader( From 8856ba75059f74a326d1f8d3af40a30c5a3ac1ed Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Thu, 16 Nov 2023 19:29:57 +0100 Subject: [PATCH 04/10] wasm-linker: parse symbols into atoms lazily Rather than parsing every symbol into an atom, we now only parse them into an atom when such atom is marked. This means garbage-collected symbols will also not be parsed into atoms, and neither are discarded symbols which have been resolved by other symbols. (Such as multiple weak symbols). This also introduces a binary search for finding the start index into the list of relocations. This speeds up finding the corresponding relocations tremendously as they're ordered ascended by address. Lastly, we re-use the memory of atom's data as well as relocations instead of duplicating it. This means we half the memory usage of atom's data and relocations for linked object files. 
As we are aware of decls and synthetic atoms, we free the memory of those atoms independently of the atoms of object files to prevent double-frees. --- src/link/Wasm.zig | 126 +++++++++++-------- src/link/Wasm/Atom.zig | 18 +-- src/link/Wasm/Object.zig | 266 ++++++++++++++++++--------------------- 3 files changed, 209 insertions(+), 201 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 91896dea02..81e73e6ecf 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -1309,6 +1309,31 @@ pub fn deinit(wasm: *Wasm) void { archive.deinit(gpa); } + // For decls and anon decls we free the memory of its atoms. + // The memory of atoms parsed from object files is managed by + // The object file itself, and therefore we can skip those. + { + var it = wasm.decls.valueIterator(); + while (it.next()) |atom_index_ptr| { + const atom = wasm.getAtomPtr(atom_index_ptr.*); + for (atom.locals.items) |local_index| { + const local_atom = wasm.getAtomPtr(local_index); + local_atom.deinit(gpa); + } + atom.deinit(gpa); + } + } + { + for (wasm.anon_decls.values()) |atom_index| { + const atom = wasm.getAtomPtr(atom_index); + for (atom.locals.items) |local_index| { + const local_atom = wasm.getAtomPtr(local_index); + local_atom.deinit(gpa); + } + atom.deinit(gpa); + } + } + wasm.decls.deinit(gpa); wasm.anon_decls.deinit(gpa); wasm.atom_types.deinit(gpa); @@ -1321,9 +1346,6 @@ pub fn deinit(wasm: *Wasm) void { wasm.symbol_atom.deinit(gpa); wasm.export_names.deinit(gpa); wasm.atoms.deinit(gpa); - for (wasm.managed_atoms.items) |*managed_atom| { - managed_atom.deinit(wasm); - } wasm.managed_atoms.deinit(gpa); wasm.segments.deinit(gpa); wasm.data_segments.deinit(gpa); @@ -1342,6 +1364,10 @@ pub fn deinit(wasm: *Wasm) void { wasm.exports.deinit(gpa); wasm.string_table.deinit(gpa); + for (wasm.synthetic_functions.items) |atom_index| { + const atom = wasm.getAtomPtr(atom_index); + atom.deinit(gpa); + } wasm.synthetic_functions.deinit(gpa); if (wasm.dwarf) |*dwarf| { @@ -2406,7 
+2432,7 @@ fn setupErrorsLen(wasm: *Wasm) !void { prev_atom.next = atom.next; atom.prev = null; } - atom.deinit(wasm); + atom.deinit(wasm.base.allocator); break :blk index; } else new_atom: { const atom_index: Atom.Index = @intCast(wasm.managed_atoms.items.len); @@ -2509,6 +2535,7 @@ fn createSyntheticFunction( .next = null, .prev = null, .code = function_body.moveToUnmanaged(), + .original_offset = 0, }; try wasm.appendAtomAtIndex(wasm.code_section_index.?, atom_index); try wasm.symbol_atom.putNoClobber(wasm.base.allocator, loc, atom_index); @@ -2545,6 +2572,7 @@ pub fn createFunction( .prev = null, .code = function_body.moveToUnmanaged(), .relocs = relocations.moveToUnmanaged(), + .original_offset = 0, }; const symbol = loc.getSymbol(wasm); symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); // ensure function does not get exported @@ -3016,14 +3044,14 @@ fn setupMemory(wasm: *Wasm) !void { /// From a given object's index and the index of the segment, returns the corresponding /// index of the segment within the final data section. When the segment does not yet /// exist, a new one will be initialized and appended. The new index will be returned in that case. 
-pub fn getMatchingSegment(wasm: *Wasm, object_index: u16, relocatable_index: u32) !?u32 { +pub fn getMatchingSegment(wasm: *Wasm, object_index: u16, symbol_index: u32) !u32 { const object: Object = wasm.objects.items[object_index]; - const relocatable_data = object.relocatable_data[relocatable_index]; + const symbol = object.symtable[symbol_index]; const index = @as(u32, @intCast(wasm.segments.items.len)); - switch (relocatable_data.type) { + switch (symbol.tag) { .data => { - const segment_info = object.segment_info[relocatable_data.index]; + const segment_info = object.segment_info[symbol.index]; const merge_segment = wasm.base.options.output_mode != .Obj; const result = try wasm.data_segments.getOrPut(wasm.base.allocator, segment_info.outputName(merge_segment)); if (!result.found_existing) { @@ -3041,67 +3069,67 @@ pub fn getMatchingSegment(wasm: *Wasm, object_index: u16, relocatable_index: u32 return index; } else return result.value_ptr.*; }, - .code => return wasm.code_section_index orelse blk: { + .function => return wasm.code_section_index orelse blk: { wasm.code_section_index = index; try wasm.appendDummySegment(); break :blk index; }, - .debug => { - const debug_name = object.getDebugName(relocatable_data); - if (mem.eql(u8, debug_name, ".debug_info")) { + .section => { + const section_name = object.string_table.get(symbol.name); + if (mem.eql(u8, section_name, ".debug_info")) { return wasm.debug_info_index orelse blk: { wasm.debug_info_index = index; try wasm.appendDummySegment(); break :blk index; }; - } else if (mem.eql(u8, debug_name, ".debug_line")) { + } else if (mem.eql(u8, section_name, ".debug_line")) { return wasm.debug_line_index orelse blk: { wasm.debug_line_index = index; try wasm.appendDummySegment(); break :blk index; }; - } else if (mem.eql(u8, debug_name, ".debug_loc")) { + } else if (mem.eql(u8, section_name, ".debug_loc")) { return wasm.debug_loc_index orelse blk: { wasm.debug_loc_index = index; try wasm.appendDummySegment(); break 
:blk index; }; - } else if (mem.eql(u8, debug_name, ".debug_ranges")) { + } else if (mem.eql(u8, section_name, ".debug_ranges")) { return wasm.debug_line_index orelse blk: { wasm.debug_ranges_index = index; try wasm.appendDummySegment(); break :blk index; }; - } else if (mem.eql(u8, debug_name, ".debug_pubnames")) { + } else if (mem.eql(u8, section_name, ".debug_pubnames")) { return wasm.debug_pubnames_index orelse blk: { wasm.debug_pubnames_index = index; try wasm.appendDummySegment(); break :blk index; }; - } else if (mem.eql(u8, debug_name, ".debug_pubtypes")) { + } else if (mem.eql(u8, section_name, ".debug_pubtypes")) { return wasm.debug_pubtypes_index orelse blk: { wasm.debug_pubtypes_index = index; try wasm.appendDummySegment(); break :blk index; }; - } else if (mem.eql(u8, debug_name, ".debug_abbrev")) { + } else if (mem.eql(u8, section_name, ".debug_abbrev")) { return wasm.debug_abbrev_index orelse blk: { wasm.debug_abbrev_index = index; try wasm.appendDummySegment(); break :blk index; }; - } else if (mem.eql(u8, debug_name, ".debug_str")) { + } else if (mem.eql(u8, section_name, ".debug_str")) { return wasm.debug_str_index orelse blk: { wasm.debug_str_index = index; try wasm.appendDummySegment(); break :blk index; }; } else { - log.warn("found unknown debug section '{s}'", .{debug_name}); - log.warn(" debug section will be skipped", .{}); - return null; + log.warn("found unknown section '{s}'", .{section_name}); + return error.UnexpectedValue; } }, + else => unreachable, } } @@ -3468,11 +3496,7 @@ fn linkWithZld(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) l try wasm.setupInitFunctions(); try wasm.setupStart(); - for (wasm.objects.items, 0..) 
|*object, object_index| { - try object.parseIntoAtoms(gpa, @as(u16, @intCast(object_index)), wasm); - } - - wasm.markReferences(); + try wasm.markReferences(); try wasm.setupImports(); try wasm.allocateAtoms(); try wasm.setupMemory(); @@ -3558,7 +3582,7 @@ pub fn flushModule(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod try wasm.setupInitFunctions(); try wasm.setupErrorsLen(); try wasm.setupStart(); - wasm.markReferences(); + try wasm.markReferences(); try wasm.setupImports(); if (wasm.base.options.module) |mod| { var decl_it = wasm.decls.iterator(); @@ -3615,10 +3639,6 @@ pub fn flushModule(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod } } - for (wasm.objects.items, 0..) |*object, object_index| { - try object.parseIntoAtoms(wasm.base.allocator, @as(u16, @intCast(object_index)), wasm); - } - try wasm.allocateAtoms(); try wasm.setupMemory(); wasm.allocateVirtualAddresses(); @@ -3885,18 +3905,15 @@ fn writeToFile( var atom_index = wasm.atoms.get(code_index).?; // The code section must be sorted in line with the function order. 
- var sorted_atoms = try std.ArrayList(*Atom).initCapacity(wasm.base.allocator, wasm.functions.count()); + var sorted_atoms = try std.ArrayList(*const Atom).initCapacity(wasm.base.allocator, wasm.functions.count()); defer sorted_atoms.deinit(); while (true) { - var atom = wasm.getAtomPtr(atom_index); - if (wasm.resolved_symbols.contains(atom.symbolLoc())) { - if (!is_obj) { - atom.resolveRelocs(wasm); - } - sorted_atoms.appendAssumeCapacity(atom); + const atom = wasm.getAtomPtr(atom_index); + if (!is_obj) { + atom.resolveRelocs(wasm); } - // atom = if (atom.prev) |prev| wasm.getAtomPtr(prev) else break; + sorted_atoms.appendAssumeCapacity(atom); // found more code atoms than functions atom_index = atom.prev orelse break; } @@ -3908,7 +3925,7 @@ fn writeToFile( } }.sort; - mem.sort(*Atom, sorted_atoms.items, wasm, atom_sort_fn); + mem.sort(*const Atom, sorted_atoms.items, wasm, atom_sort_fn); for (sorted_atoms.items) |sorted_atom| { try leb.writeULEB128(binary_writer, sorted_atom.size); @@ -5060,20 +5077,20 @@ pub fn storeDeclType(wasm: *Wasm, decl_index: InternPool.DeclIndex, func_type: s /// Verifies all resolved symbols and checks whether itself needs to be marked alive, /// as well as any of its references. -fn markReferences(wasm: *Wasm) void { +fn markReferences(wasm: *Wasm) !void { const tracy = trace(@src()); defer tracy.end(); for (wasm.resolved_symbols.keys()) |sym_loc| { const sym = sym_loc.getSymbol(wasm); if (sym.isExported(wasm.base.options.rdynamic) or sym.isNoStrip()) { - wasm.mark(sym_loc); + try wasm.mark(sym_loc); } } } /// Marks a symbol as 'alive' recursively so itself and any references it contains to /// other symbols will not be omit from the binary. -fn mark(wasm: *Wasm, loc: SymbolLoc) void { +fn mark(wasm: *Wasm, loc: SymbolLoc) !void { const symbol = loc.getSymbol(wasm); if (symbol.isAlive()) { // Symbol is already marked alive, including its references. 
@@ -5082,13 +5099,20 @@ fn mark(wasm: *Wasm, loc: SymbolLoc) void { return; } symbol.mark(); + if (symbol.isUndefined()) { + // undefined symbols do not have an associated `Atom` and therefore also + // do not contain relocations. + return; + } - if (wasm.symbol_atom.get(loc)) |atom_index| { - const atom = wasm.getAtom(atom_index); - const relocations: []const types.Relocation = atom.relocs.items; - for (relocations) |reloc| { - const target_loc: SymbolLoc = .{ .index = reloc.index, .file = loc.file }; - wasm.mark(target_loc.finalLoc(wasm)); - } + const file = loc.file orelse return; // Marking synthetic and Zig symbols is done seperately + const object = &wasm.objects.items[file]; + const atom_index = try Object.parseSymbolIntoAtom(object, file, loc.index, wasm); + + const atom = wasm.getAtom(atom_index); + const relocations: []const types.Relocation = atom.relocs.items; + for (relocations) |reloc| { + const target_loc: SymbolLoc = .{ .index = reloc.index, .file = file }; + try wasm.mark(target_loc.finalLoc(wasm)); } } diff --git a/src/link/Wasm/Atom.zig b/src/link/Wasm/Atom.zig index 60727b6af1..b20e8628ba 100644 --- a/src/link/Wasm/Atom.zig +++ b/src/link/Wasm/Atom.zig @@ -23,6 +23,10 @@ alignment: Wasm.Alignment, /// Offset into the section where the atom lives, this already accounts /// for alignment. offset: u32, +/// The original offset within the object file. This value is substracted from +/// relocation offsets to determine where in the `data` to rewrite the value +original_offset: u32, + /// Represents the index of the file this atom was generated from. /// This is 'null' when the atom was generated by a Decl from Zig code. file: ?u16, @@ -50,11 +54,11 @@ pub const empty: Atom = .{ .prev = null, .size = 0, .sym_index = 0, + .original_offset = 0, }; /// Frees all resources owned by this `Atom`. 
-pub fn deinit(atom: *Atom, wasm: *Wasm) void { - const gpa = wasm.base.allocator; +pub fn deinit(atom: *Atom, gpa: std.mem.Allocator) void { atom.relocs.deinit(gpa); atom.code.deinit(gpa); atom.locals.deinit(gpa); @@ -114,10 +118,10 @@ pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void { .R_WASM_GLOBAL_INDEX_I32, .R_WASM_MEMORY_ADDR_I32, .R_WASM_SECTION_OFFSET_I32, - => std.mem.writeInt(u32, atom.code.items[reloc.offset..][0..4], @as(u32, @intCast(value)), .little), + => std.mem.writeInt(u32, atom.code.items[reloc.offset - atom.original_offset ..][0..4], @as(u32, @intCast(value)), .little), .R_WASM_TABLE_INDEX_I64, .R_WASM_MEMORY_ADDR_I64, - => std.mem.writeInt(u64, atom.code.items[reloc.offset..][0..8], value, .little), + => std.mem.writeInt(u64, atom.code.items[reloc.offset - atom.original_offset ..][0..8], value, .little), .R_WASM_GLOBAL_INDEX_LEB, .R_WASM_EVENT_INDEX_LEB, .R_WASM_FUNCTION_INDEX_LEB, @@ -127,12 +131,12 @@ pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void { .R_WASM_TABLE_NUMBER_LEB, .R_WASM_TYPE_INDEX_LEB, .R_WASM_MEMORY_ADDR_TLS_SLEB, - => leb.writeUnsignedFixed(5, atom.code.items[reloc.offset..][0..5], @as(u32, @intCast(value))), + => leb.writeUnsignedFixed(5, atom.code.items[reloc.offset - atom.original_offset ..][0..5], @as(u32, @intCast(value))), .R_WASM_MEMORY_ADDR_LEB64, .R_WASM_MEMORY_ADDR_SLEB64, .R_WASM_TABLE_INDEX_SLEB64, .R_WASM_MEMORY_ADDR_TLS_SLEB64, - => leb.writeUnsignedFixed(10, atom.code.items[reloc.offset..][0..10], value), + => leb.writeUnsignedFixed(10, atom.code.items[reloc.offset - atom.original_offset ..][0..10], value), } } } @@ -150,7 +154,7 @@ fn relocationValue(atom: Atom, relocation: types.Relocation, wasm_bin: *const Wa .R_WASM_TABLE_INDEX_I64, .R_WASM_TABLE_INDEX_SLEB, .R_WASM_TABLE_INDEX_SLEB64, - => return wasm_bin.function_table.get(target_loc) orelse 0, + => return wasm_bin.function_table.get(.{ .file = atom.file, .index = relocation.index }) orelse 0, .R_WASM_TYPE_INDEX_LEB => { const 
file_index = atom.file orelse { return relocation.index; diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig index e7c118e48e..858d52f836 100644 --- a/src/link/Wasm/Object.zig +++ b/src/link/Wasm/Object.zig @@ -59,20 +59,16 @@ init_funcs: []const types.InitFunc = &.{}, comdat_info: []const types.Comdat = &.{}, /// Represents non-synthetic sections that can essentially be mem-cpy'd into place /// after performing relocations. -relocatable_data: []const RelocatableData = &.{}, +relocatable_data: std.AutoHashMapUnmanaged(RelocatableData.Tag, []RelocatableData) = .{}, /// String table for all strings required by the object file, such as symbol names, /// import name, module name and export names. Each string will be deduplicated /// and returns an offset into the table. string_table: Wasm.StringTable = .{}, -/// All the names of each debug section found in the current object file. -/// Each name is terminated by a null-terminator. The name can be found, -/// from the `index` offset within the `RelocatableData`. -debug_names: [:0]const u8, /// Represents a single item within a section (depending on its `type`) const RelocatableData = struct { /// The type of the relocatable data - type: enum { data, code, debug }, + type: Tag, /// Pointer to the data of the segment, where its length is written to `size` data: [*]u8, /// The size in bytes of the data representing the segment within the section @@ -85,6 +81,8 @@ const RelocatableData = struct { /// Represents the index of the section it belongs to section_index: u32, + const Tag = enum { data, code, custom }; + /// Returns the alignment of the segment, by retrieving it from the segment /// meta data of the given object file. 
/// NOTE: Alignment is encoded as a power of 2, so we shift the symbol's @@ -99,14 +97,14 @@ const RelocatableData = struct { return switch (relocatable_data.type) { .data => .data, .code => .function, - .debug => .section, + .custom => .section, }; } - /// Returns the index within a section itrelocatable_data, or in case of a debug section, + /// Returns the index within a section, or in case of a custom section, /// returns the section index within the object file. pub fn getIndex(relocatable_data: RelocatableData) u32 { - if (relocatable_data.type == .debug) return relocatable_data.section_index; + if (relocatable_data.type == .custom) return relocatable_data.section_index; return relocatable_data.index; } }; @@ -121,7 +119,6 @@ pub fn create(gpa: Allocator, file: std.fs.File, name: []const u8, maybe_max_siz var object: Object = .{ .file = file, .name = try gpa.dupe(u8, name), - .debug_names = &.{}, }; var is_object_file: bool = false; @@ -182,10 +179,16 @@ pub fn deinit(object: *Object, gpa: Allocator) void { gpa.free(info.name); } gpa.free(object.segment_info); - for (object.relocatable_data) |rel_data| { - gpa.free(rel_data.data[0..rel_data.size]); + { + var it = object.relocatable_data.valueIterator(); + while (it.next()) |relocatable_data| { + for (relocatable_data.*) |rel_data| { + gpa.free(rel_data.data[0..rel_data.size]); + } + gpa.free(relocatable_data.*); + } } - gpa.free(object.relocatable_data); + object.relocatable_data.deinit(gpa); object.string_table.deinit(gpa); gpa.free(object.name); object.* = undefined; @@ -345,23 +348,7 @@ fn Parser(comptime ReaderType: type) type { errdefer parser.object.deinit(gpa); try parser.verifyMagicBytes(); const version = try parser.reader.reader().readInt(u32, .little); - parser.object.version = version; - var relocatable_data = std.ArrayList(RelocatableData).init(gpa); - var debug_names = std.ArrayList(u8).init(gpa); - - errdefer { - // only free the inner contents of relocatable_data if we didn't - // assign it to 
the object yet. - if (parser.object.relocatable_data.len == 0) { - for (relocatable_data.items) |rel_data| { - gpa.free(rel_data.data[0..rel_data.size]); - } - relocatable_data.deinit(); - } - gpa.free(debug_names.items); - debug_names.deinit(); - } var section_index: u32 = 0; while (parser.reader.reader().readByte()) |byte| : (section_index += 1) { @@ -377,26 +364,34 @@ fn Parser(comptime ReaderType: type) type { if (std.mem.eql(u8, name, "linking")) { is_object_file.* = true; - parser.object.relocatable_data = relocatable_data.items; // at this point no new relocatable sections will appear so we're free to store them. try parser.parseMetadata(gpa, @as(usize, @intCast(reader.context.bytes_left))); } else if (std.mem.startsWith(u8, name, "reloc")) { try parser.parseRelocations(gpa); } else if (std.mem.eql(u8, name, "target_features")) { try parser.parseFeatures(gpa); } else if (std.mem.startsWith(u8, name, ".debug")) { + const gop = try parser.object.relocatable_data.getOrPut(gpa, .custom); + var relocatable_data: std.ArrayListUnmanaged(RelocatableData) = .{}; + defer relocatable_data.deinit(gpa); + if (!gop.found_existing) { + gop.value_ptr.* = &.{}; + } else { + relocatable_data = std.ArrayListUnmanaged(RelocatableData).fromOwnedSlice(gop.value_ptr.*); + } const debug_size = @as(u32, @intCast(reader.context.bytes_left)); const debug_content = try gpa.alloc(u8, debug_size); errdefer gpa.free(debug_content); try reader.readNoEof(debug_content); - try relocatable_data.append(.{ - .type = .debug, + try relocatable_data.append(gpa, .{ + .type = .custom, .data = debug_content.ptr, .size = debug_size, .index = try parser.object.string_table.put(gpa, name), .offset = 0, // debug sections only contain 1 entry, so no need to calculate offset .section_index = section_index, }); + gop.value_ptr.* = try relocatable_data.toOwnedSlice(gpa); } else { try reader.skipBytes(reader.context.bytes_left, .{}); } @@ -515,26 +510,32 @@ fn Parser(comptime ReaderType: type) type { const 
start = reader.context.bytes_left; var index: u32 = 0; const count = try readLeb(u32, reader); + const imported_function_count = parser.object.importedCountByKind(.function); + var relocatable_data = try std.ArrayList(RelocatableData).initCapacity(gpa, count); + defer relocatable_data.deinit(); while (index < count) : (index += 1) { const code_len = try readLeb(u32, reader); const offset = @as(u32, @intCast(start - reader.context.bytes_left)); const data = try gpa.alloc(u8, code_len); errdefer gpa.free(data); try reader.readNoEof(data); - try relocatable_data.append(.{ + relocatable_data.appendAssumeCapacity(.{ .type = .code, .data = data.ptr, .size = code_len, - .index = parser.object.importedCountByKind(.function) + index, + .index = imported_function_count + index, .offset = offset, .section_index = section_index, }); } + try parser.object.relocatable_data.put(gpa, .code, try relocatable_data.toOwnedSlice()); }, .data => { const start = reader.context.bytes_left; var index: u32 = 0; const count = try readLeb(u32, reader); + var relocatable_data = try std.ArrayList(RelocatableData).initCapacity(gpa, count); + defer relocatable_data.deinit(); while (index < count) : (index += 1) { const flags = try readLeb(u32, reader); const data_offset = try readInit(reader); @@ -545,7 +546,7 @@ fn Parser(comptime ReaderType: type) type { const data = try gpa.alloc(u8, data_len); errdefer gpa.free(data); try reader.readNoEof(data); - try relocatable_data.append(.{ + relocatable_data.appendAssumeCapacity(.{ .type = .data, .data = data.ptr, .size = data_len, @@ -554,6 +555,7 @@ fn Parser(comptime ReaderType: type) type { .section_index = section_index, }); } + try parser.object.relocatable_data.put(gpa, .data, try relocatable_data.toOwnedSlice()); }, else => try parser.reader.reader().skipBytes(len, .{}), } @@ -561,7 +563,6 @@ fn Parser(comptime ReaderType: type) type { error.EndOfStream => {}, // finished parsing the file else => |e| return e, } - parser.object.relocatable_data = 
try relocatable_data.toOwnedSlice(); } /// Based on the "features" custom section, parses it into a list of @@ -789,7 +790,8 @@ fn Parser(comptime ReaderType: type) type { }, .section => { symbol.index = try leb.readULEB128(u32, reader); - for (parser.object.relocatable_data) |data| { + const section_data = parser.object.relocatable_data.get(.custom).?; + for (section_data) |data| { if (data.section_index == symbol.index) { symbol.name = data.index; break; @@ -798,22 +800,15 @@ fn Parser(comptime ReaderType: type) type { }, else => { symbol.index = try leb.readULEB128(u32, reader); - var maybe_import: ?types.Import = null; - const is_undefined = symbol.isUndefined(); - if (is_undefined) { - maybe_import = parser.object.findImport(symbol.tag.externalType(), symbol.index); - } const explicit_name = symbol.hasFlag(.WASM_SYM_EXPLICIT_NAME); - if (!(is_undefined and !explicit_name)) { + symbol.name = if (!is_undefined or (is_undefined and explicit_name)) name: { const name_len = try leb.readULEB128(u32, reader); const name = try gpa.alloc(u8, name_len); defer gpa.free(name); try reader.readNoEof(name); - symbol.name = try parser.object.string_table.put(gpa, name); - } else { - symbol.name = maybe_import.?.name; - } + break :name try parser.object.string_table.put(gpa, name); + } else parser.object.findImport(symbol.tag.externalType(), symbol.index).name; }, } return symbol; @@ -887,110 +882,95 @@ fn assertEnd(reader: anytype) !void { } /// Parses an object file into atoms, for code and data sections -pub fn parseIntoAtoms(object: *Object, gpa: Allocator, object_index: u16, wasm_bin: *Wasm) !void { - const Key = struct { - kind: Symbol.Tag, - index: u32, +pub fn parseSymbolIntoAtom(object: *Object, object_index: u16, symbol_index: u32, wasm: *Wasm) !Atom.Index { + const symbol = &object.symtable[symbol_index]; + const relocatable_data: RelocatableData = switch (symbol.tag) { + .function => object.relocatable_data.get(.code).?[symbol.index - 
object.importedCountByKind(.function)], + .data => object.relocatable_data.get(.data).?[symbol.index], + .section => blk: { + const data = object.relocatable_data.get(.custom).?; + for (data) |dat| { + if (dat.section_index == symbol.index) { + break :blk dat; + } + } + unreachable; + }, + else => unreachable, }; - var symbol_for_segment = std.AutoArrayHashMap(Key, std.ArrayList(u32)).init(gpa); - defer for (symbol_for_segment.values()) |*list| { - list.deinit(); - } else symbol_for_segment.deinit(); + const final_index = try wasm.getMatchingSegment(object_index, symbol_index); + const atom_index = @as(Atom.Index, @intCast(wasm.managed_atoms.items.len)); + const atom = try wasm.managed_atoms.addOne(wasm.base.allocator); + atom.* = Atom.empty; + try wasm.appendAtomAtIndex(final_index, atom_index); - for (object.symtable, 0..) |symbol, symbol_index| { - switch (symbol.tag) { - .function, .data, .section => if (!symbol.isUndefined()) { - const gop = try symbol_for_segment.getOrPut(.{ .kind = symbol.tag, .index = symbol.index }); - const sym_idx = @as(u32, @intCast(symbol_index)); - if (!gop.found_existing) { - gop.value_ptr.* = std.ArrayList(u32).init(gpa); - } - try gop.value_ptr.*.append(sym_idx); - }, - else => continue, + atom.sym_index = symbol_index; + atom.file = object_index; + atom.size = relocatable_data.size; + atom.alignment = relocatable_data.getAlignment(object); + atom.code = std.ArrayListUnmanaged(u8).fromOwnedSlice(relocatable_data.data[0..relocatable_data.size]); + atom.original_offset = relocatable_data.offset; + try wasm.symbol_atom.putNoClobber(wasm.base.allocator, atom.symbolLoc(), atom_index); + const segment: *Wasm.Segment = &wasm.segments.items[final_index]; + if (relocatable_data.type == .data) { //code section and custom sections are 1-byte aligned + segment.alignment = segment.alignment.max(atom.alignment); + } + + if (object.relocations.get(relocatable_data.section_index)) |relocations| { + const start = searchRelocStart(relocations, 
relocatable_data.offset); + const len = searchRelocEnd(relocations[start..], relocatable_data.offset + atom.size); + atom.relocs = std.ArrayListUnmanaged(types.Relocation).fromOwnedSlice(relocations[start..][0..len]); + for (atom.relocs.items) |*reloc| { + switch (reloc.relocation_type) { + .R_WASM_TABLE_INDEX_I32, + .R_WASM_TABLE_INDEX_I64, + .R_WASM_TABLE_INDEX_SLEB, + .R_WASM_TABLE_INDEX_SLEB64, + => { + try wasm.function_table.put(wasm.base.allocator, .{ + .file = object_index, + .index = reloc.index, + }, 0); + }, + .R_WASM_GLOBAL_INDEX_I32, + .R_WASM_GLOBAL_INDEX_LEB, + => { + const sym = object.symtable[reloc.index]; + if (sym.tag != .global) { + try wasm.got_symbols.append( + wasm.base.allocator, + .{ .file = object_index, .index = reloc.index }, + ); + } + }, + else => {}, + } } } - for (object.relocatable_data, 0..) |relocatable_data, index| { - const final_index = (try wasm_bin.getMatchingSegment(object_index, @as(u32, @intCast(index)))) orelse { - continue; // found unknown section, so skip parsing into atom as we do not know how to handle it. - }; + return atom_index; +} - const atom_index: Atom.Index = @intCast(wasm_bin.managed_atoms.items.len); - const atom = try wasm_bin.managed_atoms.addOne(gpa); - atom.* = Atom.empty; - atom.file = object_index; - atom.size = relocatable_data.size; - atom.alignment = relocatable_data.getAlignment(object); - - const relocations: []types.Relocation = object.relocations.get(relocatable_data.section_index) orelse &.{}; - for (relocations) |relocation| { - if (isInbetween(relocatable_data.offset, atom.size, relocation.offset)) { - // set the offset relative to the offset of the segment itobject, - // rather than within the entire section. 
- var reloc = relocation; - reloc.offset -= relocatable_data.offset; - try atom.relocs.append(gpa, reloc); - - switch (relocation.relocation_type) { - .R_WASM_TABLE_INDEX_I32, - .R_WASM_TABLE_INDEX_I64, - .R_WASM_TABLE_INDEX_SLEB, - .R_WASM_TABLE_INDEX_SLEB64, - => { - try wasm_bin.function_table.put(gpa, .{ - .file = object_index, - .index = relocation.index, - }, 0); - }, - .R_WASM_GLOBAL_INDEX_I32, - .R_WASM_GLOBAL_INDEX_LEB, - => { - const sym = object.symtable[relocation.index]; - if (sym.tag != .global) { - try wasm_bin.got_symbols.append( - wasm_bin.base.allocator, - .{ .file = object_index, .index = relocation.index }, - ); - } - }, - else => {}, - } - } +fn searchRelocStart(relocs: []const types.Relocation, address: u32) usize { + var min: usize = 0; + var max: usize = relocs.len; + while (min < max) { + const index = (min + max) / 2; + const curr = relocs[index]; + if (curr.offset < address) { + min = index + 1; + } else { + max = index; } - - try atom.code.appendSlice(gpa, relocatable_data.data[0..relocatable_data.size]); - - if (symbol_for_segment.getPtr(.{ - .kind = relocatable_data.getSymbolKind(), - .index = relocatable_data.getIndex(), - })) |symbols| { - atom.sym_index = symbols.pop(); - try wasm_bin.symbol_atom.putNoClobber(gpa, atom.symbolLoc(), atom_index); - - // symbols referencing the same atom will be added as alias - // or as 'parent' when they are global. 
- while (symbols.popOrNull()) |idx| { - try wasm_bin.symbol_atom.putNoClobber(gpa, .{ .file = atom.file, .index = idx }, atom_index); - const alias_symbol = object.symtable[idx]; - if (alias_symbol.isGlobal()) { - atom.sym_index = idx; - } - } - } - - const segment: *Wasm.Segment = &wasm_bin.segments.items[final_index]; - if (relocatable_data.type == .data) { //code section and debug sections are 1-byte aligned - segment.alignment = segment.alignment.max(atom.alignment); - } - - try wasm_bin.appendAtomAtIndex(final_index, atom_index); - log.debug("Parsed into atom: '{s}' at segment index {d}", .{ object.string_table.get(object.symtable[atom.sym_index].name), final_index }); } + return min; } -/// Verifies if a given value is in between a minimum -and maximum value. -/// The maxmimum value is calculated using the length, both start and end are inclusive. -inline fn isInbetween(min: u32, length: u32, value: u32) bool { - return value >= min and value <= min + length; +fn searchRelocEnd(relocs: []const types.Relocation, address: u32) usize { + for (relocs, 0..relocs.len) |reloc, index| { + if (reloc.offset > address) { + return index; + } + } + return relocs.len; } From 6f7a9b31443debf3e6d2be645261372de1bc5877 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Mon, 20 Nov 2023 20:35:31 +0100 Subject: [PATCH 05/10] wasm-linker: deduplicate aliased functions When multiple symbols point to the same function, we ensure any other symbol other than the original will be discarded and point to the original instead. This prevents emitting the same function code more than once. 
--- src/link/Wasm.zig | 55 ++++++++++++++++++++++++++++++---------- src/link/Wasm/Symbol.zig | 4 +++ 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 81e73e6ecf..b7ed2c035f 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -110,7 +110,7 @@ func_types: std.ArrayListUnmanaged(std.wasm.Type) = .{}, /// Output function section where the key is the original /// function index and the value is function. /// This allows us to map multiple symbols to the same function. -functions: std.AutoArrayHashMapUnmanaged(struct { file: ?u16, index: u32 }, std.wasm.Func) = .{}, +functions: std.AutoArrayHashMapUnmanaged(struct { file: ?u16, index: u32 }, struct { func: std.wasm.Func, sym_index: u32 }) = .{}, /// Output global section wasm_globals: std.ArrayListUnmanaged(std.wasm.Global) = .{}, /// Memory section @@ -1584,7 +1584,7 @@ fn getFunctionSignature(wasm: *const Wasm, loc: SymbolLoc) std.wasm.Type { const ty_index = wasm.imports.get(loc).?.kind.function; return wasm.func_types.items[ty_index]; } - return wasm.func_types.items[wasm.functions.get(.{ .file = loc.file, .index = loc.index }).?.type_index]; + return wasm.func_types.items[wasm.functions.get(.{ .file = loc.file, .index = symbol.index }).?.func.type_index]; } /// Lowers a constant typed value to a local symbol and atom. 
@@ -2141,7 +2141,7 @@ fn parseAtom(wasm: *Wasm, atom_index: Atom.Index, kind: Kind) !void { try wasm.functions.putNoClobber( wasm.base.allocator, .{ .file = null, .index = index }, - .{ .type_index = type_index }, + .{ .func = .{ .type_index = type_index }, .sym_index = atom.sym_index }, ); symbol.tag = .function; symbol.index = index; @@ -2274,7 +2274,14 @@ fn allocateAtoms(wasm: *Wasm) !void { while (true) { const atom = wasm.getAtomPtr(atom_index); const symbol_loc = atom.symbolLoc(); - const sym = symbol_loc.getSymbol(wasm); + // Ensure we get the original symbol, so we verify the correct symbol on whether + // it is dead or not and ensure an atom is removed when dead. + // This is required as we may have parsed aliases into atoms. + const sym = if (symbol_loc.file) |object_index| sym: { + const object = wasm.objects.items[object_index]; + break :sym object.symtable[symbol_loc.index]; + } else wasm.symbols.items[symbol_loc.index]; + if (sym.isDead()) { // Dead symbols must be unlinked from the linked-list to prevent them // from being emit into the binary. @@ -2477,7 +2484,7 @@ fn initializeCallCtorsFunction(wasm: *Wasm) !void { // call constructors for (wasm.init_funcs.items) |init_func_loc| { const symbol = init_func_loc.getSymbol(wasm); - const func = wasm.functions.values()[symbol.index - wasm.imported_functions_count]; + const func = wasm.functions.values()[symbol.index - wasm.imported_functions_count].func; const ty = wasm.func_types.items[func.type_index]; // Call function by its function index @@ -2519,7 +2526,7 @@ fn createSyntheticFunction( try wasm.functions.putNoClobber( wasm.base.allocator, .{ .file = null, .index = func_index }, - .{ .type_index = ty_index }, + .{ .func = .{ .type_index = ty_index }, .sym_index = loc.index }, ); symbol.index = func_index; @@ -2740,6 +2747,9 @@ fn setupImports(wasm: *Wasm) !void { /// Takes the global, function and table section from each linked object file /// and merges it into a single section for each. 
fn mergeSections(wasm: *Wasm) !void { + var removed_duplicates = std.ArrayList(SymbolLoc).init(wasm.base.allocator); + defer removed_duplicates.deinit(); + for (wasm.resolved_symbols.keys()) |sym_loc| { if (sym_loc.file == null) { // Zig code-generated symbols are already within the sections and do not @@ -2767,9 +2777,19 @@ fn mergeSections(wasm: *Wasm) !void { wasm.base.allocator, .{ .file = sym_loc.file, .index = symbol.index }, ); - if (!gop.found_existing) { - gop.value_ptr.* = object.functions[index]; + if (gop.found_existing) { + // We found an alias to the same function, discard this symbol in favor of + // the original symbol and point the discard function to it. This ensures + // we only emit a single function, instead of duplicates. + try wasm.discarded.putNoClobber( + wasm.base.allocator, + sym_loc, + .{ .file = gop.key_ptr.*.file, .index = gop.value_ptr.*.sym_index }, + ); + try removed_duplicates.append(sym_loc); + continue; } + gop.value_ptr.* = .{ .func = object.functions[index], .sym_index = sym_loc.index }; symbol.index = @as(u32, @intCast(gop.index)) + wasm.imported_functions_count; }, .global => { @@ -2786,6 +2806,12 @@ fn mergeSections(wasm: *Wasm) !void { } } + // For any removed duplicates, remove them from the resolved symbols list + for (removed_duplicates.items) |sym_loc| { + assert(wasm.resolved_symbols.swapRemove(sym_loc)); + sym_loc.getSymbol(wasm).unmark(); + } + log.debug("Merged ({d}) functions", .{wasm.functions.count()}); log.debug("Merged ({d}) globals", .{wasm.wasm_globals.items.len}); log.debug("Merged ({d}) tables", .{wasm.tables.items.len}); @@ -2821,7 +2847,7 @@ fn mergeTypes(wasm: *Wasm) !void { import.kind.function = try wasm.putOrGetFuncType(original_type); } else if (!dirty.contains(symbol.index)) { log.debug("Adding type from function '{s}'", .{sym_loc.getName(wasm)}); - const func = &wasm.functions.values()[symbol.index - wasm.imported_functions_count]; + const func = &wasm.functions.values()[symbol.index - 
wasm.imported_functions_count].func; func.type_index = try wasm.putOrGetFuncType(object.func_types[func.type_index]); dirty.putAssumeCapacityNoClobber(symbol.index, {}); } @@ -3498,12 +3524,12 @@ fn linkWithZld(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) l try wasm.markReferences(); try wasm.setupImports(); + try wasm.mergeSections(); + try wasm.mergeTypes(); try wasm.allocateAtoms(); try wasm.setupMemory(); wasm.allocateVirtualAddresses(); wasm.mapFunctionTable(); - try wasm.mergeSections(); - try wasm.mergeTypes(); try wasm.initializeCallCtorsFunction(); try wasm.setupInitMemoryFunction(); try wasm.setupTLSRelocationsFunction(); @@ -3639,12 +3665,12 @@ pub fn flushModule(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod } } + try wasm.mergeSections(); + try wasm.mergeTypes(); try wasm.allocateAtoms(); try wasm.setupMemory(); wasm.allocateVirtualAddresses(); wasm.mapFunctionTable(); - try wasm.mergeSections(); - try wasm.mergeTypes(); try wasm.initializeCallCtorsFunction(); try wasm.setupInitMemoryFunction(); try wasm.setupTLSRelocationsFunction(); @@ -3745,7 +3771,7 @@ fn writeToFile( if (wasm.functions.count() != 0) { const header_offset = try reserveVecSectionHeader(&binary_bytes); for (wasm.functions.values()) |function| { - try leb.writeULEB128(binary_writer, function.type_index); + try leb.writeULEB128(binary_writer, function.func.type_index); } try writeVecSectionHeader( @@ -3916,6 +3942,7 @@ fn writeToFile( sorted_atoms.appendAssumeCapacity(atom); // found more code atoms than functions atom_index = atom.prev orelse break; } + std.debug.assert(wasm.functions.count() == sorted_atoms.items.len); const atom_sort_fn = struct { fn sort(ctx: *const Wasm, lhs: *const Atom, rhs: *const Atom) bool { diff --git a/src/link/Wasm/Symbol.zig b/src/link/Wasm/Symbol.zig index b4507f9e14..75c26ca10d 100644 --- a/src/link/Wasm/Symbol.zig +++ b/src/link/Wasm/Symbol.zig @@ -100,6 +100,10 @@ pub fn mark(symbol: *Symbol) void { symbol.flags |= 
@intFromEnum(Flag.alive); } +pub fn unmark(symbol: *Symbol) void { + symbol.flags &= ~@intFromEnum(Flag.alive); +} + pub fn isAlive(symbol: Symbol) bool { return symbol.flags & @intFromEnum(Flag.alive) != 0; } From f7d4f72fd5e3e3026e395f43c5b2b2c3cb49fce2 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Mon, 20 Nov 2023 21:08:40 +0100 Subject: [PATCH 06/10] wasm-linker: Only emit name of referenced symbols --- src/link/Wasm.zig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index b7ed2c035f..ddd3329bd8 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -4293,6 +4293,9 @@ fn emitNameSection(wasm: *Wasm, binary_bytes: *std.ArrayList(u8), arena: std.mem for (wasm.resolved_symbols.keys()) |sym_loc| { const symbol = sym_loc.getSymbol(wasm).*; + if (symbol.isDead()) { + continue; + } const name = sym_loc.getName(wasm); switch (symbol.tag) { .function => { From 8447d4fb1f5b628b3c0c9f2b1254d8ddc7801ab1 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Wed, 22 Nov 2023 06:51:50 +0100 Subject: [PATCH 07/10] wasm-linker: handle debug info during gc When we encounter a debug info symbol, we initially have to parse it into an atom to find its relocations. We then go through its relocations to find out if any of the target symbols are marked alive. When it finds an alive symbol, we also mark the debug symbol as alive to ensure this piece of debug info is emit to the binary. When it does not encounter any alive symbols, the debug symbol remains dead and will be garbage- collected during `allocateAtoms`. 
--- src/link/Wasm.zig | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index ddd3329bd8..0ed32caf4c 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -5115,6 +5115,23 @@ fn markReferences(wasm: *Wasm) !void { if (sym.isExported(wasm.base.options.rdynamic) or sym.isNoStrip()) { try wasm.mark(sym_loc); } + + // Debug sections may require to be parsed and marked when it contains + // relocations to alive symbols. + if (sym.tag == .section and !wasm.base.options.strip) { + const file = sym_loc.file orelse continue; // Incremental debug info is done independently + const object = &wasm.objects.items[file]; + const atom_index = try Object.parseSymbolIntoAtom(object, file, sym_loc.index, wasm); + const atom = wasm.getAtom(atom_index); + for (atom.relocs.items) |reloc| { + const target_loc: SymbolLoc = .{ .index = reloc.index, .file = atom.file }; + const target_sym = target_loc.getSymbol(wasm); + if (target_sym.isAlive()) { + sym.mark(); + continue; // Skip all other relocations as this debug atom is already marked now + } + } + } } } From 4be3cd2754f4af4f18b969dd60d954c53a281a83 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Sat, 25 Nov 2023 18:03:23 +0100 Subject: [PATCH 08/10] wasm-linker: support gc for wasm backend code When using the Wasm backend, we will now also perform garbage collection there, to ensure unreferenced symbols do not get parsed nor emit into the final binary. 
--- src/link/Wasm.zig | 56 +++++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 0ed32caf4c..663d9d4a4d 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -1333,6 +1333,10 @@ pub fn deinit(wasm: *Wasm) void { atom.deinit(gpa); } } + for (wasm.synthetic_functions.items) |atom_index| { + const atom = wasm.getAtomPtr(atom_index); + atom.deinit(gpa); + } wasm.decls.deinit(gpa); wasm.anon_decls.deinit(gpa); @@ -1364,10 +1368,6 @@ pub fn deinit(wasm: *Wasm) void { wasm.exports.deinit(gpa); wasm.string_table.deinit(gpa); - for (wasm.synthetic_functions.items) |atom_index| { - const atom = wasm.getAtomPtr(atom_index); - atom.deinit(gpa); - } wasm.synthetic_functions.deinit(gpa); if (wasm.dwarf) |*dwarf| { @@ -2134,9 +2134,13 @@ const Kind = union(enum) { fn parseAtom(wasm: *Wasm, atom_index: Atom.Index, kind: Kind) !void { const atom = wasm.getAtomPtr(atom_index); const symbol = (SymbolLoc{ .file = null, .index = atom.sym_index }).getSymbol(wasm); + if (symbol.isDead()) { + // Prevent unreferenced symbols from being parsed. 
+ return; + } const final_index: u32 = switch (kind) { .function => result: { - const index = @as(u32, @intCast(wasm.functions.count() + wasm.imported_functions_count)); + const index: u32 = @intCast(wasm.functions.count() + wasm.imported_functions_count); const type_index = wasm.atom_types.get(atom_index).?; try wasm.functions.putNoClobber( wasm.base.allocator, @@ -2147,7 +2151,7 @@ fn parseAtom(wasm: *Wasm, atom_index: Atom.Index, kind: Kind) !void { symbol.index = index; if (wasm.code_section_index == null) { - wasm.code_section_index = @as(u32, @intCast(wasm.segments.items.len)); + wasm.code_section_index = @intCast(wasm.segments.items.len); try wasm.segments.append(wasm.base.allocator, .{ .alignment = atom.alignment, .size = atom.size, @@ -2185,12 +2189,12 @@ fn parseAtom(wasm: *Wasm, atom_index: Atom.Index, kind: Kind) !void { const index = gop.value_ptr.*; wasm.segments.items[index].size += atom.size; - symbol.index = @as(u32, @intCast(wasm.segment_info.getIndex(index).?)); + symbol.index = @intCast(wasm.segment_info.getIndex(index).?); // segment info already exists, so free its memory wasm.base.allocator.free(segment_name); break :result index; } else { - const index = @as(u32, @intCast(wasm.segments.items.len)); + const index: u32 = @intCast(wasm.segments.items.len); var flags: u32 = 0; if (wasm.base.options.shared_memory) { flags |= @intFromEnum(Segment.Flag.WASM_DATA_SEGMENT_IS_PASSIVE); @@ -2203,7 +2207,7 @@ fn parseAtom(wasm: *Wasm, atom_index: Atom.Index, kind: Kind) !void { }); gop.value_ptr.* = index; - const info_index = @as(u32, @intCast(wasm.segment_info.count())); + const info_index: u32 = @intCast(wasm.segment_info.count()); try wasm.segment_info.put(wasm.base.allocator, index, segment_info); symbol.index = info_index; break :result index; @@ -2318,8 +2322,10 @@ fn allocateAtoms(wasm: *Wasm) !void { fn allocateVirtualAddresses(wasm: *Wasm) void { for (wasm.resolved_symbols.keys()) |loc| { const symbol = loc.getSymbol(wasm); - if (symbol.tag != 
.data) { - continue; // only data symbols have virtual addresses + if (symbol.tag != .data or symbol.isDead()) { + // Only data symbols have virtual addresses. + // Dead symbols do not get allocated, so we don't need to set their virtual address either. + continue; } const atom_index = wasm.symbol_atom.get(loc) orelse { // synthetic symbol that does not contain an atom @@ -2681,10 +2687,10 @@ fn setupImports(wasm: *Wasm) !void { } for (wasm.resolved_symbols.keys()) |symbol_loc| { - if (symbol_loc.file == null) { + const file_index = symbol_loc.file orelse { // imports generated by Zig code are already in the `import` section continue; - } + }; const symbol = symbol_loc.getSymbol(wasm); if (symbol.isDead() or @@ -2695,7 +2701,7 @@ fn setupImports(wasm: *Wasm) !void { } log.debug("Symbol '{s}' will be imported from the host", .{symbol_loc.getName(wasm)}); - const object = wasm.objects.items[symbol_loc.file.?]; + const object = wasm.objects.items[file_index]; const import = object.findImport(symbol.tag.externalType(), symbol.index); // We copy the import to a new import to ensure the names contain references @@ -3092,6 +3098,11 @@ pub fn getMatchingSegment(wasm: *Wasm, object_index: u16, symbol_index: u32) !u3 .offset = 0, .flags = flags, }); + try wasm.segment_info.putNoClobber(wasm.base.allocator, index, .{ + .name = try wasm.base.allocator.dupe(u8, segment_info.name), + .alignment = segment_info.alignment, + .flags = segment_info.flags, + }); return index; } else return result.value_ptr.*; }, @@ -3198,6 +3209,7 @@ pub fn getErrorTableSymbol(wasm: *Wasm) !u32 { .virtual_address = undefined, }; symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); + symbol.mark(); try wasm.resolved_symbols.put(wasm.base.allocator, atom.symbolLoc(), {}); @@ -3230,6 +3242,7 @@ fn populateErrorNameTable(wasm: *Wasm) !void { .virtual_address = undefined, }; names_symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); + names_symbol.mark(); log.debug("Populating error names", .{}); @@ -3606,9 +3619,9 @@ pub 
fn flushModule(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod // So we can rebuild the binary file on each incremental update defer wasm.resetState(); try wasm.setupInitFunctions(); - try wasm.setupErrorsLen(); try wasm.setupStart(); try wasm.markReferences(); + try wasm.setupErrorsLen(); try wasm.setupImports(); if (wasm.base.options.module) |mod| { var decl_it = wasm.decls.iterator(); @@ -5152,14 +5165,15 @@ fn mark(wasm: *Wasm, loc: SymbolLoc) !void { return; } - const file = loc.file orelse return; // Marking synthetic and Zig symbols is done seperately - const object = &wasm.objects.items[file]; - const atom_index = try Object.parseSymbolIntoAtom(object, file, loc.index, wasm); + const atom_index = if (loc.file) |file_index| idx: { + const object = &wasm.objects.items[file_index]; + const atom_index = try object.parseSymbolIntoAtom(file_index, loc.index, wasm); + break :idx atom_index; + } else wasm.symbol_atom.get(loc) orelse return; const atom = wasm.getAtom(atom_index); - const relocations: []const types.Relocation = atom.relocs.items; - for (relocations) |reloc| { - const target_loc: SymbolLoc = .{ .index = reloc.index, .file = file }; + for (atom.relocs.items) |reloc| { + const target_loc: SymbolLoc = .{ .index = reloc.index, .file = loc.file }; try wasm.mark(target_loc.finalLoc(wasm)); } } From 596d1cd5a8a2daad25df9d39ad384df1b67eb39e Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Sun, 26 Nov 2023 18:55:06 +0100 Subject: [PATCH 09/10] wasm-linker: support `--no-gc-sections` By default we garbage-collect sections for Wasm to reduce size, as well as finish linking quicker (as we have fewer things to do). However, when the user specifies `--no-gc-sections` we ensure all resolved symbols get marked and therefore do not get garbage collected. This is supported in both incremental-mode and traditional linking. 
--- src/link/Wasm.zig | 93 +++++++++++++++++++++++++--------------- src/link/Wasm/Object.zig | 2 +- 2 files changed, 59 insertions(+), 36 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 663d9d4a4d..513922f247 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -2134,10 +2134,14 @@ const Kind = union(enum) { fn parseAtom(wasm: *Wasm, atom_index: Atom.Index, kind: Kind) !void { const atom = wasm.getAtomPtr(atom_index); const symbol = (SymbolLoc{ .file = null, .index = atom.sym_index }).getSymbol(wasm); - if (symbol.isDead()) { + const do_garbage_collect = wasm.base.options.gc_sections orelse + (wasm.base.options.output_mode != .Obj); + + if (symbol.isDead() and do_garbage_collect) { // Prevent unreferenced symbols from being parsed. return; } + const final_index: u32 = switch (kind) { .function => result: { const index: u32 = @intCast(wasm.functions.count() + wasm.imported_functions_count); @@ -2289,16 +2293,23 @@ fn allocateAtoms(wasm: *Wasm) !void { if (sym.isDead()) { // Dead symbols must be unlinked from the linked-list to prevent them // from being emit into the binary. - if (atom.prev) |prev_index| { - const prev = wasm.getAtomPtr(prev_index); - prev.next = atom.next; + if (atom.next) |next_index| { + const next = wasm.getAtomPtr(next_index); + next.prev = atom.prev; + } else if (entry.value_ptr.* == atom_index) { + // When the atom is dead and is also the first atom retrieved from wasm.atoms(index) we update + // the entry to point it to the previous atom to ensure we do not start with a dead symbol that + // was removed and therefore do not emit any code at all. 
+ if (atom.prev) |prev| { + entry.value_ptr.* = prev; + } } - atom_index = atom.next orelse { - atom.prev = null; + atom_index = atom.prev orelse { + atom.next = null; break; }; - const next = wasm.getAtomPtr(atom_index); - next.prev = atom.prev; + const prev = wasm.getAtomPtr(atom_index); + prev.next = atom.next; atom.prev = null; atom.next = null; continue; @@ -2787,6 +2798,7 @@ fn mergeSections(wasm: *Wasm) !void { // We found an alias to the same function, discard this symbol in favor of // the original symbol and point the discard function to it. This ensures // we only emit a single function, instead of duplicates. + symbol.unmark(); try wasm.discarded.putNoClobber( wasm.base.allocator, sym_loc, @@ -2815,7 +2827,6 @@ fn mergeSections(wasm: *Wasm) !void { // For any removed duplicates, remove them from the resolved symbols list for (removed_duplicates.items) |sym_loc| { assert(wasm.resolved_symbols.swapRemove(sym_loc)); - sym_loc.getSymbol(wasm).unmark(); } log.debug("Merged ({d}) functions", .{wasm.functions.count()}); @@ -3741,8 +3752,8 @@ fn writeToFile( binary_bytes.items, header_offset, .type, - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, @intCast(wasm.func_types.items.len)), + @intCast(binary_bytes.items.len - header_offset - header_size), + @intCast(wasm.func_types.items.len), ); section_count += 1; } @@ -3774,8 +3785,8 @@ fn writeToFile( binary_bytes.items, header_offset, .import, - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, @intCast(wasm.imports.count() + @intFromBool(import_memory))), + @intCast(binary_bytes.items.len - header_offset - header_size), + @intCast(wasm.imports.count() + @intFromBool(import_memory)), ); section_count += 1; } @@ -3791,8 +3802,8 @@ fn writeToFile( binary_bytes.items, header_offset, .function, - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, @intCast(wasm.functions.count())), + 
@intCast(binary_bytes.items.len - header_offset - header_size), + @intCast(wasm.functions.count()), ); section_count += 1; } @@ -3810,8 +3821,8 @@ fn writeToFile( binary_bytes.items, header_offset, .table, - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, @intCast(wasm.tables.items.len)), + @intCast(binary_bytes.items.len - header_offset - header_size), + @intCast(wasm.tables.items.len), ); section_count += 1; } @@ -3825,8 +3836,8 @@ fn writeToFile( binary_bytes.items, header_offset, .memory, - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, 1), // wasm currently only supports 1 linear memory segment + @intCast(binary_bytes.items.len - header_offset - header_size), + 1, // wasm currently only supports 1 linear memory segment ); section_count += 1; } @@ -3845,8 +3856,8 @@ fn writeToFile( binary_bytes.items, header_offset, .global, - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, @intCast(wasm.wasm_globals.items.len)), + @intCast(binary_bytes.items.len - header_offset - header_size), + @intCast(wasm.wasm_globals.items.len), ); section_count += 1; } @@ -3874,8 +3885,8 @@ fn writeToFile( binary_bytes.items, header_offset, .@"export", - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, @intCast(wasm.exports.items.len)) + @intFromBool(export_memory), + @intCast(binary_bytes.items.len - header_offset - header_size), + @intCast(wasm.exports.items.len + @intFromBool(export_memory)), ); section_count += 1; } @@ -3918,8 +3929,8 @@ fn writeToFile( binary_bytes.items, header_offset, .element, - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, 1), + @intCast(binary_bytes.items.len - header_offset - header_size), + 1, ); section_count += 1; } @@ -3932,8 +3943,8 @@ fn writeToFile( binary_bytes.items, header_offset, .data_count, - @as(u32, @intCast(binary_bytes.items.len - header_offset - 
header_size)), - @as(u32, @intCast(data_segments_count)), + @intCast(binary_bytes.items.len - header_offset - header_size), + @intCast(data_segments_count), ); } @@ -3978,7 +3989,7 @@ fn writeToFile( header_offset, .code, code_section_size, - @as(u32, @intCast(wasm.functions.count())), + @intCast(wasm.functions.count()), ); code_section_index = section_count; section_count += 1; @@ -4049,8 +4060,8 @@ fn writeToFile( binary_bytes.items, header_offset, .data, - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, @intCast(segment_count)), + @intCast(binary_bytes.items.len - header_offset - header_size), + @intCast(segment_count), ); data_section_index = section_count; section_count += 1; @@ -4597,6 +4608,14 @@ fn linkWithLLD(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) ! try argv.append("--export-table"); } + if (wasm.base.options.gc_sections) |gc| { + // For wasm-ld we only need to specify '--no-gc-sections' when the user explicitly + // specified it as garbage collection is enabled by default. 
+ if (!gc) { + try argv.append("--no-gc-sections"); + } + } + if (wasm.base.options.strip) { try argv.append("-s"); } @@ -4882,7 +4901,7 @@ fn emitLinkSection(wasm: *Wasm, binary_bytes: *std.ArrayList(u8), symbol_table: try wasm.emitSymbolTable(binary_bytes, symbol_table); try wasm.emitSegmentInfo(binary_bytes); - const size = @as(u32, @intCast(binary_bytes.items.len - offset - 6)); + const size: u32 = @intCast(binary_bytes.items.len - offset - 6); try writeCustomSectionHeader(binary_bytes.items, offset, size); } @@ -4930,7 +4949,7 @@ fn emitSymbolTable(wasm: *Wasm, binary_bytes: *std.ArrayList(u8), symbol_table: } var buf: [10]u8 = undefined; - leb.writeUnsignedFixed(5, buf[0..5], @as(u32, @intCast(binary_bytes.items.len - table_offset + 5))); + leb.writeUnsignedFixed(5, buf[0..5], @intCast(binary_bytes.items.len - table_offset + 5)); leb.writeUnsignedFixed(5, buf[5..], symbol_count); try binary_bytes.insertSlice(table_offset, &buf); } @@ -5013,7 +5032,7 @@ fn emitCodeRelocations( var buf: [5]u8 = undefined; leb.writeUnsignedFixed(5, &buf, count); try binary_bytes.insertSlice(reloc_start, &buf); - const size = @as(u32, @intCast(binary_bytes.items.len - header_offset - 6)); + const size: u32 = @intCast(binary_bytes.items.len - header_offset - 6); try writeCustomSectionHeader(binary_bytes.items, header_offset, size); } @@ -5123,10 +5142,14 @@ pub fn storeDeclType(wasm: *Wasm, decl_index: InternPool.DeclIndex, func_type: s fn markReferences(wasm: *Wasm) !void { const tracy = trace(@src()); defer tracy.end(); + const do_garbage_collect = wasm.base.options.gc_sections orelse + (wasm.base.options.output_mode != .Obj); + for (wasm.resolved_symbols.keys()) |sym_loc| { const sym = sym_loc.getSymbol(wasm); - if (sym.isExported(wasm.base.options.rdynamic) or sym.isNoStrip()) { + if (sym.isExported(wasm.base.options.rdynamic) or sym.isNoStrip() or !do_garbage_collect) { try wasm.mark(sym_loc); + continue; } // Debug sections may require to be parsed and marked when it 
contains @@ -5139,7 +5162,7 @@ fn markReferences(wasm: *Wasm) !void { for (atom.relocs.items) |reloc| { const target_loc: SymbolLoc = .{ .index = reloc.index, .file = atom.file }; const target_sym = target_loc.getSymbol(wasm); - if (target_sym.isAlive()) { + if (target_sym.isAlive() or !do_garbage_collect) { sym.mark(); continue; // Skip all other relocations as this debug atom is already marked now } diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig index 858d52f836..610c534c88 100644 --- a/src/link/Wasm/Object.zig +++ b/src/link/Wasm/Object.zig @@ -920,7 +920,7 @@ pub fn parseSymbolIntoAtom(object: *Object, object_index: u16, symbol_index: u32 const start = searchRelocStart(relocations, relocatable_data.offset); const len = searchRelocEnd(relocations[start..], relocatable_data.offset + atom.size); atom.relocs = std.ArrayListUnmanaged(types.Relocation).fromOwnedSlice(relocations[start..][0..len]); - for (atom.relocs.items) |*reloc| { + for (atom.relocs.items) |reloc| { switch (reloc.relocation_type) { .R_WASM_TABLE_INDEX_I32, .R_WASM_TABLE_INDEX_I64, From 4115f70cd3ac30027618a56976207c3bde378d85 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Tue, 28 Nov 2023 18:32:31 +0100 Subject: [PATCH 10/10] test/link: update wasm linker tests Disable garbage-collection for certain tests to ensure the tested sections are being emitted. 
--- test/link/wasm/bss/build.zig | 2 ++ test/link/wasm/function-table/build.zig | 3 +++ test/link/wasm/segments/build.zig | 1 + test/link/wasm/stack_pointer/build.zig | 1 + 4 files changed, 7 insertions(+) diff --git a/test/link/wasm/bss/build.zig b/test/link/wasm/bss/build.zig index 1bc059acde..faf8202cd9 100644 --- a/test/link/wasm/bss/build.zig +++ b/test/link/wasm/bss/build.zig @@ -26,6 +26,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize_mode: std.builtin.Opt lib.strip = false; // to make sure the bss segment is emitted, we must import memory lib.import_memory = true; + lib.link_gc_sections = false; const check_lib = lib.checkObject(); @@ -73,6 +74,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize_mode: std.builtin.Opt lib.strip = false; // to make sure the bss segment is emitted, we must import memory lib.import_memory = true; + lib.link_gc_sections = false; const check_lib = lib.checkObject(); check_lib.checkStart(); diff --git a/test/link/wasm/function-table/build.zig b/test/link/wasm/function-table/build.zig index 906a255642..acf7043476 100644 --- a/test/link/wasm/function-table/build.zig +++ b/test/link/wasm/function-table/build.zig @@ -23,6 +23,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize import_table.use_llvm = false; import_table.use_lld = false; import_table.import_table = true; + import_table.link_gc_sections = false; const export_table = b.addExecutable(.{ .name = "export_table", @@ -34,6 +35,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize export_table.use_llvm = false; export_table.use_lld = false; export_table.export_table = true; + export_table.link_gc_sections = false; const regular_table = b.addExecutable(.{ .name = "regular_table", @@ -44,6 +46,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize regular_table.entry = .disabled; regular_table.use_llvm = false; regular_table.use_lld = false; + 
regular_table.link_gc_sections = false; // Ensure function table is not empty const check_import = import_table.checkObject(); const check_export = export_table.checkObject(); diff --git a/test/link/wasm/segments/build.zig b/test/link/wasm/segments/build.zig index 21b954a902..64d25d3fae 100644 --- a/test/link/wasm/segments/build.zig +++ b/test/link/wasm/segments/build.zig @@ -23,6 +23,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize lib.use_llvm = false; lib.use_lld = false; lib.strip = false; + lib.link_gc_sections = false; // so data is not garbage collected and we can verify data section b.installArtifact(lib); const check_lib = lib.checkObject(); diff --git a/test/link/wasm/stack_pointer/build.zig b/test/link/wasm/stack_pointer/build.zig index 00ef54c052..da54c140ca 100644 --- a/test/link/wasm/stack_pointer/build.zig +++ b/test/link/wasm/stack_pointer/build.zig @@ -24,6 +24,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize lib.use_lld = false; lib.strip = false; lib.stack_size = std.wasm.page_size * 2; // set an explicit stack size + lib.link_gc_sections = false; b.installArtifact(lib); const check_lib = lib.checkObject();