diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig
index 82f9f9f20d..513922f247 100644
--- a/src/link/Wasm.zig
+++ b/src/link/Wasm.zig
@@ -110,7 +110,7 @@ func_types: std.ArrayListUnmanaged(std.wasm.Type) = .{},
 /// Output function section where the key is the original
 /// function index and the value is function.
 /// This allows us to map multiple symbols to the same function.
-functions: std.AutoArrayHashMapUnmanaged(struct { file: ?u16, index: u32 }, std.wasm.Func) = .{},
+functions: std.AutoArrayHashMapUnmanaged(struct { file: ?u16, index: u32 }, struct { func: std.wasm.Func, sym_index: u32 }) = .{},
 /// Output global section
 wasm_globals: std.ArrayListUnmanaged(std.wasm.Global) = .{},
 /// Memory section
@@ -1242,6 +1242,14 @@ fn resolveLazySymbols(wasm: *Wasm) !void {
         if (wasm.undefs.fetchSwapRemove(name_offset)) |kv| {
             const loc = try wasm.createSyntheticSymbolOffset(name_offset, .global);
             try wasm.discarded.putNoClobber(wasm.base.allocator, kv.value, loc);
+            _ = wasm.resolved_symbols.swapRemove(kv.value);
+            const symbol = loc.getSymbol(wasm);
+            symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN);
+            symbol.index = @intCast(wasm.imported_globals_count + wasm.wasm_globals.items.len);
+            try wasm.wasm_globals.append(wasm.base.allocator, .{
+                .global_type = .{ .valtype = .i32, .mutable = true },
+                .init = .{ .i32_const = undefined },
+            });
         }
     }
 }
@@ -1301,6 +1309,35 @@ pub fn deinit(wasm: *Wasm) void {
         archive.deinit(gpa);
     }
 
+    // For decls and anon decls we free the memory of their atoms.
+    // The memory of atoms parsed from object files is managed by
+    // the object file itself, and therefore we can skip those.
+    {
+        var it = wasm.decls.valueIterator();
+        while (it.next()) |atom_index_ptr| {
+            const atom = wasm.getAtomPtr(atom_index_ptr.*);
+            for (atom.locals.items) |local_index| {
+                const local_atom = wasm.getAtomPtr(local_index);
+                local_atom.deinit(gpa);
+            }
+            atom.deinit(gpa);
+        }
+    }
+    {
+        for (wasm.anon_decls.values()) |atom_index| {
+            const atom = wasm.getAtomPtr(atom_index);
+            for (atom.locals.items) |local_index| {
+                const local_atom = wasm.getAtomPtr(local_index);
+                local_atom.deinit(gpa);
+            }
+            atom.deinit(gpa);
+        }
+    }
+    for (wasm.synthetic_functions.items) |atom_index| {
+        const atom = wasm.getAtomPtr(atom_index);
+        atom.deinit(gpa);
+    }
+
     wasm.decls.deinit(gpa);
     wasm.anon_decls.deinit(gpa);
     wasm.atom_types.deinit(gpa);
@@ -1313,9 +1350,6 @@ pub fn deinit(wasm: *Wasm) void {
     wasm.symbol_atom.deinit(gpa);
     wasm.export_names.deinit(gpa);
     wasm.atoms.deinit(gpa);
-    for (wasm.managed_atoms.items) |*managed_atom| {
-        managed_atom.deinit(wasm);
-    }
     wasm.managed_atoms.deinit(gpa);
     wasm.segments.deinit(gpa);
     wasm.data_segments.deinit(gpa);
@@ -1550,7 +1584,7 @@ fn getFunctionSignature(wasm: *const Wasm, loc: SymbolLoc) std.wasm.Type {
         const ty_index = wasm.imports.get(loc).?.kind.function;
         return wasm.func_types.items[ty_index];
     }
-    return wasm.func_types.items[wasm.functions.get(.{ .file = loc.file, .index = loc.index }).?.type_index];
+    return wasm.func_types.items[wasm.functions.get(.{ .file = loc.file, .index = symbol.index }).?.func.type_index];
 }
 
 /// Lowers a constant typed value to a local symbol and atom.
@@ -1973,10 +2007,16 @@ pub fn addTableFunction(wasm: *Wasm, symbol_index: u32) !void { /// Starts at offset 1, where the value `0` represents an unresolved function pointer /// or null-pointer fn mapFunctionTable(wasm: *Wasm) void { - var it = wasm.function_table.valueIterator(); + var it = wasm.function_table.iterator(); var index: u32 = 1; - while (it.next()) |value_ptr| : (index += 1) { - value_ptr.* = index; + while (it.next()) |entry| { + const symbol = entry.key_ptr.*.getSymbol(wasm); + if (symbol.isAlive()) { + entry.value_ptr.* = index; + index += 1; + } else { + wasm.function_table.removeByPtr(entry.key_ptr); + } } if (wasm.base.options.import_table or wasm.base.options.output_mode == .Obj) { @@ -2094,20 +2134,28 @@ const Kind = union(enum) { fn parseAtom(wasm: *Wasm, atom_index: Atom.Index, kind: Kind) !void { const atom = wasm.getAtomPtr(atom_index); const symbol = (SymbolLoc{ .file = null, .index = atom.sym_index }).getSymbol(wasm); + const do_garbage_collect = wasm.base.options.gc_sections orelse + (wasm.base.options.output_mode != .Obj); + + if (symbol.isDead() and do_garbage_collect) { + // Prevent unreferenced symbols from being parsed. + return; + } + const final_index: u32 = switch (kind) { .function => result: { - const index = @as(u32, @intCast(wasm.functions.count() + wasm.imported_functions_count)); + const index: u32 = @intCast(wasm.functions.count() + wasm.imported_functions_count); const type_index = wasm.atom_types.get(atom_index).?; try wasm.functions.putNoClobber( wasm.base.allocator, .{ .file = null, .index = index }, - .{ .type_index = type_index }, + .{ .func = .{ .type_index = type_index }, .sym_index = atom.sym_index }, ); symbol.tag = .function; symbol.index = index; if (wasm.code_section_index == null) { - wasm.code_section_index = @as(u32, @intCast(wasm.segments.items.len)); + wasm.code_section_index = @intCast(wasm.segments.items.len); try wasm.segments.append(wasm.base.allocator, .{ .alignment = atom.alignment, .size = atom.size, @@ -2145,12 +2193,12 @@ fn parseAtom(wasm: *Wasm, atom_index: Atom.Index, kind: Kind) !void { const index = gop.value_ptr.*; wasm.segments.items[index].size += atom.size; - symbol.index = @as(u32, @intCast(wasm.segment_info.getIndex(index).?)); + symbol.index = @intCast(wasm.segment_info.getIndex(index).?); // segment info already exists, so free its memory wasm.base.allocator.free(segment_name); break :result index; } else { - const index = @as(u32, @intCast(wasm.segments.items.len)); + const index: u32 = @intCast(wasm.segments.items.len); var flags: u32 = 0; if (wasm.base.options.shared_memory) { flags |= @intFromEnum(Segment.Flag.WASM_DATA_SEGMENT_IS_PASSIVE); @@ -2163,7 +2211,7 @@ fn parseAtom(wasm: *Wasm, atom_index: Atom.Index, kind: Kind) !void { }); gop.value_ptr.* = index; - const info_index = @as(u32, @intCast(wasm.segment_info.count())); + const info_index: u32 = @intCast(wasm.segment_info.count()); try wasm.segment_info.put(wasm.base.allocator, index, segment_info); symbol.index = info_index; break :result index; @@ -2234,14 +2282,37 @@ fn allocateAtoms(wasm: *Wasm) !void { while (true) { const atom = wasm.getAtomPtr(atom_index); const symbol_loc = atom.symbolLoc(); - if (wasm.code_section_index) |index| { - if (index == entry.key_ptr.*) { - if (!wasm.resolved_symbols.contains(symbol_loc)) { - // only allocate resolved function body's. 
-                    atom_index = atom.prev orelse break;
-                    continue;
+            // Ensure we get the original symbol, so we check the correct symbol on
+            // whether it is dead or not, and unlink its atom when it is dead.
+            // This is required as we may have parsed aliases into atoms.
+            const sym = if (symbol_loc.file) |object_index| sym: {
+                const object = wasm.objects.items[object_index];
+                break :sym object.symtable[symbol_loc.index];
+            } else wasm.symbols.items[symbol_loc.index];
+
+            if (sym.isDead()) {
+                // Dead symbols must be unlinked from the linked-list to prevent them
+                // from being emitted into the binary.
+                if (atom.next) |next_index| {
+                    const next = wasm.getAtomPtr(next_index);
+                    next.prev = atom.prev;
+                } else if (entry.value_ptr.* == atom_index) {
+                    // When the atom is dead and is also the first atom retrieved from wasm.atoms(index),
+                    // update the entry to point to the previous atom, so we do not start at a dead
+                    // symbol that was removed and end up emitting no code at all.
+                    if (atom.prev) |prev| {
+                        entry.value_ptr.* = prev;
                     }
                 }
+                atom_index = atom.prev orelse {
+                    atom.next = null;
+                    break;
+                };
+                const prev = wasm.getAtomPtr(atom_index);
+                prev.next = atom.next;
+                atom.prev = null;
+                atom.next = null;
+                continue;
             }
             offset = @intCast(atom.alignment.forward(offset));
             atom.offset = offset;
@@ -2262,8 +2333,10 @@ fn allocateVirtualAddresses(wasm: *Wasm) void {
     for (wasm.resolved_symbols.keys()) |loc| {
         const symbol = loc.getSymbol(wasm);
-        if (symbol.tag != .data) {
-            continue; // only data symbols have virtual addresses
+        if (symbol.tag != .data or symbol.isDead()) {
+            // Only data symbols have virtual addresses.
+            // Dead symbols do not get allocated, so we don't need to set their virtual address either.
+            continue;
         }
         const atom_index = wasm.symbol_atom.get(loc) orelse {
             // synthetic symbol that does not contain an atom
@@ -2350,11 +2423,17 @@ fn setupInitFunctions(wasm: *Wasm) !void {
                 .file = @as(u16, @intCast(file_index)),
                 .priority = init_func.priority,
             });
+            try wasm.mark(.{ .index = init_func.symbol_index, .file = @intCast(file_index) });
         }
     }
 
     // sort the initfunctions based on their priority
     mem.sort(InitFuncLoc, wasm.init_funcs.items, {}, InitFuncLoc.lessThan);
+
+    if (wasm.init_funcs.items.len > 0) {
+        const loc = wasm.findGlobalSymbol("__wasm_call_ctors").?;
+        try wasm.mark(loc);
+    }
 }
 
 /// Generates an atom containing the global error set' size.
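[Editorial aside, not part of the patch] The dead-atom removal in the `allocateAtoms` hunk above is easier to follow outside the diff: atoms form a doubly linked list per segment that is walked backwards from its tail (`entry.value_ptr.*`), and a dead atom is spliced out while the walk resumes at its predecessor. A minimal self-contained sketch of that splice, assuming simplified stand-ins (the `Atom` struct, `tail`, and `unlink` below are hypothetical names, not the linker's actual API):

const std = @import("std");

// Simplified stand-in: atoms live in a flat array and are chained through
// optional prev/next indices; `tail` plays the role of `entry.value_ptr.*`.
const Atom = struct {
    prev: ?u32 = null,
    next: ?u32 = null,
};

// Splices `index` out of the list and returns its predecessor so the caller
// can continue walking backwards, mirroring the dead-symbol branch above.
fn unlink(atoms: []Atom, tail: *u32, index: u32) ?u32 {
    const atom = &atoms[index];
    if (atom.next) |next| {
        atoms[next].prev = atom.prev;
    } else if (tail.* == index) {
        // The dead atom is the entry point of the walk; move the entry point
        // to its predecessor so the walk never starts at a removed atom.
        if (atom.prev) |prev| tail.* = prev;
    }
    const prev_index = atom.prev;
    if (prev_index) |prev| atoms[prev].next = atom.next;
    atom.prev = null;
    atom.next = null;
    return prev_index;
}

test "unlinking keeps neighbours connected" {
    var atoms = [_]Atom{
        .{ .next = 1 },            // 0: head
        .{ .prev = 0, .next = 2 }, // 1: dead, to be removed
        .{ .prev = 1 },            // 2: tail
    };
    var tail: u32 = 2;
    _ = unlink(&atoms, &tail, 1);
    try std.testing.expectEqual(@as(?u32, 2), atoms[0].next);
    try std.testing.expectEqual(@as(?u32, 0), atoms[2].prev);
}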
@@ -2377,7 +2456,7 @@ fn setupErrorsLen(wasm: *Wasm) !void { prev_atom.next = atom.next; atom.prev = null; } - atom.deinit(wasm); + atom.deinit(wasm.base.allocator); break :blk index; } else new_atom: { const atom_index: Atom.Index = @intCast(wasm.managed_atoms.items.len); @@ -2422,7 +2501,7 @@ fn initializeCallCtorsFunction(wasm: *Wasm) !void { // call constructors for (wasm.init_funcs.items) |init_func_loc| { const symbol = init_func_loc.getSymbol(wasm); - const func = wasm.functions.values()[symbol.index - wasm.imported_functions_count]; + const func = wasm.functions.values()[symbol.index - wasm.imported_functions_count].func; const ty = wasm.func_types.items[func.type_index]; // Call function by its function index @@ -2455,13 +2534,16 @@ fn createSyntheticFunction( const loc = wasm.findGlobalSymbol(symbol_name) orelse try wasm.createSyntheticSymbol(symbol_name, .function); const symbol = loc.getSymbol(wasm); + if (symbol.isDead()) { + return; + } const ty_index = try wasm.putOrGetFuncType(func_ty); // create function with above type const func_index = wasm.imported_functions_count + @as(u32, @intCast(wasm.functions.count())); try wasm.functions.putNoClobber( wasm.base.allocator, .{ .file = null, .index = func_index }, - .{ .type_index = ty_index }, + .{ .func = .{ .type_index = ty_index }, .sym_index = loc.index }, ); symbol.index = func_index; @@ -2477,6 +2559,7 @@ fn createSyntheticFunction( .next = null, .prev = null, .code = function_body.moveToUnmanaged(), + .original_offset = 0, }; try wasm.appendAtomAtIndex(wasm.code_section_index.?, atom_index); try wasm.symbol_atom.putNoClobber(wasm.base.allocator, loc, atom_index); @@ -2513,6 +2596,7 @@ pub fn createFunction( .prev = null, .code = function_body.moveToUnmanaged(), .relocs = relocations.moveToUnmanaged(), + .original_offset = 0, }; const symbol = loc.getSymbol(wasm); symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); // ensure function does not get exported @@ -2614,21 +2698,21 @@ fn setupImports(wasm: *Wasm) !void { } for (wasm.resolved_symbols.keys()) |symbol_loc| { - if (symbol_loc.file == null) { + const file_index = symbol_loc.file orelse { // imports generated by Zig code are already in the `import` section continue; - } + }; const symbol = symbol_loc.getSymbol(wasm); - if (std.mem.eql(u8, symbol_loc.getName(wasm), "__indirect_function_table")) { - continue; - } - if (!symbol.requiresImport()) { + if (symbol.isDead() or + !symbol.requiresImport() or + std.mem.eql(u8, symbol_loc.getName(wasm), "__indirect_function_table")) + { continue; } log.debug("Symbol '{s}' will be imported from the host", .{symbol_loc.getName(wasm)}); - const object = wasm.objects.items[symbol_loc.file.?]; + const object = wasm.objects.items[file_index]; const import = object.findImport(symbol.tag.externalType(), symbol.index); // We copy the import to a new import to ensure the names contain references @@ -2680,6 +2764,9 @@ fn setupImports(wasm: *Wasm) !void { /// Takes the global, function and table section from each linked object file /// and merges it into a single section for each. 
 fn mergeSections(wasm: *Wasm) !void {
+    var removed_duplicates = std.ArrayList(SymbolLoc).init(wasm.base.allocator);
+    defer removed_duplicates.deinit();
+
     for (wasm.resolved_symbols.keys()) |sym_loc| {
         if (sym_loc.file == null) {
             // Zig code-generated symbols are already within the sections and do not
@@ -2689,7 +2776,11 @@ fn mergeSections(wasm: *Wasm) !void {
         const object = &wasm.objects.items[sym_loc.file.?];
         const symbol = &object.symtable[sym_loc.index];
-        if (symbol.isUndefined() or (symbol.tag != .function and symbol.tag != .global and symbol.tag != .table)) {
+
+        if (symbol.isDead() or
+            symbol.isUndefined() or
+            (symbol.tag != .function and symbol.tag != .global and symbol.tag != .table))
+        {
             // Skip undefined symbols as they go in the `import` section
             // Also skip symbols that do not need to have a section merged.
             continue;
@@ -2703,9 +2794,20 @@ fn mergeSections(wasm: *Wasm) !void {
                     wasm.base.allocator,
                     .{ .file = sym_loc.file, .index = symbol.index },
                 );
-                if (!gop.found_existing) {
-                    gop.value_ptr.* = object.functions[index];
+                if (gop.found_existing) {
+                    // We found an alias to the same function; discard this symbol in favor of
+                    // the original symbol and point the discarded symbol to it. This ensures
+                    // we only emit a single function, instead of duplicates.
+                    symbol.unmark();
+                    try wasm.discarded.putNoClobber(
+                        wasm.base.allocator,
+                        sym_loc,
+                        .{ .file = gop.key_ptr.*.file, .index = gop.value_ptr.*.sym_index },
+                    );
+                    try removed_duplicates.append(sym_loc);
+                    continue;
                 }
+                gop.value_ptr.* = .{ .func = object.functions[index], .sym_index = sym_loc.index };
                 symbol.index = @as(u32, @intCast(gop.index)) + wasm.imported_functions_count;
             },
             .global => {
@@ -2722,6 +2824,11 @@ fn mergeSections(wasm: *Wasm) !void {
         }
     }
 
+    // For any removed duplicates, remove them from the resolved symbols list
+    for (removed_duplicates.items) |sym_loc| {
+        assert(wasm.resolved_symbols.swapRemove(sym_loc));
+    }
+
     log.debug("Merged ({d}) functions", .{wasm.functions.count()});
     log.debug("Merged ({d}) globals", .{wasm.wasm_globals.items.len});
     log.debug("Merged ({d}) tables", .{wasm.tables.items.len});
@@ -2745,8 +2852,8 @@ fn mergeTypes(wasm: *Wasm) !void {
         }
         const object = wasm.objects.items[sym_loc.file.?];
         const symbol = object.symtable[sym_loc.index];
-        if (symbol.tag != .function) {
-            // Only functions have types
+        if (symbol.tag != .function or symbol.isDead()) {
+            // Only functions have types. Only retrieve the types of referenced functions.
             continue;
         }
@@ -2757,7 +2864,7 @@ fn mergeTypes(wasm: *Wasm) !void {
             import.kind.function = try wasm.putOrGetFuncType(original_type);
         } else if (!dirty.contains(symbol.index)) {
             log.debug("Adding type from function '{s}'", .{sym_loc.getName(wasm)});
-            const func = &wasm.functions.values()[symbol.index - wasm.imported_functions_count];
+            const func = &wasm.functions.values()[symbol.index - wasm.imported_functions_count].func;
             func.type_index = try wasm.putOrGetFuncType(object.func_types[func.type_index]);
             dirty.putAssumeCapacityNoClobber(symbol.index, {});
         }
@@ -2980,14 +3087,14 @@ fn setupMemory(wasm: *Wasm) !void {
 /// From a given object's index and the index of the segment, returns the corresponding
 /// index of the segment within the final data section. When the segment does not yet
 /// exist, a new one will be initialized and appended. The new index will be returned in that case.
-pub fn getMatchingSegment(wasm: *Wasm, object_index: u16, relocatable_index: u32) !?u32 { +pub fn getMatchingSegment(wasm: *Wasm, object_index: u16, symbol_index: u32) !u32 { const object: Object = wasm.objects.items[object_index]; - const relocatable_data = object.relocatable_data[relocatable_index]; + const symbol = object.symtable[symbol_index]; const index = @as(u32, @intCast(wasm.segments.items.len)); - switch (relocatable_data.type) { + switch (symbol.tag) { .data => { - const segment_info = object.segment_info[relocatable_data.index]; + const segment_info = object.segment_info[symbol.index]; const merge_segment = wasm.base.options.output_mode != .Obj; const result = try wasm.data_segments.getOrPut(wasm.base.allocator, segment_info.outputName(merge_segment)); if (!result.found_existing) { @@ -3002,70 +3109,75 @@ pub fn getMatchingSegment(wasm: *Wasm, object_index: u16, relocatable_index: u32 .offset = 0, .flags = flags, }); + try wasm.segment_info.putNoClobber(wasm.base.allocator, index, .{ + .name = try wasm.base.allocator.dupe(u8, segment_info.name), + .alignment = segment_info.alignment, + .flags = segment_info.flags, + }); return index; } else return result.value_ptr.*; }, - .code => return wasm.code_section_index orelse blk: { + .function => return wasm.code_section_index orelse blk: { wasm.code_section_index = index; try wasm.appendDummySegment(); break :blk index; }, - .debug => { - const debug_name = object.getDebugName(relocatable_data); - if (mem.eql(u8, debug_name, ".debug_info")) { + .section => { + const section_name = object.string_table.get(symbol.name); + if (mem.eql(u8, section_name, ".debug_info")) { return wasm.debug_info_index orelse blk: { wasm.debug_info_index = index; try wasm.appendDummySegment(); break :blk index; }; - } else if (mem.eql(u8, debug_name, ".debug_line")) { + } else if (mem.eql(u8, section_name, ".debug_line")) { return wasm.debug_line_index orelse blk: { wasm.debug_line_index = index; try wasm.appendDummySegment(); break :blk index; }; - } else if (mem.eql(u8, debug_name, ".debug_loc")) { + } else if (mem.eql(u8, section_name, ".debug_loc")) { return wasm.debug_loc_index orelse blk: { wasm.debug_loc_index = index; try wasm.appendDummySegment(); break :blk index; }; - } else if (mem.eql(u8, debug_name, ".debug_ranges")) { + } else if (mem.eql(u8, section_name, ".debug_ranges")) { return wasm.debug_line_index orelse blk: { wasm.debug_ranges_index = index; try wasm.appendDummySegment(); break :blk index; }; - } else if (mem.eql(u8, debug_name, ".debug_pubnames")) { + } else if (mem.eql(u8, section_name, ".debug_pubnames")) { return wasm.debug_pubnames_index orelse blk: { wasm.debug_pubnames_index = index; try wasm.appendDummySegment(); break :blk index; }; - } else if (mem.eql(u8, debug_name, ".debug_pubtypes")) { + } else if (mem.eql(u8, section_name, ".debug_pubtypes")) { return wasm.debug_pubtypes_index orelse blk: { wasm.debug_pubtypes_index = index; try wasm.appendDummySegment(); break :blk index; }; - } else if (mem.eql(u8, debug_name, ".debug_abbrev")) { + } else if (mem.eql(u8, section_name, ".debug_abbrev")) { return wasm.debug_abbrev_index orelse blk: { wasm.debug_abbrev_index = index; try wasm.appendDummySegment(); break :blk index; }; - } else if (mem.eql(u8, debug_name, ".debug_str")) { + } else if (mem.eql(u8, section_name, ".debug_str")) { return wasm.debug_str_index orelse blk: { wasm.debug_str_index = index; try wasm.appendDummySegment(); break :blk index; }; } else { - log.warn("found unknown debug section '{s}'", 
.{debug_name}); - log.warn(" debug section will be skipped", .{}); - return null; + log.warn("found unknown section '{s}'", .{section_name}); + return error.UnexpectedValue; } }, + else => unreachable, } } @@ -3108,6 +3220,7 @@ pub fn getErrorTableSymbol(wasm: *Wasm) !u32 { .virtual_address = undefined, }; symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); + symbol.mark(); try wasm.resolved_symbols.put(wasm.base.allocator, atom.symbolLoc(), {}); @@ -3140,6 +3253,7 @@ fn populateErrorNameTable(wasm: *Wasm) !void { .virtual_address = undefined, }; names_symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); + names_symbol.mark(); log.debug("Populating error names", .{}); @@ -3431,18 +3545,15 @@ fn linkWithZld(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) l try wasm.setupInitFunctions(); try wasm.setupStart(); + + try wasm.markReferences(); try wasm.setupImports(); - - for (wasm.objects.items, 0..) |*object, object_index| { - try object.parseIntoAtoms(gpa, @as(u16, @intCast(object_index)), wasm); - } - + try wasm.mergeSections(); + try wasm.mergeTypes(); try wasm.allocateAtoms(); try wasm.setupMemory(); wasm.allocateVirtualAddresses(); wasm.mapFunctionTable(); - try wasm.mergeSections(); - try wasm.mergeTypes(); try wasm.initializeCallCtorsFunction(); try wasm.setupInitMemoryFunction(); try wasm.setupTLSRelocationsFunction(); @@ -3519,8 +3630,9 @@ pub fn flushModule(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod // So we can rebuild the binary file on each incremental update defer wasm.resetState(); try wasm.setupInitFunctions(); - try wasm.setupErrorsLen(); try wasm.setupStart(); + try wasm.markReferences(); + try wasm.setupErrorsLen(); try wasm.setupImports(); if (wasm.base.options.module) |mod| { var decl_it = wasm.decls.iterator(); @@ -3577,16 +3689,12 @@ pub fn flushModule(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod } } - for (wasm.objects.items, 0..) 
|*object, object_index| { - try object.parseIntoAtoms(wasm.base.allocator, @as(u16, @intCast(object_index)), wasm); - } - + try wasm.mergeSections(); + try wasm.mergeTypes(); try wasm.allocateAtoms(); try wasm.setupMemory(); wasm.allocateVirtualAddresses(); wasm.mapFunctionTable(); - try wasm.mergeSections(); - try wasm.mergeTypes(); try wasm.initializeCallCtorsFunction(); try wasm.setupInitMemoryFunction(); try wasm.setupTLSRelocationsFunction(); @@ -3644,8 +3752,8 @@ fn writeToFile( binary_bytes.items, header_offset, .type, - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, @intCast(wasm.func_types.items.len)), + @intCast(binary_bytes.items.len - header_offset - header_size), + @intCast(wasm.func_types.items.len), ); section_count += 1; } @@ -3677,8 +3785,8 @@ fn writeToFile( binary_bytes.items, header_offset, .import, - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, @intCast(wasm.imports.count() + @intFromBool(import_memory))), + @intCast(binary_bytes.items.len - header_offset - header_size), + @intCast(wasm.imports.count() + @intFromBool(import_memory)), ); section_count += 1; } @@ -3687,15 +3795,15 @@ fn writeToFile( if (wasm.functions.count() != 0) { const header_offset = try reserveVecSectionHeader(&binary_bytes); for (wasm.functions.values()) |function| { - try leb.writeULEB128(binary_writer, function.type_index); + try leb.writeULEB128(binary_writer, function.func.type_index); } try writeVecSectionHeader( binary_bytes.items, header_offset, .function, - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, @intCast(wasm.functions.count())), + @intCast(binary_bytes.items.len - header_offset - header_size), + @intCast(wasm.functions.count()), ); section_count += 1; } @@ -3713,8 +3821,8 @@ fn writeToFile( binary_bytes.items, header_offset, .table, - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, @intCast(wasm.tables.items.len)), + @intCast(binary_bytes.items.len - header_offset - header_size), + @intCast(wasm.tables.items.len), ); section_count += 1; } @@ -3728,8 +3836,8 @@ fn writeToFile( binary_bytes.items, header_offset, .memory, - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, 1), // wasm currently only supports 1 linear memory segment + @intCast(binary_bytes.items.len - header_offset - header_size), + 1, // wasm currently only supports 1 linear memory segment ); section_count += 1; } @@ -3748,8 +3856,8 @@ fn writeToFile( binary_bytes.items, header_offset, .global, - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, @intCast(wasm.wasm_globals.items.len)), + @intCast(binary_bytes.items.len - header_offset - header_size), + @intCast(wasm.wasm_globals.items.len), ); section_count += 1; } @@ -3777,8 +3885,8 @@ fn writeToFile( binary_bytes.items, header_offset, .@"export", - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, @intCast(wasm.exports.items.len)) + @intFromBool(export_memory), + @intCast(binary_bytes.items.len - header_offset - header_size), + @intCast(wasm.exports.items.len + @intFromBool(export_memory)), ); section_count += 1; } @@ -3813,15 +3921,16 @@ fn writeToFile( try leb.writeULEB128(binary_writer, @as(u32, @intCast(wasm.function_table.count()))); var symbol_it = wasm.function_table.keyIterator(); while (symbol_it.next()) |symbol_loc_ptr| { - try leb.writeULEB128(binary_writer, symbol_loc_ptr.*.getSymbol(wasm).index); + 
const sym = symbol_loc_ptr.*.getSymbol(wasm); + try leb.writeULEB128(binary_writer, sym.index); } try writeVecSectionHeader( binary_bytes.items, header_offset, .element, - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, 1), + @intCast(binary_bytes.items.len - header_offset - header_size), + 1, ); section_count += 1; } @@ -3834,8 +3943,8 @@ fn writeToFile( binary_bytes.items, header_offset, .data_count, - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, @intCast(data_segments_count)), + @intCast(binary_bytes.items.len - header_offset - header_size), + @intCast(data_segments_count), ); } @@ -3846,20 +3955,18 @@ fn writeToFile( var atom_index = wasm.atoms.get(code_index).?; // The code section must be sorted in line with the function order. - var sorted_atoms = try std.ArrayList(*Atom).initCapacity(wasm.base.allocator, wasm.functions.count()); + var sorted_atoms = try std.ArrayList(*const Atom).initCapacity(wasm.base.allocator, wasm.functions.count()); defer sorted_atoms.deinit(); while (true) { - var atom = wasm.getAtomPtr(atom_index); - if (wasm.resolved_symbols.contains(atom.symbolLoc())) { - if (!is_obj) { - atom.resolveRelocs(wasm); - } - sorted_atoms.appendAssumeCapacity(atom); + const atom = wasm.getAtomPtr(atom_index); + if (!is_obj) { + atom.resolveRelocs(wasm); } - // atom = if (atom.prev) |prev| wasm.getAtomPtr(prev) else break; + sorted_atoms.appendAssumeCapacity(atom); // found more code atoms than functions atom_index = atom.prev orelse break; } + std.debug.assert(wasm.functions.count() == sorted_atoms.items.len); const atom_sort_fn = struct { fn sort(ctx: *const Wasm, lhs: *const Atom, rhs: *const Atom) bool { @@ -3869,7 +3976,7 @@ fn writeToFile( } }.sort; - mem.sort(*Atom, sorted_atoms.items, wasm, atom_sort_fn); + mem.sort(*const Atom, sorted_atoms.items, wasm, atom_sort_fn); for (sorted_atoms.items) |sorted_atom| { try leb.writeULEB128(binary_writer, sorted_atom.size); @@ -3882,7 +3989,7 @@ fn writeToFile( header_offset, .code, code_section_size, - @as(u32, @intCast(wasm.functions.count())), + @intCast(wasm.functions.count()), ); code_section_index = section_count; section_count += 1; @@ -3953,8 +4060,8 @@ fn writeToFile( binary_bytes.items, header_offset, .data, - @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)), - @as(u32, @intCast(segment_count)), + @intCast(binary_bytes.items.len - header_offset - header_size), + @intCast(segment_count), ); data_section_index = section_count; section_count += 1; @@ -4210,6 +4317,9 @@ fn emitNameSection(wasm: *Wasm, binary_bytes: *std.ArrayList(u8), arena: std.mem for (wasm.resolved_symbols.keys()) |sym_loc| { const symbol = sym_loc.getSymbol(wasm).*; + if (symbol.isDead()) { + continue; + } const name = sym_loc.getName(wasm); switch (symbol.tag) { .function => { @@ -4498,6 +4608,14 @@ fn linkWithLLD(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) ! try argv.append("--export-table"); } + if (wasm.base.options.gc_sections) |gc| { + // For wasm-ld we only need to specify '--no-gc-sections' when the user explicitly + // specified it as garbage collection is enabled by default. 
+        if (!gc) {
+            try argv.append("--no-gc-sections");
+        }
+    }
+
     if (wasm.base.options.strip) {
         try argv.append("-s");
     }
@@ -4783,7 +4901,7 @@ fn emitLinkSection(wasm: *Wasm, binary_bytes: *std.ArrayList(u8), symbol_table:
     try wasm.emitSymbolTable(binary_bytes, symbol_table);
     try wasm.emitSegmentInfo(binary_bytes);
 
-    const size = @as(u32, @intCast(binary_bytes.items.len - offset - 6));
+    const size: u32 = @intCast(binary_bytes.items.len - offset - 6);
     try writeCustomSectionHeader(binary_bytes.items, offset, size);
 }
 
@@ -4831,7 +4949,7 @@ fn emitSymbolTable(wasm: *Wasm, binary_bytes: *std.ArrayList(u8), symbol_table:
     }
 
     var buf: [10]u8 = undefined;
-    leb.writeUnsignedFixed(5, buf[0..5], @as(u32, @intCast(binary_bytes.items.len - table_offset + 5)));
+    leb.writeUnsignedFixed(5, buf[0..5], @intCast(binary_bytes.items.len - table_offset + 5));
     leb.writeUnsignedFixed(5, buf[5..], symbol_count);
     try binary_bytes.insertSlice(table_offset, &buf);
 }
@@ -4914,7 +5032,7 @@ fn emitCodeRelocations(
     var buf: [5]u8 = undefined;
     leb.writeUnsignedFixed(5, &buf, count);
     try binary_bytes.insertSlice(reloc_start, &buf);
-    const size = @as(u32, @intCast(binary_bytes.items.len - header_offset - 6));
+    const size: u32 = @intCast(binary_bytes.items.len - header_offset - 6);
     try writeCustomSectionHeader(binary_bytes.items, header_offset, size);
 }
 
@@ -5018,3 +5136,67 @@ pub fn storeDeclType(wasm: *Wasm, decl_index: InternPool.DeclIndex, func_type: s
     try wasm.atom_types.put(wasm.base.allocator, atom_index, index);
     return index;
 }
+
+/// Walks all resolved symbols and checks whether each needs to be marked alive,
+/// along with any symbols it references.
+fn markReferences(wasm: *Wasm) !void {
+    const tracy = trace(@src());
+    defer tracy.end();
+    const do_garbage_collect = wasm.base.options.gc_sections orelse
+        (wasm.base.options.output_mode != .Obj);
+
+    for (wasm.resolved_symbols.keys()) |sym_loc| {
+        const sym = sym_loc.getSymbol(wasm);
+        if (sym.isExported(wasm.base.options.rdynamic) or sym.isNoStrip() or !do_garbage_collect) {
+            try wasm.mark(sym_loc);
+            continue;
+        }
+
+        // Debug sections may need to be parsed and marked when they contain
+        // relocations to alive symbols.
+        if (sym.tag == .section and !wasm.base.options.strip) {
+            const file = sym_loc.file orelse continue; // Incremental debug info is done independently
+            const object = &wasm.objects.items[file];
+            const atom_index = try Object.parseSymbolIntoAtom(object, file, sym_loc.index, wasm);
+            const atom = wasm.getAtom(atom_index);
+            for (atom.relocs.items) |reloc| {
+                const target_loc: SymbolLoc = .{ .index = reloc.index, .file = atom.file };
+                const target_sym = target_loc.getSymbol(wasm);
+                if (target_sym.isAlive() or !do_garbage_collect) {
+                    sym.mark();
+                    continue; // Skip all other relocations as this debug atom is already marked now
+                }
+            }
+        }
+    }
+}
+
+/// Recursively marks a symbol as 'alive' so that neither it nor any symbols it
+/// references will be omitted from the binary.
+fn mark(wasm: *Wasm, loc: SymbolLoc) !void {
+    const symbol = loc.getSymbol(wasm);
+    if (symbol.isAlive()) {
+        // Symbol is already marked alive, including its references.
+        // This means we can skip it so we don't end up marking the same symbols
+        // multiple times.
+        return;
+    }
+    symbol.mark();
+    if (symbol.isUndefined()) {
+        // undefined symbols do not have an associated `Atom` and therefore also
+        // do not contain relocations.
+        return;
+    }
+
+    const atom_index = if (loc.file) |file_index| idx: {
+        const object = &wasm.objects.items[file_index];
+        const atom_index = try object.parseSymbolIntoAtom(file_index, loc.index, wasm);
+        break :idx atom_index;
+    } else wasm.symbol_atom.get(loc) orelse return;
+
+    const atom = wasm.getAtom(atom_index);
+    for (atom.relocs.items) |reloc| {
+        const target_loc: SymbolLoc = .{ .index = reloc.index, .file = loc.file };
+        try wasm.mark(target_loc.finalLoc(wasm));
+    }
+}
diff --git a/src/link/Wasm/Atom.zig b/src/link/Wasm/Atom.zig
index 60727b6af1..b20e8628ba 100644
--- a/src/link/Wasm/Atom.zig
+++ b/src/link/Wasm/Atom.zig
@@ -23,6 +23,10 @@ alignment: Wasm.Alignment,
 /// Offset into the section where the atom lives, this already accounts
 /// for alignment.
 offset: u32,
+/// The original offset within the object file. This value is subtracted from
+/// relocation offsets to determine where in the `data` to rewrite the value.
+original_offset: u32,
+
 /// Represents the index of the file this atom was generated from.
 /// This is 'null' when the atom was generated by a Decl from Zig code.
 file: ?u16,
@@ -50,11 +54,11 @@ pub const empty: Atom = .{
     .prev = null,
     .size = 0,
     .sym_index = 0,
+    .original_offset = 0,
 };
 
 /// Frees all resources owned by this `Atom`.
-pub fn deinit(atom: *Atom, wasm: *Wasm) void {
-    const gpa = wasm.base.allocator;
+pub fn deinit(atom: *Atom, gpa: std.mem.Allocator) void {
     atom.relocs.deinit(gpa);
     atom.code.deinit(gpa);
     atom.locals.deinit(gpa);
@@ -114,10 +118,10 @@ pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void {
             .R_WASM_GLOBAL_INDEX_I32,
             .R_WASM_MEMORY_ADDR_I32,
             .R_WASM_SECTION_OFFSET_I32,
-            => std.mem.writeInt(u32, atom.code.items[reloc.offset..][0..4], @as(u32, @intCast(value)), .little),
+            => std.mem.writeInt(u32, atom.code.items[reloc.offset - atom.original_offset ..][0..4], @as(u32, @intCast(value)), .little),
             .R_WASM_TABLE_INDEX_I64,
             .R_WASM_MEMORY_ADDR_I64,
-            => std.mem.writeInt(u64, atom.code.items[reloc.offset..][0..8], value, .little),
+            => std.mem.writeInt(u64, atom.code.items[reloc.offset - atom.original_offset ..][0..8], value, .little),
             .R_WASM_GLOBAL_INDEX_LEB,
             .R_WASM_EVENT_INDEX_LEB,
             .R_WASM_FUNCTION_INDEX_LEB,
@@ -127,12 +131,12 @@ pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void {
             .R_WASM_TABLE_NUMBER_LEB,
             .R_WASM_TYPE_INDEX_LEB,
             .R_WASM_MEMORY_ADDR_TLS_SLEB,
-            => leb.writeUnsignedFixed(5, atom.code.items[reloc.offset..][0..5], @as(u32, @intCast(value))),
+            => leb.writeUnsignedFixed(5, atom.code.items[reloc.offset - atom.original_offset ..][0..5], @as(u32, @intCast(value))),
             .R_WASM_MEMORY_ADDR_LEB64,
             .R_WASM_MEMORY_ADDR_SLEB64,
             .R_WASM_TABLE_INDEX_SLEB64,
             .R_WASM_MEMORY_ADDR_TLS_SLEB64,
-            => leb.writeUnsignedFixed(10, atom.code.items[reloc.offset..][0..10], value),
+            => leb.writeUnsignedFixed(10, atom.code.items[reloc.offset - atom.original_offset ..][0..10], value),
         }
     }
 }
@@ -150,7 +154,7 @@ fn relocationValue(atom: Atom, relocation: types.Relocation, wasm_bin: *const Wa
         .R_WASM_TABLE_INDEX_I64,
         .R_WASM_TABLE_INDEX_SLEB,
         .R_WASM_TABLE_INDEX_SLEB64,
-        => return wasm_bin.function_table.get(target_loc) orelse 0,
+        => return wasm_bin.function_table.get(.{ .file = atom.file, .index = relocation.index }) orelse 0,
         .R_WASM_TYPE_INDEX_LEB => {
             const file_index = atom.file orelse {
                 return relocation.index;
diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig
index e7c118e48e..610c534c88 100644
--- a/src/link/Wasm/Object.zig
+++ b/src/link/Wasm/Object.zig
@@ -59,20 +59,16 @@ init_funcs: []const
types.InitFunc = &.{}, comdat_info: []const types.Comdat = &.{}, /// Represents non-synthetic sections that can essentially be mem-cpy'd into place /// after performing relocations. -relocatable_data: []const RelocatableData = &.{}, +relocatable_data: std.AutoHashMapUnmanaged(RelocatableData.Tag, []RelocatableData) = .{}, /// String table for all strings required by the object file, such as symbol names, /// import name, module name and export names. Each string will be deduplicated /// and returns an offset into the table. string_table: Wasm.StringTable = .{}, -/// All the names of each debug section found in the current object file. -/// Each name is terminated by a null-terminator. The name can be found, -/// from the `index` offset within the `RelocatableData`. -debug_names: [:0]const u8, /// Represents a single item within a section (depending on its `type`) const RelocatableData = struct { /// The type of the relocatable data - type: enum { data, code, debug }, + type: Tag, /// Pointer to the data of the segment, where its length is written to `size` data: [*]u8, /// The size in bytes of the data representing the segment within the section @@ -85,6 +81,8 @@ const RelocatableData = struct { /// Represents the index of the section it belongs to section_index: u32, + const Tag = enum { data, code, custom }; + /// Returns the alignment of the segment, by retrieving it from the segment /// meta data of the given object file. /// NOTE: Alignment is encoded as a power of 2, so we shift the symbol's @@ -99,14 +97,14 @@ const RelocatableData = struct { return switch (relocatable_data.type) { .data => .data, .code => .function, - .debug => .section, + .custom => .section, }; } - /// Returns the index within a section itrelocatable_data, or in case of a debug section, + /// Returns the index within a section, or in case of a custom section, /// returns the section index within the object file. pub fn getIndex(relocatable_data: RelocatableData) u32 { - if (relocatable_data.type == .debug) return relocatable_data.section_index; + if (relocatable_data.type == .custom) return relocatable_data.section_index; return relocatable_data.index; } }; @@ -121,7 +119,6 @@ pub fn create(gpa: Allocator, file: std.fs.File, name: []const u8, maybe_max_siz var object: Object = .{ .file = file, .name = try gpa.dupe(u8, name), - .debug_names = &.{}, }; var is_object_file: bool = false; @@ -182,10 +179,16 @@ pub fn deinit(object: *Object, gpa: Allocator) void { gpa.free(info.name); } gpa.free(object.segment_info); - for (object.relocatable_data) |rel_data| { - gpa.free(rel_data.data[0..rel_data.size]); + { + var it = object.relocatable_data.valueIterator(); + while (it.next()) |relocatable_data| { + for (relocatable_data.*) |rel_data| { + gpa.free(rel_data.data[0..rel_data.size]); + } + gpa.free(relocatable_data.*); + } } - gpa.free(object.relocatable_data); + object.relocatable_data.deinit(gpa); object.string_table.deinit(gpa); gpa.free(object.name); object.* = undefined; @@ -345,23 +348,7 @@ fn Parser(comptime ReaderType: type) type { errdefer parser.object.deinit(gpa); try parser.verifyMagicBytes(); const version = try parser.reader.reader().readInt(u32, .little); - parser.object.version = version; - var relocatable_data = std.ArrayList(RelocatableData).init(gpa); - var debug_names = std.ArrayList(u8).init(gpa); - - errdefer { - // only free the inner contents of relocatable_data if we didn't - // assign it to the object yet. 
- if (parser.object.relocatable_data.len == 0) { - for (relocatable_data.items) |rel_data| { - gpa.free(rel_data.data[0..rel_data.size]); - } - relocatable_data.deinit(); - } - gpa.free(debug_names.items); - debug_names.deinit(); - } var section_index: u32 = 0; while (parser.reader.reader().readByte()) |byte| : (section_index += 1) { @@ -377,26 +364,34 @@ fn Parser(comptime ReaderType: type) type { if (std.mem.eql(u8, name, "linking")) { is_object_file.* = true; - parser.object.relocatable_data = relocatable_data.items; // at this point no new relocatable sections will appear so we're free to store them. try parser.parseMetadata(gpa, @as(usize, @intCast(reader.context.bytes_left))); } else if (std.mem.startsWith(u8, name, "reloc")) { try parser.parseRelocations(gpa); } else if (std.mem.eql(u8, name, "target_features")) { try parser.parseFeatures(gpa); } else if (std.mem.startsWith(u8, name, ".debug")) { + const gop = try parser.object.relocatable_data.getOrPut(gpa, .custom); + var relocatable_data: std.ArrayListUnmanaged(RelocatableData) = .{}; + defer relocatable_data.deinit(gpa); + if (!gop.found_existing) { + gop.value_ptr.* = &.{}; + } else { + relocatable_data = std.ArrayListUnmanaged(RelocatableData).fromOwnedSlice(gop.value_ptr.*); + } const debug_size = @as(u32, @intCast(reader.context.bytes_left)); const debug_content = try gpa.alloc(u8, debug_size); errdefer gpa.free(debug_content); try reader.readNoEof(debug_content); - try relocatable_data.append(.{ - .type = .debug, + try relocatable_data.append(gpa, .{ + .type = .custom, .data = debug_content.ptr, .size = debug_size, .index = try parser.object.string_table.put(gpa, name), .offset = 0, // debug sections only contain 1 entry, so no need to calculate offset .section_index = section_index, }); + gop.value_ptr.* = try relocatable_data.toOwnedSlice(gpa); } else { try reader.skipBytes(reader.context.bytes_left, .{}); } @@ -515,26 +510,32 @@ fn Parser(comptime ReaderType: type) type { const start = reader.context.bytes_left; var index: u32 = 0; const count = try readLeb(u32, reader); + const imported_function_count = parser.object.importedCountByKind(.function); + var relocatable_data = try std.ArrayList(RelocatableData).initCapacity(gpa, count); + defer relocatable_data.deinit(); while (index < count) : (index += 1) { const code_len = try readLeb(u32, reader); const offset = @as(u32, @intCast(start - reader.context.bytes_left)); const data = try gpa.alloc(u8, code_len); errdefer gpa.free(data); try reader.readNoEof(data); - try relocatable_data.append(.{ + relocatable_data.appendAssumeCapacity(.{ .type = .code, .data = data.ptr, .size = code_len, - .index = parser.object.importedCountByKind(.function) + index, + .index = imported_function_count + index, .offset = offset, .section_index = section_index, }); } + try parser.object.relocatable_data.put(gpa, .code, try relocatable_data.toOwnedSlice()); }, .data => { const start = reader.context.bytes_left; var index: u32 = 0; const count = try readLeb(u32, reader); + var relocatable_data = try std.ArrayList(RelocatableData).initCapacity(gpa, count); + defer relocatable_data.deinit(); while (index < count) : (index += 1) { const flags = try readLeb(u32, reader); const data_offset = try readInit(reader); @@ -545,7 +546,7 @@ fn Parser(comptime ReaderType: type) type { const data = try gpa.alloc(u8, data_len); errdefer gpa.free(data); try reader.readNoEof(data); - try relocatable_data.append(.{ + relocatable_data.appendAssumeCapacity(.{ .type = .data, .data = data.ptr, .size = data_len, @@ 
-554,6 +555,7 @@ fn Parser(comptime ReaderType: type) type { .section_index = section_index, }); } + try parser.object.relocatable_data.put(gpa, .data, try relocatable_data.toOwnedSlice()); }, else => try parser.reader.reader().skipBytes(len, .{}), } @@ -561,7 +563,6 @@ fn Parser(comptime ReaderType: type) type { error.EndOfStream => {}, // finished parsing the file else => |e| return e, } - parser.object.relocatable_data = try relocatable_data.toOwnedSlice(); } /// Based on the "features" custom section, parses it into a list of @@ -789,7 +790,8 @@ fn Parser(comptime ReaderType: type) type { }, .section => { symbol.index = try leb.readULEB128(u32, reader); - for (parser.object.relocatable_data) |data| { + const section_data = parser.object.relocatable_data.get(.custom).?; + for (section_data) |data| { if (data.section_index == symbol.index) { symbol.name = data.index; break; @@ -798,22 +800,15 @@ fn Parser(comptime ReaderType: type) type { }, else => { symbol.index = try leb.readULEB128(u32, reader); - var maybe_import: ?types.Import = null; - const is_undefined = symbol.isUndefined(); - if (is_undefined) { - maybe_import = parser.object.findImport(symbol.tag.externalType(), symbol.index); - } const explicit_name = symbol.hasFlag(.WASM_SYM_EXPLICIT_NAME); - if (!(is_undefined and !explicit_name)) { + symbol.name = if (!is_undefined or (is_undefined and explicit_name)) name: { const name_len = try leb.readULEB128(u32, reader); const name = try gpa.alloc(u8, name_len); defer gpa.free(name); try reader.readNoEof(name); - symbol.name = try parser.object.string_table.put(gpa, name); - } else { - symbol.name = maybe_import.?.name; - } + break :name try parser.object.string_table.put(gpa, name); + } else parser.object.findImport(symbol.tag.externalType(), symbol.index).name; }, } return symbol; @@ -887,110 +882,95 @@ fn assertEnd(reader: anytype) !void { } /// Parses an object file into atoms, for code and data sections -pub fn parseIntoAtoms(object: *Object, gpa: Allocator, object_index: u16, wasm_bin: *Wasm) !void { - const Key = struct { - kind: Symbol.Tag, - index: u32, +pub fn parseSymbolIntoAtom(object: *Object, object_index: u16, symbol_index: u32, wasm: *Wasm) !Atom.Index { + const symbol = &object.symtable[symbol_index]; + const relocatable_data: RelocatableData = switch (symbol.tag) { + .function => object.relocatable_data.get(.code).?[symbol.index - object.importedCountByKind(.function)], + .data => object.relocatable_data.get(.data).?[symbol.index], + .section => blk: { + const data = object.relocatable_data.get(.custom).?; + for (data) |dat| { + if (dat.section_index == symbol.index) { + break :blk dat; + } + } + unreachable; + }, + else => unreachable, }; - var symbol_for_segment = std.AutoArrayHashMap(Key, std.ArrayList(u32)).init(gpa); - defer for (symbol_for_segment.values()) |*list| { - list.deinit(); - } else symbol_for_segment.deinit(); + const final_index = try wasm.getMatchingSegment(object_index, symbol_index); + const atom_index = @as(Atom.Index, @intCast(wasm.managed_atoms.items.len)); + const atom = try wasm.managed_atoms.addOne(wasm.base.allocator); + atom.* = Atom.empty; + try wasm.appendAtomAtIndex(final_index, atom_index); - for (object.symtable, 0..) 
|symbol, symbol_index| { - switch (symbol.tag) { - .function, .data, .section => if (!symbol.isUndefined()) { - const gop = try symbol_for_segment.getOrPut(.{ .kind = symbol.tag, .index = symbol.index }); - const sym_idx = @as(u32, @intCast(symbol_index)); - if (!gop.found_existing) { - gop.value_ptr.* = std.ArrayList(u32).init(gpa); - } - try gop.value_ptr.*.append(sym_idx); - }, - else => continue, + atom.sym_index = symbol_index; + atom.file = object_index; + atom.size = relocatable_data.size; + atom.alignment = relocatable_data.getAlignment(object); + atom.code = std.ArrayListUnmanaged(u8).fromOwnedSlice(relocatable_data.data[0..relocatable_data.size]); + atom.original_offset = relocatable_data.offset; + try wasm.symbol_atom.putNoClobber(wasm.base.allocator, atom.symbolLoc(), atom_index); + const segment: *Wasm.Segment = &wasm.segments.items[final_index]; + if (relocatable_data.type == .data) { //code section and custom sections are 1-byte aligned + segment.alignment = segment.alignment.max(atom.alignment); + } + + if (object.relocations.get(relocatable_data.section_index)) |relocations| { + const start = searchRelocStart(relocations, relocatable_data.offset); + const len = searchRelocEnd(relocations[start..], relocatable_data.offset + atom.size); + atom.relocs = std.ArrayListUnmanaged(types.Relocation).fromOwnedSlice(relocations[start..][0..len]); + for (atom.relocs.items) |reloc| { + switch (reloc.relocation_type) { + .R_WASM_TABLE_INDEX_I32, + .R_WASM_TABLE_INDEX_I64, + .R_WASM_TABLE_INDEX_SLEB, + .R_WASM_TABLE_INDEX_SLEB64, + => { + try wasm.function_table.put(wasm.base.allocator, .{ + .file = object_index, + .index = reloc.index, + }, 0); + }, + .R_WASM_GLOBAL_INDEX_I32, + .R_WASM_GLOBAL_INDEX_LEB, + => { + const sym = object.symtable[reloc.index]; + if (sym.tag != .global) { + try wasm.got_symbols.append( + wasm.base.allocator, + .{ .file = object_index, .index = reloc.index }, + ); + } + }, + else => {}, + } } } - for (object.relocatable_data, 0..) |relocatable_data, index| { - const final_index = (try wasm_bin.getMatchingSegment(object_index, @as(u32, @intCast(index)))) orelse { - continue; // found unknown section, so skip parsing into atom as we do not know how to handle it. - }; + return atom_index; +} - const atom_index: Atom.Index = @intCast(wasm_bin.managed_atoms.items.len); - const atom = try wasm_bin.managed_atoms.addOne(gpa); - atom.* = Atom.empty; - atom.file = object_index; - atom.size = relocatable_data.size; - atom.alignment = relocatable_data.getAlignment(object); - - const relocations: []types.Relocation = object.relocations.get(relocatable_data.section_index) orelse &.{}; - for (relocations) |relocation| { - if (isInbetween(relocatable_data.offset, atom.size, relocation.offset)) { - // set the offset relative to the offset of the segment itobject, - // rather than within the entire section. 
-                var reloc = relocation;
-                reloc.offset -= relocatable_data.offset;
-                try atom.relocs.append(gpa, reloc);
-
-                switch (relocation.relocation_type) {
-                    .R_WASM_TABLE_INDEX_I32,
-                    .R_WASM_TABLE_INDEX_I64,
-                    .R_WASM_TABLE_INDEX_SLEB,
-                    .R_WASM_TABLE_INDEX_SLEB64,
-                    => {
-                        try wasm_bin.function_table.put(gpa, .{
-                            .file = object_index,
-                            .index = relocation.index,
-                        }, 0);
-                    },
-                    .R_WASM_GLOBAL_INDEX_I32,
-                    .R_WASM_GLOBAL_INDEX_LEB,
-                    => {
-                        const sym = object.symtable[relocation.index];
-                        if (sym.tag != .global) {
-                            try wasm_bin.got_symbols.append(
-                                wasm_bin.base.allocator,
-                                .{ .file = object_index, .index = relocation.index },
-                            );
-                        }
-                    },
-                    else => {},
-                }
-            }
+fn searchRelocStart(relocs: []const types.Relocation, address: u32) usize {
+    var min: usize = 0;
+    var max: usize = relocs.len;
+    while (min < max) {
+        const index = (min + max) / 2;
+        const curr = relocs[index];
+        if (curr.offset < address) {
+            min = index + 1;
+        } else {
+            max = index;
         }
-
-        try atom.code.appendSlice(gpa, relocatable_data.data[0..relocatable_data.size]);
-
-        if (symbol_for_segment.getPtr(.{
-            .kind = relocatable_data.getSymbolKind(),
-            .index = relocatable_data.getIndex(),
-        })) |symbols| {
-            atom.sym_index = symbols.pop();
-            try wasm_bin.symbol_atom.putNoClobber(gpa, atom.symbolLoc(), atom_index);
-
-            // symbols referencing the same atom will be added as alias
-            // or as 'parent' when they are global.
-            while (symbols.popOrNull()) |idx| {
-                try wasm_bin.symbol_atom.putNoClobber(gpa, .{ .file = atom.file, .index = idx }, atom_index);
-                const alias_symbol = object.symtable[idx];
-                if (alias_symbol.isGlobal()) {
-                    atom.sym_index = idx;
-                }
-            }
-        }
-
-        const segment: *Wasm.Segment = &wasm_bin.segments.items[final_index];
-        if (relocatable_data.type == .data) { //code section and debug sections are 1-byte aligned
-            segment.alignment = segment.alignment.max(atom.alignment);
-        }
-
-        try wasm_bin.appendAtomAtIndex(final_index, atom_index);
-        log.debug("Parsed into atom: '{s}' at segment index {d}", .{ object.string_table.get(object.symtable[atom.sym_index].name), final_index });
     }
+    return min;
 }
 
-/// Verifies if a given value is in between a minimum -and maximum value.
-/// The maxmimum value is calculated using the length, both start and end are inclusive.
-inline fn isInbetween(min: u32, length: u32, value: u32) bool {
-    return value >= min and value <= min + length;
+fn searchRelocEnd(relocs: []const types.Relocation, address: u32) usize {
+    for (relocs, 0..relocs.len) |reloc, index| {
+        if (reloc.offset > address) {
+            return index;
+        }
+    }
+    return relocs.len;
 }
diff --git a/src/link/Wasm/Symbol.zig b/src/link/Wasm/Symbol.zig
index d15e86a666..75c26ca10d 100644
--- a/src/link/Wasm/Symbol.zig
+++ b/src/link/Wasm/Symbol.zig
@@ -79,6 +79,9 @@ pub const Flag = enum(u32) {
     WASM_SYM_NO_STRIP = 0x80,
     /// Indicates a symbol is TLS
     WASM_SYM_TLS = 0x100,
+    /// Zig-specific flag. Uses the most significant bit of the flags field to annotate
+    /// whether a symbol is alive or not. Dead symbols are allowed to be garbage collected.
+    alive = 0x80000000,
 };
 
 /// Verifies if the given symbol should be imported from the
@@ -92,6 +95,23 @@ pub fn requiresImport(symbol: Symbol) bool {
     return true;
 }
 
+/// Marks a symbol as 'alive', ensuring the garbage collector will not collect it.
+pub fn mark(symbol: *Symbol) void { + symbol.flags |= @intFromEnum(Flag.alive); +} + +pub fn unmark(symbol: *Symbol) void { + symbol.flags &= ~@intFromEnum(Flag.alive); +} + +pub fn isAlive(symbol: Symbol) bool { + return symbol.flags & @intFromEnum(Flag.alive) != 0; +} + +pub fn isDead(symbol: Symbol) bool { + return symbol.flags & @intFromEnum(Flag.alive) == 0; +} + pub fn isTLS(symbol: Symbol) bool { return symbol.flags & @intFromEnum(Flag.WASM_SYM_TLS) != 0; } diff --git a/test/link/wasm/bss/build.zig b/test/link/wasm/bss/build.zig index 1bc059acde..faf8202cd9 100644 --- a/test/link/wasm/bss/build.zig +++ b/test/link/wasm/bss/build.zig @@ -26,6 +26,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize_mode: std.builtin.Opt lib.strip = false; // to make sure the bss segment is emitted, we must import memory lib.import_memory = true; + lib.link_gc_sections = false; const check_lib = lib.checkObject(); @@ -73,6 +74,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize_mode: std.builtin.Opt lib.strip = false; // to make sure the bss segment is emitted, we must import memory lib.import_memory = true; + lib.link_gc_sections = false; const check_lib = lib.checkObject(); check_lib.checkStart(); diff --git a/test/link/wasm/function-table/build.zig b/test/link/wasm/function-table/build.zig index 906a255642..acf7043476 100644 --- a/test/link/wasm/function-table/build.zig +++ b/test/link/wasm/function-table/build.zig @@ -23,6 +23,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize import_table.use_llvm = false; import_table.use_lld = false; import_table.import_table = true; + import_table.link_gc_sections = false; const export_table = b.addExecutable(.{ .name = "export_table", @@ -34,6 +35,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize export_table.use_llvm = false; export_table.use_lld = false; export_table.export_table = true; + export_table.link_gc_sections = false; const regular_table = b.addExecutable(.{ .name = "regular_table", @@ -44,6 +46,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize regular_table.entry = .disabled; regular_table.use_llvm = false; regular_table.use_lld = false; + regular_table.link_gc_sections = false; // Ensure function table is not empty const check_import = import_table.checkObject(); const check_export = export_table.checkObject(); diff --git a/test/link/wasm/segments/build.zig b/test/link/wasm/segments/build.zig index 21b954a902..64d25d3fae 100644 --- a/test/link/wasm/segments/build.zig +++ b/test/link/wasm/segments/build.zig @@ -23,6 +23,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize lib.use_llvm = false; lib.use_lld = false; lib.strip = false; + lib.link_gc_sections = false; // so data is not garbage collected and we can verify data section b.installArtifact(lib); const check_lib = lib.checkObject(); diff --git a/test/link/wasm/stack_pointer/build.zig b/test/link/wasm/stack_pointer/build.zig index 00ef54c052..da54c140ca 100644 --- a/test/link/wasm/stack_pointer/build.zig +++ b/test/link/wasm/stack_pointer/build.zig @@ -24,6 +24,7 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize lib.use_lld = false; lib.strip = false; lib.stack_size = std.wasm.page_size * 2; // set an explicit stack size + lib.link_gc_sections = false; b.installArtifact(lib); const check_lib = lib.checkObject();
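[Editorial aside, not part of the patch] Taken together, the patch implements a conventional mark phase over the symbol graph: symbols that are exported (or carry `WASM_SYM_NO_STRIP`) act as roots, relocations act as edges, the mark bit lives in the most significant bit of the symbol flags, and anything still unmarked afterwards is dropped from the output sections. A minimal sketch of that reachability walk, assuming a simplified graph (the `Sym` struct and its `refs` field are hypothetical, not the linker's actual data structures):

const std = @import("std");

// Simplified symbol graph: `refs` holds the indices of the symbols that this
// symbol's relocations point at (the edges walked by `Wasm.mark`).
const Sym = struct {
    alive: bool = false,
    refs: []const u32 = &.{},
};

// Mirrors the early-return in `mark`: bailing out on already-alive symbols
// keeps the walk linear and terminates even when references form cycles.
fn mark(syms: []Sym, index: u32) void {
    const sym = &syms[index];
    if (sym.alive) return;
    sym.alive = true;
    for (sym.refs) |ref| mark(syms, ref);
}

test "unreferenced symbols stay dead" {
    var syms = [_]Sym{
        .{ .refs = &[_]u32{1} }, // root, e.g. an exported function
        .{ .refs = &[_]u32{0} }, // reachable; also forms a cycle with the root
        .{},                     // never referenced: garbage-collected
    };
    mark(&syms, 0);
    try std.testing.expect(syms[0].alive);
    try std.testing.expect(syms[1].alive);
    try std.testing.expect(!syms[2].alive);
}

This also explains the `link_gc_sections = false` additions to the test build scripts above: with garbage collection disabled, every resolved symbol is treated as a root, so the data and table entries those tests inspect are guaranteed to survive.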