From ba0e84a411074fe661b7df14edb2595267edcd30 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Sat, 13 Jan 2024 17:48:21 +0100 Subject: [PATCH 01/21] wasm: move Zig module-linkage to ZigObject Rather than specializing the linker-driver to be able to handle objects generated by a ZCU, we store all data in-memory in ZigObject. ZigObject acts more like a regular object file which will allow us to treat it as such. This will make linking much more simple, but will also reduce the complexity of incremental-linking as we can simply update ZigObject and relink it. --- src/link/Wasm/ZigObject.zig | 998 ++++++++++++++++++++++++++++++++++++ 1 file changed, 998 insertions(+) create mode 100644 src/link/Wasm/ZigObject.zig diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig new file mode 100644 index 0000000000..275593f348 --- /dev/null +++ b/src/link/Wasm/ZigObject.zig @@ -0,0 +1,998 @@ +//! ZigObject encapsulates the state of the incrementally compiled Zig module. +//! It stores the associated input local and global symbols, allocated atoms, +//! and any relocations that may have been emitted. +//! Think about this as fake in-memory Object file for the Zig module. + +/// List of all `Decl` that are currently alive. +/// Each index maps to the corresponding `Atom.Index`. +decls: std.AutoHashMapUnmanaged(InternPool.DeclIndex, Atom.Index) = .{}, +/// List of function type signatures for this Zig module. +func_types: std.ArrayListUnmanaged(std.wasm.Type) = .{}, +/// Map of symbol locations, represented by its `types.Import`. +imports: std.AutoHashMapUnmanaged(u32, types.Import) = .{}, +/// List of WebAssembly globals. +globals: std.ArrayListUnmanaged(std.wasm.Global) = .{}, +/// Mapping between an `Atom` and its type index representing the Wasm +/// type of the function signature. +atom_types: std.AutoHashMapUnmanaged(Atom.Index, u32) = .{}, +/// List of all symbols generated by Zig code.
+symbols: std.ArrayListUnmanaged(Symbol) = .{},
+/// Map from symbol name offset to their index into the `symbols` list.
+global_syms: std.AutoHashMapUnmanaged(u32, u32) = .{},
+/// List of symbol indexes which are free to be used.
+symbols_free_list: std.ArrayListUnmanaged(u32) = .{},
+/// Extra metadata about the linking section, such as alignment of segments and their name.
+segment_info: std.ArrayListUnmanaged(types.Segment) = .{},
+/// File encapsulated string table, used to deduplicate strings within the generated file.
+string_table: StringTable = .{},
+/// Map for storing anonymous declarations. Each anonymous decl maps to its Atom's index.
+anon_decls: std.AutoArrayHashMapUnmanaged(InternPool.Index, Atom.Index) = .{},
+/// Represents the symbol index of the error name table
+/// When this is `null`, no code references an error using runtime `@errorName`.
+/// During initialization, a symbol with corresponding atom will be created that is
+/// used to perform relocations to the pointer of this table.
+/// The actual table is populated during `flush`.
+error_table_symbol: ?u32 = null,
+/// Amount of functions in the `import` sections.
+imported_functions_count: u32 = 0,
+/// Amount of globals in the `import` section.
+imported_globals_count: u32 = 0,
+/// Symbol index representing the stack pointer. This will be set upon initialization
+/// of a new `ZigObject`. Codegen will make calls into this to create relocations for
+/// this symbol each time the stack pointer is moved.
+stack_pointer_sym: u32,
+
+/// Frees and invalidates all memory of the incrementally compiled Zig module.
+/// It is illegal behavior to access the `ZigObject` after calling `deinit`.
+pub fn deinit(zig_object: *ZigObject, gpa: std.mem.Allocator) void {
+    // `segment_info` is a list, so its elements live in `items`.
+    for (zig_object.segment_info.items) |segment_info| {
+        gpa.free(segment_info.name);
+    }
+    // `putOrGetFuncType` stores duplicated parameter and return slices,
+    // so every stored signature has to be released here.
+    for (zig_object.func_types.items) |func_type| {
+        gpa.free(func_type.params);
+        gpa.free(func_type.returns);
+    }
+
+    // For decls and anon decls we free the memory of its atoms.
+    // The memory of atoms parsed from object files is managed by
+    // the object file itself, and therefore we can skip those.
+    // NOTE(review): `getAtomPtr` is not declared on `ZigObject` in the visible
+    // part of this file; the other functions go through `wasm_file.getAtomPtr`.
+    // Confirm a helper exists or thread the linker through.
+    {
+        var it = zig_object.decls.valueIterator();
+        while (it.next()) |atom_index_ptr| {
+            const atom = zig_object.getAtomPtr(atom_index_ptr.*);
+            for (atom.locals.items) |local_index| {
+                const local_atom = zig_object.getAtomPtr(local_index);
+                local_atom.deinit(gpa);
+            }
+            atom.deinit(gpa);
+        }
+    }
+    {
+        for (zig_object.anon_decls.values()) |atom_index| {
+            const atom = zig_object.getAtomPtr(atom_index);
+            for (atom.locals.items) |local_index| {
+                const local_atom = zig_object.getAtomPtr(local_index);
+                local_atom.deinit(gpa);
+            }
+            atom.deinit(gpa);
+        }
+    }
+    zig_object.decls.deinit(gpa);
+    zig_object.anon_decls.deinit(gpa);
+    zig_object.func_types.deinit(gpa);
+    zig_object.imports.deinit(gpa);
+    zig_object.globals.deinit(gpa);
+    zig_object.atom_types.deinit(gpa);
+    zig_object.symbols.deinit(gpa);
+    zig_object.global_syms.deinit(gpa);
+    zig_object.symbols_free_list.deinit(gpa);
+    zig_object.segment_info.deinit(gpa);
+
+    zig_object.string_table.deinit(gpa);
+    zig_object.* = undefined;
+}
+
+/// Allocates a new symbol and returns its index.
+/// Will re-use slots when a symbol was freed at an earlier stage.
+/// The returned symbol is a local binding with tag `.undefined`; its name,
+/// index and virtual address hold `maxInt(u32)` placeholders.
+pub fn allocateSymbol(zig_object: *ZigObject, gpa: std.mem.Allocator) !u32 {
+    // Reserve first so the append below cannot fail.
+    try zig_object.symbols.ensureUnusedCapacity(gpa, 1);
+    const symbol: Symbol = .{
+        .name = std.math.maxInt(u32), // will be set after updateDecl as well as during atom creation for decls
+        .flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL),
+        .tag = .undefined, // will be set after updateDecl
+        .index = std.math.maxInt(u32), // will be set during atom parsing
+        .virtual_address = std.math.maxInt(u32), // will be set during atom allocation
+    };
+    if (zig_object.symbols_free_list.popOrNull()) |index| {
+        zig_object.symbols.items[index] = symbol;
+        return index;
+    }
+    const index = @as(u32, @intCast(zig_object.symbols.items.len));
+    zig_object.symbols.appendAssumeCapacity(symbol);
+    return index;
+}
+
+// Generate code for the Decl, storing it in memory to be later written to
+// the file on flush().
+pub fn updateDecl(zig_object: *ZigObject, wasm_file: *Wasm, mod: *Module, decl_index: InternPool.DeclIndex) !void {
+    const decl = mod.declPtr(decl_index);
+    // Functions and extern functions are not lowered here.
+    if (decl.val.getFunction(mod)) |_| {
+        return;
+    } else if (decl.val.getExternFunc(mod)) |_| {
+        return;
+    }
+
+    const gpa = wasm_file.base.comp.gpa;
+    const atom_index = try zig_object.getOrCreateAtomForDecl(wasm_file, decl_index);
+    const atom = wasm_file.getAtomPtr(atom_index);
+    atom.clear();
+
+    if (decl.isExtern(mod)) {
+        // Extern variables carry no code; they are only registered as an import.
+        const variable = decl.getOwnedVariable(mod).?;
+        const name = mod.intern_pool.stringToSlice(decl.name);
+        const lib_name = mod.intern_pool.stringToSliceUnwrap(variable.lib_name);
+        return zig_object.addOrUpdateImport(wasm_file, name, atom.sym_index, lib_name, null);
+    }
+    const val = if (decl.val.getVariable(mod)) |variable| Value.fromInterned(variable.init) else decl.val;
+
+    var code_writer = std.ArrayList(u8).init(gpa);
+    defer code_writer.deinit();
+
+    const res = try codegen.generateSymbol(
+        &wasm_file.base,
+        decl.srcLoc(mod),
+        .{ .ty = decl.ty, .val = val },
+        &code_writer,
+        .none,
+        .{ .parent_atom_index = atom.sym_index },
+    );
+
+    const code = switch (res) {
+        .ok => code_writer.items,
+        .fail => |em| {
+            // Record the failure on the module instead of returning an error.
+            decl.analysis = .codegen_failure;
+            try mod.failed_decls.put(mod.gpa, decl_index, em);
+            return;
+        },
+    };
+
+    return zig_object.finishUpdateDecl(wasm_file, decl_index, code, .data);
+}
+
+/// Generates the code of the function `func_index` and stores it in the atom
+/// that belongs to the function's owner decl. A codegen failure is recorded in
+/// `mod.failed_decls` and is not returned as an error.
+pub fn updateFunc(zig_object: *ZigObject, wasm_file: *Wasm, mod: *Module, func_index: InternPool.Index, air: Air, liveness: Liveness) !void {
+    const gpa = wasm_file.base.comp.gpa;
+    const func = mod.funcInfo(func_index);
+    const decl_index = func.owner_decl;
+    const decl = mod.declPtr(decl_index);
+    const atom_index = try zig_object.getOrCreateAtomForDecl(wasm_file, decl_index);
+    const atom = wasm_file.getAtomPtr(atom_index);
+    atom.clear();
+
+    var code_writer = std.ArrayList(u8).init(gpa);
+    defer code_writer.deinit();
+    const result = try codegen.generateFunction(
+        &wasm_file.base,
+        decl.srcLoc(mod),
+        func_index,
+        air,
+        liveness,
+        &code_writer,
+        .none,
+    );
+
+    const code = switch (result) {
+        .ok => code_writer.items,
+        .fail => |em| {
+            decl.analysis = .codegen_failure;
+            try mod.failed_decls.put(mod.gpa, decl_index, em);
+            return;
+        },
+    };
+
+    return zig_object.finishUpdateDecl(wasm_file, decl_index, code, .function);
+}
+
+/// Stores the generated `code` in the atom of `decl_index` and updates the name
+/// and tag of the atom's symbol. Asserts an atom was created for the decl.
+fn finishUpdateDecl(zig_object: *ZigObject, wasm_file: *Wasm, decl_index: InternPool.DeclIndex, code: []const u8, symbol_tag: Symbol.Tag) !void {
+    const gpa = wasm_file.base.comp.gpa;
+    const mod = wasm_file.base.comp.module.?;
+    const decl = mod.declPtr(decl_index);
+    const atom_index = zig_object.decls.get(decl_index).?;
+    const atom = wasm_file.getAtomPtr(atom_index);
+    const symbol = &zig_object.symbols.items[atom.sym_index];
+    const full_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod));
+    symbol.name = try zig_object.string_table.insert(gpa, full_name);
+    symbol.tag = symbol_tag;
+    try atom.code.appendSlice(gpa, code);
+    try wasm_file.resolved_symbols.put(gpa, atom.symbolLoc(), {});
+
+    atom.size = @intCast(code.len);
+    // The alignment is only updated when code was emitted.
+    if (code.len == 0) return;
+    atom.alignment = decl.getAlignment(mod);
+}
+
+/// For a given `InternPool.DeclIndex` returns its corresponding `Atom.Index`.
+/// When the index was not found, a new `Atom` will be created, and its index will be returned.
+/// The newly created Atom is empty with default fields as specified by `Atom.empty`.
+pub fn getOrCreateAtomForDecl(zig_object: *ZigObject, wasm_file: *Wasm, decl_index: InternPool.DeclIndex) !Atom.Index {
+    const gpa = wasm_file.base.comp.gpa;
+    const gop = try zig_object.decls.getOrPut(gpa, decl_index);
+    if (!gop.found_existing) {
+        const atom_index = try wasm_file.createAtom();
+        gop.value_ptr.* = atom_index;
+        const atom = wasm_file.getAtom(atom_index);
+        const symbol = atom.symbolLoc().getSymbol(wasm_file);
+        const mod = wasm_file.base.comp.module.?;
+        const decl = mod.declPtr(decl_index);
+        const full_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod));
+        // Symbol names of the Zig module live in this object's own string table,
+        // matching what `finishUpdateDecl` writes for the same symbol.
+        symbol.name = try zig_object.string_table.insert(gpa, full_name);
+    }
+    return gop.value_ptr.*;
+}
+
+/// Lowers the anonymous declaration `decl_val` to an atom when that has not
+/// been done yet, and raises the atom's alignment to `explicit_alignment` when
+/// one is given. Returns `.fail` carrying the error message when lowering fails.
+pub fn lowerAnonDecl(
+    zig_object: *ZigObject,
+    wasm_file: *Wasm,
+    decl_val: InternPool.Index,
+    explicit_alignment: InternPool.Alignment,
+    src_loc: Module.SrcLoc,
+) !codegen.Result {
+    const gpa = wasm_file.base.comp.gpa;
+    const gop = try zig_object.anon_decls.getOrPut(gpa, decl_val);
+    if (!gop.found_existing) {
+        const mod = wasm_file.base.comp.module.?;
+        const ty = Type.fromInterned(mod.intern_pool.typeOf(decl_val));
+        const tv: TypedValue = .{ .ty = ty, .val = Value.fromInterned(decl_val) };
+        var name_buf: [32]u8 = undefined;
+        const name = std.fmt.bufPrint(&name_buf, "__anon_{d}", .{
+            @intFromEnum(decl_val),
+        }) catch unreachable;
+
+        // The entry is addressed through `gop.index` rather than `gop.value_ptr`
+        // after the call below.
+        switch (try zig_object.lowerConst(wasm_file, name, tv, src_loc)) {
+            .ok => |atom_index| zig_object.anon_decls.values()[gop.index] = atom_index,
+            .fail => |em| return .{ .fail = em },
+        }
+    }
+
+    const atom = wasm_file.getAtomPtr(zig_object.anon_decls.values()[gop.index]);
+    atom.alignment = switch (atom.alignment) {
+        .none => explicit_alignment,
+        else => switch (explicit_alignment) {
+            .none => atom.alignment,
+            else => atom.alignment.maxStrict(explicit_alignment),
+        },
+    };
+    return .ok;
+}
+
+/// Lowers a constant typed value to a local symbol and atom.
+/// Returns the symbol index of the local
+/// The given `decl` is the parent decl whom owns the constant.
+/// On codegen failure the message is stored in `mod.failed_decls` and
+/// `error.CodegenFail` is returned.
+pub fn lowerUnnamedConst(zig_object: *ZigObject, wasm_file: *Wasm, tv: TypedValue, decl_index: InternPool.DeclIndex) !u32 {
+    const gpa = wasm_file.base.comp.gpa;
+    const mod = wasm_file.base.comp.module.?;
+    std.debug.assert(tv.ty.zigTypeTag(mod) != .Fn); // cannot create local symbols for functions
+    const decl = mod.declPtr(decl_index);
+
+    const parent_atom_index = try zig_object.getOrCreateAtomForDecl(wasm_file, decl_index);
+    const parent_atom = wasm_file.getAtom(parent_atom_index);
+    // The number of locals so far makes the generated name unique per parent.
+    const local_index = parent_atom.locals.items.len;
+    const fqn = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod));
+    const name = try std.fmt.allocPrintZ(gpa, "__unnamed_{s}_{d}", .{
+        fqn, local_index,
+    });
+    defer gpa.free(name);
+
+    switch (try zig_object.lowerConst(wasm_file, name, tv, decl.srcLoc(mod))) {
+        .ok => |atom_index| {
+            // Re-fetch the parent atom pointer after lowering before appending.
+            try wasm_file.getAtomPtr(parent_atom_index).locals.append(gpa, atom_index);
+            return wasm_file.getAtom(atom_index).getSymbolIndex().?;
+        },
+        .fail => |em| {
+            decl.analysis = .codegen_failure;
+            try mod.failed_decls.put(mod.gpa, decl_index, em);
+            return error.CodegenFail;
+        },
+    }
+}
+
+/// Result of `lowerConst`: either the index of the created atom, or the
+/// error message produced by codegen.
+const LowerConstResult = union(enum) {
+    ok: Atom.Index,
+    fail: *Module.ErrorMsg,
+};
+
+/// Creates a new atom with a local data symbol called `name` and fills the
+/// atom with the bytes generated for `tv`.
+fn lowerConst(zig_object: *ZigObject, wasm_file: *Wasm, name: []const u8, tv: TypedValue, src_loc: Module.SrcLoc) !LowerConstResult {
+    const gpa = wasm_file.base.comp.gpa;
+    const mod = wasm_file.base.comp.module.?;
+
+    // Create and initialize a new local symbol and atom
+    const atom_index = try wasm_file.createAtom();
+    var value_bytes = std.ArrayList(u8).init(gpa);
+    defer value_bytes.deinit();
+
+    const code = code: {
+        const atom = wasm_file.getAtomPtr(atom_index);
+        atom.alignment = tv.ty.abiAlignment(mod);
+        zig_object.symbols.items[atom.sym_index] = .{
+            .name = try zig_object.string_table.insert(gpa, name),
+            .flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL),
+            .tag = .data,
+            .index = undefined,
+            .virtual_address = undefined,
+        };
+
+        const result = try codegen.generateSymbol(
+            &wasm_file.base,
+            src_loc,
+            tv,
+            &value_bytes,
+            .none,
+            .{
+                .parent_atom_index = atom.sym_index,
+                .addend = null,
+            },
+        );
+        break :code switch (result) {
+            .ok => value_bytes.items,
+            .fail => |em| {
+                return .{ .fail = em };
+            },
+        };
+    };
+
+    // The atom pointer is fetched again after codegen instead of being reused.
+    const atom = wasm_file.getAtomPtr(atom_index);
+    atom.size = @intCast(code.len);
+    try atom.code.appendSlice(gpa, code);
+    return .{ .ok = atom_index };
+}
+
+/// Returns the symbol index of the error name table.
+///
+/// When the symbol does not yet exist, it will create a new one instead.
+pub fn getErrorTableSymbol(zig_object: *ZigObject, wasm_file: *Wasm) !u32 {
+    if (zig_object.error_table_symbol) |symbol| {
+        return symbol;
+    }
+
+    // no error was referenced yet, so create a new symbol and atom for it
+    // and then return said symbol's index. The final table will be populated
+    // during `flush` when we know all possible error names.
+    const gpa = wasm_file.base.comp.gpa;
+    const sym_index = try zig_object.allocateSymbol(gpa);
+    const atom_index = try wasm_file.createAtom(sym_index);
+    const atom = wasm_file.getAtomPtr(atom_index);
+    const slice_ty = Type.slice_const_u8_sentinel_0;
+    const mod = wasm_file.base.comp.module.?;
+    atom.alignment = slice_ty.abiAlignment(mod);
+
+    const sym_name = try zig_object.string_table.insert(gpa, "__zig_err_name_table");
+    const symbol = &zig_object.symbols.items[sym_index];
+    symbol.* = .{
+        .name = sym_name,
+        .tag = .data,
+        .flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL),
+        .index = 0,
+        .virtual_address = undefined,
+    };
+    symbol.mark();
+
+    log.debug("Error name table was created with symbol index: ({d})", .{sym_index});
+    zig_object.error_table_symbol = sym_index;
+    return sym_index;
+}
+
+/// Populates the error name table, when `error_table_symbol` is not null.
+///
+/// This creates a table that consists of pointers and length to each error name.
+/// The table is what is being pointed to within the runtime bodies that are generated.
+fn populateErrorNameTable(zig_object: *ZigObject, wasm_file: *Wasm) !void {
+    const symbol_index = zig_object.error_table_symbol orelse return;
+    const gpa = wasm_file.base.comp.gpa;
+    const atom_index = wasm_file.symbol_atom.get(.{ .file = null, .index = symbol_index }).?;
+
+    // Rather than creating a symbol for each individual error name,
+    // we create a symbol for the entire region of error names. We then calculate
+    // the pointers into the list using addends which are appended to the relocation.
+    const names_sym_index = try zig_object.allocateSymbol(gpa);
+    const names_atom_index = try wasm_file.createAtom(names_sym_index);
+    const names_atom = wasm_file.getAtomPtr(names_atom_index);
+    names_atom.alignment = .@"1";
+    const sym_name = try zig_object.string_table.insert(gpa, "__zig_err_names");
+    const names_symbol = &zig_object.symbols.items[names_sym_index];
+    names_symbol.* = .{
+        .name = sym_name,
+        .tag = .data,
+        .flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL),
+        .index = 0,
+        .virtual_address = undefined,
+    };
+    names_symbol.mark();
+
+    log.debug("Populating error names", .{});
+
+    // Addend for each relocation to the table
+    var addend: u32 = 0;
+    const mod = wasm_file.base.comp.module.?;
+    for (mod.global_error_set.keys()) |error_name_nts| {
+        const atom = wasm_file.getAtomPtr(atom_index);
+
+        const error_name = mod.intern_pool.stringToSlice(error_name_nts);
+        const len = @as(u32, @intCast(error_name.len + 1)); // names are 0-terminated
+
+        const slice_ty = Type.slice_const_u8_sentinel_0;
+        const offset = @as(u32, @intCast(atom.code.items.len));
+        // first we create the data for the slice of the name
+        try atom.code.appendNTimes(gpa, 0, 4); // ptr to name, will be relocated
+        try atom.code.writer(gpa).writeInt(u32, len - 1, .little);
+        // create relocation to the error name
+        try atom.relocs.append(gpa, .{
+            .index = names_atom.sym_index,
+            .relocation_type = .R_WASM_MEMORY_ADDR_I32,
+            .offset = offset,
+            .addend = @as(i32, @intCast(addend)),
+        });
+        atom.size += @as(u32, @intCast(slice_ty.abiSize(mod)));
+        addend += len;
+
+        // as we updated the error name table, we now store the actual name within the names atom
+        try names_atom.code.ensureUnusedCapacity(gpa, len);
+        names_atom.code.appendSliceAssumeCapacity(error_name);
+        names_atom.code.appendAssumeCapacity(0);
+
+        log.debug("Populated error name: '{s}'", .{error_name});
+    }
+    names_atom.size = addend;
+
+    // link the atoms with the rest of the binary so they can be allocated
+    // and relocations will be performed.
+    try wasm_file.parseAtom(atom_index, .{ .data = .read_only });
+    try wasm_file.parseAtom(names_atom_index, .{ .data = .read_only });
+}
+
+/// Either creates a new import, or updates one if existing.
+/// When `type_index` is non-null, we assume an external function.
+/// In all other cases, the symbol is tagged as a data symbol instead and
+/// no entry is added to `imports`.
+pub fn addOrUpdateImport(
+    zig_object: *ZigObject,
+    wasm_file: *Wasm,
+    /// Name of the import
+    name: []const u8,
+    /// Symbol index that is external
+    symbol_index: u32,
+    /// Optional library name (i.e. `extern "c" fn foo() void`)
+    lib_name: ?[:0]const u8,
+    /// The index of the type that represents the function signature
+    /// when the extern is a function. When this is null, a data-symbol
+    /// is asserted instead.
+    type_index: ?u32,
+) !void {
+    const gpa = wasm_file.base.comp.gpa;
+    std.debug.assert(symbol_index != 0);
+    // For the import name, we use the decl's name, rather than the fully qualified name
+    // Also mangle the name when the lib name is set and not equal to "c" so imports with the same
+    // name but different module can be resolved correctly.
+    const mangle_name = lib_name != null and
+        !std.mem.eql(u8, lib_name.?, "c");
+    const full_name = if (mangle_name) full_name: {
+        break :full_name try std.fmt.allocPrint(gpa, "{s}|{s}", .{ name, lib_name.? });
+    } else name;
+    // Only the mangled name is heap-allocated; `name` belongs to the caller.
+    defer if (mangle_name) gpa.free(full_name);
+
+    const decl_name_index = try zig_object.string_table.insert(gpa, full_name);
+    const symbol: *Symbol = &zig_object.symbols.items[symbol_index];
+    symbol.setUndefined(true);
+    symbol.setGlobal(true);
+    symbol.name = decl_name_index;
+    if (mangle_name) {
+        // we specified a specific name for the symbol that does not match the import name
+        symbol.setFlag(.WASM_SYM_EXPLICIT_NAME);
+    }
+
+    if (type_index) |ty_index| {
+        const gop = try zig_object.imports.getOrPut(gpa, symbol_index);
+        const module_name = if (lib_name) |l_name| blk: {
+            break :blk l_name;
+        } else wasm_file.host_name;
+        if (!gop.found_existing) {
+            gop.value_ptr.* = .{
+                .module_name = try zig_object.string_table.insert(gpa, module_name),
+                .name = try zig_object.string_table.insert(gpa, name),
+                .kind = .{ .function = ty_index },
+            };
+            zig_object.imported_functions_count += 1;
+        }
+    } else {
+        // Without a function signature the extern is a data symbol. Tag it here,
+        // otherwise it keeps the `.undefined` tag assigned by `allocateSymbol`.
+        symbol.tag = .data;
+    }
+}
+
+/// Returns the symbol index from a symbol of which its flag is set global,
+/// such as an exported or imported symbol.
+/// If the symbol does not yet exist, creates a new symbol instead
+/// and then returns the index to it.
+pub fn getGlobalSymbol(zig_object: *ZigObject, wasm_file: *Wasm, name: []const u8) !u32 {
+    const gpa = wasm_file.base.comp.gpa;
+    const name_index = try zig_object.string_table.insert(gpa, name);
+    // Reserve room for a possible new symbol before touching the map, so no
+    // fallible call remains once a fresh (still unset) map entry exists.
+    try zig_object.symbols.ensureUnusedCapacity(gpa, 1);
+    const gop = try zig_object.global_syms.getOrPut(gpa, name_index);
+    if (gop.found_existing) {
+        // `global_syms` maps the name offset directly to the symbol index.
+        return gop.value_ptr.*;
+    }
+
+    var symbol: Symbol = .{
+        .name = name_index,
+        .flags = 0,
+        .index = undefined, // index to type will be set after merging function symbols
+        .tag = .function,
+        .virtual_address = undefined,
+    };
+    symbol.setGlobal(true);
+    symbol.setUndefined(true);
+
+    // Prefer recycling a freed slot over growing the symbol list.
+    const sym_index = if (zig_object.symbols_free_list.popOrNull()) |index| index else blk: {
+        const index: u32 = @intCast(zig_object.symbols.items.len);
+        zig_object.symbols.items.len += 1; // capacity was reserved above
+        break :blk index;
+    };
+    zig_object.symbols.items[sym_index] = symbol;
+    gop.value_ptr.* = sym_index;
+    return sym_index;
+}
+
+/// For a given decl, find the given symbol index's atom, and create a relocation for the type.
+/// Returns the given pointer address
+pub fn getDeclVAddr(
+    zig_object: *ZigObject,
+    wasm_file: *Wasm,
+    decl_index: InternPool.DeclIndex,
+    reloc_info: link.File.RelocInfo,
+) !u64 {
+    const target = wasm_file.base.comp.root_mod.resolved_target.result;
+    const gpa = wasm_file.base.comp.gpa;
+    const mod = wasm_file.base.comp.module.?;
+    const decl = mod.declPtr(decl_index);
+
+    const target_atom_index = try zig_object.getOrCreateAtomForDecl(wasm_file, decl_index);
+    const target_symbol_index = wasm_file.getAtom(target_atom_index).sym_index;
+
+    std.debug.assert(reloc_info.parent_atom_index != 0);
+    const atom_index = wasm_file.symbol_atom.get(.{ .file = null, .index = reloc_info.parent_atom_index }).?;
+    const atom = wasm_file.getAtomPtr(atom_index);
+    const is_wasm32 = target.cpu.arch == .wasm32;
+    if (decl.ty.zigTypeTag(mod) == .Fn) {
+        std.debug.assert(reloc_info.addend == 0); // addend not allowed for function relocations
+        try atom.relocs.append(gpa, .{
+            .index = target_symbol_index,
+            .offset = @intCast(reloc_info.offset),
+            .relocation_type = if (is_wasm32) .R_WASM_TABLE_INDEX_I32 else .R_WASM_TABLE_INDEX_I64,
+        });
+    } else {
+        try atom.relocs.append(gpa, .{
+            .index = target_symbol_index,
+            .offset = @intCast(reloc_info.offset),
+            .relocation_type = if (is_wasm32) .R_WASM_MEMORY_ADDR_I32 else .R_WASM_MEMORY_ADDR_I64,
+            .addend = @intCast(reloc_info.addend),
+        });
+    }
+
+    // we do not know the final address at this point,
+    // as atom allocation will determine the address and relocations
+    // will calculate and rewrite this. Therefore, we simply return the symbol index
+    // that was targeted.
+    return target_symbol_index;
+}
+
+/// Same as `getDeclVAddr`, but for an anonymous decl that was lowered before
+/// by `lowerAnonDecl`. Asserts an atom exists for `decl_val`.
+pub fn getAnonDeclVAddr(
+    zig_object: *ZigObject,
+    wasm_file: *Wasm,
+    decl_val: InternPool.Index,
+    reloc_info: link.File.RelocInfo,
+) !u64 {
+    const gpa = wasm_file.base.comp.gpa;
+    const target = wasm_file.base.comp.root_mod.resolved_target.result;
+    const atom_index = zig_object.anon_decls.get(decl_val).?;
+    const target_symbol_index = wasm_file.getAtom(atom_index).getSymbolIndex().?;
+
+    const parent_atom_index = wasm_file.symbol_atom.get(.{ .file = null, .index = reloc_info.parent_atom_index }).?;
+    const parent_atom = wasm_file.getAtomPtr(parent_atom_index);
+    const is_wasm32 = target.cpu.arch == .wasm32;
+    const mod = wasm_file.base.comp.module.?;
+    const ty = Type.fromInterned(mod.intern_pool.typeOf(decl_val));
+    if (ty.zigTypeTag(mod) == .Fn) {
+        std.debug.assert(reloc_info.addend == 0); // addend not allowed for function relocations
+        try parent_atom.relocs.append(gpa, .{
+            .index = target_symbol_index,
+            .offset = @intCast(reloc_info.offset),
+            .relocation_type = if (is_wasm32) .R_WASM_TABLE_INDEX_I32 else .R_WASM_TABLE_INDEX_I64,
+        });
+    } else {
+        try parent_atom.relocs.append(gpa, .{
+            .index = target_symbol_index,
+            .offset = @intCast(reloc_info.offset),
+            .relocation_type = if (is_wasm32) .R_WASM_MEMORY_ADDR_I32 else .R_WASM_MEMORY_ADDR_I64,
+            .addend = @intCast(reloc_info.addend),
+        });
+    }
+
+    // we do not know the final address at this point,
+    // as atom allocation will determine the address and relocations
+    // will calculate and rewrite this. Therefore, we simply return the symbol index
+    // that was targeted.
+    return target_symbol_index;
+}
+
+/// Removes the global symbol entry that is keyed by the name of the decl's
+/// symbol. Does nothing when no atom exists for `decl_index`.
+pub fn deleteDeclExport(
+    zig_object: *ZigObject,
+    wasm_file: *Wasm,
+    decl_index: InternPool.DeclIndex,
+) void {
+    const atom_index = zig_object.decls.get(decl_index) orelse return;
+    const sym_index = wasm_file.getAtom(atom_index).sym_index;
+    const loc: Wasm.SymbolLoc = .{ .file = null, .index = sym_index };
+    const symbol = loc.getSymbol(wasm_file);
+    std.debug.assert(zig_object.global_syms.remove(symbol.name));
+}
+
+/// Marks the symbols of the exported decls as defined globals and applies the
+/// requested linkage. Unsupported export options are reported through
+/// `mod.failed_exports` rather than returned as an error.
+pub fn updateExports(
+    zig_object: *ZigObject,
+    wasm_file: *Wasm,
+    mod: *Module,
+    exported: Module.Exported,
+    exports: []const *Module.Export,
+) !void {
+    const decl_index = switch (exported) {
+        .decl_index => |i| i,
+        .value => |val| {
+            _ = val;
+            @panic("TODO: implement Wasm linker code for exporting a constant value");
+        },
+    };
+    const decl = mod.declPtr(decl_index);
+    const atom_index = try zig_object.getOrCreateAtomForDecl(wasm_file, decl_index);
+    const atom = wasm_file.getAtom(atom_index);
+    // Copy of the decl's symbol, taken before any exported symbol is modified.
+    const atom_sym = atom.symbolLoc().getSymbol(wasm_file).*;
+    const gpa = mod.gpa;
+
+    for (exports) |exp| {
+        if (mod.intern_pool.stringToSliceUnwrap(exp.opts.section)) |section| {
+            try mod.failed_exports.putNoClobber(gpa, exp, try Module.ErrorMsg.create(
+                gpa,
+                decl.srcLoc(mod),
+                "Unimplemented: ExportOptions.section '{s}'",
+                .{section},
+            ));
+            continue;
+        }
+
+        const exported_decl_index = switch (exp.exported) {
+            .value => {
+                try mod.failed_exports.putNoClobber(gpa, exp, try Module.ErrorMsg.create(
+                    gpa,
+                    decl.srcLoc(mod),
+                    "Unimplemented: exporting a named constant value",
+                    .{},
+                ));
+                continue;
+            },
+            .decl_index => |i| i,
+        };
+        const exported_atom_index = try zig_object.getOrCreateAtomForDecl(wasm_file, exported_decl_index);
+        const exported_atom = wasm_file.getAtom(exported_atom_index);
+        // const export_name = try zig_object.string_table.put(gpa, mod.intern_pool.stringToSlice(exp.opts.name));
+        const sym_loc = exported_atom.symbolLoc();
+        const symbol = sym_loc.getSymbol(wasm_file);
+        symbol.setGlobal(true);
+        symbol.setUndefined(false);
+        symbol.index = atom_sym.index;
+        symbol.tag = atom_sym.tag;
+        symbol.name = atom_sym.name;
+
+        switch (exp.opts.linkage) {
+            .Internal => {
+                symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN);
+                symbol.setFlag(.WASM_SYM_BINDING_WEAK);
+            },
+            .Weak => {
+                symbol.setFlag(.WASM_SYM_BINDING_WEAK);
+            },
+            .Strong => {}, // symbols are strong by default
+            .LinkOnce => {
+                try mod.failed_exports.putNoClobber(gpa, exp, try Module.ErrorMsg.create(
+                    gpa,
+                    decl.srcLoc(mod),
+                    "Unimplemented: LinkOnce",
+                    .{},
+                ));
+                continue;
+            },
+        }
+
+        // TODO: Revisit this
+        // if (zig_object.global_syms.get(export_name)) |existing_loc| {
+        //     if (existing_loc.index == atom.sym_index) continue;
+        //     const existing_sym: Symbol = existing_loc.getSymbol(wasm_file).*;
+
+        //     if (!existing_sym.isUndefined()) blk: {
+        //         if (symbol.isWeak()) {
+        //             try wasm_file.discarded.put(gpa, existing_loc, sym_loc);
+        //             continue; // to-be-exported symbol is weak, so we keep the existing symbol
+        //         }
+
+        //         // new symbol is not weak while existing is, replace existing symbol
+        //         if (existing_sym.isWeak()) {
+        //             break :blk;
+        //         }
+        //         // When both the to-be-exported symbol and the already existing symbol
+        //         // are strong symbols, we have a linker error.
+        //         // In the other case we replace one with the other.
+        //         try mod.failed_exports.put(gpa, exp, try Module.ErrorMsg.create(
+        //             gpa,
+        //             decl.srcLoc(mod),
+        //             \\LinkError: symbol '{}' defined multiple times
+        //             \\ first definition in '{s}'
+        //             \\ next definition in '{s}'
+        //         ,
+        //             .{ exp.opts.name.fmt(&mod.intern_pool), wasm_file.name, wasm_file.name },
+        //         ));
+        //         continue;
+        //     }
+
+        //     // in this case the existing symbol must be replaced either because it's weak or undefined.
+        //     try wasm.discarded.put(gpa, existing_loc, sym_loc);
+        //     _ = wasm.imports.remove(existing_loc);
+        //     _ = wasm.undefs.swapRemove(existing_sym.name);
+        // }
+
+        // // Ensure the symbol will be exported using the given name
+        // if (!mod.intern_pool.stringEqlSlice(exp.opts.name, sym_loc.getName(wasm))) {
+        //     try wasm.export_names.put(gpa, sym_loc, export_name);
+        // }
+
+        // try wasm.globals.put(
+        //     gpa,
+        //     export_name,
+        //     sym_loc,
+        // );
+    }
+}
+
+/// Releases the symbols of the decl and of its local atoms so their slots can
+/// be re-used, removes the decl's bookkeeping entries, and unlinks its atom
+/// from the atom chain. Asserts an atom exists for `decl_index`.
+pub fn freeDecl(zig_object: *ZigObject, wasm_file: *Wasm, decl_index: InternPool.DeclIndex) void {
+    const gpa = wasm_file.base.comp.gpa;
+    const mod = wasm_file.base.comp.module.?;
+    const decl = mod.declPtr(decl_index);
+    const atom_index = zig_object.decls.get(decl_index).?;
+    const atom = wasm_file.getAtomPtr(atom_index);
+    // Best effort: this function cannot fail, so when the free list cannot grow
+    // the slot is simply not recycled.
+    zig_object.symbols_free_list.append(gpa, atom.sym_index) catch {};
+    _ = zig_object.decls.remove(decl_index);
+    zig_object.symbols.items[atom.sym_index].tag = .dead;
+    for (atom.locals.items) |local_atom_index| {
+        const local_atom = wasm_file.getAtom(local_atom_index);
+        const local_symbol = &zig_object.symbols.items[local_atom.sym_index];
+        local_symbol.tag = .dead; // also for any local symbol
+        zig_object.symbols_free_list.append(gpa, local_atom.sym_index) catch {};
+        std.debug.assert(wasm_file.symbol_atom.remove(local_atom.symbolLoc()));
+    }
+
+    if (decl.isExtern(mod)) {
+        _ = zig_object.imports.remove(atom.getSymbolIndex().?);
+    }
+    _ = wasm_file.symbol_atom.remove(atom.symbolLoc());
+
+    // if (wasm.dwarf) |*dwarf| {
+    //     dwarf.freeDecl(decl_index);
+    // }
+
+    // Unlink the atom from its neighbours. Both links are read before either
+    // is cleared, so the previous atom ends up pointing at the next atom
+    // instead of at an already-reset `null`.
+    const prev_index = atom.prev;
+    const next_index = atom.next;
+    if (next_index) |index| {
+        wasm_file.getAtomPtr(index).prev = prev_index;
+    }
+    if (prev_index) |index| {
+        wasm_file.getAtomPtr(index).next = next_index;
+    }
+    atom.prev = null;
+    atom.next = null;
+}
+
+/// Returns the index of the function signature equal to `func_type`,
+/// or `null` when no such signature was stored.
+pub fn getTypeIndex(zig_object: *const ZigObject, func_type: std.wasm.Type) ?u32 {
+    var index: u32 = 0;
+    while (index < zig_object.func_types.items.len) : (index += 1) {
+        if (zig_object.func_types.items[index].eql(func_type)) return index;
+    }
+    return null;
+}
+
+/// Searches for a matching function signature. When no matching signature is found,
+/// a new entry will be made. The value returned is the index of the type within
+/// `zig_object.func_types`. A new entry stores its own copies of the parameter
+/// and return slices.
+pub fn putOrGetFuncType(zig_object: *ZigObject, gpa: std.mem.Allocator, func_type: std.wasm.Type) !u32 {
+    if (zig_object.getTypeIndex(func_type)) |index| {
+        return index;
+    }
+
+    // functype does not exist.
+    const index: u32 = @intCast(zig_object.func_types.items.len);
+    const params = try gpa.dupe(std.wasm.Valtype, func_type.params);
+    errdefer gpa.free(params);
+    const returns = try gpa.dupe(std.wasm.Valtype, func_type.returns);
+    errdefer gpa.free(returns);
+    try zig_object.func_types.append(gpa, .{
+        .params = params,
+        .returns = returns,
+    });
+    return index;
+}
+
+/// Kind represents the type of an Atom, which is only
+/// used to parse a decl into an Atom to define in which section
+/// or segment it should be placed.
+const Kind = union(enum) {
+    /// Represents the segment the data symbol should
+    /// be inserted into.
+    /// TODO: Add TLS segments
+    data: enum {
+        read_only,
+        uninitialized,
+        initialized,
+    },
+    function: void,
+
+    /// Returns the segment name the data kind represents.
+    /// Asserts `kind` has its active tag set to `data`.
+    fn segmentName(kind: Kind) []const u8 {
+        switch (kind.data) {
+            .read_only => return ".rodata.",
+            .uninitialized => return ".bss.",
+            .initialized => return ".data.",
+        }
+    }
+};
+
+/// Parses an Atom and inserts its metadata into the corresponding sections.
+pub fn parseAtom(zig_object: *ZigObject, wasm_file: *Wasm, atom_index: Atom.Index, kind: Kind) !void {
+    // TODO: Revisit. For now this function is a no-op: every parameter is discarded and the previous implementation is kept below, commented out, for reference.
+    _ = zig_object;
+    _ = wasm_file;
+    _ = atom_index;
+    _ = kind;
+    // const comp = wasm.base.comp;
+    // const gpa = comp.gpa;
+    // const shared_memory = comp.config.shared_memory;
+    // const import_memory = comp.config.import_memory;
+    // const atom = wasm.getAtomPtr(atom_index);
+    // const symbol = (SymbolLoc{ .file = null, .index = atom.sym_index }).getSymbol(wasm);
+    // const do_garbage_collect = wasm.base.gc_sections;
+
+    // if (symbol.isDead() and do_garbage_collect) {
+    //     // Prevent unreferenced symbols from being parsed.
+    //     return;
+    // }
+
+    // const final_index: u32 = switch (kind) {
+    //     .function => result: {
+    //         const index: u32 = @intCast(wasm.functions.count() + wasm.imported_functions_count);
+    //         const type_index = wasm.atom_types.get(atom_index).?;
+    //         try wasm.functions.putNoClobber(
+    //             gpa,
+    //             .{ .file = null, .index = index },
+    //             .{ .func = .{ .type_index = type_index }, .sym_index = atom.sym_index },
+    //         );
+    //         symbol.tag = .function;
+    //         symbol.index = index;
+
+    //         if (wasm.code_section_index == null) {
+    //             wasm.code_section_index = @intCast(wasm.segments.items.len);
+    //             try wasm.segments.append(gpa, .{
+    //                 .alignment = atom.alignment,
+    //                 .size = atom.size,
+    //                 .offset = 0,
+    //                 .flags = 0,
+    //             });
+    //         }
+
+    //         break :result wasm.code_section_index.?;
+    //     },
+    //     .data => result: {
+    //         const segment_name = try std.mem.concat(gpa, u8, &.{
+    //             kind.segmentName(),
+    //             wasm.string_table.get(symbol.name),
+    //         });
+    //         errdefer gpa.free(segment_name);
+    //         const segment_info: types.Segment = .{
+    //             .name = segment_name,
+    //             .alignment = atom.alignment,
+    //             .flags = 0,
+    //         };
+    //         symbol.tag = .data;
+
+    //         // when creating an object file, or importing memory and the data belongs in the .bss segment
+    //         // we set the entire region of it to zeroes.
+    //         // We do not have to do this when exporting the memory (the default) because the runtime
+    //         // will do it for us, and we do not emit the bss segment at all.
+    //         if ((wasm.base.comp.config.output_mode == .Obj or import_memory) and kind.data == .uninitialized) {
+    //             @memset(atom.code.items, 0);
+    //         }
+
+    //         const should_merge = wasm.base.comp.config.output_mode != .Obj;
+    //         const gop = try wasm.data_segments.getOrPut(gpa, segment_info.outputName(should_merge));
+    //         if (gop.found_existing) {
+    //             const index = gop.value_ptr.*;
+    //             wasm.segments.items[index].size += atom.size;
+
+    //             symbol.index = @intCast(wasm.segment_info.getIndex(index).?);
+    //             // segment info already exists, so free its memory
+    //             gpa.free(segment_name);
+    //             break :result index;
+    //         } else {
+    //             const index: u32 = @intCast(wasm.segments.items.len);
+    //             var flags: u32 = 0;
+    //             if (shared_memory) {
+    //                 flags |= @intFromEnum(Segment.Flag.WASM_DATA_SEGMENT_IS_PASSIVE);
+    //             }
+    //             try wasm.segments.append(gpa, .{
+    //                 .alignment = atom.alignment,
+    //                 .size = 0,
+    //                 .offset = 0,
+    //                 .flags = flags,
+    //             });
+    //             gop.value_ptr.* = index;
+
+    //             const info_index: u32 = @intCast(wasm.segment_info.count());
+    //             try wasm.segment_info.put(gpa, index, segment_info);
+    //             symbol.index = info_index;
+    //             break :result index;
+    //         }
+    //     },
+    // };
+
+    // const segment: *Segment = &wasm.segments.items[final_index];
+    // segment.alignment = segment.alignment.max(atom.alignment);
+
+    // try wasm.appendAtomAtIndex(final_index, atom_index);
+}
+
+/// Generates an atom containing the global error set's size.
+/// This will only be generated if the symbol exists.
+/// When an atom already exists for the symbol, it is unlinked from its
+/// neighbours, deinitialized and rebuilt in place; otherwise a new atom is
+/// appended to `wasm_file.managed_atoms` and registered in `wasm_file.symbol_atom`.
+fn setupErrorsLen(zig_object: *ZigObject, wasm_file: *Wasm) !void {
+    const gpa = wasm_file.base.comp.gpa;
+    const loc = zig_object.findGlobalSymbol("__zig_errors_len") orelse return;
+
+    const errors_len = wasm_file.base.comp.module.?.global_error_set.count();
+    // overwrite existing atom if it already exists (maybe the error set has increased)
+    // if not, allocate a new atom.
+    const atom_index = if (wasm_file.symbol_atom.get(loc)) |index| blk: {
+        const atom = wasm_file.getAtomPtr(index);
+        // Read both neighbour indexes before clearing either link, so the
+        // predecessor is connected to the successor rather than to an
+        // already-nulled `next`.
+        const prev_index = atom.prev;
+        const next_index = atom.next;
+        if (next_index) |next_atom_index| {
+            wasm_file.getAtomPtr(next_atom_index).prev = prev_index;
+        }
+        if (prev_index) |prev_atom_index| {
+            wasm_file.getAtomPtr(prev_atom_index).next = next_index;
+        }
+        atom.next = null;
+        atom.prev = null;
+        atom.deinit(gpa);
+        break :blk index;
+    } else new_atom: {
+        const atom_index: Atom.Index = @intCast(wasm_file.managed_atoms.items.len);
+        try wasm_file.symbol_atom.put(gpa, loc, atom_index);
+        // The slot is initialized right below, once the index is known.
+        try wasm_file.managed_atoms.append(gpa, undefined);
+        break :new_atom atom_index;
+    };
+    const atom = wasm_file.getAtomPtr(atom_index);
+    atom.* = Atom.empty;
+    atom.sym_index = loc.index;
+    // The error count is stored as a little-endian u16, hence two bytes.
+    atom.size = 2;
+    try atom.code.writer(gpa).writeInt(u16, @intCast(errors_len), .little);
+
+    // try wasm.parseAtom(atom_index, .{ .data = .read_only });
+}
+
+const build_options = @import("build_options");
+const builtin = @import("builtin");
+const codegen = @import("../../codegen.zig");
+const link = @import("../../link.zig");
+const log = std.log.scoped(.zig_object);
+const std = @import("std");
+const types = @import("types.zig");
+
+const Air = @import("../../Air.zig");
+const Atom = @import("Atom.zig");
+const InternPool = @import("../../InternPool.zig");
+const Liveness = @import("../../Liveness.zig");
+const Module = @import("../../Module.zig");
+const StringTable = @import("../StringTable.zig");
+const Symbol = @import("Symbol.zig");
+const Type = @import("../../type.zig").Type;
+const TypedValue =
@import("../../TypedValue.zig"); +const Value = @import("../../value.zig").Value; +const Wasm = @import("../Wasm.zig"); +const ZigObject = @This(); From e54177e852e5674ff14a850586b7517697e52297 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Sun, 14 Jan 2024 15:36:28 +0100 Subject: [PATCH 02/21] wasm: move incremental Dwarf info into ZigObject --- src/arch/wasm/CodeGen.zig | 1 - src/link/Dwarf.zig | 166 +++-- src/link/Wasm.zig | 1207 ++++------------------------------- src/link/Wasm/ZigObject.zig | 115 ++++ 4 files changed, 326 insertions(+), 1163 deletions(-) diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 3a2a9c2d06..a13c61f367 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -3191,7 +3191,6 @@ fn lowerDeclRefValue(func: *CodeGen, tv: TypedValue, decl_index: InternPool.Decl const target_sym_index = atom.sym_index; if (decl.ty.zigTypeTag(mod) == .Fn) { - try func.bin_file.addTableFunction(target_sym_index); return WValue{ .function_index = target_sym_index }; } else if (offset == 0) { return WValue{ .memory = target_sym_index }; diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index a9a6942299..a28926c1c0 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -1297,9 +1297,9 @@ pub fn commitDeclState( } }, .wasm => { - const wasm_file = self.bin_file.cast(File.Wasm).?; - const debug_line = wasm_file.getAtomPtr(wasm_file.debug_line_atom.?).code; - writeDbgLineNopsBuffered(debug_line.items, src_fn.off, 0, &.{}, src_fn.len); + // const wasm_file = self.bin_file.cast(File.Wasm).?; + // const debug_line = wasm_file.getAtomPtr(wasm_file.debug_line_atom.?).code; + // writeDbgLineNopsBuffered(debug_line.items, src_fn.off, 0, &.{}, src_fn.len); }, else => unreachable, } @@ -1390,26 +1390,26 @@ pub fn commitDeclState( }, .wasm => { - const wasm_file = self.bin_file.cast(File.Wasm).?; - const atom = wasm_file.getAtomPtr(wasm_file.debug_line_atom.?); - const debug_line = &atom.code; - const segment_size = 
debug_line.items.len; - if (needed_size != segment_size) { - log.debug(" needed size does not equal allocated size: {d}", .{needed_size}); - if (needed_size > segment_size) { - log.debug(" allocating {d} bytes for 'debug line' information", .{needed_size - segment_size}); - try debug_line.resize(self.allocator, needed_size); - @memset(debug_line.items[segment_size..], 0); - } - debug_line.items.len = needed_size; - } - writeDbgLineNopsBuffered( - debug_line.items, - src_fn.off, - prev_padding_size, - dbg_line_buffer.items, - next_padding_size, - ); + // const wasm_file = self.bin_file.cast(File.Wasm).?; + // const atom = wasm_file.getAtomPtr(wasm_file.debug_line_atom.?); + // const debug_line = &atom.code; + // const segment_size = debug_line.items.len; + // if (needed_size != segment_size) { + // log.debug(" needed size does not equal allocated size: {d}", .{needed_size}); + // if (needed_size > segment_size) { + // log.debug(" allocating {d} bytes for 'debug line' information", .{needed_size - segment_size}); + // try debug_line.resize(self.allocator, needed_size); + // @memset(debug_line.items[segment_size..], 0); + // } + // debug_line.items.len = needed_size; + // } + // writeDbgLineNopsBuffered( + // debug_line.items, + // src_fn.off, + // prev_padding_size, + // dbg_line_buffer.items, + // next_padding_size, + // ); }, else => unreachable, } @@ -1553,10 +1553,10 @@ fn updateDeclDebugInfoAllocation(self: *Dwarf, atom_index: Atom.Index, len: u32) } }, .wasm => { - const wasm_file = self.bin_file.cast(File.Wasm).?; - const debug_info_index = wasm_file.debug_info_atom.?; - const debug_info = &wasm_file.getAtomPtr(debug_info_index).code; - try writeDbgInfoNopsToArrayList(gpa, debug_info, atom.off, 0, &.{0}, atom.len, false); + // const wasm_file = self.bin_file.cast(File.Wasm).?; + // const debug_info_index = wasm_file.debug_info_atom.?; + // const debug_info = &wasm_file.getAtomPtr(debug_info_index).code; + // try writeDbgInfoNopsToArrayList(gpa, debug_info, 
atom.off, 0, &.{0}, atom.len, false); }, else => unreachable, } @@ -1594,7 +1594,6 @@ fn writeDeclDebugInfo(self: *Dwarf, atom_index: Atom.Index, dbg_info_buf: []cons // This logic is nearly identical to the logic above in `updateDecl` for // `SrcFn` and the line number programs. If you are editing this logic, you // probably need to edit that logic too. - const gpa = self.allocator; const atom = self.getAtom(.di_atom, atom_index); const last_decl_index = self.di_atom_last_index.?; @@ -1665,31 +1664,31 @@ fn writeDeclDebugInfo(self: *Dwarf, atom_index: Atom.Index, dbg_info_buf: []cons }, .wasm => { - const wasm_file = self.bin_file.cast(File.Wasm).?; - const info_atom = wasm_file.debug_info_atom.?; - const debug_info = &wasm_file.getAtomPtr(info_atom).code; - const segment_size = debug_info.items.len; - if (needed_size != segment_size) { - log.debug(" needed size does not equal allocated size: {d}", .{needed_size}); - if (needed_size > segment_size) { - log.debug(" allocating {d} bytes for 'debug info' information", .{needed_size - segment_size}); - try debug_info.resize(self.allocator, needed_size); - @memset(debug_info.items[segment_size..], 0); - } - debug_info.items.len = needed_size; - } - log.debug(" writeDbgInfoNopsToArrayList debug_info_len={d} offset={d} content_len={d} next_padding_size={d}", .{ - debug_info.items.len, atom.off, dbg_info_buf.len, next_padding_size, - }); - try writeDbgInfoNopsToArrayList( - gpa, - debug_info, - atom.off, - prev_padding_size, - dbg_info_buf, - next_padding_size, - trailing_zero, - ); + // const wasm_file = self.bin_file.cast(File.Wasm).?; + // const info_atom = wasm_file.debug_info_atom.?; + // const debug_info = &wasm_file.getAtomPtr(info_atom).code; + // const segment_size = debug_info.items.len; + // if (needed_size != segment_size) { + // log.debug(" needed size does not equal allocated size: {d}", .{needed_size}); + // if (needed_size > segment_size) { + // log.debug(" allocating {d} bytes for 'debug info' 
information", .{needed_size - segment_size}); + // try debug_info.resize(self.allocator, needed_size); + // @memset(debug_info.items[segment_size..], 0); + // } + // debug_info.items.len = needed_size; + // } + // log.debug(" writeDbgInfoNopsToArrayList debug_info_len={d} offset={d} content_len={d} next_padding_size={d}", .{ + // debug_info.items.len, atom.off, dbg_info_buf.len, next_padding_size, + // }); + // try writeDbgInfoNopsToArrayList( + // gpa, + // debug_info, + // atom.off, + // prev_padding_size, + // dbg_info_buf, + // next_padding_size, + // trailing_zero, + // ); }, else => unreachable, } @@ -1735,10 +1734,10 @@ pub fn updateDeclLineNumber(self: *Dwarf, mod: *Module, decl_index: InternPool.D } }, .wasm => { - const wasm_file = self.bin_file.cast(File.Wasm).?; - const offset = atom.off + self.getRelocDbgLineOff(); - const line_atom_index = wasm_file.debug_line_atom.?; - wasm_file.getAtomPtr(line_atom_index).code.items[offset..][0..data.len].* = data; + // const wasm_file = self.bin_file.cast(File.Wasm).?; + // const offset = atom.off + self.getRelocDbgLineOff(); + // const line_atom_index = wasm_file.debug_line_atom.?; + // wasm_file.getAtomPtr(line_atom_index).code.items[offset..][0..data.len].* = data; }, else => unreachable, } @@ -1803,7 +1802,6 @@ pub fn freeDecl(self: *Dwarf, decl_index: InternPool.DeclIndex) void { } pub fn writeDbgAbbrev(self: *Dwarf) !void { - const gpa = self.allocator; // These are LEB encoded but since the values are all less than 127 // we can simply append these bytes. 
// zig fmt: off @@ -1960,10 +1958,10 @@ pub fn writeDbgAbbrev(self: *Dwarf) !void { } }, .wasm => { - const wasm_file = self.bin_file.cast(File.Wasm).?; - const debug_abbrev = &wasm_file.getAtomPtr(wasm_file.debug_abbrev_atom.?).code; - try debug_abbrev.resize(gpa, needed_size); - debug_abbrev.items[0..abbrev_buf.len].* = abbrev_buf; + // const wasm_file = self.bin_file.cast(File.Wasm).?; + // const debug_abbrev = &wasm_file.getAtomPtr(wasm_file.debug_abbrev_atom.?).code; + // try debug_abbrev.resize(gpa, needed_size); + // debug_abbrev.items[0..abbrev_buf.len].* = abbrev_buf; }, else => unreachable, } @@ -2055,9 +2053,9 @@ pub fn writeDbgInfoHeader(self: *Dwarf, zcu: *Module, low_pc: u64, high_pc: u64) } }, .wasm => { - const wasm_file = self.bin_file.cast(File.Wasm).?; - const debug_info = &wasm_file.getAtomPtr(wasm_file.debug_info_atom.?).code; - try writeDbgInfoNopsToArrayList(self.allocator, debug_info, 0, 0, di_buf.items, jmp_amt, false); + // const wasm_file = self.bin_file.cast(File.Wasm).?; + // const debug_info = &wasm_file.getAtomPtr(wasm_file.debug_info_atom.?).code; + // try writeDbgInfoNopsToArrayList(self.allocator, debug_info, 0, 0, di_buf.items, jmp_amt, false); }, else => unreachable, } @@ -2318,7 +2316,6 @@ fn writeDbgInfoNopsToArrayList( pub fn writeDbgAranges(self: *Dwarf, addr: u64, size: u64) !void { const comp = self.bin_file.comp; - const gpa = comp.gpa; const target = comp.root_mod.resolved_target.result; const target_endian = target.cpu.arch.endian(); const ptr_width_bytes = self.ptrWidthBytes(); @@ -2391,10 +2388,10 @@ pub fn writeDbgAranges(self: *Dwarf, addr: u64, size: u64) !void { } }, .wasm => { - const wasm_file = self.bin_file.cast(File.Wasm).?; - const debug_ranges = &wasm_file.getAtomPtr(wasm_file.debug_ranges_atom.?).code; - try debug_ranges.resize(gpa, needed_size); - @memcpy(debug_ranges.items[0..di_buf.items.len], di_buf.items); + // const wasm_file = self.bin_file.cast(File.Wasm).?; + // const debug_ranges = 
&wasm_file.getAtomPtr(wasm_file.debug_ranges_atom.?).code; + // try debug_ranges.resize(gpa, needed_size); + // @memcpy(debug_ranges.items[0..di_buf.items.len], di_buf.items); }, else => unreachable, } @@ -2548,14 +2545,15 @@ pub fn writeDbgLineHeader(self: *Dwarf) !void { } }, .wasm => { - const wasm_file = self.bin_file.cast(File.Wasm).?; - const debug_line = &wasm_file.getAtomPtr(wasm_file.debug_line_atom.?).code; - { - const src = debug_line.items[first_fn.off..]; - @memcpy(buffer[0..src.len], src); - } - try debug_line.resize(self.allocator, debug_line.items.len + delta); - @memcpy(debug_line.items[first_fn.off + delta ..][0..buffer.len], buffer); + _ = &buffer; + // const wasm_file = self.bin_file.cast(File.Wasm).?; + // const debug_line = &wasm_file.getAtomPtr(wasm_file.debug_line_atom.?).code; + // { + // const src = debug_line.items[first_fn.off..]; + // @memcpy(buffer[0..src.len], src); + // } + // try debug_line.resize(self.allocator, debug_line.items.len + delta); + // @memcpy(debug_line.items[first_fn.off + delta ..][0..buffer.len], buffer); }, else => unreachable, } @@ -2604,9 +2602,9 @@ pub fn writeDbgLineHeader(self: *Dwarf) !void { } }, .wasm => { - const wasm_file = self.bin_file.cast(File.Wasm).?; - const debug_line = &wasm_file.getAtomPtr(wasm_file.debug_line_atom.?).code; - writeDbgLineNopsBuffered(debug_line.items, 0, 0, di_buf.items, jmp_amt); + // const wasm_file = self.bin_file.cast(File.Wasm).?; + // const debug_line = &wasm_file.getAtomPtr(wasm_file.debug_line_atom.?).code; + // writeDbgLineNopsBuffered(debug_line.items, 0, 0, di_buf.items, jmp_amt); }, else => unreachable, } @@ -2754,9 +2752,9 @@ pub fn flushModule(self: *Dwarf, module: *Module) !void { } }, .wasm => { - const wasm_file = self.bin_file.cast(File.Wasm).?; - const debug_info = wasm_file.getAtomPtr(wasm_file.debug_info_atom.?).code; - debug_info.items[atom.off + reloc.offset ..][0..buf.len].* = buf; + // const wasm_file = self.bin_file.cast(File.Wasm).?; + // const 
debug_info = wasm_file.getAtomPtr(wasm_file.debug_info_atom.?).code; + // debug_info.items[atom.off + reloc.offset ..][0..buf.len].* = buf; }, else => unreachable, } diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index ecb0ed7115..cdc019c4fc 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -37,12 +37,22 @@ pub const Relocation = types.Relocation; pub const base_tag: link.File.Tag = .wasm; base: link.File, +/// Symbol name of the entry function to export entry_name: ?[]const u8, +/// When true, will allow undefined symbols import_symbols: bool, +/// List of *global* symbol names to export to the host environment. export_symbol_names: []const []const u8, +/// When defined, sets the start of the data section. global_base: ?u64, +/// When defined, sets the initial memory size of the memory. initial_memory: ?u64, +/// When defined, sets the maximum memory size of the memory. max_memory: ?u64, +/// When true, will import the function table from the host environment. +import_table: bool, +/// When true, will export the function table to the host environment. +export_table: bool, /// Output name of the file name: []const u8, /// If this is not null, an object file is created by LLVM and linked with LLD afterwards. @@ -52,16 +62,8 @@ llvm_object: ?*LlvmObject = null, /// to support existing code. /// TODO: Allow setting this through a flag? host_name: []const u8 = "env", -/// List of all `Decl` that are currently alive. -/// Each index maps to the corresponding `Atom.Index`. -decls: std.AutoHashMapUnmanaged(InternPool.DeclIndex, Atom.Index) = .{}, -/// Mapping between an `Atom` and its type index representing the Wasm -/// type of the function signature. -atom_types: std.AutoHashMapUnmanaged(Atom.Index, u32) = .{}, /// List of all symbols generated by Zig code. -symbols: std.ArrayListUnmanaged(Symbol) = .{}, -/// List of symbol indexes which are free to be used. 
-symbols_free_list: std.ArrayListUnmanaged(u32) = .{}, +synthetic_symbols: std.ArrayListUnmanaged(Symbol) = .{}, /// Maps atoms to their segment index atoms: std.AutoHashMapUnmanaged(u32, Atom.Index) = .{}, /// List of all atoms. @@ -107,8 +109,6 @@ data_segments: std.StringArrayHashMapUnmanaged(u32) = .{}, segment_info: std.AutoArrayHashMapUnmanaged(u32, types.Segment) = .{}, /// Deduplicated string table for strings used by symbols, imports and exports. string_table: StringTable = .{}, -/// Debug information for wasm -dwarf: ?Dwarf = null, // Output sections /// Output type section @@ -170,36 +170,10 @@ symbol_atom: std.AutoHashMapUnmanaged(SymbolLoc, Atom.Index) = .{}, /// Note: The value represents the offset into the string table, rather than the actual string. export_names: std.AutoHashMapUnmanaged(SymbolLoc, u32) = .{}, -/// Represents the symbol index of the error name table -/// When this is `null`, no code references an error using runtime `@errorName`. -/// During initializion, a symbol with corresponding atom will be created that is -/// used to perform relocations to the pointer of this table. -/// The actual table is populated during `flush`. -error_table_symbol: ?u32 = null, - -// Debug section atoms. These are only set when the current compilation -// unit contains Zig code. The lifetime of these atoms are extended -// until the end of the compiler's lifetime. Meaning they're not freed -// during `flush()` in incremental-mode. -debug_info_atom: ?Atom.Index = null, -debug_line_atom: ?Atom.Index = null, -debug_loc_atom: ?Atom.Index = null, -debug_ranges_atom: ?Atom.Index = null, -debug_abbrev_atom: ?Atom.Index = null, -debug_str_atom: ?Atom.Index = null, -debug_pubnames_atom: ?Atom.Index = null, -debug_pubtypes_atom: ?Atom.Index = null, - /// List of atom indexes of functions that are generated by the backend, /// rather than by the linker. synthetic_functions: std.ArrayListUnmanaged(Atom.Index) = .{}, -/// Map for storing anonymous declarations. 
Each anonymous decl maps to its Atom's index. -anon_decls: std.AutoArrayHashMapUnmanaged(InternPool.Index, Atom.Index) = .{}, - -import_table: bool, -export_table: bool, - pub const Alignment = types.Alignment; pub const Segment = struct { @@ -238,27 +212,27 @@ pub const SymbolLoc = struct { file: ?u16, /// From a given location, returns the corresponding symbol in the wasm binary - pub fn getSymbol(loc: SymbolLoc, wasm_bin: *const Wasm) *Symbol { - if (wasm_bin.discarded.get(loc)) |new_loc| { - return new_loc.getSymbol(wasm_bin); + pub fn getSymbol(loc: SymbolLoc, wasm_file: *const Wasm) *Symbol { + if (wasm_file.discarded.get(loc)) |new_loc| { + return new_loc.getSymbol(wasm_file); } if (loc.file) |object_index| { - const object = wasm_bin.objects.items[object_index]; + const object = wasm_file.objects.items[object_index]; return &object.symtable[loc.index]; } - return &wasm_bin.symbols.items[loc.index]; + return &wasm_file.synthetic_symbols.items[loc.index]; } /// From a given location, returns the name of the symbol. - pub fn getName(loc: SymbolLoc, wasm_bin: *const Wasm) []const u8 { - if (wasm_bin.discarded.get(loc)) |new_loc| { - return new_loc.getName(wasm_bin); + pub fn getName(loc: SymbolLoc, wasm_file: *const Wasm) []const u8 { + if (wasm_file.discarded.get(loc)) |new_loc| { + return new_loc.getName(wasm_file); } if (loc.file) |object_index| { - const object = wasm_bin.objects.items[object_index]; + const object = wasm_file.objects.items[object_index]; return object.string_table.get(object.symtable[loc.index].name); } - return wasm_bin.string_table.get(wasm_bin.symbols.items[loc.index].name); + return wasm_file.string_table.get(wasm_file.synthetic_symbols.items[loc.index].name); } /// From a given symbol location, returns the final location. @@ -266,9 +240,9 @@ pub const SymbolLoc = struct { /// in a different file, this will return said location. /// If the symbol wasn't replaced by another, this will return /// the given location itwasm. 
- pub fn finalLoc(loc: SymbolLoc, wasm_bin: *const Wasm) SymbolLoc { - if (wasm_bin.discarded.get(loc)) |new_loc| { - return new_loc.finalLoc(wasm_bin); + pub fn finalLoc(loc: SymbolLoc, wasm_file: *const Wasm) SymbolLoc { + if (wasm_file.discarded.get(loc)) |new_loc| { + return new_loc.finalLoc(wasm_file); } return loc; } @@ -594,10 +568,10 @@ fn createSyntheticSymbol(wasm: *Wasm, name: []const u8, tag: Symbol.Tag) !Symbol } fn createSyntheticSymbolOffset(wasm: *Wasm, name_offset: u32, tag: Symbol.Tag) !SymbolLoc { - const sym_index = @as(u32, @intCast(wasm.symbols.items.len)); + const sym_index = @as(u32, @intCast(wasm.synthetic_symbols.items.len)); const loc: SymbolLoc = .{ .index = sym_index, .file = null }; const gpa = wasm.base.comp.gpa; - try wasm.symbols.append(gpa, .{ + try wasm.synthetic_symbols.append(gpa, .{ .name = name_offset, .flags = 0, .tag = tag, @@ -609,24 +583,6 @@ fn createSyntheticSymbolOffset(wasm: *Wasm, name_offset: u32, tag: Symbol.Tag) ! return loc; } -/// Initializes symbols and atoms for the debug sections -/// Initialization is only done when compiling Zig code. -/// When Zig is invoked as a linker instead, the atoms -/// and symbols come from the object files instead. -pub fn initDebugSections(wasm: *Wasm) !void { - if (wasm.dwarf == null) return; // not compiling Zig code, so no need to pre-initialize debug sections - assert(wasm.debug_info_index == null); - // this will create an Atom and set the index for us. 
- wasm.debug_info_atom = try wasm.createDebugSectionForIndex(&wasm.debug_info_index, ".debug_info"); - wasm.debug_line_atom = try wasm.createDebugSectionForIndex(&wasm.debug_line_index, ".debug_line"); - wasm.debug_loc_atom = try wasm.createDebugSectionForIndex(&wasm.debug_loc_index, ".debug_loc"); - wasm.debug_abbrev_atom = try wasm.createDebugSectionForIndex(&wasm.debug_abbrev_index, ".debug_abbrev"); - wasm.debug_ranges_atom = try wasm.createDebugSectionForIndex(&wasm.debug_ranges_index, ".debug_ranges"); - wasm.debug_str_atom = try wasm.createDebugSectionForIndex(&wasm.debug_str_index, ".debug_str"); - wasm.debug_pubnames_atom = try wasm.createDebugSectionForIndex(&wasm.debug_pubnames_index, ".debug_pubnames"); - wasm.debug_pubtypes_atom = try wasm.createDebugSectionForIndex(&wasm.debug_pubtypes_index, ".debug_pubtypes"); -} - fn parseInputFiles(wasm: *Wasm, files: []const []const u8) !void { for (files) |path| { if (try wasm.parseObjectFile(path)) continue; @@ -652,33 +608,14 @@ fn parseObjectFile(wasm: *Wasm, path: []const u8) !bool { return true; } -/// For a given `InternPool.DeclIndex` returns its corresponding `Atom.Index`. -/// When the index was not found, a new `Atom` will be created, and its index will be returned. -/// The newly created Atom is empty with default fields as specified by `Atom.empty`. 
-pub fn getOrCreateAtomForDecl(wasm: *Wasm, decl_index: InternPool.DeclIndex) !Atom.Index { - const gpa = wasm.base.comp.gpa; - const gop = try wasm.decls.getOrPut(gpa, decl_index); - if (!gop.found_existing) { - const atom_index = try wasm.createAtom(); - gop.value_ptr.* = atom_index; - const atom = wasm.getAtom(atom_index); - const symbol = atom.symbolLoc().getSymbol(wasm); - const mod = wasm.base.comp.module.?; - const decl = mod.declPtr(decl_index); - const full_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); - symbol.name = try wasm.string_table.put(gpa, full_name); - } - return gop.value_ptr.*; -} - /// Creates a new empty `Atom` and returns its `Atom.Index` -fn createAtom(wasm: *Wasm) !Atom.Index { +pub fn createAtom(wasm: *Wasm, sym_index: u32) !Atom.Index { const gpa = wasm.base.comp.gpa; const index: Atom.Index = @intCast(wasm.managed_atoms.items.len); const atom = try wasm.managed_atoms.addOne(gpa); atom.* = Atom.empty; - atom.sym_index = try wasm.allocateSymbol(); - try wasm.symbol_atom.putNoClobber(gpa, .{ .file = null, .index = atom.sym_index }, index); + atom.sym_index = sym_index; + try wasm.symbol_atom.putNoClobber(gpa, .{ .file = null, .index = sym_index }, index); return index; } @@ -1386,40 +1323,12 @@ pub fn deinit(wasm: *Wasm) void { archive.deinit(gpa); } - // For decls and anon decls we free the memory of its atoms. - // The memory of atoms parsed from object files is managed by - // the object file itself, and therefore we can skip those. 
- { - var it = wasm.decls.valueIterator(); - while (it.next()) |atom_index_ptr| { - const atom = wasm.getAtomPtr(atom_index_ptr.*); - for (atom.locals.items) |local_index| { - const local_atom = wasm.getAtomPtr(local_index); - local_atom.deinit(gpa); - } - atom.deinit(gpa); - } - } - { - for (wasm.anon_decls.values()) |atom_index| { - const atom = wasm.getAtomPtr(atom_index); - for (atom.locals.items) |local_index| { - const local_atom = wasm.getAtomPtr(local_index); - local_atom.deinit(gpa); - } - atom.deinit(gpa); - } - } for (wasm.synthetic_functions.items) |atom_index| { const atom = wasm.getAtomPtr(atom_index); atom.deinit(gpa); } - wasm.decls.deinit(gpa); - wasm.anon_decls.deinit(gpa); - wasm.atom_types.deinit(gpa); - wasm.symbols.deinit(gpa); - wasm.symbols_free_list.deinit(gpa); + wasm.synthetic_symbols.deinit(gpa); wasm.globals.deinit(gpa); wasm.resolved_symbols.deinit(gpa); wasm.undefs.deinit(gpa); @@ -1446,32 +1355,6 @@ pub fn deinit(wasm: *Wasm) void { wasm.string_table.deinit(gpa); wasm.synthetic_functions.deinit(gpa); - - if (wasm.dwarf) |*dwarf| { - dwarf.deinit(); - } -} - -/// Allocates a new symbol and returns its index. -/// Will re-use slots when a symbol was freed at an earlier stage. 
-pub fn allocateSymbol(wasm: *Wasm) !u32 { - const gpa = wasm.base.comp.gpa; - - try wasm.symbols.ensureUnusedCapacity(gpa, 1); - const symbol: Symbol = .{ - .name = std.math.maxInt(u32), // will be set after updateDecl as well as during atom creation for decls - .flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL), - .tag = .undefined, // will be set after updateDecl - .index = std.math.maxInt(u32), // will be set during atom parsing - .virtual_address = std.math.maxInt(u32), // will be set during atom allocation - }; - if (wasm.symbols_free_list.popOrNull()) |index| { - wasm.symbols.items[index] = symbol; - return index; - } - const index = @as(u32, @intCast(wasm.symbols.items.len)); - wasm.symbols.appendAssumeCapacity(symbol); - return index; } pub fn updateFunc(wasm: *Wasm, mod: *Module, func_index: InternPool.Index, air: Air, liveness: Liveness) !void { @@ -1546,84 +1429,12 @@ pub fn updateDecl(wasm: *Wasm, mod: *Module, decl_index: InternPool.DeclIndex) ! @panic("Attempted to compile for object format that was disabled by build configuration"); } if (wasm.llvm_object) |llvm_object| return llvm_object.updateDecl(mod, decl_index); - - const tracy = trace(@src()); - defer tracy.end(); - - const decl = mod.declPtr(decl_index); - if (decl.val.getFunction(mod)) |_| { - return; - } else if (decl.val.getExternFunc(mod)) |_| { - return; - } - - const gpa = wasm.base.comp.gpa; - const atom_index = try wasm.getOrCreateAtomForDecl(decl_index); - const atom = wasm.getAtomPtr(atom_index); - atom.clear(); - - if (decl.isExtern(mod)) { - const variable = decl.getOwnedVariable(mod).?; - const name = mod.intern_pool.stringToSlice(decl.name); - const lib_name = mod.intern_pool.stringToSliceUnwrap(variable.lib_name); - return wasm.addOrUpdateImport(name, atom.sym_index, lib_name, null); - } - const val = if (decl.val.getVariable(mod)) |variable| Value.fromInterned(variable.init) else decl.val; - - var code_writer = std.ArrayList(u8).init(gpa); - defer code_writer.deinit(); 
- - const res = try codegen.generateSymbol( - &wasm.base, - decl.srcLoc(mod), - .{ .ty = decl.ty, .val = val }, - &code_writer, - .none, - .{ .parent_atom_index = atom.sym_index }, - ); - - const code = switch (res) { - .ok => code_writer.items, - .fail => |em| { - decl.analysis = .codegen_failure; - try mod.failed_decls.put(mod.gpa, decl_index, em); - return; - }, - }; - - return wasm.finishUpdateDecl(decl_index, code, .data); } pub fn updateDeclLineNumber(wasm: *Wasm, mod: *Module, decl_index: InternPool.DeclIndex) !void { if (wasm.llvm_object) |_| return; - if (wasm.dwarf) |*dw| { - const tracy = trace(@src()); - defer tracy.end(); - - const decl = mod.declPtr(decl_index); - const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); - - log.debug("updateDeclLineNumber {s}{*}", .{ decl_name, decl }); - try dw.updateDeclLineNumber(mod, decl_index); - } -} - -fn finishUpdateDecl(wasm: *Wasm, decl_index: InternPool.DeclIndex, code: []const u8, symbol_tag: Symbol.Tag) !void { - const gpa = wasm.base.comp.gpa; - const mod = wasm.base.comp.module.?; - const decl = mod.declPtr(decl_index); - const atom_index = wasm.decls.get(decl_index).?; - const atom = wasm.getAtomPtr(atom_index); - const symbol = &wasm.symbols.items[atom.sym_index]; - const full_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); - symbol.name = try wasm.string_table.put(gpa, full_name); - symbol.tag = symbol_tag; - try atom.code.appendSlice(gpa, code); - try wasm.resolved_symbols.put(gpa, atom.symbolLoc(), {}); - - atom.size = @intCast(code.len); - if (code.len == 0) return; - atom.alignment = decl.getAlignment(mod); + _ = mod; + _ = decl_index; } /// From a given symbol location, returns its `wasm.GlobalType`. @@ -1673,82 +1484,9 @@ fn getFunctionSignature(wasm: *const Wasm, loc: SymbolLoc) std.wasm.Type { /// Returns the symbol index of the local /// The given `decl` is the parent decl whom owns the constant. 
pub fn lowerUnnamedConst(wasm: *Wasm, tv: TypedValue, decl_index: InternPool.DeclIndex) !u32 { - const gpa = wasm.base.comp.gpa; - const mod = wasm.base.comp.module.?; - assert(tv.ty.zigTypeTag(mod) != .Fn); // cannot create local symbols for functions - const decl = mod.declPtr(decl_index); - - const parent_atom_index = try wasm.getOrCreateAtomForDecl(decl_index); - const parent_atom = wasm.getAtom(parent_atom_index); - const local_index = parent_atom.locals.items.len; - const fqn = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); - const name = try std.fmt.allocPrintZ(gpa, "__unnamed_{s}_{d}", .{ - fqn, local_index, - }); - defer gpa.free(name); - - switch (try wasm.lowerConst(name, tv, decl.srcLoc(mod))) { - .ok => |atom_index| { - try wasm.getAtomPtr(parent_atom_index).locals.append(gpa, atom_index); - return wasm.getAtom(atom_index).getSymbolIndex().?; - }, - .fail => |em| { - decl.analysis = .codegen_failure; - try mod.failed_decls.put(mod.gpa, decl_index, em); - return error.CodegenFail; - }, - } -} - -const LowerConstResult = union(enum) { - ok: Atom.Index, - fail: *Module.ErrorMsg, -}; - -fn lowerConst(wasm: *Wasm, name: []const u8, tv: TypedValue, src_loc: Module.SrcLoc) !LowerConstResult { - const gpa = wasm.base.comp.gpa; - const mod = wasm.base.comp.module.?; - - // Create and initialize a new local symbol and atom - const atom_index = try wasm.createAtom(); - var value_bytes = std.ArrayList(u8).init(gpa); - defer value_bytes.deinit(); - - const code = code: { - const atom = wasm.getAtomPtr(atom_index); - atom.alignment = tv.ty.abiAlignment(mod); - wasm.symbols.items[atom.sym_index] = .{ - .name = try wasm.string_table.put(gpa, name), - .flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL), - .tag = .data, - .index = undefined, - .virtual_address = undefined, - }; - try wasm.resolved_symbols.putNoClobber(gpa, atom.symbolLoc(), {}); - - const result = try codegen.generateSymbol( - &wasm.base, - src_loc, - tv, - &value_bytes, - .none, 
- .{ - .parent_atom_index = atom.sym_index, - .addend = null, - }, - ); - break :code switch (result) { - .ok => value_bytes.items, - .fail => |em| { - return .{ .fail = em }; - }, - }; - }; - - const atom = wasm.getAtomPtr(atom_index); - atom.size = @intCast(code.len); - try atom.code.appendSlice(gpa, code); - return .{ .ok = atom_index }; + _ = wasm; + _ = tv; + _ = decl_index; } /// Returns the symbol index from a symbol of which its flag is set global, @@ -1757,34 +1495,8 @@ fn lowerConst(wasm: *Wasm, name: []const u8, tv: TypedValue, src_loc: Module.Src /// and then returns the index to it. pub fn getGlobalSymbol(wasm: *Wasm, name: []const u8, lib_name: ?[]const u8) !u32 { _ = lib_name; - const gpa = wasm.base.comp.gpa; - const name_index = try wasm.string_table.put(gpa, name); - const gop = try wasm.globals.getOrPut(gpa, name_index); - if (gop.found_existing) { - return gop.value_ptr.*.index; - } - - var symbol: Symbol = .{ - .name = name_index, - .flags = 0, - .index = undefined, // index to type will be set after merging function symbols - .tag = .function, - .virtual_address = undefined, - }; - symbol.setGlobal(true); - symbol.setUndefined(true); - - const sym_index = if (wasm.symbols_free_list.popOrNull()) |index| index else blk: { - const index: u32 = @intCast(wasm.symbols.items.len); - try wasm.symbols.ensureUnusedCapacity(gpa, 1); - wasm.symbols.items.len += 1; - break :blk index; - }; - wasm.symbols.items[sym_index] = symbol; - gop.value_ptr.* = .{ .index = sym_index, .file = null }; - try wasm.resolved_symbols.put(gpa, gop.value_ptr.*, {}); - try wasm.undefs.putNoClobber(gpa, name_index, gop.value_ptr.*); - return sym_index; + _ = name; + _ = wasm; } /// For a given decl, find the given symbol index's atom, and create a relocation for the type. 
@@ -1794,42 +1506,9 @@ pub fn getDeclVAddr( decl_index: InternPool.DeclIndex, reloc_info: link.File.RelocInfo, ) !u64 { - const target = wasm.base.comp.root_mod.resolved_target.result; - const gpa = wasm.base.comp.gpa; - const mod = wasm.base.comp.module.?; - const decl = mod.declPtr(decl_index); - - const target_atom_index = try wasm.getOrCreateAtomForDecl(decl_index); - const target_symbol_index = wasm.getAtom(target_atom_index).sym_index; - - assert(reloc_info.parent_atom_index != 0); - const atom_index = wasm.symbol_atom.get(.{ .file = null, .index = reloc_info.parent_atom_index }).?; - const atom = wasm.getAtomPtr(atom_index); - const is_wasm32 = target.cpu.arch == .wasm32; - if (decl.ty.zigTypeTag(mod) == .Fn) { - assert(reloc_info.addend == 0); // addend not allowed for function relocations - // We found a function pointer, so add it to our table, - // as function pointers are not allowed to be stored inside the data section. - // They are instead stored in a function table which are called by index. - try wasm.addTableFunction(target_symbol_index); - try atom.relocs.append(gpa, .{ - .index = target_symbol_index, - .offset = @intCast(reloc_info.offset), - .relocation_type = if (is_wasm32) .R_WASM_TABLE_INDEX_I32 else .R_WASM_TABLE_INDEX_I64, - }); - } else { - try atom.relocs.append(gpa, .{ - .index = target_symbol_index, - .offset = @intCast(reloc_info.offset), - .relocation_type = if (is_wasm32) .R_WASM_MEMORY_ADDR_I32 else .R_WASM_MEMORY_ADDR_I64, - .addend = @intCast(reloc_info.addend), - }); - } - // we do not know the final address at this point, - // as atom allocation will determine the address and relocations - // will calculate and rewrite this. Therefore, we simply return the symbol index - // that was targeted. 
- return target_symbol_index; + _ = wasm; + _ = decl_index; + _ = reloc_info; } pub fn lowerAnonDecl( @@ -1838,70 +1517,16 @@ pub fn lowerAnonDecl( explicit_alignment: Alignment, src_loc: Module.SrcLoc, ) !codegen.Result { - const gpa = wasm.base.comp.gpa; - const gop = try wasm.anon_decls.getOrPut(gpa, decl_val); - if (!gop.found_existing) { - const mod = wasm.base.comp.module.?; - const ty = Type.fromInterned(mod.intern_pool.typeOf(decl_val)); - const tv: TypedValue = .{ .ty = ty, .val = Value.fromInterned(decl_val) }; - var name_buf: [32]u8 = undefined; - const name = std.fmt.bufPrint(&name_buf, "__anon_{d}", .{ - @intFromEnum(decl_val), - }) catch unreachable; - - switch (try wasm.lowerConst(name, tv, src_loc)) { - .ok => |atom_index| wasm.anon_decls.values()[gop.index] = atom_index, - .fail => |em| return .{ .fail = em }, - } - } - - const atom = wasm.getAtomPtr(wasm.anon_decls.values()[gop.index]); - atom.alignment = switch (atom.alignment) { - .none => explicit_alignment, - else => switch (explicit_alignment) { - .none => atom.alignment, - else => atom.alignment.maxStrict(explicit_alignment), - }, - }; - return .ok; + _ = wasm; + _ = decl_val; + _ = explicit_alignment; + _ = src_loc; } pub fn getAnonDeclVAddr(wasm: *Wasm, decl_val: InternPool.Index, reloc_info: link.File.RelocInfo) !u64 { - const gpa = wasm.base.comp.gpa; - const target = wasm.base.comp.root_mod.resolved_target.result; - const atom_index = wasm.anon_decls.get(decl_val).?; - const target_symbol_index = wasm.getAtom(atom_index).getSymbolIndex().?; - - const parent_atom_index = wasm.symbol_atom.get(.{ .file = null, .index = reloc_info.parent_atom_index }).?; - const parent_atom = wasm.getAtomPtr(parent_atom_index); - const is_wasm32 = target.cpu.arch == .wasm32; - const mod = wasm.base.comp.module.?; - const ty = Type.fromInterned(mod.intern_pool.typeOf(decl_val)); - if (ty.zigTypeTag(mod) == .Fn) { - assert(reloc_info.addend == 0); // addend not allowed for function relocations - // We found a 
function pointer, so add it to our table, - // as function pointers are not allowed to be stored inside the data section. - // They are instead stored in a function table which are called by index. - try wasm.addTableFunction(target_symbol_index); - try parent_atom.relocs.append(gpa, .{ - .index = target_symbol_index, - .offset = @intCast(reloc_info.offset), - .relocation_type = if (is_wasm32) .R_WASM_TABLE_INDEX_I32 else .R_WASM_TABLE_INDEX_I64, - }); - } else { - try parent_atom.relocs.append(gpa, .{ - .index = target_symbol_index, - .offset = @intCast(reloc_info.offset), - .relocation_type = if (is_wasm32) .R_WASM_MEMORY_ADDR_I32 else .R_WASM_MEMORY_ADDR_I64, - .addend = @intCast(reloc_info.addend), - }); - } - - // we do not know the final address at this point, - // as atom allocation will determine the address and relocations - // will calculate and rewrite this. Therefore, we simply return the symbol index - // that was targeted. - return target_symbol_index; + _ = wasm; + _ = decl_val; + _ = reloc_info; } pub fn deleteDeclExport( @@ -1909,19 +1534,9 @@ pub fn deleteDeclExport( decl_index: InternPool.DeclIndex, name: InternPool.NullTerminatedString, ) void { - _ = name; if (wasm.llvm_object) |_| return; - const atom_index = wasm.decls.get(decl_index) orelse return; - const sym_index = wasm.getAtom(atom_index).sym_index; - const loc: SymbolLoc = .{ .file = null, .index = sym_index }; - const symbol = loc.getSymbol(wasm); - const symbol_name = wasm.string_table.get(symbol.name); - log.debug("Deleting export for decl '{s}'", .{symbol_name}); - if (wasm.export_names.fetchRemove(loc)) |kv| { - assert(wasm.globals.remove(kv.value)); - } else { - assert(wasm.globals.remove(symbol.name)); - } + _ = name; + _ = decl_index; } pub fn updateExports( @@ -1934,159 +1549,10 @@ pub fn updateExports( @panic("Attempted to compile for object format that was disabled by build configuration"); } if (wasm.llvm_object) |llvm_object| return llvm_object.updateExports(mod, exported, 
exports); - - const decl_index = switch (exported) { - .decl_index => |i| i, - .value => |val| { - _ = val; - @panic("TODO: implement Wasm linker code for exporting a constant value"); - }, - }; - const decl = mod.declPtr(decl_index); - const atom_index = try wasm.getOrCreateAtomForDecl(decl_index); - const atom = wasm.getAtom(atom_index); - const atom_sym = atom.symbolLoc().getSymbol(wasm).*; - const gpa = mod.gpa; - - for (exports) |exp| { - if (mod.intern_pool.stringToSliceUnwrap(exp.opts.section)) |section| { - try mod.failed_exports.putNoClobber(gpa, exp, try Module.ErrorMsg.create( - gpa, - decl.srcLoc(mod), - "Unimplemented: ExportOptions.section '{s}'", - .{section}, - )); - continue; - } - - const exported_decl_index = switch (exp.exported) { - .value => { - try mod.failed_exports.putNoClobber(gpa, exp, try Module.ErrorMsg.create( - gpa, - decl.srcLoc(mod), - "Unimplemented: exporting a named constant value", - .{}, - )); - continue; - }, - .decl_index => |i| i, - }; - const exported_atom_index = try wasm.getOrCreateAtomForDecl(exported_decl_index); - const exported_atom = wasm.getAtom(exported_atom_index); - const export_name = try wasm.string_table.put(gpa, mod.intern_pool.stringToSlice(exp.opts.name)); - const sym_loc = exported_atom.symbolLoc(); - const symbol = sym_loc.getSymbol(wasm); - symbol.setGlobal(true); - symbol.setUndefined(false); - symbol.index = atom_sym.index; - symbol.tag = atom_sym.tag; - symbol.name = atom_sym.name; - - switch (exp.opts.linkage) { - .Internal => { - symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); - symbol.setFlag(.WASM_SYM_BINDING_WEAK); - }, - .Weak => { - symbol.setFlag(.WASM_SYM_BINDING_WEAK); - }, - .Strong => {}, // symbols are strong by default - .LinkOnce => { - try mod.failed_exports.putNoClobber(gpa, exp, try Module.ErrorMsg.create( - gpa, - decl.srcLoc(mod), - "Unimplemented: LinkOnce", - .{}, - )); - continue; - }, - } - - if (wasm.globals.get(export_name)) |existing_loc| { - if (existing_loc.index == 
atom.sym_index) continue; - const existing_sym: Symbol = existing_loc.getSymbol(wasm).*; - - if (!existing_sym.isUndefined()) blk: { - if (symbol.isWeak()) { - try wasm.discarded.put(gpa, existing_loc, sym_loc); - continue; // to-be-exported symbol is weak, so we keep the existing symbol - } - - // new symbol is not weak while existing is, replace existing symbol - if (existing_sym.isWeak()) { - break :blk; - } - // When both the to-be-exported symbol and the already existing symbol - // are strong symbols, we have a linker error. - // In the other case we replace one with the other. - try mod.failed_exports.put(gpa, exp, try Module.ErrorMsg.create( - gpa, - decl.srcLoc(mod), - \\LinkError: symbol '{}' defined multiple times - \\ first definition in '{s}' - \\ next definition in '{s}' - , - .{ exp.opts.name.fmt(&mod.intern_pool), wasm.name, wasm.name }, - )); - continue; - } - - // in this case the existing symbol must be replaced either because it's weak or undefined. - try wasm.discarded.put(gpa, existing_loc, sym_loc); - _ = wasm.imports.remove(existing_loc); - _ = wasm.undefs.swapRemove(existing_sym.name); - } - - // Ensure the symbol will be exported using the given name - if (!mod.intern_pool.stringEqlSlice(exp.opts.name, sym_loc.getName(wasm))) { - try wasm.export_names.put(gpa, sym_loc, export_name); - } - - try wasm.globals.put( - gpa, - export_name, - sym_loc, - ); - } } pub fn freeDecl(wasm: *Wasm, decl_index: InternPool.DeclIndex) void { if (wasm.llvm_object) |llvm_object| return llvm_object.freeDecl(decl_index); - const gpa = wasm.base.comp.gpa; - const mod = wasm.base.comp.module.?; - const decl = mod.declPtr(decl_index); - const atom_index = wasm.decls.get(decl_index).?; - const atom = wasm.getAtomPtr(atom_index); - atom.prev = null; - wasm.symbols_free_list.append(gpa, atom.sym_index) catch {}; - _ = wasm.decls.remove(decl_index); - wasm.symbols.items[atom.sym_index].tag = .dead; - for (atom.locals.items) |local_atom_index| { - const local_atom = 
wasm.getAtom(local_atom_index); - const local_symbol = &wasm.symbols.items[local_atom.sym_index]; - local_symbol.tag = .dead; // also for any local symbol - wasm.symbols_free_list.append(gpa, local_atom.sym_index) catch {}; - assert(wasm.resolved_symbols.swapRemove(local_atom.symbolLoc())); - assert(wasm.symbol_atom.remove(local_atom.symbolLoc())); - } - - if (decl.isExtern(mod)) { - _ = wasm.imports.remove(atom.symbolLoc()); - } - _ = wasm.resolved_symbols.swapRemove(atom.symbolLoc()); - _ = wasm.symbol_atom.remove(atom.symbolLoc()); - - // if (wasm.dwarf) |*dwarf| { - // dwarf.freeDecl(decl_index); - // } - -} - -/// Appends a new entry to the indirect function table -pub fn addTableFunction(wasm: *Wasm, symbol_index: u32) !void { - const gpa = wasm.base.comp.gpa; - const index: u32 = @intCast(wasm.function_table.count()); - try wasm.function_table.put(gpa, .{ .file = null, .index = symbol_index }, index); } /// Assigns indexes to all indirect functions. @@ -2118,203 +1584,6 @@ fn mapFunctionTable(wasm: *Wasm) void { } } -/// Either creates a new import, or updates one if existing. -/// When `type_index` is non-null, we assume an external function. -/// In all other cases, a data-symbol will be created instead. -pub fn addOrUpdateImport( - wasm: *Wasm, - /// Name of the import - name: []const u8, - /// Symbol index that is external - symbol_index: u32, - /// Optional library name (i.e. `extern "c" fn foo() void` - lib_name: ?[:0]const u8, - /// The index of the type that represents the function signature - /// when the extern is a function. When this is null, a data-symbol - /// is asserted instead. - type_index: ?u32, -) !void { - const gpa = wasm.base.comp.gpa; - assert(symbol_index != 0); - // For the import name, we use the decl's name, rather than the fully qualified name - // Also mangle the name when the lib name is set and not equal to "C" so imports with the same - // name but different module can be resolved correctly. 
- const mangle_name = lib_name != null and - !std.mem.eql(u8, lib_name.?, "c"); - const full_name = if (mangle_name) full_name: { - break :full_name try std.fmt.allocPrint(gpa, "{s}|{s}", .{ name, lib_name.? }); - } else name; - defer if (mangle_name) gpa.free(full_name); - - const decl_name_index = try wasm.string_table.put(gpa, full_name); - const symbol: *Symbol = &wasm.symbols.items[symbol_index]; - symbol.setUndefined(true); - symbol.setGlobal(true); - symbol.name = decl_name_index; - if (mangle_name) { - // we specified a specific name for the symbol that does not match the import name - symbol.setFlag(.WASM_SYM_EXPLICIT_NAME); - } - const global_gop = try wasm.globals.getOrPut(gpa, decl_name_index); - if (!global_gop.found_existing) { - const loc: SymbolLoc = .{ .file = null, .index = symbol_index }; - global_gop.value_ptr.* = loc; - try wasm.resolved_symbols.put(gpa, loc, {}); - try wasm.undefs.putNoClobber(gpa, decl_name_index, loc); - } else if (global_gop.value_ptr.*.index != symbol_index) { - // We are not updating a symbol, but found an existing global - // symbol with the same name. This means we always favor the - // existing symbol, regardless whether it's defined or not. - // We can also skip storing the import as we will not output - // this symbol. 
- return wasm.discarded.put( - gpa, - .{ .file = null, .index = symbol_index }, - global_gop.value_ptr.*, - ); - } - - if (type_index) |ty_index| { - const gop = try wasm.imports.getOrPut(gpa, .{ .index = symbol_index, .file = null }); - const module_name = if (lib_name) |l_name| blk: { - break :blk l_name; - } else wasm.host_name; - if (!gop.found_existing) { - gop.value_ptr.* = .{ - .module_name = try wasm.string_table.put(gpa, module_name), - .name = try wasm.string_table.put(gpa, name), - .kind = .{ .function = ty_index }, - }; - } - } else { - // non-functions will not be imported from the runtime, but only resolved during link-time - symbol.tag = .data; - } -} - -/// Kind represents the type of an Atom, which is only -/// used to parse a decl into an Atom to define in which section -/// or segment it should be placed. -const Kind = union(enum) { - /// Represents the segment the data symbol should - /// be inserted into. - /// TODO: Add TLS segments - data: enum { - read_only, - uninitialized, - initialized, - }, - function: void, - - /// Returns the segment name the data kind represents. - /// Asserts `kind` has its active tag set to `data`. - fn segmentName(kind: Kind) []const u8 { - switch (kind.data) { - .read_only => return ".rodata.", - .uninitialized => return ".bss.", - .initialized => return ".data.", - } - } -}; - -/// Parses an Atom and inserts its metadata into the corresponding sections. -fn parseAtom(wasm: *Wasm, atom_index: Atom.Index, kind: Kind) !void { - const comp = wasm.base.comp; - const gpa = comp.gpa; - const shared_memory = comp.config.shared_memory; - const import_memory = comp.config.import_memory; - const atom = wasm.getAtomPtr(atom_index); - const symbol = (SymbolLoc{ .file = null, .index = atom.sym_index }).getSymbol(wasm); - const do_garbage_collect = wasm.base.gc_sections; - - if (symbol.isDead() and do_garbage_collect) { - // Prevent unreferenced symbols from being parsed. 
- return; - } - - const final_index: u32 = switch (kind) { - .function => result: { - const index: u32 = @intCast(wasm.functions.count() + wasm.imported_functions_count); - const type_index = wasm.atom_types.get(atom_index).?; - try wasm.functions.putNoClobber( - gpa, - .{ .file = null, .index = index }, - .{ .func = .{ .type_index = type_index }, .sym_index = atom.sym_index }, - ); - symbol.tag = .function; - symbol.index = index; - - if (wasm.code_section_index == null) { - wasm.code_section_index = @intCast(wasm.segments.items.len); - try wasm.segments.append(gpa, .{ - .alignment = atom.alignment, - .size = atom.size, - .offset = 0, - .flags = 0, - }); - } - - break :result wasm.code_section_index.?; - }, - .data => result: { - const segment_name = try std.mem.concat(gpa, u8, &.{ - kind.segmentName(), - wasm.string_table.get(symbol.name), - }); - errdefer gpa.free(segment_name); - const segment_info: types.Segment = .{ - .name = segment_name, - .alignment = atom.alignment, - .flags = 0, - }; - symbol.tag = .data; - - // when creating an object file, or importing memory and the data belongs in the .bss segment - // we set the entire region of it to zeroes. - // We do not have to do this when exporting the memory (the default) because the runtime - // will do it for us, and we do not emit the bss segment at all. 
- if ((wasm.base.comp.config.output_mode == .Obj or import_memory) and kind.data == .uninitialized) { - @memset(atom.code.items, 0); - } - - const should_merge = wasm.base.comp.config.output_mode != .Obj; - const gop = try wasm.data_segments.getOrPut(gpa, segment_info.outputName(should_merge)); - if (gop.found_existing) { - const index = gop.value_ptr.*; - wasm.segments.items[index].size += atom.size; - - symbol.index = @intCast(wasm.segment_info.getIndex(index).?); - // segment info already exists, so free its memory - gpa.free(segment_name); - break :result index; - } else { - const index: u32 = @intCast(wasm.segments.items.len); - var flags: u32 = 0; - if (shared_memory) { - flags |= @intFromEnum(Segment.Flag.WASM_DATA_SEGMENT_IS_PASSIVE); - } - try wasm.segments.append(gpa, .{ - .alignment = atom.alignment, - .size = 0, - .offset = 0, - .flags = flags, - }); - gop.value_ptr.* = index; - - const info_index: u32 = @intCast(wasm.segment_info.count()); - try wasm.segment_info.put(gpa, index, segment_info); - symbol.index = info_index; - break :result index; - } - }, - }; - - const segment: *Segment = &wasm.segments.items[final_index]; - segment.alignment = segment.alignment.max(atom.alignment); - - try wasm.appendAtomAtIndex(final_index, atom_index); -} - /// From a given index, append the given `Atom` at the back of the linked list. /// Simply inserts it into the map of atoms when it doesn't exist yet. pub fn appendAtomAtIndex(wasm: *Wasm, index: u32, atom_index: Atom.Index) !void { @@ -2328,40 +1597,9 @@ pub fn appendAtomAtIndex(wasm: *Wasm, index: u32, atom_index: Atom.Index) !void } } -/// Allocates debug atoms into their respective debug sections -/// to merge them with maybe-existing debug atoms from object files. 
-fn allocateDebugAtoms(wasm: *Wasm) !void { - if (wasm.dwarf == null) return; - - const allocAtom = struct { - fn f(bin: *Wasm, maybe_index: *?u32, atom_index: Atom.Index) !void { - const index = maybe_index.* orelse idx: { - const index = @as(u32, @intCast(bin.segments.items.len)); - try bin.appendDummySegment(); - maybe_index.* = index; - break :idx index; - }; - const atom = bin.getAtomPtr(atom_index); - atom.size = @as(u32, @intCast(atom.code.items.len)); - bin.symbols.items[atom.sym_index].index = index; - try bin.appendAtomAtIndex(index, atom_index); - } - }.f; - - try allocAtom(wasm, &wasm.debug_info_index, wasm.debug_info_atom.?); - try allocAtom(wasm, &wasm.debug_line_index, wasm.debug_line_atom.?); - try allocAtom(wasm, &wasm.debug_loc_index, wasm.debug_loc_atom.?); - try allocAtom(wasm, &wasm.debug_str_index, wasm.debug_str_atom.?); - try allocAtom(wasm, &wasm.debug_ranges_index, wasm.debug_ranges_atom.?); - try allocAtom(wasm, &wasm.debug_abbrev_index, wasm.debug_abbrev_atom.?); - try allocAtom(wasm, &wasm.debug_pubnames_index, wasm.debug_pubnames_atom.?); - try allocAtom(wasm, &wasm.debug_pubtypes_index, wasm.debug_pubtypes_atom.?); -} - fn allocateAtoms(wasm: *Wasm) !void { // first sort the data segments try sortDataSegments(wasm); - try allocateDebugAtoms(wasm); var it = wasm.atoms.iterator(); while (it.next()) |entry| { @@ -2382,7 +1620,7 @@ fn allocateAtoms(wasm: *Wasm) !void { const sym = if (symbol_loc.file) |object_index| sym: { const object = wasm.objects.items[object_index]; break :sym object.symtable[symbol_loc.index]; - } else wasm.symbols.items[symbol_loc.index]; + } else wasm.synthetic_symbols.items[symbol_loc.index]; // Dead symbols must be unlinked from the linked-list to prevent them // from being emit into the binary. @@ -2521,34 +1759,6 @@ fn setupInitFunctions(wasm: *Wasm) !void { } } -/// Generates an atom containing the global error set' size. -/// This will only be generated if the symbol exists. 
-fn setupErrorsLen(wasm: *Wasm) !void { - const gpa = wasm.base.comp.gpa; - const loc = wasm.findGlobalSymbol("__zig_errors_len") orelse return; - - const errors_len = wasm.base.comp.module.?.global_error_set.count(); - // overwrite existing atom if it already exists (maybe the error set has increased) - // if not, allcoate a new atom. - const atom_index = if (wasm.symbol_atom.get(loc)) |index| blk: { - const atom = wasm.getAtomPtr(index); - atom.deinit(gpa); - break :blk index; - } else new_atom: { - const atom_index: Atom.Index = @intCast(wasm.managed_atoms.items.len); - try wasm.symbol_atom.put(gpa, loc, atom_index); - try wasm.managed_atoms.append(gpa, undefined); - break :new_atom atom_index; - }; - const atom = wasm.getAtomPtr(atom_index); - atom.* = Atom.empty; - atom.sym_index = loc.index; - atom.size = 2; - try atom.code.writer(gpa).writeInt(u16, @intCast(errors_len), .little); - - try wasm.parseAtom(atom_index, .{ .data = .read_only }); -} - /// Creates a function body for the `__wasm_call_ctors` symbol. /// Loops over all constructors found in `init_funcs` and calls them /// respectively based on their priority which was sorted by `setupInitFunctions`. @@ -3278,139 +2488,6 @@ fn appendDummySegment(wasm: *Wasm) !void { }); } -/// Returns the symbol index of the error name table. -/// -/// When the symbol does not yet exist, it will create a new one instead. -pub fn getErrorTableSymbol(wasm: *Wasm) !u32 { - if (wasm.error_table_symbol) |symbol| { - return symbol; - } - - // no error was referenced yet, so create a new symbol and atom for it - // and then return said symbol's index. The final table will be populated - // during `flush` when we know all possible error names. 
- - const gpa = wasm.base.comp.gpa; - const atom_index = try wasm.createAtom(); - const atom = wasm.getAtomPtr(atom_index); - const slice_ty = Type.slice_const_u8_sentinel_0; - const mod = wasm.base.comp.module.?; - atom.alignment = slice_ty.abiAlignment(mod); - const sym_index = atom.sym_index; - - const sym_name = try wasm.string_table.put(gpa, "__zig_err_name_table"); - const symbol = &wasm.symbols.items[sym_index]; - symbol.* = .{ - .name = sym_name, - .tag = .data, - .flags = 0, - .index = 0, - .virtual_address = undefined, - }; - symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); - symbol.mark(); - - try wasm.resolved_symbols.put(gpa, atom.symbolLoc(), {}); - - log.debug("Error name table was created with symbol index: ({d})", .{sym_index}); - wasm.error_table_symbol = sym_index; - return sym_index; -} - -/// Populates the error name table, when `error_table_symbol` is not null. -/// -/// This creates a table that consists of pointers and length to each error name. -/// The table is what is being pointed to within the runtime bodies that are generated. -fn populateErrorNameTable(wasm: *Wasm) !void { - const gpa = wasm.base.comp.gpa; - const symbol_index = wasm.error_table_symbol orelse return; - const atom_index = wasm.symbol_atom.get(.{ .file = null, .index = symbol_index }).?; - - // Rather than creating a symbol for each individual error name, - // we create a symbol for the entire region of error names. We then calculate - // the pointers into the list using addends which are appended to the relocation. 
- const names_atom_index = try wasm.createAtom(); - const names_atom = wasm.getAtomPtr(names_atom_index); - names_atom.alignment = .@"1"; - const sym_name = try wasm.string_table.put(gpa, "__zig_err_names"); - const names_symbol = &wasm.symbols.items[names_atom.sym_index]; - names_symbol.* = .{ - .name = sym_name, - .tag = .data, - .flags = 0, - .index = 0, - .virtual_address = undefined, - }; - names_symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); - names_symbol.mark(); - - log.debug("Populating error names", .{}); - - // Addend for each relocation to the table - var addend: u32 = 0; - const mod = wasm.base.comp.module.?; - for (mod.global_error_set.keys()) |error_name_nts| { - const atom = wasm.getAtomPtr(atom_index); - - const error_name = mod.intern_pool.stringToSlice(error_name_nts); - const len = @as(u32, @intCast(error_name.len + 1)); // names are 0-termianted - - const slice_ty = Type.slice_const_u8_sentinel_0; - const offset = @as(u32, @intCast(atom.code.items.len)); - // first we create the data for the slice of the name - try atom.code.appendNTimes(gpa, 0, 4); // ptr to name, will be relocated - try atom.code.writer(gpa).writeInt(u32, len - 1, .little); - // create relocation to the error name - try atom.relocs.append(gpa, .{ - .index = names_atom.sym_index, - .relocation_type = .R_WASM_MEMORY_ADDR_I32, - .offset = offset, - .addend = @as(i32, @intCast(addend)), - }); - atom.size += @as(u32, @intCast(slice_ty.abiSize(mod))); - addend += len; - - // as we updated the error name table, we now store the actual name within the names atom - try names_atom.code.ensureUnusedCapacity(gpa, len); - names_atom.code.appendSliceAssumeCapacity(error_name); - names_atom.code.appendAssumeCapacity(0); - - log.debug("Populated error name: '{s}'", .{error_name}); - } - names_atom.size = addend; - - const name_loc = names_atom.symbolLoc(); - try wasm.resolved_symbols.put(gpa, name_loc, {}); - try wasm.symbol_atom.put(gpa, name_loc, names_atom_index); - - // link the atoms with 
the rest of the binary so they can be allocated - // and relocations will be performed. - try wasm.parseAtom(atom_index, .{ .data = .read_only }); - try wasm.parseAtom(names_atom_index, .{ .data = .read_only }); -} - -/// From a given index variable, creates a new debug section. -/// This initializes the index, appends a new segment, -/// and finally, creates a managed `Atom`. -pub fn createDebugSectionForIndex(wasm: *Wasm, index: *?u32, name: []const u8) !Atom.Index { - const gpa = wasm.base.comp.gpa; - const new_index: u32 = @intCast(wasm.segments.items.len); - index.* = new_index; - try wasm.appendDummySegment(); - - const atom_index = try wasm.createAtom(); - const atom = wasm.getAtomPtr(atom_index); - wasm.symbols.items[atom.sym_index] = .{ - .tag = .section, - .name = try wasm.string_table.put(gpa, name), - .index = 0, - .flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL), - }; - - atom.alignment = .@"1"; // debug sections are always 1-byte-aligned - return atom_index; -} - fn resetState(wasm: *Wasm) void { const gpa = wasm.base.comp.gpa; @@ -3418,16 +2495,19 @@ fn resetState(wasm: *Wasm) void { gpa.free(segment_info.name); } - var atom_it = wasm.decls.valueIterator(); - while (atom_it.next()) |atom_index| { - const atom = wasm.getAtomPtr(atom_index.*); - atom.prev = null; + // TODO: Revisit + // var atom_it = wasm.decls.valueIterator(); + // while (atom_it.next()) |atom_index| { + // const atom = wasm.getAtomPtr(atom_index.*); + // atom.next = null; + // atom.prev = null; - for (atom.locals.items) |local_atom_index| { - const local_atom = wasm.getAtomPtr(local_atom_index); - local_atom.prev = null; - } - } + // for (atom.locals.items) |local_atom_index| { + // const local_atom = wasm.getAtomPtr(local_atom_index); + // local_atom.next = null; + // local_atom.prev = null; + // } + // } wasm.functions.clearRetainingCapacity(); wasm.exports.clearRetainingCapacity(); @@ -3684,7 +2764,7 @@ pub fn flushModule(wasm: *Wasm, arena: Allocator, prog_node: 
*std.Progress.Node) defer sub_prog_node.end(); // ensure the error names table is populated when an error name is referenced - try wasm.populateErrorNameTable(); + // try wasm.populateErrorNameTable(); const objects = comp.objects; @@ -3722,66 +2802,66 @@ pub fn flushModule(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) try wasm.setupInitFunctions(); try wasm.setupStart(); try wasm.markReferences(); - try wasm.setupErrorsLen(); + // try wasm.setupErrorsLen(); try wasm.setupImports(); - if (comp.module) |mod| { - var decl_it = wasm.decls.iterator(); - while (decl_it.next()) |entry| { - const decl = mod.declPtr(entry.key_ptr.*); - if (decl.isExtern(mod)) continue; - const atom_index = entry.value_ptr.*; - const atom = wasm.getAtomPtr(atom_index); - if (decl.ty.zigTypeTag(mod) == .Fn) { - try wasm.parseAtom(atom_index, .function); - } else if (decl.getOwnedVariable(mod)) |variable| { - if (variable.is_const) { - try wasm.parseAtom(atom_index, .{ .data = .read_only }); - } else if (Value.fromInterned(variable.init).isUndefDeep(mod)) { - // for safe build modes, we store the atom in the data segment, - // whereas for unsafe build modes we store it in bss. - const decl_namespace = mod.namespacePtr(decl.src_namespace); - const optimize_mode = decl_namespace.file_scope.mod.optimize_mode; - const is_initialized = switch (optimize_mode) { - .Debug, .ReleaseSafe => true, - .ReleaseFast, .ReleaseSmall => false, - }; - try wasm.parseAtom(atom_index, .{ .data = if (is_initialized) .initialized else .uninitialized }); - } else { - // when the decl is all zeroes, we store the atom in the bss segment, - // in all other cases it will be in the data segment. 
- const is_zeroes = for (atom.code.items) |byte| { - if (byte != 0) break false; - } else true; - try wasm.parseAtom(atom_index, .{ .data = if (is_zeroes) .uninitialized else .initialized }); - } - } else { - try wasm.parseAtom(atom_index, .{ .data = .read_only }); - } + // if (comp.module) |mod| { + // var decl_it = wasm.decls.iterator(); + // while (decl_it.next()) |entry| { + // const decl = mod.declPtr(entry.key_ptr.*); + // if (decl.isExtern(mod)) continue; + // const atom_index = entry.value_ptr.*; + // const atom = wasm.getAtomPtr(atom_index); + // if (decl.ty.zigTypeTag(mod) == .Fn) { + // try wasm.parseAtom(atom_index, .function); + // } else if (decl.getOwnedVariable(mod)) |variable| { + // if (variable.is_const) { + // try wasm.parseAtom(atom_index, .{ .data = .read_only }); + // } else if (Value.fromInterned(variable.init).isUndefDeep(mod)) { + // // for safe build modes, we store the atom in the data segment, + // // whereas for unsafe build modes we store it in bss. + // const decl_namespace = mod.namespacePtr(decl.src_namespace); + // const optimize_mode = decl_namespace.file_scope.mod.optimize_mode; + // const is_initialized = switch (optimize_mode) { + // .Debug, .ReleaseSafe => true, + // .ReleaseFast, .ReleaseSmall => false, + // }; + // try wasm.parseAtom(atom_index, .{ .data = if (is_initialized) .initialized else .uninitialized }); + // } else { + // // when the decl is all zeroes, we store the atom in the bss segment, + // // in all other cases it will be in the data segment. 
+ // const is_zeroes = for (atom.code.items) |byte| { + // if (byte != 0) break false; + // } else true; + // try wasm.parseAtom(atom_index, .{ .data = if (is_zeroes) .uninitialized else .initialized }); + // } + // } else { + // try wasm.parseAtom(atom_index, .{ .data = .read_only }); + // } - // also parse atoms for a decl's locals - for (atom.locals.items) |local_atom_index| { - try wasm.parseAtom(local_atom_index, .{ .data = .read_only }); - } - } - // parse anonymous declarations - for (wasm.anon_decls.keys(), wasm.anon_decls.values()) |decl_val, atom_index| { - const ty = Type.fromInterned(mod.intern_pool.typeOf(decl_val)); - if (ty.zigTypeTag(mod) == .Fn) { - try wasm.parseAtom(atom_index, .function); - } else { - try wasm.parseAtom(atom_index, .{ .data = .read_only }); - } - } + // // also parse atoms for a decl's locals + // for (atom.locals.items) |local_atom_index| { + // try wasm.parseAtom(local_atom_index, .{ .data = .read_only }); + // } + // } + // // parse anonymous declarations + // for (wasm.anon_decls.keys(), wasm.anon_decls.values()) |decl_val, atom_index| { + // const ty = Type.fromInterned(mod.intern_pool.typeOf(decl_val)); + // if (ty.zigTypeTag(mod) == .Fn) { + // try wasm.parseAtom(atom_index, .function); + // } else { + // try wasm.parseAtom(atom_index, .{ .data = .read_only }); + // } + // } - // also parse any backend-generated functions - for (wasm.synthetic_functions.items) |atom_index| { - try wasm.parseAtom(atom_index, .function); - } + // // also parse any backend-generated functions + // for (wasm.synthetic_functions.items) |atom_index| { + // try wasm.parseAtom(atom_index, .function); + // } - if (wasm.dwarf) |*dwarf| { - try dwarf.flushModule(comp.module.?); - } - } + // if (wasm.dwarf) |*dwarf| { + // try dwarf.flushModule(comp.module.?); + // } + // } try wasm.mergeSections(); try wasm.mergeTypes(); @@ -4194,16 +3274,6 @@ fn writeToFile( else => |mode| log.err("build-id '{s}' is not supported for WASM", .{@tagName(mode)}), } - 
// if (wasm.dwarf) |*dwarf| { - // const mod = comp.module.?; - // try dwarf.writeDbgAbbrev(); - // // for debug info and ranges, the address is always 0, - // // as locations are always offsets relative to 'code' section. - // try dwarf.writeDbgInfoHeader(mod, 0, code_section_size); - // try dwarf.writeDbgAranges(0, code_section_size); - // try dwarf.writeDbgLineHeader(); - // } - var debug_bytes = std.ArrayList(u8).init(gpa); defer debug_bytes.deinit(); @@ -5185,44 +4255,25 @@ fn hasPassiveInitializationSegments(wasm: *const Wasm) bool { return false; } -pub fn getTypeIndex(wasm: *const Wasm, func_type: std.wasm.Type) ?u32 { - var index: u32 = 0; - while (index < wasm.func_types.items.len) : (index += 1) { - if (wasm.func_types.items[index].eql(func_type)) return index; - } - return null; -} - /// Searches for a matching function signature. When no matching signature is found, /// a new entry will be made. The value returned is the index of the type within `wasm.func_types`. pub fn putOrGetFuncType(wasm: *Wasm, func_type: std.wasm.Type) !u32 { - if (wasm.getTypeIndex(func_type)) |index| { - return index; - } - const gpa = wasm.base.comp.gpa; - - // functype does not exist. - const index: u32 = @intCast(wasm.func_types.items.len); - const params = try gpa.dupe(std.wasm.Valtype, func_type.params); - errdefer gpa.free(params); - const returns = try gpa.dupe(std.wasm.Valtype, func_type.returns); - errdefer gpa.free(returns); - try wasm.func_types.append(gpa, .{ - .params = params, - .returns = returns, - }); - return index; + _ = wasm; + _ = func_type; } /// For the given `decl_index`, stores the corresponding type representing the function signature. /// Asserts declaration has an associated `Atom`. /// Returns the index into the list of types. 
pub fn storeDeclType(wasm: *Wasm, decl_index: InternPool.DeclIndex, func_type: std.wasm.Type) !u32 { - const gpa = wasm.base.comp.gpa; - const atom_index = wasm.decls.get(decl_index).?; - const index = try wasm.putOrGetFuncType(func_type); - try wasm.atom_types.put(gpa, atom_index, index); - return index; + _ = wasm; + _ = decl_index; + _ = func_type; + // const gpa = wasm.base.comp.gpa; + // const atom_index = wasm.decls.get(decl_index).?; + // const index = try wasm.putOrGetFuncType(func_type); + // try wasm.atom_types.put(gpa, atom_index, index); + // return index; } /// Verifies all resolved symbols and checks whether itself needs to be marked alive, diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index 275593f348..505d73a630 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -41,6 +41,36 @@ imported_globals_count: u32 = 0, /// of a new `ZigObject`. Codegen will make calls into this to create relocations for /// this symbol each time the stack pointer is moved. stack_pointer_sym: u32, +/// Debug information for the Zig module. +dwarf: ?Dwarf = null, +// Debug section atoms. These are only set when the current compilation +// unit contains Zig code. The lifetime of these atoms are extended +// until the end of the compiler's lifetime. Meaning they're not freed +// during `flush()` in incremental-mode. +debug_info_atom: ?Atom.Index = null, +debug_line_atom: ?Atom.Index = null, +debug_loc_atom: ?Atom.Index = null, +debug_ranges_atom: ?Atom.Index = null, +debug_abbrev_atom: ?Atom.Index = null, +debug_str_atom: ?Atom.Index = null, +debug_pubnames_atom: ?Atom.Index = null, +debug_pubtypes_atom: ?Atom.Index = null, +/// The index of the segment representing the custom '.debug_info' section. +debug_info_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_line' section. +debug_line_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_loc' section. 
+debug_loc_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_ranges' section. +debug_ranges_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_pubnames' section. +debug_pubnames_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_pubtypes' section. +debug_pubtypes_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_pubtypes' section. +debug_str_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_pubtypes' section. +debug_abbrev_index: ?u32 = null, /// Frees and invalidates all memory of the incrementally compiled Zig module. /// It is illegal behavior to access the `ZigObject` after calling `deinit`. @@ -80,6 +110,9 @@ pub fn deinit(zig_object: *ZigObject, gpa: std.mem.Allocator) void { zig_object.segment_info.deinit(gpa); zig_object.string_table.deinit(gpa); + if (zig_object.dwarf) |*dwarf| { + dwarf.deinit(); + } zig_object.* = undefined; } @@ -976,6 +1009,87 @@ fn setupErrorsLen(zig_object: *ZigObject, wasm_file: *Wasm) !void { // try wasm.parseAtom(atom_index, .{ .data = .read_only }); } +/// Initializes symbols and atoms for the debug sections +/// Initialization is only done when compiling Zig code. +/// When Zig is invoked as a linker instead, the atoms +/// and symbols come from the object files instead. +pub fn initDebugSections(zig_object: *ZigObject) !void { + if (zig_object.dwarf == null) return; // not compiling Zig code, so no need to pre-initialize debug sections + std.debug.assert(zig_object.debug_info_index == null); + // this will create an Atom and set the index for us. 
+ zig_object.debug_info_atom = try zig_object.createDebugSectionForIndex(&zig_object.debug_info_index, ".debug_info"); + zig_object.debug_line_atom = try zig_object.createDebugSectionForIndex(&zig_object.debug_line_index, ".debug_line"); + zig_object.debug_loc_atom = try zig_object.createDebugSectionForIndex(&zig_object.debug_loc_index, ".debug_loc"); + zig_object.debug_abbrev_atom = try zig_object.createDebugSectionForIndex(&zig_object.debug_abbrev_index, ".debug_abbrev"); + zig_object.debug_ranges_atom = try zig_object.createDebugSectionForIndex(&zig_object.debug_ranges_index, ".debug_ranges"); + zig_object.debug_str_atom = try zig_object.createDebugSectionForIndex(&zig_object.debug_str_index, ".debug_str"); + zig_object.debug_pubnames_atom = try zig_object.createDebugSectionForIndex(&zig_object.debug_pubnames_index, ".debug_pubnames"); + zig_object.debug_pubtypes_atom = try zig_object.createDebugSectionForIndex(&zig_object.debug_pubtypes_index, ".debug_pubtypes"); +} + +/// From a given index variable, creates a new debug section. +/// This initializes the index, appends a new segment, +/// and finally, creates a managed `Atom`. 
+pub fn createDebugSectionForIndex(zig_object: *ZigObject, wasm_file: *Wasm, index: *?u32, name: []const u8) !Atom.Index { + const gpa = wasm_file.base.comp.gpa; + const new_index: u32 = @intCast(zig_object.segments.items.len); + index.* = new_index; + try zig_object.appendDummySegment(); + + const sym_index = try zig_object.allocateSymbol(gpa); + const atom_index = try wasm_file.createAtom(sym_index); + const atom = wasm_file.getAtomPtr(atom_index); + zig_object.symbols.items[sym_index] = .{ + .tag = .section, + .name = try zig_object.string_table.put(gpa, name), + .index = 0, + .flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL), + }; + + atom.alignment = .@"1"; // debug sections are always 1-byte-aligned + return atom_index; +} + +pub fn updateDeclLineNumber(zig_object: *ZigObject, mod: *Module, decl_index: InternPool.DeclIndex) !void { + if (zig_object.dwarf) |*dw| { + const decl = mod.declPtr(decl_index); + const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + + log.debug("updateDeclLineNumber {s}{*}", .{ decl_name, decl }); + try dw.updateDeclLineNumber(mod, decl_index); + } +} + +/// Allocates debug atoms into their respective debug sections +/// to merge them with maybe-existing debug atoms from object files. 
+fn allocateDebugAtoms(zig_object: *ZigObject) !void { + if (zig_object.dwarf == null) return; + + const allocAtom = struct { + fn f(ctx: *ZigObject, maybe_index: *?u32, atom_index: Atom.Index) !void { + const index = maybe_index.* orelse idx: { + const index = @as(u32, @intCast(ctx.segments.items.len)); + try ctx.appendDummySegment(); + maybe_index.* = index; + break :idx index; + }; + const atom = ctx.getAtomPtr(atom_index); + atom.size = @as(u32, @intCast(atom.code.items.len)); + ctx.symbols.items[atom.sym_index].index = index; + try ctx.appendAtomAtIndex(index, atom_index); + } + }.f; + + try allocAtom(zig_object, &zig_object.debug_info_index, zig_object.debug_info_atom.?); + try allocAtom(zig_object, &zig_object.debug_line_index, zig_object.debug_line_atom.?); + try allocAtom(zig_object, &zig_object.debug_loc_index, zig_object.debug_loc_atom.?); + try allocAtom(zig_object, &zig_object.debug_str_index, zig_object.debug_str_atom.?); + try allocAtom(zig_object, &zig_object.debug_ranges_index, zig_object.debug_ranges_atom.?); + try allocAtom(zig_object, &zig_object.debug_abbrev_index, zig_object.debug_abbrev_atom.?); + try allocAtom(zig_object, &zig_object.debug_pubnames_index, zig_object.debug_pubnames_atom.?); + try allocAtom(zig_object, &zig_object.debug_pubtypes_index, zig_object.debug_pubtypes_atom.?); +} + const build_options = @import("build_options"); const builtin = @import("builtin"); const codegen = @import("../../codegen.zig"); @@ -986,6 +1100,7 @@ const types = @import("types.zig"); const Air = @import("../../Air.zig"); const Atom = @import("Atom.zig"); +const Dwarf = @import("../Dwarf.zig"); const InternPool = @import("../../InternPool.zig"); const Liveness = @import("../../Liveness.zig"); const Module = @import("../../Module.zig"); From 9b3c8fd3a8aef81f3a6face78f9e0b34508edc1b Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Sun, 14 Jan 2024 17:24:18 +0100 Subject: [PATCH 03/21] wasm: initialize a `ZigObject` when required When we have a 
ZigCompileUnit and don't use LLVM, we initialize the ZigObject which will encapsulate the Zig Module as an object file in-memory. During initialization we also create symbols which the object will need such as the stack pointer. --- src/link/Wasm.zig | 75 +++++++++++++++++++++++++------------ src/link/Wasm/ZigObject.zig | 43 ++++++++++++++++----- 2 files changed, 85 insertions(+), 33 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index cdc019c4fc..d9e1432b63 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -1,37 +1,41 @@ const Wasm = @This(); const std = @import("std"); -const builtin = @import("builtin"); -const mem = std.mem; -const Allocator = std.mem.Allocator; + const assert = std.debug.assert; +const build_options = @import("build_options"); +const builtin = @import("builtin"); +const codegen = @import("../codegen.zig"); const fs = std.fs; const leb = std.leb; -const log = std.log.scoped(.link); - -pub const Atom = @import("Wasm/Atom.zig"); -const Dwarf = @import("Dwarf.zig"); -const Module = @import("../Module.zig"); -const InternPool = @import("../InternPool.zig"); -const Compilation = @import("../Compilation.zig"); -const CodeGen = @import("../arch/wasm/CodeGen.zig"); -const codegen = @import("../codegen.zig"); const link = @import("../link.zig"); const lldMain = @import("../main.zig").lldMain; +const log = std.log.scoped(.link); +const mem = std.mem; const trace = @import("../tracy.zig").trace; -const build_options = @import("build_options"); -const wasi_libc = @import("../wasi_libc.zig"); -const Cache = std.Build.Cache; -const Type = @import("../type.zig").Type; -const Value = @import("../Value.zig"); -const TypedValue = @import("../TypedValue.zig"); -const LlvmObject = @import("../codegen/llvm.zig").Object; -const Air = @import("../Air.zig"); -const Liveness = @import("../Liveness.zig"); -const Symbol = @import("Wasm/Symbol.zig"); -const Object = @import("Wasm/Object.zig"); -const Archive = @import("Wasm/Archive.zig"); const
types = @import("Wasm/types.zig"); +const wasi_libc = @import("../wasi_libc.zig"); + +const Air = @import("../Air.zig"); +const Allocator = std.mem.Allocator; +const Archive = @import("Wasm/Archive.zig"); +const Cache = std.Build.Cache; +const CodeGen = @import("../arch/wasm/CodeGen.zig"); +const Compilation = @import("../Compilation.zig"); +const Dwarf = @import("Dwarf.zig"); +const File = @import("Wasm/file.zig").File; +const InternPool = @import("../InternPool.zig"); +const Liveness = @import("../Liveness.zig"); +const LlvmObject = @import("../codegen/llvm.zig").Object; +const Module = @import("../Module.zig"); +const Object = @import("Wasm/Object.zig"); +const Symbol = @import("Wasm/Symbol.zig"); +const Type = @import("../type.zig").Type; +const TypedValue = @import("../TypedValue.zig"); +const Value = @import("../value.zig").Value; +const ZigObject = @import("Wasm/ZigObject.zig"); + +pub const Atom = @import("Wasm/Atom.zig"); pub const Relocation = types.Relocation; pub const base_tag: link.File.Tag = .wasm; @@ -57,6 +61,11 @@ export_table: bool, name: []const u8, /// If this is not null, an object file is created by LLVM and linked with LLD afterwards. llvm_object: ?*LlvmObject = null, +/// The file index of a `ZigObject`. This will only contain a valid index when a zcu exists, +/// and the chosen backend is the Wasm backend. +zig_object_index: File.Index = .null, +/// List of relocatable files to be linked into the final binary. +files: std.MultiArrayList(File.Entry) = .{}, /// When importing objects from the host environment, a name must be supplied. /// LLVM uses "env" by default when none is given. This would be a good default for Zig /// to support existing code. 
@@ -556,9 +565,27 @@ pub fn createEmpty( } } + if (comp.module) |zcu| { + if (!use_llvm) { + const index: File.Index = @enumFromInt(wasm.files.len); + var zig_object: ZigObject = .{ + .path = try std.fmt.allocPrint(gpa, "{s}.o", .{std.fs.path.stem(zcu.main_mod.root_src_path)}), + .stack_pointer_sym = undefined, + }; + try zig_object.init(wasm); + try wasm.files.append(gpa, .{ .zig_object = zig_object }); + wasm.zig_object_index = index; + } + } + return wasm; } +fn zigObjectPtr(wasm: *Wasm) ?*ZigObject { + if (wasm.zig_object_index == .null) return null; + return &wasm.files.items(.data)[@intFromEnum(wasm.zig_object_index)].zig_object; +} + /// For a given name, creates a new global synthetic symbol. /// Leaves index undefined and the default flags (0). fn createSyntheticSymbol(wasm: *Wasm, name: []const u8, tag: Symbol.Tag) !SymbolLoc { diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index 505d73a630..4f32f6891e 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -3,6 +3,7 @@ //! and any relocations that may have been emitted. //! Think about this as fake in-memory Object file for the Zig module. +path: []const u8, /// List of all `Decl` that are currently alive. /// Each index maps to the corresponding `Atom.Index`. decls: std.AutoHashMapUnmanaged(InternPool.DeclIndex, Atom.Index) = .{}, @@ -22,7 +23,7 @@ global_syms: std.AutoHashMapUnmanaged(u32, u32) = .{}, /// List of symbol indexes which are free to be used. symbols_free_list: std.ArrayListUnmanaged(u32) = .{}, /// Extra metadata about the linking section, such as alignment of segments and their name. -segment_info: std.ArrayListUnmanage(types.Segment) = &.{}, +segment_info: std.ArrayListUnmanaged(types.Segment) = .{}, /// File encapsulated string table, used to deduplicate strings within the generated file. string_table: StringTable = .{}, /// Map for storing anonymous declarations. Each anonymous decl maps to its Atom's index. 
@@ -72,6 +73,30 @@ debug_str_index: ?u32 = null, /// The index of the segment representing the custom '.debug_pubtypes' section. debug_abbrev_index: ?u32 = null, +/// Initializes the `ZigObject` with initial symbols. +pub fn init(zig_object: *ZigObject, wasm_file: *Wasm) !void { + // Initialize an undefined global with the name __stack_pointer. Codegen will use + // this to generate relocations when moving the stack pointer. This symbol will be + // resolved automatically by the final linking stage. + try zig_object.createStackPointer(wasm_file); + + // TODO: Initialize debug information when we reimplement Dwarf support. +} + +fn createStackPointer(zig_object: *ZigObject, wasm_file: *Wasm) !void { + const gpa = wasm_file.base.comp.gpa; + const sym_index = try zig_object.getGlobalSymbol(gpa, "__stack_pointer", .global); + zig_object.symbols.items[sym_index].index = zig_object.imported_globals_count; + const is_wasm32 = wasm_file.base.comp.root_mod.resolved_target.result.cpu.arch == .wasm32; + try zig_object.imports.putNoClobber(gpa, sym_index, .{ + .name = zig_object.symbols.items[sym_index].name, + .module_name = try zig_object.string_table.insert(gpa, wasm_file.host_name), + .kind = .{ .global = .{ .valtype = if (is_wasm32) .i32 else .i64, .mutable = true } }, + }); + zig_object.imported_globals_count += 1; + zig_object.stack_pointer_sym = sym_index; +} + /// Frees and invalidates all memory of the incrementally compiled Zig module. /// It is illegal behavior to access the `ZigObject` after calling `deinit`. pub fn deinit(zig_object: *ZigObject, gpa: std.mem.Allocator) void { @@ -113,6 +138,7 @@ pub fn deinit(zig_object: *ZigObject, gpa: std.mem.Allocator) void { if (zig_object.dwarf) |*dwarf| { dwarf.deinit(); } + gpa.free(zig_object.path); zig_object.* = undefined; } @@ -531,32 +557,31 @@ pub fn addOrUpdateImport( /// such as an exported or imported symbol. 
/// If the symbol does not yet exist, creates a new one symbol instead /// and then returns the index to it. -pub fn getGlobalSymbol(zig_object: *ZigObject, wasm_file: *Wasm, name: []const u8) !u32 { - const gpa = wasm_file.base.comp.gpa; +pub fn getGlobalSymbol(zig_object: *ZigObject, gpa: std.mem.Allocator, name: []const u8, tag: Symbol.Tag) !u32 { const name_index = try zig_object.string_table.insert(gpa, name); const gop = try zig_object.global_syms.getOrPut(gpa, name_index); if (gop.found_existing) { - return gop.value_ptr.index; + return gop.value_ptr.*; } var symbol: Symbol = .{ .name = name_index, .flags = 0, - .index = undefined, // index to type will be set after merging function symbols - .tag = .function, - .virtual_address = undefined, + .index = undefined, // index to type will be set after merging symbols + .tag = tag, + .virtual_address = std.math.maxInt(u32), }; symbol.setGlobal(true); symbol.setUndefined(true); - const sym_index = if (zig_object.symbol.popOrNull()) |index| index else blk: { + const sym_index = if (zig_object.symbols_free_list.popOrNull()) |index| index else blk: { const index: u32 = @intCast(zig_object.symbols.items.len); try zig_object.symbols.ensureUnusedCapacity(gpa, 1); zig_object.symbols.items.len += 1; break :blk index; }; zig_object.symbols.items[sym_index] = symbol; - gop.value_ptr.* = .{ .index = sym_index, .file = null }; + gop.value_ptr.* = sym_index; return sym_index; } From f6896ef2180709fedeb5bafde3fe58ca6d06aa3a Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Mon, 15 Jan 2024 16:05:39 +0100 Subject: [PATCH 04/21] wasm: create linking objects in correct module CodeGen will create linking objects such as symbols, function types, etc in ZigObject, rather than in the linker driver where the final result will be stored. They will end up in the linker driver module during the `flush` phase instead. 
This means we must call functions such as `putOrGetFuncType` in the correct namespace, or else the entry will be created in the incorrect list and therefore return incorrect indexes. --- src/arch/wasm/CodeGen.zig | 8 +- src/arch/wasm/Emit.zig | 8 +- src/link/Wasm.zig | 165 +++++++++++++++++------------------- src/link/Wasm/ZigObject.zig | 149 +++++++++++++++++++------------- 4 files changed, 176 insertions(+), 154 deletions(-) diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index a13c61f367..314518caef 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -2239,7 +2239,7 @@ fn airCall(func: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModif } if (callee) |direct| { - const atom_index = func.bin_file.decls.get(direct).?; + const atom_index = func.bin_file.zigObjectPtr().?.decls.get(direct).?; try func.addLabel(.call, func.bin_file.getAtom(atom_index).sym_index); } else { // in this case we call a function pointer @@ -2251,7 +2251,7 @@ fn airCall(func: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModif var fn_type = try genFunctype(func.gpa, fn_info.cc, fn_info.param_types.get(ip), Type.fromInterned(fn_info.return_type), mod); defer fn_type.deinit(func.gpa); - const fn_type_index = try func.bin_file.putOrGetFuncType(fn_type); + const fn_type_index = try func.bin_file.zigObjectPtr().?.putOrGetFuncType(func.gpa, fn_type); try func.addLabel(.call_indirect, fn_type_index); } @@ -3157,7 +3157,7 @@ fn lowerAnonDeclRef( return error.CodegenFail; }, } - const target_atom_index = func.bin_file.anon_decls.get(decl_val).?; + const target_atom_index = func.bin_file.zigObjectPtr().?.anon_decls.get(decl_val).?; const target_sym_index = func.bin_file.getAtom(target_atom_index).getSymbolIndex().?; if (is_fn_body) { return WValue{ .function_index = target_sym_index }; @@ -7161,7 +7161,7 @@ fn callIntrinsic( const mod = func.bin_file.base.comp.module.?; var func_type = try genFunctype(func.gpa, .C, param_types, return_type,
mod); defer func_type.deinit(func.gpa); - const func_type_index = try func.bin_file.putOrGetFuncType(func_type); + const func_type_index = try func.bin_file.zigObjectPtr().?.putOrGetFuncType(func.gpa, func_type); try func.bin_file.addOrUpdateImport(name, symbol_index, null, func_type_index); const want_sret_param = firstParamSRet(.C, return_type, mod); diff --git a/src/arch/wasm/Emit.zig b/src/arch/wasm/Emit.zig index 7e67a98285..3d495dcff6 100644 --- a/src/arch/wasm/Emit.zig +++ b/src/arch/wasm/Emit.zig @@ -310,7 +310,7 @@ fn emitGlobal(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) !void { const global_offset = emit.offset(); try emit.code.appendSlice(&buf); - const atom_index = emit.bin_file.decls.get(emit.decl_index).?; + const atom_index = emit.bin_file.zigObjectPtr().?.decls.get(emit.decl_index).?; const atom = emit.bin_file.getAtomPtr(atom_index); try atom.relocs.append(gpa, .{ .index = label, @@ -370,7 +370,7 @@ fn emitCall(emit: *Emit, inst: Mir.Inst.Index) !void { try emit.code.appendSlice(&buf); if (label != 0) { - const atom_index = emit.bin_file.decls.get(emit.decl_index).?; + const atom_index = emit.bin_file.zigObjectPtr().?.decls.get(emit.decl_index).?; const atom = emit.bin_file.getAtomPtr(atom_index); try atom.relocs.append(gpa, .{ .offset = call_offset, @@ -400,7 +400,7 @@ fn emitFunctionIndex(emit: *Emit, inst: Mir.Inst.Index) !void { try emit.code.appendSlice(&buf); if (symbol_index != 0) { - const atom_index = emit.bin_file.decls.get(emit.decl_index).?; + const atom_index = emit.bin_file.zigObjectPtr().?.decls.get(emit.decl_index).?; const atom = emit.bin_file.getAtomPtr(atom_index); try atom.relocs.append(gpa, .{ .offset = index_offset, @@ -431,7 +431,7 @@ fn emitMemAddress(emit: *Emit, inst: Mir.Inst.Index) !void { } if (mem.pointer != 0) { - const atom_index = emit.bin_file.decls.get(emit.decl_index).?; + const atom_index = emit.bin_file.zigObjectPtr().?.decls.get(emit.decl_index).?; const atom = emit.bin_file.getAtomPtr(atom_index); 
try atom.relocs.append(gpa, .{ .offset = mem_offset, diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index d9e1432b63..fb808d79fc 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -581,11 +581,38 @@ pub fn createEmpty( return wasm; } -fn zigObjectPtr(wasm: *Wasm) ?*ZigObject { +pub fn zigObjectPtr(wasm: *Wasm) ?*ZigObject { if (wasm.zig_object_index == .null) return null; return &wasm.files.items(.data)[@intFromEnum(wasm.zig_object_index)].zig_object; } +pub fn getTypeIndex(wasm: *const Wasm, func_type: std.wasm.Type) ?u32 { + var index: u32 = 0; + while (index < wasm.func_types.items.len) : (index += 1) { + if (wasm.func_types.items[index].eql(func_type)) return index; + } + return null; +} + +/// Either creates a new import, or updates one if existing. +/// When `type_index` is non-null, we assume an external function. +/// In all other cases, a data-symbol will be created instead. +pub fn addOrUpdateImport( + wasm: *Wasm, + /// Name of the import + name: []const u8, + /// Symbol index that is external + symbol_index: u32, + /// Optional library name (i.e. `extern "c" fn foo() void` + lib_name: ?[:0]const u8, + /// The index of the type that represents the function signature + /// when the extern is a function. When this is null, a data-symbol + /// is asserted instead. + type_index: ?u32, +) !void { + return wasm.zigObjectPtr().?.addOrUpdateImport(wasm, name, symbol_index, lib_name, type_index); +} + /// For a given name, creates a new global synthetic symbol. /// Leaves index undefined and the default flags (0). 
fn createSyntheticSymbol(wasm: *Wasm, name: []const u8, tag: Symbol.Tag) !SymbolLoc { @@ -1389,64 +1416,7 @@ pub fn updateFunc(wasm: *Wasm, mod: *Module, func_index: InternPool.Index, air: @panic("Attempted to compile for object format that was disabled by build configuration"); } if (wasm.llvm_object) |llvm_object| return llvm_object.updateFunc(mod, func_index, air, liveness); - - const tracy = trace(@src()); - defer tracy.end(); - - const gpa = wasm.base.comp.gpa; - const func = mod.funcInfo(func_index); - const decl_index = func.owner_decl; - const decl = mod.declPtr(decl_index); - const atom_index = try wasm.getOrCreateAtomForDecl(decl_index); - const atom = wasm.getAtomPtr(atom_index); - atom.clear(); - - // var decl_state: ?Dwarf.DeclState = if (wasm.dwarf) |*dwarf| try dwarf.initDeclState(mod, decl_index) else null; - // defer if (decl_state) |*ds| ds.deinit(); - - var code_writer = std.ArrayList(u8).init(gpa); - defer code_writer.deinit(); - // const result = try codegen.generateFunction( - // &wasm.base, - // decl.srcLoc(mod), - // func, - // air, - // liveness, - // &code_writer, - // if (decl_state) |*ds| .{ .dwarf = ds } else .none, - // ); - const result = try codegen.generateFunction( - &wasm.base, - decl.srcLoc(mod), - func_index, - air, - liveness, - &code_writer, - .none, - ); - - const code = switch (result) { - .ok => code_writer.items, - .fail => |em| { - func.analysis(&mod.intern_pool).state = .codegen_failure; - try mod.failed_decls.put(mod.gpa, decl_index, em); - return; - }, - }; - - // if (wasm.dwarf) |*dwarf| { - // try dwarf.commitDeclState( - // mod, - // decl_index, - // // Actual value will be written after relocation. - // // For Wasm, this is the offset relative to the code section - // // which isn't known until flush(). 
- // 0, - // code.len, - // &decl_state.?, - // ); - // } - return wasm.finishUpdateDecl(decl_index, code, .function); + try wasm.zigObjectPtr().?.updateFunc(wasm, mod, func_index, air, liveness); } // Generate code for the Decl, storing it in memory to be later written to @@ -1456,12 +1426,12 @@ pub fn updateDecl(wasm: *Wasm, mod: *Module, decl_index: InternPool.DeclIndex) ! @panic("Attempted to compile for object format that was disabled by build configuration"); } if (wasm.llvm_object) |llvm_object| return llvm_object.updateDecl(mod, decl_index); + try wasm.zigObjectPtr().?.updateDecl(wasm, mod, decl_index); } pub fn updateDeclLineNumber(wasm: *Wasm, mod: *Module, decl_index: InternPool.DeclIndex) !void { if (wasm.llvm_object) |_| return; - _ = mod; - _ = decl_index; + try wasm.zigObjectPtr().?.updateDeclLineNumber(mod, decl_index); } /// From a given symbol location, returns its `wasm.GlobalType`. @@ -1511,9 +1481,7 @@ fn getFunctionSignature(wasm: *const Wasm, loc: SymbolLoc) std.wasm.Type { /// Returns the symbol index of the local /// The given `decl` is the parent decl whom owns the constant. pub fn lowerUnnamedConst(wasm: *Wasm, tv: TypedValue, decl_index: InternPool.DeclIndex) !u32 { - _ = wasm; - _ = tv; - _ = decl_index; + return wasm.zigObjectPtr().?.lowerUnnamedConst(wasm, tv, decl_index); } /// Returns the symbol index from a symbol of which its flag is set global, @@ -1522,8 +1490,7 @@ pub fn lowerUnnamedConst(wasm: *Wasm, tv: TypedValue, decl_index: InternPool.Dec /// and then returns the index to it. pub fn getGlobalSymbol(wasm: *Wasm, name: []const u8, lib_name: ?[]const u8) !u32 { _ = lib_name; - _ = name; - _ = wasm; + return wasm.zigObjectPtr().?.getGlobalSymbol(wasm.base.comp.gpa, name); } /// For a given decl, find the given symbol index's atom, and create a relocation for the type. 
@@ -1533,9 +1500,7 @@ pub fn getDeclVAddr( decl_index: InternPool.DeclIndex, reloc_info: link.File.RelocInfo, ) !u64 { - _ = wasm; - _ = decl_index; - _ = reloc_info; + return wasm.zigObjectPtr().?.getDeclVAddr(wasm, decl_index, reloc_info); } pub fn lowerAnonDecl( @@ -1544,16 +1509,11 @@ pub fn lowerAnonDecl( explicit_alignment: Alignment, src_loc: Module.SrcLoc, ) !codegen.Result { - _ = wasm; - _ = decl_val; - _ = explicit_alignment; - _ = src_loc; + return wasm.zigObjectPtr().?.lowerAnonDecl(wasm, decl_val, explicit_alignment, src_loc); } pub fn getAnonDeclVAddr(wasm: *Wasm, decl_val: InternPool.Index, reloc_info: link.File.RelocInfo) !u64 { - _ = wasm; - _ = decl_val; - _ = reloc_info; + return wasm.zigObjectPtr().?.getAnonDeclVAddr(wasm, decl_val, reloc_info); } pub fn deleteDeclExport( @@ -1561,9 +1521,9 @@ pub fn deleteDeclExport( decl_index: InternPool.DeclIndex, name: InternPool.NullTerminatedString, ) void { - if (wasm.llvm_object) |_| return; _ = name; - _ = decl_index; + if (wasm.llvm_object) |_| return; + return wasm.zigObjectPtr().?.deleteDeclExport(wasm, decl_index); } pub fn updateExports( @@ -1576,10 +1536,12 @@ pub fn updateExports( @panic("Attempted to compile for object format that was disabled by build configuration"); } if (wasm.llvm_object) |llvm_object| return llvm_object.updateExports(mod, exported, exports); + return wasm.zigObjectPtr().?.updateExports(wasm, mod, exported, exports); } pub fn freeDecl(wasm: *Wasm, decl_index: InternPool.DeclIndex) void { if (wasm.llvm_object) |llvm_object| return llvm_object.freeDecl(decl_index); + return wasm.zigObjectPtr().?.freeDecl(wasm, decl_index); } /// Assigns indexes to all indirect functions. 
@@ -1917,7 +1879,11 @@ pub fn createFunction( }; try wasm.appendAtomAtIndex(section_index, atom_index); try wasm.symbol_atom.putNoClobber(gpa, loc, atom_index); - try wasm.atom_types.put(gpa, atom_index, try wasm.putOrGetFuncType(func_ty)); + try wasm.zigObjectPtr().?.atom_types.put( + gpa, + atom_index, + try wasm.zigObjectPtr().?.putOrGetFuncType(gpa, func_ty), + ); try wasm.synthetic_functions.append(gpa, atom_index); return loc.index; @@ -4285,22 +4251,43 @@ fn hasPassiveInitializationSegments(wasm: *const Wasm) bool { /// Searches for a matching function signature. When no matching signature is found, /// a new entry will be made. The value returned is the index of the type within `wasm.func_types`. pub fn putOrGetFuncType(wasm: *Wasm, func_type: std.wasm.Type) !u32 { - _ = wasm; - _ = func_type; + if (wasm.getTypeIndex(func_type)) |index| { + return index; + } + + // functype does not exist. + const gpa = wasm.base.comp.gpa; + const index: u32 = @intCast(wasm.func_types.items.len); + const params = try gpa.dupe(std.wasm.Valtype, func_type.params); + errdefer gpa.free(params); + const returns = try gpa.dupe(std.wasm.Valtype, func_type.returns); + errdefer gpa.free(returns); + try wasm.func_types.append(gpa, .{ + .params = params, + .returns = returns, + }); + return index; } /// For the given `decl_index`, stores the corresponding type representing the function signature. /// Asserts declaration has an associated `Atom`. /// Returns the index into the list of types. 
pub fn storeDeclType(wasm: *Wasm, decl_index: InternPool.DeclIndex, func_type: std.wasm.Type) !u32 { - _ = wasm; - _ = decl_index; - _ = func_type; - // const gpa = wasm.base.comp.gpa; - // const atom_index = wasm.decls.get(decl_index).?; - // const index = try wasm.putOrGetFuncType(func_type); - // try wasm.atom_types.put(gpa, atom_index, index); - // return index; + return wasm.zigObjectPtr().?.storeDeclType(wasm.base.comp.gpa, decl_index, func_type); +} + +/// Returns the symbol index of the error name table. +/// +/// When the symbol does not yet exist, it will create a new one instead. +pub fn getErrorTableSymbol(wasm_file: *Wasm) !u32 { + return wasm_file.zigObjectPtr().?.getErrorTableSymbol(wasm_file); +} + +/// For a given `InternPool.DeclIndex` returns its corresponding `Atom.Index`. +/// When the index was not found, a new `Atom` will be created, and its index will be returned. +/// The newly created Atom is empty with default fields as specified by `Atom.empty`. +pub fn getOrCreateAtomForDecl(wasm_file: *Wasm, decl_index: InternPool.DeclIndex) !Atom.Index { + return wasm_file.zigObjectPtr().?.getOrCreateAtomForDecl(wasm_file, decl_index); } /// Verifies all resolved symbols and checks whether itself needs to be marked alive, diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index 4f32f6891e..8b4d703a34 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -85,11 +85,13 @@ pub fn init(zig_object: *ZigObject, wasm_file: *Wasm) !void { fn createStackPointer(zig_object: *ZigObject, wasm_file: *Wasm) !void { const gpa = wasm_file.base.comp.gpa; - const sym_index = try zig_object.getGlobalSymbol(gpa, "__stack_pointer", .global); - zig_object.symbols.items[sym_index].index = zig_object.imported_globals_count; + const sym_index = try zig_object.getGlobalSymbol(gpa, "__stack_pointer"); + const sym = zig_object.symbol(sym_index); + sym.index = zig_object.imported_globals_count; + sym.tag = .global; const is_wasm32 = 
wasm_file.base.comp.root_mod.resolved_target.result.cpu.arch == .wasm32; try zig_object.imports.putNoClobber(gpa, sym_index, .{ - .name = zig_object.symbols.items[sym_index].name, + .name = sym.name, .module_name = try zig_object.string_table.insert(gpa, wasm_file.host_name), .kind = .{ .global = .{ .valtype = if (is_wasm32) .i32 else .i64, .mutable = true } }, }); @@ -97,6 +99,10 @@ fn createStackPointer(zig_object: *ZigObject, wasm_file: *Wasm) !void { zig_object.stack_pointer_sym = sym_index; } +fn symbol(zig_object: *const ZigObject, index: u32) *Symbol { + return &zig_object.symbols.items[index]; +} + /// Frees and invalidates all memory of the incrementally compiled Zig module. /// It is illegal behavior to access the `ZigObject` after calling `deinit`. pub fn deinit(zig_object: *ZigObject, gpa: std.mem.Allocator) void { @@ -146,7 +152,7 @@ pub fn deinit(zig_object: *ZigObject, gpa: std.mem.Allocator) void { /// Will re-use slots when a symbol was freed at an earlier stage. pub fn allocateSymbol(zig_object: *ZigObject, gpa: std.mem.Allocator) !u32 { try zig_object.symbols.ensureUnusedCapacity(gpa, 1); - const symbol: Symbol = .{ + const sym: Symbol = .{ .name = std.math.maxInt(u32), // will be set after updateDecl as well as during atom creation for decls .flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL), .tag = .undefined, // will be set after updateDecl @@ -154,17 +160,22 @@ pub fn allocateSymbol(zig_object: *ZigObject, gpa: std.mem.Allocator) !u32 { .virtual_address = std.math.maxInt(u32), // will be set during atom allocation }; if (zig_object.symbols_free_list.popOrNull()) |index| { - zig_object.symbols.items[index] = symbol; + zig_object.symbols.items[index] = sym; return index; } const index = @as(u32, @intCast(zig_object.symbols.items.len)); - zig_object.symbols.appendAssumeCapacity(symbol); + zig_object.symbols.appendAssumeCapacity(sym); return index; } // Generate code for the Decl, storing it in memory to be later written to // the file on 
flush(). -pub fn updateDecl(zig_object: *ZigObject, wasm_file: *Wasm, mod: *Module, decl_index: InternPool.DeclIndex) !void { +pub fn updateDecl( + zig_object: *ZigObject, + wasm_file: *Wasm, + mod: *Module, + decl_index: InternPool.DeclIndex, +) !void { const decl = mod.declPtr(decl_index); if (decl.val.getFunction(mod)) |_| { return; @@ -173,7 +184,7 @@ pub fn updateDecl(zig_object: *ZigObject, wasm_file: *Wasm, mod: *Module, decl_i } const gpa = wasm_file.base.comp.gpa; - const atom_index = try zig_object.getOrCreateAtomForDecl(decl_index); + const atom_index = try zig_object.getOrCreateAtomForDecl(wasm_file, decl_index); const atom = wasm_file.getAtomPtr(atom_index); atom.clear(); @@ -181,7 +192,7 @@ pub fn updateDecl(zig_object: *ZigObject, wasm_file: *Wasm, mod: *Module, decl_i const variable = decl.getOwnedVariable(mod).?; const name = mod.intern_pool.stringToSlice(decl.name); const lib_name = mod.intern_pool.stringToSliceUnwrap(variable.lib_name); - return wasm_file.addOrUpdateImport(name, atom.sym_index, lib_name, null); + return zig_object.addOrUpdateImport(wasm_file, name, atom.sym_index, lib_name, null); } const val = if (decl.val.getVariable(mod)) |variable| Value.fromInterned(variable.init) else decl.val; @@ -206,15 +217,22 @@ pub fn updateDecl(zig_object: *ZigObject, wasm_file: *Wasm, mod: *Module, decl_i }, }; - return wasm_file.finishUpdateDecl(decl_index, code, .data); + return zig_object.finishUpdateDecl(wasm_file, decl_index, code, .data); } -pub fn updateFunc(zig_object: *ZigObject, wasm_file: *Wasm, mod: *Module, func_index: InternPool.Index, air: Air, liveness: Liveness) !void { +pub fn updateFunc( + zig_object: *ZigObject, + wasm_file: *Wasm, + mod: *Module, + func_index: InternPool.Index, + air: Air, + liveness: Liveness, +) !void { const gpa = wasm_file.base.comp.gpa; const func = mod.funcInfo(func_index); const decl_index = func.owner_decl; const decl = mod.declPtr(decl_index); - const atom_index = try 
zig_object.getOrCreateAtomForDecl(decl_index); + const atom_index = try zig_object.getOrCreateAtomForDecl(wasm_file, decl_index); const atom = wasm_file.getAtomPtr(atom_index); atom.clear(); @@ -242,16 +260,22 @@ pub fn updateFunc(zig_object: *ZigObject, wasm_file: *Wasm, mod: *Module, func_i return zig_object.finishUpdateDecl(wasm_file, decl_index, code, .function); } -fn finishUpdateDecl(zig_object: *ZigObject, wasm_file: *Wasm, decl_index: InternPool.DeclIndex, code: []const u8, symbol_tag: Symbol.Tag) !void { +fn finishUpdateDecl( + zig_object: *ZigObject, + wasm_file: *Wasm, + decl_index: InternPool.DeclIndex, + code: []const u8, + symbol_tag: Symbol.Tag, +) !void { const gpa = wasm_file.base.comp.gpa; const mod = wasm_file.base.comp.module.?; const decl = mod.declPtr(decl_index); const atom_index = zig_object.decls.get(decl_index).?; const atom = wasm_file.getAtomPtr(atom_index); - const symbol = &zig_object.symbols.items[atom.sym_index]; + const sym = zig_object.symbol(atom.getSymbolIndex().?); const full_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); - symbol.name = try zig_object.string_table.insert(gpa, full_name); - symbol.tag = symbol_tag; + sym.name = try zig_object.string_table.insert(gpa, full_name); + sym.tag = symbol_tag; try atom.code.appendSlice(gpa, code); try wasm_file.resolved_symbols.put(gpa, atom.symbolLoc(), {}); @@ -267,14 +291,13 @@ pub fn getOrCreateAtomForDecl(zig_object: *ZigObject, wasm_file: *Wasm, decl_ind const gpa = wasm_file.base.comp.gpa; const gop = try zig_object.decls.getOrPut(gpa, decl_index); if (!gop.found_existing) { - const atom_index = try wasm_file.createAtom(); - gop.value_ptr.* = atom_index; - const atom = wasm_file.getAtom(atom_index); - const symbol = atom.symbolLoc().getSymbol(wasm_file); + const sym_index = try zig_object.allocateSymbol(gpa); + gop.value_ptr.* = try wasm_file.createAtom(sym_index); const mod = wasm_file.base.comp.module.?; const decl = mod.declPtr(decl_index); const 
full_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); - symbol.name = try wasm_file.string_table.insert(gpa, full_name); + const sym = zig_object.symbol(sym_index); + sym.name = try zig_object.string_table.insert(gpa, full_name); } return gop.value_ptr.*; } @@ -297,7 +320,7 @@ pub fn lowerAnonDecl( @intFromEnum(decl_val), }) catch unreachable; - switch (try zig_object.lowerConst(name, tv, src_loc)) { + switch (try zig_object.lowerConst(wasm_file, name, tv, src_loc)) { .ok => |atom_index| zig_object.anon_decls.values()[gop.index] = atom_index, .fail => |em| return .{ .fail = em }, } @@ -323,7 +346,7 @@ pub fn lowerUnnamedConst(zig_object: *ZigObject, wasm_file: *Wasm, tv: TypedValu std.debug.assert(tv.ty.zigTypeTag(mod) != .Fn); // cannot create local symbols for functions const decl = mod.declPtr(decl_index); - const parent_atom_index = try zig_object.getOrCreateAtomForDecl(decl_index); + const parent_atom_index = try zig_object.getOrCreateAtomForDecl(wasm_file, decl_index); const parent_atom = wasm_file.getAtom(parent_atom_index); const local_index = parent_atom.locals.items.len; const fqn = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); @@ -332,7 +355,7 @@ pub fn lowerUnnamedConst(zig_object: *ZigObject, wasm_file: *Wasm, tv: TypedValu }); defer gpa.free(name); - switch (try zig_object.lowerConst(name, tv, decl.srcLoc(mod))) { + switch (try zig_object.lowerConst(wasm_file, name, tv, decl.srcLoc(mod))) { .ok => |atom_index| { try wasm_file.getAtomPtr(parent_atom_index).locals.append(gpa, atom_index); return wasm_file.getAtom(atom_index).getSymbolIndex().?; @@ -355,14 +378,15 @@ fn lowerConst(zig_object: *ZigObject, wasm_file: *Wasm, name: []const u8, tv: Ty const mod = wasm_file.base.comp.module.?; // Create and initialize a new local symbol and atom - const atom_index = try wasm_file.createAtom(); + const sym_index = try zig_object.allocateSymbol(gpa); + const atom_index = try wasm_file.createAtom(sym_index); var 
value_bytes = std.ArrayList(u8).init(gpa); defer value_bytes.deinit(); const code = code: { const atom = wasm_file.getAtomPtr(atom_index); atom.alignment = tv.ty.abiAlignment(mod); - zig_object.symbols.items[atom.sym_index] = .{ + zig_object.symbols.items[sym_index] = .{ .name = try zig_object.string_table.insert(gpa, name), .flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL), .tag = .data, @@ -399,14 +423,14 @@ fn lowerConst(zig_object: *ZigObject, wasm_file: *Wasm, name: []const u8, tv: Ty /// /// When the symbol does not yet exist, it will create a new one instead. pub fn getErrorTableSymbol(zig_object: *ZigObject, wasm_file: *Wasm) !u32 { - if (zig_object.error_table_symbol) |symbol| { - return symbol; + if (zig_object.error_table_symbol) |sym| { + return sym; } // no error was referenced yet, so create a new symbol and atom for it // and then return said symbol's index. The final table will be populated // during `flush` when we know all possible error names. - const gpa = wasm_file.base.gpa; + const gpa = wasm_file.base.comp.gpa; const sym_index = try zig_object.allocateSymbol(gpa); const atom_index = try wasm_file.createAtom(sym_index); const atom = wasm_file.getAtomPtr(atom_index); @@ -415,15 +439,16 @@ pub fn getErrorTableSymbol(zig_object: *ZigObject, wasm_file: *Wasm) !u32 { atom.alignment = slice_ty.abiAlignment(mod); const sym_name = try zig_object.string_table.insert(gpa, "__zig_err_name_table"); - const symbol = &zig_object.symbols.items[sym_index]; - symbol.* = .{ + const sym = zig_object.symbol(sym_index); + sym.* = .{ .name = sym_name, .tag = .data, .flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL), .index = 0, .virtual_address = undefined, }; - symbol.mark(); + // TODO: can we remove this? 
+ // sym.mark(); log.debug("Error name table was created with symbol index: ({d})", .{sym_index}); zig_object.error_table_symbol = sym_index; @@ -528,13 +553,13 @@ pub fn addOrUpdateImport( defer if (mangle_name) gpa.free(full_name); const decl_name_index = try zig_object.string_table.insert(gpa, full_name); - const symbol: *Symbol = &zig_object.symbols.items[symbol_index]; - symbol.setUndefined(true); - symbol.setGlobal(true); - symbol.name = decl_name_index; + const sym: *Symbol = &zig_object.symbols.items[symbol_index]; + sym.setUndefined(true); + sym.setGlobal(true); + sym.name = decl_name_index; if (mangle_name) { // we specified a specific name for the symbol that does not match the import name - symbol.setFlag(.WASM_SYM_EXPLICIT_NAME); + sym.setFlag(.WASM_SYM_EXPLICIT_NAME); } if (type_index) |ty_index| { @@ -557,22 +582,22 @@ pub fn addOrUpdateImport( /// such as an exported or imported symbol. /// If the symbol does not yet exist, creates a new one symbol instead /// and then returns the index to it. 
-pub fn getGlobalSymbol(zig_object: *ZigObject, gpa: std.mem.Allocator, name: []const u8, tag: Symbol.Tag) !u32 { +pub fn getGlobalSymbol(zig_object: *ZigObject, gpa: std.mem.Allocator, name: []const u8) !u32 { const name_index = try zig_object.string_table.insert(gpa, name); const gop = try zig_object.global_syms.getOrPut(gpa, name_index); if (gop.found_existing) { return gop.value_ptr.*; } - var symbol: Symbol = .{ + var sym: Symbol = .{ .name = name_index, .flags = 0, .index = undefined, // index to type will be set after merging symbols - .tag = tag, + .tag = .function, .virtual_address = std.math.maxInt(u32), }; - symbol.setGlobal(true); - symbol.setUndefined(true); + sym.setGlobal(true); + sym.setUndefined(true); const sym_index = if (zig_object.symbols_free_list.popOrNull()) |index| index else blk: { const index: u32 = @intCast(zig_object.symbols.items.len); @@ -580,7 +605,7 @@ pub fn getGlobalSymbol(zig_object: *ZigObject, gpa: std.mem.Allocator, name: []c zig_object.symbols.items.len += 1; break :blk index; }; - zig_object.symbols.items[sym_index] = symbol; + zig_object.symbols.items[sym_index] = sym; gop.value_ptr.* = sym_index; return sym_index; } @@ -675,8 +700,8 @@ pub fn deleteDeclExport( const atom_index = zig_object.decls.get(decl_index) orelse return; const sym_index = wasm_file.getAtom(atom_index).sym_index; const loc: Wasm.SymbolLoc = .{ .file = null, .index = sym_index }; - const symbol = loc.getSymbol(wasm_file); - std.debug.assert(zig_object.global_syms.remove(symbol.name)); + const sym = loc.getSymbol(wasm_file); + std.debug.assert(zig_object.global_syms.remove(sym.name)); } pub fn updateExports( @@ -694,7 +719,7 @@ pub fn updateExports( }, }; const decl = mod.declPtr(decl_index); - const atom_index = try zig_object.getOrCreateAtomForDecl(decl_index); + const atom_index = try zig_object.getOrCreateAtomForDecl(wasm_file, decl_index); const atom = wasm_file.getAtom(atom_index); const atom_sym = atom.symbolLoc().getSymbol(wasm_file).*; const gpa 
= mod.gpa; @@ -722,24 +747,24 @@ pub fn updateExports( }, .decl_index => |i| i, }; - const exported_atom_index = try zig_object.getOrCreateAtomForDecl(exported_decl_index); + const exported_atom_index = try zig_object.getOrCreateAtomForDecl(wasm_file, exported_decl_index); const exported_atom = wasm_file.getAtom(exported_atom_index); // const export_name = try zig_object.string_table.put(gpa, mod.intern_pool.stringToSlice(exp.opts.name)); const sym_loc = exported_atom.symbolLoc(); - const symbol = sym_loc.getSymbol(wasm_file); - symbol.setGlobal(true); - symbol.setUndefined(false); - symbol.index = atom_sym.index; - symbol.tag = atom_sym.tag; - symbol.name = atom_sym.name; + const sym = sym_loc.getSymbol(wasm_file); + sym.setGlobal(true); + sym.setUndefined(false); + sym.index = atom_sym.index; + sym.tag = atom_sym.tag; + sym.name = atom_sym.name; switch (exp.opts.linkage) { .Internal => { - symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); - symbol.setFlag(.WASM_SYM_BINDING_WEAK); + sym.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); + sym.setFlag(.WASM_SYM_BINDING_WEAK); }, .Weak => { - symbol.setFlag(.WASM_SYM_BINDING_WEAK); + sym.setFlag(.WASM_SYM_BINDING_WEAK); }, .Strong => {}, // symbols are strong by default .LinkOnce => { @@ -840,7 +865,7 @@ pub fn freeDecl(zig_object: *ZigObject, wasm_file: *Wasm, decl_index: InternPool } } -pub fn getTypeIndex(zig_object: *const ZigObject, func_type: std.wasm.Type) ?u32 { +fn getTypeIndex(zig_object: *const ZigObject, func_type: std.wasm.Type) ?u32 { var index: u32 = 0; while (index < zig_object.func_types.items.len) : (index += 1) { if (zig_object.func_types.items[index].eql(func_type)) return index; @@ -1115,6 +1140,16 @@ fn allocateDebugAtoms(zig_object: *ZigObject) !void { try allocAtom(zig_object, &zig_object.debug_pubtypes_index, zig_object.debug_pubtypes_atom.?); } +/// For the given `decl_index`, stores the corresponding type representing the function signature. +/// Asserts declaration has an associated `Atom`. 
+/// Returns the index into the list of types. +pub fn storeDeclType(zig_object: *ZigObject, gpa: std.mem.Allocator, decl_index: InternPool.DeclIndex, func_type: std.wasm.Type) !u32 { + const atom_index = zig_object.decls.get(decl_index).?; + const index = try zig_object.putOrGetFuncType(gpa, func_type); + try zig_object.atom_types.put(gpa, atom_index, index); + return index; +} + const build_options = @import("build_options"); const builtin = @import("builtin"); const codegen = @import("../../codegen.zig"); From 12505c6d3d4ccfc859b67e4b43c5b3844bebb475 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Mon, 15 Jan 2024 16:43:22 +0100 Subject: [PATCH 05/21] wasm: store `File.Index` on the Atom Also, consolidate the creation of Atoms so they all use `createAtom`. --- src/link/Wasm.zig | 42 ++++++++-------------------- src/link/Wasm/Atom.zig | 56 ++++++++++++++++--------------------- src/link/Wasm/ZigObject.zig | 13 +++++---- 3 files changed, 44 insertions(+), 67 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index fb808d79fc..55e08babd2 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -569,6 +569,7 @@ pub fn createEmpty( if (!use_llvm) { const index: File.Index = @enumFromInt(wasm.files.len); var zig_object: ZigObject = .{ + .index = index, .path = try std.fmt.allocPrint(gpa, "{s}.o", .{std.fs.path.stem(zcu.main_mod.root_src_path)}), .stack_pointer_sym = undefined, }; @@ -663,12 +664,11 @@ fn parseObjectFile(wasm: *Wasm, path: []const u8) !bool { } /// Creates a new empty `Atom` and returns its `Atom.Index` -pub fn createAtom(wasm: *Wasm, sym_index: u32) !Atom.Index { +pub fn createAtom(wasm: *Wasm, sym_index: u32, file_index: File.Index) !Atom.Index { const gpa = wasm.base.comp.gpa; const index: Atom.Index = @intCast(wasm.managed_atoms.items.len); const atom = try wasm.managed_atoms.addOne(gpa); - atom.* = Atom.empty; - atom.sym_index = sym_index; + atom.* = .{ .file_index = file_index, .sym_index = sym_index }; try 
wasm.symbol_atom.putNoClobber(gpa, .{ .file = null, .index = sym_index }, index); return index; @@ -1825,20 +1825,11 @@ fn createSyntheticFunction( symbol.index = func_index; // create the atom that will be output into the final binary - const atom_index = @as(Atom.Index, @intCast(wasm.managed_atoms.items.len)); - const atom = try wasm.managed_atoms.addOne(gpa); - atom.* = .{ - .size = @as(u32, @intCast(function_body.items.len)), - .offset = 0, - .sym_index = loc.index, - .file = null, - .alignment = .@"1", - .prev = null, - .code = function_body.moveToUnmanaged(), - .original_offset = 0, - }; + const atom_index = try wasm.createAtom(loc.index, .null); + const atom = wasm.getAtomPtr(atom_index); + atom.code = function_body.moveToUnmanaged(); + atom.size = @intCast(function_body.items.len); try wasm.appendAtomAtIndex(wasm.code_section_index.?, atom_index); - try wasm.symbol_atom.putNoClobber(gpa, loc, atom_index); } /// Unlike `createSyntheticFunction` this function is to be called by @@ -1856,19 +1847,11 @@ pub fn createFunction( const gpa = wasm.base.comp.gpa; const loc = try wasm.createSyntheticSymbol(symbol_name, .function); - const atom_index: Atom.Index = @intCast(wasm.managed_atoms.items.len); - const atom = try wasm.managed_atoms.addOne(gpa); - atom.* = .{ - .size = @intCast(function_body.items.len), - .offset = 0, - .sym_index = loc.index, - .file = null, - .alignment = .@"1", - .prev = null, - .code = function_body.moveToUnmanaged(), - .relocs = relocations.moveToUnmanaged(), - .original_offset = 0, - }; + const atom_index = try wasm.createAtom(loc.index, wasm.zig_object_index); + const atom = wasm.getAtomPtr(atom_index); + atom.code = function_body.moveToUnmanaged(); + atom.relocs = relocations.moveToUnmanaged(); + atom.size = @intCast(function_body.items.len); const symbol = loc.getSymbol(wasm); symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); // ensure function does not get exported @@ -1878,7 +1861,6 @@ pub fn createFunction( break :idx index; }; try 
wasm.appendAtomAtIndex(section_index, atom_index); - try wasm.symbol_atom.putNoClobber(gpa, loc, atom_index); try wasm.zigObjectPtr().?.atom_types.put( gpa, atom_index, diff --git a/src/link/Wasm/Atom.zig b/src/link/Wasm/Atom.zig index c8d115b872..fb3d1a5724 100644 --- a/src/link/Wasm/Atom.zig +++ b/src/link/Wasm/Atom.zig @@ -1,55 +1,36 @@ -const Atom = @This(); - -const std = @import("std"); -const types = @import("types.zig"); -const Wasm = @import("../Wasm.zig"); -const Symbol = @import("Symbol.zig"); - -const leb = std.leb; -const log = std.log.scoped(.link); -const mem = std.mem; -const Allocator = mem.Allocator; - +/// Represents the index of the file this atom was generated from. +/// This is 'null' when the atom was generated by a synthetic linker symbol. +file: FileIndex, /// symbol index of the symbol representing this atom sym_index: u32, /// Size of the atom, used to calculate section sizes in the final binary -size: u32, +size: u32 = 0, /// List of relocations belonging to this atom relocs: std.ArrayListUnmanaged(types.Relocation) = .{}, /// Contains the binary data of an atom, which can be non-relocated code: std.ArrayListUnmanaged(u8) = .{}, /// For code this is 1, for data this is set to the highest value of all segments -alignment: Wasm.Alignment, +alignment: Wasm.Alignment = .@"1", /// Offset into the section where the atom lives, this already accounts /// for alignment. -offset: u32, +offset: u32 = 0, /// The original offset within the object file. This value is substracted from /// relocation offsets to determine where in the `data` to rewrite the value -original_offset: u32, -/// Represents the index of the file this atom was generated from. -/// This is 'null' when the atom was generated by a Decl from Zig code. -file: ?u16, +original_offset: u32 = 0, +/// Next atom in relation to this atom. +/// When null, this atom is the last atom +next: ?Atom.Index = null, /// Previous atom in relation to this atom. 
/// is null when this atom is the first in its order -prev: ?Atom.Index, +prev: ?Atom.Index = null, /// Contains atoms local to a decl, all managed by this `Atom`. /// When the parent atom is being freed, it will also do so for all local atoms. locals: std.ArrayListUnmanaged(Atom.Index) = .{}, -/// Alias to an unsigned 32-bit integer +/// Alias to an unsigned 32-bit integer. +// TODO: Make this a non-exhaustive enum. pub const Index = u32; -/// Represents a default empty wasm `Atom` -pub const empty: Atom = .{ - .alignment = .@"1", - .file = null, - .offset = 0, - .prev = null, - .size = 0, - .sym_index = 0, - .original_offset = 0, -}; - /// Frees all resources owned by this `Atom`. pub fn deinit(atom: *Atom, gpa: std.mem.Allocator) void { atom.relocs.deinit(gpa); @@ -217,3 +198,14 @@ fn thombstone(atom: Atom, wasm: *const Wasm) ?i64 { } return null; } +const leb = std.leb; +const log = std.log.scoped(.link); +const mem = std.mem; +const std = @import("std"); +const types = @import("types.zig"); + +const Allocator = mem.Allocator; +const Atom = @This(); +const FileIndex = @import("file.zig").File.Index; +const Symbol = @import("Symbol.zig"); +const Wasm = @import("../Wasm.zig"); diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index 8b4d703a34..81eda1e413 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -4,6 +4,8 @@ //! Think about this as fake in-memory Object file for the Zig module. path: []const u8, +/// Index within the list of relocatable objects of the linker driver. +index: File.Index, /// List of all `Decl` that are currently alive. /// Each index maps to the corresponding `Atom.Index`. 
decls: std.AutoHashMapUnmanaged(InternPool.DeclIndex, Atom.Index) = .{}, @@ -292,7 +294,7 @@ pub fn getOrCreateAtomForDecl(zig_object: *ZigObject, wasm_file: *Wasm, decl_ind const gop = try zig_object.decls.getOrPut(gpa, decl_index); if (!gop.found_existing) { const sym_index = try zig_object.allocateSymbol(gpa); - gop.value_ptr.* = try wasm_file.createAtom(sym_index); + gop.value_ptr.* = try wasm_file.createAtom(sym_index, zig_object.index); const mod = wasm_file.base.comp.module.?; const decl = mod.declPtr(decl_index); const full_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); @@ -379,7 +381,7 @@ fn lowerConst(zig_object: *ZigObject, wasm_file: *Wasm, name: []const u8, tv: Ty // Create and initialize a new local symbol and atom const sym_index = try zig_object.allocateSymbol(gpa); - const atom_index = try wasm_file.createAtom(sym_index); + const atom_index = try wasm_file.createAtom(sym_index, zig_object.index); var value_bytes = std.ArrayList(u8).init(gpa); defer value_bytes.deinit(); @@ -432,7 +434,7 @@ pub fn getErrorTableSymbol(zig_object: *ZigObject, wasm_file: *Wasm) !u32 { // during `flush` when we know all possible error names. const gpa = wasm_file.base.comp.gpa; const sym_index = try zig_object.allocateSymbol(gpa); - const atom_index = try wasm_file.createAtom(sym_index); + const atom_index = try wasm_file.createAtom(sym_index, zig_object.index); const atom = wasm_file.getAtomPtr(atom_index); const slice_ty = Type.slice_const_u8_sentinel_0; const mod = wasm_file.base.comp.module.?; @@ -468,7 +470,7 @@ fn populateErrorNameTable(zig_object: *ZigObject, wasm_file: *Wasm) !void { // we create a symbol for the entire region of error names. We then calculate // the pointers into the list using addends which are appended to the relocation. 
const names_sym_index = try zig_object.allocateSymbol(gpa); - const names_atom_index = try wasm_file.createAtom(names_sym_index); + const names_atom_index = try wasm_file.createAtom(names_sym_index, zig_object.index); const names_atom = wasm_file.getAtomPtr(names_atom_index); names_atom.alignment = .@"1"; const sym_name = try zig_object.string_table.insert(gpa, "__zig_err_names"); @@ -1087,7 +1089,7 @@ pub fn createDebugSectionForIndex(zig_object: *ZigObject, wasm_file: *Wasm, inde try zig_object.appendDummySegment(); const sym_index = try zig_object.allocateSymbol(gpa); - const atom_index = try wasm_file.createAtom(sym_index); + const atom_index = try wasm_file.createAtom(sym_index, zig_object.index); const atom = wasm_file.getAtomPtr(atom_index); zig_object.symbols.items[sym_index] = .{ .tag = .section, @@ -1161,6 +1163,7 @@ const types = @import("types.zig"); const Air = @import("../../Air.zig"); const Atom = @import("Atom.zig"); const Dwarf = @import("../Dwarf.zig"); +const File = @import("file.zig").File; const InternPool = @import("../../InternPool.zig"); const Liveness = @import("../../Liveness.zig"); const Module = @import("../../Module.zig"); From 143e9599d64e7ac7991f360679a5611ee0d59376 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Wed, 17 Jan 2024 17:21:59 +0100 Subject: [PATCH 06/21] wasm: use `File` abstraction instead of object When merging sections we now make use of the `File` abstraction so all objects such as globals, functions, imports, etc are also merged from the `ZigObject` module. This allows us to use a singular way to perform each link action without having to check the kind of the file. The logic is mostly handled in the abstract file module, unless its complexity warrants the handling within the corresponding module itself. 
--- src/link/Wasm.zig | 209 ++++++++++++++++++++---------------- src/link/Wasm/Object.zig | 75 ++++++------- src/link/Wasm/ZigObject.zig | 36 +++++++ 3 files changed, 192 insertions(+), 128 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 55e08babd2..142365ecb3 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -152,7 +152,7 @@ entry: ?u32 = null, function_table: std.AutoHashMapUnmanaged(SymbolLoc, u32) = .{}, /// All object files and their data which are linked into the final binary -objects: std.ArrayListUnmanaged(Object) = .{}, +objects: std.ArrayListUnmanaged(File.Index) = .{}, /// All archive files that are lazy loaded. /// e.g. when an undefined symbol references a symbol from the archive. archives: std.ArrayListUnmanaged(Archive) = .{}, @@ -442,7 +442,7 @@ pub fn createEmpty( // can be passed to LLD. const sub_path = if (use_lld) zcu_object_sub_path.? else emit.sub_path; - const file = try emit.directory.handle.createFile(sub_path, .{ + wasm.base.file = try emit.directory.handle.createFile(sub_path, .{ .truncate = true, .read = true, .mode = if (fs.has_executable_bit) @@ -453,7 +453,6 @@ pub fn createEmpty( else 0, }); - wasm.base.file = file; wasm.name = sub_path; // create stack pointer symbol @@ -582,6 +581,15 @@ pub fn createEmpty( return wasm; } +pub fn file(wasm: *Wasm, index: File.Index) ?File { + const tag = wasm.files.items(.tags)[index]; + return switch (tag) { + .null => null, + .zig_object => .{ .zig_object = &wasm.files.items(.data)[index].zig_object }, + .object => .{ .object = &wasm.files.items(.data)[index].object }, + }; +} + pub fn zigObjectPtr(wasm: *Wasm) ?*ZigObject { if (wasm.zig_object_index == .null) return null; return &wasm.files.items(.data)[@intFromEnum(wasm.zig_object_index)].zig_object; @@ -650,16 +658,18 @@ fn parseInputFiles(wasm: *Wasm, files: []const []const u8) !void { /// file and parsed successfully. Returns false when file is not an object file. 
/// May return an error instead when parsing failed. fn parseObjectFile(wasm: *Wasm, path: []const u8) !bool { - const file = try fs.cwd().openFile(path, .{}); - errdefer file.close(); + const obj_file = try fs.cwd().openFile(path, .{}); + errdefer obj_file.close(); const gpa = wasm.base.comp.gpa; - var object = Object.create(gpa, file, path, null) catch |err| switch (err) { + var object = Object.create(gpa, obj_file, path, null) catch |err| switch (err) { error.InvalidMagicByte, error.NotObjectFile => return false, else => |e| return e, }; errdefer object.deinit(gpa); - try wasm.objects.append(gpa, object); + object.index = @enumFromInt(wasm.files.len); + try wasm.files.append(gpa, .{ .object = object }); + try wasm.objects.append(gpa, object.index); return true; } @@ -693,11 +703,11 @@ pub inline fn getAtomPtr(wasm: *Wasm, index: Atom.Index) *Atom { fn parseArchive(wasm: *Wasm, path: []const u8, force_load: bool) !bool { const gpa = wasm.base.comp.gpa; - const file = try fs.cwd().openFile(path, .{}); - errdefer file.close(); + const archive_file = try fs.cwd().openFile(path, .{}); + errdefer archive_file.close(); var archive: Archive = .{ - .file = file, + .file = archive_file, .name = path, }; archive.parse(gpa) catch |err| switch (err) { @@ -727,8 +737,10 @@ fn parseArchive(wasm: *Wasm, path: []const u8, force_load: bool) !bool { } for (offsets.keys()) |file_offset| { - const object = try wasm.objects.addOne(gpa); - object.* = try archive.parseObject(gpa, file_offset); + var object = try archive.parseObject(gpa, file_offset); + object.index = @enumFromInt(wasm.files.len); + try wasm.files.append(gpa, .{ .object = object }); + try wasm.objects.append(gpa, object.index); } return true; @@ -784,8 +796,8 @@ fn resolveSymbolsInObject(wasm: *Wasm, object_index: u16) !void { const existing_loc = maybe_existing.value_ptr.*; const existing_sym: *Symbol = existing_loc.getSymbol(wasm); - const existing_file_path = if (existing_loc.file) |file| blk: { - break :blk 
wasm.objects.items[file].name; + const existing_file_path = if (existing_loc.file) |file_index| blk: { + break :blk wasm.objects.items[file_index].name; } else wasm.name; if (!existing_sym.isUndefined()) outer: { @@ -911,10 +923,11 @@ fn resolveSymbolsInArchives(wasm: *Wasm) !void { // Symbol is found in unparsed object file within current archive. // Parse object and and resolve symbols again before we check remaining // undefined symbols. - const object_file_index: u16 = @intCast(wasm.objects.items.len); - const object = try archive.parseObject(gpa, offset.items[0]); - try wasm.objects.append(gpa, object); - try wasm.resolveSymbolsInObject(object_file_index); + var object = try archive.parseObject(gpa, offset.items[0]); + object.index = @enumFromInt(wasm.files.len); + try wasm.files.append(gpa, .{ .object = object }); + try wasm.objects.append(gpa, object.index); + try wasm.resolveSymbolsInObject(object.index); // continue loop for any remaining undefined symbols that still exist // after resolving last object file @@ -1176,9 +1189,10 @@ fn validateFeatures( // extract all the used, disallowed and required features from each // linked object file so we can test them. - for (wasm.objects.items, 0..) 
|object, object_index| { + for (wasm.objects.items) |file_index| { + const object: Object = wasm.files.items(.data)[file_index].object; for (object.features) |feature| { - const value = @as(u16, @intCast(object_index)) << 1 | @as(u1, 1); + const value = @as(u16, @intFromEnum(file_index)) << 1 | @as(u1, 1); switch (feature.prefix) { .used => { used[@intFromEnum(feature.tag)] = value; @@ -1210,7 +1224,7 @@ fn validateFeatures( emit_features_count.* += @intFromBool(is_enabled); } else if (is_enabled and !allowed[used_index]) { log.err("feature '{}' not allowed, but used by linked object", .{@as(types.Feature.Tag, @enumFromInt(used_index))}); - log.err(" defined in '{s}'", .{wasm.objects.items[used_set >> 1].name}); + log.err(" defined in '{s}'", .{wasm.files.items(.data)[used_set >> 1].object.path}); valid_feature_set = false; } } @@ -1224,7 +1238,7 @@ fn validateFeatures( if (@as(u1, @truncate(disallowed_feature)) != 0) { log.err( "shared-memory is disallowed by '{s}' because it wasn't compiled with 'atomics' and 'bulk-memory' features enabled", - .{wasm.objects.items[disallowed_feature >> 1].name}, + .{wasm.files.items(.data)[disallowed_feature >> 1].object.path}, ); valid_feature_set = false; } @@ -1244,16 +1258,17 @@ fn validateFeatures( } } // For each linked object, validate the required and disallowed features - for (wasm.objects.items) |object| { + for (wasm.objects.items) |file_index| { var object_used_features = [_]bool{false} ** known_features_count; + const object = wasm.files.items(.data)[file_index].object; for (object.features) |feature| { if (feature.prefix == .disallowed) continue; // already defined in 'disallowed' set. 
// from here a feature is always used const disallowed_feature = disallowed[@intFromEnum(feature.tag)]; if (@as(u1, @truncate(disallowed_feature)) != 0) { log.err("feature '{}' is disallowed, but used by linked object", .{feature.tag}); - log.err(" disallowed by '{s}'", .{wasm.objects.items[disallowed_feature >> 1].name}); - log.err(" used in '{s}'", .{object.name}); + log.err(" disallowed by '{s}'", .{wasm.files.items(.data)[disallowed_feature >> 1].object.path}); + log.err(" used in '{s}'", .{object.path}); valid_feature_set = false; } @@ -1265,8 +1280,8 @@ fn validateFeatures( const is_required = @as(u1, @truncate(required_feature)) != 0; if (is_required and !object_used_features[feature_index]) { log.err("feature '{}' is required but not used in linked object", .{@as(types.Feature.Tag, @enumFromInt(feature_index))}); - log.err(" required by '{s}'", .{wasm.objects.items[required_feature >> 1].name}); - log.err(" missing in '{s}'", .{object.name}); + log.err(" required by '{s}'", .{wasm.files.items(.data)[required_feature >> 1].object.path}); + log.err(" missing in '{s}'", .{object.path}); valid_feature_set = false; } } @@ -1346,9 +1361,10 @@ fn checkUndefinedSymbols(wasm: *const Wasm) !void { const symbol = undef.getSymbol(wasm); if (symbol.tag == .data) { found_undefined_symbols = true; - const file_name = if (undef.file) |file_index| name: { - break :name wasm.objects.items[file_index].name; - } else wasm.name; + const file_name = if (undef.file) |file_index| + wasm.file(file_index).?.path() + else + wasm.name; const symbol_name = undef.getName(wasm); log.err("could not resolve undefined symbol '{s}'", .{symbol_name}); log.err(" defined in '{s}'", .{file_name}); @@ -1369,8 +1385,11 @@ pub fn deinit(wasm: *Wasm) void { for (wasm.segment_info.values()) |segment_info| { gpa.free(segment_info.name); } - for (wasm.objects.items) |*object| { - object.deinit(gpa); + if (wasm.zigObjectPtr()) |zig_obj| { + zig_obj.deinit(gpa); + } + for (wasm.objects.items) |obj_index| 
{ + wasm.file(obj_index).?.object.deinit(gpa); } for (wasm.archives.items) |*archive| { @@ -1441,12 +1460,11 @@ fn getGlobalType(wasm: *const Wasm, loc: SymbolLoc) std.wasm.GlobalType { assert(symbol.tag == .global); const is_undefined = symbol.isUndefined(); if (loc.file) |file_index| { - const obj: Object = wasm.objects.items[file_index]; + const obj_file = wasm.file(@enumFromInt(file_index)).?; if (is_undefined) { - return obj.findImport(.global, symbol.index).kind.global; + return obj_file.import(loc.index).kind.global; } - const import_global_count = obj.importedCountByKind(.global); - return obj.globals[symbol.index - import_global_count].global_type; + return obj_file.globals()[symbol.index - obj_file.importedGlobals()].global_type; } if (is_undefined) { return wasm.imports.get(loc).?.kind.global; @@ -1461,14 +1479,13 @@ fn getFunctionSignature(wasm: *const Wasm, loc: SymbolLoc) std.wasm.Type { assert(symbol.tag == .function); const is_undefined = symbol.isUndefined(); if (loc.file) |file_index| { - const obj: Object = wasm.objects.items[file_index]; + const obj_file = wasm.file(@enumFromInt(file_index)).?; if (is_undefined) { - const ty_index = obj.findImport(.function, symbol.index).kind.function; - return obj.func_types[ty_index]; + const ty_index = obj_file.import(loc.index).kind.function; + return obj_file.funcTypes()[ty_index]; } - const import_function_count = obj.importedCountByKind(.function); - const type_index = obj.functions[symbol.index - import_function_count].type_index; - return obj.func_types[type_index]; + const type_index = obj_file.functions()[symbol.index - obj_file.importedFunctions()].type_index; + return obj_file.funcTypes()[type_index]; } if (is_undefined) { const ty_index = wasm.imports.get(loc).?.kind.function; @@ -1606,10 +1623,10 @@ fn allocateAtoms(wasm: *Wasm) !void { // Ensure we get the original symbol, so we verify the correct symbol on whether // it is dead or not and ensure an atom is removed when dead. 
// This is required as we may have parsed aliases into atoms. - const sym = if (symbol_loc.file) |object_index| sym: { - const object = wasm.objects.items[object_index]; - break :sym object.symtable[symbol_loc.index]; - } else wasm.synthetic_symbols.items[symbol_loc.index]; + const sym = if (symbol_loc.file) |object_index| + wasm.file(object_index).?.symbol(symbol_loc.index).* + else + wasm.synthetic_symbols.items[symbol_loc.index]; // Dead symbols must be unlinked from the linked-list to prevent them // from being emit into the binary. @@ -1655,9 +1672,10 @@ fn allocateVirtualAddresses(wasm: *Wasm) void { const atom = wasm.getAtom(atom_index); const merge_segment = wasm.base.comp.config.output_mode != .Obj; - const segment_info = if (atom.file) |object_index| blk: { - break :blk wasm.objects.items[object_index].segment_info; - } else wasm.segment_info.values(); + const segment_info = if (atom.file) |object_index| + wasm.file(object_index).?.segmentInfo() + else + wasm.segment_info.values(); const segment_name = segment_info[symbol.index].outputName(merge_segment); const segment_index = wasm.data_segments.get(segment_name).?; const segment = wasm.segments.items[segment_index]; @@ -1713,7 +1731,8 @@ fn sortDataSegments(wasm: *Wasm) !void { /// contain any parameters. fn setupInitFunctions(wasm: *Wasm) !void { const gpa = wasm.base.comp.gpa; - for (wasm.objects.items, 0..) 
|object, file_index| { + for (wasm.objects.items) |file_index| { + const object = wasm.files.items(.data)[file_index].object; try wasm.init_funcs.ensureUnusedCapacity(gpa, object.init_funcs.len); for (object.init_funcs) |init_func| { const symbol = object.symtable[init_func.symbol_index]; @@ -1961,7 +1980,7 @@ fn setupImports(wasm: *Wasm) !void { for (wasm.resolved_symbols.keys()) |symbol_loc| { const file_index = symbol_loc.file orelse { - // imports generated by Zig code are already in the `import` section + // Synthetic symbols will already exist in the `import` section continue; }; @@ -1974,14 +1993,14 @@ fn setupImports(wasm: *Wasm) !void { } log.debug("Symbol '{s}' will be imported from the host", .{symbol_loc.getName(wasm)}); - const object = wasm.objects.items[file_index]; - const import = object.findImport(symbol.tag.externalType(), symbol.index); + const obj_file = wasm.file(file_index).?; + const import = obj_file.import(symbol_loc.index); // We copy the import to a new import to ensure the names contain references // to the internal string table, rather than of the object file. 
const new_imp: types.Import = .{ - .module_name = try wasm.string_table.put(gpa, object.string_table.get(import.module_name)), - .name = try wasm.string_table.put(gpa, object.string_table.get(import.name)), + .module_name = try wasm.string_table.put(gpa, obj_file.string(import.module_name)), + .name = try wasm.string_table.put(gpa, obj_file.string(import.name)), .kind = import.kind, }; // TODO: De-duplicate imports when they contain the same names and type @@ -2032,28 +2051,23 @@ fn mergeSections(wasm: *Wasm) !void { defer removed_duplicates.deinit(); for (wasm.resolved_symbols.keys()) |sym_loc| { - if (sym_loc.file == null) { + const file_index = sym_loc.file orelse { // Zig code-generated symbols are already within the sections and do not // require to be merged continue; - } + }; - const object = &wasm.objects.items[sym_loc.file.?]; - const symbol = &object.symtable[sym_loc.index]; + const obj_file = wasm.file(@enumFromInt(file_index)).?; + const symbol = obj_file.symbol[sym_loc.index]; - if (symbol.isDead() or - symbol.isUndefined() or - (symbol.tag != .function and symbol.tag != .global and symbol.tag != .table)) - { + if (symbol.isDead() or symbol.isUndefined()) { // Skip undefined symbols as they go in the `import` section - // Also skip symbols that do not need to have a section merged. 
continue; } - const offset = object.importedCountByKind(symbol.tag.externalType()); - const index = symbol.index - offset; switch (symbol.tag) { .function => { + const index = symbol.index - obj_file.importedFunctions(); const gop = try wasm.functions.getOrPut( gpa, .{ .file = sym_loc.file, .index = symbol.index }, @@ -2071,20 +2085,24 @@ fn mergeSections(wasm: *Wasm) !void { try removed_duplicates.append(sym_loc); continue; } - gop.value_ptr.* = .{ .func = object.functions[index], .sym_index = sym_loc.index }; + gop.value_ptr.* = .{ .func = obj_file.functions()[index], .sym_index = sym_loc.index }; symbol.index = @as(u32, @intCast(gop.index)) + wasm.imported_functions_count; }, .global => { - const original_global = object.globals[index]; + const index = symbol.index - obj_file.importedFunctions(); + const original_global = obj_file.globals()[index]; symbol.index = @as(u32, @intCast(wasm.wasm_globals.items.len)) + wasm.imported_globals_count; try wasm.wasm_globals.append(gpa, original_global); }, .table => { - const original_table = object.tables[index]; + const index = symbol.index - obj_file.importedFunctions(); + // assert it's a regular relocatable object file as `ZigObject` will never + // contain a table. 
+ const original_table = obj_file.object.tables[index]; symbol.index = @as(u32, @intCast(wasm.tables.items.len)) + wasm.imported_tables_count; try wasm.tables.append(gpa, original_table); }, - else => unreachable, + else => continue, } } @@ -2111,12 +2129,13 @@ fn mergeTypes(wasm: *Wasm) !void { defer dirty.deinit(); for (wasm.resolved_symbols.keys()) |sym_loc| { - if (sym_loc.file == null) { + const file_index = sym_loc.file orelse { // zig code-generated symbols are already present in final type section continue; - } - const object = wasm.objects.items[sym_loc.file.?]; - const symbol = object.symtable[sym_loc.index]; + }; + + const obj_file = wasm.file(@enumFromInt(file_index)).?; + const symbol = obj_file.symbol(sym_loc.index); if (symbol.tag != .function or symbol.isDead()) { // Only functions have types. Only retrieve the type of referenced functions. continue; @@ -2125,12 +2144,12 @@ fn mergeTypes(wasm: *Wasm) !void { if (symbol.isUndefined()) { log.debug("Adding type from extern function '{s}'", .{sym_loc.getName(wasm)}); const import: *types.Import = wasm.imports.getPtr(sym_loc) orelse continue; - const original_type = object.func_types[import.kind.function]; + const original_type = obj_file.funcTypes()[import.kind.function]; import.kind.function = try wasm.putOrGetFuncType(original_type); } else if (!dirty.contains(symbol.index)) { log.debug("Adding type from function '{s}'", .{sym_loc.getName(wasm)}); const func = &wasm.functions.values()[symbol.index - wasm.imported_functions_count].func; - func.type_index = try wasm.putOrGetFuncType(object.func_types[func.type_index]); + func.type_index = try wasm.putOrGetFuncType(obj_file.funcTypes()[func.type_index]); dirty.putAssumeCapacityNoClobber(symbol.index, {}); } } @@ -2240,11 +2259,18 @@ fn setupMemory(wasm: *Wasm) !void { const is_obj = comp.config.output_mode == .Obj; + const stack_ptr = if (wasm.findGlobalSymbol("__stack_pointer")) |loc| index: { + const sym = loc.getSymbol(wasm); + break :index sym.index 
- wasm.imported_globals_count; + } else null; + if (place_stack_first and !is_obj) { memory_ptr = stack_alignment.forward(memory_ptr); memory_ptr += wasm.base.stack_size; // We always put the stack pointer global at index 0 - wasm.wasm_globals.items[0].init.i32_const = @as(i32, @bitCast(@as(u32, @intCast(memory_ptr)))); + if (stack_ptr) |index| { + wasm.wasm_globals.items[index].init.i32_const = @as(i32, @bitCast(@as(u32, @intCast(memory_ptr)))); + } } var offset: u32 = @as(u32, @intCast(memory_ptr)); @@ -2290,7 +2316,9 @@ fn setupMemory(wasm: *Wasm) !void { if (!place_stack_first and !is_obj) { memory_ptr = stack_alignment.forward(memory_ptr); memory_ptr += wasm.base.stack_size; - wasm.wasm_globals.items[0].init.i32_const = @as(i32, @bitCast(@as(u32, @intCast(memory_ptr)))); + if (stack_ptr) |index| { + wasm.wasm_globals.items[index].init.i32_const = @as(i32, @bitCast(@as(u32, @intCast(memory_ptr)))); + } } // One of the linked object files has a reference to the __heap_base symbol. @@ -2355,17 +2383,17 @@ fn setupMemory(wasm: *Wasm) !void { /// From a given object's index and the index of the segment, returns the corresponding /// index of the segment within the final data section. When the segment does not yet /// exist, a new one will be initialized and appended. The new index will be returned in that case. 
-pub fn getMatchingSegment(wasm: *Wasm, object_index: u16, symbol_index: u32) !u32 { +pub fn getMatchingSegment(wasm: *Wasm, file_index: File.Index, symbol_index: u32) !u32 { const comp = wasm.base.comp; const gpa = comp.gpa; - const object: Object = wasm.objects.items[object_index]; - const symbol = object.symtable[symbol_index]; + const obj_file = wasm.file(file_index).?; + const symbol = obj_file.symbols()[symbol_index]; const index: u32 = @intCast(wasm.segments.items.len); const shared_memory = comp.config.shared_memory; switch (symbol.tag) { .data => { - const segment_info = object.segment_info[symbol.index]; + const segment_info = obj_file.segmentInfo()[symbol.index]; const merge_segment = comp.config.output_mode != .Obj; const result = try wasm.data_segments.getOrPut(gpa, segment_info.outputName(merge_segment)); if (!result.found_existing) { @@ -2394,7 +2422,7 @@ pub fn getMatchingSegment(wasm: *Wasm, object_index: u16, symbol_index: u32) !u3 break :blk index; }, .section => { - const section_name = object.string_table.get(symbol.name); + const section_name = file.symbolName(symbol.index); if (mem.eql(u8, section_name, ".debug_info")) { return wasm.debug_info_index orelse blk: { wasm.debug_info_index = index; @@ -4291,12 +4319,10 @@ fn markReferences(wasm: *Wasm) !void { // Debug sections may require to be parsed and marked when it contains // relocations to alive symbols. 
if (sym.tag == .section and comp.config.debug_format != .strip) { - const file = sym_loc.file orelse continue; // Incremental debug info is done independently - const object = &wasm.objects.items[file]; - const atom_index = try Object.parseSymbolIntoAtom(object, file, sym_loc.index, wasm); - const atom = wasm.getAtom(atom_index); - const atom_sym = atom.symbolLoc().getSymbol(wasm); - atom_sym.mark(); + const file_index = sym_loc.file orelse continue; // Incremental debug info is done independently + const obj_file = wasm.file(@enumFromInt(file_index)).?; + _ = try obj_file.parseSymbolIntoAtom(wasm, sym_loc.index); + sym.mark(); } } } @@ -4319,9 +4345,8 @@ fn mark(wasm: *Wasm, loc: SymbolLoc) !void { } const atom_index = if (loc.file) |file_index| idx: { - const object = &wasm.objects.items[file_index]; - const atom_index = try object.parseSymbolIntoAtom(file_index, loc.index, wasm); - break :idx atom_index; + const obj_file = wasm.file(@enumFromInt(file_index)).?; + break :idx try obj_file.parseSymbolIntoAtom(wasm, loc.index); } else wasm.symbol_atom.get(loc) orelse return; const atom = wasm.getAtom(atom_index); diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig index aaa99292bc..de6f0500e8 100644 --- a/src/link/Wasm/Object.zig +++ b/src/link/Wasm/Object.zig @@ -9,6 +9,7 @@ const std = @import("std"); const Wasm = @import("../Wasm.zig"); const Symbol = @import("Symbol.zig"); const Alignment = types.Alignment; +const File = @import("file.zig").File; const Allocator = std.mem.Allocator; const leb = std.leb; @@ -16,12 +17,14 @@ const meta = std.meta; const log = std.log.scoped(.link); +/// Index into the list of relocatable object files within the linker driver. +index: File.Index = .null, /// Wasm spec version used for this `Object` version: u32 = 0, /// The file descriptor that represents the wasm object file. file: ?std.fs.File = null, /// Name (read path) of the object file. 
-name: []const u8, +path: []const u8, /// Parsed type section func_types: []const std.wasm.Type = &.{}, /// A list of all imports for this module @@ -64,6 +67,12 @@ relocatable_data: std.AutoHashMapUnmanaged(RelocatableData.Tag, []RelocatableDat /// import name, module name and export names. Each string will be deduplicated /// and returns an offset into the table. string_table: Wasm.StringTable = .{}, +/// Amount of functions in the `import` sections. +imported_functions_count: u32 = 0, +/// Amount of globals in the `import` section. +imported_globals_count: u32 = 0, +/// Amount of tables in the `import` section. +imported_tables_count: u32 = 0, /// Represents a single item within a section (depending on its `type`) const RelocatableData = struct { @@ -121,7 +130,7 @@ pub const InitError = error{NotObjectFile} || ParseError || std.fs.File.ReadErro pub fn create(gpa: Allocator, file: std.fs.File, name: []const u8, maybe_max_size: ?usize) InitError!Object { var object: Object = .{ .file = file, - .name = try gpa.dupe(u8, name), + .path = try gpa.dupe(u8, name), }; var is_object_file: bool = false; @@ -199,29 +208,17 @@ pub fn deinit(object: *Object, gpa: Allocator) void { /// Finds the import within the list of imports from a given kind and index of that kind. 
/// Asserts the import exists -pub fn findImport(object: *const Object, import_kind: std.wasm.ExternalKind, index: u32) types.Import { +pub fn findImport(object: *const Object, index: u32) types.Import { + const sym = object.symtable[index]; var i: u32 = 0; return for (object.imports) |import| { - if (std.meta.activeTag(import.kind) == import_kind) { + if (std.meta.activeTag(import.kind) == sym.tag) { if (i == index) return import; i += 1; } } else unreachable; // Only existing imports are allowed to be found } -/// Counts the entries of imported `kind` and returns the result -pub fn importedCountByKind(object: *const Object, kind: std.wasm.ExternalKind) u32 { - var i: u32 = 0; - return for (object.imports) |imp| { - if (@as(std.wasm.ExternalKind, imp.kind) == kind) i += 1; - } else i; -} - -/// From a given `RelocatableDate`, find the corresponding debug section name -pub fn getDebugName(object: *const Object, relocatable_data: RelocatableData) []const u8 { - return object.string_table.get(relocatable_data.index); -} - /// Checks if the object file is an MVP version. /// When that's the case, we check if there's an import table definiton with its name /// set to '__indirect_function_table". 
When that's also the case, @@ -427,16 +424,25 @@ fn Parser(comptime ReaderType: type) type { const kind = try readEnum(std.wasm.ExternalKind, reader); const kind_value: std.wasm.Import.Kind = switch (kind) { - .function => .{ .function = try readLeb(u32, reader) }, + .function => val: { + parser.object.imported_functions_count += 1; + break :val .{ .function = try readLeb(u32, reader) }; + }, .memory => .{ .memory = try readLimits(reader) }, - .global => .{ .global = .{ - .valtype = try readEnum(std.wasm.Valtype, reader), - .mutable = (try reader.readByte()) == 0x01, - } }, - .table => .{ .table = .{ - .reftype = try readEnum(std.wasm.RefType, reader), - .limits = try readLimits(reader), - } }, + .global => val: { + parser.object.imported_globals_count += 1; + break :val .{ .global = .{ + .valtype = try readEnum(std.wasm.Valtype, reader), + .mutable = (try reader.readByte()) == 0x01, + } }; + }, + .table => val: { + parser.object.imported_tables_count += 1; + break :val .{ .table = .{ + .reftype = try readEnum(std.wasm.RefType, reader), + .limits = try readLimits(reader), + } }; + }, }; import.* = .{ @@ -904,7 +910,7 @@ fn assertEnd(reader: anytype) !void { } /// Parses an object file into atoms, for code and data sections -pub fn parseSymbolIntoAtom(object: *Object, object_index: u16, symbol_index: u32, wasm: *Wasm) !Atom.Index { +pub fn parseSymbolIntoAtom(object: *Object, wasm: *Wasm, symbol_index: u32) !Atom.Index { const comp = wasm.base.comp; const gpa = comp.gpa; const symbol = &object.symtable[symbol_index]; @@ -922,19 +928,16 @@ pub fn parseSymbolIntoAtom(object: *Object, object_index: u16, symbol_index: u32 }, else => unreachable, }; - const final_index = try wasm.getMatchingSegment(object_index, symbol_index); - const atom_index = @as(Atom.Index, @intCast(wasm.managed_atoms.items.len)); - const atom = try wasm.managed_atoms.addOne(gpa); - atom.* = Atom.empty; + const final_index = try wasm.getMatchingSegment(object.index, symbol_index); + const 
atom_index = try wasm.createAtom(symbol_index, object.index); try wasm.appendAtomAtIndex(final_index, atom_index); - atom.sym_index = symbol_index; - atom.file = object_index; + const atom = wasm.getAtomPtr(atom_index); atom.size = relocatable_data.size; atom.alignment = relocatable_data.getAlignment(object); atom.code = std.ArrayListUnmanaged(u8).fromOwnedSlice(relocatable_data.data[0..relocatable_data.size]); atom.original_offset = relocatable_data.offset; - try wasm.symbol_atom.putNoClobber(gpa, atom.symbolLoc(), atom_index); + const segment: *Wasm.Segment = &wasm.segments.items[final_index]; if (relocatable_data.type == .data) { //code section and custom sections are 1-byte aligned segment.alignment = segment.alignment.max(atom.alignment); @@ -952,7 +955,7 @@ pub fn parseSymbolIntoAtom(object: *Object, object_index: u16, symbol_index: u32 .R_WASM_TABLE_INDEX_SLEB64, => { try wasm.function_table.put(gpa, .{ - .file = object_index, + .file = object.index, .index = reloc.index, }, 0); }, @@ -963,7 +966,7 @@ pub fn parseSymbolIntoAtom(object: *Object, object_index: u16, symbol_index: u32 if (sym.tag != .global) { try wasm.got_symbols.append( gpa, - .{ .file = object_index, .index = reloc.index }, + .{ .file = object.index, .index = reloc.index }, ); } }, diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index 81eda1e413..608ea7e201 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -11,6 +11,9 @@ index: File.Index, decls: std.AutoHashMapUnmanaged(InternPool.DeclIndex, Atom.Index) = .{}, /// List of function type signatures for this Zig module. func_types: std.ArrayListUnmanaged(std.wasm.Type) = .{}, +/// List of `std.wasm.Func`. Each entry contains the function signature, +/// rather than the actual body. +functions: std.ArrayListUnmanaged(std.wasm.Func) = .{}, /// Map of symbol locations, represented by its `types.Import`. imports: std.AutoHashMapUnmanaged(u32, types.Import) = .{}, /// List of WebAssembly globals. 
@@ -1152,6 +1155,39 @@ pub fn storeDeclType(zig_object: *ZigObject, gpa: std.mem.Allocator, decl_index: return index; } +/// The symbols in ZigObject are already represented by an atom as we need to store its data. +/// So rather than creating a new Atom and returning its index, we use this oppertunity to scan +/// its relocations and create any GOT symbols or function table indexes it may require. +pub fn parseSymbolIntoAtom(zig_object: *ZigObject, wasm_file: *Wasm, index: u32) Atom.Index { + const gpa = wasm_file.base.comp.gpa; + const loc: Wasm.SymbolLoc = .{ .file = @intFromEnum(zig_object.index), .index = index }; + const final_index = try wasm_file.getMatchingSegment(zig_object.index, index); + const atom_index = wasm_file.symbol_atom.get(loc).?; + try wasm_file.appendAtomAtIndex(final_index, atom_index); + const atom = wasm_file.getAtom(atom_index); + for (atom.relocs.items) |reloc| { + switch (reloc.relocation_type) { + .R_WASM_TABLE_INDEX_I32, + .R_WASM_TABLE_INDEX_I64, + .R_WASM_TABLE_INDEX_SLEB, + .R_WASM_TABLE_INDEX_SLEB64, + => { + try wasm_file.function_table.put(gpa, loc, 0); + }, + .R_WASM_GLOBAL_INDEX_I32, + .R_WASM_GLOBAL_INDEX_LEB, + => { + const sym = zig_object.symbol(reloc.index); + if (sym.tag != .global) { + try wasm_file.got_symbols.append(gpa, loc); + } + }, + else => {}, + } + } + return atom_index; +} + const build_options = @import("build_options"); const builtin = @import("builtin"); const codegen = @import("../../codegen.zig"); From cbc8d330622c597527397d98b14b6d298b1b981e Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Fri, 19 Jan 2024 18:04:06 +0100 Subject: [PATCH 07/21] wasm: fix symbol resolution and atom processing --- src/link/Wasm.zig | 107 ++++++++++++++++++------------------ src/link/Wasm/Atom.zig | 22 +++++--- src/link/Wasm/Object.zig | 29 +++++----- src/link/Wasm/ZigObject.zig | 15 ++--- 4 files changed, 89 insertions(+), 84 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 142365ecb3..3fcd6333b0 
100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -226,8 +226,8 @@ pub const SymbolLoc = struct { return new_loc.getSymbol(wasm_file); } if (loc.file) |object_index| { - const object = wasm_file.objects.items[object_index]; - return &object.symtable[loc.index]; + const obj_file = wasm_file.file(@enumFromInt(object_index)).?; + return obj_file.symbol(loc.index); } return &wasm_file.synthetic_symbols.items[loc.index]; } @@ -238,8 +238,8 @@ pub const SymbolLoc = struct { return new_loc.getName(wasm_file); } if (loc.file) |object_index| { - const object = wasm_file.objects.items[object_index]; - return object.string_table.get(object.symtable[loc.index].name); + const obj_file = wasm_file.file(@enumFromInt(object_index)).?; + return obj_file.symbolName(loc.index); } return wasm_file.string_table.get(wasm_file.synthetic_symbols.items[loc.index].name); } @@ -581,12 +581,13 @@ pub fn createEmpty( return wasm; } -pub fn file(wasm: *Wasm, index: File.Index) ?File { - const tag = wasm.files.items(.tags)[index]; +pub fn file(wasm: *const Wasm, index: File.Index) ?File { + if (index == .null) return null; + const tag = wasm.files.items(.tags)[@intFromEnum(index)]; return switch (tag) { .null => null, - .zig_object => .{ .zig_object = &wasm.files.items(.data)[index].zig_object }, - .object => .{ .object = &wasm.files.items(.data)[index].object }, + .zig_object => .{ .zig_object = &wasm.files.items(.data)[@intFromEnum(index)].zig_object }, + .object => .{ .object = &wasm.files.items(.data)[@intFromEnum(index)].object }, }; } @@ -678,7 +679,7 @@ pub fn createAtom(wasm: *Wasm, sym_index: u32, file_index: File.Index) !Atom.Ind const gpa = wasm.base.comp.gpa; const index: Atom.Index = @intCast(wasm.managed_atoms.items.len); const atom = try wasm.managed_atoms.addOne(gpa); - atom.* = .{ .file_index = file_index, .sym_index = sym_index }; + atom.* = .{ .file = file_index, .sym_index = sym_index }; try wasm.symbol_atom.putNoClobber(gpa, .{ .file = null, .index = sym_index }, 
index); return index; @@ -755,18 +756,18 @@ fn requiresTLSReloc(wasm: *const Wasm) bool { return false; } -fn resolveSymbolsInObject(wasm: *Wasm, object_index: u16) !void { +fn resolveSymbolsInObject(wasm: *Wasm, file_index: File.Index) !void { const gpa = wasm.base.comp.gpa; - const object: Object = wasm.objects.items[object_index]; - log.debug("Resolving symbols in object: '{s}'", .{object.name}); + const obj_file = wasm.file(file_index).?; + log.debug("Resolving symbols in object: '{s}'", .{obj_file.path()}); - for (object.symtable, 0..) |symbol, i| { - const sym_index = @as(u32, @intCast(i)); + for (obj_file.symbols(), 0..) |symbol, i| { + const sym_index: u32 = @intCast(i); const location: SymbolLoc = .{ - .file = object_index, + .file = @intFromEnum(file_index), .index = sym_index, }; - const sym_name = object.string_table.get(symbol.name); + const sym_name = obj_file.string(symbol.name); if (mem.eql(u8, sym_name, "__indirect_function_table")) { continue; } @@ -775,7 +776,7 @@ fn resolveSymbolsInObject(wasm: *Wasm, object_index: u16) !void { if (symbol.isLocal()) { if (symbol.isUndefined()) { log.err("Local symbols are not allowed to reference imports", .{}); - log.err(" symbol '{s}' defined in '{s}'", .{ sym_name, object.name }); + log.err(" symbol '{s}' defined in '{s}'", .{ sym_name, obj_file.path() }); return error.UndefinedLocal; } try wasm.resolved_symbols.putNoClobber(gpa, location, {}); @@ -796,9 +797,10 @@ fn resolveSymbolsInObject(wasm: *Wasm, object_index: u16) !void { const existing_loc = maybe_existing.value_ptr.*; const existing_sym: *Symbol = existing_loc.getSymbol(wasm); - const existing_file_path = if (existing_loc.file) |file_index| blk: { - break :blk wasm.objects.items[file_index].name; - } else wasm.name; + const existing_file_path = if (existing_loc.file) |existing_file_index| + wasm.file(@enumFromInt(existing_file_index)).?.path() + else + wasm.name; if (!existing_sym.isUndefined()) outer: { if (!symbol.isUndefined()) inner: { @@ -811,7 
+813,7 @@ fn resolveSymbolsInObject(wasm: *Wasm, object_index: u16) !void { // both are defined and weak, we have a symbol collision. log.err("symbol '{s}' defined multiple times", .{sym_name}); log.err(" first definition in '{s}'", .{existing_file_path}); - log.err(" next definition in '{s}'", .{object.name}); + log.err(" next definition in '{s}'", .{obj_file.path()}); return error.SymbolCollision; } @@ -822,24 +824,24 @@ fn resolveSymbolsInObject(wasm: *Wasm, object_index: u16) !void { if (symbol.tag != existing_sym.tag) { log.err("symbol '{s}' mismatching type '{s}", .{ sym_name, @tagName(symbol.tag) }); log.err(" first definition in '{s}'", .{existing_file_path}); - log.err(" next definition in '{s}'", .{object.name}); + log.err(" next definition in '{s}'", .{obj_file.path()}); return error.SymbolMismatchingType; } if (existing_sym.isUndefined() and symbol.isUndefined()) { // only verify module/import name for function symbols if (symbol.tag == .function) { - const existing_name = if (existing_loc.file) |file_index| blk: { - const obj = wasm.objects.items[file_index]; - const name_index = obj.findImport(symbol.tag.externalType(), existing_sym.index).module_name; - break :blk obj.string_table.get(name_index); + const existing_name = if (existing_loc.file) |existing_file_index| blk: { + const existing_obj = wasm.file(@enumFromInt(existing_file_index)).?; + const imp = existing_obj.import(existing_loc.index); + break :blk existing_obj.string(imp.module_name); } else blk: { const name_index = wasm.imports.get(existing_loc).?.module_name; break :blk wasm.string_table.get(name_index); }; - const module_index = object.findImport(symbol.tag.externalType(), symbol.index).module_name; - const module_name = object.string_table.get(module_index); + const imp = obj_file.import(sym_index); + const module_name = obj_file.string(imp.module_name); if (!mem.eql(u8, existing_name, module_name)) { log.err("symbol '{s}' module name mismatch. 
Expected '{s}', but found '{s}'", .{ sym_name, @@ -847,7 +849,7 @@ fn resolveSymbolsInObject(wasm: *Wasm, object_index: u16) !void { module_name, }); log.err(" first definition in '{s}'", .{existing_file_path}); - log.err(" next definition in '{s}'", .{object.name}); + log.err(" next definition in '{s}'", .{obj_file.path()}); return error.ModuleNameMismatch; } } @@ -863,7 +865,7 @@ fn resolveSymbolsInObject(wasm: *Wasm, object_index: u16) !void { if (existing_ty.mutable != new_ty.mutable or existing_ty.valtype != new_ty.valtype) { log.err("symbol '{s}' mismatching global types", .{sym_name}); log.err(" first definition in '{s}'", .{existing_file_path}); - log.err(" next definition in '{s}'", .{object.name}); + log.err(" next definition in '{s}'", .{obj_file.path()}); return error.GlobalTypeMismatch; } } @@ -875,7 +877,7 @@ fn resolveSymbolsInObject(wasm: *Wasm, object_index: u16) !void { log.err("symbol '{s}' mismatching function signatures.", .{sym_name}); log.err(" expected signature {}, but found signature {}", .{ existing_ty, new_ty }); log.err(" first definition in '{s}'", .{existing_file_path}); - log.err(" next definition in '{s}'", .{object.name}); + log.err(" next definition in '{s}'", .{obj_file.path()}); return error.FunctionSignatureMismatch; } } @@ -891,7 +893,7 @@ fn resolveSymbolsInObject(wasm: *Wasm, object_index: u16) !void { // simply overwrite with the new symbol log.debug("Overwriting symbol '{s}'", .{sym_name}); log.debug(" old definition in '{s}'", .{existing_file_path}); - log.debug(" new definition in '{s}'", .{object.name}); + log.debug(" new definition in '{s}'", .{obj_file.path()}); try wasm.discarded.putNoClobber(gpa, existing_loc, location); maybe_existing.value_ptr.* = location; try wasm.globals.put(gpa, sym_name_index, location); @@ -1190,7 +1192,7 @@ fn validateFeatures( // extract all the used, disallowed and required features from each // linked object file so we can test them. 
for (wasm.objects.items) |file_index| { - const object: Object = wasm.files.items(.data)[file_index].object; + const object: Object = wasm.files.items(.data)[@intFromEnum(file_index)].object; for (object.features) |feature| { const value = @as(u16, @intFromEnum(file_index)) << 1 | @as(u1, 1); switch (feature.prefix) { @@ -1260,7 +1262,7 @@ fn validateFeatures( // For each linked object, validate the required and disallowed features for (wasm.objects.items) |file_index| { var object_used_features = [_]bool{false} ** known_features_count; - const object = wasm.files.items(.data)[file_index].object; + const object = wasm.files.items(.data)[@intFromEnum(file_index)].object; for (object.features) |feature| { if (feature.prefix == .disallowed) continue; // already defined in 'disallowed' set. // from here a feature is always used @@ -1362,7 +1364,7 @@ fn checkUndefinedSymbols(wasm: *const Wasm) !void { if (symbol.tag == .data) { found_undefined_symbols = true; const file_name = if (undef.file) |file_index| - wasm.file(file_index).?.path() + wasm.file(@enumFromInt(file_index)).?.path() else wasm.name; const symbol_name = undef.getName(wasm); @@ -1386,7 +1388,7 @@ pub fn deinit(wasm: *Wasm) void { gpa.free(segment_info.name); } if (wasm.zigObjectPtr()) |zig_obj| { - zig_obj.deinit(gpa); + zig_obj.deinit(wasm); } for (wasm.objects.items) |obj_index| { wasm.file(obj_index).?.object.deinit(gpa); @@ -1623,8 +1625,8 @@ fn allocateAtoms(wasm: *Wasm) !void { // Ensure we get the original symbol, so we verify the correct symbol on whether // it is dead or not and ensure an atom is removed when dead. // This is required as we may have parsed aliases into atoms. 
- const sym = if (symbol_loc.file) |object_index| - wasm.file(object_index).?.symbol(symbol_loc.index).* + const sym = if (symbol_loc.file) |file_index| + wasm.file(@enumFromInt(file_index)).?.symbol(symbol_loc.index).* else wasm.synthetic_symbols.items[symbol_loc.index]; @@ -1672,8 +1674,8 @@ fn allocateVirtualAddresses(wasm: *Wasm) void { const atom = wasm.getAtom(atom_index); const merge_segment = wasm.base.comp.config.output_mode != .Obj; - const segment_info = if (atom.file) |object_index| - wasm.file(object_index).?.segmentInfo() + const segment_info = if (atom.file != .null) + wasm.file(atom.file).?.segmentInfo() else wasm.segment_info.values(); const segment_name = segment_info[symbol.index].outputName(merge_segment); @@ -1731,16 +1733,17 @@ fn sortDataSegments(wasm: *Wasm) !void { /// contain any parameters. fn setupInitFunctions(wasm: *Wasm) !void { const gpa = wasm.base.comp.gpa; + // There's no constructors for Zig so we can simply search through linked object files only. for (wasm.objects.items) |file_index| { - const object = wasm.files.items(.data)[file_index].object; + const object: Object = wasm.files.items(.data)[@intFromEnum(file_index)].object; try wasm.init_funcs.ensureUnusedCapacity(gpa, object.init_funcs.len); for (object.init_funcs) |init_func| { const symbol = object.symtable[init_func.symbol_index]; const ty: std.wasm.Type = if (symbol.isUndefined()) ty: { - const imp: types.Import = object.findImport(.function, symbol.index); + const imp: types.Import = object.findImport(symbol); break :ty object.func_types[imp.kind.function]; } else ty: { - const func_index = symbol.index - object.importedCountByKind(.function); + const func_index = symbol.index - object.imported_functions_count; const func = object.functions[func_index]; break :ty object.func_types[func.type_index]; }; @@ -1751,10 +1754,10 @@ fn setupInitFunctions(wasm: *Wasm) !void { log.debug("appended init func '{s}'\n", .{object.string_table.get(symbol.name)}); 
wasm.init_funcs.appendAssumeCapacity(.{ .index = init_func.symbol_index, - .file = @as(u16, @intCast(file_index)), + .file = @intFromEnum(file_index), .priority = init_func.priority, }); - try wasm.mark(.{ .index = init_func.symbol_index, .file = @intCast(file_index) }); + try wasm.mark(.{ .index = init_func.symbol_index, .file = @intFromEnum(file_index) }); } } @@ -1993,7 +1996,7 @@ fn setupImports(wasm: *Wasm) !void { } log.debug("Symbol '{s}' will be imported from the host", .{symbol_loc.getName(wasm)}); - const obj_file = wasm.file(file_index).?; + const obj_file = wasm.file(@enumFromInt(file_index)).?; const import = obj_file.import(symbol_loc.index); // We copy the import to a new import to ensure the names contain references @@ -2058,7 +2061,7 @@ fn mergeSections(wasm: *Wasm) !void { }; const obj_file = wasm.file(@enumFromInt(file_index)).?; - const symbol = obj_file.symbol[sym_loc.index]; + const symbol = obj_file.symbol(sym_loc.index); if (symbol.isDead() or symbol.isUndefined()) { // Skip undefined symbols as they go in the `import` section @@ -2422,7 +2425,7 @@ pub fn getMatchingSegment(wasm: *Wasm, file_index: File.Index, symbol_index: u32 break :blk index; }, .section => { - const section_name = file.symbolName(symbol.index); + const section_name = obj_file.symbolName(symbol.index); if (mem.eql(u8, section_name, ".debug_info")) { return wasm.debug_info_index orelse blk: { wasm.debug_info_index = index; @@ -2705,8 +2708,8 @@ fn linkWithZld(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) lin try wasm.parseInputFiles(positionals.items); - for (wasm.objects.items, 0..) 
|_, object_index| { - try wasm.resolveSymbolsInObject(@as(u16, @intCast(object_index))); + for (wasm.objects.items) |object_index| { + try wasm.resolveSymbolsInObject(object_index); } var emit_features_count: u32 = 0; @@ -2788,8 +2791,8 @@ pub fn flushModule(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) try wasm.parseInputFiles(positionals.items); - for (wasm.objects.items, 0..) |_, object_index| { - try wasm.resolveSymbolsInObject(@as(u16, @intCast(object_index))); + for (wasm.objects.items) |object_index| { + try wasm.resolveSymbolsInObject(object_index); } var emit_features_count: u32 = 0; diff --git a/src/link/Wasm/Atom.zig b/src/link/Wasm/Atom.zig index fb3d1a5724..ad2b811c0b 100644 --- a/src/link/Wasm/Atom.zig +++ b/src/link/Wasm/Atom.zig @@ -59,7 +59,10 @@ pub fn format(atom: Atom, comptime fmt: []const u8, options: std.fmt.FormatOptio /// Returns the location of the symbol that represents this `Atom` pub fn symbolLoc(atom: Atom) Wasm.SymbolLoc { - return .{ .file = atom.file, .index = atom.sym_index }; + if (atom.file == .null) { + return .{ .file = null, .index = atom.sym_index }; + } + return .{ .file = @intFromEnum(atom.file), .index = atom.sym_index }; } pub fn getSymbolIndex(atom: Atom) ?u32 { @@ -80,7 +83,7 @@ pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void { for (atom.relocs.items) |reloc| { const value = atom.relocationValue(reloc, wasm_bin); log.debug("Relocating '{s}' referenced in '{s}' offset=0x{x:0>8} value={d}", .{ - (Wasm.SymbolLoc{ .file = atom.file, .index = reloc.index }).getName(wasm_bin), + (Wasm.SymbolLoc{ .file = @intFromEnum(atom.file), .index = reloc.index }).getName(wasm_bin), symbol_name, reloc.offset, value, @@ -119,7 +122,11 @@ pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void { /// All values will be represented as a `u64` as all values can fit within it. /// The final value must be casted to the correct size. 
fn relocationValue(atom: Atom, relocation: types.Relocation, wasm_bin: *const Wasm) u64 { - const target_loc = (Wasm.SymbolLoc{ .file = atom.file, .index = relocation.index }).finalLoc(wasm_bin); + const target_loc = if (atom.file == .null) + (Wasm.SymbolLoc{ .file = null, .index = relocation.index }).finalLoc(wasm_bin) + else + (Wasm.SymbolLoc{ .file = @intFromEnum(atom.file), .index = relocation.index }).finalLoc(wasm_bin); + const symbol = target_loc.getSymbol(wasm_bin); if (relocation.relocation_type != .R_WASM_TYPE_INDEX_LEB and symbol.tag != .section and @@ -135,13 +142,10 @@ fn relocationValue(atom: Atom, relocation: types.Relocation, wasm_bin: *const Wa .R_WASM_TABLE_INDEX_I64, .R_WASM_TABLE_INDEX_SLEB, .R_WASM_TABLE_INDEX_SLEB64, - => return wasm_bin.function_table.get(.{ .file = atom.file, .index = relocation.index }) orelse 0, + => return wasm_bin.function_table.get(.{ .file = @intFromEnum(atom.file), .index = relocation.index }) orelse 0, .R_WASM_TYPE_INDEX_LEB => { - const file_index = atom.file orelse { - return relocation.index; - }; - - const original_type = wasm_bin.objects.items[file_index].func_types[relocation.index]; + const obj_file = wasm_bin.file(atom.file) orelse return relocation.index; + const original_type = obj_file.funcTypes()[relocation.index]; return wasm_bin.getTypeIndex(original_type).?; }, .R_WASM_GLOBAL_INDEX_I32, diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig index de6f0500e8..f22007366f 100644 --- a/src/link/Wasm/Object.zig +++ b/src/link/Wasm/Object.zig @@ -135,7 +135,7 @@ pub fn create(gpa: Allocator, file: std.fs.File, name: []const u8, maybe_max_siz var is_object_file: bool = false; const size = maybe_max_size orelse size: { - errdefer gpa.free(object.name); + errdefer gpa.free(object.path); const stat = try file.stat(); break :size @as(usize, @intCast(stat.size)); }; @@ -202,18 +202,17 @@ pub fn deinit(object: *Object, gpa: Allocator) void { } object.relocatable_data.deinit(gpa); 
object.string_table.deinit(gpa); - gpa.free(object.name); + gpa.free(object.path); object.* = undefined; } /// Finds the import within the list of imports from a given kind and index of that kind. /// Asserts the import exists -pub fn findImport(object: *const Object, index: u32) types.Import { - const sym = object.symtable[index]; +pub fn findImport(object: *const Object, sym: Symbol) types.Import { var i: u32 = 0; return for (object.imports) |import| { - if (std.meta.activeTag(import.kind) == sym.tag) { - if (i == index) return import; + if (std.meta.activeTag(import.kind) == sym.tag.externalType()) { + if (i == sym.index) return import; i += 1; } } else unreachable; // Only existing imports are allowed to be found @@ -231,14 +230,12 @@ fn checkLegacyIndirectFunctionTable(object: *Object) !?Symbol { if (sym.tag == .table) table_count += 1; } - const import_table_count = object.importedCountByKind(.table); - // For each import table, we also have a symbol so this is not a legacy object file - if (import_table_count == table_count) return null; + if (object.imported_tables_count == table_count) return null; if (table_count != 0) { log.err("Expected a table entry symbol for each of the {d} table(s), but instead got {d} symbols.", .{ - import_table_count, + object.imported_tables_count, table_count, }); return error.MissingTableSymbols; @@ -250,7 +247,7 @@ fn checkLegacyIndirectFunctionTable(object: *Object) !?Symbol { return error.UnexpectedTable; } - if (import_table_count != 1) { + if (object.imported_tables_count != 1) { log.err("Found more than one table import, but no representing table symbols", .{}); return error.MissingTableSymbols; } @@ -519,7 +516,7 @@ fn Parser(comptime ReaderType: type) type { const start = reader.context.bytes_left; var index: u32 = 0; const count = try readLeb(u32, reader); - const imported_function_count = parser.object.importedCountByKind(.function); + const imported_function_count = parser.object.imported_functions_count; var 
relocatable_data = try std.ArrayList(RelocatableData).initCapacity(gpa, count); defer relocatable_data.deinit(); while (index < count) : (index += 1) { @@ -836,7 +833,7 @@ fn Parser(comptime ReaderType: type) type { defer gpa.free(name); try reader.readNoEof(name); break :name try parser.object.string_table.put(gpa, name); - } else parser.object.findImport(symbol.tag.externalType(), symbol.index).name; + } else parser.object.findImport(symbol).name; }, } return symbol; @@ -915,7 +912,7 @@ pub fn parseSymbolIntoAtom(object: *Object, wasm: *Wasm, symbol_index: u32) !Ato const gpa = comp.gpa; const symbol = &object.symtable[symbol_index]; const relocatable_data: RelocatableData = switch (symbol.tag) { - .function => object.relocatable_data.get(.code).?[symbol.index - object.importedCountByKind(.function)], + .function => object.relocatable_data.get(.code).?[symbol.index - object.imported_functions_count], .data => object.relocatable_data.get(.data).?[symbol.index], .section => blk: { const data = object.relocatable_data.get(.custom).?; @@ -955,7 +952,7 @@ pub fn parseSymbolIntoAtom(object: *Object, wasm: *Wasm, symbol_index: u32) !Ato .R_WASM_TABLE_INDEX_SLEB64, => { try wasm.function_table.put(gpa, .{ - .file = object.index, + .file = @intFromEnum(object.index), .index = reloc.index, }, 0); }, @@ -966,7 +963,7 @@ pub fn parseSymbolIntoAtom(object: *Object, wasm: *Wasm, symbol_index: u32) !Ato if (sym.tag != .global) { try wasm.got_symbols.append( gpa, - .{ .file = object.index, .index = reloc.index }, + .{ .file = @intFromEnum(object.index), .index = reloc.index }, ); } }, diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index 608ea7e201..4e15edecf6 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -110,8 +110,9 @@ fn symbol(zig_object: *const ZigObject, index: u32) *Symbol { /// Frees and invalidates all memory of the incrementally compiled Zig module. 
/// It is illegal behavior to access the `ZigObject` after calling `deinit`. -pub fn deinit(zig_object: *ZigObject, gpa: std.mem.Allocator) void { - for (zig_object.segment_info.values()) |segment_info| { +pub fn deinit(zig_object: *ZigObject, wasm_file: *Wasm) void { + const gpa = wasm_file.base.comp.gpa; + for (zig_object.segment_info.items) |segment_info| { gpa.free(segment_info.name); } @@ -121,9 +122,9 @@ pub fn deinit(zig_object: *ZigObject, gpa: std.mem.Allocator) void { { var it = zig_object.decls.valueIterator(); while (it.next()) |atom_index_ptr| { - const atom = zig_object.getAtomPtr(atom_index_ptr.*); + const atom = wasm_file.getAtomPtr(atom_index_ptr.*); for (atom.locals.items) |local_index| { - const local_atom = zig_object.getAtomPtr(local_index); + const local_atom = wasm_file.getAtomPtr(local_index); local_atom.deinit(gpa); } atom.deinit(gpa); @@ -131,9 +132,9 @@ pub fn deinit(zig_object: *ZigObject, gpa: std.mem.Allocator) void { } { for (zig_object.anon_decls.values()) |atom_index| { - const atom = zig_object.getAtomPtr(atom_index); + const atom = wasm_file.getAtomPtr(atom_index); for (atom.locals.items) |local_index| { - const local_atom = zig_object.getAtomPtr(local_index); + const local_atom = wasm_file.getAtomPtr(local_index); local_atom.deinit(gpa); } atom.deinit(gpa); @@ -1158,7 +1159,7 @@ pub fn storeDeclType(zig_object: *ZigObject, gpa: std.mem.Allocator, decl_index: /// The symbols in ZigObject are already represented by an atom as we need to store its data. /// So rather than creating a new Atom and returning its index, we use this oppertunity to scan /// its relocations and create any GOT symbols or function table indexes it may require. 
-pub fn parseSymbolIntoAtom(zig_object: *ZigObject, wasm_file: *Wasm, index: u32) Atom.Index { +pub fn parseSymbolIntoAtom(zig_object: *ZigObject, wasm_file: *Wasm, index: u32) !Atom.Index { const gpa = wasm_file.base.comp.gpa; const loc: Wasm.SymbolLoc = .{ .file = @intFromEnum(zig_object.index), .index = index }; const final_index = try wasm_file.getMatchingSegment(zig_object.index, index); From 94f3a18c88eaee9a36a08a1b00c9df0584a01b05 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Fri, 19 Jan 2024 18:04:40 +0100 Subject: [PATCH 08/21] wasm: Add `File` abstraction --- src/link/Wasm/file.zig | 127 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 src/link/Wasm/file.zig diff --git a/src/link/Wasm/file.zig b/src/link/Wasm/file.zig new file mode 100644 index 0000000000..e9190bd8ca --- /dev/null +++ b/src/link/Wasm/file.zig @@ -0,0 +1,127 @@ +pub const File = union(enum) { + zig_object: *ZigObject, + object: *Object, + + pub const Index = enum(u16) { + null = std.math.maxInt(u16), + _, + }; + + pub fn path(file: File) []const u8 { + return switch (file) { + inline else => |obj| obj.path, + }; + } + + pub fn segmentInfo(file: File) []const types.Segment { + return switch (file) { + .zig_object => |obj| obj.segment_info.items, + .object => |obj| obj.segment_info, + }; + } + + pub fn symbol(file: File, index: u32) *Symbol { + return switch (file) { + .zig_object => |obj| &obj.symbols.items[index], + .object => |obj| &obj.symtable[index], + }; + } + + pub fn symbols(file: File) []const Symbol { + return switch (file) { + .zig_object => |obj| obj.symbols.items, + .object => |obj| obj.symtable, + }; + } + + pub fn symbolName(file: File, index: u32) []const u8 { + switch (file) { + .zig_object => |obj| { + const sym = obj.symbols.items[index]; + return obj.string_table.get(sym.name).?; + }, + .object => |obj| { + const sym = obj.symtable[index]; + return obj.string_table.get(sym.name); + }, + } + } + + pub fn 
parseSymbolIntoAtom(file: File, wasm_file: *Wasm, index: u32) !AtomIndex { + return switch (file) { + inline else => |obj| obj.parseSymbolIntoAtom(wasm_file, index), + }; + } + + /// For a given symbol index, find its corresponding import. + /// Asserts import exists. + pub fn import(file: File, symbol_index: u32) types.Import { + return switch (file) { + .zig_object => |obj| obj.imports.get(symbol_index).?, + .object => |obj| obj.findImport(obj.symtable[symbol_index]), + }; + } + + /// For a given offset, returns its string value. + /// Asserts string exists in the object string table. + pub fn string(file: File, offset: u32) []const u8 { + return switch (file) { + .zig_object => |obj| obj.string_table.get(offset).?, + .object => |obj| obj.string_table.get(offset), + }; + } + + pub fn importedGlobals(file: File) u32 { + return switch (file) { + inline else => |obj| obj.imported_globals_count, + }; + } + + pub fn importedFunctions(file: File) u32 { + return switch (file) { + inline else => |obj| obj.imported_functions_count, + }; + } + + pub fn importedTables(file: File) u32 { + return switch (file) { + inline else => |obj| obj.imported_tables_count, + }; + } + + pub fn functions(file: File) []const std.wasm.Func { + return switch (file) { + .zig_object => |obj| obj.functions.items, + .object => |obj| obj.functions, + }; + } + + pub fn globals(file: File) []const std.wasm.Global { + return switch (file) { + .zig_object => |obj| obj.globals.items, + .object => |obj| obj.globals, + }; + } + + pub fn funcTypes(file: File) []const std.wasm.Type { + return switch (file) { + .zig_object => |obj| obj.func_types.items, + .object => |obj| obj.func_types, + }; + } + + pub const Entry = union(enum) { + null: void, + zig_object: ZigObject, + object: Object, + }; +}; + +const std = @import("std"); +const types = @import("types.zig"); + +const AtomIndex = @import("Atom.zig").Index; +const Object = @import("Object.zig"); +const Symbol = @import("Symbol.zig"); +const Wasm = 
@import("../Wasm.zig"); +const ZigObject = @import("ZigObject.zig"); From 0a030d6598a42eae6f6af829e03bba053336b51c Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Sun, 21 Jan 2024 12:06:33 +0100 Subject: [PATCH 09/21] wasm: Use `File.Index` for symbol locations Rather than using an optional `u16`, we now directly use `File.Index`, which can already represent an unknown file through its `.null` value. This means we do not pay the extra memory cost of the optional. This type of index is now used for: - SymbolLoc - Key of the functions map - InitFunc Now we can simply pass things like atom.file, object.file, loc.file, etc. whenever we need to access the object file they refer to, which makes this a lot easier. --- src/link/Wasm.zig | 101 ++++++++++++++---------------------- src/link/Wasm/Atom.zig | 15 ++---- src/link/Wasm/Object.zig | 7 +-- src/link/Wasm/ZigObject.zig | 10 ++-- 4 files changed, 49 insertions(+), 84 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 3fcd6333b0..a81c7e7629 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -125,7 +125,10 @@ func_types: std.ArrayListUnmanaged(std.wasm.Type) = .{}, /// Output function section where the key is the original /// function index and the value is function. /// This allows us to map multiple symbols to the same function. -functions: std.AutoArrayHashMapUnmanaged(struct { file: ?u16, index: u32 }, struct { func: std.wasm.Func, sym_index: u32 }) = .{}, +functions: std.AutoArrayHashMapUnmanaged( + struct { file: File.Index, index: u32 }, + struct { func: std.wasm.Func, sym_index: u32 }, +) = .{}, /// Output global section wasm_globals: std.ArrayListUnmanaged(std.wasm.Global) = .{}, /// Memory section @@ -217,16 +220,14 @@ pub const SymbolLoc = struct { /// The index of the symbol within the specified file index: u32, /// The index of the object file where the symbol resides. - /// When this is `null` the symbol comes from a non-object file. 
- file: ?u16, + file: File.Index, /// From a given location, returns the corresponding symbol in the wasm binary pub fn getSymbol(loc: SymbolLoc, wasm_file: *const Wasm) *Symbol { if (wasm_file.discarded.get(loc)) |new_loc| { return new_loc.getSymbol(wasm_file); } - if (loc.file) |object_index| { - const obj_file = wasm_file.file(@enumFromInt(object_index)).?; + if (wasm_file.file(loc.file)) |obj_file| { return obj_file.symbol(loc.index); } return &wasm_file.synthetic_symbols.items[loc.index]; @@ -237,8 +238,7 @@ pub const SymbolLoc = struct { if (wasm_file.discarded.get(loc)) |new_loc| { return new_loc.getName(wasm_file); } - if (loc.file) |object_index| { - const obj_file = wasm_file.file(@enumFromInt(object_index)).?; + if (wasm_file.file(loc.file)) |obj_file| { return obj_file.symbolName(loc.index); } return wasm_file.string_table.get(wasm_file.synthetic_symbols.items[loc.index].name); @@ -263,7 +263,7 @@ pub const InitFuncLoc = struct { /// object file index in the list of objects. /// Unlike `SymbolLoc` this cannot be `null` as we never define /// our own ctors. - file: u16, + file: File.Index, /// Symbol index within the corresponding object file. index: u32, /// The priority in which the constructor must be called. 
@@ -633,7 +633,7 @@ fn createSyntheticSymbol(wasm: *Wasm, name: []const u8, tag: Symbol.Tag) !Symbol fn createSyntheticSymbolOffset(wasm: *Wasm, name_offset: u32, tag: Symbol.Tag) !SymbolLoc { const sym_index = @as(u32, @intCast(wasm.synthetic_symbols.items.len)); - const loc: SymbolLoc = .{ .index = sym_index, .file = null }; + const loc: SymbolLoc = .{ .index = sym_index, .file = .null }; const gpa = wasm.base.comp.gpa; try wasm.synthetic_symbols.append(gpa, .{ .name = name_offset, @@ -680,7 +680,7 @@ pub fn createAtom(wasm: *Wasm, sym_index: u32, file_index: File.Index) !Atom.Ind const index: Atom.Index = @intCast(wasm.managed_atoms.items.len); const atom = try wasm.managed_atoms.addOne(gpa); atom.* = .{ .file = file_index, .sym_index = sym_index }; - try wasm.symbol_atom.putNoClobber(gpa, .{ .file = null, .index = sym_index }, index); + try wasm.symbol_atom.putNoClobber(gpa, atom.symbolLoc(), index); return index; } @@ -763,10 +763,7 @@ fn resolveSymbolsInObject(wasm: *Wasm, file_index: File.Index) !void { for (obj_file.symbols(), 0..) 
|symbol, i| { const sym_index: u32 = @intCast(i); - const location: SymbolLoc = .{ - .file = @intFromEnum(file_index), - .index = sym_index, - }; + const location: SymbolLoc = .{ .file = file_index, .index = sym_index }; const sym_name = obj_file.string(symbol.name); if (mem.eql(u8, sym_name, "__indirect_function_table")) { continue; @@ -796,9 +793,10 @@ fn resolveSymbolsInObject(wasm: *Wasm, file_index: File.Index) !void { const existing_loc = maybe_existing.value_ptr.*; const existing_sym: *Symbol = existing_loc.getSymbol(wasm); + const existing_file = wasm.file(existing_loc.file); - const existing_file_path = if (existing_loc.file) |existing_file_index| - wasm.file(@enumFromInt(existing_file_index)).?.path() + const existing_file_path = if (existing_file) |existing_obj_file| + existing_obj_file.path() else wasm.name; @@ -831,8 +829,7 @@ fn resolveSymbolsInObject(wasm: *Wasm, file_index: File.Index) !void { if (existing_sym.isUndefined() and symbol.isUndefined()) { // only verify module/import name for function symbols if (symbol.tag == .function) { - const existing_name = if (existing_loc.file) |existing_file_index| blk: { - const existing_obj = wasm.file(@enumFromInt(existing_file_index)).?; + const existing_name = if (existing_file) |existing_obj| blk: { const imp = existing_obj.import(existing_loc.index); break :blk existing_obj.string(imp.module_name); } else blk: { @@ -1363,8 +1360,8 @@ fn checkUndefinedSymbols(wasm: *const Wasm) !void { const symbol = undef.getSymbol(wasm); if (symbol.tag == .data) { found_undefined_symbols = true; - const file_name = if (undef.file) |file_index| - wasm.file(@enumFromInt(file_index)).?.path() + const file_name = if (wasm.file(undef.file)) |obj_file| + obj_file.path() else wasm.name; const symbol_name = undef.getName(wasm); @@ -1461,8 +1458,7 @@ fn getGlobalType(wasm: *const Wasm, loc: SymbolLoc) std.wasm.GlobalType { const symbol = loc.getSymbol(wasm); assert(symbol.tag == .global); const is_undefined = 
symbol.isUndefined(); - if (loc.file) |file_index| { - const obj_file = wasm.file(@enumFromInt(file_index)).?; + if (wasm.file(loc.file)) |obj_file| { if (is_undefined) { return obj_file.import(loc.index).kind.global; } @@ -1480,8 +1476,7 @@ fn getFunctionSignature(wasm: *const Wasm, loc: SymbolLoc) std.wasm.Type { const symbol = loc.getSymbol(wasm); assert(symbol.tag == .function); const is_undefined = symbol.isUndefined(); - if (loc.file) |file_index| { - const obj_file = wasm.file(@enumFromInt(file_index)).?; + if (wasm.file(loc.file)) |obj_file| { if (is_undefined) { const ty_index = obj_file.import(loc.index).kind.function; return obj_file.funcTypes()[ty_index]; @@ -1625,8 +1620,8 @@ fn allocateAtoms(wasm: *Wasm) !void { // Ensure we get the original symbol, so we verify the correct symbol on whether // it is dead or not and ensure an atom is removed when dead. // This is required as we may have parsed aliases into atoms. - const sym = if (symbol_loc.file) |file_index| - wasm.file(@enumFromInt(file_index)).?.symbol(symbol_loc.index).* + const sym = if (wasm.file(symbol_loc.file)) |obj_file| + obj_file.symbol(symbol_loc.index).* else wasm.synthetic_symbols.items[symbol_loc.index]; @@ -1754,10 +1749,10 @@ fn setupInitFunctions(wasm: *Wasm) !void { log.debug("appended init func '{s}'\n", .{object.string_table.get(symbol.name)}); wasm.init_funcs.appendAssumeCapacity(.{ .index = init_func.symbol_index, - .file = @intFromEnum(file_index), + .file = file_index, .priority = init_func.priority, }); - try wasm.mark(.{ .index = init_func.symbol_index, .file = @intFromEnum(file_index) }); + try wasm.mark(.{ .index = init_func.symbol_index, .file = file_index }); } } @@ -1841,7 +1836,7 @@ fn createSyntheticFunction( const func_index = wasm.imported_functions_count + @as(u32, @intCast(wasm.functions.count())); try wasm.functions.putNoClobber( gpa, - .{ .file = null, .index = func_index }, + .{ .file = .null, .index = func_index }, .{ .func = .{ .type_index = ty_index }, 
.sym_index = loc.index }, ); symbol.index = func_index; @@ -1849,8 +1844,8 @@ fn createSyntheticFunction( // create the atom that will be output into the final binary const atom_index = try wasm.createAtom(loc.index, .null); const atom = wasm.getAtomPtr(atom_index); - atom.code = function_body.moveToUnmanaged(); atom.size = @intCast(function_body.items.len); + atom.code = function_body.moveToUnmanaged(); try wasm.appendAtomAtIndex(wasm.code_section_index.?, atom_index); } @@ -1969,20 +1964,8 @@ fn initializeTLSFunction(wasm: *Wasm) !void { fn setupImports(wasm: *Wasm) !void { const gpa = wasm.base.comp.gpa; log.debug("Merging imports", .{}); - var discarded_it = wasm.discarded.keyIterator(); - while (discarded_it.next()) |discarded| { - if (discarded.file == null) { - // remove an import if it was resolved - if (wasm.imports.remove(discarded.*)) { - log.debug("Removed symbol '{s}' as an import", .{ - discarded.getName(wasm), - }); - } - } - } - for (wasm.resolved_symbols.keys()) |symbol_loc| { - const file_index = symbol_loc.file orelse { + const obj_file = wasm.file(symbol_loc.file) orelse { // Synthetic symbols will already exist in the `import` section continue; }; @@ -1996,7 +1979,6 @@ fn setupImports(wasm: *Wasm) !void { } log.debug("Symbol '{s}' will be imported from the host", .{symbol_loc.getName(wasm)}); - const obj_file = wasm.file(@enumFromInt(file_index)).?; const import = obj_file.import(symbol_loc.index); // We copy the import to a new import to ensure the names contain references @@ -2054,15 +2036,13 @@ fn mergeSections(wasm: *Wasm) !void { defer removed_duplicates.deinit(); for (wasm.resolved_symbols.keys()) |sym_loc| { - const file_index = sym_loc.file orelse { + const obj_file = wasm.file(sym_loc.file) orelse { // Zig code-generated symbols are already within the sections and do not // require to be merged continue; }; - const obj_file = wasm.file(@enumFromInt(file_index)).?; const symbol = obj_file.symbol(sym_loc.index); - if (symbol.isDead() or 
symbol.isUndefined()) { // Skip undefined symbols as they go in the `import` section continue; @@ -2105,7 +2085,7 @@ fn mergeSections(wasm: *Wasm) !void { symbol.index = @as(u32, @intCast(wasm.tables.items.len)) + wasm.imported_tables_count; try wasm.tables.append(gpa, original_table); }, - else => continue, + else => {}, } } @@ -2132,12 +2112,11 @@ fn mergeTypes(wasm: *Wasm) !void { defer dirty.deinit(); for (wasm.resolved_symbols.keys()) |sym_loc| { - const file_index = sym_loc.file orelse { + const obj_file = wasm.file(sym_loc.file) orelse { // zig code-generated symbols are already present in final type section continue; }; - const obj_file = wasm.file(@enumFromInt(file_index)).?; const symbol = obj_file.symbol(sym_loc.index); if (symbol.tag != .function or symbol.isDead()) { // Only functions have types. Only retrieve the type of referenced functions. @@ -2191,7 +2170,7 @@ fn setupExports(wasm: *Wasm) !void { const sym_name = sym_loc.getName(wasm); const export_name = if (wasm.export_names.get(sym_loc)) |name| name else blk: { - if (sym_loc.file == null) break :blk symbol.name; + if (sym_loc.file == .null) break :blk symbol.name; break :blk try wasm.string_table.put(gpa, sym_name); }; const exp: types.Export = if (symbol.tag == .data) exp: { @@ -2425,7 +2404,7 @@ pub fn getMatchingSegment(wasm: *Wasm, file_index: File.Index, symbol_index: u32 break :blk index; }, .section => { - const section_name = obj_file.symbolName(symbol.index); + const section_name = obj_file.symbolName(symbol_index); if (mem.eql(u8, section_name, ".debug_info")) { return wasm.debug_info_index orelse blk: { wasm.debug_info_index = index; @@ -2475,7 +2454,7 @@ pub fn getMatchingSegment(wasm: *Wasm, file_index: File.Index, symbol_index: u32 break :blk index; }; } else { - log.warn("found unknown section '{s}'", .{section_name}); + log.err("found unknown section '{s}'", .{section_name}); return error.UnexpectedValue; } }, @@ -4221,10 +4200,7 @@ fn emitDataRelocations( size_offset += 
getULEB128Size(atom.size); for (atom.relocs.items) |relocation| { count += 1; - const sym_loc: SymbolLoc = .{ - .file = atom.file, - .index = relocation.index, - }; + const sym_loc: SymbolLoc = .{ .file = atom.file, .index = relocation.index }; const symbol_index = symbol_table.get(sym_loc).?; try leb.writeULEB128(writer, @intFromEnum(relocation.relocation_type)); const offset = atom.offset + relocation.offset + size_offset; @@ -4322,8 +4298,7 @@ fn markReferences(wasm: *Wasm) !void { // Debug sections may require to be parsed and marked when it contains // relocations to alive symbols. if (sym.tag == .section and comp.config.debug_format != .strip) { - const file_index = sym_loc.file orelse continue; // Incremental debug info is done independently - const obj_file = wasm.file(@enumFromInt(file_index)).?; + const obj_file = wasm.file(sym_loc.file) orelse continue; // Incremental debug info is done independently _ = try obj_file.parseSymbolIntoAtom(wasm, sym_loc.index); sym.mark(); } @@ -4347,10 +4322,10 @@ fn mark(wasm: *Wasm, loc: SymbolLoc) !void { return; } - const atom_index = if (loc.file) |file_index| idx: { - const obj_file = wasm.file(@enumFromInt(file_index)).?; - break :idx try obj_file.parseSymbolIntoAtom(wasm, loc.index); - } else wasm.symbol_atom.get(loc) orelse return; + const atom_index = if (wasm.file(loc.file)) |obj_file| + try obj_file.parseSymbolIntoAtom(wasm, loc.index) + else + wasm.symbol_atom.get(loc) orelse return; const atom = wasm.getAtom(atom_index); for (atom.relocs.items) |reloc| { diff --git a/src/link/Wasm/Atom.zig b/src/link/Wasm/Atom.zig index ad2b811c0b..ade66b687f 100644 --- a/src/link/Wasm/Atom.zig +++ b/src/link/Wasm/Atom.zig @@ -59,10 +59,7 @@ pub fn format(atom: Atom, comptime fmt: []const u8, options: std.fmt.FormatOptio /// Returns the location of the symbol that represents this `Atom` pub fn symbolLoc(atom: Atom) Wasm.SymbolLoc { - if (atom.file == .null) { - return .{ .file = null, .index = atom.sym_index }; - } - return 
.{ .file = @intFromEnum(atom.file), .index = atom.sym_index }; + return .{ .file = atom.file, .index = atom.sym_index }; } pub fn getSymbolIndex(atom: Atom) ?u32 { @@ -83,7 +80,7 @@ pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void { for (atom.relocs.items) |reloc| { const value = atom.relocationValue(reloc, wasm_bin); log.debug("Relocating '{s}' referenced in '{s}' offset=0x{x:0>8} value={d}", .{ - (Wasm.SymbolLoc{ .file = @intFromEnum(atom.file), .index = reloc.index }).getName(wasm_bin), + (Wasm.SymbolLoc{ .file = atom.file, .index = reloc.index }).getName(wasm_bin), symbol_name, reloc.offset, value, @@ -122,11 +119,7 @@ pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void { /// All values will be represented as a `u64` as all values can fit within it. /// The final value must be casted to the correct size. fn relocationValue(atom: Atom, relocation: types.Relocation, wasm_bin: *const Wasm) u64 { - const target_loc = if (atom.file == .null) - (Wasm.SymbolLoc{ .file = null, .index = relocation.index }).finalLoc(wasm_bin) - else - (Wasm.SymbolLoc{ .file = @intFromEnum(atom.file), .index = relocation.index }).finalLoc(wasm_bin); - + const target_loc = (Wasm.SymbolLoc{ .file = atom.file, .index = relocation.index }).finalLoc(wasm_bin); const symbol = target_loc.getSymbol(wasm_bin); if (relocation.relocation_type != .R_WASM_TYPE_INDEX_LEB and symbol.tag != .section and @@ -142,7 +135,7 @@ fn relocationValue(atom: Atom, relocation: types.Relocation, wasm_bin: *const Wa .R_WASM_TABLE_INDEX_I64, .R_WASM_TABLE_INDEX_SLEB, .R_WASM_TABLE_INDEX_SLEB64, - => return wasm_bin.function_table.get(.{ .file = @intFromEnum(atom.file), .index = relocation.index }) orelse 0, + => return wasm_bin.function_table.get(.{ .file = atom.file, .index = relocation.index }) orelse 0, .R_WASM_TYPE_INDEX_LEB => { const obj_file = wasm_bin.file(atom.file) orelse return relocation.index; const original_type = obj_file.funcTypes()[relocation.index]; diff --git 
a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig index f22007366f..1c5640c526 100644 --- a/src/link/Wasm/Object.zig +++ b/src/link/Wasm/Object.zig @@ -952,7 +952,7 @@ pub fn parseSymbolIntoAtom(object: *Object, wasm: *Wasm, symbol_index: u32) !Ato .R_WASM_TABLE_INDEX_SLEB64, => { try wasm.function_table.put(gpa, .{ - .file = @intFromEnum(object.index), + .file = object.index, .index = reloc.index, }, 0); }, @@ -961,10 +961,7 @@ pub fn parseSymbolIntoAtom(object: *Object, wasm: *Wasm, symbol_index: u32) !Ato => { const sym = object.symtable[reloc.index]; if (sym.tag != .global) { - try wasm.got_symbols.append( - gpa, - .{ .file = @intFromEnum(object.index), .index = reloc.index }, - ); + try wasm.got_symbols.append(gpa, .{ .file = object.index, .index = reloc.index }); } }, else => {}, diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index 4e15edecf6..1de15b3f4c 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -468,7 +468,7 @@ pub fn getErrorTableSymbol(zig_object: *ZigObject, wasm_file: *Wasm) !u32 { fn populateErrorNameTable(zig_object: *ZigObject, wasm_file: *Wasm) !void { const symbol_index = zig_object.error_table_symbol orelse return; const gpa = wasm_file.base.comp.gpa; - const atom_index = wasm_file.symbol_atom.get(.{ .file = null, .index = symbol_index }).?; + const atom_index = wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = symbol_index }).?; // Rather than creating a symbol for each individual error name, // we create a symbol for the entire region of error names. 
We then calculate @@ -633,7 +633,7 @@ pub fn getDeclVAddr( const target_symbol_index = wasm_file.getAtom(target_atom_index).sym_index; std.debug.assert(reloc_info.parent_atom_index != 0); - const atom_index = wasm_file.symbol_atom.get(.{ .file = null, .index = reloc_info.parent_atom_index }).?; + const atom_index = wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = reloc_info.parent_atom_index }).?; const atom = wasm_file.getAtomPtr(atom_index); const is_wasm32 = target.cpu.arch == .wasm32; if (decl.ty.zigTypeTag(mod) == .Fn) { @@ -670,7 +670,7 @@ pub fn getAnonDeclVAddr( const atom_index = zig_object.anon_decls.get(decl_val).?; const target_symbol_index = wasm_file.getAtom(atom_index).getSymbolIndex().?; - const parent_atom_index = wasm_file.symbol_atom.get(.{ .file = null, .index = reloc_info.parent_atom_index }).?; + const parent_atom_index = wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = reloc_info.parent_atom_index }).?; const parent_atom = wasm_file.getAtomPtr(parent_atom_index); const is_wasm32 = target.cpu.arch == .wasm32; const mod = wasm_file.base.comp.module.?; @@ -705,7 +705,7 @@ pub fn deleteDeclExport( ) void { const atom_index = zig_object.decls.get(decl_index) orelse return; const sym_index = wasm_file.getAtom(atom_index).sym_index; - const loc: Wasm.SymbolLoc = .{ .file = null, .index = sym_index }; + const loc: Wasm.SymbolLoc = .{ .file = zig_object.index, .index = sym_index }; const sym = loc.getSymbol(wasm_file); std.debug.assert(zig_object.global_syms.remove(sym.name)); } @@ -1161,7 +1161,7 @@ pub fn storeDeclType(zig_object: *ZigObject, gpa: std.mem.Allocator, decl_index: /// its relocations and create any GOT symbols or function table indexes it may require. 
pub fn parseSymbolIntoAtom(zig_object: *ZigObject, wasm_file: *Wasm, index: u32) !Atom.Index { const gpa = wasm_file.base.comp.gpa; - const loc: Wasm.SymbolLoc = .{ .file = @intFromEnum(zig_object.index), .index = index }; + const loc: Wasm.SymbolLoc = .{ .file = zig_object.index, .index = index }; const final_index = try wasm_file.getMatchingSegment(zig_object.index, index); const atom_index = wasm_file.symbol_atom.get(loc).?; try wasm_file.appendAtomAtIndex(final_index, atom_index); From 4d14374a668f6caca7490a136030dbc73507f233 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Sun, 21 Jan 2024 15:20:24 +0100 Subject: [PATCH 10/21] wasm: Move `createFunction` to `ZigObject` This function was previously only called by the backend which generates a synthetical function that is not represented by any AIR or Zig code. For this reason, the ownership is moved to the zig-object and stored there so it can be linked with the other object files without the driver having to specialize it. --- src/link/Wasm.zig | 35 ++++------------------------------- src/link/Wasm/ZigObject.zig | 29 ++++++++++++++++++++++++++++- src/link/Wasm/file.zig | 13 ++++++++----- 3 files changed, 40 insertions(+), 37 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index a81c7e7629..c8ed59f307 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -1481,7 +1481,7 @@ fn getFunctionSignature(wasm: *const Wasm, loc: SymbolLoc) std.wasm.Type { const ty_index = obj_file.import(loc.index).kind.function; return obj_file.funcTypes()[ty_index]; } - const type_index = obj_file.functions()[symbol.index - obj_file.importedFunctions()].type_index; + const type_index = obj_file.function(loc.index).type_index; return obj_file.funcTypes()[type_index]; } if (is_undefined) { @@ -1850,9 +1850,7 @@ fn createSyntheticFunction( } /// Unlike `createSyntheticFunction` this function is to be called by -/// the codegeneration backend. 
This will not allocate the created Atom yet, -/// but will instead be appended to `synthetic_functions` list and will be -/// parsed at the end of code generation. +/// the codegeneration backend. This will not allocate the created Atom yet. /// Returns the index of the symbol. pub fn createFunction( wasm: *Wasm, @@ -1861,31 +1859,7 @@ pub fn createFunction( function_body: *std.ArrayList(u8), relocations: *std.ArrayList(Relocation), ) !u32 { - const gpa = wasm.base.comp.gpa; - const loc = try wasm.createSyntheticSymbol(symbol_name, .function); - - const atom_index = try wasm.createAtom(loc.index, wasm.zig_object_index); - const atom = wasm.getAtomPtr(atom_index); - atom.code = function_body.moveToUnmanaged(); - atom.relocs = relocations.moveToUnmanaged(); - atom.size = @intCast(function_body.items.len); - const symbol = loc.getSymbol(wasm); - symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); // ensure function does not get exported - - const section_index = wasm.code_section_index orelse idx: { - const index = @as(u32, @intCast(wasm.segments.items.len)); - try wasm.appendDummySegment(); - break :idx index; - }; - try wasm.appendAtomAtIndex(section_index, atom_index); - try wasm.zigObjectPtr().?.atom_types.put( - gpa, - atom_index, - try wasm.zigObjectPtr().?.putOrGetFuncType(gpa, func_ty), - ); - try wasm.synthetic_functions.append(gpa, atom_index); - - return loc.index; + return wasm.zigObjectPtr().?.createFunction(wasm, symbol_name, func_ty, function_body, relocations); } /// If required, sets the function index in the `start` section. 
@@ -2050,7 +2024,6 @@ fn mergeSections(wasm: *Wasm) !void { switch (symbol.tag) { .function => { - const index = symbol.index - obj_file.importedFunctions(); const gop = try wasm.functions.getOrPut( gpa, .{ .file = sym_loc.file, .index = symbol.index }, @@ -2068,7 +2041,7 @@ fn mergeSections(wasm: *Wasm) !void { try removed_duplicates.append(sym_loc); continue; } - gop.value_ptr.* = .{ .func = obj_file.functions()[index], .sym_index = sym_loc.index }; + gop.value_ptr.* = .{ .func = obj_file.function(sym_loc.index), .sym_index = sym_loc.index }; symbol.index = @as(u32, @intCast(gop.index)) + wasm.imported_functions_count; }, .global => { diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index 1de15b3f4c..f6261ba90a 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -13,7 +13,7 @@ decls: std.AutoHashMapUnmanaged(InternPool.DeclIndex, Atom.Index) = .{}, func_types: std.ArrayListUnmanaged(std.wasm.Type) = .{}, /// List of `std.wasm.Func`. Each entry contains the function signature, /// rather than the actual body. -functions: std.ArrayListUnmanaged(std.wasm.Func) = .{}, +functions: std.AutoHashMapUnmanaged(u32, std.wasm.Func) = .{}, /// Map of symbol locations, represented by its `types.Import`. imports: std.AutoHashMapUnmanaged(u32, types.Import) = .{}, /// List of WebAssembly globals. @@ -1189,6 +1189,33 @@ pub fn parseSymbolIntoAtom(zig_object: *ZigObject, wasm_file: *Wasm, index: u32) return atom_index; } +/// Creates a new Wasm function with a given symbol name and body. +/// Returns the symbol index of the new function. 
+pub fn createFunction( + zig_object: *ZigObject, + wasm_file: *Wasm, + symbol_name: []const u8, + func_ty: std.wasm.Type, + function_body: *std.ArrayList(u8), + relocations: *std.ArrayList(types.Relocation), +) !u32 { + const gpa = wasm_file.base.comp.gpa; + const sym_index = try zig_object.allocateSymbol(gpa); + const sym = &zig_object.symbols.items[sym_index]; + sym.tag = .function; + sym.name = try zig_object.string_table.insert(gpa, symbol_name); + const type_index = try zig_object.putOrGetFuncType(gpa, func_ty); + try zig_object.functions.putNoClobber(gpa, sym_index, .{ .type_index = type_index }); + + const atom_index = try wasm_file.createAtom(sym_index, zig_object.index); + const atom = wasm_file.getAtomPtr(atom_index); + atom.size = @intCast(function_body.items.len); + atom.code = function_body.moveToUnmanaged(); + atom.relocs = relocations.moveToUnmanaged(); + + return sym_index; +} + const build_options = @import("build_options"); const builtin = @import("builtin"); const codegen = @import("../../codegen.zig"); diff --git a/src/link/Wasm/file.zig b/src/link/Wasm/file.zig index e9190bd8ca..79b4fd0e36 100644 --- a/src/link/Wasm/file.zig +++ b/src/link/Wasm/file.zig @@ -89,11 +89,14 @@ pub const File = union(enum) { }; } - pub fn functions(file: File) []const std.wasm.Func { - return switch (file) { - .zig_object => |obj| obj.functions.items, - .object => |obj| obj.functions, - }; + pub fn function(file: File, sym_index: u32) std.wasm.Func { + switch (file) { + .zig_object => |obj| return obj.functions.get(sym_index).?, + .object => |obj| { + const sym = obj.symtable[sym_index]; + return obj.functions[sym.index - obj.imported_functions_count]; + }, + } } pub fn globals(file: File) []const std.wasm.Global { From a028b10b9f8213f5f31c06b57cd18f9852f0df13 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Mon, 22 Jan 2024 07:12:26 +0100 Subject: [PATCH 11/21] wasm: update `freeDecl` and `finishDecl` We now parse the decls right away into atoms and allocate the 
corresponding linker-object, such as segment and function, rather than waiting until `flushModule`. --- src/link/Wasm.zig | 1 - src/link/Wasm/ZigObject.zig | 111 +++++++++++++++++++++++++++++++----- src/link/Wasm/file.zig | 1 - 3 files changed, 98 insertions(+), 15 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index c8ed59f307..9de8fdc1d2 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -585,7 +585,6 @@ pub fn file(wasm: *const Wasm, index: File.Index) ?File { if (index == .null) return null; const tag = wasm.files.items(.tags)[@intFromEnum(index)]; return switch (tag) { - .null => null, .zig_object => .{ .zig_object = &wasm.files.items(.data)[@intFromEnum(index)].zig_object }, .object => .{ .object = &wasm.files.items(.data)[@intFromEnum(index)].object }, }; diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index f6261ba90a..cd5577bd21 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -29,6 +29,8 @@ global_syms: std.AutoHashMapUnmanaged(u32, u32) = .{}, symbols_free_list: std.ArrayListUnmanaged(u32) = .{}, /// Extra metadata about the linking section, such as alignment of segments and their name. segment_info: std.ArrayListUnmanaged(types.Segment) = .{}, +/// List of indexes which contain a free slot in the `segment_info` list. +segment_free_list: std.ArrayListUnmanaged(u32) = .{}, /// File encapsulated string table, used to deduplicate strings within the generated file. string_table: StringTable = .{}, /// Map for storing anonymous declarations. Each anonymous decl maps to its Atom's index. 
@@ -145,6 +147,7 @@ pub fn deinit(zig_object: *ZigObject, wasm_file: *Wasm) void { zig_object.symbols.deinit(gpa); zig_object.symbols_free_list.deinit(gpa); zig_object.segment_info.deinit(gpa); + zig_object.segment_free_list.deinit(gpa); zig_object.string_table.deinit(gpa); if (zig_object.dwarf) |*dwarf| { @@ -223,7 +226,7 @@ pub fn updateDecl( }, }; - return zig_object.finishUpdateDecl(wasm_file, decl_index, code, .data); + return zig_object.finishUpdateDecl(wasm_file, decl_index, code); } pub fn updateFunc( @@ -263,7 +266,7 @@ pub fn updateFunc( }, }; - return zig_object.finishUpdateDecl(wasm_file, decl_index, code, .function); + return zig_object.finishUpdateDecl(wasm_file, decl_index, code); } fn finishUpdateDecl( @@ -271,25 +274,89 @@ fn finishUpdateDecl( wasm_file: *Wasm, decl_index: InternPool.DeclIndex, code: []const u8, - symbol_tag: Symbol.Tag, ) !void { const gpa = wasm_file.base.comp.gpa; const mod = wasm_file.base.comp.module.?; const decl = mod.declPtr(decl_index); const atom_index = zig_object.decls.get(decl_index).?; const atom = wasm_file.getAtomPtr(atom_index); - const sym = zig_object.symbol(atom.getSymbolIndex().?); + const sym = zig_object.symbol(atom.sym_index); const full_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); sym.name = try zig_object.string_table.insert(gpa, full_name); - sym.tag = symbol_tag; try atom.code.appendSlice(gpa, code); - try wasm_file.resolved_symbols.put(gpa, atom.symbolLoc(), {}); - atom.size = @intCast(code.len); + + switch (decl.ty.zigTypeTag(mod)) { + .Fn => { + try zig_object.functions.put( + gpa, + atom.sym_index, + .{ .type_index = zig_object.atom_types.get(atom_index).? 
}, + ); + sym.tag = .function; + }, + else => { + const segment_name: []const u8 = if (decl.getOwnedVariable(mod)) |variable| name: { + if (variable.is_const) { + break :name ".rodata."; + } else if (Value.fromInterned(variable.init).isUndefDeep(mod)) { + const decl_namespace = mod.namespacePtr(decl.src_namespace); + const optimize_mode = decl_namespace.file_scope.mod.optimize_mode; + const is_initialized = switch (optimize_mode) { + .Debug, .ReleaseSafe => true, + .ReleaseFast, .ReleaseSmall => false, + }; + if (is_initialized) { + break :name ".data."; + } + break :name ".bss."; + } + // when the decl is all zeroes, we store the atom in the bss segment, + // in all other cases it will be in the data segment. + for (atom.code.items) |byte| { + if (byte != 0) break :name ".data."; + } + break :name ".bss."; + } else ".rodata."; + if ((wasm_file.base.isObject() or wasm_file.base.comp.config.import_memory) and + std.mem.startsWith(u8, segment_name, ".bss")) + { + @memset(atom.code.items, 0); + } + // Will be freed upon freeing of decl or after cleanup of Wasm binary. + const full_segment_name = try std.mem.concat(gpa, u8, &.{ + segment_name, + full_name, + }); + errdefer gpa.free(full_segment_name); + sym.tag = .data; + sym.index = try zig_object.createDataSegment(gpa, full_segment_name, decl.alignment); + }, + } if (code.len == 0) return; atom.alignment = decl.getAlignment(mod); } +fn createDataSegment( + zig_object: *ZigObject, + gpa: std.mem.Allocator, + name: []const u8, + alignment: InternPool.Alignment, +) !u32 { + const segment_index: u32 = if (zig_object.segment_free_list.popOrNull()) |index| + index + else index: { + const idx: u32 = @intCast(zig_object.segment_info.items.len); + _ = try zig_object.segment_info.addOne(gpa); + break :index idx; + }; + zig_object.segment_info.items[segment_index] = .{ + .alignment = alignment, + .flags = 0, + .name = name, + }; +} + /// For a given `InternPool.DeclIndex` returns its corresponding `Atom.Index`. 
/// When the index was not found, a new `Atom` will be created, and its index will be returned. /// The newly created Atom is empty with default fields as specified by `Atom.empty`. @@ -840,20 +907,24 @@ pub fn freeDecl(zig_object: *ZigObject, wasm_file: *Wasm, decl_index: InternPool const atom_index = zig_object.decls.get(decl_index).?; const atom = wasm_file.getAtomPtr(atom_index); zig_object.symbols_free_list.append(gpa, atom.sym_index) catch {}; - _ = zig_object.decls.remove(decl_index); - zig_object.symbols.items[atom.sym_index].tag = .dead; + std.debug.assert(zig_object.decls.remove(decl_index)); + const sym = &zig_object.symbols.items[atom.sym_index]; for (atom.locals.items) |local_atom_index| { const local_atom = wasm_file.getAtom(local_atom_index); const local_symbol = &zig_object.symbols.items[local_atom.sym_index]; - local_symbol.tag = .dead; // also for any local symbol + std.debug.assert(local_symbol.tag == .data); zig_object.symbols_free_list.append(gpa, local_atom.sym_index) catch {}; - std.denug.assert(wasm_file.symbol_atom.remove(local_atom.symbolLoc())); + std.debug.assert(wasm_file.symbol_atom.remove(local_atom.symbolLoc())); + local_symbol.tag = .dead; // also for any local symbol + const segment = &zig_object.segment_info.items[local_atom.sym_index]; + gpa.free(segment.name); + segment.name = &.{}; // Ensure no accidental double free } if (decl.isExtern(mod)) { - _ = zig_object.imports.remove(atom.getSymbolIndex().?); + std.debug.assert(zig_object.imports.remove(atom.sym_index)); } - _ = wasm_file.symbol_atom.remove(atom.symbolLoc()); + std.debug.assert(wasm_file.symbol_atom.remove(atom.symbolLoc())); // if (wasm.dwarf) |*dwarf| { // dwarf.freeDecl(decl_index); @@ -869,6 +940,20 @@ pub fn freeDecl(zig_object: *ZigObject, wasm_file: *Wasm, decl_index: InternPool prev_atom.next = atom.next; atom.prev = null; } + + sym.tag = .dead; + switch (decl.ty.zigTypeTag(mod)) { + .Fn => { + std.debug.assert(zig_object.functions.remove(atom.sym_index)); + 
std.debug.assert(zig_object.atom_types.remove(atom_index)); + }, + else => { + zig_object.segment_free_list.append(gpa, sym.index) catch {}; + const segment = &zig_object.segment_info.items[sym.index]; + gpa.free(segment.name); + segment.name = &.{}; // Prevent accidental double free + }, + } } fn getTypeIndex(zig_object: *const ZigObject, func_type: std.wasm.Type) ?u32 { diff --git a/src/link/Wasm/file.zig b/src/link/Wasm/file.zig index 79b4fd0e36..5e92f3e079 100644 --- a/src/link/Wasm/file.zig +++ b/src/link/Wasm/file.zig @@ -114,7 +114,6 @@ pub const File = union(enum) { } pub const Entry = union(enum) { - null: void, zig_object: ZigObject, object: Object, }; From 8f96e7eec1b2af005e17bf21f91fc91add92d7dd Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Wed, 24 Jan 2024 16:52:46 +0100 Subject: [PATCH 12/21] wasm: re-implement `updateExports` We now correctly create a symbol for each exported decl with its export-name. The symbol points to the same linker-object. We store a map from decl to all of its exports so we can update exports if they already exist rather than infinitely creating new exports. 
--- src/arch/wasm/CodeGen.zig | 2 +- src/arch/wasm/Emit.zig | 8 +- src/link/Wasm.zig | 3 + src/link/Wasm/ZigObject.zig | 252 +++++++++++------------------------- 4 files changed, 83 insertions(+), 182 deletions(-) diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 314518caef..5440147296 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -2239,7 +2239,7 @@ fn airCall(func: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModif } if (callee) |direct| { - const atom_index = func.bin_file.zigObjectPtr().?.decls.get(direct).?; + const atom_index = func.bin_file.zigObjectPtr().?.decls_map.get(direct).?.atom; try func.addLabel(.call, func.bin_file.getAtom(atom_index).sym_index); } else { // in this case we call a function pointer diff --git a/src/arch/wasm/Emit.zig b/src/arch/wasm/Emit.zig index 3d495dcff6..31c15ce0ef 100644 --- a/src/arch/wasm/Emit.zig +++ b/src/arch/wasm/Emit.zig @@ -310,7 +310,7 @@ fn emitGlobal(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) !void { const global_offset = emit.offset(); try emit.code.appendSlice(&buf); - const atom_index = emit.bin_file.zigObjectPtr().?.decls.get(emit.decl_index).?; + const atom_index = emit.bin_file.zigObjectPtr().?.decls_map.get(emit.decl_index).?.atom; const atom = emit.bin_file.getAtomPtr(atom_index); try atom.relocs.append(gpa, .{ .index = label, @@ -370,7 +370,7 @@ fn emitCall(emit: *Emit, inst: Mir.Inst.Index) !void { try emit.code.appendSlice(&buf); if (label != 0) { - const atom_index = emit.bin_file.zigObjectPtr().?.decls.get(emit.decl_index).?; + const atom_index = emit.bin_file.zigObjectPtr().?.decls_map.get(emit.decl_index).?.atom; const atom = emit.bin_file.getAtomPtr(atom_index); try atom.relocs.append(gpa, .{ .offset = call_offset, @@ -400,7 +400,7 @@ fn emitFunctionIndex(emit: *Emit, inst: Mir.Inst.Index) !void { try emit.code.appendSlice(&buf); if (symbol_index != 0) { - const atom_index = 
emit.bin_file.zigObjectPtr().?.decls.get(emit.decl_index).?; + const atom_index = emit.bin_file.zigObjectPtr().?.decls_map.get(emit.decl_index).?.atom; const atom = emit.bin_file.getAtomPtr(atom_index); try atom.relocs.append(gpa, .{ .offset = index_offset, @@ -431,7 +431,7 @@ fn emitMemAddress(emit: *Emit, inst: Mir.Inst.Index) !void { } if (mem.pointer != 0) { - const atom_index = emit.bin_file.zigObjectPtr().?.decls.get(emit.decl_index).?; + const atom_index = emit.bin_file.zigObjectPtr().?.decls_map.get(emit.decl_index).?.atom; const atom = emit.bin_file.getAtomPtr(atom_index); try atom.relocs.append(gpa, .{ .offset = mem_offset, diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 9de8fdc1d2..82fa7a9439 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -2742,6 +2742,9 @@ pub fn flushModule(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) try wasm.parseInputFiles(positionals.items); + if (wasm.zig_object_index != .null) { + try wasm.resolveSymbolsInObject(wasm.zig_object_index); + } for (wasm.objects.items) |object_index| { try wasm.resolveSymbolsInObject(object_index); } diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index cd5577bd21..01f253d9b1 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -6,9 +6,9 @@ path: []const u8, /// Index within the list of relocatable objects of the linker driver. index: File.Index, -/// List of all `Decl` that are currently alive. -/// Each index maps to the corresponding `Atom.Index`. -decls: std.AutoHashMapUnmanaged(InternPool.DeclIndex, Atom.Index) = .{}, +/// Map of all `Decl` that are currently alive. +/// Each index maps to the corresponding `DeclInfo`. +decls_map: std.AutoHashMapUnmanaged(InternPool.DeclIndex, DeclInfo) = .{}, /// List of function type signatures for this Zig module. func_types: std.ArrayListUnmanaged(std.wasm.Type) = .{}, /// List of `std.wasm.Func`. 
Each entry contains the function signature, @@ -80,6 +80,26 @@ debug_str_index: ?u32 = null, /// The index of the segment representing the custom '.debug_pubtypes' section. debug_abbrev_index: ?u32 = null, +const DeclInfo = struct { + atom: Atom.Index = std.math.maxInt(Atom.Index), + exports: std.ArrayListUnmanaged(u32) = .{}, + + fn @"export"(di: DeclInfo, zig_object: *const ZigObject, name: []const u8) ?u32 { + for (di.exports.items) |sym_index| { + const sym_name_index = zig_object.symbol(sym_index).name; + const sym_name = zig_object.string_table.getAssumeExists(sym_name_index); + if (std.mem.eql(u8, name, sym_name)) { + return sym_index; + } + } + return null; + } + + fn appendExport(di: *DeclInfo, gpa: std.mem.Allocator, sym_index: u32) !void { + return di.exports.append(gpa, sym_index); + } +}; + /// Initializes the `ZigObject` with initial symbols. pub fn init(zig_object: *ZigObject, wasm_file: *Wasm) !void { // Initialize an undefined global with the name __stack_pointer. Codegen will use @@ -122,14 +142,15 @@ pub fn deinit(zig_object: *ZigObject, wasm_file: *Wasm) void { // The memory of atoms parsed from object files is managed by // the object file itself, and therefore we can skip those. 
{ - var it = zig_object.decls.valueIterator(); - while (it.next()) |atom_index_ptr| { - const atom = wasm_file.getAtomPtr(atom_index_ptr.*); + var it = zig_object.decls_map.valueIterator(); + while (it.next()) |decl_info| { + const atom = wasm_file.getAtomPtr(decl_info.atom); for (atom.locals.items) |local_index| { const local_atom = wasm_file.getAtomPtr(local_index); local_atom.deinit(gpa); } atom.deinit(gpa); + decl_info.exports.deinit(gpa); } } { @@ -142,7 +163,7 @@ pub fn deinit(zig_object: *ZigObject, wasm_file: *Wasm) void { atom.deinit(gpa); } } - zig_object.decls.deinit(gpa); + zig_object.decls_map.deinit(gpa); zig_object.anon_decls.deinit(gpa); zig_object.symbols.deinit(gpa); zig_object.symbols_free_list.deinit(gpa); @@ -278,7 +299,8 @@ fn finishUpdateDecl( const gpa = wasm_file.base.comp.gpa; const mod = wasm_file.base.comp.module.?; const decl = mod.declPtr(decl_index); - const atom_index = zig_object.decls.get(decl_index).?; + const decl_info = zig_object.decls_map.get(decl_index).?; + const atom_index = decl_info.atom; const atom = wasm_file.getAtomPtr(atom_index); const sym = zig_object.symbol(atom.sym_index); const full_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); @@ -337,6 +359,8 @@ fn finishUpdateDecl( atom.alignment = decl.getAlignment(mod); } +/// Creates and initializes a new segment in the 'Data' section. +/// Reuses free slots in the list of segments and returns the index. fn createDataSegment( zig_object: *ZigObject, gpa: std.mem.Allocator, @@ -355,6 +379,7 @@ fn createDataSegment( .flags = 0, .name = name, }; + return segment_index; } /// For a given `InternPool.DeclIndex` returns its corresponding `Atom.Index`. @@ -362,17 +387,17 @@ fn createDataSegment( /// The newly created Atom is empty with default fields as specified by `Atom.empty`. 
pub fn getOrCreateAtomForDecl(zig_object: *ZigObject, wasm_file: *Wasm, decl_index: InternPool.DeclIndex) !Atom.Index { const gpa = wasm_file.base.comp.gpa; - const gop = try zig_object.decls.getOrPut(gpa, decl_index); + const gop = try zig_object.decls_map.getOrPut(gpa, decl_index); if (!gop.found_existing) { const sym_index = try zig_object.allocateSymbol(gpa); - gop.value_ptr.* = try wasm_file.createAtom(sym_index, zig_object.index); + gop.value_ptr.* = .{ .atom = try wasm_file.createAtom(sym_index, zig_object.index) }; const mod = wasm_file.base.comp.module.?; const decl = mod.declPtr(decl_index); const full_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); const sym = zig_object.symbol(sym_index); sym.name = try zig_object.string_table.insert(gpa, full_name); } - return gop.value_ptr.*; + return gop.value_ptr.atom; } pub fn lowerAnonDecl( @@ -459,11 +484,17 @@ fn lowerConst(zig_object: *ZigObject, wasm_file: *Wasm, name: []const u8, tv: Ty const code = code: { const atom = wasm_file.getAtomPtr(atom_index); atom.alignment = tv.ty.abiAlignment(mod); + const segment_name = try std.mem.concat(gpa, u8, &.{ ".rodata.", name }); + errdefer gpa.free(segment_name); zig_object.symbols.items[sym_index] = .{ .name = try zig_object.string_table.insert(gpa, name), .flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL), .tag = .data, - .index = undefined, + .index = try zig_object.createDataSegment( + gpa, + segment_name, + tv.ty.abiAlignment(mod), + ), .virtual_address = undefined, }; @@ -770,8 +801,8 @@ pub fn deleteDeclExport( wasm_file: *Wasm, decl_index: InternPool.DeclIndex, ) void { - const atom_index = zig_object.decls.get(decl_index) orelse return; - const sym_index = wasm_file.getAtom(atom_index).sym_index; + const decl_info = zig_object.decls_map.get(decl_index) orelse return; + const sym_index = wasm_file.getAtom(decl_info.atom).sym_index; const loc: Wasm.SymbolLoc = .{ .file = zig_object.index, .index = sym_index }; const sym = 
loc.getSymbol(wasm_file); std.debug.assert(zig_object.global_syms.remove(sym.name)); @@ -793,6 +824,7 @@ pub fn updateExports( }; const decl = mod.declPtr(decl_index); const atom_index = try zig_object.getOrCreateAtomForDecl(wasm_file, decl_index); + const decl_info = zig_object.decls_map.getPtr(decl_index).?; const atom = wasm_file.getAtom(atom_index); const atom_sym = atom.symbolLoc().getSymbol(wasm_file).*; const gpa = mod.gpa; @@ -808,33 +840,26 @@ pub fn updateExports( continue; } - const exported_decl_index = switch (exp.exported) { - .value => { - try mod.failed_exports.putNoClobber(gpa, exp, try Module.ErrorMsg.create( - gpa, - decl.srcLoc(mod), - "Unimplemented: exporting a named constant value", - .{}, - )); - continue; - }, - .decl_index => |i| i, + const export_string = mod.intern_pool.stringToSlice(exp.opts.name); + const sym_index = if (decl_info.@"export"(zig_object, export_string)) |idx| + idx + else index: { + const sym_index = try zig_object.allocateSymbol(gpa); + try decl_info.appendExport(gpa, sym_index); + break :index sym_index; }; - const exported_atom_index = try zig_object.getOrCreateAtomForDecl(wasm_file, exported_decl_index); - const exported_atom = wasm_file.getAtom(exported_atom_index); - // const export_name = try zig_object.string_table.put(gpa, mod.intern_pool.stringToSlice(exp.opts.name)); - const sym_loc = exported_atom.symbolLoc(); - const sym = sym_loc.getSymbol(wasm_file); + + const export_name = try zig_object.string_table.insert(gpa, export_string); + const sym = zig_object.symbol(sym_index); sym.setGlobal(true); sym.setUndefined(false); sym.index = atom_sym.index; sym.tag = atom_sym.tag; - sym.name = atom_sym.name; + sym.name = export_name; switch (exp.opts.linkage) { .Internal => { sym.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); - sym.setFlag(.WASM_SYM_BINDING_WEAK); }, .Weak => { sym.setFlag(.WASM_SYM_BINDING_WEAK); @@ -850,53 +875,10 @@ pub fn updateExports( continue; }, } - - // TODO: Revisit this - // if 
(zig_object.global_syms.get(export_name)) |existing_loc| { - // if (existing_loc.index == atom.sym_index) continue; - // const existing_sym: Symbol = existing_loc.getSymbol(wasm_file).*; - - // if (!existing_sym.isUndefined()) blk: { - // if (symbol.isWeak()) { - // try wasm_file.discarded.put(gpa, existing_loc, sym_loc); - // continue; // to-be-exported symbol is weak, so we keep the existing symbol - // } - - // // new symbol is not weak while existing is, replace existing symbol - // if (existing_sym.isWeak()) { - // break :blk; - // } - // // When both the to-be-exported symbol and the already existing symbol - // // are strong symbols, we have a linker error. - // // In the other case we replace one with the other. - // try mod.failed_exports.put(gpa, exp, try Module.ErrorMsg.create( - // gpa, - // decl.srcLoc(mod), - // \\LinkError: symbol '{}' defined multiple times - // \\ first definition in '{s}' - // \\ next definition in '{s}' - // , - // .{ exp.opts.name.fmt(&mod.intern_pool), wasm_file.name, wasm_file.name }, - // )); - // continue; - // } - - // // in this case the existing symbol must be replaced either because it's weak or undefined. 
- // try wasm.discarded.put(gpa, existing_loc, sym_loc); - // _ = wasm.imports.remove(existing_loc); - // _ = wasm.undefs.swapRemove(existing_sym.name); - // } - - // // Ensure the symbol will be exported using the given name - // if (!mod.intern_pool.stringEqlSlice(exp.opts.name, sym_loc.getName(wasm))) { - // try wasm.export_names.put(gpa, sym_loc, export_name); - // } - - // try wasm.globals.put( - // gpa, - // export_name, - // sym_loc, - // ); + if (exp.opts.visibility == .hidden) { + sym.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); + } + try zig_object.global_syms.put(gpa, export_name, sym_index); } } @@ -904,10 +886,17 @@ pub fn freeDecl(zig_object: *ZigObject, wasm_file: *Wasm, decl_index: InternPool const gpa = wasm_file.base.comp.gpa; const mod = wasm_file.base.comp.module.?; const decl = mod.declPtr(decl_index); - const atom_index = zig_object.decls.get(decl_index).?; + const decl_info = zig_object.decls_map.getPtr(decl_index).?; + const atom_index = decl_info.atom; const atom = wasm_file.getAtomPtr(atom_index); zig_object.symbols_free_list.append(gpa, atom.sym_index) catch {}; - std.debug.assert(zig_object.decls.remove(decl_index)); + for (decl_info.exports.items) |exp_sym_index| { + const exp_sym = zig_object.symbol(exp_sym_index); + exp_sym.tag = .dead; + zig_object.symbols_free_list.append(exp_sym_index) catch {}; + } + decl_info.exports.deinit(gpa); + std.debug.assert(zig_object.decls_map.remove(decl_index)); const sym = &zig_object.symbols.items[atom.sym_index]; for (atom.locals.items) |local_atom_index| { const local_atom = wasm_file.getAtom(local_atom_index); @@ -942,6 +931,9 @@ pub fn freeDecl(zig_object: *ZigObject, wasm_file: *Wasm, decl_index: InternPool } sym.tag = .dead; + if (sym.isGlobal()) { + std.debug.assert(zig_object.global_syms.remove(atom.sym_index)); + } switch (decl.ty.zigTypeTag(mod)) { .Fn => { std.debug.assert(zig_object.functions.remove(atom.sym_index)); @@ -1016,100 +1008,6 @@ pub fn parseAtom(zig_object: *ZigObject, wasm_file: 
*Wasm, atom_index: Atom.Inde _ = wasm_file; _ = atom_index; _ = kind; - // const comp = wasm.base.comp; - // const gpa = comp.gpa; - // const shared_memory = comp.config.shared_memory; - // const import_memory = comp.config.import_memory; - // const atom = wasm.getAtomPtr(atom_index); - // const symbol = (SymbolLoc{ .file = null, .index = atom.sym_index }).getSymbol(wasm); - // const do_garbage_collect = wasm.base.gc_sections; - - // if (symbol.isDead() and do_garbage_collect) { - // // Prevent unreferenced symbols from being parsed. - // return; - // } - - // const final_index: u32 = switch (kind) { - // .function => result: { - // const index: u32 = @intCast(wasm.functions.count() + wasm.imported_functions_count); - // const type_index = wasm.atom_types.get(atom_index).?; - // try wasm.functions.putNoClobber( - // gpa, - // .{ .file = null, .index = index }, - // .{ .func = .{ .type_index = type_index }, .sym_index = atom.sym_index }, - // ); - // symbol.tag = .function; - // symbol.index = index; - - // if (wasm.code_section_index == null) { - // wasm.code_section_index = @intCast(wasm.segments.items.len); - // try wasm.segments.append(gpa, .{ - // .alignment = atom.alignment, - // .size = atom.size, - // .offset = 0, - // .flags = 0, - // }); - // } - - // break :result wasm.code_section_index.?; - // }, - // .data => result: { - // const segment_name = try std.mem.concat(gpa, u8, &.{ - // kind.segmentName(), - // wasm.string_table.get(symbol.name), - // }); - // errdefer gpa.free(segment_name); - // const segment_info: types.Segment = .{ - // .name = segment_name, - // .alignment = atom.alignment, - // .flags = 0, - // }; - // symbol.tag = .data; - - // // when creating an object file, or importing memory and the data belongs in the .bss segment - // // we set the entire region of it to zeroes. - // // We do not have to do this when exporting the memory (the default) because the runtime - // // will do it for us, and we do not emit the bss segment at all. 
- // if ((wasm.base.comp.config.output_mode == .Obj or import_memory) and kind.data == .uninitialized) { - // @memset(atom.code.items, 0); - // } - - // const should_merge = wasm.base.comp.config.output_mode != .Obj; - // const gop = try wasm.data_segments.getOrPut(gpa, segment_info.outputName(should_merge)); - // if (gop.found_existing) { - // const index = gop.value_ptr.*; - // wasm.segments.items[index].size += atom.size; - - // symbol.index = @intCast(wasm.segment_info.getIndex(index).?); - // // segment info already exists, so free its memory - // gpa.free(segment_name); - // break :result index; - // } else { - // const index: u32 = @intCast(wasm.segments.items.len); - // var flags: u32 = 0; - // if (shared_memory) { - // flags |= @intFromEnum(Segment.Flag.WASM_DATA_SEGMENT_IS_PASSIVE); - // } - // try wasm.segments.append(gpa, .{ - // .alignment = atom.alignment, - // .size = 0, - // .offset = 0, - // .flags = flags, - // }); - // gop.value_ptr.* = index; - - // const info_index: u32 = @intCast(wasm.segment_info.count()); - // try wasm.segment_info.put(gpa, index, segment_info); - // symbol.index = info_index; - // break :result index; - // } - // }, - // }; - - // const segment: *Segment = &wasm.segments.items[final_index]; - // segment.alignment = segment.alignment.max(atom.alignment); - - // try wasm.appendAtomAtIndex(final_index, atom_index); } /// Generates an atom containing the global error set' size. @@ -1235,9 +1133,9 @@ fn allocateDebugAtoms(zig_object: *ZigObject) !void { /// Asserts declaration has an associated `Atom`. /// Returns the index into the list of types. 
pub fn storeDeclType(zig_object: *ZigObject, gpa: std.mem.Allocator, decl_index: InternPool.DeclIndex, func_type: std.wasm.Type) !u32 { - const atom_index = zig_object.decls.get(decl_index).?; + const decl_info = zig_object.decls_map.get(decl_index).?; const index = try zig_object.putOrGetFuncType(gpa, func_type); - try zig_object.atom_types.put(gpa, atom_index, index); + try zig_object.atom_types.put(gpa, decl_info.atom, index); return index; } From fde8c2f41a76f6bc56d733a8cb6aae90f8e3f41b Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Wed, 24 Jan 2024 17:24:58 +0100 Subject: [PATCH 13/21] wasm: reimplement `deleteDeclExport` Removes the symbol from the decl's list of exports, marks it as dead, as well as appends it to the symbol free list. Also removes it from the list of global symbols as all exports are global. In the future we should perhaps use a map for the export list to prevent linear lookups. But this requires a benchmark as having more than 1 export for the same decl is very rare. 
--- src/link/Wasm.zig | 3 +-- src/link/Wasm/ZigObject.zig | 45 ++++++++++++++++++++++++++++++++----- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 82fa7a9439..98ace9e4ac 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -1534,9 +1534,8 @@ pub fn deleteDeclExport( decl_index: InternPool.DeclIndex, name: InternPool.NullTerminatedString, ) void { - _ = name; if (wasm.llvm_object) |_| return; - return wasm.zigObjectPtr().?.deleteDeclExport(wasm, decl_index); + return wasm.zigObjectPtr().?.deleteDeclExport(wasm, decl_index, name); } pub fn updateExports( diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index 01f253d9b1..9a306fa80c 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -98,6 +98,16 @@ const DeclInfo = struct { fn appendExport(di: *DeclInfo, gpa: std.mem.Allocator, sym_index: u32) !void { return di.exports.append(gpa, sym_index); } + + fn deleteExport(di: *DeclInfo, sym_index: u32) void { + for (di.exports.items, 0..) |idx, index| { + if (idx == sym_index) { + _ = di.exports.swapRemove(index); + return; + } + } + unreachable; // invalid sym_index + } }; /// Initializes the `ZigObject` with initial symbols. 
@@ -800,12 +810,23 @@ pub fn deleteDeclExport( zig_object: *ZigObject, wasm_file: *Wasm, decl_index: InternPool.DeclIndex, + name: InternPool.NullTerminatedString, ) void { - const decl_info = zig_object.decls_map.get(decl_index) orelse return; - const sym_index = wasm_file.getAtom(decl_info.atom).sym_index; - const loc: Wasm.SymbolLoc = .{ .file = zig_object.index, .index = sym_index }; - const sym = loc.getSymbol(wasm_file); - std.debug.assert(zig_object.global_syms.remove(sym.name)); + const mod = wasm_file.base.comp.module.?; + const decl_info = zig_object.decls_map.getPtr(decl_index) orelse return; + const export_name = mod.intern_pool.stringToSlice(name); + if (decl_info.@"export"(zig_object, export_name)) |sym_index| { + const sym = zig_object.symbol(sym_index); + decl_info.deleteExport(sym_index); + std.debug.assert(zig_object.global_syms.remove(sym.name)); + std.debug.assert(wasm_file.symbol_atom.remove(.{ .file = zig_object.index, .index = sym_index })); + zig_object.symbols_free_list.append(wasm_file.base.comp.gpa, sym_index) catch {}; + + if (sym.tag == .function) { + std.debug.assert(zig_object.functions.remove(sym_index)); + } + sym.tag = .dead; + } } pub fn updateExports( @@ -846,6 +867,17 @@ pub fn updateExports( else index: { const sym_index = try zig_object.allocateSymbol(gpa); try decl_info.appendExport(gpa, sym_index); + + // For functions, we also need to put the alias in the function section. + // We simply copy the aliased function. + // The final linakge will deduplicate these functions. 
+ if (decl.ty.zigTypeTag(mod) == .Fn) { + try zig_object.functions.putNoClobber( + gpa, + sym_index, + zig_object.functions.get(atom.sym_index).?, + ); + } break :index sym_index; }; @@ -879,6 +911,7 @@ pub fn updateExports( sym.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); } try zig_object.global_syms.put(gpa, export_name, sym_index); + try wasm_file.symbol_atom.put(gpa, .{ .file = zig_object.index, .index = sym_index }, atom_index); } } @@ -1145,8 +1178,8 @@ pub fn storeDeclType(zig_object: *ZigObject, gpa: std.mem.Allocator, decl_index: pub fn parseSymbolIntoAtom(zig_object: *ZigObject, wasm_file: *Wasm, index: u32) !Atom.Index { const gpa = wasm_file.base.comp.gpa; const loc: Wasm.SymbolLoc = .{ .file = zig_object.index, .index = index }; - const final_index = try wasm_file.getMatchingSegment(zig_object.index, index); const atom_index = wasm_file.symbol_atom.get(loc).?; + const final_index = try wasm_file.getMatchingSegment(zig_object.index, index); try wasm_file.appendAtomAtIndex(final_index, atom_index); const atom = wasm_file.getAtom(atom_index); for (atom.relocs.items) |reloc| { From c153f94c892fc3b718d29ba4ae3234e99d4baba4 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Mon, 29 Jan 2024 06:52:50 +0100 Subject: [PATCH 14/21] wasm: ensure unique function indexes We cannot keep function indexes as maxInt(u32) due to functions being deduplicated when they point to the same function. For this reason we now use a regular arraylist which will have new functions appended to, and when deleted, its index is appended to the free list, allowing us to re-use slots in the function list. 
--- src/link/Wasm.zig | 2 +- src/link/Wasm/ZigObject.zig | 46 ++++++++++++++++++------------------- src/link/Wasm/file.zig | 5 +++- 3 files changed, 28 insertions(+), 25 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 98ace9e4ac..4770da7688 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -819,7 +819,7 @@ fn resolveSymbolsInObject(wasm: *Wasm, file_index: File.Index) !void { } if (symbol.tag != existing_sym.tag) { - log.err("symbol '{s}' mismatching type '{s}", .{ sym_name, @tagName(symbol.tag) }); + log.err("symbol '{s}' mismatching types '{s}' and '{s}'", .{ sym_name, @tagName(symbol.tag), @tagName(existing_sym.tag) }); log.err(" first definition in '{s}'", .{existing_file_path}); log.err(" next definition in '{s}'", .{obj_file.path()}); return error.SymbolMismatchingType; diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index 9a306fa80c..ad38fa0def 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -13,7 +13,9 @@ decls_map: std.AutoHashMapUnmanaged(InternPool.DeclIndex, DeclInfo) = .{}, func_types: std.ArrayListUnmanaged(std.wasm.Type) = .{}, /// List of `std.wasm.Func`. Each entry contains the function signature, /// rather than the actual body. -functions: std.AutoHashMapUnmanaged(u32, std.wasm.Func) = .{}, +functions: std.ArrayListUnmanaged(std.wasm.Func) = .{}, +/// List of indexes pointing to an entry within the `functions` list which has been removed. +functions_free_list: std.ArrayListUnmanaged(u32) = .{}, /// Map of symbol locations, represented by its `types.Import`. imports: std.AutoHashMapUnmanaged(u32, types.Import) = .{}, /// List of WebAssembly globals. @@ -320,11 +322,7 @@ fn finishUpdateDecl( switch (decl.ty.zigTypeTag(mod)) { .Fn => { - try zig_object.functions.put( - gpa, - atom.sym_index, - .{ .type_index = zig_object.atom_types.get(atom_index).? }, - ); + sym.index = try zig_object.appendFunction(gpa, .{ .type_index = zig_object.atom_types.get(atom_index).? 
}); sym.tag = .function; }, else => { @@ -689,6 +687,9 @@ pub fn addOrUpdateImport( }; zig_object.imported_functions_count += 1; } + sym.tag = .function; + } else { + sym.tag = .data; } } @@ -821,10 +822,6 @@ pub fn deleteDeclExport( std.debug.assert(zig_object.global_syms.remove(sym.name)); std.debug.assert(wasm_file.symbol_atom.remove(.{ .file = zig_object.index, .index = sym_index })); zig_object.symbols_free_list.append(wasm_file.base.comp.gpa, sym_index) catch {}; - - if (sym.tag == .function) { - std.debug.assert(zig_object.functions.remove(sym_index)); - } sym.tag = .dead; } } @@ -867,17 +864,6 @@ pub fn updateExports( else index: { const sym_index = try zig_object.allocateSymbol(gpa); try decl_info.appendExport(gpa, sym_index); - - // For functions, we also need to put the alias in the function section. - // We simply copy the aliased function. - // The final linakge will deduplicate these functions. - if (decl.ty.zigTypeTag(mod) == .Fn) { - try zig_object.functions.putNoClobber( - gpa, - sym_index, - zig_object.functions.get(atom.sym_index).?, - ); - } break :index sym_index; }; @@ -969,7 +955,7 @@ pub fn freeDecl(zig_object: *ZigObject, wasm_file: *Wasm, decl_index: InternPool } switch (decl.ty.zigTypeTag(mod)) { .Fn => { - std.debug.assert(zig_object.functions.remove(atom.sym_index)); + zig_object.functions_free_list.append(gpa, sym.index) catch {}; std.debug.assert(zig_object.atom_types.remove(atom_index)); }, else => { @@ -1221,7 +1207,7 @@ pub fn createFunction( sym.tag = .function; sym.name = try zig_object.string_table.insert(gpa, symbol_name); const type_index = try zig_object.putOrGetFuncType(gpa, func_ty); - try zig_object.functions.putNoClobber(gpa, sym_index, .{ .type_index = type_index }); + sym.index = try zig_object.appendFunction(gpa, .{ .type_index = type_index }); const atom_index = try wasm_file.createAtom(sym_index, zig_object.index); const atom = wasm_file.getAtomPtr(atom_index); @@ -1232,6 +1218,20 @@ pub fn createFunction( return 
sym_index; } +/// Appends a new `std.wasm.Func` to the list of functions and returns its index. +fn appendFunction(zig_object: *ZigObject, gpa: std.mem.Allocator, func: std.wasm.Func) !u32 { + const index: u32 = if (zig_object.functions_free_list.popOrNull()) |idx| + idx + else idx: { + const len: u32 = @intCast(zig_object.functions.items.len); + _ = try zig_object.functions.addOne(gpa); + break :idx len; + }; + zig_object.functions.items[index] = func; + + return index; +} + const build_options = @import("build_options"); const builtin = @import("builtin"); const codegen = @import("../../codegen.zig"); diff --git a/src/link/Wasm/file.zig b/src/link/Wasm/file.zig index 5e92f3e079..1bb9805d83 100644 --- a/src/link/Wasm/file.zig +++ b/src/link/Wasm/file.zig @@ -91,7 +91,10 @@ pub const File = union(enum) { pub fn function(file: File, sym_index: u32) std.wasm.Func { switch (file) { - .zig_object => |obj| return obj.functions.get(sym_index).?, + .zig_object => |obj| { + const sym = obj.symbols.items[sym_index]; + return obj.functions.items[sym.index]; + }, .object => |obj| { const sym = obj.symtable[sym_index]; return obj.functions[sym.index - obj.imported_functions_count]; From 5a0f2af7e4aa01f861d86bfe9fb457ffde3d335e Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Wed, 31 Jan 2024 17:21:32 +0100 Subject: [PATCH 15/21] wasm: reimplement Zig errors in linker --- src/link/Wasm.zig | 15 +++--- src/link/Wasm/ZigObject.zig | 101 ++++++++++++------------------------ 2 files changed, 40 insertions(+), 76 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 4770da7688..26dd8b47b6 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -1333,13 +1333,6 @@ fn resolveLazySymbols(wasm: *Wasm) !void { } } } - if (wasm.string_table.getOffset("__zig_errors_len")) |name_offset| { - if (wasm.undefs.fetchSwapRemove(name_offset)) |kv| { - const loc = try wasm.createSyntheticSymbolOffset(name_offset, .data); - try wasm.discarded.putNoClobber(gpa, kv.value, loc); - _ 
= wasm.resolved_symbols.swapRemove(kv.value); - } - } } // Tries to find a global symbol by its name. Returns null when not found, @@ -2009,8 +2002,7 @@ fn mergeSections(wasm: *Wasm) !void { for (wasm.resolved_symbols.keys()) |sym_loc| { const obj_file = wasm.file(sym_loc.file) orelse { - // Zig code-generated symbols are already within the sections and do not - // require to be merged + // Synthetic symbols already live in the corresponding sections. continue; }; @@ -2056,6 +2048,7 @@ fn mergeSections(wasm: *Wasm) !void { symbol.index = @as(u32, @intCast(wasm.tables.items.len)) + wasm.imported_tables_count; try wasm.tables.append(gpa, original_table); }, + .dead, .undefined => unreachable, else => {}, } } @@ -2719,6 +2712,10 @@ pub fn flushModule(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) sub_prog_node.activate(); defer sub_prog_node.end(); + if (wasm.zigObjectPtr()) |zig_object| { + try zig_object.flushModule(wasm); + } + // ensure the error names table is populated when an error name is referenced // try wasm.populateErrorNameTable(); diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index ad38fa0def..ae5252cdd4 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -551,16 +551,15 @@ pub fn getErrorTableSymbol(zig_object: *ZigObject, wasm_file: *Wasm) !u32 { atom.alignment = slice_ty.abiAlignment(mod); const sym_name = try zig_object.string_table.insert(gpa, "__zig_err_name_table"); + const segment_name = try gpa.dupe(u8, ".rodata.__zig_err_name_table"); const sym = zig_object.symbol(sym_index); sym.* = .{ .name = sym_name, .tag = .data, .flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL), - .index = 0, + .index = try zig_object.createDataSegment(gpa, segment_name, atom.alignment), .virtual_address = undefined, }; - // TODO: can we remove this? 
- // sym.mark(); log.debug("Error name table was created with symbol index: ({d})", .{sym_index}); zig_object.error_table_symbol = sym_index; @@ -584,15 +583,15 @@ fn populateErrorNameTable(zig_object: *ZigObject, wasm_file: *Wasm) !void { const names_atom = wasm_file.getAtomPtr(names_atom_index); names_atom.alignment = .@"1"; const sym_name = try zig_object.string_table.insert(gpa, "__zig_err_names"); + const segment_name = try gpa.dupe(u8, ".rodata.__zig_err_names"); const names_symbol = &zig_object.symbols.items[names_sym_index]; names_symbol.* = .{ .name = sym_name, .tag = .data, .flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL), - .index = 0, + .index = try zig_object.createDataSegment(gpa, segment_name, names_atom.alignment), .virtual_address = undefined, }; - names_symbol.mark(); log.debug("Populating error names", .{}); @@ -628,11 +627,6 @@ fn populateErrorNameTable(zig_object: *ZigObject, wasm_file: *Wasm) !void { log.debug("Populated error name: '{s}'", .{error_name}); } names_atom.size = addend; - - // link the atoms with the rest of the binary so they can be allocated - // and relocations will be performed. - try wasm_file.parseAtom(atom_index, .{ .data = .read_only }); - try wasm_file.parseAtom(names_atom_index, .{ .data = .read_only }); } /// Either creates a new import, or updates one if existing. @@ -995,76 +989,44 @@ pub fn putOrGetFuncType(zig_object: *ZigObject, gpa: std.mem.Allocator, func_typ return index; } -/// Kind represents the type of an Atom, which is only -/// used to parse a decl into an Atom to define in which section -/// or segment it should be placed. -const Kind = union(enum) { - /// Represents the segment the data symbol should - /// be inserted into. - /// TODO: Add TLS segments - data: enum { - read_only, - uninitialized, - initialized, - }, - function: void, - - /// Returns the segment name the data kind represents. - /// Asserts `kind` has its active tag set to `data`. 
- fn segmentName(kind: Kind) []const u8 { - switch (kind.data) { - .read_only => return ".rodata.", - .uninitialized => return ".bss.", - .initialized => return ".data.", - } - } -}; - -/// Parses an Atom and inserts its metadata into the corresponding sections. -pub fn parseAtom(zig_object: *ZigObject, wasm_file: *Wasm, atom_index: Atom.Index, kind: Kind) !void { - // TODO: Revisit - _ = zig_object; - _ = wasm_file; - _ = atom_index; - _ = kind; -} - /// Generates an atom containing the global error set' size. /// This will only be generated if the symbol exists. fn setupErrorsLen(zig_object: *ZigObject, wasm_file: *Wasm) !void { const gpa = wasm_file.base.comp.gpa; - const loc = zig_object.findGlobalSymbol("__zig_errors_len") orelse return; + const sym_index = zig_object.findGlobalSymbol("__zig_errors_len") orelse return; const errors_len = wasm_file.base.comp.module.?.global_error_set.count(); // overwrite existing atom if it already exists (maybe the error set has increased) // if not, allcoate a new atom. 
- const atom_index = if (wasm_file.symbol_atom.get(loc)) |index| blk: { + const atom_index = if (wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = sym_index })) |index| blk: { const atom = wasm_file.getAtomPtr(index); - if (atom.next) |next_atom_index| { - const next_atom = wasm_file.getAtomPtr(next_atom_index); - next_atom.prev = atom.prev; - atom.next = null; - } - if (atom.prev) |prev_index| { - const prev_atom = wasm_file.getAtomPtr(prev_index); - prev_atom.next = atom.next; - atom.prev = null; - } + atom.prev = null; atom.deinit(gpa); break :blk index; - } else new_atom: { - const atom_index: Atom.Index = @intCast(wasm_file.managed_atoms.items.len); - try wasm_file.symbol_atom.put(gpa, loc, atom_index); - try wasm_file.managed_atoms.append(gpa, undefined); - break :new_atom atom_index; + } else idx: { + // We found a call to __zig_errors_len so make the symbol a local symbol + // and define it, so the final binary or resulting object file will not attempt + // to resolve it. 
+ const sym = zig_object.symbol(sym_index); + sym.setGlobal(false); + sym.setUndefined(false); + sym.tag = .data; + const segment_name = try gpa.dupe(u8, ".rodata.__zig_errors_len"); + sym.index = try zig_object.createDataSegment(gpa, segment_name, .@"2"); + break :idx try wasm_file.createAtom(sym_index, zig_object.index); }; - const atom = wasm_file.getAtomPtr(atom_index); - atom.* = Atom.empty; - atom.sym_index = loc.index; - atom.size = 2; - try atom.code.writer(gpa).writeInt(u16, @intCast(errors_len), .little); - // try wasm.parseAtom(atom_index, .{ .data = .read_only }); + const atom = wasm_file.getAtomPtr(atom_index); + atom.code.clearRetainingCapacity(); + atom.sym_index = sym_index; + atom.size = 2; + atom.alignment = .@"2"; + try atom.code.writer(gpa).writeInt(u16, @intCast(errors_len), .little); +} + +fn findGlobalSymbol(zig_object: *ZigObject, name: []const u8) ?u32 { + const offset = zig_object.string_table.getOffset(name) orelse return null; + return zig_object.global_syms.get(offset); } /// Initializes symbols and atoms for the debug sections @@ -1232,6 +1194,11 @@ fn appendFunction(zig_object: *ZigObject, gpa: std.mem.Allocator, func: std.wasm return index; } +pub fn flushModule(zig_object: *ZigObject, wasm_file: *Wasm) !void { + try zig_object.populateErrorNameTable(wasm_file); + try zig_object.setupErrorsLen(wasm_file); +} + const build_options = @import("build_options"); const builtin = @import("builtin"); const codegen = @import("../../codegen.zig"); From 5aec88fa4102e87295bf60971209d114c6ae6733 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Mon, 5 Feb 2024 17:17:07 +0100 Subject: [PATCH 16/21] wasm: correctly generate relocations for type index Previously we could directly write the type index because we used the index that was known in the final binary. However, as we now process the Zig module as its own relocatable object file, we must ensure to generate a relocation for type indexes. 
This also ensures that we can later link the relocatable object file as a standalone also. This also fixes generating indirect function table entries for ZigObject as it now correctly points to the relocation symbol index rather than the symbol index that owns the relocation. --- src/arch/wasm/Emit.zig | 14 +++++++++++++- src/link/Wasm.zig | 4 +++- src/link/Wasm/ZigObject.zig | 10 ++++++++-- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/arch/wasm/Emit.zig b/src/arch/wasm/Emit.zig index 31c15ce0ef..ded960daf1 100644 --- a/src/arch/wasm/Emit.zig +++ b/src/arch/wasm/Emit.zig @@ -385,7 +385,19 @@ fn emitCallIndirect(emit: *Emit, inst: Mir.Inst.Index) !void { try emit.code.append(std.wasm.opcode(.call_indirect)); // NOTE: If we remove unused function types in the future for incremental // linking, we must also emit a relocation for this `type_index` - try leb128.writeULEB128(emit.code.writer(), type_index); + const call_offset = emit.offset(); + var buf: [5]u8 = undefined; + leb128.writeUnsignedFixed(5, &buf, type_index); + try emit.code.appendSlice(&buf); + if (type_index != 0) { + const atom_index = emit.bin_file.zigObjectPtr().?.decls_map.get(emit.decl_index).?.atom; + const atom = emit.bin_file.getAtomPtr(atom_index); + try atom.relocs.append(emit.bin_file.base.comp.gpa, .{ + .offset = call_offset, + .index = type_index, + .relocation_type = .R_WASM_TYPE_INDEX_LEB, + }); + } try leb128.writeULEB128(emit.code.writer(), @as(u32, 0)); // TODO: Emit relocation for table index } diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 26dd8b47b6..5f4277dfdd 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -3057,7 +3057,9 @@ fn writeToFile( try leb.writeULEB128(binary_writer, @as(u32, @intCast(wasm.function_table.count()))); var symbol_it = wasm.function_table.keyIterator(); while (symbol_it.next()) |symbol_loc_ptr| { - const sym = symbol_loc_ptr.*.getSymbol(wasm); + const sym = symbol_loc_ptr.getSymbol(wasm); + 
std.debug.assert(sym.isAlive()); + std.debug.assert(sym.index < wasm.functions.count() + wasm.imported_functions_count); try leb.writeULEB128(binary_writer, sym.index); } diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index ae5252cdd4..e73bd466cc 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -1137,14 +1137,20 @@ pub fn parseSymbolIntoAtom(zig_object: *ZigObject, wasm_file: *Wasm, index: u32) .R_WASM_TABLE_INDEX_SLEB, .R_WASM_TABLE_INDEX_SLEB64, => { - try wasm_file.function_table.put(gpa, loc, 0); + try wasm_file.function_table.put(gpa, .{ + .file = zig_object.index, + .index = reloc.index, + }, 0); }, .R_WASM_GLOBAL_INDEX_I32, .R_WASM_GLOBAL_INDEX_LEB, => { const sym = zig_object.symbol(reloc.index); if (sym.tag != .global) { - try wasm_file.got_symbols.append(gpa, loc); + try wasm_file.got_symbols.append(gpa, .{ + .file = zig_object.index, + .index = reloc.index, + }); } }, else => {}, From c99ef23862573269ae4052bd2236f9803f9e36a2 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Wed, 7 Feb 2024 06:57:32 +0100 Subject: [PATCH 17/21] wasm: consolidate flushModule and linkWithZld We now use a single function to use the in-house WebAssembly linker rather than wasm-ld. For both incremental compilation and traditional linking we use the same codepath. --- src/link/Wasm.zig | 327 +++--------------------------------- src/link/Wasm/ZigObject.zig | 14 +- 2 files changed, 33 insertions(+), 308 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 5f4277dfdd..a50c0bedfe 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -177,10 +177,6 @@ undefs: std.AutoArrayHashMapUnmanaged(u32, SymbolLoc) = .{}, /// data of a symbol, such as its size, or its offset to perform a relocation. /// Undefined (and synthetic) symbols do not have an Atom and therefore cannot be mapped. 
symbol_atom: std.AutoHashMapUnmanaged(SymbolLoc, Atom.Index) = .{}, -/// Maps a symbol's location to its export name, which may differ from the decl's name -/// which does the exporting. -/// Note: The value represents the offset into the string table, rather than the actual string. -export_names: std.AutoHashMapUnmanaged(SymbolLoc, u32) = .{}, /// List of atom indexes of functions that are generated by the backend, /// rather than by the linker. @@ -1398,7 +1394,6 @@ pub fn deinit(wasm: *Wasm) void { wasm.undefs.deinit(gpa); wasm.discarded.deinit(gpa); wasm.symbol_atom.deinit(gpa); - wasm.export_names.deinit(gpa); wasm.atoms.deinit(gpa); wasm.managed_atoms.deinit(gpa); wasm.segments.deinit(gpa); @@ -2133,10 +2128,10 @@ fn setupExports(wasm: *Wasm) !void { if (!symbol.isExported(comp.config.rdynamic)) continue; const sym_name = sym_loc.getName(wasm); - const export_name = if (wasm.export_names.get(sym_loc)) |name| name else blk: { - if (sym_loc.file == .null) break :blk symbol.name; - break :blk try wasm.string_table.put(gpa, sym_name); - }; + const export_name = if (sym_loc.file == .null) + symbol.name + else + try wasm.string_table.put(gpa, sym_name); const exp: types.Export = if (symbol.tag == .data) exp: { const global_index = @as(u32, @intCast(wasm.imported_globals_count + wasm.wasm_globals.items.len)); try wasm.wasm_globals.append(gpa, .{ @@ -2437,168 +2432,45 @@ fn appendDummySegment(wasm: *Wasm) !void { }); } -fn resetState(wasm: *Wasm) void { - const gpa = wasm.base.comp.gpa; - - for (wasm.segment_info.values()) |segment_info| { - gpa.free(segment_info.name); - } - - // TODO: Revisit - // var atom_it = wasm.decls.valueIterator(); - // while (atom_it.next()) |atom_index| { - // const atom = wasm.getAtomPtr(atom_index.*); - // atom.next = null; - // atom.prev = null; - - // for (atom.locals.items) |local_atom_index| { - // const local_atom = wasm.getAtomPtr(local_atom_index); - // local_atom.next = null; - // local_atom.prev = null; - // } - // } - - 
wasm.functions.clearRetainingCapacity(); - wasm.exports.clearRetainingCapacity(); - wasm.segments.clearRetainingCapacity(); - wasm.segment_info.clearRetainingCapacity(); - wasm.data_segments.clearRetainingCapacity(); - wasm.atoms.clearRetainingCapacity(); - wasm.symbol_atom.clearRetainingCapacity(); - wasm.code_section_index = null; - wasm.debug_info_index = null; - wasm.debug_line_index = null; - wasm.debug_loc_index = null; - wasm.debug_str_index = null; - wasm.debug_ranges_index = null; - wasm.debug_abbrev_index = null; - wasm.debug_pubnames_index = null; - wasm.debug_pubtypes_index = null; -} - pub fn flush(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) link.File.FlushError!void { const comp = wasm.base.comp; const use_lld = build_options.have_llvm and comp.config.use_lld; - const use_llvm = comp.config.use_llvm; if (use_lld) { return wasm.linkWithLLD(arena, prog_node); - } else if (use_llvm) { - return wasm.linkWithZld(arena, prog_node); - } else { - return wasm.flushModule(arena, prog_node); } + return wasm.flushModule(arena, prog_node); } /// Uses the in-house linker to link one or multiple object -and archive files into a WebAssembly binary. -fn linkWithZld(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) link.File.FlushError!void { +pub fn flushModule(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) link.File.FlushError!void { const tracy = trace(@src()); defer tracy.end(); const comp = wasm.base.comp; - const shared_memory = comp.config.shared_memory; - const import_memory = comp.config.import_memory; - - const directory = wasm.base.emit.directory; // Just an alias to make it shorter to type. - const full_out_path = try directory.join(arena, &[_][]const u8{wasm.base.emit.sub_path}); - const opt_zcu = comp.module; - const use_llvm = comp.config.use_llvm; - - // If there is no Zig code to compile, then we should skip flushing the output file because it - // will not be part of the linker line anyway. 
- const module_obj_path: ?[]const u8 = if (opt_zcu != null) blk: { - assert(use_llvm); // `linkWithZld` should never be called when the Wasm backend is used - try wasm.flushModule(arena, prog_node); - - if (fs.path.dirname(full_out_path)) |dirname| { - break :blk try fs.path.join(arena, &.{ dirname, wasm.base.zcu_object_sub_path.? }); - } else { - break :blk wasm.base.zcu_object_sub_path.?; - } - } else null; + if (wasm.llvm_object) |llvm_object| { + try wasm.base.emitLlvmObject(arena, llvm_object, prog_node); + const use_lld = build_options.have_llvm and comp.config.use_lld; + if (use_lld) return; + } var sub_prog_node = prog_node.start("Wasm Flush", 0); sub_prog_node.activate(); defer sub_prog_node.end(); - const compiler_rt_path: ?[]const u8 = blk: { - if (comp.compiler_rt_obj) |obj| break :blk obj.full_object_path; - if (comp.compiler_rt_lib) |lib| break :blk lib.full_object_path; - break :blk null; - }; - - const id_symlink_basename = "zld.id"; - - var man: Cache.Manifest = undefined; - defer if (!wasm.base.disable_lld_caching) man.deinit(); - var digest: [Cache.hex_digest_len]u8 = undefined; - - const objects = comp.objects; - - // NOTE: The following section must be maintained to be equal - // as the section defined in `linkWithLLD` - if (!wasm.base.disable_lld_caching) { - man = comp.cache_parent.obtain(); - - // We are about to obtain this lock, so here we give other processes a chance first. - wasm.base.releaseLock(); - - comptime assert(Compilation.link_hash_implementation_version == 12); - - for (objects) |obj| { - _ = try man.addFile(obj.path, null); - man.hash.add(obj.must_link); + const directory = wasm.base.emit.directory; // Just an alias to make it shorter to type. 
+ const full_out_path = try directory.join(arena, &[_][]const u8{wasm.base.emit.sub_path}); + const module_obj_path: ?[]const u8 = if (wasm.base.zcu_object_sub_path) |path| blk: { + if (fs.path.dirname(full_out_path)) |dirname| { + break :blk try fs.path.join(arena, &.{ dirname, path }); + } else { + break :blk path; } - for (comp.c_object_table.keys()) |key| { - _ = try man.addFile(key.status.success.object_path, null); - } - try man.addOptionalFile(module_obj_path); - try man.addOptionalFile(compiler_rt_path); - man.hash.addOptionalBytes(wasm.entry_name); - man.hash.add(wasm.base.stack_size); - man.hash.add(wasm.base.build_id); - man.hash.add(import_memory); - man.hash.add(shared_memory); - man.hash.add(wasm.import_table); - man.hash.add(wasm.export_table); - man.hash.addOptional(wasm.initial_memory); - man.hash.addOptional(wasm.max_memory); - man.hash.addOptional(wasm.global_base); - man.hash.addListOfBytes(wasm.export_symbol_names); - // strip does not need to go into the linker hash because it is part of the hash namespace - - // We don't actually care whether it's a cache hit or miss; we just need the digest and the lock. - _ = try man.hit(); - digest = man.final(); - - var prev_digest_buf: [digest.len]u8 = undefined; - const prev_digest: []u8 = Cache.readSmallFile( - directory.handle, - id_symlink_basename, - &prev_digest_buf, - ) catch |err| blk: { - log.debug("WASM LLD new_digest={s} error: {s}", .{ std.fmt.fmtSliceHexLower(&digest), @errorName(err) }); - // Handle this as a cache miss. - break :blk prev_digest_buf[0..0]; - }; - if (mem.eql(u8, prev_digest, &digest)) { - log.debug("WASM LLD digest={s} match - skipping invocation", .{std.fmt.fmtSliceHexLower(&digest)}); - // Hot diggity dog! The output binary is already there. 
- wasm.base.lock = man.toOwnedLock(); - return; - } - log.debug("WASM LLD prev_digest={s} new_digest={s}", .{ std.fmt.fmtSliceHexLower(prev_digest), std.fmt.fmtSliceHexLower(&digest) }); - - // We are about to change the output file to be different, so we invalidate the build hash now. - directory.handle.deleteFile(id_symlink_basename) catch |err| switch (err) { - error.FileNotFound => {}, - else => |e| return e, - }; - } + } else null; // Positional arguments to the linker such as object files and static archives. var positionals = std.ArrayList([]const u8).init(arena); - try positionals.ensureUnusedCapacity(objects.len); + try positionals.ensureUnusedCapacity(comp.objects.len); const target = comp.root_mod.resolved_target.result; const output_mode = comp.config.output_mode; @@ -2607,6 +2479,10 @@ fn linkWithZld(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) lin const link_libcpp = comp.config.link_libcpp; const wasi_exec_model = comp.config.wasi_exec_model; + if (wasm.zigObjectPtr()) |zig_object| { + try zig_object.flushModule(wasm); + } + // When the target os is WASI, we allow linking with WASI-LIBC if (target.os.tag == .wasi) { const is_exe_or_dyn_lib = output_mode == .Exe or @@ -2638,7 +2514,7 @@ fn linkWithZld(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) lin try positionals.append(path); } - for (objects) |object| { + for (comp.objects) |object| { try positionals.append(object.path); } @@ -2651,93 +2527,6 @@ fn linkWithZld(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) lin try wasm.parseInputFiles(positionals.items); - for (wasm.objects.items) |object_index| { - try wasm.resolveSymbolsInObject(object_index); - } - - var emit_features_count: u32 = 0; - var enabled_features: [@typeInfo(types.Feature.Tag).Enum.fields.len]bool = undefined; - try wasm.validateFeatures(&enabled_features, &emit_features_count); - try wasm.resolveSymbolsInArchives(); - try wasm.resolveLazySymbols(); - try wasm.checkUndefinedSymbols(); - 
- try wasm.setupInitFunctions(); - try wasm.setupStart(); - - try wasm.markReferences(); - try wasm.setupImports(); - try wasm.mergeSections(); - try wasm.mergeTypes(); - try wasm.allocateAtoms(); - try wasm.setupMemory(); - wasm.allocateVirtualAddresses(); - wasm.mapFunctionTable(); - try wasm.initializeCallCtorsFunction(); - try wasm.setupInitMemoryFunction(); - try wasm.setupTLSRelocationsFunction(); - try wasm.initializeTLSFunction(); - try wasm.setupStartSection(); - try wasm.setupExports(); - try wasm.writeToFile(enabled_features, emit_features_count, arena); - - if (!wasm.base.disable_lld_caching) { - // Update the file with the digest. If it fails we can continue; it only - // means that the next invocation will have an unnecessary cache miss. - Cache.writeSmallFile(directory.handle, id_symlink_basename, &digest) catch |err| { - log.warn("failed to save linking hash digest symlink: {s}", .{@errorName(err)}); - }; - // Again failure here only means an unnecessary cache miss. - man.writeManifest() catch |err| { - log.warn("failed to write cache manifest when linking: {s}", .{@errorName(err)}); - }; - // We hang on to this lock so that the output file path can be used without - // other processes clobbering it. 
- wasm.base.lock = man.toOwnedLock(); - } -} - -pub fn flushModule(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) link.File.FlushError!void { - const tracy = trace(@src()); - defer tracy.end(); - - const comp = wasm.base.comp; - - if (wasm.llvm_object) |llvm_object| { - try wasm.base.emitLlvmObject(arena, llvm_object, prog_node); - return; - } - - var sub_prog_node = prog_node.start("Wasm Flush", 0); - sub_prog_node.activate(); - defer sub_prog_node.end(); - - if (wasm.zigObjectPtr()) |zig_object| { - try zig_object.flushModule(wasm); - } - - // ensure the error names table is populated when an error name is referenced - // try wasm.populateErrorNameTable(); - - const objects = comp.objects; - - // Positional arguments to the linker such as object files and static archives. - var positionals = std.ArrayList([]const u8).init(arena); - try positionals.ensureUnusedCapacity(objects.len); - - for (objects) |object| { - positionals.appendAssumeCapacity(object.path); - } - - for (comp.c_object_table.keys()) |c_object| { - try positionals.append(c_object.status.success.object_path); - } - - if (comp.compiler_rt_lib) |lib| try positionals.append(lib.full_object_path); - if (comp.compiler_rt_obj) |obj| try positionals.append(obj.full_object_path); - - try wasm.parseInputFiles(positionals.items); - if (wasm.zig_object_index != .null) { try wasm.resolveSymbolsInObject(wasm.zig_object_index); } @@ -2752,73 +2541,11 @@ pub fn flushModule(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) try wasm.resolveLazySymbols(); try wasm.checkUndefinedSymbols(); - // When we finish/error we reset the state of the linker - // So we can rebuild the binary file on each incremental update - defer wasm.resetState(); try wasm.setupInitFunctions(); try wasm.setupStart(); + try wasm.markReferences(); - // try wasm.setupErrorsLen(); try wasm.setupImports(); - // if (comp.module) |mod| { - // var decl_it = wasm.decls.iterator(); - // while (decl_it.next()) |entry| { - // 
const decl = mod.declPtr(entry.key_ptr.*); - // if (decl.isExtern(mod)) continue; - // const atom_index = entry.value_ptr.*; - // const atom = wasm.getAtomPtr(atom_index); - // if (decl.ty.zigTypeTag(mod) == .Fn) { - // try wasm.parseAtom(atom_index, .function); - // } else if (decl.getOwnedVariable(mod)) |variable| { - // if (variable.is_const) { - // try wasm.parseAtom(atom_index, .{ .data = .read_only }); - // } else if (Value.fromInterned(variable.init).isUndefDeep(mod)) { - // // for safe build modes, we store the atom in the data segment, - // // whereas for unsafe build modes we store it in bss. - // const decl_namespace = mod.namespacePtr(decl.src_namespace); - // const optimize_mode = decl_namespace.file_scope.mod.optimize_mode; - // const is_initialized = switch (optimize_mode) { - // .Debug, .ReleaseSafe => true, - // .ReleaseFast, .ReleaseSmall => false, - // }; - // try wasm.parseAtom(atom_index, .{ .data = if (is_initialized) .initialized else .uninitialized }); - // } else { - // // when the decl is all zeroes, we store the atom in the bss segment, - // // in all other cases it will be in the data segment. 
- // const is_zeroes = for (atom.code.items) |byte| { - // if (byte != 0) break false; - // } else true; - // try wasm.parseAtom(atom_index, .{ .data = if (is_zeroes) .uninitialized else .initialized }); - // } - // } else { - // try wasm.parseAtom(atom_index, .{ .data = .read_only }); - // } - - // // also parse atoms for a decl's locals - // for (atom.locals.items) |local_atom_index| { - // try wasm.parseAtom(local_atom_index, .{ .data = .read_only }); - // } - // } - // // parse anonymous declarations - // for (wasm.anon_decls.keys(), wasm.anon_decls.values()) |decl_val, atom_index| { - // const ty = Type.fromInterned(mod.intern_pool.typeOf(decl_val)); - // if (ty.zigTypeTag(mod) == .Fn) { - // try wasm.parseAtom(atom_index, .function); - // } else { - // try wasm.parseAtom(atom_index, .{ .data = .read_only }); - // } - // } - - // // also parse any backend-generated functions - // for (wasm.synthetic_functions.items) |atom_index| { - // try wasm.parseAtom(atom_index, .function); - // } - - // if (wasm.dwarf) |*dwarf| { - // try dwarf.flushModule(comp.module.?); - // } - // } - try wasm.mergeSections(); try wasm.mergeTypes(); try wasm.allocateAtoms(); @@ -4032,7 +3759,7 @@ fn emitSymbolTable(wasm: *Wasm, binary_bytes: *std.ArrayList(u8), symbol_table: try leb.writeULEB128(writer, @intFromEnum(symbol.tag)); try leb.writeULEB128(writer, symbol.flags); - const sym_name = if (wasm.export_names.get(sym_loc)) |exp_name| wasm.string_table.get(exp_name) else sym_loc.getName(wasm); + const sym_name = sym_loc.getName(wasm); switch (symbol.tag) { .data => { try leb.writeULEB128(writer, @as(u32, @intCast(sym_name.len))); diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index e73bd466cc..268448e41c 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -670,17 +670,15 @@ pub fn addOrUpdateImport( if (type_index) |ty_index| { const gop = try zig_object.imports.getOrPut(gpa, symbol_index); - const module_name = if (lib_name) 
|l_name| blk: { - break :blk l_name; - } else wasm_file.host_name; + const module_name = if (lib_name) |l_name| l_name else wasm_file.host_name; if (!gop.found_existing) { - gop.value_ptr.* = .{ - .module_name = try zig_object.string_table.insert(gpa, module_name), - .name = try zig_object.string_table.insert(gpa, name), - .kind = .{ .function = ty_index }, - }; zig_object.imported_functions_count += 1; } + gop.value_ptr.* = .{ + .module_name = try zig_object.string_table.insert(gpa, module_name), + .name = try zig_object.string_table.insert(gpa, name), + .kind = .{ .function = ty_index }, + }; sym.tag = .function; } else { sym.tag = .data; From 5ef832133895fd69fc8378463b86759eaab6913a Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Fri, 9 Feb 2024 16:36:09 +0100 Subject: [PATCH 18/21] wasm: make symbol indexes a non-exhaustive enum This introduces some type safety so we cannot accidentally give an atom index as a symbol index. This also means we do not have to store any optionals and therefore allows for memory optimizations. Lastly, we can now always simply access the symbol index of an atom, rather than having to call `getSymbolIndex` as it is easy to forget.
--- src/arch/wasm/CodeGen.zig | 26 ++++----- src/link/Wasm.zig | 77 ++++++++++++++------------- src/link/Wasm/Atom.zig | 30 +++++------ src/link/Wasm/Object.zig | 8 +-- src/link/Wasm/Symbol.zig | 16 ++++-- src/link/Wasm/ZigObject.zig | 103 ++++++++++++++++-------------------- src/link/Wasm/file.zig | 24 ++++----- 7 files changed, 141 insertions(+), 143 deletions(-) diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 5440147296..361fd96374 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -1286,8 +1286,9 @@ fn genFunc(func: *CodeGen) InnerError!void { var prologue = std.ArrayList(Mir.Inst).init(func.gpa); defer prologue.deinit(); + const sp = @intFromEnum(func.bin_file.zigObjectPtr().?.stack_pointer_sym); // load stack pointer - try prologue.append(.{ .tag = .global_get, .data = .{ .label = 0 } }); + try prologue.append(.{ .tag = .global_get, .data = .{ .label = sp } }); // store stack pointer so we can restore it when we return from the function try prologue.append(.{ .tag = .local_tee, .data = .{ .label = func.initial_stack_value.local.value } }); // get the total stack size @@ -1303,7 +1304,7 @@ fn genFunc(func: *CodeGen) InnerError!void { try prologue.append(.{ .tag = .local_tee, .data = .{ .label = func.bottom_stack_value.local.value } }); // Store the current stack pointer value into the global stack pointer so other function calls will // start from this value instead and not overwrite the current stack. - try prologue.append(.{ .tag = .global_set, .data = .{ .label = 0 } }); + try prologue.append(.{ .tag = .global_set, .data = .{ .label = sp } }); // reserve space and insert all prologue instructions at the front of the instruction list // We insert them in reserve order as there is no insertSlice in multiArrayList. 
@@ -1502,7 +1503,7 @@ fn restoreStackPointer(func: *CodeGen) !void { try func.emitWValue(func.initial_stack_value); // save its value in the global stack pointer - try func.addLabel(.global_set, 0); + try func.addLabel(.global_set, @intFromEnum(func.bin_file.zigObjectPtr().?.stack_pointer_sym)); } /// From a given type, will create space on the virtual stack to store the value of such type. @@ -2205,7 +2206,7 @@ fn airCall(func: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModif const type_index = try func.bin_file.storeDeclType(extern_func.decl, func_type); try func.bin_file.addOrUpdateImport( mod.intern_pool.stringToSlice(ext_decl.name), - atom.getSymbolIndex().?, + atom.sym_index, mod.intern_pool.stringToSliceUnwrap(ext_decl.getOwnedExternFunc(mod).?.lib_name), type_index, ); @@ -2240,7 +2241,7 @@ fn airCall(func: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModif if (callee) |direct| { const atom_index = func.bin_file.zigObjectPtr().?.decls_map.get(direct).?.atom; - try func.addLabel(.call, func.bin_file.getAtom(atom_index).sym_index); + try func.addLabel(.call, @intFromEnum(func.bin_file.getAtom(atom_index).sym_index)); } else { // in this case we call a function pointer // so load its value onto the stack @@ -3158,7 +3159,7 @@ fn lowerAnonDeclRef( }, } const target_atom_index = func.bin_file.zigObjectPtr().?.anon_decls.get(decl_val).?; - const target_sym_index = func.bin_file.getAtom(target_atom_index).getSymbolIndex().?; + const target_sym_index = @intFromEnum(func.bin_file.getAtom(target_atom_index).sym_index); if (is_fn_body) { return WValue{ .function_index = target_sym_index }; } else if (offset == 0) { @@ -3189,7 +3190,7 @@ fn lowerDeclRefValue(func: *CodeGen, tv: TypedValue, decl_index: InternPool.Decl const atom_index = try func.bin_file.getOrCreateAtomForDecl(decl_index); const atom = func.bin_file.getAtom(atom_index); - const target_sym_index = atom.sym_index; + const target_sym_index = @intFromEnum(atom.sym_index); if 
(decl.ty.zigTypeTag(mod) == .Fn) { return WValue{ .function_index = target_sym_index }; } else if (offset == 0) { @@ -3711,7 +3712,7 @@ fn airCmpLtErrorsLen(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const un_op = func.air.instructions.items(.data)[@intFromEnum(inst)].un_op; const operand = try func.resolveInst(un_op); const sym_index = try func.bin_file.getGlobalSymbol("__zig_errors_len", null); - const errors_len = WValue{ .memory = sym_index }; + const errors_len = WValue{ .memory = @intFromEnum(sym_index) }; try func.emitWValue(operand); const mod = func.bin_file.base.comp.module.?; @@ -7153,7 +7154,7 @@ fn callIntrinsic( args: []const WValue, ) InnerError!WValue { assert(param_types.len == args.len); - const symbol_index = func.bin_file.base.getGlobalSymbol(name, null) catch |err| { + const symbol_index = func.bin_file.getGlobalSymbol(name, null) catch |err| { return func.fail("Could not find or create global symbol '{s}'", .{@errorName(err)}); }; @@ -7181,7 +7182,7 @@ fn callIntrinsic( } // Actually call our intrinsic - try func.addLabel(.call, symbol_index); + try func.addLabel(.call, @intFromEnum(symbol_index)); if (!return_type.hasRuntimeBitsIgnoreComptime(mod)) { return WValue.none; @@ -7224,7 +7225,7 @@ fn getTagNameFunction(func: *CodeGen, enum_ty: Type) InnerError!u32 { // check if we already generated code for this. 
if (func.bin_file.findGlobalSymbol(func_name)) |loc| { - return loc.index; + return @intFromEnum(loc.index); } const int_tag_ty = enum_ty.intTagType(mod); @@ -7364,7 +7365,8 @@ fn getTagNameFunction(func: *CodeGen, enum_ty: Type) InnerError!u32 { const slice_ty = Type.slice_const_u8_sentinel_0; const func_type = try genFunctype(arena, .Unspecified, &.{int_tag_ty.ip_index}, slice_ty, mod); - return func.bin_file.createFunction(func_name, func_type, &body_list, &relocs); + const sym_index = try func.bin_file.createFunction(func_name, func_type, &body_list, &relocs); + return @intFromEnum(sym_index); } fn airErrorSetHasValue(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index a50c0bedfe..ce5f451a6f 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -127,7 +127,7 @@ func_types: std.ArrayListUnmanaged(std.wasm.Type) = .{}, /// This allows us to map multiple symbols to the same function. functions: std.AutoArrayHashMapUnmanaged( struct { file: File.Index, index: u32 }, - struct { func: std.wasm.Func, sym_index: u32 }, + struct { func: std.wasm.Func, sym_index: Symbol.Index }, ) = .{}, /// Output global section wasm_globals: std.ArrayListUnmanaged(std.wasm.Global) = .{}, @@ -208,13 +208,9 @@ pub const Segment = struct { } }; -pub const Export = struct { - sym_index: ?u32 = null, -}; - pub const SymbolLoc = struct { /// The index of the symbol within the specified file - index: u32, + index: Symbol.Index, /// The index of the object file where the symbol resides. file: File.Index, @@ -226,7 +222,7 @@ pub const SymbolLoc = struct { if (wasm_file.file(loc.file)) |obj_file| { return obj_file.symbol(loc.index); } - return &wasm_file.synthetic_symbols.items[loc.index]; + return &wasm_file.synthetic_symbols.items[@intFromEnum(loc.index)]; } /// From a given location, returns the name of the symbol. 
@@ -237,7 +233,8 @@ pub const SymbolLoc = struct { if (wasm_file.file(loc.file)) |obj_file| { return obj_file.symbolName(loc.index); } - return wasm_file.string_table.get(wasm_file.synthetic_symbols.items[loc.index].name); + const sym = wasm_file.synthetic_symbols.items[@intFromEnum(loc.index)]; + return wasm_file.string_table.get(sym.name); } /// From a given symbol location, returns the final location. @@ -272,7 +269,7 @@ pub const InitFuncLoc = struct { /// Turns the given `InitFuncLoc` into a `SymbolLoc` fn getSymbolLoc(loc: InitFuncLoc) SymbolLoc { - return .{ .file = loc.file, .index = loc.index }; + return .{ .file = loc.file, .index = @enumFromInt(loc.index) }; } /// Returns true when `lhs` has a higher priority (e.i. value closer to 0) than `rhs`. @@ -566,7 +563,7 @@ pub fn createEmpty( var zig_object: ZigObject = .{ .index = index, .path = try std.fmt.allocPrint(gpa, "{s}.o", .{std.fs.path.stem(zcu.main_mod.root_src_path)}), - .stack_pointer_sym = undefined, + .stack_pointer_sym = .null, }; try zig_object.init(wasm); try wasm.files.append(gpa, .{ .zig_object = zig_object }); @@ -607,7 +604,7 @@ pub fn addOrUpdateImport( /// Name of the import name: []const u8, /// Symbol index that is external - symbol_index: u32, + symbol_index: Symbol.Index, /// Optional library name (i.e. 
`extern "c" fn foo() void` lib_name: ?[:0]const u8, /// The index of the type that represents the function signature @@ -627,7 +624,7 @@ fn createSyntheticSymbol(wasm: *Wasm, name: []const u8, tag: Symbol.Tag) !Symbol } fn createSyntheticSymbolOffset(wasm: *Wasm, name_offset: u32, tag: Symbol.Tag) !SymbolLoc { - const sym_index = @as(u32, @intCast(wasm.synthetic_symbols.items.len)); + const sym_index: Symbol.Index = @enumFromInt(wasm.synthetic_symbols.items.len); const loc: SymbolLoc = .{ .index = sym_index, .file = .null }; const gpa = wasm.base.comp.gpa; try wasm.synthetic_symbols.append(gpa, .{ @@ -670,9 +667,9 @@ fn parseObjectFile(wasm: *Wasm, path: []const u8) !bool { } /// Creates a new empty `Atom` and returns its `Atom.Index` -pub fn createAtom(wasm: *Wasm, sym_index: u32, file_index: File.Index) !Atom.Index { +pub fn createAtom(wasm: *Wasm, sym_index: Symbol.Index, file_index: File.Index) !Atom.Index { const gpa = wasm.base.comp.gpa; - const index: Atom.Index = @intCast(wasm.managed_atoms.items.len); + const index: Atom.Index = @enumFromInt(wasm.managed_atoms.items.len); const atom = try wasm.managed_atoms.addOne(gpa); atom.* = .{ .file = file_index, .sym_index = sym_index }; try wasm.symbol_atom.putNoClobber(gpa, atom.symbolLoc(), index); @@ -681,11 +678,11 @@ pub fn createAtom(wasm: *Wasm, sym_index: u32, file_index: File.Index) !Atom.Ind } pub inline fn getAtom(wasm: *const Wasm, index: Atom.Index) Atom { - return wasm.managed_atoms.items[index]; + return wasm.managed_atoms.items[@intFromEnum(index)]; } pub inline fn getAtomPtr(wasm: *Wasm, index: Atom.Index) *Atom { - return &wasm.managed_atoms.items[index]; + return &wasm.managed_atoms.items[@intFromEnum(index)]; } /// Parses an archive file and will then parse each object file @@ -757,7 +754,7 @@ fn resolveSymbolsInObject(wasm: *Wasm, file_index: File.Index) !void { log.debug("Resolving symbols in object: '{s}'", .{obj_file.path()}); for (obj_file.symbols(), 0..) 
|symbol, i| { - const sym_index: u32 = @intCast(i); + const sym_index: Symbol.Index = @enumFromInt(i); const location: SymbolLoc = .{ .file = file_index, .index = sym_index }; const sym_name = obj_file.string(symbol.name); if (mem.eql(u8, sym_name, "__indirect_function_table")) { @@ -1489,7 +1486,7 @@ pub fn lowerUnnamedConst(wasm: *Wasm, tv: TypedValue, decl_index: InternPool.Dec /// such as an exported or imported symbol. /// If the symbol does not yet exist, creates a new one symbol instead /// and then returns the index to it. -pub fn getGlobalSymbol(wasm: *Wasm, name: []const u8, lib_name: ?[]const u8) !u32 { +pub fn getGlobalSymbol(wasm: *Wasm, name: []const u8, lib_name: ?[]const u8) !Symbol.Index { _ = lib_name; return wasm.zigObjectPtr().?.getGlobalSymbol(wasm.base.comp.gpa, name); } @@ -1609,19 +1606,20 @@ fn allocateAtoms(wasm: *Wasm) !void { const sym = if (wasm.file(symbol_loc.file)) |obj_file| obj_file.symbol(symbol_loc.index).* else - wasm.synthetic_symbols.items[symbol_loc.index]; + wasm.synthetic_symbols.items[@intFromEnum(symbol_loc.index)]; // Dead symbols must be unlinked from the linked-list to prevent them // from being emit into the binary. if (sym.isDead()) { - if (entry.value_ptr.* == atom_index and atom.prev != null) { + if (entry.value_ptr.* == atom_index and atom.prev != .null) { // When the atom is dead and is also the first atom retrieved from wasm.atoms(index) we update // the entry to point it to the previous atom to ensure we do not start with a dead symbol that // was removed and therefore do not emit any code at all. 
- entry.value_ptr.* = atom.prev.?; + entry.value_ptr.* = atom.prev; } - atom_index = atom.prev orelse break; - atom.prev = null; + if (atom.prev == .null) break; + atom_index = atom.prev; + atom.prev = .null; continue; } offset = @intCast(atom.alignment.forward(offset)); @@ -1633,7 +1631,8 @@ fn allocateAtoms(wasm: *Wasm) !void { atom.size, }); offset += atom.size; - atom_index = atom.prev orelse break; + if (atom.prev == .null) break; + atom_index = atom.prev; } segment.size = @intCast(segment.alignment.forward(offset)); } @@ -1738,7 +1737,7 @@ fn setupInitFunctions(wasm: *Wasm) !void { .file = file_index, .priority = init_func.priority, }); - try wasm.mark(.{ .index = init_func.symbol_index, .file = file_index }); + try wasm.mark(.{ .index = @enumFromInt(init_func.symbol_index), .file = file_index }); } } @@ -1844,7 +1843,7 @@ pub fn createFunction( func_ty: std.wasm.Type, function_body: *std.ArrayList(u8), relocations: *std.ArrayList(Relocation), -) !u32 { +) !Symbol.Index { return wasm.zigObjectPtr().?.createFunction(wasm, symbol_name, func_ty, function_body, relocations); } @@ -2324,11 +2323,11 @@ fn setupMemory(wasm: *Wasm) !void { /// From a given object's index and the index of the segment, returns the corresponding /// index of the segment within the final data section. When the segment does not yet /// exist, a new one will be initialized and appended. The new index will be returned in that case. 
-pub fn getMatchingSegment(wasm: *Wasm, file_index: File.Index, symbol_index: u32) !u32 { +pub fn getMatchingSegment(wasm: *Wasm, file_index: File.Index, symbol_index: Symbol.Index) !u32 { const comp = wasm.base.comp; const gpa = comp.gpa; const obj_file = wasm.file(file_index).?; - const symbol = obj_file.symbols()[symbol_index]; + const symbol = obj_file.symbols()[@intFromEnum(symbol_index)]; const index: u32 = @intCast(wasm.segments.items.len); const shared_memory = comp.config.shared_memory; @@ -2889,8 +2888,8 @@ fn writeToFile( try binary_writer.writeAll(atom.code.items); current_offset += atom.size; - if (atom.prev) |prev| { - atom_index = prev; + if (atom.prev != .null) { + atom_index = atom.prev; } else { // also pad with zeroes when last atom to ensure // segments are aligned. @@ -2984,7 +2983,8 @@ fn writeToFile( while (true) { atom.resolveRelocs(wasm); try debug_bytes.appendSlice(atom.code.items); - atom = if (atom.prev) |prev| wasm.getAtomPtr(prev) else break; + if (atom.prev == .null) break; + atom = wasm.getAtomPtr(atom.prev); } try emitDebugSection(&binary_bytes, debug_bytes.items, item.name); debug_bytes.clearRetainingCapacity(); @@ -3853,7 +3853,7 @@ fn emitCodeRelocations( size_offset += getULEB128Size(atom.size); for (atom.relocs.items) |relocation| { count += 1; - const sym_loc: SymbolLoc = .{ .file = atom.file, .index = relocation.index }; + const sym_loc: SymbolLoc = .{ .file = atom.file, .index = @enumFromInt(relocation.index) }; const symbol_index = symbol_table.get(sym_loc).?; try leb.writeULEB128(writer, @intFromEnum(relocation.relocation_type)); const offset = atom.offset + relocation.offset + size_offset; @@ -3864,7 +3864,8 @@ fn emitCodeRelocations( } log.debug("Emit relocation: {}", .{relocation}); } - atom = if (atom.prev) |prev| wasm.getAtomPtr(prev) else break; + if (atom.prev == .null) break; + atom = wasm.getAtomPtr(atom.prev); } if (count == 0) return; var buf: [5]u8 = undefined; @@ -3900,7 +3901,7 @@ fn emitDataRelocations( 
size_offset += getULEB128Size(atom.size); for (atom.relocs.items) |relocation| { count += 1; - const sym_loc: SymbolLoc = .{ .file = atom.file, .index = relocation.index }; + const sym_loc: SymbolLoc = .{ .file = atom.file, .index = @enumFromInt(relocation.index) }; const symbol_index = symbol_table.get(sym_loc).?; try leb.writeULEB128(writer, @intFromEnum(relocation.relocation_type)); const offset = atom.offset + relocation.offset + size_offset; @@ -3911,7 +3912,8 @@ fn emitDataRelocations( } log.debug("Emit relocation: {}", .{relocation}); } - atom = if (atom.prev) |prev| wasm.getAtomPtr(prev) else break; + if (atom.prev == .null) break; + atom = wasm.getAtomPtr(atom.prev); } } if (count == 0) return; @@ -3969,7 +3971,8 @@ pub fn storeDeclType(wasm: *Wasm, decl_index: InternPool.DeclIndex, func_type: s /// /// When the symbol does not yet exist, it will create a new one instead. pub fn getErrorTableSymbol(wasm_file: *Wasm) !u32 { - return wasm_file.zigObjectPtr().?.getErrorTableSymbol(wasm_file); + const sym_index = try wasm_file.zigObjectPtr().?.getErrorTableSymbol(wasm_file); + return @intFromEnum(sym_index); } /// For a given `InternPool.DeclIndex` returns its corresponding `Atom.Index`. @@ -4029,7 +4032,7 @@ fn mark(wasm: *Wasm, loc: SymbolLoc) !void { const atom = wasm.getAtom(atom_index); for (atom.relocs.items) |reloc| { - const target_loc: SymbolLoc = .{ .index = reloc.index, .file = loc.file }; + const target_loc: SymbolLoc = .{ .index = @enumFromInt(reloc.index), .file = loc.file }; try wasm.mark(target_loc.finalLoc(wasm)); } } diff --git a/src/link/Wasm/Atom.zig b/src/link/Wasm/Atom.zig index ade66b687f..77d0790086 100644 --- a/src/link/Wasm/Atom.zig +++ b/src/link/Wasm/Atom.zig @@ -2,7 +2,7 @@ /// This is 'null' when the atom was generated by a synthetic linker symbol. 
file: FileIndex, /// symbol index of the symbol representing this atom -sym_index: u32, +sym_index: Symbol.Index, /// Size of the atom, used to calculate section sizes in the final binary size: u32 = 0, /// List of relocations belonging to this atom @@ -17,19 +17,19 @@ offset: u32 = 0, /// The original offset within the object file. This value is substracted from /// relocation offsets to determine where in the `data` to rewrite the value original_offset: u32 = 0, -/// Next atom in relation to this atom. -/// When null, this atom is the last atom -next: ?Atom.Index = null, /// Previous atom in relation to this atom. /// is null when this atom is the first in its order -prev: ?Atom.Index = null, +prev: Atom.Index = .null, /// Contains atoms local to a decl, all managed by this `Atom`. /// When the parent atom is being freed, it will also do so for all local atoms. locals: std.ArrayListUnmanaged(Atom.Index) = .{}, -/// Alias to an unsigned 32-bit integer. -// TODO: Make this a non-exhaustive enum. -pub const Index = u32; +/// Represents the index of an Atom where `null` is considered +/// an invalid atom. +pub const Index = enum(u32) { + null = std.math.maxInt(u32), + _, +}; /// Frees all resources owned by this `Atom`. pub fn deinit(atom: *Atom, gpa: std.mem.Allocator) void { @@ -50,7 +50,7 @@ pub fn format(atom: Atom, comptime fmt: []const u8, options: std.fmt.FormatOptio _ = fmt; _ = options; try writer.print("Atom{{ .sym_index = {d}, .alignment = {d}, .size = {d}, .offset = 0x{x:0>8} }}", .{ - atom.sym_index, + @intFromEnum(atom.sym_index), atom.alignment, atom.size, atom.offset, @@ -62,11 +62,6 @@ pub fn symbolLoc(atom: Atom) Wasm.SymbolLoc { return .{ .file = atom.file, .index = atom.sym_index }; } -pub fn getSymbolIndex(atom: Atom) ?u32 { - if (atom.sym_index == 0) return null; - return atom.sym_index; -} - /// Resolves the relocations within the atom, writing the new value /// at the calculated offset. 
pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void { @@ -80,7 +75,7 @@ pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void { for (atom.relocs.items) |reloc| { const value = atom.relocationValue(reloc, wasm_bin); log.debug("Relocating '{s}' referenced in '{s}' offset=0x{x:0>8} value={d}", .{ - (Wasm.SymbolLoc{ .file = atom.file, .index = reloc.index }).getName(wasm_bin), + (Wasm.SymbolLoc{ .file = atom.file, .index = @enumFromInt(reloc.index) }).getName(wasm_bin), symbol_name, reloc.offset, value, @@ -119,7 +114,7 @@ pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void { /// All values will be represented as a `u64` as all values can fit within it. /// The final value must be casted to the correct size. fn relocationValue(atom: Atom, relocation: types.Relocation, wasm_bin: *const Wasm) u64 { - const target_loc = (Wasm.SymbolLoc{ .file = atom.file, .index = relocation.index }).finalLoc(wasm_bin); + const target_loc = (Wasm.SymbolLoc{ .file = atom.file, .index = @enumFromInt(relocation.index) }).finalLoc(wasm_bin); const symbol = target_loc.getSymbol(wasm_bin); if (relocation.relocation_type != .R_WASM_TYPE_INDEX_LEB and symbol.tag != .section and @@ -135,7 +130,7 @@ fn relocationValue(atom: Atom, relocation: types.Relocation, wasm_bin: *const Wa .R_WASM_TABLE_INDEX_I64, .R_WASM_TABLE_INDEX_SLEB, .R_WASM_TABLE_INDEX_SLEB64, - => return wasm_bin.function_table.get(.{ .file = atom.file, .index = relocation.index }) orelse 0, + => return wasm_bin.function_table.get(.{ .file = atom.file, .index = @enumFromInt(relocation.index) }) orelse 0, .R_WASM_TYPE_INDEX_LEB => { const obj_file = wasm_bin.file(atom.file) orelse return relocation.index; const original_type = obj_file.funcTypes()[relocation.index]; @@ -195,6 +190,7 @@ fn thombstone(atom: Atom, wasm: *const Wasm) ?i64 { } return null; } + const leb = std.leb; const log = std.log.scoped(.link); const mem = std.mem; diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig index 
1c5640c526..297e71991d 100644 --- a/src/link/Wasm/Object.zig +++ b/src/link/Wasm/Object.zig @@ -907,10 +907,10 @@ fn assertEnd(reader: anytype) !void { } /// Parses an object file into atoms, for code and data sections -pub fn parseSymbolIntoAtom(object: *Object, wasm: *Wasm, symbol_index: u32) !Atom.Index { +pub fn parseSymbolIntoAtom(object: *Object, wasm: *Wasm, symbol_index: Symbol.Index) !Atom.Index { const comp = wasm.base.comp; const gpa = comp.gpa; - const symbol = &object.symtable[symbol_index]; + const symbol = &object.symtable[@intFromEnum(symbol_index)]; const relocatable_data: RelocatableData = switch (symbol.tag) { .function => object.relocatable_data.get(.code).?[symbol.index - object.imported_functions_count], .data => object.relocatable_data.get(.data).?[symbol.index], @@ -953,7 +953,7 @@ pub fn parseSymbolIntoAtom(object: *Object, wasm: *Wasm, symbol_index: u32) !Ato => { try wasm.function_table.put(gpa, .{ .file = object.index, - .index = reloc.index, + .index = @enumFromInt(reloc.index), }, 0); }, .R_WASM_GLOBAL_INDEX_I32, @@ -961,7 +961,7 @@ pub fn parseSymbolIntoAtom(object: *Object, wasm: *Wasm, symbol_index: u32) !Ato => { const sym = object.symtable[reloc.index]; if (sym.tag != .global) { - try wasm.got_symbols.append(gpa, .{ .file = object.index, .index = reloc.index }); + try wasm.got_symbols.append(gpa, .{ .file = object.index, .index = @enumFromInt(reloc.index) }); } }, else => {}, diff --git a/src/link/Wasm/Symbol.zig b/src/link/Wasm/Symbol.zig index 75c26ca10d..f913591fec 100644 --- a/src/link/Wasm/Symbol.zig +++ b/src/link/Wasm/Symbol.zig @@ -1,12 +1,8 @@ -//! Represents a wasm symbol. Containing all of its properties, +//! Represents a WebAssembly symbol. Containing all of its properties, //! as well as providing helper methods to determine its functionality //! and how it will/must be linked. //! The name of the symbol can be found by providing the offset, found //! 
on the `name` field, to a string table in the wasm binary or object file. -const Symbol = @This(); - -const std = @import("std"); -const types = @import("types.zig"); /// Bitfield containings flags for a symbol /// Can contain any of the flags defined in `Flag` @@ -24,6 +20,12 @@ tag: Tag, /// This differs from the offset of an `Atom` which is relative to the start of a segment. virtual_address: u32, +/// Represents a symbol index where `null` represents an invalid index. +pub const Index = enum(u32) { + null, + _, +}; + pub const Tag = enum { function, data, @@ -202,3 +204,7 @@ pub fn format(symbol: Symbol, comptime fmt: []const u8, options: std.fmt.FormatO .{ kind_fmt, binding, visible, symbol.index, symbol.name, undef }, ); } + +const std = @import("std"); +const types = @import("types.zig"); +const Symbol = @This(); diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index 268448e41c..d47bb6b721 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -17,7 +17,7 @@ functions: std.ArrayListUnmanaged(std.wasm.Func) = .{}, /// List of indexes pointing to an entry within the `functions` list which has been removed. functions_free_list: std.ArrayListUnmanaged(u32) = .{}, /// Map of symbol locations, represented by its `types.Import`. -imports: std.AutoHashMapUnmanaged(u32, types.Import) = .{}, +imports: std.AutoHashMapUnmanaged(Symbol.Index, types.Import) = .{}, /// List of WebAssembly globals. globals: std.ArrayListUnmanaged(std.wasm.Global) = .{}, /// Mapping between an `Atom` and its type index representing the Wasm @@ -26,9 +26,9 @@ atom_types: std.AutoHashMapUnmanaged(Atom.Index, u32) = .{}, /// List of all symbols generated by Zig code. symbols: std.ArrayListUnmanaged(Symbol) = .{}, /// Map from symbol name offset to their index into the `symbols` list. 
-global_syms: std.AutoHashMapUnmanaged(u32, u32) = .{}, +global_syms: std.AutoHashMapUnmanaged(u32, Symbol.Index) = .{}, /// List of symbol indexes which are free to be used. -symbols_free_list: std.ArrayListUnmanaged(u32) = .{}, +symbols_free_list: std.ArrayListUnmanaged(Symbol.Index) = .{}, /// Extra metadata about the linking section, such as alignment of segments and their name. segment_info: std.ArrayListUnmanaged(types.Segment) = .{}, /// List of indexes which contain a free slot in the `segment_info` list. @@ -42,7 +42,7 @@ anon_decls: std.AutoArrayHashMapUnmanaged(InternPool.Index, Atom.Index) = .{}, /// During initializion, a symbol with corresponding atom will be created that is /// used to perform relocations to the pointer of this table. /// The actual table is populated during `flush`. -error_table_symbol: ?u32 = null, +error_table_symbol: Symbol.Index = .null, /// Amount of functions in the `import` sections. imported_functions_count: u32 = 0, /// Amount of globals in the `import` section. @@ -50,7 +50,7 @@ imported_globals_count: u32 = 0, /// Symbol index representing the stack pointer. This will be set upon initializion /// of a new `ZigObject`. Codegen will make calls into this to create relocations for /// this symbol each time the stack pointer is moved. -stack_pointer_sym: u32, +stack_pointer_sym: Symbol.Index, /// Debug information for the Zig module. dwarf: ?Dwarf = null, // Debug section atoms. 
These are only set when the current compilation @@ -83,10 +83,10 @@ debug_str_index: ?u32 = null, debug_abbrev_index: ?u32 = null, const DeclInfo = struct { - atom: Atom.Index = std.math.maxInt(Atom.Index), - exports: std.ArrayListUnmanaged(u32) = .{}, + atom: Atom.Index = .null, + exports: std.ArrayListUnmanaged(Symbol.Index) = .{}, - fn @"export"(di: DeclInfo, zig_object: *const ZigObject, name: []const u8) ?u32 { + fn @"export"(di: DeclInfo, zig_object: *const ZigObject, name: []const u8) ?Symbol.Index { for (di.exports.items) |sym_index| { const sym_name_index = zig_object.symbol(sym_index).name; const sym_name = zig_object.string_table.getAssumeExists(sym_name_index); @@ -97,11 +97,11 @@ const DeclInfo = struct { return null; } - fn appendExport(di: *DeclInfo, gpa: std.mem.Allocator, sym_index: u32) !void { + fn appendExport(di: *DeclInfo, gpa: std.mem.Allocator, sym_index: Symbol.Index) !void { return di.exports.append(gpa, sym_index); } - fn deleteExport(di: *DeclInfo, sym_index: u32) void { + fn deleteExport(di: *DeclInfo, sym_index: Symbol.Index) void { for (di.exports.items, 0..) |idx, index| { if (idx == sym_index) { _ = di.exports.swapRemove(index); @@ -138,8 +138,8 @@ fn createStackPointer(zig_object: *ZigObject, wasm_file: *Wasm) !void { zig_object.stack_pointer_sym = sym_index; } -fn symbol(zig_object: *const ZigObject, index: u32) *Symbol { - return &zig_object.symbols.items[index]; +fn symbol(zig_object: *const ZigObject, index: Symbol.Index) *Symbol { + return &zig_object.symbols.items[@intFromEnum(index)]; } /// Frees and invalidates all memory of the incrementally compiled Zig module. @@ -192,7 +192,7 @@ pub fn deinit(zig_object: *ZigObject, wasm_file: *Wasm) void { /// Allocates a new symbol and returns its index. /// Will re-use slots when a symbol was freed at an earlier stage. 
-pub fn allocateSymbol(zig_object: *ZigObject, gpa: std.mem.Allocator) !u32 { +pub fn allocateSymbol(zig_object: *ZigObject, gpa: std.mem.Allocator) !Symbol.Index { try zig_object.symbols.ensureUnusedCapacity(gpa, 1); const sym: Symbol = .{ .name = std.math.maxInt(u32), // will be set after updateDecl as well as during atom creation for decls @@ -202,10 +202,10 @@ pub fn allocateSymbol(zig_object: *ZigObject, gpa: std.mem.Allocator) !u32 { .virtual_address = std.math.maxInt(u32), // will be set during atom allocation }; if (zig_object.symbols_free_list.popOrNull()) |index| { - zig_object.symbols.items[index] = sym; + zig_object.symbols.items[@intFromEnum(index)] = sym; return index; } - const index = @as(u32, @intCast(zig_object.symbols.items.len)); + const index: Symbol.Index = @enumFromInt(zig_object.symbols.items.len); zig_object.symbols.appendAssumeCapacity(sym); return index; } @@ -247,7 +247,7 @@ pub fn updateDecl( .{ .ty = decl.ty, .val = val }, &code_writer, .none, - .{ .parent_atom_index = atom.sym_index }, + .{ .parent_atom_index = @intFromEnum(atom.sym_index) }, ); const code = switch (res) { @@ -464,7 +464,7 @@ pub fn lowerUnnamedConst(zig_object: *ZigObject, wasm_file: *Wasm, tv: TypedValu switch (try zig_object.lowerConst(wasm_file, name, tv, decl.srcLoc(mod))) { .ok => |atom_index| { try wasm_file.getAtomPtr(parent_atom_index).locals.append(gpa, atom_index); - return wasm_file.getAtom(atom_index).getSymbolIndex().?; + return @intFromEnum(wasm_file.getAtom(atom_index).sym_index); }, .fail => |em| { decl.analysis = .codegen_failure; @@ -494,7 +494,7 @@ fn lowerConst(zig_object: *ZigObject, wasm_file: *Wasm, name: []const u8, tv: Ty atom.alignment = tv.ty.abiAlignment(mod); const segment_name = try std.mem.concat(gpa, u8, &.{ ".rodata.", name }); errdefer gpa.free(segment_name); - zig_object.symbols.items[sym_index] = .{ + zig_object.symbol(sym_index).* = .{ .name = try zig_object.string_table.insert(gpa, name), .flags = 
@intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL), .tag = .data, @@ -513,7 +513,7 @@ fn lowerConst(zig_object: *ZigObject, wasm_file: *Wasm, name: []const u8, tv: Ty &value_bytes, .none, .{ - .parent_atom_index = atom.sym_index, + .parent_atom_index = @intFromEnum(atom.sym_index), .addend = null, }, ); @@ -534,9 +534,9 @@ fn lowerConst(zig_object: *ZigObject, wasm_file: *Wasm, name: []const u8, tv: Ty /// Returns the symbol index of the error name table. /// /// When the symbol does not yet exist, it will create a new one instead. -pub fn getErrorTableSymbol(zig_object: *ZigObject, wasm_file: *Wasm) !u32 { - if (zig_object.error_table_symbol) |sym| { - return sym; +pub fn getErrorTableSymbol(zig_object: *ZigObject, wasm_file: *Wasm) !Symbol.Index { + if (zig_object.error_table_symbol != .null) { + return zig_object.error_table_symbol; } // no error was referenced yet, so create a new symbol and atom for it @@ -561,7 +561,7 @@ pub fn getErrorTableSymbol(zig_object: *ZigObject, wasm_file: *Wasm) !u32 { .virtual_address = undefined, }; - log.debug("Error name table was created with symbol index: ({d})", .{sym_index}); + log.debug("Error name table was created with symbol index: ({d})", .{@intFromEnum(sym_index)}); zig_object.error_table_symbol = sym_index; return sym_index; } @@ -571,9 +571,9 @@ pub fn getErrorTableSymbol(zig_object: *ZigObject, wasm_file: *Wasm) !u32 { /// This creates a table that consists of pointers and length to each error name. /// The table is what is being pointed to within the runtime bodies that are generated. 
fn populateErrorNameTable(zig_object: *ZigObject, wasm_file: *Wasm) !void { - const symbol_index = zig_object.error_table_symbol orelse return; + if (zig_object.error_table_symbol == .null) return; const gpa = wasm_file.base.comp.gpa; - const atom_index = wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = symbol_index }).?; + const atom_index = wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = zig_object.error_table_symbol }).?; // Rather than creating a symbol for each individual error name, // we create a symbol for the entire region of error names. We then calculate @@ -584,7 +584,7 @@ fn populateErrorNameTable(zig_object: *ZigObject, wasm_file: *Wasm) !void { names_atom.alignment = .@"1"; const sym_name = try zig_object.string_table.insert(gpa, "__zig_err_names"); const segment_name = try gpa.dupe(u8, ".rodata.__zig_err_names"); - const names_symbol = &zig_object.symbols.items[names_sym_index]; + const names_symbol = zig_object.symbol(names_sym_index); names_symbol.* = .{ .name = sym_name, .tag = .data, @@ -611,7 +611,7 @@ fn populateErrorNameTable(zig_object: *ZigObject, wasm_file: *Wasm) !void { try atom.code.writer(gpa).writeInt(u32, len - 1, .little); // create relocation to the error name try atom.relocs.append(gpa, .{ - .index = names_atom.sym_index, + .index = @intFromEnum(names_atom.sym_index), .relocation_type = .R_WASM_MEMORY_ADDR_I32, .offset = offset, .addend = @as(i32, @intCast(addend)), @@ -638,7 +638,7 @@ pub fn addOrUpdateImport( /// Name of the import name: []const u8, /// Symbol index that is external - symbol_index: u32, + symbol_index: Symbol.Index, /// Optional library name (i.e. 
`extern "c" fn foo() void` lib_name: ?[:0]const u8, /// The index of the type that represents the function signature @@ -647,7 +647,7 @@ pub fn addOrUpdateImport( type_index: ?u32, ) !void { const gpa = wasm_file.base.comp.gpa; - std.debug.assert(symbol_index != 0); + std.debug.assert(symbol_index != .null); // For the import name, we use the decl's name, rather than the fully qualified name // Also mangle the name when the lib name is set and not equal to "C" so imports with the same // name but different module can be resolved correctly. @@ -659,7 +659,7 @@ pub fn addOrUpdateImport( defer if (mangle_name) gpa.free(full_name); const decl_name_index = try zig_object.string_table.insert(gpa, full_name); - const sym: *Symbol = &zig_object.symbols.items[symbol_index]; + const sym: *Symbol = &zig_object.symbols.items[@intFromEnum(symbol_index)]; sym.setUndefined(true); sym.setGlobal(true); sym.name = decl_name_index; @@ -689,7 +689,7 @@ pub fn addOrUpdateImport( /// such as an exported or imported symbol. /// If the symbol does not yet exist, creates a new one symbol instead /// and then returns the index to it. 
-pub fn getGlobalSymbol(zig_object: *ZigObject, gpa: std.mem.Allocator, name: []const u8) !u32 { +pub fn getGlobalSymbol(zig_object: *ZigObject, gpa: std.mem.Allocator, name: []const u8) !Symbol.Index { const name_index = try zig_object.string_table.insert(gpa, name); const gop = try zig_object.global_syms.getOrPut(gpa, name_index); if (gop.found_existing) { @@ -707,12 +707,12 @@ pub fn getGlobalSymbol(zig_object: *ZigObject, gpa: std.mem.Allocator, name: []c sym.setUndefined(true); const sym_index = if (zig_object.symbols_free_list.popOrNull()) |index| index else blk: { - const index: u32 = @intCast(zig_object.symbols.items.len); + const index: Symbol.Index = @enumFromInt(zig_object.symbols.items.len); try zig_object.symbols.ensureUnusedCapacity(gpa, 1); zig_object.symbols.items.len += 1; break :blk index; }; - zig_object.symbols.items[sym_index] = sym; + zig_object.symbol(sym_index).* = sym; gop.value_ptr.* = sym_index; return sym_index; } @@ -731,10 +731,10 @@ pub fn getDeclVAddr( const decl = mod.declPtr(decl_index); const target_atom_index = try zig_object.getOrCreateAtomForDecl(wasm_file, decl_index); - const target_symbol_index = wasm_file.getAtom(target_atom_index).sym_index; + const target_symbol_index = @intFromEnum(wasm_file.getAtom(target_atom_index).sym_index); std.debug.assert(reloc_info.parent_atom_index != 0); - const atom_index = wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = reloc_info.parent_atom_index }).?; + const atom_index = wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = @enumFromInt(reloc_info.parent_atom_index) }).?; const atom = wasm_file.getAtomPtr(atom_index); const is_wasm32 = target.cpu.arch == .wasm32; if (decl.ty.zigTypeTag(mod) == .Fn) { @@ -769,9 +769,9 @@ pub fn getAnonDeclVAddr( const gpa = wasm_file.base.comp.gpa; const target = wasm_file.base.comp.root_mod.resolved_target.result; const atom_index = zig_object.anon_decls.get(decl_val).?; - const target_symbol_index = 
wasm_file.getAtom(atom_index).getSymbolIndex().?; + const target_symbol_index = @intFromEnum(wasm_file.getAtom(atom_index).sym_index); - const parent_atom_index = wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = reloc_info.parent_atom_index }).?; + const parent_atom_index = wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = @enumFromInt(reloc_info.parent_atom_index) }).?; const parent_atom = wasm_file.getAtomPtr(parent_atom_index); const is_wasm32 = target.cpu.arch == .wasm32; const mod = wasm_file.base.comp.module.?; @@ -930,17 +930,7 @@ pub fn freeDecl(zig_object: *ZigObject, wasm_file: *Wasm, decl_index: InternPool // dwarf.freeDecl(decl_index); // } - if (atom.next) |next_atom_index| { - const next_atom = wasm_file.getAtomPtr(next_atom_index); - next_atom.prev = atom.prev; - atom.next = null; - } - if (atom.prev) |prev_index| { - const prev_atom = wasm_file.getAtomPtr(prev_index); - prev_atom.next = atom.next; - atom.prev = null; - } - + atom.prev = null; sym.tag = .dead; if (sym.isGlobal()) { std.debug.assert(zig_object.global_syms.remove(atom.sym_index)); @@ -998,7 +988,7 @@ fn setupErrorsLen(zig_object: *ZigObject, wasm_file: *Wasm) !void { // if not, allcoate a new atom. 
const atom_index = if (wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = sym_index })) |index| blk: { const atom = wasm_file.getAtomPtr(index); - atom.prev = null; + atom.prev = .null; atom.deinit(gpa); break :blk index; } else idx: { @@ -1022,7 +1012,7 @@ fn setupErrorsLen(zig_object: *ZigObject, wasm_file: *Wasm) !void { try atom.code.writer(gpa).writeInt(u16, @intCast(errors_len), .little); } -fn findGlobalSymbol(zig_object: *ZigObject, name: []const u8) ?u32 { +fn findGlobalSymbol(zig_object: *ZigObject, name: []const u8) ?Symbol.Index { const offset = zig_object.string_table.getOffset(name) orelse return null; return zig_object.global_syms.get(offset); } @@ -1121,7 +1111,7 @@ pub fn storeDeclType(zig_object: *ZigObject, gpa: std.mem.Allocator, decl_index: /// The symbols in ZigObject are already represented by an atom as we need to store its data. /// So rather than creating a new Atom and returning its index, we use this oppertunity to scan /// its relocations and create any GOT symbols or function table indexes it may require. 
-pub fn parseSymbolIntoAtom(zig_object: *ZigObject, wasm_file: *Wasm, index: u32) !Atom.Index { +pub fn parseSymbolIntoAtom(zig_object: *ZigObject, wasm_file: *Wasm, index: Symbol.Index) !Atom.Index { const gpa = wasm_file.base.comp.gpa; const loc: Wasm.SymbolLoc = .{ .file = zig_object.index, .index = index }; const atom_index = wasm_file.symbol_atom.get(loc).?; @@ -1129,6 +1119,7 @@ pub fn parseSymbolIntoAtom(zig_object: *ZigObject, wasm_file: *Wasm, index: u32) try wasm_file.appendAtomAtIndex(final_index, atom_index); const atom = wasm_file.getAtom(atom_index); for (atom.relocs.items) |reloc| { + const reloc_index: Symbol.Index = @enumFromInt(reloc.index); switch (reloc.relocation_type) { .R_WASM_TABLE_INDEX_I32, .R_WASM_TABLE_INDEX_I64, @@ -1137,17 +1128,17 @@ pub fn parseSymbolIntoAtom(zig_object: *ZigObject, wasm_file: *Wasm, index: u32) => { try wasm_file.function_table.put(gpa, .{ .file = zig_object.index, - .index = reloc.index, + .index = reloc_index, }, 0); }, .R_WASM_GLOBAL_INDEX_I32, .R_WASM_GLOBAL_INDEX_LEB, => { - const sym = zig_object.symbol(reloc.index); + const sym = zig_object.symbol(reloc_index); if (sym.tag != .global) { try wasm_file.got_symbols.append(gpa, .{ .file = zig_object.index, - .index = reloc.index, + .index = reloc_index, }); } }, @@ -1166,10 +1157,10 @@ pub fn createFunction( func_ty: std.wasm.Type, function_body: *std.ArrayList(u8), relocations: *std.ArrayList(types.Relocation), -) !u32 { +) !Symbol.Index { const gpa = wasm_file.base.comp.gpa; const sym_index = try zig_object.allocateSymbol(gpa); - const sym = &zig_object.symbols.items[sym_index]; + const sym = zig_object.symbol(sym_index); sym.tag = .function; sym.name = try zig_object.string_table.insert(gpa, symbol_name); const type_index = try zig_object.putOrGetFuncType(gpa, func_ty); diff --git a/src/link/Wasm/file.zig b/src/link/Wasm/file.zig index 1bb9805d83..e0ff121322 100644 --- a/src/link/Wasm/file.zig +++ b/src/link/Wasm/file.zig @@ -20,10 +20,10 @@ pub const File = 
union(enum) { }; } - pub fn symbol(file: File, index: u32) *Symbol { + pub fn symbol(file: File, index: Symbol.Index) *Symbol { return switch (file) { - .zig_object => |obj| &obj.symbols.items[index], - .object => |obj| &obj.symtable[index], + .zig_object => |obj| &obj.symbols.items[@intFromEnum(index)], + .object => |obj| &obj.symtable[@intFromEnum(index)], }; } @@ -34,20 +34,20 @@ pub const File = union(enum) { }; } - pub fn symbolName(file: File, index: u32) []const u8 { + pub fn symbolName(file: File, index: Symbol.Index) []const u8 { switch (file) { .zig_object => |obj| { - const sym = obj.symbols.items[index]; + const sym = obj.symbols.items[@intFromEnum(index)]; return obj.string_table.get(sym.name).?; }, .object => |obj| { - const sym = obj.symtable[index]; + const sym = obj.symtable[@intFromEnum(index)]; return obj.string_table.get(sym.name); }, } } - pub fn parseSymbolIntoAtom(file: File, wasm_file: *Wasm, index: u32) !AtomIndex { + pub fn parseSymbolIntoAtom(file: File, wasm_file: *Wasm, index: Symbol.Index) !AtomIndex { return switch (file) { inline else => |obj| obj.parseSymbolIntoAtom(wasm_file, index), }; @@ -55,10 +55,10 @@ pub const File = union(enum) { /// For a given symbol index, find its corresponding import. /// Asserts import exists. 
- pub fn import(file: File, symbol_index: u32) types.Import { + pub fn import(file: File, symbol_index: Symbol.Index) types.Import { return switch (file) { .zig_object => |obj| obj.imports.get(symbol_index).?, - .object => |obj| obj.findImport(obj.symtable[symbol_index]), + .object => |obj| obj.findImport(obj.symtable[@intFromEnum(symbol_index)]), }; } @@ -89,14 +89,14 @@ pub const File = union(enum) { }; } - pub fn function(file: File, sym_index: u32) std.wasm.Func { + pub fn function(file: File, sym_index: Symbol.Index) std.wasm.Func { switch (file) { .zig_object => |obj| { - const sym = obj.symbols.items[sym_index]; + const sym = obj.symbols.items[@intFromEnum(sym_index)]; return obj.functions.items[sym.index]; }, .object => |obj| { - const sym = obj.symtable[sym_index]; + const sym = obj.symtable[@intFromEnum(sym_index)]; return obj.functions[sym.index - obj.imported_functions_count]; }, } From 5ba5a2c133be5e06083f088aa875ff18658fbf8c Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Mon, 12 Feb 2024 17:12:53 +0100 Subject: [PATCH 19/21] wasm: integrate linker errors with `Compilation` Rather than using the logger, we now emit proper 'compiler'-errors just like the ELF and MachO linkers with notes. We now also support emitting multiple errors before quitting the linking process in certain phases, such as symbol resolution. This means we will print all symbols which were resolved incorrectly, rather than the first one we encounter.
--- src/link/Wasm.zig | 213 ++++++++++++++++++++++++------------ src/link/Wasm/Archive.zig | 46 ++++---- src/link/Wasm/Object.zig | 39 ++++--- src/link/Wasm/ZigObject.zig | 2 +- 4 files changed, 194 insertions(+), 106 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index ce5f451a6f..db0e275810 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -655,9 +655,14 @@ fn parseObjectFile(wasm: *Wasm, path: []const u8) !bool { errdefer obj_file.close(); const gpa = wasm.base.comp.gpa; - var object = Object.create(gpa, obj_file, path, null) catch |err| switch (err) { + var object = Object.create(wasm, obj_file, path, null) catch |err| switch (err) { error.InvalidMagicByte, error.NotObjectFile => return false, - else => |e| return e, + else => |e| { + var err_note = try wasm.addErrorWithNotes(1); + try err_note.addMsg(wasm, "Failed parsing object file: {s}", .{@errorName(e)}); + try err_note.addNote(wasm, "while parsing '{s}'", .{path}); + return error.FlushFailure; + }, }; errdefer object.deinit(gpa); object.index = @enumFromInt(wasm.files.len); @@ -708,7 +713,12 @@ fn parseArchive(wasm: *Wasm, path: []const u8, force_load: bool) !bool { archive.deinit(gpa); return false; }, - else => |e| return e, + else => |e| { + var err_note = try wasm.addErrorWithNotes(1); + try err_note.addMsg(wasm, "Failed parsing archive: {s}", .{@errorName(e)}); + try err_note.addNote(wasm, "while parsing archive {s}", .{path}); + return error.FlushFailure; + }, }; if (!force_load) { @@ -730,7 +740,12 @@ fn parseArchive(wasm: *Wasm, path: []const u8, force_load: bool) !bool { } for (offsets.keys()) |file_offset| { - var object = try archive.parseObject(gpa, file_offset); + var object = archive.parseObject(wasm, file_offset) catch |e| { + var err_note = try wasm.addErrorWithNotes(1); + try err_note.addMsg(wasm, "Failed parsing object: {s}", .{@errorName(e)}); + try err_note.addNote(wasm, "while parsing object in archive {s}", .{path}); + return error.FlushFailure; + }; 
object.index = @enumFromInt(wasm.files.len); try wasm.files.append(gpa, .{ .object = object }); try wasm.objects.append(gpa, object.index); @@ -764,9 +779,9 @@ fn resolveSymbolsInObject(wasm: *Wasm, file_index: File.Index) !void { if (symbol.isLocal()) { if (symbol.isUndefined()) { - log.err("Local symbols are not allowed to reference imports", .{}); - log.err(" symbol '{s}' defined in '{s}'", .{ sym_name, obj_file.path() }); - return error.UndefinedLocal; + var err = try wasm.addErrorWithNotes(1); + try err.addMsg(wasm, "Local symbols are not allowed to reference imports", .{}); + try err.addNote(wasm, "symbol '{s}' defined in '{s}'", .{ sym_name, obj_file.path() }); } try wasm.resolved_symbols.putNoClobber(gpa, location, {}); continue; @@ -801,10 +816,10 @@ fn resolveSymbolsInObject(wasm: *Wasm, file_index: File.Index) !void { break :outer; // existing is weak, while new one isn't. Replace it. } // both are defined and weak, we have a symbol collision. - log.err("symbol '{s}' defined multiple times", .{sym_name}); - log.err(" first definition in '{s}'", .{existing_file_path}); - log.err(" next definition in '{s}'", .{obj_file.path()}); - return error.SymbolCollision; + var err = try wasm.addErrorWithNotes(2); + try err.addMsg(wasm, "symbol '{s}' defined multiple times", .{sym_name}); + try err.addNote(wasm, "first definition in '{s}'", .{existing_file_path}); + try err.addNote(wasm, "next definition in '{s}'", .{obj_file.path()}); } try wasm.discarded.put(gpa, location, existing_loc); @@ -812,10 +827,10 @@ fn resolveSymbolsInObject(wasm: *Wasm, file_index: File.Index) !void { } if (symbol.tag != existing_sym.tag) { - log.err("symbol '{s}' mismatching types '{s}' and '{s}'", .{ sym_name, @tagName(symbol.tag), @tagName(existing_sym.tag) }); - log.err(" first definition in '{s}'", .{existing_file_path}); - log.err(" next definition in '{s}'", .{obj_file.path()}); - return error.SymbolMismatchingType; + var err = try wasm.addErrorWithNotes(2); + try err.addMsg(wasm, 
"symbol '{s}' mismatching types '{s}' and '{s}'", .{ sym_name, @tagName(symbol.tag), @tagName(existing_sym.tag) }); + try err.addNote(wasm, "first definition in '{s}'", .{existing_file_path}); + try err.addNote(wasm, "next definition in '{s}'", .{obj_file.path()}); } if (existing_sym.isUndefined() and symbol.isUndefined()) { @@ -832,14 +847,14 @@ fn resolveSymbolsInObject(wasm: *Wasm, file_index: File.Index) !void { const imp = obj_file.import(sym_index); const module_name = obj_file.string(imp.module_name); if (!mem.eql(u8, existing_name, module_name)) { - log.err("symbol '{s}' module name mismatch. Expected '{s}', but found '{s}'", .{ + var err = try wasm.addErrorWithNotes(2); + try err.addMsg(wasm, "symbol '{s}' module name mismatch. Expected '{s}', but found '{s}'", .{ sym_name, existing_name, module_name, }); - log.err(" first definition in '{s}'", .{existing_file_path}); - log.err(" next definition in '{s}'", .{obj_file.path()}); - return error.ModuleNameMismatch; + try err.addNote(wasm, "first definition in '{s}'", .{existing_file_path}); + try err.addNote(wasm, "next definition in '{s}'", .{obj_file.path()}); } } @@ -852,10 +867,10 @@ fn resolveSymbolsInObject(wasm: *Wasm, file_index: File.Index) !void { const existing_ty = wasm.getGlobalType(existing_loc); const new_ty = wasm.getGlobalType(location); if (existing_ty.mutable != new_ty.mutable or existing_ty.valtype != new_ty.valtype) { - log.err("symbol '{s}' mismatching global types", .{sym_name}); - log.err(" first definition in '{s}'", .{existing_file_path}); - log.err(" next definition in '{s}'", .{obj_file.path()}); - return error.GlobalTypeMismatch; + var err = try wasm.addErrorWithNotes(2); + try err.addMsg(wasm, "symbol '{s}' mismatching global types", .{sym_name}); + try err.addNote(wasm, "first definition in '{s}'", .{existing_file_path}); + try err.addNote(wasm, "next definition in '{s}'", .{obj_file.path()}); } } @@ -863,11 +878,11 @@ fn resolveSymbolsInObject(wasm: *Wasm, file_index: 
File.Index) !void { const existing_ty = wasm.getFunctionSignature(existing_loc); const new_ty = wasm.getFunctionSignature(location); if (!existing_ty.eql(new_ty)) { - log.err("symbol '{s}' mismatching function signatures.", .{sym_name}); - log.err(" expected signature {}, but found signature {}", .{ existing_ty, new_ty }); - log.err(" first definition in '{s}'", .{existing_file_path}); - log.err(" next definition in '{s}'", .{obj_file.path()}); - return error.FunctionSignatureMismatch; + var err = try wasm.addErrorWithNotes(3); + try err.addMsg(wasm, "symbol '{s}' mismatching function signatures.", .{sym_name}); + try err.addNote(wasm, "expected signature {}, but found signature {}", .{ existing_ty, new_ty }); + try err.addNote(wasm, "first definition in '{s}'", .{existing_file_path}); + try err.addNote(wasm, "next definition in '{s}'", .{obj_file.path()}); } } @@ -914,7 +929,12 @@ fn resolveSymbolsInArchives(wasm: *Wasm) !void { // Symbol is found in unparsed object file within current archive. // Parse object and and resolve symbols again before we check remaining // undefined symbols. 
- var object = try archive.parseObject(gpa, offset.items[0]); + var object = archive.parseObject(wasm, offset.items[0]) catch |e| { + var err_note = try wasm.addErrorWithNotes(1); + try err_note.addMsg(wasm, "Failed parsing object: {s}", .{@errorName(e)}); + try err_note.addNote(wasm, "while parsing object in archive {s}", .{archive.name}); + return error.FlushFailure; + }; object.index = @enumFromInt(wasm.files.len); try wasm.files.append(gpa, .{ .object = object }); try wasm.objects.append(gpa, object.index); @@ -1214,20 +1234,21 @@ fn validateFeatures( allowed[used_index] = is_enabled; emit_features_count.* += @intFromBool(is_enabled); } else if (is_enabled and !allowed[used_index]) { - log.err("feature '{}' not allowed, but used by linked object", .{@as(types.Feature.Tag, @enumFromInt(used_index))}); - log.err(" defined in '{s}'", .{wasm.files.items(.data)[used_set >> 1].object.path}); + var err = try wasm.addErrorWithNotes(1); + try err.addMsg(wasm, "feature '{}' not allowed, but used by linked object", .{@as(types.Feature.Tag, @enumFromInt(used_index))}); + try err.addNote(wasm, "defined in '{s}'", .{wasm.files.items(.data)[used_set >> 1].object.path}); valid_feature_set = false; } } if (!valid_feature_set) { - return error.InvalidFeatureSet; + return error.FlushFailure; } if (shared_memory) { const disallowed_feature = disallowed[@intFromEnum(types.Feature.Tag.shared_mem)]; if (@as(u1, @truncate(disallowed_feature)) != 0) { - log.err( + try wasm.addErrorWithoutNotes( "shared-memory is disallowed by '{s}' because it wasn't compiled with 'atomics' and 'bulk-memory' features enabled", .{wasm.files.items(.data)[disallowed_feature >> 1].object.path}, ); @@ -1236,7 +1257,7 @@ fn validateFeatures( for ([_]types.Feature.Tag{ .atomics, .bulk_memory }) |feature| { if (!allowed[@intFromEnum(feature)]) { - log.err("feature '{}' is not used but is required for shared-memory", .{feature}); + try wasm.addErrorWithoutNotes("feature '{}' is not used but is required for 
shared-memory", .{feature}); } } } @@ -1244,7 +1265,7 @@ fn validateFeatures( if (has_tls) { for ([_]types.Feature.Tag{ .atomics, .bulk_memory }) |feature| { if (!allowed[@intFromEnum(feature)]) { - log.err("feature '{}' is not used but is required for thread-local storage", .{feature}); + try wasm.addErrorWithoutNotes("feature '{}' is not used but is required for thread-local storage", .{feature}); } } } @@ -1257,9 +1278,10 @@ fn validateFeatures( // from here a feature is always used const disallowed_feature = disallowed[@intFromEnum(feature.tag)]; if (@as(u1, @truncate(disallowed_feature)) != 0) { - log.err("feature '{}' is disallowed, but used by linked object", .{feature.tag}); - log.err(" disallowed by '{s}'", .{wasm.files.items(.data)[disallowed_feature >> 1].object.path}); - log.err(" used in '{s}'", .{object.path}); + var err = try wasm.addErrorWithNotes(2); + try err.addMsg(wasm, "feature '{}' is disallowed, but used by linked object", .{feature.tag}); + try err.addNote(wasm, "disallowed by '{s}'", .{wasm.files.items(.data)[disallowed_feature >> 1].object.path}); + try err.addNote(wasm, "used in '{s}'", .{object.path}); valid_feature_set = false; } @@ -1270,16 +1292,17 @@ fn validateFeatures( for (required, 0..) 
|required_feature, feature_index| { const is_required = @as(u1, @truncate(required_feature)) != 0; if (is_required and !object_used_features[feature_index]) { - log.err("feature '{}' is required but not used in linked object", .{@as(types.Feature.Tag, @enumFromInt(feature_index))}); - log.err(" required by '{s}'", .{wasm.files.items(.data)[required_feature >> 1].object.path}); - log.err(" missing in '{s}'", .{object.path}); + var err = try wasm.addErrorWithNotes(2); + try err.addMsg(wasm, "feature '{}' is required but not used in linked object", .{@as(types.Feature.Tag, @enumFromInt(feature_index))}); + try err.addNote(wasm, "required by '{s}'", .{wasm.files.items(.data)[required_feature >> 1].object.path}); + try err.addNote(wasm, "missing in '{s}'", .{object.path}); valid_feature_set = false; } } } if (!valid_feature_set) { - return error.InvalidFeatureSet; + return error.FlushFailure; } to_emit.* = allowed; @@ -1350,12 +1373,13 @@ fn checkUndefinedSymbols(wasm: *const Wasm) !void { else wasm.name; const symbol_name = undef.getName(wasm); - log.err("could not resolve undefined symbol '{s}'", .{symbol_name}); - log.err(" defined in '{s}'", .{file_name}); + var err = try wasm.addErrorWithNotes(1); + try err.addMsg(wasm, "could not resolve undefined symbol '{s}'", .{symbol_name}); + try err.addNote(wasm, "defined in '{s}'", .{file_name}); } } if (found_undefined_symbols) { - return error.UndefinedSymbol; + return error.FlushFailure; } } @@ -1728,8 +1752,7 @@ fn setupInitFunctions(wasm: *Wasm) !void { break :ty object.func_types[func.type_index]; }; if (ty.params.len != 0) { - log.err("constructor functions cannot take arguments: '{s}'", .{object.string_table.get(symbol.name)}); - return error.InvalidInitFunc; + try wasm.addErrorWithoutNotes("constructor functions cannot take arguments: '{s}'", .{object.string_table.get(symbol.name)}); } log.debug("appended init func '{s}'\n", .{object.string_table.get(symbol.name)}); wasm.init_funcs.appendAssumeCapacity(.{ @@ 
-2108,7 +2131,7 @@ fn setupExports(wasm: *Wasm) !void { for (force_exp_names) |exp_name| { const loc = wasm.findGlobalSymbol(exp_name) orelse { - log.err("could not export '{s}', symbol not found", .{exp_name}); + try wasm.addErrorWithoutNotes("could not export '{s}', symbol not found", .{exp_name}); failed_exports = true; continue; }; @@ -2118,7 +2141,7 @@ fn setupExports(wasm: *Wasm) !void { } if (failed_exports) { - return error.MissingSymbol; + return error.FlushFailure; } } @@ -2164,14 +2187,14 @@ fn setupStart(wasm: *Wasm) !void { const entry_name = wasm.entry_name orelse return; const symbol_loc = wasm.findGlobalSymbol(entry_name) orelse { - log.err("Entry symbol '{s}' missing, use '-fno-entry' to suppress", .{entry_name}); - return error.MissingSymbol; + try wasm.addErrorWithoutNotes("Entry symbol '{s}' missing, use '-fno-entry' to suppress", .{entry_name}); + return error.FlushFailure; }; const symbol = symbol_loc.getSymbol(wasm); if (symbol.tag != .function) { - log.err("Entry symbol '{s}' is not a function", .{entry_name}); - return error.InvalidEntryKind; + try wasm.addErrorWithoutNotes("Entry symbol '{s}' is not a function", .{entry_name}); + return error.FlushFailure; } // Ensure the symbol is exported so host environment can access it @@ -2274,16 +2297,13 @@ fn setupMemory(wasm: *Wasm) !void { if (wasm.initial_memory) |initial_memory| { if (!std.mem.isAlignedGeneric(u64, initial_memory, page_size)) { - log.err("Initial memory must be {d}-byte aligned", .{page_size}); - return error.MissAlignment; + try wasm.addErrorWithoutNotes("Initial memory must be {d}-byte aligned", .{page_size}); } if (memory_ptr > initial_memory) { - log.err("Initial memory too small, must be at least {d} bytes", .{memory_ptr}); - return error.MemoryTooSmall; + try wasm.addErrorWithoutNotes("Initial memory too small, must be at least {d} bytes", .{memory_ptr}); } if (initial_memory > max_memory_allowed) { - log.err("Initial memory exceeds maximum memory {d}", 
.{max_memory_allowed}); - return error.MemoryTooBig; + try wasm.addErrorWithoutNotes("Initial memory exceeds maximum memory {d}", .{max_memory_allowed}); } memory_ptr = initial_memory; } @@ -2300,16 +2320,13 @@ fn setupMemory(wasm: *Wasm) !void { if (wasm.max_memory) |max_memory| { if (!std.mem.isAlignedGeneric(u64, max_memory, page_size)) { - log.err("Maximum memory must be {d}-byte aligned", .{page_size}); - return error.MissAlignment; + try wasm.addErrorWithoutNotes("Maximum memory must be {d}-byte aligned", .{page_size}); } if (memory_ptr > max_memory) { - log.err("Maxmimum memory too small, must be at least {d} bytes", .{memory_ptr}); - return error.MemoryTooSmall; + try wasm.addErrorWithoutNotes("Maxmimum memory too small, must be at least {d} bytes", .{memory_ptr}); } if (max_memory > max_memory_allowed) { - log.err("Maximum memory exceeds maxmium amount {d}", .{max_memory_allowed}); - return error.MemoryTooBig; + try wasm.addErrorWithoutNotes("Maximum memory exceeds maxmium amount {d}", .{max_memory_allowed}); } wasm.memories.limits.max = @as(u32, @intCast(max_memory / page_size)); wasm.memories.limits.setFlag(.WASM_LIMITS_FLAG_HAS_MAX); @@ -2412,7 +2429,9 @@ pub fn getMatchingSegment(wasm: *Wasm, file_index: File.Index, symbol_index: Sym break :blk index; }; } else { - log.err("found unknown section '{s}'", .{section_name}); + var err = try wasm.addErrorWithNotes(1); + try err.addMsg(wasm, "found unknown section '{s}'", .{section_name}); + try err.addNote(wasm, "defined in '{s}'", .{obj_file.path()}); return error.UnexpectedValue; } }, @@ -2529,18 +2548,22 @@ pub fn flushModule(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) if (wasm.zig_object_index != .null) { try wasm.resolveSymbolsInObject(wasm.zig_object_index); } + if (comp.link_errors.items.len > 0) return error.FlushFailure; for (wasm.objects.items) |object_index| { try wasm.resolveSymbolsInObject(object_index); } + if (comp.link_errors.items.len > 0) return error.FlushFailure; var 
emit_features_count: u32 = 0; var enabled_features: [@typeInfo(types.Feature.Tag).Enum.fields.len]bool = undefined; try wasm.validateFeatures(&enabled_features, &emit_features_count); try wasm.resolveSymbolsInArchives(); + if (comp.link_errors.items.len > 0) return error.FlushFailure; try wasm.resolveLazySymbols(); try wasm.checkUndefinedSymbols(); try wasm.setupInitFunctions(); + if (comp.link_errors.items.len > 0) return error.FlushFailure; try wasm.setupStart(); try wasm.markReferences(); @@ -2549,6 +2572,7 @@ pub fn flushModule(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) try wasm.mergeTypes(); try wasm.allocateAtoms(); try wasm.setupMemory(); + if (comp.link_errors.items.len > 0) return error.FlushFailure; wasm.allocateVirtualAddresses(); wasm.mapFunctionTable(); try wasm.initializeCallCtorsFunction(); @@ -2558,6 +2582,7 @@ pub fn flushModule(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) try wasm.setupStartSection(); try wasm.setupExports(); try wasm.writeToFile(enabled_features, emit_features_count, arena); + if (comp.link_errors.items.len > 0) return error.FlushFailure; } /// Writes the WebAssembly in-memory module to the file @@ -2955,7 +2980,7 @@ fn writeToFile( }) catch unreachable; try emitBuildIdSection(&binary_bytes, str); }, - else => |mode| log.err("build-id '{s}' is not supported for WASM", .{@tagName(mode)}), + else => |mode| try wasm.addErrorWithoutNotes("build-id '{s}' is not supported for WebAssembly", .{@tagName(mode)}), } var debug_bytes = std.ArrayList(u8).init(gpa); @@ -4043,3 +4068,57 @@ fn defaultEntrySymbolName(wasi_exec_model: std.builtin.WasiExecModel) []const u8 .command => "_start", }; } + +const ErrorWithNotes = struct { + /// Allocated index in comp.link_errors array. + index: usize, + + /// Next available note slot. 
+ note_slot: usize = 0, + + pub fn addMsg( + err: ErrorWithNotes, + wasm_file: *const Wasm, + comptime format: []const u8, + args: anytype, + ) error{OutOfMemory}!void { + const comp = wasm_file.base.comp; + const gpa = comp.gpa; + const err_msg = &comp.link_errors.items[err.index]; + err_msg.msg = try std.fmt.allocPrint(gpa, format, args); + } + + pub fn addNote( + err: *ErrorWithNotes, + wasm_file: *const Wasm, + comptime format: []const u8, + args: anytype, + ) error{OutOfMemory}!void { + const comp = wasm_file.base.comp; + const gpa = comp.gpa; + const err_msg = &comp.link_errors.items[err.index]; + err_msg.notes[err.note_slot] = .{ .msg = try std.fmt.allocPrint(gpa, format, args) }; + err.note_slot += 1; + } +}; + +pub fn addErrorWithNotes(wasm: *const Wasm, note_count: usize) error{OutOfMemory}!ErrorWithNotes { + const comp = wasm.base.comp; + const gpa = comp.gpa; + try comp.link_errors.ensureUnusedCapacity(gpa, 1); + return wasm.addErrorWithNotesAssumeCapacity(note_count); +} + +pub fn addErrorWithoutNotes(wasm: *const Wasm, comptime fmt: []const u8, args: anytype) !void { + const err = try wasm.addErrorWithNotes(0); + try err.addMsg(wasm, fmt, args); +} + +fn addErrorWithNotesAssumeCapacity(wasm: *const Wasm, note_count: usize) error{OutOfMemory}!ErrorWithNotes { + const comp = wasm.base.comp; + const gpa = comp.gpa; + const index = comp.link_errors.items.len; + const err = comp.link_errors.addOneAssumeCapacity(); + err.* = .{ .msg = undefined, .notes = try gpa.alloc(link.File.ErrorMsg, note_count) }; + return .{ .index = index }; +} diff --git a/src/link/Wasm/Archive.zig b/src/link/Wasm/Archive.zig index a618aaebbf..028ef95726 100644 --- a/src/link/Wasm/Archive.zig +++ b/src/link/Wasm/Archive.zig @@ -1,14 +1,3 @@ -const Archive = @This(); - -const std = @import("std"); -const assert = std.debug.assert; -const fs = std.fs; -const log = std.log.scoped(.archive); -const mem = std.mem; - -const Allocator = mem.Allocator; -const Object = @import("Object.zig"); 
- file: fs.File, name: []const u8, @@ -151,10 +140,7 @@ fn parseTableOfContents(archive: *Archive, allocator: Allocator, reader: anytype const sym_tab = try allocator.alloc(u8, sym_tab_size - 4 - (4 * num_symbols)); defer allocator.free(sym_tab); - reader.readNoEof(sym_tab) catch { - log.err("incomplete symbol table: expected symbol table of length 0x{x}", .{sym_tab.len}); - return error.MalformedArchive; - }; + reader.readNoEof(sym_tab) catch return error.IncompleteSymbolTable; var i: usize = 0; var pos: usize = 0; @@ -178,12 +164,10 @@ fn parseTableOfContents(archive: *Archive, allocator: Allocator, reader: anytype fn parseNameTable(archive: *Archive, allocator: Allocator, reader: anytype) !void { const header: ar_hdr = try reader.readStruct(ar_hdr); if (!mem.eql(u8, &header.ar_fmag, ARFMAG)) { - log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, header.ar_fmag }); - return error.MalformedArchive; + return error.InvalidHeaderDelimiter; } if (!mem.eql(u8, header.ar_name[0..2], "//")) { - log.err("invalid archive. Long name table missing", .{}); - return error.MalformedArchive; + return error.MissingTableName; } const table_size = try header.size(); const long_file_names = try allocator.alloc(u8, table_size); @@ -194,7 +178,8 @@ fn parseNameTable(archive: *Archive, allocator: Allocator, reader: anytype) !voi /// From a given file offset, starts reading for a file header. /// When found, parses the object file into an `Object` and returns it. 
-pub fn parseObject(archive: Archive, allocator: Allocator, file_offset: u32) !Object { +pub fn parseObject(archive: Archive, wasm_file: *const Wasm, file_offset: u32) !Object { + const gpa = wasm_file.base.comp.gpa; try archive.file.seekTo(file_offset); const reader = archive.file.reader(); const header = try reader.readStruct(ar_hdr); @@ -202,22 +187,33 @@ pub fn parseObject(archive: Archive, allocator: Allocator, file_offset: u32) !Ob try archive.file.seekTo(0); if (!mem.eql(u8, &header.ar_fmag, ARFMAG)) { - log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, header.ar_fmag }); - return error.MalformedArchive; + return error.InvalidHeaderDelimiter; } const object_name = try archive.parseName(header); const name = name: { var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; const path = try std.os.realpath(archive.name, &buffer); - break :name try std.fmt.allocPrint(allocator, "{s}({s})", .{ path, object_name }); + break :name try std.fmt.allocPrint(gpa, "{s}({s})", .{ path, object_name }); }; - defer allocator.free(name); + defer gpa.free(name); const object_file = try std.fs.cwd().openFile(archive.name, .{}); errdefer object_file.close(); const object_file_size = try header.size(); try object_file.seekTo(current_offset); - return Object.create(allocator, object_file, name, object_file_size); + return Object.create(wasm_file, object_file, name, object_file_size); } + +const std = @import("std"); +const assert = std.debug.assert; +const fs = std.fs; +const log = std.log.scoped(.archive); +const mem = std.mem; + +const Allocator = mem.Allocator; +const Object = @import("Object.zig"); +const Wasm = @import("../Wasm.zig"); + +const Archive = @This(); diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig index 297e71991d..ca9f746d51 100644 --- a/src/link/Wasm/Object.zig +++ b/src/link/Wasm/Object.zig @@ -127,7 +127,8 @@ pub const InitError = error{NotObjectFile} || ParseError || std.fs.File.ReadErro /// This also parses and verifies 
the object file. /// When a max size is given, will only parse up to the given size, /// else will read until the end of the file. -pub fn create(gpa: Allocator, file: std.fs.File, name: []const u8, maybe_max_size: ?usize) InitError!Object { +pub fn create(wasm_file: *const Wasm, file: std.fs.File, name: []const u8, maybe_max_size: ?usize) InitError!Object { + const gpa = wasm_file.base.comp.gpa; var object: Object = .{ .file = file, .path = try gpa.dupe(u8, name), @@ -151,7 +152,7 @@ pub fn create(gpa: Allocator, file: std.fs.File, name: []const u8, maybe_max_siz } var fbs = std.io.fixedBufferStream(file_contents); - try object.parse(gpa, fbs.reader(), &is_object_file); + try object.parse(gpa, wasm_file, fbs.reader(), &is_object_file); errdefer object.deinit(gpa); if (!is_object_file) return error.NotObjectFile; @@ -224,7 +225,7 @@ pub fn findImport(object: *const Object, sym: Symbol) types.Import { /// we initialize a new table symbol that corresponds to that import and return that symbol. /// /// When the object file is *NOT* MVP, we return `null`. 
-fn checkLegacyIndirectFunctionTable(object: *Object) !?Symbol { +fn checkLegacyIndirectFunctionTable(object: *Object, wasm_file: *const Wasm) !?Symbol { var table_count: usize = 0; for (object.symtable) |sym| { if (sym.tag == .table) table_count += 1; @@ -234,21 +235,27 @@ fn checkLegacyIndirectFunctionTable(object: *Object) !?Symbol { if (object.imported_tables_count == table_count) return null; if (table_count != 0) { - log.err("Expected a table entry symbol for each of the {d} table(s), but instead got {d} symbols.", .{ + var err = try wasm_file.addErrorWithNotes(1); + try err.addMsg(wasm_file, "Expected a table entry symbol for each of the {d} table(s), but instead got {d} symbols.", .{ object.imported_tables_count, table_count, }); + try err.addNote(wasm_file, "defined in '{s}'", .{object.path}); return error.MissingTableSymbols; } // MVP object files cannot have any table definitions, only imports (for the indirect function table). if (object.tables.len > 0) { - log.err("Unexpected table definition without representing table symbols.", .{}); + var err = try wasm_file.addErrorWithNotes(1); + try err.addMsg(wasm_file, "Unexpected table definition without representing table symbols.", .{}); + try err.addNote(wasm_file, "defined in '{s}'", .{object.path}); return error.UnexpectedTable; } if (object.imported_tables_count != 1) { - log.err("Found more than one table import, but no representing table symbols", .{}); + var err = try wasm_file.addErrorWithNotes(1); + try err.addMsg(wasm_file, "Found more than one table import, but no representing table symbols", .{}); + try err.addNote(wasm_file, "defined in '{s}'", .{object.path}); return error.MissingTableSymbols; } @@ -259,7 +266,9 @@ fn checkLegacyIndirectFunctionTable(object: *Object) !?Symbol { } else unreachable; if (!std.mem.eql(u8, object.string_table.get(table_import.name), "__indirect_function_table")) { - log.err("Non-indirect function table import '{s}' is missing a corresponding symbol", 
.{object.string_table.get(table_import.name)}); + var err = try wasm_file.addErrorWithNotes(1); + try err.addMsg(wasm_file, "Non-indirect function table import '{s}' is missing a corresponding symbol", .{object.string_table.get(table_import.name)}); + try err.addNote(wasm_file, "defined in '{s}'", .{object.path}); return error.MissingTableSymbols; } @@ -312,8 +321,8 @@ pub const ParseError = error{ UnknownFeature, }; -fn parse(object: *Object, gpa: Allocator, reader: anytype, is_object_file: *bool) Parser(@TypeOf(reader)).Error!void { - var parser = Parser(@TypeOf(reader)).init(object, reader); +fn parse(object: *Object, gpa: Allocator, wasm_file: *const Wasm, reader: anytype, is_object_file: *bool) Parser(@TypeOf(reader)).Error!void { + var parser = Parser(@TypeOf(reader)).init(object, wasm_file, reader); return parser.parseObject(gpa, is_object_file); } @@ -325,9 +334,11 @@ fn Parser(comptime ReaderType: type) type { reader: std.io.CountingReader(ReaderType), /// Object file we're building object: *Object, + /// Read-only reference to the WebAssembly linker + wasm_file: *const Wasm, - fn init(object: *Object, reader: ReaderType) ObjectParser { - return .{ .object = object, .reader = std.io.countingReader(reader) }; + fn init(object: *Object, wasm_file: *const Wasm, reader: ReaderType) ObjectParser { + return .{ .object = object, .wasm_file = wasm_file, .reader = std.io.countingReader(reader) }; } /// Verifies that the first 4 bytes contains \0Asm @@ -585,7 +596,9 @@ fn Parser(comptime ReaderType: type) type { try reader.readNoEof(name); const tag = types.known_features.get(name) orelse { - log.err("Object file contains unknown feature: {s}", .{name}); + var err = try parser.wasm_file.addErrorWithNotes(1); + try err.addMsg(parser.wasm_file, "Object file contains unknown feature: {s}", .{name}); + try err.addNote(parser.wasm_file, "defined in '{s}'", .{parser.object.path}); return error.UnknownFeature; }; feature.* = .{ @@ -754,7 +767,7 @@ fn Parser(comptime 
ReaderType: type) type { // we found all symbols, check for indirect function table // in case of an MVP object file - if (try parser.object.checkLegacyIndirectFunctionTable()) |symbol| { + if (try parser.object.checkLegacyIndirectFunctionTable(parser.wasm_file)) |symbol| { try symbols.append(symbol); log.debug("Found legacy indirect function table. Created symbol", .{}); } diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index d47bb6b721..9cb3e82c90 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -1213,6 +1213,6 @@ const StringTable = @import("../StringTable.zig"); const Symbol = @import("Symbol.zig"); const Type = @import("../../type.zig").Type; const TypedValue = @import("../../TypedValue.zig"); -const Value = @import("../../value.zig").Value; +const Value = @import("../../Value.zig"); const Wasm = @import("../Wasm.zig"); const ZigObject = @This(); From 196ba706a05046b2209529744d2df47215819691 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Wed, 28 Feb 2024 06:31:26 +0100 Subject: [PATCH 20/21] wasm: gc fixes and re-enable linker tests Certain symbols were left unmarked, meaning they would incorrectly not be emitted into the final binary. We now mark the synthetic symbols to ensure they are emitted, as they are already created under the circumstances they're needed for. This also re-enables disabled tests that were left disabled in a previous merge conflict. Lastly, this adds the shared-memory test to the test harness as it was previously forgotten and therefore regressed.
--- src/link/Wasm.zig | 55 +++++++++++----- src/link/Wasm/Object.zig | 2 +- src/link/Wasm/ZigObject.zig | 10 +-- test/link.zig | 22 ++++--- test/link/wasm/archive/build.zig | 3 +- test/link/wasm/export-data/build.zig | 2 +- test/link/wasm/shared-memory/build.zig | 87 ++++++++++++-------------- test/link/wasm/type/build.zig | 33 +++++----- 8 files changed, 118 insertions(+), 96 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index db0e275810..9a59484aa1 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -11,6 +11,7 @@ const leb = std.leb; const link = @import("../link.zig"); const lldMain = @import("../main.zig").lldMain; const log = std.log.scoped(.link); +const gc_log = std.log.scoped(.gc); const mem = std.mem; const trace = @import("../tracy.zig").trace; const types = @import("Wasm/types.zig"); @@ -525,6 +526,7 @@ pub fn createEmpty( const symbol = loc.getSymbol(wasm); symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); symbol.index = @intCast(wasm.imported_globals_count + wasm.wasm_globals.items.len); + symbol.mark(); try wasm.wasm_globals.append(gpa, .{ .global_type = .{ .valtype = .i32, .mutable = true }, .init = .{ .i32_const = undefined }, @@ -535,6 +537,7 @@ pub fn createEmpty( const symbol = loc.getSymbol(wasm); symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); symbol.index = @intCast(wasm.imported_globals_count + wasm.wasm_globals.items.len); + symbol.mark(); try wasm.wasm_globals.append(gpa, .{ .global_type = .{ .valtype = .i32, .mutable = false }, .init = .{ .i32_const = undefined }, @@ -545,6 +548,7 @@ pub fn createEmpty( const symbol = loc.getSymbol(wasm); symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); symbol.index = @intCast(wasm.imported_globals_count + wasm.wasm_globals.items.len); + symbol.mark(); try wasm.wasm_globals.append(gpa, .{ .global_type = .{ .valtype = .i32, .mutable = false }, .init = .{ .i32_const = undefined }, @@ -968,6 +972,8 @@ fn setupInitMemoryFunction(wasm: *Wasm) !void { if (!wasm.hasPassiveInitializationSegments()) { 
return; } + const sym_loc = try wasm.createSyntheticSymbol("__wasm_init_memory", .function); + sym_loc.getSymbol(wasm).mark(); const flag_address: u32 = if (shared_memory) address: { // when we have passive initialization segments and shared memory @@ -1130,7 +1136,8 @@ fn setupTLSRelocationsFunction(wasm: *Wasm) !void { return; } - // const loc = try wasm.createSyntheticSymbol("__wasm_apply_global_tls_relocs"); + const loc = try wasm.createSyntheticSymbol("__wasm_apply_global_tls_relocs", .function); + loc.getSymbol(wasm).mark(); var function_body = std.ArrayList(u8).init(gpa); defer function_body.deinit(); const writer = function_body.writer(); @@ -1833,8 +1840,7 @@ fn createSyntheticFunction( function_body: *std.ArrayList(u8), ) !void { const gpa = wasm.base.comp.gpa; - const loc = wasm.findGlobalSymbol(symbol_name) orelse - try wasm.createSyntheticSymbol(symbol_name, .function); + const loc = wasm.findGlobalSymbol(symbol_name).?; // forgot to create symbol? const symbol = loc.getSymbol(wasm); if (symbol.isDead()) { return; @@ -1884,6 +1890,9 @@ fn initializeTLSFunction(wasm: *Wasm) !void { if (!shared_memory) return; + // ensure function is marked as we must emit it + wasm.findGlobalSymbol("__wasm_init_tls").?.getSymbol(wasm).mark(); + var function_body = std.ArrayList(u8).init(gpa); defer function_body.deinit(); const writer = function_body.writer(); @@ -1932,6 +1941,7 @@ fn initializeTLSFunction(wasm: *Wasm) !void { if (wasm.findGlobalSymbol("__wasm_apply_global_tls_relocs")) |loc| { try writer.writeByte(std.wasm.opcode(.call)); try leb.writeULEB128(writer, loc.getSymbol(wasm).index); + loc.getSymbol(wasm).mark(); } try writer.writeByte(std.wasm.opcode(.end)); @@ -2039,14 +2049,19 @@ fn mergeSections(wasm: *Wasm) !void { // We found an alias to the same function, discard this symbol in favor of // the original symbol and point the discard function to it. This ensures // we only emit a single function, instead of duplicates. 
- symbol.unmark(); - try wasm.discarded.putNoClobber( - gpa, - sym_loc, - .{ .file = gop.key_ptr.*.file, .index = gop.value_ptr.*.sym_index }, - ); - try removed_duplicates.append(sym_loc); - continue; + // we favor keeping the global over a local. + const original_loc: SymbolLoc = .{ .file = gop.key_ptr.file, .index = gop.value_ptr.sym_index }; + const original_sym = original_loc.getSymbol(wasm); + if (original_sym.isLocal() and symbol.isGlobal()) { + original_sym.unmark(); + try wasm.discarded.put(gpa, original_loc, sym_loc); + try removed_duplicates.append(original_loc); + } else { + symbol.unmark(); + try wasm.discarded.putNoClobber(gpa, sym_loc, original_loc); + try removed_duplicates.append(sym_loc); + continue; + } } gop.value_ptr.* = .{ .func = obj_file.function(sym_loc.index), .sym_index = sym_loc.index }; symbol.index = @as(u32, @intCast(gop.index)) + wasm.imported_functions_count; @@ -2073,6 +2088,7 @@ fn mergeSections(wasm: *Wasm) !void { // For any removed duplicates, remove them from the resolved symbols list for (removed_duplicates.items) |sym_loc| { assert(wasm.resolved_symbols.swapRemove(sym_loc)); + gc_log.debug("Removed duplicate for function '{s}'", .{sym_loc.getName(wasm)}); } log.debug("Merged ({d}) functions", .{wasm.functions.count()}); @@ -2119,12 +2135,7 @@ fn mergeTypes(wasm: *Wasm) !void { log.debug("Completed merging and deduplicating types. 
Total count: ({d})", .{wasm.func_types.items.len}); } -fn setupExports(wasm: *Wasm) !void { - const comp = wasm.base.comp; - const gpa = comp.gpa; - if (comp.config.output_mode == .Obj) return; - log.debug("Building exports from symbols", .{}); - +fn checkExportNames(wasm: *Wasm) !void { const force_exp_names = wasm.export_symbol_names; if (force_exp_names.len > 0) { var failed_exports = false; @@ -2144,6 +2155,13 @@ fn setupExports(wasm: *Wasm) !void { return error.FlushFailure; } } +} + +fn setupExports(wasm: *Wasm) !void { + const comp = wasm.base.comp; + const gpa = comp.gpa; + if (comp.config.output_mode == .Obj) return; + log.debug("Building exports from symbols", .{}); for (wasm.resolved_symbols.keys()) |sym_loc| { const symbol = sym_loc.getSymbol(wasm); @@ -2272,6 +2290,7 @@ fn setupMemory(wasm: *Wasm) !void { memory_ptr = mem.alignForward(u64, memory_ptr, 4); const loc = try wasm.createSyntheticSymbol("__wasm_init_memory_flag", .data); const sym = loc.getSymbol(wasm); + sym.mark(); sym.virtual_address = @as(u32, @intCast(memory_ptr)); memory_ptr += 4; } @@ -2561,6 +2580,7 @@ pub fn flushModule(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) if (comp.link_errors.items.len > 0) return error.FlushFailure; try wasm.resolveLazySymbols(); try wasm.checkUndefinedSymbols(); + try wasm.checkExportNames(); try wasm.setupInitFunctions(); if (comp.link_errors.items.len > 0) return error.FlushFailure; @@ -4044,6 +4064,7 @@ fn mark(wasm: *Wasm, loc: SymbolLoc) !void { return; } symbol.mark(); + gc_log.debug("Marked symbol '{s}'", .{loc.getName(wasm)}); if (symbol.isUndefined()) { // undefined symbols do not have an associated `Atom` and therefore also // do not contain relocations. 
diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig index ca9f746d51..b2f41a33b0 100644 --- a/src/link/Wasm/Object.zig +++ b/src/link/Wasm/Object.zig @@ -15,7 +15,7 @@ const Allocator = std.mem.Allocator; const leb = std.leb; const meta = std.meta; -const log = std.log.scoped(.link); +const log = std.log.scoped(.object); /// Index into the list of relocatable object files within the linker driver. index: File.Index = .null, diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index 9cb3e82c90..7d017e20c8 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -315,7 +315,7 @@ fn finishUpdateDecl( const atom_index = decl_info.atom; const atom = wasm_file.getAtomPtr(atom_index); const sym = zig_object.symbol(atom.sym_index); - const full_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + const full_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); sym.name = try zig_object.string_table.insert(gpa, full_name); try atom.code.appendSlice(gpa, code); atom.size = @intCast(code.len); @@ -401,7 +401,7 @@ pub fn getOrCreateAtomForDecl(zig_object: *ZigObject, wasm_file: *Wasm, decl_ind gop.value_ptr.* = .{ .atom = try wasm_file.createAtom(sym_index, zig_object.index) }; const mod = wasm_file.base.comp.module.?; const decl = mod.declPtr(decl_index); - const full_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + const full_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); const sym = zig_object.symbol(sym_index); sym.name = try zig_object.string_table.insert(gpa, full_name); } @@ -455,7 +455,7 @@ pub fn lowerUnnamedConst(zig_object: *ZigObject, wasm_file: *Wasm, tv: TypedValu const parent_atom_index = try zig_object.getOrCreateAtomForDecl(wasm_file, decl_index); const parent_atom = wasm_file.getAtom(parent_atom_index); const local_index = parent_atom.locals.items.len; - const fqn = mod.intern_pool.stringToSlice(try 
decl.getFullyQualifiedName(mod)); + const fqn = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); const name = try std.fmt.allocPrintZ(gpa, "__unnamed_{s}_{d}", .{ fqn, local_index, }); @@ -838,6 +838,7 @@ pub fn updateExports( const atom = wasm_file.getAtom(atom_index); const atom_sym = atom.symbolLoc().getSymbol(wasm_file).*; const gpa = mod.gpa; + log.debug("Updating exports for decl '{s}'", .{mod.intern_pool.stringToSlice(decl.name)}); for (exports) |exp| { if (mod.intern_pool.stringToSliceUnwrap(exp.opts.section)) |section| { @@ -888,6 +889,7 @@ pub fn updateExports( if (exp.opts.visibility == .hidden) { sym.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); } + log.debug(" with name '{s}' - {}", .{ export_string, sym }); try zig_object.global_syms.put(gpa, export_name, sym_index); try wasm_file.symbol_atom.put(gpa, .{ .file = zig_object.index, .index = sym_index }, atom_index); } @@ -1061,7 +1063,7 @@ pub fn createDebugSectionForIndex(zig_object: *ZigObject, wasm_file: *Wasm, inde pub fn updateDeclLineNumber(zig_object: *ZigObject, mod: *Module, decl_index: InternPool.DeclIndex) !void { if (zig_object.dwarf) |*dw| { const decl = mod.declPtr(decl_index); - const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); log.debug("updateDeclLineNumber {s}{*}", .{ decl_name, decl }); try dw.updateDeclLineNumber(mod, decl_index); diff --git a/test/link.zig b/test/link.zig index 731736c553..58b2dc9f96 100644 --- a/test/link.zig +++ b/test/link.zig @@ -35,11 +35,10 @@ pub const cases = [_]Case{ }, // WASM Cases - // https://github.com/ziglang/zig/issues/16938 - //.{ - // .build_root = "test/link/wasm/archive", - // .import = @import("link/wasm/archive/build.zig"), - //}, + .{ + .build_root = "test/link/wasm/archive", + .import = @import("link/wasm/archive/build.zig"), + }, .{ .build_root = "test/link/wasm/basic-features", .import = 
@import("link/wasm/basic-features/build.zig"), @@ -52,11 +51,10 @@ pub const cases = [_]Case{ .build_root = "test/link/wasm/export", .import = @import("link/wasm/export/build.zig"), }, - // https://github.com/ziglang/zig/issues/16937 - //.{ - // .build_root = "test/link/wasm/export-data", - // .import = @import("link/wasm/export-data/build.zig"), - //}, + .{ + .build_root = "test/link/wasm/export-data", + .import = @import("link/wasm/export-data/build.zig"), + }, .{ .build_root = "test/link/wasm/extern", .import = @import("link/wasm/extern/build.zig"), @@ -81,6 +79,10 @@ pub const cases = [_]Case{ .build_root = "test/link/wasm/segments", .import = @import("link/wasm/segments/build.zig"), }, + .{ + .build_root = "test/link/wasm/shared-memory", + .import = @import("link/wasm/shared-memory/build.zig"), + }, .{ .build_root = "test/link/wasm/stack_pointer", .import = @import("link/wasm/stack_pointer/build.zig"), diff --git a/test/link/wasm/archive/build.zig b/test/link/wasm/archive/build.zig index 1d5e031848..cd91feae65 100644 --- a/test/link/wasm/archive/build.zig +++ b/test/link/wasm/archive/build.zig @@ -19,12 +19,13 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize .name = "main", .root_source_file = .{ .path = "main.zig" }, .optimize = optimize, - .target = .{ .cpu_arch = .wasm32, .os_tag = .freestanding }, + .target = b.resolveTargetQuery(.{ .cpu_arch = .wasm32, .os_tag = .freestanding }), .strip = false, }); lib.entry = .disabled; lib.use_llvm = false; lib.use_lld = false; + lib.root_module.export_symbol_names = &.{"foo"}; const check = lib.checkObject(); check.checkInHeaders(); diff --git a/test/link/wasm/export-data/build.zig b/test/link/wasm/export-data/build.zig index bba00d5c12..05b2ca161b 100644 --- a/test/link/wasm/export-data/build.zig +++ b/test/link/wasm/export-data/build.zig @@ -13,7 +13,7 @@ pub fn build(b: *std.Build) void { .name = "lib", .root_source_file = .{ .path = "lib.zig" }, .optimize = .ReleaseSafe, // to 
make the output deterministic in address positions - .target = .{ .cpu_arch = .wasm32, .os_tag = .freestanding }, + .target = b.resolveTargetQuery(.{ .cpu_arch = .wasm32, .os_tag = .freestanding }), }); lib.entry = .disabled; lib.use_lld = false; diff --git a/test/link/wasm/shared-memory/build.zig b/test/link/wasm/shared-memory/build.zig index f8751d99c0..0cad2560cb 100644 --- a/test/link/wasm/shared-memory/build.zig +++ b/test/link/wasm/shared-memory/build.zig @@ -11,37 +11,39 @@ pub fn build(b: *std.Build) void { } fn add(b: *std.Build, test_step: *std.Build.Step, optimize_mode: std.builtin.OptimizeMode) void { - const lib = b.addExecutable(.{ + const exe = b.addExecutable(.{ .name = "lib", .root_source_file = .{ .path = "lib.zig" }, - .target = .{ + .target = b.resolveTargetQuery(.{ .cpu_arch = .wasm32, .cpu_model = .{ .explicit = &std.Target.wasm.cpu.mvp }, .cpu_features_add = std.Target.wasm.featureSet(&.{ .atomics, .bulk_memory }), .os_tag = .freestanding, - }, + }), .optimize = optimize_mode, .strip = false, .single_threaded = false, }); - lib.entry = .disabled; - lib.use_lld = false; - lib.import_memory = true; - lib.export_memory = true; - lib.shared_memory = true; - lib.max_memory = 67108864; - lib.root_module.export_symbol_names = &.{"foo"}; + exe.entry = .disabled; + exe.use_lld = false; + exe.import_memory = true; + exe.export_memory = true; + exe.shared_memory = true; + exe.max_memory = 67108864; + exe.root_module.export_symbol_names = &.{"foo"}; - const check_lib = lib.checkObject(); + const check_exe = exe.checkObject(); - check_lib.checkStart("Section import"); - check_lib.checkNext("entries 1"); - check_lib.checkNext("module env"); - check_lib.checkNext("name memory"); // ensure we are importing memory + check_exe.checkInHeaders(); + check_exe.checkExact("Section import"); + check_exe.checkExact("entries 1"); + check_exe.checkExact("module env"); + check_exe.checkExact("name memory"); // ensure we are importing memory - 
check_lib.checkStart("Section export"); - check_lib.checkNext("entries 2"); - check_lib.checkNext("name memory"); // ensure we also export memory again + check_exe.checkInHeaders(); + check_exe.checkExact("Section export"); + check_exe.checkExact("entries 2"); + check_exe.checkExact("name memory"); // ensure we also export memory again // This section *must* be emit as the start function is set to the index // of __wasm_init_memory @@ -49,49 +51,42 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize_mode: std.builtin.Opt // This means we won't have __wasm_init_memory in such case, and therefore // should also not have a section "start" if (optimize_mode == .Debug) { - check_lib.checkStart("Section start"); + check_exe.checkInHeaders(); + check_exe.checkExact("Section start"); } // This section is only and *must* be emit when shared-memory is enabled // release modes will have the TLS segment optimized out in our test-case. if (optimize_mode == .Debug) { - check_lib.checkStart("Section data_count"); - check_lib.checkNext("count 3"); + check_exe.checkInHeaders(); + check_exe.checkExact("Section data_count"); + check_exe.checkExact("count 1"); } - check_lib.checkStart("Section custom"); - check_lib.checkNext("name name"); - check_lib.checkNext("type function"); + check_exe.checkInHeaders(); + check_exe.checkExact("Section custom"); + check_exe.checkExact("name name"); + check_exe.checkExact("type function"); if (optimize_mode == .Debug) { - check_lib.checkNext("name __wasm_init_memory"); + check_exe.checkExact("name __wasm_init_memory"); } - check_lib.checkNext("name __wasm_init_tls"); - check_lib.checkNext("type global"); + check_exe.checkExact("name __wasm_init_tls"); + check_exe.checkExact("type global"); // In debug mode the symbol __tls_base is resolved to an undefined symbol // from the object file, hence its placement differs than in release modes // where the entire tls segment is optimized away, and tls_base will have // its original position. 
+ check_exe.checkExact("name __tls_base"); + check_exe.checkExact("name __tls_size"); + check_exe.checkExact("name __tls_align"); + + check_exe.checkExact("type data_segment"); if (optimize_mode == .Debug) { - check_lib.checkNext("name __tls_size"); - check_lib.checkNext("name __tls_align"); - check_lib.checkNext("name __tls_base"); - } else { - check_lib.checkNext("name __tls_base"); - check_lib.checkNext("name __tls_size"); - check_lib.checkNext("name __tls_align"); + check_exe.checkExact("names 1"); + check_exe.checkExact("index 0"); + check_exe.checkExact("name .tdata"); } - check_lib.checkNext("type data_segment"); - if (optimize_mode == .Debug) { - check_lib.checkNext("names 3"); - check_lib.checkNext("index 0"); - check_lib.checkNext("name .rodata"); - check_lib.checkNext("index 1"); - check_lib.checkNext("name .bss"); - check_lib.checkNext("index 2"); - check_lib.checkNext("name .tdata"); - } - - test_step.dependOn(&check_lib.step); + test_step.dependOn(&check_exe.step); } diff --git a/test/link/wasm/type/build.zig b/test/link/wasm/type/build.zig index a318ddb1f9..b62886c74e 100644 --- a/test/link/wasm/type/build.zig +++ b/test/link/wasm/type/build.zig @@ -13,31 +13,32 @@ pub fn build(b: *std.Build) void { } fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const lib = b.addExecutable(.{ + const exe = b.addExecutable(.{ .name = "lib", .root_source_file = .{ .path = "lib.zig" }, .target = b.resolveTargetQuery(.{ .cpu_arch = .wasm32, .os_tag = .freestanding }), .optimize = optimize, .strip = false, }); - lib.entry = .disabled; - lib.use_llvm = false; - lib.use_lld = false; - b.installArtifact(lib); + exe.entry = .disabled; + exe.use_llvm = false; + exe.use_lld = false; + exe.root_module.export_symbol_names = &.{"foo"}; + b.installArtifact(exe); - const check_lib = lib.checkObject(); - check_lib.checkInHeaders(); - check_lib.checkExact("Section type"); + const check_exe = exe.checkObject(); + 
check_exe.checkInHeaders(); + check_exe.checkExact("Section type"); // only 2 entries, although we have more functions. // This is to test functions with the same function signature // have their types deduplicated. - check_lib.checkExact("entries 2"); - check_lib.checkExact("params 1"); - check_lib.checkExact("type i32"); - check_lib.checkExact("returns 1"); - check_lib.checkExact("type i64"); - check_lib.checkExact("params 0"); - check_lib.checkExact("returns 0"); + check_exe.checkExact("entries 2"); + check_exe.checkExact("params 1"); + check_exe.checkExact("type i32"); + check_exe.checkExact("returns 1"); + check_exe.checkExact("type i64"); + check_exe.checkExact("params 0"); + check_exe.checkExact("returns 0"); - test_step.dependOn(&check_lib.step); + test_step.dependOn(&check_exe.step); } From 202ed7330fdc55cce22bfa9d9b5da03776e871b4 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Wed, 28 Feb 2024 19:02:16 +0100 Subject: [PATCH 21/21] fix memory leaks --- src/link/Wasm.zig | 21 ++++++++----------- src/link/Wasm/ZigObject.zig | 40 +++++++++++++++++++++++++++++++------ 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 9a59484aa1..b9997cf883 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -33,7 +33,6 @@ const Object = @import("Wasm/Object.zig"); const Symbol = @import("Wasm/Symbol.zig"); const Type = @import("../type.zig").Type; const TypedValue = @import("../TypedValue.zig"); -const Value = @import("../value.zig").Value; const ZigObject = @import("Wasm/ZigObject.zig"); pub const Atom = @import("Wasm/Atom.zig"); @@ -72,7 +71,7 @@ files: std.MultiArrayList(File.Entry) = .{}, /// to support existing code. /// TODO: Allow setting this through a flag? host_name: []const u8 = "env", -/// List of all symbols generated by Zig code. +/// List of symbols generated by the linker. 
synthetic_symbols: std.ArrayListUnmanaged(Symbol) = .{}, /// Maps atoms to their segment index atoms: std.AutoHashMapUnmanaged(u32, Atom.Index) = .{}, @@ -179,10 +178,6 @@ undefs: std.AutoArrayHashMapUnmanaged(u32, SymbolLoc) = .{}, /// Undefined (and synthetic) symbols do not have an Atom and therefore cannot be mapped. symbol_atom: std.AutoHashMapUnmanaged(SymbolLoc, Atom.Index) = .{}, -/// List of atom indexes of functions that are generated by the backend, -/// rather than by the linker. -synthetic_functions: std.ArrayListUnmanaged(Atom.Index) = .{}, - pub const Alignment = types.Alignment; pub const Segment = struct { @@ -259,7 +254,7 @@ pub const InitFuncLoc = struct { /// our own ctors. file: File.Index, /// Symbol index within the corresponding object file. - index: u32, + index: Symbol.Index, /// The priority in which the constructor must be called. priority: u32, @@ -270,7 +265,7 @@ pub const InitFuncLoc = struct { /// Turns the given `InitFuncLoc` into a `SymbolLoc` fn getSymbolLoc(loc: InitFuncLoc) SymbolLoc { - return .{ .file = loc.file, .index = @enumFromInt(loc.index) }; + return .{ .file = loc.file, .index = loc.index }; } /// Returns true when `lhs` has a higher priority (e.i. value closer to 0) than `rhs`. 
@@ -1411,9 +1406,9 @@ pub fn deinit(wasm: *Wasm) void { archive.deinit(gpa); } - for (wasm.synthetic_functions.items) |atom_index| { - const atom = wasm.getAtomPtr(atom_index); - atom.deinit(gpa); + if (wasm.findGlobalSymbol("__wasm_init_tls")) |loc| { + const atom = wasm.symbol_atom.get(loc).?; + wasm.getAtomPtr(atom).deinit(gpa); } wasm.synthetic_symbols.deinit(gpa); @@ -1441,7 +1436,7 @@ pub fn deinit(wasm: *Wasm) void { wasm.exports.deinit(gpa); wasm.string_table.deinit(gpa); - wasm.synthetic_functions.deinit(gpa); + wasm.files.deinit(gpa); } pub fn updateFunc(wasm: *Wasm, mod: *Module, func_index: InternPool.Index, air: Air, liveness: Liveness) !void { @@ -1763,7 +1758,7 @@ fn setupInitFunctions(wasm: *Wasm) !void { } log.debug("appended init func '{s}'\n", .{object.string_table.get(symbol.name)}); wasm.init_funcs.appendAssumeCapacity(.{ - .index = init_func.symbol_index, + .index = @enumFromInt(init_func.symbol_index), .file = file_index, .priority = init_func.priority, }); diff --git a/src/link/Wasm/ZigObject.zig b/src/link/Wasm/ZigObject.zig index 7d017e20c8..5b1c587a74 100644 --- a/src/link/Wasm/ZigObject.zig +++ b/src/link/Wasm/ZigObject.zig @@ -37,12 +37,16 @@ segment_free_list: std.ArrayListUnmanaged(u32) = .{}, string_table: StringTable = .{}, /// Map for storing anonymous declarations. Each anonymous decl maps to its Atom's index. anon_decls: std.AutoArrayHashMapUnmanaged(InternPool.Index, Atom.Index) = .{}, +/// List of atom indexes of functions that are generated by the backend. +synthetic_functions: std.ArrayListUnmanaged(Atom.Index) = .{}, /// Represents the symbol index of the error name table /// When this is `null`, no code references an error using runtime `@errorName`. /// During initializion, a symbol with corresponding atom will be created that is /// used to perform relocations to the pointer of this table. /// The actual table is populated during `flush`. 
error_table_symbol: Symbol.Index = .null, +/// Atom index of the table of error names. This is stored so we can clean up the atom. +error_names_atom: Atom.Index = .null, /// Amount of functions in the `import` sections. imported_functions_count: u32 = 0, /// Amount of globals in the `import` section. @@ -150,9 +154,6 @@ pub fn deinit(zig_object: *ZigObject, wasm_file: *Wasm) void { gpa.free(segment_info.name); } - // For decls and anon decls we free the memory of its atoms. - // The memory of atoms parsed from object files is managed by - // the object file itself, and therefore we can skip those. { var it = zig_object.decls_map.valueIterator(); while (it.next()) |decl_info| { @@ -175,6 +176,31 @@ pub fn deinit(zig_object: *ZigObject, wasm_file: *Wasm) void { atom.deinit(gpa); } } + if (zig_object.findGlobalSymbol("__zig_errors_len")) |sym_index| { + const atom_index = wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = sym_index }).?; + wasm_file.getAtomPtr(atom_index).deinit(gpa); + } + if (wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = zig_object.error_table_symbol })) |atom_index| { + const atom = wasm_file.getAtomPtr(atom_index); + atom.deinit(gpa); + } + for (zig_object.synthetic_functions.items) |atom_index| { + const atom = wasm_file.getAtomPtr(atom_index); + atom.deinit(gpa); + } + zig_object.synthetic_functions.deinit(gpa); + for (zig_object.func_types.items) |*ty| { + ty.deinit(gpa); + } + if (zig_object.error_names_atom != .null) { + const atom = wasm_file.getAtomPtr(zig_object.error_names_atom); + atom.deinit(gpa); + } + zig_object.global_syms.deinit(gpa); + zig_object.func_types.deinit(gpa); + zig_object.atom_types.deinit(gpa); + zig_object.functions.deinit(gpa); + zig_object.imports.deinit(gpa); zig_object.decls_map.deinit(gpa); zig_object.anon_decls.deinit(gpa); zig_object.symbols.deinit(gpa); @@ -602,7 +628,7 @@ fn populateErrorNameTable(zig_object: *ZigObject, wasm_file: *Wasm) !void { const atom =
wasm_file.getAtomPtr(atom_index); const error_name = mod.intern_pool.stringToSlice(error_name_nts); - const len = @as(u32, @intCast(error_name.len + 1)); // names are 0-termianted + const len: u32 = @intCast(error_name.len + 1); // names are 0-terminated const slice_ty = Type.slice_const_u8_sentinel_0; const offset = @as(u32, @intCast(atom.code.items.len)); @@ -614,9 +640,9 @@ fn populateErrorNameTable(zig_object: *ZigObject, wasm_file: *Wasm) !void { .index = @intFromEnum(names_atom.sym_index), .relocation_type = .R_WASM_MEMORY_ADDR_I32, .offset = offset, - .addend = @as(i32, @intCast(addend)), + .addend = @intCast(addend), }); - atom.size += @as(u32, @intCast(slice_ty.abiSize(mod))); + atom.size += @intCast(slice_ty.abiSize(mod)); addend += len; // as we updated the error name table, we now store the actual name within the names atom @@ -627,6 +653,7 @@ fn populateErrorNameTable(zig_object: *ZigObject, wasm_file: *Wasm) !void { log.debug("Populated error name: '{s}'", .{error_name}); } names_atom.size = addend; + zig_object.error_names_atom = names_atom_index; } /// Either creates a new import, or updates one if existing. @@ -1174,6 +1201,7 @@ pub fn createFunction( atom.code = function_body.moveToUnmanaged(); atom.relocs = relocations.moveToUnmanaged(); + try zig_object.synthetic_functions.append(gpa, atom_index); return sym_index; }