From e7be0bef43e5fc7d19bbe184b9dc5209f52f745c Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Sun, 13 Feb 2022 16:34:51 +0100 Subject: [PATCH] wasm-linker: Add Object file parsing This upstreams the object file parsing from zwld, bringing us closer to being able to link stage2 code with object files/C-code as well as replacing lld with the self-hosted linker once feature complete. --- src/link/Wasm.zig | 80 +++- src/link/Wasm/Atom.zig | 2 +- src/link/Wasm/Object.zig | 847 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 920 insertions(+), 9 deletions(-) create mode 100644 src/link/Wasm/Object.zig diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 8f6dfacf46..dbb9ab801e 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -25,6 +25,7 @@ const LlvmObject = @import("../codegen/llvm.zig").Object; const Air = @import("../Air.zig"); const Liveness = @import("../Liveness.zig"); const Symbol = @import("Wasm/Symbol.zig"); +const Object = @import("Wasm/Object.zig"); const types = @import("Wasm/types.zig"); pub const base_tag = link.File.Tag.wasm; @@ -73,7 +74,7 @@ func_types: std.ArrayListUnmanaged(wasm.Type) = .{}, /// Output function section functions: std.ArrayListUnmanaged(wasm.Func) = .{}, /// Output global section -globals: std.ArrayListUnmanaged(wasm.Global) = .{}, +wasm_globals: std.ArrayListUnmanaged(wasm.Global) = .{}, /// Memory section memories: wasm.Memory = .{ .limits = .{ .min = 0, .max = null } }, @@ -84,6 +85,17 @@ memories: wasm.Memory = .{ .limits = .{ .min = 0, .max = null } }, /// Note: Key is symbol index, value represents the index into the table function_table: std.AutoHashMapUnmanaged(u32, u32) = .{}, +/// All object files and their data which are linked into the final binary +objects: std.ArrayListUnmanaged(Object) = .{}, +/// Maps discarded symbols and their positions to the location of the symbol +/// it was resolved to +discarded: std.AutoHashMapUnmanaged(SymbolLoc, SymbolLoc) = .{}, +/// Mapping between symbol names and 
their respective location. +/// This map contains all symbols that will be written into the final binary +/// and were either defined, or resolved. +/// TODO: Use string interning and make the key an index, rather than a unique string. +symbol_resolver: std.StringArrayHashMapUnmanaged(SymbolLoc) = .{}, + pub const Segment = struct { alignment: u32, size: u32, @@ -98,6 +110,14 @@ pub const FnData = struct { }; }; +pub const SymbolLoc = struct { + /// The index of the symbol within the specified file + index: u32, + /// The index of the object file where the symbol resides. + /// When this is `null` the symbol comes from a non-object file. + file: ?u16, +}; + pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Options) !*Wasm { assert(options.object_format == .wasm); @@ -115,7 +135,7 @@ pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Option try file.writeAll(&(wasm.magic ++ wasm.version)); // As sym_index '0' is reserved, we use it for our stack pointer symbol - const global = try wasm_bin.globals.addOne(allocator); + const global = try wasm_bin.wasm_globals.addOne(allocator); global.* = .{ .global_type = .{ .valtype = .i32, @@ -152,6 +172,31 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*Wasm { return self; } +fn parseInputFiles(self: *Wasm, files: []const []const u8) !void { + for (files) |path| { + if (try self.parseObjectFile(path)) continue; + log.warn("Unexpected file format at path: '{s}'", .{path}); + } +} + +/// Parses the object file from given path. Returns true when the given file was an object +/// file and parsed successfully. Returns false when file is not an object file. +/// May return an error instead when parsing failed. 
+fn parseObjectFile(self: *Wasm, path: []const u8) !bool { + const file = try fs.cwd().openFile(path, .{}); + errdefer file.close(); + + var object = Object.init(self.base.allocator, file, path) catch |err| { + if (err == error.InvalidMagicByte) { + log.warn("Self hosted linker does not support non-object file parsing", .{}); + return false; + } else return err; + }; + errdefer object.deinit(self.base.allocator); + try self.objects.append(self.base.allocator, object); + return true; +} + pub fn deinit(self: *Wasm) void { if (build_options.have_llvm) { if (self.llvm_object) |llvm_object| llvm_object.destroy(self.base.allocator); @@ -182,7 +227,7 @@ pub fn deinit(self: *Wasm) void { self.imports.deinit(self.base.allocator); self.func_types.deinit(self.base.allocator); self.functions.deinit(self.base.allocator); - self.globals.deinit(self.base.allocator); + self.wasm_globals.deinit(self.base.allocator); self.function_table.deinit(self.base.allocator); } @@ -587,7 +632,7 @@ fn setupMemory(self: *Wasm) !void { memory_ptr = std.mem.alignForwardGeneric(u64, memory_ptr, stack_alignment); memory_ptr += stack_size; // We always put the stack pointer global at index 0 - self.globals.items[0].init.i32_const = @bitCast(i32, @intCast(u32, memory_ptr)); + self.wasm_globals.items[0].init.i32_const = @bitCast(i32, @intCast(u32, memory_ptr)); } var offset: u32 = @intCast(u32, memory_ptr); @@ -605,7 +650,7 @@ fn setupMemory(self: *Wasm) !void { if (!place_stack_first) { memory_ptr = std.mem.alignForwardGeneric(u64, memory_ptr, stack_alignment); memory_ptr += stack_size; - self.globals.items[0].init.i32_const = @bitCast(i32, @intCast(u32, memory_ptr)); + self.wasm_globals.items[0].init.i32_const = @bitCast(i32, @intCast(u32, memory_ptr)); } // Setup the max amount of pages @@ -690,6 +735,25 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void { const tracy = trace(@src()); defer tracy.end(); + // Used for all temporary memory allocated during flushin + var arena_instance = 
std.heap.ArenaAllocator.init(self.base.allocator); + defer arena_instance.deinit(); + const arena = arena_instance.allocator(); + + // Positional arguments to the linker such as object files and static archives. + var positionals = std.ArrayList([]const u8).init(arena); + try positionals.ensureUnusedCapacity(self.base.options.objects.len); + + for (self.base.options.objects) |object| { + positionals.appendAssumeCapacity(object.path); + } + + for (comp.c_object_table.keys()) |c_object| { + try positionals.append(c_object.status.success.object_path); + } + // TODO: Also link with other objects such as compiler-rt + try self.parseInputFiles(positionals.items); + // When we finish/error we reset the state of the linker // So we can rebuild the binary file on each incremental update defer self.resetState(); @@ -852,7 +916,7 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void { const header_offset = try reserveVecSectionHeader(file); const writer = file.writer(); - for (self.globals.items) |global| { + for (self.wasm_globals.items) |global| { try writer.writeByte(wasm.valtype(global.global_type.valtype)); try writer.writeByte(@boolToInt(global.global_type.mutable)); try emitInit(writer, global.init); @@ -863,7 +927,7 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void { header_offset, .global, @intCast(u32, (try file.getPos()) - header_offset - header_size), - @intCast(u32, self.globals.items.len), + @intCast(u32, self.wasm_globals.items.len), ); } @@ -1039,7 +1103,7 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void { var funcs = try std.ArrayList(Name).initCapacity(self.base.allocator, self.functions.items.len + self.imported_functions_count); defer funcs.deinit(); - var globals = try std.ArrayList(Name).initCapacity(self.base.allocator, self.globals.items.len); + var globals = try std.ArrayList(Name).initCapacity(self.base.allocator, self.wasm_globals.items.len); defer globals.deinit(); var segments = try 
std.ArrayList(Name).initCapacity(self.base.allocator, self.data_segments.count()); defer segments.deinit(); diff --git a/src/link/Wasm/Atom.zig b/src/link/Wasm/Atom.zig index 09fb6735f7..fc4effd714 100644 --- a/src/link/Wasm/Atom.zig +++ b/src/link/Wasm/Atom.zig @@ -50,7 +50,7 @@ pub fn deinit(self: *Atom, gpa: Allocator) void { self.relocs.deinit(gpa); self.code.deinit(gpa); - while (self.locals.popOrNull()) |*local| { + for (self.locals.items) |*local| { local.deinit(gpa); } self.locals.deinit(gpa); diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig new file mode 100644 index 0000000000..df98ea4d37 --- /dev/null +++ b/src/link/Wasm/Object.zig @@ -0,0 +1,847 @@ +//! Object represents a wasm object file. When initializing a new +//! `Object`, it will parse the contents of a given file handler, and verify +//! the data on correctness. The result can then be used by the linker. +const Object = @This(); + +const Atom = @import("Atom.zig"); +const types = @import("types.zig"); +const std = @import("std"); +const Wasm = @import("Wasm.zig"); +const Symbol = @import("Symbol.zig"); + +const Allocator = std.mem.Allocator; +const leb = std.leb; +const meta = std.meta; + +const log = std.log.scoped(.zwld); + +/// Wasm spec version used for this `Object` +version: u32 = 0, +/// The entire object file is read and parsed in a single pass. +/// For this reason it's a lot simpler to use an arena and store the entire +/// state after parsing. This also allows to free all memory at once. +arena: std.heap.ArenaAllocator.State = .{}, +/// The file descriptor that represents the wasm object file. +file: ?std.fs.File = null, +/// Name (read path) of the object file. 
+name: []const u8, +/// Parsed type section +types: []const std.wasm.Type = &.{}, +/// A list of all imports for this module +imports: []std.wasm.Import = &.{}, +/// Parsed function section +functions: []std.wasm.Func = &.{}, +/// Parsed table section +tables: []std.wasm.Table = &.{}, +/// Parsed memory section +memories: []const std.wasm.Memory = &.{}, +/// Parsed global section +globals: []std.wasm.Global = &.{}, +/// Parsed export section +exports: []const std.wasm.Export = &.{}, +/// Parsed element section +elements: []const std.wasm.Element = &.{}, +/// Represents the function ID that must be called on startup. +/// This is `null` by default as runtimes may determine the startup +/// function themselves. This is essentially legacy. +start: ?u32 = null, +/// A slice of features that tell the linker what features are mandatory, +/// used (or therefore missing) and must generate an error when another +/// object uses features that are not supported by the other. +features: []const types.Feature = &.{}, +/// A table that maps the relocations we must perform where the key represents +/// the section that the list of relocations applies to. +relocations: std.AutoArrayHashMapUnmanaged(u32, []types.Relocation) = .{}, +/// Table of symbols belonging to this Object file +symtable: []Symbol = &.{}, +/// Extra metadata about the linking section, such as alignment of segments and their name +segment_info: []const types.Segment = &.{}, +/// A sequence of function initializers that must be called on startup +init_funcs: []const types.InitFunc = &.{}, +/// Comdat information +comdat_info: []const types.Comdat = &.{}, +/// Represents non-synthetic sections that can essentially be mem-cpy'd into place +/// after performing relocations. 
+relocatable_data: []RelocatableData = &.{}, + +/// Represents a single item within a section (depending on its `type`) +const RelocatableData = struct { + /// The type of the relocatable data + type: enum { data, code, custom }, + /// Pointer to the data of the segment, where it's length is written to `size` + data: [*]u8, + /// The size in bytes of the data representing the segment within the section + size: u32, + /// The index within the section itself + index: u32, + /// The offset within the section where the data starts + offset: u32, + /// Represents the index of the section it belongs to + section_index: u32, + + /// Returns the alignment of the segment, by retrieving it from the segment + /// meta data of the given object file. + /// NOTE: Alignment is encoded as a power of 2, so we shift the symbol's + /// alignment to retrieve the natural alignment. + pub fn getAlignment(self: RelocatableData, object: *const Object) u32 { + if (self.type != .data) return 1; + const data_alignment = object.segment_info[self.index].alignment; + if (data_alignment == 0) return 1; + // Decode from power of 2 to natural alignment + return @as(u32, 1) << @intCast(u5, data_alignment); + } + + /// Returns the symbol kind that corresponds to the relocatable section + pub fn getSymbolKind(self: RelocatableData) Symbol.Tag { + return switch (self.type) { + .data => .data, + .code => .function, + .custom => .section, + }; + } +}; + +pub const InitError = error{NotObjectFile} || ParseError || std.fs.File.ReadError; + +/// Initializes a new `Object` from a wasm object file. 
+pub fn init(gpa: Allocator, file: std.fs.File, path: []const u8) InitError!Object { + var object: Object = .{ + .file = file, + .name = path, + }; + + var arena = std.heap.ArenaAllocator.init(gpa); + errdefer arena.deinit(); + + var is_object_file: bool = false; + try object.parse(arena.allocator(), file.reader(), &is_object_file); + object.arena = arena.state; + if (!is_object_file) return error.NotObjectFile; + + return object; +} + +/// Frees all memory of `Object` at once. The given `Allocator` must be +/// the same allocator that was used when `init` was called. +pub fn deinit(self: *Object, gpa: Allocator) void { + self.arena.promote(gpa).deinit(); + self.* = undefined; +} + +/// Finds the import within the list of imports from a given kind and index of that kind. +/// Asserts the import exists +pub fn findImport(self: *const Object, import_kind: std.wasm.ExternalKind, index: u32) *std.wasm.Import { + var i: u32 = 0; + return for (self.imports) |*import| { + if (std.meta.activeTag(import.kind) == import_kind) { + if (i == index) return import; + i += 1; + } + } else unreachable; // Only existing imports are allowed to be found +} + +/// Counts the entries of imported `kind` and returns the result +pub fn importedCountByKind(self: *const Object, kind: std.wasm.ExternalKind) u32 { + var i: u32 = 0; + return for (self.imports) |imp| { + if (@as(std.wasm.ExternalKind, imp.kind) == kind) i += 1; + } else i; +} + +/// Returns a table by a given id, rather than by its index within the list. +pub fn getTable(self: *const Object, id: u32) *std.wasm.Table { + return for (self.tables) |*table| { + if (table.table_idx == id) break table; + } else unreachable; +} + +/// Checks if the object file is an MVP version. +/// When that's the case, we check if there's an import table definition with its name +/// set to "__indirect_function_table". When that's also the case, +/// we initialize a new table symbol that corresponds to that import and return that symbol. 
+/// +/// When the object file is *NOT* MVP, we return `null`. +fn checkLegacyIndirectFunctionTable(self: *Object) !?Symbol { + var table_count: usize = 0; + for (self.symtable) |sym| { + if (sym.tag == .table) table_count += 1; + } + + const import_table_count = self.importedCountByKind(.table); + + // For each import table, we also have a symbol so this is not a legacy object file + if (import_table_count == table_count) return null; + + if (table_count != 0) { + log.err("Expected a table entry symbol for each of the {d} table(s), but instead got {d} symbols.", .{ + import_table_count, + table_count, + }); + return error.MissingTableSymbols; + } + + // MVP object files cannot have any table definitions, only imports (for the indirect function table). + if (self.tables.len > 0) { + log.err("Unexpected table definition without representing table symbols.", .{}); + return error.UnexpectedTable; + } + + if (import_table_count != 1) { + log.err("Found more than one table import, but no representing table symbols", .{}); + return error.MissingTableSymbols; + } + + var table_import: std.wasm.Import = for (self.imports) |imp| { + if (imp.kind == .table) { + break imp; + } + } else unreachable; + + if (!std.mem.eql(u8, table_import.name, "__indirect_function_table")) { + log.err("Non-indirect function table import '{s}' is missing a corresponding symbol", .{table_import.name}); + return error.MissingTableSymbols; + } + + var table_symbol: Symbol = .{ + .flags = 0, + .name = table_import.name, + .tag = .table, + .index = 0, + }; + table_symbol.setFlag(.WASM_SYM_UNDEFINED); + table_symbol.setFlag(.WASM_SYM_NO_STRIP); + return table_symbol; +} + +/// Error set containing parsing errors. +/// Merged with reader's errorset by `Parser` +pub const ParseError = error{ + /// The magic byte is either missing or does not contain \0Asm + InvalidMagicByte, + /// The wasm version is either missing or does not match the supported version. 
+ InvalidWasmVersion, + /// Expected the functype byte while parsing the Type section but did not find it. + ExpectedFuncType, + /// Missing an 'end' opcode when defining a constant expression. + MissingEndForExpression, + /// Missing an 'end' opcode at the end of a body expression. + MissingEndForBody, + /// The size defined in the section code mismatches with the actual payload size. + MalformedSection, + /// Stream has reached the end. Unreachable for caller and must be handled internally + /// by the parser. + EndOfStream, + /// Ran out of memory when allocating. + OutOfMemory, + /// A non-zero flag was provided for comdat info + UnexpectedValue, + /// An import symbol contains an index to an import that does + /// not exist, or no imports were defined. + InvalidIndex, + /// The section "linking" contains a version that is not supported. + UnsupportedVersion, + /// When reading the data in leb128 compressed format, its value was overflown. + Overflow, + /// Found table definitions but no corresponding table symbols + MissingTableSymbols, + /// Did not expect a table definiton, but did find one + UnexpectedTable, + /// Object file contains a feature that is unknown to the linker + UnknownFeature, +}; + +fn parse(self: *Object, gpa: Allocator, reader: anytype, is_object_file: *bool) Parser(@TypeOf(reader)).Error!void { + var parser = Parser(@TypeOf(reader)).init(self, reader); + return parser.parseObject(gpa, is_object_file); +} + +fn Parser(comptime ReaderType: type) type { + return struct { + const Self = @This(); + const Error = ReaderType.Error || ParseError; + + reader: std.io.CountingReader(ReaderType), + /// Object file we're building + object: *Object, + + fn init(object: *Object, reader: ReaderType) Self { + return .{ .object = object, .reader = std.io.countingReader(reader) }; + } + + /// Verifies that the first 4 bytes contains \0Asm + fn verifyMagicBytes(self: *Self) Error!void { + var magic_bytes: [4]u8 = undefined; + + try 
self.reader.reader().readNoEof(&magic_bytes); + if (!std.mem.eql(u8, &magic_bytes, &std.wasm.magic)) { + log.debug("Invalid magic bytes '{s}'", .{&magic_bytes}); + return error.InvalidMagicByte; + } + } + + fn parseObject(self: *Self, gpa: Allocator, is_object_file: *bool) Error!void { + try self.verifyMagicBytes(); + const version = try self.reader.reader().readIntLittle(u32); + + self.object.version = version; + var relocatable_data = std.ArrayList(RelocatableData).init(gpa); + defer relocatable_data.deinit(); + + var section_index: u32 = 0; + while (self.reader.reader().readByte()) |byte| : (section_index += 1) { + const len = try readLeb(u32, self.reader.reader()); + const reader = std.io.limitedReader(self.reader.reader(), len).reader(); + switch (@intToEnum(std.wasm.Section, byte)) { + .custom => { + const name_len = try readLeb(u32, reader); + const name = try gpa.alloc(u8, name_len); + defer gpa.free(name); + try reader.readNoEof(name); + + if (std.mem.eql(u8, name, "linking")) { + is_object_file.* = true; + try self.parseMetadata(gpa, reader.context.bytes_left); + } else if (std.mem.startsWith(u8, name, "reloc")) { + try self.parseRelocations(gpa); + } else if (std.mem.eql(u8, name, "target_features")) { + try self.parseFeatures(gpa); + } else { + try reader.skipBytes(reader.context.bytes_left, .{}); + } + }, + .type => { + for (try readVec(&self.object.types, reader, gpa)) |*type_val| { + if ((try reader.readByte()) != std.wasm.function_type) return error.ExpectedFuncType; + + for (try readVec(&type_val.params, reader, gpa)) |*param| { + param.* = try readEnum(std.wasm.Valtype, reader); + } + + for (try readVec(&type_val.returns, reader, gpa)) |*result| { + result.* = try readEnum(std.wasm.Valtype, reader); + } + } + try assertEnd(reader); + }, + .import => { + for (try readVec(&self.object.imports, reader, gpa)) |*import| { + const module_len = try readLeb(u32, reader); + const module_name = try gpa.alloc(u8, module_len); + try 
reader.readNoEof(module_name); + + const name_len = try readLeb(u32, reader); + const name = try gpa.alloc(u8, name_len); + try reader.readNoEof(name); + + const kind = try readEnum(std.wasm.ExternalKind, reader); + const kind_value: std.wasm.Import.Kind = switch (kind) { + .function => .{ .function = try readLeb(u32, reader) }, + .memory => .{ .memory = try readLimits(reader) }, + .global => .{ .global = .{ + .valtype = try readEnum(std.wasm.Valtype, reader), + .mutable = (try reader.readByte()) == 0x01, + } }, + .table => .{ .table = .{ + .reftype = try readEnum(std.wasm.RefType, reader), + .limits = try readLimits(reader), + } }, + }; + + import.* = .{ + .module_name = module_name, + .name = name, + .kind = kind_value, + }; + } + try assertEnd(reader); + }, + .function => { + for (try readVec(&self.object.functions, reader, gpa)) |*func| { + func.* = .{ .type_index = try readLeb(u32, reader) }; + } + try assertEnd(reader); + }, + .table => { + for (try readVec(&self.object.tables, reader, gpa)) |*table| { + table.* = .{ + .reftype = try readEnum(std.wasm.RefType, reader), + .limits = try readLimits(reader), + }; + } + try assertEnd(reader); + }, + .memory => { + for (try readVec(&self.object.memories, reader, gpa)) |*memory| { + memory.* = .{ .limits = try readLimits(reader) }; + } + try assertEnd(reader); + }, + .global => { + for (try readVec(&self.object.globals, reader, gpa)) |*global| { + global.* = .{ + .global_type = .{ + .valtype = try readEnum(std.wasm.Valtype, reader), + .mutable = (try reader.readByte()) == 0x01, + }, + .init = try readInit(reader), + }; + } + try assertEnd(reader); + }, + .@"export" => { + for (try readVec(&self.object.exports, reader, gpa)) |*exp| { + const name_len = try readLeb(u32, reader); + const name = try gpa.alloc(u8, name_len); + try reader.readNoEof(name); + exp.* = .{ + .name = name, + .kind = try readEnum(std.wasm.ExternalKind, reader), + .index = try readLeb(u32, reader), + }; + } + try assertEnd(reader); + }, + .start 
=> { + self.object.start = try readLeb(u32, reader); + try assertEnd(reader); + }, + .element => { + for (try readVec(&self.object.elements, reader, gpa)) |*elem| { + elem.table_index = try readLeb(u32, reader); + elem.offset = try readInit(reader); + + for (try readVec(&elem.func_indexes, reader, gpa)) |*idx| { + idx.* = try readLeb(u32, reader); + } + } + try assertEnd(reader); + }, + .code => { + var start = reader.context.bytes_left; + var index: u32 = 0; + const count = try readLeb(u32, reader); + while (index < count) : (index += 1) { + const code_len = try readLeb(u32, reader); + const offset = @intCast(u32, start - reader.context.bytes_left); + const data = try gpa.alloc(u8, code_len); + try reader.readNoEof(data); + try relocatable_data.append(.{ + .type = .code, + .data = data.ptr, + .size = code_len, + .index = self.object.importedCountByKind(.function) + index, + .offset = offset, + .section_index = section_index, + }); + } + }, + .data => { + var start = reader.context.bytes_left; + var index: u32 = 0; + const count = try readLeb(u32, reader); + while (index < count) : (index += 1) { + const flags = try readLeb(u32, reader); + const data_offset = try readInit(reader); + _ = flags; // TODO: Do we need to check flags to detect passive/active memory? 
+ _ = data_offset; + const data_len = try readLeb(u32, reader); + const offset = @intCast(u32, start - reader.context.bytes_left); + const data = try gpa.alloc(u8, data_len); + try reader.readNoEof(data); + try relocatable_data.append(.{ + .type = .data, + .data = data.ptr, + .size = data_len, + .index = index, + .offset = offset, + .section_index = section_index, + }); + } + }, + else => try self.reader.reader().skipBytes(len, .{}), + } + } else |err| switch (err) { + error.EndOfStream => {}, // finished parsing the file + else => |e| return e, + } + self.object.relocatable_data = relocatable_data.toOwnedSlice(); + } + + /// Based on the "features" custom section, parses it into a list of + /// features that tell the linker what features were enabled and may be mandatory + /// to be able to link. + /// Logs an info message when an undefined feature is detected. + fn parseFeatures(self: *Self, gpa: Allocator) !void { + const reader = self.reader.reader(); + for (try readVec(&self.object.features, reader, gpa)) |*feature| { + const prefix = try readEnum(types.Feature.Prefix, reader); + const name_len = try leb.readULEB128(u32, reader); + const name = try gpa.alloc(u8, name_len); + try reader.readNoEof(name); + + const tag = types.known_features.get(name) orelse { + log.err("Object file contains unknown feature: {s}", .{name}); + return error.UnknownFeature; + }; + feature.* = .{ + .prefix = prefix, + .tag = tag, + }; + } + } + + /// Parses a "reloc" custom section into a list of relocations. + /// The relocations are mapped into `Object` where the key is the section + /// they apply to. 
+ fn parseRelocations(self: *Self, gpa: Allocator) !void { + const reader = self.reader.reader(); + const section = try leb.readULEB128(u32, reader); + const count = try leb.readULEB128(u32, reader); + const relocations = try gpa.alloc(types.Relocation, count); + + log.debug("Found {d} relocations for section ({d})", .{ + count, + section, + }); + + for (relocations) |*relocation| { + const rel_type = try leb.readULEB128(u8, reader); + const rel_type_enum = @intToEnum(types.Relocation.RelocationType, rel_type); + relocation.* = .{ + .relocation_type = rel_type_enum, + .offset = try leb.readULEB128(u32, reader), + .index = try leb.readULEB128(u32, reader), + .addend = if (rel_type_enum.addendIsPresent()) try leb.readULEB128(u32, reader) else null, + }; + log.debug("Found relocation: type({s}) offset({d}) index({d}) addend({d})", .{ + @tagName(relocation.relocation_type), + relocation.offset, + relocation.index, + relocation.addend, + }); + } + + try self.object.relocations.putNoClobber(gpa, section, relocations); + } + + /// Parses the "linking" custom section. Versions that are not + /// supported will be an error. `payload_size` is required to be able + /// to calculate the subsections we need to parse, as that data is not + /// available within the section itself. + fn parseMetadata(self: *Self, gpa: Allocator, payload_size: usize) !void { + var limited = std.io.limitedReader(self.reader.reader(), payload_size); + const limited_reader = limited.reader(); + + const version = try leb.readULEB128(u32, limited_reader); + log.debug("Link meta data version: {d}", .{version}); + if (version != 2) return error.UnsupportedVersion; + + while (limited.bytes_left > 0) { + try self.parseSubsection(gpa, limited_reader); + } + } + + /// Parses a `spec.Subsection`. + /// The `reader` param for this is to provide a `LimitedReader`, which allows + /// us to only read until a max length. 
+ /// + /// `self` is used to provide access to other sections that may be needed, + /// such as access to the `import` section to find the name of a symbol. + fn parseSubsection(self: *Self, gpa: Allocator, reader: anytype) !void { + const sub_type = try leb.readULEB128(u8, reader); + log.debug("Found subsection: {s}", .{@tagName(@intToEnum(types.SubsectionType, sub_type))}); + const payload_len = try leb.readULEB128(u32, reader); + if (payload_len == 0) return; + + var limited = std.io.limitedReader(reader, payload_len); + const limited_reader = limited.reader(); + + // every subsection contains a 'count' field + const count = try leb.readULEB128(u32, limited_reader); + + switch (@intToEnum(types.SubsectionType, sub_type)) { + .WASM_SEGMENT_INFO => { + const segments = try gpa.alloc(types.Segment, count); + for (segments) |*segment| { + const name_len = try leb.readULEB128(u32, reader); + const name = try gpa.alloc(u8, name_len); + try reader.readNoEof(name); + segment.* = .{ + .name = name, + .alignment = try leb.readULEB128(u32, reader), + .flags = try leb.readULEB128(u32, reader), + }; + log.debug("Found segment: {s} align({d}) flags({b})", .{ + segment.name, + segment.alignment, + segment.flags, + }); + } + self.object.segment_info = segments; + }, + .WASM_INIT_FUNCS => { + const funcs = try gpa.alloc(types.InitFunc, count); + for (funcs) |*func| { + func.* = .{ + .priority = try leb.readULEB128(u32, reader), + .symbol_index = try leb.readULEB128(u32, reader), + }; + log.debug("Found function - prio: {d}, index: {d}", .{ func.priority, func.symbol_index }); + } + self.object.init_funcs = funcs; + }, + .WASM_COMDAT_INFO => { + const comdats = try gpa.alloc(types.Comdat, count); + for (comdats) |*comdat| { + const name_len = try leb.readULEB128(u32, reader); + const name = try gpa.alloc(u8, name_len); + try reader.readNoEof(name); + + const flags = try leb.readULEB128(u32, reader); + if (flags != 0) { + return error.UnexpectedValue; + } + + const symbol_count = 
try leb.readULEB128(u32, reader); + const symbols = try gpa.alloc(types.ComdatSym, symbol_count); + for (symbols) |*symbol| { + symbol.* = .{ + .kind = @intToEnum(types.ComdatSym.Type, try leb.readULEB128(u8, reader)), + .index = try leb.readULEB128(u32, reader), + }; + } + + comdat.* = .{ + .name = name, + .flags = flags, + .symbols = symbols, + }; + } + + self.object.comdat_info = comdats; + }, + .WASM_SYMBOL_TABLE => { + var symbols = try std.ArrayList(Symbol).initCapacity(gpa, count); + + var i: usize = 0; + while (i < count) : (i += 1) { + const symbol = symbols.addOneAssumeCapacity(); + symbol.* = try self.parseSymbol(gpa, reader); + log.debug("Found symbol: type({s}) name({s}) flags(0b{b:0>8})", .{ + @tagName(symbol.tag), + symbol.name, + symbol.flags, + }); + } + + // we found all symbols, check for indirect function table + // in case of an MVP object file + if (try self.object.checkLegacyIndirectFunctionTable()) |symbol| { + try symbols.append(symbol); + log.debug("Found legacy indirect function table. Created symbol", .{}); + } + + self.object.symtable = symbols.toOwnedSlice(); + }, + } + } + + /// Parses the symbol information based on its kind, + /// requires access to `Object` to find the name of a symbol when it's + /// an import and flag `WASM_SYM_EXPLICIT_NAME` is not set. 
+ fn parseSymbol(self: *Self, gpa: Allocator, reader: anytype) !Symbol { + const tag = @intToEnum(Symbol.Tag, try leb.readULEB128(u8, reader)); + const flags = try leb.readULEB128(u32, reader); + var symbol: Symbol = .{ + .flags = flags, + .tag = tag, + .name = undefined, + .index = undefined, + }; + + switch (tag) { + .data => { + const name_len = try leb.readULEB128(u32, reader); + const name = try gpa.alloc(u8, name_len); + try reader.readNoEof(name); + symbol.name = name; + + // Data symbols only have the following fields if the symbol is defined + if (symbol.isDefined()) { + symbol.index = try leb.readULEB128(u32, reader); + // @TODO: We should verify those values + _ = try leb.readULEB128(u32, reader); + _ = try leb.readULEB128(u32, reader); + } + }, + .section => { + symbol.index = try leb.readULEB128(u32, reader); + symbol.name = @tagName(symbol.tag); + }, + else => { + symbol.index = try leb.readULEB128(u32, reader); + var maybe_import: ?*std.wasm.Import = null; + + const is_undefined = symbol.isUndefined(); + if (is_undefined) { + maybe_import = self.object.findImport(symbol.externalType(), symbol.index); + } + const explicit_name = symbol.hasFlag(.WASM_SYM_EXPLICIT_NAME); + if (!(is_undefined and !explicit_name)) { + const name_len = try leb.readULEB128(u32, reader); + const name = try gpa.alloc(u8, name_len); + try reader.readNoEof(name); + symbol.name = name; + } else { + symbol.name = maybe_import.?.name; + } + }, + } + return symbol; + } + }; +} + +/// First reads the count from the reader and then allocate +/// a slice of ptr child's element type. 
+fn readVec(ptr: anytype, reader: anytype, gpa: Allocator) ![]ElementType(@TypeOf(ptr)) {
+    const len = try readLeb(u32, reader);
+    const slice = try gpa.alloc(ElementType(@TypeOf(ptr)), len);
+    // Store the slice through the out-pointer and also return it for convenience.
+    ptr.* = slice;
+    return slice;
+}
+
+/// Resolves the element type of a pointer-to-slice type,
+/// e.g. `*[]u32` resolves to `u32`.
+fn ElementType(comptime ptr: type) type {
+    return meta.Elem(meta.Child(ptr));
+}
+
+/// Uses either `readILEB128` or `readULEB128` depending on the
+/// signedness of the given type `T`.
+/// Asserts `T` is an integer.
+fn readLeb(comptime T: type, reader: anytype) !T {
+    if (comptime std.meta.trait.isSignedInt(T)) {
+        return try leb.readILEB128(T, reader);
+    } else {
+        return try leb.readULEB128(T, reader);
+    }
+}
+
+/// Reads an enum type from the given reader.
+/// Asserts `T` is an enum
+fn readEnum(comptime T: type, reader: anytype) !T {
+    switch (@typeInfo(T)) {
+        .Enum => |enum_type| return @intToEnum(T, try readLeb(enum_type.tag_type, reader)),
+        else => @compileError("T must be an enum. Instead was given type " ++ @typeName(T)),
+    }
+}
+
+/// Reads a wasm limits structure: a flags bit followed by the minimum,
+/// and — only when the flag is set — a maximum.
+fn readLimits(reader: anytype) !std.wasm.Limits {
+    const flags = try readLeb(u1, reader);
+    const min = try readLeb(u32, reader);
+    return std.wasm.Limits{
+        .min = min,
+        // Flag 0 means no maximum was encoded.
+        .max = if (flags == 0) null else try readLeb(u32, reader),
+    };
+}
+
+/// Reads a constant initializer expression. Only `i32.const` and
+/// `global.get` are currently handled; the expression must be
+/// terminated by an `end` opcode or `error.MissingEndForExpression`
+/// is returned.
+fn readInit(reader: anytype) !std.wasm.InitExpression {
+    const opcode = try reader.readByte();
+    const init_expr: std.wasm.InitExpression = switch (@intToEnum(std.wasm.Opcode, opcode)) {
+        .i32_const => .{ .i32_const = try readLeb(i32, reader) },
+        .global_get => .{ .global_get = try readLeb(u32, reader) },
+        else => @panic("TODO: initexpression for other opcodes"),
+    };
+
+    if ((try readEnum(std.wasm.Opcode, reader)) != .end) return error.MissingEndForExpression;
+    return init_expr;
+}
+
+/// Verifies the reader has been fully consumed, returning
+/// `error.MalformedSection` when trailing bytes remain.
+/// NOTE(review): `reader.context.bytes_left` assumes a
+/// `std.io.LimitedReader`-style context — confirm at call sites.
+fn assertEnd(reader: anytype) !void {
+    var buf: [1]u8 = undefined;
+    const len = try reader.read(&buf);
+    if (len != 0) return error.MalformedSection;
+    if (reader.context.bytes_left != 0) return error.MalformedSection;
+}
+
+/// Parses an object file into atoms, for code and data sections
+pub fn parseIntoAtoms(self: *Object, gpa: Allocator, object_index: u16, wasm_bin: *Wasm) !void {
+    log.debug("Parsing data section into atoms", .{});
+    const Key = struct {
+        kind: Symbol.Tag,
+        index: u32,
+    };
+    // Maps a (symbol kind, target index) pair to the index of the symbol
+    // that owns it, so each relocatable segment can locate its symbol.
+    var symbol_for_segment = std.AutoArrayHashMap(Key, u32).init(gpa);
+    defer symbol_for_segment.deinit();
+
+    for (self.symtable) |symbol, symbol_index| {
+        switch (symbol.tag) {
+            // Only defined function/data symbols reference a segment
+            // that we turn into an atom.
+            .function, .data => if (!symbol.isUndefined()) {
+                try symbol_for_segment.putNoClobber(
+                    .{ .kind = symbol.tag, .index = symbol.index },
+                    @intCast(u32, symbol_index),
+                );
+            },
+            else => continue,
+        }
+    }
+
+    for (self.relocatable_data) |relocatable_data, index| {
+        const sym_index = symbol_for_segment.get(.{
+            .kind = relocatable_data.getSymbolKind(),
+            .index = @intCast(u32, relocatable_data.index),
+        }) orelse continue; // encountered a segment we do not create an atom for
+        const final_index = try wasm_bin.getMatchingSegment(gpa, object_index, @intCast(u32, index));
+
+        const atom = try Atom.create(gpa);
+        errdefer atom.deinit(gpa);
+
+        try wasm_bin.managed_atoms.append(gpa, atom);
+        atom.file = object_index;
+        atom.size = relocatable_data.size;
+        atom.alignment = relocatable_data.getAlignment(self);
+        atom.sym_index = sym_index;
+
+        // Claim only the relocations that land inside this segment's bytes.
+        const relocations: []types.Relocation = self.relocations.get(relocatable_data.section_index) orelse &.{};
+        for (relocations) |*relocation| {
+            if (isInbetween(relocatable_data.offset, atom.size, relocation.offset)) {
+                // set the offset relative to the offset of the segment itself,
+                // rather than within the entire section.
+                relocation.offset -= relocatable_data.offset;
+                try atom.relocs.append(gpa, relocation.*);
+
+                // Table-index relocations mean the referenced symbol must be
+                // present in the indirect function table (element section).
+                if (relocation.isTableIndex()) {
+                    try wasm_bin.elements.appendSymbol(gpa, .{
+                        .file = object_index,
+                        .sym_index = relocation.index,
+                    });
+                }
+            }
+        }
+
+        // TODO: Replace `atom.code` from an existing slice to a pointer to the data
+        try atom.code.appendSlice(gpa, relocatable_data.data[0..relocatable_data.size]);
+
+        // Grow the output segment: pad up to the atom's alignment, append the
+        // atom's size, then round the total up to the segment's own alignment.
+        const segment: *Wasm.Segment = &wasm_bin.segments.items[final_index];
+        segment.alignment = std.math.max(segment.alignment, atom.alignment);
+        segment.size = std.mem.alignForwardGeneric(
+            u32,
+            std.mem.alignForwardGeneric(u32, segment.size, atom.alignment) + atom.size,
+            segment.alignment,
+        );
+
+        // Append the atom at the tail of the segment's doubly-linked atom list,
+        // or start a new list when this is the segment's first atom.
+        if (wasm_bin.atoms.getPtr(final_index)) |last| {
+            last.*.next = atom;
+            atom.prev = last.*;
+            last.* = atom;
+        } else {
+            try wasm_bin.atoms.putNoClobber(gpa, final_index, atom);
+        }
+        log.debug("Parsed into atom: '{s}'", .{self.symtable[atom.sym_index].name});
+    }
+}
+
+/// Verifies if a given value is in between a minimum and maximum value.
+/// The maximum value is calculated using the length, both start and end are inclusive.
+/// NOTE(review): the inclusive upper bound accepts `value == min + length`,
+/// i.e. one past the last byte of the range — confirm this is intentional
+/// for relocation offsets at the very end of a segment.
+inline fn isInbetween(min: u32, length: u32, value: u32) bool {
+    return value >= min and value <= min + length;
+}