From aca911ca18d43fddcc3d36f44d3750d07b53fb56 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Fri, 19 Aug 2022 19:26:44 +0200 Subject: [PATCH 1/4] wasm/archive: correctly parse long file names Wasm archive files are encoded the same way as GNU. This means that the header notates the character index within the long file name list rather than the length of the name. The entire name is then delimited by an LF character (0x0a). This also makes a cosmetic update to remove the `self` name, and rather label it as `archive` instead. --- src/link/Wasm/Archive.zig | 118 ++++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 51 deletions(-) diff --git a/src/link/Wasm/Archive.zig b/src/link/Wasm/Archive.zig index e214d1b124..4a0abb1dfa 100644 --- a/src/link/Wasm/Archive.zig +++ b/src/link/Wasm/Archive.zig @@ -15,6 +15,12 @@ name: []const u8, header: ar_hdr = undefined, +/// A list of long file names, delimited by a LF character (0x0a). +/// This is stored as a single slice of bytes, as the header-names +/// point to the character index of a file name, rather than the index +/// in the list. +long_file_names: []const u8 = undefined, + /// Parsed table of contents. /// Each symbol name points to a list of all definition /// sites within the current static archive. @@ -53,32 +59,33 @@ const ar_hdr = extern struct { /// Always contains ARFMAG. ar_fmag: [2]u8, - const NameOrLength = union(enum) { - Name: []const u8, - Length: u32, + const NameOrIndex = union(enum) { + name: []const u8, + index: u32, }; - fn nameOrLength(self: ar_hdr) !NameOrLength { - const value = getValue(&self.ar_name); + + fn nameOrIndex(archive: ar_hdr) !NameOrIndex { + const value = getValue(&archive.ar_name); const slash_index = mem.indexOfScalar(u8, value, '/') orelse return error.MalformedArchive; const len = value.len; if (slash_index == len - 1) { // Name stored directly - return NameOrLength{ .Name = value }; + return NameOrIndex{ .name = value }; } else { // Name follows the header directly and its length is encoded in // the name field. - const length = try std.fmt.parseInt(u32, value[slash_index + 1 ..], 10); - return NameOrLength{ .Length = length }; + const index = try std.fmt.parseInt(u32, value[slash_index + 1 ..], 10); + return NameOrIndex{ .index = index }; } } - fn date(self: ar_hdr) !u64 { - const value = getValue(&self.ar_date); + fn date(archive: ar_hdr) !u64 { + const value = getValue(&archive.ar_date); return std.fmt.parseInt(u64, value, 10); } - fn size(self: ar_hdr) !u32 { - const value = getValue(&self.ar_size); + fn size(archive: ar_hdr) !u32 { + const value = getValue(&archive.ar_size); return std.fmt.parseInt(u32, value, 10); } @@ -87,18 +94,19 @@ const ar_hdr = extern struct { } }; -pub fn deinit(self: *Archive, allocator: Allocator) void { - for (self.toc.keys()) |*key| { +pub fn deinit(archive: *Archive, allocator: Allocator) void { + for (archive.toc.keys()) |*key| { allocator.free(key.*); } - for (self.toc.values()) |*value| { + for (archive.toc.values()) |*value| { value.deinit(allocator); } - self.toc.deinit(allocator); + archive.toc.deinit(allocator); + allocator.free(archive.long_file_names); } -pub fn parse(self: *Archive, allocator: Allocator) !void { - const reader = self.file.reader(); +pub fn parse(archive: *Archive, allocator: Allocator) !void { + const reader = archive.file.reader(); const magic = try reader.readBytesNoEof(SARMAG); if (!mem.eql(u8, &magic, ARMAG)) { @@ -106,38 +114,31 @@ pub fn parse(self: *Archive, allocator: Allocator) !void { return error.NotArchive; } - self.header = try reader.readStruct(ar_hdr); - if (!mem.eql(u8, &self.header.ar_fmag, ARFMAG)) { - log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, self.header.ar_fmag }); + archive.header = try reader.readStruct(ar_hdr); + if (!mem.eql(u8, &archive.header.ar_fmag, ARFMAG)) { + log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, archive.header.ar_fmag }); return error.NotArchive; } - try self.parseTableOfContents(allocator, reader); + try archive.parseTableOfContents(allocator, reader); + try archive.parseNameTable(allocator, reader); } -fn parseName(allocator: Allocator, header: ar_hdr, reader: anytype) ![]u8 { - const name_or_length = try header.nameOrLength(); - var name: []u8 = undefined; - switch (name_or_length) { - .Name => |n| { - name = try allocator.dupe(u8, n); - }, - .Length => |len| { - var n = try allocator.alloc(u8, len); - defer allocator.free(n); - try reader.readNoEof(n); - const actual_len = mem.indexOfScalar(u8, n, @as(u8, 0)) orelse n.len; - name = try allocator.dupe(u8, n[0..actual_len]); +fn parseName(archive: *const Archive, header: ar_hdr) ![]const u8 { + const name_or_index = try header.nameOrIndex(); + switch (name_or_index) { + .name => |name| return name, + .index => |index| { + const name = mem.sliceTo(archive.long_file_names[index..], 0x0a); + return mem.trimRight(u8, name, "/"); }, } - return name; } -fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) !void { - log.debug("parsing table of contents for archive file '{s}'", .{self.name}); +fn parseTableOfContents(archive: *Archive, allocator: Allocator, reader: anytype) !void { // size field can have extra spaces padded in front as well as the end, // so we trim those first before parsing the ASCII value. - const size_trimmed = std.mem.trim(u8, &self.header.ar_size, " "); + const size_trimmed = mem.trim(u8, &archive.header.ar_size, " "); const sym_tab_size = try std.fmt.parseInt(u32, size_trimmed, 10); const num_symbols = try reader.readIntBig(u32); @@ -157,7 +158,7 @@ fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) ! var i: usize = 0; while (i < sym_tab.len) { - const string = std.mem.sliceTo(sym_tab[i..], 0); + const string = mem.sliceTo(sym_tab[i..], 0); if (string.len == 0) { i += 1; continue; @@ -165,7 +166,7 @@ fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) ! i += string.len; const name = try allocator.dupe(u8, string); errdefer allocator.free(name); - const gop = try self.toc.getOrPut(allocator, name); + const gop = try archive.toc.getOrPut(allocator, name); if (gop.found_existing) { allocator.free(name); } else { @@ -175,31 +176,46 @@ fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) ! } } +fn parseNameTable(archive: *Archive, allocator: Allocator, reader: anytype) !void { + const header: ar_hdr = try reader.readStruct(ar_hdr); + if (!mem.eql(u8, &header.ar_fmag, ARFMAG)) { + log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, header.ar_fmag }); + return error.MalformedArchive; + } + if (!mem.eql(u8, header.ar_name[0..2], "//")) { + log.err("invalid archive. Long name table missing", .{}); + return error.MalformedArchive; + } + const table_size = try header.size(); + const long_file_names = try allocator.alloc(u8, table_size); + errdefer allocator.free(long_file_names); + try reader.readNoEof(long_file_names); + archive.long_file_names = long_file_names; +} + /// From a given file offset, starts reading for a file header. /// When found, parses the object file into an `Object` and returns it. -pub fn parseObject(self: Archive, allocator: Allocator, file_offset: u32) !Object { - try self.file.seekTo(file_offset); - const reader = self.file.reader(); +pub fn parseObject(archive: Archive, allocator: Allocator, file_offset: u32) !Object { + try archive.file.seekTo(file_offset); + const reader = archive.file.reader(); const header = try reader.readStruct(ar_hdr); - const current_offset = try self.file.getPos(); - try self.file.seekTo(0); + const current_offset = try archive.file.getPos(); + try archive.file.seekTo(0); if (!mem.eql(u8, &header.ar_fmag, ARFMAG)) { log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, header.ar_fmag }); return error.MalformedArchive; } - const object_name = try parseName(allocator, header, reader); - defer allocator.free(object_name); - + const object_name = try archive.parseName(header); const name = name: { var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; - const path = try std.os.realpath(self.name, &buffer); + const path = try std.os.realpath(archive.name, &buffer); break :name try std.fmt.allocPrint(allocator, "{s}({s})", .{ path, object_name }); }; defer allocator.free(name); - const object_file = try std.fs.cwd().openFile(self.name, .{}); + const object_file = try std.fs.cwd().openFile(archive.name, .{}); errdefer object_file.close(); try object_file.seekTo(current_offset); From 1544625df3fb4732f2cd63d1e7c4c738d1b7d0c0 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Fri, 19 Aug 2022 21:15:16 +0200 Subject: [PATCH 2/4] wasm/Object: parse using the correct file size When an object file is being parsed from within an archive file, we provide the object file size to ensure we do not read past the object file. This is because follow up object files can exist there, as well as an LF character to notate the end of the file was reached. Such a character is invalid within the object file. This also fixes a bug in getting the function/global type for defined globals/functions from object files as it was missing the substraction with the import count of the respective type. --- src/link/Wasm.zig | 13 ++++++++----- src/link/Wasm/Archive.zig | 3 ++- src/link/Wasm/Object.zig | 23 +++++++++++++++++++++-- 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 626f176652..37e25e4360 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -378,7 +378,7 @@ fn parseObjectFile(self: *Wasm, path: []const u8) !bool { const file = try fs.cwd().openFile(path, .{}); errdefer file.close(); - var object = Object.create(self.base.allocator, file, path) catch |err| switch (err) { + var object = Object.create(self.base.allocator, file, path, null) catch |err| switch (err) { error.InvalidMagicByte, error.NotObjectFile => return false, else => |e| return e, }; @@ -595,8 +595,8 @@ fn resolveSymbolsInArchives(self: *Wasm) !void { // Parse object and and resolve symbols again before we check remaining // undefined symbols. const object_file_index = @intCast(u16, self.objects.items.len); - const object = try self.objects.addOne(self.base.allocator); - object.* = try archive.parseObject(self.base.allocator, offset.items[0]); + var object = try archive.parseObject(self.base.allocator, offset.items[0]); + try self.objects.append(self.base.allocator, object); try self.resolveSymbolsInObject(object_file_index); // continue loop for any remaining undefined symbols that still exist @@ -860,7 +860,8 @@ fn getGlobalType(self: *const Wasm, loc: SymbolLoc) wasm.GlobalType { if (is_undefined) { return obj.findImport(.global, symbol.index).kind.global; } - return obj.globals[symbol.index].global_type; + const import_global_count = obj.importedCountByKind(.global); + return obj.globals[symbol.index - import_global_count].global_type; } if (is_undefined) { return self.imports.get(loc).?.kind.global; @@ -880,7 +881,9 @@ fn getFunctionSignature(self: *const Wasm, loc: SymbolLoc) wasm.Type { const ty_index = obj.findImport(.function, symbol.index).kind.function; return obj.func_types[ty_index]; } - return obj.func_types[obj.functions[symbol.index].type_index]; + const import_function_count = obj.importedCountByKind(.function); + const type_index = obj.functions[symbol.index - import_function_count].type_index; + return obj.func_types[type_index]; } if (is_undefined) { const ty_index = self.imports.get(loc).?.kind.function; diff --git a/src/link/Wasm/Archive.zig b/src/link/Wasm/Archive.zig index 4a0abb1dfa..c80d26d17d 100644 --- a/src/link/Wasm/Archive.zig +++ b/src/link/Wasm/Archive.zig @@ -218,6 +218,7 @@ pub fn parseObject(archive: Archive, allocator: Allocator, file_offset: u32) !Ob const object_file = try std.fs.cwd().openFile(archive.name, .{}); errdefer object_file.close(); + const object_file_size = try header.size(); try object_file.seekTo(current_offset); - return Object.create(allocator, object_file, name); + return Object.create(allocator, object_file, name, object_file_size); } diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig index a1308ec045..50827ca9fb 100644 --- a/src/link/Wasm/Object.zig +++ b/src/link/Wasm/Object.zig @@ -105,14 +105,33 @@ pub const InitError = error{NotObjectFile} || ParseError || std.fs.File.ReadErro /// Initializes a new `Object` from a wasm object file. /// This also parses and verifies the object file. -pub fn create(gpa: Allocator, file: std.fs.File, name: []const u8) InitError!Object { +/// When a max size is given, will only parse up to the given size, +/// else will read until the end of the file. +pub fn create(gpa: Allocator, file: std.fs.File, name: []const u8, maybe_max_size: ?usize) InitError!Object { var object: Object = .{ .file = file, .name = try gpa.dupe(u8, name), }; var is_object_file: bool = false; - try object.parse(gpa, file.reader(), &is_object_file); + const size = maybe_max_size orelse size: { + errdefer gpa.free(object.name); + const stat = try file.stat(); + break :size @intCast(usize, stat.size); + }; + + const file_contents = try gpa.alloc(u8, size); + defer gpa.free(file_contents); + var file_reader = file.reader(); + var read: usize = 0; + while (read < size) { + const n = try file_reader.read(file_contents[read..]); + std.debug.assert(n != 0); + read += n; + } + var fbs = std.io.fixedBufferStream(file_contents); + + try object.parse(gpa, fbs.reader(), &is_object_file); errdefer object.deinit(gpa); if (!is_object_file) return error.NotObjectFile; From 7fe2a3d104319835127456d02ba73efc5fe5f207 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Sat, 20 Aug 2022 15:46:39 +0200 Subject: [PATCH 3/4] test/link: add wasm linker-test for archives Adds a test case that will pull-in compiler-rt symbols, and therefore link with the compiler-rt archive file. --- lib/std/build.zig | 7 +------ test/link.zig | 5 +++++ test/link/wasm/archive/build.zig | 27 +++++++++++++++++++++++++++ test/link/wasm/archive/main.zig | 6 ++++++ 4 files changed, 39 insertions(+), 6 deletions(-) create mode 100644 test/link/wasm/archive/build.zig create mode 100644 test/link/wasm/archive/main.zig diff --git a/lib/std/build.zig b/lib/std/build.zig index 57f0d126d8..1cbf188d2c 100644 --- a/lib/std/build.zig +++ b/lib/std/build.zig @@ -1898,12 +1898,7 @@ pub const LibExeObjStep = struct { pub fn runEmulatable(exe: *LibExeObjStep) *EmulatableRunStep { assert(exe.kind == .exe or exe.kind == .test_exe); - const run_step = EmulatableRunStep.create(exe.builder.fmt("run {s}", .{exe.step.name}), exe); - if (exe.vcpkg_bin_path) |path| { - run_step.addPathDir(path); - } - - return run_step; + return EmulatableRunStep.create(exe.builder, exe.builder.fmt("run {s}", .{exe.step.name}), exe); } pub fn checkObject(self: *LibExeObjStep, obj_format: std.Target.ObjectFormat) *CheckObjectStep { diff --git a/test/link.zig b/test/link.zig index 88b370f343..215a0511fc 100644 --- a/test/link.zig +++ b/test/link.zig @@ -47,6 +47,11 @@ fn addWasmCases(cases: *tests.StandaloneContext) void { .build_modes = true, .requires_stage2 = true, }); + + cases.addBuildFile("test/link/wasm/archive/build.zig", .{ + .build_modes = true, + .requires_stage2 = true, + }); } fn addMachOCases(cases: *tests.StandaloneContext) void { diff --git a/test/link/wasm/archive/build.zig b/test/link/wasm/archive/build.zig new file mode 100644 index 0000000000..95ce444659 --- /dev/null +++ b/test/link/wasm/archive/build.zig @@ -0,0 +1,27 @@ +const std = @import("std"); +const Builder = std.build.Builder; + +pub fn build(b: *Builder) void { + const mode = b.standardReleaseOptions(); + + const test_step = b.step("test", "Test"); + test_step.dependOn(b.getInstallStep()); + + // The code in question will pull-in compiler-rt, + // and therefore link with its archive file. + const lib = b.addSharedLibrary("main", "main.zig", .unversioned); + lib.setBuildMode(mode); + lib.setTarget(.{ .cpu_arch = .wasm32, .os_tag = .freestanding }); + lib.use_llvm = false; + lib.use_stage1 = false; + lib.use_lld = false; + + const check = lib.checkObject(.wasm); + check.checkStart("Section import"); + check.checkNext("entries 1"); // __truncsfhf2 should have been resolved, so only 1 import (compiler-rt's memcpy). + + check.checkStart("Section custom"); + check.checkNext("name __truncsfhf2"); // Ensure it was imported and resolved + + test_step.dependOn(&check.step); +} diff --git a/test/link/wasm/archive/main.zig b/test/link/wasm/archive/main.zig new file mode 100644 index 0000000000..29be3af0ac --- /dev/null +++ b/test/link/wasm/archive/main.zig @@ -0,0 +1,6 @@ +export fn foo() void { + var a: f16 = 2.2; + // this will pull-in compiler-rt + var b = @trunc(a); + _ = b; +} From d4c56804dfd3f53de1f0950fac9478b096a2a0b4 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Sun, 21 Aug 2022 16:42:11 +0200 Subject: [PATCH 4/4] std: fix EmulatableRunStep Fixes a compilation error when using the `EmulatableRunStep` that is being generated from a step directly using `runEmulatable`. --- lib/std/build.zig | 6 +++++- lib/std/build/RunStep.zig | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/std/build.zig b/lib/std/build.zig index 1cbf188d2c..b47f962a9d 100644 --- a/lib/std/build.zig +++ b/lib/std/build.zig @@ -1898,7 +1898,11 @@ pub const LibExeObjStep = struct { pub fn runEmulatable(exe: *LibExeObjStep) *EmulatableRunStep { assert(exe.kind == .exe or exe.kind == .test_exe); - return EmulatableRunStep.create(exe.builder, exe.builder.fmt("run {s}", .{exe.step.name}), exe); + const run_step = EmulatableRunStep.create(exe.builder, exe.builder.fmt("run {s}", .{exe.step.name}), exe); + if (exe.vcpkg_bin_path) |path| { + RunStep.addPathDirInternal(&run_step.step, exe.builder, path); + } + return run_step; } pub fn checkObject(self: *LibExeObjStep, obj_format: std.Target.ObjectFormat) *CheckObjectStep { diff --git a/lib/std/build/RunStep.zig b/lib/std/build/RunStep.zig index 1e22fd10a3..168f5d9d58 100644 --- a/lib/std/build/RunStep.zig +++ b/lib/std/build/RunStep.zig @@ -101,7 +101,7 @@ pub fn addPathDir(self: *RunStep, search_path: []const u8) void { } /// For internal use only, users of `RunStep` should use `addPathDir` directly. -fn addPathDirInternal(step: *Step, builder: *Builder, search_path: []const u8) void { +pub fn addPathDirInternal(step: *Step, builder: *Builder, search_path: []const u8) void { const env_map = getEnvMapInternal(step, builder.allocator); const key = "PATH";