From ed06a78f35e7281289249b0d0c119bd64845dd51 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 28 Sep 2020 23:20:14 -0700 Subject: [PATCH] stage2: WASM LLD linking --- BRANCH_TODO | 1 - lib/std/target.zig | 2 +- src/Compilation.zig | 4 +- src/link/Wasm.zig | 210 +++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 212 insertions(+), 5 deletions(-) diff --git a/BRANCH_TODO b/BRANCH_TODO index a579d4a5eb..5c0b4ce27a 100644 --- a/BRANCH_TODO +++ b/BRANCH_TODO @@ -1,5 +1,4 @@ * MachO LLD linking - * WASM LLD linking * audit the CLI options for stage2 * audit the base cache hash * On operating systems that support it, do an execve for `zig test` and `zig run` rather than child process. diff --git a/lib/std/target.zig b/lib/std/target.zig index 65e9f75457..e1a7e1a2bf 100644 --- a/lib/std/target.zig +++ b/lib/std/target.zig @@ -1486,7 +1486,7 @@ pub const Target = struct { /// Return whether or not the given host target is capable of executing natively executables /// of the other target. 
- pub fn canExecBinariesOf(host_target: std.Target, binary_target: std.Target) bool { + pub fn canExecBinariesOf(host_target: Target, binary_target: Target) bool { if (host_target.os.tag != binary_target.os.tag) return false; diff --git a/src/Compilation.zig b/src/Compilation.zig index 314b7cdbdd..4bc9679ce2 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -2418,7 +2418,7 @@ fn buildStaticLibFromZig(comp: *Compilation, src_basename: []const u8, out: *?CR const bin_basename = try std.zig.binNameAlloc(comp.gpa, .{ .root_name = root_name, .target = target, - .output_mode = .Lib, + .output_mode = .Obj, }); defer comp.gpa.free(bin_basename); @@ -2441,7 +2441,7 @@ fn buildStaticLibFromZig(comp: *Compilation, src_basename: []const u8, out: *?CR .target = target, .root_name = root_name, .root_pkg = &root_pkg, - .output_mode = .Lib, + .output_mode = .Obj, .rand = comp.rand, .libc_installation = comp.bin_file.options.libc_installation, .emit_bin = emit_bin, diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 4cff09ef69..509544c94f 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -1,10 +1,12 @@ const Wasm = @This(); const std = @import("std"); +const mem = std.mem; const Allocator = std.mem.Allocator; const assert = std.debug.assert; const fs = std.fs; const leb = std.debug.leb; +const log = std.log.scoped(.link); const Module = @import("../Module.zig"); const Compilation = @import("../Compilation.zig"); @@ -12,6 +14,7 @@ const codegen = @import("../codegen/wasm.zig"); const link = @import("../link.zig"); const trace = @import("../tracy.zig").trace; const build_options = @import("build_options"); +const Cache = @import("../Cache.zig"); /// Various magic numbers defined by the wasm spec const spec = struct { @@ -137,7 +140,7 @@ pub fn freeDecl(self: *Wasm, decl: *Module.Decl) void { pub fn flush(self: *Wasm, comp: *Compilation) !void { if (build_options.have_llvm and self.base.options.use_lld) { - return error.WasmLinkingWithLLDUnimplemented; + 
return self.linkWithLLD(comp); } else { return self.flushModule(comp); } @@ -248,6 +251,211 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void { } } +fn linkWithLLD(self: *Wasm, comp: *Compilation) !void { + const tracy = trace(@src()); + defer tracy.end(); + + var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator); + defer arena_allocator.deinit(); + const arena = &arena_allocator.allocator; + + const directory = self.base.options.emit.?.directory; // Just an alias to make it shorter to type. + + // If there is no Zig code to compile, then we should skip flushing the output file because it + // will not be part of the linker line anyway. + const module_obj_path: ?[]const u8 = if (self.base.options.module) |module| blk: { + const use_stage1 = build_options.is_stage1 and self.base.options.use_llvm; + if (use_stage1) { + const obj_basename = try std.zig.binNameAlloc(arena, .{ + .root_name = self.base.options.root_name, + .target = self.base.options.target, + .output_mode = .Obj, + }); + const o_directory = self.base.options.module.?.zig_cache_artifact_directory; + const full_obj_path = try o_directory.join(arena, &[_][]const u8{obj_basename}); + break :blk full_obj_path; + } + + try self.flushModule(comp); + const obj_basename = self.base.intermediary_basename.?; + const full_obj_path = try directory.join(arena, &[_][]const u8{obj_basename}); + break :blk full_obj_path; + } else null; + + const target = self.base.options.target; + + const id_symlink_basename = "lld.id"; + + var man: Cache.Manifest = undefined; + defer if (!self.base.options.disable_lld_caching) man.deinit(); + + var digest: [Cache.hex_digest_len]u8 = undefined; + + if (!self.base.options.disable_lld_caching) { + man = comp.cache_parent.obtain(); + + // We are about to obtain this lock, so here we give other processes a chance first. 
+ self.base.releaseLock(); + + try man.addListOfFiles(self.base.options.objects); + for (comp.c_object_table.items()) |entry| { + _ = try man.addFile(entry.key.status.success.object_path, null); + } + try man.addOptionalFile(module_obj_path); + man.hash.addOptional(self.base.options.stack_size_override); + man.hash.addListOfBytes(self.base.options.extra_lld_args); + + // We don't actually care whether it's a cache hit or miss; we just need the digest and the lock. + _ = try man.hit(); + digest = man.final(); + + var prev_digest_buf: [digest.len]u8 = undefined; + const prev_digest: []u8 = directory.handle.readLink(id_symlink_basename, &prev_digest_buf) catch |err| blk: { + log.debug("WASM LLD new_digest={} readlink error: {}", .{ digest, @errorName(err) }); + // Handle this as a cache miss. + break :blk prev_digest_buf[0..0]; + }; + if (mem.eql(u8, prev_digest, &digest)) { + log.debug("WASM LLD digest={} match - skipping invocation", .{digest}); + // Hot diggity dog! The output binary is already there. + self.base.lock = man.toOwnedLock(); + return; + } + log.debug("WASM LLD prev_digest={} new_digest={}", .{ prev_digest, digest }); + + // We are about to change the output file to be different, so we invalidate the build hash now. + directory.handle.deleteFile(id_symlink_basename) catch |err| switch (err) { + error.FileNotFound => {}, + else => |e| return e, + }; + } + + const is_obj = self.base.options.output_mode == .Obj; + + // Create an LLD command line and invoke it. + var argv = std.ArrayList([]const u8).init(self.base.allocator); + defer argv.deinit(); + // Even though we're calling LLD as a library it thinks the first argument is its own exe name. + try argv.append("lld"); + if (is_obj) { + try argv.append("-r"); + } + + try argv.append("-error-limit=0"); + + if (self.base.options.output_mode == .Exe) { + // Increase the default stack size to a more reasonable value of 1MB instead of + // the default of 1 Wasm page being 64KB, unless overridden by the user. 
+ try argv.append("-z"); + const stack_size = self.base.options.stack_size_override orelse 1048576; + const arg = try std.fmt.allocPrint(arena, "stack-size={d}", .{stack_size}); + try argv.append(arg); + + // Put stack before globals so that stack overflow results in segfault immediately + // before corrupting globals. See https://github.com/ziglang/zig/issues/4496 + try argv.append("--stack-first"); + } else { + try argv.append("--no-entry"); // So lld doesn't look for _start. + try argv.append("--export-all"); + } + try argv.appendSlice(&[_][]const u8{ + "--allow-undefined", + "-o", + try directory.join(arena, &[_][]const u8{self.base.options.emit.?.sub_path}), + }); + + // Positional arguments to the linker such as object files. + try argv.appendSlice(self.base.options.objects); + + for (comp.c_object_table.items()) |entry| { + try argv.append(entry.key.status.success.object_path); + } + if (module_obj_path) |p| { + try argv.append(p); + } + + if (self.base.options.output_mode == .Exe and !self.base.options.is_compiler_rt_or_libc) { + if (!self.base.options.link_libc) { + try argv.append(comp.libc_static_lib.?.full_object_path); + } + try argv.append(comp.compiler_rt_static_lib.?.full_object_path); + } + + if (self.base.options.verbose_link) { + Compilation.dump_argv(argv.items); + } + + // TODO allocSentinel crashed stage1 so this is working around it. 
+ const new_argv_with_sentinel = try arena.alloc(?[*:0]const u8, argv.items.len + 1); + new_argv_with_sentinel[argv.items.len] = null; + const new_argv = new_argv_with_sentinel[0..argv.items.len :null]; + for (argv.items) |arg, i| { + new_argv[i] = try arena.dupeZ(u8, arg); + } + + var stderr_context: LLDContext = .{ + .wasm = self, + .data = std.ArrayList(u8).init(self.base.allocator), + }; + defer stderr_context.data.deinit(); + var stdout_context: LLDContext = .{ + .wasm = self, + .data = std.ArrayList(u8).init(self.base.allocator), + }; + defer stdout_context.data.deinit(); + const llvm = @import("../llvm.zig"); + const ok = llvm.Link( + .Wasm, + new_argv.ptr, + new_argv.len, + append_diagnostic, + @ptrToInt(&stdout_context), + @ptrToInt(&stderr_context), + ); + if (stderr_context.oom or stdout_context.oom) return error.OutOfMemory; + if (stdout_context.data.items.len != 0) { + std.log.warn("unexpected LLD stdout: {}", .{stdout_context.data.items}); + } + if (!ok) { + // TODO parse this output and surface with the Compilation API rather than + // directly outputting to stderr here. + std.debug.print("{}", .{stderr_context.data.items}); + return error.LLDReportedFailure; + } + if (stderr_context.data.items.len != 0) { + std.log.warn("unexpected LLD stderr: {}", .{stderr_context.data.items}); + } + + if (!self.base.options.disable_lld_caching) { + // Update the dangling symlink with the digest. If it fails we can continue; it only + // means that the next invocation will have an unnecessary cache miss. + directory.handle.symLink(&digest, id_symlink_basename, .{}) catch |err| { + std.log.warn("failed to save linking hash digest symlink: {}", .{@errorName(err)}); + }; + // Again failure here only means an unnecessary cache miss. 
+ man.writeManifest() catch |err| { + std.log.warn("failed to write cache manifest when linking: {}", .{@errorName(err)}); + }; + // We hang on to this lock so that the output file path can be used without + // other processes clobbering it. + self.base.lock = man.toOwnedLock(); + } +} + +const LLDContext = struct { + data: std.ArrayList(u8), + wasm: *Wasm, + oom: bool = false, +}; + +fn append_diagnostic(context: usize, ptr: [*]const u8, len: usize) callconv(.C) void { + const lld_context = @intToPtr(*LLDContext, context); + const msg = ptr[0..len]; + lld_context.data.appendSlice(msg) catch |err| switch (err) { + error.OutOfMemory => lld_context.oom = true, + }; +} + /// Get the current index of a given Decl in the function list /// TODO: we could maintain a hash map to potentially make this fn getFuncidx(self: Wasm, decl: *Module.Decl) ?u32 {