diff --git a/build.zig b/build.zig index 1bb4a159e9..5df7f035fd 100644 --- a/build.zig +++ b/build.zig @@ -205,6 +205,7 @@ pub fn build(b: *Builder) !void { } const enable_logging = b.option(bool, "log", "Whether to enable logging") orelse false; + const enable_link_snapshots = b.option(bool, "link-snapshot", "Whether to enable linker state snapshots") orelse false; const opt_version_string = b.option([]const u8, "version-string", "Override Zig version string. Default is to find out with git."); const version = if (opt_version_string) |version| version else v: { @@ -261,6 +262,7 @@ pub fn build(b: *Builder) !void { exe_options.addOption(std.SemanticVersion, "semver", semver); exe_options.addOption(bool, "enable_logging", enable_logging); + exe_options.addOption(bool, "enable_link_snapshots", enable_link_snapshots); exe_options.addOption(bool, "enable_tracy", tracy != null); exe_options.addOption(bool, "is_stage1", is_stage1); exe_options.addOption(bool, "omit_stage2", omit_stage2); @@ -301,6 +303,7 @@ pub fn build(b: *Builder) !void { test_stage2.addOptions("build_options", test_stage2_options); test_stage2_options.addOption(bool, "enable_logging", enable_logging); + test_stage2_options.addOption(bool, "enable_link_snapshots", enable_link_snapshots); test_stage2_options.addOption(bool, "skip_non_native", skip_non_native); test_stage2_options.addOption(bool, "skip_compile_errors", skip_compile_errors); test_stage2_options.addOption(bool, "is_stage1", is_stage1); diff --git a/src/Compilation.zig b/src/Compilation.zig index 63a6b50d5b..a7a76633ab 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -757,6 +757,8 @@ pub const InitOptions = struct { subsystem: ?std.Target.SubSystem = null, /// WASI-only. Type of WASI execution model ("command" or "reactor"). wasi_exec_model: ?std.builtin.WasiExecModel = null, + /// (Zig compiler development) Enable dumping linker's state as JSON. 
+ enable_link_snapshots: bool = false, }; fn addPackageTableToCacheHash( @@ -1438,6 +1440,7 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation { .is_test = options.is_test, .wasi_exec_model = wasi_exec_model, .use_stage1 = use_stage1, + .enable_link_snapshots = options.enable_link_snapshots, }); errdefer bin_file.destroy(); comp.* = .{ diff --git a/src/config.zig.in b/src/config.zig.in index 62e8785ccb..f193fddb20 100644 --- a/src/config.zig.in +++ b/src/config.zig.in @@ -6,6 +6,7 @@ pub const llvm_has_arc = false; pub const version: [:0]const u8 = "@ZIG_VERSION@"; pub const semver = @import("std").SemanticVersion.parse(version) catch unreachable; pub const enable_logging: bool = @ZIG_ENABLE_LOGGING_BOOL@; +pub const enable_link_snapshots: bool = false; pub const enable_tracy = false; pub const is_stage1 = true; pub const skip_non_native = false; diff --git a/src/link.zig b/src/link.zig index 5874ed5703..8cb2c4a485 100644 --- a/src/link.zig +++ b/src/link.zig @@ -126,6 +126,9 @@ pub const Options = struct { /// WASI-only. Type of WASI execution model ("command" or "reactor"). wasi_exec_model: std.builtin.WasiExecModel = undefined, + /// (Zig compiler development) Enable dumping of linker's state as JSON. 
+ enable_link_snapshots: bool = false, + pub fn effectiveOutputMode(options: Options) std.builtin.OutputMode { return if (options.use_lld) .Obj else options.output_mode; } diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 8d951fe878..923811af36 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -938,6 +938,11 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { if (self.requires_adhoc_codesig) { try self.writeCodeSignature(); // code signing always comes last } + + if (build_options.enable_link_snapshots) { + if (self.base.options.enable_link_snapshots) + try self.snapshotState(); + } } cache: { @@ -2424,6 +2429,14 @@ fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { continue; }, .undef => { + const undef = &self.undefs.items[resolv.where_index]; + undef.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; _ = self.unresolved.fetchSwapRemove(resolv.where_index); }, } @@ -4826,9 +4839,17 @@ fn writeSymbolTable(self: *MachO) !void { } } + var undefs = std.ArrayList(macho.nlist_64).init(self.base.allocator); + defer undefs.deinit(); + + for (self.undefs.items) |sym| { + if (sym.n_strx == 0) continue; + try undefs.append(sym); + } + const nlocals = locals.items.len; const nexports = self.globals.items.len; - const nundefs = self.undefs.items.len; + const nundefs = undefs.items.len; const locals_off = symtab.symoff; const locals_size = nlocals * @sizeOf(macho.nlist_64); @@ -4843,7 +4864,7 @@ fn writeSymbolTable(self: *MachO) !void { const undefs_off = exports_off + exports_size; const undefs_size = nundefs * @sizeOf(macho.nlist_64); log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.undefs.items), undefs_off); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(undefs.items), undefs_off); symtab.nsyms = @intCast(u32, nlocals + nexports + nundefs); seg.inner.filesize += 
locals_size + exports_size + undefs_size;
@@ -5188,3 +5209,281 @@ pub fn findFirst(comptime T: type, haystack: []T, start: usize, predicate: anyty
     }
     return i;
 }
+
+/// (Zig compiler development) Appends a snapshot of the linker's current layout to
+/// `snapshots.json` in the emit directory (truncated first when `self.cold_start` is set).
+/// The file holds one JSON array; each call adds one object made of a nanosecond timestamp
+/// and a flat list of section/atom/relocation nodes. Does nothing without an emit location.
+fn snapshotState(self: *MachO) !void {
+    const emit = self.base.options.emit orelse {
+        log.debug("no emit directory found; skipping snapshot...", .{});
+        return;
+    };
+
+    const Snapshot = struct {
+        const Node = struct {
+            const Tag = enum {
+                section_start,
+                section_end,
+                atom_start,
+                atom_end,
+                relocation,
+
+                pub fn jsonStringify(
+                    tag: Tag,
+                    options: std.json.StringifyOptions,
+                    out_stream: anytype,
+                ) !void {
+                    _ = options;
+                    switch (tag) {
+                        .section_start => try out_stream.writeAll("\"section_start\""),
+                        .section_end => try out_stream.writeAll("\"section_end\""),
+                        .atom_start => try out_stream.writeAll("\"atom_start\""),
+                        .atom_end => try out_stream.writeAll("\"atom_end\""),
+                        .relocation => try out_stream.writeAll("\"relocation\""),
+                    }
+                }
+            };
+            const Payload = struct {
+                name: []const u8 = "",
+                aliases: [][]const u8 = &[0][]const u8{},
+                is_global: bool = false,
+                target: u64 = 0,
+            };
+            address: u64,
+            tag: Tag,
+            payload: Payload,
+        };
+        timestamp: i128,
+        nodes: []Node,
+    };
+
+    var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator);
+    defer arena_allocator.deinit();
+    const arena = &arena_allocator.allocator;
+
+    const out_file = try emit.directory.handle.createFile("snapshots.json", .{
+        .truncate = self.cold_start,
+        .read = true,
+    });
+    defer out_file.close();
+
+    // Step back over the last byte (the ']' written by a previous call) and overwrite it
+    // with ','; if that seek fails with error.Unseekable, start a new array instead.
+    if (out_file.seekFromEnd(-1)) {
+        try out_file.writer().writeByte(',');
+    } else |err| switch (err) {
+        error.Unseekable => try out_file.writer().writeByte('['),
+        else => |e| return e,
+    }
+    var writer = out_file.writer();
+
+    var snapshot = Snapshot{
+        .timestamp = std.time.nanoTimestamp(),
+        .nodes = undefined,
+    };
+    var nodes = std.ArrayList(Snapshot.Node).init(arena);
+
+    for (self.section_ordinals.keys()) |key| {
+        const seg = self.load_commands.items[key.seg].Segment;
+        const sect = seg.sections.items[key.sect];
+        const sect_name = try std.fmt.allocPrint(arena, "{s},{s}", .{
+            commands.segmentName(sect),
+            commands.sectionName(sect),
+        });
+        try nodes.append(.{
+            .address = sect.addr,
+            .tag = .section_start,
+            .payload = .{ .name = sect_name },
+        });
+
+        var atom: *Atom = self.atoms.get(key) orelse {
+            try nodes.append(.{
+                .address = sect.addr + sect.size,
+                .tag = .section_end,
+                .payload = .{},
+            });
+            continue;
+        };
+
+        // Follow `prev` links back to the first atom before walking forward via `next`.
+        while (atom.prev) |prev| {
+            atom = prev;
+        }
+
+        while (true) {
+            const atom_sym = self.locals.items[atom.local_sym_index];
+            var node = Snapshot.Node{
+                .address = atom_sym.n_value,
+                .tag = .atom_start,
+                .payload = .{
+                    .name = self.getString(atom_sym.n_strx),
+                    .is_global = self.symbol_resolver.contains(atom_sym.n_strx),
+                },
+            };
+
+            var aliases = std.ArrayList([]const u8).init(arena);
+            for (atom.aliases.items) |loc| {
+                try aliases.append(self.getString(self.locals.items[loc].n_strx));
+            }
+            node.payload.aliases = aliases.toOwnedSlice();
+            try nodes.append(node);
+
+            var relocs = std.ArrayList(Snapshot.Node).init(arena);
+            try relocs.ensureTotalCapacity(atom.relocs.items.len);
+            for (atom.relocs.items) |rel| {
+                const arch = self.base.options.target.cpu.arch;
+                const source_addr = blk: {
+                    const sym = self.locals.items[atom.local_sym_index];
+                    break :blk sym.n_value + rel.offset;
+                };
+                const target_addr = blk: {
+                    const is_via_got = got: {
+                        switch (arch) {
+                            .aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, rel.@"type")) {
+                                .ARM64_RELOC_GOT_LOAD_PAGE21, .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => true,
+                                else => false,
+                            },
+                            .x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, rel.@"type")) {
+                                .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true,
+                                else => false,
+                            },
+                            else => unreachable,
+                        }
+                    };
+
+                    if (is_via_got) {
+                        const got_atom = self.got_entries_map.get(rel.target).?;
+                        break :blk self.locals.items[got_atom.local_sym_index].n_value;
+                    }
+
+                    switch (rel.target) {
+                        .local => |sym_index| {
+                            const sym = self.locals.items[sym_index];
+                            const is_tlv = is_tlv: {
+                                const source_sym = self.locals.items[atom.local_sym_index];
+                                const match = self.section_ordinals.keys()[source_sym.n_sect - 1];
+                                const match_seg = self.load_commands.items[match.seg].Segment;
+                                const match_sect = match_seg.sections.items[match.sect];
+                                break :is_tlv commands.sectionType(match_sect) == macho.S_THREAD_LOCAL_VARIABLES;
+                            };
+                            if (is_tlv) {
+                                const match_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+                                const base_address = inner: {
+                                    if (self.tlv_data_section_index) |i| {
+                                        break :inner match_seg.sections.items[i].addr;
+                                    } else if (self.tlv_bss_section_index) |i| {
+                                        break :inner match_seg.sections.items[i].addr;
+                                    } else unreachable;
+                                };
+                                break :blk sym.n_value - base_address;
+                            }
+                            break :blk sym.n_value;
+                        },
+                        .global => |n_strx| {
+                            const resolv = self.symbol_resolver.get(n_strx).?;
+                            switch (resolv.where) {
+                                .global => break :blk self.globals.items[resolv.where_index].n_value,
+                                .undef => {
+                                    break :blk if (self.stubs_map.get(n_strx)) |stub_atom|
+                                        self.locals.items[stub_atom.local_sym_index].n_value
+                                    else
+                                        0;
+                                },
+                            }
+                        },
+                    }
+                };
+
+                relocs.appendAssumeCapacity(.{
+                    .address = source_addr,
+                    .tag = .relocation,
+                    .payload = .{ .target = target_addr },
+                });
+            }
+
+            if (atom.contained.items.len == 0) {
+                try nodes.appendSlice(relocs.items);
+            } else {
+                // Need to reverse iteration order of relocs since by default for relocatable sources
+                // they come in reverse. For linking, this doesn't matter in any way, however, for
+                // arranging the memory line for displaying it does.
+                std.mem.reverse(Snapshot.Node, relocs.items);
+
+                var next_i: usize = 0;
+                var last_rel: usize = 0;
+                while (next_i < atom.contained.items.len) : (next_i += 1) {
+                    const loc = atom.contained.items[next_i];
+                    const cont_sym = self.locals.items[loc.local_sym_index];
+                    const cont_sym_name = self.getString(cont_sym.n_strx);
+                    var contained_node = Snapshot.Node{
+                        .address = cont_sym.n_value,
+                        .tag = .atom_start,
+                        .payload = .{
+                            .name = cont_sym_name,
+                            .is_global = self.symbol_resolver.contains(cont_sym.n_strx),
+                        },
+                    };
+
+                    // Accumulate aliases
+                    var inner_aliases = std.ArrayList([]const u8).init(arena);
+                    while (true) {
+                        if (next_i + 1 >= atom.contained.items.len) break;
+                        const next_sym = self.locals.items[atom.contained.items[next_i + 1].local_sym_index];
+                        if (next_sym.n_value != cont_sym.n_value) break;
+                        const next_sym_name = self.getString(next_sym.n_strx);
+                        if (self.symbol_resolver.contains(next_sym.n_strx)) {
+                            try inner_aliases.append(contained_node.payload.name);
+                            contained_node.payload.name = next_sym_name;
+                            contained_node.payload.is_global = true;
+                        } else try inner_aliases.append(next_sym_name);
+                        next_i += 1;
+                    }
+
+                    const cont_size = if (next_i + 1 < atom.contained.items.len)
+                        self.locals.items[atom.contained.items[next_i + 1].local_sym_index].n_value - cont_sym.n_value
+                    else
+                        atom_sym.n_value + atom.size - cont_sym.n_value;
+
+                    contained_node.payload.aliases = inner_aliases.toOwnedSlice();
+                    try nodes.append(contained_node);
+
+                    // `last_rel` is an absolute index into `relocs.items`; advance it for every
+                    // relocation emitted so the next contained symbol resumes after them.
+                    for (relocs.items[last_rel..]) |rel| {
+                        if (rel.address >= cont_sym.n_value + cont_size) break;
+                        try nodes.append(rel);
+                        last_rel += 1;
+                    }
+
+                    try nodes.append(.{
+                        .address = cont_sym.n_value + cont_size,
+                        .tag = .atom_end,
+                        .payload = .{},
+                    });
+                }
+            }
+
+            try nodes.append(.{
+                .address = atom_sym.n_value + atom.size,
+                .tag = .atom_end,
+                .payload = .{},
+            });
+
+            if (atom.next) |next| {
+                atom = next;
+            } else break;
+        }
+
+        try nodes.append(.{
+            .address = sect.addr + sect.size,
+            .tag = .section_end,
+            .payload = .{},
+        });
+    }
+
+    snapshot.nodes = nodes.toOwnedSlice();
+
+    try std.json.stringify(snapshot, .{}, writer);
+    try writer.writeByte(']');
+}
diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index c32d1f1d8f..07a20ac336 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -345,15 +345,9 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; const sect = seg.sections.items[sect_id]; const match = (try context.macho_file.getMatchingSection(sect)) orelse unreachable; - const sym_name = try std.fmt.allocPrint(context.allocator, "{s}_{s}_{s}", .{ - context.object.name, - commands.segmentName(sect), - commands.sectionName(sect), - }); - defer context.allocator.free(sym_name); const local_sym_index = @intCast(u32, context.macho_file.locals.items.len); try context.macho_file.locals.append(context.allocator, .{ - .n_strx = try context.macho_file.makeString(sym_name), + .n_strx = 0, .n_type = macho.N_SECT, .n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(match).? + 1), .n_desc = 0, diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index de747eb4c7..f0a299182c 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -174,7 +174,13 @@ pub fn free(self: *Object, allocator: *Allocator, macho_file: *MachO) void { if (atom.local_sym_index != 0) { macho_file.locals_free_list.append(allocator, atom.local_sym_index) catch {}; const local = &macho_file.locals.items[atom.local_sym_index]; - local.n_type = 0; + local.* = .{ + .n_strx = 0, + .n_type = 0, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; atom.local_sym_index = 0; } if (atom == last_atom) { @@ -458,15 +464,9 @@ pub fn parseIntoAtoms(self: *Object, allocator: *Allocator, macho_file: *MachO) // a temp one, unless we already did that when working out the relocations // of other atoms. 
const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { - const sym_name = try std.fmt.allocPrint(allocator, "{s}_{s}_{s}", .{ - self.name, - segmentName(sect), - sectionName(sect), - }); - defer allocator.free(sym_name); const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len); try macho_file.locals.append(allocator, .{ - .n_strx = try macho_file.makeString(sym_name), + .n_strx = 0, .n_type = macho.N_SECT, .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1), .n_desc = 0, diff --git a/src/main.zig b/src/main.zig index fbe388ed47..3fc2be7e30 100644 --- a/src/main.zig +++ b/src/main.zig @@ -434,6 +434,7 @@ const usage_build_generic = \\ --verbose-llvm-cpu-features Enable compiler debug output for LLVM CPU features \\ --debug-log [scope] Enable printing debug/info log messages for scope \\ --debug-compile-errors Crash with helpful diagnostics at the first compile error + \\ --debug-link-snapshot Enable dumping of the linker's state in JSON format \\ ; @@ -632,6 +633,7 @@ fn buildOutputType( var major_subsystem_version: ?u32 = null; var minor_subsystem_version: ?u32 = null; var wasi_exec_model: ?std.builtin.WasiExecModel = null; + var enable_link_snapshots: bool = false; var system_libs = std.ArrayList([]const u8).init(gpa); defer system_libs.deinit(); @@ -929,6 +931,12 @@ fn buildOutputType( } else { try log_scopes.append(gpa, args[i]); } + } else if (mem.eql(u8, arg, "--debug-link-snapshot")) { + if (!build_options.enable_link_snapshots) { + std.log.warn("Zig was compiled without linker snapshots enabled (-Dlink-snapshot). 
--debug-link-snapshot has no effect.", .{}); + } else { + enable_link_snapshots = true; + } } else if (mem.eql(u8, arg, "-fcompiler-rt")) { want_compiler_rt = true; } else if (mem.eql(u8, arg, "-fno-compiler-rt")) { @@ -2139,6 +2147,7 @@ fn buildOutputType( .subsystem = subsystem, .wasi_exec_model = wasi_exec_model, .debug_compile_errors = debug_compile_errors, + .enable_link_snapshots = enable_link_snapshots, }) catch |err| { fatal("unable to create compilation: {s}", .{@errorName(err)}); };