macho: dump linker's state as JSON

The elements of the output JSON are ordered by nondecreasing VM address
within the generated binary (some elements may share the same VM address;
for example, an atom and a relocation might coincide in the address
space).

The generated JSON can be inspected manually or via a preview tool,
`zig-snapshots`, that I am currently working on. It will allow the user
to interactively inspect the state of the linker together with the
positioning of sections, symbols, atoms and relocations within each
snapshot state and, in the future, between snapshots too. This should
allow for quicker debugging of the linker, which is nontrivial when
it is run in incremental mode.

Note that the state is only dumped if the compiler was built with the
`-Dlink-snapshot` flag and is then passed the `--debug-link-snapshot`
flag when compiling a source file or project.
This commit is contained in:
Jakub Konka 2021-10-14 13:50:10 +02:00
parent 912e7dc54b
commit d0dceae736
8 changed files with 322 additions and 17 deletions

View File

@ -205,6 +205,7 @@ pub fn build(b: *Builder) !void {
}
const enable_logging = b.option(bool, "log", "Whether to enable logging") orelse false;
const enable_link_snapshots = b.option(bool, "link-snapshot", "Whether to enable linker state snapshots") orelse false;
const opt_version_string = b.option([]const u8, "version-string", "Override Zig version string. Default is to find out with git.");
const version = if (opt_version_string) |version| version else v: {
@ -261,6 +262,7 @@ pub fn build(b: *Builder) !void {
exe_options.addOption(std.SemanticVersion, "semver", semver);
exe_options.addOption(bool, "enable_logging", enable_logging);
exe_options.addOption(bool, "enable_link_snapshots", enable_link_snapshots);
exe_options.addOption(bool, "enable_tracy", tracy != null);
exe_options.addOption(bool, "is_stage1", is_stage1);
exe_options.addOption(bool, "omit_stage2", omit_stage2);
@ -301,6 +303,7 @@ pub fn build(b: *Builder) !void {
test_stage2.addOptions("build_options", test_stage2_options);
test_stage2_options.addOption(bool, "enable_logging", enable_logging);
test_stage2_options.addOption(bool, "enable_link_snapshots", enable_link_snapshots);
test_stage2_options.addOption(bool, "skip_non_native", skip_non_native);
test_stage2_options.addOption(bool, "skip_compile_errors", skip_compile_errors);
test_stage2_options.addOption(bool, "is_stage1", is_stage1);

View File

@ -757,6 +757,8 @@ pub const InitOptions = struct {
subsystem: ?std.Target.SubSystem = null,
/// WASI-only. Type of WASI execution model ("command" or "reactor").
wasi_exec_model: ?std.builtin.WasiExecModel = null,
/// (Zig compiler development) Enable dumping linker's state as JSON.
enable_link_snapshots: bool = false,
};
fn addPackageTableToCacheHash(
@ -1438,6 +1440,7 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation {
.is_test = options.is_test,
.wasi_exec_model = wasi_exec_model,
.use_stage1 = use_stage1,
.enable_link_snapshots = options.enable_link_snapshots,
});
errdefer bin_file.destroy();
comp.* = .{

View File

@ -6,6 +6,7 @@ pub const llvm_has_arc = false;
pub const version: [:0]const u8 = "@ZIG_VERSION@";
pub const semver = @import("std").SemanticVersion.parse(version) catch unreachable;
pub const enable_logging: bool = @ZIG_ENABLE_LOGGING_BOOL@;
pub const enable_link_snapshots: bool = false;
pub const enable_tracy = false;
pub const is_stage1 = true;
pub const skip_non_native = false;

View File

@ -126,6 +126,9 @@ pub const Options = struct {
/// WASI-only. Type of WASI execution model ("command" or "reactor").
wasi_exec_model: std.builtin.WasiExecModel = undefined,
/// (Zig compiler development) Enable dumping of linker's state as JSON.
enable_link_snapshots: bool = false,
/// Returns the output mode to use for these options: `.Obj` whenever `use_lld`
/// is set, otherwise the configured `output_mode`.
pub fn effectiveOutputMode(options: Options) std.builtin.OutputMode {
    if (options.use_lld) return .Obj;
    return options.output_mode;
}

View File

@ -938,6 +938,11 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void {
if (self.requires_adhoc_codesig) {
try self.writeCodeSignature(); // code signing always comes last
}
if (build_options.enable_link_snapshots) {
if (self.base.options.enable_link_snapshots)
try self.snapshotState();
}
}
cache: {
@ -2424,6 +2429,14 @@ fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void {
continue;
},
.undef => {
const undef = &self.undefs.items[resolv.where_index];
undef.* = .{
.n_strx = 0,
.n_type = macho.N_UNDF,
.n_sect = 0,
.n_desc = 0,
.n_value = 0,
};
_ = self.unresolved.fetchSwapRemove(resolv.where_index);
},
}
@ -4826,9 +4839,17 @@ fn writeSymbolTable(self: *MachO) !void {
}
}
var undefs = std.ArrayList(macho.nlist_64).init(self.base.allocator);
defer undefs.deinit();
for (self.undefs.items) |sym| {
if (sym.n_strx == 0) continue;
try undefs.append(sym);
}
const nlocals = locals.items.len;
const nexports = self.globals.items.len;
const nundefs = self.undefs.items.len;
const nundefs = undefs.items.len;
const locals_off = symtab.symoff;
const locals_size = nlocals * @sizeOf(macho.nlist_64);
@ -4843,7 +4864,7 @@ fn writeSymbolTable(self: *MachO) !void {
const undefs_off = exports_off + exports_size;
const undefs_size = nundefs * @sizeOf(macho.nlist_64);
log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off });
try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.undefs.items), undefs_off);
try self.base.file.?.pwriteAll(mem.sliceAsBytes(undefs.items), undefs_off);
symtab.nsyms = @intCast(u32, nlocals + nexports + nundefs);
seg.inner.filesize += locals_size + exports_size + undefs_size;
@ -5188,3 +5209,274 @@ pub fn findFirst(comptime T: type, haystack: []T, start: usize, predicate: anyty
}
return i;
}
/// Serializes the linker's current state as one JSON object and appends it to the
/// JSON array stored in `snapshots.json` inside the emit directory.
///
/// The snapshot is a flat list of nodes (section start/end, atom start/end and
/// relocations), each tagged with the VM address it refers to. For every section the
/// atoms are walked from the first one (found by following `prev` links) to the last,
/// and each atom's relocations are emitted between its start and end nodes. When an
/// atom has contained symbols, each of them is emitted as a nested atom and the
/// relocations are distributed among them by address.
///
/// Does nothing (other than logging) when there is no emit directory. Returns any
/// allocation, file-system or JSON serialization error encountered.
fn snapshotState(self: *MachO) !void {
    const emit = self.base.options.emit orelse {
        log.debug("no emit directory found; skipping snapshot...", .{});
        return;
    };

    const Snapshot = struct {
        const Node = struct {
            const Tag = enum {
                section_start,
                section_end,
                atom_start,
                atom_end,
                relocation,

                /// Emits the tag as a quoted JSON string carrying the tag's name.
                pub fn jsonStringify(
                    tag: Tag,
                    options: std.json.StringifyOptions,
                    out_stream: anytype,
                ) !void {
                    _ = options;
                    switch (tag) {
                        .section_start => try out_stream.writeAll("\"section_start\""),
                        .section_end => try out_stream.writeAll("\"section_end\""),
                        .atom_start => try out_stream.writeAll("\"atom_start\""),
                        .atom_end => try out_stream.writeAll("\"atom_end\""),
                        .relocation => try out_stream.writeAll("\"relocation\""),
                    }
                }
            };

            /// Optional per-node data; fields that do not apply keep their defaults.
            const Payload = struct {
                name: []const u8 = "",
                aliases: [][]const u8 = &[0][]const u8{},
                is_global: bool = false,
                target: u64 = 0,
            };

            address: u64,
            tag: Tag,
            payload: Payload,
        };

        timestamp: i128,
        nodes: []Node,
    };

    // Everything allocated while building the snapshot is released in one go.
    var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator);
    defer arena_allocator.deinit();
    const arena = &arena_allocator.allocator;

    const out_file = try emit.directory.handle.createFile("snapshots.json", .{
        .truncate = self.cold_start,
        .read = true,
    });
    defer out_file.close();

    const writer = out_file.writer();

    // The file holds a single JSON array. If we can step back over the last byte,
    // overwrite it (the closing `]` written by the previous snapshot) with a separator;
    // otherwise start a new array.
    // NOTE(review): `error.Unseekable` is presumably what an empty file reports when
    // seeking before its start -- confirm against std.fs.File.seekFromEnd.
    if (out_file.seekFromEnd(-1)) {
        try writer.writeByte(',');
    } else |err| switch (err) {
        error.Unseekable => try writer.writeByte('['),
        else => |e| return e,
    }

    var snapshot = Snapshot{
        .timestamp = std.time.nanoTimestamp(),
        .nodes = undefined,
    };
    var nodes = std.ArrayList(Snapshot.Node).init(arena);

    for (self.section_ordinals.keys()) |key| {
        const seg = self.load_commands.items[key.seg].Segment;
        const sect = seg.sections.items[key.sect];
        const sect_name = try std.fmt.allocPrint(arena, "{s},{s}", .{
            commands.segmentName(sect),
            commands.sectionName(sect),
        });
        try nodes.append(.{
            .address = sect.addr,
            .tag = .section_start,
            .payload = .{ .name = sect_name },
        });

        // A section without atoms is emitted as an empty start/end pair.
        var atom: *Atom = self.atoms.get(key) orelse {
            try nodes.append(.{
                .address = sect.addr + sect.size,
                .tag = .section_end,
                .payload = .{},
            });
            continue;
        };

        // Rewind to the first atom of the section so that atoms are visited front to back.
        while (atom.prev) |prev| {
            atom = prev;
        }

        while (true) {
            const atom_sym = self.locals.items[atom.local_sym_index];
            var node = Snapshot.Node{
                .address = atom_sym.n_value,
                .tag = .atom_start,
                .payload = .{
                    .name = self.getString(atom_sym.n_strx),
                    .is_global = self.symbol_resolver.contains(atom_sym.n_strx),
                },
            };

            var aliases = std.ArrayList([]const u8).init(arena);
            for (atom.aliases.items) |loc| {
                try aliases.append(self.getString(self.locals.items[loc].n_strx));
            }
            node.payload.aliases = aliases.toOwnedSlice();
            try nodes.append(node);

            // Turn every relocation of this atom into a node holding the source
            // address and the resolved target address.
            var relocs = std.ArrayList(Snapshot.Node).init(arena);
            try relocs.ensureTotalCapacity(atom.relocs.items.len);
            for (atom.relocs.items) |rel| {
                const arch = self.base.options.target.cpu.arch;
                const source_addr = blk: {
                    const sym = self.locals.items[atom.local_sym_index];
                    break :blk sym.n_value + rel.offset;
                };
                const target_addr = blk: {
                    const is_via_got = got: {
                        switch (arch) {
                            .aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, rel.@"type")) {
                                .ARM64_RELOC_GOT_LOAD_PAGE21, .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => true,
                                else => false,
                            },
                            .x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, rel.@"type")) {
                                .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true,
                                else => false,
                            },
                            else => unreachable,
                        }
                    };

                    // GOT relocations point at the GOT entry rather than at the symbol itself.
                    if (is_via_got) {
                        const got_atom = self.got_entries_map.get(rel.target).?;
                        break :blk self.locals.items[got_atom.local_sym_index].n_value;
                    }

                    switch (rel.target) {
                        .local => |sym_index| {
                            const sym = self.locals.items[sym_index];
                            const is_tlv = is_tlv: {
                                const source_sym = self.locals.items[atom.local_sym_index];
                                const match = self.section_ordinals.keys()[source_sym.n_sect - 1];
                                const match_seg = self.load_commands.items[match.seg].Segment;
                                const match_sect = match_seg.sections.items[match.sect];
                                break :is_tlv commands.sectionType(match_sect) == macho.S_THREAD_LOCAL_VARIABLES;
                            };
                            if (is_tlv) {
                                // For atoms in a thread-local-variables section the target
                                // is reported relative to the start of the TLV data (or,
                                // failing that, TLV bss) section.
                                const match_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
                                const base_address = inner: {
                                    if (self.tlv_data_section_index) |i| {
                                        break :inner match_seg.sections.items[i].addr;
                                    } else if (self.tlv_bss_section_index) |i| {
                                        break :inner match_seg.sections.items[i].addr;
                                    } else unreachable;
                                };
                                break :blk sym.n_value - base_address;
                            }
                            break :blk sym.n_value;
                        },
                        .global => |n_strx| {
                            const resolv = self.symbol_resolver.get(n_strx).?;
                            switch (resolv.where) {
                                .global => break :blk self.globals.items[resolv.where_index].n_value,
                                .undef => {
                                    // Undefined symbols resolve to their stub when one
                                    // exists, and to address 0 otherwise.
                                    break :blk if (self.stubs_map.get(n_strx)) |stub_atom|
                                        self.locals.items[stub_atom.local_sym_index].n_value
                                    else
                                        0;
                                },
                            }
                        },
                    }
                };

                relocs.appendAssumeCapacity(.{
                    .address = source_addr,
                    .tag = .relocation,
                    .payload = .{ .target = target_addr },
                });
            }

            if (atom.contained.items.len == 0) {
                try nodes.appendSlice(relocs.items);
            } else {
                // Need to reverse iteration order of relocs since by default for relocatable
                // sources they come in reverse. For linking, this doesn't matter in any way,
                // however, for laying the nodes out in address order for display it does.
                std.mem.reverse(Snapshot.Node, relocs.items);

                var next_i: usize = 0;
                // Absolute index into `relocs.items` of the first relocation that has not
                // been emitted yet.
                var last_rel: usize = 0;
                while (next_i < atom.contained.items.len) : (next_i += 1) {
                    const loc = atom.contained.items[next_i];
                    const cont_sym = self.locals.items[loc.local_sym_index];
                    const cont_sym_name = self.getString(cont_sym.n_strx);
                    var contained_node = Snapshot.Node{
                        .address = cont_sym.n_value,
                        .tag = .atom_start,
                        .payload = .{
                            .name = cont_sym_name,
                            .is_global = self.symbol_resolver.contains(cont_sym.n_strx),
                        },
                    };

                    // Accumulate aliases: following contained symbols at the same address
                    // are folded into this node. A name known to the symbol resolver takes
                    // over as the node's name and the previous name becomes an alias.
                    var inner_aliases = std.ArrayList([]const u8).init(arena);
                    while (true) {
                        if (next_i + 1 >= atom.contained.items.len) break;
                        const next_sym = self.locals.items[atom.contained.items[next_i + 1].local_sym_index];
                        if (next_sym.n_value != cont_sym.n_value) break;
                        const next_sym_name = self.getString(next_sym.n_strx);
                        if (self.symbol_resolver.contains(next_sym.n_strx)) {
                            try inner_aliases.append(contained_node.payload.name);
                            contained_node.payload.name = next_sym_name;
                            contained_node.payload.is_global = true;
                        } else try inner_aliases.append(next_sym_name);
                        next_i += 1;
                    }

                    // The contained symbol extends up to the next contained symbol or,
                    // for the last one, up to the end of the enclosing atom.
                    const cont_size = if (next_i + 1 < atom.contained.items.len)
                        self.locals.items[atom.contained.items[next_i + 1].local_sym_index].n_value - cont_sym.n_value
                    else
                        atom_sym.n_value + atom.size - cont_sym.n_value;

                    contained_node.payload.aliases = inner_aliases.toOwnedSlice();
                    try nodes.append(contained_node);

                    // Emit the relocations that fall inside this contained symbol. The
                    // cursor advances once per emitted relocation so that it stays an
                    // absolute index and no relocation is ever emitted twice, whether or
                    // not the loop stops early.
                    for (relocs.items[last_rel..]) |rel| {
                        if (rel.address >= cont_sym.n_value + cont_size) break;
                        try nodes.append(rel);
                        last_rel += 1;
                    }

                    try nodes.append(.{
                        .address = cont_sym.n_value + cont_size,
                        .tag = .atom_end,
                        .payload = .{},
                    });
                }
            }

            try nodes.append(.{
                .address = atom_sym.n_value + atom.size,
                .tag = .atom_end,
                .payload = .{},
            });

            if (atom.next) |next| {
                atom = next;
            } else break;
        }

        try nodes.append(.{
            .address = sect.addr + sect.size,
            .tag = .section_end,
            .payload = .{},
        });
    }

    snapshot.nodes = nodes.toOwnedSlice();

    try std.json.stringify(snapshot, .{}, writer);
    // Close the array again; the next snapshot overwrites this byte with a comma.
    try writer.writeByte(']');
}

View File

@ -345,15 +345,9 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC
const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment;
const sect = seg.sections.items[sect_id];
const match = (try context.macho_file.getMatchingSection(sect)) orelse unreachable;
const sym_name = try std.fmt.allocPrint(context.allocator, "{s}_{s}_{s}", .{
context.object.name,
commands.segmentName(sect),
commands.sectionName(sect),
});
defer context.allocator.free(sym_name);
const local_sym_index = @intCast(u32, context.macho_file.locals.items.len);
try context.macho_file.locals.append(context.allocator, .{
.n_strx = try context.macho_file.makeString(sym_name),
.n_strx = 0,
.n_type = macho.N_SECT,
.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(match).? + 1),
.n_desc = 0,

View File

@ -174,7 +174,13 @@ pub fn free(self: *Object, allocator: *Allocator, macho_file: *MachO) void {
if (atom.local_sym_index != 0) {
macho_file.locals_free_list.append(allocator, atom.local_sym_index) catch {};
const local = &macho_file.locals.items[atom.local_sym_index];
local.n_type = 0;
local.* = .{
.n_strx = 0,
.n_type = 0,
.n_sect = 0,
.n_desc = 0,
.n_value = 0,
};
atom.local_sym_index = 0;
}
if (atom == last_atom) {
@ -458,15 +464,9 @@ pub fn parseIntoAtoms(self: *Object, allocator: *Allocator, macho_file: *MachO)
// a temp one, unless we already did that when working out the relocations
// of other atoms.
const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
const sym_name = try std.fmt.allocPrint(allocator, "{s}_{s}_{s}", .{
self.name,
segmentName(sect),
sectionName(sect),
});
defer allocator.free(sym_name);
const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len);
try macho_file.locals.append(allocator, .{
.n_strx = try macho_file.makeString(sym_name),
.n_strx = 0,
.n_type = macho.N_SECT,
.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
.n_desc = 0,

View File

@ -434,6 +434,7 @@ const usage_build_generic =
\\ --verbose-llvm-cpu-features Enable compiler debug output for LLVM CPU features
\\ --debug-log [scope] Enable printing debug/info log messages for scope
\\ --debug-compile-errors Crash with helpful diagnostics at the first compile error
\\ --debug-link-snapshot Enable dumping of the linker's state in JSON format
\\
;
@ -632,6 +633,7 @@ fn buildOutputType(
var major_subsystem_version: ?u32 = null;
var minor_subsystem_version: ?u32 = null;
var wasi_exec_model: ?std.builtin.WasiExecModel = null;
var enable_link_snapshots: bool = false;
var system_libs = std.ArrayList([]const u8).init(gpa);
defer system_libs.deinit();
@ -929,6 +931,12 @@ fn buildOutputType(
} else {
try log_scopes.append(gpa, args[i]);
}
} else if (mem.eql(u8, arg, "--debug-link-snapshot")) {
if (!build_options.enable_link_snapshots) {
std.log.warn("Zig was compiled without linker snapshots enabled (-Dlink-snapshot). --debug-link-snapshot has no effect.", .{});
} else {
enable_link_snapshots = true;
}
} else if (mem.eql(u8, arg, "-fcompiler-rt")) {
want_compiler_rt = true;
} else if (mem.eql(u8, arg, "-fno-compiler-rt")) {
@ -2139,6 +2147,7 @@ fn buildOutputType(
.subsystem = subsystem,
.wasi_exec_model = wasi_exec_model,
.debug_compile_errors = debug_compile_errors,
.enable_link_snapshots = enable_link_snapshots,
}) catch |err| {
fatal("unable to create compilation: {s}", .{@errorName(err)});
};