Merge pull request #25981 from mlugg/macos-fuzz-2

make the fuzzer vaguely work on macOS
This commit is contained in:
Matthew Lugg 2025-11-20 17:48:35 +00:00 committed by GitHub
commit 8a73fc8d8e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 876 additions and 534 deletions

View File

@ -228,20 +228,21 @@ fn unpackSourcesInner(tar_bytes: []u8) !void {
if (std.mem.endsWith(u8, tar_file.name, ".zig")) { if (std.mem.endsWith(u8, tar_file.name, ".zig")) {
log.debug("found file: '{s}'", .{tar_file.name}); log.debug("found file: '{s}'", .{tar_file.name});
const file_name = try gpa.dupe(u8, tar_file.name); const file_name = try gpa.dupe(u8, tar_file.name);
if (std.mem.indexOfScalar(u8, file_name, '/')) |pkg_name_end| { // This is a hack to guess modules from the tar file contents. To handle modules
const pkg_name = file_name[0..pkg_name_end]; // properly, the build system will need to change the structure here to have one
const gop = try Walk.modules.getOrPut(gpa, pkg_name); // directory per module. This in turn requires compiler enhancements to allow
const file: Walk.File.Index = @enumFromInt(Walk.files.entries.len); // the build system to actually discover the required information.
if (!gop.found_existing or const mod_name, const is_module_root = p: {
std.mem.eql(u8, file_name[pkg_name_end..], "/root.zig") or if (std.mem.find(u8, file_name, "std/")) |i| break :p .{ "std", std.mem.eql(u8, file_name[i + 4 ..], "std.zig") };
std.mem.eql(u8, file_name[pkg_name_end + 1 .. file_name.len - ".zig".len], pkg_name)) if (std.mem.endsWith(u8, file_name, "/builtin.zig")) break :p .{ "builtin", true };
{ break :p .{ "root", std.mem.endsWith(u8, file_name, "/root.zig") };
gop.value_ptr.* = file; };
} const gop = try Walk.modules.getOrPut(gpa, mod_name);
const file_bytes = tar_reader.take(@intCast(tar_file.size)) catch unreachable; const file: Walk.File.Index = @enumFromInt(Walk.files.entries.len);
it.unread_file_bytes = 0; // we have read the whole thing if (!gop.found_existing or is_module_root) gop.value_ptr.* = file;
assert(file == try Walk.add_file(file_name, file_bytes)); const file_bytes = tar_reader.take(@intCast(tar_file.size)) catch unreachable;
} it.unread_file_bytes = 0; // we have read the whole thing
assert(file == try Walk.add_file(file_name, file_bytes));
} else { } else {
log.warn("skipping: '{s}' - the tar creation should have done that", .{tar_file.name}); log.warn("skipping: '{s}' - the tar creation should have done that", .{tar_file.name});
} }

View File

@ -184,7 +184,7 @@ fn mainServer() !void {
const test_fn = builtin.test_functions[index]; const test_fn = builtin.test_functions[index];
const entry_addr = @intFromPtr(test_fn.func); const entry_addr = @intFromPtr(test_fn.func);
try server.serveU64Message(.fuzz_start_addr, entry_addr); try server.serveU64Message(.fuzz_start_addr, fuzz_abi.fuzzer_unslide_address(entry_addr));
defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1); defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1);
is_fuzz_test = false; is_fuzz_test = false;
fuzz_test_index = index; fuzz_test_index = index;

View File

@ -116,13 +116,18 @@ const Executable = struct {
"failed to init memory map for coverage file '{s}': {t}", "failed to init memory map for coverage file '{s}': {t}",
.{ &coverage_file_name, e }, .{ &coverage_file_name, e },
); );
map.appendSliceAssumeCapacity(mem.asBytes(&abi.SeenPcsHeader{ map.appendSliceAssumeCapacity(@ptrCast(&abi.SeenPcsHeader{
.n_runs = 0, .n_runs = 0,
.unique_runs = 0, .unique_runs = 0,
.pcs_len = pcs.len, .pcs_len = pcs.len,
})); }));
map.appendNTimesAssumeCapacity(0, pc_bitset_usizes * @sizeOf(usize)); map.appendNTimesAssumeCapacity(0, pc_bitset_usizes * @sizeOf(usize));
map.appendSliceAssumeCapacity(mem.sliceAsBytes(pcs)); // Relocations have been applied to `pcs` so it contains runtime addresses (with slide
// applied). We need to translate these to the virtual addresses as on disk.
for (pcs) |pc| {
const pc_vaddr = fuzzer_unslide_address(pc);
map.appendSliceAssumeCapacity(@ptrCast(&pc_vaddr));
}
return map; return map;
} else { } else {
const size = coverage_file.getEndPos() catch |e| panic( const size = coverage_file.getEndPos() catch |e| panic(
@ -215,7 +220,16 @@ const Executable = struct {
.{ self.pc_counters.len, pcs.len }, .{ self.pc_counters.len, pcs.len },
); );
self.pc_digest = std.hash.Wyhash.hash(0, mem.sliceAsBytes(pcs)); self.pc_digest = digest: {
// Relocations have been applied to `pcs` so it contains runtime addresses (with slide
// applied). We need to translate these to the virtual addresses as on disk.
var h: std.hash.Wyhash = .init(0);
for (pcs) |pc| {
const pc_vaddr = fuzzer_unslide_address(pc);
h.update(@ptrCast(&pc_vaddr));
}
break :digest h.final();
};
self.shared_seen_pcs = getCoverageFile(cache_dir, pcs, self.pc_digest); self.shared_seen_pcs = getCoverageFile(cache_dir, pcs, self.pc_digest);
return self; return self;
@ -622,6 +636,14 @@ export fn fuzzer_main(limit_kind: abi.LimitKind, amount: u64) void {
} }
} }
export fn fuzzer_unslide_address(addr: usize) usize {
const si = std.debug.getSelfDebugInfo() catch @compileError("unsupported");
const slide = si.getModuleSlide(std.debug.getDebugInfoAllocator(), addr) catch |err| {
std.debug.panic("failed to find virtual address slide: {t}", .{err});
};
return addr - slide;
}
/// Helps determine run uniqueness in the face of recursion. /// Helps determine run uniqueness in the face of recursion.
/// Currently not used by the fuzzer. /// Currently not used by the fuzzer.
export threadlocal var __sancov_lowest_stack: usize = 0; export threadlocal var __sancov_lowest_stack: usize = 0;
@ -1185,13 +1207,13 @@ const Mutation = enum {
const j = rng.uintAtMostBiased(usize, corpus[splice_i].len - len); const j = rng.uintAtMostBiased(usize, corpus[splice_i].len - len);
out.appendSliceAssumeCapacity(corpus[splice_i][j..][0..len]); out.appendSliceAssumeCapacity(corpus[splice_i][j..][0..len]);
}, },
.@"const" => out.appendSliceAssumeCapacity(mem.asBytes( .@"const" => out.appendSliceAssumeCapacity(@ptrCast(
&data_ctx[rng.uintLessThanBiased(usize, data_ctx.len)], &data_ctx[rng.uintLessThanBiased(usize, data_ctx.len)],
)), )),
.small => out.appendSliceAssumeCapacity(mem.asBytes( .small => out.appendSliceAssumeCapacity(@ptrCast(
&mem.nativeTo(data_ctx[0], rng.int(SmallValue), data_ctx[1]), &mem.nativeTo(data_ctx[0], rng.int(SmallValue), data_ctx[1]),
)), )),
.few => out.appendSliceAssumeCapacity(mem.asBytes( .few => out.appendSliceAssumeCapacity(@ptrCast(
&fewValue(rng, data_ctx[0], data_ctx[1]), &fewValue(rng, data_ctx[0], data_ctx[1]),
)), )),
} }

View File

@ -383,7 +383,14 @@ fn prepareTables(fuzz: *Fuzz, run_step: *Step.Run, coverage_id: u64) error{ OutO
errdefer gop.value_ptr.coverage.deinit(fuzz.gpa); errdefer gop.value_ptr.coverage.deinit(fuzz.gpa);
const rebuilt_exe_path = run_step.rebuilt_executable.?; const rebuilt_exe_path = run_step.rebuilt_executable.?;
var debug_info = std.debug.Info.load(fuzz.gpa, rebuilt_exe_path, &gop.value_ptr.coverage) catch |err| { const target = run_step.producer.?.rootModuleTarget();
var debug_info = std.debug.Info.load(
fuzz.gpa,
rebuilt_exe_path,
&gop.value_ptr.coverage,
target.ofmt,
target.cpu.arch,
) catch |err| {
log.err("step '{s}': failed to load debug information for '{f}': {s}", .{ log.err("step '{s}': failed to load debug information for '{f}': {s}", .{
run_step.step.name, rebuilt_exe_path, @errorName(err), run_step.step.name, rebuilt_exe_path, @errorName(err),
}); });
@ -479,9 +486,23 @@ fn addEntryPoint(fuzz: *Fuzz, coverage_id: u64, addr: u64) error{ AlreadyReporte
if (false) { if (false) {
const sl = coverage_map.source_locations[index]; const sl = coverage_map.source_locations[index];
const file_name = coverage_map.coverage.stringAt(coverage_map.coverage.fileAt(sl.file).basename); const file_name = coverage_map.coverage.stringAt(coverage_map.coverage.fileAt(sl.file).basename);
log.debug("server found entry point for 0x{x} at {s}:{d}:{d} - index {d} between {x} and {x}", .{ if (pcs.len == 1) {
addr, file_name, sl.line, sl.column, index, pcs[index - 1], pcs[index + 1], log.debug("server found entry point for 0x{x} at {s}:{d}:{d} - index 0 (final)", .{
}); addr, file_name, sl.line, sl.column,
});
} else if (index == 0) {
log.debug("server found entry point for 0x{x} at {s}:{d}:{d} - index 0 before {x}", .{
addr, file_name, sl.line, sl.column, pcs[index + 1],
});
} else if (index == pcs.len - 1) {
log.debug("server found entry point for 0x{x} at {s}:{d}:{d} - index {d} (final) after {x}", .{
addr, file_name, sl.line, sl.column, index, pcs[index - 1],
});
} else {
log.debug("server found entry point for 0x{x} at {s}:{d}:{d} - index {d} between {x} and {x}", .{
addr, file_name, sl.line, sl.column, index, pcs[index - 1], pcs[index + 1],
});
}
} }
try coverage_map.entry_points.append(fuzz.gpa, @intCast(index)); try coverage_map.entry_points.append(fuzz.gpa, @intCast(index));
} }

View File

@ -729,10 +729,10 @@ const MachODumper = struct {
imports: std.ArrayListUnmanaged([]const u8) = .empty, imports: std.ArrayListUnmanaged([]const u8) = .empty,
fn parse(ctx: *ObjectContext) !void { fn parse(ctx: *ObjectContext) !void {
var it = ctx.getLoadCommandIterator(); var it = try ctx.getLoadCommandIterator();
var i: usize = 0; var i: usize = 0;
while (it.next()) |cmd| { while (try it.next()) |cmd| {
switch (cmd.cmd()) { switch (cmd.hdr.cmd) {
.SEGMENT_64 => { .SEGMENT_64 => {
const seg = cmd.cast(macho.segment_command_64).?; const seg = cmd.cast(macho.segment_command_64).?;
try ctx.segments.append(ctx.gpa, seg); try ctx.segments.append(ctx.gpa, seg);
@ -771,14 +771,13 @@ const MachODumper = struct {
return mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.strtab.items.ptr + off)), 0); return mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.strtab.items.ptr + off)), 0);
} }
fn getLoadCommandIterator(ctx: ObjectContext) macho.LoadCommandIterator { fn getLoadCommandIterator(ctx: ObjectContext) !macho.LoadCommandIterator {
const data = ctx.data[@sizeOf(macho.mach_header_64)..][0..ctx.header.sizeofcmds]; return .init(&ctx.header, ctx.data[@sizeOf(macho.mach_header_64)..]);
return .{ .ncmds = ctx.header.ncmds, .buffer = data };
} }
fn getLoadCommand(ctx: ObjectContext, cmd: macho.LC) ?macho.LoadCommandIterator.LoadCommand { fn getLoadCommand(ctx: ObjectContext, cmd: macho.LC) !?macho.LoadCommandIterator.LoadCommand {
var it = ctx.getLoadCommandIterator(); var it = try ctx.getLoadCommandIterator();
while (it.next()) |lc| if (lc.cmd() == cmd) { while (try it.next()) |lc| if (lc.hdr.cmd == cmd) {
return lc; return lc;
}; };
return null; return null;
@ -872,9 +871,9 @@ const MachODumper = struct {
\\LC {d} \\LC {d}
\\cmd {s} \\cmd {s}
\\cmdsize {d} \\cmdsize {d}
, .{ index, @tagName(lc.cmd()), lc.cmdsize() }); , .{ index, @tagName(lc.hdr.cmd), lc.hdr.cmdsize });
switch (lc.cmd()) { switch (lc.hdr.cmd) {
.SEGMENT_64 => { .SEGMENT_64 => {
const seg = lc.cast(macho.segment_command_64).?; const seg = lc.cast(macho.segment_command_64).?;
try writer.writeByte('\n'); try writer.writeByte('\n');
@ -1592,9 +1591,9 @@ const MachODumper = struct {
.headers => { .headers => {
try ObjectContext.dumpHeader(ctx.header, writer); try ObjectContext.dumpHeader(ctx.header, writer);
var it = ctx.getLoadCommandIterator(); var it = try ctx.getLoadCommandIterator();
var i: usize = 0; var i: usize = 0;
while (it.next()) |cmd| { while (try it.next()) |cmd| {
try ObjectContext.dumpLoadCommand(cmd, i, writer); try ObjectContext.dumpLoadCommand(cmd, i, writer);
try writer.writeByte('\n'); try writer.writeByte('\n');
@ -1615,7 +1614,7 @@ const MachODumper = struct {
.dyld_weak_bind, .dyld_weak_bind,
.dyld_lazy_bind, .dyld_lazy_bind,
=> { => {
const cmd = ctx.getLoadCommand(.DYLD_INFO_ONLY) orelse const cmd = try ctx.getLoadCommand(.DYLD_INFO_ONLY) orelse
return step.fail("no dyld info found", .{}); return step.fail("no dyld info found", .{});
const lc = cmd.cast(macho.dyld_info_command).?; const lc = cmd.cast(macho.dyld_info_command).?;
@ -1649,7 +1648,7 @@ const MachODumper = struct {
}, },
.exports => blk: { .exports => blk: {
if (ctx.getLoadCommand(.DYLD_INFO_ONLY)) |cmd| { if (try ctx.getLoadCommand(.DYLD_INFO_ONLY)) |cmd| {
const lc = cmd.cast(macho.dyld_info_command).?; const lc = cmd.cast(macho.dyld_info_command).?;
if (lc.export_size > 0) { if (lc.export_size > 0) {
const data = ctx.data[lc.export_off..][0..lc.export_size]; const data = ctx.data[lc.export_off..][0..lc.export_size];

View File

@ -1932,6 +1932,11 @@ pub fn rebuildInFuzzMode(c: *Compile, gpa: Allocator, progress_node: std.Progres
c.step.result_error_bundle.deinit(gpa); c.step.result_error_bundle.deinit(gpa);
c.step.result_error_bundle = std.zig.ErrorBundle.empty; c.step.result_error_bundle = std.zig.ErrorBundle.empty;
if (c.step.result_failed_command) |cmd| {
gpa.free(cmd);
c.step.result_failed_command = null;
}
const zig_args = try getZigArgs(c, true); const zig_args = try getZigArgs(c, true);
const maybe_output_bin_path = try c.step.evalZigProcess(zig_args, progress_node, false, null, gpa); const maybe_output_bin_path = try c.step.evalZigProcess(zig_args, progress_node, false, null, gpa);
return maybe_output_bin_path.?; return maybe_output_bin_path.?;

View File

@ -1140,6 +1140,12 @@ pub fn rerunInFuzzMode(
.output_file, .output_directory => unreachable, .output_file, .output_directory => unreachable,
} }
} }
if (run.step.result_failed_command) |cmd| {
fuzz.gpa.free(cmd);
run.step.result_failed_command = null;
}
const has_side_effects = false; const has_side_effects = false;
const rand_int = std.crypto.random.int(u64); const rand_int = std.crypto.random.int(u64);
const tmp_dir_path = "tmp" ++ fs.path.sep_str ++ std.fmt.hex(rand_int); const tmp_dir_path = "tmp" ++ fs.path.sep_str ++ std.fmt.hex(rand_int);
@ -1150,7 +1156,7 @@ pub fn rerunInFuzzMode(
.web_server = null, // only needed for time reports .web_server = null, // only needed for time reports
.ttyconf = fuzz.ttyconf, .ttyconf = fuzz.ttyconf,
.unit_test_timeout_ns = null, // don't time out fuzz tests for now .unit_test_timeout_ns = null, // don't time out fuzz tests for now
.gpa = undefined, // not used by `runCommand` .gpa = fuzz.gpa,
}, .{ }, .{
.unit_test_index = unit_test_index, .unit_test_index = unit_test_index,
.fuzz = fuzz, .fuzz = fuzz,
@ -1870,7 +1876,10 @@ fn pollZigTest(
// test. For instance, if the test runner leaves this much time between us requesting a test to // test. For instance, if the test runner leaves this much time between us requesting a test to
// start and it acknowledging the test starting, we terminate the child and raise an error. This // start and it acknowledging the test starting, we terminate the child and raise an error. This
// *should* never happen, but could in theory be caused by some very unlucky IB in a test. // *should* never happen, but could in theory be caused by some very unlucky IB in a test.
const response_timeout_ns = @max(options.unit_test_timeout_ns orelse 0, 60 * std.time.ns_per_s); const response_timeout_ns: ?u64 = ns: {
if (fuzz_context != null) break :ns null; // don't timeout fuzz tests
break :ns @max(options.unit_test_timeout_ns orelse 0, 60 * std.time.ns_per_s);
};
const stdout = poller.reader(.stdout); const stdout = poller.reader(.stdout);
const stderr = poller.reader(.stderr); const stderr = poller.reader(.stderr);

View File

@ -145,6 +145,7 @@ pub const fuzz = struct {
pub extern fn fuzzer_init_test(test_one: TestOne, unit_test_name: Slice) void; pub extern fn fuzzer_init_test(test_one: TestOne, unit_test_name: Slice) void;
pub extern fn fuzzer_new_input(bytes: Slice) void; pub extern fn fuzzer_new_input(bytes: Slice) void;
pub extern fn fuzzer_main(limit_kind: LimitKind, amount: u64) void; pub extern fn fuzzer_main(limit_kind: LimitKind, amount: u64) void;
pub extern fn fuzzer_unslide_address(addr: usize) usize;
pub const Slice = extern struct { pub const Slice = extern struct {
ptr: [*]const u8, ptr: [*]const u8,

View File

@ -270,16 +270,17 @@ fn writeSplatHeaderLimitFinish(
remaining -= copy_len; remaining -= copy_len;
if (remaining == 0) break :v; if (remaining == 0) break :v;
} }
for (data[0 .. data.len - 1]) |buf| if (buf.len != 0) { for (data[0 .. data.len - 1]) |buf| {
const copy_len = @min(header.len, remaining); if (buf.len == 0) continue;
vecs[i] = buf; const copy_len = @min(buf.len, remaining);
vecs[i] = buf[0..copy_len];
i += 1; i += 1;
remaining -= copy_len; remaining -= copy_len;
if (remaining == 0) break :v; if (remaining == 0) break :v;
if (vecs.len - i == 0) break :v; if (vecs.len - i == 0) break :v;
}; }
const pattern = data[data.len - 1]; const pattern = data[data.len - 1];
if (splat == 1) { if (splat == 1 or remaining < pattern.len) {
vecs[i] = pattern[0..@min(remaining, pattern.len)]; vecs[i] = pattern[0..@min(remaining, pattern.len)];
i += 1; i += 1;
break :v; break :v;
@ -915,7 +916,16 @@ pub fn sendFileHeader(
if (new_end <= w.buffer.len) { if (new_end <= w.buffer.len) {
@memcpy(w.buffer[w.end..][0..header.len], header); @memcpy(w.buffer[w.end..][0..header.len], header);
w.end = new_end; w.end = new_end;
return header.len + try w.vtable.sendFile(w, file_reader, limit); const file_bytes = w.vtable.sendFile(w, file_reader, limit) catch |err| switch (err) {
error.ReadFailed, error.WriteFailed => |e| return e,
error.EndOfStream, error.Unimplemented => |e| {
// These errors are non-fatal, so if we wrote any header bytes, we will report that
// and suppress this error. Only if there was no header may we return the error.
if (header.len != 0) return header.len;
return e;
},
};
return header.len + file_bytes;
} }
const buffered_contents = limit.slice(file_reader.interface.buffered()); const buffered_contents = limit.slice(file_reader.interface.buffered());
const n = try w.vtable.drain(w, &.{ header, buffered_contents }, 1); const n = try w.vtable.drain(w, &.{ header, buffered_contents }, 1);

View File

@ -21,6 +21,7 @@ const root = @import("root");
pub const Dwarf = @import("debug/Dwarf.zig"); pub const Dwarf = @import("debug/Dwarf.zig");
pub const Pdb = @import("debug/Pdb.zig"); pub const Pdb = @import("debug/Pdb.zig");
pub const ElfFile = @import("debug/ElfFile.zig"); pub const ElfFile = @import("debug/ElfFile.zig");
pub const MachOFile = @import("debug/MachOFile.zig");
pub const Info = @import("debug/Info.zig"); pub const Info = @import("debug/Info.zig");
pub const Coverage = @import("debug/Coverage.zig"); pub const Coverage = @import("debug/Coverage.zig");
pub const cpu_context = @import("debug/cpu_context.zig"); pub const cpu_context = @import("debug/cpu_context.zig");
@ -1366,7 +1367,7 @@ test printLineFromFile {
/// The returned allocator should be thread-safe if the compilation is multi-threaded, because /// The returned allocator should be thread-safe if the compilation is multi-threaded, because
/// multiple threads could capture and/or print stack traces simultaneously. /// multiple threads could capture and/or print stack traces simultaneously.
fn getDebugInfoAllocator() Allocator { pub fn getDebugInfoAllocator() Allocator {
// Allow overriding the debug info allocator by exposing `root.debug.getDebugInfoAllocator`. // Allow overriding the debug info allocator by exposing `root.debug.getDebugInfoAllocator`.
if (@hasDecl(root, "debug") and @hasDecl(root.debug, "getDebugInfoAllocator")) { if (@hasDecl(root, "debug") and @hasDecl(root.debug, "getDebugInfoAllocator")) {
return root.debug.getDebugInfoAllocator(); return root.debug.getDebugInfoAllocator();

View File

@ -9,49 +9,67 @@
const std = @import("../std.zig"); const std = @import("../std.zig");
const Allocator = std.mem.Allocator; const Allocator = std.mem.Allocator;
const Path = std.Build.Cache.Path; const Path = std.Build.Cache.Path;
const ElfFile = std.debug.ElfFile;
const assert = std.debug.assert; const assert = std.debug.assert;
const Coverage = std.debug.Coverage; const Coverage = std.debug.Coverage;
const SourceLocation = std.debug.Coverage.SourceLocation; const SourceLocation = std.debug.Coverage.SourceLocation;
const ElfFile = std.debug.ElfFile;
const MachOFile = std.debug.MachOFile;
const Info = @This(); const Info = @This();
/// Sorted by key, ascending. impl: union(enum) {
address_map: std.AutoArrayHashMapUnmanaged(u64, ElfFile), elf: ElfFile,
macho: MachOFile,
},
/// Externally managed, outlives this `Info` instance. /// Externally managed, outlives this `Info` instance.
coverage: *Coverage, coverage: *Coverage,
pub const LoadError = std.fs.File.OpenError || ElfFile.LoadError || std.debug.Dwarf.ScanError || error{MissingDebugInfo}; pub const LoadError = std.fs.File.OpenError || ElfFile.LoadError || MachOFile.Error || std.debug.Dwarf.ScanError || error{ MissingDebugInfo, UnsupportedDebugInfo };
pub fn load(gpa: Allocator, path: Path, coverage: *Coverage) LoadError!Info { pub fn load(gpa: Allocator, path: Path, coverage: *Coverage, format: std.Target.ObjectFormat, arch: std.Target.Cpu.Arch) LoadError!Info {
var file = try path.root_dir.handle.openFile(path.sub_path, .{}); switch (format) {
defer file.close(); .elf => {
var file = try path.root_dir.handle.openFile(path.sub_path, .{});
defer file.close();
var elf_file: ElfFile = try .load(gpa, file, null, &.none); var elf_file: ElfFile = try .load(gpa, file, null, &.none);
errdefer elf_file.deinit(gpa); errdefer elf_file.deinit(gpa);
if (elf_file.dwarf == null) return error.MissingDebugInfo; if (elf_file.dwarf == null) return error.MissingDebugInfo;
try elf_file.dwarf.?.open(gpa, elf_file.endian); try elf_file.dwarf.?.open(gpa, elf_file.endian);
try elf_file.dwarf.?.populateRanges(gpa, elf_file.endian); try elf_file.dwarf.?.populateRanges(gpa, elf_file.endian);
var info: Info = .{ return .{
.address_map = .{}, .impl = .{ .elf = elf_file },
.coverage = coverage, .coverage = coverage,
}; };
try info.address_map.put(gpa, 0, elf_file); },
errdefer comptime unreachable; // elf_file is owned by the map now .macho => {
return info; const path_str = try path.toString(gpa);
defer gpa.free(path_str);
var macho_file: MachOFile = try .load(gpa, path_str, arch);
errdefer macho_file.deinit(gpa);
return .{
.impl = .{ .macho = macho_file },
.coverage = coverage,
};
},
else => return error.UnsupportedDebugInfo,
}
} }
pub fn deinit(info: *Info, gpa: Allocator) void { pub fn deinit(info: *Info, gpa: Allocator) void {
for (info.address_map.values()) |*elf_file| { switch (info.impl) {
elf_file.dwarf.?.deinit(gpa); .elf => |*ef| ef.deinit(gpa),
.macho => |*mf| mf.deinit(gpa),
} }
info.address_map.deinit(gpa);
info.* = undefined; info.* = undefined;
} }
pub const ResolveAddressesError = Coverage.ResolveAddressesDwarfError; pub const ResolveAddressesError = Coverage.ResolveAddressesDwarfError || error{UnsupportedDebugInfo};
/// Given an array of virtual memory addresses, sorted ascending, outputs a /// Given an array of virtual memory addresses, sorted ascending, outputs a
/// corresponding array of source locations. /// corresponding array of source locations.
@ -64,7 +82,28 @@ pub fn resolveAddresses(
output: []SourceLocation, output: []SourceLocation,
) ResolveAddressesError!void { ) ResolveAddressesError!void {
assert(sorted_pc_addrs.len == output.len); assert(sorted_pc_addrs.len == output.len);
if (info.address_map.entries.len != 1) @panic("TODO"); switch (info.impl) {
const elf_file = &info.address_map.values()[0]; .elf => |*ef| return info.coverage.resolveAddressesDwarf(gpa, ef.endian, sorted_pc_addrs, output, &ef.dwarf.?),
return info.coverage.resolveAddressesDwarf(gpa, elf_file.endian, sorted_pc_addrs, output, &elf_file.dwarf.?); .macho => |*mf| {
// Resolving all of the addresses at once unfortunately isn't so easy in Mach-O binaries
// due to split debug information. For now, we'll just resolve the addresses one by one.
for (sorted_pc_addrs, output) |pc_addr, *src_loc| {
const dwarf, const dwarf_pc_addr = mf.getDwarfForAddress(gpa, pc_addr) catch |err| switch (err) {
error.InvalidMachO, error.InvalidDwarf => return error.InvalidDebugInfo,
else => |e| return e,
};
if (dwarf.ranges.items.len == 0) {
dwarf.populateRanges(gpa, .little) catch |err| switch (err) {
error.EndOfStream,
error.Overflow,
error.StreamTooLong,
error.ReadFailed,
=> return error.InvalidDebugInfo,
else => |e| return e,
};
}
try info.coverage.resolveAddressesDwarf(gpa, .little, &.{dwarf_pc_addr}, src_loc[0..1], dwarf);
}
},
}
} }

548
lib/std/debug/MachOFile.zig Normal file
View File

@ -0,0 +1,548 @@
/// The entire contents of the Mach-O (or universal) binary, memory-mapped read-only.
/// Unmapped by `deinit`. `strings` is a view into this memory.
mapped_memory: []align(std.heap.page_size_min) const u8,
/// Function symbols gathered from the symtab, sorted ascending by address so they
/// can be binary-searched. Allocated; freed by `deinit`.
symbols: []const Symbol,
/// The binary's symtab string table; a view into `mapped_memory`, not separately allocated.
strings: []const u8,
/// Virtual address of the `__TEXT` segment, taken from the segment load commands.
text_vmaddr: u64,
/// Lazily-loaded debug info for each object file referenced by STAB entries; a failed
/// load is cached as the error value. Key is index into `strings` of the file path.
ofiles: std.AutoArrayHashMapUnmanaged(u32, Error!OFile),
/// Errors that can occur while loading the main binary or one of the object files
/// referenced by its STAB entries (the values of `ofiles`).
pub const Error = error{
    /// A file was not a valid 64-bit Mach-O (or universal) image.
    InvalidMachO,
    /// DWARF debug information was present but could not be parsed.
    InvalidDwarf,
    /// The file contained no usable symbol or debug information
    /// (e.g. the requested architecture is absent from a universal binary).
    MissingDebugInfo,
    /// The debug info uses a format this loader does not handle
    /// (e.g. the 64-bit universal binary format; see `load`).
    UnsupportedDebugInfo,
    /// Reading or mapping a file failed.
    ReadFailed,
    OutOfMemory,
};
/// Releases everything owned by `mf`: all successfully-loaded object files, the
/// symbol slice, and the mapping of the binary itself. `mf` is invalid afterwards.
pub fn deinit(mf: *MachOFile, gpa: Allocator) void {
    // Object files whose load failed own no resources; only unwind successful loads.
    for (mf.ofiles.values()) |*entry| {
        if (entry.*) |*of| {
            posix.munmap(of.mapped_memory);
            of.dwarf.deinit(gpa);
            of.symbols_by_name.deinit(gpa);
        } else |_| {}
    }
    mf.ofiles.deinit(gpa);
    gpa.free(mf.symbols);
    // `strings` points into this mapping, so it is released last.
    posix.munmap(mf.mapped_memory);
}
/// Maps the Mach-O binary at `path` and builds the sorted symbol table needed by
/// `getDwarfForAddress` and `lookupSymbolName`. Handles universal ("fat") binaries
/// by selecting the slice matching `arch`.
///
/// Only `.x86_64` and `.aarch64` are supported; passing any other `arch` is illegal.
/// On success the caller owns the result and must release it with `deinit`.
pub fn load(gpa: Allocator, path: []const u8, arch: std.Target.Cpu.Arch) Error!MachOFile {
    switch (arch) {
        .x86_64, .aarch64 => {},
        else => unreachable, // caller guarantees a supported architecture
    }
    const all_mapped_memory = try mapDebugInfoFile(path);
    errdefer posix.munmap(all_mapped_memory);
    // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal
    // binary": a simple file format which contains Mach-O binaries for multiple targets. For
    // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images
    // for both ARM64 macOS and x86_64 macOS.
    if (all_mapped_memory.len < 4) return error.InvalidMachO;
    const magic = std.mem.readInt(u32, all_mapped_memory.ptr[0..4], .little);
    // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`.
    const mapped_macho = switch (magic) {
        macho.MH_MAGIC_64 => all_mapped_memory,
        macho.FAT_CIGAM => mapped_macho: {
            // This is the universal binary format (aka a "fat binary").
            // Note that fat headers are stored big-endian.
            var fat_r: Io.Reader = .fixed(all_mapped_memory);
            const hdr = fat_r.takeStruct(macho.fat_header, .big) catch |err| switch (err) {
                error.ReadFailed => unreachable, // a fixed reader cannot fail to read
                error.EndOfStream => return error.InvalidMachO,
            };
            const want_cpu_type = switch (arch) {
                .x86_64 => macho.CPU_TYPE_X86_64,
                .aarch64 => macho.CPU_TYPE_ARM64,
                else => unreachable,
            };
            for (0..hdr.nfat_arch) |_| {
                const fat_arch = fat_r.takeStruct(macho.fat_arch, .big) catch |err| switch (err) {
                    error.ReadFailed => unreachable,
                    error.EndOfStream => return error.InvalidMachO,
                };
                if (fat_arch.cputype != want_cpu_type) continue;
                if (fat_arch.offset + fat_arch.size > all_mapped_memory.len) return error.InvalidMachO;
                break :mapped_macho all_mapped_memory[fat_arch.offset..][0..fat_arch.size];
            }
            // `arch` was not present in the fat binary.
            return error.MissingDebugInfo;
        },
        // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It
        // will be fairly easy to add support here if necessary; it's very similar to above.
        macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo,
        else => return error.InvalidMachO,
    };
    var r: Io.Reader = .fixed(mapped_macho);
    const hdr = r.takeStruct(macho.mach_header_64, .little) catch |err| switch (err) {
        error.ReadFailed => unreachable,
        error.EndOfStream => return error.InvalidMachO,
    };
    if (hdr.magic != macho.MH_MAGIC_64)
        return error.InvalidMachO;
    // Scan the load commands for the symtab location and the `__TEXT` segment's
    // virtual address; both are required.
    const symtab: macho.symtab_command, const text_vmaddr: u64 = lcs: {
        var it: macho.LoadCommandIterator = try .init(&hdr, mapped_macho[@sizeOf(macho.mach_header_64)..]);
        var symtab: ?macho.symtab_command = null;
        var text_vmaddr: ?u64 = null;
        while (try it.next()) |cmd| switch (cmd.hdr.cmd) {
            .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidMachO,
            .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| {
                if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue;
                text_vmaddr = seg_cmd.vmaddr;
            },
            else => {},
        };
        break :lcs .{
            symtab orelse return error.MissingDebugInfo,
            text_vmaddr orelse return error.MissingDebugInfo,
        };
    };
    // NOTE(review): assumes `stroff`/`strsize` are within `mapped_macho` and that
    // `strsize != 0` (`strsize - 1` would underflow and trap) — verify this is
    // acceptable for the binaries this loader is pointed at.
    const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1];
    var symbols: std.ArrayList(Symbol) = try .initCapacity(gpa, symtab.nsyms);
    defer symbols.deinit(gpa);
    // This map is temporary; it is used only to detect duplicates here. This is
    // necessary because we prefer to use STAB ("symbolic debugging table") symbols,
    // but they might not be present, so we track normal symbols too.
    // Indices match 1-1 with those of `symbols`.
    var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty;
    defer symbol_names.deinit(gpa);
    try symbol_names.ensureUnusedCapacity(gpa, symtab.nsyms);
    // State for the STAB state machine below. The transitions encode the expected
    // entry order: an N_OSO opens an object file, then each function is described
    // by an N_BNSYM / N_FUN / N_FUN / N_ENSYM group; anything else is rejected.
    var ofile: u32 = undefined;
    var last_sym: Symbol = undefined;
    var state: enum {
        init,
        oso_open,
        oso_close,
        bnsym,
        fun_strx,
        fun_size,
        ensym,
    } = .init;
    var sym_r: Io.Reader = .fixed(mapped_macho[symtab.symoff..]);
    for (0..symtab.nsyms) |_| {
        const sym = sym_r.takeStruct(macho.nlist_64, .little) catch |err| switch (err) {
            error.ReadFailed => unreachable,
            error.EndOfStream => return error.InvalidMachO,
        };
        if (sym.n_type.bits.is_stab == 0) {
            // A normal (non-STAB) symbol. Keep named `sect` symbols, but never
            // overwrite an existing entry: STAB data, when present, is preferred.
            if (sym.n_strx == 0) continue;
            switch (sym.n_type.bits.type) {
                .undf, .pbud, .indr, .abs, _ => continue,
                .sect => {
                    const name = std.mem.sliceTo(strings[sym.n_strx..], 0);
                    const gop = symbol_names.getOrPutAssumeCapacity(name);
                    if (!gop.found_existing) {
                        assert(gop.index == symbols.items.len);
                        symbols.appendAssumeCapacity(.{
                            .strx = sym.n_strx,
                            .addr = sym.n_value,
                            .ofile = Symbol.unknown_ofile,
                        });
                    }
                },
            }
            continue;
        }
        // TODO handle globals N_GSYM, and statics N_STSYM
        switch (sym.n_type.stab) {
            .oso => switch (state) {
                .init, .oso_close => {
                    state = .oso_open;
                    ofile = sym.n_strx;
                },
                else => return error.InvalidMachO,
            },
            .bnsym => switch (state) {
                .oso_open, .ensym => {
                    state = .bnsym;
                    last_sym = .{
                        .strx = 0,
                        .addr = sym.n_value,
                        .ofile = ofile,
                    };
                },
                else => return error.InvalidMachO,
            },
            .fun => switch (state) {
                .bnsym => {
                    // The first N_FUN of the pair carries the function name.
                    state = .fun_strx;
                    last_sym.strx = sym.n_strx;
                },
                .fun_strx => {
                    // Second N_FUN of the pair (see `fun_size`); its payload is unused here.
                    state = .fun_size;
                },
                else => return error.InvalidMachO,
            },
            .ensym => switch (state) {
                .fun_size => {
                    state = .ensym;
                    // A completed STAB group replaces any normal symbol of the same name.
                    if (last_sym.strx != 0) {
                        const name = std.mem.sliceTo(strings[last_sym.strx..], 0);
                        const gop = symbol_names.getOrPutAssumeCapacity(name);
                        if (!gop.found_existing) {
                            assert(gop.index == symbols.items.len);
                            symbols.appendAssumeCapacity(last_sym);
                        } else {
                            symbols.items[gop.index] = last_sym;
                        }
                    }
                },
                else => return error.InvalidMachO,
            },
            .so => switch (state) {
                .init, .oso_close => {},
                .oso_open, .ensym => {
                    state = .oso_close;
                },
                else => return error.InvalidMachO,
            },
            else => {},
        }
    }
    switch (state) {
        .init => {
            // Missing STAB symtab entries is still okay, unless there were also no normal symbols.
            if (symbols.items.len == 0) return error.MissingDebugInfo;
        },
        .oso_close => {},
        else => return error.InvalidMachO, // corrupted STAB entries in symtab
    }
    const symbols_slice = try symbols.toOwnedSlice(gpa);
    errdefer gpa.free(symbols_slice);
    // Even though lld emits symbols in ascending order, this debug code
    // should work for programs linked in any valid way.
    // This sort is so that we can binary search later.
    mem.sort(Symbol, symbols_slice, {}, Symbol.addressLessThan);
    return .{
        .mapped_memory = all_mapped_memory,
        .symbols = symbols_slice,
        .strings = strings,
        .ofiles = .empty,
        .text_vmaddr = text_vmaddr,
    };
}
/// Returns the DWARF info of the object file containing `vaddr`, together with
/// `vaddr` translated into that object file's address space, so the caller can
/// perform the lookup in the returned `Dwarf`.
///
/// Object files are loaded lazily on first use; both successes and failures are
/// cached in `mf.ofiles`. The returned pointer refers to storage inside
/// `mf.ofiles` and is invalidated by a later call to this function (the map may
/// grow) or by `deinit`.
pub fn getDwarfForAddress(mf: *MachOFile, gpa: Allocator, vaddr: u64) !struct { *Dwarf, u64 } {
    const symbol = Symbol.find(mf.symbols, vaddr) orelse return error.MissingDebugInfo;
    if (symbol.ofile == Symbol.unknown_ofile) return error.MissingDebugInfo;
    // offset of `vaddr` from the start of `symbol`
    const address_symbol_offset = vaddr - symbol.addr;
    // The symbol name from the N_FUN STAB entry; used below to find the
    // corresponding symbol inside the object file.
    const stab_symbol = mem.sliceTo(mf.strings[symbol.strx..], 0);
    const gop = try mf.ofiles.getOrPut(gpa, symbol.ofile);
    if (!gop.found_existing) {
        const name = mem.sliceTo(mf.strings[symbol.ofile..], 0);
        gop.value_ptr.* = loadOFile(gpa, name);
    }
    // Capture the payload *in place*. The previous `&(gop.value_ptr.* catch ...)`
    // took the address of a stack-local copy of the `OFile`, so the returned
    // `*Dwarf` dangled and any caller mutation (e.g. populating DWARF ranges)
    // was applied to a discarded copy instead of the cached entry.
    const of = if (gop.value_ptr.*) |*payload| payload else |err| return err;
    const symbol_index = of.symbols_by_name.getKeyAdapted(
        @as([]const u8, stab_symbol),
        @as(OFile.SymbolAdapter, .{ .strtab = of.strtab, .symtab_raw = of.symtab_raw }),
    ) orelse return error.MissingDebugInfo;
    // The symtab is stored little-endian on disk; swap on big-endian hosts.
    const symbol_ofile_vaddr = vaddr: {
        var sym = of.symtab_raw[symbol_index];
        if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &sym);
        break :vaddr sym.n_value;
    };
    return .{ &of.dwarf, symbol_ofile_vaddr + address_symbol_offset };
}
/// Returns the name of the symbol containing `vaddr`, or `error.MissingDebugInfo` if no
/// symbol covers that address. The returned slice aliases `mf.strings`; do not free it.
pub fn lookupSymbolName(mf: *MachOFile, vaddr: u64) error{MissingDebugInfo}![]const u8 {
    if (Symbol.find(mf.symbols, vaddr)) |sym| {
        return mem.sliceTo(mf.strings[sym.strx..], 0);
    }
    return error.MissingDebugInfo;
}
const OFile = struct {
    /// Backing mapping for `strtab` and `symtab_raw` (and the DWARF section data).
    mapped_memory: []align(std.heap.page_size_min) const u8,
    dwarf: Dwarf,
    strtab: []const u8,
    symtab_raw: []align(1) const macho.nlist_64,
    /// All named symbols in `symtab_raw`. Stored `u32` key is the index into `symtab_raw`.
    /// Accessed through `SymbolAdapter`, so that the symbol name is used as the logical key.
    symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true),

    const SymbolAdapter = struct {
        strtab: []const u8,
        symtab_raw: []align(1) const macho.nlist_64,

        /// Hashes a symbol name; must agree with `eql` below.
        pub fn hash(adapter: SymbolAdapter, sym_name: []const u8) u32 {
            _ = adapter;
            const h64 = std.hash.Wyhash.hash(0, sym_name);
            return @truncate(h64);
        }

        /// Compares a queried name against the name of the symtab entry at `stored_index`.
        pub fn eql(adapter: SymbolAdapter, queried_name: []const u8, stored_index: u32, map_index: usize) bool {
            _ = map_index;
            var stored_sym = adapter.symtab_raw[stored_index];
            // Entries are stored little-endian; swap before reading on other hosts.
            if (builtin.cpu.arch.endian() != .little) {
                std.mem.byteSwapAllFields(macho.nlist_64, &stored_sym);
            }
            const stored_name = std.mem.sliceTo(adapter.strtab[stored_sym.n_strx..], 0);
            return mem.eql(u8, queried_name, stored_name);
        }
    };
};
const Symbol = struct {
    /// Offset of this symbol's name in the binary's string table.
    strx: u32,
    /// Virtual address of the symbol.
    addr: u64,
    /// String-table offset naming the object file this symbol came from.
    /// Value may be `unknown_ofile`.
    ofile: u32,

    const unknown_ofile = std.math.maxInt(u32);

    fn addressLessThan(context: void, lhs: Symbol, rhs: Symbol) bool {
        _ = context;
        return lhs.addr < rhs.addr;
    }

    /// Returns the symbol covering `address`: the one with the greatest `addr` not
    /// exceeding `address`. Assumes that `symbols` is sorted in order of ascending `addr`.
    fn find(symbols: []const Symbol, address: usize) ?*const Symbol {
        if (symbols.len == 0) return null; // no potential match
        if (address < symbols[0].addr) return null; // address is before the lowest-address symbol
        // Binary search maintaining the invariant `symbols[base].addr <= address`.
        var base: usize = 0;
        var remaining: usize = symbols.len;
        while (remaining > 1) {
            const half = remaining / 2;
            if (address >= symbols[base + half].addr) {
                base += half;
                remaining -= half;
            } else {
                remaining = half;
            }
        }
        return &symbols[base];
    }
    test find {
        const symbols: []const Symbol = &.{
            .{ .addr = 100, .strx = undefined, .ofile = undefined },
            .{ .addr = 200, .strx = undefined, .ofile = undefined },
            .{ .addr = 300, .strx = undefined, .ofile = undefined },
        };
        try testing.expectEqual(null, find(symbols, 0));
        try testing.expectEqual(null, find(symbols, 99));
        // Each address resolves to the symbol whose range it falls within.
        const cases = [_]struct { usize, usize }{
            .{ 100, 0 }, .{ 150, 0 }, .{ 199, 0 },
            .{ 200, 1 }, .{ 250, 1 }, .{ 299, 1 },
            .{ 300, 2 }, .{ 301, 2 }, .{ 5000, 2 },
        };
        for (cases) |case| {
            const address, const index = case;
            try testing.expectEqual(&symbols[index], find(symbols, address).?);
        }
    }
};
// Reference `Symbol` from a top-level test so its nested `test find` is discovered
// and run by the test runner.
test {
    _ = Symbol;
}
/// Maps and parses the object file named by `o_file_name`, as given by an N_OSO STAB
/// entry: either a plain path like 'path/to/foo.o', or an archive member reference of
/// the form 'path/to/archive.a(foo.o)'. On success, the returned `OFile` owns the file
/// mapping, the opened DWARF info, and the by-name symbol index.
fn loadOFile(gpa: Allocator, o_file_name: []const u8) !OFile {
    // `all_mapped_memory` is the full mapping we hold on to; `mapped_ofile` is the
    // Mach-O object within it (the whole mapping, or one member of a mapped archive).
    const all_mapped_memory, const mapped_ofile = map: {
        const open_paren = paren: {
            if (std.mem.endsWith(u8, o_file_name, ")")) {
                if (std.mem.findScalarLast(u8, o_file_name, '(')) |i| {
                    break :paren i;
                }
            }
            // Not an archive, just a normal path to a .o file
            const m = try mapDebugInfoFile(o_file_name);
            break :map .{ m, m };
        };
        // We have the form 'path/to/archive.a(entry.o)'. Map the archive and find the object file in question.
        const archive_path = o_file_name[0..open_paren];
        const target_name_in_archive = o_file_name[open_paren + 1 .. o_file_name.len - 1];
        const mapped_archive = try mapDebugInfoFile(archive_path);
        errdefer posix.munmap(mapped_archive);
        var ar_reader: Io.Reader = .fixed(mapped_archive);
        const ar_magic = ar_reader.take(8) catch return error.InvalidMachO;
        if (!std.mem.eql(u8, ar_magic, "!<arch>\n")) return error.InvalidMachO;
        // Walk the archive's members looking for one named `target_name_in_archive`.
        while (true) {
            // Reached the end of the archive without finding the member.
            if (ar_reader.seek == ar_reader.buffer.len) return error.MissingDebugInfo;
            // Fixed-size ar member header: name (16 bytes), then mtime/uid/gid/mode
            // (12+6+6+8 bytes, skipped), then the decimal size (10) and "`\n" terminator.
            const raw_name = ar_reader.takeArray(16) catch return error.InvalidMachO;
            ar_reader.discardAll(12 + 6 + 6 + 8) catch return error.InvalidMachO;
            const raw_size = ar_reader.takeArray(10) catch return error.InvalidMachO;
            const file_magic = ar_reader.takeArray(2) catch return error.InvalidMachO;
            if (!std.mem.eql(u8, file_magic, "`\n")) return error.InvalidMachO;
            const size = std.fmt.parseInt(u32, mem.sliceTo(raw_size, ' '), 10) catch return error.InvalidMachO;
            const raw_data = ar_reader.take(size) catch return error.InvalidMachO;
            const entry_name: []const u8, const entry_contents: []const u8 = entry: {
                if (!std.mem.startsWith(u8, raw_name, "#1/")) {
                    // Short name, stored directly in the header field (terminated by '/').
                    break :entry .{ mem.sliceTo(raw_name, '/'), raw_data };
                }
                // BSD extended name: "#1/<len>" means the real name occupies the first
                // <len> bytes of the member's data.
                const len = std.fmt.parseInt(u32, mem.sliceTo(raw_name[3..], ' '), 10) catch return error.InvalidMachO;
                if (len > size) return error.InvalidMachO;
                break :entry .{ mem.sliceTo(raw_data[0..len], 0), raw_data[len..] };
            };
            if (std.mem.eql(u8, entry_name, target_name_in_archive)) {
                break :map .{ mapped_archive, entry_contents };
            }
        }
    };
    errdefer posix.munmap(all_mapped_memory);
    var r: Io.Reader = .fixed(mapped_ofile);
    const hdr = r.takeStruct(macho.mach_header_64, .little) catch |err| switch (err) {
        error.ReadFailed => unreachable,
        error.EndOfStream => return error.InvalidMachO,
    };
    if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidMachO;
    // Locate the segment load command and the symtab load command.
    const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: {
        var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null;
        var symtab_cmd: ?macho.symtab_command = null;
        var it: macho.LoadCommandIterator = try .init(&hdr, mapped_ofile[@sizeOf(macho.mach_header_64)..]);
        while (try it.next()) |lc| switch (lc.hdr.cmd) {
            .SEGMENT_64 => seg_cmd = lc,
            .SYMTAB => symtab_cmd = lc.cast(macho.symtab_command) orelse return error.InvalidMachO,
            else => {},
        };
        break :cmds .{
            seg_cmd orelse return error.MissingDebugInfo,
            symtab_cmd orelse return error.MissingDebugInfo,
        };
    };
    // NOTE(review): `stroff + strsize` (and `symoff + n_sym_bytes` below) are u32
    // additions; a corrupt file could trip safety-checked overflow here rather than
    // returning `error.InvalidMachO` — consider widening before adding.
    if (mapped_ofile.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidMachO;
    // The string table must end in a NUL; exclude that trailing NUL from our slice.
    if (mapped_ofile[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidMachO;
    const strtab = mapped_ofile[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1];
    const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64);
    if (mapped_ofile.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidMachO;
    const symtab_raw: []align(1) const macho.nlist_64 = @ptrCast(mapped_ofile[symtab_cmd.symoff..][0..n_sym_bytes]);
    // TODO handle tentative (common) symbols
    // Index every named, defined symbol by name, so `getDwarfForAddress` can map a STAB
    // symbol name back into this object file's symtab.
    var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty;
    defer symbols_by_name.deinit(gpa);
    try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab_raw.len));
    for (symtab_raw, 0..) |sym_raw, sym_index| {
        var sym = sym_raw;
        // Entries are stored little-endian; swap before reading on other hosts.
        if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &sym);
        if (sym.n_strx == 0) continue;
        switch (sym.n_type.bits.type) {
            .undf => continue, // includes tentative symbols
            .abs => continue,
            else => {},
        }
        const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0);
        const gop = symbols_by_name.getOrPutAssumeCapacityAdapted(
            @as([]const u8, sym_name),
            @as(OFile.SymbolAdapter, .{ .strtab = strtab, .symtab_raw = symtab_raw }),
        );
        // Duplicate names within one object file would make by-name lookup ambiguous.
        if (gop.found_existing) return error.InvalidMachO;
        gop.key_ptr.* = @intCast(sym_index);
    }
    // Collect the `__DWARF` segment sections that this `Dwarf` implementation knows.
    var sections: Dwarf.SectionArray = @splat(null);
    for (seg_cmd.getSections()) |sect_raw| {
        var sect = sect_raw;
        if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.section_64, &sect);
        if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue;
        const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| {
            if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i;
        } else continue;
        if (mapped_ofile.len < sect.offset + sect.size) return error.InvalidMachO;
        const section_bytes = mapped_ofile[sect.offset..][0..sect.size];
        sections[section_index] = .{
            .data = section_bytes,
            // Backed by `all_mapped_memory`, so the gpa must not free it.
            .owned = false,
        };
    }
    // These four sections are the minimum needed to resolve names and line info.
    if (sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or
        sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or
        sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or
        sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null)
    {
        return error.MissingDebugInfo;
    }
    var dwarf: Dwarf = .{ .sections = sections };
    errdefer dwarf.deinit(gpa);
    dwarf.open(gpa, .little) catch |err| switch (err) {
        error.InvalidDebugInfo,
        error.EndOfStream,
        error.Overflow,
        error.StreamTooLong,
        => return error.InvalidDwarf,
        error.MissingDebugInfo,
        error.ReadFailed,
        error.OutOfMemory,
        => |e| return e,
    };
    return .{
        .mapped_memory = all_mapped_memory,
        .dwarf = dwarf,
        .strtab = strtab,
        .symtab_raw = symtab_raw,
        .symbols_by_name = symbols_by_name.move(),
    };
}
/// Maps the whole file at `path` into memory, read-only, via `mmap`.
/// The caller owns the mapping and must release it with `posix.munmap`.
/// Returns `error.MissingDebugInfo` if the file does not exist, and
/// `error.ReadFailed` for any other failure to open, stat, or map it.
fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 {
    const file = std.fs.cwd().openFile(path, .{}) catch |err| {
        // A missing file simply means there is no debug info to be found here.
        if (err == error.FileNotFound) return error.MissingDebugInfo;
        return error.ReadFailed;
    };
    defer file.close();
    const end_pos = file.getEndPos() catch return error.ReadFailed;
    // The file length might not fit in `usize` on 32-bit hosts; treat that as unreadable.
    const map_len = std.math.cast(usize, end_pos) orelse return error.ReadFailed;
    return posix.mmap(null, map_len, posix.PROT.READ, .{ .TYPE = .SHARED }, file.handle, 0) catch
        return error.ReadFailed;
}
const std = @import("std");
const Allocator = std.mem.Allocator;
const Dwarf = std.debug.Dwarf;
const Io = std.Io;
const assert = std.debug.assert;
const posix = std.posix;
const macho = std.macho;
const mem = std.mem;
const testing = std.testing;
const builtin = @import("builtin");
const MachOFile = @This();

View File

@ -80,6 +80,11 @@ pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]cons
if (module.name.len == 0) return error.MissingDebugInfo; if (module.name.len == 0) return error.MissingDebugInfo;
return module.name; return module.name;
} }
pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, address: usize) Error!usize {
const module = try si.findModule(gpa, address, .shared);
defer si.rwlock.unlockShared();
return module.load_offset;
}
pub const can_unwind: bool = s: { pub const can_unwind: bool = s: {
// The DWARF code can't deal with ILP32 ABIs yet: https://github.com/ziglang/zig/issues/25447 // The DWARF code can't deal with ILP32 ABIs yet: https://github.com/ziglang/zig/issues/25447

View File

@ -1,12 +1,10 @@
mutex: std.Thread.Mutex, mutex: std.Thread.Mutex,
/// Accessed through `Module.Adapter`. /// Accessed through `Module.Adapter`.
modules: std.ArrayHashMapUnmanaged(Module, void, Module.Context, false), modules: std.ArrayHashMapUnmanaged(Module, void, Module.Context, false),
ofiles: std.StringArrayHashMapUnmanaged(?OFile),
pub const init: SelfInfo = .{ pub const init: SelfInfo = .{
.mutex = .{}, .mutex = .{},
.modules = .empty, .modules = .empty,
.ofiles = .empty,
}; };
pub fn deinit(si: *SelfInfo, gpa: Allocator) void { pub fn deinit(si: *SelfInfo, gpa: Allocator) void {
for (si.modules.keys()) |*module| { for (si.modules.keys()) |*module| {
@ -14,20 +12,12 @@ pub fn deinit(si: *SelfInfo, gpa: Allocator) void {
const u = &(module.unwind orelse break :unwind catch break :unwind); const u = &(module.unwind orelse break :unwind catch break :unwind);
if (u.dwarf) |*dwarf| dwarf.deinit(gpa); if (u.dwarf) |*dwarf| dwarf.deinit(gpa);
} }
loaded: { file: {
const l = &(module.loaded_macho orelse break :loaded catch break :loaded); const f = &(module.file orelse break :file catch break :file);
gpa.free(l.symbols); f.deinit(gpa);
posix.munmap(l.mapped_memory);
} }
} }
for (si.ofiles.values()) |*opt_ofile| {
const ofile = &(opt_ofile.* orelse continue);
ofile.dwarf.deinit(gpa);
ofile.symbols_by_name.deinit(gpa);
posix.munmap(ofile.mapped_memory);
}
si.modules.deinit(gpa); si.modules.deinit(gpa);
si.ofiles.deinit(gpa);
} }
pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!std.debug.Symbol { pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!std.debug.Symbol {
@ -35,67 +25,55 @@ pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!st
const module = try si.findModule(gpa, address); const module = try si.findModule(gpa, address);
defer si.mutex.unlock(); defer si.mutex.unlock();
const loaded_macho = try module.getLoadedMachO(gpa); const file = try module.getFile(gpa);
const vaddr = address - loaded_macho.vaddr_offset; // This is not necessarily the same as the vmaddr_slide that dyld would report. This is
const symbol = MachoSymbol.find(loaded_macho.symbols, vaddr) orelse return .unknown; // because the segments in the file on disk might differ from the ones in memory. Normally
// we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying:
// it exists on disk (necessarily, because the kernel needs to load it!), but is also in
// the dyld cache (dyld actually restart itself from cache after loading it), and the two
// versions have (very) different segment base addresses. It's sort of like a large slide
// has been applied to all addresses in memory. For an optimal experience, we consider the
// on-disk vmaddr instead of the in-memory one.
const vaddr_offset = module.text_base - file.text_vmaddr;
// offset of `address` from start of `symbol` const vaddr = address - vaddr_offset;
const address_symbol_offset = vaddr - symbol.addr;
// Take the symbol name from the N_FUN STAB entry, we're going to const ofile_dwarf, const ofile_vaddr = file.getDwarfForAddress(gpa, vaddr) catch {
// use it if we fail to find the DWARF infos // Return at least the symbol name if available.
const stab_symbol = mem.sliceTo(loaded_macho.strings[symbol.strx..], 0); return .{
.name = try file.lookupSymbolName(vaddr),
// If any information is missing, we can at least return this from now on. .compile_unit_name = null,
const sym_only_result: std.debug.Symbol = .{ .source_location = null,
.name = stab_symbol, };
.compile_unit_name = null,
.source_location = null,
}; };
if (symbol.ofile == MachoSymbol.unknown_ofile) { const compile_unit = ofile_dwarf.findCompileUnit(native_endian, ofile_vaddr) catch {
// We don't have STAB info, so can't track down the object file; all we can do is the symbol name. // Return at least the symbol name if available.
return sym_only_result; return .{
} .name = try file.lookupSymbolName(vaddr),
.compile_unit_name = null,
const o_file: *OFile = of: { .source_location = null,
const path = mem.sliceTo(loaded_macho.strings[symbol.ofile..], 0); };
const gop = try si.ofiles.getOrPut(gpa, path);
if (!gop.found_existing) {
gop.value_ptr.* = loadOFile(gpa, path) catch null;
}
if (gop.value_ptr.*) |*o_file| {
break :of o_file;
} else {
return sym_only_result;
}
}; };
const symbol_index = o_file.symbols_by_name.getKeyAdapted(
@as([]const u8, stab_symbol),
@as(OFile.SymbolAdapter, .{ .strtab = o_file.strtab, .symtab = o_file.symtab }),
) orelse return sym_only_result;
const symbol_ofile_vaddr = o_file.symtab[symbol_index].n_value;
const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch return sym_only_result;
return .{ return .{
.name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr + address_symbol_offset) orelse stab_symbol, .name = ofile_dwarf.getSymbolName(ofile_vaddr) orelse
try file.lookupSymbolName(vaddr),
.compile_unit_name = compile_unit.die.getAttrString( .compile_unit_name = compile_unit.die.getAttrString(
&o_file.dwarf, ofile_dwarf,
native_endian, native_endian,
std.dwarf.AT.name, std.dwarf.AT.name,
o_file.dwarf.section(.debug_str), ofile_dwarf.section(.debug_str),
compile_unit, compile_unit,
) catch |err| switch (err) { ) catch |err| switch (err) {
error.MissingDebugInfo, error.InvalidDebugInfo => null, error.MissingDebugInfo, error.InvalidDebugInfo => null,
}, },
.source_location = o_file.dwarf.getLineNumberInfo( .source_location = ofile_dwarf.getLineNumberInfo(
gpa, gpa,
native_endian, native_endian,
compile_unit, compile_unit,
symbol_ofile_vaddr + address_symbol_offset, ofile_vaddr,
) catch null, ) catch null,
}; };
} }
@ -104,6 +82,20 @@ pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]cons
defer si.mutex.unlock(); defer si.mutex.unlock();
return module.name; return module.name;
} }
pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, address: usize) Error!usize {
const module = try si.findModule(gpa, address);
defer si.mutex.unlock();
const header: *std.macho.mach_header_64 = @ptrFromInt(module.text_base);
const raw_macho: [*]u8 = @ptrCast(header);
var it = macho.LoadCommandIterator.init(header, raw_macho[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds]) catch unreachable;
const text_vmaddr = while (it.next() catch unreachable) |load_cmd| {
if (load_cmd.hdr.cmd != .SEGMENT_64) continue;
const segment_cmd = load_cmd.cast(macho.segment_command_64).?;
if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue;
break segment_cmd.vmaddr;
} else unreachable;
return module.text_base - text_vmaddr;
}
pub const can_unwind: bool = true; pub const can_unwind: bool = true;
pub const UnwindContext = std.debug.Dwarf.SelfUnwinder; pub const UnwindContext = std.debug.Dwarf.SelfUnwinder;
@ -447,7 +439,7 @@ fn findModule(si: *SelfInfo, gpa: Allocator, address: usize) Error!*Module {
.text_base = @intFromPtr(info.fbase), .text_base = @intFromPtr(info.fbase),
.name = std.mem.span(info.fname), .name = std.mem.span(info.fname),
.unwind = null, .unwind = null,
.loaded_macho = null, .file = null,
}; };
} }
return gop.key_ptr; return gop.key_ptr;
@ -457,7 +449,7 @@ const Module = struct {
text_base: usize, text_base: usize,
name: []const u8, name: []const u8,
unwind: ?(Error!Unwind), unwind: ?(Error!Unwind),
loaded_macho: ?(Error!LoadedMachO), file: ?(Error!MachOFile),
const Adapter = struct { const Adapter = struct {
pub fn hash(_: Adapter, text_base: usize) u32 { pub fn hash(_: Adapter, text_base: usize) u32 {
@ -488,34 +480,17 @@ const Module = struct {
dwarf: ?Dwarf.Unwind, dwarf: ?Dwarf.Unwind,
}; };
const LoadedMachO = struct {
mapped_memory: []align(std.heap.page_size_min) const u8,
symbols: []const MachoSymbol,
strings: []const u8,
/// This is not necessarily the same as the vmaddr_slide that dyld would report. This is
/// because the segments in the file on disk might differ from the ones in memory. Normally
/// we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying:
/// it exists on disk (necessarily, because the kernel needs to load it!), but is also in
/// the dyld cache (dyld actually restart itself from cache after loading it), and the two
/// versions have (very) different segment base addresses. It's sort of like a large slide
/// has been applied to all addresses in memory. For an optimal experience, we consider the
/// on-disk vmaddr instead of the in-memory one.
vaddr_offset: usize,
};
fn getUnwindInfo(module: *Module, gpa: Allocator) Error!*Unwind { fn getUnwindInfo(module: *Module, gpa: Allocator) Error!*Unwind {
if (module.unwind == null) module.unwind = loadUnwindInfo(module, gpa); if (module.unwind == null) module.unwind = loadUnwindInfo(module, gpa);
return if (module.unwind.?) |*unwind| unwind else |err| err; return if (module.unwind.?) |*unwind| unwind else |err| err;
} }
fn loadUnwindInfo(module: *const Module, gpa: Allocator) Error!Unwind { fn loadUnwindInfo(module: *const Module, gpa: Allocator) Error!Unwind {
const header: *std.macho.mach_header = @ptrFromInt(module.text_base); const header: *std.macho.mach_header_64 = @ptrFromInt(module.text_base);
var it: macho.LoadCommandIterator = .{ const raw_macho: [*]u8 = @ptrCast(header);
.ncmds = header.ncmds, var it = macho.LoadCommandIterator.init(header, raw_macho[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds]) catch unreachable;
.buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], const sections, const text_vmaddr = while (it.next() catch unreachable) |load_cmd| {
}; if (load_cmd.hdr.cmd != .SEGMENT_64) continue;
const sections, const text_vmaddr = while (it.next()) |load_cmd| {
if (load_cmd.cmd() != .SEGMENT_64) continue;
const segment_cmd = load_cmd.cast(macho.segment_command_64).?; const segment_cmd = load_cmd.cast(macho.segment_command_64).?;
if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue;
break .{ load_cmd.getSections(), segment_cmd.vmaddr }; break .{ load_cmd.getSections(), segment_cmd.vmaddr };
@ -568,235 +543,13 @@ const Module = struct {
}; };
} }
fn getLoadedMachO(module: *Module, gpa: Allocator) Error!*LoadedMachO { fn getFile(module: *Module, gpa: Allocator) Error!*MachOFile {
if (module.loaded_macho == null) module.loaded_macho = loadMachO(module, gpa) catch |err| switch (err) { if (module.file == null) module.file = MachOFile.load(gpa, module.name, builtin.cpu.arch) catch |err| switch (err) {
error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory, error.Unexpected => |e| e, error.InvalidMachO, error.InvalidDwarf => error.InvalidDebugInfo,
else => error.ReadFailed, error.MissingDebugInfo, error.OutOfMemory, error.UnsupportedDebugInfo, error.ReadFailed => |e| e,
}; };
return if (module.loaded_macho.?) |*lm| lm else |err| err; return if (module.file.?) |*f| f else |err| err;
} }
fn loadMachO(module: *const Module, gpa: Allocator) Error!LoadedMachO {
const all_mapped_memory = try mapDebugInfoFile(module.name);
errdefer posix.munmap(all_mapped_memory);
// In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal
// binary": a simple file format which contains Mach-O binaries for multiple targets. For
// instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images
// for both ARM64 macOS and x86_64 macOS.
if (all_mapped_memory.len < 4) return error.InvalidDebugInfo;
const magic = @as(*const u32, @ptrCast(all_mapped_memory.ptr)).*;
// The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`.
const mapped_macho = switch (magic) {
macho.MH_MAGIC_64 => all_mapped_memory,
macho.FAT_CIGAM => mapped_macho: {
// This is the universal binary format (aka a "fat binary"). Annoyingly, the whole thing
// is big-endian, so we'll be swapping some bytes.
if (all_mapped_memory.len < @sizeOf(macho.fat_header)) return error.InvalidDebugInfo;
const hdr: *const macho.fat_header = @ptrCast(all_mapped_memory.ptr);
const archs_ptr: [*]const macho.fat_arch = @ptrCast(all_mapped_memory.ptr + @sizeOf(macho.fat_header));
const archs: []const macho.fat_arch = archs_ptr[0..@byteSwap(hdr.nfat_arch)];
const native_cpu_type = switch (builtin.cpu.arch) {
.x86_64 => macho.CPU_TYPE_X86_64,
.aarch64 => macho.CPU_TYPE_ARM64,
else => comptime unreachable,
};
for (archs) |*arch| {
if (@byteSwap(arch.cputype) != native_cpu_type) continue;
const offset = @byteSwap(arch.offset);
const size = @byteSwap(arch.size);
break :mapped_macho all_mapped_memory[offset..][0..size];
}
// Our native architecture was not present in the fat binary.
return error.MissingDebugInfo;
},
// Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It
// will be fairly easy to add support here if necessary; it's very similar to above.
macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo,
else => return error.InvalidDebugInfo,
};
const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_macho.ptr));
if (hdr.magic != macho.MH_MAGIC_64)
return error.InvalidDebugInfo;
const symtab: macho.symtab_command, const text_vmaddr: u64 = lc_iter: {
var it: macho.LoadCommandIterator = .{
.ncmds = hdr.ncmds,
.buffer = mapped_macho[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds],
};
var symtab: ?macho.symtab_command = null;
var text_vmaddr: ?u64 = null;
while (it.next()) |cmd| switch (cmd.cmd()) {
.SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo,
.SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| {
if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue;
text_vmaddr = seg_cmd.vmaddr;
},
else => {},
};
break :lc_iter .{
symtab orelse return error.MissingDebugInfo,
text_vmaddr orelse return error.MissingDebugInfo,
};
};
const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_macho[symtab.symoff..]);
const syms = syms_ptr[0..symtab.nsyms];
const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1];
var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len);
defer symbols.deinit(gpa);
// This map is temporary; it is used only to detect duplicates here. This is
// necessary because we prefer to use STAB ("symbolic debugging table") symbols,
// but they might not be present, so we track normal symbols too.
// Indices match 1-1 with those of `symbols`.
var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty;
defer symbol_names.deinit(gpa);
try symbol_names.ensureUnusedCapacity(gpa, syms.len);
var ofile: u32 = undefined;
var last_sym: MachoSymbol = undefined;
var state: enum {
init,
oso_open,
oso_close,
bnsym,
fun_strx,
fun_size,
ensym,
} = .init;
for (syms) |*sym| {
if (sym.n_type.bits.is_stab == 0) {
if (sym.n_strx == 0) continue;
switch (sym.n_type.bits.type) {
.undf, .pbud, .indr, .abs, _ => continue,
.sect => {
const name = std.mem.sliceTo(strings[sym.n_strx..], 0);
const gop = symbol_names.getOrPutAssumeCapacity(name);
if (!gop.found_existing) {
assert(gop.index == symbols.items.len);
symbols.appendAssumeCapacity(.{
.strx = sym.n_strx,
.addr = sym.n_value,
.ofile = MachoSymbol.unknown_ofile,
});
}
},
}
continue;
}
// TODO handle globals N_GSYM, and statics N_STSYM
switch (sym.n_type.stab) {
.oso => switch (state) {
.init, .oso_close => {
state = .oso_open;
ofile = sym.n_strx;
},
else => return error.InvalidDebugInfo,
},
.bnsym => switch (state) {
.oso_open, .ensym => {
state = .bnsym;
last_sym = .{
.strx = 0,
.addr = sym.n_value,
.ofile = ofile,
};
},
else => return error.InvalidDebugInfo,
},
.fun => switch (state) {
.bnsym => {
state = .fun_strx;
last_sym.strx = sym.n_strx;
},
.fun_strx => {
state = .fun_size;
},
else => return error.InvalidDebugInfo,
},
.ensym => switch (state) {
.fun_size => {
state = .ensym;
if (last_sym.strx != 0) {
const name = std.mem.sliceTo(strings[last_sym.strx..], 0);
const gop = symbol_names.getOrPutAssumeCapacity(name);
if (!gop.found_existing) {
assert(gop.index == symbols.items.len);
symbols.appendAssumeCapacity(last_sym);
} else {
symbols.items[gop.index] = last_sym;
}
}
},
else => return error.InvalidDebugInfo,
},
.so => switch (state) {
.init, .oso_close => {},
.oso_open, .ensym => {
state = .oso_close;
},
else => return error.InvalidDebugInfo,
},
else => {},
}
}
switch (state) {
.init => {
// Missing STAB symtab entries is still okay, unless there were also no normal symbols.
if (symbols.items.len == 0) return error.MissingDebugInfo;
},
.oso_close => {},
else => return error.InvalidDebugInfo, // corrupted STAB entries in symtab
}
const symbols_slice = try symbols.toOwnedSlice(gpa);
errdefer gpa.free(symbols_slice);
// Even though lld emits symbols in ascending order, this debug code
// should work for programs linked in any valid way.
// This sort is so that we can binary search later.
mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan);
return .{
.mapped_memory = all_mapped_memory,
.symbols = symbols_slice,
.strings = strings,
.vaddr_offset = module.text_base - text_vmaddr,
};
}
};
const OFile = struct {
mapped_memory: []align(std.heap.page_size_min) const u8,
dwarf: Dwarf,
strtab: []const u8,
symtab: []align(1) const macho.nlist_64,
/// All named symbols in `symtab`. Stored `u32` key is the index into `symtab`. Accessed
/// through `SymbolAdapter`, so that the symbol name is used as the logical key.
symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true),
const SymbolAdapter = struct {
strtab: []const u8,
symtab: []align(1) const macho.nlist_64,
pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 {
_ = ctx;
return @truncate(std.hash.Wyhash.hash(0, sym_name));
}
pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool {
_ = b_index;
const b_sym = ctx.symtab[b_sym_index];
const b_sym_name = std.mem.sliceTo(ctx.strtab[b_sym.n_strx..], 0);
return mem.eql(u8, a_sym_name, b_sym_name);
}
};
}; };
const MachoSymbol = struct { const MachoSymbol = struct {
@ -880,101 +633,12 @@ fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8
}; };
} }
fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile {
const mapped_mem = try mapDebugInfoFile(o_file_path);
errdefer posix.munmap(mapped_mem);
if (mapped_mem.len < @sizeOf(macho.mach_header_64)) return error.InvalidDebugInfo;
const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr));
if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo;
const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: {
var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null;
var symtab_cmd: ?macho.symtab_command = null;
var it: macho.LoadCommandIterator = .{
.ncmds = hdr.ncmds,
.buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds],
};
while (it.next()) |cmd| switch (cmd.cmd()) {
.SEGMENT_64 => seg_cmd = cmd,
.SYMTAB => symtab_cmd = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo,
else => {},
};
break :cmds .{
seg_cmd orelse return error.MissingDebugInfo,
symtab_cmd orelse return error.MissingDebugInfo,
};
};
if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo;
if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo;
const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1];
const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64);
if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo;
const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]);
// TODO handle tentative (common) symbols
var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty;
defer symbols_by_name.deinit(gpa);
try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab.len));
for (symtab, 0..) |sym, sym_index| {
if (sym.n_strx == 0) continue;
switch (sym.n_type.bits.type) {
.undf => continue, // includes tentative symbols
.abs => continue,
else => {},
}
const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0);
const gop = symbols_by_name.getOrPutAssumeCapacityAdapted(
@as([]const u8, sym_name),
@as(OFile.SymbolAdapter, .{ .strtab = strtab, .symtab = symtab }),
);
if (gop.found_existing) return error.InvalidDebugInfo;
gop.key_ptr.* = @intCast(sym_index);
}
var sections: Dwarf.SectionArray = @splat(null);
for (seg_cmd.getSections()) |sect| {
if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue;
const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| {
if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i;
} else continue;
if (mapped_mem.len < sect.offset + sect.size) return error.InvalidDebugInfo;
const section_bytes = mapped_mem[sect.offset..][0..sect.size];
sections[section_index] = .{
.data = section_bytes,
.owned = false,
};
}
const missing_debug_info =
sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or
sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or
sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or
sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null;
if (missing_debug_info) return error.MissingDebugInfo;
var dwarf: Dwarf = .{ .sections = sections };
errdefer dwarf.deinit(gpa);
try dwarf.open(gpa, native_endian);
return .{
.mapped_memory = mapped_mem,
.dwarf = dwarf,
.strtab = strtab,
.symtab = symtab,
.symbols_by_name = symbols_by_name.move(),
};
}
const std = @import("std"); const std = @import("std");
const Io = std.Io; const Io = std.Io;
const Allocator = std.mem.Allocator; const Allocator = std.mem.Allocator;
const Dwarf = std.debug.Dwarf; const Dwarf = std.debug.Dwarf;
const Error = std.debug.SelfInfoError; const Error = std.debug.SelfInfoError;
const MachOFile = std.debug.MachOFile;
const assert = std.debug.assert; const assert = std.debug.assert;
const posix = std.posix; const posix = std.posix;
const macho = std.macho; const macho = std.macho;

View File

@ -33,6 +33,12 @@ pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]cons
const module = try si.findModule(gpa, address); const module = try si.findModule(gpa, address);
return module.name; return module.name;
} }
pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, address: usize) Error!usize {
si.mutex.lock();
defer si.mutex.unlock();
const module = try si.findModule(gpa, address);
return module.base_address;
}
pub const can_unwind: bool = switch (builtin.cpu.arch) { pub const can_unwind: bool = switch (builtin.cpu.arch) {
else => true, else => true,

View File

@ -962,6 +962,7 @@ pub const BodyWriter = struct {
// have to flush the chunk header before knowing the chunk length. // have to flush the chunk header before knowing the chunk length.
return error.Unimplemented; return error.Unimplemented;
}; };
if (data_len == 0) return error.EndOfStream;
const out = bw.http_protocol_output; const out = bw.http_protocol_output;
l: switch (bw.state.chunk_len) { l: switch (bw.state.chunk_len) {
0 => { 0 => {
@ -975,8 +976,7 @@ pub const BodyWriter = struct {
2 => { 2 => {
try out.writeAll("\r\n"); try out.writeAll("\r\n");
bw.state.chunk_len = 0; bw.state.chunk_len = 0;
assert(file_reader.atEnd()); continue :l 0;
return error.EndOfStream;
}, },
else => { else => {
const chunk_limit: std.Io.Limit = .limited(bw.state.chunk_len - 2); const chunk_limit: std.Io.Limit = .limited(bw.state.chunk_len - 2);
@ -985,8 +985,7 @@ pub const BodyWriter = struct {
else else
try out.write(chunk_limit.slice(w.buffered())); try out.write(chunk_limit.slice(w.buffered()));
bw.state.chunk_len -= n; bw.state.chunk_len -= n;
const ret = w.consume(n); return w.consume(n);
return ret;
}, },
} }
} }

View File

@ -1902,74 +1902,76 @@ pub const data_in_code_entry = extern struct {
}; };
pub const LoadCommandIterator = struct { pub const LoadCommandIterator = struct {
next_index: usize,
ncmds: usize, ncmds: usize,
buffer: []const u8, r: std.Io.Reader,
index: usize = 0,
pub const LoadCommand = struct { pub const LoadCommand = struct {
hdr: load_command, hdr: load_command,
data: []const u8, data: []const u8,
pub fn cmd(lc: LoadCommand) LC {
return lc.hdr.cmd;
}
pub fn cmdsize(lc: LoadCommand) u32 {
return lc.hdr.cmdsize;
}
pub fn cast(lc: LoadCommand, comptime Cmd: type) ?Cmd { pub fn cast(lc: LoadCommand, comptime Cmd: type) ?Cmd {
if (lc.data.len < @sizeOf(Cmd)) return null; if (lc.data.len < @sizeOf(Cmd)) return null;
return @as(*align(1) const Cmd, @ptrCast(lc.data.ptr)).*; const ptr: *align(1) const Cmd = @ptrCast(lc.data.ptr);
var cmd = ptr.*;
if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(Cmd, &cmd);
return cmd;
} }
/// Asserts LoadCommand is of type segment_command_64. /// Asserts LoadCommand is of type segment_command_64.
/// If the native endian is not `.little`, the `section_64` values must be byte-swapped by the caller.
pub fn getSections(lc: LoadCommand) []align(1) const section_64 { pub fn getSections(lc: LoadCommand) []align(1) const section_64 {
const segment_lc = lc.cast(segment_command_64).?; const segment_lc = lc.cast(segment_command_64).?;
if (segment_lc.nsects == 0) return &[0]section_64{}; const sects_ptr: [*]align(1) const section_64 = @ptrCast(lc.data[@sizeOf(segment_command_64)..]);
const data = lc.data[@sizeOf(segment_command_64)..]; return sects_ptr[0..segment_lc.nsects];
const sections = @as([*]align(1) const section_64, @ptrCast(data.ptr))[0..segment_lc.nsects];
return sections;
} }
/// Asserts LoadCommand is of type dylib_command. /// Asserts LoadCommand is of type dylib_command.
pub fn getDylibPathName(lc: LoadCommand) []const u8 { pub fn getDylibPathName(lc: LoadCommand) []const u8 {
const dylib_lc = lc.cast(dylib_command).?; const dylib_lc = lc.cast(dylib_command).?;
const data = lc.data[dylib_lc.dylib.name..]; return mem.sliceTo(lc.data[dylib_lc.dylib.name..], 0);
return mem.sliceTo(data, 0);
} }
/// Asserts LoadCommand is of type rpath_command. /// Asserts LoadCommand is of type rpath_command.
pub fn getRpathPathName(lc: LoadCommand) []const u8 { pub fn getRpathPathName(lc: LoadCommand) []const u8 {
const rpath_lc = lc.cast(rpath_command).?; const rpath_lc = lc.cast(rpath_command).?;
const data = lc.data[rpath_lc.path..]; return mem.sliceTo(lc.data[rpath_lc.path..], 0);
return mem.sliceTo(data, 0);
} }
/// Asserts LoadCommand is of type build_version_command. /// Asserts LoadCommand is of type build_version_command.
/// If the native endian is not `.little`, the `build_tool_version` values must be byte-swapped by the caller.
pub fn getBuildVersionTools(lc: LoadCommand) []align(1) const build_tool_version { pub fn getBuildVersionTools(lc: LoadCommand) []align(1) const build_tool_version {
const build_lc = lc.cast(build_version_command).?; const build_lc = lc.cast(build_version_command).?;
const ntools = build_lc.ntools; const tools_ptr: [*]align(1) const build_tool_version = @ptrCast(lc.data[@sizeOf(build_version_command)..]);
if (ntools == 0) return &[0]build_tool_version{}; return tools_ptr[0..build_lc.ntools];
const data = lc.data[@sizeOf(build_version_command)..];
const tools = @as([*]align(1) const build_tool_version, @ptrCast(data.ptr))[0..ntools];
return tools;
} }
}; };
pub fn next(it: *LoadCommandIterator) ?LoadCommand { pub fn next(it: *LoadCommandIterator) error{InvalidMachO}!?LoadCommand {
if (it.index >= it.ncmds) return null; if (it.next_index >= it.ncmds) return null;
const hdr = @as(*align(1) const load_command, @ptrCast(it.buffer.ptr)).*; const hdr = it.r.peekStruct(load_command, .little) catch |err| switch (err) {
const cmd = LoadCommand{ error.ReadFailed => unreachable,
.hdr = hdr, error.EndOfStream => return error.InvalidMachO,
.data = it.buffer[0..hdr.cmdsize], };
const data = it.r.take(hdr.cmdsize) catch |err| switch (err) {
error.ReadFailed => unreachable,
error.EndOfStream => return error.InvalidMachO,
}; };
it.buffer = it.buffer[hdr.cmdsize..]; it.next_index += 1;
it.index += 1; return .{ .hdr = hdr, .data = data };
}
return cmd; pub fn init(hdr: *const mach_header_64, cmds_buf_overlong: []const u8) error{InvalidMachO}!LoadCommandIterator {
if (cmds_buf_overlong.len < hdr.sizeofcmds) return error.InvalidMachO;
if (hdr.ncmds > 0 and hdr.sizeofcmds < @sizeOf(load_command)) return error.InvalidMachO;
const cmds_buf = cmds_buf_overlong[0..hdr.sizeofcmds];
return .{
.next_index = 0,
.ncmds = hdr.ncmds,
.r = .fixed(cmds_buf),
};
} }
}; };

View File

@ -4167,7 +4167,7 @@ pub const Platform = struct {
/// Using Apple's ld64 as our blueprint, `min_version` as well as `sdk_version` are set to /// Using Apple's ld64 as our blueprint, `min_version` as well as `sdk_version` are set to
/// the extracted minimum platform version. /// the extracted minimum platform version.
pub fn fromLoadCommand(lc: macho.LoadCommandIterator.LoadCommand) Platform { pub fn fromLoadCommand(lc: macho.LoadCommandIterator.LoadCommand) Platform {
switch (lc.cmd()) { switch (lc.hdr.cmd) {
.BUILD_VERSION => { .BUILD_VERSION => {
const cmd = lc.cast(macho.build_version_command).?; const cmd = lc.cast(macho.build_version_command).?;
return .{ return .{
@ -4200,7 +4200,7 @@ pub const Platform = struct {
// We can't distinguish Mac Catalyst here, but this is legacy stuff anyway. // We can't distinguish Mac Catalyst here, but this is legacy stuff anyway.
const cmd = lc.cast(macho.version_min_command).?; const cmd = lc.cast(macho.version_min_command).?;
return .{ return .{
.os_tag = switch (lc.cmd()) { .os_tag = switch (lc.hdr.cmd) {
.VERSION_MIN_IPHONEOS => .ios, .VERSION_MIN_IPHONEOS => .ios,
.VERSION_MIN_MACOSX => .macos, .VERSION_MIN_MACOSX => .macos,
.VERSION_MIN_TVOS => .tvos, .VERSION_MIN_TVOS => .tvos,

View File

@ -90,11 +90,8 @@ fn parseBinary(self: *Dylib, macho_file: *MachO) !void {
if (amt != lc_buffer.len) return error.InputOutput; if (amt != lc_buffer.len) return error.InputOutput;
} }
var it = LoadCommandIterator{ var it = LoadCommandIterator.init(&header, lc_buffer) catch |err| std.debug.panic("bad dylib: {t}", .{err});
.ncmds = header.ncmds, while (it.next() catch |err| std.debug.panic("bad dylib: {t}", .{err})) |cmd| switch (cmd.hdr.cmd) {
.buffer = lc_buffer,
};
while (it.next()) |cmd| switch (cmd.cmd()) {
.ID_DYLIB => { .ID_DYLIB => {
self.id = try Id.fromLoadCommand(gpa, cmd.cast(macho.dylib_command).?, cmd.getDylibPathName()); self.id = try Id.fromLoadCommand(gpa, cmd.cast(macho.dylib_command).?, cmd.getDylibPathName());
}, },

View File

@ -109,11 +109,8 @@ pub fn parse(self: *Object, macho_file: *MachO) !void {
if (amt != self.header.?.sizeofcmds) return error.InputOutput; if (amt != self.header.?.sizeofcmds) return error.InputOutput;
} }
var it = LoadCommandIterator{ var it = LoadCommandIterator.init(&self.header.?, lc_buffer) catch |err| std.debug.panic("bad object: {t}", .{err});
.ncmds = self.header.?.ncmds, while (it.next() catch |err| std.debug.panic("bad object: {t}", .{err})) |lc| switch (lc.hdr.cmd) {
.buffer = lc_buffer,
};
while (it.next()) |lc| switch (lc.cmd()) {
.SEGMENT_64 => { .SEGMENT_64 => {
const sections = lc.getSections(); const sections = lc.getSections();
try self.sections.ensureUnusedCapacity(gpa, sections.len); try self.sections.ensureUnusedCapacity(gpa, sections.len);
@ -1644,11 +1641,8 @@ pub fn parseAr(self: *Object, macho_file: *MachO) !void {
if (amt != self.header.?.sizeofcmds) return error.InputOutput; if (amt != self.header.?.sizeofcmds) return error.InputOutput;
} }
var it = LoadCommandIterator{ var it = LoadCommandIterator.init(&self.header.?, lc_buffer) catch |err| std.debug.panic("bad object: {t}", .{err});
.ncmds = self.header.?.ncmds, while (it.next() catch |err| std.debug.panic("bad object: {t}", .{err})) |lc| switch (lc.hdr.cmd) {
.buffer = lc_buffer,
};
while (it.next()) |lc| switch (lc.cmd()) {
.SYMTAB => { .SYMTAB => {
const cmd = lc.cast(macho.symtab_command).?; const cmd = lc.cast(macho.symtab_command).?;
try self.strtab.resize(gpa, cmd.strsize); try self.strtab.resize(gpa, cmd.strsize);

View File

@ -8,31 +8,50 @@ const assert = std.debug.assert;
const SeenPcsHeader = std.Build.abi.fuzz.SeenPcsHeader; const SeenPcsHeader = std.Build.abi.fuzz.SeenPcsHeader;
pub fn main() !void { pub fn main() !void {
var general_purpose_allocator: std.heap.GeneralPurposeAllocator(.{}) = .init; var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
defer _ = general_purpose_allocator.deinit(); defer _ = debug_allocator.deinit();
const gpa = general_purpose_allocator.allocator(); const gpa = debug_allocator.allocator();
var arena_instance = std.heap.ArenaAllocator.init(gpa); var arena_instance: std.heap.ArenaAllocator = .init(gpa);
defer arena_instance.deinit(); defer arena_instance.deinit();
const arena = arena_instance.allocator(); const arena = arena_instance.allocator();
var threaded: std.Io.Threaded = .init(gpa);
defer threaded.deinit();
const io = threaded.io();
const args = try std.process.argsAlloc(arena); const args = try std.process.argsAlloc(arena);
const target_query_str = switch (args.len) {
3 => "native",
4 => args[3],
else => return fatal(
\\usage: {0s} path/to/exe path/to/coverage [target]
\\ if omitted, 'target' defaults to 'native'
\\ example: {0s} zig-out/test .zig-cache/v/xxxxxxxx x86_64-linux
, .{if (args.len == 0) "dump-cov" else args[0]}),
};
const target = std.zig.resolveTargetQueryOrFatal(io, try .parse(.{
.arch_os_abi = target_query_str,
}));
const exe_file_name = args[1]; const exe_file_name = args[1];
const cov_file_name = args[2]; const cov_file_name = args[2];
const exe_path: Path = .{ const exe_path: Path = .{
.root_dir = std.Build.Cache.Directory.cwd(), .root_dir = .cwd(),
.sub_path = exe_file_name, .sub_path = exe_file_name,
}; };
const cov_path: Path = .{ const cov_path: Path = .{
.root_dir = std.Build.Cache.Directory.cwd(), .root_dir = .cwd(),
.sub_path = cov_file_name, .sub_path = cov_file_name,
}; };
var coverage = std.debug.Coverage.init; var coverage: std.debug.Coverage = .init;
defer coverage.deinit(gpa); defer coverage.deinit(gpa);
var debug_info = std.debug.Info.load(gpa, exe_path, &coverage) catch |err| { var debug_info = std.debug.Info.load(gpa, exe_path, &coverage, target.ofmt, target.cpu.arch) catch |err| {
fatal("failed to load debug info for {f}: {s}", .{ exe_path, @errorName(err) }); fatal("failed to load debug info for {f}: {s}", .{ exe_path, @errorName(err) });
}; };
defer debug_info.deinit(gpa); defer debug_info.deinit(gpa);