From ba3f38959a31ace9af1816f16cda6c0717518b7f Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 2 Sep 2025 18:36:05 +0100 Subject: [PATCH] split SelfInfo into a file per impl --- lib/std/debug/Dwarf/abi.zig | 2 +- lib/std/debug/SelfInfo.zig | 1619 +++------------------- lib/std/debug/SelfInfo/DarwinModule.zig | 801 +++++++++++ lib/std/debug/SelfInfo/ElfModule.zig | 144 ++ lib/std/debug/SelfInfo/WindowsModule.zig | 255 ++++ 5 files changed, 1430 insertions(+), 1391 deletions(-) create mode 100644 lib/std/debug/SelfInfo/DarwinModule.zig create mode 100644 lib/std/debug/SelfInfo/ElfModule.zig create mode 100644 lib/std/debug/SelfInfo/WindowsModule.zig diff --git a/lib/std/debug/Dwarf/abi.zig b/lib/std/debug/Dwarf/abi.zig index 79f7e87217..9fca02e382 100644 --- a/lib/std/debug/Dwarf/abi.zig +++ b/lib/std/debug/Dwarf/abi.zig @@ -7,7 +7,7 @@ const Arch = std.Target.Cpu.Arch; /// Tells whether unwinding for this target is supported by the Dwarf standard. /// -/// See also `std.debug.SelfInfo.supportsUnwinding` which tells whether the Zig +/// See also `std.debug.SelfInfo.supports_unwinding` which tells whether the Zig /// standard library has a working implementation of unwinding for this target. pub fn supportsUnwinding(target: *const std.Target) bool { return switch (target.cpu.arch) { diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index bb4cb6c7ca..32382ac1e1 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -11,18 +11,8 @@ const native_arch = builtin.cpu.arch; const std = @import("../std.zig"); const mem = std.mem; const Allocator = std.mem.Allocator; -const windows = std.os.windows; -const macho = std.macho; -const fs = std.fs; -const coff = std.coff; const assert = std.debug.assert; -const posix = std.posix; -const elf = std.elf; const Dwarf = std.debug.Dwarf; -const Pdb = std.debug.Pdb; -const File = std.fs.File; -const math = std.math; -const testing = std.testing; const regBytes = Dwarf.abi.regBytes; const regValueNative = Dwarf.abi.regValueNative; @@ -31,6 +21,7 @@ const SelfInfo = @This(); modules: std.AutoHashMapUnmanaged(usize, Module.DebugInfo), lookup_cache: Module.LookupCache, +/// Indicates whether the `SelfInfo` implementation has support for this target. pub const target_supported: bool = switch (native_os) { .linux, .freebsd, @@ -45,9 +36,39 @@ pub const target_supported: bool = switch (native_os) { else => false, }; +/// Indicates whether unwinding for the host is *implemented* here in the Zig +/// standard library. +/// +/// See also `Dwarf.abi.supportsUnwinding` which tells whether Dwarf supports +/// unwinding on a target *in theory*. +pub const supports_unwinding: bool = switch (builtin.target.cpu.arch) { + .x86 => switch (builtin.target.os.tag) { + .linux, .netbsd, .solaris, .illumos => true, + else => false, + }, + .x86_64 => switch (builtin.target.os.tag) { + .linux, .netbsd, .freebsd, .openbsd, .macos, .ios, .solaris, .illumos => true, + else => false, + }, + .arm, .armeb, .thumb, .thumbeb => switch (builtin.target.os.tag) { + .linux => true, + else => false, + }, + .aarch64, .aarch64_be => switch (builtin.target.os.tag) { + .linux, .netbsd, .freebsd, .macos, .ios => true, + else => false, + }, + // Unwinding is possible on other targets but this implementation does + // not support them...yet! 
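+    // If support for a new target is added here, the `comptime` block below verifies that
+    // DWARF unwinding is at least possible for it (see `Dwarf.abi.supportsUnwinding`).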
+ else => false, +}; +comptime { + if (supports_unwinding) assert(Dwarf.abi.supportsUnwinding(&builtin.target)); +} + pub const init: SelfInfo = .{ .modules = .empty, - .lookup_cache = if (Module.LookupCache != void) .init, + .lookup_cache = .init, }; pub fn deinit(self: *SelfInfo) void { @@ -59,19 +80,14 @@ pub fn deinit(self: *SelfInfo) void { self.allocator.destroy(mdi); } self.modules.deinit(self.allocator); - if (native_os == .windows) { - for (self.modules.items) |module| { - self.allocator.free(module.name); - if (module.mapped_file) |mapped_file| mapped_file.deinit(); - } - self.modules.deinit(self.allocator); - } } pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) !usize { - comptime assert(target_supported); + comptime assert(supports_unwinding); const module: Module = try .lookup(&self.lookup_cache, gpa, context.pc); const gop = try self.modules.getOrPut(gpa, module.load_offset); + self.modules.lockPointers(); + defer self.modules.unlockPointers(); if (!gop.found_existing) gop.value_ptr.* = .init; return module.unwindFrame(gpa, gop.value_ptr, context); } @@ -80,417 +96,39 @@ pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) !std. comptime assert(target_supported); const module: Module = try .lookup(&self.lookup_cache, gpa, address); const gop = try self.modules.getOrPut(gpa, module.key()); + self.modules.lockPointers(); + defer self.modules.unlockPointers(); if (!gop.found_existing) gop.value_ptr.* = .init; return module.getSymbolAtAddress(gpa, gop.value_ptr, address); } -/// Returns the module name for a given address. -/// This can be called when getModuleForAddress fails, so implementations should provide -/// a path that doesn't rely on any side-effects of a prior successful module lookup. pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) error{ Unexpected, OutOfMemory, MissingDebugInfo }![]const u8 { comptime assert(target_supported); const module: Module = try .lookup(&self.lookup_cache, gpa, address); return module.name; } +/// This type contains the target-specific implementation. It must expose the following declarations: +/// +/// * `LookupCache: type` +/// * `LookupCache.init: LookupCache` +/// * `lookup: fn (*LookupCache, Allocator, address: usize) !Module` +/// * `key: fn (*const Module) usize` +/// * `DebugInfo: type` +/// * `DebugInfo.init: DebugInfo` +/// * `getSymbolAtAddress: fn (*const Module, Allocator, *DebugInfo, address: usize) !std.debug.Symbol` +/// +/// If unwinding is supported on this target, it must additionally expose the following declarations: +/// +/// * `unwindFrame: fn (*const Module, Allocator, *DebugInfo, *UnwindContext) !usize` const Module = switch (native_os) { else => {}, // Dwarf, // TODO MLUGG: it's this on master but that's definitely broken atm... - .macos, .ios, .watchos, .tvos, .visionos => struct { - /// The runtime address where __TEXT is loaded. 
- text_base: usize, - load_offset: usize, - name: []const u8, - fn key(m: *const Module) usize { - return m.text_base; - } - fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !Module { - _ = cache; - _ = gpa; - const image_count = std.c._dyld_image_count(); - for (0..image_count) |image_idx| { - const header = std.c._dyld_get_image_header(@intCast(image_idx)) orelse continue; - const text_base = @intFromPtr(header); - if (address < text_base) continue; - const load_offset = std.c._dyld_get_image_vmaddr_slide(@intCast(image_idx)); - - // Find the __TEXT segment - var it: macho.LoadCommandIterator = .{ - .ncmds = header.ncmds, - .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], - }; - const text_segment_cmd = while (it.next()) |load_cmd| { - if (load_cmd.cmd() != .SEGMENT_64) continue; - const segment_cmd = load_cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; - break segment_cmd; - } else continue; - - const seg_start = load_offset + text_segment_cmd.vmaddr; - assert(seg_start == text_base); - const seg_end = seg_start + text_segment_cmd.vmsize; - if (address < seg_start or address >= seg_end) continue; - - // We've found the matching __TEXT segment. This is the image we need. - return .{ - .text_base = text_base, - .load_offset = load_offset, - .name = mem.span(std.c._dyld_get_image_name(@intCast(image_idx))), - }; - } - return error.MissingDebugInfo; - } - fn loadLocationInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { - const mapped_mem = try mapDebugInfoFile(module.name); - errdefer posix.munmap(mapped_mem); - - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); - if (hdr.magic != macho.MH_MAGIC_64) - return error.InvalidDebugInfo; - - const symtab: macho.symtab_command = symtab: { - var it: macho.LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SYMTAB => break :symtab cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, - else => {}, - }; - return error.MissingDebugInfo; - }; - - const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab.symoff..]); - const syms = syms_ptr[0..symtab.nsyms]; - const strings = mapped_mem[symtab.stroff..][0 .. 
symtab.strsize - 1 :0]; - - var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); - defer symbols.deinit(gpa); - - var ofile: u32 = undefined; - var last_sym: MachoSymbol = undefined; - var state: enum { - init, - oso_open, - oso_close, - bnsym, - fun_strx, - fun_size, - ensym, - } = .init; - - for (syms) |*sym| { - if (sym.n_type.bits.is_stab == 0) continue; - - // TODO handle globals N_GSYM, and statics N_STSYM - switch (sym.n_type.stab) { - .oso => switch (state) { - .init, .oso_close => { - state = .oso_open; - ofile = sym.n_strx; - }, - else => return error.InvalidDebugInfo, - }, - .bnsym => switch (state) { - .oso_open, .ensym => { - state = .bnsym; - last_sym = .{ - .strx = 0, - .addr = sym.n_value, - .size = 0, - .ofile = ofile, - }; - }, - else => return error.InvalidDebugInfo, - }, - .fun => switch (state) { - .bnsym => { - state = .fun_strx; - last_sym.strx = sym.n_strx; - }, - .fun_strx => { - state = .fun_size; - last_sym.size = @intCast(sym.n_value); - }, - else => return error.InvalidDebugInfo, - }, - .ensym => switch (state) { - .fun_size => { - state = .ensym; - symbols.appendAssumeCapacity(last_sym); - }, - else => return error.InvalidDebugInfo, - }, - .so => switch (state) { - .init, .oso_close => {}, - .oso_open, .ensym => { - state = .oso_close; - }, - else => return error.InvalidDebugInfo, - }, - else => {}, - } - } - - switch (state) { - .init => return error.MissingDebugInfo, - .oso_close => {}, - else => return error.InvalidDebugInfo, - } - - const symbols_slice = try symbols.toOwnedSlice(gpa); - errdefer gpa.free(symbols_slice); - - // Even though lld emits symbols in ascending order, this debug code - // should work for programs linked in any valid way. - // This sort is so that we can binary search later. 
- mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); - - di.full = .{ - .mapped_memory = mapped_mem, - .symbols = symbols_slice, - .strings = strings, - .ofiles = .empty, - }; - } - fn loadUnwindInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { - _ = gpa; - - const header: *std.macho.mach_header = @ptrFromInt(module.text_base); - - var it: macho.LoadCommandIterator = .{ - .ncmds = header.ncmds, - .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], - }; - const sections = while (it.next()) |load_cmd| { - if (load_cmd.cmd() != .SEGMENT_64) continue; - const segment_cmd = load_cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; - break load_cmd.getSections(); - } else unreachable; - - var unwind_info: ?[]const u8 = null; - var eh_frame: ?[]const u8 = null; - for (sections) |sect| { - if (mem.eql(u8, sect.sectName(), "__unwind_info")) { - const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr))); - unwind_info = sect_ptr[0..@intCast(sect.size)]; - } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { - const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr))); - eh_frame = sect_ptr[0..@intCast(sect.size)]; - } - } - di.unwind = .{ - .unwind_info = unwind_info, - .eh_frame = eh_frame, - }; - } - fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { - if (di.full == null) try module.loadLocationInfo(gpa, di); - const vaddr = address - module.load_offset; - const symbol = MachoSymbol.find(di.full.?.symbols, vaddr) orelse return .{ - .name = null, - .compile_unit_name = null, - .source_location = null, - }; - - // offset of `address` from start of `symbol` - const address_symbol_offset = vaddr - symbol.addr; - - // Take the symbol name from the N_FUN STAB entry, we're going to - // use it if we fail to find the DWARF infos - const stab_symbol = mem.sliceTo(di.full.?.strings[symbol.strx..], 0); - const o_file_path = mem.sliceTo(di.full.?.strings[symbol.ofile..], 0); - - // If any information is missing, we can at least return this from now on. 
- const sym_only_result: std.debug.Symbol = .{ - .name = stab_symbol, - .compile_unit_name = null, - .source_location = null, - }; - - const o_file: *DebugInfo.OFile = of: { - const gop = try di.full.?.ofiles.getOrPut(gpa, o_file_path); - if (!gop.found_existing) { - gop.value_ptr.* = DebugInfo.loadOFile(gpa, o_file_path) catch |err| { - defer _ = di.full.?.ofiles.pop().?; - switch (err) { - error.MissingDebugInfo, - error.InvalidDebugInfo, - => return sym_only_result, - else => |e| return e, - } - }; - } - break :of gop.value_ptr; - }; - - const symbol_ofile_vaddr = o_file.addr_table.get(stab_symbol) orelse return sym_only_result; - - const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => return sym_only_result, - else => |e| return e, - }; - - return .{ - .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr) orelse stab_symbol, - .compile_unit_name = compile_unit.die.getAttrString( - &o_file.dwarf, - native_endian, - std.dwarf.AT.name, - o_file.dwarf.section(.debug_str), - compile_unit, - ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - }, - .source_location = o_file.dwarf.getLineNumberInfo( - gpa, - native_endian, - compile_unit, - symbol_ofile_vaddr + address_symbol_offset, - ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - else => return err, - }, - }; - } - fn unwindFrame(module: *const Module, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { - if (di.unwind == null) try module.loadUnwindInfo(gpa, di); - const unwind_info = di.unwind.?.unwind_info orelse return error.MissingUnwindInfo; - // MLUGG TODO: inline? - return unwindFrameMachO( - module.text_base, - module.load_offset, - context, - unwind_info, - di.unwind.?.eh_frame, - ); - } - const LookupCache = void; - const DebugInfo = struct { - unwind: ?struct { - // Backed by the in-memory sections mapped by the loader - unwind_info: ?[]const u8, - eh_frame: ?[]const u8, - }, - // MLUGG TODO: awful field name - full: ?struct { - mapped_memory: []align(std.heap.page_size_min) const u8, - symbols: []const MachoSymbol, - strings: [:0]const u8, - // MLUGG TODO: this could use an adapter to just index straight into `strings`! - ofiles: std.StringArrayHashMapUnmanaged(OFile), - }, - - const init: DebugInfo = .{ - .unwind = null, - .full = null, - }; - - const OFile = struct { - dwarf: Dwarf, - // MLUGG TODO: this could use an adapter to just index straight into the strtab! 
- addr_table: std.StringArrayHashMapUnmanaged(u64), - }; - - fn deinit(di: *DebugInfo, gpa: Allocator) void { - for (di.full.ofiles.values()) |*ofile| { - ofile.dwarf.deinit(gpa); - ofile.addr_table.deinit(gpa); - } - di.full.ofiles.deinit(); - gpa.free(di.full.symbols); - posix.munmap(di.full.mapped_memory); - } - - fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { - const mapped_mem = try mapDebugInfoFile(o_file_path); - errdefer posix.munmap(mapped_mem); - - if (mapped_mem.len < @sizeOf(macho.mach_header_64)) return error.InvalidDebugInfo; - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); - if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo; - - const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { - var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; - var symtab_cmd: ?macho.symtab_command = null; - var it: macho.LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => seg_cmd = cmd, - .SYMTAB => symtab_cmd = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, - else => {}, - }; - break :cmds .{ - seg_cmd orelse return error.MissingDebugInfo, - symtab_cmd orelse return error.MissingDebugInfo, - }; - }; - - if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo; - if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo; - const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; - - const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); - if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo; - const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); - - // TODO handle tentative (common) symbols - var addr_table: std.StringArrayHashMapUnmanaged(u64) = .empty; - defer addr_table.deinit(gpa); - try addr_table.ensureUnusedCapacity(gpa, @intCast(symtab.len)); - for (symtab) |sym| { - if (sym.n_strx == 0) continue; - switch (sym.n_type.bits.type) { - .undf => continue, // includes tentative symbols - .abs => continue, - else => {}, - } - const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); - const gop = addr_table.getOrPutAssumeCapacity(sym_name); - if (gop.found_existing) return error.InvalidDebugInfo; - gop.value_ptr.* = sym.n_value; - } - - var sections: Dwarf.SectionArray = @splat(null); - for (seg_cmd.getSections()) |sect| { - if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; - - const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) 
|section, i| { - if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; - } else continue; - - if (mapped_mem.len < sect.offset + sect.size) return error.InvalidDebugInfo; - const section_bytes = mapped_mem[sect.offset..][0..sect.size]; - sections[section_index] = .{ - .data = section_bytes, - .owned = false, - }; - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - if (missing_debug_info) return error.MissingDebugInfo; - - var dwarf: Dwarf = .{ .sections = sections }; - errdefer dwarf.deinit(gpa); - try dwarf.open(gpa, native_endian); - - return .{ - .dwarf = dwarf, - .addr_table = addr_table.move(), - }; - } - }; - }, + .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => @import("SelfInfo/ElfModule.zig"), + .macos, .ios, .watchos, .tvos, .visionos => @import("SelfInfo/DarwinModule.zig"), + .uefi, .windows => @import("SelfInfo/WindowsModule.zig"), .wasi, .emscripten => struct { - const LookupCache = void; - const DebugInfo = struct { - const init: DebugInfo = .{}; + const LookupCache = struct { + const init: LookupCache = .{}; }; fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !Module { _ = cache; @@ -498,6 +136,9 @@ const Module = switch (native_os) { _ = address; @panic("TODO implement lookup module for Wasm"); } + const DebugInfo = struct { + const init: DebugInfo = .{}; + }; fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { _ = module; _ = gpa; @@ -506,430 +147,9 @@ const Module = switch (native_os) { unreachable; } }, - .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => struct { - load_offset: usize, - name: []const u8, - build_id: ?[]const u8, - gnu_eh_frame: ?[]const u8, - const LookupCache = void; - const DebugInfo = struct { - loaded_elf: ?Dwarf.ElfModule, - unwind: ?Dwarf.Unwind, - const init: DebugInfo = .{ - .loaded_elf = null, - .unwind = null, - }; - }; - fn key(m: Module) usize { - return m.load_offset; - } - fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !Module { - _ = cache; - _ = gpa; - if (native_os == .haiku) @panic("TODO implement lookup module for Haiku"); - const DlIterContext = struct { - /// input - address: usize, - /// output - module: Module, - - fn callback(info: *posix.dl_phdr_info, size: usize, context: *@This()) !void { - _ = size; - // The base address is too high - if (context.address < info.addr) - return; - - const phdrs = info.phdr[0..info.phnum]; - for (phdrs) |*phdr| { - if (phdr.p_type != elf.PT_LOAD) continue; - - // Overflowing addition is used to handle the case of VSDOs having a p_vaddr = 0xffffffffff700000 - const seg_start = info.addr +% phdr.p_vaddr; - const seg_end = seg_start + phdr.p_memsz; - if (context.address >= seg_start and context.address < seg_end) { - context.module = .{ - .load_offset = info.addr, - // Android libc uses NULL instead of "" to mark the main program - .name = mem.sliceTo(info.name, 0) orelse "", - .build_id = null, - .gnu_eh_frame = null, - }; - break; - } - } else return; - - for (info.phdr[0..info.phnum]) |phdr| { - switch (phdr.p_type) { - elf.PT_NOTE => { - // Look for .note.gnu.build-id - const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); - var r: std.Io.Reader = 
.fixed(segment_ptr[0..phdr.p_memsz]); - const name_size = r.takeInt(u32, native_endian) catch continue; - const desc_size = r.takeInt(u32, native_endian) catch continue; - const note_type = r.takeInt(u32, native_endian) catch continue; - const name = r.take(name_size) catch continue; - if (note_type != elf.NT_GNU_BUILD_ID) continue; - if (!mem.eql(u8, name, "GNU\x00")) continue; - const desc = r.take(desc_size) catch continue; - context.module.build_id = desc; - }, - elf.PT_GNU_EH_FRAME => { - const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); - context.module.gnu_eh_frame = segment_ptr[0..phdr.p_memsz]; - }, - else => {}, - } - } - - // Stop the iteration - return error.Found; - } - }; - var ctx: DlIterContext = .{ - .address = address, - .module = undefined, - }; - posix.dl_iterate_phdr(&ctx, error{Found}, DlIterContext.callback) catch |err| switch (err) { - error.Found => return ctx.module, - }; - return error.MissingDebugInfo; - } - fn loadLocationInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { - if (module.name.len > 0) { - di.loaded_elf = Dwarf.ElfModule.load(gpa, .{ - .root_dir = .cwd(), - .sub_path = module.name, - }, module.build_id, null, null, null) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - error.Overflow => return error.InvalidDebugInfo, - else => |e| return e, - }; - } else { - const path = try std.fs.selfExePathAlloc(gpa); - defer gpa.free(path); - di.loaded_elf = Dwarf.ElfModule.load(gpa, .{ - .root_dir = .cwd(), - .sub_path = path, - }, module.build_id, null, null, null) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - error.Overflow => return error.InvalidDebugInfo, - else => |e| return e, - }; - } - } - fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { - if (di.loaded_elf == null) try module.loadLocationInfo(gpa, di); - const vaddr = address - module.load_offset; - return di.loaded_elf.?.dwarf.getSymbol(gpa, native_endian, vaddr); - } - fn loadUnwindInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { - const section_bytes = module.gnu_eh_frame orelse return error.MissingUnwindInfo; // MLUGG TODO: load from file - const section_vaddr: u64 = @intFromPtr(section_bytes.ptr) - module.load_offset; - const header: Dwarf.Unwind.EhFrameHeader = try .parse(section_vaddr, section_bytes, @sizeOf(usize), native_endian); - di.unwind = .initEhFrameHdr(header, section_vaddr, @ptrFromInt(module.load_offset + header.eh_frame_vaddr)); - try di.unwind.?.prepareLookup(gpa, @sizeOf(usize), native_endian); - } - fn unwindFrame(module: *const Module, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { - if (di.unwind == null) try module.loadUnwindInfo(gpa, di); - return unwindFrameDwarf(&di.unwind.?, module.load_offset, context, null); - } - }, - .uefi, .windows => struct { - base_address: usize, - size: usize, - name: []const u8, - handle: windows.HMODULE, - fn key(m: Module) usize { - return m.base_address; - } - fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !Module { - if (lookupInCache(cache, address)) |m| return m; - { - // Check a new module hasn't been loaded - cache.modules.clearRetainingCapacity(); - - const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); - if (handle == windows.INVALID_HANDLE_VALUE) { - return windows.unexpectedError(windows.GetLastError()); - } - defer 
windows.CloseHandle(handle); - - var entry: windows.MODULEENTRY32 = undefined; - entry.dwSize = @sizeOf(windows.MODULEENTRY32); - if (windows.kernel32.Module32First(handle, &entry) != 0) { - try cache.modules.append(gpa, entry); - while (windows.kernel32.Module32Next(handle, &entry) != 0) { - try cache.modules.append(gpa, entry); - } - } - } - if (lookupInCache(cache, address)) |m| return m; - return error.MissingDebugInfo; - } - fn lookupInCache(cache: *const LookupCache, address: usize) ?Module { - for (cache.modules.items) |*entry| { - const base_address = @intFromPtr(entry.modBaseAddr); - if (address >= base_address and address < base_address + entry.modBaseSize) { - return .{ - .base_address = base_address, - .size = entry.modBaseSize, - .name = std.mem.sliceTo(&entry.szModule, 0), - .handle = entry.hModule, - }; - } - } - return null; - } - fn loadLocationInfo(module: *const Module, gpa: Allocator, di: *DebugInfo) !void { - const mapped_ptr: [*]const u8 = @ptrFromInt(module.base_address); - const mapped = mapped_ptr[0..module.size]; - var coff_obj = coff.Coff.init(mapped, true) catch return error.InvalidDebugInfo; - // The string table is not mapped into memory by the loader, so if a section name is in the - // string table then we have to map the full image file from disk. This can happen when - // a binary is produced with -gdwarf, since the section names are longer than 8 bytes. - if (coff_obj.strtabRequired()) { - var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined; - name_buffer[0..4].* = .{ '\\', '?', '?', '\\' }; // openFileAbsoluteW requires the prefix to be present - const process_handle = windows.GetCurrentProcess(); - const len = windows.kernel32.GetModuleFileNameExW( - process_handle, - module.handle, - name_buffer[4..], - windows.PATH_MAX_WIDE, - ); - if (len == 0) return error.MissingDebugInfo; - const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => |e| return e, - }; - errdefer coff_file.close(); - var section_handle: windows.HANDLE = undefined; - const create_section_rc = windows.ntdll.NtCreateSection( - §ion_handle, - windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ, - null, - null, - windows.PAGE_READONLY, - // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default. - // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6. 
- windows.SEC_COMMIT, - coff_file.handle, - ); - if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; - errdefer windows.CloseHandle(section_handle); - var coff_len: usize = 0; - var section_view_ptr: [*]const u8 = undefined; - const map_section_rc = windows.ntdll.NtMapViewOfSection( - section_handle, - process_handle, - @ptrCast(§ion_view_ptr), - null, - 0, - null, - &coff_len, - .ViewUnmap, - 0, - windows.PAGE_READONLY, - ); - if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; - errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(section_view_ptr)) == .SUCCESS); - const section_view = section_view_ptr[0..coff_len]; - coff_obj = coff.Coff.init(section_view, false) catch return error.InvalidDebugInfo; - di.mapped_file = .{ - .file = coff_file, - .section_handle = section_handle, - .section_view = section_view, - }; - } - di.coff_image_base = coff_obj.getImageBase(); - - if (coff_obj.getSectionByName(".debug_info")) |_| { - di.dwarf = .{}; - - inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { - di.dwarf.?.sections[i] = if (coff_obj.getSectionByName("." ++ section.name)) |section_header| blk: { - break :blk .{ - .data = try coff_obj.getSectionDataAlloc(section_header, gpa), - .owned = true, - }; - } else null; - } - - try di.dwarf.?.open(gpa, native_endian); - } - - if (try coff_obj.getPdbPath()) |raw_path| pdb: { - const path = blk: { - if (fs.path.isAbsolute(raw_path)) { - break :blk raw_path; - } else { - const self_dir = try fs.selfExeDirPathAlloc(gpa); - defer gpa.free(self_dir); - break :blk try fs.path.join(gpa, &.{ self_dir, raw_path }); - } - }; - defer if (path.ptr != raw_path.ptr) gpa.free(path); - - di.pdb = Pdb.init(gpa, path) catch |err| switch (err) { - error.FileNotFound, error.IsDir => break :pdb, - else => return err, - }; - try di.pdb.?.parseInfoStream(); - try di.pdb.?.parseDbiStream(); - - if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age) - return error.InvalidDebugInfo; - - di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(gpa); - } - - di.loaded = true; - } - const LookupCache = struct { - modules: std.ArrayListUnmanaged(windows.MODULEENTRY32), - const init: LookupCache = .{ .modules = .empty }; - }; - const DebugInfo = struct { - loaded: bool, - - coff_image_base: u64, - mapped_file: ?struct { - file: File, - section_handle: windows.HANDLE, - section_view: []const u8, - fn deinit(mapped: @This()) void { - const process_handle = windows.GetCurrentProcess(); - assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(mapped.section_view.ptr)) == .SUCCESS); - windows.CloseHandle(mapped.section_handle); - mapped.file.close(); - } - }, - - dwarf: ?Dwarf, - - pdb: ?Pdb, - /// Populated iff `pdb != null`; otherwise `&.{}`. 
- coff_section_headers: []coff.SectionHeader, - - const init: DebugInfo = .{ - .loaded = false, - .coff_image_base = undefined, - .mapped_file = null, - .dwarf = null, - .pdb = null, - .coff_section_headers = &.{}, - }; - - fn deinit(di: *DebugInfo, gpa: Allocator) void { - if (di.dwarf) |*dwarf| dwarf.deinit(gpa); - if (di.pdb) |*pdb| pdb.deinit(); - gpa.free(di.coff_section_headers); - if (di.mapped_file) |mapped| mapped.deinit(); - } - - fn getSymbolFromPdb(di: *DebugInfo, relocated_address: usize) !?std.debug.Symbol { - var coff_section: *align(1) const coff.SectionHeader = undefined; - const mod_index = for (di.pdb.?.sect_contribs) |sect_contrib| { - if (sect_contrib.section > di.coff_section_headers.len) continue; - // Remember that SectionContribEntry.Section is 1-based. - coff_section = &di.coff_section_headers[sect_contrib.section - 1]; - - const vaddr_start = coff_section.virtual_address + sect_contrib.offset; - const vaddr_end = vaddr_start + sect_contrib.size; - if (relocated_address >= vaddr_start and relocated_address < vaddr_end) { - break sect_contrib.module_index; - } - } else { - // we have no information to add to the address - return null; - }; - - const module = try di.pdb.?.getModule(mod_index) orelse return error.InvalidDebugInfo; - - return .{ - .name = di.pdb.?.getSymbolName( - module, - relocated_address - coff_section.virtual_address, - ), - .compile_unit_name = fs.path.basename(module.obj_file_name), - .source_location = try di.pdb.?.getLineNumberInfo( - module, - relocated_address - coff_section.virtual_address, - ), - }; - } - }; - - fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { - if (!di.loaded) try module.loadLocationInfo(gpa, di); - // Translate the runtime address into a virtual address into the module - const vaddr = address - module.base_address; - - if (di.pdb != null) { - if (try di.getSymbolFromPdb(vaddr)) |symbol| return symbol; - } - - if (di.dwarf) |*dwarf| { - const dwarf_address = vaddr + di.coff_image_base; - return dwarf.getSymbol(gpa, native_endian, dwarf_address); - } - - return error.MissingDebugInfo; - } - }, -}; - -const MachoSymbol = struct { - strx: u32, - addr: u64, - size: u32, - ofile: u32, - fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { - _ = context; - return lhs.addr < rhs.addr; - } - /// Assumes that `symbols` is sorted in order of ascending `addr`. 
- fn find(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { - if (symbols.len == 0) return null; // no potential match - if (address < symbols[0].addr) return null; // address is before the lowest-address symbol - var left: usize = 0; - var len: usize = symbols.len; - while (len > 1) { - const mid = left + len / 2; - if (address < symbols[mid].addr) { - len /= 2; - } else { - left = mid; - len -= len / 2; - } - } - return &symbols[left]; - } - - test find { - const symbols: []const MachoSymbol = &.{ - .{ .addr = 100, .strx = undefined, .size = undefined, .ofile = undefined }, - .{ .addr = 200, .strx = undefined, .size = undefined, .ofile = undefined }, - .{ .addr = 300, .strx = undefined, .size = undefined, .ofile = undefined }, - }; - - try testing.expectEqual(null, find(symbols, 0)); - try testing.expectEqual(null, find(symbols, 99)); - try testing.expectEqual(&symbols[0], find(symbols, 100).?); - try testing.expectEqual(&symbols[0], find(symbols, 150).?); - try testing.expectEqual(&symbols[0], find(symbols, 199).?); - - try testing.expectEqual(&symbols[1], find(symbols, 200).?); - try testing.expectEqual(&symbols[1], find(symbols, 250).?); - try testing.expectEqual(&symbols[1], find(symbols, 299).?); - - try testing.expectEqual(&symbols[2], find(symbols, 300).?); - try testing.expectEqual(&symbols[2], find(symbols, 301).?); - try testing.expectEqual(&symbols[2], find(symbols, 5000).?); - } }; test { - _ = MachoSymbol; + _ = Module; } pub const UnwindContext = struct { @@ -944,6 +164,7 @@ pub const UnwindContext = struct { pub fn init(gpa: Allocator, thread_context: *std.debug.ThreadContext) !UnwindContext { comptime assert(supports_unwinding); + const ip_reg_num = Dwarf.abi.ipRegNum(native_arch).?; const pc = stripInstructionPtrAuthCode( (try regValueNative(thread_context, ip_reg_num, null)).*, ); @@ -970,7 +191,7 @@ pub const UnwindContext = struct { } pub fn getFp(self: *const UnwindContext) !usize { - return (try regValueNative(self.thread_context, fpRegNum(self.reg_context), self.reg_context)).*; + return (try regValueNative(self.thread_context, Dwarf.abi.fpRegNum(native_arch, self.reg_context), self.reg_context)).*; } /// Resolves the register rule and places the result into `out` (see regBytes) @@ -1019,7 +240,7 @@ pub const UnwindContext = struct { .register => |register| { const src = try regBytes(context.thread_context, register, context.reg_context); if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, try regBytes(context.thread_context, register, context.reg_context)); + @memcpy(out, src); }, .expression => |expression| { context.stack_machine.reset(); @@ -1043,553 +264,171 @@ pub const UnwindContext = struct { .architectural => return error.UnimplementedRegisterRule, } } + + /// Unwind a stack frame using DWARF unwinding info, updating the register context. + /// + /// If `.eh_frame_hdr` is available and complete, it will be used to binary search for the FDE. + /// Otherwise, a linear scan of `.eh_frame` and `.debug_frame` is done to find the FDE. The latter + /// may require lazily loading the data in those sections. + /// + /// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info + /// defers unwinding to DWARF. This is an offset into the `.eh_frame` section. 
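+    ///
+    /// Returns the unwound frame's return address (0 if the CIE marks the return address register
+    /// as undefined), and updates `context` so that a subsequent call unwinds the caller's frame.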
+    pub fn unwindFrameDwarf(
+        context: *UnwindContext,
+        unwind: *const Dwarf.Unwind,
+        load_offset: usize,
+        explicit_fde_offset: ?usize,
+    ) !usize {
+        if (!supports_unwinding) return error.UnsupportedCpuArchitecture;
+        if (context.pc == 0) return 0;
+
+        const pc_vaddr = context.pc - load_offset;
+
+        const fde_offset = explicit_fde_offset orelse try unwind.lookupPc(
+            pc_vaddr,
+            @sizeOf(usize),
+            native_endian,
+        ) orelse return error.MissingDebugInfo;
+        const format, const cie, const fde = try unwind.getFde(fde_offset, @sizeOf(usize), native_endian);
+
+        // Check if this FDE *actually* includes the address.
+        if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) return error.MissingDebugInfo;
+
+        // Do not set `compile_unit` because the spec states that CFIs
+        // may not reference other debug sections anyway.
+        var expression_context: Dwarf.expression.Context = .{
+            .format = format,
+            .thread_context = context.thread_context,
+            .reg_context = context.reg_context,
+            .cfa = context.cfa,
+        };
+
+        context.vm.reset();
+        context.reg_context.eh_frame = cie.version != 4;
+        context.reg_context.is_macho = native_os.isDarwin();
+
+        const row = try context.vm.runTo(context.gpa, context.pc - load_offset, cie, fde, @sizeOf(usize), native_endian);
+        context.cfa = switch (row.cfa.rule) {
+            .val_offset => |offset| blk: {
+                const register = row.cfa.register orelse return error.InvalidCFARule;
+                const value = (try regValueNative(context.thread_context, register, context.reg_context)).*;
+                break :blk try applyOffset(value, offset);
+            },
+            .expression => |expr| blk: {
+                context.stack_machine.reset();
+                const value = try context.stack_machine.run(
+                    expr,
+                    context.gpa,
+                    expression_context,
+                    context.cfa,
+                );
+
+                if (value) |v| {
+                    if (v != .generic) return error.InvalidExpressionValue;
+                    break :blk v.generic;
+                } else return error.NoExpressionValue;
+            },
+            else => return error.InvalidCFARule,
+        };
+
+        expression_context.cfa = context.cfa;
+
+        // Buffering the modifications is done because copying the thread context is not portable;
+        // some implementations (i.e. darwin) use internal pointers to the mcontext.
+        var arena: std.heap.ArenaAllocator = .init(context.gpa);
+        defer arena.deinit();
+        const update_arena = arena.allocator();
+
+        const RegisterUpdate = struct {
+            // Backed by thread_context
+            dest: []u8,
+            // Backed by arena
+            src: []const u8,
+            prev: ?*@This(),
+        };
+
+        var update_tail: ?*RegisterUpdate = null;
+        var has_return_address = true;
+        for (context.vm.rowColumns(row)) |column| {
+            if (column.register) |register| {
+                if (register == cie.return_address_register) {
+                    has_return_address = column.rule != .undefined;
+                }
+
+                const dest = try regBytes(context.thread_context, register, context.reg_context);
+                const src = try update_arena.alloc(u8, dest.len);
+                try context.resolveRegisterRule(column, expression_context, src);
+
+                const new_update = try update_arena.create(RegisterUpdate);
+                new_update.* = .{
+                    .dest = dest,
+                    .src = src,
+                    .prev = update_tail,
+                };
+                update_tail = new_update;
+            }
+        }
+
+        // On all implemented architectures, the CFA is defined as being the previous frame's SP
+        (try regValueNative(context.thread_context, Dwarf.abi.spRegNum(native_arch, context.reg_context), context.reg_context)).* = context.cfa.?;
+
+        while (update_tail) |tail| {
+            @memcpy(tail.dest, tail.src);
+            update_tail = tail.prev;
+        }
+
+        if (has_return_address) {
+            context.pc = stripInstructionPtrAuthCode((try regValueNative(
+                context.thread_context,
+                cie.return_address_register,
+                context.reg_context,
+            )).*);
+        } else {
+            context.pc = 0;
+        }
+
+        const ip_reg_num = Dwarf.abi.ipRegNum(native_arch).?;
+        (try regValueNative(context.thread_context, ip_reg_num, context.reg_context)).* = context.pc;
+
+        // The call instruction will have pushed the address of the instruction that follows the call as the return address.
+        // This next instruction may be past the end of the function if the caller was `noreturn` (i.e. the last instruction in
+        // the function was the call). If we were to look up an FDE entry using the return address directly, it could end up
+        // either not finding an FDE at all, or using the next FDE in the program, producing incorrect results. To prevent this,
+        // we subtract one so that the next lookup is guaranteed to land inside the function that made the call.
+        //
+        // The exception to this rule is signal frames, where execution would be returned to the instruction
+        // that triggered the handler, so the address is used as-is.
+        const return_address = context.pc;
+        if (context.pc > 0 and !cie.is_signal_frame) context.pc -= 1;
+
+        return return_address;
+    }
+    /// Since register rules are applied (usually) during a panic,
+    /// checked addition / subtraction is used so that we can return
+    /// an error and fall back to FP-based unwinding.
+    fn applyOffset(base: usize, offset: i64) !usize {
+        return if (offset >= 0)
+            try std.math.add(usize, base, @as(usize, @intCast(offset)))
+        else
+            try std.math.sub(usize, base, @as(usize, @intCast(-offset)));
+    }
+    /// Some platforms use pointer authentication - the upper bits of instruction pointers contain a signature.
+    /// This function clears these signature bits to make the pointer usable.
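+    /// On AArch64 this is done with `xpaclri` (`hint 0x07`); on other architectures the pointer is
+    /// returned unchanged.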
+ pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { + if (native_arch.isAARCH64()) { + // `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it) + // The save / restore is because `xpaclri` operates on x30 (LR) + return asm ( + \\mov x16, x30 + \\mov x30, x15 + \\hint 0x07 + \\mov x15, x30 + \\mov x30, x16 + : [ret] "={x15}" (-> usize), + : [ptr] "{x15}" (ptr), + : .{ .x16 = true }); + } + + return ptr; + } }; - -/// Some platforms use pointer authentication - the upper bits of instruction pointers contain a signature. -/// This function clears these signature bits to make the pointer usable. -pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { - if (native_arch.isAARCH64()) { - // `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it) - // The save / restore is because `xpaclri` operates on x30 (LR) - return asm ( - \\mov x16, x30 - \\mov x30, x15 - \\hint 0x07 - \\mov x15, x30 - \\mov x30, x16 - : [ret] "={x15}" (-> usize), - : [ptr] "{x15}" (ptr), - : .{ .x16 = true }); - } - - return ptr; -} - -/// Unwind a stack frame using DWARF unwinding info, updating the register context. -/// -/// If `.eh_frame_hdr` is available and complete, it will be used to binary search for the FDE. -/// Otherwise, a linear scan of `.eh_frame` and `.debug_frame` is done to find the FDE. The latter -/// may require lazily loading the data in those sections. -/// -/// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info -/// defers unwinding to DWARF. This is an offset into the `.eh_frame` section. -fn unwindFrameDwarf( - unwind: *const Dwarf.Unwind, - load_offset: usize, - context: *UnwindContext, - explicit_fde_offset: ?usize, -) !usize { - if (!supports_unwinding) return error.UnsupportedCpuArchitecture; - if (context.pc == 0) return 0; - - const pc_vaddr = context.pc - load_offset; - - const fde_offset = explicit_fde_offset orelse try unwind.lookupPc( - pc_vaddr, - @sizeOf(usize), - native_endian, - ) orelse return error.MissingDebugInfo; - const format, const cie, const fde = try unwind.getFde(fde_offset, @sizeOf(usize), native_endian); - - // Check if this FDE *actually* includes the address. - if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) return error.MissingDebugInfo; - - // Do not set `compile_unit` because the spec states that CFIs - // may not reference other debug sections anyway. 
- var expression_context: Dwarf.expression.Context = .{ - .format = format, - .thread_context = context.thread_context, - .reg_context = context.reg_context, - .cfa = context.cfa, - }; - - context.vm.reset(); - context.reg_context.eh_frame = cie.version != 4; - context.reg_context.is_macho = native_os.isDarwin(); - - const row = try context.vm.runTo(context.gpa, context.pc - load_offset, cie, fde, @sizeOf(usize), native_endian); - context.cfa = switch (row.cfa.rule) { - .val_offset => |offset| blk: { - const register = row.cfa.register orelse return error.InvalidCFARule; - const value = mem.readInt(usize, (try regBytes(context.thread_context, register, context.reg_context))[0..@sizeOf(usize)], native_endian); - break :blk try applyOffset(value, offset); - }, - .expression => |expr| blk: { - context.stack_machine.reset(); - const value = try context.stack_machine.run( - expr, - context.gpa, - expression_context, - context.cfa, - ); - - if (value) |v| { - if (v != .generic) return error.InvalidExpressionValue; - break :blk v.generic; - } else return error.NoExpressionValue; - }, - else => return error.InvalidCFARule, - }; - - expression_context.cfa = context.cfa; - - // Buffering the modifications is done because copying the thread context is not portable, - // some implementations (ie. darwin) use internal pointers to the mcontext. - var arena: std.heap.ArenaAllocator = .init(context.gpa); - defer arena.deinit(); - const update_arena = arena.allocator(); - - const RegisterUpdate = struct { - // Backed by thread_context - dest: []u8, - // Backed by arena - src: []const u8, - prev: ?*@This(), - }; - - var update_tail: ?*RegisterUpdate = null; - var has_return_address = true; - for (context.vm.rowColumns(row)) |column| { - if (column.register) |register| { - if (register == cie.return_address_register) { - has_return_address = column.rule != .undefined; - } - - const dest = try regBytes(context.thread_context, register, context.reg_context); - const src = try update_arena.alloc(u8, dest.len); - try context.resolveRegisterRule(column, expression_context, src); - - const new_update = try update_arena.create(RegisterUpdate); - new_update.* = .{ - .dest = dest, - .src = src, - .prev = update_tail, - }; - update_tail = new_update; - } - } - - // On all implemented architectures, the CFA is defined as being the previous frame's SP - (try regValueNative(context.thread_context, spRegNum(context.reg_context), context.reg_context)).* = context.cfa.?; - - while (update_tail) |tail| { - @memcpy(tail.dest, tail.src); - update_tail = tail.prev; - } - - if (has_return_address) { - context.pc = stripInstructionPtrAuthCode(mem.readInt(usize, (try regBytes( - context.thread_context, - cie.return_address_register, - context.reg_context, - ))[0..@sizeOf(usize)], native_endian)); - } else { - context.pc = 0; - } - - (try regValueNative(context.thread_context, ip_reg_num, context.reg_context)).* = context.pc; - - // The call instruction will have pushed the address of the instruction that follows the call as the return address. - // This next instruction may be past the end of the function if the caller was `noreturn` (ie. the last instruction in - // the function was the call). If we were to look up an FDE entry using the return address directly, it could end up - // either not finding an FDE at all, or using the next FDE in the program, producing incorrect results. 
To prevent this, - // we subtract one so that the next lookup is guaranteed to land inside the - // - // The exception to this rule is signal frames, where we return execution would be returned to the instruction - // that triggered the handler. - const return_address = context.pc; - if (context.pc > 0 and !cie.is_signal_frame) context.pc -= 1; - - return return_address; -} - -fn fpRegNum(reg_context: Dwarf.abi.RegisterContext) u8 { - return Dwarf.abi.fpRegNum(native_arch, reg_context); -} - -fn spRegNum(reg_context: Dwarf.abi.RegisterContext) u8 { - return Dwarf.abi.spRegNum(native_arch, reg_context); -} - -const ip_reg_num = Dwarf.abi.ipRegNum(native_arch).?; - -/// Tells whether unwinding for the host is implemented. -pub const supports_unwinding = supportsUnwinding(&builtin.target); - -comptime { - if (supports_unwinding) assert(Dwarf.abi.supportsUnwinding(&builtin.target)); -} - -/// Tells whether unwinding for this target is *implemented* here in the Zig -/// standard library. -/// -/// See also `Dwarf.abi.supportsUnwinding` which tells whether Dwarf supports -/// unwinding on that target *in theory*. -pub fn supportsUnwinding(target: *const std.Target) bool { - return switch (target.cpu.arch) { - .x86 => switch (target.os.tag) { - .linux, .netbsd, .solaris, .illumos => true, - else => false, - }, - .x86_64 => switch (target.os.tag) { - .linux, .netbsd, .freebsd, .openbsd, .macos, .ios, .solaris, .illumos => true, - else => false, - }, - .arm, .armeb, .thumb, .thumbeb => switch (target.os.tag) { - .linux => true, - else => false, - }, - .aarch64, .aarch64_be => switch (target.os.tag) { - .linux, .netbsd, .freebsd, .macos, .ios => true, - else => false, - }, - // Unwinding is possible on other targets but this implementation does - // not support them...yet! - else => false, - }; -} - -/// Since register rules are applied (usually) during a panic, -/// checked addition / subtraction is used so that we can return -/// an error and fall back to FP-based unwinding. -fn applyOffset(base: usize, offset: i64) !usize { - return if (offset >= 0) - try std.math.add(usize, base, @as(usize, @intCast(offset))) - else - try std.math.sub(usize, base, @as(usize, @intCast(-offset))); -} - -/// Uses `mmap` to map the file at `opt_path` (or, if `null`, the self executable image) into memory. -fn mapDebugInfoFile(opt_path: ?[]const u8) ![]align(std.heap.page_size_min) const u8 { - const open_result = if (opt_path) |path| - fs.cwd().openFile(path, .{}) - else - fs.openSelfExe(.{}); - const file = open_result catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => |e| return e, - }; - defer file.close(); - - const file_len = math.cast(usize, try file.getEndPos()) orelse return error.InvalidDebugInfo; - - return posix.mmap( - null, - file_len, - posix.PROT.READ, - .{ .TYPE = .SHARED }, - file.handle, - 0, - ); -} - -/// Unwind a frame using MachO compact unwind info (from __unwind_info). -/// If the compact encoding can't encode a way to unwind a frame, it will -/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. 
-fn unwindFrameMachO( - text_base: usize, - load_offset: usize, - context: *UnwindContext, - unwind_info: []const u8, - opt_eh_frame: ?[]const u8, -) !usize { - if (unwind_info.len < @sizeOf(macho.unwind_info_section_header)) return error.InvalidUnwindInfo; - const header: *align(1) const macho.unwind_info_section_header = @ptrCast(unwind_info); - - const index_byte_count = header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry); - if (unwind_info.len < header.indexSectionOffset + index_byte_count) return error.InvalidUnwindInfo; - const indices: []align(1) const macho.unwind_info_section_header_index_entry = @ptrCast(unwind_info[header.indexSectionOffset..][0..index_byte_count]); - if (indices.len == 0) return error.MissingUnwindInfo; - - // offset of the PC into the `__TEXT` segment - const pc_text_offset = context.pc - text_base; - - const start_offset: u32, const first_level_offset: u32 = index: { - var left: usize = 0; - var len: usize = indices.len; - while (len > 1) { - const mid = left + len / 2; - if (pc_text_offset < indices[mid].functionOffset) { - len /= 2; - } else { - left = mid; - len -= len / 2; - } - } - break :index .{ indices[left].secondLevelPagesSectionOffset, indices[left].functionOffset }; - }; - // An offset of 0 is a sentinel indicating a range does not have unwind info. - if (start_offset == 0) return error.MissingUnwindInfo; - - const common_encodings_byte_count = header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t); - if (unwind_info.len < header.commonEncodingsArraySectionOffset + common_encodings_byte_count) return error.InvalidUnwindInfo; - const common_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( - unwind_info[header.commonEncodingsArraySectionOffset..][0..common_encodings_byte_count], - ); - - if (unwind_info.len < start_offset + @sizeOf(macho.UNWIND_SECOND_LEVEL)) return error.InvalidUnwindInfo; - const kind: *align(1) const macho.UNWIND_SECOND_LEVEL = @ptrCast(unwind_info[start_offset..]); - - const entry: struct { - function_offset: usize, - raw_encoding: u32, - } = switch (kind.*) { - .REGULAR => entry: { - if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_regular_second_level_page_header)) return error.InvalidUnwindInfo; - const page_header: *align(1) const macho.unwind_info_regular_second_level_page_header = @ptrCast(unwind_info[start_offset..]); - - const entries_byte_count = page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry); - if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidUnwindInfo; - const entries: []align(1) const macho.unwind_info_regular_second_level_entry = @ptrCast( - unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], - ); - if (entries.len == 0) return error.InvalidUnwindInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - if (pc_text_offset < entries[mid].functionOffset) { - len /= 2; - } else { - left = mid; - len -= len / 2; - } - } - break :entry .{ - .function_offset = entries[left].functionOffset, - .raw_encoding = entries[left].encoding, - }; - }, - .COMPRESSED => entry: { - if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_compressed_second_level_page_header)) return error.InvalidUnwindInfo; - const page_header: *align(1) const macho.unwind_info_compressed_second_level_page_header = @ptrCast(unwind_info[start_offset..]); - - const entries_byte_count = page_header.entryCount * 
@sizeOf(macho.UnwindInfoCompressedEntry); - if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidUnwindInfo; - const entries: []align(1) const macho.UnwindInfoCompressedEntry = @ptrCast( - unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], - ); - if (entries.len == 0) return error.InvalidUnwindInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - if (pc_text_offset < first_level_offset + entries[mid].funcOffset) { - len /= 2; - } else { - left = mid; - len -= len / 2; - } - } - const entry = entries[left]; - - const function_offset = first_level_offset + entry.funcOffset; - if (entry.encodingIndex < common_encodings.len) { - break :entry .{ - .function_offset = function_offset, - .raw_encoding = common_encodings[entry.encodingIndex], - }; - } - - const local_index = entry.encodingIndex - common_encodings.len; - const local_encodings_byte_count = page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t); - if (unwind_info.len < start_offset + page_header.encodingsPageOffset + local_encodings_byte_count) return error.InvalidUnwindInfo; - const local_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( - unwind_info[start_offset + page_header.encodingsPageOffset ..][0..local_encodings_byte_count], - ); - if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; - break :entry .{ - .function_offset = function_offset, - .raw_encoding = local_encodings[local_index], - }; - }, - else => return error.InvalidUnwindInfo, - }; - - if (entry.raw_encoding == 0) return error.NoUnwindInfo; - const reg_context: Dwarf.abi.RegisterContext = .{ .eh_frame = false, .is_macho = true }; - - const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); - const new_ip = switch (builtin.cpu.arch) { - .x86_64 => switch (encoding.mode.x86_64) { - .OLD => return error.UnimplementedUnwindEncoding, - .RBP_FRAME => ip: { - const frame = encoding.value.x86_64.frame; - - const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; - const new_sp = fp + 2 * @sizeOf(usize); - - const ip_ptr = fp + @sizeOf(usize); - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - - (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; - (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; - (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; - - const regs: [5]u3 = .{ - frame.reg0, - frame.reg1, - frame.reg2, - frame.reg3, - frame.reg4, - }; - for (regs, 0..) |reg, i| { - if (reg == 0) continue; - const addr = fp - frame.frame_offset * @sizeOf(usize) + i * @sizeOf(usize); - const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(reg); - (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*; - } - - break :ip new_ip; - }, - .STACK_IMMD, - .STACK_IND, - => ip: { - const frameless = encoding.value.x86_64.frameless; - - const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; - const stack_size: usize = stack_size: { - if (encoding.mode.x86_64 == .STACK_IMMD) { - break :stack_size @as(usize, frameless.stack.direct.stack_size) * @sizeOf(usize); - } - // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. 
- const sub_offset_addr = - text_base + - entry.function_offset + - frameless.stack.indirect.sub_offset; - // `sub_offset_addr` points to the offset of the literal within the instruction - const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; - break :stack_size sub_operand + @sizeOf(usize) * @as(usize, frameless.stack.indirect.stack_adjust); - }; - - // Decode the Lehmer-coded sequence of registers. - // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h - - // Decode the variable-based permutation number into its digits. Each digit represents - // an index into the list of register numbers that weren't yet used in the sequence at - // the time the digit was added. - const reg_count = frameless.stack_reg_count; - const ip_ptr = ip_ptr: { - var digits: [6]u3 = undefined; - var accumulator: usize = frameless.stack_reg_permutation; - var base: usize = 2; - for (0..reg_count) |i| { - const div = accumulator / base; - digits[digits.len - 1 - i] = @intCast(accumulator - base * div); - accumulator = div; - base += 1; - } - - var registers: [6]u3 = undefined; - var used_indices: [6]bool = @splat(false); - for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { - var unused_count: u8 = 0; - const unused_index = for (used_indices, 0..) |used, index| { - if (!used) { - if (target_unused_index == unused_count) break index; - unused_count += 1; - } - } else unreachable; - registers[i] = @intCast(unused_index + 1); - used_indices[unused_index] = true; - } - - var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); - for (0..reg_count) |i| { - const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(registers[i]); - (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - - break :ip_ptr reg_addr; - }; - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_sp = ip_ptr + @sizeOf(usize); - - (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; - (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; - - break :ip new_ip; - }, - .DWARF => { - const eh_frame = opt_eh_frame orelse return error.MissingEhFrame; - const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - load_offset; - return unwindFrameDwarf( - &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), - load_offset, - context, - @intCast(encoding.value.x86_64.dwarf), - ); - }, - }, - .aarch64, .aarch64_be => switch (encoding.mode.arm64) { - .OLD => return error.UnimplementedUnwindEncoding, - .FRAMELESS => ip: { - const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; - const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; - const new_ip = (try regValueNative(context.thread_context, 30, reg_context)).*; - (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; - break :ip new_ip; - }, - .DWARF => { - const eh_frame = opt_eh_frame orelse return error.MissingEhFrame; - const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - load_offset; - return unwindFrameDwarf( - &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), - load_offset, - context, - @intCast(encoding.value.x86_64.dwarf), - ); - }, - .FRAME => ip: { - const frame = encoding.value.arm64.frame; - - const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; - const ip_ptr = fp + 
@sizeOf(usize); - - var reg_addr = fp - @sizeOf(usize); - inline for (@typeInfo(@TypeOf(frame.x_reg_pairs)).@"struct".fields, 0..) |field, i| { - if (@field(frame.x_reg_pairs, field.name) != 0) { - (try regValueNative(context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - (try regValueNative(context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - } - - inline for (@typeInfo(@TypeOf(frame.d_reg_pairs)).@"struct".fields, 0..) |field, i| { - if (@field(frame.d_reg_pairs, field.name) != 0) { - // Only the lower half of the 128-bit V registers are restored during unwinding - { - const dest: *align(1) usize = @ptrCast(try regBytes(context.thread_context, 64 + 8 + i, context.reg_context)); - dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; - } - reg_addr += @sizeOf(usize); - { - const dest: *align(1) usize = @ptrCast(try regBytes(context.thread_context, 64 + 9 + i, context.reg_context)); - dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; - } - reg_addr += @sizeOf(usize); - } - } - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - - (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; - (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; - - break :ip new_ip; - }, - }, - else => comptime unreachable, // unimplemented - }; - - context.pc = stripInstructionPtrAuthCode(new_ip); - if (context.pc > 0) context.pc -= 1; - return new_ip; -} diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig new file mode 100644 index 0000000000..d0cb47281f --- /dev/null +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -0,0 +1,801 @@ +/// The runtime address where __TEXT is loaded. +text_base: usize, +load_offset: usize, +name: []const u8, + +pub fn key(m: *const DarwinModule) usize { + return m.text_base; +} + +pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !DarwinModule { + _ = cache; + _ = gpa; + const image_count = std.c._dyld_image_count(); + for (0..image_count) |image_idx| { + const header = std.c._dyld_get_image_header(@intCast(image_idx)) orelse continue; + const text_base = @intFromPtr(header); + if (address < text_base) continue; + const load_offset = std.c._dyld_get_image_vmaddr_slide(@intCast(image_idx)); + + // Find the __TEXT segment + var it: macho.LoadCommandIterator = .{ + .ncmds = header.ncmds, + .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], + }; + const text_segment_cmd = while (it.next()) |load_cmd| { + if (load_cmd.cmd() != .SEGMENT_64) continue; + const segment_cmd = load_cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; + break segment_cmd; + } else continue; + + const seg_start = load_offset + text_segment_cmd.vmaddr; + assert(seg_start == text_base); + const seg_end = seg_start + text_segment_cmd.vmsize; + if (address < seg_start or address >= seg_end) continue; + + // We've found the matching __TEXT segment. This is the image we need. 
+ return .{ + .text_base = text_base, + .load_offset = load_offset, + .name = mem.span(std.c._dyld_get_image_name(@intCast(image_idx))), + }; + } + return error.MissingDebugInfo; +} +fn loadLocationInfo(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo) !void { + const mapped_mem = try mapDebugInfoFile(module.name); + errdefer posix.munmap(mapped_mem); + + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); + if (hdr.magic != macho.MH_MAGIC_64) + return error.InvalidDebugInfo; + + const symtab: macho.symtab_command = symtab: { + var it: macho.LoadCommandIterator = .{ + .ncmds = hdr.ncmds, + .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SYMTAB => break :symtab cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, + else => {}, + }; + return error.MissingDebugInfo; + }; + + const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab.symoff..]); + const syms = syms_ptr[0..symtab.nsyms]; + const strings = mapped_mem[symtab.stroff..][0 .. symtab.strsize - 1 :0]; + + var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); + defer symbols.deinit(gpa); + + var ofile: u32 = undefined; + var last_sym: MachoSymbol = undefined; + var state: enum { + init, + oso_open, + oso_close, + bnsym, + fun_strx, + fun_size, + ensym, + } = .init; + + for (syms) |*sym| { + if (sym.n_type.bits.is_stab == 0) continue; + + // TODO handle globals N_GSYM, and statics N_STSYM + switch (sym.n_type.stab) { + .oso => switch (state) { + .init, .oso_close => { + state = .oso_open; + ofile = sym.n_strx; + }, + else => return error.InvalidDebugInfo, + }, + .bnsym => switch (state) { + .oso_open, .ensym => { + state = .bnsym; + last_sym = .{ + .strx = 0, + .addr = sym.n_value, + .size = 0, + .ofile = ofile, + }; + }, + else => return error.InvalidDebugInfo, + }, + .fun => switch (state) { + .bnsym => { + state = .fun_strx; + last_sym.strx = sym.n_strx; + }, + .fun_strx => { + state = .fun_size; + last_sym.size = @intCast(sym.n_value); + }, + else => return error.InvalidDebugInfo, + }, + .ensym => switch (state) { + .fun_size => { + state = .ensym; + symbols.appendAssumeCapacity(last_sym); + }, + else => return error.InvalidDebugInfo, + }, + .so => switch (state) { + .init, .oso_close => {}, + .oso_open, .ensym => { + state = .oso_close; + }, + else => return error.InvalidDebugInfo, + }, + else => {}, + } + } + + switch (state) { + .init => return error.MissingDebugInfo, + .oso_close => {}, + else => return error.InvalidDebugInfo, + } + + const symbols_slice = try symbols.toOwnedSlice(gpa); + errdefer gpa.free(symbols_slice); + + // Even though lld emits symbols in ascending order, this debug code + // should work for programs linked in any valid way. + // This sort is so that we can binary search later. 
+    mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan);
+
+    di.full = .{
+        .mapped_memory = mapped_mem,
+        .symbols = symbols_slice,
+        .strings = strings,
+        .ofiles = .empty,
+    };
+}
+fn loadUnwindInfo(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo) !void {
+    _ = gpa;
+
+    const header: *std.macho.mach_header = @ptrFromInt(module.text_base);
+
+    var it: macho.LoadCommandIterator = .{
+        .ncmds = header.ncmds,
+        .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds],
+    };
+    const sections = while (it.next()) |load_cmd| {
+        if (load_cmd.cmd() != .SEGMENT_64) continue;
+        const segment_cmd = load_cmd.cast(macho.segment_command_64).?;
+        if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue;
+        break load_cmd.getSections();
+    } else unreachable;
+
+    var unwind_info: ?[]const u8 = null;
+    var eh_frame: ?[]const u8 = null;
+    for (sections) |sect| {
+        if (mem.eql(u8, sect.sectName(), "__unwind_info")) {
+            const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr)));
+            unwind_info = sect_ptr[0..@intCast(sect.size)];
+        } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) {
+            const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr)));
+            eh_frame = sect_ptr[0..@intCast(sect.size)];
+        }
+    }
+    di.unwind = .{
+        .unwind_info = unwind_info,
+        .eh_frame = eh_frame,
+    };
+}
+pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol {
+    if (di.full == null) try module.loadLocationInfo(gpa, di);
+    const vaddr = address - module.load_offset;
+    const symbol = MachoSymbol.find(di.full.?.symbols, vaddr) orelse return .{
+        .name = null,
+        .compile_unit_name = null,
+        .source_location = null,
+    };
+
+    // offset of `address` from start of `symbol`
+    const address_symbol_offset = vaddr - symbol.addr;
+
+    // Take the symbol name from the N_FUN STAB entry; we're going to
+    // use it if we fail to find the DWARF info.
+    const stab_symbol = mem.sliceTo(di.full.?.strings[symbol.strx..], 0);
+    const o_file_path = mem.sliceTo(di.full.?.strings[symbol.ofile..], 0);
+
+    // If any of the remaining information is missing, we can at least return this.
+ const sym_only_result: std.debug.Symbol = .{ + .name = stab_symbol, + .compile_unit_name = null, + .source_location = null, + }; + + const o_file: *DebugInfo.OFile = of: { + const gop = try di.full.?.ofiles.getOrPut(gpa, o_file_path); + if (!gop.found_existing) { + gop.value_ptr.* = DebugInfo.loadOFile(gpa, o_file_path) catch |err| { + defer _ = di.full.?.ofiles.pop().?; + switch (err) { + error.MissingDebugInfo, + error.InvalidDebugInfo, + => return sym_only_result, + else => |e| return e, + } + }; + } + break :of gop.value_ptr; + }; + + const symbol_ofile_vaddr = o_file.addr_table.get(stab_symbol) orelse return sym_only_result; + + const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => return sym_only_result, + else => |e| return e, + }; + + return .{ + .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr) orelse stab_symbol, + .compile_unit_name = compile_unit.die.getAttrString( + &o_file.dwarf, + native_endian, + std.dwarf.AT.name, + o_file.dwarf.section(.debug_str), + compile_unit, + ) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + }, + .source_location = o_file.dwarf.getLineNumberInfo( + gpa, + native_endian, + compile_unit, + symbol_ofile_vaddr + address_symbol_offset, + ) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + else => return err, + }, + }; +} +pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { + if (di.unwind == null) try module.loadUnwindInfo(gpa, di); + const unwind_info = di.unwind.?.unwind_info orelse return error.MissingUnwindInfo; + // MLUGG TODO: inline? + return unwindFrameMachO( + module.text_base, + module.load_offset, + context, + unwind_info, + di.unwind.?.eh_frame, + ); +} +/// Unwind a frame using MachO compact unwind info (from __unwind_info). +/// If the compact encoding can't encode a way to unwind a frame, it will +/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. +fn unwindFrameMachO( + text_base: usize, + load_offset: usize, + context: *UnwindContext, + unwind_info: []const u8, + opt_eh_frame: ?[]const u8, +) !usize { + if (unwind_info.len < @sizeOf(macho.unwind_info_section_header)) return error.InvalidUnwindInfo; + const header: *align(1) const macho.unwind_info_section_header = @ptrCast(unwind_info); + + const index_byte_count = header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry); + if (unwind_info.len < header.indexSectionOffset + index_byte_count) return error.InvalidUnwindInfo; + const indices: []align(1) const macho.unwind_info_section_header_index_entry = @ptrCast(unwind_info[header.indexSectionOffset..][0..index_byte_count]); + if (indices.len == 0) return error.MissingUnwindInfo; + + // offset of the PC into the `__TEXT` segment + const pc_text_offset = context.pc - text_base; + + const start_offset: u32, const first_level_offset: u32 = index: { + var left: usize = 0; + var len: usize = indices.len; + while (len > 1) { + const mid = left + len / 2; + if (pc_text_offset < indices[mid].functionOffset) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + break :index .{ indices[left].secondLevelPagesSectionOffset, indices[left].functionOffset }; + }; + // An offset of 0 is a sentinel indicating a range does not have unwind info. 
+ if (start_offset == 0) return error.MissingUnwindInfo; + + const common_encodings_byte_count = header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t); + if (unwind_info.len < header.commonEncodingsArraySectionOffset + common_encodings_byte_count) return error.InvalidUnwindInfo; + const common_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( + unwind_info[header.commonEncodingsArraySectionOffset..][0..common_encodings_byte_count], + ); + + if (unwind_info.len < start_offset + @sizeOf(macho.UNWIND_SECOND_LEVEL)) return error.InvalidUnwindInfo; + const kind: *align(1) const macho.UNWIND_SECOND_LEVEL = @ptrCast(unwind_info[start_offset..]); + + const entry: struct { + function_offset: usize, + raw_encoding: u32, + } = switch (kind.*) { + .REGULAR => entry: { + if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_regular_second_level_page_header)) return error.InvalidUnwindInfo; + const page_header: *align(1) const macho.unwind_info_regular_second_level_page_header = @ptrCast(unwind_info[start_offset..]); + + const entries_byte_count = page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry); + if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidUnwindInfo; + const entries: []align(1) const macho.unwind_info_regular_second_level_entry = @ptrCast( + unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + if (pc_text_offset < entries[mid].functionOffset) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + break :entry .{ + .function_offset = entries[left].functionOffset, + .raw_encoding = entries[left].encoding, + }; + }, + .COMPRESSED => entry: { + if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_compressed_second_level_page_header)) return error.InvalidUnwindInfo; + const page_header: *align(1) const macho.unwind_info_compressed_second_level_page_header = @ptrCast(unwind_info[start_offset..]); + + const entries_byte_count = page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry); + if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidUnwindInfo; + const entries: []align(1) const macho.UnwindInfoCompressedEntry = @ptrCast( + unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + if (pc_text_offset < first_level_offset + entries[mid].funcOffset) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + const entry = entries[left]; + + const function_offset = first_level_offset + entry.funcOffset; + if (entry.encodingIndex < common_encodings.len) { + break :entry .{ + .function_offset = function_offset, + .raw_encoding = common_encodings[entry.encodingIndex], + }; + } + + const local_index = entry.encodingIndex - common_encodings.len; + const local_encodings_byte_count = page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t); + if (unwind_info.len < start_offset + page_header.encodingsPageOffset + local_encodings_byte_count) return error.InvalidUnwindInfo; + const local_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( + unwind_info[start_offset + page_header.encodingsPageOffset 
..][0..local_encodings_byte_count], + ); + if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; + break :entry .{ + .function_offset = function_offset, + .raw_encoding = local_encodings[local_index], + }; + }, + else => return error.InvalidUnwindInfo, + }; + + if (entry.raw_encoding == 0) return error.NoUnwindInfo; + const reg_context: Dwarf.abi.RegisterContext = .{ .eh_frame = false, .is_macho = true }; + + const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); + const new_ip = switch (builtin.cpu.arch) { + .x86_64 => switch (encoding.mode.x86_64) { + .OLD => return error.UnimplementedUnwindEncoding, + .RBP_FRAME => ip: { + const frame = encoding.value.x86_64.frame; + + const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; + const new_sp = fp + 2 * @sizeOf(usize); + + const ip_ptr = fp + @sizeOf(usize); + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + const regs: [5]u3 = .{ + frame.reg0, + frame.reg1, + frame.reg2, + frame.reg3, + frame.reg4, + }; + for (regs, 0..) |reg, i| { + if (reg == 0) continue; + const addr = fp - frame.frame_offset * @sizeOf(usize) + i * @sizeOf(usize); + const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(reg); + (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*; + } + + break :ip new_ip; + }, + .STACK_IMMD, + .STACK_IND, + => ip: { + const frameless = encoding.value.x86_64.frameless; + + const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; + const stack_size: usize = stack_size: { + if (encoding.mode.x86_64 == .STACK_IMMD) { + break :stack_size @as(usize, frameless.stack.direct.stack_size) * @sizeOf(usize); + } + // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. + const sub_offset_addr = + text_base + + entry.function_offset + + frameless.stack.indirect.sub_offset; + // `sub_offset_addr` points to the offset of the literal within the instruction + const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; + break :stack_size sub_operand + @sizeOf(usize) * @as(usize, frameless.stack.indirect.stack_adjust); + }; + + // Decode the Lehmer-coded sequence of registers. + // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h + + // Decode the variable-based permutation number into its digits. Each digit represents + // an index into the list of register numbers that weren't yet used in the sequence at + // the time the digit was added. + const reg_count = frameless.stack_reg_count; + const ip_ptr = ip_ptr: { + var digits: [6]u3 = undefined; + var accumulator: usize = frameless.stack_reg_permutation; + var base: usize = 2; + for (0..reg_count) |i| { + const div = accumulator / base; + digits[digits.len - 1 - i] = @intCast(accumulator - base * div); + accumulator = div; + base += 1; + } + + var registers: [6]u3 = undefined; + var used_indices: [6]bool = @splat(false); + for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { + var unused_count: u8 = 0; + const unused_index = for (used_indices, 0..) 
|used, index| { + if (!used) { + if (target_unused_index == unused_count) break index; + unused_count += 1; + } + } else unreachable; + registers[i] = @intCast(unused_index + 1); + used_indices[unused_index] = true; + } + + var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); + for (0..reg_count) |i| { + const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(registers[i]); + (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + + break :ip_ptr reg_addr; + }; + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_sp = ip_ptr + @sizeOf(usize); + + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + break :ip new_ip; + }, + .DWARF => { + const eh_frame = opt_eh_frame orelse return error.MissingEhFrame; + const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - load_offset; + return context.unwindFrameDwarf( + &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), + load_offset, + @intCast(encoding.value.x86_64.dwarf), + ); + }, + }, + .aarch64, .aarch64_be => switch (encoding.mode.arm64) { + .OLD => return error.UnimplementedUnwindEncoding, + .FRAMELESS => ip: { + const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; + const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; + const new_ip = (try regValueNative(context.thread_context, 30, reg_context)).*; + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + break :ip new_ip; + }, + .DWARF => { + const eh_frame = opt_eh_frame orelse return error.MissingEhFrame; + const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - load_offset; + return context.unwindFrameDwarf( + &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), + load_offset, + @intCast(encoding.value.x86_64.dwarf), + ); + }, + .FRAME => ip: { + const frame = encoding.value.arm64.frame; + + const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; + const ip_ptr = fp + @sizeOf(usize); + + var reg_addr = fp - @sizeOf(usize); + inline for (@typeInfo(@TypeOf(frame.x_reg_pairs)).@"struct".fields, 0..) |field, i| { + if (@field(frame.x_reg_pairs, field.name) != 0) { + (try regValueNative(context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + (try regValueNative(context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + } + + inline for (@typeInfo(@TypeOf(frame.d_reg_pairs)).@"struct".fields, 0..) 
|field, i| { + if (@field(frame.d_reg_pairs, field.name) != 0) { + // Only the lower half of the 128-bit V registers are restored during unwinding + { + const dest: *align(1) usize = @ptrCast(try regBytes(context.thread_context, 64 + 8 + i, context.reg_context)); + dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; + } + reg_addr += @sizeOf(usize); + { + const dest: *align(1) usize = @ptrCast(try regBytes(context.thread_context, 64 + 9 + i, context.reg_context)); + dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; + } + reg_addr += @sizeOf(usize); + } + } + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + break :ip new_ip; + }, + }, + else => comptime unreachable, // unimplemented + }; + + context.pc = UnwindContext.stripInstructionPtrAuthCode(new_ip); + if (context.pc > 0) context.pc -= 1; + return new_ip; +} +/// No cache needed, because `_dyld_get_image_header` etc are already fast. +pub const LookupCache = struct { + pub const init: LookupCache = .{}; +}; +pub const DebugInfo = struct { + unwind: ?struct { + // Backed by the in-memory sections mapped by the loader + unwind_info: ?[]const u8, + eh_frame: ?[]const u8, + }, + // MLUGG TODO: awful field name + full: ?struct { + mapped_memory: []align(std.heap.page_size_min) const u8, + symbols: []const MachoSymbol, + strings: [:0]const u8, + // MLUGG TODO: this could use an adapter to just index straight into `strings`! + ofiles: std.StringArrayHashMapUnmanaged(OFile), + }, + + pub const init: DebugInfo = .{ + .unwind = null, + .full = null, + }; + + const OFile = struct { + dwarf: Dwarf, + // MLUGG TODO: this could use an adapter to just index straight into the strtab! + addr_table: std.StringArrayHashMapUnmanaged(u64), + }; + + fn deinit(di: *DebugInfo, gpa: Allocator) void { + for (di.full.ofiles.values()) |*ofile| { + ofile.dwarf.deinit(gpa); + ofile.addr_table.deinit(gpa); + } + di.full.ofiles.deinit(); + gpa.free(di.full.symbols); + posix.munmap(di.full.mapped_memory); + } + + fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { + const mapped_mem = try mapDebugInfoFile(o_file_path); + errdefer posix.munmap(mapped_mem); + + if (mapped_mem.len < @sizeOf(macho.mach_header_64)) return error.InvalidDebugInfo; + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); + if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo; + + const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { + var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; + var symtab_cmd: ?macho.symtab_command = null; + var it: macho.LoadCommandIterator = .{ + .ncmds = hdr.ncmds, + .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => seg_cmd = cmd, + .SYMTAB => symtab_cmd = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, + else => {}, + }; + break :cmds .{ + seg_cmd orelse return error.MissingDebugInfo, + symtab_cmd orelse return error.MissingDebugInfo, + }; + }; + + if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo; + if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo; + const strtab = mapped_mem[symtab_cmd.stroff..][0 .. 
symtab_cmd.strsize - 1]; + + const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); + if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo; + const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); + + // TODO handle tentative (common) symbols + var addr_table: std.StringArrayHashMapUnmanaged(u64) = .empty; + defer addr_table.deinit(gpa); + try addr_table.ensureUnusedCapacity(gpa, @intCast(symtab.len)); + for (symtab) |sym| { + if (sym.n_strx == 0) continue; + switch (sym.n_type.bits.type) { + .undf => continue, // includes tentative symbols + .abs => continue, + else => {}, + } + const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); + const gop = addr_table.getOrPutAssumeCapacity(sym_name); + if (gop.found_existing) return error.InvalidDebugInfo; + gop.value_ptr.* = sym.n_value; + } + + var sections: Dwarf.SectionArray = @splat(null); + for (seg_cmd.getSections()) |sect| { + if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; + + const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { + if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; + } else continue; + + if (mapped_mem.len < sect.offset + sect.size) return error.InvalidDebugInfo; + const section_bytes = mapped_mem[sect.offset..][0..sect.size]; + sections[section_index] = .{ + .data = section_bytes, + .owned = false, + }; + } + + const missing_debug_info = + sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; + if (missing_debug_info) return error.MissingDebugInfo; + + var dwarf: Dwarf = .{ .sections = sections }; + errdefer dwarf.deinit(gpa); + try dwarf.open(gpa, native_endian); + + return .{ + .dwarf = dwarf, + .addr_table = addr_table.move(), + }; + } +}; + +const MachoSymbol = struct { + strx: u32, + addr: u64, + size: u32, + ofile: u32, + fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { + _ = context; + return lhs.addr < rhs.addr; + } + /// Assumes that `symbols` is sorted in order of ascending `addr`. 
+ fn find(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { + if (symbols.len == 0) return null; // no potential match + if (address < symbols[0].addr) return null; // address is before the lowest-address symbol + var left: usize = 0; + var len: usize = symbols.len; + while (len > 1) { + const mid = left + len / 2; + if (address < symbols[mid].addr) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + return &symbols[left]; + } + + test find { + const symbols: []const MachoSymbol = &.{ + .{ .addr = 100, .strx = undefined, .size = undefined, .ofile = undefined }, + .{ .addr = 200, .strx = undefined, .size = undefined, .ofile = undefined }, + .{ .addr = 300, .strx = undefined, .size = undefined, .ofile = undefined }, + }; + + try testing.expectEqual(null, find(symbols, 0)); + try testing.expectEqual(null, find(symbols, 99)); + try testing.expectEqual(&symbols[0], find(symbols, 100).?); + try testing.expectEqual(&symbols[0], find(symbols, 150).?); + try testing.expectEqual(&symbols[0], find(symbols, 199).?); + + try testing.expectEqual(&symbols[1], find(symbols, 200).?); + try testing.expectEqual(&symbols[1], find(symbols, 250).?); + try testing.expectEqual(&symbols[1], find(symbols, 299).?); + + try testing.expectEqual(&symbols[2], find(symbols, 300).?); + try testing.expectEqual(&symbols[2], find(symbols, 301).?); + try testing.expectEqual(&symbols[2], find(symbols, 5000).?); + } +}; +test { + _ = MachoSymbol; +} + +fn fpRegNum(reg_context: Dwarf.abi.RegisterContext) u8 { + return Dwarf.abi.fpRegNum(builtin.target.cpu.arch, reg_context); +} +fn spRegNum(reg_context: Dwarf.abi.RegisterContext) u8 { + return Dwarf.abi.spRegNum(builtin.target.cpu.arch, reg_context); +} +const ip_reg_num = Dwarf.abi.ipRegNum(builtin.target.cpu.arch).?; + +/// Uses `mmap` to map the file at `path` into memory. +fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 { + const file = std.fs.cwd().openFile(path, .{}) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => |e| return e, + }; + defer file.close(); + + const file_len = std.math.cast(usize, try file.getEndPos()) orelse return error.InvalidDebugInfo; + + return posix.mmap( + null, + file_len, + posix.PROT.READ, + .{ .TYPE = .SHARED }, + file.handle, + 0, + ); +} + +const DarwinModule = @This(); + +const std = @import("../../std.zig"); +const Allocator = std.mem.Allocator; +const Dwarf = std.debug.Dwarf; +const assert = std.debug.assert; +const macho = std.macho; +const mem = std.mem; +const posix = std.posix; +const testing = std.testing; +const UnwindContext = std.debug.SelfInfo.UnwindContext; +const regBytes = Dwarf.abi.regBytes; +const regValueNative = Dwarf.abi.regValueNative; + +const builtin = @import("builtin"); +const native_endian = builtin.target.cpu.arch.endian(); diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig new file mode 100644 index 0000000000..41ea722962 --- /dev/null +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -0,0 +1,144 @@ +load_offset: usize, +name: []const u8, +build_id: ?[]const u8, +gnu_eh_frame: ?[]const u8, + +/// No cache needed, because `dl_iterate_phdr` is already fast. 
+pub const LookupCache = struct { + pub const init: LookupCache = .{}; +}; + +pub const DebugInfo = struct { + loaded_elf: ?Dwarf.ElfModule, + unwind: ?Dwarf.Unwind, + pub const init: DebugInfo = .{ + .loaded_elf = null, + .unwind = null, + }; +}; + +pub fn key(m: ElfModule) usize { + return m.load_offset; +} +pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !ElfModule { + _ = cache; + _ = gpa; + if (builtin.target.os.tag == .haiku) @panic("TODO implement lookup module for Haiku"); + const DlIterContext = struct { + /// input + address: usize, + /// output + module: ElfModule, + + fn callback(info: *std.posix.dl_phdr_info, size: usize, context: *@This()) !void { + _ = size; + // The base address is too high + if (context.address < info.addr) + return; + + const phdrs = info.phdr[0..info.phnum]; + for (phdrs) |*phdr| { + if (phdr.p_type != elf.PT_LOAD) continue; + + // Overflowing addition is used to handle the case of VSDOs having a p_vaddr = 0xffffffffff700000 + const seg_start = info.addr +% phdr.p_vaddr; + const seg_end = seg_start + phdr.p_memsz; + if (context.address >= seg_start and context.address < seg_end) { + context.module = .{ + .load_offset = info.addr, + // Android libc uses NULL instead of "" to mark the main program + .name = mem.sliceTo(info.name, 0) orelse "", + .build_id = null, + .gnu_eh_frame = null, + }; + break; + } + } else return; + + for (info.phdr[0..info.phnum]) |phdr| { + switch (phdr.p_type) { + elf.PT_NOTE => { + // Look for .note.gnu.build-id + const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); + var r: std.Io.Reader = .fixed(segment_ptr[0..phdr.p_memsz]); + const name_size = r.takeInt(u32, native_endian) catch continue; + const desc_size = r.takeInt(u32, native_endian) catch continue; + const note_type = r.takeInt(u32, native_endian) catch continue; + const name = r.take(name_size) catch continue; + if (note_type != elf.NT_GNU_BUILD_ID) continue; + if (!mem.eql(u8, name, "GNU\x00")) continue; + const desc = r.take(desc_size) catch continue; + context.module.build_id = desc; + }, + elf.PT_GNU_EH_FRAME => { + const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); + context.module.gnu_eh_frame = segment_ptr[0..phdr.p_memsz]; + }, + else => {}, + } + } + + // Stop the iteration + return error.Found; + } + }; + var ctx: DlIterContext = .{ + .address = address, + .module = undefined, + }; + std.posix.dl_iterate_phdr(&ctx, error{Found}, DlIterContext.callback) catch |err| switch (err) { + error.Found => return ctx.module, + }; + return error.MissingDebugInfo; +} +fn loadLocationInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) !void { + if (module.name.len > 0) { + di.loaded_elf = Dwarf.ElfModule.load(gpa, .{ + .root_dir = .cwd(), + .sub_path = module.name, + }, module.build_id, null, null, null) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + error.Overflow => return error.InvalidDebugInfo, + else => |e| return e, + }; + } else { + const path = try std.fs.selfExePathAlloc(gpa); + defer gpa.free(path); + di.loaded_elf = Dwarf.ElfModule.load(gpa, .{ + .root_dir = .cwd(), + .sub_path = path, + }, module.build_id, null, null, null) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + error.Overflow => return error.InvalidDebugInfo, + else => |e| return e, + }; + } +} +pub fn getSymbolAtAddress(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { + if (di.loaded_elf == null) try 
module.loadLocationInfo(gpa, di); + const vaddr = address - module.load_offset; + return di.loaded_elf.?.dwarf.getSymbol(gpa, native_endian, vaddr); +} +fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) !void { + const section_bytes = module.gnu_eh_frame orelse return error.MissingUnwindInfo; // MLUGG TODO: load from file + const section_vaddr: u64 = @intFromPtr(section_bytes.ptr) - module.load_offset; + const header: Dwarf.Unwind.EhFrameHeader = try .parse(section_vaddr, section_bytes, @sizeOf(usize), native_endian); + di.unwind = .initEhFrameHdr(header, section_vaddr, @ptrFromInt(module.load_offset + header.eh_frame_vaddr)); + try di.unwind.?.prepareLookup(gpa, @sizeOf(usize), native_endian); +} +pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { + if (di.unwind == null) try module.loadUnwindInfo(gpa, di); + return context.unwindFrameDwarf(&di.unwind.?, module.load_offset, null); +} + +const ElfModule = @This(); + +const std = @import("../../std.zig"); +const Allocator = std.mem.Allocator; +const Dwarf = std.debug.Dwarf; +const elf = std.elf; +const mem = std.mem; +const UnwindContext = std.debug.SelfInfo.UnwindContext; + +const builtin = @import("builtin"); +const native_endian = builtin.target.cpu.arch.endian(); diff --git a/lib/std/debug/SelfInfo/WindowsModule.zig b/lib/std/debug/SelfInfo/WindowsModule.zig new file mode 100644 index 0000000000..ab322c201a --- /dev/null +++ b/lib/std/debug/SelfInfo/WindowsModule.zig @@ -0,0 +1,255 @@ +base_address: usize, +size: usize, +name: []const u8, +handle: windows.HMODULE, +pub fn key(m: WindowsModule) usize { + return m.base_address; +} +pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !WindowsModule { + if (lookupInCache(cache, address)) |m| return m; + { + // Check a new module hasn't been loaded + cache.modules.clearRetainingCapacity(); + + const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); + if (handle == windows.INVALID_HANDLE_VALUE) { + return windows.unexpectedError(windows.GetLastError()); + } + defer windows.CloseHandle(handle); + + var entry: windows.MODULEENTRY32 = undefined; + entry.dwSize = @sizeOf(windows.MODULEENTRY32); + if (windows.kernel32.Module32First(handle, &entry) != 0) { + try cache.modules.append(gpa, entry); + while (windows.kernel32.Module32Next(handle, &entry) != 0) { + try cache.modules.append(gpa, entry); + } + } + } + if (lookupInCache(cache, address)) |m| return m; + return error.MissingDebugInfo; +} +pub fn getSymbolAtAddress(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { + if (!di.loaded) try module.loadLocationInfo(gpa, di); + // Translate the runtime address into a virtual address into the module + const vaddr = address - module.base_address; + + if (di.pdb != null) { + if (try di.getSymbolFromPdb(vaddr)) |symbol| return symbol; + } + + if (di.dwarf) |*dwarf| { + const dwarf_address = vaddr + di.coff_image_base; + return dwarf.getSymbol(gpa, native_endian, dwarf_address); + } + + return error.MissingDebugInfo; +} +fn lookupInCache(cache: *const LookupCache, address: usize) ?WindowsModule { + for (cache.modules.items) |*entry| { + const base_address = @intFromPtr(entry.modBaseAddr); + if (address >= base_address and address < base_address + entry.modBaseSize) { + return .{ + .base_address = base_address, + .size = entry.modBaseSize, + .name = std.mem.sliceTo(&entry.szModule, 0), + .handle = 
entry.hModule,
+            };
+        }
+    }
+    return null;
+}
+fn loadLocationInfo(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo) !void {
+    const mapped_ptr: [*]const u8 = @ptrFromInt(module.base_address);
+    const mapped = mapped_ptr[0..module.size];
+    var coff_obj = coff.Coff.init(mapped, true) catch return error.InvalidDebugInfo;
+    // The string table is not mapped into memory by the loader, so if a section name is in the
+    // string table then we have to map the full image file from disk. This can happen when
+    // a binary is produced with -gdwarf, since the section names are longer than 8 bytes.
+    if (coff_obj.strtabRequired()) {
+        var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined;
+        name_buffer[0..4].* = .{ '\\', '?', '?', '\\' }; // openFileAbsoluteW requires the prefix to be present
+        const process_handle = windows.GetCurrentProcess();
+        const len = windows.kernel32.GetModuleFileNameExW(
+            process_handle,
+            module.handle,
+            name_buffer[4..],
+            windows.PATH_MAX_WIDE,
+        );
+        if (len == 0) return error.MissingDebugInfo;
+        const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) {
+            error.FileNotFound => return error.MissingDebugInfo,
+            else => |e| return e,
+        };
+        errdefer coff_file.close();
+        var section_handle: windows.HANDLE = undefined;
+        const create_section_rc = windows.ntdll.NtCreateSection(
+            &section_handle,
+            windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ,
+            null,
+            null,
+            windows.PAGE_READONLY,
+            // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default.
+            // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6.
+            windows.SEC_COMMIT,
+            coff_file.handle,
+        );
+        if (create_section_rc != .SUCCESS) return error.MissingDebugInfo;
+        errdefer windows.CloseHandle(section_handle);
+        var coff_len: usize = 0;
+        var section_view_ptr: [*]const u8 = undefined;
+        const map_section_rc = windows.ntdll.NtMapViewOfSection(
+            section_handle,
+            process_handle,
+            @ptrCast(&section_view_ptr),
+            null,
+            0,
+            null,
+            &coff_len,
+            .ViewUnmap,
+            0,
+            windows.PAGE_READONLY,
+        );
+        if (map_section_rc != .SUCCESS) return error.MissingDebugInfo;
+        errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(section_view_ptr)) == .SUCCESS);
+        const section_view = section_view_ptr[0..coff_len];
+        coff_obj = coff.Coff.init(section_view, false) catch return error.InvalidDebugInfo;
+        di.mapped_file = .{
+            .file = coff_file,
+            .section_handle = section_handle,
+            .section_view = section_view,
+        };
+    }
+    di.coff_image_base = coff_obj.getImageBase();
+
+    if (coff_obj.getSectionByName(".debug_info")) |_| {
+        di.dwarf = .{};
+
+        inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| {
+            di.dwarf.?.sections[i] = if (coff_obj.getSectionByName("."
++ section.name)) |section_header| blk: { + break :blk .{ + .data = try coff_obj.getSectionDataAlloc(section_header, gpa), + .owned = true, + }; + } else null; + } + + try di.dwarf.?.open(gpa, native_endian); + } + + if (try coff_obj.getPdbPath()) |raw_path| pdb: { + const path = blk: { + if (fs.path.isAbsolute(raw_path)) { + break :blk raw_path; + } else { + const self_dir = try fs.selfExeDirPathAlloc(gpa); + defer gpa.free(self_dir); + break :blk try fs.path.join(gpa, &.{ self_dir, raw_path }); + } + }; + defer if (path.ptr != raw_path.ptr) gpa.free(path); + + di.pdb = Pdb.init(gpa, path) catch |err| switch (err) { + error.FileNotFound, error.IsDir => break :pdb, + else => return err, + }; + try di.pdb.?.parseInfoStream(); + try di.pdb.?.parseDbiStream(); + + if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age) + return error.InvalidDebugInfo; + + di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(gpa); + } + + di.loaded = true; +} +pub const LookupCache = struct { + modules: std.ArrayListUnmanaged(windows.MODULEENTRY32), + pub const init: LookupCache = .{ .modules = .empty }; +}; +pub const DebugInfo = struct { + loaded: bool, + + coff_image_base: u64, + mapped_file: ?struct { + file: fs.File, + section_handle: windows.HANDLE, + section_view: []const u8, + fn deinit(mapped: @This()) void { + const process_handle = windows.GetCurrentProcess(); + assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(mapped.section_view.ptr)) == .SUCCESS); + windows.CloseHandle(mapped.section_handle); + mapped.file.close(); + } + }, + + dwarf: ?Dwarf, + + pdb: ?Pdb, + /// Populated iff `pdb != null`; otherwise `&.{}`. + coff_section_headers: []coff.SectionHeader, + + pub const init: DebugInfo = .{ + .loaded = false, + .coff_image_base = undefined, + .mapped_file = null, + .dwarf = null, + .pdb = null, + .coff_section_headers = &.{}, + }; + + fn deinit(di: *DebugInfo, gpa: Allocator) void { + if (di.dwarf) |*dwarf| dwarf.deinit(gpa); + if (di.pdb) |*pdb| pdb.deinit(); + gpa.free(di.coff_section_headers); + if (di.mapped_file) |mapped| mapped.deinit(); + } + + fn getSymbolFromPdb(di: *DebugInfo, relocated_address: usize) !?std.debug.Symbol { + var coff_section: *align(1) const coff.SectionHeader = undefined; + const mod_index = for (di.pdb.?.sect_contribs) |sect_contrib| { + if (sect_contrib.section > di.coff_section_headers.len) continue; + // Remember that SectionContribEntry.Section is 1-based. 
+ coff_section = &di.coff_section_headers[sect_contrib.section - 1]; + + const vaddr_start = coff_section.virtual_address + sect_contrib.offset; + const vaddr_end = vaddr_start + sect_contrib.size; + if (relocated_address >= vaddr_start and relocated_address < vaddr_end) { + break sect_contrib.module_index; + } + } else { + // we have no information to add to the address + return null; + }; + + const module = try di.pdb.?.getModule(mod_index) orelse return error.InvalidDebugInfo; + + return .{ + .name = di.pdb.?.getSymbolName( + module, + relocated_address - coff_section.virtual_address, + ), + .compile_unit_name = fs.path.basename(module.obj_file_name), + .source_location = try di.pdb.?.getLineNumberInfo( + module, + relocated_address - coff_section.virtual_address, + ), + }; + } +}; + +const WindowsModule = @This(); + +const std = @import("../../std.zig"); +const Allocator = std.mem.Allocator; +const Dwarf = std.debug.Dwarf; +const Pdb = std.debug.Pdb; +const assert = std.debug.assert; +const coff = std.coff; +const fs = std.fs; +const mem = std.mem; +const windows = std.os.windows; + +const builtin = @import("builtin"); +const native_endian = builtin.target.cpu.arch.endian();
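
The frameless x86_64 compact-unwind case in DarwinModule.zig decodes which callee-saved registers were pushed from a variable-base permutation number (a Lehmer code): each digit selects one of the register numbers 1..6 that has not been chosen yet. Below is a minimal standalone sketch of just that decoding step, assuming a hypothetical helper name `decodePermutation` and a plain `@import("std")` rather than the in-tree relative import; the patch itself performs this inline in `unwindFrameMachO`.

const std = @import("std");

/// Decodes a frameless compact-unwind `stack_reg_permutation` value into the
/// compact-unwind register numbers (1..6) that were saved, in push order.
/// Mirrors the inline decoding in `unwindFrameMachO` above.
fn decodePermutation(permutation: usize, reg_count: u3) [6]u3 {
    std.debug.assert(reg_count <= 6);

    // Split the variable-base number into its digits, least-significant first,
    // storing them so that `digits[digits.len - reg_count ..]` is in push order.
    var digits: [6]u3 = undefined;
    var accumulator: usize = permutation;
    var base: usize = 2;
    for (0..reg_count) |i| {
        const div = accumulator / base;
        digits[digits.len - 1 - i] = @intCast(accumulator - base * div);
        accumulator = div;
        base += 1;
    }

    var registers: [6]u3 = @splat(0);
    var used: [6]bool = @splat(false);
    for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| {
        // Each digit is an index into the register numbers not yet chosen.
        var unused_count: u8 = 0;
        const unused_index = for (used, 0..) |is_used, index| {
            if (!is_used) {
                if (target_unused_index == unused_count) break index;
                unused_count += 1;
            }
        } else unreachable;
        registers[i] = @intCast(unused_index + 1);
        used[unused_index] = true;
    }
    return registers;
}

test decodePermutation {
    // A permutation of 0 selects the remaining registers in ascending order.
    try std.testing.expectEqual([6]u3{ 1, 2, 3, 0, 0, 0 }, decodePermutation(0, 3));
}

Because every digit of a zero permutation is zero, the lowest-numbered unused register is chosen at each step, which is why the test expects registers 1, 2, 3 in order.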