diff --git a/lib/std/c.zig b/lib/std/c.zig index d1affab207..4c139d5023 100644 --- a/lib/std/c.zig +++ b/lib/std/c.zig @@ -10994,6 +10994,9 @@ pub extern "c" fn dlclose(handle: *anyopaque) c_int; pub extern "c" fn dlsym(handle: ?*anyopaque, symbol: [*:0]const u8) ?*anyopaque; pub extern "c" fn dlerror() ?[*:0]u8; +pub const dladdr = if (native_os.isDarwin()) darwin.dladdr else {}; +pub const dl_info = if (native_os.isDarwin()) darwin.dl_info else {}; + pub extern "c" fn sync() void; pub extern "c" fn syncfs(fd: c_int) c_int; pub extern "c" fn fsync(fd: c_int) c_int; diff --git a/lib/std/c/darwin.zig b/lib/std/c/darwin.zig index 2ad979ecf2..cf7d3127eb 100644 --- a/lib/std/c/darwin.zig +++ b/lib/std/c/darwin.zig @@ -354,6 +354,14 @@ pub extern "c" fn _dyld_image_count() u32; pub extern "c" fn _dyld_get_image_header(image_index: u32) ?*mach_header; pub extern "c" fn _dyld_get_image_vmaddr_slide(image_index: u32) usize; pub extern "c" fn _dyld_get_image_name(image_index: u32) [*:0]const u8; +pub extern "c" fn dladdr(addr: *const anyopaque, info: *dl_info) c_int; + +pub const dl_info = extern struct { + fname: [*:0]const u8, + fbase: *anyopaque, + sname: ?[*:0]const u8, + saddr: ?*anyopaque, +}; pub const COPYFILE = packed struct(u32) { ACL: bool = false, diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 088152d873..5b7a6bf715 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -585,12 +585,14 @@ pub fn captureCurrentStackTrace(options: StackUnwindOptions, addr_buf: []usize) while (true) switch (it.next()) { .switch_to_fp => if (!it.stratOk(options.allow_unsafe_unwind)) break, .end => break, - .frame => |return_address| { + .frame => |pc_addr| { if (wait_for) |target| { - if (return_address != target) continue; + // Possible off-by-one error: `pc_addr` might be one less than the return address (so + // that it falls *inside* the function call), while `target` *is* a return address. 
+ if (pc_addr != target and pc_addr + 1 != target) continue; wait_for = null; } - if (frame_idx < addr_buf.len) addr_buf[frame_idx] = return_address; + if (frame_idx < addr_buf.len) addr_buf[frame_idx] = pc_addr; frame_idx += 1; }, }; @@ -631,6 +633,7 @@ pub fn writeCurrentStackTrace(options: StackUnwindOptions, writer: *Writer, tty_ var printed_any_frame = false; while (true) switch (it.next()) { .switch_to_fp => |unwind_error| { + if (StackIterator.fp_unwind_is_safe) continue; // no need to even warn const module_name = di.getModuleNameForAddress(di_gpa, unwind_error.address) catch "???"; const caption: []const u8 = switch (unwind_error.err) { error.MissingDebugInfo => "unwind info unavailable", @@ -658,12 +661,14 @@ pub fn writeCurrentStackTrace(options: StackUnwindOptions, writer: *Writer, tty_ } }, .end => break, - .frame => |return_address| { + .frame => |pc_addr| { if (wait_for) |target| { - if (return_address != target) continue; + // Possible off-by-one error: `pc_addr` might be one less than the return address (so + // that it falls *inside* the function call), while `target` *is* a return address. 
+ if (pc_addr != target and pc_addr + 1 != target) continue; wait_for = null; } - try printSourceAtAddress(di_gpa, di, writer, return_address -| 1, tty_config); + try printSourceAtAddress(di_gpa, di, writer, pc_addr, tty_config); printed_any_frame = true; }, }; @@ -703,8 +708,8 @@ pub fn writeStackTrace(st: *const std.builtin.StackTrace, writer: *Writer, tty_c }, }; const captured_frames = @min(n_frames, st.instruction_addresses.len); - for (st.instruction_addresses[0..captured_frames]) |return_address| { - try printSourceAtAddress(di_gpa, di, writer, return_address -| 1, tty_config); + for (st.instruction_addresses[0..captured_frames]) |pc_addr| { + try printSourceAtAddress(di_gpa, di, writer, pc_addr, tty_config); } if (n_frames > captured_frames) { tty_config.setColor(writer, .bold) catch {}; @@ -725,6 +730,8 @@ pub fn dumpStackTrace(st: *const std.builtin.StackTrace) void { const StackIterator = union(enum) { /// Unwinding using debug info (e.g. DWARF CFI). di: if (SelfInfo.supports_unwinding) SelfInfo.UnwindContext else noreturn, + /// We will first report the *current* PC of this `UnwindContext`, then we will switch to `di`. + di_first: if (SelfInfo.supports_unwinding) SelfInfo.UnwindContext else noreturn, /// Naive frame-pointer-based unwinding. Very simple, but typically unreliable. fp: usize, @@ -742,9 +749,12 @@ const StackIterator = union(enum) { } if (opt_context_ptr) |context_ptr| { if (!SelfInfo.supports_unwinding) return error.CannotUnwindFromContext; - return .{ .di = .init(context_ptr) }; + // Use `di_first` here so we report the PC in the context before unwinding any further. + return .{ .di_first = .init(context_ptr) }; } if (SelfInfo.supports_unwinding and cpu_context.Native != noreturn) { + // We don't need `di_first` here, because our PC is in `std.debug`; we're only interested + // in our caller's frame and above. 
return .{ .di = .init(&.current()) }; } return .{ .fp = @frameAddress() }; @@ -752,7 +762,7 @@ const StackIterator = union(enum) { fn deinit(si: *StackIterator) void { switch (si.*) { .fp => {}, - .di => |*unwind_context| unwind_context.deinit(getDebugInfoAllocator()), + .di, .di_first => |*unwind_context| unwind_context.deinit(getDebugInfoAllocator()), } } @@ -763,7 +773,7 @@ const StackIterator = union(enum) { /// Whether the current unwind strategy is allowed given `allow_unsafe`. fn stratOk(it: *const StackIterator, allow_unsafe: bool) bool { return switch (it.*) { - .di => true, + .di, .di_first => true, // If we omitted frame pointers from *this* compilation, FP unwinding would crash // immediately regardless of anything. But FPs could also be omitted from a different // linked object, so it's not guaranteed to be safe, unless the target specifically @@ -773,11 +783,11 @@ const StackIterator = union(enum) { } const Result = union(enum) { - /// A stack frame has been found; this is the corresponding return address. + /// A stack frame has been found; this is the corresponding program counter address. frame: usize, /// The end of the stack has been reached. end, - /// We were using the `.di` strategy, but are now switching to `.fp` due to this error. + /// We were using `SelfInfo.UnwindInfo`, but are now switching to FP unwinding due to this error. 
switch_to_fp: struct { address: usize, err: SelfInfo.Error, @@ -785,20 +795,25 @@ const StackIterator = union(enum) { }; fn next(it: *StackIterator) Result { switch (it.*) { + .di_first => |unwind_context| { + const first_pc = unwind_context.pc; + if (first_pc == 0) return .end; + it.* = .{ .di = unwind_context }; + return .{ .frame = first_pc }; + }, .di => |*unwind_context| { const di = getSelfDebugInfo() catch unreachable; const di_gpa = getDebugInfoAllocator(); - if (di.unwindFrame(di_gpa, unwind_context)) |ra| { - if (ra <= 1) return .end; - return .{ .frame = ra }; - } else |err| { + di.unwindFrame(di_gpa, unwind_context) catch |err| { const pc = unwind_context.pc; it.* = .{ .fp = unwind_context.getFp() }; return .{ .switch_to_fp = .{ .address = pc, .err = err, } }; - } + }; + const pc = unwind_context.pc; + return if (pc == 0) .end else .{ .frame = pc }; }, .fp => |fp| { if (fp == 0) return .end; // we reached the "sentinel" base pointer @@ -824,9 +839,9 @@ const StackIterator = union(enum) { if (bp != 0 and bp <= fp) return .end; it.fp = bp; - const ra = ra_ptr.*; + const ra = stripInstructionPtrAuthCode(ra_ptr.*); if (ra <= 1) return .end; - return .{ .frame = ra }; + return .{ .frame = ra - 1 }; }, } } @@ -860,6 +875,26 @@ const StackIterator = union(enum) { } }; +/// Some platforms use pointer authentication: the upper bits of instruction pointers contain a +/// signature. This function clears those signature bits to make the pointer directly usable. 
+pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { + if (native_arch.isAARCH64()) { + // `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it) + // The save / restore is because `xpaclri` operates on x30 (LR) + return asm ( + \\mov x16, x30 + \\mov x30, x15 + \\hint 0x07 + \\mov x15, x30 + \\mov x30, x16 + : [ret] "={x15}" (-> usize), + : [ptr] "{x15}" (ptr), + : .{ .x16 = true }); + } + + return ptr; +} + fn printSourceAtAddress(gpa: Allocator, debug_info: *SelfInfo, writer: *Writer, address: usize, tty_config: tty.Config) Writer.Error!void { const symbol: Symbol = debug_info.getSymbolAtAddress(gpa, address) catch |err| switch (err) { error.MissingDebugInfo, diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 321e67bb7c..efa9d782f6 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -2,7 +2,6 @@ //! goal of minimal code bloat and compilation speed penalty. const builtin = @import("builtin"); -const native_os = builtin.os.tag; const native_endian = native_arch.endian(); const native_arch = builtin.cpu.arch; @@ -13,6 +12,8 @@ const assert = std.debug.assert; const Dwarf = std.debug.Dwarf; const CpuContext = std.debug.cpu_context.Native; +const stripInstructionPtrAuthCode = std.debug.stripInstructionPtrAuthCode; + const root = @import("root"); const SelfInfo = @This(); @@ -52,7 +53,7 @@ pub fn deinit(self: *SelfInfo, gpa: Allocator) void { if (Module.LookupCache != void) self.lookup_cache.deinit(gpa); } -pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!usize { +pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!void { comptime assert(supports_unwinding); const module: Module = try .lookup(&self.lookup_cache, gpa, context.pc); const gop = try self.modules.getOrPut(gpa, module.key()); @@ -115,7 +116,7 @@ pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) /// pub const supports_unwinding: bool; 
/// /// Only required if `supports_unwinding == true`. /// pub const UnwindContext = struct { -/// /// A PC value inside the function of the last unwound frame. +/// /// A PC value representing the location in the last frame. /// pc: usize, /// pub fn init(ctx: *std.debug.cpu_context.Native, gpa: Allocator) Allocator.Error!UnwindContext; /// pub fn deinit(uc: *UnwindContext, gpa: Allocator) void; @@ -123,21 +124,22 @@ pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) /// /// pointer is unknown, 0 may be returned instead. /// pub fn getFp(uc: *UnwindContext) usize; /// }; -/// /// Only required if `supports_unwinding == true`. Unwinds a single stack frame and returns -/// /// the next return address (which may be 0 indicating end of stack). +/// /// Only required if `supports_unwinding == true`. Unwinds a single stack frame. +/// /// The caller will read the new instruction pointer from the `pc` field. +/// /// `pc = 0` indicates end of stack / no more frames. /// pub fn unwindFrame( /// mod: *const Module, /// gpa: Allocator, /// di: *DebugInfo, /// ctx: *UnwindContext, -/// ) SelfInfo.Error!usize; +/// ) SelfInfo.Error!void; /// ``` const Module: type = Module: { // Allow overriding the target-specific `SelfInfo` implementation by exposing `root.debug.Module`. if (@hasDecl(root, "debug") and @hasDecl(root.debug, "Module")) { break :Module root.debug.Module; } - break :Module switch (native_os) { + break :Module switch (builtin.os.tag) { .linux, .netbsd, .freebsd, @@ -222,7 +224,7 @@ pub const DwarfUnwindContext = struct { const register = col.register orelse return error.InvalidRegister; // The default type is usually undefined, but can be overriden by ABI authors. // See the doc comment on `Dwarf.Unwind.VirtualMachine.RegisterRule.default`. 
- if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 18) { + if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 28) { // Callee-saved registers are initialized as if they had the .same_value rule const src = try context.cpu_context.dwarfRegisterBytes(register); if (src.len != out.len) return error.RegisterSizeMismatch; @@ -310,7 +312,7 @@ pub const DwarfUnwindContext = struct { unwind: *const Dwarf.Unwind, load_offset: usize, explicit_fde_offset: ?usize, - ) Error!usize { + ) Error!void { return unwindFrameInner(context, gpa, unwind, load_offset, explicit_fde_offset) catch |err| switch (err) { error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory => |e| return e, @@ -358,9 +360,10 @@ pub const DwarfUnwindContext = struct { unwind: *const Dwarf.Unwind, load_offset: usize, explicit_fde_offset: ?usize, - ) !usize { - if (!supports_unwinding) return error.UnsupportedCpuArchitecture; - if (context.pc == 0) return 0; + ) !void { + comptime assert(supports_unwinding); + + if (context.pc == 0) return; const pc_vaddr = context.pc - load_offset; @@ -430,12 +433,12 @@ pub const DwarfUnwindContext = struct { } } - const return_address: u64 = if (has_return_address) pc: { + const return_address: usize = if (has_return_address) pc: { const raw_ptr = try regNative(&new_cpu_context, cie.return_address_register); break :pc stripInstructionPtrAuthCode(raw_ptr.*); } else 0; - (try regNative(new_cpu_context, ip_reg_num)).* = return_address; + (try regNative(&new_cpu_context, ip_reg_num)).* = return_address; // The new CPU context is complete; flush changes. context.cpu_context = new_cpu_context; @@ -444,11 +447,9 @@ pub const DwarfUnwindContext = struct { // *after* the call, it could (in the case of noreturn functions) actually point outside of // the caller's address range, meaning an FDE lookup would fail. 
We can handle this by // subtracting 1 from `return_address` so that the next lookup is guaranteed to land inside - // the `call` instruction`. The exception to this rule is signal frames, where the return + // the `call` instruction. The exception to this rule is signal frames, where the return // address is the same instruction that triggered the handler. context.pc = if (cie.is_signal_frame) return_address else return_address -| 1; - - return return_address; } /// Since register rules are applied (usually) during a panic, /// checked addition / subtraction is used so that we can return @@ -459,25 +460,6 @@ pub const DwarfUnwindContext = struct { else try std.math.sub(usize, base, @as(usize, @intCast(-offset))); } - /// Some platforms use pointer authentication - the upper bits of instruction pointers contain a signature. - /// This function clears these signature bits to make the pointer usable. - pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { - if (native_arch.isAARCH64()) { - // `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it) - // The save / restore is because `xpaclri` operates on x30 (LR) - return asm ( - \\mov x16, x30 - \\mov x30, x15 - \\hint 0x07 - \\mov x15, x30 - \\mov x30, x16 - : [ret] "={x15}" (-> usize), - : [ptr] "{x15}" (ptr), - : .{ .x16 = true }); - } - - return ptr; - } pub fn regNative(ctx: *CpuContext, num: u16) error{ InvalidRegister, diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig index fc2f1c89bb..e3cbeb7edd 100644 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -1,6 +1,5 @@ /// The runtime address where __TEXT is loaded. 
text_base: usize, -load_offset: usize, name: []const u8, pub fn key(m: *const DarwinModule) usize { @@ -12,38 +11,14 @@ pub const LookupCache = void; pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) Error!DarwinModule { _ = cache; _ = gpa; - const image_count = std.c._dyld_image_count(); - for (0..image_count) |image_idx| { - const header = std.c._dyld_get_image_header(@intCast(image_idx)) orelse continue; - const text_base = @intFromPtr(header); - if (address < text_base) continue; - const load_offset = std.c._dyld_get_image_vmaddr_slide(@intCast(image_idx)); - - // Find the __TEXT segment - var it: macho.LoadCommandIterator = .{ - .ncmds = header.ncmds, - .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], - }; - const text_segment_cmd = while (it.next()) |load_cmd| { - if (load_cmd.cmd() != .SEGMENT_64) continue; - const segment_cmd = load_cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; - break segment_cmd; - } else continue; - - const seg_start = load_offset + text_segment_cmd.vmaddr; - assert(seg_start == text_base); - const seg_end = seg_start + text_segment_cmd.vmsize; - if (address < seg_start or address >= seg_end) continue; - - // We've found the matching __TEXT segment. This is the image we need. 
- return .{ - .text_base = text_base, - .load_offset = load_offset, - .name = mem.span(std.c._dyld_get_image_name(@intCast(image_idx))), - }; + var info: std.c.dl_info = undefined; + switch (std.c.dladdr(@ptrFromInt(address), &info)) { + 0 => return error.MissingDebugInfo, + else => return .{ + .name = std.mem.span(info.fname), + .text_base = @intFromPtr(info.fbase), + }, } - return error.MissingDebugInfo; } fn loadUnwindInfo(module: *const DarwinModule) DebugInfo.Unwind { const header: *std.macho.mach_header = @ptrFromInt(module.text_base); @@ -52,56 +27,115 @@ fn loadUnwindInfo(module: *const DarwinModule) DebugInfo.Unwind { .ncmds = header.ncmds, .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], }; - const sections = while (it.next()) |load_cmd| { + const sections, const text_vmaddr = while (it.next()) |load_cmd| { if (load_cmd.cmd() != .SEGMENT_64) continue; const segment_cmd = load_cmd.cast(macho.segment_command_64).?; if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; - break load_cmd.getSections(); + break .{ load_cmd.getSections(), segment_cmd.vmaddr }; } else unreachable; + const vmaddr_slide = module.text_base - text_vmaddr; + var unwind_info: ?[]const u8 = null; var eh_frame: ?[]const u8 = null; for (sections) |sect| { if (mem.eql(u8, sect.sectName(), "__unwind_info")) { - const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr))); + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr))); unwind_info = sect_ptr[0..@intCast(sect.size)]; } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { - const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr))); + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr))); eh_frame = sect_ptr[0..@intCast(sect.size)]; } } return .{ + .vmaddr_slide = vmaddr_slide, .unwind_info = unwind_info, .eh_frame = eh_frame, }; } fn loadMachO(module: *const 
DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO { - const mapped_mem = try mapDebugInfoFile(module.name); - errdefer posix.munmap(mapped_mem); + const all_mapped_memory = try mapDebugInfoFile(module.name); + errdefer posix.munmap(all_mapped_memory); - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); + // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal + // binary": a simple file format which contains Mach-O binaries for multiple targets. For + // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images + // for both ARM64 Macs and x86_64 Macs. + if (all_mapped_memory.len < 4) return error.InvalidDebugInfo; + const magic = @as(*const u32, @ptrCast(all_mapped_memory.ptr)).*; + // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`. + const mapped_macho = switch (magic) { + macho.MH_MAGIC_64 => all_mapped_memory, + + macho.FAT_CIGAM => mapped_macho: { + // This is the universal binary format (aka a "fat binary"). Annoyingly, the whole thing + // is big-endian, so we'll be swapping some bytes. + if (all_mapped_memory.len < @sizeOf(macho.fat_header)) return error.InvalidDebugInfo; + const hdr: *const macho.fat_header = @ptrCast(all_mapped_memory.ptr); + const archs_ptr: [*]const macho.fat_arch = @ptrCast(all_mapped_memory.ptr + @sizeOf(macho.fat_header)); + const archs: []const macho.fat_arch = archs_ptr[0..@byteSwap(hdr.nfat_arch)]; + const native_cpu_type = switch (builtin.cpu.arch) { + .x86_64 => macho.CPU_TYPE_X86_64, + .aarch64 => macho.CPU_TYPE_ARM64, + else => comptime unreachable, + }; + for (archs) |*arch| { + if (@byteSwap(arch.cputype) != native_cpu_type) continue; + const offset = @byteSwap(arch.offset); + const size = @byteSwap(arch.size); + break :mapped_macho all_mapped_memory[offset..][0..size]; + } + // Our native architecture was not present in the fat binary. 
+ return error.MissingDebugInfo; + }, + + // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It + // will be fairly easy to add support here if necessary; it's very similar to above. + macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo, + + else => return error.InvalidDebugInfo, + }; + + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_macho.ptr)); if (hdr.magic != macho.MH_MAGIC_64) return error.InvalidDebugInfo; - const symtab: macho.symtab_command = symtab: { + const symtab: macho.symtab_command, const text_vmaddr: u64 = lc_iter: { var it: macho.LoadCommandIterator = .{ .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + .buffer = mapped_macho[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], }; + var symtab: ?macho.symtab_command = null; + var text_vmaddr: ?u64 = null; while (it.next()) |cmd| switch (cmd.cmd()) { - .SYMTAB => break :symtab cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, + .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, + .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| { + if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue; + text_vmaddr = seg_cmd.vmaddr; + }, else => {}, }; - return error.MissingDebugInfo; + break :lc_iter .{ + symtab orelse return error.MissingDebugInfo, + text_vmaddr orelse return error.MissingDebugInfo, + }; }; - const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab.symoff..]); + const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_macho[symtab.symoff..]); const syms = syms_ptr[0..symtab.nsyms]; - const strings = mapped_mem[symtab.stroff..][0 .. symtab.strsize - 1 :0]; + const strings = mapped_macho[symtab.stroff..][0 .. 
symtab.strsize - 1]; var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); defer symbols.deinit(gpa); + // This map is temporary; it is used only to detect duplicates here. This is + // necessary because we prefer to use STAB ("symbolic debugging table") symbols, + // but they might not be present, so we track normal symbols too. + // Indices match 1-1 with those of `symbols`. + var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty; + defer symbol_names.deinit(gpa); + try symbol_names.ensureUnusedCapacity(gpa, syms.len); + var ofile: u32 = undefined; var last_sym: MachoSymbol = undefined; var state: enum { @@ -115,7 +149,25 @@ fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO } = .init; for (syms) |*sym| { - if (sym.n_type.bits.is_stab == 0) continue; + if (sym.n_type.bits.is_stab == 0) { + if (sym.n_strx == 0) continue; + switch (sym.n_type.bits.type) { + .undf, .pbud, .indr, .abs, _ => continue, + .sect => { + const name = std.mem.sliceTo(strings[sym.n_strx..], 0); + const gop = symbol_names.getOrPutAssumeCapacity(name); + if (!gop.found_existing) { + assert(gop.index == symbols.items.len); + symbols.appendAssumeCapacity(.{ + .strx = sym.n_strx, + .addr = sym.n_value, + .ofile = MachoSymbol.unknown_ofile, + }); + } + }, + } + continue; + } // TODO handle globals N_GSYM, and statics N_STSYM switch (sym.n_type.stab) { @@ -132,7 +184,6 @@ fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO last_sym = .{ .strx = 0, .addr = sym.n_value, - .size = 0, .ofile = ofile, }; }, @@ -145,14 +196,22 @@ fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO }, .fun_strx => { state = .fun_size; - last_sym.size = @intCast(sym.n_value); }, else => return error.InvalidDebugInfo, }, .ensym => switch (state) { .fun_size => { state = .ensym; - symbols.appendAssumeCapacity(last_sym); + if (last_sym.strx != 0) { + const name = std.mem.sliceTo(strings[last_sym.strx..], 0); // key by the name of the symbol being stored (`last_sym`), not by the current entry `sym` + 
const gop = symbol_names.getOrPutAssumeCapacity(name); + if (!gop.found_existing) { + assert(gop.index == symbols.items.len); + symbols.appendAssumeCapacity(last_sym); + } else { + symbols.items[gop.index] = last_sym; + } + } }, else => return error.InvalidDebugInfo, }, @@ -168,9 +227,12 @@ fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO } switch (state) { - .init => return error.MissingDebugInfo, + .init => { + // Missing STAB symtab entries is still okay, unless there were also no normal symbols. + if (symbols.items.len == 0) return error.MissingDebugInfo; + }, .oso_close => {}, - else => return error.InvalidDebugInfo, + else => return error.InvalidDebugInfo, // corrupted STAB entries in symtab } const symbols_slice = try symbols.toOwnedSlice(gpa); @@ -182,10 +244,11 @@ fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); return .{ - .mapped_memory = mapped_mem, + .mapped_memory = all_mapped_memory, .symbols = symbols_slice, .strings = strings, .ofiles = .empty, + .vaddr_offset = module.text_base - text_vmaddr, }; } pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, address: usize) Error!std.debug.Symbol { @@ -195,7 +258,7 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu }; const loaded_macho = &di.loaded_macho.?; - const vaddr = address - module.load_offset; + const vaddr = address - loaded_macho.vaddr_offset; const symbol = MachoSymbol.find(loaded_macho.symbols, vaddr) orelse return .unknown; // offset of `address` from start of `symbol` @@ -212,6 +275,11 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu .source_location = null, }; + if (symbol.ofile == MachoSymbol.unknown_ofile) { + // We don't have STAB info, so can't track down the object file; all we can do is the symbol name. 
+ return sym_only_result; + } + const o_file: *DebugInfo.OFile = of: { const gop = try loaded_macho.ofiles.getOrPut(gpa, symbol.ofile); if (!gop.found_existing) { @@ -233,7 +301,7 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch return sym_only_result; return .{ - .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr) orelse stab_symbol, + .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr + address_symbol_offset) orelse stab_symbol, .compile_unit_name = compile_unit.die.getAttrString( &o_file.dwarf, native_endian, @@ -256,7 +324,7 @@ pub const UnwindContext = std.debug.SelfInfo.DwarfUnwindContext; /// Unwind a frame using MachO compact unwind info (from __unwind_info). /// If the compact encoding can't encode a way to unwind a frame, it will /// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. -pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize { +pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!void { return unwindFrameInner(module, gpa, di, context) catch |err| switch (err) { error.InvalidDebugInfo, error.MissingDebugInfo, @@ -272,7 +340,7 @@ pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, => return error.InvalidDebugInfo, }; } -fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { +fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !void { if (di.unwind == null) di.unwind = module.loadUnwindInfo(); const unwind = &di.unwind.?; @@ -500,11 +568,11 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, }, .DWARF => { const eh_frame = unwind.eh_frame orelse return error.MissingDebugInfo; - const eh_frame_vaddr 
= @intFromPtr(eh_frame.ptr) - module.load_offset; + const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - unwind.vmaddr_slide; return context.unwindFrame( gpa, &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), - module.load_offset, + unwind.vmaddr_slide, @intCast(encoding.value.x86_64.dwarf), ); }, @@ -520,11 +588,11 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, }, .DWARF => { const eh_frame = unwind.eh_frame orelse return error.MissingDebugInfo; - const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - module.load_offset; + const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - unwind.vmaddr_slide; return context.unwindFrame( gpa, &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), - module.load_offset, + unwind.vmaddr_slide, @intCast(encoding.value.x86_64.dwarf), ); }, @@ -572,9 +640,7 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, else => comptime unreachable, // unimplemented }; - context.pc = UnwindContext.stripInstructionPtrAuthCode(new_ip); - if (context.pc > 0) context.pc -= 1; - return new_ip; + context.pc = std.debug.stripInstructionPtrAuthCode(new_ip) -| 1; } pub const DebugInfo = struct { unwind: ?Unwind, @@ -590,6 +656,7 @@ pub const DebugInfo = struct { for (loaded_macho.ofiles.values()) |*ofile| { ofile.dwarf.deinit(gpa); ofile.symbols_by_name.deinit(gpa); + posix.munmap(ofile.mapped_memory); } loaded_macho.ofiles.deinit(gpa); gpa.free(loaded_macho.symbols); @@ -598,6 +665,9 @@ pub const DebugInfo = struct { } const Unwind = struct { + /// The slide applied to the following sections. So, `unwind_info.ptr` is this many bytes + /// higher than the vmaddr of `__unwind_info`, and likewise for `__eh_frame`. 
+ vmaddr_slide: u64, // Backed by the in-memory sections mapped by the loader unwind_info: ?[]const u8, eh_frame: ?[]const u8, @@ -606,21 +676,31 @@ pub const DebugInfo = struct { const LoadedMachO = struct { mapped_memory: []align(std.heap.page_size_min) const u8, symbols: []const MachoSymbol, - strings: [:0]const u8, + strings: []const u8, /// Key is index into `strings` of the file path. ofiles: std.AutoArrayHashMapUnmanaged(u32, OFile), + /// This is not necessarily the same as the vmaddr_slide that dyld would report. This is + /// because the segments in the file on disk might differ from the ones in memory. Normally + /// we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying: + /// it exists on disk (necessarily, because the kernel needs to load it!), but is also in + /// the dyld cache (dyld actually restarts itself from cache after loading it), and the two + /// versions have (very) different segment base addresses. It's sort of like a large slide + /// has been applied to all addresses in memory. For an optimal experience, we consider the + /// on-disk vmaddr instead of the in-memory one. + vaddr_offset: usize, }; const OFile = struct { + mapped_memory: []align(std.heap.page_size_min) const u8, dwarf: Dwarf, - strtab: [:0]const u8, + strtab: []const u8, symtab: []align(1) const macho.nlist_64, /// All named symbols in `symtab`. Stored `u32` key is the index into `symtab`. Accessed /// through `SymbolAdapter`, so that the symbol name is used as the logical key. 
symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true), const SymbolAdapter = struct { - strtab: [:0]const u8, + strtab: []const u8, symtab: []align(1) const macho.nlist_64, pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 { _ = ctx; @@ -663,7 +743,7 @@ pub const DebugInfo = struct { if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo; if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo; - const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1 :0]; + const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo; @@ -717,6 +797,7 @@ pub const DebugInfo = struct { try dwarf.open(gpa, native_endian); return .{ + .mapped_memory = mapped_mem, .dwarf = dwarf, .strtab = strtab, .symtab = symtab, @@ -728,8 +809,9 @@ pub const DebugInfo = struct { const MachoSymbol = struct { strx: u32, addr: u64, - size: u32, + /// Value may be `unknown_ofile`. 
ofile: u32, + const unknown_ofile = std.math.maxInt(u32); fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { _ = context; return lhs.addr < rhs.addr; @@ -754,9 +836,9 @@ const MachoSymbol = struct { test find { const symbols: []const MachoSymbol = &.{ - .{ .addr = 100, .strx = undefined, .size = undefined, .ofile = undefined }, - .{ .addr = 200, .strx = undefined, .size = undefined, .ofile = undefined }, - .{ .addr = 300, .strx = undefined, .size = undefined, .ofile = undefined }, + .{ .addr = 100, .strx = undefined, .ofile = undefined }, + .{ .addr = 200, .strx = undefined, .ofile = undefined }, + .{ .addr = 300, .strx = undefined, .ofile = undefined }, }; try testing.expectEqual(null, find(symbols, 0)); diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index fde61d8140..e080665497 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -230,7 +230,7 @@ fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Erro else => unreachable, } } -pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize { +pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!void { if (di.unwind[0] == null) try module.loadUnwindInfo(gpa, di); std.debug.assert(di.unwind[0] != null); for (&di.unwind) |*opt_unwind| { diff --git a/lib/std/debug/SelfInfo/WindowsModule.zig b/lib/std/debug/SelfInfo/WindowsModule.zig index 8c88bd8b2f..75abc39ff5 100644 --- a/lib/std/debug/SelfInfo/WindowsModule.zig +++ b/lib/std/debug/SelfInfo/WindowsModule.zig @@ -332,6 +332,34 @@ pub const UnwindContext = struct { .Wcr = @splat(0), .Wvr = @splat(0), }, + .thumb => .{ + .ContextFlags = 0, + .R0 = ctx.r[0], + .R1 = ctx.r[1], + .R2 = ctx.r[2], + .R3 = ctx.r[3], + .R4 = ctx.r[4], + .R5 = ctx.r[5], + .R6 = ctx.r[6], + .R7 = ctx.r[7], + .R8 = ctx.r[8], + .R9 = ctx.r[9], + .R10 = 
ctx.r[10], + .R11 = ctx.r[11], + .R12 = ctx.r[12], + .Sp = ctx.r[13], + .Lr = ctx.r[14], + .Pc = ctx.r[15], + .Cpsr = 0, + .Fpcsr = 0, + .Padding = 0, + .DUMMYUNIONNAME = .{ .S = @splat(0) }, + .Bvr = @splat(0), + .Bcr = @splat(0), + .Wvr = @splat(0), + .Wcr = @splat(0), + .Padding2 = @splat(0), + }, else => comptime unreachable, }, .history_table = std.mem.zeroes(windows.UNWIND_HISTORY_TABLE), @@ -345,7 +373,7 @@ pub const UnwindContext = struct { return ctx.cur.getRegs().bp; } }; -pub fn unwindFrame(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { +pub fn unwindFrame(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !void { _ = module; _ = gpa; _ = di; @@ -374,10 +402,10 @@ pub fn unwindFrame(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, const next_regs = context.cur.getRegs(); const tib = &windows.teb().NtTib; if (next_regs.sp < @intFromPtr(tib.StackLimit) or next_regs.sp > @intFromPtr(tib.StackBase)) { - return 0; + context.pc = 0; + } else { + context.pc = next_regs.ip -| 1; } - context.pc = next_regs.ip -| 1; - return next_regs.ip; } const WindowsModule = @This(); diff --git a/lib/std/debug/cpu_context.zig b/lib/std/debug/cpu_context.zig index 9859575fa3..b9dd49767f 100644 --- a/lib/std/debug/cpu_context.zig +++ b/lib/std/debug/cpu_context.zig @@ -214,6 +214,12 @@ pub fn fromWindowsContext(ctx: *const std.os.windows.CONTEXT) Native { .sp = ctx.Sp, .pc = ctx.Pc, }, + .thumb => .{ .r = .{ + ctx.R0, ctx.R1, ctx.R2, ctx.R3, + ctx.R4, ctx.R5, ctx.R6, ctx.R7, + ctx.R8, ctx.R9, ctx.R10, ctx.R11, + ctx.R12, ctx.Sp, ctx.Lr, ctx.Pc, + } }, else => comptime unreachable, }; }