mirror of
https://github.com/ziglang/zig.git
synced 2025-12-06 06:13:07 +00:00
Our usage of `ucontext_t` in the standard library was kind of problematic. We unnecessarily mimicked libc-specific structures, and our `getcontext` implementation was overkill for our use case of stack tracing. This commit introduces a new namespace, `std.debug.cpu_context`, which contains "context" types for various architectures (currently x86, x86_64, ARM, and AARCH64) containing the general-purpose CPU registers; the ones needed in practice for stack unwinding. Each implementation has a function `current` which populates the structure using inline assembly. The structure is user-overridable, though that should only be necessary if the standard library does not have an implementation for the *architecture*: that is to say, none of this is OS-dependent. Of course, in POSIX signal handlers, we get a `ucontext_t` from the kernel. The function `std.debug.cpu_context.fromPosixSignalContext` converts this to a `std.debug.cpu_context.Native` with a big ol' target switch. This functionality is not exposed from `std.c` or `std.posix`, and neither are `ucontext_t`, `mcontext_t`, or `getcontext`. The rationale is that these types and functions do not conform to a specific ABI, and in fact tend to get updated over time based on CPU features and extensions; in addition, different libcs use different structures which are "partially compatible" with the kernel structure. Overall, it's a mess, but all we need is the kernel context, so we can just define a kernel-compatible structure as long as we don't claim C compatibility by putting it in `std.c` or `std.posix`. This change resulted in a few nice `std.debug` simplifications, but nothing too noteworthy. However, the main benefit of this change is that DWARF unwinding---sometimes necessary for collecting stack traces reliably---now requires far less target-specific integration. Also fix a bug I noticed in `PageAllocator` (I found this due to a bug in my distro's QEMU distribution; thanks, broken QEMU patch!) 
and I think a couple of minor bugs in `std.debug`. Resolves: #23801 Resolves: #23802
496 lines
20 KiB
Zig
496 lines
20 KiB
Zig
//! Cross-platform abstraction for this binary's own debug information, with a
|
|
//! goal of minimal code bloat and compilation speed penalty.
|
|
|
|
const builtin = @import("builtin");
|
|
const native_os = builtin.os.tag;
|
|
const native_endian = native_arch.endian();
|
|
const native_arch = builtin.cpu.arch;
|
|
|
|
const std = @import("../std.zig");
|
|
const mem = std.mem;
|
|
const Allocator = std.mem.Allocator;
|
|
const assert = std.debug.assert;
|
|
const Dwarf = std.debug.Dwarf;
|
|
const CpuContext = std.debug.cpu_context.Native;
|
|
|
|
const root = @import("root");
|
|
|
|
const SelfInfo = @This();
|
|
|
|
/// Per-module debug information, keyed by the value returned from `Module.key`.
/// `void` when this target is not supported.
modules: if (target_supported) std.AutoArrayHashMapUnmanaged(usize, Module.DebugInfo) else void,
|
|
/// State handed to `Module.lookup` on every address lookup.
/// `void` when this target is not supported.
lookup_cache: if (target_supported) Module.LookupCache else void,
|
|
|
|
/// The errors that the public `SelfInfo` functions can return.
pub const Error = error{
    /// Debug info was located, but its contents are malformed or corrupted.
    InvalidDebugInfo,
    /// No debug info could be located for the request.
    MissingDebugInfo,
    /// Debug info was located and may well be valid, but this implementation cannot handle it.
    UnsupportedDebugInfo,
    /// An IO error prevented the debug info from being read from disk.
    ReadFailed,
    OutOfMemory,
    Unexpected,
};
|
|
|
|
/// Indicates whether the `SelfInfo` implementation has support for this target.
|
|
/// This is `false` exactly when the selected `Module` implementation is `void`.
pub const target_supported: bool = Module != void;
|
|
|
|
/// Indicates whether the `SelfInfo` implementation has support for unwinding on this target.
|
|
/// Always `false` when `target_supported` is `false`; otherwise mirrors `Module.supports_unwinding`.
pub const supports_unwinding: bool = target_supported and Module.supports_unwinding;
|
|
|
|
/// The unwinding state type of the selected implementation, `Module.UnwindContext`.
/// Only a usable type when `supports_unwinding` is `true`; otherwise this is the void value.
pub const UnwindContext = if (supports_unwinding) Module.UnwindContext;
|
|
|
|
/// The initial `SelfInfo` value: no modules loaded yet and a freshly initialized lookup cache.
pub const init: SelfInfo = .{
    .modules = .empty,
    // A `void` lookup cache has no `init` declaration, so it is given the void value instead.
    .lookup_cache = if (Module.LookupCache == void) {} else .init,
};
|
|
|
|
/// Releases everything held by `self`: each module's debug info, the module map itself, and the
/// lookup cache (when the implementation has one). `gpa` is forwarded to every `deinit` call.
pub fn deinit(self: *SelfInfo, gpa: Allocator) void {
    for (self.modules.values()) |*debug_info| {
        debug_info.deinit(gpa);
    }
    self.modules.deinit(gpa);
    if (Module.LookupCache != void) {
        self.lookup_cache.deinit(gpa);
    }
}
|
|
|
|
/// Unwinds a single stack frame described by `context` and returns the result of the
/// target-specific `Module.unwindFrame`.
///
/// The module containing `context.pc` is looked up, and its `DebugInfo` entry is created (as
/// `.init`) on first use. Only valid when `supports_unwinding` is `true`.
pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!usize {
    comptime assert(supports_unwinding);

    const module = try Module.lookup(&self.lookup_cache, gpa, context.pc);
    const entry = try self.modules.getOrPut(gpa, module.key());

    // `entry.value_ptr` points into the map, so keep the map's pointers locked for as long as
    // the implementation is using it.
    self.modules.lockPointers();
    defer self.modules.unlockPointers();

    if (!entry.found_existing) {
        entry.value_ptr.* = .init;
    }
    return module.unwindFrame(gpa, entry.value_ptr, context);
}
|
|
|
|
/// Resolves `address` to a `std.debug.Symbol` using the target-specific
/// `Module.getSymbolAtAddress`.
///
/// The module containing `address` is looked up, and its `DebugInfo` entry is created (as
/// `.init`) on first use. Only valid when `target_supported` is `true`.
pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) Error!std.debug.Symbol {
    comptime assert(target_supported);

    const module = try Module.lookup(&self.lookup_cache, gpa, address);
    const entry = try self.modules.getOrPut(gpa, module.key());

    // `entry.value_ptr` points into the map, so keep the map's pointers locked for as long as
    // the implementation is using it.
    self.modules.lockPointers();
    defer self.modules.unlockPointers();

    if (!entry.found_existing) {
        entry.value_ptr.* = .init;
    }
    return module.getSymbolAtAddress(gpa, entry.value_ptr, address);
}
|
|
|
|
/// Returns the name of the module containing `address`.
///
/// Fails with `error.MissingDebugInfo` when the module's name is empty. Only valid when
/// `target_supported` is `true`.
pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) Error![]const u8 {
    comptime assert(target_supported);

    const module = try Module.lookup(&self.lookup_cache, gpa, address);
    return if (module.name.len != 0) module.name else error.MissingDebugInfo;
}
|
|
|
|
/// `void` indicates that `SelfInfo` is not supported for this target.
|
|
///
|
|
/// This type contains the target-specific implementation. Logically, a `Module` represents a subset
|
|
/// of the executable with its own debug information. This typically corresponds to what ELF calls a
|
|
/// module, i.e. a shared library or executable image, but could be anything. For instance, it would
|
|
/// be valid to consider the entire application one module, or on the other hand to consider each
|
|
/// object file a module.
|
|
///
|
|
/// This type must expose the following declarations:
|
|
///
|
|
/// ```
|
|
/// /// Holds state cached by the implementation between calls to `lookup`.
|
|
/// /// This may be `void`, in which case the inner declarations can be omitted.
|
|
/// pub const LookupCache = struct {
|
|
/// pub const init: LookupCache;
|
|
/// pub fn deinit(lc: *LookupCache, gpa: Allocator) void;
|
|
/// };
|
|
/// /// Holds debug information associated with a particular `Module`.
|
|
/// pub const DebugInfo = struct {
|
|
/// pub const init: DebugInfo;
|
|
/// };
|
|
/// /// Finds the `Module` corresponding to `address`.
|
|
/// pub fn lookup(lc: *LookupCache, gpa: Allocator, address: usize) SelfInfo.Error!Module;
|
|
/// /// Returns a unique identifier for this `Module`, such as a load address.
|
|
/// pub fn key(mod: *const Module) usize;
|
|
/// /// Locates and loads location information for the symbol corresponding to `address`.
|
|
/// pub fn getSymbolAtAddress(
|
|
/// mod: *const Module,
|
|
/// gpa: Allocator,
|
|
/// di: *DebugInfo,
|
|
/// address: usize,
|
|
/// ) SelfInfo.Error!std.debug.Symbol;
|
|
/// /// Whether a reliable stack unwinding strategy, such as DWARF unwinding, is available.
|
|
/// pub const supports_unwinding: bool;
|
|
/// /// Only required if `supports_unwinding == true`.
|
|
/// pub const UnwindContext = struct {
|
|
/// /// A PC value inside the function of the last unwound frame.
|
|
/// pc: usize,
|
|
/// pub fn init(ctx: *std.debug.cpu_context.Native, gpa: Allocator) Allocator.Error!UnwindContext;
|
|
/// pub fn deinit(uc: *UnwindContext, gpa: Allocator) void;
|
|
/// /// Returns the frame pointer associated with the last unwound stack frame. If the frame
|
|
/// /// pointer is unknown, 0 may be returned instead.
|
|
/// pub fn getFp(uc: *UnwindContext) usize;
|
|
/// };
|
|
/// /// Only required if `supports_unwinding == true`. Unwinds a single stack frame and returns
|
|
/// /// the next return address (which may be 0 indicating end of stack).
|
|
/// pub fn unwindFrame(
|
|
/// mod: *const Module,
|
|
/// gpa: Allocator,
|
|
/// di: *DebugInfo,
|
|
/// ctx: *UnwindContext,
|
|
/// ) SelfInfo.Error!usize;
|
|
/// ```
|
|
const Module: type = Module: {
    // An application can replace the target-specific implementation by exposing
    // `root.debug.Module`; when present, it takes precedence over the OS-based default below.
    const has_override = @hasDecl(root, "debug") and @hasDecl(root.debug, "Module");
    if (has_override) break :Module root.debug.Module;

    // Otherwise select the implementation from the target OS. `void` marks the target as
    // unsupported (see `target_supported`).
    const os_default: type = switch (native_os) {
        .linux,
        .netbsd,
        .freebsd,
        .dragonfly,
        .openbsd,
        .solaris,
        .illumos,
        => @import("SelfInfo/ElfModule.zig"),

        .macos,
        .ios,
        .watchos,
        .tvos,
        .visionos,
        => @import("SelfInfo/DarwinModule.zig"),

        .uefi,
        .windows,
        => @import("SelfInfo/WindowsModule.zig"),

        else => void,
    };
    break :Module os_default;
};
|
|
|
|
/// An implementation of `UnwindContext` useful for DWARF-based unwinders. The `Module.unwindFrame`
|
|
/// implementation should wrap `DwarfUnwindContext.unwindFrame`.
|
|
pub const DwarfUnwindContext = struct {
|
|
/// Canonical Frame Address computed by the most recent unwind step; `null` until the first
/// frame has been unwound.
cfa: ?usize,
|
|
/// Address used to look up unwind information for the next step. Starts as the instruction
/// pointer from the initial CPU context and is updated after every unwound frame.
pc: usize,
|
|
/// Register state for the current frame; replaced with the caller's register state by each
/// successful unwind step.
cpu_context: CpuContext,
|
|
/// Call-frame-information virtual machine; reset and re-run on every unwind step.
vm: Dwarf.Unwind.VirtualMachine,
|
|
/// DWARF expression evaluator used for CFA expressions and expression-based register rules.
stack_machine: Dwarf.expression.StackMachine(.{ .call_frame_context = true }),
|
|
|
|
/// Creates an unwind context that starts from `cpu_context`.
///
/// The context is copied, so the caller's value is neither modified nor referenced afterwards.
/// The initial `pc` is the context's instruction pointer with any pointer-authentication bits
/// stripped. Only valid when `supports_unwinding` is `true`.
pub fn init(cpu_context: *const CpuContext) DwarfUnwindContext {
    comptime assert(supports_unwinding);

    // `regNative` wants a mutable pointer, but the register is only read through the result, so
    // dropping `const` here is harmless.
    const ip_ptr = regNative(@constCast(cpu_context), ip_reg_num) catch |err| switch (err) {
        // None of these can happen for the instruction pointer:
        // - `ip_reg_num` is a valid register number,
        // - every `CpuContext` implementation has to support it,
        // - and it is always `usize`-sized.
        error.InvalidRegister,
        error.UnsupportedRegister,
        error.IncompatibleRegisterSize,
        => unreachable,
    };

    return .{
        .cfa = null,
        .pc = stripInstructionPtrAuthCode(ip_ptr.*),
        .cpu_context = cpu_context.*,
        .vm = .{},
        .stack_machine = .{},
    };
}
|
|
|
|
/// Frees the resources held by the virtual machine and the expression stack machine, then
/// invalidates `self`. The context must not be used after this call.
pub fn deinit(self: *DwarfUnwindContext, gpa: Allocator) void {
    self.vm.deinit(gpa);
    self.stack_machine.deinit(gpa);

    // Make any accidental use-after-deinit easier to notice.
    self.* = undefined;
}
|
|
|
|
/// Returns the frame pointer register value stored in the current CPU context.
pub fn getFp(self: *const DwarfUnwindContext) usize {
    // `regNative` wants a mutable pointer, but the register is only read through the result, so
    // dropping `const` here is harmless.
    const fp_ptr = regNative(@constCast(&self.cpu_context), fp_reg_num) catch |err| switch (err) {
        // None of these can happen for the frame pointer:
        // - `fp_reg_num` is a valid register number,
        // - every `CpuContext` implementation has to support it,
        // - and being a pointer, it is always `usize`-sized.
        error.InvalidRegister,
        error.UnsupportedRegister,
        error.IncompatibleRegisterSize,
        => unreachable,
    };
    return fp_ptr.*;
}
|
|
|
|
/// Resolves the register rule `col.rule` and writes the resulting register value into `out`.
///
/// `out` is the destination register's byte storage, as obtained from
/// `CpuContext.dwarfRegisterBytes`. Rules that copy another register require the source and
/// destination to have the same size and fail with `error.RegisterSizeMismatch` otherwise.
/// Rules that produce an address-sized value write `@sizeOf(usize)` bytes in native endianness.
///
/// Rules that depend on the CFA fail with `error.InvalidCFA` if `context.cfa` is not set.
///
/// Returns `true` iff the rule was undefined. This is *not* the same as
/// `col.rule == .undefined`, because the default rule may be undefined.
pub fn resolveRegisterRule(
    context: *DwarfUnwindContext,
    gpa: Allocator,
    col: Dwarf.Unwind.VirtualMachine.Column,
    expression_context: std.debug.Dwarf.expression.Context,
    out: []u8,
) !bool {
    switch (col.rule) {
        .default => {
            const register = col.register orelse return error.InvalidRegister;
            // The default type is usually undefined, but can be overridden by ABI authors.
            // See the doc comment on `Dwarf.Unwind.VirtualMachine.RegisterRule.default`.
            if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 28) {
                // Callee-saved registers (x19 through x28) are initialized as if they had the
                // .same_value rule
                const src = try context.cpu_context.dwarfRegisterBytes(register);
                if (src.len != out.len) return error.RegisterSizeMismatch;
                @memcpy(out, src);
                return false;
            }
            @memset(out, undefined);
            return true;
        },
        .undefined => {
            @memset(out, undefined);
            return true;
        },
        .same_value => {
            // TODO: This copy could be eliminated if callers always copy the state then call this function to update it
            const register = col.register orelse return error.InvalidRegister;
            const src = try context.cpu_context.dwarfRegisterBytes(register);
            if (src.len != out.len) return error.RegisterSizeMismatch;
            @memcpy(out, src);
            return false;
        },
        .offset => |offset| {
            // The register was saved in memory at CFA + offset.
            const cfa = context.cfa orelse return error.InvalidCFA;
            const addr = try applyOffset(cfa, offset);
            const ptr: *const usize = @ptrFromInt(addr);
            mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian);
            return false;
        },
        .val_offset => |offset| {
            // The register's value *is* CFA + offset.
            const cfa = context.cfa orelse return error.InvalidCFA;
            mem.writeInt(usize, out[0..@sizeOf(usize)], try applyOffset(cfa, offset), native_endian);
            return false;
        },
        .register => |register| {
            // The register's value is held in another register.
            const src = try context.cpu_context.dwarfRegisterBytes(register);
            if (src.len != out.len) return error.RegisterSizeMismatch;
            @memcpy(out, src);
            return false;
        },
        .expression => |expression| {
            // The expression yields the address at which the register was saved.
            const cfa = context.cfa orelse return error.InvalidCFA;
            context.stack_machine.reset();
            const value = try context.stack_machine.run(
                expression,
                gpa,
                expression_context,
                cfa,
            ) orelse return error.NoExpressionValue;
            const addr = switch (value) {
                .generic => |addr| addr,
                else => return error.InvalidExpressionValue,
            };
            const ptr: *usize = @ptrFromInt(addr);
            mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian);
            return false;
        },
        .val_expression => |expression| {
            // The expression yields the register's value directly.
            const cfa = context.cfa orelse return error.InvalidCFA;
            context.stack_machine.reset();
            const value = try context.stack_machine.run(
                expression,
                gpa,
                expression_context,
                cfa,
            ) orelse return error.NoExpressionValue;
            const val_raw = switch (value) {
                .generic => |raw| raw,
                else => return error.InvalidExpressionValue,
            };
            mem.writeInt(usize, out[0..@sizeOf(usize)], val_raw, native_endian);
            return false;
        },
        .architectural => return error.UnimplementedRegisterRule,
    }
}
|
|
|
|
/// Unwinds one stack frame using DWARF unwinding info and updates the register context to that
/// of the caller. Returns the caller's return address.
///
/// If `.eh_frame_hdr` is available and complete, it will be used to binary search for the FDE.
/// Otherwise, a linear scan of `.eh_frame` and `.debug_frame` is done to find the FDE. The latter
/// may require lazily loading the data in those sections.
///
/// `explicit_fde_offset` is for cases where the FDE offset is already known, such as when it was
/// obtained from `__unwind_info`. When it is non-null, it is used directly and no PC-based FDE
/// lookup is performed.
///
/// Every failure of the underlying implementation is folded into `Error`: unimplemented or
/// unsupported features become `error.UnsupportedDebugInfo`, and malformed data becomes
/// `error.InvalidDebugInfo`.
pub fn unwindFrame(
    context: *DwarfUnwindContext,
    gpa: Allocator,
    unwind: *const Dwarf.Unwind,
    load_offset: usize,
    explicit_fde_offset: ?usize,
) Error!usize {
    if (unwindFrameInner(context, gpa, unwind, load_offset, explicit_fde_offset)) |return_address| {
        return return_address;
    } else |err| return switch (err) {
        // Already part of `Error`; pass through unchanged.
        error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory => |e| e,

        // Valid-looking debug info that uses features this implementation does not handle.
        error.UnimplementedRegisterRule,
        error.UnsupportedAddrSize,
        error.UnsupportedDwarfVersion,
        error.UnimplementedUserOpcode,
        error.UnimplementedExpressionCall,
        error.UnimplementedOpcode,
        error.UnimplementedTypedComparison,
        error.UnimplementedTypeConversion,
        error.UnknownExpressionOpcode,
        error.UnsupportedRegister,
        => error.UnsupportedDebugInfo,

        // Anything else means the debug info itself is malformed.
        error.InvalidRegister,
        error.ReadFailed,
        error.EndOfStream,
        error.IncompatibleRegisterSize,
        error.Overflow,
        error.StreamTooLong,
        error.InvalidOperand,
        error.InvalidOpcode,
        error.InvalidOperation,
        error.InvalidCFARule,
        error.IncompleteExpressionContext,
        error.InvalidCFAOpcode,
        error.InvalidExpression,
        error.InvalidFrameBase,
        error.InvalidIntegralTypeSize,
        error.InvalidSubExpression,
        error.InvalidTypeLength,
        error.TruncatedIntegralType,
        error.DivisionByZero,
        error.InvalidExpressionValue,
        error.NoExpressionValue,
        error.RegisterSizeMismatch,
        error.InvalidCFA,
        => error.InvalidDebugInfo,
    };
}
|
|
/// Implementation of `unwindFrame` with an inferred error set.
///
/// Performs one unwind step:
/// 1. Locates the FDE covering `context.pc` (or uses `explicit_fde_offset` when given).
/// 2. Runs the CFI virtual machine up to that PC and computes the new CFA.
/// 3. Applies every register rule of the resulting row to a copy of the CPU context.
/// 4. Stores the return address in the copy's instruction pointer, commits the copy, and
///    updates `context.pc` for the next step.
///
/// Returns the return address of the unwound frame, or 0 if `context.pc` is 0 or the return
/// address register has an undefined rule.
fn unwindFrameInner(
    context: *DwarfUnwindContext,
    gpa: Allocator,
    unwind: *const Dwarf.Unwind,
    load_offset: usize,
    explicit_fde_offset: ?usize,
) !usize {
    if (!supports_unwinding) return error.UnsupportedCpuArchitecture;
    if (context.pc == 0) return 0;

    // Unwind info is expressed in terms of virtual addresses, so undo the load offset.
    const pc_vaddr = context.pc - load_offset;

    const fde_offset = explicit_fde_offset orelse try unwind.lookupPc(
        pc_vaddr,
        @sizeOf(usize),
        native_endian,
    ) orelse return error.MissingDebugInfo;
    const format, const cie, const fde = try unwind.getFde(fde_offset, @sizeOf(usize), native_endian);

    // Check if the FDE *actually* includes the pc (`lookupPc` can return false positives).
    if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) {
        return error.MissingDebugInfo;
    }

    // Do not set `compile_unit` because the spec states that CFIs
    // may not reference other debug sections anyway.
    var expression_context: Dwarf.expression.Context = .{
        .format = format,
        .cpu_context = &context.cpu_context,
        .cfa = context.cfa,
    };

    context.vm.reset();

    const row = try context.vm.runTo(gpa, pc_vaddr, cie, fde, @sizeOf(usize), native_endian);
    context.cfa = switch (row.cfa.rule) {
        .val_offset => |offset| blk: {
            const register = row.cfa.register orelse return error.InvalidCFARule;
            const value = (try regNative(&context.cpu_context, register)).*;
            break :blk try applyOffset(value, offset);
        },
        .expression => |expr| blk: {
            context.stack_machine.reset();
            const value = try context.stack_machine.run(
                expr,
                gpa,
                expression_context,
                context.cfa,
            );

            if (value) |v| {
                if (v != .generic) return error.InvalidExpressionValue;
                break :blk v.generic;
            } else return error.NoExpressionValue;
        },
        else => return error.InvalidCFARule,
    };

    // Register rules evaluated below must see the CFA that was just computed.
    expression_context.cfa = context.cfa;

    var has_return_address = true;

    // Create a copy of the CPU context, to which we will apply the new rules.
    var new_cpu_context = context.cpu_context;

    // On all implemented architectures, the CFA is defined as being the previous frame's SP
    (try regNative(&new_cpu_context, sp_reg_num)).* = context.cfa.?;

    for (context.vm.rowColumns(row)) |column| {
        if (column.register) |register| {
            const dest = try new_cpu_context.dwarfRegisterBytes(register);
            const rule_undef = try context.resolveRegisterRule(gpa, column, expression_context, dest);
            if (register == cie.return_address_register) {
                // An undefined rule for the return address register means there is no caller.
                has_return_address = !rule_undef;
            }
        }
    }

    // `usize` rather than a fixed-width integer: the value is stored through a `*usize` and
    // returned as `usize`, which must also work on 32-bit targets.
    const return_address: usize = if (has_return_address) pc: {
        const raw_ptr = try regNative(&new_cpu_context, cie.return_address_register);
        break :pc stripInstructionPtrAuthCode(raw_ptr.*);
    } else 0;

    (try regNative(&new_cpu_context, ip_reg_num)).* = return_address;

    // The new CPU context is complete; flush changes.
    context.cpu_context = new_cpu_context;

    // Also update the stored pc. However, because `return_address` points to the instruction
    // *after* the call, it could (in the case of noreturn functions) actually point outside of
    // the caller's address range, meaning an FDE lookup would fail. We can handle this by
    // subtracting 1 from `return_address` so that the next lookup is guaranteed to land inside
    // the `call` instruction. The exception to this rule is signal frames, where the return
    // address is the same instruction that triggered the handler.
    context.pc = if (cie.is_signal_frame) return_address else return_address -| 1;

    return return_address;
}
|
|
/// Returns `base + offset`, failing with `error.Overflow` if the result does not fit in `usize`.
///
/// Since register rules are applied (usually) during a panic, every step here is checked so that
/// we can return an error and fall back to FP-based unwinding instead of tripping a safety
/// check. That includes the two edge cases of the offset itself: `minInt(i64)`, whose negation
/// does not fit in an `i64`, and offsets whose magnitude does not fit in `usize` (possible on
/// 32-bit targets).
fn applyOffset(base: usize, offset: i64) !usize {
    // `@abs` yields a `u64`, so it is well-defined even for `minInt(i64)`.
    const magnitude = std.math.cast(usize, @abs(offset)) orelse return error.Overflow;
    return if (offset >= 0)
        std.math.add(usize, base, magnitude)
    else
        std.math.sub(usize, base, magnitude);
}
|
|
/// Removes the pointer-authentication signature from an instruction pointer.
///
/// Some platforms use pointer authentication, where the upper bits of instruction pointers hold
/// a signature. This clears those bits so the pointer can be used as a plain address. On
/// architectures other than AArch64, `ptr` is returned unchanged.
pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize {
    if (!native_arch.isAARCH64()) return ptr;

    // `hint 0x07` is the encoding of `xpaclri`, which behaves as a `nop` on hardware without
    // support for it. `xpaclri` only operates on x30 (LR), so LR is parked in x16 while the
    // value to strip (passed in and returned through x15) is moved through it.
    return asm (
        \\mov x16, x30
        \\mov x30, x15
        \\hint 0x07
        \\mov x15, x30
        \\mov x30, x16
        : [ret] "={x15}" (-> usize),
        : [ptr] "{x15}" (ptr),
        : .{ .x16 = true });
}
|
|
|
|
/// Returns a pointer to the storage of DWARF register `num` inside `ctx`, viewed as a `usize`.
///
/// The pointer is `align(1)` because it is derived from the register's raw byte storage. Any
/// error from `dwarfRegisterBytes` is propagated, and `error.IncompatibleRegisterSize` is
/// returned when the register is not exactly `@sizeOf(usize)` bytes wide.
pub fn regNative(ctx: *CpuContext, num: u16) error{
    InvalidRegister,
    UnsupportedRegister,
    IncompatibleRegisterSize,
}!*align(1) usize {
    const reg_bytes = try ctx.dwarfRegisterBytes(num);
    if (reg_bytes.len == @sizeOf(usize)) return @ptrCast(reg_bytes);
    return error.IncompatibleRegisterSize;
}
|
|
|
|
/// DWARF register number of the instruction pointer on the native architecture. The `.?`
/// requires `Dwarf.ipRegNum` to return a non-null value for this architecture.
const ip_reg_num = Dwarf.ipRegNum(native_arch).?;
|
|
/// DWARF register number of the frame pointer on the native architecture.
const fp_reg_num = Dwarf.fpRegNum(native_arch);
|
|
/// DWARF register number of the stack pointer on the native architecture.
const sp_reg_num = Dwarf.spRegNum(native_arch);
|
|
};
|