doc comments

This commit is contained in:
mlugg 2025-09-03 13:58:41 +01:00
parent 5e6a1919c7
commit dd9cb1beea
No known key found for this signature in database
GPG Key ID: 3F5B7DCCBF4AF02E
2 changed files with 97 additions and 47 deletions

View File

@ -1,4 +1,24 @@
//! MLUGG TODO DOCUMENT THIS
//! Contains state relevant to stack unwinding through the DWARF `.debug_frame` section, or the
//! `.eh_frame` section which is an extension of the former specified by Linux Standard Base Core.
//! Like `Dwarf`, no assumptions are made about the host's relationship to the target of the unwind
//! information -- unwind data for any target can be read by any host.
//!
//! `Unwind` specifically deals with loading the data from CIEs and FDEs in the section, and with
//! performing fast lookups of a program counter's corresponding FDE. The CFI instructions in the
//! CIEs and FDEs can be interpreted by `VirtualMachine`.
//!
//! The typical usage of `Unwind` is as follows:
//!
//! * Initialize with `initEhFrameHdr` or `initSection`, depending on the available data
//! * Call `prepareLookup` to construct a search table if necessary
//! * Call `lookupPc` to find the section offset of the FDE corresponding to a PC
//! * Call `getFde` to load the corresponding FDE and CIE
//! * Check that the PC does indeed fall within the FDE's PC range (`lookupPc` may return a false positive)
//! * Interpret the embedded CFI instructions using `VirtualMachine`
//!
//! In some cases, such as when using the "compact unwind" data in Mach-O binaries, the FDE offsets
//! may already be known. In that case, no call to `lookupPc` is necessary, which means the call to
//! `prepareLookup` can also be omitted.
pub const VirtualMachine = @import("Unwind/VirtualMachine.zig");
@ -8,7 +28,8 @@ frame_section: struct {
/// the binary (e.g. `sh_addr` in an ELF file); the equivalent runtime address may be relocated
/// in position-independent binaries.
vaddr: u64,
/// The full contents of the section. May have imprecise bounds depending on `section`.
/// The full contents of the section. May have imprecise bounds depending on `section`. This
/// memory is externally managed.
///
/// For `.debug_frame`, the slice length is exactly equal to the section length. This is needed
/// to know the number of CIEs and FDEs.
@ -22,13 +43,18 @@ frame_section: struct {
bytes: []const u8,
},
/// A structure allowing fast lookups of the FDE corresponding to a particular PC. We use a binary
/// search table for the lookup; essentially, a list of all FDEs ordered by PC range. `null` means
/// the lookup data is not yet populated, so `prepareLookup` must be called before `lookupPc`.
lookup: ?union(enum) {
/// The `.eh_frame_hdr` section contains a pre-computed search table which we can use.
eh_frame_hdr: struct {
/// Virtual address of the `.eh_frame_hdr` section.
vaddr: u64,
table: EhFrameHeader.SearchTable,
},
/// Offsets into `frame_section` of FDEs, sorted by ascending `pc_begin`.
/// There is no pre-computed search table, so we have built one ourselves.
/// Allocated into `gpa` and freed by `deinit`.
sorted_fdes: []SortedFdeEntry,
},
@ -39,29 +65,13 @@ const SortedFdeEntry = struct {
fde_offset: u64,
};
const Section = enum { debug_frame, eh_frame };
/// Creates an `Unwind` backed by the raw contents of a `.debug_frame` or `.eh_frame` section.
///
/// `section_vaddr` and `section_bytes` are stored as given; the bytes are referenced, not copied.
/// The lookup table is left unpopulated (`null`).
///
/// When the `.eh_frame_hdr` section is available, prefer `initEhFrameHdr`, which can make use of a
/// search table embedded in that section.
pub fn initSection(section: Section, section_vaddr: u64, section_bytes: []const u8) Unwind {
    const unwind: Unwind = .{
        .lookup = null,
        .frame_section = .{
            .id = section,
            .vaddr = section_vaddr,
            .bytes = section_bytes,
        },
    };
    return unwind;
}
pub const Section = enum { debug_frame, eh_frame };
/// Initialize with unwind information from a header loaded from an `.eh_frame_hdr` section, and a
/// pointer to the contents of the `.eh_frame` section.
///
/// This differs from `initSection` because `.eh_frame_hdr` may embed a binary search table, and
/// if it does, this function will use that for address lookups instead of constructing our own
/// search table.
/// `.eh_frame_hdr` may embed a binary search table of FDEs. If it does, we will use that table for
/// PC lookups rather than spending time constructing our own search table.
pub fn initEhFrameHdr(header: EhFrameHeader, section_vaddr: u64, section_bytes_ptr: [*]const u8) Unwind {
return .{
.frame_section = .{
@ -76,6 +86,23 @@ pub fn initEhFrameHdr(header: EhFrameHeader, section_vaddr: u64, section_bytes_p
};
}
/// Sets up unwinding state directly from a `.debug_frame` or `.eh_frame` section, identified by
/// `section`. `section_vaddr` is recorded as the section's virtual address and `section_bytes` as
/// its contents; the slice is stored as-is rather than copied.
///
/// Prefer `initEhFrameHdr` when an `.eh_frame_hdr` section is available, since that lets the
/// implementation reuse a search table embedded in that section.
pub fn initSection(section: Section, section_vaddr: u64, section_bytes: []const u8) Unwind {
    return .{
        // No search table is available at this point, so the lookup starts out unpopulated.
        .lookup = null,
        .frame_section = .{
            .vaddr = section_vaddr,
            .bytes = section_bytes,
            .id = section,
        },
    };
}
/// Technically, it is only necessary to call this if `prepareLookup` has previously been called,
/// since no other function here allocates resources.
pub fn deinit(unwind: *Unwind, gpa: Allocator) void {
if (unwind.lookup) |lookup| switch (lookup) {
.eh_frame_hdr => {},
@ -83,8 +110,12 @@ pub fn deinit(unwind: *Unwind, gpa: Allocator) void {
};
}
/// This represents the decoded .eh_frame_hdr header
/// Decoded version of the `.eh_frame_hdr` section.
pub const EhFrameHeader = struct {
/// The virtual address (i.e. as given in the binary, before relocations) of the `.eh_frame`
/// section. This value is important when using `.eh_frame_hdr` to find debug information for
/// the current binary, because it allows locating where the `.eh_frame` section is loaded in
/// memory (by adding it to the ELF module's base address).
eh_frame_vaddr: u64,
search_table: ?SearchTable,
@ -93,6 +124,8 @@ pub const EhFrameHeader = struct {
offset: u8,
encoding: EH.PE,
fde_count: usize,
/// The actual table entries are viewed as a plain byte slice because `encoding` causes the
/// size of entries in the table to vary.
entries: []const u8,
/// Returns the vaddr of the FDE for `pc`, or `null` if no matching FDE was found.
@ -104,7 +137,7 @@ pub const EhFrameHeader = struct {
endian: Endian,
) !?u64 {
const table_vaddr = eh_frame_hdr_vaddr + table.offset;
const entry_size = try EhFrameHeader.entrySize(table.encoding, addr_size_bytes);
const entry_size = try entrySize(table.encoding, addr_size_bytes);
var left: usize = 0;
var len: usize = table.fde_count;
while (len > 1) {
@ -131,18 +164,18 @@ pub const EhFrameHeader = struct {
}, endian);
return fde_ptr;
}
};
/// Returns the size in bytes of one search table entry for the pointer encoding `table_enc`.
/// `addr_size_bytes` is only consulted for the `.absptr` encoding. Variable-length and
/// unrecognized encodings are rejected via `bad()`.
pub fn entrySize(table_enc: EH.PE, addr_size_bytes: u8) !u8 {
    switch (table_enc.type) {
        .absptr => return 2 * addr_size_bytes,
        .udata2, .sdata2 => return 4,
        .udata4, .sdata4 => return 8,
        .udata8, .sdata8 => return 16,
        // The table is binary searched, so every entry must have the same size; LEB128 values
        // have no fixed width.
        .uleb128, .sleb128 => return bad(),
        _ => return bad(),
    }
}
/// Computes the byte size of a single search table entry under the encoding `table_enc`.
/// For every accepted encoding the entry is exactly twice the width of one encoded value;
/// `addr_size_bytes` supplies that width for `.absptr`.
fn entrySize(table_enc: EH.PE, addr_size_bytes: u8) !u8 {
    const value_size: u8 = switch (table_enc.type) {
        .absptr => addr_size_bytes,
        .udata2, .sdata2 => 2,
        .udata4, .sdata4 => 4,
        .udata8, .sdata8 => 8,
        // This is a binary search table, so all entries must be the same size; LEB128 values
        // are variable-length and cannot be used.
        .uleb128, .sleb128 => return bad(),
        _ => return bad(),
    };
    return 2 * value_size;
}
};
pub fn parse(
eh_frame_hdr_vaddr: u64,
@ -169,7 +202,7 @@ pub const EhFrameHeader = struct {
const fde_count = try readEhPointer(&r, fde_count_enc, addr_size_bytes, .{
.pc_rel_base = eh_frame_hdr_vaddr + r.seek,
}, endian);
const entry_size = try entrySize(table_enc, addr_size_bytes);
const entry_size = try SearchTable.entrySize(table_enc, addr_size_bytes);
const bytes_offset = r.seek;
const bytes_len = cast(usize, fde_count * entry_size) orelse return error.EndOfStream;
const bytes = try r.take(bytes_len);
@ -188,7 +221,15 @@ pub const EhFrameHeader = struct {
}
};
pub const EntryHeader = union(enum) {
/// The shared header of an FDE/CIE, containing a length in bytes (DWARF's "initial length field")
/// and a value which differentiates CIEs from FDEs and maps FDEs to their corresponding CIEs. The
/// `.eh_frame` format also includes a third variation, here called `.terminator`, which acts as a
/// sentinel for the whole section.
///
/// `CommonInformationEntry.parse` and `FrameDescriptionEntry.parse` expect the `EntryHeader` to
/// have been parsed first: they accept data stored in the `EntryHeader`, and only read the bytes
/// following this header.
const EntryHeader = union(enum) {
cie: struct {
format: Format,
/// Remaining bytes in the CIE. These are parseable by `CommonInformationEntry.parse`.
@ -206,7 +247,7 @@ pub const EntryHeader = union(enum) {
/// keep track of how many section bytes remain when parsing all entries in `.debug_frame`.
terminator,
pub fn read(r: *Reader, header_section_offset: u64, section: Section, endian: Endian) !EntryHeader {
fn read(r: *Reader, header_section_offset: u64, section: Section, endian: Endian) !EntryHeader {
const unit_header = try Dwarf.readUnitHeader(r, endian);
if (unit_header.unit_length == 0) return .terminator;
@ -284,7 +325,7 @@ pub const CommonInformationEntry = struct {
///
/// `length_offset` specifies the offset of this CIE's length field in the
/// .eh_frame / .debug_frame section.
pub fn parse(
fn parse(
cie_bytes: []const u8,
section: Section,
default_addr_size_bytes: u8,
@ -364,7 +405,7 @@ pub const FrameDescriptionEntry = struct {
/// This function expects to read the FDE starting at the PC Begin field.
/// The returned struct references memory backed by `fde_bytes`.
pub fn parse(
fn parse(
/// The virtual address of the FDE we're parsing, *excluding* its entry header (i.e. the
/// address is after the header). If `fde_bytes` is backed by the memory of a loaded
/// module's `.eh_frame` section, this will equal `fde_bytes.ptr`.
@ -405,6 +446,9 @@ pub const FrameDescriptionEntry = struct {
}
};
/// Builds the PC-to-FDE lookup table if it is not already built. It is required to call this
/// function at least once before calling `lookupPc`. Once this function has been called, memory may
/// have been allocated, so `deinit` (with the same `gpa`) must be called to free it.
pub fn prepareLookup(unwind: *Unwind, gpa: Allocator, addr_size_bytes: u8, endian: Endian) !void {
if (unwind.lookup != null) return;
@ -443,22 +487,24 @@ pub fn prepareLookup(unwind: *Unwind, gpa: Allocator, addr_size_bytes: u8, endia
.debug_frame => if (saw_terminator) return bad(), // `.debug_frame` uses the section bounds and does not specify a sentinel entry
}
const fde_slice = try fde_list.toOwnedSlice(gpa);
errdefer comptime unreachable;
std.mem.sortUnstable(SortedFdeEntry, fde_slice, {}, struct {
std.mem.sortUnstable(SortedFdeEntry, fde_list.items, {}, struct {
fn lessThan(ctx: void, a: SortedFdeEntry, b: SortedFdeEntry) bool {
ctx;
return a.pc_begin < b.pc_begin;
}
}.lessThan);
unwind.lookup = .{ .sorted_fdes = fde_slice };
// This temporary is necessary to avoid an RLS footgun where `lookup` ends up non-null `undefined` on OOM.
const final_fdes = try fde_list.toOwnedSlice(gpa);
unwind.lookup = .{ .sorted_fdes = final_fdes };
}
/// Given a program counter value, returns the offset of the corresponding FDE, or `null` if no
/// matching FDE was found. The returned offset can be passed to `getFde` to load the data
/// associated with the FDE.
///
/// Before calling this function, `prepareLookup` must return successfully.
/// Before calling this function, `prepareLookup` must return successfully at least once, to ensure
/// that `unwind.lookup` is populated.
///
/// The return value may be a false positive. After loading the FDE with `getFde`, the caller must
/// validate that `pc` is indeed in its range -- if it is not, then no FDE matches `pc`.
@ -486,6 +532,8 @@ pub fn lookupPc(unwind: *const Unwind, pc: u64, addr_size_bytes: u8, endian: End
return sorted_fdes[first_bad_idx - 1].fde_offset;
}
/// Get the FDE at a given offset, as well as its associated CIE. This offset typically comes from
/// `lookupPc`. The CFI instructions within can be evaluated with `VirtualMachine`.
pub fn getFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endian: Endian) !struct { Format, CommonInformationEntry, FrameDescriptionEntry } {
const section = unwind.frame_section;

View File

@ -290,8 +290,10 @@ pub const UnwindContext = struct {
) orelse return error.MissingDebugInfo;
const format, const cie, const fde = try unwind.getFde(fde_offset, @sizeOf(usize), native_endian);
// Check if this FDE *actually* includes the address.
if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) return error.MissingDebugInfo;
// Check if the FDE *actually* includes the pc (`lookupPc` can return false positives).
if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) {
return error.MissingDebugInfo;
}
// Do not set `compile_unit` because the spec states that CFIs
// may not reference other debug sections anyway.