From de47acd732dca8b4d2f2b3559307f488ccac940d Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 2 Aug 2024 17:45:31 -0700 Subject: [PATCH] code coverage dumping tool basic implementation * std.debug.Dwarf: add `sortCompileUnits` along with a field to track the state for the purpose of assertions and correct API usage. This makes batch lookups faster. - in the future, findCompileUnit should be enhanced to rely on sorted compile units as well. * implement `std.debug.Dwarf.resolveSourceLocations` as well as `std.debug.Info.resolveSourceLocations`. It's still pretty slow, since it calls getLineNumberInfo for each array element, repeating a lot of work unnecessarily. * integrate these APIs with `std.Progress` to understand what is taking so long. The output I'm seeing from this tool shows a lot of missing source locations. In particular, the main area of interest is missing for my tokenizer fuzzing example. --- lib/std/debug.zig | 6 +++ lib/std/debug/Dwarf.zig | 85 +++++++++++++++++++++++++++++++++++++---- lib/std/debug/Info.zig | 17 +++++---- tools/dump-cov.zig | 10 ++++- 4 files changed, 102 insertions(+), 16 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 907f7711a7..6d034146c3 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -27,6 +27,12 @@ pub const SourceLocation = struct { line: u64, column: u64, file_name: []const u8, + + pub const invalid: SourceLocation = .{ + .line = 0, + .column = 0, + .file_name = &.{}, + }; }; pub const Symbol = struct { diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 3c150b3b18..170fa774c0 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -39,6 +39,7 @@ pub const call_frame = @import("Dwarf/call_frame.zig"); endian: std.builtin.Endian, sections: SectionArray = null_section_array, is_macho: bool, +compile_units_sorted: bool, // Filled later by the initializer abbrev_table_list: std.ArrayListUnmanaged(Abbrev.Table) = .{}, @@ -728,9 +729,9 @@ pub const OpenError = 
ScanError; /// Initialize DWARF info. The caller has the responsibility to initialize most /// the `Dwarf` fields before calling. `binary_mem` is the raw bytes of the /// main binary file (not the secondary debug info file). -pub fn open(di: *Dwarf, gpa: Allocator) OpenError!void { - try di.scanAllFunctions(gpa); - try di.scanAllCompileUnits(gpa); +pub fn open(d: *Dwarf, gpa: Allocator) OpenError!void { + try d.scanAllFunctions(gpa); + try d.scanAllCompileUnits(gpa); } const PcRange = struct { @@ -1061,6 +1062,39 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { } } +/// Populate missing PC ranges in compilation units, and then sort them by start address. +/// Does not guarantee pc_range to be non-null because there could be missing debug info. +pub fn sortCompileUnits(d: *Dwarf) ScanError!void { + assert(!d.compile_units_sorted); + + for (d.compile_unit_list.items) |*cu| { + if (cu.pc_range != null) continue; + const ranges_value = cu.die.getAttr(AT.ranges) orelse continue; + var iter = DebugRangeIterator.init(ranges_value, d, cu) catch continue; + var start: u64 = maxInt(u64); + var end: u64 = 0; + while (try iter.next()) |range| { + start = @min(start, range.start_addr); + end = @max(end, range.end_addr); + } + if (end != 0) cu.pc_range = .{ + .start = start, + .end = end, + }; + } + + std.mem.sortUnstable(CompileUnit, d.compile_unit_list.items, {}, struct { + fn lessThan(ctx: void, a: CompileUnit, b: CompileUnit) bool { + _ = ctx; + const a_range = a.pc_range orelse return false; + const b_range = b.pc_range orelse return true; + return a_range.start < b_range.start; + } + }.lessThan); + + d.compile_units_sorted = true; +} + const DebugRangeIterator = struct { base_address: u64, section_type: Section.Id, @@ -1208,6 +1242,7 @@ const DebugRangeIterator = struct { } }; +/// TODO: change this to binary searching the sorted compile unit list pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*const CompileUnit { for 
(di.compile_unit_list.items) |*compile_unit| { if (compile_unit.pc_range) |range| { @@ -2275,6 +2310,7 @@ pub const ElfModule = struct { .endian = endian, .sections = sections, .is_macho = false, + .compile_units_sorted = false, }; try Dwarf.open(&di, gpa); @@ -2326,6 +2362,8 @@ pub const ElfModule = struct { } }; +pub const ResolveSourceLocationsError = Allocator.Error || DeprecatedFixedBufferReader.Error; + /// Given an array of virtual memory addresses, sorted ascending, outputs a /// corresponding array of source locations, by appending to the provided /// array list. @@ -2335,11 +2373,44 @@ pub fn resolveSourceLocations( sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`. output: []std.debug.SourceLocation, -) error{ MissingDebugInfo, InvalidDebugInfo }!void { + parent_prog_node: std.Progress.Node, +) ResolveSourceLocationsError!void { assert(sorted_pc_addrs.len == output.len); - _ = d; - _ = gpa; - @panic("TODO"); + assert(d.compile_units_sorted); + + const prog_node = parent_prog_node.start("Resolve Source Locations", sorted_pc_addrs.len); + defer prog_node.end(); + + var cu_i: usize = 0; + var cu: *const CompileUnit = &d.compile_unit_list.items[0]; + var range = cu.pc_range.?; + next_pc: for (sorted_pc_addrs, output) |pc, *out| { + defer prog_node.completeOne(); + while (pc >= range.end) { + cu_i += 1; + if (cu_i >= d.compile_unit_list.items.len) { + out.* = std.debug.SourceLocation.invalid; + continue :next_pc; + } + cu = &d.compile_unit_list.items[cu_i]; + range = cu.pc_range orelse { + out.* = std.debug.SourceLocation.invalid; + continue :next_pc; + }; + } + if (pc < range.start) { + out.* = std.debug.SourceLocation.invalid; + continue :next_pc; + } + // TODO: instead of calling this function, break the function up into one that parses the + // information once and prepares a context that can be reused for the entire batch. 
+ if (getLineNumberInfo(d, gpa, cu.*, pc)) |src_loc| { + out.* = src_loc; + } else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => out.* = std.debug.SourceLocation.invalid, + else => |e| return e, + } + } } fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol { diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig index 5276ba68ec..3c61c4072f 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/Info.zig @@ -20,9 +20,14 @@ address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule), pub const LoadError = Dwarf.ElfModule.LoadError; -pub fn load(gpa: Allocator, path: Path) LoadError!Info { +pub fn load(gpa: Allocator, path: Path, parent_prog_node: std.Progress.Node) LoadError!Info { var sections: Dwarf.SectionArray = Dwarf.null_section_array; - const elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, &sections, null); + var prog_node = parent_prog_node.start("Loading Debug Info", 0); + defer prog_node.end(); + var elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, &sections, null); + prog_node.end(); + prog_node = parent_prog_node.start("Sort Compile Units", 0); + try elf_module.dwarf.sortCompileUnits(); var info: Info = .{ .address_map = .{}, }; @@ -38,10 +43,7 @@ pub fn deinit(info: *Info, gpa: Allocator) void { info.* = undefined; } -pub const ResolveSourceLocationsError = error{ - MissingDebugInfo, - InvalidDebugInfo, -} || Allocator.Error; +pub const ResolveSourceLocationsError = Dwarf.ResolveSourceLocationsError; pub fn resolveSourceLocations( info: *Info, @@ -49,9 +51,10 @@ pub fn resolveSourceLocations( sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`.
output: []std.debug.SourceLocation, + parent_prog_node: std.Progress.Node, ) ResolveSourceLocationsError!void { assert(sorted_pc_addrs.len == output.len); if (info.address_map.entries.len != 1) @panic("TODO"); const elf_module = &info.address_map.values()[0]; - return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output); + return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output, parent_prog_node); } diff --git a/tools/dump-cov.zig b/tools/dump-cov.zig index aba2911a91..8449dec33e 100644 --- a/tools/dump-cov.zig +++ b/tools/dump-cov.zig @@ -28,7 +28,10 @@ pub fn main() !void { .sub_path = cov_file_name, }; - var debug_info = std.debug.Info.load(gpa, exe_path) catch |err| { + const prog_node = std.Progress.start(.{}); + defer prog_node.end(); + + var debug_info = std.debug.Info.load(gpa, exe_path, prog_node) catch |err| { fatal("failed to load debug info for {}: {s}", .{ exe_path, @errorName(err) }); }; defer debug_info.deinit(gpa); @@ -51,7 +54,10 @@ pub fn main() !void { assert(std.sort.isSorted(usize, pcs, {}, std.sort.asc(usize))); const source_locations = try arena.alloc(std.debug.SourceLocation, pcs.len); - try debug_info.resolveSourceLocations(gpa, pcs, source_locations); + try debug_info.resolveSourceLocations(gpa, pcs, source_locations, prog_node); + defer for (source_locations) |sl| { + gpa.free(sl.file_name); + }; for (pcs, source_locations) |pc, sl| { try stdout.print("{x}: {s}:{d}:{d}\n", .{