diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 6dac92188e..2753a0f52f 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -6,11 +6,6 @@ const io = std.io; const posix = std.posix; const fs = std.fs; const testing = std.testing; -const elf = std.elf; -const DW = std.dwarf; -const macho = std.macho; -const coff = std.coff; -const pdb = std.pdb; const root = @import("root"); const File = std.fs.File; const windows = std.os.windows; @@ -19,8 +14,22 @@ const native_os = builtin.os.tag; const native_endian = native_arch.endian(); pub const Dwarf = @import("debug/Dwarf.zig"); -pub const Info = @import("debug/Info.zig"); +pub const Pdb = @import("debug/Pdb.zig"); +pub const SelfInfo = @import("debug/SelfInfo.zig"); +/// Unresolved source locations can be represented with a single `usize` that +/// corresponds to a virtual memory address of the program counter. Combined +/// with debug information, those values can be converted into a resolved +/// source location, including file, line, and column. +pub const SourceLocation = struct { + line: u64, + column: u64, + file_name: []const u8, +}; + +/// Deprecated because it returns the optimization mode of the standard +/// library, when the caller probably wants to use the optimization mode of +/// their own module. pub const runtime_safety = switch (builtin.mode) { .Debug, .ReleaseSafe => true, .ReleaseFast, .ReleaseSmall => false, @@ -72,13 +81,13 @@ pub fn getStderrMutex() *std.Thread.Mutex { } /// TODO multithreaded awareness -var self_debug_info: ?Info = null; +var self_debug_info: ?SelfInfo = null; -pub fn getSelfDebugInfo() !*Info { +pub fn getSelfDebugInfo() !*SelfInfo { if (self_debug_info) |*info| { return info; } else { - self_debug_info = try Info.openSelf(getDebugInfoAllocator()); + self_debug_info = try SelfInfo.openSelf(getDebugInfoAllocator()); return &self_debug_info.?; } } @@ -316,7 +325,7 @@ pub fn captureStackTrace(first_address: ?usize, stack_trace: *std.builtin.StackT stack_trace.index = slice.len; } else { // TODO: This should use the DWARF unwinder if .eh_frame_hdr is available (so that full debug info parsing isn't required). - // A new path for loading Info needs to be created which will only attempt to parse in-memory sections, because + // A new path for loading SelfInfo needs to be created which will only attempt to parse in-memory sections, because // stopping to load other debug info (ie. source line info) from disk here is not required for unwinding. var it = StackIterator.init(first_address, null); defer it.deinit(); @@ -494,7 +503,7 @@ pub fn writeStackTrace( stack_trace: std.builtin.StackTrace, out_stream: anytype, allocator: mem.Allocator, - debug_info: *Info, + debug_info: *SelfInfo, tty_config: io.tty.Config, ) !void { _ = allocator; @@ -531,11 +540,11 @@ pub const StackIterator = struct { fp: usize, ma: MemoryAccessor = MemoryAccessor.init, - // When Info and a register context is available, this iterator can unwind + // When SelfInfo and a register context is available, this iterator can unwind // stacks with frames that don't use a frame pointer (ie. -fomit-frame-pointer), // using DWARF and MachO unwind info. 
unwind_state: if (have_ucontext) ?struct { - debug_info: *Info, + debug_info: *SelfInfo, dwarf_context: Dwarf.UnwindContext, last_error: ?UnwindError = null, failed: bool = false, @@ -560,7 +569,7 @@ pub const StackIterator = struct { }; } - pub fn initWithContext(first_address: ?usize, debug_info: *Info, context: *const posix.ucontext_t) !StackIterator { + pub fn initWithContext(first_address: ?usize, debug_info: *SelfInfo, context: *const posix.ucontext_t) !StackIterator { // The implementation of DWARF unwinding on aarch64-macos is not complete. However, Apple mandates that // the frame pointer register is always used, so on this platform we can safely use the FP-based unwinder. if (comptime builtin.target.isDarwin() and native_arch == .aarch64) { @@ -820,7 +829,7 @@ const have_msync = switch (native_os) { pub fn writeCurrentStackTrace( out_stream: anytype, - debug_info: *Info, + debug_info: *SelfInfo, tty_config: io.tty.Config, start_addr: ?usize, ) !void { @@ -906,7 +915,7 @@ pub noinline fn walkStackWindows(addresses: []usize, existing_context: ?*const w pub fn writeStackTraceWindows( out_stream: anytype, - debug_info: *Info, + debug_info: *SelfInfo, tty_config: io.tty.Config, context: *const windows.CONTEXT, start_addr: ?usize, @@ -925,7 +934,7 @@ pub fn writeStackTraceWindows( } } -fn printUnknownSource(debug_info: *Info, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { +fn printUnknownSource(debug_info: *SelfInfo, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { const module_name = debug_info.getModuleNameForAddress(address); return printLineInfo( out_stream, @@ -938,14 +947,14 @@ fn printUnknownSource(debug_info: *Info, out_stream: anytype, address: usize, tt ); } -fn printLastUnwindError(it: *StackIterator, debug_info: *Info, out_stream: anytype, tty_config: io.tty.Config) void { +fn printLastUnwindError(it: *StackIterator, debug_info: *SelfInfo, out_stream: anytype, tty_config: io.tty.Config) void { if (!have_ucontext) return; if (it.getLastError()) |unwind_error| { printUnwindError(debug_info, out_stream, unwind_error.address, unwind_error.err, tty_config) catch {}; } } -fn printUnwindError(debug_info: *Info, out_stream: anytype, address: usize, err: UnwindError, tty_config: io.tty.Config) !void { +fn printUnwindError(debug_info: *SelfInfo, out_stream: anytype, address: usize, err: UnwindError, tty_config: io.tty.Config) !void { const module_name = debug_info.getModuleNameForAddress(address) orelse "???"; try tty_config.setColor(out_stream, .dim); if (err == error.MissingDebugInfo) { @@ -956,7 +965,7 @@ fn printUnwindError(debug_info: *Info, out_stream: anytype, address: usize, err: try tty_config.setColor(out_stream, .reset); } -pub fn printSourceAtAddress(debug_info: *Info, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { +pub fn printSourceAtAddress(debug_info: *SelfInfo, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { const module = debug_info.getModuleForAddress(address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, out_stream, address, tty_config), else => return err, @@ -981,7 +990,7 @@ pub fn printSourceAtAddress(debug_info: *Info, out_stream: anytype, address: usi fn printLineInfo( out_stream: anytype, - line_info: ?Info.SourceLocation, + line_info: ?SourceLocation, address: usize, symbol_name: []const u8, compile_unit_name: []const u8, @@ -1027,7 +1036,7 @@ fn printLineInfo( } } -fn 
printLineFromFileAnyOs(out_stream: anytype, line_info: Info.SourceLocation) !void { +fn printLineFromFileAnyOs(out_stream: anytype, line_info: SourceLocation) !void { // Need this to always block even in async I/O mode, because this could potentially // be called from e.g. the event loop code crashing. var f = try fs.cwd().openFile(line_info.file_name, .{}); @@ -1093,7 +1102,7 @@ test printLineFromFileAnyOs { var test_dir = std.testing.tmpDir(.{}); defer test_dir.cleanup(); - // Relies on testing.tmpDir internals which is not ideal, but Info.SourceLocation requires paths. + // Relies on testing.tmpDir internals which is not ideal, but SourceLocation requires paths. const test_dir_path = try join(allocator, &.{ ".zig-cache", "tmp", test_dir.sub_path[0..] }); defer allocator.free(test_dir_path); @@ -1439,7 +1448,7 @@ test "manage resources correctly" { } const writer = std.io.null_writer; - var di = try Info.openSelf(testing.allocator); + var di = try SelfInfo.openSelf(testing.allocator); defer di.deinit(); try printSourceAtAddress(&di, writer, showMyTrace(), io.tty.detectConfig(std.io.getStdErr())); } diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 353c097471..4fff2562b2 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -1353,7 +1353,7 @@ pub fn getLineNumberInfo( allocator: Allocator, compile_unit: CompileUnit, target_address: u64, -) !std.debug.Info.SourceLocation { +) !std.debug.SourceLocation { const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit); const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list); @@ -2084,7 +2084,7 @@ const LineNumberProgram = struct { self: *LineNumberProgram, allocator: Allocator, file_entries: []const FileEntry, - ) !?std.debug.Info.SourceLocation { + ) !?std.debug.SourceLocation { if (self.prev_valid and self.target_address >= self.prev_address and self.target_address < self.address) @@ -2104,7 +2104,7 @@ const LineNumberProgram = struct { dir_name, file_entry.path, }); - return std.debug.Info.SourceLocation{ + return std.debug.SourceLocation{ .line = if (self.prev_line >= 0) @as(u64, @intCast(self.prev_line)) else 0, .column = self.prev_column, .file_name = file_name, diff --git a/lib/std/debug/Pdb.zig b/lib/std/debug/Pdb.zig new file mode 100644 index 0000000000..bdcc108c1d --- /dev/null +++ b/lib/std/debug/Pdb.zig @@ -0,0 +1,591 @@ +const std = @import("../std.zig"); +const File = std.fs.File; +const Allocator = std.mem.Allocator; +const pdb = std.pdb; + +const Pdb = @This(); + +in_file: File, +msf: Msf, +allocator: Allocator, +string_table: ?*MsfStream, +dbi: ?*MsfStream, +modules: []Module, +sect_contribs: []pdb.SectionContribEntry, +guid: [16]u8, +age: u32, + +pub const Module = struct { + mod_info: pdb.ModInfo, + module_name: []u8, + obj_file_name: []u8, + // The fields below are filled on demand. 
+ populated: bool, + symbols: []u8, + subsect_info: []u8, + checksum_offset: ?usize, + + pub fn deinit(self: *Module, allocator: Allocator) void { + allocator.free(self.module_name); + allocator.free(self.obj_file_name); + if (self.populated) { + allocator.free(self.symbols); + allocator.free(self.subsect_info); + } + } +}; + +pub fn init(allocator: Allocator, path: []const u8) !Pdb { + const file = try std.fs.cwd().openFile(path, .{}); + errdefer file.close(); + + return .{ + .in_file = file, + .allocator = allocator, + .string_table = null, + .dbi = null, + .msf = try Msf.init(allocator, file), + .modules = &[_]Module{}, + .sect_contribs = &[_]pdb.SectionContribEntry{}, + .guid = undefined, + .age = undefined, + }; +} + +pub fn deinit(self: *Pdb) void { + self.in_file.close(); + self.msf.deinit(self.allocator); + for (self.modules) |*module| { + module.deinit(self.allocator); + } + self.allocator.free(self.modules); + self.allocator.free(self.sect_contribs); +} + +pub fn parseDbiStream(self: *Pdb) !void { + var stream = self.getStream(pdb.StreamType.Dbi) orelse + return error.InvalidDebugInfo; + const reader = stream.reader(); + + const header = try reader.readStruct(std.pdb.DbiStreamHeader); + if (header.VersionHeader != 19990903) // V70, only value observed by LLVM team + return error.UnknownPDBVersion; + // if (header.Age != age) + // return error.UnmatchingPDB; + + const mod_info_size = header.ModInfoSize; + const section_contrib_size = header.SectionContributionSize; + + var modules = std.ArrayList(Module).init(self.allocator); + errdefer modules.deinit(); + + // Module Info Substream + var mod_info_offset: usize = 0; + while (mod_info_offset != mod_info_size) { + const mod_info = try reader.readStruct(pdb.ModInfo); + var this_record_len: usize = @sizeOf(pdb.ModInfo); + + const module_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); + errdefer self.allocator.free(module_name); + this_record_len += module_name.len + 1; + + const obj_file_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); + errdefer self.allocator.free(obj_file_name); + this_record_len += obj_file_name.len + 1; + + if (this_record_len % 4 != 0) { + const round_to_next_4 = (this_record_len | 0x3) + 1; + const march_forward_bytes = round_to_next_4 - this_record_len; + try stream.seekBy(@as(isize, @intCast(march_forward_bytes))); + this_record_len += march_forward_bytes; + } + + try modules.append(Module{ + .mod_info = mod_info, + .module_name = module_name, + .obj_file_name = obj_file_name, + + .populated = false, + .symbols = undefined, + .subsect_info = undefined, + .checksum_offset = null, + }); + + mod_info_offset += this_record_len; + if (mod_info_offset > mod_info_size) + return error.InvalidDebugInfo; + } + + // Section Contribution Substream + var sect_contribs = std.ArrayList(pdb.SectionContribEntry).init(self.allocator); + errdefer sect_contribs.deinit(); + + var sect_cont_offset: usize = 0; + if (section_contrib_size != 0) { + const version = reader.readEnum(std.pdb.SectionContrSubstreamVersion, .little) catch |err| switch (err) { + error.InvalidValue => return error.InvalidDebugInfo, + else => |e| return e, + }; + _ = version; + sect_cont_offset += @sizeOf(u32); + } + while (sect_cont_offset != section_contrib_size) { + const entry = try sect_contribs.addOne(); + entry.* = try reader.readStruct(pdb.SectionContribEntry); + sect_cont_offset += @sizeOf(pdb.SectionContribEntry); + + if (sect_cont_offset > section_contrib_size) + return error.InvalidDebugInfo; + } + + 
self.modules = try modules.toOwnedSlice(); + self.sect_contribs = try sect_contribs.toOwnedSlice(); +} + +pub fn parseInfoStream(self: *Pdb) !void { + var stream = self.getStream(pdb.StreamType.Pdb) orelse + return error.InvalidDebugInfo; + const reader = stream.reader(); + + // Parse the InfoStreamHeader. + const version = try reader.readInt(u32, .little); + const signature = try reader.readInt(u32, .little); + _ = signature; + const age = try reader.readInt(u32, .little); + const guid = try reader.readBytesNoEof(16); + + if (version != 20000404) // VC70, only value observed by LLVM team + return error.UnknownPDBVersion; + + self.guid = guid; + self.age = age; + + // Find the string table. + const string_table_index = str_tab_index: { + const name_bytes_len = try reader.readInt(u32, .little); + const name_bytes = try self.allocator.alloc(u8, name_bytes_len); + defer self.allocator.free(name_bytes); + try reader.readNoEof(name_bytes); + + const HashTableHeader = extern struct { + Size: u32, + Capacity: u32, + + fn maxLoad(cap: u32) u32 { + return cap * 2 / 3 + 1; + } + }; + const hash_tbl_hdr = try reader.readStruct(HashTableHeader); + if (hash_tbl_hdr.Capacity == 0) + return error.InvalidDebugInfo; + + if (hash_tbl_hdr.Size > HashTableHeader.maxLoad(hash_tbl_hdr.Capacity)) + return error.InvalidDebugInfo; + + const present = try readSparseBitVector(&reader, self.allocator); + defer self.allocator.free(present); + if (present.len != hash_tbl_hdr.Size) + return error.InvalidDebugInfo; + const deleted = try readSparseBitVector(&reader, self.allocator); + defer self.allocator.free(deleted); + + for (present) |_| { + const name_offset = try reader.readInt(u32, .little); + const name_index = try reader.readInt(u32, .little); + if (name_offset > name_bytes.len) + return error.InvalidDebugInfo; + const name = std.mem.sliceTo(name_bytes[name_offset..], 0); + if (std.mem.eql(u8, name, "/names")) { + break :str_tab_index name_index; + } + } + return error.MissingDebugInfo; + }; + + self.string_table = self.getStreamById(string_table_index) orelse + return error.MissingDebugInfo; +} + +pub fn getSymbolName(self: *Pdb, module: *Module, address: u64) ?[]const u8 { + _ = self; + std.debug.assert(module.populated); + + var symbol_i: usize = 0; + while (symbol_i != module.symbols.len) { + const prefix = @as(*align(1) pdb.RecordPrefix, @ptrCast(&module.symbols[symbol_i])); + if (prefix.RecordLen < 2) + return null; + switch (prefix.RecordKind) { + .S_LPROC32, .S_GPROC32 => { + const proc_sym = @as(*align(1) pdb.ProcSym, @ptrCast(&module.symbols[symbol_i + @sizeOf(pdb.RecordPrefix)])); + if (address >= proc_sym.CodeOffset and address < proc_sym.CodeOffset + proc_sym.CodeSize) { + return std.mem.sliceTo(@as([*:0]u8, @ptrCast(&proc_sym.Name[0])), 0); + } + }, + else => {}, + } + symbol_i += prefix.RecordLen + @sizeOf(u16); + } + + return null; +} + +pub fn getLineNumberInfo(self: *Pdb, module: *Module, address: u64) !std.debug.SourceLocation { + std.debug.assert(module.populated); + const subsect_info = module.subsect_info; + + var sect_offset: usize = 0; + var skip_len: usize = undefined; + const checksum_offset = module.checksum_offset orelse return error.MissingDebugInfo; + while (sect_offset != subsect_info.len) : (sect_offset += skip_len) { + const subsect_hdr = @as(*align(1) pdb.DebugSubsectionHeader, @ptrCast(&subsect_info[sect_offset])); + skip_len = subsect_hdr.Length; + sect_offset += @sizeOf(pdb.DebugSubsectionHeader); + + switch (subsect_hdr.Kind) { + .Lines => { + var line_index = sect_offset; + 
+ const line_hdr = @as(*align(1) pdb.LineFragmentHeader, @ptrCast(&subsect_info[line_index])); + if (line_hdr.RelocSegment == 0) + return error.MissingDebugInfo; + line_index += @sizeOf(pdb.LineFragmentHeader); + const frag_vaddr_start = line_hdr.RelocOffset; + const frag_vaddr_end = frag_vaddr_start + line_hdr.CodeSize; + + if (address >= frag_vaddr_start and address < frag_vaddr_end) { + // There is an unknown number of LineBlockFragmentHeaders (and their accompanying line and column records) + // from now on. We will iterate through them, and eventually find a SourceLocation that we're interested in, + // breaking out to :subsections. If not, we will make sure to not read anything outside of this subsection. + const subsection_end_index = sect_offset + subsect_hdr.Length; + + while (line_index < subsection_end_index) { + const block_hdr = @as(*align(1) pdb.LineBlockFragmentHeader, @ptrCast(&subsect_info[line_index])); + line_index += @sizeOf(pdb.LineBlockFragmentHeader); + const start_line_index = line_index; + + const has_column = line_hdr.Flags.LF_HaveColumns; + + // All line entries are stored inside their line block by ascending start address. + // Heuristic: we want to find the last line entry + // that has a vaddr_start <= address. + // This is done with a simple linear search. + var line_i: u32 = 0; + while (line_i < block_hdr.NumLines) : (line_i += 1) { + const line_num_entry = @as(*align(1) pdb.LineNumberEntry, @ptrCast(&subsect_info[line_index])); + line_index += @sizeOf(pdb.LineNumberEntry); + + const vaddr_start = frag_vaddr_start + line_num_entry.Offset; + if (address < vaddr_start) { + break; + } + } + + // line_i == 0 would mean that no matching pdb.LineNumberEntry was found. + if (line_i > 0) { + const subsect_index = checksum_offset + block_hdr.NameIndex; + const chksum_hdr = @as(*align(1) pdb.FileChecksumEntryHeader, @ptrCast(&module.subsect_info[subsect_index])); + const strtab_offset = @sizeOf(pdb.StringTableHeader) + chksum_hdr.FileNameOffset; + try self.string_table.?.seekTo(strtab_offset); + const source_file_name = try self.string_table.?.reader().readUntilDelimiterAlloc(self.allocator, 0, 1024); + + const line_entry_idx = line_i - 1; + + const column = if (has_column) blk: { + const start_col_index = start_line_index + @sizeOf(pdb.LineNumberEntry) * block_hdr.NumLines; + const col_index = start_col_index + @sizeOf(pdb.ColumnNumberEntry) * line_entry_idx; + const col_num_entry = @as(*align(1) pdb.ColumnNumberEntry, @ptrCast(&subsect_info[col_index])); + break :blk col_num_entry.StartColumn; + } else 0; + + const found_line_index = start_line_index + line_entry_idx * @sizeOf(pdb.LineNumberEntry); + const line_num_entry: *align(1) pdb.LineNumberEntry = @ptrCast(&subsect_info[found_line_index]); + const flags: *align(1) pdb.LineNumberEntry.Flags = @ptrCast(&line_num_entry.Flags); + + return .{ + .file_name = source_file_name, + .line = flags.Start, + .column = column, + }; + } + } + + // Checking that we are not reading garbage after the (possibly) multiple block fragments. + if (line_index != subsection_end_index) { + return error.InvalidDebugInfo; + } + } + }, + else => {}, + } + + if (sect_offset > subsect_info.len) + return error.InvalidDebugInfo; + } + + return error.MissingDebugInfo; +} + +pub fn getModule(self: *Pdb, index: usize) !?*Module { + if (index >= self.modules.len) + return null; + + const mod = &self.modules[index]; + if (mod.populated) + return mod; + + // At most one can be non-zero. 
+ if (mod.mod_info.C11ByteSize != 0 and mod.mod_info.C13ByteSize != 0) + return error.InvalidDebugInfo; + if (mod.mod_info.C13ByteSize == 0) + return error.InvalidDebugInfo; + + const stream = self.getStreamById(mod.mod_info.ModuleSymStream) orelse + return error.MissingDebugInfo; + const reader = stream.reader(); + + const signature = try reader.readInt(u32, .little); + if (signature != 4) + return error.InvalidDebugInfo; + + mod.symbols = try self.allocator.alloc(u8, mod.mod_info.SymByteSize - 4); + errdefer self.allocator.free(mod.symbols); + try reader.readNoEof(mod.symbols); + + mod.subsect_info = try self.allocator.alloc(u8, mod.mod_info.C13ByteSize); + errdefer self.allocator.free(mod.subsect_info); + try reader.readNoEof(mod.subsect_info); + + var sect_offset: usize = 0; + var skip_len: usize = undefined; + while (sect_offset != mod.subsect_info.len) : (sect_offset += skip_len) { + const subsect_hdr = @as(*align(1) pdb.DebugSubsectionHeader, @ptrCast(&mod.subsect_info[sect_offset])); + skip_len = subsect_hdr.Length; + sect_offset += @sizeOf(pdb.DebugSubsectionHeader); + + switch (subsect_hdr.Kind) { + .FileChecksums => { + mod.checksum_offset = sect_offset; + break; + }, + else => {}, + } + + if (sect_offset > mod.subsect_info.len) + return error.InvalidDebugInfo; + } + + mod.populated = true; + return mod; +} + +pub fn getStreamById(self: *Pdb, id: u32) ?*MsfStream { + if (id >= self.msf.streams.len) + return null; + return &self.msf.streams[id]; +} + +pub fn getStream(self: *Pdb, stream: pdb.StreamType) ?*MsfStream { + const id = @intFromEnum(stream); + return self.getStreamById(id); +} + +/// https://llvm.org/docs/PDB/MsfFile.html +const Msf = struct { + directory: MsfStream, + streams: []MsfStream, + + fn init(allocator: Allocator, file: File) !Msf { + const in = file.reader(); + + const superblock = try in.readStruct(pdb.SuperBlock); + + // Sanity checks + if (!std.mem.eql(u8, &superblock.FileMagic, pdb.SuperBlock.file_magic)) + return error.InvalidDebugInfo; + if (superblock.FreeBlockMapBlock != 1 and superblock.FreeBlockMapBlock != 2) + return error.InvalidDebugInfo; + const file_len = try file.getEndPos(); + if (superblock.NumBlocks * superblock.BlockSize != file_len) + return error.InvalidDebugInfo; + switch (superblock.BlockSize) { + // llvm only supports 4096 but we can handle any of these values + 512, 1024, 2048, 4096 => {}, + else => return error.InvalidDebugInfo, + } + + const dir_block_count = blockCountFromSize(superblock.NumDirectoryBytes, superblock.BlockSize); + if (dir_block_count > superblock.BlockSize / @sizeOf(u32)) + return error.UnhandledBigDirectoryStream; // cf. BlockMapAddr comment. + + try file.seekTo(superblock.BlockSize * superblock.BlockMapAddr); + const dir_blocks = try allocator.alloc(u32, dir_block_count); + for (dir_blocks) |*b| { + b.* = try in.readInt(u32, .little); + } + var directory = MsfStream.init( + superblock.BlockSize, + file, + dir_blocks, + ); + + const begin = directory.pos; + const stream_count = try directory.reader().readInt(u32, .little); + const stream_sizes = try allocator.alloc(u32, stream_count); + defer allocator.free(stream_sizes); + + // Microsoft's implementation uses @as(u32, -1) for inexistent streams. + // These streams are not used, but still participate in the file + // and must be taken into account when resolving stream indices. 
+ const Nil = 0xFFFFFFFF; + for (stream_sizes) |*s| { + const size = try directory.reader().readInt(u32, .little); + s.* = if (size == Nil) 0 else blockCountFromSize(size, superblock.BlockSize); + } + + const streams = try allocator.alloc(MsfStream, stream_count); + for (streams, 0..) |*stream, i| { + const size = stream_sizes[i]; + if (size == 0) { + stream.* = MsfStream{ + .blocks = &[_]u32{}, + }; + } else { + var blocks = try allocator.alloc(u32, size); + var j: u32 = 0; + while (j < size) : (j += 1) { + const block_id = try directory.reader().readInt(u32, .little); + const n = (block_id % superblock.BlockSize); + // 0 is for pdb.SuperBlock, 1 and 2 for FPMs. + if (block_id == 0 or n == 1 or n == 2 or block_id * superblock.BlockSize > file_len) + return error.InvalidBlockIndex; + blocks[j] = block_id; + } + + stream.* = MsfStream.init( + superblock.BlockSize, + file, + blocks, + ); + } + } + + const end = directory.pos; + if (end - begin != superblock.NumDirectoryBytes) + return error.InvalidStreamDirectory; + + return Msf{ + .directory = directory, + .streams = streams, + }; + } + + fn deinit(self: *Msf, allocator: Allocator) void { + allocator.free(self.directory.blocks); + for (self.streams) |*stream| { + allocator.free(stream.blocks); + } + allocator.free(self.streams); + } +}; + +const MsfStream = struct { + in_file: File = undefined, + pos: u64 = undefined, + blocks: []u32 = undefined, + block_size: u32 = undefined, + + pub const Error = @typeInfo(@typeInfo(@TypeOf(read)).Fn.return_type.?).ErrorUnion.error_set; + + fn init(block_size: u32, file: File, blocks: []u32) MsfStream { + const stream = MsfStream{ + .in_file = file, + .pos = 0, + .blocks = blocks, + .block_size = block_size, + }; + + return stream; + } + + fn read(self: *MsfStream, buffer: []u8) !usize { + var block_id = @as(usize, @intCast(self.pos / self.block_size)); + if (block_id >= self.blocks.len) return 0; // End of Stream + var block = self.blocks[block_id]; + var offset = self.pos % self.block_size; + + try self.in_file.seekTo(block * self.block_size + offset); + const in = self.in_file.reader(); + + var size: usize = 0; + var rem_buffer = buffer; + while (size < buffer.len) { + const size_to_read = @min(self.block_size - offset, rem_buffer.len); + size += try in.read(rem_buffer[0..size_to_read]); + rem_buffer = buffer[size..]; + offset += size_to_read; + + // If we're at the end of a block, go to the next one. 
+ if (offset == self.block_size) { + offset = 0; + block_id += 1; + if (block_id >= self.blocks.len) break; // End of Stream + block = self.blocks[block_id]; + try self.in_file.seekTo(block * self.block_size); + } + } + + self.pos += buffer.len; + return buffer.len; + } + + pub fn seekBy(self: *MsfStream, len: i64) !void { + self.pos = @as(u64, @intCast(@as(i64, @intCast(self.pos)) + len)); + if (self.pos >= self.blocks.len * self.block_size) + return error.EOF; + } + + pub fn seekTo(self: *MsfStream, len: u64) !void { + self.pos = len; + if (self.pos >= self.blocks.len * self.block_size) + return error.EOF; + } + + fn getSize(self: *const MsfStream) u64 { + return self.blocks.len * self.block_size; + } + + fn getFilePos(self: MsfStream) u64 { + const block_id = self.pos / self.block_size; + const block = self.blocks[block_id]; + const offset = self.pos % self.block_size; + + return block * self.block_size + offset; + } + + pub fn reader(self: *MsfStream) std.io.Reader(*MsfStream, Error, read) { + return .{ .context = self }; + } +}; + +fn readSparseBitVector(stream: anytype, allocator: Allocator) ![]u32 { + const num_words = try stream.readInt(u32, .little); + var list = std.ArrayList(u32).init(allocator); + errdefer list.deinit(); + var word_i: u32 = 0; + while (word_i != num_words) : (word_i += 1) { + const word = try stream.readInt(u32, .little); + var bit_i: u5 = 0; + while (true) : (bit_i += 1) { + if (word & (@as(u32, 1) << bit_i) != 0) { + try list.append(word_i * 32 + bit_i); + } + if (bit_i == std.math.maxInt(u5)) break; + } + } + return try list.toOwnedSlice(); +} + +fn blockCountFromSize(size: u32, block_size: u32) u32 { + return (size + block_size - 1) / block_size; +} diff --git a/lib/std/debug/Info.zig b/lib/std/debug/SelfInfo.zig similarity index 96% rename from lib/std/debug/Info.zig rename to lib/std/debug/SelfInfo.zig index 9d3074834b..58fe4b23b2 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/SelfInfo.zig @@ -1,4 +1,5 @@ -//! Cross-platform abstraction for debug information. +//! Cross-platform abstraction for this binary's own debug information, with a +//! goal of minimal code bloat and compilation speed penalty. 
const builtin = @import("builtin"); const native_os = builtin.os.tag; @@ -17,24 +18,25 @@ const assert = std.debug.assert; const posix = std.posix; const elf = std.elf; const Dwarf = std.debug.Dwarf; +const Pdb = std.debug.Pdb; const File = std.fs.File; const math = std.math; const testing = std.testing; -const Info = @This(); +const SelfInfo = @This(); const root = @import("root"); allocator: Allocator, address_map: std.AutoHashMap(usize, *Module), -modules: if (native_os == .windows) std.ArrayListUnmanaged(WindowsModuleInfo) else void, +modules: if (native_os == .windows) std.ArrayListUnmanaged(WindowsModule) else void, pub const OpenSelfError = error{ MissingDebugInfo, UnsupportedOperatingSystem, -} || @typeInfo(@typeInfo(@TypeOf(Info.init)).Fn.return_type.?).ErrorUnion.error_set; +} || @typeInfo(@typeInfo(@TypeOf(SelfInfo.init)).Fn.return_type.?).ErrorUnion.error_set; -pub fn openSelf(allocator: Allocator) OpenSelfError!Info { +pub fn openSelf(allocator: Allocator) OpenSelfError!SelfInfo { nosuspend { if (builtin.strip_debug_info) return error.MissingDebugInfo; @@ -51,14 +53,14 @@ pub fn openSelf(allocator: Allocator) OpenSelfError!Info { .solaris, .illumos, .windows, - => return try Info.init(allocator), + => return try SelfInfo.init(allocator), else => return error.UnsupportedOperatingSystem, } } } -pub fn init(allocator: Allocator) !Info { - var debug_info: Info = .{ +pub fn init(allocator: Allocator) !SelfInfo { + var debug_info: SelfInfo = .{ .allocator = allocator, .address_map = std.AutoHashMap(usize, *Module).init(allocator), .modules = if (native_os == .windows) .{} else {}, @@ -101,7 +103,7 @@ pub fn init(allocator: Allocator) !Info { return debug_info; } -pub fn deinit(self: *Info) void { +pub fn deinit(self: *SelfInfo) void { var it = self.address_map.iterator(); while (it.next()) |entry| { const mdi = entry.value_ptr.*; @@ -118,7 +120,7 @@ pub fn deinit(self: *Info) void { } } -pub fn getModuleForAddress(self: *Info, address: usize) !*Module { +pub fn getModuleForAddress(self: *SelfInfo, address: usize) !*Module { if (comptime builtin.target.isDarwin()) { return self.lookupModuleDyld(address); } else if (native_os == .windows) { @@ -135,7 +137,7 @@ pub fn getModuleForAddress(self: *Info, address: usize) !*Module { // Returns the module name for a given address. // This can be called when getModuleForAddress fails, so implementations should provide // a path that doesn't rely on any side-effects of a prior successful module lookup. 
-pub fn getModuleNameForAddress(self: *Info, address: usize) ?[]const u8 { +pub fn getModuleNameForAddress(self: *SelfInfo, address: usize) ?[]const u8 { if (comptime builtin.target.isDarwin()) { return self.lookupModuleNameDyld(address); } else if (native_os == .windows) { @@ -149,7 +151,7 @@ pub fn getModuleNameForAddress(self: *Info, address: usize) ?[]const u8 { } } -fn lookupModuleDyld(self: *Info, address: usize) !*Module { +fn lookupModuleDyld(self: *SelfInfo, address: usize) !*Module { const image_count = std.c._dyld_image_count(); var i: u32 = 0; @@ -215,7 +217,7 @@ fn lookupModuleDyld(self: *Info, address: usize) !*Module { return error.MissingDebugInfo; } -fn lookupModuleNameDyld(self: *Info, address: usize) ?[]const u8 { +fn lookupModuleNameDyld(self: *SelfInfo, address: usize) ?[]const u8 { _ = self; const image_count = std.c._dyld_image_count(); @@ -253,7 +255,7 @@ fn lookupModuleNameDyld(self: *Info, address: usize) ?[]const u8 { return null; } -fn lookupModuleWin32(self: *Info, address: usize) !*Module { +fn lookupModuleWin32(self: *SelfInfo, address: usize) !*Module { for (self.modules.items) |*module| { if (address >= module.base_address and address < module.base_address + module.size) { if (self.address_map.get(module.base_address)) |obj_di| { @@ -343,7 +345,7 @@ fn lookupModuleWin32(self: *Info, address: usize) !*Module { return error.MissingDebugInfo; } -fn lookupModuleNameWin32(self: *Info, address: usize) ?[]const u8 { +fn lookupModuleNameWin32(self: *SelfInfo, address: usize) ?[]const u8 { for (self.modules.items) |module| { if (address >= module.base_address and address < module.base_address + module.size) { return module.name; @@ -352,7 +354,7 @@ fn lookupModuleNameWin32(self: *Info, address: usize) ?[]const u8 { return null; } -fn lookupModuleNameDl(self: *Info, address: usize) ?[]const u8 { +fn lookupModuleNameDl(self: *SelfInfo, address: usize) ?[]const u8 { _ = self; var ctx: struct { @@ -390,7 +392,7 @@ fn lookupModuleNameDl(self: *Info, address: usize) ?[]const u8 { return null; } -fn lookupModuleDl(self: *Info, address: usize) !*Module { +fn lookupModuleDl(self: *SelfInfo, address: usize) !*Module { var ctx: struct { // Input address: usize, @@ -484,13 +486,13 @@ fn lookupModuleDl(self: *Info, address: usize) !*Module { return obj_di; } -fn lookupModuleHaiku(self: *Info, address: usize) !*Module { +fn lookupModuleHaiku(self: *SelfInfo, address: usize) !*Module { _ = self; _ = address; @panic("TODO implement lookup module for Haiku"); } -fn lookupModuleWasm(self: *Info, address: usize) !*Module { +fn lookupModuleWasm(self: *SelfInfo, address: usize) !*Module { _ = self; _ = address; @panic("TODO implement lookup module for Wasm"); @@ -709,7 +711,7 @@ pub const Module = switch (native_os) { }, .uefi, .windows => struct { base_address: usize, - pdb: ?pdb.Pdb = null, + pdb: ?Pdb = null, dwarf: ?Dwarf = null, coff_image_base: u64, @@ -837,7 +839,11 @@ pub const Module = switch (native_os) { else => Dwarf, }; -pub const WindowsModuleInfo = struct { +/// How is this different than `Module` when the host is Windows? +/// Why are both stored in the `SelfInfo` struct? +/// Boy, it sure would be nice if someone added documentation comments for this +/// struct explaining it. 
+pub const WindowsModule = struct { base_address: usize, size: u32, name: []const u8, @@ -1030,7 +1036,7 @@ fn readCoffDebugInfo(allocator: Allocator, coff_obj: *coff.Coff) !Module { }; defer if (path.ptr != raw_path.ptr) allocator.free(path); - di.pdb = pdb.Pdb.init(allocator, path) catch |err| switch (err) { + di.pdb = Pdb.init(allocator, path) catch |err| switch (err) { error.FileNotFound, error.IsDir => { if (di.dwarf == null) return error.MissingDebugInfo; return di; @@ -1292,22 +1298,10 @@ fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 pub const SymbolInfo = struct { symbol_name: []const u8 = "???", compile_unit_name: []const u8 = "???", - line_info: ?SourceLocation = null, + line_info: ?std.debug.SourceLocation = null, pub fn deinit(self: SymbolInfo, allocator: Allocator) void { - if (self.line_info) |li| { - li.deinit(allocator); - } - } -}; - -pub const SourceLocation = struct { - line: u64, - column: u64, - file_name: []const u8, - - pub fn deinit(self: SourceLocation, allocator: Allocator) void { - allocator.free(self.file_name); + if (self.line_info) |li| allocator.free(li.file_name); } }; diff --git a/lib/std/pdb.zig b/lib/std/pdb.zig index c96eb81fa9..31ad02e945 100644 --- a/lib/std/pdb.zig +++ b/lib/std/pdb.zig @@ -1,3 +1,12 @@ +//! Program Data Base debugging information format. +//! +//! This namespace contains unopinionated types and data definitions only. For +//! an implementation of parsing and caching PDB information, see +//! `std.debug.Pdb`. +//! +//! Most of this is based on information gathered from LLVM source code, +//! documentation and/or contributors. + const std = @import("std.zig"); const io = std.io; const math = std.math; @@ -9,10 +18,7 @@ const debug = std.debug; const ArrayList = std.ArrayList; -// Note: most of this is based on information gathered from LLVM source code, -// documentation and/or contributors. - -// https://llvm.org/docs/PDB/DbiStream.html#stream-header +/// https://llvm.org/docs/PDB/DbiStream.html#stream-header pub const DbiStreamHeader = extern struct { VersionSignature: i32, VersionHeader: u32, @@ -415,10 +421,8 @@ pub const ColumnNumberEntry = extern struct { pub const FileChecksumEntryHeader = extern struct { /// Byte offset of filename in global string table. FileNameOffset: u32, - /// Number of bytes of checksum. ChecksumSize: u8, - /// FileChecksumKind ChecksumKind: u8, }; @@ -451,525 +455,15 @@ pub const DebugSubsectionHeader = extern struct { Length: u32, }; -pub const PDBStringTableHeader = extern struct { +pub const StringTableHeader = extern struct { /// PDBStringTableSignature Signature: u32, - /// 1 or 2 HashVersion: u32, - /// Number of bytes of names buffer. 
ByteSize: u32, }; -fn readSparseBitVector(stream: anytype, allocator: mem.Allocator) ![]u32 { - const num_words = try stream.readInt(u32, .little); - var list = ArrayList(u32).init(allocator); - errdefer list.deinit(); - var word_i: u32 = 0; - while (word_i != num_words) : (word_i += 1) { - const word = try stream.readInt(u32, .little); - var bit_i: u5 = 0; - while (true) : (bit_i += 1) { - if (word & (@as(u32, 1) << bit_i) != 0) { - try list.append(word_i * 32 + bit_i); - } - if (bit_i == std.math.maxInt(u5)) break; - } - } - return try list.toOwnedSlice(); -} - -pub const Pdb = struct { - in_file: File, - msf: Msf, - allocator: mem.Allocator, - string_table: ?*MsfStream, - dbi: ?*MsfStream, - modules: []Module, - sect_contribs: []SectionContribEntry, - guid: [16]u8, - age: u32, - - pub const Module = struct { - mod_info: ModInfo, - module_name: []u8, - obj_file_name: []u8, - // The fields below are filled on demand. - populated: bool, - symbols: []u8, - subsect_info: []u8, - checksum_offset: ?usize, - - pub fn deinit(self: *Module, allocator: mem.Allocator) void { - allocator.free(self.module_name); - allocator.free(self.obj_file_name); - if (self.populated) { - allocator.free(self.symbols); - allocator.free(self.subsect_info); - } - } - }; - - pub fn init(allocator: mem.Allocator, path: []const u8) !Pdb { - const file = try fs.cwd().openFile(path, .{}); - errdefer file.close(); - - return Pdb{ - .in_file = file, - .allocator = allocator, - .string_table = null, - .dbi = null, - .msf = try Msf.init(allocator, file), - .modules = &[_]Module{}, - .sect_contribs = &[_]SectionContribEntry{}, - .guid = undefined, - .age = undefined, - }; - } - - pub fn deinit(self: *Pdb) void { - self.in_file.close(); - self.msf.deinit(self.allocator); - for (self.modules) |*module| { - module.deinit(self.allocator); - } - self.allocator.free(self.modules); - self.allocator.free(self.sect_contribs); - } - - pub fn parseDbiStream(self: *Pdb) !void { - var stream = self.getStream(StreamType.Dbi) orelse - return error.InvalidDebugInfo; - const reader = stream.reader(); - - const header = try reader.readStruct(DbiStreamHeader); - if (header.VersionHeader != 19990903) // V70, only value observed by LLVM team - return error.UnknownPDBVersion; - // if (header.Age != age) - // return error.UnmatchingPDB; - - const mod_info_size = header.ModInfoSize; - const section_contrib_size = header.SectionContributionSize; - - var modules = ArrayList(Module).init(self.allocator); - errdefer modules.deinit(); - - // Module Info Substream - var mod_info_offset: usize = 0; - while (mod_info_offset != mod_info_size) { - const mod_info = try reader.readStruct(ModInfo); - var this_record_len: usize = @sizeOf(ModInfo); - - const module_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); - errdefer self.allocator.free(module_name); - this_record_len += module_name.len + 1; - - const obj_file_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); - errdefer self.allocator.free(obj_file_name); - this_record_len += obj_file_name.len + 1; - - if (this_record_len % 4 != 0) { - const round_to_next_4 = (this_record_len | 0x3) + 1; - const march_forward_bytes = round_to_next_4 - this_record_len; - try stream.seekBy(@as(isize, @intCast(march_forward_bytes))); - this_record_len += march_forward_bytes; - } - - try modules.append(Module{ - .mod_info = mod_info, - .module_name = module_name, - .obj_file_name = obj_file_name, - - .populated = false, - .symbols = undefined, - .subsect_info = undefined, - .checksum_offset = 
null, - }); - - mod_info_offset += this_record_len; - if (mod_info_offset > mod_info_size) - return error.InvalidDebugInfo; - } - - // Section Contribution Substream - var sect_contribs = ArrayList(SectionContribEntry).init(self.allocator); - errdefer sect_contribs.deinit(); - - var sect_cont_offset: usize = 0; - if (section_contrib_size != 0) { - const version = reader.readEnum(SectionContrSubstreamVersion, .little) catch |err| switch (err) { - error.InvalidValue => return error.InvalidDebugInfo, - else => |e| return e, - }; - _ = version; - sect_cont_offset += @sizeOf(u32); - } - while (sect_cont_offset != section_contrib_size) { - const entry = try sect_contribs.addOne(); - entry.* = try reader.readStruct(SectionContribEntry); - sect_cont_offset += @sizeOf(SectionContribEntry); - - if (sect_cont_offset > section_contrib_size) - return error.InvalidDebugInfo; - } - - self.modules = try modules.toOwnedSlice(); - self.sect_contribs = try sect_contribs.toOwnedSlice(); - } - - pub fn parseInfoStream(self: *Pdb) !void { - var stream = self.getStream(StreamType.Pdb) orelse - return error.InvalidDebugInfo; - const reader = stream.reader(); - - // Parse the InfoStreamHeader. - const version = try reader.readInt(u32, .little); - const signature = try reader.readInt(u32, .little); - _ = signature; - const age = try reader.readInt(u32, .little); - const guid = try reader.readBytesNoEof(16); - - if (version != 20000404) // VC70, only value observed by LLVM team - return error.UnknownPDBVersion; - - self.guid = guid; - self.age = age; - - // Find the string table. - const string_table_index = str_tab_index: { - const name_bytes_len = try reader.readInt(u32, .little); - const name_bytes = try self.allocator.alloc(u8, name_bytes_len); - defer self.allocator.free(name_bytes); - try reader.readNoEof(name_bytes); - - const HashTableHeader = extern struct { - Size: u32, - Capacity: u32, - - fn maxLoad(cap: u32) u32 { - return cap * 2 / 3 + 1; - } - }; - const hash_tbl_hdr = try reader.readStruct(HashTableHeader); - if (hash_tbl_hdr.Capacity == 0) - return error.InvalidDebugInfo; - - if (hash_tbl_hdr.Size > HashTableHeader.maxLoad(hash_tbl_hdr.Capacity)) - return error.InvalidDebugInfo; - - const present = try readSparseBitVector(&reader, self.allocator); - defer self.allocator.free(present); - if (present.len != hash_tbl_hdr.Size) - return error.InvalidDebugInfo; - const deleted = try readSparseBitVector(&reader, self.allocator); - defer self.allocator.free(deleted); - - for (present) |_| { - const name_offset = try reader.readInt(u32, .little); - const name_index = try reader.readInt(u32, .little); - if (name_offset > name_bytes.len) - return error.InvalidDebugInfo; - const name = mem.sliceTo(name_bytes[name_offset..], 0); - if (mem.eql(u8, name, "/names")) { - break :str_tab_index name_index; - } - } - return error.MissingDebugInfo; - }; - - self.string_table = self.getStreamById(string_table_index) orelse - return error.MissingDebugInfo; - } - - pub fn getSymbolName(self: *Pdb, module: *Module, address: u64) ?[]const u8 { - _ = self; - std.debug.assert(module.populated); - - var symbol_i: usize = 0; - while (symbol_i != module.symbols.len) { - const prefix = @as(*align(1) RecordPrefix, @ptrCast(&module.symbols[symbol_i])); - if (prefix.RecordLen < 2) - return null; - switch (prefix.RecordKind) { - .S_LPROC32, .S_GPROC32 => { - const proc_sym = @as(*align(1) ProcSym, @ptrCast(&module.symbols[symbol_i + @sizeOf(RecordPrefix)])); - if (address >= proc_sym.CodeOffset and address < proc_sym.CodeOffset + 
proc_sym.CodeSize) { - return mem.sliceTo(@as([*:0]u8, @ptrCast(&proc_sym.Name[0])), 0); - } - }, - else => {}, - } - symbol_i += prefix.RecordLen + @sizeOf(u16); - } - - return null; - } - - pub fn getLineNumberInfo(self: *Pdb, module: *Module, address: u64) !debug.Info.SourceLocation { - std.debug.assert(module.populated); - const subsect_info = module.subsect_info; - - var sect_offset: usize = 0; - var skip_len: usize = undefined; - const checksum_offset = module.checksum_offset orelse return error.MissingDebugInfo; - while (sect_offset != subsect_info.len) : (sect_offset += skip_len) { - const subsect_hdr = @as(*align(1) DebugSubsectionHeader, @ptrCast(&subsect_info[sect_offset])); - skip_len = subsect_hdr.Length; - sect_offset += @sizeOf(DebugSubsectionHeader); - - switch (subsect_hdr.Kind) { - .Lines => { - var line_index = sect_offset; - - const line_hdr = @as(*align(1) LineFragmentHeader, @ptrCast(&subsect_info[line_index])); - if (line_hdr.RelocSegment == 0) - return error.MissingDebugInfo; - line_index += @sizeOf(LineFragmentHeader); - const frag_vaddr_start = line_hdr.RelocOffset; - const frag_vaddr_end = frag_vaddr_start + line_hdr.CodeSize; - - if (address >= frag_vaddr_start and address < frag_vaddr_end) { - // There is an unknown number of LineBlockFragmentHeaders (and their accompanying line and column records) - // from now on. We will iterate through them, and eventually find a SourceLocation that we're interested in, - // breaking out to :subsections. If not, we will make sure to not read anything outside of this subsection. - const subsection_end_index = sect_offset + subsect_hdr.Length; - - while (line_index < subsection_end_index) { - const block_hdr = @as(*align(1) LineBlockFragmentHeader, @ptrCast(&subsect_info[line_index])); - line_index += @sizeOf(LineBlockFragmentHeader); - const start_line_index = line_index; - - const has_column = line_hdr.Flags.LF_HaveColumns; - - // All line entries are stored inside their line block by ascending start address. - // Heuristic: we want to find the last line entry - // that has a vaddr_start <= address. - // This is done with a simple linear search. - var line_i: u32 = 0; - while (line_i < block_hdr.NumLines) : (line_i += 1) { - const line_num_entry = @as(*align(1) LineNumberEntry, @ptrCast(&subsect_info[line_index])); - line_index += @sizeOf(LineNumberEntry); - - const vaddr_start = frag_vaddr_start + line_num_entry.Offset; - if (address < vaddr_start) { - break; - } - } - - // line_i == 0 would mean that no matching LineNumberEntry was found. 
- if (line_i > 0) { - const subsect_index = checksum_offset + block_hdr.NameIndex; - const chksum_hdr = @as(*align(1) FileChecksumEntryHeader, @ptrCast(&module.subsect_info[subsect_index])); - const strtab_offset = @sizeOf(PDBStringTableHeader) + chksum_hdr.FileNameOffset; - try self.string_table.?.seekTo(strtab_offset); - const source_file_name = try self.string_table.?.reader().readUntilDelimiterAlloc(self.allocator, 0, 1024); - - const line_entry_idx = line_i - 1; - - const column = if (has_column) blk: { - const start_col_index = start_line_index + @sizeOf(LineNumberEntry) * block_hdr.NumLines; - const col_index = start_col_index + @sizeOf(ColumnNumberEntry) * line_entry_idx; - const col_num_entry = @as(*align(1) ColumnNumberEntry, @ptrCast(&subsect_info[col_index])); - break :blk col_num_entry.StartColumn; - } else 0; - - const found_line_index = start_line_index + line_entry_idx * @sizeOf(LineNumberEntry); - const line_num_entry: *align(1) LineNumberEntry = @ptrCast(&subsect_info[found_line_index]); - const flags: *align(1) LineNumberEntry.Flags = @ptrCast(&line_num_entry.Flags); - - return debug.Info.SourceLocation{ - .file_name = source_file_name, - .line = flags.Start, - .column = column, - }; - } - } - - // Checking that we are not reading garbage after the (possibly) multiple block fragments. - if (line_index != subsection_end_index) { - return error.InvalidDebugInfo; - } - } - }, - else => {}, - } - - if (sect_offset > subsect_info.len) - return error.InvalidDebugInfo; - } - - return error.MissingDebugInfo; - } - - pub fn getModule(self: *Pdb, index: usize) !?*Module { - if (index >= self.modules.len) - return null; - - const mod = &self.modules[index]; - if (mod.populated) - return mod; - - // At most one can be non-zero. - if (mod.mod_info.C11ByteSize != 0 and mod.mod_info.C13ByteSize != 0) - return error.InvalidDebugInfo; - if (mod.mod_info.C13ByteSize == 0) - return error.InvalidDebugInfo; - - const stream = self.getStreamById(mod.mod_info.ModuleSymStream) orelse - return error.MissingDebugInfo; - const reader = stream.reader(); - - const signature = try reader.readInt(u32, .little); - if (signature != 4) - return error.InvalidDebugInfo; - - mod.symbols = try self.allocator.alloc(u8, mod.mod_info.SymByteSize - 4); - errdefer self.allocator.free(mod.symbols); - try reader.readNoEof(mod.symbols); - - mod.subsect_info = try self.allocator.alloc(u8, mod.mod_info.C13ByteSize); - errdefer self.allocator.free(mod.subsect_info); - try reader.readNoEof(mod.subsect_info); - - var sect_offset: usize = 0; - var skip_len: usize = undefined; - while (sect_offset != mod.subsect_info.len) : (sect_offset += skip_len) { - const subsect_hdr = @as(*align(1) DebugSubsectionHeader, @ptrCast(&mod.subsect_info[sect_offset])); - skip_len = subsect_hdr.Length; - sect_offset += @sizeOf(DebugSubsectionHeader); - - switch (subsect_hdr.Kind) { - .FileChecksums => { - mod.checksum_offset = sect_offset; - break; - }, - else => {}, - } - - if (sect_offset > mod.subsect_info.len) - return error.InvalidDebugInfo; - } - - mod.populated = true; - return mod; - } - - pub fn getStreamById(self: *Pdb, id: u32) ?*MsfStream { - if (id >= self.msf.streams.len) - return null; - return &self.msf.streams[id]; - } - - pub fn getStream(self: *Pdb, stream: StreamType) ?*MsfStream { - const id = @intFromEnum(stream); - return self.getStreamById(id); - } -}; - -// see https://llvm.org/docs/PDB/MsfFile.html -const Msf = struct { - directory: MsfStream, - streams: []MsfStream, - - fn init(allocator: mem.Allocator, file: File) 
!Msf { - const in = file.reader(); - - const superblock = try in.readStruct(SuperBlock); - - // Sanity checks - if (!mem.eql(u8, &superblock.FileMagic, SuperBlock.file_magic)) - return error.InvalidDebugInfo; - if (superblock.FreeBlockMapBlock != 1 and superblock.FreeBlockMapBlock != 2) - return error.InvalidDebugInfo; - const file_len = try file.getEndPos(); - if (superblock.NumBlocks * superblock.BlockSize != file_len) - return error.InvalidDebugInfo; - switch (superblock.BlockSize) { - // llvm only supports 4096 but we can handle any of these values - 512, 1024, 2048, 4096 => {}, - else => return error.InvalidDebugInfo, - } - - const dir_block_count = blockCountFromSize(superblock.NumDirectoryBytes, superblock.BlockSize); - if (dir_block_count > superblock.BlockSize / @sizeOf(u32)) - return error.UnhandledBigDirectoryStream; // cf. BlockMapAddr comment. - - try file.seekTo(superblock.BlockSize * superblock.BlockMapAddr); - const dir_blocks = try allocator.alloc(u32, dir_block_count); - for (dir_blocks) |*b| { - b.* = try in.readInt(u32, .little); - } - var directory = MsfStream.init( - superblock.BlockSize, - file, - dir_blocks, - ); - - const begin = directory.pos; - const stream_count = try directory.reader().readInt(u32, .little); - const stream_sizes = try allocator.alloc(u32, stream_count); - defer allocator.free(stream_sizes); - - // Microsoft's implementation uses @as(u32, -1) for inexistent streams. - // These streams are not used, but still participate in the file - // and must be taken into account when resolving stream indices. - const Nil = 0xFFFFFFFF; - for (stream_sizes) |*s| { - const size = try directory.reader().readInt(u32, .little); - s.* = if (size == Nil) 0 else blockCountFromSize(size, superblock.BlockSize); - } - - const streams = try allocator.alloc(MsfStream, stream_count); - for (streams, 0..) |*stream, i| { - const size = stream_sizes[i]; - if (size == 0) { - stream.* = MsfStream{ - .blocks = &[_]u32{}, - }; - } else { - var blocks = try allocator.alloc(u32, size); - var j: u32 = 0; - while (j < size) : (j += 1) { - const block_id = try directory.reader().readInt(u32, .little); - const n = (block_id % superblock.BlockSize); - // 0 is for SuperBlock, 1 and 2 for FPMs. - if (block_id == 0 or n == 1 or n == 2 or block_id * superblock.BlockSize > file_len) - return error.InvalidBlockIndex; - blocks[j] = block_id; - } - - stream.* = MsfStream.init( - superblock.BlockSize, - file, - blocks, - ); - } - } - - const end = directory.pos; - if (end - begin != superblock.NumDirectoryBytes) - return error.InvalidStreamDirectory; - - return Msf{ - .directory = directory, - .streams = streams, - }; - } - - fn deinit(self: *Msf, allocator: mem.Allocator) void { - allocator.free(self.directory.blocks); - for (self.streams) |*stream| { - allocator.free(stream.blocks); - } - allocator.free(self.streams); - } -}; - -fn blockCountFromSize(size: u32, block_size: u32) u32 { - return (size + block_size - 1) / block_size; -} - // https://llvm.org/docs/PDB/MsfFile.html#the-superblock pub const SuperBlock = extern struct { /// The LLVM docs list a space between C / C++ but empirically this is not the case. @@ -1016,82 +510,3 @@ pub const SuperBlock = extern struct { // implement it so we're kind of safe making this assumption for now. 
BlockMapAddr: u32, }; - -const MsfStream = struct { - in_file: File = undefined, - pos: u64 = undefined, - blocks: []u32 = undefined, - block_size: u32 = undefined, - - pub const Error = @typeInfo(@typeInfo(@TypeOf(read)).Fn.return_type.?).ErrorUnion.error_set; - - fn init(block_size: u32, file: File, blocks: []u32) MsfStream { - const stream = MsfStream{ - .in_file = file, - .pos = 0, - .blocks = blocks, - .block_size = block_size, - }; - - return stream; - } - - fn read(self: *MsfStream, buffer: []u8) !usize { - var block_id = @as(usize, @intCast(self.pos / self.block_size)); - if (block_id >= self.blocks.len) return 0; // End of Stream - var block = self.blocks[block_id]; - var offset = self.pos % self.block_size; - - try self.in_file.seekTo(block * self.block_size + offset); - const in = self.in_file.reader(); - - var size: usize = 0; - var rem_buffer = buffer; - while (size < buffer.len) { - const size_to_read = @min(self.block_size - offset, rem_buffer.len); - size += try in.read(rem_buffer[0..size_to_read]); - rem_buffer = buffer[size..]; - offset += size_to_read; - - // If we're at the end of a block, go to the next one. - if (offset == self.block_size) { - offset = 0; - block_id += 1; - if (block_id >= self.blocks.len) break; // End of Stream - block = self.blocks[block_id]; - try self.in_file.seekTo(block * self.block_size); - } - } - - self.pos += buffer.len; - return buffer.len; - } - - pub fn seekBy(self: *MsfStream, len: i64) !void { - self.pos = @as(u64, @intCast(@as(i64, @intCast(self.pos)) + len)); - if (self.pos >= self.blocks.len * self.block_size) - return error.EOF; - } - - pub fn seekTo(self: *MsfStream, len: u64) !void { - self.pos = len; - if (self.pos >= self.blocks.len * self.block_size) - return error.EOF; - } - - fn getSize(self: *const MsfStream) u64 { - return self.blocks.len * self.block_size; - } - - fn getFilePos(self: MsfStream) u64 { - const block_id = self.pos / self.block_size; - const block = self.blocks[block_id]; - const offset = self.pos % self.block_size; - - return block * self.block_size + offset; - } - - pub fn reader(self: *MsfStream) std.io.Reader(*MsfStream, Error, read) { - return .{ .context = self }; - } -};
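Not part of the patch above: a minimal usage sketch of the reorganized API, assuming only the renamed declarations shown in the diff (`std.debug.SelfInfo`, `std.debug.SourceLocation`, `getSelfDebugInfo`, `printSourceAtAddress`). It mirrors the existing `test "manage resources correctly"` and only illustrates how a caller resolves its own program counter to a source location after this change.

    const std = @import("std");

    pub fn main() !void {
        // Cached, process-wide debug info; this now has type *SelfInfo (formerly *Info).
        const di = try std.debug.getSelfDebugInfo();

        const stderr = std.io.getStdErr();
        const tty_config = std.io.tty.detectConfig(stderr);

        // Resolve the caller's return address to a std.debug.SourceLocation
        // (file name, line, column) and print it along with symbol information.
        try std.debug.printSourceAtAddress(di, stderr.writer(), @returnAddress(), tty_config);
    }

Likewise, a hedged sketch of driving the extracted `std.debug.Pdb` parser directly, now that `std.pdb` holds only type definitions. The file path and the code offset are illustrative placeholders, not values taken from the patch; mapping a loaded image's virtual address to the module-relative offset that `getSymbolName`/`getLineNumberInfo` expect is omitted here, since that bookkeeping lives in `SelfInfo`'s COFF module handling (`readCoffDebugInfo`).

    const std = @import("std");

    pub fn main() !void {
        var gpa = std.heap.GeneralPurposeAllocator(.{}){};
        defer _ = gpa.deinit();
        const allocator = gpa.allocator();

        // "example.pdb" is a placeholder; SelfInfo derives the real path from the
        // executable's COFF debug information before calling Pdb.init.
        var pdb = try std.debug.Pdb.init(allocator, "example.pdb");
        defer pdb.deinit();
        try pdb.parseInfoStream();
        try pdb.parseDbiStream();

        const code_offset: u64 = 0x1000; // illustrative module-relative offset
        var i: usize = 0;
        while (try pdb.getModule(i)) |module| : (i += 1) {
            const name = pdb.getSymbolName(module, code_offset) orelse continue;
            const loc = try pdb.getLineNumberInfo(module, code_offset);
            defer allocator.free(loc.file_name);
            std.debug.print("{s} at {s}:{d}:{d}\n", .{ name, loc.file_name, loc.line, loc.column });
            break;
        }
    }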