add CommonInformationEntry parser

This commit is contained in:
kcbanner 2023-05-04 01:35:43 -04:00
parent ea9917d9bd
commit f6148f123e
3 changed files with 244 additions and 126 deletions

View File

@ -800,52 +800,20 @@ fn readCoffDebugInfo(allocator: mem.Allocator, coff_bytes: []const u8) !ModuleDe
// This coff file has embedded DWARF debug info
_ = sec;
const debug_info = coff_obj.getSectionDataAlloc(".debug_info", allocator) catch return error.MissingDebugInfo;
errdefer allocator.free(debug_info);
const debug_abbrev = coff_obj.getSectionDataAlloc(".debug_abbrev", allocator) catch return error.MissingDebugInfo;
errdefer allocator.free(debug_abbrev);
const debug_str = coff_obj.getSectionDataAlloc(".debug_str", allocator) catch return error.MissingDebugInfo;
errdefer allocator.free(debug_str);
const debug_line = coff_obj.getSectionDataAlloc(".debug_line", allocator) catch return error.MissingDebugInfo;
errdefer allocator.free(debug_line);
const num_sections = std.enums.directEnumArrayLen(DW.DwarfSection, 0);
var sections: [num_sections]?[]const u8 = [_]?[]const u8{null} ** num_sections;
errdefer for (sections) |section| if (section) |s| allocator.free(s);
const debug_str_offsets = coff_obj.getSectionDataAlloc(".debug_str_offsets", allocator) catch null;
const debug_line_str = coff_obj.getSectionDataAlloc(".debug_line_str", allocator) catch null;
const debug_ranges = coff_obj.getSectionDataAlloc(".debug_ranges", allocator) catch null;
const debug_loclists = coff_obj.getSectionDataAlloc(".debug_loclists", allocator) catch null;
const debug_rnglists = coff_obj.getSectionDataAlloc(".debug_rnglists", allocator) catch null;
const debug_addr = coff_obj.getSectionDataAlloc(".debug_addr", allocator) catch null;
const debug_names = coff_obj.getSectionDataAlloc(".debug_names", allocator) catch null;
const debug_frame = coff_obj.getSectionDataAlloc(".debug_frame", allocator) catch null;
inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| {
sections[i] = try coff_obj.getSectionDataAlloc("." ++ section.name, allocator);
}
var dwarf = DW.DwarfInfo{
.endian = native_endian,
.debug_info = debug_info,
.debug_abbrev = debug_abbrev,
.debug_str = debug_str,
.debug_str_offsets = debug_str_offsets,
.debug_line = debug_line,
.debug_line_str = debug_line_str,
.debug_ranges = debug_ranges,
.debug_loclists = debug_loclists,
.debug_rnglists = debug_rnglists,
.debug_addr = debug_addr,
.debug_names = debug_names,
.debug_frame = debug_frame,
};
DW.openDwarfDebugInfo(&dwarf, allocator) catch |err| {
if (debug_str_offsets) |d| allocator.free(d);
if (debug_line_str) |d| allocator.free(d);
if (debug_ranges) |d| allocator.free(d);
if (debug_loclists) |d| allocator.free(d);
if (debug_rnglists) |d| allocator.free(d);
if (debug_addr) |d| allocator.free(d);
if (debug_names) |d| allocator.free(d);
if (debug_frame) |d| allocator.free(d);
return err;
.sections = sections,
};
try DW.openDwarfDebugInfo(&dwarf, allocator);
di.debug_data = PdbOrDwarf{ .dwarf = dwarf };
return di;
}
@ -901,7 +869,7 @@ pub fn readElfDebugInfo(
};
const mapped_mem = try mapWholeFile(elf_file);
if (expected_crc) |crc| if (crc != std.hash.crc.Crc32SmallWithPoly(.IEEE).hash(mapped_mem)) return error.MissingDebugInfo;
if (expected_crc) |crc| if (crc != std.hash.crc.Crc32SmallWithPoly(.IEEE).hash(mapped_mem)) return error.InvalidDebugInfo;
const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]);
if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic;
@ -916,36 +884,23 @@ pub fn readElfDebugInfo(
const shoff = hdr.e_shoff;
const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx);
const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(
&mapped_mem[math.cast(usize, str_section_off) orelse return error.Overflow],
));
const header_strings = mapped_mem[str_shdr.sh_offset .. str_shdr.sh_offset + str_shdr.sh_size];
const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[math.cast(usize, str_section_off) orelse return error.Overflow]));
const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size];
const shdrs = @as(
[*]const elf.Shdr,
@ptrCast(@alignCast(&mapped_mem[shoff])),
)[0..hdr.e_shnum];
var opt_debug_info: ?[]const u8 = null;
var opt_debug_abbrev: ?[]const u8 = null;
var opt_debug_str: ?[]const u8 = null;
var opt_debug_str_offsets: ?[]const u8 = null;
var opt_debug_line: ?[]const u8 = null;
var opt_debug_line_str: ?[]const u8 = null;
var opt_debug_ranges: ?[]const u8 = null;
var opt_debug_loclists: ?[]const u8 = null;
var opt_debug_rnglists: ?[]const u8 = null;
var opt_debug_addr: ?[]const u8 = null;
var opt_debug_names: ?[]const u8 = null;
var opt_debug_frame: ?[]const u8 = null;
var owned_sections: [ModuleDebugInfo.num_sections][]const u8 = [_][]const u8{&.{}} ** ModuleDebugInfo.num_sections;
const num_sections = std.enums.directEnumArrayLen(DW.DwarfSection, 0);
var sections: [num_sections]?[]const u8 = [_]?[]const u8{null} ** num_sections;
var owned_sections: [num_sections][]const u8 = [_][]const u8{&.{}} ** num_sections;
errdefer for (owned_sections) |section| allocator.free(section);
var separate_debug_filename: ?[]const u8 = null;
var separate_debug_crc: ?u32 = null;
for (shdrs) |*shdr| {
if (shdr.sh_type == elf.SHT_NULL) continue;
if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue;
const name = mem.sliceTo(header_strings[shdr.sh_name..], 0);
if (mem.eql(u8, name, ".gnu_debuglink")) {
@ -958,26 +913,11 @@ pub fn readElfDebugInfo(
continue;
}
const sections = [_]struct { name: []const u8, out: *?[]const u8 }{
.{ .name = ".debug_info", .out = &opt_debug_info },
.{ .name = ".debug_abbrev", .out = &opt_debug_abbrev },
.{ .name = ".debug_str", .out = &opt_debug_str },
.{ .name = ".debug_str_offsets", .out = &opt_debug_str_offsets },
.{ .name = ".debug_line", .out = &opt_debug_line },
.{ .name = ".debug_line_str", .out = &opt_debug_line_str },
.{ .name = ".debug_ranges", .out = &opt_debug_ranges },
.{ .name = ".debug_loclists", .out = &opt_debug_loclists },
.{ .name = ".debug_rnglists", .out = &opt_debug_rnglists },
.{ .name = ".debug_addr", .out = &opt_debug_addr },
.{ .name = ".debug_names", .out = &opt_debug_names },
.{ .name = ".debug_frame", .out = &opt_debug_frame },
};
var section_index = for (sections, 0..) |section, i| {
if (mem.eql(u8, section.name, name)) {
break i;
}
} else continue;
var section_index: ?usize = null;
inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| {
if (mem.eql(u8, "." ++ section.name, name)) section_index = i;
}
if (section_index == null) continue;
const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size);
if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) {
@ -997,25 +937,24 @@ pub fn readElfDebugInfo(
const read = zlib_stream.reader().readAll(decompressed_section) catch continue;
assert(read == decompressed_section.len);
sections[section_index].out.* = decompressed_section;
owned_sections[section_index] = decompressed_section;
sections[section_index.?] = decompressed_section;
owned_sections[section_index.?] = decompressed_section;
} else {
sections[section_index].out.* = section_bytes;
}
sections[section_index.?] = section_bytes;
}
}
const missing_debug_info =
opt_debug_info == null or
opt_debug_abbrev == null or
opt_debug_str == null or
opt_debug_line == null;
sections[@enumToInt(DW.DwarfSection.debug_info)] == null or
sections[@enumToInt(DW.DwarfSection.debug_abbrev)] == null or
sections[@enumToInt(DW.DwarfSection.debug_str)] == null or
sections[@enumToInt(DW.DwarfSection.debug_line)] == null;
// Attempt to load debug info from an external file
// See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html
if (missing_debug_info) {
const global_debug_directories = [_][]const u8{
"/usr/lib/debug",
// TODO: Determine the set of directories used by most distros for this path (check GDB sources)
};
// <global debug directory>/.build-id/<2-character id prefix>/<id remainder>.debug
@ -1074,18 +1013,7 @@ pub fn readElfDebugInfo(
var di = DW.DwarfInfo{
.endian = endian,
.debug_info = opt_debug_info.?,
.debug_abbrev = opt_debug_abbrev.?,
.debug_str = opt_debug_str.?,
.debug_str_offsets = opt_debug_str_offsets,
.debug_line = opt_debug_line.?,
.debug_line_str = opt_debug_line_str,
.debug_ranges = opt_debug_ranges,
.debug_loclists = opt_debug_loclists,
.debug_rnglists = opt_debug_rnglists,
.debug_addr = opt_debug_addr,
.debug_names = opt_debug_names,
.debug_frame = opt_debug_frame,
.sections = sections,
};
try DW.openDwarfDebugInfo(&di, allocator);
@ -1882,7 +1810,7 @@ pub const ModuleDebugInfo = switch (native_os) {
mapped_memory: []align(mem.page_size) const u8,
owned_sections: [num_sections][]const u8 = [_][]const u8{&.{}} ** num_sections,
const num_sections = 12;
const num_sections = 14;
fn deinit(self: *@This(), allocator: mem.Allocator) void {
self.dwarf.deinit(allocator);
@ -1916,7 +1844,7 @@ fn getSymbolFromDwarf(allocator: mem.Allocator, address: u64, di: *DW.DwarfInfo)
if (nosuspend di.findCompileUnit(address)) |compile_unit| {
return SymbolInfo{
.symbol_name = nosuspend di.getSymbolName(address) orelse "???",
.compile_unit_name = compile_unit.die.getAttrString(di, DW.AT.name, di.debug_str, compile_unit.*) catch |err| switch (err) {
.compile_unit_name = compile_unit.die.getAttrString(di, DW.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) {
error.MissingDebugInfo, error.InvalidDebugInfo => "???",
},
.line_info = nosuspend di.getLineNumberInfo(allocator, compile_unit.*, address) catch |err| switch (err) {

View File

@ -13,6 +13,7 @@ pub const OP = @import("dwarf/OP.zig");
pub const LANG = @import("dwarf/LANG.zig");
pub const FORM = @import("dwarf/FORM.zig");
pub const ATE = @import("dwarf/ATE.zig");
pub const EH = @import("dwarf/EH.zig");
pub const LLE = struct {
pub const end_of_list = 0x00;
@ -337,7 +338,7 @@ const Die = struct {
FormValue.String => |value| return value,
FormValue.StrPtr => |offset| return di.getString(offset),
FormValue.StrOffset => |index| {
const debug_str_offsets = di.debug_str_offsets orelse return badDwarf();
const debug_str_offsets = di.section(.debug_str_offsets) orelse return badDwarf();
if (compile_unit.str_offsets_base == 0) return badDwarf();
if (compile_unit.is_64) {
const byte_offset = compile_unit.str_offsets_base + 8 * index;
@ -642,26 +643,36 @@ fn getAbbrevTableEntry(abbrev_table: *const AbbrevTable, abbrev_code: u64) ?*con
return null;
}
/// Names the object-file sections that `DwarfInfo` keeps track of.
/// Each field name is the section name without its leading "." (loaders in
/// this change look sections up as `"." ++ field.name`), and the field's
/// integer value is the slot used in `DwarfInfo.sections`.
pub const DwarfSection = enum {
// Field order matters: `@enumToInt` of a field is its index into `DwarfInfo.sections`.
debug_info,
debug_abbrev,
debug_str,
debug_str_offsets,
debug_line,
debug_line_str,
debug_ranges,
debug_loclists,
debug_rnglists,
debug_addr,
debug_names,
debug_frame,
eh_frame,
eh_frame_hdr,
};
pub const DwarfInfo = struct {
endian: std.builtin.Endian,
// No memory is owned by the DwarfInfo
debug_info: []const u8,
debug_abbrev: []const u8,
debug_str: []const u8,
debug_str_offsets: ?[]const u8,
debug_line: []const u8,
debug_line_str: ?[]const u8,
debug_ranges: ?[]const u8,
debug_loclists: ?[]const u8,
debug_rnglists: ?[]const u8,
debug_addr: ?[]const u8,
debug_names: ?[]const u8,
debug_frame: ?[]const u8,
sections: [std.enums.directEnumArrayLen(DwarfSection, 0)]?[]const u8,
// Filled later by the initializer
abbrev_table_list: std.ArrayListUnmanaged(AbbrevTableHeader) = .{},
compile_unit_list: std.ArrayListUnmanaged(CompileUnit) = .{},
func_list: std.ArrayListUnmanaged(Func) = .{},
/// Returns the entry of `sections` that corresponds to `dwarf_section`,
/// which is null when no data was stored for that section.
pub fn section(di: DwarfInfo, dwarf_section: DwarfSection) ?[]const u8 {
return di.sections[@enumToInt(dwarf_section)];
}
pub fn deinit(di: *DwarfInfo, allocator: mem.Allocator) void {
for (di.abbrev_table_list.items) |*abbrev| {
abbrev.deinit();
@ -691,7 +702,7 @@ pub const DwarfInfo = struct {
}
fn scanAllFunctions(di: *DwarfInfo, allocator: mem.Allocator) !void {
var stream = io.fixedBufferStream(di.debug_info);
var stream = io.fixedBufferStream(di.section(.debug_info).?);
const in = stream.reader();
const seekable = &stream.seekableStream();
var this_unit_offset: u64 = 0;
@ -764,7 +775,7 @@ pub const DwarfInfo = struct {
// Prevent endless loops
while (depth > 0) : (depth -= 1) {
if (this_die_obj.getAttr(AT.name)) |_| {
const name = try this_die_obj.getAttrString(di, AT.name, di.debug_str, compile_unit);
const name = try this_die_obj.getAttrString(di, AT.name, di.section(.debug_str), compile_unit);
break :x try allocator.dupe(u8, name);
} else if (this_die_obj.getAttr(AT.abstract_origin)) |_| {
// Follow the DIE it points to and repeat
@ -836,7 +847,7 @@ pub const DwarfInfo = struct {
}
fn scanAllCompileUnits(di: *DwarfInfo, allocator: mem.Allocator) !void {
var stream = io.fixedBufferStream(di.debug_info);
var stream = io.fixedBufferStream(di.section(.debug_info).?);
const in = &stream.reader();
const seekable = &stream.seekableStream();
var this_unit_offset: u64 = 0;
@ -930,7 +941,7 @@ pub const DwarfInfo = struct {
if (target_address >= range.start and target_address < range.end) return compile_unit;
}
const opt_debug_ranges = if (compile_unit.version >= 5) di.debug_rnglists else di.debug_ranges;
const opt_debug_ranges = if (compile_unit.version >= 5) di.section(.debug_rnglists) else di.section(.debug_ranges);
const debug_ranges = opt_debug_ranges orelse continue;
const ranges_val = compile_unit.die.getAttr(AT.ranges) orelse continue;
@ -1065,7 +1076,7 @@ pub const DwarfInfo = struct {
}
fn parseAbbrevTable(di: *DwarfInfo, allocator: mem.Allocator, offset: u64) !AbbrevTable {
var stream = io.fixedBufferStream(di.debug_abbrev);
var stream = io.fixedBufferStream(di.section(.debug_abbrev).?);
const in = &stream.reader();
const seekable = &stream.seekableStream();
@ -1146,11 +1157,11 @@ pub const DwarfInfo = struct {
compile_unit: CompileUnit,
target_address: u64,
) !debug.LineInfo {
var stream = io.fixedBufferStream(di.debug_line);
var stream = io.fixedBufferStream(di.section(.debug_line).?);
const in = &stream.reader();
const seekable = &stream.seekableStream();
const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.debug_line_str, compile_unit);
const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit);
const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list);
try seekable.seekTo(line_info_offset);
@ -1416,15 +1427,15 @@ pub const DwarfInfo = struct {
}
fn getString(di: DwarfInfo, offset: u64) ![]const u8 {
return getStringGeneric(di.debug_str, offset);
return getStringGeneric(di.section(.debug_str), offset);
}
fn getLineString(di: DwarfInfo, offset: u64) ![]const u8 {
return getStringGeneric(di.debug_line_str, offset);
return getStringGeneric(di.section(.debug_line_str), offset);
}
fn readDebugAddr(di: DwarfInfo, compile_unit: CompileUnit, index: u64) !u64 {
const debug_addr = di.debug_addr orelse return badDwarf();
const debug_addr = di.section(.debug_addr) orelse return badDwarf();
// addr_base points to the first item after the header, however we
// need to read the header to know the size of each item. Empirically,
@ -1455,6 +1466,12 @@ pub const DwarfInfo = struct {
/// Runs the function scan and the compile-unit scan over `di`, allocating
/// with `allocator`. Any error from either scan is returned to the caller.
///
/// While the CIE parser is being brought up, this also parses the first
/// entry of `.eh_frame` when that section is present; the parsed result is
/// discarded, but a parse error is propagated.
pub fn openDwarfDebugInfo(di: *DwarfInfo, allocator: mem.Allocator) !void {
    try di.scanAllFunctions(allocator);
    try di.scanAllCompileUnits(allocator);

    // DEBUG
    if (di.section(.eh_frame)) |eh_frame| {
        // Decode with the byte order recorded for this debug info instead of
        // assuming little-endian, so big-endian inputs are not misread.
        // NOTE(review): the address size is still hard-coded to 8 bytes.
        _ = try CommonInformationEntry.parse(eh_frame, 8, di.endian);
    }
}
/// This function is to make it handy to comment out the return and make it
@ -1477,3 +1494,157 @@ fn getStringGeneric(opt_str: ?[]const u8, offset: u64) ![:0]const u8 {
const last = mem.indexOfScalarPos(u8, str, casted_offset, 0) orelse return badDwarf();
return str[casted_offset..last :0];
}
/// A pointer value as decoded by `readEhPointer`, stored together with the
/// information needed to resolve it to a final address later on.
const EhPointer = struct {
// The decoded value before any base is applied. `readEhPointer` picks the
// tag from the value format: `signed` for the sleb128/sdata* formats and
// `unsigned` for absptr, uleb128 and the udata* formats.
value: union(enum) {
signed: i64,
unsigned: u64,
},
// The high four bits of the encoding byte (`enc & 0xf0`), kept unresolved.
relative_to: u8,

// Address of the encoded value, as supplied by the caller of `readEhPointer`.
pc: u64,

// TODO: Function to resolve the value given input state (.text start, .eh_frame_hdr start, functions start)
};
/// Decodes one pointer from `reader` according to the encoding byte `enc`.
///
/// Returns null without reading anything when `enc` is `EH.PE.omit`.
/// Otherwise the low four bits of `enc` select how the value is read and the
/// high four bits are stored, unresolved, in `relative_to`. `pc` is stored
/// as given. `addr_size_bytes` is only consulted for the `absptr` format and
/// must then be 2, 4 or 8, else `error.InvalidAddrSize` is returned.
/// An unrecognised value format yields `badDwarf()`; reader errors propagate.
fn readEhPointer(enc: u8, pc: usize, addr_size_bytes: u8, endian: std.builtin.Endian, reader: anytype) !?EhPointer {
    if (enc == EH.PE.omit) return null;

    var pointer = EhPointer{
        .value = undefined,
        .relative_to = enc & 0xf0,
        .pc = pc,
    };

    const format = enc & 0x0f;

    // The absolute-pointer format is the only one whose width depends on the
    // target's address size, so it is handled separately.
    if (format == EH.PE.absptr) {
        const address: u64 = switch (addr_size_bytes) {
            2 => try reader.readInt(u16, endian),
            4 => try reader.readInt(u32, endian),
            8 => try reader.readInt(u64, endian),
            else => return error.InvalidAddrSize,
        };
        pointer.value = .{ .unsigned = address };
        return pointer;
    }

    pointer.value = switch (format) {
        EH.PE.uleb128 => .{ .unsigned = try leb.readULEB128(u64, reader) },
        EH.PE.udata2 => .{ .unsigned = try reader.readInt(u16, endian) },
        EH.PE.udata4 => .{ .unsigned = try reader.readInt(u32, endian) },
        EH.PE.udata8 => .{ .unsigned = try reader.readInt(u64, endian) },
        EH.PE.sleb128 => .{ .signed = try leb.readILEB128(i64, reader) },
        EH.PE.sdata2 => .{ .signed = try reader.readInt(i16, endian) },
        EH.PE.sdata4 => .{ .signed = try reader.readInt(i32, endian) },
        EH.PE.sdata8 => .{ .signed = try reader.readInt(i64, endian) },
        else => return badDwarf(),
    };
    return pointer;
}
/// A Common Information Entry (CIE) parsed from the start of a byte slice,
/// as found in `.eh_frame`.
const CommonInformationEntry = struct {
    /// Value of the leading 32-bit length field: the number of entry bytes
    /// that follow the field itself.
    length: u32,
    /// Entry id; `parse` only accepts 0.
    id: u32,
    /// Entry version; `parse` only accepts 1.
    version: u8,
    code_alignment_factor: u64,
    // NOTE(review): decoded as ULEB128 in `parse`, but the .eh_frame format
    // describes the data alignment factor as SLEB128 (it is commonly
    // negative). The field type is kept as-is here; confirm before relying
    // on this value.
    data_alignment_factor: u64,
    return_address_register: u64,

    // Augmented data
    /// Encoding byte read for the 'L' augmentation, if present.
    lsda_pointer_enc: ?u8,
    /// Pointer read for the 'P' augmentation, if present and not omitted.
    personality_routine_pointer: ?EhPointer,
    /// Encoding byte read for the 'R' augmentation, if present.
    fde_pointer_enc: ?u8,

    /// The bytes of the entry that remain after the header and augmentation data.
    initial_instructions: []const u8,

    /// Parses the entry located at the start of `bytes`.
    ///
    /// `addr_size_bytes` must not exceed 8 (`error.InvalidAddrSize` otherwise);
    /// it sizes the legacy "eh" data word and absolute personality pointers.
    /// `endian` is the byte order of the fixed-width fields.
    ///
    /// Returns `badDwarf()` for malformed input, including a length field
    /// that exceeds the available bytes, and propagates reader errors such
    /// as hitting the end of the entry early.
    ///
    /// The returned struct references memory in `bytes`.
    pub fn parse(bytes: []const u8, addr_size_bytes: u8, endian: std.builtin.Endian) !CommonInformationEntry {
        if (addr_size_bytes > 8) return error.InvalidAddrSize;
        if (bytes.len < 4) return badDwarf();

        const length = mem.readInt(u32, bytes[0..4], endian);
        // 0xffffffff announces a 64-bit extended length, which this parser
        // does not handle.
        if (length == 0xffffffff) return badDwarf();
        // The length comes from the data itself; never slice past the input.
        if (length > bytes.len - 4) return badDwarf();
        const cie_bytes = bytes[4..][0..length];

        var stream = io.fixedBufferStream(cie_bytes);
        const reader = stream.reader();

        const id = try reader.readInt(u32, endian);
        if (id != 0) return badDwarf();

        const version = try reader.readByte();
        if (version != 1) return badDwarf();

        var has_eh_data = false;
        var has_aug_data = false;

        // Scan the NUL-terminated augmentation string. After a leading 'z',
        // `aug_str_start`/`aug_str_len` delimit the letters that describe
        // the augmentation data block.
        var aug_str_len: usize = 0;
        var aug_str_start = stream.pos;
        var aug_byte = try reader.readByte();
        while (aug_byte != 0) : (aug_byte = try reader.readByte()) {
            switch (aug_byte) {
                'z' => {
                    if (aug_str_len != 0) return badDwarf();
                    has_aug_data = true;
                    aug_str_start = stream.pos;
                },
                'e' => {
                    if (has_aug_data or aug_str_len != 0) return badDwarf();
                    if (try reader.readByte() != 'h') return badDwarf();
                    has_eh_data = true;
                },
                else => {
                    if (has_eh_data) return badDwarf();
                    aug_str_len += 1;
                },
            }
        }

        if (has_eh_data) {
            // legacy data created by older versions of gcc - ignored here
            for (0..addr_size_bytes) |_| _ = try reader.readByte();
        }

        const code_alignment_factor = try leb.readULEB128(u64, reader);
        const data_alignment_factor = try leb.readULEB128(u64, reader);
        const return_address_register = try leb.readULEB128(u64, reader);

        var lsda_pointer_enc: ?u8 = null;
        var personality_routine_pointer: ?EhPointer = null;
        var fde_pointer_enc: ?u8 = null;

        if (has_aug_data) {
            const aug_data_len = try leb.readULEB128(usize, reader);
            const aug_data_start = stream.pos;

            const aug_str = cie_bytes[aug_str_start..][0..aug_str_len];
            for (aug_str) |byte| {
                switch (byte) {
                    'L' => {
                        lsda_pointer_enc = try reader.readByte();
                    },
                    'P' => {
                        const personality_enc = try reader.readByte();
                        personality_routine_pointer = try readEhPointer(
                            personality_enc,
                            // Computed from the base pointer so that no
                            // element is indexed when the encoding byte was
                            // the last byte of the entry.
                            @ptrToInt(cie_bytes.ptr) + stream.pos,
                            addr_size_bytes,
                            endian,
                            reader,
                        );
                    },
                    'R' => {
                        fde_pointer_enc = try reader.readByte();
                    },
                    else => return badDwarf(),
                }
            }

            // verify length field (subtracting avoids overflow on an
            // untrusted `aug_data_len`; `stream.pos` never moves backwards)
            if (stream.pos - aug_data_start != aug_data_len) return badDwarf();
        }

        const initial_instructions = cie_bytes[stream.pos..];
        return .{
            .length = length,
            .id = id,
            .version = version,
            .code_alignment_factor = code_alignment_factor,
            .data_alignment_factor = data_alignment_factor,
            .return_address_register = return_address_register,
            .lsda_pointer_enc = lsda_pointer_enc,
            .personality_routine_pointer = personality_routine_pointer,
            .fde_pointer_enc = fde_pointer_enc,
            .initial_instructions = initial_instructions,
        };
    }
};

19
lib/std/dwarf/EH.zig Normal file
View File

@ -0,0 +1,19 @@
/// Pointer-encoding byte constants, used when decoding encoded pointers
/// such as those in `.eh_frame`.
pub const PE = struct {
// Value formats: each of these fits in the low four bits of an encoding byte.
pub const absptr = 0x00;
pub const uleb128 = 0x01;
pub const udata2 = 0x02;
pub const udata4 = 0x03;
pub const udata8 = 0x04;
pub const sleb128 = 0x09;
pub const sdata2 = 0x0A;
pub const sdata4 = 0x0B;
pub const sdata8 = 0x0C;

// These occupy the high four bits of an encoding byte.
// NOTE(review): presumably they select what the value is relative to — confirm against the format specification.
pub const pcrel = 0x10;
pub const textrel = 0x20;
pub const datarel = 0x30;
pub const funcrel = 0x40;
pub const aligned = 0x50;

// Whole-byte value: the decoder treats this encoding as "no pointer present".
pub const omit = 0xff;
};