Merge pull request #21720 from kubkon/macho-dwarf-v5

macho: add basic handling of DWARFv5
This commit is contained in:
Andrew Kelley 2024-12-03 02:28:22 -05:00 committed by GitHub
commit 4e09e363cd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 520 additions and 457 deletions

View File

@ -611,6 +611,7 @@ set(ZIG_STAGE2_SOURCES
src/link/MachO/Atom.zig
src/link/MachO/CodeSignature.zig
src/link/MachO/DebugSymbols.zig
src/link/MachO/Dwarf.zig
src/link/MachO/Dylib.zig
src/link/MachO/InternalObject.zig
src/link/MachO/Object.zig
@ -622,7 +623,6 @@ set(ZIG_STAGE2_SOURCES
src/link/MachO/dyld_info/Rebase.zig
src/link/MachO/dyld_info/Trie.zig
src/link/MachO/dyld_info/bind.zig
src/link/MachO/dwarf.zig
src/link/MachO/eh_frame.zig
src/link/MachO/fat.zig
src/link/MachO/file.zig

409
src/link/MachO/Dwarf.zig Normal file
View File

@ -0,0 +1,409 @@
/// Contents of the `__debug_info` section. Empty until populated; released by `deinit`.
debug_info: []u8 = &[0]u8{},
/// Contents of the `__debug_abbrev` section. Empty until populated; released by `deinit`.
debug_abbrev: []u8 = &[0]u8{},
/// Contents of the `__debug_str` section. Empty until populated; released by `deinit`.
debug_str: []u8 = &[0]u8{},
/// Contents of the `__debug_str_offs` section (DWARFv5). Empty until populated; released by `deinit`.
debug_str_offsets: []u8 = &[0]u8{},
/// Releases every section buffer held by `dwarf` using `allocator`.
/// Buffers are freed in declaration order; the fields are left untouched afterwards.
pub fn deinit(dwarf: *Dwarf, allocator: Allocator) void {
    const owned_sections = [_][]u8{
        dwarf.debug_info,
        dwarf.debug_abbrev,
        dwarf.debug_str,
        dwarf.debug_str_offsets,
    };
    for (owned_sections) |section_bytes| {
        allocator.free(section_bytes);
    }
}
/// Pulls an offset into the __debug_str section from the __debug_str_offs section.
/// This is new in DWARFv5 and requires the producer to specify DW_FORM_strx* (`index` arg)
/// but also DW_AT_str_offsets_base with DW_FORM_sec_offset (`base` arg) in the opening header
/// of a "referencing entity" such as DW_TAG_compile_unit.
///
/// The entry is located at `base + index * entry_size`, where `entry_size` is 4 bytes for
/// `.dwarf32` and 8 bytes for `.dwarf64`, and is decoded as little-endian like every other
/// integer read by the readers in this file.
///
/// Returns `error.Overflow` if `base`/`index` do not fit in `usize`, if the address
/// computation overflows, or if the entry does not lie entirely within `debug_str_offsets`.
fn getOffset(debug_str_offsets: []const u8, base: u64, index: u64, dw_fmt: DwarfFormat) error{Overflow}!u64 {
    const base_as_usize = math.cast(usize, base) orelse return error.Overflow;
    const index_as_usize = math.cast(usize, index) orelse return error.Overflow;
    const entry_size: usize = switch (dw_fmt) {
        .dwarf32 => @sizeOf(u32),
        .dwarf64 => @sizeOf(u64),
    };
    // `base` and `index` come straight from the input file, so every step is checked.
    const scaled_index = try math.mul(usize, index_as_usize, entry_size);
    const start = try math.add(usize, base_as_usize, scaled_index);
    const end = try math.add(usize, start, entry_size);
    if (end > debug_str_offsets.len) return error.Overflow;
    return switch (dw_fmt) {
        .dwarf32 => mem.readInt(u32, debug_str_offsets[start..][0..@sizeOf(u32)], .little),
        .dwarf64 => mem.readInt(u64, debug_str_offsets[start..][0..@sizeOf(u64)], .little),
    };
}
/// Cursor-style reader over the `debug_info` bytes of `ctx`.
/// `pos` is the offset of the next unread byte. Every read is bounds-checked against the
/// section length, so malformed input yields an error instead of an out-of-bounds access.
pub const InfoReader = struct {
    ctx: Dwarf,
    pos: usize = 0,

    fn bytes(p: InfoReader) []const u8 {
        return p.ctx.debug_info;
    }

    /// Resolves a NUL-terminated string starting at `off` within `strtab`.
    /// Fails if `off` is outside `strtab` or no terminator exists before its end.
    fn stringAt(strtab: []const u8, off: u64) error{ Overflow, UnexpectedEndOfFile }![:0]const u8 {
        const start = math.cast(usize, off) orelse return error.Overflow;
        if (start >= strtab.len) return error.UnexpectedEndOfFile;
        const end = mem.indexOfScalarPos(u8, strtab, start, 0) orelse return error.UnexpectedEndOfFile;
        return strtab[start..end :0];
    }

    /// Parses a unit header at the cursor and advances past it.
    /// An initial 32-bit length of 0xffffffff selects the 64-bit DWARF format, in which case
    /// the real length follows as a u64. Only versions 4 and 5 are accepted; anything else
    /// yields `error.InvalidVersion`. For version 4, `unit_type` is reported as 0.
    pub fn readCompileUnitHeader(p: *InfoReader) !CompileUnitHeader {
        var length: u64 = try p.readInt(u32);
        const is_64bit = length == 0xffffffff;
        if (is_64bit) {
            length = try p.readInt(u64);
        }
        const dw_fmt: DwarfFormat = if (is_64bit) .dwarf64 else .dwarf32;
        const version = try p.readInt(Version);
        // The field initializers below perform the reads, so their order is the on-disk order.
        const rest: struct {
            debug_abbrev_offset: u64,
            address_size: u8,
            unit_type: u8,
        } = switch (version) {
            4 => .{
                .debug_abbrev_offset = try p.readOffset(dw_fmt),
                .address_size = try p.readByte(),
                .unit_type = 0,
            },
            5 => .{
                // According to the spec, version 5 introduced .unit_type field in the header, and
                // it reordered .debug_abbrev_offset with .address_size fields.
                .unit_type = try p.readByte(),
                .address_size = try p.readByte(),
                .debug_abbrev_offset = try p.readOffset(dw_fmt),
            },
            else => return error.InvalidVersion,
        };
        return .{
            .format = dw_fmt,
            .length = length,
            .version = version,
            .debug_abbrev_offset = rest.debug_abbrev_offset,
            .address_size = rest.address_size,
            .unit_type = rest.unit_type,
        };
    }

    /// Advances the cursor until just after the abbreviation code `code` of a DIE.
    /// DIEs with a different code are skipped by consuming attributes from `abbrev_reader`.
    /// Returns `error.UnexpectedEndOfFile` on a null entry (code 0) or when the scan limit
    /// is reached without a match.
    pub fn seekToDie(p: *InfoReader, code: Code, cuh: CompileUnitHeader, abbrev_reader: *AbbrevReader) !void {
        const cuh_length = math.cast(usize, cuh.length) orelse return error.Overflow;
        const initial_length_size: usize = switch (cuh.format) {
            .dwarf32 => 4,
            .dwarf64 => 12,
        };
        // `cuh.length` is untrusted; saturate instead of overflowing. The individual reads
        // are bounds-checked, so an oversized limit only ends in UnexpectedEndOfFile.
        const end_pos = p.pos +| initial_length_size +| cuh_length;
        while (p.pos < end_pos) {
            const di_code = try p.readUleb128(u64);
            if (di_code == 0) return error.UnexpectedEndOfFile;
            if (di_code == code) return;
            while (try abbrev_reader.readAttr()) |attr| {
                try p.skip(attr.form, cuh);
            }
        }
        return error.UnexpectedEndOfFile;
    }

    /// Advances the cursor past one attribute value encoded with `form`.
    /// When skipping attributes, we don't really need to be able to handle them all
    /// since we only ever care about the DW_TAG_compile_unit.
    /// Forms not listed return `error.UnhandledForm`; the `strx*`/`addrx*` forms are only
    /// accepted when `cuh.version >= 5`.
    pub fn skip(p: *InfoReader, form: Form, cuh: CompileUnitHeader) !void {
        switch (form) {
            dw.FORM.sec_offset,
            dw.FORM.ref_addr,
            => {
                _ = try p.readOffset(cuh.format);
            },
            dw.FORM.addr => {
                _ = try p.readNBytes(cuh.address_size);
            },
            dw.FORM.block1,
            dw.FORM.block2,
            dw.FORM.block4,
            dw.FORM.block,
            => {
                _ = try p.readBlock(form);
            },
            dw.FORM.exprloc => {
                _ = try p.readExprLoc();
            },
            dw.FORM.flag_present => {},
            dw.FORM.data1,
            dw.FORM.ref1,
            dw.FORM.flag,
            dw.FORM.data2,
            dw.FORM.ref2,
            dw.FORM.data4,
            dw.FORM.ref4,
            dw.FORM.data8,
            dw.FORM.ref8,
            dw.FORM.ref_sig8,
            dw.FORM.udata,
            dw.FORM.ref_udata,
            dw.FORM.sdata,
            => {
                _ = try p.readConstant(form);
            },
            dw.FORM.strp,
            dw.FORM.string,
            => {
                _ = try p.readString(form, cuh);
            },
            else => if (cuh.version >= 5) switch (form) {
                dw.FORM.strx,
                dw.FORM.strx1,
                dw.FORM.strx2,
                dw.FORM.strx3,
                dw.FORM.strx4,
                dw.FORM.addrx,
                dw.FORM.addrx1,
                dw.FORM.addrx2,
                dw.FORM.addrx3,
                dw.FORM.addrx4,
                => {
                    // Only the index itself lives in __debug_info, so consuming it is all
                    // that is needed to skip; the string/address tables are not touched.
                    _ = try p.readIndex(form);
                },
                else => return error.UnhandledForm,
            } else return error.UnhandledForm,
        }
    }

    /// Reads a length-prefixed block; the width of the length prefix is chosen by `form`.
    /// `form` must be one of the `DW_FORM_block*` forms.
    pub fn readBlock(p: *InfoReader, form: Form) ![]const u8 {
        const len: u64 = switch (form) {
            dw.FORM.block1 => try p.readByte(),
            dw.FORM.block2 => try p.readInt(u16),
            dw.FORM.block4 => try p.readInt(u32),
            dw.FORM.block => try p.readUleb128(u64),
            else => unreachable,
        };
        return p.readNBytes(len);
    }

    /// Reads a ULEB128 length followed by that many bytes.
    pub fn readExprLoc(p: *InfoReader) ![]const u8 {
        const len: u64 = try p.readUleb128(u64);
        return p.readNBytes(len);
    }

    /// Reads a constant or reference value whose encoding is chosen by `form`.
    /// Signed (`sdata`) values are returned bit-cast to u64.
    pub fn readConstant(p: *InfoReader, form: Form) !u64 {
        return switch (form) {
            dw.FORM.data1, dw.FORM.ref1, dw.FORM.flag => try p.readByte(),
            dw.FORM.data2, dw.FORM.ref2 => try p.readInt(u16),
            dw.FORM.data4, dw.FORM.ref4 => try p.readInt(u32),
            dw.FORM.data8, dw.FORM.ref8, dw.FORM.ref_sig8 => try p.readInt(u64),
            dw.FORM.udata, dw.FORM.ref_udata => try p.readUleb128(u64),
            dw.FORM.sdata => @bitCast(try p.readIleb128(i64)),
            else => return error.UnhandledConstantForm,
        };
    }

    /// Reads a `DW_FORM_strx*` / `DW_FORM_addrx*` table index.
    /// Any other form yields `error.UnhandledIndexForm`.
    pub fn readIndex(p: *InfoReader, form: Form) !u64 {
        return switch (form) {
            dw.FORM.strx1, dw.FORM.addrx1 => try p.readByte(),
            dw.FORM.strx2, dw.FORM.addrx2 => try p.readInt(u16),
            // 3-byte index: read exactly three bytes (`@sizeOf(u24)` is 4, so `readInt`
            // cannot be used here).
            dw.FORM.strx3, dw.FORM.addrx3 => mem.readInt(u24, (try p.readNBytes(3))[0..3], .little),
            dw.FORM.strx4, dw.FORM.addrx4 => try p.readInt(u32),
            dw.FORM.strx, dw.FORM.addrx => try p.readUleb128(u64),
            else => return error.UnhandledIndexForm,
        };
    }

    /// Reads a string attribute. `form` must be `DW_FORM_strp` or `DW_FORM_string`.
    /// For `strp`, an offset is read and resolved within `ctx.debug_str`. For `string`, the
    /// inline NUL-terminated string is returned and the cursor moves past its terminator.
    /// A missing terminator or out-of-range offset yields `error.UnexpectedEndOfFile`.
    pub fn readString(p: *InfoReader, form: Form, cuh: CompileUnitHeader) ![:0]const u8 {
        switch (form) {
            dw.FORM.strp => {
                const off = try p.readOffset(cuh.format);
                return stringAt(p.ctx.debug_str, off);
            },
            dw.FORM.string => {
                const data = p.bytes();
                const start = p.pos;
                if (start > data.len) return error.UnexpectedEndOfFile;
                const end = mem.indexOfScalarPos(u8, data, start, 0) orelse return error.UnexpectedEndOfFile;
                // The terminator belongs to this attribute; the next one starts after it.
                p.pos = end + 1;
                return data[start..end :0];
            },
            else => unreachable,
        }
    }

    /// Reads a `DW_FORM_strx*` index and resolves it to a string via `ctx.debug_str_offsets`
    /// (starting at `base`) and `ctx.debug_str`. `form` must be one of the `strx*` forms.
    pub fn readStringIndexed(p: *InfoReader, form: Form, cuh: CompileUnitHeader, base: u64) ![:0]const u8 {
        switch (form) {
            dw.FORM.strx,
            dw.FORM.strx1,
            dw.FORM.strx2,
            dw.FORM.strx3,
            dw.FORM.strx4,
            => {
                const index = try p.readIndex(form);
                const off = try getOffset(p.ctx.debug_str_offsets, base, index, cuh.format);
                return stringAt(p.ctx.debug_str, off);
            },
            else => unreachable,
        }
    }

    /// Reads one byte and advances the cursor.
    pub fn readByte(p: *InfoReader) !u8 {
        const data = p.bytes();
        if (p.pos >= data.len) return error.UnexpectedEndOfFile;
        const byte = data[p.pos];
        p.pos += 1;
        return byte;
    }

    /// Returns the next `num` bytes (a view into the section) and advances the cursor.
    pub fn readNBytes(p: *InfoReader, num: u64) ![]const u8 {
        const num_usize = math.cast(usize, num) orelse return error.Overflow;
        const data = p.bytes();
        // Compare against the remaining length so a huge `num` cannot overflow `pos + num`.
        if (p.pos > data.len or num_usize > data.len - p.pos) return error.UnexpectedEndOfFile;
        const start = p.pos;
        p.pos += num_usize;
        return data[start..][0..num_usize];
    }

    /// Reads a little-endian integer of type `Int` and advances the cursor by `@sizeOf(Int)`.
    pub fn readInt(p: *InfoReader, comptime Int: type) !Int {
        const size = @sizeOf(Int);
        const data = p.bytes();
        if (p.pos > data.len or size > data.len - p.pos) return error.UnexpectedEndOfFile;
        const start = p.pos;
        p.pos += size;
        return mem.readInt(Int, data[start..][0..size], .little);
    }

    /// Reads a section offset: 4 bytes for `.dwarf32`, 8 bytes for `.dwarf64`.
    pub fn readOffset(p: *InfoReader, dw_fmt: DwarfFormat) !u64 {
        return switch (dw_fmt) {
            .dwarf32 => try p.readInt(u32),
            .dwarf64 => try p.readInt(u64),
        };
    }

    /// Reads an unsigned LEB128 value of type `Type` and advances the cursor past it.
    pub fn readUleb128(p: *InfoReader, comptime Type: type) !Type {
        // `seekTo` does not validate its argument, so guard the slice below.
        if (p.pos > p.bytes().len) return error.UnexpectedEndOfFile;
        var stream = std.io.fixedBufferStream(p.bytes()[p.pos..]);
        var creader = std.io.countingReader(stream.reader());
        const value: Type = try leb.readUleb128(Type, creader.reader());
        p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow;
        return value;
    }

    /// Reads a signed LEB128 value of type `Type` and advances the cursor past it.
    pub fn readIleb128(p: *InfoReader, comptime Type: type) !Type {
        // `seekTo` does not validate its argument, so guard the slice below.
        if (p.pos > p.bytes().len) return error.UnexpectedEndOfFile;
        var stream = std.io.fixedBufferStream(p.bytes()[p.pos..]);
        var creader = std.io.countingReader(stream.reader());
        const value: Type = try leb.readIleb128(Type, creader.reader());
        p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow;
        return value;
    }

    /// Moves the cursor to absolute offset `off`. The offset is not range-checked here;
    /// subsequent reads fail if it lies beyond the section.
    pub fn seekTo(p: *InfoReader, off: u64) !void {
        p.pos = math.cast(usize, off) orelse return error.Overflow;
    }
};
/// Cursor-style reader over the `debug_abbrev` bytes of `ctx`.
/// `pos` is the offset of the next unread byte.
pub const AbbrevReader = struct {
    ctx: Dwarf,
    pos: usize = 0,

    fn bytes(p: AbbrevReader) []const u8 {
        return p.ctx.debug_abbrev;
    }

    /// Returns true while the cursor is before the end of the abbreviation data.
    pub fn hasMore(p: AbbrevReader) bool {
        return p.pos < p.bytes().len;
    }

    /// Reads an abbreviation declaration header (code, tag, has-children flag).
    /// Returns null when the code is 0. `pos`/`len` of the result describe the byte range
    /// that was consumed for the header.
    pub fn readDecl(p: *AbbrevReader) !?AbbrevDecl {
        const pos = p.pos;
        const code = try p.readUleb128(Code);
        if (code == 0) return null;
        const tag = try p.readUleb128(Tag);
        const has_children = (try p.readByte()) > 0;
        return .{
            .code = code,
            .pos = pos,
            .len = p.pos - pos,
            .tag = tag,
            .has_children = has_children,
        };
    }

    /// Reads one (attribute, form) pair. Returns null for the terminating (0, 0) pair.
    pub fn readAttr(p: *AbbrevReader) !?AbbrevAttr {
        const pos = p.pos;
        const at = try p.readUleb128(At);
        const form = try p.readUleb128(Form);
        return if (at == 0 and form == 0) null else .{
            .at = at,
            .form = form,
            .pos = pos,
            .len = p.pos - pos,
        };
    }

    /// Reads one byte and advances the cursor; `error.Eof` at or past the end of the data.
    pub fn readByte(p: *AbbrevReader) !u8 {
        const data = p.bytes();
        if (p.pos >= data.len) return error.Eof;
        const byte = data[p.pos];
        p.pos += 1;
        return byte;
    }

    /// Reads an unsigned LEB128 value of type `Type` and advances the cursor past it.
    pub fn readUleb128(p: *AbbrevReader, comptime Type: type) !Type {
        // `seekTo` accepts any offset (e.g. an untrusted debug_abbrev_offset), so make sure
        // the slice below cannot start beyond the end of the data.
        if (p.pos > p.bytes().len) return error.Eof;
        var stream = std.io.fixedBufferStream(p.bytes()[p.pos..]);
        var creader = std.io.countingReader(stream.reader());
        const value: Type = try leb.readUleb128(Type, creader.reader());
        p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow;
        return value;
    }

    /// Moves the cursor to absolute offset `off`. The offset is not range-checked here;
    /// subsequent reads fail if it lies beyond the data.
    pub fn seekTo(p: *AbbrevReader, off: u64) !void {
        p.pos = math.cast(usize, off) orelse return error.Overflow;
    }
};
/// Header of one abbreviation declaration, as produced by `AbbrevReader.readDecl`.
const AbbrevDecl = struct {
// Abbreviation code; `readDecl` returns null instead of a decl when this is 0.
code: Code,
// Reader offset at which the declaration header starts.
pos: usize,
// Number of bytes consumed for the header (code, tag, has-children byte).
len: usize,
// Tag value read after the code.
tag: Tag,
// True when the has-children byte is non-zero.
has_children: bool,
};
/// One (attribute, form) pair of an abbreviation declaration, as produced by
/// `AbbrevReader.readAttr`.
const AbbrevAttr = struct {
// Attribute code.
at: At,
// Form code describing how the attribute value is encoded.
form: Form,
// Reader offset at which the pair starts.
pos: usize,
// Number of bytes consumed for the pair.
len: usize,
};
/// Parsed unit header, as produced by `InfoReader.readCompileUnitHeader`.
const CompileUnitHeader = struct {
// 32-bit or 64-bit DWARF, selected by the initial length field.
format: DwarfFormat,
// Unit length as stored in the header.
length: u64,
// DWARF version; `readCompileUnitHeader` only accepts 4 and 5.
version: Version,
// Offset read from the header; used by the caller to seek the abbreviation reader.
debug_abbrev_offset: u64,
// Size in bytes of a DW_FORM_addr value.
address_size: u8,
// Unit type byte (version 5); set to 0 for version 4 headers.
unit_type: u8,
};
/// A position/length pair. NOTE(review): not referenced by the code visible in this file;
/// presumably the byte range of a debugging information entry — confirm before relying on it.
const Die = struct {
pos: usize,
len: usize,
};
/// DWARF offset-size format of a unit. `readCompileUnitHeader` selects `dwarf64` when the
/// initial 32-bit length is 0xffffffff and `dwarf32` otherwise.
const DwarfFormat = enum {
// Offsets are read as u32.
dwarf32,
// Offsets are read as u64.
dwarf64,
};
const dw = std.dwarf;
const leb = std.leb;
const log = std.log.scoped(.link);
const math = std.math;
const mem = std.mem;
const std = @import("std");
const Allocator = mem.Allocator;
const Dwarf = @This();
const File = @import("file.zig").File;
const MachO = @import("../MachO.zig");
const Object = @import("Object.zig");
pub const At = u64;
pub const Code = u64;
pub const Form = u64;
pub const Tag = u64;
pub const Version = u16;
pub const AT = dw.AT;
pub const FORM = dw.FORM;
pub const TAG = dw.TAG;

View File

@ -443,11 +443,8 @@ fn initCstringLiterals(self: *Object, allocator: Allocator, file: File.Handle, m
for (slice.items(.header), 0..) |sect, n_sect| {
if (!isCstringLiteral(sect)) continue;
const sect_size = math.cast(usize, sect.size) orelse return error.Overflow;
const data = try allocator.alloc(u8, sect_size);
const data = try self.readSectionData(allocator, file, @intCast(n_sect));
defer allocator.free(data);
const amt = try file.preadAll(data, sect.offset + self.offset);
if (amt != data.len) return error.InputOutput;
var count: u32 = 0;
var start: u32 = 0;
@ -646,13 +643,10 @@ pub fn resolveLiterals(self: *Object, lp: *MachO.LiteralPool, macho_file: *MachO
}
const slice = self.sections.slice();
for (slice.items(.header), slice.items(.subsections)) |header, subs| {
for (slice.items(.header), slice.items(.subsections), 0..) |header, subs, n_sect| {
if (isCstringLiteral(header) or isFixedSizeLiteral(header)) {
const sect_size = math.cast(usize, header.size) orelse return error.Overflow;
const data = try gpa.alloc(u8, sect_size);
const data = try self.readSectionData(gpa, file, @intCast(n_sect));
defer gpa.free(data);
const amt = try file.preadAll(data, header.offset + self.offset);
if (amt != data.len) return error.InputOutput;
for (subs.items) |sub| {
const atom = self.getAtom(sub.atom).?;
@ -686,12 +680,7 @@ pub fn resolveLiterals(self: *Object, lp: *MachO.LiteralPool, macho_file: *MachO
buffer.resize(target_size) catch unreachable;
const gop = try sections_data.getOrPut(target.n_sect);
if (!gop.found_existing) {
const target_sect = slice.items(.header)[target.n_sect];
const target_sect_size = math.cast(usize, target_sect.size) orelse return error.Overflow;
const data = try gpa.alloc(u8, target_sect_size);
const amt = try file.preadAll(data, target_sect.offset + self.offset);
if (amt != data.len) return error.InputOutput;
gop.value_ptr.* = data;
gop.value_ptr.* = try self.readSectionData(gpa, file, @intCast(target.n_sect));
}
const data = gop.value_ptr.*;
const target_off = math.cast(usize, target.off) orelse return error.Overflow;
@ -1000,7 +989,7 @@ fn initRelocs(self: *Object, file: File.Handle, cpu_arch: std.Target.Cpu.Arch, m
defer tracy.end();
const slice = self.sections.slice();
for (slice.items(.header), slice.items(.relocs)) |sect, *out| {
for (slice.items(.header), slice.items(.relocs), 0..) |sect, *out, n_sect| {
if (sect.nreloc == 0) continue;
// We skip relocs for __DWARF since even in -r mode, the linker is expected to emit
// debug symbol stabs in the relocatable. This made me curious why that is. For now,
@ -1009,8 +998,8 @@ fn initRelocs(self: *Object, file: File.Handle, cpu_arch: std.Target.Cpu.Arch, m
!mem.eql(u8, sect.sectName(), "__compact_unwind")) continue;
switch (cpu_arch) {
.x86_64 => try x86_64.parseRelocs(self, sect, out, file, macho_file),
.aarch64 => try aarch64.parseRelocs(self, sect, out, file, macho_file),
.x86_64 => try x86_64.parseRelocs(self, @intCast(n_sect), sect, out, file, macho_file),
.aarch64 => try aarch64.parseRelocs(self, @intCast(n_sect), sect, out, file, macho_file),
else => unreachable,
}
@ -1146,11 +1135,8 @@ fn initUnwindRecords(self: *Object, allocator: Allocator, sect_id: u8, file: Fil
};
const header = self.sections.items(.header)[sect_id];
const size = math.cast(usize, header.size) orelse return error.Overflow;
const data = try allocator.alloc(u8, size);
const data = try self.readSectionData(allocator, file, sect_id);
defer allocator.free(data);
const amt = try file.preadAll(data, header.offset + self.offset);
if (amt != data.len) return error.InputOutput;
const nrecs = @divExact(data.len, @sizeOf(macho.compact_unwind_entry));
const recs = @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data.ptr))[0..nrecs];
@ -1359,151 +1345,106 @@ fn parseDebugInfo(self: *Object, macho_file: *MachO) !void {
defer tracy.end();
const gpa = macho_file.base.comp.gpa;
const file = macho_file.getFileHandle(self.file_handle);
var debug_info_index: ?usize = null;
var debug_abbrev_index: ?usize = null;
var debug_str_index: ?usize = null;
var dwarf: Dwarf = .{};
defer dwarf.deinit(gpa);
for (self.sections.items(.header), 0..) |sect, index| {
const n_sect: u8 = @intCast(index);
if (sect.attrs() & macho.S_ATTR_DEBUG == 0) continue;
if (mem.eql(u8, sect.sectName(), "__debug_info")) debug_info_index = index;
if (mem.eql(u8, sect.sectName(), "__debug_abbrev")) debug_abbrev_index = index;
if (mem.eql(u8, sect.sectName(), "__debug_str")) debug_str_index = index;
if (mem.eql(u8, sect.sectName(), "__debug_info")) {
dwarf.debug_info = try self.readSectionData(gpa, file, n_sect);
}
if (mem.eql(u8, sect.sectName(), "__debug_abbrev")) {
dwarf.debug_abbrev = try self.readSectionData(gpa, file, n_sect);
}
if (mem.eql(u8, sect.sectName(), "__debug_str")) {
dwarf.debug_str = try self.readSectionData(gpa, file, n_sect);
}
// __debug_str_offs[ets] section is a new addition in DWARFv5 and is generally
// required in order to correctly parse strings.
if (mem.eql(u8, sect.sectName(), "__debug_str_offs")) {
dwarf.debug_str_offsets = try self.readSectionData(gpa, file, n_sect);
}
}
if (debug_info_index == null or debug_abbrev_index == null) return;
if (dwarf.debug_info.len == 0) return;
const slice = self.sections.slice();
const file = macho_file.getFileHandle(self.file_handle);
const debug_info = blk: {
const sect = slice.items(.header)[debug_info_index.?];
const size = math.cast(usize, sect.size) orelse return error.Overflow;
const data = try gpa.alloc(u8, size);
const amt = try file.preadAll(data, sect.offset + self.offset);
if (amt != data.len) return error.InputOutput;
break :blk data;
};
defer gpa.free(debug_info);
const debug_abbrev = blk: {
const sect = slice.items(.header)[debug_abbrev_index.?];
const size = math.cast(usize, sect.size) orelse return error.Overflow;
const data = try gpa.alloc(u8, size);
const amt = try file.preadAll(data, sect.offset + self.offset);
if (amt != data.len) return error.InputOutput;
break :blk data;
};
defer gpa.free(debug_abbrev);
const debug_str = if (debug_str_index) |sid| blk: {
const sect = slice.items(.header)[sid];
const size = math.cast(usize, sect.size) orelse return error.Overflow;
const data = try gpa.alloc(u8, size);
const amt = try file.preadAll(data, sect.offset + self.offset);
if (amt != data.len) return error.InputOutput;
break :blk data;
} else &[0]u8{};
defer gpa.free(debug_str);
self.compile_unit = self.findCompileUnit(.{
.gpa = gpa,
.debug_info = debug_info,
.debug_abbrev = debug_abbrev,
.debug_str = debug_str,
}) catch null; // TODO figure out what errors are fatal, and when we silently fail
// TODO return error once we fix emitting DWARF in self-hosted backend.
// https://github.com/ziglang/zig/issues/21719
self.compile_unit = self.findCompileUnit(gpa, dwarf) catch null;
}
fn findCompileUnit(self: *Object, args: struct {
gpa: Allocator,
debug_info: []const u8,
debug_abbrev: []const u8,
debug_str: []const u8,
}) !CompileUnit {
var cu_wip: struct {
comp_dir: ?[:0]const u8 = null,
tu_name: ?[:0]const u8 = null,
} = .{};
const gpa = args.gpa;
var info_reader = dwarf.InfoReader{ .bytes = args.debug_info, .strtab = args.debug_str };
var abbrev_reader = dwarf.AbbrevReader{ .bytes = args.debug_abbrev };
fn findCompileUnit(self: *Object, gpa: Allocator, ctx: Dwarf) !CompileUnit {
var info_reader = Dwarf.InfoReader{ .ctx = ctx };
var abbrev_reader = Dwarf.AbbrevReader{ .ctx = ctx };
const cuh = try info_reader.readCompileUnitHeader();
try abbrev_reader.seekTo(cuh.debug_abbrev_offset);
const cu_decl = (try abbrev_reader.readDecl()) orelse return error.Eof;
if (cu_decl.tag != dwarf.TAG.compile_unit) return error.UnexpectedTag;
const cu_decl = (try abbrev_reader.readDecl()) orelse return error.UnexpectedEndOfFile;
if (cu_decl.tag != Dwarf.TAG.compile_unit) return error.UnexpectedTag;
try info_reader.seekToDie(cu_decl.code, cuh, &abbrev_reader);
while (try abbrev_reader.readAttr()) |attr| switch (attr.at) {
dwarf.AT.name => {
cu_wip.tu_name = try info_reader.readString(attr.form, cuh);
},
dwarf.AT.comp_dir => {
cu_wip.comp_dir = try info_reader.readString(attr.form, cuh);
},
else => switch (attr.form) {
dwarf.FORM.sec_offset,
dwarf.FORM.ref_addr,
=> {
_ = try info_reader.readOffset(cuh.format);
},
dwarf.FORM.addr => {
_ = try info_reader.readNBytes(cuh.address_size);
},
dwarf.FORM.block1,
dwarf.FORM.block2,
dwarf.FORM.block4,
dwarf.FORM.block,
=> {
_ = try info_reader.readBlock(attr.form);
},
dwarf.FORM.exprloc => {
_ = try info_reader.readExprLoc();
},
dwarf.FORM.flag_present => {},
dwarf.FORM.data1,
dwarf.FORM.ref1,
dwarf.FORM.flag,
dwarf.FORM.data2,
dwarf.FORM.ref2,
dwarf.FORM.data4,
dwarf.FORM.ref4,
dwarf.FORM.data8,
dwarf.FORM.ref8,
dwarf.FORM.ref_sig8,
dwarf.FORM.udata,
dwarf.FORM.ref_udata,
dwarf.FORM.sdata,
=> {
_ = try info_reader.readConstant(attr.form);
},
dwarf.FORM.strp,
dwarf.FORM.string,
=> {
_ = try info_reader.readString(attr.form, cuh);
},
else => {
// TODO actual errors?
log.err("unhandled DW_FORM_* value with identifier {x}", .{attr.form});
return error.UnhandledForm;
},
},
const Pos = struct {
pos: usize,
form: Dwarf.Form,
};
if (cu_wip.comp_dir == null) return error.MissingCompDir;
if (cu_wip.tu_name == null) return error.MissingTuName;
return .{
.comp_dir = try self.addString(gpa, cu_wip.comp_dir.?),
.tu_name = try self.addString(gpa, cu_wip.tu_name.?),
var saved: struct {
tu_name: ?Pos,
comp_dir: ?Pos,
str_offsets_base: ?Pos,
} = .{
.tu_name = null,
.comp_dir = null,
.str_offsets_base = null,
};
while (try abbrev_reader.readAttr()) |attr| {
const pos: Pos = .{ .pos = info_reader.pos, .form = attr.form };
switch (attr.at) {
Dwarf.AT.name => saved.tu_name = pos,
Dwarf.AT.comp_dir => saved.comp_dir = pos,
Dwarf.AT.str_offsets_base => saved.str_offsets_base = pos,
else => {},
}
try info_reader.skip(attr.form, cuh);
}
if (saved.comp_dir == null) return error.MissingCompileDir;
if (saved.tu_name == null) return error.MissingTuName;
const str_offsets_base: ?u64 = if (saved.str_offsets_base) |str_offsets_base| str_offsets_base: {
try info_reader.seekTo(str_offsets_base.pos);
break :str_offsets_base try info_reader.readOffset(cuh.format);
} else null;
var cu: CompileUnit = .{ .comp_dir = .{}, .tu_name = .{} };
for (&[_]struct { Pos, *MachO.String }{
.{ saved.comp_dir.?, &cu.comp_dir },
.{ saved.tu_name.?, &cu.tu_name },
}) |tuple| {
const pos, const str_offset_ptr = tuple;
try info_reader.seekTo(pos.pos);
str_offset_ptr.* = switch (pos.form) {
Dwarf.FORM.strp,
Dwarf.FORM.string,
=> try self.addString(gpa, try info_reader.readString(pos.form, cuh)),
Dwarf.FORM.strx,
Dwarf.FORM.strx1,
Dwarf.FORM.strx2,
Dwarf.FORM.strx3,
Dwarf.FORM.strx4,
=> blk: {
const base = str_offsets_base orelse return error.MissingStrOffsetsBase;
break :blk try self.addString(gpa, try info_reader.readStringIndexed(pos.form, cuh, base));
},
else => return error.InvalidForm,
};
}
return cu;
}
pub fn resolveSymbols(self: *Object, macho_file: *MachO) !void {
@ -2561,6 +2502,17 @@ pub fn getUnwindRecord(self: *Object, index: UnwindInfo.Record.Index) *UnwindInf
return &self.unwind_records.items[index];
}
/// Reads the entire contents of section `n_sect` from `file` into a newly allocated buffer.
/// The read starts at the section's file offset plus `self.offset`.
/// Returns `error.Overflow` if the section size does not fit in `usize` and
/// `error.InputOutput` on a short read. On any error the buffer is released.
/// Caller owns the memory.
pub fn readSectionData(self: Object, allocator: Allocator, file: File.Handle, n_sect: u8) ![]u8 {
    const header = self.sections.items(.header)[n_sect];
    const size = math.cast(usize, header.size) orelse return error.Overflow;
    const data = try allocator.alloc(u8, size);
    // Must be registered before the fallible read below, otherwise a failing
    // `preadAll` would return without freeing `data`.
    errdefer allocator.free(data);
    const amt = try file.preadAll(data, header.offset + self.offset);
    if (amt != data.len) return error.InputOutput;
    return data;
}
pub fn format(
self: *Object,
comptime unused_fmt_string: []const u8,
@ -2848,6 +2800,7 @@ const CompactUnwindCtx = struct {
const x86_64 = struct {
fn parseRelocs(
self: *Object,
n_sect: u8,
sect: macho.section_64,
out: *std.ArrayListUnmanaged(Relocation),
handle: File.Handle,
@ -2857,19 +2810,12 @@ const x86_64 = struct {
const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info));
defer gpa.free(relocs_buffer);
{
const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset);
if (amt != relocs_buffer.len) return error.InputOutput;
}
const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset);
if (amt != relocs_buffer.len) return error.InputOutput;
const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc];
const sect_size = math.cast(usize, sect.size) orelse return error.Overflow;
const code = try gpa.alloc(u8, sect_size);
const code = try self.readSectionData(gpa, handle, n_sect);
defer gpa.free(code);
{
const amt = try handle.preadAll(code, sect.offset + self.offset);
if (amt != code.len) return error.InputOutput;
}
try out.ensureTotalCapacityPrecise(gpa, relocs.len);
@ -3021,6 +2967,7 @@ const x86_64 = struct {
const aarch64 = struct {
fn parseRelocs(
self: *Object,
n_sect: u8,
sect: macho.section_64,
out: *std.ArrayListUnmanaged(Relocation),
handle: File.Handle,
@ -3030,19 +2977,12 @@ const aarch64 = struct {
const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info));
defer gpa.free(relocs_buffer);
{
const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset);
if (amt != relocs_buffer.len) return error.InputOutput;
}
const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset);
if (amt != relocs_buffer.len) return error.InputOutput;
const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc];
const sect_size = math.cast(usize, sect.size) orelse return error.Overflow;
const code = try gpa.alloc(u8, sect_size);
const code = try self.readSectionData(gpa, handle, n_sect);
defer gpa.free(code);
{
const amt = try handle.preadAll(code, sect.offset + self.offset);
if (amt != code.len) return error.InputOutput;
}
try out.ensureTotalCapacityPrecise(gpa, relocs.len);
@ -3219,7 +3159,6 @@ const aarch64 = struct {
};
const assert = std.debug.assert;
const dwarf = @import("dwarf.zig");
const eh_frame = @import("eh_frame.zig");
const log = std.log.scoped(.link);
const macho = std.macho;
@ -3233,6 +3172,7 @@ const Allocator = mem.Allocator;
const Archive = @import("Archive.zig");
const Atom = @import("Atom.zig");
const Cie = eh_frame.Cie;
const Dwarf = @import("Dwarf.zig");
const Fde = eh_frame.Fde;
const File = @import("file.zig").File;
const LoadCommandIterator = macho.LoadCommandIterator;

View File

@ -1,286 +0,0 @@
/// Cursor-style reader over `bytes` (debug info data); `strtab` is the string table used
/// to resolve `DW_FORM_strp`. `pos` is the offset of the next unread byte.
pub const InfoReader = struct {
    bytes: []const u8,
    strtab: []const u8,
    pos: usize = 0,

    /// Parses a unit header at the cursor and advances past it.
    /// An initial 32-bit length of 0xffffffff selects the 64-bit format, in which case the
    /// real length follows as a u64.
    pub fn readCompileUnitHeader(p: *InfoReader) !CompileUnitHeader {
        var length: u64 = try p.readInt(u32);
        const is_64bit = length == 0xffffffff;
        if (is_64bit) {
            length = try p.readInt(u64);
        }
        const dw_fmt: DwarfFormat = if (is_64bit) .dwarf64 else .dwarf32;
        // The field initializers perform the reads, so their order is the on-disk order.
        return .{
            .format = dw_fmt,
            .length = length,
            .version = try p.readInt(u16),
            .debug_abbrev_offset = try p.readOffset(dw_fmt),
            .address_size = try p.readByte(),
        };
    }

    /// Advances the cursor until just after the abbreviation code `code` of a DIE.
    /// DIEs with a different code are skipped by consuming attributes from `abbrev_reader`
    /// and skipping each value according to its form. Returns `error.Eof` on a null entry
    /// (code 0); returns normally if the scan limit is reached without a match.
    pub fn seekToDie(p: *InfoReader, code: Code, cuh: CompileUnitHeader, abbrev_reader: *AbbrevReader) !void {
        const cuh_length = math.cast(usize, cuh.length) orelse return error.Overflow;
        const initial_length_size: usize = switch (cuh.format) {
            .dwarf32 => 4,
            .dwarf64 => 12,
        };
        // `cuh.length` is untrusted; saturate instead of overflowing.
        const end_pos = p.pos +| initial_length_size +| cuh_length;
        while (p.pos < end_pos) {
            const di_code = try p.readUleb128(u64);
            if (di_code == 0) return error.Eof;
            if (di_code == code) return;
            // Dispatch on the attribute's *form*: it alone determines the value encoding.
            while (try abbrev_reader.readAttr()) |attr| switch (attr.form) {
                dwarf.FORM.sec_offset,
                dwarf.FORM.ref_addr,
                => {
                    _ = try p.readOffset(cuh.format);
                },
                dwarf.FORM.addr => {
                    _ = try p.readNBytes(cuh.address_size);
                },
                dwarf.FORM.block1,
                dwarf.FORM.block2,
                dwarf.FORM.block4,
                dwarf.FORM.block,
                => {
                    _ = try p.readBlock(attr.form);
                },
                dwarf.FORM.exprloc => {
                    _ = try p.readExprLoc();
                },
                dwarf.FORM.flag_present => {},
                dwarf.FORM.data1,
                dwarf.FORM.ref1,
                dwarf.FORM.flag,
                dwarf.FORM.data2,
                dwarf.FORM.ref2,
                dwarf.FORM.data4,
                dwarf.FORM.ref4,
                dwarf.FORM.data8,
                dwarf.FORM.ref8,
                dwarf.FORM.ref_sig8,
                dwarf.FORM.udata,
                dwarf.FORM.ref_udata,
                dwarf.FORM.sdata,
                => {
                    _ = try p.readConstant(attr.form);
                },
                dwarf.FORM.strp,
                dwarf.FORM.string,
                => {
                    _ = try p.readString(attr.form, cuh);
                },
                else => {
                    // TODO better errors
                    log.err("unhandled DW_FORM_* value with identifier {x}", .{attr.form});
                    return error.UnhandledDwFormValue;
                },
            };
        }
    }

    /// Reads a length-prefixed block; the width of the length prefix is chosen by `form`.
    /// `form` must be one of the `DW_FORM_block*` forms.
    pub fn readBlock(p: *InfoReader, form: Form) ![]const u8 {
        const len: u64 = switch (form) {
            dwarf.FORM.block1 => try p.readByte(),
            dwarf.FORM.block2 => try p.readInt(u16),
            dwarf.FORM.block4 => try p.readInt(u32),
            dwarf.FORM.block => try p.readUleb128(u64),
            else => unreachable,
        };
        return p.readNBytes(len);
    }

    /// Reads a ULEB128 length followed by that many bytes.
    pub fn readExprLoc(p: *InfoReader) ![]const u8 {
        const len: u64 = try p.readUleb128(u64);
        return p.readNBytes(len);
    }

    /// Reads a constant or reference value whose encoding is chosen by `form`.
    /// Signed (`sdata`) values are returned bit-cast to u64.
    pub fn readConstant(p: *InfoReader, form: Form) !u64 {
        return switch (form) {
            dwarf.FORM.data1, dwarf.FORM.ref1, dwarf.FORM.flag => try p.readByte(),
            dwarf.FORM.data2, dwarf.FORM.ref2 => try p.readInt(u16),
            dwarf.FORM.data4, dwarf.FORM.ref4 => try p.readInt(u32),
            dwarf.FORM.data8, dwarf.FORM.ref8, dwarf.FORM.ref_sig8 => try p.readInt(u64),
            dwarf.FORM.udata, dwarf.FORM.ref_udata => try p.readUleb128(u64),
            dwarf.FORM.sdata => @bitCast(try p.readIleb128(i64)),
            else => return error.UnhandledConstantForm,
        };
    }

    /// Reads a string attribute. `form` must be `DW_FORM_strp` or `DW_FORM_string`.
    /// For `strp`, an offset is read and resolved within `strtab`. For `string`, the inline
    /// NUL-terminated string is returned and the cursor moves past its terminator.
    /// A missing terminator or out-of-range offset yields `error.Eof`.
    pub fn readString(p: *InfoReader, form: Form, cuh: CompileUnitHeader) ![:0]const u8 {
        switch (form) {
            dwarf.FORM.strp => {
                const off = try p.readOffset(cuh.format);
                const off_u = math.cast(usize, off) orelse return error.Overflow;
                if (off_u >= p.strtab.len) return error.Eof;
                const end = mem.indexOfScalarPos(u8, p.strtab, off_u, 0) orelse return error.Eof;
                return p.strtab[off_u..end :0];
            },
            dwarf.FORM.string => {
                const start = p.pos;
                if (start > p.bytes.len) return error.Eof;
                const end = mem.indexOfScalarPos(u8, p.bytes, start, 0) orelse return error.Eof;
                // The terminator belongs to this attribute; the next one starts after it.
                p.pos = end + 1;
                return p.bytes[start..end :0];
            },
            else => unreachable,
        }
    }

    /// Reads one byte and advances the cursor.
    pub fn readByte(p: *InfoReader) !u8 {
        if (p.pos >= p.bytes.len) return error.Eof;
        const byte = p.bytes[p.pos];
        p.pos += 1;
        return byte;
    }

    /// Returns the next `num` bytes (a view into `bytes`) and advances the cursor.
    pub fn readNBytes(p: *InfoReader, num: u64) ![]const u8 {
        const num_usize = math.cast(usize, num) orelse return error.Overflow;
        // Compare against the remaining length so a huge `num` cannot overflow `pos + num`.
        if (p.pos > p.bytes.len or num_usize > p.bytes.len - p.pos) return error.Eof;
        const start = p.pos;
        p.pos += num_usize;
        return p.bytes[start..][0..num_usize];
    }

    /// Reads a little-endian integer of type `Int` and advances the cursor by `@sizeOf(Int)`.
    pub fn readInt(p: *InfoReader, comptime Int: type) !Int {
        const size = @sizeOf(Int);
        if (p.pos > p.bytes.len or size > p.bytes.len - p.pos) return error.Eof;
        const start = p.pos;
        p.pos += size;
        return mem.readInt(Int, p.bytes[start..][0..size], .little);
    }

    /// Reads a section offset: 4 bytes for `.dwarf32`, 8 bytes for `.dwarf64`.
    pub fn readOffset(p: *InfoReader, dw_fmt: DwarfFormat) !u64 {
        return switch (dw_fmt) {
            .dwarf32 => try p.readInt(u32),
            .dwarf64 => try p.readInt(u64),
        };
    }

    /// Reads an unsigned LEB128 value of type `Type` and advances the cursor past it.
    pub fn readUleb128(p: *InfoReader, comptime Type: type) !Type {
        // `seekTo` does not validate its argument, so guard the slice below.
        if (p.pos > p.bytes.len) return error.Eof;
        var stream = std.io.fixedBufferStream(p.bytes[p.pos..]);
        var creader = std.io.countingReader(stream.reader());
        const value: Type = try leb.readUleb128(Type, creader.reader());
        p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow;
        return value;
    }

    /// Reads a signed LEB128 value of type `Type` and advances the cursor past it.
    pub fn readIleb128(p: *InfoReader, comptime Type: type) !Type {
        // `seekTo` does not validate its argument, so guard the slice below.
        if (p.pos > p.bytes.len) return error.Eof;
        var stream = std.io.fixedBufferStream(p.bytes[p.pos..]);
        var creader = std.io.countingReader(stream.reader());
        const value: Type = try leb.readIleb128(Type, creader.reader());
        p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow;
        return value;
    }

    /// Moves the cursor to absolute offset `off`. The offset is not range-checked here;
    /// subsequent reads fail if it lies beyond `bytes`.
    pub fn seekTo(p: *InfoReader, off: u64) !void {
        p.pos = math.cast(usize, off) orelse return error.Overflow;
    }
};
/// Cursor-style reader over `bytes` (abbreviation data).
/// `pos` is the offset of the next unread byte.
pub const AbbrevReader = struct {
    bytes: []const u8,
    pos: usize = 0,

    /// Returns true while the cursor is before the end of `bytes`.
    pub fn hasMore(p: AbbrevReader) bool {
        return p.pos < p.bytes.len;
    }

    /// Reads an abbreviation declaration header (code, tag, has-children flag).
    /// Returns null when the code is 0. `pos`/`len` of the result describe the byte range
    /// that was consumed for the header.
    pub fn readDecl(p: *AbbrevReader) !?AbbrevDecl {
        const pos = p.pos;
        const code = try p.readUleb128(Code);
        if (code == 0) return null;
        const tag = try p.readUleb128(Tag);
        const has_children = (try p.readByte()) > 0;
        return .{
            .code = code,
            .pos = pos,
            .len = p.pos - pos,
            .tag = tag,
            .has_children = has_children,
        };
    }

    /// Reads one (attribute, form) pair. Returns null for the terminating (0, 0) pair.
    pub fn readAttr(p: *AbbrevReader) !?AbbrevAttr {
        const pos = p.pos;
        const at = try p.readUleb128(At);
        const form = try p.readUleb128(Form);
        return if (at == 0 and form == 0) null else .{
            .at = at,
            .form = form,
            .pos = pos,
            .len = p.pos - pos,
        };
    }

    /// Reads one byte and advances the cursor; `error.Eof` at or past the end of `bytes`.
    pub fn readByte(p: *AbbrevReader) !u8 {
        if (p.pos >= p.bytes.len) return error.Eof;
        const byte = p.bytes[p.pos];
        p.pos += 1;
        return byte;
    }

    /// Reads an unsigned LEB128 value of type `Type` and advances the cursor past it.
    pub fn readUleb128(p: *AbbrevReader, comptime Type: type) !Type {
        // `seekTo` accepts any offset (e.g. an untrusted debug_abbrev_offset), so make sure
        // the slice below cannot start beyond the end of `bytes`.
        if (p.pos > p.bytes.len) return error.Eof;
        var stream = std.io.fixedBufferStream(p.bytes[p.pos..]);
        var creader = std.io.countingReader(stream.reader());
        const value: Type = try leb.readUleb128(Type, creader.reader());
        p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow;
        return value;
    }

    /// Moves the cursor to absolute offset `off`. The offset is not range-checked here;
    /// subsequent reads fail if it lies beyond `bytes`.
    pub fn seekTo(p: *AbbrevReader, off: u64) !void {
        p.pos = math.cast(usize, off) orelse return error.Overflow;
    }
};
/// Header of one abbreviation declaration, as produced by `AbbrevReader.readDecl`.
const AbbrevDecl = struct {
// Abbreviation code; `readDecl` returns null instead of a decl when this is 0.
code: Code,
// Reader offset at which the declaration header starts.
pos: usize,
// Number of bytes consumed for the header (code, tag, has-children byte).
len: usize,
// Tag value read after the code.
tag: Tag,
// True when the has-children byte is non-zero.
has_children: bool,
};
/// One (attribute, form) pair of an abbreviation declaration, as produced by
/// `AbbrevReader.readAttr`.
const AbbrevAttr = struct {
// Attribute code.
at: At,
// Form code describing how the attribute value is encoded.
form: Form,
// Reader offset at which the pair starts.
pos: usize,
// Number of bytes consumed for the pair.
len: usize,
};
/// Parsed unit header, as produced by `InfoReader.readCompileUnitHeader`.
const CompileUnitHeader = struct {
// 32-bit or 64-bit DWARF, selected by the initial length field.
format: DwarfFormat,
// Unit length as stored in the header.
length: u64,
// Version field, read as a u16 right after the length.
version: u16,
// Offset read from the header after the version.
debug_abbrev_offset: u64,
// Size in bytes of a DW_FORM_addr value.
address_size: u8,
};
/// A position/length pair. NOTE(review): not referenced by the code visible in this file;
/// presumably the byte range of a debugging information entry — confirm before relying on it.
const Die = struct {
pos: usize,
len: usize,
};
/// DWARF offset-size format of a unit. `readCompileUnitHeader` selects `dwarf64` when the
/// initial 32-bit length is 0xffffffff and `dwarf32` otherwise.
const DwarfFormat = enum {
// Offsets are read as u32.
dwarf32,
// Offsets are read as u64.
dwarf64,
};
const dwarf = std.dwarf;
const leb = std.leb;
const log = std.log.scoped(.link);
const math = std.math;
const mem = std.mem;
const std = @import("std");
const At = u64;
const Code = u64;
const Form = u64;
const Tag = u64;
pub const AT = dwarf.AT;
pub const FORM = dwarf.FORM;
pub const TAG = dwarf.TAG;