diff --git a/CMakeLists.txt b/CMakeLists.txt index 200a1cd2b9..0e80135251 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -611,6 +611,7 @@ set(ZIG_STAGE2_SOURCES src/link/MachO/Atom.zig src/link/MachO/CodeSignature.zig src/link/MachO/DebugSymbols.zig + src/link/MachO/Dwarf.zig src/link/MachO/Dylib.zig src/link/MachO/InternalObject.zig src/link/MachO/Object.zig @@ -622,7 +623,6 @@ set(ZIG_STAGE2_SOURCES src/link/MachO/dyld_info/Rebase.zig src/link/MachO/dyld_info/Trie.zig src/link/MachO/dyld_info/bind.zig - src/link/MachO/dwarf.zig src/link/MachO/eh_frame.zig src/link/MachO/fat.zig src/link/MachO/file.zig diff --git a/src/link/MachO/Dwarf.zig b/src/link/MachO/Dwarf.zig new file mode 100644 index 0000000000..fdc3f33bbc --- /dev/null +++ b/src/link/MachO/Dwarf.zig @@ -0,0 +1,409 @@ +debug_info: []u8 = &[0]u8{}, +debug_abbrev: []u8 = &[0]u8{}, +debug_str: []u8 = &[0]u8{}, +debug_str_offsets: []u8 = &[0]u8{}, + +pub fn deinit(dwarf: *Dwarf, allocator: Allocator) void { + allocator.free(dwarf.debug_info); + allocator.free(dwarf.debug_abbrev); + allocator.free(dwarf.debug_str); + allocator.free(dwarf.debug_str_offsets); +} + +/// Pulls an offset into __debug_str section from a __debug_str_offs section. +/// This is new in DWARFv5 and requires the producer to specify DW_FORM_strx* (`index` arg) +/// but also DW_AT_str_offsets_base with DW_FORM_sec_offset (`base` arg) in the opening header +/// of a "referencing entity" such as DW_TAG_compile_unit. 
+fn getOffset(debug_str_offsets: []const u8, base: u64, index: u64, dw_fmt: DwarfFormat) error{Overflow}!u64 { + const base_as_usize = math.cast(usize, base) orelse return error.Overflow; + const index_as_usize = math.cast(usize, index) orelse return error.Overflow; + return switch (dw_fmt) { + .dwarf32 => @as( + *align(1) const u32, + @ptrCast(debug_str_offsets.ptr + base_as_usize + index_as_usize * @sizeOf(u32)), + ).*, + .dwarf64 => @as( + *align(1) const u64, + @ptrCast(debug_str_offsets.ptr + base_as_usize + index_as_usize * @sizeOf(u64)), + ).*, + }; +} + +pub const InfoReader = struct { + ctx: Dwarf, + pos: usize = 0, + + fn bytes(p: InfoReader) []const u8 { + return p.ctx.debug_info; + } + + pub fn readCompileUnitHeader(p: *InfoReader) !CompileUnitHeader { + var length: u64 = try p.readInt(u32); + const is_64bit = length == 0xffffffff; + if (is_64bit) { + length = try p.readInt(u64); + } + const dw_fmt: DwarfFormat = if (is_64bit) .dwarf64 else .dwarf32; + const version = try p.readInt(Version); + const rest: struct { + debug_abbrev_offset: u64, + address_size: u8, + unit_type: u8, + } = switch (version) { + 4 => .{ + .debug_abbrev_offset = try p.readOffset(dw_fmt), + .address_size = try p.readByte(), + .unit_type = 0, + }, + 5 => .{ + // According to the spec, version 5 introduced .unit_type field in the header, and + // it reordered .debug_abbrev_offset with .address_size fields. 
+ .unit_type = try p.readByte(), + .address_size = try p.readByte(), + .debug_abbrev_offset = try p.readOffset(dw_fmt), + }, + else => return error.InvalidVersion, + }; + return .{ + .format = dw_fmt, + .length = length, + .version = version, + .debug_abbrev_offset = rest.debug_abbrev_offset, + .address_size = rest.address_size, + .unit_type = rest.unit_type, + }; + } + + pub fn seekToDie(p: *InfoReader, code: Code, cuh: CompileUnitHeader, abbrev_reader: *AbbrevReader) !void { + const cuh_length = math.cast(usize, cuh.length) orelse return error.Overflow; + const end_pos = p.pos + switch (cuh.format) { + .dwarf32 => @as(usize, 4), + .dwarf64 => 12, + } + cuh_length; + while (p.pos < end_pos) { + const di_code = try p.readUleb128(u64); + if (di_code == 0) return error.UnexpectedEndOfFile; + if (di_code == code) return; + + while (try abbrev_reader.readAttr()) |attr| { + try p.skip(attr.form, cuh); + } + } + return error.UnexpectedEndOfFile; + } + + /// When skipping attributes, we don't really need to be able to handle them all + /// since we only ever care about the DW_TAG_compile_unit. 
+ pub fn skip(p: *InfoReader, form: Form, cuh: CompileUnitHeader) !void { + switch (form) { + dw.FORM.sec_offset, + dw.FORM.ref_addr, + => { + _ = try p.readOffset(cuh.format); + }, + + dw.FORM.addr => { + _ = try p.readNBytes(cuh.address_size); + }, + + dw.FORM.block1, + dw.FORM.block2, + dw.FORM.block4, + dw.FORM.block, + => { + _ = try p.readBlock(form); + }, + + dw.FORM.exprloc => { + _ = try p.readExprLoc(); + }, + + dw.FORM.flag_present => {}, + + dw.FORM.data1, + dw.FORM.ref1, + dw.FORM.flag, + dw.FORM.data2, + dw.FORM.ref2, + dw.FORM.data4, + dw.FORM.ref4, + dw.FORM.data8, + dw.FORM.ref8, + dw.FORM.ref_sig8, + dw.FORM.udata, + dw.FORM.ref_udata, + dw.FORM.sdata, + => { + _ = try p.readConstant(form); + }, + + dw.FORM.strp, + dw.FORM.string, + => { + _ = try p.readString(form, cuh); + }, + + else => if (cuh.version >= 5) switch (form) { + dw.FORM.strx, + dw.FORM.strx1, + dw.FORM.strx2, + dw.FORM.strx3, + dw.FORM.strx4, + => { + // We are just iterating over the __debug_info data, so we don't care about an actual + // string: consume the index only and never dereference __debug_str_offs or __debug_str. 
+ _ = try p.readIndex(form); + }, + + dw.FORM.addrx, + dw.FORM.addrx1, + dw.FORM.addrx2, + dw.FORM.addrx3, + dw.FORM.addrx4, + => { + _ = try p.readIndex(form); + }, + + else => return error.UnhandledForm, + } else return error.UnhandledForm, + } + } + + pub fn readBlock(p: *InfoReader, form: Form) ![]const u8 { + const len: u64 = switch (form) { + dw.FORM.block1 => try p.readByte(), + dw.FORM.block2 => try p.readInt(u16), + dw.FORM.block4 => try p.readInt(u32), + dw.FORM.block => try p.readUleb128(u64), + else => unreachable, + }; + return p.readNBytes(len); + } + + pub fn readExprLoc(p: *InfoReader) ![]const u8 { + const len: u64 = try p.readUleb128(u64); + return p.readNBytes(len); + } + + pub fn readConstant(p: *InfoReader, form: Form) !u64 { + return switch (form) { + dw.FORM.data1, dw.FORM.ref1, dw.FORM.flag => try p.readByte(), + dw.FORM.data2, dw.FORM.ref2 => try p.readInt(u16), + dw.FORM.data4, dw.FORM.ref4 => try p.readInt(u32), + dw.FORM.data8, dw.FORM.ref8, dw.FORM.ref_sig8 => try p.readInt(u64), + dw.FORM.udata, dw.FORM.ref_udata => try p.readUleb128(u64), + dw.FORM.sdata => @bitCast(try p.readIleb128(i64)), + else => return error.UnhandledConstantForm, + }; + } + + pub fn readIndex(p: *InfoReader, form: Form) !u64 { + return switch (form) { + dw.FORM.strx1, dw.FORM.addrx1 => try p.readByte(), + dw.FORM.strx2, dw.FORM.addrx2 => try p.readInt(u16), + dw.FORM.strx3, dw.FORM.addrx3 => error.UnhandledForm, + dw.FORM.strx4, dw.FORM.addrx4 => try p.readInt(u32), + dw.FORM.strx, dw.FORM.addrx => try p.readUleb128(u64), + else => return error.UnhandledIndexForm, + }; + } + + pub fn readString(p: *InfoReader, form: Form, cuh: CompileUnitHeader) ![:0]const u8 { + switch (form) { + dw.FORM.strp => { + const off = try p.readOffset(cuh.format); + const off_u = math.cast(usize, off) orelse return error.Overflow; + return mem.sliceTo(@as([*:0]const u8, @ptrCast(p.ctx.debug_str.ptr + off_u)), 0); + }, + dw.FORM.string => { + const start = p.pos; +
// A DW_FORM_string value is stored inline and must be NUL-terminated within __debug_info. + const len = mem.indexOfScalar(u8, p.bytes()[start..], 0) orelse return error.UnexpectedEndOfFile; + // Step over the terminator too, so that the next read starts at the following attribute. + p.pos = start + len + 1; + return p.bytes()[start..][0..len :0]; + }, + else => unreachable, + } + } + + pub fn readStringIndexed(p: *InfoReader, form: Form, cuh: CompileUnitHeader, base: u64) ![:0]const u8 { + switch (form) { + dw.FORM.strx, + dw.FORM.strx1, + dw.FORM.strx2, + dw.FORM.strx3, + dw.FORM.strx4, + => { + const index = try p.readIndex(form); + const off = math.cast( + usize, + try getOffset(p.ctx.debug_str_offsets, base, index, cuh.format), + ) orelse return error.Overflow; + return mem.sliceTo(@as([*:0]const u8, @ptrCast(p.ctx.debug_str.ptr + off)), 0); + }, + else => unreachable, + } + } + + pub fn readByte(p: *InfoReader) !u8 { + if (p.pos + 1 > p.bytes().len) return error.UnexpectedEndOfFile; + defer p.pos += 1; + return p.bytes()[p.pos]; + } + + pub fn readNBytes(p: *InfoReader, num: u64) ![]const u8 { + const num_usize = math.cast(usize, num) orelse return error.Overflow; + if (p.pos > p.bytes().len or num_usize > p.bytes().len - p.pos) return error.UnexpectedEndOfFile; + defer p.pos += num_usize; + return p.bytes()[p.pos..][0..num_usize]; + } + + pub fn readInt(p: *InfoReader, comptime Int: type) !Int { + if (p.pos + @sizeOf(Int) > p.bytes().len) return error.UnexpectedEndOfFile; + defer p.pos += @sizeOf(Int); + return mem.readInt(Int, p.bytes()[p.pos..][0..@sizeOf(Int)], .little); + } + + pub fn readOffset(p: *InfoReader, dw_fmt: DwarfFormat) !u64 { + return switch (dw_fmt) { + .dwarf32 => try p.readInt(u32), + .dwarf64 => try p.readInt(u64), + }; + } + + pub fn readUleb128(p: *InfoReader, comptime Type: type) !Type { + var stream = std.io.fixedBufferStream(p.bytes()[p.pos..]); + var creader = std.io.countingReader(stream.reader()); + const value: Type = try leb.readUleb128(Type, creader.reader()); + p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow; + return value; + } + + pub fn readIleb128(p: *InfoReader, comptime Type: 
type) !Type { + var stream = std.io.fixedBufferStream(p.bytes()[p.pos..]); + var creader = std.io.countingReader(stream.reader()); + const value: Type = try leb.readIleb128(Type, creader.reader()); + p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow; + return value; + } + + pub fn seekTo(p: *InfoReader, off: u64) !void { + p.pos = math.cast(usize, off) orelse return error.Overflow; + } +}; + +pub const AbbrevReader = struct { + ctx: Dwarf, + pos: usize = 0, + + fn bytes(p: AbbrevReader) []const u8 { + return p.ctx.debug_abbrev; + } + + pub fn hasMore(p: AbbrevReader) bool { + return p.pos < p.bytes().len; + } + + pub fn readDecl(p: *AbbrevReader) !?AbbrevDecl { + const pos = p.pos; + const code = try p.readUleb128(Code); + if (code == 0) return null; + + const tag = try p.readUleb128(Tag); + const has_children = (try p.readByte()) > 0; + return .{ + .code = code, + .pos = pos, + .len = p.pos - pos, + .tag = tag, + .has_children = has_children, + }; + } + + pub fn readAttr(p: *AbbrevReader) !?AbbrevAttr { + const pos = p.pos; + const at = try p.readUleb128(At); + const form = try p.readUleb128(Form); + return if (at == 0 and form == 0) null else .{ + .at = at, + .form = form, + .pos = pos, + .len = p.pos - pos, + }; + } + + pub fn readByte(p: *AbbrevReader) !u8 { + if (p.pos + 1 > p.bytes().len) return error.Eof; + defer p.pos += 1; + return p.bytes()[p.pos]; + } + + pub fn readUleb128(p: *AbbrevReader, comptime Type: type) !Type { + var stream = std.io.fixedBufferStream(p.bytes()[p.pos..]); + var creader = std.io.countingReader(stream.reader()); + const value: Type = try leb.readUleb128(Type, creader.reader()); + p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow; + return value; + } + + pub fn seekTo(p: *AbbrevReader, off: u64) !void { + p.pos = math.cast(usize, off) orelse return error.Overflow; + } +}; + +const AbbrevDecl = struct { + code: Code, + pos: usize, + len: usize, + tag: Tag, + has_children: bool, +}; + 
+const AbbrevAttr = struct { + at: At, + form: Form, + pos: usize, + len: usize, +}; + +const CompileUnitHeader = struct { + format: DwarfFormat, + length: u64, + version: Version, + debug_abbrev_offset: u64, + address_size: u8, + unit_type: u8, +}; + +const Die = struct { + pos: usize, + len: usize, +}; + +const DwarfFormat = enum { + dwarf32, + dwarf64, +}; + +const dw = std.dwarf; +const leb = std.leb; +const log = std.log.scoped(.link); +const math = std.math; +const mem = std.mem; +const std = @import("std"); +const Allocator = mem.Allocator; +const Dwarf = @This(); +const File = @import("file.zig").File; +const MachO = @import("../MachO.zig"); +const Object = @import("Object.zig"); + +pub const At = u64; +pub const Code = u64; +pub const Form = u64; +pub const Tag = u64; +pub const Version = u16; + +pub const AT = dw.AT; +pub const FORM = dw.FORM; +pub const TAG = dw.TAG; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 3efc0a1e5a..349ee99ca4 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -443,11 +443,8 @@ fn initCstringLiterals(self: *Object, allocator: Allocator, file: File.Handle, m for (slice.items(.header), 0..) |sect, n_sect| { if (!isCstringLiteral(sect)) continue; - const sect_size = math.cast(usize, sect.size) orelse return error.Overflow; - const data = try allocator.alloc(u8, sect_size); + const data = try self.readSectionData(allocator, file, @intCast(n_sect)); defer allocator.free(data); - const amt = try file.preadAll(data, sect.offset + self.offset); - if (amt != data.len) return error.InputOutput; var count: u32 = 0; var start: u32 = 0; @@ -646,13 +643,10 @@ pub fn resolveLiterals(self: *Object, lp: *MachO.LiteralPool, macho_file: *MachO } const slice = self.sections.slice(); - for (slice.items(.header), slice.items(.subsections)) |header, subs| { + for (slice.items(.header), slice.items(.subsections), 0..) 
|header, subs, n_sect| { if (isCstringLiteral(header) or isFixedSizeLiteral(header)) { - const sect_size = math.cast(usize, header.size) orelse return error.Overflow; - const data = try gpa.alloc(u8, sect_size); + const data = try self.readSectionData(gpa, file, @intCast(n_sect)); defer gpa.free(data); - const amt = try file.preadAll(data, header.offset + self.offset); - if (amt != data.len) return error.InputOutput; for (subs.items) |sub| { const atom = self.getAtom(sub.atom).?; @@ -686,12 +680,7 @@ pub fn resolveLiterals(self: *Object, lp: *MachO.LiteralPool, macho_file: *MachO buffer.resize(target_size) catch unreachable; const gop = try sections_data.getOrPut(target.n_sect); if (!gop.found_existing) { - const target_sect = slice.items(.header)[target.n_sect]; - const target_sect_size = math.cast(usize, target_sect.size) orelse return error.Overflow; - const data = try gpa.alloc(u8, target_sect_size); - const amt = try file.preadAll(data, target_sect.offset + self.offset); - if (amt != data.len) return error.InputOutput; - gop.value_ptr.* = data; + gop.value_ptr.* = try self.readSectionData(gpa, file, @intCast(target.n_sect)); } const data = gop.value_ptr.*; const target_off = math.cast(usize, target.off) orelse return error.Overflow; @@ -1000,7 +989,7 @@ fn initRelocs(self: *Object, file: File.Handle, cpu_arch: std.Target.Cpu.Arch, m defer tracy.end(); const slice = self.sections.slice(); - for (slice.items(.header), slice.items(.relocs)) |sect, *out| { + for (slice.items(.header), slice.items(.relocs), 0..) |sect, *out, n_sect| { if (sect.nreloc == 0) continue; // We skip relocs for __DWARF since even in -r mode, the linker is expected to emit // debug symbol stabs in the relocatable. This made me curious why that is. 
For now, @@ -1009,8 +998,8 @@ fn initRelocs(self: *Object, file: File.Handle, cpu_arch: std.Target.Cpu.Arch, m !mem.eql(u8, sect.sectName(), "__compact_unwind")) continue; switch (cpu_arch) { - .x86_64 => try x86_64.parseRelocs(self, sect, out, file, macho_file), - .aarch64 => try aarch64.parseRelocs(self, sect, out, file, macho_file), + .x86_64 => try x86_64.parseRelocs(self, @intCast(n_sect), sect, out, file, macho_file), + .aarch64 => try aarch64.parseRelocs(self, @intCast(n_sect), sect, out, file, macho_file), else => unreachable, } @@ -1146,11 +1135,8 @@ fn initUnwindRecords(self: *Object, allocator: Allocator, sect_id: u8, file: Fil }; const header = self.sections.items(.header)[sect_id]; - const size = math.cast(usize, header.size) orelse return error.Overflow; - const data = try allocator.alloc(u8, size); + const data = try self.readSectionData(allocator, file, sect_id); defer allocator.free(data); - const amt = try file.preadAll(data, header.offset + self.offset); - if (amt != data.len) return error.InputOutput; const nrecs = @divExact(data.len, @sizeOf(macho.compact_unwind_entry)); const recs = @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data.ptr))[0..nrecs]; @@ -1359,151 +1345,106 @@ fn parseDebugInfo(self: *Object, macho_file: *MachO) !void { defer tracy.end(); const gpa = macho_file.base.comp.gpa; + const file = macho_file.getFileHandle(self.file_handle); - var debug_info_index: ?usize = null; - var debug_abbrev_index: ?usize = null; - var debug_str_index: ?usize = null; + var dwarf: Dwarf = .{}; + defer dwarf.deinit(gpa); for (self.sections.items(.header), 0..) 
|sect, index| { + const n_sect: u8 = @intCast(index); if (sect.attrs() & macho.S_ATTR_DEBUG == 0) continue; - if (mem.eql(u8, sect.sectName(), "__debug_info")) debug_info_index = index; - if (mem.eql(u8, sect.sectName(), "__debug_abbrev")) debug_abbrev_index = index; - if (mem.eql(u8, sect.sectName(), "__debug_str")) debug_str_index = index; + if (mem.eql(u8, sect.sectName(), "__debug_info")) { + dwarf.debug_info = try self.readSectionData(gpa, file, n_sect); + } + if (mem.eql(u8, sect.sectName(), "__debug_abbrev")) { + dwarf.debug_abbrev = try self.readSectionData(gpa, file, n_sect); + } + if (mem.eql(u8, sect.sectName(), "__debug_str")) { + dwarf.debug_str = try self.readSectionData(gpa, file, n_sect); + } + // __debug_str_offs[ets] section is a new addition in DWARFv5 and is generally + // required in order to correctly parse strings. + if (mem.eql(u8, sect.sectName(), "__debug_str_offs")) { + dwarf.debug_str_offsets = try self.readSectionData(gpa, file, n_sect); + } } - if (debug_info_index == null or debug_abbrev_index == null) return; + if (dwarf.debug_info.len == 0) return; - const slice = self.sections.slice(); - const file = macho_file.getFileHandle(self.file_handle); - const debug_info = blk: { - const sect = slice.items(.header)[debug_info_index.?]; - const size = math.cast(usize, sect.size) orelse return error.Overflow; - const data = try gpa.alloc(u8, size); - const amt = try file.preadAll(data, sect.offset + self.offset); - if (amt != data.len) return error.InputOutput; - break :blk data; - }; - defer gpa.free(debug_info); - const debug_abbrev = blk: { - const sect = slice.items(.header)[debug_abbrev_index.?]; - const size = math.cast(usize, sect.size) orelse return error.Overflow; - const data = try gpa.alloc(u8, size); - const amt = try file.preadAll(data, sect.offset + self.offset); - if (amt != data.len) return error.InputOutput; - break :blk data; - }; - defer gpa.free(debug_abbrev); - const debug_str = if (debug_str_index) |sid| blk: { - const 
sect = slice.items(.header)[sid]; - const size = math.cast(usize, sect.size) orelse return error.Overflow; - const data = try gpa.alloc(u8, size); - const amt = try file.preadAll(data, sect.offset + self.offset); - if (amt != data.len) return error.InputOutput; - break :blk data; - } else &[0]u8{}; - defer gpa.free(debug_str); - - self.compile_unit = self.findCompileUnit(.{ - .gpa = gpa, - .debug_info = debug_info, - .debug_abbrev = debug_abbrev, - .debug_str = debug_str, - }) catch null; // TODO figure out what errors are fatal, and when we silently fail + // TODO return error once we fix emitting DWARF in self-hosted backend. + // https://github.com/ziglang/zig/issues/21719 + self.compile_unit = self.findCompileUnit(gpa, dwarf) catch null; } -fn findCompileUnit(self: *Object, args: struct { - gpa: Allocator, - debug_info: []const u8, - debug_abbrev: []const u8, - debug_str: []const u8, -}) !CompileUnit { - var cu_wip: struct { - comp_dir: ?[:0]const u8 = null, - tu_name: ?[:0]const u8 = null, - } = .{}; - - const gpa = args.gpa; - var info_reader = dwarf.InfoReader{ .bytes = args.debug_info, .strtab = args.debug_str }; - var abbrev_reader = dwarf.AbbrevReader{ .bytes = args.debug_abbrev }; +fn findCompileUnit(self: *Object, gpa: Allocator, ctx: Dwarf) !CompileUnit { + var info_reader = Dwarf.InfoReader{ .ctx = ctx }; + var abbrev_reader = Dwarf.AbbrevReader{ .ctx = ctx }; const cuh = try info_reader.readCompileUnitHeader(); try abbrev_reader.seekTo(cuh.debug_abbrev_offset); - const cu_decl = (try abbrev_reader.readDecl()) orelse return error.Eof; - if (cu_decl.tag != dwarf.TAG.compile_unit) return error.UnexpectedTag; + const cu_decl = (try abbrev_reader.readDecl()) orelse return error.UnexpectedEndOfFile; + if (cu_decl.tag != Dwarf.TAG.compile_unit) return error.UnexpectedTag; try info_reader.seekToDie(cu_decl.code, cuh, &abbrev_reader); - while (try abbrev_reader.readAttr()) |attr| switch (attr.at) { - dwarf.AT.name => { - cu_wip.tu_name = try 
info_reader.readString(attr.form, cuh); - }, - dwarf.AT.comp_dir => { - cu_wip.comp_dir = try info_reader.readString(attr.form, cuh); - }, - else => switch (attr.form) { - dwarf.FORM.sec_offset, - dwarf.FORM.ref_addr, - => { - _ = try info_reader.readOffset(cuh.format); - }, - - dwarf.FORM.addr => { - _ = try info_reader.readNBytes(cuh.address_size); - }, - - dwarf.FORM.block1, - dwarf.FORM.block2, - dwarf.FORM.block4, - dwarf.FORM.block, - => { - _ = try info_reader.readBlock(attr.form); - }, - - dwarf.FORM.exprloc => { - _ = try info_reader.readExprLoc(); - }, - - dwarf.FORM.flag_present => {}, - - dwarf.FORM.data1, - dwarf.FORM.ref1, - dwarf.FORM.flag, - dwarf.FORM.data2, - dwarf.FORM.ref2, - dwarf.FORM.data4, - dwarf.FORM.ref4, - dwarf.FORM.data8, - dwarf.FORM.ref8, - dwarf.FORM.ref_sig8, - dwarf.FORM.udata, - dwarf.FORM.ref_udata, - dwarf.FORM.sdata, - => { - _ = try info_reader.readConstant(attr.form); - }, - - dwarf.FORM.strp, - dwarf.FORM.string, - => { - _ = try info_reader.readString(attr.form, cuh); - }, - - else => { - // TODO actual errors? 
- log.err("unhandled DW_FORM_* value with identifier {x}", .{attr.form}); - return error.UnhandledForm; - }, - }, + const Pos = struct { + pos: usize, + form: Dwarf.Form, }; - - if (cu_wip.comp_dir == null) return error.MissingCompDir; - if (cu_wip.tu_name == null) return error.MissingTuName; - - return .{ - .comp_dir = try self.addString(gpa, cu_wip.comp_dir.?), - .tu_name = try self.addString(gpa, cu_wip.tu_name.?), + var saved: struct { + tu_name: ?Pos, + comp_dir: ?Pos, + str_offsets_base: ?Pos, + } = .{ + .tu_name = null, + .comp_dir = null, + .str_offsets_base = null, }; + while (try abbrev_reader.readAttr()) |attr| { + const pos: Pos = .{ .pos = info_reader.pos, .form = attr.form }; + switch (attr.at) { + Dwarf.AT.name => saved.tu_name = pos, + Dwarf.AT.comp_dir => saved.comp_dir = pos, + Dwarf.AT.str_offsets_base => saved.str_offsets_base = pos, + else => {}, + } + try info_reader.skip(attr.form, cuh); + } + + if (saved.comp_dir == null) return error.MissingCompileDir; + if (saved.tu_name == null) return error.MissingTuName; + + const str_offsets_base: ?u64 = if (saved.str_offsets_base) |str_offsets_base| str_offsets_base: { + try info_reader.seekTo(str_offsets_base.pos); + break :str_offsets_base try info_reader.readOffset(cuh.format); + } else null; + + var cu: CompileUnit = .{ .comp_dir = .{}, .tu_name = .{} }; + for (&[_]struct { Pos, *MachO.String }{ + .{ saved.comp_dir.?, &cu.comp_dir }, + .{ saved.tu_name.?, &cu.tu_name }, + }) |tuple| { + const pos, const str_offset_ptr = tuple; + try info_reader.seekTo(pos.pos); + str_offset_ptr.* = switch (pos.form) { + Dwarf.FORM.strp, + Dwarf.FORM.string, + => try self.addString(gpa, try info_reader.readString(pos.form, cuh)), + Dwarf.FORM.strx, + Dwarf.FORM.strx1, + Dwarf.FORM.strx2, + Dwarf.FORM.strx3, + Dwarf.FORM.strx4, + => blk: { + const base = str_offsets_base orelse return error.MissingStrOffsetsBase; + break :blk try self.addString(gpa, try info_reader.readStringIndexed(pos.form, cuh, base)); + }, + 
else => return error.InvalidForm, + }; + } + + return cu; } pub fn resolveSymbols(self: *Object, macho_file: *MachO) !void { @@ -2561,6 +2502,17 @@ pub fn getUnwindRecord(self: *Object, index: UnwindInfo.Record.Index) *UnwindInf return &self.unwind_records.items[index]; } +/// Reads the entire contents of section `n_sect` into a freshly allocated buffer. Caller owns the memory. +pub fn readSectionData(self: Object, allocator: Allocator, file: File.Handle, n_sect: u8) ![]u8 { + const header = self.sections.items(.header)[n_sect]; + const size = math.cast(usize, header.size) orelse return error.Overflow; + const data = try allocator.alloc(u8, size); + errdefer allocator.free(data); + const amt = try file.preadAll(data, header.offset + self.offset); + if (amt != data.len) return error.InputOutput; + return data; +} + pub fn format( self: *Object, comptime unused_fmt_string: []const u8, @@ -2848,6 +2800,7 @@ const CompactUnwindCtx = struct { const x86_64 = struct { fn parseRelocs( self: *Object, + n_sect: u8, sect: macho.section_64, out: *std.ArrayListUnmanaged(Relocation), handle: File.Handle, @@ -2857,19 +2810,12 @@ const x86_64 = struct { const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info)); defer gpa.free(relocs_buffer); - { - const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset); - if (amt != relocs_buffer.len) return error.InputOutput; - } + const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset); + if (amt != relocs_buffer.len) return error.InputOutput; const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc]; - const sect_size = math.cast(usize, sect.size) orelse return error.Overflow; - const code = try gpa.alloc(u8, sect_size); + const code = try self.readSectionData(gpa, handle, n_sect); defer gpa.free(code); - { - const amt = try handle.preadAll(code, sect.offset + self.offset); - if (amt != code.len) return error.InputOutput; - } try out.ensureTotalCapacityPrecise(gpa, relocs.len); @@ -3021,6 +2967,7 @@ const x86_64 
= struct { const aarch64 = struct { fn parseRelocs( self: *Object, + n_sect: u8, sect: macho.section_64, out: *std.ArrayListUnmanaged(Relocation), handle: File.Handle, @@ -3030,19 +2977,12 @@ const aarch64 = struct { const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info)); defer gpa.free(relocs_buffer); - { - const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset); - if (amt != relocs_buffer.len) return error.InputOutput; - } + const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset); + if (amt != relocs_buffer.len) return error.InputOutput; const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc]; - const sect_size = math.cast(usize, sect.size) orelse return error.Overflow; - const code = try gpa.alloc(u8, sect_size); + const code = try self.readSectionData(gpa, handle, n_sect); defer gpa.free(code); - { - const amt = try handle.preadAll(code, sect.offset + self.offset); - if (amt != code.len) return error.InputOutput; - } try out.ensureTotalCapacityPrecise(gpa, relocs.len); @@ -3219,7 +3159,6 @@ const aarch64 = struct { }; const assert = std.debug.assert; -const dwarf = @import("dwarf.zig"); const eh_frame = @import("eh_frame.zig"); const log = std.log.scoped(.link); const macho = std.macho; @@ -3233,6 +3172,7 @@ const Allocator = mem.Allocator; const Archive = @import("Archive.zig"); const Atom = @import("Atom.zig"); const Cie = eh_frame.Cie; +const Dwarf = @import("Dwarf.zig"); const Fde = eh_frame.Fde; const File = @import("file.zig").File; const LoadCommandIterator = macho.LoadCommandIterator; diff --git a/src/link/MachO/dwarf.zig b/src/link/MachO/dwarf.zig deleted file mode 100644 index c9db7ed7b7..0000000000 --- a/src/link/MachO/dwarf.zig +++ /dev/null @@ -1,286 +0,0 @@ -pub const InfoReader = struct { - bytes: []const u8, - strtab: []const u8, - pos: usize = 0, - - pub fn readCompileUnitHeader(p: *InfoReader) !CompileUnitHeader { - var length: 
u64 = try p.readInt(u32); - const is_64bit = length == 0xffffffff; - if (is_64bit) { - length = try p.readInt(u64); - } - const dw_fmt: DwarfFormat = if (is_64bit) .dwarf64 else .dwarf32; - return .{ - .format = dw_fmt, - .length = length, - .version = try p.readInt(u16), - .debug_abbrev_offset = try p.readOffset(dw_fmt), - .address_size = try p.readByte(), - }; - } - - pub fn seekToDie(p: *InfoReader, code: Code, cuh: CompileUnitHeader, abbrev_reader: *AbbrevReader) !void { - const cuh_length = math.cast(usize, cuh.length) orelse return error.Overflow; - const end_pos = p.pos + switch (cuh.format) { - .dwarf32 => @as(usize, 4), - .dwarf64 => 12, - } + cuh_length; - while (p.pos < end_pos) { - const di_code = try p.readUleb128(u64); - if (di_code == 0) return error.Eof; - if (di_code == code) return; - - while (try abbrev_reader.readAttr()) |attr| switch (attr.at) { - dwarf.FORM.sec_offset, - dwarf.FORM.ref_addr, - => { - _ = try p.readOffset(cuh.format); - }, - - dwarf.FORM.addr => { - _ = try p.readNBytes(cuh.address_size); - }, - - dwarf.FORM.block1, - dwarf.FORM.block2, - dwarf.FORM.block4, - dwarf.FORM.block, - => { - _ = try p.readBlock(attr.form); - }, - - dwarf.FORM.exprloc => { - _ = try p.readExprLoc(); - }, - - dwarf.FORM.flag_present => {}, - - dwarf.FORM.data1, - dwarf.FORM.ref1, - dwarf.FORM.flag, - dwarf.FORM.data2, - dwarf.FORM.ref2, - dwarf.FORM.data4, - dwarf.FORM.ref4, - dwarf.FORM.data8, - dwarf.FORM.ref8, - dwarf.FORM.ref_sig8, - dwarf.FORM.udata, - dwarf.FORM.ref_udata, - dwarf.FORM.sdata, - => { - _ = try p.readConstant(attr.form); - }, - - dwarf.FORM.strp, - dwarf.FORM.string, - => { - _ = try p.readString(attr.form, cuh); - }, - - else => { - // TODO better errors - log.err("unhandled DW_FORM_* value with identifier {x}", .{attr.form}); - return error.UnhandledDwFormValue; - }, - }; - } - } - - pub fn readBlock(p: *InfoReader, form: Form) ![]const u8 { - const len: u64 = switch (form) { - dwarf.FORM.block1 => try p.readByte(), - 
dwarf.FORM.block2 => try p.readInt(u16), - dwarf.FORM.block4 => try p.readInt(u32), - dwarf.FORM.block => try p.readUleb128(u64), - else => unreachable, - }; - return p.readNBytes(len); - } - - pub fn readExprLoc(p: *InfoReader) ![]const u8 { - const len: u64 = try p.readUleb128(u64); - return p.readNBytes(len); - } - - pub fn readConstant(p: *InfoReader, form: Form) !u64 { - return switch (form) { - dwarf.FORM.data1, dwarf.FORM.ref1, dwarf.FORM.flag => try p.readByte(), - dwarf.FORM.data2, dwarf.FORM.ref2 => try p.readInt(u16), - dwarf.FORM.data4, dwarf.FORM.ref4 => try p.readInt(u32), - dwarf.FORM.data8, dwarf.FORM.ref8, dwarf.FORM.ref_sig8 => try p.readInt(u64), - dwarf.FORM.udata, dwarf.FORM.ref_udata => try p.readUleb128(u64), - dwarf.FORM.sdata => @bitCast(try p.readIleb128(i64)), - else => return error.UnhandledConstantForm, - }; - } - - pub fn readString(p: *InfoReader, form: Form, cuh: CompileUnitHeader) ![:0]const u8 { - switch (form) { - dwarf.FORM.strp => { - const off = try p.readOffset(cuh.format); - const off_u = math.cast(usize, off) orelse return error.Overflow; - return mem.sliceTo(@as([*:0]const u8, @ptrCast(p.strtab.ptr + off_u)), 0); - }, - dwarf.FORM.string => { - const start = p.pos; - while (p.pos < p.bytes.len) : (p.pos += 1) { - if (p.bytes[p.pos] == 0) break; - } - if (p.bytes[p.pos] != 0) return error.Eof; - return p.bytes[start..p.pos :0]; - }, - else => unreachable, - } - } - - pub fn readByte(p: *InfoReader) !u8 { - if (p.pos + 1 > p.bytes.len) return error.Eof; - defer p.pos += 1; - return p.bytes[p.pos]; - } - - pub fn readNBytes(p: *InfoReader, num: u64) ![]const u8 { - const num_usize = math.cast(usize, num) orelse return error.Overflow; - if (p.pos + num_usize > p.bytes.len) return error.Eof; - defer p.pos += num_usize; - return p.bytes[p.pos..][0..num_usize]; - } - - pub fn readInt(p: *InfoReader, comptime Int: type) !Int { - if (p.pos + @sizeOf(Int) > p.bytes.len) return error.Eof; - defer p.pos += @sizeOf(Int); - return 
mem.readInt(Int, p.bytes[p.pos..][0..@sizeOf(Int)], .little); - } - - pub fn readOffset(p: *InfoReader, dw_fmt: DwarfFormat) !u64 { - return switch (dw_fmt) { - .dwarf32 => try p.readInt(u32), - .dwarf64 => try p.readInt(u64), - }; - } - - pub fn readUleb128(p: *InfoReader, comptime Type: type) !Type { - var stream = std.io.fixedBufferStream(p.bytes[p.pos..]); - var creader = std.io.countingReader(stream.reader()); - const value: Type = try leb.readUleb128(Type, creader.reader()); - p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow; - return value; - } - - pub fn readIleb128(p: *InfoReader, comptime Type: type) !Type { - var stream = std.io.fixedBufferStream(p.bytes[p.pos..]); - var creader = std.io.countingReader(stream.reader()); - const value: Type = try leb.readIleb128(Type, creader.reader()); - p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow; - return value; - } - - pub fn seekTo(p: *InfoReader, off: u64) !void { - p.pos = math.cast(usize, off) orelse return error.Overflow; - } -}; - -pub const AbbrevReader = struct { - bytes: []const u8, - pos: usize = 0, - - pub fn hasMore(p: AbbrevReader) bool { - return p.pos < p.bytes.len; - } - - pub fn readDecl(p: *AbbrevReader) !?AbbrevDecl { - const pos = p.pos; - const code = try p.readUleb128(Code); - if (code == 0) return null; - - const tag = try p.readUleb128(Tag); - const has_children = (try p.readByte()) > 0; - return .{ - .code = code, - .pos = pos, - .len = p.pos - pos, - .tag = tag, - .has_children = has_children, - }; - } - - pub fn readAttr(p: *AbbrevReader) !?AbbrevAttr { - const pos = p.pos; - const at = try p.readUleb128(At); - const form = try p.readUleb128(Form); - return if (at == 0 and form == 0) null else .{ - .at = at, - .form = form, - .pos = pos, - .len = p.pos - pos, - }; - } - - pub fn readByte(p: *AbbrevReader) !u8 { - if (p.pos + 1 > p.bytes.len) return error.Eof; - defer p.pos += 1; - return p.bytes[p.pos]; - } - - pub fn readUleb128(p: 
*AbbrevReader, comptime Type: type) !Type { - var stream = std.io.fixedBufferStream(p.bytes[p.pos..]); - var creader = std.io.countingReader(stream.reader()); - const value: Type = try leb.readUleb128(Type, creader.reader()); - p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow; - return value; - } - - pub fn seekTo(p: *AbbrevReader, off: u64) !void { - p.pos = math.cast(usize, off) orelse return error.Overflow; - } -}; - -const AbbrevDecl = struct { - code: Code, - pos: usize, - len: usize, - tag: Tag, - has_children: bool, -}; - -const AbbrevAttr = struct { - at: At, - form: Form, - pos: usize, - len: usize, -}; - -const CompileUnitHeader = struct { - format: DwarfFormat, - length: u64, - version: u16, - debug_abbrev_offset: u64, - address_size: u8, -}; - -const Die = struct { - pos: usize, - len: usize, -}; - -const DwarfFormat = enum { - dwarf32, - dwarf64, -}; - -const dwarf = std.dwarf; -const leb = std.leb; -const log = std.log.scoped(.link); -const math = std.math; -const mem = std.mem; -const std = @import("std"); - -const At = u64; -const Code = u64; -const Form = u64; -const Tag = u64; - -pub const AT = dwarf.AT; -pub const FORM = dwarf.FORM; -pub const TAG = dwarf.TAG;