diff --git a/CMakeLists.txt b/CMakeLists.txt index e1e8fcf921..8ec067d591 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -613,7 +613,6 @@ set(ZIG_STAGE2_SOURCES src/link/MachO/Atom.zig src/link/MachO/CodeSignature.zig src/link/MachO/DebugSymbols.zig - src/link/MachO/DwarfInfo.zig src/link/MachO/Dylib.zig src/link/MachO/InternalObject.zig src/link/MachO/Object.zig @@ -625,6 +624,7 @@ set(ZIG_STAGE2_SOURCES src/link/MachO/dyld_info/Rebase.zig src/link/MachO/dyld_info/Trie.zig src/link/MachO/dyld_info/bind.zig + src/link/MachO/dwarf.zig src/link/MachO/eh_frame.zig src/link/MachO/fat.zig src/link/MachO/file.zig diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 326f714796..7bf195f5f9 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -535,6 +535,7 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node try self.addUndefinedGlobals(); try self.resolveSymbols(); + try self.parseDebugInfo(); try self.resolveSyntheticSymbols(); try self.convertTentativeDefinitions(); @@ -1409,6 +1410,12 @@ fn markLive(self: *MachO) void { } } +pub fn parseDebugInfo(self: *MachO) !void { + for (self.objects.items) |index| { + try self.getFile(index).?.object.parseDebugInfo(self); + } +} + fn resolveSyntheticSymbols(self: *MachO) !void { const internal = self.getInternalObject() orelse return; @@ -4840,7 +4847,6 @@ const Cache = std.Build.Cache; const CodeSignature = @import("MachO/CodeSignature.zig"); const Compilation = @import("../Compilation.zig"); pub const DebugSymbols = @import("MachO/DebugSymbols.zig"); -const DwarfInfo = @import("MachO/DwarfInfo.zig"); const Dylib = @import("MachO/Dylib.zig"); const ExportTrieSection = synthetic.ExportTrieSection; const File = @import("MachO/file.zig").File; diff --git a/src/link/MachO/DwarfInfo.zig b/src/link/MachO/DwarfInfo.zig deleted file mode 100644 index 9974386bb7..0000000000 --- a/src/link/MachO/DwarfInfo.zig +++ /dev/null @@ -1,490 +0,0 @@ -/// Abbreviation table indexed by offset in the 
.debug_abbrev bytestream -abbrev_tables: std.AutoArrayHashMapUnmanaged(u64, AbbrevTable) = .{}, -/// List of compile units as they appear in the .debug_info bytestream -compile_units: std.ArrayListUnmanaged(CompileUnit) = .{}, -/// Debug info string table -strtab: std.ArrayListUnmanaged(u8) = .{}, -/// Debug info data -di_data: std.ArrayListUnmanaged(u8) = .{}, - -pub fn init(dw: *DwarfInfo, allocator: Allocator, di: DebugInfo) !void { - try dw.strtab.ensureTotalCapacityPrecise(allocator, di.debug_str.len); - dw.strtab.appendSliceAssumeCapacity(di.debug_str); - try dw.parseAbbrevTables(allocator, di); - try dw.parseCompileUnits(allocator, di); -} - -pub fn deinit(dw: *DwarfInfo, allocator: Allocator) void { - dw.abbrev_tables.deinit(allocator); - for (dw.compile_units.items) |*cu| { - cu.deinit(allocator); - } - dw.compile_units.deinit(allocator); - dw.strtab.deinit(allocator); - dw.di_data.deinit(allocator); -} - -fn appendDiData(dw: *DwarfInfo, allocator: Allocator, values: []const u8) error{OutOfMemory}!u32 { - const index: u32 = @intCast(dw.di_data.items.len); - try dw.di_data.ensureUnusedCapacity(allocator, values.len); - dw.di_data.appendSliceAssumeCapacity(values); - return index; -} - -fn getString(dw: DwarfInfo, off: usize) [:0]const u8 { - assert(off < dw.strtab.items.len); - return mem.sliceTo(@as([*:0]const u8, @ptrCast(dw.strtab.items.ptr + off)), 0); -} - -fn parseAbbrevTables(dw: *DwarfInfo, allocator: Allocator, di: DebugInfo) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const debug_abbrev = di.debug_abbrev; - var stream = std.io.fixedBufferStream(debug_abbrev); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); - - while (true) { - if (creader.bytes_read >= debug_abbrev.len) break; - - try dw.abbrev_tables.ensureUnusedCapacity(allocator, 1); - const table_gop = dw.abbrev_tables.getOrPutAssumeCapacity(@intCast(creader.bytes_read)); - assert(!table_gop.found_existing); - const table = 
table_gop.value_ptr; - table.* = .{}; - - while (true) { - const code = try leb.readULEB128(Code, reader); - if (code == 0) break; - - try table.decls.ensureUnusedCapacity(allocator, 1); - const decl_gop = table.decls.getOrPutAssumeCapacity(code); - assert(!decl_gop.found_existing); - const decl = decl_gop.value_ptr; - decl.* = .{ - .code = code, - .tag = undefined, - .children = false, - }; - decl.tag = try leb.readULEB128(Tag, reader); - decl.children = (try reader.readByte()) > 0; - - while (true) { - const at = try leb.readULEB128(At, reader); - const form = try leb.readULEB128(Form, reader); - if (at == 0 and form == 0) break; - - try decl.attrs.ensureUnusedCapacity(allocator, 1); - const attr_gop = decl.attrs.getOrPutAssumeCapacity(at); - assert(!attr_gop.found_existing); - const attr = attr_gop.value_ptr; - attr.* = .{ - .at = at, - .form = form, - }; - } - } - } -} - -fn parseCompileUnits(dw: *DwarfInfo, allocator: Allocator, di: DebugInfo) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const debug_info = di.debug_info; - var stream = std.io.fixedBufferStream(debug_info); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); - - while (true) { - if (creader.bytes_read == debug_info.len) break; - - const cu = try dw.compile_units.addOne(allocator); - cu.* = .{ - .header = undefined, - .pos = creader.bytes_read, - }; - - var length: u64 = try reader.readInt(u32, .little); - const is_64bit = length == 0xffffffff; - if (is_64bit) { - length = try reader.readInt(u64, .little); - } - cu.header.format = if (is_64bit) .dwarf64 else .dwarf32; - cu.header.length = length; - cu.header.version = try reader.readInt(u16, .little); - cu.header.debug_abbrev_offset = try readOffset(cu.header.format, reader); - cu.header.address_size = try reader.readInt(u8, .little); - - const table = dw.abbrev_tables.get(cu.header.debug_abbrev_offset).?; - try dw.parseDie(allocator, cu, table, di, null, &creader); - } -} - -fn parseDie( 
- dw: *DwarfInfo, - allocator: Allocator, - cu: *CompileUnit, - table: AbbrevTable, - di: DebugInfo, - parent: ?u32, - creader: anytype, -) anyerror!void { - const tracy = trace(@src()); - defer tracy.end(); - - while (creader.bytes_read < cu.nextCompileUnitOffset()) { - const die = try cu.addDie(allocator); - cu.diePtr(die).* = .{ .code = undefined }; - if (parent) |p| { - try cu.diePtr(p).children.append(allocator, die); - } else { - try cu.children.append(allocator, die); - } - - const code = try leb.readULEB128(Code, creader.reader()); - cu.diePtr(die).code = code; - - if (code == 0) { - if (parent == null) continue; - return; // Close scope - } - - const decl = table.decls.get(code) orelse return error.MalformedDwarf; // TODO better errors - const data = di.debug_info; - try cu.diePtr(die).values.ensureTotalCapacityPrecise(allocator, decl.attrs.values().len); - - for (decl.attrs.values()) |attr| { - const start = std.math.cast(usize, creader.bytes_read) orelse return error.Overflow; - try advanceByFormSize(cu, attr.form, creader); - const end = std.math.cast(usize, creader.bytes_read) orelse return error.Overflow; - const index = try dw.appendDiData(allocator, data[start..end]); - cu.diePtr(die).values.appendAssumeCapacity(.{ .index = index, .len = @intCast(end - start) }); - } - - if (decl.children) { - // Open scope - try dw.parseDie(allocator, cu, table, di, die, creader); - } - } -} - -fn advanceByFormSize(cu: *CompileUnit, form: Form, creader: anytype) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const reader = creader.reader(); - switch (form) { - dwarf.FORM.strp, - dwarf.FORM.sec_offset, - dwarf.FORM.ref_addr, - => { - _ = try readOffset(cu.header.format, reader); - }, - - dwarf.FORM.addr => try reader.skipBytes(cu.header.address_size, .{}), - - dwarf.FORM.block1, - dwarf.FORM.block2, - dwarf.FORM.block4, - dwarf.FORM.block, - => { - const len: u64 = switch (form) { - dwarf.FORM.block1 => try reader.readInt(u8, .little), - 
dwarf.FORM.block2 => try reader.readInt(u16, .little), - dwarf.FORM.block4 => try reader.readInt(u32, .little), - dwarf.FORM.block => try leb.readULEB128(u64, reader), - else => unreachable, - }; - var i: u64 = 0; - while (i < len) : (i += 1) { - _ = try reader.readByte(); - } - }, - - dwarf.FORM.exprloc => { - const len = try leb.readULEB128(u64, reader); - var i: u64 = 0; - while (i < len) : (i += 1) { - _ = try reader.readByte(); - } - }, - dwarf.FORM.flag_present => {}, - - dwarf.FORM.data1, - dwarf.FORM.ref1, - dwarf.FORM.flag, - => try reader.skipBytes(1, .{}), - - dwarf.FORM.data2, - dwarf.FORM.ref2, - => try reader.skipBytes(2, .{}), - - dwarf.FORM.data4, - dwarf.FORM.ref4, - => try reader.skipBytes(4, .{}), - - dwarf.FORM.data8, - dwarf.FORM.ref8, - dwarf.FORM.ref_sig8, - => try reader.skipBytes(8, .{}), - - dwarf.FORM.udata, - dwarf.FORM.ref_udata, - => { - _ = try leb.readULEB128(u64, reader); - }, - - dwarf.FORM.sdata => { - _ = try leb.readILEB128(i64, reader); - }, - - dwarf.FORM.string => { - while (true) { - const byte = try reader.readByte(); - if (byte == 0x0) break; - } - }, - - else => { - // TODO better errors - log.err("unhandled DW_FORM_* value with identifier {x}", .{form}); - return error.UnhandledDwFormValue; - }, - } -} - -fn readOffset(format: Format, reader: anytype) !u64 { - return switch (format) { - .dwarf32 => try reader.readInt(u32, .little), - .dwarf64 => try reader.readInt(u64, .little), - }; -} - -pub const AbbrevTable = struct { - /// Table of abbreviation declarations indexed by their assigned code value - decls: std.AutoArrayHashMapUnmanaged(Code, Decl) = .{}, - - pub fn deinit(table: *AbbrevTable, gpa: Allocator) void { - for (table.decls.values()) |*decl| { - decl.deinit(gpa); - } - table.decls.deinit(gpa); - } -}; - -pub const Decl = struct { - code: Code, - tag: Tag, - children: bool, - - /// Table of attributes indexed by their AT value - attrs: std.AutoArrayHashMapUnmanaged(At, Attr) = .{}, - - pub fn deinit(decl: 
*Decl, gpa: Allocator) void { - decl.attrs.deinit(gpa); - } -}; - -pub const Attr = struct { - at: At, - form: Form, -}; - -pub const At = u64; -pub const Code = u64; -pub const Form = u64; -pub const Tag = u64; - -pub const CompileUnitHeader = struct { - format: Format, - length: u64, - version: u16, - debug_abbrev_offset: u64, - address_size: u8, -}; - -pub const CompileUnit = struct { - header: CompileUnitHeader, - pos: u64, - dies: std.ArrayListUnmanaged(Die) = .{}, - children: std.ArrayListUnmanaged(Die.Index) = .{}, - - pub fn deinit(cu: *CompileUnit, gpa: Allocator) void { - for (cu.dies.items) |*die| { - die.deinit(gpa); - } - cu.dies.deinit(gpa); - cu.children.deinit(gpa); - } - - pub fn addDie(cu: *CompileUnit, gpa: Allocator) !Die.Index { - const index = @as(Die.Index, @intCast(cu.dies.items.len)); - _ = try cu.dies.addOne(gpa); - return index; - } - - pub fn diePtr(cu: *CompileUnit, index: Die.Index) *Die { - return &cu.dies.items[index]; - } - - pub fn getCompileDir(cu: CompileUnit, ctx: DwarfInfo) error{Overflow}!?[:0]const u8 { - assert(cu.dies.items.len > 0); - const die = cu.dies.items[0]; - const res = die.find(dwarf.AT.comp_dir, cu, ctx) orelse return null; - return res.getString(cu.header.format, ctx); - } - - pub fn getSourceFile(cu: CompileUnit, ctx: DwarfInfo) error{Overflow}!?[:0]const u8 { - assert(cu.dies.items.len > 0); - const die = cu.dies.items[0]; - const res = die.find(dwarf.AT.name, cu, ctx) orelse return null; - return res.getString(cu.header.format, ctx); - } - - pub fn nextCompileUnitOffset(cu: CompileUnit) u64 { - return cu.pos + switch (cu.header.format) { - .dwarf32 => @as(u64, 4), - .dwarf64 => 12, - } + cu.header.length; - } -}; - -pub const Die = struct { - code: Code, - values: std.ArrayListUnmanaged(struct { index: u32, len: u32 }) = .{}, - children: std.ArrayListUnmanaged(Die.Index) = .{}, - - pub fn deinit(die: *Die, gpa: Allocator) void { - die.values.deinit(gpa); - die.children.deinit(gpa); - } - - pub fn find(die: 
Die, at: At, cu: CompileUnit, ctx: DwarfInfo) ?DieValue { - const table = ctx.abbrev_tables.get(cu.header.debug_abbrev_offset) orelse return null; - const decl = table.decls.get(die.code).?; - const index = decl.attrs.getIndex(at) orelse return null; - const attr = decl.attrs.values()[index]; - const value = die.values.items[index]; - return .{ .attr = attr, .bytes = ctx.di_data.items[value.index..][0..value.len] }; - } - - pub const Index = u32; -}; - -pub const DieValue = struct { - attr: Attr, - bytes: []const u8, - - pub fn getFlag(value: DieValue) ?bool { - return switch (value.attr.form) { - dwarf.FORM.flag => value.bytes[0] == 1, - dwarf.FORM.flag_present => true, - else => null, - }; - } - - pub fn getString(value: DieValue, format: Format, ctx: DwarfInfo) error{Overflow}!?[:0]const u8 { - switch (value.attr.form) { - dwarf.FORM.string => { - return mem.sliceTo(@as([*:0]const u8, @ptrCast(value.bytes.ptr)), 0); - }, - dwarf.FORM.strp => { - const off = switch (format) { - .dwarf64 => mem.readInt(u64, value.bytes[0..8], .little), - .dwarf32 => mem.readInt(u32, value.bytes[0..4], .little), - }; - const off_u = std.math.cast(usize, off) orelse return error.Overflow; - return ctx.getString(off_u); - }, - else => return null, - } - } - - pub fn getSecOffset(value: DieValue, format: Format) ?u64 { - return switch (value.attr.form) { - dwarf.FORM.sec_offset => switch (format) { - .dwarf32 => mem.readInt(u32, value.bytes[0..4], .little), - .dwarf64 => mem.readInt(u64, value.bytes[0..8], .little), - }, - else => null, - }; - } - - pub fn getConstant(value: DieValue) !?i128 { - var stream = std.io.fixedBufferStream(value.bytes); - const reader = stream.reader(); - return switch (value.attr.form) { - dwarf.FORM.data1 => value.bytes[0], - dwarf.FORM.data2 => mem.readInt(u16, value.bytes[0..2], .little), - dwarf.FORM.data4 => mem.readInt(u32, value.bytes[0..4], .little), - dwarf.FORM.data8 => mem.readInt(u64, value.bytes[0..8], .little), - dwarf.FORM.udata => try 
leb.readULEB128(u64, reader), - dwarf.FORM.sdata => try leb.readILEB128(i64, reader), - else => null, - }; - } - - pub fn getReference(value: DieValue, format: Format) !?u64 { - var stream = std.io.fixedBufferStream(value.bytes); - const reader = stream.reader(); - return switch (value.attr.form) { - dwarf.FORM.ref1 => value.bytes[0], - dwarf.FORM.ref2 => mem.readInt(u16, value.bytes[0..2], .little), - dwarf.FORM.ref4 => mem.readInt(u32, value.bytes[0..4], .little), - dwarf.FORM.ref8 => mem.readInt(u64, value.bytes[0..8], .little), - dwarf.FORM.ref_udata => try leb.readULEB128(u64, reader), - dwarf.FORM.ref_addr => switch (format) { - .dwarf32 => mem.readInt(u32, value.bytes[0..4], .little), - .dwarf64 => mem.readInt(u64, value.bytes[0..8], .little), - }, - else => null, - }; - } - - pub fn getAddr(value: DieValue, header: CompileUnitHeader) ?u64 { - return switch (value.attr.form) { - dwarf.FORM.addr => switch (header.address_size) { - 1 => value.bytes[0], - 2 => mem.readInt(u16, value.bytes[0..2], .little), - 4 => mem.readInt(u32, value.bytes[0..4], .little), - 8 => mem.readInt(u64, value.bytes[0..8], .little), - else => null, - }, - else => null, - }; - } - - pub fn getExprloc(value: DieValue) !?[]const u8 { - if (value.attr.form != dwarf.FORM.exprloc) return null; - var stream = std.io.fixedBufferStream(value.bytes); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); - const expr_len = try leb.readULEB128(u64, reader); - return value.bytes[creader.bytes_read..][0..expr_len]; - } -}; - -pub const Format = enum { - dwarf32, - dwarf64, -}; - -const DebugInfo = struct { - debug_info: []const u8, - debug_abbrev: []const u8, - debug_str: []const u8, -}; - -const assert = std.debug.assert; -const dwarf = std.dwarf; -const leb = std.leb; -const log = std.log.scoped(.link); -const mem = std.mem; -const std = @import("std"); -const trace = @import("../../tracy.zig").trace; - -const Allocator = mem.Allocator; -const DwarfInfo = 
@This(); -const MachO = @import("../MachO.zig"); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 28c3c127e3..c856c65d4e 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -13,7 +13,7 @@ symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, platform: ?MachO.Platform = null, -dwarf_info: ?DwarfInfo = null, +compile_unit: ?CompileUnit = null, stab_files: std.ArrayListUnmanaged(StabFile) = .{}, eh_frame_sect_index: ?u8 = null, @@ -31,12 +31,6 @@ dynamic_relocs: MachO.DynamicRelocs = .{}, output_symtab_ctx: MachO.SymtabCtx = .{}, output_ar_state: Archive.ArState = .{}, -const InArchive = struct { - path: []const u8, - offset: u64, - size: u32, -}; - pub fn isObject(path: []const u8) !bool { const file = try std.fs.cwd().openFile(path, .{}); defer file.close(); @@ -60,7 +54,6 @@ pub fn deinit(self: *Object, allocator: Allocator) void { self.fdes.deinit(allocator); self.eh_frame_data.deinit(allocator); self.unwind_records.deinit(allocator); - if (self.dwarf_info) |*dw| dw.deinit(allocator); for (self.stab_files.items) |*sf| { sf.stabs.deinit(allocator); } @@ -251,8 +244,6 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { // } } - try self.initDwarfInfo(macho_file); - for (self.atoms.items) |atom_index| { const atom = macho_file.getAtom(atom_index).?; const isec = atom.getInputSection(macho_file); @@ -1214,7 +1205,7 @@ fn parseUnwindRecords(self: *Object, macho_file: *MachO) !void { /// and record that so that we can emit symbol stabs. /// TODO in the future, we want parse debug info and debug line sections so that /// we can provide nice error locations to the user. 
-fn initDwarfInfo(self: *Object, macho_file: *MachO) !void { +pub fn parseDebugInfo(self: *Object, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); @@ -1240,17 +1231,107 @@ fn initDwarfInfo(self: *Object, macho_file: *MachO) !void { const debug_str = if (debug_str_index) |index| try self.getSectionData(@intCast(index), macho_file) else &[0]u8{}; defer gpa.free(debug_str); - var dwarf_info = DwarfInfo{}; - errdefer dwarf_info.deinit(gpa); - dwarf_info.init(gpa, .{ + self.compile_unit = self.findCompileUnit(.{ + .gpa = gpa, .debug_info = debug_info, .debug_abbrev = debug_abbrev, .debug_str = debug_str, - }) catch { - try macho_file.reportParseError2(self.index, "invalid __DWARF info found", .{}); - return error.MalformedObject; + }) catch null; // TODO figure out what errors are fatal, and when we silently fail +} + +fn findCompileUnit(self: *Object, args: struct { + gpa: Allocator, + debug_info: []const u8, + debug_abbrev: []const u8, + debug_str: []const u8, +}) !CompileUnit { + var cu_wip: struct { + comp_dir: ?[:0]const u8 = null, + tu_name: ?[:0]const u8 = null, + } = .{}; + + const gpa = args.gpa; + var info_reader = dwarf.InfoReader{ .bytes = args.debug_info, .strtab = args.debug_str }; + var abbrev_reader = dwarf.AbbrevReader{ .bytes = args.debug_abbrev }; + + const cuh = try info_reader.readCompileUnitHeader(); + try abbrev_reader.seekTo(cuh.debug_abbrev_offset); + + const cu_decl = (try abbrev_reader.readDecl()) orelse return error.Eof; + if (cu_decl.tag != dwarf.TAG.compile_unit) return error.UnexpectedTag; + + try info_reader.seekToDie(cu_decl.code, cuh, &abbrev_reader); + + while (try abbrev_reader.readAttr()) |attr| switch (attr.at) { + dwarf.AT.name => { + cu_wip.tu_name = try info_reader.readString(attr.form, cuh); + }, + dwarf.AT.comp_dir => { + cu_wip.comp_dir = try info_reader.readString(attr.form, cuh); + }, + else => switch (attr.form) { + dwarf.FORM.sec_offset, + dwarf.FORM.ref_addr, + => { + _ = try 
info_reader.readOffset(cuh.format); + }, + + dwarf.FORM.addr => { + _ = try info_reader.readNBytes(cuh.address_size); + }, + + dwarf.FORM.block1, + dwarf.FORM.block2, + dwarf.FORM.block4, + dwarf.FORM.block, + => { + _ = try info_reader.readBlock(attr.form); + }, + + dwarf.FORM.exprloc => { + _ = try info_reader.readExprLoc(); + }, + + dwarf.FORM.flag_present => {}, + + dwarf.FORM.data1, + dwarf.FORM.ref1, + dwarf.FORM.flag, + dwarf.FORM.data2, + dwarf.FORM.ref2, + dwarf.FORM.data4, + dwarf.FORM.ref4, + dwarf.FORM.data8, + dwarf.FORM.ref8, + dwarf.FORM.ref_sig8, + dwarf.FORM.udata, + dwarf.FORM.ref_udata, + dwarf.FORM.sdata, + => { + _ = try info_reader.readConstant(attr.form); + }, + + dwarf.FORM.strp, + dwarf.FORM.string, + => { + _ = try info_reader.readString(attr.form, cuh); + }, + + else => { + // TODO actual errors? + log.err("unhandled DW_FORM_* value with identifier {x}", .{attr.form}); + return error.UnhandledForm; + }, + }, + }; + + if (cu_wip.comp_dir == null) return error.MissingCompDir; + if (cu_wip.tu_name == null) return error.MissingTuName; + + return .{ + .comp_dir = try self.addString(gpa, cu_wip.comp_dir.?), + .tu_name = try self.addString(gpa, cu_wip.tu_name.?), }; - self.dwarf_info = dwarf_info; } pub fn resolveSymbols(self: *Object, macho_file: *MachO) void { @@ -1591,10 +1672,9 @@ pub fn calcSymtabSize(self: *Object, macho_file: *MachO) !void { } pub fn calcStabsSize(self: *Object, macho_file: *MachO) error{Overflow}!void { - if (self.dwarf_info) |dw| { - const cu = dw.compile_units.items[0]; - const comp_dir = try cu.getCompileDir(dw) orelse return; - const tu_name = try cu.getSourceFile(dw) orelse return; + if (self.compile_unit) |cu| { + const comp_dir = cu.getCompDir(self); + const tu_name = cu.getTuName(self); self.output_symtab_ctx.nstabs += 4; // N_SO, N_SO, N_OSO, N_SO self.output_symtab_ctx.strsize += @as(u32, @intCast(comp_dir.len + 1)); // comp_dir @@ -1709,10 +1789,9 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO, 
ctx: anytype) error{O var index = self.output_symtab_ctx.istab; - if (self.dwarf_info) |dw| { - const cu = dw.compile_units.items[0]; - const comp_dir = try cu.getCompileDir(dw) orelse return; - const tu_name = try cu.getSourceFile(dw) orelse return; + if (self.compile_unit) |cu| { + const comp_dir = cu.getCompDir(self); + const tu_name = cu.getTuName(self); // Open scope // N_SO comp_dir @@ -1958,10 +2037,7 @@ pub fn hasEhFrameRecords(self: Object) bool { } pub fn hasDebugInfo(self: Object) bool { - if (self.dwarf_info) |dw| { - return dw.compile_units.items.len > 0; - } - return self.hasSymbolStabs(); + return self.compile_unit != null or self.hasSymbolStabs(); } fn hasSymbolStabs(self: Object) bool { @@ -2194,6 +2270,25 @@ const StabFile = struct { }; }; +const CompileUnit = struct { + comp_dir: u32, + tu_name: u32, + + fn getCompDir(cu: CompileUnit, object: *const Object) [:0]const u8 { + return object.getString(cu.comp_dir); + } + + fn getTuName(cu: CompileUnit, object: *const Object) [:0]const u8 { + return object.getString(cu.tu_name); + } +}; + +const InArchive = struct { + path: []const u8, + offset: u64, + size: u32, +}; + const x86_64 = struct { fn parseRelocs( self: *const Object, @@ -2548,6 +2643,7 @@ const aarch64 = struct { }; const assert = std.debug.assert; +const dwarf = @import("dwarf.zig"); const eh_frame = @import("eh_frame.zig"); const log = std.log.scoped(.link); const macho = std.macho; @@ -2560,7 +2656,6 @@ const Allocator = mem.Allocator; const Archive = @import("Archive.zig"); const Atom = @import("Atom.zig"); const Cie = eh_frame.Cie; -const DwarfInfo = @import("DwarfInfo.zig"); const Fde = eh_frame.Fde; const File = @import("file.zig").File; const LoadCommandIterator = macho.LoadCommandIterator; diff --git a/src/link/MachO/dwarf.zig b/src/link/MachO/dwarf.zig new file mode 100644 index 0000000000..2b4977aebe --- /dev/null +++ b/src/link/MachO/dwarf.zig @@ -0,0 +1,285 @@ +pub const InfoReader = struct { + bytes: []const u8, + strtab: 
[]const u8, + pos: usize = 0, + + pub fn readCompileUnitHeader(p: *InfoReader) !CompileUnitHeader { + var length: u64 = try p.readInt(u32); + const is_64bit = length == 0xffffffff; + if (is_64bit) { + length = try p.readInt(u64); + } + const dw_fmt: DwarfFormat = if (is_64bit) .dwarf64 else .dwarf32; + return .{ + .format = dw_fmt, + .length = length, + .version = try p.readInt(u16), + .debug_abbrev_offset = try p.readOffset(dw_fmt), + .address_size = try p.readByte(), + }; + } + + pub fn seekToDie(p: *InfoReader, code: Code, cuh: CompileUnitHeader, abbrev_reader: *AbbrevReader) !void { + const cuh_length = math.cast(usize, cuh.length) orelse return error.Overflow; + const end_pos = p.pos + switch (cuh.format) { + .dwarf32 => @as(usize, 4), + .dwarf64 => 12, + } + cuh_length; + while (p.pos < end_pos) { + const di_code = try p.readULEB128(u64); + if (di_code == 0) return error.Eof; + if (di_code == code) return; + + while (try abbrev_reader.readAttr()) |attr| switch (attr.form) { // skip by DW_FORM encoding, not by DW_AT name + dwarf.FORM.sec_offset, + dwarf.FORM.ref_addr, + => { + _ = try p.readOffset(cuh.format); + }, + + dwarf.FORM.addr => { + _ = try p.readNBytes(cuh.address_size); + }, + + dwarf.FORM.block1, + dwarf.FORM.block2, + dwarf.FORM.block4, + dwarf.FORM.block, + => { + _ = try p.readBlock(attr.form); + }, + + dwarf.FORM.exprloc => { + _ = try p.readExprLoc(); + }, + + dwarf.FORM.flag_present => {}, + + dwarf.FORM.data1, + dwarf.FORM.ref1, + dwarf.FORM.flag, + dwarf.FORM.data2, + dwarf.FORM.ref2, + dwarf.FORM.data4, + dwarf.FORM.ref4, + dwarf.FORM.data8, + dwarf.FORM.ref8, + dwarf.FORM.ref_sig8, + dwarf.FORM.udata, + dwarf.FORM.ref_udata, + dwarf.FORM.sdata, + => { + _ = try p.readConstant(attr.form); + }, + + dwarf.FORM.strp, + dwarf.FORM.string, + => { + _ = try p.readString(attr.form, cuh); + }, + + else => { + // TODO better errors + log.err("unhandled DW_FORM_* value with identifier {x}", .{attr.form}); + return error.UnhandledDwFormValue; + }, + }; + } + } + + pub fn readBlock(p: *InfoReader, 
form: Form) ![]const u8 { + const len: u64 = switch (form) { + dwarf.FORM.block1 => try p.readByte(), + dwarf.FORM.block2 => try p.readInt(u16), + dwarf.FORM.block4 => try p.readInt(u32), + dwarf.FORM.block => try p.readULEB128(u64), + else => unreachable, + }; + return p.readNBytes(len); + } + + pub fn readExprLoc(p: *InfoReader) ![]const u8 { + const len: u64 = try p.readULEB128(u64); + return p.readNBytes(len); + } + + pub fn readConstant(p: *InfoReader, form: Form) !u64 { + return switch (form) { + dwarf.FORM.data1, dwarf.FORM.ref1, dwarf.FORM.flag => try p.readByte(), + dwarf.FORM.data2, dwarf.FORM.ref2 => try p.readInt(u16), + dwarf.FORM.data4, dwarf.FORM.ref4 => try p.readInt(u32), + dwarf.FORM.data8, dwarf.FORM.ref8, dwarf.FORM.ref_sig8 => try p.readInt(u64), + dwarf.FORM.udata, dwarf.FORM.ref_udata => try p.readULEB128(u64), + dwarf.FORM.sdata => @bitCast(try p.readILEB128(i64)), + else => return error.UnhandledConstantForm, + }; + } + + pub fn readString(p: *InfoReader, form: Form, cuh: CompileUnitHeader) ![:0]const u8 { + switch (form) { + dwarf.FORM.strp => { + const off = math.cast(usize, try p.readOffset(cuh.format)) orelse return error.Overflow; + if (off >= p.strtab.len) return error.Eof; // offset points outside the string table + const len = mem.indexOfScalar(u8, p.strtab[off..], 0) orelse return error.Eof; + return p.strtab[off..][0..len :0]; + }, + dwarf.FORM.string => { + const start = p.pos; + const len = mem.indexOfScalar(u8, p.bytes[start..], 0) orelse return error.Eof; + p.pos = start + len + 1; // consume the NUL terminator so the next attribute starts at the right offset + return p.bytes[start..][0..len :0]; + }, + else => unreachable, + } + } + + pub fn readByte(p: *InfoReader) !u8 { + if (p.pos + 1 > p.bytes.len) return error.Eof; + defer p.pos += 1; + return p.bytes[p.pos]; + } + + pub fn readNBytes(p: *InfoReader, num: u64) ![]const u8 { + const num_usize = math.cast(usize, num) orelse return error.Overflow; + if (p.pos > p.bytes.len or num_usize > p.bytes.len - p.pos) return error.Eof; // overflow-safe bounds check + defer p.pos += num_usize; + return p.bytes[p.pos..][0..num_usize]; + } + + pub fn readInt(p: *InfoReader, comptime Int: type) !Int { + if (p.pos + @sizeOf(Int) > p.bytes.len) return error.Eof; + defer 
p.pos += @sizeOf(Int); + return mem.readInt(Int, p.bytes[p.pos..][0..@sizeOf(Int)], .little); + } + + pub fn readOffset(p: *InfoReader, dw_fmt: DwarfFormat) !u64 { + return switch (dw_fmt) { + .dwarf32 => try p.readInt(u32), + .dwarf64 => try p.readInt(u64), + }; + } + + pub fn readULEB128(p: *InfoReader, comptime Type: type) !Type { + var stream = std.io.fixedBufferStream(p.bytes[p.pos..]); + var creader = std.io.countingReader(stream.reader()); + const value: Type = try leb.readULEB128(Type, creader.reader()); + p.pos += creader.bytes_read; + return value; + } + + pub fn readILEB128(p: *InfoReader, comptime Type: type) !Type { + var stream = std.io.fixedBufferStream(p.bytes[p.pos..]); + var creader = std.io.countingReader(stream.reader()); + const value: Type = try leb.readILEB128(Type, creader.reader()); + p.pos += creader.bytes_read; + return value; + } + + pub fn seekTo(p: *InfoReader, off: u64) !void { + p.pos = math.cast(usize, off) orelse return error.Overflow; + } +}; + +pub const AbbrevReader = struct { + bytes: []const u8, + pos: usize = 0, + + pub fn hasMore(p: AbbrevReader) bool { + return p.pos < p.bytes.len; + } + + pub fn readDecl(p: *AbbrevReader) !?AbbrevDecl { + const pos = p.pos; + const code = try p.readULEB128(Code); + if (code == 0) return null; + + const tag = try p.readULEB128(Tag); + const has_children = (try p.readByte()) > 0; + return .{ + .code = code, + .pos = pos, + .len = p.pos - pos, + .tag = tag, + .has_children = has_children, + }; + } + + pub fn readAttr(p: *AbbrevReader) !?AbbrevAttr { + const pos = p.pos; + const at = try p.readULEB128(At); + const form = try p.readULEB128(Form); + return if (at == 0 and form == 0) null else .{ + .at = at, + .form = form, + .pos = pos, + .len = p.pos - pos, + }; + } + + pub fn readByte(p: *AbbrevReader) !u8 { + if (p.pos + 1 > p.bytes.len) return error.Eof; + defer p.pos += 1; + return p.bytes[p.pos]; + } + + pub fn readULEB128(p: *AbbrevReader, comptime Type: type) !Type { + var stream = 
std.io.fixedBufferStream(p.bytes[p.pos..]); + var creader = std.io.countingReader(stream.reader()); + const value: Type = try leb.readULEB128(Type, creader.reader()); + p.pos += creader.bytes_read; + return value; + } + + pub fn seekTo(p: *AbbrevReader, off: u64) !void { + p.pos = math.cast(usize, off) orelse return error.Overflow; + } +}; + +const AbbrevDecl = struct { + code: Code, + pos: usize, + len: usize, + tag: Tag, + has_children: bool, +}; + +const AbbrevAttr = struct { + at: At, + form: Form, + pos: usize, + len: usize, +}; + +const CompileUnitHeader = struct { + format: DwarfFormat, + length: u64, + version: u16, + debug_abbrev_offset: u64, + address_size: u8, +}; + +const Die = struct { + pos: usize, + len: usize, +}; + +const DwarfFormat = enum { + dwarf32, + dwarf64, +}; + +const dwarf = std.dwarf; +const leb = std.leb; +const log = std.log.scoped(.link); +const math = std.math; +const mem = std.mem; +const std = @import("std"); + +const At = u64; +const Code = u64; +const Form = u64; +const Tag = u64; + +pub const AT = dwarf.AT; +pub const FORM = dwarf.FORM; +pub const TAG = dwarf.TAG; diff --git a/src/link/MachO/relocatable.zig b/src/link/MachO/relocatable.zig index 4f7a7cfa2d..3fc37ef9ac 100644 --- a/src/link/MachO/relocatable.zig +++ b/src/link/MachO/relocatable.zig @@ -46,6 +46,7 @@ pub fn flushObject(macho_file: *MachO, comp: *Compilation, module_obj_path: ?[]c try macho_file.addUndefinedGlobals(); try macho_file.resolveSymbols(); + try macho_file.parseDebugInfo(); try macho_file.dedupLiterals(); markExports(macho_file); claimUnresolved(macho_file);