From 0da8ba816a14aeb43c0c98adc13943336f9525fa Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 17 Jul 2022 23:48:44 +0200 Subject: [PATCH] macho: do not store stabs; generate on-the-fly instead --- src/link/MachO.zig | 266 +++++++++++++++++++++++++++++--------- src/link/MachO/Atom.zig | 70 ---------- src/link/MachO/Object.zig | 163 ++--------------------- 3 files changed, 214 insertions(+), 285 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index d4fbd14287..bcfbc4bb1c 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -4,6 +4,7 @@ const std = @import("std"); const build_options = @import("build_options"); const builtin = @import("builtin"); const assert = std.debug.assert; +const dwarf = std.dwarf; const fmt = std.fmt; const fs = std.fs; const log = std.log.scoped(.link); @@ -187,7 +188,6 @@ error_flags: File.ErrorFlags = File.ErrorFlags{}, load_commands_dirty: bool = false, sections_order_dirty: bool = false, has_dices: bool = false, -has_stabs: bool = false, /// A helper var to indicate if we are at the start of the incremental updates, or /// already somewhere further along the update-and-run chain. 
@@ -725,6 +725,7 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node)
         man.hash.add(self.base.options.headerpad_max_install_names);
         man.hash.add(dead_strip);
         man.hash.add(self.base.options.dead_strip_dylibs);
+        man.hash.add(self.base.options.strip);
         man.hash.addListOfBytes(self.base.options.lib_dirs);
         man.hash.addListOfBytes(self.base.options.framework_dirs);
         link.hashAddSystemLibs(&man.hash, self.base.options.frameworks);
@@ -1388,9 +1389,15 @@ fn parseObject(self: *MachO, path: []const u8) !bool {
     const name = try self.base.allocator.dupe(u8, path);
     errdefer self.base.allocator.free(name);
 
+    const mtime: u64 = mtime: {
+        const stat = file.stat() catch break :mtime 0;
+        break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000));
+    };
+
     var object = Object{
         .name = name,
         .file = file,
+        .mtime = mtime,
     };
 
     object.parse(self.base.allocator, self.base.options.target) catch |err| switch (err) {
@@ -2910,7 +2917,6 @@ fn createTentativeDefAtoms(self: *MachO) !void {
         try atom.contained.append(gpa, .{
             .sym_index = global.sym_index,
             .offset = 0,
-            .stab = if (object.debug_info) |_| .static else null,
         });
 
         try object.managed_atoms.append(gpa, atom);
@@ -6188,64 +6194,6 @@ fn writeSymtab(self: *MachO) !void {
     }
 
     for (self.objects.items) |object, object_id| {
-        if (self.has_stabs) {
-            if (object.debug_info) |_| {
-                // Open scope
-                try locals.ensureUnusedCapacity(3);
-                locals.appendAssumeCapacity(.{
-                    .n_strx = try self.strtab.insert(gpa, object.tu_comp_dir.?),
-                    .n_type = macho.N_SO,
-                    .n_sect = 0,
-                    .n_desc = 0,
-                    .n_value = 0,
-                });
-                locals.appendAssumeCapacity(.{
-                    .n_strx = try self.strtab.insert(gpa, object.tu_name.?),
-                    .n_type = macho.N_SO,
-                    .n_sect = 0,
-                    .n_desc = 0,
-                    .n_value = 0,
-                });
-                locals.appendAssumeCapacity(.{
-                    .n_strx = try self.strtab.insert(gpa, object.name),
-                    .n_type = macho.N_OSO,
-                    .n_sect = 0,
-                    .n_desc = 1,
-                    .n_value = object.mtime orelse 0,
-                });
-
-                for (object.managed_atoms.items) |atom| {
-                    for (atom.contained.items) |sym_at_off| {
-                        const stab = sym_at_off.stab orelse continue;
-                        const sym_loc = SymbolWithLoc{
-                            .sym_index = sym_at_off.sym_index,
-                            .file = atom.file,
-                        };
-                        const sym = self.getSymbol(sym_loc);
-                        if (sym.n_strx == 0) continue;
-                        if (sym.n_desc == N_DESC_GCED) continue;
-                        if (self.symbolIsTemp(sym_loc)) continue;
-
-                        const nlists = try stab.asNlists(.{
-                            .sym_index = sym_at_off.sym_index,
-                            .file = atom.file,
-                        }, self);
-                        defer gpa.free(nlists);
-
-                        try locals.appendSlice(nlists);
-                    }
-                }
-
-                // Close scope
-                try locals.append(.{
-                    .n_strx = 0,
-                    .n_type = macho.N_SO,
-                    .n_sect = 0,
-                    .n_desc = 0,
-                    .n_value = 0,
-                });
-            }
-        }
         for (object.symtab.items) |sym, sym_id| {
             if (sym.n_strx == 0) continue; // no name, skip
             if (sym.n_desc == N_DESC_GCED) continue; // GCed, skip
@@ -6256,6 +6204,10 @@ fn writeSymtab(self: *MachO) !void {
             out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(sym_loc));
             try locals.append(out_sym);
         }
+
+        if (!self.base.options.strip) {
+            try self.generateSymbolStabs(object, &locals);
+        }
     }
 
     var exports = std.ArrayList(macho.nlist_64).init(gpa);
@@ -6663,6 +6615,215 @@ pub fn findFirst(comptime T: type, haystack: []const T, start: usize, predicate:
     return i;
 }
 
+/// DWARF data of a single object file: the raw section slices obtained via
+/// `Object.getSectionContents` plus `inner`, the `dwarf.DwarfInfo` opened over them.
+/// Release with `deinit` once the stabs have been generated.
+const DebugInfo = struct {
+    inner: dwarf.DwarfInfo,
+    debug_info: []const u8,
+    debug_abbrev: []const u8,
+    debug_str: []const u8,
+    debug_line: []const u8,
+    debug_line_str: []const u8,
+    debug_ranges: []const u8,
+
+    /// Returns `null` when the object has no debug_info, debug_abbrev, debug_str or
+    /// debug_line section index; debug_line_str and debug_ranges default to empty.
+    /// On success the result must be released with `deinit` using the same allocator.
+    pub fn parse(allocator: Allocator, object: Object) !?DebugInfo {
+        var debug_info = blk: {
+            const index = object.dwarf_debug_info_index orelse return null;
+            break :blk try object.getSectionContents(index);
+        };
+        var debug_abbrev = blk: {
+            const index = object.dwarf_debug_abbrev_index orelse return null;
+            break :blk try object.getSectionContents(index);
+        };
+        var debug_str = blk: {
+            const index = object.dwarf_debug_str_index orelse return null;
+            break :blk try object.getSectionContents(index);
+        };
+        var debug_line = blk: {
+            const index = object.dwarf_debug_line_index orelse return null;
+            break :blk try object.getSectionContents(index);
+        };
+        var debug_line_str = blk: {
+            if (object.dwarf_debug_line_str_index) |ind| {
+                break :blk try object.getSectionContents(ind);
+            }
+            break :blk &[0]u8{};
+        };
+        var debug_ranges = blk: {
+            if (object.dwarf_debug_ranges_index) |ind| {
+                break :blk try object.getSectionContents(ind);
+            }
+            break :blk &[0]u8{};
+        };
+
+        var inner: dwarf.DwarfInfo = .{
+            .endian = .Little,
+            .debug_info = debug_info,
+            .debug_abbrev = debug_abbrev,
+            .debug_str = debug_str,
+            .debug_line = debug_line,
+            .debug_line_str = debug_line_str,
+            .debug_ranges = debug_ranges,
+        };
+        try dwarf.openDwarfDebugInfo(&inner, allocator);
+
+        return DebugInfo{
+            .inner = inner,
+            .debug_info = debug_info,
+            .debug_abbrev = debug_abbrev,
+            .debug_str = debug_str,
+            .debug_line = debug_line,
+            .debug_line_str = debug_line_str,
+            .debug_ranges = debug_ranges,
+        };
+    }
+
+    /// Forwards to `dwarf.DwarfInfo.deinit` for the state opened in `parse`.
+    pub fn deinit(self: *DebugInfo, allocator: Allocator) void {
+        self.inner.deinit(allocator);
+    }
+};
+
+/// Appends the debug stabs for `object` to `locals`: an opening N_SO (comp dir),
+/// N_SO (translation unit name), N_OSO (object path and mtime) triple, then per
+/// contained symbol either a BNSYM/FUN/FUN/ENSYM group (when its source address
+/// lies in a DWARF function range) or a single N_STSYM, and a closing N_SO.
+/// Returns without emitting anything if the object has no usable debug info.
+/// Must not be called when stripping.
+pub fn generateSymbolStabs(
+    self: *MachO,
+    object: Object,
+    locals: *std.ArrayList(macho.nlist_64),
+) !void {
+    assert(!self.base.options.strip);
+
+    const gpa = self.base.allocator;
+
+    log.debug("parsing debug info in '{s}'", .{object.name});
+
+    var debug_info = (try DebugInfo.parse(gpa, object)) orelse return;
+    // The parsed DWARF tables are only needed while this object's stabs are emitted.
+    defer debug_info.deinit(gpa);
+
+    // We assume there is only one CU.
+    const compile_unit = debug_info.inner.findCompileUnit(0x0) catch |err| switch (err) {
+        error.MissingDebugInfo => {
+            // TODO audit cases with missing debug info and audit our dwarf.zig module.
+            log.debug("invalid or missing debug info in {s}; skipping", .{object.name});
+            return;
+        },
+        else => |e| return e,
+    };
+    const tu_name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.name);
+    const tu_comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.comp_dir);
+    const source_symtab = object.getSourceSymtab();
+
+    // Open scope
+    try locals.ensureUnusedCapacity(3);
+    locals.appendAssumeCapacity(.{
+        .n_strx = try self.strtab.insert(gpa, tu_comp_dir),
+        .n_type = macho.N_SO,
+        .n_sect = 0,
+        .n_desc = 0,
+        .n_value = 0,
+    });
+    locals.appendAssumeCapacity(.{
+        .n_strx = try self.strtab.insert(gpa, tu_name),
+        .n_type = macho.N_SO,
+        .n_sect = 0,
+        .n_desc = 0,
+        .n_value = 0,
+    });
+    locals.appendAssumeCapacity(.{
+        .n_strx = try self.strtab.insert(gpa, object.name),
+        .n_type = macho.N_OSO,
+        .n_sect = 0,
+        .n_desc = 1,
+        .n_value = object.mtime,
+    });
+
+    for (object.managed_atoms.items) |atom| {
+        for (atom.contained.items) |sym_at_off| {
+            const sym_loc = SymbolWithLoc{
+                .sym_index = sym_at_off.sym_index,
+                .file = atom.file,
+            };
+            const sym = self.getSymbol(sym_loc);
+            const sym_name = self.getSymbolName(sym_loc);
+            if (sym.n_strx == 0) continue;
+            if (sym.n_desc == N_DESC_GCED) continue;
+            if (self.symbolIsTemp(sym_loc)) continue;
+            if (sym_at_off.sym_index >= source_symtab.len) continue; // synthetic, linker generated
+
+            const source_sym = source_symtab[sym_at_off.sym_index];
+            const size: ?u64 = size: {
+                if (source_sym.tentative()) break :size null;
+                for (debug_info.inner.func_list.items) |func| {
+                    if (func.pc_range) |range| {
+                        if (source_sym.n_value >= range.start and source_sym.n_value < range.end) {
+                            break :size range.end - range.start;
+                        }
+                    }
+                }
+                break :size null;
+            };
+
+            if (size) |ss| {
+                try locals.ensureUnusedCapacity(4);
+                locals.appendAssumeCapacity(.{
+                    .n_strx = 0,
+                    .n_type = macho.N_BNSYM,
+                    .n_sect = sym.n_sect,
+                    .n_desc = 0,
+                    .n_value = sym.n_value,
+                });
+                locals.appendAssumeCapacity(.{
+                    .n_strx = try self.strtab.insert(gpa, sym_name),
+                    .n_type = macho.N_FUN,
+                    .n_sect = sym.n_sect,
+                    .n_desc = 0,
+                    .n_value = sym.n_value,
+                });
+                locals.appendAssumeCapacity(.{
+                    .n_strx = 0,
+                    .n_type = macho.N_FUN,
+                    .n_sect = 0,
+                    .n_desc = 0,
+                    .n_value = ss,
+                });
+                locals.appendAssumeCapacity(.{
+                    .n_strx = 0,
+                    .n_type = macho.N_ENSYM,
+                    .n_sect = sym.n_sect,
+                    .n_desc = 0,
+                    .n_value = ss,
+                });
+            } else {
+                try locals.append(.{
+                    .n_strx = try self.strtab.insert(gpa, sym_name),
+                    .n_type = macho.N_STSYM,
+                    .n_sect = sym.n_sect,
+                    .n_desc = 0,
+                    .n_value = sym.n_value,
+                });
+            }
+        }
+    }
+
+    // Close scope
+    try locals.append(.{
+        .n_strx = 0,
+        .n_type = macho.N_SO,
+        .n_sect = 0,
+        .n_desc = 0,
+        .n_value = 0,
+    });
+}
+
 fn snapshotState(self: *MachO) !void {
     const emit = self.base.options.emit orelse {
         log.debug("no emit directory found; skipping snapshot...", .{});
diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig
index e5a940bdda..7aa4e1093a 100644
--- a/src/link/MachO/Atom.zig
+++ b/src/link/MachO/Atom.zig
@@ -78,76 +78,6 @@ pub const Binding = struct {
 pub const SymbolAtOffset = struct {
     sym_index: u32,
     offset: u64,
-    stab: ?Stab = null,
-};
-
-pub const Stab = union(enum) {
-    function: u64,
-    static,
-    global,
-
-    pub fn asNlists(stab: Stab, sym_loc: SymbolWithLoc, macho_file: *MachO) ![]macho.nlist_64 {
-        const gpa = macho_file.base.allocator;
-
-        var nlists = std.ArrayList(macho.nlist_64).init(gpa);
-        defer nlists.deinit();
-
-        const sym = macho_file.getSymbol(sym_loc);
-        const sym_name = macho_file.getSymbolName(sym_loc);
-        switch (stab) {
-            .function => |size| {
-                try nlists.ensureUnusedCapacity(4);
-                nlists.appendAssumeCapacity(.{
-                    .n_strx = 0,
-                    .n_type = macho.N_BNSYM,
-                    .n_sect = sym.n_sect,
-                    .n_desc = 0,
-                    .n_value = sym.n_value,
-                });
-                nlists.appendAssumeCapacity(.{
-                    .n_strx = try macho_file.strtab.insert(gpa, sym_name),
-                    .n_type = macho.N_FUN,
-                    .n_sect = sym.n_sect,
-                    .n_desc = 0,
-                    .n_value = sym.n_value,
-                });
-                nlists.appendAssumeCapacity(.{
-                    .n_strx = 0,
-                    .n_type = macho.N_FUN,
-                    .n_sect = 0,
-                    .n_desc = 0,
-                    .n_value = size,
-                });
-                nlists.appendAssumeCapacity(.{
-                    .n_strx = 0,
-                    .n_type = macho.N_ENSYM,
-                    .n_sect = sym.n_sect,
-                    .n_desc = 0,
-                    .n_value = size,
-                });
-            },
-            .global => {
-                try nlists.append(.{
-                    .n_strx = try macho_file.strtab.insert(gpa, sym_name),
-                    .n_type = macho.N_GSYM,
-                    .n_sect = 0,
-                    .n_desc = 0,
-                    .n_value = 0,
-                });
-            },
-            .static => {
-                try nlists.append(.{
-                    .n_strx = try macho_file.strtab.insert(gpa, sym_name),
-                    .n_type = macho.N_STSYM,
-                    .n_sect = sym.n_sect,
-                    .n_desc = 0,
-                    .n_value = sym.n_value,
-                });
-            },
-        }
-
-        return nlists.toOwnedSlice();
-    }
 };
 
 pub const Relocation = struct {
diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig
index 82d872f68c..2901b54087 100644
--- a/src/link/MachO/Object.zig
+++ b/src/link/MachO/Object.zig
@@ -3,7 +3,6 @@ const Object = @This();
 const std = @import("std");
 const build_options = @import("build_options");
 const assert = std.debug.assert;
-const dwarf = std.dwarf;
 const fs = std.fs;
 const io = std.io;
 const log = std.log.scoped(.link);
@@ -17,9 +16,11 @@ const Allocator = mem.Allocator;
 const Atom = @import("Atom.zig");
 const MachO = @import("../MachO.zig");
 const MatchingSection = MachO.MatchingSection;
+const SymbolWithLoc = MachO.SymbolWithLoc;
 
 file: fs.File,
 name: []const u8,
+mtime: u64,
 
 /// Data contents of the file. Includes sections, and data of load commands.
 /// Excludes the backing memory for the header and load commands.
@@ -51,12 +52,6 @@ symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, strtab: []const u8 = &.{}, data_in_code_entries: []const macho.data_in_code_entry = &.{}, -// Debug info -debug_info: ?DebugInfo = null, -tu_name: ?[]const u8 = null, -tu_comp_dir: ?[]const u8 = null, -mtime: ?u64 = null, - sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{}, /// List of atoms that map to the symbols parsed from this object file. @@ -65,72 +60,6 @@ managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, /// Table of atoms belonging to this object file indexed by the symbol index. atom_by_index_table: std.AutoHashMapUnmanaged(u32, *Atom) = .{}, -const DebugInfo = struct { - inner: dwarf.DwarfInfo, - debug_info: []const u8, - debug_abbrev: []const u8, - debug_str: []const u8, - debug_line: []const u8, - debug_line_str: []const u8, - debug_ranges: []const u8, - - pub fn parseFromObject(allocator: Allocator, object: *const Object) !?DebugInfo { - var debug_info = blk: { - const index = object.dwarf_debug_info_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_abbrev = blk: { - const index = object.dwarf_debug_abbrev_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_str = blk: { - const index = object.dwarf_debug_str_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_line = blk: { - const index = object.dwarf_debug_line_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_line_str = blk: { - if (object.dwarf_debug_line_str_index) |ind| { - break :blk try object.getSectionContents(ind); - } - break :blk &[0]u8{}; - }; - var debug_ranges = blk: { - if (object.dwarf_debug_ranges_index) |ind| { - break :blk try object.getSectionContents(ind); - } - break :blk &[0]u8{}; - }; - - var inner: dwarf.DwarfInfo = .{ - .endian = .Little, - .debug_info = debug_info, - .debug_abbrev = debug_abbrev, - .debug_str = 
debug_str, - .debug_line = debug_line, - .debug_line_str = debug_line_str, - .debug_ranges = debug_ranges, - }; - try dwarf.openDwarfDebugInfo(&inner, allocator); - - return DebugInfo{ - .inner = inner, - .debug_info = debug_info, - .debug_abbrev = debug_abbrev, - .debug_str = debug_str, - .debug_line = debug_line, - .debug_line_str = debug_line_str, - .debug_ranges = debug_ranges, - }; - } - - pub fn deinit(self: *DebugInfo, allocator: Allocator) void { - self.inner.deinit(allocator); - } -}; - pub fn deinit(self: *Object, gpa: Allocator) void { for (self.load_commands.items) |*lc| { lc.deinit(gpa); @@ -147,10 +76,6 @@ pub fn deinit(self: *Object, gpa: Allocator) void { self.managed_atoms.deinit(gpa); gpa.free(self.name); - - if (self.debug_info) |*db| { - db.deinit(gpa); - } } pub fn parse(self: *Object, allocator: Allocator, target: std.Target) !void { @@ -253,7 +178,6 @@ pub fn parse(self: *Object, allocator: Allocator, target: std.Target) !void { try self.parseSymtab(allocator); self.parseDataInCode(); - try self.parseDebugInfo(allocator); } const Context = struct { @@ -462,7 +386,6 @@ pub fn splitIntoAtomsOneShot( } break :blk false; }; - macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null; if (subsections_via_symbols and filtered_syms.len > 0) { // If the first nlist does not match the start of the section, @@ -566,7 +489,6 @@ pub fn splitIntoAtomsOneShot( try atom.contained.append(gpa, .{ .sym_index = alias, .offset = 0, - .stab = null, }); try self.atom_by_index_table.put(gpa, alias, atom); } @@ -671,54 +593,17 @@ fn createAtomFromSubsection( // we can properly allocate addresses down the line. // While we're at it, we need to update segment,section mapping of each symbol too. try atom.contained.ensureTotalCapacity(gpa, indexes.len + 1); - - { - const stab: ?Atom.Stab = if (self.debug_info) |di| blk: { - // TODO there has to be a better to handle this. 
- for (di.inner.func_list.items) |func| { - if (func.pc_range) |range| { - if (sym.n_value >= range.start and sym.n_value < range.end) { - break :blk Atom.Stab{ - .function = range.end - range.start, - }; - } - } - } - // TODO - // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global; - break :blk .static; - } else null; - - atom.contained.appendAssumeCapacity(.{ - .sym_index = sym_index, - .offset = 0, - .stab = stab, - }); - } + atom.contained.appendAssumeCapacity(.{ + .sym_index = sym_index, + .offset = 0, + }); for (indexes) |inner_sym_index| { const inner_sym = &self.symtab.items[inner_sym_index.index]; inner_sym.n_sect = macho_file.getSectionOrdinal(match); - - const stab: ?Atom.Stab = if (self.debug_info) |di| blk: { - // TODO there has to be a better to handle this. - for (di.inner.func_list.items) |func| { - if (func.pc_range) |range| { - if (inner_sym.n_value >= range.start and inner_sym.n_value < range.end) { - break :blk Atom.Stab{ - .function = range.end - range.start, - }; - } - } - } - // TODO - // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global; - break :blk .static; - } else null; atom.contained.appendAssumeCapacity(.{ .sym_index = inner_sym_index.index, .offset = inner_sym.n_value - sym.n_value, - .stab = stab, }); try self.atom_by_index_table.putNoClobber(gpa, inner_sym_index.index, atom); @@ -755,7 +640,7 @@ fn parseSymtab(self: *Object, allocator: Allocator) !void { self.strtab = self.contents[symtab.stroff..][0..symtab.strsize]; } -fn getSourceSymtab(self: *Object) []const macho.nlist_64 { +pub fn getSourceSymtab(self: Object) []const macho.nlist_64 { const index = self.symtab_cmd_index orelse return &[0]macho.nlist_64{}; const symtab = self.load_commands.items[index].symtab; const symtab_size = @sizeOf(macho.nlist_64) * symtab.nsyms; @@ -766,38 +651,6 @@ fn getSourceSymtab(self: *Object) []const macho.nlist_64 { ); } -fn parseDebugInfo(self: *Object, allocator: Allocator) !void { - log.debug("parsing 
debug info in '{s}'", .{self.name}); - - var debug_info = blk: { - var di = try DebugInfo.parseFromObject(allocator, self); - break :blk di orelse return; - }; - - // We assume there is only one CU. - const compile_unit = debug_info.inner.findCompileUnit(0x0) catch |err| switch (err) { - error.MissingDebugInfo => { - // TODO audit cases with missing debug info and audit our dwarf.zig module. - log.debug("invalid or missing debug info in {s}; skipping", .{self.name}); - return; - }, - else => |e| return e, - }; - const name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.name); - const comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.comp_dir); - - self.debug_info = debug_info; - self.tu_name = name; - self.tu_comp_dir = comp_dir; - - if (self.mtime == null) { - self.mtime = mtime: { - const stat = self.file.stat() catch break :mtime 0; - break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000)); - }; - } -} - fn parseDataInCode(self: *Object) void { const index = self.data_in_code_cmd_index orelse return; const data_in_code = self.load_commands.items[index].linkedit_data; @@ -808,7 +661,7 @@ fn parseDataInCode(self: *Object) void { ); } -fn getSectionContents(self: Object, sect_id: u16) error{Overflow}![]const u8 { +pub fn getSectionContents(self: Object, sect_id: u16) error{Overflow}![]const u8 { const sect = self.getSection(sect_id); const size = math.cast(usize, sect.size) orelse return error.Overflow; log.debug("getting {s},{s} data at 0x{x} - 0x{x}", .{