//! In-memory representation of a single relocatable Mach-O object file
//! (`MH_OBJECT`). An `Object` reads the Mach-O header, load commands, symbol
//! table, string table, data-in-code entries and (when present) DWARF debug
//! info from `file`, and can split its sections into `TextBlock`s that are
//! registered with the owning `MachO` linker instance.
const Object = @This();

const std = @import("std");
const assert = std.debug.assert;
const dwarf = std.dwarf;
const fs = std.fs;
const io = std.io;
const log = std.log.scoped(.object);
const macho = std.macho;
const math = std.math;
const mem = std.mem;
const sort = std.sort;
const commands = @import("commands.zig");
const segmentName = commands.segmentName;
const sectionName = commands.sectionName;

const Allocator = mem.Allocator;
const LoadCommand = commands.LoadCommand;
const MachO = @import("../MachO.zig");
const TextBlock = @import("TextBlock.zig");

/// Handle the object is read from. `deinit` does NOT close it.
file: fs.File,
/// Name of the object; freed by `deinit`, so it must be allocated with the
/// same allocator that is later passed to `deinit`.
name: []const u8,

/// Offset of the Mach-O header within `file`. `parse` seeks here before
/// reading, and `readLoadCommands` adds it to every file offset it records.
/// `null` is treated as offset 0.
file_offset: ?u32 = null,

/// Mach-O header; populated by `parse`.
header: ?macho.mach_header_64 = null,

/// All load commands in file order; populated by `readLoadCommands`.
load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},

// Indices into `load_commands` of the commands this linker cares about.
segment_cmd_index: ?u16 = null,
symtab_cmd_index: ?u16 = null,
dysymtab_cmd_index: ?u16 = null,
build_version_cmd_index: ?u16 = null,
data_in_code_cmd_index: ?u16 = null,

// Indices of well-known sections within the segment command.
text_section_index: ?u16 = null,
mod_init_func_section_index: ?u16 = null,

// __DWARF segment sections
dwarf_debug_info_index: ?u16 = null,
dwarf_debug_abbrev_index: ?u16 = null,
dwarf_debug_str_index: ?u16 = null,
dwarf_debug_line_index: ?u16 = null,
dwarf_debug_ranges_index: ?u16 = null,

/// Raw symbol table entries, in file order.
symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{},
/// Raw string table bytes; indexed by `nlist_64.n_strx` via `getString`.
strtab: std.ArrayListUnmanaged(u8) = .{},

/// Data-in-code entries read from `LC_DATA_IN_CODE`, if that command exists.
data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},

// Debug info
debug_info: ?DebugInfo = null,
tu_name: ?[]const u8 = null,
tu_comp_dir: ?[]const u8 = null,
mtime: ?u64 = null,

/// Blocks created from this object by `parseTextBlocks`. Only the list itself
/// is freed by `deinit`; the blocks are also appended to
/// `MachO.managed_blocks`.
text_blocks: std.ArrayListUnmanaged(*TextBlock) = .{},
/// Maps a section index to the index (in `MachO.locals`) of the synthetic
/// local symbol that stands for the start of that section.
sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{},

// TODO symbol mapping and its inverse can probably be simple arrays
// instead of hash maps.
/// Index into `symtab` -> index into `MachO.locals`.
symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{},
/// Index into `MachO.locals` -> index into `symtab`.
reverse_symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{},

/// Owns the raw DWARF section contents together with the parsed
/// `dwarf.DwarfInfo` that refers to them.
const DebugInfo = struct {
    inner: dwarf.DwarfInfo,
    debug_info: []u8,
    debug_abbrev: []u8,
    debug_str: []u8,
    debug_line: []u8,
    debug_ranges: []u8,

    /// Reads the DWARF sections of `object` and opens them as a
    /// `dwarf.DwarfInfo`.
    ///
    /// Returns `null` when any of `__debug_info`, `__debug_abbrev`,
    /// `__debug_str` or `__debug_line` is absent. `__debug_ranges` is
    /// optional; an empty buffer is substituted when it is missing.
    /// On success the caller owns the result and must call `deinit`.
    pub fn parseFromObject(allocator: *Allocator, object: *const Object) !?DebugInfo {
        // Resolve every mandatory index before reading anything, so that a
        // missing section cannot leak buffers that were already read.
        const info_index = object.dwarf_debug_info_index orelse return null;
        const abbrev_index = object.dwarf_debug_abbrev_index orelse return null;
        const str_index = object.dwarf_debug_str_index orelse return null;
        const line_index = object.dwarf_debug_line_index orelse return null;

        const debug_info = try object.readSection(allocator, info_index);
        errdefer allocator.free(debug_info);

        const debug_abbrev = try object.readSection(allocator, abbrev_index);
        errdefer allocator.free(debug_abbrev);

        const debug_str = try object.readSection(allocator, str_index);
        errdefer allocator.free(debug_str);

        const debug_line = try object.readSection(allocator, line_index);
        errdefer allocator.free(debug_line);

        const debug_ranges = if (object.dwarf_debug_ranges_index) |ranges_index|
            try object.readSection(allocator, ranges_index)
        else
            try allocator.alloc(u8, 0);
        errdefer allocator.free(debug_ranges);

        var inner: dwarf.DwarfInfo = .{
            .endian = .Little,
            .debug_info = debug_info,
            .debug_abbrev = debug_abbrev,
            .debug_str = debug_str,
            .debug_line = debug_line,
            .debug_ranges = debug_ranges,
        };
        // NOTE(review): if this fails part-way, lists that `inner` may already
        // have allocated are not released here; only the section buffers are.
        try dwarf.openDwarfDebugInfo(&inner, allocator);

        return DebugInfo{
            .inner = inner,
            .debug_info = debug_info,
            .debug_abbrev = debug_abbrev,
            .debug_str = debug_str,
            .debug_line = debug_line,
            .debug_ranges = debug_ranges,
        };
    }

    /// Frees the section buffers and the lists held by `inner`.
    pub fn deinit(self: *DebugInfo, allocator: *Allocator) void {
        allocator.free(self.debug_info);
        allocator.free(self.debug_abbrev);
        allocator.free(self.debug_str);
        allocator.free(self.debug_line);
        allocator.free(self.debug_ranges);
        self.inner.abbrev_table_list.deinit();
        self.inner.compile_unit_list.deinit();
        self.inner.func_list.deinit();
    }
};

/// Releases everything owned by this object, including `name`.
/// The file handle is left open.
pub fn deinit(self: *Object, allocator: *Allocator) void {
    for (self.load_commands.items) |*lc| {
        lc.deinit(allocator);
    }
    self.load_commands.deinit(allocator);
    self.data_in_code_entries.deinit(allocator);
    self.symtab.deinit(allocator);
    self.strtab.deinit(allocator);
    self.text_blocks.deinit(allocator);
    self.sections_as_symbols.deinit(allocator);
    self.symbol_mapping.deinit(allocator);
    self.reverse_symbol_mapping.deinit(allocator);
    allocator.free(self.name);

    if (self.debug_info) |*db| {
        db.deinit(allocator);
    }

    if (self.tu_name) |n| {
        allocator.free(n);
    }

    if (self.tu_comp_dir) |n| {
        allocator.free(n);
    }
}

/// Opens `path` and parses it as a Mach-O object for `target`.
///
/// Returns `null` when the file does not exist, is truncated
/// (`error.EndOfStream`) or is not an `MH_OBJECT` (`error.NotObject`).
/// Any other failure is returned as an error. On every non-success path the
/// file is closed and all memory allocated here is released.
pub fn createAndParseFromPath(allocator: *Allocator, target: std.Target, path: []const u8) !?Object {
    const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) {
        error.FileNotFound => return null,
        else => |e| return e,
    };
    errdefer file.close();

    // Nothing can fail between this allocation and handing `name` over to
    // `object`, so `object.deinit` is the single place that frees it.
    const name = try allocator.dupe(u8, path);

    var object = Object{
        .name = name,
        .file = file,
    };

    object.parse(allocator, target) catch |err| {
        // Release whatever `parse` managed to allocate (and `name`).
        object.deinit(allocator);
        switch (err) {
            error.EndOfStream, error.NotObject => {
                // This is not an error return, so the `errdefer` above does
                // not run; close the handle explicitly.
                file.close();
                return null;
            },
            else => |e| return e,
        }
    };

    return object;
}

/// Reads and validates the Mach-O header, then parses load commands, the
/// symbol table, data-in-code entries and debug info.
///
/// Errors: `error.NotObject` if the file type is not `MH_OBJECT`,
/// `error.UnsupportedCpuArchitecture` for CPU types other than arm64/x86_64,
/// `error.MismatchedCpuArchitecture` if the object's architecture differs
/// from `target`, plus any I/O or allocation error.
pub fn parse(self: *Object, allocator: *Allocator, target: std.Target) !void {
    const reader = self.file.reader();
    if (self.file_offset) |offset| {
        try reader.context.seekTo(offset);
    }

    const header = try reader.readStruct(macho.mach_header_64);

    if (header.filetype != macho.MH_OBJECT) {
        log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{
            macho.MH_OBJECT,
            header.filetype,
        });
        return error.NotObject;
    }

    const this_arch: std.Target.Cpu.Arch = switch (header.cputype) {
        macho.CPU_TYPE_ARM64 => .aarch64,
        macho.CPU_TYPE_X86_64 => .x86_64,
        else => |value| {
            log.err("unsupported cpu architecture 0x{x}", .{value});
            return error.UnsupportedCpuArchitecture;
        },
    };
    if (this_arch != target.cpu.arch) {
        log.err("mismatched cpu architecture: expected {s}, found {s}", .{ target.cpu.arch, this_arch });
        return error.MismatchedCpuArchitecture;
    }

    self.header = header;

    try self.readLoadCommands(allocator, reader);
    try self.parseSymtab(allocator);
    try self.parseDataInCode(allocator);
    try self.parseDebugInfo(allocator);
}

/// Reads `header.ncmds` load commands from `reader`, records the indices of
/// the commands and sections of interest, and rebases every recorded file
/// offset by `file_offset`. Must be called after `header` has been set.
pub fn readLoadCommands(self: *Object, allocator: *Allocator, reader: anytype) !void {
    const header = self.header orelse unreachable; // Unreachable here signifies a fatal unexplored condition.
    const offset = self.file_offset orelse 0;

    try self.load_commands.ensureCapacity(allocator, header.ncmds);

    var i: u16 = 0;
    while (i < header.ncmds) : (i += 1) {
        var cmd = try LoadCommand.read(allocator, reader);
        switch (cmd.cmd()) {
            macho.LC_SEGMENT_64 => {
                self.segment_cmd_index = i;
                // Take a pointer rather than a copy so that the `fileoff`
                // adjustment below lands in the command that gets stored.
                const seg = &cmd.Segment;
                for (seg.sections.items) |*sect, j| {
                    const index = @intCast(u16, j);
                    const segname = segmentName(sect.*);
                    const sectname = sectionName(sect.*);
                    if (mem.eql(u8, segname, "__DWARF")) {
                        if (mem.eql(u8, sectname, "__debug_info")) {
                            self.dwarf_debug_info_index = index;
                        } else if (mem.eql(u8, sectname, "__debug_abbrev")) {
                            self.dwarf_debug_abbrev_index = index;
                        } else if (mem.eql(u8, sectname, "__debug_str")) {
                            self.dwarf_debug_str_index = index;
                        } else if (mem.eql(u8, sectname, "__debug_line")) {
                            self.dwarf_debug_line_index = index;
                        } else if (mem.eql(u8, sectname, "__debug_ranges")) {
                            self.dwarf_debug_ranges_index = index;
                        }
                    } else if (mem.eql(u8, segname, "__TEXT")) {
                        if (mem.eql(u8, sectname, "__text")) {
                            self.text_section_index = index;
                        }
                    } else if (mem.eql(u8, segname, "__DATA")) {
                        if (mem.eql(u8, sectname, "__mod_init_func")) {
                            self.mod_init_func_section_index = index;
                        }
                    }

                    sect.offset += offset;
                    // A zero `reloff` is left untouched.
                    if (sect.reloff > 0) {
                        sect.reloff += offset;
                    }
                }

                seg.inner.fileoff += offset;
            },
            macho.LC_SYMTAB => {
                self.symtab_cmd_index = i;
                cmd.Symtab.symoff += offset;
                cmd.Symtab.stroff += offset;
            },
            macho.LC_DYSYMTAB => {
                self.dysymtab_cmd_index = i;
            },
            macho.LC_BUILD_VERSION => {
                self.build_version_cmd_index = i;
            },
            macho.LC_DATA_IN_CODE => {
                self.data_in_code_cmd_index = i;
                cmd.LinkeditData.dataoff += offset;
            },
            else => {
                log.debug("Unknown load command detected: 0x{x}.", .{cmd.cmd()});
            },
        }
        self.load_commands.appendAssumeCapacity(cmd);
    }
}

/// A symbol table entry paired with an index. Initially the index is the
/// entry's position in `symtab`; `TextBlockParser.next` rewrites it to the
/// corresponding index in `MachO.locals`.
const NlistWithIndex = struct {
    nlist: macho.nlist_64,
    index: u32,

    /// Sort predicate: section-defined symbols come first, ordered by
    /// address; everything else follows in unspecified relative order.
    fn lessThan(_: void, lhs: NlistWithIndex, rhs: NlistWithIndex) bool {
        // We sort by type: defined < undefined, and
        // afterwards by address in each group. Normally, dysymtab should
        // be enough to guarantee the sort, but turns out not every compiler
        // is kind enough to specify the symbols in the correct order.
        if (MachO.symbolIsSect(lhs.nlist)) {
            if (MachO.symbolIsSect(rhs.nlist)) {
                // Same group, sort by address.
                return lhs.nlist.n_value < rhs.nlist.n_value;
            } else {
                return true;
            }
        } else {
            return false;
        }
    }

    /// Returns the sub-slice of `symbols` whose addresses fall within
    /// `[sect.addr, sect.addr + sect.size)`. `symbols` is expected to be
    /// sorted by address (see `lessThan`).
    fn filterInSection(symbols: []NlistWithIndex, sect: macho.section_64) []NlistWithIndex {
        const Predicate = struct {
            addr: u64,

            pub fn predicate(self: @This(), symbol: NlistWithIndex) bool {
                return symbol.nlist.n_value >= self.addr;
            }
        };

        const start = MachO.findFirst(NlistWithIndex, symbols, 0, Predicate{ .addr = sect.addr });
        const end = MachO.findFirst(NlistWithIndex, symbols, start, Predicate{ .addr = sect.addr + sect.size });

        return symbols[start..end];
    }
};

/// Returns the sub-slice of `dices` whose `offset` falls within
/// `[start_addr, end_addr)`, using the same `MachO.findFirst` search as
/// `NlistWithIndex.filterInSection`.
fn filterDice(dices: []macho.data_in_code_entry, start_addr: u64, end_addr: u64) []macho.data_in_code_entry {
    const Predicate = struct {
        addr: u64,

        pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool {
            return dice.offset >= self.addr;
        }
    };

    const start = MachO.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr });
    const end = MachO.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr });

    return dices[start..end];
}

/// Everything `TextBlockParser` needs besides its own state.
const Context = struct {
    allocator: *Allocator,
    object: *Object,
    macho_file: *MachO,
    match: MachO.MatchingSection,
};

/// Iterator that carves one section into `TextBlock`s, one per distinct
/// symbol address. Symbols that share an address are folded into a single
/// block as aliases.
const TextBlockParser = struct {
    section: macho.section_64,
    code: []u8,
    relocs: []macho.relocation_info,
    nlists: []NlistWithIndex,
    index: u32 = 0,

    /// Returns the symbol after the current one, or `null` at the end.
    fn peek(self: TextBlockParser) ?NlistWithIndex {
        return if (self.index + 1 < self.nlists.len) self.nlists[self.index + 1] else null;
    }

    /// Sort predicate used to order aliases so that the most "senior" symbol
    /// ends up last (it is then taken with `pop`).
    fn lessThanBySeniority(context: Context, lhs: NlistWithIndex, rhs: NlistWithIndex) bool {
        if (!MachO.symbolIsExt(rhs.nlist)) {
            return MachO.symbolIsTemp(lhs.nlist, context.object.getString(lhs.nlist.n_strx));
        } else if (MachO.symbolIsPext(rhs.nlist) or MachO.symbolIsWeakDef(rhs.nlist)) {
            return !MachO.symbolIsExt(lhs.nlist);
        } else {
            return false;
        }
    }

    /// Produces the next block, or `null` once every symbol is consumed.
    /// The block is appended to `macho_file.managed_blocks`; its code spans
    /// from the current symbol's address up to the next distinct symbol
    /// address (or the end of the section).
    pub fn next(self: *TextBlockParser, context: Context) !?*TextBlock {
        if (self.index == self.nlists.len) return null;

        var aliases = std.ArrayList(NlistWithIndex).init(context.allocator);
        defer aliases.deinit();

        // Collect every symbol sharing the current address; stop at the first
        // symbol with a different address and remember it as the upper bound.
        const next_nlist: ?NlistWithIndex = blk: while (true) {
            const curr_nlist = self.nlists[self.index];
            try aliases.append(curr_nlist);

            if (self.peek()) |next_nlist| {
                if (curr_nlist.nlist.n_value == next_nlist.nlist.n_value) {
                    self.index += 1;
                    continue;
                }
                break :blk next_nlist;
            }
            break :blk null;
        } else null;

        // Translate symtab indices into indices into `macho_file.locals`.
        for (aliases.items) |*nlist_with_index| {
            nlist_with_index.index = context.object.symbol_mapping.get(nlist_with_index.index) orelse unreachable;
        }

        if (aliases.items.len > 1) {
            // Bubble-up senior symbol as the main link to the text block.
            sort.sort(
                NlistWithIndex,
                aliases.items,
                context,
                TextBlockParser.lessThanBySeniority,
            );
        }

        const senior_nlist = aliases.pop();
        const senior_sym = &context.macho_file.locals.items[senior_nlist.index];
        senior_sym.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(context.match).? + 1);

        const start_addr = senior_nlist.nlist.n_value - self.section.addr;
        const end_addr = if (next_nlist) |n| n.nlist.n_value - self.section.addr else self.section.size;

        const code = self.code[start_addr..end_addr];
        const size = code.len;

        // The block cannot be more aligned than its address allows, nor more
        // than the section it came from.
        const max_align = self.section.@"align";
        const actual_align = if (senior_nlist.nlist.n_value > 0)
            math.min(@ctz(u64, senior_nlist.nlist.n_value), max_align)
        else
            max_align;

        const stab: ?TextBlock.Stab = if (context.object.debug_info) |di| blk: {
            // TODO there has to be a better way to handle this.
            for (di.inner.func_list.items) |func| {
                if (func.pc_range) |range| {
                    if (senior_nlist.nlist.n_value >= range.start and senior_nlist.nlist.n_value < range.end) {
                        break :blk TextBlock.Stab{
                            .function = range.end - range.start,
                        };
                    }
                }
            }
            // TODO
            // if (self.macho_file.globals.contains(self.macho_file.getString(senior_sym.strx))) break :blk .global;
            break :blk .static;
        } else null;

        const block = try context.allocator.create(TextBlock);
        block.* = TextBlock.empty;
        block.local_sym_index = senior_nlist.index;
        block.stab = stab;
        block.size = size;
        block.alignment = actual_align;
        try context.macho_file.managed_blocks.append(context.allocator, block);

        try block.code.appendSlice(context.allocator, code);

        // The senior symbol was popped above; what remains are its aliases.
        try block.aliases.ensureTotalCapacity(context.allocator, aliases.items.len);
        for (aliases.items) |alias| {
            block.aliases.appendAssumeCapacity(alias.index);
            const sym = &context.macho_file.locals.items[alias.index];
            sym.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(context.match).? + 1);
        }

        try block.parseRelocs(self.relocs, .{
            .base_addr = start_addr,
            .allocator = context.allocator,
            .object = context.object,
            .macho_file = context.macho_file,
        });

        if (context.macho_file.has_dices) {
            const dices = filterDice(
                context.object.data_in_code_entries.items,
                senior_nlist.nlist.n_value,
                senior_nlist.nlist.n_value + size,
            );
            try block.dices.ensureTotalCapacity(context.allocator, dices.len);

            for (dices) |dice| {
                block.dices.appendAssumeCapacity(.{
                    // Rebase the entry so it is relative to the block start.
                    .offset = dice.offset - try math.cast(u32, senior_nlist.nlist.n_value),
                    .length = dice.length,
                    .kind = dice.kind,
                });
            }
        }

        self.index += 1;

        return block;
    }
};

/// Returns the index (in `macho_file.locals`) of the synthetic local symbol
/// standing for the start of section `sect`, creating and registering it in
/// `sections_as_symbols` on first use.
fn getOrCreateSectionSymbol(
    self: *Object,
    allocator: *Allocator,
    macho_file: *MachO,
    match: MachO.MatchingSection,
    sect: macho.section_64,
    sect_id: u8,
) !u32 {
    if (self.sections_as_symbols.get(sect_id)) |existing| return existing;

    const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{
        self.name,
        segmentName(sect),
        sectionName(sect),
    });
    defer allocator.free(sym_name);

    const local_sym_index = @intCast(u32, macho_file.locals.items.len);
    try macho_file.locals.append(allocator, .{
        .n_strx = try macho_file.makeString(sym_name),
        .n_type = macho.N_SECT,
        .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
        .n_desc = 0,
        .n_value = sect.addr,
    });
    try self.sections_as_symbols.putNoClobber(allocator, sect_id, local_sym_index);
    return local_sym_index;
}

/// Grows the output section `match` to make room for `block`, appends the
/// block to that section's linked list in `macho_file.blocks`, and records it
/// in `text_blocks`.
fn appendBlockToSection(
    self: *Object,
    allocator: *Allocator,
    macho_file: *MachO,
    match: MachO.MatchingSection,
    block: *TextBlock,
) !void {
    // Update target section's metadata
    // TODO should we update segment's size here too?
    // How does it tie with incremental space allocs?
    const tseg = &macho_file.load_commands.items[match.seg].Segment;
    const tsect = &tseg.sections.items[match.sect];
    const new_alignment = math.max(tsect.@"align", block.alignment);
    const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
    const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
    tsect.size = new_size;
    tsect.@"align" = new_alignment;

    // `macho_file.blocks` stores the LAST block of each section.
    if (macho_file.blocks.getPtr(match)) |last| {
        last.*.next = block;
        block.prev = last.*;
        last.* = block;
    } else {
        try macho_file.blocks.putNoClobber(allocator, match, block);
    }

    try self.text_blocks.append(allocator, block);
}

/// Converts every section of this object that has a matching output section
/// into one or more `TextBlock`s and links them into `macho_file`.
///
/// When the object is marked `MH_SUBSECTIONS_VIA_SYMBOLS` and the optimize
/// mode is not Debug, a section with symbols is split at symbol boundaries;
/// otherwise the whole section becomes a single block addressed through a
/// synthetic local symbol. `object_id` is this object's index in
/// `macho_file.objects` and is used to skip externally visible definitions
/// that were resolved to another object.
pub fn parseTextBlocks(
    self: *Object,
    allocator: *Allocator,
    object_id: u16,
    macho_file: *MachO,
) !void {
    const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;

    log.debug("analysing {s}", .{self.name});

    // You would expect that the symbol table is at least pre-sorted based on symbol's type:
    // local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance,
    // the GO compiler does not necessarily respect that therefore we sort immediately by type
    // and address within.
    var sorted_all_nlists = std.ArrayList(NlistWithIndex).init(allocator);
    defer sorted_all_nlists.deinit();
    try sorted_all_nlists.ensureTotalCapacity(self.symtab.items.len);

    for (self.symtab.items) |nlist, index| {
        sorted_all_nlists.appendAssumeCapacity(.{
            .nlist = nlist,
            .index = @intCast(u32, index),
        });
    }

    sort.sort(NlistWithIndex, sorted_all_nlists.items, {}, NlistWithIndex.lessThan);

    // Some compilers skip the dysymtab load command altogether, meaning we
    // have to infer the start of the undef section in the symtab ourselves.
    const iundefsym = if (self.dysymtab_cmd_index) |cmd_index| blk: {
        const dysymtab = self.load_commands.items[cmd_index].Dysymtab;
        break :blk dysymtab.iundefsym;
    } else blk: {
        // Walk back from the end to one past the last section-defined symbol.
        // `iundefsym` is an exclusive bound, hence the `- 1` when indexing.
        var iundefsym: usize = sorted_all_nlists.items.len;
        while (iundefsym > 0) : (iundefsym -= 1) {
            const nlist = sorted_all_nlists.items[iundefsym - 1];
            if (MachO.symbolIsSect(nlist.nlist)) break;
        }
        break :blk iundefsym;
    };

    // We only care about defined symbols, so filter every other out.
    const sorted_nlists = sorted_all_nlists.items[0..iundefsym];

    for (seg.sections.items) |sect, id| {
        const sect_id = @intCast(u8, id);
        log.debug("putting section '{s},{s}' as a TextBlock", .{
            segmentName(sect),
            sectionName(sect),
        });

        // Get matching segment/section in the final artifact.
        const match = (try macho_file.getMatchingSection(sect)) orelse {
            log.debug("unhandled section", .{});
            continue;
        };

        // Read section's code
        const code = try allocator.alloc(u8, @intCast(usize, sect.size));
        defer allocator.free(code);
        _ = try self.file.preadAll(code, sect.offset);

        // Read section's list of relocations
        const raw_relocs = try allocator.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info));
        defer allocator.free(raw_relocs);
        _ = try self.file.preadAll(raw_relocs, sect.reloff);
        const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs);

        // Symbols within this section only.
        const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists, sect);

        // In release mode, if the object file was generated with dead code stripping optimisations,
        // note it now and parse sections as atoms.
        const is_splittable = blk: {
            if (macho_file.base.options.optimize_mode == .Debug) break :blk false;
            break :blk self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
        };

        // Data-in-code entries are only attached to blocks of the __text
        // section, and only if the object has any.
        macho_file.has_dices = blk: {
            if (self.text_section_index) |index| {
                if (index != id) break :blk false;
                if (self.data_in_code_entries.items.len == 0) break :blk false;
                break :blk true;
            }
            break :blk false;
        };
        macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null;

        next: {
            if (is_splittable) blocks: {
                // Without symbols there is nothing to split on; fall through
                // to the whole-section path below.
                if (filtered_nlists.len == 0) break :blocks;

                // If the first nlist does not match the start of the section,
                // then we need to encapsulate the memory range [section start, first symbol)
                // as a temporary symbol and insert the matching TextBlock.
                const first_nlist = filtered_nlists[0].nlist;
                if (first_nlist.n_value > sect.addr) {
                    const block_local_sym_index = try self.getOrCreateSectionSymbol(
                        allocator,
                        macho_file,
                        match,
                        sect,
                        sect_id,
                    );

                    const block_code = code[0 .. first_nlist.n_value - sect.addr];
                    const block_size = block_code.len;

                    const block = try allocator.create(TextBlock);
                    block.* = TextBlock.empty;
                    block.local_sym_index = block_local_sym_index;
                    block.size = block_size;
                    block.alignment = sect.@"align";
                    try macho_file.managed_blocks.append(allocator, block);

                    try block.code.appendSlice(allocator, block_code);

                    try block.parseRelocs(relocs, .{
                        .base_addr = 0,
                        .allocator = allocator,
                        .object = self,
                        .macho_file = macho_file,
                    });

                    if (macho_file.has_dices) {
                        const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + block_size);
                        try block.dices.ensureTotalCapacity(allocator, dices.len);

                        for (dices) |dice| {
                            block.dices.appendAssumeCapacity(.{
                                .offset = dice.offset - try math.cast(u32, sect.addr),
                                .length = dice.length,
                                .kind = dice.kind,
                            });
                        }
                    }

                    try self.appendBlockToSection(allocator, macho_file, match, block);
                }

                var parser = TextBlockParser{
                    .section = sect,
                    .code = code,
                    .relocs = relocs,
                    .nlists = filtered_nlists,
                };

                while (try parser.next(.{
                    .allocator = allocator,
                    .object = self,
                    .macho_file = macho_file,
                    .match = match,
                })) |block| {
                    const sym = macho_file.locals.items[block.local_sym_index];
                    const is_ext = blk: {
                        const orig_sym_id = self.reverse_symbol_mapping.get(block.local_sym_index) orelse unreachable;
                        break :blk MachO.symbolIsExt(self.symtab.items[orig_sym_id]);
                    };
                    if (is_ext) {
                        if (macho_file.symbol_resolver.get(sym.n_strx)) |resolv| {
                            assert(resolv.where == .global);
                            // Another object won the resolution of this
                            // symbol; do not link our copy into the output.
                            if (resolv.file != object_id) {
                                log.debug("deduping definition of {s} in {s}", .{
                                    macho_file.getString(sym.n_strx),
                                    self.name,
                                });
                                log.debug(" already defined in {s}", .{
                                    macho_file.objects.items[resolv.file].name,
                                });
                                continue;
                            }
                        }
                    }

                    if (sym.n_value == sect.addr) {
                        if (self.sections_as_symbols.get(sect_id)) |alias| {
                            // In x86_64 relocs, it can so happen that the compiler refers to the same
                            // atom by both the actual assigned symbol and the start of the section. In this
                            // case, we need to link the two together so add an alias.
                            try block.aliases.append(allocator, alias);
                        }
                    }

                    try self.appendBlockToSection(allocator, macho_file, match, block);
                }

                break :next;
            }

            // Since there is no symbol to refer to this block, we create
            // a temp one, unless we already did that when working out the relocations
            // of other text blocks.
            const block_local_sym_index = try self.getOrCreateSectionSymbol(
                allocator,
                macho_file,
                match,
                sect,
                sect_id,
            );

            const block = try allocator.create(TextBlock);
            block.* = TextBlock.empty;
            block.local_sym_index = block_local_sym_index;
            block.size = sect.size;
            block.alignment = sect.@"align";
            try macho_file.managed_blocks.append(allocator, block);

            try block.code.appendSlice(allocator, code);

            try block.parseRelocs(relocs, .{
                .base_addr = 0,
                .allocator = allocator,
                .object = self,
                .macho_file = macho_file,
            });

            if (macho_file.has_dices) {
                const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size);
                try block.dices.ensureTotalCapacity(allocator, dices.len);

                for (dices) |dice| {
                    block.dices.appendAssumeCapacity(.{
                        .offset = dice.offset - try math.cast(u32, sect.addr),
                        .length = dice.length,
                        .kind = dice.kind,
                    });
                }
            }

            // Since this block gets a helper local temporary symbol that didn't exist
            // in the object file and which encompasses the entire section, we need to traverse
            // the filtered symbols and note which symbol is contained within so that
            // we can properly allocate addresses down the line.
            // While we're at it, we need to update segment,section mapping of each symbol too.
            try block.contained.ensureTotalCapacity(allocator, filtered_nlists.len);

            for (filtered_nlists) |nlist_with_index| {
                const nlist = nlist_with_index.nlist;
                const local_sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable;
                const local = &macho_file.locals.items[local_sym_index];
                local.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1);

                const stab: ?TextBlock.Stab = if (self.debug_info) |di| blk: {
                    // TODO there has to be a better way to handle this.
                    for (di.inner.func_list.items) |func| {
                        if (func.pc_range) |range| {
                            if (nlist.n_value >= range.start and nlist.n_value < range.end) {
                                break :blk TextBlock.Stab{
                                    .function = range.end - range.start,
                                };
                            }
                        }
                    }
                    // TODO
                    // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global;
                    break :blk .static;
                } else null;

                block.contained.appendAssumeCapacity(.{
                    .local_sym_index = local_sym_index,
                    .offset = nlist.n_value - sect.addr,
                    .stab = stab,
                });
            }

            try self.appendBlockToSection(allocator, macho_file, match, block);
        }
    }
}

/// Loads the symbol table and string table described by `LC_SYMTAB` into
/// `symtab` and `strtab`. Does nothing when the object has no such command.
fn parseSymtab(self: *Object, allocator: *Allocator) !void {
    const index = self.symtab_cmd_index orelse return;
    const symtab_cmd = self.load_commands.items[index].Symtab;

    const symtab = try allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms);
    defer allocator.free(symtab);
    _ = try self.file.preadAll(symtab, symtab_cmd.symoff);
    const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, symtab));
    try self.symtab.appendSlice(allocator, slice);

    const strtab = try allocator.alloc(u8, symtab_cmd.strsize);
    defer allocator.free(strtab);
    _ = try self.file.preadAll(strtab, symtab_cmd.stroff);
    try self.strtab.appendSlice(allocator, strtab);
}

/// Parses DWARF debug info (if the object carries any) and records the
/// translation unit's name and compilation directory, plus the file's mtime
/// in seconds unless `mtime` was already set.
///
/// Missing DWARF sections or `error.MissingDebugInfo` are not errors; the
/// object is simply left without debug info. `self` is only modified once
/// everything that can fail has succeeded, so a failure never leaves
/// half-initialized debug state behind.
pub fn parseDebugInfo(self: *Object, allocator: *Allocator) !void {
    log.debug("parsing debug info in '{s}'", .{self.name});

    var debug_info = (try DebugInfo.parseFromObject(allocator, self)) orelse return;
    errdefer debug_info.deinit(allocator);

    // We assume there is only one CU.
    const compile_unit = debug_info.inner.findCompileUnit(0x0) catch |err| switch (err) {
        error.MissingDebugInfo => {
            // TODO audit cases with missing debug info and audit our dwarf.zig module.
            log.debug("invalid or missing debug info in {s}; skipping", .{self.name});
            // Non-error return: `errdefer` will not run, so free explicitly.
            debug_info.deinit(allocator);
            return;
        },
        else => |e| return e,
    };
    const name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_name);
    const comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_comp_dir);

    // `name` and `comp_dir` are returned by the DWARF parser; copy them so
    // that `deinit` can free them independently.
    const tu_name = try allocator.dupe(u8, name);
    errdefer allocator.free(tu_name);
    const tu_comp_dir = try allocator.dupe(u8, comp_dir);

    // Nothing below can fail; ownership moves to `self`.
    self.debug_info = debug_info;
    self.tu_name = tu_name;
    self.tu_comp_dir = tu_comp_dir;

    if (self.mtime == null) {
        self.mtime = mtime: {
            // Best effort: fall back to 0 when the file cannot be stat'ed.
            const stat = self.file.stat() catch break :mtime 0;
            break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000));
        };
    }
}

/// Loads the entries described by `LC_DATA_IN_CODE` into
/// `data_in_code_entries`. Does nothing when the object has no such command.
pub fn parseDataInCode(self: *Object, allocator: *Allocator) !void {
    const index = self.data_in_code_cmd_index orelse return;
    const data_in_code = self.load_commands.items[index].LinkeditData;

    const buffer = try allocator.alloc(u8, data_in_code.datasize);
    defer allocator.free(buffer);

    _ = try self.file.preadAll(buffer, data_in_code.dataoff);

    var stream = io.fixedBufferStream(buffer);
    var reader = stream.reader();
    while (true) {
        const dice = reader.readStruct(macho.data_in_code_entry) catch |err| switch (err) {
            error.EndOfStream => break,
            else => |e| return e,
        };
        try self.data_in_code_entries.append(allocator, dice);
    }
}

/// Reads the raw contents of section `index` of the segment command.
/// Caller owns the returned slice.
fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 {
    const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
    const sect = seg.sections.items[index];
    const buffer = try allocator.alloc(u8, @intCast(usize, sect.size));
    errdefer allocator.free(buffer);
    _ = try self.file.preadAll(buffer, sect.offset);
    return buffer;
}

/// Returns the NUL-terminated string starting at byte `off` of the string
/// table. `off` must be within `strtab`. The result borrows from `strtab`.
pub fn getString(self: Object, off: u32) []const u8 {
    assert(off < self.strtab.items.len);
    return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + off));
}