mirror of
https://github.com/ziglang/zig.git
synced 2025-12-30 10:03:21 +00:00
912 lines
36 KiB
Zig
912 lines
36 KiB
Zig
const Object = @This();
|
|
|
|
const std = @import("std");
|
|
const assert = std.debug.assert;
|
|
const dwarf = std.dwarf;
|
|
const fs = std.fs;
|
|
const io = std.io;
|
|
const log = std.log.scoped(.object);
|
|
const macho = std.macho;
|
|
const math = std.math;
|
|
const mem = std.mem;
|
|
const sort = std.sort;
|
|
const commands = @import("commands.zig");
|
|
const segmentName = commands.segmentName;
|
|
const sectionName = commands.sectionName;
|
|
|
|
const Allocator = mem.Allocator;
|
|
const LoadCommand = commands.LoadCommand;
|
|
const MachO = @import("../MachO.zig");
|
|
const TextBlock = @import("TextBlock.zig");
|
|
|
|
file: fs.File,
|
|
name: []const u8,
|
|
|
|
file_offset: ?u32 = null,
|
|
|
|
header: ?macho.mach_header_64 = null,
|
|
|
|
load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
|
|
|
|
segment_cmd_index: ?u16 = null,
|
|
symtab_cmd_index: ?u16 = null,
|
|
dysymtab_cmd_index: ?u16 = null,
|
|
build_version_cmd_index: ?u16 = null,
|
|
data_in_code_cmd_index: ?u16 = null,
|
|
|
|
text_section_index: ?u16 = null,
|
|
mod_init_func_section_index: ?u16 = null,
|
|
|
|
// __DWARF segment sections
|
|
dwarf_debug_info_index: ?u16 = null,
|
|
dwarf_debug_abbrev_index: ?u16 = null,
|
|
dwarf_debug_str_index: ?u16 = null,
|
|
dwarf_debug_line_index: ?u16 = null,
|
|
dwarf_debug_ranges_index: ?u16 = null,
|
|
|
|
symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{},
|
|
strtab: std.ArrayListUnmanaged(u8) = .{},
|
|
data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
|
|
|
|
// Debug info
|
|
debug_info: ?DebugInfo = null,
|
|
tu_name: ?[]const u8 = null,
|
|
tu_comp_dir: ?[]const u8 = null,
|
|
mtime: ?u64 = null,
|
|
|
|
text_blocks: std.ArrayListUnmanaged(*TextBlock) = .{},
|
|
sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{},
|
|
|
|
// TODO symbol mapping and its inverse can probably be simple arrays
|
|
// instead of hash maps.
|
|
symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{},
|
|
reverse_symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{},
|
|
|
|
const DebugInfo = struct {
|
|
inner: dwarf.DwarfInfo,
|
|
debug_info: []u8,
|
|
debug_abbrev: []u8,
|
|
debug_str: []u8,
|
|
debug_line: []u8,
|
|
debug_ranges: []u8,
|
|
|
|
pub fn parseFromObject(allocator: *Allocator, object: *const Object) !?DebugInfo {
|
|
var debug_info = blk: {
|
|
const index = object.dwarf_debug_info_index orelse return null;
|
|
break :blk try object.readSection(allocator, index);
|
|
};
|
|
var debug_abbrev = blk: {
|
|
const index = object.dwarf_debug_abbrev_index orelse return null;
|
|
break :blk try object.readSection(allocator, index);
|
|
};
|
|
var debug_str = blk: {
|
|
const index = object.dwarf_debug_str_index orelse return null;
|
|
break :blk try object.readSection(allocator, index);
|
|
};
|
|
var debug_line = blk: {
|
|
const index = object.dwarf_debug_line_index orelse return null;
|
|
break :blk try object.readSection(allocator, index);
|
|
};
|
|
var debug_ranges = blk: {
|
|
if (object.dwarf_debug_ranges_index) |ind| {
|
|
break :blk try object.readSection(allocator, ind);
|
|
}
|
|
break :blk try allocator.alloc(u8, 0);
|
|
};
|
|
|
|
var inner: dwarf.DwarfInfo = .{
|
|
.endian = .Little,
|
|
.debug_info = debug_info,
|
|
.debug_abbrev = debug_abbrev,
|
|
.debug_str = debug_str,
|
|
.debug_line = debug_line,
|
|
.debug_ranges = debug_ranges,
|
|
};
|
|
try dwarf.openDwarfDebugInfo(&inner, allocator);
|
|
|
|
return DebugInfo{
|
|
.inner = inner,
|
|
.debug_info = debug_info,
|
|
.debug_abbrev = debug_abbrev,
|
|
.debug_str = debug_str,
|
|
.debug_line = debug_line,
|
|
.debug_ranges = debug_ranges,
|
|
};
|
|
}
|
|
|
|
pub fn deinit(self: *DebugInfo, allocator: *Allocator) void {
|
|
allocator.free(self.debug_info);
|
|
allocator.free(self.debug_abbrev);
|
|
allocator.free(self.debug_str);
|
|
allocator.free(self.debug_line);
|
|
allocator.free(self.debug_ranges);
|
|
self.inner.abbrev_table_list.deinit();
|
|
self.inner.compile_unit_list.deinit();
|
|
self.inner.func_list.deinit();
|
|
}
|
|
};
|
|
|
|
pub fn deinit(self: *Object, allocator: *Allocator) void {
|
|
for (self.load_commands.items) |*lc| {
|
|
lc.deinit(allocator);
|
|
}
|
|
self.load_commands.deinit(allocator);
|
|
self.data_in_code_entries.deinit(allocator);
|
|
self.symtab.deinit(allocator);
|
|
self.strtab.deinit(allocator);
|
|
self.text_blocks.deinit(allocator);
|
|
self.sections_as_symbols.deinit(allocator);
|
|
self.symbol_mapping.deinit(allocator);
|
|
self.reverse_symbol_mapping.deinit(allocator);
|
|
allocator.free(self.name);
|
|
|
|
if (self.debug_info) |*db| {
|
|
db.deinit(allocator);
|
|
}
|
|
|
|
if (self.tu_name) |n| {
|
|
allocator.free(n);
|
|
}
|
|
|
|
if (self.tu_comp_dir) |n| {
|
|
allocator.free(n);
|
|
}
|
|
}
|
|
|
|
pub fn createAndParseFromPath(allocator: *Allocator, target: std.Target, path: []const u8) !?Object {
|
|
const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) {
|
|
error.FileNotFound => return null,
|
|
else => |e| return e,
|
|
};
|
|
errdefer file.close();
|
|
|
|
const name = try allocator.dupe(u8, path);
|
|
errdefer allocator.free(name);
|
|
|
|
var object = Object{
|
|
.name = name,
|
|
.file = file,
|
|
};
|
|
|
|
object.parse(allocator, target) catch |err| switch (err) {
|
|
error.EndOfStream, error.NotObject => {
|
|
object.deinit(allocator);
|
|
return null;
|
|
},
|
|
else => |e| return e,
|
|
};
|
|
|
|
return object;
|
|
}
|
|
|
|
pub fn parse(self: *Object, allocator: *Allocator, target: std.Target) !void {
|
|
const reader = self.file.reader();
|
|
if (self.file_offset) |offset| {
|
|
try reader.context.seekTo(offset);
|
|
}
|
|
|
|
const header = try reader.readStruct(macho.mach_header_64);
|
|
if (header.filetype != macho.MH_OBJECT) {
|
|
log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{
|
|
macho.MH_OBJECT,
|
|
header.filetype,
|
|
});
|
|
return error.NotObject;
|
|
}
|
|
|
|
const this_arch: std.Target.Cpu.Arch = switch (header.cputype) {
|
|
macho.CPU_TYPE_ARM64 => .aarch64,
|
|
macho.CPU_TYPE_X86_64 => .x86_64,
|
|
else => |value| {
|
|
log.err("unsupported cpu architecture 0x{x}", .{value});
|
|
return error.UnsupportedCpuArchitecture;
|
|
},
|
|
};
|
|
if (this_arch != target.cpu.arch) {
|
|
log.err("mismatched cpu architecture: expected {s}, found {s}", .{ target.cpu.arch, this_arch });
|
|
return error.MismatchedCpuArchitecture;
|
|
}
|
|
|
|
self.header = header;
|
|
|
|
try self.readLoadCommands(allocator, reader);
|
|
try self.parseSymtab(allocator);
|
|
try self.parseDataInCode(allocator);
|
|
try self.parseDebugInfo(allocator);
|
|
}
|
|
|
|
pub fn readLoadCommands(self: *Object, allocator: *Allocator, reader: anytype) !void {
|
|
const header = self.header orelse unreachable; // Unreachable here signifies a fatal unexplored condition.
|
|
const offset = self.file_offset orelse 0;
|
|
|
|
try self.load_commands.ensureCapacity(allocator, header.ncmds);
|
|
|
|
var i: u16 = 0;
|
|
while (i < header.ncmds) : (i += 1) {
|
|
var cmd = try LoadCommand.read(allocator, reader);
|
|
switch (cmd.cmd()) {
|
|
macho.LC_SEGMENT_64 => {
|
|
self.segment_cmd_index = i;
|
|
var seg = cmd.Segment;
|
|
for (seg.sections.items) |*sect, j| {
|
|
const index = @intCast(u16, j);
|
|
const segname = segmentName(sect.*);
|
|
const sectname = sectionName(sect.*);
|
|
if (mem.eql(u8, segname, "__DWARF")) {
|
|
if (mem.eql(u8, sectname, "__debug_info")) {
|
|
self.dwarf_debug_info_index = index;
|
|
} else if (mem.eql(u8, sectname, "__debug_abbrev")) {
|
|
self.dwarf_debug_abbrev_index = index;
|
|
} else if (mem.eql(u8, sectname, "__debug_str")) {
|
|
self.dwarf_debug_str_index = index;
|
|
} else if (mem.eql(u8, sectname, "__debug_line")) {
|
|
self.dwarf_debug_line_index = index;
|
|
} else if (mem.eql(u8, sectname, "__debug_ranges")) {
|
|
self.dwarf_debug_ranges_index = index;
|
|
}
|
|
} else if (mem.eql(u8, segname, "__TEXT")) {
|
|
if (mem.eql(u8, sectname, "__text")) {
|
|
self.text_section_index = index;
|
|
}
|
|
} else if (mem.eql(u8, segname, "__DATA")) {
|
|
if (mem.eql(u8, sectname, "__mod_init_func")) {
|
|
self.mod_init_func_section_index = index;
|
|
}
|
|
}
|
|
|
|
sect.offset += offset;
|
|
if (sect.reloff > 0) {
|
|
sect.reloff += offset;
|
|
}
|
|
}
|
|
|
|
seg.inner.fileoff += offset;
|
|
},
|
|
macho.LC_SYMTAB => {
|
|
self.symtab_cmd_index = i;
|
|
cmd.Symtab.symoff += offset;
|
|
cmd.Symtab.stroff += offset;
|
|
},
|
|
macho.LC_DYSYMTAB => {
|
|
self.dysymtab_cmd_index = i;
|
|
},
|
|
macho.LC_BUILD_VERSION => {
|
|
self.build_version_cmd_index = i;
|
|
},
|
|
macho.LC_DATA_IN_CODE => {
|
|
self.data_in_code_cmd_index = i;
|
|
cmd.LinkeditData.dataoff += offset;
|
|
},
|
|
else => {
|
|
log.debug("Unknown load command detected: 0x{x}.", .{cmd.cmd()});
|
|
},
|
|
}
|
|
self.load_commands.appendAssumeCapacity(cmd);
|
|
}
|
|
}
|
|
|
|
const NlistWithIndex = struct {
|
|
nlist: macho.nlist_64,
|
|
index: u32,
|
|
|
|
fn lessThan(_: void, lhs: NlistWithIndex, rhs: NlistWithIndex) bool {
|
|
// We sort by type: defined < undefined, and
|
|
// afterwards by address in each group. Normally, dysymtab should
|
|
// be enough to guarantee the sort, but turns out not every compiler
|
|
// is kind enough to specify the symbols in the correct order.
|
|
if (MachO.symbolIsSect(lhs.nlist)) {
|
|
if (MachO.symbolIsSect(rhs.nlist)) {
|
|
// Same group, sort by address.
|
|
return lhs.nlist.n_value < rhs.nlist.n_value;
|
|
} else {
|
|
return true;
|
|
}
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
fn filterInSection(symbols: []NlistWithIndex, sect: macho.section_64) []NlistWithIndex {
|
|
const Predicate = struct {
|
|
addr: u64,
|
|
|
|
pub fn predicate(self: @This(), symbol: NlistWithIndex) bool {
|
|
return symbol.nlist.n_value >= self.addr;
|
|
}
|
|
};
|
|
|
|
const start = MachO.findFirst(NlistWithIndex, symbols, 0, Predicate{ .addr = sect.addr });
|
|
const end = MachO.findFirst(NlistWithIndex, symbols, start, Predicate{ .addr = sect.addr + sect.size });
|
|
|
|
return symbols[start..end];
|
|
}
|
|
};
|
|
|
|
fn filterDice(dices: []macho.data_in_code_entry, start_addr: u64, end_addr: u64) []macho.data_in_code_entry {
|
|
const Predicate = struct {
|
|
addr: u64,
|
|
|
|
pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool {
|
|
return dice.offset >= self.addr;
|
|
}
|
|
};
|
|
|
|
const start = MachO.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr });
|
|
const end = MachO.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr });
|
|
|
|
return dices[start..end];
|
|
}
|
|
|
|
const Context = struct {
|
|
allocator: *Allocator,
|
|
object: *Object,
|
|
macho_file: *MachO,
|
|
match: MachO.MatchingSection,
|
|
};
|
|
|
|
const TextBlockParser = struct {
|
|
section: macho.section_64,
|
|
code: []u8,
|
|
relocs: []macho.relocation_info,
|
|
nlists: []NlistWithIndex,
|
|
index: u32 = 0,
|
|
|
|
fn peek(self: TextBlockParser) ?NlistWithIndex {
|
|
return if (self.index + 1 < self.nlists.len) self.nlists[self.index + 1] else null;
|
|
}
|
|
|
|
fn lessThanBySeniority(context: Context, lhs: NlistWithIndex, rhs: NlistWithIndex) bool {
|
|
if (!MachO.symbolIsExt(rhs.nlist)) {
|
|
return MachO.symbolIsTemp(lhs.nlist, context.object.getString(lhs.nlist.n_strx));
|
|
} else if (MachO.symbolIsPext(rhs.nlist) or MachO.symbolIsWeakDef(rhs.nlist)) {
|
|
return !MachO.symbolIsExt(lhs.nlist);
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
pub fn next(self: *TextBlockParser, context: Context) !?*TextBlock {
|
|
if (self.index == self.nlists.len) return null;
|
|
|
|
var aliases = std.ArrayList(NlistWithIndex).init(context.allocator);
|
|
defer aliases.deinit();
|
|
|
|
const next_nlist: ?NlistWithIndex = blk: while (true) {
|
|
const curr_nlist = self.nlists[self.index];
|
|
try aliases.append(curr_nlist);
|
|
|
|
if (self.peek()) |next_nlist| {
|
|
if (curr_nlist.nlist.n_value == next_nlist.nlist.n_value) {
|
|
self.index += 1;
|
|
continue;
|
|
}
|
|
break :blk next_nlist;
|
|
}
|
|
break :blk null;
|
|
} else null;
|
|
|
|
for (aliases.items) |*nlist_with_index| {
|
|
nlist_with_index.index = context.object.symbol_mapping.get(nlist_with_index.index) orelse unreachable;
|
|
}
|
|
|
|
if (aliases.items.len > 1) {
|
|
// Bubble-up senior symbol as the main link to the text block.
|
|
sort.sort(
|
|
NlistWithIndex,
|
|
aliases.items,
|
|
context,
|
|
TextBlockParser.lessThanBySeniority,
|
|
);
|
|
}
|
|
|
|
const senior_nlist = aliases.pop();
|
|
const senior_sym = &context.macho_file.locals.items[senior_nlist.index];
|
|
senior_sym.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(context.match).? + 1);
|
|
|
|
const start_addr = senior_nlist.nlist.n_value - self.section.addr;
|
|
const end_addr = if (next_nlist) |n| n.nlist.n_value - self.section.addr else self.section.size;
|
|
|
|
const code = self.code[start_addr..end_addr];
|
|
const size = code.len;
|
|
|
|
const max_align = self.section.@"align";
|
|
const actual_align = if (senior_nlist.nlist.n_value > 0)
|
|
math.min(@ctz(u64, senior_nlist.nlist.n_value), max_align)
|
|
else
|
|
max_align;
|
|
|
|
const stab: ?TextBlock.Stab = if (context.object.debug_info) |di| blk: {
|
|
// TODO there has to be a better to handle this.
|
|
for (di.inner.func_list.items) |func| {
|
|
if (func.pc_range) |range| {
|
|
if (senior_nlist.nlist.n_value >= range.start and senior_nlist.nlist.n_value < range.end) {
|
|
break :blk TextBlock.Stab{
|
|
.function = range.end - range.start,
|
|
};
|
|
}
|
|
}
|
|
}
|
|
// TODO
|
|
// if (self.macho_file.globals.contains(self.macho_file.getString(senior_sym.strx))) break :blk .global;
|
|
break :blk .static;
|
|
} else null;
|
|
|
|
const block = try context.allocator.create(TextBlock);
|
|
block.* = TextBlock.empty;
|
|
block.local_sym_index = senior_nlist.index;
|
|
block.stab = stab;
|
|
block.size = size;
|
|
block.alignment = actual_align;
|
|
try context.macho_file.managed_blocks.append(context.allocator, block);
|
|
|
|
try block.code.appendSlice(context.allocator, code);
|
|
|
|
try block.aliases.ensureTotalCapacity(context.allocator, aliases.items.len);
|
|
for (aliases.items) |alias| {
|
|
block.aliases.appendAssumeCapacity(alias.index);
|
|
const sym = &context.macho_file.locals.items[alias.index];
|
|
sym.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(context.match).? + 1);
|
|
}
|
|
|
|
try block.parseRelocs(self.relocs, .{
|
|
.base_addr = start_addr,
|
|
.allocator = context.allocator,
|
|
.object = context.object,
|
|
.macho_file = context.macho_file,
|
|
});
|
|
|
|
if (context.macho_file.has_dices) {
|
|
const dices = filterDice(
|
|
context.object.data_in_code_entries.items,
|
|
senior_nlist.nlist.n_value,
|
|
senior_nlist.nlist.n_value + size,
|
|
);
|
|
try block.dices.ensureTotalCapacity(context.allocator, dices.len);
|
|
|
|
for (dices) |dice| {
|
|
block.dices.appendAssumeCapacity(.{
|
|
.offset = dice.offset - try math.cast(u32, senior_nlist.nlist.n_value),
|
|
.length = dice.length,
|
|
.kind = dice.kind,
|
|
});
|
|
}
|
|
}
|
|
|
|
self.index += 1;
|
|
|
|
return block;
|
|
}
|
|
};
|
|
|
|
pub fn parseTextBlocks(
|
|
self: *Object,
|
|
allocator: *Allocator,
|
|
object_id: u16,
|
|
macho_file: *MachO,
|
|
) !void {
|
|
const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
|
|
|
|
log.debug("analysing {s}", .{self.name});
|
|
|
|
// You would expect that the symbol table is at least pre-sorted based on symbol's type:
|
|
// local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance,
|
|
// the GO compiler does not necessarily respect that therefore we sort immediately by type
|
|
// and address within.
|
|
var sorted_all_nlists = std.ArrayList(NlistWithIndex).init(allocator);
|
|
defer sorted_all_nlists.deinit();
|
|
try sorted_all_nlists.ensureTotalCapacity(self.symtab.items.len);
|
|
|
|
for (self.symtab.items) |nlist, index| {
|
|
sorted_all_nlists.appendAssumeCapacity(.{
|
|
.nlist = nlist,
|
|
.index = @intCast(u32, index),
|
|
});
|
|
}
|
|
|
|
sort.sort(NlistWithIndex, sorted_all_nlists.items, {}, NlistWithIndex.lessThan);
|
|
|
|
// Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we
|
|
// have to infer the start of undef section in the symtab ourselves.
|
|
const iundefsym = if (self.dysymtab_cmd_index) |cmd_index| blk: {
|
|
const dysymtab = self.load_commands.items[cmd_index].Dysymtab;
|
|
break :blk dysymtab.iundefsym;
|
|
} else blk: {
|
|
var iundefsym: usize = sorted_all_nlists.items.len;
|
|
while (iundefsym > 0) : (iundefsym -= 1) {
|
|
const nlist = sorted_all_nlists.items[iundefsym];
|
|
if (MachO.symbolIsSect(nlist.nlist)) break;
|
|
}
|
|
break :blk iundefsym;
|
|
};
|
|
|
|
// We only care about defined symbols, so filter every other out.
|
|
const sorted_nlists = sorted_all_nlists.items[0..iundefsym];
|
|
|
|
for (seg.sections.items) |sect, id| {
|
|
const sect_id = @intCast(u8, id);
|
|
log.debug("putting section '{s},{s}' as a TextBlock", .{
|
|
segmentName(sect),
|
|
sectionName(sect),
|
|
});
|
|
|
|
// Get matching segment/section in the final artifact.
|
|
const match = (try macho_file.getMatchingSection(sect)) orelse {
|
|
log.debug("unhandled section", .{});
|
|
continue;
|
|
};
|
|
|
|
// Read section's code
|
|
var code = try allocator.alloc(u8, @intCast(usize, sect.size));
|
|
defer allocator.free(code);
|
|
_ = try self.file.preadAll(code, sect.offset);
|
|
|
|
// Read section's list of relocations
|
|
var raw_relocs = try allocator.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info));
|
|
defer allocator.free(raw_relocs);
|
|
_ = try self.file.preadAll(raw_relocs, sect.reloff);
|
|
const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs);
|
|
|
|
// Symbols within this section only.
|
|
const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists, sect);
|
|
|
|
// In release mode, if the object file was generated with dead code stripping optimisations,
|
|
// note it now and parse sections as atoms.
|
|
const is_splittable = blk: {
|
|
if (macho_file.base.options.optimize_mode == .Debug) break :blk false;
|
|
break :blk self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
|
|
};
|
|
|
|
macho_file.has_dices = blk: {
|
|
if (self.text_section_index) |index| {
|
|
if (index != id) break :blk false;
|
|
if (self.data_in_code_entries.items.len == 0) break :blk false;
|
|
break :blk true;
|
|
}
|
|
break :blk false;
|
|
};
|
|
macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null;
|
|
|
|
next: {
|
|
if (is_splittable) blocks: {
|
|
if (filtered_nlists.len == 0) break :blocks;
|
|
|
|
// If the first nlist does not match the start of the section,
|
|
// then we need to encapsulate the memory range [section start, first symbol)
|
|
// as a temporary symbol and insert the matching TextBlock.
|
|
const first_nlist = filtered_nlists[0].nlist;
|
|
if (first_nlist.n_value > sect.addr) {
|
|
const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{
|
|
self.name,
|
|
segmentName(sect),
|
|
sectionName(sect),
|
|
});
|
|
defer allocator.free(sym_name);
|
|
|
|
const block_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
|
|
const block_local_sym_index = @intCast(u32, macho_file.locals.items.len);
|
|
try macho_file.locals.append(allocator, .{
|
|
.n_strx = try macho_file.makeString(sym_name),
|
|
.n_type = macho.N_SECT,
|
|
.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
|
|
.n_desc = 0,
|
|
.n_value = sect.addr,
|
|
});
|
|
try self.sections_as_symbols.putNoClobber(allocator, sect_id, block_local_sym_index);
|
|
break :blk block_local_sym_index;
|
|
};
|
|
|
|
const block_code = code[0 .. first_nlist.n_value - sect.addr];
|
|
const block_size = block_code.len;
|
|
|
|
const block = try allocator.create(TextBlock);
|
|
block.* = TextBlock.empty;
|
|
block.local_sym_index = block_local_sym_index;
|
|
block.size = block_size;
|
|
block.alignment = sect.@"align";
|
|
try macho_file.managed_blocks.append(allocator, block);
|
|
|
|
try block.code.appendSlice(allocator, block_code);
|
|
|
|
try block.parseRelocs(relocs, .{
|
|
.base_addr = 0,
|
|
.allocator = allocator,
|
|
.object = self,
|
|
.macho_file = macho_file,
|
|
});
|
|
|
|
if (macho_file.has_dices) {
|
|
const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + block_size);
|
|
try block.dices.ensureTotalCapacity(allocator, dices.len);
|
|
|
|
for (dices) |dice| {
|
|
block.dices.appendAssumeCapacity(.{
|
|
.offset = dice.offset - try math.cast(u32, sect.addr),
|
|
.length = dice.length,
|
|
.kind = dice.kind,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Update target section's metadata
|
|
// TODO should we update segment's size here too?
|
|
// How does it tie with incremental space allocs?
|
|
const tseg = &macho_file.load_commands.items[match.seg].Segment;
|
|
const tsect = &tseg.sections.items[match.sect];
|
|
const new_alignment = math.max(tsect.@"align", block.alignment);
|
|
const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
|
|
const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
|
|
tsect.size = new_size;
|
|
tsect.@"align" = new_alignment;
|
|
|
|
if (macho_file.blocks.getPtr(match)) |last| {
|
|
last.*.next = block;
|
|
block.prev = last.*;
|
|
last.* = block;
|
|
} else {
|
|
try macho_file.blocks.putNoClobber(allocator, match, block);
|
|
}
|
|
|
|
try self.text_blocks.append(allocator, block);
|
|
}
|
|
|
|
var parser = TextBlockParser{
|
|
.section = sect,
|
|
.code = code,
|
|
.relocs = relocs,
|
|
.nlists = filtered_nlists,
|
|
};
|
|
|
|
while (try parser.next(.{
|
|
.allocator = allocator,
|
|
.object = self,
|
|
.macho_file = macho_file,
|
|
.match = match,
|
|
})) |block| {
|
|
const sym = macho_file.locals.items[block.local_sym_index];
|
|
const is_ext = blk: {
|
|
const orig_sym_id = self.reverse_symbol_mapping.get(block.local_sym_index) orelse unreachable;
|
|
break :blk MachO.symbolIsExt(self.symtab.items[orig_sym_id]);
|
|
};
|
|
if (is_ext) {
|
|
if (macho_file.symbol_resolver.get(sym.n_strx)) |resolv| {
|
|
assert(resolv.where == .global);
|
|
if (resolv.file != object_id) {
|
|
log.debug("deduping definition of {s} in {s}", .{
|
|
macho_file.getString(sym.n_strx),
|
|
self.name,
|
|
});
|
|
log.debug(" already defined in {s}", .{
|
|
macho_file.objects.items[resolv.file].name,
|
|
});
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (sym.n_value == sect.addr) {
|
|
if (self.sections_as_symbols.get(sect_id)) |alias| {
|
|
// In x86_64 relocs, it can so happen that the compiler refers to the same
|
|
// atom by both the actual assigned symbol and the start of the section. In this
|
|
// case, we need to link the two together so add an alias.
|
|
try block.aliases.append(allocator, alias);
|
|
}
|
|
}
|
|
|
|
// Update target section's metadata
|
|
// TODO should we update segment's size here too?
|
|
// How does it tie with incremental space allocs?
|
|
const tseg = &macho_file.load_commands.items[match.seg].Segment;
|
|
const tsect = &tseg.sections.items[match.sect];
|
|
const new_alignment = math.max(tsect.@"align", block.alignment);
|
|
const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
|
|
const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
|
|
tsect.size = new_size;
|
|
tsect.@"align" = new_alignment;
|
|
|
|
if (macho_file.blocks.getPtr(match)) |last| {
|
|
last.*.next = block;
|
|
block.prev = last.*;
|
|
last.* = block;
|
|
} else {
|
|
try macho_file.blocks.putNoClobber(allocator, match, block);
|
|
}
|
|
|
|
try self.text_blocks.append(allocator, block);
|
|
}
|
|
|
|
break :next;
|
|
}
|
|
|
|
// Since there is no symbol to refer to this block, we create
|
|
// a temp one, unless we already did that when working out the relocations
|
|
// of other text blocks.
|
|
const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{
|
|
self.name,
|
|
segmentName(sect),
|
|
sectionName(sect),
|
|
});
|
|
defer allocator.free(sym_name);
|
|
|
|
const block_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
|
|
const block_local_sym_index = @intCast(u32, macho_file.locals.items.len);
|
|
try macho_file.locals.append(allocator, .{
|
|
.n_strx = try macho_file.makeString(sym_name),
|
|
.n_type = macho.N_SECT,
|
|
.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
|
|
.n_desc = 0,
|
|
.n_value = sect.addr,
|
|
});
|
|
try self.sections_as_symbols.putNoClobber(allocator, sect_id, block_local_sym_index);
|
|
break :blk block_local_sym_index;
|
|
};
|
|
|
|
const block = try allocator.create(TextBlock);
|
|
block.* = TextBlock.empty;
|
|
block.local_sym_index = block_local_sym_index;
|
|
block.size = sect.size;
|
|
block.alignment = sect.@"align";
|
|
try macho_file.managed_blocks.append(allocator, block);
|
|
|
|
try block.code.appendSlice(allocator, code);
|
|
|
|
try block.parseRelocs(relocs, .{
|
|
.base_addr = 0,
|
|
.allocator = allocator,
|
|
.object = self,
|
|
.macho_file = macho_file,
|
|
});
|
|
|
|
if (macho_file.has_dices) {
|
|
const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size);
|
|
try block.dices.ensureTotalCapacity(allocator, dices.len);
|
|
|
|
for (dices) |dice| {
|
|
block.dices.appendAssumeCapacity(.{
|
|
.offset = dice.offset - try math.cast(u32, sect.addr),
|
|
.length = dice.length,
|
|
.kind = dice.kind,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Since this is block gets a helper local temporary symbol that didn't exist
|
|
// in the object file which encompasses the entire section, we need traverse
|
|
// the filtered symbols and note which symbol is contained within so that
|
|
// we can properly allocate addresses down the line.
|
|
// While we're at it, we need to update segment,section mapping of each symbol too.
|
|
try block.contained.ensureTotalCapacity(allocator, filtered_nlists.len);
|
|
|
|
for (filtered_nlists) |nlist_with_index| {
|
|
const nlist = nlist_with_index.nlist;
|
|
const local_sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable;
|
|
const local = &macho_file.locals.items[local_sym_index];
|
|
local.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1);
|
|
|
|
const stab: ?TextBlock.Stab = if (self.debug_info) |di| blk: {
|
|
// TODO there has to be a better to handle this.
|
|
for (di.inner.func_list.items) |func| {
|
|
if (func.pc_range) |range| {
|
|
if (nlist.n_value >= range.start and nlist.n_value < range.end) {
|
|
break :blk TextBlock.Stab{
|
|
.function = range.end - range.start,
|
|
};
|
|
}
|
|
}
|
|
}
|
|
// TODO
|
|
// if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global;
|
|
break :blk .static;
|
|
} else null;
|
|
|
|
block.contained.appendAssumeCapacity(.{
|
|
.local_sym_index = local_sym_index,
|
|
.offset = nlist.n_value - sect.addr,
|
|
.stab = stab,
|
|
});
|
|
}
|
|
|
|
// Update target section's metadata
|
|
// TODO should we update segment's size here too?
|
|
// How does it tie with incremental space allocs?
|
|
const tseg = &macho_file.load_commands.items[match.seg].Segment;
|
|
const tsect = &tseg.sections.items[match.sect];
|
|
const new_alignment = math.max(tsect.@"align", block.alignment);
|
|
const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
|
|
const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
|
|
tsect.size = new_size;
|
|
tsect.@"align" = new_alignment;
|
|
|
|
if (macho_file.blocks.getPtr(match)) |last| {
|
|
last.*.next = block;
|
|
block.prev = last.*;
|
|
last.* = block;
|
|
} else {
|
|
try macho_file.blocks.putNoClobber(allocator, match, block);
|
|
}
|
|
|
|
try self.text_blocks.append(allocator, block);
|
|
}
|
|
}
|
|
}
|
|
|
|
fn parseSymtab(self: *Object, allocator: *Allocator) !void {
|
|
const index = self.symtab_cmd_index orelse return;
|
|
const symtab_cmd = self.load_commands.items[index].Symtab;
|
|
|
|
var symtab = try allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms);
|
|
defer allocator.free(symtab);
|
|
_ = try self.file.preadAll(symtab, symtab_cmd.symoff);
|
|
const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, symtab));
|
|
try self.symtab.appendSlice(allocator, slice);
|
|
|
|
var strtab = try allocator.alloc(u8, symtab_cmd.strsize);
|
|
defer allocator.free(strtab);
|
|
_ = try self.file.preadAll(strtab, symtab_cmd.stroff);
|
|
try self.strtab.appendSlice(allocator, strtab);
|
|
}
|
|
|
|
pub fn parseDebugInfo(self: *Object, allocator: *Allocator) !void {
|
|
log.debug("parsing debug info in '{s}'", .{self.name});
|
|
|
|
var debug_info = blk: {
|
|
var di = try DebugInfo.parseFromObject(allocator, self);
|
|
break :blk di orelse return;
|
|
};
|
|
|
|
// We assume there is only one CU.
|
|
const compile_unit = debug_info.inner.findCompileUnit(0x0) catch |err| switch (err) {
|
|
error.MissingDebugInfo => {
|
|
// TODO audit cases with missing debug info and audit our dwarf.zig module.
|
|
log.debug("invalid or missing debug info in {s}; skipping", .{self.name});
|
|
return;
|
|
},
|
|
else => |e| return e,
|
|
};
|
|
const name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_name);
|
|
const comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_comp_dir);
|
|
|
|
self.debug_info = debug_info;
|
|
self.tu_name = try allocator.dupe(u8, name);
|
|
self.tu_comp_dir = try allocator.dupe(u8, comp_dir);
|
|
|
|
if (self.mtime == null) {
|
|
self.mtime = mtime: {
|
|
const stat = self.file.stat() catch break :mtime 0;
|
|
break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000));
|
|
};
|
|
}
|
|
}
|
|
|
|
pub fn parseDataInCode(self: *Object, allocator: *Allocator) !void {
|
|
const index = self.data_in_code_cmd_index orelse return;
|
|
const data_in_code = self.load_commands.items[index].LinkeditData;
|
|
|
|
var buffer = try allocator.alloc(u8, data_in_code.datasize);
|
|
defer allocator.free(buffer);
|
|
|
|
_ = try self.file.preadAll(buffer, data_in_code.dataoff);
|
|
|
|
var stream = io.fixedBufferStream(buffer);
|
|
var reader = stream.reader();
|
|
while (true) {
|
|
const dice = reader.readStruct(macho.data_in_code_entry) catch |err| switch (err) {
|
|
error.EndOfStream => break,
|
|
else => |e| return e,
|
|
};
|
|
try self.data_in_code_entries.append(allocator, dice);
|
|
}
|
|
}
|
|
|
|
fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 {
|
|
const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
|
|
const sect = seg.sections.items[index];
|
|
var buffer = try allocator.alloc(u8, @intCast(usize, sect.size));
|
|
_ = try self.file.preadAll(buffer, sect.offset);
|
|
return buffer;
|
|
}
|
|
|
|
pub fn getString(self: Object, off: u32) []const u8 {
|
|
assert(off < self.strtab.items.len);
|
|
return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + off));
|
|
}
|