Merge pull request #13356 from ziglang/macho-no-dupe

macho: fix performance regression for x86_64 and -dead_strip
Jakub Konka 2022-10-30 16:38:28 +01:00 committed by GitHub
commit 2b25d3c333
4 changed files with 104 additions and 56 deletions
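
In short: Object.parse now builds two extra per-object tables, source_address_lookup (the source address of every defined symbol, -1 for undefined ones) and source_section_index_lookup (the index of the first symbol belonging to each section, -1 if a section has none). Because the symbol table is sorted by address, parseRelocTarget can resolve a section-relative (r_extern == 0) relocation to the exact symbol that contains the target address. That lets the dead-stripping pass stop pessimistically marking every atom of the target section as live, and lets the x86_64 relocation code rebase addends against the precise target atom rather than the whole section.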


@@ -44,6 +44,10 @@ symtab: []macho.nlist_64 = undefined,
/// Can be undefined as set together with in_symtab.
source_symtab_lookup: []u32 = undefined,
/// Can be undefined as set together with in_symtab.
source_address_lookup: []i64 = undefined,
/// Can be undefined as set together with in_symtab.
source_section_index_lookup: []i64 = undefined,
/// Can be undefined as set together with in_symtab.
strtab_lookup: []u32 = undefined,
/// Can be undefined as set together with in_symtab.
atom_by_index_table: []AtomIndex = undefined,
@@ -58,6 +62,8 @@ pub fn deinit(self: *Object, gpa: Allocator) void {
gpa.free(self.contents);
if (self.in_symtab) |_| {
gpa.free(self.source_symtab_lookup);
gpa.free(self.source_address_lookup);
gpa.free(self.source_section_index_lookup);
gpa.free(self.strtab_lookup);
gpa.free(self.symtab);
gpa.free(self.atom_by_index_table);
@@ -116,6 +122,10 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch)
self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len);
self.globals_lookup = try allocator.alloc(i64, self.in_symtab.?.len);
self.atom_by_index_table = try allocator.alloc(AtomIndex, self.in_symtab.?.len + nsects);
// This is wasteful, but we need to be able to look up a source symbol's address after
// stripping and allocating sections.
self.source_address_lookup = try allocator.alloc(i64, self.in_symtab.?.len);
self.source_section_index_lookup = try allocator.alloc(i64, nsects);
for (self.symtab) |*sym| {
sym.* = .{
@@ -129,6 +139,7 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch)
mem.set(i64, self.globals_lookup, -1);
mem.set(AtomIndex, self.atom_by_index_table, 0);
mem.set(i64, self.source_section_index_lookup, -1);
// You would expect that the symbol table is at least pre-sorted based on the symbol's type:
// local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance,
@@ -150,8 +161,13 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch)
for (sorted_all_syms.items) |sym_id, i| {
const sym = sym_id.getSymbol(self);
if (sym.sect() and self.source_section_index_lookup[sym.n_sect - 1] == -1) {
self.source_section_index_lookup[sym.n_sect - 1] = @intCast(i64, i);
}
self.symtab[i] = sym;
self.source_symtab_lookup[i] = sym_id.index;
self.source_address_lookup[i] = if (sym.undf()) -1 else @intCast(i64, sym.n_value);
const sym_name_len = mem.sliceTo(@ptrCast([*:0]const u8, self.in_strtab.?.ptr + sym.n_strx), 0).len + 1;
self.strtab_lookup[i] = @intCast(u32, sym_name_len);
@@ -244,13 +260,12 @@ fn filterSymbolsBySection(symbols: []macho.nlist_64, n_sect: u8) struct {
return .{ .index = @intCast(u32, index), .len = @intCast(u32, len) };
}
fn filterSymbolsByAddress(symbols: []macho.nlist_64, n_sect: u8, start_addr: u64, end_addr: u64) struct {
fn filterSymbolsByAddress(symbols: []macho.nlist_64, start_addr: u64, end_addr: u64) struct {
index: u32,
len: u32,
} {
const Predicate = struct {
addr: u64,
n_sect: u8,
pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool {
return symbol.n_value >= pred.addr;
@@ -259,11 +274,9 @@ fn filterSymbolsByAddress(symbols: []macho.nlist_64, n_sect: u8, start_addr: u64
const index = @import("zld.zig").lsearch(macho.nlist_64, symbols, Predicate{
.addr = start_addr,
.n_sect = n_sect,
});
const len = @import("zld.zig").lsearch(macho.nlist_64, symbols[index..], Predicate{
.addr = end_addr,
.n_sect = n_sect,
});
return .{ .index = @intCast(u32, index), .len = @intCast(u32, len) };
@@ -412,12 +425,7 @@ pub fn splitIntoAtoms(self: *Object, zld: *Zld, object_id: u31) !void {
while (next_sym_index < sect_start_index + sect_loc.len) {
const next_sym = symtab[next_sym_index];
const addr = next_sym.n_value;
const atom_loc = filterSymbolsByAddress(
symtab[next_sym_index..],
sect_id + 1,
addr,
addr + 1,
);
const atom_loc = filterSymbolsByAddress(symtab[next_sym_index..], addr, addr + 1);
assert(atom_loc.len > 0);
const atom_sym_index = atom_loc.index + next_sym_index;
const nsyms_trailing = atom_loc.len - 1;
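
Both filterSymbolsByAddress above and the new address lookup in parseRelocTarget lean on zld.lsearch driven by a small Predicate struct. lsearch itself is not part of this diff; the helper below is a hypothetical stand-in, assuming a plain linear scan that returns the index of the first element satisfying the predicate (only the call shape matches the real code):

const std = @import("std");

// Hypothetical stand-in for zld.lsearch: index of the first element for which
// predicate.predicate(elem) returns true, or haystack.len if there is none.
fn lsearch(comptime T: type, haystack: []const T, predicate: anytype) usize {
    var i: usize = 0;
    while (i < haystack.len) : (i += 1) {
        if (predicate.predicate(haystack[i])) break;
    }
    return i;
}

test "predicate-based linear search" {
    const Predicate = struct {
        addr: u64,
        pub fn predicate(pred: @This(), n_value: u64) bool {
            return n_value >= pred.addr;
        }
    };
    const addrs = [_]u64{ 0x0, 0x10, 0x40 };
    try std.testing.expectEqual(@as(usize, 2), lsearch(u64, &addrs, Predicate{ .addr = 0x20 }));
}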


@@ -181,6 +181,27 @@ const RelocContext = struct {
base_offset: i32 = 0,
};
pub fn getRelocContext(zld: *Zld, atom_index: AtomIndex) RelocContext {
const atom = zld.getAtom(atom_index);
assert(atom.getFile() != null); // synthetic atoms do not have relocs
const object = zld.objects.items[atom.getFile().?];
if (object.getSourceSymbol(atom.sym_index)) |source_sym| {
const source_sect = object.getSourceSection(source_sym.n_sect - 1);
return .{
.base_addr = source_sect.addr,
.base_offset = @intCast(i32, source_sym.n_value - source_sect.addr),
};
}
const nbase = @intCast(u32, object.in_symtab.?.len);
const sect_id = @intCast(u16, atom.sym_index - nbase);
const source_sect = object.getSourceSection(sect_id);
return .{
.base_addr = source_sect.addr,
.base_offset = 0,
};
}
pub fn parseRelocTarget(
zld: *Zld,
atom_index: AtomIndex,
@@ -192,6 +213,52 @@ pub fn parseRelocTarget(
if (rel.r_extern == 0) {
const sect_id = @intCast(u8, rel.r_symbolnum - 1);
const ctx = getRelocContext(zld, atom_index);
const atom_code = getAtomCode(zld, atom_index);
const rel_offset = @intCast(u32, rel.r_address - ctx.base_offset);
const address_in_section = if (rel.r_pcrel == 0) blk: {
break :blk if (rel.r_length == 3)
mem.readIntLittle(i64, atom_code[rel_offset..][0..8])
else
mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
} else blk: {
const correction: u3 = switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) {
.X86_64_RELOC_SIGNED => 0,
.X86_64_RELOC_SIGNED_1 => 1,
.X86_64_RELOC_SIGNED_2 => 2,
.X86_64_RELOC_SIGNED_4 => 4,
else => unreachable,
};
const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
const target_address = @intCast(i64, ctx.base_addr) + rel.r_address + 4 + correction + addend;
break :blk target_address;
};
// Find containing atom
const Predicate = struct {
addr: i64,
pub fn predicate(pred: @This(), other: i64) bool {
return if (other == -1) true else other > pred.addr;
}
};
if (object.source_section_index_lookup[sect_id] > -1) {
const first_sym_index = @intCast(usize, object.source_section_index_lookup[sect_id]);
const target_sym_index = @import("zld.zig").lsearch(i64, object.source_address_lookup[first_sym_index..], Predicate{
.addr = address_in_section,
});
if (target_sym_index > 0) {
return SymbolWithLoc{
.sym_index = @intCast(u32, first_sym_index + target_sym_index - 1),
.file = atom.file,
};
}
}
// The address is not covered by any symbol in this section; fall back to the synthetic section alias atom.
const sym_index = object.getSectionAliasSymbolIndex(sect_id);
return SymbolWithLoc{ .sym_index = sym_index, .file = atom.file };
}
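
The hunk above is the heart of the fix: for r_extern == 0 the relocation only names a section, so the linker reconstructs the target address from the instruction bytes (a 64- or 32-bit absolute value, or a PC-relative displacement plus the SIGNED_{1,2,4} correction) and then searches source_address_lookup, starting at the section's first symbol, for the symbol that contains that address. A minimal self-contained sketch of that containment search, with the Object fields reduced to two slices (names and test values are illustrative only):

const std = @import("std");

const AddressLookup = struct {
    // Mirrors source_address_lookup: n_value per symbol, sorted ascending, -1 for undefined.
    addrs: []const i64,
    // Mirrors source_section_index_lookup: first symbol index per section, -1 if none.
    first_sym_in_sect: []const i64,

    // Index of the symbol whose range contains `addr` in section `sect_id`, or null
    // when the address precedes every symbol (the caller then falls back to the
    // synthetic section alias symbol).
    fn findContaining(self: AddressLookup, sect_id: usize, addr: i64) ?usize {
        const start = self.first_sym_in_sect[sect_id];
        if (start == -1) return null;
        var i = @intCast(usize, start);
        // Same stop condition as the Predicate above: halt at -1 or at the first
        // symbol whose address is past `addr`.
        while (i < self.addrs.len and self.addrs[i] != -1 and self.addrs[i] <= addr) : (i += 1) {}
        if (i == @intCast(usize, start)) return null;
        return i - 1;
    }
};

test "resolve a section-relative target to its containing symbol" {
    const lookup = AddressLookup{
        .addrs = &[_]i64{ 0x0, 0x40, 0x80 },
        .first_sym_in_sect = &[_]i64{0},
    };
    try std.testing.expectEqual(@as(?usize, 1), lookup.findContaining(0, 0x54));
    try std.testing.expectEqual(@as(?usize, null), lookup.findContaining(0, -1));
}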
@@ -405,29 +472,13 @@ pub fn resolveRelocs(
const atom = zld.getAtom(atom_index);
assert(atom.getFile() != null); // synthetic atoms do not have relocs
const object = zld.objects.items[atom.getFile().?];
const ctx: RelocContext = blk: {
if (object.getSourceSymbol(atom.sym_index)) |source_sym| {
const source_sect = object.getSourceSection(source_sym.n_sect - 1);
break :blk .{
.base_addr = source_sect.addr,
.base_offset = @intCast(i32, source_sym.n_value - source_sect.addr),
};
}
const nbase = @intCast(u32, object.in_symtab.?.len);
const sect_id = @intCast(u16, atom.sym_index - nbase);
const source_sect = object.getSourceSection(sect_id);
break :blk .{
.base_addr = source_sect.addr,
.base_offset = 0,
};
};
log.debug("resolving relocations in ATOM(%{d}, '{s}')", .{
atom.sym_index,
zld.getSymbolName(atom.getSymbolWithLoc()),
});
const ctx = getRelocContext(zld, atom_index);
return switch (arch) {
.aarch64 => resolveRelocsArm64(zld, atom_index, atom_code, atom_relocs, reverse_lookup, ctx),
.x86_64 => resolveRelocsX86(zld, atom_index, atom_code, atom_relocs, reverse_lookup, ctx),
@@ -744,8 +795,11 @@ fn resolveRelocsArm64(
mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
if (rel.r_extern == 0) {
const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr;
ptr_addend -= @intCast(i64, target_sect_base_addr);
const base_addr = if (target.sym_index > object.source_address_lookup.len)
@intCast(i64, object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr)
else
object.source_address_lookup[target.sym_index];
ptr_addend -= base_addr;
}
const result = blk: {
@@ -878,13 +932,12 @@ fn resolveRelocsX86(
var addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]) + correction;
if (rel.r_extern == 0) {
// Note for the future self: when r_extern == 0, we should subtract correction from the
// addend.
const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr;
// We need to add base_offset, i.e., offset of this atom wrt to the source
// section. Otherwise, the addend will over-/under-shoot.
addend += @intCast(i32, @intCast(i64, context.base_addr + rel_offset + 4) -
@intCast(i64, target_sect_base_addr) + context.base_offset);
const base_addr = if (target.sym_index > object.source_address_lookup.len)
@intCast(i64, object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr)
else
object.source_address_lookup[target.sym_index];
addend += @intCast(i32, @intCast(i64, context.base_addr) + rel.r_address + 4 -
@intCast(i64, base_addr));
}
const adjusted_target_addr = @intCast(u64, @intCast(i64, target_addr) + addend);
@@ -902,8 +955,11 @@ fn resolveRelocsX86(
mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
if (rel.r_extern == 0) {
const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr;
addend -= @intCast(i64, target_sect_base_addr);
const base_addr = if (target.sym_index > object.source_address_lookup.len)
@intCast(i64, object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr)
else
object.source_address_lookup[target.sym_index];
addend -= base_addr;
}
const result = blk: {

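Why the base address changes in resolveRelocsArm64/resolveRelocsX86: for r_extern == 0 the stored addend encodes a source address (absolute, or relative to the relocation site for the signed PC-relative forms), and the linker has to rebase it onto whatever symbol parseRelocTarget returned. Previously that was always the section alias, so the section base address was subtracted; now the target is usually a concrete atom inside the section, so its own source address (source_address_lookup[target.sym_index]) is subtracted instead, with the section address kept only as the fallback for alias symbols. A worked example under assumed numbers: say __text starts at source address 0x100, a SIGNED reloc sits at section offset 0x150, and it points at 0x204, four bytes into an atom bar whose source address is 0x200. The object file stores 0x204 - (0x100 + 0x150 + 4) = -0x50. Rebasing against bar gives -0x50 + (0x254 - 0x200) = 4, so the resolved value is bar's output address plus 4 and stays correct however atoms are reordered or stripped; rebasing against the section base, as before, gives an offset of 0x104 from the section start, which only holds if the whole section is kept intact.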

@@ -162,21 +162,6 @@ fn markLive(
};
const target_sym = zld.getSymbol(target);
if (rel.r_extern == 0) {
// We are pessimistic and mark all atoms within the target section as live.
// TODO: this can be improved by marking only the relevant atoms.
const sect_id = target_sym.n_sect;
const object = zld.objects.items[target.getFile().?];
for (object.atoms.items) |other_atom_index| {
const other_atom = zld.getAtom(other_atom_index);
const other_sym = zld.getSymbol(other_atom.getSymbolWithLoc());
if (other_sym.n_sect == sect_id) {
markLive(zld, other_atom_index, alive, reverse_lookups);
}
}
continue;
}
if (target_sym.undf()) continue;
if (target.getFile() == null) {
const target_sym_name = zld.getSymbolName(target);
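
The deleted block above was the actual performance problem with x86_64 and -dead_strip: every section-relative relocation forced markLive to recurse into every atom of the target section, which on large objects with many such relocations is very expensive and also keeps otherwise dead atoms alive. Since parseRelocTarget now resolves these relocations to the single containing atom, the ordinary path that follows (marking just the resolved target) is both faster and more precise.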


@@ -966,10 +966,9 @@ pub const Zld = struct {
const global_index = resolver.table.get(sym_name) orelse {
const gpa = self.gpa;
const name = try resolver.arena.dupe(u8, sym_name);
const global_index = @intCast(u32, self.globals.items.len);
try self.globals.append(gpa, sym_loc);
try resolver.table.putNoClobber(name, global_index);
try resolver.table.putNoClobber(sym_name, global_index);
if (sym.undf() and !sym.tentative()) {
try resolver.unresolved.putNoClobber(global_index, {});
}