Merge pull request #13356 from ziglang/macho-no-dupe

macho: fix performance regression for x86_64 and -dead_strip

Commit 2b25d3c333
src/link/MachO/Object.zig

@@ -44,6 +44,10 @@ symtab: []macho.nlist_64 = undefined,
 /// Can be undefined as set together with in_symtab.
 source_symtab_lookup: []u32 = undefined,
+/// Can be undefined as set together with in_symtab.
+source_address_lookup: []i64 = undefined,
+/// Can be undefined as set together with in_symtab.
+source_section_index_lookup: []i64 = undefined,
 /// Can be undefined as set together with in_symtab.
 strtab_lookup: []u32 = undefined,
 /// Can be undefined as set together with in_symtab.
 atom_by_index_table: []AtomIndex = undefined,
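These two new tables are the heart of the fix: source_address_lookup records each sorted symbol's original address (-1 for address-less entries), and source_section_index_lookup records where each section's run of symbols begins (-1 for symbol-less sections). A minimal sketch of how such a table answers "which symbol contains this address" — hypothetical helper name and simplified types, not the linker's actual code:

    const std = @import("std");

    /// Given symbol addresses in ascending order (-1 marks entries without
    /// an address), return the index of the last symbol at or below `addr`,
    /// i.e. the symbol containing the address, or null if none does.
    fn findContainingSymbol(source_address_lookup: []const i64, addr: i64) ?usize {
        var i: usize = 0;
        while (i < source_address_lookup.len) : (i += 1) {
            const other = source_address_lookup[i];
            if (other == -1 or other > addr) break;
        }
        return if (i == 0) null else i - 1;
    }

    test "find containing symbol" {
        const lookup = [_]i64{ 0x0, 0x10, 0x20, -1 };
        try std.testing.expectEqual(@as(?usize, 1), findContainingSymbol(&lookup, 0x18));
    }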
@@ -58,6 +62,8 @@ pub fn deinit(self: *Object, gpa: Allocator) void {
     gpa.free(self.contents);
     if (self.in_symtab) |_| {
         gpa.free(self.source_symtab_lookup);
+        gpa.free(self.source_address_lookup);
+        gpa.free(self.source_section_index_lookup);
         gpa.free(self.strtab_lookup);
         gpa.free(self.symtab);
         gpa.free(self.atom_by_index_table);
@@ -116,6 +122,10 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch)
     self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len);
     self.globals_lookup = try allocator.alloc(i64, self.in_symtab.?.len);
     self.atom_by_index_table = try allocator.alloc(AtomIndex, self.in_symtab.?.len + nsects);
+    // This is wasteful but we need to be able to lookup source symbol address after stripping and
+    // allocating of sections.
+    self.source_address_lookup = try allocator.alloc(i64, self.in_symtab.?.len);
+    self.source_section_index_lookup = try allocator.alloc(i64, nsects);

     for (self.symtab) |*sym| {
         sym.* = .{
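The "wasteful" comment owns up to a tradeoff: the tables stay alive for the whole link so source symbol addresses survive stripping and section allocation. The cost is modest; a rough back-of-the-envelope check with made-up object sizes (assumes const std = @import("std"); is in scope):

    test "lookup table overhead for a hypothetical object" {
        const nsyms: usize = 100_000; // symbols in the object
        const nsects: usize = 32; // sections in the object
        // source_address_lookup + source_section_index_lookup, both []i64:
        const bytes = nsyms * @sizeOf(i64) + nsects * @sizeOf(i64);
        try std.testing.expectEqual(@as(usize, 800_256), bytes);
    }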
@@ -129,6 +139,7 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch)

     mem.set(i64, self.globals_lookup, -1);
     mem.set(AtomIndex, self.atom_by_index_table, 0);
+    mem.set(i64, self.source_section_index_lookup, -1);

     // You would expect that the symbol table is at least pre-sorted based on symbol's type:
     // local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance,
@@ -150,8 +161,13 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch)
     for (sorted_all_syms.items) |sym_id, i| {
         const sym = sym_id.getSymbol(self);

+        if (sym.sect() and self.source_section_index_lookup[sym.n_sect - 1] == -1) {
+            self.source_section_index_lookup[sym.n_sect - 1] = @intCast(i64, i);
+        }
+
         self.symtab[i] = sym;
         self.source_symtab_lookup[i] = sym_id.index;
+        self.source_address_lookup[i] = if (sym.undf()) -1 else @intCast(i64, sym.n_value);

         const sym_name_len = mem.sliceTo(@ptrCast([*:0]const u8, self.in_strtab.?.ptr + sym.n_strx), 0).len + 1;
         self.strtab_lookup[i] = @intCast(u32, sym_name_len);
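The first-index-per-section trick only works if sorted_all_syms keeps each section's symbols in one contiguous, address-ascending run. Since sections occupy disjoint address ranges within an object file, ordering defined symbols by address is enough to get that grouping. A simplified, hypothetical comparator in that spirit (type names assumed; the real sort also has to order aliases sharing an address):

    // Simplified sketch, not the linker's actual comparator.
    fn addrLessThan(object: *const Object, lhs: SymbolAtIndex, rhs: SymbolAtIndex) bool {
        // Ascending by source address; disjoint section address ranges make
        // each section's symbols one contiguous run as a side effect.
        return lhs.getSymbol(object).n_value < rhs.getSymbol(object).n_value;
    }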
@@ -244,13 +260,12 @@ fn filterSymbolsBySection(symbols: []macho.nlist_64, n_sect: u8) struct {
     return .{ .index = @intCast(u32, index), .len = @intCast(u32, len) };
 }

-fn filterSymbolsByAddress(symbols: []macho.nlist_64, n_sect: u8, start_addr: u64, end_addr: u64) struct {
+fn filterSymbolsByAddress(symbols: []macho.nlist_64, start_addr: u64, end_addr: u64) struct {
     index: u32,
     len: u32,
 } {
     const Predicate = struct {
         addr: u64,
-        n_sect: u8,

         pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool {
             return symbol.n_value >= pred.addr;
@@ -259,11 +274,9 @@ fn filterSymbolsByAddress(symbols: []macho.nlist_64, n_sect: u8, start_addr: u64

     const index = @import("zld.zig").lsearch(macho.nlist_64, symbols, Predicate{
         .addr = start_addr,
-        .n_sect = n_sect,
     });
     const len = @import("zld.zig").lsearch(macho.nlist_64, symbols[index..], Predicate{
         .addr = end_addr,
-        .n_sect = n_sect,
     });

     return .{ .index = @intCast(u32, index), .len = @intCast(u32, len) };
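filterSymbolsByAddress drops its n_sect parameter because every caller now hands it a slice already confined to a single section, so filtering by section inside the predicate was redundant work on a hot path. lsearch is zld.zig's predicate-driven linear search; its exact signature isn't shown in this diff, but a sketch consistent with the call sites:

    // Sketch consistent with the call sites above: return the index of the
    // first element satisfying ctx.predicate, or items.len if none does.
    fn lsearch(comptime T: type, items: []const T, ctx: anytype) usize {
        var i: usize = 0;
        while (i < items.len) : (i += 1) {
            if (ctx.predicate(items[i])) break;
        }
        return i;
    }

Run once with .addr = start_addr it finds the start of the address range; run again on the remainder with .addr = end_addr it yields the range's length.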
@@ -412,12 +425,7 @@ pub fn splitIntoAtoms(self: *Object, zld: *Zld, object_id: u31) !void {
         while (next_sym_index < sect_start_index + sect_loc.len) {
             const next_sym = symtab[next_sym_index];
             const addr = next_sym.n_value;
-            const atom_loc = filterSymbolsByAddress(
-                symtab[next_sym_index..],
-                sect_id + 1,
-                addr,
-                addr + 1,
-            );
+            const atom_loc = filterSymbolsByAddress(symtab[next_sym_index..], addr, addr + 1);
             assert(atom_loc.len > 0);
             const atom_sym_index = atom_loc.index + next_sym_index;
             const nsyms_trailing = atom_loc.len - 1;
src/link/MachO/Atom.zig

@@ -181,6 +181,27 @@ const RelocContext = struct {
     base_offset: i32 = 0,
 };

+pub fn getRelocContext(zld: *Zld, atom_index: AtomIndex) RelocContext {
+    const atom = zld.getAtom(atom_index);
+    assert(atom.getFile() != null); // synthetic atoms do not have relocs
+
+    const object = zld.objects.items[atom.getFile().?];
+    if (object.getSourceSymbol(atom.sym_index)) |source_sym| {
+        const source_sect = object.getSourceSection(source_sym.n_sect - 1);
+        return .{
+            .base_addr = source_sect.addr,
+            .base_offset = @intCast(i32, source_sym.n_value - source_sect.addr),
+        };
+    }
+    const nbase = @intCast(u32, object.in_symtab.?.len);
+    const sect_id = @intCast(u16, atom.sym_index - nbase);
+    const source_sect = object.getSourceSection(sect_id);
+    return .{
+        .base_addr = source_sect.addr,
+        .base_offset = 0,
+    };
+}
+
 pub fn parseRelocTarget(
     zld: *Zld,
     atom_index: AtomIndex,
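getRelocContext hoists logic that previously lived inline in resolveRelocs (see the hunk further below): base_addr is the source section's address, and base_offset is the atom's offset within that section — zero for section-alias atoms, whose sym_index lies past the real symtab. Callers use the context to turn a relocation's section-relative r_address into an offset into the atom's code, roughly:

    // Illustration only; the helper name is mine, not the diff's.
    fn relocOffsetInAtom(ctx: RelocContext, r_address: i32) u32 {
        // r_address is relative to the source section; removing the atom's
        // base_offset yields an index into the atom's code slice.
        return @intCast(u32, r_address - ctx.base_offset);
    }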
@@ -192,6 +213,52 @@ pub fn parseRelocTarget(

     if (rel.r_extern == 0) {
         const sect_id = @intCast(u8, rel.r_symbolnum - 1);
+        const ctx = getRelocContext(zld, atom_index);
+        const atom_code = getAtomCode(zld, atom_index);
+        const rel_offset = @intCast(u32, rel.r_address - ctx.base_offset);
+
+        const address_in_section = if (rel.r_pcrel == 0) blk: {
+            break :blk if (rel.r_length == 3)
+                mem.readIntLittle(i64, atom_code[rel_offset..][0..8])
+            else
+                mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
+        } else blk: {
+            const correction: u3 = switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) {
+                .X86_64_RELOC_SIGNED => 0,
+                .X86_64_RELOC_SIGNED_1 => 1,
+                .X86_64_RELOC_SIGNED_2 => 2,
+                .X86_64_RELOC_SIGNED_4 => 4,
+                else => unreachable,
+            };
+            const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
+            const target_address = @intCast(i64, ctx.base_addr) + rel.r_address + 4 + correction + addend;
+            break :blk target_address;
+        };
+
+        // Find containing atom
+        const Predicate = struct {
+            addr: i64,
+
+            pub fn predicate(pred: @This(), other: i64) bool {
+                return if (other == -1) true else other > pred.addr;
+            }
+        };
+
+        if (object.source_section_index_lookup[sect_id] > -1) {
+            const first_sym_index = @intCast(usize, object.source_section_index_lookup[sect_id]);
+            const target_sym_index = @import("zld.zig").lsearch(i64, object.source_address_lookup[first_sym_index..], Predicate{
+                .addr = address_in_section,
+            });
+
+            if (target_sym_index > 0) {
+                return SymbolWithLoc{
+                    .sym_index = @intCast(u32, first_sym_index + target_sym_index - 1),
+                    .file = atom.file,
+                };
+            }
+        }
+
+        // Start of section is not contained anywhere, return synthetic atom.
         const sym_index = object.getSectionAliasSymbolIndex(sect_id);
         return SymbolWithLoc{ .sym_index = sym_index, .file = atom.file };
     }
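This is the core of the x86_64 fix: instead of routing every r_extern == 0 relocation to the section-alias atom, the target address is decoded from the instruction stream and looked up in the per-section symbol run via source_address_lookup. For the pc-relative branch, a worked example with made-up numbers (assumes const std = @import("std"); is in scope):

    test "pc-relative target address for X86_64_RELOC_SIGNED_4" {
        const base_addr: i64 = 0x1000; // source section address
        const r_address: i64 = 0x10; // relocation offset in the section
        const correction: u3 = 4; // displacement bias for ..._SIGNED_4
        const addend: i32 = -8; // i32 read out of the instruction stream
        // Same formula as in the diff above: next-instruction address plus addend.
        const target = base_addr + r_address + 4 + correction + addend;
        try std.testing.expectEqual(@as(i64, 0x1010), target);
    }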
@@ -405,29 +472,13 @@ pub fn resolveRelocs(
     const atom = zld.getAtom(atom_index);
     assert(atom.getFile() != null); // synthetic atoms do not have relocs

-    const object = zld.objects.items[atom.getFile().?];
-    const ctx: RelocContext = blk: {
-        if (object.getSourceSymbol(atom.sym_index)) |source_sym| {
-            const source_sect = object.getSourceSection(source_sym.n_sect - 1);
-            break :blk .{
-                .base_addr = source_sect.addr,
-                .base_offset = @intCast(i32, source_sym.n_value - source_sect.addr),
-            };
-        }
-        const nbase = @intCast(u32, object.in_symtab.?.len);
-        const sect_id = @intCast(u16, atom.sym_index - nbase);
-        const source_sect = object.getSourceSection(sect_id);
-        break :blk .{
-            .base_addr = source_sect.addr,
-            .base_offset = 0,
-        };
-    };
-
     log.debug("resolving relocations in ATOM(%{d}, '{s}')", .{
         atom.sym_index,
         zld.getSymbolName(atom.getSymbolWithLoc()),
     });

+    const ctx = getRelocContext(zld, atom_index);
+
     return switch (arch) {
         .aarch64 => resolveRelocsArm64(zld, atom_index, atom_code, atom_relocs, reverse_lookup, ctx),
         .x86_64 => resolveRelocsX86(zld, atom_index, atom_code, atom_relocs, reverse_lookup, ctx),
@@ -744,8 +795,11 @@ fn resolveRelocsArm64(
                 mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);

             if (rel.r_extern == 0) {
-                const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr;
-                ptr_addend -= @intCast(i64, target_sect_base_addr);
+                const base_addr = if (target.sym_index > object.source_address_lookup.len)
+                    @intCast(i64, object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr)
+                else
+                    object.source_address_lookup[target.sym_index];
+                ptr_addend -= base_addr;
             }

             const result = blk: {
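The old code subtracted the target section's base address, which is only right when the target atom happens to start the section; the new code subtracts the target symbol's own source address from source_address_lookup. The sym_index > source_address_lookup.len guard detects synthetic section-alias symbols, whose indices live past the end of the real symtab (the same nbase + sect_id scheme as in getRelocContext). The selection, restated as a hypothetical helper:

    // Hypothetical helper restating the if/else above.
    fn targetBaseAddr(object: *const Object, target_sym_index: u32, sect_addr: u64) i64 {
        // Indices beyond source_address_lookup belong to synthetic
        // section-alias symbols; fall back to the section's address.
        if (target_sym_index > object.source_address_lookup.len)
            return @intCast(i64, sect_addr);
        return object.source_address_lookup[target_sym_index];
    }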
@@ -878,13 +932,12 @@ fn resolveRelocsX86(
             var addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]) + correction;

             if (rel.r_extern == 0) {
-                // Note for the future self: when r_extern == 0, we should subtract correction from the
-                // addend.
-                const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr;
-                // We need to add base_offset, i.e., offset of this atom wrt to the source
-                // section. Otherwise, the addend will over-/under-shoot.
-                addend += @intCast(i32, @intCast(i64, context.base_addr + rel_offset + 4) -
-                    @intCast(i64, target_sect_base_addr) + context.base_offset);
+                const base_addr = if (target.sym_index > object.source_address_lookup.len)
+                    @intCast(i64, object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr)
+                else
+                    object.source_address_lookup[target.sym_index];
+                addend += @intCast(i32, @intCast(i64, context.base_addr) + rel.r_address + 4 -
+                    @intCast(i64, base_addr));
             }

             const adjusted_target_addr = @intCast(u64, @intCast(i64, target_addr) + addend);
@@ -902,8 +955,11 @@ fn resolveRelocsX86(
                 mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);

             if (rel.r_extern == 0) {
-                const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr;
-                addend -= @intCast(i64, target_sect_base_addr);
+                const base_addr = if (target.sym_index > object.source_address_lookup.len)
+                    @intCast(i64, object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr)
+                else
+                    object.source_address_lookup[target.sym_index];
+                addend -= base_addr;
             }

             const result = blk: {
src/link/MachO/dead_strip.zig

@@ -162,21 +162,6 @@ fn markLive(
         };
         const target_sym = zld.getSymbol(target);

-        if (rel.r_extern == 0) {
-            // We are pessimistic and mark all atoms within the target section as live.
-            // TODO: this can be improved by marking only the relevant atoms.
-            const sect_id = target_sym.n_sect;
-            const object = zld.objects.items[target.getFile().?];
-            for (object.atoms.items) |other_atom_index| {
-                const other_atom = zld.getAtom(other_atom_index);
-                const other_sym = zld.getSymbol(other_atom.getSymbolWithLoc());
-                if (other_sym.n_sect == sect_id) {
-                    markLive(zld, other_atom_index, alive, reverse_lookups);
-                }
-            }
-            continue;
-        }
-
         if (target_sym.undf()) continue;
         if (target.getFile() == null) {
             const target_sym_name = zld.getSymbolName(target);
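This deletion is the -dead_strip half of the fix. Because parseRelocTarget now resolves an r_extern == 0 relocation to the one atom containing the target address, target_sym here already names a concrete defined symbol and the shared marking path below suffices; the pessimistic mark-the-whole-section loop is no longer needed. Schematically:

    // before: reloc into section S => for every atom in S: markLive(atom)
    //         (keeps the whole section alive, defeating -dead_strip)
    // after:  reloc into section S => markLive(the one containing atom),
    //         courtesy of the precise lookup in parseRelocTarget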
src/link/MachO/zld.zig

@@ -966,10 +966,9 @@ pub const Zld = struct {

         const global_index = resolver.table.get(sym_name) orelse {
             const gpa = self.gpa;
-            const name = try resolver.arena.dupe(u8, sym_name);
             const global_index = @intCast(u32, self.globals.items.len);
             try self.globals.append(gpa, sym_loc);
-            try resolver.table.putNoClobber(name, global_index);
+            try resolver.table.putNoClobber(sym_name, global_index);
             if (sym.undf() and !sym.tentative()) {
                 try resolver.unresolved.putNoClobber(global_index, {});
             }
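Finally, the change that gives the branch its macho-no-dupe name: symbol names are no longer duplicated into the resolver's arena before being used as table keys. This is sound only because sym_name already points into storage that outlives the table, presumably the object's string table (my inference; the diff itself doesn't say). The pattern in miniature:

    // Sketch: borrow a stable slice as the key instead of duplicating it.
    // Assumes `name` points into memory (e.g. an object's string table)
    // that outlives `table`.
    var table = std.StringHashMap(u32).init(gpa);
    defer table.deinit();
    try table.putNoClobber(name, index);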