From 5b5466626810302371b2d21c39e7ad04ea13a8dc Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 20 Mar 2023 16:12:51 +0100 Subject: [PATCH 01/11] macho+zld: relax assumption about dead strip atoms uniqueness In case the compiler emitted an object file that is not sliceable into subsections, the entry point may overlap with a section atom, which is perfectly fine, so don't panic in that case. --- src/link/MachO/dead_strip.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 9dfd6226b4..a132ecb2de 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -102,7 +102,7 @@ fn collectRoots(zld: *Zld, roots: *AtomTable) !void { }; if (is_gc_root) { - try roots.putNoClobber(atom_index, {}); + _ = try roots.getOrPut(atom_index); log.debug("root(ATOM({d}, %{d}, {?d}))", .{ atom_index, From a88ffa7fa91e568fe234ac9ea2b15f005876cca6 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 21 Mar 2023 11:38:19 +0100 Subject: [PATCH 02/11] macho+zld: save locals from section atoms to symtab too --- src/link/MachO/zld.zig | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 931352545e..17d122b30d 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -2456,6 +2456,17 @@ pub const Zld = struct { try self.writeStrtab(); } + fn addLocalToSymtab(self: *Zld, sym_loc: SymbolWithLoc, locals: *std.ArrayList(macho.nlist_64)) !void { + const sym = self.getSymbol(sym_loc); + if (sym.n_strx == 0) return; // no name, skip + if (sym.ext()) return; // an export lands in its own symtab section, skip + if (self.symbolIsTemp(sym_loc)) return; // local temp symbol, skip + + var out_sym = sym; + out_sym.n_strx = try self.strtab.insert(self.gpa, self.getSymbolName(sym_loc)); + try locals.append(out_sym); + } + fn writeSymtab(self: *Zld) !SymtabCtx { const gpa = self.gpa; @@ -2466,14 +2477,12 
@@ pub const Zld = struct { for (object.atoms.items) |atom_index| { const atom = self.getAtom(atom_index); const sym_loc = atom.getSymbolWithLoc(); - const sym = self.getSymbol(sym_loc); - if (sym.n_strx == 0) continue; // no name, skip - if (sym.ext()) continue; // an export lands in its own symtab section, skip - if (self.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip + try self.addLocalToSymtab(sym_loc, &locals); - var out_sym = sym; - out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(sym_loc)); - try locals.append(out_sym); + var it = Atom.getInnerSymbolsIterator(self, atom_index); + while (it.next()) |inner_sym_loc| { + try self.addLocalToSymtab(inner_sym_loc, &locals); + } } } From 073f9a18a92fd233e07470e737e15e651618e47f Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 21 Mar 2023 11:38:49 +0100 Subject: [PATCH 03/11] macho+zld: return null rather than error on invalid AbbrevKind --- src/link/MachO/DwarfInfo.zig | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/src/link/MachO/DwarfInfo.zig b/src/link/MachO/DwarfInfo.zig index 1ec4a79871..3218435734 100644 --- a/src/link/MachO/DwarfInfo.zig +++ b/src/link/MachO/DwarfInfo.zig @@ -27,7 +27,7 @@ const CompileUnitIterator = struct { pub fn next(self: *CompileUnitIterator) !?CompileUnit { if (self.pos >= self.ctx.debug_info.len) return null; - var stream = std.io.fixedBufferStream(self.ctx.debug_info); + var stream = std.io.fixedBufferStream(self.ctx.debug_info[self.pos..]); var creader = std.io.countingReader(stream.reader()); const reader = creader.reader(); @@ -37,7 +37,7 @@ const CompileUnitIterator = struct { const cu = CompileUnit{ .cuh = cuh, - .debug_info_off = offset, + .debug_info_off = self.pos + offset, }; self.pos += (math.cast(usize, total_length) orelse return error.Overflow); @@ -188,7 +188,7 @@ const AbbrevEntryIterator = struct { return AbbrevEntry.null(); } - const abbrev_pos = lookup.get(kind) orelse return 
error.MalformedDwarf; + const abbrev_pos = lookup.get(kind) orelse return null; const len = try findAbbrevEntrySize( self.ctx, abbrev_pos.pos, @@ -290,21 +290,6 @@ pub const Attribute = struct { }; } - pub fn getReference(self: Attribute, ctx: DwarfInfo) !?u64 { - const debug_info = self.getDebugInfo(ctx); - var stream = std.io.fixedBufferStream(debug_info); - const reader = stream.reader(); - - return switch (self.form) { - dwarf.FORM.ref1 => debug_info[0], - dwarf.FORM.ref2 => mem.readIntLittle(u16, debug_info[0..2]), - dwarf.FORM.ref4 => mem.readIntLittle(u32, debug_info[0..4]), - dwarf.FORM.ref8 => mem.readIntLittle(u64, debug_info[0..8]), - dwarf.FORM.ref_udata => try leb.readULEB128(u64, reader), - else => null, - }; - } - pub fn getAddr(self: Attribute, ctx: DwarfInfo, cuh: CompileUnit.Header) ?u64 { if (self.form != dwarf.FORM.addr) return null; const debug_info = self.getDebugInfo(ctx); From b73159f4f57dcdffab526bb2c944b37942d65fc8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 21 Mar 2023 12:47:43 +0100 Subject: [PATCH 04/11] macho: use TOOL=0x5 to mean ZIG as the build tool --- lib/std/macho.zig | 2 ++ src/link/MachO/load_commands.zig | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/std/macho.zig b/lib/std/macho.zig index 8f695b14b7..ff12e718f6 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -143,6 +143,8 @@ pub const TOOL = enum(u32) { CLANG = 0x1, SWIFT = 0x2, LD = 0x3, + LLD = 0x4, // LLVM's stock LLD linker + ZIG = 0x5, // Unofficially Zig _, }; diff --git a/src/link/MachO/load_commands.zig b/src/link/MachO/load_commands.zig index a452551a0a..43469ac435 100644 --- a/src/link/MachO/load_commands.zig +++ b/src/link/MachO/load_commands.zig @@ -294,7 +294,7 @@ pub fn writeBuildVersionLC(options: *const link.Options, lc_writer: anytype) !vo .ntools = 1, }); try lc_writer.writeAll(mem.asBytes(&macho.build_tool_version{ - .tool = .LD, + .tool = .ZIG, .version = 0x0, })); } From 83352678d433c9ffbda23b88066f628ab9d1c76d 
Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 21 Mar 2023 14:30:30 +0100 Subject: [PATCH 05/11] macho+zld: put __TEXT bound sections in __TEXT segment --- src/link/MachO/zld.zig | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 17d122b30d..7a15782ae6 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -477,9 +477,9 @@ pub const Zld = struct { mem.eql(u8, sectname, "__gosymtab") or mem.eql(u8, sectname, "__gopclntab")) { - break :blk self.getSectionByName("__DATA_CONST", "__const") orelse try self.initSection( - "__DATA_CONST", - "__const", + break :blk self.getSectionByName("__TEXT", sectname) orelse try self.initSection( + "__TEXT", + sectname, .{}, ); } @@ -490,15 +490,13 @@ pub const Zld = struct { mem.eql(u8, sectname, "__objc_classlist") or mem.eql(u8, sectname, "__objc_imageinfo")) { - break :blk self.getSectionByName("__DATA_CONST", sectname) orelse - try self.initSection( + break :blk self.getSectionByName("__DATA_CONST", sectname) orelse try self.initSection( "__DATA_CONST", sectname, .{}, ); } else if (mem.eql(u8, sectname, "__data")) { - break :blk self.getSectionByName("__DATA", "__data") orelse - try self.initSection( + break :blk self.getSectionByName("__DATA", "__data") orelse try self.initSection( "__DATA", "__data", .{}, From cb34d6f4362c8f5d806c0d0b328ea29e877d5c5f Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 21 Mar 2023 14:43:02 +0100 Subject: [PATCH 06/11] macho+zld: put locals and globals in function-starts section --- src/link/MachO/zld.zig | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 7a15782ae6..845a12276f 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -2298,9 +2298,16 @@ pub const Zld = struct { const asc_u64 = std.sort.asc(u64); + fn addSymbolToFunctionStarts(self: *Zld, sym_loc: SymbolWithLoc, 
addresses: *std.ArrayList(u64)) !void { + const sym = self.getSymbol(sym_loc); + if (sym.n_strx == 0) return; + if (sym.n_desc == N_DEAD) return; + if (self.symbolIsTemp(sym_loc)) return; + try addresses.append(sym.n_value); + } + fn writeFunctionStarts(self: *Zld) !void { const text_seg_index = self.getSegmentByName("__TEXT") orelse return; - const text_sect_index = self.getSectionByName("__TEXT", "__text") orelse return; const text_seg = self.segments.items[text_seg_index]; const gpa = self.gpa; @@ -2308,17 +2315,18 @@ pub const Zld = struct { // We need to sort by address first var addresses = std.ArrayList(u64).init(gpa); defer addresses.deinit(); - try addresses.ensureTotalCapacityPrecise(self.globals.items.len); - for (self.globals.items) |global| { - const sym = self.getSymbol(global); - if (sym.undf()) continue; - if (sym.n_desc == N_DEAD) continue; + for (self.objects.items) |object| { + for (object.exec_atoms.items) |atom_index| { + const atom = self.getAtom(atom_index); + const sym_loc = atom.getSymbolWithLoc(); + try self.addSymbolToFunctionStarts(sym_loc, &addresses); - const sect_id = sym.n_sect - 1; - if (sect_id != text_sect_index) continue; - - addresses.appendAssumeCapacity(sym.n_value); + var it = Atom.getInnerSymbolsIterator(self, atom_index); + while (it.next()) |inner_sym_loc| { + try self.addSymbolToFunctionStarts(inner_sym_loc, &addresses); + } + } } std.sort.sort(u64, addresses.items, {}, asc_u64); @@ -2457,6 +2465,7 @@ pub const Zld = struct { fn addLocalToSymtab(self: *Zld, sym_loc: SymbolWithLoc, locals: *std.ArrayList(macho.nlist_64)) !void { const sym = self.getSymbol(sym_loc); if (sym.n_strx == 0) return; // no name, skip + if (sym.n_desc == N_DEAD) return; // garbage-collected, skip if (sym.ext()) return; // an export lands in its own symtab section, skip if (self.symbolIsTemp(sym_loc)) return; // local temp symbol, skip From dc98009e36a344f8d0330af6b9e9226a2ba6a474 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 21 Mar 2023 
16:12:25 +0100 Subject: [PATCH 07/11] macho+zld: save all defined globals in the export trie --- src/link/MachO/zld.zig | 39 +++++++++++---------------------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 845a12276f..fe0ab16c92 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -2141,35 +2141,18 @@ pub const Zld = struct { const exec_segment = self.segments.items[segment_index]; const base_address = exec_segment.vmaddr; - if (self.options.output_mode == .Exe) { - for (&[_]SymbolWithLoc{ - self.getEntryPoint(), - self.globals.items[self.mh_execute_header_index.?], - }) |global| { - const sym = self.getSymbol(global); - const sym_name = self.getSymbolName(global); - log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); - try trie.put(gpa, .{ - .name = sym_name, - .vmaddr_offset = sym.n_value - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); - } - } else { - assert(self.options.output_mode == .Lib); - for (self.globals.items) |global| { - const sym = self.getSymbol(global); - if (sym.undf()) continue; - if (sym.n_desc == N_DEAD) continue; + for (self.globals.items) |global| { + const sym = self.getSymbol(global); + if (sym.undf()) continue; + if (sym.n_desc == N_DEAD) continue; - const sym_name = self.getSymbolName(global); - log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); - try trie.put(gpa, .{ - .name = sym_name, - .vmaddr_offset = sym.n_value - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); - } + const sym_name = self.getSymbolName(global); + log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); + try trie.put(gpa, .{ + .name = sym_name, + .vmaddr_offset = sym.n_value - base_address, + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + }); } try trie.finalize(gpa); From 1be86218153ae77109d785aafb29430f787adefd Mon Sep 17 00:00:00 2001 
From: Jakub Konka Date: Tue, 21 Mar 2023 21:27:17 +0100 Subject: [PATCH 08/11] macho+zld: when finding by address, note the end of section symbols too Previously, if we were looking for the very last symbol by address in some section, and the next symbol happened to also have the same address value but would reside in a different section, we would keep going, finding the wrong symbol in the wrong section. This mechanism turns out to be vital for correct linking of Go binaries, where the runtime looks for specially crafted synthetic symbols which mark the beginning and end of each section. In this case, we had an unfortunate clash between the end of the PC-marked machine code section (`_runtime.etext`) and the beginning of read-only data (`_runtime.rodata`). --- src/link/MachO/Object.zig | 66 +++++++++++++++++++++----------------- src/link/MachO/ZldAtom.zig | 3 +- 2 files changed, 39 insertions(+), 30 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index fdcdb47224..a3e322179d 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -50,7 +50,7 @@ reverse_symtab_lookup: []u32 = undefined, /// Can be undefined as set together with in_symtab. source_address_lookup: []i64 = undefined, /// Can be undefined as set together with in_symtab. -source_section_index_lookup: []i64 = undefined, +source_section_index_lookup: []Entry = undefined, /// Can be undefined as set together with in_symtab. strtab_lookup: []u32 = undefined, /// Can be undefined as set together with in_symtab. @@ -58,7 +58,7 @@ atom_by_index_table: []AtomIndex = undefined, /// Can be undefined as set together with in_symtab. globals_lookup: []i64 = undefined, /// Can be undefined as set together with in_symtab. -relocs_lookup: []RelocEntry = undefined, +relocs_lookup: []Entry = undefined, /// All relocations sorted and flatened, sorted by address descending /// per section. 
@@ -81,11 +81,14 @@ unwind_info_sect_id: ?u8 = null, unwind_relocs_lookup: []Record = undefined, unwind_records_lookup: std.AutoHashMapUnmanaged(AtomIndex, u32) = .{}, -const RelocEntry = struct { start: u32, len: u32 }; +const Entry = struct { + start: u32 = 0, + len: u32 = 0, +}; const Record = struct { dead: bool, - reloc: RelocEntry, + reloc: Entry, }; pub fn deinit(self: *Object, gpa: Allocator) void { @@ -170,11 +173,11 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); self.globals_lookup = try allocator.alloc(i64, self.in_symtab.?.len); self.atom_by_index_table = try allocator.alloc(AtomIndex, self.in_symtab.?.len + nsects); - self.relocs_lookup = try allocator.alloc(RelocEntry, self.in_symtab.?.len + nsects); + self.relocs_lookup = try allocator.alloc(Entry, self.in_symtab.?.len + nsects); // This is wasteful but we need to be able to lookup source symbol address after stripping and // allocating of sections. self.source_address_lookup = try allocator.alloc(i64, self.in_symtab.?.len); - self.source_section_index_lookup = try allocator.alloc(i64, nsects); + self.source_section_index_lookup = try allocator.alloc(Entry, nsects); for (self.symtab) |*sym| { sym.* = .{ @@ -188,11 +191,8 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) mem.set(i64, self.globals_lookup, -1); mem.set(AtomIndex, self.atom_by_index_table, 0); - mem.set(i64, self.source_section_index_lookup, -1); - mem.set(RelocEntry, self.relocs_lookup, .{ - .start = 0, - .len = 0, - }); + mem.set(Entry, self.source_section_index_lookup, .{}); + mem.set(Entry, self.relocs_lookup, .{}); // You would expect that the symbol table is at least pre-sorted based on symbol's type: // local < extern defined < undefined. Unfortunately, this is not guaranteed! 
For instance, @@ -211,12 +211,24 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) // is kind enough to specify the symbols in the correct order. sort.sort(SymbolAtIndex, sorted_all_syms.items, self, SymbolAtIndex.lessThan); + var prev_sect_id: u8 = 0; + var section_index_lookup: ?Entry = null; for (sorted_all_syms.items, 0..) |sym_id, i| { const sym = sym_id.getSymbol(self); - if (sym.sect() and self.source_section_index_lookup[sym.n_sect - 1] == -1) { - self.source_section_index_lookup[sym.n_sect - 1] = @intCast(i64, i); + if (section_index_lookup) |*lookup| { + if (sym.n_sect != prev_sect_id or sym.undf()) { + self.source_section_index_lookup[prev_sect_id - 1] = lookup.*; + section_index_lookup = null; + } else { + lookup.len += 1; + } } + if (sym.sect() and section_index_lookup == null) { + section_index_lookup = .{ .start = @intCast(u32, i), .len = 1 }; + } + + prev_sect_id = sym.n_sect; self.symtab[i] = sym; self.source_symtab_lookup[i] = sym_id.index; @@ -234,13 +246,7 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) self.unwind_info_sect_id = self.getSourceSectionIndexByName("__LD", "__compact_unwind"); if (self.hasUnwindRecords()) { self.unwind_relocs_lookup = try allocator.alloc(Record, self.getUnwindRecords().len); - mem.set(Record, self.unwind_relocs_lookup, .{ - .dead = true, - .reloc = .{ - .start = 0, - .len = 0, - }, - }); + mem.set(Record, self.unwind_relocs_lookup, .{ .dead = true, .reloc = .{} }); } } @@ -620,7 +626,7 @@ fn filterRelocs( relocs: []align(1) const macho.relocation_info, start_addr: u64, end_addr: u64, -) RelocEntry { +) Entry { const Predicate = struct { addr: u64, @@ -712,9 +718,9 @@ fn parseEhFrameSection(self: *Object, zld: *Zld, object_id: u32) !void { while (try it.next()) |record| { const offset = it.pos - record.getSize(); - const rel_pos = switch (cpu_arch) { + const rel_pos: Entry = switch (cpu_arch) { .aarch64 => filterRelocs(relocs, offset, offset + 
record.getSize()), - .x86_64 => RelocEntry{ .start = 0, .len = 0 }, + .x86_64 => .{}, else => unreachable, }; self.eh_frame_relocs_lookup.putAssumeCapacityNoClobber(offset, .{ @@ -990,13 +996,15 @@ pub fn getSymbolByAddress(self: Object, addr: u64, sect_hint: ?u8) u32 { }; if (sect_hint) |sect_id| { - if (self.source_section_index_lookup[sect_id] > -1) { - const first_sym_index = @intCast(usize, self.source_section_index_lookup[sect_id]); - const target_sym_index = @import("zld.zig").lsearch(i64, self.source_address_lookup[first_sym_index..], Predicate{ - .addr = @intCast(i64, addr), - }); + if (self.source_section_index_lookup[sect_id].len > 0) { + const lookup = self.source_section_index_lookup[sect_id]; + const target_sym_index = @import("zld.zig").lsearch( + i64, + self.source_address_lookup[lookup.start..][0..lookup.len], + Predicate{ .addr = @intCast(i64, addr) }, + ); if (target_sym_index > 0) { - return @intCast(u32, first_sym_index + target_sym_index - 1); + return @intCast(u32, lookup.start + target_sym_index - 1); } } return self.getSectionAliasSymbolIndex(sect_id); diff --git a/src/link/MachO/ZldAtom.zig b/src/link/MachO/ZldAtom.zig index e3d5f62a12..eb5e1c6ded 100644 --- a/src/link/MachO/ZldAtom.zig +++ b/src/link/MachO/ZldAtom.zig @@ -790,10 +790,11 @@ fn resolveRelocsX86( const target = parseRelocTarget(zld, atom_index, rel); const rel_offset = @intCast(u32, rel.r_address - context.base_offset); - log.debug(" RELA({s}) @ {x} => %{d} in object({?})", .{ + log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{ @tagName(rel_type), rel.r_address, target.sym_index, + zld.getSymbolName(target), target.getFile(), }); From 8bffe87e9eeaf602d06eec60dffc955a86228fbd Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 21 Mar 2023 21:31:24 +0100 Subject: [PATCH 09/11] macho: collect all exports into the export trie --- src/link/MachO.zig | 39 +++++++++++---------------------------- src/link/MachO/zld.zig | 4 +--- 2 files changed, 12 insertions(+), 31 
deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 151b947141..2f594d1fda 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3340,36 +3340,19 @@ fn collectExportData(self: *MachO, trie: *Trie) !void { const exec_segment = self.segments.items[self.header_segment_cmd_index.?]; const base_address = exec_segment.vmaddr; - if (self.base.options.output_mode == .Exe) { - for (&[_]SymbolWithLoc{ - try self.getEntryPoint(), - self.getGlobal("__mh_execute_header").?, - }) |global| { - const sym = self.getSymbol(global); - const sym_name = self.getSymbolName(global); - log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); - try trie.put(gpa, .{ - .name = sym_name, - .vmaddr_offset = sym.n_value - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); - } - } else { - assert(self.base.options.output_mode == .Lib); - for (self.globals.items) |global| { - const sym = self.getSymbol(global); + for (self.globals.items) |global| { + const sym = self.getSymbol(global); - if (sym.undf()) continue; - if (!sym.ext()) continue; + if (sym.undf()) continue; + if (!sym.ext()) continue; - const sym_name = self.getSymbolName(global); - log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); - try trie.put(gpa, .{ - .name = sym_name, - .vmaddr_offset = sym.n_value - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); - } + const sym_name = self.getSymbolName(global); + log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); + try trie.put(gpa, .{ + .name = sym_name, + .vmaddr_offset = sym.n_value - base_address, + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + }); } try trie.finalize(gpa); diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index fe0ab16c92..27a0fa5579 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -2158,9 +2158,7 @@ pub const Zld = struct { try trie.finalize(gpa); } - fn writeDyldInfoData( - 
self: *Zld, - ) !void { + fn writeDyldInfoData(self: *Zld) !void { const gpa = self.gpa; var rebase = Rebase{}; From c984201ddb10d2977290c6f1d6857e78573a2dff Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 22 Mar 2023 13:57:43 +0100 Subject: [PATCH 10/11] macho+zld: refactor parsing of relocation target --- src/link/MachO/Object.zig | 26 ++++----- src/link/MachO/UnwindInfo.zig | 74 ++++++----------------- src/link/MachO/ZldAtom.zig | 107 +++++++++++++++++++++++----------- src/link/MachO/dead_strip.zig | 77 ++++++++++++++++-------- src/link/MachO/eh_frame.zig | 26 ++++----- src/link/MachO/thunks.zig | 11 +++- src/link/MachO/zld.zig | 37 ++++++------ 7 files changed, 198 insertions(+), 160 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index a3e322179d..c6b86cce63 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -735,13 +735,12 @@ fn parseEhFrameSection(self: *Object, zld: *Zld, object_id: u32) !void { assert(rel_pos.len > 0); // TODO convert to an error as the FDE eh frame is malformed // Find function symbol that this record describes const rel = relocs[rel_pos.start..][rel_pos.len - 1]; - const target = UnwindInfo.parseRelocTarget( - zld, - object_id, - rel, - it.data[offset..], - @intCast(i32, offset), - ); + const target = Atom.parseRelocTarget(zld, .{ + .object_id = object_id, + .rel = rel, + .code = it.data[offset..], + .base_offset = @intCast(i32, offset), + }); break :blk target; }, .x86_64 => { @@ -825,13 +824,12 @@ fn parseUnwindInfo(self: *Object, zld: *Zld, object_id: u32) !void { // Find function symbol that this record describes const rel = relocs[rel_pos.start..][rel_pos.len - 1]; - const target = UnwindInfo.parseRelocTarget( - zld, - object_id, - rel, - mem.asBytes(&record), - @intCast(i32, offset), - ); + const target = Atom.parseRelocTarget(zld, .{ + .object_id = object_id, + .rel = rel, + .code = mem.asBytes(&record), + .base_offset = @intCast(i32, offset), + }); log.debug("unwind 
record {d} tracks {s}", .{ record_id, zld.getSymbolName(target) }); if (target.getFile() != object_id) { self.unwind_relocs_lookup[record_id].dead = true; diff --git a/src/link/MachO/UnwindInfo.zig b/src/link/MachO/UnwindInfo.zig index c64e617a35..e59f5fe250 100644 --- a/src/link/MachO/UnwindInfo.zig +++ b/src/link/MachO/UnwindInfo.zig @@ -218,13 +218,12 @@ pub fn scanRelocs(zld: *Zld) !void { record_id, )) |rel| { // Personality function; add GOT pointer. - const target = parseRelocTarget( - zld, - @intCast(u32, object_id), - rel, - mem.asBytes(&record), - @intCast(i32, record_id * @sizeOf(macho.compact_unwind_entry)), - ); + const target = Atom.parseRelocTarget(zld, .{ + .object_id = @intCast(u32, object_id), + .rel = rel, + .code = mem.asBytes(&record), + .base_offset = @intCast(i32, record_id * @sizeOf(macho.compact_unwind_entry)), + }); try Atom.addGotEntry(zld, target); } } @@ -266,13 +265,12 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void { @intCast(u32, object_id), record_id, )) |rel| { - const target = parseRelocTarget( - zld, - @intCast(u32, object_id), - rel, - mem.asBytes(&record), - @intCast(i32, record_id * @sizeOf(macho.compact_unwind_entry)), - ); + const target = Atom.parseRelocTarget(zld, .{ + .object_id = @intCast(u32, object_id), + .rel = rel, + .code = mem.asBytes(&record), + .base_offset = @intCast(i32, record_id * @sizeOf(macho.compact_unwind_entry)), + }); const personality_index = info.getPersonalityFunction(target) orelse inner: { const personality_index = info.personalities_count; info.personalities[personality_index] = target; @@ -285,13 +283,12 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void { } if (getLsdaReloc(zld, @intCast(u32, object_id), record_id)) |rel| { - const target = parseRelocTarget( - zld, - @intCast(u32, object_id), - rel, - mem.asBytes(&record), - @intCast(i32, record_id * @sizeOf(macho.compact_unwind_entry)), - ); + const target = Atom.parseRelocTarget(zld, .{ + .object_id = @intCast(u32, object_id), + .rel 
= rel, + .code = mem.asBytes(&record), + .base_offset = @intCast(i32, record_id * @sizeOf(macho.compact_unwind_entry)), + }); record.lsda = @bitCast(u64, target); } } @@ -668,41 +665,6 @@ pub fn write(info: *UnwindInfo, zld: *Zld) !void { try zld.file.pwriteAll(buffer.items, sect.offset); } -pub fn parseRelocTarget( - zld: *Zld, - object_id: u32, - rel: macho.relocation_info, - code: []const u8, - base_offset: i32, -) SymbolWithLoc { - const tracy = trace(@src()); - defer tracy.end(); - - const object = &zld.objects.items[object_id]; - - const sym_index = if (rel.r_extern == 0) blk: { - const sect_id = @intCast(u8, rel.r_symbolnum - 1); - const rel_offset = @intCast(u32, rel.r_address - base_offset); - assert(rel.r_pcrel == 0 and rel.r_length == 3); - const address_in_section = mem.readIntLittle(u64, code[rel_offset..][0..8]); - const sym_index = object.getSymbolByAddress(address_in_section, sect_id); - break :blk sym_index; - } else object.reverse_symtab_lookup[rel.r_symbolnum]; - - const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = object_id + 1 }; - const sym = zld.getSymbol(sym_loc); - - if (sym.sect() and !sym.ext()) { - // Make sure we are not dealing with a local alias. 
- const atom_index = object.getAtomIndexForSymbol(sym_index) orelse - return sym_loc; - const atom = zld.getAtom(atom_index); - return atom.getSymbolWithLoc(); - } else if (object.getGlobal(sym_index)) |global_index| { - return zld.globals.items[global_index]; - } else return sym_loc; -} - fn getRelocs(zld: *Zld, object_id: u32, record_id: usize) []const macho.relocation_info { const object = &zld.objects.items[object_id]; assert(object.hasUnwindRecords()); diff --git a/src/link/MachO/ZldAtom.zig b/src/link/MachO/ZldAtom.zig index eb5e1c6ded..c47bce7c17 100644 --- a/src/link/MachO/ZldAtom.zig +++ b/src/link/MachO/ZldAtom.zig @@ -15,6 +15,7 @@ const macho = std.macho; const math = std.math; const mem = std.mem; const meta = std.meta; +const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; @@ -163,7 +164,7 @@ pub fn scanAtomRelocs(zld: *Zld, atom_index: AtomIndex, relocs: []align(1) const } const RelocContext = struct { - base_addr: u64 = 0, + base_addr: i64 = 0, base_offset: i32 = 0, }; @@ -175,7 +176,7 @@ pub fn getRelocContext(zld: *Zld, atom_index: AtomIndex) RelocContext { if (object.getSourceSymbol(atom.sym_index)) |source_sym| { const source_sect = object.getSourceSection(source_sym.n_sect - 1); return .{ - .base_addr = source_sect.addr, + .base_addr = @intCast(i64, source_sect.addr), .base_offset = @intCast(i32, source_sym.n_value - source_sect.addr), }; } @@ -183,55 +184,71 @@ pub fn getRelocContext(zld: *Zld, atom_index: AtomIndex) RelocContext { const sect_id = @intCast(u8, atom.sym_index - nbase); const source_sect = object.getSourceSection(sect_id); return .{ - .base_addr = source_sect.addr, + .base_addr = @intCast(i64, source_sect.addr), .base_offset = 0, }; } -pub fn parseRelocTarget(zld: *Zld, atom_index: AtomIndex, rel: macho.relocation_info) SymbolWithLoc { - const atom = zld.getAtom(atom_index); - const object = &zld.objects.items[atom.getFile().?]; +pub fn parseRelocTarget(zld: *Zld, ctx: 
struct { + object_id: u32, + rel: macho.relocation_info, + code: []const u8, + base_addr: i64 = 0, + base_offset: i32 = 0, +}) SymbolWithLoc { + const tracy = trace(@src()); + defer tracy.end(); - const sym_index = if (rel.r_extern == 0) sym_index: { - const sect_id = @intCast(u8, rel.r_symbolnum - 1); - const ctx = getRelocContext(zld, atom_index); - const atom_code = getAtomCode(zld, atom_index); - const rel_offset = @intCast(u32, rel.r_address - ctx.base_offset); + const object = &zld.objects.items[ctx.object_id]; + log.debug("parsing reloc target in object({d}) '{s}' ", .{ ctx.object_id, object.name }); - const address_in_section = if (rel.r_pcrel == 0) blk: { - break :blk if (rel.r_length == 3) - mem.readIntLittle(u64, atom_code[rel_offset..][0..8]) + const sym_index = if (ctx.rel.r_extern == 0) sym_index: { + const sect_id = @intCast(u8, ctx.rel.r_symbolnum - 1); + const rel_offset = @intCast(u32, ctx.rel.r_address - ctx.base_offset); + + const address_in_section = if (ctx.rel.r_pcrel == 0) blk: { + break :blk if (ctx.rel.r_length == 3) + mem.readIntLittle(u64, ctx.code[rel_offset..][0..8]) else - mem.readIntLittle(u32, atom_code[rel_offset..][0..4]); + mem.readIntLittle(u32, ctx.code[rel_offset..][0..4]); } else blk: { - const correction: u3 = switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { + assert(zld.options.target.cpu.arch == .x86_64); + const correction: u3 = switch (@intToEnum(macho.reloc_type_x86_64, ctx.rel.r_type)) { .X86_64_RELOC_SIGNED => 0, .X86_64_RELOC_SIGNED_1 => 1, .X86_64_RELOC_SIGNED_2 => 2, .X86_64_RELOC_SIGNED_4 => 4, else => unreachable, }; - const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); - const target_address = @intCast(i64, ctx.base_addr) + rel.r_address + 4 + correction + addend; + const addend = mem.readIntLittle(i32, ctx.code[rel_offset..][0..4]); + const target_address = @intCast(i64, ctx.base_addr) + ctx.rel.r_address + 4 + correction + addend; break :blk @intCast(u64, target_address); }; // Find 
containing atom + log.debug(" | locating symbol by address @{x} in section {d}", .{ address_in_section, sect_id }); const sym_index = object.getSymbolByAddress(address_in_section, sect_id); break :sym_index sym_index; - } else object.reverse_symtab_lookup[rel.r_symbolnum]; + } else object.reverse_symtab_lookup[ctx.rel.r_symbolnum]; - const sym_loc = SymbolWithLoc{ - .sym_index = sym_index, - .file = atom.file, - }; + const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = ctx.object_id + 1 }; const sym = zld.getSymbol(sym_loc); - - if (sym.sect() and !sym.ext()) { - return sym_loc; - } else if (object.getGlobal(sym_index)) |global_index| { - return zld.globals.items[global_index]; - } else return sym_loc; + const target = target: { + if (sym.sect() and !sym.ext()) { + // Make sure we are not dealing with a local alias. + const atom_index = object.getAtomIndexForSymbol(sym_index) orelse break :target sym_loc; + const atom = zld.getAtom(atom_index); + break :target atom.getSymbolWithLoc(); + } else if (object.getGlobal(sym_index)) |global_index| { + break :target zld.globals.items[global_index]; + } else break :target sym_loc; + }; + log.debug(" | target %{d} ('{s}') in object({?d})", .{ + target.sym_index, + zld.getSymbolName(target), + target.getFile(), + }); + return target; } pub fn getRelocTargetAtomIndex(zld: *Zld, target: SymbolWithLoc, is_via_got: bool) ?AtomIndex { @@ -499,13 +516,25 @@ fn resolveRelocsArm64( atom.getFile(), }); - subtractor = parseRelocTarget(zld, atom_index, rel); + subtractor = parseRelocTarget(zld, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = atom_code, + .base_addr = context.base_addr, + .base_offset = context.base_offset, + }); continue; }, else => {}, } - const target = parseRelocTarget(zld, atom_index, rel); + const target = parseRelocTarget(zld, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = atom_code, + .base_addr = context.base_addr, + .base_offset = context.base_offset, + }); const rel_offset = 
@intCast(u32, rel.r_address - context.base_offset); log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{ @@ -781,13 +810,25 @@ fn resolveRelocsX86( atom.getFile(), }); - subtractor = parseRelocTarget(zld, atom_index, rel); + subtractor = parseRelocTarget(zld, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = atom_code, + .base_addr = context.base_addr, + .base_offset = context.base_offset, + }); continue; }, else => {}, } - const target = parseRelocTarget(zld, atom_index, rel); + const target = parseRelocTarget(zld, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = atom_code, + .base_addr = context.base_addr, + .base_offset = context.base_offset, + }); const rel_offset = @intCast(u32, rel.r_address - context.base_offset); log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{ diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index a132ecb2de..cd64e72170 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -130,14 +130,29 @@ fn markLive(zld: *Zld, atom_index: AtomIndex, alive: *AtomTable) void { const header = zld.sections.items(.header)[sym.n_sect - 1]; if (header.isZerofill()) return; + const code = Atom.getAtomCode(zld, atom_index); const relocs = Atom.getAtomRelocs(zld, atom_index); + const ctx = Atom.getRelocContext(zld, atom_index); + for (relocs) |rel| { const target = switch (cpu_arch) { .aarch64 => switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { .ARM64_RELOC_ADDEND => continue, - else => Atom.parseRelocTarget(zld, atom_index, rel), + else => Atom.parseRelocTarget(zld, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = code, + .base_offset = ctx.base_offset, + .base_addr = ctx.base_addr, + }), }, - .x86_64 => Atom.parseRelocTarget(zld, atom_index, rel), + .x86_64 => Atom.parseRelocTarget(zld, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = code, + .base_offset = ctx.base_offset, + .base_addr = ctx.base_addr, + }), else => unreachable, 
}; const target_sym = zld.getSymbol(target); @@ -175,14 +190,29 @@ fn refersLive(zld: *Zld, atom_index: AtomIndex, alive: AtomTable) bool { const header = zld.sections.items(.header)[sym.n_sect - 1]; assert(!header.isZerofill()); + const code = Atom.getAtomCode(zld, atom_index); const relocs = Atom.getAtomRelocs(zld, atom_index); + const ctx = Atom.getRelocContext(zld, atom_index); + for (relocs) |rel| { const target = switch (cpu_arch) { .aarch64 => switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { .ARM64_RELOC_ADDEND => continue, - else => Atom.parseRelocTarget(zld, atom_index, rel), + else => Atom.parseRelocTarget(zld, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = code, + .base_offset = ctx.base_offset, + .base_addr = ctx.base_addr, + }), }, - .x86_64 => Atom.parseRelocTarget(zld, atom_index, rel), + .x86_64 => Atom.parseRelocTarget(zld, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = code, + .base_offset = ctx.base_offset, + .base_addr = ctx.base_addr, + }), else => unreachable, }; @@ -283,13 +313,12 @@ fn markUnwindRecords(zld: *Zld, object_id: u32, alive: *AtomTable) !void { try markEhFrameRecord(zld, object_id, atom_index, alive); } else { if (UnwindInfo.getPersonalityFunctionReloc(zld, object_id, record_id)) |rel| { - const target = UnwindInfo.parseRelocTarget( - zld, - object_id, - rel, - mem.asBytes(&record), - @intCast(i32, record_id * @sizeOf(macho.compact_unwind_entry)), - ); + const target = Atom.parseRelocTarget(zld, .{ + .object_id = object_id, + .rel = rel, + .code = mem.asBytes(&record), + .base_offset = @intCast(i32, record_id * @sizeOf(macho.compact_unwind_entry)), + }); const target_sym = zld.getSymbol(target); if (!target_sym.undf()) { const target_object = zld.objects.items[target.getFile().?]; @@ -299,13 +328,12 @@ fn markUnwindRecords(zld: *Zld, object_id: u32, alive: *AtomTable) !void { } if (UnwindInfo.getLsdaReloc(zld, object_id, record_id)) |rel| { - const target = UnwindInfo.parseRelocTarget( - zld, - 
object_id, - rel, - mem.asBytes(&record), - @intCast(i32, record_id * @sizeOf(macho.compact_unwind_entry)), - ); + const target = Atom.parseRelocTarget(zld, .{ + .object_id = object_id, + .rel = rel, + .code = mem.asBytes(&record), + .base_offset = @intCast(i32, record_id * @sizeOf(macho.compact_unwind_entry)), + }); const target_object = zld.objects.items[target.getFile().?]; const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index).?; markLive(zld, target_atom_index, alive); @@ -333,13 +361,12 @@ fn markEhFrameRecord(zld: *Zld, object_id: u32, atom_index: AtomIndex, alive: *A // Mark FDE references which should include any referenced LSDA record const relocs = eh_frame.getRelocs(zld, object_id, fde_offset); for (relocs) |rel| { - const target = UnwindInfo.parseRelocTarget( - zld, - object_id, - rel, - fde.data, - @intCast(i32, fde_offset) + 4, - ); + const target = Atom.parseRelocTarget(zld, .{ + .object_id = object_id, + .rel = rel, + .code = fde.data, + .base_offset = @intCast(i32, fde_offset) + 4, + }); const target_sym = zld.getSymbol(target); if (!target_sym.undf()) blk: { const target_object = zld.objects.items[target.getFile().?]; diff --git a/src/link/MachO/eh_frame.zig b/src/link/MachO/eh_frame.zig index 5420bf6c29..7c5c5b7c25 100644 --- a/src/link/MachO/eh_frame.zig +++ b/src/link/MachO/eh_frame.zig @@ -308,13 +308,12 @@ pub fn EhFrameRecord(comptime is_mutable: bool) type { }, else => unreachable, } - const target = UnwindInfo.parseRelocTarget( - zld, - object_id, - rel, - rec.data, - @intCast(i32, source_offset) + 4, - ); + const target = Atom.parseRelocTarget(zld, .{ + .object_id = object_id, + .rel = rel, + .code = rec.data, + .base_offset = @intCast(i32, source_offset) + 4, + }); return target; } return null; @@ -331,13 +330,12 @@ pub fn EhFrameRecord(comptime is_mutable: bool) type { const relocs = getRelocs(zld, object_id, ctx.source_offset); for (relocs) |rel| { - const target = UnwindInfo.parseRelocTarget( - zld, - 
object_id, - rel, - rec.data, - @intCast(i32, ctx.source_offset) + 4, - ); + const target = Atom.parseRelocTarget(zld, .{ + .object_id = object_id, + .rel = rel, + .code = rec.data, + .base_offset = @intCast(i32, ctx.source_offset) + 4, + }); const rel_offset = @intCast(u32, rel.r_address - @intCast(i32, ctx.source_offset) - 4); const source_addr = ctx.sect_addr + rel_offset + ctx.out_offset + 4; diff --git a/src/link/MachO/thunks.zig b/src/link/MachO/thunks.zig index ce3fda0b1f..afea08750c 100644 --- a/src/link/MachO/thunks.zig +++ b/src/link/MachO/thunks.zig @@ -225,11 +225,20 @@ fn scanRelocs( break :blk @intCast(i32, source_sym.n_value - source_sect.addr); } else 0; + const code = Atom.getAtomCode(zld, atom_index); const relocs = Atom.getAtomRelocs(zld, atom_index); + const ctx = Atom.getRelocContext(zld, atom_index); + for (relocs) |rel| { if (!relocNeedsThunk(rel)) continue; - const target = Atom.parseRelocTarget(zld, atom_index, rel); + const target = Atom.parseRelocTarget(zld, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = code, + .base_offset = ctx.base_offset, + .base_addr = ctx.base_addr, + }); if (isReachable(zld, atom_index, rel, base_offset, target, allocated)) continue; log.debug("{x}: source = {s}@{x}, target = {s}@{x} unreachable", .{ diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 27a0fa5579..07241b54cd 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -1884,13 +1884,9 @@ pub const Zld = struct { if (should_rebase) { log.debug(" ATOM({d}, %{d}, '{s}')", .{ atom_index, atom.sym_index, self.getSymbolName(atom.getSymbolWithLoc()) }); - const object = self.objects.items[atom.getFile().?]; - const base_rel_offset: i32 = blk: { - const source_sym = object.getSourceSymbol(atom.sym_index) orelse break :blk 0; - const source_sect = object.getSourceSection(source_sym.n_sect - 1); - break :blk @intCast(i32, source_sym.n_value - source_sect.addr); - }; + const code = Atom.getAtomCode(self, atom_index); const 
relocs = Atom.getAtomRelocs(self, atom_index); + const ctx = Atom.getRelocContext(self, atom_index); for (relocs) |rel| { switch (cpu_arch) { @@ -1906,12 +1902,18 @@ pub const Zld = struct { }, else => unreachable, } - const target = Atom.parseRelocTarget(self, atom_index, rel); + const target = Atom.parseRelocTarget(self, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = code, + .base_offset = ctx.base_offset, + .base_addr = ctx.base_addr, + }); const target_sym = self.getSymbol(target); if (target_sym.undf()) continue; const base_offset = @intCast(i32, sym.n_value - segment.vmaddr); - const rel_offset = rel.r_address - base_rel_offset; + const rel_offset = rel.r_address - ctx.base_offset; const offset = @intCast(u64, base_offset + rel_offset); log.debug(" | rebase at {x}", .{offset}); @@ -2021,13 +2023,9 @@ pub const Zld = struct { }; if (should_bind) { - const object = self.objects.items[atom.getFile().?]; - const base_rel_offset: i32 = blk: { - const source_sym = object.getSourceSymbol(atom.sym_index) orelse break :blk 0; - const source_sect = object.getSourceSection(source_sym.n_sect - 1); - break :blk @intCast(i32, source_sym.n_value - source_sect.addr); - }; + const code = Atom.getAtomCode(self, atom_index); const relocs = Atom.getAtomRelocs(self, atom_index); + const ctx = Atom.getRelocContext(self, atom_index); for (relocs) |rel| { switch (cpu_arch) { @@ -2044,15 +2042,20 @@ pub const Zld = struct { else => unreachable, } - const global = Atom.parseRelocTarget(self, atom_index, rel); + const global = Atom.parseRelocTarget(self, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = code, + .base_offset = ctx.base_offset, + .base_addr = ctx.base_addr, + }); const bind_sym_name = self.getSymbolName(global); const bind_sym = self.getSymbol(global); if (!bind_sym.undf()) continue; const base_offset = sym.n_value - segment.vmaddr; - const rel_offset = @intCast(u32, rel.r_address - base_rel_offset); + const rel_offset = @intCast(u32, rel.r_address 
- ctx.base_offset); const offset = @intCast(u64, base_offset + rel_offset); - const code = Atom.getAtomCode(self, atom_index); const addend = mem.readIntLittle(i64, code[rel_offset..][0..8]); const dylib_ordinal = @divTrunc(@bitCast(i16, bind_sym.n_desc), macho.N_SYMBOL_RESOLVER); From 1eb4264b7aa9e9e2b8ec46a95b508dfa7a7ab0f7 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 22 Mar 2023 15:13:52 +0100 Subject: [PATCH 11/11] macho+zld: make sure we populate source section index lookup if no undefs --- src/link/MachO/Object.zig | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index c6b86cce63..e407457e03 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -239,6 +239,12 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) self.strtab_lookup[i] = @intCast(u32, sym_name_len); } + // If there were no undefined symbols, make sure we populate the + // source section index lookup for the last scanned section. + if (section_index_lookup) |lookup| { + self.source_section_index_lookup[prev_sect_id - 1] = lookup; + } + // Parse __TEXT,__eh_frame header if one exists self.eh_frame_sect_id = self.getSourceSectionIndexByName("__TEXT", "__eh_frame");