macho: rework symbol handling for incremental stage2 builds

This commit is contained in:
Jakub Konka 2022-07-12 23:03:15 +02:00
parent eeb6d8f045
commit d80fcc8a0b
5 changed files with 640 additions and 460 deletions

File diff suppressed because it is too large Load Diff

View File

@ -187,7 +187,7 @@ pub const Relocation = struct {
const target_sym = macho_file.getSymbol(self.target);
if (is_via_got) {
const got_index = macho_file.got_entries_table.get(self.target) orelse {
const got_atom = macho_file.getGotAtomForSymbol(self.target) orelse {
log.err("expected GOT entry for symbol", .{});
if (target_sym.undf()) {
log.err(" import('{s}')", .{macho_file.getSymbolName(self.target)});
@ -197,14 +197,12 @@ pub const Relocation = struct {
log.err(" this is an internal linker error", .{});
return error.FailedToResolveRelocationTarget;
};
return macho_file.got_entries.items[got_index].atom;
return got_atom;
}
if (macho_file.stubs_table.get(self.target)) |stub_index| {
return macho_file.stubs.items[stub_index].atom;
} else if (macho_file.tlv_ptr_entries_table.get(self.target)) |tlv_ptr_index| {
return macho_file.tlv_ptr_entries.items[tlv_ptr_index].atom;
} else return macho_file.getAtomForSymbol(self.target);
if (macho_file.getStubsAtomForSymbol(self.target)) |stubs_atom| return stubs_atom;
if (macho_file.getTlvPtrAtomForSymbol(self.target)) |tlv_ptr_atom| return tlv_ptr_atom;
return macho_file.getAtomForSymbol(self.target);
}
};
@ -402,7 +400,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context:
.n_type = macho.N_SECT,
.n_sect = context.macho_file.getSectionOrdinal(match),
.n_desc = 0,
.n_value = 0,
.n_value = sect.addr,
});
try object.sections_as_symbols.putNoClobber(gpa, sect_id, sym_index);
break :blk sym_index;
@ -499,8 +497,10 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context:
// Note to future self: when r_extern == 0, we should subtract the correction from the
// addend.
const target_sect_base_addr = object.getSection(@intCast(u16, rel.r_symbolnum - 1)).addr;
// We need to add base_offset, i.e., the offset of this atom with respect to the source
// section. Otherwise, the addend will over- or under-shoot.
addend += @intCast(i64, context.base_addr + offset + 4) -
@intCast(i64, target_sect_base_addr);
@intCast(i64, target_sect_base_addr) + context.base_offset;
}
},
.X86_64_RELOC_TLV => {

View File

@ -5,7 +5,7 @@ const build_options = @import("build_options");
const assert = std.debug.assert;
const fs = std.fs;
const link = @import("../../link.zig");
const log = std.log.scoped(.link);
const log = std.log.scoped(.dsym);
const macho = std.macho;
const makeStaticString = MachO.makeStaticString;
const math = std.math;
@ -60,7 +60,7 @@ debug_aranges_section_dirty: bool = false,
debug_info_header_dirty: bool = false,
debug_line_header_dirty: bool = false,
strtab: StringTable(.link) = .{},
strtab: StringTable(.strtab) = .{},
relocs: std.ArrayListUnmanaged(Reloc) = .{},

View File

@ -270,7 +270,7 @@ const SymbolAtIndex = struct {
fn getSymbolName(self: SymbolAtIndex, ctx: Context) []const u8 {
const sym = self.getSymbol(ctx);
if (sym.n_strx == 0) return "";
assert(sym.n_strx < ctx.strtab.len);
return mem.sliceTo(@ptrCast([*:0]const u8, ctx.strtab.ptr + sym.n_strx), 0);
}
@ -359,15 +359,17 @@ fn filterDice(
return dices[start..end];
}
/// Splits object into atoms assuming whole cache mode aka traditional linking mode.
pub fn splitIntoAtomsWhole(self: *Object, macho_file: *MachO, object_id: u32) !void {
/// Splits object into atoms assuming one-shot linking mode.
pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) !void {
assert(macho_file.mode == .one_shot);
const tracy = trace(@src());
defer tracy.end();
const gpa = macho_file.base.allocator;
const seg = self.load_commands.items[self.segment_cmd_index.?].segment;
log.debug("splitting object({d}, {s}) into atoms: whole cache mode", .{ object_id, self.name });
log.debug("splitting object({d}, {s}) into atoms: one-shot mode", .{ object_id, self.name });
// You would expect that the symbol table is at least pre-sorted based on the symbol's type:
// local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance,
@ -416,11 +418,11 @@ pub fn splitIntoAtomsWhole(self: *Object, macho_file: *MachO, object_id: u32) !v
log.debug(" unhandled section", .{});
continue;
};
const target_sect = macho_file.getSection(match);
log.debug(" output sect({d}, '{s},{s}')", .{
macho_file.getSectionOrdinal(match),
target_sect.segName(),
target_sect.sectName(),
macho_file.getSection(match).segName(),
macho_file.getSection(match).sectName(),
});
const is_zerofill = blk: {
@ -585,10 +587,19 @@ fn createAtomFromSubsection(
sect: macho.section_64,
) !*Atom {
const gpa = macho_file.base.allocator;
const sym = &self.symtab.items[sym_index];
const sym = self.symtab.items[sym_index];
const atom = try MachO.createEmptyAtom(gpa, sym_index, size, alignment);
atom.file = object_id;
sym.n_sect = macho_file.getSectionOrdinal(match);
self.symtab.items[sym_index].n_sect = macho_file.getSectionOrdinal(match);
log.debug("creating ATOM(%{d}, '{s}') in sect({d}, '{s},{s}') in object({d})", .{
sym_index,
self.getString(sym.n_strx),
macho_file.getSectionOrdinal(match),
macho_file.getSection(match).segName(),
macho_file.getSection(match).sectName(),
object_id,
});
try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom);
try self.managed_atoms.append(gpa, atom);
@ -669,7 +680,6 @@ fn createAtomFromSubsection(
// if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global;
break :blk .static;
} else null;
atom.contained.appendAssumeCapacity(.{
.sym_index = inner_sym_index.index,
.offset = inner_sym.n_value - sym.n_value,

View File

@ -16,7 +16,6 @@ pub fn build(b: *Builder) void {
// TODO when we figure out how to ship framework stubs for cross-compilation,
// populate paths to the sysroot here.
exe.linkFramework("Foundation");
exe.link_gc_sections = true;
const run_cmd = exe.run();
run_cmd.expectStdOutEqual("Hello from C++ and Zig");