macho: remove sorting sections and refactor atom parsing in objects

This commit is contained in:
Jakub Konka 2021-08-27 20:32:11 +02:00
parent ad4a8e7665
commit a14e98fcac
2 changed files with 29 additions and 233 deletions

View File

@ -782,7 +782,6 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
}
try self.parseTextBlocks();
// try self.sortSections();
try self.allocateTextSegment();
try self.allocateDataConstSegment();
try self.allocateDataSegment();
@ -1500,158 +1499,6 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
return res;
}
/// Reorders the sections of the __TEXT, __DATA_CONST and __DATA segments into a
/// canonical order (the order of the `indices` arrays below), then rewrites every
/// data structure that refers to sections by index: the `self.*_section_index`
/// fields themselves, the `self.blocks` map keyed by (segment, section), and
/// `self.section_ordinals`.
/// NOTE(review): this commit removes this function (its call site at L14 is
/// commented out); documentation added for reviewability of the deleted code.
fn sortSections(self: *MachO) !void {
// old-section-index -> new-section-index maps, one per segment.
var text_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator);
defer text_index_mapping.deinit();
var data_const_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator);
defer data_const_index_mapping.deinit();
var data_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator);
defer data_index_mapping.deinit();
{
// __TEXT segment
// Take ownership of the current section list, leaving `seg.sections` empty,
// then re-append the sections in canonical order. The owned slice is freed
// at scope exit; by then every kept element has been copied back.
const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
var sections = seg.sections.toOwnedSlice(self.base.allocator);
defer self.base.allocator.free(sections);
try seg.sections.ensureCapacity(self.base.allocator, sections.len);
// Canonical ordering for __TEXT sections. Pointers-to-optionals so the
// index fields can be rewritten in place below.
const indices = &[_]*?u16{
&self.text_section_index,
&self.stubs_section_index,
&self.stub_helper_section_index,
&self.gcc_except_tab_section_index,
&self.cstring_section_index,
&self.ustring_section_index,
&self.text_const_section_index,
&self.objc_methlist_section_index,
&self.objc_methname_section_index,
&self.objc_methtype_section_index,
&self.objc_classname_section_index,
&self.eh_frame_section_index,
};
for (indices) |maybe_index| {
// Sections whose index field is null do not exist in this binary and
// are skipped (and were never in `sections` to begin with).
const new_index: u16 = if (maybe_index.*) |index| blk: {
const idx = @intCast(u16, seg.sections.items.len);
seg.sections.appendAssumeCapacity(sections[index]);
try text_index_mapping.putNoClobber(index, idx);
break :blk idx;
} else continue;
maybe_index.* = new_index;
}
}
{
// __DATA_CONST segment
// Same owned-slice re-append scheme as the __TEXT block above.
const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
var sections = seg.sections.toOwnedSlice(self.base.allocator);
defer self.base.allocator.free(sections);
try seg.sections.ensureCapacity(self.base.allocator, sections.len);
// Canonical ordering for __DATA_CONST sections.
const indices = &[_]*?u16{
&self.got_section_index,
&self.mod_init_func_section_index,
&self.mod_term_func_section_index,
&self.data_const_section_index,
&self.objc_cfstring_section_index,
&self.objc_classlist_section_index,
&self.objc_imageinfo_section_index,
};
for (indices) |maybe_index| {
const new_index: u16 = if (maybe_index.*) |index| blk: {
const idx = @intCast(u16, seg.sections.items.len);
seg.sections.appendAssumeCapacity(sections[index]);
try data_const_index_mapping.putNoClobber(index, idx);
break :blk idx;
} else continue;
maybe_index.* = new_index;
}
}
{
// __DATA segment
// Same owned-slice re-append scheme as the __TEXT block above.
const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
var sections = seg.sections.toOwnedSlice(self.base.allocator);
defer self.base.allocator.free(sections);
try seg.sections.ensureCapacity(self.base.allocator, sections.len);
// __DATA segment
// Canonical ordering for __DATA sections (bss-like sections last).
const indices = &[_]*?u16{
&self.la_symbol_ptr_section_index,
&self.objc_const_section_index,
&self.objc_selrefs_section_index,
&self.objc_classrefs_section_index,
&self.objc_data_section_index,
&self.data_section_index,
&self.tlv_section_index,
&self.tlv_data_section_index,
&self.tlv_bss_section_index,
&self.bss_section_index,
};
for (indices) |maybe_index| {
const new_index: u16 = if (maybe_index.*) |index| blk: {
const idx = @intCast(u16, seg.sections.items.len);
seg.sections.appendAssumeCapacity(sections[index]);
try data_index_mapping.putNoClobber(index, idx);
break :blk idx;
} else continue;
maybe_index.* = new_index;
}
}
{
// Rebuild `self.blocks` with keys translated through the per-segment
// mappings built above; the TextBlock values are carried over unchanged.
var transient: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{};
try transient.ensureCapacity(self.base.allocator, self.blocks.count());
var it = self.blocks.iterator();
while (it.next()) |entry| {
const old = entry.key_ptr.*;
// Every key's segment is assumed to be one of the three segments
// remapped above, and every old section index is assumed present in
// its mapping — the `.?` unwraps panic otherwise.
const sect = if (old.seg == self.text_segment_cmd_index.?)
text_index_mapping.get(old.sect).?
else if (old.seg == self.data_const_segment_cmd_index.?)
data_const_index_mapping.get(old.sect).?
else
data_index_mapping.get(old.sect).?;
transient.putAssumeCapacityNoClobber(.{
.seg = old.seg,
.sect = sect,
}, entry.value_ptr.*);
}
// NOTE(review): clearAndFree followed by deinit looks redundant —
// deinit alone releases the old map's storage. Harmless but worth
// confirming before keeping both calls.
self.blocks.clearAndFree(self.base.allocator);
self.blocks.deinit(self.base.allocator);
self.blocks = transient;
}
{
// Create new section ordinals.
// NOTE(review): getOrPutAssumeCapacity after clearRetainingCapacity
// assumes the retained capacity covers the total section count across
// all three segments — TODO confirm this invariant holds; the counts
// should be unchanged by the reordering above.
self.section_ordinals.clearRetainingCapacity();
const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
for (text_seg.sections.items) |_, sect_id| {
const res = self.section_ordinals.getOrPutAssumeCapacity(.{
.seg = self.text_segment_cmd_index.?,
.sect = @intCast(u16, sect_id),
});
assert(!res.found_existing);
}
const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
for (data_const_seg.sections.items) |_, sect_id| {
const res = self.section_ordinals.getOrPutAssumeCapacity(.{
.seg = self.data_const_segment_cmd_index.?,
.sect = @intCast(u16, sect_id),
});
assert(!res.found_existing);
}
const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
for (data_seg.sections.items) |_, sect_id| {
const res = self.section_ordinals.getOrPutAssumeCapacity(.{
.seg = self.data_segment_cmd_index.?,
.sect = @intCast(u16, sect_id),
});
assert(!res.found_existing);
}
}
}
fn allocateTextSegment(self: *MachO) !void {
const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
const base_vmaddr = self.load_commands.items[self.pagezero_segment_cmd_index.?].Segment.inner.vmsize;
@ -1894,7 +1741,7 @@ fn writeTextBlocks(self: *MachO) !void {
}
}
fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment: u32) !*TextBlock {
pub fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment: u32) !*TextBlock {
const code = try self.base.allocator.alloc(u8, size);
defer self.base.allocator.free(code);
mem.set(u8, code, 0);
@ -1924,8 +1771,12 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64
const vaddr = mem.alignForwardGeneric(u64, base_addr, atom_alignment);
log.debug("allocating atom for symbol {s} at address 0x{x}", .{ self.getString(sym.n_strx), vaddr });
// TODO we should check if we need to expand the section or not like we
// do in `allocateTextBlock`.
const expand_section = true;
if (expand_section) {
// Expand the section, possibly shifting all the atoms for the sections following it.
// It might also be needed to shift entire segments too if there is not enough
// padding left.
}
const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1);
sym.n_value = vaddr;
sym.n_sect = n_sect;

View File

@ -1,6 +1,7 @@
const Object = @This();
const std = @import("std");
const build_options = @import("build_options");
const assert = std.debug.assert;
const dwarf = std.dwarf;
const fs = std.fs;
@ -405,15 +406,9 @@ const TextBlockParser = struct {
break :blk .static;
} else null;
const block = try context.allocator.create(TextBlock);
block.* = TextBlock.empty;
block.local_sym_index = senior_nlist.index;
const block = try context.macho_file.createEmptyAtom(senior_nlist.index, size, actual_align);
block.stab = stab;
block.size = size;
block.alignment = actual_align;
try context.macho_file.managed_blocks.append(context.allocator, block);
try block.code.appendSlice(context.allocator, code);
mem.copy(u8, block.code.items, code);
try block.aliases.ensureTotalCapacity(context.allocator, aliases.items.len);
for (aliases.items) |alias| {
@ -458,6 +453,7 @@ pub fn parseTextBlocks(
object_id: u16,
macho_file: *MachO,
) !void {
const use_stage1 = build_options.is_stage1 and macho_file.base.options.use_stage1;
const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
log.debug("analysing {s}", .{self.name});
@ -508,6 +504,7 @@ pub fn parseTextBlocks(
log.debug("unhandled section", .{});
continue;
};
// TODO allocate section here.
// Read section's code
var code = try allocator.alloc(u8, @intCast(usize, sect.size));
@ -568,18 +565,17 @@ pub fn parseTextBlocks(
try self.sections_as_symbols.putNoClobber(allocator, sect_id, block_local_sym_index);
break :blk block_local_sym_index;
};
const block_code = code[0 .. first_nlist.n_value - sect.addr];
const block_size = block_code.len;
const block = try macho_file.createEmptyAtom(block_local_sym_index, block_size, sect.@"align");
const block = try allocator.create(TextBlock);
block.* = TextBlock.empty;
block.local_sym_index = block_local_sym_index;
block.size = block_size;
block.alignment = sect.@"align";
try macho_file.managed_blocks.append(allocator, block);
if (use_stage1) {
try macho_file.allocateAtomStage1(block, match);
} else {
_ = try macho_file.allocateAtom(block, match);
}
try block.code.appendSlice(allocator, block_code);
mem.copy(u8, block.code.items, block_code);
try block.parseRelocs(relocs, .{
.base_addr = 0,
@ -601,25 +597,6 @@ pub fn parseTextBlocks(
}
}
// Update target section's metadata
// TODO should we update segment's size here too?
// How does it tie with incremental space allocs?
const tseg = &macho_file.load_commands.items[match.seg].Segment;
const tsect = &tseg.sections.items[match.sect];
const new_alignment = math.max(tsect.@"align", block.alignment);
const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
tsect.size = new_size;
tsect.@"align" = new_alignment;
if (macho_file.blocks.getPtr(match)) |last| {
last.*.next = block;
block.prev = last.*;
last.* = block;
} else {
try macho_file.blocks.putNoClobber(allocator, match, block);
}
try self.text_blocks.append(allocator, block);
}
@ -666,23 +643,10 @@ pub fn parseTextBlocks(
}
}
// Update target section's metadata
// TODO should we update segment's size here too?
// How does it tie with incremental space allocs?
const tseg = &macho_file.load_commands.items[match.seg].Segment;
const tsect = &tseg.sections.items[match.sect];
const new_alignment = math.max(tsect.@"align", block.alignment);
const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
tsect.size = new_size;
tsect.@"align" = new_alignment;
if (macho_file.blocks.getPtr(match)) |last| {
last.*.next = block;
block.prev = last.*;
last.* = block;
if (use_stage1) {
try macho_file.allocateAtomStage1(block, match);
} else {
try macho_file.blocks.putNoClobber(allocator, match, block);
_ = try macho_file.allocateAtom(block, match);
}
try self.text_blocks.append(allocator, block);
@ -713,15 +677,15 @@ pub fn parseTextBlocks(
try self.sections_as_symbols.putNoClobber(allocator, sect_id, block_local_sym_index);
break :blk block_local_sym_index;
};
const block = try macho_file.createEmptyAtom(block_local_sym_index, sect.size, sect.@"align");
const block = try allocator.create(TextBlock);
block.* = TextBlock.empty;
block.local_sym_index = block_local_sym_index;
block.size = sect.size;
block.alignment = sect.@"align";
try macho_file.managed_blocks.append(allocator, block);
if (use_stage1) {
try macho_file.allocateAtomStage1(block, match);
} else {
_ = try macho_file.allocateAtom(block, match);
}
try block.code.appendSlice(allocator, code);
mem.copy(u8, block.code.items, code);
try block.parseRelocs(relocs, .{
.base_addr = 0,
@ -779,25 +743,6 @@ pub fn parseTextBlocks(
});
}
// Update target section's metadata
// TODO should we update segment's size here too?
// How does it tie with incremental space allocs?
const tseg = &macho_file.load_commands.items[match.seg].Segment;
const tsect = &tseg.sections.items[match.sect];
const new_alignment = math.max(tsect.@"align", block.alignment);
const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
tsect.size = new_size;
tsect.@"align" = new_alignment;
if (macho_file.blocks.getPtr(match)) |last| {
last.*.next = block;
block.prev = last.*;
last.* = block;
} else {
try macho_file.blocks.putNoClobber(allocator, match, block);
}
try self.text_blocks.append(allocator, block);
}
}