diff --git a/CMakeLists.txt b/CMakeLists.txt index 83352beea8..661535e2d6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -583,7 +583,6 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/TextBlock.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/Zld.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/commands.zig" "${CMAKE_SOURCE_DIR}/src/link/Wasm.zig" diff --git a/src/link/MachO.zig b/src/link/MachO.zig index ace95297d2..66a1ee4e04 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -20,16 +20,19 @@ const target_util = @import("../target.zig"); const trace = @import("../tracy.zig").trace; const Allocator = mem.Allocator; +const Archive = @import("MachO/Archive.zig"); const Cache = @import("../Cache.zig"); const CodeSignature = @import("MachO/CodeSignature.zig"); const Compilation = @import("../Compilation.zig"); const DebugSymbols = @import("MachO/DebugSymbols.zig"); +const Dylib = @import("MachO/Dylib.zig"); +const Object = @import("MachO/Object.zig"); const LoadCommand = commands.LoadCommand; const Module = @import("../Module.zig"); const File = link.File; +pub const TextBlock = @import("MachO/TextBlock.zig"); const Trie = @import("MachO/Trie.zig"); const SegmentCommand = commands.SegmentCommand; -const Zld = @import("MachO/Zld.zig"); pub const base_tag: File.Tag = File.Tag.macho; @@ -47,63 +50,83 @@ page_size: u16, /// potential future extensions. header_pad: u16 = 0x1000, -/// Table of all load commands -load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, -/// __PAGEZERO segment -pagezero_segment_cmd_index: ?u16 = null, -/// __TEXT segment -text_segment_cmd_index: ?u16 = null, -/// __DATA_CONST segment -data_const_segment_cmd_index: ?u16 = null, -/// __DATA segment -data_segment_cmd_index: ?u16 = null, -/// __LINKEDIT segment -linkedit_segment_cmd_index: ?u16 = null, -/// Dyld info -dyld_info_cmd_index: ?u16 = null, -/// Symbol table -symtab_cmd_index: ?u16 = null, -/// Dynamic symbol table -dysymtab_cmd_index: ?u16 = null, -/// Path to dyld linker -dylinker_cmd_index: ?u16 = null, -/// Path to libSystem -libsystem_cmd_index: ?u16 = null, -/// Data-in-code section of __LINKEDIT segment -data_in_code_cmd_index: ?u16 = null, -/// Address to entry point function -function_starts_cmd_index: ?u16 = null, -/// Main/entry point -/// Specifies offset wrt __TEXT segment start address to the main entry point -/// of the binary. -main_cmd_index: ?u16 = null, -/// Minimum OS version -version_min_cmd_index: ?u16 = null, -/// Source version -source_version_cmd_index: ?u16 = null, -/// UUID load command -uuid_cmd_index: ?u16 = null, -/// Code signature -code_signature_cmd_index: ?u16 = null, - -/// Index into __TEXT,__text section. -text_section_index: ?u16 = null, -/// Index into __TEXT,__stubs section. -stubs_section_index: ?u16 = null, -/// Index into __TEXT,__stub_helper section. -stub_helper_section_index: ?u16 = null, -/// Index into __DATA_CONST,__got section. -got_section_index: ?u16 = null, -/// Index into __DATA,__la_symbol_ptr section. -la_symbol_ptr_section_index: ?u16 = null, -/// Index into __DATA,__data section. -data_section_index: ?u16 = null, /// The absolute address of the entry point. entry_addr: ?u64 = null, +objects: std.ArrayListUnmanaged(*Object) = .{}, +archives: std.ArrayListUnmanaged(*Archive) = .{}, +dylibs: std.ArrayListUnmanaged(*Dylib) = .{}, + +next_dylib_ordinal: u16 = 1, + +load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, + +pagezero_segment_cmd_index: ?u16 = null, +text_segment_cmd_index: ?u16 = null, +data_const_segment_cmd_index: ?u16 = null, +data_segment_cmd_index: ?u16 = null, +linkedit_segment_cmd_index: ?u16 = null, +dyld_info_cmd_index: ?u16 = null, +symtab_cmd_index: ?u16 = null, +dysymtab_cmd_index: ?u16 = null, +dylinker_cmd_index: ?u16 = null, +data_in_code_cmd_index: ?u16 = null, +function_starts_cmd_index: ?u16 = null, +main_cmd_index: ?u16 = null, +dylib_id_cmd_index: ?u16 = null, +version_min_cmd_index: ?u16 = null, +source_version_cmd_index: ?u16 = null, +uuid_cmd_index: ?u16 = null, +code_signature_cmd_index: ?u16 = null, +/// Path to libSystem +/// TODO this is obsolete, remove it. +libsystem_cmd_index: ?u16 = null, + +// __TEXT segment sections +text_section_index: ?u16 = null, +stubs_section_index: ?u16 = null, +stub_helper_section_index: ?u16 = null, +text_const_section_index: ?u16 = null, +cstring_section_index: ?u16 = null, +ustring_section_index: ?u16 = null, +gcc_except_tab_section_index: ?u16 = null, +unwind_info_section_index: ?u16 = null, +eh_frame_section_index: ?u16 = null, + +objc_methlist_section_index: ?u16 = null, +objc_methname_section_index: ?u16 = null, +objc_methtype_section_index: ?u16 = null, +objc_classname_section_index: ?u16 = null, + +// __DATA_CONST segment sections +got_section_index: ?u16 = null, +mod_init_func_section_index: ?u16 = null, +mod_term_func_section_index: ?u16 = null, +data_const_section_index: ?u16 = null, + +objc_cfstring_section_index: ?u16 = null, +objc_classlist_section_index: ?u16 = null, +objc_imageinfo_section_index: ?u16 = null, + +// __DATA segment sections +tlv_section_index: ?u16 = null, +tlv_data_section_index: ?u16 = null, +tlv_bss_section_index: ?u16 = null, +la_symbol_ptr_section_index: ?u16 = null, +data_section_index: ?u16 = null, +bss_section_index: ?u16 = null, +common_section_index: ?u16 = null, + +objc_const_section_index: ?u16 = null, +objc_selrefs_section_index: ?u16 = null, +objc_classrefs_section_index: ?u16 = null, +objc_data_section_index: ?u16 = null, + locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, globals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, imports: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +undefs: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +tentatives: std.ArrayListUnmanaged(macho.nlist_64) = .{}, symbol_resolver: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, @@ -133,6 +156,9 @@ export_info_dirty: bool = false, strtab_dirty: bool = false, strtab_needs_relocation: bool = false, +has_dices: bool = false, +has_stabs: bool = false, + /// A list of text blocks that have surplus capacity. This list can have false /// positives, as functions grow and shrink over time, only sometimes being added /// or removed from the freelist. @@ -153,6 +179,8 @@ text_block_free_list: std.ArrayListUnmanaged(*TextBlock) = .{}, /// Pointer to the last allocated text block last_text_block: ?*TextBlock = null, +blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, + /// A list of all PIE fixups required for this run of the linker. /// Warning, this is currently NOT thread-safe. See the TODO below. /// TODO Move this list inside `updateDecl` where it should be allocated @@ -236,71 +264,7 @@ const LIB_SYSTEM_PATH: [*:0]const u8 = DEFAULT_LIB_SEARCH_PATH ++ "/libSystem.B. /// it as a possible place to put new symbols, it must have enough room for this many bytes /// (plus extra for reserved capacity). const minimum_text_block_size = 64; -const min_text_capacity = padToIdeal(minimum_text_block_size); - -pub const TextBlock = struct { - /// Each decl always gets a local symbol with the fully qualified name. - /// The vaddr and size are found here directly. - /// The file offset is found by computing the vaddr offset from the section vaddr - /// the symbol references, and adding that to the file offset of the section. - /// If this field is 0, it means the codegen size = 0 and there is no symbol or - /// offset table entry. - local_sym_index: u32, - /// Size of this text block - /// Unlike in Elf, we need to store the size of this symbol as part of - /// the TextBlock since macho.nlist_64 lacks this information. - size: u64, - /// Points to the previous and next neighbours - prev: ?*TextBlock, - next: ?*TextBlock, - - /// Previous/next linked list pointers. - /// This is the linked list node for this Decl's corresponding .debug_info tag. - dbg_info_prev: ?*TextBlock, - dbg_info_next: ?*TextBlock, - /// Offset into .debug_info pointing to the tag for this Decl. - dbg_info_off: u32, - /// Size of the .debug_info tag for this Decl, not including padding. - dbg_info_len: u32, - - pub const empty = TextBlock{ - .local_sym_index = 0, - .size = 0, - .prev = null, - .next = null, - .dbg_info_prev = null, - .dbg_info_next = null, - .dbg_info_off = undefined, - .dbg_info_len = undefined, - }; - - /// Returns how much room there is to grow in virtual address space. - /// File offset relocation happens transparently, so it is not included in - /// this calculation. - fn capacity(self: TextBlock, macho_file: MachO) u64 { - const self_sym = macho_file.locals.items[self.local_sym_index]; - if (self.next) |next| { - const next_sym = macho_file.locals.items[next.local_sym_index]; - return next_sym.n_value - self_sym.n_value; - } else { - // We are the last block. - // The capacity is limited only by virtual address space. - return std.math.maxInt(u64) - self_sym.n_value; - } - } - - fn freeListEligible(self: TextBlock, macho_file: MachO) bool { - // No need to keep a free list node for the last block. - const next = self.next orelse return false; - const self_sym = macho_file.locals.items[self.local_sym_index]; - const next_sym = macho_file.locals.items[next.local_sym_index]; - const cap = next_sym.n_value - self_sym.n_value; - const ideal_cap = padToIdeal(self.size); - if (cap <= ideal_cap) return false; - const surplus = cap - ideal_cap; - return surplus >= min_text_capacity; - } -}; +pub const min_text_capacity = padToIdeal(minimum_text_block_size); pub const Export = struct { sym_index: ?u32 = null, @@ -452,9 +416,9 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { self.load_commands_dirty = true; } try self.writeRebaseInfoTable(); - try self.writeBindingInfoTable(); - try self.writeLazyBindingInfoTable(); - try self.writeExportTrie(); + try self.writeBindInfoTable(); + try self.writeLazyBindInfoTable(); + try self.writeExportInfo(); try self.writeAllGlobalAndUndefSymbols(); try self.writeIndirectSymbolTable(); try self.writeStringTable(); @@ -718,14 +682,6 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{}); } } else { - var zld = try Zld.init(self.base.allocator); - defer { - zld.closeFiles(); - zld.deinit(); - } - zld.target = target; - zld.stack_size = stack_size; - // Positional arguments to the linker such as object files and static archives. var positionals = std.ArrayList([]const u8).init(arena); @@ -868,23 +824,6 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { rpaths.appendAssumeCapacity(key.*); } - const output: Zld.Output = output: { - if (is_dyn_lib) { - const install_name = try std.fmt.allocPrint(arena, "@rpath/{s}", .{ - self.base.options.emit.?.sub_path, - }); - break :output .{ - .tag = .dylib, - .path = full_out_path, - .install_name = install_name, - }; - } - break :output .{ - .tag = .exe, - .path = full_out_path, - }; - }; - if (self.base.options.verbose_link) { var argv = std.ArrayList([]const u8).init(arena); @@ -898,8 +837,11 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { if (is_dyn_lib) { try argv.append("-dylib"); + const install_name = try std.fmt.allocPrint(arena, "@rpath/{s}", .{ + self.base.options.emit.?.sub_path, + }); try argv.append("-install_name"); - try argv.append(output.install_name.?); + try argv.append(install_name); } if (self.base.options.sysroot) |syslibroot| { @@ -915,7 +857,7 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { try argv.appendSlice(positionals.items); try argv.append("-o"); - try argv.append(output.path); + try argv.append(full_out_path); if (native_libsystem_available) { try argv.append("-lSystem"); @@ -933,11 +875,99 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { Compilation.dump_argv(argv.items); } - try zld.link(positionals.items, output, .{ - .syslibroot = self.base.options.sysroot, - .libs = libs.items, - .rpaths = rpaths.items, + self.base.file = try fs.cwd().createFile(full_out_path, .{ + .truncate = true, + .read = true, + .mode = if (std.Target.current.os.tag == .windows) 0 else 0o777, }); + self.page_size = switch (self.base.options.target.cpu.arch) { + .aarch64 => 0x4000, + .x86_64 => 0x1000, + else => unreachable, + }; + + try self.populateMetadata(); + try self.parseInputFiles(positionals.items, self.base.options.sysroot); + try self.parseLibs(libs.items, self.base.options.sysroot); + try self.resolveSymbols(); + try self.parseTextBlocks(); + + { + // Add dyld_stub_binder as the final GOT entry. + const resolv = self.symbol_resolver.get("dyld_stub_binder") orelse unreachable; + const got_index = @intCast(u32, self.got_entries.items.len); + const got_entry = GotIndirectionKey{ + .where = .import, + .where_index = resolv.where_index, + }; + try self.got_entries.append(self.base.allocator, got_entry); + try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, got_index); + } + + try self.sortSections(); + try self.addRpaths(rpaths.items); + try self.addDataInCodeLC(); + try self.addCodeSignatureLC(); + try self.allocateTextSegment(); + try self.allocateDataConstSegment(); + try self.allocateDataSegment(); + self.allocateLinkeditSegment(); + try self.allocateTextBlocks(); + + // log.warn("locals", .{}); + // for (self.locals.items) |sym, id| { + // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + // } + + // log.warn("globals", .{}); + // for (self.globals.items) |sym, id| { + // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + // } + + // log.warn("tentatives", .{}); + // for (self.tentatives.items) |sym, id| { + // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + // } + + // log.warn("undefines", .{}); + // for (self.undefs.items) |sym, id| { + // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + // } + + // log.warn("imports", .{}); + // for (self.imports.items) |sym, id| { + // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + // } + + // log.warn("symbol resolver", .{}); + // for (self.symbol_resolver.keys()) |key| { + // log.warn(" {s} => {}", .{ key, self.symbol_resolver.get(key).? }); + // } + + // log.warn("mappings", .{}); + // for (self.objects.items) |object, id| { + // const object_id = @intCast(u16, id); + // log.warn(" in object {s}", .{object.name.?}); + // for (object.symtab.items) |sym, sym_id| { + // if (object.symbol_mapping.get(@intCast(u32, sym_id))) |local_id| { + // log.warn(" | {d} => {d}", .{ sym_id, local_id }); + // } else { + // log.warn(" | {d} no local mapping for {s}", .{ sym_id, object.getString(sym.n_strx) }); + // } + // } + // } + + // var it = self.blocks.iterator(); + // while (it.next()) |entry| { + // const seg = self.load_commands.items[entry.key_ptr.seg].Segment; + // const sect = seg.sections.items[entry.key_ptr.sect]; + + // log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); + // log.warn(" {}", .{sect}); + // entry.value_ptr.*.print(self); + // } + + try self.flushZld(); } if (!self.base.options.disable_lld_caching) { @@ -956,6 +986,2020 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { } } +fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const u8) !void { + const arch = self.base.options.target.cpu.arch; + for (files) |file_name| { + const full_path = full_path: { + var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; + const path = try std.fs.realpath(file_name, &buffer); + break :full_path try self.base.allocator.dupe(u8, path); + }; + + if (try Object.createAndParseFromPath(self.base.allocator, arch, full_path)) |object| { + try self.objects.append(self.base.allocator, object); + continue; + } + + if (try Archive.createAndParseFromPath(self.base.allocator, arch, full_path)) |archive| { + try self.archives.append(self.base.allocator, archive); + continue; + } + + if (try Dylib.createAndParseFromPath(self.base.allocator, arch, full_path, .{ + .syslibroot = syslibroot, + })) |dylibs| { + defer self.base.allocator.free(dylibs); + try self.dylibs.appendSlice(self.base.allocator, dylibs); + continue; + } + + log.warn("unknown filetype for positional input file: '{s}'", .{file_name}); + } +} + +fn parseLibs(self: *MachO, libs: []const []const u8, syslibroot: ?[]const u8) !void { + const arch = self.base.options.target.cpu.arch; + for (libs) |lib| { + if (try Dylib.createAndParseFromPath(self.base.allocator, arch, lib, .{ + .syslibroot = syslibroot, + })) |dylibs| { + defer self.base.allocator.free(dylibs); + try self.dylibs.appendSlice(self.base.allocator, dylibs); + continue; + } + + if (try Archive.createAndParseFromPath(self.base.allocator, arch, lib)) |archive| { + try self.archives.append(self.base.allocator, archive); + continue; + } + + log.warn("unknown filetype for a library: '{s}'", .{lib}); + } +} + +pub const MatchingSection = struct { + seg: u16, + sect: u16, +}; + +pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSection { + const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const segname = commands.segmentName(sect); + const sectname = commands.sectionName(sect); + + const res: ?MatchingSection = blk: { + switch (commands.sectionType(sect)) { + macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { + if (self.text_const_section_index == null) { + self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__const", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_const_section_index.?, + }; + }, + macho.S_CSTRING_LITERALS => { + if (mem.eql(u8, sectname, "__objc_methname")) { + // TODO it seems the common values within the sections in objects are deduplicated/merged + // on merging the sections' contents. + if (self.objc_methname_section_index == null) { + self.objc_methname_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__objc_methname", .{ + .flags = macho.S_CSTRING_LITERALS, + }); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.objc_methname_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_methtype")) { + if (self.objc_methtype_section_index == null) { + self.objc_methtype_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__objc_methtype", .{ + .flags = macho.S_CSTRING_LITERALS, + }); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.objc_methtype_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_classname")) { + if (self.objc_classname_section_index == null) { + self.objc_classname_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__objc_classname", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.objc_classname_section_index.?, + }; + } + + if (self.cstring_section_index == null) { + self.cstring_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__cstring", .{ + .flags = macho.S_CSTRING_LITERALS, + }); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.cstring_section_index.?, + }; + }, + macho.S_LITERAL_POINTERS => { + if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__objc_selrefs")) { + if (self.objc_selrefs_section_index == null) { + self.objc_selrefs_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__objc_selrefs", .{ + .flags = macho.S_LITERAL_POINTERS, + }); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.objc_selrefs_section_index.?, + }; + } + + // TODO investigate + break :blk null; + }, + macho.S_MOD_INIT_FUNC_POINTERS => { + if (self.mod_init_func_section_index == null) { + self.mod_init_func_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__mod_init_func", .{ + .flags = macho.S_MOD_INIT_FUNC_POINTERS, + }); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.mod_init_func_section_index.?, + }; + }, + macho.S_MOD_TERM_FUNC_POINTERS => { + if (self.mod_term_func_section_index == null) { + self.mod_term_func_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__mod_term_func", .{ + .flags = macho.S_MOD_TERM_FUNC_POINTERS, + }); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.mod_term_func_section_index.?, + }; + }, + macho.S_ZEROFILL => { + if (mem.eql(u8, sectname, "__common")) { + if (self.common_section_index == null) { + self.common_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__common", .{ + .flags = macho.S_ZEROFILL, + }); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.common_section_index.?, + }; + } else { + if (self.bss_section_index == null) { + self.bss_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__bss", .{ + .flags = macho.S_ZEROFILL, + }); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.bss_section_index.?, + }; + } + }, + macho.S_THREAD_LOCAL_VARIABLES => { + if (self.tlv_section_index == null) { + self.tlv_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__thread_vars", .{ + .flags = macho.S_THREAD_LOCAL_VARIABLES, + }); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.tlv_section_index.?, + }; + }, + macho.S_THREAD_LOCAL_REGULAR => { + if (self.tlv_data_section_index == null) { + self.tlv_data_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__thread_data", .{ + .flags = macho.S_THREAD_LOCAL_REGULAR, + }); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.tlv_data_section_index.?, + }; + }, + macho.S_THREAD_LOCAL_ZEROFILL => { + if (self.tlv_bss_section_index == null) { + self.tlv_bss_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__thread_bss", .{ + .flags = macho.S_THREAD_LOCAL_ZEROFILL, + }); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.tlv_bss_section_index.?, + }; + }, + macho.S_COALESCED => { + if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) { + // TODO I believe __eh_frame is currently part of __unwind_info section + // in the latest ld64 output. + if (self.eh_frame_section_index == null) { + self.eh_frame_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__eh_frame", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.eh_frame_section_index.?, + }; + } + + // TODO audit this: is this the right mapping? + if (self.data_const_section_index == null) { + self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__const", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.data_const_section_index.?, + }; + }, + macho.S_REGULAR => { + if (commands.sectionIsCode(sect)) { + if (self.text_section_index == null) { + self.text_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__text", .{ + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }; + } + if (commands.sectionIsDebug(sect)) { + // TODO debug attributes + if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { + log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + } + break :blk null; + } + + if (mem.eql(u8, segname, "__TEXT")) { + if (mem.eql(u8, sectname, "__ustring")) { + if (self.ustring_section_index == null) { + self.ustring_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__ustring", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.ustring_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__gcc_except_tab")) { + if (self.gcc_except_tab_section_index == null) { + self.gcc_except_tab_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__gcc_except_tab", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.gcc_except_tab_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_methlist")) { + if (self.objc_methlist_section_index == null) { + self.objc_methlist_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__objc_methlist", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.objc_methlist_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__rodata") or + mem.eql(u8, sectname, "__typelink") or + mem.eql(u8, sectname, "__itablink") or + mem.eql(u8, sectname, "__gosymtab") or + mem.eql(u8, sectname, "__gopclntab")) + { + if (self.data_const_section_index == null) { + self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__const", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.data_const_section_index.?, + }; + } else { + if (self.text_const_section_index == null) { + self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__const", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_const_section_index.?, + }; + } + } + + if (mem.eql(u8, segname, "__DATA_CONST")) { + if (self.data_const_section_index == null) { + self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__const", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.data_const_section_index.?, + }; + } + + if (mem.eql(u8, segname, "__DATA")) { + if (mem.eql(u8, sectname, "__const")) { + if (self.data_const_section_index == null) { + self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__const", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.data_const_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__cfstring")) { + if (self.objc_cfstring_section_index == null) { + self.objc_cfstring_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__cfstring", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.objc_cfstring_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_classlist")) { + if (self.objc_classlist_section_index == null) { + self.objc_classlist_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__objc_classlist", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.objc_classlist_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_imageinfo")) { + if (self.objc_imageinfo_section_index == null) { + self.objc_imageinfo_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__objc_imageinfo", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.objc_imageinfo_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_const")) { + if (self.objc_const_section_index == null) { + self.objc_const_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__objc_const", .{}); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.objc_const_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_classrefs")) { + if (self.objc_classrefs_section_index == null) { + self.objc_classrefs_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__objc_classrefs", .{}); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.objc_classrefs_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_data")) { + if (self.objc_data_section_index == null) { + self.objc_data_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__objc_data", .{}); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.objc_data_section_index.?, + }; + } else { + if (self.data_section_index == null) { + self.data_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__data", .{}); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.data_section_index.?, + }; + } + } + + if (mem.eql(u8, "__LLVM", segname) and mem.eql(u8, "__asm", sectname)) { + log.debug("TODO LLVM asm section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + } + + break :blk null; + }, + else => break :blk null, + } + }; + + return res; +} + +fn sortSections(self: *MachO) !void { + var text_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator); + defer text_index_mapping.deinit(); + var data_const_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator); + defer data_const_index_mapping.deinit(); + var data_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator); + defer data_index_mapping.deinit(); + + { + // __TEXT segment + const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + var sections = seg.sections.toOwnedSlice(self.base.allocator); + defer self.base.allocator.free(sections); + try seg.sections.ensureCapacity(self.base.allocator, sections.len); + + const indices = &[_]*?u16{ + &self.text_section_index, + &self.stubs_section_index, + &self.stub_helper_section_index, + &self.gcc_except_tab_section_index, + &self.cstring_section_index, + &self.ustring_section_index, + &self.text_const_section_index, + &self.objc_methname_section_index, + &self.objc_methtype_section_index, + &self.objc_classname_section_index, + &self.eh_frame_section_index, + }; + for (indices) |maybe_index| { + const new_index: u16 = if (maybe_index.*) |index| blk: { + const idx = @intCast(u16, seg.sections.items.len); + seg.sections.appendAssumeCapacity(sections[index]); + try text_index_mapping.putNoClobber(index, idx); + break :blk idx; + } else continue; + maybe_index.* = new_index; + } + } + + { + // __DATA_CONST segment + const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + var sections = seg.sections.toOwnedSlice(self.base.allocator); + defer self.base.allocator.free(sections); + try seg.sections.ensureCapacity(self.base.allocator, sections.len); + + const indices = &[_]*?u16{ + &self.got_section_index, + &self.mod_init_func_section_index, + &self.mod_term_func_section_index, + &self.data_const_section_index, + &self.objc_cfstring_section_index, + &self.objc_classlist_section_index, + &self.objc_imageinfo_section_index, + }; + for (indices) |maybe_index| { + const new_index: u16 = if (maybe_index.*) |index| blk: { + const idx = @intCast(u16, seg.sections.items.len); + seg.sections.appendAssumeCapacity(sections[index]); + try data_const_index_mapping.putNoClobber(index, idx); + break :blk idx; + } else continue; + maybe_index.* = new_index; + } + } + + { + // __DATA segment + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + var sections = seg.sections.toOwnedSlice(self.base.allocator); + defer self.base.allocator.free(sections); + try seg.sections.ensureCapacity(self.base.allocator, sections.len); + + // __DATA segment + const indices = &[_]*?u16{ + &self.la_symbol_ptr_section_index, + &self.objc_const_section_index, + &self.objc_selrefs_section_index, + &self.objc_classrefs_section_index, + &self.objc_data_section_index, + &self.data_section_index, + &self.tlv_section_index, + &self.tlv_data_section_index, + &self.tlv_bss_section_index, + &self.bss_section_index, + &self.common_section_index, + }; + for (indices) |maybe_index| { + const new_index: u16 = if (maybe_index.*) |index| blk: { + const idx = @intCast(u16, seg.sections.items.len); + seg.sections.appendAssumeCapacity(sections[index]); + try data_index_mapping.putNoClobber(index, idx); + break :blk idx; + } else continue; + maybe_index.* = new_index; + } + } + + { + var transient: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}; + try transient.ensureCapacity(self.base.allocator, self.blocks.count()); + + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const old = entry.key_ptr.*; + const sect = if (old.seg == self.text_segment_cmd_index.?) + text_index_mapping.get(old.sect).? + else if (old.seg == self.data_const_segment_cmd_index.?) + data_const_index_mapping.get(old.sect).? + else + data_index_mapping.get(old.sect).?; + transient.putAssumeCapacityNoClobber(.{ + .seg = old.seg, + .sect = sect, + }, entry.value_ptr.*); + } + + self.blocks.clearAndFree(self.base.allocator); + self.blocks.deinit(self.base.allocator); + self.blocks = transient; + } +} + +fn allocateTextSegment(self: *MachO) !void { + const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const nstubs = @intCast(u32, self.stubs.items.len); + + const base_vmaddr = self.load_commands.items[self.pagezero_segment_cmd_index.?].Segment.inner.vmsize; + seg.inner.fileoff = 0; + seg.inner.vmaddr = base_vmaddr; + + // Set stubs and stub_helper sizes + const stubs = &seg.sections.items[self.stubs_section_index.?]; + const stub_helper = &seg.sections.items[self.stub_helper_section_index.?]; + stubs.size += nstubs * stubs.reserved2; + + const stub_size: u4 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 10, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, + }; + stub_helper.size += nstubs * stub_size; + + var sizeofcmds: u64 = 0; + for (self.load_commands.items) |lc| { + sizeofcmds += lc.cmdsize(); + } + + try self.allocateSegment(self.text_segment_cmd_index.?, @sizeOf(macho.mach_header_64) + sizeofcmds); + + // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments. + var min_alignment: u32 = 0; + for (seg.sections.items) |sect| { + const alignment = try math.powi(u32, 2, sect.@"align"); + min_alignment = math.max(min_alignment, alignment); + } + + assert(min_alignment > 0); + const last_sect_idx = seg.sections.items.len - 1; + const last_sect = seg.sections.items[last_sect_idx]; + const shift: u32 = blk: { + const diff = seg.inner.filesize - last_sect.offset - last_sect.size; + const factor = @divTrunc(diff, min_alignment); + break :blk @intCast(u32, factor * min_alignment); + }; + + if (shift > 0) { + for (seg.sections.items) |*sect| { + sect.offset += shift; + sect.addr += shift; + } + } +} + +fn allocateDataConstSegment(self: *MachO) !void { + const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const nentries = @intCast(u32, self.got_entries.items.len); + + const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + seg.inner.fileoff = text_seg.inner.fileoff + text_seg.inner.filesize; + seg.inner.vmaddr = text_seg.inner.vmaddr + text_seg.inner.vmsize; + + // Set got size + const got = &seg.sections.items[self.got_section_index.?]; + got.size += nentries * @sizeOf(u64); + + try self.allocateSegment(self.data_const_segment_cmd_index.?, 0); +} + +fn allocateDataSegment(self: *MachO) !void { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const nstubs = @intCast(u32, self.stubs.items.len); + + const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + seg.inner.fileoff = data_const_seg.inner.fileoff + data_const_seg.inner.filesize; + seg.inner.vmaddr = data_const_seg.inner.vmaddr + data_const_seg.inner.vmsize; + + // Set la_symbol_ptr and data size + const la_symbol_ptr = &seg.sections.items[self.la_symbol_ptr_section_index.?]; + const data = &seg.sections.items[self.data_section_index.?]; + la_symbol_ptr.size += nstubs * @sizeOf(u64); + data.size += @sizeOf(u64); // We need at least 8bytes for address of dyld_stub_binder + + try self.allocateSegment(self.data_segment_cmd_index.?, 0); +} + +fn allocateLinkeditSegment(self: *MachO) void { + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + seg.inner.fileoff = data_seg.inner.fileoff + data_seg.inner.filesize; + seg.inner.vmaddr = data_seg.inner.vmaddr + data_seg.inner.vmsize; +} + +fn allocateSegment(self: *MachO, index: u16, offset: u64) !void { + const seg = &self.load_commands.items[index].Segment; + + // Allocate the sections according to their alignment at the beginning of the segment. + var start: u64 = offset; + for (seg.sections.items) |*sect| { + const alignment = try math.powi(u32, 2, sect.@"align"); + const start_aligned = mem.alignForwardGeneric(u64, start, alignment); + const end_aligned = mem.alignForwardGeneric(u64, start_aligned + sect.size, alignment); + sect.offset = @intCast(u32, seg.inner.fileoff + start_aligned); + sect.addr = seg.inner.vmaddr + start_aligned; + start = end_aligned; + } + + const seg_size_aligned = mem.alignForwardGeneric(u64, start, self.page_size); + seg.inner.filesize = seg_size_aligned; + seg.inner.vmsize = seg_size_aligned; +} + +fn allocateTextBlocks(self: *MachO) !void { + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var block: *TextBlock = entry.value_ptr.*; + + // Find the first block + while (block.prev) |prev| { + block = prev; + } + + const seg = self.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + + var base_addr: u64 = sect.addr; + const n_sect = self.sectionId(match); + + log.debug(" within section {s},{s}", .{ commands.segmentName(sect), commands.sectionName(sect) }); + log.debug(" {}", .{sect}); + + while (true) { + const block_alignment = try math.powi(u32, 2, block.alignment); + base_addr = mem.alignForwardGeneric(u64, base_addr, block_alignment); + + const sym = &self.locals.items[block.local_sym_index]; + sym.n_value = base_addr; + sym.n_sect = n_sect; + + log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ + self.getString(sym.n_strx), + base_addr, + base_addr + block.size, + block.size, + block.alignment, + }); + + // Update each alias (if any) + for (block.aliases.items) |index| { + const alias_sym = &self.locals.items[index]; + alias_sym.n_value = base_addr; + alias_sym.n_sect = n_sect; + } + + // Update each symbol contained within the TextBlock + for (block.contained.items) |sym_at_off| { + const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; + contained_sym.n_value = base_addr + sym_at_off.offset; + contained_sym.n_sect = n_sect; + } + + base_addr += block.size; + + if (block.next) |next| { + block = next; + } else break; + } + } + + // Update globals + for (self.symbol_resolver.values()) |resolv| { + if (resolv.where != .global) continue; + + assert(resolv.local_sym_index != 0); + const local_sym = self.locals.items[resolv.local_sym_index]; + const sym = &self.globals.items[resolv.where_index]; + sym.n_value = local_sym.n_value; + sym.n_sect = local_sym.n_sect; + } +} + +fn writeTextBlocks(self: *MachO) !void { + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var block: *TextBlock = entry.value_ptr.*; + + while (block.prev) |prev| { + block = prev; + } + + const seg = self.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + const sect_type = commands.sectionType(sect); + + log.debug(" for section {s},{s}", .{ commands.segmentName(sect), commands.sectionName(sect) }); + log.debug(" {}", .{sect}); + + var code = try self.base.allocator.alloc(u8, sect.size); + defer self.base.allocator.free(code); + + if (sect_type == macho.S_ZEROFILL or sect_type == macho.S_THREAD_LOCAL_ZEROFILL) { + mem.set(u8, code, 0); + } else { + var base_off: u64 = 0; + + while (true) { + const block_alignment = try math.powi(u32, 2, block.alignment); + const aligned_base_off = mem.alignForwardGeneric(u64, base_off, block_alignment); + + const sym = self.locals.items[block.local_sym_index]; + log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ + self.getString(sym.n_strx), + aligned_base_off, + aligned_base_off + block.size, + block.size, + block.alignment, + }); + + try block.resolveRelocs(self); + mem.copy(u8, code[aligned_base_off..][0..block.size], block.code); + + // TODO NOP for machine code instead of just zeroing out + const padding_len = aligned_base_off - base_off; + mem.set(u8, code[base_off..][0..padding_len], 0); + + base_off = aligned_base_off + block.size; + + if (block.next) |next| { + block = next; + } else break; + } + + mem.set(u8, code[base_off..], 0); + } + + try self.base.file.?.pwriteAll(code, sect.offset); + } +} + +fn writeStubHelperCommon(self: *MachO) !void { + const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?]; + const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const got = &data_const_segment.sections.items[self.got_section_index.?]; + const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const data = &data_segment.sections.items[self.data_section_index.?]; + + self.stub_helper_stubs_start_off = blk: { + switch (self.base.options.target.cpu.arch) { + .x86_64 => { + const code_size = 15; + var code: [code_size]u8 = undefined; + // lea %r11, [rip + disp] + code[0] = 0x4c; + code[1] = 0x8d; + code[2] = 0x1d; + { + const target_addr = data.addr + data.size - @sizeOf(u64); + const displacement = try math.cast(u32, target_addr - stub_helper.addr - 7); + mem.writeIntLittle(u32, code[3..7], displacement); + } + // push %r11 + code[7] = 0x41; + code[8] = 0x53; + // jmp [rip + disp] + code[9] = 0xff; + code[10] = 0x25; + { + const resolv = self.symbol_resolver.get("dyld_stub_binder") orelse unreachable; + const got_index = self.got_entries_map.get(.{ + .where = .import, + .where_index = resolv.where_index, + }) orelse unreachable; + const addr = got.addr + got_index * @sizeOf(u64); + const displacement = try math.cast(u32, addr - stub_helper.addr - code_size); + mem.writeIntLittle(u32, code[11..], displacement); + } + try self.base.file.?.pwriteAll(&code, stub_helper.offset); + break :blk stub_helper.offset + code_size; + }, + .aarch64 => { + var code: [6 * @sizeOf(u32)]u8 = undefined; + data_blk_outer: { + const this_addr = stub_helper.addr; + const target_addr = data.addr + data.size - @sizeOf(u64); + data_blk: { + const displacement = math.cast(i21, target_addr - this_addr) catch break :data_blk; + // adr x17, disp + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32()); + // nop + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); + break :data_blk_outer; + } + data_blk: { + const new_this_addr = this_addr + @sizeOf(u32); + const displacement = math.cast(i21, target_addr - new_this_addr) catch break :data_blk; + // nop + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); + // adr x17, disp + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32()); + break :data_blk_outer; + } + // Jump is too big, replace adr with adrp and add. + const this_page = @intCast(i32, this_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); + const pages = @intCast(i21, target_page - this_page); + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32()); + const narrowed = @truncate(u12, target_addr); + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32()); + } + // stp x16, x17, [sp, #-16]! + code[8] = 0xf0; + code[9] = 0x47; + code[10] = 0xbf; + code[11] = 0xa9; + binder_blk_outer: { + const resolv = self.symbol_resolver.get("dyld_stub_binder") orelse unreachable; + const got_index = self.got_entries_map.get(.{ + .where = .import, + .where_index = resolv.where_index, + }) orelse unreachable; + const this_addr = stub_helper.addr + 3 * @sizeOf(u32); + const target_addr = got.addr + got_index * @sizeOf(u64); + binder_blk: { + const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :binder_blk; + const literal = math.cast(u18, displacement) catch break :binder_blk; + // ldr x16, label + mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{ + .literal = literal, + }).toU32()); + // nop + mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32()); + break :binder_blk_outer; + } + binder_blk: { + const new_this_addr = this_addr + @sizeOf(u32); + const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :binder_blk; + const literal = math.cast(u18, displacement) catch break :binder_blk; + // Pad with nop to please division. + // nop + mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32()); + // ldr x16, label + mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ + .literal = literal, + }).toU32()); + break :binder_blk_outer; + } + // Use adrp followed by ldr(immediate). + const this_page = @intCast(i32, this_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); + const pages = @intCast(i21, target_page - this_page); + mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32()); + const narrowed = @truncate(u12, target_addr); + const offset = try math.divExact(u12, narrowed, 8); + mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ + .register = .{ + .rn = .x16, + .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), + }, + }).toU32()); + } + // br x16 + code[20] = 0x00; + code[21] = 0x02; + code[22] = 0x1f; + code[23] = 0xd6; + try self.base.file.?.pwriteAll(&code, stub_helper.offset); + break :blk stub_helper.offset + 6 * @sizeOf(u32); + }, + else => unreachable, + } + }; + + for (self.stubs.items) |_, i| { + const index = @intCast(u32, i); + // TODO weak bound pointers + try self.writeLazySymbolPointer(index); + try self.writeStub(index); + try self.writeStubInStubHelper(index); + } +} + +fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { + const object = self.objects.items[object_id]; + + log.debug("resolving symbols in '{s}'", .{object.name}); + + for (object.symtab.items) |sym, id| { + const sym_id = @intCast(u32, id); + const sym_name = object.getString(sym.n_strx); + + if (symbolIsStab(sym)) { + log.err("unhandled symbol type: stab", .{}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name.?}); + return error.UnhandledSymbolType; + } + + if (symbolIsIndr(sym)) { + log.err("unhandled symbol type: indirect", .{}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name.?}); + return error.UnhandledSymbolType; + } + + if (symbolIsAbs(sym)) { + log.err("unhandled symbol type: absolute", .{}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name.?}); + return error.UnhandledSymbolType; + } + + if (symbolIsSect(sym)) { + // Defined symbol regardless of scope lands in the locals symbol table. + const n_strx = blk: { + if (self.symbol_resolver.get(sym_name)) |resolv| { + switch (resolv.where) { + .global => break :blk self.globals.items[resolv.where_index].n_strx, + .tentative => break :blk self.tentatives.items[resolv.where_index].n_strx, + .undef => break :blk self.undefs.items[resolv.where_index].n_strx, + .import => unreachable, + } + } + break :blk try self.makeString(sym_name); + }; + const local_sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(self.base.allocator, .{ + .n_strx = n_strx, + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = sym.n_value, + }); + try object.symbol_mapping.putNoClobber(self.base.allocator, sym_id, local_sym_index); + + // If the symbol's scope is not local aka translation unit, then we need work out + // if we should save the symbol as a global, or potentially flag the error. + if (!symbolIsExt(sym)) continue; + + const local = self.locals.items[local_sym_index]; + const resolv = self.symbol_resolver.getPtr(sym_name) orelse { + const global_sym_index = @intCast(u32, self.globals.items.len); + try self.globals.append(self.base.allocator, .{ + .n_strx = n_strx, + .n_type = sym.n_type, + .n_sect = 0, + .n_desc = sym.n_desc, + .n_value = sym.n_value, + }); + try self.symbol_resolver.putNoClobber(self.base.allocator, try self.base.allocator.dupe(u8, sym_name), .{ + .where = .global, + .where_index = global_sym_index, + .local_sym_index = local_sym_index, + .file = object_id, + }); + continue; + }; + + switch (resolv.where) { + .import => unreachable, + .global => { + const global = &self.globals.items[resolv.where_index]; + + if (!(symbolIsWeakDef(sym) or symbolIsPext(sym)) and + !(symbolIsWeakDef(global.*) or symbolIsPext(global.*))) + { + log.err("symbol '{s}' defined multiple times", .{sym_name}); + log.err(" first definition in '{s}'", .{self.objects.items[resolv.file].name.?}); + log.err(" next definition in '{s}'", .{object.name.?}); + return error.MultipleSymbolDefinitions; + } + + if (symbolIsWeakDef(sym) or symbolIsPext(sym)) continue; // Current symbol is weak, so skip it. + + // Otherwise, update the resolver and the global symbol. + global.n_type = sym.n_type; + resolv.local_sym_index = local_sym_index; + resolv.file = object_id; + + continue; + }, + .undef => { + const undef = &self.undefs.items[resolv.where_index]; + undef.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + }, + .tentative => { + const tentative = &self.tentatives.items[resolv.where_index]; + tentative.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + }, + } + + const global_sym_index = @intCast(u32, self.globals.items.len); + try self.globals.append(self.base.allocator, .{ + .n_strx = local.n_strx, + .n_type = sym.n_type, + .n_sect = 0, + .n_desc = sym.n_desc, + .n_value = sym.n_value, + }); + resolv.* = .{ + .where = .global, + .where_index = global_sym_index, + .local_sym_index = local_sym_index, + .file = object_id, + }; + } else if (symbolIsTentative(sym)) { + // Symbol is a tentative definition. + const resolv = self.symbol_resolver.getPtr(sym_name) orelse { + const tent_sym_index = @intCast(u32, self.tentatives.items.len); + try self.tentatives.append(self.base.allocator, .{ + .n_strx = try self.makeString(sym_name), + .n_type = sym.n_type, + .n_sect = 0, + .n_desc = sym.n_desc, + .n_value = sym.n_value, + }); + try self.symbol_resolver.putNoClobber(self.base.allocator, try self.base.allocator.dupe(u8, sym_name), .{ + .where = .tentative, + .where_index = tent_sym_index, + .file = object_id, + }); + continue; + }; + + switch (resolv.where) { + .import => unreachable, + .global => {}, + .undef => { + const undef = &self.undefs.items[resolv.where_index]; + const tent_sym_index = @intCast(u32, self.tentatives.items.len); + try self.tentatives.append(self.base.allocator, .{ + .n_strx = undef.n_strx, + .n_type = sym.n_type, + .n_sect = 0, + .n_desc = sym.n_desc, + .n_value = sym.n_value, + }); + resolv.* = .{ + .where = .tentative, + .where_index = tent_sym_index, + .file = object_id, + }; + undef.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + }, + .tentative => { + const tentative = &self.tentatives.items[resolv.where_index]; + if (tentative.n_value >= sym.n_value) continue; + + tentative.n_desc = sym.n_desc; + tentative.n_value = sym.n_value; + resolv.file = object_id; + }, + } + } else { + // Symbol is undefined. + if (self.symbol_resolver.contains(sym_name)) continue; + + const undef_sym_index = @intCast(u32, self.undefs.items.len); + try self.undefs.append(self.base.allocator, .{ + .n_strx = try self.makeString(sym_name), + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try self.symbol_resolver.putNoClobber(self.base.allocator, try self.base.allocator.dupe(u8, sym_name), .{ + .where = .undef, + .where_index = undef_sym_index, + .file = object_id, + }); + } + } +} + +fn resolveSymbols(self: *MachO) !void { + // TODO mimicking insertion of null symbol from incremental linker. + // This will need to moved. + try self.locals.append(self.base.allocator, .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try self.strtab.append(self.base.allocator, 0); + + // First pass, resolve symbols in provided objects. + for (self.objects.items) |_, object_id| { + try self.resolveSymbolsInObject(@intCast(u16, object_id)); + } + + // Second pass, resolve symbols in static libraries. + var next_sym: usize = 0; + loop: while (true) : (next_sym += 1) { + if (next_sym == self.undefs.items.len) break; + + const sym = self.undefs.items[next_sym]; + if (symbolIsNull(sym)) continue; + + const sym_name = self.getString(sym.n_strx); + + for (self.archives.items) |archive| { + // Check if the entry exists in a static archive. + const offsets = archive.toc.get(sym_name) orelse { + // No hit. + continue; + }; + assert(offsets.items.len > 0); + + const object = try archive.parseObject(offsets.items[0]); + const object_id = @intCast(u16, self.objects.items.len); + try self.objects.append(self.base.allocator, object); + try self.resolveSymbolsInObject(object_id); + + continue :loop; + } + } + + // Convert any tentative definition into a regular symbol and allocate + // text blocks for each tentative defintion. + for (self.tentatives.items) |sym| { + if (symbolIsNull(sym)) continue; + + const sym_name = self.getString(sym.n_strx); + const match: MatchingSection = blk: { + if (self.common_section_index == null) { + const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + self.common_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__common", .{ + .flags = macho.S_ZEROFILL, + }); + } + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.common_section_index.?, + }; + }; + + const size = sym.n_value; + const code = try self.base.allocator.alloc(u8, size); + mem.set(u8, code, 0); + const alignment = (sym.n_desc >> 8) & 0x0f; + + const resolv = self.symbol_resolver.getPtr(sym_name) orelse unreachable; + const local_sym_index = @intCast(u32, self.locals.items.len); + var nlist = macho.nlist_64{ + .n_strx = sym.n_strx, + .n_type = macho.N_SECT, + .n_sect = self.sectionId(match), + .n_desc = 0, + .n_value = 0, + }; + try self.locals.append(self.base.allocator, nlist); + const global_sym_index = @intCast(u32, self.globals.items.len); + nlist.n_type |= macho.N_EXT; + try self.globals.append(self.base.allocator, nlist); + resolv.* = .{ + .where = .global, + .where_index = global_sym_index, + .local_sym_index = local_sym_index, + }; + + const block = try self.base.allocator.create(TextBlock); + errdefer self.base.allocator.destroy(block); + + block.* = TextBlock.empty; + block.local_sym_index = local_sym_index; + block.code = code; + block.size = size; + block.alignment = alignment; + + // Update target section's metadata + // TODO should we update segment's size here too? + // How does it tie with incremental space allocs? + const tseg = &self.load_commands.items[match.seg].Segment; + const tsect = &tseg.sections.items[match.sect]; + const new_alignment = math.max(tsect.@"align", block.alignment); + const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); + const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; + tsect.size = new_size; + tsect.@"align" = new_alignment; + + if (self.blocks.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try self.blocks.putNoClobber(self.base.allocator, match, block); + } + } + + // Third pass, resolve symbols in dynamic libraries. + { + // Put dyld_stub_binder as an undefined special symbol. + const undef_sym_index = @intCast(u32, self.undefs.items.len); + try self.undefs.append(self.base.allocator, .{ + .n_strx = try self.makeString("dyld_stub_binder"), + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try self.symbol_resolver.putNoClobber(self.base.allocator, try self.base.allocator.dupe(u8, "dyld_stub_binder"), .{ + .where = .undef, + .where_index = undef_sym_index, + }); + } + + var referenced = std.AutoHashMap(*Dylib, void).init(self.base.allocator); + defer referenced.deinit(); + + loop: for (self.undefs.items) |sym| { + if (symbolIsNull(sym)) continue; + + const sym_name = self.getString(sym.n_strx); + for (self.dylibs.items) |dylib| { + if (!dylib.symbols.contains(sym_name)) continue; + + if (!referenced.contains(dylib)) { + // Add LC_LOAD_DYLIB load command for each referenced dylib/stub. + dylib.ordinal = self.next_dylib_ordinal; + const dylib_id = dylib.id orelse unreachable; + var dylib_cmd = try commands.createLoadDylibCommand( + self.base.allocator, + dylib_id.name, + dylib_id.timestamp, + dylib_id.current_version, + dylib_id.compatibility_version, + ); + errdefer dylib_cmd.deinit(self.base.allocator); + try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); + self.next_dylib_ordinal += 1; + try referenced.putNoClobber(dylib, {}); + } + + const resolv = self.symbol_resolver.getPtr(sym_name) orelse unreachable; + const undef = &self.undefs.items[resolv.where_index]; + const import_sym_index = @intCast(u32, self.imports.items.len); + try self.imports.append(self.base.allocator, .{ + .n_strx = undef.n_strx, + .n_type = macho.N_UNDF | macho.N_EXT, + .n_sect = 0, + .n_desc = packDylibOrdinal(dylib.ordinal.?), + .n_value = 0, + }); + resolv.* = .{ + .where = .import, + .where_index = import_sym_index, + }; + undef.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + + continue :loop; + } + } + + // Fourth pass, handle synthetic symbols and flag any undefined references. + if (self.symbol_resolver.getPtr("___dso_handle")) |resolv| blk: { + if (resolv.where != .undef) break :blk; + + const undef = &self.undefs.items[resolv.where_index]; + const match: MatchingSection = .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }; + const local_sym_index = @intCast(u32, self.locals.items.len); + var nlist = macho.nlist_64{ + .n_strx = undef.n_strx, + .n_type = macho.N_SECT, + .n_sect = self.sectionId(match), + .n_desc = 0, + .n_value = 0, + }; + try self.locals.append(self.base.allocator, nlist); + const global_sym_index = @intCast(u32, self.globals.items.len); + nlist.n_type |= macho.N_EXT; + nlist.n_desc = macho.N_WEAK_DEF; + try self.globals.append(self.base.allocator, nlist); + + undef.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + resolv.* = .{ + .where = .global, + .where_index = global_sym_index, + .local_sym_index = local_sym_index, + }; + + // We create an empty atom for this symbol. + // TODO perhaps we should special-case special symbols? Create a separate + // linked list of atoms? + const block = try self.base.allocator.create(TextBlock); + errdefer self.base.allocator.destroy(block); + + block.* = TextBlock.empty; + block.local_sym_index = local_sym_index; + block.code = try self.base.allocator.alloc(u8, 0); + block.size = 0; + block.alignment = 0; + + if (self.blocks.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try self.blocks.putNoClobber(self.base.allocator, match, block); + } + } + + var has_undefined = false; + for (self.undefs.items) |sym| { + if (symbolIsNull(sym)) continue; + + const sym_name = self.getString(sym.n_strx); + const resolv = self.symbol_resolver.get(sym_name) orelse unreachable; + + log.err("undefined reference to symbol '{s}'", .{sym_name}); + log.err(" first referenced in '{s}'", .{self.objects.items[resolv.file].name.?}); + has_undefined = true; + } + + if (has_undefined) return error.UndefinedSymbolReference; +} + +fn parseTextBlocks(self: *MachO) !void { + for (self.objects.items) |object| { + try object.parseTextBlocks(self); + } +} + +fn populateMetadata(self: *MachO) !void { + if (self.pagezero_segment_cmd_index == null) { + self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Segment = SegmentCommand.empty("__PAGEZERO", .{ + .vmsize = 0x100000000, // size always set to 4GB + }), + }); + } + + if (self.text_segment_cmd_index == null) { + self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Segment = SegmentCommand.empty("__TEXT", .{ + .vmaddr = 0x100000000, // always starts at 4GB + .maxprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, + .initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, + }), + }); + } + + if (self.text_section_index == null) { + const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + self.text_section_index = @intCast(u16, text_seg.sections.items.len); + const alignment: u2 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; + try text_seg.addSection(self.base.allocator, "__text", .{ + .@"align" = alignment, + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }); + } + + if (self.stubs_section_index == null) { + const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + self.stubs_section_index = @intCast(u16, text_seg.sections.items.len); + const alignment: u2 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; + const stub_size: u4 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 6, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, // unhandled architecture type + }; + try text_seg.addSection(self.base.allocator, "__stubs", .{ + .@"align" = alignment, + .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + .reserved2 = stub_size, + }); + } + + if (self.stub_helper_section_index == null) { + const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + self.stub_helper_section_index = @intCast(u16, text_seg.sections.items.len); + const alignment: u2 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; + const stub_helper_size: u6 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 15, + .aarch64 => 6 * @sizeOf(u32), + else => unreachable, + }; + try text_seg.addSection(self.base.allocator, "__stub_helper", .{ + .size = stub_helper_size, + .@"align" = alignment, + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }); + } + + if (self.data_const_segment_cmd_index == null) { + self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Segment = SegmentCommand.empty("__DATA_CONST", .{ + .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + }), + }); + } + + if (self.got_section_index == null) { + const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + self.got_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__got", .{ + .@"align" = 3, // 2^3 = @sizeOf(u64) + .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, + }); + } + + if (self.data_segment_cmd_index == null) { + self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Segment = SegmentCommand.empty("__DATA", .{ + .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + }), + }); + } + + if (self.la_symbol_ptr_section_index == null) { + const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + self.la_symbol_ptr_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__la_symbol_ptr", .{ + .@"align" = 3, // 2^3 = @sizeOf(u64) + .flags = macho.S_LAZY_SYMBOL_POINTERS, + }); + } + + if (self.data_section_index == null) { + const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + self.data_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__data", .{ + .@"align" = 3, // 2^3 = @sizeOf(u64) + }); + } + + if (self.linkedit_segment_cmd_index == null) { + self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Segment = SegmentCommand.empty("__LINKEDIT", .{ + .maxprot = macho.VM_PROT_READ, + .initprot = macho.VM_PROT_READ, + }), + }); + } + + if (self.dyld_info_cmd_index == null) { + self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .DyldInfoOnly = .{ + .cmd = macho.LC_DYLD_INFO_ONLY, + .cmdsize = @sizeOf(macho.dyld_info_command), + .rebase_off = 0, + .rebase_size = 0, + .bind_off = 0, + .bind_size = 0, + .weak_bind_off = 0, + .weak_bind_size = 0, + .lazy_bind_off = 0, + .lazy_bind_size = 0, + .export_off = 0, + .export_size = 0, + }, + }); + } + + if (self.symtab_cmd_index == null) { + self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Symtab = .{ + .cmd = macho.LC_SYMTAB, + .cmdsize = @sizeOf(macho.symtab_command), + .symoff = 0, + .nsyms = 0, + .stroff = 0, + .strsize = 0, + }, + }); + } + + if (self.dysymtab_cmd_index == null) { + self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Dysymtab = .{ + .cmd = macho.LC_DYSYMTAB, + .cmdsize = @sizeOf(macho.dysymtab_command), + .ilocalsym = 0, + .nlocalsym = 0, + .iextdefsym = 0, + .nextdefsym = 0, + .iundefsym = 0, + .nundefsym = 0, + .tocoff = 0, + .ntoc = 0, + .modtaboff = 0, + .nmodtab = 0, + .extrefsymoff = 0, + .nextrefsyms = 0, + .indirectsymoff = 0, + .nindirectsyms = 0, + .extreloff = 0, + .nextrel = 0, + .locreloff = 0, + .nlocrel = 0, + }, + }); + } + + if (self.dylinker_cmd_index == null) { + self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len); + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH), + @sizeOf(u64), + )); + var dylinker_cmd = commands.emptyGenericCommandWithData(macho.dylinker_command{ + .cmd = macho.LC_LOAD_DYLINKER, + .cmdsize = cmdsize, + .name = @sizeOf(macho.dylinker_command), + }); + dylinker_cmd.data = try self.base.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name); + mem.set(u8, dylinker_cmd.data, 0); + mem.copy(u8, dylinker_cmd.data, mem.spanZ(DEFAULT_DYLD_PATH)); + try self.load_commands.append(self.base.allocator, .{ .Dylinker = dylinker_cmd }); + } + + if (self.main_cmd_index == null and self.base.options.output_mode == .Exe) { + self.main_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Main = .{ + .cmd = macho.LC_MAIN, + .cmdsize = @sizeOf(macho.entry_point_command), + .entryoff = 0x0, + .stacksize = 0, + }, + }); + } + + if (self.dylib_id_cmd_index == null and self.base.options.output_mode == .Lib) { + self.dylib_id_cmd_index = @intCast(u16, self.load_commands.items.len); + const install_name = try std.fmt.allocPrint(self.base.allocator, "@rpath/{s}", .{ + self.base.options.emit.?.sub_path, + }); + defer self.base.allocator.free(install_name); + var dylib_cmd = try commands.createLoadDylibCommand( + self.base.allocator, + install_name, + 2, + 0x10000, // TODO forward user-provided versions + 0x10000, + ); + errdefer dylib_cmd.deinit(self.base.allocator); + dylib_cmd.inner.cmd = macho.LC_ID_DYLIB; + try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); + } + + if (self.version_min_cmd_index == null) { + self.version_min_cmd_index = @intCast(u16, self.load_commands.items.len); + const cmd: u32 = switch (self.base.options.target.os.tag) { + .macos => macho.LC_VERSION_MIN_MACOSX, + .ios => macho.LC_VERSION_MIN_IPHONEOS, + .tvos => macho.LC_VERSION_MIN_TVOS, + .watchos => macho.LC_VERSION_MIN_WATCHOS, + else => unreachable, // wrong OS + }; + const ver = self.base.options.target.os.version_range.semver.min; + const version = ver.major << 16 | ver.minor << 8 | ver.patch; + try self.load_commands.append(self.base.allocator, .{ + .VersionMin = .{ + .cmd = cmd, + .cmdsize = @sizeOf(macho.version_min_command), + .version = version, + .sdk = version, + }, + }); + } + + if (self.source_version_cmd_index == null) { + self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .SourceVersion = .{ + .cmd = macho.LC_SOURCE_VERSION, + .cmdsize = @sizeOf(macho.source_version_command), + .version = 0x0, + }, + }); + } + + if (self.uuid_cmd_index == null) { + self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); + var uuid_cmd: macho.uuid_command = .{ + .cmd = macho.LC_UUID, + .cmdsize = @sizeOf(macho.uuid_command), + .uuid = undefined, + }; + std.crypto.random.bytes(&uuid_cmd.uuid); + try self.load_commands.append(self.base.allocator, .{ .Uuid = uuid_cmd }); + } +} + +fn addDataInCodeLC(self: *MachO) !void { + if (self.data_in_code_cmd_index == null) { + self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .LinkeditData = .{ + .cmd = macho.LC_DATA_IN_CODE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = 0, + .datasize = 0, + }, + }); + } +} + +fn addCodeSignatureLC(self: *MachO) !void { + if (self.code_signature_cmd_index == null and self.base.options.target.cpu.arch == .aarch64) { + self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .LinkeditData = .{ + .cmd = macho.LC_CODE_SIGNATURE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = 0, + .datasize = 0, + }, + }); + } +} + +fn addRpaths(self: *MachO, rpaths: []const []const u8) !void { + for (rpaths) |rpath| { + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.rpath_command) + rpath.len + 1, + @sizeOf(u64), + )); + var rpath_cmd = commands.emptyGenericCommandWithData(macho.rpath_command{ + .cmd = macho.LC_RPATH, + .cmdsize = cmdsize, + .path = @sizeOf(macho.rpath_command), + }); + rpath_cmd.data = try self.base.allocator.alloc(u8, cmdsize - rpath_cmd.inner.path); + mem.set(u8, rpath_cmd.data, 0); + mem.copy(u8, rpath_cmd.data, rpath); + try self.load_commands.append(self.base.allocator, .{ .Rpath = rpath_cmd }); + } +} + +fn flushZld(self: *MachO) !void { + try self.writeTextBlocks(); + try self.writeStubHelperCommon(); + + if (self.common_section_index) |index| { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[index]; + sect.offset = 0; + } + + if (self.bss_section_index) |index| { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[index]; + sect.offset = 0; + } + + if (self.tlv_bss_section_index) |index| { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[index]; + sect.offset = 0; + } + + try self.writeGotEntries(); + try self.setEntryPoint(); + try self.writeRebaseInfoTable(); + try self.writeBindInfoTable(); + try self.writeLazyBindInfoTable(); + try self.writeExportInfo(); + try self.writeDices(); + + { + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + } + + try self.writeSymbolTable(); + try self.writeStringTable(); + + { + // Seal __LINKEDIT size + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); + } + + if (self.base.options.target.cpu.arch == .aarch64) { + try self.writeCodeSignaturePadding(); + } + + try self.writeLoadCommands(); + try self.writeHeader(); + + if (self.base.options.target.cpu.arch == .aarch64) { + try self.writeCodeSignature(); + } + + // if (comptime std.Target.current.isDarwin() and std.Target.current.cpu.arch == .aarch64) { + // const out_path = self.output.?.path; + // try fs.cwd().copyFile(out_path, fs.cwd(), out_path, .{}); + // } +} + +fn writeGotEntries(self: *MachO) !void { + const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const sect = seg.sections.items[self.got_section_index.?]; + + var buffer = try self.base.allocator.alloc(u8, self.got_entries.items.len * @sizeOf(u64)); + defer self.base.allocator.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + var writer = stream.writer(); + + for (self.got_entries.items) |key| { + const address: u64 = switch (key.where) { + .local => self.locals.items[key.where_index].n_value, + .import => 0, + }; + try writer.writeIntLittle(u64, address); + } + + log.debug("writing GOT pointers at 0x{x} to 0x{x}", .{ sect.offset, sect.offset + buffer.len }); + + try self.base.file.?.pwriteAll(buffer, sect.offset); +} + +fn setEntryPoint(self: *MachO) !void { + if (self.base.options.output_mode != .Exe) return; + + // TODO we should respect the -entry flag passed in by the user to set a custom + // entrypoint. For now, assume default of `_main`. + const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const resolv = self.symbol_resolver.get("_main") orelse { + log.err("'_main' export not found", .{}); + return error.MissingMainEntrypoint; + }; + assert(resolv.where == .global); + const sym = self.globals.items[resolv.where_index]; + const ec = &self.load_commands.items[self.main_cmd_index.?].Main; + ec.entryoff = @intCast(u32, sym.n_value - seg.inner.vmaddr); + ec.stacksize = self.base.options.stack_size_override orelse 0; +} + +fn writeSymbolTable(self: *MachO) !void { + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + + var locals = std.ArrayList(macho.nlist_64).init(self.base.allocator); + defer locals.deinit(); + try locals.appendSlice(self.locals.items); + + if (self.has_stabs) { + for (self.objects.items) |object| { + if (object.debug_info == null) continue; + + // Open scope + try locals.ensureUnusedCapacity(4); + locals.appendAssumeCapacity(.{ + .n_strx = try self.makeString(object.tu_comp_dir.?), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.makeString(object.tu_name.?), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.makeString(object.name.?), + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = object.mtime orelse 0, + }); + + for (object.text_blocks.items) |block| { + if (block.stab) |stab| { + const nlists = try stab.asNlists(block.local_sym_index, self); + defer self.base.allocator.free(nlists); + try locals.appendSlice(nlists); + } else { + for (block.contained.items) |sym_at_off| { + const stab = sym_at_off.stab orelse continue; + const nlists = try stab.asNlists(sym_at_off.local_sym_index, self); + defer self.base.allocator.free(nlists); + try locals.appendSlice(nlists); + } + } + } + + // Close scope + locals.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + } + } + + const nlocals = locals.items.len; + const nexports = self.globals.items.len; + const nundefs = self.imports.items.len; + + const locals_off = symtab.symoff + symtab.nsyms * @sizeOf(macho.nlist_64); + const locals_size = nlocals * @sizeOf(macho.nlist_64); + log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); + + const exports_off = locals_off + locals_size; + const exports_size = nexports * @sizeOf(macho.nlist_64); + log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), exports_off); + + const undefs_off = exports_off + exports_size; + const undefs_size = nundefs * @sizeOf(macho.nlist_64); + log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.imports.items), undefs_off); + + symtab.nsyms += @intCast(u32, nlocals + nexports + nundefs); + seg.inner.filesize += locals_size + exports_size + undefs_size; + + // Update dynamic symbol table. + const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; + dysymtab.nlocalsym += @intCast(u32, nlocals); + dysymtab.iextdefsym = dysymtab.nlocalsym; + dysymtab.nextdefsym = @intCast(u32, nexports); + dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; + dysymtab.nundefsym = @intCast(u32, nundefs); + + const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const stubs = &text_segment.sections.items[self.stubs_section_index.?]; + const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const got = &data_const_segment.sections.items[self.got_section_index.?]; + const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; + + const nstubs = @intCast(u32, self.stubs.items.len); + const ngot_entries = @intCast(u32, self.got_entries.items.len); + + dysymtab.indirectsymoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + dysymtab.nindirectsyms = nstubs * 2 + ngot_entries; + + const needed_size = dysymtab.nindirectsyms * @sizeOf(u32); + seg.inner.filesize += needed_size; + + log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ + dysymtab.indirectsymoff, + dysymtab.indirectsymoff + needed_size, + }); + + var buf = try self.base.allocator.alloc(u8, needed_size); + defer self.base.allocator.free(buf); + + var stream = std.io.fixedBufferStream(buf); + var writer = stream.writer(); + + stubs.reserved1 = 0; + for (self.stubs.items) |id| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + id); + } + + got.reserved1 = nstubs; + for (self.got_entries.items) |entry| { + switch (entry.where) { + .import => { + try writer.writeIntLittle(u32, dysymtab.iundefsym + entry.where_index); + }, + .local => { + try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); + }, + } + } + + la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; + for (self.stubs.items) |id| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + id); + } + + try self.base.file.?.pwriteAll(buf, dysymtab.indirectsymoff); +} + pub fn deinit(self: *MachO) void { if (self.d_sym) |*ds| { ds.deinit(self.base.allocator); @@ -970,6 +3014,8 @@ pub fn deinit(self: *MachO) void { self.stubs.deinit(self.base.allocator); self.stubs_map.deinit(self.base.allocator); self.strtab.deinit(self.base.allocator); + self.undefs.deinit(self.base.allocator); + self.tentatives.deinit(self.base.allocator); self.imports.deinit(self.base.allocator); self.globals.deinit(self.base.allocator); self.globals_free_list.deinit(self.base.allocator); @@ -981,10 +3027,40 @@ pub fn deinit(self: *MachO) void { } self.symbol_resolver.deinit(self.base.allocator); + for (self.objects.items) |object| { + object.deinit(); + self.base.allocator.destroy(object); + } + self.objects.deinit(self.base.allocator); + + for (self.archives.items) |archive| { + archive.deinit(); + self.base.allocator.destroy(archive); + } + self.archives.deinit(self.base.allocator); + + for (self.dylibs.items) |dylib| { + dylib.deinit(); + self.base.allocator.destroy(dylib); + } + self.dylibs.deinit(self.base.allocator); + for (self.load_commands.items) |*lc| { lc.deinit(self.base.allocator); } self.load_commands.deinit(self.base.allocator); + + // TODO dealloc all blocks + self.blocks.deinit(self.base.allocator); +} + +pub fn closeFiles(self: MachO) void { + for (self.objects.items) |object| { + object.closeFile(); + } + for (self.archives.items) |archive| { + archive.closeFile(); + } } fn freeTextBlock(self: *MachO, text_block: *TextBlock) void { @@ -2664,6 +4740,60 @@ fn writeIndirectSymbolTable(self: *MachO) !void { self.load_commands_dirty = true; } +fn writeDices(self: *MachO) !void { + if (!self.has_dices) return; + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData; + const fileoff = seg.inner.fileoff + seg.inner.filesize; + + var buf = std.ArrayList(u8).init(self.base.allocator); + defer buf.deinit(); + + var block: *TextBlock = self.blocks.get(.{ + .seg = self.text_segment_cmd_index orelse return, + .sect = self.text_section_index orelse return, + }) orelse return; + + while (block.prev) |prev| { + block = prev; + } + + const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const text_sect = text_seg.sections.items[self.text_section_index.?]; + + while (true) { + if (block.dices.items.len > 0) { + const sym = self.locals.items[block.local_sym_index]; + const base_off = try math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset); + + try buf.ensureUnusedCapacity(block.dices.items.len * @sizeOf(macho.data_in_code_entry)); + for (block.dices.items) |dice| { + const rebased_dice = macho.data_in_code_entry{ + .offset = base_off + dice.offset, + .length = dice.length, + .kind = dice.kind, + }; + buf.appendSliceAssumeCapacity(mem.asBytes(&rebased_dice)); + } + } + + if (block.next) |next| { + block = next; + } else break; + } + + const datasize = @intCast(u32, buf.items.len); + + dice_cmd.dataoff = @intCast(u32, fileoff); + dice_cmd.datasize = datasize; + seg.inner.filesize += datasize; + + log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ fileoff, fileoff + datasize }); + + try self.base.file.?.pwriteAll(buf.items, fileoff); +} + fn writeCodeSignaturePadding(self: *MachO) !void { // TODO figure out how not to rewrite padding every single time. const tracy = trace(@src()); @@ -2719,7 +4849,7 @@ fn writeCodeSignature(self: *MachO) !void { try self.base.file.?.pwriteAll(buffer, code_sig_cmd.dataoff); } -fn writeExportTrie(self: *MachO) !void { +fn writeExportInfo(self: *MachO) !void { if (!self.export_info_dirty) return; if (self.globals.items.len == 0) return; @@ -2779,6 +4909,34 @@ fn writeRebaseInfoTable(self: *MachO) !void { var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); defer pointers.deinit(); + { + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var block: *TextBlock = entry.value_ptr.*; + + if (match.seg == self.text_segment_cmd_index.?) continue; // __TEXT is non-writable + + const seg = self.load_commands.items[match.seg].Segment; + + while (true) { + const sym = self.locals.items[block.local_sym_index]; + const base_offset = sym.n_value - seg.inner.vmaddr; + + for (block.rebases.items) |offset| { + try pointers.append(.{ + .offset = base_offset + offset, + .segment_id = match.seg, + }); + } + + if (block.prev) |prev| { + block = prev; + } else break; + } + } + } + if (self.got_section_index) |idx| { const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; const sect = seg.sections.items[idx]; @@ -2837,7 +4995,7 @@ fn writeRebaseInfoTable(self: *MachO) !void { self.rebase_info_dirty = false; } -fn writeBindingInfoTable(self: *MachO) !void { +fn writeBindInfoTable(self: *MachO) !void { if (!self.binding_info_dirty) return; const tracy = trace(@src()); @@ -2865,6 +5023,37 @@ fn writeBindingInfoTable(self: *MachO) !void { } } + { + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var block: *TextBlock = entry.value_ptr.*; + + if (match.seg == self.text_segment_cmd_index.?) continue; // __TEXT is non-writable + + const seg = self.load_commands.items[match.seg].Segment; + + while (true) { + const sym = self.locals.items[block.local_sym_index]; + const base_offset = sym.n_value - seg.inner.vmaddr; + + for (block.bindings.items) |binding| { + const bind_sym = self.imports.items[binding.local_sym_index]; + try pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = match.seg, + .dylib_ordinal = unpackDylibOrdinal(bind_sym.n_desc), + .name = self.getString(bind_sym.n_strx), + }); + } + + if (block.prev) |prev| { + block = prev; + } else break; + } + } + } + const size = try bind.bindInfoSize(pointers.items); var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); defer self.base.allocator.free(buffer); @@ -2890,7 +5079,7 @@ fn writeBindingInfoTable(self: *MachO) !void { self.binding_info_dirty = false; } -fn writeLazyBindingInfoTable(self: *MachO) !void { +fn writeLazyBindInfoTable(self: *MachO) !void { if (!self.lazy_binding_info_dirty) return; const tracy = trace(@src()); @@ -3134,7 +5323,7 @@ fn writeHeader(self: *MachO) !void { else => unreachable, } - if (self.hasTlvDescriptors()) { + if (self.tlv_section_index) |_| { header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; } @@ -3156,10 +5345,6 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { std.math.maxInt(@TypeOf(actual_size)); } -fn hasTlvDescriptors(_: *MachO) bool { - return false; -} - pub fn makeString(self: *MachO, string: []const u8) !u32 { try self.strtab.ensureUnusedCapacity(self.base.allocator, string.len + 1); const new_off = @intCast(u32, self.strtab.items.len); @@ -3177,6 +5362,92 @@ pub fn getString(self: *MachO, off: u32) []const u8 { return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + off)); } +pub fn symbolIsStab(sym: macho.nlist_64) bool { + return (macho.N_STAB & sym.n_type) != 0; +} + +pub fn symbolIsPext(sym: macho.nlist_64) bool { + return (macho.N_PEXT & sym.n_type) != 0; +} + +pub fn symbolIsExt(sym: macho.nlist_64) bool { + return (macho.N_EXT & sym.n_type) != 0; +} + +pub fn symbolIsSect(sym: macho.nlist_64) bool { + const type_ = macho.N_TYPE & sym.n_type; + return type_ == macho.N_SECT; +} + +pub fn symbolIsUndf(sym: macho.nlist_64) bool { + const type_ = macho.N_TYPE & sym.n_type; + return type_ == macho.N_UNDF; +} + +pub fn symbolIsIndr(sym: macho.nlist_64) bool { + const type_ = macho.N_TYPE & sym.n_type; + return type_ == macho.N_INDR; +} + +pub fn symbolIsAbs(sym: macho.nlist_64) bool { + const type_ = macho.N_TYPE & sym.n_type; + return type_ == macho.N_ABS; +} + +pub fn symbolIsWeakDef(sym: macho.nlist_64) bool { + return (sym.n_desc & macho.N_WEAK_DEF) != 0; +} + +pub fn symbolIsWeakRef(sym: macho.nlist_64) bool { + return (sym.n_desc & macho.N_WEAK_REF) != 0; +} + +pub fn symbolIsTentative(sym: macho.nlist_64) bool { + if (!symbolIsUndf(sym)) return false; + return sym.n_value != 0; +} + +pub fn symbolIsNull(sym: macho.nlist_64) bool { + return sym.n_value == 0 and sym.n_desc == 0 and sym.n_type == 0 and sym.n_strx == 0 and sym.n_sect == 0; +} + +pub fn symbolIsTemp(sym: macho.nlist_64, sym_name: []const u8) bool { + if (!symbolIsSect(sym)) return false; + if (symbolIsExt(sym)) return false; + return mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L"); +} + +pub fn sectionId(self: MachO, match: MatchingSection) u8 { + // TODO there might be a more generic way of doing this. + var section: u8 = 0; + for (self.load_commands.items) |cmd, cmd_id| { + if (cmd != .Segment) break; + if (cmd_id == match.seg) { + section += @intCast(u8, match.sect) + 1; + break; + } + section += @intCast(u8, cmd.Segment.sections.items.len); + } + return section; +} + +pub fn unpackSectionId(self: MachO, section_id: u8) MatchingSection { + var match: MatchingSection = undefined; + var section: u8 = 0; + outer: for (self.load_commands.items) |cmd, cmd_id| { + assert(cmd == .Segment); + for (cmd.Segment.sections.items) |_, sect_id| { + section += 1; + if (section_id == section) { + match.seg = @intCast(u16, cmd_id); + match.sect = @intCast(u16, sect_id); + break :outer; + } + } + } + return match; +} + fn packDylibOrdinal(ordinal: u16) u16 { return ordinal * macho.N_SYMBOL_RESOLVER; } @@ -3184,3 +5455,16 @@ fn packDylibOrdinal(ordinal: u16) u16 { fn unpackDylibOrdinal(pack: u16) u16 { return @divExact(pack, macho.N_SYMBOL_RESOLVER); } + +pub fn findFirst(comptime T: type, haystack: []T, start: usize, predicate: anytype) usize { + if (!@hasDecl(@TypeOf(predicate), "predicate")) + @compileError("Predicate is required to define fn predicate(@This(), T) bool"); + + if (start == haystack.len) return start; + + var i = start; + while (i < haystack.len) : (i += 1) { + if (predicate.predicate(haystack[i])) break; + } + return i; +} diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index ca71b7613c..0e7f95a3d5 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -13,7 +13,7 @@ const fat = @import("fat.zig"); const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; const LibStub = @import("../tapi.zig").LibStub; -const Zld = @import("Zld.zig"); +const MachO = @import("../MachO.zig"); usingnamespace @import("commands.zig"); @@ -324,7 +324,7 @@ fn parseSymbols(self: *Dylib) !void { _ = try self.file.?.preadAll(strtab, symtab_cmd.stroff + self.library_offset); for (slice) |sym| { - const add_to_symtab = Zld.symbolIsExt(sym) and (Zld.symbolIsSect(sym) or Zld.symbolIsIndr(sym)); + const add_to_symtab = MachO.symbolIsExt(sym) and (MachO.symbolIsSect(sym) or MachO.symbolIsIndr(sym)); if (!add_to_symtab) continue; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index f8e88673a1..eaab05140e 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -13,8 +13,8 @@ const sort = std.sort; const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; +const MachO = @import("../MachO.zig"); const TextBlock = @import("TextBlock.zig"); -const Zld = @import("Zld.zig"); usingnamespace @import("commands.zig"); @@ -307,8 +307,8 @@ const NlistWithIndex = struct { } }; - const start = Zld.findFirst(NlistWithIndex, symbols, 0, Predicate{ .addr = sect.addr }); - const end = Zld.findFirst(NlistWithIndex, symbols, start, Predicate{ .addr = sect.addr + sect.size }); + const start = MachO.findFirst(NlistWithIndex, symbols, 0, Predicate{ .addr = sect.addr }); + const end = MachO.findFirst(NlistWithIndex, symbols, start, Predicate{ .addr = sect.addr + sect.size }); return symbols[start..end]; } @@ -323,8 +323,8 @@ fn filterDice(dices: []macho.data_in_code_entry, start_addr: u64, end_addr: u64) } }; - const start = Zld.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr }); - const end = Zld.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr }); + const start = MachO.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr }); + const end = MachO.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr }); return dices[start..end]; } @@ -335,10 +335,10 @@ const TextBlockParser = struct { code: []u8, relocs: []macho.relocation_info, object: *Object, - zld: *Zld, + macho_file: *MachO, nlists: []NlistWithIndex, index: u32 = 0, - match: Zld.MatchingSection, + match: MachO.MatchingSection, fn peek(self: *TextBlockParser) ?NlistWithIndex { return if (self.index + 1 < self.nlists.len) self.nlists[self.index + 1] else null; @@ -349,10 +349,10 @@ const TextBlockParser = struct { }; fn lessThanBySeniority(context: SeniorityContext, lhs: NlistWithIndex, rhs: NlistWithIndex) bool { - if (!Zld.symbolIsExt(rhs.nlist)) { - return Zld.symbolIsTemp(lhs.nlist, context.object.getString(lhs.nlist.n_strx)); - } else if (Zld.symbolIsPext(rhs.nlist) or Zld.symbolIsWeakDef(rhs.nlist)) { - return !Zld.symbolIsExt(lhs.nlist); + if (!MachO.symbolIsExt(rhs.nlist)) { + return MachO.symbolIsTemp(lhs.nlist, context.object.getString(lhs.nlist.n_strx)); + } else if (MachO.symbolIsPext(rhs.nlist) or MachO.symbolIsWeakDef(rhs.nlist)) { + return !MachO.symbolIsExt(lhs.nlist); } else { return true; } @@ -383,7 +383,7 @@ const TextBlockParser = struct { const sym = self.object.symbols.items[nlist_with_index.index]; if (sym.payload != .regular) { log.err("expected a regular symbol, found {s}", .{sym.payload}); - log.err(" when remapping {s}", .{self.zld.getString(sym.strx)}); + log.err(" when remapping {s}", .{self.macho_file.getString(sym.strx)}); return error.SymbolIsNotRegular; } assert(sym.payload.regular.local_sym_index != 0); // This means the symbol has not been properly resolved. @@ -401,7 +401,7 @@ const TextBlockParser = struct { } const senior_nlist = aliases.pop(); - const senior_sym = self.zld.locals.items[senior_nlist.index]; + const senior_sym = self.macho_file.locals.items[senior_nlist.index]; assert(senior_sym.payload == .regular); senior_sym.payload.regular.segment_id = self.match.seg; senior_sym.payload.regular.section_id = self.match.sect; @@ -429,7 +429,7 @@ const TextBlockParser = struct { } } } - if (self.zld.globals.contains(self.zld.getString(senior_sym.strx))) break :blk .global; + if (self.macho_file.globals.contains(self.macho_file.getString(senior_sym.strx))) break :blk .global; break :blk .static; } else null; @@ -448,19 +448,19 @@ const TextBlockParser = struct { for (aliases.items) |alias| { block.aliases.appendAssumeCapacity(alias.index); - const sym = self.zld.locals.items[alias.index]; + const sym = self.macho_file.locals.items[alias.index]; const reg = &sym.payload.regular; reg.segment_id = self.match.seg; reg.section_id = self.match.sect; } } - try block.parseRelocsFromObject(relocs, object, .{ + try block.parseRelocsFromObject(self.allocator, relocs, object, .{ .base_addr = start_addr, - .zld = self.zld, + .macho_file = self.macho_file, }); - if (self.zld.has_dices) { + if (self.macho_file.has_dices) { const dices = filterDice( self.object.data_in_code_entries.items, senior_nlist.nlist.n_value, @@ -483,7 +483,7 @@ const TextBlockParser = struct { } }; -pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { +pub fn parseTextBlocks(self: *Object, macho_file: *MachO) !void { const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; log.debug("analysing {s}", .{self.name.?}); @@ -513,7 +513,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { }); // Get matching segment/section in the final artifact. - const match = (try zld.getMatchingSection(sect)) orelse { + const match = (try macho_file.getMatchingSection(sect)) orelse { log.debug("unhandled section", .{}); continue; }; @@ -538,7 +538,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { // duplicates at all? Need some benchmarks! // const is_splittable = false; - zld.has_dices = blk: { + macho_file.has_dices = blk: { if (self.text_section_index) |index| { if (index != id) break :blk false; if (self.data_in_code_entries.items.len == 0) break :blk false; @@ -546,7 +546,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { } break :blk false; }; - zld.has_stabs = zld.has_stabs or self.debug_info != null; + macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null; { // next: { @@ -711,11 +711,11 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { defer self.allocator.free(sym_name); const block_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { - const block_local_sym_index = @intCast(u32, zld.locals.items.len); - try zld.locals.append(zld.allocator, .{ - .n_strx = try zld.makeString(sym_name), + const block_local_sym_index = @intCast(u32, macho_file.locals.items.len); + try macho_file.locals.append(macho_file.base.allocator, .{ + .n_strx = try macho_file.makeString(sym_name), .n_type = macho.N_SECT, - .n_sect = zld.sectionId(match), + .n_sect = macho_file.sectionId(match), .n_desc = 0, .n_value = sect.addr, }); @@ -726,20 +726,20 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const block = try self.allocator.create(TextBlock); errdefer self.allocator.destroy(block); - block.* = TextBlock.init(self.allocator); + block.* = TextBlock.empty; block.local_sym_index = block_local_sym_index; block.code = try self.allocator.dupe(u8, code); block.size = sect.size; block.alignment = sect.@"align"; - try block.parseRelocsFromObject(relocs, self, .{ + try block.parseRelocsFromObject(self.allocator, relocs, self, .{ .base_addr = 0, - .zld = zld, + .macho_file = macho_file, }); - if (zld.has_dices) { + if (macho_file.has_dices) { const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size); - try block.dices.ensureTotalCapacity(dices.len); + try block.dices.ensureTotalCapacity(self.allocator, dices.len); for (dices) |dice| { block.dices.appendAssumeCapacity(.{ @@ -755,15 +755,13 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { // the filtered symbols and note which symbol is contained within so that // we can properly allocate addresses down the line. // While we're at it, we need to update segment,section mapping of each symbol too. - var contained = std.ArrayList(TextBlock.SymbolAtOffset).init(self.allocator); - defer contained.deinit(); - try contained.ensureTotalCapacity(filtered_nlists.len); + try block.contained.ensureTotalCapacity(self.allocator, filtered_nlists.len); for (filtered_nlists) |nlist_with_index| { const nlist = nlist_with_index.nlist; const local_sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable; - const local = &zld.locals.items[local_sym_index]; - local.n_sect = zld.sectionId(match); + const local = &macho_file.locals.items[local_sym_index]; + local.n_sect = macho_file.sectionId(match); const stab: ?TextBlock.Stab = if (self.debug_info) |di| blk: { // TODO there has to be a better to handle this. @@ -781,19 +779,17 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { break :blk .static; } else null; - contained.appendAssumeCapacity(.{ + block.contained.appendAssumeCapacity(.{ .local_sym_index = local_sym_index, .offset = nlist.n_value - sect.addr, .stab = stab, }); } - block.contained = contained.toOwnedSlice(); - // Update target section's metadata // TODO should we update segment's size here too? // How does it tie with incremental space allocs? - const tseg = &zld.load_commands.items[match.seg].Segment; + const tseg = &macho_file.load_commands.items[match.seg].Segment; const tsect = &tseg.sections.items[match.sect]; const new_alignment = math.max(tsect.@"align", block.alignment); const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); @@ -801,12 +797,12 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { tsect.size = new_size; tsect.@"align" = new_alignment; - if (zld.blocks.getPtr(match)) |last| { + if (macho_file.blocks.getPtr(match)) |last| { last.*.next = block; block.prev = last.*; last.* = block; } else { - try zld.blocks.putNoClobber(zld.allocator, match, block); + try macho_file.blocks.putNoClobber(self.allocator, match, block); } try self.text_blocks.append(self.allocator, block); @@ -814,7 +810,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { } } -pub fn symbolFromReloc(self: *Object, zld: *Zld, rel: macho.relocation_info) !*Symbol { +pub fn symbolFromReloc(self: *Object, macho_file: *MachO, rel: macho.relocation_info) !*Symbol { const symbol = blk: { if (rel.r_extern == 1) { break :blk self.symbols.items[rel.r_symbolnum]; @@ -832,9 +828,9 @@ pub fn symbolFromReloc(self: *Object, zld: *Zld, rel: macho.relocation_info) !*S sectionName(sect), }); defer self.allocator.free(name); - const symbol = try zld.allocator.create(Symbol); + const symbol = try macho_file.allocator.create(Symbol); symbol.* = .{ - .strx = try zld.makeString(name), + .strx = try macho_file.makeString(name), .payload = .{ .regular = .{ .linkage = .translation_unit, diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index ad2d4c11cf..0b6ff20d94 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -12,24 +12,64 @@ const meta = std.meta; const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; +const MachO = @import("../MachO.zig"); const Object = @import("Object.zig"); -const Zld = @import("Zld.zig"); -allocator: *Allocator, +/// Each decl always gets a local symbol with the fully qualified name. +/// The vaddr and size are found here directly. +/// The file offset is found by computing the vaddr offset from the section vaddr +/// the symbol references, and adding that to the file offset of the section. +/// If this field is 0, it means the codegen size = 0 and there is no symbol or +/// offset table entry. local_sym_index: u32, -stab: ?Stab = null, -aliases: std.ArrayList(u32), -references: std.AutoArrayHashMap(u32, void), -contained: ?[]SymbolAtOffset = null, + +/// List of symbol aliases pointing to the same block via different nlists +aliases: std.ArrayListUnmanaged(u32) = .{}, + +/// List of symbols contained within this block +contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, + +/// Code (may be non-relocated) this block represents code: []u8, -relocs: std.ArrayList(Relocation), + +/// Size and alignment of this text block +/// Unlike in Elf, we need to store the size of this symbol as part of +/// the TextBlock since macho.nlist_64 lacks this information. size: u64, alignment: u32, -rebases: std.ArrayList(u64), -bindings: std.ArrayList(SymbolAtOffset), -dices: std.ArrayList(macho.data_in_code_entry), -next: ?*TextBlock = null, -prev: ?*TextBlock = null, + +relocs: std.ArrayListUnmanaged(Relocation) = .{}, + +/// List of offsets contained within this block that need rebasing by the dynamic +/// loader in presence of ASLR +rebases: std.ArrayListUnmanaged(u64) = .{}, + +/// List of offsets contained within this block that will be dynamically bound +/// by the dynamic loader and contain pointers to resolved (at load time) extern +/// symbols (aka proxies aka imports) +bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, + +/// List of data-in-code entries. This is currently specific to x86_64 only. +dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, + +/// Stab entry for this block. This is currently specific to a binary created +/// by linking object files in a traditional sense - in incremental sense, we +/// bypass stabs altogether to produce dSYM bundle directly with fully relocated +/// DWARF sections. +stab: ?Stab = null, + +/// Points to the previous and next neighbours +next: ?*TextBlock, +prev: ?*TextBlock, + +/// Previous/next linked list pointers. +/// This is the linked list node for this Decl's corresponding .debug_info tag. +dbg_info_prev: ?*TextBlock, +dbg_info_next: ?*TextBlock, +/// Offset into .debug_info pointing to the tag for this Decl. +dbg_info_off: u32, +/// Size of the .debug_info tag for this Decl, not including padding. +dbg_info_len: u32, pub const SymbolAtOffset = struct { local_sym_index: u32, @@ -42,11 +82,11 @@ pub const Stab = union(enum) { static, global, - pub fn asNlists(stab: Stab, local_sym_index: u32, zld: *Zld) ![]macho.nlist_64 { - var nlists = std.ArrayList(macho.nlist_64).init(zld.allocator); + pub fn asNlists(stab: Stab, local_sym_index: u32, macho_file: anytype) ![]macho.nlist_64 { + var nlists = std.ArrayList(macho.nlist_64).init(macho_file.base.allocator); defer nlists.deinit(); - const sym = zld.locals.items[local_sym_index]; + const sym = macho_file.locals.items[local_sym_index]; switch (stab) { .function => |size| { try nlists.ensureUnusedCapacity(4); @@ -130,7 +170,7 @@ pub const Relocation = struct { offset: u32, source_addr: u64, target_addr: u64, - zld: *Zld, + macho_file: *MachO, }; pub const Unsigned = struct { @@ -148,7 +188,7 @@ pub const Relocation = struct { pub fn resolve(self: Unsigned, args: ResolveArgs) !void { const result = blk: { if (self.subtractor) |subtractor| { - const sym = args.zld.locals.items[subtractor]; + const sym = args.macho_file.locals.items[subtractor]; break :blk @intCast(i64, args.target_addr) - @intCast(i64, sym.n_value) + self.addend; } else { break :blk @intCast(i64, args.target_addr) + self.addend; @@ -500,38 +540,59 @@ pub const Relocation = struct { } }; -pub fn init(allocator: *Allocator) TextBlock { - return .{ - .allocator = allocator, - .local_sym_index = undefined, - .aliases = std.ArrayList(u32).init(allocator), - .references = std.AutoArrayHashMap(u32, void).init(allocator), - .code = undefined, - .relocs = std.ArrayList(Relocation).init(allocator), - .size = undefined, - .alignment = undefined, - .rebases = std.ArrayList(u64).init(allocator), - .bindings = std.ArrayList(SymbolAtOffset).init(allocator), - .dices = std.ArrayList(macho.data_in_code_entry).init(allocator), - }; +pub const empty = TextBlock{ + .local_sym_index = 0, + .code = undefined, + .size = 0, + .alignment = 0, + .prev = null, + .next = null, + .dbg_info_prev = null, + .dbg_info_next = null, + .dbg_info_off = undefined, + .dbg_info_len = undefined, +}; + +pub fn deinit(self: *TextBlock, allocator: *Allocator) void { + self.dices.deinit(allocator); + self.bindings.deinit(allocator); + self.rebases.deinit(allocator); + self.relocs.deinit(allocator); + self.allocator.free(self.code); + self.contained.deinit(allocator); + self.aliases.deinit(allocator); } -pub fn deinit(self: *TextBlock) void { - self.aliases.deinit(); - self.references.deinit(); - if (self.contained) |contained| { - self.allocator.free(contained); +/// Returns how much room there is to grow in virtual address space. +/// File offset relocation happens transparently, so it is not included in +/// this calculation. +pub fn capacity(self: TextBlock, macho_file: MachO) u64 { + const self_sym = macho_file.locals.items[self.local_sym_index]; + if (self.next) |next| { + const next_sym = macho_file.locals.items[next.local_sym_index]; + return next_sym.n_value - self_sym.n_value; + } else { + // We are the last block. + // The capacity is limited only by virtual address space. + return std.math.maxInt(u64) - self_sym.n_value; } - self.allocator.free(self.code); - self.relocs.deinit(); - self.rebases.deinit(); - self.bindings.deinit(); - self.dices.deinit(); +} + +pub fn freeListEligible(self: TextBlock, macho_file: MachO) bool { + // No need to keep a free list node for the last block. + const next = self.next orelse return false; + const self_sym = macho_file.locals.items[self.local_sym_index]; + const next_sym = macho_file.locals.items[next.local_sym_index]; + const cap = next_sym.n_value - self_sym.n_value; + const ideal_cap = MachO.padToIdeal(self.size); + if (cap <= ideal_cap) return false; + const surplus = cap - ideal_cap; + return surplus >= MachO.min_text_capacity; } const RelocContext = struct { base_addr: u64 = 0, - zld: *Zld, + macho_file: *MachO, }; fn initRelocFromObject(rel: macho.relocation_info, object: *Object, ctx: RelocContext) !Relocation { @@ -548,19 +609,19 @@ fn initRelocFromObject(rel: macho.relocation_info, object: *Object, ctx: RelocCo const local_sym_index = object.sections_as_symbols.get(sect_id) orelse blk: { const seg = object.load_commands.items[object.segment_cmd_index.?].Segment; const sect = seg.sections.items[sect_id]; - const match = (try ctx.zld.getMatchingSection(sect)) orelse unreachable; - const local_sym_index = @intCast(u32, ctx.zld.locals.items.len); - const sym_name = try std.fmt.allocPrint(ctx.zld.allocator, "l_{s}_{s}_{s}", .{ + const match = (try ctx.macho_file.getMatchingSection(sect)) orelse unreachable; + const local_sym_index = @intCast(u32, ctx.macho_file.locals.items.len); + const sym_name = try std.fmt.allocPrint(ctx.macho_file.base.allocator, "l_{s}_{s}_{s}", .{ object.name.?, commands.segmentName(sect), commands.sectionName(sect), }); - defer ctx.zld.allocator.free(sym_name); + defer ctx.macho_file.base.allocator.free(sym_name); - try ctx.zld.locals.append(ctx.zld.allocator, .{ - .n_strx = try ctx.zld.makeString(sym_name), + try ctx.macho_file.locals.append(ctx.macho_file.base.allocator, .{ + .n_strx = try ctx.macho_file.makeString(sym_name), .n_type = macho.N_SECT, - .n_sect = ctx.zld.sectionId(match), + .n_sect = ctx.macho_file.sectionId(match), .n_desc = 0, .n_value = sect.addr, }); @@ -574,12 +635,12 @@ fn initRelocFromObject(rel: macho.relocation_info, object: *Object, ctx: RelocCo const sym = object.symtab.items[rel.r_symbolnum]; const sym_name = object.getString(sym.n_strx); - if (Zld.symbolIsSect(sym) and !Zld.symbolIsExt(sym)) { + if (MachO.symbolIsSect(sym) and !MachO.symbolIsExt(sym)) { const where_index = object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; parsed_rel.where = .local; parsed_rel.where_index = where_index; } else { - const resolv = ctx.zld.symbol_resolver.get(sym_name) orelse unreachable; + const resolv = ctx.macho_file.symbol_resolver.get(sym_name) orelse unreachable; switch (resolv.where) { .global => { parsed_rel.where = .local; @@ -599,6 +660,7 @@ fn initRelocFromObject(rel: macho.relocation_info, object: *Object, ctx: RelocCo pub fn parseRelocsFromObject( self: *TextBlock, + allocator: *Allocator, relocs: []macho.relocation_info, object: *Object, ctx: RelocContext, @@ -638,11 +700,11 @@ pub fn parseRelocsFromObject( const sym = object.symtab.items[rel.r_symbolnum]; const sym_name = object.getString(sym.n_strx); - if (Zld.symbolIsSect(sym) and !Zld.symbolIsExt(sym)) { + if (MachO.symbolIsSect(sym) and !MachO.symbolIsExt(sym)) { const where_index = object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; subtractor = where_index; } else { - const resolv = ctx.zld.symbol_resolver.get(sym_name) orelse unreachable; + const resolv = ctx.macho_file.symbol_resolver.get(sym_name) orelse unreachable; assert(resolv.where == .global); subtractor = resolv.local_sym_index; } @@ -732,11 +794,7 @@ pub fn parseRelocsFromObject( else => unreachable, } - try self.relocs.append(parsed_rel); - - if (parsed_rel.where == .local) { - try self.references.put(parsed_rel.where_index, {}); - } + try self.relocs.append(allocator, parsed_rel); const is_via_got = switch (parsed_rel.payload) { .pointer_to_got => true, @@ -747,28 +805,30 @@ pub fn parseRelocsFromObject( }; if (is_via_got) blk: { - const key = Zld.GotIndirectionKey{ + const key = MachO.GotIndirectionKey{ .where = switch (parsed_rel.where) { .local => .local, .import => .import, }, .where_index = parsed_rel.where_index, }; - if (ctx.zld.got_entries.contains(key)) break :blk; + if (ctx.macho_file.got_entries_map.contains(key)) break :blk; - try ctx.zld.got_entries.putNoClobber(ctx.zld.allocator, key, {}); + const got_index = @intCast(u32, ctx.macho_file.got_entries.items.len); + try ctx.macho_file.got_entries.append(ctx.macho_file.base.allocator, key); + try ctx.macho_file.got_entries_map.putNoClobber(ctx.macho_file.base.allocator, key, got_index); } else if (parsed_rel.payload == .unsigned) { switch (parsed_rel.where) { .import => { - try self.bindings.append(.{ + try self.bindings.append(allocator, .{ .local_sym_index = parsed_rel.where_index, .offset = parsed_rel.offset, }); }, .local => { - const source_sym = ctx.zld.locals.items[self.local_sym_index]; - const match = ctx.zld.unpackSectionId(source_sym.n_sect); - const seg = ctx.zld.load_commands.items[match.seg].Segment; + const source_sym = ctx.macho_file.locals.items[self.local_sym_index]; + const match = ctx.macho_file.unpackSectionId(source_sym.n_sect); + const seg = ctx.macho_file.load_commands.items[match.seg].Segment; const sect = seg.sections.items[match.sect]; const sect_type = commands.sectionType(sect); @@ -778,12 +838,12 @@ pub fn parseRelocsFromObject( // TODO actually, a check similar to what dyld is doing, that is, verifying // that the segment is writable should be enough here. const is_right_segment = blk: { - if (ctx.zld.data_segment_cmd_index) |idx| { + if (ctx.macho_file.data_segment_cmd_index) |idx| { if (match.seg == idx) { break :blk true; } } - if (ctx.zld.data_const_segment_cmd_index) |idx| { + if (ctx.macho_file.data_const_segment_cmd_index) |idx| { if (match.seg == idx) { break :blk true; } @@ -804,15 +864,17 @@ pub fn parseRelocsFromObject( }; if (should_rebase) { - try self.rebases.append(parsed_rel.offset); + try self.rebases.append(allocator, parsed_rel.offset); } }, } } else if (parsed_rel.payload == .branch) blk: { if (parsed_rel.where != .import) break :blk; - if (ctx.zld.stubs.contains(parsed_rel.where_index)) break :blk; + if (ctx.macho_file.stubs_map.contains(parsed_rel.where_index)) break :blk; - try ctx.zld.stubs.putNoClobber(ctx.zld.allocator, parsed_rel.where_index, {}); + const stubs_index = @intCast(u32, ctx.macho_file.stubs.items.len); + try ctx.macho_file.stubs.append(ctx.macho_file.base.allocator, parsed_rel.where_index); + try ctx.macho_file.stubs_map.putNoClobber(ctx.macho_file.base.allocator, parsed_rel.where_index, stubs_index); } } } @@ -852,7 +914,7 @@ fn parseUnsigned( if (rel.r_extern == 0) { assert(out.where == .local); - const target_sym = ctx.zld.locals.items[out.where_index]; + const target_sym = ctx.macho_file.locals.items[out.where_index]; addend -= @intCast(i64, target_sym.n_value); } @@ -872,7 +934,7 @@ fn parseBranch(self: TextBlock, rel: macho.relocation_info, out: *Relocation, ct out.payload = .{ .branch = .{ - .arch = ctx.zld.target.?.cpu.arch, + .arch = ctx.macho_file.base.options.target.cpu.arch, }, }; } @@ -948,10 +1010,10 @@ fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, ct var addend: i64 = mem.readIntLittle(i32, self.code[out.offset..][0..4]) + correction; if (rel.r_extern == 0) { - const source_sym = ctx.zld.locals.items[self.local_sym_index]; + const source_sym = ctx.macho_file.locals.items[self.local_sym_index]; const target_sym = switch (out.where) { - .local => ctx.zld.locals.items[out.where_index], - .import => ctx.zld.imports.items[out.where_index], + .local => ctx.macho_file.locals.items[out.where_index], + .import => ctx.macho_file.imports.items[out.where_index], }; addend = @intCast(i64, source_sym.n_value + out.offset + 4) + addend - @intCast(i64, target_sym.n_value); } @@ -986,12 +1048,12 @@ fn parseLoad(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void }; } -pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { +pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void { for (self.relocs.items) |rel| { log.debug("relocating {}", .{rel}); const source_addr = blk: { - const sym = zld.locals.items[self.local_sym_index]; + const sym = macho_file.locals.items[self.local_sym_index]; break :blk sym.n_value + rel.offset; }; const target_addr = blk: { @@ -1004,9 +1066,9 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { }; if (is_via_got) { - const dc_seg = zld.load_commands.items[zld.data_const_segment_cmd_index.?].Segment; - const got = dc_seg.sections.items[zld.got_section_index.?]; - const got_index = zld.got_entries.getIndex(.{ + const dc_seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; + const got = dc_seg.sections.items[macho_file.got_section_index.?]; + const got_index = macho_file.got_entries_map.get(.{ .where = switch (rel.where) { .local => .local, .import => .import, @@ -1014,10 +1076,10 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { .where_index = rel.where_index, }) orelse { const sym = switch (rel.where) { - .local => zld.locals.items[rel.where_index], - .import => zld.imports.items[rel.where_index], + .local => macho_file.locals.items[rel.where_index], + .import => macho_file.imports.items[rel.where_index], }; - log.err("expected GOT entry for symbol '{s}'", .{zld.getString(sym.n_strx)}); + log.err("expected GOT entry for symbol '{s}'", .{macho_file.getString(sym.n_strx)}); log.err(" this is an internal linker error", .{}); return error.FailedToResolveRelocationTarget; }; @@ -1026,11 +1088,11 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { switch (rel.where) { .local => { - const sym = zld.locals.items[rel.where_index]; + const sym = macho_file.locals.items[rel.where_index]; const is_tlv = is_tlv: { - const source_sym = zld.locals.items[self.local_sym_index]; - const match = zld.unpackSectionId(source_sym.n_sect); - const seg = zld.load_commands.items[match.seg].Segment; + const source_sym = macho_file.locals.items[self.local_sym_index]; + const match = macho_file.unpackSectionId(source_sym.n_sect); + const seg = macho_file.load_commands.items[match.seg].Segment; const sect = seg.sections.items[match.sect]; break :is_tlv commands.sectionType(sect) == macho.S_THREAD_LOCAL_VARIABLES; }; @@ -1040,11 +1102,11 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { // defined TLV template init section in the following order: // * wrt to __thread_data if defined, then // * wrt to __thread_bss - const seg = zld.load_commands.items[zld.data_segment_cmd_index.?].Segment; + const seg = macho_file.load_commands.items[macho_file.data_segment_cmd_index.?].Segment; const base_address = inner: { - if (zld.tlv_data_section_index) |i| { + if (macho_file.tlv_data_section_index) |i| { break :inner seg.sections.items[i].addr; - } else if (zld.tlv_bss_section_index) |i| { + } else if (macho_file.tlv_bss_section_index) |i| { break :inner seg.sections.items[i].addr; } else { log.err("threadlocal variables present but no initializer sections found", .{}); @@ -1059,12 +1121,12 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { break :blk sym.n_value; }, .import => { - const stubs_index = zld.stubs.getIndex(rel.where_index) orelse { + const stubs_index = macho_file.stubs_map.get(rel.where_index) orelse { // TODO verify in TextBlock that the symbol is indeed dynamically bound. break :blk 0; // Dynamically bound by dyld. }; - const segment = zld.load_commands.items[zld.text_segment_cmd_index.?].Segment; - const stubs = segment.sections.items[zld.stubs_section_index.?]; + const segment = macho_file.load_commands.items[macho_file.text_segment_cmd_index.?].Segment; + const stubs = segment.sections.items[macho_file.stubs_section_index.?]; break :blk stubs.addr + stubs_index * stubs.reserved2; }, } @@ -1078,14 +1140,14 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { .offset = rel.offset, .source_addr = source_addr, .target_addr = target_addr, - .zld = zld, + .macho_file = macho_file, }); } } -pub fn print_this(self: *const TextBlock, zld: *Zld) void { +pub fn print_this(self: *const TextBlock, macho_file: MachO) void { log.warn("TextBlock", .{}); - log.warn(" {}: {}", .{ self.local_sym_index, zld.locals.items[self.local_sym_index] }); + log.warn(" {}: {}", .{ self.local_sym_index, macho_file.locals.items[self.local_sym_index] }); if (self.stab) |stab| { log.warn(" stab: {}", .{stab}); } @@ -1125,11 +1187,11 @@ pub fn print_this(self: *const TextBlock, zld: *Zld) void { log.warn(" align = {}", .{self.alignment}); } -pub fn print(self: *const TextBlock, zld: *Zld) void { +pub fn print(self: *const TextBlock, macho_file: MachO) void { if (self.prev) |prev| { - prev.print(zld); + prev.print(macho_file); } - self.print_this(zld); + self.print_this(macho_file); } const RelocIterator = struct { @@ -1159,8 +1221,8 @@ fn filterRelocs(relocs: []macho.relocation_info, start_addr: u64, end_addr: u64) } }; - const start = Zld.findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr }); - const end = Zld.findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr }); + const start = MachO.findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr }); + const end = MachO.findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr }); return relocs[start..end]; } diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig deleted file mode 100644 index 41edbb5988..0000000000 --- a/src/link/MachO/Zld.zig +++ /dev/null @@ -1,3062 +0,0 @@ -const Zld = @This(); - -const std = @import("std"); -const assert = std.debug.assert; -const leb = std.leb; -const mem = std.mem; -const meta = std.meta; -const fs = std.fs; -const macho = std.macho; -const math = std.math; -const log = std.log.scoped(.zld); -const aarch64 = @import("../../codegen/aarch64.zig"); - -const Allocator = mem.Allocator; -const Archive = @import("Archive.zig"); -const CodeSignature = @import("CodeSignature.zig"); -const Dylib = @import("Dylib.zig"); -const Object = @import("Object.zig"); -const TextBlock = @import("TextBlock.zig"); -const Trie = @import("Trie.zig"); - -usingnamespace @import("commands.zig"); -usingnamespace @import("bind.zig"); - -allocator: *Allocator, - -target: ?std.Target = null, -page_size: ?u16 = null, -file: ?fs.File = null, -output: ?Output = null, - -// TODO these args will become obselete once Zld is coalesced with incremental -// linker. -stack_size: u64 = 0, - -objects: std.ArrayListUnmanaged(*Object) = .{}, -archives: std.ArrayListUnmanaged(*Archive) = .{}, -dylibs: std.ArrayListUnmanaged(*Dylib) = .{}, - -next_dylib_ordinal: u16 = 1, - -load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, - -pagezero_segment_cmd_index: ?u16 = null, -text_segment_cmd_index: ?u16 = null, -data_const_segment_cmd_index: ?u16 = null, -data_segment_cmd_index: ?u16 = null, -linkedit_segment_cmd_index: ?u16 = null, -dyld_info_cmd_index: ?u16 = null, -symtab_cmd_index: ?u16 = null, -dysymtab_cmd_index: ?u16 = null, -dylinker_cmd_index: ?u16 = null, -data_in_code_cmd_index: ?u16 = null, -function_starts_cmd_index: ?u16 = null, -main_cmd_index: ?u16 = null, -dylib_id_cmd_index: ?u16 = null, -version_min_cmd_index: ?u16 = null, -source_version_cmd_index: ?u16 = null, -uuid_cmd_index: ?u16 = null, -code_signature_cmd_index: ?u16 = null, - -// __TEXT segment sections -text_section_index: ?u16 = null, -stubs_section_index: ?u16 = null, -stub_helper_section_index: ?u16 = null, -text_const_section_index: ?u16 = null, -cstring_section_index: ?u16 = null, -ustring_section_index: ?u16 = null, -gcc_except_tab_section_index: ?u16 = null, -unwind_info_section_index: ?u16 = null, -eh_frame_section_index: ?u16 = null, - -objc_methlist_section_index: ?u16 = null, -objc_methname_section_index: ?u16 = null, -objc_methtype_section_index: ?u16 = null, -objc_classname_section_index: ?u16 = null, - -// __DATA_CONST segment sections -got_section_index: ?u16 = null, -mod_init_func_section_index: ?u16 = null, -mod_term_func_section_index: ?u16 = null, -data_const_section_index: ?u16 = null, - -objc_cfstring_section_index: ?u16 = null, -objc_classlist_section_index: ?u16 = null, -objc_imageinfo_section_index: ?u16 = null, - -// __DATA segment sections -tlv_section_index: ?u16 = null, -tlv_data_section_index: ?u16 = null, -tlv_bss_section_index: ?u16 = null, -la_symbol_ptr_section_index: ?u16 = null, -data_section_index: ?u16 = null, -bss_section_index: ?u16 = null, -common_section_index: ?u16 = null, - -objc_const_section_index: ?u16 = null, -objc_selrefs_section_index: ?u16 = null, -objc_classrefs_section_index: ?u16 = null, -objc_data_section_index: ?u16 = null, - -locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -globals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -imports: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -undefs: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -tentatives: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -symbol_resolver: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, - -strtab: std.ArrayListUnmanaged(u8) = .{}, - -stubs: std.AutoArrayHashMapUnmanaged(u32, void) = .{}, -got_entries: std.AutoArrayHashMapUnmanaged(GotIndirectionKey, void) = .{}, - -stub_helper_stubs_start_off: ?u64 = null, - -blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, - -has_dices: bool = false, -has_stabs: bool = false, - -const SymbolWithLoc = struct { - // Table where the symbol can be found. - where: enum { - global, - import, - undef, - tentative, - }, - where_index: u32, - local_sym_index: u32 = 0, - file: u16 = 0, -}; - -pub const GotIndirectionKey = struct { - where: enum { - local, - import, - }, - where_index: u32, -}; - -pub const Output = struct { - tag: enum { exe, dylib }, - path: []const u8, - install_name: ?[]const u8 = null, -}; - -/// Default path to dyld -const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld"; - -pub fn init(allocator: *Allocator) !Zld { - return Zld{ .allocator = allocator }; -} - -pub fn deinit(self: *Zld) void { - self.stubs.deinit(self.allocator); - self.got_entries.deinit(self.allocator); - - for (self.load_commands.items) |*lc| { - lc.deinit(self.allocator); - } - self.load_commands.deinit(self.allocator); - - for (self.objects.items) |object| { - object.deinit(); - self.allocator.destroy(object); - } - self.objects.deinit(self.allocator); - - for (self.archives.items) |archive| { - archive.deinit(); - self.allocator.destroy(archive); - } - self.archives.deinit(self.allocator); - - for (self.dylibs.items) |dylib| { - dylib.deinit(); - self.allocator.destroy(dylib); - } - self.dylibs.deinit(self.allocator); - - self.locals.deinit(self.allocator); - self.globals.deinit(self.allocator); - self.imports.deinit(self.allocator); - self.undefs.deinit(self.allocator); - self.tentatives.deinit(self.allocator); - - for (self.symbol_resolver.keys()) |key| { - self.allocator.free(key); - } - self.symbol_resolver.deinit(self.allocator); - - self.strtab.deinit(self.allocator); - - // TODO dealloc all blocks - self.blocks.deinit(self.allocator); -} - -pub fn closeFiles(self: Zld) void { - for (self.objects.items) |object| { - object.closeFile(); - } - for (self.archives.items) |archive| { - archive.closeFile(); - } - if (self.file) |f| f.close(); -} - -const LinkArgs = struct { - syslibroot: ?[]const u8, - libs: []const []const u8, - rpaths: []const []const u8, -}; - -pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArgs) !void { - if (files.len == 0) return error.NoInputFiles; - if (output.path.len == 0) return error.EmptyOutputPath; - - self.page_size = switch (self.target.?.cpu.arch) { - .aarch64 => 0x4000, - .x86_64 => 0x1000, - else => unreachable, - }; - self.output = output; - self.file = try fs.cwd().createFile(self.output.?.path, .{ - .truncate = true, - .read = true, - .mode = if (std.Target.current.os.tag == .windows) 0 else 0o777, - }); - - try self.populateMetadata(); - try self.parseInputFiles(files, args.syslibroot); - try self.parseLibs(args.libs, args.syslibroot); - try self.resolveSymbols(); - try self.parseTextBlocks(); - - { - // Add dyld_stub_binder as the final GOT entry. - const resolv = self.symbol_resolver.get("dyld_stub_binder") orelse unreachable; - try self.got_entries.putNoClobber(self.allocator, .{ - .where = .import, - .where_index = resolv.where_index, - }, {}); - } - - try self.sortSections(); - try self.addRpaths(args.rpaths); - try self.addDataInCodeLC(); - try self.addCodeSignatureLC(); - try self.allocateTextSegment(); - try self.allocateDataConstSegment(); - try self.allocateDataSegment(); - self.allocateLinkeditSegment(); - try self.allocateTextBlocks(); - - // log.warn("locals", .{}); - // for (self.locals.items) |sym, id| { - // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - // } - - // log.warn("globals", .{}); - // for (self.globals.items) |sym, id| { - // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - // } - - // log.warn("tentatives", .{}); - // for (self.tentatives.items) |sym, id| { - // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - // } - - // log.warn("undefines", .{}); - // for (self.undefs.items) |sym, id| { - // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - // } - - // log.warn("imports", .{}); - // for (self.imports.items) |sym, id| { - // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - // } - - // log.warn("symbol resolver", .{}); - // for (self.symbol_resolver.keys()) |key| { - // log.warn(" {s} => {}", .{ key, self.symbol_resolver.get(key).? }); - // } - - // log.warn("mappings", .{}); - // for (self.objects.items) |object, id| { - // const object_id = @intCast(u16, id); - // log.warn(" in object {s}", .{object.name.?}); - // for (object.symtab.items) |sym, sym_id| { - // if (object.symbol_mapping.get(@intCast(u32, sym_id))) |local_id| { - // log.warn(" | {d} => {d}", .{ sym_id, local_id }); - // } else { - // log.warn(" | {d} no local mapping for {s}", .{ sym_id, object.getString(sym.n_strx) }); - // } - // } - // } - - // var it = self.blocks.iterator(); - // while (it.next()) |entry| { - // const seg = self.load_commands.items[entry.key_ptr.seg].Segment; - // const sect = seg.sections.items[entry.key_ptr.sect]; - - // log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); - // log.warn(" {}", .{sect}); - // entry.value_ptr.*.print(self); - // } - - try self.flush(); -} - -fn parseInputFiles(self: *Zld, files: []const []const u8, syslibroot: ?[]const u8) !void { - for (files) |file_name| { - const full_path = full_path: { - var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; - const path = try std.fs.realpath(file_name, &buffer); - break :full_path try self.allocator.dupe(u8, path); - }; - - if (try Object.createAndParseFromPath(self.allocator, self.target.?.cpu.arch, full_path)) |object| { - try self.objects.append(self.allocator, object); - continue; - } - - if (try Archive.createAndParseFromPath(self.allocator, self.target.?.cpu.arch, full_path)) |archive| { - try self.archives.append(self.allocator, archive); - continue; - } - - if (try Dylib.createAndParseFromPath( - self.allocator, - self.target.?.cpu.arch, - full_path, - .{ .syslibroot = syslibroot }, - )) |dylibs| { - defer self.allocator.free(dylibs); - try self.dylibs.appendSlice(self.allocator, dylibs); - continue; - } - - log.warn("unknown filetype for positional input file: '{s}'", .{file_name}); - } -} - -fn parseLibs(self: *Zld, libs: []const []const u8, syslibroot: ?[]const u8) !void { - for (libs) |lib| { - if (try Dylib.createAndParseFromPath( - self.allocator, - self.target.?.cpu.arch, - lib, - .{ .syslibroot = syslibroot }, - )) |dylibs| { - defer self.allocator.free(dylibs); - try self.dylibs.appendSlice(self.allocator, dylibs); - continue; - } - - if (try Archive.createAndParseFromPath(self.allocator, self.target.?.cpu.arch, lib)) |archive| { - try self.archives.append(self.allocator, archive); - continue; - } - - log.warn("unknown filetype for a library: '{s}'", .{lib}); - } -} - -pub const MatchingSection = struct { - seg: u16, - sect: u16, -}; - -pub fn getMatchingSection(self: *Zld, sect: macho.section_64) !?MatchingSection { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const segname = segmentName(sect); - const sectname = sectionName(sect); - - const res: ?MatchingSection = blk: { - switch (sectionType(sect)) { - macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { - if (self.text_const_section_index == null) { - self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__const", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_const_section_index.?, - }; - }, - macho.S_CSTRING_LITERALS => { - if (mem.eql(u8, sectname, "__objc_methname")) { - // TODO it seems the common values within the sections in objects are deduplicated/merged - // on merging the sections' contents. - if (self.objc_methname_section_index == null) { - self.objc_methname_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__objc_methname", .{ - .flags = macho.S_CSTRING_LITERALS, - }); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methname_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_methtype")) { - if (self.objc_methtype_section_index == null) { - self.objc_methtype_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__objc_methtype", .{ - .flags = macho.S_CSTRING_LITERALS, - }); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methtype_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_classname")) { - if (self.objc_classname_section_index == null) { - self.objc_classname_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__objc_classname", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_classname_section_index.?, - }; - } - - if (self.cstring_section_index == null) { - self.cstring_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__cstring", .{ - .flags = macho.S_CSTRING_LITERALS, - }); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.cstring_section_index.?, - }; - }, - macho.S_LITERAL_POINTERS => { - if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__objc_selrefs")) { - if (self.objc_selrefs_section_index == null) { - self.objc_selrefs_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__objc_selrefs", .{ - .flags = macho.S_LITERAL_POINTERS, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_selrefs_section_index.?, - }; - } - - // TODO investigate - break :blk null; - }, - macho.S_MOD_INIT_FUNC_POINTERS => { - if (self.mod_init_func_section_index == null) { - self.mod_init_func_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__mod_init_func", .{ - .flags = macho.S_MOD_INIT_FUNC_POINTERS, - }); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.mod_init_func_section_index.?, - }; - }, - macho.S_MOD_TERM_FUNC_POINTERS => { - if (self.mod_term_func_section_index == null) { - self.mod_term_func_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__mod_term_func", .{ - .flags = macho.S_MOD_TERM_FUNC_POINTERS, - }); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.mod_term_func_section_index.?, - }; - }, - macho.S_ZEROFILL => { - if (mem.eql(u8, sectname, "__common")) { - if (self.common_section_index == null) { - self.common_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__common", .{ - .flags = macho.S_ZEROFILL, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.common_section_index.?, - }; - } else { - if (self.bss_section_index == null) { - self.bss_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__bss", .{ - .flags = macho.S_ZEROFILL, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; - } - }, - macho.S_THREAD_LOCAL_VARIABLES => { - if (self.tlv_section_index == null) { - self.tlv_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__thread_vars", .{ - .flags = macho.S_THREAD_LOCAL_VARIABLES, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_section_index.?, - }; - }, - macho.S_THREAD_LOCAL_REGULAR => { - if (self.tlv_data_section_index == null) { - self.tlv_data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__thread_data", .{ - .flags = macho.S_THREAD_LOCAL_REGULAR, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_data_section_index.?, - }; - }, - macho.S_THREAD_LOCAL_ZEROFILL => { - if (self.tlv_bss_section_index == null) { - self.tlv_bss_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__thread_bss", .{ - .flags = macho.S_THREAD_LOCAL_ZEROFILL, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_bss_section_index.?, - }; - }, - macho.S_COALESCED => { - if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) { - // TODO I believe __eh_frame is currently part of __unwind_info section - // in the latest ld64 output. - if (self.eh_frame_section_index == null) { - self.eh_frame_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__eh_frame", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.eh_frame_section_index.?, - }; - } - - // TODO audit this: is this the right mapping? - if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__const", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; - }, - macho.S_REGULAR => { - if (sectionIsCode(sect)) { - if (self.text_section_index == null) { - self.text_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__text", .{ - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }; - } - if (sectionIsDebug(sect)) { - // TODO debug attributes - if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { - log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{ - sect.flags, segname, sectname, - }); - } - break :blk null; - } - - if (mem.eql(u8, segname, "__TEXT")) { - if (mem.eql(u8, sectname, "__ustring")) { - if (self.ustring_section_index == null) { - self.ustring_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__ustring", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.ustring_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__gcc_except_tab")) { - if (self.gcc_except_tab_section_index == null) { - self.gcc_except_tab_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__gcc_except_tab", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.gcc_except_tab_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_methlist")) { - if (self.objc_methlist_section_index == null) { - self.objc_methlist_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__objc_methlist", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methlist_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__rodata") or - mem.eql(u8, sectname, "__typelink") or - mem.eql(u8, sectname, "__itablink") or - mem.eql(u8, sectname, "__gosymtab") or - mem.eql(u8, sectname, "__gopclntab")) - { - if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__const", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; - } else { - if (self.text_const_section_index == null) { - self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__const", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_const_section_index.?, - }; - } - } - - if (mem.eql(u8, segname, "__DATA_CONST")) { - if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__const", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; - } - - if (mem.eql(u8, segname, "__DATA")) { - if (mem.eql(u8, sectname, "__const")) { - if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__const", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__cfstring")) { - if (self.objc_cfstring_section_index == null) { - self.objc_cfstring_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__cfstring", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_cfstring_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_classlist")) { - if (self.objc_classlist_section_index == null) { - self.objc_classlist_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__objc_classlist", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_classlist_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_imageinfo")) { - if (self.objc_imageinfo_section_index == null) { - self.objc_imageinfo_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__objc_imageinfo", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_imageinfo_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_const")) { - if (self.objc_const_section_index == null) { - self.objc_const_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__objc_const", .{}); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_const_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_classrefs")) { - if (self.objc_classrefs_section_index == null) { - self.objc_classrefs_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__objc_classrefs", .{}); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_classrefs_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_data")) { - if (self.objc_data_section_index == null) { - self.objc_data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__objc_data", .{}); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_data_section_index.?, - }; - } else { - if (self.data_section_index == null) { - self.data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__data", .{}); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; - } - } - - if (mem.eql(u8, "__LLVM", segname) and mem.eql(u8, "__asm", sectname)) { - log.debug("TODO LLVM asm section: type 0x{x}, name '{s},{s}'", .{ - sect.flags, segname, sectname, - }); - } - - break :blk null; - }, - else => break :blk null, - } - }; - - return res; -} - -fn sortSections(self: *Zld) !void { - var text_index_mapping = std.AutoHashMap(u16, u16).init(self.allocator); - defer text_index_mapping.deinit(); - var data_const_index_mapping = std.AutoHashMap(u16, u16).init(self.allocator); - defer data_const_index_mapping.deinit(); - var data_index_mapping = std.AutoHashMap(u16, u16).init(self.allocator); - defer data_index_mapping.deinit(); - - { - // __TEXT segment - const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - var sections = seg.sections.toOwnedSlice(self.allocator); - defer self.allocator.free(sections); - try seg.sections.ensureCapacity(self.allocator, sections.len); - - const indices = &[_]*?u16{ - &self.text_section_index, - &self.stubs_section_index, - &self.stub_helper_section_index, - &self.gcc_except_tab_section_index, - &self.cstring_section_index, - &self.ustring_section_index, - &self.text_const_section_index, - &self.objc_methname_section_index, - &self.objc_methtype_section_index, - &self.objc_classname_section_index, - &self.eh_frame_section_index, - }; - for (indices) |maybe_index| { - const new_index: u16 = if (maybe_index.*) |index| blk: { - const idx = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sections[index]); - try text_index_mapping.putNoClobber(index, idx); - break :blk idx; - } else continue; - maybe_index.* = new_index; - } - } - - { - // __DATA_CONST segment - const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - var sections = seg.sections.toOwnedSlice(self.allocator); - defer self.allocator.free(sections); - try seg.sections.ensureCapacity(self.allocator, sections.len); - - const indices = &[_]*?u16{ - &self.got_section_index, - &self.mod_init_func_section_index, - &self.mod_term_func_section_index, - &self.data_const_section_index, - &self.objc_cfstring_section_index, - &self.objc_classlist_section_index, - &self.objc_imageinfo_section_index, - }; - for (indices) |maybe_index| { - const new_index: u16 = if (maybe_index.*) |index| blk: { - const idx = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sections[index]); - try data_const_index_mapping.putNoClobber(index, idx); - break :blk idx; - } else continue; - maybe_index.* = new_index; - } - } - - { - // __DATA segment - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - var sections = seg.sections.toOwnedSlice(self.allocator); - defer self.allocator.free(sections); - try seg.sections.ensureCapacity(self.allocator, sections.len); - - // __DATA segment - const indices = &[_]*?u16{ - &self.la_symbol_ptr_section_index, - &self.objc_const_section_index, - &self.objc_selrefs_section_index, - &self.objc_classrefs_section_index, - &self.objc_data_section_index, - &self.data_section_index, - &self.tlv_section_index, - &self.tlv_data_section_index, - &self.tlv_bss_section_index, - &self.bss_section_index, - &self.common_section_index, - }; - for (indices) |maybe_index| { - const new_index: u16 = if (maybe_index.*) |index| blk: { - const idx = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sections[index]); - try data_index_mapping.putNoClobber(index, idx); - break :blk idx; - } else continue; - maybe_index.* = new_index; - } - } - - { - var transient: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}; - try transient.ensureCapacity(self.allocator, self.blocks.count()); - - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const old = entry.key_ptr.*; - const sect = if (old.seg == self.text_segment_cmd_index.?) - text_index_mapping.get(old.sect).? - else if (old.seg == self.data_const_segment_cmd_index.?) - data_const_index_mapping.get(old.sect).? - else - data_index_mapping.get(old.sect).?; - transient.putAssumeCapacityNoClobber(.{ - .seg = old.seg, - .sect = sect, - }, entry.value_ptr.*); - } - - self.blocks.clearAndFree(self.allocator); - self.blocks.deinit(self.allocator); - self.blocks = transient; - } -} - -fn allocateTextSegment(self: *Zld) !void { - const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const nstubs = @intCast(u32, self.stubs.count()); - - const base_vmaddr = self.load_commands.items[self.pagezero_segment_cmd_index.?].Segment.inner.vmsize; - seg.inner.fileoff = 0; - seg.inner.vmaddr = base_vmaddr; - - // Set stubs and stub_helper sizes - const stubs = &seg.sections.items[self.stubs_section_index.?]; - const stub_helper = &seg.sections.items[self.stub_helper_section_index.?]; - stubs.size += nstubs * stubs.reserved2; - - const stub_size: u4 = switch (self.target.?.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - stub_helper.size += nstubs * stub_size; - - var sizeofcmds: u64 = 0; - for (self.load_commands.items) |lc| { - sizeofcmds += lc.cmdsize(); - } - - try self.allocateSegment(self.text_segment_cmd_index.?, @sizeOf(macho.mach_header_64) + sizeofcmds); - - // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments. - var min_alignment: u32 = 0; - for (seg.sections.items) |sect| { - const alignment = try math.powi(u32, 2, sect.@"align"); - min_alignment = math.max(min_alignment, alignment); - } - - assert(min_alignment > 0); - const last_sect_idx = seg.sections.items.len - 1; - const last_sect = seg.sections.items[last_sect_idx]; - const shift: u32 = blk: { - const diff = seg.inner.filesize - last_sect.offset - last_sect.size; - const factor = @divTrunc(diff, min_alignment); - break :blk @intCast(u32, factor * min_alignment); - }; - - if (shift > 0) { - for (seg.sections.items) |*sect| { - sect.offset += shift; - sect.addr += shift; - } - } -} - -fn allocateDataConstSegment(self: *Zld) !void { - const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const nentries = @intCast(u32, self.got_entries.count()); - - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - seg.inner.fileoff = text_seg.inner.fileoff + text_seg.inner.filesize; - seg.inner.vmaddr = text_seg.inner.vmaddr + text_seg.inner.vmsize; - - // Set got size - const got = &seg.sections.items[self.got_section_index.?]; - got.size += nentries * @sizeOf(u64); - - try self.allocateSegment(self.data_const_segment_cmd_index.?, 0); -} - -fn allocateDataSegment(self: *Zld) !void { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const nstubs = @intCast(u32, self.stubs.count()); - - const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - seg.inner.fileoff = data_const_seg.inner.fileoff + data_const_seg.inner.filesize; - seg.inner.vmaddr = data_const_seg.inner.vmaddr + data_const_seg.inner.vmsize; - - // Set la_symbol_ptr and data size - const la_symbol_ptr = &seg.sections.items[self.la_symbol_ptr_section_index.?]; - const data = &seg.sections.items[self.data_section_index.?]; - la_symbol_ptr.size += nstubs * @sizeOf(u64); - data.size += @sizeOf(u64); // We need at least 8bytes for address of dyld_stub_binder - - try self.allocateSegment(self.data_segment_cmd_index.?, 0); -} - -fn allocateLinkeditSegment(self: *Zld) void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - seg.inner.fileoff = data_seg.inner.fileoff + data_seg.inner.filesize; - seg.inner.vmaddr = data_seg.inner.vmaddr + data_seg.inner.vmsize; -} - -fn allocateSegment(self: *Zld, index: u16, offset: u64) !void { - const seg = &self.load_commands.items[index].Segment; - - // Allocate the sections according to their alignment at the beginning of the segment. - var start: u64 = offset; - for (seg.sections.items) |*sect| { - const alignment = try math.powi(u32, 2, sect.@"align"); - const start_aligned = mem.alignForwardGeneric(u64, start, alignment); - const end_aligned = mem.alignForwardGeneric(u64, start_aligned + sect.size, alignment); - sect.offset = @intCast(u32, seg.inner.fileoff + start_aligned); - sect.addr = seg.inner.vmaddr + start_aligned; - start = end_aligned; - } - - const seg_size_aligned = mem.alignForwardGeneric(u64, start, self.page_size.?); - seg.inner.filesize = seg_size_aligned; - seg.inner.vmsize = seg_size_aligned; -} - -fn allocateTextBlocks(self: *Zld) !void { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - // Find the first block - while (block.prev) |prev| { - block = prev; - } - - const seg = self.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; - - var base_addr: u64 = sect.addr; - const n_sect = self.sectionId(match); - - log.debug(" within section {s},{s}", .{ segmentName(sect), sectionName(sect) }); - log.debug(" {}", .{sect}); - - while (true) { - const block_alignment = try math.powi(u32, 2, block.alignment); - base_addr = mem.alignForwardGeneric(u64, base_addr, block_alignment); - - const sym = &self.locals.items[block.local_sym_index]; - sym.n_value = base_addr; - sym.n_sect = n_sect; - - log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ - self.getString(sym.n_strx), - base_addr, - base_addr + block.size, - block.size, - block.alignment, - }); - - // Update each alias (if any) - for (block.aliases.items) |index| { - const alias_sym = &self.locals.items[index]; - alias_sym.n_value = base_addr; - alias_sym.n_sect = n_sect; - } - - // Update each symbol contained within the TextBlock - if (block.contained) |contained| { - for (contained) |sym_at_off| { - const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; - contained_sym.n_value = base_addr + sym_at_off.offset; - contained_sym.n_sect = n_sect; - } - } - - base_addr += block.size; - - if (block.next) |next| { - block = next; - } else break; - } - } - - // Update globals - for (self.symbol_resolver.values()) |resolv| { - if (resolv.where != .global) continue; - - assert(resolv.local_sym_index != 0); - const local_sym = self.locals.items[resolv.local_sym_index]; - const sym = &self.globals.items[resolv.where_index]; - sym.n_value = local_sym.n_value; - sym.n_sect = local_sym.n_sect; - } -} - -fn writeTextBlocks(self: *Zld) !void { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - while (block.prev) |prev| { - block = prev; - } - - const seg = self.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; - const sect_type = sectionType(sect); - - log.debug(" for section {s},{s}", .{ segmentName(sect), sectionName(sect) }); - log.debug(" {}", .{sect}); - - var code = try self.allocator.alloc(u8, sect.size); - defer self.allocator.free(code); - - if (sect_type == macho.S_ZEROFILL or sect_type == macho.S_THREAD_LOCAL_ZEROFILL) { - mem.set(u8, code, 0); - } else { - var base_off: u64 = 0; - - while (true) { - const block_alignment = try math.powi(u32, 2, block.alignment); - const aligned_base_off = mem.alignForwardGeneric(u64, base_off, block_alignment); - - const sym = self.locals.items[block.local_sym_index]; - log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ - self.getString(sym.n_strx), - aligned_base_off, - aligned_base_off + block.size, - block.size, - block.alignment, - }); - - try block.resolveRelocs(self); - mem.copy(u8, code[aligned_base_off..][0..block.size], block.code); - - // TODO NOP for machine code instead of just zeroing out - const padding_len = aligned_base_off - base_off; - mem.set(u8, code[base_off..][0..padding_len], 0); - - base_off = aligned_base_off + block.size; - - if (block.next) |next| { - block = next; - } else break; - } - - mem.set(u8, code[base_off..], 0); - } - - try self.file.?.pwriteAll(code, sect.offset); - } -} - -fn writeStubHelperCommon(self: *Zld) !void { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?]; - const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_segment.sections.items[self.got_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const data = &data_segment.sections.items[self.data_section_index.?]; - - self.stub_helper_stubs_start_off = blk: { - switch (self.target.?.cpu.arch) { - .x86_64 => { - const code_size = 15; - var code: [code_size]u8 = undefined; - // lea %r11, [rip + disp] - code[0] = 0x4c; - code[1] = 0x8d; - code[2] = 0x1d; - { - const target_addr = data.addr + data.size - @sizeOf(u64); - const displacement = try math.cast(u32, target_addr - stub_helper.addr - 7); - mem.writeIntLittle(u32, code[3..7], displacement); - } - // push %r11 - code[7] = 0x41; - code[8] = 0x53; - // jmp [rip + disp] - code[9] = 0xff; - code[10] = 0x25; - { - const resolv = self.symbol_resolver.get("dyld_stub_binder") orelse unreachable; - const got_index = self.got_entries.getIndex(.{ - .where = .import, - .where_index = resolv.where_index, - }) orelse unreachable; - const addr = got.addr + got_index * @sizeOf(u64); - const displacement = try math.cast(u32, addr - stub_helper.addr - code_size); - mem.writeIntLittle(u32, code[11..], displacement); - } - try self.file.?.pwriteAll(&code, stub_helper.offset); - break :blk stub_helper.offset + code_size; - }, - .aarch64 => { - var code: [6 * @sizeOf(u32)]u8 = undefined; - data_blk_outer: { - const this_addr = stub_helper.addr; - const target_addr = data.addr + data.size - @sizeOf(u64); - data_blk: { - const displacement = math.cast(i21, target_addr - this_addr) catch break :data_blk; - // adr x17, disp - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32()); - // nop - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); - break :data_blk_outer; - } - data_blk: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.cast(i21, target_addr - new_this_addr) catch break :data_blk; - // nop - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); - // adr x17, disp - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32()); - break :data_blk_outer; - } - // Jump is too big, replace adr with adrp and add. - const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32()); - } - // stp x16, x17, [sp, #-16]! - code[8] = 0xf0; - code[9] = 0x47; - code[10] = 0xbf; - code[11] = 0xa9; - binder_blk_outer: { - const resolv = self.symbol_resolver.get("dyld_stub_binder") orelse unreachable; - const got_index = self.got_entries.getIndex(.{ - .where = .import, - .where_index = resolv.where_index, - }) orelse unreachable; - const this_addr = stub_helper.addr + 3 * @sizeOf(u32); - const target_addr = got.addr + got_index * @sizeOf(u64); - binder_blk: { - const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :binder_blk; - const literal = math.cast(u18, displacement) catch break :binder_blk; - // ldr x16, label - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - // nop - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32()); - break :binder_blk_outer; - } - binder_blk: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :binder_blk; - const literal = math.cast(u18, displacement) catch break :binder_blk; - // Pad with nop to please division. - // nop - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32()); - // ldr x16, label - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - break :binder_blk_outer; - } - // Use adrp followed by ldr(immediate). - const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - const offset = try math.divExact(u12, narrowed, 8); - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - .register = .{ - .rn = .x16, - .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), - }, - }).toU32()); - } - // br x16 - code[20] = 0x00; - code[21] = 0x02; - code[22] = 0x1f; - code[23] = 0xd6; - try self.file.?.pwriteAll(&code, stub_helper.offset); - break :blk stub_helper.offset + 6 * @sizeOf(u32); - }, - else => unreachable, - } - }; - - for (self.stubs.keys()) |_, i| { - const index = @intCast(u32, i); - // TODO weak bound pointers - try self.writeLazySymbolPointer(index); - try self.writeStub(index); - try self.writeStubInStubHelper(index); - } -} - -fn writeLazySymbolPointer(self: *Zld, index: u32) !void { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = text_segment.sections.items[self.stub_helper_section_index.?]; - const data_segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - - const stub_size: u4 = switch (self.target.?.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const stub_off = self.stub_helper_stubs_start_off.? + index * stub_size; - const end = stub_helper.addr + stub_off - stub_helper.offset; - var buf: [@sizeOf(u64)]u8 = undefined; - mem.writeIntLittle(u64, &buf, end); - const off = la_symbol_ptr.offset + index * @sizeOf(u64); - log.debug("writing lazy symbol pointer entry 0x{x} at 0x{x}", .{ end, off }); - try self.file.?.pwriteAll(&buf, off); -} - -fn writeStub(self: *Zld, index: u32) !void { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = text_segment.sections.items[self.stubs_section_index.?]; - const data_segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - - const stub_off = stubs.offset + index * stubs.reserved2; - const stub_addr = stubs.addr + index * stubs.reserved2; - const la_ptr_addr = la_symbol_ptr.addr + index * @sizeOf(u64); - log.debug("writing stub at 0x{x}", .{stub_off}); - var code = try self.allocator.alloc(u8, stubs.reserved2); - defer self.allocator.free(code); - switch (self.target.?.cpu.arch) { - .x86_64 => { - assert(la_ptr_addr >= stub_addr + stubs.reserved2); - const displacement = try math.cast(u32, la_ptr_addr - stub_addr - stubs.reserved2); - // jmp - code[0] = 0xff; - code[1] = 0x25; - mem.writeIntLittle(u32, code[2..][0..4], displacement); - }, - .aarch64 => { - assert(la_ptr_addr >= stub_addr); - outer: { - const this_addr = stub_addr; - const target_addr = la_ptr_addr; - inner: { - const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :inner; - const literal = math.cast(u18, displacement) catch break :inner; - // ldr x16, literal - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - // nop - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); - break :outer; - } - inner: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :inner; - const literal = math.cast(u18, displacement) catch break :inner; - // nop - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); - // ldr x16, literal - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - break :outer; - } - // Use adrp followed by ldr(immediate). - const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x16, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - const offset = try math.divExact(u12, narrowed, 8); - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(.x16, .{ - .register = .{ - .rn = .x16, - .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), - }, - }).toU32()); - } - // br x16 - mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.br(.x16).toU32()); - }, - else => unreachable, - } - try self.file.?.pwriteAll(code, stub_off); -} - -fn writeStubInStubHelper(self: *Zld, index: u32) !void { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = text_segment.sections.items[self.stub_helper_section_index.?]; - - const stub_size: u4 = switch (self.target.?.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const stub_off = self.stub_helper_stubs_start_off.? + index * stub_size; - var code = try self.allocator.alloc(u8, stub_size); - defer self.allocator.free(code); - switch (self.target.?.cpu.arch) { - .x86_64 => { - const displacement = try math.cast( - i32, - @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - stub_size, - ); - // pushq - code[0] = 0x68; - mem.writeIntLittle(u32, code[1..][0..4], 0x0); // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. - // jmpq - code[5] = 0xe9; - mem.writeIntLittle(u32, code[6..][0..4], @bitCast(u32, displacement)); - }, - .aarch64 => { - const displacement = try math.cast(i28, @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - 4); - const literal = @divExact(stub_size - @sizeOf(u32), 4); - // ldr w16, literal - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.w16, .{ - .literal = literal, - }).toU32()); - // b disp - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.b(displacement).toU32()); - mem.writeIntLittle(u32, code[8..12], 0x0); // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. - }, - else => unreachable, - } - try self.file.?.pwriteAll(code, stub_off); -} - -fn resolveSymbolsInObject(self: *Zld, object_id: u16) !void { - const object = self.objects.items[object_id]; - - log.debug("resolving symbols in '{s}'", .{object.name}); - - for (object.symtab.items) |sym, id| { - const sym_id = @intCast(u32, id); - const sym_name = object.getString(sym.n_strx); - - if (symbolIsStab(sym)) { - log.err("unhandled symbol type: stab", .{}); - log.err(" symbol '{s}'", .{sym_name}); - log.err(" first definition in '{s}'", .{object.name.?}); - return error.UnhandledSymbolType; - } - - if (symbolIsIndr(sym)) { - log.err("unhandled symbol type: indirect", .{}); - log.err(" symbol '{s}'", .{sym_name}); - log.err(" first definition in '{s}'", .{object.name.?}); - return error.UnhandledSymbolType; - } - - if (symbolIsAbs(sym)) { - log.err("unhandled symbol type: absolute", .{}); - log.err(" symbol '{s}'", .{sym_name}); - log.err(" first definition in '{s}'", .{object.name.?}); - return error.UnhandledSymbolType; - } - - if (symbolIsSect(sym)) { - // Defined symbol regardless of scope lands in the locals symbol table. - const n_strx = blk: { - if (self.symbol_resolver.get(sym_name)) |resolv| { - switch (resolv.where) { - .global => break :blk self.globals.items[resolv.where_index].n_strx, - .tentative => break :blk self.tentatives.items[resolv.where_index].n_strx, - .undef => break :blk self.undefs.items[resolv.where_index].n_strx, - .import => unreachable, - } - } - break :blk try self.makeString(sym_name); - }; - const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.allocator, .{ - .n_strx = n_strx, - .n_type = macho.N_SECT, - .n_sect = 0, - .n_desc = 0, - .n_value = sym.n_value, - }); - try object.symbol_mapping.putNoClobber(self.allocator, sym_id, local_sym_index); - - // If the symbol's scope is not local aka translation unit, then we need work out - // if we should save the symbol as a global, or potentially flag the error. - if (!symbolIsExt(sym)) continue; - - const local = self.locals.items[local_sym_index]; - const resolv = self.symbol_resolver.getPtr(sym_name) orelse { - const global_sym_index = @intCast(u32, self.globals.items.len); - try self.globals.append(self.allocator, .{ - .n_strx = n_strx, - .n_type = sym.n_type, - .n_sect = 0, - .n_desc = sym.n_desc, - .n_value = sym.n_value, - }); - try self.symbol_resolver.putNoClobber(self.allocator, try self.allocator.dupe(u8, sym_name), .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = local_sym_index, - .file = object_id, - }); - continue; - }; - - switch (resolv.where) { - .import => unreachable, - .global => { - const global = &self.globals.items[resolv.where_index]; - - if (!(symbolIsWeakDef(sym) or symbolIsPext(sym)) and - !(symbolIsWeakDef(global.*) or symbolIsPext(global.*))) - { - log.err("symbol '{s}' defined multiple times", .{sym_name}); - log.err(" first definition in '{s}'", .{self.objects.items[resolv.file].name.?}); - log.err(" next definition in '{s}'", .{object.name.?}); - return error.MultipleSymbolDefinitions; - } - - if (symbolIsWeakDef(sym) or symbolIsPext(sym)) continue; // Current symbol is weak, so skip it. - - // Otherwise, update the resolver and the global symbol. - global.n_type = sym.n_type; - resolv.local_sym_index = local_sym_index; - resolv.file = object_id; - - continue; - }, - .undef => { - const undef = &self.undefs.items[resolv.where_index]; - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - }, - .tentative => { - const tentative = &self.tentatives.items[resolv.where_index]; - tentative.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - }, - } - - const global_sym_index = @intCast(u32, self.globals.items.len); - try self.globals.append(self.allocator, .{ - .n_strx = local.n_strx, - .n_type = sym.n_type, - .n_sect = 0, - .n_desc = sym.n_desc, - .n_value = sym.n_value, - }); - resolv.* = .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = local_sym_index, - .file = object_id, - }; - } else if (symbolIsTentative(sym)) { - // Symbol is a tentative definition. - const resolv = self.symbol_resolver.getPtr(sym_name) orelse { - const tent_sym_index = @intCast(u32, self.tentatives.items.len); - try self.tentatives.append(self.allocator, .{ - .n_strx = try self.makeString(sym_name), - .n_type = sym.n_type, - .n_sect = 0, - .n_desc = sym.n_desc, - .n_value = sym.n_value, - }); - try self.symbol_resolver.putNoClobber(self.allocator, try self.allocator.dupe(u8, sym_name), .{ - .where = .tentative, - .where_index = tent_sym_index, - .file = object_id, - }); - continue; - }; - - switch (resolv.where) { - .import => unreachable, - .global => {}, - .undef => { - const undef = &self.undefs.items[resolv.where_index]; - const tent_sym_index = @intCast(u32, self.tentatives.items.len); - try self.tentatives.append(self.allocator, .{ - .n_strx = undef.n_strx, - .n_type = sym.n_type, - .n_sect = 0, - .n_desc = sym.n_desc, - .n_value = sym.n_value, - }); - resolv.* = .{ - .where = .tentative, - .where_index = tent_sym_index, - .file = object_id, - }; - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - }, - .tentative => { - const tentative = &self.tentatives.items[resolv.where_index]; - if (tentative.n_value >= sym.n_value) continue; - - tentative.n_desc = sym.n_desc; - tentative.n_value = sym.n_value; - resolv.file = object_id; - }, - } - } else { - // Symbol is undefined. - if (self.symbol_resolver.contains(sym_name)) continue; - - const undef_sym_index = @intCast(u32, self.undefs.items.len); - try self.undefs.append(self.allocator, .{ - .n_strx = try self.makeString(sym_name), - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try self.symbol_resolver.putNoClobber(self.allocator, try self.allocator.dupe(u8, sym_name), .{ - .where = .undef, - .where_index = undef_sym_index, - .file = object_id, - }); - } - } -} - -fn resolveSymbols(self: *Zld) !void { - // TODO mimicking insertion of null symbol from incremental linker. - // This will need to moved. - try self.locals.append(self.allocator, .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try self.strtab.append(self.allocator, 0); - - // First pass, resolve symbols in provided objects. - for (self.objects.items) |_, object_id| { - try self.resolveSymbolsInObject(@intCast(u16, object_id)); - } - - // Second pass, resolve symbols in static libraries. - var next_sym: usize = 0; - loop: while (true) : (next_sym += 1) { - if (next_sym == self.undefs.items.len) break; - - const sym = self.undefs.items[next_sym]; - if (symbolIsNull(sym)) continue; - - const sym_name = self.getString(sym.n_strx); - - for (self.archives.items) |archive| { - // Check if the entry exists in a static archive. - const offsets = archive.toc.get(sym_name) orelse { - // No hit. - continue; - }; - assert(offsets.items.len > 0); - - const object = try archive.parseObject(offsets.items[0]); - const object_id = @intCast(u16, self.objects.items.len); - try self.objects.append(self.allocator, object); - try self.resolveSymbolsInObject(object_id); - - continue :loop; - } - } - - // Convert any tentative definition into a regular symbol and allocate - // text blocks for each tentative defintion. - for (self.tentatives.items) |sym| { - if (symbolIsNull(sym)) continue; - - const sym_name = self.getString(sym.n_strx); - const match: MatchingSection = blk: { - if (self.common_section_index == null) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.common_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__common", .{ - .flags = macho.S_ZEROFILL, - }); - } - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.common_section_index.?, - }; - }; - - const size = sym.n_value; - const code = try self.allocator.alloc(u8, size); - mem.set(u8, code, 0); - const alignment = (sym.n_desc >> 8) & 0x0f; - - const resolv = self.symbol_resolver.getPtr(sym_name) orelse unreachable; - const local_sym_index = @intCast(u32, self.locals.items.len); - var nlist = macho.nlist_64{ - .n_strx = sym.n_strx, - .n_type = macho.N_SECT, - .n_sect = self.sectionId(match), - .n_desc = 0, - .n_value = 0, - }; - try self.locals.append(self.allocator, nlist); - const global_sym_index = @intCast(u32, self.globals.items.len); - nlist.n_type |= macho.N_EXT; - try self.globals.append(self.allocator, nlist); - resolv.* = .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = local_sym_index, - }; - - const block = try self.allocator.create(TextBlock); - errdefer self.allocator.destroy(block); - - block.* = TextBlock.init(self.allocator); - block.local_sym_index = local_sym_index; - block.code = code; - block.size = size; - block.alignment = alignment; - - // Update target section's metadata - // TODO should we update segment's size here too? - // How does it tie with incremental space allocs? - const tseg = &self.load_commands.items[match.seg].Segment; - const tsect = &tseg.sections.items[match.sect]; - const new_alignment = math.max(tsect.@"align", block.alignment); - const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; - tsect.size = new_size; - tsect.@"align" = new_alignment; - - if (self.blocks.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; - } else { - try self.blocks.putNoClobber(self.allocator, match, block); - } - } - - // Third pass, resolve symbols in dynamic libraries. - { - // Put dyld_stub_binder as an undefined special symbol. - const undef_sym_index = @intCast(u32, self.undefs.items.len); - try self.undefs.append(self.allocator, .{ - .n_strx = try self.makeString("dyld_stub_binder"), - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try self.symbol_resolver.putNoClobber(self.allocator, try self.allocator.dupe(u8, "dyld_stub_binder"), .{ - .where = .undef, - .where_index = undef_sym_index, - }); - } - - var referenced = std.AutoHashMap(*Dylib, void).init(self.allocator); - defer referenced.deinit(); - - loop: for (self.undefs.items) |sym| { - if (symbolIsNull(sym)) continue; - - const sym_name = self.getString(sym.n_strx); - for (self.dylibs.items) |dylib| { - if (!dylib.symbols.contains(sym_name)) continue; - - if (!referenced.contains(dylib)) { - // Add LC_LOAD_DYLIB load command for each referenced dylib/stub. - dylib.ordinal = self.next_dylib_ordinal; - const dylib_id = dylib.id orelse unreachable; - var dylib_cmd = try createLoadDylibCommand( - self.allocator, - dylib_id.name, - dylib_id.timestamp, - dylib_id.current_version, - dylib_id.compatibility_version, - ); - errdefer dylib_cmd.deinit(self.allocator); - try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); - self.next_dylib_ordinal += 1; - try referenced.putNoClobber(dylib, {}); - } - - const resolv = self.symbol_resolver.getPtr(sym_name) orelse unreachable; - const undef = &self.undefs.items[resolv.where_index]; - const import_sym_index = @intCast(u32, self.imports.items.len); - try self.imports.append(self.allocator, .{ - .n_strx = undef.n_strx, - .n_type = macho.N_UNDF | macho.N_EXT, - .n_sect = 0, - .n_desc = packDylibOrdinal(dylib.ordinal.?), - .n_value = 0, - }); - resolv.* = .{ - .where = .import, - .where_index = import_sym_index, - }; - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - - continue :loop; - } - } - - // Fourth pass, handle synthetic symbols and flag any undefined references. - if (self.symbol_resolver.getPtr("___dso_handle")) |resolv| blk: { - if (resolv.where != .undef) break :blk; - - const undef = &self.undefs.items[resolv.where_index]; - const match: MatchingSection = .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }; - const local_sym_index = @intCast(u32, self.locals.items.len); - var nlist = macho.nlist_64{ - .n_strx = undef.n_strx, - .n_type = macho.N_SECT, - .n_sect = self.sectionId(match), - .n_desc = 0, - .n_value = 0, - }; - try self.locals.append(self.allocator, nlist); - const global_sym_index = @intCast(u32, self.globals.items.len); - nlist.n_type |= macho.N_EXT; - nlist.n_desc = macho.N_WEAK_DEF; - try self.globals.append(self.allocator, nlist); - - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - resolv.* = .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = local_sym_index, - }; - - // We create an empty atom for this symbol. - // TODO perhaps we should special-case special symbols? Create a separate - // linked list of atoms? - const block = try self.allocator.create(TextBlock); - errdefer self.allocator.destroy(block); - - block.* = TextBlock.init(self.allocator); - block.local_sym_index = local_sym_index; - block.code = try self.allocator.alloc(u8, 0); - block.size = 0; - block.alignment = 0; - - if (self.blocks.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; - } else { - try self.blocks.putNoClobber(self.allocator, match, block); - } - } - - var has_undefined = false; - for (self.undefs.items) |sym| { - if (symbolIsNull(sym)) continue; - - const sym_name = self.getString(sym.n_strx); - const resolv = self.symbol_resolver.get(sym_name) orelse unreachable; - - log.err("undefined reference to symbol '{s}'", .{sym_name}); - log.err(" first referenced in '{s}'", .{self.objects.items[resolv.file].name.?}); - has_undefined = true; - } - - if (has_undefined) return error.UndefinedSymbolReference; -} - -fn parseTextBlocks(self: *Zld) !void { - for (self.objects.items) |object| { - try object.parseTextBlocks(self); - } -} - -fn populateMetadata(self: *Zld) !void { - if (self.pagezero_segment_cmd_index == null) { - self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Segment = SegmentCommand.empty("__PAGEZERO", .{ - .vmsize = 0x100000000, // size always set to 4GB - }), - }); - } - - if (self.text_segment_cmd_index == null) { - self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Segment = SegmentCommand.empty("__TEXT", .{ - .vmaddr = 0x100000000, // always starts at 4GB - .maxprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, - .initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, - }), - }); - } - - if (self.text_section_index == null) { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.text_section_index = @intCast(u16, text_seg.sections.items.len); - const alignment: u2 = switch (self.target.?.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - try text_seg.addSection(self.allocator, "__text", .{ - .@"align" = alignment, - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - } - - if (self.stubs_section_index == null) { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.stubs_section_index = @intCast(u16, text_seg.sections.items.len); - const alignment: u2 = switch (self.target.?.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - const stub_size: u4 = switch (self.target.?.cpu.arch) { - .x86_64 => 6, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, // unhandled architecture type - }; - try text_seg.addSection(self.allocator, "__stubs", .{ - .@"align" = alignment, - .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - .reserved2 = stub_size, - }); - } - - if (self.stub_helper_section_index == null) { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.stub_helper_section_index = @intCast(u16, text_seg.sections.items.len); - const alignment: u2 = switch (self.target.?.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - const stub_helper_size: u6 = switch (self.target.?.cpu.arch) { - .x86_64 => 15, - .aarch64 => 6 * @sizeOf(u32), - else => unreachable, - }; - try text_seg.addSection(self.allocator, "__stub_helper", .{ - .size = stub_helper_size, - .@"align" = alignment, - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - } - - if (self.data_const_segment_cmd_index == null) { - self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Segment = SegmentCommand.empty("__DATA_CONST", .{ - .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - }), - }); - } - - if (self.got_section_index == null) { - const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - self.got_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__got", .{ - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, - }); - } - - if (self.data_segment_cmd_index == null) { - self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Segment = SegmentCommand.empty("__DATA", .{ - .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - }), - }); - } - - if (self.la_symbol_ptr_section_index == null) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.la_symbol_ptr_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__la_symbol_ptr", .{ - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = macho.S_LAZY_SYMBOL_POINTERS, - }); - } - - if (self.data_section_index == null) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__data", .{ - .@"align" = 3, // 2^3 = @sizeOf(u64) - }); - } - - if (self.linkedit_segment_cmd_index == null) { - self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Segment = SegmentCommand.empty("__LINKEDIT", .{ - .maxprot = macho.VM_PROT_READ, - .initprot = macho.VM_PROT_READ, - }), - }); - } - - if (self.dyld_info_cmd_index == null) { - self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .DyldInfoOnly = .{ - .cmd = macho.LC_DYLD_INFO_ONLY, - .cmdsize = @sizeOf(macho.dyld_info_command), - .rebase_off = 0, - .rebase_size = 0, - .bind_off = 0, - .bind_size = 0, - .weak_bind_off = 0, - .weak_bind_size = 0, - .lazy_bind_off = 0, - .lazy_bind_size = 0, - .export_off = 0, - .export_size = 0, - }, - }); - } - - if (self.symtab_cmd_index == null) { - self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Symtab = .{ - .cmd = macho.LC_SYMTAB, - .cmdsize = @sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, - }, - }); - } - - if (self.dysymtab_cmd_index == null) { - self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Dysymtab = .{ - .cmd = macho.LC_DYSYMTAB, - .cmdsize = @sizeOf(macho.dysymtab_command), - .ilocalsym = 0, - .nlocalsym = 0, - .iextdefsym = 0, - .nextdefsym = 0, - .iundefsym = 0, - .nundefsym = 0, - .tocoff = 0, - .ntoc = 0, - .modtaboff = 0, - .nmodtab = 0, - .extrefsymoff = 0, - .nextrefsyms = 0, - .indirectsymoff = 0, - .nindirectsyms = 0, - .extreloff = 0, - .nextrel = 0, - .locreloff = 0, - .nlocrel = 0, - }, - }); - } - - if (self.dylinker_cmd_index == null) { - self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH), - @sizeOf(u64), - )); - var dylinker_cmd = emptyGenericCommandWithData(macho.dylinker_command{ - .cmd = macho.LC_LOAD_DYLINKER, - .cmdsize = cmdsize, - .name = @sizeOf(macho.dylinker_command), - }); - dylinker_cmd.data = try self.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name); - mem.set(u8, dylinker_cmd.data, 0); - mem.copy(u8, dylinker_cmd.data, mem.spanZ(DEFAULT_DYLD_PATH)); - try self.load_commands.append(self.allocator, .{ .Dylinker = dylinker_cmd }); - } - - if (self.main_cmd_index == null and self.output.?.tag == .exe) { - self.main_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Main = .{ - .cmd = macho.LC_MAIN, - .cmdsize = @sizeOf(macho.entry_point_command), - .entryoff = 0x0, - .stacksize = 0, - }, - }); - } - - if (self.dylib_id_cmd_index == null and self.output.?.tag == .dylib) { - self.dylib_id_cmd_index = @intCast(u16, self.load_commands.items.len); - var dylib_cmd = try createLoadDylibCommand( - self.allocator, - self.output.?.install_name.?, - 2, - 0x10000, // TODO forward user-provided versions - 0x10000, - ); - errdefer dylib_cmd.deinit(self.allocator); - dylib_cmd.inner.cmd = macho.LC_ID_DYLIB; - try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); - } - - if (self.version_min_cmd_index == null) { - self.version_min_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmd: u32 = switch (self.target.?.os.tag) { - .macos => macho.LC_VERSION_MIN_MACOSX, - .ios => macho.LC_VERSION_MIN_IPHONEOS, - .tvos => macho.LC_VERSION_MIN_TVOS, - .watchos => macho.LC_VERSION_MIN_WATCHOS, - else => unreachable, // wrong OS - }; - const ver = self.target.?.os.version_range.semver.min; - const version = ver.major << 16 | ver.minor << 8 | ver.patch; - try self.load_commands.append(self.allocator, .{ - .VersionMin = .{ - .cmd = cmd, - .cmdsize = @sizeOf(macho.version_min_command), - .version = version, - .sdk = version, - }, - }); - } - - if (self.source_version_cmd_index == null) { - self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .SourceVersion = .{ - .cmd = macho.LC_SOURCE_VERSION, - .cmdsize = @sizeOf(macho.source_version_command), - .version = 0x0, - }, - }); - } - - if (self.uuid_cmd_index == null) { - self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); - var uuid_cmd: macho.uuid_command = .{ - .cmd = macho.LC_UUID, - .cmdsize = @sizeOf(macho.uuid_command), - .uuid = undefined, - }; - std.crypto.random.bytes(&uuid_cmd.uuid); - try self.load_commands.append(self.allocator, .{ .Uuid = uuid_cmd }); - } -} - -fn addDataInCodeLC(self: *Zld) !void { - if (self.data_in_code_cmd_index == null) { - self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .LinkeditData = .{ - .cmd = macho.LC_DATA_IN_CODE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - } -} - -fn addCodeSignatureLC(self: *Zld) !void { - if (self.code_signature_cmd_index == null and self.target.?.cpu.arch == .aarch64) { - self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .LinkeditData = .{ - .cmd = macho.LC_CODE_SIGNATURE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - } -} - -fn addRpaths(self: *Zld, rpaths: []const []const u8) !void { - for (rpaths) |rpath| { - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.rpath_command) + rpath.len + 1, - @sizeOf(u64), - )); - var rpath_cmd = emptyGenericCommandWithData(macho.rpath_command{ - .cmd = macho.LC_RPATH, - .cmdsize = cmdsize, - .path = @sizeOf(macho.rpath_command), - }); - rpath_cmd.data = try self.allocator.alloc(u8, cmdsize - rpath_cmd.inner.path); - mem.set(u8, rpath_cmd.data, 0); - mem.copy(u8, rpath_cmd.data, rpath); - try self.load_commands.append(self.allocator, .{ .Rpath = rpath_cmd }); - } -} - -fn flush(self: *Zld) !void { - try self.writeTextBlocks(); - try self.writeStubHelperCommon(); - - if (self.common_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; - } - - if (self.bss_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; - } - - if (self.tlv_bss_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; - } - - try self.writeGotEntries(); - try self.setEntryPoint(); - try self.writeRebaseInfoTable(); - try self.writeBindInfoTable(); - try self.writeLazyBindInfoTable(); - try self.writeExportInfo(); - try self.writeDices(); - - { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - } - - try self.writeSymbolTable(); - try self.writeStringTable(); - - { - // Seal __LINKEDIT size - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size.?); - } - - if (self.target.?.cpu.arch == .aarch64) { - try self.writeCodeSignaturePadding(); - } - - try self.writeLoadCommands(); - try self.writeHeader(); - - if (self.target.?.cpu.arch == .aarch64) { - try self.writeCodeSignature(); - } - - if (comptime std.Target.current.isDarwin() and std.Target.current.cpu.arch == .aarch64) { - const out_path = self.output.?.path; - try fs.cwd().copyFile(out_path, fs.cwd(), out_path, .{}); - } -} - -fn writeGotEntries(self: *Zld) !void { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[self.got_section_index.?]; - - var buffer = try self.allocator.alloc(u8, self.got_entries.count() * @sizeOf(u64)); - defer self.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - var writer = stream.writer(); - - for (self.got_entries.keys()) |key| { - const address: u64 = switch (key.where) { - .local => self.locals.items[key.where_index].n_value, - .import => 0, - }; - try writer.writeIntLittle(u64, address); - } - - log.debug("writing GOT pointers at 0x{x} to 0x{x}", .{ sect.offset, sect.offset + buffer.len }); - - try self.file.?.pwriteAll(buffer, sect.offset); -} - -fn setEntryPoint(self: *Zld) !void { - if (self.output.?.tag != .exe) return; - - // TODO we should respect the -entry flag passed in by the user to set a custom - // entrypoint. For now, assume default of `_main`. - const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const resolv = self.symbol_resolver.get("_main") orelse { - log.err("'_main' export not found", .{}); - return error.MissingMainEntrypoint; - }; - assert(resolv.where == .global); - const sym = self.globals.items[resolv.where_index]; - const ec = &self.load_commands.items[self.main_cmd_index.?].Main; - ec.entryoff = @intCast(u32, sym.n_value - seg.inner.vmaddr); - ec.stacksize = self.stack_size; -} - -fn writeRebaseInfoTable(self: *Zld) !void { - var pointers = std.ArrayList(Pointer).init(self.allocator); - defer pointers.deinit(); - - { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - if (match.seg == self.text_segment_cmd_index.?) continue; // __TEXT is non-writable - - const seg = self.load_commands.items[match.seg].Segment; - - while (true) { - const sym = self.locals.items[block.local_sym_index]; - const base_offset = sym.n_value - seg.inner.vmaddr; - - for (block.rebases.items) |offset| { - try pointers.append(.{ - .offset = base_offset + offset, - .segment_id = match.seg, - }); - } - - if (block.prev) |prev| { - block = prev; - } else break; - } - } - } - - if (self.got_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - - for (self.got_entries.keys()) |key, i| { - if (key.where == .import) continue; - - try pointers.append(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - }); - } - } - - if (self.la_symbol_ptr_section_index) |idx| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - - try pointers.ensureUnusedCapacity(self.stubs.count()); - for (self.stubs.keys()) |_, i| { - pointers.appendAssumeCapacity(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - }); - } - } - - std.sort.sort(Pointer, pointers.items, {}, pointerCmp); - - const size = try rebaseInfoSize(pointers.items); - var buffer = try self.allocator.alloc(u8, @intCast(usize, size)); - defer self.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try writeRebaseInfo(pointers.items, stream.writer()); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.rebase_off = @intCast(u32, seg.inner.fileoff); - dyld_info.rebase_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @sizeOf(u64))); - seg.inner.filesize += dyld_info.rebase_size; - - log.debug("writing rebase info from 0x{x} to 0x{x}", .{ dyld_info.rebase_off, dyld_info.rebase_off + dyld_info.rebase_size }); - - try self.file.?.pwriteAll(buffer, dyld_info.rebase_off); -} - -fn writeBindInfoTable(self: *Zld) !void { - var pointers = std.ArrayList(Pointer).init(self.allocator); - defer pointers.deinit(); - - if (self.got_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - - for (self.got_entries.keys()) |key, i| { - if (key.where == .local) continue; - - const sym = self.imports.items[key.where_index]; - try pointers.append(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = unpackDylibOrdinal(sym.n_desc), - .name = self.getString(sym.n_strx), - }); - } - } - - { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - if (match.seg == self.text_segment_cmd_index.?) continue; // __TEXT is non-writable - - const seg = self.load_commands.items[match.seg].Segment; - - while (true) { - const sym = self.locals.items[block.local_sym_index]; - const base_offset = sym.n_value - seg.inner.vmaddr; - - for (block.bindings.items) |binding| { - const bind_sym = self.imports.items[binding.local_sym_index]; - try pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = match.seg, - .dylib_ordinal = unpackDylibOrdinal(bind_sym.n_desc), - .name = self.getString(bind_sym.n_strx), - }); - } - - if (block.prev) |prev| { - block = prev; - } else break; - } - } - } - - const size = try bindInfoSize(pointers.items); - var buffer = try self.allocator.alloc(u8, @intCast(usize, size)); - defer self.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try writeBindInfo(pointers.items, stream.writer()); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.bind_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dyld_info.bind_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); - seg.inner.filesize += dyld_info.bind_size; - - log.debug("writing binding info from 0x{x} to 0x{x}", .{ dyld_info.bind_off, dyld_info.bind_off + dyld_info.bind_size }); - - try self.file.?.pwriteAll(buffer, dyld_info.bind_off); -} - -fn writeLazyBindInfoTable(self: *Zld) !void { - var pointers = std.ArrayList(Pointer).init(self.allocator); - defer pointers.deinit(); - - if (self.la_symbol_ptr_section_index) |idx| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - - try pointers.ensureUnusedCapacity(self.stubs.count()); - - for (self.stubs.keys()) |key, i| { - const sym = self.imports.items[key]; - pointers.appendAssumeCapacity(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = unpackDylibOrdinal(sym.n_desc), - .name = self.getString(sym.n_strx), - }); - } - } - - const size = try lazyBindInfoSize(pointers.items); - var buffer = try self.allocator.alloc(u8, @intCast(usize, size)); - defer self.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try writeLazyBindInfo(pointers.items, stream.writer()); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.lazy_bind_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dyld_info.lazy_bind_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); - seg.inner.filesize += dyld_info.lazy_bind_size; - - log.debug("writing lazy binding info from 0x{x} to 0x{x}", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size }); - - try self.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off); - try self.populateLazyBindOffsetsInStubHelper(buffer); -} - -fn populateLazyBindOffsetsInStubHelper(self: *Zld, buffer: []const u8) !void { - var stream = std.io.fixedBufferStream(buffer); - var reader = stream.reader(); - var offsets = std.ArrayList(u32).init(self.allocator); - try offsets.append(0); - defer offsets.deinit(); - var valid_block = false; - - while (true) { - const inst = reader.readByte() catch |err| switch (err) { - error.EndOfStream => break, - else => return err, - }; - const opcode: u8 = inst & macho.BIND_OPCODE_MASK; - - switch (opcode) { - macho.BIND_OPCODE_DO_BIND => { - valid_block = true; - }, - macho.BIND_OPCODE_DONE => { - if (valid_block) { - const offset = try stream.getPos(); - try offsets.append(@intCast(u32, offset)); - } - valid_block = false; - }, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { - var next = try reader.readByte(); - while (next != @as(u8, 0)) { - next = try reader.readByte(); - } - }, - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { - _ = try leb.readULEB128(u64, reader); - }, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => { - _ = try leb.readULEB128(u64, reader); - }, - macho.BIND_OPCODE_SET_ADDEND_SLEB => { - _ = try leb.readILEB128(i64, reader); - }, - else => {}, - } - } - assert(self.stubs.count() <= offsets.items.len); - - const stub_size: u4 = switch (self.target.?.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const off: u4 = switch (self.target.?.cpu.arch) { - .x86_64 => 1, - .aarch64 => 2 * @sizeOf(u32), - else => unreachable, - }; - var buf: [@sizeOf(u32)]u8 = undefined; - for (self.stubs.keys()) |_, index| { - const placeholder_off = self.stub_helper_stubs_start_off.? + index * stub_size + off; - mem.writeIntLittle(u32, &buf, offsets.items[index]); - try self.file.?.pwriteAll(&buf, placeholder_off); - } -} - -fn writeExportInfo(self: *Zld) !void { - var trie = Trie.init(self.allocator); - defer trie.deinit(); - - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const base_address = text_segment.inner.vmaddr; - - // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. - log.debug("writing export trie", .{}); - - for (self.globals.items) |sym| { - const sym_name = self.getString(sym.n_strx); - log.debug(" | putting '{s}' defined at 0x{x}", .{ sym_name, sym.n_value }); - - try trie.put(.{ - .name = sym_name, - .vmaddr_offset = sym.n_value - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); - } - - try trie.finalize(); - - var buffer = try self.allocator.alloc(u8, @intCast(usize, trie.size)); - defer self.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - const nwritten = try trie.write(stream.writer()); - assert(nwritten == trie.size); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.export_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dyld_info.export_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); - seg.inner.filesize += dyld_info.export_size; - - log.debug("writing export info from 0x{x} to 0x{x}", .{ dyld_info.export_off, dyld_info.export_off + dyld_info.export_size }); - - try self.file.?.pwriteAll(buffer, dyld_info.export_off); -} - -fn writeSymbolTable(self: *Zld) !void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - - var locals = std.ArrayList(macho.nlist_64).init(self.allocator); - defer locals.deinit(); - try locals.appendSlice(self.locals.items); - - if (self.has_stabs) { - for (self.objects.items) |object| { - if (object.debug_info == null) continue; - - // Open scope - try locals.ensureUnusedCapacity(4); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.tu_comp_dir.?), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.tu_name.?), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.name.?), - .n_type = macho.N_OSO, - .n_sect = 0, - .n_desc = 1, - .n_value = object.mtime orelse 0, - }); - - for (object.text_blocks.items) |block| { - if (block.stab) |stab| { - const nlists = try stab.asNlists(block.local_sym_index, self); - defer self.allocator.free(nlists); - try locals.appendSlice(nlists); - } else { - const contained = block.contained orelse continue; - for (contained) |sym_at_off| { - const stab = sym_at_off.stab orelse continue; - const nlists = try stab.asNlists(sym_at_off.local_sym_index, self); - defer self.allocator.free(nlists); - try locals.appendSlice(nlists); - } - } - } - - // Close scope - locals.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - } - } - - const nlocals = locals.items.len; - const nexports = self.globals.items.len; - const nundefs = self.imports.items.len; - - const locals_off = symtab.symoff + symtab.nsyms * @sizeOf(macho.nlist_64); - const locals_size = nlocals * @sizeOf(macho.nlist_64); - log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); - try self.file.?.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); - - const exports_off = locals_off + locals_size; - const exports_size = nexports * @sizeOf(macho.nlist_64); - log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); - try self.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), exports_off); - - const undefs_off = exports_off + exports_size; - const undefs_size = nundefs * @sizeOf(macho.nlist_64); - log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); - try self.file.?.pwriteAll(mem.sliceAsBytes(self.imports.items), undefs_off); - - symtab.nsyms += @intCast(u32, nlocals + nexports + nundefs); - seg.inner.filesize += locals_size + exports_size + undefs_size; - - // Update dynamic symbol table. - const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - dysymtab.nlocalsym += @intCast(u32, nlocals); - dysymtab.iextdefsym = dysymtab.nlocalsym; - dysymtab.nextdefsym = @intCast(u32, nexports); - dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; - dysymtab.nundefsym = @intCast(u32, nundefs); - - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = &text_segment.sections.items[self.stubs_section_index.?]; - const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_segment.sections.items[self.got_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - - const nstubs = @intCast(u32, self.stubs.count()); - const ngot_entries = @intCast(u32, self.got_entries.count()); - - dysymtab.indirectsymoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dysymtab.nindirectsyms = nstubs * 2 + ngot_entries; - - const needed_size = dysymtab.nindirectsyms * @sizeOf(u32); - seg.inner.filesize += needed_size; - - log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ - dysymtab.indirectsymoff, - dysymtab.indirectsymoff + needed_size, - }); - - var buf = try self.allocator.alloc(u8, needed_size); - defer self.allocator.free(buf); - - var stream = std.io.fixedBufferStream(buf); - var writer = stream.writer(); - - stubs.reserved1 = 0; - for (self.stubs.keys()) |key| { - try writer.writeIntLittle(u32, dysymtab.iundefsym + key); - } - - got.reserved1 = nstubs; - for (self.got_entries.keys()) |key| { - switch (key.where) { - .import => { - try writer.writeIntLittle(u32, dysymtab.iundefsym + key.where_index); - }, - .local => { - try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); - }, - } - } - - la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; - for (self.stubs.keys()) |key| { - try writer.writeIntLittle(u32, dysymtab.iundefsym + key); - } - - try self.file.?.pwriteAll(buf, dysymtab.indirectsymoff); -} - -fn writeStringTable(self: *Zld) !void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - symtab.stroff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - symtab.strsize = @intCast(u32, mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64))); - seg.inner.filesize += symtab.strsize; - - log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - - try self.file.?.pwriteAll(self.strtab.items, symtab.stroff); - - if (symtab.strsize > self.strtab.items.len and self.target.?.cpu.arch == .x86_64) { - // This is the last section, so we need to pad it out. - try self.file.?.pwriteAll(&[_]u8{0}, seg.inner.fileoff + seg.inner.filesize - 1); - } -} - -fn writeDices(self: *Zld) !void { - if (!self.has_dices) return; - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData; - const fileoff = seg.inner.fileoff + seg.inner.filesize; - - var buf = std.ArrayList(u8).init(self.allocator); - defer buf.deinit(); - - var block: *TextBlock = self.blocks.get(.{ - .seg = self.text_segment_cmd_index orelse return, - .sect = self.text_section_index orelse return, - }) orelse return; - - while (block.prev) |prev| { - block = prev; - } - - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const text_sect = text_seg.sections.items[self.text_section_index.?]; - - while (true) { - if (block.dices.items.len > 0) { - const sym = self.locals.items[block.local_sym_index]; - const base_off = try math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset); - - try buf.ensureUnusedCapacity(block.dices.items.len * @sizeOf(macho.data_in_code_entry)); - for (block.dices.items) |dice| { - const rebased_dice = macho.data_in_code_entry{ - .offset = base_off + dice.offset, - .length = dice.length, - .kind = dice.kind, - }; - buf.appendSliceAssumeCapacity(mem.asBytes(&rebased_dice)); - } - } - - if (block.next) |next| { - block = next; - } else break; - } - - const datasize = @intCast(u32, buf.items.len); - - dice_cmd.dataoff = @intCast(u32, fileoff); - dice_cmd.datasize = datasize; - seg.inner.filesize += datasize; - - log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ fileoff, fileoff + datasize }); - - try self.file.?.pwriteAll(buf.items, fileoff); -} - -fn writeCodeSignaturePadding(self: *Zld) !void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const code_sig_cmd = &self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData; - const fileoff = seg.inner.fileoff + seg.inner.filesize; - const needed_size = CodeSignature.calcCodeSignaturePaddingSize( - self.output.?.path, - fileoff, - self.page_size.?, - ); - code_sig_cmd.dataoff = @intCast(u32, fileoff); - code_sig_cmd.datasize = needed_size; - - // Advance size of __LINKEDIT segment - seg.inner.filesize += needed_size; - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size.?); - - log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ fileoff, fileoff + needed_size }); - - // Pad out the space. We need to do this to calculate valid hashes for everything in the file - // except for code signature data. - try self.file.?.pwriteAll(&[_]u8{0}, fileoff + needed_size - 1); -} - -fn writeCodeSignature(self: *Zld) !void { - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const code_sig_cmd = self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData; - - var code_sig = CodeSignature.init(self.allocator, self.page_size.?); - defer code_sig.deinit(); - try code_sig.calcAdhocSignature( - self.file.?, - self.output.?.path, - text_seg.inner, - code_sig_cmd, - .Exe, - ); - - var buffer = try self.allocator.alloc(u8, code_sig.size()); - defer self.allocator.free(buffer); - var stream = std.io.fixedBufferStream(buffer); - try code_sig.write(stream.writer()); - - log.debug("writing code signature from 0x{x} to 0x{x}", .{ code_sig_cmd.dataoff, code_sig_cmd.dataoff + buffer.len }); - try self.file.?.pwriteAll(buffer, code_sig_cmd.dataoff); -} - -fn writeLoadCommands(self: *Zld) !void { - var sizeofcmds: u32 = 0; - for (self.load_commands.items) |lc| { - sizeofcmds += lc.cmdsize(); - } - - var buffer = try self.allocator.alloc(u8, sizeofcmds); - defer self.allocator.free(buffer); - var writer = std.io.fixedBufferStream(buffer).writer(); - for (self.load_commands.items) |lc| { - try lc.write(writer); - } - - const off = @sizeOf(macho.mach_header_64); - log.debug("writing {} load commands from 0x{x} to 0x{x}", .{ self.load_commands.items.len, off, off + sizeofcmds }); - try self.file.?.pwriteAll(buffer, off); -} - -fn writeHeader(self: *Zld) !void { - var header = emptyHeader(.{ - .flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL, - }); - - switch (self.target.?.cpu.arch) { - .aarch64 => { - header.cputype = macho.CPU_TYPE_ARM64; - header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL; - }, - .x86_64 => { - header.cputype = macho.CPU_TYPE_X86_64; - header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL; - }, - else => return error.UnsupportedCpuArchitecture, - } - - switch (self.output.?.tag) { - .exe => { - header.filetype = macho.MH_EXECUTE; - }, - .dylib => { - header.filetype = macho.MH_DYLIB; - header.flags |= macho.MH_NO_REEXPORTED_DYLIBS; - }, - } - - if (self.tlv_section_index) |_| - header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; - - header.ncmds = @intCast(u32, self.load_commands.items.len); - header.sizeofcmds = 0; - - for (self.load_commands.items) |cmd| { - header.sizeofcmds += cmd.cmdsize(); - } - - log.debug("writing Mach-O header {}", .{header}); - - try self.file.?.pwriteAll(mem.asBytes(&header), 0); -} - -pub fn makeString(self: *Zld, string: []const u8) !u32 { - try self.strtab.ensureUnusedCapacity(self.allocator, string.len + 1); - const new_off = @intCast(u32, self.strtab.items.len); - - log.debug("writing new string '{s}' at offset 0x{x}", .{ string, new_off }); - - self.strtab.appendSliceAssumeCapacity(string); - self.strtab.appendAssumeCapacity(0); - - return new_off; -} - -pub fn getString(self: *Zld, off: u32) []const u8 { - assert(off < self.strtab.items.len); - return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + off)); -} - -pub fn symbolIsStab(sym: macho.nlist_64) bool { - return (macho.N_STAB & sym.n_type) != 0; -} - -pub fn symbolIsPext(sym: macho.nlist_64) bool { - return (macho.N_PEXT & sym.n_type) != 0; -} - -pub fn symbolIsExt(sym: macho.nlist_64) bool { - return (macho.N_EXT & sym.n_type) != 0; -} - -pub fn symbolIsSect(sym: macho.nlist_64) bool { - const type_ = macho.N_TYPE & sym.n_type; - return type_ == macho.N_SECT; -} - -pub fn symbolIsUndf(sym: macho.nlist_64) bool { - const type_ = macho.N_TYPE & sym.n_type; - return type_ == macho.N_UNDF; -} - -pub fn symbolIsIndr(sym: macho.nlist_64) bool { - const type_ = macho.N_TYPE & sym.n_type; - return type_ == macho.N_INDR; -} - -pub fn symbolIsAbs(sym: macho.nlist_64) bool { - const type_ = macho.N_TYPE & sym.n_type; - return type_ == macho.N_ABS; -} - -pub fn symbolIsWeakDef(sym: macho.nlist_64) bool { - return (sym.n_desc & macho.N_WEAK_DEF) != 0; -} - -pub fn symbolIsWeakRef(sym: macho.nlist_64) bool { - return (sym.n_desc & macho.N_WEAK_REF) != 0; -} - -pub fn symbolIsTentative(sym: macho.nlist_64) bool { - if (!symbolIsUndf(sym)) return false; - return sym.n_value != 0; -} - -pub fn symbolIsNull(sym: macho.nlist_64) bool { - return sym.n_value == 0 and sym.n_desc == 0 and sym.n_type == 0 and sym.n_strx == 0 and sym.n_sect == 0; -} - -pub fn symbolIsTemp(sym: macho.nlist_64, sym_name: []const u8) bool { - if (!symbolIsSect(sym)) return false; - if (symbolIsExt(sym)) return false; - return mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L"); -} - -pub fn sectionId(self: Zld, match: MatchingSection) u8 { - // TODO there might be a more generic way of doing this. - var section: u8 = 0; - for (self.load_commands.items) |cmd, cmd_id| { - if (cmd != .Segment) break; - if (cmd_id == match.seg) { - section += @intCast(u8, match.sect) + 1; - break; - } - section += @intCast(u8, cmd.Segment.sections.items.len); - } - return section; -} - -pub fn unpackSectionId(self: Zld, section_id: u8) MatchingSection { - var match: MatchingSection = undefined; - var section: u8 = 0; - outer: for (self.load_commands.items) |cmd, cmd_id| { - assert(cmd == .Segment); - for (cmd.Segment.sections.items) |_, sect_id| { - section += 1; - if (section_id == section) { - match.seg = @intCast(u16, cmd_id); - match.sect = @intCast(u16, sect_id); - break :outer; - } - } - } - return match; -} - -fn packDylibOrdinal(ordinal: u16) u16 { - return ordinal * macho.N_SYMBOL_RESOLVER; -} - -fn unpackDylibOrdinal(pack: u16) u16 { - return @divExact(pack, macho.N_SYMBOL_RESOLVER); -} - -pub fn findFirst(comptime T: type, haystack: []T, start: usize, predicate: anytype) usize { - if (!@hasDecl(@TypeOf(predicate), "predicate")) - @compileError("Predicate is required to define fn predicate(@This(), T) bool"); - - if (start == haystack.len) return start; - - var i = start; - while (i < haystack.len) : (i += 1) { - if (predicate.predicate(haystack[i])) break; - } - return i; -}