diff --git a/CMakeLists.txt b/CMakeLists.txt index 2714fa6d6b..20d19fa167 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -581,14 +581,10 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/DebugSymbols.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Dylib.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/Symbol.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/TextBlock.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/Zld.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/commands.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/reloc.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/reloc/aarch64.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/reloc/x86_64.zig" "${CMAKE_SOURCE_DIR}/src/link/Wasm.zig" "${CMAKE_SOURCE_DIR}/src/link/tapi.zig" "${CMAKE_SOURCE_DIR}/src/link/tapi/parse.zig" diff --git a/src/Compilation.zig b/src/Compilation.zig index 3f3a41956c..72209b657e 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -894,6 +894,10 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation { // Make a decision on whether to use LLD or our own linker. const use_lld = options.use_lld orelse blk: { + if (options.target.isDarwin()) { + break :blk false; + } + if (!build_options.have_llvm) break :blk false; @@ -931,11 +935,10 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation { break :blk false; }; - const darwin_can_use_system_sdk = - // comptime conditions - ((build_options.have_llvm and comptime std.Target.current.isDarwin()) and - // runtime conditions - (use_lld and builtin.os.tag == .macos and options.target.isDarwin())); + const darwin_can_use_system_sdk = blk: { + if (comptime !std.Target.current.isDarwin()) break :blk false; + break :blk std.builtin.os.tag == .macos and options.target.isDarwin(); + }; const sysroot = blk: { if (options.sysroot) |sysroot| { @@ -952,10 +955,12 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation { const lto = blk: { if (options.want_lto) |explicit| { - if (!use_lld) + if (!use_lld and !options.target.isDarwin()) return error.LtoUnavailableWithoutLld; break :blk explicit; } else if (!use_lld) { + // TODO zig ld LTO support + // See https://github.com/ziglang/zig/issues/8680 break :blk false; } else if (options.c_source_files.len == 0) { break :blk false; diff --git a/src/codegen.zig b/src/codegen.zig index bf7f167849..0a3169bb9b 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -2590,9 +2590,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const got_addr = blk: { const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; const got = seg.sections.items[macho_file.got_section_index.?]; - break :blk got.addr + func.owner_decl.link.macho.offset_table_index * @sizeOf(u64); + const got_index = macho_file.got_entries_map.get(.{ + .where = .local, + .where_index = func.owner_decl.link.macho.local_sym_index, + }) orelse unreachable; + break :blk got.addr + got_index * @sizeOf(u64); }; - log.debug("got_addr = 0x{x}", .{got_addr}); switch (arch) { .x86_64 => { try self.genSetReg(Type.initTag(.u64), .rax, .{ .memory = got_addr }); @@ -2609,37 +2612,33 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } } else if (func_value.castTag(.extern_fn)) |func_payload| { const decl = func_payload.data; - const decl_name = try std.fmt.allocPrint(self.bin_file.allocator, "_{s}", .{decl.name}); - defer self.bin_file.allocator.free(decl_name); - const 
already_defined = macho_file.lazy_imports.contains(decl_name);
-            const symbol: u32 = if (macho_file.lazy_imports.getIndex(decl_name)) |index|
-                @intCast(u32, index)
-            else
-                try macho_file.addExternSymbol(decl_name);
-            const start = self.code.items.len;
-            const len: usize = blk: {
+            const where_index = try macho_file.addExternFn(mem.spanZ(decl.name));
+            const offset = blk: {
                 switch (arch) {
                     .x86_64 => {
                         // callq
                         try self.code.ensureCapacity(self.code.items.len + 5);
                         self.code.appendSliceAssumeCapacity(&[5]u8{ 0xe8, 0x0, 0x0, 0x0, 0x0 });
-                        break :blk 5;
+                        break :blk @intCast(u32, self.code.items.len) - 4;
                     },
                     .aarch64 => {
+                        const offset = @intCast(u32, self.code.items.len);
                         // bl
-                        writeInt(u32, try self.code.addManyAsArray(4), 0);
-                        break :blk 4;
+                        writeInt(u32, try self.code.addManyAsArray(4), Instruction.bl(0).toU32());
+                        break :blk offset;
                     },
                     else => unreachable, // unsupported architecture on MachO
                 }
             };
-            try macho_file.stub_fixups.append(self.bin_file.allocator, .{
-                .symbol = symbol,
-                .already_defined = already_defined,
-                .start = start,
-                .len = len,
+            // Add relocation to the decl.
+            try macho_file.active_decl.?.link.macho.relocs.append(self.bin_file.allocator, .{
+                .offset = offset,
+                .where = .import,
+                .where_index = where_index,
+                .payload = .{ .branch = .{
+                    .arch = arch,
+                } },
             });
-            // We mark the space and fix it up later.
         } else {
             return self.fail("TODO implement calling bitcasted functions", .{});
         }
@@ -4144,19 +4143,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             .memory => |addr| {
                 if (self.bin_file.options.pie) {
                     // PC-relative displacement to the entry in the GOT table.
-                    // TODO we should come up with our own, backend independent relocation types
-                    // which each backend (Elf, MachO, etc.) would then translate into an actual
-                    // fixup when linking.
-                    // adrp reg, pages
-                    if (self.bin_file.cast(link.File.MachO)) |macho_file| {
-                        try macho_file.pie_fixups.append(self.bin_file.allocator, .{
-                            .target_addr = addr,
-                            .offset = self.code.items.len,
-                            .size = 4,
-                        });
-                    } else {
-                        return self.fail("TODO implement genSetReg for PIE GOT indirection on this platform", .{});
-                    }
+                    // adrp
+                    const offset = @intCast(u32, self.code.items.len);
                     mem.writeIntLittle(
                         u32,
                         try self.code.addManyAsArray(4),
@@ -4169,6 +4157,36 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                             .offset = Instruction.LoadStoreOffset.imm(0),
                         },
                     }).toU32());
+
+                    if (self.bin_file.cast(link.File.MachO)) |macho_file| {
+                        // TODO this is super awkward. We are reversing the address of the GOT entry here.
+                        // We should probably have it cached or move the reloc adding somewhere else.
+                        const got_addr = blk: {
+                            const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment;
+                            const got = seg.sections.items[macho_file.got_section_index.?];
+                            break :blk got.addr;
+                        };
+                        const where_index = blk: for (macho_file.got_entries.items) |key, id| {
+                            if (got_addr + id * @sizeOf(u64) == addr) break :blk key.where_index;
+                        } else unreachable;
+                        const decl = macho_file.active_decl.?;
+                        // Page reloc for adrp instruction.
+                        try decl.link.macho.relocs.append(self.bin_file.allocator, .{
+                            .offset = offset,
+                            .where = .local,
+                            .where_index = where_index,
+                            .payload = .{ .page = .{ .kind = .got } },
+                        });
+                        // Pageoff reloc for the ldr instruction.
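+                        // adrp materializes the 4 KiB page address of the GOT slot;
+                        // the trailing ldr fills in the low 12 bits, which is why
+                        // this second reloc points at offset + 4.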
+ try decl.link.macho.relocs.append(self.bin_file.allocator, .{ + .offset = offset + 4, + .where = .local, + .where_index = where_index, + .payload = .{ .page_off = .{ .kind = .got } }, + }); + } else { + return self.fail("TODO implement genSetReg for PIE GOT indirection on this platform", .{}); + } } else { // The value is in memory at a hard-coded address. // If the type is a pointer, it means the pointer address is at this memory location. @@ -4421,14 +4439,26 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { encoder.modRm_RIPDisp32(reg.low_id()); encoder.disp32(0); - // TODO we should come up with our own, backend independent relocation types - // which each backend (Elf, MachO, etc.) would then translate into an actual - // fixup when linking. + const offset = @intCast(u32, self.code.items.len); + if (self.bin_file.cast(link.File.MachO)) |macho_file| { - try macho_file.pie_fixups.append(self.bin_file.allocator, .{ - .target_addr = x, - .offset = self.code.items.len - 4, - .size = 4, + // TODO this is super awkward. We are reversing the address of the GOT entry here. + // We should probably have it cached or move the reloc adding somewhere else. + const got_addr = blk: { + const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; + const got = seg.sections.items[macho_file.got_section_index.?]; + break :blk got.addr; + }; + const where_index = blk: for (macho_file.got_entries.items) |key, id| { + if (got_addr + id * @sizeOf(u64) == x) break :blk key.where_index; + } else unreachable; + const decl = macho_file.active_decl.?; + // Load reloc for LEA instruction. + try decl.link.macho.relocs.append(self.bin_file.allocator, .{ + .offset = offset - 4, + .where = .local, + .where_index = where_index, + .payload = .{ .load = .{ .kind = .got } }, }); } else { return self.fail("TODO implement genSetReg for PIE GOT indirection on this platform", .{}); @@ -4647,7 +4677,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const got_addr = blk: { const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; const got = seg.sections.items[macho_file.got_section_index.?]; - break :blk got.addr + decl.link.macho.offset_table_index * ptr_bytes; + const got_index = macho_file.got_entries_map.get(.{ + .where = .local, + .where_index = decl.link.macho.local_sym_index, + }) orelse unreachable; + break :blk got.addr + got_index * ptr_bytes; }; return MCValue{ .memory = got_addr }; } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { diff --git a/src/link.zig b/src/link.zig index 85ff2ca603..1293fab4d2 100644 --- a/src/link.zig +++ b/src/link.zig @@ -543,7 +543,7 @@ pub const File = struct { } } - fn linkAsArchive(base: *File, comp: *Compilation) !void { + pub fn linkAsArchive(base: *File, comp: *Compilation) !void { const tracy = trace(@src()); defer tracy.end(); diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 8675295b2a..21f4e9c33c 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1,38 +1,43 @@ const MachO = @This(); const std = @import("std"); +const build_options = @import("build_options"); const builtin = @import("builtin"); -const Allocator = std.mem.Allocator; const assert = std.debug.assert; const fmt = std.fmt; const fs = std.fs; const log = std.log.scoped(.link); const macho = std.macho; -const codegen = @import("../codegen.zig"); -const aarch64 = @import("../codegen/aarch64.zig"); const math = std.math; const mem = std.mem; const meta = std.meta; +const aarch64 = 
@import("../codegen/aarch64.zig"); const bind = @import("MachO/bind.zig"); -const trace = @import("../tracy.zig").trace; -const build_options = @import("build_options"); -const Module = @import("../Module.zig"); -const Compilation = @import("../Compilation.zig"); +const codegen = @import("../codegen.zig"); +const commands = @import("MachO/commands.zig"); const link = @import("../link.zig"); -const File = link.File; -const Cache = @import("../Cache.zig"); +const llvm_backend = @import("../codegen/llvm.zig"); const target_util = @import("../target.zig"); +const trace = @import("../tracy.zig").trace; + const Air = @import("../Air.zig"); -const Liveness = @import("../Liveness.zig"); - -const DebugSymbols = @import("MachO/DebugSymbols.zig"); -const Trie = @import("MachO/Trie.zig"); +const Allocator = mem.Allocator; +const Archive = @import("MachO/Archive.zig"); +const Cache = @import("../Cache.zig"); const CodeSignature = @import("MachO/CodeSignature.zig"); -const Zld = @import("MachO/Zld.zig"); +const Compilation = @import("../Compilation.zig"); +const DebugSymbols = @import("MachO/DebugSymbols.zig"); +const Dylib = @import("MachO/Dylib.zig"); +const File = link.File; +const Object = @import("MachO/Object.zig"); +const Liveness = @import("../Liveness.zig"); const LlvmObject = @import("../codegen/llvm.zig").Object; - -usingnamespace @import("MachO/commands.zig"); +const LoadCommand = commands.LoadCommand; +const Module = @import("../Module.zig"); +const SegmentCommand = commands.SegmentCommand; +pub const TextBlock = @import("MachO/TextBlock.zig"); +const Trie = @import("MachO/Trie.zig"); pub const base_tag: File.Tag = File.Tag.macho; @@ -48,102 +53,131 @@ d_sym: ?DebugSymbols = null, /// For x86_64 that's 4KB, whereas for aarch64, that's 16KB. page_size: u16, -/// Mach-O header -header: ?macho.mach_header_64 = null, /// We commit 0x1000 = 4096 bytes of space to the header and /// the table of load commands. This should be plenty for any /// potential future extensions. header_pad: u16 = 0x1000, -/// Table of all load commands -load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, -/// __PAGEZERO segment -pagezero_segment_cmd_index: ?u16 = null, -/// __TEXT segment -text_segment_cmd_index: ?u16 = null, -/// __DATA_CONST segment -data_const_segment_cmd_index: ?u16 = null, -/// __DATA segment -data_segment_cmd_index: ?u16 = null, -/// __LINKEDIT segment -linkedit_segment_cmd_index: ?u16 = null, -/// Dyld info -dyld_info_cmd_index: ?u16 = null, -/// Symbol table -symtab_cmd_index: ?u16 = null, -/// Dynamic symbol table -dysymtab_cmd_index: ?u16 = null, -/// Path to dyld linker -dylinker_cmd_index: ?u16 = null, -/// Path to libSystem -libsystem_cmd_index: ?u16 = null, -/// Data-in-code section of __LINKEDIT segment -data_in_code_cmd_index: ?u16 = null, -/// Address to entry point function -function_starts_cmd_index: ?u16 = null, -/// Main/entry point -/// Specifies offset wrt __TEXT segment start address to the main entry point -/// of the binary. -main_cmd_index: ?u16 = null, -/// Minimum OS version -version_min_cmd_index: ?u16 = null, -/// Source version -source_version_cmd_index: ?u16 = null, -/// UUID load command -uuid_cmd_index: ?u16 = null, -/// Code signature -code_signature_cmd_index: ?u16 = null, - -/// Index into __TEXT,__text section. -text_section_index: ?u16 = null, -/// Index into __TEXT,__stubs section. -stubs_section_index: ?u16 = null, -/// Index into __TEXT,__stub_helper section. -stub_helper_section_index: ?u16 = null, -/// Index into __DATA_CONST,__got section. 
-got_section_index: ?u16 = null, -/// Index into __DATA,__la_symbol_ptr section. -la_symbol_ptr_section_index: ?u16 = null, -/// Index into __DATA,__data section. -data_section_index: ?u16 = null, /// The absolute address of the entry point. entry_addr: ?u64 = null, -/// Table of all local symbols -/// Internally references string table for names (which are optional). +objects: std.ArrayListUnmanaged(*Object) = .{}, +archives: std.ArrayListUnmanaged(*Archive) = .{}, +dylibs: std.ArrayListUnmanaged(*Dylib) = .{}, + +next_dylib_ordinal: u16 = 1, + +load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, + +pagezero_segment_cmd_index: ?u16 = null, +text_segment_cmd_index: ?u16 = null, +data_const_segment_cmd_index: ?u16 = null, +data_segment_cmd_index: ?u16 = null, +linkedit_segment_cmd_index: ?u16 = null, +dyld_info_cmd_index: ?u16 = null, +symtab_cmd_index: ?u16 = null, +dysymtab_cmd_index: ?u16 = null, +dylinker_cmd_index: ?u16 = null, +data_in_code_cmd_index: ?u16 = null, +function_starts_cmd_index: ?u16 = null, +main_cmd_index: ?u16 = null, +dylib_id_cmd_index: ?u16 = null, +version_min_cmd_index: ?u16 = null, +source_version_cmd_index: ?u16 = null, +uuid_cmd_index: ?u16 = null, +code_signature_cmd_index: ?u16 = null, +/// Path to libSystem +/// TODO this is obsolete, remove it. +libsystem_cmd_index: ?u16 = null, + +// __TEXT segment sections +text_section_index: ?u16 = null, +stubs_section_index: ?u16 = null, +stub_helper_section_index: ?u16 = null, +text_const_section_index: ?u16 = null, +cstring_section_index: ?u16 = null, +ustring_section_index: ?u16 = null, +gcc_except_tab_section_index: ?u16 = null, +unwind_info_section_index: ?u16 = null, +eh_frame_section_index: ?u16 = null, + +objc_methlist_section_index: ?u16 = null, +objc_methname_section_index: ?u16 = null, +objc_methtype_section_index: ?u16 = null, +objc_classname_section_index: ?u16 = null, + +// __DATA_CONST segment sections +got_section_index: ?u16 = null, +mod_init_func_section_index: ?u16 = null, +mod_term_func_section_index: ?u16 = null, +data_const_section_index: ?u16 = null, + +objc_cfstring_section_index: ?u16 = null, +objc_classlist_section_index: ?u16 = null, +objc_imageinfo_section_index: ?u16 = null, + +// __DATA segment sections +tlv_section_index: ?u16 = null, +tlv_data_section_index: ?u16 = null, +tlv_bss_section_index: ?u16 = null, +la_symbol_ptr_section_index: ?u16 = null, +data_section_index: ?u16 = null, +bss_section_index: ?u16 = null, +common_section_index: ?u16 = null, + +objc_const_section_index: ?u16 = null, +objc_selrefs_section_index: ?u16 = null, +objc_classrefs_section_index: ?u16 = null, +objc_data_section_index: ?u16 = null, + locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -/// Table of all global symbols globals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -/// Table of all extern nonlazy symbols, indexed by name. -nonlazy_imports: std.StringArrayHashMapUnmanaged(Import) = .{}, -/// Table of all extern lazy symbols, indexed by name. -lazy_imports: std.StringArrayHashMapUnmanaged(Import) = .{}, +imports: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +undefs: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +tentatives: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +symbol_resolver: std.AutoHashMapUnmanaged(u32, SymbolWithLoc) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, globals_free_list: std.ArrayListUnmanaged(u32) = .{}, -offset_table_free_list: std.ArrayListUnmanaged(u32) = .{}, stub_helper_stubs_start_off: ?u64 = null, -/// Table of symbol names aka the string table. 
-string_table: std.ArrayListUnmanaged(u8) = .{},
-string_table_directory: std.StringHashMapUnmanaged(u32) = .{},
+strtab: std.ArrayListUnmanaged(u8) = .{},
+strtab_dir: std.HashMapUnmanaged(u32, u32, StringIndexContext, std.hash_map.default_max_load_percentage) = .{},
 
-/// Table of GOT entries.
-offset_table: std.ArrayListUnmanaged(GOTEntry) = .{},
+got_entries: std.ArrayListUnmanaged(GotIndirectionKey) = .{},
+got_entries_map: std.AutoHashMapUnmanaged(GotIndirectionKey, u32) = .{},
+
+got_entries_free_list: std.ArrayListUnmanaged(u32) = .{},
+
+stubs: std.ArrayListUnmanaged(u32) = .{},
+stubs_map: std.AutoHashMapUnmanaged(u32, u32) = .{},
 
 error_flags: File.ErrorFlags = File.ErrorFlags{},
 
-offset_table_count_dirty: bool = false,
-header_dirty: bool = false,
+got_entries_count_dirty: bool = false,
 load_commands_dirty: bool = false,
 rebase_info_dirty: bool = false,
 binding_info_dirty: bool = false,
 lazy_binding_info_dirty: bool = false,
 export_info_dirty: bool = false,
-string_table_dirty: bool = false,
-string_table_needs_relocation: bool = false,
+strtab_dirty: bool = false,
+strtab_needs_relocation: bool = false,
+
+has_dices: bool = false,
+has_stabs: bool = false,
+
+section_ordinals: std.ArrayListUnmanaged(MatchingSection) = .{},
+section_to_ordinal: std.AutoHashMapUnmanaged(MatchingSection, u8) = .{},
+
+pending_updates: std.ArrayListUnmanaged(struct {
+    kind: enum {
+        got,
+        stub,
+    },
+    index: u32,
+}) = .{},
 
 /// A list of text blocks that have surplus capacity. This list can have false
 /// positives, as functions grow and shrink over time, only sometimes being added
@@ -165,47 +199,73 @@ text_block_free_list: std.ArrayListUnmanaged(*TextBlock) = .{},
 /// Pointer to the last allocated text block
 last_text_block: ?*TextBlock = null,
 
-/// A list of all PIE fixups required for this run of the linker.
-/// Warning, this is currently NOT thread-safe. See the TODO below.
-/// TODO Move this list inside `updateDecl` where it should be allocated
-/// prior to calling `generateSymbol`, and then immediately deallocated
-/// rather than sitting in the global scope.
-/// TODO We should also rewrite this using generic relocations common to all
-/// backends.
-pie_fixups: std.ArrayListUnmanaged(PIEFixup) = .{},
+/// List of TextBlocks that are owned directly by the linker.
+/// Currently these are only TextBlocks that are the result of linking
+/// object files. TextBlocks which take part in incremental linking are
+/// at present owned by Module.Decl.
+/// TODO consolidate this.
+managed_blocks: std.ArrayListUnmanaged(*TextBlock) = .{},
 
-/// A list of all stub (extern decls) fixups required for this run of the linker.
-/// Warning, this is currently NOT thread-safe. See the TODO below.
-/// TODO Move this list inside `updateDecl` where it should be allocated
-/// prior to calling `generateSymbol`, and then immediately deallocated
-/// rather than sitting in the global scope.
-stub_fixups: std.ArrayListUnmanaged(StubFixup) = .{},
+blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{},
 
-pub const GOTEntry = struct {
-    /// GOT entry can either be a local pointer or an extern (nonlazy) import.
-    kind: enum {
-        Local,
-        Extern,
-    },
+/// Table of Decls that are currently alive.
+/// We store them here so that we can properly dispose of any allocated
+/// memory within the TextBlock in the incremental linker.
+/// TODO consolidate this.
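+/// The value type is void; the map serves as an ordered set of the Decl
+/// pointers whose TextBlock memory still has to be disposed of.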
+decls: std.AutoArrayHashMapUnmanaged(*Module.Decl, void) = .{},
 
-    /// Id to the macho.nlist_64 from the respective table: either locals or nonlazy imports.
-    /// TODO I'm more and more inclined to just manage a single, max two symbol tables
-    /// rather than 4 as we currently do, but I'll follow up in the future PR.
-    symbol: u32,
+/// Currently active Module.Decl.
+/// TODO this might not be necessary if we figure out how to pass Module.Decl instance
+/// to codegen.genSetReg() or alternatively move PIE displacement for MCValue{ .memory = x }
+/// somewhere else in the codegen.
+active_decl: ?*Module.Decl = null,
 
-    /// Index of this entry in the GOT.
-    index: u32,
+const StringIndexContext = struct {
+    strtab: *std.ArrayListUnmanaged(u8),
+
+    pub fn eql(_: StringIndexContext, a: u32, b: u32) bool {
+        return a == b;
+    }
+
+    pub fn hash(self: StringIndexContext, x: u32) u64 {
+        const x_slice = mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr) + x);
+        return std.hash_map.hashString(x_slice);
+    }
 };
 
-pub const Import = struct {
-    /// MachO symbol table entry.
-    symbol: macho.nlist_64,
+pub const StringSliceAdapter = struct {
+    strtab: *std.ArrayListUnmanaged(u8),
 
-    /// Id of the dynamic library where the specified entries can be found.
-    dylib_ordinal: i64,
+    pub fn eql(self: StringSliceAdapter, a_slice: []const u8, b: u32) bool {
+        const b_slice = mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr) + b);
+        return mem.eql(u8, a_slice, b_slice);
+    }
 
-    /// Index of this import within the import list.
-    index: u32,
+    pub fn hash(self: StringSliceAdapter, adapted_key: []const u8) u64 {
+        _ = self;
+        return std.hash_map.hashString(adapted_key);
+    }
+};
+
+const SymbolWithLoc = struct {
+    // Table where the symbol can be found.
+    where: enum {
+        global,
+        import,
+        undef,
+        tentative,
+    },
+    where_index: u32,
+    local_sym_index: u32 = 0,
+    file: u16 = 0,
+};
+
+pub const GotIndirectionKey = struct {
+    where: enum {
+        local,
+        import,
+    },
+    where_index: u32,
 };
 
 pub const PIEFixup = struct {
@@ -219,19 +279,6 @@ pub const PIEFixup = struct {
     size: usize,
 };
 
-pub const StubFixup = struct {
-    /// Id of extern (lazy) symbol.
-    symbol: u32,
-    /// Signals whether the symbol has already been declared before. If so,
-    /// then there is no need to rewrite the stub entry and related.
-    already_defined: bool,
-    /// Where in the byte stream we should perform the fixup.
-    start: usize,
-    /// The length of the byte stream. For x86_64, this will be
-    /// variable. For aarch64, it will be fixed at 4 bytes.
-    len: usize,
-};
-
 /// When allocating, the ideal_capacity is calculated by
 /// actual_capacity + (actual_capacity / ideal_factor)
 const ideal_factor = 2;
@@ -254,75 +301,7 @@ const LIB_SYSTEM_PATH: [*:0]const u8 = DEFAULT_LIB_SEARCH_PATH ++ "/libSystem.B.
 /// it as a possible place to put new symbols, it must have enough room for this many bytes
 /// (plus extra for reserved capacity).
 const minimum_text_block_size = 64;
-const min_text_capacity = padToIdeal(minimum_text_block_size);
-
-pub const TextBlock = struct {
-    /// Each decl always gets a local symbol with the fully qualified name.
-    /// The vaddr and size are found here directly.
-    /// The file offset is found by computing the vaddr offset from the section vaddr
-    /// the symbol references, and adding that to the file offset of the section.
-    /// If this field is 0, it means the codegen size = 0 and there is no symbol or
-    /// offset table entry.
- local_sym_index: u32, - /// Index into offset table - /// This field is undefined for symbols with size = 0. - offset_table_index: u32, - /// Size of this text block - /// Unlike in Elf, we need to store the size of this symbol as part of - /// the TextBlock since macho.nlist_64 lacks this information. - size: u64, - /// Points to the previous and next neighbours - prev: ?*TextBlock, - next: ?*TextBlock, - - /// Previous/next linked list pointers. - /// This is the linked list node for this Decl's corresponding .debug_info tag. - dbg_info_prev: ?*TextBlock, - dbg_info_next: ?*TextBlock, - /// Offset into .debug_info pointing to the tag for this Decl. - dbg_info_off: u32, - /// Size of the .debug_info tag for this Decl, not including padding. - dbg_info_len: u32, - - pub const empty = TextBlock{ - .local_sym_index = 0, - .offset_table_index = undefined, - .size = 0, - .prev = null, - .next = null, - .dbg_info_prev = null, - .dbg_info_next = null, - .dbg_info_off = undefined, - .dbg_info_len = undefined, - }; - - /// Returns how much room there is to grow in virtual address space. - /// File offset relocation happens transparently, so it is not included in - /// this calculation. - fn capacity(self: TextBlock, macho_file: MachO) u64 { - const self_sym = macho_file.locals.items[self.local_sym_index]; - if (self.next) |next| { - const next_sym = macho_file.locals.items[next.local_sym_index]; - return next_sym.n_value - self_sym.n_value; - } else { - // We are the last block. - // The capacity is limited only by virtual address space. - return std.math.maxInt(u64) - self_sym.n_value; - } - } - - fn freeListEligible(self: TextBlock, macho_file: MachO) bool { - // No need to keep a free list node for the last block. - const next = self.next orelse return false; - const self_sym = macho_file.locals.items[self.local_sym_index]; - const next_sym = macho_file.locals.items[next.local_sym_index]; - const cap = next_sym.n_value - self_sym.n_value; - const ideal_cap = padToIdeal(self.size); - if (cap <= ideal_cap) return false; - const surplus = cap - ideal_cap; - return surplus >= min_text_capacity; - } -}; +pub const min_text_capacity = padToIdeal(minimum_text_block_size); pub const Export = struct { sym_index: ?u32 = null, @@ -374,6 +353,10 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio self.base.file = file; + if (options.output_mode == .Lib and options.link_mode == .Static) { + return self; + } + if (!options.strip and options.module != null) { // Create dSYM bundle. 
const dir = options.module.?.zig_cache_artifact_directory; @@ -409,12 +392,6 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio .n_value = 0, }); - switch (options.output_mode) { - .Exe => {}, - .Obj => {}, - .Lib => return error.TODOImplementWritingLibFiles, - } - try self.populateMissingMetadata(); try self.writeLocalSymbol(0); @@ -428,6 +405,7 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio pub fn createEmpty(gpa: *Allocator, options: link.Options) !*MachO { const self = try gpa.create(MachO); + self.* = .{ .base = .{ .tag = .macho, @@ -437,11 +415,22 @@ pub fn createEmpty(gpa: *Allocator, options: link.Options) !*MachO { }, .page_size = if (options.target.cpu.arch == .aarch64) 0x4000 else 0x1000, }; + return self; } pub fn flush(self: *MachO, comp: *Compilation) !void { - if (build_options.have_llvm and self.base.options.use_lld) { + if (self.base.options.output_mode == .Lib and self.base.options.link_mode == .Static) { + if (build_options.have_llvm) { + return self.base.linkAsArchive(comp); + } else { + log.err("TODO: non-LLVM archiver for MachO object files", .{}); + return error.TODOImplementWritingStaticLibFiles; + } + } + + const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; + if (use_stage1) { return self.linkWithZld(comp); } else { switch (self.base.options.effectiveOutputMode()) { @@ -471,9 +460,9 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { self.load_commands_dirty = true; } try self.writeRebaseInfoTable(); - try self.writeBindingInfoTable(); - try self.writeLazyBindingInfoTable(); - try self.writeExportTrie(); + try self.writeBindInfoTable(); + try self.writeLazyBindInfoTable(); + try self.writeExportInfo(); try self.writeAllGlobalAndUndefSymbols(); try self.writeIndirectSymbolTable(); try self.writeStringTable(); @@ -508,15 +497,14 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { self.error_flags.no_entry_point_found = false; } - assert(!self.offset_table_count_dirty); - assert(!self.header_dirty); + assert(!self.got_entries_count_dirty); assert(!self.load_commands_dirty); assert(!self.rebase_info_dirty); assert(!self.binding_info_dirty); assert(!self.lazy_binding_info_dirty); assert(!self.export_info_dirty); - assert(!self.string_table_dirty); - assert(!self.string_table_needs_relocation); + assert(!self.strtab_dirty); + assert(!self.strtab_needs_relocation); if (target.cpu.arch == .aarch64) { switch (output_mode) { @@ -639,7 +627,6 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { const is_lib = self.base.options.output_mode == .Lib; const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; const is_exe_or_dyn_lib = is_dyn_lib or self.base.options.output_mode == .Exe; - const target = self.base.options.target; const stack_size = self.base.options.stack_size_override orelse 0; const allow_shlib_undefined = self.base.options.allow_shlib_undefined orelse !self.base.options.is_native_os; @@ -738,14 +725,6 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{}); } } else { - var zld = Zld.init(self.base.allocator); - defer { - zld.closeFiles(); - zld.deinit(); - } - zld.target = target; - zld.stack_size = stack_size; - // Positional arguments to the linker such as object files and static archives. 
var positionals = std.ArrayList([]const u8).init(arena); @@ -888,23 +867,6 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { rpaths.appendAssumeCapacity(key.*); } - const output: Zld.Output = output: { - if (is_dyn_lib) { - const install_name = try std.fmt.allocPrint(arena, "@rpath/{s}", .{ - self.base.options.emit.?.sub_path, - }); - break :output .{ - .tag = .dylib, - .path = full_out_path, - .install_name = install_name, - }; - } - break :output .{ - .tag = .exe, - .path = full_out_path, - }; - }; - if (self.base.options.verbose_link) { var argv = std.ArrayList([]const u8).init(arena); @@ -918,8 +880,11 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { if (is_dyn_lib) { try argv.append("-dylib"); + const install_name = try std.fmt.allocPrint(arena, "@rpath/{s}", .{ + self.base.options.emit.?.sub_path, + }); try argv.append("-install_name"); - try argv.append(output.install_name.?); + try argv.append(install_name); } if (self.base.options.sysroot) |syslibroot| { @@ -935,7 +900,7 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { try argv.appendSlice(positionals.items); try argv.append("-o"); - try argv.append(output.path); + try argv.append(full_out_path); if (native_libsystem_available) { try argv.append("-lSystem"); @@ -953,11 +918,56 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { Compilation.dump_argv(argv.items); } - try zld.link(positionals.items, output, .{ - .syslibroot = self.base.options.sysroot, - .libs = libs.items, - .rpaths = rpaths.items, + self.base.file = try fs.cwd().createFile(full_out_path, .{ + .truncate = true, + .read = true, + .mode = if (std.Target.current.os.tag == .windows) 0 else 0o777, }); + self.page_size = switch (self.base.options.target.cpu.arch) { + .aarch64 => 0x4000, + .x86_64 => 0x1000, + else => unreachable, + }; + + // Initialize section ordinals with null ordinal pointing at + // PAGEZERO segment. + try self.section_ordinals.append(self.base.allocator, .{ + .seg = 0, + .sect = 0, + }); + + try self.populateMetadata(); + try self.parseInputFiles(positionals.items, self.base.options.sysroot); + try self.parseLibs(libs.items, self.base.options.sysroot); + try self.resolveSymbols(); + try self.parseTextBlocks(); + + { + // Add dyld_stub_binder as the final GOT entry. 
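+        // Every lazy-binding stub funnels through dyld_stub_binder on its first
+        // call (see writeStubHelperCommon), so the binder needs a GOT slot of
+        // its own before any stub entries are laid out.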
+ const n_strx = self.strtab_dir.getAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{ + .strtab = &self.strtab, + }) orelse unreachable; + const resolv = self.symbol_resolver.get(n_strx) orelse unreachable; + const got_index = @intCast(u32, self.got_entries.items.len); + const got_entry = GotIndirectionKey{ + .where = .import, + .where_index = resolv.where_index, + }; + try self.got_entries.append(self.base.allocator, got_entry); + try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, got_index); + } + + try self.sortSections(); + try self.addRpaths(rpaths.items); + try self.addDataInCodeLC(); + try self.addCodeSignatureLC(); + try self.allocateTextSegment(); + try self.allocateDataConstSegment(); + try self.allocateDataSegment(); + self.allocateLinkeditSegment(); + try self.allocateTextBlocks(); + self.printSymtabAndTextBlock(); + try self.flushZld(); } if (!self.base.options.disable_lld_caching) { @@ -976,57 +986,2403 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { } } -fn darwinArchString(arch: std.Target.Cpu.Arch) []const u8 { - return switch (arch) { - .aarch64, .aarch64_be, .aarch64_32 => "arm64", - .thumb, .arm => "arm", - .thumbeb, .armeb => "armeb", - .powerpc => "ppc", - .powerpc64 => "ppc64", - .powerpc64le => "ppc64le", - else => @tagName(arch), +fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const u8) !void { + const arch = self.base.options.target.cpu.arch; + for (files) |file_name| { + const full_path = full_path: { + var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; + const path = try std.fs.realpath(file_name, &buffer); + break :full_path try self.base.allocator.dupe(u8, path); + }; + + if (try Object.createAndParseFromPath(self.base.allocator, arch, full_path)) |object| { + try self.objects.append(self.base.allocator, object); + continue; + } + + if (try Archive.createAndParseFromPath(self.base.allocator, arch, full_path)) |archive| { + try self.archives.append(self.base.allocator, archive); + continue; + } + + if (try Dylib.createAndParseFromPath(self.base.allocator, arch, full_path, .{ + .syslibroot = syslibroot, + })) |dylibs| { + defer self.base.allocator.free(dylibs); + try self.dylibs.appendSlice(self.base.allocator, dylibs); + continue; + } + + log.warn("unknown filetype for positional input file: '{s}'", .{file_name}); + } +} + +fn parseLibs(self: *MachO, libs: []const []const u8, syslibroot: ?[]const u8) !void { + const arch = self.base.options.target.cpu.arch; + for (libs) |lib| { + if (try Dylib.createAndParseFromPath(self.base.allocator, arch, lib, .{ + .syslibroot = syslibroot, + })) |dylibs| { + defer self.base.allocator.free(dylibs); + try self.dylibs.appendSlice(self.base.allocator, dylibs); + continue; + } + + if (try Archive.createAndParseFromPath(self.base.allocator, arch, lib)) |archive| { + try self.archives.append(self.base.allocator, archive); + continue; + } + + log.warn("unknown filetype for a library: '{s}'", .{lib}); + } +} + +pub const MatchingSection = struct { + seg: u16, + sect: u16, +}; + +pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSection { + const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const segname = commands.segmentName(sect); + const sectname = commands.sectionName(sect); + + const res: ?MatchingSection = blk: { + 
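+        // The classification below roughly mirrors ld64's defaults: literals and
+        // code map into __TEXT, pointer tables and read-only data into
+        // __DATA_CONST, and mutable or zerofill data into __DATA.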
switch (commands.sectionType(sect)) { + macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { + if (self.text_const_section_index == null) { + self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__const", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_const_section_index.?, + }; + }, + macho.S_CSTRING_LITERALS => { + if (mem.eql(u8, sectname, "__objc_methname")) { + // TODO it seems the common values within the sections in objects are deduplicated/merged + // on merging the sections' contents. + if (self.objc_methname_section_index == null) { + self.objc_methname_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__objc_methname", .{ + .flags = macho.S_CSTRING_LITERALS, + }); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.objc_methname_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_methtype")) { + if (self.objc_methtype_section_index == null) { + self.objc_methtype_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__objc_methtype", .{ + .flags = macho.S_CSTRING_LITERALS, + }); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.objc_methtype_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_classname")) { + if (self.objc_classname_section_index == null) { + self.objc_classname_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__objc_classname", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.objc_classname_section_index.?, + }; + } + + if (self.cstring_section_index == null) { + self.cstring_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__cstring", .{ + .flags = macho.S_CSTRING_LITERALS, + }); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.cstring_section_index.?, + }; + }, + macho.S_LITERAL_POINTERS => { + if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__objc_selrefs")) { + if (self.objc_selrefs_section_index == null) { + self.objc_selrefs_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__objc_selrefs", .{ + .flags = macho.S_LITERAL_POINTERS, + }); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.objc_selrefs_section_index.?, + }; + } + + // TODO investigate + break :blk null; + }, + macho.S_MOD_INIT_FUNC_POINTERS => { + if (self.mod_init_func_section_index == null) { + self.mod_init_func_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__mod_init_func", .{ + .flags = macho.S_MOD_INIT_FUNC_POINTERS, + }); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.mod_init_func_section_index.?, + }; + }, + macho.S_MOD_TERM_FUNC_POINTERS => { + if (self.mod_term_func_section_index == null) { + self.mod_term_func_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__mod_term_func", .{ + .flags = macho.S_MOD_TERM_FUNC_POINTERS, + }); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.mod_term_func_section_index.?, + }; + }, + macho.S_ZEROFILL => { + if (mem.eql(u8, sectname, 
"__common")) { + if (self.common_section_index == null) { + self.common_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__common", .{ + .flags = macho.S_ZEROFILL, + }); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.common_section_index.?, + }; + } else { + if (self.bss_section_index == null) { + self.bss_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__bss", .{ + .flags = macho.S_ZEROFILL, + }); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.bss_section_index.?, + }; + } + }, + macho.S_THREAD_LOCAL_VARIABLES => { + if (self.tlv_section_index == null) { + self.tlv_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__thread_vars", .{ + .flags = macho.S_THREAD_LOCAL_VARIABLES, + }); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.tlv_section_index.?, + }; + }, + macho.S_THREAD_LOCAL_REGULAR => { + if (self.tlv_data_section_index == null) { + self.tlv_data_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__thread_data", .{ + .flags = macho.S_THREAD_LOCAL_REGULAR, + }); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.tlv_data_section_index.?, + }; + }, + macho.S_THREAD_LOCAL_ZEROFILL => { + if (self.tlv_bss_section_index == null) { + self.tlv_bss_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__thread_bss", .{ + .flags = macho.S_THREAD_LOCAL_ZEROFILL, + }); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.tlv_bss_section_index.?, + }; + }, + macho.S_COALESCED => { + if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) { + // TODO I believe __eh_frame is currently part of __unwind_info section + // in the latest ld64 output. + if (self.eh_frame_section_index == null) { + self.eh_frame_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__eh_frame", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.eh_frame_section_index.?, + }; + } + + // TODO audit this: is this the right mapping? 
+ if (self.data_const_section_index == null) { + self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__const", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.data_const_section_index.?, + }; + }, + macho.S_REGULAR => { + if (commands.sectionIsCode(sect)) { + if (self.text_section_index == null) { + self.text_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__text", .{ + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }; + } + if (commands.sectionIsDebug(sect)) { + // TODO debug attributes + if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { + log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + } + break :blk null; + } + + if (mem.eql(u8, segname, "__TEXT")) { + if (mem.eql(u8, sectname, "__ustring")) { + if (self.ustring_section_index == null) { + self.ustring_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__ustring", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.ustring_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__gcc_except_tab")) { + if (self.gcc_except_tab_section_index == null) { + self.gcc_except_tab_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__gcc_except_tab", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.gcc_except_tab_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_methlist")) { + if (self.objc_methlist_section_index == null) { + self.objc_methlist_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__objc_methlist", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.objc_methlist_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__rodata") or + mem.eql(u8, sectname, "__typelink") or + mem.eql(u8, sectname, "__itablink") or + mem.eql(u8, sectname, "__gosymtab") or + mem.eql(u8, sectname, "__gopclntab")) + { + if (self.data_const_section_index == null) { + self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__const", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.data_const_section_index.?, + }; + } else { + if (self.text_const_section_index == null) { + self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__const", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_const_section_index.?, + }; + } + } + + if (mem.eql(u8, segname, "__DATA_CONST")) { + if (self.data_const_section_index == null) { + self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__const", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.data_const_section_index.?, + }; + } + + if (mem.eql(u8, segname, "__DATA")) { + if (mem.eql(u8, sectname, "__const")) { + if (self.data_const_section_index == null) { + 
self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__const", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.data_const_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__cfstring")) { + if (self.objc_cfstring_section_index == null) { + self.objc_cfstring_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__cfstring", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.objc_cfstring_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_classlist")) { + if (self.objc_classlist_section_index == null) { + self.objc_classlist_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__objc_classlist", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.objc_classlist_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_imageinfo")) { + if (self.objc_imageinfo_section_index == null) { + self.objc_imageinfo_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__objc_imageinfo", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.objc_imageinfo_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_const")) { + if (self.objc_const_section_index == null) { + self.objc_const_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__objc_const", .{}); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.objc_const_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_classrefs")) { + if (self.objc_classrefs_section_index == null) { + self.objc_classrefs_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__objc_classrefs", .{}); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.objc_classrefs_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_data")) { + if (self.objc_data_section_index == null) { + self.objc_data_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__objc_data", .{}); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.objc_data_section_index.?, + }; + } else { + if (self.data_section_index == null) { + self.data_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__data", .{}); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.data_section_index.?, + }; + } + } + + if (mem.eql(u8, "__LLVM", segname) and mem.eql(u8, "__asm", sectname)) { + log.debug("TODO LLVM asm section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + } + + break :blk null; + }, + else => break :blk null, + } }; + + if (res) |match| { + try self.createSectionOrdinal(match); + } + + return res; +} + +fn sortSections(self: *MachO) !void { + var text_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator); + defer text_index_mapping.deinit(); + var data_const_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator); + defer data_const_index_mapping.deinit(); + var data_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator); + defer 
data_index_mapping.deinit(); + + { + // __TEXT segment + const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + var sections = seg.sections.toOwnedSlice(self.base.allocator); + defer self.base.allocator.free(sections); + try seg.sections.ensureCapacity(self.base.allocator, sections.len); + + const indices = &[_]*?u16{ + &self.text_section_index, + &self.stubs_section_index, + &self.stub_helper_section_index, + &self.gcc_except_tab_section_index, + &self.cstring_section_index, + &self.ustring_section_index, + &self.text_const_section_index, + &self.objc_methname_section_index, + &self.objc_methtype_section_index, + &self.objc_classname_section_index, + &self.eh_frame_section_index, + }; + for (indices) |maybe_index| { + const new_index: u16 = if (maybe_index.*) |index| blk: { + const idx = @intCast(u16, seg.sections.items.len); + seg.sections.appendAssumeCapacity(sections[index]); + try text_index_mapping.putNoClobber(index, idx); + break :blk idx; + } else continue; + maybe_index.* = new_index; + } + } + + { + // __DATA_CONST segment + const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + var sections = seg.sections.toOwnedSlice(self.base.allocator); + defer self.base.allocator.free(sections); + try seg.sections.ensureCapacity(self.base.allocator, sections.len); + + const indices = &[_]*?u16{ + &self.got_section_index, + &self.mod_init_func_section_index, + &self.mod_term_func_section_index, + &self.data_const_section_index, + &self.objc_cfstring_section_index, + &self.objc_classlist_section_index, + &self.objc_imageinfo_section_index, + }; + for (indices) |maybe_index| { + const new_index: u16 = if (maybe_index.*) |index| blk: { + const idx = @intCast(u16, seg.sections.items.len); + seg.sections.appendAssumeCapacity(sections[index]); + try data_const_index_mapping.putNoClobber(index, idx); + break :blk idx; + } else continue; + maybe_index.* = new_index; + } + } + + { + // __DATA segment + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + var sections = seg.sections.toOwnedSlice(self.base.allocator); + defer self.base.allocator.free(sections); + try seg.sections.ensureCapacity(self.base.allocator, sections.len); + + // __DATA segment + const indices = &[_]*?u16{ + &self.la_symbol_ptr_section_index, + &self.objc_const_section_index, + &self.objc_selrefs_section_index, + &self.objc_classrefs_section_index, + &self.objc_data_section_index, + &self.data_section_index, + &self.tlv_section_index, + &self.tlv_data_section_index, + &self.tlv_bss_section_index, + &self.bss_section_index, + &self.common_section_index, + }; + for (indices) |maybe_index| { + const new_index: u16 = if (maybe_index.*) |index| blk: { + const idx = @intCast(u16, seg.sections.items.len); + seg.sections.appendAssumeCapacity(sections[index]); + try data_index_mapping.putNoClobber(index, idx); + break :blk idx; + } else continue; + maybe_index.* = new_index; + } + } + + { + var transient: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}; + try transient.ensureCapacity(self.base.allocator, self.blocks.count()); + + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const old = entry.key_ptr.*; + const sect = if (old.seg == self.text_segment_cmd_index.?) + text_index_mapping.get(old.sect).? + else if (old.seg == self.data_const_segment_cmd_index.?) + data_const_index_mapping.get(old.sect).? 
+ else + data_index_mapping.get(old.sect).?; + transient.putAssumeCapacityNoClobber(.{ + .seg = old.seg, + .sect = sect, + }, entry.value_ptr.*); + } + + self.blocks.clearAndFree(self.base.allocator); + self.blocks.deinit(self.base.allocator); + self.blocks = transient; + } + + { + // Create new section ordinals. + self.section_ordinals.clearRetainingCapacity(); + self.section_to_ordinal.clearRetainingCapacity(); + // First ordinal is always null + self.section_ordinals.appendAssumeCapacity(.{ + .seg = 0, + .sect = 0, + }); + const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + for (text_seg.sections.items) |_, sect_id| { + try self.createSectionOrdinal(.{ + .seg = self.text_segment_cmd_index.?, + .sect = @intCast(u16, sect_id), + }); + } + const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + for (data_const_seg.sections.items) |_, sect_id| { + try self.createSectionOrdinal(.{ + .seg = self.data_const_segment_cmd_index.?, + .sect = @intCast(u16, sect_id), + }); + } + const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + for (data_seg.sections.items) |_, sect_id| { + try self.createSectionOrdinal(.{ + .seg = self.data_segment_cmd_index.?, + .sect = @intCast(u16, sect_id), + }); + } + } +} + +fn allocateTextSegment(self: *MachO) !void { + const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const nstubs = @intCast(u32, self.stubs.items.len); + + const base_vmaddr = self.load_commands.items[self.pagezero_segment_cmd_index.?].Segment.inner.vmsize; + seg.inner.fileoff = 0; + seg.inner.vmaddr = base_vmaddr; + + // Set stubs and stub_helper sizes + const stubs = &seg.sections.items[self.stubs_section_index.?]; + const stub_helper = &seg.sections.items[self.stub_helper_section_index.?]; + stubs.size += nstubs * stubs.reserved2; + + const stub_size: u4 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 10, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, + }; + stub_helper.size += nstubs * stub_size; + + var sizeofcmds: u64 = 0; + for (self.load_commands.items) |lc| { + sizeofcmds += lc.cmdsize(); + } + + try self.allocateSegment(self.text_segment_cmd_index.?, @sizeOf(macho.mach_header_64) + sizeofcmds); + + // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments. 
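+    // The segment's size is page-aligned, so there is usually slack behind the
+    // last section; every section is shifted up by the largest multiple of the
+    // coarsest section alignment that fits in that slack.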
+    var min_alignment: u32 = 0;
+    for (seg.sections.items) |sect| {
+        const alignment = try math.powi(u32, 2, sect.@"align");
+        min_alignment = math.max(min_alignment, alignment);
+    }
+
+    assert(min_alignment > 0);
+    const last_sect_idx = seg.sections.items.len - 1;
+    const last_sect = seg.sections.items[last_sect_idx];
+    const shift: u32 = blk: {
+        const diff = seg.inner.filesize - last_sect.offset - last_sect.size;
+        const factor = @divTrunc(diff, min_alignment);
+        break :blk @intCast(u32, factor * min_alignment);
+    };
+
+    if (shift > 0) {
+        for (seg.sections.items) |*sect| {
+            sect.offset += shift;
+            sect.addr += shift;
+        }
+    }
+}
+
+fn allocateDataConstSegment(self: *MachO) !void {
+    const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+    const nentries = @intCast(u32, self.got_entries.items.len);
+
+    const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    seg.inner.fileoff = text_seg.inner.fileoff + text_seg.inner.filesize;
+    seg.inner.vmaddr = text_seg.inner.vmaddr + text_seg.inner.vmsize;
+
+    // Set got size
+    const got = &seg.sections.items[self.got_section_index.?];
+    got.size += nentries * @sizeOf(u64);
+
+    try self.allocateSegment(self.data_const_segment_cmd_index.?, 0);
+}
+
+fn allocateDataSegment(self: *MachO) !void {
+    const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+    const nstubs = @intCast(u32, self.stubs.items.len);
+
+    const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+    seg.inner.fileoff = data_const_seg.inner.fileoff + data_const_seg.inner.filesize;
+    seg.inner.vmaddr = data_const_seg.inner.vmaddr + data_const_seg.inner.vmsize;
+
+    // Set la_symbol_ptr and data size
+    const la_symbol_ptr = &seg.sections.items[self.la_symbol_ptr_section_index.?];
+    const data = &seg.sections.items[self.data_section_index.?];
+    la_symbol_ptr.size += nstubs * @sizeOf(u64);
+    data.size += @sizeOf(u64); // We need at least 8 bytes for the address of dyld_stub_binder
+
+    try self.allocateSegment(self.data_segment_cmd_index.?, 0);
+}
+
+fn allocateLinkeditSegment(self: *MachO) void {
+    const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+    seg.inner.fileoff = data_seg.inner.fileoff + data_seg.inner.filesize;
+    seg.inner.vmaddr = data_seg.inner.vmaddr + data_seg.inner.vmsize;
+}
+
+fn allocateSegment(self: *MachO, index: u16, offset: u64) !void {
+    const seg = &self.load_commands.items[index].Segment;
+
+    // Allocate the sections according to their alignment at the beginning of the segment.
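+    // `offset` reserves room at the front of the segment (the mach header and
+    // load commands for __TEXT, zero for the others); each section then starts
+    // at the next boundary of its own 2^align requirement.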
+    var start: u64 = offset;
+    for (seg.sections.items) |*sect| {
+        const alignment = try math.powi(u32, 2, sect.@"align");
+        const start_aligned = mem.alignForwardGeneric(u64, start, alignment);
+        const end_aligned = mem.alignForwardGeneric(u64, start_aligned + sect.size, alignment);
+        sect.offset = @intCast(u32, seg.inner.fileoff + start_aligned);
+        sect.addr = seg.inner.vmaddr + start_aligned;
+        start = end_aligned;
+    }
+
+    const seg_size_aligned = mem.alignForwardGeneric(u64, start, self.page_size);
+    seg.inner.filesize = seg_size_aligned;
+    seg.inner.vmsize = seg_size_aligned;
+}
+
+fn allocateTextBlocks(self: *MachO) !void {
+    var it = self.blocks.iterator();
+    while (it.next()) |entry| {
+        const match = entry.key_ptr.*;
+        var block: *TextBlock = entry.value_ptr.*;
+
+        // Find the first block
+        while (block.prev) |prev| {
+            block = prev;
+        }
+
+        const seg = self.load_commands.items[match.seg].Segment;
+        const sect = seg.sections.items[match.sect];
+
+        var base_addr: u64 = sect.addr;
+        const n_sect = self.section_to_ordinal.get(match) orelse unreachable;
+
+        log.debug(" within section {s},{s}", .{ commands.segmentName(sect), commands.sectionName(sect) });
+        log.debug(" {}", .{sect});
+
+        while (true) {
+            const block_alignment = try math.powi(u32, 2, block.alignment);
+            base_addr = mem.alignForwardGeneric(u64, base_addr, block_alignment);
+
+            const sym = &self.locals.items[block.local_sym_index];
+            sym.n_value = base_addr;
+            sym.n_sect = n_sect;
+
+            log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{
+                self.getString(sym.n_strx),
+                base_addr,
+                base_addr + block.size,
+                block.size,
+                block.alignment,
+            });
+
+            // Update each alias (if any)
+            for (block.aliases.items) |index| {
+                const alias_sym = &self.locals.items[index];
+                alias_sym.n_value = base_addr;
+                alias_sym.n_sect = n_sect;
+            }
+
+            // Update each symbol contained within the TextBlock
+            for (block.contained.items) |sym_at_off| {
+                const contained_sym = &self.locals.items[sym_at_off.local_sym_index];
+                contained_sym.n_value = base_addr + sym_at_off.offset;
+                contained_sym.n_sect = n_sect;
+            }
+
+            base_addr += block.size;
+
+            if (block.next) |next| {
+                block = next;
+            } else break;
+        }
+    }
+
+    // Update globals
+    {
+        var sym_it = self.symbol_resolver.valueIterator();
+        while (sym_it.next()) |resolv| {
+            if (resolv.where != .global) continue;
+
+            assert(resolv.local_sym_index != 0);
+            const local_sym = self.locals.items[resolv.local_sym_index];
+            const sym = &self.globals.items[resolv.where_index];
+            sym.n_value = local_sym.n_value;
+            sym.n_sect = local_sym.n_sect;
+        }
+    }
+}
+
+fn writeTextBlocks(self: *MachO) !void {
+    var it = self.blocks.iterator();
+    while (it.next()) |entry| {
+        const match = entry.key_ptr.*;
+        var block: *TextBlock = entry.value_ptr.*;
+
+        while (block.prev) |prev| {
+            block = prev;
+        }
+
+        const seg = self.load_commands.items[match.seg].Segment;
+        const sect = seg.sections.items[match.sect];
+        const sect_type = commands.sectionType(sect);
+
+        log.debug(" for section {s},{s}", .{ commands.segmentName(sect), commands.sectionName(sect) });
+        log.debug(" {}", .{sect});
+
+        var code = try self.base.allocator.alloc(u8, sect.size);
+        defer self.base.allocator.free(code);
+
+        if (sect_type == macho.S_ZEROFILL or sect_type == macho.S_THREAD_LOCAL_ZEROFILL) {
+            mem.set(u8, code, 0);
+        } else {
+            var base_off: u64 = 0;
+
+            while (true) {
+                const block_alignment = try math.powi(u32, 2, block.alignment);
+                const aligned_base_off = mem.alignForwardGeneric(u64, base_off, block_alignment);
+
+                const sym = self.locals.items[block.local_sym_index];
+                log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{
+                    self.getString(sym.n_strx),
+                    aligned_base_off,
+                    aligned_base_off + block.size,
+                    block.size,
+                    block.alignment,
+                });
+
+                try block.resolveRelocs(self);
+                mem.copy(u8, code[aligned_base_off..][0..block.size], block.code.items);
+
+                // TODO use NOPs for machine-code padding instead of just zeroing it out
+                const padding_len = aligned_base_off - base_off;
+                mem.set(u8, code[base_off..][0..padding_len], 0);
+
+                base_off = aligned_base_off + block.size;
+
+                if (block.next) |next| {
+                    block = next;
+                } else break;
+            }
+
+            mem.set(u8, code[base_off..], 0);
+        }
+
+        try self.base.file.?.pwriteAll(code, sect.offset);
+    }
+}
+
+fn writeStubHelperCommon(self: *MachO) !void {
+    const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?];
+    const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+    const got = &data_const_segment.sections.items[self.got_section_index.?];
+    const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+    const data = &data_segment.sections.items[self.data_section_index.?];
+
+    self.stub_helper_stubs_start_off = blk: {
+        switch (self.base.options.target.cpu.arch) {
+            .x86_64 => {
+                const code_size = 15;
+                var code: [code_size]u8 = undefined;
+                // lea %r11, [rip + disp]
+                code[0] = 0x4c;
+                code[1] = 0x8d;
+                code[2] = 0x1d;
+                {
+                    const target_addr = data.addr + data.size - @sizeOf(u64);
+                    const displacement = try math.cast(u32, target_addr - stub_helper.addr - 7);
+                    mem.writeIntLittle(u32, code[3..7], displacement);
+                }
+                // push %r11
+                code[7] = 0x41;
+                code[8] = 0x53;
+                // jmp [rip + disp]
+                code[9] = 0xff;
+                code[10] = 0x25;
+                {
+                    const n_strx = self.strtab_dir.getAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{
+                        .strtab = &self.strtab,
+                    }) orelse unreachable;
+                    const resolv = self.symbol_resolver.get(n_strx) orelse unreachable;
+                    const got_index = self.got_entries_map.get(.{
+                        .where = .import,
+                        .where_index = resolv.where_index,
+                    }) orelse unreachable;
+                    const addr = got.addr + got_index * @sizeOf(u64);
+                    const displacement = try math.cast(u32, addr - stub_helper.addr - code_size);
+                    mem.writeIntLittle(u32, code[11..], displacement);
+                }
+                try self.base.file.?.pwriteAll(&code, stub_helper.offset);
+                break :blk stub_helper.offset + code_size;
+            },
+            .aarch64 => {
+                var code: [6 * @sizeOf(u32)]u8 = undefined;
+                data_blk_outer: {
+                    const this_addr = stub_helper.addr;
+                    const target_addr = data.addr + data.size - @sizeOf(u64);
+                    data_blk: {
+                        const displacement = math.cast(i21, target_addr - this_addr) catch break :data_blk;
+                        // adr x17, disp
+                        mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32());
+                        // nop
+                        mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32());
+                        break :data_blk_outer;
+                    }
+                    data_blk: {
+                        const new_this_addr = this_addr + @sizeOf(u32);
+                        const displacement = math.cast(i21, target_addr - new_this_addr) catch break :data_blk;
+                        // nop
+                        mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32());
+                        // adr x17, disp
+                        mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32());
+                        break :data_blk_outer;
+                    }
+                    // Jump is too big, replace adr with adrp and add.
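+                    // adrp materialises the 4KB page of the target relative to
+                    // pc (in units of pages), and add fills in the low 12 bits;
+                    // e.g. for target 0x100008123 with pc on page 0x100004,
+                    // pages = 4 and the add immediate is 0x123.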
+                    const this_page = @intCast(i32, this_addr >> 12);
+                    const target_page = @intCast(i32, target_addr >> 12);
+                    const pages = @intCast(i21, target_page - this_page);
+                    mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32());
+                    const narrowed = @truncate(u12, target_addr);
+                    mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32());
+                }
+                // stp x16, x17, [sp, #-16]!
+                code[8] = 0xf0;
+                code[9] = 0x47;
+                code[10] = 0xbf;
+                code[11] = 0xa9;
+                binder_blk_outer: {
+                    const n_strx = self.strtab_dir.getAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{
+                        .strtab = &self.strtab,
+                    }) orelse unreachable;
+                    const resolv = self.symbol_resolver.get(n_strx) orelse unreachable;
+                    const got_index = self.got_entries_map.get(.{
+                        .where = .import,
+                        .where_index = resolv.where_index,
+                    }) orelse unreachable;
+                    const this_addr = stub_helper.addr + 3 * @sizeOf(u32);
+                    const target_addr = got.addr + got_index * @sizeOf(u64);
+                    binder_blk: {
+                        const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :binder_blk;
+                        const literal = math.cast(u18, displacement) catch break :binder_blk;
+                        // ldr x16, label
+                        mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{
+                            .literal = literal,
+                        }).toU32());
+                        // nop
+                        mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32());
+                        break :binder_blk_outer;
+                    }
+                    binder_blk: {
+                        const new_this_addr = this_addr + @sizeOf(u32);
+                        const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :binder_blk;
+                        const literal = math.cast(u18, displacement) catch break :binder_blk;
+                        // Pad with a nop first so that the displacement becomes exactly divisible by 4.
+                        // nop
+                        mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32());
+                        // ldr x16, label
+                        mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{
+                            .literal = literal,
+                        }).toU32());
+                        break :binder_blk_outer;
+                    }
+                    // Use adrp followed by ldr(immediate).
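+                    // ldr (literal) only reaches a forward, 4-byte-aligned window
+                    // of roughly 1MB here (an 18-bit word offset), so more distant
+                    // GOT slots take the adrp + ldr(immediate) route below.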
+                    const this_page = @intCast(i32, this_addr >> 12);
+                    const target_page = @intCast(i32, target_addr >> 12);
+                    const pages = @intCast(i21, target_page - this_page);
+                    mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32());
+                    const narrowed = @truncate(u12, target_addr);
+                    const offset = try math.divExact(u12, narrowed, 8);
+                    mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{
+                        .register = .{
+                            .rn = .x16,
+                            .offset = aarch64.Instruction.LoadStoreOffset.imm(offset),
+                        },
+                    }).toU32());
+                }
+                // br x16
+                code[20] = 0x00;
+                code[21] = 0x02;
+                code[22] = 0x1f;
+                code[23] = 0xd6;
+                try self.base.file.?.pwriteAll(&code, stub_helper.offset);
+                break :blk stub_helper.offset + 6 * @sizeOf(u32);
+            },
+            else => unreachable,
+        }
+    };
+
+    for (self.stubs.items) |_, i| {
+        const index = @intCast(u32, i);
+        // TODO weak bound pointers
+        try self.writeLazySymbolPointer(index);
+        try self.writeStub(index);
+        try self.writeStubInStubHelper(index);
+    }
+}
+
+fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void {
+    const object = self.objects.items[object_id];
+
+    log.debug("resolving symbols in '{s}'", .{object.name});
+
+    for (object.symtab.items) |sym, id| {
+        const sym_id = @intCast(u32, id);
+        const sym_name = object.getString(sym.n_strx);
+
+        if (symbolIsStab(sym)) {
+            log.err("unhandled symbol type: stab", .{});
+            log.err(" symbol '{s}'", .{sym_name});
+            log.err(" first definition in '{s}'", .{object.name.?});
+            return error.UnhandledSymbolType;
+        }
+
+        if (symbolIsIndr(sym)) {
+            log.err("unhandled symbol type: indirect", .{});
+            log.err(" symbol '{s}'", .{sym_name});
+            log.err(" first definition in '{s}'", .{object.name.?});
+            return error.UnhandledSymbolType;
+        }
+
+        if (symbolIsAbs(sym)) {
+            log.err("unhandled symbol type: absolute", .{});
+            log.err(" symbol '{s}'", .{sym_name});
+            log.err(" first definition in '{s}'", .{object.name.?});
+            return error.UnhandledSymbolType;
+        }
+
+        const n_strx = try self.makeString(sym_name);
+        if (symbolIsSect(sym)) {
+            // Defined symbol regardless of scope lands in the locals symbol table.
+            const local_sym_index = @intCast(u32, self.locals.items.len);
+            try self.locals.append(self.base.allocator, .{
+                .n_strx = n_strx,
+                .n_type = macho.N_SECT,
+                .n_sect = 0,
+                .n_desc = 0,
+                .n_value = sym.n_value,
+            });
+            try object.symbol_mapping.putNoClobber(self.base.allocator, sym_id, local_sym_index);
+            try object.reverse_symbol_mapping.putNoClobber(self.base.allocator, local_sym_index, sym_id);
+
+            // If the symbol's scope is not local to the translation unit, then we need to
+            // work out whether we should save the symbol as a global, or potentially flag an error.
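+            // Precedence, roughly: a strong definition beats a weak one, any
+            // definition beats a tentative one, and a tentative definition beats
+            // an undefined reference; two strong definitions are an error.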
+            if (!symbolIsExt(sym)) continue;
+
+            const local = self.locals.items[local_sym_index];
+            const resolv = self.symbol_resolver.getPtr(n_strx) orelse {
+                const global_sym_index = @intCast(u32, self.globals.items.len);
+                try self.globals.append(self.base.allocator, .{
+                    .n_strx = n_strx,
+                    .n_type = sym.n_type,
+                    .n_sect = 0,
+                    .n_desc = sym.n_desc,
+                    .n_value = sym.n_value,
+                });
+                try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{
+                    .where = .global,
+                    .where_index = global_sym_index,
+                    .local_sym_index = local_sym_index,
+                    .file = object_id,
+                });
+                continue;
+            };
+
+            switch (resolv.where) {
+                .import => unreachable,
+                .global => {
+                    const global = &self.globals.items[resolv.where_index];
+
+                    if (!(symbolIsWeakDef(sym) or symbolIsPext(sym)) and
+                        !(symbolIsWeakDef(global.*) or symbolIsPext(global.*)))
+                    {
+                        log.err("symbol '{s}' defined multiple times", .{sym_name});
+                        log.err(" first definition in '{s}'", .{self.objects.items[resolv.file].name.?});
+                        log.err(" next definition in '{s}'", .{object.name.?});
+                        return error.MultipleSymbolDefinitions;
+                    }
+
+                    if (symbolIsWeakDef(sym) or symbolIsPext(sym)) continue; // Current symbol is weak, so skip it.
+
+                    // Otherwise, update the resolver and the global symbol.
+                    global.n_type = sym.n_type;
+                    resolv.local_sym_index = local_sym_index;
+                    resolv.file = object_id;
+
+                    continue;
+                },
+                .undef => {
+                    const undef = &self.undefs.items[resolv.where_index];
+                    undef.* = .{
+                        .n_strx = 0,
+                        .n_type = macho.N_UNDF,
+                        .n_sect = 0,
+                        .n_desc = 0,
+                        .n_value = 0,
+                    };
+                },
+                .tentative => {
+                    const tentative = &self.tentatives.items[resolv.where_index];
+                    tentative.* = .{
+                        .n_strx = 0,
+                        .n_type = macho.N_UNDF,
+                        .n_sect = 0,
+                        .n_desc = 0,
+                        .n_value = 0,
+                    };
+                },
+            }
+
+            const global_sym_index = @intCast(u32, self.globals.items.len);
+            try self.globals.append(self.base.allocator, .{
+                .n_strx = local.n_strx,
+                .n_type = sym.n_type,
+                .n_sect = 0,
+                .n_desc = sym.n_desc,
+                .n_value = sym.n_value,
+            });
+            resolv.* = .{
+                .where = .global,
+                .where_index = global_sym_index,
+                .local_sym_index = local_sym_index,
+                .file = object_id,
+            };
+        } else if (symbolIsTentative(sym)) {
+            // Symbol is a tentative definition.
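+            // (A tentative definition is an uninitialised C global such as `int x;`:
+            // it only reserves space in __common unless a real definition shows up
+            // in some other object file.)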
+            const resolv = self.symbol_resolver.getPtr(n_strx) orelse {
+                const tent_sym_index = @intCast(u32, self.tentatives.items.len);
+                try self.tentatives.append(self.base.allocator, .{
+                    .n_strx = try self.makeString(sym_name),
+                    .n_type = sym.n_type,
+                    .n_sect = 0,
+                    .n_desc = sym.n_desc,
+                    .n_value = sym.n_value,
+                });
+                try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{
+                    .where = .tentative,
+                    .where_index = tent_sym_index,
+                    .file = object_id,
+                });
+                continue;
+            };
+
+            switch (resolv.where) {
+                .import => unreachable,
+                .global => {},
+                .undef => {
+                    const undef = &self.undefs.items[resolv.where_index];
+                    const tent_sym_index = @intCast(u32, self.tentatives.items.len);
+                    try self.tentatives.append(self.base.allocator, .{
+                        .n_strx = undef.n_strx,
+                        .n_type = sym.n_type,
+                        .n_sect = 0,
+                        .n_desc = sym.n_desc,
+                        .n_value = sym.n_value,
+                    });
+                    resolv.* = .{
+                        .where = .tentative,
+                        .where_index = tent_sym_index,
+                        .file = object_id,
+                    };
+                    undef.* = .{
+                        .n_strx = 0,
+                        .n_type = macho.N_UNDF,
+                        .n_sect = 0,
+                        .n_desc = 0,
+                        .n_value = 0,
+                    };
+                },
+                .tentative => {
+                    const tentative = &self.tentatives.items[resolv.where_index];
+                    if (tentative.n_value >= sym.n_value) continue;
+
+                    tentative.n_desc = sym.n_desc;
+                    tentative.n_value = sym.n_value;
+                    resolv.file = object_id;
+                },
+            }
+        } else {
+            // Symbol is undefined.
+            if (self.symbol_resolver.contains(n_strx)) continue;
+
+            const undef_sym_index = @intCast(u32, self.undefs.items.len);
+            try self.undefs.append(self.base.allocator, .{
+                .n_strx = try self.makeString(sym_name),
+                .n_type = macho.N_UNDF,
+                .n_sect = 0,
+                .n_desc = 0,
+                .n_value = 0,
+            });
+            try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{
+                .where = .undef,
+                .where_index = undef_sym_index,
+                .file = object_id,
+            });
+        }
+    }
+}
+
+fn resolveSymbols(self: *MachO) !void {
+    // TODO mimicking insertion of null symbol from incremental linker.
+    // This will need to be moved.
+    try self.locals.append(self.base.allocator, .{
+        .n_strx = 0,
+        .n_type = macho.N_UNDF,
+        .n_sect = 0,
+        .n_desc = 0,
+        .n_value = 0,
+    });
+    try self.strtab.append(self.base.allocator, 0);
+
+    // First pass, resolve symbols in provided objects.
+    for (self.objects.items) |_, object_id| {
+        try self.resolveSymbolsInObject(@intCast(u16, object_id));
+    }
+
+    // Second pass, resolve symbols in static libraries.
+    var next_sym: usize = 0;
+    loop: while (true) : (next_sym += 1) {
+        if (next_sym == self.undefs.items.len) break;
+
+        const sym = self.undefs.items[next_sym];
+        if (symbolIsNull(sym)) continue;
+
+        const sym_name = self.getString(sym.n_strx);
+
+        for (self.archives.items) |archive| {
+            // Check if the entry exists in a static archive.
+            const offsets = archive.toc.get(sym_name) orelse {
+                // No hit.
+                continue;
+            };
+            assert(offsets.items.len > 0);
+
+            const object = try archive.parseObject(offsets.items[0]);
+            const object_id = @intCast(u16, self.objects.items.len);
+            try self.objects.append(self.base.allocator, object);
+            try self.resolveSymbolsInObject(object_id);
+
+            continue :loop;
+        }
+    }
+
+    // Convert any tentative definition into a regular symbol and allocate
+    // text blocks for each tentative definition.
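+    // Each surviving tentative symbol becomes a regular N_SECT global backed by a
+    // zero-filled block in a synthetic __DATA,__common section created on first use.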
+    for (self.tentatives.items) |sym| {
+        if (symbolIsNull(sym)) continue;
+
+        const match: MatchingSection = blk: {
+            if (self.common_section_index == null) {
+                const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+                self.common_section_index = @intCast(u16, data_seg.sections.items.len);
+                try data_seg.addSection(self.base.allocator, "__common", .{
+                    .flags = macho.S_ZEROFILL,
+                });
+            }
+            break :blk .{
+                .seg = self.data_segment_cmd_index.?,
+                .sect = self.common_section_index.?,
+            };
+        };
+        try self.createSectionOrdinal(match);
+
+        const size = sym.n_value;
+        const code = try self.base.allocator.alloc(u8, size);
+        defer self.base.allocator.free(code);
+        mem.set(u8, code, 0);
+        const alignment = (sym.n_desc >> 8) & 0x0f;
+
+        const resolv = self.symbol_resolver.getPtr(sym.n_strx) orelse unreachable;
+        const local_sym_index = @intCast(u32, self.locals.items.len);
+        var nlist = macho.nlist_64{
+            .n_strx = sym.n_strx,
+            .n_type = macho.N_SECT,
+            .n_sect = self.section_to_ordinal.get(match) orelse unreachable,
+            .n_desc = 0,
+            .n_value = 0,
+        };
+        try self.locals.append(self.base.allocator, nlist);
+        const global_sym_index = @intCast(u32, self.globals.items.len);
+        nlist.n_type |= macho.N_EXT;
+        try self.globals.append(self.base.allocator, nlist);
+        resolv.* = .{
+            .where = .global,
+            .where_index = global_sym_index,
+            .local_sym_index = local_sym_index,
+        };
+
+        const block = try self.base.allocator.create(TextBlock);
+        block.* = TextBlock.empty;
+        block.local_sym_index = local_sym_index;
+        block.size = size;
+        block.alignment = alignment;
+        try self.managed_blocks.append(self.base.allocator, block);
+
+        try block.code.appendSlice(self.base.allocator, code);
+
+        // Update target section's metadata
+        // TODO should we update segment's size here too?
+        // How does it tie in with incremental space allocs?
+        const tseg = &self.load_commands.items[match.seg].Segment;
+        const tsect = &tseg.sections.items[match.sect];
+        const new_alignment = math.max(tsect.@"align", block.alignment);
+        const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
+        const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
+        tsect.size = new_size;
+        tsect.@"align" = new_alignment;
+
+        if (self.blocks.getPtr(match)) |last| {
+            last.*.next = block;
+            block.prev = last.*;
+            last.* = block;
+        } else {
+            try self.blocks.putNoClobber(self.base.allocator, match, block);
+        }
+    }
+
+    // Third pass, resolve symbols in dynamic libraries.
+    {
+        // Put dyld_stub_binder as an undefined special symbol.
+        const n_strx = try self.makeString("dyld_stub_binder");
+        const undef_sym_index = @intCast(u32, self.undefs.items.len);
+        try self.undefs.append(self.base.allocator, .{
+            .n_strx = n_strx,
+            .n_type = macho.N_UNDF,
+            .n_sect = 0,
+            .n_desc = 0,
+            .n_value = 0,
+        });
+        try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{
+            .where = .undef,
+            .where_index = undef_sym_index,
+        });
+    }
+
+    var referenced = std.AutoHashMap(*Dylib, void).init(self.base.allocator);
+    defer referenced.deinit();
+
+    loop: for (self.undefs.items) |sym| {
+        if (symbolIsNull(sym)) continue;
+
+        const sym_name = self.getString(sym.n_strx);
+        for (self.dylibs.items) |dylib| {
+            if (!dylib.symbols.contains(sym_name)) continue;
+
+            if (!referenced.contains(dylib)) {
+                // Add LC_LOAD_DYLIB load command for each referenced dylib/stub.
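+                // Each dylib gets exactly one LC_LOAD_DYLIB and one dyld ordinal
+                // (1-based), no matter how many of its symbols end up imported.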
+                dylib.ordinal = self.next_dylib_ordinal;
+                const dylib_id = dylib.id orelse unreachable;
+                var dylib_cmd = try commands.createLoadDylibCommand(
+                    self.base.allocator,
+                    dylib_id.name,
+                    dylib_id.timestamp,
+                    dylib_id.current_version,
+                    dylib_id.compatibility_version,
+                );
+                errdefer dylib_cmd.deinit(self.base.allocator);
+                try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd });
+                self.next_dylib_ordinal += 1;
+                try referenced.putNoClobber(dylib, {});
+            }
+
+            const resolv = self.symbol_resolver.getPtr(sym.n_strx) orelse unreachable;
+            const undef = &self.undefs.items[resolv.where_index];
+            const import_sym_index = @intCast(u32, self.imports.items.len);
+            try self.imports.append(self.base.allocator, .{
+                .n_strx = undef.n_strx,
+                .n_type = macho.N_UNDF | macho.N_EXT,
+                .n_sect = 0,
+                .n_desc = packDylibOrdinal(dylib.ordinal.?),
+                .n_value = 0,
+            });
+            resolv.* = .{
+                .where = .import,
+                .where_index = import_sym_index,
+            };
+            undef.* = .{
+                .n_strx = 0,
+                .n_type = macho.N_UNDF,
+                .n_sect = 0,
+                .n_desc = 0,
+                .n_value = 0,
+            };
+
+            continue :loop;
+        }
+    }
+
+    // Fourth pass, handle synthetic symbols and flag any undefined references.
+    if (self.strtab_dir.getAdapted(@as([]const u8, "___dso_handle"), StringSliceAdapter{
+        .strtab = &self.strtab,
+    })) |n_strx| blk: {
+        const resolv = self.symbol_resolver.getPtr(n_strx) orelse break :blk;
+        if (resolv.where != .undef) break :blk;
+
+        const undef = &self.undefs.items[resolv.where_index];
+        const match: MatchingSection = .{
+            .seg = self.text_segment_cmd_index.?,
+            .sect = self.text_section_index.?,
+        };
+        const local_sym_index = @intCast(u32, self.locals.items.len);
+        var nlist = macho.nlist_64{
+            .n_strx = undef.n_strx,
+            .n_type = macho.N_SECT,
+            .n_sect = self.section_to_ordinal.get(match) orelse unreachable,
+            .n_desc = 0,
+            .n_value = 0,
+        };
+        try self.locals.append(self.base.allocator, nlist);
+        const global_sym_index = @intCast(u32, self.globals.items.len);
+        nlist.n_type |= macho.N_EXT;
+        nlist.n_desc = macho.N_WEAK_DEF;
+        try self.globals.append(self.base.allocator, nlist);
+
+        undef.* = .{
+            .n_strx = 0,
+            .n_type = macho.N_UNDF,
+            .n_sect = 0,
+            .n_desc = 0,
+            .n_value = 0,
+        };
+        resolv.* = .{
+            .where = .global,
+            .where_index = global_sym_index,
+            .local_sym_index = local_sym_index,
+        };
+
+        // We create an empty atom for this symbol.
+        // TODO perhaps we should special-case special symbols? Create a separate
+        // linked list of atoms?
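+        // (___dso_handle is referenced by e.g. C++ static-destructor registration;
+        // the synthetic zero-sized weak definition above is enough to satisfy it.)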
+        const block = try self.base.allocator.create(TextBlock);
+        block.* = TextBlock.empty;
+        block.local_sym_index = local_sym_index;
+        block.size = 0;
+        block.alignment = 0;
+        try self.managed_blocks.append(self.base.allocator, block);
+
+        if (self.blocks.getPtr(match)) |last| {
+            last.*.next = block;
+            block.prev = last.*;
+            last.* = block;
+        } else {
+            try self.blocks.putNoClobber(self.base.allocator, match, block);
+        }
+    }
+
+    var has_undefined = false;
+    for (self.undefs.items) |sym| {
+        if (symbolIsNull(sym)) continue;
+
+        const sym_name = self.getString(sym.n_strx);
+        const resolv = self.symbol_resolver.get(sym.n_strx) orelse unreachable;
+
+        log.err("undefined reference to symbol '{s}'", .{sym_name});
+        log.err(" first referenced in '{s}'", .{self.objects.items[resolv.file].name.?});
+        has_undefined = true;
+    }
+
+    if (has_undefined) return error.UndefinedSymbolReference;
+}
+
+fn parseTextBlocks(self: *MachO) !void {
+    for (self.objects.items) |object| {
+        try object.parseTextBlocks(self);
+    }
+}
+
+fn populateMetadata(self: *MachO) !void {
+    if (self.pagezero_segment_cmd_index == null) {
+        self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.base.allocator, .{
+            .Segment = SegmentCommand.empty("__PAGEZERO", .{
+                .vmsize = 0x100000000, // size always set to 4GB
+            }),
+        });
+    }
+
+    if (self.text_segment_cmd_index == null) {
+        self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.base.allocator, .{
+            .Segment = SegmentCommand.empty("__TEXT", .{
+                .vmaddr = 0x100000000, // always starts at 4GB
+                .maxprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE,
+                .initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE,
+            }),
+        });
+    }
+
+    if (self.text_section_index == null) {
+        const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+        self.text_section_index = @intCast(u16, text_seg.sections.items.len);
+        const alignment: u2 = switch (self.base.options.target.cpu.arch) {
+            .x86_64 => 0,
+            .aarch64 => 2,
+            else => unreachable, // unhandled architecture type
+        };
+        try text_seg.addSection(self.base.allocator, "__text", .{
+            .@"align" = alignment,
+            .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
+        });
+        try self.createSectionOrdinal(.{
+            .seg = self.text_segment_cmd_index.?,
+            .sect = self.text_section_index.?,
+        });
+    }
+
+    if (self.stubs_section_index == null) {
+        const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+        self.stubs_section_index = @intCast(u16, text_seg.sections.items.len);
+        const alignment: u2 = switch (self.base.options.target.cpu.arch) {
+            .x86_64 => 0,
+            .aarch64 => 2,
+            else => unreachable, // unhandled architecture type
+        };
+        const stub_size: u4 = switch (self.base.options.target.cpu.arch) {
+            .x86_64 => 6,
+            .aarch64 => 3 * @sizeOf(u32),
+            else => unreachable, // unhandled architecture type
+        };
+        try text_seg.addSection(self.base.allocator, "__stubs", .{
+            .@"align" = alignment,
+            .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
+            .reserved2 = stub_size,
+        });
+        try self.createSectionOrdinal(.{
+            .seg = self.text_segment_cmd_index.?,
+            .sect = self.stubs_section_index.?,
+        });
+    }
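+    // For S_SYMBOL_STUBS sections, reserved2 carries the size of a single stub
+    // entry; reserved1 (the section's first index into the indirect symbol
+    // table) is filled in later, in writeSymbolTable.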
+
+    if (self.stub_helper_section_index == null) {
+        const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+        self.stub_helper_section_index = @intCast(u16, text_seg.sections.items.len);
+        const alignment: u2 = switch (self.base.options.target.cpu.arch) {
+            .x86_64 => 0,
+            .aarch64 => 2,
+            else => unreachable, // unhandled architecture type
+        };
+        const stub_helper_size: u6 = switch (self.base.options.target.cpu.arch) {
+            .x86_64 => 15,
+            .aarch64 => 6 * @sizeOf(u32),
+            else => unreachable,
+        };
+        try text_seg.addSection(self.base.allocator, "__stub_helper", .{
+            .size = stub_helper_size,
+            .@"align" = alignment,
+            .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
+        });
+        try self.createSectionOrdinal(.{
+            .seg = self.text_segment_cmd_index.?,
+            .sect = self.stub_helper_section_index.?,
+        });
+    }
+
+    if (self.data_const_segment_cmd_index == null) {
+        self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.base.allocator, .{
+            .Segment = SegmentCommand.empty("__DATA_CONST", .{
+                .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE,
+                .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE,
+            }),
+        });
+    }
+
+    if (self.got_section_index == null) {
+        const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+        self.got_section_index = @intCast(u16, data_const_seg.sections.items.len);
+        try data_const_seg.addSection(self.base.allocator, "__got", .{
+            .@"align" = 3, // 2^3 = @sizeOf(u64)
+            .flags = macho.S_NON_LAZY_SYMBOL_POINTERS,
+        });
+        try self.createSectionOrdinal(.{
+            .seg = self.data_const_segment_cmd_index.?,
+            .sect = self.got_section_index.?,
+        });
+    }
+
+    if (self.data_segment_cmd_index == null) {
+        self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.base.allocator, .{
+            .Segment = SegmentCommand.empty("__DATA", .{
+                .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE,
+                .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE,
+            }),
+        });
+    }
+
+    if (self.la_symbol_ptr_section_index == null) {
+        const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        self.la_symbol_ptr_section_index = @intCast(u16, data_seg.sections.items.len);
+        try data_seg.addSection(self.base.allocator, "__la_symbol_ptr", .{
+            .@"align" = 3, // 2^3 = @sizeOf(u64)
+            .flags = macho.S_LAZY_SYMBOL_POINTERS,
+        });
+        try self.createSectionOrdinal(.{
+            .seg = self.data_segment_cmd_index.?,
+            .sect = self.la_symbol_ptr_section_index.?,
+        });
+    }
+
+    if (self.data_section_index == null) {
+        const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        self.data_section_index = @intCast(u16, data_seg.sections.items.len);
+        try data_seg.addSection(self.base.allocator, "__data", .{
+            .@"align" = 3, // 2^3 = @sizeOf(u64)
+        });
+        try self.createSectionOrdinal(.{
+            .seg = self.data_segment_cmd_index.?,
+            .sect = self.data_section_index.?,
+        });
+    }
+
+    if (self.linkedit_segment_cmd_index == null) {
+        self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.base.allocator, .{
+            .Segment = SegmentCommand.empty("__LINKEDIT", .{
+                .maxprot = macho.VM_PROT_READ,
+                .initprot = macho.VM_PROT_READ,
+            }),
+        });
+    }
+
+    if (self.dyld_info_cmd_index == null) {
+        self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.base.allocator, .{
+            .DyldInfoOnly = .{
+                .cmd = macho.LC_DYLD_INFO_ONLY,
+                .cmdsize = @sizeOf(macho.dyld_info_command),
+                .rebase_off = 0,
+                .rebase_size = 0,
+                .bind_off = 0,
+                .bind_size = 0,
+                .weak_bind_off = 0,
+                .weak_bind_size = 0,
+                .lazy_bind_off = 0,
+                .lazy_bind_size = 0,
+                .export_off = 0,
+                .export_size = 0,
+            },
+        });
+    }
+
+    if (self.symtab_cmd_index == null) {
+        self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.base.allocator, .{
+            .Symtab = .{
+                .cmd = macho.LC_SYMTAB,
+                .cmdsize = @sizeOf(macho.symtab_command),
+                .symoff = 0,
+                .nsyms = 0,
+                .stroff = 0,
+                .strsize = 0,
+            },
+        });
+    }
+
+    if (self.dysymtab_cmd_index == null) {
+        self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.base.allocator, .{
+            .Dysymtab = .{
+                .cmd = macho.LC_DYSYMTAB,
+                .cmdsize = @sizeOf(macho.dysymtab_command),
+                .ilocalsym = 0,
+                .nlocalsym = 0,
+                .iextdefsym = 0,
+                .nextdefsym = 0,
+                .iundefsym = 0,
+                .nundefsym = 0,
+                .tocoff = 0,
+                .ntoc = 0,
+                .modtaboff = 0,
+                .nmodtab = 0,
+                .extrefsymoff = 0,
+                .nextrefsyms = 0,
+                .indirectsymoff = 0,
+                .nindirectsyms = 0,
+                .extreloff = 0,
+                .nextrel = 0,
+                .locreloff = 0,
+                .nlocrel = 0,
+            },
+        });
+    }
+
+    if (self.dylinker_cmd_index == null) {
+        self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len);
+        const cmdsize = @intCast(u32, mem.alignForwardGeneric(
+            u64,
+            @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH),
+            @sizeOf(u64),
+        ));
+        var dylinker_cmd = commands.emptyGenericCommandWithData(macho.dylinker_command{
+            .cmd = macho.LC_LOAD_DYLINKER,
+            .cmdsize = cmdsize,
+            .name = @sizeOf(macho.dylinker_command),
+        });
+        dylinker_cmd.data = try self.base.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name);
+        mem.set(u8, dylinker_cmd.data, 0);
+        mem.copy(u8, dylinker_cmd.data, mem.spanZ(DEFAULT_DYLD_PATH));
+        try self.load_commands.append(self.base.allocator, .{ .Dylinker = dylinker_cmd });
+    }
+
+    if (self.main_cmd_index == null and self.base.options.output_mode == .Exe) {
+        self.main_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.base.allocator, .{
+            .Main = .{
+                .cmd = macho.LC_MAIN,
+                .cmdsize = @sizeOf(macho.entry_point_command),
+                .entryoff = 0x0,
+                .stacksize = 0,
+            },
+        });
+    }
+
+    if (self.dylib_id_cmd_index == null and self.base.options.output_mode == .Lib) {
+        self.dylib_id_cmd_index = @intCast(u16, self.load_commands.items.len);
+        const install_name = try std.fmt.allocPrint(self.base.allocator, "@rpath/{s}", .{
+            self.base.options.emit.?.sub_path,
+        });
+        defer self.base.allocator.free(install_name);
+        var dylib_cmd = try commands.createLoadDylibCommand(
+            self.base.allocator,
+            install_name,
+            2,
+            0x10000, // TODO forward user-provided versions
+            0x10000,
+        );
+        errdefer dylib_cmd.deinit(self.base.allocator);
+        dylib_cmd.inner.cmd = macho.LC_ID_DYLIB;
+        try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd });
+    }
+
+    if (self.version_min_cmd_index == null) {
+        self.version_min_cmd_index = @intCast(u16, self.load_commands.items.len);
+        const cmd: u32 = switch (self.base.options.target.os.tag) {
+            .macos => macho.LC_VERSION_MIN_MACOSX,
+            .ios => macho.LC_VERSION_MIN_IPHONEOS,
+            .tvos => macho.LC_VERSION_MIN_TVOS,
+            .watchos => macho.LC_VERSION_MIN_WATCHOS,
+            else => unreachable, // wrong OS
+        };
+        const ver = self.base.options.target.os.version_range.semver.min;
+        const version = ver.major << 16 | ver.minor << 8 | ver.patch;
+        try self.load_commands.append(self.base.allocator, .{
+            .VersionMin = .{
+                .cmd = cmd,
+                .cmdsize = @sizeOf(macho.version_min_command),
+                .version = version,
+                .sdk = version,
+            },
+        });
+    }
+
+    if (self.source_version_cmd_index == null) {
+        self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.base.allocator, .{
+            .SourceVersion = .{
+                .cmd = macho.LC_SOURCE_VERSION,
+                .cmdsize = @sizeOf(macho.source_version_command),
+                .version = 0x0,
+            },
+        });
+    }
+
+    if (self.uuid_cmd_index == null) {
+        self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len);
+        var uuid_cmd: macho.uuid_command = .{
+            .cmd = macho.LC_UUID,
+            .cmdsize = @sizeOf(macho.uuid_command),
+            .uuid = undefined,
+        };
+        std.crypto.random.bytes(&uuid_cmd.uuid);
+        try self.load_commands.append(self.base.allocator, .{ .Uuid = uuid_cmd });
+    }
+}
+
+fn addDataInCodeLC(self: *MachO) !void {
+    if (self.data_in_code_cmd_index == null) {
+        self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.base.allocator, .{
+            .LinkeditData = .{
+                .cmd = macho.LC_DATA_IN_CODE,
+                .cmdsize = @sizeOf(macho.linkedit_data_command),
+                .dataoff = 0,
+                .datasize = 0,
+            },
+        });
+    }
+}
+
+fn addCodeSignatureLC(self: *MachO) !void {
+    if (self.code_signature_cmd_index == null and self.base.options.target.cpu.arch == .aarch64) {
+        self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.base.allocator, .{
+            .LinkeditData = .{
+                .cmd = macho.LC_CODE_SIGNATURE,
+                .cmdsize = @sizeOf(macho.linkedit_data_command),
+                .dataoff = 0,
+                .datasize = 0,
+            },
+        });
+    }
+}
+
+fn addRpaths(self: *MachO, rpaths: []const []const u8) !void {
+    for (rpaths) |rpath| {
+        const cmdsize = @intCast(u32, mem.alignForwardGeneric(
+            u64,
+            @sizeOf(macho.rpath_command) + rpath.len + 1,
+            @sizeOf(u64),
+        ));
+        var rpath_cmd = commands.emptyGenericCommandWithData(macho.rpath_command{
+            .cmd = macho.LC_RPATH,
+            .cmdsize = cmdsize,
+            .path = @sizeOf(macho.rpath_command),
+        });
+        rpath_cmd.data = try self.base.allocator.alloc(u8, cmdsize - rpath_cmd.inner.path);
+        mem.set(u8, rpath_cmd.data, 0);
+        mem.copy(u8, rpath_cmd.data, rpath);
+        try self.load_commands.append(self.base.allocator, .{ .Rpath = rpath_cmd });
+    }
+}
+
+fn flushZld(self: *MachO) !void {
+    self.load_commands_dirty = true;
+    try self.writeTextBlocks();
+    try self.writeStubHelperCommon();
+
+    if (self.common_section_index) |index| {
+        const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        const sect = &seg.sections.items[index];
+        sect.offset = 0;
+    }
+
+    if (self.bss_section_index) |index| {
+        const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        const sect = &seg.sections.items[index];
+        sect.offset = 0;
+    }
+
+    if (self.tlv_bss_section_index) |index| {
+        const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        const sect = &seg.sections.items[index];
+        sect.offset = 0;
+    }
+
+    try self.writeGotEntries();
+    try self.setEntryPoint();
+    try self.writeRebaseInfoTableZld();
+    try self.writeBindInfoTableZld();
+    try self.writeLazyBindInfoTableZld();
+    try self.writeExportInfoZld();
+    try self.writeDices();
+
+    {
+        const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+        const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
+        symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize);
+    }
+
+    try self.writeSymbolTable();
+    try self.writeStringTableZld();
+
+    {
+        // Seal __LINKEDIT size
+        const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+        seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size);
+    }
+
+    if (self.base.options.target.cpu.arch == .aarch64) {
+        try self.writeCodeSignaturePadding();
+    }
+
+    try self.writeLoadCommands();
+    try self.writeHeader();
+
+    if (self.base.options.target.cpu.arch == .aarch64) {
+        try self.writeCodeSignature();
+    }
+
+    // if (comptime std.Target.current.isDarwin() and std.Target.current.cpu.arch == .aarch64) {
+    //     const out_path = self.output.?.path;
+    //     try fs.cwd().copyFile(out_path, fs.cwd(), out_path, .{});
+    // }
+}
+
+fn writeGotEntries(self: *MachO) !void {
+    const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+    const sect = seg.sections.items[self.got_section_index.?];
+
+    var buffer = try self.base.allocator.alloc(u8, self.got_entries.items.len * @sizeOf(u64));
+    defer self.base.allocator.free(buffer);
+
+    var stream = std.io.fixedBufferStream(buffer);
+    var writer = stream.writer();
+
+    for (self.got_entries.items) |key| {
+        const address: u64 = switch (key.where) {
+            .local => self.locals.items[key.where_index].n_value,
+            .import => 0,
+        };
+        try writer.writeIntLittle(u64, address);
+    }
+
+    log.debug("writing GOT pointers at 0x{x} to 0x{x}", .{ sect.offset, sect.offset + buffer.len });
+
+    try self.base.file.?.pwriteAll(buffer, sect.offset);
+}
+
+fn setEntryPoint(self: *MachO) !void {
+    if (self.base.options.output_mode != .Exe) return;
+
+    // TODO we should respect the -entry flag passed in by the user to set a custom
+    // entrypoint. For now, assume default of `_main`.
+    const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const n_strx = self.strtab_dir.getAdapted(@as([]const u8, "_main"), StringSliceAdapter{
+        .strtab = &self.strtab,
+    }) orelse {
+        log.err("'_main' export not found", .{});
+        return error.MissingMainEntrypoint;
+    };
+    const resolv = self.symbol_resolver.get(n_strx) orelse unreachable;
+    assert(resolv.where == .global);
+    const sym = self.globals.items[resolv.where_index];
+    const ec = &self.load_commands.items[self.main_cmd_index.?].Main;
+    ec.entryoff = @intCast(u32, sym.n_value - seg.inner.vmaddr);
+    ec.stacksize = self.base.options.stack_size_override orelse 0;
+}
+
+fn writeRebaseInfoTableZld(self: *MachO) !void {
+    var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator);
+    defer pointers.deinit();
+
+    {
+        var it = self.blocks.iterator();
+        while (it.next()) |entry| {
+            const match = entry.key_ptr.*;
+            var block: *TextBlock = entry.value_ptr.*;
+
+            if (match.seg == self.text_segment_cmd_index.?) continue; // __TEXT is non-writable
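+            // Rebase opcodes tell dyld which absolute, file-local pointers have
+            // to be slid by the difference between the image's preferred and
+            // actual load addresses.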
+
+            const seg = self.load_commands.items[match.seg].Segment;
+
+            while (true) {
+                const sym = self.locals.items[block.local_sym_index];
+                const base_offset = sym.n_value - seg.inner.vmaddr;
+
+                for (block.rebases.items) |offset| {
+                    try pointers.append(.{
+                        .offset = base_offset + offset,
+                        .segment_id = match.seg,
+                    });
+                }
+
+                if (block.prev) |prev| {
+                    block = prev;
+                } else break;
+            }
+        }
+    }
+
+    if (self.got_section_index) |idx| {
+        const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+        const sect = seg.sections.items[idx];
+        const base_offset = sect.addr - seg.inner.vmaddr;
+        const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?);
+
+        for (self.got_entries.items) |entry, i| {
+            if (entry.where == .import) continue;
+
+            try pointers.append(.{
+                .offset = base_offset + i * @sizeOf(u64),
+                .segment_id = segment_id,
+            });
+        }
+    }
+
+    if (self.la_symbol_ptr_section_index) |idx| {
+        const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        const sect = seg.sections.items[idx];
+        const base_offset = sect.addr - seg.inner.vmaddr;
+        const segment_id = @intCast(u16, self.data_segment_cmd_index.?);
+
+        try pointers.ensureUnusedCapacity(self.stubs.items.len);
+        for (self.stubs.items) |_, i| {
+            pointers.appendAssumeCapacity(.{
+                .offset = base_offset + i * @sizeOf(u64),
+                .segment_id = segment_id,
+            });
+        }
+    }
+
+    std.sort.sort(bind.Pointer, pointers.items, {}, bind.pointerCmp);
+
+    const size = try bind.rebaseInfoSize(pointers.items);
+    var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size));
+    defer self.base.allocator.free(buffer);
+
+    var stream = std.io.fixedBufferStream(buffer);
+    try bind.writeRebaseInfo(pointers.items, stream.writer());
+
+    const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
+    dyld_info.rebase_off = @intCast(u32, seg.inner.fileoff);
+    dyld_info.rebase_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @sizeOf(u64)));
+    seg.inner.filesize += dyld_info.rebase_size;
+
+    log.debug("writing rebase info from 0x{x} to 0x{x}", .{ dyld_info.rebase_off, dyld_info.rebase_off + dyld_info.rebase_size });
+
+    try self.base.file.?.pwriteAll(buffer, dyld_info.rebase_off);
+}
+
+fn writeBindInfoTableZld(self: *MachO) !void {
+    var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator);
+    defer pointers.deinit();
+
+    if (self.got_section_index) |idx| {
+        const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+        const sect = seg.sections.items[idx];
+        const base_offset = sect.addr - seg.inner.vmaddr;
+        const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?);
+
+        for (self.got_entries.items) |entry, i| {
+            if (entry.where == .local) continue;
+
+            const sym = self.imports.items[entry.where_index];
+            try pointers.append(.{
+                .offset = base_offset + i * @sizeOf(u64),
+                .segment_id = segment_id,
+                .dylib_ordinal = unpackDylibOrdinal(sym.n_desc),
+                .name = self.getString(sym.n_strx),
+            });
+        }
+    }
+
+    {
+        var it = self.blocks.iterator();
+        while (it.next()) |entry| {
+            const match = entry.key_ptr.*;
+            var block: *TextBlock = entry.value_ptr.*;
+
+            if (match.seg == self.text_segment_cmd_index.?) continue; // __TEXT is non-writable
+
+            const seg = self.load_commands.items[match.seg].Segment;
+
+            while (true) {
+                const sym = self.locals.items[block.local_sym_index];
+                const base_offset = sym.n_value - seg.inner.vmaddr;
+
+                for (block.bindings.items) |binding| {
+                    const bind_sym = self.imports.items[binding.local_sym_index];
+                    try pointers.append(.{
+                        .offset = binding.offset + base_offset,
+                        .segment_id = match.seg,
+                        .dylib_ordinal = unpackDylibOrdinal(bind_sym.n_desc),
+                        .name = self.getString(bind_sym.n_strx),
+                    });
+                }
+
+                if (block.prev) |prev| {
+                    block = prev;
+                } else break;
+            }
+        }
+    }
+
+    const size = try bind.bindInfoSize(pointers.items);
+    var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size));
+    defer self.base.allocator.free(buffer);
+
+    var stream = std.io.fixedBufferStream(buffer);
+    try bind.writeBindInfo(pointers.items, stream.writer());
+
+    const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
+    dyld_info.bind_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize);
+    dyld_info.bind_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)));
+    seg.inner.filesize += dyld_info.bind_size;
+
+    log.debug("writing binding info from 0x{x} to 0x{x}", .{ dyld_info.bind_off, dyld_info.bind_off + dyld_info.bind_size });
+
+    try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off);
+}
+
+fn writeLazyBindInfoTableZld(self: *MachO) !void {
+    var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator);
+    defer pointers.deinit();
+
+    if (self.la_symbol_ptr_section_index) |idx| {
+        const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        const sect = seg.sections.items[idx];
+        const base_offset = sect.addr - seg.inner.vmaddr;
+        const segment_id = @intCast(u16, self.data_segment_cmd_index.?);
+
+        try pointers.ensureUnusedCapacity(self.stubs.items.len);
+
+        for (self.stubs.items) |import_id, i| {
+            const sym = self.imports.items[import_id];
+            pointers.appendAssumeCapacity(.{
+                .offset = base_offset + i * @sizeOf(u64),
+                .segment_id = segment_id,
+                .dylib_ordinal = unpackDylibOrdinal(sym.n_desc),
+                .name = self.getString(sym.n_strx),
+            });
+        }
+    }
+
+    const size = try bind.lazyBindInfoSize(pointers.items);
+    var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size));
+    defer self.base.allocator.free(buffer);
+
+    var stream = std.io.fixedBufferStream(buffer);
+    try bind.writeLazyBindInfo(pointers.items, stream.writer());
+
+    const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
+    dyld_info.lazy_bind_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize);
+    dyld_info.lazy_bind_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)));
+    seg.inner.filesize += dyld_info.lazy_bind_size;
+
+    log.debug("writing lazy binding info from 0x{x} to 0x{x}", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size });
+
+    try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off);
+    try self.populateLazyBindOffsetsInStubHelper(buffer);
+}
+
+fn writeExportInfoZld(self: *MachO) !void {
+    var trie = Trie.init(self.base.allocator);
+    defer trie.deinit();
+
+    const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const base_address = text_segment.inner.vmaddr;
+
+    // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER.
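+    // The export info is a prefix trie keyed on symbol name; each exported
+    // global is recorded with its offset from the image base (__TEXT vmaddr).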
+    log.debug("writing export trie", .{});
+
+    for (self.globals.items) |sym| {
+        const sym_name = self.getString(sym.n_strx);
+        log.debug(" | putting '{s}' defined at 0x{x}", .{ sym_name, sym.n_value });
+
+        try trie.put(.{
+            .name = sym_name,
+            .vmaddr_offset = sym.n_value - base_address,
+            .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR,
+        });
+    }
+
+    try trie.finalize();
+
+    var buffer = try self.base.allocator.alloc(u8, @intCast(usize, trie.size));
+    defer self.base.allocator.free(buffer);
+
+    var stream = std.io.fixedBufferStream(buffer);
+    const nwritten = try trie.write(stream.writer());
+    assert(nwritten == trie.size);
+
+    const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
+    dyld_info.export_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize);
+    dyld_info.export_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)));
+    seg.inner.filesize += dyld_info.export_size;
+
+    log.debug("writing export info from 0x{x} to 0x{x}", .{ dyld_info.export_off, dyld_info.export_off + dyld_info.export_size });
+
+    try self.base.file.?.pwriteAll(buffer, dyld_info.export_off);
+}
+
+fn writeSymbolTable(self: *MachO) !void {
+    const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
+
+    var locals = std.ArrayList(macho.nlist_64).init(self.base.allocator);
+    defer locals.deinit();
+    try locals.appendSlice(self.locals.items);
+
+    if (self.has_stabs) {
+        for (self.objects.items) |object| {
+            if (object.debug_info == null) continue;
+
+            // Open scope
+            try locals.ensureUnusedCapacity(3);
+            locals.appendAssumeCapacity(.{
+                .n_strx = try self.makeString(object.tu_comp_dir.?),
+                .n_type = macho.N_SO,
+                .n_sect = 0,
+                .n_desc = 0,
+                .n_value = 0,
+            });
+            locals.appendAssumeCapacity(.{
+                .n_strx = try self.makeString(object.tu_name.?),
+                .n_type = macho.N_SO,
+                .n_sect = 0,
+                .n_desc = 0,
+                .n_value = 0,
+            });
+            locals.appendAssumeCapacity(.{
+                .n_strx = try self.makeString(object.name.?),
+                .n_type = macho.N_OSO,
+                .n_sect = 0,
+                .n_desc = 1,
+                .n_value = object.mtime orelse 0,
+            });
+
+            for (object.text_blocks.items) |block| {
+                if (block.stab) |stab| {
+                    const nlists = try stab.asNlists(block.local_sym_index, self);
+                    defer self.base.allocator.free(nlists);
+                    try locals.appendSlice(nlists);
+                } else {
+                    for (block.contained.items) |sym_at_off| {
+                        const stab = sym_at_off.stab orelse continue;
+                        const nlists = try stab.asNlists(sym_at_off.local_sym_index, self);
+                        defer self.base.allocator.free(nlists);
+                        try locals.appendSlice(nlists);
+                    }
+                }
+            }
+
+            // Close scope
+            try locals.append(.{
+                .n_strx = 0,
+                .n_type = macho.N_SO,
+                .n_sect = 0,
+                .n_desc = 0,
+                .n_value = 0,
+            });
+        }
+    }
+
+    const nlocals = locals.items.len;
+    const nexports = self.globals.items.len;
+    const nundefs = self.imports.items.len;
+
+    const locals_off = symtab.symoff + symtab.nsyms * @sizeOf(macho.nlist_64);
+    const locals_size = nlocals * @sizeOf(macho.nlist_64);
+    log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off });
+    try self.base.file.?.pwriteAll(mem.sliceAsBytes(locals.items), locals_off);
+
+    const exports_off = locals_off + locals_size;
+    const exports_size = nexports * @sizeOf(macho.nlist_64);
+    log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off });
0x{x}", .{ exports_off, exports_size + exports_off }); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), exports_off); + + const undefs_off = exports_off + exports_size; + const undefs_size = nundefs * @sizeOf(macho.nlist_64); + log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.imports.items), undefs_off); + + symtab.nsyms += @intCast(u32, nlocals + nexports + nundefs); + seg.inner.filesize += locals_size + exports_size + undefs_size; + + // Update dynamic symbol table. + const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; + dysymtab.nlocalsym += @intCast(u32, nlocals); + dysymtab.iextdefsym = dysymtab.nlocalsym; + dysymtab.nextdefsym = @intCast(u32, nexports); + dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; + dysymtab.nundefsym = @intCast(u32, nundefs); + + const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const stubs = &text_segment.sections.items[self.stubs_section_index.?]; + const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const got = &data_const_segment.sections.items[self.got_section_index.?]; + const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; + + const nstubs = @intCast(u32, self.stubs.items.len); + const ngot_entries = @intCast(u32, self.got_entries.items.len); + + dysymtab.indirectsymoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + dysymtab.nindirectsyms = nstubs * 2 + ngot_entries; + + const needed_size = dysymtab.nindirectsyms * @sizeOf(u32); + seg.inner.filesize += needed_size; + + log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ + dysymtab.indirectsymoff, + dysymtab.indirectsymoff + needed_size, + }); + + var buf = try self.base.allocator.alloc(u8, needed_size); + defer self.base.allocator.free(buf); + + var stream = std.io.fixedBufferStream(buf); + var writer = stream.writer(); + + stubs.reserved1 = 0; + for (self.stubs.items) |id| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + id); + } + + got.reserved1 = nstubs; + for (self.got_entries.items) |entry| { + switch (entry.where) { + .import => { + try writer.writeIntLittle(u32, dysymtab.iundefsym + entry.where_index); + }, + .local => { + try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); + }, + } + } + + la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; + for (self.stubs.items) |id| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + id); + } + + try self.base.file.?.pwriteAll(buf, dysymtab.indirectsymoff); } pub fn deinit(self: *MachO) void { if (build_options.have_llvm) { if (self.llvm_object) |llvm_object| llvm_object.destroy(self.base.allocator); } + if (self.d_sym) |*ds| { ds.deinit(self.base.allocator); } - for (self.lazy_imports.keys()) |*key| { - self.base.allocator.free(key.*); - } - self.lazy_imports.deinit(self.base.allocator); - for (self.nonlazy_imports.keys()) |*key| { - self.base.allocator.free(key.*); - } - self.nonlazy_imports.deinit(self.base.allocator); - self.pie_fixups.deinit(self.base.allocator); - self.stub_fixups.deinit(self.base.allocator); - self.text_block_free_list.deinit(self.base.allocator); - self.offset_table.deinit(self.base.allocator); - self.offset_table_free_list.deinit(self.base.allocator); - { - var it = 
-        while (it.next()) |key| {
-            self.base.allocator.free(key.*);
-        }
-    }
-    self.string_table_directory.deinit(self.base.allocator);
-    self.string_table.deinit(self.base.allocator);
+
+    self.section_ordinals.deinit(self.base.allocator);
+    self.section_to_ordinal.deinit(self.base.allocator);
+    self.pending_updates.deinit(self.base.allocator);
+    self.got_entries.deinit(self.base.allocator);
+    self.got_entries_map.deinit(self.base.allocator);
+    self.got_entries_free_list.deinit(self.base.allocator);
+    self.stubs.deinit(self.base.allocator);
+    self.stubs_map.deinit(self.base.allocator);
+    self.strtab_dir.deinit(self.base.allocator);
+    self.strtab.deinit(self.base.allocator);
+    self.undefs.deinit(self.base.allocator);
+    self.tentatives.deinit(self.base.allocator);
+    self.imports.deinit(self.base.allocator);
     self.globals.deinit(self.base.allocator);
     self.globals_free_list.deinit(self.base.allocator);
     self.locals.deinit(self.base.allocator);
     self.locals_free_list.deinit(self.base.allocator);
+    self.symbol_resolver.deinit(self.base.allocator);
+
+    for (self.objects.items) |object| {
+        object.deinit();
+        self.base.allocator.destroy(object);
+    }
+    self.objects.deinit(self.base.allocator);
+
+    for (self.archives.items) |archive| {
+        archive.deinit();
+        self.base.allocator.destroy(archive);
+    }
+    self.archives.deinit(self.base.allocator);
+
+    for (self.dylibs.items) |dylib| {
+        dylib.deinit();
+        self.base.allocator.destroy(dylib);
+    }
+    self.dylibs.deinit(self.base.allocator);
+
     for (self.load_commands.items) |*lc| {
         lc.deinit(self.base.allocator);
     }
     self.load_commands.deinit(self.base.allocator);
+
+    for (self.managed_blocks.items) |block| {
+        block.deinit(self.base.allocator);
+        self.base.allocator.destroy(block);
+    }
+    self.managed_blocks.deinit(self.base.allocator);
+    self.blocks.deinit(self.base.allocator);
+    self.text_block_free_list.deinit(self.base.allocator);
+
+    for (self.decls.keys()) |decl| {
+        decl.link.macho.deinit(self.base.allocator);
+    }
+    self.decls.deinit(self.base.allocator);
+}
+
+pub fn closeFiles(self: MachO) void {
+    for (self.objects.items) |object| {
+        object.closeFile();
+    }
+    for (self.archives.items) |archive| {
+        archive.closeFile();
+    }
 }
 
 fn freeTextBlock(self: *MachO, text_block: *TextBlock) void {
+    log.debug("freeTextBlock {*}", .{text_block});
+    text_block.deinit(self.base.allocator);
+
     var already_have_free_list_node = false;
     {
         var i: usize = 0;
@@ -1110,8 +3466,10 @@ fn growTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, alig
 pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void {
     if (decl.link.macho.local_sym_index != 0) return;
 
-    try self.locals.ensureCapacity(self.base.allocator, self.locals.items.len + 1);
-    try self.offset_table.ensureCapacity(self.base.allocator, self.offset_table.items.len + 1);
+    try self.locals.ensureUnusedCapacity(self.base.allocator, 1);
+    try self.got_entries.ensureUnusedCapacity(self.base.allocator, 1);
+
+    try self.decls.putNoClobber(self.base.allocator, decl, {});
 
     if (self.locals_free_list.popOrNull()) |i| {
         log.debug("reusing symbol index {d} for {s}", .{ i, decl.name });
@@ -1122,16 +3480,19 @@ pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void {
         _ = self.locals.addOneAssumeCapacity();
     }
 
-    if (self.offset_table_free_list.popOrNull()) |i| {
-        log.debug("reusing offset table entry index {d} for {s}", .{ i, decl.name });
-        decl.link.macho.offset_table_index = i;
-    } else {
-        log.debug("allocating offset table entry index {d} for {s}", .{ self.offset_table.items.len, decl.name });
self.offset_table.items.len, decl.name }); - decl.link.macho.offset_table_index = @intCast(u32, self.offset_table.items.len); - _ = self.offset_table.addOneAssumeCapacity(); - self.offset_table_count_dirty = true; - self.rebase_info_dirty = true; - } + const got_index: u32 = blk: { + if (self.got_entries_free_list.popOrNull()) |i| { + log.debug("reusing GOT entry index {d} for {s}", .{ i, decl.name }); + break :blk i; + } else { + const got_index = @intCast(u32, self.got_entries.items.len); + log.debug("allocating GOT entry index {d} for {s}", .{ got_index, decl.name }); + _ = self.got_entries.addOneAssumeCapacity(); + self.got_entries_count_dirty = true; + self.rebase_info_dirty = true; + break :blk got_index; + } + }; self.locals.items[decl.link.macho.local_sym_index] = .{ .n_strx = 0, @@ -1140,11 +3501,12 @@ pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { .n_desc = 0, .n_value = 0, }; - self.offset_table.items[decl.link.macho.offset_table_index] = .{ - .kind = .Local, - .symbol = decl.link.macho.local_sym_index, - .index = decl.link.macho.offset_table_index, + const got_entry = GotIndirectionKey{ + .where = .local, + .where_index = decl.link.macho.local_sym_index, }; + self.got_entries.items[got_index] = got_entry; + try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, got_index); } pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liveness: Liveness) !void { @@ -1179,6 +3541,8 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv } } + self.active_decl = decl; + const res = if (debug_buffers) |dbg| try codegen.generateFunction(&self.base, decl.srcLoc(), func, air, liveness, &code_buffer, .{ .dwarf = .{ @@ -1190,92 +3554,25 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv else try codegen.generateFunction(&self.base, decl.srcLoc(), func, air, liveness, &code_buffer, .none); switch (res) { - .appended => {}, + .appended => { + // TODO clearing the code and relocs buffer should probably be orchestrated + // in a different, smarter, more automatic way somewhere else, in a more centralised + // way than this. + // If we don't clear the buffers here, we are up for some nasty surprises when + // this TextBlock is reused later on and was not freed by freeTextBlock(). + decl.link.macho.code.clearAndFree(self.base.allocator); + try decl.link.macho.code.appendSlice(self.base.allocator, code_buffer.items); + }, .fail => |em| { - // Clear any PIE fixups for this decl. - self.pie_fixups.shrinkRetainingCapacity(0); - // Clear any stub fixups for this decl. - self.stub_fixups.shrinkRetainingCapacity(0); decl.analysis = .codegen_failure; try module.failed_decls.put(module.gpa, decl, em); return; }, } - const symbol = try self.placeDecl(decl, code_buffer.items.len); - // Calculate displacements to target addr (if any). - while (self.pie_fixups.popOrNull()) |fixup| { - assert(fixup.size == 4); - const this_addr = symbol.n_value + fixup.offset; - const target_addr = fixup.target_addr; + const symbol = try self.placeDecl(decl, decl.link.macho.code.items.len); - switch (self.base.options.target.cpu.arch) { - .x86_64 => { - const displacement = try math.cast(u32, target_addr - this_addr - 4); - mem.writeIntLittle(u32, code_buffer.items[fixup.offset..][0..4], displacement); - }, - .aarch64 => { - // TODO optimize instruction based on jump length (use ldr(literal) + nop if possible). 
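// NOTE(editor): illustrative sketch, not part of this commit. The deleted
// aarch64 fixup below patches an adrp+ldr pair in place. adrp works in units
// of 4 KiB pages and encodes a signed 21-bit page delta split across the
// immhi:immlo fields; in isolation the arithmetic is:
//
//     fn adrpPageDelta(this_addr: u64, target_addr: u64) struct { immhi: u19, immlo: u2 } {
//         const this_page = @intCast(i32, this_addr >> 12);
//         const target_page = @intCast(i32, target_addr >> 12);
//         const pages = @bitCast(u21, @intCast(i21, target_page - this_page));
//         return .{ .immhi = @truncate(u19, pages >> 2), .immlo = @truncate(u2, pages) };
//     }
//
// The follow-up ldr patch then supplies the low 12 bits of the target address.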
-                {
-                    const inst = code_buffer.items[fixup.offset..][0..4];
-                    const parsed = mem.bytesAsValue(meta.TagPayload(
-                        aarch64.Instruction,
-                        aarch64.Instruction.pc_relative_address,
-                    ), inst);
-                    const this_page = @intCast(i32, this_addr >> 12);
-                    const target_page = @intCast(i32, target_addr >> 12);
-                    const pages = @bitCast(u21, @intCast(i21, target_page - this_page));
-                    parsed.immhi = @truncate(u19, pages >> 2);
-                    parsed.immlo = @truncate(u2, pages);
-                }
-                {
-                    const inst = code_buffer.items[fixup.offset + 4 ..][0..4];
-                    const parsed = mem.bytesAsValue(meta.TagPayload(
-                        aarch64.Instruction,
-                        aarch64.Instruction.load_store_register,
-                    ), inst);
-                    const narrowed = @truncate(u12, target_addr);
-                    const offset = try math.divExact(u12, narrowed, 8);
-                    parsed.offset = offset;
-                }
-            },
-            else => unreachable, // unsupported target architecture
-        }
-    }
-
-    // Resolve stubs (if any)
-    const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-    const stubs = text_segment.sections.items[self.stubs_section_index.?];
-    for (self.stub_fixups.items) |fixup| {
-        const stub_addr = stubs.addr + fixup.symbol * stubs.reserved2;
-        const text_addr = symbol.n_value + fixup.start;
-        switch (self.base.options.target.cpu.arch) {
-            .x86_64 => {
-                assert(stub_addr >= text_addr + fixup.len);
-                const displacement = try math.cast(u32, stub_addr - text_addr - fixup.len);
-                const placeholder = code_buffer.items[fixup.start + fixup.len - @sizeOf(u32) ..][0..@sizeOf(u32)];
-                mem.writeIntSliceLittle(u32, placeholder, displacement);
-            },
-            .aarch64 => {
-                assert(stub_addr >= text_addr);
-                const displacement = try math.cast(i28, stub_addr - text_addr);
-                const placeholder = code_buffer.items[fixup.start..][0..fixup.len];
-                mem.writeIntSliceLittle(u32, placeholder, aarch64.Instruction.bl(displacement).toU32());
-            },
-            else => unreachable, // unsupported target architecture
-        }
-        if (!fixup.already_defined) {
-            try self.writeStub(fixup.symbol);
-            try self.writeStubInStubHelper(fixup.symbol);
-            try self.writeLazySymbolPointer(fixup.symbol);
-
-            self.rebase_info_dirty = true;
-            self.lazy_binding_info_dirty = true;
-        }
-    }
-    self.stub_fixups.shrinkRetainingCapacity(0);
-
-    try self.writeCode(symbol, code_buffer.items);
+    try self.writeCode(symbol, decl.link.macho.code.items);
 
     if (debug_buffers) |db| {
         try self.d_sym.?.commitDeclDebugInfo(
@@ -1327,6 +3624,8 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
         }
     }
 
+    self.active_decl = decl;
+
     const res = if (debug_buffers) |dbg|
         try codegen.generateSymbol(&self.base, decl.srcLoc(), .{
             .ty = decl.ty,
@@ -1344,18 +3643,27 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
             .val = decl.val,
         }, &code_buffer, .none);
 
-    const code = switch (res) {
-        .externally_managed => |x| x,
-        .appended => code_buffer.items,
-        .fail => |em| {
-            decl.analysis = .codegen_failure;
-            try module.failed_decls.put(module.gpa, decl, em);
-            return;
-        },
+    const code = blk: {
+        switch (res) {
+            .externally_managed => |x| break :blk x,
+            .appended => {
+                // TODO clearing the code and relocs buffers should probably be orchestrated
+                // in a smarter, more automatic, and more centralised way somewhere else.
+                // If we don't clear the buffers here, we are in for some nasty surprises when
+                // this TextBlock is reused later on and was not freed by freeTextBlock().
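// NOTE(editor): sketch, not part of this commit. clearAndFree releases the
// decl's old buffer before the fresh code is copied in. Assuming the
// std.ArrayListUnmanaged API of this era, an allocation-preserving variant
// would be:
//
//     decl.link.macho.code.clearRetainingCapacity();
//     try decl.link.macho.code.appendSlice(self.base.allocator, code_buffer.items);
//
// which avoids a free/alloc cycle when a decl is regenerated at a similar size.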
+                decl.link.macho.code.clearAndFree(self.base.allocator);
+                try decl.link.macho.code.appendSlice(self.base.allocator, code_buffer.items);
+                break :blk decl.link.macho.code.items;
+            },
+            .fail => |em| {
+                decl.analysis = .codegen_failure;
+                try module.failed_decls.put(module.gpa, decl, em);
+                return;
+            },
+        }
     };
 
     const symbol = try self.placeDecl(decl, code.len);
-    assert(self.pie_fixups.items.len == 0);
-    assert(self.stub_fixups.items.len == 0);
 
     try self.writeCode(symbol, code);
 
@@ -1379,13 +3687,12 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64
             log.debug("growing {s} and moving from 0x{x} to 0x{x}", .{ decl.name, symbol.n_value, vaddr });
 
             if (vaddr != symbol.n_value) {
-                log.debug("  (writing new offset table entry)", .{});
-                self.offset_table.items[decl.link.macho.offset_table_index] = .{
-                    .kind = .Local,
-                    .symbol = decl.link.macho.local_sym_index,
-                    .index = decl.link.macho.offset_table_index,
-                };
-                try self.writeOffsetTableEntry(decl.link.macho.offset_table_index);
+                log.debug("  (writing new GOT entry)", .{});
+                const got_index = self.got_entries_map.get(.{
+                    .where = .local,
+                    .where_index = decl.link.macho.local_sym_index,
+                }) orelse unreachable;
+                try self.writeGotEntry(got_index);
             }
 
             symbol.n_value = vaddr;
@@ -1397,7 +3704,7 @@
         const new_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{mem.spanZ(decl.name)});
         defer self.base.allocator.free(new_name);
 
-        symbol.n_strx = try self.updateString(symbol.n_strx, new_name);
+        symbol.n_strx = try self.makeString(new_name);
         symbol.n_type = macho.N_SECT;
         symbol.n_sect = @intCast(u8, self.text_section_index.?) + 1;
         symbol.n_desc = 0;
@@ -1423,16 +3730,35 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64
             .n_desc = 0,
             .n_value = addr,
         };
-        self.offset_table.items[decl.link.macho.offset_table_index] = .{
-            .kind = .Local,
-            .symbol = decl.link.macho.local_sym_index,
-            .index = decl.link.macho.offset_table_index,
-        };
+        const got_index = self.got_entries_map.get(.{
+            .where = .local,
+            .where_index = decl.link.macho.local_sym_index,
+        }) orelse unreachable;
+        try self.writeGotEntry(got_index);
 
         try self.writeLocalSymbol(decl.link.macho.local_sym_index);
         if (self.d_sym) |*ds|
             try ds.writeLocalSymbol(decl.link.macho.local_sym_index);
-        try self.writeOffsetTableEntry(decl.link.macho.offset_table_index);
+    }
+
+    // Resolve relocations
+    try decl.link.macho.resolveRelocs(self);
+    // TODO this requires further investigation: should we dispose of resolved relocs, or keep them
+    // so that we can reapply them when moving/growing sections?
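// NOTE(editor): sketch, not part of this commit. Keeping the relocs instead of
// clearing them below would allow re-running
//
//     try decl.link.macho.resolveRelocs(self);
//
// after a TextBlock is moved or its section grows, at the cost of retaining
// every decl's reloc list for the lifetime of the block.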
+    decl.link.macho.relocs.clearAndFree(self.base.allocator);
+
+    // Apply pending updates
+    while (self.pending_updates.popOrNull()) |update| {
+        switch (update.kind) {
+            .got => unreachable,
+            .stub => {
+                try self.writeStub(update.index);
+                try self.writeStubInStubHelper(update.index);
+                try self.writeLazySymbolPointer(update.index);
+                self.rebase_info_dirty = true;
+                self.lazy_binding_info_dirty = true;
+            },
+        }
+    }
 
     return symbol;
@@ -1443,6 +3769,7 @@ fn writeCode(self: *MachO, symbol: *macho.nlist_64, code: []const u8) !void {
     const text_section = text_segment.sections.items[self.text_section_index.?];
     const section_offset = symbol.n_value - text_section.addr;
     const file_offset = text_section.offset + section_offset;
+    log.debug("writing code for symbol {s} at file offset 0x{x}", .{ self.getString(symbol.n_strx), file_offset });
     try self.base.file.?.pwriteAll(code, file_offset);
 }
 
@@ -1518,7 +3845,7 @@ pub fn updateDeclExports(
         if (exp.link.macho.sym_index) |i| {
             const sym = &self.globals.items[i];
             sym.* = .{
-                .n_strx = try self.updateString(sym.n_strx, exp_name),
+                .n_strx = sym.n_strx,
                 .n_type = n_type,
                 .n_sect = @intCast(u8, self.text_section_index.?) + 1,
                 .n_desc = n_desc,
@@ -1529,7 +3856,7 @@
             const i = if (self.globals_free_list.popOrNull()) |i| i else blk: {
                 _ = self.globals.addOneAssumeCapacity();
                 self.export_info_dirty = true;
-                break :blk self.globals.items.len - 1;
+                break :blk @intCast(u32, self.globals.items.len - 1);
             };
             self.globals.items[i] = .{
                 .n_strx = name_str_index,
@@ -1538,6 +3865,12 @@
                 .n_desc = n_desc,
                 .n_value = decl_sym.n_value,
             };
+            const resolv = try self.symbol_resolver.getOrPut(self.base.allocator, name_str_index);
+            resolv.value_ptr.* = .{
+                .where = .global,
+                .where_index = i,
+                .local_sym_index = decl.link.macho.local_sym_index,
+            };
 
             exp.link.macho.sym_index = @intCast(u32, i);
         }
@@ -1551,14 +3884,22 @@ pub fn deleteExport(self: *MachO, exp: Export) void {
 }
 
 pub fn freeDecl(self: *MachO, decl: *Module.Decl) void {
+    log.debug("freeDecl {*}", .{decl});
+    _ = self.decls.swapRemove(decl);
     // Appending to free lists is allowed to fail because the free lists are heuristics based anyway.
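// NOTE(editor): sketch, not part of this commit. "Allowed to fail" is encoded
// by swallowing the append error, as in:
//
//     self.locals_free_list.append(self.base.allocator, decl.link.macho.local_sym_index) catch {};
//
// A dropped free-list entry only forfeits a reuse opportunity; it never affects
// the correctness of the emitted binary.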
     self.freeTextBlock(&decl.link.macho);
     if (decl.link.macho.local_sym_index != 0) {
         self.locals_free_list.append(self.base.allocator, decl.link.macho.local_sym_index) catch {};
-        self.offset_table_free_list.append(self.base.allocator, decl.link.macho.offset_table_index) catch {};
+
+        const got_key = GotIndirectionKey{
+            .where = .local,
+            .where_index = decl.link.macho.local_sym_index,
+        };
+        const got_index = self.got_entries_map.get(got_key) orelse unreachable;
+        _ = self.got_entries_map.remove(got_key);
+        self.got_entries_free_list.append(self.base.allocator, got_index) catch {};
 
         self.locals.items[decl.link.macho.local_sym_index].n_type = 0;
-        decl.link.macho.local_sym_index = 0;
     }
     if (self.d_sym) |*ds| {
@@ -1598,54 +3939,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         .Lib => return error.TODOImplementWritingLibFiles,
     }
 
-    if (self.header == null) {
-        var header: macho.mach_header_64 = undefined;
-        header.magic = macho.MH_MAGIC_64;
-
-        const CpuInfo = struct {
-            cpu_type: macho.cpu_type_t,
-            cpu_subtype: macho.cpu_subtype_t,
-        };
-
-        const cpu_info: CpuInfo = switch (self.base.options.target.cpu.arch) {
-            .aarch64 => .{
-                .cpu_type = macho.CPU_TYPE_ARM64,
-                .cpu_subtype = macho.CPU_SUBTYPE_ARM_ALL,
-            },
-            .x86_64 => .{
-                .cpu_type = macho.CPU_TYPE_X86_64,
-                .cpu_subtype = macho.CPU_SUBTYPE_X86_64_ALL,
-            },
-            else => return error.UnsupportedMachOArchitecture,
-        };
-        header.cputype = cpu_info.cpu_type;
-        header.cpusubtype = cpu_info.cpu_subtype;
-
-        const filetype: u32 = switch (self.base.options.output_mode) {
-            .Exe => macho.MH_EXECUTE,
-            .Obj => macho.MH_OBJECT,
-            .Lib => switch (self.base.options.link_mode) {
-                .Static => return error.TODOStaticLibMachOType,
-                .Dynamic => macho.MH_DYLIB,
-            },
-        };
-        header.filetype = filetype;
-        // These will get populated at the end of flushing the results to file.
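// NOTE(editor): sketch, not part of this commit. ncmds and sizeofcmds describe
// the load-command region that immediately follows the 32-byte mach_header_64.
// They are recomputed at flush time along the lines of:
//
//     header.ncmds = @intCast(u32, self.load_commands.items.len);
//     for (self.load_commands.items) |cmd| header.sizeofcmds += cmd.cmdsize();
//
// (compare the rewritten writeHeader further down in this diff).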
-        header.ncmds = 0;
-        header.sizeofcmds = 0;
-
-        switch (self.base.options.output_mode) {
-            .Exe => {
-                header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE;
-            },
-            else => {
-                header.flags = 0;
-            },
-        }
-        header.reserved = 0;
-        self.header = header;
-        self.header_dirty = true;
-    }
     if (self.pagezero_segment_cmd_index == null) {
         self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
         try self.load_commands.append(self.base.allocator, .{
@@ -1653,7 +3946,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
                 .vmsize = 0x100000000, // size always set to 4GB
             }),
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.text_segment_cmd_index == null) {
@@ -1662,8 +3954,8 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         const initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE;
 
         const program_code_size_hint = self.base.options.program_code_size_hint;
-        const offset_table_size_hint = @sizeOf(u64) * self.base.options.symbol_count_hint;
-        const ideal_size = self.header_pad + program_code_size_hint + 3 * offset_table_size_hint;
+        const got_size_hint = @sizeOf(u64) * self.base.options.symbol_count_hint;
+        const ideal_size = self.header_pad + program_code_size_hint + 3 * got_size_hint;
         const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size);
 
         log.debug("found __TEXT segment free space 0x{x} to 0x{x}", .{ 0, needed_size });
@@ -1677,7 +3969,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
                 .initprot = initprot,
             }),
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.text_section_index == null) {
@@ -1702,7 +3993,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
             .@"align" = alignment,
             .flags = flags,
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.stubs_section_index == null) {
@@ -1734,7 +4024,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
             .flags = flags,
            .reserved2 = stub_size,
        });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.stub_helper_section_index == null) {
@@ -1760,7 +4049,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
             .@"align" = alignment,
             .flags = flags,
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.data_const_segment_cmd_index == null) {
@@ -1784,7 +4072,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
                 .initprot = initprot,
             }),
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.got_section_index == null) {
@@ -1805,7 +4092,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
             .@"align" = 3, // 2^3 = @sizeOf(u64)
             .flags = flags,
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.data_segment_cmd_index == null) {
@@ -1829,7 +4115,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
                 .initprot = initprot,
             }),
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.la_symbol_ptr_section_index == null) {
@@ -1850,7 +4135,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
             .@"align" = 3, // 2^3 = @sizeOf(u64)
             .flags = flags,
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.data_section_index == null) {
@@ -1869,7 +4153,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
             .offset = @intCast(u32, off),
             .@"align" = 3, // 2^3 = @sizeOf(u64)
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.linkedit_segment_cmd_index == null) {
@@ -1889,7 +4172,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
                 .initprot = initprot,
             }),
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.dyld_info_cmd_index == null) {
@@ -1936,7 +4218,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         dyld.export_off = @intCast(u32, export_off);
         dyld.export_size = expected_size;
 
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.symtab_cmd_index == null) {
@@ -1961,16 +4242,15 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         symtab.symoff = @intCast(u32, symtab_off);
         symtab.nsyms = @intCast(u32, self.base.options.symbol_count_hint);
 
-        try self.string_table.append(self.base.allocator, 0); // Need a null at position 0.
-        const strtab_size = self.string_table.items.len;
+        try self.strtab.append(self.base.allocator, 0);
+        const strtab_size = self.strtab.items.len;
         const strtab_off = self.findFreeSpaceLinkedit(strtab_size, 1, symtab_off);
 
         log.debug("found string table free space 0x{x} to 0x{x}", .{ strtab_off, strtab_off + strtab_size });
         symtab.stroff = @intCast(u32, strtab_off);
         symtab.strsize = @intCast(u32, strtab_size);
-        self.header_dirty = true;
         self.load_commands_dirty = true;
-        self.string_table_dirty = true;
+        self.strtab_dirty = true;
     }
     if (self.dysymtab_cmd_index == null) {
         self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len);
@@ -2005,7 +4285,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
                 .nlocrel = 0,
             },
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.dylinker_cmd_index == null) {
@@ -2015,7 +4294,7 @@ pub fn populateMissingMetadata(self: *MachO) !void {
             @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH),
             @sizeOf(u64),
         ));
-        var dylinker_cmd = emptyGenericCommandWithData(macho.dylinker_command{
+        var dylinker_cmd = commands.emptyGenericCommandWithData(macho.dylinker_command{
             .cmd = macho.LC_LOAD_DYLINKER,
             .cmdsize = cmdsize,
             .name = @sizeOf(macho.dylinker_command),
@@ -2024,18 +4303,16 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         mem.set(u8, dylinker_cmd.data, 0);
         mem.copy(u8, dylinker_cmd.data, mem.spanZ(DEFAULT_DYLD_PATH));
         try self.load_commands.append(self.base.allocator, .{ .Dylinker = dylinker_cmd });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.libsystem_cmd_index == null) {
         self.libsystem_cmd_index = @intCast(u16, self.load_commands.items.len);
 
-        var dylib_cmd = try createLoadDylibCommand(self.base.allocator, mem.spanZ(LIB_SYSTEM_PATH), 2, 0, 0);
+        var dylib_cmd = try commands.createLoadDylibCommand(self.base.allocator, mem.spanZ(LIB_SYSTEM_PATH), 2, 0, 0);
         errdefer dylib_cmd.deinit(self.base.allocator);
 
         try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.main_cmd_index == null) {
@@ -2048,7 +4325,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
                 .stacksize = 0,
             },
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.version_min_cmd_index == null) {
@@ -2070,7 +4346,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
                 .sdk = version,
             },
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.source_version_cmd_index == null) {
@@ -2082,7 +4357,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
                 .version = 0x0,
             },
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.uuid_cmd_index == null) {
@@ -2094,7 +4368,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         };
         std.crypto.random.bytes(&uuid_cmd.uuid);
         try self.load_commands.append(self.base.allocator, .{ .Uuid = uuid_cmd });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.code_signature_cmd_index == null) {
@@ -2107,31 +4380,32 @@ pub fn populateMissingMetadata(self: *MachO) !void {
                 .datasize = 0,
             },
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
-    if (!self.nonlazy_imports.contains("dyld_stub_binder")) {
-        const index = @intCast(u32, self.nonlazy_imports.count());
-        const name = try self.base.allocator.dupe(u8, "dyld_stub_binder");
-        const offset = try self.makeString("dyld_stub_binder");
-        try self.nonlazy_imports.putNoClobber(self.base.allocator, name, .{
-            .symbol = .{
-                .n_strx = offset,
-                .n_type = std.macho.N_UNDF | std.macho.N_EXT,
-                .n_sect = 0,
-                .n_desc = std.macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | std.macho.N_SYMBOL_RESOLVER,
-                .n_value = 0,
-            },
-            .dylib_ordinal = 1, // TODO this is currently hardcoded.
-            .index = index,
+    if (!self.strtab_dir.containsAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{
+        .strtab = &self.strtab,
+    })) {
+        const import_sym_index = @intCast(u32, self.imports.items.len);
+        const n_strx = try self.makeString("dyld_stub_binder");
+        try self.imports.append(self.base.allocator, .{
+            .n_strx = n_strx,
+            .n_type = macho.N_UNDF | macho.N_EXT,
+            .n_sect = 0,
+            .n_desc = packDylibOrdinal(1),
+            .n_value = 0,
         });
-        const off_index = @intCast(u32, self.offset_table.items.len);
-        try self.offset_table.append(self.base.allocator, .{
-            .kind = .Extern,
-            .symbol = index,
-            .index = off_index,
+        try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{
+            .where = .import,
+            .where_index = import_sym_index,
         });
-        try self.writeOffsetTableEntry(off_index);
+        const got_key = GotIndirectionKey{
+            .where = .import,
+            .where_index = import_sym_index,
+        };
+        const got_index = @intCast(u32, self.got_entries.items.len);
+        try self.got_entries.append(self.base.allocator, got_key);
+        try self.got_entries_map.putNoClobber(self.base.allocator, got_key, got_index);
+        try self.writeGotEntry(got_index);
         self.binding_info_dirty = true;
     }
     if (self.stub_helper_stubs_start_off == null) {
@@ -2172,7 +4446,8 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64,
             // should be deleted because the block that it points to has grown to take up
             // more of the extra capacity.
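// NOTE(editor): assumption, not confirmed by this diff. freeListEligible
// presumably mirrors the heuristic used by the ELF linker: a block stays on the
// free list only while its surplus capacity is worth reusing, roughly
//
//     return block.capacity(self.*) - padToIdeal(block.size) >= min_text_capacity;
//
// so a block that has grown into its spare room is dropped from the list here.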
             if (!big_block.freeListEligible(self.*)) {
-                _ = self.text_block_free_list.swapRemove(i);
+                const bl = self.text_block_free_list.swapRemove(i);
+                bl.deinit(self.base.allocator);
             } else {
                 i += 1;
             }
@@ -2244,64 +4519,45 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64,
     return vaddr;
 }
 
-fn makeString(self: *MachO, bytes: []const u8) !u32 {
-    if (self.string_table_directory.get(bytes)) |offset| {
-        log.debug("reusing '{s}' from string table at offset 0x{x}", .{ bytes, offset });
-        return offset;
+pub fn addExternFn(self: *MachO, name: []const u8) !u32 {
+    const sym_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{name});
+    defer self.base.allocator.free(sym_name);
+
+    if (self.strtab_dir.getAdapted(@as([]const u8, sym_name), StringSliceAdapter{
+        .strtab = &self.strtab,
+    })) |n_strx| {
+        const resolv = self.symbol_resolver.get(n_strx) orelse unreachable;
        return resolv.where_index;
     }
 
-    try self.string_table.ensureCapacity(self.base.allocator, self.string_table.items.len + bytes.len + 1);
-    const offset = @intCast(u32, self.string_table.items.len);
-
-    log.debug("writing new string '{s}' into string table at offset 0x{x}", .{ bytes, offset });
-
-    self.string_table.appendSliceAssumeCapacity(bytes);
-    self.string_table.appendAssumeCapacity(0);
-
-    try self.string_table_directory.putNoClobber(
-        self.base.allocator,
-        try self.base.allocator.dupe(u8, bytes),
-        offset,
-    );
-
-    self.string_table_dirty = true;
-    if (self.d_sym) |*ds|
-        ds.string_table_dirty = true;
-
-    return offset;
-}
-
-fn getString(self: *MachO, str_off: u32) []const u8 {
-    assert(str_off < self.string_table.items.len);
-    return mem.spanZ(@ptrCast([*:0]const u8, self.string_table.items.ptr + str_off));
-}
-
-fn updateString(self: *MachO, old_str_off: u32, new_name: []const u8) !u32 {
-    const existing_name = self.getString(old_str_off);
-    if (mem.eql(u8, existing_name, new_name)) {
-        return old_str_off;
-    }
-    return self.makeString(new_name);
-}
-
-pub fn addExternSymbol(self: *MachO, name: []const u8) !u32 {
-    const index = @intCast(u32, self.lazy_imports.count());
-    const offset = try self.makeString(name);
-    const sym_name = try self.base.allocator.dupe(u8, name);
-    const dylib_ordinal = 1; // TODO this is now hardcoded, since we only support libSystem.
-    try self.lazy_imports.putNoClobber(self.base.allocator, sym_name, .{
-        .symbol = .{
-            .n_strx = offset,
-            .n_type = macho.N_UNDF | macho.N_EXT,
-            .n_sect = 0,
-            .n_desc = macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | macho.N_SYMBOL_RESOLVER,
-            .n_value = 0,
-        },
-        .dylib_ordinal = dylib_ordinal,
-        .index = index,
+    log.debug("adding new extern function '{s}' with dylib ordinal 1", .{sym_name});
+    const import_sym_index = @intCast(u32, self.imports.items.len);
+    const n_strx = try self.makeString(sym_name);
+    try self.imports.append(self.base.allocator, .{
+        .n_strx = n_strx,
+        .n_type = macho.N_UNDF | macho.N_EXT,
+        .n_sect = 0,
+        .n_desc = packDylibOrdinal(1),
+        .n_value = 0,
     });
-    log.debug("adding new extern symbol '{s}' with dylib ordinal '{}'", .{ name, dylib_ordinal });
-    return index;
+    try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{
+        .where = .import,
+        .where_index = import_sym_index,
+    });
+
+    const stubs_index = @intCast(u32, self.stubs.items.len);
+    try self.stubs.append(self.base.allocator, import_sym_index);
+    try self.stubs_map.putNoClobber(self.base.allocator, import_sym_index, stubs_index);
+
+    // TODO discuss this. The caller context expects codegen.InnerError{ OutOfMemory, CodegenFail },
+    // which obviously doesn't include file-writing errors. So instead of trying to write the stub
+    // entry right here and now, queue it up and process it when we next update a decl.
+    try self.pending_updates.append(self.base.allocator, .{
+        .kind = .stub,
+        .index = stubs_index,
+    });
+
+    return import_sym_index;
 }
 
 const NextSegmentAddressAndOffset = struct {
@@ -2455,29 +4711,26 @@ fn findFreeSpaceLinkedit(self: *MachO, object_size: u64, min_alignment: u16, sta
     return st;
 }
 
-fn writeOffsetTableEntry(self: *MachO, index: usize) !void {
+fn writeGotEntry(self: *MachO, index: usize) !void {
     const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
     const sect = &seg.sections.items[self.got_section_index.?];
     const off = sect.offset + @sizeOf(u64) * index;
 
-    if (self.offset_table_count_dirty) {
+    if (self.got_entries_count_dirty) {
         // TODO relocate.
-        self.offset_table_count_dirty = false;
+        self.got_entries_count_dirty = false;
     }
 
-    const got_entry = self.offset_table.items[index];
-    const sym = blk: {
-        switch (got_entry.kind) {
-            .Local => {
-                break :blk self.locals.items[got_entry.symbol];
-            },
-            .Extern => {
-                break :blk self.nonlazy_imports.values()[got_entry.symbol].symbol;
-            },
-        }
+    const got_entry = self.got_entries.items[index];
+    const sym = switch (got_entry.where) {
+        .local => self.locals.items[got_entry.where_index],
+        .import => self.imports.items[got_entry.where_index],
     };
-    const sym_name = self.getString(sym.n_strx);
-    log.debug("writing offset table entry [ 0x{x} => 0x{x} ({s}) ]", .{ off, sym.n_value, sym_name });
+    log.debug("writing offset table entry [ 0x{x} => 0x{x} ({s}) ]", .{
+        off,
+        sym.n_value,
+        self.getString(sym.n_strx),
+    });
     try self.base.file.?.pwriteAll(mem.asBytes(&sym.n_value), off);
 }
 
@@ -2755,7 +5008,7 @@ fn relocateSymbolTable(self: *MachO) !void {
     const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
     const nlocals = self.locals.items.len;
     const nglobals = self.globals.items.len;
-    const nundefs = self.lazy_imports.count() + self.nonlazy_imports.count();
+    const nundefs = self.imports.items.len;
     const nsyms = nlocals + nglobals + nundefs;
 
     if (symtab.nsyms < nsyms) {
@@ -2775,7 +5028,7 @@
         const amt = try self.base.file.?.copyRangeAll(symtab.symoff, self.base.file.?, new_symoff, existing_size);
         if (amt != existing_size) return error.InputOutput;
         symtab.symoff = @intCast(u32, new_symoff);
-        self.string_table_needs_relocation = true;
+        self.strtab_needs_relocation = true;
     }
     symtab.nsyms = @intCast(u32, nsyms);
     self.load_commands_dirty = true;
@@ -2800,17 +5053,7 @@ fn writeAllGlobalAndUndefSymbols(self: *MachO) !void {
     const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
     const nlocals = self.locals.items.len;
     const nglobals = self.globals.items.len;
-
-    const nundefs = self.lazy_imports.count() + self.nonlazy_imports.count();
-    var undefs = std.ArrayList(macho.nlist_64).init(self.base.allocator);
-    defer undefs.deinit();
-    try undefs.ensureCapacity(nundefs);
-    for (self.lazy_imports.values()) |*value| {
-        undefs.appendAssumeCapacity(value.symbol);
-    }
-    for (self.nonlazy_imports.values()) |*value| {
-        undefs.appendAssumeCapacity(value.symbol);
-    }
+    const nundefs = self.imports.items.len;
 
     const locals_off = symtab.symoff;
     const locals_size = nlocals * @sizeOf(macho.nlist_64);
@@ -2823,7 +5066,7 @@
     const undefs_off = globals_off + globals_size;
     const undefs_size = nundefs * @sizeOf(macho.nlist_64);
     log.debug("writing extern symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off });
-    try self.base.file.?.pwriteAll(mem.sliceAsBytes(undefs.items), undefs_off);
+    try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.imports.items), undefs_off);
 
     // Update dynamic symbol table.
     const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab;
@@ -2849,10 +5092,10 @@ fn writeIndirectSymbolTable(self: *MachO) !void {
     const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?];
     const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab;
 
-    const lazy_count = self.lazy_imports.count();
-    const got_entries = self.offset_table.items;
+    const nstubs = @intCast(u32, self.stubs.items.len);
+    const ngot_entries = @intCast(u32, self.got_entries.items.len);
     const allocated_size = self.allocatedSizeLinkedit(dysymtab.indirectsymoff);
-    const nindirectsyms = @intCast(u32, lazy_count * 2 + got_entries.len);
+    const nindirectsyms = nstubs * 2 + ngot_entries;
     const needed_size = @intCast(u32, nindirectsyms * @sizeOf(u32));
 
     if (needed_size > allocated_size) {
@@ -2871,41 +5114,85 @@
     var writer = stream.writer();
 
     stubs.reserved1 = 0;
-    {
-        var i: usize = 0;
-        while (i < lazy_count) : (i += 1) {
-            const symtab_idx = @intCast(u32, dysymtab.iundefsym + i);
-            try writer.writeIntLittle(u32, symtab_idx);
-        }
+    for (self.stubs.items) |id| {
+        try writer.writeIntLittle(u32, dysymtab.iundefsym + id);
     }
 
-    const base_id = @intCast(u32, lazy_count);
-    got.reserved1 = base_id;
-    for (got_entries) |entry| {
-        switch (entry.kind) {
-            .Local => {
+    got.reserved1 = nstubs;
+    for (self.got_entries.items) |entry| {
+        switch (entry.where) {
+            .import => {
+                try writer.writeIntLittle(u32, dysymtab.iundefsym + entry.where_index);
+            },
+            .local => {
                 try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL);
             },
-            .Extern => {
-                const symtab_idx = @intCast(u32, dysymtab.iundefsym + entry.index + base_id);
-                try writer.writeIntLittle(u32, symtab_idx);
-            },
         }
     }
 
-    la_symbol_ptr.reserved1 = got.reserved1 + @intCast(u32, got_entries.len);
-    {
-        var i: usize = 0;
-        while (i < lazy_count) : (i += 1) {
-            const symtab_idx = @intCast(u32, dysymtab.iundefsym + i);
-            try writer.writeIntLittle(u32, symtab_idx);
-        }
+    la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries;
+    for (self.stubs.items) |id| {
+        try writer.writeIntLittle(u32, dysymtab.iundefsym + id);
     }
 
     try self.base.file.?.pwriteAll(buf, dysymtab.indirectsymoff);
     self.load_commands_dirty = true;
 }
 
+fn writeDices(self: *MachO) !void {
+    if (!self.has_dices) return;
+
+    const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData;
+    const fileoff = seg.inner.fileoff + seg.inner.filesize;
+
+    var buf = std.ArrayList(u8).init(self.base.allocator);
+    defer buf.deinit();
+
+    var block: *TextBlock = self.blocks.get(.{
+        .seg = self.text_segment_cmd_index orelse return,
+        .sect = self.text_section_index orelse return,
+    }) orelse return;
+
+    while (block.prev) |prev| {
+        block = prev;
+    }
+
+    const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const text_sect = text_seg.sections.items[self.text_section_index.?];
+
+    while (true) {
+        if (block.dices.items.len > 0) {
+            const sym = self.locals.items[block.local_sym_index];
+            const base_off = try math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset);
+
+            try buf.ensureUnusedCapacity(block.dices.items.len * @sizeOf(macho.data_in_code_entry));
+            for (block.dices.items) |dice| {
+                const rebased_dice = macho.data_in_code_entry{
+                    .offset = base_off + dice.offset,
+                    .length = dice.length,
+                    .kind = dice.kind,
+                };
+                buf.appendSliceAssumeCapacity(mem.asBytes(&rebased_dice));
+            }
+        }
+
+        if (block.next) |next| {
+            block = next;
+        } else break;
+    }
+
+    const datasize = @intCast(u32, buf.items.len);
+
+    dice_cmd.dataoff = @intCast(u32, fileoff);
+    dice_cmd.datasize = datasize;
+    seg.inner.filesize += datasize;
+
+    log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ fileoff, fileoff + datasize });
+
+    try self.base.file.?.pwriteAll(buf.items, fileoff);
+}
+
 fn writeCodeSignaturePadding(self: *MachO) !void {
     // TODO figure out how not to rewrite padding every single time.
     const tracy = trace(@src());
@@ -2961,7 +5248,7 @@ fn writeCodeSignature(self: *MachO) !void {
     try self.base.file.?.pwriteAll(buffer, code_sig_cmd.dataoff);
 }
 
-fn writeExportTrie(self: *MachO) !void {
+fn writeExportInfo(self: *MachO) !void {
     if (!self.export_info_dirty) return;
     if (self.globals.items.len == 0) return;
 
@@ -2972,13 +5259,18 @@
     defer trie.deinit();
 
     const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-    for (self.globals.items) |symbol| {
-        // TODO figure out if we should put all global symbols into the export trie
-        const name = self.getString(symbol.n_strx);
-        assert(symbol.n_value >= text_segment.inner.vmaddr);
+    const base_address = text_segment.inner.vmaddr;
+
+    // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER.
+    log.debug("writing export trie", .{});
+
+    for (self.globals.items) |sym| {
+        const sym_name = self.getString(sym.n_strx);
+        log.debug("  | putting '{s}' defined at 0x{x}", .{ sym_name, sym.n_value });
+
         try trie.put(.{
-            .name = name,
-            .vmaddr_offset = symbol.n_value - text_segment.inner.vmaddr,
+            .name = sym_name,
+            .vmaddr_offset = sym.n_value - base_address,
             .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR,
         });
     }
@@ -3016,31 +5308,60 @@ fn writeRebaseInfoTable(self: *MachO) !void {
     var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator);
     defer pointers.deinit();
 
+    {
+        var it = self.blocks.iterator();
+        while (it.next()) |entry| {
+            const match = entry.key_ptr.*;
+            var block: *TextBlock = entry.value_ptr.*;
+
+            if (match.seg == self.text_segment_cmd_index.?) continue; // __TEXT is non-writable
+
+            const seg = self.load_commands.items[match.seg].Segment;
+
+            while (true) {
+                const sym = self.locals.items[block.local_sym_index];
+                const base_offset = sym.n_value - seg.inner.vmaddr;
+
+                for (block.rebases.items) |offset| {
+                    try pointers.append(.{
+                        .offset = base_offset + offset,
+                        .segment_id = match.seg,
+                    });
+                }
+
+                if (block.prev) |prev| {
+                    block = prev;
+                } else break;
+            }
+        }
+    }
+
     if (self.got_section_index) |idx| {
         const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
         const sect = seg.sections.items[idx];
         const base_offset = sect.addr - seg.inner.vmaddr;
-        const segment_id = self.data_const_segment_cmd_index.?;
+        const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?);
+
+        for (self.got_entries.items) |entry, i| {
+            if (entry.where == .import) continue;
 
-        for (self.offset_table.items) |entry| {
-            if (entry.kind == .Extern) continue;
             try pointers.append(.{
-                .offset = base_offset + entry.index * @sizeOf(u64),
+                .offset = base_offset + i * @sizeOf(u64),
                 .segment_id = segment_id,
             });
         }
     }
 
     if (self.la_symbol_ptr_section_index) |idx| {
-        try pointers.ensureCapacity(pointers.items.len + self.lazy_imports.count());
         const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
         const sect = seg.sections.items[idx];
         const base_offset = sect.addr - seg.inner.vmaddr;
-        const segment_id = self.data_segment_cmd_index.?;
+        const segment_id = @intCast(u16, self.data_segment_cmd_index.?);
 
-        for (self.lazy_imports.values()) |*value| {
+        try pointers.ensureUnusedCapacity(self.stubs.items.len);
+        for (self.stubs.items) |_, i| {
             pointers.appendAssumeCapacity(.{
-                .offset = base_offset + value.index * @sizeOf(u64),
+                .offset = base_offset + i * @sizeOf(u64),
                 .segment_id = segment_id,
             });
         }
@@ -3073,7 +5394,7 @@
     self.rebase_info_dirty = false;
 }
 
-fn writeBindingInfoTable(self: *MachO) !void {
+fn writeBindInfoTable(self: *MachO) !void {
     if (!self.binding_info_dirty) return;
 
     const tracy = trace(@src());
@@ -3088,19 +5409,50 @@
         const base_offset = sect.addr - seg.inner.vmaddr;
         const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?);
 
-        for (self.offset_table.items) |entry| {
-            if (entry.kind == .Local) continue;
-            const import_key = self.nonlazy_imports.keys()[entry.symbol];
-            const import_ordinal = self.nonlazy_imports.values()[entry.symbol].dylib_ordinal;
+        for (self.got_entries.items) |entry, i| {
+            if (entry.where == .local) continue;
+
+            const sym = self.imports.items[entry.where_index];
             try pointers.append(.{
-                .offset = base_offset + entry.index * @sizeOf(u64),
+                .offset = base_offset + i * @sizeOf(u64),
                 .segment_id = segment_id,
-                .dylib_ordinal = import_ordinal,
-                .name = import_key,
+                .dylib_ordinal = unpackDylibOrdinal(sym.n_desc),
+                .name = self.getString(sym.n_strx),
             });
         }
     }
 
+    {
+        var it = self.blocks.iterator();
+        while (it.next()) |entry| {
+            const match = entry.key_ptr.*;
+            var block: *TextBlock = entry.value_ptr.*;
+
+            if (match.seg == self.text_segment_cmd_index.?) continue; // __TEXT is non-writable
+
+            const seg = self.load_commands.items[match.seg].Segment;
+
+            while (true) {
+                const sym = self.locals.items[block.local_sym_index];
+                const base_offset = sym.n_value - seg.inner.vmaddr;
+
+                for (block.bindings.items) |binding| {
+                    const bind_sym = self.imports.items[binding.local_sym_index];
+                    try pointers.append(.{
+                        .offset = binding.offset + base_offset,
+                        .segment_id = match.seg,
+                        .dylib_ordinal = unpackDylibOrdinal(bind_sym.n_desc),
+                        .name = self.getString(bind_sym.n_strx),
+                    });
+                }
+
+                if (block.prev) |prev| {
+                    block = prev;
+                } else break;
+            }
+        }
+    }
+
     const size = try bind.bindInfoSize(pointers.items);
     var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size));
     defer self.base.allocator.free(buffer);
@@ -3126,7 +5478,7 @@
     self.binding_info_dirty = false;
 }
 
-fn writeLazyBindingInfoTable(self: *MachO) !void {
+fn writeLazyBindInfoTable(self: *MachO) !void {
     if (!self.lazy_binding_info_dirty) return;
 
     const tracy = trace(@src());
@@ -3136,21 +5488,20 @@
     defer pointers.deinit();
 
     if (self.la_symbol_ptr_section_index) |idx| {
-        try pointers.ensureCapacity(self.lazy_imports.count());
         const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
         const sect = seg.sections.items[idx];
         const base_offset = sect.addr - seg.inner.vmaddr;
         const segment_id = @intCast(u16, self.data_segment_cmd_index.?);
 
-        const slice = self.lazy_imports.entries.slice();
-        const keys = slice.items(.key);
-        const values = slice.items(.value);
-        for (keys) |*key, i| {
+        try pointers.ensureUnusedCapacity(self.stubs.items.len);
+
+        for (self.stubs.items) |import_id, i| {
+            const sym = self.imports.items[import_id];
             pointers.appendAssumeCapacity(.{
-                .offset = base_offset + values[i].index * @sizeOf(u64),
+                .offset = base_offset + i * @sizeOf(u64),
                 .segment_id = segment_id,
-                .dylib_ordinal = values[i].dylib_ordinal,
-                .name = key.*,
+                .dylib_ordinal = unpackDylibOrdinal(sym.n_desc),
+                .name = self.getString(sym.n_strx),
             });
         }
     }
@@ -3182,7 +5533,7 @@
 }
 
 fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void {
-    if (self.lazy_imports.count() == 0) return;
+    if (self.stubs.items.len == 0) return;
 
     var stream = std.io.fixedBufferStream(buffer);
     var reader = stream.reader();
@@ -3227,7 +5578,7 @@
             else => {},
         }
     }
-    assert(self.lazy_imports.count() <= offsets.items.len);
+    assert(self.stubs.items.len <= offsets.items.len);
 
     const stub_size: u4 = switch (self.base.options.target.cpu.arch) {
         .x86_64 => 10,
@@ -3240,34 +5591,51 @@
         else => unreachable,
     };
     var buf: [@sizeOf(u32)]u8 = undefined;
-    for (offsets.items[0..self.lazy_imports.count()]) |offset, i| {
-        const placeholder_off = self.stub_helper_stubs_start_off.? + i * stub_size + off;
-        mem.writeIntLittle(u32, &buf, offset);
+    for (self.stubs.items) |_, index| {
+        const placeholder_off = self.stub_helper_stubs_start_off.? + index * stub_size + off;
+        mem.writeIntLittle(u32, &buf, offsets.items[index]);
         try self.base.file.?.pwriteAll(&buf, placeholder_off);
     }
 }
 
 fn writeStringTable(self: *MachO) !void {
-    if (!self.string_table_dirty) return;
+    if (!self.strtab_dirty) return;
 
     const tracy = trace(@src());
     defer tracy.end();
 
     const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
     const allocated_size = self.allocatedSizeLinkedit(symtab.stroff);
-    const needed_size = mem.alignForwardGeneric(u64, self.string_table.items.len, @alignOf(u64));
+    const needed_size = mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64));
 
-    if (needed_size > allocated_size or self.string_table_needs_relocation) {
+    if (needed_size > allocated_size or self.strtab_needs_relocation) {
         symtab.strsize = 0;
         symtab.stroff = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, symtab.symoff));
-        self.string_table_needs_relocation = false;
+        self.strtab_needs_relocation = false;
     }
     symtab.strsize = @intCast(u32, needed_size);
 
     log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize });
 
-    try self.base.file.?.pwriteAll(self.string_table.items, symtab.stroff);
+    try self.base.file.?.pwriteAll(self.strtab.items, symtab.stroff);
     self.load_commands_dirty = true;
-    self.string_table_dirty = false;
+    self.strtab_dirty = false;
+}
+
+fn writeStringTableZld(self: *MachO) !void {
+    const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
+    symtab.stroff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize);
+    symtab.strsize = @intCast(u32, mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64)));
+    seg.inner.filesize += symtab.strsize;
+
+    log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize });
+
+    try self.base.file.?.pwriteAll(self.strtab.items, symtab.stroff);
+
+    if (symtab.strsize > self.strtab.items.len and self.base.options.target.cpu.arch == .x86_64) {
+        // This is the last section, so we need to pad it out.
+        try self.base.file.?.pwriteAll(&[_]u8{0}, seg.inner.fileoff + seg.inner.filesize - 1);
    }
 }
 
 fn updateLinkeditSegmentSizes(self: *MachO) !void {
@@ -3334,24 +5702,57 @@ fn writeLoadCommands(self: *MachO) !void {
     }
 
     const off = @sizeOf(macho.mach_header_64);
+
+    log.debug("writing {} load commands from 0x{x} to 0x{x}", .{ self.load_commands.items.len, off, off + sizeofcmds });
+
     try self.base.file.?.pwriteAll(buffer, off);
     self.load_commands_dirty = false;
 }
 
 /// Writes Mach-O file header.
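// NOTE(editor): sketch, not part of this commit. The MH_TWOLEVEL flag set by
// the rewritten writeHeader below means every undefined symbol records its home
// dylib in the high byte of n_desc. packDylibOrdinal further down relies on
// macho.N_SYMBOL_RESOLVER being 0x100, so
//
//     fn packDylibOrdinal(ordinal: u16) u16 {
//         return ordinal * macho.N_SYMBOL_RESOLVER; // equivalent to ordinal << 8
//     }
//
// matches what the C macro SET_LIBRARY_ORDINAL does.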
 fn writeHeader(self: *MachO) !void {
-    if (!self.header_dirty) return;
+    var header = commands.emptyHeader(.{
+        .flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL,
+    });
 
-    self.header.?.ncmds = @intCast(u32, self.load_commands.items.len);
-    var sizeofcmds: u32 = 0;
-    for (self.load_commands.items) |cmd| {
-        sizeofcmds += cmd.cmdsize();
+    switch (self.base.options.target.cpu.arch) {
+        .aarch64 => {
+            header.cputype = macho.CPU_TYPE_ARM64;
+            header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL;
+        },
+        .x86_64 => {
+            header.cputype = macho.CPU_TYPE_X86_64;
+            header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL;
+        },
+        else => return error.UnsupportedCpuArchitecture,
     }
-    self.header.?.sizeofcmds = sizeofcmds;
-    log.debug("writing Mach-O header {}", .{self.header.?});
-    try self.base.file.?.pwriteAll(mem.asBytes(&self.header.?), 0);
-    self.header_dirty = false;
+
+    switch (self.base.options.output_mode) {
+        .Exe => {
+            header.filetype = macho.MH_EXECUTE;
+        },
+        .Lib => {
+            // By this point, it can only be a dylib.
+            header.filetype = macho.MH_DYLIB;
+            header.flags |= macho.MH_NO_REEXPORTED_DYLIBS;
+        },
+        else => unreachable,
+    }
+
+    if (self.tlv_section_index) |_| {
+        header.flags |= macho.MH_HAS_TLV_DESCRIPTORS;
+    }
+
+    header.ncmds = @intCast(u32, self.load_commands.items.len);
+    header.sizeofcmds = 0;
+
+    for (self.load_commands.items) |cmd| {
+        header.sizeofcmds += cmd.cmdsize();
+    }
+
+    log.debug("writing Mach-O header {}", .{header});
+
+    try self.base.file.?.pwriteAll(mem.asBytes(&header), 0);
 }
 
 pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) {
@@ -3359,3 +5760,179 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) {
     return std.math.add(@TypeOf(actual_size), actual_size, actual_size / ideal_factor) catch
         std.math.maxInt(@TypeOf(actual_size));
 }
+
+pub fn makeString(self: *MachO, string: []const u8) !u32 {
+    if (self.strtab_dir.getAdapted(@as([]const u8, string), StringSliceAdapter{ .strtab = &self.strtab })) |off| {
+        log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off });
+        return off;
+    }
+
+    try self.strtab.ensureUnusedCapacity(self.base.allocator, string.len + 1);
+    const new_off = @intCast(u32, self.strtab.items.len);
+
+    log.debug("writing new string '{s}' at offset 0x{x}", .{ string, new_off });
+
+    self.strtab.appendSliceAssumeCapacity(string);
+    self.strtab.appendAssumeCapacity(0);
+
+    try self.strtab_dir.putContext(self.base.allocator, new_off, new_off, StringIndexContext{
+        .strtab = &self.strtab,
+    });
+
+    return new_off;
+}
+
+pub fn getString(self: *MachO, off: u32) []const u8 {
+    assert(off < self.strtab.items.len);
+    return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + off));
+}
+
+pub fn symbolIsStab(sym: macho.nlist_64) bool {
+    return (macho.N_STAB & sym.n_type) != 0;
+}
+
+pub fn symbolIsPext(sym: macho.nlist_64) bool {
+    return (macho.N_PEXT & sym.n_type) != 0;
+}
+
+pub fn symbolIsExt(sym: macho.nlist_64) bool {
+    return (macho.N_EXT & sym.n_type) != 0;
+}
+
+pub fn symbolIsSect(sym: macho.nlist_64) bool {
+    const type_ = macho.N_TYPE & sym.n_type;
+    return type_ == macho.N_SECT;
+}
+
+pub fn symbolIsUndf(sym: macho.nlist_64) bool {
+    const type_ = macho.N_TYPE & sym.n_type;
+    return type_ == macho.N_UNDF;
+}
+
+pub fn symbolIsIndr(sym: macho.nlist_64) bool {
+    const type_ = macho.N_TYPE & sym.n_type;
+    return type_ == macho.N_INDR;
+}
+
+pub fn symbolIsAbs(sym: macho.nlist_64) bool {
+    const type_ = macho.N_TYPE & sym.n_type;
+    return type_ == macho.N_ABS;
+}
+
+pub fn symbolIsWeakDef(sym: macho.nlist_64) bool {
+    return (sym.n_desc & macho.N_WEAK_DEF) != 0;
+}
+
+pub fn symbolIsWeakRef(sym: macho.nlist_64) bool {
+    return (sym.n_desc & macho.N_WEAK_REF) != 0;
+}
+
+pub fn symbolIsTentative(sym: macho.nlist_64) bool {
+    if (!symbolIsUndf(sym)) return false;
+    return sym.n_value != 0;
+}
+
+pub fn symbolIsNull(sym: macho.nlist_64) bool {
+    return sym.n_value == 0 and sym.n_desc == 0 and sym.n_type == 0 and sym.n_strx == 0 and sym.n_sect == 0;
+}
+
+pub fn symbolIsTemp(sym: macho.nlist_64, sym_name: []const u8) bool {
+    if (!symbolIsSect(sym)) return false;
+    if (symbolIsExt(sym)) return false;
+    return mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L");
+}
+
+fn packDylibOrdinal(ordinal: u16) u16 {
+    return ordinal * macho.N_SYMBOL_RESOLVER;
+}
+
+fn unpackDylibOrdinal(pack: u16) u16 {
+    return @divExact(pack, macho.N_SYMBOL_RESOLVER);
+}
+
+pub fn findFirst(comptime T: type, haystack: []T, start: usize, predicate: anytype) usize {
+    if (!@hasDecl(@TypeOf(predicate), "predicate"))
+        @compileError("Predicate is required to define fn predicate(@This(), T) bool");
+
+    if (start == haystack.len) return start;
+
+    var i = start;
+    while (i < haystack.len) : (i += 1) {
+        if (predicate.predicate(haystack[i])) break;
+    }
+    return i;
+}
+
+fn createSectionOrdinal(self: *MachO, match: MatchingSection) !void {
+    if (self.section_to_ordinal.contains(match)) return;
+    const ordinal = @intCast(u8, self.section_ordinals.items.len);
+    try self.section_ordinals.append(self.base.allocator, match);
+    try self.section_to_ordinal.putNoClobber(self.base.allocator, match, ordinal);
+}
+
+fn printSymtabAndTextBlock(self: *MachO) void {
+    log.debug("locals", .{});
+    for (self.locals.items) |sym, id| {
+        log.debug("  {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym });
+    }
+
+    log.debug("globals", .{});
+    for (self.globals.items) |sym, id| {
+        log.debug("  {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym });
+    }
+
+    log.debug("tentatives", .{});
+    for (self.tentatives.items) |sym, id| {
+        log.debug("  {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym });
+    }
+
+    log.debug("undefines", .{});
+    for (self.undefs.items) |sym, id| {
+        log.debug("  {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym });
+    }
+
+    log.debug("imports", .{});
+    for (self.imports.items) |sym, id| {
+        log.debug("  {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym });
+    }
+
+    {
+        log.debug("symbol resolver", .{});
+        var it = self.symbol_resolver.keyIterator();
+        while (it.next()) |key_ptr| {
+            const sym_name = self.getString(key_ptr.*);
+            log.debug("  {s} => {}", .{ sym_name, self.symbol_resolver.get(key_ptr.*).? });
+        }
+    }
+
+    log.debug("mappings", .{});
+    for (self.objects.items) |object| {
+        log.debug("  in object {s}", .{object.name.?});
+        for (object.symtab.items) |sym, sym_id| {
+            if (object.symbol_mapping.get(@intCast(u32, sym_id))) |local_id| {
+                log.debug("    | {d} => {d}", .{ sym_id, local_id });
+            } else {
+                log.debug("    | {d} no local mapping for {s}", .{ sym_id, object.getString(sym.n_strx) });
+            }
+        }
+    }
+
+    {
+        var it = self.blocks.iterator();
+        while (it.next()) |entry| {
+            const seg = self.load_commands.items[entry.key_ptr.seg].Segment;
+            const sect = seg.sections.items[entry.key_ptr.sect];
+
+            var block: *TextBlock = entry.value_ptr.*;
+
+            log.debug("\n\n{s},{s} contents:", .{ commands.segmentName(sect), commands.sectionName(sect) });
+            log.debug("{}", .{sect});
+            log.debug("{}", .{block});
+
+            while (block.prev) |prev| {
+                block = prev;
+                log.debug("{}", .{block});
+            }
+        }
+    }
+}
diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig
index 8f047b4968..4004cdaefc 100644
--- a/src/link/MachO/Archive.zig
+++ b/src/link/MachO/Archive.zig
@@ -81,6 +81,11 @@ const ar_hdr = extern struct {
         }
     }
 
+    fn date(self: ar_hdr) !u64 {
+        const value = getValue(&self.ar_date);
+        return std.fmt.parseInt(u64, value, 10);
+    }
+
     fn size(self: ar_hdr) !u32 {
         const value = getValue(&self.ar_size);
         return std.fmt.parseInt(u32, value, 10);
@@ -264,6 +269,7 @@ pub fn parseObject(self: Archive, offset: u32) !*Object {
         .file = try fs.cwd().openFile(self.name.?, .{}),
         .name = name,
         .file_offset = @intCast(u32, try reader.context.getPos()),
+        .mtime = try self.header.?.date(),
     };
 
     try object.parse();
     try reader.context.seekTo(0);
diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig
index 684861ebf5..025959793e 100644
--- a/src/link/MachO/DebugSymbols.zig
+++ b/src/link/MachO/DebugSymbols.zig
@@ -3,7 +3,7 @@ const DebugSymbols = @This();
 const std = @import("std");
 const assert = std.debug.assert;
 const fs = std.fs;
-const log = std.log.scoped(.link);
+const log = std.log.scoped(.dsym);
 const macho = std.macho;
 const mem = std.mem;
 const DW = std.dwarf;
@@ -27,9 +27,6 @@ const page_size: u16 = 0x1000;
 base: *MachO,
 file: fs.File,
 
-/// Mach header
-header: ?macho.mach_header_64 = null,
-
 /// Table of all load commands
 load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
 /// __PAGEZERO segment
@@ -78,9 +75,8 @@ dbg_info_decl_last: ?*TextBlock = null,
 /// Table of debug symbol names aka the debug string table.
 debug_string_table: std.ArrayListUnmanaged(u8) = .{},
 
-header_dirty: bool = false,
 load_commands_dirty: bool = false,
-string_table_dirty: bool = false,
+strtab_dirty: bool = false,
 debug_string_table_dirty: bool = false,
 debug_abbrev_section_dirty: bool = false,
 debug_aranges_section_dirty: bool = false,
@@ -106,26 +102,10 @@ const min_nop_size = 2;
 /// You must call this function *after* `MachO.populateMissingMetadata()`
 /// has been called to get a viable debug symbols output.
 pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void {
-    if (self.header == null) {
-        const base_header = self.base.header.?;
-        var header: macho.mach_header_64 = undefined;
-        header.magic = macho.MH_MAGIC_64;
-        header.cputype = base_header.cputype;
-        header.cpusubtype = base_header.cpusubtype;
-        header.filetype = macho.MH_DSYM;
-        // These will get populated at the end of flushing the results to file.
-        header.ncmds = 0;
-        header.sizeofcmds = 0;
-        header.flags = 0;
-        header.reserved = 0;
-        self.header = header;
-        self.header_dirty = true;
-    }
     if (self.uuid_cmd_index == null) {
         const base_cmd = self.base.load_commands.items[self.base.uuid_cmd_index.?];
         self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len);
         try self.load_commands.append(allocator, base_cmd);
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.symtab_cmd_index == null) {
@@ -134,11 +114,11 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void
         const symtab_size = base_cmd.nsyms * @sizeOf(macho.nlist_64);
         const symtab_off = self.findFreeSpaceLinkedit(symtab_size, @sizeOf(macho.nlist_64));
 
-        log.debug("found dSym symbol table free space 0x{x} to 0x{x}", .{ symtab_off, symtab_off + symtab_size });
+        log.debug("found symbol table free space 0x{x} to 0x{x}", .{ symtab_off, symtab_off + symtab_size });
 
         const strtab_off = self.findFreeSpaceLinkedit(base_cmd.strsize, 1);
 
-        log.debug("found dSym string table free space 0x{x} to 0x{x}", .{ strtab_off, strtab_off + base_cmd.strsize });
+        log.debug("found string table free space 0x{x} to 0x{x}", .{ strtab_off, strtab_off + base_cmd.strsize });
 
         try self.load_commands.append(allocator, .{
             .Symtab = .{
@@ -150,16 +130,14 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void
                 .strsize = base_cmd.strsize,
             },
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
-        self.string_table_dirty = true;
+        self.strtab_dirty = true;
     }
     if (self.pagezero_segment_cmd_index == null) {
         self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
         const base_cmd = self.base.load_commands.items[self.base.pagezero_segment_cmd_index.?].Segment;
         const cmd = try self.copySegmentCommand(allocator, base_cmd);
         try self.load_commands.append(allocator, .{ .Segment = cmd });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.text_segment_cmd_index == null) {
@@ -167,7 +145,6 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void
         const base_cmd = self.base.load_commands.items[self.base.text_segment_cmd_index.?].Segment;
         const cmd = try self.copySegmentCommand(allocator, base_cmd);
         try self.load_commands.append(allocator, .{ .Segment = cmd });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.data_const_segment_cmd_index == null) outer: {
@@ -176,7 +153,6 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void
         const base_cmd = self.base.load_commands.items[self.base.data_const_segment_cmd_index.?].Segment;
         const cmd = try self.copySegmentCommand(allocator, base_cmd);
         try self.load_commands.append(allocator, .{ .Segment = cmd });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.data_segment_cmd_index == null) outer: {
@@ -185,7 +161,6 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void
         const base_cmd = self.base.load_commands.items[self.base.data_segment_cmd_index.?].Segment;
         const cmd = try self.copySegmentCommand(allocator, base_cmd);
         try self.load_commands.append(allocator, .{ .Segment = cmd });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.linkedit_segment_cmd_index == null) {
@@ -196,7 +171,6 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void
         cmd.inner.fileoff = self.linkedit_off;
         cmd.inner.filesize = self.linkedit_size;
         try self.load_commands.append(allocator, .{ .Segment = cmd });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.dwarf_segment_cmd_index == null) {
@@ -208,7 +182,7 @@
         const off = linkedit.inner.fileoff + linkedit.inner.filesize;
         const vmaddr = linkedit.inner.vmaddr + linkedit.inner.vmsize;
 
-        log.debug("found dSym __DWARF segment free space 0x{x} to 0x{x}", .{ off, off + needed_size });
+        log.debug("found __DWARF segment free space 0x{x} to 0x{x}", .{ off, off + needed_size });
 
         try self.load_commands.append(allocator, .{
             .Segment = SegmentCommand.empty("__DWARF", .{
@@ -218,7 +192,6 @@
                 .filesize = needed_size,
             }),
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
     }
     if (self.debug_str_section_index == null) {
@@ -232,7 +205,6 @@
             .offset = @intCast(u32, dwarf_segment.inner.fileoff),
             .@"align" = 1,
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
         self.debug_string_table_dirty = true;
     }
@@ -244,7 +216,7 @@
         const p_align = 1;
         const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null);
 
-        log.debug("found dSym __debug_info free space 0x{x} to 0x{x}", .{ off, off + file_size_hint });
+        log.debug("found __debug_info free space 0x{x} to 0x{x}", .{ off, off + file_size_hint });
 
         try dwarf_segment.addSection(allocator, "__debug_info", .{
             .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff,
@@ -252,7 +224,6 @@
             .offset = @intCast(u32, off),
            .@"align" = p_align,
        });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
         self.debug_info_header_dirty = true;
     }
@@ -264,7 +235,7 @@
         const p_align = 1;
         const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null);
 
-        log.debug("found dSym __debug_abbrev free space 0x{x} to 0x{x}", .{ off, off + file_size_hint });
+        log.debug("found __debug_abbrev free space 0x{x} to 0x{x}", .{ off, off + file_size_hint });
 
         try dwarf_segment.addSection(allocator, "__debug_abbrev", .{
             .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff,
@@ -272,7 +243,6 @@
             .offset = @intCast(u32, off),
             .@"align" = p_align,
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
         self.debug_abbrev_section_dirty = true;
     }
@@ -284,7 +254,7 @@
         const p_align = 16;
         const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null);
 
-        log.debug("found dSym __debug_aranges free space 0x{x} to 0x{x}", .{ off, off + file_size_hint });
+        log.debug("found __debug_aranges free space 0x{x} to 0x{x}", .{ off, off + file_size_hint });
 
         try dwarf_segment.addSection(allocator, "__debug_aranges", .{
             .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff,
@@ -292,7 +262,6 @@
             .offset = @intCast(u32, off),
             .@"align" = p_align,
         });
-        self.header_dirty = true;
         self.load_commands_dirty = true;
         self.debug_aranges_section_dirty = true;
     }
@@ -304,7 +273,7 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) 
!void const p_align = 1; const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); - log.debug("found dSym __debug_line free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); + log.debug("found __debug_line free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); try dwarf_segment.addSection(allocator, "__debug_line", .{ .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff, @@ -312,7 +281,6 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void .offset = @intCast(u32, off), .@"align" = p_align, }); - self.header_dirty = true; self.load_commands_dirty = true; self.debug_line_header_dirty = true; } @@ -624,9 +592,8 @@ pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Opt try self.writeLoadCommands(allocator); try self.writeHeader(); - assert(!self.header_dirty); assert(!self.load_commands_dirty); - assert(!self.string_table_dirty); + assert(!self.strtab_dirty); assert(!self.debug_abbrev_section_dirty); assert(!self.debug_aranges_section_dirty); assert(!self.debug_string_table_dirty); @@ -716,23 +683,38 @@ fn writeLoadCommands(self: *DebugSymbols, allocator: *Allocator) !void { } const off = @sizeOf(macho.mach_header_64); - log.debug("writing {} dSym load commands from 0x{x} to 0x{x}", .{ self.load_commands.items.len, off, off + sizeofcmds }); + log.debug("writing {} load commands from 0x{x} to 0x{x}", .{ self.load_commands.items.len, off, off + sizeofcmds }); try self.file.pwriteAll(buffer, off); self.load_commands_dirty = false; } fn writeHeader(self: *DebugSymbols) !void { - if (!self.header_dirty) return; + var header = emptyHeader(.{ + .filetype = macho.MH_DSYM, + }); - self.header.?.ncmds = @intCast(u32, self.load_commands.items.len); - var sizeofcmds: u32 = 0; - for (self.load_commands.items) |cmd| { - sizeofcmds += cmd.cmdsize(); + switch (self.base.base.options.target.cpu.arch) { + .aarch64 => { + header.cputype = macho.CPU_TYPE_ARM64; + header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL; + }, + .x86_64 => { + header.cputype = macho.CPU_TYPE_X86_64; + header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL; + }, + else => return error.UnsupportedCpuArchitecture, } - self.header.?.sizeofcmds = sizeofcmds; - log.debug("writing Mach-O dSym header {}", .{self.header.?}); - try self.file.pwriteAll(mem.asBytes(&self.header.?), 0); - self.header_dirty = false; + + header.ncmds = @intCast(u32, self.load_commands.items.len); + header.sizeofcmds = 0; + + for (self.load_commands.items) |cmd| { + header.sizeofcmds += cmd.cmdsize(); + } + + log.debug("writing Mach-O header {}", .{header}); + + try self.file.pwriteAll(mem.asBytes(&header), 0); } fn allocatedSizeLinkedit(self: *DebugSymbols, start: u64) u64 { @@ -798,7 +780,7 @@ fn relocateSymbolTable(self: *DebugSymbols) !void { const existing_size = symtab.nsyms * @sizeOf(macho.nlist_64); assert(new_symoff + existing_size <= self.linkedit_off + self.linkedit_size); // TODO expand LINKEDIT segment. 
- log.debug("relocating dSym symbol table from 0x{x}-0x{x} to 0x{x}-0x{x}", .{ + log.debug("relocating symbol table from 0x{x}-0x{x} to 0x{x}-0x{x}", .{ symtab.symoff, symtab.symoff + existing_size, new_symoff, @@ -820,30 +802,30 @@ pub fn writeLocalSymbol(self: *DebugSymbols, index: usize) !void { try self.relocateSymbolTable(); const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const off = symtab.symoff + @sizeOf(macho.nlist_64) * index; - log.debug("writing dSym local symbol {} at 0x{x}", .{ index, off }); + log.debug("writing local symbol {} at 0x{x}", .{ index, off }); try self.file.pwriteAll(mem.asBytes(&self.base.locals.items[index]), off); } fn writeStringTable(self: *DebugSymbols) !void { - if (!self.string_table_dirty) return; + if (!self.strtab_dirty) return; const tracy = trace(@src()); defer tracy.end(); const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const allocated_size = self.allocatedSizeLinkedit(symtab.stroff); - const needed_size = mem.alignForwardGeneric(u64, self.base.string_table.items.len, @alignOf(u64)); + const needed_size = mem.alignForwardGeneric(u64, self.base.strtab.items.len, @alignOf(u64)); if (needed_size > allocated_size) { symtab.strsize = 0; symtab.stroff = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1)); } symtab.strsize = @intCast(u32, needed_size); - log.debug("writing dSym string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); + log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - try self.file.pwriteAll(self.base.string_table.items, symtab.stroff); + try self.file.pwriteAll(self.base.strtab.items, symtab.stroff); self.load_commands_dirty = true; - self.string_table_dirty = false; + self.strtab_dirty = false; } pub fn updateDeclLineNumber(self: *DebugSymbols, module: *Module, decl: *const Module.Decl) !void { diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index 324c54d362..0e7f95a3d5 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -12,13 +12,12 @@ const fat = @import("fat.zig"); const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; -const Symbol = @import("Symbol.zig"); const LibStub = @import("../tapi.zig").LibStub; +const MachO = @import("../MachO.zig"); usingnamespace @import("commands.zig"); allocator: *Allocator, - arch: ?Arch = null, header: ?macho.mach_header_64 = null, file: ?fs.File = null, @@ -146,7 +145,12 @@ pub const CreateOpts = struct { id: ?Id = null, }; -pub fn createAndParseFromPath(allocator: *Allocator, arch: Arch, path: []const u8, opts: CreateOpts) Error!?[]*Dylib { +pub fn createAndParseFromPath( + allocator: *Allocator, + arch: Arch, + path: []const u8, + opts: CreateOpts, +) Error!?[]*Dylib { const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { error.FileNotFound => return null, else => |e| return e, @@ -320,7 +324,7 @@ fn parseSymbols(self: *Dylib) !void { _ = try self.file.?.preadAll(strtab, symtab_cmd.stroff + self.library_offset); for (slice) |sym| { - const add_to_symtab = Symbol.isExt(sym) and (Symbol.isSect(sym) or Symbol.isIndr(sym)); + const add_to_symtab = MachO.symbolIsExt(sym) and (MachO.symbolIsSect(sym) or MachO.symbolIsIndr(sym)); if (!add_to_symtab) continue; @@ -502,21 +506,3 @@ pub fn parseDependentLibs(self: *Dylib, out: *std.ArrayList(*Dylib)) !void { } } } - -pub fn createProxy(self: *Dylib, sym_name: []const u8) !?*Symbol { - if (!self.symbols.contains(sym_name)) return null; - - const name 
= try self.allocator.dupe(u8, sym_name); - const proxy = try self.allocator.create(Symbol.Proxy); - errdefer self.allocator.destroy(proxy); - - proxy.* = .{ - .base = .{ - .@"type" = .proxy, - .name = name, - }, - .file = self, - }; - - return &proxy.base; -} diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index cb55dd1fd8..fc17669e04 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -7,14 +7,14 @@ const fs = std.fs; const io = std.io; const log = std.log.scoped(.object); const macho = std.macho; +const math = std.math; const mem = std.mem; -const reloc = @import("reloc.zig"); +const sort = std.sort; const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; -const Relocation = reloc.Relocation; -const Symbol = @import("Symbol.zig"); -const parseName = @import("Zld.zig").parseName; +const MachO = @import("../MachO.zig"); +const TextBlock = @import("TextBlock.zig"); usingnamespace @import("commands.zig"); @@ -26,7 +26,6 @@ file_offset: ?u32 = null, name: ?[]const u8 = null, load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, -sections: std.ArrayListUnmanaged(Section) = .{}, segment_cmd_index: ?u16 = null, symtab_cmd_index: ?u16 = null, @@ -44,71 +43,23 @@ dwarf_debug_str_index: ?u16 = null, dwarf_debug_line_index: ?u16 = null, dwarf_debug_ranges_index: ?u16 = null, -symbols: std.ArrayListUnmanaged(*Symbol) = .{}, -initializers: std.ArrayListUnmanaged(*Symbol) = .{}, +symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +strtab: std.ArrayListUnmanaged(u8) = .{}, data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, -tu_path: ?[]const u8 = null, -tu_mtime: ?u64 = null, +// Debug info +debug_info: ?DebugInfo = null, +tu_name: ?[]const u8 = null, +tu_comp_dir: ?[]const u8 = null, +mtime: ?u64 = null, -pub const Section = struct { - inner: macho.section_64, - code: []u8, - relocs: ?[]*Relocation, - target_map: ?struct { - segment_id: u16, - section_id: u16, - offset: u32, - } = null, +text_blocks: std.ArrayListUnmanaged(*TextBlock) = .{}, +sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{}, - pub fn deinit(self: *Section, allocator: *Allocator) void { - allocator.free(self.code); - - if (self.relocs) |relocs| { - for (relocs) |rel| { - allocator.destroy(rel); - } - allocator.free(relocs); - } - } - - pub fn segname(self: Section) []const u8 { - return parseName(&self.inner.segname); - } - - pub fn sectname(self: Section) []const u8 { - return parseName(&self.inner.sectname); - } - - pub fn flags(self: Section) u32 { - return self.inner.flags; - } - - pub fn sectionType(self: Section) u8 { - return @truncate(u8, self.flags() & 0xff); - } - - pub fn sectionAttrs(self: Section) u32 { - return self.flags() & 0xffffff00; - } - - pub fn isCode(self: Section) bool { - const attr = self.sectionAttrs(); - return attr & macho.S_ATTR_PURE_INSTRUCTIONS != 0 or attr & macho.S_ATTR_SOME_INSTRUCTIONS != 0; - } - - pub fn isDebug(self: Section) bool { - return self.sectionAttrs() & macho.S_ATTR_DEBUG != 0; - } - - pub fn dontDeadStrip(self: Section) bool { - return self.sectionAttrs() & macho.S_ATTR_NO_DEAD_STRIP != 0; - } - - pub fn dontDeadStripIfReferencesLive(self: Section) bool { - return self.sectionAttrs() & macho.S_ATTR_LIVE_SUPPORT != 0; - } -}; +// TODO symbol mapping and its inverse can probably be simple arrays +// instead of hash maps. 
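+// (Object-file symtab indices are dense and zero-based, so flat arrays indexed
+// by the original symbol index would avoid hashing entirely.)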
+symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{}, +reverse_symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{}, const DebugInfo = struct { inner: dwarf.DwarfInfo, @@ -211,27 +162,28 @@ pub fn deinit(self: *Object) void { lc.deinit(self.allocator); } self.load_commands.deinit(self.allocator); - - for (self.sections.items) |*sect| { - sect.deinit(self.allocator); - } - self.sections.deinit(self.allocator); - - for (self.symbols.items) |sym| { - sym.deinit(self.allocator); - self.allocator.destroy(sym); - } - self.symbols.deinit(self.allocator); - self.data_in_code_entries.deinit(self.allocator); - self.initializers.deinit(self.allocator); + self.symtab.deinit(self.allocator); + self.strtab.deinit(self.allocator); + self.text_blocks.deinit(self.allocator); + self.sections_as_symbols.deinit(self.allocator); + self.symbol_mapping.deinit(self.allocator); + self.reverse_symbol_mapping.deinit(self.allocator); - if (self.name) |n| { + if (self.debug_info) |*db| { + db.deinit(self.allocator); + } + + if (self.tu_name) |n| { self.allocator.free(n); } - if (self.tu_path) |tu_path| { - self.allocator.free(tu_path); + if (self.tu_comp_dir) |n| { + self.allocator.free(n); + } + + if (self.name) |n| { + self.allocator.free(n); } } @@ -270,10 +222,8 @@ pub fn parse(self: *Object) !void { self.header = header; try self.readLoadCommands(reader); - try self.parseSymbols(); - try self.parseSections(); + try self.parseSymtab(); try self.parseDataInCode(); - try self.parseInitializers(); try self.parseDebugInfo(); } @@ -290,8 +240,8 @@ pub fn readLoadCommands(self: *Object, reader: anytype) !void { var seg = cmd.Segment; for (seg.sections.items) |*sect, j| { const index = @intCast(u16, j); - const segname = parseName(§.segname); - const sectname = parseName(§.sectname); + const segname = segmentName(sect.*); + const sectname = sectionName(sect.*); if (mem.eql(u8, segname, "__DWARF")) { if (mem.eql(u8, sectname, "__debug_info")) { self.dwarf_debug_info_index = index; @@ -345,62 +295,539 @@ pub fn readLoadCommands(self: *Object, reader: anytype) !void { } } -pub fn parseSections(self: *Object) !void { - const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; +const NlistWithIndex = struct { + nlist: macho.nlist_64, + index: u32, - log.debug("parsing sections in {s}", .{self.name.?}); + fn lessThan(_: void, lhs: NlistWithIndex, rhs: NlistWithIndex) bool { + // We sort by type: defined < undefined, and + // afterwards by address in each group. Normally, dysymtab should + // be enough to guarantee the sort, but turns out not every compiler + // is kind enough to specify the symbols in the correct order. + if (MachO.symbolIsSect(lhs.nlist)) { + if (MachO.symbolIsSect(rhs.nlist)) { + // Same group, sort by address. 
+ return lhs.nlist.n_value < rhs.nlist.n_value; + } else { + return true; + } + } else { + return false; + } + } - try self.sections.ensureCapacity(self.allocator, seg.sections.items.len); + fn filterInSection(symbols: []NlistWithIndex, sect: macho.section_64) []NlistWithIndex { + const Predicate = struct { + addr: u64, - for (seg.sections.items) |sect| { - log.debug("parsing section '{s},{s}'", .{ parseName(§.segname), parseName(§.sectname) }); - // Read sections' code - var code = try self.allocator.alloc(u8, @intCast(usize, sect.size)); - _ = try self.file.?.preadAll(code, sect.offset); - - var section = Section{ - .inner = sect, - .code = code, - .relocs = null, + pub fn predicate(self: @This(), symbol: NlistWithIndex) bool { + return symbol.nlist.n_value >= self.addr; + } }; - // Parse relocations - if (sect.nreloc > 0) { - var raw_relocs = try self.allocator.alloc(u8, @sizeOf(macho.relocation_info) * sect.nreloc); - defer self.allocator.free(raw_relocs); + const start = MachO.findFirst(NlistWithIndex, symbols, 0, Predicate{ .addr = sect.addr }); + const end = MachO.findFirst(NlistWithIndex, symbols, start, Predicate{ .addr = sect.addr + sect.size }); - _ = try self.file.?.preadAll(raw_relocs, sect.reloff); + return symbols[start..end]; + } +}; - section.relocs = try reloc.parse( - self.allocator, - self.arch.?, - section.code, - mem.bytesAsSlice(macho.relocation_info, raw_relocs), - self.symbols.items, +fn filterDice(dices: []macho.data_in_code_entry, start_addr: u64, end_addr: u64) []macho.data_in_code_entry { + const Predicate = struct { + addr: u64, + + pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool { + return dice.offset >= self.addr; + } + }; + + const start = MachO.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr }); + const end = MachO.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr }); + + return dices[start..end]; +} + +const TextBlockParser = struct { + allocator: *Allocator, + section: macho.section_64, + code: []u8, + relocs: []macho.relocation_info, + object: *Object, + macho_file: *MachO, + nlists: []NlistWithIndex, + index: u32 = 0, + match: MachO.MatchingSection, + + fn peek(self: *TextBlockParser) ?NlistWithIndex { + return if (self.index + 1 < self.nlists.len) self.nlists[self.index + 1] else null; + } + + const SeniorityContext = struct { + object: *Object, + }; + + fn lessThanBySeniority(context: SeniorityContext, lhs: NlistWithIndex, rhs: NlistWithIndex) bool { + if (!MachO.symbolIsExt(rhs.nlist)) { + return MachO.symbolIsTemp(lhs.nlist, context.object.getString(lhs.nlist.n_strx)); + } else if (MachO.symbolIsPext(rhs.nlist) or MachO.symbolIsWeakDef(rhs.nlist)) { + return !MachO.symbolIsExt(lhs.nlist); + } else { + return false; + } + } + + pub fn next(self: *TextBlockParser) !?*TextBlock { + if (self.index == self.nlists.len) return null; + + var aliases = std.ArrayList(NlistWithIndex).init(self.allocator); + defer aliases.deinit(); + + const next_nlist: ?NlistWithIndex = blk: while (true) { + const curr_nlist = self.nlists[self.index]; + try aliases.append(curr_nlist); + + if (self.peek()) |next_nlist| { + if (curr_nlist.nlist.n_value == next_nlist.nlist.n_value) { + self.index += 1; + continue; + } + break :blk next_nlist; + } + break :blk null; + } else null; + + for (aliases.items) |*nlist_with_index| { + nlist_with_index.index = self.object.symbol_mapping.get(nlist_with_index.index) orelse unreachable; + } + + if (aliases.items.len > 1) { + // Bubble-up senior symbol as the main 
link to the text block.
+            sort.sort(
+                NlistWithIndex,
+                aliases.items,
+                SeniorityContext{ .object = self.object },
+                TextBlockParser.lessThanBySeniority,
             );
         }
 
-        self.sections.appendAssumeCapacity(section);
+        const senior_nlist = aliases.pop();
+        const senior_sym = &self.macho_file.locals.items[senior_nlist.index];
+        senior_sym.n_sect = self.macho_file.section_to_ordinal.get(self.match) orelse unreachable;
+
+        const start_addr = senior_nlist.nlist.n_value - self.section.addr;
+        const end_addr = if (next_nlist) |n| n.nlist.n_value - self.section.addr else self.section.size;
+
+        const code = self.code[start_addr..end_addr];
+        const size = code.len;
+
+        const max_align = self.section.@"align";
+        const actual_align = if (senior_nlist.nlist.n_value > 0)
+            math.min(@ctz(u64, senior_nlist.nlist.n_value), max_align)
+        else
+            max_align;
+
+        const stab: ?TextBlock.Stab = if (self.object.debug_info) |di| blk: {
+            // TODO there has to be a better way to handle this.
+            for (di.inner.func_list.items) |func| {
+                if (func.pc_range) |range| {
+                    if (senior_nlist.nlist.n_value >= range.start and senior_nlist.nlist.n_value < range.end) {
+                        break :blk TextBlock.Stab{
+                            .function = range.end - range.start,
+                        };
+                    }
+                }
+            }
+            // TODO
+            // if (self.macho_file.globals.contains(self.macho_file.getString(senior_sym.strx))) break :blk .global;
+            break :blk .static;
+        } else null;
+
+        const block = try self.macho_file.base.allocator.create(TextBlock);
+        block.* = TextBlock.empty;
+        block.local_sym_index = senior_nlist.index;
+        block.stab = stab;
+        block.size = size;
+        block.alignment = actual_align;
+        try self.macho_file.managed_blocks.append(self.macho_file.base.allocator, block);
+
+        try block.code.appendSlice(self.macho_file.base.allocator, code);
+
+        try block.aliases.ensureTotalCapacity(self.macho_file.base.allocator, aliases.items.len);
+        for (aliases.items) |alias| {
+            block.aliases.appendAssumeCapacity(alias.index);
+            const sym = &self.macho_file.locals.items[alias.index];
+            sym.n_sect = self.macho_file.section_to_ordinal.get(self.match) orelse unreachable;
+        }
+
+        try block.parseRelocsFromObject(self.macho_file.base.allocator, self.relocs, self.object, .{
+            .base_addr = start_addr,
+            .macho_file = self.macho_file,
+        });
+
+        if (self.macho_file.has_dices) {
+            const dices = filterDice(
+                self.object.data_in_code_entries.items,
+                senior_nlist.nlist.n_value,
+                senior_nlist.nlist.n_value + size,
+            );
+            try block.dices.ensureTotalCapacity(self.macho_file.base.allocator, dices.len);
+
+            for (dices) |dice| {
+                block.dices.appendAssumeCapacity(.{
+                    .offset = dice.offset - try math.cast(u32, senior_nlist.nlist.n_value),
+                    .length = dice.length,
+                    .kind = dice.kind,
+                });
+            }
+        }
+
+        self.index += 1;
+
+        return block;
+    }
+};
+
+pub fn parseTextBlocks(self: *Object, macho_file: *MachO) !void {
+    const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
+
+    log.debug("analysing {s}", .{self.name.?});
+
+    // You would expect the symbol table to be at least pre-sorted by symbol type:
+    // local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance,
+    // the Go compiler does not necessarily respect that, so we sort immediately: by type first,
+    // and by address within each group.
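+    // After the sort, defined (N_SECT) symbols form a contiguous, address-ordered
+    // prefix of the list, with undefined symbols trailing behind.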
+    var sorted_all_nlists = std.ArrayList(NlistWithIndex).init(self.allocator);
+    defer sorted_all_nlists.deinit();
+    try sorted_all_nlists.ensureTotalCapacity(self.symtab.items.len);
+
+    for (self.symtab.items) |nlist, index| {
+        sorted_all_nlists.appendAssumeCapacity(.{
+            .nlist = nlist,
+            .index = @intCast(u32, index),
+        });
+    }
+
+    sort.sort(NlistWithIndex, sorted_all_nlists.items, {}, NlistWithIndex.lessThan);
+
+    // Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we
+    // have to infer the start of the undef section in the symtab ourselves.
+    const iundefsym = if (self.dysymtab_cmd_index) |cmd_index| blk: {
+        const dysymtab = self.load_commands.items[cmd_index].Dysymtab;
+        break :blk dysymtab.iundefsym;
+    } else blk: {
+        var iundefsym: usize = sorted_all_nlists.items.len;
+        while (iundefsym > 0) : (iundefsym -= 1) {
+            const nlist = sorted_all_nlists.items[iundefsym - 1];
+            if (MachO.symbolIsSect(nlist.nlist)) break;
+        }
+        break :blk iundefsym;
+    };
+
+    // We only care about defined symbols, so filter all others out.
+    const sorted_nlists = sorted_all_nlists.items[0..iundefsym];
+
+    for (seg.sections.items) |sect, id| {
+        const sect_id = @intCast(u8, id);
+        log.debug("putting section '{s},{s}' as a TextBlock", .{
+            segmentName(sect),
+            sectionName(sect),
+        });
+
+        // Get the matching segment/section in the final artifact.
+        const match = (try macho_file.getMatchingSection(sect)) orelse {
+            log.debug("unhandled section", .{});
+            continue;
+        };
+
+        // Read the section's code.
+        var code = try self.allocator.alloc(u8, @intCast(usize, sect.size));
+        defer self.allocator.free(code);
+        _ = try self.file.?.preadAll(code, sect.offset);
+
+        // Read the section's list of relocations.
+        var raw_relocs = try self.allocator.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info));
+        defer self.allocator.free(raw_relocs);
+        _ = try self.file.?.preadAll(raw_relocs, sect.reloff);
+        const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs);
+
+        // Symbols within this section only.
+        const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists, sect);
+
+        // In release mode, if the object file was generated with dead code stripping optimisations,
+        // note it now and parse sections as atoms.
+        const is_splittable = blk: {
+            if (macho_file.base.options.optimize_mode == .Debug) break :blk false;
+            break :blk self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
+        };
+
+        macho_file.has_dices = blk: {
+            if (self.text_section_index) |index| {
+                if (index != id) break :blk false;
+                if (self.data_in_code_entries.items.len == 0) break :blk false;
+                break :blk true;
+            }
+            break :blk false;
+        };
+        macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null;
+
+        next: {
+            if (is_splittable) blocks: {
+                if (filtered_nlists.len == 0) break :blocks;
+
+                // If the first nlist does not match the start of the section,
+                // then we need to encapsulate the memory range [section start, first symbol)
+                // as a temporary symbol and insert the matching TextBlock.
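+                // The temporary symbol below is named l_<object>_<segname>_<sectname>
+                // and anchored at the section's start address.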
+ const first_nlist = filtered_nlists[0].nlist; + if (first_nlist.n_value > sect.addr) { + const sym_name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{ + self.name.?, + segmentName(sect), + sectionName(sect), + }); + defer self.allocator.free(sym_name); + + const block_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { + const block_local_sym_index = @intCast(u32, macho_file.locals.items.len); + try macho_file.locals.append(macho_file.base.allocator, .{ + .n_strx = try macho_file.makeString(sym_name), + .n_type = macho.N_SECT, + .n_sect = macho_file.section_to_ordinal.get(match) orelse unreachable, + .n_desc = 0, + .n_value = sect.addr, + }); + try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, block_local_sym_index); + break :blk block_local_sym_index; + }; + + const block_code = code[0 .. first_nlist.n_value - sect.addr]; + const block_size = block_code.len; + + const block = try macho_file.base.allocator.create(TextBlock); + block.* = TextBlock.empty; + block.local_sym_index = block_local_sym_index; + block.size = block_size; + block.alignment = sect.@"align"; + try macho_file.managed_blocks.append(macho_file.base.allocator, block); + + try block.code.appendSlice(macho_file.base.allocator, block_code); + + try block.parseRelocsFromObject(self.allocator, relocs, self, .{ + .base_addr = 0, + .macho_file = macho_file, + }); + + if (macho_file.has_dices) { + const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + block_size); + try block.dices.ensureTotalCapacity(macho_file.base.allocator, dices.len); + + for (dices) |dice| { + block.dices.appendAssumeCapacity(.{ + .offset = dice.offset - try math.cast(u32, sect.addr), + .length = dice.length, + .kind = dice.kind, + }); + } + } + + // Update target section's metadata + // TODO should we update segment's size here too? + // How does it tie with incremental space allocs? 
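+                // Grow the output section: pad its current size up to the block's
+                // power-of-two alignment, then extend it by the block's size.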
+ const tseg = &macho_file.load_commands.items[match.seg].Segment; + const tsect = &tseg.sections.items[match.sect]; + const new_alignment = math.max(tsect.@"align", block.alignment); + const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); + const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; + tsect.size = new_size; + tsect.@"align" = new_alignment; + + if (macho_file.blocks.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try macho_file.blocks.putNoClobber(macho_file.base.allocator, match, block); + } + + try self.text_blocks.append(self.allocator, block); + } + + var parser = TextBlockParser{ + .allocator = self.allocator, + .section = sect, + .code = code, + .relocs = relocs, + .object = self, + .macho_file = macho_file, + .nlists = filtered_nlists, + .match = match, + }; + + while (try parser.next()) |block| { + const sym = macho_file.locals.items[block.local_sym_index]; + const is_ext = blk: { + const orig_sym_id = self.reverse_symbol_mapping.get(block.local_sym_index) orelse unreachable; + break :blk MachO.symbolIsExt(self.symtab.items[orig_sym_id]); + }; + if (is_ext) { + if (macho_file.symbol_resolver.get(sym.n_strx)) |resolv| { + assert(resolv.where == .global); + const global_object = macho_file.objects.items[resolv.file]; + if (global_object != self) { + log.debug("deduping definition of {s} in {s}", .{ + macho_file.getString(sym.n_strx), + self.name.?, + }); + log.debug(" already defined in {s}", .{global_object.name.?}); + continue; + } + } + } + + if (sym.n_value == sect.addr) { + if (self.sections_as_symbols.get(sect_id)) |alias| { + // In x86_64 relocs, it can so happen that the compiler refers to the same + // atom by both the actual assigned symbol and the start of the section. In this + // case, we need to link the two together so add an alias. + try block.aliases.append(macho_file.base.allocator, alias); + } + } + + // Update target section's metadata + // TODO should we update segment's size here too? + // How does it tie with incremental space allocs? + const tseg = &macho_file.load_commands.items[match.seg].Segment; + const tsect = &tseg.sections.items[match.sect]; + const new_alignment = math.max(tsect.@"align", block.alignment); + const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); + const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; + tsect.size = new_size; + tsect.@"align" = new_alignment; + + if (macho_file.blocks.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try macho_file.blocks.putNoClobber(macho_file.base.allocator, match, block); + } + + try self.text_blocks.append(self.allocator, block); + } + + break :next; + } + + // Since there is no symbol to refer to this block, we create + // a temp one, unless we already did that when working out the relocations + // of other text blocks. 
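+            // sections_as_symbols caches the section-to-anchor-symbol mapping, so a
+            // given section only ever receives one such temporary symbol.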
+            const sym_name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{
+                self.name.?,
+                segmentName(sect),
+                sectionName(sect),
+            });
+            defer self.allocator.free(sym_name);
+
+            const block_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
+                const block_local_sym_index = @intCast(u32, macho_file.locals.items.len);
+                try macho_file.locals.append(macho_file.base.allocator, .{
+                    .n_strx = try macho_file.makeString(sym_name),
+                    .n_type = macho.N_SECT,
+                    .n_sect = macho_file.section_to_ordinal.get(match) orelse unreachable,
+                    .n_desc = 0,
+                    .n_value = sect.addr,
+                });
+                try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, block_local_sym_index);
+                break :blk block_local_sym_index;
+            };
+
+            const block = try macho_file.base.allocator.create(TextBlock);
+            block.* = TextBlock.empty;
+            block.local_sym_index = block_local_sym_index;
+            block.size = sect.size;
+            block.alignment = sect.@"align";
+            try macho_file.managed_blocks.append(macho_file.base.allocator, block);
+
+            try block.code.appendSlice(macho_file.base.allocator, code);
+
+            try block.parseRelocsFromObject(self.allocator, relocs, self, .{
+                .base_addr = 0,
+                .macho_file = macho_file,
+            });
+
+            if (macho_file.has_dices) {
+                const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size);
+                try block.dices.ensureTotalCapacity(macho_file.base.allocator, dices.len);
+
+                for (dices) |dice| {
+                    block.dices.appendAssumeCapacity(.{
+                        .offset = dice.offset - try math.cast(u32, sect.addr),
+                        .length = dice.length,
+                        .kind = dice.kind,
+                    });
+                }
+            }
+
+            // Since this block gets a helper local temporary symbol that didn't exist
+            // in the object file and which encompasses the entire section, we need to
+            // traverse the filtered symbols and note which symbols are contained within
+            // it so that we can properly allocate addresses down the line.
+            // While we're at it, we need to update the segment,section mapping of each symbol too.
+            try block.contained.ensureTotalCapacity(self.allocator, filtered_nlists.len);
+
+            for (filtered_nlists) |nlist_with_index| {
+                const nlist = nlist_with_index.nlist;
+                const local_sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable;
+                const local = &macho_file.locals.items[local_sym_index];
+                local.n_sect = macho_file.section_to_ordinal.get(match) orelse unreachable;
+
+                const stab: ?TextBlock.Stab = if (self.debug_info) |di| blk: {
+                    // TODO there has to be a better way to handle this.
+                    for (di.inner.func_list.items) |func| {
+                        if (func.pc_range) |range| {
+                            if (nlist.n_value >= range.start and nlist.n_value < range.end) {
+                                break :blk TextBlock.Stab{
+                                    .function = range.end - range.start,
+                                };
+                            }
+                        }
+                    }
+                    // TODO
+                    // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global;
+                    break :blk .static;
+                } else null;
+
+                block.contained.appendAssumeCapacity(.{
+                    .local_sym_index = local_sym_index,
+                    .offset = nlist.n_value - sect.addr,
+                    .stab = stab,
+                });
+            }
+
+            // Update target section's metadata
+            // TODO should we update segment's size here too?
+            // How does it tie with incremental space allocs?
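+            // `blocks` maps each output (segment, section) pair to the tail of its
+            // doubly linked block list; new blocks are appended at the tail.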
+ const tseg = &macho_file.load_commands.items[match.seg].Segment; + const tsect = &tseg.sections.items[match.sect]; + const new_alignment = math.max(tsect.@"align", block.alignment); + const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); + const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; + tsect.size = new_size; + tsect.@"align" = new_alignment; + + if (macho_file.blocks.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try macho_file.blocks.putNoClobber(macho_file.base.allocator, match, block); + } + + try self.text_blocks.append(self.allocator, block); + } } } -pub fn parseInitializers(self: *Object) !void { - const index = self.mod_init_func_section_index orelse return; - const section = self.sections.items[index]; - - log.debug("parsing initializers in {s}", .{self.name.?}); - - // Parse C++ initializers - const relocs = section.relocs orelse unreachable; - try self.initializers.ensureCapacity(self.allocator, relocs.len); - for (relocs) |rel| { - self.initializers.appendAssumeCapacity(rel.target.symbol); - } - - mem.reverse(*Symbol, self.initializers.items); -} - -pub fn parseSymbols(self: *Object) !void { +fn parseSymtab(self: *Object) !void { const index = self.symtab_cmd_index orelse return; const symtab_cmd = self.load_commands.items[index].Symtab; @@ -408,90 +835,21 @@ pub fn parseSymbols(self: *Object) !void { defer self.allocator.free(symtab); _ = try self.file.?.preadAll(symtab, symtab_cmd.symoff); const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, symtab)); + try self.symtab.appendSlice(self.allocator, slice); var strtab = try self.allocator.alloc(u8, symtab_cmd.strsize); defer self.allocator.free(strtab); _ = try self.file.?.preadAll(strtab, symtab_cmd.stroff); - - for (slice) |sym| { - const sym_name = mem.spanZ(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx)); - - if (Symbol.isStab(sym)) { - log.err("unhandled symbol type: stab {s} in {s}", .{ sym_name, self.name.? }); - return error.UnhandledSymbolType; - } - if (Symbol.isIndr(sym)) { - log.err("unhandled symbol type: indirect {s} in {s}", .{ sym_name, self.name.? }); - return error.UnhandledSymbolType; - } - if (Symbol.isAbs(sym)) { - log.err("unhandled symbol type: absolute {s} in {s}", .{ sym_name, self.name.? 
}); - return error.UnhandledSymbolType; - } - - const name = try self.allocator.dupe(u8, sym_name); - const symbol: *Symbol = symbol: { - if (Symbol.isSect(sym)) { - const linkage: Symbol.Regular.Linkage = linkage: { - if (!Symbol.isExt(sym)) break :linkage .translation_unit; - if (Symbol.isWeakDef(sym) or Symbol.isPext(sym)) break :linkage .linkage_unit; - break :linkage .global; - }; - const regular = try self.allocator.create(Symbol.Regular); - errdefer self.allocator.destroy(regular); - regular.* = .{ - .base = .{ - .@"type" = .regular, - .name = name, - }, - .linkage = linkage, - .address = sym.n_value, - .section = sym.n_sect - 1, - .weak_ref = Symbol.isWeakRef(sym), - .file = self, - }; - break :symbol ®ular.base; - } - - if (sym.n_value != 0) { - const tentative = try self.allocator.create(Symbol.Tentative); - errdefer self.allocator.destroy(tentative); - tentative.* = .{ - .base = .{ - .@"type" = .tentative, - .name = name, - }, - .size = sym.n_value, - .alignment = (sym.n_desc >> 8) & 0x0f, - .file = self, - }; - break :symbol &tentative.base; - } - - const undef = try self.allocator.create(Symbol.Unresolved); - errdefer self.allocator.destroy(undef); - undef.* = .{ - .base = .{ - .@"type" = .unresolved, - .name = name, - }, - .file = self, - }; - break :symbol &undef.base; - }; - - try self.symbols.append(self.allocator, symbol); - } + try self.strtab.appendSlice(self.allocator, strtab); } pub fn parseDebugInfo(self: *Object) !void { + log.debug("parsing debug info in '{s}'", .{self.name.?}); + var debug_info = blk: { var di = try DebugInfo.parseFromObject(self.allocator, self); break :blk di orelse return; }; - defer debug_info.deinit(self.allocator); - - log.debug("parsing debug info in '{s}'", .{self.name.?}); // We assume there is only one CU. 
const compile_unit = debug_info.inner.findCompileUnit(0x0) catch |err| switch (err) { @@ -505,44 +863,19 @@ pub fn parseDebugInfo(self: *Object) !void { const name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_name); const comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_comp_dir); - self.tu_path = try std.fs.path.join(self.allocator, &[_][]const u8{ comp_dir, name }); - self.tu_mtime = mtime: { - const stat = try self.file.?.stat(); - break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000)); - }; + self.debug_info = debug_info; + self.tu_name = try self.allocator.dupe(u8, name); + self.tu_comp_dir = try self.allocator.dupe(u8, comp_dir); - for (self.symbols.items) |sym| { - if (sym.cast(Symbol.Regular)) |reg| { - const size: u64 = blk: for (debug_info.inner.func_list.items) |func| { - if (func.pc_range) |range| { - if (reg.address >= range.start and reg.address < range.end) { - break :blk range.end - range.start; - } - } - } else 0; - - reg.stab = .{ - .kind = kind: { - if (size > 0) break :kind .function; - switch (reg.linkage) { - .translation_unit => break :kind .static, - else => break :kind .global, - } - }, - .size = size, - }; - } + if (self.mtime == null) { + self.mtime = mtime: { + const file = self.file orelse break :mtime 0; + const stat = file.stat() catch break :mtime 0; + break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000)); + }; } } -fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 { - const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; - const sect = seg.sections.items[index]; - var buffer = try allocator.alloc(u8, @intCast(usize, sect.size)); - _ = try self.file.?.preadAll(buffer, sect.offset); - return buffer; -} - pub fn parseDataInCode(self: *Object) !void { const index = self.data_in_code_cmd_index orelse return; const data_in_code = self.load_commands.items[index].LinkeditData; @@ -562,3 +895,16 @@ pub fn parseDataInCode(self: *Object) !void { try self.data_in_code_entries.append(self.allocator, dice); } } + +fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 { + const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; + const sect = seg.sections.items[index]; + var buffer = try allocator.alloc(u8, @intCast(usize, sect.size)); + _ = try self.file.?.preadAll(buffer, sect.offset); + return buffer; +} + +pub fn getString(self: Object, off: u32) []const u8 { + assert(off < self.strtab.items.len); + return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + off)); +} diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig deleted file mode 100644 index 8da4704909..0000000000 --- a/src/link/MachO/Symbol.zig +++ /dev/null @@ -1,195 +0,0 @@ -const Symbol = @This(); - -const std = @import("std"); -const macho = std.macho; -const mem = std.mem; - -const Allocator = mem.Allocator; -const Dylib = @import("Dylib.zig"); -const Object = @import("Object.zig"); - -pub const Type = enum { - regular, - proxy, - unresolved, - tentative, -}; - -/// Symbol type. -@"type": Type, - -/// Symbol name. Owned slice. -name: []u8, - -/// Alias of. -alias: ?*Symbol = null, - -/// Index in GOT table for indirection. -got_index: ?u32 = null, - -/// Index in stubs table for late binding. -stubs_index: ?u32 = null, - -pub const Regular = struct { - base: Symbol, - - /// Linkage type. - linkage: Linkage, - - /// Symbol address. - address: u64, - - /// Section ID where the symbol resides. - section: u8, - - /// Whether the symbol is a weak ref. 
- weak_ref: bool, - - /// Object file where to locate this symbol. - file: *Object, - - /// Debug stab if defined. - stab: ?struct { - /// Stab kind - kind: enum { - function, - global, - static, - }, - - /// Size of the stab. - size: u64, - } = null, - - /// True if symbol was already committed into the final - /// symbol table. - visited: bool = false, - - pub const base_type: Symbol.Type = .regular; - - pub const Linkage = enum { - translation_unit, - linkage_unit, - global, - }; - - pub fn isTemp(regular: *Regular) bool { - if (regular.linkage == .translation_unit) { - return mem.startsWith(u8, regular.base.name, "l") or mem.startsWith(u8, regular.base.name, "L"); - } - return false; - } -}; - -pub const Proxy = struct { - base: Symbol, - - /// Dynamic binding info - spots within the final - /// executable where this proxy is referenced from. - bind_info: std.ArrayListUnmanaged(struct { - segment_id: u16, - address: u64, - }) = .{}, - - /// Dylib where to locate this symbol. - /// null means self-reference. - file: ?*Dylib = null, - - pub const base_type: Symbol.Type = .proxy; - - pub fn deinit(proxy: *Proxy, allocator: *Allocator) void { - proxy.bind_info.deinit(allocator); - } - - pub fn dylibOrdinal(proxy: *Proxy) u16 { - const dylib = proxy.file orelse return 0; - return dylib.ordinal.?; - } -}; - -pub const Unresolved = struct { - base: Symbol, - - /// File where this symbol was referenced. - /// null means synthetic, e.g., dyld_stub_binder. - file: ?*Object = null, - - pub const base_type: Symbol.Type = .unresolved; -}; - -pub const Tentative = struct { - base: Symbol, - - /// Symbol size. - size: u64, - - /// Symbol alignment as power of two. - alignment: u16, - - /// File where this symbol was referenced. - file: *Object, - - pub const base_type: Symbol.Type = .tentative; -}; - -pub fn deinit(base: *Symbol, allocator: *Allocator) void { - allocator.free(base.name); - switch (base.@"type") { - .proxy => @fieldParentPtr(Proxy, "base", base).deinit(allocator), - else => {}, - } -} - -pub fn cast(base: *Symbol, comptime T: type) ?*T { - if (base.@"type" != T.base_type) { - return null; - } - return @fieldParentPtr(T, "base", base); -} - -pub fn getTopmostAlias(base: *Symbol) *Symbol { - if (base.alias) |alias| { - return alias.getTopmostAlias(); - } - return base; -} - -pub fn isStab(sym: macho.nlist_64) bool { - return (macho.N_STAB & sym.n_type) != 0; -} - -pub fn isPext(sym: macho.nlist_64) bool { - return (macho.N_PEXT & sym.n_type) != 0; -} - -pub fn isExt(sym: macho.nlist_64) bool { - return (macho.N_EXT & sym.n_type) != 0; -} - -pub fn isSect(sym: macho.nlist_64) bool { - const type_ = macho.N_TYPE & sym.n_type; - return type_ == macho.N_SECT; -} - -pub fn isUndf(sym: macho.nlist_64) bool { - const type_ = macho.N_TYPE & sym.n_type; - return type_ == macho.N_UNDF; -} - -pub fn isIndr(sym: macho.nlist_64) bool { - const type_ = macho.N_TYPE & sym.n_type; - return type_ == macho.N_INDR; -} - -pub fn isAbs(sym: macho.nlist_64) bool { - const type_ = macho.N_TYPE & sym.n_type; - return type_ == macho.N_ABS; -} - -pub fn isWeakDef(sym: macho.nlist_64) bool { - return (sym.n_desc & macho.N_WEAK_DEF) != 0; -} - -pub fn isWeakRef(sym: macho.nlist_64) bool { - return (sym.n_desc & macho.N_WEAK_REF) != 0; -} diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig new file mode 100644 index 0000000000..8dca7bc37b --- /dev/null +++ b/src/link/MachO/TextBlock.zig @@ -0,0 +1,1224 @@ +const TextBlock = @This(); + +const std = @import("std"); +const aarch64 = 
@import("../../codegen/aarch64.zig"); +const assert = std.debug.assert; +const commands = @import("commands.zig"); +const log = std.log.scoped(.text_block); +const macho = std.macho; +const math = std.math; +const mem = std.mem; +const meta = std.meta; + +const Allocator = mem.Allocator; +const Arch = std.Target.Cpu.Arch; +const MachO = @import("../MachO.zig"); +const Object = @import("Object.zig"); + +/// Each decl always gets a local symbol with the fully qualified name. +/// The vaddr and size are found here directly. +/// The file offset is found by computing the vaddr offset from the section vaddr +/// the symbol references, and adding that to the file offset of the section. +/// If this field is 0, it means the codegen size = 0 and there is no symbol or +/// offset table entry. +local_sym_index: u32, + +/// List of symbol aliases pointing to the same block via different nlists +aliases: std.ArrayListUnmanaged(u32) = .{}, + +/// List of symbols contained within this block +contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, + +/// Code (may be non-relocated) this block represents +code: std.ArrayListUnmanaged(u8) = .{}, + +/// Size and alignment of this text block +/// Unlike in Elf, we need to store the size of this symbol as part of +/// the TextBlock since macho.nlist_64 lacks this information. +size: u64, +alignment: u32, + +relocs: std.ArrayListUnmanaged(Relocation) = .{}, + +/// List of offsets contained within this block that need rebasing by the dynamic +/// loader in presence of ASLR +rebases: std.ArrayListUnmanaged(u64) = .{}, + +/// List of offsets contained within this block that will be dynamically bound +/// by the dynamic loader and contain pointers to resolved (at load time) extern +/// symbols (aka proxies aka imports) +bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, + +/// List of data-in-code entries. This is currently specific to x86_64 only. +dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, + +/// Stab entry for this block. This is currently specific to a binary created +/// by linking object files in a traditional sense - in incremental sense, we +/// bypass stabs altogether to produce dSYM bundle directly with fully relocated +/// DWARF sections. +stab: ?Stab = null, + +/// Points to the previous and next neighbours +next: ?*TextBlock, +prev: ?*TextBlock, + +/// Previous/next linked list pointers. +/// This is the linked list node for this Decl's corresponding .debug_info tag. +dbg_info_prev: ?*TextBlock, +dbg_info_next: ?*TextBlock, +/// Offset into .debug_info pointing to the tag for this Decl. +dbg_info_off: u32, +/// Size of the .debug_info tag for this Decl, not including padding. 
+dbg_info_len: u32, + +pub const SymbolAtOffset = struct { + local_sym_index: u32, + offset: u64, + stab: ?Stab = null, + + pub fn format( + self: SymbolAtOffset, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "{{ {d}: .offset = {d}", .{ self.local_sym_index, self.offset }); + if (self.stab) |stab| { + try std.fmt.format(writer, ", .stab = {any}", .{stab}); + } + try std.fmt.format(writer, " }}", .{}); + } +}; + +pub const Stab = union(enum) { + function: u64, + static, + global, + + pub fn asNlists(stab: Stab, local_sym_index: u32, macho_file: anytype) ![]macho.nlist_64 { + var nlists = std.ArrayList(macho.nlist_64).init(macho_file.base.allocator); + defer nlists.deinit(); + + const sym = macho_file.locals.items[local_sym_index]; + switch (stab) { + .function => |size| { + try nlists.ensureUnusedCapacity(4); + nlists.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_BNSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }); + nlists.appendAssumeCapacity(.{ + .n_strx = sym.n_strx, + .n_type = macho.N_FUN, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }); + nlists.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_FUN, + .n_sect = 0, + .n_desc = 0, + .n_value = size, + }); + nlists.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_ENSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = size, + }); + }, + .global => { + try nlists.append(.{ + .n_strx = sym.n_strx, + .n_type = macho.N_GSYM, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + }, + .static => { + try nlists.append(.{ + .n_strx = sym.n_strx, + .n_type = macho.N_STSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }); + }, + } + + return nlists.toOwnedSlice(); + } +}; + +pub const Relocation = struct { + /// Offset within the `block`s code buffer. + /// Note relocation size can be inferred by relocation's kind. 
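+    /// (For example, a branch fixup patches 4 bytes, while an unsigned fixup
+    /// patches 4 or 8 bytes depending on its is_64bit flag.)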
+ offset: u32, + + where: enum { + local, + import, + }, + + where_index: u32, + + payload: union(enum) { + unsigned: Unsigned, + branch: Branch, + page: Page, + page_off: PageOff, + pointer_to_got: PointerToGot, + signed: Signed, + load: Load, + }, + + const ResolveArgs = struct { + block: *TextBlock, + offset: u32, + source_addr: u64, + target_addr: u64, + macho_file: *MachO, + }; + + pub const Unsigned = struct { + subtractor: ?u32, + + /// Addend embedded directly in the relocation slot + addend: i64, + + /// Extracted from r_length: + /// => 3 implies true + /// => 2 implies false + /// => * is unreachable + is_64bit: bool, + + pub fn resolve(self: Unsigned, args: ResolveArgs) !void { + const result = blk: { + if (self.subtractor) |subtractor| { + const sym = args.macho_file.locals.items[subtractor]; + break :blk @intCast(i64, args.target_addr) - @intCast(i64, sym.n_value) + self.addend; + } else { + break :blk @intCast(i64, args.target_addr) + self.addend; + } + }; + + if (self.is_64bit) { + mem.writeIntLittle(u64, args.block.code.items[args.offset..][0..8], @bitCast(u64, result)); + } else { + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @truncate(u32, @bitCast(u64, result))); + } + } + + pub fn format(self: Unsigned, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Unsigned {{ ", .{}); + if (self.subtractor) |sub| { + try std.fmt.format(writer, ".subtractor = {}, ", .{sub}); + } + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + const length: usize = if (self.is_64bit) 8 else 4; + try std.fmt.format(writer, ".length = {}, ", .{length}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const Branch = struct { + arch: Arch, + + pub fn resolve(self: Branch, args: ResolveArgs) !void { + switch (self.arch) { + .aarch64 => { + const displacement = try math.cast( + i28, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr), + ); + const code = args.block.code.items[args.offset..][0..4]; + var inst = aarch64.Instruction{ + .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.unconditional_branch_immediate, + ), code), + }; + inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2)); + mem.writeIntLittle(u32, code, inst.toU32()); + }, + .x86_64 => { + const displacement = try math.cast( + i32, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4, + ); + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); + }, + else => return error.UnsupportedCpuArchitecture, + } + } + + pub fn format(self: Branch, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; + _ = fmt; + _ = options; + try std.fmt.format(writer, "Branch {{}}", .{}); + } + }; + + pub const Page = struct { + kind: enum { + page, + got, + tlvp, + }, + addend: u32 = 0, + + pub fn resolve(self: Page, args: ResolveArgs) !void { + const target_addr = args.target_addr + self.addend; + const source_page = @intCast(i32, args.source_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); + const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); + + const code = args.block.code.items[args.offset..][0..4]; + var inst = aarch64.Instruction{ + .pc_relative_address = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.pc_relative_address, + ), 
code), + }; + inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); + inst.pc_relative_address.immlo = @truncate(u2, pages); + + mem.writeIntLittle(u32, code, inst.toU32()); + } + + pub fn format(self: Page, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Page {{ ", .{}); + switch (self.kind) { + .page => {}, + .got => { + try std.fmt.format(writer, ".got, ", .{}); + }, + .tlvp => { + try std.fmt.format(writer, ".tlvp", .{}); + }, + } + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const PageOff = struct { + kind: enum { + page, + got, + tlvp, + }, + addend: u32 = 0, + op_kind: ?OpKind = null, + + pub const OpKind = enum { + arithmetic, + load, + }; + + pub fn resolve(self: PageOff, args: ResolveArgs) !void { + const code = args.block.code.items[args.offset..][0..4]; + + switch (self.kind) { + .page => { + const target_addr = args.target_addr + self.addend; + const narrowed = @truncate(u12, target_addr); + + const op_kind = self.op_kind orelse unreachable; + var inst: aarch64.Instruction = blk: { + switch (op_kind) { + .arithmetic => { + break :blk .{ + .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), code), + }; + }, + .load => { + break :blk .{ + .load_store_register = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code), + }; + }, + } + }; + + if (op_kind == .arithmetic) { + inst.add_subtract_immediate.imm12 = narrowed; + } else { + const offset: u12 = blk: { + if (inst.load_store_register.size == 0) { + if (inst.load_store_register.v == 1) { + // 128-bit SIMD is scaled by 16. + break :blk try math.divExact(u12, narrowed, 16); + } + // Otherwise, 8-bit SIMD or ldrb. 
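+                            // (Byte-sized accesses are unscaled, so the immediate is used as-is.)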
+ break :blk narrowed; + } else { + const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size); + break :blk try math.divExact(u12, narrowed, denom); + } + }; + inst.load_store_register.offset = offset; + } + + mem.writeIntLittle(u32, code, inst.toU32()); + }, + .got => { + const narrowed = @truncate(u12, args.target_addr); + var inst: aarch64.Instruction = .{ + .load_store_register = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code), + }; + const offset = try math.divExact(u12, narrowed, 8); + inst.load_store_register.offset = offset; + mem.writeIntLittle(u32, code, inst.toU32()); + }, + .tlvp => { + const RegInfo = struct { + rd: u5, + rn: u5, + size: u1, + }; + const reg_info: RegInfo = blk: { + if (isArithmeticOp(code)) { + const inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), code); + break :blk .{ + .rd = inst.rd, + .rn = inst.rn, + .size = inst.sf, + }; + } else { + const inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code); + break :blk .{ + .rd = inst.rt, + .rn = inst.rn, + .size = @truncate(u1, inst.size), + }; + } + }; + const narrowed = @truncate(u12, args.target_addr); + var inst = aarch64.Instruction{ + .add_subtract_immediate = .{ + .rd = reg_info.rd, + .rn = reg_info.rn, + .imm12 = narrowed, + .sh = 0, + .s = 0, + .op = 0, + .sf = reg_info.size, + }, + }; + mem.writeIntLittle(u32, code, inst.toU32()); + }, + } + } + + pub fn format(self: PageOff, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "PageOff {{ ", .{}); + switch (self.kind) { + .page => {}, + .got => { + try std.fmt.format(writer, ".got, ", .{}); + }, + .tlvp => { + try std.fmt.format(writer, ".tlvp, ", .{}); + }, + } + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try std.fmt.format(writer, ".op_kind = {s}, ", .{self.op_kind}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const PointerToGot = struct { + pub fn resolve(_: PointerToGot, args: ResolveArgs) !void { + const result = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, result)); + } + + pub fn format(self: PointerToGot, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; + _ = fmt; + _ = options; + try std.fmt.format(writer, "PointerToGot {{}}", .{}); + } + }; + + pub const Signed = struct { + addend: i64, + correction: i4, + + pub fn resolve(self: Signed, args: ResolveArgs) !void { + const target_addr = @intCast(i64, args.target_addr) + self.addend; + const displacement = try math.cast( + i32, + target_addr - @intCast(i64, args.source_addr) - self.correction - 4, + ); + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); + } + + pub fn format(self: Signed, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Signed {{ ", .{}); + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try std.fmt.format(writer, ".correction = {}, ", .{self.correction}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const Load = struct { + kind: enum { + got, + tlvp, + }, + addend: i32 = 0, + + pub fn resolve(self: Load, args: ResolveArgs) 
!void { + if (self.kind == .tlvp) { + // We need to rewrite the opcode from movq to leaq. + args.block.code.items[args.offset - 2] = 0x8d; + } + const displacement = try math.cast( + i32, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4 + self.addend, + ); + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); + } + + pub fn format(self: Load, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Load {{ ", .{}); + try std.fmt.format(writer, "{s}, ", .{self.kind}); + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub fn resolve(self: Relocation, args: ResolveArgs) !void { + switch (self.payload) { + .unsigned => |unsigned| try unsigned.resolve(args), + .branch => |branch| try branch.resolve(args), + .page => |page| try page.resolve(args), + .page_off => |page_off| try page_off.resolve(args), + .pointer_to_got => |pointer_to_got| try pointer_to_got.resolve(args), + .signed => |signed| try signed.resolve(args), + .load => |load| try load.resolve(args), + } + } + + pub fn format(self: Relocation, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + try std.fmt.format(writer, "Relocation {{ ", .{}); + try std.fmt.format(writer, ".offset = {}, ", .{self.offset}); + try std.fmt.format(writer, ".where = {}, ", .{self.where}); + try std.fmt.format(writer, ".where_index = {d}, ", .{self.where_index}); + + switch (self.payload) { + .unsigned => |unsigned| try unsigned.format(fmt, options, writer), + .branch => |branch| try branch.format(fmt, options, writer), + .page => |page| try page.format(fmt, options, writer), + .page_off => |page_off| try page_off.format(fmt, options, writer), + .pointer_to_got => |pointer_to_got| try pointer_to_got.format(fmt, options, writer), + .signed => |signed| try signed.format(fmt, options, writer), + .load => |load| try load.format(fmt, options, writer), + } + + try std.fmt.format(writer, "}}", .{}); + } +}; + +pub const empty = TextBlock{ + .local_sym_index = 0, + .size = 0, + .alignment = 0, + .prev = null, + .next = null, + .dbg_info_prev = null, + .dbg_info_next = null, + .dbg_info_off = undefined, + .dbg_info_len = undefined, +}; + +pub fn deinit(self: *TextBlock, allocator: *Allocator) void { + self.dices.deinit(allocator); + self.bindings.deinit(allocator); + self.rebases.deinit(allocator); + self.relocs.deinit(allocator); + self.contained.deinit(allocator); + self.aliases.deinit(allocator); + self.code.deinit(allocator); +} + +/// Returns how much room there is to grow in virtual address space. +/// File offset relocation happens transparently, so it is not included in +/// this calculation. +pub fn capacity(self: TextBlock, macho_file: MachO) u64 { + const self_sym = macho_file.locals.items[self.local_sym_index]; + if (self.next) |next| { + const next_sym = macho_file.locals.items[next.local_sym_index]; + return next_sym.n_value - self_sym.n_value; + } else { + // We are the last block. + // The capacity is limited only by virtual address space. + return std.math.maxInt(u64) - self_sym.n_value; + } +} + +pub fn freeListEligible(self: TextBlock, macho_file: MachO) bool { + // No need to keep a free list node for the last block. 
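// Roughly: a block is worth a free list node only when the gap up to the
// next block leaves usable surplus beyond this block's padded size, i.e.
//   surplus = (next_sym.n_value - self_sym.n_value) - MachO.padToIdeal(self.size)
//   eligible iff surplus >= MachO.min_text_capacity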
+ const next = self.next orelse return false; + const self_sym = macho_file.locals.items[self.local_sym_index]; + const next_sym = macho_file.locals.items[next.local_sym_index]; + const cap = next_sym.n_value - self_sym.n_value; + const ideal_cap = MachO.padToIdeal(self.size); + if (cap <= ideal_cap) return false; + const surplus = cap - ideal_cap; + return surplus >= MachO.min_text_capacity; +} + +const RelocContext = struct { + base_addr: u64 = 0, + macho_file: *MachO, +}; + +fn initRelocFromObject(rel: macho.relocation_info, object: *Object, ctx: RelocContext) !Relocation { + var parsed_rel = Relocation{ + .offset = @intCast(u32, @intCast(u64, rel.r_address) - ctx.base_addr), + .where = undefined, + .where_index = undefined, + .payload = undefined, + }; + + if (rel.r_extern == 0) { + const sect_id = @intCast(u16, rel.r_symbolnum - 1); + + const local_sym_index = object.sections_as_symbols.get(sect_id) orelse blk: { + const seg = object.load_commands.items[object.segment_cmd_index.?].Segment; + const sect = seg.sections.items[sect_id]; + const match = (try ctx.macho_file.getMatchingSection(sect)) orelse unreachable; + const local_sym_index = @intCast(u32, ctx.macho_file.locals.items.len); + const sym_name = try std.fmt.allocPrint(ctx.macho_file.base.allocator, "l_{s}_{s}_{s}", .{ + object.name.?, + commands.segmentName(sect), + commands.sectionName(sect), + }); + defer ctx.macho_file.base.allocator.free(sym_name); + + try ctx.macho_file.locals.append(ctx.macho_file.base.allocator, .{ + .n_strx = try ctx.macho_file.makeString(sym_name), + .n_type = macho.N_SECT, + .n_sect = ctx.macho_file.section_to_ordinal.get(match) orelse unreachable, + .n_desc = 0, + .n_value = sect.addr, + }); + try object.sections_as_symbols.putNoClobber(object.allocator, sect_id, local_sym_index); + break :blk local_sym_index; + }; + + parsed_rel.where = .local; + parsed_rel.where_index = local_sym_index; + } else { + const sym = object.symtab.items[rel.r_symbolnum]; + const sym_name = object.getString(sym.n_strx); + + if (MachO.symbolIsSect(sym) and !MachO.symbolIsExt(sym)) { + const where_index = object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; + parsed_rel.where = .local; + parsed_rel.where_index = where_index; + } else { + const n_strx = ctx.macho_file.strtab_dir.getAdapted(@as([]const u8, sym_name), MachO.StringSliceAdapter{ + .strtab = &ctx.macho_file.strtab, + }) orelse unreachable; + const resolv = ctx.macho_file.symbol_resolver.get(n_strx) orelse unreachable; + switch (resolv.where) { + .global => { + parsed_rel.where = .local; + parsed_rel.where_index = resolv.local_sym_index; + }, + .import => { + parsed_rel.where = .import; + parsed_rel.where_index = resolv.where_index; + }, + else => unreachable, + } + } + } + + return parsed_rel; +} + +pub fn parseRelocsFromObject( + self: *TextBlock, + allocator: *Allocator, + relocs: []macho.relocation_info, + object: *Object, + ctx: RelocContext, +) !void { + const filtered_relocs = filterRelocs(relocs, ctx.base_addr, ctx.base_addr + self.size); + var it = RelocIterator{ + .buffer = filtered_relocs, + }; + + var addend: u32 = 0; + var subtractor: ?u32 = null; + + while (it.next()) |rel| { + if (isAddend(rel, object.arch.?)) { + // Addend is not a relocation with effect on the TextBlock, so + // parse it and carry on. + assert(addend == 0); // Oh no, addend was not reset! + addend = rel.r_symbolnum; + + // Verify ADDEND is followed by a load. 
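// (ARM64_RELOC_ADDEND patches nothing itself; it is a pseudo-relocation
// whose r_symbolnum field carries the addend for the immediately
// following PAGE21/PAGEOFF12 entry, hence the peek() check below.)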
+ const next = @intToEnum(macho.reloc_type_arm64, it.peek().r_type); + switch (next) { + .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {}, + else => { + log.err("unexpected relocation type: expected PAGE21 or PAGEOFF12, found {s}", .{next}); + return error.UnexpectedRelocationType; + }, + } + continue; + } + + if (isSubtractor(rel, object.arch.?)) { + // Subtractor is not a relocation with effect on the TextBlock, so + // parse it and carry on. + assert(subtractor == null); // Oh no, subtractor was not reset! + assert(rel.r_extern == 1); + const sym = object.symtab.items[rel.r_symbolnum]; + const sym_name = object.getString(sym.n_strx); + + if (MachO.symbolIsSect(sym) and !MachO.symbolIsExt(sym)) { + const where_index = object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; + subtractor = where_index; + } else { + const n_strx = ctx.macho_file.strtab_dir.getAdapted(@as([]const u8, sym_name), MachO.StringSliceAdapter{ + .strtab = &ctx.macho_file.strtab, + }) orelse unreachable; + const resolv = ctx.macho_file.symbol_resolver.get(n_strx) orelse unreachable; + assert(resolv.where == .global); + subtractor = resolv.local_sym_index; + } + + // Verify SUBTRACTOR is followed by UNSIGNED. + switch (object.arch.?) { + .aarch64 => { + const next = @intToEnum(macho.reloc_type_arm64, it.peek().r_type); + if (next != .ARM64_RELOC_UNSIGNED) { + log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); + return error.UnexpectedRelocationType; + } + }, + .x86_64 => { + const next = @intToEnum(macho.reloc_type_x86_64, it.peek().r_type); + if (next != .X86_64_RELOC_UNSIGNED) { + log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); + return error.UnexpectedRelocationType; + } + }, + else => unreachable, + } + continue; + } + + var parsed_rel = try initRelocFromObject(rel, object, ctx); + + switch (object.arch.?) 
{ + .aarch64 => { + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + switch (rel_type) { + .ARM64_RELOC_ADDEND => unreachable, + .ARM64_RELOC_SUBTRACTOR => unreachable, + .ARM64_RELOC_BRANCH26 => { + self.parseBranch(rel, &parsed_rel, ctx); + }, + .ARM64_RELOC_UNSIGNED => { + self.parseUnsigned(rel, &parsed_rel, subtractor, ctx); + subtractor = null; + }, + .ARM64_RELOC_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_TLVP_LOAD_PAGE21, + => { + self.parsePage(rel, &parsed_rel, addend); + if (rel_type == .ARM64_RELOC_PAGE21) + addend = 0; + }, + .ARM64_RELOC_PAGEOFF12, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, + => { + self.parsePageOff(rel, &parsed_rel, addend); + if (rel_type == .ARM64_RELOC_PAGEOFF12) + addend = 0; + }, + .ARM64_RELOC_POINTER_TO_GOT => { + self.parsePointerToGot(rel, &parsed_rel); + }, + } + }, + .x86_64 => { + switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { + .X86_64_RELOC_SUBTRACTOR => unreachable, + .X86_64_RELOC_BRANCH => { + self.parseBranch(rel, &parsed_rel, ctx); + }, + .X86_64_RELOC_UNSIGNED => { + self.parseUnsigned(rel, &parsed_rel, subtractor, ctx); + subtractor = null; + }, + .X86_64_RELOC_SIGNED, + .X86_64_RELOC_SIGNED_1, + .X86_64_RELOC_SIGNED_2, + .X86_64_RELOC_SIGNED_4, + => { + self.parseSigned(rel, &parsed_rel, ctx); + }, + .X86_64_RELOC_GOT_LOAD, + .X86_64_RELOC_GOT, + .X86_64_RELOC_TLV, + => { + self.parseLoad(rel, &parsed_rel); + }, + } + }, + else => unreachable, + } + + try self.relocs.append(allocator, parsed_rel); + + const is_via_got = switch (parsed_rel.payload) { + .pointer_to_got => true, + .load => |load| load.kind == .got, + .page => |page| page.kind == .got, + .page_off => |page_off| page_off.kind == .got, + else => false, + }; + + if (is_via_got) blk: { + const key = MachO.GotIndirectionKey{ + .where = switch (parsed_rel.where) { + .local => .local, + .import => .import, + }, + .where_index = parsed_rel.where_index, + }; + if (ctx.macho_file.got_entries_map.contains(key)) break :blk; + + const got_index = @intCast(u32, ctx.macho_file.got_entries.items.len); + try ctx.macho_file.got_entries.append(ctx.macho_file.base.allocator, key); + try ctx.macho_file.got_entries_map.putNoClobber(ctx.macho_file.base.allocator, key, got_index); + } else if (parsed_rel.payload == .unsigned) { + switch (parsed_rel.where) { + .import => { + try self.bindings.append(allocator, .{ + .local_sym_index = parsed_rel.where_index, + .offset = parsed_rel.offset, + }); + }, + .local => { + const source_sym = ctx.macho_file.locals.items[self.local_sym_index]; + const match = ctx.macho_file.section_ordinals.items[source_sym.n_sect]; + const seg = ctx.macho_file.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + const sect_type = commands.sectionType(sect); + + const should_rebase = rebase: { + if (!parsed_rel.payload.unsigned.is_64bit) break :rebase false; + + // TODO actually, a check similar to what dyld is doing, that is, verifying + // that the segment is writable should be enough here. 
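// A rebase entry tells dyld which absolute 64-bit pointers to slide by
// the load-address delta when ASLR moves the image off its preferred
// base; the check below narrows this to pointers living in the __DATA
// and __DATA_CONST segments.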
+ const is_right_segment = blk: { + if (ctx.macho_file.data_segment_cmd_index) |idx| { + if (match.seg == idx) { + break :blk true; + } + } + if (ctx.macho_file.data_const_segment_cmd_index) |idx| { + if (match.seg == idx) { + break :blk true; + } + } + break :blk false; + }; + + if (!is_right_segment) break :rebase false; + if (sect_type != macho.S_LITERAL_POINTERS and + sect_type != macho.S_REGULAR and + sect_type != macho.S_MOD_INIT_FUNC_POINTERS and + sect_type != macho.S_MOD_TERM_FUNC_POINTERS) + { + break :rebase false; + } + + break :rebase true; + }; + + if (should_rebase) { + try self.rebases.append(allocator, parsed_rel.offset); + } + }, + } + } else if (parsed_rel.payload == .branch) blk: { + if (parsed_rel.where != .import) break :blk; + if (ctx.macho_file.stubs_map.contains(parsed_rel.where_index)) break :blk; + + const stubs_index = @intCast(u32, ctx.macho_file.stubs.items.len); + try ctx.macho_file.stubs.append(ctx.macho_file.base.allocator, parsed_rel.where_index); + try ctx.macho_file.stubs_map.putNoClobber(ctx.macho_file.base.allocator, parsed_rel.where_index, stubs_index); + } + } +} + +fn isAddend(rel: macho.relocation_info, arch: Arch) bool { + if (arch != .aarch64) return false; + return @intToEnum(macho.reloc_type_arm64, rel.r_type) == .ARM64_RELOC_ADDEND; +} + +fn isSubtractor(rel: macho.relocation_info, arch: Arch) bool { + return switch (arch) { + .aarch64 => @intToEnum(macho.reloc_type_arm64, rel.r_type) == .ARM64_RELOC_SUBTRACTOR, + .x86_64 => @intToEnum(macho.reloc_type_x86_64, rel.r_type) == .X86_64_RELOC_SUBTRACTOR, + else => unreachable, + }; +} + +fn parseUnsigned( + self: TextBlock, + rel: macho.relocation_info, + out: *Relocation, + subtractor: ?u32, + ctx: RelocContext, +) void { + assert(rel.r_pcrel == 0); + + const is_64bit: bool = switch (rel.r_length) { + 3 => true, + 2 => false, + else => unreachable, + }; + + var addend: i64 = if (is_64bit) + mem.readIntLittle(i64, self.code.items[out.offset..][0..8]) + else + mem.readIntLittle(i32, self.code.items[out.offset..][0..4]); + + if (rel.r_extern == 0) { + assert(out.where == .local); + const target_sym = ctx.macho_file.locals.items[out.where_index]; + addend -= @intCast(i64, target_sym.n_value); + } + + out.payload = .{ + .unsigned = .{ + .subtractor = subtractor, + .is_64bit = is_64bit, + .addend = addend, + }, + }; +} + +fn parseBranch(self: TextBlock, rel: macho.relocation_info, out: *Relocation, ctx: RelocContext) void { + _ = self; + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + out.payload = .{ + .branch = .{ + .arch = ctx.macho_file.base.options.target.cpu.arch, + }, + }; +} + +fn parsePage(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void { + _ = self; + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + out.payload = .{ + .page = .{ + .kind = switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { + .ARM64_RELOC_PAGE21 => .page, + .ARM64_RELOC_GOT_LOAD_PAGE21 => .got, + .ARM64_RELOC_TLVP_LOAD_PAGE21 => .tlvp, + else => unreachable, + }, + .addend = addend, + }, + }; +} + +fn parsePageOff(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void { + assert(rel.r_pcrel == 0); + assert(rel.r_length == 2); + + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + const op_kind: ?Relocation.PageOff.OpKind = blk: { + if (rel_type != .ARM64_RELOC_PAGEOFF12) break :blk null; + const op_kind: Relocation.PageOff.OpKind = if (isArithmeticOp(self.code.items[out.offset..][0..4])) + .arithmetic + else + .load; + break 
:blk op_kind; + }; + + out.payload = .{ + .page_off = .{ + .kind = switch (rel_type) { + .ARM64_RELOC_PAGEOFF12 => .page, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => .got, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => .tlvp, + else => unreachable, + }, + .addend = addend, + .op_kind = op_kind, + }, + }; +} + +fn parsePointerToGot(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void { + _ = self; + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + out.payload = .{ + .pointer_to_got = .{}, + }; +} + +fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, ctx: RelocContext) void { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + const correction: i4 = switch (rel_type) { + .X86_64_RELOC_SIGNED => 0, + .X86_64_RELOC_SIGNED_1 => 1, + .X86_64_RELOC_SIGNED_2 => 2, + .X86_64_RELOC_SIGNED_4 => 4, + else => unreachable, + }; + var addend: i64 = mem.readIntLittle(i32, self.code.items[out.offset..][0..4]) + correction; + + if (rel.r_extern == 0) { + const source_sym = ctx.macho_file.locals.items[self.local_sym_index]; + const target_sym = switch (out.where) { + .local => ctx.macho_file.locals.items[out.where_index], + .import => ctx.macho_file.imports.items[out.where_index], + }; + addend = @intCast(i64, source_sym.n_value + out.offset + 4) + addend - @intCast(i64, target_sym.n_value); + } + + out.payload = .{ + .signed = .{ + .correction = correction, + .addend = addend, + }, + }; +} + +fn parseLoad(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + const addend: i32 = if (rel_type == .X86_64_RELOC_GOT) + mem.readIntLittle(i32, self.code.items[out.offset..][0..4]) + else + 0; + + out.payload = .{ + .load = .{ + .kind = switch (rel_type) { + .X86_64_RELOC_GOT_LOAD, .X86_64_RELOC_GOT => .got, + .X86_64_RELOC_TLV => .tlvp, + else => unreachable, + }, + .addend = addend, + }, + }; +} + +pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void { + for (self.relocs.items) |rel| { + log.debug("relocating {}", .{rel}); + + const source_addr = blk: { + const sym = macho_file.locals.items[self.local_sym_index]; + break :blk sym.n_value + rel.offset; + }; + const target_addr = blk: { + const is_via_got = switch (rel.payload) { + .pointer_to_got => true, + .page => |page| page.kind == .got, + .page_off => |page_off| page_off.kind == .got, + .load => |load| load.kind == .got, + else => false, + }; + + if (is_via_got) { + const dc_seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; + const got = dc_seg.sections.items[macho_file.got_section_index.?]; + const got_index = macho_file.got_entries_map.get(.{ + .where = switch (rel.where) { + .local => .local, + .import => .import, + }, + .where_index = rel.where_index, + }) orelse { + const sym = switch (rel.where) { + .local => macho_file.locals.items[rel.where_index], + .import => macho_file.imports.items[rel.where_index], + }; + log.err("expected GOT entry for symbol '{s}'", .{macho_file.getString(sym.n_strx)}); + log.err(" this is an internal linker error", .{}); + return error.FailedToResolveRelocationTarget; + }; + break :blk got.addr + got_index * @sizeOf(u64); + } + + switch (rel.where) { + .local => { + const sym = macho_file.locals.items[rel.where_index]; + const is_tlv = is_tlv: { + const source_sym = macho_file.locals.items[self.local_sym_index]; + const 
match = macho_file.section_ordinals.items[source_sym.n_sect]; + const seg = macho_file.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + break :is_tlv commands.sectionType(sect) == macho.S_THREAD_LOCAL_VARIABLES; + }; + if (is_tlv) { + // For TLV relocations, the value specified as a relocation is the displacement from the + // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first + // defined TLV template init section in the following order: + // * wrt to __thread_data if defined, then + // * wrt to __thread_bss + const seg = macho_file.load_commands.items[macho_file.data_segment_cmd_index.?].Segment; + const base_address = inner: { + if (macho_file.tlv_data_section_index) |i| { + break :inner seg.sections.items[i].addr; + } else if (macho_file.tlv_bss_section_index) |i| { + break :inner seg.sections.items[i].addr; + } else { + log.err("threadlocal variables present but no initializer sections found", .{}); + log.err(" __thread_data not found", .{}); + log.err(" __thread_bss not found", .{}); + return error.FailedToResolveRelocationTarget; + } + }; + break :blk sym.n_value - base_address; + } + + break :blk sym.n_value; + }, + .import => { + const stubs_index = macho_file.stubs_map.get(rel.where_index) orelse { + // TODO verify in TextBlock that the symbol is indeed dynamically bound. + break :blk 0; // Dynamically bound by dyld. + }; + const segment = macho_file.load_commands.items[macho_file.text_segment_cmd_index.?].Segment; + const stubs = segment.sections.items[macho_file.stubs_section_index.?]; + break :blk stubs.addr + stubs_index * stubs.reserved2; + }, + } + }; + + log.debug(" | source_addr = 0x{x}", .{source_addr}); + log.debug(" | target_addr = 0x{x}", .{target_addr}); + + try rel.resolve(.{ + .block = self, + .offset = rel.offset, + .source_addr = source_addr, + .target_addr = target_addr, + .macho_file = macho_file, + }); + } +} + +pub fn format(self: TextBlock, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "TextBlock {{ ", .{}); + try std.fmt.format(writer, ".local_sym_index = {d}, ", .{self.local_sym_index}); + try std.fmt.format(writer, ".aliases = {any}, ", .{self.aliases.items}); + try std.fmt.format(writer, ".contained = {any}, ", .{self.contained.items}); + try std.fmt.format(writer, ".code = {*}, ", .{self.code.items}); + try std.fmt.format(writer, ".size = {d}, ", .{self.size}); + try std.fmt.format(writer, ".alignment = {d}, ", .{self.alignment}); + try std.fmt.format(writer, ".relocs = {any}, ", .{self.relocs.items}); + try std.fmt.format(writer, ".rebases = {any}, ", .{self.rebases.items}); + try std.fmt.format(writer, ".bindings = {any}, ", .{self.bindings.items}); + try std.fmt.format(writer, ".dices = {any}, ", .{self.dices.items}); + if (self.stab) |stab| { + try std.fmt.format(writer, ".stab = {any}, ", .{stab}); + } + try std.fmt.format(writer, "}}", .{}); +} + +const RelocIterator = struct { + buffer: []const macho.relocation_info, + index: i32 = -1, + + pub fn next(self: *RelocIterator) ?macho.relocation_info { + self.index += 1; + if (self.index < self.buffer.len) { + return self.buffer[@intCast(u32, self.index)]; + } + return null; + } + + pub fn peek(self: RelocIterator) macho.relocation_info { + assert(self.index + 1 < self.buffer.len); + return self.buffer[@intCast(u32, self.index + 1)]; + } +}; + +fn filterRelocs(relocs: []macho.relocation_info, start_addr: u64, end_addr: u64) 
[]macho.relocation_info { + const Predicate = struct { + addr: u64, + + pub fn predicate(self: @This(), rel: macho.relocation_info) bool { + return rel.r_address < self.addr; + } + }; + + const start = MachO.findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr }); + const end = MachO.findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr }); + + return relocs[start..end]; +} + +inline fn isArithmeticOp(inst: *const [4]u8) bool { + const group_decode = @truncate(u5, inst[3]); + return ((group_decode >> 2) == 4); +} diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig deleted file mode 100644 index 454b5dbcfe..0000000000 --- a/src/link/MachO/Zld.zig +++ /dev/null @@ -1,3209 +0,0 @@ -const Zld = @This(); - -const std = @import("std"); -const assert = std.debug.assert; -const leb = std.leb; -const mem = std.mem; -const meta = std.meta; -const fs = std.fs; -const macho = std.macho; -const math = std.math; -const log = std.log.scoped(.zld); -const aarch64 = @import("../../codegen/aarch64.zig"); -const reloc = @import("reloc.zig"); - -const Allocator = mem.Allocator; -const Archive = @import("Archive.zig"); -const CodeSignature = @import("CodeSignature.zig"); -const Dylib = @import("Dylib.zig"); -const Object = @import("Object.zig"); -const Symbol = @import("Symbol.zig"); -const Trie = @import("Trie.zig"); - -usingnamespace @import("commands.zig"); -usingnamespace @import("bind.zig"); - -allocator: *Allocator, - -target: ?std.Target = null, -page_size: ?u16 = null, -file: ?fs.File = null, -output: ?Output = null, - -// TODO these args will become obselete once Zld is coalesced with incremental -// linker. -stack_size: u64 = 0, - -objects: std.ArrayListUnmanaged(*Object) = .{}, -archives: std.ArrayListUnmanaged(*Archive) = .{}, -dylibs: std.ArrayListUnmanaged(*Dylib) = .{}, - -next_dylib_ordinal: u16 = 1, - -load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, - -pagezero_segment_cmd_index: ?u16 = null, -text_segment_cmd_index: ?u16 = null, -data_const_segment_cmd_index: ?u16 = null, -data_segment_cmd_index: ?u16 = null, -linkedit_segment_cmd_index: ?u16 = null, -dyld_info_cmd_index: ?u16 = null, -symtab_cmd_index: ?u16 = null, -dysymtab_cmd_index: ?u16 = null, -dylinker_cmd_index: ?u16 = null, -data_in_code_cmd_index: ?u16 = null, -function_starts_cmd_index: ?u16 = null, -main_cmd_index: ?u16 = null, -dylib_id_cmd_index: ?u16 = null, -version_min_cmd_index: ?u16 = null, -source_version_cmd_index: ?u16 = null, -uuid_cmd_index: ?u16 = null, -code_signature_cmd_index: ?u16 = null, - -// __TEXT segment sections -text_section_index: ?u16 = null, -stubs_section_index: ?u16 = null, -stub_helper_section_index: ?u16 = null, -text_const_section_index: ?u16 = null, -cstring_section_index: ?u16 = null, -ustring_section_index: ?u16 = null, -gcc_except_tab_section_index: ?u16 = null, -unwind_info_section_index: ?u16 = null, -eh_frame_section_index: ?u16 = null, - -objc_methlist_section_index: ?u16 = null, -objc_methname_section_index: ?u16 = null, -objc_methtype_section_index: ?u16 = null, -objc_classname_section_index: ?u16 = null, - -// __DATA_CONST segment sections -got_section_index: ?u16 = null, -mod_init_func_section_index: ?u16 = null, -mod_term_func_section_index: ?u16 = null, -data_const_section_index: ?u16 = null, - -objc_cfstring_section_index: ?u16 = null, -objc_classlist_section_index: ?u16 = null, -objc_imageinfo_section_index: ?u16 = null, - -// __DATA segment sections -tlv_section_index: ?u16 = null, -tlv_data_section_index: ?u16 = null, 
-tlv_bss_section_index: ?u16 = null, -la_symbol_ptr_section_index: ?u16 = null, -data_section_index: ?u16 = null, -bss_section_index: ?u16 = null, -common_section_index: ?u16 = null, - -objc_const_section_index: ?u16 = null, -objc_selrefs_section_index: ?u16 = null, -objc_classrefs_section_index: ?u16 = null, -objc_data_section_index: ?u16 = null, - -globals: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, -imports: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, -unresolved: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, -tentatives: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, - -/// Offset into __DATA,__common section. -/// Set if the linker found tentative definitions in any of the objects. -tentative_defs_offset: u64 = 0, - -strtab: std.ArrayListUnmanaged(u8) = .{}, -strtab_dir: std.StringHashMapUnmanaged(u32) = .{}, - -threadlocal_offsets: std.ArrayListUnmanaged(TlvOffset) = .{}, // TODO merge with Symbol abstraction -local_rebases: std.ArrayListUnmanaged(Pointer) = .{}, -stubs: std.ArrayListUnmanaged(*Symbol) = .{}, -got_entries: std.ArrayListUnmanaged(*Symbol) = .{}, - -stub_helper_stubs_start_off: ?u64 = null, - -pub const Output = struct { - tag: enum { exe, dylib }, - path: []const u8, - install_name: ?[]const u8 = null, -}; - -const TlvOffset = struct { - source_addr: u64, - offset: u64, - - fn cmp(context: void, a: TlvOffset, b: TlvOffset) bool { - _ = context; - return a.source_addr < b.source_addr; - } -}; - -/// Default path to dyld -const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld"; - -pub fn init(allocator: *Allocator) Zld { - return .{ .allocator = allocator }; -} - -pub fn deinit(self: *Zld) void { - self.threadlocal_offsets.deinit(self.allocator); - self.local_rebases.deinit(self.allocator); - self.stubs.deinit(self.allocator); - self.got_entries.deinit(self.allocator); - - for (self.load_commands.items) |*lc| { - lc.deinit(self.allocator); - } - self.load_commands.deinit(self.allocator); - - for (self.objects.items) |object| { - object.deinit(); - self.allocator.destroy(object); - } - self.objects.deinit(self.allocator); - - for (self.archives.items) |archive| { - archive.deinit(); - self.allocator.destroy(archive); - } - self.archives.deinit(self.allocator); - - for (self.dylibs.items) |dylib| { - dylib.deinit(); - self.allocator.destroy(dylib); - } - self.dylibs.deinit(self.allocator); - - for (self.imports.values()) |proxy| { - proxy.deinit(self.allocator); - self.allocator.destroy(proxy); - } - self.imports.deinit(self.allocator); - - self.tentatives.deinit(self.allocator); - self.globals.deinit(self.allocator); - self.unresolved.deinit(self.allocator); - self.strtab.deinit(self.allocator); - - { - var it = self.strtab_dir.keyIterator(); - while (it.next()) |key| { - self.allocator.free(key.*); - } - } - self.strtab_dir.deinit(self.allocator); -} - -pub fn closeFiles(self: Zld) void { - for (self.objects.items) |object| { - object.closeFile(); - } - for (self.archives.items) |archive| { - archive.closeFile(); - } - if (self.file) |f| f.close(); -} - -const LinkArgs = struct { - syslibroot: ?[]const u8, - libs: []const []const u8, - rpaths: []const []const u8, -}; - -pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArgs) !void { - if (files.len == 0) return error.NoInputFiles; - if (output.path.len == 0) return error.EmptyOutputPath; - - self.page_size = switch (self.target.?.cpu.arch) { - .aarch64 => 0x4000, - .x86_64 => 0x1000, - else => unreachable, - }; - self.output = output; - self.file = try 
fs.cwd().createFile(self.output.?.path, .{ - .truncate = true, - .read = true, - .mode = if (std.Target.current.os.tag == .windows) 0 else 0o777, - }); - - try self.populateMetadata(); - try self.parseInputFiles(files, args.syslibroot); - try self.parseLibs(args.libs, args.syslibroot); - try self.resolveSymbols(); - try self.resolveStubsAndGotEntries(); - try self.updateMetadata(); - try self.sortSections(); - try self.addRpaths(args.rpaths); - try self.addDataInCodeLC(); - try self.addCodeSignatureLC(); - try self.allocateTextSegment(); - try self.allocateDataConstSegment(); - try self.allocateDataSegment(); - self.allocateLinkeditSegment(); - try self.allocateSymbols(); - try self.allocateTentativeSymbols(); - try self.allocateProxyBindAddresses(); - try self.flush(); -} - -fn parseInputFiles(self: *Zld, files: []const []const u8, syslibroot: ?[]const u8) !void { - for (files) |file_name| { - const full_path = full_path: { - var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; - const path = try std.fs.realpath(file_name, &buffer); - break :full_path try self.allocator.dupe(u8, path); - }; - - if (try Object.createAndParseFromPath(self.allocator, self.target.?.cpu.arch, full_path)) |object| { - try self.objects.append(self.allocator, object); - continue; - } - - if (try Archive.createAndParseFromPath(self.allocator, self.target.?.cpu.arch, full_path)) |archive| { - try self.archives.append(self.allocator, archive); - continue; - } - - if (try Dylib.createAndParseFromPath( - self.allocator, - self.target.?.cpu.arch, - full_path, - .{ .syslibroot = syslibroot }, - )) |dylibs| { - defer self.allocator.free(dylibs); - try self.dylibs.appendSlice(self.allocator, dylibs); - continue; - } - - log.warn("unknown filetype for positional input file: '{s}'", .{file_name}); - } -} - -fn parseLibs(self: *Zld, libs: []const []const u8, syslibroot: ?[]const u8) !void { - for (libs) |lib| { - if (try Dylib.createAndParseFromPath( - self.allocator, - self.target.?.cpu.arch, - lib, - .{ .syslibroot = syslibroot }, - )) |dylibs| { - defer self.allocator.free(dylibs); - try self.dylibs.appendSlice(self.allocator, dylibs); - continue; - } - - if (try Archive.createAndParseFromPath(self.allocator, self.target.?.cpu.arch, lib)) |archive| { - try self.archives.append(self.allocator, archive); - continue; - } - - log.warn("unknown filetype for a library: '{s}'", .{lib}); - } -} - -fn mapAndUpdateSections( - self: *Zld, - object: *Object, - source_sect_id: u16, - target_seg_id: u16, - target_sect_id: u16, -) !void { - const source_sect = &object.sections.items[source_sect_id]; - const target_seg = &self.load_commands.items[target_seg_id].Segment; - const target_sect = &target_seg.sections.items[target_sect_id]; - - const alignment = try math.powi(u32, 2, target_sect.@"align"); - const offset = mem.alignForwardGeneric(u64, target_sect.size, alignment); - const size = mem.alignForwardGeneric(u64, source_sect.inner.size, alignment); - - log.debug("{s}: '{s},{s}' mapped to '{s},{s}' from 0x{x} to 0x{x}", .{ - object.name.?, - parseName(&source_sect.inner.segname), - parseName(&source_sect.inner.sectname), - parseName(&target_sect.segname), - parseName(&target_sect.sectname), - offset, - offset + size, - }); - log.debug(" | flags 0x{x}", .{source_sect.inner.flags}); - - source_sect.target_map = .{ - .segment_id = target_seg_id, - .section_id = target_sect_id, - .offset = @intCast(u32, offset), - }; - target_sect.size = offset + size; -} - -fn updateMetadata(self: *Zld) !void { - for (self.objects.items) |object| { - // 
Find ideal section alignment and update section mappings - for (object.sections.items) |sect, sect_id| { - const match = (try self.getMatchingSection(sect)) orelse { - log.debug("{s}: unhandled section type 0x{x} for '{s},{s}'", .{ - object.name.?, - sect.flags(), - sect.segname(), - sect.sectname(), - }); - continue; - }; - const target_seg = &self.load_commands.items[match.seg].Segment; - const target_sect = &target_seg.sections.items[match.sect]; - target_sect.@"align" = math.max(target_sect.@"align", sect.inner.@"align"); - - try self.mapAndUpdateSections(object, @intCast(u16, sect_id), match.seg, match.sect); - } - } - - // Ensure we have __DATA,__common section if we have tentative definitions. - // Update size and alignment of __DATA,__common section. - if (self.tentatives.values().len > 0) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const common_section_index = self.common_section_index orelse ind: { - self.common_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__common", .{ - .flags = macho.S_ZEROFILL, - }); - break :ind self.common_section_index.?; - }; - const common_sect = &data_seg.sections.items[common_section_index]; - - var max_align: u16 = 0; - var added_size: u64 = 0; - for (self.tentatives.values()) |sym| { - const tent = sym.cast(Symbol.Tentative) orelse unreachable; - max_align = math.max(max_align, tent.alignment); - added_size += tent.size; - } - - common_sect.@"align" = math.max(common_sect.@"align", max_align); - - const alignment = try math.powi(u32, 2, common_sect.@"align"); - const offset = mem.alignForwardGeneric(u64, common_sect.size, alignment); - const size = mem.alignForwardGeneric(u64, added_size, alignment); - - common_sect.size = offset + size; - self.tentative_defs_offset = offset; - } - - tlv_align: { - const has_tlv = - self.tlv_section_index != null or - self.tlv_data_section_index != null or - self.tlv_bss_section_index != null; - - if (!has_tlv) break :tlv_align; - - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - - if (self.tlv_section_index) |index| { - const sect = &seg.sections.items[index]; - sect.@"align" = 3; // __thread_vars is always 8byte aligned - } - - // Apparently __tlv_data and __tlv_bss need to have matching alignment, so fix it up. 
- // All __thread_data and __thread_bss sections must have same alignment - // https://github.com/apple-opensource/ld64/blob/e28c028b20af187a16a7161d89e91868a450cadc/src/ld/ld.cpp#L1172 - const data_align: u32 = data: { - if (self.tlv_data_section_index) |index| { - const sect = &seg.sections.items[index]; - break :data sect.@"align"; - } - break :tlv_align; - }; - const bss_align: u32 = bss: { - if (self.tlv_bss_section_index) |index| { - const sect = &seg.sections.items[index]; - break :bss sect.@"align"; - } - break :tlv_align; - }; - const max_align = math.max(data_align, bss_align); - - if (self.tlv_data_section_index) |index| { - const sect = &seg.sections.items[index]; - sect.@"align" = max_align; - } - if (self.tlv_bss_section_index) |index| { - const sect = &seg.sections.items[index]; - sect.@"align" = max_align; - } - } -} - -const MatchingSection = struct { - seg: u16, - sect: u16, -}; - -fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const segname = sect.segname(); - const sectname = sect.sectname(); - - const res: ?MatchingSection = blk: { - switch (sect.sectionType()) { - macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { - if (self.text_const_section_index == null) { - self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__const", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_const_section_index.?, - }; - }, - macho.S_CSTRING_LITERALS => { - if (mem.eql(u8, sectname, "__objc_methname")) { - // TODO it seems the common values within the sections in objects are deduplicated/merged - // on merging the sections' contents. 
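// (For reference, ld64 appears to coalesce S_CSTRING_LITERALS sections by
// content, keeping a single copy of each NUL-terminated string; this
// linker currently maps the sections over verbatim, which is what the
// TODO above alludes to.)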
- if (self.objc_methname_section_index == null) { - self.objc_methname_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__objc_methname", .{ - .flags = macho.S_CSTRING_LITERALS, - }); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methname_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_methtype")) { - if (self.objc_methtype_section_index == null) { - self.objc_methtype_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__objc_methtype", .{ - .flags = macho.S_CSTRING_LITERALS, - }); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methtype_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_classname")) { - if (self.objc_classname_section_index == null) { - self.objc_classname_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__objc_classname", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_classname_section_index.?, - }; - } - - if (self.cstring_section_index == null) { - self.cstring_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__cstring", .{ - .flags = macho.S_CSTRING_LITERALS, - }); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.cstring_section_index.?, - }; - }, - macho.S_LITERAL_POINTERS => { - if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__objc_selrefs")) { - if (self.objc_selrefs_section_index == null) { - self.objc_selrefs_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__objc_selrefs", .{ - .flags = macho.S_LITERAL_POINTERS, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_selrefs_section_index.?, - }; - } - - // TODO investigate - break :blk null; - }, - macho.S_MOD_INIT_FUNC_POINTERS => { - if (self.mod_init_func_section_index == null) { - self.mod_init_func_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__mod_init_func", .{ - .flags = macho.S_MOD_INIT_FUNC_POINTERS, - }); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.mod_init_func_section_index.?, - }; - }, - macho.S_MOD_TERM_FUNC_POINTERS => { - if (self.mod_term_func_section_index == null) { - self.mod_term_func_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__mod_term_func", .{ - .flags = macho.S_MOD_TERM_FUNC_POINTERS, - }); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.mod_term_func_section_index.?, - }; - }, - macho.S_ZEROFILL => { - if (mem.eql(u8, sectname, "__common")) { - if (self.common_section_index == null) { - self.common_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__common", .{ - .flags = macho.S_ZEROFILL, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.common_section_index.?, - }; - } else { - if (self.bss_section_index == null) { - self.bss_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__bss", .{ - .flags = macho.S_ZEROFILL, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; - } - }, - 
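// Thread-local storage spans three cooperating sections: __thread_vars
// holds one TLV descriptor per variable, while __thread_data and
// __thread_bss hold the initialized and zero-filled initializer
// templates, respectively.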
macho.S_THREAD_LOCAL_VARIABLES => { - if (self.tlv_section_index == null) { - self.tlv_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__thread_vars", .{ - .flags = macho.S_THREAD_LOCAL_VARIABLES, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_section_index.?, - }; - }, - macho.S_THREAD_LOCAL_REGULAR => { - if (self.tlv_data_section_index == null) { - self.tlv_data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__thread_data", .{ - .flags = macho.S_THREAD_LOCAL_REGULAR, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_data_section_index.?, - }; - }, - macho.S_THREAD_LOCAL_ZEROFILL => { - if (self.tlv_bss_section_index == null) { - self.tlv_bss_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__thread_bss", .{ - .flags = macho.S_THREAD_LOCAL_ZEROFILL, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_bss_section_index.?, - }; - }, - macho.S_COALESCED => { - if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) { - // TODO I believe __eh_frame is currently part of __unwind_info section - // in the latest ld64 output. - if (self.eh_frame_section_index == null) { - self.eh_frame_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__eh_frame", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.eh_frame_section_index.?, - }; - } - - // TODO audit this: is this the right mapping? - if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__const", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; - }, - macho.S_REGULAR => { - if (sect.isCode()) { - if (self.text_section_index == null) { - self.text_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__text", .{ - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }; - } - if (sect.isDebug()) { - // TODO debug attributes - if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { - log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{ - sect.flags(), segname, sectname, - }); - } - break :blk null; - } - - if (mem.eql(u8, segname, "__TEXT")) { - if (mem.eql(u8, sectname, "__ustring")) { - if (self.ustring_section_index == null) { - self.ustring_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__ustring", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.ustring_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__gcc_except_tab")) { - if (self.gcc_except_tab_section_index == null) { - self.gcc_except_tab_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__gcc_except_tab", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.gcc_except_tab_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_methlist")) { - if (self.objc_methlist_section_index == null) { - 
self.objc_methlist_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__objc_methlist", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methlist_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__rodata") or - mem.eql(u8, sectname, "__typelink") or - mem.eql(u8, sectname, "__itablink") or - mem.eql(u8, sectname, "__gosymtab") or - mem.eql(u8, sectname, "__gopclntab")) - { - if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__const", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; - } else { - if (self.text_const_section_index == null) { - self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__const", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_const_section_index.?, - }; - } - } - - if (mem.eql(u8, segname, "__DATA_CONST")) { - if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__const", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; - } - - if (mem.eql(u8, segname, "__DATA")) { - if (mem.eql(u8, sectname, "__const")) { - if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__const", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__cfstring")) { - if (self.objc_cfstring_section_index == null) { - self.objc_cfstring_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__cfstring", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_cfstring_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_classlist")) { - if (self.objc_classlist_section_index == null) { - self.objc_classlist_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__objc_classlist", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_classlist_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_imageinfo")) { - if (self.objc_imageinfo_section_index == null) { - self.objc_imageinfo_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__objc_imageinfo", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_imageinfo_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_const")) { - if (self.objc_const_section_index == null) { - self.objc_const_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__objc_const", .{}); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_const_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_classrefs")) { - if (self.objc_classrefs_section_index == null) { - self.objc_classrefs_section_index = @intCast(u16, 
data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__objc_classrefs", .{}); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_classrefs_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_data")) { - if (self.objc_data_section_index == null) { - self.objc_data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__objc_data", .{}); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_data_section_index.?, - }; - } else { - if (self.data_section_index == null) { - self.data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__data", .{}); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; - } - } - - if (mem.eql(u8, "__LLVM", segname) and mem.eql(u8, "__asm", sectname)) { - log.debug("TODO LLVM asm section: type 0x{x}, name '{s},{s}'", .{ - sect.flags(), segname, sectname, - }); - } - - break :blk null; - }, - else => break :blk null, - } - }; - - return res; -} - -fn sortSections(self: *Zld) !void { - var text_index_mapping = std.AutoHashMap(u16, u16).init(self.allocator); - defer text_index_mapping.deinit(); - var data_const_index_mapping = std.AutoHashMap(u16, u16).init(self.allocator); - defer data_const_index_mapping.deinit(); - var data_index_mapping = std.AutoHashMap(u16, u16).init(self.allocator); - defer data_index_mapping.deinit(); - - { - // __TEXT segment - const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - var sections = seg.sections.toOwnedSlice(self.allocator); - defer self.allocator.free(sections); - try seg.sections.ensureCapacity(self.allocator, sections.len); - - const indices = &[_]*?u16{ - &self.text_section_index, - &self.stubs_section_index, - &self.stub_helper_section_index, - &self.gcc_except_tab_section_index, - &self.cstring_section_index, - &self.ustring_section_index, - &self.text_const_section_index, - &self.objc_methname_section_index, - &self.objc_methtype_section_index, - &self.objc_classname_section_index, - &self.eh_frame_section_index, - }; - for (indices) |maybe_index| { - const new_index: u16 = if (maybe_index.*) |index| blk: { - const idx = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sections[index]); - try text_index_mapping.putNoClobber(index, idx); - break :blk idx; - } else continue; - maybe_index.* = new_index; - } - } - - { - // __DATA_CONST segment - const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - var sections = seg.sections.toOwnedSlice(self.allocator); - defer self.allocator.free(sections); - try seg.sections.ensureCapacity(self.allocator, sections.len); - - const indices = &[_]*?u16{ - &self.got_section_index, - &self.mod_init_func_section_index, - &self.mod_term_func_section_index, - &self.data_const_section_index, - &self.objc_cfstring_section_index, - &self.objc_classlist_section_index, - &self.objc_imageinfo_section_index, - }; - for (indices) |maybe_index| { - const new_index: u16 = if (maybe_index.*) |index| blk: { - const idx = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sections[index]); - try data_const_index_mapping.putNoClobber(index, idx); - break :blk idx; - } else continue; - maybe_index.* = new_index; - } - } - - { - // __DATA segment - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - var sections = 
seg.sections.toOwnedSlice(self.allocator); - defer self.allocator.free(sections); - try seg.sections.ensureCapacity(self.allocator, sections.len); - - // __DATA segment - const indices = &[_]*?u16{ - &self.la_symbol_ptr_section_index, - &self.objc_const_section_index, - &self.objc_selrefs_section_index, - &self.objc_classrefs_section_index, - &self.objc_data_section_index, - &self.data_section_index, - &self.tlv_section_index, - &self.tlv_data_section_index, - &self.tlv_bss_section_index, - &self.bss_section_index, - &self.common_section_index, - }; - for (indices) |maybe_index| { - const new_index: u16 = if (maybe_index.*) |index| blk: { - const idx = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sections[index]); - try data_index_mapping.putNoClobber(index, idx); - break :blk idx; - } else continue; - maybe_index.* = new_index; - } - } - - for (self.objects.items) |object| { - for (object.sections.items) |*sect| { - const target_map = sect.target_map orelse continue; - - const new_index = blk: { - if (self.text_segment_cmd_index.? == target_map.segment_id) { - break :blk text_index_mapping.get(target_map.section_id) orelse unreachable; - } else if (self.data_const_segment_cmd_index.? == target_map.segment_id) { - break :blk data_const_index_mapping.get(target_map.section_id) orelse unreachable; - } else if (self.data_segment_cmd_index.? == target_map.segment_id) { - break :blk data_index_mapping.get(target_map.section_id) orelse unreachable; - } else unreachable; - }; - - log.debug("remapping in {s}: '{s},{s}': {} => {}", .{ - object.name.?, - parseName(§.inner.segname), - parseName(§.inner.sectname), - target_map.section_id, - new_index, - }); - - sect.target_map = .{ - .segment_id = target_map.segment_id, - .section_id = new_index, - .offset = target_map.offset, - }; - } - } -} - -fn allocateTextSegment(self: *Zld) !void { - const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const nstubs = @intCast(u32, self.stubs.items.len); - - const base_vmaddr = self.load_commands.items[self.pagezero_segment_cmd_index.?].Segment.inner.vmsize; - seg.inner.fileoff = 0; - seg.inner.vmaddr = base_vmaddr; - - // Set stubs and stub_helper sizes - const stubs = &seg.sections.items[self.stubs_section_index.?]; - const stub_helper = &seg.sections.items[self.stub_helper_section_index.?]; - stubs.size += nstubs * stubs.reserved2; - - const stub_size: u4 = switch (self.target.?.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - stub_helper.size += nstubs * stub_size; - - var sizeofcmds: u64 = 0; - for (self.load_commands.items) |lc| { - sizeofcmds += lc.cmdsize(); - } - - try self.allocateSegment(self.text_segment_cmd_index.?, @sizeOf(macho.mach_header_64) + sizeofcmds); - - // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments. 
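// Sketch of the shift computed below: keep the segment's file size fixed
// and move all sections towards its end by the largest multiple of the
// coarsest section alignment that fits, e.g. filesize 0x1000, last
// section ending at 0xb90, min alignment 0x40 => shift = 0x440
// (0x470 rounded down to a multiple of 0x40).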
- var min_alignment: u32 = 0; - for (seg.sections.items) |sect| { - const alignment = try math.powi(u32, 2, sect.@"align"); - min_alignment = math.max(min_alignment, alignment); - } - - assert(min_alignment > 0); - const last_sect_idx = seg.sections.items.len - 1; - const last_sect = seg.sections.items[last_sect_idx]; - const shift: u32 = blk: { - const diff = seg.inner.filesize - last_sect.offset - last_sect.size; - const factor = @divTrunc(diff, min_alignment); - break :blk @intCast(u32, factor * min_alignment); - }; - - if (shift > 0) { - for (seg.sections.items) |*sect| { - sect.offset += shift; - sect.addr += shift; - } - } -} - -fn allocateDataConstSegment(self: *Zld) !void { - const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const nentries = @intCast(u32, self.got_entries.items.len); - - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - seg.inner.fileoff = text_seg.inner.fileoff + text_seg.inner.filesize; - seg.inner.vmaddr = text_seg.inner.vmaddr + text_seg.inner.vmsize; - - // Set got size - const got = &seg.sections.items[self.got_section_index.?]; - got.size += nentries * @sizeOf(u64); - - try self.allocateSegment(self.data_const_segment_cmd_index.?, 0); -} - -fn allocateDataSegment(self: *Zld) !void { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const nstubs = @intCast(u32, self.stubs.items.len); - - const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - seg.inner.fileoff = data_const_seg.inner.fileoff + data_const_seg.inner.filesize; - seg.inner.vmaddr = data_const_seg.inner.vmaddr + data_const_seg.inner.vmsize; - - // Set la_symbol_ptr and data size - const la_symbol_ptr = &seg.sections.items[self.la_symbol_ptr_section_index.?]; - const data = &seg.sections.items[self.data_section_index.?]; - la_symbol_ptr.size += nstubs * @sizeOf(u64); - data.size += @sizeOf(u64); // We need at least 8bytes for address of dyld_stub_binder - - try self.allocateSegment(self.data_segment_cmd_index.?, 0); -} - -fn allocateLinkeditSegment(self: *Zld) void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - seg.inner.fileoff = data_seg.inner.fileoff + data_seg.inner.filesize; - seg.inner.vmaddr = data_seg.inner.vmaddr + data_seg.inner.vmsize; -} - -fn allocateSegment(self: *Zld, index: u16, offset: u64) !void { - const seg = &self.load_commands.items[index].Segment; - - // Allocate the sections according to their alignment at the beginning of the segment. 
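// Each section's file offset and vmaddr advance in lockstep, bumped to
// the next multiple of 2^align, e.g. start = 0x19c with align = 4 lands
// on 0x1a0; the segment totals are then padded out to a full page.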
- var start: u64 = offset; - for (seg.sections.items) |*sect| { - const alignment = try math.powi(u32, 2, sect.@"align"); - const start_aligned = mem.alignForwardGeneric(u64, start, alignment); - const end_aligned = mem.alignForwardGeneric(u64, start_aligned + sect.size, alignment); - sect.offset = @intCast(u32, seg.inner.fileoff + start_aligned); - sect.addr = seg.inner.vmaddr + start_aligned; - start = end_aligned; - } - - const seg_size_aligned = mem.alignForwardGeneric(u64, start, self.page_size.?); - seg.inner.filesize = seg_size_aligned; - seg.inner.vmsize = seg_size_aligned; -} - -fn allocateSymbols(self: *Zld) !void { - for (self.objects.items) |object| { - for (object.symbols.items) |sym| { - const reg = sym.cast(Symbol.Regular) orelse continue; - - const source_sect = &object.sections.items[reg.section]; - const target_map = source_sect.target_map orelse { - log.debug("section '{s},{s}' not mapped for symbol '{s}'", .{ - parseName(&source_sect.inner.segname), - parseName(&source_sect.inner.sectname), - sym.name, - }); - continue; - }; - - const target_seg = self.load_commands.items[target_map.segment_id].Segment; - const target_sect = target_seg.sections.items[target_map.section_id]; - const target_addr = target_sect.addr + target_map.offset; - const address = reg.address - source_sect.inner.addr + target_addr; - - log.debug("resolving symbol '{s}' at 0x{x}", .{ sym.name, address }); - - // TODO there might be a more generic way of doing this. - var section: u8 = 0; - for (self.load_commands.items) |cmd, cmd_id| { - if (cmd != .Segment) break; - if (cmd_id == target_map.segment_id) { - section += @intCast(u8, target_map.section_id) + 1; - break; - } - section += @intCast(u8, cmd.Segment.sections.items.len); - } - - reg.address = address; - reg.section = section; - } - } -} - -fn allocateTentativeSymbols(self: *Zld) !void { - if (self.tentatives.values().len == 0) return; - - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const common_sect = &data_seg.sections.items[self.common_section_index.?]; - - const alignment = try math.powi(u32, 2, common_sect.@"align"); - var base_address: u64 = common_sect.addr + self.tentative_defs_offset; - - log.debug("base address for tentative definitions 0x{x}", .{base_address}); - - // TODO there might be a more generic way of doing this. - var section: u8 = 0; - for (self.load_commands.items) |cmd, cmd_id| { - if (cmd != .Segment) break; - if (cmd_id == self.data_segment_cmd_index.?) { - section += @intCast(u8, self.common_section_index.?) + 1; - break; - } - section += @intCast(u8, cmd.Segment.sections.items.len); - } - - // Convert tentative definitions into regular symbols. 
- for (self.tentatives.values()) |sym| { - const tent = sym.cast(Symbol.Tentative) orelse unreachable; - const reg = try self.allocator.create(Symbol.Regular); - errdefer self.allocator.destroy(reg); - - reg.* = .{ - .base = .{ - .@"type" = .regular, - .name = try self.allocator.dupe(u8, tent.base.name), - .got_index = tent.base.got_index, - .stubs_index = tent.base.stubs_index, - }, - .linkage = .global, - .address = base_address, - .section = section, - .weak_ref = false, - .file = tent.file, - .stab = .{ - .kind = .global, - .size = 0, - }, - }; - - try self.globals.putNoClobber(self.allocator, reg.base.name, ®.base); - tent.base.alias = ®.base; - - if (tent.base.got_index) |idx| { - self.got_entries.items[idx] = ®.base; - } - if (tent.base.stubs_index) |idx| { - self.stubs.items[idx] = ®.base; - } - - const address = mem.alignForwardGeneric(u64, base_address + tent.size, alignment); - - log.debug("tentative definition '{s}' allocated from 0x{x} to 0x{x}", .{ - tent.base.name, - base_address, - address, - }); - - base_address = address; - } -} - -fn allocateProxyBindAddresses(self: *Zld) !void { - for (self.objects.items) |object| { - for (object.sections.items) |sect| { - const relocs = sect.relocs orelse continue; - - for (relocs) |rel| { - if (rel.@"type" != .unsigned) continue; // GOT is currently special-cased - if (rel.target != .symbol) continue; - - const sym = rel.target.symbol.getTopmostAlias(); - if (sym.cast(Symbol.Proxy)) |proxy| { - const target_map = sect.target_map orelse continue; - const target_seg = self.load_commands.items[target_map.segment_id].Segment; - const target_sect = target_seg.sections.items[target_map.section_id]; - - try proxy.bind_info.append(self.allocator, .{ - .segment_id = target_map.segment_id, - .address = target_sect.addr + target_map.offset + rel.offset, - }); - } - } - } - } -} - -fn writeStubHelperCommon(self: *Zld) !void { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?]; - const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_segment.sections.items[self.got_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const data = &data_segment.sections.items[self.data_section_index.?]; - - self.stub_helper_stubs_start_off = blk: { - switch (self.target.?.cpu.arch) { - .x86_64 => { - const code_size = 15; - var code: [code_size]u8 = undefined; - // lea %r11, [rip + disp] - code[0] = 0x4c; - code[1] = 0x8d; - code[2] = 0x1d; - { - const target_addr = data.addr + data.size - @sizeOf(u64); - const displacement = try math.cast(u32, target_addr - stub_helper.addr - 7); - mem.writeIntLittle(u32, code[3..7], displacement); - } - // push %r11 - code[7] = 0x41; - code[8] = 0x53; - // jmp [rip + disp] - code[9] = 0xff; - code[10] = 0x25; - { - const dyld_stub_binder = self.imports.get("dyld_stub_binder").?; - const addr = (got.addr + dyld_stub_binder.got_index.? 
* @sizeOf(u64)); - const displacement = try math.cast(u32, addr - stub_helper.addr - code_size); - mem.writeIntLittle(u32, code[11..], displacement); - } - try self.file.?.pwriteAll(&code, stub_helper.offset); - break :blk stub_helper.offset + code_size; - }, - .aarch64 => { - var code: [6 * @sizeOf(u32)]u8 = undefined; - data_blk_outer: { - const this_addr = stub_helper.addr; - const target_addr = data.addr + data.size - @sizeOf(u64); - data_blk: { - const displacement = math.cast(i21, target_addr - this_addr) catch break :data_blk; - // adr x17, disp - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32()); - // nop - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); - break :data_blk_outer; - } - data_blk: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.cast(i21, target_addr - new_this_addr) catch break :data_blk; - // nop - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); - // adr x17, disp - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32()); - break :data_blk_outer; - } - // Jump is too big, replace adr with adrp and add. - const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32()); - } - // stp x16, x17, [sp, #-16]! - code[8] = 0xf0; - code[9] = 0x47; - code[10] = 0xbf; - code[11] = 0xa9; - binder_blk_outer: { - const dyld_stub_binder = self.imports.get("dyld_stub_binder").?; - const this_addr = stub_helper.addr + 3 * @sizeOf(u32); - const target_addr = (got.addr + dyld_stub_binder.got_index.? * @sizeOf(u64)); - binder_blk: { - const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :binder_blk; - const literal = math.cast(u18, displacement) catch break :binder_blk; - // ldr x16, label - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - // nop - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32()); - break :binder_blk_outer; - } - binder_blk: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :binder_blk; - const literal = math.cast(u18, displacement) catch break :binder_blk; - // Pad with nop to please division. - // nop - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32()); - // ldr x16, label - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - break :binder_blk_outer; - } - // Use adrp followed by ldr(immediate). 
- const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - const offset = try math.divExact(u12, narrowed, 8); - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - .register = .{ - .rn = .x16, - .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), - }, - }).toU32()); - } - // br x16 - code[20] = 0x00; - code[21] = 0x02; - code[22] = 0x1f; - code[23] = 0xd6; - try self.file.?.pwriteAll(&code, stub_helper.offset); - break :blk stub_helper.offset + 6 * @sizeOf(u32); - }, - else => unreachable, - } - }; - - for (self.stubs.items) |sym| { - // TODO weak bound pointers - const index = sym.stubs_index orelse unreachable; - try self.writeLazySymbolPointer(index); - try self.writeStub(index); - try self.writeStubInStubHelper(index); - } -} - -fn writeLazySymbolPointer(self: *Zld, index: u32) !void { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = text_segment.sections.items[self.stub_helper_section_index.?]; - const data_segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - - const stub_size: u4 = switch (self.target.?.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const stub_off = self.stub_helper_stubs_start_off.? + index * stub_size; - const end = stub_helper.addr + stub_off - stub_helper.offset; - var buf: [@sizeOf(u64)]u8 = undefined; - mem.writeIntLittle(u64, &buf, end); - const off = la_symbol_ptr.offset + index * @sizeOf(u64); - log.debug("writing lazy symbol pointer entry 0x{x} at 0x{x}", .{ end, off }); - try self.file.?.pwriteAll(&buf, off); -} - -fn writeStub(self: *Zld, index: u32) !void { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = text_segment.sections.items[self.stubs_section_index.?]; - const data_segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - - const stub_off = stubs.offset + index * stubs.reserved2; - const stub_addr = stubs.addr + index * stubs.reserved2; - const la_ptr_addr = la_symbol_ptr.addr + index * @sizeOf(u64); - log.debug("writing stub at 0x{x}", .{stub_off}); - var code = try self.allocator.alloc(u8, stubs.reserved2); - defer self.allocator.free(code); - switch (self.target.?.cpu.arch) { - .x86_64 => { - assert(la_ptr_addr >= stub_addr + stubs.reserved2); - const displacement = try math.cast(u32, la_ptr_addr - stub_addr - stubs.reserved2); - // jmp - code[0] = 0xff; - code[1] = 0x25; - mem.writeIntLittle(u32, code[2..][0..4], displacement); - }, - .aarch64 => { - assert(la_ptr_addr >= stub_addr); - outer: { - const this_addr = stub_addr; - const target_addr = la_ptr_addr; - inner: { - const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :inner; - const literal = math.cast(u18, displacement) catch break :inner; - // ldr x16, literal - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - // nop - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); - break :outer; - } - inner: { - const new_this_addr = this_addr + 
@sizeOf(u32); - const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :inner; - const literal = math.cast(u18, displacement) catch break :inner; - // nop - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); - // ldr x16, literal - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - break :outer; - } - // Use adrp followed by ldr(immediate). - const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x16, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - const offset = try math.divExact(u12, narrowed, 8); - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(.x16, .{ - .register = .{ - .rn = .x16, - .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), - }, - }).toU32()); - } - // br x16 - mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.br(.x16).toU32()); - }, - else => unreachable, - } - try self.file.?.pwriteAll(code, stub_off); -} - -fn writeStubInStubHelper(self: *Zld, index: u32) !void { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = text_segment.sections.items[self.stub_helper_section_index.?]; - - const stub_size: u4 = switch (self.target.?.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const stub_off = self.stub_helper_stubs_start_off.? + index * stub_size; - var code = try self.allocator.alloc(u8, stub_size); - defer self.allocator.free(code); - switch (self.target.?.cpu.arch) { - .x86_64 => { - const displacement = try math.cast( - i32, - @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - stub_size, - ); - // pushq - code[0] = 0x68; - mem.writeIntLittle(u32, code[1..][0..4], 0x0); // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. - // jmpq - code[5] = 0xe9; - mem.writeIntLittle(u32, code[6..][0..4], @bitCast(u32, displacement)); - }, - .aarch64 => { - const displacement = try math.cast(i28, @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - 4); - const literal = @divExact(stub_size - @sizeOf(u32), 4); - // ldr w16, literal - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.w16, .{ - .literal = literal, - }).toU32()); - // b disp - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.b(displacement).toU32()); - mem.writeIntLittle(u32, code[8..12], 0x0); // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. - }, - else => unreachable, - } - try self.file.?.pwriteAll(code, stub_off); -} - -fn resolveSymbolsInObject(self: *Zld, object: *Object) !void { - log.debug("resolving symbols in '{s}'", .{object.name}); - - for (object.symbols.items) |sym| { - if (sym.cast(Symbol.Regular)) |reg| { - if (reg.linkage == .translation_unit) continue; // Symbol local to TU. - - if (self.tentatives.fetchSwapRemove(sym.name)) |kv| { - // Create link to the global. - kv.value.alias = sym; - } - if (self.unresolved.fetchSwapRemove(sym.name)) |kv| { - // Create link to the global. - kv.value.alias = sym; - } - const sym_ptr = self.globals.getPtr(sym.name) orelse { - // Put new global symbol into the symbol table. 
- try self.globals.putNoClobber(self.allocator, sym.name, sym); - continue; - }; - const g_sym = sym_ptr.*; - const g_reg = g_sym.cast(Symbol.Regular) orelse unreachable; - - switch (g_reg.linkage) { - .translation_unit => unreachable, - .linkage_unit => { - if (reg.linkage == .linkage_unit) { - // Create link to the first encountered linkage_unit symbol. - sym.alias = g_sym; - continue; - } - }, - .global => { - if (reg.linkage == .global) { - log.debug("symbol '{s}' defined multiple times", .{reg.base.name}); - return error.MultipleSymbolDefinitions; - } - sym.alias = g_sym; - continue; - }, - } - - g_sym.alias = sym; - sym_ptr.* = sym; - } else if (sym.cast(Symbol.Tentative)) |tent| { - if (self.globals.get(sym.name)) |g_sym| { - sym.alias = g_sym; - continue; - } - - if (self.unresolved.fetchSwapRemove(sym.name)) |kv| { - kv.value.alias = sym; - } - - const sym_ptr = self.tentatives.getPtr(sym.name) orelse { - // Put new tentative definition symbol into symbol table. - try self.tentatives.putNoClobber(self.allocator, sym.name, sym); - continue; - }; - - // Compare by size and pick the largest tentative definition. - // We model this like a heap where the tentative definition with the - // largest size always washes up on top. - const t_sym = sym_ptr.*; - const t_tent = t_sym.cast(Symbol.Tentative) orelse unreachable; - - if (tent.size < t_tent.size) { - sym.alias = t_sym; - continue; - } - - t_sym.alias = sym; - sym_ptr.* = sym; - } else if (sym.cast(Symbol.Unresolved)) |_| { - if (self.globals.get(sym.name)) |g_sym| { - sym.alias = g_sym; - continue; - } - if (self.tentatives.get(sym.name)) |t_sym| { - sym.alias = t_sym; - continue; - } - if (self.unresolved.get(sym.name)) |u_sym| { - sym.alias = u_sym; - continue; - } - - try self.unresolved.putNoClobber(self.allocator, sym.name, sym); - } else unreachable; - } -} - -fn resolveSymbols(self: *Zld) !void { - // First pass, resolve symbols in provided objects. - for (self.objects.items) |object| { - try self.resolveSymbolsInObject(object); - } - - // Second pass, resolve symbols in static libraries. - var next_sym: usize = 0; - while (true) { - if (next_sym == self.unresolved.count()) break; - - const sym = self.unresolved.values()[next_sym]; - - var reset: bool = false; - for (self.archives.items) |archive| { - // Check if the entry exists in a static archive. - const offsets = archive.toc.get(sym.name) orelse { - // No hit. - continue; - }; - assert(offsets.items.len > 0); - - const object = try archive.parseObject(offsets.items[0]); - try self.objects.append(self.allocator, object); - try self.resolveSymbolsInObject(object); - - reset = true; - break; - } - - if (reset) { - next_sym = 0; - } else { - next_sym += 1; - } - } - - // Third pass, resolve symbols in dynamic libraries. - var unresolved = std.ArrayList(*Symbol).init(self.allocator); - defer unresolved.deinit(); - - try unresolved.ensureCapacity(self.unresolved.count()); - for (self.unresolved.values()) |value| { - unresolved.appendAssumeCapacity(value); - } - self.unresolved.clearRetainingCapacity(); - - // Put dyld_stub_binder as an unresolved special symbol. 
- { - const name = try self.allocator.dupe(u8, "dyld_stub_binder"); - errdefer self.allocator.free(name); - const undef = try self.allocator.create(Symbol.Unresolved); - errdefer self.allocator.destroy(undef); - undef.* = .{ - .base = .{ - .@"type" = .unresolved, - .name = name, - }, - }; - try unresolved.append(&undef.base); - } - - var referenced = std.AutoHashMap(*Dylib, void).init(self.allocator); - defer referenced.deinit(); - - loop: while (unresolved.popOrNull()) |undef| { - const proxy = self.imports.get(undef.name) orelse outer: { - const proxy = inner: { - for (self.dylibs.items) |dylib| { - const proxy = (try dylib.createProxy(undef.name)) orelse continue; - try referenced.put(dylib, {}); - break :inner proxy; - } - if (mem.eql(u8, undef.name, "___dso_handle")) { - // TODO this is just a temp patch until I work out what to actually - // do with ___dso_handle and __mh_execute_header symbols which are - // synthetically created by the linker on macOS. - const name = try self.allocator.dupe(u8, undef.name); - const proxy = try self.allocator.create(Symbol.Proxy); - errdefer self.allocator.destroy(proxy); - proxy.* = .{ - .base = .{ - .@"type" = .proxy, - .name = name, - }, - }; - break :inner &proxy.base; - } - - self.unresolved.putAssumeCapacityNoClobber(undef.name, undef); - continue :loop; - }; - - try self.imports.putNoClobber(self.allocator, proxy.name, proxy); - break :outer proxy; - }; - undef.alias = proxy; - } - - // Add LC_LOAD_DYLIB load command for each referenced dylib/stub. - var it = referenced.iterator(); - while (it.next()) |entry| { - const dylib = entry.key_ptr.*; - dylib.ordinal = self.next_dylib_ordinal; - const dylib_id = dylib.id orelse unreachable; - var dylib_cmd = try createLoadDylibCommand( - self.allocator, - dylib_id.name, - dylib_id.timestamp, - dylib_id.current_version, - dylib_id.compatibility_version, - ); - errdefer dylib_cmd.deinit(self.allocator); - try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); - self.next_dylib_ordinal += 1; - } - - if (self.unresolved.count() > 0) { - for (self.unresolved.values()) |undef| { - log.err("undefined reference to symbol '{s}'", .{undef.name}); - if (undef.cast(Symbol.Unresolved).?.file) |file| { - log.err(" | referenced in {s}", .{file.name.?}); - } - } - - return error.UndefinedSymbolReference; - } -} - -fn resolveStubsAndGotEntries(self: *Zld) !void { - for (self.objects.items) |object| { - log.debug("resolving stubs and got entries from {s}", .{object.name}); - - for (object.sections.items) |sect| { - const relocs = sect.relocs orelse continue; - for (relocs) |rel| { - switch (rel.@"type") { - .unsigned => continue, - .got_page, .got_page_off, .got_load, .got, .pointer_to_got => { - const sym = rel.target.symbol.getTopmostAlias(); - if (sym.got_index != null) continue; - - const index = @intCast(u32, self.got_entries.items.len); - sym.got_index = index; - try self.got_entries.append(self.allocator, sym); - - log.debug(" | found GOT entry {s}: {*}", .{ sym.name, sym }); - }, - else => { - if (rel.target != .symbol) continue; - - const sym = rel.target.symbol.getTopmostAlias(); - assert(sym.@"type" != .unresolved); - - if (sym.stubs_index != null) continue; - if (sym.@"type" != .proxy) continue; - - const index = @intCast(u32, self.stubs.items.len); - sym.stubs_index = index; - try self.stubs.append(self.allocator, sym); - - log.debug(" | found stub {s}: {*}", .{ sym.name, sym }); - }, - } - } - } - } - - // Finally, put dyld_stub_binder as the final GOT entry - const sym = 
self.imports.get("dyld_stub_binder") orelse unreachable; - const index = @intCast(u32, self.got_entries.items.len); - sym.got_index = index; - try self.got_entries.append(self.allocator, sym); - - log.debug(" | found GOT entry {s}: {*}", .{ sym.name, sym }); -} - -fn resolveRelocsAndWriteSections(self: *Zld) !void { - for (self.objects.items) |object| { - log.debug("relocating object {s}", .{object.name}); - - for (object.sections.items) |sect| { - if (sect.inner.flags == macho.S_MOD_INIT_FUNC_POINTERS or - sect.inner.flags == macho.S_MOD_TERM_FUNC_POINTERS) continue; - - const segname = parseName(§.inner.segname); - const sectname = parseName(§.inner.sectname); - - log.debug("relocating section '{s},{s}'", .{ segname, sectname }); - - // Get target mapping - const target_map = sect.target_map orelse { - log.debug("no mapping for '{s},{s}'; skipping", .{ segname, sectname }); - continue; - }; - const target_seg = self.load_commands.items[target_map.segment_id].Segment; - const target_sect = target_seg.sections.items[target_map.section_id]; - const target_sect_addr = target_sect.addr + target_map.offset; - const target_sect_off = target_sect.offset + target_map.offset; - - if (sect.relocs) |relocs| { - for (relocs) |rel| { - const source_addr = target_sect_addr + rel.offset; - - var args: reloc.Relocation.ResolveArgs = .{ - .source_addr = source_addr, - .target_addr = undefined, - }; - - switch (rel.@"type") { - .unsigned => { - args.target_addr = try self.relocTargetAddr(object, rel.target); - - const unsigned = rel.cast(reloc.Unsigned) orelse unreachable; - if (unsigned.subtractor) |subtractor| { - args.subtractor = try self.relocTargetAddr(object, subtractor); - } - if (rel.target == .section) { - const source_sect = object.sections.items[rel.target.section]; - args.source_source_sect_addr = sect.inner.addr; - args.source_target_sect_addr = source_sect.inner.addr; - } - - const flags = @truncate(u8, target_sect.flags & 0xff); - const should_rebase = rebase: { - if (!unsigned.is_64bit) break :rebase false; - - // TODO actually, a check similar to what dyld is doing, that is, verifying - // that the segment is writable should be enough here. - const is_right_segment = blk: { - if (self.data_segment_cmd_index) |idx| { - if (target_map.segment_id == idx) { - break :blk true; - } - } - if (self.data_const_segment_cmd_index) |idx| { - if (target_map.segment_id == idx) { - break :blk true; - } - } - break :blk false; - }; - - if (!is_right_segment) break :rebase false; - if (flags != macho.S_LITERAL_POINTERS and - flags != macho.S_REGULAR) - { - break :rebase false; - } - if (rel.target == .symbol) { - const final = rel.target.symbol.getTopmostAlias(); - if (final.cast(Symbol.Proxy)) |_| { - break :rebase false; - } - } - - break :rebase true; - }; - - if (should_rebase) { - try self.local_rebases.append(self.allocator, .{ - .offset = source_addr - target_seg.inner.vmaddr, - .segment_id = target_map.segment_id, - }); - } - - // TLV is handled via a separate offset mechanism. - // Calculate the offset to the initializer. 
- if (flags == macho.S_THREAD_LOCAL_VARIABLES) tlv: { - // TODO we don't want to save offset to tlv_bootstrap - if (mem.eql(u8, rel.target.symbol.name, "__tlv_bootstrap")) break :tlv; - - const base_addr = blk: { - if (self.tlv_data_section_index) |index| { - const tlv_data = target_seg.sections.items[index]; - break :blk tlv_data.addr; - } else { - const tlv_bss = target_seg.sections.items[self.tlv_bss_section_index.?]; - break :blk tlv_bss.addr; - } - }; - // Since we require TLV data to always preceed TLV bss section, we calculate - // offsets wrt to the former if it is defined; otherwise, wrt to the latter. - try self.threadlocal_offsets.append(self.allocator, .{ - .source_addr = args.source_addr, - .offset = args.target_addr - base_addr, - }); - } - }, - .got_page, .got_page_off, .got_load, .got, .pointer_to_got => { - const dc_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = dc_seg.sections.items[self.got_section_index.?]; - const final = rel.target.symbol.getTopmostAlias(); - const got_index = final.got_index orelse { - log.err("expected GOT index relocating symbol '{s}'", .{final.name}); - log.err("this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; - }; - args.target_addr = got.addr + got_index * @sizeOf(u64); - }, - else => |tt| { - if (tt == .signed and rel.target == .section) { - const source_sect = object.sections.items[rel.target.section]; - args.source_source_sect_addr = sect.inner.addr; - args.source_target_sect_addr = source_sect.inner.addr; - } - args.target_addr = try self.relocTargetAddr(object, rel.target); - }, - } - - try rel.resolve(args); - } - } - - log.debug("writing contents of '{s},{s}' section from '{s}' from 0x{x} to 0x{x}", .{ - segname, - sectname, - object.name, - target_sect_off, - target_sect_off + sect.code.len, - }); - - if (target_sect.flags == macho.S_ZEROFILL or - target_sect.flags == macho.S_THREAD_LOCAL_ZEROFILL or - target_sect.flags == macho.S_THREAD_LOCAL_VARIABLES) - { - log.debug("zeroing out '{s},{s}' from 0x{x} to 0x{x}", .{ - parseName(&target_sect.segname), - parseName(&target_sect.sectname), - target_sect_off, - target_sect_off + sect.code.len, - }); - - // Zero-out the space - var zeroes = try self.allocator.alloc(u8, sect.code.len); - defer self.allocator.free(zeroes); - mem.set(u8, zeroes, 0); - try self.file.?.pwriteAll(zeroes, target_sect_off); - } else { - try self.file.?.pwriteAll(sect.code, target_sect_off); - } - } - } -} - -fn relocTargetAddr(self: *Zld, object: *const Object, target: reloc.Relocation.Target) !u64 { - const target_addr = blk: { - switch (target) { - .symbol => |sym| { - const final = sym.getTopmostAlias(); - if (final.cast(Symbol.Regular)) |reg| { - log.debug(" | regular '{s}'", .{sym.name}); - break :blk reg.address; - } else if (final.cast(Symbol.Proxy)) |proxy| { - if (mem.eql(u8, sym.name, "__tlv_bootstrap")) { - log.debug(" | symbol '__tlv_bootstrap'", .{}); - const segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const tlv = segment.sections.items[self.tlv_section_index.?]; - break :blk tlv.addr; - } - - log.debug(" | symbol stub '{s}'", .{sym.name}); - const segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = segment.sections.items[self.stubs_section_index.?]; - const stubs_index = proxy.base.stubs_index orelse { - if (proxy.bind_info.items.len > 0) { - break :blk 0; // Dynamically bound by dyld. 
- } - log.err( - "expected stubs index or dynamic bind address when relocating symbol '{s}'", - .{final.name}, - ); - log.err("this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; - }; - break :blk stubs.addr + stubs_index * stubs.reserved2; - } else { - log.err("failed to resolve symbol '{s}' as a relocation target", .{sym.name}); - log.err("this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; - } - }, - .section => |sect_id| { - log.debug(" | section offset", .{}); - const source_sect = object.sections.items[sect_id]; - log.debug(" | section '{s},{s}'", .{ - parseName(&source_sect.inner.segname), - parseName(&source_sect.inner.sectname), - }); - const target_map = source_sect.target_map orelse unreachable; - const target_seg = self.load_commands.items[target_map.segment_id].Segment; - const target_sect = target_seg.sections.items[target_map.section_id]; - break :blk target_sect.addr + target_map.offset; - }, - } - }; - return target_addr; -} - -fn populateMetadata(self: *Zld) !void { - if (self.pagezero_segment_cmd_index == null) { - self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Segment = SegmentCommand.empty("__PAGEZERO", .{ - .vmsize = 0x100000000, // size always set to 4GB - }), - }); - } - - if (self.text_segment_cmd_index == null) { - self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Segment = SegmentCommand.empty("__TEXT", .{ - .vmaddr = 0x100000000, // always starts at 4GB - .maxprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, - .initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, - }), - }); - } - - if (self.text_section_index == null) { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.text_section_index = @intCast(u16, text_seg.sections.items.len); - const alignment: u2 = switch (self.target.?.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - try text_seg.addSection(self.allocator, "__text", .{ - .@"align" = alignment, - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - } - - if (self.stubs_section_index == null) { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.stubs_section_index = @intCast(u16, text_seg.sections.items.len); - const alignment: u2 = switch (self.target.?.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - const stub_size: u4 = switch (self.target.?.cpu.arch) { - .x86_64 => 6, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, // unhandled architecture type - }; - try text_seg.addSection(self.allocator, "__stubs", .{ - .@"align" = alignment, - .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - .reserved2 = stub_size, - }); - } - - if (self.stub_helper_section_index == null) { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.stub_helper_section_index = @intCast(u16, text_seg.sections.items.len); - const alignment: u2 = switch (self.target.?.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - const stub_helper_size: u6 = switch (self.target.?.cpu.arch) { - .x86_64 => 15, - .aarch64 => 6 * @sizeOf(u32), - else => unreachable, - }; - try 
text_seg.addSection(self.allocator, "__stub_helper", .{ - .size = stub_helper_size, - .@"align" = alignment, - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - } - - if (self.data_const_segment_cmd_index == null) { - self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Segment = SegmentCommand.empty("__DATA_CONST", .{ - .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - }), - }); - } - - if (self.got_section_index == null) { - const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - self.got_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__got", .{ - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, - }); - } - - if (self.data_segment_cmd_index == null) { - self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Segment = SegmentCommand.empty("__DATA", .{ - .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - }), - }); - } - - if (self.la_symbol_ptr_section_index == null) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.la_symbol_ptr_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__la_symbol_ptr", .{ - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = macho.S_LAZY_SYMBOL_POINTERS, - }); - } - - if (self.data_section_index == null) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__data", .{ - .@"align" = 3, // 2^3 = @sizeOf(u64) - }); - } - - if (self.linkedit_segment_cmd_index == null) { - self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Segment = SegmentCommand.empty("__LINKEDIT", .{ - .maxprot = macho.VM_PROT_READ, - .initprot = macho.VM_PROT_READ, - }), - }); - } - - if (self.dyld_info_cmd_index == null) { - self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .DyldInfoOnly = .{ - .cmd = macho.LC_DYLD_INFO_ONLY, - .cmdsize = @sizeOf(macho.dyld_info_command), - .rebase_off = 0, - .rebase_size = 0, - .bind_off = 0, - .bind_size = 0, - .weak_bind_off = 0, - .weak_bind_size = 0, - .lazy_bind_off = 0, - .lazy_bind_size = 0, - .export_off = 0, - .export_size = 0, - }, - }); - } - - if (self.symtab_cmd_index == null) { - self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Symtab = .{ - .cmd = macho.LC_SYMTAB, - .cmdsize = @sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, - }, - }); - try self.strtab.append(self.allocator, 0); - } - - if (self.dysymtab_cmd_index == null) { - self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Dysymtab = .{ - .cmd = macho.LC_DYSYMTAB, - .cmdsize = @sizeOf(macho.dysymtab_command), - .ilocalsym = 0, - .nlocalsym = 0, - .iextdefsym = 0, - .nextdefsym = 0, - .iundefsym = 0, - .nundefsym = 0, - .tocoff = 0, - .ntoc = 0, - 
.modtaboff = 0, - .nmodtab = 0, - .extrefsymoff = 0, - .nextrefsyms = 0, - .indirectsymoff = 0, - .nindirectsyms = 0, - .extreloff = 0, - .nextrel = 0, - .locreloff = 0, - .nlocrel = 0, - }, - }); - } - - if (self.dylinker_cmd_index == null) { - self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH), - @sizeOf(u64), - )); - var dylinker_cmd = emptyGenericCommandWithData(macho.dylinker_command{ - .cmd = macho.LC_LOAD_DYLINKER, - .cmdsize = cmdsize, - .name = @sizeOf(macho.dylinker_command), - }); - dylinker_cmd.data = try self.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name); - mem.set(u8, dylinker_cmd.data, 0); - mem.copy(u8, dylinker_cmd.data, mem.spanZ(DEFAULT_DYLD_PATH)); - try self.load_commands.append(self.allocator, .{ .Dylinker = dylinker_cmd }); - } - - if (self.main_cmd_index == null and self.output.?.tag == .exe) { - self.main_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Main = .{ - .cmd = macho.LC_MAIN, - .cmdsize = @sizeOf(macho.entry_point_command), - .entryoff = 0x0, - .stacksize = 0, - }, - }); - } - - if (self.dylib_id_cmd_index == null and self.output.?.tag == .dylib) { - self.dylib_id_cmd_index = @intCast(u16, self.load_commands.items.len); - var dylib_cmd = try createLoadDylibCommand( - self.allocator, - self.output.?.install_name.?, - 2, - 0x10000, // TODO forward user-provided versions - 0x10000, - ); - errdefer dylib_cmd.deinit(self.allocator); - dylib_cmd.inner.cmd = macho.LC_ID_DYLIB; - try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); - } - - if (self.version_min_cmd_index == null) { - self.version_min_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmd: u32 = switch (self.target.?.os.tag) { - .macos => macho.LC_VERSION_MIN_MACOSX, - .ios => macho.LC_VERSION_MIN_IPHONEOS, - .tvos => macho.LC_VERSION_MIN_TVOS, - .watchos => macho.LC_VERSION_MIN_WATCHOS, - else => unreachable, // wrong OS - }; - const ver = self.target.?.os.version_range.semver.min; - const version = ver.major << 16 | ver.minor << 8 | ver.patch; - try self.load_commands.append(self.allocator, .{ - .VersionMin = .{ - .cmd = cmd, - .cmdsize = @sizeOf(macho.version_min_command), - .version = version, - .sdk = version, - }, - }); - } - - if (self.source_version_cmd_index == null) { - self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .SourceVersion = .{ - .cmd = macho.LC_SOURCE_VERSION, - .cmdsize = @sizeOf(macho.source_version_command), - .version = 0x0, - }, - }); - } - - if (self.uuid_cmd_index == null) { - self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); - var uuid_cmd: macho.uuid_command = .{ - .cmd = macho.LC_UUID, - .cmdsize = @sizeOf(macho.uuid_command), - .uuid = undefined, - }; - std.crypto.random.bytes(&uuid_cmd.uuid); - try self.load_commands.append(self.allocator, .{ .Uuid = uuid_cmd }); - } -} - -fn addDataInCodeLC(self: *Zld) !void { - if (self.data_in_code_cmd_index == null) { - self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .LinkeditData = .{ - .cmd = macho.LC_DATA_IN_CODE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - } -} - -fn addCodeSignatureLC(self: *Zld) !void { - if (self.code_signature_cmd_index == null 
and self.target.?.cpu.arch == .aarch64) { - self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .LinkeditData = .{ - .cmd = macho.LC_CODE_SIGNATURE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - } -} - -fn addRpaths(self: *Zld, rpaths: []const []const u8) !void { - for (rpaths) |rpath| { - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.rpath_command) + rpath.len + 1, - @sizeOf(u64), - )); - var rpath_cmd = emptyGenericCommandWithData(macho.rpath_command{ - .cmd = macho.LC_RPATH, - .cmdsize = cmdsize, - .path = @sizeOf(macho.rpath_command), - }); - rpath_cmd.data = try self.allocator.alloc(u8, cmdsize - rpath_cmd.inner.path); - mem.set(u8, rpath_cmd.data, 0); - mem.copy(u8, rpath_cmd.data, rpath); - try self.load_commands.append(self.allocator, .{ .Rpath = rpath_cmd }); - } -} - -fn flush(self: *Zld) !void { - try self.writeStubHelperCommon(); - try self.resolveRelocsAndWriteSections(); - - if (self.common_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; - } - - if (self.bss_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; - } - - if (self.tlv_bss_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; - } - - if (self.tlv_section_index) |index| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - - var buffer = try self.allocator.alloc(u8, @intCast(usize, sect.size)); - defer self.allocator.free(buffer); - _ = try self.file.?.preadAll(buffer, sect.offset); - - var stream = std.io.fixedBufferStream(buffer); - var writer = stream.writer(); - - std.sort.sort(TlvOffset, self.threadlocal_offsets.items, {}, TlvOffset.cmp); - - const seek_amt = 2 * @sizeOf(u64); - for (self.threadlocal_offsets.items) |tlv| { - try writer.context.seekBy(seek_amt); - try writer.writeIntLittle(u64, tlv.offset); - } - - try self.file.?.pwriteAll(buffer, sect.offset); - } - - if (self.mod_init_func_section_index) |index| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - - var initializers = std.ArrayList(u64).init(self.allocator); - defer initializers.deinit(); - - for (self.objects.items) |object| { - for (object.initializers.items) |initializer| { - const address = initializer.cast(Symbol.Regular).?.address; - try initializers.append(address); - } - } - - _ = try self.file.?.pwriteAll(mem.sliceAsBytes(initializers.items), sect.offset); - sect.size = @intCast(u32, initializers.items.len * @sizeOf(u64)); - } - - try self.writeGotEntries(); - try self.setEntryPoint(); - try self.writeRebaseInfoTable(); - try self.writeBindInfoTable(); - try self.writeLazyBindInfoTable(); - try self.writeExportInfo(); - try self.writeDataInCode(); - - { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - } - - try self.writeDebugInfo(); - try self.writeSymbolTable(); - try self.writeStringTable(); - - { - // 
Seal __LINKEDIT size - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size.?); - } - - if (self.target.?.cpu.arch == .aarch64) { - try self.writeCodeSignaturePadding(); - } - - try self.writeLoadCommands(); - try self.writeHeader(); - - if (self.target.?.cpu.arch == .aarch64) { - try self.writeCodeSignature(); - } - - if (comptime std.Target.current.isDarwin() and std.Target.current.cpu.arch == .aarch64) { - const out_path = self.output.?.path; - try fs.cwd().copyFile(out_path, fs.cwd(), out_path, .{}); - } -} - -fn writeGotEntries(self: *Zld) !void { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[self.got_section_index.?]; - - var buffer = try self.allocator.alloc(u8, self.got_entries.items.len * @sizeOf(u64)); - defer self.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - var writer = stream.writer(); - - for (self.got_entries.items) |sym| { - const address: u64 = if (sym.cast(Symbol.Regular)) |reg| reg.address else 0; - try writer.writeIntLittle(u64, address); - } - - log.debug("writing GOT pointers at 0x{x} to 0x{x}", .{ sect.offset, sect.offset + buffer.len }); - - try self.file.?.pwriteAll(buffer, sect.offset); -} - -fn setEntryPoint(self: *Zld) !void { - if (self.output.?.tag != .exe) return; - - // TODO we should respect the -entry flag passed in by the user to set a custom - // entrypoint. For now, assume default of `_main`. - const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const sym = self.globals.get("_main") orelse return error.MissingMainEntrypoint; - const entry_sym = sym.cast(Symbol.Regular) orelse unreachable; - const ec = &self.load_commands.items[self.main_cmd_index.?].Main; - ec.entryoff = @intCast(u32, entry_sym.address - seg.inner.vmaddr); - ec.stacksize = self.stack_size; -} - -fn writeRebaseInfoTable(self: *Zld) !void { - var pointers = std.ArrayList(Pointer).init(self.allocator); - defer pointers.deinit(); - - try pointers.ensureCapacity(self.local_rebases.items.len); - pointers.appendSliceAssumeCapacity(self.local_rebases.items); - - if (self.got_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - - for (self.got_entries.items) |sym| { - if (sym.@"type" == .proxy) continue; - try pointers.append(.{ - .offset = base_offset + sym.got_index.? 
* @sizeOf(u64), - .segment_id = segment_id, - }); - } - } - - if (self.mod_init_func_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - - var index: u64 = 0; - for (self.objects.items) |object| { - for (object.initializers.items) |_| { - try pointers.append(.{ - .offset = base_offset + index * @sizeOf(u64), - .segment_id = segment_id, - }); - index += 1; - } - } - } - - if (self.la_symbol_ptr_section_index) |idx| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - - try pointers.ensureCapacity(pointers.items.len + self.stubs.items.len); - for (self.stubs.items) |sym| { - pointers.appendAssumeCapacity(.{ - .offset = base_offset + sym.stubs_index.? * @sizeOf(u64), - .segment_id = segment_id, - }); - } - } - - std.sort.sort(Pointer, pointers.items, {}, pointerCmp); - - const size = try rebaseInfoSize(pointers.items); - var buffer = try self.allocator.alloc(u8, @intCast(usize, size)); - defer self.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try writeRebaseInfo(pointers.items, stream.writer()); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.rebase_off = @intCast(u32, seg.inner.fileoff); - dyld_info.rebase_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @sizeOf(u64))); - seg.inner.filesize += dyld_info.rebase_size; - - log.debug("writing rebase info from 0x{x} to 0x{x}", .{ dyld_info.rebase_off, dyld_info.rebase_off + dyld_info.rebase_size }); - - try self.file.?.pwriteAll(buffer, dyld_info.rebase_off); -} - -fn writeBindInfoTable(self: *Zld) !void { - var pointers = std.ArrayList(Pointer).init(self.allocator); - defer pointers.deinit(); - - if (self.got_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - - for (self.got_entries.items) |sym| { - if (sym.cast(Symbol.Proxy)) |proxy| { - try pointers.append(.{ - .offset = base_offset + proxy.base.got_index.? 
* @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = proxy.dylibOrdinal(), - .name = proxy.base.name, - }); - } - } - } - - for (self.imports.values()) |sym| { - if (sym.cast(Symbol.Proxy)) |proxy| { - for (proxy.bind_info.items) |info| { - const seg = self.load_commands.items[info.segment_id].Segment; - try pointers.append(.{ - .offset = info.address - seg.inner.vmaddr, - .segment_id = info.segment_id, - .dylib_ordinal = proxy.dylibOrdinal(), - .name = proxy.base.name, - }); - } - } - } - - if (self.tlv_section_index) |idx| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - - const sym = self.imports.get("__tlv_bootstrap") orelse unreachable; - const proxy = sym.cast(Symbol.Proxy) orelse unreachable; - - try pointers.append(.{ - .offset = base_offset, - .segment_id = segment_id, - .dylib_ordinal = proxy.dylibOrdinal(), - .name = proxy.base.name, - }); - } - - const size = try bindInfoSize(pointers.items); - var buffer = try self.allocator.alloc(u8, @intCast(usize, size)); - defer self.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try writeBindInfo(pointers.items, stream.writer()); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.bind_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dyld_info.bind_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); - seg.inner.filesize += dyld_info.bind_size; - - log.debug("writing binding info from 0x{x} to 0x{x}", .{ dyld_info.bind_off, dyld_info.bind_off + dyld_info.bind_size }); - - try self.file.?.pwriteAll(buffer, dyld_info.bind_off); -} - -fn writeLazyBindInfoTable(self: *Zld) !void { - var pointers = std.ArrayList(Pointer).init(self.allocator); - defer pointers.deinit(); - - if (self.la_symbol_ptr_section_index) |idx| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - - try pointers.ensureCapacity(self.stubs.items.len); - - for (self.stubs.items) |sym| { - const proxy = sym.cast(Symbol.Proxy) orelse unreachable; - pointers.appendAssumeCapacity(.{ - .offset = base_offset + sym.stubs_index.? 
* @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = proxy.dylibOrdinal(), - .name = sym.name, - }); - } - } - - const size = try lazyBindInfoSize(pointers.items); - var buffer = try self.allocator.alloc(u8, @intCast(usize, size)); - defer self.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try writeLazyBindInfo(pointers.items, stream.writer()); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.lazy_bind_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dyld_info.lazy_bind_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); - seg.inner.filesize += dyld_info.lazy_bind_size; - - log.debug("writing lazy binding info from 0x{x} to 0x{x}", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size }); - - try self.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off); - try self.populateLazyBindOffsetsInStubHelper(buffer); -} - -fn populateLazyBindOffsetsInStubHelper(self: *Zld, buffer: []const u8) !void { - var stream = std.io.fixedBufferStream(buffer); - var reader = stream.reader(); - var offsets = std.ArrayList(u32).init(self.allocator); - try offsets.append(0); - defer offsets.deinit(); - var valid_block = false; - - while (true) { - const inst = reader.readByte() catch |err| switch (err) { - error.EndOfStream => break, - else => return err, - }; - const opcode: u8 = inst & macho.BIND_OPCODE_MASK; - - switch (opcode) { - macho.BIND_OPCODE_DO_BIND => { - valid_block = true; - }, - macho.BIND_OPCODE_DONE => { - if (valid_block) { - const offset = try stream.getPos(); - try offsets.append(@intCast(u32, offset)); - } - valid_block = false; - }, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { - var next = try reader.readByte(); - while (next != @as(u8, 0)) { - next = try reader.readByte(); - } - }, - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { - _ = try leb.readULEB128(u64, reader); - }, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => { - _ = try leb.readULEB128(u64, reader); - }, - macho.BIND_OPCODE_SET_ADDEND_SLEB => { - _ = try leb.readILEB128(i64, reader); - }, - else => {}, - } - } - assert(self.stubs.items.len <= offsets.items.len); - - const stub_size: u4 = switch (self.target.?.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const off: u4 = switch (self.target.?.cpu.arch) { - .x86_64 => 1, - .aarch64 => 2 * @sizeOf(u32), - else => unreachable, - }; - var buf: [@sizeOf(u32)]u8 = undefined; - for (self.stubs.items) |sym| { - const index = sym.stubs_index orelse unreachable; - const placeholder_off = self.stub_helper_stubs_start_off.? + index * stub_size + off; - mem.writeIntLittle(u32, &buf, offsets.items[index]); - try self.file.?.pwriteAll(&buf, placeholder_off); - } -} - -fn writeExportInfo(self: *Zld) !void { - var trie = Trie.init(self.allocator); - defer trie.deinit(); - - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const base_address = text_segment.inner.vmaddr; - - // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
- log.debug("writing export trie", .{}); - - const Sorter = struct { - fn lessThan(_: void, a: []const u8, b: []const u8) bool { - return mem.lessThan(u8, a, b); - } - }; - - var sorted_globals = std.ArrayList([]const u8).init(self.allocator); - defer sorted_globals.deinit(); - - for (self.globals.values()) |sym| { - const reg = sym.cast(Symbol.Regular) orelse continue; - if (reg.linkage != .global) continue; - try sorted_globals.append(sym.name); - } - - std.sort.sort([]const u8, sorted_globals.items, {}, Sorter.lessThan); - - for (sorted_globals.items) |sym_name| { - const sym = self.globals.get(sym_name) orelse unreachable; - const reg = sym.cast(Symbol.Regular) orelse unreachable; - - log.debug(" | putting '{s}' defined at 0x{x}", .{ reg.base.name, reg.address }); - - try trie.put(.{ - .name = sym.name, - .vmaddr_offset = reg.address - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); - } - - try trie.finalize(); - - var buffer = try self.allocator.alloc(u8, @intCast(usize, trie.size)); - defer self.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - const nwritten = try trie.write(stream.writer()); - assert(nwritten == trie.size); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.export_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dyld_info.export_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); - seg.inner.filesize += dyld_info.export_size; - - log.debug("writing export info from 0x{x} to 0x{x}", .{ dyld_info.export_off, dyld_info.export_off + dyld_info.export_size }); - - try self.file.?.pwriteAll(buffer, dyld_info.export_off); -} - -fn writeDebugInfo(self: *Zld) !void { - var stabs = std.ArrayList(macho.nlist_64).init(self.allocator); - defer stabs.deinit(); - - for (self.objects.items) |object| { - const tu_path = object.tu_path orelse continue; - const tu_mtime = object.tu_mtime orelse continue; - _ = tu_mtime; - const dirname = std.fs.path.dirname(tu_path) orelse "./"; - // Current dir - try stabs.append(.{ - .n_strx = try self.makeString(tu_path[0 .. 
dirname.len + 1]), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - // Artifact name - try stabs.append(.{ - .n_strx = try self.makeString(tu_path[dirname.len + 1 ..]), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - // Path to object file with debug info - try stabs.append(.{ - .n_strx = try self.makeString(object.name.?), - .n_type = macho.N_OSO, - .n_sect = 0, - .n_desc = 1, - .n_value = 0, //tu_mtime, TODO figure out why precalculated mtime value doesn't work - }); - - for (object.symbols.items) |sym| { - const reg = reg: { - switch (sym.@"type") { - .regular => break :reg sym.cast(Symbol.Regular) orelse unreachable, - .tentative => { - const final = sym.getTopmostAlias().cast(Symbol.Regular) orelse unreachable; - if (object != final.file) continue; - break :reg final; - }, - else => continue, - } - }; - - if (reg.isTemp() or reg.stab == null) continue; - const stab = reg.stab orelse unreachable; - - switch (stab.kind) { - .function => { - try stabs.append(.{ - .n_strx = 0, - .n_type = macho.N_BNSYM, - .n_sect = reg.section, - .n_desc = 0, - .n_value = reg.address, - }); - try stabs.append(.{ - .n_strx = try self.makeString(sym.name), - .n_type = macho.N_FUN, - .n_sect = reg.section, - .n_desc = 0, - .n_value = reg.address, - }); - try stabs.append(.{ - .n_strx = 0, - .n_type = macho.N_FUN, - .n_sect = 0, - .n_desc = 0, - .n_value = stab.size, - }); - try stabs.append(.{ - .n_strx = 0, - .n_type = macho.N_ENSYM, - .n_sect = reg.section, - .n_desc = 0, - .n_value = stab.size, - }); - }, - .global => { - try stabs.append(.{ - .n_strx = try self.makeString(sym.name), - .n_type = macho.N_GSYM, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - }, - .static => { - try stabs.append(.{ - .n_strx = try self.makeString(sym.name), - .n_type = macho.N_STSYM, - .n_sect = reg.section, - .n_desc = 0, - .n_value = reg.address, - }); - }, - } - } - - // Close the source file! - try stabs.append(.{ - .n_strx = 0, - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - } - - if (stabs.items.len == 0) return; - - // Write stabs into the symbol table - const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - - symtab.nsyms = @intCast(u32, stabs.items.len); - - const stabs_off = symtab.symoff; - const stabs_size = symtab.nsyms * @sizeOf(macho.nlist_64); - log.debug("writing symbol stabs from 0x{x} to 0x{x}", .{ stabs_off, stabs_size + stabs_off }); - try self.file.?.pwriteAll(mem.sliceAsBytes(stabs.items), stabs_off); - - linkedit.inner.filesize += stabs_size; - - // Update dynamic symbol table. 
- const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - dysymtab.nlocalsym = symtab.nsyms; -} - -fn writeSymbolTable(self: *Zld) !void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - - var locals = std.ArrayList(macho.nlist_64).init(self.allocator); - defer locals.deinit(); - - var exports = std.ArrayList(macho.nlist_64).init(self.allocator); - defer exports.deinit(); - - for (self.objects.items) |object| { - for (object.symbols.items) |sym| { - const final = sym.getTopmostAlias(); - if (final.@"type" != .regular) continue; - - const reg = final.cast(Symbol.Regular) orelse unreachable; - if (reg.isTemp()) continue; - if (reg.visited) continue; - - switch (reg.linkage) { - .translation_unit => { - try locals.append(.{ - .n_strx = try self.makeString(sym.name), - .n_type = macho.N_SECT, - .n_sect = reg.section, - .n_desc = 0, - .n_value = reg.address, - }); - }, - else => { - try exports.append(.{ - .n_strx = try self.makeString(sym.name), - .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = reg.section, - .n_desc = 0, - .n_value = reg.address, - }); - }, - } - - reg.visited = true; - } - } - - var undefs = std.ArrayList(macho.nlist_64).init(self.allocator); - defer undefs.deinit(); - - for (self.imports.values()) |sym| { - const proxy = sym.cast(Symbol.Proxy) orelse unreachable; - try undefs.append(.{ - .n_strx = try self.makeString(sym.name), - .n_type = macho.N_UNDF | macho.N_EXT, - .n_sect = 0, - .n_desc = (proxy.dylibOrdinal() * macho.N_SYMBOL_RESOLVER) | macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY, - .n_value = 0, - }); - } - - const nlocals = locals.items.len; - const nexports = exports.items.len; - const nundefs = undefs.items.len; - - const locals_off = symtab.symoff + symtab.nsyms * @sizeOf(macho.nlist_64); - const locals_size = nlocals * @sizeOf(macho.nlist_64); - log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); - try self.file.?.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); - - const exports_off = locals_off + locals_size; - const exports_size = nexports * @sizeOf(macho.nlist_64); - log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); - try self.file.?.pwriteAll(mem.sliceAsBytes(exports.items), exports_off); - - const undefs_off = exports_off + exports_size; - const undefs_size = nundefs * @sizeOf(macho.nlist_64); - log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); - try self.file.?.pwriteAll(mem.sliceAsBytes(undefs.items), undefs_off); - - symtab.nsyms += @intCast(u32, nlocals + nexports + nundefs); - seg.inner.filesize += locals_size + exports_size + undefs_size; - - // Update dynamic symbol table. 
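// (Aside, illustrative only:) the dynamic symbol table stores no symbols of
// its own; it carves the symtab written above into three contiguous ranges,
// with index/count fields as defined in <mach-o/loader.h>:
//
//   [ stabs + locals     | exports            | undefs            ]
//     ilocalsym = 0        iextdefsym           iundefsym
//     nlocalsym entries    nextdefsym entries   nundefsym entries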
- const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - dysymtab.nlocalsym += @intCast(u32, nlocals); - dysymtab.iextdefsym = dysymtab.nlocalsym; - dysymtab.nextdefsym = @intCast(u32, nexports); - dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; - dysymtab.nundefsym = @intCast(u32, nundefs); - - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = &text_segment.sections.items[self.stubs_section_index.?]; - const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_segment.sections.items[self.got_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - - const nstubs = @intCast(u32, self.stubs.items.len); - const ngot_entries = @intCast(u32, self.got_entries.items.len); - - dysymtab.indirectsymoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dysymtab.nindirectsyms = nstubs * 2 + ngot_entries; - - const needed_size = dysymtab.nindirectsyms * @sizeOf(u32); - seg.inner.filesize += needed_size; - - log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ - dysymtab.indirectsymoff, - dysymtab.indirectsymoff + needed_size, - }); - - var buf = try self.allocator.alloc(u8, needed_size); - defer self.allocator.free(buf); - - var stream = std.io.fixedBufferStream(buf); - var writer = stream.writer(); - - stubs.reserved1 = 0; - for (self.stubs.items) |sym| { - const id = self.imports.getIndex(sym.name) orelse unreachable; - try writer.writeIntLittle(u32, dysymtab.iundefsym + @intCast(u32, id)); - } - - got.reserved1 = nstubs; - for (self.got_entries.items) |sym| { - if (sym.@"type" == .proxy) { - const id = self.imports.getIndex(sym.name) orelse unreachable; - try writer.writeIntLittle(u32, dysymtab.iundefsym + @intCast(u32, id)); - } else { - try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); - } - } - - la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; - for (self.stubs.items) |sym| { - const id = self.imports.getIndex(sym.name) orelse unreachable; - try writer.writeIntLittle(u32, dysymtab.iundefsym + @intCast(u32, id)); - } - - try self.file.?.pwriteAll(buf, dysymtab.indirectsymoff); -} - -fn writeStringTable(self: *Zld) !void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - symtab.stroff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - symtab.strsize = @intCast(u32, mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64))); - seg.inner.filesize += symtab.strsize; - - log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - - try self.file.?.pwriteAll(self.strtab.items, symtab.stroff); - - if (symtab.strsize > self.strtab.items.len and self.target.?.cpu.arch == .x86_64) { - // This is the last section, so we need to pad it out. 
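// (Aside:) a single zero byte written at the last in-range offset is enough —
// the kernel extends the file to that size and the gap reads back as zeroes —
// so the string table gets its @alignOf(u64) padding without materializing a
// pad buffer. A minimal standalone illustration of the trick (hypothetical
// helper, not part of this linker):
//
//     fn padFileTo(file: std.fs.File, size: u64) !void {
//         if (size == 0) return;
//         try file.pwriteAll(&[_]u8{0}, size - 1);
//     }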
- try self.file.?.pwriteAll(&[_]u8{0}, seg.inner.fileoff + seg.inner.filesize - 1); - } -} - -fn writeDataInCode(self: *Zld) !void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData; - const fileoff = seg.inner.fileoff + seg.inner.filesize; - - var buf = std.ArrayList(u8).init(self.allocator); - defer buf.deinit(); - - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const text_sect = text_seg.sections.items[self.text_section_index.?]; - for (self.objects.items) |object| { - const source_sect = object.sections.items[object.text_section_index.?]; - const target_map = source_sect.target_map orelse continue; - - try buf.ensureCapacity( - buf.items.len + object.data_in_code_entries.items.len * @sizeOf(macho.data_in_code_entry), - ); - for (object.data_in_code_entries.items) |dice| { - const new_dice: macho.data_in_code_entry = .{ - .offset = text_sect.offset + target_map.offset + dice.offset, - .length = dice.length, - .kind = dice.kind, - }; - buf.appendSliceAssumeCapacity(mem.asBytes(&new_dice)); - } - } - const datasize = @intCast(u32, buf.items.len); - - dice_cmd.dataoff = @intCast(u32, fileoff); - dice_cmd.datasize = datasize; - seg.inner.filesize += datasize; - - log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ fileoff, fileoff + datasize }); - - try self.file.?.pwriteAll(buf.items, fileoff); -} - -fn writeCodeSignaturePadding(self: *Zld) !void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const code_sig_cmd = &self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData; - const fileoff = seg.inner.fileoff + seg.inner.filesize; - const needed_size = CodeSignature.calcCodeSignaturePaddingSize( - self.output.?.path, - fileoff, - self.page_size.?, - ); - code_sig_cmd.dataoff = @intCast(u32, fileoff); - code_sig_cmd.datasize = needed_size; - - // Advance size of __LINKEDIT segment - seg.inner.filesize += needed_size; - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size.?); - - log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ fileoff, fileoff + needed_size }); - - // Pad out the space. We need to do this to calculate valid hashes for everything in the file - // except for code signature data. 
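// (Aside:) same single-byte sparse-write trick as in writeStringTable above —
// touching the final offset extends the file, so every page the ad-hoc
// signature will hash is backed by well-defined zeroes rather than
// unwritten garbage.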
- try self.file.?.pwriteAll(&[_]u8{0}, fileoff + needed_size - 1); -} - -fn writeCodeSignature(self: *Zld) !void { - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const code_sig_cmd = self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData; - - var code_sig = CodeSignature.init(self.allocator, self.page_size.?); - defer code_sig.deinit(); - try code_sig.calcAdhocSignature( - self.file.?, - self.output.?.path, - text_seg.inner, - code_sig_cmd, - .Exe, - ); - - var buffer = try self.allocator.alloc(u8, code_sig.size()); - defer self.allocator.free(buffer); - var stream = std.io.fixedBufferStream(buffer); - try code_sig.write(stream.writer()); - - log.debug("writing code signature from 0x{x} to 0x{x}", .{ code_sig_cmd.dataoff, code_sig_cmd.dataoff + buffer.len }); - try self.file.?.pwriteAll(buffer, code_sig_cmd.dataoff); -} - -fn writeLoadCommands(self: *Zld) !void { - var sizeofcmds: u32 = 0; - for (self.load_commands.items) |lc| { - sizeofcmds += lc.cmdsize(); - } - - var buffer = try self.allocator.alloc(u8, sizeofcmds); - defer self.allocator.free(buffer); - var writer = std.io.fixedBufferStream(buffer).writer(); - for (self.load_commands.items) |lc| { - try lc.write(writer); - } - - const off = @sizeOf(macho.mach_header_64); - log.debug("writing {} load commands from 0x{x} to 0x{x}", .{ self.load_commands.items.len, off, off + sizeofcmds }); - try self.file.?.pwriteAll(buffer, off); -} - -fn writeHeader(self: *Zld) !void { - var header: macho.mach_header_64 = undefined; - header.magic = macho.MH_MAGIC_64; - - const CpuInfo = struct { - cpu_type: macho.cpu_type_t, - cpu_subtype: macho.cpu_subtype_t, - }; - - const cpu_info: CpuInfo = switch (self.target.?.cpu.arch) { - .aarch64 => .{ - .cpu_type = macho.CPU_TYPE_ARM64, - .cpu_subtype = macho.CPU_SUBTYPE_ARM_ALL, - }, - .x86_64 => .{ - .cpu_type = macho.CPU_TYPE_X86_64, - .cpu_subtype = macho.CPU_SUBTYPE_X86_64_ALL, - }, - else => return error.UnsupportedCpuArchitecture, - }; - header.cputype = cpu_info.cpu_type; - header.cpusubtype = cpu_info.cpu_subtype; - - switch (self.output.?.tag) { - .exe => { - header.filetype = macho.MH_EXECUTE; - header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL; - }, - .dylib => { - header.filetype = macho.MH_DYLIB; - header.flags = macho.MH_NOUNDEFS | - macho.MH_DYLDLINK | - macho.MH_PIE | - macho.MH_TWOLEVEL | - macho.MH_NO_REEXPORTED_DYLIBS; - }, - } - - header.reserved = 0; - - if (self.tlv_section_index) |_| - header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; - - header.ncmds = @intCast(u32, self.load_commands.items.len); - header.sizeofcmds = 0; - for (self.load_commands.items) |cmd| { - header.sizeofcmds += cmd.cmdsize(); - } - log.debug("writing Mach-O header {}", .{header}); - try self.file.?.pwriteAll(mem.asBytes(&header), 0); -} - -fn makeString(self: *Zld, bytes: []const u8) !u32 { - if (self.strtab_dir.get(bytes)) |offset| { - log.debug("reusing '{s}' from string table at offset 0x{x}", .{ bytes, offset }); - return offset; - } - - try self.strtab.ensureCapacity(self.allocator, self.strtab.items.len + bytes.len + 1); - const offset = @intCast(u32, self.strtab.items.len); - log.debug("writing new string '{s}' into string table at offset 0x{x}", .{ bytes, offset }); - self.strtab.appendSliceAssumeCapacity(bytes); - self.strtab.appendAssumeCapacity(0); - try self.strtab_dir.putNoClobber(self.allocator, try self.allocator.dupe(u8, bytes), offset); - return offset; -} - -fn getString(self: *const Zld, str_off: u32) 
[]const u8 {
-    assert(str_off < self.strtab.items.len);
-    return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + str_off));
-}
-
-pub fn parseName(name: *const [16]u8) []const u8 {
-    const len = mem.indexOfScalar(u8, name, @as(u8, 0)) orelse name.len;
-    return name[0..len];
-}
diff --git a/src/link/MachO/commands.zig b/src/link/MachO/commands.zig
index 6958b8d1e6..f7a2fd3eda 100644
--- a/src/link/MachO/commands.zig
+++ b/src/link/MachO/commands.zig
@@ -11,6 +11,28 @@ const Allocator = std.mem.Allocator;
 const MachO = @import("../MachO.zig");
 const padToIdeal = MachO.padToIdeal;
 
+pub const HeaderArgs = struct {
+    magic: u32 = macho.MH_MAGIC_64,
+    cputype: macho.cpu_type_t = 0,
+    cpusubtype: macho.cpu_subtype_t = 0,
+    filetype: u32 = 0,
+    flags: u32 = 0,
+    reserved: u32 = 0,
+};
+
+pub fn emptyHeader(args: HeaderArgs) macho.mach_header_64 {
+    return .{
+        .magic = args.magic,
+        .cputype = args.cputype,
+        .cpusubtype = args.cpusubtype,
+        .filetype = args.filetype,
+        .ncmds = 0,
+        .sizeofcmds = 0,
+        .flags = args.flags,
+        .reserved = args.reserved,
+    };
+}
+
 pub const LoadCommand = union(enum) {
     Segment: SegmentCommand,
     DyldInfoOnly: macho.dyld_info_command,
@@ -403,6 +425,44 @@ fn makeStaticString(bytes: []const u8) [16]u8 {
     return buf;
 }
 
+fn parseName(name: *const [16]u8) []const u8 {
+    const len = mem.indexOfScalar(u8, name, @as(u8, 0)) orelse name.len;
+    return name[0..len];
+}
+
+pub fn segmentName(sect: macho.section_64) []const u8 {
+    return parseName(&sect.segname);
+}
+
+pub fn sectionName(sect: macho.section_64) []const u8 {
+    return parseName(&sect.sectname);
+}
+
+pub fn sectionType(sect: macho.section_64) u8 {
+    return @truncate(u8, sect.flags & 0xff);
+}
+
+pub fn sectionAttrs(sect: macho.section_64) u32 {
+    return sect.flags & 0xffffff00;
+}
+
+pub fn sectionIsCode(sect: macho.section_64) bool {
+    const attr = sectionAttrs(sect);
+    return attr & macho.S_ATTR_PURE_INSTRUCTIONS != 0 or attr & macho.S_ATTR_SOME_INSTRUCTIONS != 0;
+}
+
+pub fn sectionIsDebug(sect: macho.section_64) bool {
+    return sectionAttrs(sect) & macho.S_ATTR_DEBUG != 0;
+}
+
+pub fn sectionIsDontDeadStrip(sect: macho.section_64) bool {
+    return sectionAttrs(sect) & macho.S_ATTR_NO_DEAD_STRIP != 0;
+}
+
+pub fn sectionIsDontDeadStripIfReferencesLive(sect: macho.section_64) bool {
+    return sectionAttrs(sect) & macho.S_ATTR_LIVE_SUPPORT != 0;
+}
+
 fn testRead(allocator: *Allocator, buffer: []const u8, expected: anytype) !void {
     var stream = io.fixedBufferStream(buffer);
     var given = try LoadCommand.read(allocator, stream.reader());
diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig
deleted file mode 100644
index 1e1b938196..0000000000
--- a/src/link/MachO/reloc.zig
+++ /dev/null
@@ -1,206 +0,0 @@
-const std = @import("std");
-const assert = std.debug.assert;
-const log = std.log.scoped(.reloc);
-const macho = std.macho;
-const math = std.math;
-const mem = std.mem;
-const meta = std.meta;
-
-const aarch64 = @import("reloc/aarch64.zig");
-const x86_64 = @import("reloc/x86_64.zig");
-
-const Allocator = mem.Allocator;
-const Symbol = @import("Symbol.zig");
-
-pub const Relocation = struct {
-    @"type": Type,
-    code: []u8,
-    offset: u32,
-    target: Target,
-
-    pub fn cast(base: *Relocation, comptime T: type) ?*T {
-        if (base.@"type" != T.base_type)
-            return null;
-
-        return @fieldParentPtr(T, "base", base);
-    }
-
-    pub const ResolveArgs = struct {
-        source_addr: u64,
-        target_addr: u64,
-        subtractor: ?u64 = null,
-        source_source_sect_addr: ?u64 = null,
-        source_target_sect_addr: ?u64 = null,
-    };
-
-    pub 
fn resolve(base: *Relocation, args: ResolveArgs) !void { - log.debug("{s}", .{base.@"type"}); - log.debug(" | offset 0x{x}", .{base.offset}); - log.debug(" | source address 0x{x}", .{args.source_addr}); - log.debug(" | target address 0x{x}", .{args.target_addr}); - if (args.subtractor) |sub| - log.debug(" | subtractor address 0x{x}", .{sub}); - if (args.source_source_sect_addr) |addr| - log.debug(" | source source section address 0x{x}", .{addr}); - if (args.source_target_sect_addr) |addr| - log.debug(" | source target section address 0x{x}", .{addr}); - - return switch (base.@"type") { - .unsigned => @fieldParentPtr(Unsigned, "base", base).resolve(args), - .branch_aarch64 => @fieldParentPtr(aarch64.Branch, "base", base).resolve(args), - .page => @fieldParentPtr(aarch64.Page, "base", base).resolve(args), - .page_off => @fieldParentPtr(aarch64.PageOff, "base", base).resolve(args), - .got_page => @fieldParentPtr(aarch64.GotPage, "base", base).resolve(args), - .got_page_off => @fieldParentPtr(aarch64.GotPageOff, "base", base).resolve(args), - .pointer_to_got => @fieldParentPtr(aarch64.PointerToGot, "base", base).resolve(args), - .tlvp_page => @fieldParentPtr(aarch64.TlvpPage, "base", base).resolve(args), - .tlvp_page_off => @fieldParentPtr(aarch64.TlvpPageOff, "base", base).resolve(args), - .branch_x86_64 => @fieldParentPtr(x86_64.Branch, "base", base).resolve(args), - .signed => @fieldParentPtr(x86_64.Signed, "base", base).resolve(args), - .got_load => @fieldParentPtr(x86_64.GotLoad, "base", base).resolve(args), - .got => @fieldParentPtr(x86_64.Got, "base", base).resolve(args), - .tlv => @fieldParentPtr(x86_64.Tlv, "base", base).resolve(args), - }; - } - - pub const Type = enum { - branch_aarch64, - unsigned, - page, - page_off, - got_page, - got_page_off, - tlvp_page, - pointer_to_got, - tlvp_page_off, - branch_x86_64, - signed, - got_load, - got, - tlv, - }; - - pub const Target = union(enum) { - symbol: *Symbol, - section: u16, - - pub fn from_reloc(reloc: macho.relocation_info, symbols: []*Symbol) Target { - return if (reloc.r_extern == 1) .{ - .symbol = symbols[reloc.r_symbolnum], - } else .{ - .section = @intCast(u16, reloc.r_symbolnum - 1), - }; - } - }; -}; - -pub const Unsigned = struct { - base: Relocation, - subtractor: ?Relocation.Target = null, - /// Addend embedded directly in the relocation slot - addend: i64, - /// Extracted from r_length: - /// => 3 implies true - /// => 2 implies false - /// => * is unreachable - is_64bit: bool, - - pub const base_type: Relocation.Type = .unsigned; - - pub fn resolve(unsigned: Unsigned, args: Relocation.ResolveArgs) !void { - const addend = if (unsigned.base.target == .section) - unsigned.addend - @intCast(i64, args.source_target_sect_addr.?) 
- else - unsigned.addend; - - const result = if (args.subtractor) |subtractor| - @intCast(i64, args.target_addr) - @intCast(i64, subtractor) + addend - else - @intCast(i64, args.target_addr) + addend; - - log.debug(" | calculated addend 0x{x}", .{addend}); - log.debug(" | calculated unsigned value 0x{x}", .{result}); - - if (unsigned.is_64bit) { - mem.writeIntLittle( - u64, - unsigned.base.code[0..8], - @bitCast(u64, result), - ); - } else { - mem.writeIntLittle( - u32, - unsigned.base.code[0..4], - @truncate(u32, @bitCast(u64, result)), - ); - } - } -}; - -pub fn parse( - allocator: *Allocator, - arch: std.Target.Cpu.Arch, - code: []u8, - relocs: []const macho.relocation_info, - symbols: []*Symbol, -) ![]*Relocation { - var it = RelocIterator{ - .buffer = relocs, - }; - - switch (arch) { - .aarch64 => { - var parser = aarch64.Parser{ - .allocator = allocator, - .it = &it, - .code = code, - .parsed = std.ArrayList(*Relocation).init(allocator), - .symbols = symbols, - }; - defer parser.deinit(); - try parser.parse(); - - return parser.parsed.toOwnedSlice(); - }, - .x86_64 => { - var parser = x86_64.Parser{ - .allocator = allocator, - .it = &it, - .code = code, - .parsed = std.ArrayList(*Relocation).init(allocator), - .symbols = symbols, - }; - defer parser.deinit(); - try parser.parse(); - - return parser.parsed.toOwnedSlice(); - }, - else => unreachable, - } -} - -pub const RelocIterator = struct { - buffer: []const macho.relocation_info, - index: i32 = -1, - - pub fn next(self: *RelocIterator) ?macho.relocation_info { - self.index += 1; - if (self.index < self.buffer.len) { - const reloc = self.buffer[@intCast(u32, self.index)]; - log.debug("relocation", .{}); - log.debug(" | type = {}", .{reloc.r_type}); - log.debug(" | offset = {}", .{reloc.r_address}); - log.debug(" | PC = {}", .{reloc.r_pcrel == 1}); - log.debug(" | length = {}", .{reloc.r_length}); - log.debug(" | symbolnum = {}", .{reloc.r_symbolnum}); - log.debug(" | extern = {}", .{reloc.r_extern == 1}); - return reloc; - } - return null; - } - - pub fn peek(self: RelocIterator) macho.relocation_info { - assert(self.index + 1 < self.buffer.len); - return self.buffer[@intCast(u32, self.index + 1)]; - } -}; diff --git a/src/link/MachO/reloc/aarch64.zig b/src/link/MachO/reloc/aarch64.zig deleted file mode 100644 index 16b982bf90..0000000000 --- a/src/link/MachO/reloc/aarch64.zig +++ /dev/null @@ -1,628 +0,0 @@ -const std = @import("std"); -const aarch64 = @import("../../../codegen/aarch64.zig"); -const assert = std.debug.assert; -const log = std.log.scoped(.reloc); -const macho = std.macho; -const math = std.math; -const mem = std.mem; -const meta = std.meta; -const reloc = @import("../reloc.zig"); - -const Allocator = mem.Allocator; -const Relocation = reloc.Relocation; -const Symbol = @import("../Symbol.zig"); - -pub const Branch = struct { - base: Relocation, - /// Always .UnconditionalBranchImmediate - inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .branch_aarch64; - - pub fn resolve(branch: Branch, args: Relocation.ResolveArgs) !void { - const displacement = try math.cast(i28, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); - - log.debug(" | displacement 0x{x}", .{displacement}); - - var inst = branch.inst; - inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2)); - mem.writeIntLittle(u32, branch.base.code[0..4], inst.toU32()); - } -}; - -pub const Page = struct { - base: Relocation, - addend: ?u32 = null, - /// Always .PCRelativeAddress - inst: 
aarch64.Instruction, - - pub const base_type: Relocation.Type = .page; - - pub fn resolve(page: Page, args: Relocation.ResolveArgs) !void { - const target_addr = if (page.addend) |addend| args.target_addr + addend else args.target_addr; - const source_page = @intCast(i32, args.source_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); - - log.debug(" | calculated addend 0x{x}", .{page.addend}); - log.debug(" | moving by {} pages", .{pages}); - - var inst = page.inst; - inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); - inst.pc_relative_address.immlo = @truncate(u2, pages); - - mem.writeIntLittle(u32, page.base.code[0..4], inst.toU32()); - } -}; - -pub const PageOff = struct { - base: Relocation, - addend: ?u32 = null, - op_kind: OpKind, - inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .page_off; - - pub const OpKind = enum { - arithmetic, - load_store, - }; - - pub fn resolve(page_off: PageOff, args: Relocation.ResolveArgs) !void { - const target_addr = if (page_off.addend) |addend| args.target_addr + addend else args.target_addr; - const narrowed = @truncate(u12, target_addr); - - log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); - log.debug(" | {s} opcode", .{page_off.op_kind}); - - var inst = page_off.inst; - if (page_off.op_kind == .arithmetic) { - inst.add_subtract_immediate.imm12 = narrowed; - } else { - const offset: u12 = blk: { - if (inst.load_store_register.size == 0) { - if (inst.load_store_register.v == 1) { - // 128-bit SIMD is scaled by 16. - break :blk try math.divExact(u12, narrowed, 16); - } - // Otherwise, 8-bit SIMD or ldrb. - break :blk narrowed; - } else { - const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size); - break :blk try math.divExact(u12, narrowed, denom); - } - }; - inst.load_store_register.offset = offset; - } - - mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); - } -}; - -pub const GotPage = struct { - base: Relocation, - /// Always .PCRelativeAddress - inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .got_page; - - pub fn resolve(page: GotPage, args: Relocation.ResolveArgs) !void { - const source_page = @intCast(i32, args.source_addr >> 12); - const target_page = @intCast(i32, args.target_addr >> 12); - const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); - - log.debug(" | moving by {} pages", .{pages}); - - var inst = page.inst; - inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); - inst.pc_relative_address.immlo = @truncate(u2, pages); - - mem.writeIntLittle(u32, page.base.code[0..4], inst.toU32()); - } -}; - -pub const GotPageOff = struct { - base: Relocation, - /// Always .LoadStoreRegister with size = 3 for GOT indirection - inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .got_page_off; - - pub fn resolve(page_off: GotPageOff, args: Relocation.ResolveArgs) !void { - const narrowed = @truncate(u12, args.target_addr); - - log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); - - var inst = page_off.inst; - const offset = try math.divExact(u12, narrowed, 8); - inst.load_store_register.offset = offset; - - mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); - } -}; - -pub const PointerToGot = struct { - base: Relocation, - - pub const base_type: Relocation.Type = .pointer_to_got; - - pub fn resolve(ptr_to_got: PointerToGot, args: Relocation.ResolveArgs) !void { - const result = try 
math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); - - log.debug(" | calculated value 0x{x}", .{result}); - - mem.writeIntLittle(u32, ptr_to_got.base.code[0..4], @bitCast(u32, result)); - } -}; - -pub const TlvpPage = struct { - base: Relocation, - /// Always .PCRelativeAddress - inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .tlvp_page; - - pub fn resolve(page: TlvpPage, args: Relocation.ResolveArgs) !void { - const source_page = @intCast(i32, args.source_addr >> 12); - const target_page = @intCast(i32, args.target_addr >> 12); - const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); - - log.debug(" | moving by {} pages", .{pages}); - - var inst = page.inst; - inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); - inst.pc_relative_address.immlo = @truncate(u2, pages); - - mem.writeIntLittle(u32, page.base.code[0..4], inst.toU32()); - } -}; - -pub const TlvpPageOff = struct { - base: Relocation, - /// Always .AddSubtractImmediate regardless of the source instruction. - /// This means, we always rewrite the instruction to add even if the - /// source instruction was an ldr. - inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .tlvp_page_off; - - pub fn resolve(page_off: TlvpPageOff, args: Relocation.ResolveArgs) !void { - const narrowed = @truncate(u12, args.target_addr); - - log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); - - var inst = page_off.inst; - inst.add_subtract_immediate.imm12 = narrowed; - - mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); - } -}; - -pub const Parser = struct { - allocator: *Allocator, - it: *reloc.RelocIterator, - code: []u8, - parsed: std.ArrayList(*Relocation), - symbols: []*Symbol, - addend: ?u32 = null, - subtractor: ?Relocation.Target = null, - - pub fn deinit(parser: *Parser) void { - parser.parsed.deinit(); - } - - pub fn parse(parser: *Parser) !void { - while (parser.it.next()) |rel| { - switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { - .ARM64_RELOC_BRANCH26 => { - try parser.parseBranch(rel); - }, - .ARM64_RELOC_SUBTRACTOR => { - try parser.parseSubtractor(rel); - }, - .ARM64_RELOC_UNSIGNED => { - try parser.parseUnsigned(rel); - }, - .ARM64_RELOC_ADDEND => { - try parser.parseAddend(rel); - }, - .ARM64_RELOC_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_TLVP_LOAD_PAGE21, - => { - try parser.parsePage(rel); - }, - .ARM64_RELOC_PAGEOFF12 => { - try parser.parsePageOff(rel); - }, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => { - try parser.parseGotLoadPageOff(rel); - }, - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => { - try parser.parseTlvpLoadPageOff(rel); - }, - .ARM64_RELOC_POINTER_TO_GOT => { - try parser.parsePointerToGot(rel); - }, - } - } - } - - fn parseAddend(parser: *Parser, rel: macho.relocation_info) !void { - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - assert(rel_type == .ARM64_RELOC_ADDEND); - assert(rel.r_pcrel == 0); - assert(rel.r_extern == 0); - assert(parser.addend == null); - - parser.addend = rel.r_symbolnum; - - // Verify ADDEND is followed by a load. 
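// (Aside:) ARM64_RELOC_ADDEND patches nothing by itself — it carries a 24-bit
// addend in r_symbolnum that applies to the PAGE21/PAGEOFF12 record
// immediately after it. That is why the iterator is only peeked here, and why
// parsePage and parsePageOff reset parser.addend once they have consumed it.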
- const next = @intToEnum(macho.reloc_type_arm64, parser.it.peek().r_type); - switch (next) { - .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {}, - else => { - log.err("unexpected relocation type: expected PAGE21 or PAGEOFF12, found {s}", .{next}); - return error.UnexpectedRelocationType; - }, - } - } - - fn parseBranch(parser: *Parser, rel: macho.relocation_info) !void { - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - assert(rel_type == .ARM64_RELOC_BRANCH26); - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; - const parsed_inst = aarch64.Instruction{ .unconditional_branch_immediate = mem.bytesToValue( - meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.unconditional_branch_immediate, - ), - inst, - ) }; - - var branch = try parser.allocator.create(Branch); - errdefer parser.allocator.destroy(branch); - - const target = Relocation.Target.from_reloc(rel, parser.symbols); - - branch.* = .{ - .base = .{ - .@"type" = .branch_aarch64, - .code = inst, - .offset = offset, - .target = target, - }, - .inst = parsed_inst, - }; - - log.debug(" | emitting {}", .{branch}); - try parser.parsed.append(&branch.base); - } - - fn parsePage(parser: *Parser, rel: macho.relocation_info) !void { - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - const target = Relocation.Target.from_reloc(rel, parser.symbols); - - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; - const parsed_inst = aarch64.Instruction{ .pc_relative_address = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.pc_relative_address, - ), inst) }; - - const ptr: *Relocation = ptr: { - switch (rel_type) { - .ARM64_RELOC_PAGE21 => { - defer { - // Reset parser's addend state - parser.addend = null; - } - var page = try parser.allocator.create(Page); - errdefer parser.allocator.destroy(page); - - page.* = .{ - .base = .{ - .@"type" = .page, - .code = inst, - .offset = offset, - .target = target, - }, - .addend = parser.addend, - .inst = parsed_inst, - }; - - log.debug(" | emitting {}", .{page}); - - break :ptr &page.base; - }, - .ARM64_RELOC_GOT_LOAD_PAGE21 => { - var page = try parser.allocator.create(GotPage); - errdefer parser.allocator.destroy(page); - - page.* = .{ - .base = .{ - .@"type" = .got_page, - .code = inst, - .offset = offset, - .target = target, - }, - .inst = parsed_inst, - }; - - log.debug(" | emitting {}", .{page}); - - break :ptr &page.base; - }, - .ARM64_RELOC_TLVP_LOAD_PAGE21 => { - var page = try parser.allocator.create(TlvpPage); - errdefer parser.allocator.destroy(page); - - page.* = .{ - .base = .{ - .@"type" = .tlvp_page, - .code = inst, - .offset = offset, - .target = target, - }, - .inst = parsed_inst, - }; - - log.debug(" | emitting {}", .{page}); - - break :ptr &page.base; - }, - else => unreachable, - } - }; - - try parser.parsed.append(ptr); - } - - fn parsePageOff(parser: *Parser, rel: macho.relocation_info) !void { - defer { - // Reset parser's addend state - parser.addend = null; - } - - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - assert(rel_type == .ARM64_RELOC_PAGEOFF12); - assert(rel.r_pcrel == 0); - assert(rel.r_length == 2); - - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; - - var op_kind: PageOff.OpKind = undefined; - var parsed_inst: aarch64.Instruction = undefined; - 
if (isArithmeticOp(inst)) { - op_kind = .arithmetic; - parsed_inst = .{ .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), inst) }; - } else { - op_kind = .load_store; - parsed_inst = .{ .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), inst) }; - } - const target = Relocation.Target.from_reloc(rel, parser.symbols); - - var page_off = try parser.allocator.create(PageOff); - errdefer parser.allocator.destroy(page_off); - - page_off.* = .{ - .base = .{ - .@"type" = .page_off, - .code = inst, - .offset = offset, - .target = target, - }, - .op_kind = op_kind, - .inst = parsed_inst, - .addend = parser.addend, - }; - - log.debug(" | emitting {}", .{page_off}); - try parser.parsed.append(&page_off.base); - } - - fn parseGotLoadPageOff(parser: *Parser, rel: macho.relocation_info) !void { - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - assert(rel_type == .ARM64_RELOC_GOT_LOAD_PAGEOFF12); - assert(rel.r_pcrel == 0); - assert(rel.r_length == 2); - - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; - assert(!isArithmeticOp(inst)); - - const parsed_inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), inst); - assert(parsed_inst.size == 3); - - const target = Relocation.Target.from_reloc(rel, parser.symbols); - - var page_off = try parser.allocator.create(GotPageOff); - errdefer parser.allocator.destroy(page_off); - - page_off.* = .{ - .base = .{ - .@"type" = .got_page_off, - .code = inst, - .offset = offset, - .target = target, - }, - .inst = .{ - .load_store_register = parsed_inst, - }, - }; - - log.debug(" | emitting {}", .{page_off}); - try parser.parsed.append(&page_off.base); - } - - fn parseTlvpLoadPageOff(parser: *Parser, rel: macho.relocation_info) !void { - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - assert(rel_type == .ARM64_RELOC_TLVP_LOAD_PAGEOFF12); - assert(rel.r_pcrel == 0); - assert(rel.r_length == 2); - - const RegInfo = struct { - rd: u5, - rn: u5, - size: u1, - }; - - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; - const parsed: RegInfo = parsed: { - if (isArithmeticOp(inst)) { - const parsed_inst = mem.bytesAsValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), inst); - break :parsed .{ - .rd = parsed_inst.rd, - .rn = parsed_inst.rn, - .size = parsed_inst.sf, - }; - } else { - const parsed_inst = mem.bytesAsValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), inst); - break :parsed .{ - .rd = parsed_inst.rt, - .rn = parsed_inst.rn, - .size = @truncate(u1, parsed_inst.size), - }; - } - }; - - const target = Relocation.Target.from_reloc(rel, parser.symbols); - - var page_off = try parser.allocator.create(TlvpPageOff); - errdefer parser.allocator.destroy(page_off); - - page_off.* = .{ - .base = .{ - .@"type" = .tlvp_page_off, - .code = inst, - .offset = offset, - .target = target, - }, - .inst = .{ - .add_subtract_immediate = .{ - .rd = parsed.rd, - .rn = parsed.rn, - .imm12 = 0, // This will be filled when target addresses are known. 
- .sh = 0, - .s = 0, - .op = 0, - .sf = parsed.size, - }, - }, - }; - - log.debug(" | emitting {}", .{page_off}); - try parser.parsed.append(&page_off.base); - } - - fn parseSubtractor(parser: *Parser, rel: macho.relocation_info) !void { - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - assert(rel_type == .ARM64_RELOC_SUBTRACTOR); - assert(rel.r_pcrel == 0); - assert(parser.subtractor == null); - - parser.subtractor = Relocation.Target.from_reloc(rel, parser.symbols); - - // Verify SUBTRACTOR is followed by UNSIGNED. - const next = @intToEnum(macho.reloc_type_arm64, parser.it.peek().r_type); - if (next != .ARM64_RELOC_UNSIGNED) { - log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); - return error.UnexpectedRelocationType; - } - } - - fn parseUnsigned(parser: *Parser, rel: macho.relocation_info) !void { - defer { - // Reset parser's subtractor state - parser.subtractor = null; - } - - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - assert(rel_type == .ARM64_RELOC_UNSIGNED); - assert(rel.r_pcrel == 0); - - var unsigned = try parser.allocator.create(reloc.Unsigned); - errdefer parser.allocator.destroy(unsigned); - - const target = Relocation.Target.from_reloc(rel, parser.symbols); - const is_64bit: bool = switch (rel.r_length) { - 3 => true, - 2 => false, - else => unreachable, - }; - const offset = @intCast(u32, rel.r_address); - const addend: i64 = if (is_64bit) - mem.readIntLittle(i64, parser.code[offset..][0..8]) - else - mem.readIntLittle(i32, parser.code[offset..][0..4]); - - unsigned.* = .{ - .base = .{ - .@"type" = .unsigned, - .code = if (is_64bit) parser.code[offset..][0..8] else parser.code[offset..][0..4], - .offset = offset, - .target = target, - }, - .subtractor = parser.subtractor, - .is_64bit = is_64bit, - .addend = addend, - }; - - log.debug(" | emitting {}", .{unsigned}); - try parser.parsed.append(&unsigned.base); - } - - fn parsePointerToGot(parser: *Parser, rel: macho.relocation_info) !void { - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - assert(rel_type == .ARM64_RELOC_POINTER_TO_GOT); - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - var ptr_to_got = try parser.allocator.create(PointerToGot); - errdefer parser.allocator.destroy(ptr_to_got); - - const target = Relocation.Target.from_reloc(rel, parser.symbols); - const offset = @intCast(u32, rel.r_address); - - ptr_to_got.* = .{ - .base = .{ - .@"type" = .pointer_to_got, - .code = parser.code[offset..][0..4], - .offset = offset, - .target = target, - }, - }; - - log.debug(" | emitting {}", .{ptr_to_got}); - try parser.parsed.append(&ptr_to_got.base); - } -}; - -inline fn isArithmeticOp(inst: *const [4]u8) bool { - const group_decode = @truncate(u5, inst[3]); - return ((group_decode >> 2) == 4); -} diff --git a/src/link/MachO/reloc/x86_64.zig b/src/link/MachO/reloc/x86_64.zig deleted file mode 100644 index 6df68b6b3e..0000000000 --- a/src/link/MachO/reloc/x86_64.zig +++ /dev/null @@ -1,345 +0,0 @@ -const std = @import("std"); -const assert = std.debug.assert; -const log = std.log.scoped(.reloc); -const macho = std.macho; -const math = std.math; -const mem = std.mem; -const meta = std.meta; -const reloc = @import("../reloc.zig"); - -const Allocator = mem.Allocator; -const Relocation = reloc.Relocation; -const Symbol = @import("../Symbol.zig"); - -pub const Branch = struct { - base: Relocation, - - pub const base_type: Relocation.Type = .branch_x86_64; - - pub fn resolve(branch: Branch, args: Relocation.ResolveArgs) !void { - 
const displacement = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4); - log.debug(" | displacement 0x{x}", .{displacement}); - mem.writeIntLittle(u32, branch.base.code[0..4], @bitCast(u32, displacement)); - } -}; - -pub const Signed = struct { - base: Relocation, - addend: i32, - correction: i4, - - pub const base_type: Relocation.Type = .signed; - - pub fn resolve(signed: Signed, args: Relocation.ResolveArgs) !void { - const target_addr = target_addr: { - if (signed.base.target == .section) { - const source_target = @intCast(i64, args.source_source_sect_addr.?) + @intCast(i64, signed.base.offset) + signed.addend + 4; - const source_disp = source_target - @intCast(i64, args.source_target_sect_addr.?); - break :target_addr @intCast(i64, args.target_addr) + source_disp; - } - break :target_addr @intCast(i64, args.target_addr) + signed.addend; - }; - const displacement = try math.cast( - i32, - target_addr - @intCast(i64, args.source_addr) - signed.correction - 4, - ); - - log.debug(" | addend 0x{x}", .{signed.addend}); - log.debug(" | correction 0x{x}", .{signed.correction}); - log.debug(" | displacement 0x{x}", .{displacement}); - - mem.writeIntLittle(u32, signed.base.code[0..4], @bitCast(u32, displacement)); - } -}; - -pub const GotLoad = struct { - base: Relocation, - - pub const base_type: Relocation.Type = .got_load; - - pub fn resolve(got_load: GotLoad, args: Relocation.ResolveArgs) !void { - const displacement = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4); - log.debug(" | displacement 0x{x}", .{displacement}); - mem.writeIntLittle(u32, got_load.base.code[0..4], @bitCast(u32, displacement)); - } -}; - -pub const Got = struct { - base: Relocation, - addend: i32, - - pub const base_type: Relocation.Type = .got; - - pub fn resolve(got: Got, args: Relocation.ResolveArgs) !void { - const displacement = try math.cast( - i32, - @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4 + got.addend, - ); - log.debug(" | displacement 0x{x}", .{displacement}); - mem.writeIntLittle(u32, got.base.code[0..4], @bitCast(u32, displacement)); - } -}; - -pub const Tlv = struct { - base: Relocation, - op: *u8, - - pub const base_type: Relocation.Type = .tlv; - - pub fn resolve(tlv: Tlv, args: Relocation.ResolveArgs) !void { - // We need to rewrite the opcode from movq to leaq. 
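// (Aside:) both forms differ only in the opcode byte, which sits two bytes
// before the 32-bit displacement — hence `.op = &parser.code[offset - 2]`
// when the Tlv relocation is created in parseTlv below:
//
//   48 8b 3d xx xx xx xx    movq foo@TLVP(%rip), %rdi
//   48 8d 3d xx xx xx xx    leaq foo@TLVP(%rip), %rdi
//
// Rewriting 0x8b -> 0x8d turns the load into an address computation, leaving
// the register holding the address of the TLV descriptor itself.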
- tlv.op.* = 0x8d; - log.debug(" | rewriting op to leaq", .{}); - - const displacement = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4); - log.debug(" | displacement 0x{x}", .{displacement}); - - mem.writeIntLittle(u32, tlv.base.code[0..4], @bitCast(u32, displacement)); - } -}; - -pub const Parser = struct { - allocator: *Allocator, - it: *reloc.RelocIterator, - code: []u8, - parsed: std.ArrayList(*Relocation), - symbols: []*Symbol, - subtractor: ?Relocation.Target = null, - - pub fn deinit(parser: *Parser) void { - parser.parsed.deinit(); - } - - pub fn parse(parser: *Parser) !void { - while (parser.it.next()) |rel| { - switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { - .X86_64_RELOC_BRANCH => { - try parser.parseBranch(rel); - }, - .X86_64_RELOC_SUBTRACTOR => { - try parser.parseSubtractor(rel); - }, - .X86_64_RELOC_UNSIGNED => { - try parser.parseUnsigned(rel); - }, - .X86_64_RELOC_SIGNED, - .X86_64_RELOC_SIGNED_1, - .X86_64_RELOC_SIGNED_2, - .X86_64_RELOC_SIGNED_4, - => { - try parser.parseSigned(rel); - }, - .X86_64_RELOC_GOT_LOAD => { - try parser.parseGotLoad(rel); - }, - .X86_64_RELOC_GOT => { - try parser.parseGot(rel); - }, - .X86_64_RELOC_TLV => { - try parser.parseTlv(rel); - }, - } - } - } - - fn parseBranch(parser: *Parser, rel: macho.relocation_info) !void { - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - assert(rel_type == .X86_64_RELOC_BRANCH); - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; - - var branch = try parser.allocator.create(Branch); - errdefer parser.allocator.destroy(branch); - - const target = Relocation.Target.from_reloc(rel, parser.symbols); - - branch.* = .{ - .base = .{ - .@"type" = .branch_x86_64, - .code = inst, - .offset = offset, - .target = target, - }, - }; - - log.debug(" | emitting {}", .{branch}); - try parser.parsed.append(&branch.base); - } - - fn parseSigned(parser: *Parser, rel: macho.relocation_info) !void { - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - const target = Relocation.Target.from_reloc(rel, parser.symbols); - - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; - const correction: i4 = switch (rel_type) { - .X86_64_RELOC_SIGNED => 0, - .X86_64_RELOC_SIGNED_1 => 1, - .X86_64_RELOC_SIGNED_2 => 2, - .X86_64_RELOC_SIGNED_4 => 4, - else => unreachable, - }; - const addend = mem.readIntLittle(i32, inst) + correction; - - var signed = try parser.allocator.create(Signed); - errdefer parser.allocator.destroy(signed); - - signed.* = .{ - .base = .{ - .@"type" = .signed, - .code = inst, - .offset = offset, - .target = target, - }, - .addend = addend, - .correction = correction, - }; - - log.debug(" | emitting {}", .{signed}); - try parser.parsed.append(&signed.base); - } - - fn parseGotLoad(parser: *Parser, rel: macho.relocation_info) !void { - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - assert(rel_type == .X86_64_RELOC_GOT_LOAD); - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; - const target = Relocation.Target.from_reloc(rel, parser.symbols); - - var got_load = try parser.allocator.create(GotLoad); - errdefer parser.allocator.destroy(got_load); - - got_load.* = .{ - .base = .{ - .@"type" = .got_load, - .code = inst, - 
.offset = offset, - .target = target, - }, - }; - - log.debug(" | emitting {}", .{got_load}); - try parser.parsed.append(&got_load.base); - } - - fn parseGot(parser: *Parser, rel: macho.relocation_info) !void { - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - assert(rel_type == .X86_64_RELOC_GOT); - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; - const target = Relocation.Target.from_reloc(rel, parser.symbols); - const addend = mem.readIntLittle(i32, inst); - - var got = try parser.allocator.create(Got); - errdefer parser.allocator.destroy(got); - - got.* = .{ - .base = .{ - .@"type" = .got, - .code = inst, - .offset = offset, - .target = target, - }, - .addend = addend, - }; - - log.debug(" | emitting {}", .{got}); - try parser.parsed.append(&got.base); - } - - fn parseTlv(parser: *Parser, rel: macho.relocation_info) !void { - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - assert(rel_type == .X86_64_RELOC_TLV); - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; - const target = Relocation.Target.from_reloc(rel, parser.symbols); - - var tlv = try parser.allocator.create(Tlv); - errdefer parser.allocator.destroy(tlv); - - tlv.* = .{ - .base = .{ - .@"type" = .tlv, - .code = inst, - .offset = offset, - .target = target, - }, - .op = &parser.code[offset - 2], - }; - - log.debug(" | emitting {}", .{tlv}); - try parser.parsed.append(&tlv.base); - } - - fn parseSubtractor(parser: *Parser, rel: macho.relocation_info) !void { - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - assert(rel_type == .X86_64_RELOC_SUBTRACTOR); - assert(rel.r_pcrel == 0); - assert(parser.subtractor == null); - - parser.subtractor = Relocation.Target.from_reloc(rel, parser.symbols); - - // Verify SUBTRACTOR is followed by UNSIGNED. 
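// (Aside:) a SUBTRACTOR record only names the symbol being subtracted; the
// UNSIGNED record that must follow names the target. Together the pair
// encodes `target_addr - subtractor_addr + addend`, which is exactly what
// Unsigned.resolve computes when args.subtractor is set.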
- const next = @intToEnum(macho.reloc_type_x86_64, parser.it.peek().r_type); - if (next != .X86_64_RELOC_UNSIGNED) { - log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); - return error.UnexpectedRelocationType; - } - } - - fn parseUnsigned(parser: *Parser, rel: macho.relocation_info) !void { - defer { - // Reset parser's subtractor state - parser.subtractor = null; - } - - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - assert(rel_type == .X86_64_RELOC_UNSIGNED); - assert(rel.r_pcrel == 0); - - var unsigned = try parser.allocator.create(reloc.Unsigned); - errdefer parser.allocator.destroy(unsigned); - - const target = Relocation.Target.from_reloc(rel, parser.symbols); - const is_64bit: bool = switch (rel.r_length) { - 3 => true, - 2 => false, - else => unreachable, - }; - const offset = @intCast(u32, rel.r_address); - const addend: i64 = if (is_64bit) - mem.readIntLittle(i64, parser.code[offset..][0..8]) - else - mem.readIntLittle(i32, parser.code[offset..][0..4]); - - unsigned.* = .{ - .base = .{ - .@"type" = .unsigned, - .code = if (is_64bit) parser.code[offset..][0..8] else parser.code[offset..][0..4], - .offset = offset, - .target = target, - }, - .subtractor = parser.subtractor, - .is_64bit = is_64bit, - .addend = addend, - }; - - log.debug(" | emitting {}", .{unsigned}); - try parser.parsed.append(&unsigned.base); - } -}; diff --git a/src/main.zig b/src/main.zig index d3c7d024a1..547744ab94 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1655,6 +1655,18 @@ fn buildOutputType( } } + if (use_lld) |opt| { + if (opt and cross_target.isDarwin()) { + fatal("LLD requested with Mach-O object format. Only the self-hosted linker is supported for this target.", .{}); + } + } + + if (want_lto) |opt| { + if (opt and cross_target.isDarwin()) { + fatal("LTO is not yet supported with the Mach-O object format. More details: https://github.com/ziglang/zig/issues/8680", .{}); + } + } + if (comptime std.Target.current.isDarwin()) { // If we want to link against frameworks, we need system headers. if (framework_dirs.items.len > 0 or frameworks.items.len > 0)