diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5b1d20f671..54ff9b6179 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -574,11 +574,11 @@ set(ZIG_STAGE2_SOURCES
     "${CMAKE_SOURCE_DIR}/src/link/Plan9/aout.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Archive.zig"
+    "${CMAKE_SOURCE_DIR}/src/link/MachO/Atom.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/CodeSignature.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/DebugSymbols.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Dylib.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig"
-    "${CMAKE_SOURCE_DIR}/src/link/MachO/TextBlock.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/commands.zig"
diff --git a/lib/std/macho.zig b/lib/std/macho.zig
index 3c41b65522..b4d7964b5e 100644
--- a/lib/std/macho.zig
+++ b/lib/std/macho.zig
@@ -601,35 +601,35 @@ pub const segment_command = extern struct {
 /// command and their size is reflected in cmdsize.
 pub const segment_command_64 = extern struct {
     /// LC_SEGMENT_64
-    cmd: u32,
+    cmd: u32 = LC_SEGMENT_64,

     /// includes sizeof section_64 structs
-    cmdsize: u32,
+    cmdsize: u32 = @sizeOf(segment_command_64),

     /// segment name
     segname: [16]u8,

     /// memory address of this segment
-    vmaddr: u64,
+    vmaddr: u64 = 0,

     /// memory size of this segment
-    vmsize: u64,
+    vmsize: u64 = 0,

     /// file offset of this segment
-    fileoff: u64,
+    fileoff: u64 = 0,

     /// amount to map from the file
-    filesize: u64,
+    filesize: u64 = 0,

     /// maximum VM protection
-    maxprot: vm_prot_t,
+    maxprot: vm_prot_t = VM_PROT_NONE,

     /// initial VM protection
-    initprot: vm_prot_t,
+    initprot: vm_prot_t = VM_PROT_NONE,

     /// number of sections in segment
-    nsects: u32,
-    flags: u32,
+    nsects: u32 = 0,
+    flags: u32 = 0,
 };

 /// A segment is made up of zero or more sections. Non-MH_OBJECT files have
@@ -700,34 +700,34 @@ pub const section_64 = extern struct {
     segname: [16]u8,

     /// memory address of this section
-    addr: u64,
+    addr: u64 = 0,

     /// size in bytes of this section
-    size: u64,
+    size: u64 = 0,

     /// file offset of this section
-    offset: u32,
+    offset: u32 = 0,

     /// section alignment (power of 2)
-    @"align": u32,
+    @"align": u32 = 0,

     /// file offset of relocation entries
-    reloff: u32,
+    reloff: u32 = 0,

     /// number of relocation entries
-    nreloc: u32,
+    nreloc: u32 = 0,

     /// flags (section type and attributes)
-    flags: u32,
+    flags: u32 = S_REGULAR,

     /// reserved (for offset or index)
-    reserved1: u32,
+    reserved1: u32 = 0,

     /// reserved (for count or sizeof)
-    reserved2: u32,
+    reserved2: u32 = 0,

     /// reserved
-    reserved3: u32,
+    reserved3: u32 = 0,
 };

 pub const nlist = extern struct {
diff --git a/src/codegen.zig b/src/codegen.zig
index e28192ec1f..1995d8baa7 100644
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -2816,24 +2816,21 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             if (self.air.value(callee)) |func_value| {
                 if (func_value.castTag(.function)) |func_payload| {
                     const func = func_payload.data;
-                    const got_addr = blk: {
-                        const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment;
-                        const got = seg.sections.items[macho_file.got_section_index.?];
-                        const got_index = macho_file.got_entries_map.get(.{
-                            .where = .local,
-                            .where_index = func.owner_decl.link.macho.local_sym_index,
-                        }) orelse unreachable;
-                        break :blk got.addr + got_index * @sizeOf(u64);
-                    };
+                    // TODO I'm hacking my way through here by repurposing .memory for storing
+                    // the index of the GOT target symbol.
switch (arch) { .x86_64 => { - try self.genSetReg(Type.initTag(.u64), .rax, .{ .memory = got_addr }); + try self.genSetReg(Type.initTag(.u64), .rax, .{ + .memory = func.owner_decl.link.macho.local_sym_index, + }); // callq *%rax try self.code.ensureCapacity(self.code.items.len + 2); self.code.appendSliceAssumeCapacity(&[2]u8{ 0xff, 0xd0 }); }, .aarch64 => { - try self.genSetReg(Type.initTag(.u64), .x30, .{ .memory = got_addr }); + try self.genSetReg(Type.initTag(.u64), .x30, .{ + .memory = func.owner_decl.link.macho.local_sym_index, + }); // blr x30 writeInt(u32, try self.code.addManyAsArray(4), Instruction.blr(.x30).toU32()); }, @@ -4345,29 +4342,20 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }).toU32()); if (self.bin_file.cast(link.File.MachO)) |macho_file| { - // TODO this is super awkward. We are reversing the address of the GOT entry here. - // We should probably have it cached or move the reloc adding somewhere else. - const got_addr = blk: { - const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; - const got = seg.sections.items[macho_file.got_section_index.?]; - break :blk got.addr; - }; - const where_index = blk: for (macho_file.got_entries.items) |key, id| { - if (got_addr + id * @sizeOf(u64) == addr) break :blk key.where_index; - } else unreachable; + // TODO I think the reloc might be in the wrong place. const decl = macho_file.active_decl.?; // Page reloc for adrp instruction. try decl.link.macho.relocs.append(self.bin_file.allocator, .{ .offset = offset, .where = .local, - .where_index = where_index, + .where_index = @intCast(u32, addr), .payload = .{ .page = .{ .kind = .got } }, }); // Pageoff reloc for adrp instruction. try decl.link.macho.relocs.append(self.bin_file.allocator, .{ .offset = offset + 4, .where = .local, - .where_index = where_index, + .where_index = @intCast(u32, addr), .payload = .{ .page_off = .{ .kind = .got } }, }); } else { @@ -4628,22 +4616,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const offset = @intCast(u32, self.code.items.len); if (self.bin_file.cast(link.File.MachO)) |macho_file| { - // TODO this is super awkward. We are reversing the address of the GOT entry here. - // We should probably have it cached or move the reloc adding somewhere else. - const got_addr = blk: { - const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; - const got = seg.sections.items[macho_file.got_section_index.?]; - break :blk got.addr; - }; - const where_index = blk: for (macho_file.got_entries.items) |key, id| { - if (got_addr + id * @sizeOf(u64) == x) break :blk key.where_index; - } else unreachable; + // TODO I think the reloc might be in the wrong place. const decl = macho_file.active_decl.?; // Load reloc for LEA instruction. 
                try decl.link.macho.relocs.append(self.bin_file.allocator, .{
                    .offset = offset - 4,
                    .where = .local,
-                   .where_index = where_index,
+                   .where_index = @intCast(u32, x),
                    .payload = .{ .load = .{ .kind = .got } },
                });
            } else {
@@ -4869,17 +4848,10 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                const got = &elf_file.program_headers.items[elf_file.phdr_got_index.?];
                const got_addr = got.p_vaddr + decl.link.elf.offset_table_index * ptr_bytes;
                return MCValue{ .memory = got_addr };
-            } else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
-                const got_addr = blk: {
-                    const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment;
-                    const got = seg.sections.items[macho_file.got_section_index.?];
-                    const got_index = macho_file.got_entries_map.get(.{
-                        .where = .local,
-                        .where_index = decl.link.macho.local_sym_index,
-                    }) orelse unreachable;
-                    break :blk got.addr + got_index * ptr_bytes;
-                };
-                return MCValue{ .memory = got_addr };
+            } else if (self.bin_file.cast(link.File.MachO)) |_| {
+                // TODO I'm hacking my way through here by repurposing .memory for storing
+                // the index of the GOT target symbol.
+                return MCValue{ .memory = decl.link.macho.local_sym_index };
            } else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
                const got_addr = coff_file.offset_table_virtual_address + decl.link.coff.offset_table_index * ptr_bytes;
                return MCValue{ .memory = got_addr };
diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index de5e789b8c..ce00c85dea 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -24,6 +24,7 @@ const trace = @import("../tracy.zig").trace;
 const Air = @import("../Air.zig");
 const Allocator = mem.Allocator;
 const Archive = @import("MachO/Archive.zig");
+const Atom = @import("MachO/Atom.zig");
 const Cache = @import("../Cache.zig");
 const CodeSignature = @import("MachO/CodeSignature.zig");
 const Compilation = @import("../Compilation.zig");
@@ -39,9 +40,10 @@ const Module = @import("../Module.zig");
 const SegmentCommand = commands.SegmentCommand;
 const StringIndexAdapter = std.hash_map.StringIndexAdapter;
 const StringIndexContext = std.hash_map.StringIndexContext;
-pub const TextBlock = @import("MachO/TextBlock.zig");
 const Trie = @import("MachO/Trie.zig");

+pub const TextBlock = Atom;
+
 pub const base_tag: File.Tag = File.Tag.macho;

 base: File,
@@ -95,9 +97,6 @@ source_version_cmd_index: ?u16 = null,
 build_version_cmd_index: ?u16 = null,
 uuid_cmd_index: ?u16 = null,
 code_signature_cmd_index: ?u16 = null,
-/// Path to libSystem
-/// TODO this is obsolete, remove it.
-libsystem_cmd_index: ?u16 = null, // __TEXT segment sections text_section_index: ?u16 = null, @@ -132,65 +131,59 @@ tlv_bss_section_index: ?u16 = null, la_symbol_ptr_section_index: ?u16 = null, data_section_index: ?u16 = null, bss_section_index: ?u16 = null, -common_section_index: ?u16 = null, objc_const_section_index: ?u16 = null, objc_selrefs_section_index: ?u16 = null, objc_classrefs_section_index: ?u16 = null, objc_data_section_index: ?u16 = null, +bss_file_offset: u32 = 0, +tlv_bss_file_offset: u32 = 0, + locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, globals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, undefs: std.ArrayListUnmanaged(macho.nlist_64) = .{}, symbol_resolver: std.AutoHashMapUnmanaged(u32, SymbolWithLoc) = .{}, +unresolved: std.AutoArrayHashMapUnmanaged(u32, enum { + none, + stub, + got, +}) = .{}, +tentatives: std.AutoArrayHashMapUnmanaged(u32, void) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, globals_free_list: std.ArrayListUnmanaged(u32) = .{}, -stub_helper_stubs_start_off: ?u64 = null, +dyld_stub_binder_index: ?u32 = null, +dyld_private_atom: ?*Atom = null, +stub_helper_preamble_atom: ?*Atom = null, strtab: std.ArrayListUnmanaged(u8) = .{}, strtab_dir: std.HashMapUnmanaged(u32, void, StringIndexContext, std.hash_map.default_max_load_percentage) = .{}, -got_entries: std.ArrayListUnmanaged(GotIndirectionKey) = .{}, -got_entries_map: std.AutoHashMapUnmanaged(GotIndirectionKey, u32) = .{}, - -got_entries_free_list: std.ArrayListUnmanaged(u32) = .{}, - -stubs: std.ArrayListUnmanaged(u32) = .{}, -stubs_map: std.AutoHashMapUnmanaged(u32, u32) = .{}, +got_entries_map: std.AutoArrayHashMapUnmanaged(GotIndirectionKey, *Atom) = .{}, +stubs_map: std.AutoArrayHashMapUnmanaged(u32, *Atom) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, -got_entries_count_dirty: bool = false, load_commands_dirty: bool = false, -rebase_info_dirty: bool = false, -binding_info_dirty: bool = false, -lazy_binding_info_dirty: bool = false, -export_info_dirty: bool = false, - -strtab_dirty: bool = false, -strtab_needs_relocation: bool = false, - +sections_order_dirty: bool = false, has_dices: bool = false, has_stabs: bool = false, +/// A helper var to indicate if we are at the start of the incremental updates, or +/// already somewhere further along the update-and-run chain. +/// TODO once we add opening a prelinked output binary from file, this will become +/// obsolete as we will carry on where we left off. +cold_start: bool = false, section_ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}, -pending_updates: std.ArrayListUnmanaged(struct { - kind: enum { - got, - stub, - }, - index: u32, -}) = .{}, - -/// A list of text blocks that have surplus capacity. This list can have false +/// A list of atoms that have surplus capacity. This list can have false /// positives, as functions grow and shrink over time, only sometimes being added /// or removed from the freelist. /// -/// A text block has surplus capacity when its overcapacity value is greater than -/// padToIdeal(minimum_text_block_size). That is, when it has so +/// An atom has surplus capacity when its overcapacity value is greater than +/// padToIdeal(minimum_atom_size). That is, when it has so /// much extra capacity, that we could fit a small new symbol in it, itself with /// ideal_capacity or more. /// @@ -198,25 +191,23 @@ pending_updates: std.ArrayListUnmanaged(struct { /// /// Overcapacity is measured by actual_capacity - ideal_capacity. Note that /// overcapacity can be negative. 
A simple way to have negative overcapacity is to
-/// allocate a fresh text block, which will have ideal capacity, and then grow it
+/// allocate a fresh atom, which will have ideal capacity, and then grow it
 /// by 1 byte. It will then have -1 overcapacity.
-text_block_free_list: std.ArrayListUnmanaged(*TextBlock) = .{},
+atom_free_lists: std.AutoHashMapUnmanaged(MatchingSection, std.ArrayListUnmanaged(*Atom)) = .{},

-/// Pointer to the last allocated text block
-last_text_block: ?*TextBlock = null,
+/// Mapping of a matching section to the last atom allocated in that section
+atoms: std.AutoHashMapUnmanaged(MatchingSection, *Atom) = .{},

-/// List of TextBlocks that are owned directly by the linker.
-/// Currently these are only TextBlocks that are the result of linking
-/// object files. TextBlock which take part in incremental linking are
+/// List of atoms that are owned directly by the linker.
+/// Currently these are only atoms that are the result of linking
+/// object files. Atoms which take part in incremental linking are
 /// at present owned by Module.Decl.
 /// TODO consolidate this.
-managed_blocks: std.ArrayListUnmanaged(*TextBlock) = .{},
-
-blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{},
+managed_atoms: std.ArrayListUnmanaged(*Atom) = .{},

 /// Table of Decls that are currently alive.
 /// We store them here so that we can properly dispose of any allocated
-/// memory within the TextBlock in the incremental linker.
+/// memory within the atom in the incremental linker.
 /// TODO consolidate this.
 decls: std.AutoArrayHashMapUnmanaged(*Module.Decl, void) = .{},
@@ -226,6 +217,12 @@ decls: std.AutoArrayHashMapUnmanaged(*Module.Decl, void) = .{},
 /// somewhere else in the codegen.
 active_decl: ?*Module.Decl = null,

+const PendingUpdate = union(enum) {
+    resolve_undef: u32,
+    add_stub_entry: u32,
+    add_got_entry: u32,
+};
+
 const SymbolWithLoc = struct {
     // Table where the symbol can be found.
     where: enum {
@@ -247,21 +244,10 @@ pub const GotIndirectionKey = struct {
 /// When allocating, the ideal_capacity is calculated by
 /// actual_capacity + (actual_capacity / ideal_factor)
-const ideal_factor = 2;
+const ideal_factor = 4;

 /// Default path to dyld
-/// TODO instead of hardcoding it, we should probably look through some env vars and search paths
-/// instead but this will do for now.
-const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld";
-
-/// Default lib search path
-/// TODO instead of hardcoding it, we should probably look through some env vars and search paths
-/// instead but this will do for now.
-const DEFAULT_LIB_SEARCH_PATH: []const u8 = "/usr/lib";
-
-const LIB_SYSTEM_NAME: [*:0]const u8 = "System";
-/// TODO we should search for libSystem and fail if it doesn't exist, instead of hardcoding it
-const LIB_SYSTEM_PATH: [*:0]const u8 = DEFAULT_LIB_SEARCH_PATH ++ "/libSystem.B.dylib";
+const default_dyld_path: [*:0]const u8 = "/usr/lib/dyld";

 /// In order for a slice of bytes to be considered eligible to keep metadata pointing at
 /// it as a possible place to put new symbols, it must have enough room for this many bytes
@@ -269,6 +255,10 @@ const LIB_SYSTEM_PATH: [*:0]const u8 = DEFAULT_LIB_SEARCH_PATH ++ "/libSystem.B.
 const minimum_text_block_size = 64;
 pub const min_text_capacity = padToIdeal(minimum_text_block_size);

+/// Virtual memory offset that corresponds to the size of the __PAGEZERO segment and to the
+/// start address of the __TEXT segment.
+const pagezero_vmsize: u64 = 0x100000000; + pub const Export = struct { sym_index: ?u32 = null, }; @@ -323,31 +313,32 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio return self; } - if (!options.strip and options.module != null) { - // Create dSYM bundle. - const dir = options.module.?.zig_cache_artifact_directory; - log.debug("creating {s}.dSYM bundle in {s}", .{ sub_path, dir.path }); + // TODO Migrate DebugSymbols to the merged linker codepaths + // if (!options.strip and options.module != null) { + // // Create dSYM bundle. + // const dir = options.module.?.zig_cache_artifact_directory; + // log.debug("creating {s}.dSYM bundle in {s}", .{ sub_path, dir.path }); - const d_sym_path = try fmt.allocPrint( - allocator, - "{s}.dSYM" ++ fs.path.sep_str ++ "Contents" ++ fs.path.sep_str ++ "Resources" ++ fs.path.sep_str ++ "DWARF", - .{sub_path}, - ); - defer allocator.free(d_sym_path); + // const d_sym_path = try fmt.allocPrint( + // allocator, + // "{s}.dSYM" ++ fs.path.sep_str ++ "Contents" ++ fs.path.sep_str ++ "Resources" ++ fs.path.sep_str ++ "DWARF", + // .{sub_path}, + // ); + // defer allocator.free(d_sym_path); - var d_sym_bundle = try dir.handle.makeOpenPath(d_sym_path, .{}); - defer d_sym_bundle.close(); + // var d_sym_bundle = try dir.handle.makeOpenPath(d_sym_path, .{}); + // defer d_sym_bundle.close(); - const d_sym_file = try d_sym_bundle.createFile(sub_path, .{ - .truncate = false, - .read = true, - }); + // const d_sym_file = try d_sym_bundle.createFile(sub_path, .{ + // .truncate = false, + // .read = true, + // }); - self.d_sym = .{ - .base = self, - .file = d_sym_file, - }; - } + // self.d_sym = .{ + // .base = self, + // .file = d_sym_file, + // }; + // } // Index 0 is always a null symbol. try self.locals.append(allocator, .{ @@ -357,13 +348,12 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio .n_desc = 0, .n_value = 0, }); + try self.strtab.append(allocator, 0); try self.populateMissingMetadata(); - try self.writeLocalSymbol(0); if (self.d_sym) |*ds| { try ds.populateMissingMetadata(allocator); - try ds.writeLocalSymbol(0); } return self; @@ -403,60 +393,489 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { } } + const tracy = trace(@src()); + defer tracy.end(); + + var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator); + defer arena_allocator.deinit(); + const arena = &arena_allocator.allocator; + + const directory = self.base.options.emit.?.directory; // Just an alias to make it shorter to type. const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; - if (use_stage1) { - return self.linkWithZld(comp); - } else { - switch (self.base.options.effectiveOutputMode()) { - .Exe, .Obj => {}, - .Lib => return error.TODOImplementWritingLibFiles, + + // If there is no Zig code to compile, then we should skip flushing the output file because it + // will not be part of the linker line anyway. 
+ const module_obj_path: ?[]const u8 = if (self.base.options.module) |module| blk: { + if (use_stage1) { + const obj_basename = try std.zig.binNameAlloc(arena, .{ + .root_name = self.base.options.root_name, + .target = self.base.options.target, + .output_mode = .Obj, + }); + const o_directory = module.zig_cache_artifact_directory; + const full_obj_path = try o_directory.join(arena, &[_][]const u8{obj_basename}); + break :blk full_obj_path; } - return self.flushModule(comp); + + const obj_basename = self.base.intermediary_basename orelse break :blk null; + try self.flushModule(comp); + const full_obj_path = try directory.join(arena, &[_][]const u8{obj_basename}); + break :blk full_obj_path; + } else null; + + const is_lib = self.base.options.output_mode == .Lib; + const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; + const is_exe_or_dyn_lib = is_dyn_lib or self.base.options.output_mode == .Exe; + const stack_size = self.base.options.stack_size_override orelse 0; + + const id_symlink_basename = "zld.id"; + + var man: Cache.Manifest = undefined; + defer if (!self.base.options.disable_lld_caching) man.deinit(); + + var digest: [Cache.hex_digest_len]u8 = undefined; + var needs_full_relink = true; + + cache: { + if (use_stage1 and self.base.options.disable_lld_caching) break :cache; + + man = comp.cache_parent.obtain(); + + // We are about to obtain this lock, so here we give other processes a chance first. + self.base.releaseLock(); + + try man.addListOfFiles(self.base.options.objects); + for (comp.c_object_table.keys()) |key| { + _ = try man.addFile(key.status.success.object_path, null); + } + try man.addOptionalFile(module_obj_path); + // We can skip hashing libc and libc++ components that we are in charge of building from Zig + // installation sources because they are always a product of the compiler version + target information. + man.hash.add(stack_size); + man.hash.addListOfBytes(self.base.options.lib_dirs); + man.hash.addListOfBytes(self.base.options.framework_dirs); + man.hash.addListOfBytes(self.base.options.frameworks); + man.hash.addListOfBytes(self.base.options.rpath_list); + if (is_dyn_lib) { + man.hash.addOptional(self.base.options.version); + } + man.hash.addStringSet(self.base.options.system_libs); + man.hash.addOptionalBytes(self.base.options.sysroot); + + // We don't actually care whether it's a cache hit or miss; we just need the digest and the lock. + _ = try man.hit(); + digest = man.final(); + + var prev_digest_buf: [digest.len]u8 = undefined; + const prev_digest: []u8 = Cache.readSmallFile( + directory.handle, + id_symlink_basename, + &prev_digest_buf, + ) catch |err| blk: { + log.debug("MachO Zld new_digest={s} error: {s}", .{ + std.fmt.fmtSliceHexLower(&digest), + @errorName(err), + }); + // Handle this as a cache miss. + break :blk prev_digest_buf[0..0]; + }; + if (mem.eql(u8, prev_digest, &digest)) { + // Hot diggity dog! The output binary is already there. + + if (use_stage1) { + log.debug("MachO Zld digest={s} match - skipping invocation", .{std.fmt.fmtSliceHexLower(&digest)}); + self.base.lock = man.toOwnedLock(); + return; + } else { + log.debug("MachO Zld digest={s} match", .{std.fmt.fmtSliceHexLower(&digest)}); + if (!self.cold_start) { + log.debug(" no need to relink objects", .{}); + needs_full_relink = false; + } else { + log.debug(" TODO parse prelinked binary and continue linking where we left off", .{}); + // TODO until such time however, perform a full relink of objects. 
+                    needs_full_relink = true;
+                }
+            }
+        }
+        log.debug("MachO Zld prev_digest={s} new_digest={s}", .{
+            std.fmt.fmtSliceHexLower(prev_digest),
+            std.fmt.fmtSliceHexLower(&digest),
+        });
+
+        // We are about to change the output file to be different, so we invalidate the build hash now.
+        directory.handle.deleteFile(id_symlink_basename) catch |err| switch (err) {
+            error.FileNotFound => {},
+            else => |e| return e,
+        };
+    }
+    const full_out_path = try directory.join(arena, &[_][]const u8{self.base.options.emit.?.sub_path});
+
+    if (self.base.options.output_mode == .Obj) {
+        // LLD's MachO driver does not support the equivalent of `-r` so we do a simple file copy
+        // here. TODO: think carefully about how we can avoid this redundant operation when doing
+        // build-obj. See also the corresponding TODO in linkAsArchive.
+        const the_object_path = blk: {
+            if (self.base.options.objects.len != 0)
+                break :blk self.base.options.objects[0];
+
+            if (comp.c_object_table.count() != 0)
+                break :blk comp.c_object_table.keys()[0].status.success.object_path;
+
+            if (module_obj_path) |p|
+                break :blk p;
+
+            // TODO I think this is unreachable. Audit this situation when solving the above TODO
+            // regarding eliding redundant object -> object transformations.
+            return error.NoObjectsToLink;
+        };
+        // This can happen when using --enable-cache and using the stage1 backend. In this case
+        // we can skip the file copy.
+        if (!mem.eql(u8, the_object_path, full_out_path)) {
+            try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{});
+        }
+    } else {
+        if (use_stage1) {
+            const sub_path = self.base.options.emit.?.sub_path;
+            self.base.file = try directory.handle.createFile(sub_path, .{
+                .truncate = true,
+                .read = true,
+                .mode = link.determineMode(self.base.options),
+            });
+            try self.populateMissingMetadata();
+
+            // TODO mimicking insertion of null symbol from incremental linker.
+            // This will need to be moved.
+            try self.locals.append(self.base.allocator, .{
+                .n_strx = 0,
+                .n_type = macho.N_UNDF,
+                .n_sect = 0,
+                .n_desc = 0,
+                .n_value = 0,
+            });
+            try self.strtab.append(self.base.allocator, 0);
+        }
+
+        if (needs_full_relink) {
+            self.objects.clearRetainingCapacity();
+            self.archives.clearRetainingCapacity();
+            self.dylibs.clearRetainingCapacity();
+            self.dylibs_map.clearRetainingCapacity();
+            self.referenced_dylibs.clearRetainingCapacity();
+
+            // TODO figure out how to clear atoms from objects, etc.
+
+            // Positional arguments to the linker such as object files and static archives.
+            var positionals = std.ArrayList([]const u8).init(arena);
+
+            try positionals.appendSlice(self.base.options.objects);
+
+            for (comp.c_object_table.keys()) |key| {
+                try positionals.append(key.status.success.object_path);
+            }
+
+            if (module_obj_path) |p| {
+                try positionals.append(p);
+            }
+
+            if (comp.compiler_rt_static_lib) |lib| {
+                try positionals.append(lib.full_object_path);
+            }
+
+            // libc++ dep
+            if (self.base.options.link_libcpp) {
+                try positionals.append(comp.libcxxabi_static_lib.?.full_object_path);
+                try positionals.append(comp.libcxx_static_lib.?.full_object_path);
+            }
+
+            // Shared and static libraries passed via `-l` flag.
+ var search_lib_names = std.ArrayList([]const u8).init(arena); + + const system_libs = self.base.options.system_libs.keys(); + for (system_libs) |link_lib| { + // By this time, we depend on these libs being dynamically linked libraries and not static libraries + // (the check for that needs to be earlier), but they could be full paths to .dylib files, in which + // case we want to avoid prepending "-l". + if (Compilation.classifyFileExt(link_lib) == .shared_library) { + try positionals.append(link_lib); + continue; + } + + try search_lib_names.append(link_lib); + } + + var lib_dirs = std.ArrayList([]const u8).init(arena); + for (self.base.options.lib_dirs) |dir| { + if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { + try lib_dirs.append(search_dir); + } else { + log.warn("directory not found for '-L{s}'", .{dir}); + } + } + + var libs = std.ArrayList([]const u8).init(arena); + var lib_not_found = false; + for (search_lib_names.items) |lib_name| { + // Assume ld64 default: -search_paths_first + // Look in each directory for a dylib (stub first), and then for archive + // TODO implement alternative: -search_dylibs_first + for (&[_][]const u8{ ".tbd", ".dylib", ".a" }) |ext| { + if (try resolveLib(arena, lib_dirs.items, lib_name, ext)) |full_path| { + try libs.append(full_path); + break; + } + } else { + log.warn("library not found for '-l{s}'", .{lib_name}); + lib_not_found = true; + } + } + + if (lib_not_found) { + log.warn("Library search paths:", .{}); + for (lib_dirs.items) |dir| { + log.warn(" {s}", .{dir}); + } + } + + // If we were given the sysroot, try to look there first for libSystem.B.{dylib, tbd}. + var libsystem_available = false; + if (self.base.options.sysroot != null) blk: { + // Try stub file first. If we hit it, then we're done as the stub file + // re-exports every single symbol definition. + if (try resolveLib(arena, lib_dirs.items, "System", ".tbd")) |full_path| { + try libs.append(full_path); + libsystem_available = true; + break :blk; + } + // If we didn't hit the stub file, try .dylib next. However, libSystem.dylib + // doesn't export libc.dylib which we'll need to resolve subsequently also. 
+ if (try resolveLib(arena, lib_dirs.items, "System", ".dylib")) |libsystem_path| { + if (try resolveLib(arena, lib_dirs.items, "c", ".dylib")) |libc_path| { + try libs.append(libsystem_path); + try libs.append(libc_path); + libsystem_available = true; + break :blk; + } + } + } + if (!libsystem_available) { + const full_path = try comp.zig_lib_directory.join(arena, &[_][]const u8{ + "libc", "darwin", "libSystem.B.tbd", + }); + try libs.append(full_path); + } + + // frameworks + var framework_dirs = std.ArrayList([]const u8).init(arena); + for (self.base.options.framework_dirs) |dir| { + if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { + try framework_dirs.append(search_dir); + } else { + log.warn("directory not found for '-F{s}'", .{dir}); + } + } + + var framework_not_found = false; + for (self.base.options.frameworks) |framework| { + for (&[_][]const u8{ ".tbd", ".dylib", "" }) |ext| { + if (try resolveFramework(arena, framework_dirs.items, framework, ext)) |full_path| { + try libs.append(full_path); + break; + } + } else { + log.warn("framework not found for '-framework {s}'", .{framework}); + framework_not_found = true; + } + } + + if (framework_not_found) { + log.warn("Framework search paths:", .{}); + for (framework_dirs.items) |dir| { + log.warn(" {s}", .{dir}); + } + } + + // rpaths + var rpath_table = std.StringArrayHashMap(void).init(arena); + for (self.base.options.rpath_list) |rpath| { + if (rpath_table.contains(rpath)) continue; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.rpath_command) + rpath.len + 1, + @sizeOf(u64), + )); + var rpath_cmd = commands.emptyGenericCommandWithData(macho.rpath_command{ + .cmd = macho.LC_RPATH, + .cmdsize = cmdsize, + .path = @sizeOf(macho.rpath_command), + }); + rpath_cmd.data = try self.base.allocator.alloc(u8, cmdsize - rpath_cmd.inner.path); + mem.set(u8, rpath_cmd.data, 0); + mem.copy(u8, rpath_cmd.data, rpath); + try self.load_commands.append(self.base.allocator, .{ .Rpath = rpath_cmd }); + try rpath_table.putNoClobber(rpath, {}); + self.load_commands_dirty = true; + } + + if (self.base.options.verbose_link) { + var argv = std.ArrayList([]const u8).init(arena); + + try argv.append("zig"); + try argv.append("ld"); + + if (is_exe_or_dyn_lib) { + try argv.append("-dynamic"); + } + + if (is_dyn_lib) { + try argv.append("-dylib"); + + const install_name = try std.fmt.allocPrint(arena, "@rpath/{s}", .{ + self.base.options.emit.?.sub_path, + }); + try argv.append("-install_name"); + try argv.append(install_name); + } + + if (self.base.options.sysroot) |syslibroot| { + try argv.append("-syslibroot"); + try argv.append(syslibroot); + } + + for (rpath_table.keys()) |rpath| { + try argv.append("-rpath"); + try argv.append(rpath); + } + + try argv.appendSlice(positionals.items); + + try argv.append("-o"); + try argv.append(full_out_path); + + try argv.append("-lSystem"); + try argv.append("-lc"); + + for (search_lib_names.items) |l_name| { + try argv.append(try std.fmt.allocPrint(arena, "-l{s}", .{l_name})); + } + + for (self.base.options.lib_dirs) |lib_dir| { + try argv.append(try std.fmt.allocPrint(arena, "-L{s}", .{lib_dir})); + } + + for (self.base.options.frameworks) |framework| { + try argv.append(try std.fmt.allocPrint(arena, "-framework {s}", .{framework})); + } + + for (self.base.options.framework_dirs) |framework_dir| { + try argv.append(try std.fmt.allocPrint(arena, "-F{s}", .{framework_dir})); + } + + Compilation.dump_argv(argv.items); + } + + try 
self.parseInputFiles(positionals.items, self.base.options.sysroot); + try self.parseLibs(libs.items, self.base.options.sysroot); + } + + if (self.bss_section_index) |idx| { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[idx]; + sect.offset = self.bss_file_offset; + } + if (self.tlv_bss_section_index) |idx| { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[idx]; + sect.offset = self.tlv_bss_file_offset; + } + + for (self.objects.items) |*object, object_id| { + if (object.analyzed) continue; + try self.resolveSymbolsInObject(@intCast(u16, object_id)); + } + + try self.resolveSymbolsInArchives(); + try self.resolveDyldStubBinder(); + try self.createDyldPrivateAtom(); + try self.createStubHelperPreambleAtom(); + try self.resolveSymbolsInDylibs(); + try self.createDsoHandleAtom(); + try self.addCodeSignatureLC(); + + for (self.unresolved.keys()) |index| { + const sym = self.undefs.items[index]; + const sym_name = self.getString(sym.n_strx); + const resolv = self.symbol_resolver.get(sym.n_strx) orelse unreachable; + + log.err("undefined reference to symbol '{s}'", .{sym_name}); + log.err(" first referenced in '{s}'", .{self.objects.items[resolv.file].name}); + } + if (self.unresolved.count() > 0) { + return error.UndefinedSymbolReference; + } + + try self.createTentativeDefAtoms(); + try self.parseObjectsIntoAtoms(); + try self.allocateGlobalSymbols(); + try self.writeAtoms(); + + if (self.bss_section_index) |idx| { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[idx]; + self.bss_file_offset = sect.offset; + sect.offset = 0; + } + if (self.tlv_bss_section_index) |idx| { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[idx]; + self.tlv_bss_file_offset = sect.offset; + sect.offset = 0; + } + + try self.flushModule(comp); + } + + cache: { + if (use_stage1 and self.base.options.disable_lld_caching) break :cache; + // Update the file with the digest. If it fails we can continue; it only + // means that the next invocation will have an unnecessary cache miss. + Cache.writeSmallFile(directory.handle, id_symlink_basename, &digest) catch |err| { + log.debug("failed to save linking hash digest file: {s}", .{@errorName(err)}); + }; + // Again failure here only means an unnecessary cache miss. + man.writeManifest() catch |err| { + log.debug("failed to write cache manifest when linking: {s}", .{@errorName(err)}); + }; + // We hang on to this lock so that the output file path can be used without + // other processes clobbering it. + self.base.lock = man.toOwnedLock(); + } + + self.cold_start = false; } pub fn flushModule(self: *MachO, comp: *Compilation) !void { _ = comp; + const tracy = trace(@src()); defer tracy.end(); - const output_mode = self.base.options.output_mode; + try self.setEntryPoint(); + try self.updateSectionOrdinals(); + try self.writeLinkeditSegment(); - switch (output_mode) { - .Exe => { - if (self.entry_addr) |addr| { - // Update LC_MAIN with entry offset. 
-                const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-                const main_cmd = &self.load_commands.items[self.main_cmd_index.?].Main;
-                main_cmd.entryoff = addr - text_segment.inner.vmaddr;
-                main_cmd.stacksize = self.base.options.stack_size_override orelse 0;
-                self.load_commands_dirty = true;
-            }
-            try self.writeRebaseInfoTable();
-            try self.writeBindInfoTable();
-            try self.writeLazyBindInfoTable();
-            try self.writeExportInfo();
-            try self.writeAllGlobalAndUndefSymbols();
-            try self.writeIndirectSymbolTable();
-            try self.writeStringTable();
-            try self.updateLinkeditSegmentSizes();
+    if (self.d_sym) |*ds| {
+        // Flush debug symbols bundle.
+        try ds.flushModule(self.base.allocator, self.base.options);
+    }
-            if (self.d_sym) |*ds| {
-                // Flush debug symbols bundle.
-                try ds.flushModule(self.base.allocator, self.base.options);
-            }
-
-            if (self.requires_adhoc_codesig) {
-                // Preallocate space for the code signature.
-                // We need to do this at this stage so that we have the load commands with proper values
-                // written out to the file.
-                // The most important here is to have the correct vm and filesize of the __LINKEDIT segment
-                // where the code signature goes into.
-                try self.writeCodeSignaturePadding();
-            }
-        },
-        .Obj => {},
-        .Lib => return error.TODOImplementWritingLibFiles,
+    if (self.requires_adhoc_codesig) {
+        // Preallocate space for the code signature.
+        // We need to do this at this stage so that we have the load commands with proper values
+        // written out to the file.
+        // The most important thing here is to have the correct vm and file sizes of the __LINKEDIT
+        // segment, which is where the code signature goes.
+        try self.writeCodeSignaturePadding();
     }

     try self.writeLoadCommands();
@@ -470,14 +889,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void {
         self.error_flags.no_entry_point_found = false;
     }

-    assert(!self.got_entries_count_dirty);
     assert(!self.load_commands_dirty);
-    assert(!self.rebase_info_dirty);
-    assert(!self.binding_info_dirty);
-    assert(!self.lazy_binding_info_dirty);
-    assert(!self.export_info_dirty);
-    assert(!self.strtab_dirty);
-    assert(!self.strtab_needs_relocation);

     if (self.requires_adhoc_codesig) {
         try self.writeCodeSignature(); // code signing always comes last
@@ -575,394 +987,6 @@ fn resolveFramework(
     return null;
 }

-fn linkWithZld(self: *MachO, comp: *Compilation) !void {
-    const tracy = trace(@src());
-    defer tracy.end();
-
-    var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator);
-    defer arena_allocator.deinit();
-    const arena = &arena_allocator.allocator;
-
-    const directory = self.base.options.emit.?.directory; // Just an alias to make it shorter to type.
-
-    // If there is no Zig code to compile, then we should skip flushing the output file because it
-    // will not be part of the linker line anyway.
- const module_obj_path: ?[]const u8 = if (self.base.options.module) |module| blk: { - const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; - if (use_stage1) { - const obj_basename = try std.zig.binNameAlloc(arena, .{ - .root_name = self.base.options.root_name, - .target = self.base.options.target, - .output_mode = .Obj, - }); - const o_directory = module.zig_cache_artifact_directory; - const full_obj_path = try o_directory.join(arena, &[_][]const u8{obj_basename}); - break :blk full_obj_path; - } - - try self.flushModule(comp); - const obj_basename = self.base.intermediary_basename.?; - const full_obj_path = try directory.join(arena, &[_][]const u8{obj_basename}); - break :blk full_obj_path; - } else null; - - const is_lib = self.base.options.output_mode == .Lib; - const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; - const is_exe_or_dyn_lib = is_dyn_lib or self.base.options.output_mode == .Exe; - const stack_size = self.base.options.stack_size_override orelse 0; - - const id_symlink_basename = "zld.id"; - - var man: Cache.Manifest = undefined; - defer if (!self.base.options.disable_lld_caching) man.deinit(); - - var digest: [Cache.hex_digest_len]u8 = undefined; - - if (!self.base.options.disable_lld_caching) { - man = comp.cache_parent.obtain(); - - // We are about to obtain this lock, so here we give other processes a chance first. - self.base.releaseLock(); - - try man.addListOfFiles(self.base.options.objects); - for (comp.c_object_table.keys()) |key| { - _ = try man.addFile(key.status.success.object_path, null); - } - try man.addOptionalFile(module_obj_path); - // We can skip hashing libc and libc++ components that we are in charge of building from Zig - // installation sources because they are always a product of the compiler version + target information. - man.hash.add(stack_size); - man.hash.addListOfBytes(self.base.options.lib_dirs); - man.hash.addListOfBytes(self.base.options.framework_dirs); - man.hash.addListOfBytes(self.base.options.frameworks); - man.hash.addListOfBytes(self.base.options.rpath_list); - if (is_dyn_lib) { - man.hash.addOptional(self.base.options.version); - } - man.hash.addStringSet(self.base.options.system_libs); - man.hash.addOptionalBytes(self.base.options.sysroot); - - // We don't actually care whether it's a cache hit or miss; we just need the digest and the lock. - _ = try man.hit(); - digest = man.final(); - - var prev_digest_buf: [digest.len]u8 = undefined; - const prev_digest: []u8 = Cache.readSmallFile( - directory.handle, - id_symlink_basename, - &prev_digest_buf, - ) catch |err| blk: { - log.debug("MachO Zld new_digest={s} error: {s}", .{ std.fmt.fmtSliceHexLower(&digest), @errorName(err) }); - // Handle this as a cache miss. - break :blk prev_digest_buf[0..0]; - }; - if (mem.eql(u8, prev_digest, &digest)) { - log.debug("MachO Zld digest={s} match - skipping invocation", .{std.fmt.fmtSliceHexLower(&digest)}); - // Hot diggity dog! The output binary is already there. - self.base.lock = man.toOwnedLock(); - return; - } - log.debug("MachO Zld prev_digest={s} new_digest={s}", .{ std.fmt.fmtSliceHexLower(prev_digest), std.fmt.fmtSliceHexLower(&digest) }); - - // We are about to change the output file to be different, so we invalidate the build hash now. 
- directory.handle.deleteFile(id_symlink_basename) catch |err| switch (err) { - error.FileNotFound => {}, - else => |e| return e, - }; - } - - const full_out_path = try directory.join(arena, &[_][]const u8{self.base.options.emit.?.sub_path}); - - if (self.base.options.output_mode == .Obj) { - // LLD's MachO driver does not support the equvialent of `-r` so we do a simple file copy - // here. TODO: think carefully about how we can avoid this redundant operation when doing - // build-obj. See also the corresponding TODO in linkAsArchive. - const the_object_path = blk: { - if (self.base.options.objects.len != 0) - break :blk self.base.options.objects[0]; - - if (comp.c_object_table.count() != 0) - break :blk comp.c_object_table.keys()[0].status.success.object_path; - - if (module_obj_path) |p| - break :blk p; - - // TODO I think this is unreachable. Audit this situation when solving the above TODO - // regarding eliding redundant object -> object transformations. - return error.NoObjectsToLink; - }; - // This can happen when using --enable-cache and using the stage1 backend. In this case - // we can skip the file copy. - if (!mem.eql(u8, the_object_path, full_out_path)) { - try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{}); - } - } else { - // Positional arguments to the linker such as object files and static archives. - var positionals = std.ArrayList([]const u8).init(arena); - - try positionals.appendSlice(self.base.options.objects); - - for (comp.c_object_table.keys()) |key| { - try positionals.append(key.status.success.object_path); - } - - if (module_obj_path) |p| { - try positionals.append(p); - } - - try positionals.append(comp.compiler_rt_static_lib.?.full_object_path); - - // libc++ dep - if (self.base.options.link_libcpp) { - try positionals.append(comp.libcxxabi_static_lib.?.full_object_path); - try positionals.append(comp.libcxx_static_lib.?.full_object_path); - } - - // Shared and static libraries passed via `-l` flag. - var search_lib_names = std.ArrayList([]const u8).init(arena); - - const system_libs = self.base.options.system_libs.keys(); - for (system_libs) |link_lib| { - // By this time, we depend on these libs being dynamically linked libraries and not static libraries - // (the check for that needs to be earlier), but they could be full paths to .dylib files, in which - // case we want to avoid prepending "-l". 
- if (Compilation.classifyFileExt(link_lib) == .shared_library) { - try positionals.append(link_lib); - continue; - } - - try search_lib_names.append(link_lib); - } - - var lib_dirs = std.ArrayList([]const u8).init(arena); - for (self.base.options.lib_dirs) |dir| { - if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { - try lib_dirs.append(search_dir); - } else { - log.warn("directory not found for '-L{s}'", .{dir}); - } - } - - var libs = std.ArrayList([]const u8).init(arena); - var lib_not_found = false; - for (search_lib_names.items) |lib_name| { - // Assume ld64 default: -search_paths_first - // Look in each directory for a dylib (stub first), and then for archive - // TODO implement alternative: -search_dylibs_first - for (&[_][]const u8{ ".tbd", ".dylib", ".a" }) |ext| { - if (try resolveLib(arena, lib_dirs.items, lib_name, ext)) |full_path| { - try libs.append(full_path); - break; - } - } else { - log.warn("library not found for '-l{s}'", .{lib_name}); - lib_not_found = true; - } - } - - if (lib_not_found) { - log.warn("Library search paths:", .{}); - for (lib_dirs.items) |dir| { - log.warn(" {s}", .{dir}); - } - } - - // If we were given the sysroot, try to look there first for libSystem.B.{dylib, tbd}. - var libsystem_available = false; - if (self.base.options.sysroot != null) blk: { - // Try stub file first. If we hit it, then we're done as the stub file - // re-exports every single symbol definition. - if (try resolveLib(arena, lib_dirs.items, "System", ".tbd")) |full_path| { - try libs.append(full_path); - libsystem_available = true; - break :blk; - } - // If we didn't hit the stub file, try .dylib next. However, libSystem.dylib - // doesn't export libc.dylib which we'll need to resolve subsequently also. - if (try resolveLib(arena, lib_dirs.items, "System", ".dylib")) |libsystem_path| { - if (try resolveLib(arena, lib_dirs.items, "c", ".dylib")) |libc_path| { - try libs.append(libsystem_path); - try libs.append(libc_path); - libsystem_available = true; - break :blk; - } - } - } - if (!libsystem_available) { - const full_path = try comp.zig_lib_directory.join(arena, &[_][]const u8{ - "libc", "darwin", "libSystem.B.tbd", - }); - try libs.append(full_path); - } - - // frameworks - var framework_dirs = std.ArrayList([]const u8).init(arena); - for (self.base.options.framework_dirs) |dir| { - if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { - try framework_dirs.append(search_dir); - } else { - log.warn("directory not found for '-F{s}'", .{dir}); - } - } - - var framework_not_found = false; - for (self.base.options.frameworks) |framework| { - for (&[_][]const u8{ ".tbd", ".dylib", "" }) |ext| { - if (try resolveFramework(arena, framework_dirs.items, framework, ext)) |full_path| { - try libs.append(full_path); - break; - } - } else { - log.warn("framework not found for '-framework {s}'", .{framework}); - framework_not_found = true; - } - } - - if (framework_not_found) { - log.warn("Framework search paths:", .{}); - for (framework_dirs.items) |dir| { - log.warn(" {s}", .{dir}); - } - } - - // rpaths - var rpath_table = std.StringArrayHashMap(void).init(arena); - for (self.base.options.rpath_list) |rpath| { - if (rpath_table.contains(rpath)) continue; - try rpath_table.putNoClobber(rpath, {}); - } - - var rpaths = std.ArrayList([]const u8).init(arena); - try rpaths.ensureCapacity(rpath_table.count()); - for (rpath_table.keys()) |*key| { - rpaths.appendAssumeCapacity(key.*); - } - - if (self.base.options.verbose_link) { - var 
argv = std.ArrayList([]const u8).init(arena); - - try argv.append("zig"); - try argv.append("ld"); - - if (is_exe_or_dyn_lib) { - try argv.append("-dynamic"); - } - - if (is_dyn_lib) { - try argv.append("-dylib"); - - const install_name = try std.fmt.allocPrint(arena, "@rpath/{s}", .{ - self.base.options.emit.?.sub_path, - }); - try argv.append("-install_name"); - try argv.append(install_name); - } - - if (self.base.options.sysroot) |syslibroot| { - try argv.append("-syslibroot"); - try argv.append(syslibroot); - } - - for (rpaths.items) |rpath| { - try argv.append("-rpath"); - try argv.append(rpath); - } - - try argv.appendSlice(positionals.items); - - try argv.append("-o"); - try argv.append(full_out_path); - - try argv.append("-lSystem"); - try argv.append("-lc"); - - for (search_lib_names.items) |l_name| { - try argv.append(try std.fmt.allocPrint(arena, "-l{s}", .{l_name})); - } - - for (self.base.options.lib_dirs) |lib_dir| { - try argv.append(try std.fmt.allocPrint(arena, "-L{s}", .{lib_dir})); - } - - for (self.base.options.frameworks) |framework| { - try argv.append(try std.fmt.allocPrint(arena, "-framework {s}", .{framework})); - } - - for (self.base.options.framework_dirs) |framework_dir| { - try argv.append(try std.fmt.allocPrint(arena, "-F{s}", .{framework_dir})); - } - - Compilation.dump_argv(argv.items); - } - - const sub_path = self.base.options.emit.?.sub_path; - self.base.file = try directory.handle.createFile(sub_path, .{ - .truncate = true, - .read = true, - .mode = link.determineMode(self.base.options), - }); - - // TODO mimicking insertion of null symbol from incremental linker. - // This will need to moved. - try self.locals.append(self.base.allocator, .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try self.strtab.append(self.base.allocator, 0); - - try self.populateMetadata(); - try self.addRpathLCs(rpaths.items); - try self.parseInputFiles(positionals.items, self.base.options.sysroot); - try self.parseLibs(libs.items, self.base.options.sysroot); - try self.resolveSymbols(); - try self.parseTextBlocks(); - try self.addLoadDylibLCs(); - try self.addDataInCodeLC(); - try self.addCodeSignatureLC(); - - { - // Add dyld_stub_binder as the final GOT entry. - const n_strx = self.strtab_dir.getKeyAdapted(@as([]const u8, "dyld_stub_binder"), StringIndexAdapter{ - .bytes = &self.strtab, - }) orelse unreachable; - const resolv = self.symbol_resolver.get(n_strx) orelse unreachable; - const got_index = @intCast(u32, self.got_entries.items.len); - const got_entry = GotIndirectionKey{ - .where = .undef, - .where_index = resolv.where_index, - }; - try self.got_entries.append(self.base.allocator, got_entry); - try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, got_index); - } - - try self.sortSections(); - try self.allocateTextSegment(); - try self.allocateDataConstSegment(); - try self.allocateDataSegment(); - self.allocateLinkeditSegment(); - try self.allocateTextBlocks(); - try self.flushZld(); - } - - if (!self.base.options.disable_lld_caching) { - // Update the file with the digest. If it fails we can continue; it only - // means that the next invocation will have an unnecessary cache miss. - Cache.writeSmallFile(directory.handle, id_symlink_basename, &digest) catch |err| { - log.warn("failed to save linking hash digest file: {s}", .{@errorName(err)}); - }; - // Again failure here only means an unnecessary cache miss. 
- man.writeManifest() catch |err| { - log.warn("failed to write cache manifest when linking: {s}", .{@errorName(err)}); - }; - // We hang on to this lock so that the output file path can be used without - // other processes clobbering it. - self.base.lock = man.toOwnedLock(); - } -} - fn parseObject(self: *MachO, path: []const u8) !bool { const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { error.FileNotFound => return false, @@ -1080,6 +1104,7 @@ pub fn parseDylib(self: *MachO, path: []const u8, opts: DylibCreateOpts) ParseDy try self.dylibs_map.putNoClobber(self.base.allocator, dylib.id.?.name, dylib_id); if (!(opts.is_dependent or self.referenced_dylibs.contains(dylib_id))) { + try self.addLoadDylibLC(dylib_id); try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); } @@ -1098,6 +1123,7 @@ fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const break :full_path try self.base.allocator.dupe(u8, path); }; defer self.base.allocator.free(full_path); + log.debug("parsing input file path '{s}'", .{full_path}); if (try self.parseObject(full_path)) continue; if (try self.parseArchive(full_path)) continue; @@ -1111,6 +1137,7 @@ fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const fn parseLibs(self: *MachO, libs: []const []const u8, syslibroot: ?[]const u8) !void { for (libs) |lib| { + log.debug("parsing lib path '{s}'", .{lib}); if (try self.parseDylib(lib, .{ .syslibroot = syslibroot, })) continue; @@ -1126,18 +1153,19 @@ pub const MatchingSection = struct { }; pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSection { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; const segname = commands.segmentName(sect); const sectname = commands.sectionName(sect); - const res: ?MatchingSection = blk: { switch (commands.sectionType(sect)) { macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { if (self.text_const_section_index == null) { - self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__const", .{}); + self.text_const_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1150,10 +1178,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio // TODO it seems the common values within the sections in objects are deduplicated/merged // on merging the sections' contents. 
if (self.objc_methname_section_index == null) { - self.objc_methname_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__objc_methname", .{ - .flags = macho.S_CSTRING_LITERALS, - }); + self.objc_methname_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__objc_methname", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1162,10 +1193,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_methtype")) { if (self.objc_methtype_section_index == null) { - self.objc_methtype_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__objc_methtype", .{ - .flags = macho.S_CSTRING_LITERALS, - }); + self.objc_methtype_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__objc_methtype", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1174,8 +1208,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_classname")) { if (self.objc_classname_section_index == null) { - self.objc_classname_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__objc_classname", .{}); + self.objc_classname_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__objc_classname", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1185,10 +1224,15 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio } if (self.cstring_section_index == null) { - self.cstring_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__cstring", .{ - .flags = macho.S_CSTRING_LITERALS, - }); + self.cstring_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__cstring", + sect.size, + sect.@"align", + .{ + .flags = macho.S_CSTRING_LITERALS, + }, + ); } break :blk .{ @@ -1199,27 +1243,37 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio macho.S_LITERAL_POINTERS => { if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__objc_selrefs")) { if (self.objc_selrefs_section_index == null) { - self.objc_selrefs_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__objc_selrefs", .{ - .flags = macho.S_LITERAL_POINTERS, - }); + self.objc_selrefs_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__objc_selrefs", + sect.size, + sect.@"align", + .{ + .flags = macho.S_LITERAL_POINTERS, + }, + ); } break :blk .{ .seg = self.data_segment_cmd_index.?, .sect = self.objc_selrefs_section_index.?, }; + } else { + // TODO investigate + break :blk null; } - - // TODO investigate - break :blk null; }, macho.S_MOD_INIT_FUNC_POINTERS => { if (self.mod_init_func_section_index == null) { - self.mod_init_func_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__mod_init_func", .{ - .flags = macho.S_MOD_INIT_FUNC_POINTERS, - }); + self.mod_init_func_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__mod_init_func", + sect.size, + sect.@"align", + .{ + .flags = macho.S_MOD_INIT_FUNC_POINTERS, + }, + ); } break :blk .{ @@ -1229,10 +1283,15 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, 
macho.S_MOD_TERM_FUNC_POINTERS => { if (self.mod_term_func_section_index == null) { - self.mod_term_func_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__mod_term_func", .{ - .flags = macho.S_MOD_TERM_FUNC_POINTERS, - }); + self.mod_term_func_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__mod_term_func", + sect.size, + sect.@"align", + .{ + .flags = macho.S_MOD_TERM_FUNC_POINTERS, + }, + ); } break :blk .{ @@ -1241,38 +1300,34 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; }, macho.S_ZEROFILL => { - if (mem.eql(u8, sectname, "__common")) { - if (self.common_section_index == null) { - self.common_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__common", .{ + if (self.bss_section_index == null) { + self.bss_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__bss", + sect.size, + sect.@"align", + .{ .flags = macho.S_ZEROFILL, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.common_section_index.?, - }; - } else { - if (self.bss_section_index == null) { - self.bss_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__bss", .{ - .flags = macho.S_ZEROFILL, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; + }, + ); } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.bss_section_index.?, + }; }, macho.S_THREAD_LOCAL_VARIABLES => { if (self.tlv_section_index == null) { - self.tlv_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__thread_vars", .{ - .flags = macho.S_THREAD_LOCAL_VARIABLES, - }); + self.tlv_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__thread_vars", + sect.size, + sect.@"align", + .{ + .flags = macho.S_THREAD_LOCAL_VARIABLES, + }, + ); } break :blk .{ @@ -1282,10 +1337,15 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, macho.S_THREAD_LOCAL_REGULAR => { if (self.tlv_data_section_index == null) { - self.tlv_data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__thread_data", .{ - .flags = macho.S_THREAD_LOCAL_REGULAR, - }); + self.tlv_data_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__thread_data", + sect.size, + sect.@"align", + .{ + .flags = macho.S_THREAD_LOCAL_REGULAR, + }, + ); } break :blk .{ @@ -1295,10 +1355,15 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, macho.S_THREAD_LOCAL_ZEROFILL => { if (self.tlv_bss_section_index == null) { - self.tlv_bss_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__thread_bss", .{ - .flags = macho.S_THREAD_LOCAL_ZEROFILL, - }); + self.tlv_bss_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__thread_bss", + sect.size, + sect.@"align", + .{ + .flags = macho.S_THREAD_LOCAL_ZEROFILL, + }, + ); } break :blk .{ @@ -1311,8 +1376,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio // TODO I believe __eh_frame is currently part of __unwind_info section // in the latest ld64 output. 
if (self.eh_frame_section_index == null) { - self.eh_frame_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__eh_frame", .{}); + self.eh_frame_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__eh_frame", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1323,8 +1393,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio // TODO audit this: is this the right mapping? if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__const", .{}); + self.data_const_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1335,10 +1410,17 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio macho.S_REGULAR => { if (commands.sectionIsCode(sect)) { if (self.text_section_index == null) { - self.text_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__text", .{ - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); + self.text_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__text", + sect.size, + sect.@"align", + .{ + .flags = macho.S_REGULAR | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + }, + ); } break :blk .{ @@ -1359,8 +1441,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (mem.eql(u8, segname, "__TEXT")) { if (mem.eql(u8, sectname, "__ustring")) { if (self.ustring_section_index == null) { - self.ustring_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__ustring", .{}); + self.ustring_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__ustring", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1369,8 +1456,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__gcc_except_tab")) { if (self.gcc_except_tab_section_index == null) { - self.gcc_except_tab_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__gcc_except_tab", .{}); + self.gcc_except_tab_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__gcc_except_tab", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1379,8 +1471,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_methlist")) { if (self.objc_methlist_section_index == null) { - self.objc_methlist_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__objc_methlist", .{}); + self.objc_methlist_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__objc_methlist", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1394,8 +1491,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio mem.eql(u8, sectname, "__gopclntab")) { if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__const", .{}); + self.data_const_section_index = try 
self.allocateSection( + self.data_const_segment_cmd_index.?, + "__const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1404,8 +1506,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else { if (self.text_const_section_index == null) { - self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__const", .{}); + self.text_const_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1417,8 +1524,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (mem.eql(u8, segname, "__DATA_CONST")) { if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__const", .{}); + self.data_const_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1430,8 +1542,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (mem.eql(u8, segname, "__DATA")) { if (mem.eql(u8, sectname, "__const")) { if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__const", .{}); + self.data_const_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1440,8 +1557,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__cfstring")) { if (self.objc_cfstring_section_index == null) { - self.objc_cfstring_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__cfstring", .{}); + self.objc_cfstring_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__cfstring", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1450,8 +1572,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_classlist")) { if (self.objc_classlist_section_index == null) { - self.objc_classlist_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__objc_classlist", .{}); + self.objc_classlist_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__objc_classlist", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1460,8 +1587,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_imageinfo")) { if (self.objc_imageinfo_section_index == null) { - self.objc_imageinfo_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__objc_imageinfo", .{}); + self.objc_imageinfo_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__objc_imageinfo", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1470,8 +1602,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_const")) { if (self.objc_const_section_index == null) { - 
self.objc_const_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__objc_const", .{}); + self.objc_const_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__objc_const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1480,8 +1617,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_classrefs")) { if (self.objc_classrefs_section_index == null) { - self.objc_classrefs_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__objc_classrefs", .{}); + self.objc_classrefs_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__objc_classrefs", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1490,8 +1632,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_data")) { if (self.objc_data_section_index == null) { - self.objc_data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__objc_data", .{}); + self.objc_data_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__objc_data", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1500,8 +1647,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else { if (self.data_section_index == null) { - self.data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__data", .{}); + self.data_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__data", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1522,563 +1674,605 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio else => break :blk null, } }; - - if (res) |match| { - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - } - return res; } -fn sortSections(self: *MachO) !void { - var text_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator); - defer text_index_mapping.deinit(); - var data_const_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator); - defer data_const_index_mapping.deinit(); - var data_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator); - defer data_index_mapping.deinit(); +pub fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment: u32) !*Atom { + const code = try self.base.allocator.alloc(u8, size); + defer self.base.allocator.free(code); + mem.set(u8, code, 0); - { - // __TEXT segment - const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - var sections = seg.sections.toOwnedSlice(self.base.allocator); - defer self.base.allocator.free(sections); - try seg.sections.ensureCapacity(self.base.allocator, sections.len); + const atom = try self.base.allocator.create(Atom); + errdefer self.base.allocator.destroy(atom); + atom.* = Atom.empty; + atom.local_sym_index = local_sym_index; + atom.size = size; + atom.alignment = alignment; + try atom.code.appendSlice(self.base.allocator, code); + try self.managed_atoms.append(self.base.allocator, atom); - const indices = &[_]*?u16{ - &self.text_section_index, - &self.stubs_section_index, - &self.stub_helper_section_index, - &self.gcc_except_tab_section_index, - &self.cstring_section_index, - &self.ustring_section_index, - &self.text_const_section_index, 
- &self.objc_methlist_section_index, - &self.objc_methname_section_index, - &self.objc_methtype_section_index, - &self.objc_classname_section_index, - &self.eh_frame_section_index, - }; - for (indices) |maybe_index| { - const new_index: u16 = if (maybe_index.*) |index| blk: { - const idx = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sections[index]); - try text_index_mapping.putNoClobber(index, idx); - break :blk idx; - } else continue; - maybe_index.* = new_index; + return atom; +} + +pub fn writeAtom(self: *MachO, atom: *Atom, match: MatchingSection) !void { + const seg = self.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + const sym = self.locals.items[atom.local_sym_index]; + const file_offset = sect.offset + sym.n_value - sect.addr; + try atom.resolveRelocs(self); + log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ self.getString(sym.n_strx), file_offset }); + try self.base.file.?.pwriteAll(atom.code.items, file_offset); +} + +fn allocateLocalSymbols(self: *MachO, match: MatchingSection, offset: i64) !void { + var atom = self.atoms.get(match) orelse return; + + while (true) { + const atom_sym = &self.locals.items[atom.local_sym_index]; + atom_sym.n_value = @intCast(u64, @intCast(i64, atom_sym.n_value) + offset); + + for (atom.aliases.items) |index| { + const alias_sym = &self.locals.items[index]; + alias_sym.n_value = @intCast(u64, @intCast(i64, alias_sym.n_value) + offset); } + + for (atom.contained.items) |sym_at_off| { + const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; + contained_sym.n_value = @intCast(u64, @intCast(i64, contained_sym.n_value) + offset); + } + + if (atom.prev) |prev| { + atom = prev; + } else break; } +} - { - // __DATA_CONST segment - const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - var sections = seg.sections.toOwnedSlice(self.base.allocator); - defer self.base.allocator.free(sections); - try seg.sections.ensureCapacity(self.base.allocator, sections.len); +fn allocateGlobalSymbols(self: *MachO) !void { + var sym_it = self.symbol_resolver.valueIterator(); + while (sym_it.next()) |resolv| { + if (resolv.where != .global) continue; - const indices = &[_]*?u16{ - &self.got_section_index, - &self.mod_init_func_section_index, - &self.mod_term_func_section_index, - &self.data_const_section_index, - &self.objc_cfstring_section_index, - &self.objc_classlist_section_index, - &self.objc_imageinfo_section_index, - }; - for (indices) |maybe_index| { - const new_index: u16 = if (maybe_index.*) |index| blk: { - const idx = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sections[index]); - try data_const_index_mapping.putNoClobber(index, idx); - break :blk idx; - } else continue; - maybe_index.* = new_index; - } + assert(resolv.local_sym_index != 0); + const local_sym = self.locals.items[resolv.local_sym_index]; + const sym = &self.globals.items[resolv.where_index]; + sym.n_value = local_sym.n_value; + sym.n_sect = local_sym.n_sect; + log.debug("allocating global symbol {s} at 0x{x}", .{ self.getString(sym.n_strx), local_sym.n_value }); } +} - { - // __DATA segment - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - var sections = seg.sections.toOwnedSlice(self.base.allocator); - defer self.base.allocator.free(sections); - try seg.sections.ensureCapacity(self.base.allocator, sections.len); +fn writeAtoms(self: *MachO) !void { + var buffer = 
std.ArrayList(u8).init(self.base.allocator); + defer buffer.deinit(); + var file_offset: ?u64 = null; - // __DATA segment - const indices = &[_]*?u16{ - &self.la_symbol_ptr_section_index, - &self.objc_const_section_index, - &self.objc_selrefs_section_index, - &self.objc_classrefs_section_index, - &self.objc_data_section_index, - &self.data_section_index, - &self.tlv_section_index, - &self.tlv_data_section_index, - &self.tlv_bss_section_index, - &self.bss_section_index, - &self.common_section_index, - }; - for (indices) |maybe_index| { - const new_index: u16 = if (maybe_index.*) |index| blk: { - const idx = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sections[index]); - try data_index_mapping.putNoClobber(index, idx); - break :blk idx; - } else continue; - maybe_index.* = new_index; - } - } + var it = self.atoms.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + const seg = self.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + var atom: *Atom = entry.value_ptr.*; - { - var transient: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}; - try transient.ensureCapacity(self.base.allocator, self.blocks.count()); + log.debug("writing atoms in {s},{s}", .{ commands.segmentName(sect), commands.sectionName(sect) }); - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const old = entry.key_ptr.*; - const sect = if (old.seg == self.text_segment_cmd_index.?) - text_index_mapping.get(old.sect).? - else if (old.seg == self.data_const_segment_cmd_index.?) - data_const_index_mapping.get(old.sect).? - else - data_index_mapping.get(old.sect).?; - transient.putAssumeCapacityNoClobber(.{ - .seg = old.seg, - .sect = sect, - }, entry.value_ptr.*); + while (atom.prev) |prev| { + atom = prev; } - self.blocks.clearAndFree(self.base.allocator); - self.blocks.deinit(self.base.allocator); - self.blocks = transient; - } + while (true) { + if (atom.dirty) { + const atom_sym = self.locals.items[atom.local_sym_index]; + const padding_size: u64 = if (atom.next) |next| blk: { + const next_sym = self.locals.items[next.local_sym_index]; + break :blk next_sym.n_value - (atom_sym.n_value + atom.size); + } else 0; - { - // Create new section ordinals. 
- self.section_ordinals.clearRetainingCapacity(); - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - for (text_seg.sections.items) |_, sect_id| { - const res = self.section_ordinals.getOrPutAssumeCapacity(.{ - .seg = self.text_segment_cmd_index.?, - .sect = @intCast(u16, sect_id), - }); - assert(!res.found_existing); - } - const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - for (data_const_seg.sections.items) |_, sect_id| { - const res = self.section_ordinals.getOrPutAssumeCapacity(.{ - .seg = self.data_const_segment_cmd_index.?, - .sect = @intCast(u16, sect_id), - }); - assert(!res.found_existing); - } - const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - for (data_seg.sections.items) |_, sect_id| { - const res = self.section_ordinals.getOrPutAssumeCapacity(.{ - .seg = self.data_segment_cmd_index.?, - .sect = @intCast(u16, sect_id), - }); - assert(!res.found_existing); + log.debug(" (adding atom {s} to buffer: {})", .{ self.getString(atom_sym.n_strx), atom_sym }); + + try atom.resolveRelocs(self); + try buffer.appendSlice(atom.code.items); + try buffer.ensureUnusedCapacity(padding_size); + + var i: usize = 0; + while (i < padding_size) : (i += 1) { + buffer.appendAssumeCapacity(0); + } + + if (file_offset == null) { + file_offset = sect.offset + atom_sym.n_value - sect.addr; + } + atom.dirty = false; + } else { + if (file_offset) |off| { + try self.base.file.?.pwriteAll(buffer.items, off); + } + file_offset = null; + buffer.clearRetainingCapacity(); + } + + if (atom.next) |next| { + atom = next; + } else { + if (file_offset) |off| { + try self.base.file.?.pwriteAll(buffer.items, off); + } + file_offset = null; + buffer.clearRetainingCapacity(); + break; + } } } } -fn allocateTextSegment(self: *MachO) !void { - const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const nstubs = @intCast(u32, self.stubs.items.len); +pub fn createGotAtom(self: *MachO, key: GotIndirectionKey) !*Atom { + const local_sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(self.base.allocator, .{ + .n_strx = try self.makeString("l_zld_got_entry"), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); + switch (key.where) { + .local => { + try atom.relocs.append(self.base.allocator, .{ + .offset = 0, + .where = .local, + .where_index = key.where_index, + .payload = .{ + .unsigned = .{ + .subtractor = null, + .addend = 0, + .is_64bit = true, + }, + }, + }); + try atom.rebases.append(self.base.allocator, 0); + }, + .undef => { + try atom.bindings.append(self.base.allocator, .{ + .local_sym_index = key.where_index, + .offset = 0, + }); + }, + } + return atom; +} - const base_vmaddr = self.load_commands.items[self.pagezero_segment_cmd_index.?].Segment.inner.vmsize; - seg.inner.fileoff = 0; - seg.inner.vmaddr = base_vmaddr; +fn createDyldPrivateAtom(self: *MachO) !void { + if (self.dyld_private_atom != null) return; + const local_sym_index = @intCast(u32, self.locals.items.len); + const sym = try self.locals.addOne(self.base.allocator); + sym.* = .{ + .n_strx = try self.makeString("l_zld_dyld_private"), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); + self.dyld_private_atom = atom; + const match = MatchingSection{ + .seg = self.data_segment_cmd_index.?, + .sect = 
self.data_section_index.?, + }; + const vaddr = try self.allocateAtom(atom, @sizeOf(u64), 8, match); + sym.n_value = vaddr; + sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); +} - // Set stubs and stub_helper sizes - const stubs = &seg.sections.items[self.stubs_section_index.?]; - const stub_helper = &seg.sections.items[self.stub_helper_section_index.?]; - stubs.size += nstubs * stubs.reserved2; +fn createStubHelperPreambleAtom(self: *MachO) !void { + if (self.stub_helper_preamble_atom != null) return; + const arch = self.base.options.target.cpu.arch; + const size: u64 = switch (arch) { + .x86_64 => 15, + .aarch64 => 6 * @sizeOf(u32), + else => unreachable, + }; + const alignment: u32 = switch (arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, + }; + const local_sym_index = @intCast(u32, self.locals.items.len); + const sym = try self.locals.addOne(self.base.allocator); + sym.* = .{ + .n_strx = try self.makeString("l_zld_stub_preamble"), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + const atom = try self.createEmptyAtom(local_sym_index, size, alignment); + const dyld_private_sym_index = self.dyld_private_atom.?.local_sym_index; + switch (arch) { + .x86_64 => { + try atom.relocs.ensureUnusedCapacity(self.base.allocator, 2); + // lea %r11, [rip + disp] + atom.code.items[0] = 0x4c; + atom.code.items[1] = 0x8d; + atom.code.items[2] = 0x1d; + atom.relocs.appendAssumeCapacity(.{ + .offset = 3, + .where = .local, + .where_index = dyld_private_sym_index, + .payload = .{ + .signed = .{ + .addend = 0, + .correction = 0, + }, + }, + }); + // push %r11 + atom.code.items[7] = 0x41; + atom.code.items[8] = 0x53; + // jmp [rip + disp] + atom.code.items[9] = 0xff; + atom.code.items[10] = 0x25; + atom.relocs.appendAssumeCapacity(.{ + .offset = 11, + .where = .undef, + .where_index = self.dyld_stub_binder_index.?, + .payload = .{ + .load = .{ + .kind = .got, + .addend = 0, + }, + }, + }); + }, + .aarch64 => { + try atom.relocs.ensureUnusedCapacity(self.base.allocator, 4); + // adrp x17, 0 + mem.writeIntLittle(u32, atom.code.items[0..][0..4], aarch64.Instruction.adrp(.x17, 0).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 0, + .where = .local, + .where_index = dyld_private_sym_index, + .payload = .{ + .page = .{ + .kind = .page, + .addend = 0, + }, + }, + }); + // add x17, x17, 0 + mem.writeIntLittle(u32, atom.code.items[4..][0..4], aarch64.Instruction.add(.x17, .x17, 0, false).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 4, + .where = .local, + .where_index = dyld_private_sym_index, + .payload = .{ + .page_off = .{ + .kind = .page, + .addend = 0, + .op_kind = .arithmetic, + }, + }, + }); + // stp x16, x17, [sp, #-16]! 
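// (At this point x17 holds the address of the l_zld_dyld_private cell
// computed above, and x16 the lazy-bind offset loaded by each per-symbol
// stub; the pair is stored because dyld_stub_binder expects both values on
// the stack.)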
+ mem.writeIntLittle(u32, atom.code.items[8..][0..4], aarch64.Instruction.stp( + .x16, + .x17, + aarch64.Register.sp, + aarch64.Instruction.LoadStorePairOffset.pre_index(-16), + ).toU32()); + // adrp x16, 0 + mem.writeIntLittle(u32, atom.code.items[12..][0..4], aarch64.Instruction.adrp(.x16, 0).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 12, + .where = .undef, + .where_index = self.dyld_stub_binder_index.?, + .payload = .{ + .page = .{ + .kind = .got, + .addend = 0, + }, + }, + }); + // ldr x16, [x16, 0] + mem.writeIntLittle(u32, atom.code.items[16..][0..4], aarch64.Instruction.ldr(.x16, .{ + .register = .{ + .rn = .x16, + .offset = aarch64.Instruction.LoadStoreOffset.imm(0), + }, + }).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 16, + .where = .undef, + .where_index = self.dyld_stub_binder_index.?, + .payload = .{ + .page_off = .{ + .kind = .got, + .addend = 0, + }, + }, + }); + // br x16 + mem.writeIntLittle(u32, atom.code.items[20..][0..4], aarch64.Instruction.br(.x16).toU32()); + }, + else => unreachable, + } + self.stub_helper_preamble_atom = atom; + const match = MatchingSection{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }; + const alignment_pow_2 = try math.powi(u32, 2, atom.alignment); + const vaddr = try self.allocateAtom(atom, atom.size, alignment_pow_2, match); + sym.n_value = vaddr; + sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); +} - const stub_size: u4 = switch (self.base.options.target.cpu.arch) { +pub fn createStubHelperAtom(self: *MachO) !*Atom { + const arch = self.base.options.target.cpu.arch; + const stub_size: u4 = switch (arch) { .x86_64 => 10, .aarch64 => 3 * @sizeOf(u32), else => unreachable, }; - stub_helper.size += nstubs * stub_size; - - var sizeofcmds: u64 = 0; - for (self.load_commands.items) |lc| { - sizeofcmds += lc.cmdsize(); - } - - try self.allocateSegment(self.text_segment_cmd_index.?, @sizeOf(macho.mach_header_64) + sizeofcmds); - - // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments. 
- var min_alignment: u32 = 0; - for (seg.sections.items) |sect| { - const alignment = try math.powi(u32, 2, sect.@"align"); - min_alignment = math.max(min_alignment, alignment); - } - - assert(min_alignment > 0); - const last_sect_idx = seg.sections.items.len - 1; - const last_sect = seg.sections.items[last_sect_idx]; - const shift: u32 = blk: { - const diff = seg.inner.filesize - last_sect.offset - last_sect.size; - const factor = @divTrunc(diff, min_alignment); - break :blk @intCast(u32, factor * min_alignment); + const alignment: u2 = switch (arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, }; + const local_sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(self.base.allocator, .{ + .n_strx = try self.makeString("l_zld_stub_in_stub_helper"), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + const atom = try self.createEmptyAtom(local_sym_index, stub_size, alignment); + try atom.relocs.ensureTotalCapacity(self.base.allocator, 1); - if (shift > 0) { - for (seg.sections.items) |*sect| { - sect.offset += shift; - sect.addr += shift; - } - } -} - -fn allocateDataConstSegment(self: *MachO) !void { - const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const nentries = @intCast(u32, self.got_entries.items.len); - - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - seg.inner.fileoff = text_seg.inner.fileoff + text_seg.inner.filesize; - seg.inner.vmaddr = text_seg.inner.vmaddr + text_seg.inner.vmsize; - - // Set got size - const got = &seg.sections.items[self.got_section_index.?]; - got.size += nentries * @sizeOf(u64); - - try self.allocateSegment(self.data_const_segment_cmd_index.?, 0); -} - -fn allocateDataSegment(self: *MachO) !void { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const nstubs = @intCast(u32, self.stubs.items.len); - - const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - seg.inner.fileoff = data_const_seg.inner.fileoff + data_const_seg.inner.filesize; - seg.inner.vmaddr = data_const_seg.inner.vmaddr + data_const_seg.inner.vmsize; - - // Set la_symbol_ptr and data size - const la_symbol_ptr = &seg.sections.items[self.la_symbol_ptr_section_index.?]; - const data = &seg.sections.items[self.data_section_index.?]; - la_symbol_ptr.size += nstubs * @sizeOf(u64); - data.size += @sizeOf(u64); // We need at least 8bytes for address of dyld_stub_binder - - try self.allocateSegment(self.data_segment_cmd_index.?, 0); -} - -fn allocateLinkeditSegment(self: *MachO) void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - seg.inner.fileoff = data_seg.inner.fileoff + data_seg.inner.filesize; - seg.inner.vmaddr = data_seg.inner.vmaddr + data_seg.inner.vmsize; -} - -fn allocateSegment(self: *MachO, index: u16, offset: u64) !void { - const seg = &self.load_commands.items[index].Segment; - - // Allocate the sections according to their alignment at the beginning of the segment. 
- var start: u64 = offset; - for (seg.sections.items) |*sect| { - const alignment = try math.powi(u32, 2, sect.@"align"); - const start_aligned = mem.alignForwardGeneric(u64, start, alignment); - const end_aligned = mem.alignForwardGeneric(u64, start_aligned + sect.size, alignment); - sect.offset = @intCast(u32, seg.inner.fileoff + start_aligned); - sect.addr = seg.inner.vmaddr + start_aligned; - start = end_aligned; - } - - const seg_size_aligned = mem.alignForwardGeneric(u64, start, self.page_size); - seg.inner.filesize = seg_size_aligned; - seg.inner.vmsize = seg_size_aligned; -} - -fn allocateTextBlocks(self: *MachO) !void { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - // Find the first block - while (block.prev) |prev| { - block = prev; - } - - const seg = self.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; - - var base_addr: u64 = sect.addr; - const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); - - log.debug(" within section {s},{s}", .{ commands.segmentName(sect), commands.sectionName(sect) }); - log.debug(" {}", .{sect}); - - while (true) { - const block_alignment = try math.powi(u32, 2, block.alignment); - base_addr = mem.alignForwardGeneric(u64, base_addr, block_alignment); - - const sym = &self.locals.items[block.local_sym_index]; - sym.n_value = base_addr; - sym.n_sect = n_sect; - - log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ - self.getString(sym.n_strx), - base_addr, - base_addr + block.size, - block.size, - block.alignment, + switch (arch) { + .x86_64 => { + // pushq + atom.code.items[0] = 0x68; + // Next 4 bytes 1..4 are just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. + // jmpq + atom.code.items[5] = 0xe9; + atom.relocs.appendAssumeCapacity(.{ + .offset = 6, + .where = .local, + .where_index = self.stub_helper_preamble_atom.?.local_sym_index, + .payload = .{ + .branch = .{ .arch = arch }, + }, }); - - // Update each alias (if any) - for (block.aliases.items) |index| { - const alias_sym = &self.locals.items[index]; - alias_sym.n_value = base_addr; - alias_sym.n_sect = n_sect; - } - - // Update each symbol contained within the TextBlock - for (block.contained.items) |sym_at_off| { - const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; - contained_sym.n_value = base_addr + sym_at_off.offset; - contained_sym.n_sect = n_sect; - } - - base_addr += block.size; - - if (block.next) |next| { - block = next; - } else break; - } + }, + .aarch64 => { + const literal = blk: { + const div_res = try math.divExact(u64, stub_size - @sizeOf(u32), 4); + break :blk try math.cast(u18, div_res); + }; + // ldr w16, literal + mem.writeIntLittle(u32, atom.code.items[0..4], aarch64.Instruction.ldr(.w16, .{ + .literal = literal, + }).toU32()); + // b disp + mem.writeIntLittle(u32, atom.code.items[4..8], aarch64.Instruction.b(0).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 4, + .where = .local, + .where_index = self.stub_helper_preamble_atom.?.local_sym_index, + .payload = .{ + .branch = .{ .arch = arch }, + }, + }); + // Next 4 bytes 8..12 are just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. 
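// A per-symbol stub-helper entry thus encodes only two things: the lazy-bind
// opcode-stream offset (patched in later) and a branch back to the shared
// preamble, which performs the actual dyld_stub_binder call. On x86_64 the
// same 10 bytes read: 0x68 <imm32 bind offset>, 0xe9 <rel32 to preamble>.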
+ }, + else => unreachable, } - // Update globals - { - var sym_it = self.symbol_resolver.valueIterator(); - while (sym_it.next()) |resolv| { - if (resolv.where != .global) continue; - - assert(resolv.local_sym_index != 0); - const local_sym = self.locals.items[resolv.local_sym_index]; - const sym = &self.globals.items[resolv.where_index]; - sym.n_value = local_sym.n_value; - sym.n_sect = local_sym.n_sect; - } - } + return atom; } -fn writeTextBlocks(self: *MachO) !void { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - while (block.prev) |prev| { - block = prev; - } - - const seg = self.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; - const sect_type = commands.sectionType(sect); - - log.debug(" for section {s},{s}", .{ commands.segmentName(sect), commands.sectionName(sect) }); - log.debug(" {}", .{sect}); - - var code = try self.base.allocator.alloc(u8, sect.size); - defer self.base.allocator.free(code); - - if (sect_type == macho.S_ZEROFILL or sect_type == macho.S_THREAD_LOCAL_ZEROFILL) { - mem.set(u8, code, 0); - } else { - var base_off: u64 = 0; - - while (true) { - const block_alignment = try math.powi(u32, 2, block.alignment); - const aligned_base_off = mem.alignForwardGeneric(u64, base_off, block_alignment); - - const sym = self.locals.items[block.local_sym_index]; - log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ - self.getString(sym.n_strx), - aligned_base_off, - aligned_base_off + block.size, - block.size, - block.alignment, - }); - - try block.resolveRelocs(self); - mem.copy(u8, code[aligned_base_off..][0..block.size], block.code.items); - - // TODO NOP for machine code instead of just zeroing out - const padding_len = aligned_base_off - base_off; - mem.set(u8, code[base_off..][0..padding_len], 0); - - base_off = aligned_base_off + block.size; - - if (block.next) |next| { - block = next; - } else break; - } - - mem.set(u8, code[base_off..], 0); - } - - try self.base.file.?.pwriteAll(code, sect.offset); - } +pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, lazy_binding_sym_index: u32) !*Atom { + const local_sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(self.base.allocator, .{ + .n_strx = try self.makeString("l_zld_lazy_ptr"), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); + try atom.relocs.append(self.base.allocator, .{ + .offset = 0, + .where = .local, + .where_index = stub_sym_index, + .payload = .{ + .unsigned = .{ + .subtractor = null, + .addend = 0, + .is_64bit = true, + }, + }, + }); + try atom.rebases.append(self.base.allocator, 0); + try atom.lazy_bindings.append(self.base.allocator, .{ + .local_sym_index = lazy_binding_sym_index, + .offset = 0, + }); + return atom; } -fn writeStubHelperCommon(self: *MachO) !void { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?]; - const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_segment.sections.items[self.got_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const data = &data_segment.sections.items[self.data_section_index.?]; - - self.stub_helper_stubs_start_off = blk: { - switch 
(self.base.options.target.cpu.arch) { - .x86_64 => { - const code_size = 15; - var code: [code_size]u8 = undefined; - // lea %r11, [rip + disp] - code[0] = 0x4c; - code[1] = 0x8d; - code[2] = 0x1d; - { - const target_addr = data.addr + data.size - @sizeOf(u64); - const displacement = try math.cast(u32, target_addr - stub_helper.addr - 7); - mem.writeIntLittle(u32, code[3..7], displacement); - } - // push %r11 - code[7] = 0x41; - code[8] = 0x53; - // jmp [rip + disp] - code[9] = 0xff; - code[10] = 0x25; - { - const n_strx = self.strtab_dir.getKeyAdapted(@as([]const u8, "dyld_stub_binder"), StringIndexAdapter{ - .bytes = &self.strtab, - }) orelse unreachable; - const resolv = self.symbol_resolver.get(n_strx) orelse unreachable; - const got_index = self.got_entries_map.get(.{ - .where = .undef, - .where_index = resolv.where_index, - }) orelse unreachable; - const addr = got.addr + got_index * @sizeOf(u64); - const displacement = try math.cast(u32, addr - stub_helper.addr - code_size); - mem.writeIntLittle(u32, code[11..], displacement); - } - try self.base.file.?.pwriteAll(&code, stub_helper.offset); - break :blk stub_helper.offset + code_size; - }, - .aarch64 => { - var code: [6 * @sizeOf(u32)]u8 = undefined; - data_blk_outer: { - const this_addr = stub_helper.addr; - const target_addr = data.addr + data.size - @sizeOf(u64); - data_blk: { - const displacement = math.cast(i21, target_addr - this_addr) catch break :data_blk; - // adr x17, disp - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32()); - // nop - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); - break :data_blk_outer; - } - data_blk: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.cast(i21, target_addr - new_this_addr) catch break :data_blk; - // nop - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); - // adr x17, disp - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32()); - break :data_blk_outer; - } - // Jump is too big, replace adr with adrp and add. - const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32()); - } - // stp x16, x17, [sp, #-16]! 
- code[8] = 0xf0; - code[9] = 0x47; - code[10] = 0xbf; - code[11] = 0xa9; - binder_blk_outer: { - const n_strx = self.strtab_dir.getKeyAdapted(@as([]const u8, "dyld_stub_binder"), StringIndexAdapter{ - .bytes = &self.strtab, - }) orelse unreachable; - const resolv = self.symbol_resolver.get(n_strx) orelse unreachable; - const got_index = self.got_entries_map.get(.{ - .where = .undef, - .where_index = resolv.where_index, - }) orelse unreachable; - const this_addr = stub_helper.addr + 3 * @sizeOf(u32); - const target_addr = got.addr + got_index * @sizeOf(u64); - binder_blk: { - const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :binder_blk; - const literal = math.cast(u18, displacement) catch break :binder_blk; - // ldr x16, label - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - // nop - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32()); - break :binder_blk_outer; - } - binder_blk: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :binder_blk; - const literal = math.cast(u18, displacement) catch break :binder_blk; - // Pad with nop to please division. - // nop - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32()); - // ldr x16, label - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - break :binder_blk_outer; - } - // Use adrp followed by ldr(immediate). - const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - const offset = try math.divExact(u12, narrowed, 8); - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - .register = .{ - .rn = .x16, - .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), - }, - }).toU32()); - } - // br x16 - code[20] = 0x00; - code[21] = 0x02; - code[22] = 0x1f; - code[23] = 0xd6; - try self.base.file.?.pwriteAll(&code, stub_helper.offset); - break :blk stub_helper.offset + 6 * @sizeOf(u32); - }, - else => unreachable, - } +pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { + const arch = self.base.options.target.cpu.arch; + const alignment: u2 = switch (arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type }; + const stub_size: u4 = switch (arch) { + .x86_64 => 6, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, // unhandled architecture type + }; + const local_sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(self.base.allocator, .{ + .n_strx = try self.makeString("l_zld_stub"), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + const atom = try self.createEmptyAtom(local_sym_index, stub_size, alignment); + switch (arch) { + .x86_64 => { + // jmp + atom.code.items[0] = 0xff; + atom.code.items[1] = 0x25; + try atom.relocs.append(self.base.allocator, .{ + .offset = 2, + .where = .local, + .where_index = laptr_sym_index, + .payload = .{ + .branch = .{ .arch = arch }, + }, + }); + }, + .aarch64 => { + try atom.relocs.ensureTotalCapacity(self.base.allocator, 2); + // adrp x16, pages + mem.writeIntLittle(u32, atom.code.items[0..4], aarch64.Instruction.adrp(.x16, 0).toU32()); + 
atom.relocs.appendAssumeCapacity(.{ + .offset = 0, + .where = .local, + .where_index = laptr_sym_index, + .payload = .{ + .page = .{ + .kind = .page, + .addend = 0, + }, + }, + }); + // ldr x16, [x16, offset] + mem.writeIntLittle(u32, atom.code.items[4..8], aarch64.Instruction.ldr(.x16, .{ + .register = .{ + .rn = .x16, + .offset = aarch64.Instruction.LoadStoreOffset.imm(0), + }, + }).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 4, + .where = .local, + .where_index = laptr_sym_index, + .payload = .{ + .page_off = .{ + .kind = .page, + .addend = 0, + .op_kind = .load, + }, + }, + }); + // br x16 + mem.writeIntLittle(u32, atom.code.items[8..12], aarch64.Instruction.br(.x16).toU32()); + }, + else => unreachable, + } + return atom; +} -

for (self.stubs.items) |_, i| { - const index = @intCast(u32, i); - // TODO weak bound pointers - try self.writeLazySymbolPointer(index); - try self.writeStub(index); - try self.writeStubInStubHelper(index); +fn createTentativeDefAtoms(self: *MachO) !void { + if (self.tentatives.count() == 0) return; + // Convert any tentative definition into a regular symbol and allocate + // atoms for each tentative definition. + while (self.tentatives.popOrNull()) |entry| { + const match = MatchingSection{ + .seg = self.data_segment_cmd_index.?, + .sect = self.bss_section_index.?, + }; + _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + + const global_sym = &self.globals.items[entry.key]; + const size = global_sym.n_value; + const alignment = (global_sym.n_desc >> 8) & 0x0f; + + global_sym.n_value = 0; + global_sym.n_desc = 0; + global_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + + const local_sym_index = @intCast(u32, self.locals.items.len); + const local_sym = try self.locals.addOne(self.base.allocator); + local_sym.* = .{ + .n_strx = global_sym.n_strx, + .n_type = macho.N_SECT, + .n_sect = global_sym.n_sect, + .n_desc = 0, + .n_value = 0, + }; + + const resolv = self.symbol_resolver.getPtr(local_sym.n_strx) orelse unreachable; + resolv.local_sym_index = local_sym_index; + + const atom = try self.createEmptyAtom(local_sym_index, size, alignment); + const alignment_pow_2 = try math.powi(u32, 2, alignment); + const vaddr = try self.allocateAtom(atom, size, alignment_pow_2, match); + local_sym.n_value = vaddr; + global_sym.n_value = vaddr; + } }

-fn resolveSymbolsInObject( - self: *MachO, - object_id: u16, - tentatives: *std.AutoArrayHashMap(u32, void), - unresolved: *std.AutoArrayHashMap(u32, void), -) !void { +fn createDsoHandleAtom(self: *MachO) !void { + if (self.strtab_dir.getKeyAdapted(@as([]const u8, "___dso_handle"), StringIndexAdapter{ + .bytes = &self.strtab, + })) |n_strx| blk: { + const resolv = self.symbol_resolver.getPtr(n_strx) orelse break :blk; + if (resolv.where != .undef) break :blk; + + const undef = &self.undefs.items[resolv.where_index]; + const match: MatchingSection = .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }; + const local_sym_index = @intCast(u32, self.locals.items.len); + var nlist = macho.nlist_64{ + .n_strx = undef.n_strx, + .n_type = macho.N_SECT, + .n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1), + .n_desc = 0, + .n_value = 0, + }; + try self.locals.append(self.base.allocator, nlist); + const global_sym_index = @intCast(u32, self.globals.items.len); + nlist.n_type |= macho.N_EXT; + nlist.n_desc = macho.N_WEAK_DEF; + try self.globals.append(self.base.allocator, nlist); + + _ = self.unresolved.fetchSwapRemove(resolv.where_index); + + undef.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + resolv.* = .{ + .where = .global, + .where_index = global_sym_index, + .local_sym_index = local_sym_index, + }; + + // We create an empty atom for this symbol. + // TODO perhaps we should special-case special symbols? Create a separate + // linked list of atoms? + const atom = try self.createEmptyAtom(local_sym_index, 0, 0); + const sym = &self.locals.items[local_sym_index]; + const vaddr = try self.allocateAtom(atom, 0, 1, match); + sym.n_value = vaddr; + atom.dirty = false; // We don't really want to write it to file. + } +} + +fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { const object = &self.objects.items[object_id]; log.debug("resolving symbols in '{s}'", .{object.name}); @@ -2150,7 +2344,7 @@ fn resolveSymbolsInObject( const global = &self.globals.items[resolv.where_index]; if (symbolIsTentative(global.*)) { - _ = tentatives.fetchSwapRemove(resolv.where_index); + _ = self.tentatives.fetchSwapRemove(resolv.where_index); } else if (!(symbolIsWeakDef(sym) or symbolIsPext(sym)) and !(symbolIsWeakDef(global.*) or symbolIsPext(global.*))) { @@ -2168,15 +2362,7 @@ fn resolveSymbolsInObject( continue; }, .undef => { - const undef = &self.undefs.items[resolv.where_index]; - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - _ = unresolved.fetchSwapRemove(resolv.where_index); + _ = self.unresolved.fetchSwapRemove(resolv.where_index); }, } @@ -2210,7 +2396,7 @@ fn resolveSymbolsInObject( .where_index = global_sym_index, .file = object_id, }); - _ = try tentatives.getOrPut(global_sym_index); + _ = try self.tentatives.getOrPut(self.base.allocator, global_sym_index); continue; }; @@ -2234,7 +2420,7 @@ fn resolveSymbolsInObject( .n_desc = sym.n_desc, .n_value = sym.n_value, }); - _ = try tentatives.getOrPut(global_sym_index); + _ = try self.tentatives.getOrPut(self.base.allocator, global_sym_index); resolv.* = .{ .where = .global, .where_index = global_sym_index, @@ -2247,7 +2433,7 @@ fn resolveSymbolsInObject( .n_desc = 0, .n_value = 0, }; - _ = unresolved.fetchSwapRemove(resolv.where_index); + _ = self.unresolved.fetchSwapRemove(resolv.where_index); }, } } else { @@ -2267,27 +2453,17 @@ fn resolveSymbolsInObject( .where_index = undef_sym_index, .file = object_id, }); - _ = try unresolved.getOrPut(undef_sym_index); + try self.unresolved.putNoClobber(self.base.allocator, undef_sym_index, .none); } } } -fn resolveSymbols(self: *MachO) !void { - var tentatives = std.AutoArrayHashMap(u32, void).init(self.base.allocator); - defer tentatives.deinit(); +fn resolveSymbolsInArchives(self: *MachO) !void { + if (self.archives.items.len == 0) return; - var unresolved = std.AutoArrayHashMap(u32, void).init(self.base.allocator); - defer unresolved.deinit(); - - // First pass, resolve symbols in provided objects. - for (self.objects.items) |_, object_id| { - try self.resolveSymbolsInObject(@intCast(u16, object_id), &tentatives, &unresolved); - } - - // Second pass, resolve symbols in static libraries. 
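// Review note: the old monolithic resolveSymbols (with function-local
// `tentatives`/`unresolved` maps threaded through every call) is split into
// per-source passes sharing linker-owned state: resolveSymbolsInObject,
// resolveSymbolsInArchives here, then resolveSymbolsInDylibs and
// resolveDyldStubBinder below.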
var next_sym: usize = 0; - loop: while (next_sym < unresolved.count()) { - const sym = self.undefs.items[unresolved.keys()[next_sym]]; + loop: while (next_sym < self.unresolved.count()) { + const sym = self.undefs.items[self.unresolved.keys()[next_sym]]; const sym_name = self.getString(sym.n_strx); for (self.archives.items) |archive| { @@ -2301,102 +2477,21 @@ fn resolveSymbols(self: *MachO) !void { const object_id = @intCast(u16, self.objects.items.len); const object = try self.objects.addOne(self.base.allocator); object.* = try archive.parseObject(self.base.allocator, self.base.options.target, offsets.items[0]); - try self.resolveSymbolsInObject(object_id, &tentatives, &unresolved); + try self.resolveSymbolsInObject(object_id); continue :loop; } next_sym += 1; } +} - // Convert any tentative definition into a regular symbol and allocate - // text blocks for each tentative defintion. - while (tentatives.popOrNull()) |entry| { - const sym = &self.globals.items[entry.key]; - const match: MatchingSection = blk: { - if (self.common_section_index == null) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.common_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__common", .{ - .flags = macho.S_ZEROFILL, - }); - } - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.common_section_index.?, - }; - }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); +fn resolveSymbolsInDylibs(self: *MachO) !void { + if (self.dylibs.items.len == 0) return; - const size = sym.n_value; - const code = try self.base.allocator.alloc(u8, size); - defer self.base.allocator.free(code); - mem.set(u8, code, 0); - const alignment = (sym.n_desc >> 8) & 0x0f; - - sym.n_value = 0; - sym.n_desc = 0; - sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); - var local_sym = sym.*; - local_sym.n_type = macho.N_SECT; - - const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.base.allocator, local_sym); - - const resolv = self.symbol_resolver.getPtr(sym.n_strx) orelse unreachable; - resolv.local_sym_index = local_sym_index; - - const block = try self.base.allocator.create(TextBlock); - block.* = TextBlock.empty; - block.local_sym_index = local_sym_index; - block.size = size; - block.alignment = alignment; - try self.managed_blocks.append(self.base.allocator, block); - - try block.code.appendSlice(self.base.allocator, code); - - // Update target section's metadata - // TODO should we update segment's size here too? - // How does it tie with incremental space allocs? - const tseg = &self.load_commands.items[match.seg].Segment; - const tsect = &tseg.sections.items[match.sect]; - const new_alignment = math.max(tsect.@"align", block.alignment); - const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; - tsect.size = new_size; - tsect.@"align" = new_alignment; - - if (self.blocks.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; - } else { - try self.blocks.putNoClobber(self.base.allocator, match, block); - } - } - - // Third pass, resolve symbols in dynamic libraries. - { - // Put dyld_stub_binder as an undefined special symbol. 
- const n_strx = try self.makeString("dyld_stub_binder"); - const undef_sym_index = @intCast(u32, self.undefs.items.len); - try self.undefs.append(self.base.allocator, .{ - .n_strx = n_strx, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .undef, - .where_index = undef_sym_index, - }); - _ = try unresolved.getOrPut(undef_sym_index); - } - - next_sym = 0; - loop: while (next_sym < unresolved.count()) { - const sym = self.undefs.items[unresolved.keys()[next_sym]]; + var next_sym: usize = 0; + loop: while (next_sym < self.unresolved.count()) { + const sym = self.undefs.items[self.unresolved.keys()[next_sym]]; const sym_name = self.getString(sym.n_strx); for (self.dylibs.items) |dylib, id| { @@ -2404,6 +2499,7 @@ fn resolveSymbols(self: *MachO) !void { const dylib_id = @intCast(u16, id); if (!self.referenced_dylibs.contains(dylib_id)) { + try self.addLoadDylibLC(dylib_id); try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); } @@ -2413,557 +2509,298 @@ fn resolveSymbols(self: *MachO) !void { undef.n_type |= macho.N_EXT; undef.n_desc = @intCast(u16, ordinal + 1) * macho.N_SYMBOL_RESOLVER; - _ = unresolved.fetchSwapRemove(resolv.where_index); + if (self.unresolved.fetchSwapRemove(resolv.where_index)) |entry| outer_blk: { + switch (entry.value) { + .none => {}, + .got => return error.TODOGotHint, + .stub => { + if (self.stubs_map.contains(resolv.where_index)) break :outer_blk; + const stub_helper_atom = blk: { + const match = MatchingSection{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }; + const atom = try self.createStubHelperAtom(); + const atom_sym = &self.locals.items[atom.local_sym_index]; + const alignment = try math.powi(u32, 2, atom.alignment); + const vaddr = try self.allocateAtom(atom, atom.size, alignment, match); + atom_sym.n_value = vaddr; + atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + break :blk atom; + }; + const laptr_atom = blk: { + const match = MatchingSection{ + .seg = self.data_segment_cmd_index.?, + .sect = self.la_symbol_ptr_section_index.?, + }; + const atom = try self.createLazyPointerAtom( + stub_helper_atom.local_sym_index, + resolv.where_index, + ); + const atom_sym = &self.locals.items[atom.local_sym_index]; + const alignment = try math.powi(u32, 2, atom.alignment); + const vaddr = try self.allocateAtom(atom, atom.size, alignment, match); + atom_sym.n_value = vaddr; + atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + break :blk atom; + }; + const stub_atom = blk: { + const match = MatchingSection{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stubs_section_index.?, + }; + const atom = try self.createStubAtom(laptr_atom.local_sym_index); + const atom_sym = &self.locals.items[atom.local_sym_index]; + const alignment = try math.powi(u32, 2, atom.alignment); + const vaddr = try self.allocateAtom(atom, atom.size, alignment, match); + atom_sym.n_value = vaddr; + atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + break :blk atom; + }; + try self.stubs_map.putNoClobber(self.base.allocator, resolv.where_index, stub_atom); + }, + } + } continue :loop; } next_sym += 1; } +} - // Fourth pass, handle synthetic symbols and flag any undefined references. 
- if (self.strtab_dir.getKeyAdapted(@as([]const u8, "___dso_handle"), StringIndexAdapter{ - .bytes = &self.strtab, - })) |n_strx| blk: { - const resolv = self.symbol_resolver.getPtr(n_strx) orelse break :blk; - if (resolv.where != .undef) break :blk; +fn resolveDyldStubBinder(self: *MachO) !void { + if (self.dyld_stub_binder_index != null) return; - const undef = &self.undefs.items[resolv.where_index]; - const match: MatchingSection = .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }; - const local_sym_index = @intCast(u32, self.locals.items.len); - var nlist = macho.nlist_64{ - .n_strx = undef.n_strx, - .n_type = macho.N_SECT, - .n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1), - .n_desc = 0, - .n_value = 0, - }; - try self.locals.append(self.base.allocator, nlist); - const global_sym_index = @intCast(u32, self.globals.items.len); - nlist.n_type |= macho.N_EXT; - nlist.n_desc = macho.N_WEAK_DEF; - try self.globals.append(self.base.allocator, nlist); + const n_strx = try self.makeString("dyld_stub_binder"); + const sym_index = @intCast(u32, self.undefs.items.len); + try self.undefs.append(self.base.allocator, .{ + .n_strx = n_strx, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ + .where = .undef, + .where_index = sym_index, + }); + const sym = &self.undefs.items[sym_index]; + const sym_name = self.getString(n_strx); - _ = unresolved.fetchSwapRemove(resolv.where_index); + for (self.dylibs.items) |dylib, id| { + if (!dylib.symbols.contains(sym_name)) continue; - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - resolv.* = .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = local_sym_index, - }; - - // We create an empty atom for this symbol. - // TODO perhaps we should special-case special symbols? Create a separate - // linked list of atoms? - const block = try self.base.allocator.create(TextBlock); - block.* = TextBlock.empty; - block.local_sym_index = local_sym_index; - block.size = 0; - block.alignment = 0; - try self.managed_blocks.append(self.base.allocator, block); - - if (self.blocks.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; - } else { - try self.blocks.putNoClobber(self.base.allocator, match, block); + const dylib_id = @intCast(u16, id); + if (!self.referenced_dylibs.contains(dylib_id)) { + try self.addLoadDylibLC(dylib_id); + try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); } + + const ordinal = self.referenced_dylibs.getIndex(dylib_id) orelse unreachable; + sym.n_type |= macho.N_EXT; + sym.n_desc = @intCast(u16, ordinal + 1) * macho.N_SYMBOL_RESOLVER; + self.dyld_stub_binder_index = sym_index; + + break; } - for (unresolved.keys()) |index| { - const sym = self.undefs.items[index]; - const sym_name = self.getString(sym.n_strx); - const resolv = self.symbol_resolver.get(sym.n_strx) orelse unreachable; - + if (self.dyld_stub_binder_index == null) { log.err("undefined reference to symbol '{s}'", .{sym_name}); - log.err(" first referenced in '{s}'", .{self.objects.items[resolv.file].name}); - } - - if (unresolved.count() > 0) return error.UndefinedSymbolReference; + } + + // Add dyld_stub_binder as the final GOT entry. 
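// (Every lazy-binding stub ultimately jumps through this slot: the
// stub-helper preamble loads dyld_stub_binder's GOT entry and branches to it,
// so the entry must exist even when no object file references the symbol
// directly.)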
+ const got_entry = GotIndirectionKey{ + .where = .undef, + .where_index = self.dyld_stub_binder_index.?, + }; + const atom = try self.createGotAtom(got_entry); + try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, atom); + const match = MatchingSection{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }; + const atom_sym = &self.locals.items[atom.local_sym_index]; + const vaddr = try self.allocateAtom(atom, @sizeOf(u64), 8, match); + atom_sym.n_value = vaddr; + atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); } -fn parseTextBlocks(self: *MachO) !void { +fn parseObjectsIntoAtoms(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + var parsed_atoms = Object.ParsedAtoms.init(self.base.allocator); + defer parsed_atoms.deinit(); + + var first_atoms = Object.ParsedAtoms.init(self.base.allocator); + defer first_atoms.deinit(); + + var section_metadata = std.AutoHashMap(MatchingSection, struct { + size: u64, + alignment: u32, + }).init(self.base.allocator); + defer section_metadata.deinit(); + for (self.objects.items) |*object, object_id| { - try object.parseTextBlocks(self.base.allocator, @intCast(u16, object_id), self); + if (object.analyzed) continue; + + var atoms_in_objects = try object.parseIntoAtoms(self.base.allocator, @intCast(u16, object_id), self); + defer atoms_in_objects.deinit(); + + var it = atoms_in_objects.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + const last_atom = entry.value_ptr.*; + var atom = last_atom; + + const metadata = try section_metadata.getOrPut(match); + if (!metadata.found_existing) { + metadata.value_ptr.* = .{ + .size = 0, + .alignment = 0, + }; + } + + while (true) { + const alignment = try math.powi(u32, 2, atom.alignment); + metadata.value_ptr.size += mem.alignForwardGeneric(u64, atom.size, alignment); + metadata.value_ptr.alignment = math.max(metadata.value_ptr.alignment, atom.alignment); + + const sym = self.locals.items[atom.local_sym_index]; + log.debug(" {s}: n_value=0x{x}, size=0x{x}, alignment=0x{x}", .{ + self.getString(sym.n_strx), + sym.n_value, + atom.size, + atom.alignment, + }); + + if (atom.prev) |prev| { + atom = prev; + } else break; + } + + if (parsed_atoms.getPtr(match)) |last| { + last.*.next = atom; + atom.prev = last.*; + last.* = atom; + } + _ = try parsed_atoms.put(match, last_atom); + + if (!first_atoms.contains(match)) { + try first_atoms.putNoClobber(match, atom); + } + } + + object.analyzed = true; + } + + var it = section_metadata.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + const metadata = entry.value_ptr.*; + const seg = &self.load_commands.items[match.seg].Segment; + const sect = &seg.sections.items[match.sect]; + log.debug("{s},{s} => size: 0x{x}, alignment: 0x{x}", .{ + commands.segmentName(sect.*), + commands.sectionName(sect.*), + metadata.size, + metadata.alignment, + }); + + const sect_size = if (self.atoms.get(match)) |last| blk: { + const last_atom_sym = self.locals.items[last.local_sym_index]; + break :blk last_atom_sym.n_value + last.size - sect.addr; + } else 0; + + sect.@"align" = math.max(sect.@"align", metadata.alignment); + const needed_size = @intCast(u32, metadata.size + sect_size); + try self.growSection(match, needed_size); + sect.size = needed_size; + + var base_vaddr = if (self.atoms.get(match)) |last| blk: { + const last_atom_sym = 
self.locals.items[last.local_sym_index]; + break :blk last_atom_sym.n_value + last.size; + } else sect.addr; + const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + + var atom = first_atoms.get(match).?; + while (true) { + const alignment = try math.powi(u32, 2, atom.alignment); + base_vaddr = mem.alignForwardGeneric(u64, base_vaddr, alignment); + + const sym = &self.locals.items[atom.local_sym_index]; + sym.n_value = base_vaddr; + sym.n_sect = n_sect; + + log.debug(" {s}: start=0x{x}, end=0x{x}, size=0x{x}, alignment=0x{x}", .{ + self.getString(sym.n_strx), + base_vaddr, + base_vaddr + atom.size, + atom.size, + atom.alignment, + }); + + // Update each alias (if any) + for (atom.aliases.items) |index| { + const alias_sym = &self.locals.items[index]; + alias_sym.n_value = base_vaddr; + alias_sym.n_sect = n_sect; + } + + // Update each symbol contained within the atom + for (atom.contained.items) |sym_at_off| { + const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; + contained_sym.n_value = base_vaddr + sym_at_off.offset; + contained_sym.n_sect = n_sect; + } + + base_vaddr += atom.size; + + if (atom.next) |next| { + atom = next; + } else break; + } + + if (self.atoms.getPtr(match)) |last| { + const first_atom = first_atoms.get(match).?; + last.*.next = first_atom; + first_atom.prev = last.*; + last.* = first_atom; + } + _ = try self.atoms.put(self.base.allocator, match, parsed_atoms.get(match).?); } } -fn populateMetadata(self: *MachO) !void { - if (self.pagezero_segment_cmd_index == null) { - self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__PAGEZERO", .{ - .vmsize = 0x100000000, // size always set to 4GB - }), - }); - } - - if (self.text_segment_cmd_index == null) { - self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__TEXT", .{ - .vmaddr = 0x100000000, // always starts at 4GB - .maxprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, - .initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, - }), - }); - } - - if (self.text_section_index == null) { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.text_section_index = @intCast(u16, text_seg.sections.items.len); - const alignment: u2 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - try text_seg.addSection(self.base.allocator, "__text", .{ - .@"align" = alignment, - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - _ = try self.section_ordinals.getOrPut(self.base.allocator, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }); - } - - if (self.stubs_section_index == null) { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.stubs_section_index = @intCast(u16, text_seg.sections.items.len); - const alignment: u2 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - const stub_size: u4 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 6, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, // unhandled architecture type - }; - try text_seg.addSection(self.base.allocator, "__stubs", .{ - .@"align" = alignment, - .flags = 
macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - .reserved2 = stub_size, - }); - _ = try self.section_ordinals.getOrPut(self.base.allocator, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stubs_section_index.?, - }); - } - - if (self.stub_helper_section_index == null) { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.stub_helper_section_index = @intCast(u16, text_seg.sections.items.len); - const alignment: u2 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - const stub_helper_size: u6 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 15, - .aarch64 => 6 * @sizeOf(u32), - else => unreachable, - }; - try text_seg.addSection(self.base.allocator, "__stub_helper", .{ - .size = stub_helper_size, - .@"align" = alignment, - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - _ = try self.section_ordinals.getOrPut(self.base.allocator, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }); - } - - if (self.data_const_segment_cmd_index == null) { - self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__DATA_CONST", .{ - .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - }), - }); - } - - if (self.got_section_index == null) { - const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - self.got_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__got", .{ - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, - }); - _ = try self.section_ordinals.getOrPut(self.base.allocator, .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.got_section_index.?, - }); - } - - if (self.data_segment_cmd_index == null) { - self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__DATA", .{ - .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - }), - }); - } - - if (self.la_symbol_ptr_section_index == null) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.la_symbol_ptr_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__la_symbol_ptr", .{ - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = macho.S_LAZY_SYMBOL_POINTERS, - }); - _ = try self.section_ordinals.getOrPut(self.base.allocator, .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.la_symbol_ptr_section_index.?, - }); - } - - if (self.data_section_index == null) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__data", .{ - .@"align" = 3, // 2^3 = @sizeOf(u64) - }); - _ = try self.section_ordinals.getOrPut(self.base.allocator, .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }); - } - - if (self.linkedit_segment_cmd_index == null) { - self.linkedit_segment_cmd_index = @intCast(u16, 
self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__LINKEDIT", .{ - .maxprot = macho.VM_PROT_READ, - .initprot = macho.VM_PROT_READ, - }), - }); - } - - if (self.dyld_info_cmd_index == null) { - self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .DyldInfoOnly = .{ - .cmd = macho.LC_DYLD_INFO_ONLY, - .cmdsize = @sizeOf(macho.dyld_info_command), - .rebase_off = 0, - .rebase_size = 0, - .bind_off = 0, - .bind_size = 0, - .weak_bind_off = 0, - .weak_bind_size = 0, - .lazy_bind_off = 0, - .lazy_bind_size = 0, - .export_off = 0, - .export_size = 0, - }, - }); - } - - if (self.symtab_cmd_index == null) { - self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .Symtab = .{ - .cmd = macho.LC_SYMTAB, - .cmdsize = @sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, - }, - }); - } - - if (self.dysymtab_cmd_index == null) { - self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .Dysymtab = .{ - .cmd = macho.LC_DYSYMTAB, - .cmdsize = @sizeOf(macho.dysymtab_command), - .ilocalsym = 0, - .nlocalsym = 0, - .iextdefsym = 0, - .nextdefsym = 0, - .iundefsym = 0, - .nundefsym = 0, - .tocoff = 0, - .ntoc = 0, - .modtaboff = 0, - .nmodtab = 0, - .extrefsymoff = 0, - .nextrefsyms = 0, - .indirectsymoff = 0, - .nindirectsyms = 0, - .extreloff = 0, - .nextrel = 0, - .locreloff = 0, - .nlocrel = 0, - }, - }); - } - - if (self.dylinker_cmd_index == null) { - self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH), - @sizeOf(u64), - )); - var dylinker_cmd = commands.emptyGenericCommandWithData(macho.dylinker_command{ - .cmd = macho.LC_LOAD_DYLINKER, - .cmdsize = cmdsize, - .name = @sizeOf(macho.dylinker_command), - }); - dylinker_cmd.data = try self.base.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name); - mem.set(u8, dylinker_cmd.data, 0); - mem.copy(u8, dylinker_cmd.data, mem.spanZ(DEFAULT_DYLD_PATH)); - try self.load_commands.append(self.base.allocator, .{ .Dylinker = dylinker_cmd }); - } - - if (self.main_cmd_index == null and self.base.options.output_mode == .Exe) { - self.main_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .Main = .{ - .cmd = macho.LC_MAIN, - .cmdsize = @sizeOf(macho.entry_point_command), - .entryoff = 0x0, - .stacksize = 0, - }, - }); - } - - if (self.dylib_id_cmd_index == null and self.base.options.output_mode == .Lib) { - self.dylib_id_cmd_index = @intCast(u16, self.load_commands.items.len); - const install_name = try std.fmt.allocPrint(self.base.allocator, "@rpath/{s}", .{ - self.base.options.emit.?.sub_path, - }); - defer self.base.allocator.free(install_name); - var dylib_cmd = try commands.createLoadDylibCommand( - self.base.allocator, - install_name, - 2, - 0x10000, // TODO forward user-provided versions - 0x10000, - ); - errdefer dylib_cmd.deinit(self.base.allocator); - dylib_cmd.inner.cmd = macho.LC_ID_DYLIB; - try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); - } - - if (self.source_version_cmd_index == null) { - self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len); - try 
self.load_commands.append(self.base.allocator, .{ - .SourceVersion = .{ - .cmd = macho.LC_SOURCE_VERSION, - .cmdsize = @sizeOf(macho.source_version_command), - .version = 0x0, - }, - }); - } - - if (self.build_version_cmd_index == null) { - self.build_version_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version), - @sizeOf(u64), - )); - const ver = self.base.options.target.os.version_range.semver.min; - const version = ver.major << 16 | ver.minor << 8 | ver.patch; - const is_simulator_abi = self.base.options.target.abi == .simulator; - var cmd = commands.emptyGenericCommandWithData(macho.build_version_command{ - .cmd = macho.LC_BUILD_VERSION, - .cmdsize = cmdsize, - .platform = switch (self.base.options.target.os.tag) { - .macos => macho.PLATFORM_MACOS, - .ios => if (is_simulator_abi) macho.PLATFORM_IOSSIMULATOR else macho.PLATFORM_IOS, - .watchos => if (is_simulator_abi) macho.PLATFORM_WATCHOSSIMULATOR else macho.PLATFORM_WATCHOS, - .tvos => if (is_simulator_abi) macho.PLATFORM_TVOSSIMULATOR else macho.PLATFORM_TVOS, - else => unreachable, - }, - .minos = version, - .sdk = version, - .ntools = 1, - }); - const ld_ver = macho.build_tool_version{ - .tool = macho.TOOL_LD, - .version = 0x0, - }; - cmd.data = try self.base.allocator.alloc(u8, cmdsize - @sizeOf(macho.build_version_command)); - mem.set(u8, cmd.data, 0); - mem.copy(u8, cmd.data, mem.asBytes(&ld_ver)); - try self.load_commands.append(self.base.allocator, .{ .BuildVersion = cmd }); - } - - if (self.uuid_cmd_index == null) { - self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); - var uuid_cmd: macho.uuid_command = .{ - .cmd = macho.LC_UUID, - .cmdsize = @sizeOf(macho.uuid_command), - .uuid = undefined, - }; - std.crypto.random.bytes(&uuid_cmd.uuid); - try self.load_commands.append(self.base.allocator, .{ .Uuid = uuid_cmd }); - } -} - -fn addDataInCodeLC(self: *MachO) !void { - if (self.data_in_code_cmd_index == null) { - self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .LinkeditData = .{ - .cmd = macho.LC_DATA_IN_CODE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - } +fn addLoadDylibLC(self: *MachO, id: u16) !void { + const dylib = self.dylibs.items[id]; + const dylib_id = dylib.id orelse unreachable; + var dylib_cmd = try commands.createLoadDylibCommand( + self.base.allocator, + dylib_id.name, + dylib_id.timestamp, + dylib_id.current_version, + dylib_id.compatibility_version, + ); + errdefer dylib_cmd.deinit(self.base.allocator); + try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); + self.load_commands_dirty = true; } fn addCodeSignatureLC(self: *MachO) !void { - if (self.code_signature_cmd_index == null and self.requires_adhoc_codesig) { - self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .LinkeditData = .{ - .cmd = macho.LC_CODE_SIGNATURE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - } -} - -fn addRpathLCs(self: *MachO, rpaths: []const []const u8) !void { - for (rpaths) |rpath| { - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.rpath_command) + rpath.len + 1, - @sizeOf(u64), - )); - var rpath_cmd = 
commands.emptyGenericCommandWithData(macho.rpath_command{ - .cmd = macho.LC_RPATH, - .cmdsize = cmdsize, - .path = @sizeOf(macho.rpath_command), - }); - rpath_cmd.data = try self.base.allocator.alloc(u8, cmdsize - rpath_cmd.inner.path); - mem.set(u8, rpath_cmd.data, 0); - mem.copy(u8, rpath_cmd.data, rpath); - try self.load_commands.append(self.base.allocator, .{ .Rpath = rpath_cmd }); - } -} - -fn addLoadDylibLCs(self: *MachO) !void { - for (self.referenced_dylibs.keys()) |id| { - const dylib = self.dylibs.items[id]; - const dylib_id = dylib.id orelse unreachable; - var dylib_cmd = try commands.createLoadDylibCommand( - self.base.allocator, - dylib_id.name, - dylib_id.timestamp, - dylib_id.current_version, - dylib_id.compatibility_version, - ); - errdefer dylib_cmd.deinit(self.base.allocator); - try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); - } -} - -fn flushZld(self: *MachO) !void { + if (self.code_signature_cmd_index != null or !self.requires_adhoc_codesig) return; + self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .LinkeditData = .{ + .cmd = macho.LC_CODE_SIGNATURE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = 0, + .datasize = 0, + }, + }); self.load_commands_dirty = true; - try self.writeTextBlocks(); - try self.writeStubHelperCommon(); - - if (self.common_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; - } - - if (self.bss_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; - } - - if (self.tlv_bss_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; - } - - try self.writeGotEntries(); - try self.setEntryPoint(); - try self.writeRebaseInfoTableZld(); - try self.writeBindInfoTableZld(); - try self.writeLazyBindInfoTableZld(); - try self.writeExportInfoZld(); - try self.writeDices(); - - { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - } - - try self.writeSymbolTable(); - try self.writeStringTableZld(); - - { - // Seal __LINKEDIT size - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); - } - - if (self.requires_adhoc_codesig) { - try self.writeCodeSignaturePadding(); - } - - try self.writeLoadCommands(); - try self.writeHeader(); - - if (self.requires_adhoc_codesig) { - try self.writeCodeSignature(); - } -} - -fn writeGotEntries(self: *MachO) !void { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[self.got_section_index.?]; - - var buffer = try self.base.allocator.alloc(u8, self.got_entries.items.len * @sizeOf(u64)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - var writer = stream.writer(); - - for (self.got_entries.items) |key| { - const address: u64 = switch (key.where) { - .local => self.locals.items[key.where_index].n_value, - .undef => 0, - }; - try writer.writeIntLittle(u64, address); 
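// In the atom-based scheme that replaces this bulk rewrite, each GOT slot is a
// standalone 8-byte atom whose content is produced by resolving a pointer-sized
// relocation against the target symbol, so no whole-section pass is needed. Roughly
// (the helper and payload names here are illustrative of the pattern, not verbatim
// from createGotAtom):
//
//     const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3);
//     try atom.relocs.append(self.base.allocator, .{
//         .offset = 0,
//         .where = key.where, // .local or .undef
//         .where_index = key.where_index,
//         .payload = .{ .unsigned = .{ .addend = 0, .is_64bit = true } },
//     });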
- } - - log.debug("writing GOT pointers at 0x{x} to 0x{x}", .{ sect.offset, sect.offset + buffer.len }); - - try self.base.file.?.pwriteAll(buffer, sect.offset); } fn setEntryPoint(self: *MachO) !void { @@ -2984,388 +2821,8 @@ fn setEntryPoint(self: *MachO) !void { const ec = &self.load_commands.items[self.main_cmd_index.?].Main; ec.entryoff = @intCast(u32, sym.n_value - seg.inner.vmaddr); ec.stacksize = self.base.options.stack_size_override orelse 0; -} - -fn writeRebaseInfoTableZld(self: *MachO) !void { - var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); - defer pointers.deinit(); - - { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - if (match.seg == self.text_segment_cmd_index.?) continue; // __TEXT is non-writable - - const seg = self.load_commands.items[match.seg].Segment; - - while (true) { - const sym = self.locals.items[block.local_sym_index]; - const base_offset = sym.n_value - seg.inner.vmaddr; - - for (block.rebases.items) |offset| { - try pointers.append(.{ - .offset = base_offset + offset, - .segment_id = match.seg, - }); - } - - if (block.prev) |prev| { - block = prev; - } else break; - } - } - } - - if (self.got_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - - for (self.got_entries.items) |entry, i| { - if (entry.where == .undef) continue; - - try pointers.append(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - }); - } - } - - if (self.la_symbol_ptr_section_index) |idx| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - - try pointers.ensureUnusedCapacity(self.stubs.items.len); - for (self.stubs.items) |_, i| { - pointers.appendAssumeCapacity(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - }); - } - } - - std.sort.sort(bind.Pointer, pointers.items, {}, bind.pointerCmp); - - const size = try bind.rebaseInfoSize(pointers.items); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try bind.writeRebaseInfo(pointers.items, stream.writer()); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.rebase_off = @intCast(u32, seg.inner.fileoff); - dyld_info.rebase_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @sizeOf(u64))); - seg.inner.filesize += dyld_info.rebase_size; - - log.debug("writing rebase info from 0x{x} to 0x{x}", .{ dyld_info.rebase_off, dyld_info.rebase_off + dyld_info.rebase_size }); - - try self.base.file.?.pwriteAll(buffer, dyld_info.rebase_off); -} - -fn writeBindInfoTableZld(self: *MachO) !void { - var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); - defer pointers.deinit(); - - if (self.got_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const 
segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - - for (self.got_entries.items) |entry, i| { - if (entry.where == .local) continue; - - const sym = self.undefs.items[entry.where_index]; - try pointers.append(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = @divExact(sym.n_desc, macho.N_SYMBOL_RESOLVER), - .name = self.getString(sym.n_strx), - }); - } - } - - { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - if (match.seg == self.text_segment_cmd_index.?) continue; // __TEXT is non-writable - - const seg = self.load_commands.items[match.seg].Segment; - - while (true) { - const sym = self.locals.items[block.local_sym_index]; - const base_offset = sym.n_value - seg.inner.vmaddr; - - for (block.bindings.items) |binding| { - const bind_sym = self.undefs.items[binding.local_sym_index]; - try pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = match.seg, - .dylib_ordinal = @divExact(bind_sym.n_desc, macho.N_SYMBOL_RESOLVER), - .name = self.getString(bind_sym.n_strx), - }); - } - - if (block.prev) |prev| { - block = prev; - } else break; - } - } - } - - const size = try bind.bindInfoSize(pointers.items); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try bind.writeBindInfo(pointers.items, stream.writer()); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.bind_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dyld_info.bind_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); - seg.inner.filesize += dyld_info.bind_size; - - log.debug("writing binding info from 0x{x} to 0x{x}", .{ dyld_info.bind_off, dyld_info.bind_off + dyld_info.bind_size }); - - try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off); -} - -fn writeLazyBindInfoTableZld(self: *MachO) !void { - var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); - defer pointers.deinit(); - - if (self.la_symbol_ptr_section_index) |idx| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - - try pointers.ensureUnusedCapacity(self.stubs.items.len); - - for (self.stubs.items) |import_id, i| { - const sym = self.undefs.items[import_id]; - pointers.appendAssumeCapacity(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = @divExact(sym.n_desc, macho.N_SYMBOL_RESOLVER), - .name = self.getString(sym.n_strx), - }); - } - } - - const size = try bind.lazyBindInfoSize(pointers.items); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try bind.writeLazyBindInfo(pointers.items, stream.writer()); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.lazy_bind_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dyld_info.lazy_bind_size = @intCast(u32, mem.alignForwardGeneric(u64, 
buffer.len, @alignOf(u64))); - seg.inner.filesize += dyld_info.lazy_bind_size; - - log.debug("writing lazy binding info from 0x{x} to 0x{x}", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size }); - - try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off); - try self.populateLazyBindOffsetsInStubHelper(buffer); -} - -fn writeExportInfoZld(self: *MachO) !void { - var trie: Trie = .{}; - defer trie.deinit(self.base.allocator); - - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const base_address = text_segment.inner.vmaddr; - - // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. - log.debug("writing export trie", .{}); - - for (self.globals.items) |sym| { - const sym_name = self.getString(sym.n_strx); - log.debug(" | putting '{s}' defined at 0x{x}", .{ sym_name, sym.n_value }); - - try trie.put(self.base.allocator, .{ - .name = sym_name, - .vmaddr_offset = sym.n_value - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); - } - - try trie.finalize(self.base.allocator); - - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, trie.size)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - const nwritten = try trie.write(stream.writer()); - assert(nwritten == trie.size); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.export_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dyld_info.export_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); - seg.inner.filesize += dyld_info.export_size; - - log.debug("writing export info from 0x{x} to 0x{x}", .{ dyld_info.export_off, dyld_info.export_off + dyld_info.export_size }); - - try self.base.file.?.pwriteAll(buffer, dyld_info.export_off); -} - -fn writeSymbolTable(self: *MachO) !void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - - var locals = std.ArrayList(macho.nlist_64).init(self.base.allocator); - defer locals.deinit(); - try locals.appendSlice(self.locals.items); - - if (self.has_stabs) { - for (self.objects.items) |object| { - if (object.debug_info == null) continue; - - // Open scope - try locals.ensureUnusedCapacity(3); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.tu_comp_dir.?), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.tu_name.?), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.name), - .n_type = macho.N_OSO, - .n_sect = 0, - .n_desc = 1, - .n_value = object.mtime orelse 0, - }); - - for (object.text_blocks.items) |block| { - if (block.stab) |stab| { - const nlists = try stab.asNlists(block.local_sym_index, self); - defer self.base.allocator.free(nlists); - try locals.appendSlice(nlists); - } else { - for (block.contained.items) |sym_at_off| { - const stab = sym_at_off.stab orelse continue; - const nlists = try stab.asNlists(sym_at_off.local_sym_index, self); - defer self.base.allocator.free(nlists); - try locals.appendSlice(nlists); - } - } - } - - // Close scope - try locals.append(.{ - .n_strx = 0, - .n_type = 
macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - } - } - - const nlocals = locals.items.len; - const nexports = self.globals.items.len; - const nundefs = self.undefs.items.len; - - const locals_off = symtab.symoff + symtab.nsyms * @sizeOf(macho.nlist_64); - const locals_size = nlocals * @sizeOf(macho.nlist_64); - log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); - - const exports_off = locals_off + locals_size; - const exports_size = nexports * @sizeOf(macho.nlist_64); - log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), exports_off); - - const undefs_off = exports_off + exports_size; - const undefs_size = nundefs * @sizeOf(macho.nlist_64); - log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.undefs.items), undefs_off); - - symtab.nsyms += @intCast(u32, nlocals + nexports + nundefs); - seg.inner.filesize += locals_size + exports_size + undefs_size; - - // Update dynamic symbol table. - const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - dysymtab.nlocalsym += @intCast(u32, nlocals); - dysymtab.iextdefsym = dysymtab.nlocalsym; - dysymtab.nextdefsym = @intCast(u32, nexports); - dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; - dysymtab.nundefsym = @intCast(u32, nundefs); - - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = &text_segment.sections.items[self.stubs_section_index.?]; - const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_segment.sections.items[self.got_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - - const nstubs = @intCast(u32, self.stubs.items.len); - const ngot_entries = @intCast(u32, self.got_entries.items.len); - - dysymtab.indirectsymoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dysymtab.nindirectsyms = nstubs * 2 + ngot_entries; - - const needed_size = dysymtab.nindirectsyms * @sizeOf(u32); - seg.inner.filesize += needed_size; - - log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ - dysymtab.indirectsymoff, - dysymtab.indirectsymoff + needed_size, - }); - - var buf = try self.base.allocator.alloc(u8, needed_size); - defer self.base.allocator.free(buf); - - var stream = std.io.fixedBufferStream(buf); - var writer = stream.writer(); - - stubs.reserved1 = 0; - for (self.stubs.items) |id| { - try writer.writeIntLittle(u32, dysymtab.iundefsym + id); - } - - got.reserved1 = nstubs; - for (self.got_entries.items) |entry| { - switch (entry.where) { - .undef => { - try writer.writeIntLittle(u32, dysymtab.iundefsym + entry.where_index); - }, - .local => { - try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); - }, - } - } - - la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; - for (self.stubs.items) |id| { - try writer.writeIntLittle(u32, dysymtab.iundefsym + id); - } - - try self.base.file.?.pwriteAll(buf, dysymtab.indirectsymoff); + self.entry_addr = sym.n_value; + self.load_commands_dirty = true; } pub fn deinit(self: *MachO) void { @@ -3378,11 
+2835,7 @@ pub fn deinit(self: *MachO) void { } self.section_ordinals.deinit(self.base.allocator); - self.pending_updates.deinit(self.base.allocator); - self.got_entries.deinit(self.base.allocator); self.got_entries_map.deinit(self.base.allocator); - self.got_entries_free_list.deinit(self.base.allocator); - self.stubs.deinit(self.base.allocator); self.stubs_map.deinit(self.base.allocator); self.strtab_dir.deinit(self.base.allocator); self.strtab.deinit(self.base.allocator); @@ -3392,6 +2845,8 @@ pub fn deinit(self: *MachO) void { self.locals.deinit(self.base.allocator); self.locals_free_list.deinit(self.base.allocator); self.symbol_resolver.deinit(self.base.allocator); + self.unresolved.deinit(self.base.allocator); + self.tentatives.deinit(self.base.allocator); for (self.objects.items) |*object| { object.deinit(self.base.allocator); @@ -3415,14 +2870,19 @@ pub fn deinit(self: *MachO) void { } self.load_commands.deinit(self.base.allocator); - for (self.managed_blocks.items) |block| { - block.deinit(self.base.allocator); - self.base.allocator.destroy(block); + for (self.managed_atoms.items) |atom| { + atom.deinit(self.base.allocator); + self.base.allocator.destroy(atom); + } + self.managed_atoms.deinit(self.base.allocator); + self.atoms.deinit(self.base.allocator); + { + var it = self.atom_free_lists.valueIterator(); + while (it.next()) |free_list| { + free_list.deinit(self.base.allocator); + } + self.atom_free_lists.deinit(self.base.allocator); } - self.managed_blocks.deinit(self.base.allocator); - self.blocks.deinit(self.base.allocator); - self.text_block_free_list.deinit(self.base.allocator); - for (self.decls.keys()) |decl| { decl.link.macho.deinit(self.base.allocator); } @@ -3441,20 +2901,21 @@ pub fn closeFiles(self: MachO) void { } } -fn freeTextBlock(self: *MachO, text_block: *TextBlock) void { - log.debug("freeTextBlock {*}", .{text_block}); - text_block.deinit(self.base.allocator); +fn freeAtom(self: *MachO, atom: *Atom, match: MatchingSection) void { + log.debug("freeAtom {*}", .{atom}); + atom.deinit(self.base.allocator); + const free_list = self.atom_free_lists.getPtr(match).?; var already_have_free_list_node = false; { var i: usize = 0; - // TODO turn text_block_free_list into a hash map - while (i < self.text_block_free_list.items.len) { - if (self.text_block_free_list.items[i] == text_block) { - _ = self.text_block_free_list.swapRemove(i); + // TODO turn free_list into a hash map + while (i < free_list.items.len) { + if (free_list.items[i] == atom) { + _ = free_list.swapRemove(i); continue; } - if (self.text_block_free_list.items[i] == text_block.prev) { + if (free_list.items[i] == atom.prev) { already_have_free_list_node = true; } i += 1; @@ -3462,75 +2923,81 @@ fn freeTextBlock(self: *MachO, text_block: *TextBlock) void { } // TODO process free list for dbg info just like we do above for vaddrs - if (self.last_text_block == text_block) { - // TODO shrink the __text section size here - self.last_text_block = text_block.prev; - } - if (self.d_sym) |*ds| { - if (ds.dbg_info_decl_first == text_block) { - ds.dbg_info_decl_first = text_block.dbg_info_next; - } - if (ds.dbg_info_decl_last == text_block) { - // TODO shrink the .debug_info section size here - ds.dbg_info_decl_last = text_block.dbg_info_prev; + if (self.atoms.getPtr(match)) |last_atom| { + if (last_atom.* == atom) { + if (atom.prev) |prev| { + // TODO shrink the section size here + last_atom.* = prev; + } else { + _ = self.atoms.fetchRemove(match); + } } } - if (text_block.prev) |prev| { - prev.next = 
text_block.next; + if (self.d_sym) |*ds| { + if (ds.dbg_info_decl_first == atom) { + ds.dbg_info_decl_first = atom.dbg_info_next; + } + if (ds.dbg_info_decl_last == atom) { + // TODO shrink the .debug_info section size here + ds.dbg_info_decl_last = atom.dbg_info_prev; + } + } + + if (atom.prev) |prev| { + prev.next = atom.next; if (!already_have_free_list_node and prev.freeListEligible(self.*)) { // The free list is heuristics, it doesn't have to be perfect, so we can ignore // the OOM here. - self.text_block_free_list.append(self.base.allocator, prev) catch {}; + free_list.append(self.base.allocator, prev) catch {}; } } else { - text_block.prev = null; + atom.prev = null; } - if (text_block.next) |next| { - next.prev = text_block.prev; + if (atom.next) |next| { + next.prev = atom.prev; } else { - text_block.next = null; + atom.next = null; } - if (text_block.dbg_info_prev) |prev| { - prev.dbg_info_next = text_block.dbg_info_next; + if (atom.dbg_info_prev) |prev| { + prev.dbg_info_next = atom.dbg_info_next; - // TODO the free list logic like we do for text blocks above + // TODO the free list logic like we do for atoms above } else { - text_block.dbg_info_prev = null; + atom.dbg_info_prev = null; } - if (text_block.dbg_info_next) |next| { - next.dbg_info_prev = text_block.dbg_info_prev; + if (atom.dbg_info_next) |next| { + next.dbg_info_prev = atom.dbg_info_prev; } else { - text_block.dbg_info_next = null; + atom.dbg_info_next = null; } } -fn shrinkTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64) void { +fn shrinkAtom(self: *MachO, atom: *Atom, new_block_size: u64, match: MatchingSection) void { _ = self; - _ = text_block; + _ = atom; _ = new_block_size; + _ = match; // TODO check the new capacity, and if it crosses the size threshold into a big enough // capacity, insert a free list node for it. 
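//     One possible shape for this TODO, reusing the freeListEligible heuristic that
//     freeAtom relies on above (untested sketch; the append is allowed to fail
//     because the free lists are heuristics anyway):
//
//         if (atom.freeListEligible(self.*)) {
//             const free_list = self.atom_free_lists.getPtr(match).?;
//             free_list.append(self.base.allocator, atom) catch {};
//         }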
} -fn growTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, alignment: u64) !u64 { - const sym = self.locals.items[text_block.local_sym_index]; +fn growAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, match: MatchingSection) !u64 { + const sym = self.locals.items[atom.local_sym_index]; const align_ok = mem.alignBackwardGeneric(u64, sym.n_value, alignment) == sym.n_value; - const need_realloc = !align_ok or new_block_size > text_block.capacity(self.*); + const need_realloc = !align_ok or new_atom_size > atom.capacity(self.*); if (!need_realloc) return sym.n_value; - return self.allocateTextBlock(text_block, new_block_size, alignment); + return self.allocateAtom(atom, new_atom_size, alignment, match); } pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { if (decl.link.macho.local_sym_index != 0) return; try self.locals.ensureUnusedCapacity(self.base.allocator, 1); - try self.got_entries.ensureUnusedCapacity(self.base.allocator, 1); - try self.decls.putNoClobber(self.base.allocator, decl, {}); if (self.locals_free_list.popOrNull()) |i| { @@ -3542,20 +3009,6 @@ pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { _ = self.locals.addOneAssumeCapacity(); } - const got_index: u32 = blk: { - if (self.got_entries_free_list.popOrNull()) |i| { - log.debug("reusing GOT entry index {d} for {s}", .{ i, decl.name }); - break :blk i; - } else { - const got_index = @intCast(u32, self.got_entries.items.len); - log.debug("allocating GOT entry index {d} for {s}", .{ got_index, decl.name }); - _ = self.got_entries.addOneAssumeCapacity(); - self.got_entries_count_dirty = true; - self.rebase_info_dirty = true; - break :blk got_index; - } - }; - self.locals.items[decl.link.macho.local_sym_index] = .{ .n_strx = 0, .n_type = 0, @@ -3563,12 +3016,14 @@ pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { .n_desc = 0, .n_value = 0, }; - const got_entry = GotIndirectionKey{ + + // TODO try popping from free list first before allocating a new GOT atom. + const key = GotIndirectionKey{ .where = .local, .where_index = decl.link.macho.local_sym_index, }; - self.got_entries.items[got_index] = got_entry; - try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, got_index); + const got_atom = try self.createGotAtom(key); + try self.got_entries_map.put(self.base.allocator, key, got_atom); } pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liveness: Liveness) !void { @@ -3582,6 +3037,12 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv defer tracy.end(); const decl = func.owner_decl; + // TODO clearing the code and relocs buffer should probably be orchestrated + // in a different, smarter, more automatic way somewhere else, in a more centralised + // way than this. + // If we don't clear the buffers here, we are up for some nasty surprises when + // this atom is reused later on and was not freed by freeAtom(). + decl.link.macho.clearRetainingCapacity(); var code_buffer = std.ArrayList(u8).init(self.base.allocator); defer code_buffer.deinit(); @@ -3617,12 +3078,6 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv try codegen.generateFunction(&self.base, decl.srcLoc(), func, air, liveness, &code_buffer, .none); switch (res) { .appended => { - // TODO clearing the code and relocs buffer should probably be orchestrated - // in a different, smarter, more automatic way somewhere else, in a more centralised - // way than this. 
- // If we don't clear the buffers here, we are up for some nasty surprises when - // this TextBlock is reused later on and was not freed by freeTextBlock(). - decl.link.macho.code.clearAndFree(self.base.allocator); try decl.link.macho.code.appendSlice(self.base.allocator, code_buffer.items); }, .fail => |em| { @@ -3632,9 +3087,7 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv }, } - const symbol = try self.placeDecl(decl, decl.link.macho.code.items.len); - - try self.writeCode(symbol, decl.link.macho.code.items); + _ = try self.placeDecl(decl, decl.link.macho.code.items.len); if (debug_buffers) |db| { try self.d_sym.?.commitDeclDebugInfo( @@ -3713,7 +3166,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { // in a different, smarter, more automatic way somewhere else, in a more centralised // way than this. // If we don't clear the buffers here, we are up for some nasty surprises when - // this TextBlock is reused later on and was not freed by freeTextBlock(). + // this atom is reused later on and was not freed by freeAtom(). decl.link.macho.code.clearAndFree(self.base.allocator); try decl.link.macho.code.appendSlice(self.base.allocator, code_buffer.items); break :blk decl.link.macho.code.items; @@ -3725,9 +3178,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { }, } }; - const symbol = try self.placeDecl(decl, code.len); - - try self.writeCode(symbol, code); + _ = try self.placeDecl(decl, code.len); // Since we updated the vaddr and the size, each corresponding export symbol also // needs to be updated. @@ -3744,24 +3195,40 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64 const capacity = decl.link.macho.capacity(self.*); const need_realloc = code_len > capacity or !mem.isAlignedGeneric(u64, symbol.n_value, required_alignment); if (need_realloc) { - const vaddr = try self.growTextBlock(&decl.link.macho, code_len, required_alignment); + const vaddr = try self.growAtom(&decl.link.macho, code_len, required_alignment, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }); log.debug("growing {s} and moving from 0x{x} to 0x{x}", .{ decl.name, symbol.n_value, vaddr }); if (vaddr != symbol.n_value) { log.debug(" (writing new GOT entry)", .{}); - const got_index = self.got_entries_map.get(.{ + const got_atom = self.got_entries_map.get(.{ .where = .local, .where_index = decl.link.macho.local_sym_index, }) orelse unreachable; - try self.writeGotEntry(got_index); + const got_sym = &self.locals.items[got_atom.local_sym_index]; + const got_vaddr = try self.allocateAtom(got_atom, @sizeOf(u64), 8, .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }); + got_sym.n_value = got_vaddr; + got_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(.{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }).? 
+ 1); } symbol.n_value = vaddr; } else if (code_len < decl.link.macho.size) { - self.shrinkTextBlock(&decl.link.macho, code_len); + self.shrinkAtom(&decl.link.macho, code_len, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }); } decl.link.macho.size = code_len; + decl.link.macho.dirty = true; const new_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{mem.spanZ(decl.name)}); defer self.base.allocator.free(new_name); @@ -3770,20 +3237,22 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64 symbol.n_type = macho.N_SECT; symbol.n_sect = @intCast(u8, self.text_section_index.?) + 1; symbol.n_desc = 0; - - try self.writeLocalSymbol(decl.link.macho.local_sym_index); - if (self.d_sym) |*ds| - try ds.writeLocalSymbol(decl.link.macho.local_sym_index); } else { const decl_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{mem.spanZ(decl.name)}); defer self.base.allocator.free(decl_name); const name_str_index = try self.makeString(decl_name); - const addr = try self.allocateTextBlock(&decl.link.macho, code_len, required_alignment); + const addr = try self.allocateAtom(&decl.link.macho, code_len, required_alignment, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }); - log.debug("allocated text block for {s} at 0x{x}", .{ decl_name, addr }); + log.debug("allocated atom for {s} at 0x{x}", .{ decl_name, addr }); - errdefer self.freeTextBlock(&decl.link.macho); + errdefer self.freeAtom(&decl.link.macho, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }); symbol.* = .{ .n_strx = name_str_index, @@ -3792,49 +3261,25 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64 .n_desc = 0, .n_value = addr, }; - const got_index = self.got_entries_map.get(.{ + const got_atom = self.got_entries_map.get(.{ .where = .local, .where_index = decl.link.macho.local_sym_index, }) orelse unreachable; - try self.writeGotEntry(got_index); - - try self.writeLocalSymbol(decl.link.macho.local_sym_index); - if (self.d_sym) |*ds| - try ds.writeLocalSymbol(decl.link.macho.local_sym_index); - } - - // Resolve relocations - try decl.link.macho.resolveRelocs(self); - // TODO this requires further investigation: should we dispose of resolved relocs, or keep them - // so that we can reapply them when moving/growing sections? - decl.link.macho.relocs.clearAndFree(self.base.allocator); - - // Apply pending updates - while (self.pending_updates.popOrNull()) |update| { - switch (update.kind) { - .got => unreachable, - .stub => { - try self.writeStub(update.index); - try self.writeStubInStubHelper(update.index); - try self.writeLazySymbolPointer(update.index); - self.rebase_info_dirty = true; - self.lazy_binding_info_dirty = true; - }, - } + const got_sym = &self.locals.items[got_atom.local_sym_index]; + const vaddr = try self.allocateAtom(got_atom, @sizeOf(u64), 8, .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }); + got_sym.n_value = vaddr; + got_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(.{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }).? 
+ 1); } return symbol; } -fn writeCode(self: *MachO, symbol: *macho.nlist_64, code: []const u8) !void { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const text_section = text_segment.sections.items[self.text_section_index.?]; - const section_offset = symbol.n_value - text_section.addr; - const file_offset = text_section.offset + section_offset; - log.debug("writing code for symbol {s} at file offset 0x{x}", .{ self.getString(symbol.n_strx), file_offset }); - try self.base.file.?.pwriteAll(code, file_offset); -} - pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl: *const Module.Decl) !void { if (self.d_sym) |*ds| { try ds.updateDeclLineNumber(module, decl); @@ -3847,6 +3292,8 @@ pub fn updateDeclExports( decl: *Module.Decl, exports: []const *Module.Export, ) !void { + // TODO If we are exporting with global linkage, check for already defined globals and flag + // symbol duplicate/collision! if (build_options.skip_non_native and builtin.object_format != .macho) { @panic("Attempted to compile for object format that was disabled by build configuration"); } @@ -3886,15 +3333,7 @@ pub fn updateDeclExports( n_type |= macho.N_PEXT; n_desc |= macho.N_WEAK_DEF; }, - .Strong => { - // Check if the export is _main, and note if os. - // Otherwise, don't do anything since we already have all the flags - // set that we need for global (strong) linkage. - // n_type == N_SECT | N_EXT - if (mem.eql(u8, exp_name, "_main")) { - self.entry_addr = decl_sym.n_value; - } - }, + .Strong => {}, .Weak => { // Weak linkage is specified as part of n_desc field. // Symbol's n_type is like for a symbol with strong linkage. @@ -3910,68 +3349,60 @@ pub fn updateDeclExports( }, } - if (exp.link.macho.sym_index) |i| { - const sym = &self.globals.items[i]; - sym.* = .{ - .n_strx = sym.n_strx, - .n_type = n_type, - .n_sect = @intCast(u8, self.text_section_index.?) + 1, - .n_desc = n_desc, - .n_value = decl_sym.n_value, - }; - } else { - const name_str_index = try self.makeString(exp_name); - const i = if (self.globals_free_list.popOrNull()) |i| i else blk: { + const global_sym_index = if (exp.link.macho.sym_index) |i| i else blk: { + const i = if (self.globals_free_list.popOrNull()) |i| i else inner: { _ = self.globals.addOneAssumeCapacity(); - self.export_info_dirty = true; - break :blk @intCast(u32, self.globals.items.len - 1); - }; - self.globals.items[i] = .{ - .n_strx = name_str_index, - .n_type = n_type, - .n_sect = @intCast(u8, self.text_section_index.?) + 1, - .n_desc = n_desc, - .n_value = decl_sym.n_value, - }; - const resolv = try self.symbol_resolver.getOrPut(self.base.allocator, name_str_index); - resolv.value_ptr.* = .{ - .where = .global, - .where_index = i, - .local_sym_index = decl.link.macho.local_sym_index, + break :inner @intCast(u32, self.globals.items.len - 1); }; + break :blk i; + }; - exp.link.macho.sym_index = @intCast(u32, i); - } + const n_strx = try self.makeString(exp_name); + const sym = &self.globals.items[global_sym_index]; + sym.* = .{ + .n_strx = try self.makeString(exp_name), + .n_type = n_type, + .n_sect = @intCast(u8, self.text_section_index.?) 
+ 1, + .n_desc = n_desc, + .n_value = decl_sym.n_value, + }; + exp.link.macho.sym_index = global_sym_index; + + const resolv = try self.symbol_resolver.getOrPut(self.base.allocator, n_strx); + resolv.value_ptr.* = .{ + .where = .global, + .where_index = global_sym_index, + .local_sym_index = decl.link.macho.local_sym_index, + }; } } pub fn deleteExport(self: *MachO, exp: Export) void { const sym_index = exp.sym_index orelse return; self.globals_free_list.append(self.base.allocator, sym_index) catch {}; - self.globals.items[sym_index].n_type = 0; + const global = &self.globals.items[sym_index]; + global.n_type = 0; + assert(self.symbol_resolver.remove(global.n_strx)); } pub fn freeDecl(self: *MachO, decl: *Module.Decl) void { log.debug("freeDecl {*}", .{decl}); _ = self.decls.swapRemove(decl); // Appending to free lists is allowed to fail because the free lists are heuristics based anyway. - self.freeTextBlock(&decl.link.macho); + self.freeAtom(&decl.link.macho, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }); if (decl.link.macho.local_sym_index != 0) { self.locals_free_list.append(self.base.allocator, decl.link.macho.local_sym_index) catch {}; - const got_key = GotIndirectionKey{ - .where = .local, - .where_index = decl.link.macho.local_sym_index, - }; - const got_index = self.got_entries_map.get(got_key) orelse unreachable; - _ = self.got_entries_map.remove(got_key); - self.got_entries_free_list.append(self.base.allocator, got_index) catch {}; + // TODO free GOT atom here. self.locals.items[decl.link.macho.local_sym_index].n_type = 0; decl.link.macho.local_sym_index = 0; } if (self.d_sym) |*ds| { - // TODO make this logic match freeTextBlock. Maybe abstract the logic + // TODO make this logic match freeAtom. Maybe abstract the logic // out since the same thing is desired for both. 
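// For the "TODO free GOT atom here." a few lines up, a sketch of what that could
// look like (hedged: it mirrors how placeDecl spells the GOT section match, and uses
// the same fetchRemove API already used on self.atoms in freeAtom):
//
//     const got_key = GotIndirectionKey{
//         .where = .local,
//         .where_index = decl.link.macho.local_sym_index,
//     };
//     if (self.got_entries_map.fetchRemove(got_key)) |entry| {
//         self.freeAtom(entry.value, .{
//             .seg = self.data_const_segment_cmd_index.?,
//             .sect = self.got_section_index.?,
//         });
//     }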
_ = ds.dbg_line_fn_free_list.remove(&decl.fn_link.macho); if (decl.fn_link.macho.prev) |prev| { @@ -4001,72 +3432,62 @@ pub fn getDeclVAddr(self: *MachO, decl: *const Module.Decl) u64 { } pub fn populateMissingMetadata(self: *MachO) !void { - switch (self.base.options.output_mode) { - .Exe => {}, - .Obj => return error.TODOImplementWritingObjFiles, - .Lib => return error.TODOImplementWritingLibFiles, - } - if (self.pagezero_segment_cmd_index == null) { self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__PAGEZERO", .{ - .vmsize = 0x100000000, // size always set to 4GB - }), + .Segment = .{ + .inner = .{ + .segname = makeStaticString("__PAGEZERO"), + .vmsize = pagezero_vmsize, + }, + }, }); self.load_commands_dirty = true; } + if (self.text_segment_cmd_index == null) { self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE; - const initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE; - const program_code_size_hint = self.base.options.program_code_size_hint; const got_size_hint = @sizeOf(u64) * self.base.options.symbol_count_hint; - const ideal_size = self.header_pad + program_code_size_hint + 3 * got_size_hint; + const ideal_size = self.header_pad + program_code_size_hint + got_size_hint; const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); log.debug("found __TEXT segment free space 0x{x} to 0x{x}", .{ 0, needed_size }); try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__TEXT", .{ - .vmaddr = 0x100000000, // always starts at 4GB - .vmsize = needed_size, - .filesize = needed_size, - .maxprot = maxprot, - .initprot = initprot, - }), + .Segment = .{ + .inner = .{ + .segname = makeStaticString("__TEXT"), + .vmaddr = pagezero_vmsize, + .vmsize = needed_size, + .filesize = needed_size, + .maxprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, + .initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, + }, + }, }); self.load_commands_dirty = true; } - if (self.text_section_index == null) { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.text_section_index = @intCast(u16, text_segment.sections.items.len); + if (self.text_section_index == null) { const alignment: u2 = switch (self.base.options.target.cpu.arch) { .x86_64 => 0, .aarch64 => 2, else => unreachable, // unhandled architecture type }; - const flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS; const needed_size = self.base.options.program_code_size_hint; - const off = text_segment.findFreeSpace(needed_size, @as(u16, 1) << alignment, self.header_pad); - - log.debug("found __text section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try text_segment.addSection(self.base.allocator, "__text", .{ - .addr = text_segment.inner.vmaddr + off, - .size = @intCast(u32, needed_size), - .offset = @intCast(u32, off), - .@"align" = alignment, - .flags = flags, - }); - self.load_commands_dirty = true; + self.text_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__text", + needed_size, + alignment, + .{ + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }, + ); } - if (self.stubs_section_index == null) { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; 
- self.stubs_section_index = @intCast(u16, text_segment.sections.items.len); + if (self.stubs_section_index == null) { const alignment: u2 = switch (self.base.options.target.cpu.arch) { .x86_64 => 0, .aarch64 => 2, @@ -4077,174 +3498,222 @@ pub fn populateMissingMetadata(self: *MachO) !void { .aarch64 => 3 * @sizeOf(u32), else => unreachable, // unhandled architecture type }; - const flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS; - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = text_segment.findFreeSpace(needed_size, @alignOf(u64), self.header_pad); - assert(off + needed_size <= text_segment.inner.fileoff + text_segment.inner.filesize); // TODO Must expand __TEXT segment. - - log.debug("found __stubs section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try text_segment.addSection(self.base.allocator, "__stubs", .{ - .addr = text_segment.inner.vmaddr + off, - .size = needed_size, - .offset = @intCast(u32, off), - .@"align" = alignment, - .flags = flags, - .reserved2 = stub_size, - }); - self.load_commands_dirty = true; + const needed_size = stub_size * self.base.options.symbol_count_hint; + self.stubs_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__stubs", + needed_size, + alignment, + .{ + .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + .reserved2 = stub_size, + }, + ); } - if (self.stub_helper_section_index == null) { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.stub_helper_section_index = @intCast(u16, text_segment.sections.items.len); + if (self.stub_helper_section_index == null) { const alignment: u2 = switch (self.base.options.target.cpu.arch) { .x86_64 => 0, .aarch64 => 2, else => unreachable, // unhandled architecture type }; - const flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS; - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = text_segment.findFreeSpace(needed_size, @alignOf(u64), self.header_pad); - assert(off + needed_size <= text_segment.inner.fileoff + text_segment.inner.filesize); // TODO Must expand __TEXT segment. 
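// A sketch of the per-entry size that ends up in section_64.reserved2 for an
// S_SYMBOL_STUBS section. The hunk elides the x86_64 value at this point, so
// the 6-byte figure (a single jmp *disp32(%rip)) is an assumption.

const std = @import("std");

const Arch = enum { x86_64, aarch64 };

fn stubSize(arch: Arch) u8 {
    return switch (arch) {
        .x86_64 => 6, // jmp *disp32(%rip); assumed, see note above
        .aarch64 => 3 * @sizeOf(u32), // adrp + ldr + br
    };
}

test "__stubs size scales with the symbol count hint" {
    const hint: u64 = 16;
    try std.testing.expectEqual(@as(u64, 96), stubSize(.x86_64) * hint);
}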
- - log.debug("found __stub_helper section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try text_segment.addSection(self.base.allocator, "__stub_helper", .{ - .addr = text_segment.inner.vmaddr + off, - .size = needed_size, - .offset = @intCast(u32, off), - .@"align" = alignment, - .flags = flags, - }); - self.load_commands_dirty = true; + const preamble_size: u6 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 15, + .aarch64 => 6 * @sizeOf(u32), + else => unreachable, + }; + const stub_size: u4 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 10, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, + }; + const needed_size = stub_size * self.base.options.symbol_count_hint + preamble_size; + self.stub_helper_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__stub_helper", + needed_size, + alignment, + .{ + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }, + ); } + if (self.data_const_segment_cmd_index == null) { self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE; - const initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE; const address_and_offset = self.nextSegmentAddressAndOffset(); - const ideal_size = @sizeOf(u64) * self.base.options.symbol_count_hint; const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); - log.debug("found __DATA_CONST segment free space 0x{x} to 0x{x}", .{ address_and_offset.offset, address_and_offset.offset + needed_size }); + log.debug("found __DATA_CONST segment free space 0x{x} to 0x{x}", .{ + address_and_offset.offset, + address_and_offset.offset + needed_size, + }); try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__DATA_CONST", .{ - .vmaddr = address_and_offset.address, - .vmsize = needed_size, - .fileoff = address_and_offset.offset, - .filesize = needed_size, - .maxprot = maxprot, - .initprot = initprot, - }), + .Segment = .{ + .inner = .{ + .segname = makeStaticString("__DATA_CONST"), + .vmaddr = address_and_offset.address, + .vmsize = needed_size, + .fileoff = address_and_offset.offset, + .filesize = needed_size, + .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + }, + }, }); self.load_commands_dirty = true; } + if (self.got_section_index == null) { - const dc_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - self.got_section_index = @intCast(u16, dc_segment.sections.items.len); - - const flags = macho.S_NON_LAZY_SYMBOL_POINTERS; const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = dc_segment.findFreeSpace(needed_size, @alignOf(u64), null); - assert(off + needed_size <= dc_segment.inner.fileoff + dc_segment.inner.filesize); // TODO Must expand __DATA_CONST segment. 
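// A sketch of the __stub_helper budget above: one shared preamble that
// reaches dyld_stub_binder plus one short trampoline per lazy import; the
// 15-byte preamble and 10-byte x86_64 trampolines come straight from the hunk.

const std = @import("std");

fn stubHelperSize(preamble: u64, per_stub: u64, nstubs: u64) u64 {
    return per_stub * nstubs + preamble;
}

test "x86_64 __stub_helper budget for 8 imports" {
    try std.testing.expectEqual(@as(u64, 95), stubHelperSize(15, 10, 8));
}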
- - log.debug("found __got section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try dc_segment.addSection(self.base.allocator, "__got", .{ - .addr = dc_segment.inner.vmaddr + off - dc_segment.inner.fileoff, - .size = needed_size, - .offset = @intCast(u32, off), - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = flags, - }); - self.load_commands_dirty = true; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.got_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__got", + needed_size, + alignment, + .{ + .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, + }, + ); } + if (self.data_segment_cmd_index == null) { self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE; - const initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE; const address_and_offset = self.nextSegmentAddressAndOffset(); - const ideal_size = 2 * @sizeOf(u64) * self.base.options.symbol_count_hint; const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); log.debug("found __DATA segment free space 0x{x} to 0x{x}", .{ address_and_offset.offset, address_and_offset.offset + needed_size }); try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__DATA", .{ - .vmaddr = address_and_offset.address, - .vmsize = needed_size, - .fileoff = address_and_offset.offset, - .filesize = needed_size, - .maxprot = maxprot, - .initprot = initprot, - }), + .Segment = .{ + .inner = .{ + .segname = makeStaticString("__DATA"), + .vmaddr = address_and_offset.address, + .vmsize = needed_size, + .fileoff = address_and_offset.offset, + .filesize = needed_size, + .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + }, + }, }); self.load_commands_dirty = true; } + if (self.la_symbol_ptr_section_index == null) { - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.la_symbol_ptr_section_index = @intCast(u16, data_segment.sections.items.len); - - const flags = macho.S_LAZY_SYMBOL_POINTERS; const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = data_segment.findFreeSpace(needed_size, @alignOf(u64), null); - assert(off + needed_size <= data_segment.inner.fileoff + data_segment.inner.filesize); // TODO Must expand __DATA segment. 
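// The alignment passed to allocateSection is a log2 exponent, mirroring
// section_64.@"align"; a sketch of the conversion the allocator performs
// internally with std.math.powi.

const std = @import("std");

test "the align field stores a power-of-two exponent" {
    const alignment: u16 = 3; // as used for __got above
    const in_bytes = try std.math.powi(u32, 2, alignment);
    try std.testing.expectEqual(@as(u32, @sizeOf(u64)), in_bytes);
}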
- - log.debug("found __la_symbol_ptr section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try data_segment.addSection(self.base.allocator, "__la_symbol_ptr", .{ - .addr = data_segment.inner.vmaddr + off - data_segment.inner.fileoff, - .size = needed_size, - .offset = @intCast(u32, off), - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = flags, - }); - self.load_commands_dirty = true; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.la_symbol_ptr_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__la_symbol_ptr", + needed_size, + alignment, + .{ + .flags = macho.S_LAZY_SYMBOL_POINTERS, + }, + ); } + if (self.data_section_index == null) { - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.data_section_index = @intCast(u16, data_segment.sections.items.len); - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = data_segment.findFreeSpace(needed_size, @alignOf(u64), null); - assert(off + needed_size <= data_segment.inner.fileoff + data_segment.inner.filesize); // TODO Must expand __DATA segment. - - log.debug("found __data section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try data_segment.addSection(self.base.allocator, "__data", .{ - .addr = data_segment.inner.vmaddr + off - data_segment.inner.fileoff, - .size = needed_size, - .offset = @intCast(u32, off), - .@"align" = 3, // 2^3 = @sizeOf(u64) - }); - self.load_commands_dirty = true; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.data_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__data", + needed_size, + alignment, + .{}, + ); } + + if (self.tlv_section_index == null) { + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.tlv_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__thread_vars", + needed_size, + alignment, + .{ + .flags = macho.S_THREAD_LOCAL_VARIABLES, + }, + ); + } + + if (self.tlv_data_section_index == null) { + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.tlv_data_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__thread_data", + needed_size, + alignment, + .{ + .flags = macho.S_THREAD_LOCAL_REGULAR, + }, + ); + } + + if (self.tlv_bss_section_index == null) { + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.tlv_bss_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__thread_bss", + needed_size, + alignment, + .{ + .flags = macho.S_THREAD_LOCAL_ZEROFILL, + }, + ); + const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = seg.sections.items[self.tlv_bss_section_index.?]; + self.tlv_bss_file_offset = sect.offset; + } + + if (self.bss_section_index == null) { + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.bss_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__bss", + needed_size, + alignment, + .{ + .flags = macho.S_ZEROFILL, + }, + ); + const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = seg.sections.items[self.bss_section_index.?]; + self.bss_file_offset = sect.offset; + } + if (self.linkedit_segment_cmd_index == null) { 
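// The __data call above can pass .{} because allocateSection routes its
// rarely-used knobs through an options struct with defaults (the
// AllocateSectionOpts added later in this patch). A minimal sketch of the
// pattern:

const std = @import("std");
const macho = std.macho;

const Opts = struct {
    flags: u32 = macho.S_REGULAR,
    reserved1: u32 = 0,
    reserved2: u32 = 0,
};

fn pickFlags(opts: Opts) u32 {
    return opts.flags; // omitted fields take their declared defaults
}

test "callers that need no knobs pass .{}" {
    try std.testing.expectEqual(@as(u32, macho.S_REGULAR), pickFlags(.{}));
}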
self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - - const maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE; - const initprot = macho.VM_PROT_READ; const address_and_offset = self.nextSegmentAddressAndOffset(); log.debug("found __LINKEDIT segment free space at 0x{x}", .{address_and_offset.offset}); try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__LINKEDIT", .{ - .vmaddr = address_and_offset.address, - .fileoff = address_and_offset.offset, - .maxprot = maxprot, - .initprot = initprot, - }), + .Segment = .{ + .inner = .{ + .segname = makeStaticString("__LINKEDIT"), + .vmaddr = address_and_offset.address, + .fileoff = address_and_offset.offset, + .maxprot = macho.VM_PROT_READ, + .initprot = macho.VM_PROT_READ, + }, + }, }); self.load_commands_dirty = true; } + if (self.dyld_info_cmd_index == null) { self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ .DyldInfoOnly = .{ .cmd = macho.LC_DYLD_INFO_ONLY, @@ -4261,36 +3730,11 @@ pub fn populateMissingMetadata(self: *MachO) !void { .export_size = 0, }, }); - - const dyld = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - - // Preallocate rebase, binding, lazy binding info, and export info. - const expected_size = 48; // TODO This is totally random. - const rebase_off = self.findFreeSpaceLinkedit(expected_size, 1, null); - log.debug("found rebase info free space 0x{x} to 0x{x}", .{ rebase_off, rebase_off + expected_size }); - dyld.rebase_off = @intCast(u32, rebase_off); - dyld.rebase_size = expected_size; - - const bind_off = self.findFreeSpaceLinkedit(expected_size, 1, null); - log.debug("found binding info free space 0x{x} to 0x{x}", .{ bind_off, bind_off + expected_size }); - dyld.bind_off = @intCast(u32, bind_off); - dyld.bind_size = expected_size; - - const lazy_bind_off = self.findFreeSpaceLinkedit(expected_size, 1, null); - log.debug("found lazy binding info free space 0x{x} to 0x{x}", .{ lazy_bind_off, lazy_bind_off + expected_size }); - dyld.lazy_bind_off = @intCast(u32, lazy_bind_off); - dyld.lazy_bind_size = expected_size; - - const export_off = self.findFreeSpaceLinkedit(expected_size, 1, null); - log.debug("found export info free space 0x{x} to 0x{x}", .{ export_off, export_off + expected_size }); - dyld.export_off = @intCast(u32, export_off); - dyld.export_size = expected_size; - self.load_commands_dirty = true; } + if (self.symtab_cmd_index == null) { self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ .Symtab = .{ .cmd = macho.LC_SYMTAB, @@ -4301,34 +3745,11 @@ pub fn populateMissingMetadata(self: *MachO) !void { .strsize = 0, }, }); - - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - - const symtab_size = self.base.options.symbol_count_hint * @sizeOf(macho.nlist_64); - const symtab_off = self.findFreeSpaceLinkedit(symtab_size, @sizeOf(macho.nlist_64), null); - log.debug("found symbol table free space 0x{x} to 0x{x}", .{ symtab_off, symtab_off + symtab_size }); - symtab.symoff = @intCast(u32, symtab_off); - symtab.nsyms = @intCast(u32, self.base.options.symbol_count_hint); - - try self.strtab.append(self.base.allocator, 0); - const strtab_size = self.strtab.items.len; - const strtab_off = self.findFreeSpaceLinkedit(strtab_size, 1, symtab_off); - log.debug("found string table free space 0x{x} to 0x{x}", .{ strtab_off, strtab_off + strtab_size 
}); - symtab.stroff = @intCast(u32, strtab_off); - symtab.strsize = @intCast(u32, strtab_size); - self.load_commands_dirty = true; - self.strtab_dirty = true; } + if (self.dysymtab_cmd_index == null) { self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); - - // Preallocate space for indirect symbol table. - const indsymtab_size = self.base.options.symbol_count_hint * @sizeOf(u64); // Each entry is just a u64. - const indsymtab_off = self.findFreeSpaceLinkedit(indsymtab_size, @sizeOf(u64), null); - - log.debug("found indirect symbol table free space 0x{x} to 0x{x}", .{ indsymtab_off, indsymtab_off + indsymtab_size }); - try self.load_commands.append(self.base.allocator, .{ .Dysymtab = .{ .cmd = macho.LC_DYSYMTAB, @@ -4345,8 +3766,8 @@ pub fn populateMissingMetadata(self: *MachO) !void { .nmodtab = 0, .extrefsymoff = 0, .nextrefsyms = 0, - .indirectsymoff = @intCast(u32, indsymtab_off), - .nindirectsyms = @intCast(u32, self.base.options.symbol_count_hint), + .indirectsymoff = 0, + .nindirectsyms = 0, .extreloff = 0, .nextrel = 0, .locreloff = 0, @@ -4355,11 +3776,12 @@ pub fn populateMissingMetadata(self: *MachO) !void { }); self.load_commands_dirty = true; } + if (self.dylinker_cmd_index == null) { self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len); const cmdsize = @intCast(u32, mem.alignForwardGeneric( u64, - @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH), + @sizeOf(macho.dylinker_command) + mem.lenZ(default_dyld_path), @sizeOf(u64), )); var dylinker_cmd = commands.emptyGenericCommandWithData(macho.dylinker_command{ @@ -4369,21 +3791,12 @@ pub fn populateMissingMetadata(self: *MachO) !void { }); dylinker_cmd.data = try self.base.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name); mem.set(u8, dylinker_cmd.data, 0); - mem.copy(u8, dylinker_cmd.data, mem.spanZ(DEFAULT_DYLD_PATH)); + mem.copy(u8, dylinker_cmd.data, mem.spanZ(default_dyld_path)); try self.load_commands.append(self.base.allocator, .{ .Dylinker = dylinker_cmd }); self.load_commands_dirty = true; } - if (self.libsystem_cmd_index == null) { - self.libsystem_cmd_index = @intCast(u16, self.load_commands.items.len); - var dylib_cmd = try commands.createLoadDylibCommand(self.base.allocator, mem.spanZ(LIB_SYSTEM_PATH), 2, 0, 0); - errdefer dylib_cmd.deinit(self.base.allocator); - - try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); - - self.load_commands_dirty = true; - } - if (self.main_cmd_index == null) { + if (self.main_cmd_index == null and self.base.options.output_mode == .Exe) { self.main_cmd_index = @intCast(u16, self.load_commands.items.len); try self.load_commands.append(self.base.allocator, .{ .Main = .{ @@ -4395,6 +3808,38 @@ pub fn populateMissingMetadata(self: *MachO) !void { }); self.load_commands_dirty = true; } + + if (self.dylib_id_cmd_index == null and self.base.options.output_mode == .Lib) { + self.dylib_id_cmd_index = @intCast(u16, self.load_commands.items.len); + const install_name = try std.fmt.allocPrint(self.base.allocator, "@rpath/{s}", .{ + self.base.options.emit.?.sub_path, + }); + defer self.base.allocator.free(install_name); + var dylib_cmd = try commands.createLoadDylibCommand( + self.base.allocator, + install_name, + 2, + 0x10000, // TODO forward user-provided versions + 0x10000, + ); + errdefer dylib_cmd.deinit(self.base.allocator); + dylib_cmd.inner.cmd = macho.LC_ID_DYLIB; + try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); + self.load_commands_dirty = true; + } + + if 
(self.source_version_cmd_index == null) { + self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .SourceVersion = .{ + .cmd = macho.LC_SOURCE_VERSION, + .cmdsize = @sizeOf(macho.source_version_command), + .version = 0x0, + }, + }); + self.load_commands_dirty = true; + } + if (self.build_version_cmd_index == null) { self.build_version_cmd_index = @intCast(u16, self.load_commands.items.len); const cmdsize = @intCast(u32, mem.alignForwardGeneric( @@ -4427,18 +3872,9 @@ pub fn populateMissingMetadata(self: *MachO) !void { mem.set(u8, cmd.data, 0); mem.copy(u8, cmd.data, mem.asBytes(&ld_ver)); try self.load_commands.append(self.base.allocator, .{ .BuildVersion = cmd }); - } - if (self.source_version_cmd_index == null) { - self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .SourceVersion = .{ - .cmd = macho.LC_SOURCE_VERSION, - .cmdsize = @sizeOf(macho.source_version_command), - .version = 0x0, - }, - }); self.load_commands_dirty = true; } + if (self.uuid_cmd_index == null) { self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); var uuid_cmd: macho.uuid_command = .{ @@ -4450,11 +3886,12 @@ pub fn populateMissingMetadata(self: *MachO) !void { try self.load_commands.append(self.base.allocator, .{ .Uuid = uuid_cmd }); self.load_commands_dirty = true; } - if (self.code_signature_cmd_index == null and self.requires_adhoc_codesig) { - self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); + + if (self.data_in_code_cmd_index == null) { + self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len); try self.load_commands.append(self.base.allocator, .{ .LinkeditData = .{ - .cmd = macho.LC_CODE_SIGNATURE, + .cmd = macho.LC_DATA_IN_CODE, .cmdsize = @sizeOf(macho.linkedit_data_command), .dataoff = 0, .datasize = 0, @@ -4462,138 +3899,330 @@ pub fn populateMissingMetadata(self: *MachO) !void { }); self.load_commands_dirty = true; } - if (!self.strtab_dir.containsAdapted(@as([]const u8, "dyld_stub_binder"), StringIndexAdapter{ - .bytes = &self.strtab, - })) { - const import_sym_index = @intCast(u32, self.undefs.items.len); - const n_strx = try self.makeString("dyld_stub_binder"); - try self.undefs.append(self.base.allocator, .{ - .n_strx = n_strx, - .n_type = macho.N_UNDF | macho.N_EXT, - .n_sect = 0, - .n_desc = @intCast(u8, 1) * macho.N_SYMBOL_RESOLVER, - .n_value = 0, - }); - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .undef, - .where_index = import_sym_index, - }); - const got_key = GotIndirectionKey{ - .where = .undef, - .where_index = import_sym_index, - }; - const got_index = @intCast(u32, self.got_entries.items.len); - try self.got_entries.append(self.base.allocator, got_key); - try self.got_entries_map.putNoClobber(self.base.allocator, got_key, got_index); - try self.writeGotEntry(got_index); - self.binding_info_dirty = true; + + self.cold_start = true; +} + +const AllocateSectionOpts = struct { + flags: u32 = macho.S_REGULAR, + reserved1: u32 = 0, + reserved2: u32 = 0, +}; + +fn allocateSection( + self: *MachO, + segment_id: u16, + sectname: []const u8, + size: u64, + alignment: u32, + opts: AllocateSectionOpts, +) !u16 { + const seg = &self.load_commands.items[segment_id].Segment; + var sect = macho.section_64{ + .sectname = makeStaticString(sectname), + .segname = seg.inner.segname, + .size = @intCast(u32, size), + .@"align" = alignment, + .flags = 
opts.flags, + .reserved1 = opts.reserved1, + .reserved2 = opts.reserved2, + }; + + const alignment_pow_2 = try math.powi(u32, 2, alignment); + const padding: ?u64 = if (segment_id == self.text_segment_cmd_index.?) self.header_pad else null; + const off = self.findFreeSpace(segment_id, alignment_pow_2, padding); + + log.debug("allocating {s},{s} section from 0x{x} to 0x{x}", .{ + commands.segmentName(sect), + commands.sectionName(sect), + off, + off + size, + }); + + sect.addr = seg.inner.vmaddr + off - seg.inner.fileoff; + sect.offset = @intCast(u32, off); + + const index = @intCast(u16, seg.sections.items.len); + try seg.sections.append(self.base.allocator, sect); + seg.inner.cmdsize += @sizeOf(macho.section_64); + seg.inner.nsects += 1; + + const match = MatchingSection{ + .seg = segment_id, + .sect = index, + }; + _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + try self.atom_free_lists.putNoClobber(self.base.allocator, match, .{}); + + self.load_commands_dirty = true; + self.sections_order_dirty = true; + + return index; +} + +fn findFreeSpace(self: MachO, segment_id: u16, alignment: u64, start: ?u64) u64 { + const seg = self.load_commands.items[segment_id].Segment; + if (seg.sections.items.len == 0) { + return if (start) |v| v else seg.inner.fileoff; } - if (self.stub_helper_stubs_start_off == null) { - try self.writeStubHelperPreamble(); + const last_sect = seg.sections.items[seg.sections.items.len - 1]; + const final_off = last_sect.offset + padToIdeal(last_sect.size); + return mem.alignForwardGeneric(u64, final_off, alignment); +} + +fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const seg = &self.load_commands.items[match.seg].Segment; + const sect = &seg.sections.items[match.sect]; + + const alignment = try math.powi(u32, 2, sect.@"align"); + const max_size = self.allocatedSize(match.seg, sect.offset); + const ideal_size = padToIdeal(new_size); + const needed_size = mem.alignForwardGeneric(u32, ideal_size, alignment); + + if (needed_size > max_size) blk: { + log.debug(" (need to grow!)", .{}); + // Need to move all sections below in file and address spaces. + const offset_amt = offset: { + const max_alignment = try self.getSectionMaxAlignment(match.seg, match.sect + 1); + break :offset mem.alignForwardGeneric(u64, needed_size - max_size, max_alignment); + }; + + // Before we commit to this, check if the segment needs to grow too. + // We assume that each section header is growing linearly with the increasing + // file offset / virtual memory address space. + const last_sect = seg.sections.items[seg.sections.items.len - 1]; + const last_sect_off = last_sect.offset + last_sect.size; + const seg_off = seg.inner.fileoff + seg.inner.filesize; + + if (last_sect_off + offset_amt > seg_off) { + // Need to grow segment first. + log.debug(" (need to grow segment first)", .{}); + const spill_size = (last_sect_off + offset_amt) - seg_off; + const seg_offset_amt = mem.alignForwardGeneric(u64, spill_size, self.page_size); + seg.inner.filesize += seg_offset_amt; + seg.inner.vmsize += seg_offset_amt; + + log.debug(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ + seg.inner.segname, + seg.inner.fileoff, + seg.inner.fileoff + seg.inner.filesize, + seg.inner.vmaddr, + seg.inner.vmaddr + seg.inner.vmsize, + }); + + // TODO We should probably nop the expanded by distance, or put 0s. + + // TODO copyRangeAll doesn't automatically extend the file on macOS. 
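// A sketch of the workaround applied just below: copyRangeAll will not grow
// the destination file on macOS, so the code first pwrites a single zero
// byte at the new end-of-file to force the extension.

const std = @import("std");

fn ensureFileSize(file: std.fs.File, new_size: u64) !void {
    std.debug.assert(new_size > 0);
    try file.pwriteAll(&[_]u8{0}, new_size - 1);
}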
+ const ledit_seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const new_filesize = seg_offset_amt + ledit_seg.inner.fileoff + ledit_seg.inner.filesize; + try self.base.file.?.pwriteAll(&[_]u8{0}, new_filesize - 1); + + var next: usize = match.seg + 1; + while (next < self.linkedit_segment_cmd_index.? + 1) : (next += 1) { + const next_seg = &self.load_commands.items[next].Segment; + _ = try self.base.file.?.copyRangeAll( + next_seg.inner.fileoff, + self.base.file.?, + next_seg.inner.fileoff + seg_offset_amt, + next_seg.inner.filesize, + ); + next_seg.inner.fileoff += seg_offset_amt; + next_seg.inner.vmaddr += seg_offset_amt; + + log.debug(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ + next_seg.inner.segname, + next_seg.inner.fileoff, + next_seg.inner.fileoff + next_seg.inner.filesize, + next_seg.inner.vmaddr, + next_seg.inner.vmaddr + next_seg.inner.vmsize, + }); + + for (next_seg.sections.items) |*moved_sect, moved_sect_id| { + moved_sect.offset += @intCast(u32, seg_offset_amt); + moved_sect.addr += seg_offset_amt; + + log.debug(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ + commands.segmentName(moved_sect.*), + commands.sectionName(moved_sect.*), + moved_sect.offset, + moved_sect.offset + moved_sect.size, + moved_sect.addr, + moved_sect.addr + moved_sect.size, + }); + + try self.allocateLocalSymbols(.{ + .seg = @intCast(u16, next), + .sect = @intCast(u16, moved_sect_id), + }, @intCast(i64, seg_offset_amt)); + } + } + } + + if (match.sect + 1 >= seg.sections.items.len) break :blk; + + // We have enough space to expand within the segment, so move all sections by + // the required amount and update their header offsets. + const next_sect = seg.sections.items[match.sect + 1]; + const total_size = last_sect_off - next_sect.offset; + _ = try self.base.file.?.copyRangeAll( + next_sect.offset, + self.base.file.?, + next_sect.offset + offset_amt, + total_size, + ); + + var next = match.sect + 1; + while (next < seg.sections.items.len) : (next += 1) { + const moved_sect = &seg.sections.items[next]; + moved_sect.offset += @intCast(u32, offset_amt); + moved_sect.addr += offset_amt; + + log.debug(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ + commands.segmentName(moved_sect.*), + commands.sectionName(moved_sect.*), + moved_sect.offset, + moved_sect.offset + moved_sect.size, + moved_sect.addr, + moved_sect.addr + moved_sect.size, + }); + + try self.allocateLocalSymbols(.{ + .seg = match.seg, + .sect = next, + }, @intCast(i64, offset_amt)); + } } } -fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, alignment: u64) !u64 { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const text_section = &text_segment.sections.items[self.text_section_index.?]; - const new_block_ideal_capacity = padToIdeal(new_block_size); +fn allocatedSize(self: MachO, segment_id: u16, start: u64) u64 { + const seg = self.load_commands.items[segment_id].Segment; + assert(start >= seg.inner.fileoff); + var min_pos: u64 = seg.inner.fileoff + seg.inner.filesize; + if (start > min_pos) return 0; + for (seg.sections.items) |section| { + if (section.offset <= start) continue; + if (section.offset < min_pos) min_pos = section.offset; + } + return min_pos - start; +} - // We use these to indicate our intention to update metadata, placing the new block, +fn getSectionMaxAlignment(self: *MachO, segment_id: u16, start_sect_id: u16) 
!u32 { + const seg = self.load_commands.items[segment_id].Segment; + var max_alignment: u32 = 1; + var next = start_sect_id; + while (next < seg.sections.items.len) : (next += 1) { + const sect = seg.sections.items[next]; + const alignment = try math.powi(u32, 2, sect.@"align"); + max_alignment = math.max(max_alignment, alignment); + } + return max_alignment; +} + +fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, match: MatchingSection) !u64 { + const tracy = trace(@src()); + defer tracy.end(); + + const seg = &self.load_commands.items[match.seg].Segment; + const sect = &seg.sections.items[match.sect]; + var free_list = self.atom_free_lists.get(match).?; + const needs_padding = match.seg == self.text_segment_cmd_index.? and match.sect == self.text_section_index.?; + const new_atom_ideal_capacity = if (needs_padding) padToIdeal(new_atom_size) else new_atom_size; + + // We use these to indicate our intention to update metadata, placing the new atom, // and possibly removing a free list node. // It would be simpler to do it inside the for loop below, but that would cause a // problem if an error was returned later in the function. So this action // is actually carried out at the end of the function, when errors are no longer possible. - var block_placement: ?*TextBlock = null; + var atom_placement: ?*Atom = null; var free_list_removal: ?usize = null; // First we look for an appropriately sized free list node. // The list is unordered. We'll just take the first thing that works. - const vaddr = blk: { + var vaddr = blk: { var i: usize = 0; - while (i < self.text_block_free_list.items.len) { - const big_block = self.text_block_free_list.items[i]; - // We now have a pointer to a live text block that has too much capacity. - // Is it enough that we could fit this new text block? - const sym = self.locals.items[big_block.local_sym_index]; - const capacity = big_block.capacity(self.*); - const ideal_capacity = padToIdeal(capacity); + while (i < free_list.items.len) { + const big_atom = free_list.items[i]; + // We now have a pointer to a live atom that has too much capacity. + // Is it enough that we could fit this new atom? + const sym = self.locals.items[big_atom.local_sym_index]; + const capacity = big_atom.capacity(self.*); + const ideal_capacity = if (needs_padding) padToIdeal(capacity) else capacity; const ideal_capacity_end_vaddr = sym.n_value + ideal_capacity; const capacity_end_vaddr = sym.n_value + capacity; - const new_start_vaddr_unaligned = capacity_end_vaddr - new_block_ideal_capacity; + const new_start_vaddr_unaligned = capacity_end_vaddr - new_atom_ideal_capacity; const new_start_vaddr = mem.alignBackwardGeneric(u64, new_start_vaddr_unaligned, alignment); if (new_start_vaddr < ideal_capacity_end_vaddr) { // Additional bookkeeping here to notice if this free list node - // should be deleted because the block that it points to has grown to take up + // should be deleted because the atom that it points to has grown to take up // more of the extra capacity. - if (!big_block.freeListEligible(self.*)) { - const bl = self.text_block_free_list.swapRemove(i); + if (!big_atom.freeListEligible(self.*)) { + const bl = free_list.swapRemove(i); bl.deinit(self.base.allocator); } else { i += 1; } continue; } - // At this point we know that we will place the new block here. But the + // At this point we know that we will place the new atom here. 
But the // remaining question is whether there is still yet enough capacity left // over for there to still be a free list node. const remaining_capacity = new_start_vaddr - ideal_capacity_end_vaddr; const keep_free_list_node = remaining_capacity >= min_text_capacity; // Set up the metadata to be updated, after errors are no longer possible. - block_placement = big_block; + atom_placement = big_atom; if (!keep_free_list_node) { free_list_removal = i; } break :blk new_start_vaddr; - } else if (self.last_text_block) |last| { + } else if (self.atoms.get(match)) |last| { const last_symbol = self.locals.items[last.local_sym_index]; - // TODO We should pad out the excess capacity with NOPs. For executables, - // no padding seems to be OK, but it will probably not be for objects. - const ideal_capacity = padToIdeal(last.size); + const ideal_capacity = if (needs_padding) padToIdeal(last.size) else last.size; const ideal_capacity_end_vaddr = last_symbol.n_value + ideal_capacity; const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, alignment); - block_placement = last; + atom_placement = last; break :blk new_start_vaddr; } else { - break :blk text_section.addr; + break :blk mem.alignForwardGeneric(u64, sect.addr, alignment); } }; - const expand_text_section = block_placement == null or block_placement.?.next == null; - if (expand_text_section) { - const needed_size = (vaddr + new_block_size) - text_section.addr; - assert(needed_size <= text_segment.inner.filesize); // TODO must move the entire text section. - - self.last_text_block = text_block; - text_section.size = needed_size; - self.load_commands_dirty = true; // TODO Make more granular. - - if (self.d_sym) |*ds| { - const debug_text_seg = &ds.load_commands.items[ds.text_segment_cmd_index.?].Segment; - const debug_text_sect = &debug_text_seg.sections.items[ds.text_section_index.?]; - debug_text_sect.size = needed_size; - ds.load_commands_dirty = true; - } + const expand_section = atom_placement == null or atom_placement.?.next == null; + if (expand_section) { + const needed_size = @intCast(u32, (vaddr + new_atom_size) - sect.addr); + try self.growSection(match, needed_size); + _ = try self.atoms.put(self.base.allocator, match, atom); + sect.size = needed_size; + self.load_commands_dirty = true; } - text_block.size = new_block_size; - - if (text_block.prev) |prev| { - prev.next = text_block.next; + const align_pow = @intCast(u32, math.log2(alignment)); + if (sect.@"align" < align_pow) { + sect.@"align" = align_pow; + self.load_commands_dirty = true; } - if (text_block.next) |next| { - next.prev = text_block.prev; + atom.size = new_atom_size; + atom.alignment = align_pow; + + if (atom.prev) |prev| { + prev.next = atom.next; + } + if (atom.next) |next| { + next.prev = atom.prev; } - if (block_placement) |big_block| { - text_block.prev = big_block; - text_block.next = big_block.next; - big_block.next = text_block; + if (atom_placement) |big_atom| { + atom.prev = big_atom; + atom.next = big_atom.next; + big_atom.next = atom; } else { - text_block.prev = null; - text_block.next = null; + atom.prev = null; + atom.next = null; } if (free_list_removal) |i| { - _ = self.text_block_free_list.swapRemove(i); + _ = free_list.swapRemove(i); } return vaddr; @@ -4610,34 +4239,23 @@ pub fn addExternFn(self: *MachO, name: []const u8) !u32 { return resolv.where_index; } - log.debug("adding new extern function '{s}' with dylib ordinal 1", .{sym_name}); - const import_sym_index = @intCast(u32, self.undefs.items.len); + log.debug("adding new 
extern function '{s}'", .{sym_name}); + const sym_index = @intCast(u32, self.undefs.items.len); const n_strx = try self.makeString(sym_name); try self.undefs.append(self.base.allocator, .{ .n_strx = n_strx, - .n_type = macho.N_UNDF | macho.N_EXT, + .n_type = macho.N_UNDF, .n_sect = 0, - .n_desc = @intCast(u8, 1) * macho.N_SYMBOL_RESOLVER, + .n_desc = 0, .n_value = 0, }); try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ .where = .undef, - .where_index = import_sym_index, + .where_index = sym_index, }); + try self.unresolved.putNoClobber(self.base.allocator, sym_index, .stub); - const stubs_index = @intCast(u32, self.stubs.items.len); - try self.stubs.append(self.base.allocator, import_sym_index); - try self.stubs_map.putNoClobber(self.base.allocator, import_sym_index, stubs_index); - - // TODO discuss this. The caller context expects codegen.InnerError{ OutOfMemory, CodegenFail }, - // which obviously doesn't include file writing op errors. So instead of trying to write the stub - // entry right here and now, queue it up and dispose of when updating decl. - try self.pending_updates.append(self.base.allocator, .{ - .kind = .stub, - .index = stubs_index, - }); - - return import_sym_index; + return sym_index; } const NextSegmentAddressAndOffset = struct { @@ -4661,593 +4279,294 @@ fn nextSegmentAddressAndOffset(self: *MachO) NextSegmentAddressAndOffset { }; } -fn allocatedSizeLinkedit(self: *MachO, start: u64) u64 { - assert(start > 0); - var min_pos: u64 = std.math.maxInt(u64); +fn updateSectionOrdinals(self: *MachO) !void { + if (!self.sections_order_dirty) return; - // __LINKEDIT is a weird segment where sections get their own load commands so we - // special-case it. - if (self.dyld_info_cmd_index) |idx| { - const dyld_info = self.load_commands.items[idx].DyldInfoOnly; - if (dyld_info.rebase_off > start and dyld_info.rebase_off < min_pos) min_pos = dyld_info.rebase_off; - if (dyld_info.bind_off > start and dyld_info.bind_off < min_pos) min_pos = dyld_info.bind_off; - if (dyld_info.weak_bind_off > start and dyld_info.weak_bind_off < min_pos) min_pos = dyld_info.weak_bind_off; - if (dyld_info.lazy_bind_off > start and dyld_info.lazy_bind_off < min_pos) min_pos = dyld_info.lazy_bind_off; - if (dyld_info.export_off > start and dyld_info.export_off < min_pos) min_pos = dyld_info.export_off; - } + const tracy = trace(@src()); + defer tracy.end(); - if (self.function_starts_cmd_index) |idx| { - const fstart = self.load_commands.items[idx].LinkeditData; - if (fstart.dataoff > start and fstart.dataoff < min_pos) min_pos = fstart.dataoff; - } + var ordinal_remap = std.AutoHashMap(u8, u8).init(self.base.allocator); + defer ordinal_remap.deinit(); + var ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}; - if (self.data_in_code_cmd_index) |idx| { - const dic = self.load_commands.items[idx].LinkeditData; - if (dic.dataoff > start and dic.dataoff < min_pos) min_pos = dic.dataoff; - } + var new_ordinal: u8 = 0; + for (self.load_commands.items) |lc, lc_id| { + if (lc != .Segment) break; - if (self.dysymtab_cmd_index) |idx| { - const dysymtab = self.load_commands.items[idx].Dysymtab; - if (dysymtab.indirectsymoff > start and dysymtab.indirectsymoff < min_pos) min_pos = dysymtab.indirectsymoff; - // TODO Handle more dynamic symbol table sections. 
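// addExternFn above leans on the n_strx scheme: every symbol name lives in
// one NUL-separated byte buffer and is referred to by its starting offset.
// A sketch of makeString under that assumption, in the 2021-era std API:

const std = @import("std");

fn makeString(gpa: *std.mem.Allocator, strtab: *std.ArrayListUnmanaged(u8), name: []const u8) !u32 {
    const n_strx = @intCast(u32, strtab.items.len);
    try strtab.appendSlice(gpa, name);
    try strtab.append(gpa, 0); // NUL terminator separates entries
    return n_strx;
}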
- } - - if (self.symtab_cmd_index) |idx| { - const symtab = self.load_commands.items[idx].Symtab; - if (symtab.symoff > start and symtab.symoff < min_pos) min_pos = symtab.symoff; - if (symtab.stroff > start and symtab.stroff < min_pos) min_pos = symtab.stroff; - } - - return min_pos - start; -} -inline fn checkForCollision(start: u64, end: u64, off: u64, size: u64) ?u64 { - const increased_size = padToIdeal(size); - const test_end = off + increased_size; - if (end > off and start < test_end) { - return test_end; - } - return null; -} - -fn detectAllocCollisionLinkedit(self: *MachO, start: u64, size: u64) ?u64 { - const end = start + padToIdeal(size); - - // __LINKEDIT is a weird segment where sections get their own load commands so we - // special-case it. - if (self.dyld_info_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const dyld_info = self.load_commands.items[idx].DyldInfoOnly; - if (checkForCollision(start, end, dyld_info.rebase_off, dyld_info.rebase_size)) |pos| { - return pos; - } - // Binding info - if (checkForCollision(start, end, dyld_info.bind_off, dyld_info.bind_size)) |pos| { - return pos; - } - // Weak binding info - if (checkForCollision(start, end, dyld_info.weak_bind_off, dyld_info.weak_bind_size)) |pos| { - return pos; - } - // Lazy binding info - if (checkForCollision(start, end, dyld_info.lazy_bind_off, dyld_info.lazy_bind_size)) |pos| { - return pos; - } - // Export info - if (checkForCollision(start, end, dyld_info.export_off, dyld_info.export_size)) |pos| { - return pos; - } - } - - if (self.function_starts_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const fstart = self.load_commands.items[idx].LinkeditData; - if (checkForCollision(start, end, fstart.dataoff, fstart.datasize)) |pos| { - return pos; - } - } - - if (self.data_in_code_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const dic = self.load_commands.items[idx].LinkeditData; - if (checkForCollision(start, end, dic.dataoff, dic.datasize)) |pos| { - return pos; - } - } - - if (self.dysymtab_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const dysymtab = self.load_commands.items[idx].Dysymtab; - // Indirect symbol table - const nindirectsize = dysymtab.nindirectsyms * @sizeOf(u32); - if (checkForCollision(start, end, dysymtab.indirectsymoff, nindirectsize)) |pos| { - return pos; - } - // TODO Handle more dynamic symbol table sections. 
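// The routines deleted above implemented a linear probe over __LINKEDIT:
// whenever a candidate range collided with an existing blob, the search
// restarted just past it. A sketch of that loop shape:

const std = @import("std");

const Blob = struct { off: u64, size: u64 };

fn findFree(blobs: []const Blob, size: u64, alignment: u64, start: u64) u64 {
    var st = start;
    probe: while (true) {
        for (blobs) |b| {
            if (st < b.off + b.size and b.off < st + size) {
                // Overlap: resume just past this blob, realigned.
                st = std.mem.alignForwardGeneric(u64, b.off + b.size, alignment);
                continue :probe;
            }
        }
        return st;
    }
}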
- } - - if (self.symtab_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const symtab = self.load_commands.items[idx].Symtab; - // Symbol table - const symsize = symtab.nsyms * @sizeOf(macho.nlist_64); - if (checkForCollision(start, end, symtab.symoff, symsize)) |pos| { - return pos; - } - // String table - if (checkForCollision(start, end, symtab.stroff, symtab.strsize)) |pos| { - return pos; - } - } - - return null; -} - -fn findFreeSpaceLinkedit(self: *MachO, object_size: u64, min_alignment: u16, start: ?u64) u64 { - const linkedit = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - var st: u64 = start orelse linkedit.inner.fileoff; - while (self.detectAllocCollisionLinkedit(st, object_size)) |item_end| { - st = mem.alignForwardGeneric(u64, item_end, min_alignment); - } - return st; -} - -fn writeGotEntry(self: *MachO, index: usize) !void { - const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[self.got_section_index.?]; - const off = sect.offset + @sizeOf(u64) * index; - - if (self.got_entries_count_dirty) { - // TODO relocate. - self.got_entries_count_dirty = false; - } - - const got_entry = self.got_entries.items[index]; - const sym = switch (got_entry.where) { - .local => self.locals.items[got_entry.where_index], - .undef => self.undefs.items[got_entry.where_index], - }; - log.debug("writing offset table entry [ 0x{x} => 0x{x} ({s}) ]", .{ - off, - sym.n_value, - self.getString(sym.n_strx), - }); - try self.base.file.?.pwriteAll(mem.asBytes(&sym.n_value), off); -} - -fn writeLazySymbolPointer(self: *MachO, index: u32) !void { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = text_segment.sections.items[self.stub_helper_section_index.?]; - const data_segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - - const stub_size: u4 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const stub_off = self.stub_helper_stubs_start_off.? 
+ index * stub_size; - const end = stub_helper.addr + stub_off - stub_helper.offset; - var buf: [@sizeOf(u64)]u8 = undefined; - mem.writeIntLittle(u64, &buf, end); - const off = la_symbol_ptr.offset + index * @sizeOf(u64); - log.debug("writing lazy symbol pointer entry 0x{x} at 0x{x}", .{ end, off }); - try self.base.file.?.pwriteAll(&buf, off); -} - -fn writeStubHelperPreamble(self: *MachO) !void { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?]; - const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_segment.sections.items[self.got_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const data = &data_segment.sections.items[self.data_section_index.?]; - - switch (self.base.options.target.cpu.arch) { - .x86_64 => { - const code_size = 15; - var code: [code_size]u8 = undefined; - // lea %r11, [rip + disp] - code[0] = 0x4c; - code[1] = 0x8d; - code[2] = 0x1d; - { - const target_addr = data.addr; - const displacement = try math.cast(u32, target_addr - stub_helper.addr - 7); - mem.writeIntLittle(u32, code[3..7], displacement); - } - // push %r11 - code[7] = 0x41; - code[8] = 0x53; - // jmp [rip + disp] - code[9] = 0xff; - code[10] = 0x25; - { - const displacement = try math.cast(u32, got.addr - stub_helper.addr - code_size); - mem.writeIntLittle(u32, code[11..], displacement); - } - try self.base.file.?.pwriteAll(&code, stub_helper.offset); - self.stub_helper_stubs_start_off = stub_helper.offset + code_size; - }, - .aarch64 => { - var code: [6 * @sizeOf(u32)]u8 = undefined; - - data_blk_outer: { - const this_addr = stub_helper.addr; - const target_addr = data.addr; - data_blk: { - const displacement = math.cast(i21, target_addr - this_addr) catch break :data_blk; - // adr x17, disp - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32()); - // nop - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); - break :data_blk_outer; - } - data_blk: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.cast(i21, target_addr - new_this_addr) catch break :data_blk; - // nop - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); - // adr x17, disp - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32()); - break :data_blk_outer; - } - // Jump is too big, replace adr with adrp and add. - const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - // adrp x17, pages - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32()); - } - - // stp x16, x17, [sp, #-16]! 
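// A sketch of the RIP-relative arithmetic used in the x86_64 preamble
// above: a disp32 is measured from the end of the current instruction,
// hence the `- 7` after the 7-byte lea and `- code_size` after the jmp.

const std = @import("std");

fn rel32(insn_addr: u64, insn_len: u64, target: u64) !u32 {
    // Forward references only, as in the preamble; a backward target
    // would wrap the u64 subtraction and fail the cast.
    return std.math.cast(u32, target - (insn_addr + insn_len));
}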
- mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.stp( - .x16, - .x17, - aarch64.Register.sp, - aarch64.Instruction.LoadStorePairOffset.pre_index(-16), - ).toU32()); - - binder_blk_outer: { - const this_addr = stub_helper.addr + 3 * @sizeOf(u32); - const target_addr = got.addr; - binder_blk: { - const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :binder_blk; - const literal = math.cast(u18, displacement) catch break :binder_blk; - // ldr x16, label - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - // nop - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32()); - break :binder_blk_outer; - } - binder_blk: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :binder_blk; - const literal = math.cast(u18, displacement) catch break :binder_blk; - // nop - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32()); - // ldr x16, label - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - break :binder_blk_outer; - } - // Jump is too big, replace ldr with adrp and ldr(register). - const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - // adrp x16, pages - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - const offset = try math.divExact(u12, narrowed, 8); - // ldr x16, x16, offset - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - .register = .{ - .rn = .x16, - .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), - }, - }).toU32()); - } - - // br x16 - mem.writeIntLittle(u32, code[20..24], aarch64.Instruction.br(.x16).toU32()); - try self.base.file.?.pwriteAll(&code, stub_helper.offset); - self.stub_helper_stubs_start_off = stub_helper.offset + code.len; - }, - else => unreachable, - } -} - -fn writeStub(self: *MachO, index: u32) !void { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = text_segment.sections.items[self.stubs_section_index.?]; - const data_segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - - const stub_off = stubs.offset + index * stubs.reserved2; - const stub_addr = stubs.addr + index * stubs.reserved2; - const la_ptr_addr = la_symbol_ptr.addr + index * @sizeOf(u64); - - log.debug("writing stub at 0x{x}", .{stub_off}); - - var code = try self.base.allocator.alloc(u8, stubs.reserved2); - defer self.base.allocator.free(code); - - switch (self.base.options.target.cpu.arch) { - .x86_64 => { - assert(la_ptr_addr >= stub_addr + stubs.reserved2); - const displacement = try math.cast(u32, la_ptr_addr - stub_addr - stubs.reserved2); - // jmp - code[0] = 0xff; - code[1] = 0x25; - mem.writeIntLittle(u32, code[2..][0..4], displacement); - }, - .aarch64 => { - assert(la_ptr_addr >= stub_addr); - outer: { - const this_addr = stub_addr; - const target_addr = la_ptr_addr; - inner: { - const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :inner; - const literal = math.cast(u18, displacement) catch break :inner; - // ldr x16, literal - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.x16, .{ - .literal 
= literal, - }).toU32()); - // nop - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); - break :outer; - } - inner: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :inner; - const literal = math.cast(u18, displacement) catch break :inner; - // nop - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); - // ldr x16, literal - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - break :outer; - } - // Use adrp followed by ldr(register). - const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - // adrp x16, pages - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x16, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - const offset = try math.divExact(u12, narrowed, 8); - // ldr x16, x16, offset - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(.x16, .{ - .register = .{ - .rn = .x16, - .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), - }, - }).toU32()); - } - // br x16 - mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.br(.x16).toU32()); - }, - else => unreachable, - } - try self.base.file.?.pwriteAll(code, stub_off); -} - -fn writeStubInStubHelper(self: *MachO, index: u32) !void { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = text_segment.sections.items[self.stub_helper_section_index.?]; - - const stub_size: u4 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const stub_off = self.stub_helper_stubs_start_off.? + index * stub_size; - - var code = try self.base.allocator.alloc(u8, stub_size); - defer self.base.allocator.free(code); - - switch (self.base.options.target.cpu.arch) { - .x86_64 => { - const displacement = try math.cast( - i32, - @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - stub_size, - ); - // pushq - code[0] = 0x68; - mem.writeIntLittle(u32, code[1..][0..4], 0x0); // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. - // jmpq - code[5] = 0xe9; - mem.writeIntLittle(u32, code[6..][0..4], @bitCast(u32, displacement)); - }, - .aarch64 => { - const literal = blk: { - const div_res = try math.divExact(u64, stub_size - @sizeOf(u32), 4); - break :blk try math.cast(u18, div_res); + for (lc.Segment.sections.items) |_, sect_id| { + const match = MatchingSection{ + .seg = @intCast(u16, lc_id), + .sect = @intCast(u16, sect_id), }; - // ldr w16, literal - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.w16, .{ - .literal = literal, - }).toU32()); - const displacement = try math.cast(i28, @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - 4); - // b disp - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.b(displacement).toU32()); - // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. 
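// A sketch of the page arithmetic behind the adrp fallbacks above: adrp
// materializes the 4 KiB page of the target, and the low 12 bits are
// applied separately. This mirrors the hunk's this_page/target_page
// computation, which likewise assumes the addresses fit in 32 bits of pages.

const std = @import("std");

fn adrpPages(this_addr: u64, target_addr: u64) !i21 {
    const this_page = @intCast(i32, this_addr >> 12);
    const target_page = @intCast(i32, target_addr >> 12);
    return std.math.cast(i21, target_page - this_page);
}

fn pageOff(target_addr: u64) u12 {
    return @truncate(u12, target_addr);
}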
- mem.writeIntLittle(u32, code[8..12], 0x0); - }, - else => unreachable, - } - try self.base.file.?.pwriteAll(code, stub_off); -} - -fn relocateSymbolTable(self: *MachO) !void { - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - const nlocals = self.locals.items.len; - const nglobals = self.globals.items.len; - const nundefs = self.undefs.items.len; - const nsyms = nlocals + nglobals + nundefs; - - if (symtab.nsyms < nsyms) { - const needed_size = nsyms * @sizeOf(macho.nlist_64); - if (needed_size > self.allocatedSizeLinkedit(symtab.symoff)) { - // Move the entire symbol table to a new location - const new_symoff = self.findFreeSpaceLinkedit(needed_size, @alignOf(macho.nlist_64), null); - const existing_size = symtab.nsyms * @sizeOf(macho.nlist_64); - - log.debug("relocating symbol table from 0x{x}-0x{x} to 0x{x}-0x{x}", .{ - symtab.symoff, - symtab.symoff + existing_size, - new_symoff, - new_symoff + existing_size, - }); - - const amt = try self.base.file.?.copyRangeAll(symtab.symoff, self.base.file.?, new_symoff, existing_size); - if (amt != existing_size) return error.InputOutput; - symtab.symoff = @intCast(u32, new_symoff); - self.strtab_needs_relocation = true; + const old_ordinal = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + new_ordinal += 1; + try ordinal_remap.putNoClobber(old_ordinal, new_ordinal); + try ordinals.putNoClobber(self.base.allocator, match, {}); } - symtab.nsyms = @intCast(u32, nsyms); - self.load_commands_dirty = true; } -} -fn writeLocalSymbol(self: *MachO, index: usize) !void { - const tracy = trace(@src()); - defer tracy.end(); - try self.relocateSymbolTable(); - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - const off = symtab.symoff + @sizeOf(macho.nlist_64) * index; - log.debug("writing local symbol {} at 0x{x}", .{ index, off }); - try self.base.file.?.pwriteAll(mem.asBytes(&self.locals.items[index]), off); -} - -fn writeAllGlobalAndUndefSymbols(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - - try self.relocateSymbolTable(); - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - const nlocals = self.locals.items.len; - const nglobals = self.globals.items.len; - const nundefs = self.undefs.items.len; - - const locals_off = symtab.symoff; - const locals_size = nlocals * @sizeOf(macho.nlist_64); - - const globals_off = locals_off + locals_size; - const globals_size = nglobals * @sizeOf(macho.nlist_64); - log.debug("writing global symbols from 0x{x} to 0x{x}", .{ globals_off, globals_size + globals_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), globals_off); - - const undefs_off = globals_off + globals_size; - const undefs_size = nundefs * @sizeOf(macho.nlist_64); - log.debug("writing extern symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.undefs.items), undefs_off); - - // Update dynamic symbol table. - const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - dysymtab.nlocalsym = @intCast(u32, nlocals); - dysymtab.iextdefsym = @intCast(u32, nlocals); - dysymtab.nextdefsym = @intCast(u32, nglobals); - dysymtab.iundefsym = @intCast(u32, nlocals + nglobals); - dysymtab.nundefsym = @intCast(u32, nundefs); - self.load_commands_dirty = true; -} - -fn writeIndirectSymbolTable(self: *MachO) !void { - // TODO figure out a way not to rewrite the table every time if - // no new undefs are not added. 
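// writeAllGlobalAndUndefSymbols above relies on a fixed symtab layout of
// locals, then globals, then undefs; the dysymtab index ranges fall out of
// the three counts. A sketch:

const Ranges = struct { iextdefsym: u32, iundefsym: u32, nsyms: u32 };

fn symtabRanges(nlocals: u32, nglobals: u32, nundefs: u32) Ranges {
    return .{
        .iextdefsym = nlocals, // globals start right after the locals
        .iundefsym = nlocals + nglobals, // undefs come last
        .nsyms = nlocals + nglobals + nundefs,
    };
}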
- const tracy = trace(@src()); - defer tracy.end(); - - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = &text_segment.sections.items[self.stubs_section_index.?]; - const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_seg.sections.items[self.got_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - - const nstubs = @intCast(u32, self.stubs.items.len); - const ngot_entries = @intCast(u32, self.got_entries.items.len); - const allocated_size = self.allocatedSizeLinkedit(dysymtab.indirectsymoff); - const nindirectsyms = nstubs * 2 + ngot_entries; - const needed_size = @intCast(u32, nindirectsyms * @sizeOf(u32)); - - if (needed_size > allocated_size) { - dysymtab.nindirectsyms = 0; - dysymtab.indirectsymoff = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, @sizeOf(u32), null)); + for (self.locals.items) |*sym| { + if (sym.n_sect == 0) continue; + sym.n_sect = ordinal_remap.get(sym.n_sect).?; } - dysymtab.nindirectsyms = nindirectsyms; - log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ - dysymtab.indirectsymoff, - dysymtab.indirectsymoff + needed_size, + for (self.globals.items) |*sym| { + sym.n_sect = ordinal_remap.get(sym.n_sect).?; + } + + self.section_ordinals.deinit(self.base.allocator); + self.section_ordinals = ordinals; +} + +fn writeDyldInfoData(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + var rebase_pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + defer rebase_pointers.deinit(); + var bind_pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + defer bind_pointers.deinit(); + var lazy_bind_pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + defer lazy_bind_pointers.deinit(); + + { + var it = self.atoms.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var atom: *Atom = entry.value_ptr.*; + + if (match.seg == self.text_segment_cmd_index.?) continue; // __TEXT is non-writable + + const seg = self.load_commands.items[match.seg].Segment; + + while (true) { + const sym = self.locals.items[atom.local_sym_index]; + const base_offset = sym.n_value - seg.inner.vmaddr; + + for (atom.rebases.items) |offset| { + try rebase_pointers.append(.{ + .offset = base_offset + offset, + .segment_id = match.seg, + }); + } + + for (atom.bindings.items) |binding| { + const bind_sym = self.undefs.items[binding.local_sym_index]; + try bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = match.seg, + .dylib_ordinal = @divExact(bind_sym.n_desc, macho.N_SYMBOL_RESOLVER), + .name = self.getString(bind_sym.n_strx), + }); + } + + for (atom.lazy_bindings.items) |binding| { + const bind_sym = self.undefs.items[binding.local_sym_index]; + try lazy_bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = match.seg, + .dylib_ordinal = @divExact(bind_sym.n_desc, macho.N_SYMBOL_RESOLVER), + .name = self.getString(bind_sym.n_strx), + }); + } + + if (atom.prev) |prev| { + atom = prev; + } else break; + } + } + } + + var trie: Trie = .{}; + defer trie.deinit(self.base.allocator); + + { + // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
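
// [Sketch] Each trie entry maps an exported symbol name to its offset from
// the image base (__TEXT's vmaddr) plus export flags. Field names follow the
// Trie.put call below; the values here are hypothetical:
//
//   try trie.put(allocator, .{
//       .name = "_main",
//       .vmaddr_offset = 0x3f70, // sym.n_value - text_segment.inner.vmaddr
//       .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR,
//   });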
+ log.debug("generating export trie", .{}); + const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const base_address = text_segment.inner.vmaddr; + + for (self.globals.items) |sym| { + const sym_name = self.getString(sym.n_strx); + log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); + + try trie.put(self.base.allocator, .{ + .name = sym_name, + .vmaddr_offset = sym.n_value - base_address, + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + }); + } + + try trie.finalize(self.base.allocator); + } + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + const rebase_size = try bind.rebaseInfoSize(rebase_pointers.items); + const bind_size = try bind.bindInfoSize(bind_pointers.items); + const lazy_bind_size = try bind.lazyBindInfoSize(lazy_bind_pointers.items); + const export_size = trie.size; + + dyld_info.rebase_off = @intCast(u32, seg.inner.fileoff); + dyld_info.rebase_size = @intCast(u32, mem.alignForwardGeneric(u64, rebase_size, @alignOf(u64))); + seg.inner.filesize += dyld_info.rebase_size; + + dyld_info.bind_off = dyld_info.rebase_off + dyld_info.rebase_size; + dyld_info.bind_size = @intCast(u32, mem.alignForwardGeneric(u64, bind_size, @alignOf(u64))); + seg.inner.filesize += dyld_info.bind_size; + + dyld_info.lazy_bind_off = dyld_info.bind_off + dyld_info.bind_size; + dyld_info.lazy_bind_size = @intCast(u32, mem.alignForwardGeneric(u64, lazy_bind_size, @alignOf(u64))); + seg.inner.filesize += dyld_info.lazy_bind_size; + + dyld_info.export_off = dyld_info.lazy_bind_off + dyld_info.lazy_bind_size; + dyld_info.export_size = @intCast(u32, mem.alignForwardGeneric(u64, export_size, @alignOf(u64))); + seg.inner.filesize += dyld_info.export_size; + + const needed_size = dyld_info.rebase_size + dyld_info.bind_size + dyld_info.lazy_bind_size + dyld_info.export_size; + var buffer = try self.base.allocator.alloc(u8, needed_size); + defer self.base.allocator.free(buffer); + mem.set(u8, buffer, 0); + + var stream = std.io.fixedBufferStream(buffer); + const writer = stream.writer(); + + try bind.writeRebaseInfo(rebase_pointers.items, writer); + try stream.seekBy(@intCast(i64, dyld_info.rebase_size) - @intCast(i64, rebase_size)); + + try bind.writeBindInfo(bind_pointers.items, writer); + try stream.seekBy(@intCast(i64, dyld_info.bind_size) - @intCast(i64, bind_size)); + + try bind.writeLazyBindInfo(lazy_bind_pointers.items, writer); + try stream.seekBy(@intCast(i64, dyld_info.lazy_bind_size) - @intCast(i64, lazy_bind_size)); + + _ = try trie.write(writer); + + log.debug("writing dyld info from 0x{x} to 0x{x}", .{ + dyld_info.rebase_off, + dyld_info.rebase_off + needed_size, }); - var buf = try self.base.allocator.alloc(u8, needed_size); - defer self.base.allocator.free(buf); - var stream = std.io.fixedBufferStream(buf); - var writer = stream.writer(); + try self.base.file.?.pwriteAll(buffer, dyld_info.rebase_off); + try self.populateLazyBindOffsetsInStubHelper( + buffer[dyld_info.rebase_size + dyld_info.bind_size ..][0..dyld_info.lazy_bind_size], + ); + self.load_commands_dirty = true; +} - stubs.reserved1 = 0; - for (self.stubs.items) |id| { - try writer.writeIntLittle(u32, dysymtab.iundefsym + id); +fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { + const last_atom = self.atoms.get(.{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }) orelse 
return;
+    if (last_atom == self.stub_helper_preamble_atom.?) return;
+
+    // Because we insert lazy binding opcodes in reverse order (from the last atom to the first),
+    // we need to reverse the order of atom traversal here as well.
+    // TODO figure out a less error-prone mechanism for this!
+    var atom = last_atom;
+    while (atom.prev) |prev| {
+        atom = prev;
     }
+    atom = atom.next.?;
 
-    got.reserved1 = nstubs;
-    for (self.got_entries.items) |entry| {
-        switch (entry.where) {
-            .undef => {
-                try writer.writeIntLittle(u32, dysymtab.iundefsym + entry.where_index);
+    var stream = std.io.fixedBufferStream(buffer);
+    var reader = stream.reader();
+    var offsets = std.ArrayList(u32).init(self.base.allocator);
+    try offsets.append(0);
+    defer offsets.deinit();
+    var valid_block = false;
+
+    while (true) {
+        const inst = reader.readByte() catch |err| switch (err) {
+            error.EndOfStream => break,
+            else => return err,
+        };
+        const opcode: u8 = inst & macho.BIND_OPCODE_MASK;
+
+        switch (opcode) {
+            macho.BIND_OPCODE_DO_BIND => {
+                valid_block = true;
             },
-            .local => {
-                try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL);
+            macho.BIND_OPCODE_DONE => {
+                if (valid_block) {
+                    const offset = try stream.getPos();
+                    try offsets.append(@intCast(u32, offset));
+                }
+                valid_block = false;
             },
+            macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
+                var next = try reader.readByte();
+                while (next != @as(u8, 0)) {
+                    next = try reader.readByte();
+                }
+            },
+            macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
+                _ = try std.leb.readULEB128(u64, reader);
+            },
+            macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => {
+                _ = try std.leb.readULEB128(u64, reader);
+            },
+            macho.BIND_OPCODE_SET_ADDEND_SLEB => {
+                _ = try std.leb.readILEB128(i64, reader);
+            },
+            else => {},
         }
     }
 
-    la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries;
-    for (self.stubs.items) |id| {
-        try writer.writeIntLittle(u32, dysymtab.iundefsym + id);
-    }
+    const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const sect = seg.sections.items[self.stub_helper_section_index.?];
+    const stub_offset: u4 = switch (self.base.options.target.cpu.arch) {
+        .x86_64 => 1,
+        .aarch64 => 2 * @sizeOf(u32),
+        else => unreachable,
+    };
+    var buf: [@sizeOf(u32)]u8 = undefined;
+    _ = offsets.pop();
+    while (offsets.popOrNull()) |bind_offset| {
+        const sym = self.locals.items[atom.local_sym_index];
+        const file_offset = sect.offset + sym.n_value - sect.addr + stub_offset;
+        mem.writeIntLittle(u32, &buf, bind_offset);
+        log.debug("writing lazy bind offset in stub helper of 0x{x} for symbol {s} at offset 0x{x}", .{
+            bind_offset,
+            self.getString(sym.n_strx),
+            file_offset,
+        });
+        try self.base.file.?.pwriteAll(&buf, file_offset);
 
-    try self.base.file.?.pwriteAll(buf, dysymtab.indirectsymoff);
-    self.load_commands_dirty = true;
+        if (atom.next) |next| {
+            atom = next;
+        } else break;
+    }
 }
 
 fn writeDices(self: *MachO) !void {
     if (!self.has_dices) return;
 
-    const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
-    const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData;
-    const fileoff = seg.inner.fileoff + seg.inner.filesize;
+    const tracy = trace(@src());
+    defer tracy.end();
 
     var buf = std.ArrayList(u8).init(self.base.allocator);
     defer buf.deinit();
 
-    var block: *TextBlock = self.blocks.get(.{
+    var atom: *Atom = self.atoms.get(.{
         .seg = self.text_segment_cmd_index orelse return,
         .sect = self.text_section_index orelse return,
     }) orelse return;
 
-    while (block.prev) |prev| {
-        block = prev;
+    while 
(atom.prev) |prev| { + atom = prev; } const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const text_sect = text_seg.sections.items[self.text_section_index.?]; while (true) { - if (block.dices.items.len > 0) { - const sym = self.locals.items[block.local_sym_index]; + if (atom.dices.items.len > 0) { + const sym = self.locals.items[atom.local_sym_index]; const base_off = try math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset); - try buf.ensureUnusedCapacity(block.dices.items.len * @sizeOf(macho.data_in_code_entry)); - for (block.dices.items) |dice| { + try buf.ensureUnusedCapacity(atom.dices.items.len * @sizeOf(macho.data_in_code_entry)); + for (atom.dices.items) |dice| { const rebased_dice = macho.data_in_code_entry{ .offset = base_off + dice.offset, .length = dice.length, @@ -5257,24 +4576,214 @@ fn writeDices(self: *MachO) !void { } } - if (block.next) |next| { - block = next; + if (atom.next) |next| { + atom = next; } else break; } - const datasize = @intCast(u32, buf.items.len); + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData; + const needed_size = @intCast(u32, buf.items.len); - dice_cmd.dataoff = @intCast(u32, fileoff); - dice_cmd.datasize = datasize; - seg.inner.filesize += datasize; + dice_cmd.dataoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + dice_cmd.datasize = needed_size; + seg.inner.filesize += needed_size; - log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ fileoff, fileoff + datasize }); + log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ + dice_cmd.dataoff, + dice_cmd.dataoff + dice_cmd.datasize, + }); - try self.base.file.?.pwriteAll(buf.items, fileoff); + try self.base.file.?.pwriteAll(buf.items, dice_cmd.dataoff); + self.load_commands_dirty = true; +} + +fn writeSymbolTable(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + + var locals = std.ArrayList(macho.nlist_64).init(self.base.allocator); + defer locals.deinit(); + try locals.appendSlice(self.locals.items); + + if (self.has_stabs) { + for (self.objects.items) |object| { + if (object.debug_info == null) continue; + + // Open scope + try locals.ensureUnusedCapacity(3); + locals.appendAssumeCapacity(.{ + .n_strx = try self.makeString(object.tu_comp_dir.?), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.makeString(object.tu_name.?), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.makeString(object.name), + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = object.mtime orelse 0, + }); + + for (object.atoms.items) |atom| { + if (atom.stab) |stab| { + const nlists = try stab.asNlists(atom.local_sym_index, self); + defer self.base.allocator.free(nlists); + try locals.appendSlice(nlists); + } else { + for (atom.contained.items) |sym_at_off| { + const stab = sym_at_off.stab orelse continue; + const nlists = try stab.asNlists(sym_at_off.local_sym_index, self); + defer self.base.allocator.free(nlists); + try locals.appendSlice(nlists); + } + } + } + + // Close scope + try locals.append(.{ + .n_strx = 
0, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + } + } + + const nlocals = locals.items.len; + const nexports = self.globals.items.len; + const nundefs = self.undefs.items.len; + + const locals_off = symtab.symoff; + const locals_size = nlocals * @sizeOf(macho.nlist_64); + log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); + + const exports_off = locals_off + locals_size; + const exports_size = nexports * @sizeOf(macho.nlist_64); + log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), exports_off); + + const undefs_off = exports_off + exports_size; + const undefs_size = nundefs * @sizeOf(macho.nlist_64); + log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.undefs.items), undefs_off); + + symtab.nsyms = @intCast(u32, nlocals + nexports + nundefs); + seg.inner.filesize += locals_size + exports_size + undefs_size; + + // Update dynamic symbol table. + const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; + dysymtab.nlocalsym = @intCast(u32, nlocals); + dysymtab.iextdefsym = dysymtab.nlocalsym; + dysymtab.nextdefsym = @intCast(u32, nexports); + dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; + dysymtab.nundefsym = @intCast(u32, nundefs); + + const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const stubs = &text_segment.sections.items[self.stubs_section_index.?]; + const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const got = &data_const_segment.sections.items[self.got_section_index.?]; + const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; + + const nstubs = @intCast(u32, self.stubs_map.keys().len); + const ngot_entries = @intCast(u32, self.got_entries_map.keys().len); + + dysymtab.indirectsymoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + dysymtab.nindirectsyms = nstubs * 2 + ngot_entries; + + const needed_size = dysymtab.nindirectsyms * @sizeOf(u32); + seg.inner.filesize += needed_size; + + log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ + dysymtab.indirectsymoff, + dysymtab.indirectsymoff + needed_size, + }); + + var buf = try self.base.allocator.alloc(u8, needed_size); + defer self.base.allocator.free(buf); + + var stream = std.io.fixedBufferStream(buf); + var writer = stream.writer(); + + stubs.reserved1 = 0; + for (self.stubs_map.keys()) |key| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + key); + } + + got.reserved1 = nstubs; + for (self.got_entries_map.keys()) |key| { + switch (key.where) { + .undef => { + try writer.writeIntLittle(u32, dysymtab.iundefsym + key.where_index); + }, + .local => { + try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); + }, + } + } + + la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; + for (self.stubs_map.keys()) |key| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + key); + } + + try self.base.file.?.pwriteAll(buf, dysymtab.indirectsymoff); + self.load_commands_dirty = true; +} + +fn writeStringTable(self: *MachO) !void { + const tracy = trace(@src()); + defer 
tracy.end(); + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + symtab.stroff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + symtab.strsize = @intCast(u32, mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64))); + seg.inner.filesize += symtab.strsize; + + log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); + + try self.base.file.?.pwriteAll(self.strtab.items, symtab.stroff); + + if (symtab.strsize > self.strtab.items.len) { + // This is potentially the last section, so we need to pad it out. + try self.base.file.?.pwriteAll(&[_]u8{0}, seg.inner.fileoff + seg.inner.filesize - 1); + } + self.load_commands_dirty = true; +} + +fn writeLinkeditSegment(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + seg.inner.filesize = 0; + + try self.writeDyldInfoData(); + try self.writeDices(); + try self.writeSymbolTable(); + try self.writeStringTable(); + + seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); } fn writeCodeSignaturePadding(self: *MachO) !void { - // TODO figure out how not to rewrite padding every single time. const tracy = trace(@src()); defer tracy.end(); @@ -5331,443 +4840,6 @@ fn writeCodeSignature(self: *MachO) !void { try self.base.file.?.pwriteAll(buffer, code_sig_cmd.dataoff); } -fn writeExportInfo(self: *MachO) !void { - if (!self.export_info_dirty) return; - if (self.globals.items.len == 0) return; - - const tracy = trace(@src()); - defer tracy.end(); - - var trie: Trie = .{}; - defer trie.deinit(self.base.allocator); - - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const base_address = text_segment.inner.vmaddr; - - // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. - log.debug("writing export trie", .{}); - - for (self.globals.items) |sym| { - const sym_name = self.getString(sym.n_strx); - log.debug(" | putting '{s}' defined at 0x{x}", .{ sym_name, sym.n_value }); - - try trie.put(self.base.allocator, .{ - .name = sym_name, - .vmaddr_offset = sym.n_value - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); - } - try trie.finalize(self.base.allocator); - - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, trie.size)); - defer self.base.allocator.free(buffer); - var stream = std.io.fixedBufferStream(buffer); - const nwritten = try trie.write(stream.writer()); - assert(nwritten == trie.size); - - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const allocated_size = self.allocatedSizeLinkedit(dyld_info.export_off); - const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); - - if (needed_size > allocated_size) { - dyld_info.export_off = 0; - dyld_info.export_off = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, null)); - // TODO this might require relocating all following LC_DYLD_INFO_ONLY sections too. 
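
// [Sketch] The relocation hazard described in this TODO is what the new
// writeLinkeditSegment above sidesteps: rather than growing each blob in
// place, every flush writes the dyld info back to back and simply accumulates
// seg.inner.filesize. With hypothetical u64-aligned sizes:
//
//   rebase_off    = 0x8000                   (start of __LINKEDIT)
//   bind_off      = 0x8000 + 0x68  = 0x8068
//   lazy_bind_off = 0x8068 + 0x28  = 0x8090
//   export_off    = 0x8090 + 0x40  = 0x80d0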
- } - dyld_info.export_size = @intCast(u32, needed_size); - log.debug("writing export info from 0x{x} to 0x{x}", .{ dyld_info.export_off, dyld_info.export_off + dyld_info.export_size }); - - try self.base.file.?.pwriteAll(buffer, dyld_info.export_off); - self.load_commands_dirty = true; - self.export_info_dirty = false; -} - -fn writeRebaseInfoTable(self: *MachO) !void { - if (!self.rebase_info_dirty) return; - - const tracy = trace(@src()); - defer tracy.end(); - - var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); - defer pointers.deinit(); - - { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - if (match.seg == self.text_segment_cmd_index.?) continue; // __TEXT is non-writable - - const seg = self.load_commands.items[match.seg].Segment; - - while (true) { - const sym = self.locals.items[block.local_sym_index]; - const base_offset = sym.n_value - seg.inner.vmaddr; - - for (block.rebases.items) |offset| { - try pointers.append(.{ - .offset = base_offset + offset, - .segment_id = match.seg, - }); - } - - if (block.prev) |prev| { - block = prev; - } else break; - } - } - } - - if (self.got_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - - for (self.got_entries.items) |entry, i| { - if (entry.where == .undef) continue; - - try pointers.append(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - }); - } - } - - if (self.la_symbol_ptr_section_index) |idx| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - - try pointers.ensureUnusedCapacity(self.stubs.items.len); - for (self.stubs.items) |_, i| { - pointers.appendAssumeCapacity(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - }); - } - } - - std.sort.sort(bind.Pointer, pointers.items, {}, bind.pointerCmp); - - const size = try bind.rebaseInfoSize(pointers.items); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try bind.writeRebaseInfo(pointers.items, stream.writer()); - - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const allocated_size = self.allocatedSizeLinkedit(dyld_info.rebase_off); - const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); - - if (needed_size > allocated_size) { - dyld_info.rebase_off = 0; - dyld_info.rebase_off = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, null)); - // TODO this might require relocating all following LC_DYLD_INFO_ONLY sections too. 
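
// [Sketch] A rebase entry is a (segment_id, segment-relative offset) pair
// marking an absolute pointer that dyld must slide under ASLR; the remaining
// bind.Pointer fields are only used for (lazy) bindings. Hypothetical values:
//
//   try pointers.append(.{
//       .offset = 0x1010,  // sym.n_value - seg.inner.vmaddr + offset in atom
//       .segment_id = 2,   // e.g. the __DATA segment's load command index
//   });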
- } - - dyld_info.rebase_size = @intCast(u32, needed_size); - log.debug("writing rebase info from 0x{x} to 0x{x}", .{ dyld_info.rebase_off, dyld_info.rebase_off + dyld_info.rebase_size }); - - try self.base.file.?.pwriteAll(buffer, dyld_info.rebase_off); - self.load_commands_dirty = true; - self.rebase_info_dirty = false; -} - -fn writeBindInfoTable(self: *MachO) !void { - if (!self.binding_info_dirty) return; - - const tracy = trace(@src()); - defer tracy.end(); - - var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); - defer pointers.deinit(); - - if (self.got_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - - for (self.got_entries.items) |entry, i| { - if (entry.where == .local) continue; - - const sym = self.undefs.items[entry.where_index]; - try pointers.append(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = @divExact(sym.n_desc, macho.N_SYMBOL_RESOLVER), - .name = self.getString(sym.n_strx), - }); - } - } - - { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - if (match.seg == self.text_segment_cmd_index.?) continue; // __TEXT is non-writable - - const seg = self.load_commands.items[match.seg].Segment; - - while (true) { - const sym = self.locals.items[block.local_sym_index]; - const base_offset = sym.n_value - seg.inner.vmaddr; - - for (block.bindings.items) |binding| { - const bind_sym = self.undefs.items[binding.local_sym_index]; - try pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = match.seg, - .dylib_ordinal = @divExact(bind_sym.n_desc, macho.N_SYMBOL_RESOLVER), - .name = self.getString(bind_sym.n_strx), - }); - } - - if (block.prev) |prev| { - block = prev; - } else break; - } - } - } - - const size = try bind.bindInfoSize(pointers.items); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try bind.writeBindInfo(pointers.items, stream.writer()); - - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const allocated_size = self.allocatedSizeLinkedit(dyld_info.bind_off); - const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); - - if (needed_size > allocated_size) { - dyld_info.bind_off = 0; - dyld_info.bind_off = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, null)); - // TODO this might require relocating all following LC_DYLD_INFO_ONLY sections too. 
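
// [Sketch] Why @divExact(sym.n_desc, macho.N_SYMBOL_RESOLVER) recovers the
// dylib ordinal above: the ordinal of the supplying dylib is stored in the
// high byte of nlist_64.n_desc, and N_SYMBOL_RESOLVER == 0x100, so as long as
// no other n_desc bits are set the division is an exact right shift by 8:
//
//   n_desc = 2 << 8 = 0x200  =>  0x200 / 0x100 = 2 (the second LC_LOAD_DYLIB)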
- } - - dyld_info.bind_size = @intCast(u32, needed_size); - log.debug("writing binding info from 0x{x} to 0x{x}", .{ dyld_info.bind_off, dyld_info.bind_off + dyld_info.bind_size }); - - try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off); - self.load_commands_dirty = true; - self.binding_info_dirty = false; -} - -fn writeLazyBindInfoTable(self: *MachO) !void { - if (!self.lazy_binding_info_dirty) return; - - const tracy = trace(@src()); - defer tracy.end(); - - var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); - defer pointers.deinit(); - - if (self.la_symbol_ptr_section_index) |idx| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - - try pointers.ensureUnusedCapacity(self.stubs.items.len); - - for (self.stubs.items) |import_id, i| { - const sym = self.undefs.items[import_id]; - pointers.appendAssumeCapacity(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = @divExact(sym.n_desc, macho.N_SYMBOL_RESOLVER), - .name = self.getString(sym.n_strx), - }); - } - } - - const size = try bind.lazyBindInfoSize(pointers.items); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try bind.writeLazyBindInfo(pointers.items, stream.writer()); - - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const allocated_size = self.allocatedSizeLinkedit(dyld_info.lazy_bind_off); - const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); - - if (needed_size > allocated_size) { - dyld_info.lazy_bind_off = 0; - dyld_info.lazy_bind_off = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, null)); - // TODO this might require relocating all following LC_DYLD_INFO_ONLY sections too. 
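
// [Sketch] The round trip these lazy bind pointers drive: each
// __la_symbol_ptr slot initially holds the address of its stub-helper entry,
// so the first call through a stub travels
//
//   call _foo$stub -> jmp *la_ptr -> stub helper -> dyld_stub_binder
//
// dyld_stub_binder locates the opcodes for _foo via the pushed
// lazy_bind_offset, resolves the symbol, and writes the result into the slot,
// so every subsequent call short-circuits to
//
//   call _foo$stub -> jmp *la_ptr -> _foo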
- } - - dyld_info.lazy_bind_size = @intCast(u32, needed_size); - log.debug("writing lazy binding info from 0x{x} to 0x{x}", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size }); - - try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off); - try self.populateLazyBindOffsetsInStubHelper(buffer); - self.load_commands_dirty = true; - self.lazy_binding_info_dirty = false; -} - -fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { - if (self.stubs.items.len == 0) return; - - var stream = std.io.fixedBufferStream(buffer); - var reader = stream.reader(); - var offsets = std.ArrayList(u32).init(self.base.allocator); - try offsets.append(0); - defer offsets.deinit(); - var valid_block = false; - - while (true) { - const inst = reader.readByte() catch |err| switch (err) { - error.EndOfStream => break, - else => return err, - }; - const opcode: u8 = inst & macho.BIND_OPCODE_MASK; - - switch (opcode) { - macho.BIND_OPCODE_DO_BIND => { - valid_block = true; - }, - macho.BIND_OPCODE_DONE => { - if (valid_block) { - const offset = try stream.getPos(); - try offsets.append(@intCast(u32, offset)); - } - valid_block = false; - }, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { - var next = try reader.readByte(); - while (next != @as(u8, 0)) { - next = try reader.readByte(); - } - }, - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { - _ = try std.leb.readULEB128(u64, reader); - }, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => { - _ = try std.leb.readULEB128(u64, reader); - }, - macho.BIND_OPCODE_SET_ADDEND_SLEB => { - _ = try std.leb.readILEB128(i64, reader); - }, - else => {}, - } - } - assert(self.stubs.items.len <= offsets.items.len); - - const stub_size: u4 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const off: u4 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 1, - .aarch64 => 2 * @sizeOf(u32), - else => unreachable, - }; - var buf: [@sizeOf(u32)]u8 = undefined; - for (self.stubs.items) |_, index| { - const placeholder_off = self.stub_helper_stubs_start_off.? 
+ index * stub_size + off; - mem.writeIntLittle(u32, &buf, offsets.items[index]); - try self.base.file.?.pwriteAll(&buf, placeholder_off); - } -} - -fn writeStringTable(self: *MachO) !void { - if (!self.strtab_dirty) return; - - const tracy = trace(@src()); - defer tracy.end(); - - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - const allocated_size = self.allocatedSizeLinkedit(symtab.stroff); - const needed_size = mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64)); - - if (needed_size > allocated_size or self.strtab_needs_relocation) { - symtab.strsize = 0; - symtab.stroff = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, symtab.symoff)); - self.strtab_needs_relocation = false; - } - symtab.strsize = @intCast(u32, needed_size); - log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - - try self.base.file.?.pwriteAll(self.strtab.items, symtab.stroff); - self.load_commands_dirty = true; - self.strtab_dirty = false; -} - -fn writeStringTableZld(self: *MachO) !void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - symtab.stroff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - symtab.strsize = @intCast(u32, mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64))); - seg.inner.filesize += symtab.strsize; - - log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - - try self.base.file.?.pwriteAll(self.strtab.items, symtab.stroff); - - if (symtab.strsize > self.strtab.items.len) { - // This is potentially the last section, so we need to pad it out. - try self.base.file.?.pwriteAll(&[_]u8{0}, seg.inner.fileoff + seg.inner.filesize - 1); - } -} - -fn updateLinkeditSegmentSizes(self: *MachO) !void { - if (!self.load_commands_dirty) return; - - const tracy = trace(@src()); - defer tracy.end(); - - // Now, we are in position to update __LINKEDIT segment sizes. - // TODO Add checkpointing so that we don't have to do this every single time. - const linkedit_segment = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - var final_offset = linkedit_segment.inner.fileoff; - - if (self.dyld_info_cmd_index) |idx| { - const dyld_info = self.load_commands.items[idx].DyldInfoOnly; - final_offset = std.math.max(final_offset, dyld_info.rebase_off + dyld_info.rebase_size); - final_offset = std.math.max(final_offset, dyld_info.bind_off + dyld_info.bind_size); - final_offset = std.math.max(final_offset, dyld_info.weak_bind_off + dyld_info.weak_bind_size); - final_offset = std.math.max(final_offset, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size); - final_offset = std.math.max(final_offset, dyld_info.export_off + dyld_info.export_size); - } - if (self.function_starts_cmd_index) |idx| { - const fstart = self.load_commands.items[idx].LinkeditData; - final_offset = std.math.max(final_offset, fstart.dataoff + fstart.datasize); - } - if (self.data_in_code_cmd_index) |idx| { - const dic = self.load_commands.items[idx].LinkeditData; - final_offset = std.math.max(final_offset, dic.dataoff + dic.datasize); - } - if (self.dysymtab_cmd_index) |idx| { - const dysymtab = self.load_commands.items[idx].Dysymtab; - const nindirectsize = dysymtab.nindirectsyms * @sizeOf(u32); - final_offset = std.math.max(final_offset, dysymtab.indirectsymoff + nindirectsize); - // TODO Handle more dynamic symbol table sections. 
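
// [Sketch] A condensed restatement of the bookkeeping being deleted here: the
// final __LINKEDIT file size was the maximum end offset over every blob the
// segment could contain (hypothetical helper, not part of the patch):
//
//   fn linkeditEndOffset(blobs: []const struct { off: u64, size: u64 }) u64 {
//       var end: u64 = 0;
//       for (blobs) |blob| {
//           end = std.math.max(end, blob.off + blob.size);
//       }
//       return end;
//   }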
-    }
-
-    if (self.symtab_cmd_index) |idx| {
-        const symtab = self.load_commands.items[idx].Symtab;
-        const symsize = symtab.nsyms * @sizeOf(macho.nlist_64);
-        final_offset = std.math.max(final_offset, symtab.symoff + symsize);
-        final_offset = std.math.max(final_offset, symtab.stroff + symtab.strsize);
-    }
-
-    const filesize = final_offset - linkedit_segment.inner.fileoff;
-    linkedit_segment.inner.filesize = filesize;
-    linkedit_segment.inner.vmsize = mem.alignForwardGeneric(u64, filesize, self.page_size);
-    try self.base.file.?.pwriteAll(&[_]u8{0}, final_offset);
-    self.load_commands_dirty = true;
-}
-
 /// Writes all load commands and section headers.
 fn writeLoadCommands(self: *MachO) !void {
     if (!self.load_commands_dirty) return;
@@ -5844,6 +4916,13 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) {
         std.math.maxInt(@TypeOf(actual_size));
 }
 
+pub fn makeStaticString(bytes: []const u8) [16]u8 {
+    var buf = [_]u8{0} ** 16;
+    assert(bytes.len <= buf.len);
+    mem.copy(u8, &buf, bytes);
+    return buf;
+}
+
 pub fn makeString(self: *MachO, string: []const u8) !u32 {
     const gop = try self.strtab_dir.getOrPutContextAdapted(self.base.allocator, @as([]const u8, string), StringIndexAdapter{
         .bytes = &self.strtab,
diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/Atom.zig
similarity index 83%
rename from src/link/MachO/TextBlock.zig
rename to src/link/MachO/Atom.zig
index 7e08ae3a35..298855934e 100644
--- a/src/link/MachO/TextBlock.zig
+++ b/src/link/MachO/Atom.zig
@@ -1,6 +1,7 @@
-const TextBlock = @This();
+const Atom = @This();
 
 const std = @import("std");
+const build_options = @import("build_options");
 const aarch64 = @import("../../codegen/aarch64.zig");
 const assert = std.debug.assert;
 const commands = @import("commands.zig");
@@ -9,6 +10,7 @@ const macho = std.macho;
 const math = std.math;
 const mem = std.mem;
 const meta = std.meta;
+const trace = @import("../../tracy.zig").trace;
 
 const Allocator = mem.Allocator;
 const Arch = std.Target.Cpu.Arch;
@@ -24,54 +26,63 @@ const StringIndexAdapter = std.hash_map.StringIndexAdapter;
 /// offset table entry.
 local_sym_index: u32,
 
-/// List of symbol aliases pointing to the same block via different nlists
+/// List of symbol aliases pointing to the same atom via different nlists
 aliases: std.ArrayListUnmanaged(u32) = .{},
 
-/// List of symbols contained within this block
+/// List of symbols contained within this atom
 contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{},
 
-/// Code (may be non-relocated) this block represents
+/// Code (may be non-relocated) this atom represents
 code: std.ArrayListUnmanaged(u8) = .{},
 
-/// Size and alignment of this text block
+/// Size and alignment of this atom
 /// Unlike in Elf, we need to store the size of this symbol as part of
-/// the TextBlock since macho.nlist_64 lacks this information.
+/// the atom since macho.nlist_64 lacks this information.
 size: u64,
+
+/// Alignment of this atom as a power of 2.
+/// For instance, alignment of 0 should be read as 2^0 = 1 byte aligned.
 alignment: u32,
 
+/// List of relocations belonging to this atom.
 relocs: std.ArrayListUnmanaged(Relocation) = .{},
 
-/// List of offsets contained within this block that need rebasing by the dynamic
-/// loader in presence of ASLR
+/// List of offsets contained within this atom that need rebasing by the dynamic
+/// loader in presence of ASLR.
rebases: std.ArrayListUnmanaged(u64) = .{}, -/// List of offsets contained within this block that will be dynamically bound +/// List of offsets contained within this atom that will be dynamically bound /// by the dynamic loader and contain pointers to resolved (at load time) extern /// symbols (aka proxies aka imports) bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, +/// List of lazy bindings +lazy_bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, + /// List of data-in-code entries. This is currently specific to x86_64 only. dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, -/// Stab entry for this block. This is currently specific to a binary created +/// Stab entry for this atom. This is currently specific to a binary created /// by linking object files in a traditional sense - in incremental sense, we /// bypass stabs altogether to produce dSYM bundle directly with fully relocated /// DWARF sections. stab: ?Stab = null, /// Points to the previous and next neighbours -next: ?*TextBlock, -prev: ?*TextBlock, +next: ?*Atom, +prev: ?*Atom, /// Previous/next linked list pointers. /// This is the linked list node for this Decl's corresponding .debug_info tag. -dbg_info_prev: ?*TextBlock, -dbg_info_next: ?*TextBlock, +dbg_info_prev: ?*Atom, +dbg_info_next: ?*Atom, /// Offset into .debug_info pointing to the tag for this Decl. dbg_info_off: u32, /// Size of the .debug_info tag for this Decl, not including padding. dbg_info_len: u32, +dirty: bool = true, + pub const SymbolAtOffset = struct { local_sym_index: u32, offset: u64, @@ -160,7 +171,7 @@ pub const Stab = union(enum) { }; pub const Relocation = struct { - /// Offset within the `block`s code buffer. + /// Offset within the atom's code buffer. /// Note relocation size can be inferred by relocation's kind. 
offset: u32, @@ -182,7 +193,7 @@ pub const Relocation = struct { }, const ResolveArgs = struct { - block: *TextBlock, + block: *Atom, offset: u32, source_addr: u64, target_addr: u64, @@ -238,10 +249,21 @@ pub const Relocation = struct { pub fn resolve(self: Branch, args: ResolveArgs) !void { switch (self.arch) { .aarch64 => { - const displacement = try math.cast( + const displacement = math.cast( i28, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr), - ); + ) catch |err| switch (err) { + error.Overflow => { + log.err("jump too big to encode as i28 displacement value", .{}); + log.err(" (target - source) = displacement => 0x{x} - 0x{x} = 0x{x}", .{ + args.target_addr, + args.source_addr, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr), + }); + log.err(" TODO implement branch islands to extend jump distance for arm64", .{}); + return error.TODOImplementBranchIslands; + }, + }; const code = args.block.code.items[args.offset..][0..4]; var inst = aarch64.Instruction{ .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( @@ -474,13 +496,13 @@ pub const Relocation = struct { pub const Signed = struct { addend: i64, - correction: i4, + correction: u3, pub fn resolve(self: Signed, args: ResolveArgs) !void { const target_addr = @intCast(i64, args.target_addr) + self.addend; const displacement = try math.cast( i32, - target_addr - @intCast(i64, args.source_addr) - self.correction - 4, + target_addr - @intCast(i64, args.source_addr + self.correction + 4), ); mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); } @@ -556,7 +578,7 @@ pub const Relocation = struct { } }; -pub const empty = TextBlock{ +pub const empty = Atom{ .local_sym_index = 0, .size = 0, .alignment = 0, @@ -568,8 +590,9 @@ pub const empty = TextBlock{ .dbg_info_len = undefined, }; -pub fn deinit(self: *TextBlock, allocator: *Allocator) void { +pub fn deinit(self: *Atom, allocator: *Allocator) void { self.dices.deinit(allocator); + self.lazy_bindings.deinit(allocator); self.bindings.deinit(allocator); self.rebases.deinit(allocator); self.relocs.deinit(allocator); @@ -578,23 +601,34 @@ pub fn deinit(self: *TextBlock, allocator: *Allocator) void { self.code.deinit(allocator); } +pub fn clearRetainingCapacity(self: *Atom) void { + self.dices.clearRetainingCapacity(); + self.lazy_bindings.clearRetainingCapacity(); + self.bindings.clearRetainingCapacity(); + self.rebases.clearRetainingCapacity(); + self.relocs.clearRetainingCapacity(); + self.contained.clearRetainingCapacity(); + self.aliases.clearRetainingCapacity(); + self.code.clearRetainingCapacity(); +} + /// Returns how much room there is to grow in virtual address space. /// File offset relocation happens transparently, so it is not included in /// this calculation. -pub fn capacity(self: TextBlock, macho_file: MachO) u64 { +pub fn capacity(self: Atom, macho_file: MachO) u64 { const self_sym = macho_file.locals.items[self.local_sym_index]; if (self.next) |next| { const next_sym = macho_file.locals.items[next.local_sym_index]; return next_sym.n_value - self_sym.n_value; } else { - // We are the last block. + // We are the last atom. // The capacity is limited only by virtual address space. return std.math.maxInt(u64) - self_sym.n_value; } } -pub fn freeListEligible(self: TextBlock, macho_file: MachO) bool { - // No need to keep a free list node for the last block. +pub fn freeListEligible(self: Atom, macho_file: MachO) bool { + // No need to keep a free list node for the last atom. 
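
// [Worked example, hypothetical addresses] capacity() above measures how far
// an atom can grow in place: with this atom's symbol at n_value 0x1000 and
// its successor at 0x1100, capacity is 0x1100 - 0x1000 = 0x100 bytes.
// freeListEligible() then presumably only keeps a free-list node when that
// capacity leaves a surplus beyond the atom's own padded size.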
const next = self.next orelse return false; const self_sym = macho_file.locals.items[self.local_sym_index]; const next_sym = macho_file.locals.items[next.local_sym_index]; @@ -607,14 +641,16 @@ pub fn freeListEligible(self: TextBlock, macho_file: MachO) bool { const RelocContext = struct { base_addr: u64 = 0, + base_offset: u64 = 0, allocator: *Allocator, object: *Object, macho_file: *MachO, + parsed_atoms: *Object.ParsedAtoms, }; fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Relocation { var parsed_rel = Relocation{ - .offset = @intCast(u32, @intCast(u64, rel.r_address) - context.base_addr), + .offset = @intCast(u32, @intCast(u64, rel.r_address) - context.base_offset), .where = undefined, .where_index = undefined, .payload = undefined, @@ -640,7 +676,7 @@ fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Reloc .n_type = macho.N_SECT, .n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(match).? + 1), .n_desc = 0, - .n_value = sect.addr, + .n_value = 0, }); try context.object.sections_as_symbols.putNoClobber(context.allocator, sect_id, local_sym_index); break :blk local_sym_index; @@ -677,8 +713,11 @@ fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Reloc return parsed_rel; } -pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: RelocContext) !void { - const filtered_relocs = filterRelocs(relocs, context.base_addr, context.base_addr + self.size); +pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocContext) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const filtered_relocs = filterRelocs(relocs, context.base_offset, context.base_offset + self.size); var it = RelocIterator{ .buffer = filtered_relocs, }; @@ -831,9 +870,20 @@ pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: R }; if (context.macho_file.got_entries_map.contains(key)) break :blk; - const got_index = @intCast(u32, context.macho_file.got_entries.items.len); - try context.macho_file.got_entries.append(context.allocator, key); - try context.macho_file.got_entries_map.putNoClobber(context.allocator, key, got_index); + const atom = try context.macho_file.createGotAtom(key); + try context.macho_file.got_entries_map.putNoClobber(context.macho_file.base.allocator, key, atom); + const match = MachO.MatchingSection{ + .seg = context.macho_file.data_const_segment_cmd_index.?, + .sect = context.macho_file.got_section_index.?, + }; + + if (context.parsed_atoms.getPtr(match)) |last| { + last.*.next = atom; + atom.prev = last.*; + last.* = atom; + } else { + try context.parsed_atoms.putNoClobber(match, atom); + } } else if (parsed_rel.payload == .unsigned) { switch (parsed_rel.where) { .undef => { @@ -889,9 +939,53 @@ pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: R if (parsed_rel.where != .undef) break :blk; if (context.macho_file.stubs_map.contains(parsed_rel.where_index)) break :blk; - const stubs_index = @intCast(u32, context.macho_file.stubs.items.len); - try context.macho_file.stubs.append(context.allocator, parsed_rel.where_index); - try context.macho_file.stubs_map.putNoClobber(context.allocator, parsed_rel.where_index, stubs_index); + const stub_helper_atom = try context.macho_file.createStubHelperAtom(); + const laptr_atom = try context.macho_file.createLazyPointerAtom( + stub_helper_atom.local_sym_index, + parsed_rel.where_index, + ); + const stub_atom = try 
context.macho_file.createStubAtom(laptr_atom.local_sym_index); + try context.macho_file.stubs_map.putNoClobber(context.allocator, parsed_rel.where_index, stub_atom); + // TODO clean this up! + if (context.parsed_atoms.getPtr(.{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stub_helper_section_index.?, + })) |last| { + last.*.next = stub_helper_atom; + stub_helper_atom.prev = last.*; + last.* = stub_helper_atom; + } else { + try context.parsed_atoms.putNoClobber(.{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stub_helper_section_index.?, + }, stub_helper_atom); + } + if (context.parsed_atoms.getPtr(.{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stubs_section_index.?, + })) |last| { + last.*.next = stub_atom; + stub_atom.prev = last.*; + last.* = stub_atom; + } else { + try context.parsed_atoms.putNoClobber(.{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stubs_section_index.?, + }, stub_atom); + } + if (context.parsed_atoms.getPtr(.{ + .seg = context.macho_file.data_segment_cmd_index.?, + .sect = context.macho_file.la_symbol_ptr_section_index.?, + })) |last| { + last.*.next = laptr_atom; + laptr_atom.prev = last.*; + last.* = laptr_atom; + } else { + try context.parsed_atoms.putNoClobber(.{ + .seg = context.macho_file.data_segment_cmd_index.?, + .sect = context.macho_file.la_symbol_ptr_section_index.?, + }, laptr_atom); + } } } } @@ -910,7 +1004,7 @@ fn isSubtractor(rel: macho.relocation_info, arch: Arch) bool { } fn parseUnsigned( - self: TextBlock, + self: Atom, rel: macho.relocation_info, out: *Relocation, subtractor: ?u32, @@ -930,9 +1024,9 @@ fn parseUnsigned( mem.readIntLittle(i32, self.code.items[out.offset..][0..4]); if (rel.r_extern == 0) { - assert(out.where == .local); - const target_sym = context.macho_file.locals.items[out.where_index]; - addend -= @intCast(i64, target_sym.n_value); + const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; + const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr; + addend -= @intCast(i64, target_sect_base_addr); } out.payload = .{ @@ -944,7 +1038,7 @@ fn parseUnsigned( }; } -fn parseBranch(self: TextBlock, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void { +fn parseBranch(self: Atom, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void { _ = self; assert(rel.r_pcrel == 1); assert(rel.r_length == 2); @@ -956,7 +1050,7 @@ fn parseBranch(self: TextBlock, rel: macho.relocation_info, out: *Relocation, co }; } -fn parsePage(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void { +fn parsePage(self: Atom, rel: macho.relocation_info, out: *Relocation, addend: u32) void { _ = self; assert(rel.r_pcrel == 1); assert(rel.r_length == 2); @@ -974,7 +1068,7 @@ fn parsePage(self: TextBlock, rel: macho.relocation_info, out: *Relocation, adde }; } -fn parsePageOff(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void { +fn parsePageOff(self: Atom, rel: macho.relocation_info, out: *Relocation, addend: u32) void { assert(rel.r_pcrel == 0); assert(rel.r_length == 2); @@ -1002,7 +1096,7 @@ fn parsePageOff(self: TextBlock, rel: macho.relocation_info, out: *Relocation, a }; } -fn parsePointerToGot(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void { +fn parsePointerToGot(self: Atom, rel: macho.relocation_info, out: *Relocation) void { _ = self; 
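
// [Sketch] The thrice-repeated append-to-section dance above (flagged by the
// "TODO clean this up!" note) could be factored roughly as follows
// (hypothetical helper, not part of the patch):
//
//   fn appendAtomToSection(
//       parsed_atoms: *Object.ParsedAtoms,
//       match: MachO.MatchingSection,
//       atom: *Atom,
//   ) !void {
//       if (parsed_atoms.getPtr(match)) |last| {
//           last.*.next = atom;
//           atom.prev = last.*;
//           last.* = atom;
//       } else {
//           try parsed_atoms.putNoClobber(match, atom);
//       }
//   }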
assert(rel.r_pcrel == 1); assert(rel.r_length == 2); @@ -1012,12 +1106,12 @@ fn parsePointerToGot(self: TextBlock, rel: macho.relocation_info, out: *Relocati }; } -fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void { +fn parseSigned(self: Atom, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void { assert(rel.r_pcrel == 1); assert(rel.r_length == 2); const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - const correction: i4 = switch (rel_type) { + const correction: u3 = switch (rel_type) { .X86_64_RELOC_SIGNED => 0, .X86_64_RELOC_SIGNED_1 => 1, .X86_64_RELOC_SIGNED_2 => 2, @@ -1027,12 +1121,9 @@ fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, co var addend: i64 = mem.readIntLittle(i32, self.code.items[out.offset..][0..4]) + correction; if (rel.r_extern == 0) { - const source_sym = context.macho_file.locals.items[self.local_sym_index]; - const target_sym = switch (out.where) { - .local => context.macho_file.locals.items[out.where_index], - .undef => context.macho_file.undefs.items[out.where_index], - }; - addend = @intCast(i64, source_sym.n_value + out.offset + 4) + addend - @intCast(i64, target_sym.n_value); + const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; + const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr; + addend += @intCast(i64, context.base_addr + out.offset + correction + 4) - @intCast(i64, target_sect_base_addr); } out.payload = .{ @@ -1043,7 +1134,7 @@ fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, co }; } -fn parseLoad(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void { +fn parseLoad(self: Atom, rel: macho.relocation_info, out: *Relocation) void { assert(rel.r_pcrel == 1); assert(rel.r_length == 2); @@ -1065,7 +1156,10 @@ fn parseLoad(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void }; } -pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void { +pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + for (self.relocs.items) |rel| { log.debug("relocating {}", .{rel}); @@ -1083,9 +1177,7 @@ pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void { }; if (is_via_got) { - const dc_seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; - const got = dc_seg.sections.items[macho_file.got_section_index.?]; - const got_index = macho_file.got_entries_map.get(.{ + const atom = macho_file.got_entries_map.get(.{ .where = switch (rel.where) { .local => .local, .undef => .undef, @@ -1100,7 +1192,7 @@ pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void { log.err(" this is an internal linker error", .{}); return error.FailedToResolveRelocationTarget; }; - break :blk got.addr + got_index * @sizeOf(u64); + break :blk macho_file.locals.items[atom.local_sym_index].n_value; } switch (rel.where) { @@ -1138,13 +1230,24 @@ pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void { break :blk sym.n_value; }, .undef => { - const stubs_index = macho_file.stubs_map.get(rel.where_index) orelse { + const atom = macho_file.stubs_map.get(rel.where_index) orelse { + // TODO this is required for incremental when we don't have every symbol + // resolved when creating relocations. In this case, we will insert a branch + // reloc to an undef symbol which may happen to be defined within the binary. 
+ // Then, the undef we point at will be a null symbol (free symbol) which we + // should remove/repurpose. To circumvent this (for now), we check if the symbol + // we point to is garbage, and if so we fall back to symbol resolver to find by name. + const n_strx = macho_file.undefs.items[rel.where_index].n_strx; + if (macho_file.symbol_resolver.get(n_strx)) |resolv| inner: { + if (resolv.where != .global) break :inner; + break :blk macho_file.globals.items[resolv.where_index].n_value; + } + // TODO verify in TextBlock that the symbol is indeed dynamically bound. break :blk 0; // Dynamically bound by dyld. }; - const segment = macho_file.load_commands.items[macho_file.text_segment_cmd_index.?].Segment; - const stubs = segment.sections.items[macho_file.stubs_section_index.?]; - break :blk stubs.addr + stubs_index * stubs.reserved2; + + break :blk macho_file.locals.items[atom.local_sym_index].n_value; }, } }; @@ -1162,7 +1265,7 @@ pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void { } } -pub fn format(self: TextBlock, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { +pub fn format(self: Atom, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { _ = fmt; _ = options; try std.fmt.format(writer, "TextBlock {{ ", .{}); diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index ee99adafaa..a8c0138f60 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -5,25 +5,25 @@ const assert = std.debug.assert; const fs = std.fs; const log = std.log.scoped(.dsym); const macho = std.macho; +const math = std.math; const mem = std.mem; const DW = std.dwarf; const leb = std.leb; const Allocator = mem.Allocator; const build_options = @import("build_options"); +const commands = @import("commands.zig"); const trace = @import("../../tracy.zig").trace; +const LoadCommand = commands.LoadCommand; const Module = @import("../../Module.zig"); const Type = @import("../../type.zig").Type; const link = @import("../../link.zig"); const MachO = @import("../MachO.zig"); -const SrcFn = MachO.SrcFn; const TextBlock = MachO.TextBlock; -const padToIdeal = MachO.padToIdeal; - -const commands = @import("commands.zig"); -const emptyHeader = commands.emptyHeader; -const LoadCommand = commands.LoadCommand; const SegmentCommand = commands.SegmentCommand; +const SrcFn = MachO.SrcFn; +const makeStaticString = MachO.makeStaticString; +const padToIdeal = MachO.padToIdeal; const page_size: u16 = 0x1000; @@ -188,107 +188,86 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void log.debug("found __DWARF segment free space 0x{x} to 0x{x}", .{ off, off + needed_size }); try self.load_commands.append(allocator, .{ - .Segment = SegmentCommand.empty("__DWARF", .{ - .vmaddr = vmaddr, - .vmsize = needed_size, - .fileoff = off, - .filesize = needed_size, - }), + .Segment = .{ + .inner = .{ + .segname = makeStaticString("__DWARF"), + .vmaddr = vmaddr, + .vmsize = needed_size, + .fileoff = off, + .filesize = needed_size, + }, + }, }); self.load_commands_dirty = true; } if (self.debug_str_section_index == null) { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; - self.debug_str_section_index = @intCast(u16, dwarf_segment.sections.items.len); assert(self.debug_string_table.items.len == 0); - - try dwarf_segment.addSection(allocator, "__debug_str", .{ - .addr = dwarf_segment.inner.vmaddr, - .size = @intCast(u32, self.debug_string_table.items.len), - 
-            .offset = @intCast(u32, dwarf_segment.inner.fileoff),
-            .@"align" = 1,
-        });
-        self.load_commands_dirty = true;
+        self.debug_str_section_index = try self.allocateSection(
+            "__debug_str",
+            @intCast(u32, self.debug_string_table.items.len),
+            0,
+        );
         self.debug_string_table_dirty = true;
     }
     if (self.debug_info_section_index == null) {
-        const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment;
-        self.debug_info_section_index = @intCast(u16, dwarf_segment.sections.items.len);
-
-        const file_size_hint = 200;
-        const p_align = 1;
-        const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null);
-
-        log.debug("found __debug_info free space 0x{x} to 0x{x}", .{ off, off + file_size_hint });
-
-        try dwarf_segment.addSection(allocator, "__debug_info", .{
-            .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff,
-            .size = file_size_hint,
-            .offset = @intCast(u32, off),
-            .@"align" = p_align,
-        });
-        self.load_commands_dirty = true;
+        self.debug_info_section_index = try self.allocateSection("__debug_info", 200, 0);
         self.debug_info_header_dirty = true;
     }
     if (self.debug_abbrev_section_index == null) {
-        const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment;
-        self.debug_abbrev_section_index = @intCast(u16, dwarf_segment.sections.items.len);
-
-        const file_size_hint = 128;
-        const p_align = 1;
-        const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null);
-
-        log.debug("found __debug_abbrev free space 0x{x} to 0x{x}", .{ off, off + file_size_hint });
-
-        try dwarf_segment.addSection(allocator, "__debug_abbrev", .{
-            .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff,
-            .size = file_size_hint,
-            .offset = @intCast(u32, off),
-            .@"align" = p_align,
-        });
-        self.load_commands_dirty = true;
+        self.debug_abbrev_section_index = try self.allocateSection("__debug_abbrev", 128, 0);
         self.debug_abbrev_section_dirty = true;
     }
     if (self.debug_aranges_section_index == null) {
-        const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment;
-        self.debug_aranges_section_index = @intCast(u16, dwarf_segment.sections.items.len);
-
-        const file_size_hint = 160;
-        const p_align = 16;
-        const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null);
-
-        log.debug("found __debug_aranges free space 0x{x} to 0x{x}", .{ off, off + file_size_hint });
-
-        try dwarf_segment.addSection(allocator, "__debug_aranges", .{
-            .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff,
-            .size = file_size_hint,
-            .offset = @intCast(u32, off),
-            .@"align" = p_align,
-        });
-        self.load_commands_dirty = true;
+        self.debug_aranges_section_index = try self.allocateSection("__debug_aranges", 160, 4);
         self.debug_aranges_section_dirty = true;
     }
     if (self.debug_line_section_index == null) {
-        const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment;
-        self.debug_line_section_index = @intCast(u16, dwarf_segment.sections.items.len);
-
-        const file_size_hint = 250;
-        const p_align = 1;
-        const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null);
-
-        log.debug("found __debug_line free space 0x{x} to 0x{x}", .{ off, off + file_size_hint });
-
-        try dwarf_segment.addSection(allocator, "__debug_line", .{
-            .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff,
-            .size = file_size_hint,
-            .offset = @intCast(u32, off),
-            .@"align" = p_align,
-        });
-        self.load_commands_dirty = true;
+        self.debug_line_section_index = try self.allocateSection("__debug_line", 250, 0);
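+        // Note: the third argument to allocateSection is a power-of-two exponent, not a byte
+        // count - __debug_aranges above passes 4 to request the 16-byte alignment the old code
+        // expressed as p_align = 16; allocateSection converts the exponent with math.powi.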
         self.debug_line_header_dirty = true;
     }
 }
 
+fn allocateSection(self: *DebugSymbols, sectname: []const u8, size: u64, alignment: u16) !u16 {
+    const seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment;
+    var sect = macho.section_64{
+        .sectname = makeStaticString(sectname),
+        .segname = seg.inner.segname,
+        .size = @intCast(u32, size),
+        .@"align" = alignment,
+    };
+    const alignment_pow_2 = try math.powi(u32, 2, alignment);
+    const off = seg.findFreeSpace(size, alignment_pow_2, null);
+
+    assert(off + size <= seg.inner.fileoff + seg.inner.filesize); // TODO expand
+
+    log.debug("found {s},{s} section free space 0x{x} to 0x{x}", .{
+        commands.segmentName(sect),
+        commands.sectionName(sect),
+        off,
+        off + size,
+    });
+
+    sect.addr = seg.inner.vmaddr + off - seg.inner.fileoff;
+    sect.offset = @intCast(u32, off);
+
+    const index = @intCast(u16, seg.sections.items.len);
+    try seg.sections.append(self.base.base.allocator, sect);
+    seg.inner.cmdsize += @sizeOf(macho.section_64);
+    seg.inner.nsects += 1;
+
+    // TODO
+    // const match = MatchingSection{
+    //     .seg = segment_id,
+    //     .sect = index,
+    // };
+    // _ = try self.section_ordinals.getOrPut(self.base.allocator, match);
+    // try self.block_free_lists.putNoClobber(self.base.allocator, match, .{});
+
+    self.load_commands_dirty = true;
+
+    return index;
+}
+
 pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Options) !void {
     // TODO This linker code currently assumes there is only 1 compilation unit and it corresponds to the
     // Zig source code.
@@ -614,15 +593,18 @@ pub fn deinit(self: *DebugSymbols, allocator: *Allocator) void {
 }
 
 fn copySegmentCommand(self: *DebugSymbols, allocator: *Allocator, base_cmd: SegmentCommand) !SegmentCommand {
-    var cmd = SegmentCommand.empty("", .{
-        .cmdsize = base_cmd.inner.cmdsize,
-        .vmaddr = base_cmd.inner.vmaddr,
-        .vmsize = base_cmd.inner.vmsize,
-        .maxprot = base_cmd.inner.maxprot,
-        .initprot = base_cmd.inner.initprot,
-        .nsects = base_cmd.inner.nsects,
-        .flags = base_cmd.inner.flags,
-    });
+    var cmd = SegmentCommand{
+        .inner = .{
+            .segname = undefined,
+            .cmdsize = base_cmd.inner.cmdsize,
+            .vmaddr = base_cmd.inner.vmaddr,
+            .vmsize = base_cmd.inner.vmsize,
+            .maxprot = base_cmd.inner.maxprot,
+            .initprot = base_cmd.inner.initprot,
+            .nsects = base_cmd.inner.nsects,
+            .flags = base_cmd.inner.flags,
+        },
+    };
     mem.copy(u8, &cmd.inner.segname, &base_cmd.inner.segname);
 
     try cmd.sections.ensureCapacity(allocator, cmd.inner.nsects);
@@ -692,7 +674,7 @@ fn writeLoadCommands(self: *DebugSymbols, allocator: *Allocator) !void {
 }
 
 fn writeHeader(self: *DebugSymbols) !void {
-    var header = emptyHeader(.{
+    var header = commands.emptyHeader(.{
         .filetype = macho.MH_DSYM,
     });
 
diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig
index 1051cda694..27da019be8 100644
--- a/src/link/MachO/Object.zig
+++ b/src/link/MachO/Object.zig
@@ -1,6 +1,7 @@ const Object = @This();
 
 const std = @import("std");
+const build_options = @import("build_options");
 const assert = std.debug.assert;
 const dwarf = std.dwarf;
 const fs = std.fs;
@@ -13,11 +14,12 @@ const sort = std.sort;
 const commands = @import("commands.zig");
 const segmentName = commands.segmentName;
 const sectionName = commands.sectionName;
+const trace = @import("../../tracy.zig").trace;
 
 const Allocator = mem.Allocator;
+const Atom = @import("Atom.zig");
 const LoadCommand = commands.LoadCommand;
 const MachO = @import("../MachO.zig");
-const TextBlock = @import("TextBlock.zig");
@import("TextBlock.zig"); file: fs.File, name: []const u8, @@ -54,7 +56,7 @@ tu_name: ?[]const u8 = null, tu_comp_dir: ?[]const u8 = null, mtime: ?u64 = null, -text_blocks: std.ArrayListUnmanaged(*TextBlock) = .{}, +atoms: std.ArrayListUnmanaged(*Atom) = .{}, sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{}, // TODO symbol mapping and its inverse can probably be simple arrays @@ -62,6 +64,8 @@ sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{}, symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{}, reverse_symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{}, +analyzed: bool = false, + const DebugInfo = struct { inner: dwarf.DwarfInfo, debug_info: []u8, @@ -134,7 +138,7 @@ pub fn deinit(self: *Object, allocator: *Allocator) void { self.data_in_code_entries.deinit(allocator); self.symtab.deinit(allocator); self.strtab.deinit(allocator); - self.text_blocks.deinit(allocator); + self.atoms.deinit(allocator); self.sections_as_symbols.deinit(allocator); self.symbol_mapping.deinit(allocator); self.reverse_symbol_mapping.deinit(allocator); @@ -316,16 +320,17 @@ const Context = struct { object: *Object, macho_file: *MachO, match: MachO.MatchingSection, + parsed_atoms: *ParsedAtoms, }; -const TextBlockParser = struct { +const AtomParser = struct { section: macho.section_64, code: []u8, relocs: []macho.relocation_info, nlists: []NlistWithIndex, index: u32 = 0, - fn peek(self: TextBlockParser) ?NlistWithIndex { + fn peek(self: AtomParser) ?NlistWithIndex { return if (self.index + 1 < self.nlists.len) self.nlists[self.index + 1] else null; } @@ -339,9 +344,12 @@ const TextBlockParser = struct { } } - pub fn next(self: *TextBlockParser, context: Context) !?*TextBlock { + pub fn next(self: *AtomParser, context: Context) !?*Atom { if (self.index == self.nlists.len) return null; + const tracy = trace(@src()); + defer tracy.end(); + var aliases = std.ArrayList(NlistWithIndex).init(context.allocator); defer aliases.deinit(); @@ -364,12 +372,12 @@ const TextBlockParser = struct { } if (aliases.items.len > 1) { - // Bubble-up senior symbol as the main link to the text block. + // Bubble-up senior symbol as the main link to the atom. sort.sort( NlistWithIndex, aliases.items, context, - TextBlockParser.lessThanBySeniority, + AtomParser.lessThanBySeniority, ); } @@ -389,12 +397,12 @@ const TextBlockParser = struct { else max_align; - const stab: ?TextBlock.Stab = if (context.object.debug_info) |di| blk: { + const stab: ?Atom.Stab = if (context.object.debug_info) |di| blk: { // TODO there has to be a better to handle this. 
            for (di.inner.func_list.items) |func| {
                if (func.pc_range) |range| {
                    if (senior_nlist.nlist.n_value >= range.start and senior_nlist.nlist.n_value < range.end) {
-                        break :blk TextBlock.Stab{
+                        break :blk Atom.Stab{
                            .function = range.end - range.start,
                        };
                    }
                }
@@ -405,28 +413,31 @@ const TextBlockParser = struct {
            break :blk .static;
        } else null;
 
-        const block = try context.allocator.create(TextBlock);
-        block.* = TextBlock.empty;
-        block.local_sym_index = senior_nlist.index;
-        block.stab = stab;
-        block.size = size;
-        block.alignment = actual_align;
-        try context.macho_file.managed_blocks.append(context.allocator, block);
+        const atom = try context.macho_file.createEmptyAtom(senior_nlist.index, size, actual_align);
+        atom.stab = stab;
 
-        try block.code.appendSlice(context.allocator, code);
+        const is_zerofill = blk: {
+            const section_type = commands.sectionType(self.section);
+            break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL;
+        };
+        if (!is_zerofill) {
+            mem.copy(u8, atom.code.items, code);
+        }
 
-        try block.aliases.ensureTotalCapacity(context.allocator, aliases.items.len);
+        try atom.aliases.ensureTotalCapacity(context.allocator, aliases.items.len);
         for (aliases.items) |alias| {
-            block.aliases.appendAssumeCapacity(alias.index);
+            atom.aliases.appendAssumeCapacity(alias.index);
             const sym = &context.macho_file.locals.items[alias.index];
             sym.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(context.match).? + 1);
         }
 
-        try block.parseRelocs(self.relocs, .{
-            .base_addr = start_addr,
+        try atom.parseRelocs(self.relocs, .{
+            .base_addr = self.section.addr,
+            .base_offset = start_addr,
             .allocator = context.allocator,
             .object = context.object,
             .macho_file = context.macho_file,
+            .parsed_atoms = context.parsed_atoms,
         });
 
         if (context.macho_file.has_dices) {
@@ -435,10 +446,10 @@ const TextBlockParser = struct {
                 senior_nlist.nlist.n_value,
                 senior_nlist.nlist.n_value + size,
             );
-            try block.dices.ensureTotalCapacity(context.allocator, dices.len);
+            try atom.dices.ensureTotalCapacity(context.allocator, dices.len);
 
             for (dices) |dice| {
-                block.dices.appendAssumeCapacity(.{
+                atom.dices.appendAssumeCapacity(.{
                     .offset = dice.offset - try math.cast(u32, senior_nlist.nlist.n_value),
                     .length = dice.length,
                     .kind = dice.kind,
@@ -448,16 +459,22 @@ const TextBlockParser = struct {
 
         self.index += 1;
 
-        return block;
+        return atom;
     }
 };
 
-pub fn parseTextBlocks(
+pub const ParsedAtoms = std.AutoHashMap(MachO.MatchingSection, *Atom);
+
+pub fn parseIntoAtoms(
     self: *Object,
     allocator: *Allocator,
     object_id: u16,
     macho_file: *MachO,
-) !void {
+) !ParsedAtoms {
+    const tracy = trace(@src());
+    defer tracy.end();
+
+    var parsed_atoms = ParsedAtoms.init(allocator);
+
     const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
 
     log.debug("analysing {s}", .{self.name});
@@ -498,7 +515,7 @@ pub fn parseTextBlocks(
     for (seg.sections.items) |sect, id| {
         const sect_id = @intCast(u8, id);
 
-        log.debug("putting section '{s},{s}' as a TextBlock", .{
+        log.debug("putting section '{s},{s}' as an Atom", .{
            segmentName(sect),
            sectionName(sect),
        });
@@ -523,14 +540,17 @@ pub fn parseTextBlocks(
        // Symbols within this section only.
        const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists, sect);
 
+        // TODO rewrite and re-enable dead-code stripping optimisation. I think it might make sense
+        // to do this in a standalone pass after we parse the sections as atoms.
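+        // (For context: MH_SUBSECTIONS_VIA_SYMBOLS is what the disabled check below keys on.
+        // It guarantees every symbol starts its own subsection, so a section can be split at
+        // symbol boundaries into independently relocatable atoms - the property that makes
+        // dead-code stripping of unreferenced atoms safe.)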
        // In release mode, if the object file was generated with dead code stripping optimisations,
        // note it now and parse sections as atoms.
-        const is_splittable = blk: {
-            if (macho_file.base.options.optimize_mode == .Debug) break :blk false;
-            break :blk self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
-        };
+        // const is_splittable = blk: {
+        //     if (macho_file.base.options.optimize_mode == .Debug) break :blk false;
+        //     break :blk self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
+        // };
+        const is_splittable = false;
 
-        macho_file.has_dices = blk: {
+        macho_file.has_dices = macho_file.has_dices or blk: {
            if (self.text_section_index) |index| {
                if (index != id) break :blk false;
                if (self.data_in_code_entries.items.len == 0) break :blk false;
@@ -541,12 +561,12 @@ pub fn parseTextBlocks(
        macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null;
 
        next: {
-            if (is_splittable) blocks: {
-                if (filtered_nlists.len == 0) break :blocks;
+            if (is_splittable) atoms: {
+                if (filtered_nlists.len == 0) break :atoms;
 
                // If the first nlist does not match the start of the section,
                // then we need to encapsulate the memory range [section start, first symbol)
-                // as a temporary symbol and insert the matching TextBlock.
+                // as a temporary symbol and insert the matching Atom.
                const first_nlist = filtered_nlists[0].nlist;
                if (first_nlist.n_value > sect.addr) {
                    const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{
@@ -556,44 +576,45 @@ pub fn parseTextBlocks(
                    });
                    defer allocator.free(sym_name);
 
-                    const block_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
-                        const block_local_sym_index = @intCast(u32, macho_file.locals.items.len);
+                    const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
+                        const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len);
                        try macho_file.locals.append(allocator, .{
                            .n_strx = try macho_file.makeString(sym_name),
                            .n_type = macho.N_SECT,
                            .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
                            .n_desc = 0,
-                            .n_value = sect.addr,
+                            .n_value = 0,
                        });
-                        try self.sections_as_symbols.putNoClobber(allocator, sect_id, block_local_sym_index);
-                        break :blk block_local_sym_index;
+                        try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index);
+                        break :blk atom_local_sym_index;
                    };
+                    const atom_code = code[0 .. first_nlist.n_value - sect.addr];
+                    const atom_size = atom_code.len;
+                    const atom = try macho_file.createEmptyAtom(atom_local_sym_index, atom_size, sect.@"align");
 
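+                    // (Assumed invariant: createEmptyAtom pre-sizes atom.code, so the plain
+                    // mem.copy below suffices; zerofill sections have no file-backed bytes
+                    // and skip the copy.)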
-                    const block_code = code[0 .. first_nlist.n_value - sect.addr];
-                    const block_size = block_code.len;
+                    const is_zerofill = blk: {
+                        const section_type = commands.sectionType(sect);
+                        break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL;
+                    };
+                    if (!is_zerofill) {
+                        mem.copy(u8, atom.code.items, atom_code);
+                    }
 
-                    const block = try allocator.create(TextBlock);
-                    block.* = TextBlock.empty;
-                    block.local_sym_index = block_local_sym_index;
-                    block.size = block_size;
-                    block.alignment = sect.@"align";
-                    try macho_file.managed_blocks.append(allocator, block);
-
-                    try block.code.appendSlice(allocator, block_code);
-
-                    try block.parseRelocs(relocs, .{
-                        .base_addr = 0,
+                    try atom.parseRelocs(relocs, .{
+                        .base_addr = sect.addr,
+                        .base_offset = 0,
                        .allocator = allocator,
                        .object = self,
                        .macho_file = macho_file,
+                        .parsed_atoms = &parsed_atoms,
                    });
 
                    if (macho_file.has_dices) {
-                        const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + block_size);
-                        try block.dices.ensureTotalCapacity(allocator, dices.len);
+                        const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + atom_size);
+                        try atom.dices.ensureTotalCapacity(allocator, dices.len);
 
                        for (dices) |dice| {
-                            block.dices.appendAssumeCapacity(.{
+                            atom.dices.appendAssumeCapacity(.{
                                .offset = dice.offset - try math.cast(u32, sect.addr),
                                .length = dice.length,
                                .kind = dice.kind,
@@ -601,29 +622,17 @@ pub fn parseTextBlocks(
                        }
                    }
 
-                    // Update target section's metadata
-                    // TODO should we update segment's size here too?
-                    // How does it tie with incremental space allocs?
-                    const tseg = &macho_file.load_commands.items[match.seg].Segment;
-                    const tsect = &tseg.sections.items[match.sect];
-                    const new_alignment = math.max(tsect.@"align", block.alignment);
-                    const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
-                    const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
-                    tsect.size = new_size;
-                    tsect.@"align" = new_alignment;
-
-                    if (macho_file.blocks.getPtr(match)) |last| {
-                        last.*.next = block;
-                        block.prev = last.*;
-                        last.* = block;
+                    if (parsed_atoms.getPtr(match)) |last| {
+                        last.*.next = atom;
+                        atom.prev = last.*;
+                        last.* = atom;
                    } else {
-                        try macho_file.blocks.putNoClobber(allocator, match, block);
+                        try parsed_atoms.putNoClobber(match, atom);
                    }
-
-                    try self.text_blocks.append(allocator, block);
+                    try self.atoms.append(allocator, atom);
                }
 
-                var parser = TextBlockParser{
+                var parser = AtomParser{
                    .section = sect,
                    .code = code,
                    .relocs = relocs,
@@ -635,10 +644,11 @@
                    .object = self,
                    .macho_file = macho_file,
                    .match = match,
-                })) |block| {
-                    const sym = macho_file.locals.items[block.local_sym_index];
+                    .parsed_atoms = &parsed_atoms,
+                })) |atom| {
+                    const sym = macho_file.locals.items[atom.local_sym_index];
                    const is_ext = blk: {
-                        const orig_sym_id = self.reverse_symbol_mapping.get(block.local_sym_index) orelse unreachable;
+                        const orig_sym_id = self.reverse_symbol_mapping.get(atom.local_sym_index) orelse unreachable;
                        break :blk MachO.symbolIsExt(self.symtab.items[orig_sym_id]);
                    };
                    if (is_ext) {
@@ -662,38 +672,26 @@
                        // In x86_64 relocs, it can so happen that the compiler refers to the same
                        // atom by both the actual assigned symbol and the start of the section. In this
                        // case, we need to link the two together so add an alias.
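+                        // (Concretely: a reloc against the temporary section-start symbol and a
+                        // reloc against the first named symbol of the section are assumed to target
+                        // this same atom, so the two symbols must alias each other.)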
-                        try block.aliases.append(allocator, alias);
+                        try atom.aliases.append(allocator, alias);
                    }
                }
 
-                    // Update target section's metadata
-                    // TODO should we update segment's size here too?
-                    // How does it tie with incremental space allocs?
-                    const tseg = &macho_file.load_commands.items[match.seg].Segment;
-                    const tsect = &tseg.sections.items[match.sect];
-                    const new_alignment = math.max(tsect.@"align", block.alignment);
-                    const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
-                    const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
-                    tsect.size = new_size;
-                    tsect.@"align" = new_alignment;
-
-                    if (macho_file.blocks.getPtr(match)) |last| {
-                        last.*.next = block;
-                        block.prev = last.*;
-                        last.* = block;
+                    if (parsed_atoms.getPtr(match)) |last| {
+                        last.*.next = atom;
+                        atom.prev = last.*;
+                        last.* = atom;
                    } else {
-                        try macho_file.blocks.putNoClobber(allocator, match, block);
+                        try parsed_atoms.putNoClobber(match, atom);
                    }
-
-                    try self.text_blocks.append(allocator, block);
+                    try self.atoms.append(allocator, atom);
                }
 
                break :next;
            }
 
-            // Since there is no symbol to refer to this block, we create
+            // Since there is no symbol to refer to this atom, we create
            // a temp one, unless we already did that when working out the relocations
-            // of other text blocks.
+            // of other atoms.
            const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{
                self.name,
                segmentName(sect),
@@ -701,41 +699,43 @@
            });
            defer allocator.free(sym_name);
 
-            const block_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
-                const block_local_sym_index = @intCast(u32, macho_file.locals.items.len);
+            const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
+                const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len);
                try macho_file.locals.append(allocator, .{
                    .n_strx = try macho_file.makeString(sym_name),
                    .n_type = macho.N_SECT,
                    .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
                    .n_desc = 0,
-                    .n_value = sect.addr,
+                    .n_value = 0,
                });
-                try self.sections_as_symbols.putNoClobber(allocator, sect_id, block_local_sym_index);
-                break :blk block_local_sym_index;
+                try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index);
+                break :blk atom_local_sym_index;
            };
+            const atom = try macho_file.createEmptyAtom(atom_local_sym_index, sect.size, sect.@"align");
 
-            const block = try allocator.create(TextBlock);
-            block.* = TextBlock.empty;
-            block.local_sym_index = block_local_sym_index;
-            block.size = sect.size;
-            block.alignment = sect.@"align";
-            try macho_file.managed_blocks.append(allocator, block);
+            const is_zerofill = blk: {
+                const section_type = commands.sectionType(sect);
+                break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL;
+            };
+            if (!is_zerofill) {
+                mem.copy(u8, atom.code.items, code);
+            }
 
-            try block.code.appendSlice(allocator, code);
-
-            try block.parseRelocs(relocs, .{
-                .base_addr = 0,
+            try atom.parseRelocs(relocs, .{
+                .base_addr = sect.addr,
+                .base_offset = 0,
                .allocator = allocator,
                .object = self,
                .macho_file = macho_file,
+                .parsed_atoms = &parsed_atoms,
            });
 
            if (macho_file.has_dices) {
                const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size);
-                try block.dices.ensureTotalCapacity(allocator, dices.len);
+                try atom.dices.ensureTotalCapacity(allocator, dices.len);
 
                for (dices) |dice| {
-                    block.dices.appendAssumeCapacity(.{
+                    atom.dices.appendAssumeCapacity(.{
                        .offset = dice.offset - try math.cast(u32, sect.addr),
                        .length = dice.length,
                        .kind = dice.kind,
@@ -743,12 +743,12 @@
                }
            }
 
-            // Since this is block gets a helper local temporary symbol that didn't exist
+            // Since this atom gets a helper local temporary symbol that didn't exist
            // in the object file which encompasses the entire section, we need to traverse
            // the filtered symbols and note which symbol is contained within so that
            // we can properly allocate addresses down the line.
            // While we're at it, we need to update segment,section mapping of each symbol too.
-            try block.contained.ensureTotalCapacity(allocator, filtered_nlists.len);
+            try atom.contained.ensureTotalCapacity(allocator, filtered_nlists.len);
 
            for (filtered_nlists) |nlist_with_index| {
                const nlist = nlist_with_index.nlist;
@@ -756,12 +756,12 @@
                const local = &macho_file.locals.items[local_sym_index];
                local.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1);
 
-                const stab: ?TextBlock.Stab = if (self.debug_info) |di| blk: {
+                const stab: ?Atom.Stab = if (self.debug_info) |di| blk: {
                    // TODO there has to be a better way to handle this.
                    for (di.inner.func_list.items) |func| {
                        if (func.pc_range) |range| {
                            if (nlist.n_value >= range.start and nlist.n_value < range.end) {
-                                break :blk TextBlock.Stab{
+                                break :blk Atom.Stab{
                                    .function = range.end - range.start,
                                };
                            }
                        }
@@ -772,35 +772,25 @@
                        break :blk .static;
                    } else null;
 
-                block.contained.appendAssumeCapacity(.{
+                atom.contained.appendAssumeCapacity(.{
                    .local_sym_index = local_sym_index,
                    .offset = nlist.n_value - sect.addr,
                    .stab = stab,
                });
            }
 
-            // Update target section's metadata
-            // TODO should we update segment's size here too?
-            // How does it tie with incremental space allocs?
-            const tseg = &macho_file.load_commands.items[match.seg].Segment;
-            const tsect = &tseg.sections.items[match.sect];
-            const new_alignment = math.max(tsect.@"align", block.alignment);
-            const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
-            const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
-            tsect.size = new_size;
-            tsect.@"align" = new_alignment;
-
-            if (macho_file.blocks.getPtr(match)) |last| {
-                last.*.next = block;
-                block.prev = last.*;
-                last.* = block;
+            if (parsed_atoms.getPtr(match)) |last| {
+                last.*.next = atom;
+                atom.prev = last.*;
+                last.* = atom;
            } else {
-                try macho_file.blocks.putNoClobber(allocator, match, block);
+                try parsed_atoms.putNoClobber(match, atom);
            }
-
-            try self.text_blocks.append(allocator, block);
+            try self.atoms.append(allocator, atom);
        }
    }
+
+    return parsed_atoms;
 }
 
 fn parseSymtab(self: *Object, allocator: *Allocator) !void {
diff --git a/src/link/MachO/bind.zig b/src/link/MachO/bind.zig
index 402e74d776..14a5ba3e30 100644
--- a/src/link/MachO/bind.zig
+++ b/src/link/MachO/bind.zig
@@ -9,15 +9,6 @@ pub const Pointer = struct {
     name: ?[]const u8 = null,
 };
 
-pub fn pointerCmp(context: void, a: Pointer, b: Pointer) bool {
-    _ = context;
-    if (a.segment_id < b.segment_id) return true;
-    if (a.segment_id == b.segment_id) {
-        return a.offset < b.offset;
-    }
-    return false;
-}
-
 pub fn rebaseInfoSize(pointers: []const Pointer) !u64 {
     var stream = std.io.countingWriter(std.io.null_writer);
     var writer = stream.writer();
diff --git a/src/link/MachO/commands.zig b/src/link/MachO/commands.zig
index 6e75af08c4..d9ca056c8e 100644
--- a/src/link/MachO/commands.zig
+++ b/src/link/MachO/commands.zig
@@ -9,6 +9,7 @@ const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
 
 const MachO = @import("../MachO.zig");
+const makeStaticString = MachO.makeStaticString;
 const padToIdeal = MachO.padToIdeal;
 
 pub const HeaderArgs = struct {
@@ -217,75 +218,6 @@ pub const SegmentCommand = struct {
     inner: macho.segment_command_64,
     sections: std.ArrayListUnmanaged(macho.section_64) = .{},
 
-    const SegmentOptions = struct {
-        cmdsize: u32 = @sizeOf(macho.segment_command_64),
-        vmaddr: u64 = 0,
-        vmsize: u64 = 0,
-        fileoff: u64 = 0,
-        filesize: u64 = 0,
-        maxprot: macho.vm_prot_t = macho.VM_PROT_NONE,
-        initprot: macho.vm_prot_t = macho.VM_PROT_NONE,
-        nsects: u32 = 0,
-        flags: u32 = 0,
-    };
-
-    pub fn empty(comptime segname: []const u8, opts: SegmentOptions) SegmentCommand {
-        return .{
-            .inner = .{
-                .cmd = macho.LC_SEGMENT_64,
-                .cmdsize = opts.cmdsize,
-                .segname = makeStaticString(segname),
-                .vmaddr = opts.vmaddr,
-                .vmsize = opts.vmsize,
-                .fileoff = opts.fileoff,
-                .filesize = opts.filesize,
-                .maxprot = opts.maxprot,
-                .initprot = opts.initprot,
-                .nsects = opts.nsects,
-                .flags = opts.flags,
-            },
-        };
-    }
-
-    const SectionOptions = struct {
-        addr: u64 = 0,
-        size: u64 = 0,
-        offset: u32 = 0,
-        @"align": u32 = 0,
-        reloff: u32 = 0,
-        nreloc: u32 = 0,
-        flags: u32 = macho.S_REGULAR,
-        reserved1: u32 = 0,
-        reserved2: u32 = 0,
-        reserved3: u32 = 0,
-    };
-
-    pub fn addSection(
-        self: *SegmentCommand,
-        alloc: *Allocator,
-        comptime sectname: []const u8,
-        opts: SectionOptions,
-    ) !void {
-        var section = macho.section_64{
-            .sectname = makeStaticString(sectname),
-            .segname = undefined,
-            .addr = opts.addr,
-            .size = opts.size,
-            .offset = opts.offset,
-            .@"align" = opts.@"align",
-            .reloff = opts.reloff,
-            .nreloc = opts.nreloc,
-            .flags = opts.flags,
-            .reserved1 = opts.reserved1,
-            .reserved2 = opts.reserved2,
-            .reserved3 = opts.reserved3,
-        };
-        mem.copy(u8, &section.segname, &self.inner.segname);
-        try self.sections.append(alloc, section);
-        self.inner.cmdsize += @sizeOf(macho.section_64);
-        self.inner.nsects += 1;
-    }
-
     pub fn read(alloc: *Allocator, reader: anytype) !SegmentCommand {
         const inner = try reader.readStruct(macho.segment_command_64);
         var segment = SegmentCommand{
@@ -314,10 +246,8 @@
     }
 
     pub fn allocatedSize(self: SegmentCommand, start: u64) u64 {
-        assert(start > 0);
-        if (start == self.inner.fileoff)
-            return 0;
-        var min_pos: u64 = std.math.maxInt(u64);
+        assert(start >= self.inner.fileoff);
+        var min_pos: u64 = self.inner.fileoff + self.inner.filesize;
         for (self.sections.items) |section| {
             if (section.offset <= start) continue;
             if (section.offset < min_pos) min_pos = section.offset;
@@ -337,12 +267,12 @@
         return null;
     }
 
-    pub fn findFreeSpace(self: SegmentCommand, object_size: u64, min_alignment: u16, start: ?u64) u64 {
-        var st: u64 = if (start) |v| v else self.inner.fileoff;
-        while (self.detectAllocCollision(st, object_size)) |item_end| {
-            st = mem.alignForwardGeneric(u64, item_end, min_alignment);
+    pub fn findFreeSpace(self: SegmentCommand, object_size: u64, min_alignment: u64, start: ?u64) u64 {
+        var offset: u64 = if (start) |v| v else self.inner.fileoff;
+        while (self.detectAllocCollision(offset, object_size)) |item_end| {
+            offset = mem.alignForwardGeneric(u64, item_end, min_alignment);
         }
-        return st;
+        return offset;
     }
 
     fn eql(self: SegmentCommand, other: SegmentCommand) bool {
@@ -427,13 +357,6 @@ pub fn createLoadDylibCommand(
     return dylib_cmd;
 }
 
-fn makeStaticString(bytes: []const u8) [16]u8 {
-    var buf = [_]u8{0} ** 16;
-    assert(bytes.len <= buf.len);
-    mem.copy(u8, &buf, bytes);
-    return buf;
-}
-
 fn parseName(name: *const [16]u8) []const u8 {
     const len = mem.indexOfScalar(u8, name, @as(u8, 0)) orelse name.len;
     return name[0..len];
 }
@@ -514,17 +437,14 @@ test "read-write segment command" {
     };
     var cmd = SegmentCommand{
         .inner = .{
-            .cmd = macho.LC_SEGMENT_64,
             .cmdsize = 152,
             .segname = makeStaticString("__TEXT"),
             .vmaddr = 4294967296,
             .vmsize = 294912,
-            .fileoff = 0,
             .filesize = 294912,
             .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE,
             .initprot = macho.VM_PROT_EXECUTE | macho.VM_PROT_READ,
             .nsects = 1,
-            .flags = 0,
         },
     };
 
     try cmd.sections.append(gpa, .{
@@ -534,12 +454,7 @@
         .size = 448,
         .offset = 16384,
         .@"align" = 2,
-        .reloff = 0,
-        .nreloc = 0,
         .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
-        .reserved1 = 0,
-        .reserved2 = 0,
-        .reserved3 = 0,
     });
     defer cmd.deinit(gpa);
 
     try testRead(gpa, in_buffer, LoadCommand{ .Segment = cmd });
diff --git a/test/stage2/darwin.zig b/test/stage2/darwin.zig
index 87b04d1dff..86c2d313a0 100644
--- a/test/stage2/darwin.zig
+++ b/test/stage2/darwin.zig
@@ -27,8 +27,8 @@ pub fn addCases(ctx: *TestContext) !void {
 
     // Regular old hello world
     case.addCompareOutput(
-        \\extern "c" fn write(usize, usize, usize) usize;
-        \\extern "c" fn exit(usize) noreturn;
+        \\extern fn write(usize, usize, usize) usize;
+        \\extern fn exit(usize) noreturn;
         \\
         \\pub export fn main() noreturn {
         \\    print();
@@ -47,8 +47,8 @@ pub fn addCases(ctx: *TestContext) !void {
 
     // Print it 4 times and force growth and realloc.
     case.addCompareOutput(
-        \\extern "c" fn write(usize, usize, usize) usize;
-        \\extern "c" fn exit(usize) noreturn;
+        \\extern fn write(usize, usize, usize) usize;
+        \\extern fn exit(usize) noreturn;
         \\
         \\pub export fn main() noreturn {
         \\    print();
@@ -74,8 +74,8 @@ pub fn addCases(ctx: *TestContext) !void {
 
     // Print it once, and change the message.
     case.addCompareOutput(
-        \\extern "c" fn write(usize, usize, usize) usize;
-        \\extern "c" fn exit(usize) noreturn;
+        \\extern fn write(usize, usize, usize) usize;
+        \\extern fn exit(usize) noreturn;
         \\
         \\pub export fn main() noreturn {
         \\    print();
@@ -94,8 +94,8 @@ pub fn addCases(ctx: *TestContext) !void {
 
     // Now we print it twice.
     case.addCompareOutput(
-        \\extern "c" fn write(usize, usize, usize) usize;
-        \\extern "c" fn exit(usize) noreturn;
+        \\extern fn write(usize, usize, usize) usize;
+        \\extern fn exit(usize) noreturn;
         \\
         \\pub export fn main() noreturn {
         \\    print();
@@ -121,7 +121,7 @@
     // This test case also covers an infrequent scenario where the string table *may* be relocated
     // into the position preceding the symbol table which results in a dyld error.
     case.addCompareOutput(
-        \\extern "c" fn exit(usize) noreturn;
+        \\extern fn exit(usize) noreturn;
         \\
         \\pub export fn main() noreturn {
         \\    exit(0);
@@ -131,8 +131,8 @@
 
     case.addCompareOutput(
-        \\extern "c" fn exit(usize) noreturn;
-        \\extern "c" fn write(usize, usize, usize) usize;
+        \\extern fn exit(usize) noreturn;
+        \\extern fn write(usize, usize, usize) usize;
         \\
         \\pub export fn main() noreturn {
         \\    _ = write(1, @ptrToInt("Hey!\n"), 5);