diff --git a/CMakeLists.txt b/CMakeLists.txt index 0108f448df..851e554923 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -603,20 +603,24 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/DebugSymbols.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/DwarfInfo.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Dylib.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/InternalObject.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Relocation.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/Symbol.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/UnwindInfo.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/bind.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/Rebase.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/Trie.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/eh_frame.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/fat.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/file.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/hasher.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/load_commands.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/relocatable.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/synthetic.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/uuid.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/zld.zig" "${CMAKE_SOURCE_DIR}/src/link/Plan9.zig" "${CMAKE_SOURCE_DIR}/src/link/Plan9/aout.zig" "${CMAKE_SOURCE_DIR}/src/link/Wasm.zig" diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index ba3915f51b..c31278ce1c 100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -1,20 +1,15 @@ -file: fs.File, -fat_offset: u64, -name: []const u8, -header: ar_hdr = undefined, +path: []const u8, +data: []const u8, -/// Parsed table of contents. -/// Each symbol name points to a list of all definition -/// sites within the current static archive. 
-toc: std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged(u32)) = .{}, +objects: std.ArrayListUnmanaged(Object) = .{}, // Archive files start with the ARMAG identifying string. Then follows a // `struct ar_hdr', and as many bytes of member file data as its `ar_size' // member indicates, for each member file. /// String that begins an archive file. -const ARMAG: *const [SARMAG:0]u8 = "!\n"; +pub const ARMAG: *const [SARMAG:0]u8 = "!\n"; /// Size of that string. -const SARMAG: u4 = 8; +pub const SARMAG: u4 = 8; /// String in ar_fmag at the end of each header. const ARFMAG: *const [2:0]u8 = "`\n"; @@ -41,177 +36,93 @@ const ar_hdr = extern struct { /// Always contains ARFMAG. ar_fmag: [2]u8, - const NameOrLength = union(enum) { - Name: []const u8, - Length: u32, - }; - fn nameOrLength(self: ar_hdr) !NameOrLength { - const value = getValue(&self.ar_name); - const slash_index = mem.indexOf(u8, value, "/") orelse return error.MalformedArchive; - const len = value.len; - if (slash_index == len - 1) { - // Name stored directly - return NameOrLength{ .Name = value }; - } else { - // Name follows the header directly and its length is encoded in - // the name field. 
- const length = try std.fmt.parseInt(u32, value[slash_index + 1 ..], 10); - return NameOrLength{ .Length = length }; - } - } - fn date(self: ar_hdr) !u64 { - const value = getValue(&self.ar_date); + const value = mem.trimRight(u8, &self.ar_date, &[_]u8{@as(u8, 0x20)}); return std.fmt.parseInt(u64, value, 10); } fn size(self: ar_hdr) !u32 { - const value = getValue(&self.ar_size); + const value = mem.trimRight(u8, &self.ar_size, &[_]u8{@as(u8, 0x20)}); return std.fmt.parseInt(u32, value, 10); } - fn getValue(raw: []const u8) []const u8 { - return mem.trimRight(u8, raw, &[_]u8{@as(u8, 0x20)}); + fn name(self: *const ar_hdr) ?[]const u8 { + const value = &self.ar_name; + if (mem.startsWith(u8, value, "#1/")) return null; + const sentinel = mem.indexOfScalar(u8, value, '/') orelse value.len; + return value[0..sentinel]; + } + + fn nameLength(self: ar_hdr) !?u32 { + const value = &self.ar_name; + if (!mem.startsWith(u8, value, "#1/")) return null; + const trimmed = mem.trimRight(u8, self.ar_name["#1/".len..], &[_]u8{0x20}); + return try std.fmt.parseInt(u32, trimmed, 10); } }; -pub fn isArchive(file: fs.File, fat_offset: u64) bool { - const reader = file.reader(); - const magic = reader.readBytesNoEof(SARMAG) catch return false; - defer file.seekTo(fat_offset) catch {}; - return mem.eql(u8, &magic, ARMAG); -} - pub fn deinit(self: *Archive, allocator: Allocator) void { - self.file.close(); - for (self.toc.keys()) |*key| { - allocator.free(key.*); - } - for (self.toc.values()) |*value| { - value.deinit(allocator); - } - self.toc.deinit(allocator); - allocator.free(self.name); + self.objects.deinit(allocator); } -pub fn parse(self: *Archive, allocator: Allocator, reader: anytype) !void { - _ = try reader.readBytesNoEof(SARMAG); - self.header = try reader.readStruct(ar_hdr); - const name_or_length = try self.header.nameOrLength(); - const embedded_name = try parseName(allocator, name_or_length, reader); - log.debug("parsing archive '{s}' at '{s}'", .{ embedded_name, 
self.name }); - defer allocator.free(embedded_name); +pub fn parse(self: *Archive, arena: Allocator, macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; - try self.parseTableOfContents(allocator, reader); -} - -fn parseName(allocator: Allocator, name_or_length: ar_hdr.NameOrLength, reader: anytype) ![]u8 { - var name: []u8 = undefined; - switch (name_or_length) { - .Name => |n| { - name = try allocator.dupe(u8, n); - }, - .Length => |len| { - var n = try allocator.alloc(u8, len); - defer allocator.free(n); - try reader.readNoEof(n); - const actual_len = mem.indexOfScalar(u8, n, @as(u8, 0)) orelse n.len; - name = try allocator.dupe(u8, n[0..actual_len]); - }, - } - return name; -} - -fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) !void { - const symtab_size = try reader.readInt(u32, .little); - const symtab = try allocator.alloc(u8, symtab_size); - defer allocator.free(symtab); - - reader.readNoEof(symtab) catch { - log.debug("incomplete symbol table: expected symbol table of length 0x{x}", .{symtab_size}); - return error.MalformedArchive; - }; - - const strtab_size = try reader.readInt(u32, .little); - const strtab = try allocator.alloc(u8, strtab_size); - defer allocator.free(strtab); - - reader.readNoEof(strtab) catch { - log.debug("incomplete symbol table: expected string table of length 0x{x}", .{strtab_size}); - return error.MalformedArchive; - }; - - var symtab_stream = std.io.fixedBufferStream(symtab); - var symtab_reader = symtab_stream.reader(); + var stream = std.io.fixedBufferStream(self.data); + const reader = stream.reader(); while (true) { - const n_strx = symtab_reader.readInt(u32, .little) catch |err| switch (err) { - error.EndOfStream => break, - else => |e| return e, - }; - const object_offset = try symtab_reader.readInt(u32, .little); + if (stream.pos >= self.data.len) break; + if (!mem.isAligned(stream.pos, 2)) stream.pos += 1; - const sym_name = mem.sliceTo(@as([*:0]const u8, @ptrCast(strtab.ptr 
+ n_strx)), 0); - const owned_name = try allocator.dupe(u8, sym_name); - const res = try self.toc.getOrPut(allocator, owned_name); - defer if (res.found_existing) allocator.free(owned_name); + const hdr = try reader.readStruct(ar_hdr); - if (!res.found_existing) { - res.value_ptr.* = .{}; + if (!mem.eql(u8, &hdr.ar_fmag, ARFMAG)) { + macho_file.base.fatal("{s}: invalid header delimiter: expected '{s}', found '{s}'", .{ + self.path, std.fmt.fmtSliceEscapeLower(ARFMAG), std.fmt.fmtSliceEscapeLower(&hdr.ar_fmag), + }); + return error.ParseFailed; } - try res.value_ptr.append(allocator, object_offset); + var size = try hdr.size(); + const name = name: { + if (hdr.name()) |n| break :name try arena.dupe(u8, n); + if (try hdr.nameLength()) |len| { + size -= len; + const buf = try arena.alloc(u8, len); + try reader.readNoEof(buf); + const actual_len = mem.indexOfScalar(u8, buf, @as(u8, 0)) orelse len; + break :name buf[0..actual_len]; + } + unreachable; + }; + defer { + _ = stream.seekBy(size) catch {}; + } + + if (mem.eql(u8, name, "__.SYMDEF") or mem.eql(u8, name, "__.SYMDEF SORTED")) continue; + + const object = Object{ + .archive = self.path, + .path = name, + .data = self.data[stream.pos..][0..size], + .index = undefined, + .alive = false, + .mtime = hdr.date() catch 0, + }; + + log.debug("extracting object '{s}' from archive '{s}'", .{ object.path, self.path }); + + try self.objects.append(gpa, object); } } -pub fn parseObject(self: Archive, gpa: Allocator, offset: u32) !Object { - const reader = self.file.reader(); - try reader.context.seekTo(self.fat_offset + offset); - - const object_header = try reader.readStruct(ar_hdr); - - const name_or_length = try object_header.nameOrLength(); - const object_name = try parseName(gpa, name_or_length, reader); - defer gpa.free(object_name); - - log.debug("extracting object '{s}' from archive '{s}'", .{ object_name, self.name }); - - const name = name: { - var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; - const path = try 
std.os.realpath(self.name, &buffer); - break :name try std.fmt.allocPrint(gpa, "{s}({s})", .{ path, object_name }); - }; - - const object_name_len = switch (name_or_length) { - .Name => 0, - .Length => |len| len, - }; - const object_size = (try object_header.size()) - object_name_len; - const contents = try gpa.allocWithOptions(u8, object_size, @alignOf(u64), null); - const amt = try reader.readAll(contents); - if (amt != object_size) { - return error.InputOutput; - } - - var object = Object{ - .name = name, - .mtime = object_header.date() catch 0, - .contents = contents, - }; - - try object.parse(gpa); - - return object; -} - -const Archive = @This(); - -const std = @import("std"); -const assert = std.debug.assert; -const fs = std.fs; const log = std.log.scoped(.link); const macho = std.macho; const mem = std.mem; +const std = @import("std"); const Allocator = mem.Allocator; +const Archive = @This(); +const MachO = @import("../MachO.zig"); const Object = @import("Object.zig"); diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index d76a6de841..d734faa487 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -1,1271 +1,905 @@ -/// Each Atom always gets a symbol with the fully qualified name. -/// The symbol can reside in any object file context structure in `symtab` array -/// (see `Object`), or if the symbol is a synthetic symbol such as a GOT cell or -/// a stub trampoline, it can be found in the linkers `locals` arraylist. -/// If this field is 0 and file is 0, it means the codegen size = 0 and there is no symbol or -/// offset table entry. -sym_index: u32 = 0, +/// Address allocated for this Atom. +value: u64 = 0, -/// 0 means an Atom is a synthetic Atom such as a GOT cell defined by the linker. -/// Otherwise, it is the index into appropriate object file (indexing from 1). -/// Prefer using `getFile()` helper to get the file index out rather than using -/// the field directly. -file: u32 = 0, +/// Name of this Atom. 
+name: u32 = 0, -/// If this Atom is not a synthetic Atom, i.e., references a subsection in an -/// Object file, `inner_sym_index` and `inner_nsyms_trailing` tell where and if -/// this Atom contains any additional symbol references that fall within this Atom's -/// address range. These could for example be an alias symbol which can be used -/// internally by the relocation records, or if the Object file couldn't be split -/// into subsections, this Atom may encompass an entire input section. -inner_sym_index: u32 = 0, -inner_nsyms_trailing: u32 = 0, +/// Index into linker's input file table. +file: File.Index = 0, -/// Size and alignment of this atom -/// Unlike in Elf, we need to store the size of this symbol as part of -/// the atom since macho.nlist_64 lacks this information. +/// Size of this atom size: u64 = 0, -/// Alignment of this atom as a power of 2. -/// For instance, aligmment of 0 should be read as 2^0 = 1 byte aligned. -alignment: Alignment = .@"1", +/// Alignment of this atom as a power of two. +alignment: u32 = 0, -/// Points to the previous and next neighbours -/// TODO use the same trick as with symbols: reserve index 0 as null atom -next_index: ?Index = null, -prev_index: ?Index = null, +/// Index of the input section. +n_sect: u32 = 0, -pub const Alignment = @import("../../InternPool.zig").Alignment; +/// Index of the output section. +out_n_sect: u8 = 0, -pub const Index = u32; +/// Offset within the parent section pointed to by n_sect. +/// off + size <= parent section size. +off: u64 = 0, -pub const Binding = struct { - target: SymbolWithLoc, - offset: u64, -}; +/// Relocations of this atom. +relocs: Loc = .{}, -/// Returns `null` if the Atom is a synthetic Atom. -/// Otherwise, returns an index into an array of Objects. -pub fn getFile(self: Atom) ?u32 { - if (self.file == 0) return null; - return self.file - 1; +/// Index of this atom in the linker's atoms table. +atom_index: Index = 0, + +/// Index of the thunk for this atom. 
+thunk_index: Thunk.Index = 0, + +/// Unwind records associated with this atom. +unwind_records: Loc = .{}, + +flags: Flags = .{}, + +pub fn getName(self: Atom, macho_file: *MachO) [:0]const u8 { + return macho_file.string_intern.getAssumeExists(self.name); } -pub fn getSymbolIndex(self: Atom) ?u32 { - if (self.getFile() == null and self.sym_index == 0) return null; - return self.sym_index; +pub fn getFile(self: Atom, macho_file: *MachO) File { + return macho_file.getFile(self.file).?; } -/// Returns symbol referencing this atom. -pub fn getSymbol(self: Atom, macho_file: *MachO) macho.nlist_64 { - return self.getSymbolPtr(macho_file).*; +pub fn getInputSection(self: Atom, macho_file: *MachO) macho.section_64 { + return switch (self.getFile(macho_file)) { + .dylib => unreachable, + inline else => |x| x.sections.items(.header)[self.n_sect], + }; } -/// Returns pointer-to-symbol referencing this atom. -pub fn getSymbolPtr(self: Atom, macho_file: *MachO) *macho.nlist_64 { - const sym_index = self.getSymbolIndex().?; - return macho_file.getSymbolPtr(.{ .sym_index = sym_index, .file = self.file }); +pub fn getInputAddress(self: Atom, macho_file: *MachO) u64 { + return self.getInputSection(macho_file).addr + self.off; } -pub fn getSymbolWithLoc(self: Atom) SymbolWithLoc { - const sym_index = self.getSymbolIndex().?; - return .{ .sym_index = sym_index, .file = self.file }; +pub fn getPriority(self: Atom, macho_file: *MachO) u64 { + const file = self.getFile(macho_file); + return (@as(u64, @intCast(file.getIndex())) << 32) | @as(u64, @intCast(self.n_sect)); } -/// Returns the name of this atom. 
-pub fn getName(self: Atom, macho_file: *MachO) []const u8 { - const sym_index = self.getSymbolIndex().?; - return macho_file.getSymbolName(.{ .sym_index = sym_index, .file = self.file }); +pub fn getCode(self: Atom, macho_file: *MachO) []const u8 { + const code = switch (self.getFile(macho_file)) { + .dylib => unreachable, + inline else => |x| x.getSectionData(self.n_sect), + }; + return code[self.off..][0..self.size]; } -/// Returns how much room there is to grow in virtual address space. -/// File offset relocation happens transparently, so it is not included in -/// this calculation. -pub fn capacity(self: Atom, macho_file: *MachO) u64 { - const self_sym = self.getSymbol(macho_file); - if (self.next_index) |next_index| { - const next = macho_file.getAtom(next_index); - const next_sym = next.getSymbol(macho_file); - return next_sym.n_value - self_sym.n_value; - } else { - // We are the last atom. - // The capacity is limited only by virtual address space. - return macho_file.allocatedVirtualSize(self_sym.n_value); +pub fn getRelocs(self: Atom, macho_file: *MachO) []const Relocation { + const relocs = switch (self.getFile(macho_file)) { + .dylib => unreachable, + inline else => |x| x.sections.items(.relocs)[self.n_sect], + }; + return relocs.items[self.relocs.pos..][0..self.relocs.len]; +} + +pub fn getUnwindRecords(self: Atom, macho_file: *MachO) []const UnwindInfo.Record.Index { + return switch (self.getFile(macho_file)) { + .dylib => unreachable, + .internal => &[0]UnwindInfo.Record.Index{}, + .object => |x| x.unwind_records.items[self.unwind_records.pos..][0..self.unwind_records.len], + }; +} + +pub fn markUnwindRecordsDead(self: Atom, macho_file: *MachO) void { + for (self.getUnwindRecords(macho_file)) |cu_index| { + const cu = macho_file.getUnwindRecord(cu_index); + cu.alive = false; + + if (cu.getFdePtr(macho_file)) |fde| { + fde.alive = false; + } } } -pub fn freeListEligible(self: Atom, macho_file: *MachO) bool { - // No need to keep a free list node for 
the last atom. - const next_index = self.next_index orelse return false; - const next = macho_file.getAtom(next_index); - const self_sym = self.getSymbol(macho_file); - const next_sym = next.getSymbol(macho_file); - const cap = next_sym.n_value - self_sym.n_value; - const ideal_cap = MachO.padToIdeal(self.size); - if (cap <= ideal_cap) return false; - const surplus = cap - ideal_cap; - return surplus >= MachO.min_text_capacity; +pub fn getThunk(self: Atom, macho_file: *MachO) *Thunk { + return macho_file.getThunk(self.thunk_index); } -pub fn getOutputSection(macho_file: *MachO, sect: macho.section_64) !?u8 { - const segname = sect.segName(); - const sectname = sect.sectName(); - const res: ?u8 = blk: { - if (mem.eql(u8, "__LLVM", segname)) { - log.debug("TODO LLVM section: type 0x{x}, name '{s},{s}'", .{ - sect.flags, segname, sectname, - }); - break :blk null; - } - - // We handle unwind info separately. - if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) { - break :blk null; - } - if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { - break :blk null; - } - - if (sect.isCode()) { - if (macho_file.text_section_index == null) { - macho_file.text_section_index = try macho_file.initSection("__TEXT", "__text", .{ - .flags = macho.S_REGULAR | - macho.S_ATTR_PURE_INSTRUCTIONS | - macho.S_ATTR_SOME_INSTRUCTIONS, - }); - } - break :blk macho_file.text_section_index.?; - } - - if (sect.isDebug()) { - break :blk null; - } +pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 { + const segname, const sectname, const flags = blk: { + if (sect.isCode()) break :blk .{ + "__TEXT", + "__text", + macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }; switch (sect.type()) { macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS, - => { - break :blk macho_file.getSectionByName("__TEXT", "__const") orelse - try macho_file.initSection("__TEXT", "__const", .{}); 
- }, + => break :blk .{ "__TEXT", "__const", macho.S_REGULAR }, + macho.S_CSTRING_LITERALS => { - if (mem.startsWith(u8, sectname, "__objc")) { - break :blk macho_file.getSectionByName(segname, sectname) orelse - try macho_file.initSection(segname, sectname, .{}); - } - break :blk macho_file.getSectionByName("__TEXT", "__cstring") orelse - try macho_file.initSection("__TEXT", "__cstring", .{ - .flags = macho.S_CSTRING_LITERALS, - }); + if (mem.startsWith(u8, sect.sectName(), "__objc")) break :blk .{ + sect.segName(), sect.sectName(), macho.S_REGULAR, + }; + break :blk .{ "__TEXT", "__cstring", macho.S_CSTRING_LITERALS }; }, + macho.S_MOD_INIT_FUNC_POINTERS, macho.S_MOD_TERM_FUNC_POINTERS, - => { - break :blk macho_file.getSectionByName("__DATA_CONST", sectname) orelse - try macho_file.initSection("__DATA_CONST", sectname, .{ - .flags = sect.flags, - }); - }, + => break :blk .{ "__DATA_CONST", sect.sectName(), sect.flags }, + macho.S_LITERAL_POINTERS, macho.S_ZEROFILL, + macho.S_GB_ZEROFILL, macho.S_THREAD_LOCAL_VARIABLES, macho.S_THREAD_LOCAL_VARIABLE_POINTERS, macho.S_THREAD_LOCAL_REGULAR, macho.S_THREAD_LOCAL_ZEROFILL, - => { - break :blk macho_file.getSectionByName(segname, sectname) orelse - try macho_file.initSection(segname, sectname, .{ - .flags = sect.flags, - }); - }, - macho.S_COALESCED => { - break :blk macho_file.getSectionByName(segname, sectname) orelse - try macho_file.initSection(segname, sectname, .{}); + => break :blk .{ sect.segName(), sect.sectName(), sect.flags }, + + macho.S_COALESCED => break :blk .{ + sect.segName(), + sect.sectName(), + macho.S_REGULAR, }, + macho.S_REGULAR => { - if (mem.eql(u8, segname, "__TEXT")) { - if (mem.eql(u8, sectname, "__rodata") or - mem.eql(u8, sectname, "__typelink") or - mem.eql(u8, sectname, "__itablink") or - mem.eql(u8, sectname, "__gosymtab") or - mem.eql(u8, sectname, "__gopclntab")) - { - break :blk macho_file.getSectionByName("__TEXT", sectname) orelse - try macho_file.initSection("__TEXT", sectname, 
.{}); - } - } + const segname = sect.segName(); + const sectname = sect.sectName(); if (mem.eql(u8, segname, "__DATA")) { if (mem.eql(u8, sectname, "__const") or mem.eql(u8, sectname, "__cfstring") or mem.eql(u8, sectname, "__objc_classlist") or - mem.eql(u8, sectname, "__objc_imageinfo")) - { - break :blk macho_file.getSectionByName("__DATA_CONST", sectname) orelse - try macho_file.initSection("__DATA_CONST", sectname, .{}); - } else if (mem.eql(u8, sectname, "__data")) { - if (macho_file.data_section_index == null) { - macho_file.data_section_index = try macho_file.initSection("__DATA", "__data", .{}); - } - break :blk macho_file.data_section_index.?; - } + mem.eql(u8, sectname, "__objc_imageinfo")) break :blk .{ + "__DATA_CONST", + sectname, + macho.S_REGULAR, + }; } - break :blk macho_file.getSectionByName(segname, sectname) orelse - try macho_file.initSection(segname, sectname, .{}); + break :blk .{ segname, sectname, sect.flags }; }, - else => break :blk null, + + else => break :blk .{ sect.segName(), sect.sectName(), sect.flags }, } }; - - // TODO we can do this directly in the selection logic above. - // Or is it not worth it? 
- if (macho_file.data_const_section_index == null) { - if (macho_file.getSectionByName("__DATA_CONST", "__const")) |index| { - macho_file.data_const_section_index = index; - } + const osec = macho_file.getSectionByName(segname, sectname) orelse try macho_file.addSection( + segname, + sectname, + .{ .flags = flags }, + ); + if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__data")) { + macho_file.data_sect_index = osec; } - if (macho_file.thread_vars_section_index == null) { - if (macho_file.getSectionByName("__DATA", "__thread_vars")) |index| { - macho_file.thread_vars_section_index = index; - } - } - if (macho_file.thread_data_section_index == null) { - if (macho_file.getSectionByName("__DATA", "__thread_data")) |index| { - macho_file.thread_data_section_index = index; - } - } - if (macho_file.thread_bss_section_index == null) { - if (macho_file.getSectionByName("__DATA", "__thread_bss")) |index| { - macho_file.thread_bss_section_index = index; - } - } - if (macho_file.bss_section_index == null) { - if (macho_file.getSectionByName("__DATA", "__bss")) |index| { - macho_file.bss_section_index = index; - } - } - - return res; + return osec; } -pub fn addRelocation(macho_file: *MachO, atom_index: Index, reloc: Relocation) !void { - return addRelocations(macho_file, atom_index, &[_]Relocation{reloc}); -} - -pub fn addRelocations(macho_file: *MachO, atom_index: Index, relocs: []const Relocation) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const gop = try macho_file.relocs.getOrPut(gpa, atom_index); - if (!gop.found_existing) { - gop.value_ptr.* = .{}; - } - try gop.value_ptr.ensureUnusedCapacity(gpa, relocs.len); - for (relocs) |reloc| { - log.debug(" (adding reloc of type {s} to target %{d})", .{ - @tagName(reloc.type), - reloc.target.sym_index, - }); - gop.value_ptr.appendAssumeCapacity(reloc); - } -} - -pub fn addRebase(macho_file: *MachO, atom_index: Index, offset: u32) !void { - const comp = macho_file.base.comp; - const gpa 
= comp.gpa; - const atom = macho_file.getAtom(atom_index); - log.debug(" (adding rebase at offset 0x{x} in %{?d})", .{ offset, atom.getSymbolIndex() }); - const gop = try macho_file.rebases.getOrPut(gpa, atom_index); - if (!gop.found_existing) { - gop.value_ptr.* = .{}; - } - try gop.value_ptr.append(gpa, offset); -} - -pub fn addBinding(macho_file: *MachO, atom_index: Index, binding: Binding) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const atom = macho_file.getAtom(atom_index); - log.debug(" (adding binding to symbol {s} at offset 0x{x} in %{?d})", .{ - macho_file.getSymbolName(binding.target), - binding.offset, - atom.getSymbolIndex(), - }); - const gop = try macho_file.bindings.getOrPut(gpa, atom_index); - if (!gop.found_existing) { - gop.value_ptr.* = .{}; - } - try gop.value_ptr.append(gpa, binding); -} - -pub fn resolveRelocations( - macho_file: *MachO, - atom_index: Index, - relocs: []*const Relocation, - code: []u8, -) void { - relocs_log.debug("relocating '{s}'", .{macho_file.getAtom(atom_index).getName(macho_file)}); - for (relocs) |reloc| { - reloc.resolve(macho_file, atom_index, code); - } -} - -pub fn freeRelocations(macho_file: *MachO, atom_index: Index) void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - var removed_relocs = macho_file.relocs.fetchOrderedRemove(atom_index); - if (removed_relocs) |*relocs| relocs.value.deinit(gpa); - var removed_rebases = macho_file.rebases.fetchOrderedRemove(atom_index); - if (removed_rebases) |*rebases| rebases.value.deinit(gpa); - var removed_bindings = macho_file.bindings.fetchOrderedRemove(atom_index); - if (removed_bindings) |*bindings| bindings.value.deinit(gpa); -} - -const InnerSymIterator = struct { - sym_index: u32, - nsyms: u32, - file: u32, - pos: u32 = 0, - - pub fn next(it: *@This()) ?SymbolWithLoc { - if (it.pos == it.nsyms) return null; - const res = SymbolWithLoc{ .sym_index = it.sym_index + it.pos, .file = it.file }; - it.pos += 1; - return res; - } -}; 
- -/// Returns an iterator over potentially contained symbols. -/// Panics when called on a synthetic Atom. -pub fn getInnerSymbolsIterator(macho_file: *MachO, atom_index: Index) InnerSymIterator { - const atom = macho_file.getAtom(atom_index); - assert(atom.getFile() != null); - return .{ - .sym_index = atom.inner_sym_index, - .nsyms = atom.inner_nsyms_trailing, - .file = atom.file, - }; -} - -/// Returns a section alias symbol if one is defined. -/// An alias symbol is used to represent the start of an input section -/// if there were no symbols defined within that range. -/// Alias symbols are only used on x86_64. -pub fn getSectionAlias(macho_file: *MachO, atom_index: Index) ?SymbolWithLoc { - const atom = macho_file.getAtom(atom_index); - assert(atom.getFile() != null); - - const object = macho_file.objects.items[atom.getFile().?]; - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const ntotal = @as(u32, @intCast(object.symtab.len)); - var sym_index: u32 = nbase; - while (sym_index < ntotal) : (sym_index += 1) { - if (object.getAtomIndexForSymbol(sym_index)) |other_atom_index| { - if (other_atom_index == atom_index) return SymbolWithLoc{ - .sym_index = sym_index, - .file = atom.file, - }; - } - } - return null; -} - -/// Given an index into a contained symbol within, calculates an offset wrt -/// the start of this Atom. 
-pub fn calcInnerSymbolOffset(macho_file: *MachO, atom_index: Index, sym_index: u32) u64 { - const atom = macho_file.getAtom(atom_index); - assert(atom.getFile() != null); - - if (atom.sym_index == sym_index) return 0; - - const object = macho_file.objects.items[atom.getFile().?]; - const source_sym = object.getSourceSymbol(sym_index).?; - const base_addr = if (object.getSourceSymbol(atom.sym_index)) |sym| - sym.n_value - else blk: { - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - const source_sect = object.getSourceSection(sect_id); - break :blk source_sect.addr; - }; - return source_sym.n_value - base_addr; -} - -pub fn scanAtomRelocs(macho_file: *MachO, atom_index: Index, relocs: []align(1) const macho.relocation_info) !void { - const target = macho_file.base.comp.root_mod.resolved_target.result; - const arch = target.cpu.arch; - const atom = macho_file.getAtom(atom_index); - assert(atom.getFile() != null); // synthetic atoms do not have relocs - - return switch (arch) { - .aarch64 => scanAtomRelocsArm64(macho_file, atom_index, relocs), - .x86_64 => scanAtomRelocsX86(macho_file, atom_index, relocs), - else => unreachable, - }; -} - -const RelocContext = struct { - base_addr: i64 = 0, - base_offset: i32 = 0, -}; - -pub fn getRelocContext(macho_file: *MachO, atom_index: Index) RelocContext { - const atom = macho_file.getAtom(atom_index); - assert(atom.getFile() != null); // synthetic atoms do not have relocs - - const object = macho_file.objects.items[atom.getFile().?]; - if (object.getSourceSymbol(atom.sym_index)) |source_sym| { - const source_sect = object.getSourceSection(source_sym.n_sect - 1); - return .{ - .base_addr = @as(i64, @intCast(source_sect.addr)), - .base_offset = @as(i32, @intCast(source_sym.n_value - source_sect.addr)), - }; - } - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - const source_sect = 
object.getSourceSection(sect_id); - return .{ - .base_addr = @as(i64, @intCast(source_sect.addr)), - .base_offset = 0, - }; -} - -pub fn parseRelocTarget(macho_file: *MachO, ctx: struct { - object_id: u32, - rel: macho.relocation_info, - code: []const u8, - base_addr: i64 = 0, - base_offset: i32 = 0, -}) SymbolWithLoc { +pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const target = macho_file.base.comp.root_mod.resolved_target.result; - const object = &macho_file.objects.items[ctx.object_id]; - log.debug("parsing reloc target in object({d}) '{s}' ", .{ ctx.object_id, object.name }); + const object = self.getFile(macho_file).object; + const relocs = self.getRelocs(macho_file); - const sym_index = if (ctx.rel.r_extern == 0) sym_index: { - const sect_id = @as(u8, @intCast(ctx.rel.r_symbolnum - 1)); - const rel_offset = @as(u32, @intCast(ctx.rel.r_address - ctx.base_offset)); + for (relocs) |rel| { + if (try self.reportUndefSymbol(rel, macho_file)) continue; - const address_in_section = if (ctx.rel.r_pcrel == 0) blk: { - break :blk if (ctx.rel.r_length == 3) - mem.readInt(u64, ctx.code[rel_offset..][0..8], .little) - else - mem.readInt(u32, ctx.code[rel_offset..][0..4], .little); - } else blk: { - assert(target.cpu.arch == .x86_64); - const correction: u3 = switch (@as(macho.reloc_type_x86_64, @enumFromInt(ctx.rel.r_type))) { - .X86_64_RELOC_SIGNED => 0, - .X86_64_RELOC_SIGNED_1 => 1, - .X86_64_RELOC_SIGNED_2 => 2, - .X86_64_RELOC_SIGNED_4 => 4, + switch (rel.type) { + .branch => { + const symbol = rel.getTargetSymbol(macho_file); + if (symbol.flags.import or (symbol.flags.@"export" and (symbol.flags.weak or symbol.flags.interposable))) { + symbol.flags.stubs = true; + if (symbol.flags.weak) { + macho_file.binds_to_weak = true; + } + } else if (mem.startsWith(u8, symbol.getName(macho_file), "_objc_msgSend$")) { + symbol.flags.objc_stubs = true; + } + }, + + .got_load, + .got_load_page, + .got_load_pageoff, + 
=> { + const symbol = rel.getTargetSymbol(macho_file); + if (symbol.flags.import or + (symbol.flags.@"export" and (symbol.flags.weak or symbol.flags.interposable)) or + macho_file.options.cpu_arch.? == .aarch64) // TODO relax on arm64 + { + symbol.flags.got = true; + if (symbol.flags.weak) { + macho_file.binds_to_weak = true; + } + } + }, + + .got => { + rel.getTargetSymbol(macho_file).flags.got = true; + }, + + .tlv, + .tlvp_page, + .tlvp_pageoff, + => { + const symbol = rel.getTargetSymbol(macho_file); + if (!symbol.flags.tlv) { + macho_file.base.fatal( + "{}: {s}: illegal thread-local variable reference to regular symbol {s}", + .{ object.fmtPath(), self.getName(macho_file), symbol.getName(macho_file) }, + ); + } + if (symbol.flags.import or (symbol.flags.@"export" and (symbol.flags.weak or symbol.flags.interposable))) { + symbol.flags.tlv_ptr = true; + if (symbol.flags.weak) { + macho_file.binds_to_weak = true; + } + } + }, + + .unsigned => { + if (rel.meta.length == 3) { // TODO this really should check if this is pointer width + if (rel.tag == .@"extern") { + const symbol = rel.getTargetSymbol(macho_file); + if (symbol.isTlvInit(macho_file)) { + macho_file.has_tlv = true; + continue; + } + if (symbol.flags.import) { + object.num_bind_relocs += 1; + if (symbol.flags.weak) { + object.num_weak_bind_relocs += 1; + macho_file.binds_to_weak = true; + } + continue; + } + if (symbol.flags.@"export") { + if (symbol.flags.weak) { + object.num_weak_bind_relocs += 1; + macho_file.binds_to_weak = true; + } else if (symbol.flags.interposable) { + object.num_bind_relocs += 1; + } + } + } + object.num_rebase_relocs += 1; + } + }, + + else => {}, + } + } +} + +fn reportUndefSymbol(self: Atom, rel: Relocation, macho_file: *MachO) !bool { + if (rel.tag == .local) return false; + + const sym = rel.getTargetSymbol(macho_file); + if (sym.getFile(macho_file) == null) { + const gpa = macho_file.base.allocator; + const gop = try macho_file.undefs.getOrPut(gpa, rel.target); + if 
(!gop.found_existing) { + gop.value_ptr.* = .{}; + } + try gop.value_ptr.append(gpa, self.atom_index); + return true; + } + + return false; +} + +pub fn resolveRelocs(self: Atom, macho_file: *MachO, buffer: []u8) !void { + const tracy = trace(@src()); + defer tracy.end(); + + assert(!self.getInputSection(macho_file).isZerofill()); + const relocs = self.getRelocs(macho_file); + const file = self.getFile(macho_file); + const name = self.getName(macho_file); + @memcpy(buffer, self.getCode(macho_file)); + + relocs_log.debug("{x}: {s}", .{ self.value, name }); + + var stream = std.io.fixedBufferStream(buffer); + + var i: usize = 0; + while (i < relocs.len) : (i += 1) { + const rel = relocs[i]; + const rel_offset = rel.offset - self.off; + const subtractor = if (rel.meta.has_subtractor) relocs[i - 1] else null; + + if (rel.tag == .@"extern") { + if (rel.getTargetSymbol(macho_file).getFile(macho_file) == null) continue; + } + + try stream.seekTo(rel_offset); + self.resolveRelocInner(rel, subtractor, buffer, macho_file, stream.writer()) catch |err| { + switch (err) { + error.RelaxFail => macho_file.base.fatal( + "{}: {s}: 0x{x}: failed to relax relocation: in {s}", + .{ file.fmtPath(), name, rel.offset, @tagName(rel.type) }, + ), + else => |e| return e, + } + return error.ResolveFailed; + }; + } +} + +const ResolveError = error{ + RelaxFail, + NoSpaceLeft, + DivisionByZero, + UnexpectedRemainder, + Overflow, +}; + +fn resolveRelocInner( + self: Atom, + rel: Relocation, + subtractor: ?Relocation, + code: []u8, + macho_file: *MachO, + writer: anytype, +) ResolveError!void { + const cpu_arch = macho_file.options.cpu_arch.?; + const rel_offset = rel.offset - self.off; + const seg_id = macho_file.sections.items(.segment_id)[self.out_n_sect]; + const seg = macho_file.segments.items[seg_id]; + const P = @as(i64, @intCast(self.value)) + @as(i64, @intCast(rel_offset)); + const A = rel.addend + rel.getRelocAddend(cpu_arch); + const S: i64 = 
@intCast(rel.getTargetAddress(macho_file)); + const G: i64 = @intCast(rel.getGotTargetAddress(macho_file)); + const TLS = @as(i64, @intCast(macho_file.getTlsAddress())); + const SUB = if (subtractor) |sub| @as(i64, @intCast(sub.getTargetAddress(macho_file))) else 0; + + switch (rel.tag) { + .local => relocs_log.debug(" {x}<+{d}>: {s}: [=> {x}] atom({d})", .{ + P, + rel_offset, + @tagName(rel.type), + S + A - SUB, + rel.getTargetAtom(macho_file).atom_index, + }), + .@"extern" => relocs_log.debug(" {x}<+{d}>: {s}: [=> {x}] G({x}) ({s})", .{ + P, + rel_offset, + @tagName(rel.type), + S + A - SUB, + G + A, + rel.getTargetSymbol(macho_file).getName(macho_file), + }), + } + + switch (rel.type) { + .subtractor => {}, + + .unsigned => { + assert(!rel.meta.pcrel); + if (rel.meta.length == 3) { + if (rel.tag == .@"extern") { + const sym = rel.getTargetSymbol(macho_file); + if (sym.isTlvInit(macho_file)) { + try writer.writeInt(u64, @intCast(S - TLS), .little); + return; + } + const entry = bind.Entry{ + .target = rel.target, + .offset = @as(u64, @intCast(P)) - seg.vmaddr, + .segment_id = seg_id, + .addend = A, + }; + if (sym.flags.import) { + macho_file.bind.entries.appendAssumeCapacity(entry); + if (sym.flags.weak) { + macho_file.weak_bind.entries.appendAssumeCapacity(entry); + } + return; + } + if (sym.flags.@"export") { + if (sym.flags.weak) { + macho_file.weak_bind.entries.appendAssumeCapacity(entry); + } else if (sym.flags.interposable) { + macho_file.bind.entries.appendAssumeCapacity(entry); + } + } + } + macho_file.rebase.entries.appendAssumeCapacity(.{ + .offset = @as(u64, @intCast(P)) - seg.vmaddr, + .segment_id = seg_id, + }); + try writer.writeInt(u64, @bitCast(S + A - SUB), .little); + } else if (rel.meta.length == 2) { + try writer.writeInt(u32, @bitCast(@as(i32, @truncate(S + A - SUB))), .little); + } else unreachable; + }, + + .got => { + assert(rel.tag == .@"extern"); + assert(rel.meta.length == 2); + assert(rel.meta.pcrel); + try writer.writeInt(i32, 
@intCast(G + A - P), .little); + }, + + .branch => { + assert(rel.meta.length == 2); + assert(rel.meta.pcrel); + assert(rel.tag == .@"extern"); + + switch (cpu_arch) { + .x86_64 => try writer.writeInt(i32, @intCast(S + A - P), .little), + .aarch64 => { + const disp: i28 = math.cast(i28, S + A - P) orelse blk: { + const thunk = self.getThunk(macho_file); + const S_: i64 = @intCast(thunk.getAddress(rel.target)); + break :blk math.cast(i28, S_ + A - P) orelse return error.Overflow; + }; + var inst = aarch64.Instruction{ + .unconditional_branch_immediate = mem.bytesToValue(std.meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.unconditional_branch_immediate, + ), code[rel_offset..][0..4]), + }; + inst.unconditional_branch_immediate.imm26 = @as(u26, @truncate(@as(u28, @bitCast(disp >> 2)))); + try writer.writeInt(u32, inst.toU32(), .little); + }, else => unreachable, - }; - const addend = mem.readInt(i32, ctx.code[rel_offset..][0..4], .little); - const target_address = @as(i64, @intCast(ctx.base_addr)) + ctx.rel.r_address + 4 + correction + addend; - break :blk @as(u64, @intCast(target_address)); - }; - - // Find containing atom - log.debug(" | locating symbol by address @{x} in section {d}", .{ address_in_section, sect_id }); - break :sym_index object.getSymbolByAddress(address_in_section, sect_id); - } else object.reverse_symtab_lookup[ctx.rel.r_symbolnum]; - - const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = ctx.object_id + 1 }; - const sym = macho_file.getSymbol(sym_loc); - const reloc_target = if (sym.sect() and !sym.ext()) - sym_loc - else if (object.getGlobal(sym_index)) |global_index| - macho_file.globals.items[global_index] - else - sym_loc; - log.debug(" | target %{d} ('{s}') in object({?d})", .{ - reloc_target.sym_index, - macho_file.getSymbolName(reloc_target), - reloc_target.getFile(), - }); - return reloc_target; -} - -pub fn getRelocTargetAtomIndex(macho_file: *MachO, target: SymbolWithLoc) ?Index { - if (target.getFile() == null) 
{ - const target_sym_name = macho_file.getSymbolName(target); - if (mem.eql(u8, "__mh_execute_header", target_sym_name)) return null; - if (mem.eql(u8, "___dso_handle", target_sym_name)) return null; - - unreachable; // referenced symbol not found - } - - const object = macho_file.objects.items[target.getFile().?]; - return object.getAtomIndexForSymbol(target.sym_index); -} - -fn scanAtomRelocsArm64( - macho_file: *MachO, - atom_index: Index, - relocs: []align(1) const macho.relocation_info, -) !void { - for (relocs) |rel| { - const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); - - switch (rel_type) { - .ARM64_RELOC_ADDEND, .ARM64_RELOC_SUBTRACTOR => continue, - else => {}, - } - - if (rel.r_extern == 0) continue; - - const atom = macho_file.getAtom(atom_index); - const object = &macho_file.objects.items[atom.getFile().?]; - const sym_index = object.reverse_symtab_lookup[rel.r_symbolnum]; - const sym_loc = SymbolWithLoc{ - .sym_index = sym_index, - .file = atom.file, - }; - - const target = if (object.getGlobal(sym_index)) |global_index| - macho_file.globals.items[global_index] - else - sym_loc; - - switch (rel_type) { - .ARM64_RELOC_BRANCH26 => { - // TODO rewrite relocation - const sym = macho_file.getSymbol(target); - if (sym.undf()) try macho_file.addStubEntry(target); - }, - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12, - .ARM64_RELOC_POINTER_TO_GOT, - => { - // TODO rewrite relocation - try macho_file.addGotEntry(target); - }, - .ARM64_RELOC_TLVP_LOAD_PAGE21, - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, - => { - const sym = macho_file.getSymbol(target); - if (sym.undf()) try macho_file.addTlvPtrEntry(target); - }, - else => {}, - } - } -} - -fn scanAtomRelocsX86( - macho_file: *MachO, - atom_index: Index, - relocs: []align(1) const macho.relocation_info, -) !void { - for (relocs) |rel| { - const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); - - switch (rel_type) { - .X86_64_RELOC_SUBTRACTOR => continue, - 
else => {}, - } - - if (rel.r_extern == 0) continue; - - const atom = macho_file.getAtom(atom_index); - const object = &macho_file.objects.items[atom.getFile().?]; - const sym_index = object.reverse_symtab_lookup[rel.r_symbolnum]; - const sym_loc = SymbolWithLoc{ - .sym_index = sym_index, - .file = atom.file, - }; - - const target = if (object.getGlobal(sym_index)) |global_index| - macho_file.globals.items[global_index] - else - sym_loc; - - switch (rel_type) { - .X86_64_RELOC_BRANCH => { - // TODO rewrite relocation - const sym = macho_file.getSymbol(target); - if (sym.undf()) try macho_file.addStubEntry(target); - }, - .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => { - // TODO rewrite relocation - try macho_file.addGotEntry(target); - }, - .X86_64_RELOC_TLV => { - const sym = macho_file.getSymbol(target); - if (sym.undf()) try macho_file.addTlvPtrEntry(target); - }, - else => {}, - } - } -} - -pub fn resolveRelocs( - macho_file: *MachO, - atom_index: Index, - atom_code: []u8, - atom_relocs: []align(1) const macho.relocation_info, -) !void { - const target = macho_file.base.comp.root_mod.resolved_target.result; - const arch = target.cpu.arch; - const atom = macho_file.getAtom(atom_index); - assert(atom.getFile() != null); // synthetic atoms do not have relocs - - relocs_log.debug("resolving relocations in ATOM(%{d}, '{s}')", .{ - atom.sym_index, - macho_file.getSymbolName(atom.getSymbolWithLoc()), - }); - - const ctx = getRelocContext(macho_file, atom_index); - - return switch (arch) { - .aarch64 => resolveRelocsArm64(macho_file, atom_index, atom_code, atom_relocs, ctx), - .x86_64 => resolveRelocsX86(macho_file, atom_index, atom_code, atom_relocs, ctx), - else => unreachable, - }; -} - -pub fn getRelocTargetAddress(macho_file: *MachO, target: SymbolWithLoc, is_tlv: bool) u64 { - const target_atom_index = getRelocTargetAtomIndex(macho_file, target) orelse { - // If there is no atom for target, we still need to check for special, atom-less - // symbols such as 
`___dso_handle`. - const target_name = macho_file.getSymbolName(target); - const atomless_sym = macho_file.getSymbol(target); - log.debug(" | atomless target '{s}'", .{target_name}); - return atomless_sym.n_value; - }; - const target_atom = macho_file.getAtom(target_atom_index); - log.debug(" | target ATOM(%{d}, '{s}') in object({?})", .{ - target_atom.sym_index, - macho_file.getSymbolName(target_atom.getSymbolWithLoc()), - target_atom.getFile(), - }); - - const target_sym = macho_file.getSymbol(target_atom.getSymbolWithLoc()); - assert(target_sym.n_desc != MachO.N_DEAD); - - // If `target` is contained within the target atom, pull its address value. - const offset = if (target_atom.getFile() != null) blk: { - const object = macho_file.objects.items[target_atom.getFile().?]; - break :blk if (object.getSourceSymbol(target.sym_index)) |_| - Atom.calcInnerSymbolOffset(macho_file, target_atom_index, target.sym_index) - else - 0; // section alias - } else 0; - const base_address: u64 = if (is_tlv) base_address: { - // For TLV relocations, the value specified as a relocation is the displacement from the - // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first - // defined TLV template init section in the following order: - // * wrt to __thread_data if defined, then - // * wrt to __thread_bss - // TODO remember to check what the mechanism was prior to HAS_TLV_INITIALIZERS in earlier versions of macOS - const sect_id: u16 = sect_id: { - if (macho_file.thread_data_section_index) |i| { - break :sect_id i; - } else if (macho_file.thread_bss_section_index) |i| { - break :sect_id i; - } else break :base_address 0; - }; - break :base_address macho_file.sections.items(.header)[sect_id].addr; - } else 0; - return target_sym.n_value + offset - base_address; -} - -fn resolveRelocsArm64( - macho_file: *MachO, - atom_index: Index, - atom_code: []u8, - atom_relocs: []align(1) const macho.relocation_info, - context: RelocContext, -) !void { - const 
atom = macho_file.getAtom(atom_index); - const object = macho_file.objects.items[atom.getFile().?]; - - var addend: ?i64 = null; - var subtractor: ?SymbolWithLoc = null; - - for (atom_relocs) |rel| { - const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); - - switch (rel_type) { - .ARM64_RELOC_ADDEND => { - assert(addend == null); - - relocs_log.debug(" RELA({s}) @ {x} => {x}", .{ @tagName(rel_type), rel.r_address, rel.r_symbolnum }); - - addend = rel.r_symbolnum; - continue; - }, - .ARM64_RELOC_SUBTRACTOR => { - assert(subtractor == null); - - relocs_log.debug(" RELA({s}) @ {x} => %{d} in object({?d})", .{ - @tagName(rel_type), - rel.r_address, - rel.r_symbolnum, - atom.getFile(), - }); - - subtractor = parseRelocTarget(macho_file, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = atom_code, - .base_addr = context.base_addr, - .base_offset = context.base_offset, - }); - continue; - }, - else => {}, - } - - const target = parseRelocTarget(macho_file, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = atom_code, - .base_addr = context.base_addr, - .base_offset = context.base_offset, - }); - const rel_offset = @as(u32, @intCast(rel.r_address - context.base_offset)); - - relocs_log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{ - @tagName(rel_type), - rel.r_address, - target.sym_index, - macho_file.getSymbolName(target), - target.getFile(), - }); - - const source_addr = blk: { - const source_sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - break :blk source_sym.n_value + rel_offset; - }; - const target_addr = blk: { - if (relocRequiresGot(macho_file, rel)) break :blk macho_file.getGotEntryAddress(target).?; - if (relocIsTlv(macho_file, rel) and macho_file.getSymbol(target).undf()) - break :blk macho_file.getTlvPtrEntryAddress(target).?; - if (relocIsStub(macho_file, rel) and macho_file.getSymbol(target).undf()) - break :blk macho_file.getStubsEntryAddress(target).?; - const is_tlv = is_tlv: { - const source_sym = 
macho_file.getSymbol(atom.getSymbolWithLoc()); - const header = macho_file.sections.items(.header)[source_sym.n_sect - 1]; - break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; - }; - break :blk getRelocTargetAddress(macho_file, target, is_tlv); - }; - - relocs_log.debug(" | source_addr = 0x{x}", .{source_addr}); - - switch (rel_type) { - .ARM64_RELOC_BRANCH26 => { - relocs_log.debug(" source {s} (object({?})), target {s}", .{ - macho_file.getSymbolName(atom.getSymbolWithLoc()), - atom.getFile(), - macho_file.getSymbolName(target), - }); - - const displacement = if (Relocation.calcPcRelativeDisplacementArm64( - source_addr, - target_addr, - )) |disp| blk: { - relocs_log.debug(" | target_addr = 0x{x}", .{target_addr}); - break :blk disp; - } else |_| blk: { - const thunk_index = macho_file.thunk_table.get(atom_index).?; - const thunk = macho_file.thunks.items[thunk_index]; - const thunk_sym_loc = if (macho_file.getSymbol(target).undf()) - thunk.getTrampoline(macho_file, .stub, target).? 
- else - thunk.getTrampoline(macho_file, .atom, target).?; - const thunk_addr = macho_file.getSymbol(thunk_sym_loc).n_value; - relocs_log.debug(" | target_addr = 0x{x} (thunk)", .{thunk_addr}); - break :blk try Relocation.calcPcRelativeDisplacementArm64(source_addr, thunk_addr); - }; - - const code = atom_code[rel_offset..][0..4]; - var inst = aarch64.Instruction{ - .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.unconditional_branch_immediate, - ), code), - }; - inst.unconditional_branch_immediate.imm26 = @as(u26, @truncate(@as(u28, @bitCast(displacement >> 2)))); - mem.writeInt(u32, code, inst.toU32(), .little); - }, - - .ARM64_RELOC_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_TLVP_LOAD_PAGE21, - => { - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); - - relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - - const pages = @as(u21, @bitCast(Relocation.calcNumberOfPages(source_addr, adjusted_target_addr))); - const code = atom_code[rel_offset..][0..4]; - var inst = aarch64.Instruction{ - .pc_relative_address = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.pc_relative_address, - ), code), - }; - inst.pc_relative_address.immhi = @as(u19, @truncate(pages >> 2)); - inst.pc_relative_address.immlo = @as(u2, @truncate(pages)); - mem.writeInt(u32, code, inst.toU32(), .little); - addend = null; - }, - - .ARM64_RELOC_PAGEOFF12 => { - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); - - relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - - const code = atom_code[rel_offset..][0..4]; - if (Relocation.isArithmeticOp(code)) { - const off = try Relocation.calcPageOffset(adjusted_target_addr, .arithmetic); - var inst = aarch64.Instruction{ - .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - 
aarch64.Instruction.add_subtract_immediate, - ), code), - }; - inst.add_subtract_immediate.imm12 = off; - mem.writeInt(u32, code, inst.toU32(), .little); - } else { - var inst = aarch64.Instruction{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code), - }; - const off = try Relocation.calcPageOffset(adjusted_target_addr, switch (inst.load_store_register.size) { - 0 => if (inst.load_store_register.v == 1) - Relocation.PageOffsetInstKind.load_store_128 - else - Relocation.PageOffsetInstKind.load_store_8, - 1 => .load_store_16, - 2 => .load_store_32, - 3 => .load_store_64, - }); - inst.load_store_register.offset = off; - mem.writeInt(u32, code, inst.toU32(), .little); - } - addend = null; - }, - - .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => { - const code = atom_code[rel_offset..][0..4]; - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); - - relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - - const off = try Relocation.calcPageOffset(adjusted_target_addr, .load_store_64); - var inst: aarch64.Instruction = .{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code), - }; - inst.load_store_register.offset = off; - mem.writeInt(u32, code, inst.toU32(), .little); - addend = null; - }, - - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => { - const code = atom_code[rel_offset..][0..4]; - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); - - relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - - const RegInfo = struct { - rd: u5, - rn: u5, - size: u2, - }; - const reg_info: RegInfo = blk: { - if (Relocation.isArithmeticOp(code)) { - const inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), code); - break :blk .{ - .rd = inst.rd, - 
.rn = inst.rn, - .size = inst.sf, - }; - } else { - const inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code); - break :blk .{ - .rd = inst.rt, - .rn = inst.rn, - .size = inst.size, - }; - } - }; - - var inst = if (macho_file.tlv_ptr_table.lookup.contains(target)) aarch64.Instruction{ - .load_store_register = .{ - .rt = reg_info.rd, - .rn = reg_info.rn, - .offset = try Relocation.calcPageOffset(adjusted_target_addr, .load_store_64), - .opc = 0b01, - .op1 = 0b01, - .v = 0, - .size = reg_info.size, - }, - } else aarch64.Instruction{ - .add_subtract_immediate = .{ - .rd = reg_info.rd, - .rn = reg_info.rn, - .imm12 = try Relocation.calcPageOffset(adjusted_target_addr, .arithmetic), - .sh = 0, - .s = 0, - .op = 0, - .sf = @as(u1, @truncate(reg_info.size)), - }, - }; - mem.writeInt(u32, code, inst.toU32(), .little); - addend = null; - }, - - .ARM64_RELOC_POINTER_TO_GOT => { - relocs_log.debug(" | target_addr = 0x{x}", .{target_addr}); - const result = math.cast(i32, @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr))) orelse - return error.Overflow; - mem.writeInt(u32, atom_code[rel_offset..][0..4], @as(u32, @bitCast(result)), .little); - }, - - .ARM64_RELOC_UNSIGNED => { - var ptr_addend = if (rel.r_length == 3) - mem.readInt(i64, atom_code[rel_offset..][0..8], .little) - else - mem.readInt(i32, atom_code[rel_offset..][0..4], .little); - - if (rel.r_extern == 0) { - const base_addr = if (target.sym_index >= object.source_address_lookup.len) - @as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr)) - else - object.source_address_lookup[target.sym_index]; - ptr_addend -= base_addr; - } - - const result = blk: { - if (subtractor) |sub| { - const sym = macho_file.getSymbol(sub); - break :blk @as(i64, @intCast(target_addr)) - @as(i64, @intCast(sym.n_value)) + ptr_addend; - } else { - break :blk @as(i64, @intCast(target_addr)) + ptr_addend; - } - }; - 
relocs_log.debug(" | target_addr = 0x{x}", .{result}); - - if (rel.r_length == 3) { - mem.writeInt(u64, atom_code[rel_offset..][0..8], @as(u64, @bitCast(result)), .little); - } else { - mem.writeInt(u32, atom_code[rel_offset..][0..4], @as(u32, @truncate(@as(u64, @bitCast(result)))), .little); - } - - subtractor = null; - }, - - .ARM64_RELOC_ADDEND => unreachable, - .ARM64_RELOC_SUBTRACTOR => unreachable, - } - } -} - -fn resolveRelocsX86( - macho_file: *MachO, - atom_index: Index, - atom_code: []u8, - atom_relocs: []align(1) const macho.relocation_info, - context: RelocContext, -) !void { - const atom = macho_file.getAtom(atom_index); - const object = macho_file.objects.items[atom.getFile().?]; - - var subtractor: ?SymbolWithLoc = null; - - for (atom_relocs) |rel| { - const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); - - switch (rel_type) { - .X86_64_RELOC_SUBTRACTOR => { - assert(subtractor == null); - - relocs_log.debug(" RELA({s}) @ {x} => %{d} in object({?d})", .{ - @tagName(rel_type), - rel.r_address, - rel.r_symbolnum, - atom.getFile(), - }); - - subtractor = parseRelocTarget(macho_file, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = atom_code, - .base_addr = context.base_addr, - .base_offset = context.base_offset, - }); - continue; - }, - else => {}, - } - - const target = parseRelocTarget(macho_file, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = atom_code, - .base_addr = context.base_addr, - .base_offset = context.base_offset, - }); - const rel_offset = @as(u32, @intCast(rel.r_address - context.base_offset)); - - relocs_log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{ - @tagName(rel_type), - rel.r_address, - target.sym_index, - macho_file.getSymbolName(target), - target.getFile(), - }); - - const source_addr = blk: { - const source_sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - break :blk source_sym.n_value + rel_offset; - }; - const target_addr = blk: { - if 
(relocRequiresGot(macho_file, rel)) break :blk macho_file.getGotEntryAddress(target).?; - if (relocIsStub(macho_file, rel) and macho_file.getSymbol(target).undf()) - break :blk macho_file.getStubsEntryAddress(target).?; - if (relocIsTlv(macho_file, rel) and macho_file.getSymbol(target).undf()) - break :blk macho_file.getTlvPtrEntryAddress(target).?; - const is_tlv = is_tlv: { - const source_sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - const header = macho_file.sections.items(.header)[source_sym.n_sect - 1]; - break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; - }; - break :blk getRelocTargetAddress(macho_file, target, is_tlv); - }; - - relocs_log.debug(" | source_addr = 0x{x}", .{source_addr}); - - switch (rel_type) { - .X86_64_RELOC_BRANCH => { - const addend = mem.readInt(i32, atom_code[rel_offset..][0..4], .little); - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); - relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); - mem.writeInt(i32, atom_code[rel_offset..][0..4], disp, .little); - }, - - .X86_64_RELOC_GOT, - .X86_64_RELOC_GOT_LOAD, - => { - const addend = mem.readInt(i32, atom_code[rel_offset..][0..4], .little); - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); - relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); - mem.writeInt(i32, atom_code[rel_offset..][0..4], disp, .little); - }, - - .X86_64_RELOC_TLV => { - const addend = mem.readInt(i32, atom_code[rel_offset..][0..4], .little); - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); - relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, 
adjusted_target_addr, 0); - - if (macho_file.tlv_ptr_table.lookup.get(target) == null) { - // We need to rewrite the opcode from movq to leaq. - atom_code[rel_offset - 2] = 0x8d; - } - - mem.writeInt(i32, atom_code[rel_offset..][0..4], disp, .little); - }, - - .X86_64_RELOC_SIGNED, - .X86_64_RELOC_SIGNED_1, - .X86_64_RELOC_SIGNED_2, - .X86_64_RELOC_SIGNED_4, - => { - const correction: u3 = switch (rel_type) { - .X86_64_RELOC_SIGNED => 0, - .X86_64_RELOC_SIGNED_1 => 1, - .X86_64_RELOC_SIGNED_2 => 2, - .X86_64_RELOC_SIGNED_4 => 4, + } + }, + + .got_load => { + assert(rel.tag == .@"extern"); + assert(rel.meta.length == 2); + assert(rel.meta.pcrel); + if (rel.getTargetSymbol(macho_file).flags.got) { + try writer.writeInt(i32, @intCast(G + A - P), .little); + } else { + try relaxGotLoad(code[rel_offset - 3 ..]); + try writer.writeInt(i32, @intCast(S + A - P), .little); + } + }, + + .tlv => { + assert(rel.tag == .@"extern"); + assert(rel.meta.length == 2); + assert(rel.meta.pcrel); + const sym = rel.getTargetSymbol(macho_file); + if (sym.flags.tlv_ptr) { + const S_: i64 = @intCast(sym.getTlvPtrAddress(macho_file)); + try writer.writeInt(i32, @intCast(S_ + A - P), .little); + } else { + try relaxTlv(code[rel_offset - 3 ..]); + try writer.writeInt(i32, @intCast(S + A - P), .little); + } + }, + + .signed, .signed1, .signed2, .signed4 => { + assert(rel.meta.length == 2); + assert(rel.meta.pcrel); + try writer.writeInt(i32, @intCast(S + A - P), .little); + }, + + .page, + .got_load_page, + .tlvp_page, + => { + assert(rel.tag == .@"extern"); + assert(rel.meta.length == 2); + assert(rel.meta.pcrel); + const sym = rel.getTargetSymbol(macho_file); + const source = math.cast(u64, P) orelse return error.Overflow; + const target = target: { + const target = switch (rel.type) { + .page => S + A, + .got_load_page => G + A, + .tlvp_page => if (sym.flags.tlv_ptr) blk: { + const S_: i64 = @intCast(sym.getTlvPtrAddress(macho_file)); + break :blk S_ + A; + } else S + A, else => 
unreachable, }; - var addend = mem.readInt(i32, atom_code[rel_offset..][0..4], .little) + correction; + break :target math.cast(u64, target) orelse return error.Overflow; + }; + const pages = @as(u21, @bitCast(try Relocation.calcNumberOfPages(source, target))); + var inst = aarch64.Instruction{ + .pc_relative_address = mem.bytesToValue(std.meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.pc_relative_address, + ), code[rel_offset..][0..4]), + }; + inst.pc_relative_address.immhi = @as(u19, @truncate(pages >> 2)); + inst.pc_relative_address.immlo = @as(u2, @truncate(pages)); + try writer.writeInt(u32, inst.toU32(), .little); + }, - if (rel.r_extern == 0) { - const base_addr = if (target.sym_index >= object.source_address_lookup.len) - @as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr)) - else - object.source_address_lookup[target.sym_index]; - addend += @as(i32, @intCast(@as(i64, @intCast(context.base_addr)) + rel.r_address + 4 - - @as(i64, @intCast(base_addr)))); - } - - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); - - relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - - const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, correction); - mem.writeInt(i32, atom_code[rel_offset..][0..4], disp, .little); - }, - - .X86_64_RELOC_UNSIGNED => { - var addend = if (rel.r_length == 3) - mem.readInt(i64, atom_code[rel_offset..][0..8], .little) - else - mem.readInt(i32, atom_code[rel_offset..][0..4], .little); - - if (rel.r_extern == 0) { - const base_addr = if (target.sym_index >= object.source_address_lookup.len) - @as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr)) - else - object.source_address_lookup[target.sym_index]; - addend -= base_addr; - } - - const result = blk: { - if (subtractor) |sub| { - const sym = macho_file.getSymbol(sub); - break :blk @as(i64, @intCast(target_addr)) - 
@as(i64, @intCast(sym.n_value)) + addend; - } else { - break :blk @as(i64, @intCast(target_addr)) + addend; - } + .pageoff => { + assert(rel.tag == .@"extern"); + assert(rel.meta.length == 2); + assert(!rel.meta.pcrel); + const target = math.cast(u64, S + A) orelse return error.Overflow; + const inst_code = code[rel_offset..][0..4]; + if (Relocation.isArithmeticOp(inst_code)) { + const off = try Relocation.calcPageOffset(target, .arithmetic); + var inst = aarch64.Instruction{ + .add_subtract_immediate = mem.bytesToValue(std.meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), inst_code), }; - relocs_log.debug(" | target_addr = 0x{x}", .{result}); + inst.add_subtract_immediate.imm12 = off; + try writer.writeInt(u32, inst.toU32(), .little); + } else { + var inst = aarch64.Instruction{ + .load_store_register = mem.bytesToValue(std.meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), inst_code), + }; + const off = try Relocation.calcPageOffset(target, switch (inst.load_store_register.size) { + 0 => if (inst.load_store_register.v == 1) + Relocation.PageOffsetInstKind.load_store_128 + else + Relocation.PageOffsetInstKind.load_store_8, + 1 => .load_store_16, + 2 => .load_store_32, + 3 => .load_store_64, + }); + inst.load_store_register.offset = off; + try writer.writeInt(u32, inst.toU32(), .little); + } + }, - if (rel.r_length == 3) { - mem.writeInt(u64, atom_code[rel_offset..][0..8], @as(u64, @bitCast(result)), .little); + .got_load_pageoff => { + assert(rel.tag == .@"extern"); + assert(rel.meta.length == 2); + assert(!rel.meta.pcrel); + const target = math.cast(u64, G + A) orelse return error.Overflow; + const off = try Relocation.calcPageOffset(target, .load_store_64); + var inst: aarch64.Instruction = .{ + .load_store_register = mem.bytesToValue(std.meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code[rel_offset..][0..4]), + }; + 
inst.load_store_register.offset = off; + try writer.writeInt(u32, inst.toU32(), .little); + }, + + .tlvp_pageoff => { + assert(rel.tag == .@"extern"); + assert(rel.meta.length == 2); + assert(!rel.meta.pcrel); + + const sym = rel.getTargetSymbol(macho_file); + const target = target: { + const target = if (sym.flags.tlv_ptr) blk: { + const S_: i64 = @intCast(sym.getTlvPtrAddress(macho_file)); + break :blk S_ + A; + } else S + A; + break :target math.cast(u64, target) orelse return error.Overflow; + }; + + const RegInfo = struct { + rd: u5, + rn: u5, + size: u2, + }; + + const inst_code = code[rel_offset..][0..4]; + const reg_info: RegInfo = blk: { + if (Relocation.isArithmeticOp(inst_code)) { + const inst = mem.bytesToValue(std.meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), inst_code); + break :blk .{ + .rd = inst.rd, + .rn = inst.rn, + .size = inst.sf, + }; } else { - mem.writeInt(u32, atom_code[rel_offset..][0..4], @as(u32, @truncate(@as(u64, @bitCast(result)))), .little); + const inst = mem.bytesToValue(std.meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), inst_code); + break :blk .{ + .rd = inst.rt, + .rn = inst.rn, + .size = inst.size, + }; + } + }; + + var inst = if (sym.flags.tlv_ptr) aarch64.Instruction{ + .load_store_register = .{ + .rt = reg_info.rd, + .rn = reg_info.rn, + .offset = try Relocation.calcPageOffset(target, .load_store_64), + .opc = 0b01, + .op1 = 0b01, + .v = 0, + .size = reg_info.size, + }, + } else aarch64.Instruction{ + .add_subtract_immediate = .{ + .rd = reg_info.rd, + .rn = reg_info.rn, + .imm12 = try Relocation.calcPageOffset(target, .arithmetic), + .sh = 0, + .s = 0, + .op = 0, + .sf = @as(u1, @truncate(reg_info.size)), + }, + }; + try writer.writeInt(u32, inst.toU32(), .little); + }, + } +} + +fn relaxGotLoad(code: []u8) error{RelaxFail}!void { + const old_inst = disassemble(code) orelse return error.RelaxFail; + switch (old_inst.encoding.mnemonic) { + 
.mov => { + const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops) catch return error.RelaxFail; + relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); + encode(&.{inst}, code) catch return error.RelaxFail; + }, + else => return error.RelaxFail, + } +} + +fn relaxTlv(code: []u8) error{RelaxFail}!void { + const old_inst = disassemble(code) orelse return error.RelaxFail; + switch (old_inst.encoding.mnemonic) { + .mov => { + const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops) catch return error.RelaxFail; + relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); + encode(&.{inst}, code) catch return error.RelaxFail; + }, + else => return error.RelaxFail, + } +} + +fn disassemble(code: []const u8) ?Instruction { + var disas = Disassembler.init(code); + const inst = disas.next() catch return null; + return inst; +} + +fn encode(insts: []const Instruction, code: []u8) !void { + var stream = std.io.fixedBufferStream(code); + const writer = stream.writer(); + for (insts) |inst| { + try inst.encode(writer, .{}); + } +} + +pub fn calcNumRelocs(self: Atom, macho_file: *MachO) u32 { + switch (macho_file.options.cpu_arch.?) 
{ + .aarch64 => { + var nreloc: u32 = 0; + for (self.getRelocs(macho_file)) |rel| { + nreloc += 1; + switch (rel.type) { + .page, .pageoff => if (rel.addend > 0) { + nreloc += 1; + }, + else => {}, + } + } + return nreloc; + }, + .x86_64 => return @intCast(self.getRelocs(macho_file).len), + else => unreachable, + } +} + +pub fn writeRelocs(self: Atom, macho_file: *MachO, code: []u8, buffer: *std.ArrayList(macho.relocation_info)) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const cpu_arch = macho_file.options.cpu_arch.?; + const relocs = self.getRelocs(macho_file); + const sect = macho_file.sections.items(.header)[self.out_n_sect]; + var stream = std.io.fixedBufferStream(code); + + for (relocs) |rel| { + const rel_offset = rel.offset - self.off; + const r_address: i32 = math.cast(i32, self.value + rel_offset - sect.addr) orelse return error.Overflow; + const r_symbolnum = r_symbolnum: { + const r_symbolnum: u32 = switch (rel.tag) { + .local => rel.getTargetAtom(macho_file).out_n_sect + 1, + .@"extern" => rel.getTargetSymbol(macho_file).getOutputSymtabIndex(macho_file).?, + }; + break :r_symbolnum math.cast(u24, r_symbolnum) orelse return error.Overflow; + }; + const r_extern = rel.tag == .@"extern"; + var addend = rel.addend + rel.getRelocAddend(cpu_arch); + if (rel.tag == .local) { + const target: i64 = @intCast(rel.getTargetAddress(macho_file)); + addend += target; + } + + try stream.seekTo(rel_offset); + + switch (cpu_arch) { + .aarch64 => { + if (rel.type == .unsigned) switch (rel.meta.length) { + 0, 1 => unreachable, + 2 => try stream.writer().writeInt(i32, @truncate(addend), .little), + 3 => try stream.writer().writeInt(i64, addend, .little), + } else if (addend > 0) { + buffer.appendAssumeCapacity(.{ + .r_address = r_address, + .r_symbolnum = @bitCast(math.cast(i24, addend) orelse return error.Overflow), + .r_pcrel = 0, + .r_length = 2, + .r_extern = 0, + .r_type = @intFromEnum(macho.reloc_type_arm64.ARM64_RELOC_ADDEND), + }); } - 
subtractor = null; - }, + const r_type: macho.reloc_type_arm64 = switch (rel.type) { + .page => .ARM64_RELOC_PAGE21, + .pageoff => .ARM64_RELOC_PAGEOFF12, + .got_load_page => .ARM64_RELOC_GOT_LOAD_PAGE21, + .got_load_pageoff => .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .tlvp_page => .ARM64_RELOC_TLVP_LOAD_PAGE21, + .tlvp_pageoff => .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, + .branch => .ARM64_RELOC_BRANCH26, + .got => .ARM64_RELOC_POINTER_TO_GOT, + .subtractor => .ARM64_RELOC_SUBTRACTOR, + .unsigned => .ARM64_RELOC_UNSIGNED, - .X86_64_RELOC_SUBTRACTOR => unreachable, + .signed, + .signed1, + .signed2, + .signed4, + .got_load, + .tlv, + => unreachable, + }; + buffer.appendAssumeCapacity(.{ + .r_address = r_address, + .r_symbolnum = r_symbolnum, + .r_pcrel = @intFromBool(rel.meta.pcrel), + .r_extern = @intFromBool(r_extern), + .r_length = rel.meta.length, + .r_type = @intFromEnum(r_type), + }); + }, + .x86_64 => { + if (rel.meta.pcrel) { + if (rel.tag == .local) { + addend -= @as(i64, @intCast(self.value + rel_offset)); + } else { + addend += 4; + } + } + switch (rel.meta.length) { + 0, 1 => unreachable, + 2 => try stream.writer().writeInt(i32, @truncate(addend), .little), + 3 => try stream.writer().writeInt(i64, addend, .little), + } + + const r_type: macho.reloc_type_x86_64 = switch (rel.type) { + .signed => .X86_64_RELOC_SIGNED, + .signed1 => .X86_64_RELOC_SIGNED_1, + .signed2 => .X86_64_RELOC_SIGNED_2, + .signed4 => .X86_64_RELOC_SIGNED_4, + .got_load => .X86_64_RELOC_GOT_LOAD, + .tlv => .X86_64_RELOC_TLV, + .branch => .X86_64_RELOC_BRANCH, + .got => .X86_64_RELOC_GOT, + .subtractor => .X86_64_RELOC_SUBTRACTOR, + .unsigned => .X86_64_RELOC_UNSIGNED, + + .page, + .pageoff, + .got_load_page, + .got_load_pageoff, + .tlvp_page, + .tlvp_pageoff, + => unreachable, + }; + buffer.appendAssumeCapacity(.{ + .r_address = r_address, + .r_symbolnum = r_symbolnum, + .r_pcrel = @intFromBool(rel.meta.pcrel), + .r_extern = @intFromBool(r_extern), + .r_length = rel.meta.length, + .r_type = 
@intFromEnum(r_type), + }); + }, + else => unreachable, } } } -pub fn getAtomCode(macho_file: *MachO, atom_index: Index) []const u8 { - const atom = macho_file.getAtom(atom_index); - assert(atom.getFile() != null); // Synthetic atom shouldn't need to inquire for code. - const object = macho_file.objects.items[atom.getFile().?]; - const source_sym = object.getSourceSymbol(atom.sym_index) orelse { - // If there was no matching symbol present in the source symtab, this means - // we are dealing with either an entire section, or part of it, but also - // starting at the beginning. - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - const source_sect = object.getSourceSection(sect_id); - assert(!source_sect.isZerofill()); - const code = object.getSectionContents(source_sect); - const code_len = @as(usize, @intCast(atom.size)); - return code[0..code_len]; - }; - const source_sect = object.getSourceSection(source_sym.n_sect - 1); - assert(!source_sect.isZerofill()); - const code = object.getSectionContents(source_sect); - const offset = @as(usize, @intCast(source_sym.n_value - source_sect.addr)); - const code_len = @as(usize, @intCast(atom.size)); - return code[offset..][0..code_len]; +pub fn format( + atom: Atom, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = atom; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format Atom directly"); } -pub fn getAtomRelocs(macho_file: *MachO, atom_index: Index) []const macho.relocation_info { - const atom = macho_file.getAtom(atom_index); - assert(atom.getFile() != null); // Synthetic atom shouldn't need to unique for relocs. 
- const object = macho_file.objects.items[atom.getFile().?]; - const cache = object.relocs_lookup[atom.sym_index]; - - const source_sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { - break :blk source_sym.n_sect - 1; - } else blk: { - // If there was no matching symbol present in the source symtab, this means - // we are dealing with either an entire section, or part of it, but also - // starting at the beginning. - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - break :blk sect_id; - }; - const source_sect = object.getSourceSection(source_sect_id); - assert(!source_sect.isZerofill()); - const relocs = object.getRelocs(source_sect_id); - return relocs[cache.start..][0..cache.len]; +pub fn fmt(atom: Atom, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ + .atom = atom, + .macho_file = macho_file, + } }; } -pub fn relocRequiresGot(macho_file: *MachO, rel: macho.relocation_info) bool { - const target = macho_file.base.comp.root_mod.resolved_target.result; - switch (target.cpu.arch) { - .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12, - .ARM64_RELOC_POINTER_TO_GOT, - => return true, - else => return false, - }, - .x86_64 => switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) { - .X86_64_RELOC_GOT, - .X86_64_RELOC_GOT_LOAD, - => return true, - else => return false, - }, - else => unreachable, +const FormatContext = struct { + atom: Atom, + macho_file: *MachO, +}; + +fn format2( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = options; + _ = unused_fmt_string; + const atom = ctx.atom; + const macho_file = ctx.macho_file; + try writer.print("atom({d}) : {s} : @{x} : sect({d}) : align({x}) : size({x}) : thunk({d})", .{ + atom.atom_index, atom.getName(macho_file), 
atom.value, + atom.out_n_sect, atom.alignment, atom.size, + atom.thunk_index, + }); + if (!atom.flags.alive) try writer.writeAll(" : [*]"); + if (atom.unwind_records.len > 0) { + try writer.writeAll(" : unwind{ "); + for (atom.getUnwindRecords(macho_file), atom.unwind_records.pos..) |index, i| { + const rec = macho_file.getUnwindRecord(index); + try writer.print("{d}", .{index}); + if (!rec.alive) try writer.writeAll("([*])"); + if (i < atom.unwind_records.pos + atom.unwind_records.len - 1) try writer.writeAll(", "); + } + try writer.writeAll(" }"); } } -pub fn relocIsTlv(macho_file: *MachO, rel: macho.relocation_info) bool { - const target = macho_file.base.comp.root_mod.resolved_target.result; - switch (target.cpu.arch) { - .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { - .ARM64_RELOC_TLVP_LOAD_PAGE21, - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, - => return true, - else => return false, - }, - .x86_64 => switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) { - .X86_64_RELOC_TLV => return true, - else => return false, - }, - else => unreachable, - } -} +pub const Index = u32; -pub fn relocIsStub(macho_file: *MachO, rel: macho.relocation_info) bool { - const target = macho_file.base.comp.root_mod.resolved_target.result; - switch (target.cpu.arch) { - .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { - .ARM64_RELOC_BRANCH26 => return true, - else => return false, - }, - .x86_64 => switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) { - .X86_64_RELOC_BRANCH => return true, - else => return false, - }, - else => unreachable, - } -} +pub const Flags = packed struct { + /// Specifies whether this atom is alive or has been garbage collected. + alive: bool = true, -const Atom = @This(); + /// Specifies if the atom has been visited during garbage collection. 
+ visited: bool = false, +}; -const std = @import("std"); -const build_options = @import("build_options"); -const aarch64 = @import("../../arch/aarch64/bits.zig"); +pub const Loc = struct { + pos: usize = 0, + len: usize = 0, +}; + +const aarch64 = @import("../aarch64.zig"); const assert = std.debug.assert; -const log = std.log.scoped(.link); -const relocs_log = std.log.scoped(.link_relocs); +const bind = @import("dyld_info/bind.zig"); +const dis_x86_64 = @import("dis_x86_64"); const macho = std.macho; const math = std.math; const mem = std.mem; -const meta = std.meta; -const trace = @import("../../tracy.zig").trace; +const log = std.log.scoped(.link); +const relocs_log = std.log.scoped(.relocs); +const std = @import("std"); +const trace = @import("../tracy.zig").trace; const Allocator = mem.Allocator; -const Arch = std.Target.Cpu.Arch; +const Atom = @This(); +const Disassembler = dis_x86_64.Disassembler; +const File = @import("file.zig").File; +const Instruction = dis_x86_64.Instruction; +const Immediate = dis_x86_64.Immediate; const MachO = @import("../MachO.zig"); -pub const Relocation = @import("Relocation.zig"); -const SymbolWithLoc = MachO.SymbolWithLoc; +const Object = @import("Object.zig"); +const Relocation = @import("Relocation.zig"); +const Symbol = @import("Symbol.zig"); +const Thunk = @import("thunks.zig").Thunk; +const UnwindInfo = @import("UnwindInfo.zig"); diff --git a/src/link/MachO/CodeSignature.zig b/src/link/MachO/CodeSignature.zig index 0f49ee6a64..ce142b4376 100644 --- a/src/link/MachO/CodeSignature.zig +++ b/src/link/MachO/CodeSignature.zig @@ -1,175 +1,17 @@ -page_size: u16, -code_directory: CodeDirectory, -requirements: ?Requirements = null, -entitlements: ?Entitlements = null, -signature: ?Signature = null, +const CodeSignature = @This(); -pub fn init(page_size: u16) CodeSignature { - return .{ - .page_size = page_size, - .code_directory = CodeDirectory.init(page_size), - }; -} - -pub fn deinit(self: *CodeSignature, allocator: Allocator) 
void { - self.code_directory.deinit(allocator); - if (self.requirements) |*req| { - req.deinit(allocator); - } - if (self.entitlements) |*ents| { - ents.deinit(allocator); - } - if (self.signature) |*sig| { - sig.deinit(allocator); - } -} - -pub fn addEntitlements(self: *CodeSignature, allocator: Allocator, path: []const u8) !void { - const file = try fs.cwd().openFile(path, .{}); - defer file.close(); - const inner = try file.readToEndAlloc(allocator, std.math.maxInt(u32)); - self.entitlements = .{ .inner = inner }; -} - -pub const WriteOpts = struct { - file: fs.File, - exec_seg_base: u64, - exec_seg_limit: u64, - file_size: u32, - output_mode: std.builtin.OutputMode, -}; - -pub fn writeAdhocSignature( - self: *CodeSignature, - comp: *const Compilation, - opts: WriteOpts, - writer: anytype, -) !void { - const gpa = comp.gpa; - - var header: macho.SuperBlob = .{ - .magic = macho.CSMAGIC_EMBEDDED_SIGNATURE, - .length = @sizeOf(macho.SuperBlob), - .count = 0, - }; - - var blobs = std.ArrayList(Blob).init(gpa); - defer blobs.deinit(); - - self.code_directory.inner.execSegBase = opts.exec_seg_base; - self.code_directory.inner.execSegLimit = opts.exec_seg_limit; - self.code_directory.inner.execSegFlags = if (opts.output_mode == .Exe) macho.CS_EXECSEG_MAIN_BINARY else 0; - self.code_directory.inner.codeLimit = opts.file_size; - - const total_pages = @as(u32, @intCast(mem.alignForward(usize, opts.file_size, self.page_size) / self.page_size)); - - try self.code_directory.code_slots.ensureTotalCapacityPrecise(gpa, total_pages); - self.code_directory.code_slots.items.len = total_pages; - self.code_directory.inner.nCodeSlots = total_pages; - - // Calculate hash for each page (in file) and write it to the buffer - var hasher = Hasher(Sha256){ .allocator = gpa, .thread_pool = comp.thread_pool }; - try hasher.hash(opts.file, self.code_directory.code_slots.items, .{ - .chunk_size = self.page_size, - .max_file_size = opts.file_size, - }); - - try blobs.append(.{ .code_directory = 
&self.code_directory }); - header.length += @sizeOf(macho.BlobIndex); - header.count += 1; - - var hash: [hash_size]u8 = undefined; - - if (self.requirements) |*req| { - var buf = std.ArrayList(u8).init(gpa); - defer buf.deinit(); - try req.write(buf.writer()); - Sha256.hash(buf.items, &hash, .{}); - self.code_directory.addSpecialHash(req.slotType(), hash); - - try blobs.append(.{ .requirements = req }); - header.count += 1; - header.length += @sizeOf(macho.BlobIndex) + req.size(); - } - - if (self.entitlements) |*ents| { - var buf = std.ArrayList(u8).init(gpa); - defer buf.deinit(); - try ents.write(buf.writer()); - Sha256.hash(buf.items, &hash, .{}); - self.code_directory.addSpecialHash(ents.slotType(), hash); - - try blobs.append(.{ .entitlements = ents }); - header.count += 1; - header.length += @sizeOf(macho.BlobIndex) + ents.size(); - } - - if (self.signature) |*sig| { - try blobs.append(.{ .signature = sig }); - header.count += 1; - header.length += @sizeOf(macho.BlobIndex) + sig.size(); - } - - self.code_directory.inner.hashOffset = - @sizeOf(macho.CodeDirectory) + @as(u32, @intCast(self.code_directory.ident.len + 1 + self.code_directory.inner.nSpecialSlots * hash_size)); - self.code_directory.inner.length = self.code_directory.size(); - header.length += self.code_directory.size(); - - try writer.writeInt(u32, header.magic, .big); - try writer.writeInt(u32, header.length, .big); - try writer.writeInt(u32, header.count, .big); - - var offset: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) * @as(u32, @intCast(blobs.items.len)); - for (blobs.items) |blob| { - try writer.writeInt(u32, blob.slotType(), .big); - try writer.writeInt(u32, offset, .big); - offset += blob.size(); - } - - for (blobs.items) |blob| { - try blob.write(writer); - } -} - -pub fn size(self: CodeSignature) u32 { - var ssize: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size(); - if (self.requirements) |req| { - ssize += @sizeOf(macho.BlobIndex) + 
req.size(); - } - if (self.entitlements) |ent| { - ssize += @sizeOf(macho.BlobIndex) + ent.size(); - } - if (self.signature) |sig| { - ssize += @sizeOf(macho.BlobIndex) + sig.size(); - } - return ssize; -} - -pub fn estimateSize(self: CodeSignature, file_size: u64) u32 { - var ssize: u64 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size(); - // Approx code slots - const total_pages = mem.alignForward(u64, file_size, self.page_size) / self.page_size; - ssize += total_pages * hash_size; - var n_special_slots: u32 = 0; - if (self.requirements) |req| { - ssize += @sizeOf(macho.BlobIndex) + req.size(); - n_special_slots = @max(n_special_slots, req.slotType()); - } - if (self.entitlements) |ent| { - ssize += @sizeOf(macho.BlobIndex) + ent.size() + hash_size; - n_special_slots = @max(n_special_slots, ent.slotType()); - } - if (self.signature) |sig| { - ssize += @sizeOf(macho.BlobIndex) + sig.size(); - } - ssize += n_special_slots * hash_size; - return @as(u32, @intCast(mem.alignForward(u64, ssize, @sizeOf(u64)))); -} - -pub fn clear(self: *CodeSignature, allocator: Allocator) void { - self.code_directory.deinit(allocator); - self.code_directory = CodeDirectory.init(self.page_size); -} +const std = @import("std"); +const assert = std.debug.assert; +const fs = std.fs; +const log = std.log.scoped(.link); +const macho = std.macho; +const mem = std.mem; +const testing = std.testing; +const Allocator = mem.Allocator; +const Hasher = @import("hasher.zig").ParallelHasher; +const MachO = @import("../MachO.zig"); +const Sha256 = std.crypto.hash.sha2.Sha256; +const Zld = @import("../Zld.zig"); const hash_size = Sha256.digest_length; @@ -257,7 +99,7 @@ const CodeDirectory = struct { fn addSpecialHash(self: *CodeDirectory, index: u32, hash: [hash_size]u8) void { assert(index > 0); self.inner.nSpecialSlots = @max(self.inner.nSpecialSlots, index); - self.special_slots[index - 1] = hash; + @memcpy(&self.special_slots[index - 1], &hash); } fn slotType(self: 
CodeDirectory) u32 { @@ -376,17 +218,175 @@ const Signature = struct { } }; -const CodeSignature = @This(); +page_size: u16, +code_directory: CodeDirectory, +requirements: ?Requirements = null, +entitlements: ?Entitlements = null, +signature: ?Signature = null, -const std = @import("std"); -const assert = std.debug.assert; -const fs = std.fs; -const log = std.log.scoped(.link); -const macho = std.macho; -const mem = std.mem; -const testing = std.testing; +pub fn init(page_size: u16) CodeSignature { + return .{ + .page_size = page_size, + .code_directory = CodeDirectory.init(page_size), + }; +} -const Allocator = mem.Allocator; -const Compilation = @import("../../Compilation.zig"); -const Hasher = @import("hasher.zig").ParallelHasher; -const Sha256 = std.crypto.hash.sha2.Sha256; +pub fn deinit(self: *CodeSignature, allocator: Allocator) void { + self.code_directory.deinit(allocator); + if (self.requirements) |*req| { + req.deinit(allocator); + } + if (self.entitlements) |*ents| { + ents.deinit(allocator); + } + if (self.signature) |*sig| { + sig.deinit(allocator); + } +} + +pub fn addEntitlements(self: *CodeSignature, allocator: Allocator, path: []const u8) !void { + const file = try fs.cwd().openFile(path, .{}); + defer file.close(); + const inner = try file.readToEndAlloc(allocator, std.math.maxInt(u32)); + self.entitlements = .{ .inner = inner }; +} + +pub const WriteOpts = struct { + file: fs.File, + exec_seg_base: u64, + exec_seg_limit: u64, + file_size: u32, + dylib: bool, +}; + +pub fn writeAdhocSignature( + self: *CodeSignature, + macho_file: *MachO, + opts: WriteOpts, + writer: anytype, +) !void { + const allocator = macho_file.base.allocator; + + var header: macho.SuperBlob = .{ + .magic = macho.CSMAGIC_EMBEDDED_SIGNATURE, + .length = @sizeOf(macho.SuperBlob), + .count = 0, + }; + + var blobs = std.ArrayList(Blob).init(allocator); + defer blobs.deinit(); + + self.code_directory.inner.execSegBase = opts.exec_seg_base; + 
self.code_directory.inner.execSegLimit = opts.exec_seg_limit; + self.code_directory.inner.execSegFlags = if (!opts.dylib) macho.CS_EXECSEG_MAIN_BINARY else 0; + self.code_directory.inner.codeLimit = opts.file_size; + + const total_pages = @as(u32, @intCast(mem.alignForward(usize, opts.file_size, self.page_size) / self.page_size)); + + try self.code_directory.code_slots.ensureTotalCapacityPrecise(allocator, total_pages); + self.code_directory.code_slots.items.len = total_pages; + self.code_directory.inner.nCodeSlots = total_pages; + + // Calculate hash for each page (in file) and write it to the buffer + var hasher = Hasher(Sha256){ .allocator = allocator, .thread_pool = macho_file.base.thread_pool }; + try hasher.hash(opts.file, self.code_directory.code_slots.items, .{ + .chunk_size = self.page_size, + .max_file_size = opts.file_size, + }); + + try blobs.append(.{ .code_directory = &self.code_directory }); + header.length += @sizeOf(macho.BlobIndex); + header.count += 1; + + var hash: [hash_size]u8 = undefined; + + if (self.requirements) |*req| { + var buf = std.ArrayList(u8).init(allocator); + defer buf.deinit(); + try req.write(buf.writer()); + Sha256.hash(buf.items, &hash, .{}); + self.code_directory.addSpecialHash(req.slotType(), hash); + + try blobs.append(.{ .requirements = req }); + header.count += 1; + header.length += @sizeOf(macho.BlobIndex) + req.size(); + } + + if (self.entitlements) |*ents| { + var buf = std.ArrayList(u8).init(allocator); + defer buf.deinit(); + try ents.write(buf.writer()); + Sha256.hash(buf.items, &hash, .{}); + self.code_directory.addSpecialHash(ents.slotType(), hash); + + try blobs.append(.{ .entitlements = ents }); + header.count += 1; + header.length += @sizeOf(macho.BlobIndex) + ents.size(); + } + + if (self.signature) |*sig| { + try blobs.append(.{ .signature = sig }); + header.count += 1; + header.length += @sizeOf(macho.BlobIndex) + sig.size(); + } + + self.code_directory.inner.hashOffset = + @sizeOf(macho.CodeDirectory) + 
@as(u32, @intCast(self.code_directory.ident.len + 1 + self.code_directory.inner.nSpecialSlots * hash_size)); + self.code_directory.inner.length = self.code_directory.size(); + header.length += self.code_directory.size(); + + try writer.writeInt(u32, header.magic, .big); + try writer.writeInt(u32, header.length, .big); + try writer.writeInt(u32, header.count, .big); + + var offset: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) * @as(u32, @intCast(blobs.items.len)); + for (blobs.items) |blob| { + try writer.writeInt(u32, blob.slotType(), .big); + try writer.writeInt(u32, offset, .big); + offset += blob.size(); + } + + for (blobs.items) |blob| { + try blob.write(writer); + } +} + +pub fn size(self: CodeSignature) u32 { + var ssize: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size(); + if (self.requirements) |req| { + ssize += @sizeOf(macho.BlobIndex) + req.size(); + } + if (self.entitlements) |ent| { + ssize += @sizeOf(macho.BlobIndex) + ent.size(); + } + if (self.signature) |sig| { + ssize += @sizeOf(macho.BlobIndex) + sig.size(); + } + return ssize; +} + +pub fn estimateSize(self: CodeSignature, file_size: u64) u32 { + var ssize: u64 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size(); + // Approx code slots + const total_pages = mem.alignForward(u64, file_size, self.page_size) / self.page_size; + ssize += total_pages * hash_size; + var n_special_slots: u32 = 0; + if (self.requirements) |req| { + ssize += @sizeOf(macho.BlobIndex) + req.size(); + n_special_slots = @max(n_special_slots, req.slotType()); + } + if (self.entitlements) |ent| { + ssize += @sizeOf(macho.BlobIndex) + ent.size() + hash_size; + n_special_slots = @max(n_special_slots, ent.slotType()); + } + if (self.signature) |sig| { + ssize += @sizeOf(macho.BlobIndex) + sig.size(); + } + ssize += n_special_slots * hash_size; + return @as(u32, @intCast(mem.alignForward(u64, ssize, @sizeOf(u64)))); +} + +pub fn clear(self: 
*CodeSignature, allocator: Allocator) void { + self.code_directory.deinit(allocator); + self.code_directory = CodeDirectory.init(self.page_size); +} diff --git a/src/link/MachO/DwarfInfo.zig b/src/link/MachO/DwarfInfo.zig index a1e0ae458a..c3f8d235ce 100644 --- a/src/link/MachO/DwarfInfo.zig +++ b/src/link/MachO/DwarfInfo.zig @@ -2,377 +2,175 @@ debug_info: []const u8, debug_abbrev: []const u8, debug_str: []const u8, -pub fn getCompileUnitIterator(self: DwarfInfo) CompileUnitIterator { - return .{ .ctx = self }; +/// Abbreviation table indexed by offset in the .debug_abbrev bytestream +abbrev_tables: std.AutoArrayHashMapUnmanaged(u64, AbbrevTable) = .{}, +/// List of compile units as they appear in the .debug_info bytestream +compile_units: std.ArrayListUnmanaged(CompileUnit) = .{}, + +pub fn init(dw: *DwarfInfo, allocator: Allocator) !void { + try dw.parseAbbrevTables(allocator); + try dw.parseCompileUnits(allocator); } -const CompileUnitIterator = struct { - ctx: DwarfInfo, - pos: usize = 0, - - pub fn next(self: *CompileUnitIterator) !?CompileUnit { - if (self.pos >= self.ctx.debug_info.len) return null; - - var stream = std.io.fixedBufferStream(self.ctx.debug_info[self.pos..]); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); - - const cuh = try CompileUnit.Header.read(reader); - const total_length = cuh.length + @as(u64, if (cuh.is_64bit) @sizeOf(u64) else @sizeOf(u32)); - const offset = math.cast(usize, creader.bytes_read) orelse return error.Overflow; - - const cu = CompileUnit{ - .cuh = cuh, - .debug_info_off = self.pos + offset, - }; - - self.pos += (math.cast(usize, total_length) orelse return error.Overflow); - - return cu; +pub fn deinit(dw: *DwarfInfo, allocator: Allocator) void { + dw.abbrev_tables.deinit(allocator); + for (dw.compile_units.items) |*cu| { + cu.deinit(allocator); } -}; - -pub fn genSubprogramLookupByName( - self: DwarfInfo, - compile_unit: CompileUnit, - abbrev_lookup: AbbrevLookupTable, - 
lookup: *SubprogramLookupByName, -) !void { - var abbrev_it = compile_unit.getAbbrevEntryIterator(self); - while (try abbrev_it.next(abbrev_lookup)) |entry| switch (entry.tag) { - dwarf.TAG.subprogram => { - var attr_it = entry.getAttributeIterator(self, compile_unit.cuh); - - var name: ?[]const u8 = null; - var low_pc: ?u64 = null; - var high_pc: ?u64 = null; - - while (try attr_it.next()) |attr| switch (attr.name) { - dwarf.AT.name => if (attr.getString(self, compile_unit.cuh)) |str| { - name = str; - }, - dwarf.AT.low_pc => { - if (attr.getAddr(self, compile_unit.cuh)) |addr| { - low_pc = addr; - } - if (try attr.getConstant(self)) |constant| { - low_pc = @as(u64, @intCast(constant)); - } - }, - dwarf.AT.high_pc => { - if (attr.getAddr(self, compile_unit.cuh)) |addr| { - high_pc = addr; - } - if (try attr.getConstant(self)) |constant| { - high_pc = @as(u64, @intCast(constant)); - } - }, - else => {}, - }; - - if (name == null or low_pc == null or high_pc == null) continue; - - try lookup.putNoClobber(name.?, .{ .addr = low_pc.?, .size = high_pc.? 
}); - }, - else => {}, - }; + dw.compile_units.deinit(allocator); } -pub fn genAbbrevLookupByKind(self: DwarfInfo, off: usize, lookup: *AbbrevLookupTable) !void { - const data = self.debug_abbrev[off..]; - var stream = std.io.fixedBufferStream(data); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); - - while (true) { - const kind = try leb.readULEB128(u64, reader); - - if (kind == 0) break; - - const pos = math.cast(usize, creader.bytes_read) orelse return error.Overflow; - _ = try leb.readULEB128(u64, reader); // TAG - _ = try reader.readByte(); // CHILDREN - - while (true) { - const name = try leb.readULEB128(u64, reader); - const form = try leb.readULEB128(u64, reader); - - if (name == 0 and form == 0) break; - } - - const next_pos = math.cast(usize, creader.bytes_read) orelse return error.Overflow; - - try lookup.putNoClobber(kind, .{ - .pos = pos, - .len = next_pos - pos - 2, - }); - } +fn getString(dw: DwarfInfo, off: u64) [:0]const u8 { + assert(off < dw.debug_str.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(dw.debug_str.ptr + off)), 0); } -pub const CompileUnit = struct { - cuh: Header, - debug_info_off: usize, +fn parseAbbrevTables(dw: *DwarfInfo, allocator: Allocator) !void { + const tracy = trace(@src()); + defer tracy.end(); - pub const Header = struct { - is_64bit: bool, - length: u64, - version: u16, - debug_abbrev_offset: u64, - address_size: u8, - - fn read(reader: anytype) !Header { - var length: u64 = try reader.readInt(u32, .little); - - const is_64bit = length == 0xffffffff; - if (is_64bit) { - length = try reader.readInt(u64, .little); - } - - const version = try reader.readInt(u16, .little); - const debug_abbrev_offset = if (is_64bit) - try reader.readInt(u64, .little) - else - try reader.readInt(u32, .little); - const address_size = try reader.readInt(u8, .little); - - return Header{ - .is_64bit = is_64bit, - .length = length, - .version = version, - .debug_abbrev_offset = 
debug_abbrev_offset, - .address_size = address_size, - }; - } - }; - - inline fn getDebugInfo(self: CompileUnit, ctx: DwarfInfo) []const u8 { - return ctx.debug_info[self.debug_info_off..][0..self.cuh.length]; - } - - pub fn getAbbrevEntryIterator(self: CompileUnit, ctx: DwarfInfo) AbbrevEntryIterator { - return .{ .cu = self, .ctx = ctx }; - } -}; - -const AbbrevEntryIterator = struct { - cu: CompileUnit, - ctx: DwarfInfo, - pos: usize = 0, - - pub fn next(self: *AbbrevEntryIterator, lookup: AbbrevLookupTable) !?AbbrevEntry { - if (self.pos + self.cu.debug_info_off >= self.ctx.debug_info.len) return null; - - const debug_info = self.ctx.debug_info[self.pos + self.cu.debug_info_off ..]; - var stream = std.io.fixedBufferStream(debug_info); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); - - const kind = try leb.readULEB128(u64, reader); - self.pos += (math.cast(usize, creader.bytes_read) orelse return error.Overflow); - - if (kind == 0) { - return AbbrevEntry.null(); - } - - const abbrev_pos = lookup.get(kind) orelse return null; - const len = try findAbbrevEntrySize( - self.ctx, - abbrev_pos.pos, - abbrev_pos.len, - self.pos + self.cu.debug_info_off, - self.cu.cuh, - ); - const entry = try getAbbrevEntry( - self.ctx, - abbrev_pos.pos, - abbrev_pos.len, - self.pos + self.cu.debug_info_off, - len, - ); - - self.pos += len; - - return entry; - } -}; - -pub const AbbrevEntry = struct { - tag: u64, - children: u8, - debug_abbrev_off: usize, - debug_abbrev_len: usize, - debug_info_off: usize, - debug_info_len: usize, - - fn @"null"() AbbrevEntry { - return .{ - .tag = 0, - .children = dwarf.CHILDREN.no, - .debug_abbrev_off = 0, - .debug_abbrev_len = 0, - .debug_info_off = 0, - .debug_info_len = 0, - }; - } - - pub fn hasChildren(self: AbbrevEntry) bool { - return self.children == dwarf.CHILDREN.yes; - } - - inline fn getDebugInfo(self: AbbrevEntry, ctx: DwarfInfo) []const u8 { - return 
ctx.debug_info[self.debug_info_off..][0..self.debug_info_len]; - } - - inline fn getDebugAbbrev(self: AbbrevEntry, ctx: DwarfInfo) []const u8 { - return ctx.debug_abbrev[self.debug_abbrev_off..][0..self.debug_abbrev_len]; - } - - pub fn getAttributeIterator(self: AbbrevEntry, ctx: DwarfInfo, cuh: CompileUnit.Header) AttributeIterator { - return .{ .entry = self, .ctx = ctx, .cuh = cuh }; - } -}; - -pub const Attribute = struct { - name: u64, - form: u64, - debug_info_off: usize, - debug_info_len: usize, - - inline fn getDebugInfo(self: Attribute, ctx: DwarfInfo) []const u8 { - return ctx.debug_info[self.debug_info_off..][0..self.debug_info_len]; - } - - pub fn getString(self: Attribute, ctx: DwarfInfo, cuh: CompileUnit.Header) ?[]const u8 { - const debug_info = self.getDebugInfo(ctx); - - switch (self.form) { - dwarf.FORM.string => { - return mem.sliceTo(@as([*:0]const u8, @ptrCast(debug_info.ptr)), 0); - }, - dwarf.FORM.strp => { - const off = if (cuh.is_64bit) - mem.readInt(u64, debug_info[0..8], .little) - else - mem.readInt(u32, debug_info[0..4], .little); - return ctx.getString(off); - }, - else => return null, - } - } - - pub fn getConstant(self: Attribute, ctx: DwarfInfo) !?i128 { - const debug_info = self.getDebugInfo(ctx); - var stream = std.io.fixedBufferStream(debug_info); - const reader = stream.reader(); - - return switch (self.form) { - dwarf.FORM.data1 => debug_info[0], - dwarf.FORM.data2 => mem.readInt(u16, debug_info[0..2], .little), - dwarf.FORM.data4 => mem.readInt(u32, debug_info[0..4], .little), - dwarf.FORM.data8 => mem.readInt(u64, debug_info[0..8], .little), - dwarf.FORM.udata => try leb.readULEB128(u64, reader), - dwarf.FORM.sdata => try leb.readILEB128(i64, reader), - else => null, - }; - } - - pub fn getAddr(self: Attribute, ctx: DwarfInfo, cuh: CompileUnit.Header) ?u64 { - if (self.form != dwarf.FORM.addr) return null; - const debug_info = self.getDebugInfo(ctx); - return switch (cuh.address_size) { - 1 => debug_info[0], - 2 => 
mem.readInt(u16, debug_info[0..2], .little), - 4 => mem.readInt(u32, debug_info[0..4], .little), - 8 => mem.readInt(u64, debug_info[0..8], .little), - else => unreachable, - }; - } -}; - -const AttributeIterator = struct { - entry: AbbrevEntry, - ctx: DwarfInfo, - cuh: CompileUnit.Header, - debug_abbrev_pos: usize = 0, - debug_info_pos: usize = 0, - - pub fn next(self: *AttributeIterator) !?Attribute { - const debug_abbrev = self.entry.getDebugAbbrev(self.ctx); - if (self.debug_abbrev_pos >= debug_abbrev.len) return null; - - var stream = std.io.fixedBufferStream(debug_abbrev[self.debug_abbrev_pos..]); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); - - const name = try leb.readULEB128(u64, reader); - const form = try leb.readULEB128(u64, reader); - - self.debug_abbrev_pos += (math.cast(usize, creader.bytes_read) orelse return error.Overflow); - - const len = try findFormSize( - self.ctx, - form, - self.debug_info_pos + self.entry.debug_info_off, - self.cuh, - ); - const attr = Attribute{ - .name = name, - .form = form, - .debug_info_off = self.debug_info_pos + self.entry.debug_info_off, - .debug_info_len = len, - }; - - self.debug_info_pos += len; - - return attr; - } -}; - -fn getAbbrevEntry(self: DwarfInfo, da_off: usize, da_len: usize, di_off: usize, di_len: usize) !AbbrevEntry { - const debug_abbrev = self.debug_abbrev[da_off..][0..da_len]; + const debug_abbrev = dw.debug_abbrev; var stream = std.io.fixedBufferStream(debug_abbrev); var creader = std.io.countingReader(stream.reader()); const reader = creader.reader(); - const tag = try leb.readULEB128(u64, reader); - const children = switch (tag) { - std.dwarf.TAG.const_type, - std.dwarf.TAG.packed_type, - std.dwarf.TAG.pointer_type, - std.dwarf.TAG.reference_type, - std.dwarf.TAG.restrict_type, - std.dwarf.TAG.rvalue_reference_type, - std.dwarf.TAG.shared_type, - std.dwarf.TAG.volatile_type, - => if (creader.bytes_read == da_len) std.dwarf.CHILDREN.no else try 
reader.readByte(), - else => try reader.readByte(), - }; + while (true) { + if (creader.bytes_read >= debug_abbrev.len) break; - const pos = math.cast(usize, creader.bytes_read) orelse return error.Overflow; + try dw.abbrev_tables.ensureUnusedCapacity(allocator, 1); + const table_gop = dw.abbrev_tables.getOrPutAssumeCapacity(@intCast(creader.bytes_read)); + assert(!table_gop.found_existing); + const table = table_gop.value_ptr; + table.* = .{}; - return AbbrevEntry{ - .tag = tag, - .children = children, - .debug_abbrev_off = pos + da_off, - .debug_abbrev_len = da_len - pos, - .debug_info_off = di_off, - .debug_info_len = di_len, - }; + while (true) { + const code = try leb.readULEB128(Code, reader); + if (code == 0) break; + + try table.decls.ensureUnusedCapacity(allocator, 1); + const decl_gop = table.decls.getOrPutAssumeCapacity(code); + assert(!decl_gop.found_existing); + const decl = decl_gop.value_ptr; + decl.* = .{ + .code = code, + .tag = undefined, + .children = false, + }; + decl.tag = try leb.readULEB128(Tag, reader); + decl.children = (try reader.readByte()) > 0; + + while (true) { + const at = try leb.readULEB128(At, reader); + const form = try leb.readULEB128(Form, reader); + if (at == 0 and form == 0) break; + + try decl.attrs.ensureUnusedCapacity(allocator, 1); + const attr_gop = decl.attrs.getOrPutAssumeCapacity(at); + assert(!attr_gop.found_existing); + const attr = attr_gop.value_ptr; + attr.* = .{ + .at = at, + .form = form, + }; + } + } + } } -fn findFormSize(self: DwarfInfo, form: u64, di_off: usize, cuh: CompileUnit.Header) !usize { - const debug_info = self.debug_info[di_off..]; +fn parseCompileUnits(dw: *DwarfInfo, allocator: Allocator) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const debug_info = dw.debug_info; var stream = std.io.fixedBufferStream(debug_info); var creader = std.io.countingReader(stream.reader()); const reader = creader.reader(); + while (true) { + if (creader.bytes_read == debug_info.len) break; + + 
const cu = try dw.compile_units.addOne(allocator); + cu.* = .{ + .header = undefined, + .pos = creader.bytes_read, + }; + + var length: u64 = try reader.readInt(u32, .little); + const is_64bit = length == 0xffffffff; + if (is_64bit) { + length = try reader.readInt(u64, .little); + } + cu.header.format = if (is_64bit) .dwarf64 else .dwarf32; + cu.header.length = length; + cu.header.version = try reader.readInt(u16, .little); + cu.header.debug_abbrev_offset = try readOffset(cu.header.format, reader); + cu.header.address_size = try reader.readInt(u8, .little); + + const table = dw.abbrev_tables.get(cu.header.debug_abbrev_offset).?; + try dw.parseDie(allocator, cu, table, null, &creader); + } +} + +fn parseDie( + dw: *DwarfInfo, + allocator: Allocator, + cu: *CompileUnit, + table: AbbrevTable, + parent: ?u32, + creader: anytype, +) anyerror!void { + const tracy = trace(@src()); + defer tracy.end(); + + while (creader.bytes_read < cu.nextCompileUnitOffset()) { + const die = try cu.addDie(allocator); + cu.diePtr(die).* = .{ .code = undefined }; + if (parent) |p| { + try cu.diePtr(p).children.append(allocator, die); + } else { + try cu.children.append(allocator, die); + } + + const code = try leb.readULEB128(Code, creader.reader()); + cu.diePtr(die).code = code; + + if (code == 0) { + if (parent == null) continue; + return; // Close scope + } + + const decl = table.decls.get(code) orelse return error.MalformedDwarf; // TODO better errors + const data = dw.debug_info; + try cu.diePtr(die).values.ensureTotalCapacityPrecise(allocator, decl.attrs.values().len); + + for (decl.attrs.values()) |attr| { + const start = creader.bytes_read; + try advanceByFormSize(cu, attr.form, creader); + const end = creader.bytes_read; + cu.diePtr(die).values.appendAssumeCapacity(data[start..end]); + } + + if (decl.children) { + // Open scope + try dw.parseDie(allocator, cu, table, die, creader); + } + } +} + +fn advanceByFormSize(cu: *CompileUnit, form: Form, creader: anytype) !void { + const 
tracy = trace(@src()); + defer tracy.end(); + + const reader = creader.reader(); switch (form) { dwarf.FORM.strp, dwarf.FORM.sec_offset, dwarf.FORM.ref_addr, - => return if (cuh.is_64bit) @sizeOf(u64) else @sizeOf(u32), + => { + _ = try readOffset(cu.header.format, reader); + }, - dwarf.FORM.addr => return cuh.address_size, + dwarf.FORM.addr => try reader.skipBytes(cu.header.address_size, .{}), dwarf.FORM.block1, dwarf.FORM.block2, @@ -386,119 +184,285 @@ fn findFormSize(self: DwarfInfo, form: u64, di_off: usize, cuh: CompileUnit.Head dwarf.FORM.block => try leb.readULEB128(u64, reader), else => unreachable, }; - var i: u64 = 0; - while (i < len) : (i += 1) { + for (0..len) |_| { _ = try reader.readByte(); } - return math.cast(usize, creader.bytes_read) orelse error.Overflow; }, dwarf.FORM.exprloc => { - const expr_len = try leb.readULEB128(u64, reader); - var i: u64 = 0; - while (i < expr_len) : (i += 1) { + const len = try leb.readULEB128(u64, reader); + for (0..len) |_| { _ = try reader.readByte(); } - return math.cast(usize, creader.bytes_read) orelse error.Overflow; }, - dwarf.FORM.flag_present => return 0, + dwarf.FORM.flag_present => {}, dwarf.FORM.data1, dwarf.FORM.ref1, dwarf.FORM.flag, - => return @sizeOf(u8), + => try reader.skipBytes(1, .{}), dwarf.FORM.data2, dwarf.FORM.ref2, - => return @sizeOf(u16), + => try reader.skipBytes(2, .{}), dwarf.FORM.data4, dwarf.FORM.ref4, - => return @sizeOf(u32), + => try reader.skipBytes(4, .{}), dwarf.FORM.data8, dwarf.FORM.ref8, dwarf.FORM.ref_sig8, - => return @sizeOf(u64), + => try reader.skipBytes(8, .{}), dwarf.FORM.udata, dwarf.FORM.ref_udata, => { _ = try leb.readULEB128(u64, reader); - return math.cast(usize, creader.bytes_read) orelse error.Overflow; }, dwarf.FORM.sdata => { _ = try leb.readILEB128(i64, reader); - return math.cast(usize, creader.bytes_read) orelse error.Overflow; }, dwarf.FORM.string => { - var count: usize = 0; while (true) { const byte = try reader.readByte(); - count += 1; if (byte == 0x0) 
break; } - return count; }, else => { - // TODO figure out how to handle this - log.debug("unhandled DW_FORM_* value with identifier {x}", .{form}); + // TODO better errors + log.err("unhandled DW_FORM_* value with identifier {x}", .{form}); return error.UnhandledDwFormValue; }, } } -fn findAbbrevEntrySize(self: DwarfInfo, da_off: usize, da_len: usize, di_off: usize, cuh: CompileUnit.Header) !usize { - const debug_abbrev = self.debug_abbrev[da_off..][0..da_len]; - var stream = std.io.fixedBufferStream(debug_abbrev); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); - - const tag = try leb.readULEB128(u64, reader); - switch (tag) { - std.dwarf.TAG.const_type, - std.dwarf.TAG.packed_type, - std.dwarf.TAG.pointer_type, - std.dwarf.TAG.reference_type, - std.dwarf.TAG.restrict_type, - std.dwarf.TAG.rvalue_reference_type, - std.dwarf.TAG.shared_type, - std.dwarf.TAG.volatile_type, - => if (creader.bytes_read != da_len) { - _ = try reader.readByte(); - }, - else => _ = try reader.readByte(), - } - - var len: usize = 0; - while (creader.bytes_read < debug_abbrev.len) { - _ = try leb.readULEB128(u64, reader); - const form = try leb.readULEB128(u64, reader); - const form_len = try self.findFormSize(form, di_off + len, cuh); - len += form_len; - } - - return len; +fn readOffset(format: Format, reader: anytype) !u64 { + return switch (format) { + .dwarf32 => try reader.readInt(u32, .little), + .dwarf64 => try reader.readInt(u64, .little), + }; } -fn getString(self: DwarfInfo, off: u64) []const u8 { - assert(off < self.debug_str.len); - return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.debug_str.ptr + @as(usize, @intCast(off)))), 0); -} +pub const AbbrevTable = struct { + /// Table of abbreviation declarations indexed by their assigned code value + decls: std.AutoArrayHashMapUnmanaged(Code, Decl) = .{}, -const DwarfInfo = @This(); + pub fn deinit(table: *AbbrevTable, gpa: Allocator) void { + for (table.decls.values()) |*decl| { + 
decl.deinit(gpa); + } + table.decls.deinit(gpa); + } +}; + +pub const Decl = struct { + code: Code, + tag: Tag, + children: bool, + + /// Table of attributes indexed by their AT value + attrs: std.AutoArrayHashMapUnmanaged(At, Attr) = .{}, + + pub fn deinit(decl: *Decl, gpa: Allocator) void { + decl.attrs.deinit(gpa); + } +}; + +pub const Attr = struct { + at: At, + form: Form, +}; + +pub const At = u64; +pub const Code = u64; +pub const Form = u64; +pub const Tag = u64; + +pub const CompileUnitHeader = struct { + format: Format, + length: u64, + version: u16, + debug_abbrev_offset: u64, + address_size: u8, +}; + +pub const CompileUnit = struct { + header: CompileUnitHeader, + pos: usize, + dies: std.ArrayListUnmanaged(Die) = .{}, + children: std.ArrayListUnmanaged(Die.Index) = .{}, + + pub fn deinit(cu: *CompileUnit, gpa: Allocator) void { + for (cu.dies.items) |*die| { + die.deinit(gpa); + } + cu.dies.deinit(gpa); + cu.children.deinit(gpa); + } + + pub fn addDie(cu: *CompileUnit, gpa: Allocator) !Die.Index { + const index = @as(Die.Index, @intCast(cu.dies.items.len)); + _ = try cu.dies.addOne(gpa); + return index; + } + + pub fn diePtr(cu: *CompileUnit, index: Die.Index) *Die { + return &cu.dies.items[index]; + } + + pub fn getCompileDir(cu: CompileUnit, ctx: DwarfInfo) ?[:0]const u8 { + assert(cu.dies.items.len > 0); + const die = cu.dies.items[0]; + const res = die.find(dwarf.AT.comp_dir, cu, ctx) orelse return null; + return res.getString(cu.header.format, ctx); + } + + pub fn getSourceFile(cu: CompileUnit, ctx: DwarfInfo) ?[:0]const u8 { + assert(cu.dies.items.len > 0); + const die = cu.dies.items[0]; + const res = die.find(dwarf.AT.name, cu, ctx) orelse return null; + return res.getString(cu.header.format, ctx); + } + + pub fn nextCompileUnitOffset(cu: CompileUnit) u64 { + return cu.pos + switch (cu.header.format) { + .dwarf32 => @as(u64, 4), + .dwarf64 => 12, + } + cu.header.length; + } +}; + +pub const Die = struct { + code: Code, + values: 
std.ArrayListUnmanaged([]const u8) = .{}, + children: std.ArrayListUnmanaged(Die.Index) = .{}, + + pub fn deinit(die: *Die, gpa: Allocator) void { + die.values.deinit(gpa); + die.children.deinit(gpa); + } + + pub fn find(die: Die, at: At, cu: CompileUnit, ctx: DwarfInfo) ?DieValue { + const table = ctx.abbrev_tables.get(cu.header.debug_abbrev_offset) orelse return null; + const decl = table.decls.get(die.code).?; + const index = decl.attrs.getIndex(at) orelse return null; + const attr = decl.attrs.values()[index]; + const value = die.values.items[index]; + return .{ .attr = attr, .bytes = value }; + } + + pub const Index = u32; +}; + +pub const DieValue = struct { + attr: Attr, + bytes: []const u8, + + pub fn getFlag(value: DieValue) ?bool { + return switch (value.attr.form) { + dwarf.FORM.flag => value.bytes[0] == 1, + dwarf.FORM.flag_present => true, + else => null, + }; + } + + pub fn getString(value: DieValue, format: Format, ctx: DwarfInfo) ?[:0]const u8 { + switch (value.attr.form) { + dwarf.FORM.string => { + return mem.sliceTo(@as([*:0]const u8, @ptrCast(value.bytes.ptr)), 0); + }, + dwarf.FORM.strp => { + const off = switch (format) { + .dwarf64 => mem.readInt(u64, value.bytes[0..8], .little), + .dwarf32 => mem.readInt(u32, value.bytes[0..4], .little), + }; + return ctx.getString(off); + }, + else => return null, + } + } + + pub fn getSecOffset(value: DieValue, format: Format) ?u64 { + return switch (value.attr.form) { + dwarf.FORM.sec_offset => switch (format) { + .dwarf32 => mem.readInt(u32, value.bytes[0..4], .little), + .dwarf64 => mem.readInt(u64, value.bytes[0..8], .little), + }, + else => null, + }; + } + + pub fn getConstant(value: DieValue) !?i128 { + var stream = std.io.fixedBufferStream(value.bytes); + const reader = stream.reader(); + return switch (value.attr.form) { + dwarf.FORM.data1 => value.bytes[0], + dwarf.FORM.data2 => mem.readInt(u16, value.bytes[0..2], .little), + dwarf.FORM.data4 => mem.readInt(u32, value.bytes[0..4], .little), + 
dwarf.FORM.data8 => mem.readInt(u64, value.bytes[0..8], .little), + dwarf.FORM.udata => try leb.readULEB128(u64, reader), + dwarf.FORM.sdata => try leb.readILEB128(i64, reader), + else => null, + }; + } + + pub fn getReference(value: DieValue, format: Format) !?u64 { + var stream = std.io.fixedBufferStream(value.bytes); + const reader = stream.reader(); + return switch (value.attr.form) { + dwarf.FORM.ref1 => value.bytes[0], + dwarf.FORM.ref2 => mem.readInt(u16, value.bytes[0..2], .little), + dwarf.FORM.ref4 => mem.readInt(u32, value.bytes[0..4], .little), + dwarf.FORM.ref8 => mem.readInt(u64, value.bytes[0..8], .little), + dwarf.FORM.ref_udata => try leb.readULEB128(u64, reader), + dwarf.FORM.ref_addr => switch (format) { + .dwarf32 => mem.readInt(u32, value.bytes[0..4], .little), + .dwarf64 => mem.readInt(u64, value.bytes[0..8], .little), + }, + else => null, + }; + } + + pub fn getAddr(value: DieValue, header: CompileUnitHeader) ?u64 { + return switch (value.attr.form) { + dwarf.FORM.addr => switch (header.address_size) { + 1 => value.bytes[0], + 2 => mem.readInt(u16, value.bytes[0..2], .little), + 4 => mem.readInt(u32, value.bytes[0..4], .little), + 8 => mem.readInt(u64, value.bytes[0..8], .little), + else => null, + }, + else => null, + }; + } + + pub fn getExprloc(value: DieValue) !?[]const u8 { + if (value.attr.form != dwarf.FORM.exprloc) return null; + var stream = std.io.fixedBufferStream(value.bytes); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + const expr_len = try leb.readULEB128(u64, reader); + return value.bytes[creader.bytes_read..][0..expr_len]; + } +}; + +pub const Format = enum { + dwarf32, + dwarf64, +}; -const std = @import("std"); const assert = std.debug.assert; const dwarf = std.dwarf; const leb = std.leb; -const log = std.log.scoped(.macho); -const math = std.math; +const log = std.log.scoped(.link); const mem = std.mem; +const std = @import("std"); +const trace = @import("../tracy.zig").trace; 
const Allocator = mem.Allocator; -pub const AbbrevLookupTable = std.AutoHashMap(u64, struct { pos: usize, len: usize }); -pub const SubprogramLookupByName = std.StringHashMap(struct { addr: u64, size: u64 }); +const DwarfInfo = @This(); +const MachO = @import("../MachO.zig"); diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index 65d503b1ae..4944c4d5ef 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -1,20 +1,716 @@ path: []const u8, -id: ?Id = null, -weak: bool = false, -/// Header is only set if Dylib is parsed directly from a binary and not a stub file. -header: ?macho.mach_header_64 = null, +data: []const u8, +index: File.Index, -/// Parsed symbol table represented as hash map of symbols' -/// names. We can and should defer creating *Symbols until -/// a symbol is referenced by an object file. -/// -/// The value for each parsed symbol represents whether the -/// symbol is defined as a weak symbol or strong. -/// TODO when the referenced symbol is weak, ld64 marks it as -/// N_REF_TO_WEAK but need to investigate if there's more to it -/// such as weak binding entry or simply weak. For now, we generate -/// standard bind or lazy bind. 
-symbols: std.StringArrayHashMapUnmanaged(bool) = .{}, +header: ?macho.mach_header_64 = null, +exports: std.MultiArrayList(Export) = .{}, +strtab: std.ArrayListUnmanaged(u8) = .{}, +id: ?Id = null, +ordinal: u16 = 0, + +symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, +dependents: std.ArrayListUnmanaged(Id) = .{}, +rpaths: std.StringArrayHashMapUnmanaged(void) = .{}, +umbrella: File.Index = 0, +platform: ?MachO.Options.Platform = null, + +needed: bool, +weak: bool, +reexport: bool, +explicit: bool, +hoisted: bool = true, +referenced: bool = false, + +output_symtab_ctx: MachO.SymtabCtx = .{}, + +pub fn deinit(self: *Dylib, allocator: Allocator) void { + self.exports.deinit(allocator); + self.strtab.deinit(allocator); + if (self.id) |*id| id.deinit(allocator); + self.symbols.deinit(allocator); + for (self.dependents.items) |*id| { + id.deinit(allocator); + } + self.dependents.deinit(allocator); + self.rpaths.deinit(allocator); +} + +pub fn parse(self: *Dylib, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.allocator; + var stream = std.io.fixedBufferStream(self.data); + const reader = stream.reader(); + + log.debug("parsing dylib from binary", .{}); + + self.header = try reader.readStruct(macho.mach_header_64); + + const lc_id = self.getLoadCommand(.ID_DYLIB) orelse { + macho_file.base.fatal("{s}: missing LC_ID_DYLIB load command", .{self.path}); + return error.ParseFailed; + }; + self.id = try Id.fromLoadCommand(gpa, lc_id.cast(macho.dylib_command).?, lc_id.getDylibPathName()); + + var it = LoadCommandIterator{ + .ncmds = self.header.?.ncmds, + .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], + }; + while (it.next()) |cmd| switch (cmd.cmd()) { + .REEXPORT_DYLIB => if (self.header.?.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0) { + const id = try Id.fromLoadCommand(gpa, cmd.cast(macho.dylib_command).?, cmd.getDylibPathName()); + try self.dependents.append(gpa, id); + 
}, + .DYLD_INFO_ONLY => { + const dyld_cmd = cmd.cast(macho.dyld_info_command).?; + const data = self.data[dyld_cmd.export_off..][0..dyld_cmd.export_size]; + try self.parseTrie(data, macho_file); + }, + .DYLD_EXPORTS_TRIE => { + const ld_cmd = cmd.cast(macho.linkedit_data_command).?; + const data = self.data[ld_cmd.dataoff..][0..ld_cmd.datasize]; + try self.parseTrie(data, macho_file); + }, + .RPATH => { + const path = cmd.getRpathPathName(); + try self.rpaths.put(gpa, path, {}); + }, + else => {}, + }; + + self.initPlatform(); +} + +const TrieIterator = struct { + data: []const u8, + pos: usize = 0, + + fn getStream(it: *TrieIterator) std.io.FixedBufferStream([]const u8) { + return std.io.fixedBufferStream(it.data[it.pos..]); + } + + fn readULEB128(it: *TrieIterator) !u64 { + var stream = it.getStream(); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + const value = try std.leb.readULEB128(u64, reader); + it.pos += creader.bytes_read; + return value; + } + + fn readString(it: *TrieIterator) ![:0]const u8 { + var stream = it.getStream(); + const reader = stream.reader(); + + var count: usize = 0; + while (true) : (count += 1) { + const byte = try reader.readByte(); + if (byte == 0) break; + } + + const str = @as([*:0]const u8, @ptrCast(it.data.ptr + it.pos))[0..count :0]; + it.pos += count + 1; + return str; + } + + fn readByte(it: *TrieIterator) !u8 { + var stream = it.getStream(); + const value = try stream.reader().readByte(); + it.pos += 1; + return value; + } +}; + +pub fn addExport(self: *Dylib, allocator: Allocator, name: []const u8, flags: Export.Flags) !void { + try self.exports.append(allocator, .{ + .name = try self.insertString(allocator, name), + .flags = flags, + }); +} + +fn parseTrieNode( + self: *Dylib, + it: *TrieIterator, + allocator: Allocator, + arena: Allocator, + prefix: []const u8, +) !void { + const tracy = trace(@src()); + defer tracy.end(); + const size = try it.readULEB128(); + if (size > 0) { 
+ const flags = try it.readULEB128(); + const kind = flags & macho.EXPORT_SYMBOL_FLAGS_KIND_MASK; + const out_flags = Export.Flags{ + .abs = kind == macho.EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE, + .tlv = kind == macho.EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL, + .weak = flags & macho.EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION != 0, + }; + if (flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT != 0) { + _ = try it.readULEB128(); // dylib ordinal + const name = try it.readString(); + try self.addExport(allocator, if (name.len > 0) name else prefix, out_flags); + } else if (flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER != 0) { + _ = try it.readULEB128(); // stub offset + _ = try it.readULEB128(); // resolver offset + try self.addExport(allocator, prefix, out_flags); + } else { + _ = try it.readULEB128(); // VM offset + try self.addExport(allocator, prefix, out_flags); + } + } + + const nedges = try it.readByte(); + + for (0..nedges) |_| { + const label = try it.readString(); + const off = try it.readULEB128(); + const prefix_label = try std.fmt.allocPrint(arena, "{s}{s}", .{ prefix, label }); + const curr = it.pos; + it.pos = off; + try self.parseTrieNode(it, allocator, arena, prefix_label); + it.pos = curr; + } +} + +fn parseTrie(self: *Dylib, data: []const u8, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; + var arena = std.heap.ArenaAllocator.init(gpa); + defer arena.deinit(); + + var it: TrieIterator = .{ .data = data }; + try self.parseTrieNode(&it, gpa, arena.allocator(), ""); +} + +pub fn parseTbd( + self: *Dylib, + cpu_arch: std.Target.Cpu.Arch, + platform: ?MachO.Options.Platform, + lib_stub: LibStub, + macho_file: *MachO, +) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; + + log.debug("parsing dylib from stub", .{}); + + const umbrella_lib = lib_stub.inner[0]; + + { + var id = try Id.default(gpa, umbrella_lib.installName()); + if 
(umbrella_lib.currentVersion()) |version| { + try id.parseCurrentVersion(version); + } + if (umbrella_lib.compatibilityVersion()) |version| { + try id.parseCompatibilityVersion(version); + } + self.id = id; + } + + var umbrella_libs = std.StringHashMap(void).init(gpa); + defer umbrella_libs.deinit(); + + log.debug(" (install_name '{s}')", .{umbrella_lib.installName()}); + + self.platform = platform orelse .{ + .platform = .MACOS, + .version = .{ .value = 0 }, + }; + + var matcher = try TargetMatcher.init(gpa, cpu_arch, self.platform.?.platform); + defer matcher.deinit(); + + for (lib_stub.inner, 0..) |elem, stub_index| { + if (!(try matcher.matchesTargetTbd(elem))) continue; + + if (stub_index > 0) { + // TODO I thought that we could switch on presence of `parent-umbrella` map; + // however, turns out `libsystem_notify.dylib` is fully reexported by `libSystem.dylib` + // BUT does not feature a `parent-umbrella` map as the only sublib. Apple's bug perhaps? + try umbrella_libs.put(elem.installName(), {}); + } + + switch (elem) { + .v3 => |stub| { + if (stub.exports) |exports| { + for (exports) |exp| { + if (!matcher.matchesArch(exp.archs)) continue; + + if (exp.symbols) |symbols| { + for (symbols) |sym_name| { + try self.addExport(gpa, sym_name, .{}); + } + } + + if (exp.weak_symbols) |symbols| { + for (symbols) |sym_name| { + try self.addExport(gpa, sym_name, .{ .weak = true }); + } + } + + if (exp.objc_classes) |objc_classes| { + for (objc_classes) |class_name| { + try self.addObjCClass(gpa, class_name); + } + } + + if (exp.objc_ivars) |objc_ivars| { + for (objc_ivars) |ivar| { + try self.addObjCIVar(gpa, ivar); + } + } + + if (exp.objc_eh_types) |objc_eh_types| { + for (objc_eh_types) |eht| { + try self.addObjCEhType(gpa, eht); + } + } + + if (exp.re_exports) |re_exports| { + for (re_exports) |lib| { + if (umbrella_libs.contains(lib)) continue; + + log.debug(" (found re-export '{s}')", .{lib}); + + const dep_id = try Id.default(gpa, lib); + try 
self.dependents.append(gpa, dep_id); + } + } + } + } + }, + .v4 => |stub| { + if (stub.exports) |exports| { + for (exports) |exp| { + if (!matcher.matchesTarget(exp.targets)) continue; + + if (exp.symbols) |symbols| { + for (symbols) |sym_name| { + try self.addExport(gpa, sym_name, .{}); + } + } + + if (exp.weak_symbols) |symbols| { + for (symbols) |sym_name| { + try self.addExport(gpa, sym_name, .{ .weak = true }); + } + } + + if (exp.objc_classes) |classes| { + for (classes) |sym_name| { + try self.addObjCClass(gpa, sym_name); + } + } + + if (exp.objc_ivars) |objc_ivars| { + for (objc_ivars) |ivar| { + try self.addObjCIVar(gpa, ivar); + } + } + + if (exp.objc_eh_types) |objc_eh_types| { + for (objc_eh_types) |eht| { + try self.addObjCEhType(gpa, eht); + } + } + } + } + + if (stub.reexports) |reexports| { + for (reexports) |reexp| { + if (!matcher.matchesTarget(reexp.targets)) continue; + + if (reexp.symbols) |symbols| { + for (symbols) |sym_name| { + try self.addExport(gpa, sym_name, .{}); + } + } + + if (reexp.weak_symbols) |symbols| { + for (symbols) |sym_name| { + try self.addExport(gpa, sym_name, .{ .weak = true }); + } + } + + if (reexp.objc_classes) |classes| { + for (classes) |sym_name| { + try self.addObjCClass(gpa, sym_name); + } + } + + if (reexp.objc_ivars) |objc_ivars| { + for (objc_ivars) |ivar| { + try self.addObjCIVar(gpa, ivar); + } + } + + if (reexp.objc_eh_types) |objc_eh_types| { + for (objc_eh_types) |eht| { + try self.addObjCEhType(gpa, eht); + } + } + } + } + + if (stub.objc_classes) |classes| { + for (classes) |sym_name| { + try self.addObjCClass(gpa, sym_name); + } + } + + if (stub.objc_ivars) |objc_ivars| { + for (objc_ivars) |ivar| { + try self.addObjCIVar(gpa, ivar); + } + } + + if (stub.objc_eh_types) |objc_eh_types| { + for (objc_eh_types) |eht| { + try self.addObjCEhType(gpa, eht); + } + } + }, + } + } + + // For V4, we add dependent libs in a separate pass since some stubs such as libSystem include + // re-exports directly in the 
stub file. + for (lib_stub.inner) |elem| { + if (elem == .v3) continue; + const stub = elem.v4; + + if (stub.reexported_libraries) |reexports| { + for (reexports) |reexp| { + if (!matcher.matchesTarget(reexp.targets)) continue; + + for (reexp.libraries) |lib| { + if (umbrella_libs.contains(lib)) continue; + + log.debug(" (found re-export '{s}')", .{lib}); + + const dep_id = try Id.default(gpa, lib); + try self.dependents.append(gpa, dep_id); + } + } + } + } +} + +fn addObjCClass(self: *Dylib, allocator: Allocator, name: []const u8) !void { + try self.addObjCExport(allocator, "_OBJC_CLASS_", name); + try self.addObjCExport(allocator, "_OBJC_METACLASS_", name); +} + +fn addObjCIVar(self: *Dylib, allocator: Allocator, name: []const u8) !void { + try self.addObjCExport(allocator, "_OBJC_IVAR_", name); +} + +fn addObjCEhType(self: *Dylib, allocator: Allocator, name: []const u8) !void { + try self.addObjCExport(allocator, "_OBJC_EHTYPE_", name); +} + +fn addObjCExport( + self: *Dylib, + allocator: Allocator, + comptime prefix: []const u8, + name: []const u8, +) !void { + const full_name = try std.fmt.allocPrint(allocator, prefix ++ "$_{s}", .{name}); + defer allocator.free(full_name); + try self.addExport(allocator, full_name, .{}); +} + +pub fn initSymbols(self: *Dylib, macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; + + try self.symbols.ensureTotalCapacityPrecise(gpa, self.exports.items(.name).len); + + for (self.exports.items(.name)) |noff| { + const name = self.getString(noff); + const off = try macho_file.string_intern.insert(gpa, name); + const gop = try macho_file.getOrCreateGlobal(off); + self.symbols.addOneAssumeCapacity().* = gop.index; + } +} + +fn initPlatform(self: *Dylib) void { + var it = LoadCommandIterator{ + .ncmds = self.header.?.ncmds, + .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], + }; + self.platform = while (it.next()) |cmd| { + switch (cmd.cmd()) { + .BUILD_VERSION, + .VERSION_MIN_MACOSX, 
+ .VERSION_MIN_IPHONEOS, + .VERSION_MIN_TVOS, + .VERSION_MIN_WATCHOS, + => break MachO.Options.Platform.fromLoadCommand(cmd), + else => {}, + } + } else null; +} + +pub fn resolveSymbols(self: *Dylib, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + if (!self.explicit and !self.hoisted) return; + + for (self.symbols.items, self.exports.items(.flags)) |index, flags| { + const global = macho_file.getSymbol(index); + if (self.asFile().getSymbolRank(.{ + .weak = flags.weak, + }) < global.getSymbolRank(macho_file)) { + global.value = 0; + global.atom = 0; + global.nlist_idx = 0; + global.file = self.index; + global.flags.weak = flags.weak; + global.flags.weak_ref = false; + global.flags.tlv = flags.tlv; + global.flags.dyn_ref = false; + global.flags.tentative = false; + global.visibility = .global; + } + } +} + +pub fn resetGlobals(self: *Dylib, macho_file: *MachO) void { + for (self.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + const name = sym.name; + sym.* = .{}; + sym.name = name; + } +} + +pub fn isAlive(self: Dylib, macho_file: *MachO) bool { + if (!macho_file.options.dead_strip_dylibs) return self.explicit or self.referenced or self.needed; + return self.referenced or self.needed; +} + +pub fn markReferenced(self: *Dylib, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items) |global_index| { + const global = macho_file.getSymbol(global_index); + const file_ptr = global.getFile(macho_file) orelse continue; + if (file_ptr.getIndex() != self.index) continue; + if (global.isLocal()) continue; + self.referenced = true; + break; + } +} + +pub fn calcSymtabSize(self: *Dylib, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items) |global_index| { + const global = macho_file.getSymbol(global_index); + const file_ptr = global.getFile(macho_file) orelse continue; + if (file_ptr.getIndex() != self.index) 
continue; + if (global.isLocal()) continue; + assert(global.flags.import); + global.flags.output_symtab = true; + try global.addExtra(.{ .symtab = self.output_symtab_ctx.nimports }, macho_file); + self.output_symtab_ctx.nimports += 1; + self.output_symtab_ctx.strsize += @as(u32, @intCast(global.getName(macho_file).len + 1)); + } +} + +pub fn writeSymtab(self: Dylib, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items) |global_index| { + const global = macho_file.getSymbol(global_index); + const file = global.getFile(macho_file) orelse continue; + if (file.getIndex() != self.index) continue; + const idx = global.getOutputSymtabIndex(macho_file) orelse continue; + const n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + macho_file.strtab.appendSliceAssumeCapacity(global.getName(macho_file)); + macho_file.strtab.appendAssumeCapacity(0); + const out_sym = &macho_file.symtab.items[idx]; + out_sym.n_strx = n_strx; + global.setOutputSym(macho_file, out_sym); + } +} + +pub inline fn getUmbrella(self: Dylib, macho_file: *MachO) *Dylib { + return macho_file.getFile(self.umbrella).?.dylib; +} + +fn getLoadCommand(self: Dylib, lc: macho.LC) ?LoadCommandIterator.LoadCommand { + var it = LoadCommandIterator{ + .ncmds = self.header.?.ncmds, + .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], + }; + while (it.next()) |cmd| { + if (cmd.cmd() == lc) return cmd; + } else return null; +} + +fn insertString(self: *Dylib, allocator: Allocator, name: []const u8) !u32 { + const off = @as(u32, @intCast(self.strtab.items.len)); + try self.strtab.writer(allocator).print("{s}\x00", .{name}); + return off; +} + +pub inline fn getString(self: Dylib, off: u32) [:0]const u8 { + assert(off < self.strtab.items.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0); +} + +pub fn asFile(self: *Dylib) File { + return .{ .dylib = self }; +} + +pub fn format( + self: *Dylib, 
+ comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = self; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format dylib directly"); +} + +pub fn fmtSymtab(self: *Dylib, macho_file: *MachO) std.fmt.Formatter(formatSymtab) { + return .{ .data = .{ + .dylib = self, + .macho_file = macho_file, + } }; +} + +const FormatContext = struct { + dylib: *Dylib, + macho_file: *MachO, +}; + +fn formatSymtab( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + const dylib = ctx.dylib; + try writer.writeAll(" globals\n"); + for (dylib.symbols.items) |index| { + const global = ctx.macho_file.getSymbol(index); + try writer.print(" {}\n", .{global.fmt(ctx.macho_file)}); + } +} + +pub const TargetMatcher = struct { + allocator: Allocator, + cpu_arch: std.Target.Cpu.Arch, + platform: macho.PLATFORM, + target_strings: std.ArrayListUnmanaged([]const u8) = .{}, + + pub fn init(allocator: Allocator, cpu_arch: std.Target.Cpu.Arch, platform: macho.PLATFORM) !TargetMatcher { + var self = TargetMatcher{ + .allocator = allocator, + .cpu_arch = cpu_arch, + .platform = platform, + }; + const apple_string = try targetToAppleString(allocator, cpu_arch, platform); + try self.target_strings.append(allocator, apple_string); + + switch (platform) { + .IOSSIMULATOR, .TVOSSIMULATOR, .WATCHOSSIMULATOR => { + // For Apple simulator targets, linking gets tricky as we need to link against the simulator + // hosts dylibs too. 
+ const host_target = try targetToAppleString(allocator, cpu_arch, .MACOS); + try self.target_strings.append(allocator, host_target); + }, + else => {}, + } + + return self; + } + + pub fn deinit(self: *TargetMatcher) void { + for (self.target_strings.items) |t| { + self.allocator.free(t); + } + self.target_strings.deinit(self.allocator); + } + + inline fn cpuArchToAppleString(cpu_arch: std.Target.Cpu.Arch) []const u8 { + return switch (cpu_arch) { + .aarch64 => "arm64", + .x86_64 => "x86_64", + else => unreachable, + }; + } + + pub fn targetToAppleString(allocator: Allocator, cpu_arch: std.Target.Cpu.Arch, platform: macho.PLATFORM) ![]const u8 { + const arch = cpuArchToAppleString(cpu_arch); + const plat = switch (platform) { + .MACOS => "macos", + .IOS => "ios", + .TVOS => "tvos", + .WATCHOS => "watchos", + .IOSSIMULATOR => "ios-simulator", + .TVOSSIMULATOR => "tvos-simulator", + .WATCHOSSIMULATOR => "watchos-simulator", + .BRIDGEOS => "bridgeos", + .MACCATALYST => "maccatalyst", + .DRIVERKIT => "driverkit", + else => unreachable, + }; + return std.fmt.allocPrint(allocator, "{s}-{s}", .{ arch, plat }); + } + + fn hasValue(stack: []const []const u8, needle: []const u8) bool { + for (stack) |v| { + if (mem.eql(u8, v, needle)) return true; + } + return false; + } + + fn matchesArch(self: TargetMatcher, archs: []const []const u8) bool { + return hasValue(archs, cpuArchToAppleString(self.cpu_arch)); + } + + fn matchesTarget(self: TargetMatcher, targets: []const []const u8) bool { + for (self.target_strings.items) |t| { + if (hasValue(targets, t)) return true; + } + return false; + } + + pub fn matchesTargetTbd(self: TargetMatcher, tbd: Tbd) !bool { + var arena = std.heap.ArenaAllocator.init(self.allocator); + defer arena.deinit(); + + const targets = switch (tbd) { + .v3 => |v3| blk: { + var targets = std.ArrayList([]const u8).init(arena.allocator()); + for (v3.archs) |arch| { + const target = try std.fmt.allocPrint(arena.allocator(), "{s}-{s}", .{ arch, v3.platform 
}); + try targets.append(target); + } + break :blk targets.items; + }, + .v4 => |v4| v4.targets, + }; + + return self.matchesTarget(targets); + } +}; pub const Id = struct { name: []const u8, @@ -76,7 +772,7 @@ pub const Id = struct { var out: u32 = 0; var values: [3][]const u8 = undefined; - var split = mem.splitScalar(u8, string, '.'); + var split = mem.split(u8, string, "."); var count: u4 = 0; while (split.next()) |value| { if (count > 2) { @@ -99,458 +795,34 @@ pub const Id = struct { } }; -pub fn isDylib(file: std.fs.File, fat_offset: u64) bool { - const reader = file.reader(); - const hdr = reader.readStruct(macho.mach_header_64) catch return false; - defer file.seekTo(fat_offset) catch {}; - return hdr.filetype == macho.MH_DYLIB; -} +const Export = struct { + name: u32, + flags: Flags, -pub fn deinit(self: *Dylib, allocator: Allocator) void { - allocator.free(self.path); - for (self.symbols.keys()) |key| { - allocator.free(key); - } - self.symbols.deinit(allocator); - if (self.id) |*id| { - id.deinit(allocator); - } -} - -pub fn parseFromBinary( - self: *Dylib, - allocator: Allocator, - dylib_id: u16, - dependent_libs: anytype, - name: []const u8, - data: []align(@alignOf(u64)) const u8, -) !void { - var stream = std.io.fixedBufferStream(data); - const reader = stream.reader(); - - log.debug("parsing shared library '{s}'", .{name}); - - self.header = try reader.readStruct(macho.mach_header_64); - - const should_lookup_reexports = self.header.?.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0; - var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], + const Flags = packed struct { + abs: bool = false, + weak: bool = false, + tlv: bool = false, }; - while (it.next()) |cmd| { - switch (cmd.cmd()) { - .SYMTAB => { - const symtab_cmd = cmd.cast(macho.symtab_command).?; - const symtab = @as( - [*]const macho.nlist_64, - // Alignment is guaranteed as a dylib is a final linked image 
and has to have sections - // properly aligned in order to be correctly loaded by the loader. - @ptrCast(@alignCast(&data[symtab_cmd.symoff])), - )[0..symtab_cmd.nsyms]; - const strtab = data[symtab_cmd.stroff..][0..symtab_cmd.strsize]; - - for (symtab) |sym| { - const add_to_symtab = sym.ext() and (sym.sect() or sym.indr()); - if (!add_to_symtab) continue; - - const sym_name = mem.sliceTo(@as([*:0]const u8, @ptrCast(strtab.ptr + sym.n_strx)), 0); - try self.symbols.putNoClobber(allocator, try allocator.dupe(u8, sym_name), false); - } - }, - .ID_DYLIB => { - self.id = try Id.fromLoadCommand( - allocator, - cmd.cast(macho.dylib_command).?, - cmd.getDylibPathName(), - ); - }, - .REEXPORT_DYLIB => { - if (should_lookup_reexports) { - // Parse install_name to dependent dylib. - const id = try Id.fromLoadCommand( - allocator, - cmd.cast(macho.dylib_command).?, - cmd.getDylibPathName(), - ); - try dependent_libs.writeItem(.{ .id = id, .parent = dylib_id }); - } - }, - else => {}, - } - } -} - -/// Returns Platform composed from the first encountered build version type load command: -/// either LC_BUILD_VERSION or LC_VERSION_MIN_*. 
-pub fn getPlatform(self: Dylib, data: []align(@alignOf(u64)) const u8) ?Platform { - var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], - }; - while (it.next()) |cmd| { - switch (cmd.cmd()) { - .BUILD_VERSION, - .VERSION_MIN_MACOSX, - .VERSION_MIN_IPHONEOS, - .VERSION_MIN_TVOS, - .VERSION_MIN_WATCHOS, - => return Platform.fromLoadCommand(cmd), - else => {}, - } - } else return null; -} - -fn addObjCClassSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void { - const expanded = &[_][]const u8{ - try std.fmt.allocPrint(allocator, "_OBJC_CLASS_$_{s}", .{sym_name}), - try std.fmt.allocPrint(allocator, "_OBJC_METACLASS_$_{s}", .{sym_name}), - }; - - for (expanded) |sym| { - if (self.symbols.contains(sym)) continue; - try self.symbols.putNoClobber(allocator, sym, false); - } -} - -fn addObjCIVarSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void { - const expanded = try std.fmt.allocPrint(allocator, "_OBJC_IVAR_$_{s}", .{sym_name}); - if (self.symbols.contains(expanded)) return; - try self.symbols.putNoClobber(allocator, expanded, false); -} - -fn addObjCEhTypeSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void { - const expanded = try std.fmt.allocPrint(allocator, "_OBJC_EHTYPE_$_{s}", .{sym_name}); - if (self.symbols.contains(expanded)) return; - try self.symbols.putNoClobber(allocator, expanded, false); -} - -fn addSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void { - if (self.symbols.contains(sym_name)) return; - try self.symbols.putNoClobber(allocator, try allocator.dupe(u8, sym_name), false); -} - -fn addWeakSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void { - if (self.symbols.contains(sym_name)) return; - try self.symbols.putNoClobber(allocator, try allocator.dupe(u8, sym_name), true); -} - -pub const TargetMatcher = struct { - allocator: Allocator, - cpu_arch: 
std.Target.Cpu.Arch, - os_tag: std.Target.Os.Tag, - abi: std.Target.Abi, - target_strings: std.ArrayListUnmanaged([]const u8) = .{}, - - pub fn init(allocator: Allocator, target: std.Target) !TargetMatcher { - var self = TargetMatcher{ - .allocator = allocator, - .cpu_arch = target.cpu.arch, - .os_tag = target.os.tag, - .abi = target.abi, - }; - const apple_string = try toAppleTargetTriple(allocator, self.cpu_arch, self.os_tag, self.abi); - try self.target_strings.append(allocator, apple_string); - - if (self.abi == .simulator) { - // For Apple simulator targets, linking gets tricky as we need to link against the simulator - // hosts dylibs too. - const host_target = try toAppleTargetTriple(allocator, self.cpu_arch, .macos, .none); - try self.target_strings.append(allocator, host_target); - } - - return self; - } - - pub fn deinit(self: *TargetMatcher) void { - for (self.target_strings.items) |t| { - self.allocator.free(t); - } - self.target_strings.deinit(self.allocator); - } - - inline fn fmtCpuArch(cpu_arch: std.Target.Cpu.Arch) []const u8 { - return switch (cpu_arch) { - .aarch64 => "arm64", - .x86_64 => "x86_64", - else => unreachable, - }; - } - - inline fn fmtAbi(abi: std.Target.Abi) ?[]const u8 { - return switch (abi) { - .none => null, - .simulator => "simulator", - .macabi => "maccatalyst", - else => unreachable, - }; - } - - pub fn toAppleTargetTriple( - allocator: Allocator, - cpu_arch: std.Target.Cpu.Arch, - os_tag: std.Target.Os.Tag, - abi: std.Target.Abi, - ) ![]const u8 { - const cpu_arch_s = fmtCpuArch(cpu_arch); - const os_tag_s = @tagName(os_tag); - if (fmtAbi(abi)) |abi_s| { - return std.fmt.allocPrint(allocator, "{s}-{s}-{s}", .{ cpu_arch_s, os_tag_s, abi_s }); - } - return std.fmt.allocPrint(allocator, "{s}-{s}", .{ cpu_arch_s, os_tag_s }); - } - - fn hasValue(stack: []const []const u8, needle: []const u8) bool { - for (stack) |v| { - if (mem.eql(u8, v, needle)) return true; - } - return false; - } - - pub fn matchesTarget(self: TargetMatcher, 
targets: []const []const u8) bool { - for (self.target_strings.items) |t| { - if (hasValue(targets, t)) return true; - } - return false; - } - - fn matchesArch(self: TargetMatcher, archs: []const []const u8) bool { - return hasValue(archs, fmtCpuArch(self.cpu_arch)); - } }; -pub fn parseFromStub( - self: *Dylib, - allocator: Allocator, - target: std.Target, - lib_stub: LibStub, - dylib_id: u16, - dependent_libs: anytype, - name: []const u8, -) !void { - if (lib_stub.inner.len == 0) return error.NotLibStub; - - log.debug("parsing shared library from stub '{s}'", .{name}); - - const umbrella_lib = lib_stub.inner[0]; - - { - var id = try Id.default(allocator, umbrella_lib.installName()); - if (umbrella_lib.currentVersion()) |version| { - try id.parseCurrentVersion(version); - } - if (umbrella_lib.compatibilityVersion()) |version| { - try id.parseCompatibilityVersion(version); - } - self.id = id; - } - - var umbrella_libs = std.StringHashMap(void).init(allocator); - defer umbrella_libs.deinit(); - - log.debug(" (install_name '{s}')", .{umbrella_lib.installName()}); - - var matcher = try TargetMatcher.init(allocator, target); - defer matcher.deinit(); - - for (lib_stub.inner, 0..) |elem, stub_index| { - const targets = try elem.targets(allocator); - defer { - for (targets) |t| allocator.free(t); - allocator.free(targets); - } - if (!matcher.matchesTarget(targets)) continue; - - if (stub_index > 0) { - // TODO I thought that we could switch on presence of `parent-umbrella` map; - // however, turns out `libsystem_notify.dylib` is fully reexported by `libSystem.dylib` - // BUT does not feature a `parent-umbrella` map as the only sublib. Apple's bug perhaps? 
- try umbrella_libs.put(elem.installName(), {}); - } - - switch (elem) { - .v3 => |stub| { - if (stub.exports) |exports| { - for (exports) |exp| { - if (!matcher.matchesArch(exp.archs)) continue; - - if (exp.symbols) |symbols| { - for (symbols) |sym_name| { - try self.addSymbol(allocator, sym_name); - } - } - - if (exp.weak_symbols) |symbols| { - for (symbols) |sym_name| { - try self.addWeakSymbol(allocator, sym_name); - } - } - - if (exp.objc_classes) |objc_classes| { - for (objc_classes) |class_name| { - try self.addObjCClassSymbol(allocator, class_name); - } - } - - if (exp.objc_ivars) |objc_ivars| { - for (objc_ivars) |ivar| { - try self.addObjCIVarSymbol(allocator, ivar); - } - } - - if (exp.objc_eh_types) |objc_eh_types| { - for (objc_eh_types) |eht| { - try self.addObjCEhTypeSymbol(allocator, eht); - } - } - - // TODO track which libs were already parsed in different steps - if (exp.re_exports) |re_exports| { - for (re_exports) |lib| { - if (umbrella_libs.contains(lib)) continue; - - log.debug(" (found re-export '{s}')", .{lib}); - - const dep_id = try Id.default(allocator, lib); - try dependent_libs.writeItem(.{ .id = dep_id, .parent = dylib_id }); - } - } - } - } - }, - .v4 => |stub| { - if (stub.exports) |exports| { - for (exports) |exp| { - if (!matcher.matchesTarget(exp.targets)) continue; - - if (exp.symbols) |symbols| { - for (symbols) |sym_name| { - try self.addSymbol(allocator, sym_name); - } - } - - if (exp.weak_symbols) |symbols| { - for (symbols) |sym_name| { - try self.addWeakSymbol(allocator, sym_name); - } - } - - if (exp.objc_classes) |classes| { - for (classes) |sym_name| { - try self.addObjCClassSymbol(allocator, sym_name); - } - } - - if (exp.objc_ivars) |objc_ivars| { - for (objc_ivars) |ivar| { - try self.addObjCIVarSymbol(allocator, ivar); - } - } - - if (exp.objc_eh_types) |objc_eh_types| { - for (objc_eh_types) |eht| { - try self.addObjCEhTypeSymbol(allocator, eht); - } - } - } - } - - if (stub.reexports) |reexports| { - for 
(reexports) |reexp| { - if (!matcher.matchesTarget(reexp.targets)) continue; - - if (reexp.symbols) |symbols| { - for (symbols) |sym_name| { - try self.addSymbol(allocator, sym_name); - } - } - - if (reexp.weak_symbols) |symbols| { - for (symbols) |sym_name| { - try self.addWeakSymbol(allocator, sym_name); - } - } - - if (reexp.objc_classes) |classes| { - for (classes) |sym_name| { - try self.addObjCClassSymbol(allocator, sym_name); - } - } - - if (reexp.objc_ivars) |objc_ivars| { - for (objc_ivars) |ivar| { - try self.addObjCIVarSymbol(allocator, ivar); - } - } - - if (reexp.objc_eh_types) |objc_eh_types| { - for (objc_eh_types) |eht| { - try self.addObjCEhTypeSymbol(allocator, eht); - } - } - } - } - - if (stub.objc_classes) |classes| { - for (classes) |sym_name| { - try self.addObjCClassSymbol(allocator, sym_name); - } - } - - if (stub.objc_ivars) |objc_ivars| { - for (objc_ivars) |ivar| { - try self.addObjCIVarSymbol(allocator, ivar); - } - } - - if (stub.objc_eh_types) |objc_eh_types| { - for (objc_eh_types) |eht| { - try self.addObjCEhTypeSymbol(allocator, eht); - } - } - }, - } - } - - // For V4, we add dependent libs in a separate pass since some stubs such as libSystem include - // re-exports directly in the stub file. 
- for (lib_stub.inner) |elem| { - if (elem == .v3) break; - const stub = elem.v4; - - // TODO track which libs were already parsed in different steps - if (stub.reexported_libraries) |reexports| { - for (reexports) |reexp| { - if (!matcher.matchesTarget(reexp.targets)) continue; - - for (reexp.libraries) |lib| { - if (umbrella_libs.contains(lib)) continue; - - log.debug(" (found re-export '{s}')", .{lib}); - - const dep_id = try Id.default(allocator, lib); - try dependent_libs.writeItem(.{ .id = dep_id, .parent = dylib_id }); - } - } - } - } -} - -const Dylib = @This(); - -const std = @import("std"); const assert = std.debug.assert; +const fat = @import("fat.zig"); const fs = std.fs; const fmt = std.fmt; const log = std.log.scoped(.link); const macho = std.macho; const math = std.math; const mem = std.mem; -const fat = @import("fat.zig"); const tapi = @import("../tapi.zig"); +const trace = @import("../tracy.zig").trace; +const std = @import("std"); const Allocator = mem.Allocator; +const Dylib = @This(); +const File = @import("file.zig").File; const LibStub = tapi.LibStub; const LoadCommandIterator = macho.LoadCommandIterator; const MachO = @import("../MachO.zig"); -const Platform = @import("load_commands.zig").Platform; +const Symbol = @import("Symbol.zig"); const Tbd = tapi.Tbd; diff --git a/src/link/MachO/InternalObject.zig b/src/link/MachO/InternalObject.zig new file mode 100644 index 0000000000..e139e4efab --- /dev/null +++ b/src/link/MachO/InternalObject.zig @@ -0,0 +1,249 @@ +index: File.Index, + +sections: std.MultiArrayList(Section) = .{}, +atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, +symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, + +objc_methnames: std.ArrayListUnmanaged(u8) = .{}, +objc_selrefs: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64), + +output_symtab_ctx: MachO.SymtabCtx = .{}, + +pub fn deinit(self: *InternalObject, allocator: Allocator) void { + for (self.sections.items(.relocs)) |*relocs| { + relocs.deinit(allocator); + } + 
self.sections.deinit(allocator); + self.atoms.deinit(allocator); + self.symbols.deinit(allocator); + self.objc_methnames.deinit(allocator); +} + +pub fn addSymbol(self: *InternalObject, name: [:0]const u8, macho_file: *MachO) !Symbol.Index { + const gpa = macho_file.base.allocator; + try self.symbols.ensureUnusedCapacity(gpa, 1); + const off = try macho_file.string_intern.insert(gpa, name); + const gop = try macho_file.getOrCreateGlobal(off); + self.symbols.addOneAssumeCapacity().* = gop.index; + const sym = macho_file.getSymbol(gop.index); + sym.* = .{ .name = off, .file = self.index }; + return gop.index; +} + +/// Creates a fake input sections __TEXT,__objc_methname and __DATA,__objc_selrefs. +pub fn addObjcMsgsendSections(self: *InternalObject, sym_name: []const u8, macho_file: *MachO) !u32 { + const methname_atom_index = try self.addObjcMethnameSection(sym_name, macho_file); + return try self.addObjcSelrefsSection(sym_name, methname_atom_index, macho_file); +} + +fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_file: *MachO) !Atom.Index { + const gpa = macho_file.base.allocator; + const atom_index = try macho_file.addAtom(); + try self.atoms.append(gpa, atom_index); + + const name = try std.fmt.allocPrintZ(gpa, "__TEXT$__objc_methname${s}", .{methname}); + defer gpa.free(name); + const atom = macho_file.getAtom(atom_index).?; + atom.atom_index = atom_index; + atom.name = try macho_file.string_intern.insert(gpa, name); + atom.file = self.index; + atom.size = methname.len + 1; + atom.alignment = 0; + + const n_sect = try self.addSection(gpa, "__TEXT", "__objc_methname"); + const sect = &self.sections.items(.header)[n_sect]; + sect.flags = macho.S_CSTRING_LITERALS; + sect.size = atom.size; + sect.@"align" = 0; + atom.n_sect = n_sect; + self.sections.items(.extra)[n_sect].is_objc_methname = true; + + sect.offset = @intCast(self.objc_methnames.items.len); + try self.objc_methnames.ensureUnusedCapacity(gpa, methname.len + 1); + 
self.objc_methnames.writer(gpa).print("{s}\x00", .{methname}) catch unreachable; + + return atom_index; +} + +fn addObjcSelrefsSection( + self: *InternalObject, + methname: []const u8, + methname_atom_index: Atom.Index, + macho_file: *MachO, +) !Atom.Index { + const gpa = macho_file.base.allocator; + const atom_index = try macho_file.addAtom(); + try self.atoms.append(gpa, atom_index); + + const name = try std.fmt.allocPrintZ(gpa, "__DATA$__objc_selrefs${s}", .{methname}); + defer gpa.free(name); + const atom = macho_file.getAtom(atom_index).?; + atom.atom_index = atom_index; + atom.name = try macho_file.string_intern.insert(gpa, name); + atom.file = self.index; + atom.size = @sizeOf(u64); + atom.alignment = 3; + + const n_sect = try self.addSection(gpa, "__DATA", "__objc_selrefs"); + const sect = &self.sections.items(.header)[n_sect]; + sect.flags = macho.S_LITERAL_POINTERS | macho.S_ATTR_NO_DEAD_STRIP; + sect.offset = 0; + sect.size = atom.size; + sect.@"align" = 3; + atom.n_sect = n_sect; + self.sections.items(.extra)[n_sect].is_objc_selref = true; + + const relocs = &self.sections.items(.relocs)[n_sect]; + try relocs.ensureUnusedCapacity(gpa, 1); + relocs.appendAssumeCapacity(.{ + .tag = .local, + .offset = 0, + .target = methname_atom_index, + .addend = 0, + .type = .unsigned, + .meta = .{ + .pcrel = false, + .length = 3, + .symbolnum = 0, // Only used when synthesising unwind records so can be anything + .has_subtractor = false, + }, + }); + atom.relocs = .{ .pos = 0, .len = 1 }; + + return atom_index; +} + +pub fn calcSymtabSize(self: *InternalObject, macho_file: *MachO) !void { + for (self.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + if (sym.getFile(macho_file)) |file| if (file.getIndex() != self.index) continue; + sym.flags.output_symtab = true; + if (sym.isLocal()) { + try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nlocals }, macho_file); + self.output_symtab_ctx.nlocals += 1; + } else if (sym.flags.@"export") { + 
try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nexports }, macho_file); + self.output_symtab_ctx.nexports += 1; + } else { + assert(sym.flags.import); + try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nimports }, macho_file); + self.output_symtab_ctx.nimports += 1; + } + self.output_symtab_ctx.strsize += @as(u32, @intCast(sym.getName(macho_file).len + 1)); + } +} + +pub fn writeSymtab(self: InternalObject, macho_file: *MachO) void { + for (self.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + if (sym.getFile(macho_file)) |file| if (file.getIndex() != self.index) continue; + const idx = sym.getOutputSymtabIndex(macho_file) orelse continue; + const n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + macho_file.strtab.appendSliceAssumeCapacity(sym.getName(macho_file)); + macho_file.strtab.appendAssumeCapacity(0); + const out_sym = &macho_file.symtab.items[idx]; + out_sym.n_strx = n_strx; + sym.setOutputSym(macho_file, out_sym); + } +} + +fn addSection(self: *InternalObject, allocator: Allocator, segname: []const u8, sectname: []const u8) !u32 { + const n_sect = @as(u32, @intCast(try self.sections.addOne(allocator))); + self.sections.set(n_sect, .{ + .header = .{ + .sectname = MachO.makeStaticString(sectname), + .segname = MachO.makeStaticString(segname), + }, + }); + return n_sect; +} + +pub fn getSectionData(self: *const InternalObject, index: u32) []const u8 { + const slice = self.sections.slice(); + assert(index < slice.items(.header).len); + const sect = slice.items(.header)[index]; + const extra = slice.items(.extra)[index]; + if (extra.is_objc_methname) { + return self.objc_methnames.items[sect.offset..][0..sect.size]; + } else if (extra.is_objc_selref) { + return &self.objc_selrefs; + } else @panic("ref to non-existent section"); +} + +pub fn asFile(self: *InternalObject) File { + return .{ .internal = self }; +} + +const FormatContext = struct { + self: *InternalObject, + macho_file: *MachO, +}; + +pub fn 
fmtAtoms(self: *InternalObject, macho_file: *MachO) std.fmt.Formatter(formatAtoms) { + return .{ .data = .{ + .self = self, + .macho_file = macho_file, + } }; +} + +fn formatAtoms( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + try writer.writeAll(" atoms\n"); + for (ctx.self.atoms.items) |atom_index| { + const atom = ctx.macho_file.getAtom(atom_index).?; + try writer.print(" {}\n", .{atom.fmt(ctx.macho_file)}); + } +} + +pub fn fmtSymtab(self: *InternalObject, macho_file: *MachO) std.fmt.Formatter(formatSymtab) { + return .{ .data = .{ + .self = self, + .macho_file = macho_file, + } }; +} + +fn formatSymtab( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + try writer.writeAll(" symbols\n"); + for (ctx.self.symbols.items) |index| { + const global = ctx.macho_file.getSymbol(index); + try writer.print(" {}\n", .{global.fmt(ctx.macho_file)}); + } +} + +const Section = struct { + header: macho.section_64, + relocs: std.ArrayListUnmanaged(Relocation) = .{}, + extra: Extra = .{}, + + const Extra = packed struct { + is_objc_methname: bool = false, + is_objc_selref: bool = false, + }; +}; + +const assert = std.debug.assert; +const macho = std.macho; +const mem = std.mem; +const std = @import("std"); + +const Allocator = std.mem.Allocator; +const Atom = @import("Atom.zig"); +const File = @import("file.zig").File; +const InternalObject = @This(); +const MachO = @import("../MachO.zig"); +const Object = @import("Object.zig"); +const Relocation = @import("Relocation.zig"); +const Symbol = @import("Symbol.zig"); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index ad069b845e..deb17ba80b 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -1,1130 +1,2093 @@ -//! 
Represents an input relocatable Object file. -//! Each Object is fully loaded into memory for easier -//! access into different data within. - -name: []const u8, +archive: ?[]const u8 = null, +path: []const u8, mtime: u64, -contents: []align(@alignOf(u64)) const u8, +data: []const u8, +index: File.Index, -header: macho.mach_header_64 = undefined, - -/// Symtab and strtab might not exist for empty object files so we use an optional -/// to signal this. -in_symtab: ?[]align(1) const macho.nlist_64 = null, -in_strtab: ?[]const u8 = null, - -/// Output symtab is sorted so that we can easily reference symbols following each -/// other in address space. -/// The length of the symtab is at least of the input symtab length however there -/// can be trailing section symbols. -symtab: []macho.nlist_64 = undefined, -/// Can be undefined as set together with in_symtab. -source_symtab_lookup: []u32 = undefined, -/// Can be undefined as set together with in_symtab. -reverse_symtab_lookup: []u32 = undefined, -/// Can be undefined as set together with in_symtab. -source_address_lookup: []i64 = undefined, -/// Can be undefined as set together with in_symtab. -source_section_index_lookup: []Entry = undefined, -/// Can be undefined as set together with in_symtab. -strtab_lookup: []u32 = undefined, -/// Can be undefined as set together with in_symtab. -atom_by_index_table: []?Atom.Index = undefined, -/// Can be undefined as set together with in_symtab. -globals_lookup: []i64 = undefined, -/// Can be undefined as set together with in_symtab. -relocs_lookup: []Entry = undefined, - -/// All relocations sorted and flatened, sorted by address descending -/// per section. -relocations: std.ArrayListUnmanaged(macho.relocation_info) = .{}, -/// Beginning index to the relocations array for each input section -/// defined within this Object file. -section_relocs_lookup: std.ArrayListUnmanaged(u32) = .{}, - -/// Data-in-code records sorted by address. 
-data_in_code: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, +header: ?macho.mach_header_64 = null, +sections: std.MultiArrayList(Section) = .{}, +symtab: std.MultiArrayList(Nlist) = .{}, +strtab: []const u8 = &[0]u8{}, +symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, -exec_atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, -eh_frame_sect_id: ?u8 = null, -eh_frame_relocs_lookup: std.AutoArrayHashMapUnmanaged(u32, Record) = .{}, -eh_frame_records_lookup: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{}, +platform: ?MachO.Options.Platform = null, +dwarf_info: ?DwarfInfo = null, +stab_files: std.ArrayListUnmanaged(StabFile) = .{}, -unwind_info_sect_id: ?u8 = null, -unwind_relocs_lookup: []Record = undefined, -unwind_records_lookup: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, +eh_frame_sect_index: ?u8 = null, +compact_unwind_sect_index: ?u8 = null, +cies: std.ArrayListUnmanaged(Cie) = .{}, +fdes: std.ArrayListUnmanaged(Fde) = .{}, +eh_frame_data: std.ArrayListUnmanaged(u8) = .{}, +unwind_records: std.ArrayListUnmanaged(UnwindInfo.Record.Index) = .{}, -const Entry = struct { - start: u32 = 0, - len: u32 = 0, -}; +alive: bool = true, +hidden: bool = false, +num_rebase_relocs: u32 = 0, +num_bind_relocs: u32 = 0, +num_weak_bind_relocs: u32 = 0, -const Record = struct { - dead: bool, - reloc: Entry, -}; +output_symtab_ctx: MachO.SymtabCtx = .{}, -pub fn isObject(file: std.fs.File) bool { - const reader = file.reader(); - const hdr = reader.readStruct(macho.mach_header_64) catch return false; - defer file.seekTo(0) catch {}; - return hdr.filetype == macho.MH_OBJECT; +pub fn deinit(self: *Object, allocator: Allocator) void { + for (self.sections.items(.relocs), self.sections.items(.subsections)) |*relocs, *sub| { + relocs.deinit(allocator); + sub.deinit(allocator); + } + self.sections.deinit(allocator); + self.symtab.deinit(allocator); + self.symbols.deinit(allocator); + self.atoms.deinit(allocator); 
+ self.cies.deinit(allocator); + self.fdes.deinit(allocator); + self.eh_frame_data.deinit(allocator); + self.unwind_records.deinit(allocator); + if (self.dwarf_info) |*dw| dw.deinit(allocator); + for (self.stab_files.items) |*sf| { + sf.stabs.deinit(allocator); + } + self.stab_files.deinit(allocator); } -pub fn deinit(self: *Object, gpa: Allocator) void { - self.atoms.deinit(gpa); - self.exec_atoms.deinit(gpa); - gpa.free(self.name); - gpa.free(self.contents); - if (self.in_symtab) |_| { - gpa.free(self.source_symtab_lookup); - gpa.free(self.reverse_symtab_lookup); - gpa.free(self.source_address_lookup); - gpa.free(self.source_section_index_lookup); - gpa.free(self.strtab_lookup); - gpa.free(self.symtab); - gpa.free(self.atom_by_index_table); - gpa.free(self.globals_lookup); - gpa.free(self.relocs_lookup); - } - self.eh_frame_relocs_lookup.deinit(gpa); - self.eh_frame_records_lookup.deinit(gpa); - if (self.hasUnwindRecords()) { - gpa.free(self.unwind_relocs_lookup); - } - self.unwind_records_lookup.deinit(gpa); - self.relocations.deinit(gpa); - self.section_relocs_lookup.deinit(gpa); - self.data_in_code.deinit(gpa); -} +pub fn parse(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); -pub fn parse(self: *Object, allocator: Allocator) !void { - var stream = std.io.fixedBufferStream(self.contents); + const gpa = macho_file.base.allocator; + var stream = std.io.fixedBufferStream(self.data); const reader = stream.reader(); self.header = try reader.readStruct(macho.mach_header_64); - var it = LoadCommandIterator{ - .ncmds = self.header.ncmds, - .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], - }; - const nsects = self.getSourceSections().len; + if (self.getLoadCommand(.SEGMENT_64)) |lc| { + const sections = lc.getSections(); + try self.sections.ensureUnusedCapacity(gpa, sections.len); + for (sections) |sect| { + const index = try self.sections.addOne(gpa); + self.sections.set(index, .{ 
.header = sect }); - // Prepopulate relocations per section lookup table. - try self.section_relocs_lookup.resize(allocator, nsects); - @memset(self.section_relocs_lookup.items, 0); - - // Parse symtab. - const symtab = while (it.next()) |cmd| switch (cmd.cmd()) { - .SYMTAB => break cmd.cast(macho.symtab_command).?, - else => {}, - } else return; - - self.in_symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(self.contents.ptr + symtab.symoff))[0..symtab.nsyms]; - self.in_strtab = self.contents[symtab.stroff..][0..symtab.strsize]; - - self.symtab = try allocator.alloc(macho.nlist_64, self.in_symtab.?.len + nsects); - self.source_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); - self.reverse_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); - self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); - self.globals_lookup = try allocator.alloc(i64, self.in_symtab.?.len); - self.atom_by_index_table = try allocator.alloc(?Atom.Index, self.in_symtab.?.len + nsects); - self.relocs_lookup = try allocator.alloc(Entry, self.in_symtab.?.len + nsects); - // This is wasteful but we need to be able to lookup source symbol address after stripping and - // allocating of sections. - self.source_address_lookup = try allocator.alloc(i64, self.in_symtab.?.len); - self.source_section_index_lookup = try allocator.alloc(Entry, nsects); - - for (self.symtab) |*sym| { - sym.* = .{ - .n_value = 0, - .n_sect = 0, - .n_desc = 0, - .n_strx = 0, - .n_type = 0, - }; - } - - @memset(self.globals_lookup, -1); - @memset(self.atom_by_index_table, null); - @memset(self.source_section_index_lookup, .{}); - @memset(self.relocs_lookup, .{}); - - // You would expect that the symbol table is at least pre-sorted based on symbol's type: - // local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance, - // the GO compiler does not necessarily respect that therefore we sort immediately by type - // and address within. 
- var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(allocator, self.in_symtab.?.len); - defer sorted_all_syms.deinit(); - - for (self.in_symtab.?, 0..) |_, index| { - sorted_all_syms.appendAssumeCapacity(.{ .index = @as(u32, @intCast(index)) }); - } - - // We sort by type: defined < undefined, and - // afterwards by address in each group. Normally, dysymtab should - // be enough to guarantee the sort, but turns out not every compiler - // is kind enough to specify the symbols in the correct order. - mem.sort(SymbolAtIndex, sorted_all_syms.items, self, SymbolAtIndex.lessThan); - - var prev_sect_id: u8 = 0; - var section_index_lookup: ?Entry = null; - for (sorted_all_syms.items, 0..) |sym_id, i| { - const sym = sym_id.getSymbol(self); - - if (section_index_lookup) |*lookup| { - if (sym.n_sect != prev_sect_id or sym.undf()) { - self.source_section_index_lookup[prev_sect_id - 1] = lookup.*; - section_index_lookup = null; - } else { - lookup.len += 1; + if (mem.eql(u8, sect.sectName(), "__eh_frame")) { + self.eh_frame_sect_index = @intCast(index); + } else if (mem.eql(u8, sect.sectName(), "__compact_unwind")) { + self.compact_unwind_sect_index = @intCast(index); } } - if (sym.sect() and section_index_lookup == null) { - section_index_lookup = .{ .start = @as(u32, @intCast(i)), .len = 1 }; + } + if (self.getLoadCommand(.SYMTAB)) |lc| { + const cmd = lc.cast(macho.symtab_command).?; + self.strtab = self.data[cmd.stroff..][0..cmd.strsize]; + + const symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(self.data.ptr + cmd.symoff))[0..cmd.nsyms]; + try self.symtab.ensureUnusedCapacity(gpa, symtab.len); + for (symtab) |nlist| { + self.symtab.appendAssumeCapacity(.{ + .nlist = nlist, + .atom = 0, + .size = 0, + }); } - - prev_sect_id = sym.n_sect; - - self.symtab[i] = sym; - self.source_symtab_lookup[i] = sym_id.index; - self.reverse_symtab_lookup[sym_id.index] = @as(u32, @intCast(i)); - self.source_address_lookup[i] = if (sym.undf()) -1 else @as(i64, 
@intCast(sym.n_value)); - - const sym_name_len = mem.sliceTo(@as([*:0]const u8, @ptrCast(self.in_strtab.?.ptr + sym.n_strx)), 0).len + 1; - self.strtab_lookup[i] = @as(u32, @intCast(sym_name_len)); } - // If there were no undefined symbols, make sure we populate the - // source section index lookup for the last scanned section. - if (section_index_lookup) |lookup| { - self.source_section_index_lookup[prev_sect_id - 1] = lookup; - } + const NlistIdx = struct { + nlist: macho.nlist_64, + idx: usize, - // Parse __TEXT,__eh_frame header if one exists - self.eh_frame_sect_id = self.getSourceSectionIndexByName("__TEXT", "__eh_frame"); - - // Parse __LD,__compact_unwind header if one exists - self.unwind_info_sect_id = self.getSourceSectionIndexByName("__LD", "__compact_unwind"); - if (self.hasUnwindRecords()) { - self.unwind_relocs_lookup = try allocator.alloc(Record, self.getUnwindRecords().len); - @memset(self.unwind_relocs_lookup, .{ .dead = true, .reloc = .{} }); - } -} - -const SymbolAtIndex = struct { - index: u32, - - const Context = *const Object; - - fn getSymbol(self: SymbolAtIndex, ctx: Context) macho.nlist_64 { - return ctx.in_symtab.?[self.index]; - } - - fn getSymbolName(self: SymbolAtIndex, ctx: Context) []const u8 { - const off = self.getSymbol(ctx).n_strx; - return mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.in_strtab.?.ptr + off)), 0); - } - - fn getSymbolSeniority(self: SymbolAtIndex, ctx: Context) u2 { - const sym = self.getSymbol(ctx); - if (!sym.ext()) { - const sym_name = self.getSymbolName(ctx); - if (mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L")) return 3; - return 2; - } - if (sym.weakDef() or sym.pext()) return 1; - return 0; - } - - /// Performs lexicographic-like check. 
- /// * lhs and rhs defined - /// * if lhs == rhs - /// * if lhs.n_sect == rhs.n_sect - /// * ext < weak < local < temp - /// * lhs.n_sect < rhs.n_sect - /// * lhs < rhs - /// * !rhs is undefined - fn lessThan(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool { - const lhs = lhs_index.getSymbol(ctx); - const rhs = rhs_index.getSymbol(ctx); - if (lhs.sect() and rhs.sect()) { - if (lhs.n_value == rhs.n_value) { - if (lhs.n_sect == rhs.n_sect) { - const lhs_senior = lhs_index.getSymbolSeniority(ctx); - const rhs_senior = rhs_index.getSymbolSeniority(ctx); - if (lhs_senior == rhs_senior) { - return lessThanByNStrx(ctx, lhs_index, rhs_index); - } else return lhs_senior < rhs_senior; - } else return lhs.n_sect < rhs.n_sect; - } else return lhs.n_value < rhs.n_value; - } else if (lhs.undf() and rhs.undf()) { - return lessThanByNStrx(ctx, lhs_index, rhs_index); - } else return rhs.undf(); - } - - fn lessThanByNStrx(ctx: Context, lhs: SymbolAtIndex, rhs: SymbolAtIndex) bool { - return lhs.getSymbol(ctx).n_strx < rhs.getSymbol(ctx).n_strx; - } -}; - -fn filterSymbolsBySection(symbols: []macho.nlist_64, n_sect: u8) struct { - index: u32, - len: u32, -} { - const FirstMatch = struct { - n_sect: u8, - - pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool { - return symbol.n_sect == pred.n_sect; - } - }; - const FirstNonMatch = struct { - n_sect: u8, - - pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool { - return symbol.n_sect != pred.n_sect; - } - }; - - const index = MachO.lsearch(macho.nlist_64, symbols, FirstMatch{ - .n_sect = n_sect, - }); - const len = MachO.lsearch(macho.nlist_64, symbols[index..], FirstNonMatch{ - .n_sect = n_sect, - }); - - return .{ .index = @as(u32, @intCast(index)), .len = @as(u32, @intCast(len)) }; -} - -fn filterSymbolsByAddress(symbols: []macho.nlist_64, start_addr: u64, end_addr: u64) struct { - index: u32, - len: u32, -} { - const Predicate = struct { - addr: u64, - - pub fn predicate(pred: @This(), 
symbol: macho.nlist_64) bool { - return symbol.n_value >= pred.addr; - } - }; - - const index = MachO.lsearch(macho.nlist_64, symbols, Predicate{ - .addr = start_addr, - }); - const len = MachO.lsearch(macho.nlist_64, symbols[index..], Predicate{ - .addr = end_addr, - }); - - return .{ .index = @as(u32, @intCast(index)), .len = @as(u32, @intCast(len)) }; -} - -const SortedSection = struct { - header: macho.section_64, - id: u8, -}; - -fn sectionLessThanByAddress(ctx: void, lhs: SortedSection, rhs: SortedSection) bool { - _ = ctx; - if (lhs.header.addr == rhs.header.addr) { - return lhs.id < rhs.id; - } - return lhs.header.addr < rhs.header.addr; -} - -pub const SplitIntoAtomsError = error{ - OutOfMemory, - EndOfStream, - MissingEhFrameSection, - BadDwarfCfi, -}; - -pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) SplitIntoAtomsError!void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - log.debug("splitting object({d}, {s}) into atoms", .{ object_id, self.name }); - - try self.splitRegularSections(macho_file, object_id); - try self.parseEhFrameSection(macho_file, object_id); - try self.parseUnwindInfo(macho_file, object_id); - try self.parseDataInCode(gpa); -} - -/// Splits input regular sections into Atoms. -/// If the Object was compiled with `MH_SUBSECTIONS_VIA_SYMBOLS`, splits section -/// into subsections where each subsection then represents an Atom. -pub fn splitRegularSections(self: *Object, macho_file: *MachO, object_id: u32) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const target = macho_file.base.comp.root_mod.resolved_target.result; - - const sections = self.getSourceSections(); - for (sections, 0..) 
|sect, id| { - if (sect.isDebug()) continue; - const out_sect_id = (try Atom.getOutputSection(macho_file, sect)) orelse { - log.debug(" unhandled section '{s},{s}'", .{ sect.segName(), sect.sectName() }); - continue; - }; - if (sect.size == 0) continue; - - const sect_id = @as(u8, @intCast(id)); - const sym = self.getSectionAliasSymbolPtr(sect_id); - sym.* = .{ - .n_strx = 0, - .n_type = macho.N_SECT, - .n_sect = out_sect_id + 1, - .n_desc = 0, - .n_value = sect.addr, - }; - } - - if (self.in_symtab == null) { - for (sections, 0..) |sect, id| { - if (sect.isDebug()) continue; - const out_sect_id = (try Atom.getOutputSection(macho_file, sect)) orelse continue; - if (sect.size == 0) continue; - - const sect_id: u8 = @intCast(id); - const sym_index = self.getSectionAliasSymbolIndex(sect_id); - const atom_index = try self.createAtomFromSubsection( - macho_file, - object_id, - sym_index, - sym_index, - 1, - sect.size, - Alignment.fromLog2Units(sect.@"align"), - out_sect_id, - ); - macho_file.addAtomToSection(atom_index); - } - return; - } - - // Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we - // have to infer the start of undef section in the symtab ourselves. - const iundefsym = blk: { - const dysymtab = self.getDysymtab() orelse { - var iundefsym: usize = self.in_symtab.?.len; - while (iundefsym > 0) : (iundefsym -= 1) { - const sym = self.symtab[iundefsym - 1]; - if (sym.sect()) break; + fn rank(ctx: *const Object, nl: macho.nlist_64) u8 { + if (!nl.ext()) { + const name = ctx.getString(nl.n_strx); + if (name.len == 0) return 5; + if (name[0] == 'l' or name[0] == 'L') return 4; + return 3; } - break :blk iundefsym; - }; - break :blk dysymtab.iundefsym; - }; + return if (nl.weakDef()) 2 else 1; + } - // We only care about defined symbols, so filter every other out. 
- const symtab = try gpa.dupe(macho.nlist_64, self.symtab[0..iundefsym]); - defer gpa.free(symtab); - - const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; - - // Sort section headers by address. - var sorted_sections = try gpa.alloc(SortedSection, sections.len); - defer gpa.free(sorted_sections); - - for (sections, 0..) |sect, id| { - sorted_sections[id] = .{ .header = sect, .id = @as(u8, @intCast(id)) }; - } - - mem.sort(SortedSection, sorted_sections, {}, sectionLessThanByAddress); - - var sect_sym_index: u32 = 0; - for (sorted_sections) |section| { - const sect = section.header; - if (sect.isDebug()) continue; - - const sect_id = section.id; - log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() }); - - // Get output segment/section in the final artifact. - const out_sect_id = (try Atom.getOutputSection(macho_file, sect)) orelse continue; - - log.debug(" output sect({d}, '{s},{s}')", .{ - out_sect_id + 1, - macho_file.sections.items(.header)[out_sect_id].segName(), - macho_file.sections.items(.header)[out_sect_id].sectName(), - }); - - try self.parseRelocs(gpa, section.id); - - const cpu_arch = target.cpu.arch; - const sect_loc = filterSymbolsBySection(symtab[sect_sym_index..], sect_id + 1); - const sect_start_index = sect_sym_index + sect_loc.index; - - sect_sym_index += sect_loc.len; - - if (sect.size == 0) continue; - if (subsections_via_symbols and sect_loc.len > 0) { - // If the first nlist does not match the start of the section, - // then we need to encapsulate the memory range [section start, first symbol) - // as a temporary symbol and insert the matching Atom. 
- const first_sym = symtab[sect_start_index]; - if (first_sym.n_value > sect.addr) { - const sym_index = self.getSectionAliasSymbolIndex(sect_id); - const atom_size = first_sym.n_value - sect.addr; - const atom_index = try self.createAtomFromSubsection( - macho_file, - object_id, - sym_index, - sym_index, - 1, - atom_size, - Alignment.fromLog2Units(sect.@"align"), - out_sect_id, - ); - if (!sect.isZerofill()) { - try self.cacheRelocs(macho_file, atom_index); + fn lessThan(ctx: *const Object, lhs: @This(), rhs: @This()) bool { + if (lhs.nlist.n_sect == rhs.nlist.n_sect) { + if (lhs.nlist.n_value == rhs.nlist.n_value) { + return rank(ctx, lhs.nlist) < rank(ctx, rhs.nlist); } - macho_file.addAtomToSection(atom_index); + return lhs.nlist.n_value < rhs.nlist.n_value; } + return lhs.nlist.n_sect < rhs.nlist.n_sect; + } + }; - var next_sym_index = sect_start_index; - while (next_sym_index < sect_start_index + sect_loc.len) { - const next_sym = symtab[next_sym_index]; - const addr = next_sym.n_value; - const atom_loc = filterSymbolsByAddress(symtab[next_sym_index..], addr, addr + 1); - assert(atom_loc.len > 0); - const atom_sym_index = atom_loc.index + next_sym_index; - const nsyms_trailing = atom_loc.len; - next_sym_index += atom_loc.len; + var nlists = try std.ArrayList(NlistIdx).initCapacity(gpa, self.symtab.items(.nlist).len); + defer nlists.deinit(); + for (self.symtab.items(.nlist), 0..) 
|nlist, i| { + if (nlist.stab() or !nlist.sect()) continue; + nlists.appendAssumeCapacity(.{ .nlist = nlist, .idx = i }); + } + mem.sort(NlistIdx, nlists.items, self, NlistIdx.lessThan); - const atom_size = if (next_sym_index < sect_start_index + sect_loc.len) - symtab[next_sym_index].n_value - addr - else - sect.addr + sect.size - addr; + if (self.hasSubsections()) { + try self.initSubsections(nlists.items, macho_file); + } else { + try self.initSections(nlists.items, macho_file); + } - const atom_align = Alignment.fromLog2Units(if (addr > 0) - @min(@ctz(addr), sect.@"align") - else - sect.@"align"); + try self.initLiteralSections(macho_file); + try self.linkNlistToAtom(macho_file); - const atom_index = try self.createAtomFromSubsection( - macho_file, - object_id, - atom_sym_index, - atom_sym_index, - nsyms_trailing, - atom_size, - atom_align, - out_sect_id, - ); + try self.sortAtoms(macho_file); + try self.initSymbols(macho_file); + try self.initSymbolStabs(nlists.items, macho_file); + try self.initRelocs(macho_file); - // TODO rework this at the relocation level - if (cpu_arch == .x86_64 and addr == sect.addr) { - // In x86_64 relocs, it can so happen that the compiler refers to the same - // atom by both the actual assigned symbol and the start of the section. In this - // case, we need to link the two together so add an alias. 
- const alias_index = self.getSectionAliasSymbolIndex(sect_id); - self.atom_by_index_table[alias_index] = atom_index; - } - if (!sect.isZerofill()) { - try self.cacheRelocs(macho_file, atom_index); - } - macho_file.addAtomToSection(atom_index); - } - } else { - const alias_index = self.getSectionAliasSymbolIndex(sect_id); - const atom_index = try self.createAtomFromSubsection( - macho_file, - object_id, - alias_index, - sect_start_index, - sect_loc.len, - sect.size, - Alignment.fromLog2Units(sect.@"align"), - out_sect_id, - ); - if (!sect.isZerofill()) { - try self.cacheRelocs(macho_file, atom_index); - } - macho_file.addAtomToSection(atom_index); + if (self.eh_frame_sect_index) |index| { + try self.initEhFrameRecords(index, macho_file); + } + + if (self.compact_unwind_sect_index) |index| { + try self.initUnwindRecords(index, macho_file); + } + + self.initPlatform(); + try self.initDwarfInfo(macho_file); + + for (self.atoms.items) |atom_index| { + const atom = macho_file.getAtom(atom_index).?; + const isec = atom.getInputSection(macho_file); + if (mem.eql(u8, isec.sectName(), "__eh_frame") or + mem.eql(u8, isec.sectName(), "__compact_unwind") or + isec.attrs() & macho.S_ATTR_DEBUG != 0) + { + atom.flags.alive = false; } } } -fn createAtomFromSubsection( - self: *Object, - macho_file: *MachO, - object_id: u32, - sym_index: u32, - inner_sym_index: u32, - inner_nsyms_trailing: u32, +inline fn isLiteral(sect: macho.section_64) bool { + return switch (sect.type()) { + macho.S_CSTRING_LITERALS, + macho.S_4BYTE_LITERALS, + macho.S_8BYTE_LITERALS, + macho.S_16BYTE_LITERALS, + macho.S_LITERAL_POINTERS, + => true, + else => false, + }; +} + +fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; + const slice = self.sections.slice(); + for (slice.items(.header), slice.items(.subsections), 0..) 
|sect, *subsections, n_sect| { + if (isLiteral(sect)) continue; + + const nlist_start = for (nlists, 0..) |nlist, i| { + if (nlist.nlist.n_sect - 1 == n_sect) break i; + } else nlists.len; + const nlist_end = for (nlists[nlist_start..], nlist_start..) |nlist, i| { + if (nlist.nlist.n_sect - 1 != n_sect) break i; + } else nlists.len; + + if (nlist_start == nlist_end or nlists[nlist_start].nlist.n_value > sect.addr) { + const name = try std.fmt.allocPrintZ(gpa, "{s}${s}", .{ sect.segName(), sect.sectName() }); + defer gpa.free(name); + const size = if (nlist_start == nlist_end) sect.size else nlists[nlist_start].nlist.n_value - sect.addr; + const atom_index = try self.addAtom(.{ + .name = name, + .n_sect = @intCast(n_sect), + .off = 0, + .size = size, + .alignment = sect.@"align", + }, macho_file); + try subsections.append(gpa, .{ + .atom = atom_index, + .off = 0, + }); + } + + var idx: usize = nlist_start; + while (idx < nlist_end) { + const alias_start = idx; + const nlist = nlists[alias_start]; + + while (idx < nlist_end and + nlists[idx].nlist.n_value == nlist.nlist.n_value) : (idx += 1) + {} + + const size = if (idx < nlist_end) + nlists[idx].nlist.n_value - nlist.nlist.n_value + else + sect.addr + sect.size - nlist.nlist.n_value; + const alignment = if (nlist.nlist.n_value > 0) + @min(@ctz(nlist.nlist.n_value), sect.@"align") + else + sect.@"align"; + const atom_index = try self.addAtom(.{ + .name = self.getString(nlist.nlist.n_strx), + .n_sect = @intCast(n_sect), + .off = nlist.nlist.n_value - sect.addr, + .size = size, + .alignment = alignment, + }, macho_file); + try subsections.append(gpa, .{ + .atom = atom_index, + .off = nlist.nlist.n_value - sect.addr, + }); + + for (alias_start..idx) |i| { + self.symtab.items(.size)[nlists[i].idx] = size; + } + } + } +} + +fn initSections(self: *Object, nlists: anytype, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; + const slice = 
self.sections.slice(); + + try self.atoms.ensureUnusedCapacity(gpa, self.sections.items(.header).len); + + for (slice.items(.header), 0..) |sect, n_sect| { + if (isLiteral(sect)) continue; + + const name = try std.fmt.allocPrintZ(gpa, "{s}${s}", .{ sect.segName(), sect.sectName() }); + defer gpa.free(name); + + const atom_index = try self.addAtom(.{ + .name = name, + .n_sect = @intCast(n_sect), + .off = 0, + .size = sect.size, + .alignment = sect.@"align", + }, macho_file); + try slice.items(.subsections)[n_sect].append(gpa, .{ .atom = atom_index, .off = 0 }); + + const nlist_start = for (nlists, 0..) |nlist, i| { + if (nlist.nlist.n_sect - 1 == n_sect) break i; + } else nlists.len; + const nlist_end = for (nlists[nlist_start..], nlist_start..) |nlist, i| { + if (nlist.nlist.n_sect - 1 != n_sect) break i; + } else nlists.len; + + var idx: usize = nlist_start; + while (idx < nlist_end) { + const nlist = nlists[idx]; + + while (idx < nlist_end and + nlists[idx].nlist.n_value == nlist.nlist.n_value) : (idx += 1) + {} + + const size = if (idx < nlist_end) + nlists[idx].nlist.n_value - nlist.nlist.n_value + else + sect.addr + sect.size - nlist.nlist.n_value; + + for (nlist_start..idx) |i| { + self.symtab.items(.size)[nlists[i].idx] = size; + } + } + } +} + +const AddAtomArgs = struct { + name: [:0]const u8, + n_sect: u8, + off: u64, size: u64, - alignment: Alignment, - out_sect_id: u8, -) !Atom.Index { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const atom_index = try macho_file.createAtom(sym_index, .{ - .size = size, - .alignment = alignment, - }); - const atom = macho_file.getAtomPtr(atom_index); - atom.inner_sym_index = inner_sym_index; - atom.inner_nsyms_trailing = inner_nsyms_trailing; - atom.file = object_id + 1; - self.symtab[sym_index].n_sect = out_sect_id + 1; - - log.debug("creating ATOM(%{d}, '{s}') in sect({d}, '{s},{s}') in object({d})", .{ - sym_index, - self.getSymbolName(sym_index), - out_sect_id + 1, - 
macho_file.sections.items(.header)[out_sect_id].segName(), - macho_file.sections.items(.header)[out_sect_id].sectName(), - object_id, - }); + alignment: u32, +}; +fn addAtom(self: *Object, args: AddAtomArgs, macho_file: *MachO) !Atom.Index { + const gpa = macho_file.base.allocator; + const atom_index = try macho_file.addAtom(); + const atom = macho_file.getAtom(atom_index).?; + atom.file = self.index; + atom.atom_index = atom_index; + atom.name = try macho_file.string_intern.insert(gpa, args.name); + atom.n_sect = args.n_sect; + atom.size = args.size; + atom.alignment = args.alignment; + atom.off = args.off; try self.atoms.append(gpa, atom_index); - self.atom_by_index_table[sym_index] = atom_index; - - var it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - while (it.next()) |sym_loc| { - const inner = macho_file.getSymbolPtr(sym_loc); - inner.n_sect = out_sect_id + 1; - self.atom_by_index_table[sym_loc.sym_index] = atom_index; - } - - const out_sect = macho_file.sections.items(.header)[out_sect_id]; - if (out_sect.isCode() and - mem.eql(u8, "__TEXT", out_sect.segName()) and - mem.eql(u8, "__text", out_sect.sectName())) - { - // TODO currently assuming a single section for executable machine code - try self.exec_atoms.append(gpa, atom_index); - } - return atom_index; } -fn filterRelocs( - relocs: []align(1) const macho.relocation_info, - start_addr: u64, - end_addr: u64, -) Entry { - const Predicate = struct { - addr: u64, +fn initLiteralSections(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + // TODO here we should split into equal-sized records, hash the contents, and then + // deduplicate - ICF. + // For now, we simply cover each literal section with one large atom. 
+ const gpa = macho_file.base.allocator; + const slice = self.sections.slice(); - pub fn predicate(self: @This(), rel: macho.relocation_info) bool { - return rel.r_address >= self.addr; + try self.atoms.ensureUnusedCapacity(gpa, self.sections.items(.header).len); + + for (slice.items(.header), 0..) |sect, n_sect| { + if (!isLiteral(sect)) continue; + + const name = try std.fmt.allocPrintZ(gpa, "{s}${s}", .{ sect.segName(), sect.sectName() }); + defer gpa.free(name); + + const atom_index = try self.addAtom(.{ + .name = name, + .n_sect = @intCast(n_sect), + .off = 0, + .size = sect.size, + .alignment = sect.@"align", + }, macho_file); + try slice.items(.subsections)[n_sect].append(gpa, .{ .atom = atom_index, .off = 0 }); + } +} + +pub fn findAtom(self: Object, addr: u64) ?Atom.Index { + const tracy = trace(@src()); + defer tracy.end(); + const slice = self.sections.slice(); + for (slice.items(.header), slice.items(.subsections), 0..) |sect, subs, n_sect| { + if (subs.items.len == 0) continue; + if (sect.addr == addr) return subs.items[0].atom; + if (sect.addr < addr and addr < sect.addr + sect.size) { + return self.findAtomInSection(addr, @intCast(n_sect)); } - }; - const LPredicate = struct { - addr: u64, + } + return null; +} - pub fn predicate(self: @This(), rel: macho.relocation_info) bool { - return rel.r_address < self.addr; +fn findAtomInSection(self: Object, addr: u64, n_sect: u8) ?Atom.Index { + const tracy = trace(@src()); + defer tracy.end(); + const slice = self.sections.slice(); + const sect = slice.items(.header)[n_sect]; + const subsections = slice.items(.subsections)[n_sect]; + + var min: usize = 0; + var max: usize = subsections.items.len; + while (min < max) { + const idx = (min + max) / 2; + const sub = subsections.items[idx]; + const sub_addr = sect.addr + sub.off; + const sub_size = if (idx + 1 < subsections.items.len) + subsections.items[idx + 1].off - sub.off + else + sect.size - sub.off; + if (sub_addr == addr or (sub_addr < addr and addr < 
sub_addr + sub_size)) return sub.atom; + if (sub_addr < addr) { + min = idx + 1; + } else { + max = idx; } - }; - - const start = MachO.bsearch(macho.relocation_info, relocs, Predicate{ .addr = end_addr }); - const len = MachO.lsearch(macho.relocation_info, relocs[start..], LPredicate{ .addr = start_addr }); - - return .{ .start = @as(u32, @intCast(start)), .len = @as(u32, @intCast(len)) }; -} - -/// Parse all relocs for the input section, and sort in descending order. -/// Previously, I have wrongly assumed the compilers output relocations for each -/// section in a sorted manner which is simply not true. -fn parseRelocs(self: *Object, gpa: Allocator, sect_id: u8) !void { - const section = self.getSourceSection(sect_id); - const start = @as(u32, @intCast(self.relocations.items.len)); - if (self.getSourceRelocs(section)) |relocs| { - try self.relocations.ensureUnusedCapacity(gpa, relocs.len); - self.relocations.appendUnalignedSliceAssumeCapacity(relocs); - mem.sort(macho.relocation_info, self.relocations.items[start..], {}, relocGreaterThan); - } - self.section_relocs_lookup.items[sect_id] = start; -} - -fn cacheRelocs(self: *Object, macho_file: *MachO, atom_index: Atom.Index) !void { - const atom = macho_file.getAtom(atom_index); - - const source_sect_id = if (self.getSourceSymbol(atom.sym_index)) |source_sym| blk: { - break :blk source_sym.n_sect - 1; - } else blk: { - // If there was no matching symbol present in the source symtab, this means - // we are dealing with either an entire section, or part of it, but also - // starting at the beginning. 
- const nbase = @as(u32, @intCast(self.in_symtab.?.len)); - const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - break :blk sect_id; - }; - const source_sect = self.getSourceSection(source_sect_id); - assert(!source_sect.isZerofill()); - const relocs = self.getRelocs(source_sect_id); - - self.relocs_lookup[atom.sym_index] = if (self.getSourceSymbol(atom.sym_index)) |source_sym| blk: { - const offset = source_sym.n_value - source_sect.addr; - break :blk filterRelocs(relocs, offset, offset + atom.size); - } else filterRelocs(relocs, 0, atom.size); -} - -fn relocGreaterThan(ctx: void, lhs: macho.relocation_info, rhs: macho.relocation_info) bool { - _ = ctx; - return lhs.r_address > rhs.r_address; -} - -fn parseEhFrameSection(self: *Object, macho_file: *MachO, object_id: u32) !void { - const sect_id = self.eh_frame_sect_id orelse return; - const sect = self.getSourceSection(sect_id); - - log.debug("parsing __TEXT,__eh_frame section", .{}); - - const comp = macho_file.base.comp; - const gpa = comp.gpa; - - if (macho_file.eh_frame_section_index == null) { - macho_file.eh_frame_section_index = try macho_file.initSection("__TEXT", "__eh_frame", .{}); } - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - try self.parseRelocs(gpa, sect_id); - const relocs = self.getRelocs(sect_id); - - var it = self.getEhFrameRecordsIterator(); - var record_count: u32 = 0; - while (try it.next()) |_| { - record_count += 1; + if (min < subsections.items.len) { + const sub = subsections.items[min]; + const sub_addr = sect.addr + sub.off; + const sub_size = if (min + 1 < subsections.items.len) + subsections.items[min + 1].off - sub.off + else + sect.size - sub.off; + if (sub_addr == addr or (sub_addr < addr and addr < sub_addr + sub_size)) return sub.atom; } - try self.eh_frame_relocs_lookup.ensureTotalCapacity(gpa, record_count); - try self.eh_frame_records_lookup.ensureUnusedCapacity(gpa, record_count); + return null; +} - 
it.reset(); +fn linkNlistToAtom(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + for (self.symtab.items(.nlist), self.symtab.items(.atom)) |nlist, *atom| { + if (!nlist.stab() and nlist.sect()) { + if (self.findAtomInSection(nlist.n_value, nlist.n_sect - 1)) |atom_index| { + atom.* = atom_index; + } else { + macho_file.base.fatal("{}: symbol {s} not attached to any (sub)section", .{ + self.fmtPath(), self.getString(nlist.n_strx), + }); + return error.ParseFailed; + } + } + } +} - while (try it.next()) |record| { - const offset = it.pos - record.getSize(); - const rel_pos: Entry = switch (cpu_arch) { - .aarch64 => filterRelocs(relocs, offset, offset + record.getSize()), - .x86_64 => .{}, - else => unreachable, +fn initSymbols(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; + const slice = self.symtab.slice(); + + try self.symbols.ensureUnusedCapacity(gpa, slice.items(.nlist).len); + + for (slice.items(.nlist), slice.items(.atom), 0..) 
|nlist, atom_index, i| { + if (nlist.ext()) { + const name = self.getString(nlist.n_strx); + const off = try macho_file.string_intern.insert(gpa, name); + const gop = try macho_file.getOrCreateGlobal(off); + self.symbols.addOneAssumeCapacity().* = gop.index; + continue; + } + + const index = try macho_file.addSymbol(); + self.symbols.appendAssumeCapacity(index); + const symbol = macho_file.getSymbol(index); + const name = self.getString(nlist.n_strx); + symbol.* = .{ + .value = nlist.n_value, + .name = try macho_file.string_intern.insert(gpa, name), + .nlist_idx = @intCast(i), + .atom = 0, + .file = self.index, }; - self.eh_frame_relocs_lookup.putAssumeCapacityNoClobber(offset, .{ - .dead = false, - .reloc = rel_pos, - }); - if (record.tag == .fde) { - const reloc_target = blk: { - switch (cpu_arch) { - .aarch64 => { - assert(rel_pos.len > 0); // TODO convert to an error as the FDE eh frame is malformed - // Find function symbol that this record describes - const rel = for (relocs[rel_pos.start..][0..rel_pos.len]) |rel| { - if (rel.r_address - @as(i32, @intCast(offset)) == 8 and - @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)) == .ARM64_RELOC_UNSIGNED) - break rel; - } else unreachable; - const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = object_id, - .rel = rel, - .code = it.data[offset..], - .base_offset = @as(i32, @intCast(offset)), - }); - break :blk reloc_target; + if (macho_file.getAtom(atom_index)) |atom| { + assert(!nlist.abs()); + symbol.value -= atom.getInputAddress(macho_file); + symbol.atom = atom_index; + } + + symbol.flags.abs = nlist.abs(); + symbol.flags.no_dead_strip = symbol.flags.no_dead_strip or nlist.noDeadStrip(); + + if (nlist.sect() and + self.sections.items(.header)[nlist.n_sect - 1].type() == macho.S_THREAD_LOCAL_VARIABLES) + { + symbol.flags.tlv = true; + } + } +} + +fn initSymbolStabs(self: *Object, nlists: anytype, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const 
SymbolLookup = struct { + ctx: *const Object, + entries: @TypeOf(nlists), + + fn find(fs: @This(), addr: u64) ?Symbol.Index { + // TODO binary search since we have the list sorted + for (fs.entries) |nlist| { + if (nlist.nlist.n_value == addr) return fs.ctx.symbols.items[nlist.idx]; + } + return null; + } + }; + + const start: u32 = for (self.symtab.items(.nlist), 0..) |nlist, i| { + if (nlist.stab()) break @intCast(i); + } else @intCast(self.symtab.items(.nlist).len); + const end: u32 = for (self.symtab.items(.nlist)[start..], start..) |nlist, i| { + if (!nlist.stab()) break @intCast(i); + } else @intCast(self.symtab.items(.nlist).len); + + if (start == end) return; + + const gpa = macho_file.base.allocator; + const syms = self.symtab.items(.nlist); + const sym_lookup = SymbolLookup{ .ctx = self, .entries = nlists }; + + var i: u32 = start; + while (i < end) : (i += 1) { + const open = syms[i]; + if (open.n_type != macho.N_SO) { + macho_file.base.fatal("{}: unexpected symbol stab type 0x{x} as the first entry", .{ + self.fmtPath(), + open.n_type, + }); + return error.ParseFailed; + } + + while (i < end and syms[i].n_type == macho.N_SO and syms[i].n_sect != 0) : (i += 1) {} + + var sf: StabFile = .{ .comp_dir = i }; + // TODO validate + i += 3; + + while (i < end and syms[i].n_type != macho.N_SO) : (i += 1) { + const nlist = syms[i]; + var stab: StabFile.Stab = .{}; + switch (nlist.n_type) { + macho.N_BNSYM => { + stab.tag = .func; + stab.symbol = sym_lookup.find(nlist.n_value); + // TODO validate + i += 3; + }, + macho.N_GSYM => { + stab.tag = .global; + stab.symbol = macho_file.getGlobalByName(self.getString(nlist.n_strx)); + }, + macho.N_STSYM => { + stab.tag = .static; + stab.symbol = sym_lookup.find(nlist.n_value); + }, + else => { + macho_file.base.fatal("{}: unhandled symbol stab type 0x{x}", .{ + self.fmtPath(), + nlist.n_type, + }); + return error.ParseFailed; + }, + } + try sf.stabs.append(gpa, stab); + } + + try self.stab_files.append(gpa, sf); + } +} + 
+fn sortAtoms(self: *Object, macho_file: *MachO) !void { + const lessThanAtom = struct { + fn lessThanAtom(ctx: *MachO, lhs: Atom.Index, rhs: Atom.Index) bool { + return ctx.getAtom(lhs).?.getInputAddress(ctx) < ctx.getAtom(rhs).?.getInputAddress(ctx); + } + }.lessThanAtom; + mem.sort(Atom.Index, self.atoms.items, macho_file, lessThanAtom); +} + +fn initRelocs(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const cpu_arch = macho_file.options.cpu_arch.?; + const slice = self.sections.slice(); + + for (slice.items(.header), slice.items(.relocs), 0..) |sect, *out, n_sect| { + if (sect.nreloc == 0) continue; + // We skip relocs for __DWARF since even in -r mode, the linker is expected to emit + // debug symbol stabs in the relocatable. This made me curious why that is. For now, + // I shall comply, but I wanna compare with dsymutil. + if (sect.attrs() & macho.S_ATTR_DEBUG != 0 and + !mem.eql(u8, sect.sectName(), "__compact_unwind")) continue; + + switch (cpu_arch) { + .x86_64 => try x86_64.parseRelocs(self, @intCast(n_sect), sect, out, macho_file), + .aarch64 => try aarch64.parseRelocs(self, @intCast(n_sect), sect, out, macho_file), + else => unreachable, + } + + mem.sort(Relocation, out.items, {}, Relocation.lessThan); + } + + for (slice.items(.header), slice.items(.relocs), slice.items(.subsections)) |sect, relocs, subsections| { + if (sect.isZerofill()) continue; + + var next_reloc: usize = 0; + for (subsections.items) |subsection| { + const atom = macho_file.getAtom(subsection.atom).?; + if (!atom.flags.alive) continue; + if (next_reloc >= relocs.items.len) break; + const end_addr = atom.off + atom.size; + atom.relocs.pos = next_reloc; + + while (next_reloc < relocs.items.len and relocs.items[next_reloc].offset < end_addr) : (next_reloc += 1) {} + + atom.relocs.len = next_reloc - atom.relocs.pos; + } + } +} + +fn initEhFrameRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { + const tracy = 
trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; + const nlists = self.symtab.items(.nlist); + const slice = self.sections.slice(); + const sect = slice.items(.header)[sect_id]; + const relocs = slice.items(.relocs)[sect_id]; + + const data = self.getSectionData(sect_id); + try self.eh_frame_data.ensureTotalCapacityPrecise(gpa, data.len); + self.eh_frame_data.appendSliceAssumeCapacity(data); + + // Check for non-personality relocs in FDEs and apply them + for (relocs.items, 0..) |rel, i| { + switch (rel.type) { + .unsigned => { + assert((rel.meta.length == 2 or rel.meta.length == 3) and rel.meta.has_subtractor); // TODO error + const S: i64 = switch (rel.tag) { + .local => rel.meta.symbolnum, + .@"extern" => @intCast(nlists[rel.meta.symbolnum].n_value), + }; + const A = rel.addend; + const SUB: i64 = blk: { + const sub_rel = relocs.items[i - 1]; + break :blk switch (sub_rel.tag) { + .local => sub_rel.meta.symbolnum, + .@"extern" => @intCast(nlists[sub_rel.meta.symbolnum].n_value), + }; + }; + switch (rel.meta.length) { + 0, 1 => unreachable, + 2 => mem.writeInt(u32, self.eh_frame_data.items[rel.offset..][0..4], @bitCast(@as(i32, @truncate(S + A - SUB))), .little), + 3 => mem.writeInt(u64, self.eh_frame_data.items[rel.offset..][0..8], @bitCast(S + A - SUB), .little), + } + }, + else => {}, + } + } + + var it = eh_frame.Iterator{ .data = self.eh_frame_data.items }; + while (try it.next()) |rec| { + switch (rec.tag) { + .cie => try self.cies.append(gpa, .{ + .offset = rec.offset, + .size = rec.size, + .file = self.index, + }), + .fde => try self.fdes.append(gpa, .{ + .offset = rec.offset, + .size = rec.size, + .cie = undefined, + .file = self.index, + }), + } + } + + for (self.cies.items) |*cie| { + try cie.parse(macho_file); + } + + for (self.fdes.items) |*fde| { + try fde.parse(macho_file); + } + + const sortFn = struct { + fn sortFn(ctx: *MachO, lhs: Fde, rhs: Fde) bool { + return lhs.getAtom(ctx).getInputAddress(ctx) < 
rhs.getAtom(ctx).getInputAddress(ctx); + } + }.sortFn; + + mem.sort(Fde, self.fdes.items, macho_file, sortFn); + + // Parse and attach personality pointers to CIEs if any + for (relocs.items) |rel| { + switch (rel.type) { + .got => { + assert(rel.meta.length == 2 and rel.tag == .@"extern"); + const cie = for (self.cies.items) |*cie| { + if (cie.offset <= rel.offset and rel.offset < cie.offset + cie.getSize()) break cie; + } else { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ + self.fmtPath(), sect.segName(), sect.sectName(), rel.offset, + }); + return error.ParseFailed; + }; + cie.personality = .{ .index = @intCast(rel.target), .offset = rel.offset - cie.offset }; + }, + else => {}, + } + } +} + +fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const SymbolLookup = struct { + ctx: *const Object, + + fn find(fs: @This(), addr: u64) ?Symbol.Index { + for (fs.ctx.symbols.items, 0..) |sym_index, i| { + const nlist = fs.ctx.symtab.items(.nlist)[i]; + if (nlist.ext() and nlist.n_value == addr) return sym_index; + } + return null; + } + }; + + const gpa = macho_file.base.allocator; + const data = self.getSectionData(sect_id); + const nrecs = @divExact(data.len, @sizeOf(macho.compact_unwind_entry)); + const recs = @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data.ptr))[0..nrecs]; + const sym_lookup = SymbolLookup{ .ctx = self }; + + try self.unwind_records.resize(gpa, nrecs); + + const header = self.sections.items(.header)[sect_id]; + const relocs = self.sections.items(.relocs)[sect_id].items; + var reloc_idx: usize = 0; + for (recs, self.unwind_records.items, 0..) 
|rec, *out_index, rec_idx| { + const rec_start = rec_idx * @sizeOf(macho.compact_unwind_entry); + const rec_end = rec_start + @sizeOf(macho.compact_unwind_entry); + const reloc_start = reloc_idx; + while (reloc_idx < relocs.len and + relocs[reloc_idx].offset < rec_end) : (reloc_idx += 1) + {} + + out_index.* = try macho_file.addUnwindRecord(); + const out = macho_file.getUnwindRecord(out_index.*); + out.length = rec.rangeLength; + out.enc = .{ .enc = rec.compactUnwindEncoding }; + out.file = self.index; + + for (relocs[reloc_start..reloc_idx]) |rel| { + if (rel.type != .unsigned or rel.meta.length != 3) { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ + self.fmtPath(), header.segName(), header.sectName(), rel.offset, + }); + return error.ParseFailed; + } + assert(rel.type == .unsigned and rel.meta.length == 3); // TODO error + const offset = rel.offset - rec_start; + switch (offset) { + 0 => switch (rel.tag) { // target symbol + .@"extern" => { + out.atom = self.symtab.items(.atom)[rel.meta.symbolnum]; + out.atom_offset = @intCast(rec.rangeStart); }, - .x86_64 => { - const target_address = record.getTargetSymbolAddress(.{ - .base_addr = sect.addr, - .base_offset = offset, + .local => if (self.findAtom(rec.rangeStart)) |atom_index| { + out.atom = atom_index; + const atom = out.getAtom(macho_file); + out.atom_offset = @intCast(rec.rangeStart - atom.getInputAddress(macho_file)); + } else { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ + self.fmtPath(), header.segName(), header.sectName(), rel.offset, }); - const target_sym_index = self.getSymbolByAddress(target_address, null); - const reloc_target = if (self.getGlobal(target_sym_index)) |global_index| - macho_file.globals.items[global_index] - else - SymbolWithLoc{ .sym_index = target_sym_index, .file = object_id + 1 }; - break :blk reloc_target; + return error.ParseFailed; }, + }, + 16 => switch (rel.tag) { // personality function + .@"extern" => { + out.personality = rel.target; 
+ }, + .local => if (sym_lookup.find(rec.personalityFunction)) |sym_index| { + out.personality = sym_index; + } else { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ + self.fmtPath(), header.segName(), header.sectName(), rel.offset, + }); + return error.ParseFailed; + }, + }, + 24 => switch (rel.tag) { // lsda + .@"extern" => { + out.lsda = self.symtab.items(.atom)[rel.meta.symbolnum]; + out.lsda_offset = @intCast(rec.lsda); + }, + .local => if (self.findAtom(rec.lsda)) |atom_index| { + out.lsda = atom_index; + const atom = out.getLsdaAtom(macho_file).?; + out.lsda_offset = @intCast(rec.lsda - atom.getInputAddress(macho_file)); + } else { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ + self.fmtPath(), header.segName(), header.sectName(), rel.offset, + }); + return error.ParseFailed; + }, + }, + else => {}, + } + } + } + + if (!macho_file.options.relocatable) try self.synthesiseNullUnwindRecords(macho_file); + + const sortFn = struct { + fn sortFn(ctx: *MachO, lhs_index: UnwindInfo.Record.Index, rhs_index: UnwindInfo.Record.Index) bool { + const lhs = ctx.getUnwindRecord(lhs_index); + const rhs = ctx.getUnwindRecord(rhs_index); + const lhsa = lhs.getAtom(ctx); + const rhsa = rhs.getAtom(ctx); + return lhsa.getInputAddress(ctx) + lhs.atom_offset < rhsa.getInputAddress(ctx) + rhs.atom_offset; + } + }.sortFn; + mem.sort(UnwindInfo.Record.Index, self.unwind_records.items, macho_file, sortFn); + + // Associate unwind records to atoms + var next_cu: u32 = 0; + while (next_cu < self.unwind_records.items.len) { + const start = next_cu; + const rec_index = self.unwind_records.items[start]; + const rec = macho_file.getUnwindRecord(rec_index); + while (next_cu < self.unwind_records.items.len and + macho_file.getUnwindRecord(self.unwind_records.items[next_cu]).atom == rec.atom) : (next_cu += 1) + {} + + const atom = rec.getAtom(macho_file); + atom.unwind_records = .{ .pos = start, .len = next_cu - start }; + } +} + +fn 
synthesiseNullUnwindRecords(self: *Object, macho_file: *MachO) !void { + // Synthesise missing unwind records. + // The logic here is as follows: + // 1. if an atom has unwind info record that is not DWARF, FDE is marked dead + // 2. if an atom has unwind info record that is DWARF, FDE is tied to this unwind record + // 3. if an atom doesn't have unwind info record but FDE is available, synthesise and tie + // 4. if an atom doesn't have either, synthesise a null unwind info record + + const Superposition = struct { atom: Atom.Index, size: u64, cu: ?UnwindInfo.Record.Index = null, fde: ?Fde.Index = null }; + + const gpa = macho_file.base.allocator; + var superposition = std.AutoArrayHashMap(u64, Superposition).init(gpa); + defer superposition.deinit(); + + const slice = self.symtab.slice(); + for (slice.items(.nlist), slice.items(.atom), slice.items(.size)) |nlist, atom, size| { + if (nlist.stab()) continue; + if (!nlist.sect()) continue; + const sect = self.sections.items(.header)[nlist.n_sect - 1]; + if (sect.isCode()) { + try superposition.ensureUnusedCapacity(1); + const gop = superposition.getOrPutAssumeCapacity(nlist.n_value); + if (gop.found_existing) { + assert(gop.value_ptr.atom == atom and gop.value_ptr.size == size); + } + gop.value_ptr.* = .{ .atom = atom, .size = size }; + } + } + + for (self.unwind_records.items) |rec_index| { + const rec = macho_file.getUnwindRecord(rec_index); + const atom = rec.getAtom(macho_file); + const addr = atom.getInputAddress(macho_file) + rec.atom_offset; + superposition.getPtr(addr).?.cu = rec_index; + } + + for (self.fdes.items, 0..) 
|fde, fde_index| { + const atom = fde.getAtom(macho_file); + const addr = atom.getInputAddress(macho_file) + fde.atom_offset; + superposition.getPtr(addr).?.fde = @intCast(fde_index); + } + + for (superposition.keys(), superposition.values()) |addr, meta| { + if (meta.fde) |fde_index| { + const fde = &self.fdes.items[fde_index]; + + if (meta.cu) |rec_index| { + const rec = macho_file.getUnwindRecord(rec_index); + if (!rec.enc.isDwarf(macho_file)) { + // Mark FDE dead + fde.alive = false; + } else { + // Tie FDE to unwind record + rec.fde = fde_index; + } + } else { + // Synthesise new unwind info record + const fde_data = fde.getData(macho_file); + const atom_size = mem.readInt(u64, fde_data[16..][0..8], .little); + const rec_index = try macho_file.addUnwindRecord(); + const rec = macho_file.getUnwindRecord(rec_index); + try self.unwind_records.append(gpa, rec_index); + rec.length = @intCast(atom_size); + rec.atom = fde.atom; + rec.atom_offset = fde.atom_offset; + rec.fde = fde_index; + rec.file = fde.file; + switch (macho_file.options.cpu_arch.?) { + .x86_64 => rec.enc.setMode(macho.UNWIND_X86_64_MODE.DWARF), + .aarch64 => rec.enc.setMode(macho.UNWIND_ARM64_MODE.DWARF), else => unreachable, } - }; - if (reloc_target.getFile() != object_id) { - log.debug("FDE at offset {x} marked DEAD", .{offset}); - self.eh_frame_relocs_lookup.getPtr(offset).?.dead = true; - } else { - // You would think that we are done but turns out that the compilers may use - // whichever symbol alias they want for a target symbol. This in particular - // very problematic when using Zig's @export feature to re-export symbols under - // additional names. For that reason, we need to ensure we record aliases here - // too so that we can tie them with their matching unwind records and vice versa. 
- const aliases = self.getSymbolAliases(reloc_target.sym_index); - var i: u32 = 0; - while (i < aliases.len) : (i += 1) { - const actual_target = SymbolWithLoc{ - .sym_index = i + aliases.start, - .file = reloc_target.file, - }; - log.debug("FDE at offset {x} tracks {s}", .{ - offset, - macho_file.getSymbolName(actual_target), - }); - try self.eh_frame_records_lookup.putNoClobber(gpa, actual_target, offset); - } } + } else if (meta.cu == null and meta.fde == null) { + // Create a null record + const rec_index = try macho_file.addUnwindRecord(); + const rec = macho_file.getUnwindRecord(rec_index); + const atom = macho_file.getAtom(meta.atom).?; + try self.unwind_records.append(gpa, rec_index); + rec.length = @intCast(meta.size); + rec.atom = meta.atom; + rec.atom_offset = @intCast(addr - atom.getInputAddress(macho_file)); + rec.file = self.index; } } } -fn parseUnwindInfo(self: *Object, macho_file: *MachO, object_id: u32) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const sect_id = self.unwind_info_sect_id orelse { - // If it so happens that the object had `__eh_frame` section defined but no `__compact_unwind`, - // we will try fully synthesising unwind info records to somewhat match Apple ld's - // approach. However, we will only synthesise DWARF records and nothing more. For this reason, - // we still create the output `__TEXT,__unwind_info` section. 
- if (self.hasEhFrameRecords()) { - if (macho_file.unwind_info_section_index == null) { - macho_file.unwind_info_section_index = try macho_file.initSection( - "__TEXT", - "__unwind_info", - .{}, - ); - } - } - return; - }; - - log.debug("parsing unwind info in {s}", .{self.name}); - - if (macho_file.unwind_info_section_index == null) { - macho_file.unwind_info_section_index = try macho_file.initSection("__TEXT", "__unwind_info", .{}); - } - - const unwind_records = self.getUnwindRecords(); - - try self.unwind_records_lookup.ensureUnusedCapacity(gpa, @as(u32, @intCast(unwind_records.len))); - - const needs_eh_frame = for (unwind_records) |record| { - if (UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) break true; - } else false; - - if (needs_eh_frame and !self.hasEhFrameRecords()) return error.MissingEhFrameSection; - - try self.parseRelocs(gpa, sect_id); - const relocs = self.getRelocs(sect_id); - - for (unwind_records, 0..) |record, record_id| { - const offset = record_id * @sizeOf(macho.compact_unwind_entry); - const rel_pos = filterRelocs( - relocs, - offset, - offset + @sizeOf(macho.compact_unwind_entry), - ); - assert(rel_pos.len > 0); // TODO convert to an error as the unwind info is malformed - self.unwind_relocs_lookup[record_id] = .{ - .dead = false, - .reloc = rel_pos, - }; - - // Find function symbol that this record describes - const rel = relocs[rel_pos.start..][rel_pos.len - 1]; - const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = object_id, - .rel = rel, - .code = mem.asBytes(&record), - .base_offset = @as(i32, @intCast(offset)), - }); - if (reloc_target.getFile() != object_id) { - log.debug("unwind record {d} marked DEAD", .{record_id}); - self.unwind_relocs_lookup[record_id].dead = true; - } else { - // You would think that we are done but turns out that the compilers may use - // whichever symbol alias they want for a target symbol. 
This in particular - // very problematic when using Zig's @export feature to re-export symbols under - // additional names. For that reason, we need to ensure we record aliases here - // too so that we can tie them with their matching unwind records and vice versa. - const aliases = self.getSymbolAliases(reloc_target.sym_index); - var i: u32 = 0; - while (i < aliases.len) : (i += 1) { - const actual_target = SymbolWithLoc{ - .sym_index = i + aliases.start, - .file = reloc_target.file, - }; - log.debug("unwind record {d} tracks {s}", .{ - record_id, - macho_file.getSymbolName(actual_target), - }); - try self.unwind_records_lookup.putNoClobber(gpa, actual_target, @intCast(record_id)); - } - } - } -} - -pub fn getSourceSymbol(self: Object, index: u32) ?macho.nlist_64 { - const symtab = self.in_symtab.?; - if (index >= symtab.len) return null; - const mapped_index = self.source_symtab_lookup[index]; - return symtab[mapped_index]; -} - -pub fn getSourceSection(self: Object, index: u8) macho.section_64 { - const sections = self.getSourceSections(); - assert(index < sections.len); - return sections[index]; -} - -pub fn getSourceSectionByName(self: Object, segname: []const u8, sectname: []const u8) ?macho.section_64 { - const index = self.getSourceSectionIndexByName(segname, sectname) orelse return null; - const sections = self.getSourceSections(); - return sections[index]; -} - -pub fn getSourceSectionIndexByName(self: Object, segname: []const u8, sectname: []const u8) ?u8 { - const sections = self.getSourceSections(); - for (sections, 0..) 
|sect, i| { - if (mem.eql(u8, segname, sect.segName()) and mem.eql(u8, sectname, sect.sectName())) - return @as(u8, @intCast(i)); - } else return null; -} - -pub fn getSourceSections(self: Object) []align(1) const macho.section_64 { +fn initPlatform(self: *Object) void { var it = LoadCommandIterator{ - .ncmds = self.header.ncmds, - .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + .ncmds = self.header.?.ncmds, + .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], }; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => { - return cmd.getSections(); - }, - else => {}, - } else unreachable; -} - -pub fn parseDataInCode(self: *Object, gpa: Allocator) !void { - var it = LoadCommandIterator{ - .ncmds = self.header.ncmds, - .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], - }; - const cmd = while (it.next()) |cmd| { - switch (cmd.cmd()) { - .DATA_IN_CODE => break cmd.cast(macho.linkedit_data_command).?, - else => {}, - } - } else return; - const ndice = @divExact(cmd.datasize, @sizeOf(macho.data_in_code_entry)); - const dice = @as([*]align(1) const macho.data_in_code_entry, @ptrCast(self.contents.ptr + cmd.dataoff))[0..ndice]; - try self.data_in_code.ensureTotalCapacityPrecise(gpa, dice.len); - self.data_in_code.appendUnalignedSliceAssumeCapacity(dice); - mem.sort(macho.data_in_code_entry, self.data_in_code.items, {}, diceLessThan); -} - -fn diceLessThan(ctx: void, lhs: macho.data_in_code_entry, rhs: macho.data_in_code_entry) bool { - _ = ctx; - return lhs.offset < rhs.offset; -} - -fn getDysymtab(self: Object) ?macho.dysymtab_command { - var it = LoadCommandIterator{ - .ncmds = self.header.ncmds, - .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], - }; - while (it.next()) |cmd| { - switch (cmd.cmd()) { - .DYSYMTAB => return cmd.cast(macho.dysymtab_command).?, - else => {}, - } - } else return null; -} - -pub fn 
parseDwarfInfo(self: Object) DwarfInfo { - var di = DwarfInfo{ - .debug_info = &[0]u8{}, - .debug_abbrev = &[0]u8{}, - .debug_str = &[0]u8{}, - }; - for (self.getSourceSections()) |sect| { - if (!sect.isDebug()) continue; - const sectname = sect.sectName(); - if (mem.eql(u8, sectname, "__debug_info")) { - di.debug_info = self.getSectionContents(sect); - } else if (mem.eql(u8, sectname, "__debug_abbrev")) { - di.debug_abbrev = self.getSectionContents(sect); - } else if (mem.eql(u8, sectname, "__debug_str")) { - di.debug_str = self.getSectionContents(sect); - } - } - return di; -} - -/// Returns Platform composed from the first encountered build version type load command: -/// either LC_BUILD_VERSION or LC_VERSION_MIN_*. -pub fn getPlatform(self: Object) ?Platform { - var it = LoadCommandIterator{ - .ncmds = self.header.ncmds, - .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], - }; - while (it.next()) |cmd| { + self.platform = while (it.next()) |cmd| { switch (cmd.cmd()) { .BUILD_VERSION, .VERSION_MIN_MACOSX, .VERSION_MIN_IPHONEOS, .VERSION_MIN_TVOS, .VERSION_MIN_WATCHOS, - => return Platform.fromLoadCommand(cmd), + => break MachO.Options.Platform.fromLoadCommand(cmd), else => {}, } + } else null; +} + +/// Currently, we only check if a compile unit for this input object file exists +/// and record that so that we can emit symbol stabs. +/// TODO in the future, we want parse debug info and debug line sections so that +/// we can provide nice error locations to the user. +fn initDwarfInfo(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.allocator; + + var debug_info_index: ?usize = null; + var debug_abbrev_index: ?usize = null; + var debug_str_index: ?usize = null; + + for (self.sections.items(.header), 0..) 
|sect, index| { + if (sect.attrs() & macho.S_ATTR_DEBUG == 0) continue; + if (mem.eql(u8, sect.sectName(), "__debug_info")) debug_info_index = index; + if (mem.eql(u8, sect.sectName(), "__debug_abbrev")) debug_abbrev_index = index; + if (mem.eql(u8, sect.sectName(), "__debug_str")) debug_str_index = index; + } + + if (debug_info_index == null or debug_abbrev_index == null) return; + + var dwarf_info = DwarfInfo{ + .debug_info = self.getSectionData(@intCast(debug_info_index.?)), + .debug_abbrev = self.getSectionData(@intCast(debug_abbrev_index.?)), + .debug_str = if (debug_str_index) |index| self.getSectionData(@intCast(index)) else "", + }; + dwarf_info.init(gpa) catch { + macho_file.base.fatal("{}: invalid __DWARF info found", .{self.fmtPath()}); + return error.ParseFailed; + }; + self.dwarf_info = dwarf_info; +} + +pub fn resolveSymbols(self: *Object, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items, 0..) |index, i| { + const nlist_idx = @as(Symbol.Index, @intCast(i)); + const nlist = self.symtab.items(.nlist)[nlist_idx]; + const atom_index = self.symtab.items(.atom)[nlist_idx]; + + if (!nlist.ext()) continue; + if (nlist.undf() and !nlist.tentative()) continue; + if (nlist.sect()) { + const atom = macho_file.getAtom(atom_index).?; + if (!atom.flags.alive) continue; + } + + const symbol = macho_file.getSymbol(index); + if (self.asFile().getSymbolRank(.{ + .archive = !self.alive, + .weak = nlist.weakDef(), + .tentative = nlist.tentative(), + }) < symbol.getSymbolRank(macho_file)) { + const value = if (nlist.sect()) blk: { + const atom = macho_file.getAtom(atom_index).?; + break :blk nlist.n_value - atom.getInputAddress(macho_file); + } else nlist.n_value; + symbol.value = value; + symbol.atom = atom_index; + symbol.nlist_idx = nlist_idx; + symbol.file = self.index; + symbol.flags.weak = nlist.weakDef(); + symbol.flags.abs = nlist.abs(); + symbol.flags.tentative = nlist.tentative(); + symbol.flags.weak_ref = 
false; + symbol.flags.dyn_ref = nlist.n_desc & macho.REFERENCED_DYNAMICALLY != 0; + symbol.flags.no_dead_strip = symbol.flags.no_dead_strip or nlist.noDeadStrip(); + symbol.flags.interposable = macho_file.options.dylib and macho_file.options.namespace == .flat and !nlist.pext(); + + if (nlist.sect() and + self.sections.items(.header)[nlist.n_sect - 1].type() == macho.S_THREAD_LOCAL_VARIABLES) + { + symbol.flags.tlv = true; + } + } + + // Regardless of who the winner is, we still merge symbol visibility here. + if (nlist.pext() or (nlist.weakDef() and nlist.weakRef()) or self.hidden) { + if (symbol.visibility != .global) { + symbol.visibility = .hidden; + } + } else { + symbol.visibility = .global; + } + } +} + +pub fn resetGlobals(self: *Object, macho_file: *MachO) void { + for (self.symbols.items, 0..) |sym_index, nlist_idx| { + if (!self.symtab.items(.nlist)[nlist_idx].ext()) continue; + const sym = macho_file.getSymbol(sym_index); + const name = sym.name; + sym.* = .{}; + sym.name = name; + } +} + +pub fn markLive(self: *Object, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items, 0..) 
|index, nlist_idx| { + const nlist = self.symtab.items(.nlist)[nlist_idx]; + if (!nlist.ext()) continue; + + const sym = macho_file.getSymbol(index); + const file = sym.getFile(macho_file) orelse continue; + const should_keep = nlist.undf() or (nlist.tentative() and !sym.flags.tentative); + if (should_keep and file == .object and !file.object.alive) { + file.object.alive = true; + file.object.markLive(macho_file); + } + } +} + +pub fn scanRelocs(self: Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.atoms.items) |atom_index| { + const atom = macho_file.getAtom(atom_index).?; + if (!atom.flags.alive) continue; + const sect = atom.getInputSection(macho_file); + if (sect.isZerofill()) continue; + try atom.scanRelocs(macho_file); + } + + for (self.unwind_records.items) |rec_index| { + const rec = macho_file.getUnwindRecord(rec_index); + if (!rec.alive) continue; + if (rec.getFde(macho_file)) |fde| { + if (fde.getCie(macho_file).getPersonality(macho_file)) |sym| { + sym.flags.got = true; + } + } else if (rec.getPersonality(macho_file)) |sym| { + sym.flags.got = true; + } + } +} + +pub fn convertTentativeDefinitions(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; + + for (self.symbols.items, 0..) 
|index, i| { + const sym = macho_file.getSymbol(index); + if (!sym.flags.tentative) continue; + const sym_file = sym.getFile(macho_file).?; + if (sym_file.getIndex() != self.index) continue; + + const nlist_idx = @as(Symbol.Index, @intCast(i)); + const nlist = &self.symtab.items(.nlist)[nlist_idx]; + const nlist_atom = &self.symtab.items(.atom)[nlist_idx]; + + const atom_index = try macho_file.addAtom(); + try self.atoms.append(gpa, atom_index); + + const name = try std.fmt.allocPrintZ(gpa, "__DATA$__common${s}", .{sym.getName(macho_file)}); + defer gpa.free(name); + const atom = macho_file.getAtom(atom_index).?; + atom.atom_index = atom_index; + atom.name = try macho_file.string_intern.insert(gpa, name); + atom.file = self.index; + atom.size = nlist.n_value; + atom.alignment = (nlist.n_desc >> 8) & 0x0f; + + const n_sect = try self.addSection(gpa, "__DATA", "__common"); + const sect = &self.sections.items(.header)[n_sect]; + sect.flags = macho.S_ZEROFILL; + sect.size = atom.size; + sect.@"align" = atom.alignment; + atom.n_sect = n_sect; + + sym.value = 0; + sym.atom = atom_index; + sym.flags.weak = false; + sym.flags.weak_ref = false; + sym.flags.tentative = false; + sym.visibility = .global; + + nlist.n_value = 0; + nlist.n_type = macho.N_EXT | macho.N_SECT; + nlist.n_sect = 0; + nlist.n_desc = 0; + nlist_atom.* = atom_index; + } +} + +fn addSection(self: *Object, allocator: Allocator, segname: []const u8, sectname: []const u8) !u32 { + const n_sect = @as(u32, @intCast(try self.sections.addOne(allocator))); + self.sections.set(n_sect, .{ + .header = .{ + .sectname = MachO.makeStaticString(sectname), + .segname = MachO.makeStaticString(segname), + }, + }); + return n_sect; +} + +pub fn calcSymtabSize(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + const file = sym.getFile(macho_file) orelse continue; + if (file.getIndex() != 
self.index) continue; + if (sym.getAtom(macho_file)) |atom| if (!atom.flags.alive) continue; + if (sym.isSymbolStab(macho_file)) continue; + const name = sym.getName(macho_file); + // TODO in -r mode, we actually want to merge symbol names and emit only one + // work it out when emitting relocs + if (name.len > 0 and (name[0] == 'L' or name[0] == 'l') and !macho_file.options.relocatable) continue; + sym.flags.output_symtab = true; + if (sym.isLocal()) { + try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nlocals }, macho_file); + self.output_symtab_ctx.nlocals += 1; + } else if (sym.flags.@"export") { + try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nexports }, macho_file); + self.output_symtab_ctx.nexports += 1; + } else { + assert(sym.flags.import); + try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nimports }, macho_file); + self.output_symtab_ctx.nimports += 1; + } + self.output_symtab_ctx.strsize += @as(u32, @intCast(sym.getName(macho_file).len + 1)); + } + + if (!macho_file.options.strip and self.hasDebugInfo()) self.calcStabsSize(macho_file); +} + +pub fn calcStabsSize(self: *Object, macho_file: *MachO) void { + if (self.dwarf_info) |dw| { + // TODO handle multiple CUs + const cu = dw.compile_units.items[0]; + const comp_dir = cu.getCompileDir(dw) orelse return; + const tu_name = cu.getSourceFile(dw) orelse return; + + self.output_symtab_ctx.nstabs += 4; // N_SO, N_SO, N_OSO, N_SO + self.output_symtab_ctx.strsize += @as(u32, @intCast(comp_dir.len + 1)); // comp_dir + self.output_symtab_ctx.strsize += @as(u32, @intCast(tu_name.len + 1)); // tu_name + + if (self.archive) |path| { + self.output_symtab_ctx.strsize += @as(u32, @intCast(path.len + 1 + self.path.len + 1 + 1)); + } else { + self.output_symtab_ctx.strsize += @as(u32, @intCast(self.path.len + 1)); + } + + for (self.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + const file = sym.getFile(macho_file) orelse continue; + if (file.getIndex() != self.index) 
continue; + if (!sym.flags.output_symtab) continue; + if (macho_file.options.relocatable) { + const name = sym.getName(macho_file); + if (name.len > 0 and (name[0] == 'L' or name[0] == 'l')) continue; + } + const sect = macho_file.sections.items(.header)[sym.out_n_sect]; + if (sect.isCode()) { + self.output_symtab_ctx.nstabs += 4; // N_BNSYM, N_FUN, N_FUN, N_ENSYM + } else if (sym.visibility == .global) { + self.output_symtab_ctx.nstabs += 1; // N_GSYM + } else { + self.output_symtab_ctx.nstabs += 1; // N_STSYM + } + } + } else { + assert(self.hasSymbolStabs()); + + for (self.stab_files.items) |sf| { + self.output_symtab_ctx.nstabs += 4; // N_SO, N_SO, N_OSO, N_SO + self.output_symtab_ctx.strsize += @as(u32, @intCast(sf.getCompDir(self).len + 1)); // comp_dir + self.output_symtab_ctx.strsize += @as(u32, @intCast(sf.getTuName(self).len + 1)); // tu_name + self.output_symtab_ctx.strsize += @as(u32, @intCast(sf.getOsoPath(self).len + 1)); // path + + for (sf.stabs.items) |stab| { + const sym = stab.getSymbol(macho_file) orelse continue; + const file = sym.getFile(macho_file).?; + if (file.getIndex() != self.index) continue; + if (!sym.flags.output_symtab) continue; + const nstabs: u32 = switch (stab.tag) { + .func => 4, // N_BNSYM, N_FUN, N_FUN, N_ENSYM + .global => 1, // N_GSYM + .static => 1, // N_STSYM + }; + self.output_symtab_ctx.nstabs += nstabs; + } + } + } +} + +pub fn writeSymtab(self: Object, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + const file = sym.getFile(macho_file) orelse continue; + if (file.getIndex() != self.index) continue; + const idx = sym.getOutputSymtabIndex(macho_file) orelse continue; + const n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + macho_file.strtab.appendSliceAssumeCapacity(sym.getName(macho_file)); + macho_file.strtab.appendAssumeCapacity(0); + const out_sym = &macho_file.symtab.items[idx]; + 
out_sym.n_strx = n_strx; + sym.setOutputSym(macho_file, out_sym); + } + + if (!macho_file.options.strip and self.hasDebugInfo()) self.writeStabs(macho_file); +} + +pub fn writeStabs(self: *const Object, macho_file: *MachO) void { + const writeFuncStab = struct { + inline fn writeFuncStab( + n_strx: u32, + n_sect: u8, + n_value: u64, + size: u64, + index: u32, + ctx: *MachO, + ) void { + ctx.symtab.items[index] = .{ + .n_strx = 0, + .n_type = macho.N_BNSYM, + .n_sect = n_sect, + .n_desc = 0, + .n_value = n_value, + }; + ctx.symtab.items[index + 1] = .{ + .n_strx = n_strx, + .n_type = macho.N_FUN, + .n_sect = n_sect, + .n_desc = 0, + .n_value = n_value, + }; + ctx.symtab.items[index + 2] = .{ + .n_strx = 0, + .n_type = macho.N_FUN, + .n_sect = 0, + .n_desc = 0, + .n_value = size, + }; + ctx.symtab.items[index + 3] = .{ + .n_strx = 0, + .n_type = macho.N_ENSYM, + .n_sect = n_sect, + .n_desc = 0, + .n_value = size, + }; + } + }.writeFuncStab; + + var index = self.output_symtab_ctx.istab; + + if (self.dwarf_info) |dw| { + // TODO handle multiple CUs + const cu = dw.compile_units.items[0]; + const comp_dir = cu.getCompileDir(dw) orelse return; + const tu_name = cu.getSourceFile(dw) orelse return; + + // Open scope + // N_SO comp_dir + var n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + macho_file.strtab.appendSliceAssumeCapacity(comp_dir); + macho_file.strtab.appendAssumeCapacity(0); + macho_file.symtab.items[index] = .{ + .n_strx = n_strx, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + index += 1; + // N_SO tu_name + n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + macho_file.strtab.appendSliceAssumeCapacity(tu_name); + macho_file.strtab.appendAssumeCapacity(0); + macho_file.symtab.items[index] = .{ + .n_strx = n_strx, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + index += 1; + // N_OSO path + n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + if (self.archive) |path| { + 
macho_file.strtab.appendSliceAssumeCapacity(path); + macho_file.strtab.appendAssumeCapacity('('); + macho_file.strtab.appendSliceAssumeCapacity(self.path); + macho_file.strtab.appendAssumeCapacity(')'); + macho_file.strtab.appendAssumeCapacity(0); + } else { + macho_file.strtab.appendSliceAssumeCapacity(self.path); + macho_file.strtab.appendAssumeCapacity(0); + } + macho_file.symtab.items[index] = .{ + .n_strx = n_strx, + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = self.mtime, + }; + index += 1; + + for (self.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + const file = sym.getFile(macho_file) orelse continue; + if (file.getIndex() != self.index) continue; + if (!sym.flags.output_symtab) continue; + if (macho_file.options.relocatable) { + const name = sym.getName(macho_file); + if (name.len > 0 and (name[0] == 'L' or name[0] == 'l')) continue; + } + const sect = macho_file.sections.items(.header)[sym.out_n_sect]; + const sym_n_strx = n_strx: { + const symtab_index = sym.getOutputSymtabIndex(macho_file).?; + const osym = macho_file.symtab.items[symtab_index]; + break :n_strx osym.n_strx; + }; + const sym_n_sect: u8 = if (!sym.flags.abs) @intCast(sym.out_n_sect + 1) else 0; + const sym_n_value = sym.getAddress(.{}, macho_file); + const sym_size = sym.getSize(macho_file); + if (sect.isCode()) { + writeFuncStab(sym_n_strx, sym_n_sect, sym_n_value, sym_size, index, macho_file); + index += 4; + } else if (sym.visibility == .global) { + macho_file.symtab.items[index] = .{ + .n_strx = sym_n_strx, + .n_type = macho.N_GSYM, + .n_sect = sym_n_sect, + .n_desc = 0, + .n_value = 0, + }; + index += 1; + } else { + macho_file.symtab.items[index] = .{ + .n_strx = sym_n_strx, + .n_type = macho.N_STSYM, + .n_sect = sym_n_sect, + .n_desc = 0, + .n_value = sym_n_value, + }; + index += 1; + } + } + + // Close scope + // N_SO + macho_file.symtab.items[index] = .{ + .n_strx = 0, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + 
.n_value = 0, + }; + } else { + assert(self.hasSymbolStabs()); + + for (self.stab_files.items) |sf| { + // Open scope + // N_SO comp_dir + var n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + macho_file.strtab.appendSliceAssumeCapacity(sf.getCompDir(self)); + macho_file.strtab.appendAssumeCapacity(0); + macho_file.symtab.items[index] = .{ + .n_strx = n_strx, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + index += 1; + // N_SO tu_name + n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + macho_file.strtab.appendSliceAssumeCapacity(sf.getTuName(self)); + macho_file.strtab.appendAssumeCapacity(0); + macho_file.symtab.items[index] = .{ + .n_strx = n_strx, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + index += 1; + // N_OSO path + n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + macho_file.strtab.appendSliceAssumeCapacity(sf.getOsoPath(self)); + macho_file.strtab.appendAssumeCapacity(0); + macho_file.symtab.items[index] = .{ + .n_strx = n_strx, + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = sf.getOsoModTime(self), + }; + index += 1; + + for (sf.stabs.items) |stab| { + const sym = stab.getSymbol(macho_file) orelse continue; + const file = sym.getFile(macho_file).?; + if (file.getIndex() != self.index) continue; + if (!sym.flags.output_symtab) continue; + const sym_n_strx = n_strx: { + const symtab_index = sym.getOutputSymtabIndex(macho_file).?; + const osym = macho_file.symtab.items[symtab_index]; + break :n_strx osym.n_strx; + }; + const sym_n_sect: u8 = if (!sym.flags.abs) @intCast(sym.out_n_sect + 1) else 0; + const sym_n_value = sym.getAddress(.{}, macho_file); + const sym_size = sym.getSize(macho_file); + switch (stab.tag) { + .func => { + writeFuncStab(sym_n_strx, sym_n_sect, sym_n_value, sym_size, index, macho_file); + index += 4; + }, + .global => { + macho_file.symtab.items[index] = .{ + .n_strx = sym_n_strx, + .n_type = macho.N_GSYM, + .n_sect = 
sym_n_sect, + .n_desc = 0, + .n_value = 0, + }; + index += 1; + }, + .static => { + macho_file.symtab.items[index] = .{ + .n_strx = sym_n_strx, + .n_type = macho.N_STSYM, + .n_sect = sym_n_sect, + .n_desc = 0, + .n_value = sym_n_value, + }; + index += 1; + }, + } + } + + // Close scope + // N_SO + macho_file.symtab.items[index] = .{ + .n_strx = 0, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + index += 1; + } + } +} + +fn getLoadCommand(self: Object, lc: macho.LC) ?LoadCommandIterator.LoadCommand { + var it = LoadCommandIterator{ + .ncmds = self.header.?.ncmds, + .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], + }; + while (it.next()) |cmd| { + if (cmd.cmd() == lc) return cmd; } else return null; } -pub fn getSectionContents(self: Object, sect: macho.section_64) []const u8 { - const size = @as(usize, @intCast(sect.size)); - return self.contents[sect.offset..][0..size]; +pub fn getSectionData(self: *const Object, index: u32) []const u8 { + const slice = self.sections.slice(); + assert(index < slice.items(.header).len); + const sect = slice.items(.header)[index]; + return self.data[sect.offset..][0..sect.size]; } -pub fn getSectionAliasSymbolIndex(self: Object, sect_id: u8) u32 { - const start = @as(u32, @intCast(self.in_symtab.?.len)); - return start + sect_id; +fn getString(self: Object, off: u32) [:0]const u8 { + assert(off < self.strtab.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.ptr + off)), 0); } -pub fn getSectionAliasSymbol(self: *Object, sect_id: u8) macho.nlist_64 { - return self.symtab[self.getSectionAliasSymbolIndex(sect_id)]; +/// TODO handle multiple CUs +pub fn hasDebugInfo(self: Object) bool { + if (self.dwarf_info) |dw| { + return dw.compile_units.items.len > 0; + } + return self.hasSymbolStabs(); } -pub fn getSectionAliasSymbolPtr(self: *Object, sect_id: u8) *macho.nlist_64 { - return &self.symtab[self.getSectionAliasSymbolIndex(sect_id)]; +fn 
hasSymbolStabs(self: Object) bool { + return self.stab_files.items.len > 0; } -fn getSourceRelocs(self: Object, sect: macho.section_64) ?[]align(1) const macho.relocation_info { - if (sect.nreloc == 0) return null; - return @as([*]align(1) const macho.relocation_info, @ptrCast(self.contents.ptr + sect.reloff))[0..sect.nreloc]; +pub fn hasObjc(self: Object) bool { + for (self.symtab.items(.nlist)) |nlist| { + const name = self.getString(nlist.n_strx); + if (mem.startsWith(u8, name, "_OBJC_CLASS_$_")) return true; + } + for (self.sections.items(.header)) |sect| { + if (mem.eql(u8, sect.segName(), "__DATA") and mem.eql(u8, sect.sectName(), "__objc_catlist")) return true; + if (mem.eql(u8, sect.segName(), "__TEXT") and mem.eql(u8, sect.sectName(), "__swift")) return true; + } + return false; } -pub fn getRelocs(self: Object, sect_id: u8) []const macho.relocation_info { - const sect = self.getSourceSection(sect_id); - const start = self.section_relocs_lookup.items[sect_id]; - const len = sect.nreloc; - return self.relocations.items[start..][0..len]; +pub fn getDataInCode(self: Object) []align(1) const macho.data_in_code_entry { + const lc = self.getLoadCommand(.DATA_IN_CODE) orelse return &[0]macho.data_in_code_entry{}; + const cmd = lc.cast(macho.linkedit_data_command).?; + const ndice = @divExact(cmd.datasize, @sizeOf(macho.data_in_code_entry)); + const dice = @as( + [*]align(1) const macho.data_in_code_entry, + @ptrCast(self.data.ptr + cmd.dataoff), + )[0..ndice]; + return dice; } -pub fn getSymbolName(self: Object, index: u32) []const u8 { - const strtab = self.in_strtab.?; - const sym = self.symtab[index]; +pub inline fn hasSubsections(self: Object) bool { + return self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; +} - if (self.getSourceSymbol(index) == null) { - assert(sym.n_strx == 0); - return ""; +pub fn asFile(self: *Object) File { + return .{ .object = self }; +} + +pub fn format( + self: *Object, + comptime unused_fmt_string: []const u8, + 
options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = self; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format objects directly"); +} + +const FormatContext = struct { + object: *Object, + macho_file: *MachO, +}; + +pub fn fmtAtoms(self: *Object, macho_file: *MachO) std.fmt.Formatter(formatAtoms) { + return .{ .data = .{ + .object = self, + .macho_file = macho_file, + } }; +} + +fn formatAtoms( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + const object = ctx.object; + try writer.writeAll(" atoms\n"); + for (object.atoms.items) |atom_index| { + const atom = ctx.macho_file.getAtom(atom_index).?; + try writer.print(" {}\n", .{atom.fmt(ctx.macho_file)}); + } +} + +pub fn fmtCies(self: *Object, macho_file: *MachO) std.fmt.Formatter(formatCies) { + return .{ .data = .{ + .object = self, + .macho_file = macho_file, + } }; +} + +fn formatCies( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + const object = ctx.object; + try writer.writeAll(" cies\n"); + for (object.cies.items, 0..) |cie, i| { + try writer.print(" cie({d}) : {}\n", .{ i, cie.fmt(ctx.macho_file) }); + } +} + +pub fn fmtFdes(self: *Object, macho_file: *MachO) std.fmt.Formatter(formatFdes) { + return .{ .data = .{ + .object = self, + .macho_file = macho_file, + } }; +} + +fn formatFdes( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + const object = ctx.object; + try writer.writeAll(" fdes\n"); + for (object.fdes.items, 0..) 
|fde, i| { + try writer.print(" fde({d}) : {}\n", .{ i, fde.fmt(ctx.macho_file) }); + } +} + +pub fn fmtUnwindRecords(self: *Object, macho_file: *MachO) std.fmt.Formatter(formatUnwindRecords) { + return .{ .data = .{ + .object = self, + .macho_file = macho_file, + } }; +} + +fn formatUnwindRecords( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + const object = ctx.object; + const macho_file = ctx.macho_file; + try writer.writeAll(" unwind records\n"); + for (object.unwind_records.items) |rec| { + try writer.print(" rec({d}) : {}\n", .{ rec, macho_file.getUnwindRecord(rec).fmt(macho_file) }); + } +} + +pub fn fmtSymtab(self: *Object, macho_file: *MachO) std.fmt.Formatter(formatSymtab) { + return .{ .data = .{ + .object = self, + .macho_file = macho_file, + } }; +} + +fn formatSymtab( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + const object = ctx.object; + try writer.writeAll(" symbols\n"); + for (object.symbols.items) |index| { + const sym = ctx.macho_file.getSymbol(index); + try writer.print(" {}\n", .{sym.fmt(ctx.macho_file)}); + } +} + +pub fn fmtPath(self: Object) std.fmt.Formatter(formatPath) { + return .{ .data = self }; +} + +fn formatPath( + object: Object, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + if (object.archive) |path| { + try writer.writeAll(path); + try writer.writeByte('('); + try writer.writeAll(object.path); + try writer.writeByte(')'); + } else try writer.writeAll(object.path); +} + +const Section = struct { + header: macho.section_64, + subsections: std.ArrayListUnmanaged(Subsection) = .{}, + relocs: std.ArrayListUnmanaged(Relocation) = .{}, +}; + +const Subsection = struct { + atom: Atom.Index, + 
off: u64, +}; + +const Nlist = struct { + nlist: macho.nlist_64, + size: u64, + atom: Atom.Index, +}; + +const StabFile = struct { + comp_dir: u32, + stabs: std.ArrayListUnmanaged(Stab) = .{}, + + fn getCompDir(sf: StabFile, object: *const Object) [:0]const u8 { + const nlist = object.symtab.items(.nlist)[sf.comp_dir]; + return object.getString(nlist.n_strx); } - const start = sym.n_strx; - const len = self.strtab_lookup[index]; + fn getTuName(sf: StabFile, object: *const Object) [:0]const u8 { + const nlist = object.symtab.items(.nlist)[sf.comp_dir + 1]; + return object.getString(nlist.n_strx); + } - return strtab[start..][0 .. len - 1 :0]; -} + fn getOsoPath(sf: StabFile, object: *const Object) [:0]const u8 { + const nlist = object.symtab.items(.nlist)[sf.comp_dir + 2]; + return object.getString(nlist.n_strx); + } -fn getSymbolAliases(self: Object, index: u32) Entry { - const addr = self.source_address_lookup[index]; - var start = index; - while (start > 0 and - self.source_address_lookup[start - 1] == addr) : (start -= 1) - {} - const end: u32 = for (self.source_address_lookup[start..], start..) 
|saddr, i| { - if (saddr != addr) break @as(u32, @intCast(i)); - } else @as(u32, @intCast(self.source_address_lookup.len)); - return .{ .start = start, .len = end - start }; -} + fn getOsoModTime(sf: StabFile, object: *const Object) u64 { + const nlist = object.symtab.items(.nlist)[sf.comp_dir + 2]; + return nlist.n_value; + } -pub fn getSymbolByAddress(self: Object, addr: u64, sect_hint: ?u8) u32 { - // Find containing atom - const Predicate = struct { - addr: i64, + const Stab = struct { + tag: enum { func, global, static } = .func, + symbol: ?Symbol.Index = null, - pub fn predicate(pred: @This(), other: i64) bool { - return if (other == -1) true else other > pred.addr; + fn getSymbol(stab: Stab, macho_file: *MachO) ?*Symbol { + return if (stab.symbol) |s| macho_file.getSymbol(s) else null; } }; +}; - if (sect_hint) |sect_id| { - if (self.source_section_index_lookup[sect_id].len > 0) { - const lookup = self.source_section_index_lookup[sect_id]; - const target_sym_index = MachO.lsearch( - i64, - self.source_address_lookup[lookup.start..][0..lookup.len], - Predicate{ .addr = @as(i64, @intCast(addr)) }, - ); - if (target_sym_index > 0) { - // Hone in on the most senior alias of the target symbol. - // See SymbolAtIndex.lessThan for more context. 
- const aliases = self.getSymbolAliases(@intCast(lookup.start + target_sym_index - 1)); - return aliases.start; - } +const x86_64 = struct { + fn parseRelocs( + self: *const Object, + n_sect: u8, + sect: macho.section_64, + out: *std.ArrayListUnmanaged(Relocation), + macho_file: *MachO, + ) !void { + const gpa = macho_file.base.allocator; + + const relocs = @as( + [*]align(1) const macho.relocation_info, + @ptrCast(self.data.ptr + sect.reloff), + )[0..sect.nreloc]; + const code = self.getSectionData(@intCast(n_sect)); + + try out.ensureTotalCapacityPrecise(gpa, relocs.len); + + var i: usize = 0; + while (i < relocs.len) : (i += 1) { + const rel = relocs[i]; + const rel_type: macho.reloc_type_x86_64 = @enumFromInt(rel.r_type); + const rel_offset = @as(u32, @intCast(rel.r_address)); + + var addend = switch (rel.r_length) { + 0 => code[rel_offset], + 1 => mem.readInt(i16, code[rel_offset..][0..2], .little), + 2 => mem.readInt(i32, code[rel_offset..][0..4], .little), + 3 => mem.readInt(i64, code[rel_offset..][0..8], .little), + }; + addend += switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) { + .X86_64_RELOC_SIGNED_1 => 1, + .X86_64_RELOC_SIGNED_2 => 2, + .X86_64_RELOC_SIGNED_4 => 4, + else => 0, + }; + + const target = if (rel.r_extern == 0) blk: { + const nsect = rel.r_symbolnum - 1; + const taddr: i64 = if (rel.r_pcrel == 1) + @as(i64, @intCast(sect.addr)) + rel.r_address + addend + 4 + else + addend; + const target = self.findAtomInSection(@intCast(taddr), @intCast(nsect)) orelse { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ + self.fmtPath(), sect.segName(), sect.sectName(), rel.r_address, + }); + return error.ParseFailed; + }; + addend = taddr - @as(i64, @intCast(macho_file.getAtom(target).?.getInputAddress(macho_file))); + break :blk target; + } else self.symbols.items[rel.r_symbolnum]; + + const has_subtractor = if (i > 0 and + @as(macho.reloc_type_x86_64, @enumFromInt(relocs[i - 1].r_type)) == .X86_64_RELOC_SUBTRACTOR) + blk: 
{ + if (rel_type != .X86_64_RELOC_UNSIGNED) { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: X86_64_RELOC_SUBTRACTOR followed by {s}", .{ + self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type), + }); + return error.ParseFailed; + } + break :blk true; + } else false; + + const @"type": Relocation.Type = validateRelocType(rel, rel_type) catch |err| { + switch (err) { + error.Pcrel => macho_file.base.fatal( + "{}: {s},{s}: 0x{x}: PC-relative {s} relocation", + .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, + ), + error.NonPcrel => macho_file.base.fatal( + "{}: {s},{s}: 0x{x}: non-PC-relative {s} relocation", + .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, + ), + error.InvalidLength => macho_file.base.fatal( + "{}: {s},{s}: 0x{x}: invalid length of {d} in {s} relocation", + .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @as(u8, 1) << rel.r_length, @tagName(rel_type) }, + ), + error.NonExtern => macho_file.base.fatal( + "{}: {s},{s}: 0x{x}: non-extern target in {s} relocation", + .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, + ), + } + return error.ParseFailed; + }; + + out.appendAssumeCapacity(.{ + .tag = if (rel.r_extern == 1) .@"extern" else .local, + .offset = @as(u32, @intCast(rel.r_address)), + .target = target, + .addend = addend, + .type = @"type", + .meta = .{ + .pcrel = rel.r_pcrel == 1, + .has_subtractor = has_subtractor, + .length = rel.r_length, + .symbolnum = rel.r_symbolnum, + }, + }); } - return self.getSectionAliasSymbolIndex(sect_id); } - const target_sym_index = MachO.lsearch(i64, self.source_address_lookup, Predicate{ - .addr = @as(i64, @intCast(addr)), - }); - assert(target_sym_index > 0); - return @as(u32, @intCast(target_sym_index - 1)); -} + fn validateRelocType(rel: macho.relocation_info, rel_type: macho.reloc_type_x86_64) !Relocation.Type { + switch (rel_type) { + .X86_64_RELOC_UNSIGNED 
=> { + if (rel.r_pcrel == 1) return error.Pcrel; + if (rel.r_length != 2 and rel.r_length != 3) return error.InvalidLength; + return .unsigned; + }, -pub fn getGlobal(self: Object, sym_index: u32) ?u32 { - if (self.globals_lookup[sym_index] == -1) return null; - return @as(u32, @intCast(self.globals_lookup[sym_index])); -} + .X86_64_RELOC_SUBTRACTOR => { + if (rel.r_pcrel == 1) return error.Pcrel; + return .subtractor; + }, -pub fn getAtomIndexForSymbol(self: Object, sym_index: u32) ?Atom.Index { - return self.atom_by_index_table[sym_index]; -} + .X86_64_RELOC_BRANCH, + .X86_64_RELOC_GOT_LOAD, + .X86_64_RELOC_GOT, + .X86_64_RELOC_TLV, + => { + if (rel.r_pcrel == 0) return error.NonPcrel; + if (rel.r_length != 2) return error.InvalidLength; + if (rel.r_extern == 0) return error.NonExtern; + return switch (rel_type) { + .X86_64_RELOC_BRANCH => .branch, + .X86_64_RELOC_GOT_LOAD => .got_load, + .X86_64_RELOC_GOT => .got, + .X86_64_RELOC_TLV => .tlv, + else => unreachable, + }; + }, -pub fn hasUnwindRecords(self: Object) bool { - return self.unwind_info_sect_id != null; -} + .X86_64_RELOC_SIGNED, + .X86_64_RELOC_SIGNED_1, + .X86_64_RELOC_SIGNED_2, + .X86_64_RELOC_SIGNED_4, + => { + if (rel.r_pcrel == 0) return error.NonPcrel; + if (rel.r_length != 2) return error.InvalidLength; + return switch (rel_type) { + .X86_64_RELOC_SIGNED => .signed, + .X86_64_RELOC_SIGNED_1 => .signed1, + .X86_64_RELOC_SIGNED_2 => .signed2, + .X86_64_RELOC_SIGNED_4 => .signed4, + else => unreachable, + }; + }, + } + } +}; -pub fn getUnwindRecords(self: Object) []align(1) const macho.compact_unwind_entry { - const sect_id = self.unwind_info_sect_id orelse return &[0]macho.compact_unwind_entry{}; - const sect = self.getSourceSection(sect_id); - const data = self.getSectionContents(sect); - const num_entries = @divExact(data.len, @sizeOf(macho.compact_unwind_entry)); - return @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data))[0..num_entries]; -} +const aarch64 = struct { + fn 
parseRelocs( + self: *const Object, + n_sect: u8, + sect: macho.section_64, + out: *std.ArrayListUnmanaged(Relocation), + macho_file: *MachO, + ) !void { + const gpa = macho_file.base.allocator; -pub fn hasEhFrameRecords(self: Object) bool { - return self.eh_frame_sect_id != null; -} + const relocs = @as( + [*]align(1) const macho.relocation_info, + @ptrCast(self.data.ptr + sect.reloff), + )[0..sect.nreloc]; + const code = self.getSectionData(@intCast(n_sect)); -pub fn getEhFrameRecordsIterator(self: Object) eh_frame.Iterator { - const sect_id = self.eh_frame_sect_id orelse return .{ .data = &[0]u8{} }; - const sect = self.getSourceSection(sect_id); - const data = self.getSectionContents(sect); - return .{ .data = data }; -} + try out.ensureTotalCapacityPrecise(gpa, relocs.len); -pub fn hasDataInCode(self: Object) bool { - return self.data_in_code.items.len > 0; -} + var i: usize = 0; + while (i < relocs.len) : (i += 1) { + var rel = relocs[i]; + const rel_offset = @as(u32, @intCast(rel.r_address)); -const Object = @This(); + var addend: i64 = 0; + + switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { + .ARM64_RELOC_ADDEND => { + addend = rel.r_symbolnum; + i += 1; + if (i >= relocs.len) { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: unterminated ARM64_RELOC_ADDEND", .{ + self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, + }); + return error.ParseFailed; + } + rel = relocs[i]; + switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { + .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {}, + else => |x| { + macho_file.base.fatal( + "{}: {s},{s}: 0x{x}: ARM64_RELOC_ADDEND followed by {s}", + .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(x) }, + ); + return error.ParseFailed; + }, + } + }, + .ARM64_RELOC_UNSIGNED => { + addend = switch (rel.r_length) { + 0 => code[rel_offset], + 1 => mem.readInt(i16, code[rel_offset..][0..2], .little), + 2 => mem.readInt(i32, code[rel_offset..][0..4], .little), + 3 => 
mem.readInt(i64, code[rel_offset..][0..8], .little), + }; + }, + else => {}, + } + + const rel_type: macho.reloc_type_arm64 = @enumFromInt(rel.r_type); + + const target = if (rel.r_extern == 0) blk: { + const nsect = rel.r_symbolnum - 1; + const taddr: i64 = if (rel.r_pcrel == 1) + @as(i64, @intCast(sect.addr)) + rel.r_address + addend + else + addend; + const target = self.findAtomInSection(@intCast(taddr), @intCast(nsect)) orelse { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ + self.fmtPath(), sect.segName(), sect.sectName(), rel.r_address, + }); + return error.ParseFailed; + }; + addend = taddr - @as(i64, @intCast(macho_file.getAtom(target).?.getInputAddress(macho_file))); + break :blk target; + } else self.symbols.items[rel.r_symbolnum]; + + const has_subtractor = if (i > 0 and + @as(macho.reloc_type_arm64, @enumFromInt(relocs[i - 1].r_type)) == .ARM64_RELOC_SUBTRACTOR) + blk: { + if (rel_type != .ARM64_RELOC_UNSIGNED) { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: ARM64_RELOC_SUBTRACTOR followed by {s}", .{ + self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type), + }); + return error.ParseFailed; + } + break :blk true; + } else false; + + const @"type": Relocation.Type = validateRelocType(rel, rel_type) catch |err| { + switch (err) { + error.Pcrel => macho_file.base.fatal( + "{}: {s},{s}: 0x{x}: PC-relative {s} relocation", + .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, + ), + error.NonPcrel => macho_file.base.fatal( + "{}: {s},{s}: 0x{x}: non-PC-relative {s} relocation", + .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, + ), + error.InvalidLength => macho_file.base.fatal( + "{}: {s},{s}: 0x{x}: invalid length of {d} in {s} relocation", + .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @as(u8, 1) << rel.r_length, @tagName(rel_type) }, + ), + error.NonExtern => macho_file.base.fatal( + "{}: {s},{s}: 0x{x}: non-extern 
target in {s} relocation", + .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, + ), + } + return error.ParseFailed; + }; + + out.appendAssumeCapacity(.{ + .tag = if (rel.r_extern == 1) .@"extern" else .local, + .offset = @as(u32, @intCast(rel.r_address)), + .target = target, + .addend = addend, + .type = @"type", + .meta = .{ + .pcrel = rel.r_pcrel == 1, + .has_subtractor = has_subtractor, + .length = rel.r_length, + .symbolnum = rel.r_symbolnum, + }, + }); + } + } + + fn validateRelocType(rel: macho.relocation_info, rel_type: macho.reloc_type_arm64) !Relocation.Type { + switch (rel_type) { + .ARM64_RELOC_UNSIGNED => { + if (rel.r_pcrel == 1) return error.Pcrel; + if (rel.r_length != 2 and rel.r_length != 3) return error.InvalidLength; + return .unsigned; + }, + + .ARM64_RELOC_SUBTRACTOR => { + if (rel.r_pcrel == 1) return error.Pcrel; + return .subtractor; + }, + + .ARM64_RELOC_BRANCH26, + .ARM64_RELOC_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_TLVP_LOAD_PAGE21, + .ARM64_RELOC_POINTER_TO_GOT, + => { + if (rel.r_pcrel == 0) return error.NonPcrel; + if (rel.r_length != 2) return error.InvalidLength; + if (rel.r_extern == 0) return error.NonExtern; + return switch (rel_type) { + .ARM64_RELOC_BRANCH26 => .branch, + .ARM64_RELOC_PAGE21 => .page, + .ARM64_RELOC_GOT_LOAD_PAGE21 => .got_load_page, + .ARM64_RELOC_TLVP_LOAD_PAGE21 => .tlvp_page, + .ARM64_RELOC_POINTER_TO_GOT => .got, + else => unreachable, + }; + }, + + .ARM64_RELOC_PAGEOFF12, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, + => { + if (rel.r_pcrel == 1) return error.Pcrel; + if (rel.r_length != 2) return error.InvalidLength; + if (rel.r_extern == 0) return error.NonExtern; + return switch (rel_type) { + .ARM64_RELOC_PAGEOFF12 => .pageoff, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => .got_load_pageoff, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => .tlvp_pageoff, + else => unreachable, + }; + }, + + .ARM64_RELOC_ADDEND => unreachable, // We make it part 
of the addend field + } + } +}; -const std = @import("std"); -const build_options = @import("build_options"); const assert = std.debug.assert; -const dwarf = std.dwarf; const eh_frame = @import("eh_frame.zig"); -const fs = std.fs; -const io = std.io; const log = std.log.scoped(.link); const macho = std.macho; const math = std.math; const mem = std.mem; -const sort = std.sort; -const trace = @import("../../tracy.zig").trace; +const trace = @import("../tracy.zig").trace; +const std = @import("std"); const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); +const Cie = eh_frame.Cie; const DwarfInfo = @import("DwarfInfo.zig"); +const Fde = eh_frame.Fde; +const File = @import("file.zig").File; const LoadCommandIterator = macho.LoadCommandIterator; const MachO = @import("../MachO.zig"); -const Platform = @import("load_commands.zig").Platform; -const SymbolWithLoc = MachO.SymbolWithLoc; +const Object = @This(); +const Relocation = @import("Relocation.zig"); +const StringTable = @import("../strtab.zig").StringTable; +const Symbol = @import("Symbol.zig"); const UnwindInfo = @import("UnwindInfo.zig"); -const Alignment = Atom.Alignment; diff --git a/src/link/MachO/Relocation.zig b/src/link/MachO/Relocation.zig index 85c19c7608..f77e0c8792 100644 --- a/src/link/MachO/Relocation.zig +++ b/src/link/MachO/Relocation.zig @@ -1,235 +1,62 @@ -//! Relocation used by the self-hosted backends to instruct the linker where and how to -//! fixup the values when flushing the contents to file and/or memory. 
- -type: Type, -target: SymbolWithLoc, +tag: enum { @"extern", local }, offset: u32, +target: u32, addend: i64, -pcrel: bool, -length: u2, -dirty: bool = true, +type: Type, +meta: packed struct { + pcrel: bool, + has_subtractor: bool, + length: u2, + symbolnum: u24, +}, -pub const Type = enum { - // x86, x86_64 - /// RIP-relative displacement to a GOT pointer - got, - /// RIP-relative displacement - signed, - /// RIP-relative displacement to a TLV thunk - tlv, - - // aarch64 - /// PC-relative distance to target page in GOT section - got_page, - /// Offset to a GOT pointer relative to the start of a page in GOT section - got_pageoff, - /// PC-relative distance to target page in a section - page, - /// Offset to a pointer relative to the start of a page in a section - pageoff, - - // common - /// PC/RIP-relative displacement B/BL/CALL - branch, - /// Absolute pointer value - unsigned, - /// Relative offset to TLV initializer - tlv_initializer, -}; - -/// Returns true if and only if the reloc can be resolved. 
-pub fn isResolvable(self: Relocation, macho_file: *MachO) bool { - _ = self.getTargetBaseAddress(macho_file) orelse return false; - return true; +pub fn getTargetSymbol(rel: Relocation, macho_file: *MachO) *Symbol { + assert(rel.tag == .@"extern"); + return macho_file.getSymbol(rel.target); } -pub fn isGotIndirection(self: Relocation) bool { - return switch (self.type) { - .got, .got_page, .got_pageoff => true, - else => false, +pub fn getTargetAtom(rel: Relocation, macho_file: *MachO) *Atom { + assert(rel.tag == .local); + return macho_file.getAtom(rel.target).?; +} + +pub fn getTargetAddress(rel: Relocation, macho_file: *MachO) u64 { + return switch (rel.tag) { + .local => rel.getTargetAtom(macho_file).value, + .@"extern" => rel.getTargetSymbol(macho_file).getAddress(.{}, macho_file), }; } -pub fn isStubTrampoline(self: Relocation, macho_file: *MachO) bool { - return switch (self.type) { - .branch => macho_file.getSymbol(self.target).undf(), - else => false, +pub fn getGotTargetAddress(rel: Relocation, macho_file: *MachO) u64 { + return switch (rel.tag) { + .local => 0, + .@"extern" => rel.getTargetSymbol(macho_file).getGotAddress(macho_file), }; } -pub fn getTargetBaseAddress(self: Relocation, macho_file: *MachO) ?u64 { - const target = macho_file.base.comp.root_mod.resolved_target.result; - if (self.isStubTrampoline(macho_file)) { - const index = macho_file.stub_table.lookup.get(self.target) orelse return null; - const header = macho_file.sections.items(.header)[macho_file.stubs_section_index.?]; - return header.addr + - index * @import("stubs.zig").stubSize(target.cpu.arch); - } - switch (self.type) { - .got, .got_page, .got_pageoff => { - const got_index = macho_file.got_table.lookup.get(self.target) orelse return null; - const header = macho_file.sections.items(.header)[macho_file.got_section_index.?]; - return header.addr + got_index * @sizeOf(u64); - }, - .tlv => { - const atom_index = macho_file.tlv_table.get(self.target) orelse return null; - const atom 
= macho_file.getAtom(atom_index); - return atom.getSymbol(macho_file).n_value; - }, - else => { - const target_atom_index = macho_file.getAtomIndexForSymbol(self.target) orelse return null; - const target_atom = macho_file.getAtom(target_atom_index); - return target_atom.getSymbol(macho_file).n_value; - }, - } -} - -pub fn resolve(self: Relocation, macho_file: *MachO, atom_index: Atom.Index, code: []u8) void { - const target = macho_file.base.comp.root_mod.resolved_target.result; - const arch = target.cpu.arch; - const atom = macho_file.getAtom(atom_index); - const source_sym = atom.getSymbol(macho_file); - const source_addr = source_sym.n_value + self.offset; - - const target_base_addr = self.getTargetBaseAddress(macho_file).?; // Oops, you didn't check if the relocation can be resolved with isResolvable(). - const target_addr: i64 = switch (self.type) { - .tlv_initializer => blk: { - assert(self.addend == 0); // Addend here makes no sense. - const header = macho_file.sections.items(.header)[macho_file.thread_data_section_index.?]; - break :blk @as(i64, @intCast(target_base_addr - header.addr)); - }, - else => @as(i64, @intCast(target_base_addr)) + self.addend, +pub fn getRelocAddend(rel: Relocation, cpu_arch: std.Target.Cpu.Arch) i64 { + const addend: i64 = switch (rel.type) { + .signed => 0, + .signed1 => -1, + .signed2 => -2, + .signed4 => -4, + else => 0, + }; + return switch (cpu_arch) { + .x86_64 => if (rel.meta.pcrel) addend - 4 else addend, + else => addend, }; - - relocs_log.debug(" ({x}: [() => 0x{x} ({s})) ({s})", .{ - source_addr, - target_addr, - macho_file.getSymbolName(self.target), - @tagName(self.type), - }); - - switch (arch) { - .aarch64 => self.resolveAarch64(source_addr, target_addr, code), - .x86_64 => self.resolveX8664(source_addr, target_addr, code), - else => unreachable, - } } -fn resolveAarch64(self: Relocation, source_addr: u64, target_addr: i64, code: []u8) void { - var buffer = code[self.offset..]; - switch (self.type) { - .branch => 
{ - const displacement = math.cast( - i28, - @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr)), - ) orelse unreachable; // TODO codegen should never allow for jump larger than i28 displacement - var inst = aarch64.Instruction{ - .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.unconditional_branch_immediate, - ), buffer[0..4]), - }; - inst.unconditional_branch_immediate.imm26 = @as(u26, @truncate(@as(u28, @bitCast(displacement >> 2)))); - mem.writeInt(u32, buffer[0..4], inst.toU32(), .little); - }, - .page, .got_page => { - const source_page = @as(i32, @intCast(source_addr >> 12)); - const target_page = @as(i32, @intCast(target_addr >> 12)); - const pages = @as(u21, @bitCast(@as(i21, @intCast(target_page - source_page)))); - var inst = aarch64.Instruction{ - .pc_relative_address = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.pc_relative_address, - ), buffer[0..4]), - }; - inst.pc_relative_address.immhi = @as(u19, @truncate(pages >> 2)); - inst.pc_relative_address.immlo = @as(u2, @truncate(pages)); - mem.writeInt(u32, buffer[0..4], inst.toU32(), .little); - }, - .pageoff, .got_pageoff => { - const narrowed = @as(u12, @truncate(@as(u64, @intCast(target_addr)))); - if (isArithmeticOp(buffer[0..4])) { - var inst = aarch64.Instruction{ - .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), buffer[0..4]), - }; - inst.add_subtract_immediate.imm12 = narrowed; - mem.writeInt(u32, buffer[0..4], inst.toU32(), .little); - } else { - var inst = aarch64.Instruction{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), buffer[0..4]), - }; - const offset: u12 = blk: { - if (inst.load_store_register.size == 0) { - if (inst.load_store_register.v == 1) { - // 128-bit SIMD is scaled by 16. 
- break :blk @divExact(narrowed, 16); - } - // Otherwise, 8-bit SIMD or ldrb. - break :blk narrowed; - } else { - const denom: u4 = math.powi(u4, 2, inst.load_store_register.size) catch unreachable; - break :blk @divExact(narrowed, denom); - } - }; - inst.load_store_register.offset = offset; - mem.writeInt(u32, buffer[0..4], inst.toU32(), .little); - } - }, - .tlv_initializer, .unsigned => switch (self.length) { - 2 => mem.writeInt(u32, buffer[0..4], @as(u32, @truncate(@as(u64, @bitCast(target_addr)))), .little), - 3 => mem.writeInt(u64, buffer[0..8], @as(u64, @bitCast(target_addr)), .little), - else => unreachable, - }, - .got, .signed, .tlv => unreachable, // Invalid target architecture. - } +pub fn lessThan(ctx: void, lhs: Relocation, rhs: Relocation) bool { + _ = ctx; + return lhs.offset < rhs.offset; } -fn resolveX8664(self: Relocation, source_addr: u64, target_addr: i64, code: []u8) void { - switch (self.type) { - .branch, .got, .tlv, .signed => { - const displacement = @as(i32, @intCast(@as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr)) - 4)); - mem.writeInt(u32, code[self.offset..][0..4], @as(u32, @bitCast(displacement)), .little); - }, - .tlv_initializer, .unsigned => { - switch (self.length) { - 2 => { - mem.writeInt(u32, code[self.offset..][0..4], @as(u32, @truncate(@as(u64, @bitCast(target_addr)))), .little); - }, - 3 => { - mem.writeInt(u64, code[self.offset..][0..8], @as(u64, @bitCast(target_addr)), .little); - }, - else => unreachable, - } - }, - .got_page, .got_pageoff, .page, .pageoff => unreachable, // Invalid target architecture. 
- } -} - -pub inline fn isArithmeticOp(inst: *const [4]u8) bool { - const group_decode = @as(u5, @truncate(inst[3])); - return ((group_decode >> 2) == 4); -} - -pub fn calcPcRelativeDisplacementX86(source_addr: u64, target_addr: u64, correction: u3) error{Overflow}!i32 { - const disp = @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr + 4 + correction)); - return math.cast(i32, disp) orelse error.Overflow; -} - -pub fn calcPcRelativeDisplacementArm64(source_addr: u64, target_addr: u64) error{Overflow}!i28 { - const disp = @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr)); - return math.cast(i28, disp) orelse error.Overflow; -} - -pub fn calcNumberOfPages(source_addr: u64, target_addr: u64) i21 { - const source_page = @as(i32, @intCast(source_addr >> 12)); - const target_page = @as(i32, @intCast(target_addr >> 12)); - const pages = @as(i21, @intCast(target_page - source_page)); +pub fn calcNumberOfPages(saddr: u64, taddr: u64) error{Overflow}!i21 { + const spage = math.cast(i32, saddr >> 12) orelse return error.Overflow; + const tpage = math.cast(i32, taddr >> 12) orelse return error.Overflow; + const pages = math.cast(i21, tpage - spage) orelse return error.Overflow; return pages; } @@ -242,8 +69,8 @@ pub const PageOffsetInstKind = enum { load_store_128, }; -pub fn calcPageOffset(target_addr: u64, kind: PageOffsetInstKind) !u12 { - const narrowed = @as(u12, @truncate(target_addr)); +pub fn calcPageOffset(taddr: u64, kind: PageOffsetInstKind) !u12 { + const narrowed = @as(u12, @truncate(taddr)); return switch (kind) { .arithmetic, .load_store_8 => narrowed, .load_store_16 => try math.divExact(u12, narrowed, 2), @@ -253,17 +80,57 @@ pub fn calcPageOffset(target_addr: u64, kind: PageOffsetInstKind) !u12 { }; } -const Relocation = @This(); +pub inline fn isArithmeticOp(inst: *const [4]u8) bool { + const group_decode = @as(u5, @truncate(inst[3])); + return ((group_decode >> 2) == 4); +} + +pub const Type = enum { + // x86_64 + /// 
RIP-relative displacement (X86_64_RELOC_SIGNED) + signed, + /// RIP-relative displacement (X86_64_RELOC_SIGNED_1) + signed1, + /// RIP-relative displacement (X86_64_RELOC_SIGNED_2) + signed2, + /// RIP-relative displacement (X86_64_RELOC_SIGNED_4) + signed4, + /// RIP-relative GOT load (X86_64_RELOC_GOT_LOAD) + got_load, + /// RIP-relative TLV load (X86_64_RELOC_TLV) + tlv, + + // arm64 + /// PC-relative load (distance to page, ARM64_RELOC_PAGE21) + page, + /// Non-PC-relative offset to symbol (ARM64_RELOC_PAGEOFF12) + pageoff, + /// PC-relative GOT load (distance to page, ARM64_RELOC_GOT_LOAD_PAGE21) + got_load_page, + /// Non-PC-relative offset to GOT slot (ARM64_RELOC_GOT_LOAD_PAGEOFF12) + got_load_pageoff, + /// PC-relative TLV load (distance to page, ARM64_RELOC_TLVP_LOAD_PAGE21) + tlvp_page, + /// Non-PC-relative offset to TLV slot (ARM64_RELOC_TLVP_LOAD_PAGEOFF12) + tlvp_pageoff, + + // common + /// PC-relative call/bl/b (X86_64_RELOC_BRANCH or ARM64_RELOC_BRANCH26) + branch, + /// PC-relative displacement to GOT pointer (X86_64_RELOC_GOT or ARM64_RELOC_POINTER_TO_GOT) + got, + /// Absolute subtractor value (X86_64_RELOC_SUBTRACTOR or ARM64_RELOC_SUBTRACTOR) + subtractor, + /// Absolute relocation (X86_64_RELOC_UNSIGNED or ARM64_RELOC_UNSIGNED) + unsigned, +}; -const std = @import("std"); -const aarch64 = @import("../../arch/aarch64/bits.zig"); const assert = std.debug.assert; -const relocs_log = std.log.scoped(.link_relocs); const macho = std.macho; const math = std.math; -const mem = std.mem; -const meta = std.meta; +const std = @import("std"); const Atom = @import("Atom.zig"); const MachO = @import("../MachO.zig"); -const SymbolWithLoc = MachO.SymbolWithLoc; +const Relocation = @This(); +const Symbol = @import("Symbol.zig"); diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig new file mode 100644 index 0000000000..35e53534a8 --- /dev/null +++ b/src/link/MachO/Symbol.zig @@ -0,0 +1,383 @@ +//! Represents a defined symbol. 
+ +/// Allocated address value of this symbol. +value: u64 = 0, + +/// Offset into the linker's intern table. +name: u32 = 0, + +/// File where this symbol is defined. +file: File.Index = 0, + +/// Atom containing this symbol if any. +/// Index of 0 means there is no associated atom with this symbol. +/// Use `getAtom` to get the pointer to the atom. +atom: Atom.Index = 0, + +/// Assigned output section index for this atom. +out_n_sect: u16 = 0, + +/// Index of the source nlist this symbol references. +/// Use `getNlist` to pull the nlist from the relevant file. +nlist_idx: u32 = 0, + +/// Misc flags for the symbol packaged as packed struct for compression. +flags: Flags = .{}, + +visibility: Visibility = .local, + +extra: u32 = 0, + +pub fn isLocal(symbol: Symbol) bool { + return !(symbol.flags.import or symbol.flags.@"export"); +} + +pub fn isSymbolStab(symbol: Symbol, macho_file: *MachO) bool { + const file = symbol.getFile(macho_file) orelse return false; + return switch (file) { + .object => symbol.getNlist(macho_file).stab(), + else => false, + }; +} + +pub fn isTlvInit(symbol: Symbol, macho_file: *MachO) bool { + const name = symbol.getName(macho_file); + return std.mem.indexOf(u8, name, "$tlv$init") != null; +} + +pub fn weakRef(symbol: Symbol, macho_file: *MachO) bool { + const file = symbol.getFile(macho_file).?; + const is_dylib_weak = switch (file) { + .dylib => |x| x.weak, + else => false, + }; + return is_dylib_weak or symbol.flags.weak_ref; +} + +pub fn getName(symbol: Symbol, macho_file: *MachO) [:0]const u8 { + return macho_file.string_intern.getAssumeExists(symbol.name); +} + +pub fn getAtom(symbol: Symbol, macho_file: *MachO) ?*Atom { + return macho_file.getAtom(symbol.atom); +} + +pub fn getFile(symbol: Symbol, macho_file: *MachO) ?File { + return macho_file.getFile(symbol.file); +} + +/// Asserts file is an object. 
+pub fn getNlist(symbol: Symbol, macho_file: *MachO) macho.nlist_64 { + const file = symbol.getFile(macho_file).?; + return switch (file) { + .object => |x| x.symtab.items(.nlist)[symbol.nlist_idx], + else => unreachable, + }; +} + +pub fn getSize(symbol: Symbol, macho_file: *MachO) u64 { + const file = symbol.getFile(macho_file).?; + assert(file == .object); + return file.object.symtab.items(.size)[symbol.nlist_idx]; +} + +pub fn getDylibOrdinal(symbol: Symbol, macho_file: *MachO) ?u16 { + assert(symbol.flags.import); + const file = symbol.getFile(macho_file) orelse return null; + return switch (file) { + .dylib => |x| x.ordinal, + else => null, + }; +} + +pub fn getSymbolRank(symbol: Symbol, macho_file: *MachO) u32 { + const file = symbol.getFile(macho_file) orelse return std.math.maxInt(u32); + const in_archive = switch (file) { + .object => |x| !x.alive, + else => false, + }; + return file.getSymbolRank(.{ + .archive = in_archive, + .weak = symbol.flags.weak, + .tentative = symbol.flags.tentative, + }); +} + +pub fn getAddress(symbol: Symbol, opts: struct { + stubs: bool = true, +}, macho_file: *MachO) u64 { + if (opts.stubs) { + if (symbol.flags.stubs) { + return symbol.getStubsAddress(macho_file); + } else if (symbol.flags.objc_stubs) { + return symbol.getObjcStubsAddress(macho_file); + } + } + if (symbol.getAtom(macho_file)) |atom| return atom.value + symbol.value; + return symbol.value; +} + +pub fn getGotAddress(symbol: Symbol, macho_file: *MachO) u64 { + if (!symbol.flags.got) return 0; + const extra = symbol.getExtra(macho_file).?; + return macho_file.got.getAddress(extra.got, macho_file); +} + +pub fn getStubsAddress(symbol: Symbol, macho_file: *MachO) u64 { + if (!symbol.flags.stubs) return 0; + const extra = symbol.getExtra(macho_file).?; + return macho_file.stubs.getAddress(extra.stubs, macho_file); +} + +pub fn getObjcStubsAddress(symbol: Symbol, macho_file: *MachO) u64 { + if (!symbol.flags.objc_stubs) return 0; + const extra = 
symbol.getExtra(macho_file).?; + return macho_file.objc_stubs.getAddress(extra.objc_stubs, macho_file); +} + +pub fn getObjcSelrefsAddress(symbol: Symbol, macho_file: *MachO) u64 { + if (!symbol.flags.objc_stubs) return 0; + const extra = symbol.getExtra(macho_file).?; + const atom = macho_file.getAtom(extra.objc_selrefs).?; + assert(atom.flags.alive); + return atom.value; +} + +pub fn getTlvPtrAddress(symbol: Symbol, macho_file: *MachO) u64 { + if (!symbol.flags.tlv_ptr) return 0; + const extra = symbol.getExtra(macho_file).?; + return macho_file.tlv_ptr.getAddress(extra.tlv_ptr, macho_file); +} + +pub fn getOutputSymtabIndex(symbol: Symbol, macho_file: *MachO) ?u32 { + if (!symbol.flags.output_symtab) return null; + assert(!symbol.isSymbolStab(macho_file)); + const file = symbol.getFile(macho_file).?; + const symtab_ctx = switch (file) { + inline else => |x| x.output_symtab_ctx, + }; + var idx = symbol.getExtra(macho_file).?.symtab; + if (symbol.isLocal()) { + idx += symtab_ctx.ilocal; + } else if (symbol.flags.@"export") { + idx += symtab_ctx.iexport; + } else { + assert(symbol.flags.import); + idx += symtab_ctx.iimport; + } + return idx; +} + +const AddExtraOpts = struct { + got: ?u32 = null, + stubs: ?u32 = null, + objc_stubs: ?u32 = null, + objc_selrefs: ?u32 = null, + tlv_ptr: ?u32 = null, + symtab: ?u32 = null, +}; + +pub fn addExtra(symbol: *Symbol, opts: AddExtraOpts, macho_file: *MachO) !void { + if (symbol.getExtra(macho_file) == null) { + symbol.extra = try macho_file.addSymbolExtra(.{}); + } + var extra = symbol.getExtra(macho_file).?; + inline for (@typeInfo(@TypeOf(opts)).Struct.fields) |field| { + if (@field(opts, field.name)) |x| { + @field(extra, field.name) = x; + } + } + symbol.setExtra(extra, macho_file); +} + +pub inline fn getExtra(symbol: Symbol, macho_file: *MachO) ?Extra { + return macho_file.getSymbolExtra(symbol.extra); +} + +pub inline fn setExtra(symbol: Symbol, extra: Extra, macho_file: *MachO) void { + 
macho_file.setSymbolExtra(symbol.extra, extra); +} + +pub fn setOutputSym(symbol: Symbol, macho_file: *MachO, out: *macho.nlist_64) void { + if (symbol.isLocal()) { + out.n_type = if (symbol.flags.abs) macho.N_ABS else macho.N_SECT; + out.n_sect = if (symbol.flags.abs) 0 else @intCast(symbol.out_n_sect + 1); + out.n_desc = 0; + out.n_value = symbol.getAddress(.{}, macho_file); + + switch (symbol.visibility) { + .hidden => out.n_type |= macho.N_PEXT, + else => {}, + } + } else if (symbol.flags.@"export") { + assert(symbol.visibility == .global); + out.n_type = macho.N_EXT; + out.n_type |= if (symbol.flags.abs) macho.N_ABS else macho.N_SECT; + out.n_sect = if (symbol.flags.abs) 0 else @intCast(symbol.out_n_sect + 1); + out.n_value = symbol.getAddress(.{}, macho_file); + out.n_desc = 0; + + if (symbol.flags.weak) { + out.n_desc |= macho.N_WEAK_DEF; + } + if (symbol.flags.dyn_ref) { + out.n_desc |= macho.REFERENCED_DYNAMICALLY; + } + } else { + assert(symbol.visibility == .global); + out.n_type = macho.N_EXT; + out.n_sect = 0; + out.n_value = 0; + out.n_desc = 0; + + const ord: u16 = if (macho_file.options.namespace == .flat) + @as(u8, @bitCast(macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP)) + else if (symbol.getDylibOrdinal(macho_file)) |ord| + ord + else + macho.BIND_SPECIAL_DYLIB_SELF; + out.n_desc = macho.N_SYMBOL_RESOLVER * ord; + + if (symbol.flags.weak) { + out.n_desc |= macho.N_WEAK_DEF; + } + + if (symbol.weakRef(macho_file)) { + out.n_desc |= macho.N_WEAK_REF; + } + } +} + +pub fn format( + symbol: Symbol, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = symbol; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format symbols directly"); +} + +const FormatContext = struct { + symbol: Symbol, + macho_file: *MachO, +}; + +pub fn fmt(symbol: Symbol, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ + .symbol = symbol, + .macho_file = macho_file, + } }; +} + 
+fn format2( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = options; + _ = unused_fmt_string; + const symbol = ctx.symbol; + try writer.print("%{d} : {s} : @{x}", .{ + symbol.nlist_idx, + symbol.getName(ctx.macho_file), + symbol.getAddress(.{}, ctx.macho_file), + }); + if (symbol.getFile(ctx.macho_file)) |file| { + if (symbol.out_n_sect != 0) { + try writer.print(" : sect({d})", .{symbol.out_n_sect}); + } + if (symbol.getAtom(ctx.macho_file)) |atom| { + try writer.print(" : atom({d})", .{atom.atom_index}); + } + var buf: [2]u8 = .{'_'} ** 2; + if (symbol.flags.@"export") buf[0] = 'E'; + if (symbol.flags.import) buf[1] = 'I'; + try writer.print(" : {s}", .{&buf}); + if (symbol.flags.weak) try writer.writeAll(" : weak"); + if (symbol.isSymbolStab(ctx.macho_file)) try writer.writeAll(" : stab"); + switch (file) { + .internal => |x| try writer.print(" : internal({d})", .{x.index}), + .object => |x| try writer.print(" : object({d})", .{x.index}), + .dylib => |x| try writer.print(" : dylib({d})", .{x.index}), + } + } else try writer.writeAll(" : unresolved"); +} + +pub const Flags = packed struct { + /// Whether the symbol is imported at runtime. + import: bool = false, + + /// Whether the symbol is exported at runtime. + @"export": bool = false, + + /// Whether this symbol is weak. + weak: bool = false, + + /// Whether this symbol is weakly referenced. + weak_ref: bool = false, + + /// Whether this symbol is dynamically referenced. + dyn_ref: bool = false, + + /// Whether this symbol was marked as N_NO_DEAD_STRIP. + no_dead_strip: bool = false, + + /// Whether this symbol can be interposed at runtime. + interposable: bool = false, + + /// Whether this symbol is absolute. + abs: bool = false, + + /// Whether this symbol is a tentative definition. + tentative: bool = false, + + /// Whether this symbol is a thread-local variable. 
+ tlv: bool = false, + + /// Whether the symbol makes into the output symtab or not. + output_symtab: bool = false, + + /// Whether the symbol contains __got indirection. + got: bool = false, + + /// Whether the symbols contains __stubs indirection. + stubs: bool = false, + + /// Whether the symbol has a TLV pointer. + tlv_ptr: bool = false, + + /// Whether the symbol contains __objc_stubs indirection. + objc_stubs: bool = false, +}; + +pub const Visibility = enum { + global, + hidden, + local, +}; + +pub const Extra = struct { + got: u32 = 0, + stubs: u32 = 0, + objc_stubs: u32 = 0, + objc_selrefs: u32 = 0, + tlv_ptr: u32 = 0, + symtab: u32 = 0, +}; + +pub const Index = u32; + +const assert = std.debug.assert; +const macho = std.macho; +const std = @import("std"); + +const Atom = @import("Atom.zig"); +const File = @import("file.zig").File; +const MachO = @import("../MachO.zig"); +const Object = @import("Object.zig"); +const Symbol = @This(); diff --git a/src/link/MachO/UnwindInfo.zig b/src/link/MachO/UnwindInfo.zig index 7223b5555f..1d0bfc1ff9 100644 --- a/src/link/MachO/UnwindInfo.zig +++ b/src/link/MachO/UnwindInfo.zig @@ -1,376 +1,122 @@ -gpa: Allocator, - /// List of all unwind records gathered from all objects and sorted -/// by source function address. -records: std.ArrayListUnmanaged(macho.compact_unwind_entry) = .{}, -records_lookup: std.AutoHashMapUnmanaged(SymbolWithLoc, RecordIndex) = .{}, +/// by allocated relative function address within the section. +records: std.ArrayListUnmanaged(Record.Index) = .{}, /// List of all personalities referenced by either unwind info entries /// or __eh_frame entries. -personalities: [max_personalities]SymbolWithLoc = undefined, +personalities: [max_personalities]Symbol.Index = undefined, personalities_count: u2 = 0, /// List of common encodings sorted in descending order with the most common first. 
-common_encodings: [max_common_encodings]macho.compact_unwind_encoding_t = undefined, +common_encodings: [max_common_encodings]Encoding = undefined, common_encodings_count: u7 = 0, /// List of record indexes containing an LSDA pointer. -lsdas: std.ArrayListUnmanaged(RecordIndex) = .{}, -lsdas_lookup: std.AutoHashMapUnmanaged(RecordIndex, u32) = .{}, +lsdas: std.ArrayListUnmanaged(u32) = .{}, +lsdas_lookup: std.ArrayListUnmanaged(u32) = .{}, /// List of second level pages. pages: std.ArrayListUnmanaged(Page) = .{}, -/// Upper bound (exclusive) of all the record ranges -end_boundary: u64 = 0, - -const RecordIndex = u32; - -const max_personalities = 3; -const max_common_encodings = 127; -const max_compact_encodings = 256; - -const second_level_page_bytes = 0x1000; -const second_level_page_words = second_level_page_bytes / @sizeOf(u32); - -const max_regular_second_level_entries = - (second_level_page_bytes - @sizeOf(macho.unwind_info_regular_second_level_page_header)) / - @sizeOf(macho.unwind_info_regular_second_level_entry); - -const max_compressed_second_level_entries = - (second_level_page_bytes - @sizeOf(macho.unwind_info_compressed_second_level_page_header)) / - @sizeOf(u32); - -const compressed_entry_func_offset_mask = ~@as(u24, 0); - -const Page = struct { - kind: enum { regular, compressed }, - start: RecordIndex, - count: u16, - page_encodings: [max_compact_encodings]RecordIndex = undefined, - page_encodings_count: u9 = 0, - - fn appendPageEncoding(page: *Page, record_id: RecordIndex) void { - assert(page.page_encodings_count <= max_compact_encodings); - page.page_encodings[page.page_encodings_count] = record_id; - page.page_encodings_count += 1; - } - - fn getPageEncoding( - page: *const Page, - info: *const UnwindInfo, - enc: macho.compact_unwind_encoding_t, - ) ?u8 { - comptime var index: u9 = 0; - inline while (index < max_compact_encodings) : (index += 1) { - if (index >= page.page_encodings_count) return null; - const record_id = 
page.page_encodings[index]; - const record = info.records.items[record_id]; - if (record.compactUnwindEncoding == enc) { - return @as(u8, @intCast(index)); - } - } - return null; - } - - fn format( - page: *const Page, - comptime unused_format_string: []const u8, - options: std.fmt.FormatOptions, - writer: anytype, - ) !void { - _ = page; - _ = unused_format_string; - _ = options; - _ = writer; - @compileError("do not format Page directly; use page.fmtDebug()"); - } - - const DumpCtx = struct { - page: *const Page, - info: *const UnwindInfo, - }; - - fn dump( - ctx: DumpCtx, - comptime unused_format_string: []const u8, - options: std.fmt.FormatOptions, - writer: anytype, - ) @TypeOf(writer).Error!void { - _ = options; - comptime assert(unused_format_string.len == 0); - try writer.writeAll("Page:\n"); - try writer.print(" kind: {s}\n", .{@tagName(ctx.page.kind)}); - try writer.print(" entries: {d} - {d}\n", .{ - ctx.page.start, - ctx.page.start + ctx.page.count, - }); - try writer.print(" encodings (count = {d})\n", .{ctx.page.page_encodings_count}); - for (ctx.page.page_encodings[0..ctx.page.page_encodings_count], 0..) 
|record_id, i| { - const record = ctx.info.records.items[record_id]; - const enc = record.compactUnwindEncoding; - try writer.print(" {d}: 0x{x:0>8}\n", .{ ctx.info.common_encodings_count + i, enc }); - } - } - - fn fmtDebug(page: *const Page, info: *const UnwindInfo) std.fmt.Formatter(dump) { - return .{ .data = .{ - .page = page, - .info = info, - } }; - } - - fn write(page: *const Page, info: *const UnwindInfo, writer: anytype) !void { - switch (page.kind) { - .regular => { - try writer.writeStruct(macho.unwind_info_regular_second_level_page_header{ - .entryPageOffset = @sizeOf(macho.unwind_info_regular_second_level_page_header), - .entryCount = page.count, - }); - - for (info.records.items[page.start..][0..page.count]) |record| { - try writer.writeStruct(macho.unwind_info_regular_second_level_entry{ - .functionOffset = @as(u32, @intCast(record.rangeStart)), - .encoding = record.compactUnwindEncoding, - }); - } - }, - .compressed => { - const entry_offset = @sizeOf(macho.unwind_info_compressed_second_level_page_header) + - @as(u16, @intCast(page.page_encodings_count)) * @sizeOf(u32); - try writer.writeStruct(macho.unwind_info_compressed_second_level_page_header{ - .entryPageOffset = entry_offset, - .entryCount = page.count, - .encodingsPageOffset = @sizeOf( - macho.unwind_info_compressed_second_level_page_header, - ), - .encodingsCount = page.page_encodings_count, - }); - - for (page.page_encodings[0..page.page_encodings_count]) |record_id| { - const enc = info.records.items[record_id].compactUnwindEncoding; - try writer.writeInt(u32, enc, .little); - } - - assert(page.count > 0); - const first_entry = info.records.items[page.start]; - for (info.records.items[page.start..][0..page.count]) |record| { - const enc_index = blk: { - if (info.getCommonEncoding(record.compactUnwindEncoding)) |id| { - break :blk id; - } - const ncommon = info.common_encodings_count; - break :blk ncommon + page.getPageEncoding(info, record.compactUnwindEncoding).?; - }; - const 
compressed = macho.UnwindInfoCompressedEntry{ - .funcOffset = @as(u24, @intCast(record.rangeStart - first_entry.rangeStart)), - .encodingIndex = @as(u8, @intCast(enc_index)), - }; - try writer.writeStruct(compressed); - } - }, - } - } -}; - -pub fn deinit(info: *UnwindInfo) void { - info.records.deinit(info.gpa); - info.records_lookup.deinit(info.gpa); - info.pages.deinit(info.gpa); - info.lsdas.deinit(info.gpa); - info.lsdas_lookup.deinit(info.gpa); +pub fn deinit(info: *UnwindInfo, allocator: Allocator) void { + info.records.deinit(allocator); + info.pages.deinit(allocator); + info.lsdas.deinit(allocator); + info.lsdas_lookup.deinit(allocator); } -pub fn scanRelocs(macho_file: *MachO) !void { - if (macho_file.unwind_info_section_index == null) return; - - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - for (macho_file.objects.items, 0..) |*object, object_id| { - const unwind_records = object.getUnwindRecords(); - for (object.exec_atoms.items) |atom_index| { - var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - while (inner_syms_it.next()) |sym| { - const record_id = object.unwind_records_lookup.get(sym) orelse continue; - if (object.unwind_relocs_lookup[record_id].dead) continue; - const record = unwind_records[record_id]; - if (!UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) { - if (getPersonalityFunctionReloc(macho_file, @as(u32, @intCast(object_id)), record_id)) |rel| { - // Personality function; add GOT pointer. 
- const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = @as(u32, @intCast(object_id)), - .rel = rel, - .code = mem.asBytes(&record), - .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), - }); - try macho_file.addGotEntry(reloc_target); - } - } - } - } +fn canFold(macho_file: *MachO, lhs_index: Record.Index, rhs_index: Record.Index) bool { + const cpu_arch = macho_file.options.cpu_arch.?; + const lhs = macho_file.getUnwindRecord(lhs_index); + const rhs = macho_file.getUnwindRecord(rhs_index); + if (cpu_arch == .x86_64) { + if (lhs.enc.getMode() == @intFromEnum(macho.UNWIND_X86_64_MODE.STACK_IND) or + rhs.enc.getMode() == @intFromEnum(macho.UNWIND_X86_64_MODE.STACK_IND)) return false; } + const lhs_per = lhs.personality orelse 0; + const rhs_per = rhs.personality orelse 0; + return lhs.enc.eql(rhs.enc) and + lhs_per == rhs_per and + lhs.fde == rhs.fde and + lhs.getLsdaAtom(macho_file) == null and rhs.getLsdaAtom(macho_file) == null; } -pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void { - if (macho_file.unwind_info_section_index == null) return; +pub fn generate(info: *UnwindInfo, macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; + log.debug("generating unwind info", .{}); - var records = std.ArrayList(macho.compact_unwind_entry).init(info.gpa); - defer records.deinit(); - - var sym_indexes = std.ArrayList(SymbolWithLoc).init(info.gpa); - defer sym_indexes.deinit(); - - // TODO handle dead stripping - for (macho_file.objects.items, 0..) |*object, object_id| { - log.debug("collecting unwind records in {s} ({d})", .{ object.name, object_id }); - const unwind_records = object.getUnwindRecords(); - - // Contents of unwind records does not have to cover all symbol in executable section - // so we need insert them ourselves. 
- try records.ensureUnusedCapacity(object.exec_atoms.items.len); - try sym_indexes.ensureUnusedCapacity(object.exec_atoms.items.len); - - for (object.exec_atoms.items) |atom_index| { - var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - var prev_symbol: ?SymbolWithLoc = null; - while (inner_syms_it.next()) |symbol| { - var record = if (object.unwind_records_lookup.get(symbol)) |record_id| blk: { - if (object.unwind_relocs_lookup[record_id].dead) continue; - var record = unwind_records[record_id]; - - if (UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) { - info.collectPersonalityFromDwarf(macho_file, @as(u32, @intCast(object_id)), symbol, &record); - } else { - if (getPersonalityFunctionReloc( - macho_file, - @as(u32, @intCast(object_id)), - record_id, - )) |rel| { - const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = @as(u32, @intCast(object_id)), - .rel = rel, - .code = mem.asBytes(&record), - .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), - }); - const personality_index = info.getPersonalityFunction(reloc_target) orelse inner: { - const personality_index = info.personalities_count; - info.personalities[personality_index] = reloc_target; - info.personalities_count += 1; - break :inner personality_index; - }; - - record.personalityFunction = personality_index + 1; - UnwindEncoding.setPersonalityIndex(&record.compactUnwindEncoding, personality_index + 1); - } - - if (getLsdaReloc(macho_file, @as(u32, @intCast(object_id)), record_id)) |rel| { - const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = @as(u32, @intCast(object_id)), - .rel = rel, - .code = mem.asBytes(&record), - .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), - }); - record.lsda = @as(u64, @bitCast(reloc_target)); - } - } - break :blk record; - } else blk: { - const sym = macho_file.getSymbol(symbol); - if (sym.n_desc == MachO.N_DEAD) continue; - if 
(prev_symbol) |prev_sym| { - const prev_addr = object.getSourceSymbol(prev_sym.sym_index).?.n_value; - const curr_addr = object.getSourceSymbol(symbol.sym_index).?.n_value; - if (prev_addr == curr_addr) continue; - } - - if (!object.hasUnwindRecords()) { - if (object.eh_frame_records_lookup.get(symbol)) |fde_offset| { - if (object.eh_frame_relocs_lookup.get(fde_offset).?.dead) continue; - var record = nullRecord(); - info.collectPersonalityFromDwarf(macho_file, @as(u32, @intCast(object_id)), symbol, &record); - switch (cpu_arch) { - .aarch64 => UnwindEncoding.setMode(&record.compactUnwindEncoding, macho.UNWIND_ARM64_MODE.DWARF), - .x86_64 => UnwindEncoding.setMode(&record.compactUnwindEncoding, macho.UNWIND_X86_64_MODE.DWARF), - else => unreachable, - } - break :blk record; - } - } - - break :blk nullRecord(); - }; - - const atom = macho_file.getAtom(atom_index); - const sym = macho_file.getSymbol(symbol); - assert(sym.n_desc != MachO.N_DEAD); - const size = if (inner_syms_it.next()) |next_sym| blk: { - // All this trouble to account for symbol aliases. - // TODO I think that remodelling the linker so that a Symbol references an Atom - // is the way to go, kinda like we do for ELF. We might also want to perhaps tag - // symbol aliases somehow so that they are excluded from everything except relocation - // resolution. 
- defer inner_syms_it.pos -= 1; - const curr_addr = object.getSourceSymbol(symbol.sym_index).?.n_value; - const next_addr = object.getSourceSymbol(next_sym.sym_index).?.n_value; - if (next_addr > curr_addr) break :blk next_addr - curr_addr; - break :blk macho_file.getSymbol(atom.getSymbolWithLoc()).n_value + atom.size - sym.n_value; - } else macho_file.getSymbol(atom.getSymbolWithLoc()).n_value + atom.size - sym.n_value; - record.rangeStart = sym.n_value; - record.rangeLength = @as(u32, @intCast(size)); - - try records.append(record); - try sym_indexes.append(symbol); - - prev_symbol = symbol; + // Collect all unwind records + for (macho_file.sections.items(.atoms)) |atoms| { + for (atoms.items) |atom_index| { + const atom = macho_file.getAtom(atom_index) orelse continue; + if (!atom.flags.alive) continue; + const recs = atom.getUnwindRecords(macho_file); + try info.records.ensureUnusedCapacity(gpa, recs.len); + for (recs) |rec| { + if (!macho_file.getUnwindRecord(rec).alive) continue; + info.records.appendAssumeCapacity(rec); } } } - // Record the ending boundary before folding. 
- assert(records.items.len > 0); - info.end_boundary = blk: { - const last_record = records.items[records.items.len - 1]; - break :blk last_record.rangeStart + last_record.rangeLength; - }; + // Encode records + for (info.records.items) |index| { + const rec = macho_file.getUnwindRecord(index); + if (rec.getFde(macho_file)) |fde| { + rec.enc.setDwarfSectionOffset(@intCast(fde.out_offset)); + } else if (rec.getPersonality(macho_file)) |_| { + const personality_index = try info.getOrPutPersonalityFunction(rec.personality.?); // TODO handle error + rec.enc.setPersonalityIndex(personality_index + 1); + } + } - // Fold records - try info.records.ensureTotalCapacity(info.gpa, records.items.len); - try info.records_lookup.ensureTotalCapacity(info.gpa, @as(u32, @intCast(sym_indexes.items.len))); + // Sort by assigned relative address within each output section + const sortFn = struct { + fn sortFn(ctx: *MachO, lhs_index: Record.Index, rhs_index: Record.Index) bool { + const lhs = ctx.getUnwindRecord(lhs_index); + const rhs = ctx.getUnwindRecord(rhs_index); + const lhsa = lhs.getAtom(ctx); + const rhsa = rhs.getAtom(ctx); + if (lhsa.out_n_sect == rhsa.out_n_sect) return lhs.getAtomAddress(ctx) < rhs.getAtomAddress(ctx); + return lhsa.out_n_sect < rhsa.out_n_sect; + } + }.sortFn; + mem.sort(Record.Index, info.records.items, macho_file, sortFn); - var maybe_prev: ?macho.compact_unwind_entry = null; - for (records.items, 0..) 
|record, i| { - const record_id = blk: { - if (maybe_prev) |prev| { - const is_dwarf = UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch); - if (is_dwarf or - (prev.compactUnwindEncoding != record.compactUnwindEncoding) or - (prev.personalityFunction != record.personalityFunction) or - record.lsda > 0) - { - const record_id = @as(RecordIndex, @intCast(info.records.items.len)); - info.records.appendAssumeCapacity(record); - maybe_prev = record; - break :blk record_id; - } else { - break :blk @as(RecordIndex, @intCast(info.records.items.len - 1)); - } + // Fold the records + // Any adjacent two records that share encoding can be folded into one. + { + var i: usize = 0; + var j: usize = 1; + while (j < info.records.items.len) : (j += 1) { + if (canFold(macho_file, info.records.items[i], info.records.items[j])) { + const rec = macho_file.getUnwindRecord(info.records.items[i]); + rec.length += macho_file.getUnwindRecord(info.records.items[j]).length + 1; } else { - const record_id = @as(RecordIndex, @intCast(info.records.items.len)); - info.records.appendAssumeCapacity(record); - maybe_prev = record; - break :blk record_id; + i += 1; + info.records.items[i] = info.records.items[j]; } - }; - info.records_lookup.putAssumeCapacityNoClobber(sym_indexes.items[i], record_id); + } + info.records.shrinkAndFree(gpa, i + 1); + } + + for (info.records.items) |rec_index| { + const rec = macho_file.getUnwindRecord(rec_index); + const atom = rec.getAtom(macho_file); + log.debug("@{x}-{x} : {s} : rec({d}) : {}", .{ + rec.getAtomAddress(macho_file), + rec.getAtomAddress(macho_file) + rec.length, + atom.getName(macho_file), + rec_index, + rec.enc, + }); } // Calculate common encodings { const CommonEncWithCount = struct { - enc: macho.compact_unwind_encoding_t, + enc: Encoding, count: u32, fn greaterThan(ctx: void, lhs: @This(), rhs: @This()) bool { @@ -380,39 +126,38 @@ pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void { }; const Context = struct { - pub fn 
hash(ctx: @This(), key: macho.compact_unwind_encoding_t) u32 { + pub fn hash(ctx: @This(), key: Encoding) u32 { _ = ctx; - return key; + return key.enc; } pub fn eql( ctx: @This(), - key1: macho.compact_unwind_encoding_t, - key2: macho.compact_unwind_encoding_t, + key1: Encoding, + key2: Encoding, b_index: usize, ) bool { _ = ctx; _ = b_index; - return key1 == key2; + return key1.eql(key2); } }; var common_encodings_counts = std.ArrayHashMap( - macho.compact_unwind_encoding_t, + Encoding, CommonEncWithCount, Context, false, - ).init(info.gpa); + ).init(gpa); defer common_encodings_counts.deinit(); - for (info.records.items) |record| { - assert(!isNull(record)); - if (UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) continue; - const enc = record.compactUnwindEncoding; - const gop = try common_encodings_counts.getOrPut(enc); + for (info.records.items) |rec_index| { + const rec = macho_file.getUnwindRecord(rec_index); + if (rec.enc.isDwarf(macho_file)) continue; + const gop = try common_encodings_counts.getOrPut(rec.enc); if (!gop.found_existing) { gop.value_ptr.* = .{ - .enc = enc, + .enc = rec.enc, .count = 0, }; } @@ -427,7 +172,7 @@ pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void { if (i >= max_common_encodings) break; if (slice[i].count < 2) continue; info.appendCommonEncoding(slice[i].enc); - log.debug("adding common encoding: {d} => 0x{x:0>8}", .{ i, slice[i].enc }); + log.debug("adding common encoding: {d} => {}", .{ i, slice[i].enc }); } } @@ -435,8 +180,8 @@ pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void { { var i: u32 = 0; while (i < info.records.items.len) { - const range_start_max: u64 = - info.records.items[i].rangeStart + compressed_entry_func_offset_mask; + const rec = macho_file.getUnwindRecord(info.records.items[i]); + const range_start_max: u64 = rec.getAtomAddress(macho_file) + compressed_entry_func_offset_mask; var encoding_count: u9 = info.common_encodings_count; var space_left: u32 = 
second_level_page_words - @sizeOf(macho.unwind_info_compressed_second_level_page_header) / @sizeOf(u32); @@ -447,19 +192,18 @@ pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void { }; while (space_left >= 1 and i < info.records.items.len) { - const record = info.records.items[i]; - const enc = record.compactUnwindEncoding; - const is_dwarf = UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch); + const next = macho_file.getUnwindRecord(info.records.items[i]); + const is_dwarf = next.enc.isDwarf(macho_file); - if (record.rangeStart >= range_start_max) { + if (next.getAtomAddress(macho_file) >= range_start_max) { break; - } else if (info.getCommonEncoding(enc) != null or - page.getPageEncoding(info, enc) != null and !is_dwarf) + } else if (info.getCommonEncoding(next.enc) != null or + page.getPageEncoding(next.enc) != null and !is_dwarf) { i += 1; space_left -= 1; } else if (space_left >= 2 and encoding_count < max_compact_encodings) { - page.appendPageEncoding(i); + page.appendPageEncoding(next.enc); i += 1; space_left -= 2; encoding_count += 1; @@ -481,63 +225,24 @@ pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void { page.kind = .compressed; } - log.debug("{}", .{page.fmtDebug(info)}); + log.debug("{}", .{page.fmt(info.*)}); - try info.pages.append(info.gpa, page); + try info.pages.append(gpa, page); } } - // Save indices of records requiring LSDA relocation - try info.lsdas_lookup.ensureTotalCapacity(info.gpa, @as(u32, @intCast(info.records.items.len))); - for (info.records.items, 0..) |rec, i| { - info.lsdas_lookup.putAssumeCapacityNoClobber(@as(RecordIndex, @intCast(i)), @as(u32, @intCast(info.lsdas.items.len))); - if (rec.lsda == 0) continue; - try info.lsdas.append(info.gpa, @as(RecordIndex, @intCast(i))); + // Save records having an LSDA pointer + try info.lsdas_lookup.ensureTotalCapacityPrecise(gpa, info.records.items.len); + for (info.records.items, 0..) 
|index, i| { + const rec = macho_file.getUnwindRecord(index); + info.lsdas_lookup.appendAssumeCapacity(@intCast(info.lsdas.items.len)); + if (rec.getLsdaAtom(macho_file)) |_| { + try info.lsdas.append(gpa, @intCast(i)); + } } } -fn collectPersonalityFromDwarf( - info: *UnwindInfo, - macho_file: *MachO, - object_id: u32, - sym_loc: SymbolWithLoc, - record: *macho.compact_unwind_entry, -) void { - const object = &macho_file.objects.items[object_id]; - var it = object.getEhFrameRecordsIterator(); - const fde_offset = object.eh_frame_records_lookup.get(sym_loc).?; - it.seekTo(fde_offset); - const fde = (it.next() catch return).?; // We don't care about the error since we already handled it - const cie_ptr = fde.getCiePointerSource(object_id, macho_file, fde_offset); - const cie_offset = fde_offset + 4 - cie_ptr; - it.seekTo(cie_offset); - const cie = (it.next() catch return).?; // We don't care about the error since we already handled it - - if (cie.getPersonalityPointerReloc( - macho_file, - @as(u32, @intCast(object_id)), - cie_offset, - )) |target| { - const personality_index = info.getPersonalityFunction(target) orelse inner: { - const personality_index = info.personalities_count; - info.personalities[personality_index] = target; - info.personalities_count += 1; - break :inner personality_index; - }; - - record.personalityFunction = personality_index + 1; - UnwindEncoding.setPersonalityIndex(&record.compactUnwindEncoding, personality_index + 1); - } -} - -pub fn calcSectionSize(info: UnwindInfo, macho_file: *MachO) void { - const sect_id = macho_file.unwind_info_section_index orelse return; - const sect = &macho_file.sections.items(.header)[sect_id]; - sect.@"align" = 2; - sect.size = info.calcRequiredSize(); -} - -fn calcRequiredSize(info: UnwindInfo) usize { +pub fn calcSize(info: UnwindInfo) usize { var total_size: usize = 0; total_size += @sizeOf(macho.unwind_info_section_header); total_size += @@ -549,59 +254,12 @@ fn calcRequiredSize(info: UnwindInfo) usize { 
return total_size; } -pub fn write(info: *UnwindInfo, macho_file: *MachO) !void { - const sect_id = macho_file.unwind_info_section_index orelse return; - const sect = &macho_file.sections.items(.header)[sect_id]; - const seg_id = macho_file.sections.items(.segment_index)[sect_id]; - const seg = macho_file.segments.items[seg_id]; +pub fn write(info: UnwindInfo, macho_file: *MachO, buffer: []u8) !void { + const seg = macho_file.getTextSegment(); + const header = macho_file.sections.items(.header)[macho_file.unwind_info_sect_index.?]; - const text_sect_id = macho_file.text_section_index.?; - const text_sect = macho_file.sections.items(.header)[text_sect_id]; - - var personalities: [max_personalities]u32 = undefined; - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - - log.debug("Personalities:", .{}); - for (info.personalities[0..info.personalities_count], 0..) |reloc_target, i| { - const addr = macho_file.getGotEntryAddress(reloc_target).?; - personalities[i] = @as(u32, @intCast(addr - seg.vmaddr)); - log.debug(" {d}: 0x{x} ({s})", .{ i, personalities[i], macho_file.getSymbolName(reloc_target) }); - } - - for (info.records.items) |*rec| { - // Finalize missing address values - rec.rangeStart += text_sect.addr - seg.vmaddr; - if (rec.personalityFunction > 0) { - const index = math.cast(usize, rec.personalityFunction - 1) orelse return error.Overflow; - rec.personalityFunction = personalities[index]; - } - - if (rec.compactUnwindEncoding > 0 and !UnwindEncoding.isDwarf(rec.compactUnwindEncoding, cpu_arch)) { - const lsda_target = @as(SymbolWithLoc, @bitCast(rec.lsda)); - if (lsda_target.getFile()) |_| { - const sym = macho_file.getSymbol(lsda_target); - rec.lsda = sym.n_value - seg.vmaddr; - } - } - } - - for (info.records.items, 0..) 
|record, i| { - log.debug("Unwind record at offset 0x{x}", .{i * @sizeOf(macho.compact_unwind_entry)}); - log.debug(" start: 0x{x}", .{record.rangeStart}); - log.debug(" length: 0x{x}", .{record.rangeLength}); - log.debug(" compact encoding: 0x{x:0>8}", .{record.compactUnwindEncoding}); - log.debug(" personality: 0x{x}", .{record.personalityFunction}); - log.debug(" LSDA: 0x{x}", .{record.lsda}); - } - - var buffer = std.ArrayList(u8).init(info.gpa); - defer buffer.deinit(); - - const size = info.calcRequiredSize(); - try buffer.ensureTotalCapacityPrecise(size); - - var cwriter = std.io.countingWriter(buffer.writer()); + var stream = std.io.fixedBufferStream(buffer); + var cwriter = std.io.countingWriter(stream.writer()); const writer = cwriter.writer(); const common_encodings_offset: u32 = @sizeOf(macho.unwind_info_section_header); @@ -621,211 +279,404 @@ pub fn write(info: *UnwindInfo, macho_file: *MachO) !void { }); try writer.writeAll(mem.sliceAsBytes(info.common_encodings[0..info.common_encodings_count])); - try writer.writeAll(mem.sliceAsBytes(personalities[0..info.personalities_count])); - const pages_base_offset = @as(u32, @intCast(size - (info.pages.items.len * second_level_page_bytes))); + for (info.personalities[0..info.personalities_count]) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + try writer.writeInt(u32, @intCast(sym.getGotAddress(macho_file) - seg.vmaddr), .little); + } + + const pages_base_offset = @as(u32, @intCast(header.size - (info.pages.items.len * second_level_page_bytes))); const lsda_base_offset = @as(u32, @intCast(pages_base_offset - (info.lsdas.items.len * @sizeOf(macho.unwind_info_section_header_lsda_index_entry)))); for (info.pages.items, 0..) 
|page, i| { assert(page.count > 0); - const first_entry = info.records.items[page.start]; + const rec = macho_file.getUnwindRecord(info.records.items[page.start]); try writer.writeStruct(macho.unwind_info_section_header_index_entry{ - .functionOffset = @as(u32, @intCast(first_entry.rangeStart)), + .functionOffset = @as(u32, @intCast(rec.getAtomAddress(macho_file) - seg.vmaddr)), .secondLevelPagesSectionOffset = @as(u32, @intCast(pages_base_offset + i * second_level_page_bytes)), .lsdaIndexArraySectionOffset = lsda_base_offset + - info.lsdas_lookup.get(page.start).? * @sizeOf(macho.unwind_info_section_header_lsda_index_entry), + info.lsdas_lookup.items[page.start] * @sizeOf(macho.unwind_info_section_header_lsda_index_entry), }); } - // Relocate end boundary address - const end_boundary = @as(u32, @intCast(info.end_boundary + text_sect.addr - seg.vmaddr)); + const last_rec = macho_file.getUnwindRecord(info.records.items[info.records.items.len - 1]); + const sentinel_address = @as(u32, @intCast(last_rec.getAtomAddress(macho_file) + last_rec.length - seg.vmaddr)); try writer.writeStruct(macho.unwind_info_section_header_index_entry{ - .functionOffset = end_boundary, + .functionOffset = sentinel_address, .secondLevelPagesSectionOffset = 0, .lsdaIndexArraySectionOffset = lsda_base_offset + @as(u32, @intCast(info.lsdas.items.len)) * @sizeOf(macho.unwind_info_section_header_lsda_index_entry), }); - for (info.lsdas.items) |record_id| { - const record = info.records.items[record_id]; + for (info.lsdas.items) |index| { + const rec = macho_file.getUnwindRecord(info.records.items[index]); try writer.writeStruct(macho.unwind_info_section_header_lsda_index_entry{ - .functionOffset = @as(u32, @intCast(record.rangeStart)), - .lsdaOffset = @as(u32, @intCast(record.lsda)), + .functionOffset = @as(u32, @intCast(rec.getAtomAddress(macho_file) - seg.vmaddr)), + .lsdaOffset = @as(u32, @intCast(rec.getLsdaAddress(macho_file) - seg.vmaddr)), }); } for (info.pages.items) |page| { const start 
= cwriter.bytes_written; - try page.write(info, writer); + try page.write(info, macho_file, writer); const nwritten = cwriter.bytes_written - start; if (nwritten < second_level_page_bytes) { - const offset = math.cast(usize, second_level_page_bytes - nwritten) orelse return error.Overflow; - try writer.writeByteNTimes(0, offset); + try writer.writeByteNTimes(0, second_level_page_bytes - nwritten); } } - const padding = buffer.items.len - cwriter.bytes_written; + const padding = buffer.len - cwriter.bytes_written; if (padding > 0) { - const offset = math.cast(usize, cwriter.bytes_written) orelse return error.Overflow; - @memset(buffer.items[offset..], 0); + @memset(buffer[cwriter.bytes_written..], 0); } - - try macho_file.base.file.?.pwriteAll(buffer.items, sect.offset); } -fn getRelocs(macho_file: *MachO, object_id: u32, record_id: usize) []const macho.relocation_info { - const object = &macho_file.objects.items[object_id]; - assert(object.hasUnwindRecords()); - const rel_pos = object.unwind_relocs_lookup[record_id].reloc; - const relocs = object.getRelocs(object.unwind_info_sect_id.?); - return relocs[rel_pos.start..][0..rel_pos.len]; -} - -fn isPersonalityFunction(record_id: usize, rel: macho.relocation_info) bool { - const base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))); - const rel_offset = rel.r_address - base_offset; - return rel_offset == 16; -} - -pub fn getPersonalityFunctionReloc( - macho_file: *MachO, - object_id: u32, - record_id: usize, -) ?macho.relocation_info { - const relocs = getRelocs(macho_file, object_id, record_id); - for (relocs) |rel| { - if (isPersonalityFunction(record_id, rel)) return rel; - } - return null; -} - -fn getPersonalityFunction(info: UnwindInfo, global_index: SymbolWithLoc) ?u2 { +fn getOrPutPersonalityFunction(info: *UnwindInfo, sym_index: Symbol.Index) error{TooManyPersonalities}!u2 { comptime var index: u2 = 0; inline while (index < max_personalities) : (index += 1) { - if (index >= 
info.personalities_count) return null; - if (info.personalities[index].eql(global_index)) { + if (info.personalities[index] == sym_index) { + return index; + } else if (index == info.personalities_count) { + info.personalities[index] = sym_index; + info.personalities_count += 1; return index; } } - return null; + return error.TooManyPersonalities; } -fn isLsda(record_id: usize, rel: macho.relocation_info) bool { - const base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))); - const rel_offset = rel.r_address - base_offset; - return rel_offset == 24; -} - -pub fn getLsdaReloc(macho_file: *MachO, object_id: u32, record_id: usize) ?macho.relocation_info { - const relocs = getRelocs(macho_file, object_id, record_id); - for (relocs) |rel| { - if (isLsda(record_id, rel)) return rel; - } - return null; -} - -pub fn isNull(rec: macho.compact_unwind_entry) bool { - return rec.rangeStart == 0 and - rec.rangeLength == 0 and - rec.compactUnwindEncoding == 0 and - rec.lsda == 0 and - rec.personalityFunction == 0; -} - -inline fn nullRecord() macho.compact_unwind_entry { - return .{ - .rangeStart = 0, - .rangeLength = 0, - .compactUnwindEncoding = 0, - .personalityFunction = 0, - .lsda = 0, - }; -} - -fn appendCommonEncoding(info: *UnwindInfo, enc: macho.compact_unwind_encoding_t) void { +fn appendCommonEncoding(info: *UnwindInfo, enc: Encoding) void { assert(info.common_encodings_count <= max_common_encodings); info.common_encodings[info.common_encodings_count] = enc; info.common_encodings_count += 1; } -fn getCommonEncoding(info: UnwindInfo, enc: macho.compact_unwind_encoding_t) ?u7 { +fn getCommonEncoding(info: UnwindInfo, enc: Encoding) ?u7 { comptime var index: u7 = 0; inline while (index < max_common_encodings) : (index += 1) { if (index >= info.common_encodings_count) return null; - if (info.common_encodings[index] == enc) { + if (info.common_encodings[index].eql(enc)) { return index; } } return null; } -pub const UnwindEncoding = struct { - 
pub fn getMode(enc: macho.compact_unwind_encoding_t) u4 { +pub const Encoding = extern struct { + enc: macho.compact_unwind_encoding_t, + + pub fn getMode(enc: Encoding) u4 { comptime assert(macho.UNWIND_ARM64_MODE_MASK == macho.UNWIND_X86_64_MODE_MASK); - return @as(u4, @truncate((enc & macho.UNWIND_ARM64_MODE_MASK) >> 24)); + return @as(u4, @truncate((enc.enc & macho.UNWIND_ARM64_MODE_MASK) >> 24)); } - pub fn isDwarf(enc: macho.compact_unwind_encoding_t, cpu_arch: std.Target.Cpu.Arch) bool { - const mode = getMode(enc); - return switch (cpu_arch) { + pub fn isDwarf(enc: Encoding, macho_file: *MachO) bool { + const mode = enc.getMode(); + return switch (macho_file.options.cpu_arch.?) { .aarch64 => @as(macho.UNWIND_ARM64_MODE, @enumFromInt(mode)) == .DWARF, .x86_64 => @as(macho.UNWIND_X86_64_MODE, @enumFromInt(mode)) == .DWARF, else => unreachable, }; } - pub fn setMode(enc: *macho.compact_unwind_encoding_t, mode: anytype) void { - enc.* |= @as(u32, @intCast(@intFromEnum(mode))) << 24; + pub fn setMode(enc: *Encoding, mode: anytype) void { + enc.enc |= @as(u32, @intCast(@intFromEnum(mode))) << 24; } - pub fn hasLsda(enc: macho.compact_unwind_encoding_t) bool { - const has_lsda = @as(u1, @truncate((enc & macho.UNWIND_HAS_LSDA) >> 31)); + pub fn hasLsda(enc: Encoding) bool { + const has_lsda = @as(u1, @truncate((enc.enc & macho.UNWIND_HAS_LSDA) >> 31)); return has_lsda == 1; } - pub fn setHasLsda(enc: *macho.compact_unwind_encoding_t, has_lsda: bool) void { + pub fn setHasLsda(enc: *Encoding, has_lsda: bool) void { const mask = @as(u32, @intCast(@intFromBool(has_lsda))) << 31; - enc.* |= mask; + enc.enc |= mask; } - pub fn getPersonalityIndex(enc: macho.compact_unwind_encoding_t) u2 { - const index = @as(u2, @truncate((enc & macho.UNWIND_PERSONALITY_MASK) >> 28)); + pub fn getPersonalityIndex(enc: Encoding) u2 { + const index = @as(u2, @truncate((enc.enc & macho.UNWIND_PERSONALITY_MASK) >> 28)); return index; } - pub fn setPersonalityIndex(enc: 
*macho.compact_unwind_encoding_t, index: u2) void { + pub fn setPersonalityIndex(enc: *Encoding, index: u2) void { const mask = @as(u32, @intCast(index)) << 28; - enc.* |= mask; + enc.enc |= mask; } - pub fn getDwarfSectionOffset(enc: macho.compact_unwind_encoding_t, cpu_arch: std.Target.Cpu.Arch) u24 { - assert(isDwarf(enc, cpu_arch)); - const offset = @as(u24, @truncate(enc)); + pub fn getDwarfSectionOffset(enc: Encoding) u24 { + const offset = @as(u24, @truncate(enc.enc)); return offset; } - pub fn setDwarfSectionOffset(enc: *macho.compact_unwind_encoding_t, cpu_arch: std.Target.Cpu.Arch, offset: u24) void { - assert(isDwarf(enc.*, cpu_arch)); - enc.* |= offset; + pub fn setDwarfSectionOffset(enc: *Encoding, offset: u24) void { + enc.enc |= offset; + } + + pub fn eql(enc: Encoding, other: Encoding) bool { + return enc.enc == other.enc; + } + + pub fn format( + enc: Encoding, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + try writer.print("0x{x:0>8}", .{enc.enc}); } }; -const UnwindInfo = @This(); +pub const Record = struct { + length: u32 = 0, + enc: Encoding = .{ .enc = 0 }, + atom: Atom.Index = 0, + atom_offset: u32 = 0, + lsda: Atom.Index = 0, + lsda_offset: u32 = 0, + personality: ?Symbol.Index = null, // TODO make this zero-is-null + fde: Fde.Index = 0, // TODO actually make FDE at 0 an invalid FDE + file: File.Index = 0, + alive: bool = true, + + pub fn getObject(rec: Record, macho_file: *MachO) *Object { + return macho_file.getFile(rec.file).?.object; + } + + pub fn getAtom(rec: Record, macho_file: *MachO) *Atom { + return macho_file.getAtom(rec.atom).?; + } + + pub fn getLsdaAtom(rec: Record, macho_file: *MachO) ?*Atom { + return macho_file.getAtom(rec.lsda); + } + + pub fn getPersonality(rec: Record, macho_file: *MachO) ?*Symbol { + const personality = rec.personality orelse return null; + return macho_file.getSymbol(personality); + } + + pub fn 
getFde(rec: Record, macho_file: *MachO) ?Fde { + if (!rec.enc.isDwarf(macho_file)) return null; + return rec.getObject(macho_file).fdes.items[rec.fde]; + } + + pub fn getFdePtr(rec: Record, macho_file: *MachO) ?*Fde { + if (!rec.enc.isDwarf(macho_file)) return null; + return &rec.getObject(macho_file).fdes.items[rec.fde]; + } + + pub fn getAtomAddress(rec: Record, macho_file: *MachO) u64 { + const atom = rec.getAtom(macho_file); + return atom.value + rec.atom_offset; + } + + pub fn getLsdaAddress(rec: Record, macho_file: *MachO) u64 { + const lsda = rec.getLsdaAtom(macho_file) orelse return 0; + return lsda.value + rec.lsda_offset; + } + + pub fn format( + rec: Record, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = rec; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format UnwindInfo.Records directly"); + } + + pub fn fmt(rec: Record, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ + .rec = rec, + .macho_file = macho_file, + } }; + } + + const FormatContext = struct { + rec: Record, + macho_file: *MachO, + }; + + fn format2( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + const rec = ctx.rec; + const macho_file = ctx.macho_file; + try writer.print("{x} : len({x})", .{ + rec.enc.enc, rec.length, + }); + if (rec.enc.isDwarf(macho_file)) try writer.print(" : fde({d})", .{rec.fde}); + try writer.print(" : {s}", .{rec.getAtom(macho_file).getName(macho_file)}); + if (!rec.alive) try writer.writeAll(" : [*]"); + } + + pub const Index = u32; +}; + +const max_personalities = 3; +const max_common_encodings = 127; +const max_compact_encodings = 256; + +const second_level_page_bytes = 0x1000; +const second_level_page_words = second_level_page_bytes / @sizeOf(u32); + +const max_regular_second_level_entries = + (second_level_page_bytes - 
@sizeOf(macho.unwind_info_regular_second_level_page_header)) / + @sizeOf(macho.unwind_info_regular_second_level_entry); + +const max_compressed_second_level_entries = + (second_level_page_bytes - @sizeOf(macho.unwind_info_compressed_second_level_page_header)) / + @sizeOf(u32); + +const compressed_entry_func_offset_mask = ~@as(u24, 0); + +const Page = struct { + kind: enum { regular, compressed }, + start: u32, + count: u16, + page_encodings: [max_compact_encodings]Encoding = undefined, + page_encodings_count: u9 = 0, + + fn appendPageEncoding(page: *Page, enc: Encoding) void { + assert(page.page_encodings_count <= max_compact_encodings); + page.page_encodings[page.page_encodings_count] = enc; + page.page_encodings_count += 1; + } + + fn getPageEncoding(page: Page, enc: Encoding) ?u8 { + comptime var index: u9 = 0; + inline while (index < max_compact_encodings) : (index += 1) { + if (index >= page.page_encodings_count) return null; + if (page.page_encodings[index].eql(enc)) { + return @as(u8, @intCast(index)); + } + } + return null; + } + + fn format( + page: *const Page, + comptime unused_format_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = page; + _ = unused_format_string; + _ = options; + _ = writer; + @compileError("do not format Page directly; use page.fmt()"); + } + + const FormatPageContext = struct { + page: Page, + info: UnwindInfo, + }; + + fn format2( + ctx: FormatPageContext, + comptime unused_format_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) @TypeOf(writer).Error!void { + _ = options; + _ = unused_format_string; + try writer.writeAll("Page:\n"); + try writer.print(" kind: {s}\n", .{@tagName(ctx.page.kind)}); + try writer.print(" entries: {d} - {d}\n", .{ + ctx.page.start, + ctx.page.start + ctx.page.count, + }); + try writer.print(" encodings (count = {d})\n", .{ctx.page.page_encodings_count}); + for (ctx.page.page_encodings[0..ctx.page.page_encodings_count], 0..) 
|enc, i| { + try writer.print(" {d}: {}\n", .{ ctx.info.common_encodings_count + i, enc }); + } + } + + fn fmt(page: Page, info: UnwindInfo) std.fmt.Formatter(format2) { + return .{ .data = .{ + .page = page, + .info = info, + } }; + } + + fn write(page: Page, info: UnwindInfo, macho_file: *MachO, writer: anytype) !void { + const seg = macho_file.getTextSegment(); + + switch (page.kind) { + .regular => { + try writer.writeStruct(macho.unwind_info_regular_second_level_page_header{ + .entryPageOffset = @sizeOf(macho.unwind_info_regular_second_level_page_header), + .entryCount = page.count, + }); + + for (info.records.items[page.start..][0..page.count]) |index| { + const rec = macho_file.getUnwindRecord(index); + try writer.writeStruct(macho.unwind_info_regular_second_level_entry{ + .functionOffset = @as(u32, @intCast(rec.getAtomAddress(macho_file) - seg.vmaddr)), + .encoding = rec.enc.enc, + }); + } + }, + .compressed => { + const entry_offset = @sizeOf(macho.unwind_info_compressed_second_level_page_header) + + @as(u16, @intCast(page.page_encodings_count)) * @sizeOf(u32); + try writer.writeStruct(macho.unwind_info_compressed_second_level_page_header{ + .entryPageOffset = entry_offset, + .entryCount = page.count, + .encodingsPageOffset = @sizeOf(macho.unwind_info_compressed_second_level_page_header), + .encodingsCount = page.page_encodings_count, + }); + + for (page.page_encodings[0..page.page_encodings_count]) |enc| { + try writer.writeInt(u32, enc.enc, .little); + } + + assert(page.count > 0); + const first_rec = macho_file.getUnwindRecord(info.records.items[page.start]); + for (info.records.items[page.start..][0..page.count]) |index| { + const rec = macho_file.getUnwindRecord(index); + const enc_index = blk: { + if (info.getCommonEncoding(rec.enc)) |id| break :blk id; + const ncommon = info.common_encodings_count; + break :blk ncommon + page.getPageEncoding(rec.enc).?; + }; + const compressed = macho.UnwindInfoCompressedEntry{ + .funcOffset = @as(u24, 
@intCast(rec.getAtomAddress(macho_file) - first_rec.getAtomAddress(macho_file))), + .encodingIndex = @as(u8, @intCast(enc_index)), + }; + try writer.writeStruct(compressed); + } + }, + } + } +}; const std = @import("std"); const assert = std.debug.assert; const eh_frame = @import("eh_frame.zig"); const fs = std.fs; const leb = std.leb; -const log = std.log.scoped(.unwind_info); +const log = std.log.scoped(.link); const macho = std.macho; const math = std.math; const mem = std.mem; -const trace = @import("../../tracy.zig").trace; +const trace = @import("../tracy.zig").trace; const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); -const EhFrameRecord = eh_frame.EhFrameRecord; +const Fde = eh_frame.Fde; +const File = @import("file.zig").File; const MachO = @import("../MachO.zig"); const Object = @import("Object.zig"); -const SymbolWithLoc = MachO.SymbolWithLoc; +const Symbol = @import("Symbol.zig"); +const UnwindInfo = @This(); diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index fe3740e826..8d2dba53c6 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -1,495 +1,204 @@ -//! An algorithm for dead stripping of unreferenced Atoms. 
- pub fn gcAtoms(macho_file: *MachO) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; + const gpa = macho_file.base.allocator; - var arena = std.heap.ArenaAllocator.init(gpa); - defer arena.deinit(); + var objects = try std.ArrayList(File.Index).initCapacity(gpa, macho_file.objects.items.len + 1); + defer objects.deinit(); + for (macho_file.objects.items) |index| objects.appendAssumeCapacity(index); + if (macho_file.internal_object_index) |index| objects.appendAssumeCapacity(index); - var roots = AtomTable.init(arena.allocator()); - try roots.ensureUnusedCapacity(@as(u32, @intCast(macho_file.globals.items.len))); + var roots = std.ArrayList(*Atom).init(gpa); + defer roots.deinit(); - var alive = AtomTable.init(arena.allocator()); - try alive.ensureTotalCapacity(@as(u32, @intCast(macho_file.atoms.items.len))); - - try collectRoots(macho_file, &roots); - mark(macho_file, roots, &alive); - prune(macho_file, alive); + try collectRoots(&roots, objects.items, macho_file); + mark(roots.items, objects.items, macho_file); + prune(objects.items, macho_file); } -fn addRoot(macho_file: *MachO, roots: *AtomTable, file: u32, sym_loc: SymbolWithLoc) !void { - const sym = macho_file.getSymbol(sym_loc); - assert(!sym.undf()); - const object = &macho_file.objects.items[file]; - const atom_index = object.getAtomIndexForSymbol(sym_loc.sym_index).?; // panic here means fatal error - log.debug("root(ATOM({d}, %{d}, {d}))", .{ - atom_index, - macho_file.getAtom(atom_index).sym_index, - file, - }); - _ = try roots.getOrPut(atom_index); -} +fn collectRoots(roots: *std.ArrayList(*Atom), objects: []const File.Index, macho_file: *MachO) !void { + for (objects) |index| { + const object = macho_file.getFile(index).?; + for (object.getSymbols()) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + const file = sym.getFile(macho_file) orelse continue; + if (file.getIndex() != index) continue; + if (sym.flags.no_dead_strip or (macho_file.options.dylib and 
sym.visibility == .global)) + try markSymbol(sym, roots, macho_file); + } -fn collectRoots(macho_file: *MachO, roots: *AtomTable) !void { - log.debug("collecting roots", .{}); + for (object.getAtoms()) |atom_index| { + const atom = macho_file.getAtom(atom_index).?; + const isec = atom.getInputSection(macho_file); + switch (isec.type()) { + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => if (markAtom(atom)) try roots.append(atom), - const comp = macho_file.base.comp; - - switch (comp.config.output_mode) { - .Exe => { - // Add entrypoint as GC root - if (macho_file.getEntryPoint()) |global| { - if (global.getFile()) |file| { - try addRoot(macho_file, roots, file, global); - } else { - assert(macho_file.getSymbol(global).undf()); // Stub as our entrypoint is in a dylib. - } + else => if (isec.isDontDeadStrip() and markAtom(atom)) { + try roots.append(atom); + }, } - }, - else => |other| { - assert(other == .Lib); - // Add exports as GC roots - for (macho_file.globals.items) |global| { - const sym = macho_file.getSymbol(global); - if (sym.undf()) continue; - if (sym.n_desc == MachO.N_BOUNDARY) continue; - - if (global.getFile()) |file| { - try addRoot(macho_file, roots, file, global); - } - } - }, + } } - // Add all symbols force-defined by the user. 
- for (comp.force_undefined_symbols.keys()) |sym_name| { - const global_index = macho_file.resolver.get(sym_name).?; - const global = macho_file.globals.items[global_index]; - const sym = macho_file.getSymbol(global); - assert(!sym.undf()); - try addRoot(macho_file, roots, global.getFile().?, global); + for (macho_file.objects.items) |index| { + for (macho_file.getFile(index).?.object.unwind_records.items) |cu_index| { + const cu = macho_file.getUnwindRecord(cu_index); + if (!cu.alive) continue; + if (cu.getFde(macho_file)) |fde| { + if (fde.getCie(macho_file).getPersonality(macho_file)) |sym| try markSymbol(sym, roots, macho_file); + } else if (cu.getPersonality(macho_file)) |sym| try markSymbol(sym, roots, macho_file); + } } - for (macho_file.objects.items) |object| { - const has_subsections = object.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; + for (macho_file.undefined_symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + try markSymbol(sym, roots, macho_file); + } - for (object.atoms.items) |atom_index| { - const is_gc_root = blk: { - // Modelled after ld64 which treats each object file compiled without MH_SUBSECTIONS_VIA_SYMBOLS - // as a root. 
- if (!has_subsections) break :blk true; - - const atom = macho_file.getAtom(atom_index); - const sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| - source_sym.n_sect - 1 - else sect_id: { - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - break :sect_id sect_id; - }; - const source_sect = object.getSourceSection(sect_id); - if (source_sect.isDontDeadStrip()) break :blk true; - switch (source_sect.type()) { - macho.S_MOD_INIT_FUNC_POINTERS, - macho.S_MOD_TERM_FUNC_POINTERS, - => break :blk true, - else => break :blk false, - } - }; - - if (is_gc_root) { - _ = try roots.getOrPut(atom_index); - - log.debug("root(ATOM({d}, %{d}, {?d}))", .{ - atom_index, - macho_file.getAtom(atom_index).sym_index, - macho_file.getAtom(atom_index).getFile(), - }); - } + for (&[_]?Symbol.Index{ + macho_file.entry_index, + macho_file.dyld_stub_binder_index, + macho_file.objc_msg_send_index, + }) |index| { + if (index) |idx| { + const sym = macho_file.getSymbol(idx); + try markSymbol(sym, roots, macho_file); } } } -fn markLive(macho_file: *MachO, atom_index: Atom.Index, alive: *AtomTable) void { - if (alive.contains(atom_index)) return; - - const atom = macho_file.getAtom(atom_index); - const sym_loc = atom.getSymbolWithLoc(); - - log.debug("mark(ATOM({d}, %{d}, {?d}))", .{ atom_index, sym_loc.sym_index, sym_loc.getFile() }); - - alive.putAssumeCapacityNoClobber(atom_index, {}); - - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - - const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - const header = macho_file.sections.items(.header)[sym.n_sect - 1]; - if (header.isZerofill()) return; - - const code = Atom.getAtomCode(macho_file, atom_index); - const relocs = Atom.getAtomRelocs(macho_file, atom_index); - const ctx = Atom.getRelocContext(macho_file, atom_index); - - for (relocs) |rel| { - const reloc_target = switch (cpu_arch) { - 
.aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { - .ARM64_RELOC_ADDEND => continue, - else => Atom.parseRelocTarget(macho_file, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = code, - .base_offset = ctx.base_offset, - .base_addr = ctx.base_addr, - }), - }, - .x86_64 => Atom.parseRelocTarget(macho_file, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = code, - .base_offset = ctx.base_offset, - .base_addr = ctx.base_addr, - }), - else => unreachable, - }; - const target_sym = macho_file.getSymbol(reloc_target); - - if (target_sym.undf()) continue; - if (reloc_target.getFile() == null) { - const target_sym_name = macho_file.getSymbolName(reloc_target); - if (mem.eql(u8, "__mh_execute_header", target_sym_name)) continue; - if (mem.eql(u8, "___dso_handle", target_sym_name)) continue; - - unreachable; // referenced symbol not found - } - - const object = macho_file.objects.items[reloc_target.getFile().?]; - const target_atom_index = object.getAtomIndexForSymbol(reloc_target.sym_index).?; - log.debug(" following ATOM({d}, %{d}, {?d})", .{ - target_atom_index, - macho_file.getAtom(target_atom_index).sym_index, - macho_file.getAtom(target_atom_index).getFile(), - }); - - markLive(macho_file, target_atom_index, alive); - } +fn markSymbol(sym: *Symbol, roots: *std.ArrayList(*Atom), macho_file: *MachO) !void { + const atom = sym.getAtom(macho_file) orelse return; + if (markAtom(atom)) try roots.append(atom); } -fn refersLive(macho_file: *MachO, atom_index: Atom.Index, alive: AtomTable) bool { - const atom = macho_file.getAtom(atom_index); - const sym_loc = atom.getSymbolWithLoc(); - - log.debug("refersLive(ATOM({d}, %{d}, {?d}))", .{ atom_index, sym_loc.sym_index, sym_loc.getFile() }); - - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - - const sym = macho_file.getSymbol(sym_loc); - const header = macho_file.sections.items(.header)[sym.n_sect - 1]; - 
assert(!header.isZerofill()); - - const code = Atom.getAtomCode(macho_file, atom_index); - const relocs = Atom.getAtomRelocs(macho_file, atom_index); - const ctx = Atom.getRelocContext(macho_file, atom_index); - - for (relocs) |rel| { - const reloc_target = switch (cpu_arch) { - .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { - .ARM64_RELOC_ADDEND => continue, - else => Atom.parseRelocTarget(macho_file, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = code, - .base_offset = ctx.base_offset, - .base_addr = ctx.base_addr, - }), - }, - .x86_64 => Atom.parseRelocTarget(macho_file, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = code, - .base_offset = ctx.base_offset, - .base_addr = ctx.base_addr, - }), - else => unreachable, - }; - - const object = macho_file.objects.items[reloc_target.getFile().?]; - const target_atom_index = object.getAtomIndexForSymbol(reloc_target.sym_index) orelse { - log.debug("atom for symbol '{s}' not found; skipping...", .{macho_file.getSymbolName(reloc_target)}); - continue; - }; - if (alive.contains(target_atom_index)) { - log.debug(" refers live ATOM({d}, %{d}, {?d})", .{ - target_atom_index, - macho_file.getAtom(target_atom_index).sym_index, - macho_file.getAtom(target_atom_index).getFile(), - }); - return true; - } - } - - return false; +fn markAtom(atom: *Atom) bool { + const already_visited = atom.flags.visited; + atom.flags.visited = true; + return atom.flags.alive and !already_visited; } -fn mark(macho_file: *MachO, roots: AtomTable, alive: *AtomTable) void { - var it = roots.keyIterator(); - while (it.next()) |root| { - markLive(macho_file, root.*, alive); +fn mark(roots: []*Atom, objects: []const File.Index, macho_file: *MachO) void { + for (roots) |root| { + markLive(root, macho_file); } var loop: bool = true; while (loop) { loop = false; - for (macho_file.objects.items) |object| { - for (object.atoms.items) |atom_index| { - if (alive.contains(atom_index)) continue; - - const atom 
= macho_file.getAtom(atom_index); - const sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| - source_sym.n_sect - 1 - else blk: { - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - break :blk sect_id; - }; - const source_sect = object.getSourceSection(sect_id); - - if (source_sect.isDontDeadStripIfReferencesLive()) { - if (refersLive(macho_file, atom_index, alive.*)) { - markLive(macho_file, atom_index, alive); - loop = true; - } - } - } - } - } - - for (macho_file.objects.items, 0..) |_, object_id| { - // Traverse unwind and eh_frame records noting if the source symbol has been marked, and if so, - // marking all references as live. - markUnwindRecords(macho_file, @as(u32, @intCast(object_id)), alive); - } -} - -fn markUnwindRecords(macho_file: *MachO, object_id: u32, alive: *AtomTable) void { - const object = &macho_file.objects.items[object_id]; - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - - const unwind_records = object.getUnwindRecords(); - - for (object.exec_atoms.items) |atom_index| { - var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - - if (!object.hasUnwindRecords()) { - if (alive.contains(atom_index)) { - // Mark references live and continue. - markEhFrameRecords(macho_file, object_id, atom_index, alive); - } else { - while (inner_syms_it.next()) |sym| { - if (object.eh_frame_records_lookup.get(sym)) |fde_offset| { - // Mark dead and continue. - object.eh_frame_relocs_lookup.getPtr(fde_offset).?.dead = true; - } - } - } - continue; - } - - while (inner_syms_it.next()) |sym| { - const record_id = object.unwind_records_lookup.get(sym) orelse continue; - if (object.unwind_relocs_lookup[record_id].dead) continue; // already marked, nothing to do - if (!alive.contains(atom_index)) { - // Mark the record dead and continue. 
- object.unwind_relocs_lookup[record_id].dead = true; - if (object.eh_frame_records_lookup.get(sym)) |fde_offset| { - object.eh_frame_relocs_lookup.getPtr(fde_offset).?.dead = true; - } - continue; - } - - const record = unwind_records[record_id]; - if (UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) { - markEhFrameRecords(macho_file, object_id, atom_index, alive); - } else { - if (UnwindInfo.getPersonalityFunctionReloc(macho_file, object_id, record_id)) |rel| { - const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = object_id, - .rel = rel, - .code = mem.asBytes(&record), - .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), - }); - const target_sym = macho_file.getSymbol(reloc_target); - if (!target_sym.undf()) { - const target_object = macho_file.objects.items[reloc_target.getFile().?]; - const target_atom_index = target_object.getAtomIndexForSymbol(reloc_target.sym_index).?; - markLive(macho_file, target_atom_index, alive); - } - } - - if (UnwindInfo.getLsdaReloc(macho_file, object_id, record_id)) |rel| { - const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = object_id, - .rel = rel, - .code = mem.asBytes(&record), - .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), - }); - const target_object = macho_file.objects.items[reloc_target.getFile().?]; - const target_atom_index = target_object.getAtomIndexForSymbol(reloc_target.sym_index).?; - markLive(macho_file, target_atom_index, alive); + for (objects) |index| { + for (macho_file.getFile(index).?.getAtoms()) |atom_index| { + const atom = macho_file.getAtom(atom_index).?; + const isec = atom.getInputSection(macho_file); + if (isec.isDontDeadStripIfReferencesLive() and !atom.flags.alive and refersLive(atom, macho_file)) { + markLive(atom, macho_file); + loop = true; } } } } } -fn markEhFrameRecords(macho_file: *MachO, object_id: u32, atom_index: Atom.Index, alive: *AtomTable) void { - 
const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const object = &macho_file.objects.items[object_id]; - var it = object.getEhFrameRecordsIterator(); - var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); +fn markLive(atom: *Atom, macho_file: *MachO) void { + assert(atom.flags.visited); + atom.flags.alive = true; + track_live_log.debug("{}marking live atom({d},{s})", .{ + track_live_level, + atom.atom_index, + atom.getName(macho_file), + }); - while (inner_syms_it.next()) |sym| { - const fde_offset = object.eh_frame_records_lookup.get(sym) orelse continue; // Continue in case we hit a temp symbol alias - it.seekTo(fde_offset); - const fde = (it.next() catch continue).?; // We don't care about the error at this point since it was already handled + if (build_options.enable_logging) + track_live_level.incr(); - const cie_ptr = fde.getCiePointerSource(object_id, macho_file, fde_offset); - const cie_offset = fde_offset + 4 - cie_ptr; - it.seekTo(cie_offset); - const cie = (it.next() catch continue).?; // We don't care about the error at this point since it was already handled - - switch (cpu_arch) { - .aarch64 => { - // Mark FDE references which should include any referenced LSDA record - const relocs = eh_frame.getRelocs(macho_file, object_id, fde_offset); - for (relocs) |rel| { - const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = object_id, - .rel = rel, - .code = fde.data, - .base_offset = @as(i32, @intCast(fde_offset)) + 4, - }); - const target_sym = macho_file.getSymbol(reloc_target); - if (!target_sym.undf()) blk: { - const target_object = macho_file.objects.items[reloc_target.getFile().?]; - const target_atom_index = target_object.getAtomIndexForSymbol(reloc_target.sym_index) orelse - break :blk; - markLive(macho_file, target_atom_index, alive); - } - } - }, - .x86_64 => { - const sect = object.getSourceSection(object.eh_frame_sect_id.?); - const lsda_ptr = 
fde.getLsdaPointer(cie, .{ - .base_addr = sect.addr, - .base_offset = fde_offset, - }) catch continue; // We don't care about the error at this point since it was already handled - if (lsda_ptr) |lsda_address| { - // Mark LSDA record as live - const sym_index = object.getSymbolByAddress(lsda_address, null); - const target_atom_index = object.getAtomIndexForSymbol(sym_index).?; - markLive(macho_file, target_atom_index, alive); - } - }, - else => unreachable, + for (atom.getRelocs(macho_file)) |rel| { + const target_atom = switch (rel.tag) { + .local => rel.getTargetAtom(macho_file), + .@"extern" => rel.getTargetSymbol(macho_file).getAtom(macho_file), + }; + if (target_atom) |ta| { + if (markAtom(ta)) markLive(ta, macho_file); } + } - // Mark CIE references which should include any referenced personalities - // that are defined locally. - if (cie.getPersonalityPointerReloc(macho_file, object_id, cie_offset)) |reloc_target| { - const target_sym = macho_file.getSymbol(reloc_target); - if (!target_sym.undf()) { - const target_object = macho_file.objects.items[reloc_target.getFile().?]; - const target_atom_index = target_object.getAtomIndexForSymbol(reloc_target.sym_index).?; - markLive(macho_file, target_atom_index, alive); + for (atom.getUnwindRecords(macho_file)) |cu_index| { + const cu = macho_file.getUnwindRecord(cu_index); + const cu_atom = cu.getAtom(macho_file); + if (markAtom(cu_atom)) markLive(cu_atom, macho_file); + + if (cu.getLsdaAtom(macho_file)) |lsda| { + if (markAtom(lsda)) markLive(lsda, macho_file); + } + if (cu.getFde(macho_file)) |fde| { + const fde_atom = fde.getAtom(macho_file); + if (markAtom(fde_atom)) markLive(fde_atom, macho_file); + + if (fde.getLsdaAtom(macho_file)) |lsda| { + if (markAtom(lsda)) markLive(lsda, macho_file); } } } } -fn prune(macho_file: *MachO, alive: AtomTable) void { - log.debug("pruning dead atoms", .{}); - for (macho_file.objects.items) |*object| { - var i: usize = 0; - while (i < object.atoms.items.len) { - const 
atom_index = object.atoms.items[i]; - if (alive.contains(atom_index)) { - i += 1; - continue; - } +fn refersLive(atom: *Atom, macho_file: *MachO) bool { + for (atom.getRelocs(macho_file)) |rel| { + const target_atom = switch (rel.tag) { + .local => rel.getTargetAtom(macho_file), + .@"extern" => rel.getTargetSymbol(macho_file).getAtom(macho_file), + }; + if (target_atom) |ta| { + if (ta.flags.alive) return true; + } + } + return false; +} - const atom = macho_file.getAtom(atom_index); - const sym_loc = atom.getSymbolWithLoc(); - - log.debug("prune(ATOM({d}, %{d}, {?d}))", .{ - atom_index, - sym_loc.sym_index, - sym_loc.getFile(), - }); - log.debug(" {s} in {s}", .{ macho_file.getSymbolName(sym_loc), object.name }); - - const sym = macho_file.getSymbolPtr(sym_loc); - const sect_id = sym.n_sect - 1; - var section = macho_file.sections.get(sect_id); - section.header.size -= atom.size; - - if (atom.prev_index) |prev_index| { - const prev = macho_file.getAtomPtr(prev_index); - prev.next_index = atom.next_index; - } else { - if (atom.next_index) |next_index| { - section.first_atom_index = next_index; - } - } - if (atom.next_index) |next_index| { - const next = macho_file.getAtomPtr(next_index); - next.prev_index = atom.prev_index; - } else { - if (atom.prev_index) |prev_index| { - section.last_atom_index = prev_index; - } else { - assert(section.header.size == 0); - section.first_atom_index = null; - section.last_atom_index = null; - } - } - - macho_file.sections.set(sect_id, section); - _ = object.atoms.swapRemove(i); - - sym.n_desc = MachO.N_DEAD; - - var inner_sym_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - while (inner_sym_it.next()) |inner| { - const inner_sym = macho_file.getSymbolPtr(inner); - inner_sym.n_desc = MachO.N_DEAD; - } - - if (Atom.getSectionAlias(macho_file, atom_index)) |alias| { - const alias_sym = macho_file.getSymbolPtr(alias); - alias_sym.n_desc = MachO.N_DEAD; +fn prune(objects: []const File.Index, macho_file: *MachO) void { + for 
(objects) |index| { + for (macho_file.getFile(index).?.getAtoms()) |atom_index| { + const atom = macho_file.getAtom(atom_index).?; + if (atom.flags.alive and !atom.flags.visited) { + atom.flags.alive = false; + atom.markUnwindRecordsDead(macho_file); } } } } -const std = @import("std"); +const Level = struct { + value: usize = 0, + + fn incr(self: *@This()) void { + self.value += 1; + } + + pub fn format( + self: *const @This(), + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + try writer.writeByteNTimes(' ', self.value); + } +}; + +var track_live_level: Level = .{}; + const assert = std.debug.assert; -const eh_frame = @import("eh_frame.zig"); +const build_options = @import("build_options"); const log = std.log.scoped(.dead_strip); const macho = std.macho; const math = std.math; const mem = std.mem; +const trace = @import("../tracy.zig").trace; +const track_live_log = std.log.scoped(.dead_strip_track_live); +const std = @import("std"); const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); +const File = @import("file.zig").File; const MachO = @import("../MachO.zig"); -const SymbolWithLoc = MachO.SymbolWithLoc; -const UnwindInfo = @import("UnwindInfo.zig"); - -const AtomTable = std.AutoHashMap(Atom.Index, void); +const Symbol = @import("Symbol.zig"); diff --git a/src/link/MachO/dyld_info/Rebase.zig b/src/link/MachO/dyld_info/Rebase.zig index 512e23eddb..ffad0362f9 100644 --- a/src/link/MachO/dyld_info/Rebase.zig +++ b/src/link/MachO/dyld_info/Rebase.zig @@ -1,3 +1,14 @@ +const Rebase = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const leb = std.leb; +const log = std.log.scoped(.dyld_info); +const macho = std.macho; +const testing = std.testing; + +const Allocator = std.mem.Allocator; + entries: std.ArrayListUnmanaged(Entry) = .{}, buffer: std.ArrayListUnmanaged(u8) = .{}, @@ -168,7 +179,7 @@ fn rebaseTimesSkip(count: usize, 
skip: u64, writer: anytype) !void { fn addAddr(addr: u64, writer: anytype) !void { log.debug(">>> add: {x}", .{addr}); - if (std.mem.isAlignedGeneric(u64, addr, @sizeOf(u64))) { + if (std.mem.isAligned(addr, @sizeOf(u64))) { const imm = @divExact(addr, @sizeOf(u64)); if (imm <= 0xf) { try writer.writeByte(macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | @as(u4, @truncate(imm))); @@ -561,14 +572,3 @@ test "rebase - composite" { macho.REBASE_OPCODE_DONE, }, rebase.buffer.items); } - -const Rebase = @This(); - -const std = @import("std"); -const assert = std.debug.assert; -const leb = std.leb; -const log = std.log.scoped(.dyld_info); -const macho = std.macho; -const testing = std.testing; - -const Allocator = std.mem.Allocator; diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/dyld_info/Trie.zig similarity index 96% rename from src/link/MachO/Trie.zig rename to src/link/MachO/dyld_info/Trie.zig index 98add0315c..edef57569a 100644 --- a/src/link/MachO/Trie.zig +++ b/src/link/MachO/dyld_info/Trie.zig @@ -28,347 +28,16 @@ //! After the optional exported symbol information is a byte of how many edges (0-255) that //! this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of //! the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to. -/// The root node of the trie. -root: ?*Node = null, +const Trie = @This(); -/// If you want to access nodes ordered in DFS fashion, -/// you should call `finalize` first since the nodes -/// in this container are not guaranteed to not be stale -/// if more insertions took place after the last `finalize` -/// call. -ordered_nodes: std.ArrayListUnmanaged(*Node) = .{}, - -/// The size of the trie in bytes. -/// This value may be outdated if there were additional -/// insertions performed after `finalize` was called. -/// Call `finalize` before accessing this value to ensure -/// it is up-to-date. -size: u64 = 0, - -/// Number of nodes currently in the trie. 
-node_count: usize = 0, - -trie_dirty: bool = true, - -/// Export symbol that is to be placed in the trie. -pub const ExportSymbol = struct { - /// Name of the symbol. - name: []const u8, - - /// Offset of this symbol's virtual memory address from the beginning - /// of the __TEXT segment. - vmaddr_offset: u64, - - /// Export flags of this exported symbol. - export_flags: u64, -}; - -/// Insert a symbol into the trie, updating the prefixes in the process. -/// This operation may change the layout of the trie by splicing edges in -/// certain circumstances. -pub fn put(self: *Trie, allocator: Allocator, symbol: ExportSymbol) !void { - const node = try self.root.?.put(allocator, symbol.name); - node.terminal_info = .{ - .vmaddr_offset = symbol.vmaddr_offset, - .export_flags = symbol.export_flags, - }; - self.trie_dirty = true; -} - -/// Finalizes this trie for writing to a byte stream. -/// This step performs multiple passes through the trie ensuring -/// there are no gaps after every `Node` is ULEB128 encoded. -/// Call this method before trying to `write` the trie to a byte stream. -pub fn finalize(self: *Trie, allocator: Allocator) !void { - if (!self.trie_dirty) return; - - self.ordered_nodes.shrinkRetainingCapacity(0); - try self.ordered_nodes.ensureTotalCapacity(allocator, self.node_count); - - var fifo = std.fifo.LinearFifo(*Node, .Dynamic).init(allocator); - defer fifo.deinit(); - - try fifo.writeItem(self.root.?); - - while (fifo.readItem()) |next| { - for (next.edges.items) |*edge| { - try fifo.writeItem(edge.to); - } - self.ordered_nodes.appendAssumeCapacity(next); - } - - var more: bool = true; - while (more) { - self.size = 0; - more = false; - for (self.ordered_nodes.items) |node| { - const res = try node.finalize(self.size); - self.size += res.node_size; - if (res.updated) more = true; - } - } - - self.trie_dirty = false; -} - -const ReadError = error{ - OutOfMemory, - EndOfStream, - Overflow, -}; - -/// Parse the trie from a byte stream. 
-pub fn read(self: *Trie, allocator: Allocator, reader: anytype) ReadError!usize { - return self.root.?.read(allocator, reader); -} - -/// Write the trie to a byte stream. -/// Panics if the trie was not finalized using `finalize` before calling this method. -pub fn write(self: Trie, writer: anytype) !u64 { - assert(!self.trie_dirty); - var counting_writer = std.io.countingWriter(writer); - for (self.ordered_nodes.items) |node| { - try node.write(counting_writer.writer()); - } - return counting_writer.bytes_written; -} - -pub fn init(self: *Trie, allocator: Allocator) !void { - assert(self.root == null); - const root = try allocator.create(Node); - root.* = .{ .base = self }; - self.root = root; - self.node_count += 1; -} - -pub fn deinit(self: *Trie, allocator: Allocator) void { - if (self.root) |root| { - root.deinit(allocator); - allocator.destroy(root); - } - self.ordered_nodes.deinit(allocator); -} - -test "Trie node count" { - const gpa = testing.allocator; - var trie: Trie = .{}; - defer trie.deinit(gpa); - try trie.init(gpa); - - try testing.expectEqual(trie.node_count, 0); - try testing.expect(trie.root == null); - - try trie.put(gpa, .{ - .name = "_main", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expectEqual(trie.node_count, 2); - - // Inserting the same node shouldn't update the trie. - try trie.put(gpa, .{ - .name = "_main", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expectEqual(trie.node_count, 2); - - try trie.put(gpa, .{ - .name = "__mh_execute_header", - .vmaddr_offset = 0x1000, - .export_flags = 0, - }); - try testing.expectEqual(trie.node_count, 4); - - // Inserting the same node shouldn't update the trie. 
- try trie.put(gpa, .{ - .name = "__mh_execute_header", - .vmaddr_offset = 0x1000, - .export_flags = 0, - }); - try testing.expectEqual(trie.node_count, 4); - try trie.put(gpa, .{ - .name = "_main", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expectEqual(trie.node_count, 4); -} - -test "Trie basic" { - const gpa = testing.allocator; - var trie: Trie = .{}; - defer trie.deinit(gpa); - try trie.init(gpa); - - // root --- _st ---> node - try trie.put(gpa, .{ - .name = "_st", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expect(trie.root.?.edges.items.len == 1); - try testing.expect(mem.eql(u8, trie.root.?.edges.items[0].label, "_st")); - - { - // root --- _st ---> node --- art ---> node - try trie.put(gpa, .{ - .name = "_start", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expect(trie.root.?.edges.items.len == 1); - - const nextEdge = &trie.root.?.edges.items[0]; - try testing.expect(mem.eql(u8, nextEdge.label, "_st")); - try testing.expect(nextEdge.to.edges.items.len == 1); - try testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "art")); - } - { - // root --- _ ---> node --- st ---> node --- art ---> node - // | - // | --- main ---> node - try trie.put(gpa, .{ - .name = "_main", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expect(trie.root.?.edges.items.len == 1); - - const nextEdge = &trie.root.?.edges.items[0]; - try testing.expect(mem.eql(u8, nextEdge.label, "_")); - try testing.expect(nextEdge.to.edges.items.len == 2); - try testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "st")); - try testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "main")); - - const nextNextEdge = &nextEdge.to.edges.items[0]; - try testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "art")); - } -} - -fn expectEqualHexStrings(expected: []const u8, given: []const u8) !void { - assert(expected.len > 0); - if (mem.eql(u8, expected, given)) return; - const expected_fmt = try 
std.fmt.allocPrint(testing.allocator, "{x}", .{std.fmt.fmtSliceHexLower(expected)}); - defer testing.allocator.free(expected_fmt); - const given_fmt = try std.fmt.allocPrint(testing.allocator, "{x}", .{std.fmt.fmtSliceHexLower(given)}); - defer testing.allocator.free(given_fmt); - const idx = mem.indexOfDiff(u8, expected_fmt, given_fmt).?; - const padding = try testing.allocator.alloc(u8, idx + 5); - defer testing.allocator.free(padding); - @memset(padding, ' '); - std.debug.print("\nEXP: {s}\nGIV: {s}\n{s}^ -- first differing byte\n", .{ expected_fmt, given_fmt, padding }); - return error.TestFailed; -} - -test "write Trie to a byte stream" { - var gpa = testing.allocator; - var trie: Trie = .{}; - defer trie.deinit(gpa); - try trie.init(gpa); - - try trie.put(gpa, .{ - .name = "__mh_execute_header", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try trie.put(gpa, .{ - .name = "_main", - .vmaddr_offset = 0x1000, - .export_flags = 0, - }); - - try trie.finalize(gpa); - try trie.finalize(gpa); // Finalizing multiple times is a nop subsequently unless we add new nodes. - - const exp_buffer = [_]u8{ - 0x0, 0x1, // node root - 0x5f, 0x0, 0x5, // edge '_' - 0x0, 0x2, // non-terminal node - 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header' - 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header' - 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main' - 0x2, 0x0, 0x0, 0x0, // terminal node - 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node - }; - - const buffer = try gpa.alloc(u8, trie.size); - defer gpa.free(buffer); - var stream = std.io.fixedBufferStream(buffer); - { - _ = try trie.write(stream.writer()); - try expectEqualHexStrings(&exp_buffer, buffer); - } - { - // Writing finalized trie again should yield the same result. 
- try stream.seekTo(0); - _ = try trie.write(stream.writer()); - try expectEqualHexStrings(&exp_buffer, buffer); - } -} - -test "parse Trie from byte stream" { - var gpa = testing.allocator; - - const in_buffer = [_]u8{ - 0x0, 0x1, // node root - 0x5f, 0x0, 0x5, // edge '_' - 0x0, 0x2, // non-terminal node - 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header' - 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header' - 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main' - 0x2, 0x0, 0x0, 0x0, // terminal node - 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node - }; - - var in_stream = std.io.fixedBufferStream(&in_buffer); - var trie: Trie = .{}; - defer trie.deinit(gpa); - try trie.init(gpa); - const nread = try trie.read(gpa, in_stream.reader()); - - try testing.expect(nread == in_buffer.len); - - try trie.finalize(gpa); - - const out_buffer = try gpa.alloc(u8, trie.size); - defer gpa.free(out_buffer); - var out_stream = std.io.fixedBufferStream(out_buffer); - _ = try trie.write(out_stream.writer()); - try expectEqualHexStrings(&in_buffer, out_buffer); -} - -test "ordering bug" { - var gpa = testing.allocator; - var trie: Trie = .{}; - defer trie.deinit(gpa); - try trie.init(gpa); - - try trie.put(gpa, .{ - .name = "_asStr", - .vmaddr_offset = 0x558, - .export_flags = 0, - }); - try trie.put(gpa, .{ - .name = "_a", - .vmaddr_offset = 0x8008, - .export_flags = 0, - }); - try trie.finalize(gpa); - - const exp_buffer = [_]u8{ - 0x00, 0x01, 0x5F, 0x61, 0x00, 0x06, 0x04, 0x00, - 0x88, 0x80, 0x02, 0x01, 0x73, 0x53, 0x74, 0x72, - 0x00, 0x12, 0x03, 0x00, 0xD8, 0x0A, 0x00, - }; - - const buffer = try gpa.alloc(u8, trie.size); - defer gpa.free(buffer); - var stream = std.io.fixedBufferStream(buffer); - // Writing finalized trie again should yield the same result. 
- _ = try trie.write(stream.writer()); - try expectEqualHexStrings(&exp_buffer, buffer); -} +const std = @import("std"); +const mem = std.mem; +const leb = std.leb; +const log = std.log.scoped(.macho); +const macho = std.macho; +const testing = std.testing; +const assert = std.debug.assert; +const Allocator = mem.Allocator; pub const Node = struct { base: *Trie, @@ -601,13 +270,343 @@ pub const Node = struct { } }; -const Trie = @This(); +/// The root node of the trie. +root: ?*Node = null, -const std = @import("std"); -const mem = std.mem; -const leb = std.leb; -const log = std.log.scoped(.link); -const macho = std.macho; -const testing = std.testing; -const assert = std.debug.assert; -const Allocator = mem.Allocator; +/// If you want to access nodes ordered in DFS fashion, +/// you should call `finalize` first since the nodes +/// in this container are not guaranteed to not be stale +/// if more insertions took place after the last `finalize` +/// call. +ordered_nodes: std.ArrayListUnmanaged(*Node) = .{}, + +/// The size of the trie in bytes. +/// This value may be outdated if there were additional +/// insertions performed after `finalize` was called. +/// Call `finalize` before accessing this value to ensure +/// it is up-to-date. +size: u64 = 0, + +/// Number of nodes currently in the trie. +node_count: usize = 0, + +trie_dirty: bool = true, + +/// Export symbol that is to be placed in the trie. +pub const ExportSymbol = struct { + /// Name of the symbol. + name: []const u8, + + /// Offset of this symbol's virtual memory address from the beginning + /// of the __TEXT segment. + vmaddr_offset: u64, + + /// Export flags of this exported symbol. + export_flags: u64, +}; + +/// Insert a symbol into the trie, updating the prefixes in the process. +/// This operation may change the layout of the trie by splicing edges in +/// certain circumstances. 
+pub fn put(self: *Trie, allocator: Allocator, symbol: ExportSymbol) !void { + const node = try self.root.?.put(allocator, symbol.name); + node.terminal_info = .{ + .vmaddr_offset = symbol.vmaddr_offset, + .export_flags = symbol.export_flags, + }; + self.trie_dirty = true; +} + +/// Finalizes this trie for writing to a byte stream. +/// This step performs multiple passes through the trie ensuring +/// there are no gaps after every `Node` is ULEB128 encoded. +/// Call this method before trying to `write` the trie to a byte stream. +pub fn finalize(self: *Trie, allocator: Allocator) !void { + if (!self.trie_dirty) return; + + self.ordered_nodes.shrinkRetainingCapacity(0); + try self.ordered_nodes.ensureTotalCapacity(allocator, self.node_count); + + var fifo = std.fifo.LinearFifo(*Node, .Dynamic).init(allocator); + defer fifo.deinit(); + + try fifo.writeItem(self.root.?); + + while (fifo.readItem()) |next| { + for (next.edges.items) |*edge| { + try fifo.writeItem(edge.to); + } + self.ordered_nodes.appendAssumeCapacity(next); + } + + var more: bool = true; + while (more) { + self.size = 0; + more = false; + for (self.ordered_nodes.items) |node| { + const res = try node.finalize(self.size); + self.size += res.node_size; + if (res.updated) more = true; + } + } + + self.trie_dirty = false; +} + +const ReadError = error{ + OutOfMemory, + EndOfStream, + Overflow, +}; + +/// Parse the trie from a byte stream. +pub fn read(self: *Trie, allocator: Allocator, reader: anytype) ReadError!usize { + return self.root.?.read(allocator, reader); +} + +/// Write the trie to a byte stream. +/// Panics if the trie was not finalized using `finalize` before calling this method. 
+pub fn write(self: Trie, writer: anytype) !void { + assert(!self.trie_dirty); + for (self.ordered_nodes.items) |node| { + try node.write(writer); + } +} + +pub fn init(self: *Trie, allocator: Allocator) !void { + assert(self.root == null); + const root = try allocator.create(Node); + root.* = .{ .base = self }; + self.root = root; + self.node_count += 1; +} + +pub fn deinit(self: *Trie, allocator: Allocator) void { + if (self.root) |root| { + root.deinit(allocator); + allocator.destroy(root); + } + self.ordered_nodes.deinit(allocator); +} + +test "Trie node count" { + const gpa = testing.allocator; + var trie: Trie = .{}; + defer trie.deinit(gpa); + try trie.init(gpa); + + try testing.expectEqual(@as(usize, 1), trie.node_count); + try testing.expect(trie.root != null); + + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try testing.expectEqual(@as(usize, 2), trie.node_count); + + // Inserting the same node shouldn't update the trie. + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try testing.expectEqual(@as(usize, 2), trie.node_count); + + try trie.put(gpa, .{ + .name = "__mh_execute_header", + .vmaddr_offset = 0x1000, + .export_flags = 0, + }); + try testing.expectEqual(@as(usize, 4), trie.node_count); + + // Inserting the same node shouldn't update the trie. 
+ try trie.put(gpa, .{ + .name = "__mh_execute_header", + .vmaddr_offset = 0x1000, + .export_flags = 0, + }); + try testing.expectEqual(@as(usize, 4), trie.node_count); + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try testing.expectEqual(@as(usize, 4), trie.node_count); +} + +test "Trie basic" { + const gpa = testing.allocator; + var trie: Trie = .{}; + defer trie.deinit(gpa); + try trie.init(gpa); + + // root --- _st ---> node + try trie.put(gpa, .{ + .name = "_st", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try testing.expect(trie.root.?.edges.items.len == 1); + try testing.expect(mem.eql(u8, trie.root.?.edges.items[0].label, "_st")); + + { + // root --- _st ---> node --- art ---> node + try trie.put(gpa, .{ + .name = "_start", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try testing.expect(trie.root.?.edges.items.len == 1); + + const nextEdge = &trie.root.?.edges.items[0]; + try testing.expect(mem.eql(u8, nextEdge.label, "_st")); + try testing.expect(nextEdge.to.edges.items.len == 1); + try testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "art")); + } + { + // root --- _ ---> node --- st ---> node --- art ---> node + // | + // | --- main ---> node + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try testing.expect(trie.root.?.edges.items.len == 1); + + const nextEdge = &trie.root.?.edges.items[0]; + try testing.expect(mem.eql(u8, nextEdge.label, "_")); + try testing.expect(nextEdge.to.edges.items.len == 2); + try testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "st")); + try testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "main")); + + const nextNextEdge = &nextEdge.to.edges.items[0]; + try testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "art")); + } +} + +fn expectEqualHexStrings(expected: []const u8, given: []const u8) !void { + assert(expected.len > 0); + if (mem.eql(u8, expected, given)) return; + 
const expected_fmt = try std.fmt.allocPrint(testing.allocator, "{x}", .{std.fmt.fmtSliceHexLower(expected)});
+    defer testing.allocator.free(expected_fmt);
+    const given_fmt = try std.fmt.allocPrint(testing.allocator, "{x}", .{std.fmt.fmtSliceHexLower(given)});
+    defer testing.allocator.free(given_fmt);
+    const idx = mem.indexOfDiff(u8, expected_fmt, given_fmt).?;
+    const padding = try testing.allocator.alloc(u8, idx + 5);
+    defer testing.allocator.free(padding);
+    @memset(padding, ' ');
+    std.debug.print("\nEXP: {s}\nGIV: {s}\n{s}^ -- first differing byte\n", .{ expected_fmt, given_fmt, padding });
+    return error.TestFailed;
+}
+
+test "write Trie to a byte stream" {
+    var gpa = testing.allocator;
+    var trie: Trie = .{};
+    defer trie.deinit(gpa);
+    try trie.init(gpa);
+
+    try trie.put(gpa, .{
+        .name = "__mh_execute_header",
+        .vmaddr_offset = 0,
+        .export_flags = 0,
+    });
+    try trie.put(gpa, .{
+        .name = "_main",
+        .vmaddr_offset = 0x1000,
+        .export_flags = 0,
+    });
+
+    try trie.finalize(gpa);
+    try trie.finalize(gpa); // Finalizing multiple times is a nop subsequently unless we add new nodes.
+
+    const exp_buffer = [_]u8{
+        0x0, 0x1, // node root
+        0x5f, 0x0, 0x5, // edge '_'
+        0x0, 0x2, // non-terminal node
+        0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header'
+        0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header'
+        0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main'
+        0x2, 0x0, 0x0, 0x0, // terminal node
+        0x3, 0x0, 0x80, 0x20, 0x0, // terminal node
+    };
+
+    const buffer = try gpa.alloc(u8, trie.size);
+    defer gpa.free(buffer);
+    var stream = std.io.fixedBufferStream(buffer);
+    {
+        _ = try trie.write(stream.writer());
+        try expectEqualHexStrings(&exp_buffer, buffer);
+    }
+    {
+        // Writing finalized trie again should yield the same result.
+ try stream.seekTo(0); + _ = try trie.write(stream.writer()); + try expectEqualHexStrings(&exp_buffer, buffer); + } +} + +test "parse Trie from byte stream" { + const gpa = testing.allocator; + + const in_buffer = [_]u8{ + 0x0, 0x1, // node root + 0x5f, 0x0, 0x5, // edge '_' + 0x0, 0x2, // non-terminal node + 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header' + 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header' + 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main' + 0x2, 0x0, 0x0, 0x0, // terminal node + 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node + }; + + var in_stream = std.io.fixedBufferStream(&in_buffer); + var trie: Trie = .{}; + defer trie.deinit(gpa); + try trie.init(gpa); + const nread = try trie.read(gpa, in_stream.reader()); + + try testing.expect(nread == in_buffer.len); + + try trie.finalize(gpa); + + const out_buffer = try gpa.alloc(u8, trie.size); + defer gpa.free(out_buffer); + var out_stream = std.io.fixedBufferStream(out_buffer); + _ = try trie.write(out_stream.writer()); + try expectEqualHexStrings(&in_buffer, out_buffer); +} + +test "ordering bug" { + const gpa = testing.allocator; + var trie: Trie = .{}; + defer trie.deinit(gpa); + try trie.init(gpa); + + try trie.put(gpa, .{ + .name = "_asStr", + .vmaddr_offset = 0x558, + .export_flags = 0, + }); + try trie.put(gpa, .{ + .name = "_a", + .vmaddr_offset = 0x8008, + .export_flags = 0, + }); + + try trie.finalize(gpa); + + const exp_buffer = [_]u8{ + 0x00, 0x01, 0x5F, 0x61, 0x00, 0x06, 0x04, 0x00, + 0x88, 0x80, 0x02, 0x01, 0x73, 0x53, 0x74, 0x72, + 0x00, 0x12, 0x03, 0x00, 0xD8, 0x0A, 0x00, + }; + + const buffer = try gpa.alloc(u8, trie.size); + defer gpa.free(buffer); + var stream = std.io.fixedBufferStream(buffer); + // Writing finalized trie again should yield the same result. 
+ _ = try trie.write(stream.writer()); + try expectEqualHexStrings(&exp_buffer, buffer); +} diff --git a/src/link/MachO/dyld_info/bind.zig b/src/link/MachO/dyld_info/bind.zig index ca4e73a283..5bc872e277 100644 --- a/src/link/MachO/dyld_info/bind.zig +++ b/src/link/MachO/dyld_info/bind.zig @@ -1,231 +1,391 @@ -pub fn Bind(comptime Ctx: type, comptime Target: type) type { - return struct { - entries: std.ArrayListUnmanaged(Entry) = .{}, - buffer: std.ArrayListUnmanaged(u8) = .{}, +const std = @import("std"); +const assert = std.debug.assert; +const leb = std.leb; +const log = std.log.scoped(.dyld_info); +const macho = std.macho; +const testing = std.testing; - const Self = @This(); +const Allocator = std.mem.Allocator; +const MachO = @import("../../MachO.zig"); +const Symbol = @import("../Symbol.zig"); - const Entry = struct { - target: Target, - offset: u64, - segment_id: u8, - addend: i64, +pub const Entry = struct { + target: Symbol.Index, + offset: u64, + segment_id: u8, + addend: i64, - pub fn lessThan(ctx: Ctx, entry: Entry, other: Entry) bool { - if (entry.segment_id == other.segment_id) { - if (entry.target.eql(other.target)) { - return entry.offset < other.offset; - } - const entry_name = ctx.getSymbolName(entry.target); - const other_name = ctx.getSymbolName(other.target); - return std.mem.lessThan(u8, entry_name, other_name); - } - return entry.segment_id < other.segment_id; + pub fn lessThan(ctx: *MachO, entry: Entry, other: Entry) bool { + if (entry.segment_id == other.segment_id) { + if (entry.target == other.target) { + return entry.offset < other.offset; } - }; + const entry_name = ctx.getSymbol(entry.target).getName(ctx); + const other_name = ctx.getSymbol(other.target).getName(ctx); + return std.mem.lessThan(u8, entry_name, other_name); + } + return entry.segment_id < other.segment_id; + } +}; - pub fn deinit(self: *Self, gpa: Allocator) void { - self.entries.deinit(gpa); - self.buffer.deinit(gpa); +pub const Bind = struct { + entries: 
std.ArrayListUnmanaged(Entry) = .{}, + buffer: std.ArrayListUnmanaged(u8) = .{}, + + const Self = @This(); + + pub fn deinit(self: *Self, gpa: Allocator) void { + self.entries.deinit(gpa); + self.buffer.deinit(gpa); + } + + pub fn size(self: Self) u64 { + return @as(u64, @intCast(self.buffer.items.len)); + } + + pub fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void { + if (self.entries.items.len == 0) return; + + const writer = self.buffer.writer(gpa); + + std.mem.sort(Entry, self.entries.items, ctx, Entry.lessThan); + + var start: usize = 0; + var seg_id: ?u8 = null; + for (self.entries.items, 0..) |entry, i| { + if (seg_id != null and seg_id.? == entry.segment_id) continue; + try finalizeSegment(self.entries.items[start..i], ctx, writer); + seg_id = entry.segment_id; + start = i; } - pub fn size(self: Self) u64 { - return @as(u64, @intCast(self.buffer.items.len)); - } + try finalizeSegment(self.entries.items[start..], ctx, writer); + try done(writer); + } - pub fn finalize(self: *Self, gpa: Allocator, ctx: Ctx) !void { - if (self.entries.items.len == 0) return; + fn finalizeSegment(entries: []const Entry, ctx: *MachO, writer: anytype) !void { + if (entries.len == 0) return; - const writer = self.buffer.writer(gpa); + const seg_id = entries[0].segment_id; + try setSegmentOffset(seg_id, 0, writer); - std.mem.sort(Entry, self.entries.items, ctx, Entry.lessThan); + var offset: u64 = 0; + var addend: i64 = 0; + var count: usize = 0; + var skip: u64 = 0; + var target: ?Symbol.Index = null; - var start: usize = 0; - var seg_id: ?u8 = null; - for (self.entries.items, 0..) |entry, i| { - if (seg_id != null and seg_id.? 
== entry.segment_id) continue; - try finalizeSegment(self.entries.items[start..i], ctx, writer); - seg_id = entry.segment_id; - start = i; - } + var state: enum { + start, + bind_single, + bind_times_skip, + } = .start; - try finalizeSegment(self.entries.items[start..], ctx, writer); - try done(writer); - } - - fn finalizeSegment(entries: []const Entry, ctx: Ctx, writer: anytype) !void { - if (entries.len == 0) return; - - const seg_id = entries[0].segment_id; - try setSegmentOffset(seg_id, 0, writer); - - var offset: u64 = 0; - var addend: i64 = 0; - var count: usize = 0; - var skip: u64 = 0; - var target: ?Target = null; - - var state: enum { - start, - bind_single, - bind_times_skip, - } = .start; - - var i: usize = 0; - while (i < entries.len) : (i += 1) { - const current = entries[i]; - if (target == null or !target.?.eql(current.target)) { - switch (state) { - .start => {}, - .bind_single => try doBind(writer), - .bind_times_skip => try doBindTimesSkip(count, skip, writer), - } - state = .start; - target = current.target; - - const sym = ctx.getSymbol(current.target); - const name = ctx.getSymbolName(current.target); - const flags: u8 = if (sym.weakRef()) macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT else 0; - const ordinal = @divTrunc(@as(i16, @bitCast(sym.n_desc)), macho.N_SYMBOL_RESOLVER); - - try setSymbol(name, flags, writer); - try setTypePointer(writer); - try setDylibOrdinal(ordinal, writer); - - if (current.addend != addend) { - addend = current.addend; - try setAddend(addend, writer); - } - } - - log.debug("{x}, {d}, {x}, {?x}, {s}", .{ offset, count, skip, addend, @tagName(state) }); - log.debug(" => {x}", .{current.offset}); + var i: usize = 0; + while (i < entries.len) : (i += 1) { + const current = entries[i]; + if (target == null or target.? 
!= current.target) { switch (state) { - .start => { - if (current.offset < offset) { - try addAddr(@as(u64, @bitCast(@as(i64, @intCast(current.offset)) - @as(i64, @intCast(offset)))), writer); - offset = offset - (offset - current.offset); - } else if (current.offset > offset) { - const delta = current.offset - offset; - try addAddr(delta, writer); - offset += delta; - } - state = .bind_single; - offset += @sizeOf(u64); - count = 1; - }, - .bind_single => { - if (current.offset == offset) { - try doBind(writer); - state = .start; - } else if (current.offset > offset) { - const delta = current.offset - offset; - state = .bind_times_skip; - skip = @as(u64, @intCast(delta)); - offset += skip; - } else unreachable; - i -= 1; - }, - .bind_times_skip => { - if (current.offset < offset) { - count -= 1; - if (count == 1) { - try doBindAddAddr(skip, writer); - } else { - try doBindTimesSkip(count, skip, writer); - } - state = .start; - offset = offset - (@sizeOf(u64) + skip); - i -= 2; - } else if (current.offset == offset) { - count += 1; - offset += @sizeOf(u64) + skip; - } else { - try doBindTimesSkip(count, skip, writer); - state = .start; - i -= 1; - } - }, + .start => {}, + .bind_single => try doBind(writer), + .bind_times_skip => try doBindTimesSkip(count, skip, writer), } - } + state = .start; + target = current.target; - switch (state) { - .start => unreachable, - .bind_single => try doBind(writer), - .bind_times_skip => try doBindTimesSkip(count, skip, writer), - } - } + const sym = ctx.getSymbol(current.target); + const name = sym.getName(ctx); + const flags: u8 = if (sym.weakRef(ctx)) macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT else 0; + const ordinal: i16 = ord: { + if (sym.flags.interposable) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; + if (sym.flags.import) { + if (ctx.options.namespace == .flat) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; + if (sym.getDylibOrdinal(ctx)) |ord| break :ord @bitCast(ord); + } + if (ctx.options.undefined_treatment == 
.dynamic_lookup) + break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; + break :ord macho.BIND_SPECIAL_DYLIB_SELF; + }; - pub fn write(self: Self, writer: anytype) !void { - if (self.size() == 0) return; - try writer.writeAll(self.buffer.items); - } - }; -} - -pub fn LazyBind(comptime Ctx: type, comptime Target: type) type { - return struct { - entries: std.ArrayListUnmanaged(Entry) = .{}, - buffer: std.ArrayListUnmanaged(u8) = .{}, - offsets: std.ArrayListUnmanaged(u32) = .{}, - - const Self = @This(); - - const Entry = struct { - target: Target, - offset: u64, - segment_id: u8, - addend: i64, - }; - - pub fn deinit(self: *Self, gpa: Allocator) void { - self.entries.deinit(gpa); - self.buffer.deinit(gpa); - self.offsets.deinit(gpa); - } - - pub fn size(self: Self) u64 { - return @as(u64, @intCast(self.buffer.items.len)); - } - - pub fn finalize(self: *Self, gpa: Allocator, ctx: Ctx) !void { - if (self.entries.items.len == 0) return; - - try self.offsets.ensureTotalCapacityPrecise(gpa, self.entries.items.len); - - var cwriter = std.io.countingWriter(self.buffer.writer(gpa)); - const writer = cwriter.writer(); - - var addend: i64 = 0; - - for (self.entries.items) |entry| { - self.offsets.appendAssumeCapacity(@as(u32, @intCast(cwriter.bytes_written))); - - const sym = ctx.getSymbol(entry.target); - const name = ctx.getSymbolName(entry.target); - const flags: u8 = if (sym.weakRef()) macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT else 0; - const ordinal = @divTrunc(@as(i16, @bitCast(sym.n_desc)), macho.N_SYMBOL_RESOLVER); - - try setSegmentOffset(entry.segment_id, entry.offset, writer); try setSymbol(name, flags, writer); + try setTypePointer(writer); try setDylibOrdinal(ordinal, writer); - if (entry.addend != addend) { - try setAddend(entry.addend, writer); - addend = entry.addend; + if (current.addend != addend) { + addend = current.addend; + try setAddend(addend, writer); } + } - try doBind(writer); - try done(writer); + log.debug("{x}, {d}, {x}, {?x}, {s}", .{ offset, count, 
skip, addend, @tagName(state) }); + log.debug(" => {x}", .{current.offset}); + switch (state) { + .start => { + if (current.offset < offset) { + try addAddr(@as(u64, @bitCast(@as(i64, @intCast(current.offset)) - @as(i64, @intCast(offset)))), writer); + offset = offset - (offset - current.offset); + } else if (current.offset > offset) { + const delta = current.offset - offset; + try addAddr(delta, writer); + offset += delta; + } + state = .bind_single; + offset += @sizeOf(u64); + count = 1; + }, + .bind_single => { + if (current.offset == offset) { + try doBind(writer); + state = .start; + } else if (current.offset > offset) { + const delta = current.offset - offset; + state = .bind_times_skip; + skip = @as(u64, @intCast(delta)); + offset += skip; + } else unreachable; + i -= 1; + }, + .bind_times_skip => { + if (current.offset < offset) { + count -= 1; + if (count == 1) { + try doBindAddAddr(skip, writer); + } else { + try doBindTimesSkip(count, skip, writer); + } + state = .start; + offset = offset - (@sizeOf(u64) + skip); + i -= 2; + } else if (current.offset == offset) { + count += 1; + offset += @sizeOf(u64) + skip; + } else { + try doBindTimesSkip(count, skip, writer); + state = .start; + i -= 1; + } + }, } } - pub fn write(self: Self, writer: anytype) !void { - if (self.size() == 0) return; - try writer.writeAll(self.buffer.items); + switch (state) { + .start => unreachable, + .bind_single => try doBind(writer), + .bind_times_skip => try doBindTimesSkip(count, skip, writer), } - }; -} + } + + pub fn write(self: Self, writer: anytype) !void { + if (self.size() == 0) return; + try writer.writeAll(self.buffer.items); + } +}; + +pub const WeakBind = struct { + entries: std.ArrayListUnmanaged(Entry) = .{}, + buffer: std.ArrayListUnmanaged(u8) = .{}, + + const Self = @This(); + + pub fn deinit(self: *Self, gpa: Allocator) void { + self.entries.deinit(gpa); + self.buffer.deinit(gpa); + } + + pub fn size(self: Self) u64 { + return @as(u64, 
@intCast(self.buffer.items.len)); + } + + pub fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void { + if (self.entries.items.len == 0) return; + + const writer = self.buffer.writer(gpa); + + std.mem.sort(Entry, self.entries.items, ctx, Entry.lessThan); + + var start: usize = 0; + var seg_id: ?u8 = null; + for (self.entries.items, 0..) |entry, i| { + if (seg_id != null and seg_id.? == entry.segment_id) continue; + try finalizeSegment(self.entries.items[start..i], ctx, writer); + seg_id = entry.segment_id; + start = i; + } + + try finalizeSegment(self.entries.items[start..], ctx, writer); + try done(writer); + } + + fn finalizeSegment(entries: []const Entry, ctx: *MachO, writer: anytype) !void { + if (entries.len == 0) return; + + const seg_id = entries[0].segment_id; + try setSegmentOffset(seg_id, 0, writer); + + var offset: u64 = 0; + var addend: i64 = 0; + var count: usize = 0; + var skip: u64 = 0; + var target: ?Symbol.Index = null; + + var state: enum { + start, + bind_single, + bind_times_skip, + } = .start; + + var i: usize = 0; + while (i < entries.len) : (i += 1) { + const current = entries[i]; + if (target == null or target.? 
!= current.target) { + switch (state) { + .start => {}, + .bind_single => try doBind(writer), + .bind_times_skip => try doBindTimesSkip(count, skip, writer), + } + state = .start; + target = current.target; + + const sym = ctx.getSymbol(current.target); + const name = sym.getName(ctx); + const flags: u8 = 0; // TODO NON_WEAK_DEFINITION + + try setSymbol(name, flags, writer); + try setTypePointer(writer); + + if (current.addend != addend) { + addend = current.addend; + try setAddend(addend, writer); + } + } + + log.debug("{x}, {d}, {x}, {?x}, {s}", .{ offset, count, skip, addend, @tagName(state) }); + log.debug(" => {x}", .{current.offset}); + switch (state) { + .start => { + if (current.offset < offset) { + try addAddr(@as(u64, @bitCast(@as(i64, @intCast(current.offset)) - @as(i64, @intCast(offset)))), writer); + offset = offset - (offset - current.offset); + } else if (current.offset > offset) { + const delta = current.offset - offset; + try addAddr(delta, writer); + offset += delta; + } + state = .bind_single; + offset += @sizeOf(u64); + count = 1; + }, + .bind_single => { + if (current.offset == offset) { + try doBind(writer); + state = .start; + } else if (current.offset > offset) { + const delta = current.offset - offset; + state = .bind_times_skip; + skip = @as(u64, @intCast(delta)); + offset += skip; + } else unreachable; + i -= 1; + }, + .bind_times_skip => { + if (current.offset < offset) { + count -= 1; + if (count == 1) { + try doBindAddAddr(skip, writer); + } else { + try doBindTimesSkip(count, skip, writer); + } + state = .start; + offset = offset - (@sizeOf(u64) + skip); + i -= 2; + } else if (current.offset == offset) { + count += 1; + offset += @sizeOf(u64) + skip; + } else { + try doBindTimesSkip(count, skip, writer); + state = .start; + i -= 1; + } + }, + } + } + + switch (state) { + .start => unreachable, + .bind_single => try doBind(writer), + .bind_times_skip => try doBindTimesSkip(count, skip, writer), + } + } + + pub fn write(self: Self, 
writer: anytype) !void { + if (self.size() == 0) return; + try writer.writeAll(self.buffer.items); + } +}; + +pub const LazyBind = struct { + entries: std.ArrayListUnmanaged(Entry) = .{}, + buffer: std.ArrayListUnmanaged(u8) = .{}, + offsets: std.ArrayListUnmanaged(u32) = .{}, + + const Self = @This(); + + pub fn deinit(self: *Self, gpa: Allocator) void { + self.entries.deinit(gpa); + self.buffer.deinit(gpa); + self.offsets.deinit(gpa); + } + + pub fn size(self: Self) u64 { + return @as(u64, @intCast(self.buffer.items.len)); + } + + pub fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void { + if (self.entries.items.len == 0) return; + + try self.offsets.ensureTotalCapacityPrecise(gpa, self.entries.items.len); + + var cwriter = std.io.countingWriter(self.buffer.writer(gpa)); + const writer = cwriter.writer(); + + var addend: i64 = 0; + + for (self.entries.items) |entry| { + self.offsets.appendAssumeCapacity(@as(u32, @intCast(cwriter.bytes_written))); + + const sym = ctx.getSymbol(entry.target); + const name = sym.getName(ctx); + const flags: u8 = if (sym.weakRef(ctx)) macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT else 0; + const ordinal: i16 = ord: { + if (sym.flags.interposable) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; + if (sym.flags.import) { + if (ctx.options.namespace == .flat) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; + if (sym.getDylibOrdinal(ctx)) |ord| break :ord @bitCast(ord); + } + if (ctx.options.undefined_treatment == .dynamic_lookup) + break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; + break :ord macho.BIND_SPECIAL_DYLIB_SELF; + }; + + try setSegmentOffset(entry.segment_id, entry.offset, writer); + try setSymbol(name, flags, writer); + try setDylibOrdinal(ordinal, writer); + + if (entry.addend != addend) { + try setAddend(entry.addend, writer); + addend = entry.addend; + } + + try doBind(writer); + try done(writer); + } + } + + pub fn write(self: Self, writer: anytype) !void { + if (self.size() == 0) return; + try 
writer.writeAll(self.buffer.items); + } +}; fn setSegmentOffset(segment_id: u8, offset: u64, writer: anytype) !void { log.debug(">>> set segment: {d} and offset: {x}", .{ segment_id, offset }); @@ -282,7 +442,7 @@ fn doBind(writer: anytype) !void { fn doBindAddAddr(addr: u64, writer: anytype) !void { log.debug(">>> bind with add: {x}", .{addr}); - if (std.mem.isAlignedGeneric(u64, addr, @sizeOf(u64))) { + if (std.mem.isAligned(addr, @sizeOf(u64))) { const imm = @divExact(addr, @sizeOf(u64)); if (imm <= 0xf) { try writer.writeByte( @@ -312,429 +472,3 @@ fn done(writer: anytype) !void { log.debug(">>> done", .{}); try writer.writeByte(macho.BIND_OPCODE_DONE); } - -const TestContext = struct { - symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{}, - strtab: std.ArrayListUnmanaged(u8) = .{}, - - const Target = struct { - index: u32, - - fn eql(this: Target, other: Target) bool { - return this.index == other.index; - } - }; - - fn deinit(ctx: *TestContext, gpa: Allocator) void { - ctx.symbols.deinit(gpa); - ctx.strtab.deinit(gpa); - } - - fn addSymbol(ctx: *TestContext, gpa: Allocator, name: []const u8, ordinal: i16, flags: u16) !void { - const n_strx = try ctx.addString(gpa, name); - var n_desc = @as(u16, @bitCast(ordinal * macho.N_SYMBOL_RESOLVER)); - n_desc |= flags; - try ctx.symbols.append(gpa, .{ - .n_value = 0, - .n_strx = n_strx, - .n_desc = n_desc, - .n_type = macho.N_EXT, - .n_sect = 0, - }); - } - - fn addString(ctx: *TestContext, gpa: Allocator, name: []const u8) !u32 { - const n_strx = @as(u32, @intCast(ctx.strtab.items.len)); - try ctx.strtab.appendSlice(gpa, name); - try ctx.strtab.append(gpa, 0); - return n_strx; - } - - fn getSymbol(ctx: TestContext, target: Target) macho.nlist_64 { - return ctx.symbols.items[target.index]; - } - - fn getSymbolName(ctx: TestContext, target: Target) []const u8 { - const sym = ctx.getSymbol(target); - assert(sym.n_strx < ctx.strtab.items.len); - return std.mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.strtab.items.ptr + 
sym.n_strx)), 0); - } -}; - -fn generateTestContext() !TestContext { - const gpa = testing.allocator; - var ctx = TestContext{}; - try ctx.addSymbol(gpa, "_import_1", 1, 0); - try ctx.addSymbol(gpa, "_import_2", 1, 0); - try ctx.addSymbol(gpa, "_import_3", 1, 0); - try ctx.addSymbol(gpa, "_import_4", 2, 0); - try ctx.addSymbol(gpa, "_import_5_weak", 2, macho.N_WEAK_REF); - try ctx.addSymbol(gpa, "_import_6", 2, 0); - return ctx; -} - -test "bind - no entries" { - const gpa = testing.allocator; - - var test_context = try generateTestContext(); - defer test_context.deinit(gpa); - - var bind = Bind(TestContext, TestContext.Target){}; - defer bind.deinit(gpa); - - try bind.finalize(gpa, test_context); - try testing.expectEqual(@as(u64, 0), bind.size()); -} - -test "bind - single entry" { - const gpa = testing.allocator; - - var test_context = try generateTestContext(); - defer test_context.deinit(gpa); - - var bind = Bind(TestContext, TestContext.Target){}; - defer bind.deinit(gpa); - - try bind.entries.append(gpa, .{ - .offset = 0x10, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0, - }); - try bind.finalize(gpa, test_context); - try testing.expectEqualSlices(u8, &[_]u8{ - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, - 0x0, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, - 0x5f, - 0x69, - 0x6d, - 0x70, - 0x6f, - 0x72, - 0x74, - 0x5f, - 0x31, - 0x0, - macho.BIND_OPCODE_SET_TYPE_IMM | 1, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, - macho.BIND_OPCODE_ADD_ADDR_ULEB, - 0x10, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_DONE, - }, bind.buffer.items); -} - -test "bind - multiple occurrences within the same segment" { - const gpa = testing.allocator; - - var test_context = try generateTestContext(); - defer test_context.deinit(gpa); - - var bind = Bind(TestContext, TestContext.Target){}; - defer bind.deinit(gpa); - - try bind.entries.append(gpa, .{ - .offset = 0x10, - .segment_id = 1, - .target = TestContext.Target{ .index = 
0 }, - .addend = 0, - }); - try bind.entries.append(gpa, .{ - .offset = 0x18, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0, - }); - try bind.entries.append(gpa, .{ - .offset = 0x20, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0, - }); - try bind.entries.append(gpa, .{ - .offset = 0x28, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0, - }); - - try bind.finalize(gpa, test_context); - try testing.expectEqualSlices(u8, &[_]u8{ - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, - 0x0, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, - 0x5f, - 0x69, - 0x6d, - 0x70, - 0x6f, - 0x72, - 0x74, - 0x5f, - 0x31, - 0x0, - macho.BIND_OPCODE_SET_TYPE_IMM | 1, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, - macho.BIND_OPCODE_ADD_ADDR_ULEB, - 0x10, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_DONE, - }, bind.buffer.items); -} - -test "bind - multiple occurrences with skip and addend" { - const gpa = testing.allocator; - - var test_context = try generateTestContext(); - defer test_context.deinit(gpa); - - var bind = Bind(TestContext, TestContext.Target){}; - defer bind.deinit(gpa); - - try bind.entries.append(gpa, .{ - .offset = 0x0, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0x10, - }); - try bind.entries.append(gpa, .{ - .offset = 0x10, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0x10, - }); - try bind.entries.append(gpa, .{ - .offset = 0x20, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0x10, - }); - try bind.entries.append(gpa, .{ - .offset = 0x30, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0x10, - }); - - try bind.finalize(gpa, test_context); - try testing.expectEqualSlices(u8, &[_]u8{ - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, - 0x0, - 
macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, - 0x5f, - 0x69, - 0x6d, - 0x70, - 0x6f, - 0x72, - 0x74, - 0x5f, - 0x31, - 0x0, - macho.BIND_OPCODE_SET_TYPE_IMM | 1, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, - macho.BIND_OPCODE_SET_ADDEND_SLEB, - 0x10, - macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB, - 0x4, - 0x8, - macho.BIND_OPCODE_DONE, - }, bind.buffer.items); -} - -test "bind - complex" { - const gpa = testing.allocator; - - var test_context = try generateTestContext(); - defer test_context.deinit(gpa); - - var bind = Bind(TestContext, TestContext.Target){}; - defer bind.deinit(gpa); - - try bind.entries.append(gpa, .{ - .offset = 0x58, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0, - }); - try bind.entries.append(gpa, .{ - .offset = 0x100, - .segment_id = 1, - .target = TestContext.Target{ .index = 1 }, - .addend = 0x10, - }); - try bind.entries.append(gpa, .{ - .offset = 0x110, - .segment_id = 1, - .target = TestContext.Target{ .index = 1 }, - .addend = 0x10, - }); - try bind.entries.append(gpa, .{ - .offset = 0x130, - .segment_id = 1, - .target = TestContext.Target{ .index = 1 }, - .addend = 0x10, - }); - try bind.entries.append(gpa, .{ - .offset = 0x140, - .segment_id = 1, - .target = TestContext.Target{ .index = 1 }, - .addend = 0x10, - }); - try bind.entries.append(gpa, .{ - .offset = 0x148, - .segment_id = 1, - .target = TestContext.Target{ .index = 2 }, - .addend = 0, - }); - - try bind.finalize(gpa, test_context); - try testing.expectEqualSlices(u8, &[_]u8{ - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, - 0x0, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, - 0x5f, - 0x69, - 0x6d, - 0x70, - 0x6f, - 0x72, - 0x74, - 0x5f, - 0x31, - 0x0, - macho.BIND_OPCODE_SET_TYPE_IMM | 1, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, - macho.BIND_OPCODE_ADD_ADDR_ULEB, - 0x58, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, - 0x5f, - 0x69, - 0x6d, - 0x70, - 0x6f, - 0x72, - 
0x74, - 0x5f, - 0x32, - 0x0, - macho.BIND_OPCODE_SET_TYPE_IMM | 1, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, - macho.BIND_OPCODE_SET_ADDEND_SLEB, - 0x10, - macho.BIND_OPCODE_ADD_ADDR_ULEB, - 0xa0, - 0x1, - macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB, - 0x2, - 0x8, - macho.BIND_OPCODE_ADD_ADDR_ULEB, - 0x10, - macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB, - 0x2, - 0x8, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, - 0x5f, - 0x69, - 0x6d, - 0x70, - 0x6f, - 0x72, - 0x74, - 0x5f, - 0x33, - 0x0, - macho.BIND_OPCODE_SET_TYPE_IMM | 1, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, - macho.BIND_OPCODE_SET_ADDEND_SLEB, - 0x0, - macho.BIND_OPCODE_ADD_ADDR_ULEB, - 0xf8, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0x1, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_DONE, - }, bind.buffer.items); -} - -test "lazy bind" { - const gpa = testing.allocator; - - var test_context = try generateTestContext(); - defer test_context.deinit(gpa); - - var bind = LazyBind(TestContext, TestContext.Target){}; - defer bind.deinit(gpa); - - try bind.entries.append(gpa, .{ - .offset = 0x10, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0, - }); - try bind.entries.append(gpa, .{ - .offset = 0x20, - .segment_id = 2, - .target = TestContext.Target{ .index = 1 }, - .addend = 0x10, - }); - - try bind.finalize(gpa, test_context); - try testing.expectEqualSlices(u8, &[_]u8{ - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, - 0x10, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, - 0x5f, - 0x69, - 0x6d, - 0x70, - 0x6f, - 0x72, - 0x74, - 0x5f, - 0x31, - 0x0, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_DONE, - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 2, - 0x20, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, - 0x5f, - 0x69, - 0x6d, - 0x70, - 0x6f, - 0x72, - 0x74, - 0x5f, - 0x32, - 0x0, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, - 
macho.BIND_OPCODE_SET_ADDEND_SLEB, - 0x10, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_DONE, - }, bind.buffer.items); -} - -const std = @import("std"); -const assert = std.debug.assert; -const leb = std.leb; -const log = std.log.scoped(.dyld_info); -const macho = std.macho; -const testing = std.testing; - -const Allocator = std.mem.Allocator; diff --git a/src/link/MachO/eh_frame.zig b/src/link/MachO/eh_frame.zig index 4b51d09683..6ca7a5cd2a 100644 --- a/src/link/MachO/eh_frame.zig +++ b/src/link/MachO/eh_frame.zig @@ -1,629 +1,539 @@ -pub fn scanRelocs(macho_file: *MachO) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; +pub const Cie = struct { + /// Includes 4byte size cell. + offset: u32, + out_offset: u32 = 0, + size: u32, + lsda_size: ?enum { p32, p64 } = null, + personality: ?Personality = null, + file: File.Index = 0, + alive: bool = false, - for (macho_file.objects.items, 0..) |*object, object_id| { - var cies = std.AutoHashMap(u32, void).init(gpa); - defer cies.deinit(); + pub fn parse(cie: *Cie, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); - var it = object.getEhFrameRecordsIterator(); + const data = cie.getData(macho_file); + const aug = std.mem.sliceTo(@as([*:0]const u8, @ptrCast(data.ptr + 9)), 0); - for (object.exec_atoms.items) |atom_index| { - var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - while (inner_syms_it.next()) |sym| { - const fde_offset = object.eh_frame_records_lookup.get(sym) orelse continue; - if (object.eh_frame_relocs_lookup.get(fde_offset).?.dead) continue; - it.seekTo(fde_offset); - const fde = (it.next() catch continue).?; // We don't care about this error since we already handled it + if (aug[0] != 'z') return; // TODO should we error out? 
- const cie_ptr = fde.getCiePointerSource(@intCast(object_id), macho_file, fde_offset); - const cie_offset = fde_offset + 4 - cie_ptr; + var stream = std.io.fixedBufferStream(data[9 + aug.len + 1 ..]); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); - if (!cies.contains(cie_offset)) { - try cies.putNoClobber(cie_offset, {}); - it.seekTo(cie_offset); - const cie = (it.next() catch continue).?; // We don't care about this error since we already handled it - try cie.scanRelocs(macho_file, @as(u32, @intCast(object_id)), cie_offset); + _ = try leb.readULEB128(u64, reader); // code alignment factor + _ = try leb.readULEB128(u64, reader); // data alignment factor + _ = try leb.readULEB128(u64, reader); // return address register + _ = try leb.readULEB128(u64, reader); // augmentation data length + + for (aug[1..]) |ch| switch (ch) { + 'R' => { + const enc = try reader.readByte(); + if (enc & 0xf != EH_PE.absptr or enc & EH_PE.pcrel == 0) { + @panic("unexpected pointer encoding"); // TODO error } - } - } - } -} - -pub fn calcSectionSize(macho_file: *MachO, unwind_info: *const UnwindInfo) error{OutOfMemory}!void { - const sect_id = macho_file.eh_frame_section_index orelse return; - const sect = &macho_file.sections.items(.header)[sect_id]; - sect.@"align" = 3; - sect.size = 0; - - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const comp = macho_file.base.comp; - const gpa = comp.gpa; - var size: u32 = 0; - - for (macho_file.objects.items, 0..) 
|*object, object_id| { - var cies = std.AutoHashMap(u32, u32).init(gpa); - defer cies.deinit(); - - var eh_it = object.getEhFrameRecordsIterator(); - - for (object.exec_atoms.items) |atom_index| { - var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - while (inner_syms_it.next()) |sym| { - const fde_record_offset = object.eh_frame_records_lookup.get(sym) orelse continue; - if (object.eh_frame_relocs_lookup.get(fde_record_offset).?.dead) continue; - - const record_id = unwind_info.records_lookup.get(sym) orelse continue; - const record = unwind_info.records.items[record_id]; - - // TODO skip this check if no __compact_unwind is present - const is_dwarf = UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch); - if (!is_dwarf) continue; - - eh_it.seekTo(fde_record_offset); - const source_fde_record = (eh_it.next() catch continue).?; // We already handled this error - - const cie_ptr = source_fde_record.getCiePointerSource(@intCast(object_id), macho_file, fde_record_offset); - const cie_offset = fde_record_offset + 4 - cie_ptr; - - const gop = try cies.getOrPut(cie_offset); - if (!gop.found_existing) { - eh_it.seekTo(cie_offset); - const source_cie_record = (eh_it.next() catch continue).?; // We already handled this error - gop.value_ptr.* = size; - size += source_cie_record.getSize(); + }, + 'P' => { + const enc = try reader.readByte(); + if (enc != EH_PE.pcrel | EH_PE.indirect | EH_PE.sdata4) { + @panic("unexpected personality pointer encoding"); // TODO error } - - size += source_fde_record.getSize(); - } - } - - sect.size = size; - } -} - -pub fn write(macho_file: *MachO, unwind_info: *UnwindInfo) !void { - const sect_id = macho_file.eh_frame_section_index orelse return; - const sect = macho_file.sections.items(.header)[sect_id]; - const seg_id = macho_file.sections.items(.segment_index)[sect_id]; - const seg = macho_file.segments.items[seg_id]; - - const target = macho_file.base.comp.root_mod.resolved_target.result; - 
const cpu_arch = target.cpu.arch; - const comp = macho_file.base.comp; - const gpa = comp.gpa; - - var eh_records = std.AutoArrayHashMap(u32, EhFrameRecord(true)).init(gpa); - defer { - for (eh_records.values()) |*rec| { - rec.deinit(gpa); - } - eh_records.deinit(); + _ = try reader.readInt(u32, .little); // personality pointer + }, + 'L' => { + const enc = try reader.readByte(); + switch (enc & 0xf) { + EH_PE.sdata4 => cie.lsda_size = .p32, + EH_PE.absptr => cie.lsda_size = .p64, + else => unreachable, // TODO error + } + }, + else => @panic("unexpected augmentation string"), // TODO error + }; } - var eh_frame_offset: u32 = 0; - - for (macho_file.objects.items, 0..) |*object, object_id| { - try eh_records.ensureUnusedCapacity(2 * @as(u32, @intCast(object.exec_atoms.items.len))); - - var cies = std.AutoHashMap(u32, u32).init(gpa); - defer cies.deinit(); - - var eh_it = object.getEhFrameRecordsIterator(); - - for (object.exec_atoms.items) |atom_index| { - var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - while (inner_syms_it.next()) |reloc_target| { - const fde_record_offset = object.eh_frame_records_lookup.get(reloc_target) orelse continue; - if (object.eh_frame_relocs_lookup.get(fde_record_offset).?.dead) continue; - - const record_id = unwind_info.records_lookup.get(reloc_target) orelse continue; - const record = &unwind_info.records.items[record_id]; - - // TODO skip this check if no __compact_unwind is present - const is_dwarf = UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch); - if (!is_dwarf) continue; - - eh_it.seekTo(fde_record_offset); - const source_fde_record = (eh_it.next() catch continue).?; // We already handled this error - - const cie_ptr = source_fde_record.getCiePointerSource(@intCast(object_id), macho_file, fde_record_offset); - const cie_offset = fde_record_offset + 4 - cie_ptr; - - const gop = try cies.getOrPut(cie_offset); - if (!gop.found_existing) { - eh_it.seekTo(cie_offset); - const 
source_cie_record = (eh_it.next() catch continue).?; // We already handled this error - var cie_record = try source_cie_record.toOwned(gpa); - try cie_record.relocate(macho_file, @as(u32, @intCast(object_id)), .{ - .source_offset = cie_offset, - .out_offset = eh_frame_offset, - .sect_addr = sect.addr, - }); - eh_records.putAssumeCapacityNoClobber(eh_frame_offset, cie_record); - gop.value_ptr.* = eh_frame_offset; - eh_frame_offset += cie_record.getSize(); - } - - var fde_record = try source_fde_record.toOwned(gpa); - try fde_record.relocate(macho_file, @as(u32, @intCast(object_id)), .{ - .source_offset = fde_record_offset, - .out_offset = eh_frame_offset, - .sect_addr = sect.addr, - }); - fde_record.setCiePointer(eh_frame_offset + 4 - gop.value_ptr.*); - - switch (cpu_arch) { - .aarch64 => {}, // relocs take care of LSDA pointers - .x86_64 => { - // We need to relocate target symbol address ourselves. - const atom_sym = macho_file.getSymbol(reloc_target); - try fde_record.setTargetSymbolAddress(atom_sym.n_value, .{ - .base_addr = sect.addr, - .base_offset = eh_frame_offset, - }); - - // We need to parse LSDA pointer and relocate ourselves. 
- const cie_record = eh_records.get( - eh_frame_offset + 4 - fde_record.getCiePointer(), - ).?; - const eh_frame_sect = object.getSourceSection(object.eh_frame_sect_id.?); - const source_lsda_ptr = fde_record.getLsdaPointer(cie_record, .{ - .base_addr = eh_frame_sect.addr, - .base_offset = fde_record_offset, - }) catch continue; // We already handled this error - if (source_lsda_ptr) |ptr| { - const sym_index = object.getSymbolByAddress(ptr, null); - const sym = object.symtab[sym_index]; - fde_record.setLsdaPointer(cie_record, sym.n_value, .{ - .base_addr = sect.addr, - .base_offset = eh_frame_offset, - }) catch continue; // We already handled this error - } - }, - else => unreachable, - } - - eh_records.putAssumeCapacityNoClobber(eh_frame_offset, fde_record); - - UnwindInfo.UnwindEncoding.setDwarfSectionOffset( - &record.compactUnwindEncoding, - cpu_arch, - @as(u24, @intCast(eh_frame_offset)), - ); - - const cie_record = eh_records.get( - eh_frame_offset + 4 - fde_record.getCiePointer(), - ).?; - const lsda_ptr = fde_record.getLsdaPointer(cie_record, .{ - .base_addr = sect.addr, - .base_offset = eh_frame_offset, - }) catch continue; // We already handled this error - if (lsda_ptr) |ptr| { - record.lsda = ptr - seg.vmaddr; - } - - eh_frame_offset += fde_record.getSize(); - } - } + pub inline fn getSize(cie: Cie) u32 { + return cie.size + 4; } - var buffer = std.ArrayList(u8).init(gpa); - defer buffer.deinit(); - const writer = buffer.writer(); - - for (eh_records.values()) |record| { - try writer.writeInt(u32, record.size, .little); - try buffer.appendSlice(record.data); + pub fn getObject(cie: Cie, macho_file: *MachO) *Object { + const file = macho_file.getFile(cie.file).?; + return file.object; } - try macho_file.base.file.?.pwriteAll(buffer.items, sect.offset); -} -const EhFrameRecordTag = enum { cie, fde }; + pub fn getData(cie: Cie, macho_file: *MachO) []const u8 { + const object = cie.getObject(macho_file); + return 
object.eh_frame_data.items[cie.offset..][0..cie.getSize()]; + } -pub fn EhFrameRecord(comptime is_mutable: bool) type { - return struct { - tag: EhFrameRecordTag, - size: u32, - data: if (is_mutable) []u8 else []const u8, + pub fn getPersonality(cie: Cie, macho_file: *MachO) ?*Symbol { + const personality = cie.personality orelse return null; + return macho_file.getSymbol(personality.index); + } - const Record = @This(); - - pub fn deinit(rec: *Record, gpa: Allocator) void { - comptime assert(is_mutable); - gpa.free(rec.data); + pub fn eql(cie: Cie, other: Cie, macho_file: *MachO) bool { + if (!std.mem.eql(u8, cie.getData(macho_file), other.getData(macho_file))) return false; + if (cie.personality != null and other.personality != null) { + if (cie.personality.?.index != other.personality.?.index) return false; } + if (cie.personality != null or other.personality != null) return false; + return true; + } - pub fn toOwned(rec: Record, gpa: Allocator) Allocator.Error!EhFrameRecord(true) { - const data = try gpa.dupe(u8, rec.data); - return EhFrameRecord(true){ - .tag = rec.tag, - .size = rec.size, - .data = data, - }; - } + pub fn format( + cie: Cie, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = cie; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format CIEs directly"); + } - pub inline fn getSize(rec: Record) u32 { - return 4 + rec.size; - } + pub fn fmt(cie: Cie, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ + .cie = cie, + .macho_file = macho_file, + } }; + } - pub fn scanRelocs( - rec: Record, - macho_file: *MachO, - object_id: u32, - source_offset: u32, - ) !void { - if (rec.getPersonalityPointerReloc(macho_file, object_id, source_offset)) |target| { - try macho_file.addGotEntry(target); - } - } - - pub fn getTargetSymbolAddress(rec: Record, ctx: struct { - base_addr: u64, - base_offset: u64, - }) u64 { - assert(rec.tag == .fde); - const 
addend = mem.readInt(i64, rec.data[4..][0..8], .little); - return @as(u64, @intCast(@as(i64, @intCast(ctx.base_addr + ctx.base_offset + 8)) + addend)); - } - - pub fn setTargetSymbolAddress(rec: *Record, value: u64, ctx: struct { - base_addr: u64, - base_offset: u64, - }) !void { - assert(rec.tag == .fde); - const addend = @as(i64, @intCast(value)) - @as(i64, @intCast(ctx.base_addr + ctx.base_offset + 8)); - mem.writeInt(i64, rec.data[4..][0..8], addend, .little); - } - - pub fn getPersonalityPointerReloc( - rec: Record, - macho_file: *MachO, - object_id: u32, - source_offset: u32, - ) ?SymbolWithLoc { - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const relocs = getRelocs(macho_file, object_id, source_offset); - for (relocs) |rel| { - switch (cpu_arch) { - .aarch64 => { - const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); - switch (rel_type) { - .ARM64_RELOC_SUBTRACTOR, - .ARM64_RELOC_UNSIGNED, - => continue, - .ARM64_RELOC_POINTER_TO_GOT => {}, - else => unreachable, - } - }, - .x86_64 => { - const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); - switch (rel_type) { - .X86_64_RELOC_GOT => {}, - else => unreachable, - } - }, - else => unreachable, - } - const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = object_id, - .rel = rel, - .code = rec.data, - .base_offset = @as(i32, @intCast(source_offset)) + 4, - }); - return reloc_target; - } - return null; - } - - pub fn relocate(rec: *Record, macho_file: *MachO, object_id: u32, ctx: struct { - source_offset: u32, - out_offset: u32, - sect_addr: u64, - }) !void { - comptime assert(is_mutable); - - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const relocs = getRelocs(macho_file, object_id, ctx.source_offset); - - for (relocs) |rel| { - const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = object_id, - .rel = rel, - 
.code = rec.data, - .base_offset = @as(i32, @intCast(ctx.source_offset)) + 4, - }); - const rel_offset = @as(u32, @intCast(rel.r_address - @as(i32, @intCast(ctx.source_offset)) - 4)); - const source_addr = ctx.sect_addr + rel_offset + ctx.out_offset + 4; - - switch (cpu_arch) { - .aarch64 => { - const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); - switch (rel_type) { - .ARM64_RELOC_SUBTRACTOR => { - // Address of the __eh_frame in the source object file - }, - .ARM64_RELOC_POINTER_TO_GOT => { - const target_addr = macho_file.getGotEntryAddress(reloc_target).?; - const result = math.cast(i32, @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr))) orelse - return error.Overflow; - mem.writeInt(i32, rec.data[rel_offset..][0..4], result, .little); - }, - .ARM64_RELOC_UNSIGNED => { - assert(rel.r_extern == 1); - const target_addr = Atom.getRelocTargetAddress(macho_file, reloc_target, false); - const result = @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr)); - mem.writeInt(i64, rec.data[rel_offset..][0..8], @as(i64, @intCast(result)), .little); - }, - else => unreachable, - } - }, - .x86_64 => { - const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); - switch (rel_type) { - .X86_64_RELOC_GOT => { - const target_addr = macho_file.getGotEntryAddress(reloc_target).?; - const addend = mem.readInt(i32, rec.data[rel_offset..][0..4], .little); - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); - const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); - mem.writeInt(i32, rec.data[rel_offset..][0..4], disp, .little); - }, - else => unreachable, - } - }, - else => unreachable, - } - } - } - - pub fn getCiePointerSource(rec: Record, object_id: u32, macho_file: *MachO, offset: u32) u32 { - assert(rec.tag == .fde); - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const addend 
= mem.readInt(u32, rec.data[0..4], .little); - switch (cpu_arch) { - .aarch64 => { - const relocs = getRelocs(macho_file, object_id, offset); - const maybe_rel = for (relocs) |rel| { - if (rel.r_address - @as(i32, @intCast(offset)) == 4 and - @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)) == .ARM64_RELOC_SUBTRACTOR) - break rel; - } else null; - const rel = maybe_rel orelse return addend; - const object = &macho_file.objects.items[object_id]; - const target_addr = object.in_symtab.?[rel.r_symbolnum].n_value; - const sect = object.getSourceSection(object.eh_frame_sect_id.?); - return @intCast(sect.addr + offset - target_addr + addend); - }, - .x86_64 => return addend, - else => unreachable, - } - } - - pub fn getCiePointer(rec: Record) u32 { - assert(rec.tag == .fde); - return mem.readInt(u32, rec.data[0..4], .little); - } - - pub fn setCiePointer(rec: *Record, ptr: u32) void { - assert(rec.tag == .fde); - mem.writeInt(u32, rec.data[0..4], ptr, .little); - } - - pub fn getAugmentationString(rec: Record) []const u8 { - assert(rec.tag == .cie); - return mem.sliceTo(@as([*:0]const u8, @ptrCast(rec.data.ptr + 5)), 0); - } - - pub fn getPersonalityPointer(rec: Record, ctx: struct { - base_addr: u64, - base_offset: u64, - }) !?u64 { - assert(rec.tag == .cie); - const aug_str = rec.getAugmentationString(); - - var stream = std.io.fixedBufferStream(rec.data[9 + aug_str.len ..]); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); - - for (aug_str, 0..) 
|ch, i| switch (ch) { - 'z' => if (i > 0) { - return error.BadDwarfCfi; - } else { - _ = try leb.readULEB128(u64, reader); - }, - 'R' => { - _ = try reader.readByte(); - }, - 'P' => { - const enc = try reader.readByte(); - const offset = ctx.base_offset + 13 + aug_str.len + creader.bytes_read; - const ptr = try getEncodedPointer(enc, @as(i64, @intCast(ctx.base_addr + offset)), reader); - return ptr; - }, - 'L' => { - _ = try reader.readByte(); - }, - 'S', 'B', 'G' => {}, - else => return error.BadDwarfCfi, - }; - - return null; - } - - pub fn getLsdaPointer(rec: Record, cie: Record, ctx: struct { - base_addr: u64, - base_offset: u64, - }) !?u64 { - assert(rec.tag == .fde); - const enc = (try cie.getLsdaEncoding()) orelse return null; - var stream = std.io.fixedBufferStream(rec.data[20..]); - const reader = stream.reader(); - _ = try reader.readByte(); - const offset = ctx.base_offset + 25; - const ptr = try getEncodedPointer(enc, @as(i64, @intCast(ctx.base_addr + offset)), reader); - return ptr; - } - - pub fn setLsdaPointer(rec: *Record, cie: Record, value: u64, ctx: struct { - base_addr: u64, - base_offset: u64, - }) !void { - assert(rec.tag == .fde); - const enc = (try cie.getLsdaEncoding()) orelse unreachable; - var stream = std.io.fixedBufferStream(rec.data[21..]); - const writer = stream.writer(); - const offset = ctx.base_offset + 25; - try setEncodedPointer(enc, @as(i64, @intCast(ctx.base_addr + offset)), value, writer); - } - - fn getLsdaEncoding(rec: Record) !?u8 { - assert(rec.tag == .cie); - const aug_str = rec.getAugmentationString(); - - const base_offset = 9 + aug_str.len; - var stream = std.io.fixedBufferStream(rec.data[base_offset..]); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); - - for (aug_str, 0..) 
|ch, i| switch (ch) { - 'z' => if (i > 0) { - return error.BadDwarfCfi; - } else { - _ = try leb.readULEB128(u64, reader); - }, - 'R' => { - _ = try reader.readByte(); - }, - 'P' => { - const enc = try reader.readByte(); - _ = try getEncodedPointer(enc, 0, reader); - }, - 'L' => { - const enc = try reader.readByte(); - return enc; - }, - 'S', 'B', 'G' => {}, - else => return error.BadDwarfCfi, - }; - - return null; - } - - fn getEncodedPointer(enc: u8, pcrel_offset: i64, reader: anytype) !?u64 { - if (enc == EH_PE.omit) return null; - - var ptr: i64 = switch (enc & 0x0F) { - EH_PE.absptr => @as(i64, @bitCast(try reader.readInt(u64, .little))), - EH_PE.udata2 => @as(i16, @bitCast(try reader.readInt(u16, .little))), - EH_PE.udata4 => @as(i32, @bitCast(try reader.readInt(u32, .little))), - EH_PE.udata8 => @as(i64, @bitCast(try reader.readInt(u64, .little))), - EH_PE.uleb128 => @as(i64, @bitCast(try leb.readULEB128(u64, reader))), - EH_PE.sdata2 => try reader.readInt(i16, .little), - EH_PE.sdata4 => try reader.readInt(i32, .little), - EH_PE.sdata8 => try reader.readInt(i64, .little), - EH_PE.sleb128 => try leb.readILEB128(i64, reader), - else => return null, - }; - - switch (enc & 0x70) { - EH_PE.absptr => {}, - EH_PE.pcrel => ptr += pcrel_offset, - EH_PE.datarel, - EH_PE.textrel, - EH_PE.funcrel, - EH_PE.aligned, - => return null, - else => return null, - } - - return @as(u64, @bitCast(ptr)); - } - - fn setEncodedPointer(enc: u8, pcrel_offset: i64, value: u64, writer: anytype) !void { - if (enc == EH_PE.omit) return; - - var actual = @as(i64, @intCast(value)); - - switch (enc & 0x70) { - EH_PE.absptr => {}, - EH_PE.pcrel => actual -= pcrel_offset, - EH_PE.datarel, - EH_PE.textrel, - EH_PE.funcrel, - EH_PE.aligned, - => unreachable, - else => unreachable, - } - - switch (enc & 0x0F) { - EH_PE.absptr => try writer.writeInt(u64, @as(u64, @bitCast(actual)), .little), - EH_PE.udata2 => try writer.writeInt(u16, @as(u16, @bitCast(@as(i16, @intCast(actual)))), .little), - 
EH_PE.udata4 => try writer.writeInt(u32, @as(u32, @bitCast(@as(i32, @intCast(actual)))), .little), - EH_PE.udata8 => try writer.writeInt(u64, @as(u64, @bitCast(actual)), .little), - EH_PE.uleb128 => try leb.writeULEB128(writer, @as(u64, @bitCast(actual))), - EH_PE.sdata2 => try writer.writeInt(i16, @as(i16, @intCast(actual)), .little), - EH_PE.sdata4 => try writer.writeInt(i32, @as(i32, @intCast(actual)), .little), - EH_PE.sdata8 => try writer.writeInt(i64, actual, .little), - EH_PE.sleb128 => try leb.writeILEB128(writer, actual), - else => unreachable, - } - } + const FormatContext = struct { + cie: Cie, + macho_file: *MachO, }; -} -pub fn getRelocs(macho_file: *MachO, object_id: u32, source_offset: u32) []const macho.relocation_info { - const object = &macho_file.objects.items[object_id]; - assert(object.hasEhFrameRecords()); - const urel = object.eh_frame_relocs_lookup.get(source_offset) orelse - return &[0]macho.relocation_info{}; - const all_relocs = object.getRelocs(object.eh_frame_sect_id.?); - return all_relocs[urel.reloc.start..][0..urel.reloc.len]; -} + fn format2( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + const cie = ctx.cie; + try writer.print("@{x} : size({x})", .{ + cie.offset, + cie.getSize(), + }); + if (!cie.alive) try writer.writeAll(" : [*]"); + } + + pub const Index = u32; + + pub const Personality = struct { + index: Symbol.Index = 0, + offset: u32 = 0, + }; +}; + +pub const Fde = struct { + /// Includes 4byte size cell. 
+ offset: u32, + out_offset: u32 = 0, + size: u32, + cie: Cie.Index, + atom: Atom.Index = 0, + atom_offset: u32 = 0, + lsda: Atom.Index = 0, + lsda_offset: u32 = 0, + lsda_ptr_offset: u32 = 0, + file: File.Index = 0, + alive: bool = true, + + pub fn parse(fde: *Fde, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const data = fde.getData(macho_file); + const object = fde.getObject(macho_file); + const sect = object.sections.items(.header)[object.eh_frame_sect_index.?]; + + // Parse target atom index + const pc_begin = std.mem.readInt(i64, data[8..][0..8], .little); + const taddr: u64 = @intCast(@as(i64, @intCast(sect.addr + fde.offset + 8)) + pc_begin); + fde.atom = object.findAtom(taddr) orelse { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: invalid function reference in FDE", .{ + object.fmtPath(), sect.segName(), sect.sectName(), fde.offset + 8, + }); + return error.ParseFailed; + }; + const atom = fde.getAtom(macho_file); + fde.atom_offset = @intCast(taddr - atom.getInputAddress(macho_file)); + + // Associate with a CIE + const cie_ptr = std.mem.readInt(u32, data[4..8], .little); + const cie_offset = fde.offset + 4 - cie_ptr; + const cie_index = for (object.cies.items, 0..) 
|cie, cie_index| { + if (cie.offset == cie_offset) break @as(Cie.Index, @intCast(cie_index)); + } else null; + if (cie_index) |cie| { + fde.cie = cie; + } else { + macho_file.base.fatal("{}: no matching CIE found for FDE at offset {x}", .{ + object.fmtPath(), + fde.offset, + }); + return error.ParseFailed; + } + + const cie = fde.getCie(macho_file); + + // Parse LSDA atom index if any + if (cie.lsda_size) |lsda_size| { + var stream = std.io.fixedBufferStream(data[24..]); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + _ = try leb.readULEB128(u64, reader); // augmentation length + fde.lsda_ptr_offset = @intCast(creader.bytes_read + 24); + const lsda_ptr = switch (lsda_size) { + .p32 => try reader.readInt(i32, .little), + .p64 => try reader.readInt(i64, .little), + }; + const lsda_addr: u64 = @intCast(@as(i64, @intCast(sect.addr + fde.offset + fde.lsda_ptr_offset)) + lsda_ptr); + fde.lsda = object.findAtom(lsda_addr) orelse { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: invalid LSDA reference in FDE", .{ + object.fmtPath(), sect.segName(), sect.sectName(), fde.offset + fde.lsda_ptr_offset, + }); + return error.ParseFailed; + }; + const lsda_atom = fde.getLsdaAtom(macho_file).?; + fde.lsda_offset = @intCast(lsda_addr - lsda_atom.getInputAddress(macho_file)); + } + } + + pub inline fn getSize(fde: Fde) u32 { + return fde.size + 4; + } + + pub fn getObject(fde: Fde, macho_file: *MachO) *Object { + const file = macho_file.getFile(fde.file).?; + return file.object; + } + + pub fn getData(fde: Fde, macho_file: *MachO) []const u8 { + const object = fde.getObject(macho_file); + return object.eh_frame_data.items[fde.offset..][0..fde.getSize()]; + } + + pub fn getCie(fde: Fde, macho_file: *MachO) *const Cie { + const object = fde.getObject(macho_file); + return &object.cies.items[fde.cie]; + } + + pub fn getAtom(fde: Fde, macho_file: *MachO) *Atom { + return macho_file.getAtom(fde.atom).?; + } + + pub fn getLsdaAtom(fde: Fde, 
macho_file: *MachO) ?*Atom { + return macho_file.getAtom(fde.lsda); + } + + pub fn format( + fde: Fde, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = fde; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format FDEs directly"); + } + + pub fn fmt(fde: Fde, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ + .fde = fde, + .macho_file = macho_file, + } }; + } + + const FormatContext = struct { + fde: Fde, + macho_file: *MachO, + }; + + fn format2( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + const fde = ctx.fde; + const macho_file = ctx.macho_file; + try writer.print("@{x} : size({x}) : cie({d}) : {s}", .{ + fde.offset, + fde.getSize(), + fde.cie, + fde.getAtom(macho_file).getName(macho_file), + }); + if (!fde.alive) try writer.writeAll(" : [*]"); + } + + pub const Index = u32; +}; pub const Iterator = struct { data: []const u8, pos: u32 = 0, - pub fn next(it: *Iterator) !?EhFrameRecord(false) { + pub const Record = struct { + tag: enum { fde, cie }, + offset: u32, + size: u32, + }; + + pub fn next(it: *Iterator) !?Record { if (it.pos >= it.data.len) return null; var stream = std.io.fixedBufferStream(it.data[it.pos..]); const reader = stream.reader(); const size = try reader.readInt(u32, .little); - if (size == 0xFFFFFFFF) { - log.debug("MachO doesn't support 64bit DWARF CFI __eh_frame records", .{}); - return error.BadDwarfCfi; - } + if (size == 0xFFFFFFFF) @panic("DWARF CFI is 32bit on macOS"); const id = try reader.readInt(u32, .little); - const tag: EhFrameRecordTag = if (id == 0) .cie else .fde; - const offset: u32 = 4; - const record = EhFrameRecord(false){ - .tag = tag, + const record = Record{ + .tag = if (id == 0) .cie else .fde, + .offset = it.pos, .size = size, - .data = it.data[it.pos + offset ..][0..size], }; - - 
it.pos += size + offset; + it.pos += size + 4; return record; } - - pub fn reset(it: *Iterator) void { - it.pos = 0; - } - - pub fn seekTo(it: *Iterator, pos: u32) void { - assert(pos >= 0 and pos < it.data.len); - it.pos = pos; - } }; +pub fn calcSize(macho_file: *MachO) !u32 { + const tracy = trace(@src()); + defer tracy.end(); + + var offset: u32 = 0; + + var cies = std.ArrayList(Cie).init(macho_file.base.allocator); + defer cies.deinit(); + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + + outer: for (object.cies.items) |*cie| { + for (cies.items) |other| { + if (other.eql(cie.*, macho_file)) { + // We already have a CIE record that has the exact same contents, so instead of + // duplicating them, we mark this one dead and set its output offset to be + // equal to that of the alive record. This way, we won't have to rewrite + // Fde.cie_index field when committing the records to file. + cie.out_offset = other.out_offset; + continue :outer; + } + } + cie.alive = true; + cie.out_offset = offset; + offset += cie.getSize(); + try cies.append(cie.*); + } + } + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.fdes.items) |*fde| { + if (!fde.alive) continue; + fde.out_offset = offset; + offset += fde.getSize(); + } + } + + return offset; +} + +pub fn calcNumRelocs(macho_file: *MachO) u32 { + const tracy = trace(@src()); + defer tracy.end(); + + var nreloc: u32 = 0; + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.cies.items) |cie| { + if (!cie.alive) continue; + if (cie.getPersonality(macho_file)) |_| { + nreloc += 1; // personality + } + } + } + + return nreloc; +} + +pub fn write(macho_file: *MachO, buffer: []u8) void { + const tracy = trace(@src()); + defer tracy.end(); + + const sect = macho_file.sections.items(.header)[macho_file.eh_frame_sect_index.?]; + const addend: i64 = switch 
(macho_file.options.cpu_arch.?) { + .x86_64 => 4, + else => 0, + }; + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.cies.items) |cie| { + if (!cie.alive) continue; + + @memcpy(buffer[cie.out_offset..][0..cie.getSize()], cie.getData(macho_file)); + + if (cie.getPersonality(macho_file)) |sym| { + const offset = cie.out_offset + cie.personality.?.offset; + const saddr = sect.addr + offset; + const taddr = sym.getGotAddress(macho_file); + std.mem.writeInt( + i32, + buffer[offset..][0..4], + @intCast(@as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)) + addend), + .little, + ); + } + } + } + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.fdes.items) |fde| { + if (!fde.alive) continue; + + @memcpy(buffer[fde.out_offset..][0..fde.getSize()], fde.getData(macho_file)); + + { + const offset = fde.out_offset + 4; + const value = offset - fde.getCie(macho_file).out_offset; + std.mem.writeInt(u32, buffer[offset..][0..4], value, .little); + } + + { + const offset = fde.out_offset + 8; + const saddr = sect.addr + offset; + const taddr = fde.getAtom(macho_file).value; + std.mem.writeInt( + i64, + buffer[offset..][0..8], + @as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)), + .little, + ); + } + + if (fde.getLsdaAtom(macho_file)) |atom| { + const offset = fde.out_offset + fde.lsda_offset; + const saddr = sect.addr + offset; + const taddr = atom.value; + switch (fde.getCie(macho_file).lsda_size.?) 
{ + .p32 => std.mem.writeInt( + i32, + buffer[offset..][0..4], + @intCast(@as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)) + addend), + .little, + ), + .p64 => std.mem.writeInt( + i64, + buffer[offset..][0..8], + @as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)), + .little, + ), + } + } + } + } +} + +pub fn writeRelocs(macho_file: *MachO, code: []u8, relocs: *std.ArrayList(macho.relocation_info)) error{Overflow}!void { + const tracy = trace(@src()); + defer tracy.end(); + + const cpu_arch = macho_file.options.cpu_arch.?; + const sect = macho_file.sections.items(.header)[macho_file.eh_frame_sect_index.?]; + const addend: i64 = switch (cpu_arch) { + .x86_64 => 4, + else => 0, + }; + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.cies.items) |cie| { + if (!cie.alive) continue; + + @memcpy(code[cie.out_offset..][0..cie.getSize()], cie.getData(macho_file)); + + if (cie.getPersonality(macho_file)) |sym| { + const r_address = math.cast(i32, cie.out_offset + cie.personality.?.offset) orelse return error.Overflow; + const r_symbolnum = math.cast(u24, sym.getOutputSymtabIndex(macho_file).?) 
orelse return error.Overflow; + relocs.appendAssumeCapacity(.{ + .r_address = r_address, + .r_symbolnum = r_symbolnum, + .r_length = 2, + .r_extern = 1, + .r_pcrel = 1, + .r_type = switch (cpu_arch) { + .aarch64 => @intFromEnum(macho.reloc_type_arm64.ARM64_RELOC_POINTER_TO_GOT), + .x86_64 => @intFromEnum(macho.reloc_type_x86_64.X86_64_RELOC_GOT), + else => unreachable, + }, + }); + } + } + } + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.fdes.items) |fde| { + if (!fde.alive) continue; + + @memcpy(code[fde.out_offset..][0..fde.getSize()], fde.getData(macho_file)); + + { + const offset = fde.out_offset + 4; + const value = offset - fde.getCie(macho_file).out_offset; + std.mem.writeInt(u32, code[offset..][0..4], value, .little); + } + + { + const offset = fde.out_offset + 8; + const saddr = sect.addr + offset; + const taddr = fde.getAtom(macho_file).value; + std.mem.writeInt( + i64, + code[offset..][0..8], + @as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)), + .little, + ); + } + + if (fde.getLsdaAtom(macho_file)) |atom| { + const offset = fde.out_offset + fde.lsda_ptr_offset; + const saddr = sect.addr + offset; + const taddr = atom.value + fde.lsda_offset; + switch (fde.getCie(macho_file).lsda_size.?) 
{ + .p32 => std.mem.writeInt( + i32, + code[offset..][0..4], + @intCast(@as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)) + addend), + .little, + ), + .p64 => std.mem.writeInt( + i64, + code[offset..][0..8], + @as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)), + .little, + ), + } + } + } + } +} + pub const EH_PE = struct { pub const absptr = 0x00; pub const uleb128 = 0x01; @@ -643,17 +553,17 @@ pub const EH_PE = struct { pub const omit = 0xFF; }; -const std = @import("std"); const assert = std.debug.assert; +const leb = std.leb; const macho = std.macho; const math = std.math; const mem = std.mem; -const leb = std.leb; -const log = std.log.scoped(.eh_frame); +const std = @import("std"); +const trace = @import("../tracy.zig").trace; -const Allocator = mem.Allocator; +const Allocator = std.mem.Allocator; const Atom = @import("Atom.zig"); +const File = @import("file.zig").File; const MachO = @import("../MachO.zig"); -const Relocation = @import("Relocation.zig"); -const SymbolWithLoc = MachO.SymbolWithLoc; -const UnwindInfo = @import("UnwindInfo.zig"); +const Object = @import("Object.zig"); +const Symbol = @import("Symbol.zig"); diff --git a/src/link/MachO/fat.zig b/src/link/MachO/fat.zig index fcaca7d99a..46cf0139df 100644 --- a/src/link/MachO/fat.zig +++ b/src/link/MachO/fat.zig @@ -1,3 +1,13 @@ +const std = @import("std"); +const assert = std.debug.assert; +const builtin = @import("builtin"); +const log = std.log.scoped(.macho); +const macho = std.macho; +const mem = std.mem; +const native_endian = builtin.target.cpu.arch.endian(); + +const MachO = @import("../MachO.zig"); + pub fn isFatLibrary(file: std.fs.File) bool { const reader = file.reader(); const hdr = reader.readStructEndian(macho.fat_header, .big) catch return false; @@ -7,18 +17,16 @@ pub fn isFatLibrary(file: std.fs.File) bool { pub const Arch = struct { tag: std.Target.Cpu.Arch, - offset: u64, + offset: u32, + size: u32, }; -/// Caller owns the memory. 
-pub fn parseArchs(gpa: Allocator, file: std.fs.File) ![]const Arch { +pub fn parseArchs(file: std.fs.File, buffer: *[2]Arch) ![]const Arch { const reader = file.reader(); const fat_header = try reader.readStructEndian(macho.fat_header, .big); assert(fat_header.magic == macho.FAT_MAGIC); - var archs = try std.ArrayList(Arch).initCapacity(gpa, fat_header.nfat_arch); - defer archs.deinit(); - + var count: usize = 0; var fat_arch_index: u32 = 0; while (fat_arch_index < fat_header.nfat_arch) : (fat_arch_index += 1) { const fat_arch = try reader.readStructEndian(macho.fat_arch, .big); @@ -29,16 +37,9 @@ pub fn parseArchs(gpa: Allocator, file: std.fs.File) ![]const Arch { macho.CPU_TYPE_X86_64 => if (fat_arch.cpusubtype == macho.CPU_SUBTYPE_X86_64_ALL) .x86_64 else continue, else => continue, }; - - archs.appendAssumeCapacity(.{ .tag = arch, .offset = fat_arch.offset }); + buffer[count] = .{ .tag = arch, .offset = fat_arch.offset, .size = fat_arch.size }; + count += 1; } - return archs.toOwnedSlice(); + return buffer[0..count]; } - -const std = @import("std"); -const assert = std.debug.assert; -const log = std.log.scoped(.archive); -const macho = std.macho; -const mem = std.mem; -const Allocator = mem.Allocator; diff --git a/src/link/MachO/file.zig b/src/link/MachO/file.zig new file mode 100644 index 0000000000..9e19bed7df --- /dev/null +++ b/src/link/MachO/file.zig @@ -0,0 +1,116 @@ +pub const File = union(enum) { + internal: *InternalObject, + object: *Object, + dylib: *Dylib, + + pub fn getIndex(file: File) Index { + return switch (file) { + inline else => |x| x.index, + }; + } + + pub fn fmtPath(file: File) std.fmt.Formatter(formatPath) { + return .{ .data = file }; + } + + fn formatPath( + file: File, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + switch (file) { + .internal => try writer.writeAll(""), + .object => |x| try writer.print("{}", .{x.fmtPath()}), + 
.dylib => |x| try writer.writeAll(x.path), + } + } + + pub fn resolveSymbols(file: File, macho_file: *MachO) void { + switch (file) { + .internal => unreachable, + inline else => |x| x.resolveSymbols(macho_file), + } + } + + pub fn resetGlobals(file: File, macho_file: *MachO) void { + switch (file) { + .internal => unreachable, + inline else => |x| x.resetGlobals(macho_file), + } + } + + /// Encodes symbol rank so that the following ordering applies: + /// * strong in object + /// * weak in object + /// * tentative in object + /// * strong in archive/dylib + /// * weak in archive/dylib + /// * tentative in archive + /// * unclaimed + pub fn getSymbolRank(file: File, args: struct { + archive: bool = false, + weak: bool = false, + tentative: bool = false, + }) u32 { + if (file == .object and !args.archive) { + const base: u32 = blk: { + if (args.tentative) break :blk 3; + break :blk if (args.weak) 2 else 1; + }; + return (base << 16) + file.getIndex(); + } + const base: u32 = blk: { + if (args.tentative) break :blk 3; + break :blk if (args.weak) 2 else 1; + }; + return base + (file.getIndex() << 24); + } + + pub fn getSymbols(file: File) []const Symbol.Index { + return switch (file) { + inline else => |x| x.symbols.items, + }; + } + + pub fn getAtoms(file: File) []const Atom.Index { + return switch (file) { + .dylib => unreachable, + inline else => |x| x.atoms.items, + }; + } + + pub fn calcSymtabSize(file: File, macho_file: *MachO) !void { + return switch (file) { + inline else => |x| x.calcSymtabSize(macho_file), + }; + } + + pub fn writeSymtab(file: File, macho_file: *MachO) void { + return switch (file) { + inline else => |x| x.writeSymtab(macho_file), + }; + } + + pub const Index = u32; + + pub const Entry = union(enum) { + null: void, + internal: InternalObject, + object: Object, + dylib: Dylib, + }; +}; + +const macho = std.macho; +const std = @import("std"); + +const Allocator = std.mem.Allocator; +const Atom = @import("Atom.zig"); +const InternalObject = 
@import("InternalObject.zig"); +const MachO = @import("../MachO.zig"); +const Object = @import("Object.zig"); +const Dylib = @import("Dylib.zig"); +const Symbol = @import("Symbol.zig"); diff --git a/src/link/MachO/hasher.zig b/src/link/MachO/hasher.zig index 45847689f3..95faaf3a92 100644 --- a/src/link/MachO/hasher.zig +++ b/src/link/MachO/hasher.zig @@ -9,15 +9,14 @@ pub fn ParallelHasher(comptime Hasher: type) type { chunk_size: u64 = 0x4000, max_file_size: ?u64 = null, }) !void { + const tracy = trace(@src()); + defer tracy.end(); + var wg: WaitGroup = .{}; - const file_size = blk: { - const file_size = opts.max_file_size orelse try file.getEndPos(); - break :blk std.math.cast(usize, file_size) orelse return error.Overflow; - }; - const chunk_size = std.math.cast(usize, opts.chunk_size) orelse return error.Overflow; + const file_size = opts.max_file_size orelse try file.getEndPos(); - const buffer = try self.allocator.alloc(u8, chunk_size * out.len); + const buffer = try self.allocator.alloc(u8, opts.chunk_size * out.len); defer self.allocator.free(buffer); const results = try self.allocator.alloc(fs.File.PReadError!usize, out.len); @@ -28,8 +27,11 @@ pub fn ParallelHasher(comptime Hasher: type) type { defer wg.wait(); for (out, results, 0..) 
|*out_buf, *result, i| { - const fstart = i * chunk_size; - const fsize = if (fstart + chunk_size > file_size) file_size - fstart else chunk_size; + const fstart = i * opts.chunk_size; + const fsize = if (fstart + opts.chunk_size > file_size) + file_size - fstart + else + opts.chunk_size; wg.start(); try self.thread_pool.spawn(worker, .{ file, @@ -61,10 +63,11 @@ pub fn ParallelHasher(comptime Hasher: type) type { }; } -const std = @import("std"); const assert = std.debug.assert; const fs = std.fs; const mem = std.mem; +const std = @import("std"); +const trace = @import("../tracy.zig").trace; const Allocator = mem.Allocator; const ThreadPool = std.Thread.Pool; diff --git a/src/link/MachO/load_commands.zig b/src/link/MachO/load_commands.zig index e155a7a8ed..725bd4291f 100644 --- a/src/link/MachO/load_commands.zig +++ b/src/link/MachO/load_commands.zig @@ -1,4 +1,14 @@ -/// Default path to dyld. +const std = @import("std"); +const assert = std.debug.assert; +const log = std.log.scoped(.link); +const macho = std.macho; +const mem = std.mem; + +const Allocator = mem.Allocator; +const Dylib = @import("Dylib.zig"); +const MachO = @import("../MachO.zig"); +const Options = @import("../MachO.zig").Options; + pub const default_dyld_path: [*:0]const u8 = "/usr/lib/dyld"; fn calcInstallNameLen(cmd_size: u64, name: []const u8, assume_max_path_len: bool) u64 { @@ -7,31 +17,20 @@ fn calcInstallNameLen(cmd_size: u64, name: []const u8, assume_max_path_len: bool return mem.alignForward(u64, cmd_size + name_len, @alignOf(u64)); } -const CalcLCsSizeCtx = struct { - segments: []const macho.segment_command_64, - dylibs: []const Dylib, - referenced_dylibs: []u16, - wants_function_starts: bool = true, -}; - -fn calcLCsSize(m: *MachO, ctx: CalcLCsSizeCtx, assume_max_path_len: bool) !u32 { - const comp = m.base.comp; - const gpa = comp.gpa; - var has_text_segment: bool = false; +pub fn calcLoadCommandsSize(macho_file: *MachO, assume_max_path_len: bool) u32 { + const options = 
&macho_file.options; var sizeofcmds: u64 = 0; - for (ctx.segments) |seg| { - sizeofcmds += seg.nsects * @sizeOf(macho.section_64) + @sizeOf(macho.segment_command_64); - if (mem.eql(u8, seg.segName(), "__TEXT")) { - has_text_segment = true; - } + + // LC_SEGMENT_64 + sizeofcmds += @sizeOf(macho.segment_command_64) * macho_file.segments.items.len; + for (macho_file.segments.items) |seg| { + sizeofcmds += seg.nsects * @sizeOf(macho.section_64); } // LC_DYLD_INFO_ONLY sizeofcmds += @sizeOf(macho.dyld_info_command); // LC_FUNCTION_STARTS - if (has_text_segment and ctx.wants_function_starts) { - sizeofcmds += @sizeOf(macho.linkedit_data_command); - } + sizeofcmds += @sizeOf(macho.linkedit_data_command); // LC_DATA_IN_CODE sizeofcmds += @sizeOf(macho.linkedit_data_command); // LC_SYMTAB @@ -45,15 +44,14 @@ fn calcLCsSize(m: *MachO, ctx: CalcLCsSizeCtx, assume_max_path_len: bool) !u32 { false, ); // LC_MAIN - if (comp.config.output_mode == .Exe) { + if (!options.dylib) { sizeofcmds += @sizeOf(macho.entry_point_command); } // LC_ID_DYLIB - if (comp.config.output_mode == .Lib and comp.config.link_mode == .Dynamic) { + if (options.dylib) { sizeofcmds += blk: { - const emit = m.base.emit; - const install_name = m.install_name orelse try emit.directory.join(gpa, &.{emit.sub_path}); - defer if (m.install_name == null) gpa.free(install_name); + const emit = options.emit; + const install_name = options.install_name orelse emit.sub_path; break :blk calcInstallNameLen( @sizeOf(macho.dylib_command), install_name, @@ -63,9 +61,7 @@ fn calcLCsSize(m: *MachO, ctx: CalcLCsSizeCtx, assume_max_path_len: bool) !u32 { } // LC_RPATH { - var it = RpathIterator.init(gpa, m.base.rpath_list); - defer it.deinit(); - while (try it.next()) |rpath| { + for (options.rpath_list) |rpath| { sizeofcmds += calcInstallNameLen( @sizeOf(macho.rpath_command), rpath, @@ -75,24 +71,22 @@ fn calcLCsSize(m: *MachO, ctx: CalcLCsSizeCtx, assume_max_path_len: bool) !u32 { } // LC_SOURCE_VERSION sizeofcmds += 
@sizeOf(macho.source_version_command); - // LC_BUILD_VERSION or LC_VERSION_MIN_ or nothing - { - const target = comp.root_mod.resolved_target.result; - const platform = Platform.fromTarget(target); + if (options.platform) |platform| { if (platform.isBuildVersionCompatible()) { // LC_BUILD_VERSION sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); - } else if (platform.isVersionMinCompatible()) { - // LC_VERSION_MIN_ + } else { + // LC_VERSION_MIN_* sizeofcmds += @sizeOf(macho.version_min_command); } } // LC_UUID sizeofcmds += @sizeOf(macho.uuid_command); // LC_LOAD_DYLIB - for (ctx.referenced_dylibs) |id| { - const dylib = ctx.dylibs[id]; - const dylib_id = dylib.id orelse unreachable; + for (macho_file.dylibs.items) |index| { + const dylib = macho_file.getFile(index).?.dylib; + assert(dylib.isAlive(macho_file)); + const dylib_id = dylib.id.?; sizeofcmds += calcInstallNameLen( @sizeOf(macho.dylib_command), dylib_id.name, @@ -100,19 +94,52 @@ fn calcLCsSize(m: *MachO, ctx: CalcLCsSizeCtx, assume_max_path_len: bool) !u32 { ); } // LC_CODE_SIGNATURE - if (m.requiresCodeSignature()) { + if (macho_file.requiresCodeSig()) { sizeofcmds += @sizeOf(macho.linkedit_data_command); } - return @intCast(sizeofcmds); + return @as(u32, @intCast(sizeofcmds)); } -pub fn calcMinHeaderPad(m: *MachO, ctx: CalcLCsSizeCtx) !u64 { - var padding: u32 = (try calcLCsSize(m, ctx, false)) + m.headerpad_size; +pub fn calcLoadCommandsSizeObject(macho_file: *MachO) u32 { + const options = &macho_file.options; + var sizeofcmds: u64 = 0; + + // LC_SEGMENT_64 + { + assert(macho_file.segments.items.len == 1); + sizeofcmds += @sizeOf(macho.segment_command_64); + const seg = macho_file.segments.items[0]; + sizeofcmds += seg.nsects * @sizeOf(macho.section_64); + } + + // LC_DATA_IN_CODE + sizeofcmds += @sizeOf(macho.linkedit_data_command); + // LC_SYMTAB + sizeofcmds += @sizeOf(macho.symtab_command); + // LC_DYSYMTAB + sizeofcmds += @sizeOf(macho.dysymtab_command); + 
+ if (options.platform) |platform| { + if (platform.isBuildVersionCompatible()) { + // LC_BUILD_VERSION + sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + } else { + // LC_VERSION_MIN_* + sizeofcmds += @sizeOf(macho.version_min_command); + } + } + + return @as(u32, @intCast(sizeofcmds)); +} + +pub fn calcMinHeaderPadSize(macho_file: *MachO) u32 { + const options = &macho_file.options; + var padding: u32 = calcLoadCommandsSize(macho_file, false) + (options.headerpad orelse 0); log.debug("minimum requested headerpad size 0x{x}", .{padding + @sizeOf(macho.mach_header_64)}); - if (m.headerpad_max_install_names) { - const min_headerpad_size: u32 = try calcLCsSize(m, ctx, true); + if (options.headerpad_max_install_names) { + const min_headerpad_size: u32 = calcLoadCommandsSize(macho_file, true); log.debug("headerpad_max_install_names minimum headerpad size 0x{x}", .{ min_headerpad_size + @sizeOf(macho.mach_header_64), }); @@ -125,34 +152,22 @@ pub fn calcMinHeaderPad(m: *MachO, ctx: CalcLCsSizeCtx) !u64 { return offset; } -pub fn calcNumOfLCs(lc_buffer: []const u8) u32 { - var ncmds: u32 = 0; - var pos: usize = 0; - while (true) { - if (pos >= lc_buffer.len) break; - const cmd = @as(*align(1) const macho.load_command, @ptrCast(lc_buffer.ptr + pos)).*; - ncmds += 1; - pos += cmd.cmdsize; - } - return ncmds; -} - -pub fn writeDylinkerLC(lc_writer: anytype) !void { +pub fn writeDylinkerLC(writer: anytype) !void { const name_len = mem.sliceTo(default_dyld_path, 0).len; const cmdsize = @as(u32, @intCast(mem.alignForward( u64, @sizeOf(macho.dylinker_command) + name_len, @sizeOf(u64), ))); - try lc_writer.writeStruct(macho.dylinker_command{ + try writer.writeStruct(macho.dylinker_command{ .cmd = .LOAD_DYLINKER, .cmdsize = cmdsize, .name = @sizeOf(macho.dylinker_command), }); - try lc_writer.writeAll(mem.sliceTo(default_dyld_path, 0)); + try writer.writeAll(mem.sliceTo(default_dyld_path, 0)); const padding = cmdsize - 
@sizeOf(macho.dylinker_command) - name_len; if (padding > 0) { - try lc_writer.writeByteNTimes(0, padding); + try writer.writeByteNTimes(0, padding); } } @@ -164,14 +179,14 @@ const WriteDylibLCCtx = struct { compatibility_version: u32 = 0x10000, }; -fn writeDylibLC(ctx: WriteDylibLCCtx, lc_writer: anytype) !void { +pub fn writeDylibLC(ctx: WriteDylibLCCtx, writer: anytype) !void { const name_len = ctx.name.len + 1; const cmdsize = @as(u32, @intCast(mem.alignForward( u64, @sizeOf(macho.dylib_command) + name_len, @sizeOf(u64), ))); - try lc_writer.writeStruct(macho.dylib_command{ + try writer.writeStruct(macho.dylib_command{ .cmd = ctx.cmd, .cmdsize = cmdsize, .dylib = .{ @@ -181,392 +196,75 @@ fn writeDylibLC(ctx: WriteDylibLCCtx, lc_writer: anytype) !void { .compatibility_version = ctx.compatibility_version, }, }); - try lc_writer.writeAll(ctx.name); - try lc_writer.writeByte(0); + try writer.writeAll(ctx.name); + try writer.writeByte(0); const padding = cmdsize - @sizeOf(macho.dylib_command) - name_len; if (padding > 0) { - try lc_writer.writeByteNTimes(0, padding); + try writer.writeByteNTimes(0, padding); } } -pub fn writeDylibIdLC(macho_file: *MachO, lc_writer: anytype) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - assert(comp.config.output_mode == .Lib and comp.config.link_mode == .Dynamic); - const emit = macho_file.base.emit; - const install_name = macho_file.install_name orelse - try emit.directory.join(gpa, &.{emit.sub_path}); - defer if (macho_file.install_name == null) gpa.free(install_name); - const curr = comp.version orelse std.SemanticVersion{ - .major = 1, - .minor = 0, - .patch = 0, - }; - const compat = macho_file.compatibility_version orelse std.SemanticVersion{ - .major = 1, - .minor = 0, - .patch = 0, - }; +pub fn writeDylibIdLC(options: *const Options, writer: anytype) !void { + assert(options.dylib); + const emit = options.emit; + const install_name = options.install_name orelse emit.sub_path; + const curr = 
options.current_version orelse Options.Version.new(1, 0, 0); + const compat = options.compatibility_version orelse Options.Version.new(1, 0, 0); try writeDylibLC(.{ .cmd = .ID_DYLIB, .name = install_name, - .current_version = @as(u32, @intCast(curr.major << 16 | curr.minor << 8 | curr.patch)), - .compatibility_version = @as(u32, @intCast(compat.major << 16 | compat.minor << 8 | compat.patch)), - }, lc_writer); + .current_version = curr.value, + .compatibility_version = compat.value, + }, writer); } -const RpathIterator = struct { - buffer: []const []const u8, - table: std.StringHashMap(void), - count: usize = 0, - - fn init(gpa: Allocator, rpaths: []const []const u8) RpathIterator { - return .{ .buffer = rpaths, .table = std.StringHashMap(void).init(gpa) }; - } - - fn deinit(it: *RpathIterator) void { - it.table.deinit(); - } - - fn next(it: *RpathIterator) !?[]const u8 { - while (true) { - if (it.count >= it.buffer.len) return null; - const rpath = it.buffer[it.count]; - it.count += 1; - const gop = try it.table.getOrPut(rpath); - if (gop.found_existing) continue; - return rpath; - } - } -}; - -pub fn writeRpathLCs(macho_file: *MachO, lc_writer: anytype) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - - var it = RpathIterator.init(gpa, macho_file.base.rpath_list); - defer it.deinit(); - - while (try it.next()) |rpath| { +pub fn writeRpathLCs(rpaths: []const []const u8, writer: anytype) !void { + for (rpaths) |rpath| { const rpath_len = rpath.len + 1; const cmdsize = @as(u32, @intCast(mem.alignForward( u64, @sizeOf(macho.rpath_command) + rpath_len, @sizeOf(u64), ))); - try lc_writer.writeStruct(macho.rpath_command{ + try writer.writeStruct(macho.rpath_command{ .cmdsize = cmdsize, .path = @sizeOf(macho.rpath_command), }); - try lc_writer.writeAll(rpath); - try lc_writer.writeByte(0); + try writer.writeAll(rpath); + try writer.writeByte(0); const padding = cmdsize - @sizeOf(macho.rpath_command) - rpath_len; if (padding > 0) { - try 
lc_writer.writeByteNTimes(0, padding); + try writer.writeByteNTimes(0, padding); } } } -pub fn writeVersionMinLC(platform: Platform, sdk_version: ?std.SemanticVersion, lc_writer: anytype) !void { - const cmd: macho.LC = switch (platform.os_tag) { - .macos => .VERSION_MIN_MACOSX, - .ios => .VERSION_MIN_IPHONEOS, - .tvos => .VERSION_MIN_TVOS, - .watchos => .VERSION_MIN_WATCHOS, +pub fn writeVersionMinLC(platform: Options.Platform, sdk_version: ?Options.Version, writer: anytype) !void { + const cmd: macho.LC = switch (platform.platform) { + .MACOS => .VERSION_MIN_MACOSX, + .IOS, .IOSSIMULATOR => .VERSION_MIN_IPHONEOS, + .TVOS, .TVOSSIMULATOR => .VERSION_MIN_TVOS, + .WATCHOS, .WATCHOSSIMULATOR => .VERSION_MIN_WATCHOS, else => unreachable, }; - try lc_writer.writeAll(mem.asBytes(&macho.version_min_command{ + try writer.writeAll(mem.asBytes(&macho.version_min_command{ .cmd = cmd, - .version = platform.toAppleVersion(), - .sdk = if (sdk_version) |ver| semanticVersionToAppleVersion(ver) else platform.toAppleVersion(), + .version = platform.version.value, + .sdk = if (sdk_version) |ver| ver.value else platform.version.value, })); } -pub fn writeBuildVersionLC(platform: Platform, sdk_version: ?std.SemanticVersion, lc_writer: anytype) !void { +pub fn writeBuildVersionLC(platform: Options.Platform, sdk_version: ?Options.Version, writer: anytype) !void { const cmdsize = @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); - try lc_writer.writeStruct(macho.build_version_command{ + try writer.writeStruct(macho.build_version_command{ .cmdsize = cmdsize, - .platform = platform.toApplePlatform(), - .minos = platform.toAppleVersion(), - .sdk = if (sdk_version) |ver| semanticVersionToAppleVersion(ver) else platform.toAppleVersion(), + .platform = platform.platform, + .minos = platform.version.value, + .sdk = if (sdk_version) |ver| ver.value else platform.version.value, .ntools = 1, }); - try lc_writer.writeAll(mem.asBytes(&macho.build_tool_version{ - .tool = .ZIG, 
+ try writer.writeAll(mem.asBytes(&macho.build_tool_version{ + .tool = @as(macho.TOOL, @enumFromInt(0x6)), .version = 0x0, })); } - -pub fn writeLoadDylibLCs(dylibs: []const Dylib, referenced: []u16, lc_writer: anytype) !void { - for (referenced) |index| { - const dylib = dylibs[index]; - const dylib_id = dylib.id orelse unreachable; - try writeDylibLC(.{ - .cmd = if (dylib.weak) .LOAD_WEAK_DYLIB else .LOAD_DYLIB, - .name = dylib_id.name, - .timestamp = dylib_id.timestamp, - .current_version = dylib_id.current_version, - .compatibility_version = dylib_id.compatibility_version, - }, lc_writer); - } -} - -pub const Platform = struct { - os_tag: std.Target.Os.Tag, - abi: std.Target.Abi, - version: std.SemanticVersion, - - /// Using Apple's ld64 as our blueprint, `min_version` as well as `sdk_version` are set to - /// the extracted minimum platform version. - pub fn fromLoadCommand(lc: macho.LoadCommandIterator.LoadCommand) Platform { - switch (lc.cmd()) { - .BUILD_VERSION => { - const cmd = lc.cast(macho.build_version_command).?; - return .{ - .os_tag = switch (cmd.platform) { - .MACOS => .macos, - .IOS, .IOSSIMULATOR => .ios, - .TVOS, .TVOSSIMULATOR => .tvos, - .WATCHOS, .WATCHOSSIMULATOR => .watchos, - else => @panic("TODO"), - }, - .abi = switch (cmd.platform) { - .IOSSIMULATOR, - .TVOSSIMULATOR, - .WATCHOSSIMULATOR, - => .simulator, - else => .none, - }, - .version = appleVersionToSemanticVersion(cmd.minos), - }; - }, - .VERSION_MIN_MACOSX, - .VERSION_MIN_IPHONEOS, - .VERSION_MIN_TVOS, - .VERSION_MIN_WATCHOS, - => { - const cmd = lc.cast(macho.version_min_command).?; - return .{ - .os_tag = switch (lc.cmd()) { - .VERSION_MIN_MACOSX => .macos, - .VERSION_MIN_IPHONEOS => .ios, - .VERSION_MIN_TVOS => .tvos, - .VERSION_MIN_WATCHOS => .watchos, - else => unreachable, - }, - .abi = .none, - .version = appleVersionToSemanticVersion(cmd.version), - }; - }, - else => unreachable, - } - } - - pub fn fromTarget(target: std.Target) Platform { - return .{ - .os_tag = 
target.os.tag, - .abi = target.abi, - .version = target.os.version_range.semver.min, - }; - } - - pub fn toAppleVersion(plat: Platform) u32 { - return semanticVersionToAppleVersion(plat.version); - } - - pub fn toApplePlatform(plat: Platform) macho.PLATFORM { - return switch (plat.os_tag) { - .macos => .MACOS, - .ios => if (plat.abi == .simulator) .IOSSIMULATOR else .IOS, - .tvos => if (plat.abi == .simulator) .TVOSSIMULATOR else .TVOS, - .watchos => if (plat.abi == .simulator) .WATCHOSSIMULATOR else .WATCHOS, - else => unreachable, - }; - } - - pub fn isBuildVersionCompatible(plat: Platform) bool { - inline for (supported_platforms) |sup_plat| { - if (sup_plat[0] == plat.os_tag and sup_plat[1] == plat.abi) { - return sup_plat[2] <= plat.toAppleVersion(); - } - } - return false; - } - - pub fn isVersionMinCompatible(plat: Platform) bool { - inline for (supported_platforms) |sup_plat| { - if (sup_plat[0] == plat.os_tag and sup_plat[1] == plat.abi) { - return sup_plat[3] <= plat.toAppleVersion(); - } - } - return false; - } - - pub fn fmtTarget(plat: Platform, cpu_arch: std.Target.Cpu.Arch) std.fmt.Formatter(formatTarget) { - return .{ .data = .{ .platform = plat, .cpu_arch = cpu_arch } }; - } - - const FmtCtx = struct { - platform: Platform, - cpu_arch: std.Target.Cpu.Arch, - }; - - pub fn formatTarget( - ctx: FmtCtx, - comptime unused_fmt_string: []const u8, - options: std.fmt.FormatOptions, - writer: anytype, - ) !void { - _ = unused_fmt_string; - _ = options; - try writer.print("{s}-{s}", .{ @tagName(ctx.cpu_arch), @tagName(ctx.platform.os_tag) }); - if (ctx.platform.abi != .none) { - try writer.print("-{s}", .{@tagName(ctx.platform.abi)}); - } - } - - /// Caller owns the memory. 
- pub fn allocPrintTarget(plat: Platform, gpa: Allocator, cpu_arch: std.Target.Cpu.Arch) error{OutOfMemory}![]u8 { - var buffer = std.ArrayList(u8).init(gpa); - defer buffer.deinit(); - try buffer.writer().print("{}", .{plat.fmtTarget(cpu_arch)}); - return buffer.toOwnedSlice(); - } - - pub fn eqlTarget(plat: Platform, other: Platform) bool { - return plat.os_tag == other.os_tag and plat.abi == other.abi; - } -}; - -const SupportedPlatforms = struct { - std.Target.Os.Tag, - std.Target.Abi, - u32, // Min platform version for which to emit LC_BUILD_VERSION - u32, // Min supported platform version -}; - -// Source: https://github.com/apple-oss-distributions/ld64/blob/59a99ab60399c5e6c49e6945a9e1049c42b71135/src/ld/PlatformSupport.cpp#L52 -// zig fmt: off -const supported_platforms = [_]SupportedPlatforms{ - .{ .macos, .none, 0xA0E00, 0xA0800 }, - .{ .ios, .none, 0xC0000, 0x70000 }, - .{ .tvos, .none, 0xC0000, 0x70000 }, - .{ .watchos, .none, 0x50000, 0x20000 }, - .{ .ios, .simulator, 0xD0000, 0x80000 }, - .{ .tvos, .simulator, 0xD0000, 0x80000 }, - .{ .watchos, .simulator, 0x60000, 0x20000 }, -}; -// zig fmt: on - -inline fn semanticVersionToAppleVersion(version: std.SemanticVersion) u32 { - const major = version.major; - const minor = version.minor; - const patch = version.patch; - return (@as(u32, @intCast(major)) << 16) | (@as(u32, @intCast(minor)) << 8) | @as(u32, @intCast(patch)); -} - -pub inline fn appleVersionToSemanticVersion(version: u32) std.SemanticVersion { - return .{ - .major = @as(u16, @truncate(version >> 16)), - .minor = @as(u8, @truncate(version >> 8)), - .patch = @as(u8, @truncate(version)), - }; -} - -pub fn inferSdkVersion(macho_file: *MachO) ?std.SemanticVersion { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - - var arena_allocator = std.heap.ArenaAllocator.init(gpa); - defer arena_allocator.deinit(); - const arena = arena_allocator.allocator(); - - const sdk_layout = macho_file.sdk_layout orelse return null; - const sdk_dir = 
switch (sdk_layout) { - .sdk => comp.sysroot.?, - .vendored => std.fs.path.join(arena, &.{ comp.zig_lib_directory.path.?, "libc", "darwin" }) catch return null, - }; - if (readSdkVersionFromSettings(arena, sdk_dir)) |ver| { - return parseSdkVersion(ver); - } else |_| { - // Read from settings should always succeed when vendored. - if (sdk_layout == .vendored) @panic("zig installation bug: unable to parse SDK version"); - } - - // infer from pathname - const stem = std.fs.path.stem(sdk_dir); - const start = for (stem, 0..) |c, i| { - if (std.ascii.isDigit(c)) break i; - } else stem.len; - const end = for (stem[start..], start..) |c, i| { - if (std.ascii.isDigit(c) or c == '.') continue; - break i; - } else stem.len; - return parseSdkVersion(stem[start..end]); -} - -// Official Apple SDKs ship with a `SDKSettings.json` located at the top of SDK fs layout. -// Use property `MinimalDisplayName` to determine version. -// The file/property is also available with vendored libc. -fn readSdkVersionFromSettings(arena: Allocator, dir: []const u8) ![]const u8 { - const sdk_path = try std.fs.path.join(arena, &.{ dir, "SDKSettings.json" }); - const contents = try std.fs.cwd().readFileAlloc(arena, sdk_path, std.math.maxInt(u16)); - const parsed = try std.json.parseFromSlice(std.json.Value, arena, contents, .{}); - if (parsed.value.object.get("MinimalDisplayName")) |ver| return ver.string; - return error.SdkVersionFailure; -} - -// Versions reported by Apple aren't exactly semantically valid as they usually omit -// the patch component, so we parse SDK value by hand. 
-fn parseSdkVersion(raw: []const u8) ?std.SemanticVersion { - var parsed: std.SemanticVersion = .{ - .major = 0, - .minor = 0, - .patch = 0, - }; - - const parseNext = struct { - fn parseNext(it: anytype) ?u16 { - const nn = it.next() orelse return null; - return std.fmt.parseInt(u16, nn, 10) catch null; - } - }.parseNext; - - var it = std.mem.splitAny(u8, raw, "."); - parsed.major = parseNext(&it) orelse return null; - parsed.minor = parseNext(&it) orelse return null; - parsed.patch = parseNext(&it) orelse 0; - return parsed; -} - -const expect = std.testing.expect; -const expectEqual = std.testing.expectEqual; - -fn testParseSdkVersionSuccess(exp: std.SemanticVersion, raw: []const u8) !void { - const maybe_ver = parseSdkVersion(raw); - try expect(maybe_ver != null); - const ver = maybe_ver.?; - try expectEqual(exp.major, ver.major); - try expectEqual(exp.minor, ver.minor); - try expectEqual(exp.patch, ver.patch); -} - -test "parseSdkVersion" { - try testParseSdkVersionSuccess(.{ .major = 13, .minor = 4, .patch = 0 }, "13.4"); - try testParseSdkVersionSuccess(.{ .major = 13, .minor = 4, .patch = 1 }, "13.4.1"); - try testParseSdkVersionSuccess(.{ .major = 11, .minor = 15, .patch = 0 }, "11.15"); - - try expect(parseSdkVersion("11") == null); -} - -const std = @import("std"); -const assert = std.debug.assert; -const link = @import("../../link.zig"); -const log = std.log.scoped(.link); -const macho = std.macho; -const mem = std.mem; - -const Allocator = mem.Allocator; -const Dylib = @import("Dylib.zig"); -const MachO = @import("../MachO.zig"); -const Compilation = @import("../../Compilation.zig"); diff --git a/src/link/MachO/relocatable.zig b/src/link/MachO/relocatable.zig new file mode 100644 index 0000000000..3d2d5b97b9 --- /dev/null +++ b/src/link/MachO/relocatable.zig @@ -0,0 +1,452 @@ +pub fn flush(macho_file: *MachO) !void { + markExports(macho_file); + claimUnresolved(macho_file); + try initOutputSections(macho_file); + try macho_file.sortSections(); + try 
macho_file.addAtomsToSections(); + try calcSectionSizes(macho_file); + + { + // For relocatable, we only ever need a single segment so create it now. + const prot: macho.vm_prot_t = macho.PROT.READ | macho.PROT.WRITE | macho.PROT.EXEC; + try macho_file.segments.append(macho_file.base.allocator, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = MachO.makeStaticString(""), + .maxprot = prot, + .initprot = prot, + }); + const seg = &macho_file.segments.items[0]; + seg.nsects = @intCast(macho_file.sections.items(.header).len); + seg.cmdsize += seg.nsects * @sizeOf(macho.section_64); + } + + var off = try allocateSections(macho_file); + + { + // Allocate the single segment. + assert(macho_file.segments.items.len == 1); + const seg = &macho_file.segments.items[0]; + var vmaddr: u64 = 0; + var fileoff: u64 = load_commands.calcLoadCommandsSizeObject(macho_file) + @sizeOf(macho.mach_header_64); + seg.vmaddr = vmaddr; + seg.fileoff = fileoff; + + for (macho_file.sections.items(.header)) |header| { + vmaddr = header.addr + header.size; + if (!header.isZerofill()) { + fileoff = header.offset + header.size; + } + } + + seg.vmsize = vmaddr - seg.vmaddr; + seg.filesize = fileoff - seg.fileoff; + } + + macho_file.allocateAtoms(); + + state_log.debug("{}", .{macho_file.dumpState()}); + + try macho_file.calcSymtabSize(); + try writeAtoms(macho_file); + try writeCompactUnwind(macho_file); + try writeEhFrame(macho_file); + + off = mem.alignForward(u32, off, @alignOf(u64)); + off = try macho_file.writeDataInCode(0, off); + off = mem.alignForward(u32, off, @alignOf(u64)); + off = try macho_file.writeSymtab(off); + off = mem.alignForward(u32, off, @alignOf(u64)); + off = try macho_file.writeStrtab(off); + + const ncmds, const sizeofcmds = try writeLoadCommands(macho_file); + try writeHeader(macho_file, ncmds, sizeofcmds); +} + +fn markExports(macho_file: *MachO) void { + for (macho_file.objects.items) |index| { + for (macho_file.getFile(index).?.getSymbols()) |sym_index| { 
+ const sym = macho_file.getSymbol(sym_index); + const file = sym.getFile(macho_file) orelse continue; + if (sym.visibility != .global) continue; + if (file.getIndex() == index) { + sym.flags.@"export" = true; + } + } + } +} + +fn claimUnresolved(macho_file: *MachO) void { + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + + for (object.symbols.items, 0..) |sym_index, i| { + const nlist_idx = @as(Symbol.Index, @intCast(i)); + const nlist = object.symtab.items(.nlist)[nlist_idx]; + if (!nlist.ext()) continue; + if (!nlist.undf()) continue; + + const sym = macho_file.getSymbol(sym_index); + if (sym.getFile(macho_file) != null) continue; + + sym.value = 0; + sym.atom = 0; + sym.nlist_idx = nlist_idx; + sym.file = index; + sym.flags.weak_ref = nlist.weakRef(); + sym.flags.import = true; + sym.visibility = .global; + } + } +} + +fn initOutputSections(macho_file: *MachO) !void { + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.atoms.items) |atom_index| { + const atom = macho_file.getAtom(atom_index) orelse continue; + if (!atom.flags.alive) continue; + atom.out_n_sect = try Atom.initOutputSection(atom.getInputSection(macho_file), macho_file); + } + } + + const needs_unwind_info = for (macho_file.objects.items) |index| { + if (macho_file.getFile(index).?.object.compact_unwind_sect_index != null) break true; + } else false; + if (needs_unwind_info) { + macho_file.unwind_info_sect_index = try macho_file.addSection("__LD", "__compact_unwind", .{ + .flags = macho.S_ATTR_DEBUG, + }); + } + + const needs_eh_frame = for (macho_file.objects.items) |index| { + if (macho_file.getFile(index).?.object.eh_frame_sect_index != null) break true; + } else false; + if (needs_eh_frame) { + assert(needs_unwind_info); + macho_file.eh_frame_sect_index = try macho_file.addSection("__TEXT", "__eh_frame", .{}); + } +} + +fn calcSectionSizes(macho_file: *MachO) !void { + const slice = 
macho_file.sections.slice(); + for (slice.items(.header), slice.items(.atoms)) |*header, atoms| { + if (atoms.items.len == 0) continue; + for (atoms.items) |atom_index| { + const atom = macho_file.getAtom(atom_index).?; + const atom_alignment = try math.powi(u32, 2, atom.alignment); + const offset = mem.alignForward(u64, header.size, atom_alignment); + const padding = offset - header.size; + atom.value = offset; + header.size += padding + atom.size; + header.@"align" = @max(header.@"align", atom.alignment); + header.nreloc += atom.calcNumRelocs(macho_file); + } + } + + if (macho_file.unwind_info_sect_index) |index| { + calcCompactUnwindSize(macho_file, index); + } + + if (macho_file.eh_frame_sect_index) |index| { + const sect = &macho_file.sections.items(.header)[index]; + sect.size = try eh_frame.calcSize(macho_file); + sect.@"align" = 3; + sect.nreloc = eh_frame.calcNumRelocs(macho_file); + } +} + +fn calcCompactUnwindSize(macho_file: *MachO, sect_index: u8) void { + var size: u32 = 0; + var nreloc: u32 = 0; + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.unwind_records.items) |irec| { + const rec = macho_file.getUnwindRecord(irec); + if (!rec.alive) continue; + size += @sizeOf(macho.compact_unwind_entry); + nreloc += 1; + if (rec.getPersonality(macho_file)) |_| { + nreloc += 1; + } + if (rec.getLsdaAtom(macho_file)) |_| { + nreloc += 1; + } + } + } + + const sect = &macho_file.sections.items(.header)[sect_index]; + sect.size = size; + sect.nreloc = nreloc; + sect.@"align" = 3; +} + +fn allocateSections(macho_file: *MachO) !u32 { + var fileoff = load_commands.calcLoadCommandsSizeObject(macho_file) + @sizeOf(macho.mach_header_64); + var vmaddr: u64 = 0; + const slice = macho_file.sections.slice(); + + for (slice.items(.header)) |*header| { + const alignment = try math.powi(u32, 2, header.@"align"); + vmaddr = mem.alignForward(u64, vmaddr, alignment); + header.addr = vmaddr; + vmaddr += header.size; + 
+ if (!header.isZerofill()) { + fileoff = mem.alignForward(u32, fileoff, alignment); + header.offset = fileoff; + fileoff += @intCast(header.size); + } + } + + for (slice.items(.header)) |*header| { + if (header.nreloc == 0) continue; + header.reloff = mem.alignForward(u32, fileoff, @alignOf(macho.relocation_info)); + fileoff = header.reloff + header.nreloc * @sizeOf(macho.relocation_info); + } + + return fileoff; +} + +// We need to sort relocations in descending order to be compatible with Apple's linker. +fn sortReloc(ctx: void, lhs: macho.relocation_info, rhs: macho.relocation_info) bool { + _ = ctx; + return lhs.r_address > rhs.r_address; +} + +fn writeAtoms(macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.allocator; + const cpu_arch = macho_file.options.cpu_arch.?; + const slice = macho_file.sections.slice(); + + for (slice.items(.header), slice.items(.atoms)) |header, atoms| { + if (atoms.items.len == 0) continue; + if (header.isZerofill()) continue; + + const code = try gpa.alloc(u8, header.size); + defer gpa.free(code); + const padding_byte: u8 = if (header.isCode() and cpu_arch == .x86_64) 0xcc else 0; + @memset(code, padding_byte); + + var relocs = try std.ArrayList(macho.relocation_info).initCapacity(gpa, header.nreloc); + defer relocs.deinit(); + + for (atoms.items) |atom_index| { + const atom = macho_file.getAtom(atom_index).?; + assert(atom.flags.alive); + const off = atom.value - header.addr; + @memcpy(code[off..][0..atom.size], atom.getCode(macho_file)); + try atom.writeRelocs(macho_file, code[off..][0..atom.size], &relocs); + } + + assert(relocs.items.len == header.nreloc); + + mem.sort(macho.relocation_info, relocs.items, {}, sortReloc); + + // TODO scattered writes? 
+ try macho_file.base.file.pwriteAll(code, header.offset); + try macho_file.base.file.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff); + } +} + +fn writeCompactUnwind(macho_file: *MachO) !void { + const sect_index = macho_file.unwind_info_sect_index orelse return; + const gpa = macho_file.base.allocator; + const header = macho_file.sections.items(.header)[sect_index]; + + const nrecs = @divExact(header.size, @sizeOf(macho.compact_unwind_entry)); + var entries = try std.ArrayList(macho.compact_unwind_entry).initCapacity(gpa, nrecs); + defer entries.deinit(); + + var relocs = try std.ArrayList(macho.relocation_info).initCapacity(gpa, header.nreloc); + defer relocs.deinit(); + + const addReloc = struct { + fn addReloc(offset: i32, cpu_arch: std.Target.Cpu.Arch) macho.relocation_info { + return .{ + .r_address = offset, + .r_symbolnum = 0, + .r_pcrel = 0, + .r_length = 3, + .r_extern = 0, + .r_type = switch (cpu_arch) { + .aarch64 => @intFromEnum(macho.reloc_type_arm64.ARM64_RELOC_UNSIGNED), + .x86_64 => @intFromEnum(macho.reloc_type_x86_64.X86_64_RELOC_UNSIGNED), + else => unreachable, + }, + }; + } + }.addReloc; + + var offset: i32 = 0; + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.unwind_records.items) |irec| { + const rec = macho_file.getUnwindRecord(irec); + if (!rec.alive) continue; + + var out: macho.compact_unwind_entry = .{ + .rangeStart = 0, + .rangeLength = rec.length, + .compactUnwindEncoding = rec.enc.enc, + .personalityFunction = 0, + .lsda = 0, + }; + + { + // Function address + const atom = rec.getAtom(macho_file); + const addr = rec.getAtomAddress(macho_file); + out.rangeStart = addr; + var reloc = addReloc(offset, macho_file.options.cpu_arch.?); + reloc.r_symbolnum = atom.out_n_sect + 1; + relocs.appendAssumeCapacity(reloc); + } + + // Personality function + if (rec.getPersonality(macho_file)) |sym| { + const r_symbolnum = math.cast(u24, sym.getOutputSymtabIndex(macho_file).?) 
orelse return error.Overflow; + var reloc = addReloc(offset + 16, macho_file.options.cpu_arch.?); + reloc.r_symbolnum = r_symbolnum; + reloc.r_extern = 1; + relocs.appendAssumeCapacity(reloc); + } + + // LSDA address + if (rec.getLsdaAtom(macho_file)) |atom| { + const addr = rec.getLsdaAddress(macho_file); + out.lsda = addr; + var reloc = addReloc(offset + 24, macho_file.options.cpu_arch.?); + reloc.r_symbolnum = atom.out_n_sect + 1; + relocs.appendAssumeCapacity(reloc); + } + + entries.appendAssumeCapacity(out); + offset += @sizeOf(macho.compact_unwind_entry); + } + } + + assert(entries.items.len == nrecs); + assert(relocs.items.len == header.nreloc); + + mem.sort(macho.relocation_info, relocs.items, {}, sortReloc); + + // TODO scattered writes? + try macho_file.base.file.pwriteAll(mem.sliceAsBytes(entries.items), header.offset); + try macho_file.base.file.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff); +} + +fn writeEhFrame(macho_file: *MachO) !void { + const sect_index = macho_file.eh_frame_sect_index orelse return; + const gpa = macho_file.base.allocator; + const header = macho_file.sections.items(.header)[sect_index]; + + const code = try gpa.alloc(u8, header.size); + defer gpa.free(code); + + var relocs = try std.ArrayList(macho.relocation_info).initCapacity(gpa, header.nreloc); + defer relocs.deinit(); + + try eh_frame.writeRelocs(macho_file, code, &relocs); + assert(relocs.items.len == header.nreloc); + + mem.sort(macho.relocation_info, relocs.items, {}, sortReloc); + + // TODO scattered writes? 
+ try macho_file.base.file.pwriteAll(code, header.offset); + try macho_file.base.file.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff); +} + +fn writeLoadCommands(macho_file: *MachO) !struct { usize, usize } { + const gpa = macho_file.base.allocator; + const needed_size = load_commands.calcLoadCommandsSizeObject(macho_file); + const buffer = try gpa.alloc(u8, needed_size); + defer gpa.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + var cwriter = std.io.countingWriter(stream.writer()); + const writer = cwriter.writer(); + + var ncmds: usize = 0; + + // Segment and section load commands + { + assert(macho_file.segments.items.len == 1); + const seg = macho_file.segments.items[0]; + try writer.writeStruct(seg); + for (macho_file.sections.items(.header)) |header| { + try writer.writeStruct(header); + } + ncmds += 1; + } + + try writer.writeStruct(macho_file.data_in_code_cmd); + ncmds += 1; + try writer.writeStruct(macho_file.symtab_cmd); + ncmds += 1; + try writer.writeStruct(macho_file.dysymtab_cmd); + ncmds += 1; + + if (macho_file.options.platform) |platform| { + if (platform.isBuildVersionCompatible()) { + try load_commands.writeBuildVersionLC(platform, macho_file.options.sdk_version, writer); + ncmds += 1; + } else { + try load_commands.writeVersionMinLC(platform, macho_file.options.sdk_version, writer); + ncmds += 1; + } + } + + assert(cwriter.bytes_written == needed_size); + + try macho_file.base.file.pwriteAll(buffer, @sizeOf(macho.mach_header_64)); + + return .{ ncmds, buffer.len }; +} + +fn writeHeader(macho_file: *MachO, ncmds: usize, sizeofcmds: usize) !void { + var header: macho.mach_header_64 = .{}; + header.filetype = macho.MH_OBJECT; + + const subsections_via_symbols = for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + if (object.hasSubsections()) break true; + } else false; + if (subsections_via_symbols) { + header.flags |= macho.MH_SUBSECTIONS_VIA_SYMBOLS; + } + + switch 
(macho_file.options.cpu_arch.?) { + .aarch64 => { + header.cputype = macho.CPU_TYPE_ARM64; + header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL; + }, + .x86_64 => { + header.cputype = macho.CPU_TYPE_X86_64; + header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL; + }, + else => {}, + } + + header.ncmds = @intCast(ncmds); + header.sizeofcmds = @intCast(sizeofcmds); + + try macho_file.base.file.pwriteAll(mem.asBytes(&header), 0); +} + +const assert = std.debug.assert; +const eh_frame = @import("eh_frame.zig"); +const load_commands = @import("load_commands.zig"); +const macho = std.macho; +const math = std.math; +const mem = std.mem; +const state_log = std.log.scoped(.state); +const std = @import("std"); +const trace = @import("../tracy.zig").trace; + +const Atom = @import("Atom.zig"); +const MachO = @import("../MachO.zig"); +const Symbol = @import("Symbol.zig"); diff --git a/src/link/MachO/stubs.zig b/src/link/MachO/stubs.zig deleted file mode 100644 index 925aeaa61f..0000000000 --- a/src/link/MachO/stubs.zig +++ /dev/null @@ -1,169 +0,0 @@ -pub inline fn stubHelperPreambleSize(cpu_arch: std.Target.Cpu.Arch) u8 { - return switch (cpu_arch) { - .x86_64 => 15, - .aarch64 => 6 * @sizeOf(u32), - else => unreachable, // unhandled architecture type - }; -} - -pub inline fn stubHelperSize(cpu_arch: std.Target.Cpu.Arch) u8 { - return switch (cpu_arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, // unhandled architecture type - }; -} - -pub inline fn stubSize(cpu_arch: std.Target.Cpu.Arch) u8 { - return switch (cpu_arch) { - .x86_64 => 6, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, // unhandled architecture type - }; -} - -pub inline fn stubAlignment(cpu_arch: std.Target.Cpu.Arch) u8 { - return switch (cpu_arch) { - .x86_64 => 1, - .aarch64 => 4, - else => unreachable, // unhandled architecture type - }; -} - -pub inline fn stubOffsetInStubHelper(cpu_arch: std.Target.Cpu.Arch) u8 { - return switch (cpu_arch) { - .x86_64 => 1, - .aarch64 => 2 * 
@sizeOf(u32), - else => unreachable, - }; -} - -pub fn writeStubHelperPreambleCode(args: struct { - cpu_arch: std.Target.Cpu.Arch, - source_addr: u64, - dyld_private_addr: u64, - dyld_stub_binder_got_addr: u64, -}, writer: anytype) !void { - switch (args.cpu_arch) { - .x86_64 => { - try writer.writeAll(&.{ 0x4c, 0x8d, 0x1d }); - { - const disp = try Relocation.calcPcRelativeDisplacementX86( - args.source_addr + 3, - args.dyld_private_addr, - 0, - ); - try writer.writeInt(i32, disp, .little); - } - try writer.writeAll(&.{ 0x41, 0x53, 0xff, 0x25 }); - { - const disp = try Relocation.calcPcRelativeDisplacementX86( - args.source_addr + 11, - args.dyld_stub_binder_got_addr, - 0, - ); - try writer.writeInt(i32, disp, .little); - } - }, - .aarch64 => { - { - const pages = Relocation.calcNumberOfPages(args.source_addr, args.dyld_private_addr); - try writer.writeInt(u32, aarch64.Instruction.adrp(.x17, pages).toU32(), .little); - } - { - const off = try Relocation.calcPageOffset(args.dyld_private_addr, .arithmetic); - try writer.writeInt(u32, aarch64.Instruction.add(.x17, .x17, off, false).toU32(), .little); - } - try writer.writeInt(u32, aarch64.Instruction.stp( - .x16, - .x17, - aarch64.Register.sp, - aarch64.Instruction.LoadStorePairOffset.pre_index(-16), - ).toU32(), .little); - { - const pages = Relocation.calcNumberOfPages(args.source_addr + 12, args.dyld_stub_binder_got_addr); - try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little); - } - { - const off = try Relocation.calcPageOffset(args.dyld_stub_binder_got_addr, .load_store_64); - try writer.writeInt(u32, aarch64.Instruction.ldr( - .x16, - .x16, - aarch64.Instruction.LoadStoreOffset.imm(off), - ).toU32(), .little); - } - try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little); - }, - else => unreachable, - } -} - -pub fn writeStubHelperCode(args: struct { - cpu_arch: std.Target.Cpu.Arch, - source_addr: u64, - target_addr: u64, -}, writer: anytype) !void { - switch 
(args.cpu_arch) { - .x86_64 => { - try writer.writeAll(&.{ 0x68, 0x0, 0x0, 0x0, 0x0, 0xe9 }); - { - const disp = try Relocation.calcPcRelativeDisplacementX86(args.source_addr + 6, args.target_addr, 0); - try writer.writeInt(i32, disp, .little); - } - }, - .aarch64 => { - const stub_size: u4 = 3 * @sizeOf(u32); - const literal = blk: { - const div_res = try std.math.divExact(u64, stub_size - @sizeOf(u32), 4); - break :blk std.math.cast(u18, div_res) orelse return error.Overflow; - }; - try writer.writeInt(u32, aarch64.Instruction.ldrLiteral( - .w16, - literal, - ).toU32(), .little); - { - const disp = try Relocation.calcPcRelativeDisplacementArm64(args.source_addr + 4, args.target_addr); - try writer.writeInt(u32, aarch64.Instruction.b(disp).toU32(), .little); - } - try writer.writeAll(&.{ 0x0, 0x0, 0x0, 0x0 }); - }, - else => unreachable, - } -} - -pub fn writeStubCode(args: struct { - cpu_arch: std.Target.Cpu.Arch, - source_addr: u64, - target_addr: u64, -}, writer: anytype) !void { - switch (args.cpu_arch) { - .x86_64 => { - try writer.writeAll(&.{ 0xff, 0x25 }); - { - const disp = try Relocation.calcPcRelativeDisplacementX86(args.source_addr + 2, args.target_addr, 0); - try writer.writeInt(i32, disp, .little); - } - }, - .aarch64 => { - { - const pages = Relocation.calcNumberOfPages(args.source_addr, args.target_addr); - try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little); - } - { - const off = try Relocation.calcPageOffset(args.target_addr, .load_store_64); - try writer.writeInt(u32, aarch64.Instruction.ldr( - .x16, - .x16, - aarch64.Instruction.LoadStoreOffset.imm(off), - ).toU32(), .little); - } - try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little); - }, - else => unreachable, - } -} - -const std = @import("std"); -const aarch64 = @import("../../arch/aarch64/bits.zig"); - -const Relocation = @import("Relocation.zig"); diff --git a/src/link/MachO/synthetic.zig b/src/link/MachO/synthetic.zig new file mode 100644 
index 0000000000..d75e1f08aa --- /dev/null +++ b/src/link/MachO/synthetic.zig @@ -0,0 +1,669 @@ +pub const GotSection = struct { + symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, + + pub const Index = u32; + + pub fn deinit(got: *GotSection, allocator: Allocator) void { + got.symbols.deinit(allocator); + } + + pub fn addSymbol(got: *GotSection, sym_index: Symbol.Index, macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; + const index = @as(Index, @intCast(got.symbols.items.len)); + const entry = try got.symbols.addOne(gpa); + entry.* = sym_index; + const symbol = macho_file.getSymbol(sym_index); + try symbol.addExtra(.{ .got = index }, macho_file); + } + + pub fn getAddress(got: GotSection, index: Index, macho_file: *MachO) u64 { + assert(index < got.symbols.items.len); + const header = macho_file.sections.items(.header)[macho_file.got_sect_index.?]; + return header.addr + index * @sizeOf(u64); + } + + pub fn size(got: GotSection) usize { + return got.symbols.items.len * @sizeOf(u64); + } + + pub fn addDyldRelocs(got: GotSection, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; + const seg_id = macho_file.sections.items(.segment_id)[macho_file.got_sect_index.?]; + const seg = macho_file.segments.items[seg_id]; + + for (got.symbols.items, 0..) 
|sym_index, idx| { + const sym = macho_file.getSymbol(sym_index); + const addr = got.getAddress(@intCast(idx), macho_file); + const entry = bind.Entry{ + .target = sym_index, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if (sym.flags.import) { + try macho_file.bind.entries.append(gpa, entry); + if (sym.flags.weak) { + try macho_file.weak_bind.entries.append(gpa, entry); + } + } else { + try macho_file.rebase.entries.append(gpa, .{ + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + }); + if (sym.flags.weak) { + try macho_file.weak_bind.entries.append(gpa, entry); + } else if (sym.flags.interposable) { + try macho_file.bind.entries.append(gpa, entry); + } + } + } + } + + pub fn write(got: GotSection, macho_file: *MachO, writer: anytype) !void { + const tracy = trace(@src()); + defer tracy.end(); + for (got.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + const value = if (sym.flags.import) @as(u64, 0) else sym.getAddress(.{}, macho_file); + try writer.writeInt(u64, value, .little); + } + } + + const FormatCtx = struct { + got: GotSection, + macho_file: *MachO, + }; + + pub fn fmt(got: GotSection, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ .got = got, .macho_file = macho_file } }; + } + + pub fn format2( + ctx: FormatCtx, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = unused_fmt_string; + for (ctx.got.symbols.items, 0..) 
|entry, i| { + const symbol = ctx.macho_file.getSymbol(entry); + try writer.print(" {d}@0x{x} => {d}@0x{x} ({s})\n", .{ + i, + symbol.getGotAddress(ctx.macho_file), + entry, + symbol.getAddress(.{}, ctx.macho_file), + symbol.getName(ctx.macho_file), + }); + } + } +}; + +pub const StubsSection = struct { + symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, + + pub const Index = u32; + + pub fn deinit(stubs: *StubsSection, allocator: Allocator) void { + stubs.symbols.deinit(allocator); + } + + pub fn addSymbol(stubs: *StubsSection, sym_index: Symbol.Index, macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; + const index = @as(Index, @intCast(stubs.symbols.items.len)); + const entry = try stubs.symbols.addOne(gpa); + entry.* = sym_index; + const symbol = macho_file.getSymbol(sym_index); + try symbol.addExtra(.{ .stubs = index }, macho_file); + } + + pub fn getAddress(stubs: StubsSection, index: Index, macho_file: *MachO) u64 { + assert(index < stubs.symbols.items.len); + const header = macho_file.sections.items(.header)[macho_file.stubs_sect_index.?]; + return header.addr + index * header.reserved2; + } + + pub fn size(stubs: StubsSection, macho_file: *MachO) usize { + const header = macho_file.sections.items(.header)[macho_file.stubs_sect_index.?]; + return stubs.symbols.items.len * header.reserved2; + } + + pub fn write(stubs: StubsSection, macho_file: *MachO, writer: anytype) !void { + const tracy = trace(@src()); + defer tracy.end(); + const cpu_arch = macho_file.options.cpu_arch.?; + const laptr_sect = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_sect_index.?]; + + for (stubs.symbols.items, 0..) 
|sym_index, idx| { + const sym = macho_file.getSymbol(sym_index); + const source = sym.getAddress(.{ .stubs = true }, macho_file); + const target = laptr_sect.addr + idx * @sizeOf(u64); + switch (cpu_arch) { + .x86_64 => { + try writer.writeAll(&.{ 0xff, 0x25 }); + try writer.writeInt(i32, @intCast(target - source - 2 - 4), .little); + }, + .aarch64 => { + // TODO relax if possible + const pages = try Relocation.calcNumberOfPages(source, target); + try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little); + const off = try Relocation.calcPageOffset(target, .load_store_64); + try writer.writeInt( + u32, + aarch64.Instruction.ldr(.x16, .x16, aarch64.Instruction.LoadStoreOffset.imm(off)).toU32(), + .little, + ); + try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little); + }, + else => unreachable, + } + } + } + + const FormatCtx = struct { + stubs: StubsSection, + macho_file: *MachO, + }; + + pub fn fmt(stubs: StubsSection, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ .stubs = stubs, .macho_file = macho_file } }; + } + + pub fn format2( + ctx: FormatCtx, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = unused_fmt_string; + for (ctx.stubs.symbols.items, 0..) 
|entry, i| { + const symbol = ctx.macho_file.getSymbol(entry); + try writer.print(" {d}@0x{x} => {d}@0x{x} ({s})\n", .{ + i, + symbol.getStubsAddress(ctx.macho_file), + entry, + symbol.getAddress(.{}, ctx.macho_file), + symbol.getName(ctx.macho_file), + }); + } + } +}; + +pub const StubsHelperSection = struct { + pub inline fn preambleSize(cpu_arch: std.Target.Cpu.Arch) usize { + return switch (cpu_arch) { + .x86_64 => 15, + .aarch64 => 6 * @sizeOf(u32), + else => 0, + }; + } + + pub inline fn entrySize(cpu_arch: std.Target.Cpu.Arch) usize { + return switch (cpu_arch) { + .x86_64 => 10, + .aarch64 => 3 * @sizeOf(u32), + else => 0, + }; + } + + pub fn size(stubs_helper: StubsHelperSection, macho_file: *MachO) usize { + const tracy = trace(@src()); + defer tracy.end(); + _ = stubs_helper; + const cpu_arch = macho_file.options.cpu_arch.?; + var s: usize = preambleSize(cpu_arch); + for (macho_file.stubs.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + if ((sym.flags.import and !sym.flags.weak) or (!sym.flags.weak and sym.flags.interposable)) { + s += entrySize(cpu_arch); + } + } + return s; + } + + pub fn write(stubs_helper: StubsHelperSection, macho_file: *MachO, writer: anytype) !void { + const tracy = trace(@src()); + defer tracy.end(); + + try stubs_helper.writePreamble(macho_file, writer); + + const cpu_arch = macho_file.options.cpu_arch.?; + const sect = macho_file.sections.items(.header)[macho_file.stubs_helper_sect_index.?]; + const preamble_size = preambleSize(cpu_arch); + const entry_size = entrySize(cpu_arch); + + var idx: usize = 0; + for (macho_file.stubs.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + if ((sym.flags.import and !sym.flags.weak) or (!sym.flags.weak and sym.flags.interposable)) { + const offset = macho_file.lazy_bind.offsets.items[idx]; + const source: i64 = @intCast(sect.addr + preamble_size + entry_size * idx); + const target: i64 = @intCast(sect.addr); + switch (cpu_arch) { + 
.x86_64 => { + try writer.writeByte(0x68); + try writer.writeInt(u32, offset, .little); + try writer.writeByte(0xe9); + try writer.writeInt(i32, @intCast(target - source - 6 - 4), .little); + }, + .aarch64 => { + const literal = blk: { + const div_res = try std.math.divExact(u64, entry_size - @sizeOf(u32), 4); + break :blk std.math.cast(u18, div_res) orelse return error.Overflow; + }; + try writer.writeInt(u32, aarch64.Instruction.ldrLiteral( + .w16, + literal, + ).toU32(), .little); + const disp = math.cast(i28, @as(i64, @intCast(target)) - @as(i64, @intCast(source + 4))) orelse + return error.Overflow; + try writer.writeInt(u32, aarch64.Instruction.b(disp).toU32(), .little); + try writer.writeAll(&.{ 0x0, 0x0, 0x0, 0x0 }); + }, + else => unreachable, + } + idx += 1; + } + } + } + + fn writePreamble(stubs_helper: StubsHelperSection, macho_file: *MachO, writer: anytype) !void { + _ = stubs_helper; + const cpu_arch = macho_file.options.cpu_arch.?; + const sect = macho_file.sections.items(.header)[macho_file.stubs_helper_sect_index.?]; + const dyld_private_addr = target: { + const sym = macho_file.getSymbol(macho_file.dyld_private_index.?); + break :target sym.getAddress(.{}, macho_file); + }; + const dyld_stub_binder_addr = target: { + const sym = macho_file.getSymbol(macho_file.dyld_stub_binder_index.?); + break :target sym.getGotAddress(macho_file); + }; + switch (cpu_arch) { + .x86_64 => { + try writer.writeAll(&.{ 0x4c, 0x8d, 0x1d }); + try writer.writeInt(i32, @intCast(dyld_private_addr - sect.addr - 3 - 4), .little); + try writer.writeAll(&.{ 0x41, 0x53, 0xff, 0x25 }); + try writer.writeInt(i32, @intCast(dyld_stub_binder_addr - sect.addr - 11 - 4), .little); + }, + .aarch64 => { + { + // TODO relax if possible + const pages = try Relocation.calcNumberOfPages(sect.addr, dyld_private_addr); + try writer.writeInt(u32, aarch64.Instruction.adrp(.x17, pages).toU32(), .little); + const off = try Relocation.calcPageOffset(dyld_private_addr, .arithmetic); + try 
writer.writeInt(u32, aarch64.Instruction.add(.x17, .x17, off, false).toU32(), .little); + } + try writer.writeInt(u32, aarch64.Instruction.stp( + .x16, + .x17, + aarch64.Register.sp, + aarch64.Instruction.LoadStorePairOffset.pre_index(-16), + ).toU32(), .little); + { + // TODO relax if possible + const pages = try Relocation.calcNumberOfPages(sect.addr + 12, dyld_stub_binder_addr); + try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little); + const off = try Relocation.calcPageOffset(dyld_stub_binder_addr, .load_store_64); + try writer.writeInt(u32, aarch64.Instruction.ldr( + .x16, + .x16, + aarch64.Instruction.LoadStoreOffset.imm(off), + ).toU32(), .little); + } + try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little); + }, + else => unreachable, + } + } +}; + +pub const LaSymbolPtrSection = struct { + pub fn size(laptr: LaSymbolPtrSection, macho_file: *MachO) usize { + _ = laptr; + return macho_file.stubs.symbols.items.len * @sizeOf(u64); + } + + pub fn addDyldRelocs(laptr: LaSymbolPtrSection, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + _ = laptr; + const gpa = macho_file.base.allocator; + + const sect = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_sect_index.?]; + const seg_id = macho_file.sections.items(.segment_id)[macho_file.la_symbol_ptr_sect_index.?]; + const seg = macho_file.segments.items[seg_id]; + + for (macho_file.stubs.symbols.items, 0..) 
|sym_index, idx| { + const sym = macho_file.getSymbol(sym_index); + const addr = sect.addr + idx * @sizeOf(u64); + const entry = bind.Entry{ + .target = sym_index, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if (sym.flags.import) { + if (sym.flags.weak) { + try macho_file.bind.entries.append(gpa, entry); + try macho_file.weak_bind.entries.append(gpa, entry); + } else { + try macho_file.lazy_bind.entries.append(gpa, entry); + } + } else { + if (sym.flags.weak) { + try macho_file.rebase.entries.append(gpa, .{ + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + }); + try macho_file.weak_bind.entries.append(gpa, entry); + } else if (sym.flags.interposable) { + try macho_file.lazy_bind.entries.append(gpa, entry); + } + } + } + } + + pub fn write(laptr: LaSymbolPtrSection, macho_file: *MachO, writer: anytype) !void { + const tracy = trace(@src()); + defer tracy.end(); + _ = laptr; + const cpu_arch = macho_file.options.cpu_arch.?; + const sect = macho_file.sections.items(.header)[macho_file.stubs_helper_sect_index.?]; + for (macho_file.stubs.symbols.items, 0..) 
|sym_index, idx| { + const sym = macho_file.getSymbol(sym_index); + const value: u64 = if (sym.flags.@"export") + sym.getAddress(.{ .stubs = false }, macho_file) + else if (sym.flags.weak) + @as(u64, 0) + else + sect.addr + StubsHelperSection.preambleSize(cpu_arch) + + StubsHelperSection.entrySize(cpu_arch) * idx; + try writer.writeInt(u64, @intCast(value), .little); + } + } +}; + +pub const TlvPtrSection = struct { + symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, + + pub const Index = u32; + + pub fn deinit(tlv: *TlvPtrSection, allocator: Allocator) void { + tlv.symbols.deinit(allocator); + } + + pub fn addSymbol(tlv: *TlvPtrSection, sym_index: Symbol.Index, macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; + const index = @as(Index, @intCast(tlv.symbols.items.len)); + const entry = try tlv.symbols.addOne(gpa); + entry.* = sym_index; + const symbol = macho_file.getSymbol(sym_index); + try symbol.addExtra(.{ .tlv_ptr = index }, macho_file); + } + + pub fn getAddress(tlv: TlvPtrSection, index: Index, macho_file: *MachO) u64 { + assert(index < tlv.symbols.items.len); + const header = macho_file.sections.items(.header)[macho_file.tlv_ptr_sect_index.?]; + return header.addr + index * @sizeOf(u64) * 3; + } + + pub fn size(tlv: TlvPtrSection) usize { + return tlv.symbols.items.len * @sizeOf(u64); + } + + pub fn addDyldRelocs(tlv: TlvPtrSection, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; + const seg_id = macho_file.sections.items(.segment_id)[macho_file.tlv_ptr_sect_index.?]; + const seg = macho_file.segments.items[seg_id]; + + for (tlv.symbols.items, 0..) 
|sym_index, idx| { + const sym = macho_file.getSymbol(sym_index); + const addr = tlv.getAddress(@intCast(idx), macho_file); + const entry = bind.Entry{ + .target = sym_index, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if (sym.flags.import) { + try macho_file.bind.entries.append(gpa, entry); + if (sym.flags.weak) { + try macho_file.weak_bind.entries.append(gpa, entry); + } + } else { + try macho_file.rebase.entries.append(gpa, .{ + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + }); + if (sym.flags.weak) { + try macho_file.weak_bind.entries.append(gpa, entry); + } else if (sym.flags.interposable) { + try macho_file.bind.entries.append(gpa, entry); + } + } + } + } + + pub fn write(tlv: TlvPtrSection, macho_file: *MachO, writer: anytype) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (tlv.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + if (sym.flags.import) { + try writer.writeInt(u64, 0, .little); + } else { + try writer.writeInt(u64, sym.getAddress(.{}, macho_file), .little); + } + } + } + + const FormatCtx = struct { + tlv: TlvPtrSection, + macho_file: *MachO, + }; + + pub fn fmt(tlv: TlvPtrSection, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ .tlv = tlv, .macho_file = macho_file } }; + } + + pub fn format2( + ctx: FormatCtx, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = unused_fmt_string; + for (ctx.tlv.symbols.items, 0..) 
|entry, i| { + const symbol = ctx.macho_file.getSymbol(entry); + try writer.print(" {d}@0x{x} => {d}@0x{x} ({s})\n", .{ + i, + symbol.getTlvPtrAddress(ctx.macho_file), + entry, + symbol.getAddress(.{}, ctx.macho_file), + symbol.getName(ctx.macho_file), + }); + } + } +}; + +pub const ObjcStubsSection = struct { + symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, + + pub fn deinit(objc: *ObjcStubsSection, allocator: Allocator) void { + objc.symbols.deinit(allocator); + } + + pub fn entrySize(cpu_arch: std.Target.Cpu.Arch) u8 { + return switch (cpu_arch) { + .x86_64 => 13, + .aarch64 => 8 * @sizeOf(u32), + else => unreachable, + }; + } + + pub fn addSymbol(objc: *ObjcStubsSection, sym_index: Symbol.Index, macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; + const index = @as(Index, @intCast(objc.symbols.items.len)); + const entry = try objc.symbols.addOne(gpa); + entry.* = sym_index; + const symbol = macho_file.getSymbol(sym_index); + try symbol.addExtra(.{ .objc_stubs = index }, macho_file); + } + + pub fn getAddress(objc: ObjcStubsSection, index: Index, macho_file: *MachO) u64 { + assert(index < objc.symbols.items.len); + const header = macho_file.sections.items(.header)[macho_file.objc_stubs_sect_index.?]; + return header.addr + index * entrySize(macho_file.options.cpu_arch.?); + } + + pub fn size(objc: ObjcStubsSection, macho_file: *MachO) usize { + return objc.symbols.items.len * entrySize(macho_file.options.cpu_arch.?); + } + + pub fn write(objc: ObjcStubsSection, macho_file: *MachO, writer: anytype) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (objc.symbols.items, 0..) |sym_index, idx| { + const sym = macho_file.getSymbol(sym_index); + const addr = objc.getAddress(@intCast(idx), macho_file); + switch (macho_file.options.cpu_arch.?) 
{ + .x86_64 => { + try writer.writeAll(&.{ 0x48, 0x8b, 0x35 }); + { + const target = sym.getObjcSelrefsAddress(macho_file); + const source = addr; + try writer.writeInt(i32, @intCast(target - source - 3 - 4), .little); + } + try writer.writeAll(&.{ 0xff, 0x25 }); + { + const target_sym = macho_file.getSymbol(macho_file.objc_msg_send_index.?); + const target = target_sym.getGotAddress(macho_file); + const source = addr + 7; + try writer.writeInt(i32, @intCast(target - source - 2 - 4), .little); + } + }, + .aarch64 => { + { + const target = sym.getObjcSelrefsAddress(macho_file); + const source = addr; + const pages = try Relocation.calcNumberOfPages(source, target); + try writer.writeInt(u32, aarch64.Instruction.adrp(.x1, pages).toU32(), .little); + const off = try Relocation.calcPageOffset(target, .load_store_64); + try writer.writeInt( + u32, + aarch64.Instruction.ldr(.x1, .x1, aarch64.Instruction.LoadStoreOffset.imm(off)).toU32(), + .little, + ); + } + { + const target_sym = macho_file.getSymbol(macho_file.objc_msg_send_index.?); + const target = target_sym.getGotAddress(macho_file); + const source = addr + 2 * @sizeOf(u32); + const pages = try Relocation.calcNumberOfPages(source, target); + try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little); + const off = try Relocation.calcPageOffset(target, .load_store_64); + try writer.writeInt( + u32, + aarch64.Instruction.ldr(.x16, .x16, aarch64.Instruction.LoadStoreOffset.imm(off)).toU32(), + .little, + ); + } + try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little); + try writer.writeInt(u32, aarch64.Instruction.brk(1).toU32(), .little); + try writer.writeInt(u32, aarch64.Instruction.brk(1).toU32(), .little); + try writer.writeInt(u32, aarch64.Instruction.brk(1).toU32(), .little); + }, + else => unreachable, + } + } + } + + const FormatCtx = struct { + objc: ObjcStubsSection, + macho_file: *MachO, + }; + + pub fn fmt(objc: ObjcStubsSection, macho_file: *MachO) 
std.fmt.Formatter(format2) { + return .{ .data = .{ .objc = objc, .macho_file = macho_file } }; + } + + pub fn format2( + ctx: FormatCtx, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = unused_fmt_string; + for (ctx.objc.symbols.items, 0..) |entry, i| { + const symbol = ctx.macho_file.getSymbol(entry); + try writer.print(" {d}@0x{x} => {d}@0x{x} ({s})\n", .{ + i, + symbol.getObjcStubsAddress(ctx.macho_file), + entry, + symbol.getAddress(.{}, ctx.macho_file), + symbol.getName(ctx.macho_file), + }); + } + } + + pub const Index = u32; +}; + +pub const Indsymtab = struct { + pub inline fn nsyms(ind: Indsymtab, macho_file: *MachO) u32 { + _ = ind; + return @intCast(macho_file.stubs.symbols.items.len * 2 + macho_file.got.symbols.items.len); + } + + pub fn write(ind: Indsymtab, macho_file: *MachO, writer: anytype) !void { + const tracy = trace(@src()); + defer tracy.end(); + + _ = ind; + + for (macho_file.stubs.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + try writer.writeInt(u32, sym.getOutputSymtabIndex(macho_file).?, .little); + } + + for (macho_file.got.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + try writer.writeInt(u32, sym.getOutputSymtabIndex(macho_file).?, .little); + } + + for (macho_file.stubs.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + try writer.writeInt(u32, sym.getOutputSymtabIndex(macho_file).?, .little); + } + } +}; + +pub const RebaseSection = Rebase; +pub const BindSection = bind.Bind; +pub const WeakBindSection = bind.WeakBind; +pub const LazyBindSection = bind.LazyBind; +pub const ExportTrieSection = Trie; + +const aarch64 = @import("../aarch64.zig"); +const assert = std.debug.assert; +const bind = @import("dyld_info/bind.zig"); +const math = std.math; +const std = @import("std"); +const trace = @import("../tracy.zig").trace; + +const Allocator = std.mem.Allocator; +const 
MachO = @import("../MachO.zig"); +const Rebase = @import("dyld_info/Rebase.zig"); +const Relocation = @import("Relocation.zig"); +const Symbol = @import("Symbol.zig"); +const Trie = @import("dyld_info/Trie.zig"); diff --git a/src/link/MachO/thunks.zig b/src/link/MachO/thunks.zig index f080de7f80..6593fb6a1b 100644 --- a/src/link/MachO/thunks.zig +++ b/src/link/MachO/thunks.zig @@ -1,13 +1,157 @@ -//! An algorithm for allocating output machine code section (aka `__TEXT,__text`), -//! and insertion of range extending thunks. As such, this algorithm is only run -//! for a target that requires range extenders such as arm64. -//! -//! The algorithm works pessimistically and assumes that any reference to an Atom in -//! another output section is out of range. +pub fn createThunks(sect_id: u8, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); -/// Branch instruction has 26 bits immediate but 4 byte aligned. + const gpa = macho_file.base.allocator; + const slice = macho_file.sections.slice(); + const header = &slice.items(.header)[sect_id]; + const atoms = slice.items(.atoms)[sect_id].items; + assert(atoms.len > 0); + + for (atoms) |atom_index| { + macho_file.getAtom(atom_index).?.value = @bitCast(@as(i64, -1)); + } + + var i: usize = 0; + while (i < atoms.len) { + const start = i; + const start_atom = macho_file.getAtom(atoms[start]).?; + assert(start_atom.flags.alive); + start_atom.value = try advance(header, start_atom.size, start_atom.alignment); + i += 1; + + while (i < atoms.len and + header.size - start_atom.value < max_allowed_distance) : (i += 1) + { + const atom_index = atoms[i]; + const atom = macho_file.getAtom(atom_index).?; + assert(atom.flags.alive); + atom.value = try advance(header, atom.size, atom.alignment); + } + + // Insert a thunk at the group end + const thunk_index = try macho_file.addThunk(); + const thunk = macho_file.getThunk(thunk_index); + thunk.out_n_sect = sect_id; + + // Scan relocs in the group and create 
trampolines for any unreachable callsite + for (atoms[start..i]) |atom_index| { + const atom = macho_file.getAtom(atom_index).?; + log.debug("atom({d}) {s}", .{ atom_index, atom.getName(macho_file) }); + for (atom.getRelocs(macho_file)) |rel| { + if (rel.type != .branch) continue; + if (isReachable(atom, rel, macho_file)) continue; + try thunk.symbols.put(gpa, rel.target, {}); + } + atom.thunk_index = thunk_index; + } + + thunk.value = try advance(header, thunk.size(), 2); + + log.debug("thunk({d}) : {}", .{ thunk_index, thunk.fmt(macho_file) }); + } +} + +fn advance(sect: *macho.section_64, size: u64, pow2_align: u32) !u64 { + const alignment = try math.powi(u32, 2, pow2_align); + const offset = mem.alignForward(u64, sect.size, alignment); + const padding = offset - sect.size; + sect.size += padding + size; + sect.@"align" = @max(sect.@"align", pow2_align); + return offset; +} + +fn isReachable(atom: *const Atom, rel: Relocation, macho_file: *MachO) bool { + const target = rel.getTargetSymbol(macho_file); + if (target.flags.stubs or target.flags.objc_stubs) return false; + if (atom.out_n_sect != target.out_n_sect) return false; + const target_atom = target.getAtom(macho_file).?; + if (target_atom.value == @as(u64, @bitCast(@as(i64, -1)))) return false; + const saddr = @as(i64, @intCast(atom.value)) + @as(i64, @intCast(rel.offset - atom.off)); + const taddr: i64 = @intCast(rel.getTargetAddress(macho_file)); + _ = math.cast(i28, taddr + rel.addend - saddr) orelse return false; + return true; +} + +pub const Thunk = struct { + value: u64 = 0, + out_n_sect: u8 = 0, + symbols: std.AutoArrayHashMapUnmanaged(Symbol.Index, void) = .{}, + + pub fn deinit(thunk: *Thunk, allocator: Allocator) void { + thunk.symbols.deinit(allocator); + } + + pub fn size(thunk: Thunk) usize { + return thunk.symbols.keys().len * trampoline_size; + } + + pub fn getAddress(thunk: Thunk, sym_index: Symbol.Index) u64 { + return thunk.value + thunk.symbols.getIndex(sym_index).? 
* trampoline_size; + } + + pub fn write(thunk: Thunk, macho_file: *MachO, writer: anytype) !void { + for (thunk.symbols.keys(), 0..) |sym_index, i| { + const sym = macho_file.getSymbol(sym_index); + const saddr = thunk.value + i * trampoline_size; + const taddr = sym.getAddress(.{}, macho_file); + const pages = try Relocation.calcNumberOfPages(saddr, taddr); + try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little); + const off = try Relocation.calcPageOffset(taddr, .arithmetic); + try writer.writeInt(u32, aarch64.Instruction.add(.x16, .x16, off, false).toU32(), .little); + try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little); + } + } + + pub fn format( + thunk: Thunk, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = thunk; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format Thunk directly"); + } + + pub fn fmt(thunk: Thunk, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ + .thunk = thunk, + .macho_file = macho_file, + } }; + } + + const FormatContext = struct { + thunk: Thunk, + macho_file: *MachO, + }; + + fn format2( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = unused_fmt_string; + const thunk = ctx.thunk; + const macho_file = ctx.macho_file; + try writer.print("@{x} : size({x})\n", .{ thunk.value, thunk.size() }); + for (thunk.symbols.keys()) |index| { + const sym = macho_file.getSymbol(index); + try writer.print(" %{d} : {s} : @{x}\n", .{ index, sym.getName(macho_file), sym.value }); + } + } + + const trampoline_size = 3 * @sizeOf(u32); + + pub const Index = u32; +}; + +/// Branch instruction has 26 bits immediate but is 4 byte aligned. 
const jump_bits = @bitSizeOf(i28); - const max_distance = (1 << (jump_bits - 1)); /// A branch will need an extender if its target is larger than @@ -16,359 +160,17 @@ const max_distance = (1 << (jump_bits - 1)); /// and assume margin to be 5MiB. const max_allowed_distance = max_distance - 0x500_000; -pub const Thunk = struct { - start_index: Atom.Index, - len: u32, - - targets: std.MultiArrayList(Target) = .{}, - lookup: std.AutoHashMapUnmanaged(Target, u32) = .{}, - - pub const Tag = enum { - stub, - atom, - }; - - pub const Target = struct { - tag: Tag, - target: SymbolWithLoc, - }; - - pub const Index = u32; - - pub fn deinit(self: *Thunk, gpa: Allocator) void { - self.targets.deinit(gpa); - self.lookup.deinit(gpa); - } - - pub fn getStartAtomIndex(self: Thunk) Atom.Index { - assert(self.len != 0); - return self.start_index; - } - - pub fn getEndAtomIndex(self: Thunk) Atom.Index { - assert(self.len != 0); - return self.start_index + self.len - 1; - } - - pub fn getSize(self: Thunk) u64 { - return 12 * self.len; - } - - pub fn getAlignment() u32 { - return @alignOf(u32); - } - - pub fn getTrampoline(self: Thunk, macho_file: *MachO, tag: Tag, target: SymbolWithLoc) ?SymbolWithLoc { - const atom_index = self.lookup.get(.{ .tag = tag, .target = target }) orelse return null; - return macho_file.getAtom(atom_index).getSymbolWithLoc(); - } -}; - -pub fn createThunks(macho_file: *MachO, sect_id: u8) !void { - const header = &macho_file.sections.items(.header)[sect_id]; - if (header.size == 0) return; - - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const first_atom_index = macho_file.sections.items(.first_atom_index)[sect_id].?; - - header.size = 0; - header.@"align" = 0; - - var atom_count: u32 = 0; - - { - var atom_index = first_atom_index; - while (true) { - const atom = macho_file.getAtom(atom_index); - const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc()); - sym.n_value = 0; - atom_count += 1; - - if (atom.next_index) |next_index| { - 
atom_index = next_index; - } else break; - } - } - - var allocated = std.AutoHashMap(Atom.Index, void).init(gpa); - defer allocated.deinit(); - try allocated.ensureTotalCapacity(atom_count); - - var group_start = first_atom_index; - var group_end = first_atom_index; - var offset: u64 = 0; - - while (true) { - const group_start_atom = macho_file.getAtom(group_start); - log.debug("GROUP START at {d}", .{group_start}); - - while (true) { - const atom = macho_file.getAtom(group_end); - offset = atom.alignment.forward(offset); - - const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc()); - sym.n_value = offset; - offset += atom.size; - - macho_file.logAtom(group_end, log); - - header.@"align" = @max(header.@"align", atom.alignment.toLog2Units()); - - allocated.putAssumeCapacityNoClobber(group_end, {}); - - const group_start_sym = macho_file.getSymbol(group_start_atom.getSymbolWithLoc()); - if (offset - group_start_sym.n_value >= max_allowed_distance) break; - - if (atom.next_index) |next_index| { - group_end = next_index; - } else break; - } - log.debug("GROUP END at {d}", .{group_end}); - - // Insert thunk at group_end - const thunk_index = @as(u32, @intCast(macho_file.thunks.items.len)); - try macho_file.thunks.append(gpa, .{ .start_index = undefined, .len = 0 }); - - // Scan relocs in the group and create trampolines for any unreachable callsite. 
- var atom_index = group_start; - while (true) { - const atom = macho_file.getAtom(atom_index); - try scanRelocs( - macho_file, - atom_index, - allocated, - thunk_index, - group_end, - ); - - if (atom_index == group_end) break; - - if (atom.next_index) |next_index| { - atom_index = next_index; - } else break; - } - - offset = mem.alignForward(u64, offset, Thunk.getAlignment()); - allocateThunk(macho_file, thunk_index, offset, header); - offset += macho_file.thunks.items[thunk_index].getSize(); - - const thunk = macho_file.thunks.items[thunk_index]; - if (thunk.len == 0) { - const group_end_atom = macho_file.getAtom(group_end); - if (group_end_atom.next_index) |next_index| { - group_start = next_index; - group_end = next_index; - } else break; - } else { - const thunk_end_atom_index = thunk.getEndAtomIndex(); - const thunk_end_atom = macho_file.getAtom(thunk_end_atom_index); - if (thunk_end_atom.next_index) |next_index| { - group_start = next_index; - group_end = next_index; - } else break; - } - } - - header.size = @as(u32, @intCast(offset)); -} - -fn allocateThunk( - macho_file: *MachO, - thunk_index: Thunk.Index, - base_offset: u64, - header: *macho.section_64, -) void { - const thunk = macho_file.thunks.items[thunk_index]; - if (thunk.len == 0) return; - - const first_atom_index = thunk.getStartAtomIndex(); - const end_atom_index = thunk.getEndAtomIndex(); - - var atom_index = first_atom_index; - var offset = base_offset; - while (true) { - const atom = macho_file.getAtom(atom_index); - offset = mem.alignForward(u64, offset, Thunk.getAlignment()); - - const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc()); - sym.n_value = offset; - offset += atom.size; - - macho_file.logAtom(atom_index, log); - - header.@"align" = @max(header.@"align", atom.alignment.toLog2Units()); - - if (end_atom_index == atom_index) break; - - if (atom.next_index) |next_index| { - atom_index = next_index; - } else break; - } -} - -fn scanRelocs( - macho_file: *MachO, - atom_index: 
Atom.Index, - allocated: std.AutoHashMap(Atom.Index, void), - thunk_index: Thunk.Index, - group_end: Atom.Index, -) !void { - const atom = macho_file.getAtom(atom_index); - const object = macho_file.objects.items[atom.getFile().?]; - - const base_offset = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { - const source_sect = object.getSourceSection(source_sym.n_sect - 1); - break :blk @as(i32, @intCast(source_sym.n_value - source_sect.addr)); - } else 0; - - const code = Atom.getAtomCode(macho_file, atom_index); - const relocs = Atom.getAtomRelocs(macho_file, atom_index); - const ctx = Atom.getRelocContext(macho_file, atom_index); - - for (relocs) |rel| { - if (!relocNeedsThunk(rel)) continue; - - const target = Atom.parseRelocTarget(macho_file, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = code, - .base_offset = ctx.base_offset, - .base_addr = ctx.base_addr, - }); - if (isReachable(macho_file, atom_index, rel, base_offset, target, allocated)) continue; - - log.debug("{x}: source = {s}@{x}, target = {s}@{x} unreachable", .{ - rel.r_address - base_offset, - macho_file.getSymbolName(atom.getSymbolWithLoc()), - macho_file.getSymbol(atom.getSymbolWithLoc()).n_value, - macho_file.getSymbolName(target), - macho_file.getSymbol(target).n_value, - }); - - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const target_sym = macho_file.getSymbol(target); - const thunk = &macho_file.thunks.items[thunk_index]; - - const tag: Thunk.Tag = if (target_sym.undf()) .stub else .atom; - const thunk_target: Thunk.Target = .{ .tag = tag, .target = target }; - const gop = try thunk.lookup.getOrPut(gpa, thunk_target); - if (!gop.found_existing) { - gop.value_ptr.* = try pushThunkAtom(macho_file, thunk, group_end); - try thunk.targets.append(gpa, thunk_target); - } - - try macho_file.thunk_table.put(gpa, atom_index, thunk_index); - } -} - -fn pushThunkAtom(macho_file: *MachO, thunk: *Thunk, group_end: Atom.Index) !Atom.Index { - const thunk_atom_index 
= try createThunkAtom(macho_file); - - const thunk_atom = macho_file.getAtomPtr(thunk_atom_index); - const end_atom_index = if (thunk.len == 0) group_end else thunk.getEndAtomIndex(); - const end_atom = macho_file.getAtomPtr(end_atom_index); - - if (end_atom.next_index) |first_after_index| { - const first_after_atom = macho_file.getAtomPtr(first_after_index); - first_after_atom.prev_index = thunk_atom_index; - thunk_atom.next_index = first_after_index; - } - - end_atom.next_index = thunk_atom_index; - thunk_atom.prev_index = end_atom_index; - - if (thunk.len == 0) { - thunk.start_index = thunk_atom_index; - } - - thunk.len += 1; - - return thunk_atom_index; -} - -inline fn relocNeedsThunk(rel: macho.relocation_info) bool { - const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); - return rel_type == .ARM64_RELOC_BRANCH26; -} - -fn isReachable( - macho_file: *MachO, - atom_index: Atom.Index, - rel: macho.relocation_info, - base_offset: i32, - target: SymbolWithLoc, - allocated: std.AutoHashMap(Atom.Index, void), -) bool { - if (macho_file.stub_table.lookup.contains(target)) return false; - - const source_atom = macho_file.getAtom(atom_index); - const source_sym = macho_file.getSymbol(source_atom.getSymbolWithLoc()); - - const target_object = macho_file.objects.items[target.getFile().?]; - const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index).?; - const target_atom = macho_file.getAtom(target_atom_index); - const target_sym = macho_file.getSymbol(target_atom.getSymbolWithLoc()); - - if (source_sym.n_sect != target_sym.n_sect) return false; - - if (!allocated.contains(target_atom_index)) return false; - - const source_addr = source_sym.n_value + @as(u32, @intCast(rel.r_address - base_offset)); - const target_addr = if (Atom.relocRequiresGot(macho_file, rel)) - macho_file.getGotEntryAddress(target).? 
- else - Atom.getRelocTargetAddress(macho_file, target, false); - _ = Relocation.calcPcRelativeDisplacementArm64(source_addr, target_addr) catch - return false; - - return true; -} - -fn createThunkAtom(macho_file: *MachO) !Atom.Index { - const sym_index = try macho_file.allocateSymbol(); - const atom_index = try macho_file.createAtom(sym_index, .{ - .size = @sizeOf(u32) * 3, - .alignment = .@"4", - }); - const sym = macho_file.getSymbolPtr(.{ .sym_index = sym_index }); - sym.n_type = macho.N_SECT; - sym.n_sect = macho_file.text_section_index.? + 1; - return atom_index; -} - -pub fn writeThunkCode(macho_file: *MachO, thunk: *const Thunk, writer: anytype) !void { - const slice = thunk.targets.slice(); - for (thunk.getStartAtomIndex()..thunk.getEndAtomIndex(), 0..) |atom_index, target_index| { - const atom = macho_file.getAtom(@intCast(atom_index)); - const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - const source_addr = sym.n_value; - const tag = slice.items(.tag)[target_index]; - const target = slice.items(.target)[target_index]; - const target_addr = switch (tag) { - .stub => macho_file.getStubsEntryAddress(target).?, - .atom => macho_file.getSymbol(target).n_value, - }; - const pages = Relocation.calcNumberOfPages(source_addr, target_addr); - try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little); - const off = try Relocation.calcPageOffset(target_addr, .arithmetic); - try writer.writeInt(u32, aarch64.Instruction.add(.x16, .x16, off, false).toU32(), .little); - try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little); - } -} - -const std = @import("std"); +const aarch64 = @import("../aarch64.zig"); const assert = std.debug.assert; -const log = std.log.scoped(.thunks); +const log = std.log.scoped(.link); const macho = std.macho; const math = std.math; const mem = std.mem; - -const aarch64 = @import("../../arch/aarch64/bits.zig"); +const std = @import("std"); +const trace = @import("../tracy.zig").trace; const 
Allocator = mem.Allocator; const Atom = @import("Atom.zig"); const MachO = @import("../MachO.zig"); const Relocation = @import("Relocation.zig"); -const SymbolWithLoc = MachO.SymbolWithLoc; +const Symbol = @import("Symbol.zig"); diff --git a/src/link/MachO/uuid.zig b/src/link/MachO/uuid.zig index 6d60397149..0248acc091 100644 --- a/src/link/MachO/uuid.zig +++ b/src/link/MachO/uuid.zig @@ -4,22 +4,31 @@ /// and we will use it too as it seems accepted by Apple OSes. /// TODO LLD also hashes the output filename to disambiguate between same builds with different /// output files. Should we also do that? -pub fn calcUuid(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void { +pub fn calcUuid( + allocator: Allocator, + thread_pool: *ThreadPool, + file: fs.File, + file_size: u64, + out: *[Md5.digest_length]u8, +) !void { + const tracy = trace(@src()); + defer tracy.end(); + const chunk_size: usize = 1024 * 1024; const num_chunks: usize = std.math.cast(usize, @divTrunc(file_size, chunk_size)) orelse return error.Overflow; const actual_num_chunks = if (@rem(file_size, chunk_size) > 0) num_chunks + 1 else num_chunks; - const hashes = try comp.gpa.alloc([Md5.digest_length]u8, actual_num_chunks); - defer comp.gpa.free(hashes); + const hashes = try allocator.alloc([Md5.digest_length]u8, actual_num_chunks); + defer allocator.free(hashes); - var hasher = Hasher(Md5){ .allocator = comp.gpa, .thread_pool = comp.thread_pool }; + var hasher = Hasher(Md5){ .allocator = allocator, .thread_pool = thread_pool }; try hasher.hash(file, hashes, .{ .chunk_size = chunk_size, .max_file_size = file_size, }); - const final_buffer = try comp.gpa.alloc(u8, actual_num_chunks * Md5.digest_length); - defer comp.gpa.free(final_buffer); + const final_buffer = try allocator.alloc(u8, actual_num_chunks * Md5.digest_length); + defer allocator.free(final_buffer); for (hashes, 0..) 
|hash, i| { @memcpy(final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash); @@ -35,11 +44,12 @@ inline fn conform(out: *[Md5.digest_length]u8) void { out[8] = (out[8] & 0x3F) | 0x80; } -const std = @import("std"); const fs = std.fs; const mem = std.mem; +const std = @import("std"); +const trace = @import("../tracy.zig").trace; const Allocator = mem.Allocator; -const Compilation = @import("../../Compilation.zig"); const Md5 = std.crypto.hash.Md5; const Hasher = @import("hasher.zig").ParallelHasher; +const ThreadPool = std.Thread.Pool; diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig deleted file mode 100644 index 57681dd935..0000000000 --- a/src/link/MachO/zld.zig +++ /dev/null @@ -1,1230 +0,0 @@ -pub fn linkWithZld( - macho_file: *MachO, - arena: Allocator, - prog_node: *std.Progress.Node, -) link.File.FlushError!void { - const tracy = trace(@src()); - defer tracy.end(); - - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const target = comp.root_mod.resolved_target.result; - const emit = macho_file.base.emit; - - const directory = emit.directory; // Just an alias to make it shorter to type. - const full_out_path = try directory.join(arena, &[_][]const u8{emit.sub_path}); - const opt_zcu = comp.module; - - // If there is no Zig code to compile, then we should skip flushing the output file because it - // will not be part of the linker line anyway. - const module_obj_path: ?[]const u8 = if (opt_zcu != null) blk: { - try macho_file.flushModule(arena, prog_node); - - if (fs.path.dirname(full_out_path)) |dirname| { - break :blk try fs.path.join(arena, &.{ dirname, macho_file.base.zcu_object_sub_path.? 
}); - } else { - break :blk macho_file.base.zcu_object_sub_path.?; - } - } else null; - - var sub_prog_node = prog_node.start("MachO Flush", 0); - sub_prog_node.activate(); - sub_prog_node.context.refresh(); - defer sub_prog_node.end(); - - const output_mode = comp.config.output_mode; - const link_mode = comp.config.link_mode; - const cpu_arch = target.cpu.arch; - const is_lib = output_mode == .Lib; - const is_dyn_lib = link_mode == .Dynamic and is_lib; - const is_exe_or_dyn_lib = is_dyn_lib or output_mode == .Exe; - const stack_size = macho_file.base.stack_size; - - const id_symlink_basename = "zld.id"; - - var man: Cache.Manifest = undefined; - defer if (!macho_file.base.disable_lld_caching) man.deinit(); - - var digest: [Cache.hex_digest_len]u8 = undefined; - - const objects = comp.objects; - - if (!macho_file.base.disable_lld_caching) { - man = comp.cache_parent.obtain(); - - // We are about to obtain this lock, so here we give other processes a chance first. - macho_file.base.releaseLock(); - - comptime assert(Compilation.link_hash_implementation_version == 11); - - for (objects) |obj| { - _ = try man.addFile(obj.path, null); - man.hash.add(obj.must_link); - } - for (comp.c_object_table.keys()) |key| { - _ = try man.addFile(key.status.success.object_path, null); - } - try man.addOptionalFile(module_obj_path); - // We can skip hashing libc and libc++ components that we are in charge of building from Zig - // installation sources because they are always a product of the compiler version + target information. 
- man.hash.add(stack_size); - man.hash.add(macho_file.pagezero_vmsize); - man.hash.add(macho_file.headerpad_size); - man.hash.add(macho_file.headerpad_max_install_names); - man.hash.add(macho_file.base.gc_sections); - man.hash.add(macho_file.dead_strip_dylibs); - man.hash.add(comp.root_mod.strip); - try MachO.hashAddFrameworks(&man, macho_file.frameworks); - man.hash.addListOfBytes(macho_file.base.rpath_list); - if (is_dyn_lib) { - man.hash.addOptionalBytes(macho_file.install_name); - man.hash.addOptional(comp.version); - } - try link.hashAddSystemLibs(&man, comp.system_libs); - man.hash.addOptionalBytes(comp.sysroot); - man.hash.addListOfBytes(comp.force_undefined_symbols.keys()); - try man.addOptionalFile(macho_file.entitlements); - - // We don't actually care whether it's a cache hit or miss; we just - // need the digest and the lock. - _ = try man.hit(); - digest = man.final(); - - var prev_digest_buf: [digest.len]u8 = undefined; - const prev_digest: []u8 = Cache.readSmallFile( - directory.handle, - id_symlink_basename, - &prev_digest_buf, - ) catch |err| blk: { - log.debug("MachO Zld new_digest={s} error: {s}", .{ - std.fmt.fmtSliceHexLower(&digest), - @errorName(err), - }); - // Handle this as a cache miss. - break :blk prev_digest_buf[0..0]; - }; - if (mem.eql(u8, prev_digest, &digest)) { - // Hot diggity dog! The output binary is already there. - log.debug("MachO Zld digest={s} match - skipping invocation", .{ - std.fmt.fmtSliceHexLower(&digest), - }); - macho_file.base.lock = man.toOwnedLock(); - return; - } - log.debug("MachO Zld prev_digest={s} new_digest={s}", .{ - std.fmt.fmtSliceHexLower(prev_digest), - std.fmt.fmtSliceHexLower(&digest), - }); - - // We are about to change the output file to be different, so we invalidate the build hash now. 
- directory.handle.deleteFile(id_symlink_basename) catch |err| switch (err) { - error.FileNotFound => {}, - else => |e| return e, - }; - } - - if (output_mode == .Obj) { - // LLD's MachO driver does not support the equivalent of `-r` so we do a simple file copy - // here. TODO: think carefully about how we can avoid this redundant operation when doing - // build-obj. See also the corresponding TODO in linkAsArchive. - const the_object_path = blk: { - if (objects.len != 0) { - break :blk objects[0].path; - } - - if (comp.c_object_table.count() != 0) - break :blk comp.c_object_table.keys()[0].status.success.object_path; - - if (module_obj_path) |p| - break :blk p; - - // TODO I think this is unreachable. Audit this situation when solving the above TODO - // regarding eliding redundant object -> object transformations. - return error.NoObjectsToLink; - }; - // This can happen when using --enable-cache and using the stage1 backend. In this case - // we can skip the file copy. - if (!mem.eql(u8, the_object_path, full_out_path)) { - try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{}); - } - } else { - const sub_path = emit.sub_path; - - const old_file = macho_file.base.file; // TODO is this needed at all? - defer macho_file.base.file = old_file; - - const file = try directory.handle.createFile(sub_path, .{ - .truncate = true, - .read = true, - .mode = link.File.determineMode(false, output_mode, link_mode), - }); - defer file.close(); - macho_file.base.file = file; - - // Index 0 is always a null symbol. - try macho_file.locals.append(gpa, .{ - .n_strx = 0, - .n_type = 0, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try macho_file.strtab.buffer.append(gpa, 0); - - // Positional arguments to the linker such as object files and static archives. 
- var positionals = std.ArrayList(Compilation.LinkObject).init(arena); - try positionals.ensureUnusedCapacity(objects.len); - positionals.appendSliceAssumeCapacity(objects); - - for (comp.c_object_table.keys()) |key| { - try positionals.append(.{ .path = key.status.success.object_path }); - } - - if (module_obj_path) |p| { - try positionals.append(.{ .path = p }); - } - - if (comp.compiler_rt_lib) |lib| try positionals.append(.{ .path = lib.full_object_path }); - if (comp.compiler_rt_obj) |obj| try positionals.append(.{ .path = obj.full_object_path }); - - // libc++ dep - if (comp.config.link_libcpp) { - try positionals.ensureUnusedCapacity(2); - positionals.appendAssumeCapacity(.{ .path = comp.libcxxabi_static_lib.?.full_object_path }); - positionals.appendAssumeCapacity(.{ .path = comp.libcxx_static_lib.?.full_object_path }); - } - - var libs = std.StringArrayHashMap(link.SystemLib).init(arena); - - { - const vals = comp.system_libs.values(); - try libs.ensureUnusedCapacity(vals.len); - for (vals) |v| libs.putAssumeCapacity(v.path.?, v); - } - - { - try libs.ensureUnusedCapacity(macho_file.frameworks.len); - for (macho_file.frameworks) |v| libs.putAssumeCapacity(v.path, .{ - .needed = v.needed, - .weak = v.weak, - .path = v.path, - }); - } - - try macho_file.resolveLibSystem(arena, comp, &libs); - - if (comp.verbose_link) { - var argv = std.ArrayList([]const u8).init(arena); - - try argv.append("zig"); - try argv.append("ld"); - - if (is_exe_or_dyn_lib) { - try argv.append("-dynamic"); - } - - if (is_dyn_lib) { - try argv.append("-dylib"); - - if (macho_file.install_name) |install_name| { - try argv.append("-install_name"); - try argv.append(install_name); - } - } - - { - const platform = Platform.fromTarget(target); - try argv.append("-platform_version"); - try argv.append(@tagName(platform.os_tag)); - try argv.append(try std.fmt.allocPrint(arena, "{}", .{platform.version})); - - const sdk_version: ?std.SemanticVersion = 
load_commands.inferSdkVersion(macho_file); - if (sdk_version) |ver| { - try argv.append(try std.fmt.allocPrint(arena, "{d}.{d}", .{ ver.major, ver.minor })); - } else { - try argv.append(try std.fmt.allocPrint(arena, "{}", .{platform.version})); - } - } - - if (comp.sysroot) |syslibroot| { - try argv.append("-syslibroot"); - try argv.append(syslibroot); - } - - for (macho_file.base.rpath_list) |rpath| { - try argv.append("-rpath"); - try argv.append(rpath); - } - - try argv.appendSlice(&.{ - "-pagezero_size", try std.fmt.allocPrint(arena, "0x{x}", .{macho_file.pagezero_vmsize}), - "-headerpad_size", try std.fmt.allocPrint(arena, "0x{x}", .{macho_file.headerpad_size}), - }); - - if (macho_file.headerpad_max_install_names) { - try argv.append("-headerpad_max_install_names"); - } - - if (macho_file.base.gc_sections) { - try argv.append("-dead_strip"); - } - - if (macho_file.dead_strip_dylibs) { - try argv.append("-dead_strip_dylibs"); - } - - if (macho_file.entry_name) |entry_name| { - try argv.appendSlice(&.{ "-e", entry_name }); - } - - for (objects) |obj| { - if (obj.must_link) { - try argv.append("-force_load"); - } - try argv.append(obj.path); - } - - for (comp.c_object_table.keys()) |key| { - try argv.append(key.status.success.object_path); - } - - if (module_obj_path) |p| { - try argv.append(p); - } - - if (comp.compiler_rt_lib) |lib| try argv.append(lib.full_object_path); - if (comp.compiler_rt_obj) |obj| try argv.append(obj.full_object_path); - - if (comp.config.link_libcpp) { - try argv.append(comp.libcxxabi_static_lib.?.full_object_path); - try argv.append(comp.libcxx_static_lib.?.full_object_path); - } - - try argv.append("-o"); - try argv.append(full_out_path); - - try argv.append("-lSystem"); - - for (comp.system_libs.keys()) |l_name| { - const info = comp.system_libs.get(l_name).?; - const arg = if (info.needed) - try std.fmt.allocPrint(arena, "-needed-l{s}", .{l_name}) - else if (info.weak) - try std.fmt.allocPrint(arena, "-weak-l{s}", .{l_name}) - 
else - try std.fmt.allocPrint(arena, "-l{s}", .{l_name}); - try argv.append(arg); - } - - for (macho_file.frameworks) |framework| { - const name = std.fs.path.stem(framework.path); - const arg = if (framework.needed) - try std.fmt.allocPrint(arena, "-needed_framework {s}", .{name}) - else if (framework.weak) - try std.fmt.allocPrint(arena, "-weak_framework {s}", .{name}) - else - try std.fmt.allocPrint(arena, "-framework {s}", .{name}); - try argv.append(arg); - } - - if (is_dyn_lib and macho_file.base.allow_shlib_undefined) { - try argv.append("-undefined"); - try argv.append("dynamic_lookup"); - } - - Compilation.dump_argv(argv.items); - } - - var dependent_libs = std.fifo.LinearFifo(MachO.DylibReExportInfo, .Dynamic).init(arena); - - for (positionals.items) |obj| { - const in_file = try std.fs.cwd().openFile(obj.path, .{}); - defer in_file.close(); - - var parse_ctx = MachO.ParseErrorCtx.init(gpa); - defer parse_ctx.deinit(); - - macho_file.parsePositional( - in_file, - obj.path, - obj.must_link, - &dependent_libs, - &parse_ctx, - ) catch |err| try macho_file.handleAndReportParseError(obj.path, err, &parse_ctx); - } - - for (libs.keys(), libs.values()) |path, lib| { - const in_file = try std.fs.cwd().openFile(path, .{}); - defer in_file.close(); - - var parse_ctx = MachO.ParseErrorCtx.init(gpa); - defer parse_ctx.deinit(); - - macho_file.parseLibrary( - in_file, - path, - lib, - false, - false, - null, - &dependent_libs, - &parse_ctx, - ) catch |err| try macho_file.handleAndReportParseError(path, err, &parse_ctx); - } - - try macho_file.parseDependentLibs(&dependent_libs); - - try macho_file.resolveSymbols(); - if (macho_file.unresolved.count() > 0) { - try macho_file.reportUndefined(); - return error.FlushFailure; - } - - for (macho_file.objects.items, 0..) 
|*object, object_id| { - object.splitIntoAtoms(macho_file, @as(u32, @intCast(object_id))) catch |err| switch (err) { - error.MissingEhFrameSection => try macho_file.reportParseError( - object.name, - "missing section: '__TEXT,__eh_frame' is required but could not be found", - .{}, - ), - error.BadDwarfCfi => try macho_file.reportParseError( - object.name, - "invalid DWARF: failed to parse '__TEXT,__eh_frame' section", - .{}, - ), - else => |e| return e, - }; - } - - if (macho_file.base.gc_sections) { - try dead_strip.gcAtoms(macho_file); - } - - try macho_file.createDyldPrivateAtom(); - try macho_file.createTentativeDefAtoms(); - - if (comp.config.output_mode == .Exe) { - const global = macho_file.getEntryPoint().?; - if (macho_file.getSymbol(global).undf()) { - // We do one additional check here in case the entry point was found in one of the dylibs. - // (I actually have no idea what this would imply but it is a possible outcome and so we - // support it.) - try macho_file.addStubEntry(global); - } - } - - for (macho_file.objects.items) |object| { - for (object.atoms.items) |atom_index| { - const atom = macho_file.getAtom(atom_index); - const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - const header = macho_file.sections.items(.header)[sym.n_sect - 1]; - if (header.isZerofill()) continue; - - const relocs = Atom.getAtomRelocs(macho_file, atom_index); - try Atom.scanAtomRelocs(macho_file, atom_index, relocs); - } - } - - try eh_frame.scanRelocs(macho_file); - try UnwindInfo.scanRelocs(macho_file); - - if (macho_file.dyld_stub_binder_index) |index| - try macho_file.addGotEntry(macho_file.globals.items[index]); - - try calcSectionSizes(macho_file); - - var unwind_info = UnwindInfo{ .gpa = gpa }; - defer unwind_info.deinit(); - try unwind_info.collect(macho_file); - - try eh_frame.calcSectionSize(macho_file, &unwind_info); - unwind_info.calcSectionSize(macho_file); - - try pruneAndSortSections(macho_file); - try createSegments(macho_file); - try 
allocateSegments(macho_file); - - try macho_file.allocateSpecialSymbols(); - - if (build_options.enable_logging) { - macho_file.logSymtab(); - macho_file.logSegments(); - macho_file.logSections(); - macho_file.logAtoms(); - } - - try writeAtoms(macho_file); - if (target.cpu.arch == .aarch64) try writeThunks(macho_file); - try writeDyldPrivateAtom(macho_file); - - if (macho_file.stubs_section_index) |_| { - try writeStubs(macho_file); - try writeStubHelpers(macho_file); - try writeLaSymbolPtrs(macho_file); - } - if (macho_file.got_section_index) |sect_id| - try writePointerEntries(macho_file, sect_id, &macho_file.got_table); - if (macho_file.tlv_ptr_section_index) |sect_id| - try writePointerEntries(macho_file, sect_id, &macho_file.tlv_ptr_table); - - try eh_frame.write(macho_file, &unwind_info); - try unwind_info.write(macho_file); - try macho_file.writeLinkeditSegmentData(); - - // If the last section of __DATA segment is zerofill section, we need to ensure - // that the free space between the end of the last non-zerofill section of __DATA - // segment and the beginning of __LINKEDIT segment is zerofilled as the loader will - // copy-paste this space into memory for quicker zerofill operation. 
- if (macho_file.data_segment_cmd_index) |data_seg_id| blk: { - var physical_zerofill_start: ?u64 = null; - const section_indexes = macho_file.getSectionIndexes(data_seg_id); - for (macho_file.sections.items(.header)[section_indexes.start..section_indexes.end]) |header| { - if (header.isZerofill() and header.size > 0) break; - physical_zerofill_start = header.offset + header.size; - } else break :blk; - const start = physical_zerofill_start orelse break :blk; - const linkedit = macho_file.getLinkeditSegmentPtr(); - const size = math.cast(usize, linkedit.fileoff - start) orelse return error.Overflow; - if (size > 0) { - log.debug("zeroing out zerofill area of length {x} at {x}", .{ size, start }); - const padding = try gpa.alloc(u8, size); - defer gpa.free(padding); - @memset(padding, 0); - try macho_file.base.file.?.pwriteAll(padding, start); - } - } - - // Write code signature padding if required - var codesig: ?CodeSignature = if (macho_file.requiresCodeSignature()) blk: { - // Preallocate space for the code signature. - // We need to do this at this stage so that we have the load commands with proper values - // written out to the file. - // The most important here is to have the correct vm and filesize of the __LINKEDIT segment - // where the code signature goes into. 
- var codesig = CodeSignature.init(MachO.getPageSize(cpu_arch)); - codesig.code_directory.ident = fs.path.basename(full_out_path); - if (macho_file.entitlements) |path| { - try codesig.addEntitlements(gpa, path); - } - try macho_file.writeCodeSignaturePadding(&codesig); - break :blk codesig; - } else null; - defer if (codesig) |*csig| csig.deinit(gpa); - - // Write load commands - var lc_buffer = std.ArrayList(u8).init(arena); - const lc_writer = lc_buffer.writer(); - - try macho_file.writeSegmentHeaders(lc_writer); - try lc_writer.writeStruct(macho_file.dyld_info_cmd); - try lc_writer.writeStruct(macho_file.function_starts_cmd); - try lc_writer.writeStruct(macho_file.data_in_code_cmd); - try lc_writer.writeStruct(macho_file.symtab_cmd); - try lc_writer.writeStruct(macho_file.dysymtab_cmd); - try load_commands.writeDylinkerLC(lc_writer); - - switch (output_mode) { - .Exe => blk: { - const seg_id = macho_file.header_segment_cmd_index.?; - const seg = macho_file.segments.items[seg_id]; - const global = macho_file.getEntryPoint() orelse break :blk; - const sym = macho_file.getSymbol(global); - - const addr: u64 = if (sym.undf()) - // In this case, the symbol has been resolved in one of dylibs and so we point - // to the stub as its vmaddr value. - macho_file.getStubsEntryAddress(global).? 
- else - sym.n_value; - - try lc_writer.writeStruct(macho.entry_point_command{ - .entryoff = @as(u32, @intCast(addr - seg.vmaddr)), - .stacksize = macho_file.base.stack_size, - }); - }, - .Lib => if (link_mode == .Dynamic) { - try load_commands.writeDylibIdLC(macho_file, lc_writer); - }, - else => {}, - } - - try load_commands.writeRpathLCs(macho_file, lc_writer); - try lc_writer.writeStruct(macho.source_version_command{ - .version = 0, - }); - { - const platform = Platform.fromTarget(target); - const sdk_version: ?std.SemanticVersion = load_commands.inferSdkVersion(macho_file); - if (platform.isBuildVersionCompatible()) { - try load_commands.writeBuildVersionLC(platform, sdk_version, lc_writer); - } else { - try load_commands.writeVersionMinLC(platform, sdk_version, lc_writer); - } - } - - const uuid_cmd_offset = @sizeOf(macho.mach_header_64) + @as(u32, @intCast(lc_buffer.items.len)); - try lc_writer.writeStruct(macho_file.uuid_cmd); - - try load_commands.writeLoadDylibLCs( - macho_file.dylibs.items, - macho_file.referenced_dylibs.keys(), - lc_writer, - ); - - if (codesig != null) { - try lc_writer.writeStruct(macho_file.codesig_cmd); - } - - const ncmds = load_commands.calcNumOfLCs(lc_buffer.items); - try macho_file.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64)); - try macho_file.writeHeader(ncmds, @as(u32, @intCast(lc_buffer.items.len))); - try macho_file.writeUuid(comp, uuid_cmd_offset, codesig != null); - - if (codesig) |*csig| { - try macho_file.writeCodeSignature(comp, csig); // code signing always comes last - try MachO.invalidateKernelCache(directory.handle, macho_file.base.emit.sub_path); - } - } - - if (!macho_file.base.disable_lld_caching) { - // Update the file with the digest. If it fails we can continue; it only - // means that the next invocation will have an unnecessary cache miss. 
- Cache.writeSmallFile(directory.handle, id_symlink_basename, &digest) catch |err| { - log.debug("failed to save linking hash digest file: {s}", .{@errorName(err)}); - }; - // Again failure here only means an unnecessary cache miss. - if (man.have_exclusive_lock) { - man.writeManifest() catch |err| { - log.debug("failed to write cache manifest when linking: {s}", .{@errorName(err)}); - }; - } - // We hang on to this lock so that the output file path can be used without - // other processes clobbering it. - macho_file.base.lock = man.toOwnedLock(); - } -} - -fn createSegments(macho_file: *MachO) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const target = macho_file.base.comp.root_mod.resolved_target.result; - const page_size = MachO.getPageSize(target.cpu.arch); - const aligned_pagezero_vmsize = mem.alignBackward(u64, macho_file.pagezero_vmsize, page_size); - if (macho_file.base.comp.config.output_mode != .Lib and aligned_pagezero_vmsize > 0) { - if (aligned_pagezero_vmsize != macho_file.pagezero_vmsize) { - log.warn("requested __PAGEZERO size (0x{x}) is not page aligned", .{macho_file.pagezero_vmsize}); - log.warn(" rounding down to 0x{x}", .{aligned_pagezero_vmsize}); - } - macho_file.pagezero_segment_cmd_index = @intCast(macho_file.segments.items.len); - try macho_file.segments.append(gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = MachO.makeStaticString("__PAGEZERO"), - .vmsize = aligned_pagezero_vmsize, - }); - } - - // __TEXT segment is non-optional - { - const protection = MachO.getSegmentMemoryProtection("__TEXT"); - macho_file.text_segment_cmd_index = @intCast(macho_file.segments.items.len); - macho_file.header_segment_cmd_index = macho_file.text_segment_cmd_index.?; - try macho_file.segments.append(gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = MachO.makeStaticString("__TEXT"), - .maxprot = protection, - .initprot = protection, - }); - } - - for (macho_file.sections.items(.header), 0..) 
|header, sect_id| { - if (header.size == 0) continue; // empty section - - const segname = header.segName(); - const segment_id = macho_file.getSegmentByName(segname) orelse blk: { - log.debug("creating segment '{s}'", .{segname}); - const segment_id = @as(u8, @intCast(macho_file.segments.items.len)); - const protection = MachO.getSegmentMemoryProtection(segname); - try macho_file.segments.append(gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = MachO.makeStaticString(segname), - .maxprot = protection, - .initprot = protection, - }); - break :blk segment_id; - }; - const segment = &macho_file.segments.items[segment_id]; - segment.cmdsize += @sizeOf(macho.section_64); - segment.nsects += 1; - macho_file.sections.items(.segment_index)[sect_id] = segment_id; - } - - if (macho_file.getSegmentByName("__DATA_CONST")) |index| { - macho_file.data_const_segment_cmd_index = index; - } - - if (macho_file.getSegmentByName("__DATA")) |index| { - macho_file.data_segment_cmd_index = index; - } - - // __LINKEDIT always comes last - { - const protection = MachO.getSegmentMemoryProtection("__LINKEDIT"); - macho_file.linkedit_segment_cmd_index = @intCast(macho_file.segments.items.len); - try macho_file.segments.append(gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = MachO.makeStaticString("__LINKEDIT"), - .maxprot = protection, - .initprot = protection, - }); - } -} - -fn writeAtoms(macho_file: *MachO) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const slice = macho_file.sections.slice(); - - for (slice.items(.first_atom_index), 0..) 
|first_atom_index, sect_id| { - const header = slice.items(.header)[sect_id]; - if (header.isZerofill()) continue; - - var atom_index = first_atom_index orelse continue; - - var buffer = try gpa.alloc(u8, math.cast(usize, header.size) orelse return error.Overflow); - defer gpa.free(buffer); - @memset(buffer, 0); // TODO with NOPs - - log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); - - while (true) { - const atom = macho_file.getAtom(atom_index); - if (atom.getFile()) |file| { - const this_sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - const padding_size: usize = if (atom.next_index) |next_index| blk: { - const next_sym = macho_file.getSymbol(macho_file.getAtom(next_index).getSymbolWithLoc()); - const size = next_sym.n_value - (this_sym.n_value + atom.size); - break :blk math.cast(usize, size) orelse return error.Overflow; - } else 0; - - log.debug(" (adding ATOM(%{d}, '{s}') from object({d}) to buffer)", .{ - atom.sym_index, - macho_file.getSymbolName(atom.getSymbolWithLoc()), - file, - }); - if (padding_size > 0) { - log.debug(" (with padding {x})", .{padding_size}); - } - - const offset = math.cast(usize, this_sym.n_value - header.addr) orelse - return error.Overflow; - log.debug(" (at offset 0x{x})", .{offset}); - - const code = Atom.getAtomCode(macho_file, atom_index); - const relocs = Atom.getAtomRelocs(macho_file, atom_index); - const size = math.cast(usize, atom.size) orelse return error.Overflow; - @memcpy(buffer[offset .. 
offset + size], code); - try Atom.resolveRelocs( - macho_file, - atom_index, - buffer[offset..][0..size], - relocs, - ); - } - - if (atom.next_index) |next_index| { - atom_index = next_index; - } else break; - } - - log.debug(" (writing at file offset 0x{x})", .{header.offset}); - try macho_file.base.file.?.pwriteAll(buffer, header.offset); - } -} - -fn writeDyldPrivateAtom(macho_file: *MachO) !void { - const atom_index = macho_file.dyld_private_atom_index orelse return; - const atom = macho_file.getAtom(atom_index); - const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - const sect_id = macho_file.data_section_index.?; - const header = macho_file.sections.items(.header)[sect_id]; - const offset = sym.n_value - header.addr + header.offset; - log.debug("writing __dyld_private at offset 0x{x}", .{offset}); - const buffer: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64); - try macho_file.base.file.?.pwriteAll(&buffer, offset); -} - -fn writeThunks(macho_file: *MachO) !void { - const target = macho_file.base.comp.root_mod.resolved_target.result; - assert(target.cpu.arch == .aarch64); - const comp = macho_file.base.comp; - const gpa = comp.gpa; - - const sect_id = macho_file.text_section_index orelse return; - const header = macho_file.sections.items(.header)[sect_id]; - - for (macho_file.thunks.items, 0..) 
|*thunk, i| { - if (thunk.getSize() == 0) continue; - const thunk_size = math.cast(usize, thunk.getSize()) orelse return error.Overflow; - var buffer = try std.ArrayList(u8).initCapacity(gpa, thunk_size); - defer buffer.deinit(); - try thunks.writeThunkCode(macho_file, thunk, buffer.writer()); - const thunk_atom = macho_file.getAtom(thunk.getStartAtomIndex()); - const thunk_sym = macho_file.getSymbol(thunk_atom.getSymbolWithLoc()); - const offset = thunk_sym.n_value - header.addr + header.offset; - log.debug("writing thunk({d}) at offset 0x{x}", .{ i, offset }); - try macho_file.base.file.?.pwriteAll(buffer.items, offset); - } -} - -fn writePointerEntries(macho_file: *MachO, sect_id: u8, table: anytype) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const header = macho_file.sections.items(.header)[sect_id]; - const capacity = math.cast(usize, header.size) orelse return error.Overflow; - var buffer = try std.ArrayList(u8).initCapacity(gpa, capacity); - defer buffer.deinit(); - for (table.entries.items) |entry| { - const sym = macho_file.getSymbol(entry); - buffer.writer().writeInt(u64, sym.n_value, .little) catch unreachable; - } - log.debug("writing __DATA_CONST,__got contents at file offset 0x{x}", .{header.offset}); - try macho_file.base.file.?.pwriteAll(buffer.items, header.offset); -} - -fn writeStubs(macho_file: *MachO) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const stubs_header = macho_file.sections.items(.header)[macho_file.stubs_section_index.?]; - const la_symbol_ptr_header = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_section_index.?]; - - const capacity = math.cast(usize, stubs_header.size) orelse return error.Overflow; - var buffer = try std.ArrayList(u8).initCapacity(gpa, capacity); - defer buffer.deinit(); - - for (0..macho_file.stub_table.count()) |index| { - try 
stubs.writeStubCode(.{ - .cpu_arch = cpu_arch, - .source_addr = stubs_header.addr + stubs.stubSize(cpu_arch) * index, - .target_addr = la_symbol_ptr_header.addr + index * @sizeOf(u64), - }, buffer.writer()); - } - - log.debug("writing __TEXT,__stubs contents at file offset 0x{x}", .{stubs_header.offset}); - try macho_file.base.file.?.pwriteAll(buffer.items, stubs_header.offset); -} - -fn writeStubHelpers(macho_file: *MachO) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const stub_helper_header = macho_file.sections.items(.header)[macho_file.stub_helper_section_index.?]; - - const capacity = math.cast(usize, stub_helper_header.size) orelse return error.Overflow; - var buffer = try std.ArrayList(u8).initCapacity(gpa, capacity); - defer buffer.deinit(); - - { - const dyld_private_addr = blk: { - const atom = macho_file.getAtom(macho_file.dyld_private_atom_index.?); - const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - break :blk sym.n_value; - }; - const dyld_stub_binder_got_addr = blk: { - const sym_loc = macho_file.globals.items[macho_file.dyld_stub_binder_index.?]; - break :blk macho_file.getGotEntryAddress(sym_loc).?; - }; - try stubs.writeStubHelperPreambleCode(.{ - .cpu_arch = cpu_arch, - .source_addr = stub_helper_header.addr, - .dyld_private_addr = dyld_private_addr, - .dyld_stub_binder_got_addr = dyld_stub_binder_got_addr, - }, buffer.writer()); - } - - for (0..macho_file.stub_table.count()) |index| { - const source_addr = stub_helper_header.addr + stubs.stubHelperPreambleSize(cpu_arch) + - stubs.stubHelperSize(cpu_arch) * index; - try stubs.writeStubHelperCode(.{ - .cpu_arch = cpu_arch, - .source_addr = source_addr, - .target_addr = stub_helper_header.addr, - }, buffer.writer()); - } - - log.debug("writing __TEXT,__stub_helper contents at file offset 0x{x}", .{ - stub_helper_header.offset, - }); - try 
macho_file.base.file.?.pwriteAll(buffer.items, stub_helper_header.offset); -} - -fn writeLaSymbolPtrs(macho_file: *MachO) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const la_symbol_ptr_header = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_section_index.?]; - const stub_helper_header = macho_file.sections.items(.header)[macho_file.stub_helper_section_index.?]; - - const capacity = math.cast(usize, la_symbol_ptr_header.size) orelse return error.Overflow; - var buffer = try std.ArrayList(u8).initCapacity(gpa, capacity); - defer buffer.deinit(); - - for (0..macho_file.stub_table.count()) |index| { - const target_addr = stub_helper_header.addr + stubs.stubHelperPreambleSize(cpu_arch) + - stubs.stubHelperSize(cpu_arch) * index; - buffer.writer().writeInt(u64, target_addr, .little) catch unreachable; - } - - log.debug("writing __DATA,__la_symbol_ptr contents at file offset 0x{x}", .{ - la_symbol_ptr_header.offset, - }); - try macho_file.base.file.?.pwriteAll(buffer.items, la_symbol_ptr_header.offset); -} - -fn pruneAndSortSections(macho_file: *MachO) !void { - const Entry = struct { - index: u8, - - pub fn lessThan(ctx: *MachO, lhs: @This(), rhs: @This()) bool { - const lhs_header = ctx.sections.items(.header)[lhs.index]; - const rhs_header = ctx.sections.items(.header)[rhs.index]; - return MachO.getSectionPrecedence(lhs_header) < MachO.getSectionPrecedence(rhs_header); - } - }; - - const comp = macho_file.base.comp; - const gpa = comp.gpa; - - var entries = try std.ArrayList(Entry).initCapacity(gpa, macho_file.sections.slice().len); - defer entries.deinit(); - - for (0..macho_file.sections.slice().len) |index| { - const section = macho_file.sections.get(index); - if (section.header.size == 0) { - log.debug("pruning section {s},{s} {?d}", .{ - section.header.segName(), - section.header.sectName(), - section.first_atom_index, 
- }); - for (&[_]*?u8{ - &macho_file.text_section_index, - &macho_file.data_const_section_index, - &macho_file.data_section_index, - &macho_file.bss_section_index, - &macho_file.thread_vars_section_index, - &macho_file.thread_data_section_index, - &macho_file.thread_bss_section_index, - &macho_file.eh_frame_section_index, - &macho_file.unwind_info_section_index, - &macho_file.got_section_index, - &macho_file.tlv_ptr_section_index, - &macho_file.stubs_section_index, - &macho_file.stub_helper_section_index, - &macho_file.la_symbol_ptr_section_index, - }) |maybe_index| { - if (maybe_index.* != null and maybe_index.*.? == index) { - maybe_index.* = null; - } - } - continue; - } - entries.appendAssumeCapacity(.{ .index = @intCast(index) }); - } - - mem.sort(Entry, entries.items, macho_file, Entry.lessThan); - - var slice = macho_file.sections.toOwnedSlice(); - defer slice.deinit(gpa); - - const backlinks = try gpa.alloc(u8, slice.len); - defer gpa.free(backlinks); - for (entries.items, 0..) |entry, i| { - backlinks[entry.index] = @as(u8, @intCast(i)); - } - - try macho_file.sections.ensureTotalCapacity(gpa, entries.items.len); - for (entries.items) |entry| { - macho_file.sections.appendAssumeCapacity(slice.get(entry.index)); - } - - for (&[_]*?u8{ - &macho_file.text_section_index, - &macho_file.data_const_section_index, - &macho_file.data_section_index, - &macho_file.bss_section_index, - &macho_file.thread_vars_section_index, - &macho_file.thread_data_section_index, - &macho_file.thread_bss_section_index, - &macho_file.eh_frame_section_index, - &macho_file.unwind_info_section_index, - &macho_file.got_section_index, - &macho_file.tlv_ptr_section_index, - &macho_file.stubs_section_index, - &macho_file.stub_helper_section_index, - &macho_file.la_symbol_ptr_section_index, - }) |maybe_index| { - if (maybe_index.*) |*index| { - index.* = backlinks[index.*]; - } - } -} - -fn calcSectionSizes(macho_file: *MachO) !void { - const target = 
macho_file.base.comp.root_mod.resolved_target.result; - const slice = macho_file.sections.slice(); - for (slice.items(.header), 0..) |*header, sect_id| { - if (header.size == 0) continue; - if (macho_file.text_section_index) |txt| { - if (txt == sect_id and target.cpu.arch == .aarch64) continue; - } - - var atom_index = slice.items(.first_atom_index)[sect_id] orelse continue; - - header.size = 0; - header.@"align" = 0; - - while (true) { - const atom = macho_file.getAtom(atom_index); - const atom_offset = atom.alignment.forward(header.size); - const padding = atom_offset - header.size; - - const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc()); - sym.n_value = atom_offset; - - header.size += padding + atom.size; - header.@"align" = @max(header.@"align", atom.alignment.toLog2Units()); - - atom_index = atom.next_index orelse break; - } - } - - if (macho_file.text_section_index != null and target.cpu.arch == .aarch64) { - // Create jump/branch range extenders if needed. - try thunks.createThunks(macho_file, macho_file.text_section_index.?); - } - - // Update offsets of all symbols contained within each Atom. - // We need to do this since our unwind info synthesiser relies on - // traversing the symbols when synthesising unwind info and DWARF CFI records. 
- for (slice.items(.first_atom_index)) |first_atom_index| { - var atom_index = first_atom_index orelse continue; - - while (true) { - const atom = macho_file.getAtom(atom_index); - const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - - if (atom.getFile() != null) { - // Update each symbol contained within the atom - var it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - while (it.next()) |sym_loc| { - const inner_sym = macho_file.getSymbolPtr(sym_loc); - inner_sym.n_value = sym.n_value + Atom.calcInnerSymbolOffset( - macho_file, - atom_index, - sym_loc.sym_index, - ); - } - - // If there is a section alias, update it now too - if (Atom.getSectionAlias(macho_file, atom_index)) |sym_loc| { - const alias = macho_file.getSymbolPtr(sym_loc); - alias.n_value = sym.n_value; - } - } - - if (atom.next_index) |next_index| { - atom_index = next_index; - } else break; - } - } - - if (macho_file.got_section_index) |sect_id| { - const header = &macho_file.sections.items(.header)[sect_id]; - header.size = macho_file.got_table.count() * @sizeOf(u64); - header.@"align" = 3; - } - - if (macho_file.tlv_ptr_section_index) |sect_id| { - const header = &macho_file.sections.items(.header)[sect_id]; - header.size = macho_file.tlv_ptr_table.count() * @sizeOf(u64); - header.@"align" = 3; - } - - const cpu_arch = target.cpu.arch; - - if (macho_file.stubs_section_index) |sect_id| { - const header = &macho_file.sections.items(.header)[sect_id]; - header.size = macho_file.stub_table.count() * stubs.stubSize(cpu_arch); - header.@"align" = math.log2(stubs.stubAlignment(cpu_arch)); - } - - if (macho_file.stub_helper_section_index) |sect_id| { - const header = &macho_file.sections.items(.header)[sect_id]; - header.size = macho_file.stub_table.count() * stubs.stubHelperSize(cpu_arch) + - stubs.stubHelperPreambleSize(cpu_arch); - header.@"align" = math.log2(stubs.stubAlignment(cpu_arch)); - } - - if (macho_file.la_symbol_ptr_section_index) |sect_id| { - const header = 
&macho_file.sections.items(.header)[sect_id]; - header.size = macho_file.stub_table.count() * @sizeOf(u64); - header.@"align" = 3; - } -} - -fn allocateSegments(macho_file: *MachO) !void { - for (macho_file.segments.items, 0..) |*segment, segment_index| { - const is_text_segment = mem.eql(u8, segment.segName(), "__TEXT"); - const base_size = if (is_text_segment) - try load_commands.calcMinHeaderPad(macho_file, .{ - .segments = macho_file.segments.items, - .dylibs = macho_file.dylibs.items, - .referenced_dylibs = macho_file.referenced_dylibs.keys(), - }) - else - 0; - try allocateSegment(macho_file, @as(u8, @intCast(segment_index)), base_size); - } -} - -fn getSegmentAllocBase(macho_file: *MachO, segment_index: u8) struct { vmaddr: u64, fileoff: u64 } { - if (segment_index > 0) { - const prev_segment = macho_file.segments.items[segment_index - 1]; - return .{ - .vmaddr = prev_segment.vmaddr + prev_segment.vmsize, - .fileoff = prev_segment.fileoff + prev_segment.filesize, - }; - } - return .{ .vmaddr = 0, .fileoff = 0 }; -} - -fn allocateSegment(macho_file: *MachO, segment_index: u8, init_size: u64) !void { - const target = macho_file.base.comp.root_mod.resolved_target.result; - const segment = &macho_file.segments.items[segment_index]; - - if (mem.eql(u8, segment.segName(), "__PAGEZERO")) return; // allocated upon creation - - const base = getSegmentAllocBase(macho_file, segment_index); - segment.vmaddr = base.vmaddr; - segment.fileoff = base.fileoff; - segment.filesize = init_size; - segment.vmsize = init_size; - - // Allocate the sections according to their alignment at the beginning of the segment. - const indexes = macho_file.getSectionIndexes(segment_index); - var start = init_size; - - const slice = macho_file.sections.slice(); - for (slice.items(.header)[indexes.start..indexes.end], 0..) 
|*header, sect_id| { - const alignment = try math.powi(u32, 2, header.@"align"); - const start_aligned = mem.alignForward(u64, start, alignment); - const n_sect = @as(u8, @intCast(indexes.start + sect_id + 1)); - - header.offset = if (header.isZerofill()) - 0 - else - @as(u32, @intCast(segment.fileoff + start_aligned)); - header.addr = segment.vmaddr + start_aligned; - - if (slice.items(.first_atom_index)[indexes.start + sect_id]) |first_atom_index| { - var atom_index = first_atom_index; - - log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ - n_sect, - header.segName(), - header.sectName(), - }); - - while (true) { - const atom = macho_file.getAtom(atom_index); - const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc()); - sym.n_value += header.addr; - sym.n_sect = n_sect; - - log.debug(" ATOM(%{d}, '{s}') @{x}", .{ - atom.sym_index, - macho_file.getSymbolName(atom.getSymbolWithLoc()), - sym.n_value, - }); - - if (atom.getFile() != null) { - // Update each symbol contained within the atom - var it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - while (it.next()) |sym_loc| { - const inner_sym = macho_file.getSymbolPtr(sym_loc); - inner_sym.n_value = sym.n_value + Atom.calcInnerSymbolOffset( - macho_file, - atom_index, - sym_loc.sym_index, - ); - inner_sym.n_sect = n_sect; - } - - // If there is a section alias, update it now too - if (Atom.getSectionAlias(macho_file, atom_index)) |sym_loc| { - const alias = macho_file.getSymbolPtr(sym_loc); - alias.n_value = sym.n_value; - alias.n_sect = n_sect; - } - } - - if (atom.next_index) |next_index| { - atom_index = next_index; - } else break; - } - } - - start = start_aligned + header.size; - - if (!header.isZerofill()) { - segment.filesize = start; - } - segment.vmsize = start; - } - - const page_size = MachO.getPageSize(target.cpu.arch); - segment.filesize = mem.alignForward(u64, segment.filesize, page_size); - segment.vmsize = mem.alignForward(u64, segment.vmsize, page_size); -} - -const std = 
@import("std"); -const build_options = @import("build_options"); -const assert = std.debug.assert; -const dwarf = std.dwarf; -const fs = std.fs; -const log = std.log.scoped(.link); -const macho = std.macho; -const math = std.math; -const mem = std.mem; - -const aarch64 = @import("../../arch/aarch64/bits.zig"); -const calcUuid = @import("uuid.zig").calcUuid; -const dead_strip = @import("dead_strip.zig"); -const eh_frame = @import("eh_frame.zig"); -const fat = @import("fat.zig"); -const link = @import("../../link.zig"); -const load_commands = @import("load_commands.zig"); -const stubs = @import("stubs.zig"); -const thunks = @import("thunks.zig"); -const trace = @import("../../tracy.zig").trace; - -const Allocator = mem.Allocator; -const Archive = @import("Archive.zig"); -const Atom = @import("Atom.zig"); -const Cache = std.Build.Cache; -const CodeSignature = @import("CodeSignature.zig"); -const Compilation = @import("../../Compilation.zig"); -const Dylib = @import("Dylib.zig"); -const MachO = @import("../MachO.zig"); -const Md5 = std.crypto.hash.Md5; -const LibStub = @import("../tapi.zig").LibStub; -const Object = @import("Object.zig"); -const Platform = load_commands.Platform; -const Section = MachO.Section; -const SymbolWithLoc = MachO.SymbolWithLoc; -const TableSection = @import("../table_section.zig").TableSection; -const Trie = @import("Trie.zig"); -const UnwindInfo = @import("UnwindInfo.zig");