From 2f94dc939e8acf0dab4cf1f3cf01b132ac6fdd3c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 9 Jan 2024 14:59:34 +0100 Subject: [PATCH 001/133] macho: copy over new implementation sources from zld --- CMakeLists.txt | 10 +- src/link/MachO/Archive.zig | 219 +- src/link/MachO/Atom.zig | 1994 +++++++-------- src/link/MachO/CodeSignature.zig | 368 +-- src/link/MachO/DwarfInfo.zig | 776 +++--- src/link/MachO/Dylib.zig | 1178 +++++---- src/link/MachO/InternalObject.zig | 249 ++ src/link/MachO/Object.zig | 2975 +++++++++++++++-------- src/link/MachO/Relocation.zig | 317 +-- src/link/MachO/Symbol.zig | 383 +++ src/link/MachO/UnwindInfo.zig | 1003 ++++---- src/link/MachO/dead_strip.zig | 583 ++--- src/link/MachO/dyld_info/Rebase.zig | 24 +- src/link/MachO/{ => dyld_info}/Trie.zig | 697 +++--- src/link/MachO/dyld_info/bind.zig | 994 +++----- src/link/MachO/eh_frame.zig | 1094 ++++----- src/link/MachO/fat.zig | 33 +- src/link/MachO/file.zig | 116 + src/link/MachO/hasher.zig | 21 +- src/link/MachO/load_commands.zig | 522 +--- src/link/MachO/relocatable.zig | 452 ++++ src/link/MachO/stubs.zig | 169 -- src/link/MachO/synthetic.zig | 669 +++++ src/link/MachO/thunks.zig | 512 ++-- src/link/MachO/uuid.zig | 26 +- src/link/MachO/zld.zig | 1230 ---------- 26 files changed, 8208 insertions(+), 8406 deletions(-) create mode 100644 src/link/MachO/InternalObject.zig create mode 100644 src/link/MachO/Symbol.zig rename src/link/MachO/{ => dyld_info}/Trie.zig (96%) create mode 100644 src/link/MachO/file.zig create mode 100644 src/link/MachO/relocatable.zig delete mode 100644 src/link/MachO/stubs.zig create mode 100644 src/link/MachO/synthetic.zig delete mode 100644 src/link/MachO/zld.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index 0108f448df..851e554923 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -603,20 +603,24 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/DebugSymbols.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/DwarfInfo.zig" 
"${CMAKE_SOURCE_DIR}/src/link/MachO/Dylib.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/InternalObject.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Relocation.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/Symbol.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/UnwindInfo.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/bind.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/Rebase.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/Trie.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/eh_frame.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/fat.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/file.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/hasher.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/load_commands.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/relocatable.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/synthetic.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/uuid.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/zld.zig" "${CMAKE_SOURCE_DIR}/src/link/Plan9.zig" "${CMAKE_SOURCE_DIR}/src/link/Plan9/aout.zig" "${CMAKE_SOURCE_DIR}/src/link/Wasm.zig" diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index ba3915f51b..c31278ce1c 100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -1,20 +1,15 @@ -file: fs.File, -fat_offset: u64, -name: []const u8, -header: ar_hdr = undefined, +path: []const u8, +data: []const u8, -/// Parsed table of contents. -/// Each symbol name points to a list of all definition -/// sites within the current static archive. -toc: std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged(u32)) = .{}, +objects: std.ArrayListUnmanaged(Object) = .{}, // Archive files start with the ARMAG identifying string. 
Then follows a // `struct ar_hdr', and as many bytes of member file data as its `ar_size' // member indicates, for each member file. /// String that begins an archive file. -const ARMAG: *const [SARMAG:0]u8 = "!\n"; +pub const ARMAG: *const [SARMAG:0]u8 = "!\n"; /// Size of that string. -const SARMAG: u4 = 8; +pub const SARMAG: u4 = 8; /// String in ar_fmag at the end of each header. const ARFMAG: *const [2:0]u8 = "`\n"; @@ -41,177 +36,93 @@ const ar_hdr = extern struct { /// Always contains ARFMAG. ar_fmag: [2]u8, - const NameOrLength = union(enum) { - Name: []const u8, - Length: u32, - }; - fn nameOrLength(self: ar_hdr) !NameOrLength { - const value = getValue(&self.ar_name); - const slash_index = mem.indexOf(u8, value, "/") orelse return error.MalformedArchive; - const len = value.len; - if (slash_index == len - 1) { - // Name stored directly - return NameOrLength{ .Name = value }; - } else { - // Name follows the header directly and its length is encoded in - // the name field. - const length = try std.fmt.parseInt(u32, value[slash_index + 1 ..], 10); - return NameOrLength{ .Length = length }; - } - } - fn date(self: ar_hdr) !u64 { - const value = getValue(&self.ar_date); + const value = mem.trimRight(u8, &self.ar_date, &[_]u8{@as(u8, 0x20)}); return std.fmt.parseInt(u64, value, 10); } fn size(self: ar_hdr) !u32 { - const value = getValue(&self.ar_size); + const value = mem.trimRight(u8, &self.ar_size, &[_]u8{@as(u8, 0x20)}); return std.fmt.parseInt(u32, value, 10); } - fn getValue(raw: []const u8) []const u8 { - return mem.trimRight(u8, raw, &[_]u8{@as(u8, 0x20)}); + fn name(self: *const ar_hdr) ?[]const u8 { + const value = &self.ar_name; + if (mem.startsWith(u8, value, "#1/")) return null; + const sentinel = mem.indexOfScalar(u8, value, '/') orelse value.len; + return value[0..sentinel]; + } + + fn nameLength(self: ar_hdr) !?u32 { + const value = &self.ar_name; + if (!mem.startsWith(u8, value, "#1/")) return null; + const trimmed = mem.trimRight(u8, 
self.ar_name["#1/".len..], &[_]u8{0x20}); + return try std.fmt.parseInt(u32, trimmed, 10); } }; -pub fn isArchive(file: fs.File, fat_offset: u64) bool { - const reader = file.reader(); - const magic = reader.readBytesNoEof(SARMAG) catch return false; - defer file.seekTo(fat_offset) catch {}; - return mem.eql(u8, &magic, ARMAG); -} - pub fn deinit(self: *Archive, allocator: Allocator) void { - self.file.close(); - for (self.toc.keys()) |*key| { - allocator.free(key.*); - } - for (self.toc.values()) |*value| { - value.deinit(allocator); - } - self.toc.deinit(allocator); - allocator.free(self.name); + self.objects.deinit(allocator); } -pub fn parse(self: *Archive, allocator: Allocator, reader: anytype) !void { - _ = try reader.readBytesNoEof(SARMAG); - self.header = try reader.readStruct(ar_hdr); - const name_or_length = try self.header.nameOrLength(); - const embedded_name = try parseName(allocator, name_or_length, reader); - log.debug("parsing archive '{s}' at '{s}'", .{ embedded_name, self.name }); - defer allocator.free(embedded_name); +pub fn parse(self: *Archive, arena: Allocator, macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; - try self.parseTableOfContents(allocator, reader); -} - -fn parseName(allocator: Allocator, name_or_length: ar_hdr.NameOrLength, reader: anytype) ![]u8 { - var name: []u8 = undefined; - switch (name_or_length) { - .Name => |n| { - name = try allocator.dupe(u8, n); - }, - .Length => |len| { - var n = try allocator.alloc(u8, len); - defer allocator.free(n); - try reader.readNoEof(n); - const actual_len = mem.indexOfScalar(u8, n, @as(u8, 0)) orelse n.len; - name = try allocator.dupe(u8, n[0..actual_len]); - }, - } - return name; -} - -fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) !void { - const symtab_size = try reader.readInt(u32, .little); - const symtab = try allocator.alloc(u8, symtab_size); - defer allocator.free(symtab); - - reader.readNoEof(symtab) catch { - log.debug("incomplete 
symbol table: expected symbol table of length 0x{x}", .{symtab_size}); - return error.MalformedArchive; - }; - - const strtab_size = try reader.readInt(u32, .little); - const strtab = try allocator.alloc(u8, strtab_size); - defer allocator.free(strtab); - - reader.readNoEof(strtab) catch { - log.debug("incomplete symbol table: expected string table of length 0x{x}", .{strtab_size}); - return error.MalformedArchive; - }; - - var symtab_stream = std.io.fixedBufferStream(symtab); - var symtab_reader = symtab_stream.reader(); + var stream = std.io.fixedBufferStream(self.data); + const reader = stream.reader(); while (true) { - const n_strx = symtab_reader.readInt(u32, .little) catch |err| switch (err) { - error.EndOfStream => break, - else => |e| return e, - }; - const object_offset = try symtab_reader.readInt(u32, .little); + if (stream.pos >= self.data.len) break; + if (!mem.isAligned(stream.pos, 2)) stream.pos += 1; - const sym_name = mem.sliceTo(@as([*:0]const u8, @ptrCast(strtab.ptr + n_strx)), 0); - const owned_name = try allocator.dupe(u8, sym_name); - const res = try self.toc.getOrPut(allocator, owned_name); - defer if (res.found_existing) allocator.free(owned_name); + const hdr = try reader.readStruct(ar_hdr); - if (!res.found_existing) { - res.value_ptr.* = .{}; + if (!mem.eql(u8, &hdr.ar_fmag, ARFMAG)) { + macho_file.base.fatal("{s}: invalid header delimiter: expected '{s}', found '{s}'", .{ + self.path, std.fmt.fmtSliceEscapeLower(ARFMAG), std.fmt.fmtSliceEscapeLower(&hdr.ar_fmag), + }); + return error.ParseFailed; } - try res.value_ptr.append(allocator, object_offset); + var size = try hdr.size(); + const name = name: { + if (hdr.name()) |n| break :name try arena.dupe(u8, n); + if (try hdr.nameLength()) |len| { + size -= len; + const buf = try arena.alloc(u8, len); + try reader.readNoEof(buf); + const actual_len = mem.indexOfScalar(u8, buf, @as(u8, 0)) orelse len; + break :name buf[0..actual_len]; + } + unreachable; + }; + defer { + _ = stream.seekBy(size) 
catch {}; + } + + if (mem.eql(u8, name, "__.SYMDEF") or mem.eql(u8, name, "__.SYMDEF SORTED")) continue; + + const object = Object{ + .archive = self.path, + .path = name, + .data = self.data[stream.pos..][0..size], + .index = undefined, + .alive = false, + .mtime = hdr.date() catch 0, + }; + + log.debug("extracting object '{s}' from archive '{s}'", .{ object.path, self.path }); + + try self.objects.append(gpa, object); } } -pub fn parseObject(self: Archive, gpa: Allocator, offset: u32) !Object { - const reader = self.file.reader(); - try reader.context.seekTo(self.fat_offset + offset); - - const object_header = try reader.readStruct(ar_hdr); - - const name_or_length = try object_header.nameOrLength(); - const object_name = try parseName(gpa, name_or_length, reader); - defer gpa.free(object_name); - - log.debug("extracting object '{s}' from archive '{s}'", .{ object_name, self.name }); - - const name = name: { - var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; - const path = try std.os.realpath(self.name, &buffer); - break :name try std.fmt.allocPrint(gpa, "{s}({s})", .{ path, object_name }); - }; - - const object_name_len = switch (name_or_length) { - .Name => 0, - .Length => |len| len, - }; - const object_size = (try object_header.size()) - object_name_len; - const contents = try gpa.allocWithOptions(u8, object_size, @alignOf(u64), null); - const amt = try reader.readAll(contents); - if (amt != object_size) { - return error.InputOutput; - } - - var object = Object{ - .name = name, - .mtime = object_header.date() catch 0, - .contents = contents, - }; - - try object.parse(gpa); - - return object; -} - -const Archive = @This(); - -const std = @import("std"); -const assert = std.debug.assert; -const fs = std.fs; const log = std.log.scoped(.link); const macho = std.macho; const mem = std.mem; +const std = @import("std"); const Allocator = mem.Allocator; +const Archive = @This(); +const MachO = @import("../MachO.zig"); const Object = @import("Object.zig"); diff --git 
a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index d76a6de841..d734faa487 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -1,1271 +1,905 @@ -/// Each Atom always gets a symbol with the fully qualified name. -/// The symbol can reside in any object file context structure in `symtab` array -/// (see `Object`), or if the symbol is a synthetic symbol such as a GOT cell or -/// a stub trampoline, it can be found in the linkers `locals` arraylist. -/// If this field is 0 and file is 0, it means the codegen size = 0 and there is no symbol or -/// offset table entry. -sym_index: u32 = 0, +/// Address allocated for this Atom. +value: u64 = 0, -/// 0 means an Atom is a synthetic Atom such as a GOT cell defined by the linker. -/// Otherwise, it is the index into appropriate object file (indexing from 1). -/// Prefer using `getFile()` helper to get the file index out rather than using -/// the field directly. -file: u32 = 0, +/// Name of this Atom. +name: u32 = 0, -/// If this Atom is not a synthetic Atom, i.e., references a subsection in an -/// Object file, `inner_sym_index` and `inner_nsyms_trailing` tell where and if -/// this Atom contains any additional symbol references that fall within this Atom's -/// address range. These could for example be an alias symbol which can be used -/// internally by the relocation records, or if the Object file couldn't be split -/// into subsections, this Atom may encompass an entire input section. -inner_sym_index: u32 = 0, -inner_nsyms_trailing: u32 = 0, +/// Index into linker's input file table. +file: File.Index = 0, -/// Size and alignment of this atom -/// Unlike in Elf, we need to store the size of this symbol as part of -/// the atom since macho.nlist_64 lacks this information. +/// Size of this atom size: u64 = 0, -/// Alignment of this atom as a power of 2. -/// For instance, aligmment of 0 should be read as 2^0 = 1 byte aligned. 
-alignment: Alignment = .@"1", +/// Alignment of this atom as a power of two. +alignment: u32 = 0, -/// Points to the previous and next neighbours -/// TODO use the same trick as with symbols: reserve index 0 as null atom -next_index: ?Index = null, -prev_index: ?Index = null, +/// Index of the input section. +n_sect: u32 = 0, -pub const Alignment = @import("../../InternPool.zig").Alignment; +/// Index of the output section. +out_n_sect: u8 = 0, -pub const Index = u32; +/// Offset within the parent section pointed to by n_sect. +/// off + size <= parent section size. +off: u64 = 0, -pub const Binding = struct { - target: SymbolWithLoc, - offset: u64, -}; +/// Relocations of this atom. +relocs: Loc = .{}, -/// Returns `null` if the Atom is a synthetic Atom. -/// Otherwise, returns an index into an array of Objects. -pub fn getFile(self: Atom) ?u32 { - if (self.file == 0) return null; - return self.file - 1; +/// Index of this atom in the linker's atoms table. +atom_index: Index = 0, + +/// Index of the thunk for this atom. +thunk_index: Thunk.Index = 0, + +/// Unwind records associated with this atom. +unwind_records: Loc = .{}, + +flags: Flags = .{}, + +pub fn getName(self: Atom, macho_file: *MachO) [:0]const u8 { + return macho_file.string_intern.getAssumeExists(self.name); } -pub fn getSymbolIndex(self: Atom) ?u32 { - if (self.getFile() == null and self.sym_index == 0) return null; - return self.sym_index; +pub fn getFile(self: Atom, macho_file: *MachO) File { + return macho_file.getFile(self.file).?; } -/// Returns symbol referencing this atom. -pub fn getSymbol(self: Atom, macho_file: *MachO) macho.nlist_64 { - return self.getSymbolPtr(macho_file).*; +pub fn getInputSection(self: Atom, macho_file: *MachO) macho.section_64 { + return switch (self.getFile(macho_file)) { + .dylib => unreachable, + inline else => |x| x.sections.items(.header)[self.n_sect], + }; } -/// Returns pointer-to-symbol referencing this atom. 
-pub fn getSymbolPtr(self: Atom, macho_file: *MachO) *macho.nlist_64 { - const sym_index = self.getSymbolIndex().?; - return macho_file.getSymbolPtr(.{ .sym_index = sym_index, .file = self.file }); +pub fn getInputAddress(self: Atom, macho_file: *MachO) u64 { + return self.getInputSection(macho_file).addr + self.off; } -pub fn getSymbolWithLoc(self: Atom) SymbolWithLoc { - const sym_index = self.getSymbolIndex().?; - return .{ .sym_index = sym_index, .file = self.file }; +pub fn getPriority(self: Atom, macho_file: *MachO) u64 { + const file = self.getFile(macho_file); + return (@as(u64, @intCast(file.getIndex())) << 32) | @as(u64, @intCast(self.n_sect)); } -/// Returns the name of this atom. -pub fn getName(self: Atom, macho_file: *MachO) []const u8 { - const sym_index = self.getSymbolIndex().?; - return macho_file.getSymbolName(.{ .sym_index = sym_index, .file = self.file }); +pub fn getCode(self: Atom, macho_file: *MachO) []const u8 { + const code = switch (self.getFile(macho_file)) { + .dylib => unreachable, + inline else => |x| x.getSectionData(self.n_sect), + }; + return code[self.off..][0..self.size]; } -/// Returns how much room there is to grow in virtual address space. -/// File offset relocation happens transparently, so it is not included in -/// this calculation. -pub fn capacity(self: Atom, macho_file: *MachO) u64 { - const self_sym = self.getSymbol(macho_file); - if (self.next_index) |next_index| { - const next = macho_file.getAtom(next_index); - const next_sym = next.getSymbol(macho_file); - return next_sym.n_value - self_sym.n_value; - } else { - // We are the last atom. - // The capacity is limited only by virtual address space. 
- return macho_file.allocatedVirtualSize(self_sym.n_value); +pub fn getRelocs(self: Atom, macho_file: *MachO) []const Relocation { + const relocs = switch (self.getFile(macho_file)) { + .dylib => unreachable, + inline else => |x| x.sections.items(.relocs)[self.n_sect], + }; + return relocs.items[self.relocs.pos..][0..self.relocs.len]; +} + +pub fn getUnwindRecords(self: Atom, macho_file: *MachO) []const UnwindInfo.Record.Index { + return switch (self.getFile(macho_file)) { + .dylib => unreachable, + .internal => &[0]UnwindInfo.Record.Index{}, + .object => |x| x.unwind_records.items[self.unwind_records.pos..][0..self.unwind_records.len], + }; +} + +pub fn markUnwindRecordsDead(self: Atom, macho_file: *MachO) void { + for (self.getUnwindRecords(macho_file)) |cu_index| { + const cu = macho_file.getUnwindRecord(cu_index); + cu.alive = false; + + if (cu.getFdePtr(macho_file)) |fde| { + fde.alive = false; + } } } -pub fn freeListEligible(self: Atom, macho_file: *MachO) bool { - // No need to keep a free list node for the last atom. - const next_index = self.next_index orelse return false; - const next = macho_file.getAtom(next_index); - const self_sym = self.getSymbol(macho_file); - const next_sym = next.getSymbol(macho_file); - const cap = next_sym.n_value - self_sym.n_value; - const ideal_cap = MachO.padToIdeal(self.size); - if (cap <= ideal_cap) return false; - const surplus = cap - ideal_cap; - return surplus >= MachO.min_text_capacity; +pub fn getThunk(self: Atom, macho_file: *MachO) *Thunk { + return macho_file.getThunk(self.thunk_index); } -pub fn getOutputSection(macho_file: *MachO, sect: macho.section_64) !?u8 { - const segname = sect.segName(); - const sectname = sect.sectName(); - const res: ?u8 = blk: { - if (mem.eql(u8, "__LLVM", segname)) { - log.debug("TODO LLVM section: type 0x{x}, name '{s},{s}'", .{ - sect.flags, segname, sectname, - }); - break :blk null; - } - - // We handle unwind info separately. 
- if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) { - break :blk null; - } - if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { - break :blk null; - } - - if (sect.isCode()) { - if (macho_file.text_section_index == null) { - macho_file.text_section_index = try macho_file.initSection("__TEXT", "__text", .{ - .flags = macho.S_REGULAR | - macho.S_ATTR_PURE_INSTRUCTIONS | - macho.S_ATTR_SOME_INSTRUCTIONS, - }); - } - break :blk macho_file.text_section_index.?; - } - - if (sect.isDebug()) { - break :blk null; - } +pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 { + const segname, const sectname, const flags = blk: { + if (sect.isCode()) break :blk .{ + "__TEXT", + "__text", + macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }; switch (sect.type()) { macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS, - => { - break :blk macho_file.getSectionByName("__TEXT", "__const") orelse - try macho_file.initSection("__TEXT", "__const", .{}); - }, + => break :blk .{ "__TEXT", "__const", macho.S_REGULAR }, + macho.S_CSTRING_LITERALS => { - if (mem.startsWith(u8, sectname, "__objc")) { - break :blk macho_file.getSectionByName(segname, sectname) orelse - try macho_file.initSection(segname, sectname, .{}); - } - break :blk macho_file.getSectionByName("__TEXT", "__cstring") orelse - try macho_file.initSection("__TEXT", "__cstring", .{ - .flags = macho.S_CSTRING_LITERALS, - }); + if (mem.startsWith(u8, sect.sectName(), "__objc")) break :blk .{ + sect.segName(), sect.sectName(), macho.S_REGULAR, + }; + break :blk .{ "__TEXT", "__cstring", macho.S_CSTRING_LITERALS }; }, + macho.S_MOD_INIT_FUNC_POINTERS, macho.S_MOD_TERM_FUNC_POINTERS, - => { - break :blk macho_file.getSectionByName("__DATA_CONST", sectname) orelse - try macho_file.initSection("__DATA_CONST", sectname, .{ - .flags = sect.flags, - }); - }, + => break :blk .{ "__DATA_CONST", 
sect.sectName(), sect.flags }, + macho.S_LITERAL_POINTERS, macho.S_ZEROFILL, + macho.S_GB_ZEROFILL, macho.S_THREAD_LOCAL_VARIABLES, macho.S_THREAD_LOCAL_VARIABLE_POINTERS, macho.S_THREAD_LOCAL_REGULAR, macho.S_THREAD_LOCAL_ZEROFILL, - => { - break :blk macho_file.getSectionByName(segname, sectname) orelse - try macho_file.initSection(segname, sectname, .{ - .flags = sect.flags, - }); - }, - macho.S_COALESCED => { - break :blk macho_file.getSectionByName(segname, sectname) orelse - try macho_file.initSection(segname, sectname, .{}); + => break :blk .{ sect.segName(), sect.sectName(), sect.flags }, + + macho.S_COALESCED => break :blk .{ + sect.segName(), + sect.sectName(), + macho.S_REGULAR, }, + macho.S_REGULAR => { - if (mem.eql(u8, segname, "__TEXT")) { - if (mem.eql(u8, sectname, "__rodata") or - mem.eql(u8, sectname, "__typelink") or - mem.eql(u8, sectname, "__itablink") or - mem.eql(u8, sectname, "__gosymtab") or - mem.eql(u8, sectname, "__gopclntab")) - { - break :blk macho_file.getSectionByName("__TEXT", sectname) orelse - try macho_file.initSection("__TEXT", sectname, .{}); - } - } + const segname = sect.segName(); + const sectname = sect.sectName(); if (mem.eql(u8, segname, "__DATA")) { if (mem.eql(u8, sectname, "__const") or mem.eql(u8, sectname, "__cfstring") or mem.eql(u8, sectname, "__objc_classlist") or - mem.eql(u8, sectname, "__objc_imageinfo")) - { - break :blk macho_file.getSectionByName("__DATA_CONST", sectname) orelse - try macho_file.initSection("__DATA_CONST", sectname, .{}); - } else if (mem.eql(u8, sectname, "__data")) { - if (macho_file.data_section_index == null) { - macho_file.data_section_index = try macho_file.initSection("__DATA", "__data", .{}); - } - break :blk macho_file.data_section_index.?; - } + mem.eql(u8, sectname, "__objc_imageinfo")) break :blk .{ + "__DATA_CONST", + sectname, + macho.S_REGULAR, + }; } - break :blk macho_file.getSectionByName(segname, sectname) orelse - try macho_file.initSection(segname, sectname, .{}); + 
break :blk .{ segname, sectname, sect.flags }; }, - else => break :blk null, + + else => break :blk .{ sect.segName(), sect.sectName(), sect.flags }, } }; - - // TODO we can do this directly in the selection logic above. - // Or is it not worth it? - if (macho_file.data_const_section_index == null) { - if (macho_file.getSectionByName("__DATA_CONST", "__const")) |index| { - macho_file.data_const_section_index = index; - } + const osec = macho_file.getSectionByName(segname, sectname) orelse try macho_file.addSection( + segname, + sectname, + .{ .flags = flags }, + ); + if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__data")) { + macho_file.data_sect_index = osec; } - if (macho_file.thread_vars_section_index == null) { - if (macho_file.getSectionByName("__DATA", "__thread_vars")) |index| { - macho_file.thread_vars_section_index = index; - } - } - if (macho_file.thread_data_section_index == null) { - if (macho_file.getSectionByName("__DATA", "__thread_data")) |index| { - macho_file.thread_data_section_index = index; - } - } - if (macho_file.thread_bss_section_index == null) { - if (macho_file.getSectionByName("__DATA", "__thread_bss")) |index| { - macho_file.thread_bss_section_index = index; - } - } - if (macho_file.bss_section_index == null) { - if (macho_file.getSectionByName("__DATA", "__bss")) |index| { - macho_file.bss_section_index = index; - } - } - - return res; + return osec; } -pub fn addRelocation(macho_file: *MachO, atom_index: Index, reloc: Relocation) !void { - return addRelocations(macho_file, atom_index, &[_]Relocation{reloc}); -} - -pub fn addRelocations(macho_file: *MachO, atom_index: Index, relocs: []const Relocation) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const gop = try macho_file.relocs.getOrPut(gpa, atom_index); - if (!gop.found_existing) { - gop.value_ptr.* = .{}; - } - try gop.value_ptr.ensureUnusedCapacity(gpa, relocs.len); - for (relocs) |reloc| { - log.debug(" (adding reloc of type {s} to target 
%{d})", .{ - @tagName(reloc.type), - reloc.target.sym_index, - }); - gop.value_ptr.appendAssumeCapacity(reloc); - } -} - -pub fn addRebase(macho_file: *MachO, atom_index: Index, offset: u32) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const atom = macho_file.getAtom(atom_index); - log.debug(" (adding rebase at offset 0x{x} in %{?d})", .{ offset, atom.getSymbolIndex() }); - const gop = try macho_file.rebases.getOrPut(gpa, atom_index); - if (!gop.found_existing) { - gop.value_ptr.* = .{}; - } - try gop.value_ptr.append(gpa, offset); -} - -pub fn addBinding(macho_file: *MachO, atom_index: Index, binding: Binding) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const atom = macho_file.getAtom(atom_index); - log.debug(" (adding binding to symbol {s} at offset 0x{x} in %{?d})", .{ - macho_file.getSymbolName(binding.target), - binding.offset, - atom.getSymbolIndex(), - }); - const gop = try macho_file.bindings.getOrPut(gpa, atom_index); - if (!gop.found_existing) { - gop.value_ptr.* = .{}; - } - try gop.value_ptr.append(gpa, binding); -} - -pub fn resolveRelocations( - macho_file: *MachO, - atom_index: Index, - relocs: []*const Relocation, - code: []u8, -) void { - relocs_log.debug("relocating '{s}'", .{macho_file.getAtom(atom_index).getName(macho_file)}); - for (relocs) |reloc| { - reloc.resolve(macho_file, atom_index, code); - } -} - -pub fn freeRelocations(macho_file: *MachO, atom_index: Index) void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - var removed_relocs = macho_file.relocs.fetchOrderedRemove(atom_index); - if (removed_relocs) |*relocs| relocs.value.deinit(gpa); - var removed_rebases = macho_file.rebases.fetchOrderedRemove(atom_index); - if (removed_rebases) |*rebases| rebases.value.deinit(gpa); - var removed_bindings = macho_file.bindings.fetchOrderedRemove(atom_index); - if (removed_bindings) |*bindings| bindings.value.deinit(gpa); -} - -const InnerSymIterator = struct { - sym_index: u32, - 
nsyms: u32, - file: u32, - pos: u32 = 0, - - pub fn next(it: *@This()) ?SymbolWithLoc { - if (it.pos == it.nsyms) return null; - const res = SymbolWithLoc{ .sym_index = it.sym_index + it.pos, .file = it.file }; - it.pos += 1; - return res; - } -}; - -/// Returns an iterator over potentially contained symbols. -/// Panics when called on a synthetic Atom. -pub fn getInnerSymbolsIterator(macho_file: *MachO, atom_index: Index) InnerSymIterator { - const atom = macho_file.getAtom(atom_index); - assert(atom.getFile() != null); - return .{ - .sym_index = atom.inner_sym_index, - .nsyms = atom.inner_nsyms_trailing, - .file = atom.file, - }; -} - -/// Returns a section alias symbol if one is defined. -/// An alias symbol is used to represent the start of an input section -/// if there were no symbols defined within that range. -/// Alias symbols are only used on x86_64. -pub fn getSectionAlias(macho_file: *MachO, atom_index: Index) ?SymbolWithLoc { - const atom = macho_file.getAtom(atom_index); - assert(atom.getFile() != null); - - const object = macho_file.objects.items[atom.getFile().?]; - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const ntotal = @as(u32, @intCast(object.symtab.len)); - var sym_index: u32 = nbase; - while (sym_index < ntotal) : (sym_index += 1) { - if (object.getAtomIndexForSymbol(sym_index)) |other_atom_index| { - if (other_atom_index == atom_index) return SymbolWithLoc{ - .sym_index = sym_index, - .file = atom.file, - }; - } - } - return null; -} - -/// Given an index into a contained symbol within, calculates an offset wrt -/// the start of this Atom. 
-pub fn calcInnerSymbolOffset(macho_file: *MachO, atom_index: Index, sym_index: u32) u64 { - const atom = macho_file.getAtom(atom_index); - assert(atom.getFile() != null); - - if (atom.sym_index == sym_index) return 0; - - const object = macho_file.objects.items[atom.getFile().?]; - const source_sym = object.getSourceSymbol(sym_index).?; - const base_addr = if (object.getSourceSymbol(atom.sym_index)) |sym| - sym.n_value - else blk: { - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - const source_sect = object.getSourceSection(sect_id); - break :blk source_sect.addr; - }; - return source_sym.n_value - base_addr; -} - -pub fn scanAtomRelocs(macho_file: *MachO, atom_index: Index, relocs: []align(1) const macho.relocation_info) !void { - const target = macho_file.base.comp.root_mod.resolved_target.result; - const arch = target.cpu.arch; - const atom = macho_file.getAtom(atom_index); - assert(atom.getFile() != null); // synthetic atoms do not have relocs - - return switch (arch) { - .aarch64 => scanAtomRelocsArm64(macho_file, atom_index, relocs), - .x86_64 => scanAtomRelocsX86(macho_file, atom_index, relocs), - else => unreachable, - }; -} - -const RelocContext = struct { - base_addr: i64 = 0, - base_offset: i32 = 0, -}; - -pub fn getRelocContext(macho_file: *MachO, atom_index: Index) RelocContext { - const atom = macho_file.getAtom(atom_index); - assert(atom.getFile() != null); // synthetic atoms do not have relocs - - const object = macho_file.objects.items[atom.getFile().?]; - if (object.getSourceSymbol(atom.sym_index)) |source_sym| { - const source_sect = object.getSourceSection(source_sym.n_sect - 1); - return .{ - .base_addr = @as(i64, @intCast(source_sect.addr)), - .base_offset = @as(i32, @intCast(source_sym.n_value - source_sect.addr)), - }; - } - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - const source_sect = 
object.getSourceSection(sect_id); - return .{ - .base_addr = @as(i64, @intCast(source_sect.addr)), - .base_offset = 0, - }; -} - -pub fn parseRelocTarget(macho_file: *MachO, ctx: struct { - object_id: u32, - rel: macho.relocation_info, - code: []const u8, - base_addr: i64 = 0, - base_offset: i32 = 0, -}) SymbolWithLoc { +pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const target = macho_file.base.comp.root_mod.resolved_target.result; - const object = &macho_file.objects.items[ctx.object_id]; - log.debug("parsing reloc target in object({d}) '{s}' ", .{ ctx.object_id, object.name }); + const object = self.getFile(macho_file).object; + const relocs = self.getRelocs(macho_file); - const sym_index = if (ctx.rel.r_extern == 0) sym_index: { - const sect_id = @as(u8, @intCast(ctx.rel.r_symbolnum - 1)); - const rel_offset = @as(u32, @intCast(ctx.rel.r_address - ctx.base_offset)); + for (relocs) |rel| { + if (try self.reportUndefSymbol(rel, macho_file)) continue; - const address_in_section = if (ctx.rel.r_pcrel == 0) blk: { - break :blk if (ctx.rel.r_length == 3) - mem.readInt(u64, ctx.code[rel_offset..][0..8], .little) - else - mem.readInt(u32, ctx.code[rel_offset..][0..4], .little); - } else blk: { - assert(target.cpu.arch == .x86_64); - const correction: u3 = switch (@as(macho.reloc_type_x86_64, @enumFromInt(ctx.rel.r_type))) { - .X86_64_RELOC_SIGNED => 0, - .X86_64_RELOC_SIGNED_1 => 1, - .X86_64_RELOC_SIGNED_2 => 2, - .X86_64_RELOC_SIGNED_4 => 4, + switch (rel.type) { + .branch => { + const symbol = rel.getTargetSymbol(macho_file); + if (symbol.flags.import or (symbol.flags.@"export" and (symbol.flags.weak or symbol.flags.interposable))) { + symbol.flags.stubs = true; + if (symbol.flags.weak) { + macho_file.binds_to_weak = true; + } + } else if (mem.startsWith(u8, symbol.getName(macho_file), "_objc_msgSend$")) { + symbol.flags.objc_stubs = true; + } + }, + + .got_load, + .got_load_page, + .got_load_pageoff, + 
=> { + const symbol = rel.getTargetSymbol(macho_file); + if (symbol.flags.import or + (symbol.flags.@"export" and (symbol.flags.weak or symbol.flags.interposable)) or + macho_file.options.cpu_arch.? == .aarch64) // TODO relax on arm64 + { + symbol.flags.got = true; + if (symbol.flags.weak) { + macho_file.binds_to_weak = true; + } + } + }, + + .got => { + rel.getTargetSymbol(macho_file).flags.got = true; + }, + + .tlv, + .tlvp_page, + .tlvp_pageoff, + => { + const symbol = rel.getTargetSymbol(macho_file); + if (!symbol.flags.tlv) { + macho_file.base.fatal( + "{}: {s}: illegal thread-local variable reference to regular symbol {s}", + .{ object.fmtPath(), self.getName(macho_file), symbol.getName(macho_file) }, + ); + } + if (symbol.flags.import or (symbol.flags.@"export" and (symbol.flags.weak or symbol.flags.interposable))) { + symbol.flags.tlv_ptr = true; + if (symbol.flags.weak) { + macho_file.binds_to_weak = true; + } + } + }, + + .unsigned => { + if (rel.meta.length == 3) { // TODO this really should check if this is pointer width + if (rel.tag == .@"extern") { + const symbol = rel.getTargetSymbol(macho_file); + if (symbol.isTlvInit(macho_file)) { + macho_file.has_tlv = true; + continue; + } + if (symbol.flags.import) { + object.num_bind_relocs += 1; + if (symbol.flags.weak) { + object.num_weak_bind_relocs += 1; + macho_file.binds_to_weak = true; + } + continue; + } + if (symbol.flags.@"export") { + if (symbol.flags.weak) { + object.num_weak_bind_relocs += 1; + macho_file.binds_to_weak = true; + } else if (symbol.flags.interposable) { + object.num_bind_relocs += 1; + } + } + } + object.num_rebase_relocs += 1; + } + }, + + else => {}, + } + } +} + +fn reportUndefSymbol(self: Atom, rel: Relocation, macho_file: *MachO) !bool { + if (rel.tag == .local) return false; + + const sym = rel.getTargetSymbol(macho_file); + if (sym.getFile(macho_file) == null) { + const gpa = macho_file.base.allocator; + const gop = try macho_file.undefs.getOrPut(gpa, rel.target); + if 
(!gop.found_existing) { + gop.value_ptr.* = .{}; + } + try gop.value_ptr.append(gpa, self.atom_index); + return true; + } + + return false; +} + +pub fn resolveRelocs(self: Atom, macho_file: *MachO, buffer: []u8) !void { + const tracy = trace(@src()); + defer tracy.end(); + + assert(!self.getInputSection(macho_file).isZerofill()); + const relocs = self.getRelocs(macho_file); + const file = self.getFile(macho_file); + const name = self.getName(macho_file); + @memcpy(buffer, self.getCode(macho_file)); + + relocs_log.debug("{x}: {s}", .{ self.value, name }); + + var stream = std.io.fixedBufferStream(buffer); + + var i: usize = 0; + while (i < relocs.len) : (i += 1) { + const rel = relocs[i]; + const rel_offset = rel.offset - self.off; + const subtractor = if (rel.meta.has_subtractor) relocs[i - 1] else null; + + if (rel.tag == .@"extern") { + if (rel.getTargetSymbol(macho_file).getFile(macho_file) == null) continue; + } + + try stream.seekTo(rel_offset); + self.resolveRelocInner(rel, subtractor, buffer, macho_file, stream.writer()) catch |err| { + switch (err) { + error.RelaxFail => macho_file.base.fatal( + "{}: {s}: 0x{x}: failed to relax relocation: in {s}", + .{ file.fmtPath(), name, rel.offset, @tagName(rel.type) }, + ), + else => |e| return e, + } + return error.ResolveFailed; + }; + } +} + +const ResolveError = error{ + RelaxFail, + NoSpaceLeft, + DivisionByZero, + UnexpectedRemainder, + Overflow, +}; + +fn resolveRelocInner( + self: Atom, + rel: Relocation, + subtractor: ?Relocation, + code: []u8, + macho_file: *MachO, + writer: anytype, +) ResolveError!void { + const cpu_arch = macho_file.options.cpu_arch.?; + const rel_offset = rel.offset - self.off; + const seg_id = macho_file.sections.items(.segment_id)[self.out_n_sect]; + const seg = macho_file.segments.items[seg_id]; + const P = @as(i64, @intCast(self.value)) + @as(i64, @intCast(rel_offset)); + const A = rel.addend + rel.getRelocAddend(cpu_arch); + const S: i64 = 
@intCast(rel.getTargetAddress(macho_file)); + const G: i64 = @intCast(rel.getGotTargetAddress(macho_file)); + const TLS = @as(i64, @intCast(macho_file.getTlsAddress())); + const SUB = if (subtractor) |sub| @as(i64, @intCast(sub.getTargetAddress(macho_file))) else 0; + + switch (rel.tag) { + .local => relocs_log.debug(" {x}<+{d}>: {s}: [=> {x}] atom({d})", .{ + P, + rel_offset, + @tagName(rel.type), + S + A - SUB, + rel.getTargetAtom(macho_file).atom_index, + }), + .@"extern" => relocs_log.debug(" {x}<+{d}>: {s}: [=> {x}] G({x}) ({s})", .{ + P, + rel_offset, + @tagName(rel.type), + S + A - SUB, + G + A, + rel.getTargetSymbol(macho_file).getName(macho_file), + }), + } + + switch (rel.type) { + .subtractor => {}, + + .unsigned => { + assert(!rel.meta.pcrel); + if (rel.meta.length == 3) { + if (rel.tag == .@"extern") { + const sym = rel.getTargetSymbol(macho_file); + if (sym.isTlvInit(macho_file)) { + try writer.writeInt(u64, @intCast(S - TLS), .little); + return; + } + const entry = bind.Entry{ + .target = rel.target, + .offset = @as(u64, @intCast(P)) - seg.vmaddr, + .segment_id = seg_id, + .addend = A, + }; + if (sym.flags.import) { + macho_file.bind.entries.appendAssumeCapacity(entry); + if (sym.flags.weak) { + macho_file.weak_bind.entries.appendAssumeCapacity(entry); + } + return; + } + if (sym.flags.@"export") { + if (sym.flags.weak) { + macho_file.weak_bind.entries.appendAssumeCapacity(entry); + } else if (sym.flags.interposable) { + macho_file.bind.entries.appendAssumeCapacity(entry); + } + } + } + macho_file.rebase.entries.appendAssumeCapacity(.{ + .offset = @as(u64, @intCast(P)) - seg.vmaddr, + .segment_id = seg_id, + }); + try writer.writeInt(u64, @bitCast(S + A - SUB), .little); + } else if (rel.meta.length == 2) { + try writer.writeInt(u32, @bitCast(@as(i32, @truncate(S + A - SUB))), .little); + } else unreachable; + }, + + .got => { + assert(rel.tag == .@"extern"); + assert(rel.meta.length == 2); + assert(rel.meta.pcrel); + try writer.writeInt(i32, 
@intCast(G + A - P), .little); + }, + + .branch => { + assert(rel.meta.length == 2); + assert(rel.meta.pcrel); + assert(rel.tag == .@"extern"); + + switch (cpu_arch) { + .x86_64 => try writer.writeInt(i32, @intCast(S + A - P), .little), + .aarch64 => { + const disp: i28 = math.cast(i28, S + A - P) orelse blk: { + const thunk = self.getThunk(macho_file); + const S_: i64 = @intCast(thunk.getAddress(rel.target)); + break :blk math.cast(i28, S_ + A - P) orelse return error.Overflow; + }; + var inst = aarch64.Instruction{ + .unconditional_branch_immediate = mem.bytesToValue(std.meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.unconditional_branch_immediate, + ), code[rel_offset..][0..4]), + }; + inst.unconditional_branch_immediate.imm26 = @as(u26, @truncate(@as(u28, @bitCast(disp >> 2)))); + try writer.writeInt(u32, inst.toU32(), .little); + }, else => unreachable, - }; - const addend = mem.readInt(i32, ctx.code[rel_offset..][0..4], .little); - const target_address = @as(i64, @intCast(ctx.base_addr)) + ctx.rel.r_address + 4 + correction + addend; - break :blk @as(u64, @intCast(target_address)); - }; - - // Find containing atom - log.debug(" | locating symbol by address @{x} in section {d}", .{ address_in_section, sect_id }); - break :sym_index object.getSymbolByAddress(address_in_section, sect_id); - } else object.reverse_symtab_lookup[ctx.rel.r_symbolnum]; - - const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = ctx.object_id + 1 }; - const sym = macho_file.getSymbol(sym_loc); - const reloc_target = if (sym.sect() and !sym.ext()) - sym_loc - else if (object.getGlobal(sym_index)) |global_index| - macho_file.globals.items[global_index] - else - sym_loc; - log.debug(" | target %{d} ('{s}') in object({?d})", .{ - reloc_target.sym_index, - macho_file.getSymbolName(reloc_target), - reloc_target.getFile(), - }); - return reloc_target; -} - -pub fn getRelocTargetAtomIndex(macho_file: *MachO, target: SymbolWithLoc) ?Index { - if (target.getFile() == null) 
{ - const target_sym_name = macho_file.getSymbolName(target); - if (mem.eql(u8, "__mh_execute_header", target_sym_name)) return null; - if (mem.eql(u8, "___dso_handle", target_sym_name)) return null; - - unreachable; // referenced symbol not found - } - - const object = macho_file.objects.items[target.getFile().?]; - return object.getAtomIndexForSymbol(target.sym_index); -} - -fn scanAtomRelocsArm64( - macho_file: *MachO, - atom_index: Index, - relocs: []align(1) const macho.relocation_info, -) !void { - for (relocs) |rel| { - const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); - - switch (rel_type) { - .ARM64_RELOC_ADDEND, .ARM64_RELOC_SUBTRACTOR => continue, - else => {}, - } - - if (rel.r_extern == 0) continue; - - const atom = macho_file.getAtom(atom_index); - const object = &macho_file.objects.items[atom.getFile().?]; - const sym_index = object.reverse_symtab_lookup[rel.r_symbolnum]; - const sym_loc = SymbolWithLoc{ - .sym_index = sym_index, - .file = atom.file, - }; - - const target = if (object.getGlobal(sym_index)) |global_index| - macho_file.globals.items[global_index] - else - sym_loc; - - switch (rel_type) { - .ARM64_RELOC_BRANCH26 => { - // TODO rewrite relocation - const sym = macho_file.getSymbol(target); - if (sym.undf()) try macho_file.addStubEntry(target); - }, - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12, - .ARM64_RELOC_POINTER_TO_GOT, - => { - // TODO rewrite relocation - try macho_file.addGotEntry(target); - }, - .ARM64_RELOC_TLVP_LOAD_PAGE21, - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, - => { - const sym = macho_file.getSymbol(target); - if (sym.undf()) try macho_file.addTlvPtrEntry(target); - }, - else => {}, - } - } -} - -fn scanAtomRelocsX86( - macho_file: *MachO, - atom_index: Index, - relocs: []align(1) const macho.relocation_info, -) !void { - for (relocs) |rel| { - const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); - - switch (rel_type) { - .X86_64_RELOC_SUBTRACTOR => continue, - 
else => {}, - } - - if (rel.r_extern == 0) continue; - - const atom = macho_file.getAtom(atom_index); - const object = &macho_file.objects.items[atom.getFile().?]; - const sym_index = object.reverse_symtab_lookup[rel.r_symbolnum]; - const sym_loc = SymbolWithLoc{ - .sym_index = sym_index, - .file = atom.file, - }; - - const target = if (object.getGlobal(sym_index)) |global_index| - macho_file.globals.items[global_index] - else - sym_loc; - - switch (rel_type) { - .X86_64_RELOC_BRANCH => { - // TODO rewrite relocation - const sym = macho_file.getSymbol(target); - if (sym.undf()) try macho_file.addStubEntry(target); - }, - .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => { - // TODO rewrite relocation - try macho_file.addGotEntry(target); - }, - .X86_64_RELOC_TLV => { - const sym = macho_file.getSymbol(target); - if (sym.undf()) try macho_file.addTlvPtrEntry(target); - }, - else => {}, - } - } -} - -pub fn resolveRelocs( - macho_file: *MachO, - atom_index: Index, - atom_code: []u8, - atom_relocs: []align(1) const macho.relocation_info, -) !void { - const target = macho_file.base.comp.root_mod.resolved_target.result; - const arch = target.cpu.arch; - const atom = macho_file.getAtom(atom_index); - assert(atom.getFile() != null); // synthetic atoms do not have relocs - - relocs_log.debug("resolving relocations in ATOM(%{d}, '{s}')", .{ - atom.sym_index, - macho_file.getSymbolName(atom.getSymbolWithLoc()), - }); - - const ctx = getRelocContext(macho_file, atom_index); - - return switch (arch) { - .aarch64 => resolveRelocsArm64(macho_file, atom_index, atom_code, atom_relocs, ctx), - .x86_64 => resolveRelocsX86(macho_file, atom_index, atom_code, atom_relocs, ctx), - else => unreachable, - }; -} - -pub fn getRelocTargetAddress(macho_file: *MachO, target: SymbolWithLoc, is_tlv: bool) u64 { - const target_atom_index = getRelocTargetAtomIndex(macho_file, target) orelse { - // If there is no atom for target, we still need to check for special, atom-less - // symbols such as 
`___dso_handle`. - const target_name = macho_file.getSymbolName(target); - const atomless_sym = macho_file.getSymbol(target); - log.debug(" | atomless target '{s}'", .{target_name}); - return atomless_sym.n_value; - }; - const target_atom = macho_file.getAtom(target_atom_index); - log.debug(" | target ATOM(%{d}, '{s}') in object({?})", .{ - target_atom.sym_index, - macho_file.getSymbolName(target_atom.getSymbolWithLoc()), - target_atom.getFile(), - }); - - const target_sym = macho_file.getSymbol(target_atom.getSymbolWithLoc()); - assert(target_sym.n_desc != MachO.N_DEAD); - - // If `target` is contained within the target atom, pull its address value. - const offset = if (target_atom.getFile() != null) blk: { - const object = macho_file.objects.items[target_atom.getFile().?]; - break :blk if (object.getSourceSymbol(target.sym_index)) |_| - Atom.calcInnerSymbolOffset(macho_file, target_atom_index, target.sym_index) - else - 0; // section alias - } else 0; - const base_address: u64 = if (is_tlv) base_address: { - // For TLV relocations, the value specified as a relocation is the displacement from the - // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first - // defined TLV template init section in the following order: - // * wrt to __thread_data if defined, then - // * wrt to __thread_bss - // TODO remember to check what the mechanism was prior to HAS_TLV_INITIALIZERS in earlier versions of macOS - const sect_id: u16 = sect_id: { - if (macho_file.thread_data_section_index) |i| { - break :sect_id i; - } else if (macho_file.thread_bss_section_index) |i| { - break :sect_id i; - } else break :base_address 0; - }; - break :base_address macho_file.sections.items(.header)[sect_id].addr; - } else 0; - return target_sym.n_value + offset - base_address; -} - -fn resolveRelocsArm64( - macho_file: *MachO, - atom_index: Index, - atom_code: []u8, - atom_relocs: []align(1) const macho.relocation_info, - context: RelocContext, -) !void { - const 
atom = macho_file.getAtom(atom_index); - const object = macho_file.objects.items[atom.getFile().?]; - - var addend: ?i64 = null; - var subtractor: ?SymbolWithLoc = null; - - for (atom_relocs) |rel| { - const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); - - switch (rel_type) { - .ARM64_RELOC_ADDEND => { - assert(addend == null); - - relocs_log.debug(" RELA({s}) @ {x} => {x}", .{ @tagName(rel_type), rel.r_address, rel.r_symbolnum }); - - addend = rel.r_symbolnum; - continue; - }, - .ARM64_RELOC_SUBTRACTOR => { - assert(subtractor == null); - - relocs_log.debug(" RELA({s}) @ {x} => %{d} in object({?d})", .{ - @tagName(rel_type), - rel.r_address, - rel.r_symbolnum, - atom.getFile(), - }); - - subtractor = parseRelocTarget(macho_file, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = atom_code, - .base_addr = context.base_addr, - .base_offset = context.base_offset, - }); - continue; - }, - else => {}, - } - - const target = parseRelocTarget(macho_file, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = atom_code, - .base_addr = context.base_addr, - .base_offset = context.base_offset, - }); - const rel_offset = @as(u32, @intCast(rel.r_address - context.base_offset)); - - relocs_log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{ - @tagName(rel_type), - rel.r_address, - target.sym_index, - macho_file.getSymbolName(target), - target.getFile(), - }); - - const source_addr = blk: { - const source_sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - break :blk source_sym.n_value + rel_offset; - }; - const target_addr = blk: { - if (relocRequiresGot(macho_file, rel)) break :blk macho_file.getGotEntryAddress(target).?; - if (relocIsTlv(macho_file, rel) and macho_file.getSymbol(target).undf()) - break :blk macho_file.getTlvPtrEntryAddress(target).?; - if (relocIsStub(macho_file, rel) and macho_file.getSymbol(target).undf()) - break :blk macho_file.getStubsEntryAddress(target).?; - const is_tlv = is_tlv: { - const source_sym = 
macho_file.getSymbol(atom.getSymbolWithLoc()); - const header = macho_file.sections.items(.header)[source_sym.n_sect - 1]; - break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; - }; - break :blk getRelocTargetAddress(macho_file, target, is_tlv); - }; - - relocs_log.debug(" | source_addr = 0x{x}", .{source_addr}); - - switch (rel_type) { - .ARM64_RELOC_BRANCH26 => { - relocs_log.debug(" source {s} (object({?})), target {s}", .{ - macho_file.getSymbolName(atom.getSymbolWithLoc()), - atom.getFile(), - macho_file.getSymbolName(target), - }); - - const displacement = if (Relocation.calcPcRelativeDisplacementArm64( - source_addr, - target_addr, - )) |disp| blk: { - relocs_log.debug(" | target_addr = 0x{x}", .{target_addr}); - break :blk disp; - } else |_| blk: { - const thunk_index = macho_file.thunk_table.get(atom_index).?; - const thunk = macho_file.thunks.items[thunk_index]; - const thunk_sym_loc = if (macho_file.getSymbol(target).undf()) - thunk.getTrampoline(macho_file, .stub, target).? 
- else - thunk.getTrampoline(macho_file, .atom, target).?; - const thunk_addr = macho_file.getSymbol(thunk_sym_loc).n_value; - relocs_log.debug(" | target_addr = 0x{x} (thunk)", .{thunk_addr}); - break :blk try Relocation.calcPcRelativeDisplacementArm64(source_addr, thunk_addr); - }; - - const code = atom_code[rel_offset..][0..4]; - var inst = aarch64.Instruction{ - .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.unconditional_branch_immediate, - ), code), - }; - inst.unconditional_branch_immediate.imm26 = @as(u26, @truncate(@as(u28, @bitCast(displacement >> 2)))); - mem.writeInt(u32, code, inst.toU32(), .little); - }, - - .ARM64_RELOC_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_TLVP_LOAD_PAGE21, - => { - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); - - relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - - const pages = @as(u21, @bitCast(Relocation.calcNumberOfPages(source_addr, adjusted_target_addr))); - const code = atom_code[rel_offset..][0..4]; - var inst = aarch64.Instruction{ - .pc_relative_address = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.pc_relative_address, - ), code), - }; - inst.pc_relative_address.immhi = @as(u19, @truncate(pages >> 2)); - inst.pc_relative_address.immlo = @as(u2, @truncate(pages)); - mem.writeInt(u32, code, inst.toU32(), .little); - addend = null; - }, - - .ARM64_RELOC_PAGEOFF12 => { - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); - - relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - - const code = atom_code[rel_offset..][0..4]; - if (Relocation.isArithmeticOp(code)) { - const off = try Relocation.calcPageOffset(adjusted_target_addr, .arithmetic); - var inst = aarch64.Instruction{ - .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - 
aarch64.Instruction.add_subtract_immediate, - ), code), - }; - inst.add_subtract_immediate.imm12 = off; - mem.writeInt(u32, code, inst.toU32(), .little); - } else { - var inst = aarch64.Instruction{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code), - }; - const off = try Relocation.calcPageOffset(adjusted_target_addr, switch (inst.load_store_register.size) { - 0 => if (inst.load_store_register.v == 1) - Relocation.PageOffsetInstKind.load_store_128 - else - Relocation.PageOffsetInstKind.load_store_8, - 1 => .load_store_16, - 2 => .load_store_32, - 3 => .load_store_64, - }); - inst.load_store_register.offset = off; - mem.writeInt(u32, code, inst.toU32(), .little); - } - addend = null; - }, - - .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => { - const code = atom_code[rel_offset..][0..4]; - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); - - relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - - const off = try Relocation.calcPageOffset(adjusted_target_addr, .load_store_64); - var inst: aarch64.Instruction = .{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code), - }; - inst.load_store_register.offset = off; - mem.writeInt(u32, code, inst.toU32(), .little); - addend = null; - }, - - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => { - const code = atom_code[rel_offset..][0..4]; - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); - - relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - - const RegInfo = struct { - rd: u5, - rn: u5, - size: u2, - }; - const reg_info: RegInfo = blk: { - if (Relocation.isArithmeticOp(code)) { - const inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), code); - break :blk .{ - .rd = inst.rd, - 
.rn = inst.rn, - .size = inst.sf, - }; - } else { - const inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code); - break :blk .{ - .rd = inst.rt, - .rn = inst.rn, - .size = inst.size, - }; - } - }; - - var inst = if (macho_file.tlv_ptr_table.lookup.contains(target)) aarch64.Instruction{ - .load_store_register = .{ - .rt = reg_info.rd, - .rn = reg_info.rn, - .offset = try Relocation.calcPageOffset(adjusted_target_addr, .load_store_64), - .opc = 0b01, - .op1 = 0b01, - .v = 0, - .size = reg_info.size, - }, - } else aarch64.Instruction{ - .add_subtract_immediate = .{ - .rd = reg_info.rd, - .rn = reg_info.rn, - .imm12 = try Relocation.calcPageOffset(adjusted_target_addr, .arithmetic), - .sh = 0, - .s = 0, - .op = 0, - .sf = @as(u1, @truncate(reg_info.size)), - }, - }; - mem.writeInt(u32, code, inst.toU32(), .little); - addend = null; - }, - - .ARM64_RELOC_POINTER_TO_GOT => { - relocs_log.debug(" | target_addr = 0x{x}", .{target_addr}); - const result = math.cast(i32, @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr))) orelse - return error.Overflow; - mem.writeInt(u32, atom_code[rel_offset..][0..4], @as(u32, @bitCast(result)), .little); - }, - - .ARM64_RELOC_UNSIGNED => { - var ptr_addend = if (rel.r_length == 3) - mem.readInt(i64, atom_code[rel_offset..][0..8], .little) - else - mem.readInt(i32, atom_code[rel_offset..][0..4], .little); - - if (rel.r_extern == 0) { - const base_addr = if (target.sym_index >= object.source_address_lookup.len) - @as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr)) - else - object.source_address_lookup[target.sym_index]; - ptr_addend -= base_addr; - } - - const result = blk: { - if (subtractor) |sub| { - const sym = macho_file.getSymbol(sub); - break :blk @as(i64, @intCast(target_addr)) - @as(i64, @intCast(sym.n_value)) + ptr_addend; - } else { - break :blk @as(i64, @intCast(target_addr)) + ptr_addend; - } - }; - 
relocs_log.debug(" | target_addr = 0x{x}", .{result}); - - if (rel.r_length == 3) { - mem.writeInt(u64, atom_code[rel_offset..][0..8], @as(u64, @bitCast(result)), .little); - } else { - mem.writeInt(u32, atom_code[rel_offset..][0..4], @as(u32, @truncate(@as(u64, @bitCast(result)))), .little); - } - - subtractor = null; - }, - - .ARM64_RELOC_ADDEND => unreachable, - .ARM64_RELOC_SUBTRACTOR => unreachable, - } - } -} - -fn resolveRelocsX86( - macho_file: *MachO, - atom_index: Index, - atom_code: []u8, - atom_relocs: []align(1) const macho.relocation_info, - context: RelocContext, -) !void { - const atom = macho_file.getAtom(atom_index); - const object = macho_file.objects.items[atom.getFile().?]; - - var subtractor: ?SymbolWithLoc = null; - - for (atom_relocs) |rel| { - const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); - - switch (rel_type) { - .X86_64_RELOC_SUBTRACTOR => { - assert(subtractor == null); - - relocs_log.debug(" RELA({s}) @ {x} => %{d} in object({?d})", .{ - @tagName(rel_type), - rel.r_address, - rel.r_symbolnum, - atom.getFile(), - }); - - subtractor = parseRelocTarget(macho_file, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = atom_code, - .base_addr = context.base_addr, - .base_offset = context.base_offset, - }); - continue; - }, - else => {}, - } - - const target = parseRelocTarget(macho_file, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = atom_code, - .base_addr = context.base_addr, - .base_offset = context.base_offset, - }); - const rel_offset = @as(u32, @intCast(rel.r_address - context.base_offset)); - - relocs_log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{ - @tagName(rel_type), - rel.r_address, - target.sym_index, - macho_file.getSymbolName(target), - target.getFile(), - }); - - const source_addr = blk: { - const source_sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - break :blk source_sym.n_value + rel_offset; - }; - const target_addr = blk: { - if 
(relocRequiresGot(macho_file, rel)) break :blk macho_file.getGotEntryAddress(target).?; - if (relocIsStub(macho_file, rel) and macho_file.getSymbol(target).undf()) - break :blk macho_file.getStubsEntryAddress(target).?; - if (relocIsTlv(macho_file, rel) and macho_file.getSymbol(target).undf()) - break :blk macho_file.getTlvPtrEntryAddress(target).?; - const is_tlv = is_tlv: { - const source_sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - const header = macho_file.sections.items(.header)[source_sym.n_sect - 1]; - break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; - }; - break :blk getRelocTargetAddress(macho_file, target, is_tlv); - }; - - relocs_log.debug(" | source_addr = 0x{x}", .{source_addr}); - - switch (rel_type) { - .X86_64_RELOC_BRANCH => { - const addend = mem.readInt(i32, atom_code[rel_offset..][0..4], .little); - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); - relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); - mem.writeInt(i32, atom_code[rel_offset..][0..4], disp, .little); - }, - - .X86_64_RELOC_GOT, - .X86_64_RELOC_GOT_LOAD, - => { - const addend = mem.readInt(i32, atom_code[rel_offset..][0..4], .little); - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); - relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); - mem.writeInt(i32, atom_code[rel_offset..][0..4], disp, .little); - }, - - .X86_64_RELOC_TLV => { - const addend = mem.readInt(i32, atom_code[rel_offset..][0..4], .little); - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); - relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, 
adjusted_target_addr, 0); - - if (macho_file.tlv_ptr_table.lookup.get(target) == null) { - // We need to rewrite the opcode from movq to leaq. - atom_code[rel_offset - 2] = 0x8d; - } - - mem.writeInt(i32, atom_code[rel_offset..][0..4], disp, .little); - }, - - .X86_64_RELOC_SIGNED, - .X86_64_RELOC_SIGNED_1, - .X86_64_RELOC_SIGNED_2, - .X86_64_RELOC_SIGNED_4, - => { - const correction: u3 = switch (rel_type) { - .X86_64_RELOC_SIGNED => 0, - .X86_64_RELOC_SIGNED_1 => 1, - .X86_64_RELOC_SIGNED_2 => 2, - .X86_64_RELOC_SIGNED_4 => 4, + } + }, + + .got_load => { + assert(rel.tag == .@"extern"); + assert(rel.meta.length == 2); + assert(rel.meta.pcrel); + if (rel.getTargetSymbol(macho_file).flags.got) { + try writer.writeInt(i32, @intCast(G + A - P), .little); + } else { + try relaxGotLoad(code[rel_offset - 3 ..]); + try writer.writeInt(i32, @intCast(S + A - P), .little); + } + }, + + .tlv => { + assert(rel.tag == .@"extern"); + assert(rel.meta.length == 2); + assert(rel.meta.pcrel); + const sym = rel.getTargetSymbol(macho_file); + if (sym.flags.tlv_ptr) { + const S_: i64 = @intCast(sym.getTlvPtrAddress(macho_file)); + try writer.writeInt(i32, @intCast(S_ + A - P), .little); + } else { + try relaxTlv(code[rel_offset - 3 ..]); + try writer.writeInt(i32, @intCast(S + A - P), .little); + } + }, + + .signed, .signed1, .signed2, .signed4 => { + assert(rel.meta.length == 2); + assert(rel.meta.pcrel); + try writer.writeInt(i32, @intCast(S + A - P), .little); + }, + + .page, + .got_load_page, + .tlvp_page, + => { + assert(rel.tag == .@"extern"); + assert(rel.meta.length == 2); + assert(rel.meta.pcrel); + const sym = rel.getTargetSymbol(macho_file); + const source = math.cast(u64, P) orelse return error.Overflow; + const target = target: { + const target = switch (rel.type) { + .page => S + A, + .got_load_page => G + A, + .tlvp_page => if (sym.flags.tlv_ptr) blk: { + const S_: i64 = @intCast(sym.getTlvPtrAddress(macho_file)); + break :blk S_ + A; + } else S + A, else => 
unreachable, }; - var addend = mem.readInt(i32, atom_code[rel_offset..][0..4], .little) + correction; + break :target math.cast(u64, target) orelse return error.Overflow; + }; + const pages = @as(u21, @bitCast(try Relocation.calcNumberOfPages(source, target))); + var inst = aarch64.Instruction{ + .pc_relative_address = mem.bytesToValue(std.meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.pc_relative_address, + ), code[rel_offset..][0..4]), + }; + inst.pc_relative_address.immhi = @as(u19, @truncate(pages >> 2)); + inst.pc_relative_address.immlo = @as(u2, @truncate(pages)); + try writer.writeInt(u32, inst.toU32(), .little); + }, - if (rel.r_extern == 0) { - const base_addr = if (target.sym_index >= object.source_address_lookup.len) - @as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr)) - else - object.source_address_lookup[target.sym_index]; - addend += @as(i32, @intCast(@as(i64, @intCast(context.base_addr)) + rel.r_address + 4 - - @as(i64, @intCast(base_addr)))); - } - - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); - - relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - - const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, correction); - mem.writeInt(i32, atom_code[rel_offset..][0..4], disp, .little); - }, - - .X86_64_RELOC_UNSIGNED => { - var addend = if (rel.r_length == 3) - mem.readInt(i64, atom_code[rel_offset..][0..8], .little) - else - mem.readInt(i32, atom_code[rel_offset..][0..4], .little); - - if (rel.r_extern == 0) { - const base_addr = if (target.sym_index >= object.source_address_lookup.len) - @as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr)) - else - object.source_address_lookup[target.sym_index]; - addend -= base_addr; - } - - const result = blk: { - if (subtractor) |sub| { - const sym = macho_file.getSymbol(sub); - break :blk @as(i64, @intCast(target_addr)) - 
@as(i64, @intCast(sym.n_value)) + addend; - } else { - break :blk @as(i64, @intCast(target_addr)) + addend; - } + .pageoff => { + assert(rel.tag == .@"extern"); + assert(rel.meta.length == 2); + assert(!rel.meta.pcrel); + const target = math.cast(u64, S + A) orelse return error.Overflow; + const inst_code = code[rel_offset..][0..4]; + if (Relocation.isArithmeticOp(inst_code)) { + const off = try Relocation.calcPageOffset(target, .arithmetic); + var inst = aarch64.Instruction{ + .add_subtract_immediate = mem.bytesToValue(std.meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), inst_code), }; - relocs_log.debug(" | target_addr = 0x{x}", .{result}); + inst.add_subtract_immediate.imm12 = off; + try writer.writeInt(u32, inst.toU32(), .little); + } else { + var inst = aarch64.Instruction{ + .load_store_register = mem.bytesToValue(std.meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), inst_code), + }; + const off = try Relocation.calcPageOffset(target, switch (inst.load_store_register.size) { + 0 => if (inst.load_store_register.v == 1) + Relocation.PageOffsetInstKind.load_store_128 + else + Relocation.PageOffsetInstKind.load_store_8, + 1 => .load_store_16, + 2 => .load_store_32, + 3 => .load_store_64, + }); + inst.load_store_register.offset = off; + try writer.writeInt(u32, inst.toU32(), .little); + } + }, - if (rel.r_length == 3) { - mem.writeInt(u64, atom_code[rel_offset..][0..8], @as(u64, @bitCast(result)), .little); + .got_load_pageoff => { + assert(rel.tag == .@"extern"); + assert(rel.meta.length == 2); + assert(!rel.meta.pcrel); + const target = math.cast(u64, G + A) orelse return error.Overflow; + const off = try Relocation.calcPageOffset(target, .load_store_64); + var inst: aarch64.Instruction = .{ + .load_store_register = mem.bytesToValue(std.meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code[rel_offset..][0..4]), + }; + 
inst.load_store_register.offset = off; + try writer.writeInt(u32, inst.toU32(), .little); + }, + + .tlvp_pageoff => { + assert(rel.tag == .@"extern"); + assert(rel.meta.length == 2); + assert(!rel.meta.pcrel); + + const sym = rel.getTargetSymbol(macho_file); + const target = target: { + const target = if (sym.flags.tlv_ptr) blk: { + const S_: i64 = @intCast(sym.getTlvPtrAddress(macho_file)); + break :blk S_ + A; + } else S + A; + break :target math.cast(u64, target) orelse return error.Overflow; + }; + + const RegInfo = struct { + rd: u5, + rn: u5, + size: u2, + }; + + const inst_code = code[rel_offset..][0..4]; + const reg_info: RegInfo = blk: { + if (Relocation.isArithmeticOp(inst_code)) { + const inst = mem.bytesToValue(std.meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), inst_code); + break :blk .{ + .rd = inst.rd, + .rn = inst.rn, + .size = inst.sf, + }; } else { - mem.writeInt(u32, atom_code[rel_offset..][0..4], @as(u32, @truncate(@as(u64, @bitCast(result)))), .little); + const inst = mem.bytesToValue(std.meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), inst_code); + break :blk .{ + .rd = inst.rt, + .rn = inst.rn, + .size = inst.size, + }; + } + }; + + var inst = if (sym.flags.tlv_ptr) aarch64.Instruction{ + .load_store_register = .{ + .rt = reg_info.rd, + .rn = reg_info.rn, + .offset = try Relocation.calcPageOffset(target, .load_store_64), + .opc = 0b01, + .op1 = 0b01, + .v = 0, + .size = reg_info.size, + }, + } else aarch64.Instruction{ + .add_subtract_immediate = .{ + .rd = reg_info.rd, + .rn = reg_info.rn, + .imm12 = try Relocation.calcPageOffset(target, .arithmetic), + .sh = 0, + .s = 0, + .op = 0, + .sf = @as(u1, @truncate(reg_info.size)), + }, + }; + try writer.writeInt(u32, inst.toU32(), .little); + }, + } +} + +fn relaxGotLoad(code: []u8) error{RelaxFail}!void { + const old_inst = disassemble(code) orelse return error.RelaxFail; + switch (old_inst.encoding.mnemonic) { + 
.mov => { + const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops) catch return error.RelaxFail; + relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); + encode(&.{inst}, code) catch return error.RelaxFail; + }, + else => return error.RelaxFail, + } +} + +fn relaxTlv(code: []u8) error{RelaxFail}!void { + const old_inst = disassemble(code) orelse return error.RelaxFail; + switch (old_inst.encoding.mnemonic) { + .mov => { + const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops) catch return error.RelaxFail; + relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); + encode(&.{inst}, code) catch return error.RelaxFail; + }, + else => return error.RelaxFail, + } +} + +fn disassemble(code: []const u8) ?Instruction { + var disas = Disassembler.init(code); + const inst = disas.next() catch return null; + return inst; +} + +fn encode(insts: []const Instruction, code: []u8) !void { + var stream = std.io.fixedBufferStream(code); + const writer = stream.writer(); + for (insts) |inst| { + try inst.encode(writer, .{}); + } +} + +pub fn calcNumRelocs(self: Atom, macho_file: *MachO) u32 { + switch (macho_file.options.cpu_arch.?) 
{ + .aarch64 => { + var nreloc: u32 = 0; + for (self.getRelocs(macho_file)) |rel| { + nreloc += 1; + switch (rel.type) { + .page, .pageoff => if (rel.addend > 0) { + nreloc += 1; + }, + else => {}, + } + } + return nreloc; + }, + .x86_64 => return @intCast(self.getRelocs(macho_file).len), + else => unreachable, + } +} + +pub fn writeRelocs(self: Atom, macho_file: *MachO, code: []u8, buffer: *std.ArrayList(macho.relocation_info)) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const cpu_arch = macho_file.options.cpu_arch.?; + const relocs = self.getRelocs(macho_file); + const sect = macho_file.sections.items(.header)[self.out_n_sect]; + var stream = std.io.fixedBufferStream(code); + + for (relocs) |rel| { + const rel_offset = rel.offset - self.off; + const r_address: i32 = math.cast(i32, self.value + rel_offset - sect.addr) orelse return error.Overflow; + const r_symbolnum = r_symbolnum: { + const r_symbolnum: u32 = switch (rel.tag) { + .local => rel.getTargetAtom(macho_file).out_n_sect + 1, + .@"extern" => rel.getTargetSymbol(macho_file).getOutputSymtabIndex(macho_file).?, + }; + break :r_symbolnum math.cast(u24, r_symbolnum) orelse return error.Overflow; + }; + const r_extern = rel.tag == .@"extern"; + var addend = rel.addend + rel.getRelocAddend(cpu_arch); + if (rel.tag == .local) { + const target: i64 = @intCast(rel.getTargetAddress(macho_file)); + addend += target; + } + + try stream.seekTo(rel_offset); + + switch (cpu_arch) { + .aarch64 => { + if (rel.type == .unsigned) switch (rel.meta.length) { + 0, 1 => unreachable, + 2 => try stream.writer().writeInt(i32, @truncate(addend), .little), + 3 => try stream.writer().writeInt(i64, addend, .little), + } else if (addend > 0) { + buffer.appendAssumeCapacity(.{ + .r_address = r_address, + .r_symbolnum = @bitCast(math.cast(i24, addend) orelse return error.Overflow), + .r_pcrel = 0, + .r_length = 2, + .r_extern = 0, + .r_type = @intFromEnum(macho.reloc_type_arm64.ARM64_RELOC_ADDEND), + }); } - 
subtractor = null; - }, + const r_type: macho.reloc_type_arm64 = switch (rel.type) { + .page => .ARM64_RELOC_PAGE21, + .pageoff => .ARM64_RELOC_PAGEOFF12, + .got_load_page => .ARM64_RELOC_GOT_LOAD_PAGE21, + .got_load_pageoff => .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .tlvp_page => .ARM64_RELOC_TLVP_LOAD_PAGE21, + .tlvp_pageoff => .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, + .branch => .ARM64_RELOC_BRANCH26, + .got => .ARM64_RELOC_POINTER_TO_GOT, + .subtractor => .ARM64_RELOC_SUBTRACTOR, + .unsigned => .ARM64_RELOC_UNSIGNED, - .X86_64_RELOC_SUBTRACTOR => unreachable, + .signed, + .signed1, + .signed2, + .signed4, + .got_load, + .tlv, + => unreachable, + }; + buffer.appendAssumeCapacity(.{ + .r_address = r_address, + .r_symbolnum = r_symbolnum, + .r_pcrel = @intFromBool(rel.meta.pcrel), + .r_extern = @intFromBool(r_extern), + .r_length = rel.meta.length, + .r_type = @intFromEnum(r_type), + }); + }, + .x86_64 => { + if (rel.meta.pcrel) { + if (rel.tag == .local) { + addend -= @as(i64, @intCast(self.value + rel_offset)); + } else { + addend += 4; + } + } + switch (rel.meta.length) { + 0, 1 => unreachable, + 2 => try stream.writer().writeInt(i32, @truncate(addend), .little), + 3 => try stream.writer().writeInt(i64, addend, .little), + } + + const r_type: macho.reloc_type_x86_64 = switch (rel.type) { + .signed => .X86_64_RELOC_SIGNED, + .signed1 => .X86_64_RELOC_SIGNED_1, + .signed2 => .X86_64_RELOC_SIGNED_2, + .signed4 => .X86_64_RELOC_SIGNED_4, + .got_load => .X86_64_RELOC_GOT_LOAD, + .tlv => .X86_64_RELOC_TLV, + .branch => .X86_64_RELOC_BRANCH, + .got => .X86_64_RELOC_GOT, + .subtractor => .X86_64_RELOC_SUBTRACTOR, + .unsigned => .X86_64_RELOC_UNSIGNED, + + .page, + .pageoff, + .got_load_page, + .got_load_pageoff, + .tlvp_page, + .tlvp_pageoff, + => unreachable, + }; + buffer.appendAssumeCapacity(.{ + .r_address = r_address, + .r_symbolnum = r_symbolnum, + .r_pcrel = @intFromBool(rel.meta.pcrel), + .r_extern = @intFromBool(r_extern), + .r_length = rel.meta.length, + .r_type = 
@intFromEnum(r_type), + }); + }, + else => unreachable, } } } -pub fn getAtomCode(macho_file: *MachO, atom_index: Index) []const u8 { - const atom = macho_file.getAtom(atom_index); - assert(atom.getFile() != null); // Synthetic atom shouldn't need to inquire for code. - const object = macho_file.objects.items[atom.getFile().?]; - const source_sym = object.getSourceSymbol(atom.sym_index) orelse { - // If there was no matching symbol present in the source symtab, this means - // we are dealing with either an entire section, or part of it, but also - // starting at the beginning. - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - const source_sect = object.getSourceSection(sect_id); - assert(!source_sect.isZerofill()); - const code = object.getSectionContents(source_sect); - const code_len = @as(usize, @intCast(atom.size)); - return code[0..code_len]; - }; - const source_sect = object.getSourceSection(source_sym.n_sect - 1); - assert(!source_sect.isZerofill()); - const code = object.getSectionContents(source_sect); - const offset = @as(usize, @intCast(source_sym.n_value - source_sect.addr)); - const code_len = @as(usize, @intCast(atom.size)); - return code[offset..][0..code_len]; +pub fn format( + atom: Atom, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = atom; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format Atom directly"); } -pub fn getAtomRelocs(macho_file: *MachO, atom_index: Index) []const macho.relocation_info { - const atom = macho_file.getAtom(atom_index); - assert(atom.getFile() != null); // Synthetic atom shouldn't need to unique for relocs. 
- const object = macho_file.objects.items[atom.getFile().?]; - const cache = object.relocs_lookup[atom.sym_index]; - - const source_sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { - break :blk source_sym.n_sect - 1; - } else blk: { - // If there was no matching symbol present in the source symtab, this means - // we are dealing with either an entire section, or part of it, but also - // starting at the beginning. - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - break :blk sect_id; - }; - const source_sect = object.getSourceSection(source_sect_id); - assert(!source_sect.isZerofill()); - const relocs = object.getRelocs(source_sect_id); - return relocs[cache.start..][0..cache.len]; +pub fn fmt(atom: Atom, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ + .atom = atom, + .macho_file = macho_file, + } }; } -pub fn relocRequiresGot(macho_file: *MachO, rel: macho.relocation_info) bool { - const target = macho_file.base.comp.root_mod.resolved_target.result; - switch (target.cpu.arch) { - .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12, - .ARM64_RELOC_POINTER_TO_GOT, - => return true, - else => return false, - }, - .x86_64 => switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) { - .X86_64_RELOC_GOT, - .X86_64_RELOC_GOT_LOAD, - => return true, - else => return false, - }, - else => unreachable, +const FormatContext = struct { + atom: Atom, + macho_file: *MachO, +}; + +fn format2( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = options; + _ = unused_fmt_string; + const atom = ctx.atom; + const macho_file = ctx.macho_file; + try writer.print("atom({d}) : {s} : @{x} : sect({d}) : align({x}) : size({x}) : thunk({d})", .{ + atom.atom_index, atom.getName(macho_file), 
atom.value, + atom.out_n_sect, atom.alignment, atom.size, + atom.thunk_index, + }); + if (!atom.flags.alive) try writer.writeAll(" : [*]"); + if (atom.unwind_records.len > 0) { + try writer.writeAll(" : unwind{ "); + for (atom.getUnwindRecords(macho_file), atom.unwind_records.pos..) |index, i| { + const rec = macho_file.getUnwindRecord(index); + try writer.print("{d}", .{index}); + if (!rec.alive) try writer.writeAll("([*])"); + if (i < atom.unwind_records.pos + atom.unwind_records.len - 1) try writer.writeAll(", "); + } + try writer.writeAll(" }"); } } -pub fn relocIsTlv(macho_file: *MachO, rel: macho.relocation_info) bool { - const target = macho_file.base.comp.root_mod.resolved_target.result; - switch (target.cpu.arch) { - .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { - .ARM64_RELOC_TLVP_LOAD_PAGE21, - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, - => return true, - else => return false, - }, - .x86_64 => switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) { - .X86_64_RELOC_TLV => return true, - else => return false, - }, - else => unreachable, - } -} +pub const Index = u32; -pub fn relocIsStub(macho_file: *MachO, rel: macho.relocation_info) bool { - const target = macho_file.base.comp.root_mod.resolved_target.result; - switch (target.cpu.arch) { - .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { - .ARM64_RELOC_BRANCH26 => return true, - else => return false, - }, - .x86_64 => switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) { - .X86_64_RELOC_BRANCH => return true, - else => return false, - }, - else => unreachable, - } -} +pub const Flags = packed struct { + /// Specifies whether this atom is alive or has been garbage collected. + alive: bool = true, -const Atom = @This(); + /// Specifies if the atom has been visited during garbage collection. 
+ visited: bool = false, +}; -const std = @import("std"); -const build_options = @import("build_options"); -const aarch64 = @import("../../arch/aarch64/bits.zig"); +pub const Loc = struct { + pos: usize = 0, + len: usize = 0, +}; + +const aarch64 = @import("../aarch64.zig"); const assert = std.debug.assert; -const log = std.log.scoped(.link); -const relocs_log = std.log.scoped(.link_relocs); +const bind = @import("dyld_info/bind.zig"); +const dis_x86_64 = @import("dis_x86_64"); const macho = std.macho; const math = std.math; const mem = std.mem; -const meta = std.meta; -const trace = @import("../../tracy.zig").trace; +const log = std.log.scoped(.link); +const relocs_log = std.log.scoped(.relocs); +const std = @import("std"); +const trace = @import("../tracy.zig").trace; const Allocator = mem.Allocator; -const Arch = std.Target.Cpu.Arch; +const Atom = @This(); +const Disassembler = dis_x86_64.Disassembler; +const File = @import("file.zig").File; +const Instruction = dis_x86_64.Instruction; +const Immediate = dis_x86_64.Immediate; const MachO = @import("../MachO.zig"); -pub const Relocation = @import("Relocation.zig"); -const SymbolWithLoc = MachO.SymbolWithLoc; +const Object = @import("Object.zig"); +const Relocation = @import("Relocation.zig"); +const Symbol = @import("Symbol.zig"); +const Thunk = @import("thunks.zig").Thunk; +const UnwindInfo = @import("UnwindInfo.zig"); diff --git a/src/link/MachO/CodeSignature.zig b/src/link/MachO/CodeSignature.zig index 0f49ee6a64..ce142b4376 100644 --- a/src/link/MachO/CodeSignature.zig +++ b/src/link/MachO/CodeSignature.zig @@ -1,175 +1,17 @@ -page_size: u16, -code_directory: CodeDirectory, -requirements: ?Requirements = null, -entitlements: ?Entitlements = null, -signature: ?Signature = null, +const CodeSignature = @This(); -pub fn init(page_size: u16) CodeSignature { - return .{ - .page_size = page_size, - .code_directory = CodeDirectory.init(page_size), - }; -} - -pub fn deinit(self: *CodeSignature, allocator: Allocator) 
void { - self.code_directory.deinit(allocator); - if (self.requirements) |*req| { - req.deinit(allocator); - } - if (self.entitlements) |*ents| { - ents.deinit(allocator); - } - if (self.signature) |*sig| { - sig.deinit(allocator); - } -} - -pub fn addEntitlements(self: *CodeSignature, allocator: Allocator, path: []const u8) !void { - const file = try fs.cwd().openFile(path, .{}); - defer file.close(); - const inner = try file.readToEndAlloc(allocator, std.math.maxInt(u32)); - self.entitlements = .{ .inner = inner }; -} - -pub const WriteOpts = struct { - file: fs.File, - exec_seg_base: u64, - exec_seg_limit: u64, - file_size: u32, - output_mode: std.builtin.OutputMode, -}; - -pub fn writeAdhocSignature( - self: *CodeSignature, - comp: *const Compilation, - opts: WriteOpts, - writer: anytype, -) !void { - const gpa = comp.gpa; - - var header: macho.SuperBlob = .{ - .magic = macho.CSMAGIC_EMBEDDED_SIGNATURE, - .length = @sizeOf(macho.SuperBlob), - .count = 0, - }; - - var blobs = std.ArrayList(Blob).init(gpa); - defer blobs.deinit(); - - self.code_directory.inner.execSegBase = opts.exec_seg_base; - self.code_directory.inner.execSegLimit = opts.exec_seg_limit; - self.code_directory.inner.execSegFlags = if (opts.output_mode == .Exe) macho.CS_EXECSEG_MAIN_BINARY else 0; - self.code_directory.inner.codeLimit = opts.file_size; - - const total_pages = @as(u32, @intCast(mem.alignForward(usize, opts.file_size, self.page_size) / self.page_size)); - - try self.code_directory.code_slots.ensureTotalCapacityPrecise(gpa, total_pages); - self.code_directory.code_slots.items.len = total_pages; - self.code_directory.inner.nCodeSlots = total_pages; - - // Calculate hash for each page (in file) and write it to the buffer - var hasher = Hasher(Sha256){ .allocator = gpa, .thread_pool = comp.thread_pool }; - try hasher.hash(opts.file, self.code_directory.code_slots.items, .{ - .chunk_size = self.page_size, - .max_file_size = opts.file_size, - }); - - try blobs.append(.{ .code_directory = 
&self.code_directory }); - header.length += @sizeOf(macho.BlobIndex); - header.count += 1; - - var hash: [hash_size]u8 = undefined; - - if (self.requirements) |*req| { - var buf = std.ArrayList(u8).init(gpa); - defer buf.deinit(); - try req.write(buf.writer()); - Sha256.hash(buf.items, &hash, .{}); - self.code_directory.addSpecialHash(req.slotType(), hash); - - try blobs.append(.{ .requirements = req }); - header.count += 1; - header.length += @sizeOf(macho.BlobIndex) + req.size(); - } - - if (self.entitlements) |*ents| { - var buf = std.ArrayList(u8).init(gpa); - defer buf.deinit(); - try ents.write(buf.writer()); - Sha256.hash(buf.items, &hash, .{}); - self.code_directory.addSpecialHash(ents.slotType(), hash); - - try blobs.append(.{ .entitlements = ents }); - header.count += 1; - header.length += @sizeOf(macho.BlobIndex) + ents.size(); - } - - if (self.signature) |*sig| { - try blobs.append(.{ .signature = sig }); - header.count += 1; - header.length += @sizeOf(macho.BlobIndex) + sig.size(); - } - - self.code_directory.inner.hashOffset = - @sizeOf(macho.CodeDirectory) + @as(u32, @intCast(self.code_directory.ident.len + 1 + self.code_directory.inner.nSpecialSlots * hash_size)); - self.code_directory.inner.length = self.code_directory.size(); - header.length += self.code_directory.size(); - - try writer.writeInt(u32, header.magic, .big); - try writer.writeInt(u32, header.length, .big); - try writer.writeInt(u32, header.count, .big); - - var offset: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) * @as(u32, @intCast(blobs.items.len)); - for (blobs.items) |blob| { - try writer.writeInt(u32, blob.slotType(), .big); - try writer.writeInt(u32, offset, .big); - offset += blob.size(); - } - - for (blobs.items) |blob| { - try blob.write(writer); - } -} - -pub fn size(self: CodeSignature) u32 { - var ssize: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size(); - if (self.requirements) |req| { - ssize += @sizeOf(macho.BlobIndex) + 
req.size(); - } - if (self.entitlements) |ent| { - ssize += @sizeOf(macho.BlobIndex) + ent.size(); - } - if (self.signature) |sig| { - ssize += @sizeOf(macho.BlobIndex) + sig.size(); - } - return ssize; -} - -pub fn estimateSize(self: CodeSignature, file_size: u64) u32 { - var ssize: u64 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size(); - // Approx code slots - const total_pages = mem.alignForward(u64, file_size, self.page_size) / self.page_size; - ssize += total_pages * hash_size; - var n_special_slots: u32 = 0; - if (self.requirements) |req| { - ssize += @sizeOf(macho.BlobIndex) + req.size(); - n_special_slots = @max(n_special_slots, req.slotType()); - } - if (self.entitlements) |ent| { - ssize += @sizeOf(macho.BlobIndex) + ent.size() + hash_size; - n_special_slots = @max(n_special_slots, ent.slotType()); - } - if (self.signature) |sig| { - ssize += @sizeOf(macho.BlobIndex) + sig.size(); - } - ssize += n_special_slots * hash_size; - return @as(u32, @intCast(mem.alignForward(u64, ssize, @sizeOf(u64)))); -} - -pub fn clear(self: *CodeSignature, allocator: Allocator) void { - self.code_directory.deinit(allocator); - self.code_directory = CodeDirectory.init(self.page_size); -} +const std = @import("std"); +const assert = std.debug.assert; +const fs = std.fs; +const log = std.log.scoped(.link); +const macho = std.macho; +const mem = std.mem; +const testing = std.testing; +const Allocator = mem.Allocator; +const Hasher = @import("hasher.zig").ParallelHasher; +const MachO = @import("../MachO.zig"); +const Sha256 = std.crypto.hash.sha2.Sha256; +const Zld = @import("../Zld.zig"); const hash_size = Sha256.digest_length; @@ -257,7 +99,7 @@ const CodeDirectory = struct { fn addSpecialHash(self: *CodeDirectory, index: u32, hash: [hash_size]u8) void { assert(index > 0); self.inner.nSpecialSlots = @max(self.inner.nSpecialSlots, index); - self.special_slots[index - 1] = hash; + @memcpy(&self.special_slots[index - 1], &hash); } fn slotType(self: 
CodeDirectory) u32 { @@ -376,17 +218,175 @@ const Signature = struct { } }; -const CodeSignature = @This(); +page_size: u16, +code_directory: CodeDirectory, +requirements: ?Requirements = null, +entitlements: ?Entitlements = null, +signature: ?Signature = null, -const std = @import("std"); -const assert = std.debug.assert; -const fs = std.fs; -const log = std.log.scoped(.link); -const macho = std.macho; -const mem = std.mem; -const testing = std.testing; +pub fn init(page_size: u16) CodeSignature { + return .{ + .page_size = page_size, + .code_directory = CodeDirectory.init(page_size), + }; +} -const Allocator = mem.Allocator; -const Compilation = @import("../../Compilation.zig"); -const Hasher = @import("hasher.zig").ParallelHasher; -const Sha256 = std.crypto.hash.sha2.Sha256; +pub fn deinit(self: *CodeSignature, allocator: Allocator) void { + self.code_directory.deinit(allocator); + if (self.requirements) |*req| { + req.deinit(allocator); + } + if (self.entitlements) |*ents| { + ents.deinit(allocator); + } + if (self.signature) |*sig| { + sig.deinit(allocator); + } +} + +pub fn addEntitlements(self: *CodeSignature, allocator: Allocator, path: []const u8) !void { + const file = try fs.cwd().openFile(path, .{}); + defer file.close(); + const inner = try file.readToEndAlloc(allocator, std.math.maxInt(u32)); + self.entitlements = .{ .inner = inner }; +} + +pub const WriteOpts = struct { + file: fs.File, + exec_seg_base: u64, + exec_seg_limit: u64, + file_size: u32, + dylib: bool, +}; + +pub fn writeAdhocSignature( + self: *CodeSignature, + macho_file: *MachO, + opts: WriteOpts, + writer: anytype, +) !void { + const allocator = macho_file.base.allocator; + + var header: macho.SuperBlob = .{ + .magic = macho.CSMAGIC_EMBEDDED_SIGNATURE, + .length = @sizeOf(macho.SuperBlob), + .count = 0, + }; + + var blobs = std.ArrayList(Blob).init(allocator); + defer blobs.deinit(); + + self.code_directory.inner.execSegBase = opts.exec_seg_base; + 
self.code_directory.inner.execSegLimit = opts.exec_seg_limit; + self.code_directory.inner.execSegFlags = if (!opts.dylib) macho.CS_EXECSEG_MAIN_BINARY else 0; + self.code_directory.inner.codeLimit = opts.file_size; + + const total_pages = @as(u32, @intCast(mem.alignForward(usize, opts.file_size, self.page_size) / self.page_size)); + + try self.code_directory.code_slots.ensureTotalCapacityPrecise(allocator, total_pages); + self.code_directory.code_slots.items.len = total_pages; + self.code_directory.inner.nCodeSlots = total_pages; + + // Calculate hash for each page (in file) and write it to the buffer + var hasher = Hasher(Sha256){ .allocator = allocator, .thread_pool = macho_file.base.thread_pool }; + try hasher.hash(opts.file, self.code_directory.code_slots.items, .{ + .chunk_size = self.page_size, + .max_file_size = opts.file_size, + }); + + try blobs.append(.{ .code_directory = &self.code_directory }); + header.length += @sizeOf(macho.BlobIndex); + header.count += 1; + + var hash: [hash_size]u8 = undefined; + + if (self.requirements) |*req| { + var buf = std.ArrayList(u8).init(allocator); + defer buf.deinit(); + try req.write(buf.writer()); + Sha256.hash(buf.items, &hash, .{}); + self.code_directory.addSpecialHash(req.slotType(), hash); + + try blobs.append(.{ .requirements = req }); + header.count += 1; + header.length += @sizeOf(macho.BlobIndex) + req.size(); + } + + if (self.entitlements) |*ents| { + var buf = std.ArrayList(u8).init(allocator); + defer buf.deinit(); + try ents.write(buf.writer()); + Sha256.hash(buf.items, &hash, .{}); + self.code_directory.addSpecialHash(ents.slotType(), hash); + + try blobs.append(.{ .entitlements = ents }); + header.count += 1; + header.length += @sizeOf(macho.BlobIndex) + ents.size(); + } + + if (self.signature) |*sig| { + try blobs.append(.{ .signature = sig }); + header.count += 1; + header.length += @sizeOf(macho.BlobIndex) + sig.size(); + } + + self.code_directory.inner.hashOffset = + @sizeOf(macho.CodeDirectory) + 
@as(u32, @intCast(self.code_directory.ident.len + 1 + self.code_directory.inner.nSpecialSlots * hash_size)); + self.code_directory.inner.length = self.code_directory.size(); + header.length += self.code_directory.size(); + + try writer.writeInt(u32, header.magic, .big); + try writer.writeInt(u32, header.length, .big); + try writer.writeInt(u32, header.count, .big); + + var offset: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) * @as(u32, @intCast(blobs.items.len)); + for (blobs.items) |blob| { + try writer.writeInt(u32, blob.slotType(), .big); + try writer.writeInt(u32, offset, .big); + offset += blob.size(); + } + + for (blobs.items) |blob| { + try blob.write(writer); + } +} + +pub fn size(self: CodeSignature) u32 { + var ssize: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size(); + if (self.requirements) |req| { + ssize += @sizeOf(macho.BlobIndex) + req.size(); + } + if (self.entitlements) |ent| { + ssize += @sizeOf(macho.BlobIndex) + ent.size(); + } + if (self.signature) |sig| { + ssize += @sizeOf(macho.BlobIndex) + sig.size(); + } + return ssize; +} + +pub fn estimateSize(self: CodeSignature, file_size: u64) u32 { + var ssize: u64 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size(); + // Approx code slots + const total_pages = mem.alignForward(u64, file_size, self.page_size) / self.page_size; + ssize += total_pages * hash_size; + var n_special_slots: u32 = 0; + if (self.requirements) |req| { + ssize += @sizeOf(macho.BlobIndex) + req.size(); + n_special_slots = @max(n_special_slots, req.slotType()); + } + if (self.entitlements) |ent| { + ssize += @sizeOf(macho.BlobIndex) + ent.size() + hash_size; + n_special_slots = @max(n_special_slots, ent.slotType()); + } + if (self.signature) |sig| { + ssize += @sizeOf(macho.BlobIndex) + sig.size(); + } + ssize += n_special_slots * hash_size; + return @as(u32, @intCast(mem.alignForward(u64, ssize, @sizeOf(u64)))); +} + +pub fn clear(self: 
*CodeSignature, allocator: Allocator) void { + self.code_directory.deinit(allocator); + self.code_directory = CodeDirectory.init(self.page_size); +} diff --git a/src/link/MachO/DwarfInfo.zig b/src/link/MachO/DwarfInfo.zig index a1e0ae458a..c3f8d235ce 100644 --- a/src/link/MachO/DwarfInfo.zig +++ b/src/link/MachO/DwarfInfo.zig @@ -2,377 +2,175 @@ debug_info: []const u8, debug_abbrev: []const u8, debug_str: []const u8, -pub fn getCompileUnitIterator(self: DwarfInfo) CompileUnitIterator { - return .{ .ctx = self }; +/// Abbreviation table indexed by offset in the .debug_abbrev bytestream +abbrev_tables: std.AutoArrayHashMapUnmanaged(u64, AbbrevTable) = .{}, +/// List of compile units as they appear in the .debug_info bytestream +compile_units: std.ArrayListUnmanaged(CompileUnit) = .{}, + +pub fn init(dw: *DwarfInfo, allocator: Allocator) !void { + try dw.parseAbbrevTables(allocator); + try dw.parseCompileUnits(allocator); } -const CompileUnitIterator = struct { - ctx: DwarfInfo, - pos: usize = 0, - - pub fn next(self: *CompileUnitIterator) !?CompileUnit { - if (self.pos >= self.ctx.debug_info.len) return null; - - var stream = std.io.fixedBufferStream(self.ctx.debug_info[self.pos..]); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); - - const cuh = try CompileUnit.Header.read(reader); - const total_length = cuh.length + @as(u64, if (cuh.is_64bit) @sizeOf(u64) else @sizeOf(u32)); - const offset = math.cast(usize, creader.bytes_read) orelse return error.Overflow; - - const cu = CompileUnit{ - .cuh = cuh, - .debug_info_off = self.pos + offset, - }; - - self.pos += (math.cast(usize, total_length) orelse return error.Overflow); - - return cu; +pub fn deinit(dw: *DwarfInfo, allocator: Allocator) void { + dw.abbrev_tables.deinit(allocator); + for (dw.compile_units.items) |*cu| { + cu.deinit(allocator); } -}; - -pub fn genSubprogramLookupByName( - self: DwarfInfo, - compile_unit: CompileUnit, - abbrev_lookup: AbbrevLookupTable, - 
lookup: *SubprogramLookupByName, -) !void { - var abbrev_it = compile_unit.getAbbrevEntryIterator(self); - while (try abbrev_it.next(abbrev_lookup)) |entry| switch (entry.tag) { - dwarf.TAG.subprogram => { - var attr_it = entry.getAttributeIterator(self, compile_unit.cuh); - - var name: ?[]const u8 = null; - var low_pc: ?u64 = null; - var high_pc: ?u64 = null; - - while (try attr_it.next()) |attr| switch (attr.name) { - dwarf.AT.name => if (attr.getString(self, compile_unit.cuh)) |str| { - name = str; - }, - dwarf.AT.low_pc => { - if (attr.getAddr(self, compile_unit.cuh)) |addr| { - low_pc = addr; - } - if (try attr.getConstant(self)) |constant| { - low_pc = @as(u64, @intCast(constant)); - } - }, - dwarf.AT.high_pc => { - if (attr.getAddr(self, compile_unit.cuh)) |addr| { - high_pc = addr; - } - if (try attr.getConstant(self)) |constant| { - high_pc = @as(u64, @intCast(constant)); - } - }, - else => {}, - }; - - if (name == null or low_pc == null or high_pc == null) continue; - - try lookup.putNoClobber(name.?, .{ .addr = low_pc.?, .size = high_pc.? 
}); - }, - else => {}, - }; + dw.compile_units.deinit(allocator); } -pub fn genAbbrevLookupByKind(self: DwarfInfo, off: usize, lookup: *AbbrevLookupTable) !void { - const data = self.debug_abbrev[off..]; - var stream = std.io.fixedBufferStream(data); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); - - while (true) { - const kind = try leb.readULEB128(u64, reader); - - if (kind == 0) break; - - const pos = math.cast(usize, creader.bytes_read) orelse return error.Overflow; - _ = try leb.readULEB128(u64, reader); // TAG - _ = try reader.readByte(); // CHILDREN - - while (true) { - const name = try leb.readULEB128(u64, reader); - const form = try leb.readULEB128(u64, reader); - - if (name == 0 and form == 0) break; - } - - const next_pos = math.cast(usize, creader.bytes_read) orelse return error.Overflow; - - try lookup.putNoClobber(kind, .{ - .pos = pos, - .len = next_pos - pos - 2, - }); - } +fn getString(dw: DwarfInfo, off: u64) [:0]const u8 { + assert(off < dw.debug_str.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(dw.debug_str.ptr + off)), 0); } -pub const CompileUnit = struct { - cuh: Header, - debug_info_off: usize, +fn parseAbbrevTables(dw: *DwarfInfo, allocator: Allocator) !void { + const tracy = trace(@src()); + defer tracy.end(); - pub const Header = struct { - is_64bit: bool, - length: u64, - version: u16, - debug_abbrev_offset: u64, - address_size: u8, - - fn read(reader: anytype) !Header { - var length: u64 = try reader.readInt(u32, .little); - - const is_64bit = length == 0xffffffff; - if (is_64bit) { - length = try reader.readInt(u64, .little); - } - - const version = try reader.readInt(u16, .little); - const debug_abbrev_offset = if (is_64bit) - try reader.readInt(u64, .little) - else - try reader.readInt(u32, .little); - const address_size = try reader.readInt(u8, .little); - - return Header{ - .is_64bit = is_64bit, - .length = length, - .version = version, - .debug_abbrev_offset = 
debug_abbrev_offset, - .address_size = address_size, - }; - } - }; - - inline fn getDebugInfo(self: CompileUnit, ctx: DwarfInfo) []const u8 { - return ctx.debug_info[self.debug_info_off..][0..self.cuh.length]; - } - - pub fn getAbbrevEntryIterator(self: CompileUnit, ctx: DwarfInfo) AbbrevEntryIterator { - return .{ .cu = self, .ctx = ctx }; - } -}; - -const AbbrevEntryIterator = struct { - cu: CompileUnit, - ctx: DwarfInfo, - pos: usize = 0, - - pub fn next(self: *AbbrevEntryIterator, lookup: AbbrevLookupTable) !?AbbrevEntry { - if (self.pos + self.cu.debug_info_off >= self.ctx.debug_info.len) return null; - - const debug_info = self.ctx.debug_info[self.pos + self.cu.debug_info_off ..]; - var stream = std.io.fixedBufferStream(debug_info); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); - - const kind = try leb.readULEB128(u64, reader); - self.pos += (math.cast(usize, creader.bytes_read) orelse return error.Overflow); - - if (kind == 0) { - return AbbrevEntry.null(); - } - - const abbrev_pos = lookup.get(kind) orelse return null; - const len = try findAbbrevEntrySize( - self.ctx, - abbrev_pos.pos, - abbrev_pos.len, - self.pos + self.cu.debug_info_off, - self.cu.cuh, - ); - const entry = try getAbbrevEntry( - self.ctx, - abbrev_pos.pos, - abbrev_pos.len, - self.pos + self.cu.debug_info_off, - len, - ); - - self.pos += len; - - return entry; - } -}; - -pub const AbbrevEntry = struct { - tag: u64, - children: u8, - debug_abbrev_off: usize, - debug_abbrev_len: usize, - debug_info_off: usize, - debug_info_len: usize, - - fn @"null"() AbbrevEntry { - return .{ - .tag = 0, - .children = dwarf.CHILDREN.no, - .debug_abbrev_off = 0, - .debug_abbrev_len = 0, - .debug_info_off = 0, - .debug_info_len = 0, - }; - } - - pub fn hasChildren(self: AbbrevEntry) bool { - return self.children == dwarf.CHILDREN.yes; - } - - inline fn getDebugInfo(self: AbbrevEntry, ctx: DwarfInfo) []const u8 { - return 
ctx.debug_info[self.debug_info_off..][0..self.debug_info_len]; - } - - inline fn getDebugAbbrev(self: AbbrevEntry, ctx: DwarfInfo) []const u8 { - return ctx.debug_abbrev[self.debug_abbrev_off..][0..self.debug_abbrev_len]; - } - - pub fn getAttributeIterator(self: AbbrevEntry, ctx: DwarfInfo, cuh: CompileUnit.Header) AttributeIterator { - return .{ .entry = self, .ctx = ctx, .cuh = cuh }; - } -}; - -pub const Attribute = struct { - name: u64, - form: u64, - debug_info_off: usize, - debug_info_len: usize, - - inline fn getDebugInfo(self: Attribute, ctx: DwarfInfo) []const u8 { - return ctx.debug_info[self.debug_info_off..][0..self.debug_info_len]; - } - - pub fn getString(self: Attribute, ctx: DwarfInfo, cuh: CompileUnit.Header) ?[]const u8 { - const debug_info = self.getDebugInfo(ctx); - - switch (self.form) { - dwarf.FORM.string => { - return mem.sliceTo(@as([*:0]const u8, @ptrCast(debug_info.ptr)), 0); - }, - dwarf.FORM.strp => { - const off = if (cuh.is_64bit) - mem.readInt(u64, debug_info[0..8], .little) - else - mem.readInt(u32, debug_info[0..4], .little); - return ctx.getString(off); - }, - else => return null, - } - } - - pub fn getConstant(self: Attribute, ctx: DwarfInfo) !?i128 { - const debug_info = self.getDebugInfo(ctx); - var stream = std.io.fixedBufferStream(debug_info); - const reader = stream.reader(); - - return switch (self.form) { - dwarf.FORM.data1 => debug_info[0], - dwarf.FORM.data2 => mem.readInt(u16, debug_info[0..2], .little), - dwarf.FORM.data4 => mem.readInt(u32, debug_info[0..4], .little), - dwarf.FORM.data8 => mem.readInt(u64, debug_info[0..8], .little), - dwarf.FORM.udata => try leb.readULEB128(u64, reader), - dwarf.FORM.sdata => try leb.readILEB128(i64, reader), - else => null, - }; - } - - pub fn getAddr(self: Attribute, ctx: DwarfInfo, cuh: CompileUnit.Header) ?u64 { - if (self.form != dwarf.FORM.addr) return null; - const debug_info = self.getDebugInfo(ctx); - return switch (cuh.address_size) { - 1 => debug_info[0], - 2 => 
mem.readInt(u16, debug_info[0..2], .little), - 4 => mem.readInt(u32, debug_info[0..4], .little), - 8 => mem.readInt(u64, debug_info[0..8], .little), - else => unreachable, - }; - } -}; - -const AttributeIterator = struct { - entry: AbbrevEntry, - ctx: DwarfInfo, - cuh: CompileUnit.Header, - debug_abbrev_pos: usize = 0, - debug_info_pos: usize = 0, - - pub fn next(self: *AttributeIterator) !?Attribute { - const debug_abbrev = self.entry.getDebugAbbrev(self.ctx); - if (self.debug_abbrev_pos >= debug_abbrev.len) return null; - - var stream = std.io.fixedBufferStream(debug_abbrev[self.debug_abbrev_pos..]); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); - - const name = try leb.readULEB128(u64, reader); - const form = try leb.readULEB128(u64, reader); - - self.debug_abbrev_pos += (math.cast(usize, creader.bytes_read) orelse return error.Overflow); - - const len = try findFormSize( - self.ctx, - form, - self.debug_info_pos + self.entry.debug_info_off, - self.cuh, - ); - const attr = Attribute{ - .name = name, - .form = form, - .debug_info_off = self.debug_info_pos + self.entry.debug_info_off, - .debug_info_len = len, - }; - - self.debug_info_pos += len; - - return attr; - } -}; - -fn getAbbrevEntry(self: DwarfInfo, da_off: usize, da_len: usize, di_off: usize, di_len: usize) !AbbrevEntry { - const debug_abbrev = self.debug_abbrev[da_off..][0..da_len]; + const debug_abbrev = dw.debug_abbrev; var stream = std.io.fixedBufferStream(debug_abbrev); var creader = std.io.countingReader(stream.reader()); const reader = creader.reader(); - const tag = try leb.readULEB128(u64, reader); - const children = switch (tag) { - std.dwarf.TAG.const_type, - std.dwarf.TAG.packed_type, - std.dwarf.TAG.pointer_type, - std.dwarf.TAG.reference_type, - std.dwarf.TAG.restrict_type, - std.dwarf.TAG.rvalue_reference_type, - std.dwarf.TAG.shared_type, - std.dwarf.TAG.volatile_type, - => if (creader.bytes_read == da_len) std.dwarf.CHILDREN.no else try 
reader.readByte(), - else => try reader.readByte(), - }; + while (true) { + if (creader.bytes_read >= debug_abbrev.len) break; - const pos = math.cast(usize, creader.bytes_read) orelse return error.Overflow; + try dw.abbrev_tables.ensureUnusedCapacity(allocator, 1); + const table_gop = dw.abbrev_tables.getOrPutAssumeCapacity(@intCast(creader.bytes_read)); + assert(!table_gop.found_existing); + const table = table_gop.value_ptr; + table.* = .{}; - return AbbrevEntry{ - .tag = tag, - .children = children, - .debug_abbrev_off = pos + da_off, - .debug_abbrev_len = da_len - pos, - .debug_info_off = di_off, - .debug_info_len = di_len, - }; + while (true) { + const code = try leb.readULEB128(Code, reader); + if (code == 0) break; + + try table.decls.ensureUnusedCapacity(allocator, 1); + const decl_gop = table.decls.getOrPutAssumeCapacity(code); + assert(!decl_gop.found_existing); + const decl = decl_gop.value_ptr; + decl.* = .{ + .code = code, + .tag = undefined, + .children = false, + }; + decl.tag = try leb.readULEB128(Tag, reader); + decl.children = (try reader.readByte()) > 0; + + while (true) { + const at = try leb.readULEB128(At, reader); + const form = try leb.readULEB128(Form, reader); + if (at == 0 and form == 0) break; + + try decl.attrs.ensureUnusedCapacity(allocator, 1); + const attr_gop = decl.attrs.getOrPutAssumeCapacity(at); + assert(!attr_gop.found_existing); + const attr = attr_gop.value_ptr; + attr.* = .{ + .at = at, + .form = form, + }; + } + } + } } -fn findFormSize(self: DwarfInfo, form: u64, di_off: usize, cuh: CompileUnit.Header) !usize { - const debug_info = self.debug_info[di_off..]; +fn parseCompileUnits(dw: *DwarfInfo, allocator: Allocator) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const debug_info = dw.debug_info; var stream = std.io.fixedBufferStream(debug_info); var creader = std.io.countingReader(stream.reader()); const reader = creader.reader(); + while (true) { + if (creader.bytes_read == debug_info.len) break; + + 
const cu = try dw.compile_units.addOne(allocator); + cu.* = .{ + .header = undefined, + .pos = creader.bytes_read, + }; + + var length: u64 = try reader.readInt(u32, .little); + const is_64bit = length == 0xffffffff; + if (is_64bit) { + length = try reader.readInt(u64, .little); + } + cu.header.format = if (is_64bit) .dwarf64 else .dwarf32; + cu.header.length = length; + cu.header.version = try reader.readInt(u16, .little); + cu.header.debug_abbrev_offset = try readOffset(cu.header.format, reader); + cu.header.address_size = try reader.readInt(u8, .little); + + const table = dw.abbrev_tables.get(cu.header.debug_abbrev_offset).?; + try dw.parseDie(allocator, cu, table, null, &creader); + } +} + +fn parseDie( + dw: *DwarfInfo, + allocator: Allocator, + cu: *CompileUnit, + table: AbbrevTable, + parent: ?u32, + creader: anytype, +) anyerror!void { + const tracy = trace(@src()); + defer tracy.end(); + + while (creader.bytes_read < cu.nextCompileUnitOffset()) { + const die = try cu.addDie(allocator); + cu.diePtr(die).* = .{ .code = undefined }; + if (parent) |p| { + try cu.diePtr(p).children.append(allocator, die); + } else { + try cu.children.append(allocator, die); + } + + const code = try leb.readULEB128(Code, creader.reader()); + cu.diePtr(die).code = code; + + if (code == 0) { + if (parent == null) continue; + return; // Close scope + } + + const decl = table.decls.get(code) orelse return error.MalformedDwarf; // TODO better errors + const data = dw.debug_info; + try cu.diePtr(die).values.ensureTotalCapacityPrecise(allocator, decl.attrs.values().len); + + for (decl.attrs.values()) |attr| { + const start = creader.bytes_read; + try advanceByFormSize(cu, attr.form, creader); + const end = creader.bytes_read; + cu.diePtr(die).values.appendAssumeCapacity(data[start..end]); + } + + if (decl.children) { + // Open scope + try dw.parseDie(allocator, cu, table, die, creader); + } + } +} + +fn advanceByFormSize(cu: *CompileUnit, form: Form, creader: anytype) !void { + const 
tracy = trace(@src()); + defer tracy.end(); + + const reader = creader.reader(); switch (form) { dwarf.FORM.strp, dwarf.FORM.sec_offset, dwarf.FORM.ref_addr, - => return if (cuh.is_64bit) @sizeOf(u64) else @sizeOf(u32), + => { + _ = try readOffset(cu.header.format, reader); + }, - dwarf.FORM.addr => return cuh.address_size, + dwarf.FORM.addr => try reader.skipBytes(cu.header.address_size, .{}), dwarf.FORM.block1, dwarf.FORM.block2, @@ -386,119 +184,285 @@ fn findFormSize(self: DwarfInfo, form: u64, di_off: usize, cuh: CompileUnit.Head dwarf.FORM.block => try leb.readULEB128(u64, reader), else => unreachable, }; - var i: u64 = 0; - while (i < len) : (i += 1) { + for (0..len) |_| { _ = try reader.readByte(); } - return math.cast(usize, creader.bytes_read) orelse error.Overflow; }, dwarf.FORM.exprloc => { - const expr_len = try leb.readULEB128(u64, reader); - var i: u64 = 0; - while (i < expr_len) : (i += 1) { + const len = try leb.readULEB128(u64, reader); + for (0..len) |_| { _ = try reader.readByte(); } - return math.cast(usize, creader.bytes_read) orelse error.Overflow; }, - dwarf.FORM.flag_present => return 0, + dwarf.FORM.flag_present => {}, dwarf.FORM.data1, dwarf.FORM.ref1, dwarf.FORM.flag, - => return @sizeOf(u8), + => try reader.skipBytes(1, .{}), dwarf.FORM.data2, dwarf.FORM.ref2, - => return @sizeOf(u16), + => try reader.skipBytes(2, .{}), dwarf.FORM.data4, dwarf.FORM.ref4, - => return @sizeOf(u32), + => try reader.skipBytes(4, .{}), dwarf.FORM.data8, dwarf.FORM.ref8, dwarf.FORM.ref_sig8, - => return @sizeOf(u64), + => try reader.skipBytes(8, .{}), dwarf.FORM.udata, dwarf.FORM.ref_udata, => { _ = try leb.readULEB128(u64, reader); - return math.cast(usize, creader.bytes_read) orelse error.Overflow; }, dwarf.FORM.sdata => { _ = try leb.readILEB128(i64, reader); - return math.cast(usize, creader.bytes_read) orelse error.Overflow; }, dwarf.FORM.string => { - var count: usize = 0; while (true) { const byte = try reader.readByte(); - count += 1; if (byte == 0x0) 
break; } - return count; }, else => { - // TODO figure out how to handle this - log.debug("unhandled DW_FORM_* value with identifier {x}", .{form}); + // TODO better errors + log.err("unhandled DW_FORM_* value with identifier {x}", .{form}); return error.UnhandledDwFormValue; }, } } -fn findAbbrevEntrySize(self: DwarfInfo, da_off: usize, da_len: usize, di_off: usize, cuh: CompileUnit.Header) !usize { - const debug_abbrev = self.debug_abbrev[da_off..][0..da_len]; - var stream = std.io.fixedBufferStream(debug_abbrev); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); - - const tag = try leb.readULEB128(u64, reader); - switch (tag) { - std.dwarf.TAG.const_type, - std.dwarf.TAG.packed_type, - std.dwarf.TAG.pointer_type, - std.dwarf.TAG.reference_type, - std.dwarf.TAG.restrict_type, - std.dwarf.TAG.rvalue_reference_type, - std.dwarf.TAG.shared_type, - std.dwarf.TAG.volatile_type, - => if (creader.bytes_read != da_len) { - _ = try reader.readByte(); - }, - else => _ = try reader.readByte(), - } - - var len: usize = 0; - while (creader.bytes_read < debug_abbrev.len) { - _ = try leb.readULEB128(u64, reader); - const form = try leb.readULEB128(u64, reader); - const form_len = try self.findFormSize(form, di_off + len, cuh); - len += form_len; - } - - return len; +fn readOffset(format: Format, reader: anytype) !u64 { + return switch (format) { + .dwarf32 => try reader.readInt(u32, .little), + .dwarf64 => try reader.readInt(u64, .little), + }; } -fn getString(self: DwarfInfo, off: u64) []const u8 { - assert(off < self.debug_str.len); - return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.debug_str.ptr + @as(usize, @intCast(off)))), 0); -} +pub const AbbrevTable = struct { + /// Table of abbreviation declarations indexed by their assigned code value + decls: std.AutoArrayHashMapUnmanaged(Code, Decl) = .{}, -const DwarfInfo = @This(); + pub fn deinit(table: *AbbrevTable, gpa: Allocator) void { + for (table.decls.values()) |*decl| { + 
decl.deinit(gpa); + } + table.decls.deinit(gpa); + } +}; + +pub const Decl = struct { + code: Code, + tag: Tag, + children: bool, + + /// Table of attributes indexed by their AT value + attrs: std.AutoArrayHashMapUnmanaged(At, Attr) = .{}, + + pub fn deinit(decl: *Decl, gpa: Allocator) void { + decl.attrs.deinit(gpa); + } +}; + +pub const Attr = struct { + at: At, + form: Form, +}; + +pub const At = u64; +pub const Code = u64; +pub const Form = u64; +pub const Tag = u64; + +pub const CompileUnitHeader = struct { + format: Format, + length: u64, + version: u16, + debug_abbrev_offset: u64, + address_size: u8, +}; + +pub const CompileUnit = struct { + header: CompileUnitHeader, + pos: usize, + dies: std.ArrayListUnmanaged(Die) = .{}, + children: std.ArrayListUnmanaged(Die.Index) = .{}, + + pub fn deinit(cu: *CompileUnit, gpa: Allocator) void { + for (cu.dies.items) |*die| { + die.deinit(gpa); + } + cu.dies.deinit(gpa); + cu.children.deinit(gpa); + } + + pub fn addDie(cu: *CompileUnit, gpa: Allocator) !Die.Index { + const index = @as(Die.Index, @intCast(cu.dies.items.len)); + _ = try cu.dies.addOne(gpa); + return index; + } + + pub fn diePtr(cu: *CompileUnit, index: Die.Index) *Die { + return &cu.dies.items[index]; + } + + pub fn getCompileDir(cu: CompileUnit, ctx: DwarfInfo) ?[:0]const u8 { + assert(cu.dies.items.len > 0); + const die = cu.dies.items[0]; + const res = die.find(dwarf.AT.comp_dir, cu, ctx) orelse return null; + return res.getString(cu.header.format, ctx); + } + + pub fn getSourceFile(cu: CompileUnit, ctx: DwarfInfo) ?[:0]const u8 { + assert(cu.dies.items.len > 0); + const die = cu.dies.items[0]; + const res = die.find(dwarf.AT.name, cu, ctx) orelse return null; + return res.getString(cu.header.format, ctx); + } + + pub fn nextCompileUnitOffset(cu: CompileUnit) u64 { + return cu.pos + switch (cu.header.format) { + .dwarf32 => @as(u64, 4), + .dwarf64 => 12, + } + cu.header.length; + } +}; + +pub const Die = struct { + code: Code, + values: 
std.ArrayListUnmanaged([]const u8) = .{}, + children: std.ArrayListUnmanaged(Die.Index) = .{}, + + pub fn deinit(die: *Die, gpa: Allocator) void { + die.values.deinit(gpa); + die.children.deinit(gpa); + } + + pub fn find(die: Die, at: At, cu: CompileUnit, ctx: DwarfInfo) ?DieValue { + const table = ctx.abbrev_tables.get(cu.header.debug_abbrev_offset) orelse return null; + const decl = table.decls.get(die.code).?; + const index = decl.attrs.getIndex(at) orelse return null; + const attr = decl.attrs.values()[index]; + const value = die.values.items[index]; + return .{ .attr = attr, .bytes = value }; + } + + pub const Index = u32; +}; + +pub const DieValue = struct { + attr: Attr, + bytes: []const u8, + + pub fn getFlag(value: DieValue) ?bool { + return switch (value.attr.form) { + dwarf.FORM.flag => value.bytes[0] == 1, + dwarf.FORM.flag_present => true, + else => null, + }; + } + + pub fn getString(value: DieValue, format: Format, ctx: DwarfInfo) ?[:0]const u8 { + switch (value.attr.form) { + dwarf.FORM.string => { + return mem.sliceTo(@as([*:0]const u8, @ptrCast(value.bytes.ptr)), 0); + }, + dwarf.FORM.strp => { + const off = switch (format) { + .dwarf64 => mem.readInt(u64, value.bytes[0..8], .little), + .dwarf32 => mem.readInt(u32, value.bytes[0..4], .little), + }; + return ctx.getString(off); + }, + else => return null, + } + } + + pub fn getSecOffset(value: DieValue, format: Format) ?u64 { + return switch (value.attr.form) { + dwarf.FORM.sec_offset => switch (format) { + .dwarf32 => mem.readInt(u32, value.bytes[0..4], .little), + .dwarf64 => mem.readInt(u64, value.bytes[0..8], .little), + }, + else => null, + }; + } + + pub fn getConstant(value: DieValue) !?i128 { + var stream = std.io.fixedBufferStream(value.bytes); + const reader = stream.reader(); + return switch (value.attr.form) { + dwarf.FORM.data1 => value.bytes[0], + dwarf.FORM.data2 => mem.readInt(u16, value.bytes[0..2], .little), + dwarf.FORM.data4 => mem.readInt(u32, value.bytes[0..4], .little), + 
dwarf.FORM.data8 => mem.readInt(u64, value.bytes[0..8], .little), + dwarf.FORM.udata => try leb.readULEB128(u64, reader), + dwarf.FORM.sdata => try leb.readILEB128(i64, reader), + else => null, + }; + } + + pub fn getReference(value: DieValue, format: Format) !?u64 { + var stream = std.io.fixedBufferStream(value.bytes); + const reader = stream.reader(); + return switch (value.attr.form) { + dwarf.FORM.ref1 => value.bytes[0], + dwarf.FORM.ref2 => mem.readInt(u16, value.bytes[0..2], .little), + dwarf.FORM.ref4 => mem.readInt(u32, value.bytes[0..4], .little), + dwarf.FORM.ref8 => mem.readInt(u64, value.bytes[0..8], .little), + dwarf.FORM.ref_udata => try leb.readULEB128(u64, reader), + dwarf.FORM.ref_addr => switch (format) { + .dwarf32 => mem.readInt(u32, value.bytes[0..4], .little), + .dwarf64 => mem.readInt(u64, value.bytes[0..8], .little), + }, + else => null, + }; + } + + pub fn getAddr(value: DieValue, header: CompileUnitHeader) ?u64 { + return switch (value.attr.form) { + dwarf.FORM.addr => switch (header.address_size) { + 1 => value.bytes[0], + 2 => mem.readInt(u16, value.bytes[0..2], .little), + 4 => mem.readInt(u32, value.bytes[0..4], .little), + 8 => mem.readInt(u64, value.bytes[0..8], .little), + else => null, + }, + else => null, + }; + } + + pub fn getExprloc(value: DieValue) !?[]const u8 { + if (value.attr.form != dwarf.FORM.exprloc) return null; + var stream = std.io.fixedBufferStream(value.bytes); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + const expr_len = try leb.readULEB128(u64, reader); + return value.bytes[creader.bytes_read..][0..expr_len]; + } +}; + +pub const Format = enum { + dwarf32, + dwarf64, +}; -const std = @import("std"); const assert = std.debug.assert; const dwarf = std.dwarf; const leb = std.leb; -const log = std.log.scoped(.macho); -const math = std.math; +const log = std.log.scoped(.link); const mem = std.mem; +const std = @import("std"); +const trace = @import("../tracy.zig").trace; 
const Allocator = mem.Allocator; -pub const AbbrevLookupTable = std.AutoHashMap(u64, struct { pos: usize, len: usize }); -pub const SubprogramLookupByName = std.StringHashMap(struct { addr: u64, size: u64 }); +const DwarfInfo = @This(); +const MachO = @import("../MachO.zig"); diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index 65d503b1ae..4944c4d5ef 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -1,20 +1,716 @@ path: []const u8, -id: ?Id = null, -weak: bool = false, -/// Header is only set if Dylib is parsed directly from a binary and not a stub file. -header: ?macho.mach_header_64 = null, +data: []const u8, +index: File.Index, -/// Parsed symbol table represented as hash map of symbols' -/// names. We can and should defer creating *Symbols until -/// a symbol is referenced by an object file. -/// -/// The value for each parsed symbol represents whether the -/// symbol is defined as a weak symbol or strong. -/// TODO when the referenced symbol is weak, ld64 marks it as -/// N_REF_TO_WEAK but need to investigate if there's more to it -/// such as weak binding entry or simply weak. For now, we generate -/// standard bind or lazy bind. 
-symbols: std.StringArrayHashMapUnmanaged(bool) = .{}, +header: ?macho.mach_header_64 = null, +exports: std.MultiArrayList(Export) = .{}, +strtab: std.ArrayListUnmanaged(u8) = .{}, +id: ?Id = null, +ordinal: u16 = 0, + +symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, +dependents: std.ArrayListUnmanaged(Id) = .{}, +rpaths: std.StringArrayHashMapUnmanaged(void) = .{}, +umbrella: File.Index = 0, +platform: ?MachO.Options.Platform = null, + +needed: bool, +weak: bool, +reexport: bool, +explicit: bool, +hoisted: bool = true, +referenced: bool = false, + +output_symtab_ctx: MachO.SymtabCtx = .{}, + +pub fn deinit(self: *Dylib, allocator: Allocator) void { + self.exports.deinit(allocator); + self.strtab.deinit(allocator); + if (self.id) |*id| id.deinit(allocator); + self.symbols.deinit(allocator); + for (self.dependents.items) |*id| { + id.deinit(allocator); + } + self.dependents.deinit(allocator); + self.rpaths.deinit(allocator); +} + +pub fn parse(self: *Dylib, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.allocator; + var stream = std.io.fixedBufferStream(self.data); + const reader = stream.reader(); + + log.debug("parsing dylib from binary", .{}); + + self.header = try reader.readStruct(macho.mach_header_64); + + const lc_id = self.getLoadCommand(.ID_DYLIB) orelse { + macho_file.base.fatal("{s}: missing LC_ID_DYLIB load command", .{self.path}); + return error.ParseFailed; + }; + self.id = try Id.fromLoadCommand(gpa, lc_id.cast(macho.dylib_command).?, lc_id.getDylibPathName()); + + var it = LoadCommandIterator{ + .ncmds = self.header.?.ncmds, + .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], + }; + while (it.next()) |cmd| switch (cmd.cmd()) { + .REEXPORT_DYLIB => if (self.header.?.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0) { + const id = try Id.fromLoadCommand(gpa, cmd.cast(macho.dylib_command).?, cmd.getDylibPathName()); + try self.dependents.append(gpa, id); + 
}, + .DYLD_INFO_ONLY => { + const dyld_cmd = cmd.cast(macho.dyld_info_command).?; + const data = self.data[dyld_cmd.export_off..][0..dyld_cmd.export_size]; + try self.parseTrie(data, macho_file); + }, + .DYLD_EXPORTS_TRIE => { + const ld_cmd = cmd.cast(macho.linkedit_data_command).?; + const data = self.data[ld_cmd.dataoff..][0..ld_cmd.datasize]; + try self.parseTrie(data, macho_file); + }, + .RPATH => { + const path = cmd.getRpathPathName(); + try self.rpaths.put(gpa, path, {}); + }, + else => {}, + }; + + self.initPlatform(); +} + +const TrieIterator = struct { + data: []const u8, + pos: usize = 0, + + fn getStream(it: *TrieIterator) std.io.FixedBufferStream([]const u8) { + return std.io.fixedBufferStream(it.data[it.pos..]); + } + + fn readULEB128(it: *TrieIterator) !u64 { + var stream = it.getStream(); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + const value = try std.leb.readULEB128(u64, reader); + it.pos += creader.bytes_read; + return value; + } + + fn readString(it: *TrieIterator) ![:0]const u8 { + var stream = it.getStream(); + const reader = stream.reader(); + + var count: usize = 0; + while (true) : (count += 1) { + const byte = try reader.readByte(); + if (byte == 0) break; + } + + const str = @as([*:0]const u8, @ptrCast(it.data.ptr + it.pos))[0..count :0]; + it.pos += count + 1; + return str; + } + + fn readByte(it: *TrieIterator) !u8 { + var stream = it.getStream(); + const value = try stream.reader().readByte(); + it.pos += 1; + return value; + } +}; + +pub fn addExport(self: *Dylib, allocator: Allocator, name: []const u8, flags: Export.Flags) !void { + try self.exports.append(allocator, .{ + .name = try self.insertString(allocator, name), + .flags = flags, + }); +} + +fn parseTrieNode( + self: *Dylib, + it: *TrieIterator, + allocator: Allocator, + arena: Allocator, + prefix: []const u8, +) !void { + const tracy = trace(@src()); + defer tracy.end(); + const size = try it.readULEB128(); + if (size > 0) { 
+ const flags = try it.readULEB128(); + const kind = flags & macho.EXPORT_SYMBOL_FLAGS_KIND_MASK; + const out_flags = Export.Flags{ + .abs = kind == macho.EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE, + .tlv = kind == macho.EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL, + .weak = flags & macho.EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION != 0, + }; + if (flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT != 0) { + _ = try it.readULEB128(); // dylib ordinal + const name = try it.readString(); + try self.addExport(allocator, if (name.len > 0) name else prefix, out_flags); + } else if (flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER != 0) { + _ = try it.readULEB128(); // stub offset + _ = try it.readULEB128(); // resolver offset + try self.addExport(allocator, prefix, out_flags); + } else { + _ = try it.readULEB128(); // VM offset + try self.addExport(allocator, prefix, out_flags); + } + } + + const nedges = try it.readByte(); + + for (0..nedges) |_| { + const label = try it.readString(); + const off = try it.readULEB128(); + const prefix_label = try std.fmt.allocPrint(arena, "{s}{s}", .{ prefix, label }); + const curr = it.pos; + it.pos = off; + try self.parseTrieNode(it, allocator, arena, prefix_label); + it.pos = curr; + } +} + +fn parseTrie(self: *Dylib, data: []const u8, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; + var arena = std.heap.ArenaAllocator.init(gpa); + defer arena.deinit(); + + var it: TrieIterator = .{ .data = data }; + try self.parseTrieNode(&it, gpa, arena.allocator(), ""); +} + +pub fn parseTbd( + self: *Dylib, + cpu_arch: std.Target.Cpu.Arch, + platform: ?MachO.Options.Platform, + lib_stub: LibStub, + macho_file: *MachO, +) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; + + log.debug("parsing dylib from stub", .{}); + + const umbrella_lib = lib_stub.inner[0]; + + { + var id = try Id.default(gpa, umbrella_lib.installName()); + if 
(umbrella_lib.currentVersion()) |version| { + try id.parseCurrentVersion(version); + } + if (umbrella_lib.compatibilityVersion()) |version| { + try id.parseCompatibilityVersion(version); + } + self.id = id; + } + + var umbrella_libs = std.StringHashMap(void).init(gpa); + defer umbrella_libs.deinit(); + + log.debug(" (install_name '{s}')", .{umbrella_lib.installName()}); + + self.platform = platform orelse .{ + .platform = .MACOS, + .version = .{ .value = 0 }, + }; + + var matcher = try TargetMatcher.init(gpa, cpu_arch, self.platform.?.platform); + defer matcher.deinit(); + + for (lib_stub.inner, 0..) |elem, stub_index| { + if (!(try matcher.matchesTargetTbd(elem))) continue; + + if (stub_index > 0) { + // TODO I thought that we could switch on presence of `parent-umbrella` map; + // however, turns out `libsystem_notify.dylib` is fully reexported by `libSystem.dylib` + // BUT does not feature a `parent-umbrella` map as the only sublib. Apple's bug perhaps? + try umbrella_libs.put(elem.installName(), {}); + } + + switch (elem) { + .v3 => |stub| { + if (stub.exports) |exports| { + for (exports) |exp| { + if (!matcher.matchesArch(exp.archs)) continue; + + if (exp.symbols) |symbols| { + for (symbols) |sym_name| { + try self.addExport(gpa, sym_name, .{}); + } + } + + if (exp.weak_symbols) |symbols| { + for (symbols) |sym_name| { + try self.addExport(gpa, sym_name, .{ .weak = true }); + } + } + + if (exp.objc_classes) |objc_classes| { + for (objc_classes) |class_name| { + try self.addObjCClass(gpa, class_name); + } + } + + if (exp.objc_ivars) |objc_ivars| { + for (objc_ivars) |ivar| { + try self.addObjCIVar(gpa, ivar); + } + } + + if (exp.objc_eh_types) |objc_eh_types| { + for (objc_eh_types) |eht| { + try self.addObjCEhType(gpa, eht); + } + } + + if (exp.re_exports) |re_exports| { + for (re_exports) |lib| { + if (umbrella_libs.contains(lib)) continue; + + log.debug(" (found re-export '{s}')", .{lib}); + + const dep_id = try Id.default(gpa, lib); + try 
self.dependents.append(gpa, dep_id); + } + } + } + } + }, + .v4 => |stub| { + if (stub.exports) |exports| { + for (exports) |exp| { + if (!matcher.matchesTarget(exp.targets)) continue; + + if (exp.symbols) |symbols| { + for (symbols) |sym_name| { + try self.addExport(gpa, sym_name, .{}); + } + } + + if (exp.weak_symbols) |symbols| { + for (symbols) |sym_name| { + try self.addExport(gpa, sym_name, .{ .weak = true }); + } + } + + if (exp.objc_classes) |classes| { + for (classes) |sym_name| { + try self.addObjCClass(gpa, sym_name); + } + } + + if (exp.objc_ivars) |objc_ivars| { + for (objc_ivars) |ivar| { + try self.addObjCIVar(gpa, ivar); + } + } + + if (exp.objc_eh_types) |objc_eh_types| { + for (objc_eh_types) |eht| { + try self.addObjCEhType(gpa, eht); + } + } + } + } + + if (stub.reexports) |reexports| { + for (reexports) |reexp| { + if (!matcher.matchesTarget(reexp.targets)) continue; + + if (reexp.symbols) |symbols| { + for (symbols) |sym_name| { + try self.addExport(gpa, sym_name, .{}); + } + } + + if (reexp.weak_symbols) |symbols| { + for (symbols) |sym_name| { + try self.addExport(gpa, sym_name, .{ .weak = true }); + } + } + + if (reexp.objc_classes) |classes| { + for (classes) |sym_name| { + try self.addObjCClass(gpa, sym_name); + } + } + + if (reexp.objc_ivars) |objc_ivars| { + for (objc_ivars) |ivar| { + try self.addObjCIVar(gpa, ivar); + } + } + + if (reexp.objc_eh_types) |objc_eh_types| { + for (objc_eh_types) |eht| { + try self.addObjCEhType(gpa, eht); + } + } + } + } + + if (stub.objc_classes) |classes| { + for (classes) |sym_name| { + try self.addObjCClass(gpa, sym_name); + } + } + + if (stub.objc_ivars) |objc_ivars| { + for (objc_ivars) |ivar| { + try self.addObjCIVar(gpa, ivar); + } + } + + if (stub.objc_eh_types) |objc_eh_types| { + for (objc_eh_types) |eht| { + try self.addObjCEhType(gpa, eht); + } + } + }, + } + } + + // For V4, we add dependent libs in a separate pass since some stubs such as libSystem include + // re-exports directly in the 
stub file. + for (lib_stub.inner) |elem| { + if (elem == .v3) continue; + const stub = elem.v4; + + if (stub.reexported_libraries) |reexports| { + for (reexports) |reexp| { + if (!matcher.matchesTarget(reexp.targets)) continue; + + for (reexp.libraries) |lib| { + if (umbrella_libs.contains(lib)) continue; + + log.debug(" (found re-export '{s}')", .{lib}); + + const dep_id = try Id.default(gpa, lib); + try self.dependents.append(gpa, dep_id); + } + } + } + } +} + +fn addObjCClass(self: *Dylib, allocator: Allocator, name: []const u8) !void { + try self.addObjCExport(allocator, "_OBJC_CLASS_", name); + try self.addObjCExport(allocator, "_OBJC_METACLASS_", name); +} + +fn addObjCIVar(self: *Dylib, allocator: Allocator, name: []const u8) !void { + try self.addObjCExport(allocator, "_OBJC_IVAR_", name); +} + +fn addObjCEhType(self: *Dylib, allocator: Allocator, name: []const u8) !void { + try self.addObjCExport(allocator, "_OBJC_EHTYPE_", name); +} + +fn addObjCExport( + self: *Dylib, + allocator: Allocator, + comptime prefix: []const u8, + name: []const u8, +) !void { + const full_name = try std.fmt.allocPrint(allocator, prefix ++ "$_{s}", .{name}); + defer allocator.free(full_name); + try self.addExport(allocator, full_name, .{}); +} + +pub fn initSymbols(self: *Dylib, macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; + + try self.symbols.ensureTotalCapacityPrecise(gpa, self.exports.items(.name).len); + + for (self.exports.items(.name)) |noff| { + const name = self.getString(noff); + const off = try macho_file.string_intern.insert(gpa, name); + const gop = try macho_file.getOrCreateGlobal(off); + self.symbols.addOneAssumeCapacity().* = gop.index; + } +} + +fn initPlatform(self: *Dylib) void { + var it = LoadCommandIterator{ + .ncmds = self.header.?.ncmds, + .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], + }; + self.platform = while (it.next()) |cmd| { + switch (cmd.cmd()) { + .BUILD_VERSION, + .VERSION_MIN_MACOSX, 
+ .VERSION_MIN_IPHONEOS, + .VERSION_MIN_TVOS, + .VERSION_MIN_WATCHOS, + => break MachO.Options.Platform.fromLoadCommand(cmd), + else => {}, + } + } else null; +} + +pub fn resolveSymbols(self: *Dylib, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + if (!self.explicit and !self.hoisted) return; + + for (self.symbols.items, self.exports.items(.flags)) |index, flags| { + const global = macho_file.getSymbol(index); + if (self.asFile().getSymbolRank(.{ + .weak = flags.weak, + }) < global.getSymbolRank(macho_file)) { + global.value = 0; + global.atom = 0; + global.nlist_idx = 0; + global.file = self.index; + global.flags.weak = flags.weak; + global.flags.weak_ref = false; + global.flags.tlv = flags.tlv; + global.flags.dyn_ref = false; + global.flags.tentative = false; + global.visibility = .global; + } + } +} + +pub fn resetGlobals(self: *Dylib, macho_file: *MachO) void { + for (self.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + const name = sym.name; + sym.* = .{}; + sym.name = name; + } +} + +pub fn isAlive(self: Dylib, macho_file: *MachO) bool { + if (!macho_file.options.dead_strip_dylibs) return self.explicit or self.referenced or self.needed; + return self.referenced or self.needed; +} + +pub fn markReferenced(self: *Dylib, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items) |global_index| { + const global = macho_file.getSymbol(global_index); + const file_ptr = global.getFile(macho_file) orelse continue; + if (file_ptr.getIndex() != self.index) continue; + if (global.isLocal()) continue; + self.referenced = true; + break; + } +} + +pub fn calcSymtabSize(self: *Dylib, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items) |global_index| { + const global = macho_file.getSymbol(global_index); + const file_ptr = global.getFile(macho_file) orelse continue; + if (file_ptr.getIndex() != self.index) 
continue; + if (global.isLocal()) continue; + assert(global.flags.import); + global.flags.output_symtab = true; + try global.addExtra(.{ .symtab = self.output_symtab_ctx.nimports }, macho_file); + self.output_symtab_ctx.nimports += 1; + self.output_symtab_ctx.strsize += @as(u32, @intCast(global.getName(macho_file).len + 1)); + } +} + +pub fn writeSymtab(self: Dylib, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items) |global_index| { + const global = macho_file.getSymbol(global_index); + const file = global.getFile(macho_file) orelse continue; + if (file.getIndex() != self.index) continue; + const idx = global.getOutputSymtabIndex(macho_file) orelse continue; + const n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + macho_file.strtab.appendSliceAssumeCapacity(global.getName(macho_file)); + macho_file.strtab.appendAssumeCapacity(0); + const out_sym = &macho_file.symtab.items[idx]; + out_sym.n_strx = n_strx; + global.setOutputSym(macho_file, out_sym); + } +} + +pub inline fn getUmbrella(self: Dylib, macho_file: *MachO) *Dylib { + return macho_file.getFile(self.umbrella).?.dylib; +} + +fn getLoadCommand(self: Dylib, lc: macho.LC) ?LoadCommandIterator.LoadCommand { + var it = LoadCommandIterator{ + .ncmds = self.header.?.ncmds, + .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], + }; + while (it.next()) |cmd| { + if (cmd.cmd() == lc) return cmd; + } else return null; +} + +fn insertString(self: *Dylib, allocator: Allocator, name: []const u8) !u32 { + const off = @as(u32, @intCast(self.strtab.items.len)); + try self.strtab.writer(allocator).print("{s}\x00", .{name}); + return off; +} + +pub inline fn getString(self: Dylib, off: u32) [:0]const u8 { + assert(off < self.strtab.items.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0); +} + +pub fn asFile(self: *Dylib) File { + return .{ .dylib = self }; +} + +pub fn format( + self: *Dylib, 
+ comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = self; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format dylib directly"); +} + +pub fn fmtSymtab(self: *Dylib, macho_file: *MachO) std.fmt.Formatter(formatSymtab) { + return .{ .data = .{ + .dylib = self, + .macho_file = macho_file, + } }; +} + +const FormatContext = struct { + dylib: *Dylib, + macho_file: *MachO, +}; + +fn formatSymtab( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + const dylib = ctx.dylib; + try writer.writeAll(" globals\n"); + for (dylib.symbols.items) |index| { + const global = ctx.macho_file.getSymbol(index); + try writer.print(" {}\n", .{global.fmt(ctx.macho_file)}); + } +} + +pub const TargetMatcher = struct { + allocator: Allocator, + cpu_arch: std.Target.Cpu.Arch, + platform: macho.PLATFORM, + target_strings: std.ArrayListUnmanaged([]const u8) = .{}, + + pub fn init(allocator: Allocator, cpu_arch: std.Target.Cpu.Arch, platform: macho.PLATFORM) !TargetMatcher { + var self = TargetMatcher{ + .allocator = allocator, + .cpu_arch = cpu_arch, + .platform = platform, + }; + const apple_string = try targetToAppleString(allocator, cpu_arch, platform); + try self.target_strings.append(allocator, apple_string); + + switch (platform) { + .IOSSIMULATOR, .TVOSSIMULATOR, .WATCHOSSIMULATOR => { + // For Apple simulator targets, linking gets tricky as we need to link against the simulator + // hosts dylibs too. 
+ const host_target = try targetToAppleString(allocator, cpu_arch, .MACOS); + try self.target_strings.append(allocator, host_target); + }, + else => {}, + } + + return self; + } + + pub fn deinit(self: *TargetMatcher) void { + for (self.target_strings.items) |t| { + self.allocator.free(t); + } + self.target_strings.deinit(self.allocator); + } + + inline fn cpuArchToAppleString(cpu_arch: std.Target.Cpu.Arch) []const u8 { + return switch (cpu_arch) { + .aarch64 => "arm64", + .x86_64 => "x86_64", + else => unreachable, + }; + } + + pub fn targetToAppleString(allocator: Allocator, cpu_arch: std.Target.Cpu.Arch, platform: macho.PLATFORM) ![]const u8 { + const arch = cpuArchToAppleString(cpu_arch); + const plat = switch (platform) { + .MACOS => "macos", + .IOS => "ios", + .TVOS => "tvos", + .WATCHOS => "watchos", + .IOSSIMULATOR => "ios-simulator", + .TVOSSIMULATOR => "tvos-simulator", + .WATCHOSSIMULATOR => "watchos-simulator", + .BRIDGEOS => "bridgeos", + .MACCATALYST => "maccatalyst", + .DRIVERKIT => "driverkit", + else => unreachable, + }; + return std.fmt.allocPrint(allocator, "{s}-{s}", .{ arch, plat }); + } + + fn hasValue(stack: []const []const u8, needle: []const u8) bool { + for (stack) |v| { + if (mem.eql(u8, v, needle)) return true; + } + return false; + } + + fn matchesArch(self: TargetMatcher, archs: []const []const u8) bool { + return hasValue(archs, cpuArchToAppleString(self.cpu_arch)); + } + + fn matchesTarget(self: TargetMatcher, targets: []const []const u8) bool { + for (self.target_strings.items) |t| { + if (hasValue(targets, t)) return true; + } + return false; + } + + pub fn matchesTargetTbd(self: TargetMatcher, tbd: Tbd) !bool { + var arena = std.heap.ArenaAllocator.init(self.allocator); + defer arena.deinit(); + + const targets = switch (tbd) { + .v3 => |v3| blk: { + var targets = std.ArrayList([]const u8).init(arena.allocator()); + for (v3.archs) |arch| { + const target = try std.fmt.allocPrint(arena.allocator(), "{s}-{s}", .{ arch, v3.platform 
}); + try targets.append(target); + } + break :blk targets.items; + }, + .v4 => |v4| v4.targets, + }; + + return self.matchesTarget(targets); + } +}; pub const Id = struct { name: []const u8, @@ -76,7 +772,7 @@ pub const Id = struct { var out: u32 = 0; var values: [3][]const u8 = undefined; - var split = mem.splitScalar(u8, string, '.'); + var split = mem.split(u8, string, "."); var count: u4 = 0; while (split.next()) |value| { if (count > 2) { @@ -99,458 +795,34 @@ pub const Id = struct { } }; -pub fn isDylib(file: std.fs.File, fat_offset: u64) bool { - const reader = file.reader(); - const hdr = reader.readStruct(macho.mach_header_64) catch return false; - defer file.seekTo(fat_offset) catch {}; - return hdr.filetype == macho.MH_DYLIB; -} +const Export = struct { + name: u32, + flags: Flags, -pub fn deinit(self: *Dylib, allocator: Allocator) void { - allocator.free(self.path); - for (self.symbols.keys()) |key| { - allocator.free(key); - } - self.symbols.deinit(allocator); - if (self.id) |*id| { - id.deinit(allocator); - } -} - -pub fn parseFromBinary( - self: *Dylib, - allocator: Allocator, - dylib_id: u16, - dependent_libs: anytype, - name: []const u8, - data: []align(@alignOf(u64)) const u8, -) !void { - var stream = std.io.fixedBufferStream(data); - const reader = stream.reader(); - - log.debug("parsing shared library '{s}'", .{name}); - - self.header = try reader.readStruct(macho.mach_header_64); - - const should_lookup_reexports = self.header.?.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0; - var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], + const Flags = packed struct { + abs: bool = false, + weak: bool = false, + tlv: bool = false, }; - while (it.next()) |cmd| { - switch (cmd.cmd()) { - .SYMTAB => { - const symtab_cmd = cmd.cast(macho.symtab_command).?; - const symtab = @as( - [*]const macho.nlist_64, - // Alignment is guaranteed as a dylib is a final linked image 
and has to have sections - // properly aligned in order to be correctly loaded by the loader. - @ptrCast(@alignCast(&data[symtab_cmd.symoff])), - )[0..symtab_cmd.nsyms]; - const strtab = data[symtab_cmd.stroff..][0..symtab_cmd.strsize]; - - for (symtab) |sym| { - const add_to_symtab = sym.ext() and (sym.sect() or sym.indr()); - if (!add_to_symtab) continue; - - const sym_name = mem.sliceTo(@as([*:0]const u8, @ptrCast(strtab.ptr + sym.n_strx)), 0); - try self.symbols.putNoClobber(allocator, try allocator.dupe(u8, sym_name), false); - } - }, - .ID_DYLIB => { - self.id = try Id.fromLoadCommand( - allocator, - cmd.cast(macho.dylib_command).?, - cmd.getDylibPathName(), - ); - }, - .REEXPORT_DYLIB => { - if (should_lookup_reexports) { - // Parse install_name to dependent dylib. - const id = try Id.fromLoadCommand( - allocator, - cmd.cast(macho.dylib_command).?, - cmd.getDylibPathName(), - ); - try dependent_libs.writeItem(.{ .id = id, .parent = dylib_id }); - } - }, - else => {}, - } - } -} - -/// Returns Platform composed from the first encountered build version type load command: -/// either LC_BUILD_VERSION or LC_VERSION_MIN_*. 
-pub fn getPlatform(self: Dylib, data: []align(@alignOf(u64)) const u8) ?Platform { - var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], - }; - while (it.next()) |cmd| { - switch (cmd.cmd()) { - .BUILD_VERSION, - .VERSION_MIN_MACOSX, - .VERSION_MIN_IPHONEOS, - .VERSION_MIN_TVOS, - .VERSION_MIN_WATCHOS, - => return Platform.fromLoadCommand(cmd), - else => {}, - } - } else return null; -} - -fn addObjCClassSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void { - const expanded = &[_][]const u8{ - try std.fmt.allocPrint(allocator, "_OBJC_CLASS_$_{s}", .{sym_name}), - try std.fmt.allocPrint(allocator, "_OBJC_METACLASS_$_{s}", .{sym_name}), - }; - - for (expanded) |sym| { - if (self.symbols.contains(sym)) continue; - try self.symbols.putNoClobber(allocator, sym, false); - } -} - -fn addObjCIVarSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void { - const expanded = try std.fmt.allocPrint(allocator, "_OBJC_IVAR_$_{s}", .{sym_name}); - if (self.symbols.contains(expanded)) return; - try self.symbols.putNoClobber(allocator, expanded, false); -} - -fn addObjCEhTypeSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void { - const expanded = try std.fmt.allocPrint(allocator, "_OBJC_EHTYPE_$_{s}", .{sym_name}); - if (self.symbols.contains(expanded)) return; - try self.symbols.putNoClobber(allocator, expanded, false); -} - -fn addSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void { - if (self.symbols.contains(sym_name)) return; - try self.symbols.putNoClobber(allocator, try allocator.dupe(u8, sym_name), false); -} - -fn addWeakSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void { - if (self.symbols.contains(sym_name)) return; - try self.symbols.putNoClobber(allocator, try allocator.dupe(u8, sym_name), true); -} - -pub const TargetMatcher = struct { - allocator: Allocator, - cpu_arch: 
std.Target.Cpu.Arch, - os_tag: std.Target.Os.Tag, - abi: std.Target.Abi, - target_strings: std.ArrayListUnmanaged([]const u8) = .{}, - - pub fn init(allocator: Allocator, target: std.Target) !TargetMatcher { - var self = TargetMatcher{ - .allocator = allocator, - .cpu_arch = target.cpu.arch, - .os_tag = target.os.tag, - .abi = target.abi, - }; - const apple_string = try toAppleTargetTriple(allocator, self.cpu_arch, self.os_tag, self.abi); - try self.target_strings.append(allocator, apple_string); - - if (self.abi == .simulator) { - // For Apple simulator targets, linking gets tricky as we need to link against the simulator - // hosts dylibs too. - const host_target = try toAppleTargetTriple(allocator, self.cpu_arch, .macos, .none); - try self.target_strings.append(allocator, host_target); - } - - return self; - } - - pub fn deinit(self: *TargetMatcher) void { - for (self.target_strings.items) |t| { - self.allocator.free(t); - } - self.target_strings.deinit(self.allocator); - } - - inline fn fmtCpuArch(cpu_arch: std.Target.Cpu.Arch) []const u8 { - return switch (cpu_arch) { - .aarch64 => "arm64", - .x86_64 => "x86_64", - else => unreachable, - }; - } - - inline fn fmtAbi(abi: std.Target.Abi) ?[]const u8 { - return switch (abi) { - .none => null, - .simulator => "simulator", - .macabi => "maccatalyst", - else => unreachable, - }; - } - - pub fn toAppleTargetTriple( - allocator: Allocator, - cpu_arch: std.Target.Cpu.Arch, - os_tag: std.Target.Os.Tag, - abi: std.Target.Abi, - ) ![]const u8 { - const cpu_arch_s = fmtCpuArch(cpu_arch); - const os_tag_s = @tagName(os_tag); - if (fmtAbi(abi)) |abi_s| { - return std.fmt.allocPrint(allocator, "{s}-{s}-{s}", .{ cpu_arch_s, os_tag_s, abi_s }); - } - return std.fmt.allocPrint(allocator, "{s}-{s}", .{ cpu_arch_s, os_tag_s }); - } - - fn hasValue(stack: []const []const u8, needle: []const u8) bool { - for (stack) |v| { - if (mem.eql(u8, v, needle)) return true; - } - return false; - } - - pub fn matchesTarget(self: TargetMatcher, 
targets: []const []const u8) bool { - for (self.target_strings.items) |t| { - if (hasValue(targets, t)) return true; - } - return false; - } - - fn matchesArch(self: TargetMatcher, archs: []const []const u8) bool { - return hasValue(archs, fmtCpuArch(self.cpu_arch)); - } }; -pub fn parseFromStub( - self: *Dylib, - allocator: Allocator, - target: std.Target, - lib_stub: LibStub, - dylib_id: u16, - dependent_libs: anytype, - name: []const u8, -) !void { - if (lib_stub.inner.len == 0) return error.NotLibStub; - - log.debug("parsing shared library from stub '{s}'", .{name}); - - const umbrella_lib = lib_stub.inner[0]; - - { - var id = try Id.default(allocator, umbrella_lib.installName()); - if (umbrella_lib.currentVersion()) |version| { - try id.parseCurrentVersion(version); - } - if (umbrella_lib.compatibilityVersion()) |version| { - try id.parseCompatibilityVersion(version); - } - self.id = id; - } - - var umbrella_libs = std.StringHashMap(void).init(allocator); - defer umbrella_libs.deinit(); - - log.debug(" (install_name '{s}')", .{umbrella_lib.installName()}); - - var matcher = try TargetMatcher.init(allocator, target); - defer matcher.deinit(); - - for (lib_stub.inner, 0..) |elem, stub_index| { - const targets = try elem.targets(allocator); - defer { - for (targets) |t| allocator.free(t); - allocator.free(targets); - } - if (!matcher.matchesTarget(targets)) continue; - - if (stub_index > 0) { - // TODO I thought that we could switch on presence of `parent-umbrella` map; - // however, turns out `libsystem_notify.dylib` is fully reexported by `libSystem.dylib` - // BUT does not feature a `parent-umbrella` map as the only sublib. Apple's bug perhaps? 
- try umbrella_libs.put(elem.installName(), {}); - } - - switch (elem) { - .v3 => |stub| { - if (stub.exports) |exports| { - for (exports) |exp| { - if (!matcher.matchesArch(exp.archs)) continue; - - if (exp.symbols) |symbols| { - for (symbols) |sym_name| { - try self.addSymbol(allocator, sym_name); - } - } - - if (exp.weak_symbols) |symbols| { - for (symbols) |sym_name| { - try self.addWeakSymbol(allocator, sym_name); - } - } - - if (exp.objc_classes) |objc_classes| { - for (objc_classes) |class_name| { - try self.addObjCClassSymbol(allocator, class_name); - } - } - - if (exp.objc_ivars) |objc_ivars| { - for (objc_ivars) |ivar| { - try self.addObjCIVarSymbol(allocator, ivar); - } - } - - if (exp.objc_eh_types) |objc_eh_types| { - for (objc_eh_types) |eht| { - try self.addObjCEhTypeSymbol(allocator, eht); - } - } - - // TODO track which libs were already parsed in different steps - if (exp.re_exports) |re_exports| { - for (re_exports) |lib| { - if (umbrella_libs.contains(lib)) continue; - - log.debug(" (found re-export '{s}')", .{lib}); - - const dep_id = try Id.default(allocator, lib); - try dependent_libs.writeItem(.{ .id = dep_id, .parent = dylib_id }); - } - } - } - } - }, - .v4 => |stub| { - if (stub.exports) |exports| { - for (exports) |exp| { - if (!matcher.matchesTarget(exp.targets)) continue; - - if (exp.symbols) |symbols| { - for (symbols) |sym_name| { - try self.addSymbol(allocator, sym_name); - } - } - - if (exp.weak_symbols) |symbols| { - for (symbols) |sym_name| { - try self.addWeakSymbol(allocator, sym_name); - } - } - - if (exp.objc_classes) |classes| { - for (classes) |sym_name| { - try self.addObjCClassSymbol(allocator, sym_name); - } - } - - if (exp.objc_ivars) |objc_ivars| { - for (objc_ivars) |ivar| { - try self.addObjCIVarSymbol(allocator, ivar); - } - } - - if (exp.objc_eh_types) |objc_eh_types| { - for (objc_eh_types) |eht| { - try self.addObjCEhTypeSymbol(allocator, eht); - } - } - } - } - - if (stub.reexports) |reexports| { - for 
(reexports) |reexp| { - if (!matcher.matchesTarget(reexp.targets)) continue; - - if (reexp.symbols) |symbols| { - for (symbols) |sym_name| { - try self.addSymbol(allocator, sym_name); - } - } - - if (reexp.weak_symbols) |symbols| { - for (symbols) |sym_name| { - try self.addWeakSymbol(allocator, sym_name); - } - } - - if (reexp.objc_classes) |classes| { - for (classes) |sym_name| { - try self.addObjCClassSymbol(allocator, sym_name); - } - } - - if (reexp.objc_ivars) |objc_ivars| { - for (objc_ivars) |ivar| { - try self.addObjCIVarSymbol(allocator, ivar); - } - } - - if (reexp.objc_eh_types) |objc_eh_types| { - for (objc_eh_types) |eht| { - try self.addObjCEhTypeSymbol(allocator, eht); - } - } - } - } - - if (stub.objc_classes) |classes| { - for (classes) |sym_name| { - try self.addObjCClassSymbol(allocator, sym_name); - } - } - - if (stub.objc_ivars) |objc_ivars| { - for (objc_ivars) |ivar| { - try self.addObjCIVarSymbol(allocator, ivar); - } - } - - if (stub.objc_eh_types) |objc_eh_types| { - for (objc_eh_types) |eht| { - try self.addObjCEhTypeSymbol(allocator, eht); - } - } - }, - } - } - - // For V4, we add dependent libs in a separate pass since some stubs such as libSystem include - // re-exports directly in the stub file. 
- for (lib_stub.inner) |elem| { - if (elem == .v3) break; - const stub = elem.v4; - - // TODO track which libs were already parsed in different steps - if (stub.reexported_libraries) |reexports| { - for (reexports) |reexp| { - if (!matcher.matchesTarget(reexp.targets)) continue; - - for (reexp.libraries) |lib| { - if (umbrella_libs.contains(lib)) continue; - - log.debug(" (found re-export '{s}')", .{lib}); - - const dep_id = try Id.default(allocator, lib); - try dependent_libs.writeItem(.{ .id = dep_id, .parent = dylib_id }); - } - } - } - } -} - -const Dylib = @This(); - -const std = @import("std"); const assert = std.debug.assert; +const fat = @import("fat.zig"); const fs = std.fs; const fmt = std.fmt; const log = std.log.scoped(.link); const macho = std.macho; const math = std.math; const mem = std.mem; -const fat = @import("fat.zig"); const tapi = @import("../tapi.zig"); +const trace = @import("../tracy.zig").trace; +const std = @import("std"); const Allocator = mem.Allocator; +const Dylib = @This(); +const File = @import("file.zig").File; const LibStub = tapi.LibStub; const LoadCommandIterator = macho.LoadCommandIterator; const MachO = @import("../MachO.zig"); -const Platform = @import("load_commands.zig").Platform; +const Symbol = @import("Symbol.zig"); const Tbd = tapi.Tbd; diff --git a/src/link/MachO/InternalObject.zig b/src/link/MachO/InternalObject.zig new file mode 100644 index 0000000000..e139e4efab --- /dev/null +++ b/src/link/MachO/InternalObject.zig @@ -0,0 +1,249 @@ +index: File.Index, + +sections: std.MultiArrayList(Section) = .{}, +atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, +symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, + +objc_methnames: std.ArrayListUnmanaged(u8) = .{}, +objc_selrefs: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64), + +output_symtab_ctx: MachO.SymtabCtx = .{}, + +pub fn deinit(self: *InternalObject, allocator: Allocator) void { + for (self.sections.items(.relocs)) |*relocs| { + relocs.deinit(allocator); + } + 
self.sections.deinit(allocator); + self.atoms.deinit(allocator); + self.symbols.deinit(allocator); + self.objc_methnames.deinit(allocator); +} + +pub fn addSymbol(self: *InternalObject, name: [:0]const u8, macho_file: *MachO) !Symbol.Index { + const gpa = macho_file.base.allocator; + try self.symbols.ensureUnusedCapacity(gpa, 1); + const off = try macho_file.string_intern.insert(gpa, name); + const gop = try macho_file.getOrCreateGlobal(off); + self.symbols.addOneAssumeCapacity().* = gop.index; + const sym = macho_file.getSymbol(gop.index); + sym.* = .{ .name = off, .file = self.index }; + return gop.index; +} + +/// Creates a fake input sections __TEXT,__objc_methname and __DATA,__objc_selrefs. +pub fn addObjcMsgsendSections(self: *InternalObject, sym_name: []const u8, macho_file: *MachO) !u32 { + const methname_atom_index = try self.addObjcMethnameSection(sym_name, macho_file); + return try self.addObjcSelrefsSection(sym_name, methname_atom_index, macho_file); +} + +fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_file: *MachO) !Atom.Index { + const gpa = macho_file.base.allocator; + const atom_index = try macho_file.addAtom(); + try self.atoms.append(gpa, atom_index); + + const name = try std.fmt.allocPrintZ(gpa, "__TEXT$__objc_methname${s}", .{methname}); + defer gpa.free(name); + const atom = macho_file.getAtom(atom_index).?; + atom.atom_index = atom_index; + atom.name = try macho_file.string_intern.insert(gpa, name); + atom.file = self.index; + atom.size = methname.len + 1; + atom.alignment = 0; + + const n_sect = try self.addSection(gpa, "__TEXT", "__objc_methname"); + const sect = &self.sections.items(.header)[n_sect]; + sect.flags = macho.S_CSTRING_LITERALS; + sect.size = atom.size; + sect.@"align" = 0; + atom.n_sect = n_sect; + self.sections.items(.extra)[n_sect].is_objc_methname = true; + + sect.offset = @intCast(self.objc_methnames.items.len); + try self.objc_methnames.ensureUnusedCapacity(gpa, methname.len + 1); + 
self.objc_methnames.writer(gpa).print("{s}\x00", .{methname}) catch unreachable; + + return atom_index; +} + +fn addObjcSelrefsSection( + self: *InternalObject, + methname: []const u8, + methname_atom_index: Atom.Index, + macho_file: *MachO, +) !Atom.Index { + const gpa = macho_file.base.allocator; + const atom_index = try macho_file.addAtom(); + try self.atoms.append(gpa, atom_index); + + const name = try std.fmt.allocPrintZ(gpa, "__DATA$__objc_selrefs${s}", .{methname}); + defer gpa.free(name); + const atom = macho_file.getAtom(atom_index).?; + atom.atom_index = atom_index; + atom.name = try macho_file.string_intern.insert(gpa, name); + atom.file = self.index; + atom.size = @sizeOf(u64); + atom.alignment = 3; + + const n_sect = try self.addSection(gpa, "__DATA", "__objc_selrefs"); + const sect = &self.sections.items(.header)[n_sect]; + sect.flags = macho.S_LITERAL_POINTERS | macho.S_ATTR_NO_DEAD_STRIP; + sect.offset = 0; + sect.size = atom.size; + sect.@"align" = 3; + atom.n_sect = n_sect; + self.sections.items(.extra)[n_sect].is_objc_selref = true; + + const relocs = &self.sections.items(.relocs)[n_sect]; + try relocs.ensureUnusedCapacity(gpa, 1); + relocs.appendAssumeCapacity(.{ + .tag = .local, + .offset = 0, + .target = methname_atom_index, + .addend = 0, + .type = .unsigned, + .meta = .{ + .pcrel = false, + .length = 3, + .symbolnum = 0, // Only used when synthesising unwind records so can be anything + .has_subtractor = false, + }, + }); + atom.relocs = .{ .pos = 0, .len = 1 }; + + return atom_index; +} + +pub fn calcSymtabSize(self: *InternalObject, macho_file: *MachO) !void { + for (self.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + if (sym.getFile(macho_file)) |file| if (file.getIndex() != self.index) continue; + sym.flags.output_symtab = true; + if (sym.isLocal()) { + try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nlocals }, macho_file); + self.output_symtab_ctx.nlocals += 1; + } else if (sym.flags.@"export") { + 
try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nexports }, macho_file); + self.output_symtab_ctx.nexports += 1; + } else { + assert(sym.flags.import); + try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nimports }, macho_file); + self.output_symtab_ctx.nimports += 1; + } + self.output_symtab_ctx.strsize += @as(u32, @intCast(sym.getName(macho_file).len + 1)); + } +} + +pub fn writeSymtab(self: InternalObject, macho_file: *MachO) void { + for (self.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + if (sym.getFile(macho_file)) |file| if (file.getIndex() != self.index) continue; + const idx = sym.getOutputSymtabIndex(macho_file) orelse continue; + const n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + macho_file.strtab.appendSliceAssumeCapacity(sym.getName(macho_file)); + macho_file.strtab.appendAssumeCapacity(0); + const out_sym = &macho_file.symtab.items[idx]; + out_sym.n_strx = n_strx; + sym.setOutputSym(macho_file, out_sym); + } +} + +fn addSection(self: *InternalObject, allocator: Allocator, segname: []const u8, sectname: []const u8) !u32 { + const n_sect = @as(u32, @intCast(try self.sections.addOne(allocator))); + self.sections.set(n_sect, .{ + .header = .{ + .sectname = MachO.makeStaticString(sectname), + .segname = MachO.makeStaticString(segname), + }, + }); + return n_sect; +} + +pub fn getSectionData(self: *const InternalObject, index: u32) []const u8 { + const slice = self.sections.slice(); + assert(index < slice.items(.header).len); + const sect = slice.items(.header)[index]; + const extra = slice.items(.extra)[index]; + if (extra.is_objc_methname) { + return self.objc_methnames.items[sect.offset..][0..sect.size]; + } else if (extra.is_objc_selref) { + return &self.objc_selrefs; + } else @panic("ref to non-existent section"); +} + +pub fn asFile(self: *InternalObject) File { + return .{ .internal = self }; +} + +const FormatContext = struct { + self: *InternalObject, + macho_file: *MachO, +}; + +pub fn 
fmtAtoms(self: *InternalObject, macho_file: *MachO) std.fmt.Formatter(formatAtoms) { + return .{ .data = .{ + .self = self, + .macho_file = macho_file, + } }; +} + +fn formatAtoms( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + try writer.writeAll(" atoms\n"); + for (ctx.self.atoms.items) |atom_index| { + const atom = ctx.macho_file.getAtom(atom_index).?; + try writer.print(" {}\n", .{atom.fmt(ctx.macho_file)}); + } +} + +pub fn fmtSymtab(self: *InternalObject, macho_file: *MachO) std.fmt.Formatter(formatSymtab) { + return .{ .data = .{ + .self = self, + .macho_file = macho_file, + } }; +} + +fn formatSymtab( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + try writer.writeAll(" symbols\n"); + for (ctx.self.symbols.items) |index| { + const global = ctx.macho_file.getSymbol(index); + try writer.print(" {}\n", .{global.fmt(ctx.macho_file)}); + } +} + +const Section = struct { + header: macho.section_64, + relocs: std.ArrayListUnmanaged(Relocation) = .{}, + extra: Extra = .{}, + + const Extra = packed struct { + is_objc_methname: bool = false, + is_objc_selref: bool = false, + }; +}; + +const assert = std.debug.assert; +const macho = std.macho; +const mem = std.mem; +const std = @import("std"); + +const Allocator = std.mem.Allocator; +const Atom = @import("Atom.zig"); +const File = @import("file.zig").File; +const InternalObject = @This(); +const MachO = @import("../MachO.zig"); +const Object = @import("Object.zig"); +const Relocation = @import("Relocation.zig"); +const Symbol = @import("Symbol.zig"); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index ad069b845e..deb17ba80b 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -1,1130 +1,2093 @@ -//! 
Represents an input relocatable Object file. -//! Each Object is fully loaded into memory for easier -//! access into different data within. - -name: []const u8, +archive: ?[]const u8 = null, +path: []const u8, mtime: u64, -contents: []align(@alignOf(u64)) const u8, +data: []const u8, +index: File.Index, -header: macho.mach_header_64 = undefined, - -/// Symtab and strtab might not exist for empty object files so we use an optional -/// to signal this. -in_symtab: ?[]align(1) const macho.nlist_64 = null, -in_strtab: ?[]const u8 = null, - -/// Output symtab is sorted so that we can easily reference symbols following each -/// other in address space. -/// The length of the symtab is at least of the input symtab length however there -/// can be trailing section symbols. -symtab: []macho.nlist_64 = undefined, -/// Can be undefined as set together with in_symtab. -source_symtab_lookup: []u32 = undefined, -/// Can be undefined as set together with in_symtab. -reverse_symtab_lookup: []u32 = undefined, -/// Can be undefined as set together with in_symtab. -source_address_lookup: []i64 = undefined, -/// Can be undefined as set together with in_symtab. -source_section_index_lookup: []Entry = undefined, -/// Can be undefined as set together with in_symtab. -strtab_lookup: []u32 = undefined, -/// Can be undefined as set together with in_symtab. -atom_by_index_table: []?Atom.Index = undefined, -/// Can be undefined as set together with in_symtab. -globals_lookup: []i64 = undefined, -/// Can be undefined as set together with in_symtab. -relocs_lookup: []Entry = undefined, - -/// All relocations sorted and flatened, sorted by address descending -/// per section. -relocations: std.ArrayListUnmanaged(macho.relocation_info) = .{}, -/// Beginning index to the relocations array for each input section -/// defined within this Object file. -section_relocs_lookup: std.ArrayListUnmanaged(u32) = .{}, - -/// Data-in-code records sorted by address. 
-data_in_code: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, +header: ?macho.mach_header_64 = null, +sections: std.MultiArrayList(Section) = .{}, +symtab: std.MultiArrayList(Nlist) = .{}, +strtab: []const u8 = &[0]u8{}, +symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, -exec_atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, -eh_frame_sect_id: ?u8 = null, -eh_frame_relocs_lookup: std.AutoArrayHashMapUnmanaged(u32, Record) = .{}, -eh_frame_records_lookup: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{}, +platform: ?MachO.Options.Platform = null, +dwarf_info: ?DwarfInfo = null, +stab_files: std.ArrayListUnmanaged(StabFile) = .{}, -unwind_info_sect_id: ?u8 = null, -unwind_relocs_lookup: []Record = undefined, -unwind_records_lookup: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, +eh_frame_sect_index: ?u8 = null, +compact_unwind_sect_index: ?u8 = null, +cies: std.ArrayListUnmanaged(Cie) = .{}, +fdes: std.ArrayListUnmanaged(Fde) = .{}, +eh_frame_data: std.ArrayListUnmanaged(u8) = .{}, +unwind_records: std.ArrayListUnmanaged(UnwindInfo.Record.Index) = .{}, -const Entry = struct { - start: u32 = 0, - len: u32 = 0, -}; +alive: bool = true, +hidden: bool = false, +num_rebase_relocs: u32 = 0, +num_bind_relocs: u32 = 0, +num_weak_bind_relocs: u32 = 0, -const Record = struct { - dead: bool, - reloc: Entry, -}; +output_symtab_ctx: MachO.SymtabCtx = .{}, -pub fn isObject(file: std.fs.File) bool { - const reader = file.reader(); - const hdr = reader.readStruct(macho.mach_header_64) catch return false; - defer file.seekTo(0) catch {}; - return hdr.filetype == macho.MH_OBJECT; +pub fn deinit(self: *Object, allocator: Allocator) void { + for (self.sections.items(.relocs), self.sections.items(.subsections)) |*relocs, *sub| { + relocs.deinit(allocator); + sub.deinit(allocator); + } + self.sections.deinit(allocator); + self.symtab.deinit(allocator); + self.symbols.deinit(allocator); + self.atoms.deinit(allocator); 
+ self.cies.deinit(allocator); + self.fdes.deinit(allocator); + self.eh_frame_data.deinit(allocator); + self.unwind_records.deinit(allocator); + if (self.dwarf_info) |*dw| dw.deinit(allocator); + for (self.stab_files.items) |*sf| { + sf.stabs.deinit(allocator); + } + self.stab_files.deinit(allocator); } -pub fn deinit(self: *Object, gpa: Allocator) void { - self.atoms.deinit(gpa); - self.exec_atoms.deinit(gpa); - gpa.free(self.name); - gpa.free(self.contents); - if (self.in_symtab) |_| { - gpa.free(self.source_symtab_lookup); - gpa.free(self.reverse_symtab_lookup); - gpa.free(self.source_address_lookup); - gpa.free(self.source_section_index_lookup); - gpa.free(self.strtab_lookup); - gpa.free(self.symtab); - gpa.free(self.atom_by_index_table); - gpa.free(self.globals_lookup); - gpa.free(self.relocs_lookup); - } - self.eh_frame_relocs_lookup.deinit(gpa); - self.eh_frame_records_lookup.deinit(gpa); - if (self.hasUnwindRecords()) { - gpa.free(self.unwind_relocs_lookup); - } - self.unwind_records_lookup.deinit(gpa); - self.relocations.deinit(gpa); - self.section_relocs_lookup.deinit(gpa); - self.data_in_code.deinit(gpa); -} +pub fn parse(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); -pub fn parse(self: *Object, allocator: Allocator) !void { - var stream = std.io.fixedBufferStream(self.contents); + const gpa = macho_file.base.allocator; + var stream = std.io.fixedBufferStream(self.data); const reader = stream.reader(); self.header = try reader.readStruct(macho.mach_header_64); - var it = LoadCommandIterator{ - .ncmds = self.header.ncmds, - .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], - }; - const nsects = self.getSourceSections().len; + if (self.getLoadCommand(.SEGMENT_64)) |lc| { + const sections = lc.getSections(); + try self.sections.ensureUnusedCapacity(gpa, sections.len); + for (sections) |sect| { + const index = try self.sections.addOne(gpa); + self.sections.set(index, .{ 
.header = sect }); - // Prepopulate relocations per section lookup table. - try self.section_relocs_lookup.resize(allocator, nsects); - @memset(self.section_relocs_lookup.items, 0); - - // Parse symtab. - const symtab = while (it.next()) |cmd| switch (cmd.cmd()) { - .SYMTAB => break cmd.cast(macho.symtab_command).?, - else => {}, - } else return; - - self.in_symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(self.contents.ptr + symtab.symoff))[0..symtab.nsyms]; - self.in_strtab = self.contents[symtab.stroff..][0..symtab.strsize]; - - self.symtab = try allocator.alloc(macho.nlist_64, self.in_symtab.?.len + nsects); - self.source_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); - self.reverse_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); - self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); - self.globals_lookup = try allocator.alloc(i64, self.in_symtab.?.len); - self.atom_by_index_table = try allocator.alloc(?Atom.Index, self.in_symtab.?.len + nsects); - self.relocs_lookup = try allocator.alloc(Entry, self.in_symtab.?.len + nsects); - // This is wasteful but we need to be able to lookup source symbol address after stripping and - // allocating of sections. - self.source_address_lookup = try allocator.alloc(i64, self.in_symtab.?.len); - self.source_section_index_lookup = try allocator.alloc(Entry, nsects); - - for (self.symtab) |*sym| { - sym.* = .{ - .n_value = 0, - .n_sect = 0, - .n_desc = 0, - .n_strx = 0, - .n_type = 0, - }; - } - - @memset(self.globals_lookup, -1); - @memset(self.atom_by_index_table, null); - @memset(self.source_section_index_lookup, .{}); - @memset(self.relocs_lookup, .{}); - - // You would expect that the symbol table is at least pre-sorted based on symbol's type: - // local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance, - // the GO compiler does not necessarily respect that therefore we sort immediately by type - // and address within. 
- var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(allocator, self.in_symtab.?.len); - defer sorted_all_syms.deinit(); - - for (self.in_symtab.?, 0..) |_, index| { - sorted_all_syms.appendAssumeCapacity(.{ .index = @as(u32, @intCast(index)) }); - } - - // We sort by type: defined < undefined, and - // afterwards by address in each group. Normally, dysymtab should - // be enough to guarantee the sort, but turns out not every compiler - // is kind enough to specify the symbols in the correct order. - mem.sort(SymbolAtIndex, sorted_all_syms.items, self, SymbolAtIndex.lessThan); - - var prev_sect_id: u8 = 0; - var section_index_lookup: ?Entry = null; - for (sorted_all_syms.items, 0..) |sym_id, i| { - const sym = sym_id.getSymbol(self); - - if (section_index_lookup) |*lookup| { - if (sym.n_sect != prev_sect_id or sym.undf()) { - self.source_section_index_lookup[prev_sect_id - 1] = lookup.*; - section_index_lookup = null; - } else { - lookup.len += 1; + if (mem.eql(u8, sect.sectName(), "__eh_frame")) { + self.eh_frame_sect_index = @intCast(index); + } else if (mem.eql(u8, sect.sectName(), "__compact_unwind")) { + self.compact_unwind_sect_index = @intCast(index); } } - if (sym.sect() and section_index_lookup == null) { - section_index_lookup = .{ .start = @as(u32, @intCast(i)), .len = 1 }; + } + if (self.getLoadCommand(.SYMTAB)) |lc| { + const cmd = lc.cast(macho.symtab_command).?; + self.strtab = self.data[cmd.stroff..][0..cmd.strsize]; + + const symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(self.data.ptr + cmd.symoff))[0..cmd.nsyms]; + try self.symtab.ensureUnusedCapacity(gpa, symtab.len); + for (symtab) |nlist| { + self.symtab.appendAssumeCapacity(.{ + .nlist = nlist, + .atom = 0, + .size = 0, + }); } - - prev_sect_id = sym.n_sect; - - self.symtab[i] = sym; - self.source_symtab_lookup[i] = sym_id.index; - self.reverse_symtab_lookup[sym_id.index] = @as(u32, @intCast(i)); - self.source_address_lookup[i] = if (sym.undf()) -1 else @as(i64, 
@intCast(sym.n_value)); - - const sym_name_len = mem.sliceTo(@as([*:0]const u8, @ptrCast(self.in_strtab.?.ptr + sym.n_strx)), 0).len + 1; - self.strtab_lookup[i] = @as(u32, @intCast(sym_name_len)); } - // If there were no undefined symbols, make sure we populate the - // source section index lookup for the last scanned section. - if (section_index_lookup) |lookup| { - self.source_section_index_lookup[prev_sect_id - 1] = lookup; - } + const NlistIdx = struct { + nlist: macho.nlist_64, + idx: usize, - // Parse __TEXT,__eh_frame header if one exists - self.eh_frame_sect_id = self.getSourceSectionIndexByName("__TEXT", "__eh_frame"); - - // Parse __LD,__compact_unwind header if one exists - self.unwind_info_sect_id = self.getSourceSectionIndexByName("__LD", "__compact_unwind"); - if (self.hasUnwindRecords()) { - self.unwind_relocs_lookup = try allocator.alloc(Record, self.getUnwindRecords().len); - @memset(self.unwind_relocs_lookup, .{ .dead = true, .reloc = .{} }); - } -} - -const SymbolAtIndex = struct { - index: u32, - - const Context = *const Object; - - fn getSymbol(self: SymbolAtIndex, ctx: Context) macho.nlist_64 { - return ctx.in_symtab.?[self.index]; - } - - fn getSymbolName(self: SymbolAtIndex, ctx: Context) []const u8 { - const off = self.getSymbol(ctx).n_strx; - return mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.in_strtab.?.ptr + off)), 0); - } - - fn getSymbolSeniority(self: SymbolAtIndex, ctx: Context) u2 { - const sym = self.getSymbol(ctx); - if (!sym.ext()) { - const sym_name = self.getSymbolName(ctx); - if (mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L")) return 3; - return 2; - } - if (sym.weakDef() or sym.pext()) return 1; - return 0; - } - - /// Performs lexicographic-like check. 
- /// * lhs and rhs defined - /// * if lhs == rhs - /// * if lhs.n_sect == rhs.n_sect - /// * ext < weak < local < temp - /// * lhs.n_sect < rhs.n_sect - /// * lhs < rhs - /// * !rhs is undefined - fn lessThan(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool { - const lhs = lhs_index.getSymbol(ctx); - const rhs = rhs_index.getSymbol(ctx); - if (lhs.sect() and rhs.sect()) { - if (lhs.n_value == rhs.n_value) { - if (lhs.n_sect == rhs.n_sect) { - const lhs_senior = lhs_index.getSymbolSeniority(ctx); - const rhs_senior = rhs_index.getSymbolSeniority(ctx); - if (lhs_senior == rhs_senior) { - return lessThanByNStrx(ctx, lhs_index, rhs_index); - } else return lhs_senior < rhs_senior; - } else return lhs.n_sect < rhs.n_sect; - } else return lhs.n_value < rhs.n_value; - } else if (lhs.undf() and rhs.undf()) { - return lessThanByNStrx(ctx, lhs_index, rhs_index); - } else return rhs.undf(); - } - - fn lessThanByNStrx(ctx: Context, lhs: SymbolAtIndex, rhs: SymbolAtIndex) bool { - return lhs.getSymbol(ctx).n_strx < rhs.getSymbol(ctx).n_strx; - } -}; - -fn filterSymbolsBySection(symbols: []macho.nlist_64, n_sect: u8) struct { - index: u32, - len: u32, -} { - const FirstMatch = struct { - n_sect: u8, - - pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool { - return symbol.n_sect == pred.n_sect; - } - }; - const FirstNonMatch = struct { - n_sect: u8, - - pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool { - return symbol.n_sect != pred.n_sect; - } - }; - - const index = MachO.lsearch(macho.nlist_64, symbols, FirstMatch{ - .n_sect = n_sect, - }); - const len = MachO.lsearch(macho.nlist_64, symbols[index..], FirstNonMatch{ - .n_sect = n_sect, - }); - - return .{ .index = @as(u32, @intCast(index)), .len = @as(u32, @intCast(len)) }; -} - -fn filterSymbolsByAddress(symbols: []macho.nlist_64, start_addr: u64, end_addr: u64) struct { - index: u32, - len: u32, -} { - const Predicate = struct { - addr: u64, - - pub fn predicate(pred: @This(), 
symbol: macho.nlist_64) bool { - return symbol.n_value >= pred.addr; - } - }; - - const index = MachO.lsearch(macho.nlist_64, symbols, Predicate{ - .addr = start_addr, - }); - const len = MachO.lsearch(macho.nlist_64, symbols[index..], Predicate{ - .addr = end_addr, - }); - - return .{ .index = @as(u32, @intCast(index)), .len = @as(u32, @intCast(len)) }; -} - -const SortedSection = struct { - header: macho.section_64, - id: u8, -}; - -fn sectionLessThanByAddress(ctx: void, lhs: SortedSection, rhs: SortedSection) bool { - _ = ctx; - if (lhs.header.addr == rhs.header.addr) { - return lhs.id < rhs.id; - } - return lhs.header.addr < rhs.header.addr; -} - -pub const SplitIntoAtomsError = error{ - OutOfMemory, - EndOfStream, - MissingEhFrameSection, - BadDwarfCfi, -}; - -pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) SplitIntoAtomsError!void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - log.debug("splitting object({d}, {s}) into atoms", .{ object_id, self.name }); - - try self.splitRegularSections(macho_file, object_id); - try self.parseEhFrameSection(macho_file, object_id); - try self.parseUnwindInfo(macho_file, object_id); - try self.parseDataInCode(gpa); -} - -/// Splits input regular sections into Atoms. -/// If the Object was compiled with `MH_SUBSECTIONS_VIA_SYMBOLS`, splits section -/// into subsections where each subsection then represents an Atom. -pub fn splitRegularSections(self: *Object, macho_file: *MachO, object_id: u32) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const target = macho_file.base.comp.root_mod.resolved_target.result; - - const sections = self.getSourceSections(); - for (sections, 0..) 
|sect, id| { - if (sect.isDebug()) continue; - const out_sect_id = (try Atom.getOutputSection(macho_file, sect)) orelse { - log.debug(" unhandled section '{s},{s}'", .{ sect.segName(), sect.sectName() }); - continue; - }; - if (sect.size == 0) continue; - - const sect_id = @as(u8, @intCast(id)); - const sym = self.getSectionAliasSymbolPtr(sect_id); - sym.* = .{ - .n_strx = 0, - .n_type = macho.N_SECT, - .n_sect = out_sect_id + 1, - .n_desc = 0, - .n_value = sect.addr, - }; - } - - if (self.in_symtab == null) { - for (sections, 0..) |sect, id| { - if (sect.isDebug()) continue; - const out_sect_id = (try Atom.getOutputSection(macho_file, sect)) orelse continue; - if (sect.size == 0) continue; - - const sect_id: u8 = @intCast(id); - const sym_index = self.getSectionAliasSymbolIndex(sect_id); - const atom_index = try self.createAtomFromSubsection( - macho_file, - object_id, - sym_index, - sym_index, - 1, - sect.size, - Alignment.fromLog2Units(sect.@"align"), - out_sect_id, - ); - macho_file.addAtomToSection(atom_index); - } - return; - } - - // Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we - // have to infer the start of undef section in the symtab ourselves. - const iundefsym = blk: { - const dysymtab = self.getDysymtab() orelse { - var iundefsym: usize = self.in_symtab.?.len; - while (iundefsym > 0) : (iundefsym -= 1) { - const sym = self.symtab[iundefsym - 1]; - if (sym.sect()) break; + fn rank(ctx: *const Object, nl: macho.nlist_64) u8 { + if (!nl.ext()) { + const name = ctx.getString(nl.n_strx); + if (name.len == 0) return 5; + if (name[0] == 'l' or name[0] == 'L') return 4; + return 3; } - break :blk iundefsym; - }; - break :blk dysymtab.iundefsym; - }; + return if (nl.weakDef()) 2 else 1; + } - // We only care about defined symbols, so filter every other out. 
- const symtab = try gpa.dupe(macho.nlist_64, self.symtab[0..iundefsym]); - defer gpa.free(symtab); - - const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; - - // Sort section headers by address. - var sorted_sections = try gpa.alloc(SortedSection, sections.len); - defer gpa.free(sorted_sections); - - for (sections, 0..) |sect, id| { - sorted_sections[id] = .{ .header = sect, .id = @as(u8, @intCast(id)) }; - } - - mem.sort(SortedSection, sorted_sections, {}, sectionLessThanByAddress); - - var sect_sym_index: u32 = 0; - for (sorted_sections) |section| { - const sect = section.header; - if (sect.isDebug()) continue; - - const sect_id = section.id; - log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() }); - - // Get output segment/section in the final artifact. - const out_sect_id = (try Atom.getOutputSection(macho_file, sect)) orelse continue; - - log.debug(" output sect({d}, '{s},{s}')", .{ - out_sect_id + 1, - macho_file.sections.items(.header)[out_sect_id].segName(), - macho_file.sections.items(.header)[out_sect_id].sectName(), - }); - - try self.parseRelocs(gpa, section.id); - - const cpu_arch = target.cpu.arch; - const sect_loc = filterSymbolsBySection(symtab[sect_sym_index..], sect_id + 1); - const sect_start_index = sect_sym_index + sect_loc.index; - - sect_sym_index += sect_loc.len; - - if (sect.size == 0) continue; - if (subsections_via_symbols and sect_loc.len > 0) { - // If the first nlist does not match the start of the section, - // then we need to encapsulate the memory range [section start, first symbol) - // as a temporary symbol and insert the matching Atom. 
- const first_sym = symtab[sect_start_index]; - if (first_sym.n_value > sect.addr) { - const sym_index = self.getSectionAliasSymbolIndex(sect_id); - const atom_size = first_sym.n_value - sect.addr; - const atom_index = try self.createAtomFromSubsection( - macho_file, - object_id, - sym_index, - sym_index, - 1, - atom_size, - Alignment.fromLog2Units(sect.@"align"), - out_sect_id, - ); - if (!sect.isZerofill()) { - try self.cacheRelocs(macho_file, atom_index); + fn lessThan(ctx: *const Object, lhs: @This(), rhs: @This()) bool { + if (lhs.nlist.n_sect == rhs.nlist.n_sect) { + if (lhs.nlist.n_value == rhs.nlist.n_value) { + return rank(ctx, lhs.nlist) < rank(ctx, rhs.nlist); } - macho_file.addAtomToSection(atom_index); + return lhs.nlist.n_value < rhs.nlist.n_value; } + return lhs.nlist.n_sect < rhs.nlist.n_sect; + } + }; - var next_sym_index = sect_start_index; - while (next_sym_index < sect_start_index + sect_loc.len) { - const next_sym = symtab[next_sym_index]; - const addr = next_sym.n_value; - const atom_loc = filterSymbolsByAddress(symtab[next_sym_index..], addr, addr + 1); - assert(atom_loc.len > 0); - const atom_sym_index = atom_loc.index + next_sym_index; - const nsyms_trailing = atom_loc.len; - next_sym_index += atom_loc.len; + var nlists = try std.ArrayList(NlistIdx).initCapacity(gpa, self.symtab.items(.nlist).len); + defer nlists.deinit(); + for (self.symtab.items(.nlist), 0..) 
|nlist, i| { + if (nlist.stab() or !nlist.sect()) continue; + nlists.appendAssumeCapacity(.{ .nlist = nlist, .idx = i }); + } + mem.sort(NlistIdx, nlists.items, self, NlistIdx.lessThan); - const atom_size = if (next_sym_index < sect_start_index + sect_loc.len) - symtab[next_sym_index].n_value - addr - else - sect.addr + sect.size - addr; + if (self.hasSubsections()) { + try self.initSubsections(nlists.items, macho_file); + } else { + try self.initSections(nlists.items, macho_file); + } - const atom_align = Alignment.fromLog2Units(if (addr > 0) - @min(@ctz(addr), sect.@"align") - else - sect.@"align"); + try self.initLiteralSections(macho_file); + try self.linkNlistToAtom(macho_file); - const atom_index = try self.createAtomFromSubsection( - macho_file, - object_id, - atom_sym_index, - atom_sym_index, - nsyms_trailing, - atom_size, - atom_align, - out_sect_id, - ); + try self.sortAtoms(macho_file); + try self.initSymbols(macho_file); + try self.initSymbolStabs(nlists.items, macho_file); + try self.initRelocs(macho_file); - // TODO rework this at the relocation level - if (cpu_arch == .x86_64 and addr == sect.addr) { - // In x86_64 relocs, it can so happen that the compiler refers to the same - // atom by both the actual assigned symbol and the start of the section. In this - // case, we need to link the two together so add an alias. 
- const alias_index = self.getSectionAliasSymbolIndex(sect_id); - self.atom_by_index_table[alias_index] = atom_index; - } - if (!sect.isZerofill()) { - try self.cacheRelocs(macho_file, atom_index); - } - macho_file.addAtomToSection(atom_index); - } - } else { - const alias_index = self.getSectionAliasSymbolIndex(sect_id); - const atom_index = try self.createAtomFromSubsection( - macho_file, - object_id, - alias_index, - sect_start_index, - sect_loc.len, - sect.size, - Alignment.fromLog2Units(sect.@"align"), - out_sect_id, - ); - if (!sect.isZerofill()) { - try self.cacheRelocs(macho_file, atom_index); - } - macho_file.addAtomToSection(atom_index); + if (self.eh_frame_sect_index) |index| { + try self.initEhFrameRecords(index, macho_file); + } + + if (self.compact_unwind_sect_index) |index| { + try self.initUnwindRecords(index, macho_file); + } + + self.initPlatform(); + try self.initDwarfInfo(macho_file); + + for (self.atoms.items) |atom_index| { + const atom = macho_file.getAtom(atom_index).?; + const isec = atom.getInputSection(macho_file); + if (mem.eql(u8, isec.sectName(), "__eh_frame") or + mem.eql(u8, isec.sectName(), "__compact_unwind") or + isec.attrs() & macho.S_ATTR_DEBUG != 0) + { + atom.flags.alive = false; } } } -fn createAtomFromSubsection( - self: *Object, - macho_file: *MachO, - object_id: u32, - sym_index: u32, - inner_sym_index: u32, - inner_nsyms_trailing: u32, +inline fn isLiteral(sect: macho.section_64) bool { + return switch (sect.type()) { + macho.S_CSTRING_LITERALS, + macho.S_4BYTE_LITERALS, + macho.S_8BYTE_LITERALS, + macho.S_16BYTE_LITERALS, + macho.S_LITERAL_POINTERS, + => true, + else => false, + }; +} + +fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; + const slice = self.sections.slice(); + for (slice.items(.header), slice.items(.subsections), 0..) 
|sect, *subsections, n_sect| { + if (isLiteral(sect)) continue; + + const nlist_start = for (nlists, 0..) |nlist, i| { + if (nlist.nlist.n_sect - 1 == n_sect) break i; + } else nlists.len; + const nlist_end = for (nlists[nlist_start..], nlist_start..) |nlist, i| { + if (nlist.nlist.n_sect - 1 != n_sect) break i; + } else nlists.len; + + if (nlist_start == nlist_end or nlists[nlist_start].nlist.n_value > sect.addr) { + const name = try std.fmt.allocPrintZ(gpa, "{s}${s}", .{ sect.segName(), sect.sectName() }); + defer gpa.free(name); + const size = if (nlist_start == nlist_end) sect.size else nlists[nlist_start].nlist.n_value - sect.addr; + const atom_index = try self.addAtom(.{ + .name = name, + .n_sect = @intCast(n_sect), + .off = 0, + .size = size, + .alignment = sect.@"align", + }, macho_file); + try subsections.append(gpa, .{ + .atom = atom_index, + .off = 0, + }); + } + + var idx: usize = nlist_start; + while (idx < nlist_end) { + const alias_start = idx; + const nlist = nlists[alias_start]; + + while (idx < nlist_end and + nlists[idx].nlist.n_value == nlist.nlist.n_value) : (idx += 1) + {} + + const size = if (idx < nlist_end) + nlists[idx].nlist.n_value - nlist.nlist.n_value + else + sect.addr + sect.size - nlist.nlist.n_value; + const alignment = if (nlist.nlist.n_value > 0) + @min(@ctz(nlist.nlist.n_value), sect.@"align") + else + sect.@"align"; + const atom_index = try self.addAtom(.{ + .name = self.getString(nlist.nlist.n_strx), + .n_sect = @intCast(n_sect), + .off = nlist.nlist.n_value - sect.addr, + .size = size, + .alignment = alignment, + }, macho_file); + try subsections.append(gpa, .{ + .atom = atom_index, + .off = nlist.nlist.n_value - sect.addr, + }); + + for (alias_start..idx) |i| { + self.symtab.items(.size)[nlists[i].idx] = size; + } + } + } +} + +fn initSections(self: *Object, nlists: anytype, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; + const slice = 
self.sections.slice(); + + try self.atoms.ensureUnusedCapacity(gpa, self.sections.items(.header).len); + + for (slice.items(.header), 0..) |sect, n_sect| { + if (isLiteral(sect)) continue; + + const name = try std.fmt.allocPrintZ(gpa, "{s}${s}", .{ sect.segName(), sect.sectName() }); + defer gpa.free(name); + + const atom_index = try self.addAtom(.{ + .name = name, + .n_sect = @intCast(n_sect), + .off = 0, + .size = sect.size, + .alignment = sect.@"align", + }, macho_file); + try slice.items(.subsections)[n_sect].append(gpa, .{ .atom = atom_index, .off = 0 }); + + const nlist_start = for (nlists, 0..) |nlist, i| { + if (nlist.nlist.n_sect - 1 == n_sect) break i; + } else nlists.len; + const nlist_end = for (nlists[nlist_start..], nlist_start..) |nlist, i| { + if (nlist.nlist.n_sect - 1 != n_sect) break i; + } else nlists.len; + + var idx: usize = nlist_start; + while (idx < nlist_end) { + const nlist = nlists[idx]; + + while (idx < nlist_end and + nlists[idx].nlist.n_value == nlist.nlist.n_value) : (idx += 1) + {} + + const size = if (idx < nlist_end) + nlists[idx].nlist.n_value - nlist.nlist.n_value + else + sect.addr + sect.size - nlist.nlist.n_value; + + for (nlist_start..idx) |i| { + self.symtab.items(.size)[nlists[i].idx] = size; + } + } + } +} + +const AddAtomArgs = struct { + name: [:0]const u8, + n_sect: u8, + off: u64, size: u64, - alignment: Alignment, - out_sect_id: u8, -) !Atom.Index { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const atom_index = try macho_file.createAtom(sym_index, .{ - .size = size, - .alignment = alignment, - }); - const atom = macho_file.getAtomPtr(atom_index); - atom.inner_sym_index = inner_sym_index; - atom.inner_nsyms_trailing = inner_nsyms_trailing; - atom.file = object_id + 1; - self.symtab[sym_index].n_sect = out_sect_id + 1; - - log.debug("creating ATOM(%{d}, '{s}') in sect({d}, '{s},{s}') in object({d})", .{ - sym_index, - self.getSymbolName(sym_index), - out_sect_id + 1, - 
macho_file.sections.items(.header)[out_sect_id].segName(), - macho_file.sections.items(.header)[out_sect_id].sectName(), - object_id, - }); + alignment: u32, +}; +fn addAtom(self: *Object, args: AddAtomArgs, macho_file: *MachO) !Atom.Index { + const gpa = macho_file.base.allocator; + const atom_index = try macho_file.addAtom(); + const atom = macho_file.getAtom(atom_index).?; + atom.file = self.index; + atom.atom_index = atom_index; + atom.name = try macho_file.string_intern.insert(gpa, args.name); + atom.n_sect = args.n_sect; + atom.size = args.size; + atom.alignment = args.alignment; + atom.off = args.off; try self.atoms.append(gpa, atom_index); - self.atom_by_index_table[sym_index] = atom_index; - - var it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - while (it.next()) |sym_loc| { - const inner = macho_file.getSymbolPtr(sym_loc); - inner.n_sect = out_sect_id + 1; - self.atom_by_index_table[sym_loc.sym_index] = atom_index; - } - - const out_sect = macho_file.sections.items(.header)[out_sect_id]; - if (out_sect.isCode() and - mem.eql(u8, "__TEXT", out_sect.segName()) and - mem.eql(u8, "__text", out_sect.sectName())) - { - // TODO currently assuming a single section for executable machine code - try self.exec_atoms.append(gpa, atom_index); - } - return atom_index; } -fn filterRelocs( - relocs: []align(1) const macho.relocation_info, - start_addr: u64, - end_addr: u64, -) Entry { - const Predicate = struct { - addr: u64, +fn initLiteralSections(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + // TODO here we should split into equal-sized records, hash the contents, and then + // deduplicate - ICF. + // For now, we simply cover each literal section with one large atom. 
+ const gpa = macho_file.base.allocator; + const slice = self.sections.slice(); - pub fn predicate(self: @This(), rel: macho.relocation_info) bool { - return rel.r_address >= self.addr; + try self.atoms.ensureUnusedCapacity(gpa, self.sections.items(.header).len); + + for (slice.items(.header), 0..) |sect, n_sect| { + if (!isLiteral(sect)) continue; + + const name = try std.fmt.allocPrintZ(gpa, "{s}${s}", .{ sect.segName(), sect.sectName() }); + defer gpa.free(name); + + const atom_index = try self.addAtom(.{ + .name = name, + .n_sect = @intCast(n_sect), + .off = 0, + .size = sect.size, + .alignment = sect.@"align", + }, macho_file); + try slice.items(.subsections)[n_sect].append(gpa, .{ .atom = atom_index, .off = 0 }); + } +} + +pub fn findAtom(self: Object, addr: u64) ?Atom.Index { + const tracy = trace(@src()); + defer tracy.end(); + const slice = self.sections.slice(); + for (slice.items(.header), slice.items(.subsections), 0..) |sect, subs, n_sect| { + if (subs.items.len == 0) continue; + if (sect.addr == addr) return subs.items[0].atom; + if (sect.addr < addr and addr < sect.addr + sect.size) { + return self.findAtomInSection(addr, @intCast(n_sect)); } - }; - const LPredicate = struct { - addr: u64, + } + return null; +} - pub fn predicate(self: @This(), rel: macho.relocation_info) bool { - return rel.r_address < self.addr; +fn findAtomInSection(self: Object, addr: u64, n_sect: u8) ?Atom.Index { + const tracy = trace(@src()); + defer tracy.end(); + const slice = self.sections.slice(); + const sect = slice.items(.header)[n_sect]; + const subsections = slice.items(.subsections)[n_sect]; + + var min: usize = 0; + var max: usize = subsections.items.len; + while (min < max) { + const idx = (min + max) / 2; + const sub = subsections.items[idx]; + const sub_addr = sect.addr + sub.off; + const sub_size = if (idx + 1 < subsections.items.len) + subsections.items[idx + 1].off - sub.off + else + sect.size - sub.off; + if (sub_addr == addr or (sub_addr < addr and addr < 
sub_addr + sub_size)) return sub.atom; + if (sub_addr < addr) { + min = idx + 1; + } else { + max = idx; } - }; - - const start = MachO.bsearch(macho.relocation_info, relocs, Predicate{ .addr = end_addr }); - const len = MachO.lsearch(macho.relocation_info, relocs[start..], LPredicate{ .addr = start_addr }); - - return .{ .start = @as(u32, @intCast(start)), .len = @as(u32, @intCast(len)) }; -} - -/// Parse all relocs for the input section, and sort in descending order. -/// Previously, I have wrongly assumed the compilers output relocations for each -/// section in a sorted manner which is simply not true. -fn parseRelocs(self: *Object, gpa: Allocator, sect_id: u8) !void { - const section = self.getSourceSection(sect_id); - const start = @as(u32, @intCast(self.relocations.items.len)); - if (self.getSourceRelocs(section)) |relocs| { - try self.relocations.ensureUnusedCapacity(gpa, relocs.len); - self.relocations.appendUnalignedSliceAssumeCapacity(relocs); - mem.sort(macho.relocation_info, self.relocations.items[start..], {}, relocGreaterThan); - } - self.section_relocs_lookup.items[sect_id] = start; -} - -fn cacheRelocs(self: *Object, macho_file: *MachO, atom_index: Atom.Index) !void { - const atom = macho_file.getAtom(atom_index); - - const source_sect_id = if (self.getSourceSymbol(atom.sym_index)) |source_sym| blk: { - break :blk source_sym.n_sect - 1; - } else blk: { - // If there was no matching symbol present in the source symtab, this means - // we are dealing with either an entire section, or part of it, but also - // starting at the beginning. 
- const nbase = @as(u32, @intCast(self.in_symtab.?.len)); - const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - break :blk sect_id; - }; - const source_sect = self.getSourceSection(source_sect_id); - assert(!source_sect.isZerofill()); - const relocs = self.getRelocs(source_sect_id); - - self.relocs_lookup[atom.sym_index] = if (self.getSourceSymbol(atom.sym_index)) |source_sym| blk: { - const offset = source_sym.n_value - source_sect.addr; - break :blk filterRelocs(relocs, offset, offset + atom.size); - } else filterRelocs(relocs, 0, atom.size); -} - -fn relocGreaterThan(ctx: void, lhs: macho.relocation_info, rhs: macho.relocation_info) bool { - _ = ctx; - return lhs.r_address > rhs.r_address; -} - -fn parseEhFrameSection(self: *Object, macho_file: *MachO, object_id: u32) !void { - const sect_id = self.eh_frame_sect_id orelse return; - const sect = self.getSourceSection(sect_id); - - log.debug("parsing __TEXT,__eh_frame section", .{}); - - const comp = macho_file.base.comp; - const gpa = comp.gpa; - - if (macho_file.eh_frame_section_index == null) { - macho_file.eh_frame_section_index = try macho_file.initSection("__TEXT", "__eh_frame", .{}); } - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - try self.parseRelocs(gpa, sect_id); - const relocs = self.getRelocs(sect_id); - - var it = self.getEhFrameRecordsIterator(); - var record_count: u32 = 0; - while (try it.next()) |_| { - record_count += 1; + if (min < subsections.items.len) { + const sub = subsections.items[min]; + const sub_addr = sect.addr + sub.off; + const sub_size = if (min + 1 < subsections.items.len) + subsections.items[min + 1].off - sub.off + else + sect.size - sub.off; + if (sub_addr == addr or (sub_addr < addr and addr < sub_addr + sub_size)) return sub.atom; } - try self.eh_frame_relocs_lookup.ensureTotalCapacity(gpa, record_count); - try self.eh_frame_records_lookup.ensureUnusedCapacity(gpa, record_count); + return null; +} - 
it.reset(); +fn linkNlistToAtom(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + for (self.symtab.items(.nlist), self.symtab.items(.atom)) |nlist, *atom| { + if (!nlist.stab() and nlist.sect()) { + if (self.findAtomInSection(nlist.n_value, nlist.n_sect - 1)) |atom_index| { + atom.* = atom_index; + } else { + macho_file.base.fatal("{}: symbol {s} not attached to any (sub)section", .{ + self.fmtPath(), self.getString(nlist.n_strx), + }); + return error.ParseFailed; + } + } + } +} - while (try it.next()) |record| { - const offset = it.pos - record.getSize(); - const rel_pos: Entry = switch (cpu_arch) { - .aarch64 => filterRelocs(relocs, offset, offset + record.getSize()), - .x86_64 => .{}, - else => unreachable, +fn initSymbols(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; + const slice = self.symtab.slice(); + + try self.symbols.ensureUnusedCapacity(gpa, slice.items(.nlist).len); + + for (slice.items(.nlist), slice.items(.atom), 0..) 
|nlist, atom_index, i| { + if (nlist.ext()) { + const name = self.getString(nlist.n_strx); + const off = try macho_file.string_intern.insert(gpa, name); + const gop = try macho_file.getOrCreateGlobal(off); + self.symbols.addOneAssumeCapacity().* = gop.index; + continue; + } + + const index = try macho_file.addSymbol(); + self.symbols.appendAssumeCapacity(index); + const symbol = macho_file.getSymbol(index); + const name = self.getString(nlist.n_strx); + symbol.* = .{ + .value = nlist.n_value, + .name = try macho_file.string_intern.insert(gpa, name), + .nlist_idx = @intCast(i), + .atom = 0, + .file = self.index, }; - self.eh_frame_relocs_lookup.putAssumeCapacityNoClobber(offset, .{ - .dead = false, - .reloc = rel_pos, - }); - if (record.tag == .fde) { - const reloc_target = blk: { - switch (cpu_arch) { - .aarch64 => { - assert(rel_pos.len > 0); // TODO convert to an error as the FDE eh frame is malformed - // Find function symbol that this record describes - const rel = for (relocs[rel_pos.start..][0..rel_pos.len]) |rel| { - if (rel.r_address - @as(i32, @intCast(offset)) == 8 and - @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)) == .ARM64_RELOC_UNSIGNED) - break rel; - } else unreachable; - const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = object_id, - .rel = rel, - .code = it.data[offset..], - .base_offset = @as(i32, @intCast(offset)), - }); - break :blk reloc_target; + if (macho_file.getAtom(atom_index)) |atom| { + assert(!nlist.abs()); + symbol.value -= atom.getInputAddress(macho_file); + symbol.atom = atom_index; + } + + symbol.flags.abs = nlist.abs(); + symbol.flags.no_dead_strip = symbol.flags.no_dead_strip or nlist.noDeadStrip(); + + if (nlist.sect() and + self.sections.items(.header)[nlist.n_sect - 1].type() == macho.S_THREAD_LOCAL_VARIABLES) + { + symbol.flags.tlv = true; + } + } +} + +fn initSymbolStabs(self: *Object, nlists: anytype, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const 
SymbolLookup = struct { + ctx: *const Object, + entries: @TypeOf(nlists), + + fn find(fs: @This(), addr: u64) ?Symbol.Index { + // TODO binary search since we have the list sorted + for (fs.entries) |nlist| { + if (nlist.nlist.n_value == addr) return fs.ctx.symbols.items[nlist.idx]; + } + return null; + } + }; + + const start: u32 = for (self.symtab.items(.nlist), 0..) |nlist, i| { + if (nlist.stab()) break @intCast(i); + } else @intCast(self.symtab.items(.nlist).len); + const end: u32 = for (self.symtab.items(.nlist)[start..], start..) |nlist, i| { + if (!nlist.stab()) break @intCast(i); + } else @intCast(self.symtab.items(.nlist).len); + + if (start == end) return; + + const gpa = macho_file.base.allocator; + const syms = self.symtab.items(.nlist); + const sym_lookup = SymbolLookup{ .ctx = self, .entries = nlists }; + + var i: u32 = start; + while (i < end) : (i += 1) { + const open = syms[i]; + if (open.n_type != macho.N_SO) { + macho_file.base.fatal("{}: unexpected symbol stab type 0x{x} as the first entry", .{ + self.fmtPath(), + open.n_type, + }); + return error.ParseFailed; + } + + while (i < end and syms[i].n_type == macho.N_SO and syms[i].n_sect != 0) : (i += 1) {} + + var sf: StabFile = .{ .comp_dir = i }; + // TODO validate + i += 3; + + while (i < end and syms[i].n_type != macho.N_SO) : (i += 1) { + const nlist = syms[i]; + var stab: StabFile.Stab = .{}; + switch (nlist.n_type) { + macho.N_BNSYM => { + stab.tag = .func; + stab.symbol = sym_lookup.find(nlist.n_value); + // TODO validate + i += 3; + }, + macho.N_GSYM => { + stab.tag = .global; + stab.symbol = macho_file.getGlobalByName(self.getString(nlist.n_strx)); + }, + macho.N_STSYM => { + stab.tag = .static; + stab.symbol = sym_lookup.find(nlist.n_value); + }, + else => { + macho_file.base.fatal("{}: unhandled symbol stab type 0x{x}", .{ + self.fmtPath(), + nlist.n_type, + }); + return error.ParseFailed; + }, + } + try sf.stabs.append(gpa, stab); + } + + try self.stab_files.append(gpa, sf); + } +} + 
+fn sortAtoms(self: *Object, macho_file: *MachO) !void { + const lessThanAtom = struct { + fn lessThanAtom(ctx: *MachO, lhs: Atom.Index, rhs: Atom.Index) bool { + return ctx.getAtom(lhs).?.getInputAddress(ctx) < ctx.getAtom(rhs).?.getInputAddress(ctx); + } + }.lessThanAtom; + mem.sort(Atom.Index, self.atoms.items, macho_file, lessThanAtom); +} + +fn initRelocs(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const cpu_arch = macho_file.options.cpu_arch.?; + const slice = self.sections.slice(); + + for (slice.items(.header), slice.items(.relocs), 0..) |sect, *out, n_sect| { + if (sect.nreloc == 0) continue; + // We skip relocs for __DWARF since even in -r mode, the linker is expected to emit + // debug symbol stabs in the relocatable. This made me curious why that is. For now, + // I shall comply, but I wanna compare with dsymutil. + if (sect.attrs() & macho.S_ATTR_DEBUG != 0 and + !mem.eql(u8, sect.sectName(), "__compact_unwind")) continue; + + switch (cpu_arch) { + .x86_64 => try x86_64.parseRelocs(self, @intCast(n_sect), sect, out, macho_file), + .aarch64 => try aarch64.parseRelocs(self, @intCast(n_sect), sect, out, macho_file), + else => unreachable, + } + + mem.sort(Relocation, out.items, {}, Relocation.lessThan); + } + + for (slice.items(.header), slice.items(.relocs), slice.items(.subsections)) |sect, relocs, subsections| { + if (sect.isZerofill()) continue; + + var next_reloc: usize = 0; + for (subsections.items) |subsection| { + const atom = macho_file.getAtom(subsection.atom).?; + if (!atom.flags.alive) continue; + if (next_reloc >= relocs.items.len) break; + const end_addr = atom.off + atom.size; + atom.relocs.pos = next_reloc; + + while (next_reloc < relocs.items.len and relocs.items[next_reloc].offset < end_addr) : (next_reloc += 1) {} + + atom.relocs.len = next_reloc - atom.relocs.pos; + } + } +} + +fn initEhFrameRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { + const tracy = 
trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; + const nlists = self.symtab.items(.nlist); + const slice = self.sections.slice(); + const sect = slice.items(.header)[sect_id]; + const relocs = slice.items(.relocs)[sect_id]; + + const data = self.getSectionData(sect_id); + try self.eh_frame_data.ensureTotalCapacityPrecise(gpa, data.len); + self.eh_frame_data.appendSliceAssumeCapacity(data); + + // Check for non-personality relocs in FDEs and apply them + for (relocs.items, 0..) |rel, i| { + switch (rel.type) { + .unsigned => { + assert((rel.meta.length == 2 or rel.meta.length == 3) and rel.meta.has_subtractor); // TODO error + const S: i64 = switch (rel.tag) { + .local => rel.meta.symbolnum, + .@"extern" => @intCast(nlists[rel.meta.symbolnum].n_value), + }; + const A = rel.addend; + const SUB: i64 = blk: { + const sub_rel = relocs.items[i - 1]; + break :blk switch (sub_rel.tag) { + .local => sub_rel.meta.symbolnum, + .@"extern" => @intCast(nlists[sub_rel.meta.symbolnum].n_value), + }; + }; + switch (rel.meta.length) { + 0, 1 => unreachable, + 2 => mem.writeInt(u32, self.eh_frame_data.items[rel.offset..][0..4], @bitCast(@as(i32, @truncate(S + A - SUB))), .little), + 3 => mem.writeInt(u64, self.eh_frame_data.items[rel.offset..][0..8], @bitCast(S + A - SUB), .little), + } + }, + else => {}, + } + } + + var it = eh_frame.Iterator{ .data = self.eh_frame_data.items }; + while (try it.next()) |rec| { + switch (rec.tag) { + .cie => try self.cies.append(gpa, .{ + .offset = rec.offset, + .size = rec.size, + .file = self.index, + }), + .fde => try self.fdes.append(gpa, .{ + .offset = rec.offset, + .size = rec.size, + .cie = undefined, + .file = self.index, + }), + } + } + + for (self.cies.items) |*cie| { + try cie.parse(macho_file); + } + + for (self.fdes.items) |*fde| { + try fde.parse(macho_file); + } + + const sortFn = struct { + fn sortFn(ctx: *MachO, lhs: Fde, rhs: Fde) bool { + return lhs.getAtom(ctx).getInputAddress(ctx) < 
rhs.getAtom(ctx).getInputAddress(ctx); + } + }.sortFn; + + mem.sort(Fde, self.fdes.items, macho_file, sortFn); + + // Parse and attach personality pointers to CIEs if any + for (relocs.items) |rel| { + switch (rel.type) { + .got => { + assert(rel.meta.length == 2 and rel.tag == .@"extern"); + const cie = for (self.cies.items) |*cie| { + if (cie.offset <= rel.offset and rel.offset < cie.offset + cie.getSize()) break cie; + } else { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ + self.fmtPath(), sect.segName(), sect.sectName(), rel.offset, + }); + return error.ParseFailed; + }; + cie.personality = .{ .index = @intCast(rel.target), .offset = rel.offset - cie.offset }; + }, + else => {}, + } + } +} + +fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const SymbolLookup = struct { + ctx: *const Object, + + fn find(fs: @This(), addr: u64) ?Symbol.Index { + for (fs.ctx.symbols.items, 0..) |sym_index, i| { + const nlist = fs.ctx.symtab.items(.nlist)[i]; + if (nlist.ext() and nlist.n_value == addr) return sym_index; + } + return null; + } + }; + + const gpa = macho_file.base.allocator; + const data = self.getSectionData(sect_id); + const nrecs = @divExact(data.len, @sizeOf(macho.compact_unwind_entry)); + const recs = @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data.ptr))[0..nrecs]; + const sym_lookup = SymbolLookup{ .ctx = self }; + + try self.unwind_records.resize(gpa, nrecs); + + const header = self.sections.items(.header)[sect_id]; + const relocs = self.sections.items(.relocs)[sect_id].items; + var reloc_idx: usize = 0; + for (recs, self.unwind_records.items, 0..) 
|rec, *out_index, rec_idx| { + const rec_start = rec_idx * @sizeOf(macho.compact_unwind_entry); + const rec_end = rec_start + @sizeOf(macho.compact_unwind_entry); + const reloc_start = reloc_idx; + while (reloc_idx < relocs.len and + relocs[reloc_idx].offset < rec_end) : (reloc_idx += 1) + {} + + out_index.* = try macho_file.addUnwindRecord(); + const out = macho_file.getUnwindRecord(out_index.*); + out.length = rec.rangeLength; + out.enc = .{ .enc = rec.compactUnwindEncoding }; + out.file = self.index; + + for (relocs[reloc_start..reloc_idx]) |rel| { + if (rel.type != .unsigned or rel.meta.length != 3) { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ + self.fmtPath(), header.segName(), header.sectName(), rel.offset, + }); + return error.ParseFailed; + } + assert(rel.type == .unsigned and rel.meta.length == 3); // TODO error + const offset = rel.offset - rec_start; + switch (offset) { + 0 => switch (rel.tag) { // target symbol + .@"extern" => { + out.atom = self.symtab.items(.atom)[rel.meta.symbolnum]; + out.atom_offset = @intCast(rec.rangeStart); }, - .x86_64 => { - const target_address = record.getTargetSymbolAddress(.{ - .base_addr = sect.addr, - .base_offset = offset, + .local => if (self.findAtom(rec.rangeStart)) |atom_index| { + out.atom = atom_index; + const atom = out.getAtom(macho_file); + out.atom_offset = @intCast(rec.rangeStart - atom.getInputAddress(macho_file)); + } else { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ + self.fmtPath(), header.segName(), header.sectName(), rel.offset, }); - const target_sym_index = self.getSymbolByAddress(target_address, null); - const reloc_target = if (self.getGlobal(target_sym_index)) |global_index| - macho_file.globals.items[global_index] - else - SymbolWithLoc{ .sym_index = target_sym_index, .file = object_id + 1 }; - break :blk reloc_target; + return error.ParseFailed; }, + }, + 16 => switch (rel.tag) { // personality function + .@"extern" => { + out.personality = rel.target; 
+ }, + .local => if (sym_lookup.find(rec.personalityFunction)) |sym_index| { + out.personality = sym_index; + } else { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ + self.fmtPath(), header.segName(), header.sectName(), rel.offset, + }); + return error.ParseFailed; + }, + }, + 24 => switch (rel.tag) { // lsda + .@"extern" => { + out.lsda = self.symtab.items(.atom)[rel.meta.symbolnum]; + out.lsda_offset = @intCast(rec.lsda); + }, + .local => if (self.findAtom(rec.lsda)) |atom_index| { + out.lsda = atom_index; + const atom = out.getLsdaAtom(macho_file).?; + out.lsda_offset = @intCast(rec.lsda - atom.getInputAddress(macho_file)); + } else { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ + self.fmtPath(), header.segName(), header.sectName(), rel.offset, + }); + return error.ParseFailed; + }, + }, + else => {}, + } + } + } + + if (!macho_file.options.relocatable) try self.synthesiseNullUnwindRecords(macho_file); + + const sortFn = struct { + fn sortFn(ctx: *MachO, lhs_index: UnwindInfo.Record.Index, rhs_index: UnwindInfo.Record.Index) bool { + const lhs = ctx.getUnwindRecord(lhs_index); + const rhs = ctx.getUnwindRecord(rhs_index); + const lhsa = lhs.getAtom(ctx); + const rhsa = rhs.getAtom(ctx); + return lhsa.getInputAddress(ctx) + lhs.atom_offset < rhsa.getInputAddress(ctx) + rhs.atom_offset; + } + }.sortFn; + mem.sort(UnwindInfo.Record.Index, self.unwind_records.items, macho_file, sortFn); + + // Associate unwind records to atoms + var next_cu: u32 = 0; + while (next_cu < self.unwind_records.items.len) { + const start = next_cu; + const rec_index = self.unwind_records.items[start]; + const rec = macho_file.getUnwindRecord(rec_index); + while (next_cu < self.unwind_records.items.len and + macho_file.getUnwindRecord(self.unwind_records.items[next_cu]).atom == rec.atom) : (next_cu += 1) + {} + + const atom = rec.getAtom(macho_file); + atom.unwind_records = .{ .pos = start, .len = next_cu - start }; + } +} + +fn 
synthesiseNullUnwindRecords(self: *Object, macho_file: *MachO) !void { + // Synthesise missing unwind records. + // The logic here is as follows: + // 1. if an atom has unwind info record that is not DWARF, FDE is marked dead + // 2. if an atom has unwind info record that is DWARF, FDE is tied to this unwind record + // 3. if an atom doesn't have unwind info record but FDE is available, synthesise and tie + // 4. if an atom doesn't have either, synthesise a null unwind info record + + const Superposition = struct { atom: Atom.Index, size: u64, cu: ?UnwindInfo.Record.Index = null, fde: ?Fde.Index = null }; + + const gpa = macho_file.base.allocator; + var superposition = std.AutoArrayHashMap(u64, Superposition).init(gpa); + defer superposition.deinit(); + + const slice = self.symtab.slice(); + for (slice.items(.nlist), slice.items(.atom), slice.items(.size)) |nlist, atom, size| { + if (nlist.stab()) continue; + if (!nlist.sect()) continue; + const sect = self.sections.items(.header)[nlist.n_sect - 1]; + if (sect.isCode()) { + try superposition.ensureUnusedCapacity(1); + const gop = superposition.getOrPutAssumeCapacity(nlist.n_value); + if (gop.found_existing) { + assert(gop.value_ptr.atom == atom and gop.value_ptr.size == size); + } + gop.value_ptr.* = .{ .atom = atom, .size = size }; + } + } + + for (self.unwind_records.items) |rec_index| { + const rec = macho_file.getUnwindRecord(rec_index); + const atom = rec.getAtom(macho_file); + const addr = atom.getInputAddress(macho_file) + rec.atom_offset; + superposition.getPtr(addr).?.cu = rec_index; + } + + for (self.fdes.items, 0..) 
|fde, fde_index| { + const atom = fde.getAtom(macho_file); + const addr = atom.getInputAddress(macho_file) + fde.atom_offset; + superposition.getPtr(addr).?.fde = @intCast(fde_index); + } + + for (superposition.keys(), superposition.values()) |addr, meta| { + if (meta.fde) |fde_index| { + const fde = &self.fdes.items[fde_index]; + + if (meta.cu) |rec_index| { + const rec = macho_file.getUnwindRecord(rec_index); + if (!rec.enc.isDwarf(macho_file)) { + // Mark FDE dead + fde.alive = false; + } else { + // Tie FDE to unwind record + rec.fde = fde_index; + } + } else { + // Synthesise new unwind info record + const fde_data = fde.getData(macho_file); + const atom_size = mem.readInt(u64, fde_data[16..][0..8], .little); + const rec_index = try macho_file.addUnwindRecord(); + const rec = macho_file.getUnwindRecord(rec_index); + try self.unwind_records.append(gpa, rec_index); + rec.length = @intCast(atom_size); + rec.atom = fde.atom; + rec.atom_offset = fde.atom_offset; + rec.fde = fde_index; + rec.file = fde.file; + switch (macho_file.options.cpu_arch.?) { + .x86_64 => rec.enc.setMode(macho.UNWIND_X86_64_MODE.DWARF), + .aarch64 => rec.enc.setMode(macho.UNWIND_ARM64_MODE.DWARF), else => unreachable, } - }; - if (reloc_target.getFile() != object_id) { - log.debug("FDE at offset {x} marked DEAD", .{offset}); - self.eh_frame_relocs_lookup.getPtr(offset).?.dead = true; - } else { - // You would think that we are done but turns out that the compilers may use - // whichever symbol alias they want for a target symbol. This in particular - // very problematic when using Zig's @export feature to re-export symbols under - // additional names. For that reason, we need to ensure we record aliases here - // too so that we can tie them with their matching unwind records and vice versa. 
- const aliases = self.getSymbolAliases(reloc_target.sym_index); - var i: u32 = 0; - while (i < aliases.len) : (i += 1) { - const actual_target = SymbolWithLoc{ - .sym_index = i + aliases.start, - .file = reloc_target.file, - }; - log.debug("FDE at offset {x} tracks {s}", .{ - offset, - macho_file.getSymbolName(actual_target), - }); - try self.eh_frame_records_lookup.putNoClobber(gpa, actual_target, offset); - } } + } else if (meta.cu == null and meta.fde == null) { + // Create a null record + const rec_index = try macho_file.addUnwindRecord(); + const rec = macho_file.getUnwindRecord(rec_index); + const atom = macho_file.getAtom(meta.atom).?; + try self.unwind_records.append(gpa, rec_index); + rec.length = @intCast(meta.size); + rec.atom = meta.atom; + rec.atom_offset = @intCast(addr - atom.getInputAddress(macho_file)); + rec.file = self.index; } } } -fn parseUnwindInfo(self: *Object, macho_file: *MachO, object_id: u32) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const sect_id = self.unwind_info_sect_id orelse { - // If it so happens that the object had `__eh_frame` section defined but no `__compact_unwind`, - // we will try fully synthesising unwind info records to somewhat match Apple ld's - // approach. However, we will only synthesise DWARF records and nothing more. For this reason, - // we still create the output `__TEXT,__unwind_info` section. 
- if (self.hasEhFrameRecords()) { - if (macho_file.unwind_info_section_index == null) { - macho_file.unwind_info_section_index = try macho_file.initSection( - "__TEXT", - "__unwind_info", - .{}, - ); - } - } - return; - }; - - log.debug("parsing unwind info in {s}", .{self.name}); - - if (macho_file.unwind_info_section_index == null) { - macho_file.unwind_info_section_index = try macho_file.initSection("__TEXT", "__unwind_info", .{}); - } - - const unwind_records = self.getUnwindRecords(); - - try self.unwind_records_lookup.ensureUnusedCapacity(gpa, @as(u32, @intCast(unwind_records.len))); - - const needs_eh_frame = for (unwind_records) |record| { - if (UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) break true; - } else false; - - if (needs_eh_frame and !self.hasEhFrameRecords()) return error.MissingEhFrameSection; - - try self.parseRelocs(gpa, sect_id); - const relocs = self.getRelocs(sect_id); - - for (unwind_records, 0..) |record, record_id| { - const offset = record_id * @sizeOf(macho.compact_unwind_entry); - const rel_pos = filterRelocs( - relocs, - offset, - offset + @sizeOf(macho.compact_unwind_entry), - ); - assert(rel_pos.len > 0); // TODO convert to an error as the unwind info is malformed - self.unwind_relocs_lookup[record_id] = .{ - .dead = false, - .reloc = rel_pos, - }; - - // Find function symbol that this record describes - const rel = relocs[rel_pos.start..][rel_pos.len - 1]; - const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = object_id, - .rel = rel, - .code = mem.asBytes(&record), - .base_offset = @as(i32, @intCast(offset)), - }); - if (reloc_target.getFile() != object_id) { - log.debug("unwind record {d} marked DEAD", .{record_id}); - self.unwind_relocs_lookup[record_id].dead = true; - } else { - // You would think that we are done but turns out that the compilers may use - // whichever symbol alias they want for a target symbol. 
This in particular - // very problematic when using Zig's @export feature to re-export symbols under - // additional names. For that reason, we need to ensure we record aliases here - // too so that we can tie them with their matching unwind records and vice versa. - const aliases = self.getSymbolAliases(reloc_target.sym_index); - var i: u32 = 0; - while (i < aliases.len) : (i += 1) { - const actual_target = SymbolWithLoc{ - .sym_index = i + aliases.start, - .file = reloc_target.file, - }; - log.debug("unwind record {d} tracks {s}", .{ - record_id, - macho_file.getSymbolName(actual_target), - }); - try self.unwind_records_lookup.putNoClobber(gpa, actual_target, @intCast(record_id)); - } - } - } -} - -pub fn getSourceSymbol(self: Object, index: u32) ?macho.nlist_64 { - const symtab = self.in_symtab.?; - if (index >= symtab.len) return null; - const mapped_index = self.source_symtab_lookup[index]; - return symtab[mapped_index]; -} - -pub fn getSourceSection(self: Object, index: u8) macho.section_64 { - const sections = self.getSourceSections(); - assert(index < sections.len); - return sections[index]; -} - -pub fn getSourceSectionByName(self: Object, segname: []const u8, sectname: []const u8) ?macho.section_64 { - const index = self.getSourceSectionIndexByName(segname, sectname) orelse return null; - const sections = self.getSourceSections(); - return sections[index]; -} - -pub fn getSourceSectionIndexByName(self: Object, segname: []const u8, sectname: []const u8) ?u8 { - const sections = self.getSourceSections(); - for (sections, 0..) 
|sect, i| { - if (mem.eql(u8, segname, sect.segName()) and mem.eql(u8, sectname, sect.sectName())) - return @as(u8, @intCast(i)); - } else return null; -} - -pub fn getSourceSections(self: Object) []align(1) const macho.section_64 { +fn initPlatform(self: *Object) void { var it = LoadCommandIterator{ - .ncmds = self.header.ncmds, - .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + .ncmds = self.header.?.ncmds, + .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], }; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => { - return cmd.getSections(); - }, - else => {}, - } else unreachable; -} - -pub fn parseDataInCode(self: *Object, gpa: Allocator) !void { - var it = LoadCommandIterator{ - .ncmds = self.header.ncmds, - .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], - }; - const cmd = while (it.next()) |cmd| { - switch (cmd.cmd()) { - .DATA_IN_CODE => break cmd.cast(macho.linkedit_data_command).?, - else => {}, - } - } else return; - const ndice = @divExact(cmd.datasize, @sizeOf(macho.data_in_code_entry)); - const dice = @as([*]align(1) const macho.data_in_code_entry, @ptrCast(self.contents.ptr + cmd.dataoff))[0..ndice]; - try self.data_in_code.ensureTotalCapacityPrecise(gpa, dice.len); - self.data_in_code.appendUnalignedSliceAssumeCapacity(dice); - mem.sort(macho.data_in_code_entry, self.data_in_code.items, {}, diceLessThan); -} - -fn diceLessThan(ctx: void, lhs: macho.data_in_code_entry, rhs: macho.data_in_code_entry) bool { - _ = ctx; - return lhs.offset < rhs.offset; -} - -fn getDysymtab(self: Object) ?macho.dysymtab_command { - var it = LoadCommandIterator{ - .ncmds = self.header.ncmds, - .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], - }; - while (it.next()) |cmd| { - switch (cmd.cmd()) { - .DYSYMTAB => return cmd.cast(macho.dysymtab_command).?, - else => {}, - } - } else return null; -} - -pub fn 
parseDwarfInfo(self: Object) DwarfInfo { - var di = DwarfInfo{ - .debug_info = &[0]u8{}, - .debug_abbrev = &[0]u8{}, - .debug_str = &[0]u8{}, - }; - for (self.getSourceSections()) |sect| { - if (!sect.isDebug()) continue; - const sectname = sect.sectName(); - if (mem.eql(u8, sectname, "__debug_info")) { - di.debug_info = self.getSectionContents(sect); - } else if (mem.eql(u8, sectname, "__debug_abbrev")) { - di.debug_abbrev = self.getSectionContents(sect); - } else if (mem.eql(u8, sectname, "__debug_str")) { - di.debug_str = self.getSectionContents(sect); - } - } - return di; -} - -/// Returns Platform composed from the first encountered build version type load command: -/// either LC_BUILD_VERSION or LC_VERSION_MIN_*. -pub fn getPlatform(self: Object) ?Platform { - var it = LoadCommandIterator{ - .ncmds = self.header.ncmds, - .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], - }; - while (it.next()) |cmd| { + self.platform = while (it.next()) |cmd| { switch (cmd.cmd()) { .BUILD_VERSION, .VERSION_MIN_MACOSX, .VERSION_MIN_IPHONEOS, .VERSION_MIN_TVOS, .VERSION_MIN_WATCHOS, - => return Platform.fromLoadCommand(cmd), + => break MachO.Options.Platform.fromLoadCommand(cmd), else => {}, } + } else null; +} + +/// Currently, we only check if a compile unit for this input object file exists +/// and record that so that we can emit symbol stabs. +/// TODO in the future, we want parse debug info and debug line sections so that +/// we can provide nice error locations to the user. +fn initDwarfInfo(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.allocator; + + var debug_info_index: ?usize = null; + var debug_abbrev_index: ?usize = null; + var debug_str_index: ?usize = null; + + for (self.sections.items(.header), 0..) 
|sect, index| { + if (sect.attrs() & macho.S_ATTR_DEBUG == 0) continue; + if (mem.eql(u8, sect.sectName(), "__debug_info")) debug_info_index = index; + if (mem.eql(u8, sect.sectName(), "__debug_abbrev")) debug_abbrev_index = index; + if (mem.eql(u8, sect.sectName(), "__debug_str")) debug_str_index = index; + } + + if (debug_info_index == null or debug_abbrev_index == null) return; + + var dwarf_info = DwarfInfo{ + .debug_info = self.getSectionData(@intCast(debug_info_index.?)), + .debug_abbrev = self.getSectionData(@intCast(debug_abbrev_index.?)), + .debug_str = if (debug_str_index) |index| self.getSectionData(@intCast(index)) else "", + }; + dwarf_info.init(gpa) catch { + macho_file.base.fatal("{}: invalid __DWARF info found", .{self.fmtPath()}); + return error.ParseFailed; + }; + self.dwarf_info = dwarf_info; +} + +pub fn resolveSymbols(self: *Object, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items, 0..) |index, i| { + const nlist_idx = @as(Symbol.Index, @intCast(i)); + const nlist = self.symtab.items(.nlist)[nlist_idx]; + const atom_index = self.symtab.items(.atom)[nlist_idx]; + + if (!nlist.ext()) continue; + if (nlist.undf() and !nlist.tentative()) continue; + if (nlist.sect()) { + const atom = macho_file.getAtom(atom_index).?; + if (!atom.flags.alive) continue; + } + + const symbol = macho_file.getSymbol(index); + if (self.asFile().getSymbolRank(.{ + .archive = !self.alive, + .weak = nlist.weakDef(), + .tentative = nlist.tentative(), + }) < symbol.getSymbolRank(macho_file)) { + const value = if (nlist.sect()) blk: { + const atom = macho_file.getAtom(atom_index).?; + break :blk nlist.n_value - atom.getInputAddress(macho_file); + } else nlist.n_value; + symbol.value = value; + symbol.atom = atom_index; + symbol.nlist_idx = nlist_idx; + symbol.file = self.index; + symbol.flags.weak = nlist.weakDef(); + symbol.flags.abs = nlist.abs(); + symbol.flags.tentative = nlist.tentative(); + symbol.flags.weak_ref = 
false; + symbol.flags.dyn_ref = nlist.n_desc & macho.REFERENCED_DYNAMICALLY != 0; + symbol.flags.no_dead_strip = symbol.flags.no_dead_strip or nlist.noDeadStrip(); + symbol.flags.interposable = macho_file.options.dylib and macho_file.options.namespace == .flat and !nlist.pext(); + + if (nlist.sect() and + self.sections.items(.header)[nlist.n_sect - 1].type() == macho.S_THREAD_LOCAL_VARIABLES) + { + symbol.flags.tlv = true; + } + } + + // Regardless of who the winner is, we still merge symbol visibility here. + if (nlist.pext() or (nlist.weakDef() and nlist.weakRef()) or self.hidden) { + if (symbol.visibility != .global) { + symbol.visibility = .hidden; + } + } else { + symbol.visibility = .global; + } + } +} + +pub fn resetGlobals(self: *Object, macho_file: *MachO) void { + for (self.symbols.items, 0..) |sym_index, nlist_idx| { + if (!self.symtab.items(.nlist)[nlist_idx].ext()) continue; + const sym = macho_file.getSymbol(sym_index); + const name = sym.name; + sym.* = .{}; + sym.name = name; + } +} + +pub fn markLive(self: *Object, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items, 0..) 
|index, nlist_idx| { + const nlist = self.symtab.items(.nlist)[nlist_idx]; + if (!nlist.ext()) continue; + + const sym = macho_file.getSymbol(index); + const file = sym.getFile(macho_file) orelse continue; + const should_keep = nlist.undf() or (nlist.tentative() and !sym.flags.tentative); + if (should_keep and file == .object and !file.object.alive) { + file.object.alive = true; + file.object.markLive(macho_file); + } + } +} + +pub fn scanRelocs(self: Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.atoms.items) |atom_index| { + const atom = macho_file.getAtom(atom_index).?; + if (!atom.flags.alive) continue; + const sect = atom.getInputSection(macho_file); + if (sect.isZerofill()) continue; + try atom.scanRelocs(macho_file); + } + + for (self.unwind_records.items) |rec_index| { + const rec = macho_file.getUnwindRecord(rec_index); + if (!rec.alive) continue; + if (rec.getFde(macho_file)) |fde| { + if (fde.getCie(macho_file).getPersonality(macho_file)) |sym| { + sym.flags.got = true; + } + } else if (rec.getPersonality(macho_file)) |sym| { + sym.flags.got = true; + } + } +} + +pub fn convertTentativeDefinitions(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; + + for (self.symbols.items, 0..) 
|index, i| { + const sym = macho_file.getSymbol(index); + if (!sym.flags.tentative) continue; + const sym_file = sym.getFile(macho_file).?; + if (sym_file.getIndex() != self.index) continue; + + const nlist_idx = @as(Symbol.Index, @intCast(i)); + const nlist = &self.symtab.items(.nlist)[nlist_idx]; + const nlist_atom = &self.symtab.items(.atom)[nlist_idx]; + + const atom_index = try macho_file.addAtom(); + try self.atoms.append(gpa, atom_index); + + const name = try std.fmt.allocPrintZ(gpa, "__DATA$__common${s}", .{sym.getName(macho_file)}); + defer gpa.free(name); + const atom = macho_file.getAtom(atom_index).?; + atom.atom_index = atom_index; + atom.name = try macho_file.string_intern.insert(gpa, name); + atom.file = self.index; + atom.size = nlist.n_value; + atom.alignment = (nlist.n_desc >> 8) & 0x0f; + + const n_sect = try self.addSection(gpa, "__DATA", "__common"); + const sect = &self.sections.items(.header)[n_sect]; + sect.flags = macho.S_ZEROFILL; + sect.size = atom.size; + sect.@"align" = atom.alignment; + atom.n_sect = n_sect; + + sym.value = 0; + sym.atom = atom_index; + sym.flags.weak = false; + sym.flags.weak_ref = false; + sym.flags.tentative = false; + sym.visibility = .global; + + nlist.n_value = 0; + nlist.n_type = macho.N_EXT | macho.N_SECT; + nlist.n_sect = 0; + nlist.n_desc = 0; + nlist_atom.* = atom_index; + } +} + +fn addSection(self: *Object, allocator: Allocator, segname: []const u8, sectname: []const u8) !u32 { + const n_sect = @as(u32, @intCast(try self.sections.addOne(allocator))); + self.sections.set(n_sect, .{ + .header = .{ + .sectname = MachO.makeStaticString(sectname), + .segname = MachO.makeStaticString(segname), + }, + }); + return n_sect; +} + +pub fn calcSymtabSize(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + const file = sym.getFile(macho_file) orelse continue; + if (file.getIndex() != 
self.index) continue; + if (sym.getAtom(macho_file)) |atom| if (!atom.flags.alive) continue; + if (sym.isSymbolStab(macho_file)) continue; + const name = sym.getName(macho_file); + // TODO in -r mode, we actually want to merge symbol names and emit only one + // work it out when emitting relocs + if (name.len > 0 and (name[0] == 'L' or name[0] == 'l') and !macho_file.options.relocatable) continue; + sym.flags.output_symtab = true; + if (sym.isLocal()) { + try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nlocals }, macho_file); + self.output_symtab_ctx.nlocals += 1; + } else if (sym.flags.@"export") { + try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nexports }, macho_file); + self.output_symtab_ctx.nexports += 1; + } else { + assert(sym.flags.import); + try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nimports }, macho_file); + self.output_symtab_ctx.nimports += 1; + } + self.output_symtab_ctx.strsize += @as(u32, @intCast(sym.getName(macho_file).len + 1)); + } + + if (!macho_file.options.strip and self.hasDebugInfo()) self.calcStabsSize(macho_file); +} + +pub fn calcStabsSize(self: *Object, macho_file: *MachO) void { + if (self.dwarf_info) |dw| { + // TODO handle multiple CUs + const cu = dw.compile_units.items[0]; + const comp_dir = cu.getCompileDir(dw) orelse return; + const tu_name = cu.getSourceFile(dw) orelse return; + + self.output_symtab_ctx.nstabs += 4; // N_SO, N_SO, N_OSO, N_SO + self.output_symtab_ctx.strsize += @as(u32, @intCast(comp_dir.len + 1)); // comp_dir + self.output_symtab_ctx.strsize += @as(u32, @intCast(tu_name.len + 1)); // tu_name + + if (self.archive) |path| { + self.output_symtab_ctx.strsize += @as(u32, @intCast(path.len + 1 + self.path.len + 1 + 1)); + } else { + self.output_symtab_ctx.strsize += @as(u32, @intCast(self.path.len + 1)); + } + + for (self.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + const file = sym.getFile(macho_file) orelse continue; + if (file.getIndex() != self.index) 
continue; + if (!sym.flags.output_symtab) continue; + if (macho_file.options.relocatable) { + const name = sym.getName(macho_file); + if (name.len > 0 and (name[0] == 'L' or name[0] == 'l')) continue; + } + const sect = macho_file.sections.items(.header)[sym.out_n_sect]; + if (sect.isCode()) { + self.output_symtab_ctx.nstabs += 4; // N_BNSYM, N_FUN, N_FUN, N_ENSYM + } else if (sym.visibility == .global) { + self.output_symtab_ctx.nstabs += 1; // N_GSYM + } else { + self.output_symtab_ctx.nstabs += 1; // N_STSYM + } + } + } else { + assert(self.hasSymbolStabs()); + + for (self.stab_files.items) |sf| { + self.output_symtab_ctx.nstabs += 4; // N_SO, N_SO, N_OSO, N_SO + self.output_symtab_ctx.strsize += @as(u32, @intCast(sf.getCompDir(self).len + 1)); // comp_dir + self.output_symtab_ctx.strsize += @as(u32, @intCast(sf.getTuName(self).len + 1)); // tu_name + self.output_symtab_ctx.strsize += @as(u32, @intCast(sf.getOsoPath(self).len + 1)); // path + + for (sf.stabs.items) |stab| { + const sym = stab.getSymbol(macho_file) orelse continue; + const file = sym.getFile(macho_file).?; + if (file.getIndex() != self.index) continue; + if (!sym.flags.output_symtab) continue; + const nstabs: u32 = switch (stab.tag) { + .func => 4, // N_BNSYM, N_FUN, N_FUN, N_ENSYM + .global => 1, // N_GSYM + .static => 1, // N_STSYM + }; + self.output_symtab_ctx.nstabs += nstabs; + } + } + } +} + +pub fn writeSymtab(self: Object, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + const file = sym.getFile(macho_file) orelse continue; + if (file.getIndex() != self.index) continue; + const idx = sym.getOutputSymtabIndex(macho_file) orelse continue; + const n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + macho_file.strtab.appendSliceAssumeCapacity(sym.getName(macho_file)); + macho_file.strtab.appendAssumeCapacity(0); + const out_sym = &macho_file.symtab.items[idx]; + 
out_sym.n_strx = n_strx; + sym.setOutputSym(macho_file, out_sym); + } + + if (!macho_file.options.strip and self.hasDebugInfo()) self.writeStabs(macho_file); +} + +pub fn writeStabs(self: *const Object, macho_file: *MachO) void { + const writeFuncStab = struct { + inline fn writeFuncStab( + n_strx: u32, + n_sect: u8, + n_value: u64, + size: u64, + index: u32, + ctx: *MachO, + ) void { + ctx.symtab.items[index] = .{ + .n_strx = 0, + .n_type = macho.N_BNSYM, + .n_sect = n_sect, + .n_desc = 0, + .n_value = n_value, + }; + ctx.symtab.items[index + 1] = .{ + .n_strx = n_strx, + .n_type = macho.N_FUN, + .n_sect = n_sect, + .n_desc = 0, + .n_value = n_value, + }; + ctx.symtab.items[index + 2] = .{ + .n_strx = 0, + .n_type = macho.N_FUN, + .n_sect = 0, + .n_desc = 0, + .n_value = size, + }; + ctx.symtab.items[index + 3] = .{ + .n_strx = 0, + .n_type = macho.N_ENSYM, + .n_sect = n_sect, + .n_desc = 0, + .n_value = size, + }; + } + }.writeFuncStab; + + var index = self.output_symtab_ctx.istab; + + if (self.dwarf_info) |dw| { + // TODO handle multiple CUs + const cu = dw.compile_units.items[0]; + const comp_dir = cu.getCompileDir(dw) orelse return; + const tu_name = cu.getSourceFile(dw) orelse return; + + // Open scope + // N_SO comp_dir + var n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + macho_file.strtab.appendSliceAssumeCapacity(comp_dir); + macho_file.strtab.appendAssumeCapacity(0); + macho_file.symtab.items[index] = .{ + .n_strx = n_strx, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + index += 1; + // N_SO tu_name + n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + macho_file.strtab.appendSliceAssumeCapacity(tu_name); + macho_file.strtab.appendAssumeCapacity(0); + macho_file.symtab.items[index] = .{ + .n_strx = n_strx, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + index += 1; + // N_OSO path + n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + if (self.archive) |path| { + 
macho_file.strtab.appendSliceAssumeCapacity(path); + macho_file.strtab.appendAssumeCapacity('('); + macho_file.strtab.appendSliceAssumeCapacity(self.path); + macho_file.strtab.appendAssumeCapacity(')'); + macho_file.strtab.appendAssumeCapacity(0); + } else { + macho_file.strtab.appendSliceAssumeCapacity(self.path); + macho_file.strtab.appendAssumeCapacity(0); + } + macho_file.symtab.items[index] = .{ + .n_strx = n_strx, + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = self.mtime, + }; + index += 1; + + for (self.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + const file = sym.getFile(macho_file) orelse continue; + if (file.getIndex() != self.index) continue; + if (!sym.flags.output_symtab) continue; + if (macho_file.options.relocatable) { + const name = sym.getName(macho_file); + if (name.len > 0 and (name[0] == 'L' or name[0] == 'l')) continue; + } + const sect = macho_file.sections.items(.header)[sym.out_n_sect]; + const sym_n_strx = n_strx: { + const symtab_index = sym.getOutputSymtabIndex(macho_file).?; + const osym = macho_file.symtab.items[symtab_index]; + break :n_strx osym.n_strx; + }; + const sym_n_sect: u8 = if (!sym.flags.abs) @intCast(sym.out_n_sect + 1) else 0; + const sym_n_value = sym.getAddress(.{}, macho_file); + const sym_size = sym.getSize(macho_file); + if (sect.isCode()) { + writeFuncStab(sym_n_strx, sym_n_sect, sym_n_value, sym_size, index, macho_file); + index += 4; + } else if (sym.visibility == .global) { + macho_file.symtab.items[index] = .{ + .n_strx = sym_n_strx, + .n_type = macho.N_GSYM, + .n_sect = sym_n_sect, + .n_desc = 0, + .n_value = 0, + }; + index += 1; + } else { + macho_file.symtab.items[index] = .{ + .n_strx = sym_n_strx, + .n_type = macho.N_STSYM, + .n_sect = sym_n_sect, + .n_desc = 0, + .n_value = sym_n_value, + }; + index += 1; + } + } + + // Close scope + // N_SO + macho_file.symtab.items[index] = .{ + .n_strx = 0, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + 
.n_value = 0, + }; + } else { + assert(self.hasSymbolStabs()); + + for (self.stab_files.items) |sf| { + // Open scope + // N_SO comp_dir + var n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + macho_file.strtab.appendSliceAssumeCapacity(sf.getCompDir(self)); + macho_file.strtab.appendAssumeCapacity(0); + macho_file.symtab.items[index] = .{ + .n_strx = n_strx, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + index += 1; + // N_SO tu_name + n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + macho_file.strtab.appendSliceAssumeCapacity(sf.getTuName(self)); + macho_file.strtab.appendAssumeCapacity(0); + macho_file.symtab.items[index] = .{ + .n_strx = n_strx, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + index += 1; + // N_OSO path + n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + macho_file.strtab.appendSliceAssumeCapacity(sf.getOsoPath(self)); + macho_file.strtab.appendAssumeCapacity(0); + macho_file.symtab.items[index] = .{ + .n_strx = n_strx, + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = sf.getOsoModTime(self), + }; + index += 1; + + for (sf.stabs.items) |stab| { + const sym = stab.getSymbol(macho_file) orelse continue; + const file = sym.getFile(macho_file).?; + if (file.getIndex() != self.index) continue; + if (!sym.flags.output_symtab) continue; + const sym_n_strx = n_strx: { + const symtab_index = sym.getOutputSymtabIndex(macho_file).?; + const osym = macho_file.symtab.items[symtab_index]; + break :n_strx osym.n_strx; + }; + const sym_n_sect: u8 = if (!sym.flags.abs) @intCast(sym.out_n_sect + 1) else 0; + const sym_n_value = sym.getAddress(.{}, macho_file); + const sym_size = sym.getSize(macho_file); + switch (stab.tag) { + .func => { + writeFuncStab(sym_n_strx, sym_n_sect, sym_n_value, sym_size, index, macho_file); + index += 4; + }, + .global => { + macho_file.symtab.items[index] = .{ + .n_strx = sym_n_strx, + .n_type = macho.N_GSYM, + .n_sect = 
sym_n_sect, + .n_desc = 0, + .n_value = 0, + }; + index += 1; + }, + .static => { + macho_file.symtab.items[index] = .{ + .n_strx = sym_n_strx, + .n_type = macho.N_STSYM, + .n_sect = sym_n_sect, + .n_desc = 0, + .n_value = sym_n_value, + }; + index += 1; + }, + } + } + + // Close scope + // N_SO + macho_file.symtab.items[index] = .{ + .n_strx = 0, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + index += 1; + } + } +} + +fn getLoadCommand(self: Object, lc: macho.LC) ?LoadCommandIterator.LoadCommand { + var it = LoadCommandIterator{ + .ncmds = self.header.?.ncmds, + .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], + }; + while (it.next()) |cmd| { + if (cmd.cmd() == lc) return cmd; } else return null; } -pub fn getSectionContents(self: Object, sect: macho.section_64) []const u8 { - const size = @as(usize, @intCast(sect.size)); - return self.contents[sect.offset..][0..size]; +pub fn getSectionData(self: *const Object, index: u32) []const u8 { + const slice = self.sections.slice(); + assert(index < slice.items(.header).len); + const sect = slice.items(.header)[index]; + return self.data[sect.offset..][0..sect.size]; } -pub fn getSectionAliasSymbolIndex(self: Object, sect_id: u8) u32 { - const start = @as(u32, @intCast(self.in_symtab.?.len)); - return start + sect_id; +fn getString(self: Object, off: u32) [:0]const u8 { + assert(off < self.strtab.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.ptr + off)), 0); } -pub fn getSectionAliasSymbol(self: *Object, sect_id: u8) macho.nlist_64 { - return self.symtab[self.getSectionAliasSymbolIndex(sect_id)]; +/// TODO handle multiple CUs +pub fn hasDebugInfo(self: Object) bool { + if (self.dwarf_info) |dw| { + return dw.compile_units.items.len > 0; + } + return self.hasSymbolStabs(); } -pub fn getSectionAliasSymbolPtr(self: *Object, sect_id: u8) *macho.nlist_64 { - return &self.symtab[self.getSectionAliasSymbolIndex(sect_id)]; +fn 
hasSymbolStabs(self: Object) bool { + return self.stab_files.items.len > 0; } -fn getSourceRelocs(self: Object, sect: macho.section_64) ?[]align(1) const macho.relocation_info { - if (sect.nreloc == 0) return null; - return @as([*]align(1) const macho.relocation_info, @ptrCast(self.contents.ptr + sect.reloff))[0..sect.nreloc]; +pub fn hasObjc(self: Object) bool { + for (self.symtab.items(.nlist)) |nlist| { + const name = self.getString(nlist.n_strx); + if (mem.startsWith(u8, name, "_OBJC_CLASS_$_")) return true; + } + for (self.sections.items(.header)) |sect| { + if (mem.eql(u8, sect.segName(), "__DATA") and mem.eql(u8, sect.sectName(), "__objc_catlist")) return true; + if (mem.eql(u8, sect.segName(), "__TEXT") and mem.eql(u8, sect.sectName(), "__swift")) return true; + } + return false; } -pub fn getRelocs(self: Object, sect_id: u8) []const macho.relocation_info { - const sect = self.getSourceSection(sect_id); - const start = self.section_relocs_lookup.items[sect_id]; - const len = sect.nreloc; - return self.relocations.items[start..][0..len]; +pub fn getDataInCode(self: Object) []align(1) const macho.data_in_code_entry { + const lc = self.getLoadCommand(.DATA_IN_CODE) orelse return &[0]macho.data_in_code_entry{}; + const cmd = lc.cast(macho.linkedit_data_command).?; + const ndice = @divExact(cmd.datasize, @sizeOf(macho.data_in_code_entry)); + const dice = @as( + [*]align(1) const macho.data_in_code_entry, + @ptrCast(self.data.ptr + cmd.dataoff), + )[0..ndice]; + return dice; } -pub fn getSymbolName(self: Object, index: u32) []const u8 { - const strtab = self.in_strtab.?; - const sym = self.symtab[index]; +pub inline fn hasSubsections(self: Object) bool { + return self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; +} - if (self.getSourceSymbol(index) == null) { - assert(sym.n_strx == 0); - return ""; +pub fn asFile(self: *Object) File { + return .{ .object = self }; +} + +pub fn format( + self: *Object, + comptime unused_fmt_string: []const u8, + 
options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = self; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format objects directly"); +} + +const FormatContext = struct { + object: *Object, + macho_file: *MachO, +}; + +pub fn fmtAtoms(self: *Object, macho_file: *MachO) std.fmt.Formatter(formatAtoms) { + return .{ .data = .{ + .object = self, + .macho_file = macho_file, + } }; +} + +fn formatAtoms( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + const object = ctx.object; + try writer.writeAll(" atoms\n"); + for (object.atoms.items) |atom_index| { + const atom = ctx.macho_file.getAtom(atom_index).?; + try writer.print(" {}\n", .{atom.fmt(ctx.macho_file)}); + } +} + +pub fn fmtCies(self: *Object, macho_file: *MachO) std.fmt.Formatter(formatCies) { + return .{ .data = .{ + .object = self, + .macho_file = macho_file, + } }; +} + +fn formatCies( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + const object = ctx.object; + try writer.writeAll(" cies\n"); + for (object.cies.items, 0..) |cie, i| { + try writer.print(" cie({d}) : {}\n", .{ i, cie.fmt(ctx.macho_file) }); + } +} + +pub fn fmtFdes(self: *Object, macho_file: *MachO) std.fmt.Formatter(formatFdes) { + return .{ .data = .{ + .object = self, + .macho_file = macho_file, + } }; +} + +fn formatFdes( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + const object = ctx.object; + try writer.writeAll(" fdes\n"); + for (object.fdes.items, 0..) 
|fde, i| { + try writer.print(" fde({d}) : {}\n", .{ i, fde.fmt(ctx.macho_file) }); + } +} + +pub fn fmtUnwindRecords(self: *Object, macho_file: *MachO) std.fmt.Formatter(formatUnwindRecords) { + return .{ .data = .{ + .object = self, + .macho_file = macho_file, + } }; +} + +fn formatUnwindRecords( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + const object = ctx.object; + const macho_file = ctx.macho_file; + try writer.writeAll(" unwind records\n"); + for (object.unwind_records.items) |rec| { + try writer.print(" rec({d}) : {}\n", .{ rec, macho_file.getUnwindRecord(rec).fmt(macho_file) }); + } +} + +pub fn fmtSymtab(self: *Object, macho_file: *MachO) std.fmt.Formatter(formatSymtab) { + return .{ .data = .{ + .object = self, + .macho_file = macho_file, + } }; +} + +fn formatSymtab( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + const object = ctx.object; + try writer.writeAll(" symbols\n"); + for (object.symbols.items) |index| { + const sym = ctx.macho_file.getSymbol(index); + try writer.print(" {}\n", .{sym.fmt(ctx.macho_file)}); + } +} + +pub fn fmtPath(self: Object) std.fmt.Formatter(formatPath) { + return .{ .data = self }; +} + +fn formatPath( + object: Object, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + if (object.archive) |path| { + try writer.writeAll(path); + try writer.writeByte('('); + try writer.writeAll(object.path); + try writer.writeByte(')'); + } else try writer.writeAll(object.path); +} + +const Section = struct { + header: macho.section_64, + subsections: std.ArrayListUnmanaged(Subsection) = .{}, + relocs: std.ArrayListUnmanaged(Relocation) = .{}, +}; + +const Subsection = struct { + atom: Atom.Index, + 
off: u64, +}; + +const Nlist = struct { + nlist: macho.nlist_64, + size: u64, + atom: Atom.Index, +}; + +const StabFile = struct { + comp_dir: u32, + stabs: std.ArrayListUnmanaged(Stab) = .{}, + + fn getCompDir(sf: StabFile, object: *const Object) [:0]const u8 { + const nlist = object.symtab.items(.nlist)[sf.comp_dir]; + return object.getString(nlist.n_strx); } - const start = sym.n_strx; - const len = self.strtab_lookup[index]; + fn getTuName(sf: StabFile, object: *const Object) [:0]const u8 { + const nlist = object.symtab.items(.nlist)[sf.comp_dir + 1]; + return object.getString(nlist.n_strx); + } - return strtab[start..][0 .. len - 1 :0]; -} + fn getOsoPath(sf: StabFile, object: *const Object) [:0]const u8 { + const nlist = object.symtab.items(.nlist)[sf.comp_dir + 2]; + return object.getString(nlist.n_strx); + } -fn getSymbolAliases(self: Object, index: u32) Entry { - const addr = self.source_address_lookup[index]; - var start = index; - while (start > 0 and - self.source_address_lookup[start - 1] == addr) : (start -= 1) - {} - const end: u32 = for (self.source_address_lookup[start..], start..) 
|saddr, i| { - if (saddr != addr) break @as(u32, @intCast(i)); - } else @as(u32, @intCast(self.source_address_lookup.len)); - return .{ .start = start, .len = end - start }; -} + fn getOsoModTime(sf: StabFile, object: *const Object) u64 { + const nlist = object.symtab.items(.nlist)[sf.comp_dir + 2]; + return nlist.n_value; + } -pub fn getSymbolByAddress(self: Object, addr: u64, sect_hint: ?u8) u32 { - // Find containing atom - const Predicate = struct { - addr: i64, + const Stab = struct { + tag: enum { func, global, static } = .func, + symbol: ?Symbol.Index = null, - pub fn predicate(pred: @This(), other: i64) bool { - return if (other == -1) true else other > pred.addr; + fn getSymbol(stab: Stab, macho_file: *MachO) ?*Symbol { + return if (stab.symbol) |s| macho_file.getSymbol(s) else null; } }; +}; - if (sect_hint) |sect_id| { - if (self.source_section_index_lookup[sect_id].len > 0) { - const lookup = self.source_section_index_lookup[sect_id]; - const target_sym_index = MachO.lsearch( - i64, - self.source_address_lookup[lookup.start..][0..lookup.len], - Predicate{ .addr = @as(i64, @intCast(addr)) }, - ); - if (target_sym_index > 0) { - // Hone in on the most senior alias of the target symbol. - // See SymbolAtIndex.lessThan for more context. 
- const aliases = self.getSymbolAliases(@intCast(lookup.start + target_sym_index - 1)); - return aliases.start; - } +const x86_64 = struct { + fn parseRelocs( + self: *const Object, + n_sect: u8, + sect: macho.section_64, + out: *std.ArrayListUnmanaged(Relocation), + macho_file: *MachO, + ) !void { + const gpa = macho_file.base.allocator; + + const relocs = @as( + [*]align(1) const macho.relocation_info, + @ptrCast(self.data.ptr + sect.reloff), + )[0..sect.nreloc]; + const code = self.getSectionData(@intCast(n_sect)); + + try out.ensureTotalCapacityPrecise(gpa, relocs.len); + + var i: usize = 0; + while (i < relocs.len) : (i += 1) { + const rel = relocs[i]; + const rel_type: macho.reloc_type_x86_64 = @enumFromInt(rel.r_type); + const rel_offset = @as(u32, @intCast(rel.r_address)); + + var addend = switch (rel.r_length) { + 0 => code[rel_offset], + 1 => mem.readInt(i16, code[rel_offset..][0..2], .little), + 2 => mem.readInt(i32, code[rel_offset..][0..4], .little), + 3 => mem.readInt(i64, code[rel_offset..][0..8], .little), + }; + addend += switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) { + .X86_64_RELOC_SIGNED_1 => 1, + .X86_64_RELOC_SIGNED_2 => 2, + .X86_64_RELOC_SIGNED_4 => 4, + else => 0, + }; + + const target = if (rel.r_extern == 0) blk: { + const nsect = rel.r_symbolnum - 1; + const taddr: i64 = if (rel.r_pcrel == 1) + @as(i64, @intCast(sect.addr)) + rel.r_address + addend + 4 + else + addend; + const target = self.findAtomInSection(@intCast(taddr), @intCast(nsect)) orelse { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ + self.fmtPath(), sect.segName(), sect.sectName(), rel.r_address, + }); + return error.ParseFailed; + }; + addend = taddr - @as(i64, @intCast(macho_file.getAtom(target).?.getInputAddress(macho_file))); + break :blk target; + } else self.symbols.items[rel.r_symbolnum]; + + const has_subtractor = if (i > 0 and + @as(macho.reloc_type_x86_64, @enumFromInt(relocs[i - 1].r_type)) == .X86_64_RELOC_SUBTRACTOR) + blk: 
{ + if (rel_type != .X86_64_RELOC_UNSIGNED) { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: X86_64_RELOC_SUBTRACTOR followed by {s}", .{ + self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type), + }); + return error.ParseFailed; + } + break :blk true; + } else false; + + const @"type": Relocation.Type = validateRelocType(rel, rel_type) catch |err| { + switch (err) { + error.Pcrel => macho_file.base.fatal( + "{}: {s},{s}: 0x{x}: PC-relative {s} relocation", + .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, + ), + error.NonPcrel => macho_file.base.fatal( + "{}: {s},{s}: 0x{x}: non-PC-relative {s} relocation", + .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, + ), + error.InvalidLength => macho_file.base.fatal( + "{}: {s},{s}: 0x{x}: invalid length of {d} in {s} relocation", + .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @as(u8, 1) << rel.r_length, @tagName(rel_type) }, + ), + error.NonExtern => macho_file.base.fatal( + "{}: {s},{s}: 0x{x}: non-extern target in {s} relocation", + .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, + ), + } + return error.ParseFailed; + }; + + out.appendAssumeCapacity(.{ + .tag = if (rel.r_extern == 1) .@"extern" else .local, + .offset = @as(u32, @intCast(rel.r_address)), + .target = target, + .addend = addend, + .type = @"type", + .meta = .{ + .pcrel = rel.r_pcrel == 1, + .has_subtractor = has_subtractor, + .length = rel.r_length, + .symbolnum = rel.r_symbolnum, + }, + }); } - return self.getSectionAliasSymbolIndex(sect_id); } - const target_sym_index = MachO.lsearch(i64, self.source_address_lookup, Predicate{ - .addr = @as(i64, @intCast(addr)), - }); - assert(target_sym_index > 0); - return @as(u32, @intCast(target_sym_index - 1)); -} + fn validateRelocType(rel: macho.relocation_info, rel_type: macho.reloc_type_x86_64) !Relocation.Type { + switch (rel_type) { + .X86_64_RELOC_UNSIGNED 
=> { + if (rel.r_pcrel == 1) return error.Pcrel; + if (rel.r_length != 2 and rel.r_length != 3) return error.InvalidLength; + return .unsigned; + }, -pub fn getGlobal(self: Object, sym_index: u32) ?u32 { - if (self.globals_lookup[sym_index] == -1) return null; - return @as(u32, @intCast(self.globals_lookup[sym_index])); -} + .X86_64_RELOC_SUBTRACTOR => { + if (rel.r_pcrel == 1) return error.Pcrel; + return .subtractor; + }, -pub fn getAtomIndexForSymbol(self: Object, sym_index: u32) ?Atom.Index { - return self.atom_by_index_table[sym_index]; -} + .X86_64_RELOC_BRANCH, + .X86_64_RELOC_GOT_LOAD, + .X86_64_RELOC_GOT, + .X86_64_RELOC_TLV, + => { + if (rel.r_pcrel == 0) return error.NonPcrel; + if (rel.r_length != 2) return error.InvalidLength; + if (rel.r_extern == 0) return error.NonExtern; + return switch (rel_type) { + .X86_64_RELOC_BRANCH => .branch, + .X86_64_RELOC_GOT_LOAD => .got_load, + .X86_64_RELOC_GOT => .got, + .X86_64_RELOC_TLV => .tlv, + else => unreachable, + }; + }, -pub fn hasUnwindRecords(self: Object) bool { - return self.unwind_info_sect_id != null; -} + .X86_64_RELOC_SIGNED, + .X86_64_RELOC_SIGNED_1, + .X86_64_RELOC_SIGNED_2, + .X86_64_RELOC_SIGNED_4, + => { + if (rel.r_pcrel == 0) return error.NonPcrel; + if (rel.r_length != 2) return error.InvalidLength; + return switch (rel_type) { + .X86_64_RELOC_SIGNED => .signed, + .X86_64_RELOC_SIGNED_1 => .signed1, + .X86_64_RELOC_SIGNED_2 => .signed2, + .X86_64_RELOC_SIGNED_4 => .signed4, + else => unreachable, + }; + }, + } + } +}; -pub fn getUnwindRecords(self: Object) []align(1) const macho.compact_unwind_entry { - const sect_id = self.unwind_info_sect_id orelse return &[0]macho.compact_unwind_entry{}; - const sect = self.getSourceSection(sect_id); - const data = self.getSectionContents(sect); - const num_entries = @divExact(data.len, @sizeOf(macho.compact_unwind_entry)); - return @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data))[0..num_entries]; -} +const aarch64 = struct { + fn 
parseRelocs( + self: *const Object, + n_sect: u8, + sect: macho.section_64, + out: *std.ArrayListUnmanaged(Relocation), + macho_file: *MachO, + ) !void { + const gpa = macho_file.base.allocator; -pub fn hasEhFrameRecords(self: Object) bool { - return self.eh_frame_sect_id != null; -} + const relocs = @as( + [*]align(1) const macho.relocation_info, + @ptrCast(self.data.ptr + sect.reloff), + )[0..sect.nreloc]; + const code = self.getSectionData(@intCast(n_sect)); -pub fn getEhFrameRecordsIterator(self: Object) eh_frame.Iterator { - const sect_id = self.eh_frame_sect_id orelse return .{ .data = &[0]u8{} }; - const sect = self.getSourceSection(sect_id); - const data = self.getSectionContents(sect); - return .{ .data = data }; -} + try out.ensureTotalCapacityPrecise(gpa, relocs.len); -pub fn hasDataInCode(self: Object) bool { - return self.data_in_code.items.len > 0; -} + var i: usize = 0; + while (i < relocs.len) : (i += 1) { + var rel = relocs[i]; + const rel_offset = @as(u32, @intCast(rel.r_address)); -const Object = @This(); + var addend: i64 = 0; + + switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { + .ARM64_RELOC_ADDEND => { + addend = rel.r_symbolnum; + i += 1; + if (i >= relocs.len) { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: unterminated ARM64_RELOC_ADDEND", .{ + self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, + }); + return error.ParseFailed; + } + rel = relocs[i]; + switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { + .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {}, + else => |x| { + macho_file.base.fatal( + "{}: {s},{s}: 0x{x}: ARM64_RELOC_ADDEND followed by {s}", + .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(x) }, + ); + return error.ParseFailed; + }, + } + }, + .ARM64_RELOC_UNSIGNED => { + addend = switch (rel.r_length) { + 0 => code[rel_offset], + 1 => mem.readInt(i16, code[rel_offset..][0..2], .little), + 2 => mem.readInt(i32, code[rel_offset..][0..4], .little), + 3 => 
mem.readInt(i64, code[rel_offset..][0..8], .little), + }; + }, + else => {}, + } + + const rel_type: macho.reloc_type_arm64 = @enumFromInt(rel.r_type); + + const target = if (rel.r_extern == 0) blk: { + const nsect = rel.r_symbolnum - 1; + const taddr: i64 = if (rel.r_pcrel == 1) + @as(i64, @intCast(sect.addr)) + rel.r_address + addend + else + addend; + const target = self.findAtomInSection(@intCast(taddr), @intCast(nsect)) orelse { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ + self.fmtPath(), sect.segName(), sect.sectName(), rel.r_address, + }); + return error.ParseFailed; + }; + addend = taddr - @as(i64, @intCast(macho_file.getAtom(target).?.getInputAddress(macho_file))); + break :blk target; + } else self.symbols.items[rel.r_symbolnum]; + + const has_subtractor = if (i > 0 and + @as(macho.reloc_type_arm64, @enumFromInt(relocs[i - 1].r_type)) == .ARM64_RELOC_SUBTRACTOR) + blk: { + if (rel_type != .ARM64_RELOC_UNSIGNED) { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: ARM64_RELOC_SUBTRACTOR followed by {s}", .{ + self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type), + }); + return error.ParseFailed; + } + break :blk true; + } else false; + + const @"type": Relocation.Type = validateRelocType(rel, rel_type) catch |err| { + switch (err) { + error.Pcrel => macho_file.base.fatal( + "{}: {s},{s}: 0x{x}: PC-relative {s} relocation", + .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, + ), + error.NonPcrel => macho_file.base.fatal( + "{}: {s},{s}: 0x{x}: non-PC-relative {s} relocation", + .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, + ), + error.InvalidLength => macho_file.base.fatal( + "{}: {s},{s}: 0x{x}: invalid length of {d} in {s} relocation", + .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @as(u8, 1) << rel.r_length, @tagName(rel_type) }, + ), + error.NonExtern => macho_file.base.fatal( + "{}: {s},{s}: 0x{x}: non-extern 
target in {s} relocation", + .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, + ), + } + return error.ParseFailed; + }; + + out.appendAssumeCapacity(.{ + .tag = if (rel.r_extern == 1) .@"extern" else .local, + .offset = @as(u32, @intCast(rel.r_address)), + .target = target, + .addend = addend, + .type = @"type", + .meta = .{ + .pcrel = rel.r_pcrel == 1, + .has_subtractor = has_subtractor, + .length = rel.r_length, + .symbolnum = rel.r_symbolnum, + }, + }); + } + } + + fn validateRelocType(rel: macho.relocation_info, rel_type: macho.reloc_type_arm64) !Relocation.Type { + switch (rel_type) { + .ARM64_RELOC_UNSIGNED => { + if (rel.r_pcrel == 1) return error.Pcrel; + if (rel.r_length != 2 and rel.r_length != 3) return error.InvalidLength; + return .unsigned; + }, + + .ARM64_RELOC_SUBTRACTOR => { + if (rel.r_pcrel == 1) return error.Pcrel; + return .subtractor; + }, + + .ARM64_RELOC_BRANCH26, + .ARM64_RELOC_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_TLVP_LOAD_PAGE21, + .ARM64_RELOC_POINTER_TO_GOT, + => { + if (rel.r_pcrel == 0) return error.NonPcrel; + if (rel.r_length != 2) return error.InvalidLength; + if (rel.r_extern == 0) return error.NonExtern; + return switch (rel_type) { + .ARM64_RELOC_BRANCH26 => .branch, + .ARM64_RELOC_PAGE21 => .page, + .ARM64_RELOC_GOT_LOAD_PAGE21 => .got_load_page, + .ARM64_RELOC_TLVP_LOAD_PAGE21 => .tlvp_page, + .ARM64_RELOC_POINTER_TO_GOT => .got, + else => unreachable, + }; + }, + + .ARM64_RELOC_PAGEOFF12, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, + => { + if (rel.r_pcrel == 1) return error.Pcrel; + if (rel.r_length != 2) return error.InvalidLength; + if (rel.r_extern == 0) return error.NonExtern; + return switch (rel_type) { + .ARM64_RELOC_PAGEOFF12 => .pageoff, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => .got_load_pageoff, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => .tlvp_pageoff, + else => unreachable, + }; + }, + + .ARM64_RELOC_ADDEND => unreachable, // We make it part 
of the addend field + } + } +}; -const std = @import("std"); -const build_options = @import("build_options"); const assert = std.debug.assert; -const dwarf = std.dwarf; const eh_frame = @import("eh_frame.zig"); -const fs = std.fs; -const io = std.io; const log = std.log.scoped(.link); const macho = std.macho; const math = std.math; const mem = std.mem; -const sort = std.sort; -const trace = @import("../../tracy.zig").trace; +const trace = @import("../tracy.zig").trace; +const std = @import("std"); const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); +const Cie = eh_frame.Cie; const DwarfInfo = @import("DwarfInfo.zig"); +const Fde = eh_frame.Fde; +const File = @import("file.zig").File; const LoadCommandIterator = macho.LoadCommandIterator; const MachO = @import("../MachO.zig"); -const Platform = @import("load_commands.zig").Platform; -const SymbolWithLoc = MachO.SymbolWithLoc; +const Object = @This(); +const Relocation = @import("Relocation.zig"); +const StringTable = @import("../strtab.zig").StringTable; +const Symbol = @import("Symbol.zig"); const UnwindInfo = @import("UnwindInfo.zig"); -const Alignment = Atom.Alignment; diff --git a/src/link/MachO/Relocation.zig b/src/link/MachO/Relocation.zig index 85c19c7608..f77e0c8792 100644 --- a/src/link/MachO/Relocation.zig +++ b/src/link/MachO/Relocation.zig @@ -1,235 +1,62 @@ -//! Relocation used by the self-hosted backends to instruct the linker where and how to -//! fixup the values when flushing the contents to file and/or memory. 
- -type: Type, -target: SymbolWithLoc, +tag: enum { @"extern", local }, offset: u32, +target: u32, addend: i64, -pcrel: bool, -length: u2, -dirty: bool = true, +type: Type, +meta: packed struct { + pcrel: bool, + has_subtractor: bool, + length: u2, + symbolnum: u24, +}, -pub const Type = enum { - // x86, x86_64 - /// RIP-relative displacement to a GOT pointer - got, - /// RIP-relative displacement - signed, - /// RIP-relative displacement to a TLV thunk - tlv, - - // aarch64 - /// PC-relative distance to target page in GOT section - got_page, - /// Offset to a GOT pointer relative to the start of a page in GOT section - got_pageoff, - /// PC-relative distance to target page in a section - page, - /// Offset to a pointer relative to the start of a page in a section - pageoff, - - // common - /// PC/RIP-relative displacement B/BL/CALL - branch, - /// Absolute pointer value - unsigned, - /// Relative offset to TLV initializer - tlv_initializer, -}; - -/// Returns true if and only if the reloc can be resolved. 
-pub fn isResolvable(self: Relocation, macho_file: *MachO) bool { - _ = self.getTargetBaseAddress(macho_file) orelse return false; - return true; +pub fn getTargetSymbol(rel: Relocation, macho_file: *MachO) *Symbol { + assert(rel.tag == .@"extern"); + return macho_file.getSymbol(rel.target); } -pub fn isGotIndirection(self: Relocation) bool { - return switch (self.type) { - .got, .got_page, .got_pageoff => true, - else => false, +pub fn getTargetAtom(rel: Relocation, macho_file: *MachO) *Atom { + assert(rel.tag == .local); + return macho_file.getAtom(rel.target).?; +} + +pub fn getTargetAddress(rel: Relocation, macho_file: *MachO) u64 { + return switch (rel.tag) { + .local => rel.getTargetAtom(macho_file).value, + .@"extern" => rel.getTargetSymbol(macho_file).getAddress(.{}, macho_file), }; } -pub fn isStubTrampoline(self: Relocation, macho_file: *MachO) bool { - return switch (self.type) { - .branch => macho_file.getSymbol(self.target).undf(), - else => false, +pub fn getGotTargetAddress(rel: Relocation, macho_file: *MachO) u64 { + return switch (rel.tag) { + .local => 0, + .@"extern" => rel.getTargetSymbol(macho_file).getGotAddress(macho_file), }; } -pub fn getTargetBaseAddress(self: Relocation, macho_file: *MachO) ?u64 { - const target = macho_file.base.comp.root_mod.resolved_target.result; - if (self.isStubTrampoline(macho_file)) { - const index = macho_file.stub_table.lookup.get(self.target) orelse return null; - const header = macho_file.sections.items(.header)[macho_file.stubs_section_index.?]; - return header.addr + - index * @import("stubs.zig").stubSize(target.cpu.arch); - } - switch (self.type) { - .got, .got_page, .got_pageoff => { - const got_index = macho_file.got_table.lookup.get(self.target) orelse return null; - const header = macho_file.sections.items(.header)[macho_file.got_section_index.?]; - return header.addr + got_index * @sizeOf(u64); - }, - .tlv => { - const atom_index = macho_file.tlv_table.get(self.target) orelse return null; - const atom 
= macho_file.getAtom(atom_index); - return atom.getSymbol(macho_file).n_value; - }, - else => { - const target_atom_index = macho_file.getAtomIndexForSymbol(self.target) orelse return null; - const target_atom = macho_file.getAtom(target_atom_index); - return target_atom.getSymbol(macho_file).n_value; - }, - } -} - -pub fn resolve(self: Relocation, macho_file: *MachO, atom_index: Atom.Index, code: []u8) void { - const target = macho_file.base.comp.root_mod.resolved_target.result; - const arch = target.cpu.arch; - const atom = macho_file.getAtom(atom_index); - const source_sym = atom.getSymbol(macho_file); - const source_addr = source_sym.n_value + self.offset; - - const target_base_addr = self.getTargetBaseAddress(macho_file).?; // Oops, you didn't check if the relocation can be resolved with isResolvable(). - const target_addr: i64 = switch (self.type) { - .tlv_initializer => blk: { - assert(self.addend == 0); // Addend here makes no sense. - const header = macho_file.sections.items(.header)[macho_file.thread_data_section_index.?]; - break :blk @as(i64, @intCast(target_base_addr - header.addr)); - }, - else => @as(i64, @intCast(target_base_addr)) + self.addend, +pub fn getRelocAddend(rel: Relocation, cpu_arch: std.Target.Cpu.Arch) i64 { + const addend: i64 = switch (rel.type) { + .signed => 0, + .signed1 => -1, + .signed2 => -2, + .signed4 => -4, + else => 0, + }; + return switch (cpu_arch) { + .x86_64 => if (rel.meta.pcrel) addend - 4 else addend, + else => addend, }; - - relocs_log.debug(" ({x}: [() => 0x{x} ({s})) ({s})", .{ - source_addr, - target_addr, - macho_file.getSymbolName(self.target), - @tagName(self.type), - }); - - switch (arch) { - .aarch64 => self.resolveAarch64(source_addr, target_addr, code), - .x86_64 => self.resolveX8664(source_addr, target_addr, code), - else => unreachable, - } } -fn resolveAarch64(self: Relocation, source_addr: u64, target_addr: i64, code: []u8) void { - var buffer = code[self.offset..]; - switch (self.type) { - .branch => 
{ - const displacement = math.cast( - i28, - @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr)), - ) orelse unreachable; // TODO codegen should never allow for jump larger than i28 displacement - var inst = aarch64.Instruction{ - .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.unconditional_branch_immediate, - ), buffer[0..4]), - }; - inst.unconditional_branch_immediate.imm26 = @as(u26, @truncate(@as(u28, @bitCast(displacement >> 2)))); - mem.writeInt(u32, buffer[0..4], inst.toU32(), .little); - }, - .page, .got_page => { - const source_page = @as(i32, @intCast(source_addr >> 12)); - const target_page = @as(i32, @intCast(target_addr >> 12)); - const pages = @as(u21, @bitCast(@as(i21, @intCast(target_page - source_page)))); - var inst = aarch64.Instruction{ - .pc_relative_address = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.pc_relative_address, - ), buffer[0..4]), - }; - inst.pc_relative_address.immhi = @as(u19, @truncate(pages >> 2)); - inst.pc_relative_address.immlo = @as(u2, @truncate(pages)); - mem.writeInt(u32, buffer[0..4], inst.toU32(), .little); - }, - .pageoff, .got_pageoff => { - const narrowed = @as(u12, @truncate(@as(u64, @intCast(target_addr)))); - if (isArithmeticOp(buffer[0..4])) { - var inst = aarch64.Instruction{ - .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), buffer[0..4]), - }; - inst.add_subtract_immediate.imm12 = narrowed; - mem.writeInt(u32, buffer[0..4], inst.toU32(), .little); - } else { - var inst = aarch64.Instruction{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), buffer[0..4]), - }; - const offset: u12 = blk: { - if (inst.load_store_register.size == 0) { - if (inst.load_store_register.v == 1) { - // 128-bit SIMD is scaled by 16. 
- break :blk @divExact(narrowed, 16); - } - // Otherwise, 8-bit SIMD or ldrb. - break :blk narrowed; - } else { - const denom: u4 = math.powi(u4, 2, inst.load_store_register.size) catch unreachable; - break :blk @divExact(narrowed, denom); - } - }; - inst.load_store_register.offset = offset; - mem.writeInt(u32, buffer[0..4], inst.toU32(), .little); - } - }, - .tlv_initializer, .unsigned => switch (self.length) { - 2 => mem.writeInt(u32, buffer[0..4], @as(u32, @truncate(@as(u64, @bitCast(target_addr)))), .little), - 3 => mem.writeInt(u64, buffer[0..8], @as(u64, @bitCast(target_addr)), .little), - else => unreachable, - }, - .got, .signed, .tlv => unreachable, // Invalid target architecture. - } +pub fn lessThan(ctx: void, lhs: Relocation, rhs: Relocation) bool { + _ = ctx; + return lhs.offset < rhs.offset; } -fn resolveX8664(self: Relocation, source_addr: u64, target_addr: i64, code: []u8) void { - switch (self.type) { - .branch, .got, .tlv, .signed => { - const displacement = @as(i32, @intCast(@as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr)) - 4)); - mem.writeInt(u32, code[self.offset..][0..4], @as(u32, @bitCast(displacement)), .little); - }, - .tlv_initializer, .unsigned => { - switch (self.length) { - 2 => { - mem.writeInt(u32, code[self.offset..][0..4], @as(u32, @truncate(@as(u64, @bitCast(target_addr)))), .little); - }, - 3 => { - mem.writeInt(u64, code[self.offset..][0..8], @as(u64, @bitCast(target_addr)), .little); - }, - else => unreachable, - } - }, - .got_page, .got_pageoff, .page, .pageoff => unreachable, // Invalid target architecture. 
- } -} - -pub inline fn isArithmeticOp(inst: *const [4]u8) bool { - const group_decode = @as(u5, @truncate(inst[3])); - return ((group_decode >> 2) == 4); -} - -pub fn calcPcRelativeDisplacementX86(source_addr: u64, target_addr: u64, correction: u3) error{Overflow}!i32 { - const disp = @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr + 4 + correction)); - return math.cast(i32, disp) orelse error.Overflow; -} - -pub fn calcPcRelativeDisplacementArm64(source_addr: u64, target_addr: u64) error{Overflow}!i28 { - const disp = @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr)); - return math.cast(i28, disp) orelse error.Overflow; -} - -pub fn calcNumberOfPages(source_addr: u64, target_addr: u64) i21 { - const source_page = @as(i32, @intCast(source_addr >> 12)); - const target_page = @as(i32, @intCast(target_addr >> 12)); - const pages = @as(i21, @intCast(target_page - source_page)); +pub fn calcNumberOfPages(saddr: u64, taddr: u64) error{Overflow}!i21 { + const spage = math.cast(i32, saddr >> 12) orelse return error.Overflow; + const tpage = math.cast(i32, taddr >> 12) orelse return error.Overflow; + const pages = math.cast(i21, tpage - spage) orelse return error.Overflow; return pages; } @@ -242,8 +69,8 @@ pub const PageOffsetInstKind = enum { load_store_128, }; -pub fn calcPageOffset(target_addr: u64, kind: PageOffsetInstKind) !u12 { - const narrowed = @as(u12, @truncate(target_addr)); +pub fn calcPageOffset(taddr: u64, kind: PageOffsetInstKind) !u12 { + const narrowed = @as(u12, @truncate(taddr)); return switch (kind) { .arithmetic, .load_store_8 => narrowed, .load_store_16 => try math.divExact(u12, narrowed, 2), @@ -253,17 +80,57 @@ pub fn calcPageOffset(target_addr: u64, kind: PageOffsetInstKind) !u12 { }; } -const Relocation = @This(); +pub inline fn isArithmeticOp(inst: *const [4]u8) bool { + const group_decode = @as(u5, @truncate(inst[3])); + return ((group_decode >> 2) == 4); +} + +pub const Type = enum { + // x86_64 + /// 
RIP-relative displacement (X86_64_RELOC_SIGNED) + signed, + /// RIP-relative displacement (X86_64_RELOC_SIGNED_1) + signed1, + /// RIP-relative displacement (X86_64_RELOC_SIGNED_2) + signed2, + /// RIP-relative displacement (X86_64_RELOC_SIGNED_4) + signed4, + /// RIP-relative GOT load (X86_64_RELOC_GOT_LOAD) + got_load, + /// RIP-relative TLV load (X86_64_RELOC_TLV) + tlv, + + // arm64 + /// PC-relative load (distance to page, ARM64_RELOC_PAGE21) + page, + /// Non-PC-relative offset to symbol (ARM64_RELOC_PAGEOFF12) + pageoff, + /// PC-relative GOT load (distance to page, ARM64_RELOC_GOT_LOAD_PAGE21) + got_load_page, + /// Non-PC-relative offset to GOT slot (ARM64_RELOC_GOT_LOAD_PAGEOFF12) + got_load_pageoff, + /// PC-relative TLV load (distance to page, ARM64_RELOC_TLVP_LOAD_PAGE21) + tlvp_page, + /// Non-PC-relative offset to TLV slot (ARM64_RELOC_TLVP_LOAD_PAGEOFF12) + tlvp_pageoff, + + // common + /// PC-relative call/bl/b (X86_64_RELOC_BRANCH or ARM64_RELOC_BRANCH26) + branch, + /// PC-relative displacement to GOT pointer (X86_64_RELOC_GOT or ARM64_RELOC_POINTER_TO_GOT) + got, + /// Absolute subtractor value (X86_64_RELOC_SUBTRACTOR or ARM64_RELOC_SUBTRACTOR) + subtractor, + /// Absolute relocation (X86_64_RELOC_UNSIGNED or ARM64_RELOC_UNSIGNED) + unsigned, +}; -const std = @import("std"); -const aarch64 = @import("../../arch/aarch64/bits.zig"); const assert = std.debug.assert; -const relocs_log = std.log.scoped(.link_relocs); const macho = std.macho; const math = std.math; -const mem = std.mem; -const meta = std.meta; +const std = @import("std"); const Atom = @import("Atom.zig"); const MachO = @import("../MachO.zig"); -const SymbolWithLoc = MachO.SymbolWithLoc; +const Relocation = @This(); +const Symbol = @import("Symbol.zig"); diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig new file mode 100644 index 0000000000..35e53534a8 --- /dev/null +++ b/src/link/MachO/Symbol.zig @@ -0,0 +1,383 @@ +//! Represents a defined symbol. 
+ +/// Allocated address value of this symbol. +value: u64 = 0, + +/// Offset into the linker's intern table. +name: u32 = 0, + +/// File where this symbol is defined. +file: File.Index = 0, + +/// Atom containing this symbol if any. +/// Index of 0 means there is no associated atom with this symbol. +/// Use `getAtom` to get the pointer to the atom. +atom: Atom.Index = 0, + +/// Assigned output section index for this atom. +out_n_sect: u16 = 0, + +/// Index of the source nlist this symbol references. +/// Use `getNlist` to pull the nlist from the relevant file. +nlist_idx: u32 = 0, + +/// Misc flags for the symbol packaged as packed struct for compression. +flags: Flags = .{}, + +visibility: Visibility = .local, + +extra: u32 = 0, + +pub fn isLocal(symbol: Symbol) bool { + return !(symbol.flags.import or symbol.flags.@"export"); +} + +pub fn isSymbolStab(symbol: Symbol, macho_file: *MachO) bool { + const file = symbol.getFile(macho_file) orelse return false; + return switch (file) { + .object => symbol.getNlist(macho_file).stab(), + else => false, + }; +} + +pub fn isTlvInit(symbol: Symbol, macho_file: *MachO) bool { + const name = symbol.getName(macho_file); + return std.mem.indexOf(u8, name, "$tlv$init") != null; +} + +pub fn weakRef(symbol: Symbol, macho_file: *MachO) bool { + const file = symbol.getFile(macho_file).?; + const is_dylib_weak = switch (file) { + .dylib => |x| x.weak, + else => false, + }; + return is_dylib_weak or symbol.flags.weak_ref; +} + +pub fn getName(symbol: Symbol, macho_file: *MachO) [:0]const u8 { + return macho_file.string_intern.getAssumeExists(symbol.name); +} + +pub fn getAtom(symbol: Symbol, macho_file: *MachO) ?*Atom { + return macho_file.getAtom(symbol.atom); +} + +pub fn getFile(symbol: Symbol, macho_file: *MachO) ?File { + return macho_file.getFile(symbol.file); +} + +/// Asserts file is an object. 
+pub fn getNlist(symbol: Symbol, macho_file: *MachO) macho.nlist_64 { + const file = symbol.getFile(macho_file).?; + return switch (file) { + .object => |x| x.symtab.items(.nlist)[symbol.nlist_idx], + else => unreachable, + }; +} + +pub fn getSize(symbol: Symbol, macho_file: *MachO) u64 { + const file = symbol.getFile(macho_file).?; + assert(file == .object); + return file.object.symtab.items(.size)[symbol.nlist_idx]; +} + +pub fn getDylibOrdinal(symbol: Symbol, macho_file: *MachO) ?u16 { + assert(symbol.flags.import); + const file = symbol.getFile(macho_file) orelse return null; + return switch (file) { + .dylib => |x| x.ordinal, + else => null, + }; +} + +pub fn getSymbolRank(symbol: Symbol, macho_file: *MachO) u32 { + const file = symbol.getFile(macho_file) orelse return std.math.maxInt(u32); + const in_archive = switch (file) { + .object => |x| !x.alive, + else => false, + }; + return file.getSymbolRank(.{ + .archive = in_archive, + .weak = symbol.flags.weak, + .tentative = symbol.flags.tentative, + }); +} + +pub fn getAddress(symbol: Symbol, opts: struct { + stubs: bool = true, +}, macho_file: *MachO) u64 { + if (opts.stubs) { + if (symbol.flags.stubs) { + return symbol.getStubsAddress(macho_file); + } else if (symbol.flags.objc_stubs) { + return symbol.getObjcStubsAddress(macho_file); + } + } + if (symbol.getAtom(macho_file)) |atom| return atom.value + symbol.value; + return symbol.value; +} + +pub fn getGotAddress(symbol: Symbol, macho_file: *MachO) u64 { + if (!symbol.flags.got) return 0; + const extra = symbol.getExtra(macho_file).?; + return macho_file.got.getAddress(extra.got, macho_file); +} + +pub fn getStubsAddress(symbol: Symbol, macho_file: *MachO) u64 { + if (!symbol.flags.stubs) return 0; + const extra = symbol.getExtra(macho_file).?; + return macho_file.stubs.getAddress(extra.stubs, macho_file); +} + +pub fn getObjcStubsAddress(symbol: Symbol, macho_file: *MachO) u64 { + if (!symbol.flags.objc_stubs) return 0; + const extra = 
symbol.getExtra(macho_file).?; + return macho_file.objc_stubs.getAddress(extra.objc_stubs, macho_file); +} + +pub fn getObjcSelrefsAddress(symbol: Symbol, macho_file: *MachO) u64 { + if (!symbol.flags.objc_stubs) return 0; + const extra = symbol.getExtra(macho_file).?; + const atom = macho_file.getAtom(extra.objc_selrefs).?; + assert(atom.flags.alive); + return atom.value; +} + +pub fn getTlvPtrAddress(symbol: Symbol, macho_file: *MachO) u64 { + if (!symbol.flags.tlv_ptr) return 0; + const extra = symbol.getExtra(macho_file).?; + return macho_file.tlv_ptr.getAddress(extra.tlv_ptr, macho_file); +} + +pub fn getOutputSymtabIndex(symbol: Symbol, macho_file: *MachO) ?u32 { + if (!symbol.flags.output_symtab) return null; + assert(!symbol.isSymbolStab(macho_file)); + const file = symbol.getFile(macho_file).?; + const symtab_ctx = switch (file) { + inline else => |x| x.output_symtab_ctx, + }; + var idx = symbol.getExtra(macho_file).?.symtab; + if (symbol.isLocal()) { + idx += symtab_ctx.ilocal; + } else if (symbol.flags.@"export") { + idx += symtab_ctx.iexport; + } else { + assert(symbol.flags.import); + idx += symtab_ctx.iimport; + } + return idx; +} + +const AddExtraOpts = struct { + got: ?u32 = null, + stubs: ?u32 = null, + objc_stubs: ?u32 = null, + objc_selrefs: ?u32 = null, + tlv_ptr: ?u32 = null, + symtab: ?u32 = null, +}; + +pub fn addExtra(symbol: *Symbol, opts: AddExtraOpts, macho_file: *MachO) !void { + if (symbol.getExtra(macho_file) == null) { + symbol.extra = try macho_file.addSymbolExtra(.{}); + } + var extra = symbol.getExtra(macho_file).?; + inline for (@typeInfo(@TypeOf(opts)).Struct.fields) |field| { + if (@field(opts, field.name)) |x| { + @field(extra, field.name) = x; + } + } + symbol.setExtra(extra, macho_file); +} + +pub inline fn getExtra(symbol: Symbol, macho_file: *MachO) ?Extra { + return macho_file.getSymbolExtra(symbol.extra); +} + +pub inline fn setExtra(symbol: Symbol, extra: Extra, macho_file: *MachO) void { + 
macho_file.setSymbolExtra(symbol.extra, extra); +} + +pub fn setOutputSym(symbol: Symbol, macho_file: *MachO, out: *macho.nlist_64) void { + if (symbol.isLocal()) { + out.n_type = if (symbol.flags.abs) macho.N_ABS else macho.N_SECT; + out.n_sect = if (symbol.flags.abs) 0 else @intCast(symbol.out_n_sect + 1); + out.n_desc = 0; + out.n_value = symbol.getAddress(.{}, macho_file); + + switch (symbol.visibility) { + .hidden => out.n_type |= macho.N_PEXT, + else => {}, + } + } else if (symbol.flags.@"export") { + assert(symbol.visibility == .global); + out.n_type = macho.N_EXT; + out.n_type |= if (symbol.flags.abs) macho.N_ABS else macho.N_SECT; + out.n_sect = if (symbol.flags.abs) 0 else @intCast(symbol.out_n_sect + 1); + out.n_value = symbol.getAddress(.{}, macho_file); + out.n_desc = 0; + + if (symbol.flags.weak) { + out.n_desc |= macho.N_WEAK_DEF; + } + if (symbol.flags.dyn_ref) { + out.n_desc |= macho.REFERENCED_DYNAMICALLY; + } + } else { + assert(symbol.visibility == .global); + out.n_type = macho.N_EXT; + out.n_sect = 0; + out.n_value = 0; + out.n_desc = 0; + + const ord: u16 = if (macho_file.options.namespace == .flat) + @as(u8, @bitCast(macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP)) + else if (symbol.getDylibOrdinal(macho_file)) |ord| + ord + else + macho.BIND_SPECIAL_DYLIB_SELF; + out.n_desc = macho.N_SYMBOL_RESOLVER * ord; + + if (symbol.flags.weak) { + out.n_desc |= macho.N_WEAK_DEF; + } + + if (symbol.weakRef(macho_file)) { + out.n_desc |= macho.N_WEAK_REF; + } + } +} + +pub fn format( + symbol: Symbol, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = symbol; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format symbols directly"); +} + +const FormatContext = struct { + symbol: Symbol, + macho_file: *MachO, +}; + +pub fn fmt(symbol: Symbol, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ + .symbol = symbol, + .macho_file = macho_file, + } }; +} + 
+fn format2( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = options; + _ = unused_fmt_string; + const symbol = ctx.symbol; + try writer.print("%{d} : {s} : @{x}", .{ + symbol.nlist_idx, + symbol.getName(ctx.macho_file), + symbol.getAddress(.{}, ctx.macho_file), + }); + if (symbol.getFile(ctx.macho_file)) |file| { + if (symbol.out_n_sect != 0) { + try writer.print(" : sect({d})", .{symbol.out_n_sect}); + } + if (symbol.getAtom(ctx.macho_file)) |atom| { + try writer.print(" : atom({d})", .{atom.atom_index}); + } + var buf: [2]u8 = .{'_'} ** 2; + if (symbol.flags.@"export") buf[0] = 'E'; + if (symbol.flags.import) buf[1] = 'I'; + try writer.print(" : {s}", .{&buf}); + if (symbol.flags.weak) try writer.writeAll(" : weak"); + if (symbol.isSymbolStab(ctx.macho_file)) try writer.writeAll(" : stab"); + switch (file) { + .internal => |x| try writer.print(" : internal({d})", .{x.index}), + .object => |x| try writer.print(" : object({d})", .{x.index}), + .dylib => |x| try writer.print(" : dylib({d})", .{x.index}), + } + } else try writer.writeAll(" : unresolved"); +} + +pub const Flags = packed struct { + /// Whether the symbol is imported at runtime. + import: bool = false, + + /// Whether the symbol is exported at runtime. + @"export": bool = false, + + /// Whether this symbol is weak. + weak: bool = false, + + /// Whether this symbol is weakly referenced. + weak_ref: bool = false, + + /// Whether this symbol is dynamically referenced. + dyn_ref: bool = false, + + /// Whether this symbol was marked as N_NO_DEAD_STRIP. + no_dead_strip: bool = false, + + /// Whether this symbol can be interposed at runtime. + interposable: bool = false, + + /// Whether this symbol is absolute. + abs: bool = false, + + /// Whether this symbol is a tentative definition. + tentative: bool = false, + + /// Whether this symbol is a thread-local variable. 
+ tlv: bool = false, + + /// Whether the symbol makes into the output symtab or not. + output_symtab: bool = false, + + /// Whether the symbol contains __got indirection. + got: bool = false, + + /// Whether the symbols contains __stubs indirection. + stubs: bool = false, + + /// Whether the symbol has a TLV pointer. + tlv_ptr: bool = false, + + /// Whether the symbol contains __objc_stubs indirection. + objc_stubs: bool = false, +}; + +pub const Visibility = enum { + global, + hidden, + local, +}; + +pub const Extra = struct { + got: u32 = 0, + stubs: u32 = 0, + objc_stubs: u32 = 0, + objc_selrefs: u32 = 0, + tlv_ptr: u32 = 0, + symtab: u32 = 0, +}; + +pub const Index = u32; + +const assert = std.debug.assert; +const macho = std.macho; +const std = @import("std"); + +const Atom = @import("Atom.zig"); +const File = @import("file.zig").File; +const MachO = @import("../MachO.zig"); +const Object = @import("Object.zig"); +const Symbol = @This(); diff --git a/src/link/MachO/UnwindInfo.zig b/src/link/MachO/UnwindInfo.zig index 7223b5555f..1d0bfc1ff9 100644 --- a/src/link/MachO/UnwindInfo.zig +++ b/src/link/MachO/UnwindInfo.zig @@ -1,376 +1,122 @@ -gpa: Allocator, - /// List of all unwind records gathered from all objects and sorted -/// by source function address. -records: std.ArrayListUnmanaged(macho.compact_unwind_entry) = .{}, -records_lookup: std.AutoHashMapUnmanaged(SymbolWithLoc, RecordIndex) = .{}, +/// by allocated relative function address within the section. +records: std.ArrayListUnmanaged(Record.Index) = .{}, /// List of all personalities referenced by either unwind info entries /// or __eh_frame entries. -personalities: [max_personalities]SymbolWithLoc = undefined, +personalities: [max_personalities]Symbol.Index = undefined, personalities_count: u2 = 0, /// List of common encodings sorted in descending order with the most common first. 
-common_encodings: [max_common_encodings]macho.compact_unwind_encoding_t = undefined, +common_encodings: [max_common_encodings]Encoding = undefined, common_encodings_count: u7 = 0, /// List of record indexes containing an LSDA pointer. -lsdas: std.ArrayListUnmanaged(RecordIndex) = .{}, -lsdas_lookup: std.AutoHashMapUnmanaged(RecordIndex, u32) = .{}, +lsdas: std.ArrayListUnmanaged(u32) = .{}, +lsdas_lookup: std.ArrayListUnmanaged(u32) = .{}, /// List of second level pages. pages: std.ArrayListUnmanaged(Page) = .{}, -/// Upper bound (exclusive) of all the record ranges -end_boundary: u64 = 0, - -const RecordIndex = u32; - -const max_personalities = 3; -const max_common_encodings = 127; -const max_compact_encodings = 256; - -const second_level_page_bytes = 0x1000; -const second_level_page_words = second_level_page_bytes / @sizeOf(u32); - -const max_regular_second_level_entries = - (second_level_page_bytes - @sizeOf(macho.unwind_info_regular_second_level_page_header)) / - @sizeOf(macho.unwind_info_regular_second_level_entry); - -const max_compressed_second_level_entries = - (second_level_page_bytes - @sizeOf(macho.unwind_info_compressed_second_level_page_header)) / - @sizeOf(u32); - -const compressed_entry_func_offset_mask = ~@as(u24, 0); - -const Page = struct { - kind: enum { regular, compressed }, - start: RecordIndex, - count: u16, - page_encodings: [max_compact_encodings]RecordIndex = undefined, - page_encodings_count: u9 = 0, - - fn appendPageEncoding(page: *Page, record_id: RecordIndex) void { - assert(page.page_encodings_count <= max_compact_encodings); - page.page_encodings[page.page_encodings_count] = record_id; - page.page_encodings_count += 1; - } - - fn getPageEncoding( - page: *const Page, - info: *const UnwindInfo, - enc: macho.compact_unwind_encoding_t, - ) ?u8 { - comptime var index: u9 = 0; - inline while (index < max_compact_encodings) : (index += 1) { - if (index >= page.page_encodings_count) return null; - const record_id = 
page.page_encodings[index]; - const record = info.records.items[record_id]; - if (record.compactUnwindEncoding == enc) { - return @as(u8, @intCast(index)); - } - } - return null; - } - - fn format( - page: *const Page, - comptime unused_format_string: []const u8, - options: std.fmt.FormatOptions, - writer: anytype, - ) !void { - _ = page; - _ = unused_format_string; - _ = options; - _ = writer; - @compileError("do not format Page directly; use page.fmtDebug()"); - } - - const DumpCtx = struct { - page: *const Page, - info: *const UnwindInfo, - }; - - fn dump( - ctx: DumpCtx, - comptime unused_format_string: []const u8, - options: std.fmt.FormatOptions, - writer: anytype, - ) @TypeOf(writer).Error!void { - _ = options; - comptime assert(unused_format_string.len == 0); - try writer.writeAll("Page:\n"); - try writer.print(" kind: {s}\n", .{@tagName(ctx.page.kind)}); - try writer.print(" entries: {d} - {d}\n", .{ - ctx.page.start, - ctx.page.start + ctx.page.count, - }); - try writer.print(" encodings (count = {d})\n", .{ctx.page.page_encodings_count}); - for (ctx.page.page_encodings[0..ctx.page.page_encodings_count], 0..) 
|record_id, i| { - const record = ctx.info.records.items[record_id]; - const enc = record.compactUnwindEncoding; - try writer.print(" {d}: 0x{x:0>8}\n", .{ ctx.info.common_encodings_count + i, enc }); - } - } - - fn fmtDebug(page: *const Page, info: *const UnwindInfo) std.fmt.Formatter(dump) { - return .{ .data = .{ - .page = page, - .info = info, - } }; - } - - fn write(page: *const Page, info: *const UnwindInfo, writer: anytype) !void { - switch (page.kind) { - .regular => { - try writer.writeStruct(macho.unwind_info_regular_second_level_page_header{ - .entryPageOffset = @sizeOf(macho.unwind_info_regular_second_level_page_header), - .entryCount = page.count, - }); - - for (info.records.items[page.start..][0..page.count]) |record| { - try writer.writeStruct(macho.unwind_info_regular_second_level_entry{ - .functionOffset = @as(u32, @intCast(record.rangeStart)), - .encoding = record.compactUnwindEncoding, - }); - } - }, - .compressed => { - const entry_offset = @sizeOf(macho.unwind_info_compressed_second_level_page_header) + - @as(u16, @intCast(page.page_encodings_count)) * @sizeOf(u32); - try writer.writeStruct(macho.unwind_info_compressed_second_level_page_header{ - .entryPageOffset = entry_offset, - .entryCount = page.count, - .encodingsPageOffset = @sizeOf( - macho.unwind_info_compressed_second_level_page_header, - ), - .encodingsCount = page.page_encodings_count, - }); - - for (page.page_encodings[0..page.page_encodings_count]) |record_id| { - const enc = info.records.items[record_id].compactUnwindEncoding; - try writer.writeInt(u32, enc, .little); - } - - assert(page.count > 0); - const first_entry = info.records.items[page.start]; - for (info.records.items[page.start..][0..page.count]) |record| { - const enc_index = blk: { - if (info.getCommonEncoding(record.compactUnwindEncoding)) |id| { - break :blk id; - } - const ncommon = info.common_encodings_count; - break :blk ncommon + page.getPageEncoding(info, record.compactUnwindEncoding).?; - }; - const 
compressed = macho.UnwindInfoCompressedEntry{ - .funcOffset = @as(u24, @intCast(record.rangeStart - first_entry.rangeStart)), - .encodingIndex = @as(u8, @intCast(enc_index)), - }; - try writer.writeStruct(compressed); - } - }, - } - } -}; - -pub fn deinit(info: *UnwindInfo) void { - info.records.deinit(info.gpa); - info.records_lookup.deinit(info.gpa); - info.pages.deinit(info.gpa); - info.lsdas.deinit(info.gpa); - info.lsdas_lookup.deinit(info.gpa); +pub fn deinit(info: *UnwindInfo, allocator: Allocator) void { + info.records.deinit(allocator); + info.pages.deinit(allocator); + info.lsdas.deinit(allocator); + info.lsdas_lookup.deinit(allocator); } -pub fn scanRelocs(macho_file: *MachO) !void { - if (macho_file.unwind_info_section_index == null) return; - - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - for (macho_file.objects.items, 0..) |*object, object_id| { - const unwind_records = object.getUnwindRecords(); - for (object.exec_atoms.items) |atom_index| { - var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - while (inner_syms_it.next()) |sym| { - const record_id = object.unwind_records_lookup.get(sym) orelse continue; - if (object.unwind_relocs_lookup[record_id].dead) continue; - const record = unwind_records[record_id]; - if (!UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) { - if (getPersonalityFunctionReloc(macho_file, @as(u32, @intCast(object_id)), record_id)) |rel| { - // Personality function; add GOT pointer. 
- const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = @as(u32, @intCast(object_id)), - .rel = rel, - .code = mem.asBytes(&record), - .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), - }); - try macho_file.addGotEntry(reloc_target); - } - } - } - } +fn canFold(macho_file: *MachO, lhs_index: Record.Index, rhs_index: Record.Index) bool { + const cpu_arch = macho_file.options.cpu_arch.?; + const lhs = macho_file.getUnwindRecord(lhs_index); + const rhs = macho_file.getUnwindRecord(rhs_index); + if (cpu_arch == .x86_64) { + if (lhs.enc.getMode() == @intFromEnum(macho.UNWIND_X86_64_MODE.STACK_IND) or + rhs.enc.getMode() == @intFromEnum(macho.UNWIND_X86_64_MODE.STACK_IND)) return false; } + const lhs_per = lhs.personality orelse 0; + const rhs_per = rhs.personality orelse 0; + return lhs.enc.eql(rhs.enc) and + lhs_per == rhs_per and + lhs.fde == rhs.fde and + lhs.getLsdaAtom(macho_file) == null and rhs.getLsdaAtom(macho_file) == null; } -pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void { - if (macho_file.unwind_info_section_index == null) return; +pub fn generate(info: *UnwindInfo, macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; + log.debug("generating unwind info", .{}); - var records = std.ArrayList(macho.compact_unwind_entry).init(info.gpa); - defer records.deinit(); - - var sym_indexes = std.ArrayList(SymbolWithLoc).init(info.gpa); - defer sym_indexes.deinit(); - - // TODO handle dead stripping - for (macho_file.objects.items, 0..) |*object, object_id| { - log.debug("collecting unwind records in {s} ({d})", .{ object.name, object_id }); - const unwind_records = object.getUnwindRecords(); - - // Contents of unwind records does not have to cover all symbol in executable section - // so we need insert them ourselves. 
- try records.ensureUnusedCapacity(object.exec_atoms.items.len); - try sym_indexes.ensureUnusedCapacity(object.exec_atoms.items.len); - - for (object.exec_atoms.items) |atom_index| { - var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - var prev_symbol: ?SymbolWithLoc = null; - while (inner_syms_it.next()) |symbol| { - var record = if (object.unwind_records_lookup.get(symbol)) |record_id| blk: { - if (object.unwind_relocs_lookup[record_id].dead) continue; - var record = unwind_records[record_id]; - - if (UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) { - info.collectPersonalityFromDwarf(macho_file, @as(u32, @intCast(object_id)), symbol, &record); - } else { - if (getPersonalityFunctionReloc( - macho_file, - @as(u32, @intCast(object_id)), - record_id, - )) |rel| { - const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = @as(u32, @intCast(object_id)), - .rel = rel, - .code = mem.asBytes(&record), - .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), - }); - const personality_index = info.getPersonalityFunction(reloc_target) orelse inner: { - const personality_index = info.personalities_count; - info.personalities[personality_index] = reloc_target; - info.personalities_count += 1; - break :inner personality_index; - }; - - record.personalityFunction = personality_index + 1; - UnwindEncoding.setPersonalityIndex(&record.compactUnwindEncoding, personality_index + 1); - } - - if (getLsdaReloc(macho_file, @as(u32, @intCast(object_id)), record_id)) |rel| { - const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = @as(u32, @intCast(object_id)), - .rel = rel, - .code = mem.asBytes(&record), - .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), - }); - record.lsda = @as(u64, @bitCast(reloc_target)); - } - } - break :blk record; - } else blk: { - const sym = macho_file.getSymbol(symbol); - if (sym.n_desc == MachO.N_DEAD) continue; - if 
(prev_symbol) |prev_sym| { - const prev_addr = object.getSourceSymbol(prev_sym.sym_index).?.n_value; - const curr_addr = object.getSourceSymbol(symbol.sym_index).?.n_value; - if (prev_addr == curr_addr) continue; - } - - if (!object.hasUnwindRecords()) { - if (object.eh_frame_records_lookup.get(symbol)) |fde_offset| { - if (object.eh_frame_relocs_lookup.get(fde_offset).?.dead) continue; - var record = nullRecord(); - info.collectPersonalityFromDwarf(macho_file, @as(u32, @intCast(object_id)), symbol, &record); - switch (cpu_arch) { - .aarch64 => UnwindEncoding.setMode(&record.compactUnwindEncoding, macho.UNWIND_ARM64_MODE.DWARF), - .x86_64 => UnwindEncoding.setMode(&record.compactUnwindEncoding, macho.UNWIND_X86_64_MODE.DWARF), - else => unreachable, - } - break :blk record; - } - } - - break :blk nullRecord(); - }; - - const atom = macho_file.getAtom(atom_index); - const sym = macho_file.getSymbol(symbol); - assert(sym.n_desc != MachO.N_DEAD); - const size = if (inner_syms_it.next()) |next_sym| blk: { - // All this trouble to account for symbol aliases. - // TODO I think that remodelling the linker so that a Symbol references an Atom - // is the way to go, kinda like we do for ELF. We might also want to perhaps tag - // symbol aliases somehow so that they are excluded from everything except relocation - // resolution. 
- defer inner_syms_it.pos -= 1; - const curr_addr = object.getSourceSymbol(symbol.sym_index).?.n_value; - const next_addr = object.getSourceSymbol(next_sym.sym_index).?.n_value; - if (next_addr > curr_addr) break :blk next_addr - curr_addr; - break :blk macho_file.getSymbol(atom.getSymbolWithLoc()).n_value + atom.size - sym.n_value; - } else macho_file.getSymbol(atom.getSymbolWithLoc()).n_value + atom.size - sym.n_value; - record.rangeStart = sym.n_value; - record.rangeLength = @as(u32, @intCast(size)); - - try records.append(record); - try sym_indexes.append(symbol); - - prev_symbol = symbol; + // Collect all unwind records + for (macho_file.sections.items(.atoms)) |atoms| { + for (atoms.items) |atom_index| { + const atom = macho_file.getAtom(atom_index) orelse continue; + if (!atom.flags.alive) continue; + const recs = atom.getUnwindRecords(macho_file); + try info.records.ensureUnusedCapacity(gpa, recs.len); + for (recs) |rec| { + if (!macho_file.getUnwindRecord(rec).alive) continue; + info.records.appendAssumeCapacity(rec); } } } - // Record the ending boundary before folding. 
- assert(records.items.len > 0); - info.end_boundary = blk: { - const last_record = records.items[records.items.len - 1]; - break :blk last_record.rangeStart + last_record.rangeLength; - }; + // Encode records + for (info.records.items) |index| { + const rec = macho_file.getUnwindRecord(index); + if (rec.getFde(macho_file)) |fde| { + rec.enc.setDwarfSectionOffset(@intCast(fde.out_offset)); + } else if (rec.getPersonality(macho_file)) |_| { + const personality_index = try info.getOrPutPersonalityFunction(rec.personality.?); // TODO handle error + rec.enc.setPersonalityIndex(personality_index + 1); + } + } - // Fold records - try info.records.ensureTotalCapacity(info.gpa, records.items.len); - try info.records_lookup.ensureTotalCapacity(info.gpa, @as(u32, @intCast(sym_indexes.items.len))); + // Sort by assigned relative address within each output section + const sortFn = struct { + fn sortFn(ctx: *MachO, lhs_index: Record.Index, rhs_index: Record.Index) bool { + const lhs = ctx.getUnwindRecord(lhs_index); + const rhs = ctx.getUnwindRecord(rhs_index); + const lhsa = lhs.getAtom(ctx); + const rhsa = rhs.getAtom(ctx); + if (lhsa.out_n_sect == rhsa.out_n_sect) return lhs.getAtomAddress(ctx) < rhs.getAtomAddress(ctx); + return lhsa.out_n_sect < rhsa.out_n_sect; + } + }.sortFn; + mem.sort(Record.Index, info.records.items, macho_file, sortFn); - var maybe_prev: ?macho.compact_unwind_entry = null; - for (records.items, 0..) 
|record, i| { - const record_id = blk: { - if (maybe_prev) |prev| { - const is_dwarf = UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch); - if (is_dwarf or - (prev.compactUnwindEncoding != record.compactUnwindEncoding) or - (prev.personalityFunction != record.personalityFunction) or - record.lsda > 0) - { - const record_id = @as(RecordIndex, @intCast(info.records.items.len)); - info.records.appendAssumeCapacity(record); - maybe_prev = record; - break :blk record_id; - } else { - break :blk @as(RecordIndex, @intCast(info.records.items.len - 1)); - } + // Fold the records + // Any adjacent two records that share encoding can be folded into one. + { + var i: usize = 0; + var j: usize = 1; + while (j < info.records.items.len) : (j += 1) { + if (canFold(macho_file, info.records.items[i], info.records.items[j])) { + const rec = macho_file.getUnwindRecord(info.records.items[i]); + rec.length += macho_file.getUnwindRecord(info.records.items[j]).length + 1; } else { - const record_id = @as(RecordIndex, @intCast(info.records.items.len)); - info.records.appendAssumeCapacity(record); - maybe_prev = record; - break :blk record_id; + i += 1; + info.records.items[i] = info.records.items[j]; } - }; - info.records_lookup.putAssumeCapacityNoClobber(sym_indexes.items[i], record_id); + } + info.records.shrinkAndFree(gpa, i + 1); + } + + for (info.records.items) |rec_index| { + const rec = macho_file.getUnwindRecord(rec_index); + const atom = rec.getAtom(macho_file); + log.debug("@{x}-{x} : {s} : rec({d}) : {}", .{ + rec.getAtomAddress(macho_file), + rec.getAtomAddress(macho_file) + rec.length, + atom.getName(macho_file), + rec_index, + rec.enc, + }); } // Calculate common encodings { const CommonEncWithCount = struct { - enc: macho.compact_unwind_encoding_t, + enc: Encoding, count: u32, fn greaterThan(ctx: void, lhs: @This(), rhs: @This()) bool { @@ -380,39 +126,38 @@ pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void { }; const Context = struct { - pub fn 
hash(ctx: @This(), key: macho.compact_unwind_encoding_t) u32 { + pub fn hash(ctx: @This(), key: Encoding) u32 { _ = ctx; - return key; + return key.enc; } pub fn eql( ctx: @This(), - key1: macho.compact_unwind_encoding_t, - key2: macho.compact_unwind_encoding_t, + key1: Encoding, + key2: Encoding, b_index: usize, ) bool { _ = ctx; _ = b_index; - return key1 == key2; + return key1.eql(key2); } }; var common_encodings_counts = std.ArrayHashMap( - macho.compact_unwind_encoding_t, + Encoding, CommonEncWithCount, Context, false, - ).init(info.gpa); + ).init(gpa); defer common_encodings_counts.deinit(); - for (info.records.items) |record| { - assert(!isNull(record)); - if (UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) continue; - const enc = record.compactUnwindEncoding; - const gop = try common_encodings_counts.getOrPut(enc); + for (info.records.items) |rec_index| { + const rec = macho_file.getUnwindRecord(rec_index); + if (rec.enc.isDwarf(macho_file)) continue; + const gop = try common_encodings_counts.getOrPut(rec.enc); if (!gop.found_existing) { gop.value_ptr.* = .{ - .enc = enc, + .enc = rec.enc, .count = 0, }; } @@ -427,7 +172,7 @@ pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void { if (i >= max_common_encodings) break; if (slice[i].count < 2) continue; info.appendCommonEncoding(slice[i].enc); - log.debug("adding common encoding: {d} => 0x{x:0>8}", .{ i, slice[i].enc }); + log.debug("adding common encoding: {d} => {}", .{ i, slice[i].enc }); } } @@ -435,8 +180,8 @@ pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void { { var i: u32 = 0; while (i < info.records.items.len) { - const range_start_max: u64 = - info.records.items[i].rangeStart + compressed_entry_func_offset_mask; + const rec = macho_file.getUnwindRecord(info.records.items[i]); + const range_start_max: u64 = rec.getAtomAddress(macho_file) + compressed_entry_func_offset_mask; var encoding_count: u9 = info.common_encodings_count; var space_left: u32 = 
second_level_page_words - @sizeOf(macho.unwind_info_compressed_second_level_page_header) / @sizeOf(u32); @@ -447,19 +192,18 @@ pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void { }; while (space_left >= 1 and i < info.records.items.len) { - const record = info.records.items[i]; - const enc = record.compactUnwindEncoding; - const is_dwarf = UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch); + const next = macho_file.getUnwindRecord(info.records.items[i]); + const is_dwarf = next.enc.isDwarf(macho_file); - if (record.rangeStart >= range_start_max) { + if (next.getAtomAddress(macho_file) >= range_start_max) { break; - } else if (info.getCommonEncoding(enc) != null or - page.getPageEncoding(info, enc) != null and !is_dwarf) + } else if (info.getCommonEncoding(next.enc) != null or + page.getPageEncoding(next.enc) != null and !is_dwarf) { i += 1; space_left -= 1; } else if (space_left >= 2 and encoding_count < max_compact_encodings) { - page.appendPageEncoding(i); + page.appendPageEncoding(next.enc); i += 1; space_left -= 2; encoding_count += 1; @@ -481,63 +225,24 @@ pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void { page.kind = .compressed; } - log.debug("{}", .{page.fmtDebug(info)}); + log.debug("{}", .{page.fmt(info.*)}); - try info.pages.append(info.gpa, page); + try info.pages.append(gpa, page); } } - // Save indices of records requiring LSDA relocation - try info.lsdas_lookup.ensureTotalCapacity(info.gpa, @as(u32, @intCast(info.records.items.len))); - for (info.records.items, 0..) |rec, i| { - info.lsdas_lookup.putAssumeCapacityNoClobber(@as(RecordIndex, @intCast(i)), @as(u32, @intCast(info.lsdas.items.len))); - if (rec.lsda == 0) continue; - try info.lsdas.append(info.gpa, @as(RecordIndex, @intCast(i))); + // Save records having an LSDA pointer + try info.lsdas_lookup.ensureTotalCapacityPrecise(gpa, info.records.items.len); + for (info.records.items, 0..) 
|index, i| { + const rec = macho_file.getUnwindRecord(index); + info.lsdas_lookup.appendAssumeCapacity(@intCast(info.lsdas.items.len)); + if (rec.getLsdaAtom(macho_file)) |_| { + try info.lsdas.append(gpa, @intCast(i)); + } } } -fn collectPersonalityFromDwarf( - info: *UnwindInfo, - macho_file: *MachO, - object_id: u32, - sym_loc: SymbolWithLoc, - record: *macho.compact_unwind_entry, -) void { - const object = &macho_file.objects.items[object_id]; - var it = object.getEhFrameRecordsIterator(); - const fde_offset = object.eh_frame_records_lookup.get(sym_loc).?; - it.seekTo(fde_offset); - const fde = (it.next() catch return).?; // We don't care about the error since we already handled it - const cie_ptr = fde.getCiePointerSource(object_id, macho_file, fde_offset); - const cie_offset = fde_offset + 4 - cie_ptr; - it.seekTo(cie_offset); - const cie = (it.next() catch return).?; // We don't care about the error since we already handled it - - if (cie.getPersonalityPointerReloc( - macho_file, - @as(u32, @intCast(object_id)), - cie_offset, - )) |target| { - const personality_index = info.getPersonalityFunction(target) orelse inner: { - const personality_index = info.personalities_count; - info.personalities[personality_index] = target; - info.personalities_count += 1; - break :inner personality_index; - }; - - record.personalityFunction = personality_index + 1; - UnwindEncoding.setPersonalityIndex(&record.compactUnwindEncoding, personality_index + 1); - } -} - -pub fn calcSectionSize(info: UnwindInfo, macho_file: *MachO) void { - const sect_id = macho_file.unwind_info_section_index orelse return; - const sect = &macho_file.sections.items(.header)[sect_id]; - sect.@"align" = 2; - sect.size = info.calcRequiredSize(); -} - -fn calcRequiredSize(info: UnwindInfo) usize { +pub fn calcSize(info: UnwindInfo) usize { var total_size: usize = 0; total_size += @sizeOf(macho.unwind_info_section_header); total_size += @@ -549,59 +254,12 @@ fn calcRequiredSize(info: UnwindInfo) usize { 
return total_size; } -pub fn write(info: *UnwindInfo, macho_file: *MachO) !void { - const sect_id = macho_file.unwind_info_section_index orelse return; - const sect = &macho_file.sections.items(.header)[sect_id]; - const seg_id = macho_file.sections.items(.segment_index)[sect_id]; - const seg = macho_file.segments.items[seg_id]; +pub fn write(info: UnwindInfo, macho_file: *MachO, buffer: []u8) !void { + const seg = macho_file.getTextSegment(); + const header = macho_file.sections.items(.header)[macho_file.unwind_info_sect_index.?]; - const text_sect_id = macho_file.text_section_index.?; - const text_sect = macho_file.sections.items(.header)[text_sect_id]; - - var personalities: [max_personalities]u32 = undefined; - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - - log.debug("Personalities:", .{}); - for (info.personalities[0..info.personalities_count], 0..) |reloc_target, i| { - const addr = macho_file.getGotEntryAddress(reloc_target).?; - personalities[i] = @as(u32, @intCast(addr - seg.vmaddr)); - log.debug(" {d}: 0x{x} ({s})", .{ i, personalities[i], macho_file.getSymbolName(reloc_target) }); - } - - for (info.records.items) |*rec| { - // Finalize missing address values - rec.rangeStart += text_sect.addr - seg.vmaddr; - if (rec.personalityFunction > 0) { - const index = math.cast(usize, rec.personalityFunction - 1) orelse return error.Overflow; - rec.personalityFunction = personalities[index]; - } - - if (rec.compactUnwindEncoding > 0 and !UnwindEncoding.isDwarf(rec.compactUnwindEncoding, cpu_arch)) { - const lsda_target = @as(SymbolWithLoc, @bitCast(rec.lsda)); - if (lsda_target.getFile()) |_| { - const sym = macho_file.getSymbol(lsda_target); - rec.lsda = sym.n_value - seg.vmaddr; - } - } - } - - for (info.records.items, 0..) 
|record, i| { - log.debug("Unwind record at offset 0x{x}", .{i * @sizeOf(macho.compact_unwind_entry)}); - log.debug(" start: 0x{x}", .{record.rangeStart}); - log.debug(" length: 0x{x}", .{record.rangeLength}); - log.debug(" compact encoding: 0x{x:0>8}", .{record.compactUnwindEncoding}); - log.debug(" personality: 0x{x}", .{record.personalityFunction}); - log.debug(" LSDA: 0x{x}", .{record.lsda}); - } - - var buffer = std.ArrayList(u8).init(info.gpa); - defer buffer.deinit(); - - const size = info.calcRequiredSize(); - try buffer.ensureTotalCapacityPrecise(size); - - var cwriter = std.io.countingWriter(buffer.writer()); + var stream = std.io.fixedBufferStream(buffer); + var cwriter = std.io.countingWriter(stream.writer()); const writer = cwriter.writer(); const common_encodings_offset: u32 = @sizeOf(macho.unwind_info_section_header); @@ -621,211 +279,404 @@ pub fn write(info: *UnwindInfo, macho_file: *MachO) !void { }); try writer.writeAll(mem.sliceAsBytes(info.common_encodings[0..info.common_encodings_count])); - try writer.writeAll(mem.sliceAsBytes(personalities[0..info.personalities_count])); - const pages_base_offset = @as(u32, @intCast(size - (info.pages.items.len * second_level_page_bytes))); + for (info.personalities[0..info.personalities_count]) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + try writer.writeInt(u32, @intCast(sym.getGotAddress(macho_file) - seg.vmaddr), .little); + } + + const pages_base_offset = @as(u32, @intCast(header.size - (info.pages.items.len * second_level_page_bytes))); const lsda_base_offset = @as(u32, @intCast(pages_base_offset - (info.lsdas.items.len * @sizeOf(macho.unwind_info_section_header_lsda_index_entry)))); for (info.pages.items, 0..) 
|page, i| { assert(page.count > 0); - const first_entry = info.records.items[page.start]; + const rec = macho_file.getUnwindRecord(info.records.items[page.start]); try writer.writeStruct(macho.unwind_info_section_header_index_entry{ - .functionOffset = @as(u32, @intCast(first_entry.rangeStart)), + .functionOffset = @as(u32, @intCast(rec.getAtomAddress(macho_file) - seg.vmaddr)), .secondLevelPagesSectionOffset = @as(u32, @intCast(pages_base_offset + i * second_level_page_bytes)), .lsdaIndexArraySectionOffset = lsda_base_offset + - info.lsdas_lookup.get(page.start).? * @sizeOf(macho.unwind_info_section_header_lsda_index_entry), + info.lsdas_lookup.items[page.start] * @sizeOf(macho.unwind_info_section_header_lsda_index_entry), }); } - // Relocate end boundary address - const end_boundary = @as(u32, @intCast(info.end_boundary + text_sect.addr - seg.vmaddr)); + const last_rec = macho_file.getUnwindRecord(info.records.items[info.records.items.len - 1]); + const sentinel_address = @as(u32, @intCast(last_rec.getAtomAddress(macho_file) + last_rec.length - seg.vmaddr)); try writer.writeStruct(macho.unwind_info_section_header_index_entry{ - .functionOffset = end_boundary, + .functionOffset = sentinel_address, .secondLevelPagesSectionOffset = 0, .lsdaIndexArraySectionOffset = lsda_base_offset + @as(u32, @intCast(info.lsdas.items.len)) * @sizeOf(macho.unwind_info_section_header_lsda_index_entry), }); - for (info.lsdas.items) |record_id| { - const record = info.records.items[record_id]; + for (info.lsdas.items) |index| { + const rec = macho_file.getUnwindRecord(info.records.items[index]); try writer.writeStruct(macho.unwind_info_section_header_lsda_index_entry{ - .functionOffset = @as(u32, @intCast(record.rangeStart)), - .lsdaOffset = @as(u32, @intCast(record.lsda)), + .functionOffset = @as(u32, @intCast(rec.getAtomAddress(macho_file) - seg.vmaddr)), + .lsdaOffset = @as(u32, @intCast(rec.getLsdaAddress(macho_file) - seg.vmaddr)), }); } for (info.pages.items) |page| { const start 
= cwriter.bytes_written; - try page.write(info, writer); + try page.write(info, macho_file, writer); const nwritten = cwriter.bytes_written - start; if (nwritten < second_level_page_bytes) { - const offset = math.cast(usize, second_level_page_bytes - nwritten) orelse return error.Overflow; - try writer.writeByteNTimes(0, offset); + try writer.writeByteNTimes(0, second_level_page_bytes - nwritten); } } - const padding = buffer.items.len - cwriter.bytes_written; + const padding = buffer.len - cwriter.bytes_written; if (padding > 0) { - const offset = math.cast(usize, cwriter.bytes_written) orelse return error.Overflow; - @memset(buffer.items[offset..], 0); + @memset(buffer[cwriter.bytes_written..], 0); } - - try macho_file.base.file.?.pwriteAll(buffer.items, sect.offset); } -fn getRelocs(macho_file: *MachO, object_id: u32, record_id: usize) []const macho.relocation_info { - const object = &macho_file.objects.items[object_id]; - assert(object.hasUnwindRecords()); - const rel_pos = object.unwind_relocs_lookup[record_id].reloc; - const relocs = object.getRelocs(object.unwind_info_sect_id.?); - return relocs[rel_pos.start..][0..rel_pos.len]; -} - -fn isPersonalityFunction(record_id: usize, rel: macho.relocation_info) bool { - const base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))); - const rel_offset = rel.r_address - base_offset; - return rel_offset == 16; -} - -pub fn getPersonalityFunctionReloc( - macho_file: *MachO, - object_id: u32, - record_id: usize, -) ?macho.relocation_info { - const relocs = getRelocs(macho_file, object_id, record_id); - for (relocs) |rel| { - if (isPersonalityFunction(record_id, rel)) return rel; - } - return null; -} - -fn getPersonalityFunction(info: UnwindInfo, global_index: SymbolWithLoc) ?u2 { +fn getOrPutPersonalityFunction(info: *UnwindInfo, sym_index: Symbol.Index) error{TooManyPersonalities}!u2 { comptime var index: u2 = 0; inline while (index < max_personalities) : (index += 1) { - if (index >= 
info.personalities_count) return null; - if (info.personalities[index].eql(global_index)) { + if (info.personalities[index] == sym_index) { + return index; + } else if (index == info.personalities_count) { + info.personalities[index] = sym_index; + info.personalities_count += 1; return index; } } - return null; + return error.TooManyPersonalities; } -fn isLsda(record_id: usize, rel: macho.relocation_info) bool { - const base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))); - const rel_offset = rel.r_address - base_offset; - return rel_offset == 24; -} - -pub fn getLsdaReloc(macho_file: *MachO, object_id: u32, record_id: usize) ?macho.relocation_info { - const relocs = getRelocs(macho_file, object_id, record_id); - for (relocs) |rel| { - if (isLsda(record_id, rel)) return rel; - } - return null; -} - -pub fn isNull(rec: macho.compact_unwind_entry) bool { - return rec.rangeStart == 0 and - rec.rangeLength == 0 and - rec.compactUnwindEncoding == 0 and - rec.lsda == 0 and - rec.personalityFunction == 0; -} - -inline fn nullRecord() macho.compact_unwind_entry { - return .{ - .rangeStart = 0, - .rangeLength = 0, - .compactUnwindEncoding = 0, - .personalityFunction = 0, - .lsda = 0, - }; -} - -fn appendCommonEncoding(info: *UnwindInfo, enc: macho.compact_unwind_encoding_t) void { +fn appendCommonEncoding(info: *UnwindInfo, enc: Encoding) void { assert(info.common_encodings_count <= max_common_encodings); info.common_encodings[info.common_encodings_count] = enc; info.common_encodings_count += 1; } -fn getCommonEncoding(info: UnwindInfo, enc: macho.compact_unwind_encoding_t) ?u7 { +fn getCommonEncoding(info: UnwindInfo, enc: Encoding) ?u7 { comptime var index: u7 = 0; inline while (index < max_common_encodings) : (index += 1) { if (index >= info.common_encodings_count) return null; - if (info.common_encodings[index] == enc) { + if (info.common_encodings[index].eql(enc)) { return index; } } return null; } -pub const UnwindEncoding = struct { - 
pub fn getMode(enc: macho.compact_unwind_encoding_t) u4 { +pub const Encoding = extern struct { + enc: macho.compact_unwind_encoding_t, + + pub fn getMode(enc: Encoding) u4 { comptime assert(macho.UNWIND_ARM64_MODE_MASK == macho.UNWIND_X86_64_MODE_MASK); - return @as(u4, @truncate((enc & macho.UNWIND_ARM64_MODE_MASK) >> 24)); + return @as(u4, @truncate((enc.enc & macho.UNWIND_ARM64_MODE_MASK) >> 24)); } - pub fn isDwarf(enc: macho.compact_unwind_encoding_t, cpu_arch: std.Target.Cpu.Arch) bool { - const mode = getMode(enc); - return switch (cpu_arch) { + pub fn isDwarf(enc: Encoding, macho_file: *MachO) bool { + const mode = enc.getMode(); + return switch (macho_file.options.cpu_arch.?) { .aarch64 => @as(macho.UNWIND_ARM64_MODE, @enumFromInt(mode)) == .DWARF, .x86_64 => @as(macho.UNWIND_X86_64_MODE, @enumFromInt(mode)) == .DWARF, else => unreachable, }; } - pub fn setMode(enc: *macho.compact_unwind_encoding_t, mode: anytype) void { - enc.* |= @as(u32, @intCast(@intFromEnum(mode))) << 24; + pub fn setMode(enc: *Encoding, mode: anytype) void { + enc.enc |= @as(u32, @intCast(@intFromEnum(mode))) << 24; } - pub fn hasLsda(enc: macho.compact_unwind_encoding_t) bool { - const has_lsda = @as(u1, @truncate((enc & macho.UNWIND_HAS_LSDA) >> 31)); + pub fn hasLsda(enc: Encoding) bool { + const has_lsda = @as(u1, @truncate((enc.enc & macho.UNWIND_HAS_LSDA) >> 31)); return has_lsda == 1; } - pub fn setHasLsda(enc: *macho.compact_unwind_encoding_t, has_lsda: bool) void { + pub fn setHasLsda(enc: *Encoding, has_lsda: bool) void { const mask = @as(u32, @intCast(@intFromBool(has_lsda))) << 31; - enc.* |= mask; + enc.enc |= mask; } - pub fn getPersonalityIndex(enc: macho.compact_unwind_encoding_t) u2 { - const index = @as(u2, @truncate((enc & macho.UNWIND_PERSONALITY_MASK) >> 28)); + pub fn getPersonalityIndex(enc: Encoding) u2 { + const index = @as(u2, @truncate((enc.enc & macho.UNWIND_PERSONALITY_MASK) >> 28)); return index; } - pub fn setPersonalityIndex(enc: 
*macho.compact_unwind_encoding_t, index: u2) void { + pub fn setPersonalityIndex(enc: *Encoding, index: u2) void { const mask = @as(u32, @intCast(index)) << 28; - enc.* |= mask; + enc.enc |= mask; } - pub fn getDwarfSectionOffset(enc: macho.compact_unwind_encoding_t, cpu_arch: std.Target.Cpu.Arch) u24 { - assert(isDwarf(enc, cpu_arch)); - const offset = @as(u24, @truncate(enc)); + pub fn getDwarfSectionOffset(enc: Encoding) u24 { + const offset = @as(u24, @truncate(enc.enc)); return offset; } - pub fn setDwarfSectionOffset(enc: *macho.compact_unwind_encoding_t, cpu_arch: std.Target.Cpu.Arch, offset: u24) void { - assert(isDwarf(enc.*, cpu_arch)); - enc.* |= offset; + pub fn setDwarfSectionOffset(enc: *Encoding, offset: u24) void { + enc.enc |= offset; + } + + pub fn eql(enc: Encoding, other: Encoding) bool { + return enc.enc == other.enc; + } + + pub fn format( + enc: Encoding, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + try writer.print("0x{x:0>8}", .{enc.enc}); } }; -const UnwindInfo = @This(); +pub const Record = struct { + length: u32 = 0, + enc: Encoding = .{ .enc = 0 }, + atom: Atom.Index = 0, + atom_offset: u32 = 0, + lsda: Atom.Index = 0, + lsda_offset: u32 = 0, + personality: ?Symbol.Index = null, // TODO make this zero-is-null + fde: Fde.Index = 0, // TODO actually make FDE at 0 an invalid FDE + file: File.Index = 0, + alive: bool = true, + + pub fn getObject(rec: Record, macho_file: *MachO) *Object { + return macho_file.getFile(rec.file).?.object; + } + + pub fn getAtom(rec: Record, macho_file: *MachO) *Atom { + return macho_file.getAtom(rec.atom).?; + } + + pub fn getLsdaAtom(rec: Record, macho_file: *MachO) ?*Atom { + return macho_file.getAtom(rec.lsda); + } + + pub fn getPersonality(rec: Record, macho_file: *MachO) ?*Symbol { + const personality = rec.personality orelse return null; + return macho_file.getSymbol(personality); + } + + pub fn 
getFde(rec: Record, macho_file: *MachO) ?Fde { + if (!rec.enc.isDwarf(macho_file)) return null; + return rec.getObject(macho_file).fdes.items[rec.fde]; + } + + pub fn getFdePtr(rec: Record, macho_file: *MachO) ?*Fde { + if (!rec.enc.isDwarf(macho_file)) return null; + return &rec.getObject(macho_file).fdes.items[rec.fde]; + } + + pub fn getAtomAddress(rec: Record, macho_file: *MachO) u64 { + const atom = rec.getAtom(macho_file); + return atom.value + rec.atom_offset; + } + + pub fn getLsdaAddress(rec: Record, macho_file: *MachO) u64 { + const lsda = rec.getLsdaAtom(macho_file) orelse return 0; + return lsda.value + rec.lsda_offset; + } + + pub fn format( + rec: Record, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = rec; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format UnwindInfo.Records directly"); + } + + pub fn fmt(rec: Record, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ + .rec = rec, + .macho_file = macho_file, + } }; + } + + const FormatContext = struct { + rec: Record, + macho_file: *MachO, + }; + + fn format2( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + const rec = ctx.rec; + const macho_file = ctx.macho_file; + try writer.print("{x} : len({x})", .{ + rec.enc.enc, rec.length, + }); + if (rec.enc.isDwarf(macho_file)) try writer.print(" : fde({d})", .{rec.fde}); + try writer.print(" : {s}", .{rec.getAtom(macho_file).getName(macho_file)}); + if (!rec.alive) try writer.writeAll(" : [*]"); + } + + pub const Index = u32; +}; + +const max_personalities = 3; +const max_common_encodings = 127; +const max_compact_encodings = 256; + +const second_level_page_bytes = 0x1000; +const second_level_page_words = second_level_page_bytes / @sizeOf(u32); + +const max_regular_second_level_entries = + (second_level_page_bytes - 
@sizeOf(macho.unwind_info_regular_second_level_page_header)) / + @sizeOf(macho.unwind_info_regular_second_level_entry); + +const max_compressed_second_level_entries = + (second_level_page_bytes - @sizeOf(macho.unwind_info_compressed_second_level_page_header)) / + @sizeOf(u32); + +const compressed_entry_func_offset_mask = ~@as(u24, 0); + +const Page = struct { + kind: enum { regular, compressed }, + start: u32, + count: u16, + page_encodings: [max_compact_encodings]Encoding = undefined, + page_encodings_count: u9 = 0, + + fn appendPageEncoding(page: *Page, enc: Encoding) void { + assert(page.page_encodings_count <= max_compact_encodings); + page.page_encodings[page.page_encodings_count] = enc; + page.page_encodings_count += 1; + } + + fn getPageEncoding(page: Page, enc: Encoding) ?u8 { + comptime var index: u9 = 0; + inline while (index < max_compact_encodings) : (index += 1) { + if (index >= page.page_encodings_count) return null; + if (page.page_encodings[index].eql(enc)) { + return @as(u8, @intCast(index)); + } + } + return null; + } + + fn format( + page: *const Page, + comptime unused_format_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = page; + _ = unused_format_string; + _ = options; + _ = writer; + @compileError("do not format Page directly; use page.fmt()"); + } + + const FormatPageContext = struct { + page: Page, + info: UnwindInfo, + }; + + fn format2( + ctx: FormatPageContext, + comptime unused_format_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) @TypeOf(writer).Error!void { + _ = options; + _ = unused_format_string; + try writer.writeAll("Page:\n"); + try writer.print(" kind: {s}\n", .{@tagName(ctx.page.kind)}); + try writer.print(" entries: {d} - {d}\n", .{ + ctx.page.start, + ctx.page.start + ctx.page.count, + }); + try writer.print(" encodings (count = {d})\n", .{ctx.page.page_encodings_count}); + for (ctx.page.page_encodings[0..ctx.page.page_encodings_count], 0..) 
|enc, i| { + try writer.print(" {d}: {}\n", .{ ctx.info.common_encodings_count + i, enc }); + } + } + + fn fmt(page: Page, info: UnwindInfo) std.fmt.Formatter(format2) { + return .{ .data = .{ + .page = page, + .info = info, + } }; + } + + fn write(page: Page, info: UnwindInfo, macho_file: *MachO, writer: anytype) !void { + const seg = macho_file.getTextSegment(); + + switch (page.kind) { + .regular => { + try writer.writeStruct(macho.unwind_info_regular_second_level_page_header{ + .entryPageOffset = @sizeOf(macho.unwind_info_regular_second_level_page_header), + .entryCount = page.count, + }); + + for (info.records.items[page.start..][0..page.count]) |index| { + const rec = macho_file.getUnwindRecord(index); + try writer.writeStruct(macho.unwind_info_regular_second_level_entry{ + .functionOffset = @as(u32, @intCast(rec.getAtomAddress(macho_file) - seg.vmaddr)), + .encoding = rec.enc.enc, + }); + } + }, + .compressed => { + const entry_offset = @sizeOf(macho.unwind_info_compressed_second_level_page_header) + + @as(u16, @intCast(page.page_encodings_count)) * @sizeOf(u32); + try writer.writeStruct(macho.unwind_info_compressed_second_level_page_header{ + .entryPageOffset = entry_offset, + .entryCount = page.count, + .encodingsPageOffset = @sizeOf(macho.unwind_info_compressed_second_level_page_header), + .encodingsCount = page.page_encodings_count, + }); + + for (page.page_encodings[0..page.page_encodings_count]) |enc| { + try writer.writeInt(u32, enc.enc, .little); + } + + assert(page.count > 0); + const first_rec = macho_file.getUnwindRecord(info.records.items[page.start]); + for (info.records.items[page.start..][0..page.count]) |index| { + const rec = macho_file.getUnwindRecord(index); + const enc_index = blk: { + if (info.getCommonEncoding(rec.enc)) |id| break :blk id; + const ncommon = info.common_encodings_count; + break :blk ncommon + page.getPageEncoding(rec.enc).?; + }; + const compressed = macho.UnwindInfoCompressedEntry{ + .funcOffset = @as(u24, 
@intCast(rec.getAtomAddress(macho_file) - first_rec.getAtomAddress(macho_file))), + .encodingIndex = @as(u8, @intCast(enc_index)), + }; + try writer.writeStruct(compressed); + } + }, + } + } +}; const std = @import("std"); const assert = std.debug.assert; const eh_frame = @import("eh_frame.zig"); const fs = std.fs; const leb = std.leb; -const log = std.log.scoped(.unwind_info); +const log = std.log.scoped(.link); const macho = std.macho; const math = std.math; const mem = std.mem; -const trace = @import("../../tracy.zig").trace; +const trace = @import("../tracy.zig").trace; const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); -const EhFrameRecord = eh_frame.EhFrameRecord; +const Fde = eh_frame.Fde; +const File = @import("file.zig").File; const MachO = @import("../MachO.zig"); const Object = @import("Object.zig"); -const SymbolWithLoc = MachO.SymbolWithLoc; +const Symbol = @import("Symbol.zig"); +const UnwindInfo = @This(); diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index fe3740e826..8d2dba53c6 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -1,495 +1,204 @@ -//! An algorithm for dead stripping of unreferenced Atoms. 
- pub fn gcAtoms(macho_file: *MachO) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; + const gpa = macho_file.base.allocator; - var arena = std.heap.ArenaAllocator.init(gpa); - defer arena.deinit(); + var objects = try std.ArrayList(File.Index).initCapacity(gpa, macho_file.objects.items.len + 1); + defer objects.deinit(); + for (macho_file.objects.items) |index| objects.appendAssumeCapacity(index); + if (macho_file.internal_object_index) |index| objects.appendAssumeCapacity(index); - var roots = AtomTable.init(arena.allocator()); - try roots.ensureUnusedCapacity(@as(u32, @intCast(macho_file.globals.items.len))); + var roots = std.ArrayList(*Atom).init(gpa); + defer roots.deinit(); - var alive = AtomTable.init(arena.allocator()); - try alive.ensureTotalCapacity(@as(u32, @intCast(macho_file.atoms.items.len))); - - try collectRoots(macho_file, &roots); - mark(macho_file, roots, &alive); - prune(macho_file, alive); + try collectRoots(&roots, objects.items, macho_file); + mark(roots.items, objects.items, macho_file); + prune(objects.items, macho_file); } -fn addRoot(macho_file: *MachO, roots: *AtomTable, file: u32, sym_loc: SymbolWithLoc) !void { - const sym = macho_file.getSymbol(sym_loc); - assert(!sym.undf()); - const object = &macho_file.objects.items[file]; - const atom_index = object.getAtomIndexForSymbol(sym_loc.sym_index).?; // panic here means fatal error - log.debug("root(ATOM({d}, %{d}, {d}))", .{ - atom_index, - macho_file.getAtom(atom_index).sym_index, - file, - }); - _ = try roots.getOrPut(atom_index); -} +fn collectRoots(roots: *std.ArrayList(*Atom), objects: []const File.Index, macho_file: *MachO) !void { + for (objects) |index| { + const object = macho_file.getFile(index).?; + for (object.getSymbols()) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + const file = sym.getFile(macho_file) orelse continue; + if (file.getIndex() != index) continue; + if (sym.flags.no_dead_strip or (macho_file.options.dylib and 
sym.visibility == .global)) + try markSymbol(sym, roots, macho_file); + } -fn collectRoots(macho_file: *MachO, roots: *AtomTable) !void { - log.debug("collecting roots", .{}); + for (object.getAtoms()) |atom_index| { + const atom = macho_file.getAtom(atom_index).?; + const isec = atom.getInputSection(macho_file); + switch (isec.type()) { + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => if (markAtom(atom)) try roots.append(atom), - const comp = macho_file.base.comp; - - switch (comp.config.output_mode) { - .Exe => { - // Add entrypoint as GC root - if (macho_file.getEntryPoint()) |global| { - if (global.getFile()) |file| { - try addRoot(macho_file, roots, file, global); - } else { - assert(macho_file.getSymbol(global).undf()); // Stub as our entrypoint is in a dylib. - } + else => if (isec.isDontDeadStrip() and markAtom(atom)) { + try roots.append(atom); + }, } - }, - else => |other| { - assert(other == .Lib); - // Add exports as GC roots - for (macho_file.globals.items) |global| { - const sym = macho_file.getSymbol(global); - if (sym.undf()) continue; - if (sym.n_desc == MachO.N_BOUNDARY) continue; - - if (global.getFile()) |file| { - try addRoot(macho_file, roots, file, global); - } - } - }, + } } - // Add all symbols force-defined by the user. 
- for (comp.force_undefined_symbols.keys()) |sym_name| { - const global_index = macho_file.resolver.get(sym_name).?; - const global = macho_file.globals.items[global_index]; - const sym = macho_file.getSymbol(global); - assert(!sym.undf()); - try addRoot(macho_file, roots, global.getFile().?, global); + for (macho_file.objects.items) |index| { + for (macho_file.getFile(index).?.object.unwind_records.items) |cu_index| { + const cu = macho_file.getUnwindRecord(cu_index); + if (!cu.alive) continue; + if (cu.getFde(macho_file)) |fde| { + if (fde.getCie(macho_file).getPersonality(macho_file)) |sym| try markSymbol(sym, roots, macho_file); + } else if (cu.getPersonality(macho_file)) |sym| try markSymbol(sym, roots, macho_file); + } } - for (macho_file.objects.items) |object| { - const has_subsections = object.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; + for (macho_file.undefined_symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + try markSymbol(sym, roots, macho_file); + } - for (object.atoms.items) |atom_index| { - const is_gc_root = blk: { - // Modelled after ld64 which treats each object file compiled without MH_SUBSECTIONS_VIA_SYMBOLS - // as a root. 
- if (!has_subsections) break :blk true; - - const atom = macho_file.getAtom(atom_index); - const sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| - source_sym.n_sect - 1 - else sect_id: { - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - break :sect_id sect_id; - }; - const source_sect = object.getSourceSection(sect_id); - if (source_sect.isDontDeadStrip()) break :blk true; - switch (source_sect.type()) { - macho.S_MOD_INIT_FUNC_POINTERS, - macho.S_MOD_TERM_FUNC_POINTERS, - => break :blk true, - else => break :blk false, - } - }; - - if (is_gc_root) { - _ = try roots.getOrPut(atom_index); - - log.debug("root(ATOM({d}, %{d}, {?d}))", .{ - atom_index, - macho_file.getAtom(atom_index).sym_index, - macho_file.getAtom(atom_index).getFile(), - }); - } + for (&[_]?Symbol.Index{ + macho_file.entry_index, + macho_file.dyld_stub_binder_index, + macho_file.objc_msg_send_index, + }) |index| { + if (index) |idx| { + const sym = macho_file.getSymbol(idx); + try markSymbol(sym, roots, macho_file); } } } -fn markLive(macho_file: *MachO, atom_index: Atom.Index, alive: *AtomTable) void { - if (alive.contains(atom_index)) return; - - const atom = macho_file.getAtom(atom_index); - const sym_loc = atom.getSymbolWithLoc(); - - log.debug("mark(ATOM({d}, %{d}, {?d}))", .{ atom_index, sym_loc.sym_index, sym_loc.getFile() }); - - alive.putAssumeCapacityNoClobber(atom_index, {}); - - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - - const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - const header = macho_file.sections.items(.header)[sym.n_sect - 1]; - if (header.isZerofill()) return; - - const code = Atom.getAtomCode(macho_file, atom_index); - const relocs = Atom.getAtomRelocs(macho_file, atom_index); - const ctx = Atom.getRelocContext(macho_file, atom_index); - - for (relocs) |rel| { - const reloc_target = switch (cpu_arch) { - 
.aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { - .ARM64_RELOC_ADDEND => continue, - else => Atom.parseRelocTarget(macho_file, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = code, - .base_offset = ctx.base_offset, - .base_addr = ctx.base_addr, - }), - }, - .x86_64 => Atom.parseRelocTarget(macho_file, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = code, - .base_offset = ctx.base_offset, - .base_addr = ctx.base_addr, - }), - else => unreachable, - }; - const target_sym = macho_file.getSymbol(reloc_target); - - if (target_sym.undf()) continue; - if (reloc_target.getFile() == null) { - const target_sym_name = macho_file.getSymbolName(reloc_target); - if (mem.eql(u8, "__mh_execute_header", target_sym_name)) continue; - if (mem.eql(u8, "___dso_handle", target_sym_name)) continue; - - unreachable; // referenced symbol not found - } - - const object = macho_file.objects.items[reloc_target.getFile().?]; - const target_atom_index = object.getAtomIndexForSymbol(reloc_target.sym_index).?; - log.debug(" following ATOM({d}, %{d}, {?d})", .{ - target_atom_index, - macho_file.getAtom(target_atom_index).sym_index, - macho_file.getAtom(target_atom_index).getFile(), - }); - - markLive(macho_file, target_atom_index, alive); - } +fn markSymbol(sym: *Symbol, roots: *std.ArrayList(*Atom), macho_file: *MachO) !void { + const atom = sym.getAtom(macho_file) orelse return; + if (markAtom(atom)) try roots.append(atom); } -fn refersLive(macho_file: *MachO, atom_index: Atom.Index, alive: AtomTable) bool { - const atom = macho_file.getAtom(atom_index); - const sym_loc = atom.getSymbolWithLoc(); - - log.debug("refersLive(ATOM({d}, %{d}, {?d}))", .{ atom_index, sym_loc.sym_index, sym_loc.getFile() }); - - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - - const sym = macho_file.getSymbol(sym_loc); - const header = macho_file.sections.items(.header)[sym.n_sect - 1]; - 
assert(!header.isZerofill()); - - const code = Atom.getAtomCode(macho_file, atom_index); - const relocs = Atom.getAtomRelocs(macho_file, atom_index); - const ctx = Atom.getRelocContext(macho_file, atom_index); - - for (relocs) |rel| { - const reloc_target = switch (cpu_arch) { - .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { - .ARM64_RELOC_ADDEND => continue, - else => Atom.parseRelocTarget(macho_file, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = code, - .base_offset = ctx.base_offset, - .base_addr = ctx.base_addr, - }), - }, - .x86_64 => Atom.parseRelocTarget(macho_file, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = code, - .base_offset = ctx.base_offset, - .base_addr = ctx.base_addr, - }), - else => unreachable, - }; - - const object = macho_file.objects.items[reloc_target.getFile().?]; - const target_atom_index = object.getAtomIndexForSymbol(reloc_target.sym_index) orelse { - log.debug("atom for symbol '{s}' not found; skipping...", .{macho_file.getSymbolName(reloc_target)}); - continue; - }; - if (alive.contains(target_atom_index)) { - log.debug(" refers live ATOM({d}, %{d}, {?d})", .{ - target_atom_index, - macho_file.getAtom(target_atom_index).sym_index, - macho_file.getAtom(target_atom_index).getFile(), - }); - return true; - } - } - - return false; +fn markAtom(atom: *Atom) bool { + const already_visited = atom.flags.visited; + atom.flags.visited = true; + return atom.flags.alive and !already_visited; } -fn mark(macho_file: *MachO, roots: AtomTable, alive: *AtomTable) void { - var it = roots.keyIterator(); - while (it.next()) |root| { - markLive(macho_file, root.*, alive); +fn mark(roots: []*Atom, objects: []const File.Index, macho_file: *MachO) void { + for (roots) |root| { + markLive(root, macho_file); } var loop: bool = true; while (loop) { loop = false; - for (macho_file.objects.items) |object| { - for (object.atoms.items) |atom_index| { - if (alive.contains(atom_index)) continue; - - const atom 
= macho_file.getAtom(atom_index); - const sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| - source_sym.n_sect - 1 - else blk: { - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - break :blk sect_id; - }; - const source_sect = object.getSourceSection(sect_id); - - if (source_sect.isDontDeadStripIfReferencesLive()) { - if (refersLive(macho_file, atom_index, alive.*)) { - markLive(macho_file, atom_index, alive); - loop = true; - } - } - } - } - } - - for (macho_file.objects.items, 0..) |_, object_id| { - // Traverse unwind and eh_frame records noting if the source symbol has been marked, and if so, - // marking all references as live. - markUnwindRecords(macho_file, @as(u32, @intCast(object_id)), alive); - } -} - -fn markUnwindRecords(macho_file: *MachO, object_id: u32, alive: *AtomTable) void { - const object = &macho_file.objects.items[object_id]; - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - - const unwind_records = object.getUnwindRecords(); - - for (object.exec_atoms.items) |atom_index| { - var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - - if (!object.hasUnwindRecords()) { - if (alive.contains(atom_index)) { - // Mark references live and continue. - markEhFrameRecords(macho_file, object_id, atom_index, alive); - } else { - while (inner_syms_it.next()) |sym| { - if (object.eh_frame_records_lookup.get(sym)) |fde_offset| { - // Mark dead and continue. - object.eh_frame_relocs_lookup.getPtr(fde_offset).?.dead = true; - } - } - } - continue; - } - - while (inner_syms_it.next()) |sym| { - const record_id = object.unwind_records_lookup.get(sym) orelse continue; - if (object.unwind_relocs_lookup[record_id].dead) continue; // already marked, nothing to do - if (!alive.contains(atom_index)) { - // Mark the record dead and continue. 
- object.unwind_relocs_lookup[record_id].dead = true; - if (object.eh_frame_records_lookup.get(sym)) |fde_offset| { - object.eh_frame_relocs_lookup.getPtr(fde_offset).?.dead = true; - } - continue; - } - - const record = unwind_records[record_id]; - if (UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) { - markEhFrameRecords(macho_file, object_id, atom_index, alive); - } else { - if (UnwindInfo.getPersonalityFunctionReloc(macho_file, object_id, record_id)) |rel| { - const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = object_id, - .rel = rel, - .code = mem.asBytes(&record), - .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), - }); - const target_sym = macho_file.getSymbol(reloc_target); - if (!target_sym.undf()) { - const target_object = macho_file.objects.items[reloc_target.getFile().?]; - const target_atom_index = target_object.getAtomIndexForSymbol(reloc_target.sym_index).?; - markLive(macho_file, target_atom_index, alive); - } - } - - if (UnwindInfo.getLsdaReloc(macho_file, object_id, record_id)) |rel| { - const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = object_id, - .rel = rel, - .code = mem.asBytes(&record), - .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), - }); - const target_object = macho_file.objects.items[reloc_target.getFile().?]; - const target_atom_index = target_object.getAtomIndexForSymbol(reloc_target.sym_index).?; - markLive(macho_file, target_atom_index, alive); + for (objects) |index| { + for (macho_file.getFile(index).?.getAtoms()) |atom_index| { + const atom = macho_file.getAtom(atom_index).?; + const isec = atom.getInputSection(macho_file); + if (isec.isDontDeadStripIfReferencesLive() and !atom.flags.alive and refersLive(atom, macho_file)) { + markLive(atom, macho_file); + loop = true; } } } } } -fn markEhFrameRecords(macho_file: *MachO, object_id: u32, atom_index: Atom.Index, alive: *AtomTable) void { - 
const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const object = &macho_file.objects.items[object_id]; - var it = object.getEhFrameRecordsIterator(); - var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); +fn markLive(atom: *Atom, macho_file: *MachO) void { + assert(atom.flags.visited); + atom.flags.alive = true; + track_live_log.debug("{}marking live atom({d},{s})", .{ + track_live_level, + atom.atom_index, + atom.getName(macho_file), + }); - while (inner_syms_it.next()) |sym| { - const fde_offset = object.eh_frame_records_lookup.get(sym) orelse continue; // Continue in case we hit a temp symbol alias - it.seekTo(fde_offset); - const fde = (it.next() catch continue).?; // We don't care about the error at this point since it was already handled + if (build_options.enable_logging) + track_live_level.incr(); - const cie_ptr = fde.getCiePointerSource(object_id, macho_file, fde_offset); - const cie_offset = fde_offset + 4 - cie_ptr; - it.seekTo(cie_offset); - const cie = (it.next() catch continue).?; // We don't care about the error at this point since it was already handled - - switch (cpu_arch) { - .aarch64 => { - // Mark FDE references which should include any referenced LSDA record - const relocs = eh_frame.getRelocs(macho_file, object_id, fde_offset); - for (relocs) |rel| { - const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = object_id, - .rel = rel, - .code = fde.data, - .base_offset = @as(i32, @intCast(fde_offset)) + 4, - }); - const target_sym = macho_file.getSymbol(reloc_target); - if (!target_sym.undf()) blk: { - const target_object = macho_file.objects.items[reloc_target.getFile().?]; - const target_atom_index = target_object.getAtomIndexForSymbol(reloc_target.sym_index) orelse - break :blk; - markLive(macho_file, target_atom_index, alive); - } - } - }, - .x86_64 => { - const sect = object.getSourceSection(object.eh_frame_sect_id.?); - const lsda_ptr = 
fde.getLsdaPointer(cie, .{ - .base_addr = sect.addr, - .base_offset = fde_offset, - }) catch continue; // We don't care about the error at this point since it was already handled - if (lsda_ptr) |lsda_address| { - // Mark LSDA record as live - const sym_index = object.getSymbolByAddress(lsda_address, null); - const target_atom_index = object.getAtomIndexForSymbol(sym_index).?; - markLive(macho_file, target_atom_index, alive); - } - }, - else => unreachable, + for (atom.getRelocs(macho_file)) |rel| { + const target_atom = switch (rel.tag) { + .local => rel.getTargetAtom(macho_file), + .@"extern" => rel.getTargetSymbol(macho_file).getAtom(macho_file), + }; + if (target_atom) |ta| { + if (markAtom(ta)) markLive(ta, macho_file); } + } - // Mark CIE references which should include any referenced personalities - // that are defined locally. - if (cie.getPersonalityPointerReloc(macho_file, object_id, cie_offset)) |reloc_target| { - const target_sym = macho_file.getSymbol(reloc_target); - if (!target_sym.undf()) { - const target_object = macho_file.objects.items[reloc_target.getFile().?]; - const target_atom_index = target_object.getAtomIndexForSymbol(reloc_target.sym_index).?; - markLive(macho_file, target_atom_index, alive); + for (atom.getUnwindRecords(macho_file)) |cu_index| { + const cu = macho_file.getUnwindRecord(cu_index); + const cu_atom = cu.getAtom(macho_file); + if (markAtom(cu_atom)) markLive(cu_atom, macho_file); + + if (cu.getLsdaAtom(macho_file)) |lsda| { + if (markAtom(lsda)) markLive(lsda, macho_file); + } + if (cu.getFde(macho_file)) |fde| { + const fde_atom = fde.getAtom(macho_file); + if (markAtom(fde_atom)) markLive(fde_atom, macho_file); + + if (fde.getLsdaAtom(macho_file)) |lsda| { + if (markAtom(lsda)) markLive(lsda, macho_file); } } } } -fn prune(macho_file: *MachO, alive: AtomTable) void { - log.debug("pruning dead atoms", .{}); - for (macho_file.objects.items) |*object| { - var i: usize = 0; - while (i < object.atoms.items.len) { - const 
atom_index = object.atoms.items[i]; - if (alive.contains(atom_index)) { - i += 1; - continue; - } +fn refersLive(atom: *Atom, macho_file: *MachO) bool { + for (atom.getRelocs(macho_file)) |rel| { + const target_atom = switch (rel.tag) { + .local => rel.getTargetAtom(macho_file), + .@"extern" => rel.getTargetSymbol(macho_file).getAtom(macho_file), + }; + if (target_atom) |ta| { + if (ta.flags.alive) return true; + } + } + return false; +} - const atom = macho_file.getAtom(atom_index); - const sym_loc = atom.getSymbolWithLoc(); - - log.debug("prune(ATOM({d}, %{d}, {?d}))", .{ - atom_index, - sym_loc.sym_index, - sym_loc.getFile(), - }); - log.debug(" {s} in {s}", .{ macho_file.getSymbolName(sym_loc), object.name }); - - const sym = macho_file.getSymbolPtr(sym_loc); - const sect_id = sym.n_sect - 1; - var section = macho_file.sections.get(sect_id); - section.header.size -= atom.size; - - if (atom.prev_index) |prev_index| { - const prev = macho_file.getAtomPtr(prev_index); - prev.next_index = atom.next_index; - } else { - if (atom.next_index) |next_index| { - section.first_atom_index = next_index; - } - } - if (atom.next_index) |next_index| { - const next = macho_file.getAtomPtr(next_index); - next.prev_index = atom.prev_index; - } else { - if (atom.prev_index) |prev_index| { - section.last_atom_index = prev_index; - } else { - assert(section.header.size == 0); - section.first_atom_index = null; - section.last_atom_index = null; - } - } - - macho_file.sections.set(sect_id, section); - _ = object.atoms.swapRemove(i); - - sym.n_desc = MachO.N_DEAD; - - var inner_sym_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - while (inner_sym_it.next()) |inner| { - const inner_sym = macho_file.getSymbolPtr(inner); - inner_sym.n_desc = MachO.N_DEAD; - } - - if (Atom.getSectionAlias(macho_file, atom_index)) |alias| { - const alias_sym = macho_file.getSymbolPtr(alias); - alias_sym.n_desc = MachO.N_DEAD; +fn prune(objects: []const File.Index, macho_file: *MachO) void { + for 
(objects) |index| { + for (macho_file.getFile(index).?.getAtoms()) |atom_index| { + const atom = macho_file.getAtom(atom_index).?; + if (atom.flags.alive and !atom.flags.visited) { + atom.flags.alive = false; + atom.markUnwindRecordsDead(macho_file); } } } } -const std = @import("std"); +const Level = struct { + value: usize = 0, + + fn incr(self: *@This()) void { + self.value += 1; + } + + pub fn format( + self: *const @This(), + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + try writer.writeByteNTimes(' ', self.value); + } +}; + +var track_live_level: Level = .{}; + const assert = std.debug.assert; -const eh_frame = @import("eh_frame.zig"); +const build_options = @import("build_options"); const log = std.log.scoped(.dead_strip); const macho = std.macho; const math = std.math; const mem = std.mem; +const trace = @import("../tracy.zig").trace; +const track_live_log = std.log.scoped(.dead_strip_track_live); +const std = @import("std"); const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); +const File = @import("file.zig").File; const MachO = @import("../MachO.zig"); -const SymbolWithLoc = MachO.SymbolWithLoc; -const UnwindInfo = @import("UnwindInfo.zig"); - -const AtomTable = std.AutoHashMap(Atom.Index, void); +const Symbol = @import("Symbol.zig"); diff --git a/src/link/MachO/dyld_info/Rebase.zig b/src/link/MachO/dyld_info/Rebase.zig index 512e23eddb..ffad0362f9 100644 --- a/src/link/MachO/dyld_info/Rebase.zig +++ b/src/link/MachO/dyld_info/Rebase.zig @@ -1,3 +1,14 @@ +const Rebase = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const leb = std.leb; +const log = std.log.scoped(.dyld_info); +const macho = std.macho; +const testing = std.testing; + +const Allocator = std.mem.Allocator; + entries: std.ArrayListUnmanaged(Entry) = .{}, buffer: std.ArrayListUnmanaged(u8) = .{}, @@ -168,7 +179,7 @@ fn rebaseTimesSkip(count: usize, 
skip: u64, writer: anytype) !void { fn addAddr(addr: u64, writer: anytype) !void { log.debug(">>> add: {x}", .{addr}); - if (std.mem.isAlignedGeneric(u64, addr, @sizeOf(u64))) { + if (std.mem.isAligned(addr, @sizeOf(u64))) { const imm = @divExact(addr, @sizeOf(u64)); if (imm <= 0xf) { try writer.writeByte(macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | @as(u4, @truncate(imm))); @@ -561,14 +572,3 @@ test "rebase - composite" { macho.REBASE_OPCODE_DONE, }, rebase.buffer.items); } - -const Rebase = @This(); - -const std = @import("std"); -const assert = std.debug.assert; -const leb = std.leb; -const log = std.log.scoped(.dyld_info); -const macho = std.macho; -const testing = std.testing; - -const Allocator = std.mem.Allocator; diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/dyld_info/Trie.zig similarity index 96% rename from src/link/MachO/Trie.zig rename to src/link/MachO/dyld_info/Trie.zig index 98add0315c..edef57569a 100644 --- a/src/link/MachO/Trie.zig +++ b/src/link/MachO/dyld_info/Trie.zig @@ -28,347 +28,16 @@ //! After the optional exported symbol information is a byte of how many edges (0-255) that //! this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of //! the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to. -/// The root node of the trie. -root: ?*Node = null, +const Trie = @This(); -/// If you want to access nodes ordered in DFS fashion, -/// you should call `finalize` first since the nodes -/// in this container are not guaranteed to not be stale -/// if more insertions took place after the last `finalize` -/// call. -ordered_nodes: std.ArrayListUnmanaged(*Node) = .{}, - -/// The size of the trie in bytes. -/// This value may be outdated if there were additional -/// insertions performed after `finalize` was called. -/// Call `finalize` before accessing this value to ensure -/// it is up-to-date. -size: u64 = 0, - -/// Number of nodes currently in the trie. 
-node_count: usize = 0, - -trie_dirty: bool = true, - -/// Export symbol that is to be placed in the trie. -pub const ExportSymbol = struct { - /// Name of the symbol. - name: []const u8, - - /// Offset of this symbol's virtual memory address from the beginning - /// of the __TEXT segment. - vmaddr_offset: u64, - - /// Export flags of this exported symbol. - export_flags: u64, -}; - -/// Insert a symbol into the trie, updating the prefixes in the process. -/// This operation may change the layout of the trie by splicing edges in -/// certain circumstances. -pub fn put(self: *Trie, allocator: Allocator, symbol: ExportSymbol) !void { - const node = try self.root.?.put(allocator, symbol.name); - node.terminal_info = .{ - .vmaddr_offset = symbol.vmaddr_offset, - .export_flags = symbol.export_flags, - }; - self.trie_dirty = true; -} - -/// Finalizes this trie for writing to a byte stream. -/// This step performs multiple passes through the trie ensuring -/// there are no gaps after every `Node` is ULEB128 encoded. -/// Call this method before trying to `write` the trie to a byte stream. -pub fn finalize(self: *Trie, allocator: Allocator) !void { - if (!self.trie_dirty) return; - - self.ordered_nodes.shrinkRetainingCapacity(0); - try self.ordered_nodes.ensureTotalCapacity(allocator, self.node_count); - - var fifo = std.fifo.LinearFifo(*Node, .Dynamic).init(allocator); - defer fifo.deinit(); - - try fifo.writeItem(self.root.?); - - while (fifo.readItem()) |next| { - for (next.edges.items) |*edge| { - try fifo.writeItem(edge.to); - } - self.ordered_nodes.appendAssumeCapacity(next); - } - - var more: bool = true; - while (more) { - self.size = 0; - more = false; - for (self.ordered_nodes.items) |node| { - const res = try node.finalize(self.size); - self.size += res.node_size; - if (res.updated) more = true; - } - } - - self.trie_dirty = false; -} - -const ReadError = error{ - OutOfMemory, - EndOfStream, - Overflow, -}; - -/// Parse the trie from a byte stream. 
-pub fn read(self: *Trie, allocator: Allocator, reader: anytype) ReadError!usize { - return self.root.?.read(allocator, reader); -} - -/// Write the trie to a byte stream. -/// Panics if the trie was not finalized using `finalize` before calling this method. -pub fn write(self: Trie, writer: anytype) !u64 { - assert(!self.trie_dirty); - var counting_writer = std.io.countingWriter(writer); - for (self.ordered_nodes.items) |node| { - try node.write(counting_writer.writer()); - } - return counting_writer.bytes_written; -} - -pub fn init(self: *Trie, allocator: Allocator) !void { - assert(self.root == null); - const root = try allocator.create(Node); - root.* = .{ .base = self }; - self.root = root; - self.node_count += 1; -} - -pub fn deinit(self: *Trie, allocator: Allocator) void { - if (self.root) |root| { - root.deinit(allocator); - allocator.destroy(root); - } - self.ordered_nodes.deinit(allocator); -} - -test "Trie node count" { - const gpa = testing.allocator; - var trie: Trie = .{}; - defer trie.deinit(gpa); - try trie.init(gpa); - - try testing.expectEqual(trie.node_count, 0); - try testing.expect(trie.root == null); - - try trie.put(gpa, .{ - .name = "_main", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expectEqual(trie.node_count, 2); - - // Inserting the same node shouldn't update the trie. - try trie.put(gpa, .{ - .name = "_main", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expectEqual(trie.node_count, 2); - - try trie.put(gpa, .{ - .name = "__mh_execute_header", - .vmaddr_offset = 0x1000, - .export_flags = 0, - }); - try testing.expectEqual(trie.node_count, 4); - - // Inserting the same node shouldn't update the trie. 
- try trie.put(gpa, .{ - .name = "__mh_execute_header", - .vmaddr_offset = 0x1000, - .export_flags = 0, - }); - try testing.expectEqual(trie.node_count, 4); - try trie.put(gpa, .{ - .name = "_main", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expectEqual(trie.node_count, 4); -} - -test "Trie basic" { - const gpa = testing.allocator; - var trie: Trie = .{}; - defer trie.deinit(gpa); - try trie.init(gpa); - - // root --- _st ---> node - try trie.put(gpa, .{ - .name = "_st", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expect(trie.root.?.edges.items.len == 1); - try testing.expect(mem.eql(u8, trie.root.?.edges.items[0].label, "_st")); - - { - // root --- _st ---> node --- art ---> node - try trie.put(gpa, .{ - .name = "_start", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expect(trie.root.?.edges.items.len == 1); - - const nextEdge = &trie.root.?.edges.items[0]; - try testing.expect(mem.eql(u8, nextEdge.label, "_st")); - try testing.expect(nextEdge.to.edges.items.len == 1); - try testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "art")); - } - { - // root --- _ ---> node --- st ---> node --- art ---> node - // | - // | --- main ---> node - try trie.put(gpa, .{ - .name = "_main", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expect(trie.root.?.edges.items.len == 1); - - const nextEdge = &trie.root.?.edges.items[0]; - try testing.expect(mem.eql(u8, nextEdge.label, "_")); - try testing.expect(nextEdge.to.edges.items.len == 2); - try testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "st")); - try testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "main")); - - const nextNextEdge = &nextEdge.to.edges.items[0]; - try testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "art")); - } -} - -fn expectEqualHexStrings(expected: []const u8, given: []const u8) !void { - assert(expected.len > 0); - if (mem.eql(u8, expected, given)) return; - const expected_fmt = try 
std.fmt.allocPrint(testing.allocator, "{x}", .{std.fmt.fmtSliceHexLower(expected)}); - defer testing.allocator.free(expected_fmt); - const given_fmt = try std.fmt.allocPrint(testing.allocator, "{x}", .{std.fmt.fmtSliceHexLower(given)}); - defer testing.allocator.free(given_fmt); - const idx = mem.indexOfDiff(u8, expected_fmt, given_fmt).?; - const padding = try testing.allocator.alloc(u8, idx + 5); - defer testing.allocator.free(padding); - @memset(padding, ' '); - std.debug.print("\nEXP: {s}\nGIV: {s}\n{s}^ -- first differing byte\n", .{ expected_fmt, given_fmt, padding }); - return error.TestFailed; -} - -test "write Trie to a byte stream" { - var gpa = testing.allocator; - var trie: Trie = .{}; - defer trie.deinit(gpa); - try trie.init(gpa); - - try trie.put(gpa, .{ - .name = "__mh_execute_header", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try trie.put(gpa, .{ - .name = "_main", - .vmaddr_offset = 0x1000, - .export_flags = 0, - }); - - try trie.finalize(gpa); - try trie.finalize(gpa); // Finalizing multiple times is a nop subsequently unless we add new nodes. - - const exp_buffer = [_]u8{ - 0x0, 0x1, // node root - 0x5f, 0x0, 0x5, // edge '_' - 0x0, 0x2, // non-terminal node - 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header' - 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header' - 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main' - 0x2, 0x0, 0x0, 0x0, // terminal node - 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node - }; - - const buffer = try gpa.alloc(u8, trie.size); - defer gpa.free(buffer); - var stream = std.io.fixedBufferStream(buffer); - { - _ = try trie.write(stream.writer()); - try expectEqualHexStrings(&exp_buffer, buffer); - } - { - // Writing finalized trie again should yield the same result. 
- try stream.seekTo(0); - _ = try trie.write(stream.writer()); - try expectEqualHexStrings(&exp_buffer, buffer); - } -} - -test "parse Trie from byte stream" { - var gpa = testing.allocator; - - const in_buffer = [_]u8{ - 0x0, 0x1, // node root - 0x5f, 0x0, 0x5, // edge '_' - 0x0, 0x2, // non-terminal node - 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header' - 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header' - 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main' - 0x2, 0x0, 0x0, 0x0, // terminal node - 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node - }; - - var in_stream = std.io.fixedBufferStream(&in_buffer); - var trie: Trie = .{}; - defer trie.deinit(gpa); - try trie.init(gpa); - const nread = try trie.read(gpa, in_stream.reader()); - - try testing.expect(nread == in_buffer.len); - - try trie.finalize(gpa); - - const out_buffer = try gpa.alloc(u8, trie.size); - defer gpa.free(out_buffer); - var out_stream = std.io.fixedBufferStream(out_buffer); - _ = try trie.write(out_stream.writer()); - try expectEqualHexStrings(&in_buffer, out_buffer); -} - -test "ordering bug" { - var gpa = testing.allocator; - var trie: Trie = .{}; - defer trie.deinit(gpa); - try trie.init(gpa); - - try trie.put(gpa, .{ - .name = "_asStr", - .vmaddr_offset = 0x558, - .export_flags = 0, - }); - try trie.put(gpa, .{ - .name = "_a", - .vmaddr_offset = 0x8008, - .export_flags = 0, - }); - try trie.finalize(gpa); - - const exp_buffer = [_]u8{ - 0x00, 0x01, 0x5F, 0x61, 0x00, 0x06, 0x04, 0x00, - 0x88, 0x80, 0x02, 0x01, 0x73, 0x53, 0x74, 0x72, - 0x00, 0x12, 0x03, 0x00, 0xD8, 0x0A, 0x00, - }; - - const buffer = try gpa.alloc(u8, trie.size); - defer gpa.free(buffer); - var stream = std.io.fixedBufferStream(buffer); - // Writing finalized trie again should yield the same result. 
- _ = try trie.write(stream.writer()); - try expectEqualHexStrings(&exp_buffer, buffer); -} +const std = @import("std"); +const mem = std.mem; +const leb = std.leb; +const log = std.log.scoped(.macho); +const macho = std.macho; +const testing = std.testing; +const assert = std.debug.assert; +const Allocator = mem.Allocator; pub const Node = struct { base: *Trie, @@ -601,13 +270,343 @@ pub const Node = struct { } }; -const Trie = @This(); +/// The root node of the trie. +root: ?*Node = null, -const std = @import("std"); -const mem = std.mem; -const leb = std.leb; -const log = std.log.scoped(.link); -const macho = std.macho; -const testing = std.testing; -const assert = std.debug.assert; -const Allocator = mem.Allocator; +/// If you want to access nodes ordered in DFS fashion, +/// you should call `finalize` first since the nodes +/// in this container are not guaranteed to not be stale +/// if more insertions took place after the last `finalize` +/// call. +ordered_nodes: std.ArrayListUnmanaged(*Node) = .{}, + +/// The size of the trie in bytes. +/// This value may be outdated if there were additional +/// insertions performed after `finalize` was called. +/// Call `finalize` before accessing this value to ensure +/// it is up-to-date. +size: u64 = 0, + +/// Number of nodes currently in the trie. +node_count: usize = 0, + +trie_dirty: bool = true, + +/// Export symbol that is to be placed in the trie. +pub const ExportSymbol = struct { + /// Name of the symbol. + name: []const u8, + + /// Offset of this symbol's virtual memory address from the beginning + /// of the __TEXT segment. + vmaddr_offset: u64, + + /// Export flags of this exported symbol. + export_flags: u64, +}; + +/// Insert a symbol into the trie, updating the prefixes in the process. +/// This operation may change the layout of the trie by splicing edges in +/// certain circumstances. 
+pub fn put(self: *Trie, allocator: Allocator, symbol: ExportSymbol) !void { + const node = try self.root.?.put(allocator, symbol.name); + node.terminal_info = .{ + .vmaddr_offset = symbol.vmaddr_offset, + .export_flags = symbol.export_flags, + }; + self.trie_dirty = true; +} + +/// Finalizes this trie for writing to a byte stream. +/// This step performs multiple passes through the trie ensuring +/// there are no gaps after every `Node` is ULEB128 encoded. +/// Call this method before trying to `write` the trie to a byte stream. +pub fn finalize(self: *Trie, allocator: Allocator) !void { + if (!self.trie_dirty) return; + + self.ordered_nodes.shrinkRetainingCapacity(0); + try self.ordered_nodes.ensureTotalCapacity(allocator, self.node_count); + + var fifo = std.fifo.LinearFifo(*Node, .Dynamic).init(allocator); + defer fifo.deinit(); + + try fifo.writeItem(self.root.?); + + while (fifo.readItem()) |next| { + for (next.edges.items) |*edge| { + try fifo.writeItem(edge.to); + } + self.ordered_nodes.appendAssumeCapacity(next); + } + + var more: bool = true; + while (more) { + self.size = 0; + more = false; + for (self.ordered_nodes.items) |node| { + const res = try node.finalize(self.size); + self.size += res.node_size; + if (res.updated) more = true; + } + } + + self.trie_dirty = false; +} + +const ReadError = error{ + OutOfMemory, + EndOfStream, + Overflow, +}; + +/// Parse the trie from a byte stream. +pub fn read(self: *Trie, allocator: Allocator, reader: anytype) ReadError!usize { + return self.root.?.read(allocator, reader); +} + +/// Write the trie to a byte stream. +/// Panics if the trie was not finalized using `finalize` before calling this method. 
+pub fn write(self: Trie, writer: anytype) !void { + assert(!self.trie_dirty); + for (self.ordered_nodes.items) |node| { + try node.write(writer); + } +} + +pub fn init(self: *Trie, allocator: Allocator) !void { + assert(self.root == null); + const root = try allocator.create(Node); + root.* = .{ .base = self }; + self.root = root; + self.node_count += 1; +} + +pub fn deinit(self: *Trie, allocator: Allocator) void { + if (self.root) |root| { + root.deinit(allocator); + allocator.destroy(root); + } + self.ordered_nodes.deinit(allocator); +} + +test "Trie node count" { + const gpa = testing.allocator; + var trie: Trie = .{}; + defer trie.deinit(gpa); + try trie.init(gpa); + + try testing.expectEqual(@as(usize, 1), trie.node_count); + try testing.expect(trie.root != null); + + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try testing.expectEqual(@as(usize, 2), trie.node_count); + + // Inserting the same node shouldn't update the trie. + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try testing.expectEqual(@as(usize, 2), trie.node_count); + + try trie.put(gpa, .{ + .name = "__mh_execute_header", + .vmaddr_offset = 0x1000, + .export_flags = 0, + }); + try testing.expectEqual(@as(usize, 4), trie.node_count); + + // Inserting the same node shouldn't update the trie. 
+ try trie.put(gpa, .{ + .name = "__mh_execute_header", + .vmaddr_offset = 0x1000, + .export_flags = 0, + }); + try testing.expectEqual(@as(usize, 4), trie.node_count); + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try testing.expectEqual(@as(usize, 4), trie.node_count); +} + +test "Trie basic" { + const gpa = testing.allocator; + var trie: Trie = .{}; + defer trie.deinit(gpa); + try trie.init(gpa); + + // root --- _st ---> node + try trie.put(gpa, .{ + .name = "_st", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try testing.expect(trie.root.?.edges.items.len == 1); + try testing.expect(mem.eql(u8, trie.root.?.edges.items[0].label, "_st")); + + { + // root --- _st ---> node --- art ---> node + try trie.put(gpa, .{ + .name = "_start", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try testing.expect(trie.root.?.edges.items.len == 1); + + const nextEdge = &trie.root.?.edges.items[0]; + try testing.expect(mem.eql(u8, nextEdge.label, "_st")); + try testing.expect(nextEdge.to.edges.items.len == 1); + try testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "art")); + } + { + // root --- _ ---> node --- st ---> node --- art ---> node + // | + // | --- main ---> node + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try testing.expect(trie.root.?.edges.items.len == 1); + + const nextEdge = &trie.root.?.edges.items[0]; + try testing.expect(mem.eql(u8, nextEdge.label, "_")); + try testing.expect(nextEdge.to.edges.items.len == 2); + try testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "st")); + try testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "main")); + + const nextNextEdge = &nextEdge.to.edges.items[0]; + try testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "art")); + } +} + +fn expectEqualHexStrings(expected: []const u8, given: []const u8) !void { + assert(expected.len > 0); + if (mem.eql(u8, expected, given)) return; + 
const expected_fmt = try std.fmt.allocPrint(testing.allocator, "{x}", .{std.fmt.fmtSliceHexLower(expected)}); + defer testing.allocator.free(expected_fmt); + const given_fmt = try std.fmt.allocPrint(testing.allocator, "{x}", .{std.fmt.fmtSliceHexLower(given)}); + defer testing.allocator.free(given_fmt); + const idx = mem.indexOfDiff(u8, expected_fmt, given_fmt).?; + const padding = try testing.allocator.alloc(u8, idx + 5); + defer testing.allocator.free(padding); + @memset(padding, ' '); + std.debug.print("\nEXP: {s}\nGIV: {s}\n{s}^ -- first differing byte\n", .{ expected_fmt, given_fmt, padding }); + return error.TestFailed; +} + +test "write Trie to a byte stream" { + var gpa = testing.allocator; + var trie: Trie = .{}; + defer trie.deinit(gpa); + try trie.init(gpa); + + try trie.put(gpa, .{ + .name = "__mh_execute_header", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0x1000, + .export_flags = 0, + }); + + try trie.finalize(gpa); + try trie.finalize(gpa); // Finalizing multiple times is a nop subsequently unless we add new nodes. + + const exp_buffer = [_]u8{ + 0x0, 0x1, // node root + 0x5f, 0x0, 0x5, // edge '_' + 0x0, 0x2, // non-terminal node + 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header' + 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header' + 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main' + 0x2, 0x0, 0x0, 0x0, // terminal node + 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node + }; + + const buffer = try gpa.alloc(u8, trie.size); + defer gpa.free(buffer); + var stream = std.io.fixedBufferStream(buffer); + { + _ = try trie.write(stream.writer()); + try expectEqualHexStrings(&exp_buffer, buffer); + } + { + // Writing finalized trie again should yield the same result.
+ try stream.seekTo(0); + _ = try trie.write(stream.writer()); + try expectEqualHexStrings(&exp_buffer, buffer); + } +} + +test "parse Trie from byte stream" { + const gpa = testing.allocator; + + const in_buffer = [_]u8{ + 0x0, 0x1, // node root + 0x5f, 0x0, 0x5, // edge '_' + 0x0, 0x2, // non-terminal node + 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header' + 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header' + 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main' + 0x2, 0x0, 0x0, 0x0, // terminal node + 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node + }; + + var in_stream = std.io.fixedBufferStream(&in_buffer); + var trie: Trie = .{}; + defer trie.deinit(gpa); + try trie.init(gpa); + const nread = try trie.read(gpa, in_stream.reader()); + + try testing.expect(nread == in_buffer.len); + + try trie.finalize(gpa); + + const out_buffer = try gpa.alloc(u8, trie.size); + defer gpa.free(out_buffer); + var out_stream = std.io.fixedBufferStream(out_buffer); + _ = try trie.write(out_stream.writer()); + try expectEqualHexStrings(&in_buffer, out_buffer); +} + +test "ordering bug" { + const gpa = testing.allocator; + var trie: Trie = .{}; + defer trie.deinit(gpa); + try trie.init(gpa); + + try trie.put(gpa, .{ + .name = "_asStr", + .vmaddr_offset = 0x558, + .export_flags = 0, + }); + try trie.put(gpa, .{ + .name = "_a", + .vmaddr_offset = 0x8008, + .export_flags = 0, + }); + + try trie.finalize(gpa); + + const exp_buffer = [_]u8{ + 0x00, 0x01, 0x5F, 0x61, 0x00, 0x06, 0x04, 0x00, + 0x88, 0x80, 0x02, 0x01, 0x73, 0x53, 0x74, 0x72, + 0x00, 0x12, 0x03, 0x00, 0xD8, 0x0A, 0x00, + }; + + const buffer = try gpa.alloc(u8, trie.size); + defer gpa.free(buffer); + var stream = std.io.fixedBufferStream(buffer); + // Writing finalized trie again should yield the same result. 
+ _ = try trie.write(stream.writer()); + try expectEqualHexStrings(&exp_buffer, buffer); +} diff --git a/src/link/MachO/dyld_info/bind.zig b/src/link/MachO/dyld_info/bind.zig index ca4e73a283..5bc872e277 100644 --- a/src/link/MachO/dyld_info/bind.zig +++ b/src/link/MachO/dyld_info/bind.zig @@ -1,231 +1,391 @@ -pub fn Bind(comptime Ctx: type, comptime Target: type) type { - return struct { - entries: std.ArrayListUnmanaged(Entry) = .{}, - buffer: std.ArrayListUnmanaged(u8) = .{}, +const std = @import("std"); +const assert = std.debug.assert; +const leb = std.leb; +const log = std.log.scoped(.dyld_info); +const macho = std.macho; +const testing = std.testing; - const Self = @This(); +const Allocator = std.mem.Allocator; +const MachO = @import("../../MachO.zig"); +const Symbol = @import("../Symbol.zig"); - const Entry = struct { - target: Target, - offset: u64, - segment_id: u8, - addend: i64, +pub const Entry = struct { + target: Symbol.Index, + offset: u64, + segment_id: u8, + addend: i64, - pub fn lessThan(ctx: Ctx, entry: Entry, other: Entry) bool { - if (entry.segment_id == other.segment_id) { - if (entry.target.eql(other.target)) { - return entry.offset < other.offset; - } - const entry_name = ctx.getSymbolName(entry.target); - const other_name = ctx.getSymbolName(other.target); - return std.mem.lessThan(u8, entry_name, other_name); - } - return entry.segment_id < other.segment_id; + pub fn lessThan(ctx: *MachO, entry: Entry, other: Entry) bool { + if (entry.segment_id == other.segment_id) { + if (entry.target == other.target) { + return entry.offset < other.offset; } - }; + const entry_name = ctx.getSymbol(entry.target).getName(ctx); + const other_name = ctx.getSymbol(other.target).getName(ctx); + return std.mem.lessThan(u8, entry_name, other_name); + } + return entry.segment_id < other.segment_id; + } +}; - pub fn deinit(self: *Self, gpa: Allocator) void { - self.entries.deinit(gpa); - self.buffer.deinit(gpa); +pub const Bind = struct { + entries: 
std.ArrayListUnmanaged(Entry) = .{}, + buffer: std.ArrayListUnmanaged(u8) = .{}, + + const Self = @This(); + + pub fn deinit(self: *Self, gpa: Allocator) void { + self.entries.deinit(gpa); + self.buffer.deinit(gpa); + } + + pub fn size(self: Self) u64 { + return @as(u64, @intCast(self.buffer.items.len)); + } + + pub fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void { + if (self.entries.items.len == 0) return; + + const writer = self.buffer.writer(gpa); + + std.mem.sort(Entry, self.entries.items, ctx, Entry.lessThan); + + var start: usize = 0; + var seg_id: ?u8 = null; + for (self.entries.items, 0..) |entry, i| { + if (seg_id != null and seg_id.? == entry.segment_id) continue; + try finalizeSegment(self.entries.items[start..i], ctx, writer); + seg_id = entry.segment_id; + start = i; } - pub fn size(self: Self) u64 { - return @as(u64, @intCast(self.buffer.items.len)); - } + try finalizeSegment(self.entries.items[start..], ctx, writer); + try done(writer); + } - pub fn finalize(self: *Self, gpa: Allocator, ctx: Ctx) !void { - if (self.entries.items.len == 0) return; + fn finalizeSegment(entries: []const Entry, ctx: *MachO, writer: anytype) !void { + if (entries.len == 0) return; - const writer = self.buffer.writer(gpa); + const seg_id = entries[0].segment_id; + try setSegmentOffset(seg_id, 0, writer); - std.mem.sort(Entry, self.entries.items, ctx, Entry.lessThan); + var offset: u64 = 0; + var addend: i64 = 0; + var count: usize = 0; + var skip: u64 = 0; + var target: ?Symbol.Index = null; - var start: usize = 0; - var seg_id: ?u8 = null; - for (self.entries.items, 0..) |entry, i| { - if (seg_id != null and seg_id.? 
== entry.segment_id) continue; - try finalizeSegment(self.entries.items[start..i], ctx, writer); - seg_id = entry.segment_id; - start = i; - } + var state: enum { + start, + bind_single, + bind_times_skip, + } = .start; - try finalizeSegment(self.entries.items[start..], ctx, writer); - try done(writer); - } - - fn finalizeSegment(entries: []const Entry, ctx: Ctx, writer: anytype) !void { - if (entries.len == 0) return; - - const seg_id = entries[0].segment_id; - try setSegmentOffset(seg_id, 0, writer); - - var offset: u64 = 0; - var addend: i64 = 0; - var count: usize = 0; - var skip: u64 = 0; - var target: ?Target = null; - - var state: enum { - start, - bind_single, - bind_times_skip, - } = .start; - - var i: usize = 0; - while (i < entries.len) : (i += 1) { - const current = entries[i]; - if (target == null or !target.?.eql(current.target)) { - switch (state) { - .start => {}, - .bind_single => try doBind(writer), - .bind_times_skip => try doBindTimesSkip(count, skip, writer), - } - state = .start; - target = current.target; - - const sym = ctx.getSymbol(current.target); - const name = ctx.getSymbolName(current.target); - const flags: u8 = if (sym.weakRef()) macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT else 0; - const ordinal = @divTrunc(@as(i16, @bitCast(sym.n_desc)), macho.N_SYMBOL_RESOLVER); - - try setSymbol(name, flags, writer); - try setTypePointer(writer); - try setDylibOrdinal(ordinal, writer); - - if (current.addend != addend) { - addend = current.addend; - try setAddend(addend, writer); - } - } - - log.debug("{x}, {d}, {x}, {?x}, {s}", .{ offset, count, skip, addend, @tagName(state) }); - log.debug(" => {x}", .{current.offset}); + var i: usize = 0; + while (i < entries.len) : (i += 1) { + const current = entries[i]; + if (target == null or target.? 
!= current.target) { switch (state) { - .start => { - if (current.offset < offset) { - try addAddr(@as(u64, @bitCast(@as(i64, @intCast(current.offset)) - @as(i64, @intCast(offset)))), writer); - offset = offset - (offset - current.offset); - } else if (current.offset > offset) { - const delta = current.offset - offset; - try addAddr(delta, writer); - offset += delta; - } - state = .bind_single; - offset += @sizeOf(u64); - count = 1; - }, - .bind_single => { - if (current.offset == offset) { - try doBind(writer); - state = .start; - } else if (current.offset > offset) { - const delta = current.offset - offset; - state = .bind_times_skip; - skip = @as(u64, @intCast(delta)); - offset += skip; - } else unreachable; - i -= 1; - }, - .bind_times_skip => { - if (current.offset < offset) { - count -= 1; - if (count == 1) { - try doBindAddAddr(skip, writer); - } else { - try doBindTimesSkip(count, skip, writer); - } - state = .start; - offset = offset - (@sizeOf(u64) + skip); - i -= 2; - } else if (current.offset == offset) { - count += 1; - offset += @sizeOf(u64) + skip; - } else { - try doBindTimesSkip(count, skip, writer); - state = .start; - i -= 1; - } - }, + .start => {}, + .bind_single => try doBind(writer), + .bind_times_skip => try doBindTimesSkip(count, skip, writer), } - } + state = .start; + target = current.target; - switch (state) { - .start => unreachable, - .bind_single => try doBind(writer), - .bind_times_skip => try doBindTimesSkip(count, skip, writer), - } - } + const sym = ctx.getSymbol(current.target); + const name = sym.getName(ctx); + const flags: u8 = if (sym.weakRef(ctx)) macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT else 0; + const ordinal: i16 = ord: { + if (sym.flags.interposable) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; + if (sym.flags.import) { + if (ctx.options.namespace == .flat) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; + if (sym.getDylibOrdinal(ctx)) |ord| break :ord @bitCast(ord); + } + if (ctx.options.undefined_treatment == 
.dynamic_lookup) + break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; + break :ord macho.BIND_SPECIAL_DYLIB_SELF; + }; - pub fn write(self: Self, writer: anytype) !void { - if (self.size() == 0) return; - try writer.writeAll(self.buffer.items); - } - }; -} - -pub fn LazyBind(comptime Ctx: type, comptime Target: type) type { - return struct { - entries: std.ArrayListUnmanaged(Entry) = .{}, - buffer: std.ArrayListUnmanaged(u8) = .{}, - offsets: std.ArrayListUnmanaged(u32) = .{}, - - const Self = @This(); - - const Entry = struct { - target: Target, - offset: u64, - segment_id: u8, - addend: i64, - }; - - pub fn deinit(self: *Self, gpa: Allocator) void { - self.entries.deinit(gpa); - self.buffer.deinit(gpa); - self.offsets.deinit(gpa); - } - - pub fn size(self: Self) u64 { - return @as(u64, @intCast(self.buffer.items.len)); - } - - pub fn finalize(self: *Self, gpa: Allocator, ctx: Ctx) !void { - if (self.entries.items.len == 0) return; - - try self.offsets.ensureTotalCapacityPrecise(gpa, self.entries.items.len); - - var cwriter = std.io.countingWriter(self.buffer.writer(gpa)); - const writer = cwriter.writer(); - - var addend: i64 = 0; - - for (self.entries.items) |entry| { - self.offsets.appendAssumeCapacity(@as(u32, @intCast(cwriter.bytes_written))); - - const sym = ctx.getSymbol(entry.target); - const name = ctx.getSymbolName(entry.target); - const flags: u8 = if (sym.weakRef()) macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT else 0; - const ordinal = @divTrunc(@as(i16, @bitCast(sym.n_desc)), macho.N_SYMBOL_RESOLVER); - - try setSegmentOffset(entry.segment_id, entry.offset, writer); try setSymbol(name, flags, writer); + try setTypePointer(writer); try setDylibOrdinal(ordinal, writer); - if (entry.addend != addend) { - try setAddend(entry.addend, writer); - addend = entry.addend; + if (current.addend != addend) { + addend = current.addend; + try setAddend(addend, writer); } + } - try doBind(writer); - try done(writer); + log.debug("{x}, {d}, {x}, {?x}, {s}", .{ offset, count, 
skip, addend, @tagName(state) }); + log.debug(" => {x}", .{current.offset}); + switch (state) { + .start => { + if (current.offset < offset) { + try addAddr(@as(u64, @bitCast(@as(i64, @intCast(current.offset)) - @as(i64, @intCast(offset)))), writer); + offset = offset - (offset - current.offset); + } else if (current.offset > offset) { + const delta = current.offset - offset; + try addAddr(delta, writer); + offset += delta; + } + state = .bind_single; + offset += @sizeOf(u64); + count = 1; + }, + .bind_single => { + if (current.offset == offset) { + try doBind(writer); + state = .start; + } else if (current.offset > offset) { + const delta = current.offset - offset; + state = .bind_times_skip; + skip = @as(u64, @intCast(delta)); + offset += skip; + } else unreachable; + i -= 1; + }, + .bind_times_skip => { + if (current.offset < offset) { + count -= 1; + if (count == 1) { + try doBindAddAddr(skip, writer); + } else { + try doBindTimesSkip(count, skip, writer); + } + state = .start; + offset = offset - (@sizeOf(u64) + skip); + i -= 2; + } else if (current.offset == offset) { + count += 1; + offset += @sizeOf(u64) + skip; + } else { + try doBindTimesSkip(count, skip, writer); + state = .start; + i -= 1; + } + }, } } - pub fn write(self: Self, writer: anytype) !void { - if (self.size() == 0) return; - try writer.writeAll(self.buffer.items); + switch (state) { + .start => unreachable, + .bind_single => try doBind(writer), + .bind_times_skip => try doBindTimesSkip(count, skip, writer), } - }; -} + } + + pub fn write(self: Self, writer: anytype) !void { + if (self.size() == 0) return; + try writer.writeAll(self.buffer.items); + } +}; + +pub const WeakBind = struct { + entries: std.ArrayListUnmanaged(Entry) = .{}, + buffer: std.ArrayListUnmanaged(u8) = .{}, + + const Self = @This(); + + pub fn deinit(self: *Self, gpa: Allocator) void { + self.entries.deinit(gpa); + self.buffer.deinit(gpa); + } + + pub fn size(self: Self) u64 { + return @as(u64, 
@intCast(self.buffer.items.len)); + } + + pub fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void { + if (self.entries.items.len == 0) return; + + const writer = self.buffer.writer(gpa); + + std.mem.sort(Entry, self.entries.items, ctx, Entry.lessThan); + + var start: usize = 0; + var seg_id: ?u8 = null; + for (self.entries.items, 0..) |entry, i| { + if (seg_id != null and seg_id.? == entry.segment_id) continue; + try finalizeSegment(self.entries.items[start..i], ctx, writer); + seg_id = entry.segment_id; + start = i; + } + + try finalizeSegment(self.entries.items[start..], ctx, writer); + try done(writer); + } + + fn finalizeSegment(entries: []const Entry, ctx: *MachO, writer: anytype) !void { + if (entries.len == 0) return; + + const seg_id = entries[0].segment_id; + try setSegmentOffset(seg_id, 0, writer); + + var offset: u64 = 0; + var addend: i64 = 0; + var count: usize = 0; + var skip: u64 = 0; + var target: ?Symbol.Index = null; + + var state: enum { + start, + bind_single, + bind_times_skip, + } = .start; + + var i: usize = 0; + while (i < entries.len) : (i += 1) { + const current = entries[i]; + if (target == null or target.? 
!= current.target) { + switch (state) { + .start => {}, + .bind_single => try doBind(writer), + .bind_times_skip => try doBindTimesSkip(count, skip, writer), + } + state = .start; + target = current.target; + + const sym = ctx.getSymbol(current.target); + const name = sym.getName(ctx); + const flags: u8 = 0; // TODO NON_WEAK_DEFINITION + + try setSymbol(name, flags, writer); + try setTypePointer(writer); + + if (current.addend != addend) { + addend = current.addend; + try setAddend(addend, writer); + } + } + + log.debug("{x}, {d}, {x}, {?x}, {s}", .{ offset, count, skip, addend, @tagName(state) }); + log.debug(" => {x}", .{current.offset}); + switch (state) { + .start => { + if (current.offset < offset) { + try addAddr(@as(u64, @bitCast(@as(i64, @intCast(current.offset)) - @as(i64, @intCast(offset)))), writer); + offset = offset - (offset - current.offset); + } else if (current.offset > offset) { + const delta = current.offset - offset; + try addAddr(delta, writer); + offset += delta; + } + state = .bind_single; + offset += @sizeOf(u64); + count = 1; + }, + .bind_single => { + if (current.offset == offset) { + try doBind(writer); + state = .start; + } else if (current.offset > offset) { + const delta = current.offset - offset; + state = .bind_times_skip; + skip = @as(u64, @intCast(delta)); + offset += skip; + } else unreachable; + i -= 1; + }, + .bind_times_skip => { + if (current.offset < offset) { + count -= 1; + if (count == 1) { + try doBindAddAddr(skip, writer); + } else { + try doBindTimesSkip(count, skip, writer); + } + state = .start; + offset = offset - (@sizeOf(u64) + skip); + i -= 2; + } else if (current.offset == offset) { + count += 1; + offset += @sizeOf(u64) + skip; + } else { + try doBindTimesSkip(count, skip, writer); + state = .start; + i -= 1; + } + }, + } + } + + switch (state) { + .start => unreachable, + .bind_single => try doBind(writer), + .bind_times_skip => try doBindTimesSkip(count, skip, writer), + } + } + + pub fn write(self: Self, 
writer: anytype) !void { + if (self.size() == 0) return; + try writer.writeAll(self.buffer.items); + } +}; + +pub const LazyBind = struct { + entries: std.ArrayListUnmanaged(Entry) = .{}, + buffer: std.ArrayListUnmanaged(u8) = .{}, + offsets: std.ArrayListUnmanaged(u32) = .{}, + + const Self = @This(); + + pub fn deinit(self: *Self, gpa: Allocator) void { + self.entries.deinit(gpa); + self.buffer.deinit(gpa); + self.offsets.deinit(gpa); + } + + pub fn size(self: Self) u64 { + return @as(u64, @intCast(self.buffer.items.len)); + } + + pub fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void { + if (self.entries.items.len == 0) return; + + try self.offsets.ensureTotalCapacityPrecise(gpa, self.entries.items.len); + + var cwriter = std.io.countingWriter(self.buffer.writer(gpa)); + const writer = cwriter.writer(); + + var addend: i64 = 0; + + for (self.entries.items) |entry| { + self.offsets.appendAssumeCapacity(@as(u32, @intCast(cwriter.bytes_written))); + + const sym = ctx.getSymbol(entry.target); + const name = sym.getName(ctx); + const flags: u8 = if (sym.weakRef(ctx)) macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT else 0; + const ordinal: i16 = ord: { + if (sym.flags.interposable) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; + if (sym.flags.import) { + if (ctx.options.namespace == .flat) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; + if (sym.getDylibOrdinal(ctx)) |ord| break :ord @bitCast(ord); + } + if (ctx.options.undefined_treatment == .dynamic_lookup) + break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; + break :ord macho.BIND_SPECIAL_DYLIB_SELF; + }; + + try setSegmentOffset(entry.segment_id, entry.offset, writer); + try setSymbol(name, flags, writer); + try setDylibOrdinal(ordinal, writer); + + if (entry.addend != addend) { + try setAddend(entry.addend, writer); + addend = entry.addend; + } + + try doBind(writer); + try done(writer); + } + } + + pub fn write(self: Self, writer: anytype) !void { + if (self.size() == 0) return; + try 
writer.writeAll(self.buffer.items); + } +}; fn setSegmentOffset(segment_id: u8, offset: u64, writer: anytype) !void { log.debug(">>> set segment: {d} and offset: {x}", .{ segment_id, offset }); @@ -282,7 +442,7 @@ fn doBind(writer: anytype) !void { fn doBindAddAddr(addr: u64, writer: anytype) !void { log.debug(">>> bind with add: {x}", .{addr}); - if (std.mem.isAlignedGeneric(u64, addr, @sizeOf(u64))) { + if (std.mem.isAligned(addr, @sizeOf(u64))) { const imm = @divExact(addr, @sizeOf(u64)); if (imm <= 0xf) { try writer.writeByte( @@ -312,429 +472,3 @@ fn done(writer: anytype) !void { log.debug(">>> done", .{}); try writer.writeByte(macho.BIND_OPCODE_DONE); } - -const TestContext = struct { - symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{}, - strtab: std.ArrayListUnmanaged(u8) = .{}, - - const Target = struct { - index: u32, - - fn eql(this: Target, other: Target) bool { - return this.index == other.index; - } - }; - - fn deinit(ctx: *TestContext, gpa: Allocator) void { - ctx.symbols.deinit(gpa); - ctx.strtab.deinit(gpa); - } - - fn addSymbol(ctx: *TestContext, gpa: Allocator, name: []const u8, ordinal: i16, flags: u16) !void { - const n_strx = try ctx.addString(gpa, name); - var n_desc = @as(u16, @bitCast(ordinal * macho.N_SYMBOL_RESOLVER)); - n_desc |= flags; - try ctx.symbols.append(gpa, .{ - .n_value = 0, - .n_strx = n_strx, - .n_desc = n_desc, - .n_type = macho.N_EXT, - .n_sect = 0, - }); - } - - fn addString(ctx: *TestContext, gpa: Allocator, name: []const u8) !u32 { - const n_strx = @as(u32, @intCast(ctx.strtab.items.len)); - try ctx.strtab.appendSlice(gpa, name); - try ctx.strtab.append(gpa, 0); - return n_strx; - } - - fn getSymbol(ctx: TestContext, target: Target) macho.nlist_64 { - return ctx.symbols.items[target.index]; - } - - fn getSymbolName(ctx: TestContext, target: Target) []const u8 { - const sym = ctx.getSymbol(target); - assert(sym.n_strx < ctx.strtab.items.len); - return std.mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.strtab.items.ptr + 
sym.n_strx)), 0); - } -}; - -fn generateTestContext() !TestContext { - const gpa = testing.allocator; - var ctx = TestContext{}; - try ctx.addSymbol(gpa, "_import_1", 1, 0); - try ctx.addSymbol(gpa, "_import_2", 1, 0); - try ctx.addSymbol(gpa, "_import_3", 1, 0); - try ctx.addSymbol(gpa, "_import_4", 2, 0); - try ctx.addSymbol(gpa, "_import_5_weak", 2, macho.N_WEAK_REF); - try ctx.addSymbol(gpa, "_import_6", 2, 0); - return ctx; -} - -test "bind - no entries" { - const gpa = testing.allocator; - - var test_context = try generateTestContext(); - defer test_context.deinit(gpa); - - var bind = Bind(TestContext, TestContext.Target){}; - defer bind.deinit(gpa); - - try bind.finalize(gpa, test_context); - try testing.expectEqual(@as(u64, 0), bind.size()); -} - -test "bind - single entry" { - const gpa = testing.allocator; - - var test_context = try generateTestContext(); - defer test_context.deinit(gpa); - - var bind = Bind(TestContext, TestContext.Target){}; - defer bind.deinit(gpa); - - try bind.entries.append(gpa, .{ - .offset = 0x10, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0, - }); - try bind.finalize(gpa, test_context); - try testing.expectEqualSlices(u8, &[_]u8{ - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, - 0x0, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, - 0x5f, - 0x69, - 0x6d, - 0x70, - 0x6f, - 0x72, - 0x74, - 0x5f, - 0x31, - 0x0, - macho.BIND_OPCODE_SET_TYPE_IMM | 1, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, - macho.BIND_OPCODE_ADD_ADDR_ULEB, - 0x10, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_DONE, - }, bind.buffer.items); -} - -test "bind - multiple occurrences within the same segment" { - const gpa = testing.allocator; - - var test_context = try generateTestContext(); - defer test_context.deinit(gpa); - - var bind = Bind(TestContext, TestContext.Target){}; - defer bind.deinit(gpa); - - try bind.entries.append(gpa, .{ - .offset = 0x10, - .segment_id = 1, - .target = TestContext.Target{ .index = 
0 }, - .addend = 0, - }); - try bind.entries.append(gpa, .{ - .offset = 0x18, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0, - }); - try bind.entries.append(gpa, .{ - .offset = 0x20, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0, - }); - try bind.entries.append(gpa, .{ - .offset = 0x28, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0, - }); - - try bind.finalize(gpa, test_context); - try testing.expectEqualSlices(u8, &[_]u8{ - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, - 0x0, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, - 0x5f, - 0x69, - 0x6d, - 0x70, - 0x6f, - 0x72, - 0x74, - 0x5f, - 0x31, - 0x0, - macho.BIND_OPCODE_SET_TYPE_IMM | 1, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, - macho.BIND_OPCODE_ADD_ADDR_ULEB, - 0x10, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_DONE, - }, bind.buffer.items); -} - -test "bind - multiple occurrences with skip and addend" { - const gpa = testing.allocator; - - var test_context = try generateTestContext(); - defer test_context.deinit(gpa); - - var bind = Bind(TestContext, TestContext.Target){}; - defer bind.deinit(gpa); - - try bind.entries.append(gpa, .{ - .offset = 0x0, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0x10, - }); - try bind.entries.append(gpa, .{ - .offset = 0x10, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0x10, - }); - try bind.entries.append(gpa, .{ - .offset = 0x20, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0x10, - }); - try bind.entries.append(gpa, .{ - .offset = 0x30, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0x10, - }); - - try bind.finalize(gpa, test_context); - try testing.expectEqualSlices(u8, &[_]u8{ - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, - 0x0, - 
macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, - 0x5f, - 0x69, - 0x6d, - 0x70, - 0x6f, - 0x72, - 0x74, - 0x5f, - 0x31, - 0x0, - macho.BIND_OPCODE_SET_TYPE_IMM | 1, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, - macho.BIND_OPCODE_SET_ADDEND_SLEB, - 0x10, - macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB, - 0x4, - 0x8, - macho.BIND_OPCODE_DONE, - }, bind.buffer.items); -} - -test "bind - complex" { - const gpa = testing.allocator; - - var test_context = try generateTestContext(); - defer test_context.deinit(gpa); - - var bind = Bind(TestContext, TestContext.Target){}; - defer bind.deinit(gpa); - - try bind.entries.append(gpa, .{ - .offset = 0x58, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0, - }); - try bind.entries.append(gpa, .{ - .offset = 0x100, - .segment_id = 1, - .target = TestContext.Target{ .index = 1 }, - .addend = 0x10, - }); - try bind.entries.append(gpa, .{ - .offset = 0x110, - .segment_id = 1, - .target = TestContext.Target{ .index = 1 }, - .addend = 0x10, - }); - try bind.entries.append(gpa, .{ - .offset = 0x130, - .segment_id = 1, - .target = TestContext.Target{ .index = 1 }, - .addend = 0x10, - }); - try bind.entries.append(gpa, .{ - .offset = 0x140, - .segment_id = 1, - .target = TestContext.Target{ .index = 1 }, - .addend = 0x10, - }); - try bind.entries.append(gpa, .{ - .offset = 0x148, - .segment_id = 1, - .target = TestContext.Target{ .index = 2 }, - .addend = 0, - }); - - try bind.finalize(gpa, test_context); - try testing.expectEqualSlices(u8, &[_]u8{ - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, - 0x0, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, - 0x5f, - 0x69, - 0x6d, - 0x70, - 0x6f, - 0x72, - 0x74, - 0x5f, - 0x31, - 0x0, - macho.BIND_OPCODE_SET_TYPE_IMM | 1, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, - macho.BIND_OPCODE_ADD_ADDR_ULEB, - 0x58, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, - 0x5f, - 0x69, - 0x6d, - 0x70, - 0x6f, - 0x72, - 
0x74, - 0x5f, - 0x32, - 0x0, - macho.BIND_OPCODE_SET_TYPE_IMM | 1, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, - macho.BIND_OPCODE_SET_ADDEND_SLEB, - 0x10, - macho.BIND_OPCODE_ADD_ADDR_ULEB, - 0xa0, - 0x1, - macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB, - 0x2, - 0x8, - macho.BIND_OPCODE_ADD_ADDR_ULEB, - 0x10, - macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB, - 0x2, - 0x8, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, - 0x5f, - 0x69, - 0x6d, - 0x70, - 0x6f, - 0x72, - 0x74, - 0x5f, - 0x33, - 0x0, - macho.BIND_OPCODE_SET_TYPE_IMM | 1, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, - macho.BIND_OPCODE_SET_ADDEND_SLEB, - 0x0, - macho.BIND_OPCODE_ADD_ADDR_ULEB, - 0xf8, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0x1, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_DONE, - }, bind.buffer.items); -} - -test "lazy bind" { - const gpa = testing.allocator; - - var test_context = try generateTestContext(); - defer test_context.deinit(gpa); - - var bind = LazyBind(TestContext, TestContext.Target){}; - defer bind.deinit(gpa); - - try bind.entries.append(gpa, .{ - .offset = 0x10, - .segment_id = 1, - .target = TestContext.Target{ .index = 0 }, - .addend = 0, - }); - try bind.entries.append(gpa, .{ - .offset = 0x20, - .segment_id = 2, - .target = TestContext.Target{ .index = 1 }, - .addend = 0x10, - }); - - try bind.finalize(gpa, test_context); - try testing.expectEqualSlices(u8, &[_]u8{ - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, - 0x10, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, - 0x5f, - 0x69, - 0x6d, - 0x70, - 0x6f, - 0x72, - 0x74, - 0x5f, - 0x31, - 0x0, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_DONE, - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 2, - 0x20, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, - 0x5f, - 0x69, - 0x6d, - 0x70, - 0x6f, - 0x72, - 0x74, - 0x5f, - 0x32, - 0x0, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, - 
macho.BIND_OPCODE_SET_ADDEND_SLEB, - 0x10, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_DONE, - }, bind.buffer.items); -} - -const std = @import("std"); -const assert = std.debug.assert; -const leb = std.leb; -const log = std.log.scoped(.dyld_info); -const macho = std.macho; -const testing = std.testing; - -const Allocator = std.mem.Allocator; diff --git a/src/link/MachO/eh_frame.zig b/src/link/MachO/eh_frame.zig index 4b51d09683..6ca7a5cd2a 100644 --- a/src/link/MachO/eh_frame.zig +++ b/src/link/MachO/eh_frame.zig @@ -1,629 +1,539 @@ -pub fn scanRelocs(macho_file: *MachO) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; +pub const Cie = struct { + /// Includes 4byte size cell. + offset: u32, + out_offset: u32 = 0, + size: u32, + lsda_size: ?enum { p32, p64 } = null, + personality: ?Personality = null, + file: File.Index = 0, + alive: bool = false, - for (macho_file.objects.items, 0..) |*object, object_id| { - var cies = std.AutoHashMap(u32, void).init(gpa); - defer cies.deinit(); + pub fn parse(cie: *Cie, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); - var it = object.getEhFrameRecordsIterator(); + const data = cie.getData(macho_file); + const aug = std.mem.sliceTo(@as([*:0]const u8, @ptrCast(data.ptr + 9)), 0); - for (object.exec_atoms.items) |atom_index| { - var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - while (inner_syms_it.next()) |sym| { - const fde_offset = object.eh_frame_records_lookup.get(sym) orelse continue; - if (object.eh_frame_relocs_lookup.get(fde_offset).?.dead) continue; - it.seekTo(fde_offset); - const fde = (it.next() catch continue).?; // We don't care about this error since we already handled it + if (aug[0] != 'z') return; // TODO should we error out? 
- const cie_ptr = fde.getCiePointerSource(@intCast(object_id), macho_file, fde_offset); - const cie_offset = fde_offset + 4 - cie_ptr; + var stream = std.io.fixedBufferStream(data[9 + aug.len + 1 ..]); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); - if (!cies.contains(cie_offset)) { - try cies.putNoClobber(cie_offset, {}); - it.seekTo(cie_offset); - const cie = (it.next() catch continue).?; // We don't care about this error since we already handled it - try cie.scanRelocs(macho_file, @as(u32, @intCast(object_id)), cie_offset); + _ = try leb.readULEB128(u64, reader); // code alignment factor + _ = try leb.readULEB128(u64, reader); // data alignment factor + _ = try leb.readULEB128(u64, reader); // return address register + _ = try leb.readULEB128(u64, reader); // augmentation data length + + for (aug[1..]) |ch| switch (ch) { + 'R' => { + const enc = try reader.readByte(); + if (enc & 0xf != EH_PE.absptr or enc & EH_PE.pcrel == 0) { + @panic("unexpected pointer encoding"); // TODO error } - } - } - } -} - -pub fn calcSectionSize(macho_file: *MachO, unwind_info: *const UnwindInfo) error{OutOfMemory}!void { - const sect_id = macho_file.eh_frame_section_index orelse return; - const sect = &macho_file.sections.items(.header)[sect_id]; - sect.@"align" = 3; - sect.size = 0; - - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const comp = macho_file.base.comp; - const gpa = comp.gpa; - var size: u32 = 0; - - for (macho_file.objects.items, 0..) 
|*object, object_id| { - var cies = std.AutoHashMap(u32, u32).init(gpa); - defer cies.deinit(); - - var eh_it = object.getEhFrameRecordsIterator(); - - for (object.exec_atoms.items) |atom_index| { - var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - while (inner_syms_it.next()) |sym| { - const fde_record_offset = object.eh_frame_records_lookup.get(sym) orelse continue; - if (object.eh_frame_relocs_lookup.get(fde_record_offset).?.dead) continue; - - const record_id = unwind_info.records_lookup.get(sym) orelse continue; - const record = unwind_info.records.items[record_id]; - - // TODO skip this check if no __compact_unwind is present - const is_dwarf = UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch); - if (!is_dwarf) continue; - - eh_it.seekTo(fde_record_offset); - const source_fde_record = (eh_it.next() catch continue).?; // We already handled this error - - const cie_ptr = source_fde_record.getCiePointerSource(@intCast(object_id), macho_file, fde_record_offset); - const cie_offset = fde_record_offset + 4 - cie_ptr; - - const gop = try cies.getOrPut(cie_offset); - if (!gop.found_existing) { - eh_it.seekTo(cie_offset); - const source_cie_record = (eh_it.next() catch continue).?; // We already handled this error - gop.value_ptr.* = size; - size += source_cie_record.getSize(); + }, + 'P' => { + const enc = try reader.readByte(); + if (enc != EH_PE.pcrel | EH_PE.indirect | EH_PE.sdata4) { + @panic("unexpected personality pointer encoding"); // TODO error } - - size += source_fde_record.getSize(); - } - } - - sect.size = size; - } -} - -pub fn write(macho_file: *MachO, unwind_info: *UnwindInfo) !void { - const sect_id = macho_file.eh_frame_section_index orelse return; - const sect = macho_file.sections.items(.header)[sect_id]; - const seg_id = macho_file.sections.items(.segment_index)[sect_id]; - const seg = macho_file.segments.items[seg_id]; - - const target = macho_file.base.comp.root_mod.resolved_target.result; - 
const cpu_arch = target.cpu.arch; - const comp = macho_file.base.comp; - const gpa = comp.gpa; - - var eh_records = std.AutoArrayHashMap(u32, EhFrameRecord(true)).init(gpa); - defer { - for (eh_records.values()) |*rec| { - rec.deinit(gpa); - } - eh_records.deinit(); + _ = try reader.readInt(u32, .little); // personality pointer + }, + 'L' => { + const enc = try reader.readByte(); + switch (enc & 0xf) { + EH_PE.sdata4 => cie.lsda_size = .p32, + EH_PE.absptr => cie.lsda_size = .p64, + else => unreachable, // TODO error + } + }, + else => @panic("unexpected augmentation string"), // TODO error + }; } - var eh_frame_offset: u32 = 0; - - for (macho_file.objects.items, 0..) |*object, object_id| { - try eh_records.ensureUnusedCapacity(2 * @as(u32, @intCast(object.exec_atoms.items.len))); - - var cies = std.AutoHashMap(u32, u32).init(gpa); - defer cies.deinit(); - - var eh_it = object.getEhFrameRecordsIterator(); - - for (object.exec_atoms.items) |atom_index| { - var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - while (inner_syms_it.next()) |reloc_target| { - const fde_record_offset = object.eh_frame_records_lookup.get(reloc_target) orelse continue; - if (object.eh_frame_relocs_lookup.get(fde_record_offset).?.dead) continue; - - const record_id = unwind_info.records_lookup.get(reloc_target) orelse continue; - const record = &unwind_info.records.items[record_id]; - - // TODO skip this check if no __compact_unwind is present - const is_dwarf = UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch); - if (!is_dwarf) continue; - - eh_it.seekTo(fde_record_offset); - const source_fde_record = (eh_it.next() catch continue).?; // We already handled this error - - const cie_ptr = source_fde_record.getCiePointerSource(@intCast(object_id), macho_file, fde_record_offset); - const cie_offset = fde_record_offset + 4 - cie_ptr; - - const gop = try cies.getOrPut(cie_offset); - if (!gop.found_existing) { - eh_it.seekTo(cie_offset); - const 
source_cie_record = (eh_it.next() catch continue).?; // We already handled this error - var cie_record = try source_cie_record.toOwned(gpa); - try cie_record.relocate(macho_file, @as(u32, @intCast(object_id)), .{ - .source_offset = cie_offset, - .out_offset = eh_frame_offset, - .sect_addr = sect.addr, - }); - eh_records.putAssumeCapacityNoClobber(eh_frame_offset, cie_record); - gop.value_ptr.* = eh_frame_offset; - eh_frame_offset += cie_record.getSize(); - } - - var fde_record = try source_fde_record.toOwned(gpa); - try fde_record.relocate(macho_file, @as(u32, @intCast(object_id)), .{ - .source_offset = fde_record_offset, - .out_offset = eh_frame_offset, - .sect_addr = sect.addr, - }); - fde_record.setCiePointer(eh_frame_offset + 4 - gop.value_ptr.*); - - switch (cpu_arch) { - .aarch64 => {}, // relocs take care of LSDA pointers - .x86_64 => { - // We need to relocate target symbol address ourselves. - const atom_sym = macho_file.getSymbol(reloc_target); - try fde_record.setTargetSymbolAddress(atom_sym.n_value, .{ - .base_addr = sect.addr, - .base_offset = eh_frame_offset, - }); - - // We need to parse LSDA pointer and relocate ourselves. 
- const cie_record = eh_records.get( - eh_frame_offset + 4 - fde_record.getCiePointer(), - ).?; - const eh_frame_sect = object.getSourceSection(object.eh_frame_sect_id.?); - const source_lsda_ptr = fde_record.getLsdaPointer(cie_record, .{ - .base_addr = eh_frame_sect.addr, - .base_offset = fde_record_offset, - }) catch continue; // We already handled this error - if (source_lsda_ptr) |ptr| { - const sym_index = object.getSymbolByAddress(ptr, null); - const sym = object.symtab[sym_index]; - fde_record.setLsdaPointer(cie_record, sym.n_value, .{ - .base_addr = sect.addr, - .base_offset = eh_frame_offset, - }) catch continue; // We already handled this error - } - }, - else => unreachable, - } - - eh_records.putAssumeCapacityNoClobber(eh_frame_offset, fde_record); - - UnwindInfo.UnwindEncoding.setDwarfSectionOffset( - &record.compactUnwindEncoding, - cpu_arch, - @as(u24, @intCast(eh_frame_offset)), - ); - - const cie_record = eh_records.get( - eh_frame_offset + 4 - fde_record.getCiePointer(), - ).?; - const lsda_ptr = fde_record.getLsdaPointer(cie_record, .{ - .base_addr = sect.addr, - .base_offset = eh_frame_offset, - }) catch continue; // We already handled this error - if (lsda_ptr) |ptr| { - record.lsda = ptr - seg.vmaddr; - } - - eh_frame_offset += fde_record.getSize(); - } - } + pub inline fn getSize(cie: Cie) u32 { + return cie.size + 4; } - var buffer = std.ArrayList(u8).init(gpa); - defer buffer.deinit(); - const writer = buffer.writer(); - - for (eh_records.values()) |record| { - try writer.writeInt(u32, record.size, .little); - try buffer.appendSlice(record.data); + pub fn getObject(cie: Cie, macho_file: *MachO) *Object { + const file = macho_file.getFile(cie.file).?; + return file.object; } - try macho_file.base.file.?.pwriteAll(buffer.items, sect.offset); -} -const EhFrameRecordTag = enum { cie, fde }; + pub fn getData(cie: Cie, macho_file: *MachO) []const u8 { + const object = cie.getObject(macho_file); + return 
object.eh_frame_data.items[cie.offset..][0..cie.getSize()]; + } -pub fn EhFrameRecord(comptime is_mutable: bool) type { - return struct { - tag: EhFrameRecordTag, - size: u32, - data: if (is_mutable) []u8 else []const u8, + pub fn getPersonality(cie: Cie, macho_file: *MachO) ?*Symbol { + const personality = cie.personality orelse return null; + return macho_file.getSymbol(personality.index); + } - const Record = @This(); - - pub fn deinit(rec: *Record, gpa: Allocator) void { - comptime assert(is_mutable); - gpa.free(rec.data); + pub fn eql(cie: Cie, other: Cie, macho_file: *MachO) bool { + if (!std.mem.eql(u8, cie.getData(macho_file), other.getData(macho_file))) return false; + if (cie.personality != null and other.personality != null) { + if (cie.personality.?.index != other.personality.?.index) return false; } + if (cie.personality != null or other.personality != null) return false; + return true; + } - pub fn toOwned(rec: Record, gpa: Allocator) Allocator.Error!EhFrameRecord(true) { - const data = try gpa.dupe(u8, rec.data); - return EhFrameRecord(true){ - .tag = rec.tag, - .size = rec.size, - .data = data, - }; - } + pub fn format( + cie: Cie, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = cie; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format CIEs directly"); + } - pub inline fn getSize(rec: Record) u32 { - return 4 + rec.size; - } + pub fn fmt(cie: Cie, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ + .cie = cie, + .macho_file = macho_file, + } }; + } - pub fn scanRelocs( - rec: Record, - macho_file: *MachO, - object_id: u32, - source_offset: u32, - ) !void { - if (rec.getPersonalityPointerReloc(macho_file, object_id, source_offset)) |target| { - try macho_file.addGotEntry(target); - } - } - - pub fn getTargetSymbolAddress(rec: Record, ctx: struct { - base_addr: u64, - base_offset: u64, - }) u64 { - assert(rec.tag == .fde); - const 
addend = mem.readInt(i64, rec.data[4..][0..8], .little); - return @as(u64, @intCast(@as(i64, @intCast(ctx.base_addr + ctx.base_offset + 8)) + addend)); - } - - pub fn setTargetSymbolAddress(rec: *Record, value: u64, ctx: struct { - base_addr: u64, - base_offset: u64, - }) !void { - assert(rec.tag == .fde); - const addend = @as(i64, @intCast(value)) - @as(i64, @intCast(ctx.base_addr + ctx.base_offset + 8)); - mem.writeInt(i64, rec.data[4..][0..8], addend, .little); - } - - pub fn getPersonalityPointerReloc( - rec: Record, - macho_file: *MachO, - object_id: u32, - source_offset: u32, - ) ?SymbolWithLoc { - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const relocs = getRelocs(macho_file, object_id, source_offset); - for (relocs) |rel| { - switch (cpu_arch) { - .aarch64 => { - const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); - switch (rel_type) { - .ARM64_RELOC_SUBTRACTOR, - .ARM64_RELOC_UNSIGNED, - => continue, - .ARM64_RELOC_POINTER_TO_GOT => {}, - else => unreachable, - } - }, - .x86_64 => { - const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); - switch (rel_type) { - .X86_64_RELOC_GOT => {}, - else => unreachable, - } - }, - else => unreachable, - } - const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = object_id, - .rel = rel, - .code = rec.data, - .base_offset = @as(i32, @intCast(source_offset)) + 4, - }); - return reloc_target; - } - return null; - } - - pub fn relocate(rec: *Record, macho_file: *MachO, object_id: u32, ctx: struct { - source_offset: u32, - out_offset: u32, - sect_addr: u64, - }) !void { - comptime assert(is_mutable); - - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const relocs = getRelocs(macho_file, object_id, ctx.source_offset); - - for (relocs) |rel| { - const reloc_target = Atom.parseRelocTarget(macho_file, .{ - .object_id = object_id, - .rel = rel, - 
.code = rec.data, - .base_offset = @as(i32, @intCast(ctx.source_offset)) + 4, - }); - const rel_offset = @as(u32, @intCast(rel.r_address - @as(i32, @intCast(ctx.source_offset)) - 4)); - const source_addr = ctx.sect_addr + rel_offset + ctx.out_offset + 4; - - switch (cpu_arch) { - .aarch64 => { - const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); - switch (rel_type) { - .ARM64_RELOC_SUBTRACTOR => { - // Address of the __eh_frame in the source object file - }, - .ARM64_RELOC_POINTER_TO_GOT => { - const target_addr = macho_file.getGotEntryAddress(reloc_target).?; - const result = math.cast(i32, @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr))) orelse - return error.Overflow; - mem.writeInt(i32, rec.data[rel_offset..][0..4], result, .little); - }, - .ARM64_RELOC_UNSIGNED => { - assert(rel.r_extern == 1); - const target_addr = Atom.getRelocTargetAddress(macho_file, reloc_target, false); - const result = @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr)); - mem.writeInt(i64, rec.data[rel_offset..][0..8], @as(i64, @intCast(result)), .little); - }, - else => unreachable, - } - }, - .x86_64 => { - const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); - switch (rel_type) { - .X86_64_RELOC_GOT => { - const target_addr = macho_file.getGotEntryAddress(reloc_target).?; - const addend = mem.readInt(i32, rec.data[rel_offset..][0..4], .little); - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); - const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); - mem.writeInt(i32, rec.data[rel_offset..][0..4], disp, .little); - }, - else => unreachable, - } - }, - else => unreachable, - } - } - } - - pub fn getCiePointerSource(rec: Record, object_id: u32, macho_file: *MachO, offset: u32) u32 { - assert(rec.tag == .fde); - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const addend 
= mem.readInt(u32, rec.data[0..4], .little); - switch (cpu_arch) { - .aarch64 => { - const relocs = getRelocs(macho_file, object_id, offset); - const maybe_rel = for (relocs) |rel| { - if (rel.r_address - @as(i32, @intCast(offset)) == 4 and - @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)) == .ARM64_RELOC_SUBTRACTOR) - break rel; - } else null; - const rel = maybe_rel orelse return addend; - const object = &macho_file.objects.items[object_id]; - const target_addr = object.in_symtab.?[rel.r_symbolnum].n_value; - const sect = object.getSourceSection(object.eh_frame_sect_id.?); - return @intCast(sect.addr + offset - target_addr + addend); - }, - .x86_64 => return addend, - else => unreachable, - } - } - - pub fn getCiePointer(rec: Record) u32 { - assert(rec.tag == .fde); - return mem.readInt(u32, rec.data[0..4], .little); - } - - pub fn setCiePointer(rec: *Record, ptr: u32) void { - assert(rec.tag == .fde); - mem.writeInt(u32, rec.data[0..4], ptr, .little); - } - - pub fn getAugmentationString(rec: Record) []const u8 { - assert(rec.tag == .cie); - return mem.sliceTo(@as([*:0]const u8, @ptrCast(rec.data.ptr + 5)), 0); - } - - pub fn getPersonalityPointer(rec: Record, ctx: struct { - base_addr: u64, - base_offset: u64, - }) !?u64 { - assert(rec.tag == .cie); - const aug_str = rec.getAugmentationString(); - - var stream = std.io.fixedBufferStream(rec.data[9 + aug_str.len ..]); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); - - for (aug_str, 0..) 
|ch, i| switch (ch) { - 'z' => if (i > 0) { - return error.BadDwarfCfi; - } else { - _ = try leb.readULEB128(u64, reader); - }, - 'R' => { - _ = try reader.readByte(); - }, - 'P' => { - const enc = try reader.readByte(); - const offset = ctx.base_offset + 13 + aug_str.len + creader.bytes_read; - const ptr = try getEncodedPointer(enc, @as(i64, @intCast(ctx.base_addr + offset)), reader); - return ptr; - }, - 'L' => { - _ = try reader.readByte(); - }, - 'S', 'B', 'G' => {}, - else => return error.BadDwarfCfi, - }; - - return null; - } - - pub fn getLsdaPointer(rec: Record, cie: Record, ctx: struct { - base_addr: u64, - base_offset: u64, - }) !?u64 { - assert(rec.tag == .fde); - const enc = (try cie.getLsdaEncoding()) orelse return null; - var stream = std.io.fixedBufferStream(rec.data[20..]); - const reader = stream.reader(); - _ = try reader.readByte(); - const offset = ctx.base_offset + 25; - const ptr = try getEncodedPointer(enc, @as(i64, @intCast(ctx.base_addr + offset)), reader); - return ptr; - } - - pub fn setLsdaPointer(rec: *Record, cie: Record, value: u64, ctx: struct { - base_addr: u64, - base_offset: u64, - }) !void { - assert(rec.tag == .fde); - const enc = (try cie.getLsdaEncoding()) orelse unreachable; - var stream = std.io.fixedBufferStream(rec.data[21..]); - const writer = stream.writer(); - const offset = ctx.base_offset + 25; - try setEncodedPointer(enc, @as(i64, @intCast(ctx.base_addr + offset)), value, writer); - } - - fn getLsdaEncoding(rec: Record) !?u8 { - assert(rec.tag == .cie); - const aug_str = rec.getAugmentationString(); - - const base_offset = 9 + aug_str.len; - var stream = std.io.fixedBufferStream(rec.data[base_offset..]); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); - - for (aug_str, 0..) 
|ch, i| switch (ch) { - 'z' => if (i > 0) { - return error.BadDwarfCfi; - } else { - _ = try leb.readULEB128(u64, reader); - }, - 'R' => { - _ = try reader.readByte(); - }, - 'P' => { - const enc = try reader.readByte(); - _ = try getEncodedPointer(enc, 0, reader); - }, - 'L' => { - const enc = try reader.readByte(); - return enc; - }, - 'S', 'B', 'G' => {}, - else => return error.BadDwarfCfi, - }; - - return null; - } - - fn getEncodedPointer(enc: u8, pcrel_offset: i64, reader: anytype) !?u64 { - if (enc == EH_PE.omit) return null; - - var ptr: i64 = switch (enc & 0x0F) { - EH_PE.absptr => @as(i64, @bitCast(try reader.readInt(u64, .little))), - EH_PE.udata2 => @as(i16, @bitCast(try reader.readInt(u16, .little))), - EH_PE.udata4 => @as(i32, @bitCast(try reader.readInt(u32, .little))), - EH_PE.udata8 => @as(i64, @bitCast(try reader.readInt(u64, .little))), - EH_PE.uleb128 => @as(i64, @bitCast(try leb.readULEB128(u64, reader))), - EH_PE.sdata2 => try reader.readInt(i16, .little), - EH_PE.sdata4 => try reader.readInt(i32, .little), - EH_PE.sdata8 => try reader.readInt(i64, .little), - EH_PE.sleb128 => try leb.readILEB128(i64, reader), - else => return null, - }; - - switch (enc & 0x70) { - EH_PE.absptr => {}, - EH_PE.pcrel => ptr += pcrel_offset, - EH_PE.datarel, - EH_PE.textrel, - EH_PE.funcrel, - EH_PE.aligned, - => return null, - else => return null, - } - - return @as(u64, @bitCast(ptr)); - } - - fn setEncodedPointer(enc: u8, pcrel_offset: i64, value: u64, writer: anytype) !void { - if (enc == EH_PE.omit) return; - - var actual = @as(i64, @intCast(value)); - - switch (enc & 0x70) { - EH_PE.absptr => {}, - EH_PE.pcrel => actual -= pcrel_offset, - EH_PE.datarel, - EH_PE.textrel, - EH_PE.funcrel, - EH_PE.aligned, - => unreachable, - else => unreachable, - } - - switch (enc & 0x0F) { - EH_PE.absptr => try writer.writeInt(u64, @as(u64, @bitCast(actual)), .little), - EH_PE.udata2 => try writer.writeInt(u16, @as(u16, @bitCast(@as(i16, @intCast(actual)))), .little), - 
EH_PE.udata4 => try writer.writeInt(u32, @as(u32, @bitCast(@as(i32, @intCast(actual)))), .little), - EH_PE.udata8 => try writer.writeInt(u64, @as(u64, @bitCast(actual)), .little), - EH_PE.uleb128 => try leb.writeULEB128(writer, @as(u64, @bitCast(actual))), - EH_PE.sdata2 => try writer.writeInt(i16, @as(i16, @intCast(actual)), .little), - EH_PE.sdata4 => try writer.writeInt(i32, @as(i32, @intCast(actual)), .little), - EH_PE.sdata8 => try writer.writeInt(i64, actual, .little), - EH_PE.sleb128 => try leb.writeILEB128(writer, actual), - else => unreachable, - } - } + const FormatContext = struct { + cie: Cie, + macho_file: *MachO, }; -} -pub fn getRelocs(macho_file: *MachO, object_id: u32, source_offset: u32) []const macho.relocation_info { - const object = &macho_file.objects.items[object_id]; - assert(object.hasEhFrameRecords()); - const urel = object.eh_frame_relocs_lookup.get(source_offset) orelse - return &[0]macho.relocation_info{}; - const all_relocs = object.getRelocs(object.eh_frame_sect_id.?); - return all_relocs[urel.reloc.start..][0..urel.reloc.len]; -} + fn format2( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + const cie = ctx.cie; + try writer.print("@{x} : size({x})", .{ + cie.offset, + cie.getSize(), + }); + if (!cie.alive) try writer.writeAll(" : [*]"); + } + + pub const Index = u32; + + pub const Personality = struct { + index: Symbol.Index = 0, + offset: u32 = 0, + }; +}; + +pub const Fde = struct { + /// Includes 4byte size cell. 
+ offset: u32, + out_offset: u32 = 0, + size: u32, + cie: Cie.Index, + atom: Atom.Index = 0, + atom_offset: u32 = 0, + lsda: Atom.Index = 0, + lsda_offset: u32 = 0, + lsda_ptr_offset: u32 = 0, + file: File.Index = 0, + alive: bool = true, + + pub fn parse(fde: *Fde, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const data = fde.getData(macho_file); + const object = fde.getObject(macho_file); + const sect = object.sections.items(.header)[object.eh_frame_sect_index.?]; + + // Parse target atom index + const pc_begin = std.mem.readInt(i64, data[8..][0..8], .little); + const taddr: u64 = @intCast(@as(i64, @intCast(sect.addr + fde.offset + 8)) + pc_begin); + fde.atom = object.findAtom(taddr) orelse { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: invalid function reference in FDE", .{ + object.fmtPath(), sect.segName(), sect.sectName(), fde.offset + 8, + }); + return error.ParseFailed; + }; + const atom = fde.getAtom(macho_file); + fde.atom_offset = @intCast(taddr - atom.getInputAddress(macho_file)); + + // Associate with a CIE + const cie_ptr = std.mem.readInt(u32, data[4..8], .little); + const cie_offset = fde.offset + 4 - cie_ptr; + const cie_index = for (object.cies.items, 0..) 
|cie, cie_index| { + if (cie.offset == cie_offset) break @as(Cie.Index, @intCast(cie_index)); + } else null; + if (cie_index) |cie| { + fde.cie = cie; + } else { + macho_file.base.fatal("{}: no matching CIE found for FDE at offset {x}", .{ + object.fmtPath(), + fde.offset, + }); + return error.ParseFailed; + } + + const cie = fde.getCie(macho_file); + + // Parse LSDA atom index if any + if (cie.lsda_size) |lsda_size| { + var stream = std.io.fixedBufferStream(data[24..]); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + _ = try leb.readULEB128(u64, reader); // augmentation length + fde.lsda_ptr_offset = @intCast(creader.bytes_read + 24); + const lsda_ptr = switch (lsda_size) { + .p32 => try reader.readInt(i32, .little), + .p64 => try reader.readInt(i64, .little), + }; + const lsda_addr: u64 = @intCast(@as(i64, @intCast(sect.addr + fde.offset + fde.lsda_ptr_offset)) + lsda_ptr); + fde.lsda = object.findAtom(lsda_addr) orelse { + macho_file.base.fatal("{}: {s},{s}: 0x{x}: invalid LSDA reference in FDE", .{ + object.fmtPath(), sect.segName(), sect.sectName(), fde.offset + fde.lsda_ptr_offset, + }); + return error.ParseFailed; + }; + const lsda_atom = fde.getLsdaAtom(macho_file).?; + fde.lsda_offset = @intCast(lsda_addr - lsda_atom.getInputAddress(macho_file)); + } + } + + pub inline fn getSize(fde: Fde) u32 { + return fde.size + 4; + } + + pub fn getObject(fde: Fde, macho_file: *MachO) *Object { + const file = macho_file.getFile(fde.file).?; + return file.object; + } + + pub fn getData(fde: Fde, macho_file: *MachO) []const u8 { + const object = fde.getObject(macho_file); + return object.eh_frame_data.items[fde.offset..][0..fde.getSize()]; + } + + pub fn getCie(fde: Fde, macho_file: *MachO) *const Cie { + const object = fde.getObject(macho_file); + return &object.cies.items[fde.cie]; + } + + pub fn getAtom(fde: Fde, macho_file: *MachO) *Atom { + return macho_file.getAtom(fde.atom).?; + } + + pub fn getLsdaAtom(fde: Fde, 
macho_file: *MachO) ?*Atom { + return macho_file.getAtom(fde.lsda); + } + + pub fn format( + fde: Fde, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = fde; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format FDEs directly"); + } + + pub fn fmt(fde: Fde, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ + .fde = fde, + .macho_file = macho_file, + } }; + } + + const FormatContext = struct { + fde: Fde, + macho_file: *MachO, + }; + + fn format2( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + const fde = ctx.fde; + const macho_file = ctx.macho_file; + try writer.print("@{x} : size({x}) : cie({d}) : {s}", .{ + fde.offset, + fde.getSize(), + fde.cie, + fde.getAtom(macho_file).getName(macho_file), + }); + if (!fde.alive) try writer.writeAll(" : [*]"); + } + + pub const Index = u32; +}; pub const Iterator = struct { data: []const u8, pos: u32 = 0, - pub fn next(it: *Iterator) !?EhFrameRecord(false) { + pub const Record = struct { + tag: enum { fde, cie }, + offset: u32, + size: u32, + }; + + pub fn next(it: *Iterator) !?Record { if (it.pos >= it.data.len) return null; var stream = std.io.fixedBufferStream(it.data[it.pos..]); const reader = stream.reader(); const size = try reader.readInt(u32, .little); - if (size == 0xFFFFFFFF) { - log.debug("MachO doesn't support 64bit DWARF CFI __eh_frame records", .{}); - return error.BadDwarfCfi; - } + if (size == 0xFFFFFFFF) @panic("DWARF CFI is 32bit on macOS"); const id = try reader.readInt(u32, .little); - const tag: EhFrameRecordTag = if (id == 0) .cie else .fde; - const offset: u32 = 4; - const record = EhFrameRecord(false){ - .tag = tag, + const record = Record{ + .tag = if (id == 0) .cie else .fde, + .offset = it.pos, .size = size, - .data = it.data[it.pos + offset ..][0..size], }; - - 
it.pos += size + offset; + it.pos += size + 4; return record; } - - pub fn reset(it: *Iterator) void { - it.pos = 0; - } - - pub fn seekTo(it: *Iterator, pos: u32) void { - assert(pos >= 0 and pos < it.data.len); - it.pos = pos; - } }; +pub fn calcSize(macho_file: *MachO) !u32 { + const tracy = trace(@src()); + defer tracy.end(); + + var offset: u32 = 0; + + var cies = std.ArrayList(Cie).init(macho_file.base.allocator); + defer cies.deinit(); + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + + outer: for (object.cies.items) |*cie| { + for (cies.items) |other| { + if (other.eql(cie.*, macho_file)) { + // We already have a CIE record that has the exact same contents, so instead of + // duplicating them, we mark this one dead and set its output offset to be + // equal to that of the alive record. This way, we won't have to rewrite + // Fde.cie_index field when committing the records to file. + cie.out_offset = other.out_offset; + continue :outer; + } + } + cie.alive = true; + cie.out_offset = offset; + offset += cie.getSize(); + try cies.append(cie.*); + } + } + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.fdes.items) |*fde| { + if (!fde.alive) continue; + fde.out_offset = offset; + offset += fde.getSize(); + } + } + + return offset; +} + +pub fn calcNumRelocs(macho_file: *MachO) u32 { + const tracy = trace(@src()); + defer tracy.end(); + + var nreloc: u32 = 0; + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.cies.items) |cie| { + if (!cie.alive) continue; + if (cie.getPersonality(macho_file)) |_| { + nreloc += 1; // personality + } + } + } + + return nreloc; +} + +pub fn write(macho_file: *MachO, buffer: []u8) void { + const tracy = trace(@src()); + defer tracy.end(); + + const sect = macho_file.sections.items(.header)[macho_file.eh_frame_sect_index.?]; + const addend: i64 = switch 
(macho_file.options.cpu_arch.?) { + .x86_64 => 4, + else => 0, + }; + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.cies.items) |cie| { + if (!cie.alive) continue; + + @memcpy(buffer[cie.out_offset..][0..cie.getSize()], cie.getData(macho_file)); + + if (cie.getPersonality(macho_file)) |sym| { + const offset = cie.out_offset + cie.personality.?.offset; + const saddr = sect.addr + offset; + const taddr = sym.getGotAddress(macho_file); + std.mem.writeInt( + i32, + buffer[offset..][0..4], + @intCast(@as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)) + addend), + .little, + ); + } + } + } + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.fdes.items) |fde| { + if (!fde.alive) continue; + + @memcpy(buffer[fde.out_offset..][0..fde.getSize()], fde.getData(macho_file)); + + { + const offset = fde.out_offset + 4; + const value = offset - fde.getCie(macho_file).out_offset; + std.mem.writeInt(u32, buffer[offset..][0..4], value, .little); + } + + { + const offset = fde.out_offset + 8; + const saddr = sect.addr + offset; + const taddr = fde.getAtom(macho_file).value; + std.mem.writeInt( + i64, + buffer[offset..][0..8], + @as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)), + .little, + ); + } + + if (fde.getLsdaAtom(macho_file)) |atom| { + const offset = fde.out_offset + fde.lsda_offset; + const saddr = sect.addr + offset; + const taddr = atom.value; + switch (fde.getCie(macho_file).lsda_size.?) 
{ + .p32 => std.mem.writeInt( + i32, + buffer[offset..][0..4], + @intCast(@as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)) + addend), + .little, + ), + .p64 => std.mem.writeInt( + i64, + buffer[offset..][0..8], + @as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)), + .little, + ), + } + } + } + } +} + +pub fn writeRelocs(macho_file: *MachO, code: []u8, relocs: *std.ArrayList(macho.relocation_info)) error{Overflow}!void { + const tracy = trace(@src()); + defer tracy.end(); + + const cpu_arch = macho_file.options.cpu_arch.?; + const sect = macho_file.sections.items(.header)[macho_file.eh_frame_sect_index.?]; + const addend: i64 = switch (cpu_arch) { + .x86_64 => 4, + else => 0, + }; + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.cies.items) |cie| { + if (!cie.alive) continue; + + @memcpy(code[cie.out_offset..][0..cie.getSize()], cie.getData(macho_file)); + + if (cie.getPersonality(macho_file)) |sym| { + const r_address = math.cast(i32, cie.out_offset + cie.personality.?.offset) orelse return error.Overflow; + const r_symbolnum = math.cast(u24, sym.getOutputSymtabIndex(macho_file).?) 
orelse return error.Overflow; + relocs.appendAssumeCapacity(.{ + .r_address = r_address, + .r_symbolnum = r_symbolnum, + .r_length = 2, + .r_extern = 1, + .r_pcrel = 1, + .r_type = switch (cpu_arch) { + .aarch64 => @intFromEnum(macho.reloc_type_arm64.ARM64_RELOC_POINTER_TO_GOT), + .x86_64 => @intFromEnum(macho.reloc_type_x86_64.X86_64_RELOC_GOT), + else => unreachable, + }, + }); + } + } + } + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.fdes.items) |fde| { + if (!fde.alive) continue; + + @memcpy(code[fde.out_offset..][0..fde.getSize()], fde.getData(macho_file)); + + { + const offset = fde.out_offset + 4; + const value = offset - fde.getCie(macho_file).out_offset; + std.mem.writeInt(u32, code[offset..][0..4], value, .little); + } + + { + const offset = fde.out_offset + 8; + const saddr = sect.addr + offset; + const taddr = fde.getAtom(macho_file).value; + std.mem.writeInt( + i64, + code[offset..][0..8], + @as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)), + .little, + ); + } + + if (fde.getLsdaAtom(macho_file)) |atom| { + const offset = fde.out_offset + fde.lsda_ptr_offset; + const saddr = sect.addr + offset; + const taddr = atom.value + fde.lsda_offset; + switch (fde.getCie(macho_file).lsda_size.?) 
{ + .p32 => std.mem.writeInt( + i32, + code[offset..][0..4], + @intCast(@as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)) + addend), + .little, + ), + .p64 => std.mem.writeInt( + i64, + code[offset..][0..8], + @as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)), + .little, + ), + } + } + } + } +} + pub const EH_PE = struct { pub const absptr = 0x00; pub const uleb128 = 0x01; @@ -643,17 +553,17 @@ pub const EH_PE = struct { pub const omit = 0xFF; }; -const std = @import("std"); const assert = std.debug.assert; +const leb = std.leb; const macho = std.macho; const math = std.math; const mem = std.mem; -const leb = std.leb; -const log = std.log.scoped(.eh_frame); +const std = @import("std"); +const trace = @import("../tracy.zig").trace; -const Allocator = mem.Allocator; +const Allocator = std.mem.Allocator; const Atom = @import("Atom.zig"); +const File = @import("file.zig").File; const MachO = @import("../MachO.zig"); -const Relocation = @import("Relocation.zig"); -const SymbolWithLoc = MachO.SymbolWithLoc; -const UnwindInfo = @import("UnwindInfo.zig"); +const Object = @import("Object.zig"); +const Symbol = @import("Symbol.zig"); diff --git a/src/link/MachO/fat.zig b/src/link/MachO/fat.zig index fcaca7d99a..46cf0139df 100644 --- a/src/link/MachO/fat.zig +++ b/src/link/MachO/fat.zig @@ -1,3 +1,13 @@ +const std = @import("std"); +const assert = std.debug.assert; +const builtin = @import("builtin"); +const log = std.log.scoped(.macho); +const macho = std.macho; +const mem = std.mem; +const native_endian = builtin.target.cpu.arch.endian(); + +const MachO = @import("../MachO.zig"); + pub fn isFatLibrary(file: std.fs.File) bool { const reader = file.reader(); const hdr = reader.readStructEndian(macho.fat_header, .big) catch return false; @@ -7,18 +17,16 @@ pub fn isFatLibrary(file: std.fs.File) bool { pub const Arch = struct { tag: std.Target.Cpu.Arch, - offset: u64, + offset: u32, + size: u32, }; -/// Caller owns the memory. 
-pub fn parseArchs(gpa: Allocator, file: std.fs.File) ![]const Arch { +pub fn parseArchs(file: std.fs.File, buffer: *[2]Arch) ![]const Arch { const reader = file.reader(); const fat_header = try reader.readStructEndian(macho.fat_header, .big); assert(fat_header.magic == macho.FAT_MAGIC); - var archs = try std.ArrayList(Arch).initCapacity(gpa, fat_header.nfat_arch); - defer archs.deinit(); - + var count: usize = 0; var fat_arch_index: u32 = 0; while (fat_arch_index < fat_header.nfat_arch) : (fat_arch_index += 1) { const fat_arch = try reader.readStructEndian(macho.fat_arch, .big); @@ -29,16 +37,9 @@ pub fn parseArchs(gpa: Allocator, file: std.fs.File) ![]const Arch { macho.CPU_TYPE_X86_64 => if (fat_arch.cpusubtype == macho.CPU_SUBTYPE_X86_64_ALL) .x86_64 else continue, else => continue, }; - - archs.appendAssumeCapacity(.{ .tag = arch, .offset = fat_arch.offset }); + buffer[count] = .{ .tag = arch, .offset = fat_arch.offset, .size = fat_arch.size }; + count += 1; } - return archs.toOwnedSlice(); + return buffer[0..count]; } - -const std = @import("std"); -const assert = std.debug.assert; -const log = std.log.scoped(.archive); -const macho = std.macho; -const mem = std.mem; -const Allocator = mem.Allocator; diff --git a/src/link/MachO/file.zig b/src/link/MachO/file.zig new file mode 100644 index 0000000000..9e19bed7df --- /dev/null +++ b/src/link/MachO/file.zig @@ -0,0 +1,116 @@ +pub const File = union(enum) { + internal: *InternalObject, + object: *Object, + dylib: *Dylib, + + pub fn getIndex(file: File) Index { + return switch (file) { + inline else => |x| x.index, + }; + } + + pub fn fmtPath(file: File) std.fmt.Formatter(formatPath) { + return .{ .data = file }; + } + + fn formatPath( + file: File, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + switch (file) { + .internal => try writer.writeAll(""), + .object => |x| try writer.print("{}", .{x.fmtPath()}), + 
.dylib => |x| try writer.writeAll(x.path), + } + } + + pub fn resolveSymbols(file: File, macho_file: *MachO) void { + switch (file) { + .internal => unreachable, + inline else => |x| x.resolveSymbols(macho_file), + } + } + + pub fn resetGlobals(file: File, macho_file: *MachO) void { + switch (file) { + .internal => unreachable, + inline else => |x| x.resetGlobals(macho_file), + } + } + + /// Encodes symbol rank so that the following ordering applies: + /// * strong in object + /// * weak in object + /// * tentative in object + /// * strong in archive/dylib + /// * weak in archive/dylib + /// * tentative in archive + /// * unclaimed + pub fn getSymbolRank(file: File, args: struct { + archive: bool = false, + weak: bool = false, + tentative: bool = false, + }) u32 { + if (file == .object and !args.archive) { + const base: u32 = blk: { + if (args.tentative) break :blk 3; + break :blk if (args.weak) 2 else 1; + }; + return (base << 16) + file.getIndex(); + } + const base: u32 = blk: { + if (args.tentative) break :blk 3; + break :blk if (args.weak) 2 else 1; + }; + return base + (file.getIndex() << 24); + } + + pub fn getSymbols(file: File) []const Symbol.Index { + return switch (file) { + inline else => |x| x.symbols.items, + }; + } + + pub fn getAtoms(file: File) []const Atom.Index { + return switch (file) { + .dylib => unreachable, + inline else => |x| x.atoms.items, + }; + } + + pub fn calcSymtabSize(file: File, macho_file: *MachO) !void { + return switch (file) { + inline else => |x| x.calcSymtabSize(macho_file), + }; + } + + pub fn writeSymtab(file: File, macho_file: *MachO) void { + return switch (file) { + inline else => |x| x.writeSymtab(macho_file), + }; + } + + pub const Index = u32; + + pub const Entry = union(enum) { + null: void, + internal: InternalObject, + object: Object, + dylib: Dylib, + }; +}; + +const macho = std.macho; +const std = @import("std"); + +const Allocator = std.mem.Allocator; +const Atom = @import("Atom.zig"); +const InternalObject = 
@import("InternalObject.zig"); +const MachO = @import("../MachO.zig"); +const Object = @import("Object.zig"); +const Dylib = @import("Dylib.zig"); +const Symbol = @import("Symbol.zig"); diff --git a/src/link/MachO/hasher.zig b/src/link/MachO/hasher.zig index 45847689f3..95faaf3a92 100644 --- a/src/link/MachO/hasher.zig +++ b/src/link/MachO/hasher.zig @@ -9,15 +9,14 @@ pub fn ParallelHasher(comptime Hasher: type) type { chunk_size: u64 = 0x4000, max_file_size: ?u64 = null, }) !void { + const tracy = trace(@src()); + defer tracy.end(); + var wg: WaitGroup = .{}; - const file_size = blk: { - const file_size = opts.max_file_size orelse try file.getEndPos(); - break :blk std.math.cast(usize, file_size) orelse return error.Overflow; - }; - const chunk_size = std.math.cast(usize, opts.chunk_size) orelse return error.Overflow; + const file_size = opts.max_file_size orelse try file.getEndPos(); - const buffer = try self.allocator.alloc(u8, chunk_size * out.len); + const buffer = try self.allocator.alloc(u8, opts.chunk_size * out.len); defer self.allocator.free(buffer); const results = try self.allocator.alloc(fs.File.PReadError!usize, out.len); @@ -28,8 +27,11 @@ pub fn ParallelHasher(comptime Hasher: type) type { defer wg.wait(); for (out, results, 0..) 
|*out_buf, *result, i| { - const fstart = i * chunk_size; - const fsize = if (fstart + chunk_size > file_size) file_size - fstart else chunk_size; + const fstart = i * opts.chunk_size; + const fsize = if (fstart + opts.chunk_size > file_size) + file_size - fstart + else + opts.chunk_size; wg.start(); try self.thread_pool.spawn(worker, .{ file, @@ -61,10 +63,11 @@ pub fn ParallelHasher(comptime Hasher: type) type { }; } -const std = @import("std"); const assert = std.debug.assert; const fs = std.fs; const mem = std.mem; +const std = @import("std"); +const trace = @import("../tracy.zig").trace; const Allocator = mem.Allocator; const ThreadPool = std.Thread.Pool; diff --git a/src/link/MachO/load_commands.zig b/src/link/MachO/load_commands.zig index e155a7a8ed..725bd4291f 100644 --- a/src/link/MachO/load_commands.zig +++ b/src/link/MachO/load_commands.zig @@ -1,4 +1,14 @@ -/// Default path to dyld. +const std = @import("std"); +const assert = std.debug.assert; +const log = std.log.scoped(.link); +const macho = std.macho; +const mem = std.mem; + +const Allocator = mem.Allocator; +const Dylib = @import("Dylib.zig"); +const MachO = @import("../MachO.zig"); +const Options = @import("../MachO.zig").Options; + pub const default_dyld_path: [*:0]const u8 = "/usr/lib/dyld"; fn calcInstallNameLen(cmd_size: u64, name: []const u8, assume_max_path_len: bool) u64 { @@ -7,31 +17,20 @@ fn calcInstallNameLen(cmd_size: u64, name: []const u8, assume_max_path_len: bool return mem.alignForward(u64, cmd_size + name_len, @alignOf(u64)); } -const CalcLCsSizeCtx = struct { - segments: []const macho.segment_command_64, - dylibs: []const Dylib, - referenced_dylibs: []u16, - wants_function_starts: bool = true, -}; - -fn calcLCsSize(m: *MachO, ctx: CalcLCsSizeCtx, assume_max_path_len: bool) !u32 { - const comp = m.base.comp; - const gpa = comp.gpa; - var has_text_segment: bool = false; +pub fn calcLoadCommandsSize(macho_file: *MachO, assume_max_path_len: bool) u32 { + const options = 
&macho_file.options; var sizeofcmds: u64 = 0; - for (ctx.segments) |seg| { - sizeofcmds += seg.nsects * @sizeOf(macho.section_64) + @sizeOf(macho.segment_command_64); - if (mem.eql(u8, seg.segName(), "__TEXT")) { - has_text_segment = true; - } + + // LC_SEGMENT_64 + sizeofcmds += @sizeOf(macho.segment_command_64) * macho_file.segments.items.len; + for (macho_file.segments.items) |seg| { + sizeofcmds += seg.nsects * @sizeOf(macho.section_64); } // LC_DYLD_INFO_ONLY sizeofcmds += @sizeOf(macho.dyld_info_command); // LC_FUNCTION_STARTS - if (has_text_segment and ctx.wants_function_starts) { - sizeofcmds += @sizeOf(macho.linkedit_data_command); - } + sizeofcmds += @sizeOf(macho.linkedit_data_command); // LC_DATA_IN_CODE sizeofcmds += @sizeOf(macho.linkedit_data_command); // LC_SYMTAB @@ -45,15 +44,14 @@ fn calcLCsSize(m: *MachO, ctx: CalcLCsSizeCtx, assume_max_path_len: bool) !u32 { false, ); // LC_MAIN - if (comp.config.output_mode == .Exe) { + if (!options.dylib) { sizeofcmds += @sizeOf(macho.entry_point_command); } // LC_ID_DYLIB - if (comp.config.output_mode == .Lib and comp.config.link_mode == .Dynamic) { + if (options.dylib) { sizeofcmds += blk: { - const emit = m.base.emit; - const install_name = m.install_name orelse try emit.directory.join(gpa, &.{emit.sub_path}); - defer if (m.install_name == null) gpa.free(install_name); + const emit = options.emit; + const install_name = options.install_name orelse emit.sub_path; break :blk calcInstallNameLen( @sizeOf(macho.dylib_command), install_name, @@ -63,9 +61,7 @@ fn calcLCsSize(m: *MachO, ctx: CalcLCsSizeCtx, assume_max_path_len: bool) !u32 { } // LC_RPATH { - var it = RpathIterator.init(gpa, m.base.rpath_list); - defer it.deinit(); - while (try it.next()) |rpath| { + for (options.rpath_list) |rpath| { sizeofcmds += calcInstallNameLen( @sizeOf(macho.rpath_command), rpath, @@ -75,24 +71,22 @@ fn calcLCsSize(m: *MachO, ctx: CalcLCsSizeCtx, assume_max_path_len: bool) !u32 { } // LC_SOURCE_VERSION sizeofcmds += 
@sizeOf(macho.source_version_command); - // LC_BUILD_VERSION or LC_VERSION_MIN_ or nothing - { - const target = comp.root_mod.resolved_target.result; - const platform = Platform.fromTarget(target); + if (options.platform) |platform| { if (platform.isBuildVersionCompatible()) { // LC_BUILD_VERSION sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); - } else if (platform.isVersionMinCompatible()) { - // LC_VERSION_MIN_ + } else { + // LC_VERSION_MIN_* sizeofcmds += @sizeOf(macho.version_min_command); } } // LC_UUID sizeofcmds += @sizeOf(macho.uuid_command); // LC_LOAD_DYLIB - for (ctx.referenced_dylibs) |id| { - const dylib = ctx.dylibs[id]; - const dylib_id = dylib.id orelse unreachable; + for (macho_file.dylibs.items) |index| { + const dylib = macho_file.getFile(index).?.dylib; + assert(dylib.isAlive(macho_file)); + const dylib_id = dylib.id.?; sizeofcmds += calcInstallNameLen( @sizeOf(macho.dylib_command), dylib_id.name, @@ -100,19 +94,52 @@ fn calcLCsSize(m: *MachO, ctx: CalcLCsSizeCtx, assume_max_path_len: bool) !u32 { ); } // LC_CODE_SIGNATURE - if (m.requiresCodeSignature()) { + if (macho_file.requiresCodeSig()) { sizeofcmds += @sizeOf(macho.linkedit_data_command); } - return @intCast(sizeofcmds); + return @as(u32, @intCast(sizeofcmds)); } -pub fn calcMinHeaderPad(m: *MachO, ctx: CalcLCsSizeCtx) !u64 { - var padding: u32 = (try calcLCsSize(m, ctx, false)) + m.headerpad_size; +pub fn calcLoadCommandsSizeObject(macho_file: *MachO) u32 { + const options = &macho_file.options; + var sizeofcmds: u64 = 0; + + // LC_SEGMENT_64 + { + assert(macho_file.segments.items.len == 1); + sizeofcmds += @sizeOf(macho.segment_command_64); + const seg = macho_file.segments.items[0]; + sizeofcmds += seg.nsects * @sizeOf(macho.section_64); + } + + // LC_DATA_IN_CODE + sizeofcmds += @sizeOf(macho.linkedit_data_command); + // LC_SYMTAB + sizeofcmds += @sizeOf(macho.symtab_command); + // LC_DYSYMTAB + sizeofcmds += @sizeOf(macho.dysymtab_command); + 
+ if (options.platform) |platform| { + if (platform.isBuildVersionCompatible()) { + // LC_BUILD_VERSION + sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + } else { + // LC_VERSION_MIN_* + sizeofcmds += @sizeOf(macho.version_min_command); + } + } + + return @as(u32, @intCast(sizeofcmds)); +} + +pub fn calcMinHeaderPadSize(macho_file: *MachO) u32 { + const options = &macho_file.options; + var padding: u32 = calcLoadCommandsSize(macho_file, false) + (options.headerpad orelse 0); log.debug("minimum requested headerpad size 0x{x}", .{padding + @sizeOf(macho.mach_header_64)}); - if (m.headerpad_max_install_names) { - const min_headerpad_size: u32 = try calcLCsSize(m, ctx, true); + if (options.headerpad_max_install_names) { + const min_headerpad_size: u32 = calcLoadCommandsSize(macho_file, true); log.debug("headerpad_max_install_names minimum headerpad size 0x{x}", .{ min_headerpad_size + @sizeOf(macho.mach_header_64), }); @@ -125,34 +152,22 @@ pub fn calcMinHeaderPad(m: *MachO, ctx: CalcLCsSizeCtx) !u64 { return offset; } -pub fn calcNumOfLCs(lc_buffer: []const u8) u32 { - var ncmds: u32 = 0; - var pos: usize = 0; - while (true) { - if (pos >= lc_buffer.len) break; - const cmd = @as(*align(1) const macho.load_command, @ptrCast(lc_buffer.ptr + pos)).*; - ncmds += 1; - pos += cmd.cmdsize; - } - return ncmds; -} - -pub fn writeDylinkerLC(lc_writer: anytype) !void { +pub fn writeDylinkerLC(writer: anytype) !void { const name_len = mem.sliceTo(default_dyld_path, 0).len; const cmdsize = @as(u32, @intCast(mem.alignForward( u64, @sizeOf(macho.dylinker_command) + name_len, @sizeOf(u64), ))); - try lc_writer.writeStruct(macho.dylinker_command{ + try writer.writeStruct(macho.dylinker_command{ .cmd = .LOAD_DYLINKER, .cmdsize = cmdsize, .name = @sizeOf(macho.dylinker_command), }); - try lc_writer.writeAll(mem.sliceTo(default_dyld_path, 0)); + try writer.writeAll(mem.sliceTo(default_dyld_path, 0)); const padding = cmdsize - 
@sizeOf(macho.dylinker_command) - name_len; if (padding > 0) { - try lc_writer.writeByteNTimes(0, padding); + try writer.writeByteNTimes(0, padding); } } @@ -164,14 +179,14 @@ const WriteDylibLCCtx = struct { compatibility_version: u32 = 0x10000, }; -fn writeDylibLC(ctx: WriteDylibLCCtx, lc_writer: anytype) !void { +pub fn writeDylibLC(ctx: WriteDylibLCCtx, writer: anytype) !void { const name_len = ctx.name.len + 1; const cmdsize = @as(u32, @intCast(mem.alignForward( u64, @sizeOf(macho.dylib_command) + name_len, @sizeOf(u64), ))); - try lc_writer.writeStruct(macho.dylib_command{ + try writer.writeStruct(macho.dylib_command{ .cmd = ctx.cmd, .cmdsize = cmdsize, .dylib = .{ @@ -181,392 +196,75 @@ fn writeDylibLC(ctx: WriteDylibLCCtx, lc_writer: anytype) !void { .compatibility_version = ctx.compatibility_version, }, }); - try lc_writer.writeAll(ctx.name); - try lc_writer.writeByte(0); + try writer.writeAll(ctx.name); + try writer.writeByte(0); const padding = cmdsize - @sizeOf(macho.dylib_command) - name_len; if (padding > 0) { - try lc_writer.writeByteNTimes(0, padding); + try writer.writeByteNTimes(0, padding); } } -pub fn writeDylibIdLC(macho_file: *MachO, lc_writer: anytype) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - assert(comp.config.output_mode == .Lib and comp.config.link_mode == .Dynamic); - const emit = macho_file.base.emit; - const install_name = macho_file.install_name orelse - try emit.directory.join(gpa, &.{emit.sub_path}); - defer if (macho_file.install_name == null) gpa.free(install_name); - const curr = comp.version orelse std.SemanticVersion{ - .major = 1, - .minor = 0, - .patch = 0, - }; - const compat = macho_file.compatibility_version orelse std.SemanticVersion{ - .major = 1, - .minor = 0, - .patch = 0, - }; +pub fn writeDylibIdLC(options: *const Options, writer: anytype) !void { + assert(options.dylib); + const emit = options.emit; + const install_name = options.install_name orelse emit.sub_path; + const curr = 
options.current_version orelse Options.Version.new(1, 0, 0); + const compat = options.compatibility_version orelse Options.Version.new(1, 0, 0); try writeDylibLC(.{ .cmd = .ID_DYLIB, .name = install_name, - .current_version = @as(u32, @intCast(curr.major << 16 | curr.minor << 8 | curr.patch)), - .compatibility_version = @as(u32, @intCast(compat.major << 16 | compat.minor << 8 | compat.patch)), - }, lc_writer); + .current_version = curr.value, + .compatibility_version = compat.value, + }, writer); } -const RpathIterator = struct { - buffer: []const []const u8, - table: std.StringHashMap(void), - count: usize = 0, - - fn init(gpa: Allocator, rpaths: []const []const u8) RpathIterator { - return .{ .buffer = rpaths, .table = std.StringHashMap(void).init(gpa) }; - } - - fn deinit(it: *RpathIterator) void { - it.table.deinit(); - } - - fn next(it: *RpathIterator) !?[]const u8 { - while (true) { - if (it.count >= it.buffer.len) return null; - const rpath = it.buffer[it.count]; - it.count += 1; - const gop = try it.table.getOrPut(rpath); - if (gop.found_existing) continue; - return rpath; - } - } -}; - -pub fn writeRpathLCs(macho_file: *MachO, lc_writer: anytype) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - - var it = RpathIterator.init(gpa, macho_file.base.rpath_list); - defer it.deinit(); - - while (try it.next()) |rpath| { +pub fn writeRpathLCs(rpaths: []const []const u8, writer: anytype) !void { + for (rpaths) |rpath| { const rpath_len = rpath.len + 1; const cmdsize = @as(u32, @intCast(mem.alignForward( u64, @sizeOf(macho.rpath_command) + rpath_len, @sizeOf(u64), ))); - try lc_writer.writeStruct(macho.rpath_command{ + try writer.writeStruct(macho.rpath_command{ .cmdsize = cmdsize, .path = @sizeOf(macho.rpath_command), }); - try lc_writer.writeAll(rpath); - try lc_writer.writeByte(0); + try writer.writeAll(rpath); + try writer.writeByte(0); const padding = cmdsize - @sizeOf(macho.rpath_command) - rpath_len; if (padding > 0) { - try 
lc_writer.writeByteNTimes(0, padding); + try writer.writeByteNTimes(0, padding); } } } -pub fn writeVersionMinLC(platform: Platform, sdk_version: ?std.SemanticVersion, lc_writer: anytype) !void { - const cmd: macho.LC = switch (platform.os_tag) { - .macos => .VERSION_MIN_MACOSX, - .ios => .VERSION_MIN_IPHONEOS, - .tvos => .VERSION_MIN_TVOS, - .watchos => .VERSION_MIN_WATCHOS, +pub fn writeVersionMinLC(platform: Options.Platform, sdk_version: ?Options.Version, writer: anytype) !void { + const cmd: macho.LC = switch (platform.platform) { + .MACOS => .VERSION_MIN_MACOSX, + .IOS, .IOSSIMULATOR => .VERSION_MIN_IPHONEOS, + .TVOS, .TVOSSIMULATOR => .VERSION_MIN_TVOS, + .WATCHOS, .WATCHOSSIMULATOR => .VERSION_MIN_WATCHOS, else => unreachable, }; - try lc_writer.writeAll(mem.asBytes(&macho.version_min_command{ + try writer.writeAll(mem.asBytes(&macho.version_min_command{ .cmd = cmd, - .version = platform.toAppleVersion(), - .sdk = if (sdk_version) |ver| semanticVersionToAppleVersion(ver) else platform.toAppleVersion(), + .version = platform.version.value, + .sdk = if (sdk_version) |ver| ver.value else platform.version.value, })); } -pub fn writeBuildVersionLC(platform: Platform, sdk_version: ?std.SemanticVersion, lc_writer: anytype) !void { +pub fn writeBuildVersionLC(platform: Options.Platform, sdk_version: ?Options.Version, writer: anytype) !void { const cmdsize = @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); - try lc_writer.writeStruct(macho.build_version_command{ + try writer.writeStruct(macho.build_version_command{ .cmdsize = cmdsize, - .platform = platform.toApplePlatform(), - .minos = platform.toAppleVersion(), - .sdk = if (sdk_version) |ver| semanticVersionToAppleVersion(ver) else platform.toAppleVersion(), + .platform = platform.platform, + .minos = platform.version.value, + .sdk = if (sdk_version) |ver| ver.value else platform.version.value, .ntools = 1, }); - try lc_writer.writeAll(mem.asBytes(&macho.build_tool_version{ - .tool = .ZIG, 
+ try writer.writeAll(mem.asBytes(&macho.build_tool_version{ + .tool = @as(macho.TOOL, @enumFromInt(0x6)), .version = 0x0, })); } - -pub fn writeLoadDylibLCs(dylibs: []const Dylib, referenced: []u16, lc_writer: anytype) !void { - for (referenced) |index| { - const dylib = dylibs[index]; - const dylib_id = dylib.id orelse unreachable; - try writeDylibLC(.{ - .cmd = if (dylib.weak) .LOAD_WEAK_DYLIB else .LOAD_DYLIB, - .name = dylib_id.name, - .timestamp = dylib_id.timestamp, - .current_version = dylib_id.current_version, - .compatibility_version = dylib_id.compatibility_version, - }, lc_writer); - } -} - -pub const Platform = struct { - os_tag: std.Target.Os.Tag, - abi: std.Target.Abi, - version: std.SemanticVersion, - - /// Using Apple's ld64 as our blueprint, `min_version` as well as `sdk_version` are set to - /// the extracted minimum platform version. - pub fn fromLoadCommand(lc: macho.LoadCommandIterator.LoadCommand) Platform { - switch (lc.cmd()) { - .BUILD_VERSION => { - const cmd = lc.cast(macho.build_version_command).?; - return .{ - .os_tag = switch (cmd.platform) { - .MACOS => .macos, - .IOS, .IOSSIMULATOR => .ios, - .TVOS, .TVOSSIMULATOR => .tvos, - .WATCHOS, .WATCHOSSIMULATOR => .watchos, - else => @panic("TODO"), - }, - .abi = switch (cmd.platform) { - .IOSSIMULATOR, - .TVOSSIMULATOR, - .WATCHOSSIMULATOR, - => .simulator, - else => .none, - }, - .version = appleVersionToSemanticVersion(cmd.minos), - }; - }, - .VERSION_MIN_MACOSX, - .VERSION_MIN_IPHONEOS, - .VERSION_MIN_TVOS, - .VERSION_MIN_WATCHOS, - => { - const cmd = lc.cast(macho.version_min_command).?; - return .{ - .os_tag = switch (lc.cmd()) { - .VERSION_MIN_MACOSX => .macos, - .VERSION_MIN_IPHONEOS => .ios, - .VERSION_MIN_TVOS => .tvos, - .VERSION_MIN_WATCHOS => .watchos, - else => unreachable, - }, - .abi = .none, - .version = appleVersionToSemanticVersion(cmd.version), - }; - }, - else => unreachable, - } - } - - pub fn fromTarget(target: std.Target) Platform { - return .{ - .os_tag = 
target.os.tag, - .abi = target.abi, - .version = target.os.version_range.semver.min, - }; - } - - pub fn toAppleVersion(plat: Platform) u32 { - return semanticVersionToAppleVersion(plat.version); - } - - pub fn toApplePlatform(plat: Platform) macho.PLATFORM { - return switch (plat.os_tag) { - .macos => .MACOS, - .ios => if (plat.abi == .simulator) .IOSSIMULATOR else .IOS, - .tvos => if (plat.abi == .simulator) .TVOSSIMULATOR else .TVOS, - .watchos => if (plat.abi == .simulator) .WATCHOSSIMULATOR else .WATCHOS, - else => unreachable, - }; - } - - pub fn isBuildVersionCompatible(plat: Platform) bool { - inline for (supported_platforms) |sup_plat| { - if (sup_plat[0] == plat.os_tag and sup_plat[1] == plat.abi) { - return sup_plat[2] <= plat.toAppleVersion(); - } - } - return false; - } - - pub fn isVersionMinCompatible(plat: Platform) bool { - inline for (supported_platforms) |sup_plat| { - if (sup_plat[0] == plat.os_tag and sup_plat[1] == plat.abi) { - return sup_plat[3] <= plat.toAppleVersion(); - } - } - return false; - } - - pub fn fmtTarget(plat: Platform, cpu_arch: std.Target.Cpu.Arch) std.fmt.Formatter(formatTarget) { - return .{ .data = .{ .platform = plat, .cpu_arch = cpu_arch } }; - } - - const FmtCtx = struct { - platform: Platform, - cpu_arch: std.Target.Cpu.Arch, - }; - - pub fn formatTarget( - ctx: FmtCtx, - comptime unused_fmt_string: []const u8, - options: std.fmt.FormatOptions, - writer: anytype, - ) !void { - _ = unused_fmt_string; - _ = options; - try writer.print("{s}-{s}", .{ @tagName(ctx.cpu_arch), @tagName(ctx.platform.os_tag) }); - if (ctx.platform.abi != .none) { - try writer.print("-{s}", .{@tagName(ctx.platform.abi)}); - } - } - - /// Caller owns the memory. 
- pub fn allocPrintTarget(plat: Platform, gpa: Allocator, cpu_arch: std.Target.Cpu.Arch) error{OutOfMemory}![]u8 { - var buffer = std.ArrayList(u8).init(gpa); - defer buffer.deinit(); - try buffer.writer().print("{}", .{plat.fmtTarget(cpu_arch)}); - return buffer.toOwnedSlice(); - } - - pub fn eqlTarget(plat: Platform, other: Platform) bool { - return plat.os_tag == other.os_tag and plat.abi == other.abi; - } -}; - -const SupportedPlatforms = struct { - std.Target.Os.Tag, - std.Target.Abi, - u32, // Min platform version for which to emit LC_BUILD_VERSION - u32, // Min supported platform version -}; - -// Source: https://github.com/apple-oss-distributions/ld64/blob/59a99ab60399c5e6c49e6945a9e1049c42b71135/src/ld/PlatformSupport.cpp#L52 -// zig fmt: off -const supported_platforms = [_]SupportedPlatforms{ - .{ .macos, .none, 0xA0E00, 0xA0800 }, - .{ .ios, .none, 0xC0000, 0x70000 }, - .{ .tvos, .none, 0xC0000, 0x70000 }, - .{ .watchos, .none, 0x50000, 0x20000 }, - .{ .ios, .simulator, 0xD0000, 0x80000 }, - .{ .tvos, .simulator, 0xD0000, 0x80000 }, - .{ .watchos, .simulator, 0x60000, 0x20000 }, -}; -// zig fmt: on - -inline fn semanticVersionToAppleVersion(version: std.SemanticVersion) u32 { - const major = version.major; - const minor = version.minor; - const patch = version.patch; - return (@as(u32, @intCast(major)) << 16) | (@as(u32, @intCast(minor)) << 8) | @as(u32, @intCast(patch)); -} - -pub inline fn appleVersionToSemanticVersion(version: u32) std.SemanticVersion { - return .{ - .major = @as(u16, @truncate(version >> 16)), - .minor = @as(u8, @truncate(version >> 8)), - .patch = @as(u8, @truncate(version)), - }; -} - -pub fn inferSdkVersion(macho_file: *MachO) ?std.SemanticVersion { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - - var arena_allocator = std.heap.ArenaAllocator.init(gpa); - defer arena_allocator.deinit(); - const arena = arena_allocator.allocator(); - - const sdk_layout = macho_file.sdk_layout orelse return null; - const sdk_dir = 
switch (sdk_layout) { - .sdk => comp.sysroot.?, - .vendored => std.fs.path.join(arena, &.{ comp.zig_lib_directory.path.?, "libc", "darwin" }) catch return null, - }; - if (readSdkVersionFromSettings(arena, sdk_dir)) |ver| { - return parseSdkVersion(ver); - } else |_| { - // Read from settings should always succeed when vendored. - if (sdk_layout == .vendored) @panic("zig installation bug: unable to parse SDK version"); - } - - // infer from pathname - const stem = std.fs.path.stem(sdk_dir); - const start = for (stem, 0..) |c, i| { - if (std.ascii.isDigit(c)) break i; - } else stem.len; - const end = for (stem[start..], start..) |c, i| { - if (std.ascii.isDigit(c) or c == '.') continue; - break i; - } else stem.len; - return parseSdkVersion(stem[start..end]); -} - -// Official Apple SDKs ship with a `SDKSettings.json` located at the top of SDK fs layout. -// Use property `MinimalDisplayName` to determine version. -// The file/property is also available with vendored libc. -fn readSdkVersionFromSettings(arena: Allocator, dir: []const u8) ![]const u8 { - const sdk_path = try std.fs.path.join(arena, &.{ dir, "SDKSettings.json" }); - const contents = try std.fs.cwd().readFileAlloc(arena, sdk_path, std.math.maxInt(u16)); - const parsed = try std.json.parseFromSlice(std.json.Value, arena, contents, .{}); - if (parsed.value.object.get("MinimalDisplayName")) |ver| return ver.string; - return error.SdkVersionFailure; -} - -// Versions reported by Apple aren't exactly semantically valid as they usually omit -// the patch component, so we parse SDK value by hand. 
-fn parseSdkVersion(raw: []const u8) ?std.SemanticVersion { - var parsed: std.SemanticVersion = .{ - .major = 0, - .minor = 0, - .patch = 0, - }; - - const parseNext = struct { - fn parseNext(it: anytype) ?u16 { - const nn = it.next() orelse return null; - return std.fmt.parseInt(u16, nn, 10) catch null; - } - }.parseNext; - - var it = std.mem.splitAny(u8, raw, "."); - parsed.major = parseNext(&it) orelse return null; - parsed.minor = parseNext(&it) orelse return null; - parsed.patch = parseNext(&it) orelse 0; - return parsed; -} - -const expect = std.testing.expect; -const expectEqual = std.testing.expectEqual; - -fn testParseSdkVersionSuccess(exp: std.SemanticVersion, raw: []const u8) !void { - const maybe_ver = parseSdkVersion(raw); - try expect(maybe_ver != null); - const ver = maybe_ver.?; - try expectEqual(exp.major, ver.major); - try expectEqual(exp.minor, ver.minor); - try expectEqual(exp.patch, ver.patch); -} - -test "parseSdkVersion" { - try testParseSdkVersionSuccess(.{ .major = 13, .minor = 4, .patch = 0 }, "13.4"); - try testParseSdkVersionSuccess(.{ .major = 13, .minor = 4, .patch = 1 }, "13.4.1"); - try testParseSdkVersionSuccess(.{ .major = 11, .minor = 15, .patch = 0 }, "11.15"); - - try expect(parseSdkVersion("11") == null); -} - -const std = @import("std"); -const assert = std.debug.assert; -const link = @import("../../link.zig"); -const log = std.log.scoped(.link); -const macho = std.macho; -const mem = std.mem; - -const Allocator = mem.Allocator; -const Dylib = @import("Dylib.zig"); -const MachO = @import("../MachO.zig"); -const Compilation = @import("../../Compilation.zig"); diff --git a/src/link/MachO/relocatable.zig b/src/link/MachO/relocatable.zig new file mode 100644 index 0000000000..3d2d5b97b9 --- /dev/null +++ b/src/link/MachO/relocatable.zig @@ -0,0 +1,452 @@ +pub fn flush(macho_file: *MachO) !void { + markExports(macho_file); + claimUnresolved(macho_file); + try initOutputSections(macho_file); + try macho_file.sortSections(); + try 
macho_file.addAtomsToSections(); + try calcSectionSizes(macho_file); + + { + // For relocatable, we only ever need a single segment so create it now. + const prot: macho.vm_prot_t = macho.PROT.READ | macho.PROT.WRITE | macho.PROT.EXEC; + try macho_file.segments.append(macho_file.base.allocator, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = MachO.makeStaticString(""), + .maxprot = prot, + .initprot = prot, + }); + const seg = &macho_file.segments.items[0]; + seg.nsects = @intCast(macho_file.sections.items(.header).len); + seg.cmdsize += seg.nsects * @sizeOf(macho.section_64); + } + + var off = try allocateSections(macho_file); + + { + // Allocate the single segment. + assert(macho_file.segments.items.len == 1); + const seg = &macho_file.segments.items[0]; + var vmaddr: u64 = 0; + var fileoff: u64 = load_commands.calcLoadCommandsSizeObject(macho_file) + @sizeOf(macho.mach_header_64); + seg.vmaddr = vmaddr; + seg.fileoff = fileoff; + + for (macho_file.sections.items(.header)) |header| { + vmaddr = header.addr + header.size; + if (!header.isZerofill()) { + fileoff = header.offset + header.size; + } + } + + seg.vmsize = vmaddr - seg.vmaddr; + seg.filesize = fileoff - seg.fileoff; + } + + macho_file.allocateAtoms(); + + state_log.debug("{}", .{macho_file.dumpState()}); + + try macho_file.calcSymtabSize(); + try writeAtoms(macho_file); + try writeCompactUnwind(macho_file); + try writeEhFrame(macho_file); + + off = mem.alignForward(u32, off, @alignOf(u64)); + off = try macho_file.writeDataInCode(0, off); + off = mem.alignForward(u32, off, @alignOf(u64)); + off = try macho_file.writeSymtab(off); + off = mem.alignForward(u32, off, @alignOf(u64)); + off = try macho_file.writeStrtab(off); + + const ncmds, const sizeofcmds = try writeLoadCommands(macho_file); + try writeHeader(macho_file, ncmds, sizeofcmds); +} + +fn markExports(macho_file: *MachO) void { + for (macho_file.objects.items) |index| { + for (macho_file.getFile(index).?.getSymbols()) |sym_index| { 
+ const sym = macho_file.getSymbol(sym_index); + const file = sym.getFile(macho_file) orelse continue; + if (sym.visibility != .global) continue; + if (file.getIndex() == index) { + sym.flags.@"export" = true; + } + } + } +} + +fn claimUnresolved(macho_file: *MachO) void { + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + + for (object.symbols.items, 0..) |sym_index, i| { + const nlist_idx = @as(Symbol.Index, @intCast(i)); + const nlist = object.symtab.items(.nlist)[nlist_idx]; + if (!nlist.ext()) continue; + if (!nlist.undf()) continue; + + const sym = macho_file.getSymbol(sym_index); + if (sym.getFile(macho_file) != null) continue; + + sym.value = 0; + sym.atom = 0; + sym.nlist_idx = nlist_idx; + sym.file = index; + sym.flags.weak_ref = nlist.weakRef(); + sym.flags.import = true; + sym.visibility = .global; + } + } +} + +fn initOutputSections(macho_file: *MachO) !void { + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.atoms.items) |atom_index| { + const atom = macho_file.getAtom(atom_index) orelse continue; + if (!atom.flags.alive) continue; + atom.out_n_sect = try Atom.initOutputSection(atom.getInputSection(macho_file), macho_file); + } + } + + const needs_unwind_info = for (macho_file.objects.items) |index| { + if (macho_file.getFile(index).?.object.compact_unwind_sect_index != null) break true; + } else false; + if (needs_unwind_info) { + macho_file.unwind_info_sect_index = try macho_file.addSection("__LD", "__compact_unwind", .{ + .flags = macho.S_ATTR_DEBUG, + }); + } + + const needs_eh_frame = for (macho_file.objects.items) |index| { + if (macho_file.getFile(index).?.object.eh_frame_sect_index != null) break true; + } else false; + if (needs_eh_frame) { + assert(needs_unwind_info); + macho_file.eh_frame_sect_index = try macho_file.addSection("__TEXT", "__eh_frame", .{}); + } +} + +fn calcSectionSizes(macho_file: *MachO) !void { + const slice = 
macho_file.sections.slice(); + for (slice.items(.header), slice.items(.atoms)) |*header, atoms| { + if (atoms.items.len == 0) continue; + for (atoms.items) |atom_index| { + const atom = macho_file.getAtom(atom_index).?; + const atom_alignment = try math.powi(u32, 2, atom.alignment); + const offset = mem.alignForward(u64, header.size, atom_alignment); + const padding = offset - header.size; + atom.value = offset; + header.size += padding + atom.size; + header.@"align" = @max(header.@"align", atom.alignment); + header.nreloc += atom.calcNumRelocs(macho_file); + } + } + + if (macho_file.unwind_info_sect_index) |index| { + calcCompactUnwindSize(macho_file, index); + } + + if (macho_file.eh_frame_sect_index) |index| { + const sect = &macho_file.sections.items(.header)[index]; + sect.size = try eh_frame.calcSize(macho_file); + sect.@"align" = 3; + sect.nreloc = eh_frame.calcNumRelocs(macho_file); + } +} + +fn calcCompactUnwindSize(macho_file: *MachO, sect_index: u8) void { + var size: u32 = 0; + var nreloc: u32 = 0; + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.unwind_records.items) |irec| { + const rec = macho_file.getUnwindRecord(irec); + if (!rec.alive) continue; + size += @sizeOf(macho.compact_unwind_entry); + nreloc += 1; + if (rec.getPersonality(macho_file)) |_| { + nreloc += 1; + } + if (rec.getLsdaAtom(macho_file)) |_| { + nreloc += 1; + } + } + } + + const sect = &macho_file.sections.items(.header)[sect_index]; + sect.size = size; + sect.nreloc = nreloc; + sect.@"align" = 3; +} + +fn allocateSections(macho_file: *MachO) !u32 { + var fileoff = load_commands.calcLoadCommandsSizeObject(macho_file) + @sizeOf(macho.mach_header_64); + var vmaddr: u64 = 0; + const slice = macho_file.sections.slice(); + + for (slice.items(.header)) |*header| { + const alignment = try math.powi(u32, 2, header.@"align"); + vmaddr = mem.alignForward(u64, vmaddr, alignment); + header.addr = vmaddr; + vmaddr += header.size; + 
+ if (!header.isZerofill()) { + fileoff = mem.alignForward(u32, fileoff, alignment); + header.offset = fileoff; + fileoff += @intCast(header.size); + } + } + + for (slice.items(.header)) |*header| { + if (header.nreloc == 0) continue; + header.reloff = mem.alignForward(u32, fileoff, @alignOf(macho.relocation_info)); + fileoff = header.reloff + header.nreloc * @sizeOf(macho.relocation_info); + } + + return fileoff; +} + +// We need to sort relocations in descending order to be compatible with Apple's linker. +fn sortReloc(ctx: void, lhs: macho.relocation_info, rhs: macho.relocation_info) bool { + _ = ctx; + return lhs.r_address > rhs.r_address; +} + +fn writeAtoms(macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.allocator; + const cpu_arch = macho_file.options.cpu_arch.?; + const slice = macho_file.sections.slice(); + + for (slice.items(.header), slice.items(.atoms)) |header, atoms| { + if (atoms.items.len == 0) continue; + if (header.isZerofill()) continue; + + const code = try gpa.alloc(u8, header.size); + defer gpa.free(code); + const padding_byte: u8 = if (header.isCode() and cpu_arch == .x86_64) 0xcc else 0; + @memset(code, padding_byte); + + var relocs = try std.ArrayList(macho.relocation_info).initCapacity(gpa, header.nreloc); + defer relocs.deinit(); + + for (atoms.items) |atom_index| { + const atom = macho_file.getAtom(atom_index).?; + assert(atom.flags.alive); + const off = atom.value - header.addr; + @memcpy(code[off..][0..atom.size], atom.getCode(macho_file)); + try atom.writeRelocs(macho_file, code[off..][0..atom.size], &relocs); + } + + assert(relocs.items.len == header.nreloc); + + mem.sort(macho.relocation_info, relocs.items, {}, sortReloc); + + // TODO scattered writes? 
+ try macho_file.base.file.pwriteAll(code, header.offset); + try macho_file.base.file.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff); + } +} + +fn writeCompactUnwind(macho_file: *MachO) !void { + const sect_index = macho_file.unwind_info_sect_index orelse return; + const gpa = macho_file.base.allocator; + const header = macho_file.sections.items(.header)[sect_index]; + + const nrecs = @divExact(header.size, @sizeOf(macho.compact_unwind_entry)); + var entries = try std.ArrayList(macho.compact_unwind_entry).initCapacity(gpa, nrecs); + defer entries.deinit(); + + var relocs = try std.ArrayList(macho.relocation_info).initCapacity(gpa, header.nreloc); + defer relocs.deinit(); + + const addReloc = struct { + fn addReloc(offset: i32, cpu_arch: std.Target.Cpu.Arch) macho.relocation_info { + return .{ + .r_address = offset, + .r_symbolnum = 0, + .r_pcrel = 0, + .r_length = 3, + .r_extern = 0, + .r_type = switch (cpu_arch) { + .aarch64 => @intFromEnum(macho.reloc_type_arm64.ARM64_RELOC_UNSIGNED), + .x86_64 => @intFromEnum(macho.reloc_type_x86_64.X86_64_RELOC_UNSIGNED), + else => unreachable, + }, + }; + } + }.addReloc; + + var offset: i32 = 0; + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.unwind_records.items) |irec| { + const rec = macho_file.getUnwindRecord(irec); + if (!rec.alive) continue; + + var out: macho.compact_unwind_entry = .{ + .rangeStart = 0, + .rangeLength = rec.length, + .compactUnwindEncoding = rec.enc.enc, + .personalityFunction = 0, + .lsda = 0, + }; + + { + // Function address + const atom = rec.getAtom(macho_file); + const addr = rec.getAtomAddress(macho_file); + out.rangeStart = addr; + var reloc = addReloc(offset, macho_file.options.cpu_arch.?); + reloc.r_symbolnum = atom.out_n_sect + 1; + relocs.appendAssumeCapacity(reloc); + } + + // Personality function + if (rec.getPersonality(macho_file)) |sym| { + const r_symbolnum = math.cast(u24, sym.getOutputSymtabIndex(macho_file).?) 
orelse return error.Overflow; + var reloc = addReloc(offset + 16, macho_file.options.cpu_arch.?); + reloc.r_symbolnum = r_symbolnum; + reloc.r_extern = 1; + relocs.appendAssumeCapacity(reloc); + } + + // LSDA address + if (rec.getLsdaAtom(macho_file)) |atom| { + const addr = rec.getLsdaAddress(macho_file); + out.lsda = addr; + var reloc = addReloc(offset + 24, macho_file.options.cpu_arch.?); + reloc.r_symbolnum = atom.out_n_sect + 1; + relocs.appendAssumeCapacity(reloc); + } + + entries.appendAssumeCapacity(out); + offset += @sizeOf(macho.compact_unwind_entry); + } + } + + assert(entries.items.len == nrecs); + assert(relocs.items.len == header.nreloc); + + mem.sort(macho.relocation_info, relocs.items, {}, sortReloc); + + // TODO scattered writes? + try macho_file.base.file.pwriteAll(mem.sliceAsBytes(entries.items), header.offset); + try macho_file.base.file.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff); +} + +fn writeEhFrame(macho_file: *MachO) !void { + const sect_index = macho_file.eh_frame_sect_index orelse return; + const gpa = macho_file.base.allocator; + const header = macho_file.sections.items(.header)[sect_index]; + + const code = try gpa.alloc(u8, header.size); + defer gpa.free(code); + + var relocs = try std.ArrayList(macho.relocation_info).initCapacity(gpa, header.nreloc); + defer relocs.deinit(); + + try eh_frame.writeRelocs(macho_file, code, &relocs); + assert(relocs.items.len == header.nreloc); + + mem.sort(macho.relocation_info, relocs.items, {}, sortReloc); + + // TODO scattered writes? 
+ try macho_file.base.file.pwriteAll(code, header.offset); + try macho_file.base.file.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff); +} + +fn writeLoadCommands(macho_file: *MachO) !struct { usize, usize } { + const gpa = macho_file.base.allocator; + const needed_size = load_commands.calcLoadCommandsSizeObject(macho_file); + const buffer = try gpa.alloc(u8, needed_size); + defer gpa.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + var cwriter = std.io.countingWriter(stream.writer()); + const writer = cwriter.writer(); + + var ncmds: usize = 0; + + // Segment and section load commands + { + assert(macho_file.segments.items.len == 1); + const seg = macho_file.segments.items[0]; + try writer.writeStruct(seg); + for (macho_file.sections.items(.header)) |header| { + try writer.writeStruct(header); + } + ncmds += 1; + } + + try writer.writeStruct(macho_file.data_in_code_cmd); + ncmds += 1; + try writer.writeStruct(macho_file.symtab_cmd); + ncmds += 1; + try writer.writeStruct(macho_file.dysymtab_cmd); + ncmds += 1; + + if (macho_file.options.platform) |platform| { + if (platform.isBuildVersionCompatible()) { + try load_commands.writeBuildVersionLC(platform, macho_file.options.sdk_version, writer); + ncmds += 1; + } else { + try load_commands.writeVersionMinLC(platform, macho_file.options.sdk_version, writer); + ncmds += 1; + } + } + + assert(cwriter.bytes_written == needed_size); + + try macho_file.base.file.pwriteAll(buffer, @sizeOf(macho.mach_header_64)); + + return .{ ncmds, buffer.len }; +} + +fn writeHeader(macho_file: *MachO, ncmds: usize, sizeofcmds: usize) !void { + var header: macho.mach_header_64 = .{}; + header.filetype = macho.MH_OBJECT; + + const subsections_via_symbols = for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + if (object.hasSubsections()) break true; + } else false; + if (subsections_via_symbols) { + header.flags |= macho.MH_SUBSECTIONS_VIA_SYMBOLS; + } + + switch 
(macho_file.options.cpu_arch.?) { + .aarch64 => { + header.cputype = macho.CPU_TYPE_ARM64; + header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL; + }, + .x86_64 => { + header.cputype = macho.CPU_TYPE_X86_64; + header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL; + }, + else => {}, + } + + header.ncmds = @intCast(ncmds); + header.sizeofcmds = @intCast(sizeofcmds); + + try macho_file.base.file.pwriteAll(mem.asBytes(&header), 0); +} + +const assert = std.debug.assert; +const eh_frame = @import("eh_frame.zig"); +const load_commands = @import("load_commands.zig"); +const macho = std.macho; +const math = std.math; +const mem = std.mem; +const state_log = std.log.scoped(.state); +const std = @import("std"); +const trace = @import("../tracy.zig").trace; + +const Atom = @import("Atom.zig"); +const MachO = @import("../MachO.zig"); +const Symbol = @import("Symbol.zig"); diff --git a/src/link/MachO/stubs.zig b/src/link/MachO/stubs.zig deleted file mode 100644 index 925aeaa61f..0000000000 --- a/src/link/MachO/stubs.zig +++ /dev/null @@ -1,169 +0,0 @@ -pub inline fn stubHelperPreambleSize(cpu_arch: std.Target.Cpu.Arch) u8 { - return switch (cpu_arch) { - .x86_64 => 15, - .aarch64 => 6 * @sizeOf(u32), - else => unreachable, // unhandled architecture type - }; -} - -pub inline fn stubHelperSize(cpu_arch: std.Target.Cpu.Arch) u8 { - return switch (cpu_arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, // unhandled architecture type - }; -} - -pub inline fn stubSize(cpu_arch: std.Target.Cpu.Arch) u8 { - return switch (cpu_arch) { - .x86_64 => 6, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, // unhandled architecture type - }; -} - -pub inline fn stubAlignment(cpu_arch: std.Target.Cpu.Arch) u8 { - return switch (cpu_arch) { - .x86_64 => 1, - .aarch64 => 4, - else => unreachable, // unhandled architecture type - }; -} - -pub inline fn stubOffsetInStubHelper(cpu_arch: std.Target.Cpu.Arch) u8 { - return switch (cpu_arch) { - .x86_64 => 1, - .aarch64 => 2 * 
@sizeOf(u32), - else => unreachable, - }; -} - -pub fn writeStubHelperPreambleCode(args: struct { - cpu_arch: std.Target.Cpu.Arch, - source_addr: u64, - dyld_private_addr: u64, - dyld_stub_binder_got_addr: u64, -}, writer: anytype) !void { - switch (args.cpu_arch) { - .x86_64 => { - try writer.writeAll(&.{ 0x4c, 0x8d, 0x1d }); - { - const disp = try Relocation.calcPcRelativeDisplacementX86( - args.source_addr + 3, - args.dyld_private_addr, - 0, - ); - try writer.writeInt(i32, disp, .little); - } - try writer.writeAll(&.{ 0x41, 0x53, 0xff, 0x25 }); - { - const disp = try Relocation.calcPcRelativeDisplacementX86( - args.source_addr + 11, - args.dyld_stub_binder_got_addr, - 0, - ); - try writer.writeInt(i32, disp, .little); - } - }, - .aarch64 => { - { - const pages = Relocation.calcNumberOfPages(args.source_addr, args.dyld_private_addr); - try writer.writeInt(u32, aarch64.Instruction.adrp(.x17, pages).toU32(), .little); - } - { - const off = try Relocation.calcPageOffset(args.dyld_private_addr, .arithmetic); - try writer.writeInt(u32, aarch64.Instruction.add(.x17, .x17, off, false).toU32(), .little); - } - try writer.writeInt(u32, aarch64.Instruction.stp( - .x16, - .x17, - aarch64.Register.sp, - aarch64.Instruction.LoadStorePairOffset.pre_index(-16), - ).toU32(), .little); - { - const pages = Relocation.calcNumberOfPages(args.source_addr + 12, args.dyld_stub_binder_got_addr); - try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little); - } - { - const off = try Relocation.calcPageOffset(args.dyld_stub_binder_got_addr, .load_store_64); - try writer.writeInt(u32, aarch64.Instruction.ldr( - .x16, - .x16, - aarch64.Instruction.LoadStoreOffset.imm(off), - ).toU32(), .little); - } - try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little); - }, - else => unreachable, - } -} - -pub fn writeStubHelperCode(args: struct { - cpu_arch: std.Target.Cpu.Arch, - source_addr: u64, - target_addr: u64, -}, writer: anytype) !void { - switch 
(args.cpu_arch) { - .x86_64 => { - try writer.writeAll(&.{ 0x68, 0x0, 0x0, 0x0, 0x0, 0xe9 }); - { - const disp = try Relocation.calcPcRelativeDisplacementX86(args.source_addr + 6, args.target_addr, 0); - try writer.writeInt(i32, disp, .little); - } - }, - .aarch64 => { - const stub_size: u4 = 3 * @sizeOf(u32); - const literal = blk: { - const div_res = try std.math.divExact(u64, stub_size - @sizeOf(u32), 4); - break :blk std.math.cast(u18, div_res) orelse return error.Overflow; - }; - try writer.writeInt(u32, aarch64.Instruction.ldrLiteral( - .w16, - literal, - ).toU32(), .little); - { - const disp = try Relocation.calcPcRelativeDisplacementArm64(args.source_addr + 4, args.target_addr); - try writer.writeInt(u32, aarch64.Instruction.b(disp).toU32(), .little); - } - try writer.writeAll(&.{ 0x0, 0x0, 0x0, 0x0 }); - }, - else => unreachable, - } -} - -pub fn writeStubCode(args: struct { - cpu_arch: std.Target.Cpu.Arch, - source_addr: u64, - target_addr: u64, -}, writer: anytype) !void { - switch (args.cpu_arch) { - .x86_64 => { - try writer.writeAll(&.{ 0xff, 0x25 }); - { - const disp = try Relocation.calcPcRelativeDisplacementX86(args.source_addr + 2, args.target_addr, 0); - try writer.writeInt(i32, disp, .little); - } - }, - .aarch64 => { - { - const pages = Relocation.calcNumberOfPages(args.source_addr, args.target_addr); - try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little); - } - { - const off = try Relocation.calcPageOffset(args.target_addr, .load_store_64); - try writer.writeInt(u32, aarch64.Instruction.ldr( - .x16, - .x16, - aarch64.Instruction.LoadStoreOffset.imm(off), - ).toU32(), .little); - } - try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little); - }, - else => unreachable, - } -} - -const std = @import("std"); -const aarch64 = @import("../../arch/aarch64/bits.zig"); - -const Relocation = @import("Relocation.zig"); diff --git a/src/link/MachO/synthetic.zig b/src/link/MachO/synthetic.zig new file mode 100644 
index 0000000000..d75e1f08aa --- /dev/null +++ b/src/link/MachO/synthetic.zig @@ -0,0 +1,669 @@ +pub const GotSection = struct { + symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, + + pub const Index = u32; + + pub fn deinit(got: *GotSection, allocator: Allocator) void { + got.symbols.deinit(allocator); + } + + pub fn addSymbol(got: *GotSection, sym_index: Symbol.Index, macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; + const index = @as(Index, @intCast(got.symbols.items.len)); + const entry = try got.symbols.addOne(gpa); + entry.* = sym_index; + const symbol = macho_file.getSymbol(sym_index); + try symbol.addExtra(.{ .got = index }, macho_file); + } + + pub fn getAddress(got: GotSection, index: Index, macho_file: *MachO) u64 { + assert(index < got.symbols.items.len); + const header = macho_file.sections.items(.header)[macho_file.got_sect_index.?]; + return header.addr + index * @sizeOf(u64); + } + + pub fn size(got: GotSection) usize { + return got.symbols.items.len * @sizeOf(u64); + } + + pub fn addDyldRelocs(got: GotSection, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; + const seg_id = macho_file.sections.items(.segment_id)[macho_file.got_sect_index.?]; + const seg = macho_file.segments.items[seg_id]; + + for (got.symbols.items, 0..) 
|sym_index, idx| { + const sym = macho_file.getSymbol(sym_index); + const addr = got.getAddress(@intCast(idx), macho_file); + const entry = bind.Entry{ + .target = sym_index, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if (sym.flags.import) { + try macho_file.bind.entries.append(gpa, entry); + if (sym.flags.weak) { + try macho_file.weak_bind.entries.append(gpa, entry); + } + } else { + try macho_file.rebase.entries.append(gpa, .{ + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + }); + if (sym.flags.weak) { + try macho_file.weak_bind.entries.append(gpa, entry); + } else if (sym.flags.interposable) { + try macho_file.bind.entries.append(gpa, entry); + } + } + } + } + + pub fn write(got: GotSection, macho_file: *MachO, writer: anytype) !void { + const tracy = trace(@src()); + defer tracy.end(); + for (got.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + const value = if (sym.flags.import) @as(u64, 0) else sym.getAddress(.{}, macho_file); + try writer.writeInt(u64, value, .little); + } + } + + const FormatCtx = struct { + got: GotSection, + macho_file: *MachO, + }; + + pub fn fmt(got: GotSection, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ .got = got, .macho_file = macho_file } }; + } + + pub fn format2( + ctx: FormatCtx, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = unused_fmt_string; + for (ctx.got.symbols.items, 0..) 
|entry, i| { + const symbol = ctx.macho_file.getSymbol(entry); + try writer.print(" {d}@0x{x} => {d}@0x{x} ({s})\n", .{ + i, + symbol.getGotAddress(ctx.macho_file), + entry, + symbol.getAddress(.{}, ctx.macho_file), + symbol.getName(ctx.macho_file), + }); + } + } +}; + +pub const StubsSection = struct { + symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, + + pub const Index = u32; + + pub fn deinit(stubs: *StubsSection, allocator: Allocator) void { + stubs.symbols.deinit(allocator); + } + + pub fn addSymbol(stubs: *StubsSection, sym_index: Symbol.Index, macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; + const index = @as(Index, @intCast(stubs.symbols.items.len)); + const entry = try stubs.symbols.addOne(gpa); + entry.* = sym_index; + const symbol = macho_file.getSymbol(sym_index); + try symbol.addExtra(.{ .stubs = index }, macho_file); + } + + pub fn getAddress(stubs: StubsSection, index: Index, macho_file: *MachO) u64 { + assert(index < stubs.symbols.items.len); + const header = macho_file.sections.items(.header)[macho_file.stubs_sect_index.?]; + return header.addr + index * header.reserved2; + } + + pub fn size(stubs: StubsSection, macho_file: *MachO) usize { + const header = macho_file.sections.items(.header)[macho_file.stubs_sect_index.?]; + return stubs.symbols.items.len * header.reserved2; + } + + pub fn write(stubs: StubsSection, macho_file: *MachO, writer: anytype) !void { + const tracy = trace(@src()); + defer tracy.end(); + const cpu_arch = macho_file.options.cpu_arch.?; + const laptr_sect = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_sect_index.?]; + + for (stubs.symbols.items, 0..) 
|sym_index, idx| { + const sym = macho_file.getSymbol(sym_index); + const source = sym.getAddress(.{ .stubs = true }, macho_file); + const target = laptr_sect.addr + idx * @sizeOf(u64); + switch (cpu_arch) { + .x86_64 => { + try writer.writeAll(&.{ 0xff, 0x25 }); + try writer.writeInt(i32, @intCast(target - source - 2 - 4), .little); + }, + .aarch64 => { + // TODO relax if possible + const pages = try Relocation.calcNumberOfPages(source, target); + try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little); + const off = try Relocation.calcPageOffset(target, .load_store_64); + try writer.writeInt( + u32, + aarch64.Instruction.ldr(.x16, .x16, aarch64.Instruction.LoadStoreOffset.imm(off)).toU32(), + .little, + ); + try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little); + }, + else => unreachable, + } + } + } + + const FormatCtx = struct { + stubs: StubsSection, + macho_file: *MachO, + }; + + pub fn fmt(stubs: StubsSection, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ .stubs = stubs, .macho_file = macho_file } }; + } + + pub fn format2( + ctx: FormatCtx, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = unused_fmt_string; + for (ctx.stubs.symbols.items, 0..) 
|entry, i| { + const symbol = ctx.macho_file.getSymbol(entry); + try writer.print(" {d}@0x{x} => {d}@0x{x} ({s})\n", .{ + i, + symbol.getStubsAddress(ctx.macho_file), + entry, + symbol.getAddress(.{}, ctx.macho_file), + symbol.getName(ctx.macho_file), + }); + } + } +}; + +pub const StubsHelperSection = struct { + pub inline fn preambleSize(cpu_arch: std.Target.Cpu.Arch) usize { + return switch (cpu_arch) { + .x86_64 => 15, + .aarch64 => 6 * @sizeOf(u32), + else => 0, + }; + } + + pub inline fn entrySize(cpu_arch: std.Target.Cpu.Arch) usize { + return switch (cpu_arch) { + .x86_64 => 10, + .aarch64 => 3 * @sizeOf(u32), + else => 0, + }; + } + + pub fn size(stubs_helper: StubsHelperSection, macho_file: *MachO) usize { + const tracy = trace(@src()); + defer tracy.end(); + _ = stubs_helper; + const cpu_arch = macho_file.options.cpu_arch.?; + var s: usize = preambleSize(cpu_arch); + for (macho_file.stubs.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + if ((sym.flags.import and !sym.flags.weak) or (!sym.flags.weak and sym.flags.interposable)) { + s += entrySize(cpu_arch); + } + } + return s; + } + + pub fn write(stubs_helper: StubsHelperSection, macho_file: *MachO, writer: anytype) !void { + const tracy = trace(@src()); + defer tracy.end(); + + try stubs_helper.writePreamble(macho_file, writer); + + const cpu_arch = macho_file.options.cpu_arch.?; + const sect = macho_file.sections.items(.header)[macho_file.stubs_helper_sect_index.?]; + const preamble_size = preambleSize(cpu_arch); + const entry_size = entrySize(cpu_arch); + + var idx: usize = 0; + for (macho_file.stubs.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + if ((sym.flags.import and !sym.flags.weak) or (!sym.flags.weak and sym.flags.interposable)) { + const offset = macho_file.lazy_bind.offsets.items[idx]; + const source: i64 = @intCast(sect.addr + preamble_size + entry_size * idx); + const target: i64 = @intCast(sect.addr); + switch (cpu_arch) { + 
.x86_64 => { + try writer.writeByte(0x68); + try writer.writeInt(u32, offset, .little); + try writer.writeByte(0xe9); + try writer.writeInt(i32, @intCast(target - source - 6 - 4), .little); + }, + .aarch64 => { + const literal = blk: { + const div_res = try std.math.divExact(u64, entry_size - @sizeOf(u32), 4); + break :blk std.math.cast(u18, div_res) orelse return error.Overflow; + }; + try writer.writeInt(u32, aarch64.Instruction.ldrLiteral( + .w16, + literal, + ).toU32(), .little); + const disp = math.cast(i28, @as(i64, @intCast(target)) - @as(i64, @intCast(source + 4))) orelse + return error.Overflow; + try writer.writeInt(u32, aarch64.Instruction.b(disp).toU32(), .little); + try writer.writeAll(&.{ 0x0, 0x0, 0x0, 0x0 }); + }, + else => unreachable, + } + idx += 1; + } + } + } + + fn writePreamble(stubs_helper: StubsHelperSection, macho_file: *MachO, writer: anytype) !void { + _ = stubs_helper; + const cpu_arch = macho_file.options.cpu_arch.?; + const sect = macho_file.sections.items(.header)[macho_file.stubs_helper_sect_index.?]; + const dyld_private_addr = target: { + const sym = macho_file.getSymbol(macho_file.dyld_private_index.?); + break :target sym.getAddress(.{}, macho_file); + }; + const dyld_stub_binder_addr = target: { + const sym = macho_file.getSymbol(macho_file.dyld_stub_binder_index.?); + break :target sym.getGotAddress(macho_file); + }; + switch (cpu_arch) { + .x86_64 => { + try writer.writeAll(&.{ 0x4c, 0x8d, 0x1d }); + try writer.writeInt(i32, @intCast(dyld_private_addr - sect.addr - 3 - 4), .little); + try writer.writeAll(&.{ 0x41, 0x53, 0xff, 0x25 }); + try writer.writeInt(i32, @intCast(dyld_stub_binder_addr - sect.addr - 11 - 4), .little); + }, + .aarch64 => { + { + // TODO relax if possible + const pages = try Relocation.calcNumberOfPages(sect.addr, dyld_private_addr); + try writer.writeInt(u32, aarch64.Instruction.adrp(.x17, pages).toU32(), .little); + const off = try Relocation.calcPageOffset(dyld_private_addr, .arithmetic); + try 
writer.writeInt(u32, aarch64.Instruction.add(.x17, .x17, off, false).toU32(), .little); + } + try writer.writeInt(u32, aarch64.Instruction.stp( + .x16, + .x17, + aarch64.Register.sp, + aarch64.Instruction.LoadStorePairOffset.pre_index(-16), + ).toU32(), .little); + { + // TODO relax if possible + const pages = try Relocation.calcNumberOfPages(sect.addr + 12, dyld_stub_binder_addr); + try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little); + const off = try Relocation.calcPageOffset(dyld_stub_binder_addr, .load_store_64); + try writer.writeInt(u32, aarch64.Instruction.ldr( + .x16, + .x16, + aarch64.Instruction.LoadStoreOffset.imm(off), + ).toU32(), .little); + } + try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little); + }, + else => unreachable, + } + } +}; + +pub const LaSymbolPtrSection = struct { + pub fn size(laptr: LaSymbolPtrSection, macho_file: *MachO) usize { + _ = laptr; + return macho_file.stubs.symbols.items.len * @sizeOf(u64); + } + + pub fn addDyldRelocs(laptr: LaSymbolPtrSection, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + _ = laptr; + const gpa = macho_file.base.allocator; + + const sect = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_sect_index.?]; + const seg_id = macho_file.sections.items(.segment_id)[macho_file.la_symbol_ptr_sect_index.?]; + const seg = macho_file.segments.items[seg_id]; + + for (macho_file.stubs.symbols.items, 0..) 
|sym_index, idx| {
+            const sym = macho_file.getSymbol(sym_index);
+            const addr = sect.addr + idx * @sizeOf(u64);
+            const entry = bind.Entry{
+                .target = sym_index,
+                .offset = addr - seg.vmaddr,
+                .segment_id = seg_id,
+                .addend = 0,
+            };
+            if (sym.flags.import) {
+                if (sym.flags.weak) {
+                    try macho_file.bind.entries.append(gpa, entry);
+                    try macho_file.weak_bind.entries.append(gpa, entry);
+                } else {
+                    try macho_file.lazy_bind.entries.append(gpa, entry);
+                }
+            } else {
+                if (sym.flags.weak) {
+                    try macho_file.rebase.entries.append(gpa, .{
+                        .offset = addr - seg.vmaddr,
+                        .segment_id = seg_id,
+                    });
+                    try macho_file.weak_bind.entries.append(gpa, entry);
+                } else if (sym.flags.interposable) {
+                    try macho_file.lazy_bind.entries.append(gpa, entry);
+                }
+            }
+        }
+    }
+
+    /// Writes one little-endian `u64` slot for every symbol in `macho_file.stubs`,
+    /// in list order. Exported symbols get their own (non-stub) address, other
+    /// weak symbols get zero, and everything else points at the symbol's entry
+    /// in the stubs helper section.
+    pub fn write(laptr: LaSymbolPtrSection, macho_file: *MachO, writer: anytype) !void {
+        const tracy = trace(@src());
+        defer tracy.end();
+        _ = laptr;
+        const cpu_arch = macho_file.options.cpu_arch.?;
+        const sect = macho_file.sections.items(.header)[macho_file.stubs_helper_sect_index.?];
+        // Position of the next stubs helper entry. `StubsHelperSection.write` emits
+        // an entry only for a subset of the stub symbols and numbers them with its
+        // own running counter, so the position in `stubs.symbols` cannot be used
+        // to locate a helper entry.
+        var helper_idx: usize = 0;
+        for (macho_file.stubs.symbols.items) |sym_index| {
+            const sym = macho_file.getSymbol(sym_index);
+            const value: u64 = if (sym.flags.@"export")
+                sym.getAddress(.{ .stubs = false }, macho_file)
+            else if (sym.flags.weak)
+                @as(u64, 0)
+            else
+                sect.addr + StubsHelperSection.preambleSize(cpu_arch) +
+                    StubsHelperSection.entrySize(cpu_arch) * helper_idx;
+            // Same condition as in `StubsHelperSection.size` and
+            // `StubsHelperSection.write`; the two must stay in sync.
+            if ((sym.flags.import and !sym.flags.weak) or (!sym.flags.weak and sym.flags.interposable)) {
+                helper_idx += 1;
+            }
+            try writer.writeInt(u64, @intCast(value), .little);
+        }
+    }
+};
+
+pub const TlvPtrSection = struct {
+    symbols: std.ArrayListUnmanaged(Symbol.Index) = .{},
+
+    pub const Index = u32;
+
+    /// Releases the memory backing `symbols`.
+    pub fn deinit(tlv: *TlvPtrSection, allocator: Allocator) void {
+        tlv.symbols.deinit(allocator);
+    }
+
+    /// Appends `sym_index` to the section and records the new slot index on the
+    /// symbol as its `tlv_ptr` extra.
+    pub fn addSymbol(tlv: *TlvPtrSection, sym_index: Symbol.Index, macho_file: *MachO) !void {
+        const gpa = macho_file.base.allocator;
+        const index = @as(Index, @intCast(tlv.symbols.items.len));
+        const entry = try tlv.symbols.addOne(gpa);
+        entry.* = sym_index;
+        const symbol = macho_file.getSymbol(sym_index);
+        try symbol.addExtra(.{ .tlv_ptr = index }, macho_file);
+    }
+
+    /// Returns the address of slot `index`. Each slot is a single `u64`, which is
+    /// what `size` accounts for and what `write` emits per symbol.
+    pub fn getAddress(tlv: TlvPtrSection, index: Index, macho_file: *MachO) u64 {
+        assert(index < tlv.symbols.items.len);
+        const header = macho_file.sections.items(.header)[macho_file.tlv_ptr_sect_index.?];
+        return header.addr + index * @sizeOf(u64);
+    }
+
+    /// Returns the section size in bytes: one `u64` per symbol.
+    pub fn size(tlv: TlvPtrSection) usize {
+        return tlv.symbols.items.len * @sizeOf(u64);
+    }
+
+    pub fn addDyldRelocs(tlv: TlvPtrSection, macho_file: *MachO) !void {
+        const tracy = trace(@src());
+        defer tracy.end();
+        const gpa = macho_file.base.allocator;
+        const seg_id = macho_file.sections.items(.segment_id)[macho_file.tlv_ptr_sect_index.?];
+        const seg = macho_file.segments.items[seg_id];
+
+        for (tlv.symbols.items, 0..)
|sym_index, idx| { + const sym = macho_file.getSymbol(sym_index); + const addr = tlv.getAddress(@intCast(idx), macho_file); + const entry = bind.Entry{ + .target = sym_index, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if (sym.flags.import) { + try macho_file.bind.entries.append(gpa, entry); + if (sym.flags.weak) { + try macho_file.weak_bind.entries.append(gpa, entry); + } + } else { + try macho_file.rebase.entries.append(gpa, .{ + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + }); + if (sym.flags.weak) { + try macho_file.weak_bind.entries.append(gpa, entry); + } else if (sym.flags.interposable) { + try macho_file.bind.entries.append(gpa, entry); + } + } + } + } + + pub fn write(tlv: TlvPtrSection, macho_file: *MachO, writer: anytype) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (tlv.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + if (sym.flags.import) { + try writer.writeInt(u64, 0, .little); + } else { + try writer.writeInt(u64, sym.getAddress(.{}, macho_file), .little); + } + } + } + + const FormatCtx = struct { + tlv: TlvPtrSection, + macho_file: *MachO, + }; + + pub fn fmt(tlv: TlvPtrSection, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ .tlv = tlv, .macho_file = macho_file } }; + } + + pub fn format2( + ctx: FormatCtx, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = unused_fmt_string; + for (ctx.tlv.symbols.items, 0..) 
|entry, i| { + const symbol = ctx.macho_file.getSymbol(entry); + try writer.print(" {d}@0x{x} => {d}@0x{x} ({s})\n", .{ + i, + symbol.getTlvPtrAddress(ctx.macho_file), + entry, + symbol.getAddress(.{}, ctx.macho_file), + symbol.getName(ctx.macho_file), + }); + } + } +}; + +pub const ObjcStubsSection = struct { + symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, + + pub fn deinit(objc: *ObjcStubsSection, allocator: Allocator) void { + objc.symbols.deinit(allocator); + } + + pub fn entrySize(cpu_arch: std.Target.Cpu.Arch) u8 { + return switch (cpu_arch) { + .x86_64 => 13, + .aarch64 => 8 * @sizeOf(u32), + else => unreachable, + }; + } + + pub fn addSymbol(objc: *ObjcStubsSection, sym_index: Symbol.Index, macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; + const index = @as(Index, @intCast(objc.symbols.items.len)); + const entry = try objc.symbols.addOne(gpa); + entry.* = sym_index; + const symbol = macho_file.getSymbol(sym_index); + try symbol.addExtra(.{ .objc_stubs = index }, macho_file); + } + + pub fn getAddress(objc: ObjcStubsSection, index: Index, macho_file: *MachO) u64 { + assert(index < objc.symbols.items.len); + const header = macho_file.sections.items(.header)[macho_file.objc_stubs_sect_index.?]; + return header.addr + index * entrySize(macho_file.options.cpu_arch.?); + } + + pub fn size(objc: ObjcStubsSection, macho_file: *MachO) usize { + return objc.symbols.items.len * entrySize(macho_file.options.cpu_arch.?); + } + + pub fn write(objc: ObjcStubsSection, macho_file: *MachO, writer: anytype) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (objc.symbols.items, 0..) |sym_index, idx| { + const sym = macho_file.getSymbol(sym_index); + const addr = objc.getAddress(@intCast(idx), macho_file); + switch (macho_file.options.cpu_arch.?) 
{ + .x86_64 => { + try writer.writeAll(&.{ 0x48, 0x8b, 0x35 }); + { + const target = sym.getObjcSelrefsAddress(macho_file); + const source = addr; + try writer.writeInt(i32, @intCast(target - source - 3 - 4), .little); + } + try writer.writeAll(&.{ 0xff, 0x25 }); + { + const target_sym = macho_file.getSymbol(macho_file.objc_msg_send_index.?); + const target = target_sym.getGotAddress(macho_file); + const source = addr + 7; + try writer.writeInt(i32, @intCast(target - source - 2 - 4), .little); + } + }, + .aarch64 => { + { + const target = sym.getObjcSelrefsAddress(macho_file); + const source = addr; + const pages = try Relocation.calcNumberOfPages(source, target); + try writer.writeInt(u32, aarch64.Instruction.adrp(.x1, pages).toU32(), .little); + const off = try Relocation.calcPageOffset(target, .load_store_64); + try writer.writeInt( + u32, + aarch64.Instruction.ldr(.x1, .x1, aarch64.Instruction.LoadStoreOffset.imm(off)).toU32(), + .little, + ); + } + { + const target_sym = macho_file.getSymbol(macho_file.objc_msg_send_index.?); + const target = target_sym.getGotAddress(macho_file); + const source = addr + 2 * @sizeOf(u32); + const pages = try Relocation.calcNumberOfPages(source, target); + try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little); + const off = try Relocation.calcPageOffset(target, .load_store_64); + try writer.writeInt( + u32, + aarch64.Instruction.ldr(.x16, .x16, aarch64.Instruction.LoadStoreOffset.imm(off)).toU32(), + .little, + ); + } + try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little); + try writer.writeInt(u32, aarch64.Instruction.brk(1).toU32(), .little); + try writer.writeInt(u32, aarch64.Instruction.brk(1).toU32(), .little); + try writer.writeInt(u32, aarch64.Instruction.brk(1).toU32(), .little); + }, + else => unreachable, + } + } + } + + const FormatCtx = struct { + objc: ObjcStubsSection, + macho_file: *MachO, + }; + + pub fn fmt(objc: ObjcStubsSection, macho_file: *MachO) 
std.fmt.Formatter(format2) { + return .{ .data = .{ .objc = objc, .macho_file = macho_file } }; + } + + pub fn format2( + ctx: FormatCtx, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = unused_fmt_string; + for (ctx.objc.symbols.items, 0..) |entry, i| { + const symbol = ctx.macho_file.getSymbol(entry); + try writer.print(" {d}@0x{x} => {d}@0x{x} ({s})\n", .{ + i, + symbol.getObjcStubsAddress(ctx.macho_file), + entry, + symbol.getAddress(.{}, ctx.macho_file), + symbol.getName(ctx.macho_file), + }); + } + } + + pub const Index = u32; +}; + +pub const Indsymtab = struct { + pub inline fn nsyms(ind: Indsymtab, macho_file: *MachO) u32 { + _ = ind; + return @intCast(macho_file.stubs.symbols.items.len * 2 + macho_file.got.symbols.items.len); + } + + pub fn write(ind: Indsymtab, macho_file: *MachO, writer: anytype) !void { + const tracy = trace(@src()); + defer tracy.end(); + + _ = ind; + + for (macho_file.stubs.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + try writer.writeInt(u32, sym.getOutputSymtabIndex(macho_file).?, .little); + } + + for (macho_file.got.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + try writer.writeInt(u32, sym.getOutputSymtabIndex(macho_file).?, .little); + } + + for (macho_file.stubs.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + try writer.writeInt(u32, sym.getOutputSymtabIndex(macho_file).?, .little); + } + } +}; + +pub const RebaseSection = Rebase; +pub const BindSection = bind.Bind; +pub const WeakBindSection = bind.WeakBind; +pub const LazyBindSection = bind.LazyBind; +pub const ExportTrieSection = Trie; + +const aarch64 = @import("../aarch64.zig"); +const assert = std.debug.assert; +const bind = @import("dyld_info/bind.zig"); +const math = std.math; +const std = @import("std"); +const trace = @import("../tracy.zig").trace; + +const Allocator = std.mem.Allocator; +const 
MachO = @import("../MachO.zig"); +const Rebase = @import("dyld_info/Rebase.zig"); +const Relocation = @import("Relocation.zig"); +const Symbol = @import("Symbol.zig"); +const Trie = @import("dyld_info/Trie.zig"); diff --git a/src/link/MachO/thunks.zig b/src/link/MachO/thunks.zig index f080de7f80..6593fb6a1b 100644 --- a/src/link/MachO/thunks.zig +++ b/src/link/MachO/thunks.zig @@ -1,13 +1,157 @@ -//! An algorithm for allocating output machine code section (aka `__TEXT,__text`), -//! and insertion of range extending thunks. As such, this algorithm is only run -//! for a target that requires range extenders such as arm64. -//! -//! The algorithm works pessimistically and assumes that any reference to an Atom in -//! another output section is out of range. +pub fn createThunks(sect_id: u8, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); -/// Branch instruction has 26 bits immediate but 4 byte aligned. + const gpa = macho_file.base.allocator; + const slice = macho_file.sections.slice(); + const header = &slice.items(.header)[sect_id]; + const atoms = slice.items(.atoms)[sect_id].items; + assert(atoms.len > 0); + + for (atoms) |atom_index| { + macho_file.getAtom(atom_index).?.value = @bitCast(@as(i64, -1)); + } + + var i: usize = 0; + while (i < atoms.len) { + const start = i; + const start_atom = macho_file.getAtom(atoms[start]).?; + assert(start_atom.flags.alive); + start_atom.value = try advance(header, start_atom.size, start_atom.alignment); + i += 1; + + while (i < atoms.len and + header.size - start_atom.value < max_allowed_distance) : (i += 1) + { + const atom_index = atoms[i]; + const atom = macho_file.getAtom(atom_index).?; + assert(atom.flags.alive); + atom.value = try advance(header, atom.size, atom.alignment); + } + + // Insert a thunk at the group end + const thunk_index = try macho_file.addThunk(); + const thunk = macho_file.getThunk(thunk_index); + thunk.out_n_sect = sect_id; + + // Scan relocs in the group and create 
trampolines for any unreachable callsite + for (atoms[start..i]) |atom_index| { + const atom = macho_file.getAtom(atom_index).?; + log.debug("atom({d}) {s}", .{ atom_index, atom.getName(macho_file) }); + for (atom.getRelocs(macho_file)) |rel| { + if (rel.type != .branch) continue; + if (isReachable(atom, rel, macho_file)) continue; + try thunk.symbols.put(gpa, rel.target, {}); + } + atom.thunk_index = thunk_index; + } + + thunk.value = try advance(header, thunk.size(), 2); + + log.debug("thunk({d}) : {}", .{ thunk_index, thunk.fmt(macho_file) }); + } +} + +fn advance(sect: *macho.section_64, size: u64, pow2_align: u32) !u64 { + const alignment = try math.powi(u32, 2, pow2_align); + const offset = mem.alignForward(u64, sect.size, alignment); + const padding = offset - sect.size; + sect.size += padding + size; + sect.@"align" = @max(sect.@"align", pow2_align); + return offset; +} + +fn isReachable(atom: *const Atom, rel: Relocation, macho_file: *MachO) bool { + const target = rel.getTargetSymbol(macho_file); + if (target.flags.stubs or target.flags.objc_stubs) return false; + if (atom.out_n_sect != target.out_n_sect) return false; + const target_atom = target.getAtom(macho_file).?; + if (target_atom.value == @as(u64, @bitCast(@as(i64, -1)))) return false; + const saddr = @as(i64, @intCast(atom.value)) + @as(i64, @intCast(rel.offset - atom.off)); + const taddr: i64 = @intCast(rel.getTargetAddress(macho_file)); + _ = math.cast(i28, taddr + rel.addend - saddr) orelse return false; + return true; +} + +pub const Thunk = struct { + value: u64 = 0, + out_n_sect: u8 = 0, + symbols: std.AutoArrayHashMapUnmanaged(Symbol.Index, void) = .{}, + + pub fn deinit(thunk: *Thunk, allocator: Allocator) void { + thunk.symbols.deinit(allocator); + } + + pub fn size(thunk: Thunk) usize { + return thunk.symbols.keys().len * trampoline_size; + } + + pub fn getAddress(thunk: Thunk, sym_index: Symbol.Index) u64 { + return thunk.value + thunk.symbols.getIndex(sym_index).? 
* trampoline_size; + } + + pub fn write(thunk: Thunk, macho_file: *MachO, writer: anytype) !void { + for (thunk.symbols.keys(), 0..) |sym_index, i| { + const sym = macho_file.getSymbol(sym_index); + const saddr = thunk.value + i * trampoline_size; + const taddr = sym.getAddress(.{}, macho_file); + const pages = try Relocation.calcNumberOfPages(saddr, taddr); + try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little); + const off = try Relocation.calcPageOffset(taddr, .arithmetic); + try writer.writeInt(u32, aarch64.Instruction.add(.x16, .x16, off, false).toU32(), .little); + try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little); + } + } + + pub fn format( + thunk: Thunk, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = thunk; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format Thunk directly"); + } + + pub fn fmt(thunk: Thunk, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ + .thunk = thunk, + .macho_file = macho_file, + } }; + } + + const FormatContext = struct { + thunk: Thunk, + macho_file: *MachO, + }; + + fn format2( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = unused_fmt_string; + const thunk = ctx.thunk; + const macho_file = ctx.macho_file; + try writer.print("@{x} : size({x})\n", .{ thunk.value, thunk.size() }); + for (thunk.symbols.keys()) |index| { + const sym = macho_file.getSymbol(index); + try writer.print(" %{d} : {s} : @{x}\n", .{ index, sym.getName(macho_file), sym.value }); + } + } + + const trampoline_size = 3 * @sizeOf(u32); + + pub const Index = u32; +}; + +/// Branch instruction has 26 bits immediate but is 4 byte aligned. 
const jump_bits = @bitSizeOf(i28); - const max_distance = (1 << (jump_bits - 1)); /// A branch will need an extender if its target is larger than @@ -16,359 +160,17 @@ const max_distance = (1 << (jump_bits - 1)); /// and assume margin to be 5MiB. const max_allowed_distance = max_distance - 0x500_000; -pub const Thunk = struct { - start_index: Atom.Index, - len: u32, - - targets: std.MultiArrayList(Target) = .{}, - lookup: std.AutoHashMapUnmanaged(Target, u32) = .{}, - - pub const Tag = enum { - stub, - atom, - }; - - pub const Target = struct { - tag: Tag, - target: SymbolWithLoc, - }; - - pub const Index = u32; - - pub fn deinit(self: *Thunk, gpa: Allocator) void { - self.targets.deinit(gpa); - self.lookup.deinit(gpa); - } - - pub fn getStartAtomIndex(self: Thunk) Atom.Index { - assert(self.len != 0); - return self.start_index; - } - - pub fn getEndAtomIndex(self: Thunk) Atom.Index { - assert(self.len != 0); - return self.start_index + self.len - 1; - } - - pub fn getSize(self: Thunk) u64 { - return 12 * self.len; - } - - pub fn getAlignment() u32 { - return @alignOf(u32); - } - - pub fn getTrampoline(self: Thunk, macho_file: *MachO, tag: Tag, target: SymbolWithLoc) ?SymbolWithLoc { - const atom_index = self.lookup.get(.{ .tag = tag, .target = target }) orelse return null; - return macho_file.getAtom(atom_index).getSymbolWithLoc(); - } -}; - -pub fn createThunks(macho_file: *MachO, sect_id: u8) !void { - const header = &macho_file.sections.items(.header)[sect_id]; - if (header.size == 0) return; - - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const first_atom_index = macho_file.sections.items(.first_atom_index)[sect_id].?; - - header.size = 0; - header.@"align" = 0; - - var atom_count: u32 = 0; - - { - var atom_index = first_atom_index; - while (true) { - const atom = macho_file.getAtom(atom_index); - const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc()); - sym.n_value = 0; - atom_count += 1; - - if (atom.next_index) |next_index| { - 
atom_index = next_index; - } else break; - } - } - - var allocated = std.AutoHashMap(Atom.Index, void).init(gpa); - defer allocated.deinit(); - try allocated.ensureTotalCapacity(atom_count); - - var group_start = first_atom_index; - var group_end = first_atom_index; - var offset: u64 = 0; - - while (true) { - const group_start_atom = macho_file.getAtom(group_start); - log.debug("GROUP START at {d}", .{group_start}); - - while (true) { - const atom = macho_file.getAtom(group_end); - offset = atom.alignment.forward(offset); - - const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc()); - sym.n_value = offset; - offset += atom.size; - - macho_file.logAtom(group_end, log); - - header.@"align" = @max(header.@"align", atom.alignment.toLog2Units()); - - allocated.putAssumeCapacityNoClobber(group_end, {}); - - const group_start_sym = macho_file.getSymbol(group_start_atom.getSymbolWithLoc()); - if (offset - group_start_sym.n_value >= max_allowed_distance) break; - - if (atom.next_index) |next_index| { - group_end = next_index; - } else break; - } - log.debug("GROUP END at {d}", .{group_end}); - - // Insert thunk at group_end - const thunk_index = @as(u32, @intCast(macho_file.thunks.items.len)); - try macho_file.thunks.append(gpa, .{ .start_index = undefined, .len = 0 }); - - // Scan relocs in the group and create trampolines for any unreachable callsite. 
- var atom_index = group_start; - while (true) { - const atom = macho_file.getAtom(atom_index); - try scanRelocs( - macho_file, - atom_index, - allocated, - thunk_index, - group_end, - ); - - if (atom_index == group_end) break; - - if (atom.next_index) |next_index| { - atom_index = next_index; - } else break; - } - - offset = mem.alignForward(u64, offset, Thunk.getAlignment()); - allocateThunk(macho_file, thunk_index, offset, header); - offset += macho_file.thunks.items[thunk_index].getSize(); - - const thunk = macho_file.thunks.items[thunk_index]; - if (thunk.len == 0) { - const group_end_atom = macho_file.getAtom(group_end); - if (group_end_atom.next_index) |next_index| { - group_start = next_index; - group_end = next_index; - } else break; - } else { - const thunk_end_atom_index = thunk.getEndAtomIndex(); - const thunk_end_atom = macho_file.getAtom(thunk_end_atom_index); - if (thunk_end_atom.next_index) |next_index| { - group_start = next_index; - group_end = next_index; - } else break; - } - } - - header.size = @as(u32, @intCast(offset)); -} - -fn allocateThunk( - macho_file: *MachO, - thunk_index: Thunk.Index, - base_offset: u64, - header: *macho.section_64, -) void { - const thunk = macho_file.thunks.items[thunk_index]; - if (thunk.len == 0) return; - - const first_atom_index = thunk.getStartAtomIndex(); - const end_atom_index = thunk.getEndAtomIndex(); - - var atom_index = first_atom_index; - var offset = base_offset; - while (true) { - const atom = macho_file.getAtom(atom_index); - offset = mem.alignForward(u64, offset, Thunk.getAlignment()); - - const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc()); - sym.n_value = offset; - offset += atom.size; - - macho_file.logAtom(atom_index, log); - - header.@"align" = @max(header.@"align", atom.alignment.toLog2Units()); - - if (end_atom_index == atom_index) break; - - if (atom.next_index) |next_index| { - atom_index = next_index; - } else break; - } -} - -fn scanRelocs( - macho_file: *MachO, - atom_index: 
Atom.Index, - allocated: std.AutoHashMap(Atom.Index, void), - thunk_index: Thunk.Index, - group_end: Atom.Index, -) !void { - const atom = macho_file.getAtom(atom_index); - const object = macho_file.objects.items[atom.getFile().?]; - - const base_offset = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { - const source_sect = object.getSourceSection(source_sym.n_sect - 1); - break :blk @as(i32, @intCast(source_sym.n_value - source_sect.addr)); - } else 0; - - const code = Atom.getAtomCode(macho_file, atom_index); - const relocs = Atom.getAtomRelocs(macho_file, atom_index); - const ctx = Atom.getRelocContext(macho_file, atom_index); - - for (relocs) |rel| { - if (!relocNeedsThunk(rel)) continue; - - const target = Atom.parseRelocTarget(macho_file, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = code, - .base_offset = ctx.base_offset, - .base_addr = ctx.base_addr, - }); - if (isReachable(macho_file, atom_index, rel, base_offset, target, allocated)) continue; - - log.debug("{x}: source = {s}@{x}, target = {s}@{x} unreachable", .{ - rel.r_address - base_offset, - macho_file.getSymbolName(atom.getSymbolWithLoc()), - macho_file.getSymbol(atom.getSymbolWithLoc()).n_value, - macho_file.getSymbolName(target), - macho_file.getSymbol(target).n_value, - }); - - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const target_sym = macho_file.getSymbol(target); - const thunk = &macho_file.thunks.items[thunk_index]; - - const tag: Thunk.Tag = if (target_sym.undf()) .stub else .atom; - const thunk_target: Thunk.Target = .{ .tag = tag, .target = target }; - const gop = try thunk.lookup.getOrPut(gpa, thunk_target); - if (!gop.found_existing) { - gop.value_ptr.* = try pushThunkAtom(macho_file, thunk, group_end); - try thunk.targets.append(gpa, thunk_target); - } - - try macho_file.thunk_table.put(gpa, atom_index, thunk_index); - } -} - -fn pushThunkAtom(macho_file: *MachO, thunk: *Thunk, group_end: Atom.Index) !Atom.Index { - const thunk_atom_index 
= try createThunkAtom(macho_file); - - const thunk_atom = macho_file.getAtomPtr(thunk_atom_index); - const end_atom_index = if (thunk.len == 0) group_end else thunk.getEndAtomIndex(); - const end_atom = macho_file.getAtomPtr(end_atom_index); - - if (end_atom.next_index) |first_after_index| { - const first_after_atom = macho_file.getAtomPtr(first_after_index); - first_after_atom.prev_index = thunk_atom_index; - thunk_atom.next_index = first_after_index; - } - - end_atom.next_index = thunk_atom_index; - thunk_atom.prev_index = end_atom_index; - - if (thunk.len == 0) { - thunk.start_index = thunk_atom_index; - } - - thunk.len += 1; - - return thunk_atom_index; -} - -inline fn relocNeedsThunk(rel: macho.relocation_info) bool { - const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); - return rel_type == .ARM64_RELOC_BRANCH26; -} - -fn isReachable( - macho_file: *MachO, - atom_index: Atom.Index, - rel: macho.relocation_info, - base_offset: i32, - target: SymbolWithLoc, - allocated: std.AutoHashMap(Atom.Index, void), -) bool { - if (macho_file.stub_table.lookup.contains(target)) return false; - - const source_atom = macho_file.getAtom(atom_index); - const source_sym = macho_file.getSymbol(source_atom.getSymbolWithLoc()); - - const target_object = macho_file.objects.items[target.getFile().?]; - const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index).?; - const target_atom = macho_file.getAtom(target_atom_index); - const target_sym = macho_file.getSymbol(target_atom.getSymbolWithLoc()); - - if (source_sym.n_sect != target_sym.n_sect) return false; - - if (!allocated.contains(target_atom_index)) return false; - - const source_addr = source_sym.n_value + @as(u32, @intCast(rel.r_address - base_offset)); - const target_addr = if (Atom.relocRequiresGot(macho_file, rel)) - macho_file.getGotEntryAddress(target).? 
- else - Atom.getRelocTargetAddress(macho_file, target, false); - _ = Relocation.calcPcRelativeDisplacementArm64(source_addr, target_addr) catch - return false; - - return true; -} - -fn createThunkAtom(macho_file: *MachO) !Atom.Index { - const sym_index = try macho_file.allocateSymbol(); - const atom_index = try macho_file.createAtom(sym_index, .{ - .size = @sizeOf(u32) * 3, - .alignment = .@"4", - }); - const sym = macho_file.getSymbolPtr(.{ .sym_index = sym_index }); - sym.n_type = macho.N_SECT; - sym.n_sect = macho_file.text_section_index.? + 1; - return atom_index; -} - -pub fn writeThunkCode(macho_file: *MachO, thunk: *const Thunk, writer: anytype) !void { - const slice = thunk.targets.slice(); - for (thunk.getStartAtomIndex()..thunk.getEndAtomIndex(), 0..) |atom_index, target_index| { - const atom = macho_file.getAtom(@intCast(atom_index)); - const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - const source_addr = sym.n_value; - const tag = slice.items(.tag)[target_index]; - const target = slice.items(.target)[target_index]; - const target_addr = switch (tag) { - .stub => macho_file.getStubsEntryAddress(target).?, - .atom => macho_file.getSymbol(target).n_value, - }; - const pages = Relocation.calcNumberOfPages(source_addr, target_addr); - try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little); - const off = try Relocation.calcPageOffset(target_addr, .arithmetic); - try writer.writeInt(u32, aarch64.Instruction.add(.x16, .x16, off, false).toU32(), .little); - try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little); - } -} - -const std = @import("std"); +const aarch64 = @import("../aarch64.zig"); const assert = std.debug.assert; -const log = std.log.scoped(.thunks); +const log = std.log.scoped(.link); const macho = std.macho; const math = std.math; const mem = std.mem; - -const aarch64 = @import("../../arch/aarch64/bits.zig"); +const std = @import("std"); +const trace = @import("../tracy.zig").trace; const 
Allocator = mem.Allocator; const Atom = @import("Atom.zig"); const MachO = @import("../MachO.zig"); const Relocation = @import("Relocation.zig"); -const SymbolWithLoc = MachO.SymbolWithLoc; +const Symbol = @import("Symbol.zig"); diff --git a/src/link/MachO/uuid.zig b/src/link/MachO/uuid.zig index 6d60397149..0248acc091 100644 --- a/src/link/MachO/uuid.zig +++ b/src/link/MachO/uuid.zig @@ -4,22 +4,31 @@ /// and we will use it too as it seems accepted by Apple OSes. /// TODO LLD also hashes the output filename to disambiguate between same builds with different /// output files. Should we also do that? -pub fn calcUuid(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void { +pub fn calcUuid( + allocator: Allocator, + thread_pool: *ThreadPool, + file: fs.File, + file_size: u64, + out: *[Md5.digest_length]u8, +) !void { + const tracy = trace(@src()); + defer tracy.end(); + const chunk_size: usize = 1024 * 1024; const num_chunks: usize = std.math.cast(usize, @divTrunc(file_size, chunk_size)) orelse return error.Overflow; const actual_num_chunks = if (@rem(file_size, chunk_size) > 0) num_chunks + 1 else num_chunks; - const hashes = try comp.gpa.alloc([Md5.digest_length]u8, actual_num_chunks); - defer comp.gpa.free(hashes); + const hashes = try allocator.alloc([Md5.digest_length]u8, actual_num_chunks); + defer allocator.free(hashes); - var hasher = Hasher(Md5){ .allocator = comp.gpa, .thread_pool = comp.thread_pool }; + var hasher = Hasher(Md5){ .allocator = allocator, .thread_pool = thread_pool }; try hasher.hash(file, hashes, .{ .chunk_size = chunk_size, .max_file_size = file_size, }); - const final_buffer = try comp.gpa.alloc(u8, actual_num_chunks * Md5.digest_length); - defer comp.gpa.free(final_buffer); + const final_buffer = try allocator.alloc(u8, actual_num_chunks * Md5.digest_length); + defer allocator.free(final_buffer); for (hashes, 0..) 
|hash, i| { @memcpy(final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash); @@ -35,11 +44,12 @@ inline fn conform(out: *[Md5.digest_length]u8) void { out[8] = (out[8] & 0x3F) | 0x80; } -const std = @import("std"); const fs = std.fs; const mem = std.mem; +const std = @import("std"); +const trace = @import("../tracy.zig").trace; const Allocator = mem.Allocator; -const Compilation = @import("../../Compilation.zig"); const Md5 = std.crypto.hash.Md5; const Hasher = @import("hasher.zig").ParallelHasher; +const ThreadPool = std.Thread.Pool; diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig deleted file mode 100644 index 57681dd935..0000000000 --- a/src/link/MachO/zld.zig +++ /dev/null @@ -1,1230 +0,0 @@ -pub fn linkWithZld( - macho_file: *MachO, - arena: Allocator, - prog_node: *std.Progress.Node, -) link.File.FlushError!void { - const tracy = trace(@src()); - defer tracy.end(); - - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const target = comp.root_mod.resolved_target.result; - const emit = macho_file.base.emit; - - const directory = emit.directory; // Just an alias to make it shorter to type. - const full_out_path = try directory.join(arena, &[_][]const u8{emit.sub_path}); - const opt_zcu = comp.module; - - // If there is no Zig code to compile, then we should skip flushing the output file because it - // will not be part of the linker line anyway. - const module_obj_path: ?[]const u8 = if (opt_zcu != null) blk: { - try macho_file.flushModule(arena, prog_node); - - if (fs.path.dirname(full_out_path)) |dirname| { - break :blk try fs.path.join(arena, &.{ dirname, macho_file.base.zcu_object_sub_path.? 
}); - } else { - break :blk macho_file.base.zcu_object_sub_path.?; - } - } else null; - - var sub_prog_node = prog_node.start("MachO Flush", 0); - sub_prog_node.activate(); - sub_prog_node.context.refresh(); - defer sub_prog_node.end(); - - const output_mode = comp.config.output_mode; - const link_mode = comp.config.link_mode; - const cpu_arch = target.cpu.arch; - const is_lib = output_mode == .Lib; - const is_dyn_lib = link_mode == .Dynamic and is_lib; - const is_exe_or_dyn_lib = is_dyn_lib or output_mode == .Exe; - const stack_size = macho_file.base.stack_size; - - const id_symlink_basename = "zld.id"; - - var man: Cache.Manifest = undefined; - defer if (!macho_file.base.disable_lld_caching) man.deinit(); - - var digest: [Cache.hex_digest_len]u8 = undefined; - - const objects = comp.objects; - - if (!macho_file.base.disable_lld_caching) { - man = comp.cache_parent.obtain(); - - // We are about to obtain this lock, so here we give other processes a chance first. - macho_file.base.releaseLock(); - - comptime assert(Compilation.link_hash_implementation_version == 11); - - for (objects) |obj| { - _ = try man.addFile(obj.path, null); - man.hash.add(obj.must_link); - } - for (comp.c_object_table.keys()) |key| { - _ = try man.addFile(key.status.success.object_path, null); - } - try man.addOptionalFile(module_obj_path); - // We can skip hashing libc and libc++ components that we are in charge of building from Zig - // installation sources because they are always a product of the compiler version + target information. 
- man.hash.add(stack_size); - man.hash.add(macho_file.pagezero_vmsize); - man.hash.add(macho_file.headerpad_size); - man.hash.add(macho_file.headerpad_max_install_names); - man.hash.add(macho_file.base.gc_sections); - man.hash.add(macho_file.dead_strip_dylibs); - man.hash.add(comp.root_mod.strip); - try MachO.hashAddFrameworks(&man, macho_file.frameworks); - man.hash.addListOfBytes(macho_file.base.rpath_list); - if (is_dyn_lib) { - man.hash.addOptionalBytes(macho_file.install_name); - man.hash.addOptional(comp.version); - } - try link.hashAddSystemLibs(&man, comp.system_libs); - man.hash.addOptionalBytes(comp.sysroot); - man.hash.addListOfBytes(comp.force_undefined_symbols.keys()); - try man.addOptionalFile(macho_file.entitlements); - - // We don't actually care whether it's a cache hit or miss; we just - // need the digest and the lock. - _ = try man.hit(); - digest = man.final(); - - var prev_digest_buf: [digest.len]u8 = undefined; - const prev_digest: []u8 = Cache.readSmallFile( - directory.handle, - id_symlink_basename, - &prev_digest_buf, - ) catch |err| blk: { - log.debug("MachO Zld new_digest={s} error: {s}", .{ - std.fmt.fmtSliceHexLower(&digest), - @errorName(err), - }); - // Handle this as a cache miss. - break :blk prev_digest_buf[0..0]; - }; - if (mem.eql(u8, prev_digest, &digest)) { - // Hot diggity dog! The output binary is already there. - log.debug("MachO Zld digest={s} match - skipping invocation", .{ - std.fmt.fmtSliceHexLower(&digest), - }); - macho_file.base.lock = man.toOwnedLock(); - return; - } - log.debug("MachO Zld prev_digest={s} new_digest={s}", .{ - std.fmt.fmtSliceHexLower(prev_digest), - std.fmt.fmtSliceHexLower(&digest), - }); - - // We are about to change the output file to be different, so we invalidate the build hash now. 
- directory.handle.deleteFile(id_symlink_basename) catch |err| switch (err) { - error.FileNotFound => {}, - else => |e| return e, - }; - } - - if (output_mode == .Obj) { - // LLD's MachO driver does not support the equivalent of `-r` so we do a simple file copy - // here. TODO: think carefully about how we can avoid this redundant operation when doing - // build-obj. See also the corresponding TODO in linkAsArchive. - const the_object_path = blk: { - if (objects.len != 0) { - break :blk objects[0].path; - } - - if (comp.c_object_table.count() != 0) - break :blk comp.c_object_table.keys()[0].status.success.object_path; - - if (module_obj_path) |p| - break :blk p; - - // TODO I think this is unreachable. Audit this situation when solving the above TODO - // regarding eliding redundant object -> object transformations. - return error.NoObjectsToLink; - }; - // This can happen when using --enable-cache and using the stage1 backend. In this case - // we can skip the file copy. - if (!mem.eql(u8, the_object_path, full_out_path)) { - try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{}); - } - } else { - const sub_path = emit.sub_path; - - const old_file = macho_file.base.file; // TODO is this needed at all? - defer macho_file.base.file = old_file; - - const file = try directory.handle.createFile(sub_path, .{ - .truncate = true, - .read = true, - .mode = link.File.determineMode(false, output_mode, link_mode), - }); - defer file.close(); - macho_file.base.file = file; - - // Index 0 is always a null symbol. - try macho_file.locals.append(gpa, .{ - .n_strx = 0, - .n_type = 0, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try macho_file.strtab.buffer.append(gpa, 0); - - // Positional arguments to the linker such as object files and static archives. 
- var positionals = std.ArrayList(Compilation.LinkObject).init(arena); - try positionals.ensureUnusedCapacity(objects.len); - positionals.appendSliceAssumeCapacity(objects); - - for (comp.c_object_table.keys()) |key| { - try positionals.append(.{ .path = key.status.success.object_path }); - } - - if (module_obj_path) |p| { - try positionals.append(.{ .path = p }); - } - - if (comp.compiler_rt_lib) |lib| try positionals.append(.{ .path = lib.full_object_path }); - if (comp.compiler_rt_obj) |obj| try positionals.append(.{ .path = obj.full_object_path }); - - // libc++ dep - if (comp.config.link_libcpp) { - try positionals.ensureUnusedCapacity(2); - positionals.appendAssumeCapacity(.{ .path = comp.libcxxabi_static_lib.?.full_object_path }); - positionals.appendAssumeCapacity(.{ .path = comp.libcxx_static_lib.?.full_object_path }); - } - - var libs = std.StringArrayHashMap(link.SystemLib).init(arena); - - { - const vals = comp.system_libs.values(); - try libs.ensureUnusedCapacity(vals.len); - for (vals) |v| libs.putAssumeCapacity(v.path.?, v); - } - - { - try libs.ensureUnusedCapacity(macho_file.frameworks.len); - for (macho_file.frameworks) |v| libs.putAssumeCapacity(v.path, .{ - .needed = v.needed, - .weak = v.weak, - .path = v.path, - }); - } - - try macho_file.resolveLibSystem(arena, comp, &libs); - - if (comp.verbose_link) { - var argv = std.ArrayList([]const u8).init(arena); - - try argv.append("zig"); - try argv.append("ld"); - - if (is_exe_or_dyn_lib) { - try argv.append("-dynamic"); - } - - if (is_dyn_lib) { - try argv.append("-dylib"); - - if (macho_file.install_name) |install_name| { - try argv.append("-install_name"); - try argv.append(install_name); - } - } - - { - const platform = Platform.fromTarget(target); - try argv.append("-platform_version"); - try argv.append(@tagName(platform.os_tag)); - try argv.append(try std.fmt.allocPrint(arena, "{}", .{platform.version})); - - const sdk_version: ?std.SemanticVersion = 
load_commands.inferSdkVersion(macho_file); - if (sdk_version) |ver| { - try argv.append(try std.fmt.allocPrint(arena, "{d}.{d}", .{ ver.major, ver.minor })); - } else { - try argv.append(try std.fmt.allocPrint(arena, "{}", .{platform.version})); - } - } - - if (comp.sysroot) |syslibroot| { - try argv.append("-syslibroot"); - try argv.append(syslibroot); - } - - for (macho_file.base.rpath_list) |rpath| { - try argv.append("-rpath"); - try argv.append(rpath); - } - - try argv.appendSlice(&.{ - "-pagezero_size", try std.fmt.allocPrint(arena, "0x{x}", .{macho_file.pagezero_vmsize}), - "-headerpad_size", try std.fmt.allocPrint(arena, "0x{x}", .{macho_file.headerpad_size}), - }); - - if (macho_file.headerpad_max_install_names) { - try argv.append("-headerpad_max_install_names"); - } - - if (macho_file.base.gc_sections) { - try argv.append("-dead_strip"); - } - - if (macho_file.dead_strip_dylibs) { - try argv.append("-dead_strip_dylibs"); - } - - if (macho_file.entry_name) |entry_name| { - try argv.appendSlice(&.{ "-e", entry_name }); - } - - for (objects) |obj| { - if (obj.must_link) { - try argv.append("-force_load"); - } - try argv.append(obj.path); - } - - for (comp.c_object_table.keys()) |key| { - try argv.append(key.status.success.object_path); - } - - if (module_obj_path) |p| { - try argv.append(p); - } - - if (comp.compiler_rt_lib) |lib| try argv.append(lib.full_object_path); - if (comp.compiler_rt_obj) |obj| try argv.append(obj.full_object_path); - - if (comp.config.link_libcpp) { - try argv.append(comp.libcxxabi_static_lib.?.full_object_path); - try argv.append(comp.libcxx_static_lib.?.full_object_path); - } - - try argv.append("-o"); - try argv.append(full_out_path); - - try argv.append("-lSystem"); - - for (comp.system_libs.keys()) |l_name| { - const info = comp.system_libs.get(l_name).?; - const arg = if (info.needed) - try std.fmt.allocPrint(arena, "-needed-l{s}", .{l_name}) - else if (info.weak) - try std.fmt.allocPrint(arena, "-weak-l{s}", .{l_name}) - 
else - try std.fmt.allocPrint(arena, "-l{s}", .{l_name}); - try argv.append(arg); - } - - for (macho_file.frameworks) |framework| { - const name = std.fs.path.stem(framework.path); - const arg = if (framework.needed) - try std.fmt.allocPrint(arena, "-needed_framework {s}", .{name}) - else if (framework.weak) - try std.fmt.allocPrint(arena, "-weak_framework {s}", .{name}) - else - try std.fmt.allocPrint(arena, "-framework {s}", .{name}); - try argv.append(arg); - } - - if (is_dyn_lib and macho_file.base.allow_shlib_undefined) { - try argv.append("-undefined"); - try argv.append("dynamic_lookup"); - } - - Compilation.dump_argv(argv.items); - } - - var dependent_libs = std.fifo.LinearFifo(MachO.DylibReExportInfo, .Dynamic).init(arena); - - for (positionals.items) |obj| { - const in_file = try std.fs.cwd().openFile(obj.path, .{}); - defer in_file.close(); - - var parse_ctx = MachO.ParseErrorCtx.init(gpa); - defer parse_ctx.deinit(); - - macho_file.parsePositional( - in_file, - obj.path, - obj.must_link, - &dependent_libs, - &parse_ctx, - ) catch |err| try macho_file.handleAndReportParseError(obj.path, err, &parse_ctx); - } - - for (libs.keys(), libs.values()) |path, lib| { - const in_file = try std.fs.cwd().openFile(path, .{}); - defer in_file.close(); - - var parse_ctx = MachO.ParseErrorCtx.init(gpa); - defer parse_ctx.deinit(); - - macho_file.parseLibrary( - in_file, - path, - lib, - false, - false, - null, - &dependent_libs, - &parse_ctx, - ) catch |err| try macho_file.handleAndReportParseError(path, err, &parse_ctx); - } - - try macho_file.parseDependentLibs(&dependent_libs); - - try macho_file.resolveSymbols(); - if (macho_file.unresolved.count() > 0) { - try macho_file.reportUndefined(); - return error.FlushFailure; - } - - for (macho_file.objects.items, 0..) 
|*object, object_id| { - object.splitIntoAtoms(macho_file, @as(u32, @intCast(object_id))) catch |err| switch (err) { - error.MissingEhFrameSection => try macho_file.reportParseError( - object.name, - "missing section: '__TEXT,__eh_frame' is required but could not be found", - .{}, - ), - error.BadDwarfCfi => try macho_file.reportParseError( - object.name, - "invalid DWARF: failed to parse '__TEXT,__eh_frame' section", - .{}, - ), - else => |e| return e, - }; - } - - if (macho_file.base.gc_sections) { - try dead_strip.gcAtoms(macho_file); - } - - try macho_file.createDyldPrivateAtom(); - try macho_file.createTentativeDefAtoms(); - - if (comp.config.output_mode == .Exe) { - const global = macho_file.getEntryPoint().?; - if (macho_file.getSymbol(global).undf()) { - // We do one additional check here in case the entry point was found in one of the dylibs. - // (I actually have no idea what this would imply but it is a possible outcome and so we - // support it.) - try macho_file.addStubEntry(global); - } - } - - for (macho_file.objects.items) |object| { - for (object.atoms.items) |atom_index| { - const atom = macho_file.getAtom(atom_index); - const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - const header = macho_file.sections.items(.header)[sym.n_sect - 1]; - if (header.isZerofill()) continue; - - const relocs = Atom.getAtomRelocs(macho_file, atom_index); - try Atom.scanAtomRelocs(macho_file, atom_index, relocs); - } - } - - try eh_frame.scanRelocs(macho_file); - try UnwindInfo.scanRelocs(macho_file); - - if (macho_file.dyld_stub_binder_index) |index| - try macho_file.addGotEntry(macho_file.globals.items[index]); - - try calcSectionSizes(macho_file); - - var unwind_info = UnwindInfo{ .gpa = gpa }; - defer unwind_info.deinit(); - try unwind_info.collect(macho_file); - - try eh_frame.calcSectionSize(macho_file, &unwind_info); - unwind_info.calcSectionSize(macho_file); - - try pruneAndSortSections(macho_file); - try createSegments(macho_file); - try 
allocateSegments(macho_file); - - try macho_file.allocateSpecialSymbols(); - - if (build_options.enable_logging) { - macho_file.logSymtab(); - macho_file.logSegments(); - macho_file.logSections(); - macho_file.logAtoms(); - } - - try writeAtoms(macho_file); - if (target.cpu.arch == .aarch64) try writeThunks(macho_file); - try writeDyldPrivateAtom(macho_file); - - if (macho_file.stubs_section_index) |_| { - try writeStubs(macho_file); - try writeStubHelpers(macho_file); - try writeLaSymbolPtrs(macho_file); - } - if (macho_file.got_section_index) |sect_id| - try writePointerEntries(macho_file, sect_id, &macho_file.got_table); - if (macho_file.tlv_ptr_section_index) |sect_id| - try writePointerEntries(macho_file, sect_id, &macho_file.tlv_ptr_table); - - try eh_frame.write(macho_file, &unwind_info); - try unwind_info.write(macho_file); - try macho_file.writeLinkeditSegmentData(); - - // If the last section of __DATA segment is zerofill section, we need to ensure - // that the free space between the end of the last non-zerofill section of __DATA - // segment and the beginning of __LINKEDIT segment is zerofilled as the loader will - // copy-paste this space into memory for quicker zerofill operation. 
- if (macho_file.data_segment_cmd_index) |data_seg_id| blk: { - var physical_zerofill_start: ?u64 = null; - const section_indexes = macho_file.getSectionIndexes(data_seg_id); - for (macho_file.sections.items(.header)[section_indexes.start..section_indexes.end]) |header| { - if (header.isZerofill() and header.size > 0) break; - physical_zerofill_start = header.offset + header.size; - } else break :blk; - const start = physical_zerofill_start orelse break :blk; - const linkedit = macho_file.getLinkeditSegmentPtr(); - const size = math.cast(usize, linkedit.fileoff - start) orelse return error.Overflow; - if (size > 0) { - log.debug("zeroing out zerofill area of length {x} at {x}", .{ size, start }); - const padding = try gpa.alloc(u8, size); - defer gpa.free(padding); - @memset(padding, 0); - try macho_file.base.file.?.pwriteAll(padding, start); - } - } - - // Write code signature padding if required - var codesig: ?CodeSignature = if (macho_file.requiresCodeSignature()) blk: { - // Preallocate space for the code signature. - // We need to do this at this stage so that we have the load commands with proper values - // written out to the file. - // The most important here is to have the correct vm and filesize of the __LINKEDIT segment - // where the code signature goes into. 
- var codesig = CodeSignature.init(MachO.getPageSize(cpu_arch)); - codesig.code_directory.ident = fs.path.basename(full_out_path); - if (macho_file.entitlements) |path| { - try codesig.addEntitlements(gpa, path); - } - try macho_file.writeCodeSignaturePadding(&codesig); - break :blk codesig; - } else null; - defer if (codesig) |*csig| csig.deinit(gpa); - - // Write load commands - var lc_buffer = std.ArrayList(u8).init(arena); - const lc_writer = lc_buffer.writer(); - - try macho_file.writeSegmentHeaders(lc_writer); - try lc_writer.writeStruct(macho_file.dyld_info_cmd); - try lc_writer.writeStruct(macho_file.function_starts_cmd); - try lc_writer.writeStruct(macho_file.data_in_code_cmd); - try lc_writer.writeStruct(macho_file.symtab_cmd); - try lc_writer.writeStruct(macho_file.dysymtab_cmd); - try load_commands.writeDylinkerLC(lc_writer); - - switch (output_mode) { - .Exe => blk: { - const seg_id = macho_file.header_segment_cmd_index.?; - const seg = macho_file.segments.items[seg_id]; - const global = macho_file.getEntryPoint() orelse break :blk; - const sym = macho_file.getSymbol(global); - - const addr: u64 = if (sym.undf()) - // In this case, the symbol has been resolved in one of dylibs and so we point - // to the stub as its vmaddr value. - macho_file.getStubsEntryAddress(global).? 
- else - sym.n_value; - - try lc_writer.writeStruct(macho.entry_point_command{ - .entryoff = @as(u32, @intCast(addr - seg.vmaddr)), - .stacksize = macho_file.base.stack_size, - }); - }, - .Lib => if (link_mode == .Dynamic) { - try load_commands.writeDylibIdLC(macho_file, lc_writer); - }, - else => {}, - } - - try load_commands.writeRpathLCs(macho_file, lc_writer); - try lc_writer.writeStruct(macho.source_version_command{ - .version = 0, - }); - { - const platform = Platform.fromTarget(target); - const sdk_version: ?std.SemanticVersion = load_commands.inferSdkVersion(macho_file); - if (platform.isBuildVersionCompatible()) { - try load_commands.writeBuildVersionLC(platform, sdk_version, lc_writer); - } else { - try load_commands.writeVersionMinLC(platform, sdk_version, lc_writer); - } - } - - const uuid_cmd_offset = @sizeOf(macho.mach_header_64) + @as(u32, @intCast(lc_buffer.items.len)); - try lc_writer.writeStruct(macho_file.uuid_cmd); - - try load_commands.writeLoadDylibLCs( - macho_file.dylibs.items, - macho_file.referenced_dylibs.keys(), - lc_writer, - ); - - if (codesig != null) { - try lc_writer.writeStruct(macho_file.codesig_cmd); - } - - const ncmds = load_commands.calcNumOfLCs(lc_buffer.items); - try macho_file.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64)); - try macho_file.writeHeader(ncmds, @as(u32, @intCast(lc_buffer.items.len))); - try macho_file.writeUuid(comp, uuid_cmd_offset, codesig != null); - - if (codesig) |*csig| { - try macho_file.writeCodeSignature(comp, csig); // code signing always comes last - try MachO.invalidateKernelCache(directory.handle, macho_file.base.emit.sub_path); - } - } - - if (!macho_file.base.disable_lld_caching) { - // Update the file with the digest. If it fails we can continue; it only - // means that the next invocation will have an unnecessary cache miss. 
- Cache.writeSmallFile(directory.handle, id_symlink_basename, &digest) catch |err| { - log.debug("failed to save linking hash digest file: {s}", .{@errorName(err)}); - }; - // Again failure here only means an unnecessary cache miss. - if (man.have_exclusive_lock) { - man.writeManifest() catch |err| { - log.debug("failed to write cache manifest when linking: {s}", .{@errorName(err)}); - }; - } - // We hang on to this lock so that the output file path can be used without - // other processes clobbering it. - macho_file.base.lock = man.toOwnedLock(); - } -} - -fn createSegments(macho_file: *MachO) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const target = macho_file.base.comp.root_mod.resolved_target.result; - const page_size = MachO.getPageSize(target.cpu.arch); - const aligned_pagezero_vmsize = mem.alignBackward(u64, macho_file.pagezero_vmsize, page_size); - if (macho_file.base.comp.config.output_mode != .Lib and aligned_pagezero_vmsize > 0) { - if (aligned_pagezero_vmsize != macho_file.pagezero_vmsize) { - log.warn("requested __PAGEZERO size (0x{x}) is not page aligned", .{macho_file.pagezero_vmsize}); - log.warn(" rounding down to 0x{x}", .{aligned_pagezero_vmsize}); - } - macho_file.pagezero_segment_cmd_index = @intCast(macho_file.segments.items.len); - try macho_file.segments.append(gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = MachO.makeStaticString("__PAGEZERO"), - .vmsize = aligned_pagezero_vmsize, - }); - } - - // __TEXT segment is non-optional - { - const protection = MachO.getSegmentMemoryProtection("__TEXT"); - macho_file.text_segment_cmd_index = @intCast(macho_file.segments.items.len); - macho_file.header_segment_cmd_index = macho_file.text_segment_cmd_index.?; - try macho_file.segments.append(gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = MachO.makeStaticString("__TEXT"), - .maxprot = protection, - .initprot = protection, - }); - } - - for (macho_file.sections.items(.header), 0..) 
|header, sect_id| { - if (header.size == 0) continue; // empty section - - const segname = header.segName(); - const segment_id = macho_file.getSegmentByName(segname) orelse blk: { - log.debug("creating segment '{s}'", .{segname}); - const segment_id = @as(u8, @intCast(macho_file.segments.items.len)); - const protection = MachO.getSegmentMemoryProtection(segname); - try macho_file.segments.append(gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = MachO.makeStaticString(segname), - .maxprot = protection, - .initprot = protection, - }); - break :blk segment_id; - }; - const segment = &macho_file.segments.items[segment_id]; - segment.cmdsize += @sizeOf(macho.section_64); - segment.nsects += 1; - macho_file.sections.items(.segment_index)[sect_id] = segment_id; - } - - if (macho_file.getSegmentByName("__DATA_CONST")) |index| { - macho_file.data_const_segment_cmd_index = index; - } - - if (macho_file.getSegmentByName("__DATA")) |index| { - macho_file.data_segment_cmd_index = index; - } - - // __LINKEDIT always comes last - { - const protection = MachO.getSegmentMemoryProtection("__LINKEDIT"); - macho_file.linkedit_segment_cmd_index = @intCast(macho_file.segments.items.len); - try macho_file.segments.append(gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = MachO.makeStaticString("__LINKEDIT"), - .maxprot = protection, - .initprot = protection, - }); - } -} - -fn writeAtoms(macho_file: *MachO) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const slice = macho_file.sections.slice(); - - for (slice.items(.first_atom_index), 0..) 
|first_atom_index, sect_id| { - const header = slice.items(.header)[sect_id]; - if (header.isZerofill()) continue; - - var atom_index = first_atom_index orelse continue; - - var buffer = try gpa.alloc(u8, math.cast(usize, header.size) orelse return error.Overflow); - defer gpa.free(buffer); - @memset(buffer, 0); // TODO with NOPs - - log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); - - while (true) { - const atom = macho_file.getAtom(atom_index); - if (atom.getFile()) |file| { - const this_sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - const padding_size: usize = if (atom.next_index) |next_index| blk: { - const next_sym = macho_file.getSymbol(macho_file.getAtom(next_index).getSymbolWithLoc()); - const size = next_sym.n_value - (this_sym.n_value + atom.size); - break :blk math.cast(usize, size) orelse return error.Overflow; - } else 0; - - log.debug(" (adding ATOM(%{d}, '{s}') from object({d}) to buffer)", .{ - atom.sym_index, - macho_file.getSymbolName(atom.getSymbolWithLoc()), - file, - }); - if (padding_size > 0) { - log.debug(" (with padding {x})", .{padding_size}); - } - - const offset = math.cast(usize, this_sym.n_value - header.addr) orelse - return error.Overflow; - log.debug(" (at offset 0x{x})", .{offset}); - - const code = Atom.getAtomCode(macho_file, atom_index); - const relocs = Atom.getAtomRelocs(macho_file, atom_index); - const size = math.cast(usize, atom.size) orelse return error.Overflow; - @memcpy(buffer[offset .. 
offset + size], code); - try Atom.resolveRelocs( - macho_file, - atom_index, - buffer[offset..][0..size], - relocs, - ); - } - - if (atom.next_index) |next_index| { - atom_index = next_index; - } else break; - } - - log.debug(" (writing at file offset 0x{x})", .{header.offset}); - try macho_file.base.file.?.pwriteAll(buffer, header.offset); - } -} - -fn writeDyldPrivateAtom(macho_file: *MachO) !void { - const atom_index = macho_file.dyld_private_atom_index orelse return; - const atom = macho_file.getAtom(atom_index); - const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - const sect_id = macho_file.data_section_index.?; - const header = macho_file.sections.items(.header)[sect_id]; - const offset = sym.n_value - header.addr + header.offset; - log.debug("writing __dyld_private at offset 0x{x}", .{offset}); - const buffer: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64); - try macho_file.base.file.?.pwriteAll(&buffer, offset); -} - -fn writeThunks(macho_file: *MachO) !void { - const target = macho_file.base.comp.root_mod.resolved_target.result; - assert(target.cpu.arch == .aarch64); - const comp = macho_file.base.comp; - const gpa = comp.gpa; - - const sect_id = macho_file.text_section_index orelse return; - const header = macho_file.sections.items(.header)[sect_id]; - - for (macho_file.thunks.items, 0..) 
|*thunk, i| { - if (thunk.getSize() == 0) continue; - const thunk_size = math.cast(usize, thunk.getSize()) orelse return error.Overflow; - var buffer = try std.ArrayList(u8).initCapacity(gpa, thunk_size); - defer buffer.deinit(); - try thunks.writeThunkCode(macho_file, thunk, buffer.writer()); - const thunk_atom = macho_file.getAtom(thunk.getStartAtomIndex()); - const thunk_sym = macho_file.getSymbol(thunk_atom.getSymbolWithLoc()); - const offset = thunk_sym.n_value - header.addr + header.offset; - log.debug("writing thunk({d}) at offset 0x{x}", .{ i, offset }); - try macho_file.base.file.?.pwriteAll(buffer.items, offset); - } -} - -fn writePointerEntries(macho_file: *MachO, sect_id: u8, table: anytype) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const header = macho_file.sections.items(.header)[sect_id]; - const capacity = math.cast(usize, header.size) orelse return error.Overflow; - var buffer = try std.ArrayList(u8).initCapacity(gpa, capacity); - defer buffer.deinit(); - for (table.entries.items) |entry| { - const sym = macho_file.getSymbol(entry); - buffer.writer().writeInt(u64, sym.n_value, .little) catch unreachable; - } - log.debug("writing __DATA_CONST,__got contents at file offset 0x{x}", .{header.offset}); - try macho_file.base.file.?.pwriteAll(buffer.items, header.offset); -} - -fn writeStubs(macho_file: *MachO) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const stubs_header = macho_file.sections.items(.header)[macho_file.stubs_section_index.?]; - const la_symbol_ptr_header = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_section_index.?]; - - const capacity = math.cast(usize, stubs_header.size) orelse return error.Overflow; - var buffer = try std.ArrayList(u8).initCapacity(gpa, capacity); - defer buffer.deinit(); - - for (0..macho_file.stub_table.count()) |index| { - try 
stubs.writeStubCode(.{ - .cpu_arch = cpu_arch, - .source_addr = stubs_header.addr + stubs.stubSize(cpu_arch) * index, - .target_addr = la_symbol_ptr_header.addr + index * @sizeOf(u64), - }, buffer.writer()); - } - - log.debug("writing __TEXT,__stubs contents at file offset 0x{x}", .{stubs_header.offset}); - try macho_file.base.file.?.pwriteAll(buffer.items, stubs_header.offset); -} - -fn writeStubHelpers(macho_file: *MachO) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const stub_helper_header = macho_file.sections.items(.header)[macho_file.stub_helper_section_index.?]; - - const capacity = math.cast(usize, stub_helper_header.size) orelse return error.Overflow; - var buffer = try std.ArrayList(u8).initCapacity(gpa, capacity); - defer buffer.deinit(); - - { - const dyld_private_addr = blk: { - const atom = macho_file.getAtom(macho_file.dyld_private_atom_index.?); - const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - break :blk sym.n_value; - }; - const dyld_stub_binder_got_addr = blk: { - const sym_loc = macho_file.globals.items[macho_file.dyld_stub_binder_index.?]; - break :blk macho_file.getGotEntryAddress(sym_loc).?; - }; - try stubs.writeStubHelperPreambleCode(.{ - .cpu_arch = cpu_arch, - .source_addr = stub_helper_header.addr, - .dyld_private_addr = dyld_private_addr, - .dyld_stub_binder_got_addr = dyld_stub_binder_got_addr, - }, buffer.writer()); - } - - for (0..macho_file.stub_table.count()) |index| { - const source_addr = stub_helper_header.addr + stubs.stubHelperPreambleSize(cpu_arch) + - stubs.stubHelperSize(cpu_arch) * index; - try stubs.writeStubHelperCode(.{ - .cpu_arch = cpu_arch, - .source_addr = source_addr, - .target_addr = stub_helper_header.addr, - }, buffer.writer()); - } - - log.debug("writing __TEXT,__stub_helper contents at file offset 0x{x}", .{ - stub_helper_header.offset, - }); - try 
macho_file.base.file.?.pwriteAll(buffer.items, stub_helper_header.offset); -} - -fn writeLaSymbolPtrs(macho_file: *MachO) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const target = macho_file.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const la_symbol_ptr_header = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_section_index.?]; - const stub_helper_header = macho_file.sections.items(.header)[macho_file.stub_helper_section_index.?]; - - const capacity = math.cast(usize, la_symbol_ptr_header.size) orelse return error.Overflow; - var buffer = try std.ArrayList(u8).initCapacity(gpa, capacity); - defer buffer.deinit(); - - for (0..macho_file.stub_table.count()) |index| { - const target_addr = stub_helper_header.addr + stubs.stubHelperPreambleSize(cpu_arch) + - stubs.stubHelperSize(cpu_arch) * index; - buffer.writer().writeInt(u64, target_addr, .little) catch unreachable; - } - - log.debug("writing __DATA,__la_symbol_ptr contents at file offset 0x{x}", .{ - la_symbol_ptr_header.offset, - }); - try macho_file.base.file.?.pwriteAll(buffer.items, la_symbol_ptr_header.offset); -} - -fn pruneAndSortSections(macho_file: *MachO) !void { - const Entry = struct { - index: u8, - - pub fn lessThan(ctx: *MachO, lhs: @This(), rhs: @This()) bool { - const lhs_header = ctx.sections.items(.header)[lhs.index]; - const rhs_header = ctx.sections.items(.header)[rhs.index]; - return MachO.getSectionPrecedence(lhs_header) < MachO.getSectionPrecedence(rhs_header); - } - }; - - const comp = macho_file.base.comp; - const gpa = comp.gpa; - - var entries = try std.ArrayList(Entry).initCapacity(gpa, macho_file.sections.slice().len); - defer entries.deinit(); - - for (0..macho_file.sections.slice().len) |index| { - const section = macho_file.sections.get(index); - if (section.header.size == 0) { - log.debug("pruning section {s},{s} {?d}", .{ - section.header.segName(), - section.header.sectName(), - section.first_atom_index, 
- }); - for (&[_]*?u8{ - &macho_file.text_section_index, - &macho_file.data_const_section_index, - &macho_file.data_section_index, - &macho_file.bss_section_index, - &macho_file.thread_vars_section_index, - &macho_file.thread_data_section_index, - &macho_file.thread_bss_section_index, - &macho_file.eh_frame_section_index, - &macho_file.unwind_info_section_index, - &macho_file.got_section_index, - &macho_file.tlv_ptr_section_index, - &macho_file.stubs_section_index, - &macho_file.stub_helper_section_index, - &macho_file.la_symbol_ptr_section_index, - }) |maybe_index| { - if (maybe_index.* != null and maybe_index.*.? == index) { - maybe_index.* = null; - } - } - continue; - } - entries.appendAssumeCapacity(.{ .index = @intCast(index) }); - } - - mem.sort(Entry, entries.items, macho_file, Entry.lessThan); - - var slice = macho_file.sections.toOwnedSlice(); - defer slice.deinit(gpa); - - const backlinks = try gpa.alloc(u8, slice.len); - defer gpa.free(backlinks); - for (entries.items, 0..) |entry, i| { - backlinks[entry.index] = @as(u8, @intCast(i)); - } - - try macho_file.sections.ensureTotalCapacity(gpa, entries.items.len); - for (entries.items) |entry| { - macho_file.sections.appendAssumeCapacity(slice.get(entry.index)); - } - - for (&[_]*?u8{ - &macho_file.text_section_index, - &macho_file.data_const_section_index, - &macho_file.data_section_index, - &macho_file.bss_section_index, - &macho_file.thread_vars_section_index, - &macho_file.thread_data_section_index, - &macho_file.thread_bss_section_index, - &macho_file.eh_frame_section_index, - &macho_file.unwind_info_section_index, - &macho_file.got_section_index, - &macho_file.tlv_ptr_section_index, - &macho_file.stubs_section_index, - &macho_file.stub_helper_section_index, - &macho_file.la_symbol_ptr_section_index, - }) |maybe_index| { - if (maybe_index.*) |*index| { - index.* = backlinks[index.*]; - } - } -} - -fn calcSectionSizes(macho_file: *MachO) !void { - const target = 
macho_file.base.comp.root_mod.resolved_target.result; - const slice = macho_file.sections.slice(); - for (slice.items(.header), 0..) |*header, sect_id| { - if (header.size == 0) continue; - if (macho_file.text_section_index) |txt| { - if (txt == sect_id and target.cpu.arch == .aarch64) continue; - } - - var atom_index = slice.items(.first_atom_index)[sect_id] orelse continue; - - header.size = 0; - header.@"align" = 0; - - while (true) { - const atom = macho_file.getAtom(atom_index); - const atom_offset = atom.alignment.forward(header.size); - const padding = atom_offset - header.size; - - const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc()); - sym.n_value = atom_offset; - - header.size += padding + atom.size; - header.@"align" = @max(header.@"align", atom.alignment.toLog2Units()); - - atom_index = atom.next_index orelse break; - } - } - - if (macho_file.text_section_index != null and target.cpu.arch == .aarch64) { - // Create jump/branch range extenders if needed. - try thunks.createThunks(macho_file, macho_file.text_section_index.?); - } - - // Update offsets of all symbols contained within each Atom. - // We need to do this since our unwind info synthesiser relies on - // traversing the symbols when synthesising unwind info and DWARF CFI records. 
- for (slice.items(.first_atom_index)) |first_atom_index| { - var atom_index = first_atom_index orelse continue; - - while (true) { - const atom = macho_file.getAtom(atom_index); - const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); - - if (atom.getFile() != null) { - // Update each symbol contained within the atom - var it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - while (it.next()) |sym_loc| { - const inner_sym = macho_file.getSymbolPtr(sym_loc); - inner_sym.n_value = sym.n_value + Atom.calcInnerSymbolOffset( - macho_file, - atom_index, - sym_loc.sym_index, - ); - } - - // If there is a section alias, update it now too - if (Atom.getSectionAlias(macho_file, atom_index)) |sym_loc| { - const alias = macho_file.getSymbolPtr(sym_loc); - alias.n_value = sym.n_value; - } - } - - if (atom.next_index) |next_index| { - atom_index = next_index; - } else break; - } - } - - if (macho_file.got_section_index) |sect_id| { - const header = &macho_file.sections.items(.header)[sect_id]; - header.size = macho_file.got_table.count() * @sizeOf(u64); - header.@"align" = 3; - } - - if (macho_file.tlv_ptr_section_index) |sect_id| { - const header = &macho_file.sections.items(.header)[sect_id]; - header.size = macho_file.tlv_ptr_table.count() * @sizeOf(u64); - header.@"align" = 3; - } - - const cpu_arch = target.cpu.arch; - - if (macho_file.stubs_section_index) |sect_id| { - const header = &macho_file.sections.items(.header)[sect_id]; - header.size = macho_file.stub_table.count() * stubs.stubSize(cpu_arch); - header.@"align" = math.log2(stubs.stubAlignment(cpu_arch)); - } - - if (macho_file.stub_helper_section_index) |sect_id| { - const header = &macho_file.sections.items(.header)[sect_id]; - header.size = macho_file.stub_table.count() * stubs.stubHelperSize(cpu_arch) + - stubs.stubHelperPreambleSize(cpu_arch); - header.@"align" = math.log2(stubs.stubAlignment(cpu_arch)); - } - - if (macho_file.la_symbol_ptr_section_index) |sect_id| { - const header = 
&macho_file.sections.items(.header)[sect_id]; - header.size = macho_file.stub_table.count() * @sizeOf(u64); - header.@"align" = 3; - } -} - -fn allocateSegments(macho_file: *MachO) !void { - for (macho_file.segments.items, 0..) |*segment, segment_index| { - const is_text_segment = mem.eql(u8, segment.segName(), "__TEXT"); - const base_size = if (is_text_segment) - try load_commands.calcMinHeaderPad(macho_file, .{ - .segments = macho_file.segments.items, - .dylibs = macho_file.dylibs.items, - .referenced_dylibs = macho_file.referenced_dylibs.keys(), - }) - else - 0; - try allocateSegment(macho_file, @as(u8, @intCast(segment_index)), base_size); - } -} - -fn getSegmentAllocBase(macho_file: *MachO, segment_index: u8) struct { vmaddr: u64, fileoff: u64 } { - if (segment_index > 0) { - const prev_segment = macho_file.segments.items[segment_index - 1]; - return .{ - .vmaddr = prev_segment.vmaddr + prev_segment.vmsize, - .fileoff = prev_segment.fileoff + prev_segment.filesize, - }; - } - return .{ .vmaddr = 0, .fileoff = 0 }; -} - -fn allocateSegment(macho_file: *MachO, segment_index: u8, init_size: u64) !void { - const target = macho_file.base.comp.root_mod.resolved_target.result; - const segment = &macho_file.segments.items[segment_index]; - - if (mem.eql(u8, segment.segName(), "__PAGEZERO")) return; // allocated upon creation - - const base = getSegmentAllocBase(macho_file, segment_index); - segment.vmaddr = base.vmaddr; - segment.fileoff = base.fileoff; - segment.filesize = init_size; - segment.vmsize = init_size; - - // Allocate the sections according to their alignment at the beginning of the segment. - const indexes = macho_file.getSectionIndexes(segment_index); - var start = init_size; - - const slice = macho_file.sections.slice(); - for (slice.items(.header)[indexes.start..indexes.end], 0..) 
|*header, sect_id| { - const alignment = try math.powi(u32, 2, header.@"align"); - const start_aligned = mem.alignForward(u64, start, alignment); - const n_sect = @as(u8, @intCast(indexes.start + sect_id + 1)); - - header.offset = if (header.isZerofill()) - 0 - else - @as(u32, @intCast(segment.fileoff + start_aligned)); - header.addr = segment.vmaddr + start_aligned; - - if (slice.items(.first_atom_index)[indexes.start + sect_id]) |first_atom_index| { - var atom_index = first_atom_index; - - log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ - n_sect, - header.segName(), - header.sectName(), - }); - - while (true) { - const atom = macho_file.getAtom(atom_index); - const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc()); - sym.n_value += header.addr; - sym.n_sect = n_sect; - - log.debug(" ATOM(%{d}, '{s}') @{x}", .{ - atom.sym_index, - macho_file.getSymbolName(atom.getSymbolWithLoc()), - sym.n_value, - }); - - if (atom.getFile() != null) { - // Update each symbol contained within the atom - var it = Atom.getInnerSymbolsIterator(macho_file, atom_index); - while (it.next()) |sym_loc| { - const inner_sym = macho_file.getSymbolPtr(sym_loc); - inner_sym.n_value = sym.n_value + Atom.calcInnerSymbolOffset( - macho_file, - atom_index, - sym_loc.sym_index, - ); - inner_sym.n_sect = n_sect; - } - - // If there is a section alias, update it now too - if (Atom.getSectionAlias(macho_file, atom_index)) |sym_loc| { - const alias = macho_file.getSymbolPtr(sym_loc); - alias.n_value = sym.n_value; - alias.n_sect = n_sect; - } - } - - if (atom.next_index) |next_index| { - atom_index = next_index; - } else break; - } - } - - start = start_aligned + header.size; - - if (!header.isZerofill()) { - segment.filesize = start; - } - segment.vmsize = start; - } - - const page_size = MachO.getPageSize(target.cpu.arch); - segment.filesize = mem.alignForward(u64, segment.filesize, page_size); - segment.vmsize = mem.alignForward(u64, segment.vmsize, page_size); -} - -const std = 
@import("std"); -const build_options = @import("build_options"); -const assert = std.debug.assert; -const dwarf = std.dwarf; -const fs = std.fs; -const log = std.log.scoped(.link); -const macho = std.macho; -const math = std.math; -const mem = std.mem; - -const aarch64 = @import("../../arch/aarch64/bits.zig"); -const calcUuid = @import("uuid.zig").calcUuid; -const dead_strip = @import("dead_strip.zig"); -const eh_frame = @import("eh_frame.zig"); -const fat = @import("fat.zig"); -const link = @import("../../link.zig"); -const load_commands = @import("load_commands.zig"); -const stubs = @import("stubs.zig"); -const thunks = @import("thunks.zig"); -const trace = @import("../../tracy.zig").trace; - -const Allocator = mem.Allocator; -const Archive = @import("Archive.zig"); -const Atom = @import("Atom.zig"); -const Cache = std.Build.Cache; -const CodeSignature = @import("CodeSignature.zig"); -const Compilation = @import("../../Compilation.zig"); -const Dylib = @import("Dylib.zig"); -const MachO = @import("../MachO.zig"); -const Md5 = std.crypto.hash.Md5; -const LibStub = @import("../tapi.zig").LibStub; -const Object = @import("Object.zig"); -const Platform = load_commands.Platform; -const Section = MachO.Section; -const SymbolWithLoc = MachO.SymbolWithLoc; -const TableSection = @import("../table_section.zig").TableSection; -const Trie = @import("Trie.zig"); -const UnwindInfo = @import("UnwindInfo.zig"); From dd0addab1fe11b019e83c0050eedfa0ec67eb408 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 9 Jan 2024 19:51:01 +0100 Subject: [PATCH 002/133] macho: get the ball rolling! 
--- src/arch/aarch64/CodeGen.zig | 80 +- src/arch/aarch64/Emit.zig | 80 +- src/arch/x86_64/CodeGen.zig | 64 +- src/arch/x86_64/Emit.zig | 72 +- src/codegen.zig | 24 +- src/link/MachO.zig | 5272 ++---------------------------- src/link/MachO/Atom.zig | 86 +- src/link/MachO/CodeSignature.zig | 1 - src/link/MachO/DwarfInfo.zig | 2 +- src/link/MachO/Dylib.zig | 4 +- src/link/MachO/Object.zig | 5 +- src/link/MachO/UnwindInfo.zig | 2 +- src/link/MachO/dead_strip.zig | 2 +- src/link/MachO/eh_frame.zig | 2 +- src/link/MachO/hasher.zig | 2 +- src/link/MachO/thunks.zig | 4 +- src/link/MachO/uuid.zig | 2 +- 17 files changed, 583 insertions(+), 5121 deletions(-) diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index ee5e58ae05..88f211cc29 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -4013,10 +4013,11 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type .import => unreachable, }; const atom_index = switch (self.bin_file.tag) { - .macho => blk: { - const macho_file = self.bin_file.cast(link.File.MachO).?; - const atom = try macho_file.getOrCreateAtomForDecl(self.owner_decl); - break :blk macho_file.getAtom(atom).getSymbolIndex().?; + .macho => { + // const macho_file = self.bin_file.cast(link.File.MachO).?; + // const atom = try macho_file.getOrCreateAtomForDecl(self.owner_decl); + // break :blk macho_file.getAtom(atom).getSymbolIndex().?; + @panic("TODO store"); }, .coff => blk: { const coff_file = self.bin_file.cast(link.File.Coff).?; @@ -4321,14 +4322,16 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const got_addr = @as(u32, @intCast(sym.zigGotAddress(elf_file))); try self.genSetReg(Type.usize, .x30, .{ .memory = got_addr }); } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { - const atom = try macho_file.getOrCreateAtomForDecl(func.owner_decl); - const sym_index = macho_file.getAtom(atom).getSymbolIndex().?; - try self.genSetReg(Type.u64, 
.x30, .{ - .linker_load = .{ - .type = .got, - .sym_index = sym_index, - }, - }); + _ = macho_file; + @panic("TODO airCall"); + // const atom = try macho_file.getOrCreateAtomForDecl(func.owner_decl); + // const sym_index = macho_file.getAtom(atom).getSymbolIndex().?; + // try self.genSetReg(Type.u64, .x30, .{ + // .linker_load = .{ + // .type = .got, + // .sym_index = sym_index, + // }, + // }); } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { const atom = try coff_file.getOrCreateAtomForDecl(func.owner_decl); const sym_index = coff_file.getAtom(atom).getSymbolIndex().?; @@ -4352,18 +4355,20 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const decl_name = mod.intern_pool.stringToSlice(mod.declPtr(extern_func.decl).name); const lib_name = mod.intern_pool.stringToSliceUnwrap(extern_func.lib_name); if (self.bin_file.cast(link.File.MachO)) |macho_file| { - const sym_index = try macho_file.getGlobalSymbol(decl_name, lib_name); - const atom = try macho_file.getOrCreateAtomForDecl(self.owner_decl); - const atom_index = macho_file.getAtom(atom).getSymbolIndex().?; - _ = try self.addInst(.{ - .tag = .call_extern, - .data = .{ - .relocation = .{ - .atom_index = atom_index, - .sym_index = sym_index, - }, - }, - }); + _ = macho_file; + @panic("TODO airCall"); + // const sym_index = try macho_file.getGlobalSymbol(decl_name, lib_name); + // const atom = try macho_file.getOrCreateAtomForDecl(self.owner_decl); + // const atom_index = macho_file.getAtom(atom).getSymbolIndex().?; + // _ = try self.addInst(.{ + // .tag = .call_extern, + // .data = .{ + // .relocation = .{ + // .atom_index = atom_index, + // .sym_index = sym_index, + // }, + // }, + // }); } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { const sym_index = try coff_file.getGlobalSymbol(decl_name, lib_name); try self.genSetReg(Type.u64, .x30, .{ @@ -5532,10 +5537,11 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro .import => 
unreachable, }; const atom_index = switch (self.bin_file.tag) { - .macho => blk: { - const macho_file = self.bin_file.cast(link.File.MachO).?; - const atom = try macho_file.getOrCreateAtomForDecl(self.owner_decl); - break :blk macho_file.getAtom(atom).getSymbolIndex().?; + .macho => { + // const macho_file = self.bin_file.cast(link.File.MachO).?; + // const atom = try macho_file.getOrCreateAtomForDecl(self.owner_decl); + // break :blk macho_file.getAtom(atom).getSymbolIndex().?; + @panic("TODO genSetStack"); }, .coff => blk: { const coff_file = self.bin_file.cast(link.File.Coff).?; @@ -5653,10 +5659,11 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void .import => .load_memory_import, }; const atom_index = switch (self.bin_file.tag) { - .macho => blk: { - const macho_file = self.bin_file.cast(link.File.MachO).?; - const atom = try macho_file.getOrCreateAtomForDecl(self.owner_decl); - break :blk macho_file.getAtom(atom).getSymbolIndex().?; + .macho => { + @panic("TODO genSetReg"); + // const macho_file = self.bin_file.cast(link.File.MachO).?; + // const atom = try macho_file.getOrCreateAtomForDecl(self.owner_decl); + // break :blk macho_file.getAtom(atom).getSymbolIndex().?; }, .coff => blk: { const coff_file = self.bin_file.cast(link.File.Coff).?; @@ -5850,10 +5857,11 @@ fn genSetStackArgument(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) I .import => unreachable, }; const atom_index = switch (self.bin_file.tag) { - .macho => blk: { - const macho_file = self.bin_file.cast(link.File.MachO).?; - const atom = try macho_file.getOrCreateAtomForDecl(self.owner_decl); - break :blk macho_file.getAtom(atom).getSymbolIndex().?; + .macho => { + @panic("TODO genSetStackArgument"); + // const macho_file = self.bin_file.cast(link.File.MachO).?; + // const atom = try macho_file.getOrCreateAtomForDecl(self.owner_decl); + // break :blk macho_file.getAtom(atom).getSymbolIndex().?; }, .coff => blk: { const coff_file = 
self.bin_file.cast(link.File.Coff).?; diff --git a/src/arch/aarch64/Emit.zig b/src/arch/aarch64/Emit.zig index 96eb5b8b30..d14c0c8aad 100644 --- a/src/arch/aarch64/Emit.zig +++ b/src/arch/aarch64/Emit.zig @@ -677,6 +677,7 @@ fn mirDebugEpilogueBegin(emit: *Emit) !void { fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) !void { assert(emit.mir.instructions.items(.tag)[inst] == .call_extern); const relocation = emit.mir.instructions.items(.data)[inst].relocation; + _ = relocation; const offset = blk: { const offset = @as(u32, @intCast(emit.code.items.len)); @@ -684,19 +685,22 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) !void { try emit.writeInstruction(Instruction.bl(0)); break :blk offset; }; + _ = offset; if (emit.bin_file.cast(link.File.MachO)) |macho_file| { - // Add relocation to the decl. - const atom_index = macho_file.getAtomIndexForSymbol(.{ .sym_index = relocation.atom_index }).?; - const target = macho_file.getGlobalByIndex(relocation.sym_index); - try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ - .type = .branch, - .target = target, - .offset = offset, - .addend = 0, - .pcrel = true, - .length = 2, - }); + _ = macho_file; + @panic("TODO mirCallExtern"); + // // Add relocation to the decl. 
+ // const atom_index = macho_file.getAtomIndexForSymbol(.{ .sym_index = relocation.atom_index }).?; + // const target = macho_file.getGlobalByIndex(relocation.sym_index); + // try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ + // .type = .branch, + // .target = target, + // .offset = offset, + // .addend = 0, + // .pcrel = true, + // .length = 2, + // }); } else if (emit.bin_file.cast(link.File.Coff)) |_| { unreachable; // Calling imports is handled via `.load_memory_import` } else { @@ -900,32 +904,34 @@ fn mirLoadMemoryPie(emit: *Emit, inst: Mir.Inst.Index) !void { } if (emit.bin_file.cast(link.File.MachO)) |macho_file| { - const Atom = link.File.MachO.Atom; - const Relocation = Atom.Relocation; - const atom_index = macho_file.getAtomIndexForSymbol(.{ .sym_index = data.atom_index }).?; - try Atom.addRelocations(macho_file, atom_index, &[_]Relocation{ .{ - .target = .{ .sym_index = data.sym_index }, - .offset = offset, - .addend = 0, - .pcrel = true, - .length = 2, - .type = switch (tag) { - .load_memory_got, .load_memory_ptr_got => Relocation.Type.got_page, - .load_memory_direct, .load_memory_ptr_direct => Relocation.Type.page, - else => unreachable, - }, - }, .{ - .target = .{ .sym_index = data.sym_index }, - .offset = offset + 4, - .addend = 0, - .pcrel = false, - .length = 2, - .type = switch (tag) { - .load_memory_got, .load_memory_ptr_got => Relocation.Type.got_pageoff, - .load_memory_direct, .load_memory_ptr_direct => Relocation.Type.pageoff, - else => unreachable, - }, - } }); + _ = macho_file; + @panic("TODO mirLoadMemoryPie"); + // const Atom = link.File.MachO.Atom; + // const Relocation = Atom.Relocation; + // const atom_index = macho_file.getAtomIndexForSymbol(.{ .sym_index = data.atom_index }).?; + // try Atom.addRelocations(macho_file, atom_index, &[_]Relocation{ .{ + // .target = .{ .sym_index = data.sym_index }, + // .offset = offset, + // .addend = 0, + // .pcrel = true, + // .length = 2, + // .type = switch (tag) { + // 
.load_memory_got, .load_memory_ptr_got => Relocation.Type.got_page, + // .load_memory_direct, .load_memory_ptr_direct => Relocation.Type.page, + // else => unreachable, + // }, + // }, .{ + // .target = .{ .sym_index = data.sym_index }, + // .offset = offset + 4, + // .addend = 0, + // .pcrel = false, + // .length = 2, + // .type = switch (tag) { + // .load_memory_got, .load_memory_ptr_got => Relocation.Type.got_pageoff, + // .load_memory_direct, .load_memory_ptr_direct => Relocation.Type.pageoff, + // else => unreachable, + // }, + // } }); } else if (emit.bin_file.cast(link.File.Coff)) |coff_file| { const atom_index = coff_file.getAtomIndexForSymbol(.{ .sym_index = data.atom_index, .file = null }).?; const target = switch (tag) { diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 55e241cbd4..870b6a2472 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -139,8 +139,10 @@ const Owner = union(enum) { if (ctx.bin_file.cast(link.File.Elf)) |elf_file| { return elf_file.zigObjectPtr().?.getOrCreateMetadataForDecl(elf_file, decl_index); } else if (ctx.bin_file.cast(link.File.MachO)) |macho_file| { - const atom = try macho_file.getOrCreateAtomForDecl(decl_index); - return macho_file.getAtom(atom).getSymbolIndex().?; + _ = macho_file; + // const atom = try macho_file.getOrCreateAtomForDecl(decl_index); + // return macho_file.getAtom(atom).getSymbolIndex().?; + @panic("TODO getSymbolIndex"); } else if (ctx.bin_file.cast(link.File.Coff)) |coff_file| { const atom = try coff_file.getOrCreateAtomForDecl(decl_index); return coff_file.getAtom(atom).getSymbolIndex().?; @@ -153,9 +155,11 @@ const Owner = union(enum) { return elf_file.zigObjectPtr().?.getOrCreateMetadataForLazySymbol(elf_file, lazy_sym) catch |err| ctx.fail("{s} creating lazy symbol", .{@errorName(err)}); } else if (ctx.bin_file.cast(link.File.MachO)) |macho_file| { - const atom = macho_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| - return ctx.fail("{s} 
creating lazy symbol", .{@errorName(err)}); - return macho_file.getAtom(atom).getSymbolIndex().?; + _ = macho_file; + // const atom = macho_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| + // return ctx.fail("{s} creating lazy symbol", .{@errorName(err)}); + // return macho_file.getAtom(atom).getSymbolIndex().?; + @panic("TODO getSymbolIndex"); } else if (ctx.bin_file.cast(link.File.Coff)) |coff_file| { const atom = coff_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| return ctx.fail("{s} creating lazy symbol", .{@errorName(err)}); @@ -10951,10 +10955,12 @@ fn genCall(self: *Self, info: union(enum) { try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index }); try self.asmRegister(.{ ._, .call }, .rax); } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { - const atom = try macho_file.getOrCreateAtomForDecl(func.owner_decl); - const sym_index = macho_file.getAtom(atom).getSymbolIndex().?; - try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index }); - try self.asmRegister(.{ ._, .call }, .rax); + _ = macho_file; + @panic("TODO genCall"); + // const atom = try macho_file.getOrCreateAtomForDecl(func.owner_decl); + // const sym_index = macho_file.getAtom(atom).getSymbolIndex().?; + // try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index }); + // try self.asmRegister(.{ ._, .call }, .rax); } else if (self.bin_file.cast(link.File.Plan9)) |p9| { const atom_index = try p9.seeDecl(func.owner_decl); const atom = p9.getAtom(atom_index); @@ -13814,11 +13820,15 @@ fn genExternSymbolRef( _ = try self.addInst(.{ .tag = .call, .ops = .extern_fn_reloc, - .data = .{ .reloc = .{ - .atom_index = atom_index, - .sym_index = link.File.MachO.global_symbol_bit | global_index, - } }, + .data = .{ + .reloc = .{ + .atom_index = atom_index, + // .sym_index = link.File.MachO.global_symbol_bit | global_index, + .sym_index = global_index, + }, + }, }); + @panic("TODO genExternSymbolRef"); } else return self.fail("TODO implement calling extern 
functions", .{}); } @@ -13906,19 +13916,21 @@ fn genLazySymbolRef( else => unreachable, } } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { - const atom_index = macho_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| - return self.fail("{s} creating lazy symbol", .{@errorName(err)}); - const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?; - switch (tag) { - .lea, .call => try self.genSetReg(reg, Type.usize, .{ .lea_got = sym_index }), - .mov => try self.genSetReg(reg, Type.usize, .{ .load_got = sym_index }), - else => unreachable, - } - switch (tag) { - .lea, .mov => {}, - .call => try self.asmRegister(.{ ._, .call }, reg), - else => unreachable, - } + _ = macho_file; + @panic("TODO genLazySymbolRef"); + // const atom_index = macho_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| + // return self.fail("{s} creating lazy symbol", .{@errorName(err)}); + // const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?; + // switch (tag) { + // .lea, .call => try self.genSetReg(reg, Type.usize, .{ .lea_got = sym_index }), + // .mov => try self.genSetReg(reg, Type.usize, .{ .load_got = sym_index }), + // else => unreachable, + // } + // switch (tag) { + // .lea, .mov => {}, + // .call => try self.asmRegister(.{ ._, .call }, reg), + // else => unreachable, + // } } else { return self.fail("TODO implement genLazySymbol for x86_64 {s}", .{@tagName(self.bin_file.tag)}); } diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 2c976bd00d..97c6cdfc1b 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -49,21 +49,23 @@ pub fn emitMir(emit: *Emit) Error!void { .r_addend = -4, }); } else if (emit.lower.bin_file.cast(link.File.MachO)) |macho_file| { - // Add relocation to the decl. 
- const atom_index = - macho_file.getAtomIndexForSymbol(.{ .sym_index = symbol.atom_index }).?; - const target = if (link.File.MachO.global_symbol_bit & symbol.sym_index != 0) - macho_file.getGlobalByIndex(link.File.MachO.global_symbol_mask & symbol.sym_index) - else - link.File.MachO.SymbolWithLoc{ .sym_index = symbol.sym_index }; - try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ - .type = .branch, - .target = target, - .offset = end_offset - 4, - .addend = 0, - .pcrel = true, - .length = 2, - }); + _ = macho_file; + @panic("TODO emitMir"); + // // Add relocation to the decl. + // const atom_index = + // macho_file.getAtomIndexForSymbol(.{ .sym_index = symbol.atom_index }).?; + // const target = if (link.File.MachO.global_symbol_bit & symbol.sym_index != 0) + // macho_file.getGlobalByIndex(link.File.MachO.global_symbol_mask & symbol.sym_index) + // else + // link.File.MachO.SymbolWithLoc{ .sym_index = symbol.sym_index }; + // try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ + // .type = .branch, + // .target = target, + // .offset = end_offset - 4, + // .addend = 0, + // .pcrel = true, + // .length = 2, + // }); } else if (emit.lower.bin_file.cast(link.File.Coff)) |coff_file| { // Add relocation to the decl. 
const atom_index = coff_file.getAtomIndexForSymbol( @@ -157,25 +159,27 @@ pub fn emitMir(emit: *Emit) Error!void { => |symbol| if (emit.lower.bin_file.cast(link.File.Elf)) |_| { unreachable; } else if (emit.lower.bin_file.cast(link.File.MachO)) |macho_file| { - const atom_index = - macho_file.getAtomIndexForSymbol(.{ .sym_index = symbol.atom_index }).?; - const target = if (link.File.MachO.global_symbol_bit & symbol.sym_index != 0) - macho_file.getGlobalByIndex(link.File.MachO.global_symbol_mask & symbol.sym_index) - else - link.File.MachO.SymbolWithLoc{ .sym_index = symbol.sym_index }; - try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ - .type = switch (lowered_relocs[0].target) { - .linker_got => .got, - .linker_direct => .signed, - .linker_tlv => .tlv, - else => unreachable, - }, - .target = target, - .offset = @intCast(end_offset - 4), - .addend = 0, - .pcrel = true, - .length = 2, - }); + _ = macho_file; + @panic("TODO emitMir"); + // const atom_index = + // macho_file.getAtomIndexForSymbol(.{ .sym_index = symbol.atom_index }).?; + // const target = if (link.File.MachO.global_symbol_bit & symbol.sym_index != 0) + // macho_file.getGlobalByIndex(link.File.MachO.global_symbol_mask & symbol.sym_index) + // else + // link.File.MachO.SymbolWithLoc{ .sym_index = symbol.sym_index }; + // try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ + // .type = switch (lowered_relocs[0].target) { + // .linker_got => .got, + // .linker_direct => .signed, + // .linker_tlv => .tlv, + // else => unreachable, + // }, + // .target = target, + // .offset = @intCast(end_offset - 4), + // .addend = 0, + // .pcrel = true, + // .length = 2, + // }); } else if (emit.lower.bin_file.cast(link.File.Coff)) |coff_file| { const atom_index = coff_file.getAtomIndexForSymbol(.{ .sym_index = symbol.atom_index, diff --git a/src/codegen.zig b/src/codegen.zig index 1ac8626a79..f9263c2a69 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -984,20 +984,22 @@ fn 
genDeclRef( } return GenResult.mcv(.{ .load_symbol = sym.esym_index }); } else if (lf.cast(link.File.MachO)) |macho_file| { + _ = macho_file; if (is_extern) { // TODO make this part of getGlobalSymbol - const name = zcu.intern_pool.stringToSlice(decl.name); - const sym_name = try std.fmt.allocPrint(gpa, "_{s}", .{name}); - defer gpa.free(sym_name); - const global_index = try macho_file.addUndefined(sym_name, .{ .add_got = true }); - return GenResult.mcv(.{ .load_got = link.File.MachO.global_symbol_bit | global_index }); + // const name = zcu.intern_pool.stringToSlice(decl.name); + // const sym_name = try std.fmt.allocPrint(gpa, "_{s}", .{name}); + // defer gpa.free(sym_name); + // const global_index = try macho_file.addUndefined(sym_name, .{ .add_got = true }); + // return GenResult.mcv(.{ .load_got = link.File.MachO.global_symbol_bit | global_index }); } - const atom_index = try macho_file.getOrCreateAtomForDecl(decl_index); - const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?; - if (is_threadlocal) { - return GenResult.mcv(.{ .load_tlv = sym_index }); - } - return GenResult.mcv(.{ .load_got = sym_index }); + // const atom_index = try macho_file.getOrCreateAtomForDecl(decl_index); + // const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?; + // if (is_threadlocal) { + // return GenResult.mcv(.{ .load_tlv = sym_index }); + // } + // return GenResult.mcv(.{ .load_got = sym_index }); + @panic("TODO genDeclRef"); } else if (lf.cast(link.File.Coff)) |coff_file| { if (is_extern) { const name = zcu.intern_pool.stringToSlice(decl.name); diff --git a/src/link/MachO.zig b/src/link/MachO.zig index cb26aa0ca3..895ec1cc2e 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -7,8 +7,6 @@ llvm_object: ?*LlvmObject = null, /// Debug symbols bundle (or dSym). 
d_sym: ?DebugSymbols = null, -mode: Mode, - dyld_info_cmd: macho.dyld_info_command = .{}, symtab_cmd: macho.symtab_command = .{}, dysymtab_cmd: macho.dysymtab_command = .{}, @@ -17,12 +15,6 @@ data_in_code_cmd: macho.linkedit_data_command = .{ .cmd = .DATA_IN_CODE }, uuid_cmd: macho.uuid_command = .{ .uuid = [_]u8{0} ** 16 }, codesig_cmd: macho.linkedit_data_command = .{ .cmd = .CODE_SIGNATURE }, -objects: std.ArrayListUnmanaged(Object) = .{}, -archives: std.ArrayListUnmanaged(Archive) = .{}, -dylibs: std.ArrayListUnmanaged(Dylib) = .{}, -dylibs_map: std.StringHashMapUnmanaged(u16) = .{}, -referenced_dylibs: std.AutoArrayHashMapUnmanaged(u16, void) = .{}, - segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, sections: std.MultiArrayList(Section) = .{}, @@ -48,93 +40,11 @@ got_section_index: ?u8 = null, la_symbol_ptr_section_index: ?u8 = null, tlv_ptr_section_index: ?u8 = null, -locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -globals: std.ArrayListUnmanaged(SymbolWithLoc) = .{}, -resolver: std.StringHashMapUnmanaged(u32) = .{}, -unresolved: std.AutoArrayHashMapUnmanaged(u32, void) = .{}, - -locals_free_list: std.ArrayListUnmanaged(u32) = .{}, -globals_free_list: std.ArrayListUnmanaged(u32) = .{}, - -dyld_stub_binder_index: ?u32 = null, -dyld_private_atom_index: ?Atom.Index = null, - strtab: StringTable = .{}, -got_table: TableSection(SymbolWithLoc) = .{}, -stub_table: TableSection(SymbolWithLoc) = .{}, -tlv_ptr_table: TableSection(SymbolWithLoc) = .{}, - -thunk_table: std.AutoHashMapUnmanaged(Atom.Index, thunks.Thunk.Index) = .{}, -thunks: std.ArrayListUnmanaged(thunks.Thunk) = .{}, - -segment_table_dirty: bool = false, -got_table_count_dirty: bool = false, -got_table_contents_dirty: bool = false, -stub_table_count_dirty: bool = false, -stub_table_contents_dirty: bool = false, -stub_helper_preamble_allocated: bool = false, - /// List of atoms that are either synthetic or map directly to the Zig source program. 
atoms: std.ArrayListUnmanaged(Atom) = .{}, -/// Table of atoms indexed by the symbol index. -atom_by_index_table: std.AutoHashMapUnmanaged(u32, Atom.Index) = .{}, - -/// Table of unnamed constants associated with a parent `Decl`. -/// We store them here so that we can free the constants whenever the `Decl` -/// needs updating or is freed. -/// -/// For example, -/// -/// ```zig -/// const Foo = struct{ -/// a: u8, -/// }; -/// -/// pub fn main() void { -/// var foo = Foo{ .a = 1 }; -/// _ = foo; -/// } -/// ``` -/// -/// value assigned to label `foo` is an unnamed constant belonging/associated -/// with `Decl` `main`, and lives as long as that `Decl`. -unnamed_const_atoms: UnnamedConstTable = .{}, -anon_decls: AnonDeclTable = .{}, - -/// A table of relocations indexed by the owning them `Atom`. -/// Note that once we refactor `Atom`'s lifetime and ownership rules, -/// this will be a table indexed by index into the list of Atoms. -relocs: RelocationTable = .{}, -/// TODO I do not have time to make this right but this will go once -/// MachO linker is rewritten more-or-less to feature the same resolution -/// mechanism as the ELF linker. -actions: ActionTable = .{}, - -/// A table of rebases indexed by the owning them `Atom`. -/// Note that once we refactor `Atom`'s lifetime and ownership rules, -/// this will be a table indexed by index into the list of Atoms. -rebases: RebaseTable = .{}, - -/// A table of bindings indexed by the owning them `Atom`. -/// Note that once we refactor `Atom`'s lifetime and ownership rules, -/// this will be a table indexed by index into the list of Atoms. -bindings: BindingTable = .{}, - -/// Table of tracked LazySymbols. -lazy_syms: LazySymbolTable = .{}, - -/// Table of tracked Decls. -decls: DeclTable = .{}, - -/// Table of threadlocal variables descriptors. -/// They are emitted in the `__thread_vars` section. -tlv_table: TlvSymbolTable = .{}, - -/// Hot-code swapping state. 
-hot_state: if (is_hot_update_compatible) HotUpdateState else struct {} = .{}, - sdk_layout: ?SdkLayout, /// Size of the __PAGEZERO segment. pagezero_vmsize: u64, @@ -152,6 +62,9 @@ install_name: ?[]const u8, entitlements: ?[]const u8, compatibility_version: ?std.SemanticVersion, +/// Hot-code swapping state. +hot_state: if (is_hot_update_compatible) HotUpdateState else struct {} = .{}, + /// When adding a new field, remember to update `hashAddFrameworks`. pub const Framework = struct { needed: bool = false, @@ -183,24 +96,18 @@ pub fn createEmpty( ) !*MachO { const target = comp.root_mod.resolved_target.result; assert(target.ofmt == .macho); - const use_llvm = comp.config.use_llvm; + const gpa = comp.gpa; + const use_llvm = comp.config.use_llvm; + const opt_zcu = comp.module; const optimize_mode = comp.root_mod.optimize_mode; const output_mode = comp.config.output_mode; const link_mode = comp.config.link_mode; - // TODO: get rid of zld mode - const mode: Mode = if (use_llvm or !comp.config.have_zcu or comp.cache_use == .whole) - .zld - else - .incremental; - - // If using "zld mode" to link, this code should produce an object file so that it - // can be passed to "zld mode". TODO: get rid of "zld mode". // If using LLVM to generate the object file for the zig compilation unit, // we need a place to put the object file so that it can be subsequently // handled. 
- const zcu_object_sub_path = if (mode != .zld and !use_llvm) + const zcu_object_sub_path = if (!use_llvm) null else try std.fmt.allocPrint(arena, "{s}.o", .{emit.sub_path}); @@ -221,7 +128,6 @@ pub fn createEmpty( .build_id = options.build_id, .rpath_list = options.rpath_list, }, - .mode = mode, .pagezero_vmsize = options.pagezero_size orelse default_pagezero_vmsize, .headerpad_size = options.headerpad_size orelse default_headerpad_size, .headerpad_max_install_names = options.headerpad_max_install_names, @@ -243,62 +149,48 @@ pub fn createEmpty( } errdefer self.base.destroy(); - log.debug("selected linker mode '{s}'", .{@tagName(self.mode)}); - - if (mode == .zld) { - // TODO: get rid of zld mode - return self; - } - - const file = try emit.directory.handle.createFile(emit.sub_path, .{ + self.base.file = try emit.directory.handle.createFile(emit.sub_path, .{ .truncate = true, .read = true, .mode = link.File.determineMode(false, output_mode, link_mode), }); - self.base.file = file; - - if (comp.config.debug_format != .strip and comp.module != null) { - // Create dSYM bundle. - log.debug("creating {s}.dSYM bundle", .{emit.sub_path}); - - const d_sym_path = try std.fmt.allocPrint( - arena, - "{s}.dSYM" ++ fs.path.sep_str ++ "Contents" ++ fs.path.sep_str ++ "Resources" ++ fs.path.sep_str ++ "DWARF", - .{emit.sub_path}, - ); - - var d_sym_bundle = try emit.directory.handle.makeOpenPath(d_sym_path, .{}); - defer d_sym_bundle.close(); - - const d_sym_file = try d_sym_bundle.createFile(emit.sub_path, .{ - .truncate = false, - .read = true, - }); - - self.d_sym = .{ - .allocator = gpa, - .dwarf = link.File.Dwarf.init(&self.base, .dwarf32), - .file = d_sym_file, - }; - } // Index 0 is always a null symbol. 
- try self.locals.append(gpa, .{ - .n_strx = 0, - .n_type = 0, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); + // try self.locals.append(gpa, null_sym); try self.strtab.buffer.append(gpa, 0); - try self.populateMissingMetadata(.{ - .symbol_count_hint = options.symbol_count_hint, - .program_code_size_hint = options.program_code_size_hint, - }); + // TODO: init - if (self.d_sym) |*d_sym| { - try d_sym.populateMissingMetadata(self); + if (opt_zcu) |zcu| { + if (!use_llvm) { + _ = zcu; + // TODO: create .zig_object + + if (comp.config.debug_format != .strip) { + // Create dSYM bundle. + log.debug("creating {s}.dSYM bundle", .{emit.sub_path}); + + const d_sym_path = try std.fmt.allocPrint( + arena, + "{s}.dSYM" ++ fs.path.sep_str ++ "Contents" ++ fs.path.sep_str ++ "Resources" ++ fs.path.sep_str ++ "DWARF", + .{emit.sub_path}, + ); + + var d_sym_bundle = try emit.directory.handle.makeOpenPath(d_sym_path, .{}); + defer d_sym_bundle.close(); + + const d_sym_file = try d_sym_bundle.createFile(emit.sub_path, .{ + .truncate = false, + .read = true, + }); + + self.d_sym = .{ + .allocator = gpa, + .dwarf = link.File.Dwarf.init(&self.base, .dwarf32), + .file = d_sym_file, + }; + } + } } return self; @@ -316,26 +208,8 @@ pub fn open( } pub fn flush(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node) link.File.FlushError!void { - const comp = self.base.comp; - const gpa = comp.gpa; - const output_mode = comp.config.output_mode; - - if (output_mode == .Lib and comp.config.link_mode == .Static) { - if (build_options.have_llvm) { - return self.base.linkAsArchive(arena, prog_node); - } else { - try comp.link_errors.ensureUnusedCapacity(gpa, 1); - comp.link_errors.appendAssumeCapacity(.{ - .msg = try gpa.dupe(u8, "TODO: non-LLVM archiver for MachO object files"), - }); - return error.FlushFailure; - } - } - - switch (self.mode) { - .zld => return zld.linkWithZld(self, arena, prog_node), - .incremental => return self.flushModule(arena, prog_node), - } + // TODO: what 
else should we do in flush? Is it actually needed at all? + try self.flushModule(arena, prog_node); } pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node) link.File.FlushError!void { @@ -344,261 +218,40 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node const comp = self.base.comp; const gpa = comp.gpa; + _ = gpa; if (self.llvm_object) |llvm_object| { try self.base.emitLlvmObject(arena, llvm_object, prog_node); - return; } var sub_prog_node = prog_node.start("MachO Flush", 0); sub_prog_node.activate(); defer sub_prog_node.end(); - const output_mode = comp.config.output_mode; - const module = comp.module orelse return error.LinkingWithoutZigSourceUnimplemented; const target = comp.root_mod.resolved_target.result; - - if (self.lazy_syms.getPtr(.none)) |metadata| { - // Most lazy symbols can be updated on first use, but - // anyerror needs to wait for everything to be flushed. - if (metadata.text_state != .unused) self.updateLazySymbolAtom( - File.LazySymbol.initDecl(.code, null, module), - metadata.text_atom, - self.text_section_index.?, - ) catch |err| return switch (err) { - error.CodegenFail => error.FlushFailure, - else => |e| e, - }; - if (metadata.data_const_state != .unused) self.updateLazySymbolAtom( - File.LazySymbol.initDecl(.const_data, null, module), - metadata.data_const_atom, - self.data_const_section_index.?, - ) catch |err| return switch (err) { - error.CodegenFail => error.FlushFailure, - else => |e| e, - }; - } - for (self.lazy_syms.values()) |*metadata| { - if (metadata.text_state != .unused) metadata.text_state = .flushed; - if (metadata.data_const_state != .unused) metadata.data_const_state = .flushed; - } - - if (self.d_sym) |*d_sym| { - try d_sym.dwarf.flushModule(module); - } - - var libs = std.StringArrayHashMap(link.SystemLib).init(arena); - try self.resolveLibSystem(arena, comp, &libs); - - self.base.releaseLock(); - - for (self.dylibs.items) |*dylib| { - dylib.deinit(gpa); - } - 
self.dylibs.clearRetainingCapacity(); - self.dylibs_map.clearRetainingCapacity(); - self.referenced_dylibs.clearRetainingCapacity(); - - var dependent_libs = std.fifo.LinearFifo(DylibReExportInfo, .Dynamic).init(arena); - - for (libs.keys(), libs.values()) |path, lib| { - const in_file = try std.fs.cwd().openFile(path, .{}); - defer in_file.close(); - - var parse_ctx = ParseErrorCtx.init(gpa); - defer parse_ctx.deinit(); - - self.parseLibrary( - in_file, - path, - lib, - false, - false, - null, - &dependent_libs, - &parse_ctx, - ) catch |err| try self.handleAndReportParseError(path, err, &parse_ctx); - } - - try self.parseDependentLibs(&dependent_libs); - - try self.resolveSymbols(); - - if (self.getEntryPoint() == null) { - comp.link_error_flags.no_entry_point_found = true; - } - if (self.unresolved.count() > 0) { - try self.reportUndefined(); - return error.FlushFailure; - } - - { - var it = self.actions.iterator(); - while (it.next()) |entry| { - const global_index = entry.key_ptr.*; - const global = self.globals.items[global_index]; - const flags = entry.value_ptr.*; - if (flags.add_got) try self.addGotEntry(global); - if (flags.add_stub) try self.addStubEntry(global); + _ = target; + const directory = self.base.emit.directory; + const full_out_path = try directory.join(arena, &[_][]const u8{self.base.emit.sub_path}); + const module_obj_path: ?[]const u8 = if (self.base.zcu_object_sub_path) |path| blk: { + if (fs.path.dirname(full_out_path)) |dirname| { + break :blk try fs.path.join(arena, &.{ dirname, path }); + } else { + break :blk path; } - } - - try self.createDyldPrivateAtom(); - try self.writeStubHelperPreamble(); - - if (output_mode == .Exe and self.getEntryPoint() != null) { - const global = self.getEntryPoint().?; - if (self.getSymbol(global).undf()) { - // We do one additional check here in case the entry point was found in one of the dylibs. - // (I actually have no idea what this would imply but it is a possible outcome and so we - // support it.) 
- try self.addStubEntry(global); - } - } - - try self.allocateSpecialSymbols(); - - for (self.relocs.keys()) |atom_index| { - const relocs = self.relocs.get(atom_index).?; - const needs_update = for (relocs.items) |reloc| { - if (reloc.dirty) break true; - } else false; - - if (!needs_update) continue; - - const atom = self.getAtom(atom_index); - const sym = atom.getSymbol(self); - const section = self.sections.get(sym.n_sect - 1).header; - const file_offset = section.offset + sym.n_value - section.addr; - - var code = std.ArrayList(u8).init(gpa); - defer code.deinit(); - try code.resize(math.cast(usize, atom.size) orelse return error.Overflow); - - const amt = try self.base.file.?.preadAll(code.items, file_offset); - if (amt != code.items.len) return error.InputOutput; - - try self.writeAtom(atom_index, code.items); - } - - // Update GOT if it got moved in memory. - if (self.got_table_contents_dirty) { - for (self.got_table.entries.items, 0..) |entry, i| { - if (!self.got_table.lookup.contains(entry)) continue; - // TODO: write all in one go rather than incrementally. - try self.writeOffsetTableEntry(i); - } - self.got_table_contents_dirty = false; - } - - // Update stubs if we moved any section in memory. - // TODO: we probably don't need to update all sections if only one got moved. - if (self.stub_table_contents_dirty) { - for (self.stub_table.entries.items, 0..) |entry, i| { - if (!self.stub_table.lookup.contains(entry)) continue; - // TODO: write all in one go rather than incrementally. - try self.writeStubTableEntry(i); - } - self.stub_table_contents_dirty = false; - } - - if (build_options.enable_logging) { - self.logSymtab(); - self.logSections(); - self.logAtoms(); - } - - try self.writeLinkeditSegmentData(); - - var codesig: ?CodeSignature = if (self.requiresCodeSignature()) blk: { - // Preallocate space for the code signature. - // We need to do this at this stage so that we have the load commands with proper values - // written out to the file. 
- // The most important here is to have the correct vm and filesize of the __LINKEDIT segment - // where the code signature goes into. - var codesig = CodeSignature.init(getPageSize(target.cpu.arch)); - codesig.code_directory.ident = self.base.emit.sub_path; - if (self.entitlements) |path| { - try codesig.addEntitlements(gpa, path); - } - try self.writeCodeSignaturePadding(&codesig); - break :blk codesig; } else null; - defer if (codesig) |*csig| csig.deinit(gpa); + _ = module_obj_path; - // Write load commands - var lc_buffer = std.ArrayList(u8).init(arena); - const lc_writer = lc_buffer.writer(); + // --verbose-link + if (comp.verbose_link) try self.dumpArgv(comp); - try self.writeSegmentHeaders(lc_writer); - try lc_writer.writeStruct(self.dyld_info_cmd); - try lc_writer.writeStruct(self.symtab_cmd); - try lc_writer.writeStruct(self.dysymtab_cmd); - try load_commands.writeDylinkerLC(lc_writer); + @panic("TODO"); +} - switch (output_mode) { - .Exe => blk: { - const seg_id = self.header_segment_cmd_index.?; - const seg = self.segments.items[seg_id]; - const global = self.getEntryPoint() orelse break :blk; - const sym = self.getSymbol(global); - - const addr: u64 = if (sym.undf()) - // In this case, the symbol has been resolved in one of dylibs and so we point - // to the stub as its vmaddr value. - self.getStubsEntryAddress(global).? 
- else - sym.n_value; - - try lc_writer.writeStruct(macho.entry_point_command{ - .entryoff = @as(u32, @intCast(addr - seg.vmaddr)), - .stacksize = self.base.stack_size, - }); - }, - .Lib => if (comp.config.link_mode == .Dynamic) { - try load_commands.writeDylibIdLC(self, lc_writer); - }, - else => {}, - } - - try load_commands.writeRpathLCs(self, lc_writer); - try lc_writer.writeStruct(macho.source_version_command{ - .version = 0, - }); - { - const platform = Platform.fromTarget(target); - const sdk_version: ?std.SemanticVersion = load_commands.inferSdkVersion(self); - if (platform.isBuildVersionCompatible()) { - try load_commands.writeBuildVersionLC(platform, sdk_version, lc_writer); - } else if (platform.isVersionMinCompatible()) { - try load_commands.writeVersionMinLC(platform, sdk_version, lc_writer); - } - } - - const uuid_cmd_offset = @sizeOf(macho.mach_header_64) + @as(u32, @intCast(lc_buffer.items.len)); - try lc_writer.writeStruct(self.uuid_cmd); - - try load_commands.writeLoadDylibLCs(self.dylibs.items, self.referenced_dylibs.keys(), lc_writer); - - if (codesig != null) { - try lc_writer.writeStruct(self.codesig_cmd); - } - - const ncmds = load_commands.calcNumOfLCs(lc_buffer.items); - try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64)); - try self.writeHeader(ncmds, @as(u32, @intCast(lc_buffer.items.len))); - try self.writeUuid(comp, uuid_cmd_offset, codesig != null); - - if (codesig) |*csig| { - try self.writeCodeSignature(comp, csig); // code signing always comes last - const emit = self.base.emit; - try invalidateKernelCache(emit.directory.handle, emit.sub_path); - } - - if (self.d_sym) |*d_sym| { - // Flush debug symbols bundle. - try d_sym.flushModule(self); - } +/// --verbose-link output +fn dumpArgv(self: *MachO, comp: *Compilation) !void { + _ = self; + _ = comp; + @panic("TODO dumpArgv"); } /// XNU starting with Big Sur running on arm64 is caching inodes of running binaries. 
@@ -726,1196 +379,12 @@ pub fn parsePositional( const tracy = trace(@src()); defer tracy.end(); - if (Object.isObject(file)) { - try self.parseObject(file, path, ctx); - } else { - try self.parseLibrary(file, path, .{ - .path = null, - .needed = false, - .weak = false, - }, must_link, false, null, dependent_libs, ctx); - } -} - -fn parseObject( - self: *MachO, - file: std.fs.File, - path: []const u8, - ctx: *ParseErrorCtx, -) ParseError!void { - const tracy = trace(@src()); - defer tracy.end(); - - const gpa = self.base.comp.gpa; - const target = self.base.comp.root_mod.resolved_target.result; - const mtime: u64 = mtime: { - const stat = file.stat() catch break :mtime 0; - break :mtime @as(u64, @intCast(@divFloor(stat.mtime, 1_000_000_000))); - }; - const file_stat = try file.stat(); - const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; - const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); - - var object = Object{ - .name = try gpa.dupe(u8, path), - .mtime = mtime, - .contents = contents, - }; - errdefer object.deinit(gpa); - try object.parse(gpa); - - const detected_cpu_arch: std.Target.Cpu.Arch = switch (object.header.cputype) { - macho.CPU_TYPE_ARM64 => .aarch64, - macho.CPU_TYPE_X86_64 => .x86_64, - else => unreachable, - }; - const detected_platform = object.getPlatform(); - const this_cpu_arch = target.cpu.arch; - const this_platform = Platform.fromTarget(target); - - if (this_cpu_arch != detected_cpu_arch or - (detected_platform != null and !detected_platform.?.eqlTarget(this_platform))) - { - const platform = detected_platform orelse this_platform; - try ctx.detected_targets.append(try platform.allocPrintTarget(ctx.arena(), detected_cpu_arch)); - return error.InvalidTarget; - } - - try self.objects.append(gpa, object); -} - -pub fn parseLibrary( - self: *MachO, - file: std.fs.File, - path: []const u8, - lib: link.SystemLib, - must_link: bool, - is_dependent: bool, - reexport_info: 
?DylibReExportInfo, - dependent_libs: anytype, - ctx: *ParseErrorCtx, -) ParseError!void { - const tracy = trace(@src()); - defer tracy.end(); - - const target = self.base.comp.root_mod.resolved_target.result; - - if (fat.isFatLibrary(file)) { - const offset = try self.parseFatLibrary(file, target.cpu.arch, ctx); - try file.seekTo(offset); - - if (Archive.isArchive(file, offset)) { - try self.parseArchive(path, offset, must_link, ctx); - } else if (Dylib.isDylib(file, offset)) { - try self.parseDylib(file, path, offset, dependent_libs, .{ - .needed = lib.needed, - .weak = lib.weak, - .dependent = is_dependent, - .reexport_info = reexport_info, - }, ctx); - } else return error.UnknownFileType; - } else if (Archive.isArchive(file, 0)) { - try self.parseArchive(path, 0, must_link, ctx); - } else if (Dylib.isDylib(file, 0)) { - try self.parseDylib(file, path, 0, dependent_libs, .{ - .needed = lib.needed, - .weak = lib.weak, - .dependent = is_dependent, - .reexport_info = reexport_info, - }, ctx); - } else { - self.parseLibStub(file, path, dependent_libs, .{ - .needed = lib.needed, - .weak = lib.weak, - .dependent = is_dependent, - .reexport_info = reexport_info, - }, ctx) catch |err| switch (err) { - error.NotLibStub, error.UnexpectedToken => return error.UnknownFileType, - else => |e| return e, - }; - } -} - -pub fn parseFatLibrary( - self: *MachO, - file: std.fs.File, - cpu_arch: std.Target.Cpu.Arch, - ctx: *ParseErrorCtx, -) ParseError!u64 { - const gpa = self.base.comp.gpa; - - const fat_archs = try fat.parseArchs(gpa, file); - defer gpa.free(fat_archs); - - const offset = for (fat_archs) |arch| { - if (arch.tag == cpu_arch) break arch.offset; - } else { - try ctx.detected_targets.ensureUnusedCapacity(fat_archs.len); - for (fat_archs) |arch| { - ctx.detected_targets.appendAssumeCapacity(try ctx.arena().dupe(u8, @tagName(arch.tag))); - } - return error.InvalidTargetFatLibrary; - }; - return offset; -} - -fn parseArchive( - self: *MachO, - path: []const u8, - 
fat_offset: u64, - must_link: bool, - ctx: *ParseErrorCtx, -) ParseError!void { - const gpa = self.base.comp.gpa; - const target = self.base.comp.root_mod.resolved_target.result; - - // We take ownership of the file so that we can store it for the duration of symbol resolution. - // TODO we shouldn't need to do that and could pre-parse the archive like we do for zld/ELF? - const file = try std.fs.cwd().openFile(path, .{}); - try file.seekTo(fat_offset); - - var archive = Archive{ - .file = file, - .fat_offset = fat_offset, - .name = try gpa.dupe(u8, path), - }; - errdefer archive.deinit(gpa); - - try archive.parse(gpa, file.reader()); - - // Verify arch and platform - if (archive.toc.values().len > 0) { - const offsets = archive.toc.values()[0].items; - assert(offsets.len > 0); - const off = offsets[0]; - var object = try archive.parseObject(gpa, off); // TODO we are doing all this work to pull the header only! - defer object.deinit(gpa); - - const detected_cpu_arch: std.Target.Cpu.Arch = switch (object.header.cputype) { - macho.CPU_TYPE_ARM64 => .aarch64, - macho.CPU_TYPE_X86_64 => .x86_64, - else => unreachable, - }; - const detected_platform = object.getPlatform(); - const this_cpu_arch = target.cpu.arch; - const this_platform = Platform.fromTarget(target); - - if (this_cpu_arch != detected_cpu_arch or - (detected_platform != null and !detected_platform.?.eqlTarget(this_platform))) - { - const platform = detected_platform orelse this_platform; - try ctx.detected_targets.append(try platform.allocPrintTarget(gpa, detected_cpu_arch)); - return error.InvalidTarget; - } - } - - if (must_link) { - // Get all offsets from the ToC - var offsets = std.AutoArrayHashMap(u32, void).init(gpa); - defer offsets.deinit(); - for (archive.toc.values()) |offs| { - for (offs.items) |off| { - _ = try offsets.getOrPut(off); - } - } - for (offsets.keys()) |off| { - const object = try archive.parseObject(gpa, off); - try self.objects.append(gpa, object); - } - } else { - try 
self.archives.append(gpa, archive); - } -} - -pub const DylibReExportInfo = struct { - id: Dylib.Id, - parent: u16, -}; - -const DylibOpts = struct { - reexport_info: ?DylibReExportInfo = null, - dependent: bool = false, - needed: bool = false, - weak: bool = false, -}; - -fn parseDylib( - self: *MachO, - file: std.fs.File, - path: []const u8, - offset: u64, - dependent_libs: anytype, - dylib_options: DylibOpts, - ctx: *ParseErrorCtx, -) ParseError!void { - const gpa = self.base.comp.gpa; - const target = self.base.comp.root_mod.resolved_target.result; - const file_stat = try file.stat(); - const file_size = math.cast(usize, file_stat.size - offset) orelse return error.Overflow; - - const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); - defer gpa.free(contents); - - var dylib = Dylib{ .path = try gpa.dupe(u8, path), .weak = dylib_options.weak }; - errdefer dylib.deinit(gpa); - - try dylib.parseFromBinary( - gpa, - @intCast(self.dylibs.items.len), // TODO defer it till later - dependent_libs, - path, - contents, - ); - - const detected_cpu_arch: std.Target.Cpu.Arch = switch (dylib.header.?.cputype) { - macho.CPU_TYPE_ARM64 => .aarch64, - macho.CPU_TYPE_X86_64 => .x86_64, - else => unreachable, - }; - const detected_platform = dylib.getPlatform(contents); - const this_cpu_arch = target.cpu.arch; - const this_platform = Platform.fromTarget(target); - - if (this_cpu_arch != detected_cpu_arch or - (detected_platform != null and !detected_platform.?.eqlTarget(this_platform))) - { - const platform = detected_platform orelse this_platform; - try ctx.detected_targets.append(try platform.allocPrintTarget(ctx.arena(), detected_cpu_arch)); - return error.InvalidTarget; - } - - try self.addDylib(dylib, dylib_options, ctx); -} - -fn parseLibStub( - self: *MachO, - file: std.fs.File, - path: []const u8, - dependent_libs: anytype, - dylib_options: DylibOpts, - ctx: *ParseErrorCtx, -) ParseError!void { - const gpa = self.base.comp.gpa; - 
const target = self.base.comp.root_mod.resolved_target.result; - - var lib_stub = try LibStub.loadFromFile(gpa, file); - defer lib_stub.deinit(); - - if (lib_stub.inner.len == 0) return error.NotLibStub; - - // Verify target - { - var matcher = try Dylib.TargetMatcher.init(gpa, target); - defer matcher.deinit(); - - const first_tbd = lib_stub.inner[0]; - const targets = try first_tbd.targets(gpa); - defer { - for (targets) |t| gpa.free(t); - gpa.free(targets); - } - if (!matcher.matchesTarget(targets)) { - try ctx.detected_targets.ensureUnusedCapacity(targets.len); - for (targets) |t| { - ctx.detected_targets.appendAssumeCapacity(try ctx.arena().dupe(u8, t)); - } - return error.InvalidTarget; - } - } - - var dylib = Dylib{ .path = try gpa.dupe(u8, path), .weak = dylib_options.weak }; - errdefer dylib.deinit(gpa); - - try dylib.parseFromStub( - gpa, - target, - lib_stub, - @intCast(self.dylibs.items.len), // TODO defer it till later - dependent_libs, - path, - ); - - try self.addDylib(dylib, dylib_options, ctx); -} - -fn addDylib(self: *MachO, dylib: Dylib, dylib_options: DylibOpts, ctx: *ParseErrorCtx) ParseError!void { - if (dylib_options.reexport_info) |reexport_info| { - if (dylib.id.?.current_version < reexport_info.id.compatibility_version) { - ctx.detected_dylib_id = .{ - .parent = reexport_info.parent, - .required_version = reexport_info.id.compatibility_version, - .found_version = dylib.id.?.current_version, - }; - return error.IncompatibleDylibVersion; - } - } - - const gpa = self.base.comp.gpa; - const gop = try self.dylibs_map.getOrPut(gpa, dylib.id.?.name); - if (gop.found_existing) return error.DylibAlreadyExists; - - gop.value_ptr.* = @as(u16, @intCast(self.dylibs.items.len)); - try self.dylibs.append(gpa, dylib); - - const should_link_dylib_even_if_unreachable = blk: { - if (self.dead_strip_dylibs and !dylib_options.needed) break :blk false; - break :blk !(dylib_options.dependent or self.referenced_dylibs.contains(gop.value_ptr.*)); - }; - - if 
(should_link_dylib_even_if_unreachable) { - try self.referenced_dylibs.putNoClobber(gpa, gop.value_ptr.*, {}); - } -} - -pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype) !void { - const tracy = trace(@src()); - defer tracy.end(); - - // At this point, we can now parse dependents of dylibs preserving the inclusion order of: - // 1) anything on the linker line is parsed first - // 2) afterwards, we parse dependents of the included dylibs - // TODO this should not be performed if the user specifies `-flat_namespace` flag. - // See ld64 manpages. - const comp = self.base.comp; - const gpa = comp.gpa; - - while (dependent_libs.readItem()) |dep_id| { - defer dep_id.id.deinit(gpa); - - if (self.dylibs_map.contains(dep_id.id.name)) continue; - - const parent = &self.dylibs.items[dep_id.parent]; - const weak = parent.weak; - const dirname = fs.path.dirname(dep_id.id.name) orelse ""; - const stem = fs.path.stem(dep_id.id.name); - - var arena_allocator = std.heap.ArenaAllocator.init(gpa); - defer arena_allocator.deinit(); - const arena = arena_allocator.allocator(); - - var test_path = std.ArrayList(u8).init(arena); - var checked_paths = std.ArrayList([]const u8).init(arena); - - success: { - if (comp.sysroot) |root| { - const dir = try fs.path.join(arena, &[_][]const u8{ root, dirname }); - if (try accessLibPath(gpa, &test_path, &checked_paths, dir, stem)) break :success; - } - - if (try accessLibPath(gpa, &test_path, &checked_paths, dirname, stem)) break :success; - - try self.reportMissingLibraryError( - checked_paths.items, - "missing dynamic library dependency: '{s}'", - .{dep_id.id.name}, - ); - continue; - } - - const full_path = test_path.items; - const file = try std.fs.cwd().openFile(full_path, .{}); - defer file.close(); - - log.debug("parsing dependency {s} at fully resolved path {s}", .{ dep_id.id.name, full_path }); - - var parse_ctx = ParseErrorCtx.init(gpa); - defer parse_ctx.deinit(); - - self.parseLibrary(file, full_path, .{ - .path = null, - 
.needed = false, - .weak = weak, - }, false, true, dep_id, dependent_libs, &parse_ctx) catch |err| - try self.handleAndReportParseError(full_path, err, &parse_ctx); - - // TODO I think that it would be nice to rewrite this error to include metadata for failed dependency - // in addition to parsing error - } -} - -pub fn writeAtom(self: *MachO, atom_index: Atom.Index, code: []u8) !void { - const atom = self.getAtom(atom_index); - const sym = atom.getSymbol(self); - const section = self.sections.get(sym.n_sect - 1); - const file_offset = section.header.offset + sym.n_value - section.header.addr; - log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ atom.getName(self), file_offset }); - - // Gather relocs which can be resolved. - const gpa = self.base.comp.gpa; - var relocs = std.ArrayList(*Relocation).init(gpa); - defer relocs.deinit(); - - if (self.relocs.getPtr(atom_index)) |rels| { - try relocs.ensureTotalCapacityPrecise(rels.items.len); - for (rels.items) |*reloc| { - if (reloc.isResolvable(self) and reloc.dirty) { - relocs.appendAssumeCapacity(reloc); - } - } - } - - Atom.resolveRelocations(self, atom_index, relocs.items, code); - - if (is_hot_update_compatible) { - if (self.hot_state.mach_task) |task| { - self.writeToMemory(task, section.segment_index, sym.n_value, code) catch |err| { - log.warn("cannot hot swap: writing to memory failed: {s}", .{@errorName(err)}); - }; - } - } - - try self.base.file.?.pwriteAll(code, file_offset); - - // Now we can mark the relocs as resolved. 
- while (relocs.popOrNull()) |reloc| { - reloc.dirty = false; - } -} - -fn writeToMemory(self: *MachO, task: std.os.darwin.MachTask, segment_index: u8, addr: u64, code: []const u8) !void { - const segment = self.segments.items[segment_index]; - const target = self.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const nwritten = if (!segment.isWriteable()) - try task.writeMemProtected(addr, code, cpu_arch) - else - try task.writeMem(addr, code, cpu_arch); - if (nwritten != code.len) return error.InputOutput; -} - -fn writeOffsetTableEntry(self: *MachO, index: usize) !void { - const sect_id = self.got_section_index.?; - - if (self.got_table_count_dirty) { - const needed_size = self.got_table.entries.items.len * @sizeOf(u64); - try self.growSection(sect_id, needed_size); - self.got_table_count_dirty = false; - } - - const header = &self.sections.items(.header)[sect_id]; - const segment_index = self.sections.items(.segment_index)[sect_id]; - const entry = self.got_table.entries.items[index]; - const entry_value = self.getSymbol(entry).n_value; - const entry_offset = index * @sizeOf(u64); - const file_offset = header.offset + entry_offset; - const vmaddr = header.addr + entry_offset; - - log.debug("writing GOT entry {d}: @{x} => {x}", .{ index, vmaddr, entry_value }); - - var buf: [@sizeOf(u64)]u8 = undefined; - mem.writeInt(u64, &buf, entry_value, .little); - try self.base.file.?.pwriteAll(&buf, file_offset); - - if (is_hot_update_compatible) { - if (self.hot_state.mach_task) |task| { - self.writeToMemory(task, segment_index, vmaddr, &buf) catch |err| { - log.warn("cannot hot swap: writing to memory failed: {s}", .{@errorName(err)}); - }; - } - } -} - -fn writeStubHelperPreamble(self: *MachO) !void { - if (self.stub_helper_preamble_allocated) return; - - const gpa = self.base.comp.gpa; - const target = self.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const size = 
stubs.stubHelperPreambleSize(cpu_arch); - - var buf = try std.ArrayList(u8).initCapacity(gpa, size); - defer buf.deinit(); - - const dyld_private_addr = self.getAtom(self.dyld_private_atom_index.?).getSymbol(self).n_value; - const dyld_stub_binder_got_addr = blk: { - const index = self.got_table.lookup.get(self.getGlobalByIndex(self.dyld_stub_binder_index.?)).?; - const header = self.sections.items(.header)[self.got_section_index.?]; - break :blk header.addr + @sizeOf(u64) * index; - }; - const header = self.sections.items(.header)[self.stub_helper_section_index.?]; - - try stubs.writeStubHelperPreambleCode(.{ - .cpu_arch = cpu_arch, - .source_addr = header.addr, - .dyld_private_addr = dyld_private_addr, - .dyld_stub_binder_got_addr = dyld_stub_binder_got_addr, - }, buf.writer()); - try self.base.file.?.pwriteAll(buf.items, header.offset); - - self.stub_helper_preamble_allocated = true; -} - -fn writeStubTableEntry(self: *MachO, index: usize) !void { - const target = self.base.comp.root_mod.resolved_target.result; - const stubs_sect_id = self.stubs_section_index.?; - const stub_helper_sect_id = self.stub_helper_section_index.?; - const laptr_sect_id = self.la_symbol_ptr_section_index.?; - - const cpu_arch = target.cpu.arch; - const stub_entry_size = stubs.stubSize(cpu_arch); - const stub_helper_entry_size = stubs.stubHelperSize(cpu_arch); - const stub_helper_preamble_size = stubs.stubHelperPreambleSize(cpu_arch); - - if (self.stub_table_count_dirty) { - // We grow all 3 sections one by one. 
- { - const needed_size = stub_entry_size * self.stub_table.entries.items.len; - try self.growSection(stubs_sect_id, needed_size); - } - { - const needed_size = stub_helper_preamble_size + stub_helper_entry_size * self.stub_table.entries.items.len; - try self.growSection(stub_helper_sect_id, needed_size); - } - { - const needed_size = @sizeOf(u64) * self.stub_table.entries.items.len; - try self.growSection(laptr_sect_id, needed_size); - } - self.stub_table_count_dirty = false; - } - - const gpa = self.base.comp.gpa; - - const stubs_header = self.sections.items(.header)[stubs_sect_id]; - const stub_helper_header = self.sections.items(.header)[stub_helper_sect_id]; - const laptr_header = self.sections.items(.header)[laptr_sect_id]; - - const entry = self.stub_table.entries.items[index]; - const stub_addr: u64 = stubs_header.addr + stub_entry_size * index; - const stub_helper_addr: u64 = stub_helper_header.addr + stub_helper_preamble_size + stub_helper_entry_size * index; - const laptr_addr: u64 = laptr_header.addr + @sizeOf(u64) * index; - - log.debug("writing stub entry {d}: @{x} => '{s}'", .{ index, stub_addr, self.getSymbolName(entry) }); - - { - var buf = try std.ArrayList(u8).initCapacity(gpa, stub_entry_size); - defer buf.deinit(); - try stubs.writeStubCode(.{ - .cpu_arch = cpu_arch, - .source_addr = stub_addr, - .target_addr = laptr_addr, - }, buf.writer()); - const off = stubs_header.offset + stub_entry_size * index; - try self.base.file.?.pwriteAll(buf.items, off); - } - - { - var buf = try std.ArrayList(u8).initCapacity(gpa, stub_helper_entry_size); - defer buf.deinit(); - try stubs.writeStubHelperCode(.{ - .cpu_arch = cpu_arch, - .source_addr = stub_helper_addr, - .target_addr = stub_helper_header.addr, - }, buf.writer()); - const off = stub_helper_header.offset + stub_helper_preamble_size + stub_helper_entry_size * index; - try self.base.file.?.pwriteAll(buf.items, off); - } - - { - var buf: [@sizeOf(u64)]u8 = undefined; - mem.writeInt(u64, &buf, 
stub_helper_addr, .little); - const off = laptr_header.offset + @sizeOf(u64) * index; - try self.base.file.?.pwriteAll(&buf, off); - } - - // TODO: generating new stub entry will require pulling the address of the symbol from the - // target dylib when updating directly in memory. - if (is_hot_update_compatible) { - if (self.hot_state.mach_task) |_| { - @panic("TODO: update a stub entry in memory"); - } - } -} - -fn markRelocsDirtyByTarget(self: *MachO, target: SymbolWithLoc) void { - log.debug("marking relocs dirty by target: {}", .{target}); - // TODO: reverse-lookup might come in handy here - for (self.relocs.values()) |*relocs| { - for (relocs.items) |*reloc| { - if (!reloc.target.eql(target)) continue; - reloc.dirty = true; - } - } -} - -fn markRelocsDirtyByAddress(self: *MachO, addr: u64) void { - log.debug("marking relocs dirty by address: {x}", .{addr}); - - const got_moved = blk: { - const sect_id = self.got_section_index orelse break :blk false; - break :blk self.sections.items(.header)[sect_id].addr > addr; - }; - const stubs_moved = blk: { - const sect_id = self.stubs_section_index orelse break :blk false; - break :blk self.sections.items(.header)[sect_id].addr > addr; - }; - - for (self.relocs.values()) |*relocs| { - for (relocs.items) |*reloc| { - if (reloc.isGotIndirection()) { - reloc.dirty = reloc.dirty or got_moved; - } else if (reloc.isStubTrampoline(self)) { - reloc.dirty = reloc.dirty or stubs_moved; - } else { - const target_addr = reloc.getTargetBaseAddress(self) orelse continue; - if (target_addr > addr) reloc.dirty = true; - } - } - } - - // TODO: dirty only really affected GOT cells - for (self.got_table.entries.items) |entry| { - const target_addr = self.getSymbol(entry).n_value; - if (target_addr > addr) { - self.got_table_contents_dirty = true; - break; - } - } - - { - const stubs_addr = self.getSegment(self.stubs_section_index.?).vmaddr; - const stub_helper_addr = self.getSegment(self.stub_helper_section_index.?).vmaddr; - const 
laptr_addr = self.getSegment(self.la_symbol_ptr_section_index.?).vmaddr; - if (stubs_addr > addr or stub_helper_addr > addr or laptr_addr > addr) - self.stub_table_contents_dirty = true; - } -} - -pub fn allocateSpecialSymbols(self: *MachO) !void { - for (&[_][]const u8{ - "___dso_handle", - "__mh_execute_header", - }) |name| { - const global = self.getGlobal(name) orelse continue; - if (global.getFile() != null) continue; - const sym = self.getSymbolPtr(global); - const seg = self.getSegment(self.text_section_index.?); - sym.n_sect = self.text_section_index.? + 1; - sym.n_value = seg.vmaddr; - - log.debug("allocating {s}(@0x{x},sect({d})) at the start of {s}", .{ - name, - sym.n_value, - sym.n_sect, - seg.segName(), - }); - } - - for (self.globals.items) |global| { - const sym = self.getSymbolPtr(global); - if (sym.n_desc != N_BOUNDARY) continue; - if (self.getSectionBoundarySymbol(global)) |bsym| { - const sect_id = self.getSectionByName(bsym.segname, bsym.sectname) orelse { - try self.reportUnresolvedBoundarySymbol(self.getSymbolName(global), "section not found: {s},{s}", .{ - bsym.segname, bsym.sectname, - }); - continue; - }; - const sect = self.sections.items(.header)[sect_id]; - sym.n_sect = sect_id + 1; - sym.n_value = switch (bsym.kind) { - .start => sect.addr, - .stop => sect.addr + sect.size, - }; - - log.debug("allocating {s} at @0x{x} sect({d})", .{ - self.getSymbolName(global), - sym.n_value, - sym.n_sect, - }); - - continue; - } - if (self.getSegmentBoundarySymbol(global)) |bsym| { - const seg_id = self.getSegmentByName(bsym.segname) orelse { - try self.reportUnresolvedBoundarySymbol(self.getSymbolName(global), "segment not found: {s}", .{ - bsym.segname, - }); - - continue; - }; - const seg = self.segments.items[seg_id]; - sym.n_value = switch (bsym.kind) { - .start => seg.vmaddr, - .stop => seg.vmaddr + seg.vmsize, - }; - - log.debug("allocating {s} at @0x{x} ", .{ self.getSymbolName(global), sym.n_value }); - - continue; - } - } -} - -const 
CreateAtomOpts = struct { - size: u64 = 0, - alignment: Alignment = .@"1", -}; - -pub fn createAtom(self: *MachO, sym_index: u32, opts: CreateAtomOpts) !Atom.Index { - const gpa = self.base.comp.gpa; - const index = @as(Atom.Index, @intCast(self.atoms.items.len)); - const atom = try self.atoms.addOne(gpa); - atom.* = .{}; - atom.sym_index = sym_index; - atom.size = opts.size; - atom.alignment = opts.alignment; - log.debug("creating ATOM(%{d}) at index {d}", .{ sym_index, index }); - return index; -} - -pub fn createTentativeDefAtoms(self: *MachO) !void { - const gpa = self.base.comp.gpa; - - for (self.globals.items) |global| { - const sym = self.getSymbolPtr(global); - if (!sym.tentative()) continue; - if (sym.n_desc == N_DEAD) continue; - if (sym.n_desc == N_BOUNDARY) continue; - - log.debug("creating tentative definition for ATOM(%{d}, '{s}') in object({?})", .{ - global.sym_index, self.getSymbolName(global), global.file, - }); - - // Convert any tentative definition into a regular symbol and allocate - // text blocks for each tentative definition. - const size = sym.n_value; - const alignment = (sym.n_desc >> 8) & 0x0f; - - if (self.bss_section_index == null) { - self.bss_section_index = try self.initSection("__DATA", "__bss", .{ - .flags = macho.S_ZEROFILL, - }); - } - - sym.* = .{ - .n_strx = sym.n_strx, - .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = self.bss_section_index.? 
+ 1, - .n_desc = 0, - .n_value = 0, - }; - - const atom_index = try self.createAtom(global.sym_index, .{ - .size = size, - .alignment = @enumFromInt(alignment), - }); - const atom = self.getAtomPtr(atom_index); - atom.file = global.file; - - self.addAtomToSection(atom_index); - - assert(global.getFile() != null); - const object = &self.objects.items[global.getFile().?]; - try object.atoms.append(gpa, atom_index); - object.atom_by_index_table[global.sym_index] = atom_index; - } -} - -pub fn createDyldPrivateAtom(self: *MachO) !void { - if (self.dyld_private_atom_index != null) return; - - const sym_index = try self.allocateSymbol(); - const atom_index = try self.createAtom(sym_index, .{ - .size = @sizeOf(u64), - .alignment = .@"8", - }); - const gpa = self.base.comp.gpa; - try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom_index); - - if (self.data_section_index == null) { - self.data_section_index = try self.initSection("__DATA", "__data", .{}); - } - - const atom = self.getAtom(atom_index); - const sym = atom.getSymbolPtr(self); - sym.n_type = macho.N_SECT; - sym.n_sect = self.data_section_index.? 
+ 1; - self.dyld_private_atom_index = atom_index; - - switch (self.mode) { - .zld => self.addAtomToSection(atom_index), - .incremental => { - sym.n_value = try self.allocateAtom(atom_index, atom.size, .@"8"); - log.debug("allocated dyld_private atom at 0x{x}", .{sym.n_value}); - var buffer: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64); - try self.writeAtom(atom_index, &buffer); - }, - } -} - -fn createThreadLocalDescriptorAtom(self: *MachO, sym_name: []const u8, target: SymbolWithLoc) !Atom.Index { - const gpa = self.base.comp.gpa; - const size = 3 * @sizeOf(u64); - const required_alignment: Alignment = .@"1"; - const sym_index = try self.allocateSymbol(); - const atom_index = try self.createAtom(sym_index, .{}); - try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom_index); - self.getAtomPtr(atom_index).size = size; - - const sym = self.getAtom(atom_index).getSymbolPtr(self); - sym.n_type = macho.N_SECT; - sym.n_sect = self.thread_vars_section_index.? + 1; - sym.n_strx = try self.strtab.insert(gpa, sym_name); - sym.n_value = try self.allocateAtom(atom_index, size, required_alignment); - - log.debug("allocated threadlocal descriptor atom '{s}' at 0x{x}", .{ sym_name, sym.n_value }); - - try Atom.addRelocation(self, atom_index, .{ - .type = .tlv_initializer, - .target = target, - .offset = 0x10, - .addend = 0, - .pcrel = false, - .length = 3, - }); - - var code: [size]u8 = undefined; - @memset(&code, 0); - try self.writeAtom(atom_index, &code); - - return atom_index; -} - -pub fn createMhExecuteHeaderSymbol(self: *MachO) !void { - const output_mode = self.base.comp.config.output_mode; - if (output_mode != .Exe) return; - - const gpa = self.base.comp.gpa; - const sym_index = try self.allocateSymbol(); - const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; - const sym = self.getSymbolPtr(sym_loc); - sym.* = .{ - .n_strx = try self.strtab.insert(gpa, "__mh_execute_header"), - .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = 0, - .n_desc = 
macho.REFERENCED_DYNAMICALLY, - .n_value = 0, - }; - - const gop = try self.getOrPutGlobalPtr("__mh_execute_header"); - if (gop.found_existing) { - const global = gop.value_ptr.*; - if (global.getFile()) |file| { - const global_object = &self.objects.items[file]; - global_object.globals_lookup[global.sym_index] = self.getGlobalIndex("__mh_execute_header").?; - } - } - gop.value_ptr.* = sym_loc; -} - -pub fn createDsoHandleSymbol(self: *MachO) !void { - const global = self.getGlobalPtr("___dso_handle") orelse return; - if (!self.getSymbol(global.*).undf()) return; - - const gpa = self.base.comp.gpa; - const sym_index = try self.allocateSymbol(); - const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; - const sym = self.getSymbolPtr(sym_loc); - sym.* = .{ - .n_strx = try self.strtab.insert(gpa, "___dso_handle"), - .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = 0, - .n_desc = macho.N_WEAK_DEF, - .n_value = 0, - }; - const global_index = self.getGlobalIndex("___dso_handle").?; - if (global.getFile()) |file| { - const global_object = &self.objects.items[file]; - global_object.globals_lookup[global.sym_index] = global_index; - } - global.* = sym_loc; - _ = self.unresolved.swapRemove(self.getGlobalIndex("___dso_handle").?); -} - -pub fn resolveSymbols(self: *MachO) !void { - const comp = self.base.comp; - const output_mode = comp.config.output_mode; - // We add the specified entrypoint as the first unresolved symbols so that - // we search for it in libraries should there be no object files specified - // on the linker line. - if (output_mode == .Exe) { - if (self.entry_name) |entry_name| { - _ = try self.addUndefined(entry_name, .{}); - } - } - - // Force resolution of any symbols requested by the user. - for (comp.force_undefined_symbols.keys()) |sym_name| { - _ = try self.addUndefined(sym_name, .{}); - } - - for (self.objects.items, 0..) 
|_, object_id| { - try self.resolveSymbolsInObject(@as(u32, @intCast(object_id))); - } - - try self.resolveSymbolsInArchives(); - - // Finally, force resolution of dyld_stub_binder if there are imports - // requested. - if (self.unresolved.count() > 0 and self.dyld_stub_binder_index == null) { - self.dyld_stub_binder_index = try self.addUndefined("dyld_stub_binder", .{ .add_got = true }); - } - if (comp.config.any_non_single_threaded and self.mode == .incremental) { - _ = try self.addUndefined("__tlv_bootstrap", .{}); - } - - try self.resolveSymbolsInDylibs(); - - try self.createMhExecuteHeaderSymbol(); - try self.createDsoHandleSymbol(); - try self.resolveSymbolsAtLoading(); - - // Final stop, check if unresolved contain any of the special magic boundary symbols - // * section$start$ - // * section$stop$ - // * segment$start$ - // * segment$stop$ - try self.resolveBoundarySymbols(); -} - -fn resolveGlobalSymbol(self: *MachO, current: SymbolWithLoc) !void { - const gpa = self.base.comp.gpa; - const sym = self.getSymbol(current); - const sym_name = self.getSymbolName(current); - - const gop = try self.getOrPutGlobalPtr(sym_name); - if (!gop.found_existing) { - gop.value_ptr.* = current; - if (sym.undf() and !sym.tentative()) { - try self.unresolved.putNoClobber(gpa, self.getGlobalIndex(sym_name).?, {}); - } - return; - } - const global_index = self.getGlobalIndex(sym_name).?; - const global = gop.value_ptr.*; - const global_sym = self.getSymbol(global); - - // Cases to consider: sym vs global_sym - // 1. strong(sym) and strong(global_sym) => error - // 2. strong(sym) and weak(global_sym) => sym - // 3. strong(sym) and tentative(global_sym) => sym - // 4. strong(sym) and undf(global_sym) => sym - // 5. weak(sym) and strong(global_sym) => global_sym - // 6. weak(sym) and tentative(global_sym) => sym - // 7. weak(sym) and undf(global_sym) => sym - // 8. tentative(sym) and strong(global_sym) => global_sym - // 9. 
tentative(sym) and weak(global_sym) => global_sym - // 10. tentative(sym) and tentative(global_sym) => pick larger - // 11. tentative(sym) and undf(global_sym) => sym - // 12. undf(sym) and * => global_sym - // - // Reduces to: - // 1. strong(sym) and strong(global_sym) => error - // 2. * and strong(global_sym) => global_sym - // 3. weak(sym) and weak(global_sym) => global_sym - // 4. tentative(sym) and tentative(global_sym) => pick larger - // 5. undf(sym) and * => global_sym - // 6. else => sym - - const sym_is_strong = sym.sect() and !(sym.weakDef() or sym.pext()); - const global_is_strong = global_sym.sect() and !(global_sym.weakDef() or global_sym.pext()); - const sym_is_weak = sym.sect() and (sym.weakDef() or sym.pext()); - const global_is_weak = global_sym.sect() and (global_sym.weakDef() or global_sym.pext()); - - if (sym_is_strong and global_is_strong) { - // TODO redo this logic with corresponding logic in updateExports to avoid this - // ugly check. - if (self.mode == .zld) { - try self.reportSymbolCollision(global, current); - } - return error.MultipleSymbolDefinitions; - } - - if (current.getFile()) |file| { - const object = &self.objects.items[file]; - object.globals_lookup[current.sym_index] = global_index; - } - - if (global_is_strong) return; - if (sym_is_weak and global_is_weak) return; - if (sym.tentative() and global_sym.tentative()) { - if (global_sym.n_value >= sym.n_value) return; - } - if (sym.undf() and !sym.tentative()) return; - - if (global.getFile()) |file| { - const global_object = &self.objects.items[file]; - global_object.globals_lookup[global.sym_index] = global_index; - } - _ = self.unresolved.swapRemove(global_index); - - gop.value_ptr.* = current; -} - -fn resolveSymbolsInObject(self: *MachO, object_id: u32) !void { - const object = &self.objects.items[object_id]; - const in_symtab = object.in_symtab orelse return; - - log.debug("resolving symbols in '{s}'", .{object.name}); - - var sym_index: u32 = 0; - while (sym_index < 
in_symtab.len) : (sym_index += 1) { - const sym = &object.symtab[sym_index]; - const sym_name = object.getSymbolName(sym_index); - const sym_with_loc = SymbolWithLoc{ - .sym_index = sym_index, - .file = object_id + 1, - }; - - if (sym.stab() or sym.indr() or sym.abs()) { - try self.reportUnhandledSymbolType(sym_with_loc); - continue; - } - - if (sym.sect() and !sym.ext()) { - log.debug("symbol '{s}' local to object {s}; skipping...", .{ - sym_name, - object.name, - }); - continue; - } - - self.resolveGlobalSymbol(.{ - .sym_index = sym_index, - .file = object_id + 1, - }) catch |err| switch (err) { - error.MultipleSymbolDefinitions => return error.FlushFailure, - else => |e| return e, - }; - } -} - -fn resolveSymbolsInArchives(self: *MachO) !void { - if (self.archives.items.len == 0) return; - - const gpa = self.base.comp.gpa; - var next_sym: usize = 0; - loop: while (next_sym < self.unresolved.count()) { - const global = self.globals.items[self.unresolved.keys()[next_sym]]; - const sym_name = self.getSymbolName(global); - - for (self.archives.items) |archive| { - // Check if the entry exists in a static archive. - const offsets = archive.toc.get(sym_name) orelse { - // No hit. - continue; - }; - assert(offsets.items.len > 0); - - const object_id = @as(u16, @intCast(self.objects.items.len)); - const object = try archive.parseObject(gpa, offsets.items[0]); - try self.objects.append(gpa, object); - try self.resolveSymbolsInObject(object_id); - - continue :loop; - } - - next_sym += 1; - } -} - -fn resolveSymbolsInDylibs(self: *MachO) !void { - if (self.dylibs.items.len == 0) return; - - const gpa = self.base.comp.gpa; - var next_sym: usize = 0; - loop: while (next_sym < self.unresolved.count()) { - const global_index = self.unresolved.keys()[next_sym]; - const global = self.globals.items[global_index]; - const sym = self.getSymbolPtr(global); - const sym_name = self.getSymbolName(global); - - for (self.dylibs.items, 0..) 
|dylib, id| { - if (!dylib.symbols.contains(sym_name)) continue; - - const dylib_id = @as(u16, @intCast(id)); - if (!self.referenced_dylibs.contains(dylib_id)) { - try self.referenced_dylibs.putNoClobber(gpa, dylib_id, {}); - } - - const ordinal = self.referenced_dylibs.getIndex(dylib_id) orelse unreachable; - sym.n_type |= macho.N_EXT; - sym.n_desc = @as(u16, @intCast(ordinal + 1)) * macho.N_SYMBOL_RESOLVER; - - if (dylib.weak) { - sym.n_desc |= macho.N_WEAK_REF; - } - - _ = self.unresolved.swapRemove(global_index); - - continue :loop; - } - - next_sym += 1; - } -} - -fn resolveSymbolsAtLoading(self: *MachO) !void { - const output_mode = self.base.comp.config.output_mode; - const is_lib = output_mode == .Lib; - const is_dyn_lib = self.base.comp.config.link_mode == .Dynamic and is_lib; - const allow_undef = is_dyn_lib and self.base.allow_shlib_undefined; - - var next_sym: usize = 0; - while (next_sym < self.unresolved.count()) { - const global_index = self.unresolved.keys()[next_sym]; - const global = self.globals.items[global_index]; - const sym = self.getSymbolPtr(global); - - if (sym.discarded()) { - sym.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - _ = self.unresolved.swapRemove(global_index); - continue; - } else if (allow_undef) { - const n_desc = @as( - u16, - @bitCast(macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP * @as(i16, @intCast(macho.N_SYMBOL_RESOLVER))), - ); - sym.n_type = macho.N_EXT; - sym.n_desc = n_desc; - _ = self.unresolved.swapRemove(global_index); - continue; - } - - next_sym += 1; - } -} - -fn resolveBoundarySymbols(self: *MachO) !void { - const gpa = self.base.comp.gpa; - var next_sym: usize = 0; - while (next_sym < self.unresolved.count()) { - const global_index = self.unresolved.keys()[next_sym]; - const global = &self.globals.items[global_index]; - - if (self.getSectionBoundarySymbol(global.*) != null or self.getSegmentBoundarySymbol(global.*) != null) { - const sym_index = try 
self.allocateSymbol(); - const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; - const sym = self.getSymbolPtr(sym_loc); - sym.* = .{ - .n_strx = try self.strtab.insert(gpa, self.getSymbolName(global.*)), - .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = 0, - .n_desc = N_BOUNDARY, - .n_value = 0, - }; - if (global.getFile()) |file| { - const global_object = &self.objects.items[file]; - global_object.globals_lookup[global.sym_index] = global_index; - } - global.* = sym_loc; - _ = self.unresolved.swapRemove(global_index); - continue; - } - - next_sym += 1; - } + _ = self; + _ = file; + _ = path; + _ = must_link; + _ = dependent_libs; + _ = ctx; } pub fn deinit(self: *MachO) void { @@ -1927,45 +396,7 @@ pub fn deinit(self: *MachO) void { d_sym.deinit(); } - self.got_table.deinit(gpa); - self.stub_table.deinit(gpa); - self.tlv_ptr_table.deinit(gpa); - self.thunk_table.deinit(gpa); - - for (self.thunks.items) |*thunk| { - thunk.deinit(gpa); - } - self.thunks.deinit(gpa); - self.strtab.deinit(gpa); - self.locals.deinit(gpa); - self.globals.deinit(gpa); - self.locals_free_list.deinit(gpa); - self.globals_free_list.deinit(gpa); - self.unresolved.deinit(gpa); - - { - var it = self.resolver.keyIterator(); - while (it.next()) |key_ptr| { - gpa.free(key_ptr.*); - } - self.resolver.deinit(gpa); - } - - for (self.objects.items) |*object| { - object.deinit(gpa); - } - self.objects.deinit(gpa); - for (self.archives.items) |*archive| { - archive.deinit(gpa); - } - self.archives.deinit(gpa); - for (self.dylibs.items) |*dylib| { - dylib.deinit(gpa); - } - self.dylibs.deinit(gpa); - self.dylibs_map.deinit(gpa); - self.referenced_dylibs.deinit(gpa); self.segments.deinit(gpa); @@ -1973,47 +404,6 @@ pub fn deinit(self: *MachO) void { list.deinit(gpa); } self.sections.deinit(gpa); - - self.atoms.deinit(gpa); - - for (self.decls.values()) |*m| { - m.exports.deinit(gpa); - } - self.decls.deinit(gpa); - - self.lazy_syms.deinit(gpa); - self.tlv_table.deinit(gpa); - - for 
(self.unnamed_const_atoms.values()) |*atoms| { - atoms.deinit(gpa); - } - self.unnamed_const_atoms.deinit(gpa); - - { - var it = self.anon_decls.iterator(); - while (it.next()) |entry| { - entry.value_ptr.exports.deinit(gpa); - } - self.anon_decls.deinit(gpa); - } - - self.atom_by_index_table.deinit(gpa); - - for (self.relocs.values()) |*relocs| { - relocs.deinit(gpa); - } - self.relocs.deinit(gpa); - self.actions.deinit(gpa); - - for (self.rebases.values()) |*rebases| { - rebases.deinit(gpa); - } - self.rebases.deinit(gpa); - - for (self.bindings.values()) |*bindings| { - bindings.deinit(gpa); - } - self.bindings.deinit(gpa); } fn freeAtom(self: *MachO, atom_index: Atom.Index) void { @@ -2100,116 +490,11 @@ fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void { } fn growAtom(self: *MachO, atom_index: Atom.Index, new_atom_size: u64, alignment: Alignment) !u64 { - const atom = self.getAtom(atom_index); - const sym = atom.getSymbol(self); - const align_ok = alignment.check(sym.n_value); - const need_realloc = !align_ok or new_atom_size > atom.capacity(self); - if (!need_realloc) return sym.n_value; - return self.allocateAtom(atom_index, new_atom_size, alignment); -} - -pub fn allocateSymbol(self: *MachO) !u32 { - const gpa = self.base.comp.gpa; - try self.locals.ensureUnusedCapacity(gpa, 1); - - const index = blk: { - if (self.locals_free_list.popOrNull()) |index| { - log.debug(" (reusing symbol index {d})", .{index}); - break :blk index; - } else { - log.debug(" (allocating symbol index {d})", .{self.locals.items.len}); - const index = @as(u32, @intCast(self.locals.items.len)); - _ = self.locals.addOneAssumeCapacity(); - break :blk index; - } - }; - - self.locals.items[index] = .{ - .n_strx = 0, - .n_type = 0, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - - return index; -} - -fn allocateGlobal(self: *MachO) !u32 { - const gpa = self.base.comp.gpa; - try self.globals.ensureUnusedCapacity(gpa, 1); - - const index = blk: { - if 
(self.globals_free_list.popOrNull()) |index| { - log.debug(" (reusing global index {d})", .{index}); - break :blk index; - } else { - log.debug(" (allocating symbol index {d})", .{self.globals.items.len}); - const index = @as(u32, @intCast(self.globals.items.len)); - _ = self.globals.addOneAssumeCapacity(); - break :blk index; - } - }; - - self.globals.items[index] = .{ .sym_index = 0 }; - - return index; -} - -pub fn addGotEntry(self: *MachO, reloc_target: SymbolWithLoc) !void { - if (self.got_table.lookup.contains(reloc_target)) return; - const gpa = self.base.comp.gpa; - const got_index = try self.got_table.allocateEntry(gpa, reloc_target); - if (self.got_section_index == null) { - self.got_section_index = try self.initSection("__DATA_CONST", "__got", .{ - .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, - }); - } - if (self.mode == .incremental) { - try self.writeOffsetTableEntry(got_index); - self.got_table_count_dirty = true; - self.markRelocsDirtyByTarget(reloc_target); - } -} - -pub fn addStubEntry(self: *MachO, reloc_target: SymbolWithLoc) !void { - if (self.stub_table.lookup.contains(reloc_target)) return; - const comp = self.base.comp; - const gpa = comp.gpa; - const cpu_arch = comp.root_mod.resolved_target.result.cpu.arch; - const stub_index = try self.stub_table.allocateEntry(gpa, reloc_target); - if (self.stubs_section_index == null) { - self.stubs_section_index = try self.initSection("__TEXT", "__stubs", .{ - .flags = macho.S_SYMBOL_STUBS | - macho.S_ATTR_PURE_INSTRUCTIONS | - macho.S_ATTR_SOME_INSTRUCTIONS, - .reserved2 = stubs.stubSize(cpu_arch), - }); - self.stub_helper_section_index = try self.initSection("__TEXT", "__stub_helper", .{ - .flags = macho.S_REGULAR | - macho.S_ATTR_PURE_INSTRUCTIONS | - macho.S_ATTR_SOME_INSTRUCTIONS, - }); - self.la_symbol_ptr_section_index = try self.initSection("__DATA", "__la_symbol_ptr", .{ - .flags = macho.S_LAZY_SYMBOL_POINTERS, - }); - } - if (self.mode == .incremental) { - try self.writeStubTableEntry(stub_index); 
- self.stub_table_count_dirty = true; - self.markRelocsDirtyByTarget(reloc_target); - } -} - -pub fn addTlvPtrEntry(self: *MachO, reloc_target: SymbolWithLoc) !void { - if (self.tlv_ptr_table.lookup.contains(reloc_target)) return; - const gpa = self.base.comp.gpa; - _ = try self.tlv_ptr_table.allocateEntry(gpa, reloc_target); - if (self.tlv_ptr_section_index == null) { - self.tlv_ptr_section_index = try self.initSection("__DATA", "__thread_ptrs", .{ - .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, - }); - } + _ = self; + _ = atom_index; + _ = new_atom_size; + _ = alignment; + @panic("TODO growAtom"); } pub fn updateFunc(self: *MachO, mod: *Module, func_index: InternPool.Index, air: Air, liveness: Liveness) !void { @@ -2217,85 +502,16 @@ pub fn updateFunc(self: *MachO, mod: *Module, func_index: InternPool.Index, air: @panic("Attempted to compile for object format that was disabled by build configuration"); } if (self.llvm_object) |llvm_object| return llvm_object.updateFunc(mod, func_index, air, liveness); - const tracy = trace(@src()); - defer tracy.end(); - const func = mod.funcInfo(func_index); - const decl_index = func.owner_decl; - const decl = mod.declPtr(decl_index); - - const atom_index = try self.getOrCreateAtomForDecl(decl_index); - self.freeUnnamedConsts(decl_index); - Atom.freeRelocations(self, atom_index); - - const gpa = self.base.comp.gpa; - var code_buffer = std.ArrayList(u8).init(gpa); - defer code_buffer.deinit(); - - var decl_state = if (self.d_sym) |*d_sym| - try d_sym.dwarf.initDeclState(mod, decl_index) - else - null; - defer if (decl_state) |*ds| ds.deinit(); - - const res = if (decl_state) |*ds| - try codegen.generateFunction(&self.base, decl.srcLoc(mod), func_index, air, liveness, &code_buffer, .{ - .dwarf = ds, - }) - else - try codegen.generateFunction(&self.base, decl.srcLoc(mod), func_index, air, liveness, &code_buffer, .none); - - const code = switch (res) { - .ok => code_buffer.items, - .fail => |em| { - decl.analysis = 
.codegen_failure; - try mod.failed_decls.put(mod.gpa, decl_index, em); - return; - }, - }; - - const addr = try self.updateDeclCode(decl_index, code); - - if (decl_state) |*ds| { - try self.d_sym.?.dwarf.commitDeclState( - mod, - decl_index, - addr, - self.getAtom(atom_index).size, - ds, - ); - } - - // Since we updated the vaddr and the size, each corresponding export symbol also - // needs to be updated. - try self.updateExports(mod, .{ .decl_index = decl_index }, mod.getDeclExports(decl_index)); + @panic("TODO updateFunc"); } pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: InternPool.DeclIndex) !u32 { - const gpa = self.base.comp.gpa; - const mod = self.base.comp.module.?; - const gop = try self.unnamed_const_atoms.getOrPut(gpa, decl_index); - if (!gop.found_existing) { - gop.value_ptr.* = .{}; - } - const unnamed_consts = gop.value_ptr; - const decl = mod.declPtr(decl_index); - const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); - const index = unnamed_consts.items.len; - const name = try std.fmt.allocPrint(gpa, "___unnamed_{s}_{d}", .{ decl_name, index }); - defer gpa.free(name); - const atom_index = switch (try self.lowerConst(name, typed_value, typed_value.ty.abiAlignment(mod), self.data_const_section_index.?, decl.srcLoc(mod))) { - .ok => |atom_index| atom_index, - .fail => |em| { - decl.analysis = .codegen_failure; - try mod.failed_decls.put(mod.gpa, decl_index, em); - log.debug("{s}", .{em.msg}); - return error.CodegenFail; - }, - }; - try unnamed_consts.append(gpa, atom_index); - const atom = self.getAtomPtr(atom_index); - return atom.getSymbolIndex().?; + _ = self; + _ = typed_value; + _ = decl_index; + + @panic("TODO lowerUnnamedConst"); } const LowerConstResult = union(enum) { @@ -2311,44 +527,14 @@ fn lowerConst( sect_id: u8, src_loc: Module.SrcLoc, ) !LowerConstResult { - const gpa = self.base.comp.gpa; + _ = self; + _ = name; + _ = tv; + _ = required_alignment; + _ = sect_id; + _ = 
src_loc; - var code_buffer = std.ArrayList(u8).init(gpa); - defer code_buffer.deinit(); - - log.debug("allocating symbol indexes for {s}", .{name}); - - const sym_index = try self.allocateSymbol(); - const atom_index = try self.createAtom(sym_index, .{}); - try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom_index); - - const res = try codegen.generateSymbol(&self.base, src_loc, tv, &code_buffer, .none, .{ - .parent_atom_index = self.getAtom(atom_index).getSymbolIndex().?, - }); - const code = switch (res) { - .ok => code_buffer.items, - .fail => |em| return .{ .fail = em }, - }; - - const atom = self.getAtomPtr(atom_index); - atom.size = code.len; - // TODO: work out logic for disambiguating functions from function pointers - // const sect_id = self.getDeclOutputSection(decl_index); - const symbol = atom.getSymbolPtr(self); - const name_str_index = try self.strtab.insert(gpa, name); - symbol.n_strx = name_str_index; - symbol.n_type = macho.N_SECT; - symbol.n_sect = sect_id + 1; - symbol.n_value = try self.allocateAtom(atom_index, code.len, required_alignment); - errdefer self.freeAtom(atom_index); - - log.debug("allocated atom for {s} at 0x{x}", .{ name, symbol.n_value }); - log.debug(" (required alignment 0x{x})", .{required_alignment}); - - try self.writeAtom(atom_index, code); - self.markRelocsDirtyByTarget(atom.getSymbolWithLoc()); - - return .{ .ok = atom_index }; + @panic("TODO lowerConst"); } pub fn updateDecl(self: *MachO, mod: *Module, decl_index: InternPool.DeclIndex) !void { @@ -2356,86 +542,11 @@ pub fn updateDecl(self: *MachO, mod: *Module, decl_index: InternPool.DeclIndex) @panic("Attempted to compile for object format that was disabled by build configuration"); } if (self.llvm_object) |llvm_object| return llvm_object.updateDecl(mod, decl_index); + const tracy = trace(@src()); defer tracy.end(); - const comp = self.base.comp; - const gpa = comp.gpa; - const decl = mod.declPtr(decl_index); - - if (decl.val.getExternFunc(mod)) |_| { - 
return; - } - - if (decl.isExtern(mod)) { - // TODO make this part of getGlobalSymbol - const name = mod.intern_pool.stringToSlice(decl.name); - const sym_name = try std.fmt.allocPrint(gpa, "_{s}", .{name}); - defer gpa.free(sym_name); - _ = try self.addUndefined(sym_name, .{ .add_got = true }); - return; - } - - const is_threadlocal = if (decl.val.getVariable(mod)) |variable| - variable.is_threadlocal and comp.config.any_non_single_threaded - else - false; - if (is_threadlocal) return self.updateThreadlocalVariable(mod, decl_index); - - const atom_index = try self.getOrCreateAtomForDecl(decl_index); - const sym_index = self.getAtom(atom_index).getSymbolIndex().?; - Atom.freeRelocations(self, atom_index); - - var code_buffer = std.ArrayList(u8).init(gpa); - defer code_buffer.deinit(); - - var decl_state: ?Dwarf.DeclState = if (self.d_sym) |*d_sym| - try d_sym.dwarf.initDeclState(mod, decl_index) - else - null; - defer if (decl_state) |*ds| ds.deinit(); - - const decl_val = if (decl.val.getVariable(mod)) |variable| Value.fromInterned(variable.init) else decl.val; - const res = if (decl_state) |*ds| - try codegen.generateSymbol(&self.base, decl.srcLoc(mod), .{ - .ty = decl.ty, - .val = decl_val, - }, &code_buffer, .{ - .dwarf = ds, - }, .{ - .parent_atom_index = sym_index, - }) - else - try codegen.generateSymbol(&self.base, decl.srcLoc(mod), .{ - .ty = decl.ty, - .val = decl_val, - }, &code_buffer, .none, .{ - .parent_atom_index = sym_index, - }); - - const code = switch (res) { - .ok => code_buffer.items, - .fail => |em| { - decl.analysis = .codegen_failure; - try mod.failed_decls.put(mod.gpa, decl_index, em); - return; - }, - }; - const addr = try self.updateDeclCode(decl_index, code); - - if (decl_state) |*ds| { - try self.d_sym.?.dwarf.commitDeclState( - mod, - decl_index, - addr, - self.getAtom(atom_index).size, - ds, - ); - } - - // Since we updated the vaddr and the size, each corresponding export symbol also - // needs to be updated. 
- try self.updateExports(mod, .{ .decl_index = decl_index }, mod.getDeclExports(decl_index)); + @panic("TODO updateDecl"); } fn updateLazySymbolAtom( @@ -2444,321 +555,36 @@ fn updateLazySymbolAtom( atom_index: Atom.Index, section_index: u8, ) !void { - const gpa = self.base.comp.gpa; - const mod = self.base.comp.module.?; - - var required_alignment: Alignment = .none; - var code_buffer = std.ArrayList(u8).init(gpa); - defer code_buffer.deinit(); - - const name_str_index = blk: { - const name = try std.fmt.allocPrint(gpa, "___lazy_{s}_{}", .{ - @tagName(sym.kind), - sym.ty.fmt(mod), - }); - defer gpa.free(name); - break :blk try self.strtab.insert(gpa, name); - }; - const name = self.strtab.get(name_str_index).?; - - const atom = self.getAtomPtr(atom_index); - const local_sym_index = atom.getSymbolIndex().?; - - const src = if (sym.ty.getOwnerDeclOrNull(mod)) |owner_decl| - mod.declPtr(owner_decl).srcLoc(mod) - else - Module.SrcLoc{ - .file_scope = undefined, - .parent_decl_node = undefined, - .lazy = .unneeded, - }; - const res = try codegen.generateLazySymbol( - &self.base, - src, - sym, - &required_alignment, - &code_buffer, - .none, - .{ .parent_atom_index = local_sym_index }, - ); - const code = switch (res) { - .ok => code_buffer.items, - .fail => |em| { - log.debug("{s}", .{em.msg}); - return error.CodegenFail; - }, - }; - - const symbol = atom.getSymbolPtr(self); - symbol.n_strx = name_str_index; - symbol.n_type = macho.N_SECT; - symbol.n_sect = section_index + 1; - symbol.n_desc = 0; - - const vaddr = try self.allocateAtom(atom_index, code.len, required_alignment); - errdefer self.freeAtom(atom_index); - - log.debug("allocated atom for {s} at 0x{x}", .{ name, vaddr }); - log.debug(" (required alignment 0x{x})", .{required_alignment}); - - atom.size = code.len; - symbol.n_value = vaddr; - - try self.addGotEntry(.{ .sym_index = local_sym_index }); - try self.writeAtom(atom_index, code); + _ = self; + _ = sym; + _ = atom_index; + _ = section_index; + 
@panic("TODO updateLazySymbolAtom"); } pub fn getOrCreateAtomForLazySymbol(self: *MachO, sym: File.LazySymbol) !Atom.Index { - const mod = self.base.comp.module.?; - const gpa = self.base.comp.gpa; - const gop = try self.lazy_syms.getOrPut(gpa, sym.getDecl(mod)); - errdefer _ = if (!gop.found_existing) self.lazy_syms.pop(); - if (!gop.found_existing) gop.value_ptr.* = .{}; - const metadata: struct { atom: *Atom.Index, state: *LazySymbolMetadata.State } = switch (sym.kind) { - .code => .{ .atom = &gop.value_ptr.text_atom, .state = &gop.value_ptr.text_state }, - .const_data => .{ - .atom = &gop.value_ptr.data_const_atom, - .state = &gop.value_ptr.data_const_state, - }, - }; - switch (metadata.state.*) { - .unused => { - const sym_index = try self.allocateSymbol(); - metadata.atom.* = try self.createAtom(sym_index, .{}); - try self.atom_by_index_table.putNoClobber(gpa, sym_index, metadata.atom.*); - }, - .pending_flush => return metadata.atom.*, - .flushed => {}, - } - metadata.state.* = .pending_flush; - const atom = metadata.atom.*; - // anyerror needs to be deferred until flushModule - if (sym.getDecl(mod) != .none) try self.updateLazySymbolAtom(sym, atom, switch (sym.kind) { - .code => self.text_section_index.?, - .const_data => self.data_const_section_index.?, - }); - return atom; -} - -fn updateThreadlocalVariable(self: *MachO, module: *Module, decl_index: InternPool.DeclIndex) !void { - const mod = self.base.comp.module.?; - // Lowering a TLV on macOS involves two stages: - // 1. first we lower the initializer into appopriate section (__thread_data or __thread_bss) - // 2. next, we create a corresponding threadlocal variable descriptor in __thread_vars - - // 1. Lower the initializer value. 
- const init_atom_index = try self.getOrCreateAtomForDecl(decl_index); - const init_atom = self.getAtomPtr(init_atom_index); - const init_sym_index = init_atom.getSymbolIndex().?; - Atom.freeRelocations(self, init_atom_index); - - const gpa = self.base.comp.gpa; - - var code_buffer = std.ArrayList(u8).init(gpa); - defer code_buffer.deinit(); - - var decl_state: ?Dwarf.DeclState = if (self.d_sym) |*d_sym| - try d_sym.dwarf.initDeclState(module, decl_index) - else - null; - defer if (decl_state) |*ds| ds.deinit(); - - const decl = module.declPtr(decl_index); - const decl_metadata = self.decls.get(decl_index).?; - const decl_val = Value.fromInterned(decl.val.getVariable(mod).?.init); - const res = if (decl_state) |*ds| - try codegen.generateSymbol(&self.base, decl.srcLoc(mod), .{ - .ty = decl.ty, - .val = decl_val, - }, &code_buffer, .{ - .dwarf = ds, - }, .{ - .parent_atom_index = init_sym_index, - }) - else - try codegen.generateSymbol(&self.base, decl.srcLoc(mod), .{ - .ty = decl.ty, - .val = decl_val, - }, &code_buffer, .none, .{ - .parent_atom_index = init_sym_index, - }); - - const code = switch (res) { - .ok => code_buffer.items, - .fail => |em| { - decl.analysis = .codegen_failure; - try module.failed_decls.put(module.gpa, decl_index, em); - return; - }, - }; - - const required_alignment = decl.getAlignment(mod); - - const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(module)); - - const init_sym_name = try std.fmt.allocPrint(gpa, "{s}$tlv$init", .{decl_name}); - defer gpa.free(init_sym_name); - - const sect_id = decl_metadata.section; - const init_sym = init_atom.getSymbolPtr(self); - init_sym.n_strx = try self.strtab.insert(gpa, init_sym_name); - init_sym.n_type = macho.N_SECT; - init_sym.n_sect = sect_id + 1; - init_sym.n_desc = 0; - init_atom.size = code.len; - - init_sym.n_value = try self.allocateAtom(init_atom_index, code.len, required_alignment); - errdefer self.freeAtom(init_atom_index); - - log.debug("allocated atom for {s} 
at 0x{x}", .{ init_sym_name, init_sym.n_value }); - log.debug(" (required alignment 0x{x})", .{required_alignment}); - - try self.writeAtom(init_atom_index, code); - - if (decl_state) |*ds| { - try self.d_sym.?.dwarf.commitDeclState( - module, - decl_index, - init_sym.n_value, - self.getAtom(init_atom_index).size, - ds, - ); - } - - try self.updateExports(module, .{ .decl_index = decl_index }, module.getDeclExports(decl_index)); - - // 2. Create a TLV descriptor. - const init_atom_sym_loc = init_atom.getSymbolWithLoc(); - const gop = try self.tlv_table.getOrPut(gpa, init_atom_sym_loc); - assert(!gop.found_existing); - gop.value_ptr.* = try self.createThreadLocalDescriptorAtom(decl_name, init_atom_sym_loc); - self.markRelocsDirtyByTarget(init_atom_sym_loc); + _ = self; + _ = sym; + @panic("TODO getOrCreateAtomForLazySymbol"); } pub fn getOrCreateAtomForDecl(self: *MachO, decl_index: InternPool.DeclIndex) !Atom.Index { - const gpa = self.base.comp.gpa; - const gop = try self.decls.getOrPut(gpa, decl_index); - if (!gop.found_existing) { - const sym_index = try self.allocateSymbol(); - const atom_index = try self.createAtom(sym_index, .{}); - try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom_index); - gop.value_ptr.* = .{ - .atom = atom_index, - .section = self.getDeclOutputSection(decl_index), - .exports = .{}, - }; - } - return gop.value_ptr.atom; + _ = self; + _ = decl_index; + @panic("TODO getOrCreateAtomForDecl"); } fn getDeclOutputSection(self: *MachO, decl_index: InternPool.DeclIndex) u8 { - const decl = self.base.comp.module.?.declPtr(decl_index); - const ty = decl.ty; - const val = decl.val; - const mod = self.base.comp.module.?; - const zig_ty = ty.zigTypeTag(mod); - const any_non_single_threaded = self.base.comp.config.any_non_single_threaded; - const optimize_mode = self.base.comp.root_mod.optimize_mode; - const sect_id: u8 = blk: { - // TODO finish and audit this function - if (val.isUndefDeep(mod)) { - if (optimize_mode == .ReleaseFast or 
optimize_mode == .ReleaseSmall) { - @panic("TODO __DATA,__bss"); - } else { - break :blk self.data_section_index.?; - } - } - - if (val.getVariable(mod)) |variable| { - if (variable.is_threadlocal and any_non_single_threaded) { - break :blk self.thread_data_section_index.?; - } - break :blk self.data_section_index.?; - } - - switch (zig_ty) { - // TODO: what if this is a function pointer? - .Fn => break :blk self.text_section_index.?, - else => { - if (val.getVariable(mod)) |_| { - break :blk self.data_section_index.?; - } - break :blk self.data_const_section_index.?; - }, - } - }; - return sect_id; + _ = self; + _ = decl_index; + @panic("TODO getDeclOutputSection"); } fn updateDeclCode(self: *MachO, decl_index: InternPool.DeclIndex, code: []u8) !u64 { - const gpa = self.base.comp.gpa; - const mod = self.base.comp.module.?; - const decl = mod.declPtr(decl_index); - - const required_alignment = decl.getAlignment(mod); - - const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); - - const decl_metadata = self.decls.get(decl_index).?; - const atom_index = decl_metadata.atom; - const atom = self.getAtom(atom_index); - const sym_index = atom.getSymbolIndex().?; - const sect_id = decl_metadata.section; - const header = &self.sections.items(.header)[sect_id]; - const segment = self.getSegment(sect_id); - const code_len = code.len; - - if (atom.size != 0) { - const sym = atom.getSymbolPtr(self); - sym.n_strx = try self.strtab.insert(gpa, decl_name); - sym.n_type = macho.N_SECT; - sym.n_sect = sect_id + 1; - sym.n_desc = 0; - - const capacity = atom.capacity(self); - const need_realloc = code_len > capacity or !required_alignment.check(sym.n_value); - - if (need_realloc) { - const vaddr = try self.growAtom(atom_index, code_len, required_alignment); - log.debug("growing {s} and moving from 0x{x} to 0x{x}", .{ decl_name, sym.n_value, vaddr }); - log.debug(" (required alignment 0x{x})", .{required_alignment}); - - if (vaddr != sym.n_value) { - 
sym.n_value = vaddr; - log.debug(" (updating GOT entry)", .{}); - const got_atom_index = self.got_table.lookup.get(.{ .sym_index = sym_index }).?; - try self.writeOffsetTableEntry(got_atom_index); - self.markRelocsDirtyByTarget(.{ .sym_index = sym_index }); - } - } else if (code_len < atom.size) { - self.shrinkAtom(atom_index, code_len); - } else if (atom.next_index == null) { - const needed_size = (sym.n_value + code_len) - segment.vmaddr; - header.size = needed_size; - } - self.getAtomPtr(atom_index).size = code_len; - } else { - const sym = atom.getSymbolPtr(self); - sym.n_strx = try self.strtab.insert(gpa, decl_name); - sym.n_type = macho.N_SECT; - sym.n_sect = sect_id + 1; - sym.n_desc = 0; - - const vaddr = try self.allocateAtom(atom_index, code_len, required_alignment); - errdefer self.freeAtom(atom_index); - - log.debug("allocated atom for {s} at 0x{x}", .{ decl_name, vaddr }); - log.debug(" (required alignment 0x{x})", .{required_alignment}); - - self.getAtomPtr(atom_index).size = code_len; - sym.n_value = vaddr; - - try self.addGotEntry(.{ .sym_index = sym_index }); - } - - try self.writeAtom(atom_index, code); - - return atom.getSymbol(self).n_value; + _ = self; + _ = decl_index; + _ = code; + @panic("TODO updateDeclCode"); } pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl_index: InternPool.DeclIndex) !void { @@ -2779,124 +605,7 @@ pub fn updateExports( if (self.llvm_object) |llvm_object| return llvm_object.updateExports(mod, exported, exports); - const tracy = trace(@src()); - defer tracy.end(); - - const gpa = self.base.comp.gpa; - - const metadata = switch (exported) { - .decl_index => |decl_index| blk: { - _ = try self.getOrCreateAtomForDecl(decl_index); - break :blk self.decls.getPtr(decl_index).?; - }, - .value => |value| self.anon_decls.getPtr(value) orelse blk: { - const first_exp = exports[0]; - const res = try self.lowerAnonDecl(value, .none, first_exp.getSrcLoc(mod)); - switch (res) { - .ok => {}, - .fail => |em| { - // TODO 
maybe it's enough to return an error here and let Module.processExportsInner - // handle the error? - try mod.failed_exports.ensureUnusedCapacity(mod.gpa, 1); - mod.failed_exports.putAssumeCapacityNoClobber(first_exp, em); - return; - }, - } - break :blk self.anon_decls.getPtr(value).?; - }, - }; - const atom_index = metadata.atom; - const atom = self.getAtom(atom_index); - const sym = atom.getSymbol(self); - - for (exports) |exp| { - const exp_name = try std.fmt.allocPrint(gpa, "_{}", .{ - exp.opts.name.fmt(&mod.intern_pool), - }); - defer gpa.free(exp_name); - - log.debug("adding new export '{s}'", .{exp_name}); - - if (exp.opts.section.unwrap()) |section_name| { - if (!mod.intern_pool.stringEqlSlice(section_name, "__text")) { - try mod.failed_exports.putNoClobber(mod.gpa, exp, try Module.ErrorMsg.create( - gpa, - exp.getSrcLoc(mod), - "Unimplemented: ExportOptions.section", - .{}, - )); - continue; - } - } - - if (exp.opts.linkage == .LinkOnce) { - try mod.failed_exports.putNoClobber(mod.gpa, exp, try Module.ErrorMsg.create( - gpa, - exp.getSrcLoc(mod), - "Unimplemented: GlobalLinkage.LinkOnce", - .{}, - )); - continue; - } - - const global_sym_index = metadata.getExport(self, exp_name) orelse blk: { - const global_sym_index = if (self.getGlobalIndex(exp_name)) |global_index| ind: { - const global = self.globals.items[global_index]; - // TODO this is just plain wrong as it all should happen in a single `resolveSymbols` - // pass. This will go away once we abstact away Zig's incremental compilation into - // its own module. 
- if (global.getFile() == null and self.getSymbol(global).undf()) { - _ = self.unresolved.swapRemove(global_index); - break :ind global.sym_index; - } - break :ind try self.allocateSymbol(); - } else try self.allocateSymbol(); - try metadata.exports.append(gpa, global_sym_index); - break :blk global_sym_index; - }; - const global_sym_loc = SymbolWithLoc{ .sym_index = global_sym_index }; - const global_sym = self.getSymbolPtr(global_sym_loc); - global_sym.* = .{ - .n_strx = try self.strtab.insert(gpa, exp_name), - .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = metadata.section + 1, - .n_desc = 0, - .n_value = sym.n_value, - }; - - switch (exp.opts.linkage) { - .Internal => { - // Symbol should be hidden, or in MachO lingo, private extern. - // We should also mark the symbol as Weak: n_desc == N_WEAK_DEF. - global_sym.n_type |= macho.N_PEXT; - global_sym.n_desc |= macho.N_WEAK_DEF; - }, - .Strong => {}, - .Weak => { - // Weak linkage is specified as part of n_desc field. - // Symbol's n_type is like for a symbol with strong linkage. 
- global_sym.n_desc |= macho.N_WEAK_DEF; - }, - else => unreachable, - } - - self.resolveGlobalSymbol(global_sym_loc) catch |err| switch (err) { - error.MultipleSymbolDefinitions => { - // TODO: this needs rethinking - const global = self.getGlobal(exp_name).?; - if (global_sym_loc.sym_index != global.sym_index and global.getFile() != null) { - _ = try mod.failed_exports.put(mod.gpa, exp, try Module.ErrorMsg.create( - gpa, - exp.getSrcLoc(mod), - \\LinkError: symbol '{s}' defined multiple times - , - .{exp_name}, - )); - } - }, - else => |e| return e, - }; - } + @panic("TODO updateExports"); } pub fn deleteDeclExport( @@ -2905,82 +614,27 @@ pub fn deleteDeclExport( name: InternPool.NullTerminatedString, ) Allocator.Error!void { if (self.llvm_object) |_| return; - const metadata = self.decls.getPtr(decl_index) orelse return; - - const gpa = self.base.comp.gpa; - const mod = self.base.comp.module.?; - const exp_name = try std.fmt.allocPrint(gpa, "_{s}", .{mod.intern_pool.stringToSlice(name)}); - defer gpa.free(exp_name); - const sym_index = metadata.getExportPtr(self, exp_name) orelse return; - - const sym_loc = SymbolWithLoc{ .sym_index = sym_index.* }; - const sym = self.getSymbolPtr(sym_loc); - log.debug("deleting export '{s}'", .{exp_name}); - assert(sym.sect() and sym.ext()); - sym.* = .{ - .n_strx = 0, - .n_type = 0, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - self.locals_free_list.append(gpa, sym_index.*) catch {}; - - if (self.resolver.fetchRemove(exp_name)) |entry| { - defer gpa.free(entry.key); - self.globals_free_list.append(gpa, entry.value) catch {}; - self.globals.items[entry.value] = .{ .sym_index = 0 }; - } - - sym_index.* = 0; + _ = decl_index; + _ = name; + @panic("TODO deleteDeclExport"); } fn freeUnnamedConsts(self: *MachO, decl_index: InternPool.DeclIndex) void { - const gpa = self.base.comp.gpa; - const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; - for (unnamed_consts.items) |atom| { - 
self.freeAtom(atom); - } - unnamed_consts.clearAndFree(gpa); + _ = self; + _ = decl_index; + @panic("TODO freeUnnamedConst"); } pub fn freeDecl(self: *MachO, decl_index: InternPool.DeclIndex) void { if (self.llvm_object) |llvm_object| return llvm_object.freeDecl(decl_index); - const gpa = self.base.comp.gpa; - const mod = self.base.comp.module.?; - const decl = mod.declPtr(decl_index); - - log.debug("freeDecl {*}", .{decl}); - - if (self.decls.fetchSwapRemove(decl_index)) |const_kv| { - var kv = const_kv; - self.freeAtom(kv.value.atom); - self.freeUnnamedConsts(decl_index); - kv.value.exports.deinit(gpa); - } - - if (self.d_sym) |*d_sym| { - d_sym.dwarf.freeDecl(decl_index); - } + @panic("TODO freeDecl"); } pub fn getDeclVAddr(self: *MachO, decl_index: InternPool.DeclIndex, reloc_info: File.RelocInfo) !u64 { assert(self.llvm_object == null); - - const this_atom_index = try self.getOrCreateAtomForDecl(decl_index); - const sym_index = self.getAtom(this_atom_index).getSymbolIndex().?; - const atom_index = self.getAtomIndexForSymbol(.{ .sym_index = reloc_info.parent_atom_index }).?; - try Atom.addRelocation(self, atom_index, .{ - .type = .unsigned, - .target = .{ .sym_index = sym_index }, - .offset = @as(u32, @intCast(reloc_info.offset)), - .addend = reloc_info.addend, - .pcrel = false, - .length = 3, - }); - try Atom.addRebase(self, atom_index, @as(u32, @intCast(reloc_info.offset))); - - return 0; + _ = decl_index; + _ = reloc_info; + @panic("TODO getDeclVAddr"); } pub fn lowerAnonDecl( @@ -2989,1771 +643,25 @@ pub fn lowerAnonDecl( explicit_alignment: InternPool.Alignment, src_loc: Module.SrcLoc, ) !codegen.Result { - const gpa = self.base.comp.gpa; - const mod = self.base.comp.module.?; - const ty = Type.fromInterned(mod.intern_pool.typeOf(decl_val)); - const decl_alignment = switch (explicit_alignment) { - .none => ty.abiAlignment(mod), - else => explicit_alignment, - }; - if (self.anon_decls.get(decl_val)) |metadata| { - const existing_addr = 
self.getAtom(metadata.atom).getSymbol(self).n_value; - if (decl_alignment.check(existing_addr)) - return .ok; - } - - const val = Value.fromInterned(decl_val); - const tv = TypedValue{ .ty = ty, .val = val }; - var name_buf: [32]u8 = undefined; - const name = std.fmt.bufPrint(&name_buf, "__anon_{d}", .{ - @intFromEnum(decl_val), - }) catch unreachable; - const res = self.lowerConst( - name, - tv, - decl_alignment, - self.data_const_section_index.?, - src_loc, - ) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - else => |e| return .{ .fail = try Module.ErrorMsg.create( - gpa, - src_loc, - "unable to lower constant value: {s}", - .{@errorName(e)}, - ) }, - }; - const atom_index = switch (res) { - .ok => |atom_index| atom_index, - .fail => |em| return .{ .fail = em }, - }; - try self.anon_decls.put(gpa, decl_val, .{ - .atom = atom_index, - .section = self.data_const_section_index.?, - }); - return .ok; + _ = self; + _ = decl_val; + _ = explicit_alignment; + _ = src_loc; + @panic("TODO lowerAnonDecl"); } pub fn getAnonDeclVAddr(self: *MachO, decl_val: InternPool.Index, reloc_info: link.File.RelocInfo) !u64 { assert(self.llvm_object == null); - - const this_atom_index = self.anon_decls.get(decl_val).?.atom; - const sym_index = self.getAtom(this_atom_index).getSymbolIndex().?; - const atom_index = self.getAtomIndexForSymbol(.{ .sym_index = reloc_info.parent_atom_index }).?; - try Atom.addRelocation(self, atom_index, .{ - .type = .unsigned, - .target = .{ .sym_index = sym_index }, - .offset = @as(u32, @intCast(reloc_info.offset)), - .addend = reloc_info.addend, - .pcrel = false, - .length = 3, - }); - try Atom.addRebase(self, atom_index, @as(u32, @intCast(reloc_info.offset))); - - return 0; -} - -const PopulateMissingMetadataOptions = struct { - symbol_count_hint: u64, - program_code_size_hint: u64, -}; - -fn populateMissingMetadata(self: *MachO, options: PopulateMissingMetadataOptions) !void { - assert(self.mode == .incremental); - - const 
comp = self.base.comp; - const gpa = comp.gpa; - const target = comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const pagezero_vmsize = self.calcPagezeroSize(); - - if (self.pagezero_segment_cmd_index == null) { - if (pagezero_vmsize > 0) { - self.pagezero_segment_cmd_index = @as(u8, @intCast(self.segments.items.len)); - try self.segments.append(gpa, .{ - .segname = makeStaticString("__PAGEZERO"), - .vmsize = pagezero_vmsize, - .cmdsize = @sizeOf(macho.segment_command_64), - }); - } - } - - if (self.header_segment_cmd_index == null) { - // The first __TEXT segment is immovable and covers MachO header and load commands. - self.header_segment_cmd_index = @as(u8, @intCast(self.segments.items.len)); - const ideal_size = self.headerpad_size; - const needed_size = mem.alignForward(u64, padToIdeal(ideal_size), getPageSize(cpu_arch)); - - log.debug("found __TEXT segment (header-only) free space 0x{x} to 0x{x}", .{ 0, needed_size }); - - try self.segments.append(gpa, .{ - .segname = makeStaticString("__TEXT"), - .vmaddr = pagezero_vmsize, - .vmsize = needed_size, - .filesize = needed_size, - .maxprot = macho.PROT.READ | macho.PROT.EXEC, - .initprot = macho.PROT.READ | macho.PROT.EXEC, - .cmdsize = @sizeOf(macho.segment_command_64), - }); - self.segment_table_dirty = true; - } - - if (self.text_section_index == null) { - // Sadly, segments need unique string identfiers for some reason. 
- self.text_section_index = try self.allocateSection("__TEXT1", "__text", .{ - .size = options.program_code_size_hint, - .alignment = switch (cpu_arch) { - .x86_64 => 1, - .aarch64 => @sizeOf(u32), - else => unreachable, // unhandled architecture type - }, - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - .prot = macho.PROT.READ | macho.PROT.EXEC, - }); - self.segment_table_dirty = true; - } - - if (self.stubs_section_index == null) { - const stub_size = stubs.stubSize(cpu_arch); - self.stubs_section_index = try self.allocateSection("__TEXT2", "__stubs", .{ - .size = stub_size, - .alignment = stubs.stubAlignment(cpu_arch), - .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - .reserved2 = stub_size, - .prot = macho.PROT.READ | macho.PROT.EXEC, - }); - self.segment_table_dirty = true; - } - - if (self.stub_helper_section_index == null) { - self.stub_helper_section_index = try self.allocateSection("__TEXT3", "__stub_helper", .{ - .size = @sizeOf(u32), - .alignment = stubs.stubAlignment(cpu_arch), - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - .prot = macho.PROT.READ | macho.PROT.EXEC, - }); - self.segment_table_dirty = true; - } - - if (self.got_section_index == null) { - self.got_section_index = try self.allocateSection("__DATA_CONST", "__got", .{ - .size = @sizeOf(u64) * options.symbol_count_hint, - .alignment = @alignOf(u64), - .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, - .prot = macho.PROT.READ | macho.PROT.WRITE, - }); - self.segment_table_dirty = true; - } - - if (self.data_const_section_index == null) { - self.data_const_section_index = try self.allocateSection("__DATA_CONST1", "__const", .{ - .size = @sizeOf(u64), - .alignment = @alignOf(u64), - .flags = macho.S_REGULAR, - .prot = macho.PROT.READ | macho.PROT.WRITE, - }); - self.segment_table_dirty = true; - } - - if (self.la_symbol_ptr_section_index == null) { - 
self.la_symbol_ptr_section_index = try self.allocateSection("__DATA", "__la_symbol_ptr", .{ - .size = @sizeOf(u64), - .alignment = @alignOf(u64), - .flags = macho.S_LAZY_SYMBOL_POINTERS, - .prot = macho.PROT.READ | macho.PROT.WRITE, - }); - self.segment_table_dirty = true; - } - - if (self.data_section_index == null) { - self.data_section_index = try self.allocateSection("__DATA1", "__data", .{ - .size = @sizeOf(u64), - .alignment = @alignOf(u64), - .flags = macho.S_REGULAR, - .prot = macho.PROT.READ | macho.PROT.WRITE, - }); - self.segment_table_dirty = true; - } - - if (comp.config.any_non_single_threaded) { - if (self.thread_vars_section_index == null) { - self.thread_vars_section_index = try self.allocateSection("__DATA2", "__thread_vars", .{ - .size = @sizeOf(u64) * 3, - .alignment = @sizeOf(u64), - .flags = macho.S_THREAD_LOCAL_VARIABLES, - .prot = macho.PROT.READ | macho.PROT.WRITE, - }); - self.segment_table_dirty = true; - } - - if (self.thread_data_section_index == null) { - self.thread_data_section_index = try self.allocateSection("__DATA3", "__thread_data", .{ - .size = @sizeOf(u64), - .alignment = @alignOf(u64), - .flags = macho.S_THREAD_LOCAL_REGULAR, - .prot = macho.PROT.READ | macho.PROT.WRITE, - }); - self.segment_table_dirty = true; - } - } - - if (self.linkedit_segment_cmd_index == null) { - self.linkedit_segment_cmd_index = @as(u8, @intCast(self.segments.items.len)); - - try self.segments.append(gpa, .{ - .segname = makeStaticString("__LINKEDIT"), - .maxprot = macho.PROT.READ, - .initprot = macho.PROT.READ, - .cmdsize = @sizeOf(macho.segment_command_64), - }); - } -} - -fn calcPagezeroSize(self: *MachO) u64 { - const output_mode = self.base.comp.config.output_mode; - const target = self.base.comp.root_mod.resolved_target.result; - const page_size = getPageSize(target.cpu.arch); - const aligned_pagezero_vmsize = mem.alignBackward(u64, self.pagezero_vmsize, page_size); - if (output_mode == .Lib) return 0; - if (aligned_pagezero_vmsize == 0) return 
0; - if (aligned_pagezero_vmsize != self.pagezero_vmsize) { - log.warn("requested __PAGEZERO size (0x{x}) is not page aligned", .{self.pagezero_vmsize}); - log.warn(" rounding down to 0x{x}", .{aligned_pagezero_vmsize}); - } - return aligned_pagezero_vmsize; -} - -const InitSectionOpts = struct { - flags: u32 = macho.S_REGULAR, - reserved1: u32 = 0, - reserved2: u32 = 0, -}; - -pub fn initSection(self: *MachO, segname: []const u8, sectname: []const u8, opts: InitSectionOpts) !u8 { - log.debug("creating section '{s},{s}'", .{ segname, sectname }); - const index = @as(u8, @intCast(self.sections.slice().len)); - const gpa = self.base.comp.gpa; - try self.sections.append(gpa, .{ - .segment_index = undefined, // Segments will be created automatically later down the pipeline - .header = .{ - .sectname = makeStaticString(sectname), - .segname = makeStaticString(segname), - .flags = opts.flags, - .reserved1 = opts.reserved1, - .reserved2 = opts.reserved2, - }, - }); - return index; -} - -fn allocateSection(self: *MachO, segname: []const u8, sectname: []const u8, opts: struct { - size: u64 = 0, - alignment: u32 = 0, - prot: macho.vm_prot_t = macho.PROT.NONE, - flags: u32 = macho.S_REGULAR, - reserved2: u32 = 0, -}) !u8 { - const gpa = self.base.comp.gpa; - const target = self.base.comp.root_mod.resolved_target.result; - const page_size = getPageSize(target.cpu.arch); - // In incremental context, we create one section per segment pairing. This way, - // we can move the segment in raw file as we please. - const segment_id = @as(u8, @intCast(self.segments.items.len)); - const vmaddr = blk: { - const prev_segment = self.segments.items[segment_id - 1]; - break :blk mem.alignForward(u64, prev_segment.vmaddr + prev_segment.vmsize, page_size); - }; - // We commit more memory than needed upfront so that we don't have to reallocate too soon. 
- const vmsize = mem.alignForward(u64, opts.size, page_size); - const off = self.findFreeSpace(opts.size, page_size); - - log.debug("found {s},{s} free space 0x{x} to 0x{x} (0x{x} - 0x{x})", .{ - segname, - sectname, - off, - off + opts.size, - vmaddr, - vmaddr + vmsize, - }); - - const seg = try self.segments.addOne(gpa); - seg.* = .{ - .segname = makeStaticString(segname), - .vmaddr = vmaddr, - .vmsize = vmsize, - .fileoff = off, - .filesize = vmsize, - .maxprot = opts.prot, - .initprot = opts.prot, - .nsects = 1, - .cmdsize = @sizeOf(macho.segment_command_64) + @sizeOf(macho.section_64), - }; - - const sect_id = try self.initSection(segname, sectname, .{ - .flags = opts.flags, - .reserved2 = opts.reserved2, - }); - const section = &self.sections.items(.header)[sect_id]; - section.addr = mem.alignForward(u64, vmaddr, opts.alignment); - section.offset = mem.alignForward(u32, @as(u32, @intCast(off)), opts.alignment); - section.size = opts.size; - section.@"align" = math.log2(opts.alignment); - self.sections.items(.segment_index)[sect_id] = segment_id; - assert(!section.isZerofill()); // TODO zerofill sections - - return sect_id; -} - -fn growSection(self: *MachO, sect_id: u8, needed_size: u64) !void { - const header = &self.sections.items(.header)[sect_id]; - const segment_index = self.sections.items(.segment_index)[sect_id]; - const segment = &self.segments.items[segment_index]; - const maybe_last_atom_index = self.sections.items(.last_atom_index)[sect_id]; - const sect_capacity = self.allocatedSize(header.offset); - const target = self.base.comp.root_mod.resolved_target.result; - const page_size = getPageSize(target.cpu.arch); - - if (needed_size > sect_capacity) { - const new_offset = self.findFreeSpace(needed_size, page_size); - const current_size = if (maybe_last_atom_index) |last_atom_index| blk: { - const last_atom = self.getAtom(last_atom_index); - const sym = last_atom.getSymbol(self); - break :blk (sym.n_value + last_atom.size) - segment.vmaddr; - } else 
header.size; - - log.debug("moving {s},{s} from 0x{x} to 0x{x}", .{ - header.segName(), - header.sectName(), - header.offset, - new_offset, - }); - - const amt = try self.base.file.?.copyRangeAll( - header.offset, - self.base.file.?, - new_offset, - current_size, - ); - if (amt != current_size) return error.InputOutput; - header.offset = @as(u32, @intCast(new_offset)); - segment.fileoff = new_offset; - } - - const sect_vm_capacity = self.allocatedVirtualSize(segment.vmaddr); - if (needed_size > sect_vm_capacity) { - self.markRelocsDirtyByAddress(segment.vmaddr + segment.vmsize); - try self.growSectionVirtualMemory(sect_id, needed_size); - } - - header.size = needed_size; - segment.filesize = mem.alignForward(u64, needed_size, page_size); - segment.vmsize = mem.alignForward(u64, needed_size, page_size); -} - -fn growSectionVirtualMemory(self: *MachO, sect_id: u8, needed_size: u64) !void { - const target = self.base.comp.root_mod.resolved_target.result; - const page_size = getPageSize(target.cpu.arch); - const header = &self.sections.items(.header)[sect_id]; - const segment = self.getSegmentPtr(sect_id); - const increased_size = padToIdeal(needed_size); - const old_aligned_end = segment.vmaddr + segment.vmsize; - const new_aligned_end = segment.vmaddr + mem.alignForward(u64, increased_size, page_size); - const diff = new_aligned_end - old_aligned_end; - log.debug("shifting every segment after {s},{s} in virtual memory by {x}", .{ - header.segName(), - header.sectName(), - diff, - }); - - // TODO: enforce order by increasing VM addresses in self.sections container. - for (self.sections.items(.header)[sect_id + 1 ..], 0..) 
|*next_header, next_sect_id| { - const index = @as(u8, @intCast(sect_id + 1 + next_sect_id)); - const next_segment = self.getSegmentPtr(index); - next_header.addr += diff; - next_segment.vmaddr += diff; - - const maybe_last_atom_index = &self.sections.items(.last_atom_index)[index]; - if (maybe_last_atom_index.*) |last_atom_index| { - var atom_index = last_atom_index; - while (true) { - const atom = self.getAtom(atom_index); - const sym = atom.getSymbolPtr(self); - sym.n_value += diff; - - if (atom.prev_index) |prev_index| { - atom_index = prev_index; - } else break; - } - } - } -} - -pub fn addAtomToSection(self: *MachO, atom_index: Atom.Index) void { - assert(self.mode == .zld); - const atom = self.getAtomPtr(atom_index); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - var section = self.sections.get(sym.n_sect - 1); - if (section.header.size > 0) { - const last_atom = self.getAtomPtr(section.last_atom_index.?); - last_atom.next_index = atom_index; - atom.prev_index = section.last_atom_index; - } else { - section.first_atom_index = atom_index; - } - section.last_atom_index = atom_index; - section.header.size += atom.size; - self.sections.set(sym.n_sect - 1, section); -} - -fn allocateAtom(self: *MachO, atom_index: Atom.Index, new_atom_size: u64, alignment: Alignment) !u64 { - const tracy = trace(@src()); - defer tracy.end(); - - assert(self.mode == .incremental); - - const atom = self.getAtom(atom_index); - const sect_id = atom.getSymbol(self).n_sect - 1; - const segment = self.getSegmentPtr(sect_id); - const header = &self.sections.items(.header)[sect_id]; - const free_list = &self.sections.items(.free_list)[sect_id]; - const maybe_last_atom_index = &self.sections.items(.last_atom_index)[sect_id]; - const requires_padding = blk: { - if (!header.isCode()) break :blk false; - if (header.isSymbolStubs()) break :blk false; - if (mem.eql(u8, "__stub_helper", header.sectName())) break :blk false; - break :blk true; - }; - const new_atom_ideal_capacity = if 
(requires_padding) padToIdeal(new_atom_size) else new_atom_size; - - // We use these to indicate our intention to update metadata, placing the new atom, - // and possibly removing a free list node. - // It would be simpler to do it inside the for loop below, but that would cause a - // problem if an error was returned later in the function. So this action - // is actually carried out at the end of the function, when errors are no longer possible. - var atom_placement: ?Atom.Index = null; - var free_list_removal: ?usize = null; - - // First we look for an appropriately sized free list node. - // The list is unordered. We'll just take the first thing that works. - const vaddr = blk: { - var i: usize = 0; - while (i < free_list.items.len) { - const big_atom_index = free_list.items[i]; - const big_atom = self.getAtom(big_atom_index); - // We now have a pointer to a live atom that has too much capacity. - // Is it enough that we could fit this new atom? - const sym = big_atom.getSymbol(self); - const capacity = big_atom.capacity(self); - const ideal_capacity = if (requires_padding) padToIdeal(capacity) else capacity; - const ideal_capacity_end_vaddr = math.add(u64, sym.n_value, ideal_capacity) catch ideal_capacity; - const capacity_end_vaddr = sym.n_value + capacity; - const new_start_vaddr_unaligned = capacity_end_vaddr - new_atom_ideal_capacity; - const new_start_vaddr = alignment.backward(new_start_vaddr_unaligned); - if (new_start_vaddr < ideal_capacity_end_vaddr) { - // Additional bookkeeping here to notice if this free list node - // should be deleted because the atom that it points to has grown to take up - // more of the extra capacity. - if (!big_atom.freeListEligible(self)) { - _ = free_list.swapRemove(i); - } else { - i += 1; - } - continue; - } - // At this point we know that we will place the new atom here. But the - // remaining question is whether there is still yet enough capacity left - // over for there to still be a free list node. 
- const remaining_capacity = new_start_vaddr - ideal_capacity_end_vaddr; - const keep_free_list_node = remaining_capacity >= min_text_capacity; - - // Set up the metadata to be updated, after errors are no longer possible. - atom_placement = big_atom_index; - if (!keep_free_list_node) { - free_list_removal = i; - } - break :blk new_start_vaddr; - } else if (maybe_last_atom_index.*) |last_index| { - const last = self.getAtom(last_index); - const last_symbol = last.getSymbol(self); - const ideal_capacity = if (requires_padding) padToIdeal(last.size) else last.size; - const ideal_capacity_end_vaddr = last_symbol.n_value + ideal_capacity; - const new_start_vaddr = alignment.forward(ideal_capacity_end_vaddr); - atom_placement = last_index; - break :blk new_start_vaddr; - } else { - break :blk alignment.forward(segment.vmaddr); - } - }; - - const expand_section = if (atom_placement) |placement_index| - self.getAtom(placement_index).next_index == null - else - true; - if (expand_section) { - const needed_size = (vaddr + new_atom_size) - segment.vmaddr; - try self.growSection(sect_id, needed_size); - maybe_last_atom_index.* = atom_index; - self.segment_table_dirty = true; - } - - assert(alignment != .none); - header.@"align" = @min(header.@"align", @intFromEnum(alignment)); - self.getAtomPtr(atom_index).size = new_atom_size; - - if (atom.prev_index) |prev_index| { - const prev = self.getAtomPtr(prev_index); - prev.next_index = atom.next_index; - } - if (atom.next_index) |next_index| { - const next = self.getAtomPtr(next_index); - next.prev_index = atom.prev_index; - } - - if (atom_placement) |big_atom_index| { - const big_atom = self.getAtomPtr(big_atom_index); - const atom_ptr = self.getAtomPtr(atom_index); - atom_ptr.prev_index = big_atom_index; - atom_ptr.next_index = big_atom.next_index; - big_atom.next_index = atom_index; - } else { - const atom_ptr = self.getAtomPtr(atom_index); - atom_ptr.prev_index = null; - atom_ptr.next_index = null; - } - if (free_list_removal) 
|i| { - _ = free_list.swapRemove(i); - } - - return vaddr; + _ = decl_val; + _ = reloc_info; + @panic("TODO getAnonDeclVAddr"); } pub fn getGlobalSymbol(self: *MachO, name: []const u8, lib_name: ?[]const u8) !u32 { + _ = self; + _ = name; _ = lib_name; - const gpa = self.base.comp.gpa; - const sym_name = try std.fmt.allocPrint(gpa, "_{s}", .{name}); - defer gpa.free(sym_name); - return self.addUndefined(sym_name, .{ .add_stub = true }); -} - -pub fn writeSegmentHeaders(self: *MachO, writer: anytype) !void { - for (self.segments.items, 0..) |seg, i| { - const indexes = self.getSectionIndexes(@intCast(i)); - var out_seg = seg; - out_seg.cmdsize = @sizeOf(macho.segment_command_64); - out_seg.nsects = 0; - - // Update section headers count; any section with size of 0 is excluded - // since it doesn't have any data in the final binary file. - for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { - if (header.size == 0) continue; - out_seg.cmdsize += @sizeOf(macho.section_64); - out_seg.nsects += 1; - } - - if (out_seg.nsects == 0 and - (mem.eql(u8, out_seg.segName(), "__DATA_CONST") or - mem.eql(u8, out_seg.segName(), "__DATA"))) continue; - - try writer.writeStruct(out_seg); - for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { - if (header.size == 0) continue; - try writer.writeStruct(header); - } - } -} - -pub fn writeLinkeditSegmentData(self: *MachO) !void { - const target = self.base.comp.root_mod.resolved_target.result; - const page_size = getPageSize(target.cpu.arch); - const seg = self.getLinkeditSegmentPtr(); - seg.filesize = 0; - seg.vmsize = 0; - - for (self.segments.items, 0..) |segment, id| { - if (self.linkedit_segment_cmd_index.? 
== @as(u8, @intCast(id))) continue; - if (seg.vmaddr < segment.vmaddr + segment.vmsize) { - seg.vmaddr = mem.alignForward(u64, segment.vmaddr + segment.vmsize, page_size); - } - if (seg.fileoff < segment.fileoff + segment.filesize) { - seg.fileoff = mem.alignForward(u64, segment.fileoff + segment.filesize, page_size); - } - } - - try self.writeDyldInfoData(); - // TODO handle this better - if (self.mode == .zld) { - try self.writeFunctionStarts(); - try self.writeDataInCode(); - } - try self.writeSymtabs(); - - seg.vmsize = mem.alignForward(u64, seg.filesize, page_size); -} - -fn collectRebaseDataFromTableSection(self: *MachO, sect_id: u8, rebase: *Rebase, table: anytype) !void { - const gpa = self.base.comp.gpa; - const header = self.sections.items(.header)[sect_id]; - const segment_index = self.sections.items(.segment_index)[sect_id]; - const segment = self.segments.items[segment_index]; - const base_offset = header.addr - segment.vmaddr; - const is_got = if (self.got_section_index) |index| index == sect_id else false; - - try rebase.entries.ensureUnusedCapacity(gpa, table.entries.items.len); - - for (table.entries.items, 0..) |entry, i| { - if (!table.lookup.contains(entry)) continue; - const sym = self.getSymbol(entry); - if (is_got and sym.undf()) continue; - const offset = i * @sizeOf(u64); - log.debug(" | rebase at {x}", .{base_offset + offset}); - rebase.entries.appendAssumeCapacity(.{ - .offset = base_offset + offset, - .segment_id = segment_index, - }); - } -} - -fn collectRebaseData(self: *MachO, rebase: *Rebase) !void { - const gpa = self.base.comp.gpa; - const slice = self.sections.slice(); - - for (self.rebases.keys(), 0..) 
|atom_index, i| { - const atom = self.getAtom(atom_index); - log.debug(" ATOM(%{?d}, '{s}')", .{ atom.getSymbolIndex(), atom.getName(self) }); - - const sym = atom.getSymbol(self); - const segment_index = slice.items(.segment_index)[sym.n_sect - 1]; - const seg = self.getSegment(sym.n_sect - 1); - - const base_offset = sym.n_value - seg.vmaddr; - - const rebases = self.rebases.values()[i]; - try rebase.entries.ensureUnusedCapacity(gpa, rebases.items.len); - - for (rebases.items) |offset| { - log.debug(" | rebase at {x}", .{base_offset + offset}); - - rebase.entries.appendAssumeCapacity(.{ - .offset = base_offset + offset, - .segment_id = segment_index, - }); - } - } - - // Unpack GOT entries - if (self.got_section_index) |sect_id| { - try self.collectRebaseDataFromTableSection(sect_id, rebase, self.got_table); - } - - // Next, unpack __la_symbol_ptr entries - if (self.la_symbol_ptr_section_index) |sect_id| { - try self.collectRebaseDataFromTableSection(sect_id, rebase, self.stub_table); - } - - // Finally, unpack the rest. 
- const target = self.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - for (self.objects.items) |*object| { - for (object.atoms.items) |atom_index| { - const atom = self.getAtom(atom_index); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - if (sym.n_desc == N_DEAD) continue; - if (sym.n_desc == N_BOUNDARY) continue; - - const sect_id = sym.n_sect - 1; - const section = self.sections.items(.header)[sect_id]; - const segment_id = self.sections.items(.segment_index)[sect_id]; - const segment = self.segments.items[segment_id]; - if (segment.maxprot & macho.PROT.WRITE == 0) continue; - switch (section.type()) { - macho.S_LITERAL_POINTERS, - macho.S_REGULAR, - macho.S_MOD_INIT_FUNC_POINTERS, - macho.S_MOD_TERM_FUNC_POINTERS, - => {}, - else => continue, - } - - log.debug(" ATOM({d}, %{d}, '{s}')", .{ - atom_index, - atom.sym_index, - self.getSymbolName(atom.getSymbolWithLoc()), - }); - - const code = Atom.getAtomCode(self, atom_index); - const relocs = Atom.getAtomRelocs(self, atom_index); - const ctx = Atom.getRelocContext(self, atom_index); - - for (relocs) |rel| { - switch (cpu_arch) { - .aarch64 => { - const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); - if (rel_type != .ARM64_RELOC_UNSIGNED) continue; - if (rel.r_length != 3) continue; - }, - .x86_64 => { - const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); - if (rel_type != .X86_64_RELOC_UNSIGNED) continue; - if (rel.r_length != 3) continue; - }, - else => unreachable, - } - const reloc_target = Atom.parseRelocTarget(self, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = code, - .base_offset = ctx.base_offset, - .base_addr = ctx.base_addr, - }); - const target_sym = self.getSymbol(reloc_target); - if (target_sym.undf()) continue; - - const base_offset = @as(i32, @intCast(sym.n_value - segment.vmaddr)); - const rel_offset = rel.r_address - ctx.base_offset; - const offset = @as(u64, @intCast(base_offset + rel_offset)); - 
log.debug(" | rebase at {x}", .{offset}); - - try rebase.entries.append(gpa, .{ - .offset = offset, - .segment_id = segment_id, - }); - } - } - } - - try rebase.finalize(gpa); -} - -fn collectBindDataFromTableSection(self: *MachO, sect_id: u8, bind: anytype, table: anytype) !void { - const gpa = self.base.comp.gpa; - const header = self.sections.items(.header)[sect_id]; - const segment_index = self.sections.items(.segment_index)[sect_id]; - const segment = self.segments.items[segment_index]; - const base_offset = header.addr - segment.vmaddr; - - try bind.entries.ensureUnusedCapacity(gpa, table.entries.items.len); - - for (table.entries.items, 0..) |entry, i| { - if (!table.lookup.contains(entry)) continue; - const bind_sym = self.getSymbol(entry); - if (!bind_sym.undf()) continue; - const offset = i * @sizeOf(u64); - log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ - base_offset + offset, - self.getSymbolName(entry), - @divTrunc(@as(i16, @bitCast(bind_sym.n_desc)), macho.N_SYMBOL_RESOLVER), - }); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - } - bind.entries.appendAssumeCapacity(.{ - .target = entry, - .offset = base_offset + offset, - .segment_id = segment_index, - .addend = 0, - }); - } -} - -fn collectBindData(self: *MachO, bind: anytype, raw_bindings: anytype) !void { - const gpa = self.base.comp.gpa; - const slice = self.sections.slice(); - - for (raw_bindings.keys(), 0..) 
|atom_index, i| { - const atom = self.getAtom(atom_index); - log.debug(" ATOM(%{?d}, '{s}')", .{ atom.getSymbolIndex(), atom.getName(self) }); - - const sym = atom.getSymbol(self); - const segment_index = slice.items(.segment_index)[sym.n_sect - 1]; - const seg = self.getSegment(sym.n_sect - 1); - - const base_offset = sym.n_value - seg.vmaddr; - - const bindings = raw_bindings.values()[i]; - try bind.entries.ensureUnusedCapacity(gpa, bindings.items.len); - - for (bindings.items) |binding| { - const bind_sym = self.getSymbol(binding.target); - const bind_sym_name = self.getSymbolName(binding.target); - const dylib_ordinal = @divTrunc( - @as(i16, @bitCast(bind_sym.n_desc)), - macho.N_SYMBOL_RESOLVER, - ); - log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ - binding.offset + base_offset, - bind_sym_name, - dylib_ordinal, - }); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - } - bind.entries.appendAssumeCapacity(.{ - .target = binding.target, - .offset = binding.offset + base_offset, - .segment_id = segment_index, - .addend = 0, - }); - } - } - - // Unpack GOT pointers - if (self.got_section_index) |sect_id| { - try self.collectBindDataFromTableSection(sect_id, bind, self.got_table); - } - - // Next, unpack TLV pointers section - if (self.tlv_ptr_section_index) |sect_id| { - try self.collectBindDataFromTableSection(sect_id, bind, self.tlv_ptr_table); - } - - // Finally, unpack the rest. 
- const target = self.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - for (self.objects.items) |*object| { - for (object.atoms.items) |atom_index| { - const atom = self.getAtom(atom_index); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - if (sym.n_desc == N_DEAD) continue; - if (sym.n_desc == N_BOUNDARY) continue; - - const sect_id = sym.n_sect - 1; - const section = self.sections.items(.header)[sect_id]; - const segment_id = self.sections.items(.segment_index)[sect_id]; - const segment = self.segments.items[segment_id]; - if (segment.maxprot & macho.PROT.WRITE == 0) continue; - switch (section.type()) { - macho.S_LITERAL_POINTERS, - macho.S_REGULAR, - macho.S_MOD_INIT_FUNC_POINTERS, - macho.S_MOD_TERM_FUNC_POINTERS, - => {}, - else => continue, - } - - log.debug(" ATOM({d}, %{d}, '{s}')", .{ - atom_index, - atom.sym_index, - self.getSymbolName(atom.getSymbolWithLoc()), - }); - - const code = Atom.getAtomCode(self, atom_index); - const relocs = Atom.getAtomRelocs(self, atom_index); - const ctx = Atom.getRelocContext(self, atom_index); - - for (relocs) |rel| { - switch (cpu_arch) { - .aarch64 => { - const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); - if (rel_type != .ARM64_RELOC_UNSIGNED) continue; - if (rel.r_length != 3) continue; - }, - .x86_64 => { - const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); - if (rel_type != .X86_64_RELOC_UNSIGNED) continue; - if (rel.r_length != 3) continue; - }, - else => unreachable, - } - - const global = Atom.parseRelocTarget(self, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = code, - .base_offset = ctx.base_offset, - .base_addr = ctx.base_addr, - }); - const bind_sym_name = self.getSymbolName(global); - const bind_sym = self.getSymbol(global); - if (!bind_sym.undf()) continue; - - const base_offset = sym.n_value - segment.vmaddr; - const rel_offset = @as(u32, @intCast(rel.r_address - ctx.base_offset)); - const offset = @as(u64, 
@intCast(base_offset + rel_offset)); - const addend = mem.readInt(i64, code[rel_offset..][0..8], .little); - - const dylib_ordinal = @divTrunc(@as(i16, @bitCast(bind_sym.n_desc)), macho.N_SYMBOL_RESOLVER); - log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ - base_offset, - bind_sym_name, - dylib_ordinal, - }); - log.debug(" | with addend {x}", .{addend}); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - } - try bind.entries.append(gpa, .{ - .target = global, - .offset = offset, - .segment_id = segment_id, - .addend = addend, - }); - } - } - } - - try bind.finalize(gpa, self); -} - -fn collectLazyBindData(self: *MachO, bind: anytype) !void { - const sect_id = self.la_symbol_ptr_section_index orelse return; - const gpa = self.base.comp.gpa; - try self.collectBindDataFromTableSection(sect_id, bind, self.stub_table); - try bind.finalize(gpa, self); -} - -fn collectExportData(self: *MachO, trie: *Trie) !void { - const gpa = self.base.comp.gpa; - - // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
- log.debug("generating export trie", .{}); - - const exec_segment = self.segments.items[self.header_segment_cmd_index.?]; - const base_address = exec_segment.vmaddr; - - for (self.globals.items) |global| { - const sym = self.getSymbol(global); - - if (sym.undf()) continue; - assert(sym.ext()); - if (sym.n_desc == N_DEAD) continue; - if (sym.n_desc == N_BOUNDARY) continue; - - const sym_name = self.getSymbolName(global); - log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); - try trie.put(gpa, .{ - .name = sym_name, - .vmaddr_offset = sym.n_value - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); - } - - try trie.finalize(gpa); -} - -fn writeDyldInfoData(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const gpa = self.base.comp.gpa; - - var rebase = Rebase{}; - defer rebase.deinit(gpa); - try self.collectRebaseData(&rebase); - - var bind = Bind{}; - defer bind.deinit(gpa); - try self.collectBindData(&bind, self.bindings); - - var lazy_bind = LazyBind{}; - defer lazy_bind.deinit(gpa); - try self.collectLazyBindData(&lazy_bind); - - var trie: Trie = .{}; - defer trie.deinit(gpa); - try trie.init(gpa); - try self.collectExportData(&trie); - - const link_seg = self.getLinkeditSegmentPtr(); - assert(mem.isAlignedGeneric(u64, link_seg.fileoff, @alignOf(u64))); - const rebase_off = link_seg.fileoff; - const rebase_size = rebase.size(); - const rebase_size_aligned = mem.alignForward(u64, rebase_size, @alignOf(u64)); - log.debug("writing rebase info from 0x{x} to 0x{x}", .{ rebase_off, rebase_off + rebase_size_aligned }); - - const bind_off = rebase_off + rebase_size_aligned; - const bind_size = bind.size(); - const bind_size_aligned = mem.alignForward(u64, bind_size, @alignOf(u64)); - log.debug("writing bind info from 0x{x} to 0x{x}", .{ bind_off, bind_off + bind_size_aligned }); - - const lazy_bind_off = bind_off + bind_size_aligned; - const lazy_bind_size = lazy_bind.size(); - const 
lazy_bind_size_aligned = mem.alignForward(u64, lazy_bind_size, @alignOf(u64)); - log.debug("writing lazy bind info from 0x{x} to 0x{x}", .{ - lazy_bind_off, - lazy_bind_off + lazy_bind_size_aligned, - }); - - const export_off = lazy_bind_off + lazy_bind_size_aligned; - const export_size = trie.size; - const export_size_aligned = mem.alignForward(u64, export_size, @alignOf(u64)); - log.debug("writing export trie from 0x{x} to 0x{x}", .{ export_off, export_off + export_size_aligned }); - - const needed_size = math.cast(usize, export_off + export_size_aligned - rebase_off) orelse - return error.Overflow; - link_seg.filesize = needed_size; - assert(mem.isAlignedGeneric(u64, link_seg.fileoff + link_seg.filesize, @alignOf(u64))); - - const buffer = try gpa.alloc(u8, needed_size); - defer gpa.free(buffer); - @memset(buffer, 0); - - var stream = std.io.fixedBufferStream(buffer); - const writer = stream.writer(); - - try rebase.write(writer); - try stream.seekTo(bind_off - rebase_off); - - try bind.write(writer); - try stream.seekTo(lazy_bind_off - rebase_off); - - try lazy_bind.write(writer); - try stream.seekTo(export_off - rebase_off); - - _ = try trie.write(writer); - - log.debug("writing dyld info from 0x{x} to 0x{x}", .{ - rebase_off, - rebase_off + needed_size, - }); - - try self.base.file.?.pwriteAll(buffer, rebase_off); - try self.populateLazyBindOffsetsInStubHelper(lazy_bind); - - self.dyld_info_cmd.rebase_off = @as(u32, @intCast(rebase_off)); - self.dyld_info_cmd.rebase_size = @as(u32, @intCast(rebase_size_aligned)); - self.dyld_info_cmd.bind_off = @as(u32, @intCast(bind_off)); - self.dyld_info_cmd.bind_size = @as(u32, @intCast(bind_size_aligned)); - self.dyld_info_cmd.lazy_bind_off = @as(u32, @intCast(lazy_bind_off)); - self.dyld_info_cmd.lazy_bind_size = @as(u32, @intCast(lazy_bind_size_aligned)); - self.dyld_info_cmd.export_off = @as(u32, @intCast(export_off)); - self.dyld_info_cmd.export_size = @as(u32, @intCast(export_size_aligned)); -} - -fn 
populateLazyBindOffsetsInStubHelper(self: *MachO, lazy_bind: anytype) !void { - if (lazy_bind.size() == 0) return; - - const stub_helper_section_index = self.stub_helper_section_index.?; - // assert(ctx.stub_helper_preamble_allocated); - - const header = self.sections.items(.header)[stub_helper_section_index]; - - const target = self.base.comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const preamble_size = stubs.stubHelperPreambleSize(cpu_arch); - const stub_size = stubs.stubHelperSize(cpu_arch); - const stub_offset = stubs.stubOffsetInStubHelper(cpu_arch); - const base_offset = header.offset + preamble_size; - - for (lazy_bind.offsets.items, 0..) |bind_offset, index| { - const file_offset = base_offset + index * stub_size + stub_offset; - - log.debug("writing lazy bind offset 0x{x} ({s}) in stub helper at 0x{x}", .{ - bind_offset, - self.getSymbolName(lazy_bind.entries.items[index].target), - file_offset, - }); - - try self.base.file.?.pwriteAll(mem.asBytes(&bind_offset), file_offset); - } -} - -const asc_u64 = std.sort.asc(u64); - -fn addSymbolToFunctionStarts(self: *MachO, sym_loc: SymbolWithLoc, addresses: *std.ArrayList(u64)) !void { - const sym = self.getSymbol(sym_loc); - if (sym.n_strx == 0) return; - if (sym.n_desc == N_DEAD) return; - if (sym.n_desc == N_BOUNDARY) return; - if (self.symbolIsTemp(sym_loc)) return; - try addresses.append(sym.n_value); -} - -fn writeFunctionStarts(self: *MachO) !void { - const gpa = self.base.comp.gpa; - const seg = self.segments.items[self.header_segment_cmd_index.?]; - - // We need to sort by address first - var addresses = std.ArrayList(u64).init(gpa); - defer addresses.deinit(); - - for (self.objects.items) |object| { - for (object.exec_atoms.items) |atom_index| { - const atom = self.getAtom(atom_index); - const sym_loc = atom.getSymbolWithLoc(); - try self.addSymbolToFunctionStarts(sym_loc, &addresses); - - var it = Atom.getInnerSymbolsIterator(self, atom_index); - while (it.next()) 
|inner_sym_loc| { - try self.addSymbolToFunctionStarts(inner_sym_loc, &addresses); - } - } - } - - mem.sort(u64, addresses.items, {}, asc_u64); - - var offsets = std.ArrayList(u32).init(gpa); - defer offsets.deinit(); - try offsets.ensureTotalCapacityPrecise(addresses.items.len); - - var last_off: u32 = 0; - for (addresses.items) |addr| { - const offset = @as(u32, @intCast(addr - seg.vmaddr)); - const diff = offset - last_off; - - if (diff == 0) continue; - - offsets.appendAssumeCapacity(diff); - last_off = offset; - } - - var buffer = std.ArrayList(u8).init(gpa); - defer buffer.deinit(); - - const max_size = @as(usize, @intCast(offsets.items.len * @sizeOf(u64))); - try buffer.ensureTotalCapacity(max_size); - - for (offsets.items) |offset| { - try std.leb.writeULEB128(buffer.writer(), offset); - } - - const link_seg = self.getLinkeditSegmentPtr(); - const offset = link_seg.fileoff + link_seg.filesize; - assert(mem.isAlignedGeneric(u64, offset, @alignOf(u64))); - const needed_size = buffer.items.len; - const needed_size_aligned = mem.alignForward(u64, needed_size, @alignOf(u64)); - const padding = math.cast(usize, needed_size_aligned - needed_size) orelse return error.Overflow; - if (padding > 0) { - try buffer.ensureUnusedCapacity(padding); - buffer.appendNTimesAssumeCapacity(0, padding); - } - link_seg.filesize = offset + needed_size_aligned - link_seg.fileoff; - - log.debug("writing function starts info from 0x{x} to 0x{x}", .{ offset, offset + needed_size_aligned }); - - try self.base.file.?.pwriteAll(buffer.items, offset); - - self.function_starts_cmd.dataoff = @as(u32, @intCast(offset)); - self.function_starts_cmd.datasize = @as(u32, @intCast(needed_size_aligned)); -} - -fn filterDataInCode( - dices: []const macho.data_in_code_entry, - start_addr: u64, - end_addr: u64, -) []const macho.data_in_code_entry { - const Predicate = struct { - addr: u64, - - pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool { - return dice.offset >= self.addr; - } 
- }; - - const start = MachO.lsearch(macho.data_in_code_entry, dices, Predicate{ .addr = start_addr }); - const end = MachO.lsearch(macho.data_in_code_entry, dices[start..], Predicate{ .addr = end_addr }) + start; - - return dices[start..end]; -} - -pub fn writeDataInCode(self: *MachO) !void { - const gpa = self.base.comp.gpa; - var out_dice = std.ArrayList(macho.data_in_code_entry).init(gpa); - defer out_dice.deinit(); - - const text_sect_id = self.text_section_index orelse return; - const text_sect_header = self.sections.items(.header)[text_sect_id]; - - for (self.objects.items) |object| { - if (!object.hasDataInCode()) continue; - const dice = object.data_in_code.items; - try out_dice.ensureUnusedCapacity(dice.len); - - for (object.exec_atoms.items) |atom_index| { - const atom = self.getAtom(atom_index); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - if (sym.n_desc == N_DEAD) continue; - if (sym.n_desc == N_BOUNDARY) return; - - const source_addr = if (object.getSourceSymbol(atom.sym_index)) |source_sym| - source_sym.n_value - else blk: { - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const source_sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - break :blk object.getSourceSection(source_sect_id).addr; - }; - const filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size); - const base = math.cast(u32, sym.n_value - text_sect_header.addr + text_sect_header.offset) orelse - return error.Overflow; - - for (filtered_dice) |single| { - const offset = math.cast(u32, single.offset - source_addr + base) orelse - return error.Overflow; - out_dice.appendAssumeCapacity(.{ - .offset = offset, - .length = single.length, - .kind = single.kind, - }); - } - } - } - - const seg = self.getLinkeditSegmentPtr(); - const offset = seg.fileoff + seg.filesize; - assert(mem.isAlignedGeneric(u64, offset, @alignOf(u64))); - const needed_size = out_dice.items.len * @sizeOf(macho.data_in_code_entry); - const needed_size_aligned = 
mem.alignForward(u64, needed_size, @alignOf(u64)); - seg.filesize = offset + needed_size_aligned - seg.fileoff; - - const buffer = try gpa.alloc(u8, math.cast(usize, needed_size_aligned) orelse return error.Overflow); - defer gpa.free(buffer); - { - const src = mem.sliceAsBytes(out_dice.items); - @memcpy(buffer[0..src.len], src); - @memset(buffer[src.len..], 0); - } - - log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ offset, offset + needed_size_aligned }); - - try self.base.file.?.pwriteAll(buffer, offset); - - self.data_in_code_cmd.dataoff = @as(u32, @intCast(offset)); - self.data_in_code_cmd.datasize = @as(u32, @intCast(needed_size_aligned)); -} - -fn writeSymtabs(self: *MachO) !void { - var ctx = try self.writeSymtab(); - defer ctx.imports_table.deinit(); - try self.writeDysymtab(ctx); - try self.writeStrtab(); -} - -fn addLocalToSymtab(self: *MachO, sym_loc: SymbolWithLoc, locals: *std.ArrayList(macho.nlist_64)) !void { - const sym = self.getSymbol(sym_loc); - if (sym.n_strx == 0) return; // no name, skip - if (sym.n_desc == N_DEAD) return; // garbage-collected, skip - if (sym.n_desc == N_BOUNDARY) return; // boundary symbol, skip - if (sym.ext()) return; // an export lands in its own symtab section, skip - if (self.symbolIsTemp(sym_loc)) return; // local temp symbol, skip - const gpa = self.base.comp.gpa; - var out_sym = sym; - out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(sym_loc)); - try locals.append(out_sym); -} - -fn writeSymtab(self: *MachO) !SymtabCtx { - const comp = self.base.comp; - const gpa = comp.gpa; - - var locals = std.ArrayList(macho.nlist_64).init(gpa); - defer locals.deinit(); - - for (0..self.locals.items.len) |sym_id| { - try self.addLocalToSymtab(.{ .sym_index = @intCast(sym_id) }, &locals); - } - - for (self.objects.items) |object| { - for (object.atoms.items) |atom_index| { - const atom = self.getAtom(atom_index); - const sym_loc = atom.getSymbolWithLoc(); - try self.addLocalToSymtab(sym_loc, &locals); - - 
var it = Atom.getInnerSymbolsIterator(self, atom_index); - while (it.next()) |inner_sym_loc| { - try self.addLocalToSymtab(inner_sym_loc, &locals); - } - } - } - - var exports = std.ArrayList(macho.nlist_64).init(gpa); - defer exports.deinit(); - - for (self.globals.items) |global| { - const sym = self.getSymbol(global); - if (sym.undf()) continue; // import, skip - if (sym.n_desc == N_DEAD) continue; - if (sym.n_desc == N_BOUNDARY) continue; - var out_sym = sym; - out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(global)); - try exports.append(out_sym); - } - - var imports = std.ArrayList(macho.nlist_64).init(gpa); - defer imports.deinit(); - - var imports_table = std.AutoHashMap(SymbolWithLoc, u32).init(gpa); - - for (self.globals.items) |global| { - const sym = self.getSymbol(global); - if (sym.n_strx == 0) continue; // no name, skip - if (!sym.undf()) continue; // not an import, skip - if (sym.n_desc == N_DEAD) continue; - if (sym.n_desc == N_BOUNDARY) continue; - const new_index = @as(u32, @intCast(imports.items.len)); - var out_sym = sym; - out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(global)); - try imports.append(out_sym); - try imports_table.putNoClobber(global, new_index); - } - - // We generate stabs last in order to ensure that the strtab always has debug info - // strings trailing - if (comp.config.debug_format != .strip) { - for (self.objects.items) |object| { - assert(self.d_sym == null); // TODO - try self.generateSymbolStabs(object, &locals); - } - } - - const nlocals = @as(u32, @intCast(locals.items.len)); - const nexports = @as(u32, @intCast(exports.items.len)); - const nimports = @as(u32, @intCast(imports.items.len)); - const nsyms = nlocals + nexports + nimports; - - const seg = self.getLinkeditSegmentPtr(); - const offset = seg.fileoff + seg.filesize; - assert(mem.isAlignedGeneric(u64, offset, @alignOf(u64))); - const needed_size = nsyms * @sizeOf(macho.nlist_64); - seg.filesize = offset + needed_size - 
seg.fileoff; - assert(mem.isAlignedGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64))); - - var buffer = std.ArrayList(u8).init(gpa); - defer buffer.deinit(); - try buffer.ensureTotalCapacityPrecise(needed_size); - buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(locals.items)); - buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(exports.items)); - buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(imports.items)); - - log.debug("writing symtab from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - try self.base.file.?.pwriteAll(buffer.items, offset); - - self.symtab_cmd.symoff = @as(u32, @intCast(offset)); - self.symtab_cmd.nsyms = nsyms; - - return SymtabCtx{ - .nlocalsym = nlocals, - .nextdefsym = nexports, - .nundefsym = nimports, - .imports_table = imports_table, - }; -} - -// TODO this function currently skips generating symbol stabs in case errors are encountered in DWARF data. -// I think we should actually report those errors to the user and let them decide if they want to strip debug info -// in that case or not. -fn generateSymbolStabs( - self: *MachO, - object: Object, - locals: *std.ArrayList(macho.nlist_64), -) !void { - log.debug("generating stabs for '{s}'", .{object.name}); - - const gpa = self.base.comp.gpa; - var debug_info = object.parseDwarfInfo(); - - var lookup = DwarfInfo.AbbrevLookupTable.init(gpa); - defer lookup.deinit(); - try lookup.ensureUnusedCapacity(std.math.maxInt(u8)); - - // We assume there is only one CU. 
- var cu_it = debug_info.getCompileUnitIterator(); - const compile_unit = while (try cu_it.next()) |cu| { - const offset = math.cast(usize, cu.cuh.debug_abbrev_offset) orelse return error.Overflow; - try debug_info.genAbbrevLookupByKind(offset, &lookup); - break cu; - } else { - log.debug("no compile unit found in debug info in {s}; skipping", .{object.name}); - return; - }; - - var abbrev_it = compile_unit.getAbbrevEntryIterator(debug_info); - const maybe_cu_entry: ?DwarfInfo.AbbrevEntry = blk: { - while (abbrev_it.next(lookup) catch break :blk null) |entry| switch (entry.tag) { - dwarf.TAG.compile_unit => break :blk entry, - else => continue, - } else break :blk null; - }; - - const cu_entry = maybe_cu_entry orelse { - log.debug("missing DWARF_TAG_compile_unit tag in {s}; skipping", .{object.name}); - return; - }; - - var maybe_tu_name: ?[]const u8 = null; - var maybe_tu_comp_dir: ?[]const u8 = null; - var attr_it = cu_entry.getAttributeIterator(debug_info, compile_unit.cuh); - - blk: { - while (attr_it.next() catch break :blk) |attr| switch (attr.name) { - dwarf.AT.comp_dir => maybe_tu_comp_dir = attr.getString(debug_info, compile_unit.cuh) orelse continue, - dwarf.AT.name => maybe_tu_name = attr.getString(debug_info, compile_unit.cuh) orelse continue, - else => continue, - }; - } - - if (maybe_tu_name == null or maybe_tu_comp_dir == null) { - log.debug("missing DWARF_AT_comp_dir and DWARF_AT_name attributes {s}; skipping", .{object.name}); - return; - } - - const tu_name = maybe_tu_name.?; - const tu_comp_dir = maybe_tu_comp_dir.?; - - // Open scope - try locals.ensureUnusedCapacity(3); - locals.appendAssumeCapacity(.{ - .n_strx = try self.strtab.insert(gpa, tu_comp_dir), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.strtab.insert(gpa, tu_name), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try 
self.strtab.insert(gpa, object.name), - .n_type = macho.N_OSO, - .n_sect = 0, - .n_desc = 1, - .n_value = object.mtime, - }); - - var stabs_buf: [4]macho.nlist_64 = undefined; - - var name_lookup: ?DwarfInfo.SubprogramLookupByName = if (object.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS == 0) blk: { - var name_lookup = DwarfInfo.SubprogramLookupByName.init(gpa); - errdefer name_lookup.deinit(); - try name_lookup.ensureUnusedCapacity(@as(u32, @intCast(object.atoms.items.len))); - debug_info.genSubprogramLookupByName(compile_unit, lookup, &name_lookup) catch |err| switch (err) { - error.UnhandledDwFormValue => {}, // TODO I don't like the fact we constantly re-iterate and hit this; we should validate once a priori - else => |e| return e, - }; - break :blk name_lookup; - } else null; - defer if (name_lookup) |*nl| nl.deinit(); - - for (object.atoms.items) |atom_index| { - const atom = self.getAtom(atom_index); - const stabs = try self.generateSymbolStabsForSymbol( - atom_index, - atom.getSymbolWithLoc(), - name_lookup, - &stabs_buf, - ); - try locals.appendSlice(stabs); - - var it = Atom.getInnerSymbolsIterator(self, atom_index); - while (it.next()) |sym_loc| { - const contained_stabs = try self.generateSymbolStabsForSymbol( - atom_index, - sym_loc, - name_lookup, - &stabs_buf, - ); - try locals.appendSlice(contained_stabs); - } - } - - // Close scope - try locals.append(.{ - .n_strx = 0, - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); -} - -fn generateSymbolStabsForSymbol( - self: *MachO, - atom_index: Atom.Index, - sym_loc: SymbolWithLoc, - lookup: ?DwarfInfo.SubprogramLookupByName, - buf: *[4]macho.nlist_64, -) ![]const macho.nlist_64 { - const gpa = self.base.comp.gpa; - const object = self.objects.items[sym_loc.getFile().?]; - const sym = self.getSymbol(sym_loc); - const sym_name = self.getSymbolName(sym_loc); - const header = self.sections.items(.header)[sym.n_sect - 1]; - - if (sym.n_strx == 0) return buf[0..0]; - if 
(self.symbolIsTemp(sym_loc)) return buf[0..0]; - - if (!header.isCode()) { - // Since we are not dealing with machine code, it's either a global or a static depending - // on the linkage scope. - if (sym.sect() and sym.ext()) { - // Global gets an N_GSYM stab type. - buf[0] = .{ - .n_strx = try self.strtab.insert(gpa, sym_name), - .n_type = macho.N_GSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = 0, - }; - } else { - // Local static gets an N_STSYM stab type. - buf[0] = .{ - .n_strx = try self.strtab.insert(gpa, sym_name), - .n_type = macho.N_STSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }; - } - return buf[0..1]; - } - - const size: u64 = size: { - if (object.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0) { - break :size self.getAtom(atom_index).size; - } - - // Since we don't have subsections to work with, we need to infer the size of each function - // the slow way by scanning the debug info for matching symbol names and extracting - // the symbol's DWARF_AT_low_pc and DWARF_AT_high_pc values. 
- const source_sym = object.getSourceSymbol(sym_loc.sym_index) orelse return buf[0..0]; - const subprogram = lookup.?.get(sym_name[1..]) orelse return buf[0..0]; - - if (subprogram.addr <= source_sym.n_value and source_sym.n_value < subprogram.addr + subprogram.size) { - break :size subprogram.size; - } else { - log.debug("no stab found for {s}", .{sym_name}); - return buf[0..0]; - } - }; - - buf[0] = .{ - .n_strx = 0, - .n_type = macho.N_BNSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }; - buf[1] = .{ - .n_strx = try self.strtab.insert(gpa, sym_name), - .n_type = macho.N_FUN, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }; - buf[2] = .{ - .n_strx = 0, - .n_type = macho.N_FUN, - .n_sect = 0, - .n_desc = 0, - .n_value = size, - }; - buf[3] = .{ - .n_strx = 0, - .n_type = macho.N_ENSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = size, - }; - - return buf; -} - -pub fn writeStrtab(self: *MachO) !void { - const gpa = self.base.comp.gpa; - const seg = self.getLinkeditSegmentPtr(); - const offset = seg.fileoff + seg.filesize; - assert(mem.isAlignedGeneric(u64, offset, @alignOf(u64))); - const needed_size = self.strtab.buffer.items.len; - const needed_size_aligned = mem.alignForward(u64, needed_size, @alignOf(u64)); - seg.filesize = offset + needed_size_aligned - seg.fileoff; - - log.debug("writing string table from 0x{x} to 0x{x}", .{ offset, offset + needed_size_aligned }); - - const buffer = try gpa.alloc(u8, math.cast(usize, needed_size_aligned) orelse return error.Overflow); - defer gpa.free(buffer); - @memcpy(buffer[0..self.strtab.buffer.items.len], self.strtab.buffer.items); - @memset(buffer[self.strtab.buffer.items.len..], 0); - - try self.base.file.?.pwriteAll(buffer, offset); - - self.symtab_cmd.stroff = @as(u32, @intCast(offset)); - self.symtab_cmd.strsize = @as(u32, @intCast(needed_size_aligned)); -} - -const SymtabCtx = struct { - nlocalsym: u32, - nextdefsym: u32, - nundefsym: u32, - imports_table: 
std.AutoHashMap(SymbolWithLoc, u32), -}; - -pub fn writeDysymtab(self: *MachO, ctx: SymtabCtx) !void { - const gpa = self.base.comp.gpa; - const nstubs = @as(u32, @intCast(self.stub_table.lookup.count())); - const ngot_entries = @as(u32, @intCast(self.got_table.lookup.count())); - const nindirectsyms = nstubs * 2 + ngot_entries; - const iextdefsym = ctx.nlocalsym; - const iundefsym = iextdefsym + ctx.nextdefsym; - - const seg = self.getLinkeditSegmentPtr(); - const offset = seg.fileoff + seg.filesize; - assert(mem.isAlignedGeneric(u64, offset, @alignOf(u64))); - const needed_size = nindirectsyms * @sizeOf(u32); - const needed_size_aligned = mem.alignForward(u64, needed_size, @alignOf(u64)); - seg.filesize = offset + needed_size_aligned - seg.fileoff; - - log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ offset, offset + needed_size_aligned }); - - var buf = std.ArrayList(u8).init(gpa); - defer buf.deinit(); - try buf.ensureTotalCapacity(math.cast(usize, needed_size_aligned) orelse return error.Overflow); - const writer = buf.writer(); - - if (self.stubs_section_index) |sect_id| { - const stubs_header = &self.sections.items(.header)[sect_id]; - stubs_header.reserved1 = 0; - for (self.stub_table.entries.items) |entry| { - if (!self.stub_table.lookup.contains(entry)) continue; - const target_sym = self.getSymbol(entry); - assert(target_sym.undf()); - try writer.writeInt(u32, iundefsym + ctx.imports_table.get(entry).?, .little); - } - } - - if (self.got_section_index) |sect_id| { - const got = &self.sections.items(.header)[sect_id]; - got.reserved1 = nstubs; - for (self.got_table.entries.items) |entry| { - if (!self.got_table.lookup.contains(entry)) continue; - const target_sym = self.getSymbol(entry); - if (target_sym.undf()) { - try writer.writeInt(u32, iundefsym + ctx.imports_table.get(entry).?, .little); - } else { - try writer.writeInt(u32, macho.INDIRECT_SYMBOL_LOCAL, .little); - } - } - } - - if (self.la_symbol_ptr_section_index) |sect_id| { - 
const la_symbol_ptr = &self.sections.items(.header)[sect_id]; - la_symbol_ptr.reserved1 = nstubs + ngot_entries; - for (self.stub_table.entries.items) |entry| { - if (!self.stub_table.lookup.contains(entry)) continue; - const target_sym = self.getSymbol(entry); - assert(target_sym.undf()); - try writer.writeInt(u32, iundefsym + ctx.imports_table.get(entry).?, .little); - } - } - - const padding = math.cast(usize, needed_size_aligned - needed_size) orelse return error.Overflow; - if (padding > 0) { - buf.appendNTimesAssumeCapacity(0, padding); - } - - assert(buf.items.len == needed_size_aligned); - try self.base.file.?.pwriteAll(buf.items, offset); - - self.dysymtab_cmd.nlocalsym = ctx.nlocalsym; - self.dysymtab_cmd.iextdefsym = iextdefsym; - self.dysymtab_cmd.nextdefsym = ctx.nextdefsym; - self.dysymtab_cmd.iundefsym = iundefsym; - self.dysymtab_cmd.nundefsym = ctx.nundefsym; - self.dysymtab_cmd.indirectsymoff = @as(u32, @intCast(offset)); - self.dysymtab_cmd.nindirectsyms = nindirectsyms; -} - -pub fn writeUuid(self: *MachO, comp: *const Compilation, uuid_cmd_offset: u32, has_codesig: bool) !void { - const file_size = if (!has_codesig) blk: { - const seg = self.getLinkeditSegmentPtr(); - break :blk seg.fileoff + seg.filesize; - } else self.codesig_cmd.dataoff; - try calcUuid(comp, self.base.file.?, file_size, &self.uuid_cmd.uuid); - const offset = uuid_cmd_offset + @sizeOf(macho.load_command); - try self.base.file.?.pwriteAll(&self.uuid_cmd.uuid, offset); -} - -pub fn writeCodeSignaturePadding(self: *MachO, code_sig: *CodeSignature) !void { - const target = self.base.comp.root_mod.resolved_target.result; - const seg = self.getLinkeditSegmentPtr(); - // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file - // https://github.com/opensource-apple/cctools/blob/fdb4825f303fd5c0751be524babd32958181b3ed/libstuff/checkout.c#L271 - const offset = mem.alignForward(u64, seg.fileoff + seg.filesize, 16); - const needed_size = 
code_sig.estimateSize(offset); - seg.filesize = offset + needed_size - seg.fileoff; - seg.vmsize = mem.alignForward(u64, seg.filesize, getPageSize(target.cpu.arch)); - log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - // Pad out the space. We need to do this to calculate valid hashes for everything in the file - // except for code signature data. - try self.base.file.?.pwriteAll(&[_]u8{0}, offset + needed_size - 1); - - self.codesig_cmd.dataoff = @as(u32, @intCast(offset)); - self.codesig_cmd.datasize = @as(u32, @intCast(needed_size)); -} - -pub fn writeCodeSignature(self: *MachO, comp: *const Compilation, code_sig: *CodeSignature) !void { - const output_mode = self.base.comp.config.output_mode; - const seg_id = self.header_segment_cmd_index.?; - const seg = self.segments.items[seg_id]; - const offset = self.codesig_cmd.dataoff; - - const gpa = self.base.comp.gpa; - var buffer = std.ArrayList(u8).init(gpa); - defer buffer.deinit(); - try buffer.ensureTotalCapacityPrecise(code_sig.size()); - try code_sig.writeAdhocSignature(comp, .{ - .file = self.base.file.?, - .exec_seg_base = seg.fileoff, - .exec_seg_limit = seg.filesize, - .file_size = offset, - .output_mode = output_mode, - }, buffer.writer()); - assert(buffer.items.len == code_sig.size()); - - log.debug("writing code signature from 0x{x} to 0x{x}", .{ - offset, - offset + buffer.items.len, - }); - - try self.base.file.?.pwriteAll(buffer.items, offset); -} - -/// Writes Mach-O file header. 
-pub fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void { - const output_mode = self.base.comp.config.output_mode; - - var header: macho.mach_header_64 = .{}; - header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL; - - const target = self.base.comp.root_mod.resolved_target.result; - switch (target.cpu.arch) { - .aarch64 => { - header.cputype = macho.CPU_TYPE_ARM64; - header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL; - }, - .x86_64 => { - header.cputype = macho.CPU_TYPE_X86_64; - header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL; - }, - else => unreachable, - } - - switch (output_mode) { - .Exe => { - header.filetype = macho.MH_EXECUTE; - }, - .Lib => { - // By this point, it can only be a dylib. - header.filetype = macho.MH_DYLIB; - header.flags |= macho.MH_NO_REEXPORTED_DYLIBS; - }, - else => unreachable, - } - - if (self.thread_vars_section_index) |sect_id| { - header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; - if (self.sections.items(.header)[sect_id].size > 0) { - header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; - } - } - - header.ncmds = ncmds; - header.sizeofcmds = sizeofcmds; - - log.debug("writing Mach-O header {}", .{header}); - - try self.base.file.?.pwriteAll(mem.asBytes(&header), 0); + @panic("TODO getGlobalSymbol"); } pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { @@ -4799,370 +707,12 @@ fn findFreeSpace(self: *MachO, object_size: u64, min_alignment: u32) u64 { return start; } -pub fn allocatedVirtualSize(self: *MachO, start: u64) u64 { - if (start == 0) - return 0; - var min_pos: u64 = std.math.maxInt(u64); - for (self.sections.items(.segment_index)) |seg_id| { - const segment = self.segments.items[seg_id]; - if (segment.vmaddr <= start) continue; - if (segment.vmaddr < min_pos) min_pos = segment.vmaddr; - } - return min_pos - start; -} - -pub fn ptraceAttach(self: *MachO, pid: std.os.pid_t) !void { - if (!is_hot_update_compatible) return; - - const mach_task = try 
std.os.darwin.machTaskForPid(pid); - log.debug("Mach task for pid {d}: {any}", .{ pid, mach_task }); - self.hot_state.mach_task = mach_task; - - // TODO start exception handler in another thread - - // TODO enable ones we register for exceptions - // try std.os.ptrace(std.os.darwin.PT.ATTACHEXC, pid, 0, 0); -} - -pub fn ptraceDetach(self: *MachO, pid: std.os.pid_t) !void { - if (!is_hot_update_compatible) return; - - _ = pid; - - // TODO stop exception handler - - // TODO see comment in ptraceAttach - // try std.os.ptrace(std.os.darwin.PT.DETACH, pid, 0, 0); - - self.hot_state.mach_task = null; -} - -pub fn addUndefined(self: *MachO, name: []const u8, flags: RelocFlags) !u32 { - const gpa = self.base.comp.gpa; - - const gop = try self.getOrPutGlobalPtr(name); - const global_index = self.getGlobalIndex(name).?; - - if (gop.found_existing) { - try self.updateRelocActions(global_index, flags); - return global_index; - } - - const sym_index = try self.allocateSymbol(); - const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; - gop.value_ptr.* = sym_loc; - - const sym = self.getSymbolPtr(sym_loc); - sym.n_strx = try self.strtab.insert(gpa, name); - sym.n_type = macho.N_EXT | macho.N_UNDF; - - try self.unresolved.putNoClobber(gpa, global_index, {}); - try self.updateRelocActions(global_index, flags); - - return global_index; -} - -fn updateRelocActions(self: *MachO, global_index: u32, flags: RelocFlags) !void { - const gpa = self.base.comp.gpa; - const act_gop = try self.actions.getOrPut(gpa, global_index); - if (!act_gop.found_existing) { - act_gop.value_ptr.* = .{}; - } - act_gop.value_ptr.add_got = act_gop.value_ptr.add_got or flags.add_got; - act_gop.value_ptr.add_stub = act_gop.value_ptr.add_stub or flags.add_stub; -} - pub fn makeStaticString(bytes: []const u8) [16]u8 { var buf = [_]u8{0} ** 16; @memcpy(buf[0..bytes.len], bytes); return buf; } -pub fn getSegmentByName(self: MachO, segname: []const u8) ?u8 { - for (self.segments.items, 0..) 
|seg, i| { - if (mem.eql(u8, segname, seg.segName())) return @as(u8, @intCast(i)); - } else return null; -} - -pub fn getSegment(self: MachO, sect_id: u8) macho.segment_command_64 { - const index = self.sections.items(.segment_index)[sect_id]; - return self.segments.items[index]; -} - -pub fn getSegmentPtr(self: *MachO, sect_id: u8) *macho.segment_command_64 { - const index = self.sections.items(.segment_index)[sect_id]; - return &self.segments.items[index]; -} - -pub fn getLinkeditSegmentPtr(self: *MachO) *macho.segment_command_64 { - const index = self.linkedit_segment_cmd_index.?; - return &self.segments.items[index]; -} - -pub fn getSectionByName(self: MachO, segname: []const u8, sectname: []const u8) ?u8 { - // TODO investigate caching with a hashmap - for (self.sections.items(.header), 0..) |header, i| { - if (mem.eql(u8, header.segName(), segname) and mem.eql(u8, header.sectName(), sectname)) - return @as(u8, @intCast(i)); - } else return null; -} - -pub fn getSectionIndexes(self: MachO, segment_index: u8) struct { start: u8, end: u8 } { - var start: u8 = 0; - const nsects = for (self.segments.items, 0..) |seg, i| { - if (i == segment_index) break @as(u8, @intCast(seg.nsects)); - start += @as(u8, @intCast(seg.nsects)); - } else 0; - return .{ .start = start, .end = start + nsects }; -} - -pub fn symbolIsTemp(self: *MachO, sym_with_loc: SymbolWithLoc) bool { - const sym = self.getSymbol(sym_with_loc); - if (!sym.sect()) return false; - if (sym.ext()) return false; - const sym_name = self.getSymbolName(sym_with_loc); - return mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L"); -} - -/// Returns pointer-to-symbol described by `sym_with_loc` descriptor. 
-pub fn getSymbolPtr(self: *MachO, sym_with_loc: SymbolWithLoc) *macho.nlist_64 { - if (sym_with_loc.getFile()) |file| { - const object = &self.objects.items[file]; - return &object.symtab[sym_with_loc.sym_index]; - } else { - return &self.locals.items[sym_with_loc.sym_index]; - } -} - -/// Returns symbol described by `sym_with_loc` descriptor. -pub fn getSymbol(self: *const MachO, sym_with_loc: SymbolWithLoc) macho.nlist_64 { - if (sym_with_loc.getFile()) |file| { - const object = &self.objects.items[file]; - return object.symtab[sym_with_loc.sym_index]; - } else { - return self.locals.items[sym_with_loc.sym_index]; - } -} - -/// Returns name of the symbol described by `sym_with_loc` descriptor. -pub fn getSymbolName(self: *const MachO, sym_with_loc: SymbolWithLoc) []const u8 { - if (sym_with_loc.getFile()) |file| { - const object = self.objects.items[file]; - return object.getSymbolName(sym_with_loc.sym_index); - } else { - const sym = self.locals.items[sym_with_loc.sym_index]; - return self.strtab.get(sym.n_strx).?; - } -} - -const BoundarySymbolKind = enum { - start, - stop, -}; - -const SectionBoundarySymbol = struct { - kind: BoundarySymbolKind, - segname: []const u8, - sectname: []const u8, -}; - -pub fn getSectionBoundarySymbol(self: *const MachO, sym_with_loc: SymbolWithLoc) ?SectionBoundarySymbol { - const sym_name = self.getSymbolName(sym_with_loc); - if (mem.startsWith(u8, sym_name, "section$")) { - const trailing = sym_name["section$".len..]; - const kind: BoundarySymbolKind = kind: { - if (mem.startsWith(u8, trailing, "start$")) break :kind .start; - if (mem.startsWith(u8, trailing, "stop$")) break :kind .stop; - return null; - }; - const names = trailing[@tagName(kind).len + 1 ..]; - const sep_idx = mem.indexOf(u8, names, "$") orelse return null; - const segname = names[0..sep_idx]; - const sectname = names[sep_idx + 1 ..]; - return .{ .kind = kind, .segname = segname, .sectname = sectname }; - } - return null; -} - -const SegmentBoundarySymbol = 
struct { - kind: BoundarySymbolKind, - segname: []const u8, -}; - -pub fn getSegmentBoundarySymbol(self: *const MachO, sym_with_loc: SymbolWithLoc) ?SegmentBoundarySymbol { - const sym_name = self.getSymbolName(sym_with_loc); - if (mem.startsWith(u8, sym_name, "segment$")) { - const trailing = sym_name["segment$".len..]; - const kind: BoundarySymbolKind = kind: { - if (mem.startsWith(u8, trailing, "start$")) break :kind .start; - if (mem.startsWith(u8, trailing, "stop$")) break :kind .stop; - return null; - }; - const segname = trailing[@tagName(kind).len + 1 ..]; - return .{ .kind = kind, .segname = segname }; - } - return null; -} - -/// Returns pointer to the global entry for `name` if one exists. -pub fn getGlobalPtr(self: *MachO, name: []const u8) ?*SymbolWithLoc { - const global_index = self.resolver.get(name) orelse return null; - return &self.globals.items[global_index]; -} - -/// Returns the global entry for `name` if one exists. -pub fn getGlobal(self: *const MachO, name: []const u8) ?SymbolWithLoc { - const global_index = self.resolver.get(name) orelse return null; - return self.globals.items[global_index]; -} - -/// Returns the index of the global entry for `name` if one exists. -pub fn getGlobalIndex(self: *const MachO, name: []const u8) ?u32 { - return self.resolver.get(name); -} - -/// Returns global entry at `index`. -pub fn getGlobalByIndex(self: *const MachO, index: u32) SymbolWithLoc { - assert(index < self.globals.items.len); - return self.globals.items[index]; -} - -const GetOrPutGlobalPtrResult = struct { - found_existing: bool, - value_ptr: *SymbolWithLoc, -}; - -/// Used only for disambiguating local from global at relocation level. -/// TODO this must go away. -pub const global_symbol_bit: u32 = 0x80000000; -pub const global_symbol_mask: u32 = 0x7fffffff; - -/// Return pointer to the global entry for `name` if one exists. -/// Puts a new global entry for `name` if one doesn't exist, and -/// returns a pointer to it. 
-pub fn getOrPutGlobalPtr(self: *MachO, name: []const u8) !GetOrPutGlobalPtrResult { - if (self.getGlobalPtr(name)) |ptr| { - return GetOrPutGlobalPtrResult{ .found_existing = true, .value_ptr = ptr }; - } - const gpa = self.base.comp.gpa; - const global_index = try self.allocateGlobal(); - const global_name = try gpa.dupe(u8, name); - _ = try self.resolver.put(gpa, global_name, global_index); - const ptr = &self.globals.items[global_index]; - return GetOrPutGlobalPtrResult{ .found_existing = false, .value_ptr = ptr }; -} - -pub fn getAtom(self: *MachO, atom_index: Atom.Index) Atom { - assert(atom_index < self.atoms.items.len); - return self.atoms.items[atom_index]; -} - -pub fn getAtomPtr(self: *MachO, atom_index: Atom.Index) *Atom { - assert(atom_index < self.atoms.items.len); - return &self.atoms.items[atom_index]; -} - -/// Returns atom if there is an atom referenced by the symbol described by `sym_with_loc` descriptor. -/// Returns null on failure. -pub fn getAtomIndexForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?Atom.Index { - assert(sym_with_loc.getFile() == null); - return self.atom_by_index_table.get(sym_with_loc.sym_index); -} - -pub fn getGotEntryAddress(self: *MachO, sym_with_loc: SymbolWithLoc) ?u64 { - const index = self.got_table.lookup.get(sym_with_loc) orelse return null; - const header = self.sections.items(.header)[self.got_section_index.?]; - return header.addr + @sizeOf(u64) * index; -} - -pub fn getTlvPtrEntryAddress(self: *MachO, sym_with_loc: SymbolWithLoc) ?u64 { - const index = self.tlv_ptr_table.lookup.get(sym_with_loc) orelse return null; - const header = self.sections.items(.header)[self.tlv_ptr_section_index.?]; - return header.addr + @sizeOf(u64) * index; -} - -pub fn getStubsEntryAddress(self: *MachO, sym_with_loc: SymbolWithLoc) ?u64 { - const target = self.base.comp.root_mod.resolved_target.result; - const index = self.stub_table.lookup.get(sym_with_loc) orelse return null; - const header = 
self.sections.items(.header)[self.stubs_section_index.?]; - return header.addr + stubs.stubSize(target.cpu.arch) * index; -} - -/// Returns symbol location corresponding to the set entrypoint if any. -/// Asserts output mode is executable. -pub fn getEntryPoint(self: MachO) ?SymbolWithLoc { - const entry_name = self.entry_name orelse return null; - const global = self.getGlobal(entry_name) orelse return null; - return global; -} - -pub fn getDebugSymbols(self: *MachO) ?*DebugSymbols { - if (self.d_sym == null) return null; - return &self.d_sym.?; -} - -pub inline fn getPageSize(cpu_arch: std.Target.Cpu.Arch) u16 { - return switch (cpu_arch) { - .aarch64 => 0x4000, - .x86_64 => 0x1000, - else => unreachable, - }; -} - -pub fn requiresCodeSignature(m: *MachO) bool { - if (m.entitlements) |_| return true; - const comp = m.base.comp; - const target = comp.root_mod.resolved_target.result; - const cpu_arch = target.cpu.arch; - const os_tag = target.os.tag; - const abi = target.abi; - if (cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator)) return true; - return false; -} - -pub fn getSegmentPrecedence(segname: []const u8) u4 { - if (mem.eql(u8, segname, "__PAGEZERO")) return 0x0; - if (mem.eql(u8, segname, "__TEXT")) return 0x1; - if (mem.eql(u8, segname, "__DATA_CONST")) return 0x2; - if (mem.eql(u8, segname, "__DATA")) return 0x3; - if (mem.eql(u8, segname, "__LINKEDIT")) return 0x5; - return 0x4; -} - -pub fn getSegmentMemoryProtection(segname: []const u8) macho.vm_prot_t { - if (mem.eql(u8, segname, "__PAGEZERO")) return macho.PROT.NONE; - if (mem.eql(u8, segname, "__TEXT")) return macho.PROT.READ | macho.PROT.EXEC; - if (mem.eql(u8, segname, "__LINKEDIT")) return macho.PROT.READ; - return macho.PROT.READ | macho.PROT.WRITE; -} - -pub fn getSectionPrecedence(header: macho.section_64) u8 { - const segment_precedence: u4 = getSegmentPrecedence(header.segName()); - const section_precedence: u4 = blk: { - if (header.isCode()) { - if (mem.eql(u8, "__text", 
header.sectName())) break :blk 0x0; - if (header.type() == macho.S_SYMBOL_STUBS) break :blk 0x1; - break :blk 0x2; - } - switch (header.type()) { - macho.S_NON_LAZY_SYMBOL_POINTERS, - macho.S_LAZY_SYMBOL_POINTERS, - => break :blk 0x0, - macho.S_MOD_INIT_FUNC_POINTERS => break :blk 0x1, - macho.S_MOD_TERM_FUNC_POINTERS => break :blk 0x2, - macho.S_ZEROFILL => break :blk 0xf, - macho.S_THREAD_LOCAL_REGULAR => break :blk 0xd, - macho.S_THREAD_LOCAL_ZEROFILL => break :blk 0xe, - else => { - if (mem.eql(u8, "__unwind_info", header.sectName())) break :blk 0xe; - if (mem.eql(u8, "__eh_frame", header.sectName())) break :blk 0xf; - break :blk 0x3; - }, - } - }; - return (@as(u8, @intCast(segment_precedence)) << 4) + section_precedence; -} - pub const ParseErrorCtx = struct { arena_allocator: std.heap.ArenaAllocator, detected_dylib_id: struct { @@ -5354,308 +904,105 @@ pub fn reportUndefined(self: *MachO) error{OutOfMemory}!void { } } -fn reportSymbolCollision( - self: *MachO, - first: SymbolWithLoc, - other: SymbolWithLoc, -) error{OutOfMemory}!void { - const comp = self.base.comp; - const gpa = comp.gpa; - try comp.link_errors.ensureUnusedCapacity(gpa, 1); +// fn reportSymbolCollision( +// self: *MachO, +// first: SymbolWithLoc, +// other: SymbolWithLoc, +// ) error{OutOfMemory}!void { +// const comp = self.base.comp; +// const gpa = comp.gpa; +// try comp.link_errors.ensureUnusedCapacity(gpa, 1); - var notes = try std.ArrayList(File.ErrorMsg).initCapacity(gpa, 2); - defer notes.deinit(); +// var notes = try std.ArrayList(File.ErrorMsg).initCapacity(gpa, 2); +// defer notes.deinit(); - if (first.getFile()) |file| { - const note = try std.fmt.allocPrint(gpa, "first definition in {s}", .{ - self.objects.items[file].name, - }); - notes.appendAssumeCapacity(.{ .msg = note }); - } - if (other.getFile()) |file| { - const note = try std.fmt.allocPrint(gpa, "next definition in {s}", .{ - self.objects.items[file].name, - }); - notes.appendAssumeCapacity(.{ .msg = note }); - } +// 
if (first.getFile()) |file| { +// const note = try std.fmt.allocPrint(gpa, "first definition in {s}", .{ +// self.objects.items[file].name, +// }); +// notes.appendAssumeCapacity(.{ .msg = note }); +// } +// if (other.getFile()) |file| { +// const note = try std.fmt.allocPrint(gpa, "next definition in {s}", .{ +// self.objects.items[file].name, +// }); +// notes.appendAssumeCapacity(.{ .msg = note }); +// } - var err_msg = File.ErrorMsg{ .msg = try std.fmt.allocPrint(gpa, "symbol {s} defined multiple times", .{ - self.getSymbolName(first), - }) }; - err_msg.notes = try notes.toOwnedSlice(); +// var err_msg = File.ErrorMsg{ .msg = try std.fmt.allocPrint(gpa, "symbol {s} defined multiple times", .{ +// self.getSymbolName(first), +// }) }; +// err_msg.notes = try notes.toOwnedSlice(); - comp.link_errors.appendAssumeCapacity(err_msg); +// comp.link_errors.appendAssumeCapacity(err_msg); +// } + +// fn reportUnhandledSymbolType(self: *MachO, sym_with_loc: SymbolWithLoc) error{OutOfMemory}!void { +// const comp = self.base.comp; +// const gpa = comp.gpa; +// try comp.link_errors.ensureUnusedCapacity(gpa, 1); + +// const notes = try gpa.alloc(File.ErrorMsg, 1); +// errdefer gpa.free(notes); + +// const file = sym_with_loc.getFile().?; +// notes[0] = .{ .msg = try std.fmt.allocPrint(gpa, "defined in {s}", .{self.objects.items[file].name}) }; + +// const sym = self.getSymbol(sym_with_loc); +// const sym_type = if (sym.stab()) +// "stab" +// else if (sym.indr()) +// "indirect" +// else if (sym.abs()) +// "absolute" +// else +// unreachable; + +// comp.link_errors.appendAssumeCapacity(.{ +// .msg = try std.fmt.allocPrint(gpa, "unhandled symbol type: '{s}' has type {s}", .{ +// self.getSymbolName(sym_with_loc), +// sym_type, +// }), +// .notes = notes, +// }); +// } + +pub fn getDebugSymbols(self: *MachO) ?*DebugSymbols { + if (self.d_sym) |*ds| { + return ds; + } else return null; } -fn reportUnhandledSymbolType(self: *MachO, sym_with_loc: SymbolWithLoc) 
error{OutOfMemory}!void { - const comp = self.base.comp; - const gpa = comp.gpa; - try comp.link_errors.ensureUnusedCapacity(gpa, 1); +pub fn ptraceAttach(self: *MachO, pid: std.os.pid_t) !void { + if (!is_hot_update_compatible) return; - const notes = try gpa.alloc(File.ErrorMsg, 1); - errdefer gpa.free(notes); + const mach_task = try std.os.darwin.machTaskForPid(pid); + log.debug("Mach task for pid {d}: {any}", .{ pid, mach_task }); + self.hot_state.mach_task = mach_task; - const file = sym_with_loc.getFile().?; - notes[0] = .{ .msg = try std.fmt.allocPrint(gpa, "defined in {s}", .{self.objects.items[file].name}) }; + // TODO start exception handler in another thread - const sym = self.getSymbol(sym_with_loc); - const sym_type = if (sym.stab()) - "stab" - else if (sym.indr()) - "indirect" - else if (sym.abs()) - "absolute" - else - unreachable; - - comp.link_errors.appendAssumeCapacity(.{ - .msg = try std.fmt.allocPrint(gpa, "unhandled symbol type: '{s}' has type {s}", .{ - self.getSymbolName(sym_with_loc), - sym_type, - }), - .notes = notes, - }); + // TODO enable ones we register for exceptions + // try std.os.ptrace(std.os.darwin.PT.ATTACHEXC, pid, 0, 0); } -/// Binary search -pub fn bsearch(comptime T: type, haystack: []align(1) const T, predicate: anytype) usize { - if (!@hasDecl(@TypeOf(predicate), "predicate")) - @compileError("Predicate is required to define fn predicate(@This(), T) bool"); +pub fn ptraceDetach(self: *MachO, pid: std.os.pid_t) !void { + if (!is_hot_update_compatible) return; - var min: usize = 0; - var max: usize = haystack.len; - while (min < max) { - const index = (min + max) / 2; - const curr = haystack[index]; - if (predicate.predicate(curr)) { - min = index + 1; - } else { - max = index; - } - } - return min; + _ = pid; + + // TODO stop exception handler + + // TODO see comment in ptraceAttach + // try std.os.ptrace(std.os.darwin.PT.DETACH, pid, 0, 0); + + self.hot_state.mach_task = null; } -/// Linear search -pub fn lsearch(comptime 
T: type, haystack: []align(1) const T, predicate: anytype) usize { - if (!@hasDecl(@TypeOf(predicate), "predicate")) - @compileError("Predicate is required to define fn predicate(@This(), T) bool"); - - var i: usize = 0; - while (i < haystack.len) : (i += 1) { - if (predicate.predicate(haystack[i])) break; - } - return i; -} - -pub fn logSegments(self: *MachO) void { - log.debug("segments:", .{}); - for (self.segments.items, 0..) |segment, i| { - log.debug(" segment({d}): {s} @{x} ({x}), sizeof({x})", .{ - i, - segment.segName(), - segment.fileoff, - segment.vmaddr, - segment.vmsize, - }); - } -} - -pub fn logSections(self: *MachO) void { - log.debug("sections:", .{}); - for (self.sections.items(.header), 0..) |header, i| { - log.debug(" sect({d}): {s},{s} @{x} ({x}), sizeof({x})", .{ - i + 1, - header.segName(), - header.sectName(), - header.offset, - header.addr, - header.size, - }); - } -} - -fn logSymAttributes(sym: macho.nlist_64, buf: []u8) []const u8 { - if (sym.sect()) { - buf[0] = 's'; - } - if (sym.ext()) { - if (sym.weakDef() or sym.pext()) { - buf[1] = 'w'; - } else { - buf[1] = 'e'; - } - } - if (sym.tentative()) { - buf[2] = 't'; - } - if (sym.undf()) { - buf[3] = 'u'; - } - return buf[0..]; -} - -pub fn logSymtab(self: *MachO) void { - var buf: [4]u8 = undefined; - - const scoped_log = std.log.scoped(.symtab); - - scoped_log.debug("locals:", .{}); - for (self.objects.items, 0..) |object, id| { - scoped_log.debug(" object({d}): {s}", .{ id, object.name }); - if (object.in_symtab == null) continue; - for (object.symtab, 0..) |sym, sym_id| { - @memset(&buf, '_'); - scoped_log.debug(" %{d}: {s} @{x} in sect({d}), {s}", .{ - sym_id, - object.getSymbolName(@as(u32, @intCast(sym_id))), - sym.n_value, - sym.n_sect, - logSymAttributes(sym, &buf), - }); - } - } - scoped_log.debug(" object(-1)", .{}); - for (self.locals.items, 0..) 
|sym, sym_id| { - if (sym.undf()) continue; - scoped_log.debug(" %{d}: {s} @{x} in sect({d}), {s}", .{ - sym_id, - self.strtab.get(sym.n_strx).?, - sym.n_value, - sym.n_sect, - logSymAttributes(sym, &buf), - }); - } - - scoped_log.debug("exports:", .{}); - for (self.globals.items, 0..) |global, i| { - const sym = self.getSymbol(global); - if (sym.undf()) continue; - if (sym.n_desc == N_DEAD) continue; - if (sym.n_desc == N_BOUNDARY) continue; - scoped_log.debug(" %{d}: {s} @{x} in sect({d}), {s} (def in object({?}))", .{ - i, - self.getSymbolName(global), - sym.n_value, - sym.n_sect, - logSymAttributes(sym, &buf), - global.file, - }); - } - - scoped_log.debug("imports:", .{}); - for (self.globals.items, 0..) |global, i| { - const sym = self.getSymbol(global); - if (!sym.undf()) continue; - if (sym.n_desc == N_DEAD) continue; - if (sym.n_desc == N_BOUNDARY) continue; - const ord = @divTrunc(sym.n_desc, macho.N_SYMBOL_RESOLVER); - scoped_log.debug(" %{d}: {s} @{x} in ord({d}), {s}", .{ - i, - self.getSymbolName(global), - sym.n_value, - ord, - logSymAttributes(sym, &buf), - }); - } - - scoped_log.debug("GOT entries:", .{}); - scoped_log.debug("{}", .{self.got_table}); - - scoped_log.debug("TLV pointers:", .{}); - scoped_log.debug("{}", .{self.tlv_ptr_table}); - - scoped_log.debug("stubs entries:", .{}); - scoped_log.debug("{}", .{self.stub_table}); - - scoped_log.debug("thunks:", .{}); - for (self.thunks.items, 0..) |thunk, i| { - scoped_log.debug(" thunk({d})", .{i}); - const slice = thunk.targets.slice(); - for (slice.items(.tag), slice.items(.target), 0..) 
|tag, target, j| { - const atom_index = @as(u32, @intCast(thunk.getStartAtomIndex() + j)); - const atom = self.getAtom(atom_index); - const atom_sym = self.getSymbol(atom.getSymbolWithLoc()); - const target_addr = switch (tag) { - .stub => self.getStubsEntryAddress(target).?, - .atom => self.getSymbol(target).n_value, - }; - scoped_log.debug(" {d}@{x} => {s}({s}@{x})", .{ - j, - atom_sym.n_value, - @tagName(tag), - self.getSymbolName(target), - target_addr, - }); - } - } -} - -pub fn logAtoms(self: *MachO) void { - log.debug("atoms:", .{}); - const slice = self.sections.slice(); - for (slice.items(.first_atom_index), 0..) |first_atom_index, sect_id| { - var atom_index = first_atom_index orelse continue; - const header = slice.items(.header)[sect_id]; - - log.debug("{s},{s}", .{ header.segName(), header.sectName() }); - - while (true) { - const atom = self.getAtom(atom_index); - self.logAtom(atom_index, log); - - if (atom.next_index) |next_index| { - atom_index = next_index; - } else break; - } - } -} - -pub fn logAtom(self: *MachO, atom_index: Atom.Index, logger: anytype) void { - if (!build_options.enable_logging) return; - - const atom = self.getAtom(atom_index); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - const sym_name = self.getSymbolName(atom.getSymbolWithLoc()); - logger.debug(" ATOM({d}, %{d}, '{s}') @ {x} (sizeof({x}), alignof({x})) in object({?}) in sect({d})", .{ - atom_index, - atom.sym_index, - sym_name, - sym.n_value, - atom.size, - atom.alignment, - atom.getFile(), - sym.n_sect, - }); - - if (atom.getFile() != null) { - var it = Atom.getInnerSymbolsIterator(self, atom_index); - while (it.next()) |sym_loc| { - const inner = self.getSymbol(sym_loc); - const inner_name = self.getSymbolName(sym_loc); - const offset = Atom.calcInnerSymbolOffset(self, atom_index, sym_loc.sym_index); - - logger.debug(" (%{d}, '{s}') @ {x} ({x})", .{ - sym_loc.sym_index, - inner_name, - inner.n_value, - offset, - }); - } - - if (Atom.getSectionAlias(self, 
atom_index)) |sym_loc| { - const alias = self.getSymbol(sym_loc); - const alias_name = self.getSymbolName(sym_loc); - - logger.debug(" (%{d}, '{s}') @ {x} ({x})", .{ - sym_loc.sym_index, - alias_name, - alias.n_value, - 0, - }); - } - } -} +const is_hot_update_compatible = switch (builtin.target.os.tag) { + .macos => true, + else => false, +}; const default_entry_symbol_name = "_main"; @@ -5663,17 +1010,6 @@ pub const base_tag: File.Tag = File.Tag.macho; pub const N_DEAD: u16 = @as(u16, @bitCast(@as(i16, -1))); pub const N_BOUNDARY: u16 = @as(u16, @bitCast(@as(i16, -2))); -/// Mode of operation of the linker. -pub const Mode = enum { - /// Incremental mode will preallocate segments/sections and is compatible with - /// watch and HCS modes of operation. - incremental, - /// Zld mode will link relocatables in a traditional, one-shot - /// fashion (default for LLVM backend). It acts as a drop-in replacement for - /// LLD. - zld, -}; - pub const Section = struct { header: macho.section_64, segment_index: u8, @@ -5698,11 +1034,6 @@ pub const Section = struct { free_list: std.ArrayListUnmanaged(Atom.Index) = .{}, }; -const is_hot_update_compatible = switch (builtin.target.os.tag) { - .macos => true, - else => false, -}; - const LazySymbolTable = std.AutoArrayHashMapUnmanaged(InternPool.OptionalDeclIndex, LazySymbolMetadata); const LazySymbolMetadata = struct { @@ -5713,8 +1044,6 @@ const LazySymbolMetadata = struct { data_const_state: State = .unused, }; -const TlvSymbolTable = std.AutoArrayHashMapUnmanaged(SymbolWithLoc, Atom.Index); - const DeclMetadata = struct { atom: Atom.Index, section: u8, @@ -5737,40 +1066,145 @@ const DeclMetadata = struct { } }; -const DeclTable = std.AutoArrayHashMapUnmanaged(InternPool.DeclIndex, DeclMetadata); -const AnonDeclTable = std.AutoHashMapUnmanaged(InternPool.Index, DeclMetadata); -const BindingTable = std.AutoArrayHashMapUnmanaged(Atom.Index, std.ArrayListUnmanaged(Atom.Binding)); -const UnnamedConstTable = 
std.AutoArrayHashMapUnmanaged(InternPool.DeclIndex, std.ArrayListUnmanaged(Atom.Index)); -const RebaseTable = std.AutoArrayHashMapUnmanaged(Atom.Index, std.ArrayListUnmanaged(u32)); -const RelocationTable = std.AutoArrayHashMapUnmanaged(Atom.Index, std.ArrayListUnmanaged(Relocation)); -const ActionTable = std.AutoHashMapUnmanaged(u32, RelocFlags); - -pub const RelocFlags = packed struct { - add_got: bool = false, - add_stub: bool = false, -}; - -pub const SymbolWithLoc = extern struct { - // Index into the respective symbol table. - sym_index: u32, - - // 0 means it's a synthetic global. - file: u32 = 0, - - pub fn getFile(self: SymbolWithLoc) ?u32 { - if (self.file == 0) return null; - return self.file - 1; - } - - pub fn eql(self: SymbolWithLoc, other: SymbolWithLoc) bool { - return self.file == other.file and self.sym_index == other.sym_index; - } -}; - const HotUpdateState = struct { mach_task: ?std.os.darwin.MachTask = null, }; +pub const SymtabCtx = struct { + ilocal: u32 = 0, + istab: u32 = 0, + iexport: u32 = 0, + iimport: u32 = 0, + nlocals: u32 = 0, + nstabs: u32 = 0, + nexports: u32 = 0, + nimports: u32 = 0, + strsize: u32 = 0, +}; + +pub const null_sym = macho.nlist_64{ + .n_strx = 0, + .n_type = 0, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, +}; + +pub const Platform = struct { + platform: macho.PLATFORM, + version: Version, + + /// Using Apple's ld64 as our blueprint, `min_version` as well as `sdk_version` are set to + /// the extracted minimum platform version. 
+ pub fn fromLoadCommand(lc: macho.LoadCommandIterator.LoadCommand) Platform { + switch (lc.cmd()) { + .BUILD_VERSION => { + const lc_cmd = lc.cast(macho.build_version_command).?; + return .{ + .platform = lc_cmd.platform, + .version = .{ .value = lc_cmd.minos }, + }; + }, + .VERSION_MIN_MACOSX, + .VERSION_MIN_IPHONEOS, + .VERSION_MIN_TVOS, + .VERSION_MIN_WATCHOS, + => { + const lc_cmd = lc.cast(macho.version_min_command).?; + return .{ + .platform = switch (lc.cmd()) { + .VERSION_MIN_MACOSX => .MACOS, + .VERSION_MIN_IPHONEOS => .IOS, + .VERSION_MIN_TVOS => .TVOS, + .VERSION_MIN_WATCHOS => .WATCHOS, + else => unreachable, + }, + .version = .{ .value = lc_cmd.version }, + }; + }, + else => unreachable, + } + } + + pub fn isBuildVersionCompatible(plat: Platform) bool { + inline for (supported_platforms) |sup_plat| { + if (sup_plat[0] == plat.platform) { + return sup_plat[1] <= plat.version.value; + } + } + return false; + } +}; + +pub const Version = struct { + value: u32, + + pub fn major(v: Version) u16 { + return @as(u16, @truncate(v.value >> 16)); + } + + pub fn minor(v: Version) u8 { + return @as(u8, @truncate(v.value >> 8)); + } + + pub fn patch(v: Version) u8 { + return @as(u8, @truncate(v.value)); + } + + pub fn parse(raw: []const u8) ?Version { + var parsed: [3]u16 = [_]u16{0} ** 3; + var count: usize = 0; + var it = std.mem.splitAny(u8, raw, "."); + while (it.next()) |comp| { + if (count >= 3) return null; + parsed[count] = std.fmt.parseInt(u16, comp, 10) catch return null; + count += 1; + } + if (count == 0) return null; + const maj = parsed[0]; + const min = std.math.cast(u8, parsed[1]) orelse return null; + const pat = std.math.cast(u8, parsed[2]) orelse return null; + return Version.new(maj, min, pat); + } + + pub fn new(maj: u16, min: u8, pat: u8) Version { + return .{ .value = (@as(u32, @intCast(maj)) << 16) | (@as(u32, @intCast(min)) << 8) | pat }; + } + + pub fn format( + v: Version, + comptime unused_fmt_string: []const u8, + options: 
std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + try writer.print("{d}.{d}.{d}", .{ + v.major(), + v.minor(), + v.patch(), + }); + } +}; + +const SupportedPlatforms = struct { + macho.PLATFORM, // Platform identifier + u32, // Min platform version for which to emit LC_BUILD_VERSION + u32, // Min supported platform version + ?[]const u8, // Env var to look for +}; + +// Source: https://github.com/apple-oss-distributions/ld64/blob/59a99ab60399c5e6c49e6945a9e1049c42b71135/src/ld/PlatformSupport.cpp#L52 +const supported_platforms = [_]SupportedPlatforms{ + .{ .MACOS, 0xA0E00, 0xA0800, "MACOSX_DEPLOYMENT_TARGET" }, + .{ .IOS, 0xC0000, 0x70000, "IPHONEOS_DEPLOYMENT_TARGET" }, + .{ .TVOS, 0xC0000, 0x70000, "TVOS_DEPLOYMENT_TARGET" }, + .{ .WATCHOS, 0x50000, 0x20000, "WATCHOS_DEPLOYMENT_TARGET" }, + .{ .IOSSIMULATOR, 0xD0000, 0x80000, null }, + .{ .TVOSSIMULATOR, 0xD0000, 0x80000, null }, + .{ .WATCHOSSIMULATOR, 0x60000, 0x20000, null }, +}; + /// When allocating, the ideal_capacity is calculated by /// actual_capacity + (actual_capacity / ideal_factor) const ideal_factor = 3; @@ -5812,20 +1246,20 @@ const fat = @import("MachO/fat.zig"); const link = @import("../link.zig"); const llvm_backend = @import("../codegen/llvm.zig"); const load_commands = @import("MachO/load_commands.zig"); -const stubs = @import("MachO/stubs.zig"); const tapi = @import("tapi.zig"); const target_util = @import("../target.zig"); const thunks = @import("MachO/thunks.zig"); const trace = @import("../tracy.zig").trace; -const zld = @import("MachO/zld.zig"); const Air = @import("../Air.zig"); +const Alignment = Atom.Alignment; const Allocator = mem.Allocator; const Archive = @import("MachO/Archive.zig"); pub const Atom = @import("MachO/Atom.zig"); const Cache = std.Build.Cache; const CodeSignature = @import("MachO/CodeSignature.zig"); const Compilation = @import("../Compilation.zig"); +pub const DebugSymbols = @import("MachO/DebugSymbols.zig"); const Dwarf 
= File.Dwarf; const DwarfInfo = @import("MachO/DwarfInfo.zig"); const Dylib = @import("MachO/Dylib.zig"); @@ -5837,17 +1271,9 @@ const LlvmObject = @import("../codegen/llvm.zig").Object; const Md5 = std.crypto.hash.Md5; const Module = @import("../Module.zig"); const InternPool = @import("../InternPool.zig"); -const Platform = load_commands.Platform; const Relocation = @import("MachO/Relocation.zig"); const StringTable = @import("StringTable.zig"); const TableSection = @import("table_section.zig").TableSection; -const Trie = @import("MachO/Trie.zig"); const Type = @import("../type.zig").Type; const TypedValue = @import("../TypedValue.zig"); const Value = @import("../value.zig").Value; -const Alignment = Atom.Alignment; - -pub const DebugSymbols = @import("MachO/DebugSymbols.zig"); -pub const Bind = @import("MachO/dyld_info/bind.zig").Bind(*const MachO, SymbolWithLoc); -pub const LazyBind = @import("MachO/dyld_info/bind.zig").LazyBind(*const MachO, SymbolWithLoc); -pub const Rebase = @import("MachO/dyld_info/Rebase.zig"); diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index d734faa487..64e7b70a0d 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -449,7 +449,7 @@ fn resolveRelocInner( if (rel.getTargetSymbol(macho_file).flags.got) { try writer.writeInt(i32, @intCast(G + A - P), .little); } else { - try relaxGotLoad(code[rel_offset - 3 ..]); + try x86_64.relaxGotLoad(code[rel_offset - 3 ..]); try writer.writeInt(i32, @intCast(S + A - P), .little); } }, @@ -463,7 +463,7 @@ fn resolveRelocInner( const S_: i64 = @intCast(sym.getTlvPtrAddress(macho_file)); try writer.writeInt(i32, @intCast(S_ + A - P), .little); } else { - try relaxTlv(code[rel_offset - 3 ..]); + try x86_64.relaxTlv(code[rel_offset - 3 ..]); try writer.writeInt(i32, @intCast(S + A - P), .little); } }, @@ -631,43 +631,51 @@ fn resolveRelocInner( } } -fn relaxGotLoad(code: []u8) error{RelaxFail}!void { - const old_inst = disassemble(code) orelse return error.RelaxFail; - 
switch (old_inst.encoding.mnemonic) { - .mov => { - const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops) catch return error.RelaxFail; - relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); - encode(&.{inst}, code) catch return error.RelaxFail; - }, - else => return error.RelaxFail, +const x86_64 = struct { + fn relaxGotLoad(code: []u8) error{RelaxFail}!void { + const old_inst = disassemble(code) orelse return error.RelaxFail; + switch (old_inst.encoding.mnemonic) { + .mov => { + const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops) catch return error.RelaxFail; + relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); + encode(&.{inst}, code) catch return error.RelaxFail; + }, + else => return error.RelaxFail, + } } -} -fn relaxTlv(code: []u8) error{RelaxFail}!void { - const old_inst = disassemble(code) orelse return error.RelaxFail; - switch (old_inst.encoding.mnemonic) { - .mov => { - const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops) catch return error.RelaxFail; - relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); - encode(&.{inst}, code) catch return error.RelaxFail; - }, - else => return error.RelaxFail, + fn relaxTlv(code: []u8) error{RelaxFail}!void { + const old_inst = disassemble(code) orelse return error.RelaxFail; + switch (old_inst.encoding.mnemonic) { + .mov => { + const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops) catch return error.RelaxFail; + relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); + encode(&.{inst}, code) catch return error.RelaxFail; + }, + else => return error.RelaxFail, + } } -} -fn disassemble(code: []const u8) ?Instruction { - var disas = Disassembler.init(code); - const inst = disas.next() catch return null; - return inst; -} - -fn encode(insts: []const Instruction, code: []u8) !void { - var stream = std.io.fixedBufferStream(code); - const writer = stream.writer(); - 
for (insts) |inst| { - try inst.encode(writer, .{}); + fn disassemble(code: []const u8) ?Instruction { + var disas = Disassembler.init(code); + const inst = disas.next() catch return null; + return inst; } -} + + fn encode(insts: []const Instruction, code: []u8) !void { + var stream = std.io.fixedBufferStream(code); + const writer = stream.writer(); + for (insts) |inst| { + try inst.encode(writer, .{}); + } + } + + const bits = @import("../../arch/x86_64/bits.zig"); + const encoder = @import("../../arch/x86_64/encoder.zig"); + const Disassembler = @import("../../arch/x86_64/Disassembler.zig"); + const Immediate = bits.Immediate; + const Instruction = encoder.Instruction; +}; pub fn calcNumRelocs(self: Atom, macho_file: *MachO) u32 { switch (macho_file.options.cpu_arch.?) { @@ -879,24 +887,22 @@ pub const Loc = struct { len: usize = 0, }; -const aarch64 = @import("../aarch64.zig"); +pub const Alignment = @import("../../InternPool.zig").Alignment; + +const aarch64 = @import("../../arch/aarch64/bits.zig"); const assert = std.debug.assert; const bind = @import("dyld_info/bind.zig"); -const dis_x86_64 = @import("dis_x86_64"); const macho = std.macho; const math = std.math; const mem = std.mem; const log = std.log.scoped(.link); const relocs_log = std.log.scoped(.relocs); const std = @import("std"); -const trace = @import("../tracy.zig").trace; +const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; const Atom = @This(); -const Disassembler = dis_x86_64.Disassembler; const File = @import("file.zig").File; -const Instruction = dis_x86_64.Instruction; -const Immediate = dis_x86_64.Immediate; const MachO = @import("../MachO.zig"); const Object = @import("Object.zig"); const Relocation = @import("Relocation.zig"); diff --git a/src/link/MachO/CodeSignature.zig b/src/link/MachO/CodeSignature.zig index ce142b4376..54d83d4530 100644 --- a/src/link/MachO/CodeSignature.zig +++ b/src/link/MachO/CodeSignature.zig @@ -11,7 +11,6 @@ const Allocator = 
mem.Allocator; const Hasher = @import("hasher.zig").ParallelHasher; const MachO = @import("../MachO.zig"); const Sha256 = std.crypto.hash.sha2.Sha256; -const Zld = @import("../Zld.zig"); const hash_size = Sha256.digest_length; diff --git a/src/link/MachO/DwarfInfo.zig b/src/link/MachO/DwarfInfo.zig index c3f8d235ce..8b32faa567 100644 --- a/src/link/MachO/DwarfInfo.zig +++ b/src/link/MachO/DwarfInfo.zig @@ -461,7 +461,7 @@ const leb = std.leb; const log = std.log.scoped(.link); const mem = std.mem; const std = @import("std"); -const trace = @import("../tracy.zig").trace; +const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; const DwarfInfo = @This(); diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index 4944c4d5ef..f28e4eb08d 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -12,7 +12,7 @@ symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, dependents: std.ArrayListUnmanaged(Id) = .{}, rpaths: std.StringArrayHashMapUnmanaged(void) = .{}, umbrella: File.Index = 0, -platform: ?MachO.Options.Platform = null, +platform: ?MachO.Platform = null, needed: bool, weak: bool, @@ -815,7 +815,7 @@ const macho = std.macho; const math = std.math; const mem = std.mem; const tapi = @import("../tapi.zig"); -const trace = @import("../tracy.zig").trace; +const trace = @import("../../tracy.zig").trace; const std = @import("std"); const Allocator = mem.Allocator; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index deb17ba80b..0ecf3b4d45 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -12,7 +12,7 @@ strtab: []const u8 = &[0]u8{}, symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, -platform: ?MachO.Options.Platform = null, +platform: ?MachO.Platform = null, dwarf_info: ?DwarfInfo = null, stab_files: std.ArrayListUnmanaged(StabFile) = .{}, @@ -2075,7 +2075,7 @@ const log = std.log.scoped(.link); const macho = std.macho; const 
math = std.math; const mem = std.mem; -const trace = @import("../tracy.zig").trace; +const trace = @import("../../tracy.zig").trace; const std = @import("std"); const Allocator = mem.Allocator; @@ -2088,6 +2088,5 @@ const LoadCommandIterator = macho.LoadCommandIterator; const MachO = @import("../MachO.zig"); const Object = @This(); const Relocation = @import("Relocation.zig"); -const StringTable = @import("../strtab.zig").StringTable; const Symbol = @import("Symbol.zig"); const UnwindInfo = @import("UnwindInfo.zig"); diff --git a/src/link/MachO/UnwindInfo.zig b/src/link/MachO/UnwindInfo.zig index 1d0bfc1ff9..33c07915dc 100644 --- a/src/link/MachO/UnwindInfo.zig +++ b/src/link/MachO/UnwindInfo.zig @@ -670,7 +670,7 @@ const log = std.log.scoped(.link); const macho = std.macho; const math = std.math; const mem = std.mem; -const trace = @import("../tracy.zig").trace; +const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 8d2dba53c6..8097d5f710 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -193,7 +193,7 @@ const log = std.log.scoped(.dead_strip); const macho = std.macho; const math = std.math; const mem = std.mem; -const trace = @import("../tracy.zig").trace; +const trace = @import("../../tracy.zig").trace; const track_live_log = std.log.scoped(.dead_strip_track_live); const std = @import("std"); diff --git a/src/link/MachO/eh_frame.zig b/src/link/MachO/eh_frame.zig index 6ca7a5cd2a..716a66d4fa 100644 --- a/src/link/MachO/eh_frame.zig +++ b/src/link/MachO/eh_frame.zig @@ -559,7 +559,7 @@ const macho = std.macho; const math = std.math; const mem = std.mem; const std = @import("std"); -const trace = @import("../tracy.zig").trace; +const trace = @import("../../tracy.zig").trace; const Allocator = std.mem.Allocator; const Atom = @import("Atom.zig"); diff --git a/src/link/MachO/hasher.zig 
b/src/link/MachO/hasher.zig index 95faaf3a92..57c8acd35e 100644 --- a/src/link/MachO/hasher.zig +++ b/src/link/MachO/hasher.zig @@ -67,7 +67,7 @@ const assert = std.debug.assert; const fs = std.fs; const mem = std.mem; const std = @import("std"); -const trace = @import("../tracy.zig").trace; +const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; const ThreadPool = std.Thread.Pool; diff --git a/src/link/MachO/thunks.zig b/src/link/MachO/thunks.zig index 6593fb6a1b..7f0dc56685 100644 --- a/src/link/MachO/thunks.zig +++ b/src/link/MachO/thunks.zig @@ -160,14 +160,14 @@ const max_distance = (1 << (jump_bits - 1)); /// and assume margin to be 5MiB. const max_allowed_distance = max_distance - 0x500_000; -const aarch64 = @import("../aarch64.zig"); +const aarch64 = @import("../../arch/aarch64/bits.zig"); const assert = std.debug.assert; const log = std.log.scoped(.link); const macho = std.macho; const math = std.math; const mem = std.mem; const std = @import("std"); -const trace = @import("../tracy.zig").trace; +const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); diff --git a/src/link/MachO/uuid.zig b/src/link/MachO/uuid.zig index 0248acc091..ca66e129d2 100644 --- a/src/link/MachO/uuid.zig +++ b/src/link/MachO/uuid.zig @@ -47,7 +47,7 @@ inline fn conform(out: *[Md5.digest_length]u8) void { const fs = std.fs; const mem = std.mem; const std = @import("std"); -const trace = @import("../tracy.zig").trace; +const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; const Md5 = std.crypto.hash.Md5; From 7588eeccea02b155f934a453cc47d8641686ab22 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 10 Jan 2024 09:55:40 +0100 Subject: [PATCH 003/133] macho: re-enable --verbose-link --- src/link/MachO.zig | 444 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 345 insertions(+), 99 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 
895ec1cc2e..1e41458c70 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1,5 +1,4 @@ base: File, -entry_name: ?[]const u8, /// If this is not null, an object file is created by LLVM and emitted to zcu_object_sub_path. llvm_object: ?*LlvmObject = null, @@ -47,9 +46,9 @@ atoms: std.ArrayListUnmanaged(Atom) = .{}, sdk_layout: ?SdkLayout, /// Size of the __PAGEZERO segment. -pagezero_vmsize: u64, +pagezero_vmsize: ?u64, /// Minimum space for future expansion of the load commands. -headerpad_size: u32, +headerpad_size: ?u32, /// Set enough space as if all paths were MATPATHLEN. headerpad_max_install_names: bool, /// Remove dylibs that are unreachable by the entry point or exported symbols. @@ -61,6 +60,8 @@ install_name: ?[]const u8, /// Path to entitlements file. entitlements: ?[]const u8, compatibility_version: ?std.SemanticVersion, +/// Entry name +entry_name: ?[]const u8, /// Hot-code swapping state. hot_state: if (is_hot_update_compatible) HotUpdateState else struct {} = .{}, @@ -128,8 +129,8 @@ pub fn createEmpty( .build_id = options.build_id, .rpath_list = options.rpath_list, }, - .pagezero_vmsize = options.pagezero_size orelse default_pagezero_vmsize, - .headerpad_size = options.headerpad_size orelse default_headerpad_size, + .pagezero_vmsize = options.pagezero_size, + .headerpad_size = options.headerpad_size, .headerpad_max_install_names = options.headerpad_max_install_names, .dead_strip_dylibs = options.dead_strip_dylibs, .sdk_layout = options.darwin_sdk_layout, @@ -249,9 +250,173 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node /// --verbose-link output fn dumpArgv(self: *MachO, comp: *Compilation) !void { - _ = self; - _ = comp; - @panic("TODO dumpArgv"); + const gpa = self.base.comp.gpa; + var arena_allocator = std.heap.ArenaAllocator.init(gpa); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + const target = self.base.comp.root_mod.resolved_target.result; + const directory = 
self.base.emit.directory; + const full_out_path = try directory.join(arena, &[_][]const u8{self.base.emit.sub_path}); + const module_obj_path: ?[]const u8 = if (self.base.zcu_object_sub_path) |path| blk: { + if (fs.path.dirname(full_out_path)) |dirname| { + break :blk try fs.path.join(arena, &.{ dirname, path }); + } else { + break :blk path; + } + } else null; + + var argv = std.ArrayList([]const u8).init(arena); + + try argv.append("zig"); + + if (self.base.isStaticLib()) { + try argv.append("ar"); + } else { + try argv.append("ld"); + } + + if (self.base.isObject()) { + try argv.append("-r"); + } + + try argv.append("-o"); + try argv.append(full_out_path); + + if (self.base.isRelocatable()) { + for (comp.objects) |obj| { + try argv.append(obj.path); + } + + for (comp.c_object_table.keys()) |key| { + try argv.append(key.status.success.object_path); + } + + if (module_obj_path) |p| { + try argv.append(p); + } + } else { + if (!self.base.isStatic()) { + try argv.append("-dynamic"); + } + + if (self.base.isDynLib()) { + try argv.append("-dylib"); + + if (self.install_name) |install_name| { + try argv.append("-install_name"); + try argv.append(install_name); + } + } + + { + const platform = Platform.fromTarget(target); + try argv.append("-platform_version"); + try argv.append(@tagName(platform.os_tag)); + try argv.append(try std.fmt.allocPrint(arena, "{}", .{platform.version})); + + const sdk_version: ?std.SemanticVersion = self.inferSdkVersion(); + if (sdk_version) |ver| { + try argv.append(try std.fmt.allocPrint(arena, "{d}.{d}", .{ ver.major, ver.minor })); + } else { + try argv.append(try std.fmt.allocPrint(arena, "{}", .{platform.version})); + } + } + + if (comp.sysroot) |syslibroot| { + try argv.append("-syslibroot"); + try argv.append(syslibroot); + } + + for (self.base.rpath_list) |rpath| { + try argv.append("-rpath"); + try argv.append(rpath); + } + + if (self.pagezero_vmsize) |size| { + try argv.append("-pagezero_size"); + try argv.append(try 
std.fmt.allocPrint(arena, "0x{x}", .{size})); + } + + if (self.headerpad_size) |size| { + try argv.append("-headerpad_size"); + try argv.append(try std.fmt.allocPrint(arena, "0x{x}", .{size})); + } + + if (self.headerpad_max_install_names) { + try argv.append("-headerpad_max_install_names"); + } + + if (self.base.gc_sections) { + try argv.append("-dead_strip"); + } + + if (self.dead_strip_dylibs) { + try argv.append("-dead_strip_dylibs"); + } + + if (self.entry_name) |entry_name| { + try argv.appendSlice(&.{ "-e", entry_name }); + } + + for (comp.objects) |obj| { + // TODO: verify this + if (obj.must_link) { + try argv.append("-force_load"); + } + try argv.append(obj.path); + } + + for (comp.c_object_table.keys()) |key| { + try argv.append(key.status.success.object_path); + } + + if (module_obj_path) |p| { + try argv.append(p); + } + + if (comp.compiler_rt_lib) |lib| try argv.append(lib.full_object_path); + if (comp.compiler_rt_obj) |obj| try argv.append(obj.full_object_path); + + if (comp.config.link_libcpp) { + try argv.append(comp.libcxxabi_static_lib.?.full_object_path); + try argv.append(comp.libcxx_static_lib.?.full_object_path); + } + + try argv.append("-o"); + try argv.append(full_out_path); + + try argv.append("-lSystem"); + + for (comp.system_libs.keys()) |l_name| { + const info = comp.system_libs.get(l_name).?; + const arg = if (info.needed) + try std.fmt.allocPrint(arena, "-needed-l{s}", .{l_name}) + else if (info.weak) + try std.fmt.allocPrint(arena, "-weak-l{s}", .{l_name}) + else + try std.fmt.allocPrint(arena, "-l{s}", .{l_name}); + try argv.append(arg); + } + + for (self.frameworks) |framework| { + const name = std.fs.path.stem(framework.path); + const arg = if (framework.needed) + try std.fmt.allocPrint(arena, "-needed_framework {s}", .{name}) + else if (framework.weak) + try std.fmt.allocPrint(arena, "-weak_framework {s}", .{name}) + else + try std.fmt.allocPrint(arena, "-framework {s}", .{name}); + try argv.append(arg); + } + + if 
(self.base.isDynLib() and self.base.allow_shlib_undefined) { + try argv.append("-undefined"); + try argv.append("dynamic_lookup"); + } + } + + Compilation.dump_argv(argv.items); } /// XNU starting with Big Sur running on arm64 is caching inodes of running binaries. @@ -1034,38 +1199,6 @@ pub const Section = struct { free_list: std.ArrayListUnmanaged(Atom.Index) = .{}, }; -const LazySymbolTable = std.AutoArrayHashMapUnmanaged(InternPool.OptionalDeclIndex, LazySymbolMetadata); - -const LazySymbolMetadata = struct { - const State = enum { unused, pending_flush, flushed }; - text_atom: Atom.Index = undefined, - data_const_atom: Atom.Index = undefined, - text_state: State = .unused, - data_const_state: State = .unused, -}; - -const DeclMetadata = struct { - atom: Atom.Index, - section: u8, - /// A list of all exports aliases of this Decl. - /// TODO do we actually need this at all? - exports: std.ArrayListUnmanaged(u32) = .{}, - - fn getExport(m: DeclMetadata, macho_file: *const MachO, name: []const u8) ?u32 { - for (m.exports.items) |exp| { - if (mem.eql(u8, name, macho_file.getSymbolName(.{ .sym_index = exp }))) return exp; - } - return null; - } - - fn getExportPtr(m: *DeclMetadata, macho_file: *MachO, name: []const u8) ?*u32 { - for (m.exports.items) |*exp| { - if (mem.eql(u8, name, macho_file.getSymbolName(.{ .sym_index = exp.* }))) return exp; - } - return null; - } -}; - const HotUpdateState = struct { mach_task: ?std.os.darwin.MachTask = null, }; @@ -1091,18 +1224,32 @@ pub const null_sym = macho.nlist_64{ }; pub const Platform = struct { - platform: macho.PLATFORM, - version: Version, + os_tag: std.Target.Os.Tag, + abi: std.Target.Abi, + version: std.SemanticVersion, /// Using Apple's ld64 as our blueprint, `min_version` as well as `sdk_version` are set to /// the extracted minimum platform version. 
pub fn fromLoadCommand(lc: macho.LoadCommandIterator.LoadCommand) Platform { switch (lc.cmd()) { .BUILD_VERSION => { - const lc_cmd = lc.cast(macho.build_version_command).?; + const cmd = lc.cast(macho.build_version_command).?; return .{ - .platform = lc_cmd.platform, - .version = .{ .value = lc_cmd.minos }, + .os_tag = switch (cmd.platform) { + .MACOS => .macos, + .IOS, .IOSSIMULATOR => .ios, + .TVOS, .TVOSSIMULATOR => .tvos, + .WATCHOS, .WATCHOSSIMULATOR => .watchos, + else => @panic("TODO"), + }, + .abi = switch (cmd.platform) { + .IOSSIMULATOR, + .TVOSSIMULATOR, + .WATCHOSSIMULATOR, + => .simulator, + else => .none, + }, + .version = appleVersionToSemanticVersion(cmd.minos), }; }, .VERSION_MIN_MACOSX, @@ -1110,22 +1257,45 @@ pub const Platform = struct { .VERSION_MIN_TVOS, .VERSION_MIN_WATCHOS, => { - const lc_cmd = lc.cast(macho.version_min_command).?; + const cmd = lc.cast(macho.version_min_command).?; return .{ - .platform = switch (lc.cmd()) { - .VERSION_MIN_MACOSX => .MACOS, - .VERSION_MIN_IPHONEOS => .IOS, - .VERSION_MIN_TVOS => .TVOS, - .VERSION_MIN_WATCHOS => .WATCHOS, + .os_tag = switch (lc.cmd()) { + .VERSION_MIN_MACOSX => .macos, + .VERSION_MIN_IPHONEOS => .ios, + .VERSION_MIN_TVOS => .tvos, + .VERSION_MIN_WATCHOS => .watchos, else => unreachable, }, - .version = .{ .value = lc_cmd.version }, + .abi = .none, + .version = appleVersionToSemanticVersion(cmd.version), }; }, else => unreachable, } } + pub fn fromTarget(target: std.Target) Platform { + return .{ + .os_tag = target.os.tag, + .abi = target.abi, + .version = target.os.version_range.semver.min, + }; + } + + pub fn toAppleVersion(plat: Platform) u32 { + return semanticVersionToAppleVersion(plat.version); + } + + pub fn toApplePlatform(plat: Platform) macho.PLATFORM { + return switch (plat.os_tag) { + .macos => .MACOS, + .ios => if (plat.abi == .simulator) .IOSSIMULATOR else .IOS, + .tvos => if (plat.abi == .simulator) .TVOSSIMULATOR else .TVOS, + .watchos => if (plat.abi == .simulator) 
.WATCHOSSIMULATOR else .WATCHOS, + else => unreachable, + }; + } + pub fn isBuildVersionCompatible(plat: Platform) bool { inline for (supported_platforms) |sup_plat| { if (sup_plat[0] == plat.platform) { @@ -1134,76 +1304,152 @@ pub const Platform = struct { } return false; } -}; -pub const Version = struct { - value: u32, - - pub fn major(v: Version) u16 { - return @as(u16, @truncate(v.value >> 16)); - } - - pub fn minor(v: Version) u8 { - return @as(u8, @truncate(v.value >> 8)); - } - - pub fn patch(v: Version) u8 { - return @as(u8, @truncate(v.value)); - } - - pub fn parse(raw: []const u8) ?Version { - var parsed: [3]u16 = [_]u16{0} ** 3; - var count: usize = 0; - var it = std.mem.splitAny(u8, raw, "."); - while (it.next()) |comp| { - if (count >= 3) return null; - parsed[count] = std.fmt.parseInt(u16, comp, 10) catch return null; - count += 1; + pub fn isVersionMinCompatible(plat: Platform) bool { + inline for (supported_platforms) |sup_plat| { + if (sup_plat[0] == plat.os_tag and sup_plat[1] == plat.abi) { + return sup_plat[3] <= plat.toAppleVersion(); + } } - if (count == 0) return null; - const maj = parsed[0]; - const min = std.math.cast(u8, parsed[1]) orelse return null; - const pat = std.math.cast(u8, parsed[2]) orelse return null; - return Version.new(maj, min, pat); + return false; } - pub fn new(maj: u16, min: u8, pat: u8) Version { - return .{ .value = (@as(u32, @intCast(maj)) << 16) | (@as(u32, @intCast(min)) << 8) | pat }; + pub fn fmtTarget(plat: Platform, cpu_arch: std.Target.Cpu.Arch) std.fmt.Formatter(formatTarget) { + return .{ .data = .{ .platform = plat, .cpu_arch = cpu_arch } }; } - pub fn format( - v: Version, + const FmtCtx = struct { + platform: Platform, + cpu_arch: std.Target.Cpu.Arch, + }; + + pub fn formatTarget( + ctx: FmtCtx, comptime unused_fmt_string: []const u8, options: std.fmt.FormatOptions, writer: anytype, ) !void { _ = unused_fmt_string; _ = options; - try writer.print("{d}.{d}.{d}", .{ - v.major(), - v.minor(), - v.patch(), 
- }); + try writer.print("{s}-{s}", .{ @tagName(ctx.cpu_arch), @tagName(ctx.platform.os_tag) }); + if (ctx.platform.abi != .none) { + try writer.print("-{s}", .{@tagName(ctx.platform.abi)}); + } + } + + /// Caller owns the memory. + pub fn allocPrintTarget(plat: Platform, gpa: Allocator, cpu_arch: std.Target.Cpu.Arch) error{OutOfMemory}![]u8 { + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + try buffer.writer().print("{}", .{plat.fmtTarget(cpu_arch)}); + return buffer.toOwnedSlice(); + } + + pub fn eqlTarget(plat: Platform, other: Platform) bool { + return plat.os_tag == other.os_tag and plat.abi == other.abi; } }; const SupportedPlatforms = struct { - macho.PLATFORM, // Platform identifier + std.Target.Os.Tag, + std.Target.Abi, u32, // Min platform version for which to emit LC_BUILD_VERSION u32, // Min supported platform version - ?[]const u8, // Env var to look for }; // Source: https://github.com/apple-oss-distributions/ld64/blob/59a99ab60399c5e6c49e6945a9e1049c42b71135/src/ld/PlatformSupport.cpp#L52 +// zig fmt: off const supported_platforms = [_]SupportedPlatforms{ - .{ .MACOS, 0xA0E00, 0xA0800, "MACOSX_DEPLOYMENT_TARGET" }, - .{ .IOS, 0xC0000, 0x70000, "IPHONEOS_DEPLOYMENT_TARGET" }, - .{ .TVOS, 0xC0000, 0x70000, "TVOS_DEPLOYMENT_TARGET" }, - .{ .WATCHOS, 0x50000, 0x20000, "WATCHOS_DEPLOYMENT_TARGET" }, - .{ .IOSSIMULATOR, 0xD0000, 0x80000, null }, - .{ .TVOSSIMULATOR, 0xD0000, 0x80000, null }, - .{ .WATCHOSSIMULATOR, 0x60000, 0x20000, null }, + .{ .macos, .none, 0xA0E00, 0xA0800 }, + .{ .ios, .none, 0xC0000, 0x70000 }, + .{ .tvos, .none, 0xC0000, 0x70000 }, + .{ .watchos, .none, 0x50000, 0x20000 }, + .{ .ios, .simulator, 0xD0000, 0x80000 }, + .{ .tvos, .simulator, 0xD0000, 0x80000 }, + .{ .watchos, .simulator, 0x60000, 0x20000 }, }; +// zig fmt: on + +inline fn semanticVersionToAppleVersion(version: std.SemanticVersion) u32 { + const major = version.major; + const minor = version.minor; + const patch = version.patch; + return (@as(u32, 
@intCast(major)) << 16) | (@as(u32, @intCast(minor)) << 8) | @as(u32, @intCast(patch)); +} + +pub inline fn appleVersionToSemanticVersion(version: u32) std.SemanticVersion { + return .{ + .major = @as(u16, @truncate(version >> 16)), + .minor = @as(u8, @truncate(version >> 8)), + .patch = @as(u8, @truncate(version)), + }; +} + +fn inferSdkVersion(self: *MachO) ?std.SemanticVersion { + const comp = self.base.comp; + const gpa = comp.gpa; + + var arena_allocator = std.heap.ArenaAllocator.init(gpa); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + const sdk_layout = self.sdk_layout orelse return null; + const sdk_dir = switch (sdk_layout) { + .sdk => comp.sysroot.?, + .vendored => std.fs.path.join(arena, &.{ comp.zig_lib_directory.path.?, "libc", "darwin" }) catch return null, + }; + if (readSdkVersionFromSettings(arena, sdk_dir)) |ver| { + return parseSdkVersion(ver); + } else |_| { + // Read from settings should always succeed when vendored. + if (sdk_layout == .vendored) @panic("zig installation bug: unable to parse SDK version"); + } + + // infer from pathname + const stem = std.fs.path.stem(sdk_dir); + const start = for (stem, 0..) |c, i| { + if (std.ascii.isDigit(c)) break i; + } else stem.len; + const end = for (stem[start..], start..) |c, i| { + if (std.ascii.isDigit(c) or c == '.') continue; + break i; + } else stem.len; + return parseSdkVersion(stem[start..end]); +} + +// Official Apple SDKs ship with a `SDKSettings.json` located at the top of SDK fs layout. +// Use property `MinimalDisplayName` to determine version. +// The file/property is also available with vendored libc. 
+fn readSdkVersionFromSettings(arena: Allocator, dir: []const u8) ![]const u8 { + const sdk_path = try std.fs.path.join(arena, &.{ dir, "SDKSettings.json" }); + const contents = try std.fs.cwd().readFileAlloc(arena, sdk_path, std.math.maxInt(u16)); + const parsed = try std.json.parseFromSlice(std.json.Value, arena, contents, .{}); + if (parsed.value.object.get("MinimalDisplayName")) |ver| return ver.string; + return error.SdkVersionFailure; +} + +// Versions reported by Apple aren't exactly semantically valid as they usually omit +// the patch component, so we parse SDK value by hand. +fn parseSdkVersion(raw: []const u8) ?std.SemanticVersion { + var parsed: std.SemanticVersion = .{ + .major = 0, + .minor = 0, + .patch = 0, + }; + + const parseNext = struct { + fn parseNext(it: anytype) ?u16 { + const nn = it.next() orelse return null; + return std.fmt.parseInt(u16, nn, 10) catch null; + } + }.parseNext; + + var it = std.mem.splitAny(u8, raw, "."); + parsed.major = parseNext(&it) orelse return null; + parsed.minor = parseNext(&it) orelse return null; + parsed.patch = parseNext(&it) orelse 0; + return parsed; +} /// When allocating, the ideal_capacity is calculated by /// actual_capacity + (actual_capacity / ideal_factor) From 0c171afab003f7d7dfde8491141e00cac7e99c21 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 10 Jan 2024 19:39:40 +0100 Subject: [PATCH 004/133] macho: parse an input object file! 
--- src/link/MachO.zig | 939 +++++++++++++++++++++++----------- src/link/MachO/Atom.zig | 2 +- src/link/MachO/Dylib.zig | 2 +- src/link/MachO/Object.zig | 234 +++++---- src/link/MachO/Symbol.zig | 2 +- src/link/MachO/UnwindInfo.zig | 2 +- src/link/MachO/eh_frame.zig | 17 +- src/link/MachO/synthetic.zig | 34 +- 8 files changed, 799 insertions(+), 433 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 1e41458c70..36d522f1ee 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1,4 +1,4 @@ -base: File, +base: link.File, /// If this is not null, an object file is created by LLVM and emitted to zcu_object_sub_path. llvm_object: ?*LlvmObject = null, @@ -6,6 +6,27 @@ llvm_object: ?*LlvmObject = null, /// Debug symbols bundle (or dSym). d_sym: ?DebugSymbols = null, +/// A list of all input files. +/// Index of each input file also encodes the priority or precedence of one input file +/// over another. +files: std.MultiArrayList(File.Entry) = .{}, +internal_object: ?File.Index = null, +objects: std.ArrayListUnmanaged(File.Index) = .{}, +dylibs: std.ArrayListUnmanaged(File.Index) = .{}, + +segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, +sections: std.MultiArrayList(Section) = .{}, + +symbols: std.ArrayListUnmanaged(Symbol) = .{}, +symbols_extra: std.ArrayListUnmanaged(u32) = .{}, +globals: std.AutoHashMapUnmanaged(u32, Symbol.Index) = .{}, +/// This table will be populated after `scanRelocs` has run. +/// Key is symbol index. +undefs: std.AutoHashMapUnmanaged(Symbol.Index, std.ArrayListUnmanaged(Atom.Index)) = .{}, +/// Global symbols we need to resolve for the link to succeed. 
+undefined_symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, +boundary_symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, + dyld_info_cmd: macho.dyld_info_command = .{}, symtab_cmd: macho.symtab_command = .{}, dysymtab_cmd: macho.dysymtab_command = .{}, @@ -14,36 +35,46 @@ data_in_code_cmd: macho.linkedit_data_command = .{ .cmd = .DATA_IN_CODE }, uuid_cmd: macho.uuid_command = .{ .uuid = [_]u8{0} ** 16 }, codesig_cmd: macho.linkedit_data_command = .{ .cmd = .CODE_SIGNATURE }, -segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, -sections: std.MultiArrayList(Section) = .{}, - -pagezero_segment_cmd_index: ?u8 = null, -header_segment_cmd_index: ?u8 = null, -text_segment_cmd_index: ?u8 = null, -data_const_segment_cmd_index: ?u8 = null, -data_segment_cmd_index: ?u8 = null, -linkedit_segment_cmd_index: ?u8 = null, - -text_section_index: ?u8 = null, -data_const_section_index: ?u8 = null, -data_section_index: ?u8 = null, -bss_section_index: ?u8 = null, -thread_vars_section_index: ?u8 = null, -thread_data_section_index: ?u8 = null, -thread_bss_section_index: ?u8 = null, -eh_frame_section_index: ?u8 = null, -unwind_info_section_index: ?u8 = null, -stubs_section_index: ?u8 = null, -stub_helper_section_index: ?u8 = null, -got_section_index: ?u8 = null, -la_symbol_ptr_section_index: ?u8 = null, -tlv_ptr_section_index: ?u8 = null, - -strtab: StringTable = .{}, +pagezero_seg_index: ?u8 = null, +text_seg_index: ?u8 = null, +linkedit_seg_index: ?u8 = null, +data_sect_index: ?u8 = null, +got_sect_index: ?u8 = null, +stubs_sect_index: ?u8 = null, +stubs_helper_sect_index: ?u8 = null, +la_symbol_ptr_sect_index: ?u8 = null, +tlv_ptr_sect_index: ?u8 = null, +eh_frame_sect_index: ?u8 = null, +unwind_info_sect_index: ?u8 = null, +objc_stubs_sect_index: ?u8 = null, /// List of atoms that are either synthetic or map directly to the Zig source program. 
atoms: std.ArrayListUnmanaged(Atom) = .{}, +thunks: std.ArrayListUnmanaged(Thunk) = .{}, +unwind_records: std.ArrayListUnmanaged(UnwindInfo.Record) = .{}, +/// String interning table +strings: StringTable = .{}, + +/// Output synthetic sections +symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +strtab: std.ArrayListUnmanaged(u8) = .{}, +indsymtab: Indsymtab = .{}, +got: GotSection = .{}, +stubs: StubsSection = .{}, +stubs_helper: StubsHelperSection = .{}, +objc_stubs: ObjcStubsSection = .{}, +la_symbol_ptr: LaSymbolPtrSection = .{}, +tlv_ptr: TlvPtrSection = .{}, +rebase: RebaseSection = .{}, +bind: BindSection = .{}, +weak_bind: WeakBindSection = .{}, +lazy_bind: LazyBindSection = .{}, +export_trie: ExportTrieSection = .{}, +unwind_info: UnwindInfo = .{}, + +/// Options +/// SDK layout sdk_layout: ?SdkLayout, /// Size of the __PAGEZERO segment. pagezero_vmsize: ?u64, @@ -62,6 +93,8 @@ entitlements: ?[]const u8, compatibility_version: ?std.SemanticVersion, /// Entry name entry_name: ?[]const u8, +platform: Platform, +sdk_version: ?std.SemanticVersion, /// Hot-code swapping state. hot_state: if (is_hot_update_compatible) HotUpdateState else struct {} = .{}, @@ -144,6 +177,8 @@ pub fn createEmpty( .enabled => default_entry_symbol_name, .named => |name| name, }, + .platform = Platform.fromTarget(target), + .sdk_version = if (options.darwin_sdk_layout) |layout| inferSdkVersion(comp, layout) else null, }; if (use_llvm and comp.config.have_zcu) { self.llvm_object = try LlvmObject.create(arena, comp); @@ -156,9 +191,16 @@ pub fn createEmpty( .mode = link.File.determineMode(false, output_mode, link_mode), }); - // Index 0 is always a null symbol. 
- // try self.locals.append(gpa, null_sym); - try self.strtab.buffer.append(gpa, 0); + // Append null file + try self.files.append(gpa, .null); + // Atom at index 0 is reserved as null atom + try self.atoms.append(gpa, .{}); + // Append empty string to string tables + try self.strings.buffer.append(gpa, 0); + try self.strtab.append(gpa, 0); + // Append null symbols + try self.symbols.append(gpa, .{}); + try self.symbols_extra.append(gpa, 0); // TODO: init @@ -208,8 +250,71 @@ pub fn open( return createEmpty(arena, comp, emit, options); } +pub fn deinit(self: *MachO) void { + const gpa = self.base.comp.gpa; + + if (self.llvm_object) |llvm_object| llvm_object.deinit(); + + if (self.d_sym) |*d_sym| { + d_sym.deinit(); + } + + for (self.files.items(.tags), self.files.items(.data)) |tag, *data| switch (tag) { + .null => {}, + .internal => data.internal.deinit(gpa), + .object => data.object.deinit(gpa), + .dylib => data.dylib.deinit(gpa), + }; + self.files.deinit(gpa); + self.objects.deinit(gpa); + self.dylibs.deinit(gpa); + + self.segments.deinit(gpa); + for (self.sections.items(.atoms)) |*list| { + list.deinit(gpa); + } + self.sections.deinit(gpa); + + self.symbols.deinit(gpa); + self.symbols_extra.deinit(gpa); + self.globals.deinit(gpa); + { + var it = self.undefs.iterator(); + while (it.next()) |entry| { + entry.value_ptr.deinit(gpa); + } + self.undefs.deinit(gpa); + } + self.undefined_symbols.deinit(gpa); + self.boundary_symbols.deinit(gpa); + + self.strings.deinit(gpa); + self.symtab.deinit(gpa); + self.strtab.deinit(gpa); + self.got.deinit(gpa); + self.stubs.deinit(gpa); + self.objc_stubs.deinit(gpa); + self.tlv_ptr.deinit(gpa); + self.rebase.deinit(gpa); + self.bind.deinit(gpa); + self.weak_bind.deinit(gpa); + self.lazy_bind.deinit(gpa); + self.export_trie.deinit(gpa); + self.unwind_info.deinit(gpa); + + self.atoms.deinit(gpa); + for (self.thunks.items) |*thunk| { + thunk.deinit(gpa); + } + self.thunks.deinit(gpa); + self.unwind_records.deinit(gpa); +} + pub fn 
flush(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node) link.File.FlushError!void { - // TODO: what else should we do in flush? Is it actually needed at all? + // TODO: I think this is just a temp and can be removed once we can emit static archives + if (self.base.isStaticLib() and build_options.have_llvm) { + return self.base.linkAsArchive(arena, prog_node); + } try self.flushModule(arena, prog_node); } @@ -219,10 +324,11 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node const comp = self.base.comp; const gpa = comp.gpa; - _ = gpa; if (self.llvm_object) |llvm_object| { try self.base.emitLlvmObject(arena, llvm_object, prog_node); + // TODO: I think this is just a temp and can be removed once we can emit static archives + if (self.base.isStaticLib() and build_options.have_llvm) return; } var sub_prog_node = prog_node.start("MachO Flush", 0); @@ -240,11 +346,55 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node break :blk path; } } else null; - _ = module_obj_path; // --verbose-link if (comp.verbose_link) try self.dumpArgv(comp); + if (self.base.isStaticLib()) return self.flushStaticLib(comp, module_obj_path); + if (self.base.isObject()) return self.flushObject(comp, module_obj_path); + + var positionals = std.ArrayList(Compilation.LinkObject).init(gpa); + defer positionals.deinit(); + + try positionals.ensureUnusedCapacity(comp.objects.len); + positionals.appendSliceAssumeCapacity(comp.objects); + + // This is a set of object files emitted by clang in a single `build-exe` invocation. + // For instance, the implicit `a.o` as compiled by `zig build-exe a.c` will end up + // in this set. 
+ try positionals.ensureUnusedCapacity(comp.c_object_table.keys().len); + for (comp.c_object_table.keys()) |key| { + positionals.appendAssumeCapacity(.{ .path = key.status.success.object_path }); + } + + if (module_obj_path) |path| try positionals.append(.{ .path = path }); + + // rpaths + var rpath_table = std.StringArrayHashMap(void).init(gpa); + defer rpath_table.deinit(); + try rpath_table.ensureUnusedCapacity(self.base.rpath_list.len); + + for (self.base.rpath_list) |rpath| { + _ = rpath_table.putAssumeCapacity(rpath, {}); + } + + for (positionals.items) |obj| { + self.parsePositional(obj.path, obj.must_link) catch |err| switch (err) { + error.MalformedObject, + error.MalformedArchive, + error.InvalidCpuArch, + error.InvalidTarget, + => continue, // already reported + else => |e| try self.reportParseError( + obj.path, + "unexpected error: parsing input file failed with error {s}", + .{@errorName(e)}, + ), + }; + } + + state_log.debug("{}", .{self.dumpState()}); + @panic("TODO"); } @@ -255,7 +405,6 @@ fn dumpArgv(self: *MachO, comp: *Compilation) !void { defer arena_allocator.deinit(); const arena = arena_allocator.allocator(); - const target = self.base.comp.root_mod.resolved_target.result; const directory = self.base.emit.directory; const full_out_path = try directory.join(arena, &[_][]const u8{self.base.emit.sub_path}); const module_obj_path: ?[]const u8 = if (self.base.zcu_object_sub_path) |path| blk: { @@ -309,18 +458,14 @@ fn dumpArgv(self: *MachO, comp: *Compilation) !void { } } - { - const platform = Platform.fromTarget(target); - try argv.append("-platform_version"); - try argv.append(@tagName(platform.os_tag)); - try argv.append(try std.fmt.allocPrint(arena, "{}", .{platform.version})); + try argv.append("-platform_version"); + try argv.append(@tagName(self.platform.os_tag)); + try argv.append(try std.fmt.allocPrint(arena, "{}", .{self.platform.version})); - const sdk_version: ?std.SemanticVersion = self.inferSdkVersion(); - if (sdk_version) |ver| { - 
try argv.append(try std.fmt.allocPrint(arena, "{d}.{d}", .{ ver.major, ver.minor })); - } else { - try argv.append(try std.fmt.allocPrint(arena, "{}", .{platform.version})); - } + if (self.sdk_version) |ver| { + try argv.append(try std.fmt.allocPrint(arena, "{d}.{d}", .{ ver.major, ver.minor })); + } else { + try argv.append(try std.fmt.allocPrint(arena, "{}", .{self.platform.version})); } if (comp.sysroot) |syslibroot| { @@ -419,6 +564,26 @@ fn dumpArgv(self: *MachO, comp: *Compilation) !void { Compilation.dump_argv(argv.items); } +fn flushStaticLib(self: *MachO, comp: *Compilation, module_obj_path: ?[]const u8) link.File.FlushError!void { + _ = comp; + _ = module_obj_path; + + var err = try self.addErrorWithNotes(0); + try err.addMsg(self, "TODO implement flushStaticLib", .{}); + + return error.FlushFailure; +} + +fn flushObject(self: *MachO, comp: *Compilation, module_obj_path: ?[]const u8) link.File.FlushError!void { + _ = comp; + _ = module_obj_path; + + var err = try self.addErrorWithNotes(0); + try err.addMsg(self, "TODO implement flushObject", .{}); + + return error.FlushFailure; +} + /// XNU starting with Big Sur running on arm64 is caching inodes of running binaries. /// Any change to the binary will effectively invalidate the kernel's cache /// resulting in a SIGKILL on each subsequent run. 
Since when doing incremental @@ -518,132 +683,60 @@ fn accessLibPath( } const ParseError = error{ - UnknownFileType, + MalformedObject, + MalformedArchive, + NotLibStub, + InvalidCpuArch, InvalidTarget, InvalidTargetFatLibrary, - DylibAlreadyExists, IncompatibleDylibVersion, OutOfMemory, Overflow, InputOutput, - MalformedArchive, - NotLibStub, EndOfStream, FileSystem, NotSupported, } || std.os.SeekError || std.fs.File.OpenError || std.fs.File.ReadError || tapi.TapiError; -pub fn parsePositional( - self: *MachO, - file: std.fs.File, - path: []const u8, - must_link: bool, - dependent_libs: anytype, - ctx: *ParseErrorCtx, -) ParseError!void { +fn parsePositional(self: *MachO, path: []const u8, must_link: bool) ParseError!void { + const tracy = trace(@src()); + defer tracy.end(); + if (try Object.isObject(path)) { + try self.parseObject(path); + } else { + try self.parseLibrary(.{ .path = path }, must_link); + } +} + +fn parseLibrary(self: *MachO, lib: SystemLib, must_link: bool) ParseError!void { + _ = self; + _ = lib; + _ = must_link; +} + +fn parseObject(self: *MachO, path: []const u8) ParseError!void { const tracy = trace(@src()); defer tracy.end(); - _ = self; - _ = file; - _ = path; - _ = must_link; - _ = dependent_libs; - _ = ctx; -} - -pub fn deinit(self: *MachO) void { const gpa = self.base.comp.gpa; + const file = try std.fs.cwd().openFile(path, .{}); + defer file.close(); + const mtime: u64 = mtime: { + const stat = file.stat() catch break :mtime 0; + break :mtime @as(u64, @intCast(@divFloor(stat.mtime, 1_000_000_000))); + }; + const data = try file.readToEndAlloc(gpa, std.math.maxInt(u32)); + const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); + self.files.set(index, .{ .object = .{ + .path = try gpa.dupe(u8, path), + .mtime = mtime, + .data = data, + .index = index, + } }); + try self.objects.append(gpa, index); - if (self.llvm_object) |llvm_object| llvm_object.deinit(); - - if (self.d_sym) |*d_sym| { - d_sym.deinit(); - } - - 
self.strtab.deinit(gpa); - - self.segments.deinit(gpa); - - for (self.sections.items(.free_list)) |*list| { - list.deinit(gpa); - } - self.sections.deinit(gpa); -} - -fn freeAtom(self: *MachO, atom_index: Atom.Index) void { - const gpa = self.base.comp.gpa; - log.debug("freeAtom {d}", .{atom_index}); - - // Remove any relocs and base relocs associated with this Atom - Atom.freeRelocations(self, atom_index); - - const atom = self.getAtom(atom_index); - const sect_id = atom.getSymbol(self).n_sect - 1; - const free_list = &self.sections.items(.free_list)[sect_id]; - var already_have_free_list_node = false; - { - var i: usize = 0; - // TODO turn free_list into a hash map - while (i < free_list.items.len) { - if (free_list.items[i] == atom_index) { - _ = free_list.swapRemove(i); - continue; - } - if (free_list.items[i] == atom.prev_index) { - already_have_free_list_node = true; - } - i += 1; - } - } - - const maybe_last_atom_index = &self.sections.items(.last_atom_index)[sect_id]; - if (maybe_last_atom_index.*) |last_atom_index| { - if (last_atom_index == atom_index) { - if (atom.prev_index) |prev_index| { - // TODO shrink the section size here - maybe_last_atom_index.* = prev_index; - } else { - maybe_last_atom_index.* = null; - } - } - } - - if (atom.prev_index) |prev_index| { - const prev = self.getAtomPtr(prev_index); - prev.next_index = atom.next_index; - - if (!already_have_free_list_node and prev.*.freeListEligible(self)) { - // The free list is heuristics, it doesn't have to be perfect, so we can ignore - // the OOM here. - free_list.append(gpa, prev_index) catch {}; - } - } else { - self.getAtomPtr(atom_index).prev_index = null; - } - - if (atom.next_index) |next_index| { - self.getAtomPtr(next_index).prev_index = atom.prev_index; - } else { - self.getAtomPtr(atom_index).next_index = null; - } - - // Appending to free lists is allowed to fail because the free lists are heuristics based anyway. 
- const sym_index = atom.getSymbolIndex().?; - - self.locals_free_list.append(gpa, sym_index) catch {}; - - // Try freeing GOT atom if this decl had one - self.got_table.freeEntry(gpa, .{ .sym_index = sym_index }); - - if (self.d_sym) |*d_sym| { - d_sym.swapRemoveRelocs(sym_index); - } - - self.locals.items[sym_index].n_type = 0; - _ = self.atom_by_index_table.remove(sym_index); - log.debug(" adding local symbol index {d} to free list", .{sym_index}); - self.getAtomPtr(atom_index).sym_index = 0; + const object = self.getFile(index).?.object; + try object.parse(self); } fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void { @@ -716,7 +809,7 @@ pub fn updateDecl(self: *MachO, mod: *Module, decl_index: InternPool.DeclIndex) fn updateLazySymbolAtom( self: *MachO, - sym: File.LazySymbol, + sym: link.File.LazySymbol, atom_index: Atom.Index, section_index: u8, ) !void { @@ -727,7 +820,7 @@ fn updateLazySymbolAtom( @panic("TODO updateLazySymbolAtom"); } -pub fn getOrCreateAtomForLazySymbol(self: *MachO, sym: File.LazySymbol) !Atom.Index { +pub fn getOrCreateAtomForLazySymbol(self: *MachO, sym: link.File.LazySymbol) !Atom.Index { _ = self; _ = sym; @panic("TODO getOrCreateAtomForLazySymbol"); @@ -763,7 +856,7 @@ pub fn updateExports( mod: *Module, exported: Module.Exported, exports: []const *Module.Export, -) File.UpdateExportsError!void { +) link.File.UpdateExportsError!void { if (build_options.skip_non_native and builtin.object_format != .macho) { @panic("Attempted to compile for object format that was disabled by build configuration"); } @@ -795,7 +888,7 @@ pub fn freeDecl(self: *MachO, decl_index: InternPool.DeclIndex) void { @panic("TODO freeDecl"); } -pub fn getDeclVAddr(self: *MachO, decl_index: InternPool.DeclIndex, reloc_info: File.RelocInfo) !u64 { +pub fn getDeclVAddr(self: *MachO, decl_index: InternPool.DeclIndex, reloc_info: link.File.RelocInfo) !u64 { assert(self.llvm_object == null); _ = decl_index; _ = reloc_info; @@ -872,94 +965,224 
@@ fn findFreeSpace(self: *MachO, object_size: u64, min_alignment: u32) u64 { return start; } +pub fn getTarget(self: MachO) std.Target { + return self.base.comp.root_mod.resolved_target.result; +} + pub fn makeStaticString(bytes: []const u8) [16]u8 { var buf = [_]u8{0} ** 16; @memcpy(buf[0..bytes.len], bytes); return buf; } -pub const ParseErrorCtx = struct { - arena_allocator: std.heap.ArenaAllocator, - detected_dylib_id: struct { - parent: u16, - required_version: u32, - found_version: u32, - }, - detected_targets: std.ArrayList([]const u8), +pub fn getFile(self: *MachO, index: File.Index) ?File { + const tag = self.files.items(.tags)[index]; + return switch (tag) { + .null => null, + .internal => .{ .internal = &self.files.items(.data)[index].internal }, + .object => .{ .object = &self.files.items(.data)[index].object }, + .dylib => .{ .dylib = &self.files.items(.data)[index].dylib }, + }; +} - pub fn init(gpa: Allocator) ParseErrorCtx { - return .{ - .arena_allocator = std.heap.ArenaAllocator.init(gpa), - .detected_dylib_id = undefined, - .detected_targets = std.ArrayList([]const u8).init(gpa), +pub fn getInternalObject(self: *MachO) ?*InternalObject { + const index = self.internal_object orelse return null; + return self.getFile(index).?.internal; +} + +pub fn addAtom(self: *MachO) error{OutOfMemory}!Atom.Index { + const index = @as(Atom.Index, @intCast(self.atoms.items.len)); + const atom = try self.atoms.addOne(self.base.comp.gpa); + atom.* = .{}; + return index; +} + +pub fn getAtom(self: *MachO, index: Atom.Index) ?*Atom { + if (index == 0) return null; + assert(index < self.atoms.items.len); + return &self.atoms.items[index]; +} + +pub fn addSymbol(self: *MachO) !Symbol.Index { + const index = @as(Symbol.Index, @intCast(self.symbols.items.len)); + const symbol = try self.symbols.addOne(self.base.comp.gpa); + symbol.* = .{}; + return index; +} + +pub fn getSymbol(self: *MachO, index: Symbol.Index) *Symbol { + assert(index < self.symbols.items.len); + 
return &self.symbols.items[index]; +} + +pub fn addSymbolExtra(self: *MachO, extra: Symbol.Extra) !u32 { + const fields = @typeInfo(Symbol.Extra).Struct.fields; + try self.symbols_extra.ensureUnusedCapacity(self.base.comp.gpa, fields.len); + return self.addSymbolExtraAssumeCapacity(extra); +} + +pub fn addSymbolExtraAssumeCapacity(self: *MachO, extra: Symbol.Extra) u32 { + const index = @as(u32, @intCast(self.symbols_extra.items.len)); + const fields = @typeInfo(Symbol.Extra).Struct.fields; + inline for (fields) |field| { + self.symbols_extra.appendAssumeCapacity(switch (field.type) { + u32 => @field(extra, field.name), + else => @compileError("bad field type"), + }); + } + return index; +} + +pub fn getSymbolExtra(self: MachO, index: u32) ?Symbol.Extra { + if (index == 0) return null; + const fields = @typeInfo(Symbol.Extra).Struct.fields; + var i: usize = index; + var result: Symbol.Extra = undefined; + inline for (fields) |field| { + @field(result, field.name) = switch (field.type) { + u32 => self.symbols_extra.items[i], + else => @compileError("bad field type"), + }; + i += 1; + } + return result; +} + +pub fn setSymbolExtra(self: *MachO, index: u32, extra: Symbol.Extra) void { + assert(index > 0); + const fields = @typeInfo(Symbol.Extra).Struct.fields; + inline for (fields, 0..) 
|field, i| { + self.symbols_extra.items[index + i] = switch (field.type) { + u32 => @field(extra, field.name), + else => @compileError("bad field type"), }; } +} - pub fn deinit(ctx: *ParseErrorCtx) void { - ctx.arena_allocator.deinit(); - ctx.detected_targets.deinit(); +const GetOrCreateGlobalResult = struct { + found_existing: bool, + index: Symbol.Index, +}; + +pub fn getOrCreateGlobal(self: *MachO, off: u32) !GetOrCreateGlobalResult { + const gpa = self.base.comp.gpa; + const gop = try self.globals.getOrPut(gpa, off); + if (!gop.found_existing) { + const index = try self.addSymbol(); + const global = self.getSymbol(index); + global.name = off; + gop.value_ptr.* = index; + } + return .{ + .found_existing = gop.found_existing, + .index = gop.value_ptr.*, + }; +} + +pub fn getGlobalByName(self: *MachO, name: []const u8) ?Symbol.Index { + const off = self.strings.getOffset(name) orelse return null; + return self.globals.get(off); +} + +pub fn addUnwindRecord(self: *MachO) !UnwindInfo.Record.Index { + const index = @as(UnwindInfo.Record.Index, @intCast(self.unwind_records.items.len)); + const rec = try self.unwind_records.addOne(self.base.comp.gpa); + rec.* = .{}; + return index; +} + +pub fn getUnwindRecord(self: *MachO, index: UnwindInfo.Record.Index) *UnwindInfo.Record { + assert(index < self.unwind_records.items.len); + return &self.unwind_records.items[index]; +} + +pub fn addThunk(self: *MachO) !Thunk.Index { + const index = @as(Thunk.Index, @intCast(self.thunks.items.len)); + const thunk = try self.thunks.addOne(self.base.comp.gpa); + thunk.* = .{}; + return index; +} + +pub fn getThunk(self: *MachO, index: Thunk.Index) *Thunk { + assert(index < self.thunks.items.len); + return &self.thunks.items[index]; +} + +pub fn eatPrefix(path: []const u8, prefix: []const u8) ?[]const u8 { + if (mem.startsWith(u8, path, prefix)) return path[prefix.len..]; + return null; +} + +const ErrorWithNotes = struct { + /// Allocated index in comp.link_errors array. 
+ index: usize, + + /// Next available note slot. + note_slot: usize = 0, + + pub fn addMsg( + err: ErrorWithNotes, + macho_file: *MachO, + comptime format: []const u8, + args: anytype, + ) error{OutOfMemory}!void { + const comp = macho_file.base.comp; + const gpa = comp.gpa; + const err_msg = &comp.link_errors.items[err.index]; + err_msg.msg = try std.fmt.allocPrint(gpa, format, args); } - pub fn arena(ctx: *ParseErrorCtx) Allocator { - return ctx.arena_allocator.allocator(); + pub fn addNote( + err: *ErrorWithNotes, + macho_file: *MachO, + comptime format: []const u8, + args: anytype, + ) error{OutOfMemory}!void { + const comp = macho_file.base.comp; + const gpa = comp.gpa; + const err_msg = &comp.link_errors.items[err.index]; + assert(err.note_slot < err_msg.notes.len); + err_msg.notes[err.note_slot] = .{ .msg = try std.fmt.allocPrint(gpa, format, args) }; + err.note_slot += 1; } }; -pub fn handleAndReportParseError( +pub fn addErrorWithNotes(self: *MachO, note_count: usize) error{OutOfMemory}!ErrorWithNotes { + const comp = self.base.comp; + const gpa = comp.gpa; + try comp.link_errors.ensureUnusedCapacity(gpa, 1); + return self.addErrorWithNotesAssumeCapacity(note_count); +} + +fn addErrorWithNotesAssumeCapacity(self: *MachO, note_count: usize) error{OutOfMemory}!ErrorWithNotes { + const comp = self.base.comp; + const gpa = comp.gpa; + const index = comp.link_errors.items.len; + const err = comp.link_errors.addOneAssumeCapacity(); + err.* = .{ .msg = undefined, .notes = try gpa.alloc(link.File.ErrorMsg, note_count) }; + return .{ .index = index }; +} + +pub fn reportParseError( self: *MachO, path: []const u8, - err: ParseError, - ctx: *const ParseErrorCtx, + comptime format: []const u8, + args: anytype, ) error{OutOfMemory}!void { - const target = self.base.comp.root_mod.resolved_target.result; - const gpa = self.base.comp.gpa; - const cpu_arch = target.cpu.arch; - switch (err) { - error.DylibAlreadyExists => {}, - error.IncompatibleDylibVersion => { - const 
parent = &self.dylibs.items[ctx.detected_dylib_id.parent]; - try self.reportDependencyError( - if (parent.id) |id| id.name else parent.path, - path, - "incompatible dylib version: expected at least '{}', but found '{}'", - .{ - load_commands.appleVersionToSemanticVersion(ctx.detected_dylib_id.required_version), - load_commands.appleVersionToSemanticVersion(ctx.detected_dylib_id.found_version), - }, - ); - }, - error.UnknownFileType => try self.reportParseError(path, "unknown file type", .{}), - error.InvalidTarget, error.InvalidTargetFatLibrary => { - var targets_string = std.ArrayList(u8).init(gpa); - defer targets_string.deinit(); + var err = try self.addErrorWithNotes(1); + try err.addMsg(self, format, args); + try err.addNote(self, "while parsing {s}", .{path}); +} - if (ctx.detected_targets.items.len > 1) { - try targets_string.writer().writeAll("("); - for (ctx.detected_targets.items) |t| { - try targets_string.writer().print("{s}, ", .{t}); - } - try targets_string.resize(targets_string.items.len - 2); - try targets_string.writer().writeAll(")"); - } else { - try targets_string.writer().writeAll(ctx.detected_targets.items[0]); - } - - switch (err) { - error.InvalidTarget => try self.reportParseError( - path, - "invalid target: expected '{}', but found '{s}'", - .{ Platform.fromTarget(target).fmtTarget(cpu_arch), targets_string.items }, - ), - error.InvalidTargetFatLibrary => try self.reportParseError( - path, - "invalid architecture in universal library: expected '{s}', but found '{s}'", - .{ @tagName(cpu_arch), targets_string.items }, - ), - else => unreachable, - } - }, - else => |e| try self.reportParseError(path, "{s}: parsing object failed", .{@errorName(e)}), - } +pub fn reportParseError2( + self: *MachO, + file_index: File.Index, + comptime format: []const u8, + args: anytype, +) error{OutOfMemory}!void { + var err = try self.addErrorWithNotes(1); + try err.addMsg(self, format, args); + try err.addNote(self, "while parsing {}", 
.{self.getFile(file_index).?.fmtPath()}); } fn reportMissingLibraryError( @@ -968,18 +1191,11 @@ fn reportMissingLibraryError( comptime format: []const u8, args: anytype, ) error{OutOfMemory}!void { - const comp = self.base.comp; - const gpa = comp.gpa; - try comp.link_errors.ensureUnusedCapacity(gpa, 1); - const notes = try gpa.alloc(File.ErrorMsg, checked_paths.len); - errdefer gpa.free(notes); - for (checked_paths, notes) |path, *note| { - note.* = .{ .msg = try std.fmt.allocPrint(gpa, "tried {s}", .{path}) }; + var err = try self.addErrorWithNotes(checked_paths.len); + try err.addMsg(self, format, args); + for (checked_paths) |path| { + try err.addNote(self, "tried {s}", .{path}); } - comp.link_errors.appendAssumeCapacity(.{ - .msg = try std.fmt.allocPrint(gpa, format, args), - .notes = notes, - }); } fn reportDependencyError( @@ -992,7 +1208,7 @@ fn reportDependencyError( const comp = self.base.comp; const gpa = comp.gpa; try comp.link_errors.ensureUnusedCapacity(gpa, 1); - var notes = try std.ArrayList(File.ErrorMsg).initCapacity(gpa, 2); + var notes = try std.ArrayList(link.File.ErrorMsg).initCapacity(gpa, 2); defer notes.deinit(); if (path) |p| { notes.appendAssumeCapacity(.{ .msg = try std.fmt.allocPrint(gpa, "while parsing {s}", .{p}) }); @@ -1004,42 +1220,6 @@ fn reportDependencyError( }); } -pub fn reportParseError( - self: *MachO, - path: []const u8, - comptime format: []const u8, - args: anytype, -) error{OutOfMemory}!void { - const comp = self.base.comp; - const gpa = comp.gpa; - try comp.link_errors.ensureUnusedCapacity(gpa, 1); - var notes = try gpa.alloc(File.ErrorMsg, 1); - errdefer gpa.free(notes); - notes[0] = .{ .msg = try std.fmt.allocPrint(gpa, "while parsing {s}", .{path}) }; - comp.link_errors.appendAssumeCapacity(.{ - .msg = try std.fmt.allocPrint(gpa, format, args), - .notes = notes, - }); -} - -pub fn reportUnresolvedBoundarySymbol( - self: *MachO, - sym_name: []const u8, - comptime format: []const u8, - args: anytype, -) 
error{OutOfMemory}!void { - const comp = self.base.comp; - const gpa = comp.gpa; - try comp.link_errors.ensureUnusedCapacity(gpa, 1); - var notes = try gpa.alloc(File.ErrorMsg, 1); - errdefer gpa.free(notes); - notes[0] = .{ .msg = try std.fmt.allocPrint(gpa, "while resolving {s}", .{sym_name}) }; - comp.link_errors.appendAssumeCapacity(.{ - .msg = try std.fmt.allocPrint(gpa, format, args), - .notes = notes, - }); -} - pub fn reportUndefined(self: *MachO) error{OutOfMemory}!void { const comp = self.base.comp; const gpa = comp.gpa; @@ -1050,7 +1230,7 @@ pub fn reportUndefined(self: *MachO) error{OutOfMemory}!void { const global = self.globals.items[global_index]; const sym_name = self.getSymbolName(global); - var notes = try std.ArrayList(File.ErrorMsg).initCapacity(gpa, 1); + var notes = try std.ArrayList(link.File.ErrorMsg).initCapacity(gpa, 1); defer notes.deinit(); if (global.getFile()) |file| { @@ -1060,7 +1240,7 @@ pub fn reportUndefined(self: *MachO) error{OutOfMemory}!void { notes.appendAssumeCapacity(.{ .msg = note }); } - var err_msg = File.ErrorMsg{ + var err_msg = link.File.ErrorMsg{ .msg = try std.fmt.allocPrint(gpa, "undefined reference to symbol {s}", .{sym_name}), }; err_msg.notes = try notes.toOwnedSlice(); @@ -1164,6 +1344,145 @@ pub fn ptraceDetach(self: *MachO, pid: std.os.pid_t) !void { self.hot_state.mach_task = null; } +pub fn dumpState(self: *MachO) std.fmt.Formatter(fmtDumpState) { + return .{ .data = self }; +} + +fn fmtDumpState( + self: *MachO, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = options; + _ = unused_fmt_string; + for (self.objects.items) |index| { + const object = self.getFile(index).?.object; + try writer.print("object({d}) : {} : has_debug({})", .{ + index, + object.fmtPath(), + object.hasDebugInfo(), + }); + if (!object.alive) try writer.writeAll(" : ([*])"); + try writer.writeByte('\n'); + try writer.print("{}{}{}{}{}\n", .{ + object.fmtAtoms(self), + 
object.fmtCies(self), + object.fmtFdes(self), + object.fmtUnwindRecords(self), + object.fmtSymtab(self), + }); + } + // for (self.dylibs.items) |index| { + // const dylib = self.getFile(index).?.dylib; + // try writer.print("dylib({d}) : {s} : needed({}) : weak({})", .{ + // index, + // dylib.path, + // dylib.needed, + // dylib.weak, + // }); + // if (!dylib.isAlive(self)) try writer.writeAll(" : ([*])"); + // try writer.writeByte('\n'); + // try writer.print("{}\n", .{dylib.fmtSymtab(self)}); + // } + if (self.getInternalObject()) |internal| { + try writer.print("internal({d}) : internal\n", .{internal.index}); + try writer.print("{}{}\n", .{ internal.fmtAtoms(self), internal.fmtSymtab(self) }); + } + try writer.writeAll("thunks\n"); + for (self.thunks.items, 0..) |thunk, index| { + try writer.print("thunk({d}) : {}\n", .{ index, thunk.fmt(self) }); + } + try writer.print("stubs\n{}\n", .{self.stubs.fmt(self)}); + try writer.print("objc_stubs\n{}\n", .{self.objc_stubs.fmt(self)}); + try writer.print("got\n{}\n", .{self.got.fmt(self)}); + try writer.print("tlv_ptr\n{}\n", .{self.tlv_ptr.fmt(self)}); + try writer.writeByte('\n'); + try writer.print("sections\n{}\n", .{self.fmtSections()}); + try writer.print("segments\n{}\n", .{self.fmtSegments()}); +} + +fn fmtSections(self: *MachO) std.fmt.Formatter(formatSections) { + return .{ .data = self }; +} + +fn formatSections( + self: *MachO, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = options; + _ = unused_fmt_string; + const slice = self.sections.slice(); + for (slice.items(.header), slice.items(.segment_id), 0..) 
|header, seg_id, i| { + try writer.print("sect({d}) : seg({d}) : {s},{s} : @{x} ({x}) : align({x}) : size({x})\n", .{ + i, seg_id, header.segName(), header.sectName(), header.offset, header.addr, + header.@"align", header.size, + }); + } +} + +fn fmtSegments(self: *MachO) std.fmt.Formatter(formatSegments) { + return .{ .data = self }; +} + +fn formatSegments( + self: *MachO, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = options; + _ = unused_fmt_string; + for (self.segments.items, 0..) |seg, i| { + try writer.print("seg({d}) : {s} : @{x}-{x} ({x}-{x})\n", .{ + i, seg.segName(), seg.vmaddr, seg.vmaddr + seg.vmsize, + seg.fileoff, seg.fileoff + seg.filesize, + }); + } +} + +pub fn fmtSectType(tt: u8) std.fmt.Formatter(formatSectType) { + return .{ .data = tt }; +} + +fn formatSectType( + tt: u8, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = options; + _ = unused_fmt_string; + const name = switch (tt) { + macho.S_REGULAR => "REGULAR", + macho.S_ZEROFILL => "ZEROFILL", + macho.S_CSTRING_LITERALS => "CSTRING_LITERALS", + macho.S_4BYTE_LITERALS => "4BYTE_LITERALS", + macho.S_8BYTE_LITERALS => "8BYTE_LITERALS", + macho.S_16BYTE_LITERALS => "16BYTE_LITERALS", + macho.S_LITERAL_POINTERS => "LITERAL_POINTERS", + macho.S_NON_LAZY_SYMBOL_POINTERS => "NON_LAZY_SYMBOL_POINTERS", + macho.S_LAZY_SYMBOL_POINTERS => "LAZY_SYMBOL_POINTERS", + macho.S_SYMBOL_STUBS => "SYMBOL_STUBS", + macho.S_MOD_INIT_FUNC_POINTERS => "MOD_INIT_FUNC_POINTERS", + macho.S_MOD_TERM_FUNC_POINTERS => "MOD_TERM_FUNC_POINTERS", + macho.S_COALESCED => "COALESCED", + macho.S_GB_ZEROFILL => "GB_ZEROFILL", + macho.S_INTERPOSING => "INTERPOSING", + macho.S_DTRACE_DOF => "DTRACE_DOF", + macho.S_THREAD_LOCAL_REGULAR => "THREAD_LOCAL_REGULAR", + macho.S_THREAD_LOCAL_ZEROFILL => "THREAD_LOCAL_ZEROFILL", + macho.S_THREAD_LOCAL_VARIABLES => "THREAD_LOCAL_VARIABLES", + 
macho.S_THREAD_LOCAL_VARIABLE_POINTERS => "THREAD_LOCAL_VARIABLE_POINTERS", + macho.S_THREAD_LOCAL_INIT_FUNCTION_POINTERS => "THREAD_LOCAL_INIT_FUNCTION_POINTERS", + macho.S_INIT_FUNC_OFFSETS => "INIT_FUNC_OFFSETS", + else => |x| return writer.print("UNKNOWN({x})", .{x}), + }; + try writer.print("{s}", .{name}); +} + const is_hot_update_compatible = switch (builtin.target.os.tag) { .macos => true, else => false, @@ -1171,32 +1490,14 @@ const is_hot_update_compatible = switch (builtin.target.os.tag) { const default_entry_symbol_name = "_main"; -pub const base_tag: File.Tag = File.Tag.macho; +pub const base_tag: link.File.Tag = link.File.Tag.macho; pub const N_DEAD: u16 = @as(u16, @bitCast(@as(i16, -1))); pub const N_BOUNDARY: u16 = @as(u16, @bitCast(@as(i16, -2))); -pub const Section = struct { +const Section = struct { header: macho.section_64, - segment_index: u8, - first_atom_index: ?Atom.Index = null, - last_atom_index: ?Atom.Index = null, - - /// A list of atoms that have surplus capacity. This list can have false - /// positives, as functions grow and shrink over time, only sometimes being added - /// or removed from the freelist. - /// - /// An atom has surplus capacity when its overcapacity value is greater than - /// padToIdeal(minimum_atom_size). That is, when it has so - /// much extra capacity, that we could fit a small new symbol in it, itself with - /// ideal_capacity or more. - /// - /// Ideal capacity is defined by size + (size / ideal_factor). - /// - /// Overcapacity is measured by actual_capacity - ideal_capacity. Note that - /// overcapacity can be negative. A simple way to have negative overcapacity is to - /// allocate a fresh atom, which will have ideal capacity, and then grow it - /// by 1 byte. It will then have -1 overcapacity. 
- free_list: std.ArrayListUnmanaged(Atom.Index) = .{}, + segment_id: u8, + atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, }; const HotUpdateState = struct { @@ -1385,15 +1686,13 @@ pub inline fn appleVersionToSemanticVersion(version: u32) std.SemanticVersion { }; } -fn inferSdkVersion(self: *MachO) ?std.SemanticVersion { - const comp = self.base.comp; +fn inferSdkVersion(comp: *Compilation, sdk_layout: SdkLayout) ?std.SemanticVersion { const gpa = comp.gpa; var arena_allocator = std.heap.ArenaAllocator.init(gpa); defer arena_allocator.deinit(); const arena = arena_allocator.allocator(); - const sdk_layout = self.sdk_layout orelse return null; const sdk_dir = switch (sdk_layout) { .sdk => comp.sysroot.?, .vendored => std.fs.path.join(arena, &.{ comp.zig_lib_directory.path.?, "libc", "darwin" }) catch return null, @@ -1402,6 +1701,7 @@ fn inferSdkVersion(self: *MachO) ?std.SemanticVersion { return parseSdkVersion(ver); } else |_| { // Read from settings should always succeed when vendored. + // TODO: convert to fatal linker error if (sdk_layout == .vendored) @panic("zig installation bug: unable to parse SDK version"); } @@ -1470,6 +1770,15 @@ pub const default_pagezero_vmsize: u64 = 0x100000000; /// potential future extensions. 
pub const default_headerpad_size: u32 = 0x1000; +const SystemLib = struct { + path: []const u8, + needed: bool = false, + weak: bool = false, + hidden: bool = false, + reexport: bool = false, + must_link: bool = false, +}; + const MachO = @This(); const std = @import("std"); @@ -1479,6 +1788,7 @@ const assert = std.debug.assert; const dwarf = std.dwarf; const fs = std.fs; const log = std.log.scoped(.link); +const state_log = std.log.scoped(.link_state); const macho = std.macho; const math = std.math; const mem = std.mem; @@ -1488,6 +1798,7 @@ const aarch64 = @import("../arch/aarch64/bits.zig"); const calcUuid = @import("MachO/uuid.zig").calcUuid; const codegen = @import("../codegen.zig"); const dead_strip = @import("MachO/dead_strip.zig"); +const eh_frame = @import("MachO/eh_frame.zig"); const fat = @import("MachO/fat.zig"); const link = @import("../link.zig"); const llvm_backend = @import("../codegen/llvm.zig"); @@ -1496,12 +1807,14 @@ const tapi = @import("tapi.zig"); const target_util = @import("../target.zig"); const thunks = @import("MachO/thunks.zig"); const trace = @import("../tracy.zig").trace; +const synthetic = @import("MachO/synthetic.zig"); const Air = @import("../Air.zig"); const Alignment = Atom.Alignment; const Allocator = mem.Allocator; const Archive = @import("MachO/Archive.zig"); pub const Atom = @import("MachO/Atom.zig"); +const BindSection = synthetic.BindSection; const Cache = std.Build.Cache; const CodeSignature = @import("MachO/CodeSignature.zig"); const Compilation = @import("../Compilation.zig"); @@ -1509,17 +1822,29 @@ pub const DebugSymbols = @import("MachO/DebugSymbols.zig"); const Dwarf = File.Dwarf; const DwarfInfo = @import("MachO/DwarfInfo.zig"); const Dylib = @import("MachO/Dylib.zig"); -const File = link.File; +const ExportTrieSection = synthetic.ExportTrieSection; +const File = @import("MachO/file.zig").File; +const GotSection = synthetic.GotSection; +const Indsymtab = synthetic.Indsymtab; +const InternalObject = 
@import("MachO/InternalObject.zig"); +const ObjcStubsSection = synthetic.ObjcStubsSection; const Object = @import("MachO/Object.zig"); +const LazyBindSection = synthetic.LazyBindSection; +const LaSymbolPtrSection = synthetic.LaSymbolPtrSection; const LibStub = tapi.LibStub; const Liveness = @import("../Liveness.zig"); const LlvmObject = @import("../codegen/llvm.zig").Object; const Md5 = std.crypto.hash.Md5; const Module = @import("../Module.zig"); const InternPool = @import("../InternPool.zig"); +const RebaseSection = synthetic.RebaseSection; const Relocation = @import("MachO/Relocation.zig"); const StringTable = @import("StringTable.zig"); -const TableSection = @import("table_section.zig").TableSection; -const Type = @import("../type.zig").Type; +const StubsSection = synthetic.StubsSection; +const StubsHelperSection = synthetic.StubsHelperSection; +const Symbol = @import("MachO/Symbol.zig"); +const Thunk = thunks.Thunk; +const TlvPtrSection = synthetic.TlvPtrSection; const TypedValue = @import("../TypedValue.zig"); -const Value = @import("../value.zig").Value; +const UnwindInfo = @import("MachO/UnwindInfo.zig"); +const WeakBindSection = synthetic.WeakBindSection; diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 64e7b70a0d..8a04e64236 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -38,7 +38,7 @@ unwind_records: Loc = .{}, flags: Flags = .{}, pub fn getName(self: Atom, macho_file: *MachO) [:0]const u8 { - return macho_file.string_intern.getAssumeExists(self.name); + return macho_file.strings.getAssumeExists(self.name); } pub fn getFile(self: Atom, macho_file: *MachO) File { diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index f28e4eb08d..b57d6eb911 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -431,7 +431,7 @@ pub fn initSymbols(self: *Dylib, macho_file: *MachO) !void { for (self.exports.items(.name)) |noff| { const name = self.getString(noff); - const off = try 
macho_file.string_intern.insert(gpa, name); + const off = try macho_file.strings.insert(gpa, name); const gop = try macho_file.getOrCreateGlobal(off); self.symbols.addOneAssumeCapacity().* = gop.index; } diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 0ecf3b4d45..53334be6ab 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -31,6 +31,14 @@ num_weak_bind_relocs: u32 = 0, output_symtab_ctx: MachO.SymtabCtx = .{}, +pub fn isObject(path: []const u8) !bool { + const file = try std.fs.cwd().openFile(path, .{}); + defer file.close(); + const reader = file.reader(); + const header = reader.readStruct(macho.mach_header_64) catch return false; + return header.filetype == macho.MH_OBJECT; +} + pub fn deinit(self: *Object, allocator: Allocator) void { for (self.sections.items(.relocs), self.sections.items(.subsections)) |*relocs, *sub| { relocs.deinit(allocator); @@ -55,12 +63,25 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; var stream = std.io.fixedBufferStream(self.data); const reader = stream.reader(); self.header = try reader.readStruct(macho.mach_header_64); + const this_cpu_arch: std.Target.Cpu.Arch = switch (self.header.?.cputype) { + macho.CPU_TYPE_ARM64 => .aarch64, + macho.CPU_TYPE_X86_64 => .x86_64, + else => |x| { + try macho_file.reportParseError2(self.index, "unknown cpu architecture: {d}", .{x}); + return error.InvalidCpuArch; + }, + }; + if (macho_file.getTarget().cpu.arch != this_cpu_arch) { + try macho_file.reportParseError2(self.index, "invalid cpu architecture: {s}", .{@tagName(this_cpu_arch)}); + return error.InvalidCpuArch; + } + if (self.getLoadCommand(.SEGMENT_64)) |lc| { const sections = lc.getSections(); try self.sections.ensureUnusedCapacity(gpa, sections.len); @@ -146,6 +167,20 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { } self.initPlatform(); + + 
if (self.platform) |platform| { + if (!macho_file.platform.eqlTarget(platform)) { + try macho_file.reportParseError2(self.index, "invalid platform: {}", .{ + platform.fmtTarget(macho_file.getTarget().cpu.arch), + }); + return error.InvalidTarget; + } + if (macho_file.platform.version.order(platform.version) != .lt) { + try macho_file.reportParseError2(self.index, "object file built for newer platform: {}", .{platform}); + return error.InvalidTarget; + } + } + try self.initDwarfInfo(macho_file); for (self.atoms.items) |atom_index| { @@ -175,7 +210,7 @@ inline fn isLiteral(sect: macho.section_64) bool { fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const slice = self.sections.slice(); for (slice.items(.header), slice.items(.subsections), 0..) |sect, *subsections, n_sect| { if (isLiteral(sect)) continue; @@ -243,7 +278,7 @@ fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void { fn initSections(self: *Object, nlists: anytype, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const slice = self.sections.slice(); try self.atoms.ensureUnusedCapacity(gpa, self.sections.items(.header).len); @@ -299,12 +334,12 @@ const AddAtomArgs = struct { }; fn addAtom(self: *Object, args: AddAtomArgs, macho_file: *MachO) !Atom.Index { - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const atom_index = try macho_file.addAtom(); const atom = macho_file.getAtom(atom_index).?; atom.file = self.index; atom.atom_index = atom_index; - atom.name = try macho_file.string_intern.insert(gpa, args.name); + atom.name = try macho_file.strings.insert(gpa, args.name); atom.n_sect = args.n_sect; atom.size = args.size; atom.alignment = args.alignment; @@ -319,7 +354,7 @@ fn 
initLiteralSections(self: *Object, macho_file: *MachO) !void { // TODO here we should split into equal-sized records, hash the contents, and then // deduplicate - ICF. // For now, we simply cover each literal section with one large atom. - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const slice = self.sections.slice(); try self.atoms.ensureUnusedCapacity(gpa, self.sections.items(.header).len); @@ -401,10 +436,10 @@ fn linkNlistToAtom(self: *Object, macho_file: *MachO) !void { if (self.findAtomInSection(nlist.n_value, nlist.n_sect - 1)) |atom_index| { atom.* = atom_index; } else { - macho_file.base.fatal("{}: symbol {s} not attached to any (sub)section", .{ - self.fmtPath(), self.getString(nlist.n_strx), + try macho_file.reportParseError2(self.index, "symbol {s} not attached to any (sub)section", .{ + self.getString(nlist.n_strx), }); - return error.ParseFailed; + return error.MalformedObject; } } } @@ -413,7 +448,7 @@ fn linkNlistToAtom(self: *Object, macho_file: *MachO) !void { fn initSymbols(self: *Object, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const slice = self.symtab.slice(); try self.symbols.ensureUnusedCapacity(gpa, slice.items(.nlist).len); @@ -421,7 +456,7 @@ fn initSymbols(self: *Object, macho_file: *MachO) !void { for (slice.items(.nlist), slice.items(.atom), 0..) 
|nlist, atom_index, i| { if (nlist.ext()) { const name = self.getString(nlist.n_strx); - const off = try macho_file.string_intern.insert(gpa, name); + const off = try macho_file.strings.insert(gpa, name); const gop = try macho_file.getOrCreateGlobal(off); self.symbols.addOneAssumeCapacity().* = gop.index; continue; @@ -433,7 +468,7 @@ fn initSymbols(self: *Object, macho_file: *MachO) !void { const name = self.getString(nlist.n_strx); symbol.* = .{ .value = nlist.n_value, - .name = try macho_file.string_intern.insert(gpa, name), + .name = try macho_file.strings.insert(gpa, name), .nlist_idx = @intCast(i), .atom = 0, .file = self.index, @@ -482,7 +517,7 @@ fn initSymbolStabs(self: *Object, nlists: anytype, macho_file: *MachO) !void { if (start == end) return; - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const syms = self.symtab.items(.nlist); const sym_lookup = SymbolLookup{ .ctx = self, .entries = nlists }; @@ -490,11 +525,10 @@ fn initSymbolStabs(self: *Object, nlists: anytype, macho_file: *MachO) !void { while (i < end) : (i += 1) { const open = syms[i]; if (open.n_type != macho.N_SO) { - macho_file.base.fatal("{}: unexpected symbol stab type 0x{x} as the first entry", .{ - self.fmtPath(), + try macho_file.reportParseError2(self.index, "unexpected symbol stab type 0x{x} as the first entry", .{ open.n_type, }); - return error.ParseFailed; + return error.MalformedObject; } while (i < end and syms[i].n_type == macho.N_SO and syms[i].n_sect != 0) : (i += 1) {} @@ -522,11 +556,10 @@ fn initSymbolStabs(self: *Object, nlists: anytype, macho_file: *MachO) !void { stab.symbol = sym_lookup.find(nlist.n_value); }, else => { - macho_file.base.fatal("{}: unhandled symbol stab type 0x{x}", .{ - self.fmtPath(), + try macho_file.reportParseError2(self.index, "unhandled symbol stab type 0x{x}", .{ nlist.n_type, }); - return error.ParseFailed; + return error.MalformedObject; }, } try sf.stabs.append(gpa, stab); @@ -548,7 +581,7 @@ fn 
sortAtoms(self: *Object, macho_file: *MachO) !void { fn initRelocs(self: *Object, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const cpu_arch = macho_file.options.cpu_arch.?; + const cpu_arch = macho_file.getTarget().cpu.arch; const slice = self.sections.slice(); for (slice.items(.header), slice.items(.relocs), 0..) |sect, *out, n_sect| { @@ -589,7 +622,7 @@ fn initRelocs(self: *Object, macho_file: *MachO) !void { fn initEhFrameRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const nlists = self.symtab.items(.nlist); const slice = self.sections.slice(); const sect = slice.items(.header)[sect_id]; @@ -667,10 +700,10 @@ fn initEhFrameRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { const cie = for (self.cies.items) |*cie| { if (cie.offset <= rel.offset and rel.offset < cie.offset + cie.getSize()) break cie; } else { - macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ - self.fmtPath(), sect.segName(), sect.sectName(), rel.offset, + try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: bad relocation", .{ + sect.segName(), sect.sectName(), rel.offset, }); - return error.ParseFailed; + return error.MalformedObject; }; cie.personality = .{ .index = @intCast(rel.target), .offset = rel.offset - cie.offset }; }, @@ -695,7 +728,7 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { } }; - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const data = self.getSectionData(sect_id); const nrecs = @divExact(data.len, @sizeOf(macho.compact_unwind_entry)); const recs = @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data.ptr))[0..nrecs]; @@ -722,10 +755,10 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { for (relocs[reloc_start..reloc_idx]) |rel| { if (rel.type != .unsigned 
or rel.meta.length != 3) { - macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ - self.fmtPath(), header.segName(), header.sectName(), rel.offset, + try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: bad relocation", .{ + header.segName(), header.sectName(), rel.offset, }); - return error.ParseFailed; + return error.MalformedObject; } assert(rel.type == .unsigned and rel.meta.length == 3); // TODO error const offset = rel.offset - rec_start; @@ -740,10 +773,10 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { const atom = out.getAtom(macho_file); out.atom_offset = @intCast(rec.rangeStart - atom.getInputAddress(macho_file)); } else { - macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ - self.fmtPath(), header.segName(), header.sectName(), rel.offset, + try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: bad relocation", .{ + header.segName(), header.sectName(), rel.offset, }); - return error.ParseFailed; + return error.MalformedObject; }, }, 16 => switch (rel.tag) { // personality function @@ -753,10 +786,10 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { .local => if (sym_lookup.find(rec.personalityFunction)) |sym_index| { out.personality = sym_index; } else { - macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ - self.fmtPath(), header.segName(), header.sectName(), rel.offset, + try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: bad relocation", .{ + header.segName(), header.sectName(), rel.offset, }); - return error.ParseFailed; + return error.MalformedObject; }, }, 24 => switch (rel.tag) { // lsda @@ -769,10 +802,10 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { const atom = out.getLsdaAtom(macho_file).?; out.lsda_offset = @intCast(rec.lsda - atom.getInputAddress(macho_file)); } else { - macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ - self.fmtPath(), header.segName(), 
header.sectName(), rel.offset, + try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: bad relocation", .{ + header.segName(), header.sectName(), rel.offset, }); - return error.ParseFailed; + return error.MalformedObject; }, }, else => {}, @@ -780,7 +813,7 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { } } - if (!macho_file.options.relocatable) try self.synthesiseNullUnwindRecords(macho_file); + if (!macho_file.base.isObject()) try self.synthesiseNullUnwindRecords(macho_file); const sortFn = struct { fn sortFn(ctx: *MachO, lhs_index: UnwindInfo.Record.Index, rhs_index: UnwindInfo.Record.Index) bool { @@ -818,7 +851,7 @@ fn synthesiseNullUnwindRecords(self: *Object, macho_file: *MachO) !void { const Superposition = struct { atom: Atom.Index, size: u64, cu: ?UnwindInfo.Record.Index = null, fde: ?Fde.Index = null }; - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; var superposition = std.AutoArrayHashMap(u64, Superposition).init(gpa); defer superposition.deinit(); @@ -875,7 +908,7 @@ fn synthesiseNullUnwindRecords(self: *Object, macho_file: *MachO) !void { rec.atom_offset = fde.atom_offset; rec.fde = fde_index; rec.file = fde.file; - switch (macho_file.options.cpu_arch.?) 
{ + switch (macho_file.getTarget().cpu.arch) { .x86_64 => rec.enc.setMode(macho.UNWIND_X86_64_MODE.DWARF), .aarch64 => rec.enc.setMode(macho.UNWIND_ARM64_MODE.DWARF), else => unreachable, @@ -907,7 +940,7 @@ fn initPlatform(self: *Object) void { .VERSION_MIN_IPHONEOS, .VERSION_MIN_TVOS, .VERSION_MIN_WATCHOS, - => break MachO.Options.Platform.fromLoadCommand(cmd), + => break MachO.Platform.fromLoadCommand(cmd), else => {}, } } else null; @@ -921,7 +954,7 @@ fn initDwarfInfo(self: *Object, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; var debug_info_index: ?usize = null; var debug_abbrev_index: ?usize = null; @@ -942,8 +975,8 @@ fn initDwarfInfo(self: *Object, macho_file: *MachO) !void { .debug_str = if (debug_str_index) |index| self.getSectionData(@intCast(index)) else "", }; dwarf_info.init(gpa) catch { - macho_file.base.fatal("{}: invalid __DWARF info found", .{self.fmtPath()}); - return error.ParseFailed; + try macho_file.reportParseError2(self.index, "invalid __DWARF info found", .{}); + return error.MalformedObject; }; self.dwarf_info = dwarf_info; } @@ -1060,7 +1093,7 @@ pub fn scanRelocs(self: Object, macho_file: *MachO) !void { pub fn convertTentativeDefinitions(self: *Object, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; for (self.symbols.items, 0..) 
|index, i| { const sym = macho_file.getSymbol(index); @@ -1079,7 +1112,7 @@ pub fn convertTentativeDefinitions(self: *Object, macho_file: *MachO) !void { defer gpa.free(name); const atom = macho_file.getAtom(atom_index).?; atom.atom_index = atom_index; - atom.name = try macho_file.string_intern.insert(gpa, name); + atom.name = try macho_file.strings.insert(gpa, name); atom.file = self.index; atom.size = nlist.n_value; atom.alignment = (nlist.n_desc >> 8) & 0x0f; @@ -1130,7 +1163,7 @@ pub fn calcSymtabSize(self: *Object, macho_file: *MachO) !void { const name = sym.getName(macho_file); // TODO in -r mode, we actually want to merge symbol names and emit only one // work it out when emitting relocs - if (name.len > 0 and (name[0] == 'L' or name[0] == 'l') and !macho_file.options.relocatable) continue; + if (name.len > 0 and (name[0] == 'L' or name[0] == 'l') and !macho_file.base.isObject()) continue; sym.flags.output_symtab = true; if (sym.isLocal()) { try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nlocals }, macho_file); @@ -1171,7 +1204,7 @@ pub fn calcStabsSize(self: *Object, macho_file: *MachO) void { const file = sym.getFile(macho_file) orelse continue; if (file.getIndex() != self.index) continue; if (!sym.flags.output_symtab) continue; - if (macho_file.options.relocatable) { + if (macho_file.base.isObject()) { const name = sym.getName(macho_file); if (name.len > 0 and (name[0] == 'L' or name[0] == 'l')) continue; } @@ -1329,7 +1362,7 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void { const file = sym.getFile(macho_file) orelse continue; if (file.getIndex() != self.index) continue; if (!sym.flags.output_symtab) continue; - if (macho_file.options.relocatable) { + if (macho_file.base.isObject()) { const name = sym.getName(macho_file); if (name.len > 0 and (name[0] == 'L' or name[0] == 'l')) continue; } @@ -1747,7 +1780,7 @@ const x86_64 = struct { out: *std.ArrayListUnmanaged(Relocation), macho_file: *MachO, ) !void { - const gpa = 
macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const relocs = @as( [*]align(1) const macho.relocation_info, @@ -1783,10 +1816,10 @@ const x86_64 = struct { else addend; const target = self.findAtomInSection(@intCast(taddr), @intCast(nsect)) orelse { - macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ - self.fmtPath(), sect.segName(), sect.sectName(), rel.r_address, + try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: bad relocation", .{ + sect.segName(), sect.sectName(), rel.r_address, }); - return error.ParseFailed; + return error.MalformedObject; }; addend = taddr - @as(i64, @intCast(macho_file.getAtom(target).?.getInputAddress(macho_file))); break :blk target; @@ -1796,34 +1829,38 @@ const x86_64 = struct { @as(macho.reloc_type_x86_64, @enumFromInt(relocs[i - 1].r_type)) == .X86_64_RELOC_SUBTRACTOR) blk: { if (rel_type != .X86_64_RELOC_UNSIGNED) { - macho_file.base.fatal("{}: {s},{s}: 0x{x}: X86_64_RELOC_SUBTRACTOR followed by {s}", .{ - self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type), + try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: X86_64_RELOC_SUBTRACTOR followed by {s}", .{ + sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type), }); - return error.ParseFailed; + return error.MalformedObject; } break :blk true; } else false; const @"type": Relocation.Type = validateRelocType(rel, rel_type) catch |err| { switch (err) { - error.Pcrel => macho_file.base.fatal( - "{}: {s},{s}: 0x{x}: PC-relative {s} relocation", - .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, + error.Pcrel => try macho_file.reportParseError2( + self.index, + "{s},{s}: 0x{x}: PC-relative {s} relocation", + .{ sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, ), - error.NonPcrel => macho_file.base.fatal( - "{}: {s},{s}: 0x{x}: non-PC-relative {s} relocation", - .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) 
}, + error.NonPcrel => try macho_file.reportParseError2( + self.index, + "{s},{s}: 0x{x}: non-PC-relative {s} relocation", + .{ sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, ), - error.InvalidLength => macho_file.base.fatal( - "{}: {s},{s}: 0x{x}: invalid length of {d} in {s} relocation", - .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @as(u8, 1) << rel.r_length, @tagName(rel_type) }, + error.InvalidLength => try macho_file.reportParseError2( + self.index, + "{s},{s}: 0x{x}: invalid length of {d} in {s} relocation", + .{ sect.segName(), sect.sectName(), rel_offset, @as(u8, 1) << rel.r_length, @tagName(rel_type) }, ), - error.NonExtern => macho_file.base.fatal( - "{}: {s},{s}: 0x{x}: non-extern target in {s} relocation", - .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, + error.NonExtern => try macho_file.reportParseError2( + self.index, + "{s},{s}: 0x{x}: non-extern target in {s} relocation", + .{ sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, ), } - return error.ParseFailed; + return error.MalformedObject; }; out.appendAssumeCapacity(.{ @@ -1899,7 +1936,7 @@ const aarch64 = struct { out: *std.ArrayListUnmanaged(Relocation), macho_file: *MachO, ) !void { - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const relocs = @as( [*]align(1) const macho.relocation_info, @@ -1921,20 +1958,21 @@ const aarch64 = struct { addend = rel.r_symbolnum; i += 1; if (i >= relocs.len) { - macho_file.base.fatal("{}: {s},{s}: 0x{x}: unterminated ARM64_RELOC_ADDEND", .{ - self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, + try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: unterminated ARM64_RELOC_ADDEND", .{ + sect.segName(), sect.sectName(), rel_offset, }); - return error.ParseFailed; + return error.MalformedObject; } rel = relocs[i]; switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { .ARM64_RELOC_PAGE21, 
.ARM64_RELOC_PAGEOFF12 => {}, else => |x| { - macho_file.base.fatal( - "{}: {s},{s}: 0x{x}: ARM64_RELOC_ADDEND followed by {s}", - .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(x) }, + try macho_file.reportParseError2( + self.index, + "{s},{s}: 0x{x}: ARM64_RELOC_ADDEND followed by {s}", + .{ sect.segName(), sect.sectName(), rel_offset, @tagName(x) }, ); - return error.ParseFailed; + return error.MalformedObject; }, } }, @@ -1958,10 +1996,10 @@ const aarch64 = struct { else addend; const target = self.findAtomInSection(@intCast(taddr), @intCast(nsect)) orelse { - macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{ - self.fmtPath(), sect.segName(), sect.sectName(), rel.r_address, + try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: bad relocation", .{ + sect.segName(), sect.sectName(), rel.r_address, }); - return error.ParseFailed; + return error.MalformedObject; }; addend = taddr - @as(i64, @intCast(macho_file.getAtom(target).?.getInputAddress(macho_file))); break :blk target; @@ -1971,34 +2009,38 @@ const aarch64 = struct { @as(macho.reloc_type_arm64, @enumFromInt(relocs[i - 1].r_type)) == .ARM64_RELOC_SUBTRACTOR) blk: { if (rel_type != .ARM64_RELOC_UNSIGNED) { - macho_file.base.fatal("{}: {s},{s}: 0x{x}: ARM64_RELOC_SUBTRACTOR followed by {s}", .{ - self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type), + try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: ARM64_RELOC_SUBTRACTOR followed by {s}", .{ + sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type), }); - return error.ParseFailed; + return error.MalformedObject; } break :blk true; } else false; const @"type": Relocation.Type = validateRelocType(rel, rel_type) catch |err| { switch (err) { - error.Pcrel => macho_file.base.fatal( - "{}: {s},{s}: 0x{x}: PC-relative {s} relocation", - .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, + error.Pcrel => try macho_file.reportParseError2( 
+ self.index, + "{s},{s}: 0x{x}: PC-relative {s} relocation", + .{ sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, ), - error.NonPcrel => macho_file.base.fatal( - "{}: {s},{s}: 0x{x}: non-PC-relative {s} relocation", - .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, + error.NonPcrel => try macho_file.reportParseError2( + self.index, + "{s},{s}: 0x{x}: non-PC-relative {s} relocation", + .{ sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, ), - error.InvalidLength => macho_file.base.fatal( - "{}: {s},{s}: 0x{x}: invalid length of {d} in {s} relocation", - .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @as(u8, 1) << rel.r_length, @tagName(rel_type) }, + error.InvalidLength => try macho_file.reportParseError2( + self.index, + "{s},{s}: 0x{x}: invalid length of {d} in {s} relocation", + .{ sect.segName(), sect.sectName(), rel_offset, @as(u8, 1) << rel.r_length, @tagName(rel_type) }, ), - error.NonExtern => macho_file.base.fatal( - "{}: {s},{s}: 0x{x}: non-extern target in {s} relocation", - .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, + error.NonExtern => try macho_file.reportParseError2( + self.index, + "{s},{s}: 0x{x}: non-extern target in {s} relocation", + .{ sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) }, ), } - return error.ParseFailed; + return error.MalformedObject; }; out.appendAssumeCapacity(.{ diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 35e53534a8..c3a6d7b54e 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -55,7 +55,7 @@ pub fn weakRef(symbol: Symbol, macho_file: *MachO) bool { } pub fn getName(symbol: Symbol, macho_file: *MachO) [:0]const u8 { - return macho_file.string_intern.getAssumeExists(symbol.name); + return macho_file.strings.getAssumeExists(symbol.name); } pub fn getAtom(symbol: Symbol, macho_file: *MachO) ?*Atom { diff --git 
a/src/link/MachO/UnwindInfo.zig b/src/link/MachO/UnwindInfo.zig index 33c07915dc..8a5e3661eb 100644 --- a/src/link/MachO/UnwindInfo.zig +++ b/src/link/MachO/UnwindInfo.zig @@ -372,7 +372,7 @@ pub const Encoding = extern struct { pub fn isDwarf(enc: Encoding, macho_file: *MachO) bool { const mode = enc.getMode(); - return switch (macho_file.options.cpu_arch.?) { + return switch (macho_file.getTarget().cpu.arch) { .aarch64 => @as(macho.UNWIND_ARM64_MODE, @enumFromInt(mode)) == .DWARF, .x86_64 => @as(macho.UNWIND_X86_64_MODE, @enumFromInt(mode)) == .DWARF, else => unreachable, diff --git a/src/link/MachO/eh_frame.zig b/src/link/MachO/eh_frame.zig index 716a66d4fa..abcd44cc6b 100644 --- a/src/link/MachO/eh_frame.zig +++ b/src/link/MachO/eh_frame.zig @@ -155,10 +155,10 @@ pub const Fde = struct { const pc_begin = std.mem.readInt(i64, data[8..][0..8], .little); const taddr: u64 = @intCast(@as(i64, @intCast(sect.addr + fde.offset + 8)) + pc_begin); fde.atom = object.findAtom(taddr) orelse { - macho_file.base.fatal("{}: {s},{s}: 0x{x}: invalid function reference in FDE", .{ - object.fmtPath(), sect.segName(), sect.sectName(), fde.offset + 8, + try macho_file.reportParseError2(object.index, "{s},{s}: 0x{x}: invalid function reference in FDE", .{ + sect.segName(), sect.sectName(), fde.offset + 8, }); - return error.ParseFailed; + return error.MalformedObject; }; const atom = fde.getAtom(macho_file); fde.atom_offset = @intCast(taddr - atom.getInputAddress(macho_file)); @@ -172,11 +172,10 @@ pub const Fde = struct { if (cie_index) |cie| { fde.cie = cie; } else { - macho_file.base.fatal("{}: no matching CIE found for FDE at offset {x}", .{ - object.fmtPath(), + try macho_file.reportParseError2(object.index, "no matching CIE found for FDE at offset {x}", .{ fde.offset, }); - return error.ParseFailed; + return error.MalformedObject; } const cie = fde.getCie(macho_file); @@ -194,10 +193,10 @@ pub const Fde = struct { }; const lsda_addr: u64 = @intCast(@as(i64, @intCast(sect.addr + 
fde.offset + fde.lsda_ptr_offset)) + lsda_ptr); fde.lsda = object.findAtom(lsda_addr) orelse { - macho_file.base.fatal("{}: {s},{s}: 0x{x}: invalid LSDA reference in FDE", .{ - object.fmtPath(), sect.segName(), sect.sectName(), fde.offset + fde.lsda_ptr_offset, + try macho_file.reportParseError2(object.index, "{s},{s}: 0x{x}: invalid LSDA reference in FDE", .{ + sect.segName(), sect.sectName(), fde.offset + fde.lsda_ptr_offset, }); - return error.ParseFailed; + return error.MalformedObject; }; const lsda_atom = fde.getLsdaAtom(macho_file).?; fde.lsda_offset = @intCast(lsda_addr - lsda_atom.getInputAddress(macho_file)); diff --git a/src/link/MachO/synthetic.zig b/src/link/MachO/synthetic.zig index d75e1f08aa..c497bc5444 100644 --- a/src/link/MachO/synthetic.zig +++ b/src/link/MachO/synthetic.zig @@ -8,7 +8,7 @@ pub const GotSection = struct { } pub fn addSymbol(got: *GotSection, sym_index: Symbol.Index, macho_file: *MachO) !void { - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const index = @as(Index, @intCast(got.symbols.items.len)); const entry = try got.symbols.addOne(gpa); entry.* = sym_index; @@ -29,7 +29,7 @@ pub const GotSection = struct { pub fn addDyldRelocs(got: GotSection, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const seg_id = macho_file.sections.items(.segment_id)[macho_file.got_sect_index.?]; const seg = macho_file.segments.items[seg_id]; @@ -111,7 +111,7 @@ pub const StubsSection = struct { } pub fn addSymbol(stubs: *StubsSection, sym_index: Symbol.Index, macho_file: *MachO) !void { - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const index = @as(Index, @intCast(stubs.symbols.items.len)); const entry = try stubs.symbols.addOne(gpa); entry.* = sym_index; @@ -133,7 +133,7 @@ pub const StubsSection = struct { pub fn write(stubs: StubsSection, macho_file: *MachO, writer: 
anytype) !void { const tracy = trace(@src()); defer tracy.end(); - const cpu_arch = macho_file.options.cpu_arch.?; + const cpu_arch = macho_file.getTarget().cpu.arch; const laptr_sect = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_sect_index.?]; for (stubs.symbols.items, 0..) |sym_index, idx| { @@ -213,7 +213,7 @@ pub const StubsHelperSection = struct { const tracy = trace(@src()); defer tracy.end(); _ = stubs_helper; - const cpu_arch = macho_file.options.cpu_arch.?; + const cpu_arch = macho_file.getTarget().cpu.arch; var s: usize = preambleSize(cpu_arch); for (macho_file.stubs.symbols.items) |sym_index| { const sym = macho_file.getSymbol(sym_index); @@ -230,7 +230,7 @@ pub const StubsHelperSection = struct { try stubs_helper.writePreamble(macho_file, writer); - const cpu_arch = macho_file.options.cpu_arch.?; + const cpu_arch = macho_file.getTarget().cpu.arch; const sect = macho_file.sections.items(.header)[macho_file.stubs_helper_sect_index.?]; const preamble_size = preambleSize(cpu_arch); const entry_size = entrySize(cpu_arch); @@ -272,7 +272,7 @@ pub const StubsHelperSection = struct { fn writePreamble(stubs_helper: StubsHelperSection, macho_file: *MachO, writer: anytype) !void { _ = stubs_helper; - const cpu_arch = macho_file.options.cpu_arch.?; + const cpu_arch = macho_file.getTarget().cpu.arch; const sect = macho_file.sections.items(.header)[macho_file.stubs_helper_sect_index.?]; const dyld_private_addr = target: { const sym = macho_file.getSymbol(macho_file.dyld_private_index.?); @@ -331,7 +331,7 @@ pub const LaSymbolPtrSection = struct { const tracy = trace(@src()); defer tracy.end(); _ = laptr; - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const sect = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_sect_index.?]; const seg_id = macho_file.sections.items(.segment_id)[macho_file.la_symbol_ptr_sect_index.?]; @@ -371,7 +371,7 @@ pub const LaSymbolPtrSection = struct { const tracy = trace(@src()); 
defer tracy.end(); _ = laptr; - const cpu_arch = macho_file.options.cpu_arch.?; + const cpu_arch = macho_file.getTarget().cpu.arch; const sect = macho_file.sections.items(.header)[macho_file.stubs_helper_sect_index.?]; for (macho_file.stubs.symbols.items, 0..) |sym_index, idx| { const sym = macho_file.getSymbol(sym_index); @@ -397,7 +397,7 @@ pub const TlvPtrSection = struct { } pub fn addSymbol(tlv: *TlvPtrSection, sym_index: Symbol.Index, macho_file: *MachO) !void { - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const index = @as(Index, @intCast(tlv.symbols.items.len)); const entry = try tlv.symbols.addOne(gpa); entry.* = sym_index; @@ -418,7 +418,7 @@ pub const TlvPtrSection = struct { pub fn addDyldRelocs(tlv: TlvPtrSection, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const seg_id = macho_file.sections.items(.segment_id)[macho_file.tlv_ptr_sect_index.?]; const seg = macho_file.segments.items[seg_id]; @@ -510,7 +510,7 @@ pub const ObjcStubsSection = struct { } pub fn addSymbol(objc: *ObjcStubsSection, sym_index: Symbol.Index, macho_file: *MachO) !void { - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const index = @as(Index, @intCast(objc.symbols.items.len)); const entry = try objc.symbols.addOne(gpa); entry.* = sym_index; @@ -521,11 +521,11 @@ pub const ObjcStubsSection = struct { pub fn getAddress(objc: ObjcStubsSection, index: Index, macho_file: *MachO) u64 { assert(index < objc.symbols.items.len); const header = macho_file.sections.items(.header)[macho_file.objc_stubs_sect_index.?]; - return header.addr + index * entrySize(macho_file.options.cpu_arch.?); + return header.addr + index * entrySize(macho_file.getTarget().cpu.arch); } pub fn size(objc: ObjcStubsSection, macho_file: *MachO) usize { - return objc.symbols.items.len * entrySize(macho_file.options.cpu_arch.?); + return 
objc.symbols.items.len * entrySize(macho_file.getTarget().cpu.arch); } pub fn write(objc: ObjcStubsSection, macho_file: *MachO, writer: anytype) !void { @@ -535,7 +535,7 @@ pub const ObjcStubsSection = struct { for (objc.symbols.items, 0..) |sym_index, idx| { const sym = macho_file.getSymbol(sym_index); const addr = objc.getAddress(@intCast(idx), macho_file); - switch (macho_file.options.cpu_arch.?) { + switch (macho_file.getTarget().cpu.arch) { .x86_64 => { try writer.writeAll(&.{ 0x48, 0x8b, 0x35 }); { @@ -654,12 +654,12 @@ pub const WeakBindSection = bind.WeakBind; pub const LazyBindSection = bind.LazyBind; pub const ExportTrieSection = Trie; -const aarch64 = @import("../aarch64.zig"); +const aarch64 = @import("../../arch/aarch64/bits.zig"); const assert = std.debug.assert; const bind = @import("dyld_info/bind.zig"); const math = std.math; const std = @import("std"); -const trace = @import("../tracy.zig").trace; +const trace = @import("../../tracy.zig").trace; const Allocator = std.mem.Allocator; const MachO = @import("../MachO.zig"); From d05e9c379267e9a68522eca3fe8800803c6a5c70 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 11 Jan 2024 10:19:18 +0100 Subject: [PATCH 005/133] macho: create scaffolding for parsing different input objects --- src/link/MachO.zig | 140 +++++++++++++++++++++++++++++++++++-- src/link/MachO/Archive.zig | 12 ++++ src/link/MachO/Dylib.zig | 10 +++ src/link/MachO/Object.zig | 11 +-- src/link/MachO/fat.zig | 12 ++-- 5 files changed, 170 insertions(+), 15 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 36d522f1ee..b34d3324c6 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -384,6 +384,7 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node error.MalformedArchive, error.InvalidCpuArch, error.InvalidTarget, + error.UnknownFileType, => continue, // already reported else => |e| try self.reportParseError( obj.path, @@ -393,6 +394,81 @@ pub fn flushModule(self: *MachO, 
arena: Allocator, prog_node: *std.Progress.Node }; } + var system_libs = std.ArrayList(SystemLib).init(gpa); + defer system_libs.deinit(); + + // libs + try system_libs.ensureUnusedCapacity(comp.system_libs.values().len); + for (comp.system_libs.values()) |info| { + system_libs.appendAssumeCapacity(.{ + .needed = info.needed, + .weak = info.weak, + .path = info.path.?, + }); + } + + // frameworks + try system_libs.ensureUnusedCapacity(self.frameworks.len); + for (self.frameworks) |info| { + system_libs.appendAssumeCapacity(.{ + .needed = info.needed, + .weak = info.weak, + .path = info.path, + }); + } + + // libc++ dep + if (comp.config.link_libcpp) { + try system_libs.ensureUnusedCapacity(2); + system_libs.appendAssumeCapacity(.{ .path = comp.libcxxabi_static_lib.?.full_object_path }); + system_libs.appendAssumeCapacity(.{ .path = comp.libcxx_static_lib.?.full_object_path }); + } + + // libc/libSystem dep + self.resolveLibSystem(arena, comp, &system_libs) catch |err| switch (err) { + error.MissingLibSystem => {}, // already reported + else => |e| return e, // TODO: convert into an error + }; + + for (system_libs.items) |lib| { + self.parseLibrary(lib, false) catch |err| switch (err) { + error.MalformedDylib, + error.MalformedArchive, + error.InvalidCpuArch, + error.UnknownFileType, + => continue, // already reported + else => |e| try self.reportParseError( + lib.path, + "unexpected error: parsing library failed with error {s}", + .{@errorName(e)}, + ), + }; + } + + // Finally, link against compiler_rt. 
+ const compiler_rt_path: ?[]const u8 = blk: { + if (comp.compiler_rt_lib) |x| break :blk x.full_object_path; + if (comp.compiler_rt_obj) |x| break :blk x.full_object_path; + break :blk null; + }; + if (compiler_rt_path) |path| { + self.parsePositional(path, false) catch |err| switch (err) { + error.MalformedObject, + error.MalformedArchive, + error.InvalidCpuArch, + error.InvalidTarget, + error.UnknownFileType, + => {}, // already reported + else => |e| try self.reportParseError( + path, + "unexpected error: parsing input file failed with error {s}", + .{@errorName(e)}, + ), + }; + } + + if (comp.link_errors.items.len > 0) return error.FlushFailure; + state_log.debug("{}", .{self.dumpState()}); @panic("TODO"); @@ -626,13 +702,12 @@ pub fn resolveLibSystem( }; try self.reportMissingLibraryError(checked_paths.items, "unable to find libSystem system library", .{}); - return; + return error.MissingLibSystem; } const libsystem_path = try arena.dupe(u8, test_path.items); - try out_libs.put(libsystem_path, .{ + try out_libs.append(.{ .needed = true, - .weak = false, .path = libsystem_path, }); } @@ -685,6 +760,7 @@ fn accessLibPath( const ParseError = error{ MalformedObject, MalformedArchive, + MalformedDylib, NotLibStub, InvalidCpuArch, InvalidTarget, @@ -696,6 +772,8 @@ const ParseError = error{ EndOfStream, FileSystem, NotSupported, + Unhandled, + UnknownFileType, } || std.os.SeekError || std.fs.File.OpenError || std.fs.File.ReadError || tapi.TapiError; fn parsePositional(self: *MachO, path: []const u8, must_link: bool) ParseError!void { @@ -709,9 +787,25 @@ fn parsePositional(self: *MachO, path: []const u8, must_link: bool) ParseError!v } fn parseLibrary(self: *MachO, lib: SystemLib, must_link: bool) ParseError!void { - _ = self; - _ = lib; - _ = must_link; + const tracy = trace(@src()); + defer tracy.end(); + if (try fat.isFatLibrary(lib.path)) { + const fat_arch = try self.parseFatLibrary(lib.path); + if (try Archive.isArchive(lib.path, fat_arch)) { + try 
self.parseArchive(lib, must_link, fat_arch); + } else if (try Dylib.isDylib(lib.path, fat_arch)) { + try self.parseDylib(lib, true, fat_arch); + } else { + try self.reportParseError(lib.path, "unknown file type for a library", .{}); + return error.UnknownFileType; + } + } else if (try Archive.isArchive(lib.path, null)) { + try self.parseArchive(lib, must_link, null); + } else if (try Dylib.isDylib(lib.path, null)) { + try self.parseDylib(lib, true, null); + } else { + try self.parseTbd(lib, true); + } } fn parseObject(self: *MachO, path: []const u8) ParseError!void { @@ -739,6 +833,40 @@ fn parseObject(self: *MachO, path: []const u8) ParseError!void { try object.parse(self); } +fn parseFatLibrary(self: *MachO, path: []const u8) !fat.Arch { + var buffer: [2]fat.Arch = undefined; + const fat_archs = try fat.parseArchs(path, &buffer); + const cpu_arch = self.getTarget().cpu.arch; + for (fat_archs) |arch| { + if (arch.tag == cpu_arch) return arch; + } + try self.reportParseError(path, "missing arch in universal file: expected {s}", .{@tagName(cpu_arch)}); + return error.InvalidCpuArch; +} + +fn parseArchive(self: *MachO, lib: SystemLib, must_link: bool, fat_arch: ?fat.Arch) ParseError!void { + _ = self; + _ = lib; + _ = must_link; + _ = fat_arch; + return error.Unhandled; +} + +fn parseDylib(self: *MachO, lib: SystemLib, explicit: bool, fat_arch: ?fat.Arch) ParseError!void { + _ = self; + _ = lib; + _ = explicit; + _ = fat_arch; + return error.Unhandled; +} + +fn parseTbd(self: *MachO, lib: SystemLib, explicit: bool) ParseError!void { + _ = self; + _ = lib; + _ = explicit; + return error.Unhandled; +} + fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void { _ = self; _ = atom_index; diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index c31278ce1c..2451874eb0 100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -61,6 +61,17 @@ const ar_hdr = extern struct { } }; +pub fn isArchive(path: []const u8, 
fat_arch: ?fat.Arch) !bool { + const file = try std.fs.cwd().openFile(path, .{}); + defer file.close(); + if (fat_arch) |arch| { + try file.seekTo(arch.offset); + } + const magic = file.reader().readBytesNoEof(SARMAG) catch return false; + if (!mem.eql(u8, &magic, ARMAG)) return false; + return true; +} + pub fn deinit(self: *Archive, allocator: Allocator) void { self.objects.deinit(allocator); } @@ -117,6 +128,7 @@ pub fn parse(self: *Archive, arena: Allocator, macho_file: *MachO) !void { } } +const fat = @import("fat.zig"); const log = std.log.scoped(.link); const macho = std.macho; const mem = std.mem; diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index b57d6eb911..f0454d91c3 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -23,6 +23,16 @@ referenced: bool = false, output_symtab_ctx: MachO.SymtabCtx = .{}, +pub fn isDylib(path: []const u8, fat_arch: ?fat.Arch) !bool { + const file = try std.fs.cwd().openFile(path, .{}); + defer file.close(); + if (fat_arch) |arch| { + try file.seekTo(arch.offset); + } + const header = file.reader().readStruct(macho.mach_header_64) catch return false; + return header.filetype == macho.MH_DYLIB; +} + pub fn deinit(self: *Dylib, allocator: Allocator) void { self.exports.deinit(allocator); self.strtab.deinit(allocator); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 53334be6ab..9b453fad49 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -34,8 +34,7 @@ output_symtab_ctx: MachO.SymtabCtx = .{}, pub fn isObject(path: []const u8) !bool { const file = try std.fs.cwd().openFile(path, .{}); defer file.close(); - const reader = file.reader(); - const header = reader.readStruct(macho.mach_header_64) catch return false; + const header = file.reader().readStruct(macho.mach_header_64) catch return false; return header.filetype == macho.MH_OBJECT; } @@ -175,8 +174,12 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { }); return 
error.InvalidTarget; } - if (macho_file.platform.version.order(platform.version) != .lt) { - try macho_file.reportParseError2(self.index, "object file built for newer platform: {}", .{platform}); + if (macho_file.platform.version.order(platform.version) == .lt) { + try macho_file.reportParseError2(self.index, "object file built for newer platform: {}: {} < {}", .{ + macho_file.platform.fmtTarget(macho_file.getTarget().cpu.arch), + macho_file.platform.version, + platform.version, + }); return error.InvalidTarget; } } diff --git a/src/link/MachO/fat.zig b/src/link/MachO/fat.zig index 46cf0139df..5542d70dc0 100644 --- a/src/link/MachO/fat.zig +++ b/src/link/MachO/fat.zig @@ -8,10 +8,10 @@ const native_endian = builtin.target.cpu.arch.endian(); const MachO = @import("../MachO.zig"); -pub fn isFatLibrary(file: std.fs.File) bool { - const reader = file.reader(); - const hdr = reader.readStructEndian(macho.fat_header, .big) catch return false; - defer file.seekTo(0) catch {}; +pub fn isFatLibrary(path: []const u8) !bool { + const file = try std.fs.cwd().openFile(path, .{}); + defer file.close(); + const hdr = file.reader().readStructEndian(macho.fat_header, .big) catch return false; return hdr.magic == macho.FAT_MAGIC; } @@ -21,7 +21,9 @@ pub const Arch = struct { size: u32, }; -pub fn parseArchs(file: std.fs.File, buffer: *[2]Arch) ![]const Arch { +pub fn parseArchs(path: []const u8, buffer: *[2]Arch) ![]const Arch { + const file = try std.fs.cwd().openFile(path, .{}); + defer file.close(); const reader = file.reader(); const fat_header = try reader.readStructEndian(macho.fat_header, .big); assert(fat_header.magic == macho.FAT_MAGIC); From c5e509595a267ddec7d49a9d89ab2bc4f86a9d35 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 11 Jan 2024 17:34:52 +0100 Subject: [PATCH 006/133] macho: parse archives --- src/link/MachO.zig | 47 ++++++++++++++++++++++++++++++++++---- src/link/MachO/Archive.zig | 26 +++++++++++++-------- src/link/MachO/Object.zig | 1 + 3 files 
changed, 59 insertions(+), 15 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b34d3324c6..0a64fbd624 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -845,11 +845,48 @@ fn parseFatLibrary(self: *MachO, path: []const u8) !fat.Arch { } fn parseArchive(self: *MachO, lib: SystemLib, must_link: bool, fat_arch: ?fat.Arch) ParseError!void { - _ = self; - _ = lib; - _ = must_link; - _ = fat_arch; - return error.Unhandled; + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = self.base.comp.gpa; + + const file = try std.fs.cwd().openFile(lib.path, .{}); + defer file.close(); + + const data = if (fat_arch) |arch| blk: { + try file.seekTo(arch.offset); + const data = try gpa.alloc(u8, arch.size); + const nread = try file.readAll(data); + if (nread != arch.size) return error.InputOutput; + break :blk data; + } else try file.readToEndAlloc(gpa, std.math.maxInt(u32)); + + var archive = Archive{ .path = try gpa.dupe(u8, lib.path), .data = data }; + defer archive.deinit(gpa); + try archive.parse(self); + + var has_parse_error = false; + for (archive.objects.items) |extracted| { + const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); + self.files.set(index, .{ .object = extracted }); + const object = &self.files.items(.data)[index].object; + object.index = index; + object.alive = must_link or lib.needed; // TODO: or self.options.all_load; + object.hidden = lib.hidden; + object.parse(self) catch |err| switch (err) { + error.MalformedObject, + error.InvalidCpuArch, + error.InvalidTarget, + => has_parse_error = true, + else => |e| return e, + }; + try self.objects.append(gpa, index); + + // Finally, we do a post-parse check for -ObjC to see if we need to force load this member + // anyhow. 
+ // TODO: object.alive = object.alive or (self.options.force_load_objc and object.hasObjc()); + } + if (has_parse_error) return error.MalformedArchive; } fn parseDylib(self: *MachO, lib: SystemLib, explicit: bool, fat_arch: ?fat.Arch) ParseError!void { diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index 2451874eb0..7203d89b94 100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -73,14 +73,20 @@ pub fn isArchive(path: []const u8, fat_arch: ?fat.Arch) !bool { } pub fn deinit(self: *Archive, allocator: Allocator) void { + allocator.free(self.data); + allocator.free(self.path); self.objects.deinit(allocator); } -pub fn parse(self: *Archive, arena: Allocator, macho_file: *MachO) !void { - const gpa = macho_file.base.allocator; +pub fn parse(self: *Archive, macho_file: *MachO) !void { + const gpa = macho_file.base.comp.gpa; + + var arena = std.heap.ArenaAllocator.init(gpa); + defer arena.deinit(); var stream = std.io.fixedBufferStream(self.data); const reader = stream.reader(); + _ = try reader.readBytesNoEof(SARMAG); while (true) { if (stream.pos >= self.data.len) break; @@ -89,18 +95,18 @@ pub fn parse(self: *Archive, arena: Allocator, macho_file: *MachO) !void { const hdr = try reader.readStruct(ar_hdr); if (!mem.eql(u8, &hdr.ar_fmag, ARFMAG)) { - macho_file.base.fatal("{s}: invalid header delimiter: expected '{s}', found '{s}'", .{ - self.path, std.fmt.fmtSliceEscapeLower(ARFMAG), std.fmt.fmtSliceEscapeLower(&hdr.ar_fmag), + try macho_file.reportParseError(self.path, "invalid header delimiter: expected '{s}', found '{s}'", .{ + std.fmt.fmtSliceEscapeLower(ARFMAG), std.fmt.fmtSliceEscapeLower(&hdr.ar_fmag), }); - return error.ParseFailed; + return error.MalformedArchive; } var size = try hdr.size(); const name = name: { - if (hdr.name()) |n| break :name try arena.dupe(u8, n); + if (hdr.name()) |n| break :name n; if (try hdr.nameLength()) |len| { size -= len; - const buf = try arena.alloc(u8, len); + const buf = try 
arena.allocator().alloc(u8, len); try reader.readNoEof(buf); const actual_len = mem.indexOfScalar(u8, buf, @as(u8, 0)) orelse len; break :name buf[0..actual_len]; @@ -114,9 +120,9 @@ pub fn parse(self: *Archive, arena: Allocator, macho_file: *MachO) !void { if (mem.eql(u8, name, "__.SYMDEF") or mem.eql(u8, name, "__.SYMDEF SORTED")) continue; const object = Object{ - .archive = self.path, - .path = name, - .data = self.data[stream.pos..][0..size], + .archive = try gpa.dupe(u8, self.path), + .path = try gpa.dupe(u8, name), + .data = try gpa.dupe(u8, self.data[stream.pos..][0..size]), .index = undefined, .alive = false, .mtime = hdr.date() catch 0, diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 9b453fad49..e28c23d4ad 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -56,6 +56,7 @@ pub fn deinit(self: *Object, allocator: Allocator) void { sf.stabs.deinit(allocator); } self.stab_files.deinit(allocator); + allocator.free(self.data); } pub fn parse(self: *Object, macho_file: *MachO) !void { From d153bc2f0c01af7027f09dd6d42786c4b4d076ed Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 11 Jan 2024 17:53:58 +0100 Subject: [PATCH 007/133] macho: parse dylibs --- src/link/MachO.zig | 46 ++++++++++++++++++++++++++++++++-------- src/link/MachO/Dylib.zig | 46 ++++++++++++++++++++++++++++++++++------ 2 files changed, 76 insertions(+), 16 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 0a64fbd624..6a5e26d24c 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -382,6 +382,7 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node self.parsePositional(obj.path, obj.must_link) catch |err| switch (err) { error.MalformedObject, error.MalformedArchive, + error.MalformedDylib, error.InvalidCpuArch, error.InvalidTarget, error.UnknownFileType, @@ -432,8 +433,8 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node for (system_libs.items) |lib| { 
self.parseLibrary(lib, false) catch |err| switch (err) { - error.MalformedDylib, error.MalformedArchive, + error.MalformedDylib, error.InvalidCpuArch, error.UnknownFileType, => continue, // already reported @@ -794,7 +795,7 @@ fn parseLibrary(self: *MachO, lib: SystemLib, must_link: bool) ParseError!void { if (try Archive.isArchive(lib.path, fat_arch)) { try self.parseArchive(lib, must_link, fat_arch); } else if (try Dylib.isDylib(lib.path, fat_arch)) { - try self.parseDylib(lib, true, fat_arch); + _ = try self.parseDylib(lib, true, fat_arch); } else { try self.reportParseError(lib.path, "unknown file type for a library", .{}); return error.UnknownFileType; @@ -802,7 +803,7 @@ fn parseLibrary(self: *MachO, lib: SystemLib, must_link: bool) ParseError!void { } else if (try Archive.isArchive(lib.path, null)) { try self.parseArchive(lib, must_link, null); } else if (try Dylib.isDylib(lib.path, null)) { - try self.parseDylib(lib, true, null); + _ = try self.parseDylib(lib, true, null); } else { try self.parseTbd(lib, true); } @@ -889,12 +890,39 @@ fn parseArchive(self: *MachO, lib: SystemLib, must_link: bool, fat_arch: ?fat.Ar if (has_parse_error) return error.MalformedArchive; } -fn parseDylib(self: *MachO, lib: SystemLib, explicit: bool, fat_arch: ?fat.Arch) ParseError!void { - _ = self; - _ = lib; - _ = explicit; - _ = fat_arch; - return error.Unhandled; +fn parseDylib(self: *MachO, lib: SystemLib, explicit: bool, fat_arch: ?fat.Arch) ParseError!File.Index { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = self.base.comp.gpa; + + const file = try std.fs.cwd().openFile(lib.path, .{}); + defer file.close(); + + const data = if (fat_arch) |arch| blk: { + try file.seekTo(arch.offset); + const data = try gpa.alloc(u8, arch.size); + const nread = try file.readAll(data); + if (nread != arch.size) return error.InputOutput; + break :blk data; + } else try file.readToEndAlloc(gpa, std.math.maxInt(u32)); + + const index = @as(File.Index, @intCast(try 
self.files.addOne(gpa))); + self.files.set(index, .{ .dylib = .{ + .path = try gpa.dupe(u8, lib.path), + .data = data, + .index = index, + .needed = lib.needed, + .weak = lib.weak, + .reexport = lib.reexport, + .explicit = explicit, + } }); + const dylib = &self.files.items(.data)[index].dylib; + try dylib.parse(self); + + try self.dylibs.append(gpa, index); + + return index; } fn parseTbd(self: *MachO, lib: SystemLib, explicit: bool) ParseError!void { diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index f0454d91c3..50eb8971c3 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -34,6 +34,8 @@ pub fn isDylib(path: []const u8, fat_arch: ?fat.Arch) !bool { } pub fn deinit(self: *Dylib, allocator: Allocator) void { + allocator.free(self.data); + allocator.free(self.path); self.exports.deinit(allocator); self.strtab.deinit(allocator); if (self.id) |*id| id.deinit(allocator); @@ -49,7 +51,7 @@ pub fn parse(self: *Dylib, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; var stream = std.io.fixedBufferStream(self.data); const reader = stream.reader(); @@ -57,9 +59,22 @@ pub fn parse(self: *Dylib, macho_file: *MachO) !void { self.header = try reader.readStruct(macho.mach_header_64); + const this_cpu_arch: std.Target.Cpu.Arch = switch (self.header.?.cputype) { + macho.CPU_TYPE_ARM64 => .aarch64, + macho.CPU_TYPE_X86_64 => .x86_64, + else => |x| { + try macho_file.reportParseError2(self.index, "unknown cpu architecture: {d}", .{x}); + return error.InvalidCpuArch; + }, + }; + if (macho_file.getTarget().cpu.arch != this_cpu_arch) { + try macho_file.reportParseError2(self.index, "invalid cpu architecture: {s}", .{@tagName(this_cpu_arch)}); + return error.InvalidCpuArch; + } + const lc_id = self.getLoadCommand(.ID_DYLIB) orelse { - macho_file.base.fatal("{s}: missing LC_ID_DYLIB load command", .{self.path}); - return error.ParseFailed; + 
try macho_file.reportParseError2(self.index, "missing LC_ID_DYLIB load command", .{}); + return error.MalformedDylib; }; self.id = try Id.fromLoadCommand(gpa, lc_id.cast(macho.dylib_command).?, lc_id.getDylibPathName()); @@ -90,6 +105,23 @@ pub fn parse(self: *Dylib, macho_file: *MachO) !void { }; self.initPlatform(); + + if (self.platform) |platform| { + if (!macho_file.platform.eqlTarget(platform)) { + try macho_file.reportParseError2(self.index, "invalid platform: {}", .{ + platform.fmtTarget(macho_file.getTarget().cpu.arch), + }); + return error.InvalidTarget; + } + if (macho_file.platform.version.order(platform.version) == .lt) { + try macho_file.reportParseError2(self.index, "object file built for newer platform: {}: {} < {}", .{ + macho_file.platform.fmtTarget(macho_file.getTarget().cpu.arch), + macho_file.platform.version, + platform.version, + }); + return error.InvalidTarget; + } + } } const TrieIterator = struct { @@ -187,7 +219,7 @@ fn parseTrieNode( fn parseTrie(self: *Dylib, data: []const u8, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; var arena = std.heap.ArenaAllocator.init(gpa); defer arena.deinit(); @@ -204,7 +236,7 @@ pub fn parseTbd( ) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; log.debug("parsing dylib from stub", .{}); @@ -435,7 +467,7 @@ fn addObjCExport( } pub fn initSymbols(self: *Dylib, macho_file: *MachO) !void { - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; try self.symbols.ensureTotalCapacityPrecise(gpa, self.exports.items(.name).len); @@ -459,7 +491,7 @@ fn initPlatform(self: *Dylib) void { .VERSION_MIN_IPHONEOS, .VERSION_MIN_TVOS, .VERSION_MIN_WATCHOS, - => break MachO.Options.Platform.fromLoadCommand(cmd), + => break MachO.Platform.fromLoadCommand(cmd), else => {}, } } else null; From 
c023b762cd2898e6d2f5cb3c1f6f188ebeb00069 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 11 Jan 2024 18:15:36 +0100 Subject: [PATCH 008/133] macho: parse tbds --- src/link/MachO.zig | 48 +++++++++++++++++++++++++++++----------- src/link/MachO/Dylib.zig | 10 ++++----- 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 6a5e26d24c..7834dbae16 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -385,8 +385,8 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node error.MalformedDylib, error.InvalidCpuArch, error.InvalidTarget, - error.UnknownFileType, => continue, // already reported + error.UnknownFileType => try self.reportParseError(obj.path, "unknown file type for an object file", .{}), else => |e| try self.reportParseError( obj.path, "unexpected error: parsing input file failed with error {s}", @@ -436,8 +436,8 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node error.MalformedArchive, error.MalformedDylib, error.InvalidCpuArch, - error.UnknownFileType, => continue, // already reported + error.UnknownFileType => try self.reportParseError(lib.path, "unknown file type for a library", .{}), else => |e| try self.reportParseError( lib.path, "unexpected error: parsing library failed with error {s}", @@ -458,8 +458,8 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node error.MalformedArchive, error.InvalidCpuArch, error.InvalidTarget, - error.UnknownFileType, => {}, // already reported + error.UnknownFileType => try self.reportParseError(path, "unknown file type for a library", .{}), else => |e| try self.reportParseError( path, "unexpected error: parsing input file failed with error {s}", @@ -762,6 +762,7 @@ const ParseError = error{ MalformedObject, MalformedArchive, MalformedDylib, + MalformedTbd, NotLibStub, InvalidCpuArch, InvalidTarget, @@ -796,16 +797,16 @@ fn parseLibrary(self: *MachO, lib: SystemLib, 
must_link: bool) ParseError!void { try self.parseArchive(lib, must_link, fat_arch); } else if (try Dylib.isDylib(lib.path, fat_arch)) { _ = try self.parseDylib(lib, true, fat_arch); - } else { - try self.reportParseError(lib.path, "unknown file type for a library", .{}); - return error.UnknownFileType; - } + } else return error.UnknownFileType; } else if (try Archive.isArchive(lib.path, null)) { try self.parseArchive(lib, must_link, null); } else if (try Dylib.isDylib(lib.path, null)) { _ = try self.parseDylib(lib, true, null); } else { - try self.parseTbd(lib, true); + _ = self.parseTbd(lib, true) catch |err| switch (err) { + error.MalformedTbd => return error.UnknownFileType, + else => |e| return e, + }; } } @@ -925,11 +926,32 @@ fn parseDylib(self: *MachO, lib: SystemLib, explicit: bool, fat_arch: ?fat.Arch) return index; } -fn parseTbd(self: *MachO, lib: SystemLib, explicit: bool) ParseError!void { - _ = self; - _ = lib; - _ = explicit; - return error.Unhandled; +fn parseTbd(self: *MachO, lib: SystemLib, explicit: bool) ParseError!File.Index { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = self.base.comp.gpa; + const file = try std.fs.cwd().openFile(lib.path, .{}); + defer file.close(); + + var lib_stub = LibStub.loadFromFile(gpa, file) catch return error.MalformedTbd; // TODO actually handle different errors + defer lib_stub.deinit(); + + const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); + self.files.set(index, .{ .dylib = .{ + .path = try gpa.dupe(u8, lib.path), + .data = &[0]u8{}, + .index = index, + .needed = lib.needed, + .weak = lib.weak, + .reexport = lib.reexport, + .explicit = explicit, + } }); + const dylib = &self.files.items(.data)[index].dylib; + try dylib.parseTbd(self.getTarget().cpu.arch, self.platform, lib_stub, self); + try self.dylibs.append(gpa, index); + + return index; } fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void { diff --git a/src/link/MachO/Dylib.zig 
b/src/link/MachO/Dylib.zig index 50eb8971c3..694f2214b7 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -230,12 +230,13 @@ fn parseTrie(self: *Dylib, data: []const u8, macho_file: *MachO) !void { pub fn parseTbd( self: *Dylib, cpu_arch: std.Target.Cpu.Arch, - platform: ?MachO.Options.Platform, + platform: MachO.Platform, lib_stub: LibStub, macho_file: *MachO, ) !void { const tracy = trace(@src()); defer tracy.end(); + const gpa = macho_file.base.comp.gpa; log.debug("parsing dylib from stub", .{}); @@ -258,12 +259,9 @@ pub fn parseTbd( log.debug(" (install_name '{s}')", .{umbrella_lib.installName()}); - self.platform = platform orelse .{ - .platform = .MACOS, - .version = .{ .value = 0 }, - }; + self.platform = platform; - var matcher = try TargetMatcher.init(gpa, cpu_arch, self.platform.?.platform); + var matcher = try TargetMatcher.init(gpa, cpu_arch, self.platform.?.toApplePlatform()); defer matcher.deinit(); for (lib_stub.inner, 0..) |elem, stub_index| { From 8c7a34ae68bbc87e67692e280350f8f825a59230 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 11 Jan 2024 18:23:54 +0100 Subject: [PATCH 009/133] macho: prep for dylib deps (no resolution yet) --- src/link/MachO.zig | 158 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 7834dbae16..20d76ca5f3 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -470,6 +470,18 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node if (comp.link_errors.items.len > 0) return error.FlushFailure; + for (self.dylibs.items) |index| { + self.getFile(index).?.dylib.umbrella = index; + } + + // try self.parseDependentDylibs(); + + for (self.dylibs.items) |index| { + const dylib = self.getFile(index).?.dylib; + if (!dylib.explicit and !dylib.hoisted) continue; + try dylib.initSymbols(self); + } + state_log.debug("{}", .{self.dumpState()}); @panic("TODO"); @@ -954,6 +966,152 @@ fn 
parseTbd(self: *MachO, lib: SystemLib, explicit: bool) ParseError!File.Index return index; } +// /// According to ld64's manual, public (i.e., system) dylibs/frameworks are hoisted into the final +// /// image unless overridden by -no_implicit_dylibs. +// fn isHoisted(self: *MachO, install_name: []const u8) bool { +// _ = self; +// // TODO: if (self.options.no_implicit_dylibs) return true; +// if (std.fs.path.dirname(install_name)) |dirname| { +// if (mem.startsWith(u8, dirname, "/usr/lib")) return true; +// if (eatPrefix(dirname, "/System/Library/Frameworks/")) |path| { +// const basename = std.fs.path.basename(install_name); +// if (mem.indexOfScalar(u8, path, '.')) |index| { +// if (mem.eql(u8, basename, path[0..index])) return true; +// } +// } +// } +// return false; +// } + +// fn parseDependentDylibs( +// self: *MachO +// ) !void { +// const tracy = trace(@src()); +// defer tracy.end(); + +// const gpa = self.base.comp.gpa; +// const lib_dirs = self.base.comp.lib_dirs; +// const framework_dirs = self.base.comp.framework_dirs; + +// if (self.dylibs.items.len == 0) return; + +// var arena = std.heap.ArenaAllocator.init(gpa); +// defer arena.deinit(); + +// // TODO handle duplicate dylibs - it is not uncommon to have the same dylib loaded multiple times +// // in which case we should track that and return File.Index immediately instead of re-parsing paths. + +// var index: usize = 0; +// while (index < self.dylibs.items.len) : (index += 1) { +// const dylib_index = self.dylibs.items[index]; + +// var dependents = std.ArrayList(File.Index).init(gpa); +// defer dependents.deinit(); +// try dependents.ensureTotalCapacityPrecise(self.getFile(dylib_index).?.dylib.dependents.items.len); + +// const is_weak = self.getFile(dylib_index).?.dylib.weak; +// for (self.getFile(dylib_index).?.dylib.dependents.items) |id| { +// // We will search for the dependent dylibs in the following order: +// // 1. Basename is in search lib directories or framework directories +// // 2. 
If name is an absolute path, search as-is optionally prepending a syslibroot +// // if specified. +// // 3. If name is a relative path, substitute @rpath, @loader_path, @executable_path with +// // dependees list of rpaths, and search there. +// // 4. Finally, just search the provided relative path directly in CWD. +// const full_path = full_path: { +// fail: { +// const stem = std.fs.path.stem(id.name); +// const framework_name = try std.fmt.allocPrint(gpa, "{s}.framework" ++ std.fs.path.sep_str ++ "{s}", .{ +// stem, +// stem, +// }); +// defer gpa.free(framework_name); + +// if (mem.endsWith(u8, id.name, framework_name)) { +// // Framework +// const full_path = (try self.resolveFramework(arena, framework_dirs, stem)) orelse break :fail; +// break :full_path full_path; +// } + +// // Library +// const lib_name = eatPrefix(stem, "lib") orelse stem; +// const full_path = (try self.resolveLib(arena, lib_dirs, lib_name)) orelse break :fail; +// break :full_path full_path; +// } + +// if (std.fs.path.isAbsolute(id.name)) { +// const path = if (self.options.syslibroot) |root| +// try std.fs.path.join(arena, &.{ root, id.name }) +// else +// id.name; +// for (&[_][]const u8{ "", ".tbd", ".dylib" }) |ext| { +// const full_path = try std.fmt.allocPrint(arena, "{s}{s}", .{ path, ext }); +// if (try accessLibPath(full_path)) break :full_path full_path; +// } +// } + +// if (eatPrefix(id.name, "@rpath/")) |path| { +// const dylib = self.getFile(dylib_index).?.dylib; +// for (self.getFile(dylib.umbrella).?.dylib.rpaths.keys()) |rpath| { +// const prefix = eatPrefix(rpath, "@loader_path/") orelse rpath; +// const rel_path = try std.fs.path.join(arena, &.{ prefix, path }); +// var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; +// const full_path = std.fs.realpath(rel_path, &buffer) catch continue; +// break :full_path full_path; +// } +// } else if (eatPrefix(id.name, "@loader_path/")) |_| { +// return self.base.fatal("{s}: TODO handle install_name '{s}'", .{ +// 
self.getFile(dylib_index).?.dylib.path, id.name, +// }); +// } else if (eatPrefix(id.name, "@executable_path/")) |_| { +// return self.base.fatal("{s}: TODO handle install_name '{s}'", .{ +// self.getFile(dylib_index).?.dylib.path, id.name, +// }); +// } + +// var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; +// const full_path = std.fs.realpath(id.name, &buffer) catch { +// dependents.appendAssumeCapacity(0); +// continue; +// }; +// break :full_path full_path; +// }; +// const link_obj = LinkObject{ +// .path = full_path, +// .tag = .obj, +// .weak = is_weak, +// }; +// const file_index = file_index: { +// if (try self.parseDylib(arena, link_obj, false)) |file| break :file_index file; +// if (try self.parseTbd(link_obj, false)) |file| break :file_index file; +// break :file_index @as(File.Index, 0); +// }; +// dependents.appendAssumeCapacity(file_index); +// } + +// const dylib = self.getFile(dylib_index).?.dylib; +// for (dylib.dependents.items, dependents.items) |id, file_index| { +// if (self.getFile(file_index)) |file| { +// const dep_dylib = file.dylib; +// dep_dylib.hoisted = self.isHoisted(id.name); +// if (self.getFile(dep_dylib.umbrella) == null) { +// dep_dylib.umbrella = dylib.umbrella; +// } +// if (!dep_dylib.hoisted) { +// const umbrella = dep_dylib.getUmbrella(self); +// for (dep_dylib.exports.items(.name), dep_dylib.exports.items(.flags)) |off, flags| { +// try umbrella.addExport(gpa, dep_dylib.getString(off), flags); +// } +// try umbrella.rpaths.ensureUnusedCapacity(gpa, dep_dylib.rpaths.keys().len); +// for (dep_dylib.rpaths.keys()) |rpath| { +// umbrella.rpaths.putAssumeCapacity(rpath, {}); +// } +// } +// } else self.base.fatal("{s}: unable to resolve dependency {s}", .{ dylib.getUmbrella(self).path, id.name }); +// } +// } +// } + fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void { _ = self; _ = atom_index; From b8f67d79850b0be1f2384dca0a9a946bfa6a75a6 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 11 Jan 
2024 18:30:22 +0100 Subject: [PATCH 010/133] macho: init InternalObject and add forced undefined globals --- src/link/MachO.zig | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 20d76ca5f3..49e4ad525c 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -48,6 +48,14 @@ eh_frame_sect_index: ?u8 = null, unwind_info_sect_index: ?u8 = null, objc_stubs_sect_index: ?u8 = null, +mh_execute_header_index: ?Symbol.Index = null, +mh_dylib_header_index: ?Symbol.Index = null, +dyld_private_index: ?Symbol.Index = null, +dyld_stub_binder_index: ?Symbol.Index = null, +dso_handle_index: ?Symbol.Index = null, +objc_msg_send_index: ?Symbol.Index = null, +entry_index: ?Symbol.Index = null, + /// List of atoms that are either synthetic or map directly to the Zig source program. atoms: std.ArrayListUnmanaged(Atom) = .{}, thunks: std.ArrayListUnmanaged(Thunk) = .{}, @@ -482,6 +490,14 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node try dylib.initSymbols(self); } + { + const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); + self.files.set(index, .{ .internal = .{ .index = index } }); + self.internal_object = index; + } + + try self.addUndefinedGlobals(); + state_log.debug("{}", .{self.dumpState()}); @panic("TODO"); @@ -1112,6 +1128,35 @@ fn parseTbd(self: *MachO, lib: SystemLib, explicit: bool) ParseError!File.Index // } // } +fn addUndefinedGlobals(self: *MachO) !void { + const gpa = self.base.comp.gpa; + + try self.undefined_symbols.ensureUnusedCapacity(gpa, self.base.comp.force_undefined_symbols.keys().len); + for (self.base.comp.force_undefined_symbols.keys()) |name| { + const off = try self.strings.insert(gpa, name); + const gop = try self.getOrCreateGlobal(off); + self.undefined_symbols.appendAssumeCapacity(gop.index); + } + + if (!self.base.isDynLib() and self.entry_name != null) { + const off = try self.strings.insert(gpa, 
self.entry_name.?); + const gop = try self.getOrCreateGlobal(off); + self.entry_index = gop.index; + } + + { + const off = try self.strings.insert(gpa, "dyld_stub_binder"); + const gop = try self.getOrCreateGlobal(off); + self.dyld_stub_binder_index = gop.index; + } + + { + const off = try self.strings.insert(gpa, "_objc_msgSend"); + const gop = try self.getOrCreateGlobal(off); + self.objc_msg_send_index = gop.index; + } +} + fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void { _ = self; _ = atom_index; From 8a1311733b313a59afa2ea354ca9b342a935d869 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 11 Jan 2024 18:42:35 +0100 Subject: [PATCH 011/133] macho: resolve symbols and mark files live --- src/link/MachO.zig | 82 +++++++++++++++++++++++++++++++++------ src/link/MachO/Dylib.zig | 2 +- src/link/MachO/Object.zig | 3 +- 3 files changed, 73 insertions(+), 14 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 49e4ad525c..8a43526fab 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -497,6 +497,7 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node } try self.addUndefinedGlobals(); + try self.resolveSymbols(); state_log.debug("{}", .{self.dumpState()}); @@ -1157,6 +1158,63 @@ fn addUndefinedGlobals(self: *MachO) !void { } } +/// When resolving symbols, we approach the problem similarly to `mold`. +/// 1. Resolve symbols across all objects (including those preemptively extracted archives). +/// 2. Resolve symbols across all shared objects. +/// 3. Mark live objects (see `MachO.markLive`) +/// 4. Reset state of all resolved globals since we will redo this bit on the pruned set. +/// 5. Remove references to dead objects/shared objects +/// 6. Re-run symbol resolution on pruned objects and shared objects sets. 
+pub fn resolveSymbols(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + // Resolve symbols on the set of all objects and shared objects (even if some are unneeded). + for (self.objects.items) |index| self.getFile(index).?.resolveSymbols(self); + for (self.dylibs.items) |index| self.getFile(index).?.resolveSymbols(self); + + // Mark live objects. + self.markLive(); + + // Reset state of all globals after marking live objects. + for (self.objects.items) |index| self.getFile(index).?.resetGlobals(self); + for (self.dylibs.items) |index| self.getFile(index).?.resetGlobals(self); + + // Prune dead objects. + var i: usize = 0; + while (i < self.objects.items.len) { + const index = self.objects.items[i]; + if (!self.getFile(index).?.object.alive) { + _ = self.objects.orderedRemove(i); + } else i += 1; + } + + // Re-resolve the symbols. + for (self.objects.items) |index| self.getFile(index).?.resolveSymbols(self); + for (self.dylibs.items) |index| self.getFile(index).?.resolveSymbols(self); +} + +fn markLive(self: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.undefined_symbols.items) |index| { + if (self.getSymbol(index).getFile(self)) |file| { + if (file == .object) file.object.alive = true; + } + } + if (self.entry_index) |index| { + const sym = self.getSymbol(index); + if (sym.getFile(self)) |file| { + if (file == .object) file.object.alive = true; + } + } + for (self.objects.items) |index| { + const object = self.getFile(index).?.object; + if (object.alive) object.markLive(self); + } +} + fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void { _ = self; _ = atom_index; @@ -1791,18 +1849,18 @@ fn fmtDumpState( object.fmtSymtab(self), }); } - // for (self.dylibs.items) |index| { - // const dylib = self.getFile(index).?.dylib; - // try writer.print("dylib({d}) : {s} : needed({}) : weak({})", .{ - // index, - // dylib.path, - // dylib.needed, - // dylib.weak, - // }); - // if 
(!dylib.isAlive(self)) try writer.writeAll(" : ([*])"); - // try writer.writeByte('\n'); - // try writer.print("{}\n", .{dylib.fmtSymtab(self)}); - // } + for (self.dylibs.items) |index| { + const dylib = self.getFile(index).?.dylib; + try writer.print("dylib({d}) : {s} : needed({}) : weak({})", .{ + index, + dylib.path, + dylib.needed, + dylib.weak, + }); + if (!dylib.isAlive(self)) try writer.writeAll(" : ([*])"); + try writer.writeByte('\n'); + try writer.print("{}\n", .{dylib.fmtSymtab(self)}); + } if (self.getInternalObject()) |internal| { try writer.print("internal({d}) : internal\n", .{internal.index}); try writer.print("{}{}\n", .{ internal.fmtAtoms(self), internal.fmtSymtab(self) }); diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index 694f2214b7..0df66ffd71 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -530,7 +530,7 @@ pub fn resetGlobals(self: *Dylib, macho_file: *MachO) void { } pub fn isAlive(self: Dylib, macho_file: *MachO) bool { - if (!macho_file.options.dead_strip_dylibs) return self.explicit or self.referenced or self.needed; + if (!macho_file.dead_strip_dylibs) return self.explicit or self.referenced or self.needed; return self.referenced or self.needed; } diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index e28c23d4ad..9d39621d16 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -1021,7 +1021,8 @@ pub fn resolveSymbols(self: *Object, macho_file: *MachO) void { symbol.flags.weak_ref = false; symbol.flags.dyn_ref = nlist.n_desc & macho.REFERENCED_DYNAMICALLY != 0; symbol.flags.no_dead_strip = symbol.flags.no_dead_strip or nlist.noDeadStrip(); - symbol.flags.interposable = macho_file.options.dylib and macho_file.options.namespace == .flat and !nlist.pext(); + // TODO: symbol.flags.interposable = macho_file.base.isDynLib() and macho_file.options.namespace == .flat and !nlist.pext(); + symbol.flags.interposable = false; if (nlist.sect() and 
self.sections.items(.header)[nlist.n_sect - 1].type() == macho.S_THREAD_LOCAL_VARIABLES) From 6b617afe2a3904ad3d5c9e3542a30bd7f35ed54e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 11 Jan 2024 18:50:53 +0100 Subject: [PATCH 012/133] macho: resolve synthetic symbols --- src/link/MachO.zig | 48 +++++++++++++++++++++++++++++++ src/link/MachO/InternalObject.zig | 12 ++++---- 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 8a43526fab..9514d6d71f 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -498,6 +498,7 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node try self.addUndefinedGlobals(); try self.resolveSymbols(); + try self.resolveSyntheticSymbols(); state_log.debug("{}", .{self.dumpState()}); @@ -1215,6 +1216,53 @@ fn markLive(self: *MachO) void { } } +fn resolveSyntheticSymbols(self: *MachO) !void { + const internal = self.getInternalObject() orelse return; + + if (!self.base.isDynLib()) { + self.mh_execute_header_index = try internal.addSymbol("__mh_execute_header", self); + const sym = self.getSymbol(self.mh_execute_header_index.?); + sym.flags.@"export" = true; + sym.flags.dyn_ref = true; + sym.visibility = .global; + } else { + self.mh_dylib_header_index = try internal.addSymbol("__mh_dylib_header", self); + } + + self.dso_handle_index = try internal.addSymbol("___dso_handle", self); + self.dyld_private_index = try internal.addSymbol("dyld_private", self); + + { + const gpa = self.base.comp.gpa; + var boundary_symbols = std.AutoHashMap(Symbol.Index, void).init(gpa); + defer boundary_symbols.deinit(); + + for (self.objects.items) |index| { + const object = self.getFile(index).?.object; + for (object.symbols.items, 0..) 
|sym_index, i| { + const nlist = object.symtab.items(.nlist)[i]; + const name = self.getSymbol(sym_index).getName(self); + if (!nlist.undf() or !nlist.ext()) continue; + if (mem.startsWith(u8, name, "segment$start$") or + mem.startsWith(u8, name, "segment$stop$") or + mem.startsWith(u8, name, "section$start$") or + mem.startsWith(u8, name, "section$stop$")) + { + _ = try boundary_symbols.put(sym_index, {}); + } + } + } + + try self.boundary_symbols.ensureTotalCapacityPrecise(gpa, boundary_symbols.count()); + + var it = boundary_symbols.iterator(); + while (it.next()) |entry| { + _ = try internal.addSymbol(self.getSymbol(entry.key_ptr.*).getName(self), self); + self.boundary_symbols.appendAssumeCapacity(entry.key_ptr.*); + } + } +} + fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void { _ = self; _ = atom_index; diff --git a/src/link/MachO/InternalObject.zig b/src/link/MachO/InternalObject.zig index e139e4efab..26ec86bb67 100644 --- a/src/link/MachO/InternalObject.zig +++ b/src/link/MachO/InternalObject.zig @@ -20,9 +20,9 @@ pub fn deinit(self: *InternalObject, allocator: Allocator) void { } pub fn addSymbol(self: *InternalObject, name: [:0]const u8, macho_file: *MachO) !Symbol.Index { - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; try self.symbols.ensureUnusedCapacity(gpa, 1); - const off = try macho_file.string_intern.insert(gpa, name); + const off = try macho_file.strings.insert(gpa, name); const gop = try macho_file.getOrCreateGlobal(off); self.symbols.addOneAssumeCapacity().* = gop.index; const sym = macho_file.getSymbol(gop.index); @@ -37,7 +37,7 @@ pub fn addObjcMsgsendSections(self: *InternalObject, sym_name: []const u8, macho } fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_file: *MachO) !Atom.Index { - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const atom_index = try macho_file.addAtom(); try self.atoms.append(gpa, atom_index); @@ 
-45,7 +45,7 @@ fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_fil defer gpa.free(name); const atom = macho_file.getAtom(atom_index).?; atom.atom_index = atom_index; - atom.name = try macho_file.string_intern.insert(gpa, name); + atom.name = try macho_file.strings.insert(gpa, name); atom.file = self.index; atom.size = methname.len + 1; atom.alignment = 0; @@ -71,7 +71,7 @@ fn addObjcSelrefsSection( methname_atom_index: Atom.Index, macho_file: *MachO, ) !Atom.Index { - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const atom_index = try macho_file.addAtom(); try self.atoms.append(gpa, atom_index); @@ -79,7 +79,7 @@ fn addObjcSelrefsSection( defer gpa.free(name); const atom = macho_file.getAtom(atom_index).?; atom.atom_index = atom_index; - atom.name = try macho_file.string_intern.insert(gpa, name); + atom.name = try macho_file.strings.insert(gpa, name); atom.file = self.index; atom.size = @sizeOf(u64); atom.alignment = 3; From 1b76779857cae1d8212f574a4a3059fad9419476 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 11 Jan 2024 18:54:59 +0100 Subject: [PATCH 013/133] macho: convert tentative defs and create objc sections --- src/link/MachO.zig | 47 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 9514d6d71f..b07d30bb11 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -500,6 +500,9 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node try self.resolveSymbols(); try self.resolveSyntheticSymbols(); + try self.convertTentativeDefinitions(); + try self.createObjcSections(); + state_log.debug("{}", .{self.dumpState()}); @panic("TODO"); @@ -1263,6 +1266,50 @@ fn resolveSyntheticSymbols(self: *MachO) !void { } } +fn convertTentativeDefinitions(self: *MachO) !void { + for (self.objects.items) |index| { + try self.getFile(index).?.object.convertTentativeDefinitions(self); + } +} + 
+fn createObjcSections(self: *MachO) !void { + const gpa = self.base.comp.gpa; + var objc_msgsend_syms = std.AutoArrayHashMap(Symbol.Index, void).init(gpa); + defer objc_msgsend_syms.deinit(); + + for (self.objects.items) |index| { + const object = self.getFile(index).?.object; + + for (object.symbols.items, 0..) |sym_index, i| { + const nlist_idx = @as(Symbol.Index, @intCast(i)); + const nlist = object.symtab.items(.nlist)[nlist_idx]; + if (!nlist.ext()) continue; + if (!nlist.undf()) continue; + + const sym = self.getSymbol(sym_index); + if (sym.getFile(self) != null) continue; + if (mem.startsWith(u8, sym.getName(self), "_objc_msgSend$")) { + _ = try objc_msgsend_syms.put(sym_index, {}); + } + } + } + + for (objc_msgsend_syms.keys()) |sym_index| { + const sym = self.getSymbol(sym_index); + sym.value = 0; + sym.atom = 0; + sym.nlist_idx = 0; + sym.file = self.internal_object.?; + sym.flags = .{}; + sym.visibility = .hidden; + const object = self.getInternalObject().?; + const name = eatPrefix(sym.getName(self), "_objc_msgSend$").?; + const selrefs_index = try object.addObjcMsgsendSections(name, self); + try sym.addExtra(.{ .objc_selrefs = selrefs_index }, self); + try object.symbols.append(gpa, sym_index); + } +} + fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void { _ = self; _ = atom_index; From 21e3bb38afad7a2cc4e84910d7630e4ee3435a85 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 11 Jan 2024 19:04:50 +0100 Subject: [PATCH 014/133] macho: claim unresolved symbols --- src/link/MachO.zig | 64 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b07d30bb11..d083471fee 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -92,6 +92,9 @@ headerpad_size: ?u32, headerpad_max_install_names: bool, /// Remove dylibs that are unreachable by the entry point or exported symbols. 
dead_strip_dylibs: bool, +/// Treatment of undefined symbols +undefined_treatment: UndefinedTreatment, +/// List of input frameworks frameworks: []const Framework, /// Install name for the dylib. /// TODO: unify with soname @@ -122,14 +125,6 @@ pub fn hashAddFrameworks(man: *Cache.Manifest, hm: []const Framework) !void { } } -/// The filesystem layout of darwin SDK elements. -pub const SdkLayout = enum { - /// macOS SDK layout: TOP { /usr/include, /usr/lib, /System/Library/Frameworks }. - sdk, - /// Shipped libc layout: TOP { /lib/libc/include, /lib/libc/darwin, }. - vendored, -}; - pub fn createEmpty( arena: Allocator, comp: *Compilation, @@ -153,6 +148,7 @@ pub fn createEmpty( null else try std.fmt.allocPrint(arena, "{s}.o", .{emit.sub_path}); + const allow_shlib_undefined = options.allow_shlib_undefined orelse false; const self = try arena.create(MachO); self.* = .{ @@ -164,7 +160,7 @@ pub fn createEmpty( .gc_sections = options.gc_sections orelse (optimize_mode != .Debug), .print_gc_sections = options.print_gc_sections, .stack_size = options.stack_size orelse 16777216, - .allow_shlib_undefined = options.allow_shlib_undefined orelse false, + .allow_shlib_undefined = allow_shlib_undefined, .file = null, .disable_lld_caching = options.disable_lld_caching, .build_id = options.build_id, @@ -187,6 +183,7 @@ pub fn createEmpty( }, .platform = Platform.fromTarget(target), .sdk_version = if (options.darwin_sdk_layout) |layout| inferSdkVersion(comp, layout) else null, + .undefined_treatment = if (allow_shlib_undefined) .dynamic_lookup else .@"error", }; if (use_llvm and comp.config.have_zcu) { self.llvm_object = try LlvmObject.create(arena, comp); @@ -502,6 +499,7 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node try self.convertTentativeDefinitions(); try self.createObjcSections(); + try self.claimUnresolved(); state_log.debug("{}", .{self.dumpState()}); @@ -1310,6 +1308,39 @@ fn createObjcSections(self: *MachO) !void { } } +fn 
claimUnresolved(self: *MachO) error{OutOfMemory}!void { + for (self.objects.items) |index| { + const object = self.getFile(index).?.object; + + for (object.symbols.items, 0..) |sym_index, i| { + const nlist_idx = @as(Symbol.Index, @intCast(i)); + const nlist = object.symtab.items(.nlist)[nlist_idx]; + if (!nlist.ext()) continue; + if (!nlist.undf()) continue; + + const sym = self.getSymbol(sym_index); + if (sym.getFile(self) != null) continue; + + const is_import = switch (self.undefined_treatment) { + .@"error" => false, + .warn, .suppress => nlist.weakRef(), + .dynamic_lookup => true, + }; + if (is_import) { + sym.value = 0; + sym.atom = 0; + sym.nlist_idx = 0; + sym.file = self.internal_object.?; + sym.flags.weak = false; + sym.flags.weak_ref = nlist.weakRef(); + sym.flags.import = is_import; + sym.visibility = .global; + try self.getInternalObject().?.symbols.append(self.base.comp.gpa, sym_index); + } + } + } +} + fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void { _ = self; _ = atom_index; @@ -2350,6 +2381,21 @@ const SystemLib = struct { must_link: bool = false, }; +/// The filesystem layout of darwin SDK elements. +pub const SdkLayout = enum { + /// macOS SDK layout: TOP { /usr/include, /usr/lib, /System/Library/Frameworks }. + sdk, + /// Shipped libc layout: TOP { /lib/libc/include, /lib/libc/darwin, }. 
+ vendored, +}; + +const UndefinedTreatment = enum { + @"error", + warn, + suppress, + dynamic_lookup, +}; + const MachO = @This(); const std = @import("std"); From b0327ff233faca0dd11e50c9ba7fc41f434a4d82 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 11 Jan 2024 19:09:41 +0100 Subject: [PATCH 015/133] macho: handle dead stripping of atoms --- src/link/MachO.zig | 4 ++++ src/link/MachO/dead_strip.zig | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index d083471fee..f9e1c2a953 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -501,6 +501,10 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node try self.createObjcSections(); try self.claimUnresolved(); + if (self.base.gc_sections) { + try dead_strip.gcAtoms(self); + } + state_log.debug("{}", .{self.dumpState()}); @panic("TODO"); diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 8097d5f710..7356e65a60 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -1,10 +1,10 @@ pub fn gcAtoms(macho_file: *MachO) !void { - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; var objects = try std.ArrayList(File.Index).initCapacity(gpa, macho_file.objects.items.len + 1); defer objects.deinit(); for (macho_file.objects.items) |index| objects.appendAssumeCapacity(index); - if (macho_file.internal_object_index) |index| objects.appendAssumeCapacity(index); + if (macho_file.internal_object) |index| objects.appendAssumeCapacity(index); var roots = std.ArrayList(*Atom).init(gpa); defer roots.deinit(); @@ -21,7 +21,7 @@ fn collectRoots(roots: *std.ArrayList(*Atom), objects: []const File.Index, macho const sym = macho_file.getSymbol(sym_index); const file = sym.getFile(macho_file) orelse continue; if (file.getIndex() != index) continue; - if (sym.flags.no_dead_strip or (macho_file.options.dylib and sym.visibility == .global)) + if 
(sym.flags.no_dead_strip or (macho_file.base.isDynLib() and sym.visibility == .global)) try markSymbol(sym, roots, macho_file); } From b28ff75f5d47ead9e8b416b41149147d796ff8db Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 11 Jan 2024 19:15:54 +0100 Subject: [PATCH 016/133] macho: mark imports and exports --- src/link/MachO.zig | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index f9e1c2a953..8783098e6b 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -505,6 +505,8 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node try dead_strip.gcAtoms(self); } + self.markImportsAndExports(); + state_log.debug("{}", .{self.dumpState()}); @panic("TODO"); @@ -1345,6 +1347,44 @@ fn claimUnresolved(self: *MachO) error{OutOfMemory}!void { } } +fn markImportsAndExports(self: *MachO) void { + for (self.objects.items) |index| { + for (self.getFile(index).?.getSymbols()) |sym_index| { + const sym = self.getSymbol(sym_index); + const file = sym.getFile(self) orelse continue; + if (sym.visibility != .global) continue; + if (file == .dylib and !sym.flags.abs) { + sym.flags.import = true; + continue; + } + if (file.getIndex() == index) { + sym.flags.@"export" = true; + } + } + } + + for (self.undefined_symbols.items) |index| { + const sym = self.getSymbol(index); + if (sym.getFile(self)) |file| { + if (sym.visibility != .global) continue; + if (file == .dylib and !sym.flags.abs) sym.flags.import = true; + } + } + + for (&[_]?Symbol.Index{ + self.entry_index, + self.dyld_stub_binder_index, + self.objc_msg_send_index, + }) |index| { + if (index) |idx| { + const sym = self.getSymbol(idx); + if (sym.getFile(self)) |file| { + if (file == .dylib) sym.flags.import = true; + } + } + } +} + fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void { _ = self; _ = atom_index; From eca9bc4c33112155f855a71d0df015f995a96b29 Mon Sep 17 00:00:00 2001 
From: Jakub Konka Date: Thu, 11 Jan 2024 19:18:55 +0100 Subject: [PATCH 017/133] macho: dead strip dylibs --- src/link/MachO.zig | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 8783098e6b..3d51d71d34 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -506,6 +506,7 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node } self.markImportsAndExports(); + self.deadStripDylibs(); state_log.debug("{}", .{self.dumpState()}); @@ -1385,6 +1386,33 @@ fn markImportsAndExports(self: *MachO) void { } } +fn deadStripDylibs(self: *MachO) void { + for (&[_]?Symbol.Index{ + self.entry_index, + self.dyld_stub_binder_index, + self.objc_msg_send_index, + }) |index| { + if (index) |idx| { + const sym = self.getSymbol(idx); + if (sym.getFile(self)) |file| { + if (file == .dylib) file.dylib.referenced = true; + } + } + } + + for (self.dylibs.items) |index| { + self.getFile(index).?.dylib.markReferenced(self); + } + + var i: usize = 0; + while (i < self.dylibs.items.len) { + const index = self.dylibs.items[i]; + if (!self.getFile(index).?.dylib.isAlive(self)) { + _ = self.dylibs.orderedRemove(i); + } else i += 1; + } +} + fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void { _ = self; _ = atom_index; From 40e1bb11f87d97a89b5d5a6414c13af5f2d0d86b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 11 Jan 2024 19:21:21 +0100 Subject: [PATCH 018/133] macho: set dylib ordinals after pruning --- src/link/MachO.zig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 3d51d71d34..b8a93f63c4 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -508,6 +508,11 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node self.markImportsAndExports(); self.deadStripDylibs(); + for (self.dylibs.items, 1..) 
|index, ord| { + const dylib = self.getFile(index).?.dylib; + dylib.ordinal = @intCast(ord); + } + state_log.debug("{}", .{self.dumpState()}); @panic("TODO"); From f0119ce37339ef72acc527e28b2b611712cf24f0 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 11 Jan 2024 19:42:14 +0100 Subject: [PATCH 019/133] macho: report undefined symbols to the user --- src/link/MachO.zig | 169 +++++++++++++++++++++++++++++++++------- src/link/MachO/Atom.zig | 11 +-- 2 files changed, 148 insertions(+), 32 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b8a93f63c4..d19941509d 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -81,6 +81,10 @@ lazy_bind: LazyBindSection = .{}, export_trie: ExportTrieSection = .{}, unwind_info: UnwindInfo = .{}, +has_tlv: bool = false, +binds_to_weak: bool = false, +weak_defines: bool = false, + /// Options /// SDK layout sdk_layout: ?SdkLayout, @@ -513,6 +517,14 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node dylib.ordinal = @intCast(ord); } + self.scanRelocs() catch |err| switch (err) { + error.HasUndefinedSymbols => return error.FlushFailure, + else => |e| { + try self.reportUnexpectedError("unexpected error while scanning relocations", .{}); + return e; + }, + }; + state_log.debug("{}", .{self.dumpState()}); @panic("TODO"); @@ -1418,6 +1430,132 @@ fn deadStripDylibs(self: *MachO) void { } } +fn scanRelocs(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.objects.items) |index| { + try self.getFile(index).?.object.scanRelocs(self); + } + + try self.reportUndefs(); + + if (self.entry_index) |index| { + const sym = self.getSymbol(index); + if (sym.getFile(self) != null) { + if (sym.flags.import) sym.flags.stubs = true; + } + } + + if (self.dyld_stub_binder_index) |index| { + const sym = self.getSymbol(index); + if (sym.getFile(self) != null) sym.flags.got = true; + } + + if (self.objc_msg_send_index) |index| { + const sym = 
self.getSymbol(index); + if (sym.getFile(self) != null) + sym.flags.got = true; // TODO is it always needed, or only if we are synthesising fast stubs? + } + + for (self.symbols.items, 0..) |*symbol, i| { + const index = @as(Symbol.Index, @intCast(i)); + if (symbol.flags.got) { + log.debug("'{s}' needs GOT", .{symbol.getName(self)}); + try self.got.addSymbol(index, self); + } + if (symbol.flags.stubs) { + log.debug("'{s}' needs STUBS", .{symbol.getName(self)}); + try self.stubs.addSymbol(index, self); + } + if (symbol.flags.tlv_ptr) { + log.debug("'{s}' needs TLV pointer", .{symbol.getName(self)}); + try self.tlv_ptr.addSymbol(index, self); + } + if (symbol.flags.objc_stubs) { + log.debug("'{s}' needs OBJC STUBS", .{symbol.getName(self)}); + try self.objc_stubs.addSymbol(index, self); + } + } +} + +fn reportUndefs(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + switch (self.undefined_treatment) { + .dynamic_lookup, .suppress => return, + .@"error", .warn => {}, + } + + const max_notes = 4; + + var has_undefs = false; + var it = self.undefs.iterator(); + while (it.next()) |entry| { + const undef_sym = self.getSymbol(entry.key_ptr.*); + const notes = entry.value_ptr.*; + const nnotes = @min(notes.items.len, max_notes) + @intFromBool(notes.items.len > max_notes); + + var err = try self.addErrorWithNotes(nnotes); + try err.addMsg(self, "undefined symbol: {s}", .{undef_sym.getName(self)}); + has_undefs = true; + + var inote: usize = 0; + while (inote < @min(notes.items.len, max_notes)) : (inote += 1) { + const atom = self.getAtom(notes.items[inote]).?; + const file = atom.getFile(self); + try err.addNote(self, "referenced by {}:{s}", .{ file.fmtPath(), atom.getName(self) }); + } + + if (notes.items.len > max_notes) { + const remaining = notes.items.len - max_notes; + try err.addNote(self, "referenced {d} more times", .{remaining}); + } + } + + for (self.undefined_symbols.items) |index| { + const sym = self.getSymbol(index); + if 
(sym.getFile(self) != null) continue; // If undefined in an object file, will be reported above + has_undefs = true; + var err = try self.addErrorWithNotes(1); + try err.addMsg(self, "undefined symbol: {s}", .{sym.getName(self)}); + try err.addNote(self, "-u command line option", .{}); + } + + if (self.entry_index) |index| { + const sym = self.getSymbol(index); + if (sym.getFile(self) == null) { + has_undefs = true; + var err = try self.addErrorWithNotes(1); + try err.addMsg(self, "undefined symbol: {s}", .{sym.getName(self)}); + try err.addNote(self, "implicit entry/start for main executable", .{}); + } + } + + if (self.dyld_stub_binder_index) |index| { + const sym = self.getSymbol(index); + if (sym.getFile(self) == null and self.stubs_sect_index != null) { + has_undefs = true; + var err = try self.addErrorWithNotes(1); + try err.addMsg(self, "undefined symbol: {s}", .{sym.getName(self)}); + try err.addNote(self, "implicit -u command line option", .{}); + } + } + + if (self.objc_msg_send_index) |index| { + const sym = self.getSymbol(index); + if (sym.getFile(self) == null and self.objc_stubs_sect_index != null) { + has_undefs = true; + var err = try self.addErrorWithNotes(1); + try err.addMsg(self, "undefined symbol: {s}", .{sym.getName(self)}); + try err.addNote(self, "implicit -u command line option", .{}); + } + } + + if (has_undefs) return error.HasUndefinedSymbols; +} + fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void { _ = self; _ = atom_index; @@ -1899,33 +2037,10 @@ fn reportDependencyError( }); } -pub fn reportUndefined(self: *MachO) error{OutOfMemory}!void { - const comp = self.base.comp; - const gpa = comp.gpa; - const count = self.unresolved.count(); - try comp.link_errors.ensureUnusedCapacity(gpa, count); - - for (self.unresolved.keys()) |global_index| { - const global = self.globals.items[global_index]; - const sym_name = self.getSymbolName(global); - - var notes = try std.ArrayList(link.File.ErrorMsg).initCapacity(gpa, 
1); - defer notes.deinit(); - - if (global.getFile()) |file| { - const note = try std.fmt.allocPrint(gpa, "referenced in {s}", .{ - self.objects.items[file].name, - }); - notes.appendAssumeCapacity(.{ .msg = note }); - } - - var err_msg = link.File.ErrorMsg{ - .msg = try std.fmt.allocPrint(gpa, "undefined reference to symbol {s}", .{sym_name}), - }; - err_msg.notes = try notes.toOwnedSlice(); - - comp.link_errors.appendAssumeCapacity(err_msg); - } +fn reportUnexpectedError(self: *MachO, comptime format: []const u8, args: anytype) error{OutOfMemory}!void { + var err = try self.addErrorWithNotes(1); + try err.addMsg(self, format, args); + try err.addNote(self, "please report this as a linker bug on https://github.com/ziglang/zig/issues/new/choose", .{}); } // fn reportSymbolCollision( diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 8a04e64236..2534f2b078 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -200,7 +200,7 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { const symbol = rel.getTargetSymbol(macho_file); if (symbol.flags.import or (symbol.flags.@"export" and (symbol.flags.weak or symbol.flags.interposable)) or - macho_file.options.cpu_arch.? 
== .aarch64) // TODO relax on arm64 + macho_file.getTarget().cpu.arch == .aarch64) // TODO relax on arm64 { symbol.flags.got = true; if (symbol.flags.weak) { @@ -219,9 +219,10 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { => { const symbol = rel.getTargetSymbol(macho_file); if (!symbol.flags.tlv) { - macho_file.base.fatal( - "{}: {s}: illegal thread-local variable reference to regular symbol {s}", - .{ object.fmtPath(), self.getName(macho_file), symbol.getName(macho_file) }, + try macho_file.reportParseError2( + object.index, + "{s}: illegal thread-local variable reference to regular symbol {s}", + .{ self.getName(macho_file), symbol.getName(macho_file) }, ); } if (symbol.flags.import or (symbol.flags.@"export" and (symbol.flags.weak or symbol.flags.interposable))) { @@ -271,7 +272,7 @@ fn reportUndefSymbol(self: Atom, rel: Relocation, macho_file: *MachO) !bool { const sym = rel.getTargetSymbol(macho_file); if (sym.getFile(macho_file) == null) { - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const gop = try macho_file.undefs.getOrPut(gpa, rel.target); if (!gop.found_existing) { gop.value_ptr.* = .{}; From 0b2231998f5b95845c8414083622ec7913d60af6 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 11 Jan 2024 19:51:29 +0100 Subject: [PATCH 020/133] macho: init output and synthetic sections --- src/link/MachO.zig | 250 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 230 insertions(+), 20 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index d19941509d..27483601d2 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -525,6 +525,9 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node }, }; + try self.initOutputSections(); + try self.initSyntheticSections(); + state_log.debug("{}", .{self.dumpState()}); @panic("TODO"); @@ -716,26 +719,6 @@ fn flushObject(self: *MachO, comp: *Compilation, module_obj_path: ?[]const u8) l return error.FlushFailure; } -/// 
XNU starting with Big Sur running on arm64 is caching inodes of running binaries. -/// Any change to the binary will effectively invalidate the kernel's cache -/// resulting in a SIGKILL on each subsequent run. Since when doing incremental -/// linking we're modifying a binary in-place, this will end up with the kernel -/// killing it on every subsequent run. To circumvent it, we will copy the file -/// into a new inode, remove the original file, and rename the copy to match -/// the original file. This is super messy, but there doesn't seem any other -/// way to please the XNU. -pub fn invalidateKernelCache(dir: std.fs.Dir, sub_path: []const u8) !void { - if (comptime builtin.target.isDarwin() and builtin.target.cpu.arch == .aarch64) { - try dir.copyFile(sub_path, dir, sub_path, .{}); - } -} - -inline fn conformUuid(out: *[Md5.digest_length]u8) void { - // LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats - out[6] = (out[6] & 0x0F) | (3 << 4); - out[8] = (out[8] & 0x3F) | 0x80; -} - pub fn resolveLibSystem( self: *MachO, arena: Allocator, @@ -1556,6 +1539,134 @@ fn reportUndefs(self: *MachO) !void { if (has_undefs) return error.HasUndefinedSymbols; } +fn initOutputSections(self: *MachO) !void { + for (self.objects.items) |index| { + const object = self.getFile(index).?.object; + for (object.atoms.items) |atom_index| { + const atom = self.getAtom(atom_index) orelse continue; + if (!atom.flags.alive) continue; + atom.out_n_sect = try Atom.initOutputSection(atom.getInputSection(self), self); + } + } + if (self.getInternalObject()) |object| { + for (object.atoms.items) |atom_index| { + const atom = self.getAtom(atom_index) orelse continue; + if (!atom.flags.alive) continue; + atom.out_n_sect = try Atom.initOutputSection(atom.getInputSection(self), self); + } + } + if (self.data_sect_index == null) { + self.data_sect_index = try self.addSection("__DATA", "__data", .{}); + } +} + +fn initSyntheticSections(self: *MachO) !void { + const 
cpu_arch = self.getTarget().cpu.arch; + + if (self.got.symbols.items.len > 0) { + self.got_sect_index = try self.addSection("__DATA_CONST", "__got", .{ + .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, + .reserved1 = @intCast(self.stubs.symbols.items.len), + }); + } + + if (self.stubs.symbols.items.len > 0) { + self.stubs_sect_index = try self.addSection("__TEXT", "__stubs", .{ + .flags = macho.S_SYMBOL_STUBS | + macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + .reserved1 = 0, + .reserved2 = switch (cpu_arch) { + .x86_64 => 6, + .aarch64 => 3 * @sizeOf(u32), + else => 0, + }, + }); + self.stubs_helper_sect_index = try self.addSection("__TEXT", "__stub_helper", .{ + .flags = macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }); + self.la_symbol_ptr_sect_index = try self.addSection("__DATA", "__la_symbol_ptr", .{ + .flags = macho.S_LAZY_SYMBOL_POINTERS, + .reserved1 = @intCast(self.stubs.symbols.items.len + self.got.symbols.items.len), + }); + } + + if (self.objc_stubs.symbols.items.len > 0) { + self.objc_stubs_sect_index = try self.addSection("__TEXT", "__objc_stubs", .{ + .flags = macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }); + } + + if (self.tlv_ptr.symbols.items.len > 0) { + self.tlv_ptr_sect_index = try self.addSection("__DATA", "__thread_ptrs", .{ + .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, + }); + } + + const needs_unwind_info = for (self.objects.items) |index| { + if (self.getFile(index).?.object.compact_unwind_sect_index != null) break true; + } else false; + if (needs_unwind_info) { + self.unwind_info_sect_index = try self.addSection("__TEXT", "__unwind_info", .{}); + } + + const needs_eh_frame = for (self.objects.items) |index| { + if (self.getFile(index).?.object.eh_frame_sect_index != null) break true; + } else false; + if (needs_eh_frame) { + assert(needs_unwind_info); + self.eh_frame_sect_index = try self.addSection("__TEXT", "__eh_frame", .{}); + } + + for (self.boundary_symbols.items) 
|sym_index| { + const gpa = self.base.comp.gpa; + const sym = self.getSymbol(sym_index); + const name = sym.getName(self); + + if (eatPrefix(name, "segment$start$")) |segname| { + if (self.getSegmentByName(segname) == null) { // TODO check segname is valid + const prot = getSegmentProt(segname); + _ = try self.segments.append(gpa, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString(segname), + .initprot = prot, + .maxprot = prot, + }); + } + } else if (eatPrefix(name, "segment$stop$")) |segname| { + if (self.getSegmentByName(segname) == null) { // TODO check segname is valid + const prot = getSegmentProt(segname); + _ = try self.segments.append(gpa, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString(segname), + .initprot = prot, + .maxprot = prot, + }); + } + } else if (eatPrefix(name, "section$start$")) |actual_name| { + const sep = mem.indexOfScalar(u8, actual_name, '$').?; // TODO error rather than a panic + const segname = actual_name[0..sep]; // TODO check segname is valid + const sectname = actual_name[sep + 1 ..]; // TODO check sectname is valid + if (self.getSectionByName(segname, sectname) == null) { + _ = try self.addSection(segname, sectname, .{}); + } + } else if (eatPrefix(name, "section$stop$")) |actual_name| { + const sep = mem.indexOfScalar(u8, actual_name, '$').?; // TODO error rather than a panic + const segname = actual_name[0..sep]; // TODO check segname is valid + const sectname = actual_name[sep + 1 ..]; // TODO check sectname is valid + if (self.getSectionByName(segname, sectname) == null) { + _ = try self.addSection(segname, sectname, .{}); + } + } else unreachable; + } +} + +fn getSegmentProt(segname: []const u8) macho.vm_prot_t { + if (mem.eql(u8, segname, "__PAGEZERO")) return macho.PROT.NONE; + if (mem.eql(u8, segname, "__TEXT")) return macho.PROT.READ | macho.PROT.EXEC; + if (mem.eql(u8, segname, "__LINKEDIT")) return macho.PROT.READ; + return macho.PROT.READ | macho.PROT.WRITE; 
+} + fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void { _ = self; _ = atom_index; @@ -1786,12 +1897,111 @@ pub fn getTarget(self: MachO) std.Target { return self.base.comp.root_mod.resolved_target.result; } +/// XNU starting with Big Sur running on arm64 is caching inodes of running binaries. +/// Any change to the binary will effectively invalidate the kernel's cache +/// resulting in a SIGKILL on each subsequent run. Since when doing incremental +/// linking we're modifying a binary in-place, this will end up with the kernel +/// killing it on every subsequent run. To circumvent it, we will copy the file +/// into a new inode, remove the original file, and rename the copy to match +/// the original file. This is super messy, but there doesn't seem any other +/// way to please the XNU. +pub fn invalidateKernelCache(dir: std.fs.Dir, sub_path: []const u8) !void { + if (comptime builtin.target.isDarwin() and builtin.target.cpu.arch == .aarch64) { + try dir.copyFile(sub_path, dir, sub_path, .{}); + } +} + +inline fn conformUuid(out: *[Md5.digest_length]u8) void { + // LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats + out[6] = (out[6] & 0x0F) | (3 << 4); + out[8] = (out[8] & 0x3F) | 0x80; +} + +pub inline fn getPageSize(self: MachO) u16 { + return switch (self.getTarget().cpu.arch) { + .aarch64 => 0x4000, + .x86_64 => 0x1000, + else => unreachable, + }; +} + +pub fn requiresCodeSig(self: MachO) bool { + if (self.entitlements) |_| return true; + // if (self.options.adhoc_codesign) |cs| return cs; + return switch (self.getTarget().cpu.arch) { + .aarch64 => true, + else => false, + }; +} + +inline fn requiresThunks(self: MachO) bool { + return self.getTarget().cpu.arch == .aarch64; +} + +const AddSectionOpts = struct { + flags: u32 = macho.S_REGULAR, + reserved1: u32 = 0, + reserved2: u32 = 0, +}; + +pub fn addSection( + self: *MachO, + segname: []const u8, + sectname: []const u8, + opts: AddSectionOpts, +) !u8 
{ + const gpa = self.base.comp.gpa; + const index = @as(u8, @intCast(try self.sections.addOne(gpa))); + self.sections.set(index, .{ + .segment_id = 0, // Segments will be created automatically later down the pipeline. + .header = .{ + .sectname = makeStaticString(sectname), + .segname = makeStaticString(segname), + .flags = opts.flags, + .reserved1 = opts.reserved1, + .reserved2 = opts.reserved2, + }, + }); + return index; +} + pub fn makeStaticString(bytes: []const u8) [16]u8 { var buf = [_]u8{0} ** 16; @memcpy(buf[0..bytes.len], bytes); return buf; } +pub fn getSegmentByName(self: MachO, segname: []const u8) ?u8 { + for (self.segments.items, 0..) |seg, i| { + if (mem.eql(u8, segname, seg.segName())) return @as(u8, @intCast(i)); + } else return null; +} + +pub fn getSectionByName(self: MachO, segname: []const u8, sectname: []const u8) ?u8 { + for (self.sections.items(.header), 0..) |header, i| { + if (mem.eql(u8, header.segName(), segname) and mem.eql(u8, header.sectName(), sectname)) + return @as(u8, @intCast(i)); + } else return null; +} + +pub fn getTlsAddress(self: MachO) u64 { + for (self.sections.items(.header)) |header| switch (header.type()) { + macho.S_THREAD_LOCAL_REGULAR, + macho.S_THREAD_LOCAL_ZEROFILL, + => return header.addr, + else => {}, + }; + return 0; +} + +pub inline fn getTextSegment(self: *MachO) *macho.segment_command_64 { + return &self.segments.items[self.text_seg_index.?]; +} + +pub inline fn getLinkeditSegment(self: *MachO) *macho.segment_command_64 { + return &self.segments.items[self.linkedit_seg_index.?]; +} + pub fn getFile(self: *MachO, index: File.Index) ?File { const tag = self.files.items(.tags)[index]; return switch (tag) { From 32ebceea95d22d399bc979370e5ce4cc3ca7d0ef Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 11 Jan 2024 21:30:32 +0100 Subject: [PATCH 021/133] macho: sort sections; use Atom.Alignment for alignment; init segments --- src/link/MachO.zig | 357 +++++++++++++++++++++++++++++- src/link/MachO/Atom.zig | 2 
+- src/link/MachO/InternalObject.zig | 4 +- src/link/MachO/Object.zig | 6 +- src/link/MachO/UnwindInfo.zig | 4 +- src/link/MachO/eh_frame.zig | 2 +- src/link/MachO/thunks.zig | 11 +- 7 files changed, 367 insertions(+), 19 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 27483601d2..c7a9d8c5cf 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -89,7 +89,7 @@ weak_defines: bool = false, /// SDK layout sdk_layout: ?SdkLayout, /// Size of the __PAGEZERO segment. -pagezero_vmsize: ?u64, +pagezero_size: ?u64, /// Minimum space for future expansion of the load commands. headerpad_size: ?u32, /// Set enough space as if all paths were MATPATHLEN. @@ -170,7 +170,7 @@ pub fn createEmpty( .build_id = options.build_id, .rpath_list = options.rpath_list, }, - .pagezero_vmsize = options.pagezero_size, + .pagezero_size = options.pagezero_size, .headerpad_size = options.headerpad_size, .headerpad_max_install_names = options.headerpad_max_install_names, .dead_strip_dylibs = options.dead_strip_dylibs, @@ -527,6 +527,11 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node try self.initOutputSections(); try self.initSyntheticSections(); + try self.sortSections(); + try self.addAtomsToSections(); + try self.calcSectionSizes(); + try self.generateUnwindInfo(); + try self.initSegments(); state_log.debug("{}", .{self.dumpState()}); @@ -613,7 +618,7 @@ fn dumpArgv(self: *MachO, comp: *Compilation) !void { try argv.append(rpath); } - if (self.pagezero_vmsize) |size| { + if (self.pagezero_size) |size| { try argv.append("-pagezero_size"); try argv.append(try std.fmt.allocPrint(arena, "0x{x}", .{size})); } @@ -1667,6 +1672,350 @@ fn getSegmentProt(segname: []const u8) macho.vm_prot_t { return macho.PROT.READ | macho.PROT.WRITE; } +fn getSegmentRank(segname: []const u8) u4 { + if (mem.eql(u8, segname, "__PAGEZERO")) return 0x0; + if (mem.eql(u8, segname, "__TEXT")) return 0x1; + if (mem.eql(u8, segname, "__DATA_CONST")) return 0x2; + 
if (mem.eql(u8, segname, "__DATA")) return 0x3; + if (mem.eql(u8, segname, "__LINKEDIT")) return 0x5; + return 0x4; +} + +fn getSectionRank(self: *MachO, sect_index: u8) u8 { + const header = self.sections.items(.header)[sect_index]; + const segment_rank = getSegmentRank(header.segName()); + const section_rank: u4 = blk: { + if (header.isCode()) { + if (mem.eql(u8, "__text", header.sectName())) break :blk 0x0; + if (header.type() == macho.S_SYMBOL_STUBS) break :blk 0x1; + break :blk 0x2; + } + switch (header.type()) { + macho.S_NON_LAZY_SYMBOL_POINTERS, + macho.S_LAZY_SYMBOL_POINTERS, + => break :blk 0x0, + + macho.S_MOD_INIT_FUNC_POINTERS => break :blk 0x1, + macho.S_MOD_TERM_FUNC_POINTERS => break :blk 0x2, + macho.S_ZEROFILL => break :blk 0xf, + macho.S_THREAD_LOCAL_REGULAR => break :blk 0xd, + macho.S_THREAD_LOCAL_ZEROFILL => break :blk 0xe, + + else => { + if (mem.eql(u8, "__unwind_info", header.sectName())) break :blk 0xe; + if (mem.eql(u8, "__compact_unwind", header.sectName())) break :blk 0xe; + if (mem.eql(u8, "__eh_frame", header.sectName())) break :blk 0xf; + break :blk 0x3; + }, + } + }; + return (@as(u8, @intCast(segment_rank)) << 4) + section_rank; +} + +pub fn sortSections(self: *MachO) !void { + const Entry = struct { + index: u8, + + pub fn lessThan(macho_file: *MachO, lhs: @This(), rhs: @This()) bool { + return macho_file.getSectionRank(lhs.index) < macho_file.getSectionRank(rhs.index); + } + }; + + const gpa = self.base.comp.gpa; + + var entries = try std.ArrayList(Entry).initCapacity(gpa, self.sections.slice().len); + defer entries.deinit(); + for (0..self.sections.slice().len) |index| { + entries.appendAssumeCapacity(.{ .index = @intCast(index) }); + } + + mem.sort(Entry, entries.items, self, Entry.lessThan); + + const backlinks = try gpa.alloc(u8, entries.items.len); + defer gpa.free(backlinks); + for (entries.items, 0..) 
|entry, i| { + backlinks[entry.index] = @intCast(i); + } + + var slice = self.sections.toOwnedSlice(); + defer slice.deinit(gpa); + + try self.sections.ensureTotalCapacity(gpa, slice.len); + for (entries.items) |sorted| { + self.sections.appendAssumeCapacity(slice.get(sorted.index)); + } + + for (self.objects.items) |index| { + for (self.getFile(index).?.object.atoms.items) |atom_index| { + const atom = self.getAtom(atom_index) orelse continue; + if (!atom.flags.alive) continue; + atom.out_n_sect = backlinks[atom.out_n_sect]; + } + } + if (self.getInternalObject()) |object| { + for (object.atoms.items) |atom_index| { + const atom = self.getAtom(atom_index) orelse continue; + if (!atom.flags.alive) continue; + atom.out_n_sect = backlinks[atom.out_n_sect]; + } + } + + for (&[_]*?u8{ + &self.data_sect_index, + &self.got_sect_index, + &self.stubs_sect_index, + &self.stubs_helper_sect_index, + &self.la_symbol_ptr_sect_index, + &self.tlv_ptr_sect_index, + &self.eh_frame_sect_index, + &self.unwind_info_sect_index, + &self.objc_stubs_sect_index, + }) |maybe_index| { + if (maybe_index.*) |*index| { + index.* = backlinks[index.*]; + } + } +} + +pub fn addAtomsToSections(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.objects.items) |index| { + const object = self.getFile(index).?.object; + for (object.atoms.items) |atom_index| { + const atom = self.getAtom(atom_index) orelse continue; + if (!atom.flags.alive) continue; + const atoms = &self.sections.items(.atoms)[atom.out_n_sect]; + try atoms.append(self.base.comp.gpa, atom_index); + } + for (object.symbols.items) |sym_index| { + const sym = self.getSymbol(sym_index); + const atom = sym.getAtom(self) orelse continue; + if (!atom.flags.alive) continue; + if (sym.getFile(self).?.getIndex() != index) continue; + sym.out_n_sect = atom.out_n_sect; + } + } + if (self.getInternalObject()) |object| { + for (object.atoms.items) |atom_index| { + const atom = self.getAtom(atom_index) orelse 
continue; + if (!atom.flags.alive) continue; + const atoms = &self.sections.items(.atoms)[atom.out_n_sect]; + try atoms.append(self.base.comp.gpa, atom_index); + } + for (object.symbols.items) |sym_index| { + const sym = self.getSymbol(sym_index); + const atom = sym.getAtom(self) orelse continue; + if (!atom.flags.alive) continue; + if (sym.getFile(self).?.getIndex() != object.index) continue; + sym.out_n_sect = atom.out_n_sect; + } + } +} + +fn calcSectionSizes(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const cpu_arch = self.getTarget().cpu.arch; + + if (self.data_sect_index) |idx| { + const header = &self.sections.items(.header)[idx]; + header.size += @sizeOf(u64); + header.@"align" = 3; + } + + const slice = self.sections.slice(); + for (slice.items(.header), slice.items(.atoms)) |*header, atoms| { + if (atoms.items.len == 0) continue; + if (self.requiresThunks() and header.isCode()) continue; + + for (atoms.items) |atom_index| { + const atom = self.getAtom(atom_index).?; + const atom_alignment = atom.alignment.toByteUnits(1); + const offset = mem.alignForward(u64, header.size, atom_alignment); + const padding = offset - header.size; + atom.value = offset; + header.size += padding + atom.size; + header.@"align" = @max(header.@"align", atom.alignment.toLog2Units()); + } + } + + if (self.requiresThunks()) { + for (slice.items(.header), slice.items(.atoms), 0..) |header, atoms, i| { + if (!header.isCode()) continue; + if (atoms.items.len == 0) continue; + + // Create jump/branch range extenders if needed. 
+ try thunks.createThunks(@intCast(i), self); + } + } + + if (self.got_sect_index) |idx| { + const header = &self.sections.items(.header)[idx]; + header.size = self.got.size(); + header.@"align" = 3; + } + + if (self.stubs_sect_index) |idx| { + const header = &self.sections.items(.header)[idx]; + header.size = self.stubs.size(self); + header.@"align" = switch (cpu_arch) { + .x86_64 => 0, + .aarch64 => 2, + else => 0, + }; + } + + if (self.stubs_helper_sect_index) |idx| { + const header = &self.sections.items(.header)[idx]; + header.size = self.stubs_helper.size(self); + header.@"align" = switch (cpu_arch) { + .x86_64 => 0, + .aarch64 => 2, + else => 0, + }; + } + + if (self.la_symbol_ptr_sect_index) |idx| { + const header = &self.sections.items(.header)[idx]; + header.size = self.la_symbol_ptr.size(self); + header.@"align" = 3; + } + + if (self.tlv_ptr_sect_index) |idx| { + const header = &self.sections.items(.header)[idx]; + header.size = self.tlv_ptr.size(); + header.@"align" = 3; + } + + if (self.objc_stubs_sect_index) |idx| { + const header = &self.sections.items(.header)[idx]; + header.size = self.objc_stubs.size(self); + header.@"align" = switch (cpu_arch) { + .x86_64 => 0, + .aarch64 => 2, + else => 0, + }; + } +} + +fn generateUnwindInfo(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + if (self.eh_frame_sect_index) |index| { + const sect = &self.sections.items(.header)[index]; + sect.size = try eh_frame.calcSize(self); + sect.@"align" = 3; + } + if (self.unwind_info_sect_index) |index| { + const sect = &self.sections.items(.header)[index]; + self.unwind_info.generate(self) catch |err| switch (err) { + error.TooManyPersonalities => return self.reportUnexpectedError( + "too many personalities in unwind info", + .{}, + ), + else => |e| return e, + }; + sect.size = self.unwind_info.calcSize(); + sect.@"align" = 2; + } +} + +fn initSegments(self: *MachO) !void { + const gpa = self.base.comp.gpa; + const slice = self.sections.slice(); 
+ + // First, create segments required by sections + for (slice.items(.header)) |header| { + const segname = header.segName(); + if (self.getSegmentByName(segname) == null) { + const prot = getSegmentProt(segname); + try self.segments.append(gpa, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString(segname), + .maxprot = prot, + .initprot = prot, + }); + } + } + + // Add __PAGEZERO if required + const pagezero_size = self.pagezero_size orelse default_pagezero_size; + const aligned_pagezero_size = mem.alignBackward(u64, pagezero_size, self.getPageSize()); + if (!self.base.isDynLib() and aligned_pagezero_size > 0) { + if (aligned_pagezero_size != pagezero_size) { + // TODO convert into a warning + log.warn("requested __PAGEZERO size (0x{x}) is not page aligned", .{pagezero_size}); + log.warn(" rounding down to 0x{x}", .{aligned_pagezero_size}); + } + try self.segments.append(gpa, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString("__PAGEZERO"), + .vmsize = aligned_pagezero_size, + }); + } + + // Add __LINKEDIT + { + const protection = getSegmentProt("__LINKEDIT"); + self.linkedit_seg_index = @intCast(self.segments.items.len); + try self.segments.append(gpa, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString("__LINKEDIT"), + .maxprot = protection, + .initprot = protection, + }); + } + + // __TEXT segment is non-optional + if (self.getSegmentByName("__TEXT") == null) { + const protection = getSegmentProt("__TEXT"); + try self.segments.append(gpa, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString("__TEXT"), + .maxprot = protection, + .initprot = protection, + }); + } + + const sortFn = struct { + fn sortFn(ctx: void, lhs: macho.segment_command_64, rhs: macho.segment_command_64) bool { + _ = ctx; + return getSegmentRank(lhs.segName()) < getSegmentRank(rhs.segName()); + } + }.sortFn; + + // Sort segments + mem.sort(macho.segment_command_64, 
self.segments.items, {}, sortFn); + + // Attach sections to segments + for (slice.items(.header), slice.items(.segment_id)) |header, *seg_id| { + const segname = header.segName(); + const segment_id = self.getSegmentByName(segname) orelse blk: { + const segment_id = @as(u8, @intCast(self.segments.items.len)); + const protection = getSegmentProt(segname); + try self.segments.append(gpa, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString(segname), + .maxprot = protection, + .initprot = protection, + }); + break :blk segment_id; + }; + const segment = &self.segments.items[segment_id]; + segment.cmdsize += @sizeOf(macho.section_64); + segment.nsects += 1; + seg_id.* = segment_id; + } + + self.pagezero_seg_index = self.getSegmentByName("__PAGEZERO"); + self.text_seg_index = self.getSegmentByName("__TEXT").?; + self.linkedit_seg_index = self.getSegmentByName("__LINKEDIT").?; +} + fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void { _ = self; _ = atom_index; @@ -2767,7 +3116,7 @@ pub const min_text_capacity = padToIdeal(minimum_text_block_size); /// Default virtual memory offset corresponds to the size of __PAGEZERO segment and /// start of __TEXT segment. -pub const default_pagezero_vmsize: u64 = 0x100000000; +pub const default_pagezero_size: u64 = 0x100000000; /// We commit 0x1000 = 4096 bytes of space to the header and /// the table of load commands. This should be plenty for any diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 2534f2b078..cb5feef263 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -11,7 +11,7 @@ file: File.Index = 0, size: u64 = 0, /// Alignment of this atom as a power of two. -alignment: u32 = 0, +alignment: Alignment = .@"1", /// Index of the input section. 
n_sect: u32 = 0, diff --git a/src/link/MachO/InternalObject.zig b/src/link/MachO/InternalObject.zig index 26ec86bb67..88663c2e37 100644 --- a/src/link/MachO/InternalObject.zig +++ b/src/link/MachO/InternalObject.zig @@ -48,7 +48,7 @@ fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_fil atom.name = try macho_file.strings.insert(gpa, name); atom.file = self.index; atom.size = methname.len + 1; - atom.alignment = 0; + atom.alignment = .@"1"; const n_sect = try self.addSection(gpa, "__TEXT", "__objc_methname"); const sect = &self.sections.items(.header)[n_sect]; @@ -82,7 +82,7 @@ fn addObjcSelrefsSection( atom.name = try macho_file.strings.insert(gpa, name); atom.file = self.index; atom.size = @sizeOf(u64); - atom.alignment = 3; + atom.alignment = .@"8"; const n_sect = try self.addSection(gpa, "__DATA", "__objc_selrefs"); const sect = &self.sections.items(.header)[n_sect]; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 9d39621d16..a6f865901e 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -346,7 +346,7 @@ fn addAtom(self: *Object, args: AddAtomArgs, macho_file: *MachO) !Atom.Index { atom.name = try macho_file.strings.insert(gpa, args.name); atom.n_sect = args.n_sect; atom.size = args.size; - atom.alignment = args.alignment; + atom.alignment = Atom.Alignment.fromLog2Units(args.alignment); atom.off = args.off; try self.atoms.append(gpa, atom_index); return atom_index; @@ -1120,13 +1120,13 @@ pub fn convertTentativeDefinitions(self: *Object, macho_file: *MachO) !void { atom.name = try macho_file.strings.insert(gpa, name); atom.file = self.index; atom.size = nlist.n_value; - atom.alignment = (nlist.n_desc >> 8) & 0x0f; + atom.alignment = Atom.Alignment.fromLog2Units((nlist.n_desc >> 8) & 0x0f); const n_sect = try self.addSection(gpa, "__DATA", "__common"); const sect = &self.sections.items(.header)[n_sect]; sect.flags = macho.S_ZEROFILL; sect.size = atom.size; - sect.@"align" = atom.alignment; 
+ sect.@"align" = atom.alignment.toLog2Units(); atom.n_sect = n_sect; sym.value = 0; diff --git a/src/link/MachO/UnwindInfo.zig b/src/link/MachO/UnwindInfo.zig index 8a5e3661eb..a993809fd1 100644 --- a/src/link/MachO/UnwindInfo.zig +++ b/src/link/MachO/UnwindInfo.zig @@ -26,7 +26,7 @@ pub fn deinit(info: *UnwindInfo, allocator: Allocator) void { } fn canFold(macho_file: *MachO, lhs_index: Record.Index, rhs_index: Record.Index) bool { - const cpu_arch = macho_file.options.cpu_arch.?; + const cpu_arch = macho_file.getTarget().cpu.arch; const lhs = macho_file.getUnwindRecord(lhs_index); const rhs = macho_file.getUnwindRecord(rhs_index); if (cpu_arch == .x86_64) { @@ -42,7 +42,7 @@ fn canFold(macho_file: *MachO, lhs_index: Record.Index, rhs_index: Record.Index) } pub fn generate(info: *UnwindInfo, macho_file: *MachO) !void { - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; log.debug("generating unwind info", .{}); diff --git a/src/link/MachO/eh_frame.zig b/src/link/MachO/eh_frame.zig index abcd44cc6b..91a9cafb54 100644 --- a/src/link/MachO/eh_frame.zig +++ b/src/link/MachO/eh_frame.zig @@ -314,7 +314,7 @@ pub fn calcSize(macho_file: *MachO) !u32 { var offset: u32 = 0; - var cies = std.ArrayList(Cie).init(macho_file.base.allocator); + var cies = std.ArrayList(Cie).init(macho_file.base.comp.gpa); defer cies.deinit(); for (macho_file.objects.items) |index| { diff --git a/src/link/MachO/thunks.zig b/src/link/MachO/thunks.zig index 7f0dc56685..2e9602f8d8 100644 --- a/src/link/MachO/thunks.zig +++ b/src/link/MachO/thunks.zig @@ -2,7 +2,7 @@ pub fn createThunks(sect_id: u8, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const slice = macho_file.sections.slice(); const header = &slice.items(.header)[sect_id]; const atoms = slice.items(.atoms)[sect_id].items; @@ -46,18 +46,17 @@ pub fn createThunks(sect_id: u8, macho_file: *MachO) !void { 
atom.thunk_index = thunk_index; } - thunk.value = try advance(header, thunk.size(), 2); + thunk.value = try advance(header, thunk.size(), .@"4"); log.debug("thunk({d}) : {}", .{ thunk_index, thunk.fmt(macho_file) }); } } -fn advance(sect: *macho.section_64, size: u64, pow2_align: u32) !u64 { - const alignment = try math.powi(u32, 2, pow2_align); - const offset = mem.alignForward(u64, sect.size, alignment); +fn advance(sect: *macho.section_64, size: u64, alignment: Atom.Alignment) !u64 { + const offset = alignment.forward(sect.size); const padding = offset - sect.size; sect.size += padding + size; - sect.@"align" = @max(sect.@"align", pow2_align); + sect.@"align" = @max(sect.@"align", alignment.toLog2Units()); return offset; } From 98d6d40cd64b5c52ba2ddd01fbedb5069bf0458d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 11 Jan 2024 23:23:16 +0100 Subject: [PATCH 022/133] macho: allocate sections, segments and atoms --- src/link/MachO.zig | 194 +++++++++++++++++++++++++++++-- src/link/MachO/load_commands.zig | 30 +++-- 2 files changed, 195 insertions(+), 29 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index c7a9d8c5cf..c87f2f7fd5 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -110,6 +110,8 @@ compatibility_version: ?std.SemanticVersion, entry_name: ?[]const u8, platform: Platform, sdk_version: ?std.SemanticVersion, +/// Rpath table +rpath_table: std.StringArrayHashMapUnmanaged(void) = .{}, /// Hot-code swapping state. 
hot_state: if (is_hot_update_compatible) HotUpdateState else struct {} = .{}, @@ -200,6 +202,12 @@ pub fn createEmpty( .mode = link.File.determineMode(false, output_mode, link_mode), }); + // Filter rpaths + try self.rpath_table.ensureUnusedCapacity(gpa, self.base.rpath_list.len); + for (options.rpath_list) |rpath| { + _ = self.rpath_table.putAssumeCapacity(rpath, {}); + } + // Append null file try self.files.append(gpa, .null); // Atom at index 0 is reserved as null atom @@ -317,6 +325,7 @@ pub fn deinit(self: *MachO) void { } self.thunks.deinit(gpa); self.unwind_records.deinit(gpa); + self.rpath_table.deinit(gpa); } pub fn flush(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node) link.File.FlushError!void { @@ -378,15 +387,6 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node if (module_obj_path) |path| try positionals.append(.{ .path = path }); - // rpaths - var rpath_table = std.StringArrayHashMap(void).init(gpa); - defer rpath_table.deinit(); - try rpath_table.ensureUnusedCapacity(self.base.rpath_list.len); - - for (self.base.rpath_list) |rpath| { - _ = rpath_table.putAssumeCapacity(rpath, {}); - } - for (positionals.items) |obj| { self.parsePositional(obj.path, obj.must_link) catch |err| switch (err) { error.MalformedObject, @@ -533,6 +533,11 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node try self.generateUnwindInfo(); try self.initSegments(); + try self.allocateSections(); + self.allocateSegments(); + self.allocateAtoms(); + self.allocateSyntheticSymbols(); + state_log.debug("{}", .{self.dumpState()}); @panic("TODO"); @@ -613,7 +618,7 @@ fn dumpArgv(self: *MachO, comp: *Compilation) !void { try argv.append(syslibroot); } - for (self.base.rpath_list) |rpath| { + for (self.rpath_table.keys()) |rpath| { try argv.append("-rpath"); try argv.append(rpath); } @@ -2016,6 +2021,171 @@ fn initSegments(self: *MachO) !void { self.linkedit_seg_index = self.getSegmentByName("__LINKEDIT").?; } 
+fn allocateSections(self: *MachO) !void { + const headerpad = load_commands.calcMinHeaderPadSize(self); + var vmaddr: u64 = if (self.pagezero_seg_index) |index| + self.segments.items[index].vmaddr + self.segments.items[index].vmsize + else + 0; + vmaddr += headerpad; + var fileoff = headerpad; + + const page_size = self.getPageSize(); + const slice = self.sections.slice(); + + var next_seg_id: u8 = if (self.pagezero_seg_index) |index| index + 1 else 0; + for (slice.items(.header), slice.items(.segment_id)) |*header, seg_id| { + if (seg_id != next_seg_id) { + vmaddr = mem.alignForward(u64, vmaddr, page_size); + fileoff = mem.alignForward(u32, fileoff, page_size); + } + + const alignment = try math.powi(u32, 2, header.@"align"); + + vmaddr = mem.alignForward(u64, vmaddr, alignment); + header.addr = vmaddr; + vmaddr += header.size; + + if (!header.isZerofill()) { + fileoff = mem.alignForward(u32, fileoff, alignment); + header.offset = fileoff; + fileoff += @intCast(header.size); + } + + next_seg_id = seg_id; + } +} + +fn allocateSegments(self: *MachO) void { + const page_size = self.getPageSize(); + var vmaddr = if (self.pagezero_seg_index) |index| + self.segments.items[index].vmaddr + self.segments.items[index].vmsize + else + 0; + var fileoff: u64 = 0; + const index = if (self.pagezero_seg_index) |index| index + 1 else 0; + + const slice = self.sections.slice(); + var next_sect_id: u8 = 0; + for (self.segments.items[index..], index..) 
|*seg, seg_id| { + seg.vmaddr = vmaddr; + seg.fileoff = fileoff; + + for ( + slice.items(.header)[next_sect_id..], + slice.items(.segment_id)[next_sect_id..], + ) |header, sid| { + if (seg_id != sid) break; + + vmaddr = header.addr + header.size; + if (!header.isZerofill()) { + fileoff = header.offset + header.size; + } + + next_sect_id += 1; + } + + vmaddr = mem.alignForward(u64, vmaddr, page_size); + fileoff = mem.alignForward(u64, fileoff, page_size); + + seg.vmsize = vmaddr - seg.vmaddr; + seg.filesize = fileoff - seg.fileoff; + } +} + +pub fn allocateAtoms(self: *MachO) void { + const slice = self.sections.slice(); + for (slice.items(.header), slice.items(.atoms)) |header, atoms| { + if (atoms.items.len == 0) continue; + for (atoms.items) |atom_index| { + const atom = self.getAtom(atom_index).?; + assert(atom.flags.alive); + atom.value += header.addr; + } + } + + for (self.thunks.items) |*thunk| { + const header = self.sections.items(.header)[thunk.out_n_sect]; + thunk.value += header.addr; + } +} + +fn allocateSyntheticSymbols(self: *MachO) void { + const text_seg = self.getTextSegment(); + + if (self.mh_execute_header_index) |index| { + const global = self.getSymbol(index); + global.value = text_seg.vmaddr; + } + + if (self.data_sect_index) |idx| { + const sect = self.sections.items(.header)[idx]; + for (&[_]?Symbol.Index{ + self.dso_handle_index, + self.mh_dylib_header_index, + self.dyld_private_index, + }) |maybe_index| { + if (maybe_index) |index| { + const global = self.getSymbol(index); + global.value = sect.addr; + global.out_n_sect = idx; + } + } + } + + for (self.boundary_symbols.items) |sym_index| { + const sym = self.getSymbol(sym_index); + const name = sym.getName(self); + + sym.flags.@"export" = false; + sym.value = text_seg.vmaddr; + + if (mem.startsWith(u8, name, "segment$start$")) { + const segname = name["segment$start$".len..]; + if (self.getSegmentByName(segname)) |seg_id| { + const seg = self.segments.items[seg_id]; + sym.value = 
seg.vmaddr; + } + } else if (mem.startsWith(u8, name, "segment$stop$")) { + const segname = name["segment$stop$".len..]; + if (self.getSegmentByName(segname)) |seg_id| { + const seg = self.segments.items[seg_id]; + sym.value = seg.vmaddr + seg.vmsize; + } + } else if (mem.startsWith(u8, name, "section$start$")) { + const actual_name = name["section$start$".len..]; + const sep = mem.indexOfScalar(u8, actual_name, '$').?; // TODO error rather than a panic + const segname = actual_name[0..sep]; + const sectname = actual_name[sep + 1 ..]; + if (self.getSectionByName(segname, sectname)) |sect_id| { + const sect = self.sections.items(.header)[sect_id]; + sym.value = sect.addr; + sym.out_n_sect = sect_id; + } + } else if (mem.startsWith(u8, name, "section$stop$")) { + const actual_name = name["section$stop$".len..]; + const sep = mem.indexOfScalar(u8, actual_name, '$').?; // TODO error rather than a panic + const segname = actual_name[0..sep]; + const sectname = actual_name[sep + 1 ..]; + if (self.getSectionByName(segname, sectname)) |sect_id| { + const sect = self.sections.items(.header)[sect_id]; + sym.value = sect.addr + sect.size; + sym.out_n_sect = sect_id; + } + } else unreachable; + } + + if (self.objc_stubs.symbols.items.len > 0) { + const addr = self.sections.items(.header)[self.objc_stubs_sect_index.?].addr; + + for (self.objc_stubs.symbols.items, 0..) 
|sym_index, idx| { + const sym = self.getSymbol(sym_index); + sym.value = addr + idx * ObjcStubsSection.entrySize(self.getTarget().cpu.arch); + sym.out_n_sect = self.objc_stubs_sect_index.?; + } + } +} + fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void { _ = self; _ = atom_index; @@ -2952,8 +3122,8 @@ pub const Platform = struct { pub fn isBuildVersionCompatible(plat: Platform) bool { inline for (supported_platforms) |sup_plat| { - if (sup_plat[0] == plat.platform) { - return sup_plat[1] <= plat.version.value; + if (sup_plat[0] == plat.os_tag and sup_plat[1] == plat.abi) { + return sup_plat[2] <= plat.toAppleVersion(); } } return false; diff --git a/src/link/MachO/load_commands.zig b/src/link/MachO/load_commands.zig index 725bd4291f..66b838f95c 100644 --- a/src/link/MachO/load_commands.zig +++ b/src/link/MachO/load_commands.zig @@ -18,7 +18,6 @@ fn calcInstallNameLen(cmd_size: u64, name: []const u8, assume_max_path_len: bool } pub fn calcLoadCommandsSize(macho_file: *MachO, assume_max_path_len: bool) u32 { - const options = &macho_file.options; var sizeofcmds: u64 = 0; // LC_SEGMENT_64 @@ -44,14 +43,14 @@ pub fn calcLoadCommandsSize(macho_file: *MachO, assume_max_path_len: bool) u32 { false, ); // LC_MAIN - if (!options.dylib) { + if (!macho_file.base.isDynLib()) { sizeofcmds += @sizeOf(macho.entry_point_command); } // LC_ID_DYLIB - if (options.dylib) { + if (macho_file.base.isDynLib()) { sizeofcmds += blk: { - const emit = options.emit; - const install_name = options.install_name orelse emit.sub_path; + const emit = macho_file.base.emit; + const install_name = macho_file.install_name orelse emit.sub_path; break :blk calcInstallNameLen( @sizeOf(macho.dylib_command), install_name, @@ -61,7 +60,7 @@ pub fn calcLoadCommandsSize(macho_file: *MachO, assume_max_path_len: bool) u32 { } // LC_RPATH { - for (options.rpath_list) |rpath| { + for (macho_file.rpath_table.keys()) |rpath| { sizeofcmds += calcInstallNameLen( 
@sizeOf(macho.rpath_command), rpath, @@ -71,14 +70,12 @@ pub fn calcLoadCommandsSize(macho_file: *MachO, assume_max_path_len: bool) u32 { } // LC_SOURCE_VERSION sizeofcmds += @sizeOf(macho.source_version_command); - if (options.platform) |platform| { - if (platform.isBuildVersionCompatible()) { - // LC_BUILD_VERSION - sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); - } else { - // LC_VERSION_MIN_* - sizeofcmds += @sizeOf(macho.version_min_command); - } + if (macho_file.platform.isBuildVersionCompatible()) { + // LC_BUILD_VERSION + sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + } else { + // LC_VERSION_MIN_* + sizeofcmds += @sizeOf(macho.version_min_command); } // LC_UUID sizeofcmds += @sizeOf(macho.uuid_command); @@ -134,11 +131,10 @@ pub fn calcLoadCommandsSizeObject(macho_file: *MachO) u32 { } pub fn calcMinHeaderPadSize(macho_file: *MachO) u32 { - const options = &macho_file.options; - var padding: u32 = calcLoadCommandsSize(macho_file, false) + (options.headerpad orelse 0); + var padding: u32 = calcLoadCommandsSize(macho_file, false) + (macho_file.headerpad_size orelse 0); log.debug("minimum requested headerpad size 0x{x}", .{padding + @sizeOf(macho.mach_header_64)}); - if (options.headerpad_max_install_names) { + if (macho_file.headerpad_max_install_names) { const min_headerpad_size: u32 = calcLoadCommandsSize(macho_file, true); log.debug("headerpad_max_install_names minimum headerpad size 0x{x}", .{ min_headerpad_size + @sizeOf(macho.mach_header_64), From 3968aea8ec98277bcc5b3c26beb5592f26b1a9fd Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 12 Jan 2024 00:05:17 +0100 Subject: [PATCH 023/133] macho: write to file --- src/link/MachO.zig | 700 +++++++++++++++++++++++++++++- src/link/MachO/Atom.zig | 15 +- src/link/MachO/CodeSignature.zig | 4 +- src/link/MachO/Object.zig | 6 +- src/link/MachO/Symbol.zig | 11 +- src/link/MachO/dyld_info/bind.zig | 8 +- 
src/link/MachO/eh_frame.zig | 4 +- src/link/MachO/load_commands.zig | 61 ++- src/link/MachO/uuid.zig | 20 +- 9 files changed, 773 insertions(+), 56 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index c87f2f7fd5..af5d165665 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -540,7 +540,63 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node state_log.debug("{}", .{self.dumpState()}); - @panic("TODO"); + try self.initDyldInfoSections(); + self.writeAtoms() catch |err| switch (err) { + error.ResolveFailed => return error.FlushFailure, + else => |e| { + try self.reportUnexpectedError("unexpected error while resolving relocations", .{}); + return e; + }, + }; + try self.writeUnwindInfo(); + try self.finalizeDyldInfoSections(); + try self.writeSyntheticSections(); + + var off = math.cast(u32, self.getLinkeditSegment().fileoff) orelse return error.Overflow; + off = try self.writeDyldInfoSections(off); + off = mem.alignForward(u32, off, @alignOf(u64)); + off = try self.writeFunctionStarts(off); + off = mem.alignForward(u32, off, @alignOf(u64)); + off = try self.writeDataInCode(self.getTextSegment().vmaddr, off); + try self.calcSymtabSize(); + off = mem.alignForward(u32, off, @alignOf(u64)); + off = try self.writeSymtab(off); + off = mem.alignForward(u32, off, @alignOf(u32)); + off = try self.writeIndsymtab(off); + off = mem.alignForward(u32, off, @alignOf(u64)); + off = try self.writeStrtab(off); + + self.getLinkeditSegment().filesize = off - self.getLinkeditSegment().fileoff; + + var codesig: ?CodeSignature = if (self.requiresCodeSig()) blk: { + // Preallocate space for the code signature. + // We need to do this at this stage so that we have the load commands with proper values + // written out to the file. + // The most important here is to have the correct vm and filesize of the __LINKEDIT segment + // where the code signature goes into. 
+ var codesig = CodeSignature.init(self.getPageSize()); + codesig.code_directory.ident = self.base.emit.sub_path; + if (self.entitlements) |path| try codesig.addEntitlements(gpa, path); + try self.writeCodeSignaturePadding(&codesig); + break :blk codesig; + } else null; + defer if (codesig) |*csig| csig.deinit(gpa); + + self.getLinkeditSegment().vmsize = mem.alignForward( + u64, + self.getLinkeditSegment().filesize, + self.getPageSize(), + ); + + const ncmds, const sizeofcmds, const uuid_cmd_offset = try self.writeLoadCommands(); + try self.writeHeader(ncmds, sizeofcmds); + try self.writeUuid(uuid_cmd_offset, self.requiresCodeSig()); + + if (codesig) |*csig| { + try self.writeCodeSignature(csig); // code signing always comes last + const emit = self.base.emit; + try invalidateKernelCache(emit.directory.handle, emit.sub_path); + } } /// --verbose-link output @@ -2186,6 +2242,646 @@ fn allocateSyntheticSymbols(self: *MachO) void { } } +fn initDyldInfoSections(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = self.base.comp.gpa; + + if (self.got_sect_index != null) try self.got.addDyldRelocs(self); + if (self.tlv_ptr_sect_index != null) try self.tlv_ptr.addDyldRelocs(self); + if (self.la_symbol_ptr_sect_index != null) try self.la_symbol_ptr.addDyldRelocs(self); + try self.initExportTrie(); + + var nrebases: usize = 0; + var nbinds: usize = 0; + var nweak_binds: usize = 0; + for (self.objects.items) |index| { + const object = self.getFile(index).?.object; + nrebases += object.num_rebase_relocs; + nbinds += object.num_bind_relocs; + nweak_binds += object.num_weak_bind_relocs; + } + try self.rebase.entries.ensureUnusedCapacity(gpa, nrebases); + try self.bind.entries.ensureUnusedCapacity(gpa, nbinds); + try self.weak_bind.entries.ensureUnusedCapacity(gpa, nweak_binds); +} + +fn initExportTrie(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = self.base.comp.gpa; + try 
self.export_trie.init(gpa); + + const seg = self.getTextSegment(); + for (self.objects.items) |index| { + for (self.getFile(index).?.getSymbols()) |sym_index| { + const sym = self.getSymbol(sym_index); + if (!sym.flags.@"export") continue; + if (sym.getAtom(self)) |atom| if (!atom.flags.alive) continue; + if (sym.getFile(self).?.getIndex() != index) continue; + var flags: u64 = if (sym.flags.abs) + macho.EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE + else if (sym.flags.tlv) + macho.EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL + else + macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR; + if (sym.flags.weak) { + flags |= macho.EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; + self.weak_defines = true; + self.binds_to_weak = true; + } + try self.export_trie.put(gpa, .{ + .name = sym.getName(self), + .vmaddr_offset = sym.getAddress(.{ .stubs = false }, self) - seg.vmaddr, + .export_flags = flags, + }); + } + } + + if (self.mh_execute_header_index) |index| { + const sym = self.getSymbol(index); + try self.export_trie.put(gpa, .{ + .name = sym.getName(self), + .vmaddr_offset = sym.getAddress(.{}, self) - seg.vmaddr, + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + }); + } +} + +fn writeAtoms(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = self.base.comp.gpa; + const cpu_arch = self.getTarget().cpu.arch; + const slice = self.sections.slice(); + + var has_resolve_error = false; + for (slice.items(.header), slice.items(.atoms)) |header, atoms| { + if (atoms.items.len == 0) continue; + if (header.isZerofill()) continue; + + const buffer = try gpa.alloc(u8, header.size); + defer gpa.free(buffer); + const padding_byte: u8 = if (header.isCode() and cpu_arch == .x86_64) 0xcc else 0; + @memset(buffer, padding_byte); + + for (atoms.items) |atom_index| { + const atom = self.getAtom(atom_index).?; + assert(atom.flags.alive); + const off = atom.value - header.addr; + atom.resolveRelocs(self, buffer[off..][0..atom.size]) catch |err| switch (err) { + error.ResolveFailed => 
has_resolve_error = true, + else => |e| return e, + }; + } + + try self.base.file.?.pwriteAll(buffer, header.offset); + } + + for (self.thunks.items) |thunk| { + const header = slice.items(.header)[thunk.out_n_sect]; + const offset = thunk.value - header.addr + header.offset; + const buffer = try gpa.alloc(u8, thunk.size()); + defer gpa.free(buffer); + var stream = std.io.fixedBufferStream(buffer); + try thunk.write(self, stream.writer()); + try self.base.file.?.pwriteAll(buffer, offset); + } + + if (has_resolve_error) return error.ResolveFailed; +} + +fn writeUnwindInfo(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = self.base.comp.gpa; + + if (self.eh_frame_sect_index) |index| { + const header = self.sections.items(.header)[index]; + const buffer = try gpa.alloc(u8, header.size); + defer gpa.free(buffer); + eh_frame.write(self, buffer); + try self.base.file.?.pwriteAll(buffer, header.offset); + } + + if (self.unwind_info_sect_index) |index| { + const header = self.sections.items(.header)[index]; + const buffer = try gpa.alloc(u8, header.size); + defer gpa.free(buffer); + try self.unwind_info.write(self, buffer); + try self.base.file.?.pwriteAll(buffer, header.offset); + } +} + +fn finalizeDyldInfoSections(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = self.base.comp.gpa; + + try self.rebase.finalize(gpa); + try self.bind.finalize(gpa, self); + try self.weak_bind.finalize(gpa, self); + try self.lazy_bind.finalize(gpa, self); + try self.export_trie.finalize(gpa); +} + +fn writeSyntheticSections(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = self.base.comp.gpa; + + if (self.got_sect_index) |sect_id| { + const header = self.sections.items(.header)[sect_id]; + var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); + defer buffer.deinit(); + try self.got.write(self, buffer.writer()); + assert(buffer.items.len == header.size); + try 
self.base.file.?.pwriteAll(buffer.items, header.offset); + } + + if (self.stubs_sect_index) |sect_id| { + const header = self.sections.items(.header)[sect_id]; + var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); + defer buffer.deinit(); + try self.stubs.write(self, buffer.writer()); + assert(buffer.items.len == header.size); + try self.base.file.?.pwriteAll(buffer.items, header.offset); + } + + if (self.stubs_helper_sect_index) |sect_id| { + const header = self.sections.items(.header)[sect_id]; + var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); + defer buffer.deinit(); + try self.stubs_helper.write(self, buffer.writer()); + assert(buffer.items.len == header.size); + try self.base.file.?.pwriteAll(buffer.items, header.offset); + } + + if (self.la_symbol_ptr_sect_index) |sect_id| { + const header = self.sections.items(.header)[sect_id]; + var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); + defer buffer.deinit(); + try self.la_symbol_ptr.write(self, buffer.writer()); + assert(buffer.items.len == header.size); + try self.base.file.?.pwriteAll(buffer.items, header.offset); + } + + if (self.tlv_ptr_sect_index) |sect_id| { + const header = self.sections.items(.header)[sect_id]; + var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); + defer buffer.deinit(); + try self.tlv_ptr.write(self, buffer.writer()); + assert(buffer.items.len == header.size); + try self.base.file.?.pwriteAll(buffer.items, header.offset); + } + + if (self.objc_stubs_sect_index) |sect_id| { + const header = self.sections.items(.header)[sect_id]; + var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); + defer buffer.deinit(); + try self.objc_stubs.write(self, buffer.writer()); + assert(buffer.items.len == header.size); + try self.base.file.?.pwriteAll(buffer.items, header.offset); + } +} + +fn writeDyldInfoSections(self: *MachO, off: u32) !u32 { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = 
self.base.comp.gpa; + const cmd = &self.dyld_info_cmd; + var needed_size: u32 = 0; + + cmd.rebase_off = needed_size; + cmd.rebase_size = mem.alignForward(u32, @intCast(self.rebase.size()), @alignOf(u64)); + needed_size += cmd.rebase_size; + + cmd.bind_off = needed_size; + cmd.bind_size = mem.alignForward(u32, @intCast(self.bind.size()), @alignOf(u64)); + needed_size += cmd.bind_size; + + cmd.weak_bind_off = needed_size; + cmd.weak_bind_size = mem.alignForward(u32, @intCast(self.weak_bind.size()), @alignOf(u64)); + needed_size += cmd.weak_bind_size; + + cmd.lazy_bind_off = needed_size; + cmd.lazy_bind_size = mem.alignForward(u32, @intCast(self.lazy_bind.size()), @alignOf(u64)); + needed_size += cmd.lazy_bind_size; + + cmd.export_off = needed_size; + cmd.export_size = mem.alignForward(u32, @intCast(self.export_trie.size), @alignOf(u64)); + needed_size += cmd.export_size; + + const buffer = try gpa.alloc(u8, needed_size); + defer gpa.free(buffer); + @memset(buffer, 0); + + var stream = std.io.fixedBufferStream(buffer); + const writer = stream.writer(); + + try self.rebase.write(writer); + try stream.seekTo(cmd.bind_off); + try self.bind.write(writer); + try stream.seekTo(cmd.weak_bind_off); + try self.weak_bind.write(writer); + try stream.seekTo(cmd.lazy_bind_off); + try self.lazy_bind.write(writer); + try stream.seekTo(cmd.export_off); + try self.export_trie.write(writer); + + cmd.rebase_off += off; + cmd.bind_off += off; + cmd.weak_bind_off += off; + cmd.lazy_bind_off += off; + cmd.export_off += off; + + try self.base.file.?.pwriteAll(buffer, off); + + return off + needed_size; +} + +fn writeFunctionStarts(self: *MachO, off: u32) !u32 { + // TODO actually write it out + const cmd = &self.function_starts_cmd; + cmd.dataoff = off; + return off; +} + +pub fn writeDataInCode(self: *MachO, base_address: u64, off: u32) !u32 { + const cmd = &self.data_in_code_cmd; + cmd.dataoff = off; + + const gpa = self.base.comp.gpa; + var dices = 
std.ArrayList(macho.data_in_code_entry).init(gpa); + defer dices.deinit(); + + for (self.objects.items) |index| { + const object = self.getFile(index).?.object; + const in_dices = object.getDataInCode(); + + try dices.ensureUnusedCapacity(in_dices.len); + + var next_dice: usize = 0; + for (object.atoms.items) |atom_index| { + if (next_dice >= in_dices.len) break; + const atom = self.getAtom(atom_index) orelse continue; + const start_off = atom.getInputAddress(self); + const end_off = start_off + atom.size; + const start_dice = next_dice; + + if (end_off < in_dices[next_dice].offset) continue; + + while (next_dice < in_dices.len and + in_dices[next_dice].offset < end_off) : (next_dice += 1) + {} + + if (atom.flags.alive) for (in_dices[start_dice..next_dice]) |dice| { + dices.appendAssumeCapacity(.{ + .offset = @intCast(atom.value + dice.offset - start_off - base_address), + .length = dice.length, + .kind = dice.kind, + }); + }; + } + } + + const needed_size = math.cast(u32, dices.items.len * @sizeOf(macho.data_in_code_entry)) orelse return error.Overflow; + cmd.datasize = needed_size; + + try self.base.file.?.pwriteAll(mem.sliceAsBytes(dices.items), cmd.dataoff); + + return off + needed_size; +} + +pub fn calcSymtabSize(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = self.base.comp.gpa; + + var nlocals: u32 = 0; + var nstabs: u32 = 0; + var nexports: u32 = 0; + var nimports: u32 = 0; + var strsize: u32 = 0; + + var files = std.ArrayList(File.Index).init(gpa); + defer files.deinit(); + try files.ensureTotalCapacityPrecise(self.objects.items.len + self.dylibs.items.len + 1); + for (self.objects.items) |index| files.appendAssumeCapacity(index); + for (self.dylibs.items) |index| files.appendAssumeCapacity(index); + if (self.internal_object) |index| files.appendAssumeCapacity(index); + + for (files.items) |index| { + const file = self.getFile(index).?; + const ctx = switch (file) { + inline else => |x| &x.output_symtab_ctx, + }; + 
ctx.ilocal = nlocals; + ctx.istab = nstabs; + ctx.iexport = nexports; + ctx.iimport = nimports; + try file.calcSymtabSize(self); + nlocals += ctx.nlocals; + nstabs += ctx.nstabs; + nexports += ctx.nexports; + nimports += ctx.nimports; + strsize += ctx.strsize; + } + + for (files.items) |index| { + const file = self.getFile(index).?; + const ctx = switch (file) { + inline else => |x| &x.output_symtab_ctx, + }; + ctx.istab += nlocals; + ctx.iexport += nlocals + nstabs; + ctx.iimport += nlocals + nstabs + nexports; + } + + { + const cmd = &self.symtab_cmd; + cmd.nsyms = nlocals + nstabs + nexports + nimports; + cmd.strsize = strsize + 1; + } + + { + const cmd = &self.dysymtab_cmd; + cmd.ilocalsym = 0; + cmd.nlocalsym = nlocals + nstabs; + cmd.iextdefsym = nlocals + nstabs; + cmd.nextdefsym = nexports; + cmd.iundefsym = nlocals + nstabs + nexports; + cmd.nundefsym = nimports; + } +} + +pub fn writeSymtab(self: *MachO, off: u32) !u32 { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = self.base.comp.gpa; + const cmd = &self.symtab_cmd; + cmd.symoff = off; + + try self.symtab.resize(gpa, cmd.nsyms); + try self.strtab.ensureUnusedCapacity(gpa, cmd.strsize - 1); + + for (self.objects.items) |index| { + self.getFile(index).?.writeSymtab(self); + } + for (self.dylibs.items) |index| { + self.getFile(index).?.writeSymtab(self); + } + if (self.getInternalObject()) |internal| { + internal.writeSymtab(self); + } + + assert(self.strtab.items.len == cmd.strsize); + + try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.symtab.items), cmd.symoff); + + return off + cmd.nsyms * @sizeOf(macho.nlist_64); +} + +fn writeIndsymtab(self: *MachO, off: u32) !u32 { + const gpa = self.base.comp.gpa; + const cmd = &self.dysymtab_cmd; + cmd.indirectsymoff = off; + cmd.nindirectsyms = self.indsymtab.nsyms(self); + + const needed_size = cmd.nindirectsyms * @sizeOf(u32); + var buffer = try std.ArrayList(u8).initCapacity(gpa, needed_size); + defer buffer.deinit(); + try 
self.indsymtab.write(self, buffer.writer()); + + try self.base.file.?.pwriteAll(buffer.items, cmd.indirectsymoff); + assert(buffer.items.len == needed_size); + + return off + needed_size; +} + +pub fn writeStrtab(self: *MachO, off: u32) !u32 { + const cmd = &self.symtab_cmd; + cmd.stroff = off; + try self.base.file.?.pwriteAll(self.strtab.items, cmd.stroff); + return off + cmd.strsize; +} + +fn writeLoadCommands(self: *MachO) !struct { usize, usize, usize } { + const gpa = self.base.comp.gpa; + const needed_size = load_commands.calcLoadCommandsSize(self, false); + const buffer = try gpa.alloc(u8, needed_size); + defer gpa.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + var cwriter = std.io.countingWriter(stream.writer()); + const writer = cwriter.writer(); + + var ncmds: usize = 0; + + // Segment and section load commands + { + const slice = self.sections.slice(); + var sect_id: usize = 0; + for (self.segments.items) |seg| { + try writer.writeStruct(seg); + for (slice.items(.header)[sect_id..][0..seg.nsects]) |header| { + try writer.writeStruct(header); + } + sect_id += seg.nsects; + } + ncmds += self.segments.items.len; + } + + try writer.writeStruct(self.dyld_info_cmd); + ncmds += 1; + try writer.writeStruct(self.function_starts_cmd); + ncmds += 1; + try writer.writeStruct(self.data_in_code_cmd); + ncmds += 1; + try writer.writeStruct(self.symtab_cmd); + ncmds += 1; + try writer.writeStruct(self.dysymtab_cmd); + ncmds += 1; + try load_commands.writeDylinkerLC(writer); + ncmds += 1; + + if (self.entry_index) |global_index| { + const sym = self.getSymbol(global_index); + const seg = self.getTextSegment(); + const entryoff: u32 = if (sym.getFile(self) == null) + 0 + else + @as(u32, @intCast(sym.getAddress(.{ .stubs = true }, self) - seg.vmaddr)); + try writer.writeStruct(macho.entry_point_command{ + .entryoff = entryoff, + .stacksize = self.base.stack_size, + }); + ncmds += 1; + } + + if (self.base.isDynLib()) { + try 
load_commands.writeDylibIdLC(self, writer); + ncmds += 1; + } + + try load_commands.writeRpathLCs(self.rpath_table.keys(), writer); + ncmds += self.rpath_table.keys().len; + + try writer.writeStruct(macho.source_version_command{ .version = 0 }); + ncmds += 1; + + if (self.platform.isBuildVersionCompatible()) { + try load_commands.writeBuildVersionLC(self.platform, self.sdk_version, writer); + ncmds += 1; + } else { + try load_commands.writeVersionMinLC(self.platform, self.sdk_version, writer); + ncmds += 1; + } + + const uuid_cmd_offset = @sizeOf(macho.mach_header_64) + cwriter.bytes_written; + try writer.writeStruct(self.uuid_cmd); + ncmds += 1; + + for (self.dylibs.items) |index| { + const dylib = self.getFile(index).?.dylib; + assert(dylib.isAlive(self)); + const dylib_id = dylib.id.?; + try load_commands.writeDylibLC(.{ + .cmd = if (dylib.weak) + .LOAD_WEAK_DYLIB + else if (dylib.reexport) + .REEXPORT_DYLIB + else + .LOAD_DYLIB, + .name = dylib_id.name, + .timestamp = dylib_id.timestamp, + .current_version = dylib_id.current_version, + .compatibility_version = dylib_id.compatibility_version, + }, writer); + ncmds += 1; + } + + if (self.requiresCodeSig()) { + try writer.writeStruct(self.codesig_cmd); + ncmds += 1; + } + + assert(cwriter.bytes_written == needed_size); + + try self.base.file.?.pwriteAll(buffer, @sizeOf(macho.mach_header_64)); + + return .{ ncmds, buffer.len, uuid_cmd_offset }; +} + +fn writeHeader(self: *MachO, ncmds: usize, sizeofcmds: usize) !void { + var header: macho.mach_header_64 = .{}; + header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK; + + // TODO: if (self.options.namespace == .two_level) { + header.flags |= macho.MH_TWOLEVEL; + // } + + switch (self.getTarget().cpu.arch) { + .aarch64 => { + header.cputype = macho.CPU_TYPE_ARM64; + header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL; + }, + .x86_64 => { + header.cputype = macho.CPU_TYPE_X86_64; + header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL; + }, + else => {}, + } + + if 
(self.base.isDynLib()) { + header.filetype = macho.MH_DYLIB; + } else { + header.filetype = macho.MH_EXECUTE; + header.flags |= macho.MH_PIE; + } + + const has_reexports = for (self.dylibs.items) |index| { + if (self.getFile(index).?.dylib.reexport) break true; + } else false; + if (!has_reexports) { + header.flags |= macho.MH_NO_REEXPORTED_DYLIBS; + } + + if (self.has_tlv) { + header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; + } + if (self.binds_to_weak) { + header.flags |= macho.MH_BINDS_TO_WEAK; + } + if (self.weak_defines) { + header.flags |= macho.MH_WEAK_DEFINES; + } + + header.ncmds = @intCast(ncmds); + header.sizeofcmds = @intCast(sizeofcmds); + + log.debug("writing Mach-O header {}", .{header}); + + try self.base.file.?.pwriteAll(mem.asBytes(&header), 0); +} + +fn writeUuid(self: *MachO, uuid_cmd_offset: usize, has_codesig: bool) !void { + const file_size = if (!has_codesig) blk: { + const seg = self.getLinkeditSegment(); + break :blk seg.fileoff + seg.filesize; + } else self.codesig_cmd.dataoff; + try calcUuid(self.base.comp, self.base.file.?, file_size, &self.uuid_cmd.uuid); + const offset = uuid_cmd_offset + @sizeOf(macho.load_command); + try self.base.file.?.pwriteAll(&self.uuid_cmd.uuid, offset); +} + +pub fn writeCodeSignaturePadding(self: *MachO, code_sig: *CodeSignature) !void { + const seg = self.getLinkeditSegment(); + // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file + // https://github.com/opensource-apple/cctools/blob/fdb4825f303fd5c0751be524babd32958181b3ed/libstuff/checkout.c#L271 + const offset = mem.alignForward(u64, seg.fileoff + seg.filesize, 16); + const needed_size = code_sig.estimateSize(offset); + seg.filesize = offset + needed_size - seg.fileoff; + seg.vmsize = mem.alignForward(u64, seg.filesize, self.getPageSize()); + log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + // Pad out the space. 
We need to do this to calculate valid hashes for everything in the file + // except for code signature data. + try self.base.file.?.pwriteAll(&[_]u8{0}, offset + needed_size - 1); + + self.codesig_cmd.dataoff = @as(u32, @intCast(offset)); + self.codesig_cmd.datasize = @as(u32, @intCast(needed_size)); +} + +pub fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature) !void { + const seg = self.getTextSegment(); + const offset = self.codesig_cmd.dataoff; + + var buffer = std.ArrayList(u8).init(self.base.comp.gpa); + defer buffer.deinit(); + try buffer.ensureTotalCapacityPrecise(code_sig.size()); + try code_sig.writeAdhocSignature(self, .{ + .file = self.base.file.?, + .exec_seg_base = seg.fileoff, + .exec_seg_limit = seg.filesize, + .file_size = offset, + .dylib = self.base.isDynLib(), + }, buffer.writer()); + assert(buffer.items.len == code_sig.size()); + + log.debug("writing code signature from 0x{x} to 0x{x}", .{ + offset, + offset + buffer.items.len, + }); + + try self.base.file.?.pwriteAll(buffer.items, offset); +} + fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void { _ = self; _ = atom_index; @@ -3194,7 +3890,7 @@ const supported_platforms = [_]SupportedPlatforms{ }; // zig fmt: on -inline fn semanticVersionToAppleVersion(version: std.SemanticVersion) u32 { +pub inline fn semanticVersionToAppleVersion(version: std.SemanticVersion) u32 { const major = version.major; const minor = version.minor; const patch = version.patch; diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index cb5feef263..1e723d337b 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -311,13 +311,16 @@ pub fn resolveRelocs(self: Atom, macho_file: *MachO, buffer: []u8) !void { try stream.seekTo(rel_offset); self.resolveRelocInner(rel, subtractor, buffer, macho_file, stream.writer()) catch |err| { switch (err) { - error.RelaxFail => macho_file.base.fatal( - "{}: {s}: 0x{x}: failed to relax relocation: in {s}", - .{ file.fmtPath(), 
name, rel.offset, @tagName(rel.type) }, - ), + error.RelaxFail => { + try macho_file.reportParseError2( + file.getIndex(), + "{s}: 0x{x}: failed to relax relocation: in {s}", + .{ name, rel.offset, @tagName(rel.type) }, + ); + return error.ResolveFailed; + }, else => |e| return e, } - return error.ResolveFailed; }; } } @@ -338,7 +341,7 @@ fn resolveRelocInner( macho_file: *MachO, writer: anytype, ) ResolveError!void { - const cpu_arch = macho_file.options.cpu_arch.?; + const cpu_arch = macho_file.getTarget().cpu.arch; const rel_offset = rel.offset - self.off; const seg_id = macho_file.sections.items(.segment_id)[self.out_n_sect]; const seg = macho_file.segments.items[seg_id]; diff --git a/src/link/MachO/CodeSignature.zig b/src/link/MachO/CodeSignature.zig index 54d83d4530..045bad712b 100644 --- a/src/link/MachO/CodeSignature.zig +++ b/src/link/MachO/CodeSignature.zig @@ -264,7 +264,7 @@ pub fn writeAdhocSignature( opts: WriteOpts, writer: anytype, ) !void { - const allocator = macho_file.base.allocator; + const allocator = macho_file.base.comp.gpa; var header: macho.SuperBlob = .{ .magic = macho.CSMAGIC_EMBEDDED_SIGNATURE, @@ -287,7 +287,7 @@ pub fn writeAdhocSignature( self.code_directory.inner.nCodeSlots = total_pages; // Calculate hash for each page (in file) and write it to the buffer - var hasher = Hasher(Sha256){ .allocator = allocator, .thread_pool = macho_file.base.thread_pool }; + var hasher = Hasher(Sha256){ .allocator = allocator, .thread_pool = macho_file.base.comp.thread_pool }; try hasher.hash(opts.file, self.code_directory.code_slots.items, .{ .chunk_size = self.page_size, .max_file_size = opts.file_size, diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index a6f865901e..a89e75d533 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -1184,7 +1184,8 @@ pub fn calcSymtabSize(self: *Object, macho_file: *MachO) !void { self.output_symtab_ctx.strsize += @as(u32, @intCast(sym.getName(macho_file).len + 1)); } - if 
(!macho_file.options.strip and self.hasDebugInfo()) self.calcStabsSize(macho_file); + if (macho_file.base.comp.config.debug_format != .strip and self.hasDebugInfo()) + self.calcStabsSize(macho_file); } pub fn calcStabsSize(self: *Object, macho_file: *MachO) void { @@ -1264,7 +1265,8 @@ pub fn writeSymtab(self: Object, macho_file: *MachO) void { sym.setOutputSym(macho_file, out_sym); } - if (!macho_file.options.strip and self.hasDebugInfo()) self.writeStabs(macho_file); + if (macho_file.base.comp.config.debug_format != .strip and self.hasDebugInfo()) + self.writeStabs(macho_file); } pub fn writeStabs(self: *const Object, macho_file: *MachO) void { diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index c3a6d7b54e..9355c0db2c 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -230,9 +230,14 @@ pub fn setOutputSym(symbol: Symbol, macho_file: *MachO, out: *macho.nlist_64) vo out.n_value = 0; out.n_desc = 0; - const ord: u16 = if (macho_file.options.namespace == .flat) - @as(u8, @bitCast(macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP)) - else if (symbol.getDylibOrdinal(macho_file)) |ord| + // TODO: + // const ord: u16 = if (macho_file.options.namespace == .flat) + // @as(u8, @bitCast(macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP)) + // else if (symbol.getDylibOrdinal(macho_file)) |ord| + // ord + // else + // macho.BIND_SPECIAL_DYLIB_SELF; + const ord: u16 = if (symbol.getDylibOrdinal(macho_file)) |ord| ord else macho.BIND_SPECIAL_DYLIB_SELF; diff --git a/src/link/MachO/dyld_info/bind.zig b/src/link/MachO/dyld_info/bind.zig index 5bc872e277..cee57e1edf 100644 --- a/src/link/MachO/dyld_info/bind.zig +++ b/src/link/MachO/dyld_info/bind.zig @@ -99,10 +99,10 @@ pub const Bind = struct { const ordinal: i16 = ord: { if (sym.flags.interposable) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; if (sym.flags.import) { - if (ctx.options.namespace == .flat) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; + // TODO: if (ctx.options.namespace == .flat) break 
:ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; if (sym.getDylibOrdinal(ctx)) |ord| break :ord @bitCast(ord); } - if (ctx.options.undefined_treatment == .dynamic_lookup) + if (ctx.undefined_treatment == .dynamic_lookup) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; break :ord macho.BIND_SPECIAL_DYLIB_SELF; }; @@ -359,10 +359,10 @@ pub const LazyBind = struct { const ordinal: i16 = ord: { if (sym.flags.interposable) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; if (sym.flags.import) { - if (ctx.options.namespace == .flat) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; + // TODO: if (ctx.options.namespace == .flat) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; if (sym.getDylibOrdinal(ctx)) |ord| break :ord @bitCast(ord); } - if (ctx.options.undefined_treatment == .dynamic_lookup) + if (ctx.undefined_treatment == .dynamic_lookup) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP; break :ord macho.BIND_SPECIAL_DYLIB_SELF; }; diff --git a/src/link/MachO/eh_frame.zig b/src/link/MachO/eh_frame.zig index 91a9cafb54..56d81ba93a 100644 --- a/src/link/MachO/eh_frame.zig +++ b/src/link/MachO/eh_frame.zig @@ -374,7 +374,7 @@ pub fn write(macho_file: *MachO, buffer: []u8) void { defer tracy.end(); const sect = macho_file.sections.items(.header)[macho_file.eh_frame_sect_index.?]; - const addend: i64 = switch (macho_file.options.cpu_arch.?) { + const addend: i64 = switch (macho_file.getTarget().cpu.arch) { .x86_64 => 4, else => 0, }; @@ -452,7 +452,7 @@ pub fn writeRelocs(macho_file: *MachO, code: []u8, relocs: *std.ArrayList(macho. 
const tracy = trace(@src()); defer tracy.end(); - const cpu_arch = macho_file.options.cpu_arch.?; + const cpu_arch = macho_file.getTarget().cpu.arch; const sect = macho_file.sections.items(.header)[macho_file.eh_frame_sect_index.?]; const addend: i64 = switch (cpu_arch) { .x86_64 => 4, diff --git a/src/link/MachO/load_commands.zig b/src/link/MachO/load_commands.zig index 66b838f95c..3c3c53f637 100644 --- a/src/link/MachO/load_commands.zig +++ b/src/link/MachO/load_commands.zig @@ -7,7 +7,6 @@ const mem = std.mem; const Allocator = mem.Allocator; const Dylib = @import("Dylib.zig"); const MachO = @import("../MachO.zig"); -const Options = @import("../MachO.zig").Options; pub const default_dyld_path: [*:0]const u8 = "/usr/lib/dyld"; @@ -200,17 +199,29 @@ pub fn writeDylibLC(ctx: WriteDylibLCCtx, writer: anytype) !void { } } -pub fn writeDylibIdLC(options: *const Options, writer: anytype) !void { - assert(options.dylib); - const emit = options.emit; - const install_name = options.install_name orelse emit.sub_path; - const curr = options.current_version orelse Options.Version.new(1, 0, 0); - const compat = options.compatibility_version orelse Options.Version.new(1, 0, 0); +pub fn writeDylibIdLC(macho_file: *MachO, writer: anytype) !void { + const comp = macho_file.base.comp; + const gpa = comp.gpa; + assert(comp.config.output_mode == .Lib and comp.config.link_mode == .Dynamic); + const emit = macho_file.base.emit; + const install_name = macho_file.install_name orelse + try emit.directory.join(gpa, &.{emit.sub_path}); + defer if (macho_file.install_name == null) gpa.free(install_name); + const curr = comp.version orelse std.SemanticVersion{ + .major = 1, + .minor = 0, + .patch = 0, + }; + const compat = macho_file.compatibility_version orelse std.SemanticVersion{ + .major = 1, + .minor = 0, + .patch = 0, + }; try writeDylibLC(.{ .cmd = .ID_DYLIB, .name = install_name, - .current_version = curr.value, - .compatibility_version = compat.value, + .current_version = @as(u32, 
@intCast(curr.major << 16 | curr.minor << 8 | curr.patch)), + .compatibility_version = @as(u32, @intCast(compat.major << 16 | compat.minor << 8 | compat.patch)), }, writer); } @@ -235,32 +246,38 @@ pub fn writeRpathLCs(rpaths: []const []const u8, writer: anytype) !void { } } -pub fn writeVersionMinLC(platform: Options.Platform, sdk_version: ?Options.Version, writer: anytype) !void { - const cmd: macho.LC = switch (platform.platform) { - .MACOS => .VERSION_MIN_MACOSX, - .IOS, .IOSSIMULATOR => .VERSION_MIN_IPHONEOS, - .TVOS, .TVOSSIMULATOR => .VERSION_MIN_TVOS, - .WATCHOS, .WATCHOSSIMULATOR => .VERSION_MIN_WATCHOS, +pub fn writeVersionMinLC(platform: MachO.Platform, sdk_version: ?std.SemanticVersion, writer: anytype) !void { + const cmd: macho.LC = switch (platform.os_tag) { + .macos => .VERSION_MIN_MACOSX, + .ios => .VERSION_MIN_IPHONEOS, + .tvos => .VERSION_MIN_TVOS, + .watchos => .VERSION_MIN_WATCHOS, else => unreachable, }; try writer.writeAll(mem.asBytes(&macho.version_min_command{ .cmd = cmd, - .version = platform.version.value, - .sdk = if (sdk_version) |ver| ver.value else platform.version.value, + .version = platform.toAppleVersion(), + .sdk = if (sdk_version) |ver| + MachO.semanticVersionToAppleVersion(ver) + else + platform.toAppleVersion(), })); } -pub fn writeBuildVersionLC(platform: Options.Platform, sdk_version: ?Options.Version, writer: anytype) !void { +pub fn writeBuildVersionLC(platform: MachO.Platform, sdk_version: ?std.SemanticVersion, writer: anytype) !void { const cmdsize = @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); try writer.writeStruct(macho.build_version_command{ .cmdsize = cmdsize, - .platform = platform.platform, - .minos = platform.version.value, - .sdk = if (sdk_version) |ver| ver.value else platform.version.value, + .platform = platform.toApplePlatform(), + .minos = platform.toAppleVersion(), + .sdk = if (sdk_version) |ver| + MachO.semanticVersionToAppleVersion(ver) + else + platform.toAppleVersion(), 
.ntools = 1, }); try writer.writeAll(mem.asBytes(&macho.build_tool_version{ - .tool = @as(macho.TOOL, @enumFromInt(0x6)), + .tool = .ZIG, .version = 0x0, })); } diff --git a/src/link/MachO/uuid.zig b/src/link/MachO/uuid.zig index ca66e129d2..565ae80b22 100644 --- a/src/link/MachO/uuid.zig +++ b/src/link/MachO/uuid.zig @@ -4,13 +4,7 @@ /// and we will use it too as it seems accepted by Apple OSes. /// TODO LLD also hashes the output filename to disambiguate between same builds with different /// output files. Should we also do that? -pub fn calcUuid( - allocator: Allocator, - thread_pool: *ThreadPool, - file: fs.File, - file_size: u64, - out: *[Md5.digest_length]u8, -) !void { +pub fn calcUuid(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void { const tracy = trace(@src()); defer tracy.end(); @@ -18,17 +12,17 @@ pub fn calcUuid( const num_chunks: usize = std.math.cast(usize, @divTrunc(file_size, chunk_size)) orelse return error.Overflow; const actual_num_chunks = if (@rem(file_size, chunk_size) > 0) num_chunks + 1 else num_chunks; - const hashes = try allocator.alloc([Md5.digest_length]u8, actual_num_chunks); - defer allocator.free(hashes); + const hashes = try comp.gpa.alloc([Md5.digest_length]u8, actual_num_chunks); + defer comp.gpa.free(hashes); - var hasher = Hasher(Md5){ .allocator = allocator, .thread_pool = thread_pool }; + var hasher = Hasher(Md5){ .allocator = comp.gpa, .thread_pool = comp.thread_pool }; try hasher.hash(file, hashes, .{ .chunk_size = chunk_size, .max_file_size = file_size, }); - const final_buffer = try allocator.alloc(u8, actual_num_chunks * Md5.digest_length); - defer allocator.free(final_buffer); + const final_buffer = try comp.gpa.alloc(u8, actual_num_chunks * Md5.digest_length); + defer comp.gpa.free(final_buffer); for (hashes, 0..) 
|hash, i| { @memcpy(final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash); @@ -49,7 +43,7 @@ const mem = std.mem; const std = @import("std"); const trace = @import("../../tracy.zig").trace; -const Allocator = mem.Allocator; +const Compilation = @import("../../Compilation.zig"); const Md5 = std.crypto.hash.Md5; const Hasher = @import("hasher.zig").ParallelHasher; const ThreadPool = std.Thread.Pool; From c2a0a882842d8cc9d0ad29781fe1e13c1a5880cd Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 12 Jan 2024 10:24:02 +0100 Subject: [PATCH 024/133] macho: report duplicate symbols --- src/link/MachO.zig | 106 ++++++++++++++++++-------------------- src/link/MachO/Object.zig | 18 +++++++ 2 files changed, 69 insertions(+), 55 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index af5d165665..2911832132 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -509,6 +509,14 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node try dead_strip.gcAtoms(self); } + self.checkDuplicates() catch |err| switch (err) { + error.HasDuplicates => return error.FlushFailure, + else => |e| { + try self.reportUnexpectedError("unexpected error while checking for duplicate symbol definitions", .{}); + return e; + }, + }; + self.markImportsAndExports(); self.deadStripDylibs(); @@ -1414,6 +1422,24 @@ fn claimUnresolved(self: *MachO) error{OutOfMemory}!void { } } +fn checkDuplicates(self: *MachO) !void { + const gpa = self.base.comp.gpa; + + var dupes = std.AutoArrayHashMap(Symbol.Index, std.ArrayListUnmanaged(File.Index)).init(gpa); + defer { + for (dupes.values()) |*list| { + list.deinit(gpa); + } + dupes.deinit(); + } + + for (self.objects.items) |index| { + try self.getFile(index).?.object.checkDuplicates(&dupes, self); + } + + try self.reportDuplicates(dupes); +} + fn markImportsAndExports(self: *MachO) void { for (self.objects.items) |index| { for (self.getFile(index).?.getSymbols()) |sym_index| { @@ -3468,68 +3494,38 @@ fn 
reportUnexpectedError(self: *MachO, comptime format: []const u8, args: anytyp try err.addNote(self, "please report this as a linker bug on https://github.com/ziglang/zig/issues/new/choose", .{}); } -// fn reportSymbolCollision( -// self: *MachO, -// first: SymbolWithLoc, -// other: SymbolWithLoc, -// ) error{OutOfMemory}!void { -// const comp = self.base.comp; -// const gpa = comp.gpa; -// try comp.link_errors.ensureUnusedCapacity(gpa, 1); +fn reportDuplicates(self: *MachO, dupes: anytype) error{ HasDuplicates, OutOfMemory }!void { + const tracy = trace(@src()); + defer tracy.end(); -// var notes = try std.ArrayList(File.ErrorMsg).initCapacity(gpa, 2); -// defer notes.deinit(); + const max_notes = 4; -// if (first.getFile()) |file| { -// const note = try std.fmt.allocPrint(gpa, "first definition in {s}", .{ -// self.objects.items[file].name, -// }); -// notes.appendAssumeCapacity(.{ .msg = note }); -// } -// if (other.getFile()) |file| { -// const note = try std.fmt.allocPrint(gpa, "next definition in {s}", .{ -// self.objects.items[file].name, -// }); -// notes.appendAssumeCapacity(.{ .msg = note }); -// } + var has_dupes = false; + var it = dupes.iterator(); + while (it.next()) |entry| { + const sym = self.getSymbol(entry.key_ptr.*); + const notes = entry.value_ptr.*; + const nnotes = @min(notes.items.len, max_notes) + @intFromBool(notes.items.len > max_notes); -// var err_msg = File.ErrorMsg{ .msg = try std.fmt.allocPrint(gpa, "symbol {s} defined multiple times", .{ -// self.getSymbolName(first), -// }) }; -// err_msg.notes = try notes.toOwnedSlice(); + var err = try self.addErrorWithNotes(nnotes); + try err.addMsg(self, "duplicate symbol definition: {s}", .{sym.getName(self)}); -// comp.link_errors.appendAssumeCapacity(err_msg); -// } + var inote: usize = 0; + while (inote < @min(notes.items.len, max_notes)) : (inote += 1) { + const file = self.getFile(notes.items[inote]).?; + try err.addNote(self, "defined by {}", .{file.fmtPath()}); + } -// fn 
reportUnhandledSymbolType(self: *MachO, sym_with_loc: SymbolWithLoc) error{OutOfMemory}!void { -// const comp = self.base.comp; -// const gpa = comp.gpa; -// try comp.link_errors.ensureUnusedCapacity(gpa, 1); + if (notes.items.len > max_notes) { + const remaining = notes.items.len - max_notes; + try err.addNote(self, "defined {d} more times", .{remaining}); + } -// const notes = try gpa.alloc(File.ErrorMsg, 1); -// errdefer gpa.free(notes); + has_dupes = true; + } -// const file = sym_with_loc.getFile().?; -// notes[0] = .{ .msg = try std.fmt.allocPrint(gpa, "defined in {s}", .{self.objects.items[file].name}) }; - -// const sym = self.getSymbol(sym_with_loc); -// const sym_type = if (sym.stab()) -// "stab" -// else if (sym.indr()) -// "indirect" -// else if (sym.abs()) -// "absolute" -// else -// unreachable; - -// comp.link_errors.appendAssumeCapacity(.{ -// .msg = try std.fmt.allocPrint(gpa, "unhandled symbol type: '{s}' has type {s}", .{ -// self.getSymbolName(sym_with_loc), -// sym_type, -// }), -// .notes = notes, -// }); -// } + if (has_dupes) return error.HasDuplicates; +} pub fn getDebugSymbols(self: *MachO) ?*DebugSymbols { if (self.d_sym) |*ds| { diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index a89e75d533..136b39d617 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -1070,6 +1070,24 @@ pub fn markLive(self: *Object, macho_file: *MachO) void { } } +pub fn checkDuplicates(self: *Object, dupes: anytype, macho_file: *MachO) error{OutOfMemory}!void { + for (self.symbols.items, 0..) 
|index, nlist_idx| { + const sym = macho_file.getSymbol(index); + if (sym.visibility != .global) continue; + const file = sym.getFile(macho_file) orelse continue; + if (file.getIndex() == self.index) continue; + + const nlist = self.symtab.items(.nlist)[nlist_idx]; + if (!nlist.undf() and !nlist.tentative() and !(nlist.weakDef() or nlist.pext())) { + const gop = try dupes.getOrPut(index); + if (!gop.found_existing) { + gop.value_ptr.* = .{}; + } + try gop.value_ptr.append(macho_file.base.comp.gpa, self.index); + } + } +} + pub fn scanRelocs(self: Object, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); From 10a5536a7c9f9545c292f8e863a7cecd89e9ee54 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 12 Jan 2024 10:46:56 +0100 Subject: [PATCH 025/133] macho: re-instate build-obj codepath --- src/link/MachO.zig | 20 ++----- src/link/MachO/Atom.zig | 4 +- src/link/MachO/load_commands.zig | 15 ++--- src/link/MachO/relocatable.zig | 98 ++++++++++++++++++++++---------- 4 files changed, 81 insertions(+), 56 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 2911832132..5c8e30400d 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -369,7 +369,7 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node if (comp.verbose_link) try self.dumpArgv(comp); if (self.base.isStaticLib()) return self.flushStaticLib(comp, module_obj_path); - if (self.base.isObject()) return self.flushObject(comp, module_obj_path); + if (self.base.isObject()) return relocatable.flush(self, comp, module_obj_path); var positionals = std.ArrayList(Compilation.LinkObject).init(gpa); defer positionals.deinit(); @@ -483,7 +483,7 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node self.getFile(index).?.dylib.umbrella = index; } - // try self.parseDependentDylibs(); + // TODO: try self.parseDependentDylibs(); for (self.dylibs.items) |index| { const dylib = self.getFile(index).?.dylib; @@ -783,16 
+783,6 @@ fn flushStaticLib(self: *MachO, comp: *Compilation, module_obj_path: ?[]const u8 return error.FlushFailure; } -fn flushObject(self: *MachO, comp: *Compilation, module_obj_path: ?[]const u8) link.File.FlushError!void { - _ = comp; - _ = module_obj_path; - - var err = try self.addErrorWithNotes(0); - try err.addMsg(self, "TODO implement flushObject", .{}); - - return error.FlushFailure; -} - pub fn resolveLibSystem( self: *MachO, arena: Allocator, @@ -890,7 +880,7 @@ const ParseError = error{ UnknownFileType, } || std.os.SeekError || std.fs.File.OpenError || std.fs.File.ReadError || tapi.TapiError; -fn parsePositional(self: *MachO, path: []const u8, must_link: bool) ParseError!void { +pub fn parsePositional(self: *MachO, path: []const u8, must_link: bool) ParseError!void { const tracy = trace(@src()); defer tracy.end(); if (try Object.isObject(path)) { @@ -1083,6 +1073,7 @@ fn parseTbd(self: *MachO, lib: SystemLib, explicit: bool) ParseError!File.Index // return false; // } +// TODO: // fn parseDependentDylibs( // self: *MachO // ) !void { @@ -1212,7 +1203,7 @@ fn parseTbd(self: *MachO, lib: SystemLib, explicit: bool) ParseError!File.Index // } // } -fn addUndefinedGlobals(self: *MachO) !void { +pub fn addUndefinedGlobals(self: *MachO) !void { const gpa = self.base.comp.gpa; try self.undefined_symbols.ensureUnusedCapacity(gpa, self.base.comp.force_undefined_symbols.keys().len); @@ -4033,6 +4024,7 @@ const fat = @import("MachO/fat.zig"); const link = @import("../link.zig"); const llvm_backend = @import("../codegen/llvm.zig"); const load_commands = @import("MachO/load_commands.zig"); +const relocatable = @import("MachO/relocatable.zig"); const tapi = @import("tapi.zig"); const target_util = @import("../target.zig"); const thunks = @import("MachO/thunks.zig"); diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 1e723d337b..7cd44b162e 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -682,7 +682,7 @@ const x86_64 = struct { 
}; pub fn calcNumRelocs(self: Atom, macho_file: *MachO) u32 { - switch (macho_file.options.cpu_arch.?) { + switch (macho_file.getTarget().cpu.arch) { .aarch64 => { var nreloc: u32 = 0; for (self.getRelocs(macho_file)) |rel| { @@ -705,7 +705,7 @@ pub fn writeRelocs(self: Atom, macho_file: *MachO, code: []u8, buffer: *std.Arra const tracy = trace(@src()); defer tracy.end(); - const cpu_arch = macho_file.options.cpu_arch.?; + const cpu_arch = macho_file.getTarget().cpu.arch; const relocs = self.getRelocs(macho_file); const sect = macho_file.sections.items(.header)[self.out_n_sect]; var stream = std.io.fixedBufferStream(code); diff --git a/src/link/MachO/load_commands.zig b/src/link/MachO/load_commands.zig index 3c3c53f637..bd6a41d39c 100644 --- a/src/link/MachO/load_commands.zig +++ b/src/link/MachO/load_commands.zig @@ -98,7 +98,6 @@ pub fn calcLoadCommandsSize(macho_file: *MachO, assume_max_path_len: bool) u32 { } pub fn calcLoadCommandsSizeObject(macho_file: *MachO) u32 { - const options = &macho_file.options; var sizeofcmds: u64 = 0; // LC_SEGMENT_64 @@ -116,14 +115,12 @@ pub fn calcLoadCommandsSizeObject(macho_file: *MachO) u32 { // LC_DYSYMTAB sizeofcmds += @sizeOf(macho.dysymtab_command); - if (options.platform) |platform| { - if (platform.isBuildVersionCompatible()) { - // LC_BUILD_VERSION - sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); - } else { - // LC_VERSION_MIN_* - sizeofcmds += @sizeOf(macho.version_min_command); - } + if (macho_file.platform.isBuildVersionCompatible()) { + // LC_BUILD_VERSION + sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + } else { + // LC_VERSION_MIN_* + sizeofcmds += @sizeOf(macho.version_min_command); } return @as(u32, @intCast(sizeofcmds)); diff --git a/src/link/MachO/relocatable.zig b/src/link/MachO/relocatable.zig index 3d2d5b97b9..00513479b9 100644 --- a/src/link/MachO/relocatable.zig +++ b/src/link/MachO/relocatable.zig @@ -1,4 +1,37 @@ -pub 
fn flush(macho_file: *MachO) !void { +pub fn flush(macho_file: *MachO, comp: *Compilation, module_obj_path: ?[]const u8) link.File.FlushError!void { + const gpa = macho_file.base.comp.gpa; + + var positionals = std.ArrayList(Compilation.LinkObject).init(gpa); + defer positionals.deinit(); + try positionals.ensureUnusedCapacity(comp.objects.len); + positionals.appendSliceAssumeCapacity(comp.objects); + + for (comp.c_object_table.keys()) |key| { + try positionals.append(.{ .path = key.status.success.object_path }); + } + + if (module_obj_path) |path| try positionals.append(.{ .path = path }); + + for (positionals.items) |obj| { + macho_file.parsePositional(obj.path, obj.must_link) catch |err| switch (err) { + error.MalformedObject, + error.MalformedArchive, + error.InvalidCpuArch, + error.InvalidTarget, + => continue, // already reported + error.UnknownFileType => try macho_file.reportParseError(obj.path, "unknown file type for an object file", .{}), + else => |e| try macho_file.reportParseError( + obj.path, + "unexpected error: parsing input file failed with error {s}", + .{@errorName(e)}, + ), + }; + } + + if (comp.link_errors.items.len > 0) return error.FlushFailure; + + try macho_file.addUndefinedGlobals(); + try macho_file.resolveSymbols(); markExports(macho_file); claimUnresolved(macho_file); try initOutputSections(macho_file); @@ -9,7 +42,7 @@ pub fn flush(macho_file: *MachO) !void { { // For relocatable, we only ever need a single segment so create it now. 
const prot: macho.vm_prot_t = macho.PROT.READ | macho.PROT.WRITE | macho.PROT.EXEC; - try macho_file.segments.append(macho_file.base.allocator, .{ + try macho_file.segments.append(gpa, .{ .cmdsize = @sizeOf(macho.segment_command_64), .segname = MachO.makeStaticString(""), .maxprot = prot, @@ -128,17 +161,20 @@ fn initOutputSections(macho_file: *MachO) !void { } fn calcSectionSizes(macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const slice = macho_file.sections.slice(); for (slice.items(.header), slice.items(.atoms)) |*header, atoms| { if (atoms.items.len == 0) continue; for (atoms.items) |atom_index| { const atom = macho_file.getAtom(atom_index).?; - const atom_alignment = try math.powi(u32, 2, atom.alignment); + const atom_alignment = atom.alignment.toByteUnits(1); const offset = mem.alignForward(u64, header.size, atom_alignment); const padding = offset - header.size; atom.value = offset; header.size += padding + atom.size; - header.@"align" = @max(header.@"align", atom.alignment); + header.@"align" = @max(header.@"align", atom.alignment.toLog2Units()); header.nreloc += atom.calcNumRelocs(macho_file); } } @@ -218,8 +254,8 @@ fn writeAtoms(macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; - const cpu_arch = macho_file.options.cpu_arch.?; + const gpa = macho_file.base.comp.gpa; + const cpu_arch = macho_file.getTarget().cpu.arch; const slice = macho_file.sections.slice(); for (slice.items(.header), slice.items(.atoms)) |header, atoms| { @@ -247,14 +283,14 @@ fn writeAtoms(macho_file: *MachO) !void { mem.sort(macho.relocation_info, relocs.items, {}, sortReloc); // TODO scattered writes? 
- try macho_file.base.file.pwriteAll(code, header.offset); - try macho_file.base.file.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff); + try macho_file.base.file.?.pwriteAll(code, header.offset); + try macho_file.base.file.?.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff); } } fn writeCompactUnwind(macho_file: *MachO) !void { const sect_index = macho_file.unwind_info_sect_index orelse return; - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const header = macho_file.sections.items(.header)[sect_index]; const nrecs = @divExact(header.size, @sizeOf(macho.compact_unwind_entry)); @@ -301,7 +337,7 @@ fn writeCompactUnwind(macho_file: *MachO) !void { const atom = rec.getAtom(macho_file); const addr = rec.getAtomAddress(macho_file); out.rangeStart = addr; - var reloc = addReloc(offset, macho_file.options.cpu_arch.?); + var reloc = addReloc(offset, macho_file.getTarget().cpu.arch); reloc.r_symbolnum = atom.out_n_sect + 1; relocs.appendAssumeCapacity(reloc); } @@ -309,7 +345,7 @@ fn writeCompactUnwind(macho_file: *MachO) !void { // Personality function if (rec.getPersonality(macho_file)) |sym| { const r_symbolnum = math.cast(u24, sym.getOutputSymtabIndex(macho_file).?) 
orelse return error.Overflow; - var reloc = addReloc(offset + 16, macho_file.options.cpu_arch.?); + var reloc = addReloc(offset + 16, macho_file.getTarget().cpu.arch); reloc.r_symbolnum = r_symbolnum; reloc.r_extern = 1; relocs.appendAssumeCapacity(reloc); @@ -319,7 +355,7 @@ fn writeCompactUnwind(macho_file: *MachO) !void { if (rec.getLsdaAtom(macho_file)) |atom| { const addr = rec.getLsdaAddress(macho_file); out.lsda = addr; - var reloc = addReloc(offset + 24, macho_file.options.cpu_arch.?); + var reloc = addReloc(offset + 24, macho_file.getTarget().cpu.arch); reloc.r_symbolnum = atom.out_n_sect + 1; relocs.appendAssumeCapacity(reloc); } @@ -335,13 +371,13 @@ fn writeCompactUnwind(macho_file: *MachO) !void { mem.sort(macho.relocation_info, relocs.items, {}, sortReloc); // TODO scattered writes? - try macho_file.base.file.pwriteAll(mem.sliceAsBytes(entries.items), header.offset); - try macho_file.base.file.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff); + try macho_file.base.file.?.pwriteAll(mem.sliceAsBytes(entries.items), header.offset); + try macho_file.base.file.?.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff); } fn writeEhFrame(macho_file: *MachO) !void { const sect_index = macho_file.eh_frame_sect_index orelse return; - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const header = macho_file.sections.items(.header)[sect_index]; const code = try gpa.alloc(u8, header.size); @@ -356,12 +392,12 @@ fn writeEhFrame(macho_file: *MachO) !void { mem.sort(macho.relocation_info, relocs.items, {}, sortReloc); // TODO scattered writes? 
- try macho_file.base.file.pwriteAll(code, header.offset); - try macho_file.base.file.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff); + try macho_file.base.file.?.pwriteAll(code, header.offset); + try macho_file.base.file.?.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff); } fn writeLoadCommands(macho_file: *MachO) !struct { usize, usize } { - const gpa = macho_file.base.allocator; + const gpa = macho_file.base.comp.gpa; const needed_size = load_commands.calcLoadCommandsSizeObject(macho_file); const buffer = try gpa.alloc(u8, needed_size); defer gpa.free(buffer); @@ -390,19 +426,17 @@ fn writeLoadCommands(macho_file: *MachO) !struct { usize, usize } { try writer.writeStruct(macho_file.dysymtab_cmd); ncmds += 1; - if (macho_file.options.platform) |platform| { - if (platform.isBuildVersionCompatible()) { - try load_commands.writeBuildVersionLC(platform, macho_file.options.sdk_version, writer); - ncmds += 1; - } else { - try load_commands.writeVersionMinLC(platform, macho_file.options.sdk_version, writer); - ncmds += 1; - } + if (macho_file.platform.isBuildVersionCompatible()) { + try load_commands.writeBuildVersionLC(macho_file.platform, macho_file.sdk_version, writer); + ncmds += 1; + } else { + try load_commands.writeVersionMinLC(macho_file.platform, macho_file.sdk_version, writer); + ncmds += 1; } assert(cwriter.bytes_written == needed_size); - try macho_file.base.file.pwriteAll(buffer, @sizeOf(macho.mach_header_64)); + try macho_file.base.file.?.pwriteAll(buffer, @sizeOf(macho.mach_header_64)); return .{ ncmds, buffer.len }; } @@ -419,7 +453,7 @@ fn writeHeader(macho_file: *MachO, ncmds: usize, sizeofcmds: usize) !void { header.flags |= macho.MH_SUBSECTIONS_VIA_SYMBOLS; } - switch (macho_file.options.cpu_arch.?) 
{ + switch (macho_file.getTarget().cpu.arch) { .aarch64 => { header.cputype = macho.CPU_TYPE_ARM64; header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL; @@ -434,19 +468,21 @@ fn writeHeader(macho_file: *MachO, ncmds: usize, sizeofcmds: usize) !void { header.ncmds = @intCast(ncmds); header.sizeofcmds = @intCast(sizeofcmds); - try macho_file.base.file.pwriteAll(mem.asBytes(&header), 0); + try macho_file.base.file.?.pwriteAll(mem.asBytes(&header), 0); } const assert = std.debug.assert; const eh_frame = @import("eh_frame.zig"); +const link = @import("../../link.zig"); const load_commands = @import("load_commands.zig"); const macho = std.macho; const math = std.math; const mem = std.mem; -const state_log = std.log.scoped(.state); +const state_log = std.log.scoped(.link_state); const std = @import("std"); -const trace = @import("../tracy.zig").trace; +const trace = @import("../../tracy.zig").trace; const Atom = @import("Atom.zig"); +const Compilation = @import("../../Compilation.zig"); const MachO = @import("../MachO.zig"); const Symbol = @import("Symbol.zig"); From ef9aea75d0b1a0727cbf52be9344cd4c04954f9a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 12 Jan 2024 11:40:07 +0100 Subject: [PATCH 026/133] macho: fix dead stripping logic to exclude debug sections --- src/link/MachO/Atom.zig | 1 + src/link/MachO/dead_strip.zig | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 7cd44b162e..98f417dd51 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -173,6 +173,7 @@ pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 { pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); + assert(self.flags.alive); const object = self.getFile(macho_file).object; const relocs = self.getRelocs(macho_file); diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 7356e65a60..e91682ca58 100644 --- 
a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -91,7 +91,12 @@ fn mark(roots: []*Atom, objects: []const File.Index, macho_file: *MachO) void { for (macho_file.getFile(index).?.getAtoms()) |atom_index| { const atom = macho_file.getAtom(atom_index).?; const isec = atom.getInputSection(macho_file); - if (isec.isDontDeadStripIfReferencesLive() and !atom.flags.alive and refersLive(atom, macho_file)) { + if (isec.isDontDeadStripIfReferencesLive() and + !(mem.eql(u8, isec.sectName(), "__eh_frame") or + mem.eql(u8, isec.sectName(), "__compact_unwind") or + isec.attrs() & macho.S_ATTR_DEBUG != 0) and + !atom.flags.alive and refersLive(atom, macho_file)) + { markLive(atom, macho_file); loop = true; } From ee7a027059d87c10533744920793bca0bdec9687 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 12 Jan 2024 20:54:13 +0100 Subject: [PATCH 027/133] macho: parse dependent dylibs --- src/Compilation.zig | 1 + src/link.zig | 1 + src/link/MachO.zig | 348 ++++++++++++++++++++++++++------------------ 3 files changed, 205 insertions(+), 145 deletions(-) diff --git a/src/Compilation.zig b/src/Compilation.zig index 58f56517c3..2e1a5a6e4f 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -1542,6 +1542,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil .darwin_sdk_layout = libc_dirs.darwin_sdk_layout, .frameworks = options.frameworks, .lib_dirs = options.lib_dirs, + .framework_dirs = options.framework_dirs, .rpath_list = options.rpath_list, .symbol_wrap_set = options.symbol_wrap_set, .allow_shlib_undefined = options.linker_allow_shlib_undefined, diff --git a/src/link.zig b/src/link.zig index 8bd481b399..528ba10d1b 100644 --- a/src/link.zig +++ b/src/link.zig @@ -133,6 +133,7 @@ pub const File = struct { // TODO: remove this. libraries are resolved by the frontend. 
lib_dirs: []const []const u8, + framework_dirs: []const []const u8, rpath_list: []const []const u8, /// (Zig compiler development) Enable dumping of linker's state as JSON. diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 5c8e30400d..759e9b94de 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -98,6 +98,10 @@ headerpad_max_install_names: bool, dead_strip_dylibs: bool, /// Treatment of undefined symbols undefined_treatment: UndefinedTreatment, +/// Resolved list of library search directories +lib_dirs: []const []const u8, +/// Resolved list of framework search directories +framework_dirs: []const []const u8, /// List of input frameworks frameworks: []const Framework, /// Install name for the dylib. @@ -112,6 +116,8 @@ platform: Platform, sdk_version: ?std.SemanticVersion, /// Rpath table rpath_table: std.StringArrayHashMapUnmanaged(void) = .{}, +/// When set to true, the linker will hoist all dylibs including system dependent dylibs. +no_implicit_dylibs: bool = false, /// Hot-code swapping state. 
hot_state: if (is_hot_update_compatible) HotUpdateState else struct {} = .{}, @@ -190,6 +196,8 @@ pub fn createEmpty( .platform = Platform.fromTarget(target), .sdk_version = if (options.darwin_sdk_layout) |layout| inferSdkVersion(comp, layout) else null, .undefined_treatment = if (allow_shlib_undefined) .dynamic_lookup else .@"error", + .lib_dirs = options.lib_dirs, + .framework_dirs = options.framework_dirs, }; if (use_llvm and comp.config.have_zcu) { self.llvm_object = try LlvmObject.create(arena, comp); @@ -483,7 +491,18 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node self.getFile(index).?.dylib.umbrella = index; } - // TODO: try self.parseDependentDylibs(); + if (self.dylibs.items.len > 0) { + self.parseDependentDylibs() catch |err| { + switch (err) { + error.MissingLibraryDependencies => {}, + else => |e| try self.reportUnexpectedError( + "unexpected error while parsing dependent libraries: {s}", + .{@errorName(e)}, + ), + } + return error.FlushFailure; + }; + } for (self.dylibs.items) |index| { const dylib = self.getFile(index).?.dylib; @@ -1056,152 +1075,198 @@ fn parseTbd(self: *MachO, lib: SystemLib, explicit: bool) ParseError!File.Index return index; } -// /// According to ld64's manual, public (i.e., system) dylibs/frameworks are hoisted into the final -// /// image unless overriden by -no_implicit_dylibs. 
-// fn isHoisted(self: *MachO, install_name: []const u8) bool { -// _ = self; -// // TODO: if (self.options.no_implicit_dylibs) return true; -// if (std.fs.path.dirname(install_name)) |dirname| { -// if (mem.startsWith(u8, dirname, "/usr/lib")) return true; -// if (eatPrefix(dirname, "/System/Library/Frameworks/")) |path| { -// const basename = std.fs.path.basename(install_name); -// if (mem.indexOfScalar(u8, path, '.')) |index| { -// if (mem.eql(u8, basename, path[0..index])) return true; -// } -// } -// } -// return false; -// } +/// According to ld64's manual, public (i.e., system) dylibs/frameworks are hoisted into the final +/// image unless overriden by -no_implicit_dylibs. +fn isHoisted(self: *MachO, install_name: []const u8) bool { + if (self.no_implicit_dylibs) return true; + if (std.fs.path.dirname(install_name)) |dirname| { + if (mem.startsWith(u8, dirname, "/usr/lib")) return true; + if (eatPrefix(dirname, "/System/Library/Frameworks/")) |path| { + const basename = std.fs.path.basename(install_name); + if (mem.indexOfScalar(u8, path, '.')) |index| { + if (mem.eql(u8, basename, path[0..index])) return true; + } + } + } + return false; +} -// TODO: -// fn parseDependentDylibs( -// self: *MachO -// ) !void { -// const tracy = trace(@src()); -// defer tracy.end(); +fn accessPath(path: []const u8) !bool { + std.fs.cwd().access(path, .{}) catch |err| switch (err) { + error.FileNotFound => return false, + else => |e| return e, + }; + return true; +} -// const gpa = self.base.comp.gpa; -// const lib_dirs = self.base.comp.lib_dirs; -// const framework_dirs = self.base.comp.framework_dirs; +fn resolveLib(arena: Allocator, search_dirs: []const []const u8, name: []const u8) !?[]const u8 { + const path = try std.fmt.allocPrint(arena, "lib{s}", .{name}); + for (search_dirs) |dir| { + for (&[_][]const u8{ ".tbd", ".dylib" }) |ext| { + const with_ext = try std.fmt.allocPrint(arena, "{s}{s}", .{ path, ext }); + const full_path = try std.fs.path.join(arena, &[_][]const 
u8{ dir, with_ext }); + if (try accessPath(full_path)) return full_path; + } + } + return null; +} -// if (self.dylibs.items.len == 0) return; +fn resolveFramework(arena: Allocator, search_dirs: []const []const u8, name: []const u8) !?[]const u8 { + const prefix = try std.fmt.allocPrint(arena, "{s}.framework", .{name}); + const path = try std.fs.path.join(arena, &[_][]const u8{ prefix, name }); + for (search_dirs) |dir| { + for (&[_][]const u8{ ".tbd", ".dylib" }) |ext| { + const with_ext = try std.fmt.allocPrint(arena, "{s}{s}", .{ path, ext }); + const full_path = try std.fs.path.join(arena, &[_][]const u8{ dir, with_ext }); + if (try accessPath(full_path)) return full_path; + } + } + return null; +} -// var arena = std.heap.ArenaAllocator.init(gpa); -// defer arena.deinit(); +fn parseDependentDylibs(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); -// // TODO handle duplicate dylibs - it is not uncommon to have the same dylib loaded multiple times -// // in which case we should track that and return File.Index immediately instead re-parsing paths. + const gpa = self.base.comp.gpa; + const lib_dirs = self.lib_dirs; + const framework_dirs = self.framework_dirs; -// var index: usize = 0; -// while (index < self.dylibs.items.len) : (index += 1) { -// const dylib_index = self.dylibs.items[index]; + var arena = std.heap.ArenaAllocator.init(gpa); + defer arena.deinit(); -// var dependents = std.ArrayList(File.Index).init(gpa); -// defer dependents.deinit(); -// try dependents.ensureTotalCapacityPrecise(self.getFile(dylib_index).?.dylib.dependents.items.len); + // TODO handle duplicate dylibs - it is not uncommon to have the same dylib loaded multiple times + // in which case we should track that and return File.Index immediately instead re-parsing paths. 
-// const is_weak = self.getFile(dylib_index).?.dylib.weak; -// for (self.getFile(dylib_index).?.dylib.dependents.items) |id| { -// // We will search for the dependent dylibs in the following order: -// // 1. Basename is in search lib directories or framework directories -// // 2. If name is an absolute path, search as-is optionally prepending a syslibroot -// // if specified. -// // 3. If name is a relative path, substitute @rpath, @loader_path, @executable_path with -// // dependees list of rpaths, and search there. -// // 4. Finally, just search the provided relative path directly in CWD. -// const full_path = full_path: { -// fail: { -// const stem = std.fs.path.stem(id.name); -// const framework_name = try std.fmt.allocPrint(gpa, "{s}.framework" ++ std.fs.path.sep_str ++ "{s}", .{ -// stem, -// stem, -// }); -// defer gpa.free(framework_name); + var has_errors = false; + var index: usize = 0; + while (index < self.dylibs.items.len) : (index += 1) { + const dylib_index = self.dylibs.items[index]; -// if (mem.endsWith(u8, id.name, framework_name)) { -// // Framework -// const full_path = (try self.resolveFramework(arena, framework_dirs, stem)) orelse break :fail; -// break :full_path full_path; -// } + var dependents = std.ArrayList(File.Index).init(gpa); + defer dependents.deinit(); + try dependents.ensureTotalCapacityPrecise(self.getFile(dylib_index).?.dylib.dependents.items.len); -// // Library -// const lib_name = eatPrefix(stem, "lib") orelse stem; -// const full_path = (try self.resolveLib(arena, lib_dirs, lib_name)) orelse break :fail; -// break :full_path full_path; -// } + const is_weak = self.getFile(dylib_index).?.dylib.weak; + for (self.getFile(dylib_index).?.dylib.dependents.items) |id| { + // We will search for the dependent dylibs in the following order: + // 1. Basename is in search lib directories or framework directories + // 2. If name is an absolute path, search as-is optionally prepending a syslibroot + // if specified. + // 3. 
If name is a relative path, substitute @rpath, @loader_path, @executable_path with + // dependees list of rpaths, and search there. + // 4. Finally, just search the provided relative path directly in CWD. + const full_path = full_path: { + fail: { + const stem = std.fs.path.stem(id.name); + const framework_name = try std.fmt.allocPrint(gpa, "{s}.framework" ++ std.fs.path.sep_str ++ "{s}", .{ + stem, + stem, + }); + defer gpa.free(framework_name); -// if (std.fs.path.isAbsolute(id.name)) { -// const path = if (self.options.syslibroot) |root| -// try std.fs.path.join(arena, &.{ root, id.name }) -// else -// id.name; -// for (&[_][]const u8{ "", ".tbd", ".dylib" }) |ext| { -// const full_path = try std.fmt.allocPrint(arena, "{s}{s}", .{ path, ext }); -// if (try accessLibPath(full_path)) break :full_path full_path; -// } -// } + if (mem.endsWith(u8, id.name, framework_name)) { + // Framework + const full_path = (try resolveFramework(arena.allocator(), framework_dirs, stem)) orelse break :fail; + break :full_path full_path; + } -// if (eatPrefix(id.name, "@rpath/")) |path| { -// const dylib = self.getFile(dylib_index).?.dylib; -// for (self.getFile(dylib.umbrella).?.dylib.rpaths.keys()) |rpath| { -// const prefix = eatPrefix(rpath, "@loader_path/") orelse rpath; -// const rel_path = try std.fs.path.join(arena, &.{ prefix, path }); -// var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; -// const full_path = std.fs.realpath(rel_path, &buffer) catch continue; -// break :full_path full_path; -// } -// } else if (eatPrefix(id.name, "@loader_path/")) |_| { -// return self.base.fatal("{s}: TODO handle install_name '{s}'", .{ -// self.getFile(dylib_index).?.dylib.path, id.name, -// }); -// } else if (eatPrefix(id.name, "@executable_path/")) |_| { -// return self.base.fatal("{s}: TODO handle install_name '{s}'", .{ -// self.getFile(dylib_index).?.dylib.path, id.name, -// }); -// } + // Library + const lib_name = eatPrefix(stem, "lib") orelse stem; + const full_path = (try 
resolveLib(arena.allocator(), lib_dirs, lib_name)) orelse break :fail; + break :full_path full_path; + } -// var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; -// const full_path = std.fs.realpath(id.name, &buffer) catch { -// dependents.appendAssumeCapacity(0); -// continue; -// }; -// break :full_path full_path; -// }; -// const link_obj = LinkObject{ -// .path = full_path, -// .tag = .obj, -// .weak = is_weak, -// }; -// const file_index = file_index: { -// if (try self.parseDylib(arena, link_obj, false)) |file| break :file_index file; -// if (try self.parseTbd(link_obj, false)) |file| break :file_index file; -// break :file_index @as(File.Index, 0); -// }; -// dependents.appendAssumeCapacity(file_index); -// } + if (std.fs.path.isAbsolute(id.name)) { + const path = if (self.base.comp.sysroot) |root| + try std.fs.path.join(arena.allocator(), &.{ root, id.name }) + else + id.name; + for (&[_][]const u8{ "", ".tbd", ".dylib" }) |ext| { + const full_path = try std.fmt.allocPrint(arena.allocator(), "{s}{s}", .{ path, ext }); + if (try accessPath(full_path)) break :full_path full_path; + } + } -// const dylib = self.getFile(dylib_index).?.dylib; -// for (dylib.dependents.items, dependents.items) |id, file_index| { -// if (self.getFile(file_index)) |file| { -// const dep_dylib = file.dylib; -// dep_dylib.hoisted = self.isHoisted(id.name); -// if (self.getFile(dep_dylib.umbrella) == null) { -// dep_dylib.umbrella = dylib.umbrella; -// } -// if (!dep_dylib.hoisted) { -// const umbrella = dep_dylib.getUmbrella(self); -// for (dep_dylib.exports.items(.name), dep_dylib.exports.items(.flags)) |off, flags| { -// try umbrella.addExport(gpa, dep_dylib.getString(off), flags); -// } -// try umbrella.rpaths.ensureUnusedCapacity(gpa, dep_dylib.rpaths.keys().len); -// for (dep_dylib.rpaths.keys()) |rpath| { -// umbrella.rpaths.putAssumeCapacity(rpath, {}); -// } -// } -// } else self.base.fatal("{s}: unable to resolve dependency {s}", .{ dylib.getUmbrella(self).path, id.name }); 
-// } -// } -// } + if (eatPrefix(id.name, "@rpath/")) |path| { + const dylib = self.getFile(dylib_index).?.dylib; + for (self.getFile(dylib.umbrella).?.dylib.rpaths.keys()) |rpath| { + const prefix = eatPrefix(rpath, "@loader_path/") orelse rpath; + const rel_path = try std.fs.path.join(arena.allocator(), &.{ prefix, path }); + var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; + const full_path = std.fs.realpath(rel_path, &buffer) catch continue; + break :full_path full_path; + } + } else if (eatPrefix(id.name, "@loader_path/")) |_| { + try self.reportParseError2(dylib_index, "TODO handle install_name '{s}'", .{id.name}); + return error.Unhandled; + } else if (eatPrefix(id.name, "@executable_path/")) |_| { + try self.reportParseError2(dylib_index, "TODO handle install_name '{s}'", .{id.name}); + return error.Unhandled; + } + + var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; + const full_path = std.fs.realpath(id.name, &buffer) catch { + dependents.appendAssumeCapacity(0); + continue; + }; + break :full_path full_path; + }; + const lib = SystemLib{ + .path = full_path, + .weak = is_weak, + }; + const file_index = file_index: { + if (try fat.isFatLibrary(lib.path)) { + const fat_arch = try self.parseFatLibrary(lib.path); + if (try Dylib.isDylib(lib.path, fat_arch)) { + break :file_index try self.parseDylib(lib, false, fat_arch); + } else break :file_index @as(File.Index, 0); + } else if (try Dylib.isDylib(lib.path, null)) { + break :file_index try self.parseDylib(lib, false, null); + } else { + const file_index = self.parseTbd(lib, false) catch |err| switch (err) { + error.MalformedTbd => @as(File.Index, 0), + else => |e| return e, + }; + break :file_index file_index; + } + }; + dependents.appendAssumeCapacity(file_index); + } + + const dylib = self.getFile(dylib_index).?.dylib; + for (dylib.dependents.items, dependents.items) |id, file_index| { + if (self.getFile(file_index)) |file| { + const dep_dylib = file.dylib; + dep_dylib.hoisted = 
self.isHoisted(id.name); + if (self.getFile(dep_dylib.umbrella) == null) { + dep_dylib.umbrella = dylib.umbrella; + } + if (!dep_dylib.hoisted) { + const umbrella = dep_dylib.getUmbrella(self); + for (dep_dylib.exports.items(.name), dep_dylib.exports.items(.flags)) |off, flags| { + try umbrella.addExport(gpa, dep_dylib.getString(off), flags); + } + try umbrella.rpaths.ensureUnusedCapacity(gpa, dep_dylib.rpaths.keys().len); + for (dep_dylib.rpaths.keys()) |rpath| { + umbrella.rpaths.putAssumeCapacity(rpath, {}); + } + } + } else { + try self.reportDependencyError( + dylib.getUmbrella(self).index, + id.name, + "unable to resolve dependency", + .{}, + ); + has_errors = true; + } + } + } + + if (has_errors) return error.MissingLibraryDependencies; +} pub fn addUndefinedGlobals(self: *MachO) !void { const gpa = self.base.comp.gpa; @@ -3459,24 +3524,17 @@ fn reportMissingLibraryError( fn reportDependencyError( self: *MachO, - parent: []const u8, + parent: File.Index, path: ?[]const u8, comptime format: []const u8, args: anytype, ) error{OutOfMemory}!void { - const comp = self.base.comp; - const gpa = comp.gpa; - try comp.link_errors.ensureUnusedCapacity(gpa, 1); - var notes = try std.ArrayList(link.File.ErrorMsg).initCapacity(gpa, 2); - defer notes.deinit(); + var err = try self.addErrorWithNotes(2); + try err.addMsg(self, format, args); if (path) |p| { - notes.appendAssumeCapacity(.{ .msg = try std.fmt.allocPrint(gpa, "while parsing {s}", .{p}) }); + try err.addNote(self, "while parsing {s}", .{p}); } - notes.appendAssumeCapacity(.{ .msg = try std.fmt.allocPrint(gpa, "a dependency of {s}", .{parent}) }); - comp.link_errors.appendAssumeCapacity(.{ - .msg = try std.fmt.allocPrint(gpa, format, args), - .notes = try notes.toOwnedSlice(), - }); + try err.addNote(self, "a dependency of {}", .{self.getFile(parent).?.fmtPath()}); } fn reportUnexpectedError(self: *MachO, comptime format: []const u8, args: anytype) error{OutOfMemory}!void { From 
ee68f35bfe742220f3fdebe97548425bb5440da1 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 12 Jan 2024 21:17:53 +0100 Subject: [PATCH 028/133] macho: fix section boundary symbols test --- test/link/macho.zig | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/test/link/macho.zig b/test/link/macho.zig index 8a5016f9b7..fc40ebec33 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -4,15 +4,15 @@ pub fn testAll(b: *std.Build) *Step { const macho_step = b.step("test-macho", "Run MachO tests"); - macho_step.dependOn(testResolvingBoundarySymbols(b, .{ + macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = b.resolveTargetQuery(.{ .os_tag = .macos }), })); return macho_step; } -fn testResolvingBoundarySymbols(b: *std.Build, opts: Options) *Step { - const test_step = addTestStep(b, "macho-resolving-boundary-symbols", opts); +fn testSectionBoundarySymbols(b: *std.Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-section-boundary-symbols", opts); const obj1 = addObject(b, opts, .{ .name = "obj1", @@ -25,10 +25,10 @@ fn testResolvingBoundarySymbols(b: *std.Build, opts: Options) *Step { .name = "main", .zig_source_bytes = \\const std = @import("std"); - \\extern fn interop() [*:0]const u8; + \\extern fn interop() ?[*:0]const u8; \\pub fn main() !void { \\ std.debug.print("All your {s} are belong to us.\n", .{ - \\ std.mem.span(interop()), + \\ if (interop()) |ptr| std.mem.span(ptr) else "(null)", \\ }); \\} , @@ -57,7 +57,7 @@ fn testResolvingBoundarySymbols(b: *std.Build, opts: Options) *Step { const check = exe.checkObject(); check.checkInSymtab(); - check.checkNotPresent("section$start$__DATA_CONST$__message_ptr"); + check.checkNotPresent("external section$start$__DATA_CONST$__message_ptr"); test_step.dependOn(&check.step); } @@ -65,7 +65,7 @@ fn testResolvingBoundarySymbols(b: *std.Build, opts: Options) *Step { const obj3 = addObject(b, opts, .{ .name = "obj3", .cpp_source_bytes = - 
\\extern const char* message_pointer __asm("section$start$__DATA$__message_ptr"); + \\extern const char* message_pointer __asm("section$start$__DATA_CONST$__not_present"); \\extern "C" const char* interop() { \\ return message_pointer; \\} @@ -77,10 +77,15 @@ fn testResolvingBoundarySymbols(b: *std.Build, opts: Options) *Step { exe.addObject(obj3); exe.addObject(main_o); - expectLinkErrors(exe, test_step, .{ .exact = &.{ - "section not found: __DATA,__message_ptr", - "note: while resolving section$start$__DATA$__message_ptr", - } }); + const run = b.addRunArtifact(exe); + run.skip_foreign_checks = true; + run.expectStdErrEqual("All your (null) are belong to us.\n"); + test_step.dependOn(&run.step); + + const check = exe.checkObject(); + check.checkInSymtab(); + check.checkNotPresent("external section$start$__DATA_CONST$__not_present"); + test_step.dependOn(&check.step); } return test_step; From 0a60e4448d27639a4dadccb48c00d22fab7bb790 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 12 Jan 2024 22:50:29 +0100 Subject: [PATCH 029/133] macho: preserve section name for code sections --- src/link/MachO/Atom.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 98f417dd51..ead0e96a50 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -104,7 +104,7 @@ pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 { const segname, const sectname, const flags = blk: { if (sect.isCode()) break :blk .{ "__TEXT", - "__text", + sect.sectName(), macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, }; From 11524e4d0c1e924d49dccc03eb8b0beb71872792 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 12 Jan 2024 23:12:15 +0100 Subject: [PATCH 030/133] test/link/macho: migrate entry_in_dylib test to new test format --- test/link.zig | 4 -- test/link/macho.zig | 66 +++++++++++++++++++++- test/link/macho/entry_in_dylib/bootstrap.c | 5 -- 
test/link/macho/entry_in_dylib/build.zig | 59 ------------------- test/link/macho/entry_in_dylib/main.c | 6 -- 5 files changed, 63 insertions(+), 77 deletions(-) delete mode 100644 test/link/macho/entry_in_dylib/bootstrap.c delete mode 100644 test/link/macho/entry_in_dylib/build.zig delete mode 100644 test/link/macho/entry_in_dylib/main.c diff --git a/test/link.zig b/test/link.zig index 93ccb3c640..49d06b80c9 100644 --- a/test/link.zig +++ b/test/link.zig @@ -131,10 +131,6 @@ pub const cases = [_]Case{ .build_root = "test/link/macho/entry_in_archive", .import = @import("link/macho/entry_in_archive/build.zig"), }, - .{ - .build_root = "test/link/macho/entry_in_dylib", - .import = @import("link/macho/entry_in_dylib/build.zig"), - }, .{ .build_root = "test/link/macho/headerpad", .import = @import("link/macho/headerpad/build.zig"), diff --git a/test/link/macho.zig b/test/link/macho.zig index fc40ebec33..d52ffc98d1 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -4,13 +4,70 @@ pub fn testAll(b: *std.Build) *Step { const macho_step = b.step("test-macho", "Run MachO tests"); - macho_step.dependOn(testSectionBoundarySymbols(b, .{ - .target = b.resolveTargetQuery(.{ .os_tag = .macos }), - })); + const default_target = b.resolveTargetQuery(.{ + .os_tag = .macos, + }); + + macho_step.dependOn(testEntryPointDylib(b, .{ .target = default_target })); + macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = default_target })); return macho_step; } +fn testEntryPointDylib(b: *std.Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-entry-point-dylib", opts); + + const dylib = addSharedLibrary(b, opts, .{ .name = "liba.dylib" }); + addCSourceBytes(dylib, + \\extern int my_main(); + \\int bootstrap() { + \\ return my_main(); + \\} + , &.{}); + dylib.linker_allow_shlib_undefined = true; + + const exe = addExecutable(b, opts, .{ .name = "main" }); + addCSourceBytes(dylib, + \\#include<stdio.h> + \\int my_main() { + \\ fprintf(stdout, "Hello!\n"); + \\
return 0; + \\} + , &.{}); + exe.linkLibrary(dylib); + exe.entry = .{ .symbol_name = "_bootstrap" }; + exe.forceUndefinedSymbol("_my_main"); + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("segname __TEXT"); + check.checkExtract("vmaddr {text_vmaddr}"); + check.checkInHeaders(); + check.checkExact("sectname __stubs"); + check.checkExtract("addr {stubs_vmaddr}"); + check.checkInHeaders(); + check.checkExact("sectname __stubs"); + check.checkExtract("size {stubs_vmsize}"); + check.checkInHeaders(); + check.checkExact("cmd MAIN"); + check.checkExtract("entryoff {entryoff}"); + check.checkComputeCompare("text_vmaddr entryoff +", .{ + .op = .gte, + .value = .{ .variable = "stubs_vmaddr" }, // The entrypoint should be a synthetic stub + }); + check.checkComputeCompare("text_vmaddr entryoff + stubs_vmaddr -", .{ + .op = .lt, + .value = .{ .variable = "stubs_vmsize" }, // The entrypoint should be a synthetic stub + }); + test_step.dependOn(&check.step); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("Hello!\n"); + test_step.dependOn(&run.step); + + return test_step; +} + fn testSectionBoundarySymbols(b: *std.Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-section-boundary-symbols", opts); @@ -95,8 +152,11 @@ fn addTestStep(b: *std.Build, comptime prefix: []const u8, opts: Options) *Step return link.addTestStep(b, "macho-" ++ prefix, opts); } +const addCSourceBytes = link.addCSourceBytes; +const addRunArtifact = link.addRunArtifact; const addObject = link.addObject; const addExecutable = link.addExecutable; +const addSharedLibrary = link.addSharedLibrary; const expectLinkErrors = link.expectLinkErrors; const link = @import("link.zig"); const std = @import("std"); diff --git a/test/link/macho/entry_in_dylib/bootstrap.c b/test/link/macho/entry_in_dylib/bootstrap.c deleted file mode 100644 index 6e9a2b830c..0000000000 --- a/test/link/macho/entry_in_dylib/bootstrap.c +++ /dev/null @@ -1,5 +0,0 @@ -extern int 
my_main(); - -int bootstrap() { - return my_main(); -} diff --git a/test/link/macho/entry_in_dylib/build.zig b/test/link/macho/entry_in_dylib/build.zig deleted file mode 100644 index 7827552bcf..0000000000 --- a/test/link/macho/entry_in_dylib/build.zig +++ /dev/null @@ -1,59 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const lib = b.addSharedLibrary(.{ - .name = "bootstrap", - .optimize = optimize, - .target = b.resolveTargetQuery(.{ .os_tag = .macos }), - }); - lib.addCSourceFile(.{ .file = .{ .path = "bootstrap.c" }, .flags = &.{} }); - lib.linkLibC(); - lib.linker_allow_shlib_undefined = true; - - const exe = b.addExecutable(.{ - .name = "main", - .optimize = optimize, - .target = b.resolveTargetQuery(.{ .os_tag = .macos }), - }); - exe.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &.{} }); - exe.linkLibrary(lib); - exe.linkLibC(); - exe.entry = .{ .symbol_name = "_bootstrap" }; - exe.forceUndefinedSymbol("_my_main"); - - const check_exe = exe.checkObject(); - check_exe.checkInHeaders(); - check_exe.checkExact("segname __TEXT"); - check_exe.checkExtract("vmaddr {text_vmaddr}"); - - check_exe.checkInHeaders(); - check_exe.checkExact("sectname __stubs"); - check_exe.checkExtract("addr {stubs_vmaddr}"); - - check_exe.checkInHeaders(); - check_exe.checkExact("cmd MAIN"); - check_exe.checkExtract("entryoff {entryoff}"); - - check_exe.checkComputeCompare("text_vmaddr entryoff +", .{ - .op = .eq, - .value = .{ .variable = "stubs_vmaddr" }, // The entrypoint should be a synthetic stub - }); - test_step.dependOn(&check_exe.step); - - const run = b.addRunArtifact(exe); - 
run.skip_foreign_checks = true; - run.expectStdOutEqual("Hello!\n"); - test_step.dependOn(&run.step); -} diff --git a/test/link/macho/entry_in_dylib/main.c b/test/link/macho/entry_in_dylib/main.c deleted file mode 100644 index 26173b80ba..0000000000 --- a/test/link/macho/entry_in_dylib/main.c +++ /dev/null @@ -1,6 +0,0 @@ -#include - -int my_main() { - fprintf(stdout, "Hello!\n"); - return 0; -} From 56303d770e8330eb47c12a395ce45e3d448f892d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 13 Jan 2024 12:39:24 +0100 Subject: [PATCH 031/133] macho: fix invalid generation of FDE records --- src/link/MachO/UnwindInfo.zig | 33 +++++++++++++++++++++------ src/link/MachO/eh_frame.zig | 4 ++-- test/link/macho/unwind_info/build.zig | 2 +- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/src/link/MachO/UnwindInfo.zig b/src/link/MachO/UnwindInfo.zig index a993809fd1..ed70b1c083 100644 --- a/src/link/MachO/UnwindInfo.zig +++ b/src/link/MachO/UnwindInfo.zig @@ -65,6 +65,16 @@ pub fn generate(info: *UnwindInfo, macho_file: *MachO) !void { const rec = macho_file.getUnwindRecord(index); if (rec.getFde(macho_file)) |fde| { rec.enc.setDwarfSectionOffset(@intCast(fde.out_offset)); + if (fde.getLsdaAtom(macho_file)) |lsda| { + rec.lsda = lsda.atom_index; + rec.lsda_offset = fde.lsda_offset; + rec.enc.setHasLsda(true); + } + const cie = fde.getCie(macho_file); + if (cie.getPersonality(macho_file)) |_| { + const personality_index = try info.getOrPutPersonalityFunction(cie.personality.?.index); // TODO handle error + rec.enc.setPersonalityIndex(personality_index + 1); + } } else if (rec.getPersonality(macho_file)) |_| { const personality_index = try info.getOrPutPersonalityFunction(rec.personality.?); // TODO handle error rec.enc.setPersonalityIndex(personality_index + 1); @@ -232,11 +242,13 @@ pub fn generate(info: *UnwindInfo, macho_file: *MachO) !void { } // Save records having an LSDA pointer + log.debug("LSDA pointers:", .{}); try 
info.lsdas_lookup.ensureTotalCapacityPrecise(gpa, info.records.items.len); for (info.records.items, 0..) |index, i| { const rec = macho_file.getUnwindRecord(index); info.lsdas_lookup.appendAssumeCapacity(@intCast(info.lsdas.items.len)); - if (rec.getLsdaAtom(macho_file)) |_| { + if (rec.getLsdaAtom(macho_file)) |lsda| { + log.debug(" @{x} => lsda({d})", .{ rec.getAtomAddress(macho_file), lsda.atom_index }); try info.lsdas.append(gpa, @intCast(i)); } } @@ -367,7 +379,8 @@ pub const Encoding = extern struct { pub fn getMode(enc: Encoding) u4 { comptime assert(macho.UNWIND_ARM64_MODE_MASK == macho.UNWIND_X86_64_MODE_MASK); - return @as(u4, @truncate((enc.enc & macho.UNWIND_ARM64_MODE_MASK) >> 24)); + const shift = comptime @ctz(macho.UNWIND_ARM64_MODE_MASK); + return @as(u4, @truncate((enc.enc & macho.UNWIND_ARM64_MODE_MASK) >> shift)); } pub fn isDwarf(enc: Encoding, macho_file: *MachO) bool { @@ -380,26 +393,32 @@ pub const Encoding = extern struct { } pub fn setMode(enc: *Encoding, mode: anytype) void { - enc.enc |= @as(u32, @intCast(@intFromEnum(mode))) << 24; + comptime assert(macho.UNWIND_ARM64_MODE_MASK == macho.UNWIND_X86_64_MODE_MASK); + const shift = comptime @ctz(macho.UNWIND_ARM64_MODE_MASK); + enc.enc |= @as(u32, @intCast(@intFromEnum(mode))) << shift; } pub fn hasLsda(enc: Encoding) bool { - const has_lsda = @as(u1, @truncate((enc.enc & macho.UNWIND_HAS_LSDA) >> 31)); + const shift = comptime @ctz(macho.UNWIND_HAS_LSDA); + const has_lsda = @as(u1, @truncate((enc.enc & macho.UNWIND_HAS_LSDA) >> shift)); return has_lsda == 1; } pub fn setHasLsda(enc: *Encoding, has_lsda: bool) void { - const mask = @as(u32, @intCast(@intFromBool(has_lsda))) << 31; + const shift = comptime @ctz(macho.UNWIND_HAS_LSDA); + const mask = @as(u32, @intCast(@intFromBool(has_lsda))) << shift; enc.enc |= mask; } pub fn getPersonalityIndex(enc: Encoding) u2 { - const index = @as(u2, @truncate((enc.enc & macho.UNWIND_PERSONALITY_MASK) >> 28)); + const shift = comptime 
@ctz(macho.UNWIND_PERSONALITY_MASK); + const index = @as(u2, @truncate((enc.enc & macho.UNWIND_PERSONALITY_MASK) >> shift)); return index; } pub fn setPersonalityIndex(enc: *Encoding, index: u2) void { - const mask = @as(u32, @intCast(index)) << 28; + const shift = comptime @ctz(macho.UNWIND_PERSONALITY_MASK); + const mask = @as(u32, @intCast(index)) << shift; enc.enc |= mask; } diff --git a/src/link/MachO/eh_frame.zig b/src/link/MachO/eh_frame.zig index 56d81ba93a..24b3d751a4 100644 --- a/src/link/MachO/eh_frame.zig +++ b/src/link/MachO/eh_frame.zig @@ -426,9 +426,9 @@ pub fn write(macho_file: *MachO, buffer: []u8) void { } if (fde.getLsdaAtom(macho_file)) |atom| { - const offset = fde.out_offset + fde.lsda_offset; + const offset = fde.out_offset + fde.lsda_ptr_offset; const saddr = sect.addr + offset; - const taddr = atom.value; + const taddr = atom.value + fde.lsda_offset; switch (fde.getCie(macho_file).lsda_size.?) { .p32 => std.mem.writeInt( i32, diff --git a/test/link/macho/unwind_info/build.zig b/test/link/macho/unwind_info/build.zig index 534cc4e51a..33af6016f9 100644 --- a/test/link/macho/unwind_info/build.zig +++ b/test/link/macho/unwind_info/build.zig @@ -46,7 +46,7 @@ fn testUnwindInfo( } check.checkInSymtab(); - check.checkContains("(__TEXT,__text) private external ___gxx_personality_v0"); + check.checkContains("(was private external) ___gxx_personality_v0"); test_step.dependOn(&check.step); const run = b.addRunArtifact(exe); From aa50bca1516f6fc2c9d6c7ca66ef8585b4b5e197 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 13 Jan 2024 12:49:54 +0100 Subject: [PATCH 032/133] test/link/elf: make invalid input file test less janky --- test/link/elf.zig | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/test/link/elf.zig b/test/link/elf.zig index 0d76a5b64b..1f5c1978ce 100644 --- a/test/link/elf.zig +++ b/test/link/elf.zig @@ -3609,12 +3609,17 @@ fn testUnknownFileTypeError(b: *Build, opts: Options) *Step { 
exe.linkLibrary(dylib); exe.linkLibC(); - expectLinkErrors(exe, test_step, .{ .exact = &.{ - "invalid token in LD script: '\\x00\\x00\\x00\\x0c\\x00\\x00\\x00/usr/lib/dyld\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x0d' (0:829)", - "note: while parsing /?/liba.dylib", - "unexpected error: parsing input file failed with error InvalidLdScript", - "note: while parsing /?/liba.dylib", - } }); + // TODO: improve the test harness to be able to selectively match lines in error output + // while avoiding jankiness + // expectLinkErrors(exe, test_step, .{ .exact = &.{ + // "error: invalid token in LD script: '\\x00\\x00\\x00\\x0c\\x00\\x00\\x00/usr/lib/dyld\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x0d' (0:989)", + // "note: while parsing /?/liba.dylib", + // "error: unexpected error: parsing input file failed with error InvalidLdScript", + // "note: while parsing /?/liba.dylib", + // } }); + expectLinkErrors(exe, test_step, .{ + .contains = "error: unexpected error: parsing input file failed with error InvalidLdScript", + }); return test_step; } From 6cdcf61a5ce193104c3c0f14189014f65bcd104d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 13 Jan 2024 18:09:32 +0100 Subject: [PATCH 033/133] test/link/macho: upgrade and migrate dead_strip test --- test/link.zig | 4 -- test/link/macho.zig | 82 ++++++++++++++++++++++++++++ test/link/macho/dead_strip/build.zig | 58 -------------------- test/link/macho/dead_strip/main.c | 14 ----- 4 files changed, 82 insertions(+), 76 deletions(-) delete mode 100644 test/link/macho/dead_strip/build.zig delete mode 100644 test/link/macho/dead_strip/main.c diff --git a/test/link.zig b/test/link.zig index 49d06b80c9..a8cdfe19ad 100644 --- a/test/link.zig +++ b/test/link.zig @@ -107,10 +107,6 @@ pub const cases = [_]Case{ .build_root = "test/link/macho/bugs/16628", .import = @import("link/macho/bugs/16628/build.zig"), }, - .{ - .build_root = "test/link/macho/dead_strip", - .import = @import("link/macho/dead_strip/build.zig"), - }, .{ .build_root = 
"test/link/macho/dead_strip_dylibs", .import = @import("link/macho/dead_strip_dylibs/build.zig"), diff --git a/test/link/macho.zig b/test/link/macho.zig index d52ffc98d1..77e4531a96 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -8,12 +8,94 @@ pub fn testAll(b: *std.Build) *Step { .os_tag = .macos, }); + macho_step.dependOn(testDeadStrip(b, .{ .target = default_target })); macho_step.dependOn(testEntryPointDylib(b, .{ .target = default_target })); macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = default_target })); return macho_step; } +fn testDeadStrip(b: *std.Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-dead-strip", opts); + + const obj = addObject(b, opts, .{ .name = "a", .cpp_source_bytes = + \\#include + \\int two() { return 2; } + \\int live_var1 = 1; + \\int live_var2 = two(); + \\int dead_var1 = 3; + \\int dead_var2 = 4; + \\void live_fn1() {} + \\void live_fn2() { live_fn1(); } + \\void dead_fn1() {} + \\void dead_fn2() { dead_fn1(); } + \\int main() { + \\ printf("%d %d\n", live_var1, live_var2); + \\ live_fn2(); + \\} + }); + + { + const exe = addExecutable(b, opts, .{ .name = "no_dead_strip" }); + exe.addObject(obj); + exe.link_gc_sections = false; + + const check = exe.checkObject(); + check.checkInSymtab(); + check.checkContains("live_var1"); + check.checkInSymtab(); + check.checkContains("live_var2"); + check.checkInSymtab(); + check.checkContains("dead_var1"); + check.checkInSymtab(); + check.checkContains("dead_var2"); + check.checkInSymtab(); + check.checkContains("live_fn1"); + check.checkInSymtab(); + check.checkContains("live_fn2"); + check.checkInSymtab(); + check.checkContains("dead_fn1"); + check.checkInSymtab(); + check.checkContains("dead_fn2"); + test_step.dependOn(&check.step); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("1 2\n"); + test_step.dependOn(&run.step); + } + + { + const exe = addExecutable(b, opts, .{ .name = "yes_dead_strip" }); + exe.addObject(obj); + 
exe.link_gc_sections = true; + + const check = exe.checkObject(); + check.checkInSymtab(); + check.checkContains("live_var1"); + check.checkInSymtab(); + check.checkContains("live_var2"); + check.checkInSymtab(); + check.checkNotPresent("dead_var1"); + check.checkInSymtab(); + check.checkNotPresent("dead_var2"); + check.checkInSymtab(); + check.checkContains("live_fn1"); + check.checkInSymtab(); + check.checkContains("live_fn2"); + check.checkInSymtab(); + check.checkNotPresent("dead_fn1"); + check.checkInSymtab(); + check.checkNotPresent("dead_fn2"); + test_step.dependOn(&check.step); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("1 2\n"); + test_step.dependOn(&run.step); + } + + return test_step; +} + fn testEntryPointDylib(b: *std.Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-entry-point-dylib", opts); diff --git a/test/link/macho/dead_strip/build.zig b/test/link/macho/dead_strip/build.zig deleted file mode 100644 index a5bb28df9f..0000000000 --- a/test/link/macho/dead_strip/build.zig +++ /dev/null @@ -1,58 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const optimize: std.builtin.OptimizeMode = .Debug; - const target = b.resolveTargetQuery(.{ .os_tag = .macos }); - - const test_step = b.step("test", "Test the program"); - b.default_step = test_step; - - { - // Without -dead_strip, we expect `iAmUnused` symbol present - const exe = createScenario(b, optimize, target, "no-gc"); - - const check = exe.checkObject(); - check.checkInSymtab(); - check.checkContains("(__TEXT,__text) external _iAmUnused"); - test_step.dependOn(&check.step); - - const run = b.addRunArtifact(exe); - run.skip_foreign_checks = true; - run.expectStdOutEqual("Hello!\n"); - test_step.dependOn(&run.step); - } - - { - // With -dead_strip, no `iAmUnused` symbol should be present - const exe = createScenario(b, optimize, target, "yes-gc"); - exe.link_gc_sections = true; - - const check = 
exe.checkObject(); - check.checkInSymtab(); - check.checkNotPresent("(__TEXT,__text) external _iAmUnused"); - test_step.dependOn(&check.step); - - const run = b.addRunArtifact(exe); - run.skip_foreign_checks = true; - run.expectStdOutEqual("Hello!\n"); - test_step.dependOn(&run.step); - } -} - -fn createScenario( - b: *std.Build, - optimize: std.builtin.OptimizeMode, - target: std.Build.ResolvedTarget, - name: []const u8, -) *std.Build.Step.Compile { - const exe = b.addExecutable(.{ - .name = name, - .optimize = optimize, - .target = target, - }); - exe.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &[0][]const u8{} }); - exe.linkLibC(); - return exe; -} diff --git a/test/link/macho/dead_strip/main.c b/test/link/macho/dead_strip/main.c deleted file mode 100644 index 4756e2ca13..0000000000 --- a/test/link/macho/dead_strip/main.c +++ /dev/null @@ -1,14 +0,0 @@ -#include - -void printMe() { - printf("Hello!\n"); -} - -int main(int argc, char* argv[]) { - printMe(); - return 0; -} - -void iAmUnused() { - printf("YOU SHALL NOT PASS!\n"); -} From ffd7f7f6427139b1391d3f2c3d8a02c552ecebf8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 13 Jan 2024 18:10:34 +0100 Subject: [PATCH 034/133] test/link/macho: fix naming in entry-in-dylib test --- test/link/macho.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/link/macho.zig b/test/link/macho.zig index 77e4531a96..fcd4100514 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -99,7 +99,7 @@ fn testDeadStrip(b: *std.Build, opts: Options) *Step { fn testEntryPointDylib(b: *std.Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-entry-point-dylib", opts); - const dylib = addSharedLibrary(b, opts, .{ .name = "liba.dylib" }); + const dylib = addSharedLibrary(b, opts, .{ .name = "a" }); addCSourceBytes(dylib, \\extern int my_main(); \\int bootstrap() { From 041f7d69f0656254cfa62a59af2fa65ac1b1c433 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 13 Jan 2024 
18:18:17 +0100 Subject: [PATCH 035/133] test/link/macho: test segment boundary symbols --- test/link/macho.zig | 69 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/test/link/macho.zig b/test/link/macho.zig index fcd4100514..cc5bc7878e 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -11,6 +11,7 @@ pub fn testAll(b: *std.Build) *Step { macho_step.dependOn(testDeadStrip(b, .{ .target = default_target })); macho_step.dependOn(testEntryPointDylib(b, .{ .target = default_target })); macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = default_target })); + macho_step.dependOn(testSegmentBoundarySymbols(b, .{ .target = default_target })); return macho_step; } @@ -230,6 +231,74 @@ fn testSectionBoundarySymbols(b: *std.Build, opts: Options) *Step { return test_step; } +fn testSegmentBoundarySymbols(b: *std.Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-segment-boundary-symbols", opts); + + const obj1 = addObject(b, opts, .{ .name = "a", .cpp_source_bytes = + \\constexpr const char* MESSAGE __attribute__((used, section("__DATA_CONST_1,__message_ptr"))) = "codebase"; + }); + + const main_o = addObject(b, opts, .{ .name = "main", .c_source_bytes = + \\#include + \\const char* interop(); + \\int main() { + \\ printf("All your %s are belong to us.\n", interop()); + \\ return 0; + \\} + }); + + { + const obj2 = addObject(b, opts, .{ .name = "b", .cpp_source_bytes = + \\extern const char* message_pointer __asm("segment$start$__DATA_CONST_1"); + \\extern "C" const char* interop() { + \\ return message_pointer; + \\} + }); + + const exe = addExecutable(b, opts, .{ .name = "main" }); + exe.addObject(obj1); + exe.addObject(obj2); + exe.addObject(main_o); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("All your codebase are belong to us.\n"); + test_step.dependOn(&run.step); + + const check = exe.checkObject(); + check.checkInSymtab(); + check.checkNotPresent("external 
segment$start$__DATA_CONST_1"); + test_step.dependOn(&check.step); + } + + { + const obj2 = addObject(b, opts, .{ .name = "c", .cpp_source_bytes = + \\extern const char* message_pointer __asm("segment$start$__DATA_1"); + \\extern "C" const char* interop() { + \\ return message_pointer; + \\} + }); + + const exe = addExecutable(b, opts, .{ .name = "main2" }); + exe.addObject(obj1); + exe.addObject(obj2); + exe.addObject(main_o); + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("cmd SEGMENT_64"); + check.checkExact("segname __DATA_1"); + check.checkExtract("vmsize {vmsize}"); + check.checkExtract("filesz {filesz}"); + check.checkComputeCompare("vmsize", .{ .op = .eq, .value = .{ .literal = 0 } }); + check.checkComputeCompare("filesz", .{ .op = .eq, .value = .{ .literal = 0 } }); + check.checkInSymtab(); + check.checkNotPresent("external segment$start$__DATA_1"); + test_step.dependOn(&check.step); + } + + return test_step; +} + fn addTestStep(b: *std.Build, comptime prefix: []const u8, opts: Options) *Step { return link.addTestStep(b, "macho-" ++ prefix, opts); } From d93a0763d4c732fcdb3ed891d5b44a10943ccfed Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 13 Jan 2024 18:49:27 +0100 Subject: [PATCH 036/133] test/link/link: pass build options to elf and macho tests --- test/link.zig | 4 -- test/link/elf.zig | 4 +- test/link/link.zig | 20 +++++++++- test/link/macho.zig | 28 +++++++++++++- test/link/macho/needed_framework/build.zig | 37 ------------------- test/link/macho/needed_framework/main.c | 3 -- test/tests.zig | 43 ++++++++++++++-------- 7 files changed, 76 insertions(+), 63 deletions(-) delete mode 100644 test/link/macho/needed_framework/build.zig delete mode 100644 test/link/macho/needed_framework/main.c diff --git a/test/link.zig b/test/link.zig index a8cdfe19ad..259166f0e0 100644 --- a/test/link.zig +++ b/test/link.zig @@ -135,10 +135,6 @@ pub const cases = [_]Case{ .build_root = "test/link/macho/linksection", .import = 
@import("link/macho/linksection/build.zig"), }, - .{ - .build_root = "test/link/macho/needed_framework", - .import = @import("link/macho/needed_framework/build.zig"), - }, .{ .build_root = "test/link/macho/needed_library", .import = @import("link/macho/needed_library/build.zig"), diff --git a/test/link/elf.zig b/test/link/elf.zig index 1f5c1978ce..8c02128521 100644 --- a/test/link/elf.zig +++ b/test/link/elf.zig @@ -2,7 +2,8 @@ //! Currently, we support linking x86_64 Linux, but in the future we //! will progressively relax those to exercise more combinations. -pub fn testAll(b: *Build) *Step { +pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { + _ = build_opts; const elf_step = b.step("test-elf", "Run ELF tests"); const default_target = b.resolveTargetQuery(.{ @@ -3901,6 +3902,7 @@ const link = @import("link.zig"); const std = @import("std"); const Build = std.Build; +const BuildOptions = link.BuildOptions; const Options = link.Options; const Step = Build.Step; const WriteFile = Step.WriteFile; diff --git a/test/link/link.zig b/test/link/link.zig index ccce03bb0b..c17f8b0b5a 100644 --- a/test/link/link.zig +++ b/test/link/link.zig @@ -2,10 +2,26 @@ pub fn build(b: *Build) void { const test_step = b.step("test-link", "Run link tests"); b.default_step = test_step; - test_step.dependOn(@import("elf.zig").testAll(b)); - test_step.dependOn(@import("macho.zig").testAll(b)); + const has_macos_sdk = b.option(bool, "has_macos_sdk", "whether the host provides a macOS SDK in system path"); + const has_ios_sdk = b.option(bool, "has_ios_sdk", "whether the host provides a iOS SDK in system path"); + const has_symlinks_windows = b.option(bool, "has_symlinks_windows", "whether the host is windows and has symlinks enabled"); + + const build_opts: BuildOptions = .{ + .has_macos_sdk = has_macos_sdk orelse false, + .has_ios_sdk = has_ios_sdk orelse false, + .has_symlinks_windows = has_symlinks_windows orelse false, + }; + + test_step.dependOn(@import("elf.zig").testAll(b, 
build_opts)); + test_step.dependOn(@import("macho.zig").testAll(b, build_opts)); } +pub const BuildOptions = struct { + has_macos_sdk: bool, + has_ios_sdk: bool, + has_symlinks_windows: bool, +}; + pub const Options = struct { target: std.Build.ResolvedTarget, optimize: std.builtin.OptimizeMode = .Debug, diff --git a/test/link/macho.zig b/test/link/macho.zig index cc5bc7878e..b9e5ee4349 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -1,7 +1,7 @@ //! Here we test our MachO linker for correctness and functionality. //! TODO migrate standalone tests from test/link/macho/* to here. -pub fn testAll(b: *std.Build) *Step { +pub fn testAll(b: *std.Build, build_opts: BuildOptions) *Step { const macho_step = b.step("test-macho", "Run MachO tests"); const default_target = b.resolveTargetQuery(.{ @@ -13,6 +13,11 @@ pub fn testAll(b: *std.Build) *Step { macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = default_target })); macho_step.dependOn(testSegmentBoundarySymbols(b, .{ .target = default_target })); + // Tests requiring presence of macOS SDK in system path + if (build_opts.has_macos_sdk and build_opts.has_symlinks_windows) { + macho_step.dependOn(testNeededFramework(b, .{ .target = b.host })); + } + return macho_step; } @@ -151,6 +156,26 @@ fn testEntryPointDylib(b: *std.Build, opts: Options) *Step { return test_step; } +fn testNeededFramework(b: *std.Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-needed-framework", opts); + + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = "int main() { return 0; }" }); + exe.linkFrameworkNeeded("Cocoa"); + exe.dead_strip_dylibs = true; + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("cmd LOAD_DYLIB"); + check.checkContains("Cocoa"); + test_step.dependOn(&check.step); + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + + return test_step; +} + fn testSectionBoundarySymbols(b: 
*std.Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-section-boundary-symbols", opts); @@ -311,5 +336,6 @@ const addSharedLibrary = link.addSharedLibrary; const expectLinkErrors = link.expectLinkErrors; const link = @import("link.zig"); const std = @import("std"); +const BuildOptions = link.BuildOptions; const Options = link.Options; const Step = std.Build.Step; diff --git a/test/link/macho/needed_framework/build.zig b/test/link/macho/needed_framework/build.zig deleted file mode 100644 index 83a3e75e2d..0000000000 --- a/test/link/macho/needed_framework/build.zig +++ /dev/null @@ -1,37 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; -pub const requires_macos_sdk = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - // -dead_strip_dylibs - // -needed_framework Cocoa - const exe = b.addExecutable(.{ - .name = "test", - .optimize = optimize, - .target = b.host, - }); - exe.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &[0][]const u8{} }); - exe.linkLibC(); - exe.linkFrameworkNeeded("Cocoa"); - exe.dead_strip_dylibs = true; - - const check = exe.checkObject(); - check.checkInHeaders(); - check.checkExact("cmd LOAD_DYLIB"); - check.checkContains("Cocoa"); - test_step.dependOn(&check.step); - - const run_cmd = b.addRunArtifact(exe); - test_step.dependOn(&run_cmd.step); -} diff --git a/test/link/macho/needed_framework/main.c b/test/link/macho/needed_framework/main.c deleted file mode 100644 index ca68d24cc7..0000000000 --- a/test/link/macho/needed_framework/main.c +++ /dev/null @@ -1,3 +0,0 @@ -int main(int argc, char* argv[]) { - return 0; -} diff --git a/test/tests.zig b/test/tests.zig index 
b2fb1e4bca..9dcc77b141 100644 --- a/test/tests.zig +++ b/test/tests.zig @@ -750,26 +750,39 @@ pub fn addLinkTests( const omit_symlinks = builtin.os.tag == .windows and !enable_symlinks_windows; inline for (link.cases) |case| { - const requires_stage2 = @hasDecl(case.import, "requires_stage2") and - case.import.requires_stage2; - const requires_symlinks = @hasDecl(case.import, "requires_symlinks") and - case.import.requires_symlinks; - const requires_macos_sdk = @hasDecl(case.import, "requires_macos_sdk") and - case.import.requires_macos_sdk; - const requires_ios_sdk = @hasDecl(case.import, "requires_ios_sdk") and - case.import.requires_ios_sdk; - const bad = - (requires_stage2 and omit_stage2) or - (requires_symlinks and omit_symlinks) or - (requires_macos_sdk and !enable_macos_sdk) or - (requires_ios_sdk and !enable_ios_sdk); - if (!bad) { - const dep = b.anonymousDependency(case.build_root, case.import, .{}); + if (mem.eql(u8, @typeName(case.import), "test.link.link")) { + const dep = b.anonymousDependency(case.build_root, case.import, .{ + .has_macos_sdk = enable_macos_sdk, + .has_ios_sdk = enable_ios_sdk, + .has_symlinks_windows = !omit_symlinks, + }); const dep_step = dep.builder.default_step; assert(mem.startsWith(u8, dep.builder.dep_prefix, "test.")); const dep_prefix_adjusted = dep.builder.dep_prefix["test.".len..]; dep_step.name = b.fmt("{s}{s}", .{ dep_prefix_adjusted, dep_step.name }); step.dependOn(dep_step); + } else { + const requires_stage2 = @hasDecl(case.import, "requires_stage2") and + case.import.requires_stage2; + const requires_symlinks = @hasDecl(case.import, "requires_symlinks") and + case.import.requires_symlinks; + const requires_macos_sdk = @hasDecl(case.import, "requires_macos_sdk") and + case.import.requires_macos_sdk; + const requires_ios_sdk = @hasDecl(case.import, "requires_ios_sdk") and + case.import.requires_ios_sdk; + const bad = + (requires_stage2 and omit_stage2) or + (requires_symlinks and omit_symlinks) or + (requires_macos_sdk 
and !enable_macos_sdk) or + (requires_ios_sdk and !enable_ios_sdk); + if (!bad) { + const dep = b.anonymousDependency(case.build_root, case.import, .{}); + const dep_step = dep.builder.default_step; + assert(mem.startsWith(u8, dep.builder.dep_prefix, "test.")); + const dep_prefix_adjusted = dep.builder.dep_prefix["test.".len..]; + dep_step.name = b.fmt("{s}{s}", .{ dep_prefix_adjusted, dep_step.name }); + step.dependOn(dep_step); + } } } From 49c11e0c3404b28af8b98994bad15db138272fa7 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 13 Jan 2024 19:02:29 +0100 Subject: [PATCH 037/133] test/link/macho: upgrade and migrate needed_library test --- test/link.zig | 4 -- test/link/macho.zig | 55 ++++++++++++++++++------ test/link/macho/needed_library/a.c | 1 - test/link/macho/needed_library/build.zig | 51 ---------------------- test/link/macho/needed_library/main.c | 3 -- 5 files changed, 43 insertions(+), 71 deletions(-) delete mode 100644 test/link/macho/needed_library/a.c delete mode 100644 test/link/macho/needed_library/build.zig delete mode 100644 test/link/macho/needed_library/main.c diff --git a/test/link.zig b/test/link.zig index 259166f0e0..d934e54a82 100644 --- a/test/link.zig +++ b/test/link.zig @@ -135,10 +135,6 @@ pub const cases = [_]Case{ .build_root = "test/link/macho/linksection", .import = @import("link/macho/linksection/build.zig"), }, - .{ - .build_root = "test/link/macho/needed_library", - .import = @import("link/macho/needed_library/build.zig"), - }, .{ .build_root = "test/link/macho/objc", .import = @import("link/macho/objc/build.zig"), diff --git a/test/link/macho.zig b/test/link/macho.zig index b9e5ee4349..f3b0b7bcee 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -1,7 +1,7 @@ //! Here we test our MachO linker for correctness and functionality. //! TODO migrate standalone tests from test/link/macho/* to here. 
-pub fn testAll(b: *std.Build, build_opts: BuildOptions) *Step { +pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { const macho_step = b.step("test-macho", "Run MachO tests"); const default_target = b.resolveTargetQuery(.{ @@ -13,15 +13,20 @@ pub fn testAll(b: *std.Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = default_target })); macho_step.dependOn(testSegmentBoundarySymbols(b, .{ .target = default_target })); - // Tests requiring presence of macOS SDK in system path - if (build_opts.has_macos_sdk and build_opts.has_symlinks_windows) { - macho_step.dependOn(testNeededFramework(b, .{ .target = b.host })); + // Tests requiring symlinks when tested on Windows + if (build_opts.has_symlinks_windows) { + macho_step.dependOn(testNeededLibrary(b, .{ .target = default_target })); + + // Tests requiring presence of macOS SDK in system path + if (build_opts.has_macos_sdk) { + macho_step.dependOn(testNeededFramework(b, .{ .target = b.host })); + } } return macho_step; } -fn testDeadStrip(b: *std.Build, opts: Options) *Step { +fn testDeadStrip(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-dead-strip", opts); const obj = addObject(b, opts, .{ .name = "a", .cpp_source_bytes = @@ -102,7 +107,7 @@ fn testDeadStrip(b: *std.Build, opts: Options) *Step { return test_step; } -fn testEntryPointDylib(b: *std.Build, opts: Options) *Step { +fn testEntryPointDylib(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-entry-point-dylib", opts); const dylib = addSharedLibrary(b, opts, .{ .name = "a" }); @@ -156,11 +161,11 @@ fn testEntryPointDylib(b: *std.Build, opts: Options) *Step { return test_step; } -fn testNeededFramework(b: *std.Build, opts: Options) *Step { +fn testNeededFramework(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-needed-framework", opts); const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = "int main() { return 
0; }" }); - exe.linkFrameworkNeeded("Cocoa"); + exe.root_module.linkFramework("Cocoa", .{ .needed = true }); exe.dead_strip_dylibs = true; const check = exe.checkObject(); @@ -176,7 +181,31 @@ fn testNeededFramework(b: *std.Build, opts: Options) *Step { return test_step; } -fn testSectionBoundarySymbols(b: *std.Build, opts: Options) *Step { +fn testNeededLibrary(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-needed-library", opts); + + const dylib = addSharedLibrary(b, opts, .{ .name = "a", .c_source_bytes = "int a = 42;" }); + + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = "int main() { return 0; }" }); + exe.root_module.linkSystemLibrary("a", .{ .needed = true }); + exe.addLibraryPath(dylib.getEmittedBinDirectory()); + exe.addRPath(dylib.getEmittedBinDirectory()); + exe.dead_strip_dylibs = true; + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("cmd LOAD_DYLIB"); + check.checkContains("liba.dylib"); + test_step.dependOn(&check.step); + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + + return test_step; +} + +fn testSectionBoundarySymbols(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-section-boundary-symbols", opts); const obj1 = addObject(b, opts, .{ @@ -256,7 +285,7 @@ fn testSectionBoundarySymbols(b: *std.Build, opts: Options) *Step { return test_step; } -fn testSegmentBoundarySymbols(b: *std.Build, opts: Options) *Step { +fn testSegmentBoundarySymbols(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-segment-boundary-symbols", opts); const obj1 = addObject(b, opts, .{ .name = "a", .cpp_source_bytes = @@ -324,7 +353,7 @@ fn testSegmentBoundarySymbols(b: *std.Build, opts: Options) *Step { return test_step; } -fn addTestStep(b: *std.Build, comptime prefix: []const u8, opts: Options) *Step { +fn addTestStep(b: *Build, comptime prefix: []const u8, opts: Options) *Step { return 
link.addTestStep(b, "macho-" ++ prefix, opts); } @@ -336,6 +365,8 @@ const addSharedLibrary = link.addSharedLibrary; const expectLinkErrors = link.expectLinkErrors; const link = @import("link.zig"); const std = @import("std"); + +const Build = std.Build; const BuildOptions = link.BuildOptions; const Options = link.Options; -const Step = std.Build.Step; +const Step = Build.Step; diff --git a/test/link/macho/needed_library/a.c b/test/link/macho/needed_library/a.c deleted file mode 100644 index 4bcf8c9786..0000000000 --- a/test/link/macho/needed_library/a.c +++ /dev/null @@ -1 +0,0 @@ -int a = 42; diff --git a/test/link/macho/needed_library/build.zig b/test/link/macho/needed_library/build.zig deleted file mode 100644 index a07493a8b1..0000000000 --- a/test/link/macho/needed_library/build.zig +++ /dev/null @@ -1,51 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const target = b.resolveTargetQuery(.{ .os_tag = .macos }); - - const dylib = b.addSharedLibrary(.{ - .name = "a", - .version = .{ .major = 1, .minor = 0, .patch = 0 }, - .optimize = optimize, - .target = target, - }); - dylib.addCSourceFile(.{ .file = .{ .path = "a.c" }, .flags = &.{} }); - dylib.linkLibC(); - - // -dead_strip_dylibs - // -needed-la - const exe = b.addExecutable(.{ - .name = "test", - .optimize = optimize, - .target = target, - }); - exe.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &[0][]const u8{} }); - exe.linkLibC(); - exe.root_module.linkSystemLibrary("a", .{ .needed = true }); - exe.addLibraryPath(dylib.getEmittedBinDirectory()); - exe.addRPath(dylib.getEmittedBinDirectory()); - exe.dead_strip_dylibs = 
true; - - const check = exe.checkObject(); - check.checkInHeaders(); - check.checkExact("cmd LOAD_DYLIB"); - check.checkExact("name @rpath/liba.dylib"); - test_step.dependOn(&check.step); - - const run = b.addRunArtifact(exe); - run.skip_foreign_checks = true; - run.expectStdOutEqual(""); - test_step.dependOn(&run.step); -} diff --git a/test/link/macho/needed_library/main.c b/test/link/macho/needed_library/main.c deleted file mode 100644 index ca68d24cc7..0000000000 --- a/test/link/macho/needed_library/main.c +++ /dev/null @@ -1,3 +0,0 @@ -int main(int argc, char* argv[]) { - return 0; -} From faa1849f81fd1478f2340ab6ccdc8d71d1c5d102 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 13 Jan 2024 19:09:40 +0100 Subject: [PATCH 038/133] test/link/macho: test for correct handling of __mh_execute_header symbol --- test/link/macho.zig | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/link/macho.zig b/test/link/macho.zig index f3b0b7bcee..bbcd7b31bd 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -10,6 +10,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testDeadStrip(b, .{ .target = default_target })); macho_step.dependOn(testEntryPointDylib(b, .{ .target = default_target })); + macho_step.dependOn(testMhExecuteHeader(b, .{ .target = default_target })); macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = default_target })); macho_step.dependOn(testSegmentBoundarySymbols(b, .{ .target = default_target })); @@ -161,6 +162,19 @@ fn testEntryPointDylib(b: *Build, opts: Options) *Step { return test_step; } +fn testMhExecuteHeader(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-mh-execute-header", opts); + + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = "int main() { return 0; }" }); + + const check = exe.checkObject(); + check.checkInSymtab(); + check.checkContains("[referenced dynamically] external __mh_execute_header"); + 
test_step.dependOn(&check.step); + + return test_step; +} + fn testNeededFramework(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-needed-framework", opts); From bf285c7e409bddfe7089e570ebf836d0343c8628 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 13 Jan 2024 19:12:55 +0100 Subject: [PATCH 039/133] test/link/macho: test for correct handling of large __bss sections --- test/link/macho.zig | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/test/link/macho.zig b/test/link/macho.zig index bbcd7b31bd..dd7b178f27 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -10,6 +10,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testDeadStrip(b, .{ .target = default_target })); macho_step.dependOn(testEntryPointDylib(b, .{ .target = default_target })); + macho_step.dependOn(testLargeBss(b, .{ .target = default_target })); macho_step.dependOn(testMhExecuteHeader(b, .{ .target = default_target })); macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = default_target })); macho_step.dependOn(testSegmentBoundarySymbols(b, .{ .target = default_target })); @@ -162,6 +163,27 @@ fn testEntryPointDylib(b: *Build, opts: Options) *Step { return test_step; } +fn testLargeBss(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-large-bss", opts); + + // TODO this test used to use a 4GB zerofill section but this actually fails and causes every + // linker I tried to misbehave in different ways. This only happened on arm64. I thought that + // maybe S_GB_ZEROFILL section is an answer to this but it doesn't seem supported by dyld + // anymore. When I get some free time I will re-investigate this.
+ const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = + \\char arr[0x1000000]; + \\int main() { + \\ return arr[2000]; + \\} + }); + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + + return test_step; +} + fn testMhExecuteHeader(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-mh-execute-header", opts); From 0cc4dc615b536eb5e927558f1391060ecf97b0ea Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 13 Jan 2024 19:15:56 +0100 Subject: [PATCH 040/133] test/link/macho: test hello world in Zig --- test/link/macho.zig | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test/link/macho.zig b/test/link/macho.zig index dd7b178f27..e4a0cc6cb8 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -10,6 +10,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testDeadStrip(b, .{ .target = default_target })); macho_step.dependOn(testEntryPointDylib(b, .{ .target = default_target })); + macho_step.dependOn(testHelloZig(b, .{ .target = default_target })); macho_step.dependOn(testLargeBss(b, .{ .target = default_target })); macho_step.dependOn(testMhExecuteHeader(b, .{ .target = default_target })); macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = default_target })); @@ -163,6 +164,23 @@ fn testEntryPointDylib(b: *Build, opts: Options) *Step { return test_step; } +fn testHelloZig(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-hello-zig", opts); + + const exe = addExecutable(b, opts, .{ .name = "main", .zig_source_bytes = + \\const std = @import("std"); + \\pub fn main() void { + \\ std.io.getStdOut().writer().print("Hello world!\n", .{}) catch unreachable; + \\} + }); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("Hello world!\n"); + test_step.dependOn(&run.step); + + return test_step; +} + fn testLargeBss(b: *Build, opts: Options) *Step { const test_step = addTestStep(b,
"macho-large-bss", opts); From 5142d92c514bc190224c30439c09b75169e0799a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 13 Jan 2024 23:32:41 +0100 Subject: [PATCH 041/133] test/link/macho: test hello world in C --- test/link/macho.zig | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/test/link/macho.zig b/test/link/macho.zig index e4a0cc6cb8..4cb97b439b 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -10,6 +10,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testDeadStrip(b, .{ .target = default_target })); macho_step.dependOn(testEntryPointDylib(b, .{ .target = default_target })); + macho_step.dependOn(testHelloC(b, .{ .target = default_target })); macho_step.dependOn(testHelloZig(b, .{ .target = default_target })); macho_step.dependOn(testLargeBss(b, .{ .target = default_target })); macho_step.dependOn(testMhExecuteHeader(b, .{ .target = default_target })); @@ -164,6 +165,30 @@ fn testEntryPointDylib(b: *Build, opts: Options) *Step { return test_step; } +fn testHelloC(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-hello-c", opts); + + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = + \\#include <stdio.h> + \\int main() { + \\ printf("Hello world!\n"); + \\ return 0; + \\} + }); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("Hello world!\n"); + test_step.dependOn(&run.step); + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("header"); + check.checkContains("PIE"); + test_step.dependOn(&check.step); + + return test_step; +} + fn testHelloZig(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-hello-zig", opts); From 9533628ca08e8ca1a111c0cb0842d4d45706ea2d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 13 Jan 2024 23:56:07 +0100 Subject: [PATCH 042/133] test/link/macho: test setting correct weak* flags in the header --- test/link/macho.zig | 90
+++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/test/link/macho.zig b/test/link/macho.zig index 4cb97b439b..44f76dbae3 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -10,6 +10,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testDeadStrip(b, .{ .target = default_target })); macho_step.dependOn(testEntryPointDylib(b, .{ .target = default_target })); + macho_step.dependOn(testHeaderWeakFlags(b, .{ .target = default_target })); macho_step.dependOn(testHelloC(b, .{ .target = default_target })); macho_step.dependOn(testHelloZig(b, .{ .target = default_target })); macho_step.dependOn(testLargeBss(b, .{ .target = default_target })); @@ -165,6 +166,94 @@ fn testEntryPointDylib(b: *Build, opts: Options) *Step { return test_step; } +// Adapted from https://github.com/llvm/llvm-project/blob/main/lld/test/MachO/weak-header-flags.s +fn testHeaderWeakFlags(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-header-weak-flags", opts); + + const obj1 = addObject(b, opts, .{ .name = "a", .asm_source_bytes = + \\.globl _x + \\.weak_definition _x + \\_x: + \\ ret + }); + + const lib = addSharedLibrary(b, opts, .{ .name = "a" }); + lib.addObject(obj1); + + { + const exe = addExecutable(b, opts, .{ .name = "main1", .c_source_bytes = "int main() { return 0; }" }); + exe.addObject(obj1); + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("header"); + check.checkContains("WEAK_DEFINES"); + check.checkInHeaders(); + check.checkExact("header"); + check.checkContains("BINDS_TO_WEAK"); + check.checkInExports(); + check.checkExtract("[WEAK] {vmaddr} _x"); + test_step.dependOn(&check.step); + } + + { + const obj = addObject(b, opts, .{ .name = "b" }); + + switch (opts.target.result.cpu.arch) { + .aarch64 => addAsmSourceBytes(obj, + \\.globl _main + \\_main: + \\ bl _x + \\ ret + ), + .x86_64 => addAsmSourceBytes(obj, + \\.globl _main + \\_main: + 
\\ callq _x + \\ ret + ), + else => unreachable, + } + + const exe = addExecutable(b, opts, .{ .name = "main2" }); + exe.linkLibrary(lib); + exe.addObject(obj); + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("header"); + check.checkNotPresent("WEAK_DEFINES"); + check.checkInHeaders(); + check.checkExact("header"); + check.checkContains("BINDS_TO_WEAK"); + check.checkInExports(); + check.checkNotPresent("[WEAK] {vmaddr} _x"); + test_step.dependOn(&check.step); + } + + { + const exe = addExecutable(b, opts, .{ .name = "main3", .asm_source_bytes = + \\.globl _main, _x + \\_x: + \\ + \\_main: + \\ ret + }); + exe.linkLibrary(lib); + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("header"); + check.checkNotPresent("WEAK_DEFINES"); + check.checkInHeaders(); + check.checkExact("header"); + check.checkNotPresent("BINDS_TO_WEAK"); + test_step.dependOn(&check.step); + } + + return test_step; +} + fn testHelloC(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-hello-c", opts); @@ -436,6 +525,7 @@ fn addTestStep(b: *Build, comptime prefix: []const u8, opts: Options) *Step { return link.addTestStep(b, "macho-" ++ prefix, opts); } +const addAsmSourceBytes = link.addAsmSourceBytes; const addCSourceBytes = link.addCSourceBytes; const addRunArtifact = link.addRunArtifact; const addObject = link.addObject; From 2c0c86944ef202788ba7489fe14df12194b720d9 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 14 Jan 2024 00:17:23 +0100 Subject: [PATCH 043/133] test/link/macho: upgrade and migrate headerpad test --- test/link.zig | 4 - test/link/macho.zig | 109 ++++++++++++++++++++++ test/link/macho/headerpad/build.zig | 137 ---------------------------- test/link/macho/headerpad/main.c | 3 - 4 files changed, 109 insertions(+), 144 deletions(-) delete mode 100644 test/link/macho/headerpad/build.zig delete mode 100644 test/link/macho/headerpad/main.c diff --git a/test/link.zig b/test/link.zig index 
d934e54a82..917e5fc3ef 100644 --- a/test/link.zig +++ b/test/link.zig @@ -127,10 +127,6 @@ pub const cases = [_]Case{ .build_root = "test/link/macho/entry_in_archive", .import = @import("link/macho/entry_in_archive/build.zig"), }, - .{ - .build_root = "test/link/macho/headerpad", - .import = @import("link/macho/headerpad/build.zig"), - }, .{ .build_root = "test/link/macho/linksection", .import = @import("link/macho/linksection/build.zig"), diff --git a/test/link/macho.zig b/test/link/macho.zig index 44f76dbae3..9a5e090956 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -24,6 +24,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { // Tests requiring presence of macOS SDK in system path if (build_opts.has_macos_sdk) { + macho_step.dependOn(testHeaderpad(b, .{ .target = b.host })); macho_step.dependOn(testNeededFramework(b, .{ .target = b.host })); } } @@ -166,6 +167,113 @@ fn testEntryPointDylib(b: *Build, opts: Options) *Step { return test_step; } +fn testHeaderpad(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-headerpad", opts); + + const addExe = struct { + fn addExe(bb: *Build, o: Options, name: []const u8) *Compile { + const exe = addExecutable(bb, o, .{ + .name = name, + .c_source_bytes = "int main() { return 0; }", + }); + exe.linkFramework("CoreFoundation"); + exe.linkFramework("Foundation"); + exe.linkFramework("Cocoa"); + exe.linkFramework("CoreGraphics"); + exe.linkFramework("CoreHaptics"); + exe.linkFramework("CoreAudio"); + exe.linkFramework("AVFoundation"); + exe.linkFramework("CoreImage"); + exe.linkFramework("CoreLocation"); + exe.linkFramework("CoreML"); + exe.linkFramework("CoreVideo"); + exe.linkFramework("CoreText"); + exe.linkFramework("CryptoKit"); + exe.linkFramework("GameKit"); + exe.linkFramework("SwiftUI"); + exe.linkFramework("StoreKit"); + exe.linkFramework("SpriteKit"); + return exe; + } + }.addExe; + + { + const exe = addExe(b, opts, "main1"); + exe.headerpad_max_install_names = 
true; + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("sectname __text"); + check.checkExtract("offset {offset}"); + switch (opts.target.result.cpu.arch) { + .aarch64 => check.checkComputeCompare("offset", .{ .op = .gte, .value = .{ .literal = 0x4000 } }), + .x86_64 => check.checkComputeCompare("offset", .{ .op = .gte, .value = .{ .literal = 0x1000 } }), + else => unreachable, + } + test_step.dependOn(&check.step); + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + } + + { + const exe = addExe(b, opts, "main2"); + exe.headerpad_size = 0x10000; + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("sectname __text"); + check.checkExtract("offset {offset}"); + check.checkComputeCompare("offset", .{ .op = .gte, .value = .{ .literal = 0x10000 } }); + test_step.dependOn(&check.step); + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + } + + { + const exe = addExe(b, opts, "main3"); + exe.headerpad_max_install_names = true; + exe.headerpad_size = 0x10000; + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("sectname __text"); + check.checkExtract("offset {offset}"); + check.checkComputeCompare("offset", .{ .op = .gte, .value = .{ .literal = 0x10000 } }); + test_step.dependOn(&check.step); + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + } + + { + const exe = addExe(b, opts, "main4"); + exe.headerpad_max_install_names = true; + exe.headerpad_size = 0x1000; + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("sectname __text"); + check.checkExtract("offset {offset}"); + switch (opts.target.result.cpu.arch) { + .aarch64 => check.checkComputeCompare("offset", .{ .op = .gte, .value = .{ .literal = 0x4000 } }), + .x86_64 => check.checkComputeCompare("offset", .{ .op = .gte, .value = .{ .literal = 0x1000 } }), + else => 
unreachable, + } + test_step.dependOn(&check.step); + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + } + + return test_step; +} + // Adapted from https://github.com/llvm/llvm-project/blob/main/lld/test/MachO/weak-header-flags.s fn testHeaderWeakFlags(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-header-weak-flags", opts); @@ -537,5 +645,6 @@ const std = @import("std"); const Build = std.Build; const BuildOptions = link.BuildOptions; +const Compile = Step.Compile; const Options = link.Options; const Step = Build.Step; diff --git a/test/link/macho/headerpad/build.zig b/test/link/macho/headerpad/build.zig deleted file mode 100644 index b982224e85..0000000000 --- a/test/link/macho/headerpad/build.zig +++ /dev/null @@ -1,137 +0,0 @@ -const std = @import("std"); -const builtin = @import("builtin"); - -pub const requires_symlinks = true; -pub const requires_macos_sdk = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - { - // Test -headerpad_max_install_names - const exe = simpleExe(b, optimize, "headerpad_max_install_names"); - exe.headerpad_max_install_names = true; - - const check = exe.checkObject(); - check.checkInHeaders(); - check.checkExact("sectname __text"); - check.checkExtract("offset {offset}"); - - switch (builtin.cpu.arch) { - .aarch64 => { - check.checkComputeCompare("offset", .{ .op = .gte, .value = .{ .literal = 0x4000 } }); - }, - .x86_64 => { - check.checkComputeCompare("offset", .{ .op = .gte, .value = .{ .literal = 0x1000 } }); - }, - else => unreachable, - } - - test_step.dependOn(&check.step); - - const run = b.addRunArtifact(exe); - test_step.dependOn(&run.step); - } - - 
{ - // Test -headerpad - const exe = simpleExe(b, optimize, "headerpad"); - exe.headerpad_size = 0x10000; - - const check = exe.checkObject(); - check.checkInHeaders(); - check.checkExact("sectname __text"); - check.checkExtract("offset {offset}"); - check.checkComputeCompare("offset", .{ .op = .gte, .value = .{ .literal = 0x10000 } }); - - test_step.dependOn(&check.step); - - const run = b.addRunArtifact(exe); - test_step.dependOn(&run.step); - } - - { - // Test both flags with -headerpad overriding -headerpad_max_install_names - const exe = simpleExe(b, optimize, "headerpad_overriding"); - exe.headerpad_max_install_names = true; - exe.headerpad_size = 0x10000; - - const check = exe.checkObject(); - check.checkInHeaders(); - check.checkExact("sectname __text"); - check.checkExtract("offset {offset}"); - check.checkComputeCompare("offset", .{ .op = .gte, .value = .{ .literal = 0x10000 } }); - - test_step.dependOn(&check.step); - - const run = b.addRunArtifact(exe); - test_step.dependOn(&run.step); - } - - { - // Test both flags with -headerpad_max_install_names overriding -headerpad - const exe = simpleExe(b, optimize, "headerpad_max_install_names_overriding"); - exe.headerpad_size = 0x1000; - exe.headerpad_max_install_names = true; - - const check = exe.checkObject(); - check.checkInHeaders(); - check.checkExact("sectname __text"); - check.checkExtract("offset {offset}"); - - switch (builtin.cpu.arch) { - .aarch64 => { - check.checkComputeCompare("offset", .{ .op = .gte, .value = .{ .literal = 0x4000 } }); - }, - .x86_64 => { - check.checkComputeCompare("offset", .{ .op = .gte, .value = .{ .literal = 0x1000 } }); - }, - else => unreachable, - } - - test_step.dependOn(&check.step); - - const run = b.addRunArtifact(exe); - test_step.dependOn(&run.step); - } -} - -fn simpleExe( - b: *std.Build, - optimize: std.builtin.OptimizeMode, - name: []const u8, -) *std.Build.Step.Compile { - const exe = b.addExecutable(.{ - .name = name, - .optimize = optimize, - .target = 
b.host, - }); - exe.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &.{} }); - exe.linkLibC(); - exe.linkFramework("CoreFoundation"); - exe.linkFramework("Foundation"); - exe.linkFramework("Cocoa"); - exe.linkFramework("CoreGraphics"); - exe.linkFramework("CoreHaptics"); - exe.linkFramework("CoreAudio"); - exe.linkFramework("AVFoundation"); - exe.linkFramework("CoreImage"); - exe.linkFramework("CoreLocation"); - exe.linkFramework("CoreML"); - exe.linkFramework("CoreVideo"); - exe.linkFramework("CoreText"); - exe.linkFramework("CryptoKit"); - exe.linkFramework("GameKit"); - exe.linkFramework("SwiftUI"); - exe.linkFramework("StoreKit"); - exe.linkFramework("SpriteKit"); - return exe; -} diff --git a/test/link/macho/headerpad/main.c b/test/link/macho/headerpad/main.c deleted file mode 100644 index ca68d24cc7..0000000000 --- a/test/link/macho/headerpad/main.c +++ /dev/null @@ -1,3 +0,0 @@ -int main(int argc, char* argv[]) { - return 0; -} From 1e0eb3c8097a3bd45cd5e6c7b67f6f7d4f974c6f Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 14 Jan 2024 00:23:11 +0100 Subject: [PATCH 044/133] test/link/macho: test weak binding --- test/link/macho.zig | 137 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/test/link/macho.zig b/test/link/macho.zig index 9a5e090956..c03e87694d 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -7,6 +7,10 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { const default_target = b.resolveTargetQuery(.{ .os_tag = .macos, }); + const x86_64_target = b.resolveTargetQuery(.{ + .cpu_arch = .x86_64, + .os_tag = .macos, + }); macho_step.dependOn(testDeadStrip(b, .{ .target = default_target })); macho_step.dependOn(testEntryPointDylib(b, .{ .target = default_target })); @@ -17,6 +21,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testMhExecuteHeader(b, .{ .target = default_target })); macho_step.dependOn(testSectionBoundarySymbols(b, .{ 
.target = default_target })); macho_step.dependOn(testSegmentBoundarySymbols(b, .{ .target = default_target })); + macho_step.dependOn(testWeakBind(b, .{ .target = x86_64_target })); // Tests requiring symlinks when tested on Windows if (build_opts.has_symlinks_windows) { @@ -629,6 +634,138 @@ fn testSegmentBoundarySymbols(b: *Build, opts: Options) *Step { return test_step; } +// Adapted from https://github.com/llvm/llvm-project/blob/main/lld/test/MachO/weak-binding.s +fn testWeakBind(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-weak-bind", opts); + + const lib = addSharedLibrary(b, opts, .{ .name = "foo", .asm_source_bytes = + \\.globl _weak_dysym + \\.weak_definition _weak_dysym + \\_weak_dysym: + \\ .quad 0x1234 + \\ + \\.globl _weak_dysym_for_gotpcrel + \\.weak_definition _weak_dysym_for_gotpcrel + \\_weak_dysym_for_gotpcrel: + \\ .quad 0x1234 + \\ + \\.globl _weak_dysym_fn + \\.weak_definition _weak_dysym_fn + \\_weak_dysym_fn: + \\ ret + \\ + \\.section __DATA,__thread_vars,thread_local_variables + \\ + \\.globl _weak_dysym_tlv + \\.weak_definition _weak_dysym_tlv + \\_weak_dysym_tlv: + \\ .quad 0x1234 + }); + + { + const check = lib.checkObject(); + check.checkInExports(); + check.checkExtract("[WEAK] {vmaddr1} _weak_dysym"); + check.checkExtract("[WEAK] {vmaddr2} _weak_dysym_for_gotpcrel"); + check.checkExtract("[WEAK] {vmaddr3} _weak_dysym_fn"); + check.checkExtract("[THREAD_LOCAL, WEAK] {vmaddr4} _weak_dysym_tlv"); + test_step.dependOn(&check.step); + } + + const exe = addExecutable(b, opts, .{ .name = "main", .asm_source_bytes = + \\.globl _main, _weak_external, _weak_external_for_gotpcrel, _weak_external_fn + \\.weak_definition _weak_external, _weak_external_for_gotpcrel, _weak_external_fn, _weak_internal, _weak_internal_for_gotpcrel, _weak_internal_fn + \\ + \\_main: + \\ mov _weak_dysym_for_gotpcrel@GOTPCREL(%rip), %rax + \\ mov _weak_external_for_gotpcrel@GOTPCREL(%rip), %rax + \\ mov 
_weak_internal_for_gotpcrel@GOTPCREL(%rip), %rax + \\ mov _weak_tlv@TLVP(%rip), %rax + \\ mov _weak_dysym_tlv@TLVP(%rip), %rax + \\ mov _weak_internal_tlv@TLVP(%rip), %rax + \\ callq _weak_dysym_fn + \\ callq _weak_external_fn + \\ callq _weak_internal_fn + \\ mov $0, %rax + \\ ret + \\ + \\_weak_external: + \\ .quad 0x1234 + \\ + \\_weak_external_for_gotpcrel: + \\ .quad 0x1234 + \\ + \\_weak_external_fn: + \\ ret + \\ + \\_weak_internal: + \\ .quad 0x1234 + \\ + \\_weak_internal_for_gotpcrel: + \\ .quad 0x1234 + \\ + \\_weak_internal_fn: + \\ ret + \\ + \\.data + \\ .quad _weak_dysym + \\ .quad _weak_external + 2 + \\ .quad _weak_internal + \\ + \\.tbss _weak_tlv$tlv$init, 4, 2 + \\.tbss _weak_internal_tlv$tlv$init, 4, 2 + \\ + \\.section __DATA,__thread_vars,thread_local_variables + \\.globl _weak_tlv + \\.weak_definition _weak_tlv, _weak_internal_tlv + \\ + \\_weak_tlv: + \\ .quad __tlv_bootstrap + \\ .quad 0 + \\ .quad _weak_tlv$tlv$init + \\ + \\_weak_internal_tlv: + \\ .quad __tlv_bootstrap + \\ .quad 0 + \\ .quad _weak_internal_tlv$tlv$init + }); + exe.linkLibrary(lib); + + { + const check = exe.checkObject(); + + check.checkInExports(); + check.checkExtract("[WEAK] {vmaddr1} _weak_external"); + check.checkExtract("[WEAK] {vmaddr2} _weak_external_for_gotpcrel"); + check.checkExtract("[WEAK] {vmaddr3} _weak_external_fn"); + check.checkExtract("[THREAD_LOCAL, WEAK] {vmaddr4} _weak_tlv"); + + check.checkInDyldBind(); + check.checkContains("(libfoo.dylib) _weak_dysym_for_gotpcrel"); + check.checkContains("(libfoo.dylib) _weak_dysym_fn"); + check.checkContains("(libfoo.dylib) _weak_dysym"); + check.checkContains("(libfoo.dylib) _weak_dysym_tlv"); + + check.checkInDyldWeakBind(); + check.checkContains("_weak_external_for_gotpcrel"); + check.checkContains("_weak_dysym_for_gotpcrel"); + check.checkContains("_weak_external_fn"); + check.checkContains("_weak_dysym_fn"); + check.checkContains("_weak_dysym"); + check.checkContains("_weak_external"); + 
check.checkContains("_weak_tlv"); + check.checkContains("_weak_dysym_tlv"); + + test_step.dependOn(&check.step); + } + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + + return test_step; +} + fn addTestStep(b: *Build, comptime prefix: []const u8, opts: Options) *Step { return link.addTestStep(b, "macho-" ++ prefix, opts); } From e96f8b817a889abecc86a71961ba9a95c13c315e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 14 Jan 2024 10:20:22 +0100 Subject: [PATCH 045/133] test/link/macho: upgrade weak library test --- test/link.zig | 4 -- test/link/macho.zig | 44 +++++++++++++++++++++ test/link/macho/weak_library/a.c | 9 ----- test/link/macho/weak_library/build.zig | 55 -------------------------- test/link/macho/weak_library/main.c | 9 ----- 5 files changed, 44 insertions(+), 77 deletions(-) delete mode 100644 test/link/macho/weak_library/a.c delete mode 100644 test/link/macho/weak_library/build.zig delete mode 100644 test/link/macho/weak_library/main.c diff --git a/test/link.zig b/test/link.zig index 917e5fc3ef..fd5fc0fd4c 100644 --- a/test/link.zig +++ b/test/link.zig @@ -171,10 +171,6 @@ pub const cases = [_]Case{ .build_root = "test/link/macho/unwind_info", .import = @import("link/macho/unwind_info/build.zig"), }, - .{ - .build_root = "test/link/macho/weak_library", - .import = @import("link/macho/weak_library/build.zig"), - }, .{ .build_root = "test/link/macho/weak_framework", .import = @import("link/macho/weak_framework/build.zig"), diff --git a/test/link/macho.zig b/test/link/macho.zig index c03e87694d..5b4d0a3a28 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -26,6 +26,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { // Tests requiring symlinks when tested on Windows if (build_opts.has_symlinks_windows) { macho_step.dependOn(testNeededLibrary(b, .{ .target = default_target })); + macho_step.dependOn(testWeakLibrary(b, .{ .target = default_target })); // Tests requiring presence 
of macOS SDK in system path if (build_opts.has_macos_sdk) { @@ -766,6 +767,49 @@ fn testWeakBind(b: *Build, opts: Options) *Step { return test_step; } +fn testWeakLibrary(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-weak-library", opts); + + const dylib = addSharedLibrary(b, opts, .{ .name = "a", .c_source_bytes = + \\#include <stdio.h> + \\int a = 42; + \\const char* asStr() { + \\ static char str[3]; + \\ sprintf(str, "%d", 42); + \\ return str; + \\} + }); + + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = + \\#include <stdio.h> + \\extern int a; + \\extern const char* asStr(); + \\int main() { + \\ printf("%d %s", a, asStr()); + \\ return 0; + \\} + }); + exe.root_module.linkSystemLibrary("a", .{ .weak = true }); + exe.addLibraryPath(dylib.getEmittedBinDirectory()); + exe.addRPath(dylib.getEmittedBinDirectory()); + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("cmd LOAD_WEAK_DYLIB"); + check.checkContains("liba.dylib"); + check.checkInSymtab(); + check.checkExact("(undefined) weakref external _a (from liba)"); + check.checkInSymtab(); + check.checkExact("(undefined) weakref external _asStr (from liba)"); + test_step.dependOn(&check.step); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("42 42"); + test_step.dependOn(&run.step); + + return test_step; +} + fn addTestStep(b: *Build, comptime prefix: []const u8, opts: Options) *Step { return link.addTestStep(b, "macho-" ++ prefix, opts); } diff --git a/test/link/macho/weak_library/a.c b/test/link/macho/weak_library/a.c deleted file mode 100644 index 9f49802ce6..0000000000 --- a/test/link/macho/weak_library/a.c +++ /dev/null @@ -1,9 +0,0 @@ -#include <stdio.h> - -int a = 42; - -const char* asStr() { - static char str[3]; - sprintf(str, "%d", 42); - return str; -} diff --git a/test/link/macho/weak_library/build.zig b/test/link/macho/weak_library/build.zig deleted file mode 100644 index 0d73b9f4ea..0000000000 ---
a/test/link/macho/weak_library/build.zig +++ /dev/null @@ -1,55 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const target = b.resolveTargetQuery(.{ .os_tag = .macos }); - - const dylib = b.addSharedLibrary(.{ - .name = "a", - .version = .{ .major = 1, .minor = 0, .patch = 0 }, - .target = target, - .optimize = optimize, - }); - dylib.addCSourceFile(.{ .file = .{ .path = "a.c" }, .flags = &.{} }); - dylib.linkLibC(); - b.installArtifact(dylib); - - const exe = b.addExecutable(.{ - .name = "test", - .target = target, - .optimize = optimize, - }); - exe.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &[0][]const u8{} }); - exe.linkLibC(); - exe.root_module.linkSystemLibrary("a", .{ .weak = true }); - exe.addLibraryPath(dylib.getEmittedBinDirectory()); - exe.addRPath(dylib.getEmittedBinDirectory()); - - const check = exe.checkObject(); - check.checkInHeaders(); - check.checkExact("cmd LOAD_WEAK_DYLIB"); - check.checkExact("name @rpath/liba.dylib"); - - check.checkInSymtab(); - check.checkExact("(undefined) weakref external _a (from liba)"); - - check.checkInSymtab(); - check.checkExact("(undefined) weakref external _asStr (from liba)"); - test_step.dependOn(&check.step); - - const run = b.addRunArtifact(exe); - run.skip_foreign_checks = true; - run.expectStdOutEqual("42 42"); - test_step.dependOn(&run.step); -} diff --git a/test/link/macho/weak_library/main.c b/test/link/macho/weak_library/main.c deleted file mode 100644 index ee5367fef7..0000000000 --- a/test/link/macho/weak_library/main.c +++ /dev/null @@ -1,9 +0,0 @@ -#include - -extern int a; -extern const char* asStr(); - -int 
main(int argc, char* argv[]) { - printf("%d %s", a, asStr()); - return 0; -} From 105655857f02c8bd59f5c0250f124a064df400cf Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 14 Jan 2024 10:23:11 +0100 Subject: [PATCH 046/133] test/link/macho: upgrade weak framework test --- test/link.zig | 4 --- test/link/macho.zig | 20 ++++++++++++++ test/link/macho/weak_framework/build.zig | 34 ------------------------ test/link/macho/weak_framework/main.c | 3 --- 4 files changed, 20 insertions(+), 41 deletions(-) delete mode 100644 test/link/macho/weak_framework/build.zig delete mode 100644 test/link/macho/weak_framework/main.c diff --git a/test/link.zig b/test/link.zig index fd5fc0fd4c..909d1d406e 100644 --- a/test/link.zig +++ b/test/link.zig @@ -171,8 +171,4 @@ pub const cases = [_]Case{ .build_root = "test/link/macho/unwind_info", .import = @import("link/macho/unwind_info/build.zig"), }, - .{ - .build_root = "test/link/macho/weak_framework", - .import = @import("link/macho/weak_framework/build.zig"), - }, }; diff --git a/test/link/macho.zig b/test/link/macho.zig index 5b4d0a3a28..bebf4d6d91 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -32,6 +32,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { if (build_opts.has_macos_sdk) { macho_step.dependOn(testHeaderpad(b, .{ .target = b.host })); macho_step.dependOn(testNeededFramework(b, .{ .target = b.host })); + macho_step.dependOn(testWeakFramework(b, .{ .target = b.host })); } } @@ -767,6 +768,25 @@ fn testWeakBind(b: *Build, opts: Options) *Step { return test_step; } +fn testWeakFramework(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-weak-framework", opts); + + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = "int main() { return 0; }" }); + exe.root_module.linkFramework("Cocoa", .{ .weak = true }); + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + + const check = exe.checkObject(); + 
check.checkInHeaders(); + check.checkExact("cmd LOAD_WEAK_DYLIB"); + check.checkContains("Cocoa"); + test_step.dependOn(&check.step); + + return test_step; +} + fn testWeakLibrary(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-weak-library", opts); diff --git a/test/link/macho/weak_framework/build.zig b/test/link/macho/weak_framework/build.zig deleted file mode 100644 index 2a430443d4..0000000000 --- a/test/link/macho/weak_framework/build.zig +++ /dev/null @@ -1,34 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; -pub const requires_macos_sdk = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const exe = b.addExecutable(.{ - .name = "test", - .optimize = optimize, - .target = b.host, - }); - exe.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &[0][]const u8{} }); - exe.linkLibC(); - exe.linkFrameworkWeak("Cocoa"); - - const check = exe.checkObject(); - check.checkInHeaders(); - check.checkExact("cmd LOAD_WEAK_DYLIB"); - check.checkContains("Cocoa"); - test_step.dependOn(&check.step); - - const run_cmd = b.addRunArtifact(exe); - test_step.dependOn(&run_cmd.step); -} diff --git a/test/link/macho/weak_framework/main.c b/test/link/macho/weak_framework/main.c deleted file mode 100644 index ca68d24cc7..0000000000 --- a/test/link/macho/weak_framework/main.c +++ /dev/null @@ -1,3 +0,0 @@ -int main(int argc, char* argv[]) { - return 0; -} From 9fce2e2233733e1ada2b1440012ae78675e196f9 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 14 Jan 2024 10:35:45 +0100 Subject: [PATCH 047/133] test/link/macho: test -u flag handling (forceUndefinedSymbol) --- test/link/macho.zig | 79 
+++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/test/link/macho.zig b/test/link/macho.zig index bebf4d6d91..4387a614c5 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -21,6 +21,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testMhExecuteHeader(b, .{ .target = default_target })); macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = default_target })); macho_step.dependOn(testSegmentBoundarySymbols(b, .{ .target = default_target })); + macho_step.dependOn(testUndefinedFlag(b, .{ .target = default_target })); macho_step.dependOn(testWeakBind(b, .{ .target = x86_64_target })); // Tests requiring symlinks when tested on Windows @@ -636,6 +637,83 @@ fn testSegmentBoundarySymbols(b: *Build, opts: Options) *Step { return test_step; } +fn testUndefinedFlag(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-undefined-flag", opts); + + const obj = addObject(b, opts, .{ .name = "a", .c_source_bytes = "int foo = 42;" }); + + const lib = addStaticLibrary(b, opts, .{ .name = "a" }); + lib.addObject(obj); + + const main_o = addObject(b, opts, .{ .name = "main", .c_source_bytes = "int main() { return 0; }" }); + + { + const exe = addExecutable(b, opts, .{ .name = "main1" }); + exe.addObject(main_o); + exe.linkLibrary(lib); + exe.forceUndefinedSymbol("_foo"); + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + + const check = exe.checkObject(); + check.checkInSymtab(); + check.checkContains("_foo"); + test_step.dependOn(&check.step); + } + + { + const exe = addExecutable(b, opts, .{ .name = "main2" }); + exe.addObject(main_o); + exe.linkLibrary(lib); + exe.forceUndefinedSymbol("_foo"); + exe.link_gc_sections = true; + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + + const check = exe.checkObject(); + check.checkInSymtab(); + check.checkContains("_foo"); + 
test_step.dependOn(&check.step); + } + + { + const exe = addExecutable(b, opts, .{ .name = "main3" }); + exe.addObject(main_o); + exe.addObject(obj); + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + + const check = exe.checkObject(); + check.checkInSymtab(); + check.checkContains("_foo"); + test_step.dependOn(&check.step); + } + + { + const exe = addExecutable(b, opts, .{ .name = "main4" }); + exe.addObject(main_o); + exe.addObject(obj); + exe.link_gc_sections = true; + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + + const check = exe.checkObject(); + check.checkInSymtab(); + check.checkNotPresent("_foo"); + test_step.dependOn(&check.step); + } + + return test_step; +} + // Adapted from https://github.com/llvm/llvm-project/blob/main/lld/test/MachO/weak-binding.s fn testWeakBind(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-weak-bind", opts); @@ -839,6 +917,7 @@ const addCSourceBytes = link.addCSourceBytes; const addRunArtifact = link.addRunArtifact; const addObject = link.addObject; const addExecutable = link.addExecutable; +const addStaticLibrary = link.addStaticLibrary; const addSharedLibrary = link.addSharedLibrary; const expectLinkErrors = link.expectLinkErrors; const link = @import("link.zig"); From fa649cad4e1c5a291dfcd7a5f766e8992be28f72 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 14 Jan 2024 10:38:06 +0100 Subject: [PATCH 048/133] test/link/macho: test large .tbss section --- test/link/macho.zig | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/test/link/macho.zig b/test/link/macho.zig index 4387a614c5..58ef007565 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -21,6 +21,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testMhExecuteHeader(b, .{ .target = default_target })); macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = default_target 
})); macho_step.dependOn(testSegmentBoundarySymbols(b, .{ .target = default_target })); + macho_step.dependOn(testTlsLargeTbss(b, .{ .target = default_target })); macho_step.dependOn(testUndefinedFlag(b, .{ .target = default_target })); macho_step.dependOn(testWeakBind(b, .{ .target = x86_64_target })); @@ -637,6 +638,27 @@ fn testSegmentBoundarySymbols(b: *Build, opts: Options) *Step { return test_step; } +fn testTlsLargeTbss(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-tls-large-tbss", opts); + + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = + \\#include + \\_Thread_local int x[0x8000]; + \\_Thread_local int y[0x8000]; + \\int main() { + \\ x[0] = 3; + \\ x[0x7fff] = 5; + \\ printf("%d %d %d %d %d %d\n", x[0], x[1], x[0x7fff], y[0], y[1], y[0x7fff]); + \\} + }); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("3 0 5 0 0 0\n"); + test_step.dependOn(&run.step); + + return test_step; +} + fn testUndefinedFlag(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-undefined-flag", opts); From 5790e89b5aa5c8a939ea92e76a0a88655c4f55d4 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 14 Jan 2024 10:53:09 +0100 Subject: [PATCH 049/133] test/link/macho: test twolevel namespacing --- test/link/macho.zig | 121 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/test/link/macho.zig b/test/link/macho.zig index 58ef007565..ff02bee33d 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -29,6 +29,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { if (build_opts.has_symlinks_windows) { macho_step.dependOn(testNeededLibrary(b, .{ .target = default_target })); macho_step.dependOn(testWeakLibrary(b, .{ .target = default_target })); + macho_step.dependOn(testTwoLevelNamespace(b, .{ .target = default_target })); // Tests requiring presence of macOS SDK in system path if (build_opts.has_macos_sdk) { @@ -659,6 +660,126 @@ fn 
testTlsLargeTbss(b: *Build, opts: Options) *Step { return test_step; } +fn testTwoLevelNamespace(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-two-level-namespace", opts); + + const liba = addSharedLibrary(b, opts, .{ .name = "a", .c_source_bytes = + \\#include + \\int foo = 1; + \\int* ptr_to_foo = &foo; + \\int getFoo() { + \\ return foo; + \\} + \\void printInA() { + \\ printf("liba: getFoo()=%d, ptr_to_foo=%d\n", getFoo(), *ptr_to_foo); + \\} + }); + + { + const check = liba.checkObject(); + check.checkInDyldLazyBind(); + check.checkNotPresent("(flat lookup) _getFoo"); + check.checkInIndirectSymtab(); + check.checkNotPresent("_getFoo"); + test_step.dependOn(&check.step); + } + + const libb = addSharedLibrary(b, opts, .{ .name = "b", .c_source_bytes = + \\#include + \\int foo = 2; + \\int* ptr_to_foo = &foo; + \\int getFoo() { + \\ return foo; + \\} + \\void printInB() { + \\ printf("libb: getFoo()=%d, ptr_to_foo=%d\n", getFoo(), *ptr_to_foo); + \\} + }); + + { + const check = libb.checkObject(); + check.checkInDyldLazyBind(); + check.checkNotPresent("(flat lookup) _getFoo"); + check.checkInIndirectSymtab(); + check.checkNotPresent("_getFoo"); + test_step.dependOn(&check.step); + } + + const main_o = addObject(b, opts, .{ .name = "main", .c_source_bytes = + \\#include + \\int getFoo(); + \\extern int* ptr_to_foo; + \\void printInA(); + \\void printInB(); + \\int main() { + \\ printf("main: getFoo()=%d, ptr_to_foo=%d\n", getFoo(), *ptr_to_foo); + \\ printInA(); + \\ printInB(); + \\ return 0; + \\} + }); + + { + const exe = addExecutable(b, opts, .{ .name = "main1" }); + exe.addObject(main_o); + exe.root_module.linkSystemLibrary("a", .{}); + exe.root_module.linkSystemLibrary("b", .{}); + exe.addLibraryPath(liba.getEmittedBinDirectory()); + exe.addLibraryPath(libb.getEmittedBinDirectory()); + exe.addRPath(liba.getEmittedBinDirectory()); + exe.addRPath(libb.getEmittedBinDirectory()); + + const check = exe.checkObject(); + 
check.checkInSymtab(); + check.checkExact("(undefined) external _getFoo (from liba)"); + check.checkInSymtab(); + check.checkExact("(undefined) external _printInA (from liba)"); + check.checkInSymtab(); + check.checkExact("(undefined) external _printInB (from libb)"); + test_step.dependOn(&check.step); + + const run = addRunArtifact(exe); + run.expectStdOutEqual( + \\main: getFoo()=1, ptr_to_foo=1 + \\liba: getFoo()=1, ptr_to_foo=1 + \\libb: getFoo()=2, ptr_to_foo=2 + \\ + ); + test_step.dependOn(&run.step); + } + + { + const exe = addExecutable(b, opts, .{ .name = "main2" }); + exe.addObject(main_o); + exe.root_module.linkSystemLibrary("b", .{}); + exe.root_module.linkSystemLibrary("a", .{}); + exe.addLibraryPath(liba.getEmittedBinDirectory()); + exe.addLibraryPath(libb.getEmittedBinDirectory()); + exe.addRPath(liba.getEmittedBinDirectory()); + exe.addRPath(libb.getEmittedBinDirectory()); + + const check = exe.checkObject(); + check.checkInSymtab(); + check.checkExact("(undefined) external _getFoo (from libb)"); + check.checkInSymtab(); + check.checkExact("(undefined) external _printInA (from liba)"); + check.checkInSymtab(); + check.checkExact("(undefined) external _printInB (from libb)"); + test_step.dependOn(&check.step); + + const run = addRunArtifact(exe); + run.expectStdOutEqual( + \\main: getFoo()=2, ptr_to_foo=2 + \\liba: getFoo()=1, ptr_to_foo=1 + \\libb: getFoo()=2, ptr_to_foo=2 + \\ + ); + test_step.dependOn(&run.step); + } + + return test_step; +} + fn testUndefinedFlag(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-undefined-flag", opts); From 9f0e1ab467a2214efb97d340c733fa1da4e708c7 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 14 Jan 2024 10:59:47 +0100 Subject: [PATCH 050/133] test/link/macho: test thunks on arm64 --- test/link/macho.zig | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/test/link/macho.zig b/test/link/macho.zig index ff02bee33d..10cc38d374 100644 --- 
a/test/link/macho.zig +++ b/test/link/macho.zig @@ -11,6 +11,10 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { .cpu_arch = .x86_64, .os_tag = .macos, }); + const aarch64_target = b.resolveTargetQuery(.{ + .cpu_arch = .aarch64, + .os_tag = .macos, + }); macho_step.dependOn(testDeadStrip(b, .{ .target = default_target })); macho_step.dependOn(testEntryPointDylib(b, .{ .target = default_target })); @@ -21,6 +25,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testMhExecuteHeader(b, .{ .target = default_target })); macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = default_target })); macho_step.dependOn(testSegmentBoundarySymbols(b, .{ .target = default_target })); + macho_step.dependOn(testThunks(b, .{ .target = aarch64_target })); macho_step.dependOn(testTlsLargeTbss(b, .{ .target = default_target })); macho_step.dependOn(testUndefinedFlag(b, .{ .target = default_target })); macho_step.dependOn(testWeakBind(b, .{ .target = x86_64_target })); @@ -639,6 +644,35 @@ fn testSegmentBoundarySymbols(b: *Build, opts: Options) *Step { return test_step; } +fn testThunks(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-thunks", opts); + + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = + \\#include + \\__attribute__((aligned(0x8000000))) int bar() { + \\ return 42; + \\} + \\int foobar(); + \\int foo() { + \\ return bar() - foobar(); + \\} + \\__attribute__((aligned(0x8000000))) int foobar() { + \\ return 42; + \\} + \\int main() { + \\ printf("bar=%d, foo=%d, foobar=%d", bar(), foo(), foobar()); + \\ return foo(); + \\} + }); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("bar=42, foo=0, foobar=42"); + run.expectExitCode(0); + test_step.dependOn(&run.step); + + return test_step; +} + fn testTlsLargeTbss(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-tls-large-tbss", opts); From 49a4b429954826fba3bd9d952ec28a058addf31c Mon 
Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 14 Jan 2024 11:04:08 +0100 Subject: [PATCH 051/133] test/link/macho: upgrade tls test --- test/link.zig | 4 ---- test/link/macho.zig | 35 +++++++++++++++++++++++++++++++ test/link/macho/tls/a.c | 5 ----- test/link/macho/tls/build.zig | 39 ----------------------------------- test/link/macho/tls/main.zig | 15 -------------- 5 files changed, 35 insertions(+), 63 deletions(-) delete mode 100644 test/link/macho/tls/a.c delete mode 100644 test/link/macho/tls/build.zig delete mode 100644 test/link/macho/tls/main.zig diff --git a/test/link.zig b/test/link.zig index 909d1d406e..b6d4a4fa40 100644 --- a/test/link.zig +++ b/test/link.zig @@ -163,10 +163,6 @@ pub const cases = [_]Case{ .build_root = "test/link/macho/tbdv3", .import = @import("link/macho/tbdv3/build.zig"), }, - .{ - .build_root = "test/link/macho/tls", - .import = @import("link/macho/tls/build.zig"), - }, .{ .build_root = "test/link/macho/unwind_info", .import = @import("link/macho/unwind_info/build.zig"), diff --git a/test/link/macho.zig b/test/link/macho.zig index 10cc38d374..39efb02065 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -34,6 +34,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { if (build_opts.has_symlinks_windows) { macho_step.dependOn(testNeededLibrary(b, .{ .target = default_target })); macho_step.dependOn(testWeakLibrary(b, .{ .target = default_target })); + macho_step.dependOn(testTls(b, .{ .target = default_target })); macho_step.dependOn(testTwoLevelNamespace(b, .{ .target = default_target })); // Tests requiring presence of macOS SDK in system path @@ -673,6 +674,40 @@ fn testThunks(b: *Build, opts: Options) *Step { return test_step; } +fn testTls(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-tls", opts); + + const dylib = addSharedLibrary(b, opts, .{ .name = "a", .c_source_bytes = + \\_Thread_local int a; + \\int getA() { + \\ return a; + \\} + }); + + const exe = addExecutable(b, 
opts, .{ .name = "main", .c_source_bytes = + \\#include + \\extern _Thread_local int a; + \\extern int getA(); + \\int getA2() { + \\ return a; + \\} + \\int main() { + \\ a = 2; + \\ printf("%d %d %d", a, getA(), getA2()); + \\ return 0; + \\} + }); + exe.root_module.linkSystemLibrary("a", .{}); + exe.addLibraryPath(dylib.getEmittedBinDirectory()); + exe.addRPath(dylib.getEmittedBinDirectory()); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("2 2 2"); + test_step.dependOn(&run.step); + + return test_step; +} + fn testTlsLargeTbss(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-tls-large-tbss", opts); diff --git a/test/link/macho/tls/a.c b/test/link/macho/tls/a.c deleted file mode 100644 index 8602d02419..0000000000 --- a/test/link/macho/tls/a.c +++ /dev/null @@ -1,5 +0,0 @@ -_Thread_local int a; - -int getA() { - return a; -} diff --git a/test/link/macho/tls/build.zig b/test/link/macho/tls/build.zig deleted file mode 100644 index af2ac8a9c6..0000000000 --- a/test/link/macho/tls/build.zig +++ /dev/null @@ -1,39 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const target = b.resolveTargetQuery(.{ .os_tag = .macos }); - - const lib = b.addSharedLibrary(.{ - .name = "a", - .version = .{ .major = 1, .minor = 0, .patch = 0 }, - .optimize = optimize, - .target = target, - }); - lib.addCSourceFile(.{ .file = .{ .path = "a.c" }, .flags = &.{} }); - lib.linkLibC(); - - const test_exe = b.addTest(.{ - .root_source_file = .{ .path = "main.zig" }, - .optimize = optimize, - .target = target, - }); - test_exe.linkLibrary(lib); - test_exe.linkLibC(); - - const run = 
b.addRunArtifact(test_exe); - run.skip_foreign_checks = true; - - test_step.dependOn(&run.step); -} diff --git a/test/link/macho/tls/main.zig b/test/link/macho/tls/main.zig deleted file mode 100644 index ab01616e31..0000000000 --- a/test/link/macho/tls/main.zig +++ /dev/null @@ -1,15 +0,0 @@ -const std = @import("std"); - -extern threadlocal var a: i32; -extern fn getA() i32; - -fn getA2() i32 { - return a; -} - -test { - a = 2; - try std.testing.expect(getA() == 2); - try std.testing.expect(2 == getA2()); - try std.testing.expect(getA() == getA2()); -} From fa161c205943116f630975a097a04a9ac6ea1586 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 14 Jan 2024 11:07:16 +0100 Subject: [PATCH 052/133] test/link/macho: test tentative definitions --- test/link/macho.zig | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/test/link/macho.zig b/test/link/macho.zig index 39efb02065..cddd9954ba 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -25,6 +25,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testMhExecuteHeader(b, .{ .target = default_target })); macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = default_target })); macho_step.dependOn(testSegmentBoundarySymbols(b, .{ .target = default_target })); + macho_step.dependOn(testTentative(b, .{ .target = default_target })); macho_step.dependOn(testThunks(b, .{ .target = aarch64_target })); macho_step.dependOn(testTlsLargeTbss(b, .{ .target = default_target })); macho_step.dependOn(testUndefinedFlag(b, .{ .target = default_target })); @@ -645,6 +646,32 @@ fn testSegmentBoundarySymbols(b: *Build, opts: Options) *Step { return test_step; } +fn testTentative(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-tentative", opts); + + const exe = addExecutable(b, opts, .{ .name = "main" }); + addCSourceBytes(exe, + \\int foo; + \\int bar; + \\int baz = 42; + , &.{"-fcommon"}); + addCSourceBytes(exe, + \\#include + 
\\int foo; + \\int bar = 5; + \\int baz; + \\int main() { + \\ printf("%d %d %d\n", foo, bar, baz); + \\} + , &.{"-fcommon"}); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("0 5 42\n"); + test_step.dependOn(&run.step); + + return test_step; +} + fn testThunks(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-thunks", opts); From b038bcb93b7a3e254be3fb2f1979680ffa99aa48 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 14 Jan 2024 11:22:04 +0100 Subject: [PATCH 053/133] test/link/macho: test -r mode --- src/main.zig | 4 +- test/link/macho.zig | 121 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 3 deletions(-) diff --git a/src/main.zig b/src/main.zig index fd650384f9..857315346e 100644 --- a/src/main.zig +++ b/src/main.zig @@ -2823,9 +2823,7 @@ fn buildOutputType( } // After this point, resolved_frameworks is used instead of frameworks. - if (create_module.resolved_options.output_mode == .Obj and - (target.ofmt == .coff or target.ofmt == .macho)) - { + if (create_module.resolved_options.output_mode == .Obj and target.ofmt == .coff) { const total_obj_count = create_module.c_source_files.items.len + @intFromBool(root_src_file != null) + create_module.rc_source_files.items.len + diff --git a/test/link/macho.zig b/test/link/macho.zig index cddd9954ba..38327bbdcf 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -23,6 +23,8 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testHelloZig(b, .{ .target = default_target })); macho_step.dependOn(testLargeBss(b, .{ .target = default_target })); macho_step.dependOn(testMhExecuteHeader(b, .{ .target = default_target })); + macho_step.dependOn(testRelocatable(b, .{ .target = default_target })); + macho_step.dependOn(testRelocatableZig(b, .{ .target = default_target })); macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = default_target })); macho_step.dependOn(testSegmentBoundarySymbols(b, .{ .target 
= default_target })); macho_step.dependOn(testTentative(b, .{ .target = default_target })); @@ -498,6 +500,125 @@ fn testNeededLibrary(b: *Build, opts: Options) *Step { return test_step; } +fn testRelocatable(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-relocatable", opts); + + const a_o = addObject(b, opts, .{ .name = "a", .cpp_source_bytes = + \\#include + \\int try_me() { + \\ throw std::runtime_error("Oh no!"); + \\} + }); + a_o.linkLibCpp(); + + const b_o = addObject(b, opts, .{ .name = "b", .cpp_source_bytes = + \\extern int try_me(); + \\int try_again() { + \\ return try_me(); + \\} + }); + + const main_o = addObject(b, opts, .{ .name = "main", .cpp_source_bytes = + \\#include + \\#include + \\extern int try_again(); + \\int main() { + \\ try { + \\ try_again(); + \\ } catch (const std::exception &e) { + \\ std::cout << "exception=" << e.what(); + \\ } + \\ return 0; + \\} + }); + main_o.linkLibCpp(); + + const exp_stdout = "exception=Oh no!"; + + { + const c_o = addObject(b, opts, .{ .name = "c" }); + c_o.addObject(a_o); + c_o.addObject(b_o); + + const exe = addExecutable(b, opts, .{ .name = "main1" }); + exe.addObject(main_o); + exe.addObject(c_o); + exe.linkLibCpp(); + + const run = addRunArtifact(exe); + run.expectStdOutEqual(exp_stdout); + test_step.dependOn(&run.step); + } + + { + const d_o = addObject(b, opts, .{ .name = "d" }); + d_o.addObject(a_o); + d_o.addObject(b_o); + d_o.addObject(main_o); + + const exe = addExecutable(b, opts, .{ .name = "main2" }); + exe.addObject(d_o); + exe.linkLibCpp(); + + const run = addRunArtifact(exe); + run.expectStdOutEqual(exp_stdout); + test_step.dependOn(&run.step); + } + + return test_step; +} + +fn testRelocatableZig(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-relocatable-zig", opts); + + const a_o = addObject(b, opts, .{ .name = "a", .zig_source_bytes = + \\const std = @import("std"); + \\export var foo: i32 = 0; + \\export fn incrFoo() void { + \\ 
foo += 1; + \\ std.debug.print("incrFoo={d}\n", .{foo}); + \\} + }); + + const b_o = addObject(b, opts, .{ .name = "b", .zig_source_bytes = + \\const std = @import("std"); + \\extern var foo: i32; + \\export fn decrFoo() void { + \\ foo -= 1; + \\ std.debug.print("decrFoo={d}\n", .{foo}); + \\} + }); + + const main_o = addObject(b, opts, .{ .name = "main", .zig_source_bytes = + \\const std = @import("std"); + \\extern var foo: i32; + \\extern fn incrFoo() void; + \\extern fn decrFoo() void; + \\pub fn main() void { + \\ const init = foo; + \\ incrFoo(); + \\ decrFoo(); + \\ if (init == foo) @panic("Oh no!"); + \\} + }); + + const c_o = addObject(b, opts, .{ .name = "c" }); + c_o.addObject(a_o); + c_o.addObject(b_o); + c_o.addObject(main_o); + + const exe = addExecutable(b, opts, .{ .name = "main" }); + exe.addObject(c_o); + + const run = addRunArtifact(exe); + run.addCheck(.{ .expect_stderr_match = b.dupe("incrFoo=1") }); + run.addCheck(.{ .expect_stderr_match = b.dupe("decrFoo=1") }); + run.addCheck(.{ .expect_stderr_match = b.dupe("panic: Oh no!") }); + test_step.dependOn(&run.step); + + return test_step; +} + fn testSectionBoundarySymbols(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-section-boundary-symbols", opts); From 181e476915133091c6a51862861079151a4f19f1 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 14 Jan 2024 17:02:57 +0100 Subject: [PATCH 054/133] test/link/macho: upgrade dead_strip_dylibs test --- test/link.zig | 4 ---- test/link/macho.zig | 50 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/test/link.zig b/test/link.zig index b6d4a4fa40..b778b349a0 100644 --- a/test/link.zig +++ b/test/link.zig @@ -107,10 +107,6 @@ pub const cases = [_]Case{ .build_root = "test/link/macho/bugs/16628", .import = @import("link/macho/bugs/16628/build.zig"), }, - .{ - .build_root = "test/link/macho/dead_strip_dylibs", - .import = 
@import("link/macho/dead_strip_dylibs/build.zig"), - }, .{ .build_root = "test/link/macho/dylib", .import = @import("link/macho/dylib/build.zig"), diff --git a/test/link/macho.zig b/test/link/macho.zig index 38327bbdcf..85ff0142f2 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -42,6 +42,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { // Tests requiring presence of macOS SDK in system path if (build_opts.has_macos_sdk) { + macho_step.dependOn(testDeadStripDylibs(b, .{ .target = b.host })); macho_step.dependOn(testHeaderpad(b, .{ .target = b.host })); macho_step.dependOn(testNeededFramework(b, .{ .target = b.host })); macho_step.dependOn(testWeakFramework(b, .{ .target = b.host })); @@ -132,6 +133,55 @@ fn testDeadStrip(b: *Build, opts: Options) *Step { return test_step; } +fn testDeadStripDylibs(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-dead-strip-dylibs", opts); + + const main_o = addObject(b, opts, .{ .name = "main", .c_source_bytes = + \\#include + \\int main() { + \\ if (objc_getClass("NSObject") == 0) { + \\ return -1; + \\ } + \\ if (objc_getClass("NSApplication") == 0) { + \\ return -2; + \\ } + \\ return 0; + \\} + }); + + { + const exe = addExecutable(b, opts, .{ .name = "main1" }); + exe.addObject(main_o); + exe.root_module.linkFramework("Cocoa", .{}); + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("cmd LOAD_DYLIB"); + check.checkContains("Cocoa"); + check.checkInHeaders(); + check.checkExact("cmd LOAD_DYLIB"); + check.checkContains("libobjc"); + test_step.dependOn(&check.step); + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + } + + { + const exe = addExecutable(b, opts, .{ .name = "main2" }); + exe.addObject(main_o); + exe.root_module.linkFramework("Cocoa", .{}); + exe.dead_strip_dylibs = true; + + const run = addRunArtifact(exe); + run.expectExitCode(@as(u8, @bitCast(@as(i8, -2)))); + 
test_step.dependOn(&run.step); + } + + return test_step; +} + fn testEntryPointDylib(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-entry-point-dylib", opts); From b1cf6d310323f2908ed401d1a6926d096ade530d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 14 Jan 2024 17:19:18 +0100 Subject: [PATCH 055/133] macho: print all definitions of symbols when printing duplicates --- src/link/MachO.zig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 759e9b94de..21966d351c 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3547,7 +3547,7 @@ fn reportDuplicates(self: *MachO, dupes: anytype) error{ HasDuplicates, OutOfMem const tracy = trace(@src()); defer tracy.end(); - const max_notes = 4; + const max_notes = 3; var has_dupes = false; var it = dupes.iterator(); @@ -3556,8 +3556,9 @@ fn reportDuplicates(self: *MachO, dupes: anytype) error{ HasDuplicates, OutOfMem const notes = entry.value_ptr.*; const nnotes = @min(notes.items.len, max_notes) + @intFromBool(notes.items.len > max_notes); - var err = try self.addErrorWithNotes(nnotes); + var err = try self.addErrorWithNotes(nnotes + 1); try err.addMsg(self, "duplicate symbol definition: {s}", .{sym.getName(self)}); + try err.addNote(self, "defined by {}", .{sym.getFile(self).?.fmtPath()}); var inote: usize = 0; while (inote < @min(notes.items.len, max_notes)) : (inote += 1) { From 8105390fff82f372645b01dbfe89f7972ba4e49d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 15 Jan 2024 08:52:05 +0100 Subject: [PATCH 056/133] macho: remove all rpath parsing from the linker --- src/link/MachO.zig | 15 +++------------ src/link/MachO/load_commands.zig | 2 +- test/link/macho.zig | 2 +- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 21966d351c..cbc0c8ec65 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -114,8 +114,6 @@ compatibility_version: 
?std.SemanticVersion, entry_name: ?[]const u8, platform: Platform, sdk_version: ?std.SemanticVersion, -/// Rpath table -rpath_table: std.StringArrayHashMapUnmanaged(void) = .{}, /// When set to true, the linker will hoist all dylibs including system dependent dylibs. no_implicit_dylibs: bool = false, @@ -210,12 +208,6 @@ pub fn createEmpty( .mode = link.File.determineMode(false, output_mode, link_mode), }); - // Filter rpaths - try self.rpath_table.ensureUnusedCapacity(gpa, self.base.rpath_list.len); - for (options.rpath_list) |rpath| { - _ = self.rpath_table.putAssumeCapacity(rpath, {}); - } - // Append null file try self.files.append(gpa, .null); // Atom at index 0 is reserved as null atom @@ -333,7 +325,6 @@ pub fn deinit(self: *MachO) void { } self.thunks.deinit(gpa); self.unwind_records.deinit(gpa); - self.rpath_table.deinit(gpa); } pub fn flush(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node) link.File.FlushError!void { @@ -701,7 +692,7 @@ fn dumpArgv(self: *MachO, comp: *Compilation) !void { try argv.append(syslibroot); } - for (self.rpath_table.keys()) |rpath| { + for (self.base.rpath_list) |rpath| { try argv.append("-rpath"); try argv.append(rpath); } @@ -2812,8 +2803,8 @@ fn writeLoadCommands(self: *MachO) !struct { usize, usize, usize } { ncmds += 1; } - try load_commands.writeRpathLCs(self.rpath_table.keys(), writer); - ncmds += self.rpath_table.keys().len; + try load_commands.writeRpathLCs(self.base.rpath_list, writer); + ncmds += self.base.rpath_list.len; try writer.writeStruct(macho.source_version_command{ .version = 0 }); ncmds += 1; diff --git a/src/link/MachO/load_commands.zig b/src/link/MachO/load_commands.zig index bd6a41d39c..7d045779fe 100644 --- a/src/link/MachO/load_commands.zig +++ b/src/link/MachO/load_commands.zig @@ -59,7 +59,7 @@ pub fn calcLoadCommandsSize(macho_file: *MachO, assume_max_path_len: bool) u32 { } // LC_RPATH { - for (macho_file.rpath_table.keys()) |rpath| { + for (macho_file.base.rpath_list) |rpath| { 
sizeofcmds += calcInstallNameLen( @sizeOf(macho.rpath_command), rpath, diff --git a/test/link/macho.zig b/test/link/macho.zig index 85ff0142f2..ef50abe5cd 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -662,7 +662,7 @@ fn testRelocatableZig(b: *Build, opts: Options) *Step { const run = addRunArtifact(exe); run.addCheck(.{ .expect_stderr_match = b.dupe("incrFoo=1") }); - run.addCheck(.{ .expect_stderr_match = b.dupe("decrFoo=1") }); + run.addCheck(.{ .expect_stderr_match = b.dupe("decrFoo=0") }); run.addCheck(.{ .expect_stderr_match = b.dupe("panic: Oh no!") }); test_step.dependOn(&run.step); From a454ba79083128e3172d2ca14fced9ec4a3763b1 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 15 Jan 2024 08:58:44 +0100 Subject: [PATCH 057/133] test/link/macho: upgrade dylib test --- test/link.zig | 4 ---- test/link/macho.zig | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/test/link.zig b/test/link.zig index b778b349a0..1085a35181 100644 --- a/test/link.zig +++ b/test/link.zig @@ -107,10 +107,6 @@ pub const cases = [_]Case{ .build_root = "test/link/macho/bugs/16628", .import = @import("link/macho/bugs/16628/build.zig"), }, - .{ - .build_root = "test/link/macho/dylib", - .import = @import("link/macho/dylib/build.zig"), - }, .{ .build_root = "test/link/macho/empty", .import = @import("link/macho/empty/build.zig"), diff --git a/test/link/macho.zig b/test/link/macho.zig index ef50abe5cd..e3c481aed9 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -35,6 +35,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { // Tests requiring symlinks when tested on Windows if (build_opts.has_symlinks_windows) { + macho_step.dependOn(testDylib(b, .{ .target = default_target })); macho_step.dependOn(testNeededLibrary(b, .{ .target = default_target })); macho_step.dependOn(testWeakLibrary(b, .{ .target = default_target })); macho_step.dependOn(testTls(b, .{ .target = default_target })); @@ 
-182,6 +183,43 @@ fn testDeadStripDylibs(b: *Build, opts: Options) *Step { return test_step; } +fn testDylib(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-dylib", opts); + + const dylib = addSharedLibrary(b, opts, .{ .name = "a", .c_source_bytes = + \\#include + \\char world[] = "world"; + \\char* hello() { + \\ return "Hello"; + \\} + }); + + const check = dylib.checkObject(); + check.checkInHeaders(); + check.checkExact("header"); + check.checkNotPresent("PIE"); + test_step.dependOn(&check.step); + + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = + \\#include + \\char* hello(); + \\extern char world[]; + \\int main() { + \\ printf("%s %s", hello(), world); + \\ return 0; + \\} + }); + exe.root_module.linkSystemLibrary("a", .{}); + exe.addLibraryPath(dylib.getEmittedBinDirectory()); + exe.addRPath(dylib.getEmittedBinDirectory()); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("Hello world"); + test_step.dependOn(&run.step); + + return test_step; +} + fn testEntryPointDylib(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-entry-point-dylib", opts); From 82a044f4f7997dbbb43d1aa7f8ba956328380371 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 15 Jan 2024 09:03:36 +0100 Subject: [PATCH 058/133] test/link/macho: upgrade empty object test --- test/link.zig | 4 -- test/link/macho.zig | 21 +++++++++++ test/link/macho/dylib/a.c | 7 ---- test/link/macho/dylib/build.zig | 65 --------------------------------- test/link/macho/dylib/main.c | 9 ----- test/link/macho/empty/build.zig | 31 ---------------- test/link/macho/empty/empty.c | 0 test/link/macho/empty/main.c | 6 --- 8 files changed, 21 insertions(+), 122 deletions(-) delete mode 100644 test/link/macho/dylib/a.c delete mode 100644 test/link/macho/dylib/build.zig delete mode 100644 test/link/macho/dylib/main.c delete mode 100644 test/link/macho/empty/build.zig delete mode 100644 test/link/macho/empty/empty.c delete mode 
100644 test/link/macho/empty/main.c diff --git a/test/link.zig b/test/link.zig index 1085a35181..f0d6c87ec9 100644 --- a/test/link.zig +++ b/test/link.zig @@ -107,10 +107,6 @@ pub const cases = [_]Case{ .build_root = "test/link/macho/bugs/16628", .import = @import("link/macho/bugs/16628/build.zig"), }, - .{ - .build_root = "test/link/macho/empty", - .import = @import("link/macho/empty/build.zig"), - }, .{ .build_root = "test/link/macho/entry", .import = @import("link/macho/entry/build.zig"), diff --git a/test/link/macho.zig b/test/link/macho.zig index e3c481aed9..6dede6a973 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -17,6 +17,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { }); macho_step.dependOn(testDeadStrip(b, .{ .target = default_target })); + macho_step.dependOn(testEmptyObject(b, .{ .target = default_target })); macho_step.dependOn(testEntryPointDylib(b, .{ .target = default_target })); macho_step.dependOn(testHeaderWeakFlags(b, .{ .target = default_target })); macho_step.dependOn(testHelloC(b, .{ .target = default_target })); @@ -220,6 +221,26 @@ fn testDylib(b: *Build, opts: Options) *Step { return test_step; } +fn testEmptyObject(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-empty-object", opts); + + const empty = addObject(b, opts, .{ .name = "empty", .c_source_bytes = "" }); + + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = + \\#include + \\int main() { + \\ printf("Hello world!"); + \\} + }); + exe.addObject(empty); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("Hello world!"); + test_step.dependOn(&run.step); + + return test_step; +} + fn testEntryPointDylib(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-entry-point-dylib", opts); diff --git a/test/link/macho/dylib/a.c b/test/link/macho/dylib/a.c deleted file mode 100644 index 199b31e1a0..0000000000 --- a/test/link/macho/dylib/a.c +++ /dev/null @@ -1,7 +0,0 @@ -#include 
- -char world[] = "world"; - -char* hello() { - return "Hello"; -} diff --git a/test/link/macho/dylib/build.zig b/test/link/macho/dylib/build.zig deleted file mode 100644 index abd7175eae..0000000000 --- a/test/link/macho/dylib/build.zig +++ /dev/null @@ -1,65 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const target = b.resolveTargetQuery(.{ .os_tag = .macos }); - - const dylib = b.addSharedLibrary(.{ - .name = "a", - .version = .{ .major = 1, .minor = 0, .patch = 0 }, - .optimize = optimize, - .target = target, - }); - dylib.addCSourceFile(.{ .file = .{ .path = "a.c" }, .flags = &.{} }); - dylib.linkLibC(); - - const check_dylib = dylib.checkObject(); - check_dylib.checkInHeaders(); - check_dylib.checkExact("cmd ID_DYLIB"); - check_dylib.checkExact("name @rpath/liba.dylib"); - check_dylib.checkExact("timestamp 2"); - check_dylib.checkExact("current version 10000"); - check_dylib.checkExact("compatibility version 10000"); - - test_step.dependOn(&check_dylib.step); - - const exe = b.addExecutable(.{ - .name = "main", - .optimize = optimize, - .target = target, - }); - exe.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &.{} }); - exe.linkSystemLibrary("a"); - exe.addLibraryPath(dylib.getEmittedBinDirectory()); - exe.addRPath(dylib.getEmittedBinDirectory()); - exe.linkLibC(); - - const check_exe = exe.checkObject(); - check_exe.checkInHeaders(); - check_exe.checkExact("cmd LOAD_DYLIB"); - check_exe.checkExact("name @rpath/liba.dylib"); - check_exe.checkExact("timestamp 2"); - check_exe.checkExact("current version 10000"); - check_exe.checkExact("compatibility version 10000"); - - 
check_exe.checkInHeaders(); - check_exe.checkExact("cmd RPATH"); - check_exe.checkExactPath("path", dylib.getEmittedBinDirectory()); - test_step.dependOn(&check_exe.step); - - const run = b.addRunArtifact(exe); - run.skip_foreign_checks = true; - run.expectStdOutEqual("Hello world"); - test_step.dependOn(&run.step); -} diff --git a/test/link/macho/dylib/main.c b/test/link/macho/dylib/main.c deleted file mode 100644 index 941903f219..0000000000 --- a/test/link/macho/dylib/main.c +++ /dev/null @@ -1,9 +0,0 @@ -#include - -char* hello(); -extern char world[]; - -int main(int argc, char* argv[]) { - printf("%s %s", hello(), world); - return 0; -} diff --git a/test/link/macho/empty/build.zig b/test/link/macho/empty/build.zig deleted file mode 100644 index af38930ae8..0000000000 --- a/test/link/macho/empty/build.zig +++ /dev/null @@ -1,31 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const target = b.resolveTargetQuery(.{ .os_tag = .macos }); - - const exe = b.addExecutable(.{ - .name = "test", - .optimize = optimize, - .target = target, - }); - exe.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &[0][]const u8{} }); - exe.addCSourceFile(.{ .file = .{ .path = "empty.c" }, .flags = &[0][]const u8{} }); - exe.linkLibC(); - - const run_cmd = b.addRunArtifact(exe); - run_cmd.skip_foreign_checks = true; - run_cmd.expectStdOutEqual("Hello!\n"); - test_step.dependOn(&run_cmd.step); -} diff --git a/test/link/macho/empty/empty.c b/test/link/macho/empty/empty.c deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/test/link/macho/empty/main.c b/test/link/macho/empty/main.c deleted 
file mode 100644 index 9f1eea37b4..0000000000 --- a/test/link/macho/empty/main.c +++ /dev/null @@ -1,6 +0,0 @@ -#include - -int main(int argc, char* argv[]) { - printf("Hello!\n"); - return 0; -} From a25b780aad5df41bc97ce8ffc700917208be820b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 15 Jan 2024 10:14:39 +0100 Subject: [PATCH 059/133] test/link/macho: upgrade entry and entry in archive tests --- test/link.zig | 8 --- test/link/macho.zig | 62 +++++++++++++++++++++- test/link/macho/entry/build.zig | 45 ---------------- test/link/macho/entry/main.c | 6 --- test/link/macho/entry_in_archive/build.zig | 36 ------------- test/link/macho/entry_in_archive/main.c | 5 -- 6 files changed, 61 insertions(+), 101 deletions(-) delete mode 100644 test/link/macho/entry/build.zig delete mode 100644 test/link/macho/entry/main.c delete mode 100644 test/link/macho/entry_in_archive/build.zig delete mode 100644 test/link/macho/entry_in_archive/main.c diff --git a/test/link.zig b/test/link.zig index f0d6c87ec9..7fc18a141c 100644 --- a/test/link.zig +++ b/test/link.zig @@ -107,14 +107,6 @@ pub const cases = [_]Case{ .build_root = "test/link/macho/bugs/16628", .import = @import("link/macho/bugs/16628/build.zig"), }, - .{ - .build_root = "test/link/macho/entry", - .import = @import("link/macho/entry/build.zig"), - }, - .{ - .build_root = "test/link/macho/entry_in_archive", - .import = @import("link/macho/entry_in_archive/build.zig"), - }, .{ .build_root = "test/link/macho/linksection", .import = @import("link/macho/linksection/build.zig"), diff --git a/test/link/macho.zig b/test/link/macho.zig index 6dede6a973..66c4b7eb38 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -18,7 +18,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testDeadStrip(b, .{ .target = default_target })); macho_step.dependOn(testEmptyObject(b, .{ .target = default_target })); - macho_step.dependOn(testEntryPointDylib(b, .{ .target = default_target })); + 
macho_step.dependOn(testEntryPoint(b, .{ .target = default_target })); macho_step.dependOn(testHeaderWeakFlags(b, .{ .target = default_target })); macho_step.dependOn(testHelloC(b, .{ .target = default_target })); macho_step.dependOn(testHelloZig(b, .{ .target = default_target })); @@ -36,6 +36,8 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { // Tests requiring symlinks when tested on Windows if (build_opts.has_symlinks_windows) { + macho_step.dependOn(testEntryPointArchive(b, .{ .target = default_target })); + macho_step.dependOn(testEntryPointDylib(b, .{ .target = default_target })); macho_step.dependOn(testDylib(b, .{ .target = default_target })); macho_step.dependOn(testNeededLibrary(b, .{ .target = default_target })); macho_step.dependOn(testWeakLibrary(b, .{ .target = default_target })); @@ -241,6 +243,64 @@ fn testEmptyObject(b: *Build, opts: Options) *Step { return test_step; } +fn testEntryPoint(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-entry-point", opts); + + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = + \\#include + \\int non_main() { + \\ printf("%d", 42); + \\ return 0; + \\} + }); + exe.entry = .{ .symbol_name = "_non_main" }; + + const run = addRunArtifact(exe); + run.expectStdOutEqual("42"); + test_step.dependOn(&run.step); + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("segname __TEXT"); + check.checkExtract("vmaddr {vmaddr}"); + check.checkInHeaders(); + check.checkExact("cmd MAIN"); + check.checkExtract("entryoff {entryoff}"); + check.checkInSymtab(); + check.checkExtract("{n_value} (__TEXT,__text) external _non_main"); + check.checkComputeCompare("vmaddr entryoff +", .{ .op = .eq, .value = .{ .variable = "n_value" } }); + test_step.dependOn(&check.step); + + return test_step; +} + +fn testEntryPointArchive(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-entry-point-archive", opts); + + const lib = 
addStaticLibrary(b, opts, .{ .name = "main", .c_source_bytes = "int main() { return 0; }" }); + + { + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = "" }); + exe.root_module.linkSystemLibrary("main", .{}); + exe.addLibraryPath(lib.getEmittedBinDirectory()); + + const run = addRunArtifact(exe); + test_step.dependOn(&run.step); + } + + { + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = "" }); + exe.root_module.linkSystemLibrary("main", .{}); + exe.addLibraryPath(lib.getEmittedBinDirectory()); + exe.link_gc_sections = true; + + const run = addRunArtifact(exe); + test_step.dependOn(&run.step); + } + + return test_step; +} + fn testEntryPointDylib(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-entry-point-dylib", opts); diff --git a/test/link/macho/entry/build.zig b/test/link/macho/entry/build.zig deleted file mode 100644 index 0ef717f292..0000000000 --- a/test/link/macho/entry/build.zig +++ /dev/null @@ -1,45 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const exe = b.addExecutable(.{ - .name = "main", - .optimize = optimize, - .target = b.resolveTargetQuery(.{ .os_tag = .macos }), - }); - exe.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &.{} }); - exe.linkLibC(); - exe.entry = .{ .symbol_name = "_non_main" }; - - const check_exe = exe.checkObject(); - - check_exe.checkInHeaders(); - check_exe.checkExact("segname __TEXT"); - check_exe.checkExtract("vmaddr {vmaddr}"); - - check_exe.checkInHeaders(); - check_exe.checkExact("cmd MAIN"); - check_exe.checkExtract("entryoff {entryoff}"); - - 
check_exe.checkInSymtab(); - check_exe.checkExtract("{n_value} (__TEXT,__text) external _non_main"); - - check_exe.checkComputeCompare("vmaddr entryoff +", .{ .op = .eq, .value = .{ .variable = "n_value" } }); - test_step.dependOn(&check_exe.step); - - const run = b.addRunArtifact(exe); - run.skip_foreign_checks = true; - run.expectStdOutEqual("42"); - test_step.dependOn(&run.step); -} diff --git a/test/link/macho/entry/main.c b/test/link/macho/entry/main.c deleted file mode 100644 index 5fc58fc465..0000000000 --- a/test/link/macho/entry/main.c +++ /dev/null @@ -1,6 +0,0 @@ -#include - -int non_main() { - printf("%d", 42); - return 0; -} diff --git a/test/link/macho/entry_in_archive/build.zig b/test/link/macho/entry_in_archive/build.zig deleted file mode 100644 index 72f340b204..0000000000 --- a/test/link/macho/entry_in_archive/build.zig +++ /dev/null @@ -1,36 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const lib = b.addStaticLibrary(.{ - .name = "main", - .optimize = optimize, - .target = b.resolveTargetQuery(.{ .os_tag = .macos }), - }); - lib.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &.{} }); - lib.linkLibC(); - - const exe = b.addExecutable(.{ - .name = "main", - .optimize = optimize, - .target = b.resolveTargetQuery(.{ .os_tag = .macos }), - }); - exe.linkLibrary(lib); - exe.linkLibC(); - - const run = b.addRunArtifact(exe); - run.skip_foreign_checks = true; - run.expectExitCode(0); - test_step.dependOn(&run.step); -} diff --git a/test/link/macho/entry_in_archive/main.c b/test/link/macho/entry_in_archive/main.c deleted file mode 100644 index b9f6deb5be..0000000000 
--- a/test/link/macho/entry_in_archive/main.c +++ /dev/null @@ -1,5 +0,0 @@ -#include - -int main(int argc, char* argv[]) { - return 0; -} From 4dc1907a88c950daa8273ed4db1b46f7537e20fc Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 15 Jan 2024 10:22:29 +0100 Subject: [PATCH 060/133] test/link/macho: upgrade strict validation of layout test --- test/link.zig | 4 - test/link/macho.zig | 120 +++++++++++++++++ test/link/macho/strict_validation/build.zig | 137 -------------------- test/link/macho/strict_validation/main.zig | 6 - 4 files changed, 120 insertions(+), 147 deletions(-) delete mode 100644 test/link/macho/strict_validation/build.zig delete mode 100644 test/link/macho/strict_validation/main.zig diff --git a/test/link.zig b/test/link.zig index 7fc18a141c..905d6cc35d 100644 --- a/test/link.zig +++ b/test/link.zig @@ -135,10 +135,6 @@ pub const cases = [_]Case{ .build_root = "test/link/macho/stack_size", .import = @import("link/macho/stack_size/build.zig"), }, - .{ - .build_root = "test/link/macho/strict_validation", - .import = @import("link/macho/strict_validation/build.zig"), - }, .{ .build_root = "test/link/macho/tbdv3", .import = @import("link/macho/tbdv3/build.zig"), diff --git a/test/link/macho.zig b/test/link/macho.zig index 66c4b7eb38..03c3b71c59 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -23,6 +23,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testHelloC(b, .{ .target = default_target })); macho_step.dependOn(testHelloZig(b, .{ .target = default_target })); macho_step.dependOn(testLargeBss(b, .{ .target = default_target })); + macho_step.dependOn(testLayout(b, .{ .target = default_target })); macho_step.dependOn(testMhExecuteHeader(b, .{ .target = default_target })); macho_step.dependOn(testRelocatable(b, .{ .target = default_target })); macho_step.dependOn(testRelocatableZig(b, .{ .target = default_target })); @@ -612,6 +613,125 @@ fn testLargeBss(b: *Build, opts: Options) *Step { return 
test_step; } +fn testLayout(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-layout", opts); + + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = + \\#include + \\int main() { + \\ printf("Hello world!"); + \\ return 0; + \\} + }); + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("cmd SEGMENT_64"); + check.checkExact("segname __LINKEDIT"); + check.checkExtract("fileoff {fileoff}"); + check.checkExtract("filesz {filesz}"); + check.checkInHeaders(); + check.checkExact("cmd DYLD_INFO_ONLY"); + check.checkExtract("rebaseoff {rebaseoff}"); + check.checkExtract("rebasesize {rebasesize}"); + check.checkExtract("bindoff {bindoff}"); + check.checkExtract("bindsize {bindsize}"); + check.checkExtract("lazybindoff {lazybindoff}"); + check.checkExtract("lazybindsize {lazybindsize}"); + check.checkExtract("exportoff {exportoff}"); + check.checkExtract("exportsize {exportsize}"); + check.checkInHeaders(); + check.checkExact("cmd FUNCTION_STARTS"); + check.checkExtract("dataoff {fstartoff}"); + check.checkExtract("datasize {fstartsize}"); + check.checkInHeaders(); + check.checkExact("cmd DATA_IN_CODE"); + check.checkExtract("dataoff {diceoff}"); + check.checkExtract("datasize {dicesize}"); + check.checkInHeaders(); + check.checkExact("cmd SYMTAB"); + check.checkExtract("symoff {symoff}"); + check.checkExtract("nsyms {symnsyms}"); + check.checkExtract("stroff {stroff}"); + check.checkExtract("strsize {strsize}"); + check.checkInHeaders(); + check.checkExact("cmd DYSYMTAB"); + check.checkExtract("indirectsymoff {dysymoff}"); + check.checkExtract("nindirectsyms {dysymnsyms}"); + + switch (opts.target.result.cpu.arch) { + .aarch64 => { + check.checkInHeaders(); + check.checkExact("cmd CODE_SIGNATURE"); + check.checkExtract("dataoff {codesigoff}"); + check.checkExtract("datasize {codesigsize}"); + }, + .x86_64 => {}, + else => unreachable, + } + + // DYLD_INFO_ONLY subsections are in order: rebase < bind 
< lazy < export, + // and there are no gaps between them + check.checkComputeCompare("rebaseoff rebasesize +", .{ .op = .eq, .value = .{ .variable = "bindoff" } }); + check.checkComputeCompare("bindoff bindsize +", .{ .op = .eq, .value = .{ .variable = "lazybindoff" } }); + check.checkComputeCompare("lazybindoff lazybindsize +", .{ .op = .eq, .value = .{ .variable = "exportoff" } }); + + // FUNCTION_STARTS directly follows DYLD_INFO_ONLY (no gap) + check.checkComputeCompare("exportoff exportsize +", .{ .op = .eq, .value = .{ .variable = "fstartoff" } }); + + // DATA_IN_CODE directly follows FUNCTION_STARTS (no gap) + check.checkComputeCompare("fstartoff fstartsize +", .{ .op = .eq, .value = .{ .variable = "diceoff" } }); + + // SYMTAB directly follows DATA_IN_CODE (no gap) + check.checkComputeCompare("diceoff dicesize +", .{ .op = .eq, .value = .{ .variable = "symoff" } }); + + // DYSYMTAB directly follows SYMTAB (no gap) + check.checkComputeCompare("symnsyms 16 symoff * +", .{ .op = .eq, .value = .{ .variable = "dysymoff" } }); + + // STRTAB follows DYSYMTAB with possible gap + check.checkComputeCompare("dysymnsyms 4 dysymoff * +", .{ .op = .lte, .value = .{ .variable = "stroff" } }); + + // all LINKEDIT sections apart from CODE_SIGNATURE are 8-bytes aligned + check.checkComputeCompare("rebaseoff 8 %", .{ .op = .eq, .value = .{ .literal = 0 } }); + check.checkComputeCompare("bindoff 8 %", .{ .op = .eq, .value = .{ .literal = 0 } }); + check.checkComputeCompare("lazybindoff 8 %", .{ .op = .eq, .value = .{ .literal = 0 } }); + check.checkComputeCompare("exportoff 8 %", .{ .op = .eq, .value = .{ .literal = 0 } }); + check.checkComputeCompare("fstartoff 8 %", .{ .op = .eq, .value = .{ .literal = 0 } }); + check.checkComputeCompare("diceoff 8 %", .{ .op = .eq, .value = .{ .literal = 0 } }); + check.checkComputeCompare("symoff 8 %", .{ .op = .eq, .value = .{ .literal = 0 } }); + check.checkComputeCompare("stroff 8 %", .{ .op = .eq, .value = .{ .literal = 0 } }); + 
check.checkComputeCompare("dysymoff 8 %", .{ .op = .eq, .value = .{ .literal = 0 } }); + + switch (opts.target.result.cpu.arch) { + .aarch64 => { + // LINKEDIT segment does not extend beyond, or does not include, CODE_SIGNATURE data + check.checkComputeCompare("fileoff filesz codesigoff codesigsize + - -", .{ + .op = .eq, + .value = .{ .literal = 0 }, + }); + + // CODE_SIGNATURE data offset is 16-bytes aligned + check.checkComputeCompare("codesigoff 16 %", .{ .op = .eq, .value = .{ .literal = 0 } }); + }, + .x86_64 => { + // LINKEDIT segment does not extend beyond, or does not include, strtab data + check.checkComputeCompare("fileoff filesz stroff strsize + - -", .{ + .op = .eq, + .value = .{ .literal = 0 }, + }); + }, + else => unreachable, + } + + test_step.dependOn(&check.step); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("Hello world!"); + test_step.dependOn(&run.step); + + return test_step; +} + fn testMhExecuteHeader(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-mh-execute-header", opts); diff --git a/test/link/macho/strict_validation/build.zig b/test/link/macho/strict_validation/build.zig deleted file mode 100644 index f35438369c..0000000000 --- a/test/link/macho/strict_validation/build.zig +++ /dev/null @@ -1,137 +0,0 @@ -const std = @import("std"); -const builtin = @import("builtin"); - -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const target = b.resolveTargetQuery(.{ .os_tag = .macos }); - - const exe = b.addExecutable(.{ - .name = "main", - .root_source_file = .{ .path = "main.zig" }, - .optimize = optimize, - .target = target, - }); - exe.linkLibC(); - - const check_exe 
= exe.checkObject(); - - check_exe.checkInHeaders(); - check_exe.checkExact("cmd SEGMENT_64"); - check_exe.checkExact("segname __LINKEDIT"); - check_exe.checkExtract("fileoff {fileoff}"); - check_exe.checkExtract("filesz {filesz}"); - - check_exe.checkInHeaders(); - check_exe.checkExact("cmd DYLD_INFO_ONLY"); - check_exe.checkExtract("rebaseoff {rebaseoff}"); - check_exe.checkExtract("rebasesize {rebasesize}"); - check_exe.checkExtract("bindoff {bindoff}"); - check_exe.checkExtract("bindsize {bindsize}"); - check_exe.checkExtract("lazybindoff {lazybindoff}"); - check_exe.checkExtract("lazybindsize {lazybindsize}"); - check_exe.checkExtract("exportoff {exportoff}"); - check_exe.checkExtract("exportsize {exportsize}"); - - check_exe.checkInHeaders(); - check_exe.checkExact("cmd FUNCTION_STARTS"); - check_exe.checkExtract("dataoff {fstartoff}"); - check_exe.checkExtract("datasize {fstartsize}"); - - check_exe.checkInHeaders(); - check_exe.checkExact("cmd DATA_IN_CODE"); - check_exe.checkExtract("dataoff {diceoff}"); - check_exe.checkExtract("datasize {dicesize}"); - - check_exe.checkInHeaders(); - check_exe.checkExact("cmd SYMTAB"); - check_exe.checkExtract("symoff {symoff}"); - check_exe.checkExtract("nsyms {symnsyms}"); - check_exe.checkExtract("stroff {stroff}"); - check_exe.checkExtract("strsize {strsize}"); - - check_exe.checkInHeaders(); - check_exe.checkExact("cmd DYSYMTAB"); - check_exe.checkExtract("indirectsymoff {dysymoff}"); - check_exe.checkExtract("nindirectsyms {dysymnsyms}"); - - switch (builtin.cpu.arch) { - .aarch64 => { - check_exe.checkInHeaders(); - check_exe.checkExact("cmd CODE_SIGNATURE"); - check_exe.checkExtract("dataoff {codesigoff}"); - check_exe.checkExtract("datasize {codesigsize}"); - }, - .x86_64 => {}, - else => unreachable, - } - - // DYLD_INFO_ONLY subsections are in order: rebase < bind < lazy < export, - // and there are no gaps between them - check_exe.checkComputeCompare("rebaseoff rebasesize +", .{ .op = .eq, .value = .{ 
.variable = "bindoff" } }); - check_exe.checkComputeCompare("bindoff bindsize +", .{ .op = .eq, .value = .{ .variable = "lazybindoff" } }); - check_exe.checkComputeCompare("lazybindoff lazybindsize +", .{ .op = .eq, .value = .{ .variable = "exportoff" } }); - - // FUNCTION_STARTS directly follows DYLD_INFO_ONLY (no gap) - check_exe.checkComputeCompare("exportoff exportsize +", .{ .op = .eq, .value = .{ .variable = "fstartoff" } }); - - // DATA_IN_CODE directly follows FUNCTION_STARTS (no gap) - check_exe.checkComputeCompare("fstartoff fstartsize +", .{ .op = .eq, .value = .{ .variable = "diceoff" } }); - - // SYMTAB directly follows DATA_IN_CODE (no gap) - check_exe.checkComputeCompare("diceoff dicesize +", .{ .op = .eq, .value = .{ .variable = "symoff" } }); - - // DYSYMTAB directly follows SYMTAB (no gap) - check_exe.checkComputeCompare("symnsyms 16 symoff * +", .{ .op = .eq, .value = .{ .variable = "dysymoff" } }); - - // STRTAB follows DYSYMTAB with possible gap - check_exe.checkComputeCompare("dysymnsyms 4 dysymoff * +", .{ .op = .lte, .value = .{ .variable = "stroff" } }); - - // all LINKEDIT sections apart from CODE_SIGNATURE are 8-bytes aligned - check_exe.checkComputeCompare("rebaseoff 8 %", .{ .op = .eq, .value = .{ .literal = 0 } }); - check_exe.checkComputeCompare("bindoff 8 %", .{ .op = .eq, .value = .{ .literal = 0 } }); - check_exe.checkComputeCompare("lazybindoff 8 %", .{ .op = .eq, .value = .{ .literal = 0 } }); - check_exe.checkComputeCompare("exportoff 8 %", .{ .op = .eq, .value = .{ .literal = 0 } }); - check_exe.checkComputeCompare("fstartoff 8 %", .{ .op = .eq, .value = .{ .literal = 0 } }); - check_exe.checkComputeCompare("diceoff 8 %", .{ .op = .eq, .value = .{ .literal = 0 } }); - check_exe.checkComputeCompare("symoff 8 %", .{ .op = .eq, .value = .{ .literal = 0 } }); - check_exe.checkComputeCompare("stroff 8 %", .{ .op = .eq, .value = .{ .literal = 0 } }); - check_exe.checkComputeCompare("dysymoff 8 %", .{ .op = .eq, .value = .{ .literal = 
0 } }); - - switch (builtin.cpu.arch) { - .aarch64 => { - // LINKEDIT segment does not extend beyond, or does not include, CODE_SIGNATURE data - check_exe.checkComputeCompare("fileoff filesz codesigoff codesigsize + - -", .{ - .op = .eq, - .value = .{ .literal = 0 }, - }); - - // CODE_SIGNATURE data offset is 16-bytes aligned - check_exe.checkComputeCompare("codesigoff 16 %", .{ .op = .eq, .value = .{ .literal = 0 } }); - }, - .x86_64 => { - // LINKEDIT segment does not extend beyond, or does not include, strtab data - check_exe.checkComputeCompare("fileoff filesz stroff strsize + - -", .{ - .op = .eq, - .value = .{ .literal = 0 }, - }); - }, - else => unreachable, - } - test_step.dependOn(&check_exe.step); - - const run = b.addRunArtifact(exe); - run.skip_foreign_checks = true; - run.expectStdOutEqual("Hello!\n"); - test_step.dependOn(&run.step); -} diff --git a/test/link/macho/strict_validation/main.zig b/test/link/macho/strict_validation/main.zig deleted file mode 100644 index 6510e13fd7..0000000000 --- a/test/link/macho/strict_validation/main.zig +++ /dev/null @@ -1,6 +0,0 @@ -const std = @import("std"); - -pub fn main() !void { - const stdout = std.io.getStdOut().writer(); - try stdout.writeAll("Hello!\n"); -} From 4cf94bb14869641316f337903ab24eef52eb7899 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 15 Jan 2024 10:32:25 +0100 Subject: [PATCH 061/133] test/link/macho: test attribute "used" on vars in presence of dead_strip --- test/link/macho.zig | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/test/link/macho.zig b/test/link/macho.zig index 03c3b71c59..7b87448d58 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -25,6 +25,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testLargeBss(b, .{ .target = default_target })); macho_step.dependOn(testLayout(b, .{ .target = default_target })); macho_step.dependOn(testMhExecuteHeader(b, .{ .target = default_target })); + 
macho_step.dependOn(testNoDeadStrip(b, .{ .target = default_target })); macho_step.dependOn(testRelocatable(b, .{ .target = default_target })); macho_step.dependOn(testRelocatableZig(b, .{ .target = default_target })); macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = default_target })); @@ -745,6 +746,32 @@ fn testMhExecuteHeader(b: *Build, opts: Options) *Step { return test_step; } +fn testNoDeadStrip(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-no-dead-strip", opts); + + const exe = addExecutable(b, opts, .{ .name = "name", .c_source_bytes = + \\__attribute__((used)) int bogus1 = 0; + \\int bogus2 = 0; + \\int foo = 42; + \\int main() { + \\ return foo - 42; + \\} + }); + exe.link_gc_sections = true; + + const check = exe.checkObject(); + check.checkInSymtab(); + check.checkContains("external _bogus1"); + check.checkInSymtab(); + check.checkNotPresent("external _bogus2"); + test_step.dependOn(&check.step); + + const run = addRunArtifact(exe); + test_step.dependOn(&run.step); + + return test_step; +} + fn testNeededFramework(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-needed-framework", opts); From 7c65f0be375c7e4f0d2ecdc846ae5e9b49cf2737 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 15 Jan 2024 10:35:42 +0100 Subject: [PATCH 062/133] test/link/macho: test no exports in a dylib --- test/link/macho.zig | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/link/macho.zig b/test/link/macho.zig index 7b87448d58..91f60246f2 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -26,6 +26,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testLayout(b, .{ .target = default_target })); macho_step.dependOn(testMhExecuteHeader(b, .{ .target = default_target })); macho_step.dependOn(testNoDeadStrip(b, .{ .target = default_target })); + macho_step.dependOn(testNoExportsDylib(b, .{ .target = default_target })); 
macho_step.dependOn(testRelocatable(b, .{ .target = default_target })); macho_step.dependOn(testRelocatableZig(b, .{ .target = default_target })); macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = default_target })); @@ -772,6 +773,19 @@ fn testNoDeadStrip(b: *Build, opts: Options) *Step { return test_step; } +fn testNoExportsDylib(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-no-exports-dylib", opts); + + const dylib = addSharedLibrary(b, opts, .{ .name = "a", .c_source_bytes = "static void abc() {}" }); + + const check = dylib.checkObject(); + check.checkInSymtab(); + check.checkNotPresent("external _abc"); + test_step.dependOn(&check.step); + + return test_step; +} + fn testNeededFramework(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-needed-framework", opts); From abeb0e3ea41888dd2f4ac04ae335927aba2e7b07 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 15 Jan 2024 10:58:40 +0100 Subject: [PATCH 063/133] test/link/macho: test force-loading objects containing ObjC from archives --- src/link/MachO.zig | 5 +- test/link/link.zig | 139 +++++++++++++------------------------------- test/link/macho.zig | 29 +++++++++ 3 files changed, 75 insertions(+), 98 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index cbc0c8ec65..8d959275aa 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -116,6 +116,9 @@ platform: Platform, sdk_version: ?std.SemanticVersion, /// When set to true, the linker will hoist all dylibs including system dependent dylibs. no_implicit_dylibs: bool = false, +/// Whether the linker should parse and always force load objects containing ObjC in archives. +// TODO: in Zig we currently take -ObjC as always on +force_load_objc: bool = true, /// Hot-code swapping state. 
hot_state: if (is_hot_update_compatible) HotUpdateState else struct {} = .{}, @@ -998,7 +1001,7 @@ fn parseArchive(self: *MachO, lib: SystemLib, must_link: bool, fat_arch: ?fat.Ar // Finally, we do a post-parse check for -ObjC to see if we need to force load this member // anyhow. - // TODO: object.alive = object.alive or (self.options.force_load_objc and object.hasObjc()); + object.alive = object.alive or (self.force_load_objc and object.hasObjc()); } if (has_parse_error) return error.MalformedArchive; } diff --git a/test/link/link.zig b/test/link/link.zig index c17f8b0b5a..8ef497424f 100644 --- a/test/link/link.zig +++ b/test/link/link.zig @@ -46,121 +46,66 @@ const OverlayOptions = struct { c_source_flags: []const []const u8 = &.{}, cpp_source_bytes: ?[]const u8 = null, cpp_source_flags: []const []const u8 = &.{}, + objc_source_bytes: ?[]const u8 = null, + objc_source_flags: []const []const u8 = &.{}, zig_source_bytes: ?[]const u8 = null, pic: ?bool = null, strip: ?bool = null, }; -pub fn addExecutable(b: *std.Build, base: Options, overlay: OverlayOptions) *Step.Compile { - const compile_step = b.addExecutable(.{ - .name = overlay.name, - .root_source_file = rsf: { - const bytes = overlay.zig_source_bytes orelse break :rsf null; - break :rsf b.addWriteFiles().add("a.zig", bytes); - }, - .target = base.target, - .optimize = base.optimize, - .use_llvm = base.use_llvm, - .use_lld = base.use_lld, - .pic = overlay.pic, - .strip = overlay.strip, - }); - if (overlay.cpp_source_bytes) |bytes| { - compile_step.addCSourceFile(.{ - .file = b.addWriteFiles().add("a.cpp", bytes), - .flags = overlay.cpp_source_flags, - }); - } - if (overlay.c_source_bytes) |bytes| { - compile_step.addCSourceFile(.{ - .file = b.addWriteFiles().add("a.c", bytes), - .flags = overlay.c_source_flags, - }); - } - if (overlay.asm_source_bytes) |bytes| { - compile_step.addAssemblyFile(b.addWriteFiles().add("a.s", bytes)); - } - return compile_step; +pub fn addExecutable(b: *std.Build, base: Options, 
overlay: OverlayOptions) *Compile { + return addCompileStep(b, base, overlay, .exe); } -pub fn addObject(b: *Build, base: Options, overlay: OverlayOptions) *Step.Compile { - const compile_step = b.addObject(.{ - .name = overlay.name, - .root_source_file = rsf: { - const bytes = overlay.zig_source_bytes orelse break :rsf null; - break :rsf b.addWriteFiles().add("a.zig", bytes); - }, - .target = base.target, - .optimize = base.optimize, - .use_llvm = base.use_llvm, - .use_lld = base.use_lld, - .pic = overlay.pic, - .strip = overlay.strip, - }); - if (overlay.cpp_source_bytes) |bytes| { - compile_step.addCSourceFile(.{ - .file = b.addWriteFiles().add("a.cpp", bytes), - .flags = overlay.cpp_source_flags, - }); - } - if (overlay.c_source_bytes) |bytes| { - compile_step.addCSourceFile(.{ - .file = b.addWriteFiles().add("a.c", bytes), - .flags = overlay.c_source_flags, - }); - } - if (overlay.asm_source_bytes) |bytes| { - compile_step.addAssemblyFile(b.addWriteFiles().add("a.s", bytes)); - } - return compile_step; +pub fn addObject(b: *Build, base: Options, overlay: OverlayOptions) *Compile { + return addCompileStep(b, base, overlay, .obj); } pub fn addStaticLibrary(b: *Build, base: Options, overlay: OverlayOptions) *Compile { - const compile_step = b.addStaticLibrary(.{ - .name = overlay.name, - .root_source_file = rsf: { - const bytes = overlay.zig_source_bytes orelse break :rsf null; - break :rsf b.addWriteFiles().add("a.zig", bytes); - }, - .target = base.target, - .optimize = base.optimize, - .use_llvm = base.use_llvm, - .use_lld = base.use_lld, - .pic = overlay.pic, - .strip = overlay.strip, - }); - if (overlay.cpp_source_bytes) |bytes| { - compile_step.addCSourceFile(.{ - .file = b.addWriteFiles().add("a.cpp", bytes), - .flags = overlay.cpp_source_flags, - }); - } - if (overlay.c_source_bytes) |bytes| { - compile_step.addCSourceFile(.{ - .file = b.addWriteFiles().add("a.c", bytes), - .flags = overlay.c_source_flags, - }); - } - if (overlay.asm_source_bytes) |bytes| 
{ - compile_step.addAssemblyFile(b.addWriteFiles().add("a.s", bytes)); - } - return compile_step; + return addCompileStep(b, base, overlay, .static_lib); } pub fn addSharedLibrary(b: *Build, base: Options, overlay: OverlayOptions) *Compile { - const compile_step = b.addSharedLibrary(.{ + return addCompileStep(b, base, overlay, .shared_lib); +} + +fn addCompileStep( + b: *Build, + base: Options, + overlay: OverlayOptions, + kind: enum { exe, obj, shared_lib, static_lib }, +) *Compile { + const compile_step = Compile.create(b, .{ .name = overlay.name, - .root_source_file = rsf: { - const bytes = overlay.zig_source_bytes orelse break :rsf null; - break :rsf b.addWriteFiles().add("a.zig", bytes); + .root_module = .{ + .target = base.target, + .optimize = base.optimize, + .root_source_file = rsf: { + const bytes = overlay.zig_source_bytes orelse break :rsf null; + break :rsf b.addWriteFiles().add("a.zig", bytes); + }, + .pic = overlay.pic, + .strip = overlay.strip, }, - .target = base.target, - .optimize = base.optimize, .use_llvm = base.use_llvm, .use_lld = base.use_lld, - .pic = overlay.pic, - .strip = overlay.strip, + .kind = switch (kind) { + .exe => .exe, + .obj => .obj, + .shared_lib, .static_lib => .lib, + }, + .linkage = switch (kind) { + .exe, .obj => null, + .shared_lib => .dynamic, + .static_lib => .static, + }, }); + if (overlay.objc_source_bytes) |bytes| { + compile_step.addCSourceFile(.{ + .file = b.addWriteFiles().add("a.m", bytes), + .flags = overlay.objc_source_flags, + }); + } if (overlay.cpp_source_bytes) |bytes| { compile_step.addCSourceFile(.{ .file = b.addWriteFiles().add("a.cpp", bytes), diff --git a/test/link/macho.zig b/test/link/macho.zig index 91f60246f2..9b63a9a5e9 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -52,6 +52,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testDeadStripDylibs(b, .{ .target = b.host })); macho_step.dependOn(testHeaderpad(b, .{ .target = b.host })); 
macho_step.dependOn(testNeededFramework(b, .{ .target = b.host })); + macho_step.dependOn(testObjc(b, .{ .target = b.host })); macho_step.dependOn(testWeakFramework(b, .{ .target = b.host })); } } @@ -830,6 +831,34 @@ fn testNeededLibrary(b: *Build, opts: Options) *Step { return test_step; } +fn testObjc(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-objc", opts); + + const lib = addStaticLibrary(b, opts, .{ .name = "a", .objc_source_bytes = + \\#import <Foundation/Foundation.h> + \\@interface Foo : NSObject + \\@end + \\@implementation Foo + \\@end + }); + + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = "int main() { return 0; }" }); + exe.root_module.linkSystemLibrary("a", .{}); + exe.root_module.linkFramework("Foundation", .{}); + exe.addLibraryPath(lib.getEmittedBinDirectory()); + + const check = exe.checkObject(); + check.checkInSymtab(); + check.checkContains("_OBJC_"); + test_step.dependOn(&check.step); + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + + return test_step; +} + fn testRelocatable(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-relocatable", opts); From 3d835dea2a5e61a6ecff6078207b808441ae937a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 15 Jan 2024 11:01:38 +0100 Subject: [PATCH 064/133] test/link/macho: use .root_module.
instead of wrappers where possible --- test/link/macho.zig | 72 ++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/test/link/macho.zig b/test/link/macho.zig index 9b63a9a5e9..e5d04d9733 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -217,8 +217,8 @@ fn testDylib(b: *Build, opts: Options) *Step { \\} }); exe.root_module.linkSystemLibrary("a", .{}); - exe.addLibraryPath(dylib.getEmittedBinDirectory()); - exe.addRPath(dylib.getEmittedBinDirectory()); + exe.root_module.addLibraryPath(dylib.getEmittedBinDirectory()); + exe.root_module.addRPath(dylib.getEmittedBinDirectory()); const run = addRunArtifact(exe); run.expectStdOutEqual("Hello world"); @@ -286,7 +286,7 @@ fn testEntryPointArchive(b: *Build, opts: Options) *Step { { const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = "" }); exe.root_module.linkSystemLibrary("main", .{}); - exe.addLibraryPath(lib.getEmittedBinDirectory()); + exe.root_module.addLibraryPath(lib.getEmittedBinDirectory()); const run = addRunArtifact(exe); test_step.dependOn(&run.step); @@ -295,7 +295,7 @@ fn testEntryPointArchive(b: *Build, opts: Options) *Step { { const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = "" }); exe.root_module.linkSystemLibrary("main", .{}); - exe.addLibraryPath(lib.getEmittedBinDirectory()); + exe.root_module.addLibraryPath(lib.getEmittedBinDirectory()); exe.link_gc_sections = true; const run = addRunArtifact(exe); @@ -368,23 +368,23 @@ fn testHeaderpad(b: *Build, opts: Options) *Step { .name = name, .c_source_bytes = "int main() { return 0; }", }); - exe.linkFramework("CoreFoundation"); - exe.linkFramework("Foundation"); - exe.linkFramework("Cocoa"); - exe.linkFramework("CoreGraphics"); - exe.linkFramework("CoreHaptics"); - exe.linkFramework("CoreAudio"); - exe.linkFramework("AVFoundation"); - exe.linkFramework("CoreImage"); - exe.linkFramework("CoreLocation"); - exe.linkFramework("CoreML"); - 
exe.linkFramework("CoreVideo"); - exe.linkFramework("CoreText"); - exe.linkFramework("CryptoKit"); - exe.linkFramework("GameKit"); - exe.linkFramework("SwiftUI"); - exe.linkFramework("StoreKit"); - exe.linkFramework("SpriteKit"); + exe.root_module.linkFramework("CoreFoundation", .{}); + exe.root_module.linkFramework("Foundation", .{}); + exe.root_module.linkFramework("Cocoa", .{}); + exe.root_module.linkFramework("CoreGraphics", .{}); + exe.root_module.linkFramework("CoreHaptics", .{}); + exe.root_module.linkFramework("CoreAudio", .{}); + exe.root_module.linkFramework("AVFoundation", .{}); + exe.root_module.linkFramework("CoreImage", .{}); + exe.root_module.linkFramework("CoreLocation", .{}); + exe.root_module.linkFramework("CoreML", .{}); + exe.root_module.linkFramework("CoreVideo", .{}); + exe.root_module.linkFramework("CoreText", .{}); + exe.root_module.linkFramework("CryptoKit", .{}); + exe.root_module.linkFramework("GameKit", .{}); + exe.root_module.linkFramework("SwiftUI", .{}); + exe.root_module.linkFramework("StoreKit", .{}); + exe.root_module.linkFramework("SpriteKit", .{}); return exe; } }.addExe; @@ -814,8 +814,8 @@ fn testNeededLibrary(b: *Build, opts: Options) *Step { const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = "int main() { return 0; }" }); exe.root_module.linkSystemLibrary("a", .{ .needed = true }); - exe.addLibraryPath(dylib.getEmittedBinDirectory()); - exe.addRPath(dylib.getEmittedBinDirectory()); + exe.root_module.addLibraryPath(dylib.getEmittedBinDirectory()); + exe.root_module.addRPath(dylib.getEmittedBinDirectory()); exe.dead_strip_dylibs = true; const check = exe.checkObject(); @@ -845,7 +845,7 @@ fn testObjc(b: *Build, opts: Options) *Step { const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = "int main() { return 0; }" }); exe.root_module.linkSystemLibrary("a", .{}); exe.root_module.linkFramework("Foundation", .{}); - exe.addLibraryPath(lib.getEmittedBinDirectory()); + 
exe.root_module.addLibraryPath(lib.getEmittedBinDirectory()); const check = exe.checkObject(); check.checkInSymtab(); @@ -1205,8 +1205,8 @@ fn testTls(b: *Build, opts: Options) *Step { \\} }); exe.root_module.linkSystemLibrary("a", .{}); - exe.addLibraryPath(dylib.getEmittedBinDirectory()); - exe.addRPath(dylib.getEmittedBinDirectory()); + exe.root_module.addLibraryPath(dylib.getEmittedBinDirectory()); + exe.root_module.addRPath(dylib.getEmittedBinDirectory()); const run = addRunArtifact(exe); run.expectStdOutEqual("2 2 2"); @@ -1300,10 +1300,10 @@ fn testTwoLevelNamespace(b: *Build, opts: Options) *Step { exe.addObject(main_o); exe.root_module.linkSystemLibrary("a", .{}); exe.root_module.linkSystemLibrary("b", .{}); - exe.addLibraryPath(liba.getEmittedBinDirectory()); - exe.addLibraryPath(libb.getEmittedBinDirectory()); - exe.addRPath(liba.getEmittedBinDirectory()); - exe.addRPath(libb.getEmittedBinDirectory()); + exe.root_module.addLibraryPath(liba.getEmittedBinDirectory()); + exe.root_module.addLibraryPath(libb.getEmittedBinDirectory()); + exe.root_module.addRPath(liba.getEmittedBinDirectory()); + exe.root_module.addRPath(libb.getEmittedBinDirectory()); const check = exe.checkObject(); check.checkInSymtab(); @@ -1329,10 +1329,10 @@ fn testTwoLevelNamespace(b: *Build, opts: Options) *Step { exe.addObject(main_o); exe.root_module.linkSystemLibrary("b", .{}); exe.root_module.linkSystemLibrary("a", .{}); - exe.addLibraryPath(liba.getEmittedBinDirectory()); - exe.addLibraryPath(libb.getEmittedBinDirectory()); - exe.addRPath(liba.getEmittedBinDirectory()); - exe.addRPath(libb.getEmittedBinDirectory()); + exe.root_module.addLibraryPath(liba.getEmittedBinDirectory()); + exe.root_module.addLibraryPath(libb.getEmittedBinDirectory()); + exe.root_module.addRPath(liba.getEmittedBinDirectory()); + exe.root_module.addRPath(libb.getEmittedBinDirectory()); const check = exe.checkObject(); check.checkInSymtab(); @@ -1607,8 +1607,8 @@ fn testWeakLibrary(b: *Build, opts: Options) 
*Step { \\} }); exe.root_module.linkSystemLibrary("a", .{ .weak = true }); - exe.addLibraryPath(dylib.getEmittedBinDirectory()); - exe.addRPath(dylib.getEmittedBinDirectory()); + exe.root_module.addLibraryPath(dylib.getEmittedBinDirectory()); + exe.root_module.addRPath(dylib.getEmittedBinDirectory()); const check = exe.checkObject(); check.checkInHeaders(); From 000598acc9cbd362e533afad9c48cffdd2fae380 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 15 Jan 2024 11:16:03 +0100 Subject: [PATCH 065/133] test/link/macho: test objc msgsend selector stubs --- test/link/macho.zig | 155 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) diff --git a/test/link/macho.zig b/test/link/macho.zig index e5d04d9733..27ae09a22e 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -53,6 +53,8 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testHeaderpad(b, .{ .target = b.host })); macho_step.dependOn(testNeededFramework(b, .{ .target = b.host })); macho_step.dependOn(testObjc(b, .{ .target = b.host })); + macho_step.dependOn(testObjcStubs(b, .{ .target = b.host })); + macho_step.dependOn(testObjcStubs2(b, .{ .target = b.host })); macho_step.dependOn(testWeakFramework(b, .{ .target = b.host })); } } @@ -289,6 +291,7 @@ fn testEntryPointArchive(b: *Build, opts: Options) *Step { exe.root_module.addLibraryPath(lib.getEmittedBinDirectory()); const run = addRunArtifact(exe); + run.expectExitCode(0); test_step.dependOn(&run.step); } @@ -299,6 +302,7 @@ fn testEntryPointArchive(b: *Build, opts: Options) *Step { exe.link_gc_sections = true; const run = addRunArtifact(exe); + run.expectExitCode(0); test_step.dependOn(&run.step); } @@ -769,6 +773,7 @@ fn testNoDeadStrip(b: *Build, opts: Options) *Step { test_step.dependOn(&check.step); const run = addRunArtifact(exe); + run.expectExitCode(0); test_step.dependOn(&run.step); return test_step; @@ -859,6 +864,155 @@ fn testObjc(b: *Build, opts: Options) *Step { return 
test_step; } +fn testObjcStubs(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-objc-stubs", opts); + + const exe = addExecutable(b, opts, .{ + .name = "main", + .objc_source_bytes = + \\@import Foundation; + \\@interface Foo : NSObject + \\@property (nonatomic, assign) NSString* name; + \\@end + \\@implementation Foo + \\- (void)bar { + \\ printf("%s", [self.name UTF8String]); + \\} + \\@end + \\int main() { + \\ Foo *foo = [[Foo alloc] init]; + \\ foo.name = @"Foo"; + \\ [foo bar]; + \\ return 0; + \\} + , + .objc_source_flags = &.{ "-fmodules", "-fobjc-msgsend-selector-stubs" }, + }); + exe.root_module.linkFramework("Foundation", .{}); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("Foo"); + test_step.dependOn(&run.step); + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("sectname __objc_stubs"); + check.checkInHeaders(); + check.checkExact("sectname __objc_methname"); + check.checkInHeaders(); + check.checkExact("sectname __objc_selrefs"); + check.checkInSymtab(); + check.checkContains("(__TEXT,__objc_stubs) (was private external) _objc_msgSend$bar"); + check.checkInSymtab(); + check.checkContains("(__TEXT,__objc_stubs) (was private external) _objc_msgSend$name"); + check.checkInSymtab(); + check.checkContains("(__TEXT,__objc_stubs) (was private external) _objc_msgSend$setName"); + test_step.dependOn(&check.step); + + return test_step; +} + +fn testObjcStubs2(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-objc-stubs-2", opts); + + const all_h = all_h: { + const wf = WriteFile.create(b); + break :all_h wf.add("all.h", + \\#import <Foundation/Foundation.h> + \\ + \\@interface Foo : NSObject + \\@property (nonatomic, assign) NSString* name; + \\- (void) foo; + \\@end + \\@interface Bar : NSObject + \\@property (nonatomic, assign) NSString* name; + \\- (void) bar; + \\- (void) foobar: (Foo*) foo; + \\@end + ); + }; + + const foo_o = addObject(b, opts, .{ + .name = "foo", +
.objc_source_bytes = + \\#import <Foundation/Foundation.h> + \\#import "all.h" + \\@implementation Foo + \\- (void)foo { + \\ printf("%s", [self.name UTF8String]); + \\} + \\@end + , + .objc_source_flags = &.{"-fobjc-msgsend-selector-stubs"}, + }); + foo_o.root_module.addIncludePath(all_h.dirname()); + + const bar_o = addObject(b, opts, .{ + .name = "bar", + .objc_source_bytes = + \\#import <Foundation/Foundation.h> + \\#import "all.h" + \\@implementation Bar + \\- (void)bar { + \\ printf("%s", [self.name UTF8String]); + \\} + \\- (void)foobar: (Foo*) foo { + \\ printf("%s%s", [foo.name UTF8String], [self.name UTF8String]); + \\} + \\@end + , + .objc_source_flags = &.{"-fobjc-msgsend-selector-stubs"}, + }); + bar_o.root_module.addIncludePath(all_h.dirname()); + + const main_o = addObject(b, opts, .{ + .name = "main", + .objc_source_bytes = + \\#import <Foundation/Foundation.h> + \\#import "all.h" + \\int main() { + \\ Foo *foo = [[Foo alloc] init]; + \\ foo.name = @"Foo"; + \\ Bar *bar = [[Bar alloc] init]; + \\ bar.name = @"Bar"; + \\ [foo foo]; + \\ [bar bar]; + \\ [bar foobar:foo]; + \\ return 0; + \\} + , + .objc_source_flags = &.{"-fobjc-msgsend-selector-stubs"}, + }); + main_o.root_module.addIncludePath(all_h.dirname()); + + const exe = addExecutable(b, opts, .{ .name = "main" }); + exe.addObject(main_o); + exe.addObject(foo_o); + exe.addObject(bar_o); + exe.root_module.linkFramework("Foundation", .{}); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("FooBarFooBar"); + test_step.dependOn(&run.step); + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("sectname __objc_stubs"); + check.checkInHeaders(); + check.checkExact("sectname __objc_methname"); + check.checkInHeaders(); + check.checkExact("sectname __objc_selrefs"); + check.checkInSymtab(); + check.checkContains("(__TEXT,__objc_stubs) (was private external) _objc_msgSend$foo"); + check.checkInSymtab(); + check.checkContains("(__TEXT,__objc_stubs) (was private external) _objc_msgSend$bar"); + check.checkInSymtab(); +
check.checkContains("(__TEXT,__objc_stubs) (was private external) _objc_msgSend$foobar"); + test_step.dependOn(&check.step); + + return test_step; +} + fn testRelocatable(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-relocatable", opts); @@ -1647,3 +1801,4 @@ const BuildOptions = link.BuildOptions; const Compile = Step.Compile; const Options = link.Options; const Step = Build.Step; +const WriteFile = Step.WriteFile; From 5c8acc551d4d99efebcdfc3b4a4a56dcbaad53b3 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 15 Jan 2024 11:28:05 +0100 Subject: [PATCH 066/133] test/link/macho: revert testing objc msgsend stubs - no way of enabling in clang --- test/link/macho.zig | 151 -------------------------------------------- 1 file changed, 151 deletions(-) diff --git a/test/link/macho.zig b/test/link/macho.zig index 27ae09a22e..ac58620e8c 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -53,8 +53,6 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testHeaderpad(b, .{ .target = b.host })); macho_step.dependOn(testNeededFramework(b, .{ .target = b.host })); macho_step.dependOn(testObjc(b, .{ .target = b.host })); - macho_step.dependOn(testObjcStubs(b, .{ .target = b.host })); - macho_step.dependOn(testObjcStubs2(b, .{ .target = b.host })); macho_step.dependOn(testWeakFramework(b, .{ .target = b.host })); } } @@ -864,155 +862,6 @@ fn testObjc(b: *Build, opts: Options) *Step { return test_step; } -fn testObjcStubs(b: *Build, opts: Options) *Step { - const test_step = addTestStep(b, "macho-objc-stubs", opts); - - const exe = addExecutable(b, opts, .{ - .name = "main", - .objc_source_bytes = - \\@import Foundation; - \\@interface Foo : NSObject - \\@property (nonatomic, assign) NSString* name; - \\@end - \\@implementation Foo - \\- (void)bar { - \\ printf("%s", [self.name UTF8String]); - \\} - \\@end - \\int main() { - \\ Foo *foo = [[Foo alloc] init]; - \\ foo.name = @"Foo"; - \\ [foo bar]; - \\ return 
0; - \\} - , - .objc_source_flags = &.{ "-fmodules", "-fobjc-msgsend-selector-stubs" }, - }); - exe.root_module.linkFramework("Foundation", .{}); - - const run = addRunArtifact(exe); - run.expectStdOutEqual("Foo"); - test_step.dependOn(&run.step); - - const check = exe.checkObject(); - check.checkInHeaders(); - check.checkExact("sectname __objc_stubs"); - check.checkInHeaders(); - check.checkExact("sectname __objc_methname"); - check.checkInHeaders(); - check.checkExact("sectname __objc_selrefs"); - check.checkInSymtab(); - check.checkContains("(__TEXT,__objc_stubs) (was private external) _objc_msgSend$bar"); - check.checkInSymtab(); - check.checkContains("(__TEXT,__objc_stubs) (was private external) _objc_msgSend$name"); - check.checkInSymtab(); - check.checkContains("(__TEXT,__objc_stubs) (was private external) _objc_msgSend$setName"); - test_step.dependOn(&check.step); - - return test_step; -} - -fn testObjcStubs2(b: *Build, opts: Options) *Step { - const test_step = addTestStep(b, "macho-objc-stubs-2", opts); - - const all_h = all_h: { - const wf = WriteFile.create(b); - break :all_h wf.add("all.h", - \\#import <Foundation/Foundation.h> - \\ - \\@interface Foo : NSObject - \\@property (nonatomic, assign) NSString* name; - \\- (void) foo; - \\@end - \\@interface Bar : NSObject - \\@property (nonatomic, assign) NSString* name; - \\- (void) bar; - \\- (void) foobar: (Foo*) foo; - \\@end - ); - }; - - const foo_o = addObject(b, opts, .{ - .name = "foo", - .objc_source_bytes = - \\#import <Foundation/Foundation.h> - \\#import "all.h" - \\@implementation Foo - \\- (void)foo { - \\ printf("%s", [self.name UTF8String]); - \\} - \\@end - , - .objc_source_flags = &.{"-fobjc-msgsend-selector-stubs"}, - }); - foo_o.root_module.addIncludePath(all_h.dirname()); - - const bar_o = addObject(b, opts, .{ - .name = "bar", - .objc_source_bytes = - \\#import <Foundation/Foundation.h> - \\#import "all.h" - \\@implementation Bar - \\- (void)bar { - \\ printf("%s", [self.name UTF8String]); - \\} - \\- (void)foobar: (Foo*) foo { - \\ printf("%s%s", [foo.name
UTF8String], [self.name UTF8String]); - \\} - \\@end - , - .objc_source_flags = &.{"-fobjc-msgsend-selector-stubs"}, - }); - bar_o.root_module.addIncludePath(all_h.dirname()); - - const main_o = addObject(b, opts, .{ - .name = "main", - .objc_source_bytes = - \\#import <Foundation/Foundation.h> - \\#import "all.h" - \\int main() { - \\ Foo *foo = [[Foo alloc] init]; - \\ foo.name = @"Foo"; - \\ Bar *bar = [[Bar alloc] init]; - \\ bar.name = @"Bar"; - \\ [foo foo]; - \\ [bar bar]; - \\ [bar foobar:foo]; - \\ return 0; - \\} - , - .objc_source_flags = &.{"-fobjc-msgsend-selector-stubs"}, - }); - main_o.root_module.addIncludePath(all_h.dirname()); - - const exe = addExecutable(b, opts, .{ .name = "main" }); - exe.addObject(main_o); - exe.addObject(foo_o); - exe.addObject(bar_o); - exe.root_module.linkFramework("Foundation", .{}); - - const run = addRunArtifact(exe); - run.expectStdOutEqual("FooBarFooBar"); - test_step.dependOn(&run.step); - - const check = exe.checkObject(); - check.checkInHeaders(); - check.checkExact("sectname __objc_stubs"); - check.checkInHeaders(); - check.checkExact("sectname __objc_methname"); - check.checkInHeaders(); - check.checkExact("sectname __objc_selrefs"); - check.checkInSymtab(); - check.checkContains("(__TEXT,__objc_stubs) (was private external) _objc_msgSend$foo"); - check.checkInSymtab(); - check.checkContains("(__TEXT,__objc_stubs) (was private external) _objc_msgSend$bar"); - check.checkInSymtab(); - check.checkContains("(__TEXT,__objc_stubs) (was private external) _objc_msgSend$foobar"); - test_step.dependOn(&check.step); - - return test_step; -} - fn testRelocatable(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-relocatable", opts); From 7bb323c0ebfbc2ebc6c327328c3cc249b495e3dc Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 15 Jan 2024 11:30:48 +0100 Subject: [PATCH 067/133] test/link/macho: test pagezero size --- test/link/macho.zig | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git
a/test/link/macho.zig b/test/link/macho.zig index ac58620e8c..a502ba97e5 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -27,6 +27,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testMhExecuteHeader(b, .{ .target = default_target })); macho_step.dependOn(testNoDeadStrip(b, .{ .target = default_target })); macho_step.dependOn(testNoExportsDylib(b, .{ .target = default_target })); + macho_step.dependOn(testPagezeroSize(b, .{ .target = default_target })); macho_step.dependOn(testRelocatable(b, .{ .target = default_target })); macho_step.dependOn(testRelocatableZig(b, .{ .target = default_target })); macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = default_target })); @@ -862,6 +863,40 @@ fn testObjc(b: *Build, opts: Options) *Step { return test_step; } +fn testPagezeroSize(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-pagezero-size", opts); + + { + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = "int main () { return 0; }" }); + exe.pagezero_size = 0x4000; + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("LC 0"); + check.checkExact("segname __PAGEZERO"); + check.checkExact("vmaddr 0"); + check.checkExact("vmsize 4000"); + check.checkInHeaders(); + check.checkExact("segname __TEXT"); + check.checkExact("vmaddr 4000"); + test_step.dependOn(&check.step); + } + + { + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = "int main () { return 0; }" }); + exe.pagezero_size = 0; + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("LC 0"); + check.checkExact("segname __TEXT"); + check.checkExact("vmaddr 0"); + test_step.dependOn(&check.step); + } + + return test_step; +} + fn testRelocatable(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-relocatable", opts); From 7dc6900018f78d7514926853d35087d5b205109f Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: 
Mon, 15 Jan 2024 11:32:00 +0100 Subject: [PATCH 068/133] test/link/macho: remove converted standalone tests --- test/link.zig | 8 ----- test/link/macho/objc/Foo.h | 7 ---- test/link/macho/objc/Foo.m | 11 ------ test/link/macho/objc/build.zig | 34 ------------------- test/link/macho/objc/test.m | 12 ------- test/link/macho/pagezero/build.zig | 54 ------------------------------ test/link/macho/pagezero/main.c | 3 -- 7 files changed, 129 deletions(-) delete mode 100644 test/link/macho/objc/Foo.h delete mode 100644 test/link/macho/objc/Foo.m delete mode 100644 test/link/macho/objc/build.zig delete mode 100644 test/link/macho/objc/test.m delete mode 100644 test/link/macho/pagezero/build.zig delete mode 100644 test/link/macho/pagezero/main.c diff --git a/test/link.zig b/test/link.zig index 905d6cc35d..4fb90000ab 100644 --- a/test/link.zig +++ b/test/link.zig @@ -111,18 +111,10 @@ pub const cases = [_]Case{ .build_root = "test/link/macho/linksection", .import = @import("link/macho/linksection/build.zig"), }, - .{ - .build_root = "test/link/macho/objc", - .import = @import("link/macho/objc/build.zig"), - }, .{ .build_root = "test/link/macho/objcpp", .import = @import("link/macho/objcpp/build.zig"), }, - .{ - .build_root = "test/link/macho/pagezero", - .import = @import("link/macho/pagezero/build.zig"), - }, .{ .build_root = "test/link/macho/reexports", .import = @import("link/macho/reexports/build.zig"), diff --git a/test/link/macho/objc/Foo.h b/test/link/macho/objc/Foo.h deleted file mode 100644 index 05cb7df39b..0000000000 --- a/test/link/macho/objc/Foo.h +++ /dev/null @@ -1,7 +0,0 @@ -#import - -@interface Foo : NSObject - -- (NSString *)name; - -@end diff --git a/test/link/macho/objc/Foo.m b/test/link/macho/objc/Foo.m deleted file mode 100644 index 6fc9b1edf0..0000000000 --- a/test/link/macho/objc/Foo.m +++ /dev/null @@ -1,11 +0,0 @@ -#import "Foo.h" - -@implementation Foo - -- (NSString *)name -{ - NSString *str = [[NSString alloc] initWithFormat:@"Zig"]; - return 
str; -} - -@end diff --git a/test/link/macho/objc/build.zig b/test/link/macho/objc/build.zig deleted file mode 100644 index 26bbdc2673..0000000000 --- a/test/link/macho/objc/build.zig +++ /dev/null @@ -1,34 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; -pub const requires_macos_sdk = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const exe = b.addExecutable(.{ - .name = "test", - .optimize = optimize, - .target = b.host, - }); - exe.addIncludePath(.{ .path = "." }); - exe.addCSourceFile(.{ .file = .{ .path = "Foo.m" }, .flags = &[0][]const u8{} }); - exe.addCSourceFile(.{ .file = .{ .path = "test.m" }, .flags = &[0][]const u8{} }); - exe.linkLibC(); - // TODO when we figure out how to ship framework stubs for cross-compilation, - // populate paths to the sysroot here. 
- exe.linkFramework("Foundation"); - - const run_cmd = b.addRunArtifact(exe); - run_cmd.skip_foreign_checks = true; - run_cmd.expectStdOutEqual(""); - test_step.dependOn(&run_cmd.step); -} diff --git a/test/link/macho/objc/test.m b/test/link/macho/objc/test.m deleted file mode 100644 index 3c81316788..0000000000 --- a/test/link/macho/objc/test.m +++ /dev/null @@ -1,12 +0,0 @@ -#import "Foo.h" -#import - -int main(int argc, char *argv[]) -{ - @autoreleasepool { - Foo *foo = [[Foo alloc] init]; - NSString *result = [foo name]; - assert([result isEqualToString:@"Zig"]); - return 0; - } -} diff --git a/test/link/macho/pagezero/build.zig b/test/link/macho/pagezero/build.zig deleted file mode 100644 index cee2aa9fd5..0000000000 --- a/test/link/macho/pagezero/build.zig +++ /dev/null @@ -1,54 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - const optimize: std.builtin.OptimizeMode = .Debug; - const target = b.resolveTargetQuery(.{ .os_tag = .macos }); - - { - const exe = b.addExecutable(.{ - .name = "pagezero", - .optimize = optimize, - .target = target, - }); - exe.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &.{} }); - exe.linkLibC(); - exe.pagezero_size = 0x4000; - - const check = exe.checkObject(); - check.checkInHeaders(); - check.checkExact("LC 0"); - check.checkExact("segname __PAGEZERO"); - check.checkExact("vmaddr 0"); - check.checkExact("vmsize 4000"); - - check.checkInHeaders(); - check.checkExact("segname __TEXT"); - check.checkExact("vmaddr 4000"); - - test_step.dependOn(&check.step); - } - - { - const exe = b.addExecutable(.{ - .name = "no_pagezero", - .optimize = optimize, - .target = target, - }); - exe.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &.{} }); - exe.linkLibC(); - exe.pagezero_size = 0; - - const check = exe.checkObject(); - check.checkInHeaders(); - check.checkExact("LC 0"); 
- check.checkExact("segname __TEXT"); - check.checkExact("vmaddr 0"); - - test_step.dependOn(&check.step); - } -} diff --git a/test/link/macho/pagezero/main.c b/test/link/macho/pagezero/main.c deleted file mode 100644 index ca68d24cc7..0000000000 --- a/test/link/macho/pagezero/main.c +++ /dev/null @@ -1,3 +0,0 @@ -int main(int argc, char* argv[]) { - return 0; -} From 933231868ab37c67af53ab67aaf1355b5f50413a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 15 Jan 2024 11:34:47 +0100 Subject: [PATCH 069/133] test/link/macho: test re-exports in zig --- test/link/macho.zig | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/test/link/macho.zig b/test/link/macho.zig index a502ba97e5..1918a4cb15 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -28,6 +28,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testNoDeadStrip(b, .{ .target = default_target })); macho_step.dependOn(testNoExportsDylib(b, .{ .target = default_target })); macho_step.dependOn(testPagezeroSize(b, .{ .target = default_target })); + macho_step.dependOn(testReexportsZig(b, .{ .target = default_target })); macho_step.dependOn(testRelocatable(b, .{ .target = default_target })); macho_step.dependOn(testRelocatableZig(b, .{ .target = default_target })); macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = default_target })); @@ -897,6 +898,35 @@ fn testPagezeroSize(b: *Build, opts: Options) *Step { return test_step; } +fn testReexportsZig(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-reexports-zig", opts); + + const lib = addStaticLibrary(b, opts, .{ .name = "a", .zig_source_bytes = + \\const x: i32 = 42; + \\export fn foo() i32 { + \\ return x; + \\} + \\comptime { + \\ @export(foo, .{ .name = "bar", .linkage = .Strong }); + \\} + }); + + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = + \\extern int foo(); + \\extern int bar(); + \\int main() { + \\ return 
bar() - foo(); + \\} + }); + exe.linkLibrary(lib); + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + + return test_step; +} + fn testRelocatable(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-relocatable", opts); From b70fedee7e61d0243211ef36635681fcb998ca54 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 15 Jan 2024 11:43:36 +0100 Subject: [PATCH 070/133] test/link/macho: upgrade search strategy test --- test/link.zig | 4 -- test/link/macho.zig | 71 ++++++++++++++++++- test/link/macho/search_strategy/a.c | 7 -- test/link/macho/search_strategy/build.zig | 84 ----------------------- test/link/macho/search_strategy/main.c | 9 --- 5 files changed, 70 insertions(+), 105 deletions(-) delete mode 100644 test/link/macho/search_strategy/a.c delete mode 100644 test/link/macho/search_strategy/build.zig delete mode 100644 test/link/macho/search_strategy/main.c diff --git a/test/link.zig b/test/link.zig index 4fb90000ab..9559d66a81 100644 --- a/test/link.zig +++ b/test/link.zig @@ -119,10 +119,6 @@ pub const cases = [_]Case{ .build_root = "test/link/macho/reexports", .import = @import("link/macho/reexports/build.zig"), }, - .{ - .build_root = "test/link/macho/search_strategy", - .import = @import("link/macho/search_strategy/build.zig"), - }, .{ .build_root = "test/link/macho/stack_size", .import = @import("link/macho/stack_size/build.zig"), diff --git a/test/link/macho.zig b/test/link/macho.zig index 1918a4cb15..5a5b447bbc 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -45,9 +45,10 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testEntryPointDylib(b, .{ .target = default_target })); macho_step.dependOn(testDylib(b, .{ .target = default_target })); macho_step.dependOn(testNeededLibrary(b, .{ .target = default_target })); - macho_step.dependOn(testWeakLibrary(b, .{ .target = default_target })); + macho_step.dependOn(testSearchStrategy(b, .{ .target = 
b.host })); macho_step.dependOn(testTls(b, .{ .target = default_target })); macho_step.dependOn(testTwoLevelNamespace(b, .{ .target = default_target })); + macho_step.dependOn(testWeakLibrary(b, .{ .target = default_target })); // Tests requiring presence of macOS SDK in system path if (build_opts.has_macos_sdk) { @@ -1046,6 +1047,74 @@ fn testRelocatableZig(b: *Build, opts: Options) *Step { return test_step; } +fn testSearchStrategy(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-search-strategy", opts); + + const obj = addObject(b, opts, .{ .name = "a", .c_source_bytes = + \\#include + \\char world[] = "world"; + \\char* hello() { + \\ return "Hello"; + \\} + }); + + const liba = addStaticLibrary(b, opts, .{ .name = "a" }); + liba.addObject(obj); + + const dylib = addSharedLibrary(b, opts, .{ .name = "a" }); + dylib.addObject(obj); + + const main_o = addObject(b, opts, .{ .name = "main", .c_source_bytes = + \\#include + \\char* hello(); + \\extern char world[]; + \\int main() { + \\ printf("%s %s", hello(), world); + \\ return 0; + \\} + }); + + { + const exe = addExecutable(b, opts, .{ .name = "main" }); + exe.addObject(main_o); + exe.root_module.linkSystemLibrary("a", .{ .use_pkg_config = .no, .search_strategy = .mode_first }); + exe.root_module.addLibraryPath(liba.getEmittedBinDirectory()); + exe.root_module.addLibraryPath(dylib.getEmittedBinDirectory()); + exe.root_module.addRPath(dylib.getEmittedBinDirectory()); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("Hello world"); + test_step.dependOn(&run.step); + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("cmd LOAD_DYLIB"); + check.checkContains("liba.dylib"); + test_step.dependOn(&check.step); + } + + { + const exe = addExecutable(b, opts, .{ .name = "main" }); + exe.addObject(main_o); + exe.root_module.linkSystemLibrary("a", .{ .use_pkg_config = .no, .search_strategy = .paths_first }); + 
exe.root_module.addLibraryPath(liba.getEmittedBinDirectory()); + exe.root_module.addLibraryPath(dylib.getEmittedBinDirectory()); + exe.root_module.addRPath(dylib.getEmittedBinDirectory()); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("Hello world"); + test_step.dependOn(&run.step); + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("cmd LOAD_DYLIB"); + check.checkNotPresent("liba.dylib"); + test_step.dependOn(&check.step); + } + + return test_step; +} + fn testSectionBoundarySymbols(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-section-boundary-symbols", opts); diff --git a/test/link/macho/search_strategy/a.c b/test/link/macho/search_strategy/a.c deleted file mode 100644 index 199b31e1a0..0000000000 --- a/test/link/macho/search_strategy/a.c +++ /dev/null @@ -1,7 +0,0 @@ -#include - -char world[] = "world"; - -char* hello() { - return "Hello"; -} diff --git a/test/link/macho/search_strategy/build.zig b/test/link/macho/search_strategy/build.zig deleted file mode 100644 index 181d2df91a..0000000000 --- a/test/link/macho/search_strategy/build.zig +++ /dev/null @@ -1,84 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const target = b.resolveTargetQuery(.{ .os_tag = .macos }); - - { - // -search_dylibs_first - const exe = createScenario(b, optimize, target, "search_dylibs_first", .mode_first); - - const check = exe.checkObject(); - check.checkInHeaders(); - check.checkExact("cmd LOAD_DYLIB"); - check.checkExact("name @rpath/libsearch_dylibs_first.dylib"); - test_step.dependOn(&check.step); - - const run = 
b.addRunArtifact(exe); - run.skip_foreign_checks = true; - run.expectStdOutEqual("Hello world"); - test_step.dependOn(&run.step); - } - - { - // -search_paths_first - const exe = createScenario(b, optimize, target, "search_paths_first", .paths_first); - - const run = b.addRunArtifact(exe); - run.skip_foreign_checks = true; - run.expectStdOutEqual("Hello world"); - test_step.dependOn(&run.step); - } -} - -fn createScenario( - b: *std.Build, - optimize: std.builtin.OptimizeMode, - target: std.Build.ResolvedTarget, - name: []const u8, - search_strategy: std.Build.Module.SystemLib.SearchStrategy, -) *std.Build.Step.Compile { - const static = b.addStaticLibrary(.{ - .name = name, - .optimize = optimize, - .target = target, - }); - static.addCSourceFile(.{ .file = .{ .path = "a.c" }, .flags = &.{} }); - static.linkLibC(); - - const dylib = b.addSharedLibrary(.{ - .name = name, - .version = .{ .major = 1, .minor = 0, .patch = 0 }, - .optimize = optimize, - .target = target, - }); - dylib.addCSourceFile(.{ .file = .{ .path = "a.c" }, .flags = &.{} }); - dylib.linkLibC(); - - const exe = b.addExecutable(.{ - .name = name, - .optimize = optimize, - .target = target, - }); - exe.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &.{} }); - exe.linkSystemLibrary2(name, .{ - .use_pkg_config = .no, - .search_strategy = search_strategy, - }); - exe.linkLibC(); - exe.addLibraryPath(static.getEmittedBinDirectory()); - exe.addLibraryPath(dylib.getEmittedBinDirectory()); - exe.addRPath(dylib.getEmittedBinDirectory()); - return exe; -} diff --git a/test/link/macho/search_strategy/main.c b/test/link/macho/search_strategy/main.c deleted file mode 100644 index 941903f219..0000000000 --- a/test/link/macho/search_strategy/main.c +++ /dev/null @@ -1,9 +0,0 @@ -#include - -char* hello(); -extern char world[]; - -int main(int argc, char* argv[]) { - printf("%s %s", hello(), world); - return 0; -} From d500caaa62aa666916f44dc07f2e618a2260f640 Mon Sep 17 00:00:00 2001 From: Jakub Konka 
Date: Mon, 15 Jan 2024 11:46:32 +0100 Subject: [PATCH 071/133] test/link/macho: test stacksize option --- test/link.zig | 4 --- test/link/macho.zig | 21 +++++++++++++++- test/link/macho/stack_size/build.zig | 37 ---------------------------- test/link/macho/stack_size/main.c | 3 --- 4 files changed, 20 insertions(+), 45 deletions(-) delete mode 100644 test/link/macho/stack_size/build.zig delete mode 100644 test/link/macho/stack_size/main.c diff --git a/test/link.zig b/test/link.zig index 9559d66a81..0eb39c2a5c 100644 --- a/test/link.zig +++ b/test/link.zig @@ -119,10 +119,6 @@ pub const cases = [_]Case{ .build_root = "test/link/macho/reexports", .import = @import("link/macho/reexports/build.zig"), }, - .{ - .build_root = "test/link/macho/stack_size", - .import = @import("link/macho/stack_size/build.zig"), - }, .{ .build_root = "test/link/macho/tbdv3", .import = @import("link/macho/tbdv3/build.zig"), diff --git a/test/link/macho.zig b/test/link/macho.zig index 5a5b447bbc..0cf2de1b85 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -33,6 +33,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testRelocatableZig(b, .{ .target = default_target })); macho_step.dependOn(testSectionBoundarySymbols(b, .{ .target = default_target })); macho_step.dependOn(testSegmentBoundarySymbols(b, .{ .target = default_target })); + macho_step.dependOn(testStackSize(b, .{ .target = default_target })); macho_step.dependOn(testTentative(b, .{ .target = default_target })); macho_step.dependOn(testThunks(b, .{ .target = aarch64_target })); macho_step.dependOn(testTlsLargeTbss(b, .{ .target = default_target })); @@ -45,7 +46,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testEntryPointDylib(b, .{ .target = default_target })); macho_step.dependOn(testDylib(b, .{ .target = default_target })); macho_step.dependOn(testNeededLibrary(b, .{ .target = default_target })); - macho_step.dependOn(testSearchStrategy(b, .{ 
.target = b.host })); + macho_step.dependOn(testSearchStrategy(b, .{ .target = default_target })); macho_step.dependOn(testTls(b, .{ .target = default_target })); macho_step.dependOn(testTwoLevelNamespace(b, .{ .target = default_target })); macho_step.dependOn(testWeakLibrary(b, .{ .target = default_target })); @@ -1263,6 +1264,24 @@ fn testSegmentBoundarySymbols(b: *Build, opts: Options) *Step { return test_step; } +fn testStackSize(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-stack-size", opts); + + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = "int main() { return 0; }" }); + exe.stack_size = 0x100000000; + + const run = addRunArtifact(exe); + test_step.dependOn(&run.step); + + const check = exe.checkObject(); + check.checkInHeaders(); + check.checkExact("cmd MAIN"); + check.checkExact("stacksize 100000000"); + test_step.dependOn(&check.step); + + return test_step; +} + fn testTentative(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-tentative", opts); diff --git a/test/link/macho/stack_size/build.zig b/test/link/macho/stack_size/build.zig deleted file mode 100644 index 28a2602ea5..0000000000 --- a/test/link/macho/stack_size/build.zig +++ /dev/null @@ -1,37 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const target = b.resolveTargetQuery(.{ .os_tag = .macos }); - - const exe = b.addExecutable(.{ - .name = "main", - .optimize = optimize, - .target = target, - }); - exe.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &.{} }); - exe.linkLibC(); - exe.stack_size = 0x100000000; - - const check_exe = 
exe.checkObject(); - check_exe.checkInHeaders(); - check_exe.checkExact("cmd MAIN"); - check_exe.checkExact("stacksize 100000000"); - test_step.dependOn(&check_exe.step); - - const run = b.addRunArtifact(exe); - run.skip_foreign_checks = true; - run.expectStdOutEqual(""); - test_step.dependOn(&run.step); -} diff --git a/test/link/macho/stack_size/main.c b/test/link/macho/stack_size/main.c deleted file mode 100644 index ca68d24cc7..0000000000 --- a/test/link/macho/stack_size/main.c +++ /dev/null @@ -1,3 +0,0 @@ -int main(int argc, char* argv[]) { - return 0; -} From a3f68c6fa23f893597b7708e0c00e81f57cb35c1 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 15 Jan 2024 12:00:51 +0100 Subject: [PATCH 072/133] test/link/macho: upgrade unwind info tests --- test/link.zig | 8 - test/link/macho.zig | 273 ++++++++++++++++++ test/link/macho/reexports/a.zig | 7 - test/link/macho/reexports/build.zig | 38 --- test/link/macho/reexports/main.c | 5 - test/link/macho/unwind_info/all.h | 41 --- test/link/macho/unwind_info/build.zig | 88 ------ test/link/macho/unwind_info/main.cpp | 24 -- test/link/macho/unwind_info/simple_string.cpp | 30 -- .../macho/unwind_info/simple_string_owner.cpp | 12 - 10 files changed, 273 insertions(+), 253 deletions(-) delete mode 100644 test/link/macho/reexports/a.zig delete mode 100644 test/link/macho/reexports/build.zig delete mode 100644 test/link/macho/reexports/main.c delete mode 100644 test/link/macho/unwind_info/all.h delete mode 100644 test/link/macho/unwind_info/build.zig delete mode 100644 test/link/macho/unwind_info/main.cpp delete mode 100644 test/link/macho/unwind_info/simple_string.cpp delete mode 100644 test/link/macho/unwind_info/simple_string_owner.cpp diff --git a/test/link.zig b/test/link.zig index 0eb39c2a5c..3f85cd1a20 100644 --- a/test/link.zig +++ b/test/link.zig @@ -115,16 +115,8 @@ pub const cases = [_]Case{ .build_root = "test/link/macho/objcpp", .import = @import("link/macho/objcpp/build.zig"), }, - .{ - .build_root = 
"test/link/macho/reexports", - .import = @import("link/macho/reexports/build.zig"), - }, .{ .build_root = "test/link/macho/tbdv3", .import = @import("link/macho/tbdv3/build.zig"), }, - .{ - .build_root = "test/link/macho/unwind_info", - .import = @import("link/macho/unwind_info/build.zig"), - }, }; diff --git a/test/link/macho.zig b/test/link/macho.zig index 0cf2de1b85..6692b1888b 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -38,6 +38,9 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testThunks(b, .{ .target = aarch64_target })); macho_step.dependOn(testTlsLargeTbss(b, .{ .target = default_target })); macho_step.dependOn(testUndefinedFlag(b, .{ .target = default_target })); + macho_step.dependOn(testUnwindInfo(b, .{ .target = default_target })); + macho_step.dependOn(testUnwindInfoNoSubsectionsX64(b, .{ .target = x86_64_target })); + macho_step.dependOn(testUnwindInfoNoSubsectionsArm64(b, .{ .target = aarch64_target })); macho_step.dependOn(testWeakBind(b, .{ .target = x86_64_target })); // Tests requiring symlinks when tested on Windows @@ -1589,6 +1592,276 @@ fn testUndefinedFlag(b: *Build, opts: Options) *Step { return test_step; } +fn testUnwindInfo(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-unwind-info", opts); + + const all_h = all_h: { + const wf = WriteFile.create(b); + break :all_h wf.add("all.h", + \\#ifndef ALL + \\#define ALL + \\ + \\#include + \\#include + \\#include + \\ + \\struct SimpleString { + \\ SimpleString(size_t max_size); + \\ ~SimpleString(); + \\ + \\ void print(const char* tag) const; + \\ bool append_line(const char* x); + \\ + \\private: + \\ size_t max_size; + \\ char* buffer; + \\ size_t length; + \\}; + \\ + \\struct SimpleStringOwner { + \\ SimpleStringOwner(const char* x); + \\ ~SimpleStringOwner(); + \\ + \\private: + \\ SimpleString string; + \\}; + \\ + \\class Error: public std::exception { + \\public: + \\ explicit Error(const char* msg) : 
msg{ msg } {} + \\ virtual ~Error() noexcept {} + \\ virtual const char* what() const noexcept { + \\ return msg.c_str(); + \\ } + \\ + \\protected: + \\ std::string msg; + \\}; + \\ + \\#endif + ); + }; + + const main_o = addObject(b, opts, .{ .name = "main", .cpp_source_bytes = + \\#include "all.h" + \\#include + \\ + \\void fn_c() { + \\ SimpleStringOwner c{ "cccccccccc" }; + \\} + \\ + \\void fn_b() { + \\ SimpleStringOwner b{ "b" }; + \\ fn_c(); + \\} + \\ + \\int main() { + \\ try { + \\ SimpleStringOwner a{ "a" }; + \\ fn_b(); + \\ SimpleStringOwner d{ "d" }; + \\ } catch (const Error& e) { + \\ printf("Error: %s\n", e.what()); + \\ } catch(const std::exception& e) { + \\ printf("Exception: %s\n", e.what()); + \\ } + \\ return 0; + \\} + }); + main_o.root_module.addIncludePath(all_h.dirname()); + main_o.linkLibCpp(); + + const simple_string_o = addObject(b, opts, .{ .name = "simple_string", .cpp_source_bytes = + \\#include "all.h" + \\#include + \\#include + \\ + \\SimpleString::SimpleString(size_t max_size) + \\: max_size{ max_size }, length{} { + \\ if (max_size == 0) { + \\ throw Error{ "Max size must be at least 1." 
}; + \\ } + \\ buffer = new char[max_size]; + \\ buffer[0] = 0; + \\} + \\ + \\SimpleString::~SimpleString() { + \\ delete[] buffer; + \\} + \\ + \\void SimpleString::print(const char* tag) const { + \\ printf("%s: %s", tag, buffer); + \\} + \\ + \\bool SimpleString::append_line(const char* x) { + \\ const auto x_len = strlen(x); + \\ if (x_len + length + 2 > max_size) return false; + \\ std::strncpy(buffer + length, x, max_size - length); + \\ length += x_len; + \\ buffer[length++] = '\n'; + \\ buffer[length] = 0; + \\ return true; + \\} + }); + simple_string_o.root_module.addIncludePath(all_h.dirname()); + simple_string_o.linkLibCpp(); + + const simple_string_owner_o = addObject(b, opts, .{ .name = "simple_string_owner", .cpp_source_bytes = + \\#include "all.h" + \\ + \\SimpleStringOwner::SimpleStringOwner(const char* x) : string{ 10 } { + \\ if (!string.append_line(x)) { + \\ throw Error{ "Not enough memory!" }; + \\ } + \\ string.print("Constructed"); + \\} + \\ + \\SimpleStringOwner::~SimpleStringOwner() { + \\ string.print("About to destroy"); + \\} + }); + simple_string_owner_o.root_module.addIncludePath(all_h.dirname()); + simple_string_owner_o.linkLibCpp(); + + const exp_stdout = + \\Constructed: a + \\Constructed: b + \\About to destroy: b + \\About to destroy: a + \\Error: Not enough memory! 
+ \\ + ; + + const exe = addExecutable(b, opts, .{ .name = "main" }); + exe.addObject(main_o); + exe.addObject(simple_string_o); + exe.addObject(simple_string_owner_o); + exe.linkLibCpp(); + + const run = addRunArtifact(exe); + run.expectStdOutEqual(exp_stdout); + test_step.dependOn(&run.step); + + const check = exe.checkObject(); + check.checkInSymtab(); + check.checkContains("(was private external) ___gxx_personality_v0"); + test_step.dependOn(&check.step); + + return test_step; +} + +fn testUnwindInfoNoSubsectionsArm64(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-unwind-info-no-subsections-arm64", opts); + + const a_o = addObject(b, opts, .{ .name = "a", .asm_source_bytes = + \\.globl _foo + \\.align 4 + \\_foo: + \\ .cfi_startproc + \\ stp x29, x30, [sp, #-32]! + \\ .cfi_def_cfa_offset 32 + \\ .cfi_offset w30, -24 + \\ .cfi_offset w29, -32 + \\ mov x29, sp + \\ .cfi_def_cfa w29, 32 + \\ bl _bar + \\ ldp x29, x30, [sp], #32 + \\ .cfi_restore w29 + \\ .cfi_restore w30 + \\ .cfi_def_cfa_offset 0 + \\ ret + \\ .cfi_endproc + \\ + \\.globl _bar + \\.align 4 + \\_bar: + \\ .cfi_startproc + \\ sub sp, sp, #32 + \\ .cfi_def_cfa_offset -32 + \\ stp x29, x30, [sp, #16] + \\ .cfi_offset w30, -24 + \\ .cfi_offset w29, -32 + \\ mov x29, sp + \\ .cfi_def_cfa w29, 32 + \\ mov w0, #4 + \\ ldp x29, x30, [sp, #16] + \\ .cfi_restore w29 + \\ .cfi_restore w30 + \\ add sp, sp, #32 + \\ .cfi_def_cfa_offset 0 + \\ ret + \\ .cfi_endproc + }); + + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = + \\#include + \\int foo(); + \\int main() { + \\ printf("%d\n", foo()); + \\ return 0; + \\} + }); + exe.addObject(a_o); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("4\n"); + test_step.dependOn(&run.step); + + return test_step; +} + +fn testUnwindInfoNoSubsectionsX64(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-unwind-info-no-subsections-x64", opts); + + const a_o = addObject(b, opts, .{ .name 
= "a", .asm_source_bytes = + \\.globl _foo + \\_foo: + \\ .cfi_startproc + \\ push %rbp + \\ .cfi_def_cfa_offset 8 + \\ .cfi_offset %rbp, -8 + \\ mov %rsp, %rbp + \\ .cfi_def_cfa_register %rbp + \\ call _bar + \\ pop %rbp + \\ .cfi_restore %rbp + \\ .cfi_def_cfa_offset 0 + \\ ret + \\ .cfi_endproc + \\ + \\.globl _bar + \\_bar: + \\ .cfi_startproc + \\ push %rbp + \\ .cfi_def_cfa_offset 8 + \\ .cfi_offset %rbp, -8 + \\ mov %rsp, %rbp + \\ .cfi_def_cfa_register %rbp + \\ mov $4, %rax + \\ pop %rbp + \\ .cfi_restore %rbp + \\ .cfi_def_cfa_offset 0 + \\ ret + \\ .cfi_endproc + }); + + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = + \\#include + \\int foo(); + \\int main() { + \\ printf("%d\n", foo()); + \\ return 0; + \\} + }); + exe.addObject(a_o); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("4\n"); + test_step.dependOn(&run.step); + + return test_step; +} + // Adapted from https://github.com/llvm/llvm-project/blob/main/lld/test/MachO/weak-binding.s fn testWeakBind(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-weak-bind", opts); diff --git a/test/link/macho/reexports/a.zig b/test/link/macho/reexports/a.zig deleted file mode 100644 index cfa7e8b3ac..0000000000 --- a/test/link/macho/reexports/a.zig +++ /dev/null @@ -1,7 +0,0 @@ -const x: i32 = 42; -export fn foo() i32 { - return x; -} -comptime { - @export(foo, .{ .name = "bar", .linkage = .Strong }); -} diff --git a/test/link/macho/reexports/build.zig b/test/link/macho/reexports/build.zig deleted file mode 100644 index 44c96ecf7e..0000000000 --- a/test/link/macho/reexports/build.zig +++ /dev/null @@ -1,38 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: 
*std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const target = b.resolveTargetQuery(.{ .os_tag = .macos }); - - const lib = b.addStaticLibrary(.{ - .name = "a", - .root_source_file = .{ .path = "a.zig" }, - .optimize = optimize, - .target = target, - }); - - const exe = b.addExecutable(.{ - .name = "test", - .optimize = optimize, - .target = target, - }); - exe.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &.{} }); - exe.linkLibrary(lib); - exe.linkLibC(); - - const run = b.addRunArtifact(exe); - run.skip_foreign_checks = true; - run.expectExitCode(0); - test_step.dependOn(&run.step); -} diff --git a/test/link/macho/reexports/main.c b/test/link/macho/reexports/main.c deleted file mode 100644 index 2beb701f1f..0000000000 --- a/test/link/macho/reexports/main.c +++ /dev/null @@ -1,5 +0,0 @@ -extern int foo(); -extern int bar(); -int main() { - return bar() - foo(); -} diff --git a/test/link/macho/unwind_info/all.h b/test/link/macho/unwind_info/all.h deleted file mode 100644 index 15efba90d3..0000000000 --- a/test/link/macho/unwind_info/all.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef ALL -#define ALL - -#include -#include -#include - -struct SimpleString { - SimpleString(size_t max_size); - ~SimpleString(); - - void print(const char* tag) const; - bool append_line(const char* x); - -private: - size_t max_size; - char* buffer; - size_t length; -}; - -struct SimpleStringOwner { - SimpleStringOwner(const char* x); - ~SimpleStringOwner(); - -private: - SimpleString string; -}; - -class Error: public std::exception { -public: - explicit Error(const char* msg) : msg{ msg } {} - virtual ~Error() noexcept {} - virtual const char* what() const noexcept { - return msg.c_str(); - } - -protected: - std::string msg; -}; - -#endif diff --git a/test/link/macho/unwind_info/build.zig b/test/link/macho/unwind_info/build.zig deleted file mode 100644 index 33af6016f9..0000000000 --- a/test/link/macho/unwind_info/build.zig +++ /dev/null @@ 
-1,88 +0,0 @@ -const std = @import("std"); -const builtin = @import("builtin"); - -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const target = b.resolveTargetQuery(.{ .os_tag = .macos }); - - testUnwindInfo(b, test_step, optimize, target, false, "no-dead-strip"); - testUnwindInfo(b, test_step, optimize, target, true, "yes-dead-strip"); -} - -fn testUnwindInfo( - b: *std.Build, - test_step: *std.Build.Step, - optimize: std.builtin.OptimizeMode, - target: std.Build.ResolvedTarget, - dead_strip: bool, - name: []const u8, -) void { - const exe = createScenario(b, optimize, target, name); - exe.link_gc_sections = dead_strip; - - const check = exe.checkObject(); - check.checkInHeaders(); - check.checkExact("segname __TEXT"); - check.checkExact("sectname __gcc_except_tab"); - check.checkExact("sectname __unwind_info"); - - switch (builtin.cpu.arch) { - .aarch64 => { - check.checkExact("sectname __eh_frame"); - }, - .x86_64 => {}, // We do not expect `__eh_frame` section on x86_64 in this case - else => unreachable, - } - - check.checkInSymtab(); - check.checkContains("(was private external) ___gxx_personality_v0"); - test_step.dependOn(&check.step); - - const run = b.addRunArtifact(exe); - run.skip_foreign_checks = true; - run.expectStdOutEqual( - \\Constructed: a - \\Constructed: b - \\About to destroy: b - \\About to destroy: a - \\Error: Not enough memory! 
- \\ - ); - - test_step.dependOn(&run.step); -} - -fn createScenario( - b: *std.Build, - optimize: std.builtin.OptimizeMode, - target: std.Build.ResolvedTarget, - name: []const u8, -) *std.Build.Step.Compile { - const exe = b.addExecutable(.{ - .name = name, - .optimize = optimize, - .target = target, - }); - b.default_step.dependOn(&exe.step); - exe.addIncludePath(.{ .path = "." }); - exe.addCSourceFiles(.{ - .files = &[_][]const u8{ - "main.cpp", - "simple_string.cpp", - "simple_string_owner.cpp", - }, - }); - exe.linkLibCpp(); - return exe; -} diff --git a/test/link/macho/unwind_info/main.cpp b/test/link/macho/unwind_info/main.cpp deleted file mode 100644 index 8195f80b3c..0000000000 --- a/test/link/macho/unwind_info/main.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include "all.h" -#include - -void fn_c() { - SimpleStringOwner c{ "cccccccccc" }; -} - -void fn_b() { - SimpleStringOwner b{ "b" }; - fn_c(); -} - -int main() { - try { - SimpleStringOwner a{ "a" }; - fn_b(); - SimpleStringOwner d{ "d" }; - } catch (const Error& e) { - printf("Error: %s\n", e.what()); - } catch(const std::exception& e) { - printf("Exception: %s\n", e.what()); - } - return 0; -} diff --git a/test/link/macho/unwind_info/simple_string.cpp b/test/link/macho/unwind_info/simple_string.cpp deleted file mode 100644 index 15699cd1e4..0000000000 --- a/test/link/macho/unwind_info/simple_string.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include "all.h" -#include -#include - -SimpleString::SimpleString(size_t max_size) -: max_size{ max_size }, length{} { - if (max_size == 0) { - throw Error{ "Max size must be at least 1." 
}; - } - buffer = new char[max_size]; - buffer[0] = 0; -} - -SimpleString::~SimpleString() { - delete[] buffer; -} - -void SimpleString::print(const char* tag) const { - printf("%s: %s", tag, buffer); -} - -bool SimpleString::append_line(const char* x) { - const auto x_len = strlen(x); - if (x_len + length + 2 > max_size) return false; - std::strncpy(buffer + length, x, max_size - length); - length += x_len; - buffer[length++] = '\n'; - buffer[length] = 0; - return true; -} diff --git a/test/link/macho/unwind_info/simple_string_owner.cpp b/test/link/macho/unwind_info/simple_string_owner.cpp deleted file mode 100644 index c242af6ecc..0000000000 --- a/test/link/macho/unwind_info/simple_string_owner.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "all.h" - -SimpleStringOwner::SimpleStringOwner(const char* x) : string{ 10 } { - if (!string.append_line(x)) { - throw Error{ "Not enough memory!" }; - } - string.print("Constructed"); -} - -SimpleStringOwner::~SimpleStringOwner() { - string.print("About to destroy"); -} From 2b3ac3e82f4207981469739b4ecd9fa0c3ac3308 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 15 Jan 2024 12:05:26 +0100 Subject: [PATCH 073/133] test/link/macho: upgrade tbdv3 test --- test/link.zig | 4 --- test/link/macho.zig | 39 ++++++++++++++++++++++ test/link/macho/tbdv3/a.c | 3 -- test/link/macho/tbdv3/build.zig | 57 --------------------------------- test/link/macho/tbdv3/main.c | 7 ---- 5 files changed, 39 insertions(+), 71 deletions(-) delete mode 100644 test/link/macho/tbdv3/a.c delete mode 100644 test/link/macho/tbdv3/build.zig delete mode 100644 test/link/macho/tbdv3/main.c diff --git a/test/link.zig b/test/link.zig index 3f85cd1a20..8cd04a9142 100644 --- a/test/link.zig +++ b/test/link.zig @@ -115,8 +115,4 @@ pub const cases = [_]Case{ .build_root = "test/link/macho/objcpp", .import = @import("link/macho/objcpp/build.zig"), }, - .{ - .build_root = "test/link/macho/tbdv3", - .import = @import("link/macho/tbdv3/build.zig"), - }, }; diff --git 
a/test/link/macho.zig b/test/link/macho.zig index 6692b1888b..f4018089f8 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -50,6 +50,7 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testDylib(b, .{ .target = default_target })); macho_step.dependOn(testNeededLibrary(b, .{ .target = default_target })); macho_step.dependOn(testSearchStrategy(b, .{ .target = default_target })); + macho_step.dependOn(testTbdv3(b, .{ .target = default_target })); macho_step.dependOn(testTls(b, .{ .target = default_target })); macho_step.dependOn(testTwoLevelNamespace(b, .{ .target = default_target })); macho_step.dependOn(testWeakLibrary(b, .{ .target = default_target })); @@ -1285,6 +1286,44 @@ fn testStackSize(b: *Build, opts: Options) *Step { return test_step; } +fn testTbdv3(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-tbdv3", opts); + + const dylib = addSharedLibrary(b, opts, .{ .name = "a", .c_source_bytes = "int getFoo() { return 42; }" }); + + const tbd = tbd: { + const wf = WriteFile.create(b); + break :tbd wf.add("liba.tbd", + \\--- !tapi-tbd-v3 + \\archs: [ arm64, x86_64 ] + \\uuids: [ 'arm64: DEADBEEF', 'x86_64: BEEFDEAD' ] + \\platform: macos + \\install-name: @rpath/liba.dylib + \\current-version: 0 + \\exports: + \\ - archs: [ arm64, x86_64 ] + \\ symbols: [ _getFoo ] + ); + }; + + const exe = addExecutable(b, opts, .{ .name = "main", .c_source_bytes = + \\#include + \\int getFoo(); + \\int main() { + \\ return getFoo() - 42; + \\} + }); + exe.root_module.linkSystemLibrary("a", .{}); + exe.root_module.addLibraryPath(tbd.dirname()); + exe.root_module.addRPath(dylib.getEmittedBinDirectory()); + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + + return test_step; +} + fn testTentative(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-tentative", opts); diff --git a/test/link/macho/tbdv3/a.c b/test/link/macho/tbdv3/a.c deleted 
file mode 100644 index ddd18ee4b6..0000000000 --- a/test/link/macho/tbdv3/a.c +++ /dev/null @@ -1,3 +0,0 @@ -int getFoo() { - return 42; -} diff --git a/test/link/macho/tbdv3/build.zig b/test/link/macho/tbdv3/build.zig deleted file mode 100644 index 547f72c25f..0000000000 --- a/test/link/macho/tbdv3/build.zig +++ /dev/null @@ -1,57 +0,0 @@ -const std = @import("std"); -const builtin = @import("builtin"); - -pub const requires_symlinks = true; -pub const requires_macos_sdk = false; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const target = b.resolveTargetQuery(.{ .os_tag = .macos }); - - const lib = b.addSharedLibrary(.{ - .name = "a", - .version = .{ .major = 1, .minor = 0, .patch = 0 }, - .optimize = optimize, - .target = target, - }); - lib.addCSourceFile(.{ .file = .{ .path = "a.c" }, .flags = &.{} }); - lib.linkLibC(); - - const tbd_file = b.addWriteFile("liba.tbd", - \\--- !tapi-tbd-v3 - \\archs: [ arm64, x86_64 ] - \\uuids: [ 'arm64: DEADBEEF', 'x86_64: BEEFDEAD' ] - \\platform: macos - \\install-name: @rpath/liba.dylib - \\current-version: 0 - \\exports: - \\ - archs: [ arm64, x86_64 ] - \\ symbols: [ _getFoo ] - ); - - const exe = b.addExecutable(.{ - .name = "test", - .optimize = optimize, - .target = target, - }); - exe.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &[0][]const u8{} }); - exe.linkSystemLibrary("a"); - exe.addLibraryPath(tbd_file.getDirectory()); - exe.addRPath(lib.getEmittedBinDirectory()); - exe.linkLibC(); - - const run = b.addRunArtifact(exe); - run.skip_foreign_checks = true; - run.expectExitCode(0); - - test_step.dependOn(&run.step); -} diff --git a/test/link/macho/tbdv3/main.c b/test/link/macho/tbdv3/main.c deleted 
file mode 100644 index 3cf37ae590..0000000000 --- a/test/link/macho/tbdv3/main.c +++ /dev/null @@ -1,7 +0,0 @@ -#include - -int getFoo(); - -int main() { - return getFoo() - 42; -} From 03b33b0f0139129e649f57952209586db188cb79 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 15 Jan 2024 12:30:55 +0100 Subject: [PATCH 074/133] test/link/macho: migrate all tests to the new test matrix --- test/link.zig | 26 ---- test/link/link.zig | 8 ++ test/link/macho.zig | 129 +++++++++++++++++++- test/link/macho/bugs/13056/build.zig | 38 ------ test/link/macho/bugs/13056/test.cpp | 10 -- test/link/macho/bugs/13457/build.zig | 30 ----- test/link/macho/bugs/13457/main.zig | 1 - test/link/macho/bugs/16308/build.zig | 23 ---- test/link/macho/bugs/16308/main.zig | 1 - test/link/macho/bugs/16628/a_arm64.s | 37 ------ test/link/macho/bugs/16628/a_x64.s | 29 ----- test/link/macho/bugs/16628/build.zig | 42 ------- test/link/macho/bugs/16628/main.c | 8 -- test/link/macho/dead_strip_dylibs/build.zig | 61 --------- test/link/macho/dead_strip_dylibs/main.c | 11 -- test/link/macho/linksection/build.zig | 39 ------ test/link/macho/linksection/main.zig | 5 - test/link/macho/objcpp/Foo.h | 7 -- test/link/macho/objcpp/Foo.mm | 11 -- test/link/macho/objcpp/build.zig | 35 ------ test/link/macho/objcpp/test.mm | 14 --- 21 files changed, 136 insertions(+), 429 deletions(-) delete mode 100644 test/link/macho/bugs/13056/build.zig delete mode 100644 test/link/macho/bugs/13056/test.cpp delete mode 100644 test/link/macho/bugs/13457/build.zig delete mode 100644 test/link/macho/bugs/13457/main.zig delete mode 100644 test/link/macho/bugs/16308/build.zig delete mode 100644 test/link/macho/bugs/16308/main.zig delete mode 100644 test/link/macho/bugs/16628/a_arm64.s delete mode 100644 test/link/macho/bugs/16628/a_x64.s delete mode 100644 test/link/macho/bugs/16628/build.zig delete mode 100644 test/link/macho/bugs/16628/main.c delete mode 100644 test/link/macho/dead_strip_dylibs/build.zig delete mode 100644 
test/link/macho/dead_strip_dylibs/main.c delete mode 100644 test/link/macho/linksection/build.zig delete mode 100644 test/link/macho/linksection/main.zig delete mode 100644 test/link/macho/objcpp/Foo.h delete mode 100644 test/link/macho/objcpp/Foo.mm delete mode 100644 test/link/macho/objcpp/build.zig delete mode 100644 test/link/macho/objcpp/test.mm diff --git a/test/link.zig b/test/link.zig index 8cd04a9142..731736c553 100644 --- a/test/link.zig +++ b/test/link.zig @@ -89,30 +89,4 @@ pub const cases = [_]Case{ .build_root = "test/link/wasm/type", .import = @import("link/wasm/type/build.zig"), }, - - // Mach-O Cases - .{ - .build_root = "test/link/macho/bugs/13056", - .import = @import("link/macho/bugs/13056/build.zig"), - }, - .{ - .build_root = "test/link/macho/bugs/13457", - .import = @import("link/macho/bugs/13457/build.zig"), - }, - .{ - .build_root = "test/link/macho/bugs/16308", - .import = @import("link/macho/bugs/16308/build.zig"), - }, - .{ - .build_root = "test/link/macho/bugs/16628", - .import = @import("link/macho/bugs/16628/build.zig"), - }, - .{ - .build_root = "test/link/macho/linksection", - .import = @import("link/macho/linksection/build.zig"), - }, - .{ - .build_root = "test/link/macho/objcpp", - .import = @import("link/macho/objcpp/build.zig"), - }, }; diff --git a/test/link/link.zig b/test/link/link.zig index 8ef497424f..f227831971 100644 --- a/test/link/link.zig +++ b/test/link/link.zig @@ -48,6 +48,8 @@ const OverlayOptions = struct { cpp_source_flags: []const []const u8 = &.{}, objc_source_bytes: ?[]const u8 = null, objc_source_flags: []const []const u8 = &.{}, + objcpp_source_bytes: ?[]const u8 = null, + objcpp_source_flags: []const []const u8 = &.{}, zig_source_bytes: ?[]const u8 = null, pic: ?bool = null, strip: ?bool = null, @@ -100,6 +102,12 @@ fn addCompileStep( .static_lib => .static, }, }); + if (overlay.objcpp_source_bytes) |bytes| { + compile_step.addCSourceFile(.{ + .file = b.addWriteFiles().add("a.mm", bytes), + .flags = 
overlay.objcpp_source_flags, + }); + } if (overlay.objc_source_bytes) |bytes| { compile_step.addCSourceFile(.{ .file = b.addWriteFiles().add("a.m", bytes), diff --git a/test/link/macho.zig b/test/link/macho.zig index f4018089f8..f41b0b2361 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -1,5 +1,4 @@ //! Here we test our MachO linker for correctness and functionality. -//! TODO migrate standalone tests from test/link/macho/* to here. pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { const macho_step = b.step("test-macho", "Run MachO tests"); @@ -18,12 +17,14 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { macho_step.dependOn(testDeadStrip(b, .{ .target = default_target })); macho_step.dependOn(testEmptyObject(b, .{ .target = default_target })); + macho_step.dependOn(testEmptyZig(b, .{ .target = default_target })); macho_step.dependOn(testEntryPoint(b, .{ .target = default_target })); macho_step.dependOn(testHeaderWeakFlags(b, .{ .target = default_target })); macho_step.dependOn(testHelloC(b, .{ .target = default_target })); macho_step.dependOn(testHelloZig(b, .{ .target = default_target })); macho_step.dependOn(testLargeBss(b, .{ .target = default_target })); macho_step.dependOn(testLayout(b, .{ .target = default_target })); + macho_step.dependOn(testLinksection(b, .{ .target = default_target })); macho_step.dependOn(testMhExecuteHeader(b, .{ .target = default_target })); macho_step.dependOn(testNoDeadStrip(b, .{ .target = default_target })); macho_step.dependOn(testNoExportsDylib(b, .{ .target = default_target })); @@ -59,8 +60,10 @@ pub fn testAll(b: *Build, build_opts: BuildOptions) *Step { if (build_opts.has_macos_sdk) { macho_step.dependOn(testDeadStripDylibs(b, .{ .target = b.host })); macho_step.dependOn(testHeaderpad(b, .{ .target = b.host })); + macho_step.dependOn(testLinkDirectlyCppTbd(b, .{ .target = b.host })); macho_step.dependOn(testNeededFramework(b, .{ .target = b.host })); macho_step.dependOn(testObjc(b, .{ 
.target = b.host })); + macho_step.dependOn(testObjcpp(b, .{ .target = b.host })); macho_step.dependOn(testWeakFramework(b, .{ .target = b.host })); } } @@ -255,6 +258,18 @@ fn testEmptyObject(b: *Build, opts: Options) *Step { return test_step; } +fn testEmptyZig(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-empty-zig", opts); + + const exe = addExecutable(b, opts, .{ .name = "empty", .zig_source_bytes = "pub fn main() void {}" }); + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + + return test_step; +} + fn testEntryPoint(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-entry-point", opts); @@ -745,6 +760,65 @@ fn testLayout(b: *Build, opts: Options) *Step { return test_step; } +fn testLinkDirectlyCppTbd(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-link-directly-cpp-tbd", opts); + + const sdk = std.zig.system.darwin.getSdk(b.allocator, opts.target.result) orelse + @panic("macOS SDK is required to run the test"); + + const exe = addExecutable(b, opts, .{ + .name = "main", + .cpp_source_bytes = + \\#include + \\#include + \\int main() { + \\ int *x = new int; + \\ *x = 5; + \\ fprintf(stderr, "x: %d\n", *x); + \\ delete x; + \\} + , + .cpp_source_flags = &.{ "-nostdlib++", "-nostdinc++" }, + }); + exe.root_module.addSystemIncludePath(.{ .path = b.pathJoin(&.{ sdk, "/usr/include" }) }); + exe.root_module.addIncludePath(.{ .path = b.pathJoin(&.{ sdk, "/usr/include/c++/v1" }) }); + exe.root_module.addObjectFile(.{ .path = b.pathJoin(&.{ sdk, "/usr/lib/libc++.tbd" }) }); + + const check = exe.checkObject(); + check.checkInSymtab(); + check.checkContains("[referenced dynamically] external __mh_execute_header"); + test_step.dependOn(&check.step); + + return test_step; +} + +fn testLinksection(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-linksection", opts); + + const obj = addObject(b, opts, .{ .name = "main", 
.zig_source_bytes = + \\export var test_global: u32 linksection("__DATA,__TestGlobal") = undefined; + \\export fn testFn() linksection("__TEXT,__TestFn") callconv(.C) void { + \\ testGenericFn("A"); + \\} + \\fn testGenericFn(comptime suffix: []const u8) linksection("__TEXT,__TestGenFn" ++ suffix) void {} + }); + + const check = obj.checkObject(); + check.checkInSymtab(); + check.checkContains("(__DATA,__TestGlobal) external _test_global"); + check.checkInSymtab(); + check.checkContains("(__TEXT,__TestFn) external _testFn"); + + if (opts.optimize == .Debug) { + check.checkInSymtab(); + check.checkContains("(__TEXT,__TestGenFnA) _a.testGenericFn__anon_"); + } + + test_step.dependOn(&check.step); + + return test_step; +} + fn testMhExecuteHeader(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-mh-execute-header", opts); @@ -870,6 +944,59 @@ fn testObjc(b: *Build, opts: Options) *Step { return test_step; } +fn testObjcpp(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "macho-objcpp", opts); + + const foo_h = foo_h: { + const wf = WriteFile.create(b); + break :foo_h wf.add("Foo.h", + \\#import + \\@interface Foo : NSObject + \\- (NSString *)name; + \\@end + ); + }; + + const foo_o = addObject(b, opts, .{ .name = "foo", .objcpp_source_bytes = + \\#import "Foo.h" + \\@implementation Foo + \\- (NSString *)name + \\{ + \\ NSString *str = [[NSString alloc] initWithFormat:@"Zig"]; + \\ return str; + \\} + \\@end + }); + foo_o.root_module.addIncludePath(foo_h.dirname()); + foo_o.linkLibCpp(); + + const exe = addExecutable(b, opts, .{ .name = "main", .objcpp_source_bytes = + \\#import "Foo.h" + \\#import + \\#include + \\int main(int argc, char *argv[]) + \\{ + \\ @autoreleasepool { + \\ Foo *foo = [[Foo alloc] init]; + \\ NSString *result = [foo name]; + \\ std::cout << "Hello from C++ and " << [result UTF8String]; + \\ assert([result isEqualToString:@"Zig"]); + \\ return 0; + \\ } + \\} + }); + 
exe.root_module.addIncludePath(foo_h.dirname()); + exe.addObject(foo_o); + exe.linkLibCpp(); + exe.root_module.linkFramework("Foundation", .{}); + + const run = addRunArtifact(exe); + run.expectStdOutEqual("Hello from C++ and Zig"); + test_step.dependOn(&run.step); + + return test_step; +} + fn testPagezeroSize(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "macho-pagezero-size", opts); diff --git a/test/link/macho/bugs/13056/build.zig b/test/link/macho/bugs/13056/build.zig deleted file mode 100644 index 53bcefb930..0000000000 --- a/test/link/macho/bugs/13056/build.zig +++ /dev/null @@ -1,38 +0,0 @@ -const std = @import("std"); - -pub const requires_macos_sdk = true; -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const target = b.resolveTargetQuery(.{ .os_tag = .macos }); - const sdk = std.zig.system.darwin.getSdk(b.allocator, target.result) orelse - @panic("macOS SDK is required to run the test"); - - const exe = b.addExecutable(.{ - .name = "test", - .optimize = optimize, - .target = b.host, - }); - exe.addSystemIncludePath(.{ .path = b.pathJoin(&.{ sdk, "/usr/include" }) }); - exe.addIncludePath(.{ .path = b.pathJoin(&.{ sdk, "/usr/include/c++/v1" }) }); - exe.addCSourceFile(.{ .file = .{ .path = "test.cpp" }, .flags = &.{ - "-nostdlib++", - "-nostdinc++", - } }); - exe.addObjectFile(.{ .path = b.pathJoin(&.{ sdk, "/usr/lib/libc++.tbd" }) }); - - const run_cmd = b.addRunArtifact(exe); - run_cmd.expectStdErrEqual("x: 5\n"); - - test_step.dependOn(&run_cmd.step); -} diff --git a/test/link/macho/bugs/13056/test.cpp b/test/link/macho/bugs/13056/test.cpp deleted file mode 100644 index d042cb0a2a..0000000000 --- 
a/test/link/macho/bugs/13056/test.cpp +++ /dev/null @@ -1,10 +0,0 @@ -// test.cpp -#include -#include - -int main() { - int *x = new int; - *x = 5; - fprintf(stderr, "x: %d\n", *x); - delete x; -} diff --git a/test/link/macho/bugs/13457/build.zig b/test/link/macho/bugs/13457/build.zig deleted file mode 100644 index fe835b5715..0000000000 --- a/test/link/macho/bugs/13457/build.zig +++ /dev/null @@ -1,30 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const target = b.resolveTargetQuery(.{ .os_tag = .macos }); - - const exe = b.addExecutable(.{ - .name = "test", - .root_source_file = .{ .path = "main.zig" }, - .optimize = optimize, - .target = target, - }); - - const run = b.addRunArtifact(exe); - run.skip_foreign_checks = true; - run.expectStdOutEqual(""); - - test_step.dependOn(&run.step); -} diff --git a/test/link/macho/bugs/13457/main.zig b/test/link/macho/bugs/13457/main.zig deleted file mode 100644 index 902b554db0..0000000000 --- a/test/link/macho/bugs/13457/main.zig +++ /dev/null @@ -1 +0,0 @@ -pub fn main() void {} diff --git a/test/link/macho/bugs/16308/build.zig b/test/link/macho/bugs/16308/build.zig deleted file mode 100644 index f4456b111c..0000000000 --- a/test/link/macho/bugs/16308/build.zig +++ /dev/null @@ -1,23 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - const target = b.resolveTargetQuery(.{ .os_tag = .macos }); - - const lib = b.addSharedLibrary(.{ - .name = "a", - .root_source_file = .{ .path = "main.zig" }, - .optimize = 
.Debug, - .target = target, - }); - - const check = lib.checkObject(); - check.checkInSymtab(); - check.checkNotPresent("external _abc"); - - test_step.dependOn(&check.step); -} diff --git a/test/link/macho/bugs/16308/main.zig b/test/link/macho/bugs/16308/main.zig deleted file mode 100644 index fd94789461..0000000000 --- a/test/link/macho/bugs/16308/main.zig +++ /dev/null @@ -1 +0,0 @@ -fn abc() void {} diff --git a/test/link/macho/bugs/16628/a_arm64.s b/test/link/macho/bugs/16628/a_arm64.s deleted file mode 100644 index 215deb8514..0000000000 --- a/test/link/macho/bugs/16628/a_arm64.s +++ /dev/null @@ -1,37 +0,0 @@ -.globl _foo -.align 4 -_foo: - .cfi_startproc - stp x29, x30, [sp, #-32]! - .cfi_def_cfa_offset 32 - .cfi_offset w30, -24 - .cfi_offset w29, -32 - mov x29, sp - .cfi_def_cfa w29, 32 - bl _bar - ldp x29, x30, [sp], #32 - .cfi_restore w29 - .cfi_restore w30 - .cfi_def_cfa_offset 0 - ret - .cfi_endproc - -.globl _bar -.align 4 -_bar: - .cfi_startproc - sub sp, sp, #32 - .cfi_def_cfa_offset -32 - stp x29, x30, [sp, #16] - .cfi_offset w30, -24 - .cfi_offset w29, -32 - mov x29, sp - .cfi_def_cfa w29, 32 - mov w0, #4 - ldp x29, x30, [sp, #16] - .cfi_restore w29 - .cfi_restore w30 - add sp, sp, #32 - .cfi_def_cfa_offset 0 - ret - .cfi_endproc diff --git a/test/link/macho/bugs/16628/a_x64.s b/test/link/macho/bugs/16628/a_x64.s deleted file mode 100644 index cc1712585e..0000000000 --- a/test/link/macho/bugs/16628/a_x64.s +++ /dev/null @@ -1,29 +0,0 @@ -.globl _foo -_foo: - .cfi_startproc - push %rbp - .cfi_def_cfa_offset 8 - .cfi_offset %rbp, -8 - mov %rsp, %rbp - .cfi_def_cfa_register %rbp - call _bar - pop %rbp - .cfi_restore %rbp - .cfi_def_cfa_offset 0 - ret - .cfi_endproc - -.globl _bar -_bar: - .cfi_startproc - push %rbp - .cfi_def_cfa_offset 8 - .cfi_offset %rbp, -8 - mov %rsp, %rbp - .cfi_def_cfa_register %rbp - mov $4, %rax - pop %rbp - .cfi_restore %rbp - .cfi_def_cfa_offset 0 - ret - .cfi_endproc diff --git a/test/link/macho/bugs/16628/build.zig 
b/test/link/macho/bugs/16628/build.zig deleted file mode 100644 index 59c666aa27..0000000000 --- a/test/link/macho/bugs/16628/build.zig +++ /dev/null @@ -1,42 +0,0 @@ -const std = @import("std"); -const builtin = @import("builtin"); - -pub const requires_symlinks = true; -pub const requires_macos_sdk = false; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const target = b.resolveTargetQuery(.{ .os_tag = .macos }); - - const exe = b.addExecutable(.{ - .name = "test", - .optimize = optimize, - .target = target, - }); - exe.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &[0][]const u8{} }); - switch (builtin.cpu.arch) { - .aarch64 => { - exe.addCSourceFile(.{ .file = .{ .path = "a_arm64.s" }, .flags = &[0][]const u8{} }); - }, - .x86_64 => { - exe.addCSourceFile(.{ .file = .{ .path = "a_x64.s" }, .flags = &[0][]const u8{} }); - }, - else => unreachable, - } - exe.linkLibC(); - - const run = b.addRunArtifact(exe); - run.skip_foreign_checks = true; - run.expectStdOutEqual("4\n"); - - test_step.dependOn(&run.step); -} diff --git a/test/link/macho/bugs/16628/main.c b/test/link/macho/bugs/16628/main.c deleted file mode 100644 index 3ac8481dca..0000000000 --- a/test/link/macho/bugs/16628/main.c +++ /dev/null @@ -1,8 +0,0 @@ -#include - -int foo(); - -int main() { - printf("%d\n", foo()); - return 0; -} diff --git a/test/link/macho/dead_strip_dylibs/build.zig b/test/link/macho/dead_strip_dylibs/build.zig deleted file mode 100644 index 7ab8c16dfe..0000000000 --- a/test/link/macho/dead_strip_dylibs/build.zig +++ /dev/null @@ -1,61 +0,0 @@ -const std = @import("std"); - -pub const requires_macos_sdk = true; -pub const requires_symlinks = true; - -pub fn build(b: 
*std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - { - // Without -dead_strip_dylibs we expect `-la` to include liba.dylib in the final executable - const exe = createScenario(b, optimize, "no-dead-strip"); - - const check = exe.checkObject(); - check.checkInHeaders(); - check.checkExact("cmd LOAD_DYLIB"); - check.checkContains("Cocoa"); - - check.checkInHeaders(); - check.checkExact("cmd LOAD_DYLIB"); - check.checkContains("libobjc"); - - test_step.dependOn(&check.step); - - const run_cmd = b.addRunArtifact(exe); - test_step.dependOn(&run_cmd.step); - } - - { - // With -dead_strip_dylibs, we should include liba.dylib as it's unreachable - const exe = createScenario(b, optimize, "yes-dead-strip"); - exe.dead_strip_dylibs = true; - - const run_cmd = b.addRunArtifact(exe); - run_cmd.expectExitCode(@as(u8, @bitCast(@as(i8, -2)))); // should fail - test_step.dependOn(&run_cmd.step); - } -} - -fn createScenario( - b: *std.Build, - optimize: std.builtin.OptimizeMode, - name: []const u8, -) *std.Build.Step.Compile { - const exe = b.addExecutable(.{ - .name = name, - .optimize = optimize, - .target = b.host, - }); - exe.addCSourceFile(.{ .file = .{ .path = "main.c" }, .flags = &[0][]const u8{} }); - exe.linkLibC(); - exe.linkFramework("Cocoa"); - return exe; -} diff --git a/test/link/macho/dead_strip_dylibs/main.c b/test/link/macho/dead_strip_dylibs/main.c deleted file mode 100644 index 06668f5522..0000000000 --- a/test/link/macho/dead_strip_dylibs/main.c +++ /dev/null @@ -1,11 +0,0 @@ -#include - -int main(int argc, char* argv[]) { - if (objc_getClass("NSObject") == 0) { - return -1; - } - if (objc_getClass("NSApplication") == 0) { - return -2; - } - return 0; -} diff --git 
a/test/link/macho/linksection/build.zig b/test/link/macho/linksection/build.zig deleted file mode 100644 index 91ddfe530c..0000000000 --- a/test/link/macho/linksection/build.zig +++ /dev/null @@ -1,39 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const target = b.resolveTargetQuery(.{ .os_tag = .macos }); - - const obj = b.addObject(.{ - .name = "test", - .root_source_file = .{ .path = "main.zig" }, - .optimize = optimize, - .target = target, - }); - - const check = obj.checkObject(); - - check.checkInSymtab(); - check.checkContains("(__DATA,__TestGlobal) external _test_global"); - - check.checkInSymtab(); - check.checkContains("(__TEXT,__TestFn) external _testFn"); - - if (optimize == .Debug) { - check.checkInSymtab(); - check.checkContains("(__TEXT,__TestGenFnA) _main.testGenericFn__anon_"); - } - - test_step.dependOn(&check.step); -} diff --git a/test/link/macho/linksection/main.zig b/test/link/macho/linksection/main.zig deleted file mode 100644 index 2105c35ed3..0000000000 --- a/test/link/macho/linksection/main.zig +++ /dev/null @@ -1,5 +0,0 @@ -export var test_global: u32 linksection("__DATA,__TestGlobal") = undefined; -export fn testFn() linksection("__TEXT,__TestFn") callconv(.C) void { - testGenericFn("A"); -} -fn testGenericFn(comptime suffix: []const u8) linksection("__TEXT,__TestGenFn" ++ suffix) void {} diff --git a/test/link/macho/objcpp/Foo.h b/test/link/macho/objcpp/Foo.h deleted file mode 100644 index 05cb7df39b..0000000000 --- a/test/link/macho/objcpp/Foo.h +++ /dev/null @@ -1,7 +0,0 @@ -#import - -@interface Foo : NSObject - -- (NSString *)name; - -@end diff --git 
a/test/link/macho/objcpp/Foo.mm b/test/link/macho/objcpp/Foo.mm deleted file mode 100644 index 6fc9b1edf0..0000000000 --- a/test/link/macho/objcpp/Foo.mm +++ /dev/null @@ -1,11 +0,0 @@ -#import "Foo.h" - -@implementation Foo - -- (NSString *)name -{ - NSString *str = [[NSString alloc] initWithFormat:@"Zig"]; - return str; -} - -@end diff --git a/test/link/macho/objcpp/build.zig b/test/link/macho/objcpp/build.zig deleted file mode 100644 index 53743fafdf..0000000000 --- a/test/link/macho/objcpp/build.zig +++ /dev/null @@ -1,35 +0,0 @@ -const std = @import("std"); - -pub const requires_symlinks = true; -pub const requires_macos_sdk = true; - -pub fn build(b: *std.Build) void { - const test_step = b.step("test", "Test it"); - b.default_step = test_step; - - add(b, test_step, .Debug); - add(b, test_step, .ReleaseFast); - add(b, test_step, .ReleaseSmall); - add(b, test_step, .ReleaseSafe); -} - -fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.OptimizeMode) void { - const exe = b.addExecutable(.{ - .name = "test", - .optimize = optimize, - .target = b.host, - }); - b.default_step.dependOn(&exe.step); - exe.addIncludePath(.{ .path = "." }); - exe.addCSourceFile(.{ .file = .{ .path = "Foo.mm" }, .flags = &[0][]const u8{} }); - exe.addCSourceFile(.{ .file = .{ .path = "test.mm" }, .flags = &[0][]const u8{} }); - exe.linkLibCpp(); - // TODO when we figure out how to ship framework stubs for cross-compilation, - // populate paths to the sysroot here. 
- exe.linkFramework("Foundation"); - - const run_cmd = b.addRunArtifact(exe); - run_cmd.expectStdOutEqual("Hello from C++ and Zig"); - - test_step.dependOn(&run_cmd.step); -} diff --git a/test/link/macho/objcpp/test.mm b/test/link/macho/objcpp/test.mm deleted file mode 100644 index d27c543cdf..0000000000 --- a/test/link/macho/objcpp/test.mm +++ /dev/null @@ -1,14 +0,0 @@ -#import "Foo.h" -#import -#include - -int main(int argc, char *argv[]) -{ - @autoreleasepool { - Foo *foo = [[Foo alloc] init]; - NSString *result = [foo name]; - std::cout << "Hello from C++ and " << [result UTF8String]; - assert([result isEqualToString:@"Zig"]); - return 0; - } -} From 6d0ba6dd10b9ba9137b3ea9532789fcc5e971649 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 15 Jan 2024 20:34:50 +0100 Subject: [PATCH 075/133] macho: introduce ZigObject --- CMakeLists.txt | 1 + src/link/MachO.zig | 28 +++-- src/link/MachO/Atom.zig | 48 ++++---- src/link/MachO/Object.zig | 10 ++ src/link/MachO/Symbol.zig | 1 + src/link/MachO/ZigObject.zig | 199 +++++++++++++++++++++++++++++++++ src/link/MachO/file.zig | 4 + src/link/MachO/relocatable.zig | 2 +- 8 files changed, 262 insertions(+), 31 deletions(-) create mode 100644 src/link/MachO/ZigObject.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index 851e554923..0942f9530a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -608,6 +608,7 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/Relocation.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Symbol.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/UnwindInfo.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/ZigObject.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/bind.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/Rebase.zig" diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 8d959275aa..956f06a49a 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -10,6 +10,7 @@ d_sym: ?DebugSymbols = null, /// Index of each input file also encodes 
the priority or precedence of one input file /// over another. files: std.MultiArrayList(File.Entry) = .{}, +zig_object: ?File.Index = null, internal_object: ?File.Index = null, objects: std.ArrayListUnmanaged(File.Index) = .{}, dylibs: std.ArrayListUnmanaged(File.Index) = .{}, @@ -222,12 +223,19 @@ pub fn createEmpty( try self.symbols.append(gpa, .{}); try self.symbols_extra.append(gpa, 0); - // TODO: init - if (opt_zcu) |zcu| { if (!use_llvm) { - _ = zcu; - // TODO: create .zig_object + const index: File.Index = @intCast(try self.files.addOne(gpa)); + self.files.set(index, .{ .zig_object = .{ + .index = index, + .path = try std.fmt.allocPrint(arena, "{s}.o", .{std.fs.path.stem( + zcu.main_mod.root_src_path, + )}), + } }); + self.zig_object = index; + try self.getZigObject().?.init(self); + + // TODO init metadata if (comp.config.debug_format != .strip) { // Create dSYM bundle. @@ -281,6 +289,7 @@ pub fn deinit(self: *MachO) void { for (self.files.items(.tags), self.files.items(.data)) |tag, *data| switch (tag) { .null => {}, + .zig_object => data.zig_object.deinit(gpa), .internal => data.internal.deinit(gpa), .object => data.object.deinit(gpa), .dylib => data.dylib.deinit(gpa), @@ -3109,9 +3118,7 @@ pub fn freeDecl(self: *MachO, decl_index: InternPool.DeclIndex) void { pub fn getDeclVAddr(self: *MachO, decl_index: InternPool.DeclIndex, reloc_info: link.File.RelocInfo) !u64 { assert(self.llvm_object == null); - _ = decl_index; - _ = reloc_info; - @panic("TODO getDeclVAddr"); + return self.getZigObject().?.getDeclVAddr(self, decl_index, reloc_info); } pub fn lowerAnonDecl( @@ -3297,12 +3304,18 @@ pub fn getFile(self: *MachO, index: File.Index) ?File { const tag = self.files.items(.tags)[index]; return switch (tag) { .null => null, + .zig_object => .{ .zig_object = &self.files.items(.data)[index].zig_object }, .internal => .{ .internal = &self.files.items(.data)[index].internal }, .object => .{ .object = &self.files.items(.data)[index].object }, .dylib => .{ .dylib 
= &self.files.items(.data)[index].dylib }, }; } +pub fn getZigObject(self: *MachO) ?*ZigObject { + const index = self.zig_object orelse return null; + return self.getFile(index).?.zig_object; +} + pub fn getInternalObject(self: *MachO) ?*InternalObject { const index = self.internal_object orelse return null; return self.getFile(index).?.internal; @@ -4123,3 +4136,4 @@ const TlvPtrSection = synthetic.TlvPtrSection; const TypedValue = @import("../TypedValue.zig"); const UnwindInfo = @import("MachO/UnwindInfo.zig"); const WeakBindSection = synthetic.WeakBindSection; +const ZigObject = @import("MachO/ZigObject.zig"); diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index ead0e96a50..3e9884f770 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -45,10 +45,27 @@ pub fn getFile(self: Atom, macho_file: *MachO) File { return macho_file.getFile(self.file).?; } +pub fn getData(self: Atom, macho_file: *MachO) []const u8 { + return switch (self.getFile(macho_file)) { + .zig_object => @panic("TODO Atom.getData"), + .object => |x| x.getAtomData(self), + else => unreachable, + }; +} + +pub fn getRelocs(self: Atom, macho_file: *MachO) []const Relocation { + return switch (self.getFile(macho_file)) { + .zig_object => @panic("TODO Atom.getRelocs"), + .object => |x| x.getAtomRelocs(self), + else => unreachable, + }; +} + pub fn getInputSection(self: Atom, macho_file: *MachO) macho.section_64 { return switch (self.getFile(macho_file)) { - .dylib => unreachable, - inline else => |x| x.sections.items(.header)[self.n_sect], + .zig_object => |x| x.getInputSection(self, macho_file), + .object => |x| x.sections.items(.header)[self.n_sect], + else => unreachable, }; } @@ -61,26 +78,10 @@ pub fn getPriority(self: Atom, macho_file: *MachO) u64 { return (@as(u64, @intCast(file.getIndex())) << 32) | @as(u64, @intCast(self.n_sect)); } -pub fn getCode(self: Atom, macho_file: *MachO) []const u8 { - const code = switch (self.getFile(macho_file)) { - .dylib => 
unreachable, - inline else => |x| x.getSectionData(self.n_sect), - }; - return code[self.off..][0..self.size]; -} - -pub fn getRelocs(self: Atom, macho_file: *MachO) []const Relocation { - const relocs = switch (self.getFile(macho_file)) { - .dylib => unreachable, - inline else => |x| x.sections.items(.relocs)[self.n_sect], - }; - return relocs.items[self.relocs.pos..][0..self.relocs.len]; -} - pub fn getUnwindRecords(self: Atom, macho_file: *MachO) []const UnwindInfo.Record.Index { return switch (self.getFile(macho_file)) { .dylib => unreachable, - .internal => &[0]UnwindInfo.Record.Index{}, + .zig_object, .internal => &[0]UnwindInfo.Record.Index{}, .object => |x| x.unwind_records.items[self.unwind_records.pos..][0..self.unwind_records.len], }; } @@ -290,10 +291,10 @@ pub fn resolveRelocs(self: Atom, macho_file: *MachO, buffer: []u8) !void { defer tracy.end(); assert(!self.getInputSection(macho_file).isZerofill()); - const relocs = self.getRelocs(macho_file); const file = self.getFile(macho_file); const name = self.getName(macho_file); - @memcpy(buffer, self.getCode(macho_file)); + const relocs = self.getRelocs(macho_file); + @memcpy(buffer, self.getData(macho_file)); relocs_log.debug("{x}: {s}", .{ self.value, name }); @@ -683,10 +684,11 @@ const x86_64 = struct { }; pub fn calcNumRelocs(self: Atom, macho_file: *MachO) u32 { + const relocs = self.getRelocs(macho_file); switch (macho_file.getTarget().cpu.arch) { .aarch64 => { var nreloc: u32 = 0; - for (self.getRelocs(macho_file)) |rel| { + for (relocs) |rel| { nreloc += 1; switch (rel.type) { .page, .pageoff => if (rel.addend > 0) { @@ -697,7 +699,7 @@ pub fn calcNumRelocs(self: Atom, macho_file: *MachO) u32 { } return nreloc; }, - .x86_64 => return @intCast(self.getRelocs(macho_file).len), + .x86_64 => return @intCast(relocs.len), else => unreachable, } } diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 136b39d617..d31aaf667f 100644 --- a/src/link/MachO/Object.zig +++ 
b/src/link/MachO/Object.zig @@ -1547,6 +1547,16 @@ pub fn getSectionData(self: *const Object, index: u32) []const u8 { return self.data[sect.offset..][0..sect.size]; } +pub fn getAtomData(self: *const Object, atom: Atom) []const u8 { + const data = self.getSectionData(atom.n_sect); + return data[atom.off..][0..atom.size]; +} + +pub fn getAtomRelocs(self: *const Object, atom: Atom) []const Relocation { + const relocs = self.sections.items(.relocs)[atom.n_sect]; + return relocs.items[atom.relocs.pos..][0..atom.relocs.len]; +} + fn getString(self: Object, off: u32) [:0]const u8 { assert(off < self.strtab.len); return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.ptr + off)), 0); diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 9355c0db2c..c6cf3f9631 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -306,6 +306,7 @@ fn format2( if (symbol.flags.weak) try writer.writeAll(" : weak"); if (symbol.isSymbolStab(ctx.macho_file)) try writer.writeAll(" : stab"); switch (file) { + .zig_object => |x| try writer.print(" : zig_object({d})", .{x.index}), .internal => |x| try writer.print(" : internal({d})", .{x.index}), .object => |x| try writer.print(" : object({d})", .{x.index}), .dylib => |x| try writer.print(" : dylib({d})", .{x.index}), diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig new file mode 100644 index 0000000000..e8adebc5c0 --- /dev/null +++ b/src/link/MachO/ZigObject.zig @@ -0,0 +1,199 @@ +/// Externally owned memory. 
+path: []const u8, +index: File.Index, + +symtab: std.MultiArrayList(Nlist) = .{}, + +symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, +atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, + +output_symtab_ctx: MachO.SymtabCtx = .{}, + +pub fn init(self: *ZigObject, macho_file: *MachO) !void { + const comp = macho_file.base.comp; + const gpa = comp.gpa; + + try self.atoms.append(gpa, 0); // null input section +} + +pub fn deinit(self: *ZigObject, allocator: Allocator) void { + self.symtab.deinit(allocator); + self.symbols.deinit(allocator); + self.atoms.deinit(allocator); +} + +fn addNlist(self: *ZigObject, allocator: Allocator) !Symbol.Index { + try self.symtab.ensureUnusedCapacity(allocator, 1); + const index = @as(Symbol.Index, @intCast(self.symtab.addOneAssumeCapacity())); + self.symtab.set(index, .{ + .nlist = MachO.null_sym, + .size = 0, + .atom = 0, + }); + return index; +} + +pub fn getDeclVAddr( + self: *ZigObject, + macho_file: *MachO, + decl_index: InternPool.DeclIndex, + reloc_info: link.File.RelocInfo, +) !u64 { + _ = self; + _ = macho_file; + _ = decl_index; + _ = reloc_info; + @panic("TODO getDeclVAddr"); +} + +pub fn resolveSymbols(self: *ZigObject, macho_file: *MachO) void { + _ = self; + _ = macho_file; + @panic("TODO resolveSymbols"); +} + +pub fn resetGlobals(self: *ZigObject, macho_file: *MachO) void { + for (self.symbols.items, 0..) 
|sym_index, nlist_idx| { + if (!self.symtab.items(.nlist)[nlist_idx].ext()) continue; + const sym = macho_file.getSymbol(sym_index); + const name = sym.name; + sym.* = .{}; + sym.name = name; + } +} + +pub fn calcSymtabSize(self: *ZigObject, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + const file = sym.getFile(macho_file) orelse continue; + if (file.getIndex() != self.index) continue; + if (sym.getAtom(macho_file)) |atom| if (!atom.flags.alive) continue; + sym.flags.output_symtab = true; + if (sym.isLocal()) { + try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nlocals }, macho_file); + self.output_symtab_ctx.nlocals += 1; + } else if (sym.flags.@"export") { + try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nexports }, macho_file); + self.output_symtab_ctx.nexports += 1; + } else { + assert(sym.flags.import); + try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nimports }, macho_file); + self.output_symtab_ctx.nimports += 1; + } + self.output_symtab_ctx.strsize += @as(u32, @intCast(sym.getName(macho_file).len + 1)); + } +} + +pub fn writeSymtab(self: ZigObject, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + const file = sym.getFile(macho_file) orelse continue; + if (file.getIndex() != self.index) continue; + const idx = sym.getOutputSymtabIndex(macho_file) orelse continue; + const n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); + macho_file.strtab.appendSliceAssumeCapacity(sym.getName(macho_file)); + macho_file.strtab.appendAssumeCapacity(0); + const out_sym = &macho_file.symtab.items[idx]; + out_sym.n_strx = n_strx; + sym.setOutputSym(macho_file, out_sym); + } +} + +pub fn getInputSection(self: ZigObject, atom: Atom, macho_file: *MachO) macho.section_64 { + _ = self; + var sect = 
macho_file.sections.items(.header)[atom.out_n_sect]; + sect.addr = 0; + sect.offset = 0; + sect.size = atom.size; + sect.@"align" = atom.alignment.toLog2Units(); + return sect; +} + +pub fn fmtSymtab(self: *ZigObject, macho_file: *MachO) std.fmt.Formatter(formatSymtab) { + return .{ .data = .{ + .self = self, + .macho_file = macho_file, + } }; +} + +const FormatContext = struct { + self: *ZigObject, + macho_file: *MachO, +}; + +fn formatSymtab( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + try writer.writeAll(" symbols\n"); + for (ctx.self.symbols.items) |index| { + const sym = ctx.macho_file.getSymbol(index); + try writer.print(" {}\n", .{sym.fmt(ctx.macho_file)}); + } +} + +pub fn fmtAtoms(self: *ZigObject, macho_file: *MachO) std.fmt.Formatter(formatAtoms) { + return .{ .data = .{ + .self = self, + .macho_file = macho_file, + } }; +} + +fn formatAtoms( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + try writer.writeAll(" atoms\n"); + for (ctx.self.atoms.items) |atom_index| { + const atom = ctx.macho_file.getAtom(atom_index) orelse continue; + try writer.print(" {}\n", .{atom.fmt(ctx.macho_file)}); + } +} + +const Nlist = struct { + nlist: macho.nlist_64, + size: u64, + atom: Atom.Index, +}; + +const assert = std.debug.assert; +const builtin = @import("builtin"); +const codegen = @import("../../codegen.zig"); +const link = @import("../../link.zig"); +const log = std.log.scoped(.link); +const macho = std.macho; +const mem = std.mem; +const trace = @import("../../tracy.zig").trace; +const std = @import("std"); + +const Air = @import("../../Air.zig"); +const Allocator = std.mem.Allocator; +const Archive = @import("Archive.zig"); +const Atom = @import("Atom.zig"); +const Dwarf = @import("../Dwarf.zig"); +const File = 
@import("file.zig").File; +const InternPool = @import("../../InternPool.zig"); +const Liveness = @import("../../Liveness.zig"); +const MachO = @import("../MachO.zig"); +const Module = @import("../../Module.zig"); +const Object = @import("Object.zig"); +const Symbol = @import("Symbol.zig"); +const StringTable = @import("../StringTable.zig"); +const Type = @import("../../type.zig").Type; +const Value = @import("../../value.zig").Value; +const TypedValue = @import("../../TypedValue.zig"); +const ZigObject = @This(); diff --git a/src/link/MachO/file.zig b/src/link/MachO/file.zig index 9e19bed7df..7033f58761 100644 --- a/src/link/MachO/file.zig +++ b/src/link/MachO/file.zig @@ -1,4 +1,5 @@ pub const File = union(enum) { + zig_object: *ZigObject, internal: *InternalObject, object: *Object, dylib: *Dylib, @@ -22,6 +23,7 @@ pub const File = union(enum) { _ = unused_fmt_string; _ = options; switch (file) { + .zig_object => |x| try writer.writeAll(x.path), .internal => try writer.writeAll(""), .object => |x| try writer.print("{}", .{x.fmtPath()}), .dylib => |x| try writer.writeAll(x.path), @@ -98,6 +100,7 @@ pub const File = union(enum) { pub const Entry = union(enum) { null: void, + zig_object: ZigObject, internal: InternalObject, object: Object, dylib: Dylib, @@ -114,3 +117,4 @@ const MachO = @import("../MachO.zig"); const Object = @import("Object.zig"); const Dylib = @import("Dylib.zig"); const Symbol = @import("Symbol.zig"); +const ZigObject = @import("ZigObject.zig"); diff --git a/src/link/MachO/relocatable.zig b/src/link/MachO/relocatable.zig index 00513479b9..323bf9d76f 100644 --- a/src/link/MachO/relocatable.zig +++ b/src/link/MachO/relocatable.zig @@ -274,7 +274,7 @@ fn writeAtoms(macho_file: *MachO) !void { const atom = macho_file.getAtom(atom_index).?; assert(atom.flags.alive); const off = atom.value - header.addr; - @memcpy(code[off..][0..atom.size], atom.getCode(macho_file)); + @memcpy(code[off..][0..atom.size], atom.getData(macho_file)); try 
atom.writeRelocs(macho_file, code[off..][0..atom.size], &relocs); } From 667af6511ff5f2973d48a962e507bf526259eb0f Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 16 Jan 2024 14:29:12 +0100 Subject: [PATCH 076/133] macho: move all unimplemented function stubs into ZigObject --- src/link/MachO.zig | 130 +++--------------------------- src/link/MachO/ZigObject.zig | 152 ++++++++++++++++++++++++++++++++--- 2 files changed, 151 insertions(+), 131 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 956f06a49a..33f866e464 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2967,60 +2967,16 @@ pub fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature) !void { try self.base.file.?.pwriteAll(buffer.items, offset); } -fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void { - _ = self; - _ = atom_index; - _ = new_block_size; - // TODO check the new capacity, and if it crosses the size threshold into a big enough - // capacity, insert a free list node for it. 
-} - -fn growAtom(self: *MachO, atom_index: Atom.Index, new_atom_size: u64, alignment: Alignment) !u64 { - _ = self; - _ = atom_index; - _ = new_atom_size; - _ = alignment; - @panic("TODO growAtom"); -} - pub fn updateFunc(self: *MachO, mod: *Module, func_index: InternPool.Index, air: Air, liveness: Liveness) !void { if (build_options.skip_non_native and builtin.object_format != .macho) { @panic("Attempted to compile for object format that was disabled by build configuration"); } if (self.llvm_object) |llvm_object| return llvm_object.updateFunc(mod, func_index, air, liveness); - - @panic("TODO updateFunc"); + return self.getZigObject().?.updateFunc(self, mod, func_index, air, liveness); } pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: InternPool.DeclIndex) !u32 { - _ = self; - _ = typed_value; - _ = decl_index; - - @panic("TODO lowerUnnamedConst"); -} - -const LowerConstResult = union(enum) { - ok: Atom.Index, - fail: *Module.ErrorMsg, -}; - -fn lowerConst( - self: *MachO, - name: []const u8, - tv: TypedValue, - required_alignment: InternPool.Alignment, - sect_id: u8, - src_loc: Module.SrcLoc, -) !LowerConstResult { - _ = self; - _ = name; - _ = tv; - _ = required_alignment; - _ = sect_id; - _ = src_loc; - - @panic("TODO lowerConst"); + return self.getZigObject().?.lowerUnnamedConst(self, typed_value, decl_index); } pub fn updateDecl(self: *MachO, mod: *Module, decl_index: InternPool.DeclIndex) !void { @@ -3028,55 +2984,12 @@ pub fn updateDecl(self: *MachO, mod: *Module, decl_index: InternPool.DeclIndex) @panic("Attempted to compile for object format that was disabled by build configuration"); } if (self.llvm_object) |llvm_object| return llvm_object.updateDecl(mod, decl_index); - - const tracy = trace(@src()); - defer tracy.end(); - - @panic("TODO updateDecl"); -} - -fn updateLazySymbolAtom( - self: *MachO, - sym: link.File.LazySymbol, - atom_index: Atom.Index, - section_index: u8, -) !void { - _ = self; - _ = sym; - _ = atom_index; - _ 
= section_index; - @panic("TODO updateLazySymbolAtom"); -} - -pub fn getOrCreateAtomForLazySymbol(self: *MachO, sym: link.File.LazySymbol) !Atom.Index { - _ = self; - _ = sym; - @panic("TODO getOrCreateAtomForLazySymbol"); -} - -pub fn getOrCreateAtomForDecl(self: *MachO, decl_index: InternPool.DeclIndex) !Atom.Index { - _ = self; - _ = decl_index; - @panic("TODO getOrCreateAtomForDecl"); -} - -fn getDeclOutputSection(self: *MachO, decl_index: InternPool.DeclIndex) u8 { - _ = self; - _ = decl_index; - @panic("TODO getDeclOutputSection"); -} - -fn updateDeclCode(self: *MachO, decl_index: InternPool.DeclIndex, code: []u8) !u64 { - _ = self; - _ = decl_index; - _ = code; - @panic("TODO updateDeclCode"); + return self.getZigObject().?.updateDecl(self, mod, decl_index); } pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl_index: InternPool.DeclIndex) !void { - if (self.d_sym) |*d_sym| { - try d_sym.dwarf.updateDeclLineNumber(module, decl_index); - } + if (self.llvm_object) |_| return; + return self.getZigObject().?.updateDeclLineNumber(module, decl_index); } pub fn updateExports( @@ -3088,10 +3001,8 @@ pub fn updateExports( if (build_options.skip_non_native and builtin.object_format != .macho) { @panic("Attempted to compile for object format that was disabled by build configuration"); } - if (self.llvm_object) |llvm_object| - return llvm_object.updateExports(mod, exported, exports); - - @panic("TODO updateExports"); + if (self.llvm_object) |llvm_object| return llvm_object.updateExports(mod, exported, exports); + return self.getZigObject().?.updateExports(self, mod, exported, exports); } pub fn deleteDeclExport( @@ -3100,20 +3011,12 @@ pub fn deleteDeclExport( name: InternPool.NullTerminatedString, ) Allocator.Error!void { if (self.llvm_object) |_| return; - _ = decl_index; - _ = name; - @panic("TODO deleteDeclExport"); -} - -fn freeUnnamedConsts(self: *MachO, decl_index: InternPool.DeclIndex) void { - _ = self; - _ = decl_index; - @panic("TODO 
freeUnnamedConst"); + return self.getZigObject().?.deleteDeclExport(self, decl_index, name); } pub fn freeDecl(self: *MachO, decl_index: InternPool.DeclIndex) void { if (self.llvm_object) |llvm_object| return llvm_object.freeDecl(decl_index); - @panic("TODO freeDecl"); + return self.getZigObject().?.freeDecl(decl_index); } pub fn getDeclVAddr(self: *MachO, decl_index: InternPool.DeclIndex, reloc_info: link.File.RelocInfo) !u64 { @@ -3127,25 +3030,16 @@ pub fn lowerAnonDecl( explicit_alignment: InternPool.Alignment, src_loc: Module.SrcLoc, ) !codegen.Result { - _ = self; - _ = decl_val; - _ = explicit_alignment; - _ = src_loc; - @panic("TODO lowerAnonDecl"); + return self.getZigObject().?.lowerAnonDecl(self, decl_val, explicit_alignment, src_loc); } pub fn getAnonDeclVAddr(self: *MachO, decl_val: InternPool.Index, reloc_info: link.File.RelocInfo) !u64 { assert(self.llvm_object == null); - _ = decl_val; - _ = reloc_info; - @panic("TODO getAnonDeclVAddr"); + return self.getZigObject().?.getAnonDeclVAddr(self, decl_val, reloc_info); } pub fn getGlobalSymbol(self: *MachO, name: []const u8, lib_name: ?[]const u8) !u32 { - _ = self; - _ = name; - _ = lib_name; - @panic("TODO getGlobalSymbol"); + return self.getZigObject().?.getGlobalSymbol(self, name, lib_name); } pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index e8adebc5c0..578c059345 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -33,19 +33,6 @@ fn addNlist(self: *ZigObject, allocator: Allocator) !Symbol.Index { return index; } -pub fn getDeclVAddr( - self: *ZigObject, - macho_file: *MachO, - decl_index: InternPool.DeclIndex, - reloc_info: link.File.RelocInfo, -) !u64 { - _ = self; - _ = macho_file; - _ = decl_index; - _ = reloc_info; - @panic("TODO getDeclVAddr"); -} - pub fn resolveSymbols(self: *ZigObject, macho_file: *MachO) void { _ = self; _ = macho_file; @@ -115,6 +102,145 @@ pub fn 
getInputSection(self: ZigObject, atom: Atom, macho_file: *MachO) macho.se return sect; } +pub fn getDeclVAddr( + self: *ZigObject, + macho_file: *MachO, + decl_index: InternPool.DeclIndex, + reloc_info: link.File.RelocInfo, +) !u64 { + _ = self; + _ = macho_file; + _ = decl_index; + _ = reloc_info; + @panic("TODO getDeclVAddr"); +} + +pub fn getAnonDeclVAddr( + self: *ZigObject, + macho_file: *MachO, + decl_val: InternPool.Index, + reloc_info: link.File.RelocInfo, +) !u64 { + _ = self; + _ = macho_file; + _ = decl_val; + _ = reloc_info; + @panic("TODO getAnonDeclVAddr"); +} + +pub fn lowerAnonDecl( + self: *ZigObject, + macho_file: *MachO, + decl_val: InternPool.Index, + explicit_alignment: InternPool.Alignment, + src_loc: Module.SrcLoc, +) !codegen.Result { + _ = self; + _ = macho_file; + _ = decl_val; + _ = explicit_alignment; + _ = src_loc; + @panic("TODO lowerAnonDecl"); +} + +pub fn freeDecl(self: *ZigObject, macho_file: *MachO, decl_index: InternPool.DeclIndex) void { + _ = self; + _ = macho_file; + _ = decl_index; + @panic("TODO freeDecl"); +} + +pub fn updateFunc( + self: *ZigObject, + macho_file: *MachO, + mod: *Module, + func_index: InternPool.Index, + air: Air, + liveness: Liveness, +) !void { + _ = self; + _ = macho_file; + _ = mod; + _ = func_index; + _ = air; + _ = liveness; + @panic("TODO updateFunc"); +} + +pub fn updateDecl( + self: *ZigObject, + macho_file: *MachO, + mod: *Module, + decl_index: InternPool.DeclIndex, +) link.File.UpdateDeclError!void { + _ = self; + _ = macho_file; + _ = mod; + _ = decl_index; + @panic("TODO updateDecl"); +} + +pub fn lowerUnnamedConst( + self: *ZigObject, + macho_file: *MachO, + typed_value: TypedValue, + decl_index: InternPool.DeclIndex, +) !u32 { + _ = self; + _ = macho_file; + _ = typed_value; + _ = decl_index; + @panic("TODO lowerUnnamedConst"); +} + +pub fn updateExports( + self: *ZigObject, + macho_file: *MachO, + mod: *Module, + exported: Module.Exported, + exports: []const *Module.Export, +) 
link.File.UpdateExportsError!void { + _ = self; + _ = macho_file; + _ = mod; + _ = exported; + _ = exports; + @panic("TODO updateExports"); +} + +/// Must be called only after a successful call to `updateDecl`. +pub fn updateDeclLineNumber( + self: *ZigObject, + mod: *Module, + decl_index: InternPool.DeclIndex, +) !void { + _ = self; + _ = mod; + _ = decl_index; + @panic("TODO updateDeclLineNumber"); +} + +pub fn deleteDeclExport( + self: *ZigObject, + macho_file: *MachO, + decl_index: InternPool.DeclIndex, + name: InternPool.NullTerminatedString, +) void { + _ = self; + _ = macho_file; + _ = decl_index; + _ = name; + @panic("TODO deleteDeclExport"); +} + +pub fn getGlobalSymbol(self: *ZigObject, macho_file: *MachO, name: []const u8, lib_name: ?[]const u8) !u32 { + _ = self; + _ = macho_file; + _ = name; + _ = lib_name; + @panic("TODO getGlobalSymbol"); +} + pub fn fmtSymtab(self: *ZigObject, macho_file: *MachO) std.fmt.Formatter(formatSymtab) { return .{ .data = .{ .self = self, From 8437ba6a795f015ad34c33b8ac768def24cfbc5e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 16 Jan 2024 23:14:05 +0100 Subject: [PATCH 077/133] macho: patch up more holes with ZigObject --- src/link/MachO.zig | 66 +++++++++++++++++++++++------------- src/link/MachO/ZigObject.zig | 28 +++++++++++++++ 2 files changed, 70 insertions(+), 24 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 33f866e464..a028adde8a 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -236,31 +236,32 @@ pub fn createEmpty( try self.getZigObject().?.init(self); // TODO init metadata + // TODO init dwarf - if (comp.config.debug_format != .strip) { - // Create dSYM bundle. - log.debug("creating {s}.dSYM bundle", .{emit.sub_path}); + // if (comp.config.debug_format != .strip) { + // // Create dSYM bundle. 
+ // log.debug("creating {s}.dSYM bundle", .{emit.sub_path}); - const d_sym_path = try std.fmt.allocPrint( - arena, - "{s}.dSYM" ++ fs.path.sep_str ++ "Contents" ++ fs.path.sep_str ++ "Resources" ++ fs.path.sep_str ++ "DWARF", - .{emit.sub_path}, - ); + // const d_sym_path = try std.fmt.allocPrint( + // arena, + // "{s}.dSYM" ++ fs.path.sep_str ++ "Contents" ++ fs.path.sep_str ++ "Resources" ++ fs.path.sep_str ++ "DWARF", + // .{emit.sub_path}, + // ); - var d_sym_bundle = try emit.directory.handle.makeOpenPath(d_sym_path, .{}); - defer d_sym_bundle.close(); + // var d_sym_bundle = try emit.directory.handle.makeOpenPath(d_sym_path, .{}); + // defer d_sym_bundle.close(); - const d_sym_file = try d_sym_bundle.createFile(emit.sub_path, .{ - .truncate = false, - .read = true, - }); + // const d_sym_file = try d_sym_bundle.createFile(emit.sub_path, .{ + // .truncate = false, + // .read = true, + // }); - self.d_sym = .{ - .allocator = gpa, - .dwarf = link.File.Dwarf.init(&self.base, .dwarf32), - .file = d_sym_file, - }; - } + // self.d_sym = .{ + // .allocator = gpa, + // .dwarf = link.File.Dwarf.init(&self.base, .dwarf32), + // .file = d_sym_file, + // }; + // } } } @@ -379,6 +380,7 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node // --verbose-link if (comp.verbose_link) try self.dumpArgv(comp); + if (self.getZigObject()) |zo| try zo.flushModule(self); if (self.base.isStaticLib()) return self.flushStaticLib(comp, module_obj_path); if (self.base.isObject()) return relocatable.flush(self, comp, module_obj_path); @@ -1311,6 +1313,8 @@ pub fn resolveSymbols(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); + // Resolve symbols in the ZigObject. For now, we assume that it's always live. + if (self.getZigObject()) |zo| zo.asFile().resolveSymbols(self); // Resolve symbols on the set of all objects and shared objects (even if some are unneeded). 
for (self.objects.items) |index| self.getFile(index).?.resolveSymbols(self); for (self.dylibs.items) |index| self.getFile(index).?.resolveSymbols(self); @@ -1319,6 +1323,7 @@ pub fn resolveSymbols(self: *MachO) !void { self.markLive(); // Reset state of all globals after marking live objects. + if (self.getZigObject()) |zo| zo.asFile().resetGlobals(self); for (self.objects.items) |index| self.getFile(index).?.resetGlobals(self); for (self.dylibs.items) |index| self.getFile(index).?.resetGlobals(self); @@ -1332,6 +1337,7 @@ pub fn resolveSymbols(self: *MachO) !void { } // Re-resolve the symbols. + if (self.getZigObject()) |zo| zo.resolveSymbols(self); for (self.objects.items) |index| self.getFile(index).?.resolveSymbols(self); for (self.dylibs.items) |index| self.getFile(index).?.resolveSymbols(self); } @@ -1351,6 +1357,7 @@ fn markLive(self: *MachO) void { if (file == .object) file.object.alive = true; } } + if (self.getZigObject()) |zo| zo.markLive(self); for (self.objects.items) |index| { const object = self.getFile(index).?.object; if (object.alive) object.markLive(self); @@ -1449,12 +1456,23 @@ fn createObjcSections(self: *MachO) !void { } fn claimUnresolved(self: *MachO) error{OutOfMemory}!void { - for (self.objects.items) |index| { - const object = self.getFile(index).?.object; + const gpa = self.base.comp.gpa; - for (object.symbols.items, 0..) |sym_index, i| { + var objects = try std.ArrayList(File.Index).initCapacity(gpa, self.objects.items.len + 1); + defer objects.deinit(); + if (self.getZigObject()) |zo| objects.appendAssumeCapacity(zo.index); + objects.appendSliceAssumeCapacity(self.objects.items); + + for (objects.items) |index| { + const file = self.getFile(index).?; + + for (file.getSymbols(), 0..) 
|sym_index, i| { const nlist_idx = @as(Symbol.Index, @intCast(i)); - const nlist = object.symtab.items(.nlist)[nlist_idx]; + const nlist = switch (file) { + .object => |x| x.symtab.items(.nlist)[nlist_idx], + .zig_object => |x| x.symtab.items(.nlist)[nlist_idx], + else => unreachable, + }; if (!nlist.ext()) continue; if (!nlist.undf()) continue; diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 578c059345..da9fafea3d 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -49,6 +49,24 @@ pub fn resetGlobals(self: *ZigObject, macho_file: *MachO) void { } } +pub fn markLive(self: *ZigObject, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items, 0..) |index, nlist_idx| { + const nlist = self.symtab.items(.nlist)[nlist_idx]; + if (!nlist.ext()) continue; + + const sym = macho_file.getSymbol(index); + const file = sym.getFile(macho_file) orelse continue; + const should_keep = nlist.undf() or (nlist.tentative() and !sym.flags.tentative); + if (should_keep and file == .object and !file.object.alive) { + file.object.alive = true; + file.object.markLive(macho_file); + } + } +} + pub fn calcSymtabSize(self: *ZigObject, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); @@ -102,6 +120,12 @@ pub fn getInputSection(self: ZigObject, atom: Atom, macho_file: *MachO) macho.se return sect; } +pub fn flushModule(self: *ZigObject, macho_file: *MachO) !void { + _ = self; + _ = macho_file; + @panic("TODO flushModule"); +} + pub fn getDeclVAddr( self: *ZigObject, macho_file: *MachO, @@ -241,6 +265,10 @@ pub fn getGlobalSymbol(self: *ZigObject, macho_file: *MachO, name: []const u8, l @panic("TODO getGlobalSymbol"); } +pub fn asFile(self: *ZigObject) File { + return .{ .zig_object = self }; +} + pub fn fmtSymtab(self: *ZigObject, macho_file: *MachO) std.fmt.Formatter(formatSymtab) { return .{ .data = .{ .self = self, From 
b66911370b3a5376c8f383dc0a187ffe9c3bbeb2 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 17 Jan 2024 06:30:30 +0100 Subject: [PATCH 078/133] macho: forward parsed -compatibility_version to the linker --- src/main.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main.zig b/src/main.zig index 857315346e..c8b0d95520 100644 --- a/src/main.zig +++ b/src/main.zig @@ -3218,6 +3218,7 @@ fn buildOutputType( .clang_passthrough_mode = clang_passthrough_mode, .clang_preprocessor_mode = clang_preprocessor_mode, .version = optional_version, + .compatibility_version = compatibility_version, .libc_installation = if (create_module.libc_installation) |*lci| lci else null, .verbose_cc = verbose_cc, .verbose_link = verbose_link, From 9509fadbe38e77bc0f8b079c4d9def2937d81322 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 17 Jan 2024 07:17:58 +0100 Subject: [PATCH 079/133] macho: split symbol.flags.got into needs_got and has_got --- src/link/MachO.zig | 6 +-- src/link/MachO/Atom.zig | 6 +-- src/link/MachO/Object.zig | 4 +- src/link/MachO/Symbol.zig | 5 ++- src/link/MachO/ZigObject.zig | 87 +++++++++++++++++++++++++++++++++--- src/link/MachO/synthetic.zig | 1 + 6 files changed, 94 insertions(+), 15 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index a028adde8a..856a65594c 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1601,18 +1601,18 @@ fn scanRelocs(self: *MachO) !void { if (self.dyld_stub_binder_index) |index| { const sym = self.getSymbol(index); - if (sym.getFile(self) != null) sym.flags.got = true; + if (sym.getFile(self) != null) sym.flags.needs_got = true; } if (self.objc_msg_send_index) |index| { const sym = self.getSymbol(index); if (sym.getFile(self) != null) - sym.flags.got = true; // TODO is it always needed, or only if we are synthesising fast stubs? + sym.flags.needs_got = true; // TODO is it always needed, or only if we are synthesising fast stubs? } for (self.symbols.items, 0..) 
|*symbol, i| { const index = @as(Symbol.Index, @intCast(i)); - if (symbol.flags.got) { + if (symbol.flags.needs_got) { log.debug("'{s}' needs GOT", .{symbol.getName(self)}); try self.got.addSymbol(index, self); } diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 3e9884f770..222129df0a 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -204,7 +204,7 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { (symbol.flags.@"export" and (symbol.flags.weak or symbol.flags.interposable)) or macho_file.getTarget().cpu.arch == .aarch64) // TODO relax on arm64 { - symbol.flags.got = true; + symbol.flags.needs_got = true; if (symbol.flags.weak) { macho_file.binds_to_weak = true; } @@ -212,7 +212,7 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { }, .got => { - rel.getTargetSymbol(macho_file).flags.got = true; + rel.getTargetSymbol(macho_file).flags.needs_got = true; }, .tlv, @@ -452,7 +452,7 @@ fn resolveRelocInner( assert(rel.tag == .@"extern"); assert(rel.meta.length == 2); assert(rel.meta.pcrel); - if (rel.getTargetSymbol(macho_file).flags.got) { + if (rel.getTargetSymbol(macho_file).flags.has_got) { try writer.writeInt(i32, @intCast(G + A - P), .little); } else { try x86_64.relaxGotLoad(code[rel_offset - 3 ..]); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index d31aaf667f..662bab9b1f 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -1105,10 +1105,10 @@ pub fn scanRelocs(self: Object, macho_file: *MachO) !void { if (!rec.alive) continue; if (rec.getFde(macho_file)) |fde| { if (fde.getCie(macho_file).getPersonality(macho_file)) |sym| { - sym.flags.got = true; + sym.flags.needs_got = true; } } else if (rec.getPersonality(macho_file)) |sym| { - sym.flags.got = true; + sym.flags.needs_got = true; } } } diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index c6cf3f9631..421e9bc02c 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -118,7 
+118,7 @@ pub fn getAddress(symbol: Symbol, opts: struct { } pub fn getGotAddress(symbol: Symbol, macho_file: *MachO) u64 { - if (!symbol.flags.got) return 0; + if (!symbol.flags.has_got) return 0; const extra = symbol.getExtra(macho_file).?; return macho_file.got.getAddress(extra.got, macho_file); } @@ -349,7 +349,8 @@ pub const Flags = packed struct { output_symtab: bool = false, /// Whether the symbol contains __got indirection. - got: bool = false, + needs_got: bool = false, + has_got: bool = false, /// Whether the symbols contains __stubs indirection. stubs: bool = false, diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index da9fafea3d..5030527617 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -197,11 +197,88 @@ pub fn updateDecl( mod: *Module, decl_index: InternPool.DeclIndex, ) link.File.UpdateDeclError!void { - _ = self; - _ = macho_file; - _ = mod; - _ = decl_index; - @panic("TODO updateDecl"); + const tracy = trace(@src()); + defer tracy.end(); + + const decl = mod.declPtr(decl_index); + + if (decl.val.getExternFunc(mod)) |_| { + return; + } + + if (decl.isExtern(mod)) { + // Extern variable gets a __got entry only + const variable = decl.getOwnedVariable(mod).?; + const name = mod.intern_pool.stringToSlice(decl.name); + const lib_name = mod.intern_pool.stringToSliceUnwrap(variable.lib_name); + const index = try self.getGlobalSymbol(macho_file, name, lib_name); + macho_file.getSymbol(index).flags.needs_got = true; + return; + } + + // const is_threadlocal = if (decl.val.getVariable(mod)) |variable| + // variable.is_threadlocal and comp.config.any_non_single_threaded + // else + // false; + // if (is_threadlocal) return self.updateThreadlocalVariable(mod, decl_index); + + // const atom_index = try self.getOrCreateAtomForDecl(decl_index); + // const sym_index = self.getAtom(atom_index).getSymbolIndex().?; + // Atom.freeRelocations(self, atom_index); + + // const comp = macho_file.base.comp; + // 
const gpa = comp.gpa; + + // var code_buffer = std.ArrayList(u8).init(gpa); + // defer code_buffer.deinit(); + + // var decl_state: ?Dwarf.DeclState = if (self.d_sym) |*d_sym| + // try d_sym.dwarf.initDeclState(mod, decl_index) + // else + // null; + // defer if (decl_state) |*ds| ds.deinit(); + + // const decl_val = if (decl.val.getVariable(mod)) |variable| Value.fromInterned(variable.init) else decl.val; + // const res = if (decl_state) |*ds| + // try codegen.generateSymbol(&self.base, decl.srcLoc(mod), .{ + // .ty = decl.ty, + // .val = decl_val, + // }, &code_buffer, .{ + // .dwarf = ds, + // }, .{ + // .parent_atom_index = sym_index, + // }) + // else + // try codegen.generateSymbol(&self.base, decl.srcLoc(mod), .{ + // .ty = decl.ty, + // .val = decl_val, + // }, &code_buffer, .none, .{ + // .parent_atom_index = sym_index, + // }); + + // const code = switch (res) { + // .ok => code_buffer.items, + // .fail => |em| { + // decl.analysis = .codegen_failure; + // try mod.failed_decls.put(mod.gpa, decl_index, em); + // return; + // }, + // }; + // const addr = try self.updateDeclCode(decl_index, code); + + // if (decl_state) |*ds| { + // try self.d_sym.?.dwarf.commitDeclState( + // mod, + // decl_index, + // addr, + // self.getAtom(atom_index).size, + // ds, + // ); + // } + + // // Since we updated the vaddr and the size, each corresponding export symbol also + // // needs to be updated. 
+ // try self.updateExports(mod, .{ .decl_index = decl_index }, mod.getDeclExports(decl_index)); } pub fn lowerUnnamedConst( diff --git a/src/link/MachO/synthetic.zig b/src/link/MachO/synthetic.zig index c497bc5444..cc5643c7ec 100644 --- a/src/link/MachO/synthetic.zig +++ b/src/link/MachO/synthetic.zig @@ -13,6 +13,7 @@ pub const GotSection = struct { const entry = try got.symbols.addOne(gpa); entry.* = sym_index; const symbol = macho_file.getSymbol(sym_index); + symbol.flags.has_got = true; try symbol.addExtra(.{ .got = index }, macho_file); } From bd9d8bd462799c552eed9812484add68e498a3ee Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 17 Jan 2024 08:33:58 +0100 Subject: [PATCH 080/133] macho: create Atom for Decl in ZigObject --- src/link/MachO/Atom.zig | 6 +- src/link/MachO/Object.zig | 4 +- src/link/MachO/Symbol.zig | 7 +- src/link/MachO/ZigObject.zig | 168 +++++++++++++++++++++++++---------- 4 files changed, 131 insertions(+), 54 deletions(-) diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 222129df0a..98223a9ad2 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -55,7 +55,7 @@ pub fn getData(self: Atom, macho_file: *MachO) []const u8 { pub fn getRelocs(self: Atom, macho_file: *MachO) []const Relocation { return switch (self.getFile(macho_file)) { - .zig_object => @panic("TODO Atom.getRelocs"), + .zig_object => |x| x.getAtomRelocs(self), .object => |x| x.getAtomRelocs(self), else => unreachable, }; @@ -890,8 +890,8 @@ pub const Flags = packed struct { }; pub const Loc = struct { - pos: usize = 0, - len: usize = 0, + pos: u32 = 0, + len: u32 = 0, }; pub const Alignment = @import("../../InternPool.zig").Alignment; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 662bab9b1f..3fe4142ebc 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -608,7 +608,7 @@ fn initRelocs(self: *Object, macho_file: *MachO) !void { for (slice.items(.header), slice.items(.relocs), 
slice.items(.subsections)) |sect, relocs, subsections| { if (sect.isZerofill()) continue; - var next_reloc: usize = 0; + var next_reloc: u32 = 0; for (subsections.items) |subsection| { const atom = macho_file.getAtom(subsection.atom).?; if (!atom.flags.alive) continue; @@ -1767,7 +1767,7 @@ const Subsection = struct { off: u64, }; -const Nlist = struct { +pub const Nlist = struct { nlist: macho.nlist_64, size: u64, atom: Atom.Index, diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 421e9bc02c..a86aea80b5 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -19,7 +19,7 @@ out_n_sect: u16 = 0, /// Index of the source nlist this symbol references. /// Use `getNlist` to pull the nlist from the relevant file. -nlist_idx: u32 = 0, +nlist_idx: Index = 0, /// Misc flags for the symbol packaged as packed struct for compression. flags: Flags = .{}, @@ -352,6 +352,10 @@ pub const Flags = packed struct { needs_got: bool = false, has_got: bool = false, + /// Whether the symbol contains __got_zig indirection. + needs_zig_got: bool = false, + has_zig_got: bool = false, + /// Whether the symbols contains __stubs indirection. stubs: bool = false, @@ -386,5 +390,6 @@ const std = @import("std"); const Atom = @import("Atom.zig"); const File = @import("file.zig").File; const MachO = @import("../MachO.zig"); +const Nlist = Object.Nlist; const Object = @import("Object.zig"); const Symbol = @This(); diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 5030527617..b18b7b4c9d 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -7,6 +7,12 @@ symtab: std.MultiArrayList(Nlist) = .{}, symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, +/// Table of tracked Decls. +decls: DeclTable = .{}, + +/// A table of relocations. 
+relocs: RelocationTable = .{}, + output_symtab_ctx: MachO.SymtabCtx = .{}, pub fn init(self: *ZigObject, macho_file: *MachO) !void { @@ -20,6 +26,19 @@ pub fn deinit(self: *ZigObject, allocator: Allocator) void { self.symtab.deinit(allocator); self.symbols.deinit(allocator); self.atoms.deinit(allocator); + + { + var it = self.decls.iterator(); + while (it.next()) |entry| { + entry.value_ptr.exports.deinit(allocator); + } + self.decls.deinit(allocator); + } + + for (self.relocs.items) |*list| { + list.deinit(allocator); + } + self.relocs.deinit(allocator); } fn addNlist(self: *ZigObject, allocator: Allocator) !Symbol.Index { @@ -33,6 +52,38 @@ fn addNlist(self: *ZigObject, allocator: Allocator) !Symbol.Index { return index; } +pub fn addAtom(self: *ZigObject, macho_file: *MachO) !Symbol.Index { + const gpa = macho_file.base.comp.gpa; + const atom_index = try macho_file.addAtom(); + const symbol_index = try macho_file.addSymbol(); + const nlist_index = try self.addNlist(gpa); + + try self.atoms.append(gpa, atom_index); + try self.symbols.append(gpa, symbol_index); + + const atom = macho_file.getAtom(atom_index).?; + atom.file = self.index; + + const symbol = macho_file.getSymbol(symbol_index); + symbol.file = self.index; + symbol.atom = atom_index; + + self.symtab.items(.atom)[nlist_index] = atom_index; + symbol.nlist_idx = nlist_index; + + const relocs_index = @as(u32, @intCast(self.relocs.items.len)); + const relocs = try self.relocs.addOne(gpa); + relocs.* = .{}; + atom.relocs = .{ .pos = relocs_index, .len = 0 }; + + return symbol_index; +} + +pub fn getAtomRelocs(self: *ZigObject, atom: Atom) []const Relocation { + const relocs = self.relocs.items[atom.relocs.pos]; + return relocs.items[0..atom.relocs.len]; +} + pub fn resolveSymbols(self: *ZigObject, macho_file: *MachO) void { _ = self; _ = macho_file; @@ -216,54 +267,35 @@ pub fn updateDecl( return; } - // const is_threadlocal = if (decl.val.getVariable(mod)) |variable| - // variable.is_threadlocal and 
comp.config.any_non_single_threaded - // else - // false; - // if (is_threadlocal) return self.updateThreadlocalVariable(mod, decl_index); + const sym_index = try self.getOrCreateMetadataForDecl(macho_file, decl_index); + // TODO: free relocs if any - // const atom_index = try self.getOrCreateAtomForDecl(decl_index); - // const sym_index = self.getAtom(atom_index).getSymbolIndex().?; - // Atom.freeRelocations(self, atom_index); + const gpa = macho_file.base.comp.gpa; + var code_buffer = std.ArrayList(u8).init(gpa); + defer code_buffer.deinit(); - // const comp = macho_file.base.comp; - // const gpa = comp.gpa; + var decl_state: ?Dwarf.DeclState = null; // TODO: Dwarf + defer if (decl_state) |*ds| ds.deinit(); - // var code_buffer = std.ArrayList(u8).init(gpa); - // defer code_buffer.deinit(); + const decl_val = if (decl.val.getVariable(mod)) |variable| Value.fromInterned(variable.init) else decl.val; + const dio: codegen.DebugInfoOutput = if (decl_state) |*ds| .{ .dwarf = ds } else .none; + const res = + try codegen.generateSymbol(&macho_file.base, decl.srcLoc(mod), .{ + .ty = decl.ty, + .val = decl_val, + }, &code_buffer, dio, .{ + .parent_atom_index = sym_index, + }); - // var decl_state: ?Dwarf.DeclState = if (self.d_sym) |*d_sym| - // try d_sym.dwarf.initDeclState(mod, decl_index) - // else - // null; - // defer if (decl_state) |*ds| ds.deinit(); - - // const decl_val = if (decl.val.getVariable(mod)) |variable| Value.fromInterned(variable.init) else decl.val; - // const res = if (decl_state) |*ds| - // try codegen.generateSymbol(&self.base, decl.srcLoc(mod), .{ - // .ty = decl.ty, - // .val = decl_val, - // }, &code_buffer, .{ - // .dwarf = ds, - // }, .{ - // .parent_atom_index = sym_index, - // }) - // else - // try codegen.generateSymbol(&self.base, decl.srcLoc(mod), .{ - // .ty = decl.ty, - // .val = decl_val, - // }, &code_buffer, .none, .{ - // .parent_atom_index = sym_index, - // }); - - // const code = switch (res) { - // .ok => code_buffer.items, - // 
.fail => |em| { - // decl.analysis = .codegen_failure; - // try mod.failed_decls.put(mod.gpa, decl_index, em); - // return; - // }, - // }; + const code = switch (res) { + .ok => code_buffer.items, + .fail => |em| { + decl.analysis = .codegen_failure; + try mod.failed_decls.put(mod.gpa, decl_index, em); + return; + }, + }; + _ = code; // const addr = try self.updateDeclCode(decl_index, code); // if (decl_state) |*ds| { @@ -342,6 +374,32 @@ pub fn getGlobalSymbol(self: *ZigObject, macho_file: *MachO, name: []const u8, l @panic("TODO getGlobalSymbol"); } +pub fn getOrCreateMetadataForDecl( + self: *ZigObject, + macho_file: *MachO, + decl_index: InternPool.DeclIndex, +) !Symbol.Index { + const gpa = macho_file.base.comp.gpa; + const gop = try self.decls.getOrPut(gpa, decl_index); + if (!gop.found_existing) { + const any_non_single_threaded = macho_file.base.comp.config.any_non_single_threaded; + const sym_index = try self.addAtom(macho_file); + const mod = macho_file.base.comp.module.?; + const decl = mod.declPtr(decl_index); + const sym = macho_file.getSymbol(self.symbols.items[sym_index]); + if (decl.getOwnedVariable(mod)) |variable| { + if (variable.is_threadlocal and any_non_single_threaded) { + sym.flags.tlv = true; + } + } + if (!sym.flags.tlv) { + sym.flags.needs_zig_got = true; + } + gop.value_ptr.* = .{ .symbol_index = sym_index }; + } + return gop.value_ptr.symbol_index; +} + pub fn asFile(self: *ZigObject) File { return .{ .zig_object = self }; } @@ -395,11 +453,23 @@ fn formatAtoms( } } -const Nlist = struct { - nlist: macho.nlist_64, - size: u64, - atom: Atom.Index, +const DeclMetadata = struct { + symbol_index: Symbol.Index, + /// A list of all exports aliases of this Decl. 
+ exports: std.ArrayListUnmanaged(Symbol.Index) = .{}, + + fn @"export"(m: DeclMetadata, zig_object: *ZigObject, macho_file: *MachO, name: []const u8) ?*u32 { + for (m.exports.items) |*exp| { + const nlist = zig_object.symtab.items(.nlist)[exp.*]; + const exp_name = macho_file.strings.getAssumeExists(nlist.n_strx); + if (mem.eql(u8, name, exp_name)) return exp; + } + return null; + } }; +const DeclTable = std.AutoHashMapUnmanaged(InternPool.DeclIndex, DeclMetadata); + +const RelocationTable = std.ArrayListUnmanaged(std.ArrayListUnmanaged(Relocation)); const assert = std.debug.assert; const builtin = @import("builtin"); @@ -420,8 +490,10 @@ const File = @import("file.zig").File; const InternPool = @import("../../InternPool.zig"); const Liveness = @import("../../Liveness.zig"); const MachO = @import("../MachO.zig"); +const Nlist = Object.Nlist; const Module = @import("../../Module.zig"); const Object = @import("Object.zig"); +const Relocation = @import("Relocation.zig"); const Symbol = @import("Symbol.zig"); const StringTable = @import("../StringTable.zig"); const Type = @import("../../type.zig").Type; From 0b2133d4412e8f3c67a1eb6ddbb43afc02196703 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 17 Jan 2024 14:55:40 +0100 Subject: [PATCH 081/133] macho: init metadata and partially implement updateDecl --- src/link/MachO.zig | 79 +++++++++++++++++++++++++++++++++++- src/link/MachO/ZigObject.zig | 58 +++++++++++++++++++++++++- 2 files changed, 134 insertions(+), 3 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 856a65594c..912ac6a43e 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -82,6 +82,18 @@ lazy_bind: LazyBindSection = .{}, export_trie: ExportTrieSection = .{}, unwind_info: UnwindInfo = .{}, +/// Tracked loadable segments during incremental linking. +zig_text_seg_index: ?u8 = null, +zig_data_const_seg_index: ?u8 = null, +zig_data_seg_index: ?u8 = null, + +/// Tracked section headers with incremental updates to Zig object. 
+zig_text_section_index: ?u8 = null, +zig_data_const_section_index: ?u8 = null, +zig_data_section_index: ?u8 = null, +zig_bss_section_index: ?u8 = null, +zig_got_section_index: ?u8 = null, + has_tlv: bool = false, binds_to_weak: bool = false, weak_defines: bool = false, @@ -234,8 +246,11 @@ pub fn createEmpty( } }); self.zig_object = index; try self.getZigObject().?.init(self); + try self.initMetadata(.{ + .symbol_count_hint = options.symbol_count_hint, + .program_code_size_hint = options.program_code_size_hint, + }); - // TODO init metadata // TODO init dwarf // if (comp.config.debug_format != .strip) { @@ -3103,6 +3118,45 @@ fn findFreeSpace(self: *MachO, object_size: u64, min_alignment: u32) u64 { return start; } +const InitMetadataOptions = struct { + symbol_count_hint: u64, + program_code_size_hint: u64, +}; + +// TODO: move to ZigObject +// TODO: bring back pre-alloc of segments/sections +fn initMetadata(self: *MachO, options: InitMetadataOptions) !void { + _ = options; + + if (!self.base.isRelocatable()) { + // TODO: If we are not emitting a relocatable object file, init segments. 
+ } + + if (self.zig_text_section_index == null) { + self.zig_text_section_index = try self.addSection("__TEXT", "__text", .{ + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }); + } + + if (self.zig_got_section_index == null and !self.base.isRelocatable()) { + self.zig_got_section_index = try self.addSection("__DATA_CONST", "__got_zig", .{}); + } + + if (self.zig_data_const_section_index == null) { + self.zig_data_const_section_index = try self.addSection("__DATA_CONST", "__const", .{}); + } + + if (self.zig_data_section_index == null) { + self.zig_data_section_index = try self.addSection("__DATA", "__data", .{}); + } + + if (self.zig_bss_section_index == null) { + self.zig_bss_section_index = try self.addSection("__DATA", "_bss", .{ + .flags = macho.S_ZEROFILL, + }); + } +} + pub fn getTarget(self: MachO) std.Target { return self.base.comp.root_mod.resolved_target.result; } @@ -3148,6 +3202,29 @@ inline fn requiresThunks(self: MachO) bool { return self.getTarget().cpu.arch == .aarch64; } +pub fn addSegment(self: *MachO, name: []const u8, opts: struct { + vmaddr: u64 = 0, + vmsize: u64 = 0, + fileoff: u64 = 0, + filesize: u64 = 0, + prot: macho.vm_prot_t = macho.PROT.NONE, +}) error{OutOfMemory}!u8 { + const gpa = self.base.comp.gpa; + const index = @as(u8, @intCast(self.segments.items.len)); + try self.segments.append(gpa, .{ + .segname = makeStaticString(name), + .vmaddr = opts.vmaddr, + .vmsize = opts.vmsize, + .fileoff = opts.fileoff, + .filesize = opts.filesize, + .maxprot = opts.prot, + .initprot = opts.prot, + .nsects = 0, + .cmdsize = @sizeOf(macho.segment_command_64), + }); + return index; +} + const AddSectionOpts = struct { flags: u32 = macho.S_REGULAR, reserved1: u32 = 0, diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index b18b7b4c9d..7aaca00366 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -295,7 +295,8 @@ pub fn updateDecl( return; }, }; - _ = 
code; + const sect_index = try self.getDeclOutputSection(macho_file, decl, code); + _ = sect_index; // const addr = try self.updateDeclCode(decl_index, code); // if (decl_state) |*ds| { @@ -313,6 +314,59 @@ pub fn updateDecl( // try self.updateExports(mod, .{ .decl_index = decl_index }, mod.getDeclExports(decl_index)); } +fn getDeclOutputSection( + self: *ZigObject, + macho_file: *MachO, + decl: *const Module.Decl, + code: []const u8, +) error{OutOfMemory}!u8 { + _ = self; + const mod = macho_file.base.comp.module.?; + const any_non_single_threaded = macho_file.base.comp.config.any_non_single_threaded; + const sect_id: u8 = switch (decl.ty.zigTypeTag(mod)) { + .Fn => macho_file.zig_text_section_index.?, + else => blk: { + if (decl.getOwnedVariable(mod)) |variable| { + if (variable.is_threadlocal and any_non_single_threaded) { + const is_all_zeroes = for (code) |byte| { + if (byte != 0) break false; + } else true; + if (is_all_zeroes) break :blk macho_file.getSectionByName("__DATA", "__thread_bss") orelse try macho_file.addSection( + "__DATA", + "__thread_bss", + .{ .flags = macho.S_THREAD_LOCAL_ZEROFILL }, + ); + break :blk macho_file.getSectionByName("__DATA", "__thread_data") orelse try macho_file.addSection( + "__DATA", + "__thread_data", + .{ .flags = macho.S_THREAD_LOCAL_REGULAR }, + ); + } + + if (variable.is_const) break :blk macho_file.zig_data_const_section_index.?; + if (Value.fromInterned(variable.init).isUndefDeep(mod)) { + // TODO: get the optimize_mode from the Module that owns the decl instead + // of using the root module here. + break :blk switch (macho_file.base.comp.root_mod.optimize_mode) { + .Debug, .ReleaseSafe => macho_file.zig_data_section_index.?, + .ReleaseFast, .ReleaseSmall => macho_file.zig_bss_section_index.?, + }; + } + + // TODO I blatantly copied the logic from the Wasm linker, but is there a less + // intrusive check for all zeroes than this? 
+ const is_all_zeroes = for (code) |byte| { + if (byte != 0) break false; + } else true; + if (is_all_zeroes) break :blk macho_file.zig_bss_section_index.?; + break :blk macho_file.zig_data_section_index.?; + } + break :blk macho_file.zig_data_const_section_index.?; + }, + }; + return sect_id; +} + pub fn lowerUnnamedConst( self: *ZigObject, macho_file: *MachO, @@ -386,7 +440,7 @@ pub fn getOrCreateMetadataForDecl( const sym_index = try self.addAtom(macho_file); const mod = macho_file.base.comp.module.?; const decl = mod.declPtr(decl_index); - const sym = macho_file.getSymbol(self.symbols.items[sym_index]); + const sym = macho_file.getSymbol(sym_index); if (decl.getOwnedVariable(mod)) |variable| { if (variable.is_threadlocal and any_non_single_threaded) { sym.flags.tlv = true; From c7de5e511125a738269a802318695b98a88b2791 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 17 Jan 2024 18:59:24 +0100 Subject: [PATCH 082/133] macho: re-implement updateDeclCode --- src/link/MachO/ZigObject.zig | 95 +++++++++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 2 deletions(-) diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 7aaca00366..007df845f6 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -296,8 +296,16 @@ pub fn updateDecl( }, }; const sect_index = try self.getDeclOutputSection(macho_file, decl, code); - _ = sect_index; - // const addr = try self.updateDeclCode(decl_index, code); + const is_threadlocal = switch (macho_file.sections.items(.header[sect_index].type())) { + macho.S_THREAD_LOCAL_ZEROFILL, macho.S_THREAD_LOCAL_REGULAR => true, + else => false, + }; + if (is_threadlocal) { + // TODO: emit TLV + @panic("TODO updateDecl for TLS"); + } else { + try self.updateDeclCode(macho_file, decl_index, sym_index, sect_index, code); + } // if (decl_state) |*ds| { // try self.d_sym.?.dwarf.commitDeclState( @@ -314,6 +322,89 @@ pub fn updateDecl( // try self.updateExports(mod, .{ .decl_index = 
decl_index }, mod.getDeclExports(decl_index)); } +fn updateDeclCode( + self: *ZigObject, + macho_file: *MachO, + decl_index: Module.Decl.Index, + sym_index: Symbol.Index, + sect_index: u8, + code: []const u8, +) !void { + const gpa = self.base.comp.gpa; + const mod = self.base.comp.module.?; + const decl = mod.declPtr(decl_index); + const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + + log.debug("updateDeclCode {s}{*}", .{ decl_name, decl }); + + const required_alignment = decl.getAlignment(mod); + + const sect = &macho_file.sections.items(.header)[sect_index]; + const sym = macho_file.getSymbol(sym_index); + const nlist = &self.symtab.items(.nlist)[sym.nlist_idx]; + const size = &self.symtab.items(.size)[sym.nlist_idx]; + const atom = sym.getAtom(macho_file).?; + + sym.out_n_sect = sect_index; + atom.out_n_sect = sect_index; + + sym.name = try macho_file.strings.insert(gpa, decl_name); + atom.flags.alive = true; + atom.name = sym.name; + nlist.n_strx = sym.name; + nlist.n_type = macho.N_SECT; + nlist.n_sect = sect_index + 1; + size = code.len; + + const old_size = atom.size; + const old_vaddr = atom.value; + atom.alignment = required_alignment; + atom.size = code.len; + + if (old_size > 0) { + const capacity = atom.capacity(macho_file); + const need_realloc = code.len > capacity or !required_alignment.check(sym.value); + + if (need_realloc) { + try atom.grow(macho_file); + log.debug("growing {s} from 0x{x} to 0x{x}", .{ decl_name, old_vaddr, atom.value }); + if (old_vaddr != atom.value) { + sym.value = atom.value; + nlist.n_value = atom.value; + + if (!macho_file.base.isRelocatable()) { + log.debug(" (updating offset table entry)", .{}); + assert(sym.flags.has_zig_got); + const extra = sym.getExtra(macho_file).?; + try macho_file.zig_got.writeOne(macho_file, extra.zig_got); + } + } + } else if (code.len < old_size) { + atom.shrink(macho_file); + } else if (atom.next_index == null) { + const needed_size = (sym.value + code.len) - 
sect.addr; + sect.size = needed_size; + } + } else { + try atom.allocate(macho_file); + // TODO: freeDeclMetadata in case of error + + sym.value = atom.value; + sym.flags.needs_zig_got = true; + nlist.n_value = atom.value; + + if (!macho_file.base.isRelocatable()) { + const gop = try sym.getOrCreateZigGotEntry(sym_index, macho_file); + try macho_file.zig_got.writeOne(macho_file, gop.index); + } + } + + if (!sect.isZerofill()) { + const file_offset = sect.offset + sym.value - sect.addr; + try macho_file.base.file.?.pwriteAll(code, file_offset); + } +} + fn getDeclOutputSection( self: *ZigObject, macho_file: *MachO, From 8c578ba02ccff63a64093b5acdefcf7b95cc8c46 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 17 Jan 2024 20:09:12 +0100 Subject: [PATCH 083/133] macho: add __zig_got section implementation --- src/link/MachO.zig | 13 +++ src/link/MachO/Atom.zig | 153 +++++++++++++++++++++++++++++ src/link/MachO/Symbol.zig | 22 +++++ src/link/MachO/ZigObject.zig | 182 ++++++++++++++++++++++++++++++----- src/link/MachO/synthetic.zig | 118 +++++++++++++++++++++++ 5 files changed, 464 insertions(+), 24 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 912ac6a43e..035c59ac21 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -70,6 +70,7 @@ symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, strtab: std.ArrayListUnmanaged(u8) = .{}, indsymtab: Indsymtab = .{}, got: GotSection = .{}, +zig_got: ZigGotSection = .{}, stubs: StubsSection = .{}, stubs_helper: StubsHelperSection = .{}, objc_stubs: ObjcStubsSection = .{}, @@ -337,6 +338,7 @@ pub fn deinit(self: *MachO) void { self.symtab.deinit(gpa); self.strtab.deinit(gpa); self.got.deinit(gpa); + self.zig_got.deinit(gpa); self.stubs.deinit(gpa); self.objc_stubs.deinit(gpa); self.tlv_ptr.deinit(gpa); @@ -3157,6 +3159,13 @@ fn initMetadata(self: *MachO, options: InitMetadataOptions) !void { } } +pub fn growSection(self: *MachO, sect_index: u8, size: u64) !void { + _ = self; + _ = sect_index; + _ 
= size; + @panic("TODO growSection"); +} + pub fn getTarget(self: MachO) std.Target { return self.base.comp.root_mod.resolved_target.result; } @@ -3657,6 +3666,7 @@ fn fmtDumpState( try writer.print("stubs\n{}\n", .{self.stubs.fmt(self)}); try writer.print("objc_stubs\n{}\n", .{self.objc_stubs.fmt(self)}); try writer.print("got\n{}\n", .{self.got.fmt(self)}); + try writer.print("zig_got\n{}\n", .{self.zig_got.fmt(self)}); try writer.print("tlv_ptr\n{}\n", .{self.tlv_ptr.fmt(self)}); try writer.writeByte('\n'); try writer.print("sections\n{}\n", .{self.fmtSections()}); @@ -3759,6 +3769,8 @@ const Section = struct { header: macho.section_64, segment_id: u8, atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, + free_list: std.ArrayListUnmanaged(Atom.Index) = .{}, + last_atom_index: Atom.Index = 0, }; const HotUpdateState = struct { @@ -4125,4 +4137,5 @@ const TlvPtrSection = synthetic.TlvPtrSection; const TypedValue = @import("../TypedValue.zig"); const UnwindInfo = @import("MachO/UnwindInfo.zig"); const WeakBindSection = synthetic.WeakBindSection; +const ZigGotSection = synthetic.ZigGotSection; const ZigObject = @import("MachO/ZigObject.zig"); diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 98223a9ad2..374a98021f 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -37,6 +37,11 @@ unwind_records: Loc = .{}, flags: Flags = .{}, +/// Points to the previous and next neighbors, based on the `text_offset`. +/// This can be used to find, for example, the capacity of this `TextBlock`. +prev_index: Index = 0, +next_index: Index = 0, + pub fn getName(self: Atom, macho_file: *MachO) [:0]const u8 { return macho_file.strings.getAssumeExists(self.name); } @@ -171,6 +176,154 @@ pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 { return osec; } +/// Returns how much room there is to grow in virtual address space. +/// File offset relocation happens transparently, so it is not included in +/// this calculation. 
+pub fn capacity(self: Atom, macho_file: *MachO) u64 { + const next_value = if (macho_file.getAtom(self.next_index)) |next| next.value else std.math.maxInt(u32); + return next_value - self.value; +} + +pub fn freeListEligible(self: Atom, macho_file: *MachO) bool { + // No need to keep a free list node for the last block. + const next = macho_file.getAtom(self.next_index) orelse return false; + const cap = next.value - self.value; + const ideal_cap = MachO.padToIdeal(self.size); + if (cap <= ideal_cap) return false; + const surplus = cap - ideal_cap; + return surplus >= MachO.min_text_capacity; +} + +pub fn allocate(self: *Atom, macho_file: *MachO) !void { + const sect = &macho_file.sections.items(.header)[self.out_n_sect]; + const free_list = &macho_file.sections.items(.free_list)[self.out_n_sect]; + const last_atom_index = &macho_file.sections.items(.last_atom_index)[self.out_n_sect]; + const new_atom_ideal_capacity = MachO.padToIdeal(self.size); + + // We use these to indicate our intention to update metadata, placing the new atom, + // and possibly removing a free list node. + // It would be simpler to do it inside the for loop below, but that would cause a + // problem if an error was returned later in the function. So this action + // is actually carried out at the end of the function, when errors are no longer possible. + var atom_placement: ?Atom.Index = null; + var free_list_removal: ?usize = null; + + // First we look for an appropriately sized free list node. + // The list is unordered. We'll just take the first thing that works. + self.value = blk: { + var i: usize = free_list.items.len; + while (i < free_list.items.len) { + const big_atom_index = free_list.items[i]; + const big_atom = macho_file.getAtom(big_atom_index).?; + // We now have a pointer to a live atom that has too much capacity. + // Is it enough that we could fit this new atom? 
+ const cap = big_atom.capacity(macho_file); + const ideal_capacity = MachO.padToIdeal(cap); + const ideal_capacity_end_vaddr = std.math.add(u64, big_atom.value, ideal_capacity) catch ideal_capacity; + const capacity_end_vaddr = big_atom.value + cap; + const new_start_vaddr_unaligned = capacity_end_vaddr - new_atom_ideal_capacity; + const new_start_vaddr = self.alignment.backward(new_start_vaddr_unaligned); + if (new_start_vaddr < ideal_capacity_end_vaddr) { + // Additional bookkeeping here to notice if this free list node + // should be deleted because the block that it points to has grown to take up + // more of the extra capacity. + if (!big_atom.freeListEligible(macho_file)) { + _ = free_list.swapRemove(i); + } else { + i += 1; + } + continue; + } + // At this point we know that we will place the new block here. But the + // remaining question is whether there is still yet enough capacity left + // over for there to still be a free list node. + const remaining_capacity = new_start_vaddr - ideal_capacity_end_vaddr; + const keep_free_list_node = remaining_capacity >= MachO.min_text_capacity; + + // Set up the metadata to be updated, after errors are no longer possible. + atom_placement = big_atom_index; + if (!keep_free_list_node) { + free_list_removal = i; + } + break :blk new_start_vaddr; + } else if (macho_file.getAtom(last_atom_index.*)) |last| { + const ideal_capacity = MachO.padToIdeal(last.size); + const ideal_capacity_end_vaddr = last.value + ideal_capacity; + const new_start_vaddr = self.alignment.forward(ideal_capacity_end_vaddr); + // Set up the metadata to be updated, after errors are no longer possible. 
+ atom_placement = last.atom_index; + break :blk new_start_vaddr; + } else { + break :blk sect.addr; + } + }; + + log.debug("allocated atom({d}) : '{s}' at 0x{x} to 0x{x}", .{ + self.atom_index, + self.getName(macho_file), + self.value, + self.value + self.size, + }); + + const expand_section = if (atom_placement) |placement_index| + macho_file.getAtom(placement_index).?.next_index == 0 + else + true; + if (expand_section) { + const needed_size = (self.value + self.size) - sect.addr; + try macho_file.growSection(self.out_n_sect, needed_size); + last_atom_index.* = self.atom_index; + + // const zig_object = macho_file_file.getZigObject().?; + // if (zig_object.dwarf) |_| { + // // The .debug_info section has `low_pc` and `high_pc` values which is the virtual address + // // range of the compilation unit. When we expand the text section, this range changes, + // // so the DW_TAG.compile_unit tag of the .debug_info section becomes dirty. + // zig_object.debug_info_header_dirty = true; + // // This becomes dirty for the same reason. We could potentially make this more + // // fine-grained with the addition of support for more compilation units. It is planned to + // // model each package as a different compilation unit. + // zig_object.debug_aranges_section_dirty = true; + // } + } + sect.@"align" = @max(sect.@"align", self.alignment.toLog2Units()); + + // This function can also reallocate an atom. + // In this case we need to "unplug" it from its previous location before + // plugging it in to its new location. 
+ if (macho_file.getAtom(self.prev_index)) |prev| { + prev.next_index = self.next_index; + } + if (macho_file.getAtom(self.next_index)) |next| { + next.prev_index = self.prev_index; + } + + if (atom_placement) |big_atom_index| { + const big_atom = macho_file.getAtom(big_atom_index).?; + self.prev_index = big_atom_index; + self.next_index = big_atom.next_index; + big_atom.next_index = self.atom_index; + } else { + self.prev_index = 0; + self.next_index = 0; + } + if (free_list_removal) |i| { + _ = free_list.swapRemove(i); + } + + self.flags.alive = true; +} + +pub fn shrink(self: *Atom, macho_file: *MachO) void { + _ = self; + _ = macho_file; +} + +pub fn grow(self: *Atom, macho_file: *MachO) !void { + if (!self.alignment.check(self.value) or self.size > self.capacity(macho_file)) + try self.allocate(macho_file); +} + pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index a86aea80b5..5ffbabe1e2 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -149,6 +149,25 @@ pub fn getTlvPtrAddress(symbol: Symbol, macho_file: *MachO) u64 { return macho_file.tlv_ptr.getAddress(extra.tlv_ptr, macho_file); } +const GetOrCreateZigGotEntryResult = struct { + found_existing: bool, + index: ZigGotSection.Index, +}; + +pub fn getOrCreateZigGotEntry(symbol: *Symbol, symbol_index: Index, macho_file: *MachO) !GetOrCreateZigGotEntryResult { + assert(!macho_file.base.isRelocatable()); + assert(symbol.flags.needs_zig_got); + if (symbol.flags.has_zig_got) return .{ .found_existing = true, .index = symbol.getExtra(macho_file).?.zig_got }; + const index = try macho_file.zig_got.addSymbol(symbol_index, macho_file); + return .{ .found_existing = false, .index = index }; +} + +pub fn zigGotAddress(symbol: Symbol, macho_file: *MachO) u64 { + if (!symbol.flags.has_zig_got) return 0; + const extras = symbol.getExtra(macho_file).?; + return 
macho_file.zig_got.entryAddress(extras.zig_got, macho_file); +} + pub fn getOutputSymtabIndex(symbol: Symbol, macho_file: *MachO) ?u32 { if (!symbol.flags.output_symtab) return null; assert(!symbol.isSymbolStab(macho_file)); @@ -170,6 +189,7 @@ pub fn getOutputSymtabIndex(symbol: Symbol, macho_file: *MachO) ?u32 { const AddExtraOpts = struct { got: ?u32 = null, + zig_got: ?u32 = null, stubs: ?u32 = null, objc_stubs: ?u32 = null, objc_selrefs: ?u32 = null, @@ -374,6 +394,7 @@ pub const Visibility = enum { pub const Extra = struct { got: u32 = 0, + zig_got: u32 = 0, stubs: u32 = 0, objc_stubs: u32 = 0, objc_selrefs: u32 = 0, @@ -393,3 +414,4 @@ const MachO = @import("../MachO.zig"); const Nlist = Object.Nlist; const Object = @import("Object.zig"); const Symbol = @This(); +const ZigGotSection = @import("synthetic.zig").ZigGotSection; diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 007df845f6..4e0cb76a40 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -7,9 +7,36 @@ symtab: std.MultiArrayList(Nlist) = .{}, symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, +/// Table of tracked LazySymbols. +lazy_syms: LazySymbolTable = .{}, + /// Table of tracked Decls. decls: DeclTable = .{}, +/// Table of unnamed constants associated with a parent `Decl`. +/// We store them here so that we can free the constants whenever the `Decl` +/// needs updating or is freed. +/// +/// For example, +/// +/// ```zig +/// const Foo = struct{ +/// a: u8, +/// }; +/// +/// pub fn main() void { +/// var foo = Foo{ .a = 1 }; +/// _ = foo; +/// } +/// ``` +/// +/// value assigned to label `foo` is an unnamed constant belonging/associated +/// with `Decl` `main`, and lives as long as that `Decl`. +unnamed_consts: UnnamedConstTable = .{}, + +/// Table of tracked AnonDecls. +anon_decls: AnonDeclTable = .{}, + /// A table of relocations. 
relocs: RelocationTable = .{}, @@ -35,6 +62,24 @@ pub fn deinit(self: *ZigObject, allocator: Allocator) void { self.decls.deinit(allocator); } + self.lazy_syms.deinit(allocator); + + { + var it = self.unnamed_consts.valueIterator(); + while (it.next()) |syms| { + syms.deinit(allocator); + } + self.unnamed_consts.deinit(allocator); + } + + { + var it = self.anon_decls.iterator(); + while (it.next()) |entry| { + entry.value_ptr.exports.deinit(allocator); + } + self.anon_decls.deinit(allocator); + } + for (self.relocs.items) |*list| { list.deinit(allocator); } @@ -296,7 +341,7 @@ pub fn updateDecl( }, }; const sect_index = try self.getDeclOutputSection(macho_file, decl, code); - const is_threadlocal = switch (macho_file.sections.items(.header[sect_index].type())) { + const is_threadlocal = switch (macho_file.sections.items(.header)[sect_index].type()) { macho.S_THREAD_LOCAL_ZEROFILL, macho.S_THREAD_LOCAL_REGULAR => true, else => false, }; @@ -317,21 +362,21 @@ pub fn updateDecl( // ); // } - // // Since we updated the vaddr and the size, each corresponding export symbol also - // // needs to be updated. - // try self.updateExports(mod, .{ .decl_index = decl_index }, mod.getDeclExports(decl_index)); + // Since we updated the vaddr and the size, each corresponding export symbol also + // needs to be updated. 
+ try self.updateExports(macho_file, mod, .{ .decl_index = decl_index }, mod.getDeclExports(decl_index)); } fn updateDeclCode( self: *ZigObject, macho_file: *MachO, - decl_index: Module.Decl.Index, + decl_index: InternPool.DeclIndex, sym_index: Symbol.Index, sect_index: u8, code: []const u8, ) !void { - const gpa = self.base.comp.gpa; - const mod = self.base.comp.module.?; + const gpa = macho_file.base.comp.gpa; + const mod = macho_file.base.comp.module.?; const decl = mod.declPtr(decl_index); const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); @@ -342,7 +387,6 @@ fn updateDeclCode( const sect = &macho_file.sections.items(.header)[sect_index]; const sym = macho_file.getSymbol(sym_index); const nlist = &self.symtab.items(.nlist)[sym.nlist_idx]; - const size = &self.symtab.items(.size)[sym.nlist_idx]; const atom = sym.getAtom(macho_file).?; sym.out_n_sect = sect_index; @@ -354,7 +398,7 @@ fn updateDeclCode( nlist.n_strx = sym.name; nlist.n_type = macho.N_SECT; nlist.n_sect = sect_index + 1; - size = code.len; + self.symtab.items(.size)[sym.nlist_idx] = code.len; const old_size = atom.size; const old_vaddr = atom.value; @@ -363,14 +407,14 @@ fn updateDeclCode( if (old_size > 0) { const capacity = atom.capacity(macho_file); - const need_realloc = code.len > capacity or !required_alignment.check(sym.value); + const need_realloc = code.len > capacity or !required_alignment.check(sym.getAddress(.{}, macho_file)); if (need_realloc) { try atom.grow(macho_file); log.debug("growing {s} from 0x{x} to 0x{x}", .{ decl_name, old_vaddr, atom.value }); if (old_vaddr != atom.value) { - sym.value = atom.value; - nlist.n_value = atom.value; + sym.value = 0; + nlist.n_value = 0; if (!macho_file.base.isRelocatable()) { log.debug(" (updating offset table entry)", .{}); @@ -381,17 +425,17 @@ fn updateDeclCode( } } else if (code.len < old_size) { atom.shrink(macho_file); - } else if (atom.next_index == null) { - const needed_size = (sym.value + code.len) - 
sect.addr; + } else if (macho_file.getAtom(atom.next_index) == null) { + const needed_size = (sym.getAddress(.{}, macho_file) + code.len) - sect.addr; sect.size = needed_size; } } else { try atom.allocate(macho_file); // TODO: freeDeclMetadata in case of error - sym.value = atom.value; + sym.value = 0; sym.flags.needs_zig_got = true; - nlist.n_value = atom.value; + nlist.n_value = 0; if (!macho_file.base.isRelocatable()) { const gop = try sym.getOrCreateZigGotEntry(sym_index, macho_file); @@ -400,7 +444,7 @@ fn updateDeclCode( } if (!sect.isZerofill()) { - const file_offset = sect.offset + sym.value - sect.addr; + const file_offset = sect.offset + sym.getAddress(.{}, macho_file) - sect.addr; try macho_file.base.file.?.pwriteAll(code, file_offset); } } @@ -478,12 +522,91 @@ pub fn updateExports( exported: Module.Exported, exports: []const *Module.Export, ) link.File.UpdateExportsError!void { - _ = self; - _ = macho_file; - _ = mod; - _ = exported; - _ = exports; - @panic("TODO updateExports"); + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.comp.gpa; + const metadata = switch (exported) { + .decl_index => |decl_index| blk: { + _ = try self.getOrCreateMetadataForDecl(macho_file, decl_index); + break :blk self.decls.getPtr(decl_index).?; + }, + .value => |value| self.anon_decls.getPtr(value) orelse blk: { + const first_exp = exports[0]; + const res = try self.lowerAnonDecl(macho_file, value, .none, first_exp.getSrcLoc(mod)); + switch (res) { + .ok => {}, + .fail => |em| { + // TODO maybe it's enough to return an error here and let Module.processExportsInner + // handle the error? 
+ try mod.failed_exports.ensureUnusedCapacity(mod.gpa, 1); + mod.failed_exports.putAssumeCapacityNoClobber(first_exp, em); + return; + }, + } + break :blk self.anon_decls.getPtr(value).?; + }, + }; + const sym_index = metadata.symbol_index; + const nlist_idx = macho_file.getSymbol(sym_index).nlist_idx; + const nlist = self.symtab.items(.nlist)[nlist_idx]; + + for (exports) |exp| { + if (exp.opts.section.unwrap()) |section_name| { + if (!mod.intern_pool.stringEqlSlice(section_name, "__text")) { + try mod.failed_exports.ensureUnusedCapacity(mod.gpa, 1); + mod.failed_exports.putAssumeCapacityNoClobber(exp, try Module.ErrorMsg.create( + gpa, + exp.getSrcLoc(mod), + "Unimplemented: ExportOptions.section", + .{}, + )); + continue; + } + } + if (exp.opts.linkage == .LinkOnce) { + try mod.failed_exports.putNoClobber(mod.gpa, exp, try Module.ErrorMsg.create( + gpa, + exp.getSrcLoc(mod), + "Unimplemented: GlobalLinkage.LinkOnce", + .{}, + )); + continue; + } + + const exp_name = try std.fmt.allocPrint(gpa, "_{}", .{exp.opts.name.fmt(&mod.intern_pool)}); + defer gpa.free(exp_name); + + const name_off = try macho_file.strings.insert(gpa, exp_name); + const global_nlist_index = if (metadata.@"export"(self, macho_file, exp_name)) |exp_index| + exp_index.* + else blk: { + const global_nlist_index = try self.getGlobalSymbol(macho_file, exp_name, null); + try metadata.exports.append(gpa, global_nlist_index); + break :blk global_nlist_index; + }; + const global_nlist = &self.symtab.items(.nlist)[global_nlist_index]; + global_nlist.n_strx = name_off; + global_nlist.n_value = nlist.n_value; + global_nlist.n_sect = nlist.n_sect; + global_nlist.n_type = macho.N_EXT | macho.N_SECT; + self.symtab.items(.size)[global_nlist_index] = self.symtab.items(.size)[nlist_idx]; + self.symtab.items(.atom)[global_nlist_index] = self.symtab.items(.atom)[nlist_idx]; + + switch (exp.opts.linkage) { + .Internal => { + // Symbol should be hidden, or in MachO lingo, private extern. 
+ global_nlist.n_type |= macho.N_PEXT; + }, + .Strong => {}, + .Weak => { + // Weak linkage is specified as part of n_desc field. + // Symbol's n_type is like for a symbol with strong linkage. + global_nlist.n_desc |= macho.N_WEAK_DEF; + }, + else => unreachable, + } + } } /// Must be called only after a successful call to `updateDecl`. @@ -612,8 +735,19 @@ const DeclMetadata = struct { return null; } }; -const DeclTable = std.AutoHashMapUnmanaged(InternPool.DeclIndex, DeclMetadata); +const LazySymbolMetadata = struct { + const State = enum { unused, pending_flush, flushed }; + text_symbol_index: Symbol.Index = undefined, + data_const_symbol_index: Symbol.Index = undefined, + text_state: State = .unused, + rodata_state: State = .unused, +}; + +const DeclTable = std.AutoHashMapUnmanaged(InternPool.DeclIndex, DeclMetadata); +const UnnamedConstTable = std.AutoHashMapUnmanaged(InternPool.DeclIndex, std.ArrayListUnmanaged(Symbol.Index)); +const AnonDeclTable = std.AutoHashMapUnmanaged(InternPool.Index, DeclMetadata); +const LazySymbolTable = std.AutoArrayHashMapUnmanaged(InternPool.OptionalDeclIndex, LazySymbolMetadata); const RelocationTable = std.ArrayListUnmanaged(std.ArrayListUnmanaged(Relocation)); const assert = std.debug.assert; diff --git a/src/link/MachO/synthetic.zig b/src/link/MachO/synthetic.zig index cc5643c7ec..5a6e316da5 100644 --- a/src/link/MachO/synthetic.zig +++ b/src/link/MachO/synthetic.zig @@ -1,3 +1,121 @@ +pub const ZigGotSection = struct { + entries: std.ArrayListUnmanaged(Symbol.Index) = .{}, + dirty: bool = false, + + pub const Index = u32; + + pub fn deinit(zig_got: *ZigGotSection, allocator: Allocator) void { + zig_got.entries.deinit(allocator); + } + + fn allocateEntry(zig_got: *ZigGotSection, allocator: Allocator) !Index { + try zig_got.entries.ensureUnusedCapacity(allocator, 1); + // TODO add free list + const index = @as(Index, @intCast(zig_got.entries.items.len)); + _ = zig_got.entries.addOneAssumeCapacity(); + zig_got.dirty = true; + 
return index; + } + + pub fn addSymbol(zig_got: *ZigGotSection, sym_index: Symbol.Index, macho_file: *MachO) !Index { + const comp = macho_file.base.comp; + const gpa = comp.gpa; + const index = try zig_got.allocateEntry(gpa); + const entry = &zig_got.entries.items[index]; + entry.* = sym_index; + const symbol = macho_file.getSymbol(sym_index); + symbol.flags.has_zig_got = true; + try symbol.addExtra(.{ .zig_got = index }, macho_file); + return index; + } + + pub fn entryOffset(zig_got: ZigGotSection, index: Index, macho_file: *MachO) u64 { + _ = zig_got; + const sect = macho_file.sections.items(.header)[macho_file.zig_got_section_index.?]; + return sect.offset + @sizeOf(u64) * index; + } + + pub fn entryAddress(zig_got: ZigGotSection, index: Index, macho_file: *MachO) u64 { + _ = zig_got; + const sect = macho_file.sections.items(.header)[macho_file.zig_got_section_index.?]; + return sect.addr + @sizeOf(u64) * index; + } + + pub fn size(zig_got: ZigGotSection, macho_file: *MachO) usize { + _ = macho_file; + return @sizeOf(u64) * zig_got.entries.items.len; + } + + pub fn writeOne(zig_got: *ZigGotSection, macho_file: *MachO, index: Index) !void { + if (zig_got.dirty) { + const needed_size = zig_got.size(macho_file); + try macho_file.growSection(macho_file.zig_got_section_index.?, needed_size); + zig_got.dirty = false; + } + const off = zig_got.entryOffset(index, macho_file); + const entry = zig_got.entries.items[index]; + const value = macho_file.getSymbol(entry).getAddress(.{ .stubs = false }, macho_file); + + var buf: [8]u8 = undefined; + std.mem.writeInt(u64, &buf, value, .little); + try macho_file.base.file.?.pwriteAll(&buf, off); + } + + pub fn writeAll(zig_got: ZigGotSection, macho_file: *MachO, writer: anytype) !void { + for (zig_got.entries.items) |entry| { + const symbol = macho_file.getSymbol(entry); + const value = symbol.address(.{ .stubs = false }, macho_file); + try writer.writeInt(u64, value, .little); + } + } + + pub fn addDyldRelocs(zig_got: 
ZigGotSection, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.comp.gpa; + const seg_id = macho_file.sections.items(.segment_id)[macho_file.zig_got_sect_index.?]; + const seg = macho_file.segments.items[seg_id]; + + for (0..zig_got.symbols.items.len) |idx| { + const addr = zig_got.entryAddress(@intCast(idx), macho_file); + try macho_file.rebase.entries.append(gpa, .{ + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + }); + } + } + + const FormatCtx = struct { + zig_got: ZigGotSection, + macho_file: *MachO, + }; + + pub fn fmt(zig_got: ZigGotSection, macho_file: *MachO) std.fmt.Formatter(format2) { + return .{ .data = .{ .zig_got = zig_got, .macho_file = macho_file } }; + } + + pub fn format2( + ctx: FormatCtx, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = unused_fmt_string; + try writer.writeAll("__zig_got\n"); + for (ctx.zig_got.entries.items, 0..) |entry, index| { + const symbol = ctx.macho_file.getSymbol(entry); + try writer.print(" {d}@0x{x} => {d}@0x{x} ({s})\n", .{ + index, + ctx.zig_got.entryAddress(@intCast(index), ctx.macho_file), + entry, + symbol.getAddress(.{}, ctx.macho_file), + symbol.getName(ctx.macho_file), + }); + } + } +}; + pub const GotSection = struct { symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, From a6ed54ea2243bd9053333c09da6de5324c799b47 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 18 Jan 2024 12:56:27 +0100 Subject: [PATCH 084/133] macho: init metadata for incremental linking --- src/link/MachO.zig | 131 ++++++++++++++++++++++++++++++----- src/link/MachO/ZigObject.zig | 4 +- 2 files changed, 114 insertions(+), 21 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 035c59ac21..9ab4a9397e 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -85,15 +85,17 @@ unwind_info: UnwindInfo = .{}, /// Tracked loadable segments during incremental linking. 
zig_text_seg_index: ?u8 = null, -zig_data_const_seg_index: ?u8 = null, +zig_got_seg_index: ?u8 = null, +zig_const_seg_index: ?u8 = null, zig_data_seg_index: ?u8 = null, +zig_bss_seg_index: ?u8 = null, /// Tracked section headers with incremental updates to Zig object. zig_text_section_index: ?u8 = null, -zig_data_const_section_index: ?u8 = null, +zig_got_section_index: ?u8 = null, +zig_const_section_index: ?u8 = null, zig_data_section_index: ?u8 = null, zig_bss_section_index: ?u8 = null, -zig_got_section_index: ?u8 = null, has_tlv: bool = false, binds_to_weak: bool = false, @@ -252,6 +254,8 @@ pub fn createEmpty( .program_code_size_hint = options.program_code_size_hint, }); + std.debug.print("{}", .{self.dumpState()}); + // TODO init dwarf // if (comp.config.debug_format != .strip) { @@ -3082,33 +3086,45 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { } fn detectAllocCollision(self: *MachO, start: u64, size: u64) ?u64 { - // TODO: header and load commands have to be part of the __TEXT segment - const header_size = self.segments.items[self.header_segment_cmd_index.?].filesize; + // Conservatively commit one page size as reserved space for the headers as we + // expect it to grow and everything else be moved in flush anyhow. 
+ const header_size = self.getPageSize(); if (start < header_size) return header_size; const end = start + padToIdeal(size); for (self.sections.items(.header)) |header| { - const tight_size = header.size; - const increased_size = padToIdeal(tight_size); + if (header.isZerofill()) continue; + const increased_size = padToIdeal(header.size); const test_end = header.offset + increased_size; if (end > header.offset and start < test_end) { return test_end; } } + for (self.segments.items) |seg| { + const increased_size = padToIdeal(seg.filesize); + const test_end = seg.fileoff +| increased_size; + if (end > seg.fileoff and start < test_end) { + return test_end; + } + } + return null; } fn allocatedSize(self: *MachO, start: u64) u64 { - if (start == 0) - return 0; + if (start == 0) return 0; var min_pos: u64 = std.math.maxInt(u64); for (self.sections.items(.header)) |header| { if (header.offset <= start) continue; if (header.offset < min_pos) min_pos = header.offset; } + for (self.segments.items) |seg| { + if (seg.fileoff <= start) continue; + if (seg.fileoff < min_pos) min_pos = seg.fileoff; + } return min_pos - start; } @@ -3126,36 +3142,113 @@ const InitMetadataOptions = struct { }; // TODO: move to ZigObject -// TODO: bring back pre-alloc of segments/sections fn initMetadata(self: *MachO, options: InitMetadataOptions) !void { - _ = options; - if (!self.base.isRelocatable()) { - // TODO: If we are not emitting a relocatable object file, init segments. 
+ const base_vmaddr = blk: { + const pagezero_size = self.pagezero_size orelse default_pagezero_size; + break :blk mem.alignBackward(u64, pagezero_size, self.getPageSize()); + }; + + { + const filesize = options.program_code_size_hint; + const off = self.findFreeSpace(filesize, self.getPageSize()); + self.zig_text_seg_index = try self.addSegment("__TEXT_ZIG", .{ + .fileoff = off, + .filesize = filesize, + .vmaddr = base_vmaddr + 0x8000000, + .vmsize = filesize, + .prot = macho.PROT.READ | macho.PROT.EXEC, + }); + } + + { + const filesize = options.symbol_count_hint * @sizeOf(u64); + const off = self.findFreeSpace(filesize, self.getPageSize()); + self.zig_got_seg_index = try self.addSegment("__GOT_ZIG", .{ + .fileoff = off, + .filesize = filesize, + .vmaddr = base_vmaddr + 0x4000000, + .vmsize = filesize, + .prot = macho.PROT.READ | macho.PROT.WRITE, + }); + } + + { + const filesize: u64 = 1024; + const off = self.findFreeSpace(filesize, self.getPageSize()); + self.zig_const_seg_index = try self.addSegment("__CONST_ZIG", .{ + .fileoff = off, + .filesize = filesize, + .vmaddr = base_vmaddr + 0xc000000, + .vmsize = filesize, + .prot = macho.PROT.READ | macho.PROT.WRITE, + }); + } + + { + const filesize: u64 = 1024; + const off = self.findFreeSpace(filesize, self.getPageSize()); + self.zig_data_seg_index = try self.addSegment("__DATA_ZIG", .{ + .fileoff = off, + .filesize = filesize, + .vmaddr = base_vmaddr + 0x10000000, + .vmsize = filesize, + .prot = macho.PROT.READ | macho.PROT.WRITE, + }); + } + + { + const memsize: u64 = 1024; + self.zig_bss_seg_index = try self.addSegment("__BSS_ZIG", .{ + .vmaddr = base_vmaddr + 0x14000000, + .vmsize = memsize, + .prot = macho.PROT.READ | macho.PROT.WRITE, + }); + } + } else { + @panic("TODO initMetadata when relocatable"); } + const appendSect = struct { + fn appendSect(macho_file: *MachO, sect_id: u8, seg_id: u8) void { + const sect = &macho_file.sections.items(.header)[sect_id]; + const seg = 
&macho_file.segments.items[seg_id]; + seg.cmdsize += @sizeOf(macho.section_64); + seg.nsects += 1; + sect.addr = seg.vmaddr; + sect.offset = @intCast(seg.fileoff); + sect.size = seg.vmsize; + macho_file.sections.items(.segment_id)[sect_id] = seg_id; + } + }.appendSect; + if (self.zig_text_section_index == null) { - self.zig_text_section_index = try self.addSection("__TEXT", "__text", .{ + self.zig_text_section_index = try self.addSection("__TEXT_ZIG", "__text_zig", .{ .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, }); + appendSect(self, self.zig_text_section_index.?, self.zig_text_seg_index.?); } if (self.zig_got_section_index == null and !self.base.isRelocatable()) { - self.zig_got_section_index = try self.addSection("__DATA_CONST", "__got_zig", .{}); + self.zig_got_section_index = try self.addSection("__GOT_ZIG", "__got_zig", .{}); + appendSect(self, self.zig_got_section_index.?, self.zig_got_seg_index.?); } - if (self.zig_data_const_section_index == null) { - self.zig_data_const_section_index = try self.addSection("__DATA_CONST", "__const", .{}); + if (self.zig_const_section_index == null) { + self.zig_const_section_index = try self.addSection("__CONST_ZIG", "__const_zig", .{}); + appendSect(self, self.zig_const_section_index.?, self.zig_const_seg_index.?); } if (self.zig_data_section_index == null) { - self.zig_data_section_index = try self.addSection("__DATA", "__data", .{}); + self.zig_data_section_index = try self.addSection("__DATA_ZIG", "__data_zig", .{}); + appendSect(self, self.zig_data_section_index.?, self.zig_data_seg_index.?); } if (self.zig_bss_section_index == null) { - self.zig_bss_section_index = try self.addSection("__DATA", "_bss", .{ + self.zig_bss_section_index = try self.addSection("__BSS_ZIG", "__bss_zig", .{ .flags = macho.S_ZEROFILL, }); + appendSect(self, self.zig_bss_section_index.?, self.zig_bss_seg_index.?); } } diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 
4e0cb76a40..c574900bce 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -478,7 +478,7 @@ fn getDeclOutputSection( ); } - if (variable.is_const) break :blk macho_file.zig_data_const_section_index.?; + if (variable.is_const) break :blk macho_file.zig_const_section_index.?; if (Value.fromInterned(variable.init).isUndefDeep(mod)) { // TODO: get the optimize_mode from the Module that owns the decl instead // of using the root module here. @@ -496,7 +496,7 @@ fn getDeclOutputSection( if (is_all_zeroes) break :blk macho_file.zig_bss_section_index.?; break :blk macho_file.zig_data_section_index.?; } - break :blk macho_file.zig_data_const_section_index.?; + break :blk macho_file.zig_const_section_index.?; }, }; return sect_id; From 55c8b82b50a6b1f2d65775f8d2ad313833549067 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 18 Jan 2024 14:36:52 +0100 Subject: [PATCH 085/133] macho: set alignment of pre-allocated sections --- src/link/MachO.zig | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 9ab4a9397e..3fc22255ce 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3222,29 +3222,36 @@ fn initMetadata(self: *MachO, options: InitMetadataOptions) !void { } }.appendSect; - if (self.zig_text_section_index == null) { + { self.zig_text_section_index = try self.addSection("__TEXT_ZIG", "__text_zig", .{ + .alignment = switch (self.getTarget().cpu.arch) { + .aarch64 => 2, + .x86_64 => 0, + else => unreachable, + }, .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, }); appendSect(self, self.zig_text_section_index.?, self.zig_text_seg_index.?); } - if (self.zig_got_section_index == null and !self.base.isRelocatable()) { - self.zig_got_section_index = try self.addSection("__GOT_ZIG", "__got_zig", .{}); + if (!self.base.isRelocatable()) { + self.zig_got_section_index = try self.addSection("__GOT_ZIG", "__got_zig", .{ + 
.alignment = 3, + }); appendSect(self, self.zig_got_section_index.?, self.zig_got_seg_index.?); } - if (self.zig_const_section_index == null) { + { self.zig_const_section_index = try self.addSection("__CONST_ZIG", "__const_zig", .{}); appendSect(self, self.zig_const_section_index.?, self.zig_const_seg_index.?); } - if (self.zig_data_section_index == null) { + { self.zig_data_section_index = try self.addSection("__DATA_ZIG", "__data_zig", .{}); appendSect(self, self.zig_data_section_index.?, self.zig_data_seg_index.?); } - if (self.zig_bss_section_index == null) { + { self.zig_bss_section_index = try self.addSection("__BSS_ZIG", "__bss_zig", .{ .flags = macho.S_ZEROFILL, }); @@ -3328,6 +3335,7 @@ pub fn addSegment(self: *MachO, name: []const u8, opts: struct { } const AddSectionOpts = struct { + alignment: u32 = 0, flags: u32 = macho.S_REGULAR, reserved1: u32 = 0, reserved2: u32 = 0, @@ -3346,6 +3354,7 @@ pub fn addSection( .header = .{ .sectname = makeStaticString(sectname), .segname = makeStaticString(segname), + .@"align" = opts.alignment, .flags = opts.flags, .reserved1 = opts.reserved1, .reserved2 = opts.reserved2, From 2169a5559dedd1ea245264a0e642347677af91b8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 18 Jan 2024 14:59:15 +0100 Subject: [PATCH 086/133] macho: implement more self-hosted primitives --- src/link/MachO.zig | 7 ++- src/link/MachO/Atom.zig | 64 ++++++++++++++++++++ src/link/MachO/ZigObject.zig | 109 ++++++++++++++++++++++++++++++----- 3 files changed, 164 insertions(+), 16 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 3fc22255ce..d0d60a5587 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -20,6 +20,7 @@ sections: std.MultiArrayList(Section) = .{}, symbols: std.ArrayListUnmanaged(Symbol) = .{}, symbols_extra: std.ArrayListUnmanaged(u32) = .{}, +symbols_free_list: std.ArrayListUnmanaged(Symbol.Index) = .{}, globals: std.AutoHashMapUnmanaged(u32, Symbol.Index) = .{}, /// This table will be populated after 
`scanRelocs` has run. /// Key is symbol index. @@ -327,6 +328,7 @@ pub fn deinit(self: *MachO) void { self.symbols.deinit(gpa); self.symbols_extra.deinit(gpa); + self.symbols_free_list.deinit(gpa); self.globals.deinit(gpa); { var it = self.undefs.iterator(); @@ -3260,9 +3262,8 @@ fn initMetadata(self: *MachO, options: InitMetadataOptions) !void { } pub fn growSection(self: *MachO, sect_index: u8, size: u64) !void { - _ = self; - _ = sect_index; - _ = size; + const sect = &self.sections.items(.header)[sect_index]; + std.debug.print("curr={x}, needed={x}\n", .{ sect.size, size }); @panic("TODO growSection"); } diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 374a98021f..5da72886be 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -324,6 +324,70 @@ pub fn grow(self: *Atom, macho_file: *MachO) !void { try self.allocate(macho_file); } +pub fn free(self: *Atom, macho_file: *MachO) void { + log.debug("freeAtom {d} ({s})", .{ self.atom_index, self.getName(macho_file) }); + + const comp = macho_file.base.comp; + const gpa = comp.gpa; + const free_list = &macho_file.sections.items(.free_list)[self.out_n_sect]; + const last_atom_index = &macho_file.sections.items(.last_atom_index)[self.out_n_sect]; + var already_have_free_list_node = false; + { + var i: usize = 0; + // TODO turn free_list into a hash map + while (i < free_list.items.len) { + if (free_list.items[i] == self.atom_index) { + _ = free_list.swapRemove(i); + continue; + } + if (free_list.items[i] == self.prev_index) { + already_have_free_list_node = true; + } + i += 1; + } + } + + if (macho_file.getAtom(last_atom_index.*)) |last_atom| { + if (last_atom.atom_index == self.atom_index) { + if (macho_file.getAtom(self.prev_index)) |_| { + // TODO shrink the section size here + last_atom_index.* = self.prev_index; + } else { + last_atom_index.* = 0; + } + } + } + + if (macho_file.getAtom(self.prev_index)) |prev| { + prev.next_index = self.next_index; + if 
(!already_have_free_list_node and prev.*.freeListEligible(macho_file)) { + // The free list is heuristics, it doesn't have to be perfect, so we can + // ignore the OOM here. + free_list.append(gpa, prev.atom_index) catch {}; + } + } else { + self.prev_index = 0; + } + + if (macho_file.getAtom(self.next_index)) |next| { + next.prev_index = self.prev_index; + } else { + self.next_index = 0; + } + + // TODO create relocs free list + self.freeRelocs(macho_file); + // TODO figure out how to free input section mappind in ZigModule + // const zig_object = macho_file.zigObjectPtr().? + // assert(zig_object.atoms.swapRemove(self.atom_index)); + self.* = .{}; +} + +pub fn freeRelocs(self: *Atom, macho_file: *MachO) void { + self.getFile(macho_file).zig_object.freeAtomRelocs(self.*); + self.relocs.len = 0; +} + pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index c574900bce..ceb9c7fe61 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -129,6 +129,10 @@ pub fn getAtomRelocs(self: *ZigObject, atom: Atom) []const Relocation { return relocs.items[0..atom.relocs.len]; } +pub fn freeAtomRelocs(self: *ZigObject, atom: Atom) void { + self.relocs.items[atom.relocs.pos].clearRetainingCapacity(); +} + pub fn resolveSymbols(self: *ZigObject, macho_file: *MachO) void { _ = self; _ = macho_file; @@ -263,11 +267,42 @@ pub fn lowerAnonDecl( @panic("TODO lowerAnonDecl"); } -pub fn freeDecl(self: *ZigObject, macho_file: *MachO, decl_index: InternPool.DeclIndex) void { +fn freeUnnamedConsts(self: *ZigObject, macho_file: *MachO, decl_index: InternPool.DeclIndex) void { + const gpa = macho_file.base.comp.gpa; + const unnamed_consts = self.unnamed_consts.getPtr(decl_index) orelse return; + for (unnamed_consts.items) |sym_index| { + self.freeDeclMetadata(macho_file, sym_index); + } + unnamed_consts.clearAndFree(gpa); +} + +fn 
freeDeclMetadata(self: *ZigObject, macho_file: *MachO, sym_index: Symbol.Index) void { _ = self; - _ = macho_file; - _ = decl_index; - @panic("TODO freeDecl"); + const gpa = macho_file.base.comp.gpa; + const sym = macho_file.getSymbol(sym_index); + sym.getAtom(macho_file).?.free(macho_file); + log.debug("adding %{d} to local symbols free list", .{sym_index}); + macho_file.symbols_free_list.append(gpa, sym_index) catch {}; + macho_file.symbols.items[sym_index] = .{}; + // TODO free GOT entry here +} + +pub fn freeDecl(self: *ZigObject, macho_file: *MachO, decl_index: InternPool.DeclIndex) void { + const gpa = macho_file.base.comp.gpa; + const mod = macho_file.base.comp.module.?; + const decl = mod.declPtr(decl_index); + + log.debug("freeDecl {*}", .{decl}); + + if (self.decls.fetchRemove(decl_index)) |const_kv| { + var kv = const_kv; + const sym_index = kv.value.symbol_index; + self.freeDeclMetadata(macho_file, sym_index); + self.freeUnnamedConsts(macho_file, decl_index); + kv.value.exports.deinit(gpa); + } + + // TODO free decl in dSYM } pub fn updateFunc( @@ -278,13 +313,61 @@ pub fn updateFunc( air: Air, liveness: Liveness, ) !void { - _ = self; - _ = macho_file; - _ = mod; - _ = func_index; - _ = air; - _ = liveness; - @panic("TODO updateFunc"); + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.comp.gpa; + const func = mod.funcInfo(func_index); + const decl_index = func.owner_decl; + const decl = mod.declPtr(decl_index); + + const sym_index = try self.getOrCreateMetadataForDecl(macho_file, decl_index); + self.freeUnnamedConsts(macho_file, decl_index); + macho_file.getSymbol(sym_index).getAtom(macho_file).?.freeRelocs(macho_file); + + var code_buffer = std.ArrayList(u8).init(gpa); + defer code_buffer.deinit(); + + var decl_state: ?Dwarf.DeclState = null; // TODO: Dwarf + defer if (decl_state) |*ds| ds.deinit(); + + const dio: codegen.DebugInfoOutput = if (decl_state) |*ds| .{ .dwarf = ds } else .none; + const res = try 
codegen.generateFunction( + &macho_file.base, + decl.srcLoc(mod), + func_index, + air, + liveness, + &code_buffer, + dio, + ); + + const code = switch (res) { + .ok => code_buffer.items, + .fail => |em| { + decl.analysis = .codegen_failure; + try mod.failed_decls.put(mod.gpa, decl_index, em); + return; + }, + }; + + const sect_index = try self.getDeclOutputSection(macho_file, decl, code); + try self.updateDeclCode(macho_file, decl_index, sym_index, sect_index, code); + + // if (decl_state) |*ds| { + // const sym = elf_file.symbol(sym_index); + // try self.dwarf.?.commitDeclState( + // mod, + // decl_index, + // sym.value, + // sym.atom(elf_file).?.size, + // ds, + // ); + // } + + // Since we updated the vaddr and the size, each corresponding export + // symbol also needs to be updated. + return self.updateExports(macho_file, mod, .{ .decl_index = decl_index }, mod.getDeclExports(decl_index)); } pub fn updateDecl( @@ -313,7 +396,7 @@ pub fn updateDecl( } const sym_index = try self.getOrCreateMetadataForDecl(macho_file, decl_index); - // TODO: free relocs if any + macho_file.getSymbol(sym_index).getAtom(macho_file).?.freeRelocs(macho_file); const gpa = macho_file.base.comp.gpa; var code_buffer = std.ArrayList(u8).init(gpa); @@ -431,7 +514,7 @@ fn updateDeclCode( } } else { try atom.allocate(macho_file); - // TODO: freeDeclMetadata in case of error + errdefer self.freeDeclMetadata(macho_file, sym_index); sym.value = 0; sym.flags.needs_zig_got = true; From 6cd4c7612f7d0c1bf21695c29432296823875c46 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 18 Jan 2024 16:52:41 +0100 Subject: [PATCH 087/133] macho: implement growSection mechanism --- src/link/MachO.zig | 56 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index d0d60a5587..239deda1c8 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3130,6 +3130,16 @@ fn allocatedSize(self: *MachO, start: u64) u64 { 
return min_pos - start; } +fn allocatedVirtualSize(self: *MachO, start: u64) u64 { + if (start == 0) return 0; + var min_pos: u64 = std.math.maxInt(u64); + for (self.segments.items) |seg| { + if (seg.vmaddr <= start) continue; + if (seg.vmaddr < min_pos) min_pos = seg.vmaddr; + } + return min_pos - start; +} + fn findFreeSpace(self: *MachO, object_size: u64, min_alignment: u32) u64 { var start: u64 = 0; while (self.detectAllocCollision(start, object_size)) |item_end| { @@ -3261,10 +3271,50 @@ fn initMetadata(self: *MachO, options: InitMetadataOptions) !void { } } -pub fn growSection(self: *MachO, sect_index: u8, size: u64) !void { +pub fn growSection(self: *MachO, sect_index: u8, needed_size: u64) !void { const sect = &self.sections.items(.header)[sect_index]; - std.debug.print("curr={x}, needed={x}\n", .{ sect.size, size }); - @panic("TODO growSection"); + const seg_id = self.sections.items(.segment_id)[sect_index]; + const seg = &self.segments.items[seg_id]; + + if (needed_size > self.allocatedSize(sect.offset) and !sect.isZerofill()) { + const existing_size = sect.size; + sect.size = 0; + + // Must move the entire section. + const new_offset = self.findFreeSpace(needed_size, self.getPageSize()); + + log.debug("new '{s},{s}' file offset 0x{x} to 0x{x}", .{ + sect.segName(), + sect.sectName(), + new_offset, + new_offset + existing_size, + }); + + const amt = try self.base.file.?.copyRangeAll(sect.offset, self.base.file.?, new_offset, existing_size); + // TODO figure out what to about this error condition - how to communicate it up. 
+ if (amt != existing_size) return error.InputOutput; + + sect.offset = @intCast(new_offset); + seg.fileoff = new_offset; + } + + sect.size = needed_size; + if (!sect.isZerofill()) { + seg.filesize = needed_size; + } + + const mem_capacity = self.allocatedVirtualSize(seg.vmaddr); + if (needed_size > mem_capacity) { + var err = try self.addErrorWithNotes(2); + try err.addMsg(self, "fatal linker error: cannot expand segment seg({d})({s}) in virtual memory", .{ + seg_id, + seg.segName(), + }); + try err.addNote(self, "TODO: emit relocations to memory locations in self-hosted backends", .{}); + try err.addNote(self, "as a workaround, try increasing pre-allocated virtual memory of each segment", .{}); + } + + seg.vmsize = needed_size; } pub fn getTarget(self: MachO) std.Target { From c98d229844e4b12ba2c61291fdf7fa0376f4e087 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 18 Jan 2024 17:13:19 +0100 Subject: [PATCH 088/133] macho: fill in more blanks in ZigObject --- src/link/MachO/ZigObject.zig | 277 +++++++++++++++++++++++++++++++++-- 1 file changed, 263 insertions(+), 14 deletions(-) diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index ceb9c7fe61..2f2434f211 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -259,12 +259,47 @@ pub fn lowerAnonDecl( explicit_alignment: InternPool.Alignment, src_loc: Module.SrcLoc, ) !codegen.Result { - _ = self; - _ = macho_file; - _ = decl_val; - _ = explicit_alignment; - _ = src_loc; - @panic("TODO lowerAnonDecl"); + const gpa = macho_file.base.comp.gpa; + const mod = macho_file.base.comp.module.?; + const ty = Type.fromInterned(mod.intern_pool.typeOf(decl_val)); + const decl_alignment = switch (explicit_alignment) { + .none => ty.abiAlignment(mod), + else => explicit_alignment, + }; + if (self.anon_decls.get(decl_val)) |metadata| { + const existing_alignment = macho_file.getSymbol(metadata.symbol_index).getAtom(macho_file).?.alignment; + if 
(decl_alignment.order(existing_alignment).compare(.lte)) + return .ok; + } + + const val = Value.fromInterned(decl_val); + const tv = TypedValue{ .ty = ty, .val = val }; + var name_buf: [32]u8 = undefined; + const name = std.fmt.bufPrint(&name_buf, "__anon_{d}", .{ + @intFromEnum(decl_val), + }) catch unreachable; + const res = self.lowerConst( + macho_file, + name, + tv, + decl_alignment, + macho_file.zig_const_section_index.?, + src_loc, + ) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + else => |e| return .{ .fail = try Module.ErrorMsg.create( + gpa, + src_loc, + "unable to lower constant value: {s}", + .{@errorName(e)}, + ) }, + }; + const sym_index = switch (res) { + .ok => |sym_index| sym_index, + .fail => |em| return .{ .fail = em }, + }; + try self.anon_decls.put(gpa, decl_val, .{ .symbol_index = sym_index }); + return .ok; } fn freeUnnamedConsts(self: *ZigObject, macho_file: *MachO, decl_index: InternPool.DeclIndex) void { @@ -591,11 +626,100 @@ pub fn lowerUnnamedConst( typed_value: TypedValue, decl_index: InternPool.DeclIndex, ) !u32 { - _ = self; - _ = macho_file; - _ = typed_value; - _ = decl_index; - @panic("TODO lowerUnnamedConst"); + const gpa = macho_file.base.comp.gpa; + const mod = macho_file.base.comp.module.?; + const gop = try self.unnamed_consts.getOrPut(gpa, decl_index); + if (!gop.found_existing) { + gop.value_ptr.* = .{}; + } + const unnamed_consts = gop.value_ptr; + const decl = mod.declPtr(decl_index); + const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + const index = unnamed_consts.items.len; + const name = try std.fmt.allocPrint(gpa, "__unnamed_{s}_{d}", .{ decl_name, index }); + defer gpa.free(name); + const sym_index = switch (try self.lowerConst( + macho_file, + name, + typed_value, + typed_value.ty.abiAlignment(mod), + macho_file.zig_const_section_index.?, + decl.srcLoc(mod), + )) { + .ok => |sym_index| sym_index, + .fail => |em| { + decl.analysis = 
.codegen_failure; + try mod.failed_decls.put(mod.gpa, decl_index, em); + log.err("{s}", .{em.msg}); + return error.CodegenFail; + }, + }; + const sym = macho_file.getSymbol(sym_index); + try unnamed_consts.append(gpa, sym.atom); + return sym_index; +} + +const LowerConstResult = union(enum) { + ok: Symbol.Index, + fail: *Module.ErrorMsg, +}; + +fn lowerConst( + self: *ZigObject, + macho_file: *MachO, + name: []const u8, + tv: TypedValue, + required_alignment: InternPool.Alignment, + output_section_index: u8, + src_loc: Module.SrcLoc, +) !LowerConstResult { + const gpa = macho_file.base.comp.gpa; + + var code_buffer = std.ArrayList(u8).init(gpa); + defer code_buffer.deinit(); + + const sym_index = try self.addAtom(macho_file); + + const res = try codegen.generateSymbol(&macho_file.base, src_loc, tv, &code_buffer, .{ + .none = {}, + }, .{ + .parent_atom_index = sym_index, + }); + const code = switch (res) { + .ok => code_buffer.items, + .fail => |em| return .{ .fail = em }, + }; + + const sym = macho_file.getSymbol(sym_index); + const name_str_index = try macho_file.strings.insert(gpa, name); + sym.name = name_str_index; + sym.out_n_sect = output_section_index; + + const nlist = &self.symtab.items(.nlist)[sym.nlist_idx]; + nlist.n_strx = name_str_index; + nlist.n_type = macho.N_SECT; + nlist.n_sect = output_section_index + 1; + self.symtab.items(.size)[sym.nlist_idx] = code.len; + + const atom = sym.getAtom(macho_file).?; + atom.flags.alive = true; + atom.name = name_str_index; + atom.alignment = required_alignment; + atom.size = code.len; + atom.out_n_sect = output_section_index; + + try atom.allocate(macho_file); + // TODO rename and re-audit this method + errdefer self.freeDeclMetadata(macho_file, sym_index); + + sym.value = 0; + nlist.n_value = 0; + + const sect = macho_file.sections.items(.header)[output_section_index]; + const file_offset = sect.offset + atom.value - sect.addr; + try macho_file.base.file.?.pwriteAll(code, file_offset); + + return .{ .ok = 
sym_index }; } pub fn updateExports( @@ -692,6 +816,91 @@ pub fn updateExports( } } +fn updateLazySymbol( + self: *ZigObject, + macho_file: *MachO, + lazy_sym: link.File.LazySymbol, + symbol_index: Symbol.Index, +) !void { + const gpa = macho_file.base.comp.gpa; + const mod = macho_file.base.comp.module.?; + + var required_alignment: InternPool.Alignment = .none; + var code_buffer = std.ArrayList(u8).init(gpa); + defer code_buffer.deinit(); + + const name_str_index = blk: { + const name = try std.fmt.allocPrint(gpa, "__lazy_{s}_{}", .{ + @tagName(lazy_sym.kind), + lazy_sym.ty.fmt(mod), + }); + defer gpa.free(name); + break :blk try macho_file.strings.insert(gpa, name); + }; + + const src = if (lazy_sym.ty.getOwnerDeclOrNull(mod)) |owner_decl| + mod.declPtr(owner_decl).srcLoc(mod) + else + Module.SrcLoc{ + .file_scope = undefined, + .parent_decl_node = undefined, + .lazy = .unneeded, + }; + const res = try codegen.generateLazySymbol( + &macho_file.base, + src, + lazy_sym, + &required_alignment, + &code_buffer, + .none, + .{ .parent_atom_index = symbol_index }, + ); + const code = switch (res) { + .ok => code_buffer.items, + .fail => |em| { + log.err("{s}", .{em.msg}); + return error.CodegenFail; + }, + }; + + const output_section_index = switch (lazy_sym.kind) { + .code => macho_file.zig_text_section_index.?, + .const_data => macho_file.zig_const_section_index.?, + }; + const sym = macho_file.getSymbol(symbol_index); + sym.name = name_str_index; + sym.out_n_sect = output_section_index; + + const nlist = &self.symtab.items(.nlist)[sym.nlist_idx]; + nlist.n_strx = name_str_index; + nlist.n_type = macho.N_SECT; + nlist.n_sect = output_section_index + 1; + self.symtab.items(.size)[sym.nlist_idx] = code.len; + + const atom = sym.getAtom(macho_file).?; + atom.flags.alive = true; + atom.name = name_str_index; + atom.alignment = required_alignment; + atom.size = code.len; + atom.out_n_sect = output_section_index; + + try atom.allocate(macho_file); + errdefer 
self.freeDeclMetadata(macho_file, symbol_index); + + sym.value = 0; + sym.flags.needs_zig_got = true; + nlist.st_value = 0; + + if (!macho_file.base.isRelocatable()) { + const gop = try sym.getOrCreateZigGotEntry(symbol_index, macho_file); + try macho_file.zig_got.writeOne(macho_file, gop.index); + } + + const sect = macho_file.sections.items(.header)[output_section_index]; + const file_offset = sect.offset + atom.value - sect.addr; + try macho_file.base.file.?.pwriteAll(code, file_offset); +} + /// Must be called only after a successful call to `updateDecl`. pub fn updateDeclLineNumber( self: *ZigObject, @@ -701,7 +910,7 @@ pub fn updateDeclLineNumber( _ = self; _ = mod; _ = decl_index; - @panic("TODO updateDeclLineNumber"); + // TODO: Dwarf } pub fn deleteDeclExport( @@ -751,6 +960,46 @@ pub fn getOrCreateMetadataForDecl( return gop.value_ptr.symbol_index; } +pub fn getOrCreateMetadataForLazySymbol( + self: *ZigObject, + macho_file: *MachO, + lazy_sym: link.File.LazySymbol, +) !Symbol.Index { + const gpa = macho_file.base.comp.gpa; + const mod = macho_file.base.comp.module.?; + const gop = try self.lazy_syms.getOrPut(gpa, lazy_sym.getDecl(mod)); + errdefer _ = if (!gop.found_existing) self.lazy_syms.pop(); + if (!gop.found_existing) gop.value_ptr.* = .{}; + const metadata: struct { + symbol_index: *Symbol.Index, + state: *LazySymbolMetadata.State, + } = switch (lazy_sym.kind) { + .code => .{ + .symbol_index = &gop.value_ptr.text_symbol_index, + .state = &gop.value_ptr.text_state, + }, + .const_data => .{ + .symbol_index = &gop.value_ptr.const_symbol_index, + .state = &gop.value_ptr.const_state, + }, + }; + switch (metadata.state.*) { + .unused => { + const symbol_index = try self.addAtom(macho_file); + const sym = macho_file.getSymbol(symbol_index); + sym.flags.needs_zig_got = true; + metadata.symbol_index.* = symbol_index; + }, + .pending_flush => return metadata.symbol_index.*, + .flushed => {}, + } + metadata.state.* = .pending_flush; + const symbol_index = 
metadata.symbol_index.*; + // anyerror needs to be deferred until flushModule + if (lazy_sym.getDecl(mod) != .none) try self.updateLazySymbol(macho_file, lazy_sym, symbol_index); + return symbol_index; +} + pub fn asFile(self: *ZigObject) File { return .{ .zig_object = self }; } @@ -822,9 +1071,9 @@ const DeclMetadata = struct { const LazySymbolMetadata = struct { const State = enum { unused, pending_flush, flushed }; text_symbol_index: Symbol.Index = undefined, - data_const_symbol_index: Symbol.Index = undefined, + const_symbol_index: Symbol.Index = undefined, text_state: State = .unused, - rodata_state: State = .unused, + const_state: State = .unused, }; const DeclTable = std.AutoHashMapUnmanaged(InternPool.DeclIndex, DeclMetadata); From ee463efdf2e662fb4970aa6e9ba43d165e0cdfc7 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 18 Jan 2024 17:24:07 +0100 Subject: [PATCH 089/133] macho: fill in more blanks in ZigObject --- src/link/MachO.zig | 2 +- src/link/MachO/ZigObject.zig | 46 ++++++++++++++++++++++++++++-------- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 239deda1c8..165dbbe17a 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -21,7 +21,7 @@ sections: std.MultiArrayList(Section) = .{}, symbols: std.ArrayListUnmanaged(Symbol) = .{}, symbols_extra: std.ArrayListUnmanaged(u32) = .{}, symbols_free_list: std.ArrayListUnmanaged(Symbol.Index) = .{}, -globals: std.AutoHashMapUnmanaged(u32, Symbol.Index) = .{}, +globals: std.AutoArrayHashMapUnmanaged(u32, Symbol.Index) = .{}, /// This table will be populated after `scanRelocs` has run. /// Key is symbol index. 
undefs: std.AutoHashMapUnmanaged(Symbol.Index, std.ArrayListUnmanaged(Atom.Index)) = .{}, diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 2f2434f211..2ee829f06c 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -6,6 +6,7 @@ symtab: std.MultiArrayList(Nlist) = .{}, symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, +globals_lookup: std.AutoHashMapUnmanaged(u32, Symbol.Index) = .{}, /// Table of tracked LazySymbols. lazy_syms: LazySymbolTable = .{}, @@ -53,6 +54,7 @@ pub fn deinit(self: *ZigObject, allocator: Allocator) void { self.symtab.deinit(allocator); self.symbols.deinit(allocator); self.atoms.deinit(allocator); + self.globals_lookup.deinit(allocator); { var it = self.decls.iterator(); @@ -918,20 +920,44 @@ pub fn deleteDeclExport( macho_file: *MachO, decl_index: InternPool.DeclIndex, name: InternPool.NullTerminatedString, -) void { - _ = self; - _ = macho_file; - _ = decl_index; - _ = name; - @panic("TODO deleteDeclExport"); +) Allocator.Error!void { + const metadata = self.decls.getPtr(decl_index) orelse return; + + const gpa = macho_file.base.comp.gpa; + const mod = macho_file.base.comp.module.?; + const exp_name = try std.fmt.allocPrint(gpa, "_{s}", .{mod.intern_pool.stringToSlice(name)}); + defer gpa.free(exp_name); + const nlist_index = metadata.@"export"(self, macho_file, exp_name) orelse return; + + log.debug("deleting export '{s}'", .{exp_name}); + + const nlist = &self.symtab.items(.nlist)[nlist_index.*]; + self.symtab.items(.size)[nlist_index.*] = 0; + _ = self.globals_lookup.remove(nlist.n_strx); + const sym_index = macho_file.globals.get(nlist.n_strx).?; + const sym = macho_file.getSymbol(sym_index); + if (sym.file == self.index) { + _ = macho_file.globals.swapRemove(nlist.n_strx); + sym.* = .{}; + } + nlist.* = MachO.null_sym; } pub fn getGlobalSymbol(self: *ZigObject, macho_file: *MachO, name: []const u8, lib_name: ?[]const u8) !u32 { - 
_ = self; - _ = macho_file; - _ = name; _ = lib_name; - @panic("TODO getGlobalSymbol"); + const gpa = macho_file.base.comp.gpa; + const off = try macho_file.strings.insert(gpa, name); + const lookup_gop = try self.globals_lookup.getOrPut(gpa, off); + if (!lookup_gop.found_existing) { + const nlist_index = try self.addNlist(gpa); + const nlist = &self.symtab.items(.nlist)[nlist_index]; + nlist.n_strx = off; + nlist.n_type = macho.N_EXT; + lookup_gop.value_ptr.* = nlist_index; + const gop = try macho_file.getOrCreateGlobal(off); + try self.symbols.append(gpa, gop.index); + } + return lookup_gop.value_ptr.*; } pub fn getOrCreateMetadataForDecl( From 82e92fe5f605a0ef8fa53e8434458bf18deadb5f Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 18 Jan 2024 17:42:22 +0100 Subject: [PATCH 090/133] macho: fill the rest of ZigObject --- src/link/MachO/Atom.zig | 9 +++++++ src/link/MachO/ZigObject.zig | 46 ++++++++++++++++++++++++++++-------- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 5da72886be..25cbe6ca64 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -383,6 +383,15 @@ pub fn free(self: *Atom, macho_file: *MachO) void { self.* = .{}; } +pub fn addReloc(self: *Atom, macho_file: *MachO, reloc: Relocation) !void { + const gpa = macho_file.base.comp.gpa; + const file = self.getFile(macho_file); + assert(file == .zig_object); + const rels = &file.zig_object.relocs.items[self.relocs.pos]; + try rels.append(gpa, reloc); + self.relocs.len += 1; +} + pub fn freeRelocs(self: *Atom, macho_file: *MachO) void { self.getFile(macho_file).zig_object.freeAtomRelocs(self.*); self.relocs.len = 0; diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 2ee829f06c..0199c0707c 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -234,11 +234,24 @@ pub fn getDeclVAddr( decl_index: InternPool.DeclIndex, reloc_info: link.File.RelocInfo, ) !u64 { - _ 
= self; - _ = macho_file; - _ = decl_index; - _ = reloc_info; - @panic("TODO getDeclVAddr"); + const sym_index = try self.getOrCreateMetadataForDecl(macho_file, decl_index); + const sym = macho_file.getSymbol(sym_index); + const vaddr = sym.getAddress(.{}, macho_file); + const parent_atom = macho_file.getSymbol(reloc_info.parent_atom_index).getAtom(macho_file).?; + try parent_atom.addReloc(macho_file, .{ + .tag = .@"extern", + .offset = @intCast(reloc_info.offset), + .target = sym.nlist_idx, + .addend = reloc_info.addend, + .type = .unsigned, + .meta = .{ + .pcrel = false, + .has_subtractor = false, + .length = 3, + .symbolnum = @intCast(sym.nlist_idx), + }, + }); + return vaddr; } pub fn getAnonDeclVAddr( @@ -247,11 +260,24 @@ pub fn getAnonDeclVAddr( decl_val: InternPool.Index, reloc_info: link.File.RelocInfo, ) !u64 { - _ = self; - _ = macho_file; - _ = decl_val; - _ = reloc_info; - @panic("TODO getAnonDeclVAddr"); + const sym_index = self.anon_decls.get(decl_val).?.symbol_index; + const sym = macho_file.getSymbol(sym_index); + const vaddr = sym.getAddress(.{}, macho_file); + const parent_atom = macho_file.getSymbol(reloc_info.parent_atom_index).getAtom(macho_file).?; + try parent_atom.addReloc(macho_file, .{ + .tag = .@"extern", + .offset = @intCast(reloc_info.offset), + .target = sym.nlist_idx, + .addend = reloc_info.addend, + .type = .unsigned, + .meta = .{ + .pcrel = false, + .has_subtractor = false, + .length = 3, + .symbolnum = @intCast(sym.nlist_idx), + }, + }); + return vaddr; } pub fn lowerAnonDecl( From 76dc305d4e71a9a4c9de92e6dde40a53eac1e328 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 18 Jan 2024 17:55:49 +0100 Subject: [PATCH 091/133] codegen: re-enable MachO support in genDeclRef --- src/codegen.zig | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/codegen.zig b/src/codegen.zig index f9263c2a69..83d0215cfa 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -984,22 +984,24 @@ fn 
genDeclRef( } return GenResult.mcv(.{ .load_symbol = sym.esym_index }); } else if (lf.cast(link.File.MachO)) |macho_file| { - _ = macho_file; if (is_extern) { - // TODO make this part of getGlobalSymbol - // const name = zcu.intern_pool.stringToSlice(decl.name); - // const sym_name = try std.fmt.allocPrint(gpa, "_{s}", .{name}); - // defer gpa.free(sym_name); - // const global_index = try macho_file.addUndefined(sym_name, .{ .add_got = true }); - // return GenResult.mcv(.{ .load_got = link.File.MachO.global_symbol_bit | global_index }); + const name = zcu.intern_pool.stringToSlice(decl.name); + const sym_name = try std.fmt.allocPrint(gpa, "_{s}", .{name}); + defer gpa.free(sym_name); + const lib_name = if (decl.getOwnedVariable(zcu)) |ov| + zcu.intern_pool.stringToSliceUnwrap(ov.lib_name) + else + null; + const sym_index = try macho_file.getGlobalSymbol(sym_name, lib_name); + macho_file.getSymbol(sym_index).flags.needs_got = true; + return GenResult.mcv(.{ .load_symbol = sym_index }); } - // const atom_index = try macho_file.getOrCreateAtomForDecl(decl_index); - // const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?; - // if (is_threadlocal) { - // return GenResult.mcv(.{ .load_tlv = sym_index }); - // } - // return GenResult.mcv(.{ .load_got = sym_index }); - @panic("TODO genDeclRef"); + const sym_index = try macho_file.getZigObject().?.getOrCreateMetadataForDecl(macho_file, decl_index); + const sym = macho_file.getSymbol(sym_index); + if (is_threadlocal) { + return GenResult.mcv(.{ .load_tlv = sym.nlist_idx }); + } + return GenResult.mcv(.{ .load_symbol = sym.nlist_idx }); } else if (lf.cast(link.File.Coff)) |coff_file| { if (is_extern) { const name = zcu.intern_pool.stringToSlice(decl.name); From 96cc9fafbf0a382c0ed0b6142986cd8373cffaa3 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 18 Jan 2024 18:58:29 +0100 Subject: [PATCH 092/133] codegen: re-implement enough of codegen to error out instead panic --- src/arch/x86_64/CodeGen.zig | 90 
++++++------------ src/arch/x86_64/Emit.zig | 86 +++++++++-------- src/arch/x86_64/Lower.zig | 169 ++++++++++++++++++---------------- src/codegen.zig | 7 +- src/link/MachO.zig | 2 +- src/link/MachO/Atom.zig | 6 ++ src/link/MachO/Relocation.zig | 2 + src/link/MachO/ZigObject.zig | 37 ++++---- 8 files changed, 197 insertions(+), 202 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 870b6a2472..2b4932c5c2 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -139,10 +139,7 @@ const Owner = union(enum) { if (ctx.bin_file.cast(link.File.Elf)) |elf_file| { return elf_file.zigObjectPtr().?.getOrCreateMetadataForDecl(elf_file, decl_index); } else if (ctx.bin_file.cast(link.File.MachO)) |macho_file| { - _ = macho_file; - // const atom = try macho_file.getOrCreateAtomForDecl(decl_index); - // return macho_file.getAtom(atom).getSymbolIndex().?; - @panic("TODO getSymbolIndex"); + return macho_file.getZigObject().?.getOrCreateMetadataForDecl(macho_file, decl_index); } else if (ctx.bin_file.cast(link.File.Coff)) |coff_file| { const atom = try coff_file.getOrCreateAtomForDecl(decl_index); return coff_file.getAtom(atom).getSymbolIndex().?; @@ -155,11 +152,8 @@ const Owner = union(enum) { return elf_file.zigObjectPtr().?.getOrCreateMetadataForLazySymbol(elf_file, lazy_sym) catch |err| ctx.fail("{s} creating lazy symbol", .{@errorName(err)}); } else if (ctx.bin_file.cast(link.File.MachO)) |macho_file| { - _ = macho_file; - // const atom = macho_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| - // return ctx.fail("{s} creating lazy symbol", .{@errorName(err)}); - // return macho_file.getAtom(atom).getSymbolIndex().?; - @panic("TODO getSymbolIndex"); + return macho_file.getZigObject().?.getOrCreateMetadataForLazySymbol(macho_file, lazy_sym) catch |err| + ctx.fail("{s} creating lazy symbol", .{@errorName(err)}); } else if (ctx.bin_file.cast(link.File.Coff)) |coff_file| { const atom = 
coff_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| return ctx.fail("{s} creating lazy symbol", .{@errorName(err)}); @@ -10955,12 +10949,10 @@ fn genCall(self: *Self, info: union(enum) { try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index }); try self.asmRegister(.{ ._, .call }, .rax); } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { - _ = macho_file; - @panic("TODO genCall"); - // const atom = try macho_file.getOrCreateAtomForDecl(func.owner_decl); - // const sym_index = macho_file.getAtom(atom).getSymbolIndex().?; - // try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index }); - // try self.asmRegister(.{ ._, .call }, .rax); + const sym_index = try macho_file.getZigObject().?.getOrCreateMetadataForDecl(macho_file, func.owner_decl); + const sym = macho_file.getSymbol(sym_index); + try self.genSetReg(.rax, Type.usize, .{ .load_symbol = .{ .sym = sym.nlist_idx } }); + try self.asmRegister(.{ ._, .call }, .rax); } else if (self.bin_file.cast(link.File.Plan9)) |p9| { const atom_index = try p9.seeDecl(func.owner_decl); const atom = p9.getAtom(atom_index); @@ -13556,30 +13548,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr } }, }); }, - .lea_tlv => |sym_index| { - const atom_index = try self.owner.getSymbolIndex(self); - if (self.bin_file.cast(link.File.MachO)) |_| { - _ = try self.addInst(.{ - .tag = .lea, - .ops = .tlv_reloc, - .data = .{ .rx = .{ - .r1 = .rdi, - .payload = try self.addExtra(bits.Symbol{ - .atom_index = atom_index, - .sym_index = sym_index, - }), - } }, - }); - // TODO: spill registers before calling - try self.asmMemory(.{ ._, .call }, .{ - .base = .{ .reg = .rdi }, - .mod = .{ .rm = .{ .size = .qword } }, - }); - try self.genSetReg(dst_reg.to64(), Type.usize, .{ .register = .rax }); - } else return self.fail("TODO emit ptr to TLV sequence on {s}", .{ - @tagName(self.bin_file.tag), - }); - }, + .lea_tlv => unreachable, // TODO: remove this .air_ref => |src_ref| try 
self.genSetReg(dst_reg, ty, try self.resolveInst(src_ref)), } } @@ -13816,19 +13785,14 @@ fn genExternSymbolRef( else => unreachable, } } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { - const global_index = try macho_file.getGlobalSymbol(callee, lib); _ = try self.addInst(.{ .tag = .call, .ops = .extern_fn_reloc, - .data = .{ - .reloc = .{ - .atom_index = atom_index, - // .sym_index = link.File.MachO.global_symbol_bit | global_index, - .sym_index = global_index, - }, - }, + .data = .{ .reloc = .{ + .atom_index = atom_index, + .sym_index = try macho_file.getGlobalSymbol(callee, lib), + } }, }); - @panic("TODO genExternSymbolRef"); } else return self.fail("TODO implement calling extern functions", .{}); } @@ -13916,21 +13880,19 @@ fn genLazySymbolRef( else => unreachable, } } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { - _ = macho_file; - @panic("TODO genLazySymbolRef"); - // const atom_index = macho_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| - // return self.fail("{s} creating lazy symbol", .{@errorName(err)}); - // const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?; - // switch (tag) { - // .lea, .call => try self.genSetReg(reg, Type.usize, .{ .lea_got = sym_index }), - // .mov => try self.genSetReg(reg, Type.usize, .{ .load_got = sym_index }), - // else => unreachable, - // } - // switch (tag) { - // .lea, .mov => {}, - // .call => try self.asmRegister(.{ ._, .call }, reg), - // else => unreachable, - // } + const sym_index = macho_file.getZigObject().?.getOrCreateMetadataForLazySymbol(macho_file, lazy_sym) catch |err| + return self.fail("{s} creating lazy symbol", .{@errorName(err)}); + const sym = macho_file.getSymbol(sym_index); + switch (tag) { + .lea, .call => try self.genSetReg(reg, Type.usize, .{ .load_symbol = .{ .sym = sym.nlist_idx } }), + .mov => try self.genSetReg(reg, Type.usize, .{ .load_symbol = .{ .sym = sym.nlist_idx } }), + else => unreachable, + } + switch (tag) { + .lea, .mov => {}, 
+ .call => try self.asmRegister(.{ ._, .call }, reg), + else => unreachable, + } } else { return self.fail("TODO implement genLazySymbol for x86_64 {s}", .{@tagName(self.bin_file.tag)}); } @@ -16103,6 +16065,8 @@ fn resolveInst(self: *Self, ref: Air.Inst.Ref) InnerError!MCValue { .{ .lea_symbol = .{ .sym = tlv_sym } }, ); break :init .{ .load_frame = .{ .index = frame_index } }; + } else if (self.bin_file.cast(link.File.MachO)) |_| { + return self.fail("TODO implement lowering TLV variable to stack", .{}); } else break :init const_mcv, else => break :init const_mcv, } diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 97c6cdfc1b..9cadef1ef6 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -49,23 +49,21 @@ pub fn emitMir(emit: *Emit) Error!void { .r_addend = -4, }); } else if (emit.lower.bin_file.cast(link.File.MachO)) |macho_file| { - _ = macho_file; - @panic("TODO emitMir"); - // // Add relocation to the decl. - // const atom_index = - // macho_file.getAtomIndexForSymbol(.{ .sym_index = symbol.atom_index }).?; - // const target = if (link.File.MachO.global_symbol_bit & symbol.sym_index != 0) - // macho_file.getGlobalByIndex(link.File.MachO.global_symbol_mask & symbol.sym_index) - // else - // link.File.MachO.SymbolWithLoc{ .sym_index = symbol.sym_index }; - // try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ - // .type = .branch, - // .target = target, - // .offset = end_offset - 4, - // .addend = 0, - // .pcrel = true, - // .length = 2, - // }); + // Add relocation to the decl. + const atom = macho_file.getSymbol(symbol.atom_index).getAtom(macho_file).?; + try atom.addReloc(macho_file, .{ + .tag = .@"extern", + .offset = end_offset - 4, + .target = symbol.sym_index, + .addend = 0, + .type = .branch, + .meta = .{ + .pcrel = true, + .has_subtractor = false, + .length = 2, + .symbolnum = 0, + }, + }); } else if (emit.lower.bin_file.cast(link.File.Coff)) |coff_file| { // Add relocation to the decl. 
const atom_index = coff_file.getAtomIndexForSymbol( @@ -151,6 +149,36 @@ pub fn emitMir(emit: *Emit) Error!void { }); } } + } else if (emit.lower.bin_file.cast(link.File.MachO)) |macho_file| { + const is_obj_or_static_lib = switch (emit.lower.output_mode) { + .Exe => false, + .Obj => true, + .Lib => emit.lower.link_mode == .Static, + }; + const atom = macho_file.getSymbol(data.atom_index).getAtom(macho_file).?; + const sym = macho_file.getSymbol(data.sym_index); + if (sym.flags.needs_zig_got and !is_obj_or_static_lib) { + _ = try sym.getOrCreateZigGotEntry(data.sym_index, macho_file); + } + const @"type": link.File.MachO.Relocation.Type = if (sym.flags.needs_zig_got and !is_obj_or_static_lib) + .zig_got_load + else if (sym.flags.needs_got) + .got_load + else + .signed; + try atom.addReloc(macho_file, .{ + .tag = .@"extern", + .offset = @intCast(end_offset - 4), + .target = data.sym_index, + .addend = 0, + .type = @"type", + .meta = .{ + .pcrel = true, + .has_subtractor = false, + .length = 2, + .symbolnum = 0, + }, + }); } else unreachable, .linker_got, .linker_direct, @@ -158,28 +186,8 @@ pub fn emitMir(emit: *Emit) Error!void { .linker_tlv, => |symbol| if (emit.lower.bin_file.cast(link.File.Elf)) |_| { unreachable; - } else if (emit.lower.bin_file.cast(link.File.MachO)) |macho_file| { - _ = macho_file; - @panic("TODO emitMir"); - // const atom_index = - // macho_file.getAtomIndexForSymbol(.{ .sym_index = symbol.atom_index }).?; - // const target = if (link.File.MachO.global_symbol_bit & symbol.sym_index != 0) - // macho_file.getGlobalByIndex(link.File.MachO.global_symbol_mask & symbol.sym_index) - // else - // link.File.MachO.SymbolWithLoc{ .sym_index = symbol.sym_index }; - // try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ - // .type = switch (lowered_relocs[0].target) { - // .linker_got => .got, - // .linker_direct => .signed, - // .linker_tlv => .tlv, - // else => unreachable, - // }, - // .target = target, - // .offset = @intCast(end_offset 
- 4), - // .addend = 0, - // .pcrel = true, - // .length = 2, - // }); + } else if (emit.lower.bin_file.cast(link.File.MachO)) |_| { + unreachable; } else if (emit.lower.bin_file.cast(link.File.Coff)) |coff_file| { const atom_index = coff_file.getAtomIndexForSymbol(.{ .sym_index = symbol.atom_index, diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index cc5ae7712b..eb3ed88b37 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -14,7 +14,7 @@ result_relocs_len: u8 = undefined, result_insts: [ std.mem.max(usize, &.{ 1, // non-pseudo instructions - 3, // TLS local dynamic (LD) sequence in PIC mode + 3, // (ELF only) TLS local dynamic (LD) sequence in PIC mode 2, // cmovcc: cmovcc \ cmovcc 3, // setcc: setcc \ setcc \ logicop 2, // jcc: jcc \ jcc @@ -32,7 +32,7 @@ result_relocs: [ 2, // jcc: jcc \ jcc 2, // test \ jcc \ probe \ sub \ jmp 1, // probe \ sub \ jcc - 3, // TLS local dynamic (LD) sequence in PIC mode + 3, // (ELF only) TLS local dynamic (LD) sequence in PIC mode }) ]Reloc = undefined, @@ -326,18 +326,6 @@ fn reloc(lower: *Lower, target: Reloc.Target) Immediate { return Immediate.s(0); } -fn needsZigGot(sym: bits.Symbol, ctx: *link.File) bool { - const elf_file = ctx.cast(link.File.Elf).?; - const sym_index = elf_file.zigObjectPtr().?.symbol(sym.sym_index); - return elf_file.symbol(sym_index).flags.needs_zig_got; -} - -fn isTls(sym: bits.Symbol, ctx: *link.File) bool { - const elf_file = ctx.cast(link.File.Elf).?; - const sym_index = elf_file.zigObjectPtr().?.symbol(sym.sym_index); - return elf_file.symbol(sym_index).flags.is_tls; -} - fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) Error!void { const is_obj_or_static_lib = switch (lower.output_mode) { .Exe => false, @@ -359,80 +347,101 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) assert(mem_op.sib.disp == 0); assert(mem_op.sib.scale_index.scale == 0); - if (isTls(sym, lower.bin_file)) { - // TODO 
handle extern TLS vars, i.e., emit GD model - if (lower.pic) { - // Here, we currently assume local dynamic TLS vars, and so - // we emit LD model. - _ = lower.reloc(.{ .linker_tlsld = sym }); - lower.result_insts[lower.result_insts_len] = - try Instruction.new(.none, .lea, &[_]Operand{ - .{ .reg = .rdi }, - .{ .mem = Memory.rip(mem_op.sib.ptr_size, 0) }, - }); - lower.result_insts_len += 1; - if (lower.bin_file.cast(link.File.Elf)) |elf_file| { + if (lower.bin_file.cast(link.File.Elf)) |elf_file| { + const sym_index = elf_file.zigObjectPtr().?.symbol(sym.sym_index); + const elf_sym = elf_file.symbol(sym_index); + + if (elf_sym.flags.is_tls) { + // TODO handle extern TLS vars, i.e., emit GD model + if (lower.pic) { + // Here, we currently assume local dynamic TLS vars, and so + // we emit LD model. + _ = lower.reloc(.{ .linker_tlsld = sym }); + lower.result_insts[lower.result_insts_len] = + try Instruction.new(.none, .lea, &[_]Operand{ + .{ .reg = .rdi }, + .{ .mem = Memory.rip(mem_op.sib.ptr_size, 0) }, + }); + lower.result_insts_len += 1; _ = lower.reloc(.{ .linker_extern_fn = .{ .atom_index = sym.atom_index, .sym_index = try elf_file.getGlobalSymbol("__tls_get_addr", null), } }); + lower.result_insts[lower.result_insts_len] = + try Instruction.new(.none, .call, &[_]Operand{ + .{ .imm = Immediate.s(0) }, + }); + lower.result_insts_len += 1; + _ = lower.reloc(.{ .linker_dtpoff = sym }); + emit_mnemonic = .lea; + break :op .{ .mem = Memory.sib(mem_op.sib.ptr_size, .{ + .base = .{ .reg = .rax }, + .disp = std.math.minInt(i32), + }) }; + } else { + // Since we are linking statically, we emit LE model directly. 
+ lower.result_insts[lower.result_insts_len] = + try Instruction.new(.none, .mov, &[_]Operand{ + .{ .reg = .rax }, + .{ .mem = Memory.sib(.qword, .{ .base = .{ .reg = .fs } }) }, + }); + lower.result_insts_len += 1; + _ = lower.reloc(.{ .linker_reloc = sym }); + emit_mnemonic = .lea; + break :op .{ .mem = Memory.sib(mem_op.sib.ptr_size, .{ + .base = .{ .reg = .rax }, + .disp = std.math.minInt(i32), + }) }; } - lower.result_insts[lower.result_insts_len] = - try Instruction.new(.none, .call, &[_]Operand{ - .{ .imm = Immediate.s(0) }, - }); - lower.result_insts_len += 1; - _ = lower.reloc(.{ .linker_dtpoff = sym }); - emit_mnemonic = .lea; - break :op .{ .mem = Memory.sib(mem_op.sib.ptr_size, .{ - .base = .{ .reg = .rax }, - .disp = std.math.minInt(i32), - }) }; - } else { - // Since we are linking statically, we emit LE model directly. - lower.result_insts[lower.result_insts_len] = - try Instruction.new(.none, .mov, &[_]Operand{ - .{ .reg = .rax }, - .{ .mem = Memory.sib(.qword, .{ .base = .{ .reg = .fs } }) }, - }); - lower.result_insts_len += 1; - _ = lower.reloc(.{ .linker_reloc = sym }); - emit_mnemonic = .lea; - break :op .{ .mem = Memory.sib(mem_op.sib.ptr_size, .{ - .base = .{ .reg = .rax }, - .disp = std.math.minInt(i32), - }) }; } - } - _ = lower.reloc(.{ .linker_reloc = sym }); - break :op if (lower.pic) switch (mnemonic) { - .lea => { - break :op .{ .mem = Memory.rip(mem_op.sib.ptr_size, 0) }; - }, - .mov => { - if (is_obj_or_static_lib and needsZigGot(sym, lower.bin_file)) emit_mnemonic = .lea; - break :op .{ .mem = Memory.rip(mem_op.sib.ptr_size, 0) }; - }, - else => unreachable, - } else switch (mnemonic) { - .call => break :op if (is_obj_or_static_lib and needsZigGot(sym, lower.bin_file)) .{ - .imm = Immediate.s(0), - } else .{ .mem = Memory.sib(mem_op.sib.ptr_size, .{ - .base = .{ .reg = .ds }, - }) }, - .lea => { - emit_mnemonic = .mov; - break :op .{ .imm = Immediate.s(0) }; - }, - .mov => { - if (is_obj_or_static_lib and needsZigGot(sym, 
lower.bin_file)) emit_mnemonic = .lea; - break :op .{ .mem = Memory.sib(mem_op.sib.ptr_size, .{ + _ = lower.reloc(.{ .linker_reloc = sym }); + break :op if (lower.pic) switch (mnemonic) { + .lea => { + break :op .{ .mem = Memory.rip(mem_op.sib.ptr_size, 0) }; + }, + .mov => { + if (is_obj_or_static_lib and elf_sym.flags.needs_zig_got) emit_mnemonic = .lea; + break :op .{ .mem = Memory.rip(mem_op.sib.ptr_size, 0) }; + }, + else => unreachable, + } else switch (mnemonic) { + .call => break :op if (is_obj_or_static_lib and elf_sym.flags.needs_zig_got) .{ + .imm = Immediate.s(0), + } else .{ .mem = Memory.sib(mem_op.sib.ptr_size, .{ .base = .{ .reg = .ds }, - }) }; - }, - else => unreachable, - }; + }) }, + .lea => { + emit_mnemonic = .mov; + break :op .{ .imm = Immediate.s(0) }; + }, + .mov => { + if (is_obj_or_static_lib and elf_sym.flags.needs_zig_got) emit_mnemonic = .lea; + break :op .{ .mem = Memory.sib(mem_op.sib.ptr_size, .{ + .base = .{ .reg = .ds }, + }) }; + }, + else => unreachable, + }; + } else if (lower.bin_file.cast(link.File.MachO)) |macho_file| { + const macho_sym = macho_file.getSymbol(sym.sym_index); + + if (macho_sym.flags.tlv) { + @panic("TODO lower TLS access on macOS"); + } + + _ = lower.reloc(.{ .linker_reloc = sym }); + break :op switch (mnemonic) { + .lea => { + break :op .{ .mem = Memory.rip(mem_op.sib.ptr_size, 0) }; + }, + .mov => { + if (is_obj_or_static_lib and macho_sym.flags.needs_zig_got) emit_mnemonic = .lea; + break :op .{ .mem = Memory.rip(mem_op.sib.ptr_size, 0) }; + }, + else => unreachable, + }; + } }, }, }; diff --git a/src/codegen.zig b/src/codegen.zig index 83d0215cfa..8172e15c3e 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -1045,7 +1045,12 @@ fn genUnnamedConst( const local = elf_file.symbol(local_sym_index); return GenResult.mcv(.{ .load_symbol = local.esym_index }); }, - .macho, .coff => { + .macho => { + const macho_file = lf.cast(link.File.MachO).?; + const local = macho_file.getSymbol(local_sym_index); + return 
GenResult.mcv(.{ .load_symbol = local.nlist_idx }); + }, + .coff => { return GenResult.mcv(.{ .load_direct = local_sym_index }); }, .plan9 => { diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 165dbbe17a..34f2d0b49e 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -4280,7 +4280,7 @@ const Md5 = std.crypto.hash.Md5; const Module = @import("../Module.zig"); const InternPool = @import("../InternPool.zig"); const RebaseSection = synthetic.RebaseSection; -const Relocation = @import("MachO/Relocation.zig"); +pub const Relocation = @import("MachO/Relocation.zig"); const StringTable = @import("StringTable.zig"); const StubsSection = synthetic.StubsSection; const StubsHelperSection = synthetic.StubsHelperSection; diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 25cbe6ca64..5389303337 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -686,6 +686,10 @@ fn resolveRelocInner( } }, + .zig_got_load => { + @panic("TODO resolve __got_zig indirection reloc"); + }, + .tlv => { assert(rel.tag == .@"extern"); assert(rel.meta.length == 2); @@ -987,6 +991,7 @@ pub fn writeRelocs(self: Atom, macho_file: *MachO, code: []u8, buffer: *std.Arra .subtractor => .ARM64_RELOC_SUBTRACTOR, .unsigned => .ARM64_RELOC_UNSIGNED, + .zig_got_load, .signed, .signed1, .signed2, @@ -1030,6 +1035,7 @@ pub fn writeRelocs(self: Atom, macho_file: *MachO, code: []u8, buffer: *std.Arra .subtractor => .X86_64_RELOC_SUBTRACTOR, .unsigned => .X86_64_RELOC_UNSIGNED, + .zig_got_load, .page, .pageoff, .got_load_page, diff --git a/src/link/MachO/Relocation.zig b/src/link/MachO/Relocation.zig index f77e0c8792..20891f07e3 100644 --- a/src/link/MachO/Relocation.zig +++ b/src/link/MachO/Relocation.zig @@ -99,6 +99,8 @@ pub const Type = enum { got_load, /// RIP-relative TLV load (X86_64_RELOC_TLV) tlv, + /// Zig-specific __got_zig indirection + zig_got_load, // arm64 /// PC-relative load (distance to page, ARM64_RELOC_PAGE21) diff --git 
a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 0199c0707c..96ef3c1996 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -248,7 +248,7 @@ pub fn getDeclVAddr( .pcrel = false, .has_subtractor = false, .length = 3, - .symbolnum = @intCast(sym.nlist_idx), + .symbolnum = 0, }, }); return vaddr; @@ -274,7 +274,7 @@ pub fn getAnonDeclVAddr( .pcrel = false, .has_subtractor = false, .length = 3, - .symbolnum = @intCast(sym.nlist_idx), + .symbolnum = 0, }, }); return vaddr; @@ -604,25 +604,26 @@ fn getDeclOutputSection( _ = self; const mod = macho_file.base.comp.module.?; const any_non_single_threaded = macho_file.base.comp.config.any_non_single_threaded; + _ = any_non_single_threaded; const sect_id: u8 = switch (decl.ty.zigTypeTag(mod)) { .Fn => macho_file.zig_text_section_index.?, else => blk: { if (decl.getOwnedVariable(mod)) |variable| { - if (variable.is_threadlocal and any_non_single_threaded) { - const is_all_zeroes = for (code) |byte| { - if (byte != 0) break false; - } else true; - if (is_all_zeroes) break :blk macho_file.getSectionByName("__DATA", "__thread_bss") orelse try macho_file.addSection( - "__DATA", - "__thread_bss", - .{ .flags = macho.S_THREAD_LOCAL_ZEROFILL }, - ); - break :blk macho_file.getSectionByName("__DATA", "__thread_data") orelse try macho_file.addSection( - "__DATA", - "__thread_data", - .{ .flags = macho.S_THREAD_LOCAL_REGULAR }, - ); - } + // if (variable.is_threadlocal and any_non_single_threaded) { + // const is_all_zeroes = for (code) |byte| { + // if (byte != 0) break false; + // } else true; + // if (is_all_zeroes) break :blk macho_file.getSectionByName("__DATA", "__thread_bss") orelse try macho_file.addSection( + // "__DATA", + // "__thread_bss", + // .{ .flags = macho.S_THREAD_LOCAL_ZEROFILL }, + // ); + // break :blk macho_file.getSectionByName("__DATA", "__thread_data") orelse try macho_file.addSection( + // "__DATA", + // "__thread_data", + // .{ .flags = 
macho.S_THREAD_LOCAL_REGULAR }, + // ); + // } if (variable.is_const) break :blk macho_file.zig_const_section_index.?; if (Value.fromInterned(variable.init).isUndefDeep(mod)) { @@ -917,7 +918,7 @@ fn updateLazySymbol( sym.value = 0; sym.flags.needs_zig_got = true; - nlist.st_value = 0; + nlist.n_value = 0; if (!macho_file.base.isRelocatable()) { const gop = try sym.getOrCreateZigGotEntry(symbol_index, macho_file); From a79a038e61541d6faffaab70c82a18a812c2e9df Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 18 Jan 2024 19:11:48 +0100 Subject: [PATCH 093/133] codegen: implement more missing bits --- src/arch/x86_64/CodeGen.zig | 41 ++++++++++++++++++++----------------- src/codegen.zig | 7 ++++--- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 2b4932c5c2..2cc5fe267e 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -13507,24 +13507,27 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr }, .lea_symbol => |sym_index| { const atom_index = try self.owner.getSymbolIndex(self); - if (self.bin_file.cast(link.File.Elf)) |_| { - try self.asmRegisterMemory( - .{ ._, .lea }, - dst_reg.to64(), - .{ - .base = .{ .reloc = .{ - .atom_index = atom_index, - .sym_index = sym_index.sym, - } }, - .mod = .{ .rm = .{ - .size = .qword, - .disp = sym_index.off, - } }, - }, - ); - } else return self.fail("TODO emit symbol sequence on {s}", .{ - @tagName(self.bin_file.tag), - }); + switch (self.bin_file.tag) { + .elf, .macho => { + try self.asmRegisterMemory( + .{ ._, .lea }, + dst_reg.to64(), + .{ + .base = .{ .reloc = .{ + .atom_index = atom_index, + .sym_index = sym_index.sym, + } }, + .mod = .{ .rm = .{ + .size = .qword, + .disp = sym_index.off, + } }, + }, + ); + }, + else => return self.fail("TODO emit symbol sequence on {s}", .{ + @tagName(self.bin_file.tag), + }), + } }, .lea_direct, .lea_got => |sym_index| { const atom_index = try 
self.owner.getSymbolIndex(self); @@ -16066,7 +16069,7 @@ fn resolveInst(self: *Self, ref: Air.Inst.Ref) InnerError!MCValue { ); break :init .{ .load_frame = .{ .index = frame_index } }; } else if (self.bin_file.cast(link.File.MachO)) |_| { - return self.fail("TODO implement lowering TLV variable to stack", .{}); + return self.fail("TODO implement saving TLV variable to stack", .{}); } else break :init const_mcv, else => break :init const_mcv, } diff --git a/src/codegen.zig b/src/codegen.zig index 8172e15c3e..c39c541235 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -998,9 +998,10 @@ fn genDeclRef( } const sym_index = try macho_file.getZigObject().?.getOrCreateMetadataForDecl(macho_file, decl_index); const sym = macho_file.getSymbol(sym_index); - if (is_threadlocal) { - return GenResult.mcv(.{ .load_tlv = sym.nlist_idx }); - } + // TODO: tlv + // if (is_threadlocal) { + // return GenResult.mcv(.{ .load_tlv = sym.nlist_idx }); + // } return GenResult.mcv(.{ .load_symbol = sym.nlist_idx }); } else if (lf.cast(link.File.Coff)) |coff_file| { if (is_extern) { From 9a78173ee0019c6b0bd1a73926d1413f603b20ac Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 18 Jan 2024 19:15:06 +0100 Subject: [PATCH 094/133] macho: implement ZigObject.flushModule --- src/link/MachO/ZigObject.zig | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 96ef3c1996..ef866167ad 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -223,9 +223,33 @@ pub fn getInputSection(self: ZigObject, atom: Atom, macho_file: *MachO) macho.se } pub fn flushModule(self: *ZigObject, macho_file: *MachO) !void { - _ = self; - _ = macho_file; - @panic("TODO flushModule"); + // Handle any lazy symbols that were emitted by incremental compilation. 
+ if (self.lazy_syms.getPtr(.none)) |metadata| { + const zcu = macho_file.base.comp.module.?; + + // Most lazy symbols can be updated on first use, but + // anyerror needs to wait for everything to be flushed. + if (metadata.text_state != .unused) self.updateLazySymbol( + macho_file, + link.File.LazySymbol.initDecl(.code, null, zcu), + metadata.text_symbol_index, + ) catch |err| return switch (err) { + error.CodegenFail => error.FlushFailure, + else => |e| e, + }; + if (metadata.const_state != .unused) self.updateLazySymbol( + macho_file, + link.File.LazySymbol.initDecl(.const_data, null, zcu), + metadata.const_symbol_index, + ) catch |err| return switch (err) { + error.CodegenFail => error.FlushFailure, + else => |e| e, + }; + } + for (self.lazy_syms.values()) |*metadata| { + if (metadata.text_state != .unused) metadata.text_state = .flushed; + if (metadata.const_state != .unused) metadata.const_state = .flushed; + } } pub fn getDeclVAddr( From 5d0ea3fd275167b81ff0c75789a3ea4408527f4e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 18 Jan 2024 19:18:46 +0100 Subject: [PATCH 095/133] macho: let us dump some state and debug! 
--- src/link/MachO.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 34f2d0b49e..cd0d13b12a 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -255,8 +255,6 @@ pub fn createEmpty( .program_code_size_hint = options.program_code_size_hint, }); - std.debug.print("{}", .{self.dumpState()}); - // TODO init dwarf // if (comp.config.debug_format != .strip) { @@ -544,6 +542,8 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node self.internal_object = index; } + state_log.debug("{}", .{self.dumpState()}); + try self.addUndefinedGlobals(); try self.resolveSymbols(); try self.resolveSyntheticSymbols(); From 5ef63e333ad47c63ac648da48c25078bba0cccbb Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 18 Jan 2024 20:40:47 +0100 Subject: [PATCH 096/133] macho: set atom_index for newly created Atom in ZigObject --- src/link/MachO.zig | 7 +++++++ src/link/MachO/ZigObject.zig | 1 + 2 files changed, 8 insertions(+) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index cd0d13b12a..7af13ca8d2 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3779,6 +3779,13 @@ fn fmtDumpState( ) !void { _ = options; _ = unused_fmt_string; + if (self.getZigObject()) |zo| { + try writer.print("zig_object({d}) : {s}\n", .{ zo.index, zo.path }); + try writer.print("{}{}\n", .{ + zo.fmtAtoms(self), + zo.fmtSymtab(self), + }); + } for (self.objects.items) |index| { const object = self.getFile(index).?.object; try writer.print("object({d}) : {} : has_debug({})", .{ diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index ef866167ad..aa29dc59e1 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -110,6 +110,7 @@ pub fn addAtom(self: *ZigObject, macho_file: *MachO) !Symbol.Index { const atom = macho_file.getAtom(atom_index).?; atom.file = self.index; + atom.atom_index = atom_index; const symbol = macho_file.getSymbol(symbol_index); 
symbol.file = self.index; From 30b7d3e45f25195791f8eba74a2f89d41b325049 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 18 Jan 2024 21:23:00 +0100 Subject: [PATCH 097/133] macho: implement resolveSymbols in ZigObject --- src/link/MachO.zig | 16 +++++++-- src/link/MachO/ZigObject.zig | 64 ++++++++++++++++++++++++++++++++++-- 2 files changed, 74 insertions(+), 6 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 7af13ca8d2..6763d21fdf 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -564,7 +564,7 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node }, }; - self.markImportsAndExports(); + try self.markImportsAndExports(); self.deadStripDylibs(); for (self.dylibs.items, 1..) |index, ord| { @@ -1533,6 +1533,10 @@ fn checkDuplicates(self: *MachO) !void { dupes.deinit(); } + if (self.getZigObject()) |zo| { + try zo.checkDuplicates(&dupes, self); + } + for (self.objects.items) |index| { try self.getFile(index).?.object.checkDuplicates(&dupes, self); } @@ -1540,8 +1544,14 @@ fn checkDuplicates(self: *MachO) !void { try self.reportDuplicates(dupes); } -fn markImportsAndExports(self: *MachO) void { - for (self.objects.items) |index| { +fn markImportsAndExports(self: *MachO) error{OutOfMemory}!void { + const gpa = self.base.comp.gpa; + var objects = try std.ArrayList(File.Index).initCapacity(gpa, self.objects.items.len + 1); + defer objects.deinit(); + if (self.getZigObject()) |zo| objects.appendAssumeCapacity(zo.index); + objects.appendSliceAssumeCapacity(self.objects.items); + + for (objects.items) |index| { for (self.getFile(index).?.getSymbols()) |sym_index| { const sym = self.getSymbol(sym_index); const file = sym.getFile(self) orelse continue; diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index aa29dc59e1..b6a8f5448b 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -137,9 +137,60 @@ pub fn freeAtomRelocs(self: *ZigObject, atom: Atom) void { } pub 
fn resolveSymbols(self: *ZigObject, macho_file: *MachO) void { - _ = self; - _ = macho_file; - @panic("TODO resolveSymbols"); + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items, 0..) |index, i| { + const nlist_idx = @as(Symbol.Index, @intCast(i)); + const nlist = self.symtab.items(.nlist)[nlist_idx]; + const atom_index = self.symtab.items(.atom)[nlist_idx]; + + if (!nlist.ext()) continue; + if (nlist.undf() and !nlist.tentative()) continue; + if (nlist.sect()) { + const atom = macho_file.getAtom(atom_index).?; + if (!atom.flags.alive) continue; + } + + const symbol = macho_file.getSymbol(index); + if (self.asFile().getSymbolRank(.{ + .archive = false, + .weak = nlist.weakDef(), + .tentative = nlist.tentative(), + }) < symbol.getSymbolRank(macho_file)) { + const value = if (nlist.sect()) blk: { + const atom = macho_file.getAtom(atom_index).?; + break :blk nlist.n_value - atom.getInputAddress(macho_file); + } else nlist.n_value; + symbol.value = value; + symbol.atom = atom_index; + symbol.nlist_idx = nlist_idx; + symbol.file = self.index; + symbol.flags.weak = nlist.weakDef(); + symbol.flags.abs = nlist.abs(); + symbol.flags.tentative = nlist.tentative(); + symbol.flags.weak_ref = false; + symbol.flags.dyn_ref = nlist.n_desc & macho.REFERENCED_DYNAMICALLY != 0; + symbol.flags.no_dead_strip = symbol.flags.no_dead_strip or nlist.noDeadStrip(); + // TODO: symbol.flags.interposable = macho_file.base.isDynLib() and macho_file.options.namespace == .flat and !nlist.pext(); + symbol.flags.interposable = false; + + if (nlist.sect() and + macho_file.sections.items(.header)[nlist.n_sect - 1].type() == macho.S_THREAD_LOCAL_VARIABLES) + { + symbol.flags.tlv = true; + } + } + + // Regardless of who the winner is, we still merge symbol visibility here. 
+ if (nlist.pext() or (nlist.weakDef() and nlist.weakRef())) { + if (symbol.visibility != .global) { + symbol.visibility = .hidden; + } + } else { + symbol.visibility = .global; + } + } } pub fn resetGlobals(self: *ZigObject, macho_file: *MachO) void { @@ -170,6 +221,13 @@ pub fn markLive(self: *ZigObject, macho_file: *MachO) void { } } +pub fn checkDuplicates(self: *ZigObject, dupes: anytype, macho_file: *MachO) !void { + _ = self; + _ = dupes; + _ = macho_file; + @panic("TODO checkDuplicates"); +} + pub fn calcSymtabSize(self: *ZigObject, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); From a8629fb8501275d826912085ccd120eb48a53199 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 18 Jan 2024 21:55:31 +0100 Subject: [PATCH 098/133] macho: fix symbol index dereference in codegen wrt ZigObject This is incredibly confusing and I really need to simplify it. Elf also possesses this shortcoming so once I get Coff up to speed it should hopefully become clear on how to refactor this. 
--- src/arch/x86_64/Emit.zig | 5 +++-- src/codegen.zig | 2 +- src/link/MachO.zig | 27 ++++++++++++++++++++------- src/link/MachO/Atom.zig | 31 +++++++++++++++++++++++-------- src/link/MachO/Object.zig | 4 +--- src/link/MachO/ZigObject.zig | 33 ++++++++++++++++++++++++++++----- 6 files changed, 76 insertions(+), 26 deletions(-) diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 9cadef1ef6..08209f5198 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -156,9 +156,10 @@ pub fn emitMir(emit: *Emit) Error!void { .Lib => emit.lower.link_mode == .Static, }; const atom = macho_file.getSymbol(data.atom_index).getAtom(macho_file).?; - const sym = macho_file.getSymbol(data.sym_index); + const sym_index = macho_file.getZigObject().?.symbols.items[data.sym_index]; + const sym = macho_file.getSymbol(sym_index); if (sym.flags.needs_zig_got and !is_obj_or_static_lib) { - _ = try sym.getOrCreateZigGotEntry(data.sym_index, macho_file); + _ = try sym.getOrCreateZigGotEntry(sym_index, macho_file); } const @"type": link.File.MachO.Relocation.Type = if (sym.flags.needs_zig_got and !is_obj_or_static_lib) .zig_got_load diff --git a/src/codegen.zig b/src/codegen.zig index c39c541235..7365c3b6b0 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -993,7 +993,7 @@ fn genDeclRef( else null; const sym_index = try macho_file.getGlobalSymbol(sym_name, lib_name); - macho_file.getSymbol(sym_index).flags.needs_got = true; + macho_file.getSymbol(macho_file.getZigObject().?.symbols.items[sym_index]).flags.needs_got = true; return GenResult.mcv(.{ .load_symbol = sym_index }); } const sym_index = try macho_file.getZigObject().?.getOrCreateMetadataForDecl(macho_file, decl_index); diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 6763d21fdf..5eb45981a6 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -542,8 +542,6 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node self.internal_object = index; } - 
state_log.debug("{}", .{self.dumpState()}); - try self.addUndefinedGlobals(); try self.resolveSymbols(); try self.resolveSyntheticSymbols(); @@ -2389,14 +2387,23 @@ fn initDyldInfoSections(self: *MachO) !void { if (self.la_symbol_ptr_sect_index != null) try self.la_symbol_ptr.addDyldRelocs(self); try self.initExportTrie(); + var objects = try std.ArrayList(File.Index).initCapacity(gpa, self.objects.items.len + 1); + defer objects.deinit(); + if (self.getZigObject()) |zo| objects.appendAssumeCapacity(zo.index); + objects.appendSliceAssumeCapacity(self.objects.items); + var nrebases: usize = 0; var nbinds: usize = 0; var nweak_binds: usize = 0; - for (self.objects.items) |index| { - const object = self.getFile(index).?.object; - nrebases += object.num_rebase_relocs; - nbinds += object.num_bind_relocs; - nweak_binds += object.num_weak_bind_relocs; + for (objects.items) |index| { + const ctx = switch (self.getFile(index).?) { + .zig_object => |x| x.dynamic_relocs, + .object => |x| x.dynamic_relocs, + else => unreachable, + }; + nrebases += ctx.rebase_relocs; + nbinds += ctx.bind_relocs; + nweak_binds += ctx.weak_bind_relocs; } try self.rebase.entries.ensureUnusedCapacity(gpa, nrebases); try self.bind.entries.ensureUnusedCapacity(gpa, nbinds); @@ -3947,6 +3954,12 @@ const HotUpdateState = struct { mach_task: ?std.os.darwin.MachTask = null, }; +pub const DynamicRelocs = struct { + rebase_relocs: u32 = 0, + bind_relocs: u32 = 0, + weak_bind_relocs: u32 = 0, +}; + pub const SymtabCtx = struct { ilocal: u32 = 0, istab: u32 = 0, diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 5389303337..b30344854f 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -402,7 +402,11 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { defer tracy.end(); assert(self.flags.alive); - const object = self.getFile(macho_file).object; + const dynrel_ctx = switch (self.getFile(macho_file)) { + .zig_object => |x| &x.dynamic_relocs, + .object => |x| 
&x.dynamic_relocs, + else => unreachable, + }; const relocs = self.getRelocs(macho_file); for (relocs) |rel| { @@ -437,6 +441,10 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { } }, + .zig_got_load => { + assert(rel.getTargetSymbol(macho_file).flags.has_zig_got); + }, + .got => { rel.getTargetSymbol(macho_file).flags.needs_got = true; }, @@ -448,7 +456,7 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { const symbol = rel.getTargetSymbol(macho_file); if (!symbol.flags.tlv) { try macho_file.reportParseError2( - object.index, + self.getFile(macho_file).getIndex(), "{s}: illegal thread-local variable reference to regular symbol {s}", .{ self.getName(macho_file), symbol.getName(macho_file) }, ); @@ -470,27 +478,34 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { continue; } if (symbol.flags.import) { - object.num_bind_relocs += 1; + dynrel_ctx.bind_relocs += 1; if (symbol.flags.weak) { - object.num_weak_bind_relocs += 1; + dynrel_ctx.weak_bind_relocs += 1; macho_file.binds_to_weak = true; } continue; } if (symbol.flags.@"export") { if (symbol.flags.weak) { - object.num_weak_bind_relocs += 1; + dynrel_ctx.weak_bind_relocs += 1; macho_file.binds_to_weak = true; } else if (symbol.flags.interposable) { - object.num_bind_relocs += 1; + dynrel_ctx.bind_relocs += 1; } } } - object.num_rebase_relocs += 1; + dynrel_ctx.rebase_relocs += 1; } }, - else => {}, + .signed, + .signed1, + .signed2, + .signed4, + .page, + .pageoff, + .subtractor, + => {}, } } } diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 3fe4142ebc..11846a66d3 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -25,10 +25,8 @@ unwind_records: std.ArrayListUnmanaged(UnwindInfo.Record.Index) = .{}, alive: bool = true, hidden: bool = false, -num_rebase_relocs: u32 = 0, -num_bind_relocs: u32 = 0, -num_weak_bind_relocs: u32 = 0, +dynamic_relocs: MachO.DynamicRelocs = .{}, output_symtab_ctx: MachO.SymtabCtx = .{}, pub fn isObject(path: 
[]const u8) !bool { diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index b6a8f5448b..af805600c5 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -41,6 +41,7 @@ anon_decls: AnonDeclTable = .{}, /// A table of relocations. relocs: RelocationTable = .{}, +dynamic_relocs: MachO.DynamicRelocs = .{}, output_symtab_ctx: MachO.SymtabCtx = .{}, pub fn init(self: *ZigObject, macho_file: *MachO) !void { @@ -222,10 +223,31 @@ pub fn markLive(self: *ZigObject, macho_file: *MachO) void { } pub fn checkDuplicates(self: *ZigObject, dupes: anytype, macho_file: *MachO) !void { - _ = self; - _ = dupes; - _ = macho_file; - @panic("TODO checkDuplicates"); + for (self.symbols.items, 0..) |index, nlist_idx| { + const sym = macho_file.getSymbol(index); + if (sym.visibility != .global) continue; + const file = sym.getFile(macho_file) orelse continue; + if (file.getIndex() == self.index) continue; + + const nlist = self.symtab.items(.nlist)[nlist_idx]; + if (!nlist.undf() and !nlist.tentative() and !(nlist.weakDef() or nlist.pext())) { + const gop = try dupes.getOrPut(index); + if (!gop.found_existing) { + gop.value_ptr.* = .{}; + } + try gop.value_ptr.append(macho_file.base.comp.gpa, self.index); + } + } +} + +pub fn scanRelocs(self: *ZigObject, macho_file: *MachO) !void { + for (self.atoms.items) |atom_index| { + const atom = macho_file.getAtom(atom_index) orelse continue; + if (!atom.flags.alive) continue; + const sect = atom.getInputSection(macho_file); + if (sect.isZerofill()) continue; + try atom.scanRelocs(macho_file); + } } pub fn calcSymtabSize(self: *ZigObject, macho_file: *MachO) !void { @@ -537,7 +559,8 @@ pub fn updateDecl( const name = mod.intern_pool.stringToSlice(decl.name); const lib_name = mod.intern_pool.stringToSliceUnwrap(variable.lib_name); const index = try self.getGlobalSymbol(macho_file, name, lib_name); - macho_file.getSymbol(index).flags.needs_got = true; + const actual_index = self.symbols.items[index]; + 
macho_file.getSymbol(actual_index).flags.needs_got = true; return; } From 5c951cd21175c935215979f7599e8e340d86845b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 19 Jan 2024 13:47:39 +0100 Subject: [PATCH 099/133] macho: again fix symbol index dereference in codegen wrt ZigObject --- src/arch/x86_64/Emit.zig | 5 +++-- src/arch/x86_64/Lower.zig | 3 ++- src/link/MachO.zig | 2 ++ src/link/MachO/ZigObject.zig | 16 +++++++++------- src/link/MachO/synthetic.zig | 1 + 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 08209f5198..97899f224d 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -51,10 +51,11 @@ pub fn emitMir(emit: *Emit) Error!void { } else if (emit.lower.bin_file.cast(link.File.MachO)) |macho_file| { // Add relocation to the decl. const atom = macho_file.getSymbol(symbol.atom_index).getAtom(macho_file).?; + const sym_index = macho_file.getZigObject().?.symbols.items[symbol.sym_index]; try atom.addReloc(macho_file, .{ .tag = .@"extern", .offset = end_offset - 4, - .target = symbol.sym_index, + .target = sym_index, .addend = 0, .type = .branch, .meta = .{ @@ -170,7 +171,7 @@ pub fn emitMir(emit: *Emit) Error!void { try atom.addReloc(macho_file, .{ .tag = .@"extern", .offset = @intCast(end_offset - 4), - .target = data.sym_index, + .target = sym_index, .addend = 0, .type = @"type", .meta = .{ diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index eb3ed88b37..0b48afe0c6 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -424,7 +424,8 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) else => unreachable, }; } else if (lower.bin_file.cast(link.File.MachO)) |macho_file| { - const macho_sym = macho_file.getSymbol(sym.sym_index); + const sym_index = macho_file.getZigObject().?.symbols.items[sym.sym_index]; + const macho_sym = macho_file.getSymbol(sym_index); if (macho_sym.flags.tlv) { 
@panic("TODO lower TLS access on macOS"); diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 5eb45981a6..df96318e7b 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1617,6 +1617,8 @@ fn scanRelocs(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); + if (self.getZigObject()) |zo| try zo.scanRelocs(self); + for (self.objects.items) |index| { try self.getFile(index).?.object.scanRelocs(self); } diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index af805600c5..4b677d3d61 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -346,7 +346,7 @@ pub fn getDeclVAddr( try parent_atom.addReloc(macho_file, .{ .tag = .@"extern", .offset = @intCast(reloc_info.offset), - .target = sym.nlist_idx, + .target = sym_index, .addend = reloc_info.addend, .type = .unsigned, .meta = .{ @@ -372,7 +372,7 @@ pub fn getAnonDeclVAddr( try parent_atom.addReloc(macho_file, .{ .tag = .@"extern", .offset = @intCast(reloc_info.offset), - .target = sym.nlist_idx, + .target = sym_index, .addend = reloc_info.addend, .type = .unsigned, .meta = .{ @@ -1102,15 +1102,17 @@ pub fn getOrCreateMetadataForDecl( const gop = try self.decls.getOrPut(gpa, decl_index); if (!gop.found_existing) { const any_non_single_threaded = macho_file.base.comp.config.any_non_single_threaded; + _ = any_non_single_threaded; const sym_index = try self.addAtom(macho_file); const mod = macho_file.base.comp.module.?; const decl = mod.declPtr(decl_index); + _ = decl; const sym = macho_file.getSymbol(sym_index); - if (decl.getOwnedVariable(mod)) |variable| { - if (variable.is_threadlocal and any_non_single_threaded) { - sym.flags.tlv = true; - } - } + // if (decl.getOwnedVariable(mod)) |variable| { + // if (variable.is_threadlocal and any_non_single_threaded) { + // sym.flags.tlv = true; + // } + // } if (!sym.flags.tlv) { sym.flags.needs_zig_got = true; } diff --git a/src/link/MachO/synthetic.zig b/src/link/MachO/synthetic.zig index 
5a6e316da5..b749d7daec 100644 --- a/src/link/MachO/synthetic.zig +++ b/src/link/MachO/synthetic.zig @@ -24,6 +24,7 @@ pub const ZigGotSection = struct { const entry = &zig_got.entries.items[index]; entry.* = sym_index; const symbol = macho_file.getSymbol(sym_index); + assert(symbol.flags.needs_zig_got); symbol.flags.has_zig_got = true; try symbol.addExtra(.{ .zig_got = index }, macho_file); return index; From 0143dd8fdcf39f308a1a68098bb54f78068ac1e6 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 19 Jan 2024 14:09:06 +0100 Subject: [PATCH 100/133] macho: fix '_' prefixing rules for exports --- src/codegen.zig | 4 +--- src/link/MachO.zig | 4 +--- src/link/MachO/ZigObject.zig | 16 ++++++---------- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/codegen.zig b/src/codegen.zig index 7365c3b6b0..49f7feda8f 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -986,13 +986,11 @@ fn genDeclRef( } else if (lf.cast(link.File.MachO)) |macho_file| { if (is_extern) { const name = zcu.intern_pool.stringToSlice(decl.name); - const sym_name = try std.fmt.allocPrint(gpa, "_{s}", .{name}); - defer gpa.free(sym_name); const lib_name = if (decl.getOwnedVariable(zcu)) |ov| zcu.intern_pool.stringToSliceUnwrap(ov.lib_name) else null; - const sym_index = try macho_file.getGlobalSymbol(sym_name, lib_name); + const sym_index = try macho_file.getGlobalSymbol(name, lib_name); macho_file.getSymbol(macho_file.getZigObject().?.symbols.items[sym_index]).flags.needs_got = true; return GenResult.mcv(.{ .load_symbol = sym_index }); } diff --git a/src/link/MachO.zig b/src/link/MachO.zig index df96318e7b..cb2debf6f2 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3243,9 +3243,7 @@ fn initMetadata(self: *MachO, options: InitMetadataOptions) !void { const appendSect = struct { fn appendSect(macho_file: *MachO, sect_id: u8, seg_id: u8) void { const sect = &macho_file.sections.items(.header)[sect_id]; - const seg = &macho_file.segments.items[seg_id]; - seg.cmdsize += 
@sizeOf(macho.section_64); - seg.nsects += 1; + const seg = macho_file.segments.items[seg_id]; sect.addr = seg.vmaddr; sect.offset = @intCast(seg.fileoff); sect.size = seg.vmsize; diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 4b677d3d61..f2e80f69cd 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -916,10 +916,7 @@ pub fn updateExports( continue; } - const exp_name = try std.fmt.allocPrint(gpa, "_{}", .{exp.opts.name.fmt(&mod.intern_pool)}); - defer gpa.free(exp_name); - - const name_off = try macho_file.strings.insert(gpa, exp_name); + const exp_name = mod.intern_pool.stringToSlice(exp.opts.name); const global_nlist_index = if (metadata.@"export"(self, macho_file, exp_name)) |exp_index| exp_index.* else blk: { @@ -928,7 +925,6 @@ pub fn updateExports( break :blk global_nlist_index; }; const global_nlist = &self.symtab.items(.nlist)[global_nlist_index]; - global_nlist.n_strx = name_off; global_nlist.n_value = nlist.n_value; global_nlist.n_sect = nlist.n_sect; global_nlist.n_type = macho.N_EXT | macho.N_SECT; @@ -1053,13 +1049,11 @@ pub fn deleteDeclExport( macho_file: *MachO, decl_index: InternPool.DeclIndex, name: InternPool.NullTerminatedString, -) Allocator.Error!void { +) void { const metadata = self.decls.getPtr(decl_index) orelse return; - const gpa = macho_file.base.comp.gpa; const mod = macho_file.base.comp.module.?; - const exp_name = try std.fmt.allocPrint(gpa, "_{s}", .{mod.intern_pool.stringToSlice(name)}); - defer gpa.free(exp_name); + const exp_name = mod.intern_pool.stringToSlice(name); const nlist_index = metadata.@"export"(self, macho_file, exp_name) orelse return; log.debug("deleting export '{s}'", .{exp_name}); @@ -1079,7 +1073,9 @@ pub fn deleteDeclExport( pub fn getGlobalSymbol(self: *ZigObject, macho_file: *MachO, name: []const u8, lib_name: ?[]const u8) !u32 { _ = lib_name; const gpa = macho_file.base.comp.gpa; - const off = try macho_file.strings.insert(gpa, name); + const 
sym_name = try std.fmt.allocPrint(gpa, "_{s}", .{name}); + defer gpa.free(sym_name); + const off = try macho_file.strings.insert(gpa, sym_name); const lookup_gop = try self.globals_lookup.getOrPut(gpa, off); if (!lookup_gop.found_existing) { const nlist_index = try self.addNlist(gpa); From 1bdcb23b137b65696d7cad4adcc85697d359995a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 19 Jan 2024 17:09:07 +0100 Subject: [PATCH 101/133] macho: allocate sections and segments that need it --- src/link/MachO.zig | 81 ++++++++++++++++++++++++++--------------- src/link/MachO/Atom.zig | 3 ++ 2 files changed, 55 insertions(+), 29 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index cb2debf6f2..93961c653f 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -40,6 +40,7 @@ codesig_cmd: macho.linkedit_data_command = .{ .cmd = .CODE_SIGNATURE }, pagezero_seg_index: ?u8 = null, text_seg_index: ?u8 = null, linkedit_seg_index: ?u8 = null, +text_sect_index: ?u8 = null, data_sect_index: ?u8 = null, got_sect_index: ?u8 = null, stubs_sect_index: ?u8 = null, @@ -1757,6 +1758,17 @@ fn initOutputSections(self: *MachO) !void { atom.out_n_sect = try Atom.initOutputSection(atom.getInputSection(self), self); } } + if (self.text_sect_index == null) { + self.text_sect_index = try self.addSection("__TEXT", "__text", .{ + .alignment = switch (self.getTarget().cpu.arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, + }, + .flags = macho.S_SYMBOL_STUBS | + macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }); + } if (self.data_sect_index == null) { self.data_sect_index = try self.addSection("__DATA", "__data", .{}); } @@ -2227,21 +2239,25 @@ fn allocateSections(self: *MachO) !void { var next_seg_id: u8 = if (self.pagezero_seg_index) |index| index + 1 else 0; for (slice.items(.header), slice.items(.segment_id)) |*header, seg_id| { - if (seg_id != next_seg_id) { - vmaddr = mem.alignForward(u64, vmaddr, page_size); - fileoff = 
mem.alignForward(u32, fileoff, page_size); - } + if (mem.indexOf(u8, header.segName(), "ZIG")) |_| { + vmaddr = header.addr + header.size; + } else { + if (seg_id != next_seg_id) { + vmaddr = mem.alignForward(u64, vmaddr, page_size); + fileoff = mem.alignForward(u32, fileoff, page_size); + } - const alignment = try math.powi(u32, 2, header.@"align"); + const alignment = try math.powi(u32, 2, header.@"align"); - vmaddr = mem.alignForward(u64, vmaddr, alignment); - header.addr = vmaddr; - vmaddr += header.size; + vmaddr = mem.alignForward(u64, vmaddr, alignment); + header.addr = vmaddr; + vmaddr += header.size; - if (!header.isZerofill()) { - fileoff = mem.alignForward(u32, fileoff, alignment); - header.offset = fileoff; - fileoff += @intCast(header.size); + if (!header.isZerofill()) { + fileoff = mem.alignForward(u32, fileoff, alignment); + header.offset = fileoff; + fileoff += @intCast(header.size); + } } next_seg_id = seg_id; @@ -2260,28 +2276,35 @@ fn allocateSegments(self: *MachO) void { const slice = self.sections.slice(); var next_sect_id: u8 = 0; for (self.segments.items[index..], index..) 
|*seg, seg_id| { - seg.vmaddr = vmaddr; - seg.fileoff = fileoff; + if (mem.indexOf(u8, seg.segName(), "ZIG")) |_| { + vmaddr = mem.alignForward(u64, seg.vmaddr + seg.vmsize, page_size); + if (mem.eql(u8, seg.segName(), "__BSS_ZIG")) { + fileoff = mem.alignForward(u64, seg.fileoff + seg.filesize, page_size); + } + } else { + seg.vmaddr = vmaddr; + seg.fileoff = fileoff; - for ( - slice.items(.header)[next_sect_id..], - slice.items(.segment_id)[next_sect_id..], - ) |header, sid| { - if (seg_id != sid) break; + for ( + slice.items(.header)[next_sect_id..], + slice.items(.segment_id)[next_sect_id..], + ) |header, sid| { + if (seg_id != sid) break; - vmaddr = header.addr + header.size; - if (!header.isZerofill()) { - fileoff = header.offset + header.size; + vmaddr = header.addr + header.size; + if (!header.isZerofill()) { + fileoff = header.offset + header.size; + } + + next_sect_id += 1; } - next_sect_id += 1; + vmaddr = mem.alignForward(u64, vmaddr, page_size); + fileoff = mem.alignForward(u64, fileoff, page_size); + + seg.vmsize = vmaddr - seg.vmaddr; + seg.filesize = fileoff - seg.fileoff; } - - vmaddr = mem.alignForward(u64, vmaddr, page_size); - fileoff = mem.alignForward(u64, fileoff, page_size); - - seg.vmsize = vmaddr - seg.vmaddr; - seg.filesize = fileoff - seg.fileoff; } } diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index b30344854f..d76bea03e0 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -170,6 +170,9 @@ pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 { sectname, .{ .flags = flags }, ); + if (mem.eql(u8, segname, "__TEXT") and mem.eql(u8, sectname, "__text")) { + macho_file.text_sect_index = osec; + } if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__data")) { macho_file.data_sect_index = osec; } From f4da8145358cccd92018838a6f890f3a229f7df8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 19 Jan 2024 17:26:47 +0100 Subject: [PATCH 102/133] macho: init linkedit segment 
separately --- src/link/MachO.zig | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 93961c653f..8011786fd7 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -592,6 +592,8 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node self.allocateAtoms(); self.allocateSyntheticSymbols(); + try self.initLinkeditSegment(); + state_log.debug("{}", .{self.dumpState()}); try self.initDyldInfoSections(); @@ -2167,18 +2169,6 @@ fn initSegments(self: *MachO) !void { }); } - // Add __LINKEDIT - { - const protection = getSegmentProt("__LINKEDIT"); - self.linkedit_seg_index = @intCast(self.segments.items.len); - try self.segments.append(gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = makeStaticString("__LINKEDIT"), - .maxprot = protection, - .initprot = protection, - }); - } - // __TEXT segment is non-optional if (self.getSegmentByName("__TEXT") == null) { const protection = getSegmentProt("__TEXT"); @@ -2222,7 +2212,6 @@ fn initSegments(self: *MachO) !void { self.pagezero_seg_index = self.getSegmentByName("__PAGEZERO"); self.text_seg_index = self.getSegmentByName("__TEXT").?; - self.linkedit_seg_index = self.getSegmentByName("__LINKEDIT").?; } fn allocateSections(self: *MachO) !void { @@ -2401,6 +2390,23 @@ fn allocateSyntheticSymbols(self: *MachO) void { } } +fn initLinkeditSegment(self: *MachO) !void { + var fileoff: u64 = 0; + var vmaddr: u64 = 0; + + for (self.segments.items) |seg| { + if (fileoff < seg.fileoff + seg.filesize) fileoff = seg.fileoff + seg.filesize; + if (vmaddr < seg.vmaddr + seg.vmsize) vmaddr = seg.vmaddr + seg.vmsize; + } + + const page_size = self.getPageSize(); + self.linkedit_seg_index = try self.addSegment("__LINKEDIT", .{ + .vmaddr = mem.alignForward(u64, vmaddr, page_size), + .fileoff = mem.alignForward(u64, fileoff, page_size), + .prot = getSegmentProt("__LINKEDIT"), + }); +} + fn 
initDyldInfoSections(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); From 16b66588f02743aafeb5972644271d92bdcf1051 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 19 Jan 2024 18:01:14 +0100 Subject: [PATCH 103/133] macho: allocate __LINKEDIT in a separate pass --- src/link/MachO.zig | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 8011786fd7..385a848326 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -591,8 +591,7 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node self.allocateSegments(); self.allocateAtoms(); self.allocateSyntheticSymbols(); - - try self.initLinkeditSegment(); + try self.allocateLinkeditSegment(); state_log.debug("{}", .{self.dumpState()}); @@ -2180,6 +2179,18 @@ fn initSegments(self: *MachO) !void { }); } + // Add __LINKEDIT + { + const protection = getSegmentProt("__LINKEDIT"); + self.linkedit_seg_index = @intCast(self.segments.items.len); + try self.segments.append(gpa, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString("__LINKEDIT"), + .maxprot = protection, + .initprot = protection, + }); + } + const sortFn = struct { fn sortFn(ctx: void, lhs: macho.segment_command_64, rhs: macho.segment_command_64) bool { _ = ctx; @@ -2261,10 +2272,11 @@ fn allocateSegments(self: *MachO) void { 0; var fileoff: u64 = 0; const index = if (self.pagezero_seg_index) |index| index + 1 else 0; + const last_index = self.linkedit_seg_index.?; // TODO: please clean this up! const slice = self.sections.slice(); var next_sect_id: u8 = 0; - for (self.segments.items[index..], index..) 
|*seg, seg_id| { + for (self.segments.items[index..last_index], index..last_index) |*seg, seg_id| { if (mem.indexOf(u8, seg.segName(), "ZIG")) |_| { vmaddr = mem.alignForward(u64, seg.vmaddr + seg.vmsize, page_size); if (mem.eql(u8, seg.segName(), "__BSS_ZIG")) { @@ -2390,7 +2402,7 @@ fn allocateSyntheticSymbols(self: *MachO) void { } } -fn initLinkeditSegment(self: *MachO) !void { +fn allocateLinkeditSegment(self: *MachO) !void { var fileoff: u64 = 0; var vmaddr: u64 = 0; @@ -2400,11 +2412,9 @@ fn initLinkeditSegment(self: *MachO) !void { } const page_size = self.getPageSize(); - self.linkedit_seg_index = try self.addSegment("__LINKEDIT", .{ - .vmaddr = mem.alignForward(u64, vmaddr, page_size), - .fileoff = mem.alignForward(u64, fileoff, page_size), - .prot = getSegmentProt("__LINKEDIT"), - }); + const seg = self.getLinkeditSegment(); + seg.vmaddr = mem.alignForward(u64, vmaddr, page_size); + seg.fileoff = mem.alignForward(u64, fileoff, page_size); } fn initDyldInfoSections(self: *MachO) !void { From 7647db327328bffea7dc8d7286cb6be9265080be Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 19 Jan 2024 18:19:36 +0100 Subject: [PATCH 104/133] macho: reserve space for __got_zig rebase opcodes --- src/link/MachO.zig | 31 ++++++++++++++++--------------- src/link/MachO/ZigObject.zig | 22 +++++++++++----------- src/link/MachO/synthetic.zig | 8 ++++---- 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 385a848326..f4ba60a9df 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -93,11 +93,11 @@ zig_data_seg_index: ?u8 = null, zig_bss_seg_index: ?u8 = null, /// Tracked section headers with incremental updates to Zig object. 
-zig_text_section_index: ?u8 = null, -zig_got_section_index: ?u8 = null, -zig_const_section_index: ?u8 = null, -zig_data_section_index: ?u8 = null, -zig_bss_section_index: ?u8 = null, +zig_text_sect_index: ?u8 = null, +zig_got_sect_index: ?u8 = null, +zig_const_sect_index: ?u8 = null, +zig_data_sect_index: ?u8 = null, +zig_bss_sect_index: ?u8 = null, has_tlv: bool = false, binds_to_weak: bool = false, @@ -2423,6 +2423,7 @@ fn initDyldInfoSections(self: *MachO) !void { const gpa = self.base.comp.gpa; + if (self.zig_got_sect_index != null) try self.zig_got.addDyldRelocs(self); if (self.got_sect_index != null) try self.got.addDyldRelocs(self); if (self.tlv_ptr_sect_index != null) try self.tlv_ptr.addDyldRelocs(self); if (self.la_symbol_ptr_sect_index != null) try self.la_symbol_ptr.addDyldRelocs(self); @@ -3291,7 +3292,7 @@ fn initMetadata(self: *MachO, options: InitMetadataOptions) !void { }.appendSect; { - self.zig_text_section_index = try self.addSection("__TEXT_ZIG", "__text_zig", .{ + self.zig_text_sect_index = try self.addSection("__TEXT_ZIG", "__text_zig", .{ .alignment = switch (self.getTarget().cpu.arch) { .aarch64 => 2, .x86_64 => 0, @@ -3299,31 +3300,31 @@ fn initMetadata(self: *MachO, options: InitMetadataOptions) !void { }, .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, }); - appendSect(self, self.zig_text_section_index.?, self.zig_text_seg_index.?); + appendSect(self, self.zig_text_sect_index.?, self.zig_text_seg_index.?); } if (!self.base.isRelocatable()) { - self.zig_got_section_index = try self.addSection("__GOT_ZIG", "__got_zig", .{ + self.zig_got_sect_index = try self.addSection("__GOT_ZIG", "__got_zig", .{ .alignment = 3, }); - appendSect(self, self.zig_got_section_index.?, self.zig_got_seg_index.?); + appendSect(self, self.zig_got_sect_index.?, self.zig_got_seg_index.?); } { - self.zig_const_section_index = try self.addSection("__CONST_ZIG", "__const_zig", .{}); - appendSect(self, 
self.zig_const_section_index.?, self.zig_const_seg_index.?); + self.zig_const_sect_index = try self.addSection("__CONST_ZIG", "__const_zig", .{}); + appendSect(self, self.zig_const_sect_index.?, self.zig_const_seg_index.?); } { - self.zig_data_section_index = try self.addSection("__DATA_ZIG", "__data_zig", .{}); - appendSect(self, self.zig_data_section_index.?, self.zig_data_seg_index.?); + self.zig_data_sect_index = try self.addSection("__DATA_ZIG", "__data_zig", .{}); + appendSect(self, self.zig_data_sect_index.?, self.zig_data_seg_index.?); } { - self.zig_bss_section_index = try self.addSection("__BSS_ZIG", "__bss_zig", .{ + self.zig_bss_sect_index = try self.addSection("__BSS_ZIG", "__bss_zig", .{ .flags = macho.S_ZEROFILL, }); - appendSect(self, self.zig_bss_section_index.?, self.zig_bss_seg_index.?); + appendSect(self, self.zig_bss_sect_index.?, self.zig_bss_seg_index.?); } } diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index f2e80f69cd..f57a11d3a6 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -416,7 +416,7 @@ pub fn lowerAnonDecl( name, tv, decl_alignment, - macho_file.zig_const_section_index.?, + macho_file.zig_const_sect_index.?, src_loc, ) catch |err| switch (err) { error.OutOfMemory => return error.OutOfMemory, @@ -712,7 +712,7 @@ fn getDeclOutputSection( const any_non_single_threaded = macho_file.base.comp.config.any_non_single_threaded; _ = any_non_single_threaded; const sect_id: u8 = switch (decl.ty.zigTypeTag(mod)) { - .Fn => macho_file.zig_text_section_index.?, + .Fn => macho_file.zig_text_sect_index.?, else => blk: { if (decl.getOwnedVariable(mod)) |variable| { // if (variable.is_threadlocal and any_non_single_threaded) { @@ -731,13 +731,13 @@ fn getDeclOutputSection( // ); // } - if (variable.is_const) break :blk macho_file.zig_const_section_index.?; + if (variable.is_const) break :blk macho_file.zig_const_sect_index.?; if (Value.fromInterned(variable.init).isUndefDeep(mod)) { // TODO: 
get the optimize_mode from the Module that owns the decl instead // of using the root module here. break :blk switch (macho_file.base.comp.root_mod.optimize_mode) { - .Debug, .ReleaseSafe => macho_file.zig_data_section_index.?, - .ReleaseFast, .ReleaseSmall => macho_file.zig_bss_section_index.?, + .Debug, .ReleaseSafe => macho_file.zig_data_sect_index.?, + .ReleaseFast, .ReleaseSmall => macho_file.zig_bss_sect_index.?, }; } @@ -746,10 +746,10 @@ fn getDeclOutputSection( const is_all_zeroes = for (code) |byte| { if (byte != 0) break false; } else true; - if (is_all_zeroes) break :blk macho_file.zig_bss_section_index.?; - break :blk macho_file.zig_data_section_index.?; + if (is_all_zeroes) break :blk macho_file.zig_bss_sect_index.?; + break :blk macho_file.zig_data_sect_index.?; } - break :blk macho_file.zig_const_section_index.?; + break :blk macho_file.zig_const_sect_index.?; }, }; return sect_id; @@ -778,7 +778,7 @@ pub fn lowerUnnamedConst( name, typed_value, typed_value.ty.abiAlignment(mod), - macho_file.zig_const_section_index.?, + macho_file.zig_const_sect_index.?, decl.srcLoc(mod), )) { .ok => |sym_index| sym_index, @@ -995,8 +995,8 @@ fn updateLazySymbol( }; const output_section_index = switch (lazy_sym.kind) { - .code => macho_file.zig_text_section_index.?, - .const_data => macho_file.zig_const_section_index.?, + .code => macho_file.zig_text_sect_index.?, + .const_data => macho_file.zig_const_sect_index.?, }; const sym = macho_file.getSymbol(symbol_index); sym.name = name_str_index; diff --git a/src/link/MachO/synthetic.zig b/src/link/MachO/synthetic.zig index b749d7daec..882db5d414 100644 --- a/src/link/MachO/synthetic.zig +++ b/src/link/MachO/synthetic.zig @@ -32,13 +32,13 @@ pub const ZigGotSection = struct { pub fn entryOffset(zig_got: ZigGotSection, index: Index, macho_file: *MachO) u64 { _ = zig_got; - const sect = macho_file.sections.items(.header)[macho_file.zig_got_section_index.?]; + const sect = 
macho_file.sections.items(.header)[macho_file.zig_got_sect_index.?]; return sect.offset + @sizeOf(u64) * index; } pub fn entryAddress(zig_got: ZigGotSection, index: Index, macho_file: *MachO) u64 { _ = zig_got; - const sect = macho_file.sections.items(.header)[macho_file.zig_got_section_index.?]; + const sect = macho_file.sections.items(.header)[macho_file.zig_got_sect_index.?]; return sect.addr + @sizeOf(u64) * index; } @@ -50,7 +50,7 @@ pub const ZigGotSection = struct { pub fn writeOne(zig_got: *ZigGotSection, macho_file: *MachO, index: Index) !void { if (zig_got.dirty) { const needed_size = zig_got.size(macho_file); - try macho_file.growSection(macho_file.zig_got_section_index.?, needed_size); + try macho_file.growSection(macho_file.zig_got_sect_index.?, needed_size); zig_got.dirty = false; } const off = zig_got.entryOffset(index, macho_file); @@ -77,7 +77,7 @@ pub const ZigGotSection = struct { const seg_id = macho_file.sections.items(.segment_id)[macho_file.zig_got_sect_index.?]; const seg = macho_file.segments.items[seg_id]; - for (0..zig_got.symbols.items.len) |idx| { + for (0..zig_got.entries.items.len) |idx| { const addr = zig_got.entryAddress(@intCast(idx), macho_file); try macho_file.rebase.entries.append(gpa, .{ .offset = addr - seg.vmaddr, From a112241f6454b3dd5b93703337339d38c2707be7 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 19 Jan 2024 20:28:05 +0100 Subject: [PATCH 105/133] macho: re-read atom code from ZigObject when resolving relocs --- src/link/MachO.zig | 73 +++++++++++++++++++++++++++++++++- src/link/MachO/Atom.zig | 11 +---- src/link/MachO/ZigObject.zig | 20 ++++++++-- src/link/MachO/relocatable.zig | 2 +- 4 files changed, 90 insertions(+), 16 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index f4ba60a9df..36e34d5ff0 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -596,6 +596,47 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node state_log.debug("{}", 
.{self.dumpState()}); try self.initDyldInfoSections(); + + // Beyond this point, everything has been allocated a virtual address and we can resolve + // the relocations, and commit objects to file. + if (self.getZigObject()) |zo| { + var has_resolve_error = false; + + for (zo.atoms.items) |atom_index| { + const atom = self.getAtom(atom_index) orelse continue; + if (!atom.flags.alive) continue; + const sect = &self.sections.items(.header)[atom.out_n_sect]; + if (sect.isZerofill()) continue; + const code = zo.getAtomDataAlloc(self, atom.*) catch |err| switch (err) { + error.InputOutput => { + try self.reportUnexpectedError("fetching code for '{s}' failed", .{ + atom.getName(self), + }); + return error.FlushFailure; + }, + else => |e| { + try self.reportUnexpectedError("unexpected error while fetching code for '{s}': {s}", .{ + atom.getName(self), + @errorName(e), + }); + return error.FlushFailure; + }, + }; + defer gpa.free(code); + const file_offset = sect.offset + atom.value - sect.addr; + atom.resolveRelocs(self, code) catch |err| switch (err) { + error.ResolveFailed => has_resolve_error = true, + else => |e| { + try self.reportUnexpectedError("unexpected error while resolving relocations", .{}); + return e; + }, + }; + try self.base.file.?.pwriteAll(code, file_offset); + } + + if (has_resolve_error) return error.FlushFailure; + } + self.writeAtoms() catch |err| switch (err) { error.ResolveFailed => return error.FlushFailure, else => |e| { @@ -1955,6 +1996,28 @@ pub fn sortSections(self: *MachO) !void { self.sections.appendAssumeCapacity(slice.get(sorted.index)); } + if (self.getZigObject()) |zo| { + for (zo.atoms.items) |atom_index| { + const atom = self.getAtom(atom_index) orelse continue; + if (!atom.flags.alive) continue; + atom.out_n_sect = backlinks[atom.out_n_sect]; + } + + for (zo.symtab.items(.nlist)) |*sym| { + if (sym.sect()) { + sym.n_sect = backlinks[sym.n_sect]; + } + } + + for (zo.symbols.items) |sym_index| { + const sym = self.getSymbol(sym_index); 
+ const atom = sym.getAtom(self) orelse continue; + if (!atom.flags.alive) continue; + if (sym.getFile(self).?.getIndex() != zo.index) continue; + sym.out_n_sect = backlinks[sym.out_n_sect]; + } + } + for (self.objects.items) |index| { for (self.getFile(index).?.object.atoms.items) |atom_index| { const atom = self.getAtom(atom_index) orelse continue; @@ -1962,6 +2025,7 @@ pub fn sortSections(self: *MachO) !void { atom.out_n_sect = backlinks[atom.out_n_sect]; } } + if (self.getInternalObject()) |object| { for (object.atoms.items) |atom_index| { const atom = self.getAtom(atom_index) orelse continue; @@ -2517,6 +2581,7 @@ fn writeAtoms(self: *MachO) !void { const atom = self.getAtom(atom_index).?; assert(atom.flags.alive); const off = atom.value - header.addr; + @memcpy(buffer[off..][0..atom.size], atom.getFile(self).object.getAtomData(atom.*)); atom.resolveRelocs(self, buffer[off..][0..atom.size]) catch |err| switch (err) { error.ResolveFailed => has_resolve_error = true, else => |e| return e, @@ -2757,7 +2822,8 @@ pub fn calcSymtabSize(self: *MachO) !void { var files = std.ArrayList(File.Index).init(gpa); defer files.deinit(); - try files.ensureTotalCapacityPrecise(self.objects.items.len + self.dylibs.items.len + 1); + try files.ensureTotalCapacityPrecise(self.objects.items.len + self.dylibs.items.len + 2); + if (self.zig_object) |index| files.appendAssumeCapacity(index); for (self.objects.items) |index| files.appendAssumeCapacity(index); for (self.dylibs.items) |index| files.appendAssumeCapacity(index); if (self.internal_object) |index| files.appendAssumeCapacity(index); @@ -2816,6 +2882,9 @@ pub fn writeSymtab(self: *MachO, off: u32) !u32 { try self.symtab.resize(gpa, cmd.nsyms); try self.strtab.ensureUnusedCapacity(gpa, cmd.strsize - 1); + if (self.getZigObject()) |zo| { + zo.writeSymtab(self); + } for (self.objects.items) |index| { self.getFile(index).?.writeSymtab(self); } @@ -3752,7 +3821,7 @@ fn reportDependencyError( try err.addNote(self, "a dependency of 
{}", .{self.getFile(parent).?.fmtPath()}); } -fn reportUnexpectedError(self: *MachO, comptime format: []const u8, args: anytype) error{OutOfMemory}!void { +pub fn reportUnexpectedError(self: *MachO, comptime format: []const u8, args: anytype) error{OutOfMemory}!void { var err = try self.addErrorWithNotes(1); try err.addMsg(self, format, args); try err.addNote(self, "please report this as a linker bug on https://github.com/ziglang/zig/issues/new/choose", .{}); diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index d76bea03e0..90a3e129c9 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -50,14 +50,6 @@ pub fn getFile(self: Atom, macho_file: *MachO) File { return macho_file.getFile(self.file).?; } -pub fn getData(self: Atom, macho_file: *MachO) []const u8 { - return switch (self.getFile(macho_file)) { - .zig_object => @panic("TODO Atom.getData"), - .object => |x| x.getAtomData(self), - else => unreachable, - }; -} - pub fn getRelocs(self: Atom, macho_file: *MachO) []const Relocation { return switch (self.getFile(macho_file)) { .zig_object => |x| x.getAtomRelocs(self), @@ -538,7 +530,6 @@ pub fn resolveRelocs(self: Atom, macho_file: *MachO, buffer: []u8) !void { const file = self.getFile(macho_file); const name = self.getName(macho_file); const relocs = self.getRelocs(macho_file); - @memcpy(buffer, self.getData(macho_file)); relocs_log.debug("{x}: {s}", .{ self.value, name }); @@ -1153,7 +1144,7 @@ const macho = std.macho; const math = std.math; const mem = std.mem; const log = std.log.scoped(.link); -const relocs_log = std.log.scoped(.relocs); +const relocs_log = std.log.scoped(.link_relocs); const std = @import("std"); const trace = @import("../../tracy.zig").trace; diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index f57a11d3a6..23673f2438 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -128,6 +128,20 @@ pub fn addAtom(self: *ZigObject, macho_file: *MachO) !Symbol.Index { 
return symbol_index; } +/// Caller owns the memory. +pub fn getAtomDataAlloc(self: ZigObject, macho_file: *MachO, atom: Atom) ![]u8 { + const gpa = macho_file.base.comp.gpa; + assert(atom.file == self.index); + const sect = macho_file.sections.items(.header)[atom.out_n_sect]; + const file_offset = sect.offset + atom.value - sect.addr; + const size = std.math.cast(usize, atom.size) orelse return error.Overflow; + const code = try gpa.alloc(u8, size); + errdefer gpa.free(code); + const amt = try macho_file.base.file.?.preadAll(code, file_offset); + if (amt != code.len) return error.InputOutput; + return code; +} + pub fn getAtomRelocs(self: *ZigObject, atom: Atom) []const Relocation { const relocs = self.relocs.items[atom.relocs.pos]; return relocs.items[0..atom.relocs.len]; @@ -659,7 +673,7 @@ fn updateDeclCode( if (old_size > 0) { const capacity = atom.capacity(macho_file); - const need_realloc = code.len > capacity or !required_alignment.check(sym.getAddress(.{}, macho_file)); + const need_realloc = code.len > capacity or !required_alignment.check(atom.value); if (need_realloc) { try atom.grow(macho_file); @@ -678,7 +692,7 @@ fn updateDeclCode( } else if (code.len < old_size) { atom.shrink(macho_file); } else if (macho_file.getAtom(atom.next_index) == null) { - const needed_size = (sym.getAddress(.{}, macho_file) + code.len) - sect.addr; + const needed_size = atom.value + code.len - sect.addr; sect.size = needed_size; } } else { @@ -696,7 +710,7 @@ fn updateDeclCode( } if (!sect.isZerofill()) { - const file_offset = sect.offset + sym.getAddress(.{}, macho_file) - sect.addr; + const file_offset = sect.offset + atom.value - sect.addr; try macho_file.base.file.?.pwriteAll(code, file_offset); } } diff --git a/src/link/MachO/relocatable.zig b/src/link/MachO/relocatable.zig index 323bf9d76f..8b47d8eeb5 100644 --- a/src/link/MachO/relocatable.zig +++ b/src/link/MachO/relocatable.zig @@ -274,7 +274,7 @@ fn writeAtoms(macho_file: *MachO) !void { const atom = 
macho_file.getAtom(atom_index).?; assert(atom.flags.alive); const off = atom.value - header.addr; - @memcpy(code[off..][0..atom.size], atom.getData(macho_file)); + @memcpy(code[off..][0..atom.size], atom.getFile(macho_file).object.getAtomData(atom.*)); try atom.writeRelocs(macho_file, code[off..][0..atom.size], &relocs); } From 8f74d2519f2d262ee4ff9ab14316e76433c05901 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 19 Jan 2024 21:15:07 +0100 Subject: [PATCH 106/133] macho: resolve relocs pointing at __got_zig --- src/link/MachO/Atom.zig | 14 ++++++++++++-- src/link/MachO/Relocation.zig | 7 +++++++ src/link/MachO/Symbol.zig | 2 +- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 90a3e129c9..c8564c95d8 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -588,6 +588,8 @@ fn resolveRelocInner( const G: i64 = @intCast(rel.getGotTargetAddress(macho_file)); const TLS = @as(i64, @intCast(macho_file.getTlsAddress())); const SUB = if (subtractor) |sub| @as(i64, @intCast(sub.getTargetAddress(macho_file))) else 0; + // Address of the __got_zig table entry if any. 
+ const ZIG_GOT = @as(i64, @intCast(rel.getZigGotTargetAddress(macho_file))); switch (rel.tag) { .local => relocs_log.debug(" {x}<+{d}>: {s}: [=> {x}] atom({d})", .{ @@ -597,12 +599,13 @@ fn resolveRelocInner( S + A - SUB, rel.getTargetAtom(macho_file).atom_index, }), - .@"extern" => relocs_log.debug(" {x}<+{d}>: {s}: [=> {x}] G({x}) ({s})", .{ + .@"extern" => relocs_log.debug(" {x}<+{d}>: {s}: [=> {x}] G({x}) ZG({x}) ({s})", .{ P, rel_offset, @tagName(rel.type), S + A - SUB, G + A, + ZIG_GOT + A, rel.getTargetSymbol(macho_file).getName(macho_file), }), } @@ -696,7 +699,14 @@ fn resolveRelocInner( }, .zig_got_load => { - @panic("TODO resolve __got_zig indirection reloc"); + assert(rel.tag == .@"extern"); + assert(rel.meta.length == 2); + assert(rel.meta.pcrel); + switch (cpu_arch) { + .x86_64 => try writer.writeInt(i32, @intCast(ZIG_GOT + A - P), .little), + .aarch64 => @panic("TODO resolve __got_zig indirection reloc"), + else => unreachable, + } }, .tlv => { diff --git a/src/link/MachO/Relocation.zig b/src/link/MachO/Relocation.zig index 20891f07e3..eff628071f 100644 --- a/src/link/MachO/Relocation.zig +++ b/src/link/MachO/Relocation.zig @@ -34,6 +34,13 @@ pub fn getGotTargetAddress(rel: Relocation, macho_file: *MachO) u64 { }; } +pub fn getZigGotTargetAddress(rel: Relocation, macho_file: *MachO) u64 { + return switch (rel.tag) { + .local => 0, + .@"extern" => rel.getTargetSymbol(macho_file).getZigGotAddress(macho_file), + }; +} + pub fn getRelocAddend(rel: Relocation, cpu_arch: std.Target.Cpu.Arch) i64 { const addend: i64 = switch (rel.type) { .signed => 0, diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 5ffbabe1e2..dfdb81c605 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -162,7 +162,7 @@ pub fn getOrCreateZigGotEntry(symbol: *Symbol, symbol_index: Index, macho_file: return .{ .found_existing = false, .index = index }; } -pub fn zigGotAddress(symbol: Symbol, macho_file: *MachO) u64 { +pub fn 
getZigGotAddress(symbol: Symbol, macho_file: *MachO) u64 { if (!symbol.flags.has_zig_got) return 0; const extras = symbol.getExtra(macho_file).?; return macho_file.zig_got.entryAddress(extras.zig_got, macho_file); From eaf4bb442300adb4cedebf918738028e074a01b3 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 19 Jan 2024 21:30:55 +0100 Subject: [PATCH 107/133] macho: fix not resetting __got_zig sect index after sorting headers --- src/link/MachO.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 36e34d5ff0..3617dc9c7d 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2037,6 +2037,7 @@ pub fn sortSections(self: *MachO) !void { for (&[_]*?u8{ &self.data_sect_index, &self.got_sect_index, + &self.zig_got_sect_index, &self.stubs_sect_index, &self.stubs_helper_sect_index, &self.la_symbol_ptr_sect_index, From a531ecf9dc104895741c46901989d400ad4e3043 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 20 Jan 2024 08:41:55 +0100 Subject: [PATCH 108/133] macho: refactor segment creation logic --- src/link/MachO.zig | 56 ++++++++++++++-------------------------------- 1 file changed, 17 insertions(+), 39 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 3617dc9c7d..9fed435a41 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2203,20 +2203,6 @@ fn initSegments(self: *MachO) !void { const gpa = self.base.comp.gpa; const slice = self.sections.slice(); - // First, create segments required by sections - for (slice.items(.header)) |header| { - const segname = header.segName(); - if (self.getSegmentByName(segname) == null) { - const prot = getSegmentProt(segname); - try self.segments.append(gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = makeStaticString(segname), - .maxprot = prot, - .initprot = prot, - }); - } - } - // Add __PAGEZERO if required const pagezero_size = self.pagezero_size orelse default_pagezero_size; const aligned_pagezero_size = 
mem.alignBackward(u64, pagezero_size, self.getPageSize()); @@ -2226,44 +2212,30 @@ fn initSegments(self: *MachO) !void { log.warn("requested __PAGEZERO size (0x{x}) is not page aligned", .{pagezero_size}); log.warn(" rounding down to 0x{x}", .{aligned_pagezero_size}); } - try self.segments.append(gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = makeStaticString("__PAGEZERO"), - .vmsize = aligned_pagezero_size, - }); + _ = try self.addSegment("__PAGEZERO", .{ .vmsize = aligned_pagezero_size }); } // __TEXT segment is non-optional - if (self.getSegmentByName("__TEXT") == null) { - const protection = getSegmentProt("__TEXT"); - try self.segments.append(gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = makeStaticString("__TEXT"), - .maxprot = protection, - .initprot = protection, - }); + _ = try self.addSegment("__TEXT", .{ .prot = getSegmentProt("__TEXT") }); + + // Next, create segments required by sections + for (slice.items(.header)) |header| { + const segname = header.segName(); + if (self.getSegmentByName(segname) == null) { + _ = try self.addSegment(segname, .{ .prot = getSegmentProt(segname) }); + } } // Add __LINKEDIT - { - const protection = getSegmentProt("__LINKEDIT"); - self.linkedit_seg_index = @intCast(self.segments.items.len); - try self.segments.append(gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = makeStaticString("__LINKEDIT"), - .maxprot = protection, - .initprot = protection, - }); - } + _ = try self.addSegment("__LINKEDIT", .{ .prot = getSegmentProt("__LINKEDIT") }); + // Sort segments const sortFn = struct { fn sortFn(ctx: void, lhs: macho.segment_command_64, rhs: macho.segment_command_64) bool { _ = ctx; return getSegmentRank(lhs.segName()) < getSegmentRank(rhs.segName()); } }.sortFn; - - // Sort segments mem.sort(macho.segment_command_64, self.segments.items, {}, sortFn); // Attach sections to segments @@ -2288,6 +2260,12 @@ fn initSegments(self: *MachO) !void { 
self.pagezero_seg_index = self.getSegmentByName("__PAGEZERO"); self.text_seg_index = self.getSegmentByName("__TEXT").?; + self.linkedit_seg_index = self.getSegmentByName("__LINKEDIT").?; + self.zig_text_seg_index = self.getSegmentByName("__TEXT_ZIG"); + self.zig_got_seg_index = self.getSegmentByName("__GOT_ZIG"); + self.zig_const_seg_index = self.getSegmentByName("__CONST_ZIG"); + self.zig_data_seg_index = self.getSegmentByName("__DATA_ZIG"); + self.zig_bss_seg_index = self.getSegmentByName("__BSS_ZIG"); } fn allocateSections(self: *MachO) !void { From aef2c91d9f91c89a883c95331fa382692ab5e7c3 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 20 Jan 2024 17:09:56 +0100 Subject: [PATCH 109/133] macho: move incr Zig sections in file if overlap nonincr sections --- src/link/MachO.zig | 70 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 21 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 9fed435a41..861ca6e206 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1923,12 +1923,13 @@ fn getSegmentProt(segname: []const u8) macho.vm_prot_t { return macho.PROT.READ | macho.PROT.WRITE; } -fn getSegmentRank(segname: []const u8) u4 { +fn getSegmentRank(segname: []const u8) u8 { if (mem.eql(u8, segname, "__PAGEZERO")) return 0x0; if (mem.eql(u8, segname, "__TEXT")) return 0x1; if (mem.eql(u8, segname, "__DATA_CONST")) return 0x2; if (mem.eql(u8, segname, "__DATA")) return 0x3; - if (mem.eql(u8, segname, "__LINKEDIT")) return 0x5; + if (mem.indexOf(u8, segname, "ZIG")) |_| return 0xe; + if (mem.eql(u8, segname, "__LINKEDIT")) return 0xf; return 0x4; } @@ -2282,28 +2283,55 @@ fn allocateSections(self: *MachO) !void { var next_seg_id: u8 = if (self.pagezero_seg_index) |index| index + 1 else 0; for (slice.items(.header), slice.items(.segment_id)) |*header, seg_id| { - if (mem.indexOf(u8, header.segName(), "ZIG")) |_| { - vmaddr = header.addr + header.size; - } else { - if (seg_id != next_seg_id) { - vmaddr = 
mem.alignForward(u64, vmaddr, page_size); - fileoff = mem.alignForward(u32, fileoff, page_size); - } + defer next_seg_id = seg_id; - const alignment = try math.powi(u32, 2, header.@"align"); - - vmaddr = mem.alignForward(u64, vmaddr, alignment); - header.addr = vmaddr; - vmaddr += header.size; - - if (!header.isZerofill()) { - fileoff = mem.alignForward(u32, fileoff, alignment); - header.offset = fileoff; - fileoff += @intCast(header.size); - } + if (mem.indexOf(u8, header.segName(), "ZIG")) |_| continue; + if (seg_id != next_seg_id) { + vmaddr = mem.alignForward(u64, vmaddr, page_size); + fileoff = mem.alignForward(u32, fileoff, page_size); } - next_seg_id = seg_id; + const alignment = try math.powi(u32, 2, header.@"align"); + + vmaddr = mem.alignForward(u64, vmaddr, alignment); + header.addr = vmaddr; + vmaddr += header.size; + + if (!header.isZerofill()) { + fileoff = mem.alignForward(u32, fileoff, alignment); + header.offset = fileoff; + fileoff += @intCast(header.size); + } + } + + // TODO iterate over sections again, but consider only zig sections + // and move them if they are allocated in file below page-aligned fileoff + fileoff = mem.alignForward(u32, fileoff, page_size); + for (slice.items(.header), slice.items(.segment_id)) |*header, seg_id| { + if (mem.indexOf(u8, header.segName(), "ZIG") == null) continue; + if (header.isZerofill()) continue; + if (header.offset < fileoff) { + const existing_size = header.size; + header.size = 0; + + // Must move the entire section. + const new_offset = self.findFreeSpace(existing_size, page_size); + + log.debug("new '{s},{s}' file offset 0x{x} to 0x{x}", .{ + header.segName(), + header.sectName(), + new_offset, + new_offset + existing_size, + }); + + const amt = try self.base.file.?.copyRangeAll(header.offset, self.base.file.?, new_offset, existing_size); + // TODO figure out what to about this error condition - how to communicate it up. 
+ if (amt != existing_size) return error.InputOutput; + + header.offset = @intCast(new_offset); + header.size = existing_size; + self.segments.items[seg_id].fileoff = new_offset; + } } } From 5c30c23fc4d9f996244c11ff95104d86be88b4e4 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 20 Jan 2024 18:03:10 +0100 Subject: [PATCH 110/133] macho: get rid of allocateSegments --- src/link/MachO.zig | 75 +++++++++++++++------------------------------- 1 file changed, 24 insertions(+), 51 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 861ca6e206..5c897386aa 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -588,7 +588,6 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node try self.initSegments(); try self.allocateSections(); - self.allocateSegments(); self.allocateAtoms(); self.allocateSyntheticSymbols(); try self.allocateLinkeditSegment(); @@ -2275,20 +2274,31 @@ fn allocateSections(self: *MachO) !void { self.segments.items[index].vmaddr + self.segments.items[index].vmsize else 0; + + var prev_seg_id: u8 = if (self.pagezero_seg_index) |index| index + 1 else 0; + { + const seg = &self.segments.items[prev_seg_id]; + seg.vmaddr = vmaddr; + seg.fileoff = 0; + } + vmaddr += headerpad; var fileoff = headerpad; const page_size = self.getPageSize(); const slice = self.sections.slice(); - var next_seg_id: u8 = if (self.pagezero_seg_index) |index| index + 1 else 0; - for (slice.items(.header), slice.items(.segment_id)) |*header, seg_id| { - defer next_seg_id = seg_id; - + for (slice.items(.header), slice.items(.segment_id)) |*header, curr_seg_id| { if (mem.indexOf(u8, header.segName(), "ZIG")) |_| continue; - if (seg_id != next_seg_id) { + if (prev_seg_id != curr_seg_id) { + const prev_seg = &self.segments.items[prev_seg_id]; + const curr_seg = &self.segments.items[curr_seg_id]; + prev_seg.vmsize = vmaddr - prev_seg.vmaddr; + prev_seg.filesize = fileoff - prev_seg.fileoff; vmaddr = mem.alignForward(u64, vmaddr, 
page_size); fileoff = mem.alignForward(u32, fileoff, page_size); + curr_seg.vmaddr = vmaddr; + curr_seg.fileoff = fileoff; } const alignment = try math.powi(u32, 2, header.@"align"); @@ -2302,6 +2312,14 @@ fn allocateSections(self: *MachO) !void { header.offset = fileoff; fileoff += @intCast(header.size); } + + prev_seg_id = curr_seg_id; + } + + { + const prev_seg = &self.segments.items[prev_seg_id]; + prev_seg.vmsize = vmaddr - prev_seg.vmaddr; + prev_seg.filesize = fileoff - prev_seg.fileoff; } // TODO iterate over sections again, but consider only zig sections @@ -2335,51 +2353,6 @@ fn allocateSections(self: *MachO) !void { } } -fn allocateSegments(self: *MachO) void { - const page_size = self.getPageSize(); - var vmaddr = if (self.pagezero_seg_index) |index| - self.segments.items[index].vmaddr + self.segments.items[index].vmsize - else - 0; - var fileoff: u64 = 0; - const index = if (self.pagezero_seg_index) |index| index + 1 else 0; - const last_index = self.linkedit_seg_index.?; // TODO: please clean this up! 
- - const slice = self.sections.slice(); - var next_sect_id: u8 = 0; - for (self.segments.items[index..last_index], index..last_index) |*seg, seg_id| { - if (mem.indexOf(u8, seg.segName(), "ZIG")) |_| { - vmaddr = mem.alignForward(u64, seg.vmaddr + seg.vmsize, page_size); - if (mem.eql(u8, seg.segName(), "__BSS_ZIG")) { - fileoff = mem.alignForward(u64, seg.fileoff + seg.filesize, page_size); - } - } else { - seg.vmaddr = vmaddr; - seg.fileoff = fileoff; - - for ( - slice.items(.header)[next_sect_id..], - slice.items(.segment_id)[next_sect_id..], - ) |header, sid| { - if (seg_id != sid) break; - - vmaddr = header.addr + header.size; - if (!header.isZerofill()) { - fileoff = header.offset + header.size; - } - - next_sect_id += 1; - } - - vmaddr = mem.alignForward(u64, vmaddr, page_size); - fileoff = mem.alignForward(u64, fileoff, page_size); - - seg.vmsize = vmaddr - seg.vmaddr; - seg.filesize = fileoff - seg.fileoff; - } - } -} - pub fn allocateAtoms(self: *MachO) void { const slice = self.sections.slice(); for (slice.items(.header), slice.items(.atoms)) |header, atoms| { From 55f57ceb2e8f900ba281c5cf5524718cf7eb332f Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 20 Jan 2024 18:08:11 +0100 Subject: [PATCH 111/133] macho: prep for lowering TLS variables --- src/codegen.zig | 7 +++--- src/link/MachO/ZigObject.zig | 43 +++++++++++++++++------------------- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/src/codegen.zig b/src/codegen.zig index 49f7feda8f..e9509c4efd 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -996,10 +996,9 @@ fn genDeclRef( } const sym_index = try macho_file.getZigObject().?.getOrCreateMetadataForDecl(macho_file, decl_index); const sym = macho_file.getSymbol(sym_index); - // TODO: tlv - // if (is_threadlocal) { - // return GenResult.mcv(.{ .load_tlv = sym.nlist_idx }); - // } + if (is_threadlocal) { + return GenResult.mcv(.{ .load_tlv = sym.nlist_idx }); + } return GenResult.mcv(.{ .load_symbol = sym.nlist_idx }); } 
else if (lf.cast(link.File.Coff)) |coff_file| { if (is_extern) { diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 23673f2438..930d500e29 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -724,26 +724,25 @@ fn getDeclOutputSection( _ = self; const mod = macho_file.base.comp.module.?; const any_non_single_threaded = macho_file.base.comp.config.any_non_single_threaded; - _ = any_non_single_threaded; const sect_id: u8 = switch (decl.ty.zigTypeTag(mod)) { .Fn => macho_file.zig_text_sect_index.?, else => blk: { if (decl.getOwnedVariable(mod)) |variable| { - // if (variable.is_threadlocal and any_non_single_threaded) { - // const is_all_zeroes = for (code) |byte| { - // if (byte != 0) break false; - // } else true; - // if (is_all_zeroes) break :blk macho_file.getSectionByName("__DATA", "__thread_bss") orelse try macho_file.addSection( - // "__DATA", - // "__thread_bss", - // .{ .flags = macho.S_THREAD_LOCAL_ZEROFILL }, - // ); - // break :blk macho_file.getSectionByName("__DATA", "__thread_data") orelse try macho_file.addSection( - // "__DATA", - // "__thread_data", - // .{ .flags = macho.S_THREAD_LOCAL_REGULAR }, - // ); - // } + if (variable.is_threadlocal and any_non_single_threaded) { + const is_all_zeroes = for (code) |byte| { + if (byte != 0) break false; + } else true; + if (is_all_zeroes) break :blk macho_file.getSectionByName("__DATA", "__thread_bss") orelse try macho_file.addSection( + "__DATA", + "__thread_bss", + .{ .flags = macho.S_THREAD_LOCAL_ZEROFILL }, + ); + break :blk macho_file.getSectionByName("__DATA", "__thread_data") orelse try macho_file.addSection( + "__DATA", + "__thread_data", + .{ .flags = macho.S_THREAD_LOCAL_REGULAR }, + ); + } if (variable.is_const) break :blk macho_file.zig_const_sect_index.?; if (Value.fromInterned(variable.init).isUndefDeep(mod)) { @@ -1112,17 +1111,15 @@ pub fn getOrCreateMetadataForDecl( const gop = try self.decls.getOrPut(gpa, decl_index); if 
(!gop.found_existing) { const any_non_single_threaded = macho_file.base.comp.config.any_non_single_threaded; - _ = any_non_single_threaded; const sym_index = try self.addAtom(macho_file); const mod = macho_file.base.comp.module.?; const decl = mod.declPtr(decl_index); - _ = decl; const sym = macho_file.getSymbol(sym_index); - // if (decl.getOwnedVariable(mod)) |variable| { - // if (variable.is_threadlocal and any_non_single_threaded) { - // sym.flags.tlv = true; - // } - // } + if (decl.getOwnedVariable(mod)) |variable| { + if (variable.is_threadlocal and any_non_single_threaded) { + sym.flags.tlv = true; + } + } if (!sym.flags.tlv) { sym.flags.needs_zig_got = true; } From c02a603b63611a3f7963fce64f48ed3d2c10f86d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 20 Jan 2024 18:23:57 +0100 Subject: [PATCH 112/133] macho: get start index of Zig sections when allocating other sections --- src/link/MachO.zig | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 5c897386aa..cba79e1262 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2287,9 +2287,11 @@ fn allocateSections(self: *MachO) !void { const page_size = self.getPageSize(); const slice = self.sections.slice(); + const last_index = for (slice.items(.header), 0..) 
|header, i| { + if (mem.indexOf(u8, header.segName(), "ZIG")) |_| break i; + } else slice.items(.header).len; - for (slice.items(.header), slice.items(.segment_id)) |*header, curr_seg_id| { - if (mem.indexOf(u8, header.segName(), "ZIG")) |_| continue; + for (slice.items(.header)[0..last_index], slice.items(.segment_id)[0..last_index]) |*header, curr_seg_id| { if (prev_seg_id != curr_seg_id) { const prev_seg = &self.segments.items[prev_seg_id]; const curr_seg = &self.segments.items[curr_seg_id]; @@ -2325,8 +2327,7 @@ fn allocateSections(self: *MachO) !void { // TODO iterate over sections again, but consider only zig sections // and move them if they are allocated in file below page-aligned fileoff fileoff = mem.alignForward(u32, fileoff, page_size); - for (slice.items(.header), slice.items(.segment_id)) |*header, seg_id| { - if (mem.indexOf(u8, header.segName(), "ZIG") == null) continue; + for (slice.items(.header)[last_index..], slice.items(.segment_id)[last_index..]) |*header, seg_id| { if (header.isZerofill()) continue; if (header.offset < fileoff) { const existing_size = header.size; From 6ad4062bf2b5066808d728071b87d4084d710e5b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 20 Jan 2024 21:49:11 +0100 Subject: [PATCH 113/133] macho: save TLS variables in ZigObject --- src/link/MachO/ZigObject.zig | 73 +++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 2 deletions(-) diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 930d500e29..976a3236ae 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -38,6 +38,9 @@ unnamed_consts: UnnamedConstTable = .{}, /// Table of tracked AnonDecls. anon_decls: AnonDeclTable = .{}, +/// TLS variables indexed by Atom.Index. +tls_variables: TlsTable = .{}, + /// A table of relocations. 
relocs: RelocationTable = .{}, @@ -87,6 +90,11 @@ pub fn deinit(self: *ZigObject, allocator: Allocator) void { list.deinit(allocator); } self.relocs.deinit(allocator); + + for (self.tls_variables.values()) |*tlv| { + tlv.deinit(allocator); + } + self.tls_variables.deinit(allocator); } fn addNlist(self: *ZigObject, allocator: Allocator) !Symbol.Index { @@ -612,8 +620,7 @@ pub fn updateDecl( else => false, }; if (is_threadlocal) { - // TODO: emit TLV - @panic("TODO updateDecl for TLS"); + try self.updateTlv(macho_file, decl_index, sym_index, sect_index, code); } else { try self.updateDeclCode(macho_file, decl_index, sym_index, sect_index, code); } @@ -715,6 +722,58 @@ fn updateDeclCode( } } +fn updateTlv( + self: *ZigObject, + macho_file: *MachO, + decl_index: InternPool.DeclIndex, + sym_index: Symbol.Index, + sect_index: u8, + code: []const u8, +) !void { + const comp = macho_file.base.comp; + const gpa = comp.gpa; + const mod = comp.module.?; + const decl = mod.declPtr(decl_index); + const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + + log.debug("updateTlv {s} ({*})", .{ decl_name, decl }); + + const required_alignment = decl.getAlignment(mod); + + const sym = macho_file.getSymbol(sym_index); + const nlist = &self.symtab.items(.nlist)[sym.nlist_idx]; + const atom = sym.getAtom(macho_file).?; + + sym.out_n_sect = sect_index; + atom.out_n_sect = sect_index; + + sym.name = try macho_file.strings.insert(gpa, decl_name); + atom.flags.alive = true; + atom.name = sym.name; + nlist.n_strx = sym.name; + nlist.n_sect = sect_index + 1; + nlist.n_type = macho.N_EXT; + self.symtab.items(.size)[sym.nlist_idx] = code.len; + + atom.alignment = required_alignment; + atom.size = code.len; + + const slice = macho_file.sections.slice(); + const header = slice.items(.header)[sect_index]; + const atoms = &slice.items(.atoms)[sect_index]; + + const gop = try self.tls_variables.getOrPut(gpa, atom.atom_index); + assert(!gop.found_existing); // TODO 
incremental updates + gop.value_ptr.* = .{ .symbol_index = sym_index }; + + // We only store the data for the TLV if it's non-zerofill. + if (!header.isZerofill()) { + gop.value_ptr.code = try gpa.dupe(u8, code); + } + + try atoms.append(gpa, atom.atom_index); +} + fn getDeclOutputSection( self: *ZigObject, macho_file: *MachO, @@ -1244,11 +1303,21 @@ const LazySymbolMetadata = struct { const_state: State = .unused, }; +const TlsVariable = struct { + symbol_index: Symbol.Index, + code: []const u8 = &[0]u8{}, + + fn deinit(tlv: *TlsVariable, allocator: Allocator) void { + allocator.free(tlv.code); + } +}; + const DeclTable = std.AutoHashMapUnmanaged(InternPool.DeclIndex, DeclMetadata); const UnnamedConstTable = std.AutoHashMapUnmanaged(InternPool.DeclIndex, std.ArrayListUnmanaged(Symbol.Index)); const AnonDeclTable = std.AutoHashMapUnmanaged(InternPool.Index, DeclMetadata); const LazySymbolTable = std.AutoArrayHashMapUnmanaged(InternPool.OptionalDeclIndex, LazySymbolMetadata); const RelocationTable = std.ArrayListUnmanaged(std.ArrayListUnmanaged(Relocation)); +const TlsTable = std.AutoArrayHashMapUnmanaged(Atom.Index, TlsVariable); const assert = std.debug.assert; const builtin = @import("builtin"); From 080ad94249101ed167c06761eebbaf1f5d47cfa9 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 20 Jan 2024 21:53:40 +0100 Subject: [PATCH 114/133] x86_64: save TLS to stack for MachO --- src/arch/x86_64/CodeGen.zig | 41 +++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 2cc5fe267e..f427e284e6 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -16051,26 +16051,27 @@ fn resolveInst(self: *Self, ref: Air.Inst.Ref) InnerError!MCValue { if (!gop.found_existing) gop.value_ptr.* = InstTracking.init(init: { const const_mcv = try self.genTypedValue(.{ .ty = ty, .val = Value.fromInterned(ip_index) }); switch (const_mcv) { - .lea_tlv => 
|tlv_sym| if (self.bin_file.cast(link.File.Elf)) |_| { - if (self.mod.pic) { - try self.spillRegisters(&.{ .rdi, .rax }); - } else { - try self.spillRegisters(&.{.rax}); - } - const frame_index = try self.allocFrameIndex(FrameAlloc.init(.{ - .size = 8, - .alignment = .@"8", - })); - try self.genSetMem( - .{ .frame = frame_index }, - 0, - Type.usize, - .{ .lea_symbol = .{ .sym = tlv_sym } }, - ); - break :init .{ .load_frame = .{ .index = frame_index } }; - } else if (self.bin_file.cast(link.File.MachO)) |_| { - return self.fail("TODO implement saving TLV variable to stack", .{}); - } else break :init const_mcv, + .lea_tlv => |tlv_sym| switch (self.bin_file.tag) { + .elf, .macho => { + if (self.mod.pic) { + try self.spillRegisters(&.{ .rdi, .rax }); + } else { + try self.spillRegisters(&.{.rax}); + } + const frame_index = try self.allocFrameIndex(FrameAlloc.init(.{ + .size = 8, + .alignment = .@"8", + })); + try self.genSetMem( + .{ .frame = frame_index }, + 0, + Type.usize, + .{ .lea_symbol = .{ .sym = tlv_sym } }, + ); + break :init .{ .load_frame = .{ .index = frame_index } }; + }, + else => break :init const_mcv, + }, else => break :init const_mcv, } }); From 5c4db4e5787eb534c8a83a6f7fc8f0fcfb1d01ef Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 20 Jan 2024 22:31:50 +0100 Subject: [PATCH 115/133] x86_64: emit MachO TLV sequence --- src/arch/x86_64/Emit.zig | 3 ++- src/arch/x86_64/Lower.zig | 22 ++++++++++++++++++---- src/link/MachO/ZigObject.zig | 24 +++++++++++++++++------- 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 97899f224d..35d6935d77 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -166,6 +166,8 @@ pub fn emitMir(emit: *Emit) Error!void { .zig_got_load else if (sym.flags.needs_got) .got_load + else if (sym.flags.tlv) + .tlv else .signed; try atom.addReloc(macho_file, .{ @@ -185,7 +187,6 @@ pub fn emitMir(emit: *Emit) Error!void { .linker_got, 
.linker_direct, .linker_import, - .linker_tlv, => |symbol| if (emit.lower.bin_file.cast(link.File.Elf)) |_| { unreachable; } else if (emit.lower.bin_file.cast(link.File.MachO)) |_| { diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 0b48afe0c6..5271b3e93c 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -62,7 +62,6 @@ pub const Reloc = struct { linker_got: bits.Symbol, linker_direct: bits.Symbol, linker_import: bits.Symbol, - linker_tlv: bits.Symbol, }; }; @@ -428,7 +427,23 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) const macho_sym = macho_file.getSymbol(sym_index); if (macho_sym.flags.tlv) { - @panic("TODO lower TLS access on macOS"); + _ = lower.reloc(.{ .linker_reloc = sym }); + lower.result_insts[lower.result_insts_len] = + try Instruction.new(.none, .mov, &[_]Operand{ + .{ .reg = .rdi }, + .{ .mem = Memory.rip(mem_op.sib.ptr_size, 0) }, + }); + lower.result_insts_len += 1; + lower.result_insts[lower.result_insts_len] = + try Instruction.new(.none, .call, &[_]Operand{ + .{ .mem = Memory.sib(.qword, .{ .base = .{ .reg = .rdi } }) }, + }); + lower.result_insts_len += 1; + emit_mnemonic = .lea; + break :op .{ .mem = Memory.sib(mem_op.sib.ptr_size, .{ + .base = .{ .reg = .rax }, + .disp = std.math.minInt(i32), + }) }; } _ = lower.reloc(.{ .linker_reloc = sym }); @@ -594,14 +609,13 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void { .extern_fn_reloc => &.{ .{ .imm = lower.reloc(.{ .linker_extern_fn = inst.data.reloc }) }, }, - .got_reloc, .direct_reloc, .import_reloc, .tlv_reloc => ops: { + .got_reloc, .direct_reloc, .import_reloc => ops: { const reg = inst.data.rx.r1; const extra = lower.mir.extraData(bits.Symbol, inst.data.rx.payload).data; _ = lower.reloc(switch (inst.ops) { .got_reloc => .{ .linker_got = extra }, .direct_reloc => .{ .linker_direct = extra }, .import_reloc => .{ .linker_import = extra }, - .tlv_reloc => .{ .linker_tlv = extra }, else => unreachable, }); 
break :ops &.{ diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 976a3236ae..fc9ce86762 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -141,13 +141,23 @@ pub fn getAtomDataAlloc(self: ZigObject, macho_file: *MachO, atom: Atom) ![]u8 { const gpa = macho_file.base.comp.gpa; assert(atom.file == self.index); const sect = macho_file.sections.items(.header)[atom.out_n_sect]; - const file_offset = sect.offset + atom.value - sect.addr; - const size = std.math.cast(usize, atom.size) orelse return error.Overflow; - const code = try gpa.alloc(u8, size); - errdefer gpa.free(code); - const amt = try macho_file.base.file.?.preadAll(code, file_offset); - if (amt != code.len) return error.InputOutput; - return code; + + switch (sect.type()) { + macho.S_THREAD_LOCAL_REGULAR => { + const tlv = self.tls_variables.get(atom.atom_index).?; + const code = try gpa.dupe(u8, tlv.code); + return code; + }, + else => { + const file_offset = sect.offset + atom.value - sect.addr; + const size = std.math.cast(usize, atom.size) orelse return error.Overflow; + const code = try gpa.alloc(u8, size); + errdefer gpa.free(code); + const amt = try macho_file.base.file.?.preadAll(code, file_offset); + if (amt != code.len) return error.InputOutput; + return code; + }, + } } pub fn getAtomRelocs(self: *ZigObject, atom: Atom) []const Relocation { From 411c7f6669ed2eb758f371dfde59e03abf05aa0a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 20 Jan 2024 23:45:32 +0100 Subject: [PATCH 116/133] macho: fix wrong symbol type for TLV vars --- src/link/MachO/ZigObject.zig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index fc9ce86762..4b08131f76 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -757,12 +757,14 @@ fn updateTlv( sym.out_n_sect = sect_index; atom.out_n_sect = sect_index; + sym.value = 0; sym.name = try 
macho_file.strings.insert(gpa, decl_name); atom.flags.alive = true; atom.name = sym.name; nlist.n_strx = sym.name; nlist.n_sect = sect_index + 1; - nlist.n_type = macho.N_EXT; + nlist.n_type = macho.N_SECT; + nlist.n_value = 0; self.symtab.items(.size)[sym.nlist_idx] = code.len; atom.alignment = required_alignment; From 3a6410959ca6df6f020547c58845730753dc9e97 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 21 Jan 2024 10:32:06 +0100 Subject: [PATCH 117/133] macho: actually lower TLS variables --- src/arch/x86_64/Lower.zig | 3 +- src/link/MachO.zig | 17 ++- src/link/MachO/ZigObject.zig | 168 +++++++++++++++++++++++----- src/link/MachO/dyld_info/Rebase.zig | 4 +- src/link/MachO/dyld_info/bind.zig | 8 +- 5 files changed, 163 insertions(+), 37 deletions(-) diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 5271b3e93c..34e6a02f71 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -439,10 +439,9 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) .{ .mem = Memory.sib(.qword, .{ .base = .{ .reg = .rdi } }) }, }); lower.result_insts_len += 1; - emit_mnemonic = .lea; + emit_mnemonic = .mov; break :op .{ .mem = Memory.sib(mem_op.sib.ptr_size, .{ .base = .{ .reg = .rax }, - .disp = std.math.minInt(i32), }) }; } diff --git a/src/link/MachO.zig b/src/link/MachO.zig index cba79e1262..9a4bdfcc86 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -606,7 +606,10 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node if (!atom.flags.alive) continue; const sect = &self.sections.items(.header)[atom.out_n_sect]; if (sect.isZerofill()) continue; - const code = zo.getAtomDataAlloc(self, atom.*) catch |err| switch (err) { + if (mem.indexOf(u8, sect.segName(), "ZIG") == null) continue; // Non-Zig sections are handled separately + // TODO: we will resolve and write ZigObject's TLS data twice: + // once here, and once in writeAtoms + const code = zo.getAtomDataAlloc(self, 
gpa, atom.*) catch |err| switch (err) { error.InputOutput => { try self.reportUnexpectedError("fetching code for '{s}' failed", .{ atom.getName(self), @@ -1806,7 +1809,7 @@ fn initOutputSections(self: *MachO) !void { .aarch64 => 2, else => unreachable, }, - .flags = macho.S_SYMBOL_STUBS | + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, }); } @@ -2545,6 +2548,9 @@ fn writeAtoms(self: *MachO) !void { defer tracy.end(); const gpa = self.base.comp.gpa; + var arena = std.heap.ArenaAllocator.init(gpa); + defer arena.deinit(); + const cpu_arch = self.getTarget().cpu.arch; const slice = self.sections.slice(); @@ -2562,7 +2568,12 @@ fn writeAtoms(self: *MachO) !void { const atom = self.getAtom(atom_index).?; assert(atom.flags.alive); const off = atom.value - header.addr; - @memcpy(buffer[off..][0..atom.size], atom.getFile(self).object.getAtomData(atom.*)); + const data = switch (atom.getFile(self)) { + .object => |x| x.getAtomData(atom.*), + .zig_object => |x| try x.getAtomDataAlloc(self, arena.allocator(), atom.*), + else => unreachable, + }; + @memcpy(buffer[off..][0..atom.size], data); atom.resolveRelocs(self, buffer[off..][0..atom.size]) catch |err| switch (err) { error.ResolveFailed => has_resolve_error = true, else => |e| return e, diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 4b08131f76..5c1fc9971f 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -38,8 +38,8 @@ unnamed_consts: UnnamedConstTable = .{}, /// Table of tracked AnonDecls. anon_decls: AnonDeclTable = .{}, -/// TLS variables indexed by Atom.Index. -tls_variables: TlsTable = .{}, +/// TLV initializers indexed by Atom.Index. +tlv_initializers: TlvInitializerTable = .{}, /// A table of relocations. 
relocs: RelocationTable = .{}, @@ -91,10 +91,10 @@ pub fn deinit(self: *ZigObject, allocator: Allocator) void { } self.relocs.deinit(allocator); - for (self.tls_variables.values()) |*tlv| { - tlv.deinit(allocator); + for (self.tlv_initializers.values()) |*tlv_init| { + tlv_init.deinit(allocator); } - self.tls_variables.deinit(allocator); + self.tlv_initializers.deinit(allocator); } fn addNlist(self: *ZigObject, allocator: Allocator) !Symbol.Index { @@ -137,25 +137,35 @@ pub fn addAtom(self: *ZigObject, macho_file: *MachO) !Symbol.Index { } /// Caller owns the memory. -pub fn getAtomDataAlloc(self: ZigObject, macho_file: *MachO, atom: Atom) ![]u8 { - const gpa = macho_file.base.comp.gpa; +pub fn getAtomDataAlloc( + self: ZigObject, + macho_file: *MachO, + allocator: Allocator, + atom: Atom, +) ![]u8 { assert(atom.file == self.index); const sect = macho_file.sections.items(.header)[atom.out_n_sect]; + assert(!sect.isZerofill()); switch (sect.type()) { macho.S_THREAD_LOCAL_REGULAR => { - const tlv = self.tls_variables.get(atom.atom_index).?; - const code = try gpa.dupe(u8, tlv.code); - return code; + const tlv = self.tlv_initializers.get(atom.atom_index).?; + const data = try allocator.dupe(u8, tlv.data); + return data; + }, + macho.S_THREAD_LOCAL_VARIABLES => { + const data = try allocator.alloc(u8, atom.size); + @memset(data, 0); + return data; }, else => { const file_offset = sect.offset + atom.value - sect.addr; const size = std.math.cast(usize, atom.size) orelse return error.Overflow; - const code = try gpa.alloc(u8, size); - errdefer gpa.free(code); - const amt = try macho_file.base.file.?.preadAll(code, file_offset); - if (amt != code.len) return error.InputOutput; - return code; + const data = try allocator.alloc(u8, size); + errdefer allocator.free(data); + const amt = try macho_file.base.file.?.preadAll(data, file_offset); + if (amt != data.len) return error.InputOutput; + return data; }, } } @@ -421,7 +431,7 @@ pub fn lowerAnonDecl( self: *ZigObject, 
macho_file: *MachO, decl_val: InternPool.Index, - explicit_alignment: InternPool.Alignment, + explicit_alignment: Atom.Alignment, src_loc: Module.SrcLoc, ) !codegen.Result { const gpa = macho_file.base.comp.gpa; @@ -732,6 +742,9 @@ fn updateDeclCode( } } +/// Lowering a TLV on macOS involves two stages: +/// 1. first we lower the initializer into appopriate section (__thread_data or __thread_bss) +/// 2. next, we create a corresponding threadlocal variable descriptor in __thread_vars fn updateTlv( self: *ZigObject, macho_file: *MachO, @@ -740,9 +753,7 @@ fn updateTlv( sect_index: u8, code: []const u8, ) !void { - const comp = macho_file.base.comp; - const gpa = comp.gpa; - const mod = comp.module.?; + const mod = macho_file.base.comp.module.?; const decl = mod.declPtr(decl_index); const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); @@ -750,6 +761,32 @@ fn updateTlv( const required_alignment = decl.getAlignment(mod); + // 1. Lower TLV initializer + const init_sym_index = try self.createTlvInitializer( + macho_file, + decl_name, + required_alignment, + sect_index, + code, + ); + + // 2. 
Create TLV descriptor + try self.createTlvDescriptor(macho_file, sym_index, init_sym_index, decl_name); +} + +fn createTlvInitializer( + self: *ZigObject, + macho_file: *MachO, + name: []const u8, + alignment: Atom.Alignment, + sect_index: u8, + code: []const u8, +) !Symbol.Index { + const gpa = macho_file.base.comp.gpa; + const sym_name = try std.fmt.allocPrint(gpa, "{s}$tlv$init", .{name}); + defer gpa.free(sym_name); + + const sym_index = try self.addAtom(macho_file); const sym = macho_file.getSymbol(sym_index); const nlist = &self.symtab.items(.nlist)[sym.nlist_idx]; const atom = sym.getAtom(macho_file).?; @@ -758,7 +795,7 @@ fn updateTlv( atom.out_n_sect = sect_index; sym.value = 0; - sym.name = try macho_file.strings.insert(gpa, decl_name); + sym.name = try macho_file.strings.insert(gpa, sym_name); atom.flags.alive = true; atom.name = sym.name; nlist.n_strx = sym.name; @@ -767,23 +804,94 @@ fn updateTlv( nlist.n_value = 0; self.symtab.items(.size)[sym.nlist_idx] = code.len; - atom.alignment = required_alignment; + atom.alignment = alignment; atom.size = code.len; const slice = macho_file.sections.slice(); const header = slice.items(.header)[sect_index]; const atoms = &slice.items(.atoms)[sect_index]; - const gop = try self.tls_variables.getOrPut(gpa, atom.atom_index); + const gop = try self.tlv_initializers.getOrPut(gpa, atom.atom_index); assert(!gop.found_existing); // TODO incremental updates gop.value_ptr.* = .{ .symbol_index = sym_index }; // We only store the data for the TLV if it's non-zerofill. 
if (!header.isZerofill()) { - gop.value_ptr.code = try gpa.dupe(u8, code); + gop.value_ptr.data = try gpa.dupe(u8, code); } try atoms.append(gpa, atom.atom_index); + + return sym_index; +} + +fn createTlvDescriptor( + self: *ZigObject, + macho_file: *MachO, + sym_index: Symbol.Index, + init_sym_index: Symbol.Index, + name: []const u8, +) !void { + const gpa = macho_file.base.comp.gpa; + + const sym = macho_file.getSymbol(sym_index); + const nlist = &self.symtab.items(.nlist)[sym.nlist_idx]; + const atom = sym.getAtom(macho_file).?; + const alignment = Atom.Alignment.fromNonzeroByteUnits(@alignOf(u64)); + const size: u64 = @sizeOf(u64) * 3; + + const sect_index = macho_file.getSectionByName("__DATA", "__thread_vars") orelse + try macho_file.addSection("__DATA", "__thread_vars", .{ + .flags = macho.S_THREAD_LOCAL_VARIABLES, + }); + sym.out_n_sect = sect_index; + atom.out_n_sect = sect_index; + + sym.value = 0; + sym.name = try macho_file.strings.insert(gpa, name); + atom.flags.alive = true; + atom.name = sym.name; + nlist.n_strx = sym.name; + nlist.n_sect = sect_index + 1; + nlist.n_type = macho.N_SECT; + nlist.n_value = 0; + self.symtab.items(.size)[sym.nlist_idx] = size; + + atom.alignment = alignment; + atom.size = size; + + const tlv_bootstrap_index = blk: { + const index = try self.getGlobalSymbol(macho_file, "_tlv_bootstrap", null); + break :blk self.symbols.items[index]; + }; + try atom.addReloc(macho_file, .{ + .tag = .@"extern", + .offset = 0, + .target = tlv_bootstrap_index, + .addend = 0, + .type = .unsigned, + .meta = .{ + .pcrel = false, + .has_subtractor = false, + .length = 3, + .symbolnum = 0, + }, + }); + try atom.addReloc(macho_file, .{ + .tag = .@"extern", + .offset = 16, + .target = init_sym_index, + .addend = 0, + .type = .unsigned, + .meta = .{ + .pcrel = false, + .has_subtractor = false, + .length = 3, + .symbolnum = 0, + }, + }); + + try macho_file.sections.items(.atoms)[sect_index].append(gpa, atom.atom_index); } fn getDeclOutputSection( @@ 
-888,7 +996,7 @@ fn lowerConst( macho_file: *MachO, name: []const u8, tv: TypedValue, - required_alignment: InternPool.Alignment, + required_alignment: Atom.Alignment, output_section_index: u8, src_loc: Module.SrcLoc, ) !LowerConstResult { @@ -1040,7 +1148,7 @@ fn updateLazySymbol( const gpa = macho_file.base.comp.gpa; const mod = macho_file.base.comp.module.?; - var required_alignment: InternPool.Alignment = .none; + var required_alignment: Atom.Alignment = .none; var code_buffer = std.ArrayList(u8).init(gpa); defer code_buffer.deinit(); @@ -1315,12 +1423,12 @@ const LazySymbolMetadata = struct { const_state: State = .unused, }; -const TlsVariable = struct { +const TlvInitializer = struct { symbol_index: Symbol.Index, - code: []const u8 = &[0]u8{}, + data: []const u8 = &[0]u8{}, - fn deinit(tlv: *TlsVariable, allocator: Allocator) void { - allocator.free(tlv.code); + fn deinit(tlv_init: *TlvInitializer, allocator: Allocator) void { + allocator.free(tlv_init.data); } }; @@ -1329,7 +1437,7 @@ const UnnamedConstTable = std.AutoHashMapUnmanaged(InternPool.DeclIndex, std.Arr const AnonDeclTable = std.AutoHashMapUnmanaged(InternPool.Index, DeclMetadata); const LazySymbolTable = std.AutoArrayHashMapUnmanaged(InternPool.OptionalDeclIndex, LazySymbolMetadata); const RelocationTable = std.ArrayListUnmanaged(std.ArrayListUnmanaged(Relocation)); -const TlsTable = std.AutoArrayHashMapUnmanaged(Atom.Index, TlsVariable); +const TlvInitializerTable = std.AutoArrayHashMapUnmanaged(Atom.Index, TlvInitializer); const assert = std.debug.assert; const builtin = @import("builtin"); diff --git a/src/link/MachO/dyld_info/Rebase.zig b/src/link/MachO/dyld_info/Rebase.zig index ffad0362f9..5209dcd9f4 100644 --- a/src/link/MachO/dyld_info/Rebase.zig +++ b/src/link/MachO/dyld_info/Rebase.zig @@ -3,7 +3,7 @@ const Rebase = @This(); const std = @import("std"); const assert = std.debug.assert; const leb = std.leb; -const log = std.log.scoped(.dyld_info); +const log = 
std.log.scoped(.link_dyld_info); const macho = std.macho; const testing = std.testing; @@ -39,6 +39,8 @@ pub fn finalize(rebase: *Rebase, gpa: Allocator) !void { const writer = rebase.buffer.writer(gpa); + log.debug("rebase opcodes", .{}); + std.mem.sort(Entry, rebase.entries.items, {}, Entry.lessThan); try setTypePointer(writer); diff --git a/src/link/MachO/dyld_info/bind.zig b/src/link/MachO/dyld_info/bind.zig index cee57e1edf..bb746b05e3 100644 --- a/src/link/MachO/dyld_info/bind.zig +++ b/src/link/MachO/dyld_info/bind.zig @@ -1,7 +1,7 @@ const std = @import("std"); const assert = std.debug.assert; const leb = std.leb; -const log = std.log.scoped(.dyld_info); +const log = std.log.scoped(.link_dyld_info); const macho = std.macho; const testing = std.testing; @@ -48,6 +48,8 @@ pub const Bind = struct { const writer = self.buffer.writer(gpa); + log.debug("bind opcodes", .{}); + std.mem.sort(Entry, self.entries.items, ctx, Entry.lessThan); var start: usize = 0; @@ -201,6 +203,8 @@ pub const WeakBind = struct { const writer = self.buffer.writer(gpa); + log.debug("weak bind opcodes", .{}); + std.mem.sort(Entry, self.entries.items, ctx, Entry.lessThan); var start: usize = 0; @@ -348,6 +352,8 @@ pub const LazyBind = struct { var cwriter = std.io.countingWriter(self.buffer.writer(gpa)); const writer = cwriter.writer(); + log.debug("lazy bind opcodes", .{}); + var addend: i64 = 0; for (self.entries.items) |entry| { From 060406a52665e9c70012a7148757bd6d9797cf34 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 21 Jan 2024 10:54:03 +0100 Subject: [PATCH 118/133] macho: ensure we zero-out regions after copying them over This is to ensure that the loader correctly zeroes-out zerofill sections when mapping them. For context, Apple's loader dyld will map the regions where any zerofill would theoretically reside as belonging to zerofill section. 
--- src/arch/x86_64/Lower.zig | 4 +--- src/link/MachO.zig | 21 +++++++++++++++------ 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 34e6a02f71..4e9c37e5aa 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -440,9 +440,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) }); lower.result_insts_len += 1; emit_mnemonic = .mov; - break :op .{ .mem = Memory.sib(mem_op.sib.ptr_size, .{ - .base = .{ .reg = .rax }, - }) }; + break :op .{ .reg = .rax }; } _ = lower.reloc(.{ .linker_reloc = sym }); diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 9a4bdfcc86..e561b64cf2 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2346,9 +2346,7 @@ fn allocateSections(self: *MachO) !void { new_offset + existing_size, }); - const amt = try self.base.file.?.copyRangeAll(header.offset, self.base.file.?, new_offset, existing_size); - // TODO figure out what to about this error condition - how to communicate it up. - if (amt != existing_size) return error.InputOutput; + try self.copyRangeAllZeroOut(header.offset, new_offset, existing_size); header.offset = @intCast(new_offset); header.size = existing_size; @@ -3268,6 +3266,19 @@ fn findFreeSpace(self: *MachO, object_size: u64, min_alignment: u32) u64 { return start; } +/// Like File.copyRangeAll but also ensures the source region is zeroed out after copy. +/// This is so that we guarantee zeroed out regions for mapping of zerofill sections by the loader. 
+fn copyRangeAllZeroOut(self: *MachO, old_offset: u64, new_offset: u64, size: u64) !void { + const gpa = self.base.comp.gpa; + const file = self.base.file.?; + const amt = try file.copyRangeAll(old_offset, file, new_offset, size); + if (amt != size) return error.InputOutput; + const zeroes = try gpa.alloc(u8, size); + defer gpa.free(zeroes); + @memset(zeroes, 0); + try file.pwriteAll(zeroes, old_offset); +} + const InitMetadataOptions = struct { symbol_count_hint: u64, program_code_size_hint: u64, @@ -3408,9 +3419,7 @@ pub fn growSection(self: *MachO, sect_index: u8, needed_size: u64) !void { new_offset + existing_size, }); - const amt = try self.base.file.?.copyRangeAll(sect.offset, self.base.file.?, new_offset, existing_size); - // TODO figure out what to about this error condition - how to communicate it up. - if (amt != existing_size) return error.InputOutput; + try self.copyRangeAllZeroOut(sect.offset, new_offset, existing_size); sect.offset = @intCast(new_offset); seg.fileoff = new_offset; From ead02378143ed3fb160438d7bfbb1c5718580ff5 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 21 Jan 2024 19:35:07 +0100 Subject: [PATCH 119/133] build: bump maxrss --- build.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.zig b/build.zig index 1d44c249cc..e452c3b622 100644 --- a/build.zig +++ b/build.zig @@ -623,7 +623,7 @@ fn addCompilerStep(b: *std.Build, options: AddCompilerStepOptions) *std.Build.St .root_source_file = .{ .path = "src/main.zig" }, .target = options.target, .optimize = options.optimize, - .max_rss = 7_000_000_000, + .max_rss = 7_100_000_000, .strip = options.strip, .sanitize_thread = options.sanitize_thread, .single_threaded = options.single_threaded, From 67ea039426751bc8326c5835edca55ecf20dc66e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 21 Jan 2024 20:11:51 +0100 Subject: [PATCH 120/133] macho: do not enforce platform check for now --- src/link/MachO/Dylib.zig | 18 ++++++++++-------- 
src/link/MachO/Object.zig | 18 ++++++++++-------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index 0df66ffd71..0f4ee09e78 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -113,14 +113,16 @@ pub fn parse(self: *Dylib, macho_file: *MachO) !void { }); return error.InvalidTarget; } - if (macho_file.platform.version.order(platform.version) == .lt) { - try macho_file.reportParseError2(self.index, "object file built for newer platform: {}: {} < {}", .{ - macho_file.platform.fmtTarget(macho_file.getTarget().cpu.arch), - macho_file.platform.version, - platform.version, - }); - return error.InvalidTarget; - } + // TODO: this can cause the CI to fail so I'm commenting this check out so that + // I can work out the rest of the changes first + // if (macho_file.platform.version.order(platform.version) == .lt) { + // try macho_file.reportParseError2(self.index, "object file built for newer platform: {}: {} < {}", .{ + // macho_file.platform.fmtTarget(macho_file.getTarget().cpu.arch), + // macho_file.platform.version, + // platform.version, + // }); + // return error.InvalidTarget; + // } } } diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 11846a66d3..367b7bc8fa 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -173,14 +173,16 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { }); return error.InvalidTarget; } - if (macho_file.platform.version.order(platform.version) == .lt) { - try macho_file.reportParseError2(self.index, "object file built for newer platform: {}: {} < {}", .{ - macho_file.platform.fmtTarget(macho_file.getTarget().cpu.arch), - macho_file.platform.version, - platform.version, - }); - return error.InvalidTarget; - } + // TODO: this causes the CI to fail so I'm commenting this check out so that + // I can work out the rest of the changes first + // if (macho_file.platform.version.order(platform.version) == .lt) { + 
// try macho_file.reportParseError2(self.index, "object file built for newer platform: {}: {} < {}", .{ + // macho_file.platform.fmtTarget(macho_file.getTarget().cpu.arch), + // macho_file.platform.version, + // platform.version, + // }); + // return error.InvalidTarget; + // } } try self.initDwarfInfo(macho_file); From 06224c23b7ac3b4e31c6f63898da4107cbe918a8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 21 Jan 2024 21:12:15 +0100 Subject: [PATCH 121/133] macho: fix 32bit compilation issues --- src/link/MachO.zig | 47 ++++++++++++++++++----------- src/link/MachO/Atom.zig | 2 +- src/link/MachO/DwarfInfo.zig | 23 ++++++++------ src/link/MachO/Dylib.zig | 4 +-- src/link/MachO/Object.zig | 46 +++++++++++++++------------- src/link/MachO/UnwindInfo.zig | 6 ++-- src/link/MachO/ZigObject.zig | 3 +- src/link/MachO/dyld_info/Rebase.zig | 2 +- src/link/MachO/dyld_info/bind.zig | 2 +- src/link/MachO/file.zig | 2 +- src/link/MachO/hasher.zig | 14 ++++++--- src/link/MachO/relocatable.zig | 16 ++++++---- 12 files changed, 98 insertions(+), 69 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index e561b64cf2..00e6b7f235 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2557,7 +2557,8 @@ fn writeAtoms(self: *MachO) !void { if (atoms.items.len == 0) continue; if (header.isZerofill()) continue; - const buffer = try gpa.alloc(u8, header.size); + const size = math.cast(usize, header.size) orelse return error.Overflow; + const buffer = try gpa.alloc(u8, size); defer gpa.free(buffer); const padding_byte: u8 = if (header.isCode() and cpu_arch == .x86_64) 0xcc else 0; @memset(buffer, padding_byte); @@ -2565,14 +2566,15 @@ fn writeAtoms(self: *MachO) !void { for (atoms.items) |atom_index| { const atom = self.getAtom(atom_index).?; assert(atom.flags.alive); - const off = atom.value - header.addr; + const off = math.cast(usize, atom.value - header.addr) orelse return error.Overflow; const data = switch (atom.getFile(self)) { - .object => |x| 
x.getAtomData(atom.*), + .object => |x| try x.getAtomData(atom.*), .zig_object => |x| try x.getAtomDataAlloc(self, arena.allocator(), atom.*), else => unreachable, }; - @memcpy(buffer[off..][0..atom.size], data); - atom.resolveRelocs(self, buffer[off..][0..atom.size]) catch |err| switch (err) { + const atom_size = math.cast(usize, atom.size) orelse return error.Overflow; + @memcpy(buffer[off..][0..atom_size], data); + atom.resolveRelocs(self, buffer[off..][0..atom_size]) catch |err| switch (err) { error.ResolveFailed => has_resolve_error = true, else => |e| return e, }; @@ -2602,7 +2604,8 @@ fn writeUnwindInfo(self: *MachO) !void { if (self.eh_frame_sect_index) |index| { const header = self.sections.items(.header)[index]; - const buffer = try gpa.alloc(u8, header.size); + const size = math.cast(usize, header.size) orelse return error.Overflow; + const buffer = try gpa.alloc(u8, size); defer gpa.free(buffer); eh_frame.write(self, buffer); try self.base.file.?.pwriteAll(buffer, header.offset); @@ -2610,7 +2613,8 @@ fn writeUnwindInfo(self: *MachO) !void { if (self.unwind_info_sect_index) |index| { const header = self.sections.items(.header)[index]; - const buffer = try gpa.alloc(u8, header.size); + const size = math.cast(usize, header.size) orelse return error.Overflow; + const buffer = try gpa.alloc(u8, size); defer gpa.free(buffer); try self.unwind_info.write(self, buffer); try self.base.file.?.pwriteAll(buffer, header.offset); @@ -2637,7 +2641,8 @@ fn writeSyntheticSections(self: *MachO) !void { if (self.got_sect_index) |sect_id| { const header = self.sections.items(.header)[sect_id]; - var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); + const size = math.cast(usize, header.size) orelse return error.Overflow; + var buffer = try std.ArrayList(u8).initCapacity(gpa, size); defer buffer.deinit(); try self.got.write(self, buffer.writer()); assert(buffer.items.len == header.size); @@ -2646,7 +2651,8 @@ fn writeSyntheticSections(self: *MachO) !void { if 
(self.stubs_sect_index) |sect_id| { const header = self.sections.items(.header)[sect_id]; - var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); + const size = math.cast(usize, header.size) orelse return error.Overflow; + var buffer = try std.ArrayList(u8).initCapacity(gpa, size); defer buffer.deinit(); try self.stubs.write(self, buffer.writer()); assert(buffer.items.len == header.size); @@ -2655,7 +2661,8 @@ fn writeSyntheticSections(self: *MachO) !void { if (self.stubs_helper_sect_index) |sect_id| { const header = self.sections.items(.header)[sect_id]; - var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); + const size = math.cast(usize, header.size) orelse return error.Overflow; + var buffer = try std.ArrayList(u8).initCapacity(gpa, size); defer buffer.deinit(); try self.stubs_helper.write(self, buffer.writer()); assert(buffer.items.len == header.size); @@ -2664,7 +2671,8 @@ fn writeSyntheticSections(self: *MachO) !void { if (self.la_symbol_ptr_sect_index) |sect_id| { const header = self.sections.items(.header)[sect_id]; - var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); + const size = math.cast(usize, header.size) orelse return error.Overflow; + var buffer = try std.ArrayList(u8).initCapacity(gpa, size); defer buffer.deinit(); try self.la_symbol_ptr.write(self, buffer.writer()); assert(buffer.items.len == header.size); @@ -2673,7 +2681,8 @@ fn writeSyntheticSections(self: *MachO) !void { if (self.tlv_ptr_sect_index) |sect_id| { const header = self.sections.items(.header)[sect_id]; - var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); + const size = math.cast(usize, header.size) orelse return error.Overflow; + var buffer = try std.ArrayList(u8).initCapacity(gpa, size); defer buffer.deinit(); try self.tlv_ptr.write(self, buffer.writer()); assert(buffer.items.len == header.size); @@ -2682,7 +2691,8 @@ fn writeSyntheticSections(self: *MachO) !void { if (self.objc_stubs_sect_index) |sect_id| { const header 
= self.sections.items(.header)[sect_id]; - var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); + const size = math.cast(usize, header.size) orelse return error.Overflow; + var buffer = try std.ArrayList(u8).initCapacity(gpa, size); defer buffer.deinit(); try self.objc_stubs.write(self, buffer.writer()); assert(buffer.items.len == header.size); @@ -2876,10 +2886,10 @@ pub fn writeSymtab(self: *MachO, off: u32) !u32 { zo.writeSymtab(self); } for (self.objects.items) |index| { - self.getFile(index).?.writeSymtab(self); + try self.getFile(index).?.writeSymtab(self); } for (self.dylibs.items) |index| { - self.getFile(index).?.writeSymtab(self); + try self.getFile(index).?.writeSymtab(self); } if (self.getInternalObject()) |internal| { internal.writeSymtab(self); @@ -2916,7 +2926,7 @@ pub fn writeStrtab(self: *MachO, off: u32) !u32 { return off + cmd.strsize; } -fn writeLoadCommands(self: *MachO) !struct { usize, usize, usize } { +fn writeLoadCommands(self: *MachO) !struct { usize, usize, u64 } { const gpa = self.base.comp.gpa; const needed_size = load_commands.calcLoadCommandsSize(self, false); const buffer = try gpa.alloc(u8, needed_size); @@ -3075,7 +3085,7 @@ fn writeHeader(self: *MachO, ncmds: usize, sizeofcmds: usize) !void { try self.base.file.?.pwriteAll(mem.asBytes(&header), 0); } -fn writeUuid(self: *MachO, uuid_cmd_offset: usize, has_codesig: bool) !void { +fn writeUuid(self: *MachO, uuid_cmd_offset: u64, has_codesig: bool) !void { const file_size = if (!has_codesig) blk: { const seg = self.getLinkeditSegment(); break :blk seg.fileoff + seg.filesize; @@ -3273,7 +3283,8 @@ fn copyRangeAllZeroOut(self: *MachO, old_offset: u64, new_offset: u64, size: u64 const file = self.base.file.?; const amt = try file.copyRangeAll(old_offset, file, new_offset, size); if (amt != size) return error.InputOutput; - const zeroes = try gpa.alloc(u8, size); + const size_u = math.cast(usize, size) orelse return error.Overflow; + const zeroes = try gpa.alloc(u8, 
size_u); defer gpa.free(zeroes); @memset(zeroes, 0); try file.pwriteAll(zeroes, old_offset); diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index c8564c95d8..5f6671c493 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -579,7 +579,7 @@ fn resolveRelocInner( writer: anytype, ) ResolveError!void { const cpu_arch = macho_file.getTarget().cpu.arch; - const rel_offset = rel.offset - self.off; + const rel_offset = math.cast(usize, rel.offset - self.off) orelse return error.Overflow; const seg_id = macho_file.sections.items(.segment_id)[self.out_n_sect]; const seg = macho_file.segments.items[seg_id]; const P = @as(i64, @intCast(self.value)) + @as(i64, @intCast(rel_offset)); diff --git a/src/link/MachO/DwarfInfo.zig b/src/link/MachO/DwarfInfo.zig index 8b32faa567..036738225d 100644 --- a/src/link/MachO/DwarfInfo.zig +++ b/src/link/MachO/DwarfInfo.zig @@ -20,7 +20,7 @@ pub fn deinit(dw: *DwarfInfo, allocator: Allocator) void { dw.compile_units.deinit(allocator); } -fn getString(dw: DwarfInfo, off: u64) [:0]const u8 { +fn getString(dw: DwarfInfo, off: usize) [:0]const u8 { assert(off < dw.debug_str.len); return mem.sliceTo(@as([*:0]const u8, @ptrCast(dw.debug_str.ptr + off)), 0); } @@ -144,9 +144,9 @@ fn parseDie( try cu.diePtr(die).values.ensureTotalCapacityPrecise(allocator, decl.attrs.values().len); for (decl.attrs.values()) |attr| { - const start = creader.bytes_read; + const start = std.math.cast(usize, creader.bytes_read) orelse return error.Overflow; try advanceByFormSize(cu, attr.form, creader); - const end = creader.bytes_read; + const end = std.math.cast(usize, creader.bytes_read) orelse return error.Overflow; cu.diePtr(die).values.appendAssumeCapacity(data[start..end]); } @@ -184,14 +184,16 @@ fn advanceByFormSize(cu: *CompileUnit, form: Form, creader: anytype) !void { dwarf.FORM.block => try leb.readULEB128(u64, reader), else => unreachable, }; - for (0..len) |_| { + var i: u64 = 0; + while (i < len) : (i += 1) { _ = try 
reader.readByte(); } }, dwarf.FORM.exprloc => { const len = try leb.readULEB128(u64, reader); - for (0..len) |_| { + var i: u64 = 0; + while (i < len) : (i += 1) { _ = try reader.readByte(); } }, @@ -292,7 +294,7 @@ pub const CompileUnitHeader = struct { pub const CompileUnit = struct { header: CompileUnitHeader, - pos: usize, + pos: u64, dies: std.ArrayListUnmanaged(Die) = .{}, children: std.ArrayListUnmanaged(Die.Index) = .{}, @@ -314,14 +316,14 @@ pub const CompileUnit = struct { return &cu.dies.items[index]; } - pub fn getCompileDir(cu: CompileUnit, ctx: DwarfInfo) ?[:0]const u8 { + pub fn getCompileDir(cu: CompileUnit, ctx: DwarfInfo) error{Overflow}!?[:0]const u8 { assert(cu.dies.items.len > 0); const die = cu.dies.items[0]; const res = die.find(dwarf.AT.comp_dir, cu, ctx) orelse return null; return res.getString(cu.header.format, ctx); } - pub fn getSourceFile(cu: CompileUnit, ctx: DwarfInfo) ?[:0]const u8 { + pub fn getSourceFile(cu: CompileUnit, ctx: DwarfInfo) error{Overflow}!?[:0]const u8 { assert(cu.dies.items.len > 0); const die = cu.dies.items[0]; const res = die.find(dwarf.AT.name, cu, ctx) orelse return null; @@ -370,7 +372,7 @@ pub const DieValue = struct { }; } - pub fn getString(value: DieValue, format: Format, ctx: DwarfInfo) ?[:0]const u8 { + pub fn getString(value: DieValue, format: Format, ctx: DwarfInfo) error{Overflow}!?[:0]const u8 { switch (value.attr.form) { dwarf.FORM.string => { return mem.sliceTo(@as([*:0]const u8, @ptrCast(value.bytes.ptr)), 0); @@ -380,7 +382,8 @@ pub const DieValue = struct { .dwarf64 => mem.readInt(u64, value.bytes[0..8], .little), .dwarf32 => mem.readInt(u32, value.bytes[0..4], .little), }; - return ctx.getString(off); + const off_u = std.math.cast(usize, off) orelse return error.Overflow; + return ctx.getString(off_u); }, else => return null, } diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index 0f4ee09e78..363ec2e3f9 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ 
-139,7 +139,7 @@ const TrieIterator = struct { var creader = std.io.countingReader(stream.reader()); const reader = creader.reader(); const value = try std.leb.readULEB128(u64, reader); - it.pos += creader.bytes_read; + it.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow; return value; } @@ -212,7 +212,7 @@ fn parseTrieNode( const off = try it.readULEB128(); const prefix_label = try std.fmt.allocPrint(arena, "{s}{s}", .{ prefix, label }); const curr = it.pos; - it.pos = off; + it.pos = math.cast(usize, off) orelse return error.Overflow; try self.parseTrieNode(it, allocator, arena, prefix_label); it.pos = curr; } diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 367b7bc8fa..3d1984e43f 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -632,7 +632,7 @@ fn initEhFrameRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { const sect = slice.items(.header)[sect_id]; const relocs = slice.items(.relocs)[sect_id]; - const data = self.getSectionData(sect_id); + const data = try self.getSectionData(sect_id); try self.eh_frame_data.ensureTotalCapacityPrecise(gpa, data.len); self.eh_frame_data.appendSliceAssumeCapacity(data); @@ -733,7 +733,7 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { }; const gpa = macho_file.base.comp.gpa; - const data = self.getSectionData(sect_id); + const data = try self.getSectionData(sect_id); const nrecs = @divExact(data.len, @sizeOf(macho.compact_unwind_entry)); const recs = @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data.ptr))[0..nrecs]; const sym_lookup = SymbolLookup{ .ctx = self }; @@ -974,9 +974,9 @@ fn initDwarfInfo(self: *Object, macho_file: *MachO) !void { if (debug_info_index == null or debug_abbrev_index == null) return; var dwarf_info = DwarfInfo{ - .debug_info = self.getSectionData(@intCast(debug_info_index.?)), - .debug_abbrev = self.getSectionData(@intCast(debug_abbrev_index.?)), - .debug_str = if 
(debug_str_index) |index| self.getSectionData(@intCast(index)) else "", + .debug_info = try self.getSectionData(@intCast(debug_info_index.?)), + .debug_abbrev = try self.getSectionData(@intCast(debug_abbrev_index.?)), + .debug_str = if (debug_str_index) |index| try self.getSectionData(@intCast(index)) else "", }; dwarf_info.init(gpa) catch { try macho_file.reportParseError2(self.index, "invalid __DWARF info found", .{}); @@ -1203,15 +1203,15 @@ pub fn calcSymtabSize(self: *Object, macho_file: *MachO) !void { } if (macho_file.base.comp.config.debug_format != .strip and self.hasDebugInfo()) - self.calcStabsSize(macho_file); + try self.calcStabsSize(macho_file); } -pub fn calcStabsSize(self: *Object, macho_file: *MachO) void { +pub fn calcStabsSize(self: *Object, macho_file: *MachO) error{Overflow}!void { if (self.dwarf_info) |dw| { // TODO handle multiple CUs const cu = dw.compile_units.items[0]; - const comp_dir = cu.getCompileDir(dw) orelse return; - const tu_name = cu.getSourceFile(dw) orelse return; + const comp_dir = try cu.getCompileDir(dw) orelse return; + const tu_name = try cu.getSourceFile(dw) orelse return; self.output_symtab_ctx.nstabs += 4; // N_SO, N_SO, N_OSO, N_SO self.output_symtab_ctx.strsize += @as(u32, @intCast(comp_dir.len + 1)); // comp_dir @@ -1266,7 +1266,7 @@ pub fn calcStabsSize(self: *Object, macho_file: *MachO) void { } } -pub fn writeSymtab(self: Object, macho_file: *MachO) void { +pub fn writeSymtab(self: Object, macho_file: *MachO) error{Overflow}!void { const tracy = trace(@src()); defer tracy.end(); @@ -1284,10 +1284,10 @@ pub fn writeSymtab(self: Object, macho_file: *MachO) void { } if (macho_file.base.comp.config.debug_format != .strip and self.hasDebugInfo()) - self.writeStabs(macho_file); + try self.writeStabs(macho_file); } -pub fn writeStabs(self: *const Object, macho_file: *MachO) void { +pub fn writeStabs(self: *const Object, macho_file: *MachO) error{Overflow}!void { const writeFuncStab = struct { inline fn writeFuncStab( 
n_strx: u32, @@ -1333,8 +1333,8 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void { if (self.dwarf_info) |dw| { // TODO handle multiple CUs const cu = dw.compile_units.items[0]; - const comp_dir = cu.getCompileDir(dw) orelse return; - const tu_name = cu.getSourceFile(dw) orelse return; + const comp_dir = try cu.getCompileDir(dw) orelse return; + const tu_name = try cu.getSourceFile(dw) orelse return; // Open scope // N_SO comp_dir @@ -1540,16 +1540,20 @@ fn getLoadCommand(self: Object, lc: macho.LC) ?LoadCommandIterator.LoadCommand { } else return null; } -pub fn getSectionData(self: *const Object, index: u32) []const u8 { +pub fn getSectionData(self: *const Object, index: u32) error{Overflow}![]const u8 { const slice = self.sections.slice(); assert(index < slice.items(.header).len); const sect = slice.items(.header)[index]; - return self.data[sect.offset..][0..sect.size]; + const off = math.cast(usize, sect.offset) orelse return error.Overflow; + const size = math.cast(usize, sect.size) orelse return error.Overflow; + return self.data[off..][0..size]; } -pub fn getAtomData(self: *const Object, atom: Atom) []const u8 { - const data = self.getSectionData(atom.n_sect); - return data[atom.off..][0..atom.size]; +pub fn getAtomData(self: *const Object, atom: Atom) error{Overflow}![]const u8 { + const data = try self.getSectionData(atom.n_sect); + const off = math.cast(usize, atom.off) orelse return error.Overflow; + const size = math.cast(usize, atom.size) orelse return error.Overflow; + return data[off..][0..size]; } pub fn getAtomRelocs(self: *const Object, atom: Atom) []const Relocation { @@ -1821,7 +1825,7 @@ const x86_64 = struct { [*]align(1) const macho.relocation_info, @ptrCast(self.data.ptr + sect.reloff), )[0..sect.nreloc]; - const code = self.getSectionData(@intCast(n_sect)); + const code = try self.getSectionData(@intCast(n_sect)); try out.ensureTotalCapacityPrecise(gpa, relocs.len); @@ -1977,7 +1981,7 @@ const aarch64 = struct { [*]align(1) 
const macho.relocation_info, @ptrCast(self.data.ptr + sect.reloff), )[0..sect.nreloc]; - const code = self.getSectionData(@intCast(n_sect)); + const code = try self.getSectionData(@intCast(n_sect)); try out.ensureTotalCapacityPrecise(gpa, relocs.len); diff --git a/src/link/MachO/UnwindInfo.zig b/src/link/MachO/UnwindInfo.zig index ed70b1c083..8f62cc2f88 100644 --- a/src/link/MachO/UnwindInfo.zig +++ b/src/link/MachO/UnwindInfo.zig @@ -333,13 +333,15 @@ pub fn write(info: UnwindInfo, macho_file: *MachO, buffer: []u8) !void { try page.write(info, macho_file, writer); const nwritten = cwriter.bytes_written - start; if (nwritten < second_level_page_bytes) { - try writer.writeByteNTimes(0, second_level_page_bytes - nwritten); + const padding = math.cast(usize, second_level_page_bytes - nwritten) orelse return error.Overflow; + try writer.writeByteNTimes(0, padding); } } const padding = buffer.len - cwriter.bytes_written; if (padding > 0) { - @memset(buffer[cwriter.bytes_written..], 0); + const off = math.cast(usize, cwriter.bytes_written) orelse return error.Overflow; + @memset(buffer[off..], 0); } } diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 5c1fc9971f..6f55a077b5 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -154,7 +154,8 @@ pub fn getAtomDataAlloc( return data; }, macho.S_THREAD_LOCAL_VARIABLES => { - const data = try allocator.alloc(u8, atom.size); + const size = std.math.cast(usize, atom.size) orelse return error.Overflow; + const data = try allocator.alloc(u8, size); @memset(data, 0); return data; }, diff --git a/src/link/MachO/dyld_info/Rebase.zig b/src/link/MachO/dyld_info/Rebase.zig index 5209dcd9f4..776d144754 100644 --- a/src/link/MachO/dyld_info/Rebase.zig +++ b/src/link/MachO/dyld_info/Rebase.zig @@ -181,7 +181,7 @@ fn rebaseTimesSkip(count: usize, skip: u64, writer: anytype) !void { fn addAddr(addr: u64, writer: anytype) !void { log.debug(">>> add: {x}", .{addr}); - if 
(std.mem.isAligned(addr, @sizeOf(u64))) { + if (std.mem.isAlignedGeneric(u64, addr, @sizeOf(u64))) { const imm = @divExact(addr, @sizeOf(u64)); if (imm <= 0xf) { try writer.writeByte(macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | @as(u4, @truncate(imm))); diff --git a/src/link/MachO/dyld_info/bind.zig b/src/link/MachO/dyld_info/bind.zig index bb746b05e3..7c0d2ab692 100644 --- a/src/link/MachO/dyld_info/bind.zig +++ b/src/link/MachO/dyld_info/bind.zig @@ -448,7 +448,7 @@ fn doBind(writer: anytype) !void { fn doBindAddAddr(addr: u64, writer: anytype) !void { log.debug(">>> bind with add: {x}", .{addr}); - if (std.mem.isAligned(addr, @sizeOf(u64))) { + if (std.mem.isAlignedGeneric(u64, addr, @sizeOf(u64))) { const imm = @divExact(addr, @sizeOf(u64)); if (imm <= 0xf) { try writer.writeByte( diff --git a/src/link/MachO/file.zig b/src/link/MachO/file.zig index 7033f58761..67b2b9106e 100644 --- a/src/link/MachO/file.zig +++ b/src/link/MachO/file.zig @@ -90,7 +90,7 @@ pub const File = union(enum) { }; } - pub fn writeSymtab(file: File, macho_file: *MachO) void { + pub fn writeSymtab(file: File, macho_file: *MachO) !void { return switch (file) { inline else => |x| x.writeSymtab(macho_file), }; diff --git a/src/link/MachO/hasher.zig b/src/link/MachO/hasher.zig index 57c8acd35e..aff4696c08 100644 --- a/src/link/MachO/hasher.zig +++ b/src/link/MachO/hasher.zig @@ -14,9 +14,13 @@ pub fn ParallelHasher(comptime Hasher: type) type { var wg: WaitGroup = .{}; - const file_size = opts.max_file_size orelse try file.getEndPos(); + const file_size = blk: { + const file_size = opts.max_file_size orelse try file.getEndPos(); + break :blk std.math.cast(usize, file_size) orelse return error.Overflow; + }; + const chunk_size = std.math.cast(usize, opts.chunk_size) orelse return error.Overflow; - const buffer = try self.allocator.alloc(u8, opts.chunk_size * out.len); + const buffer = try self.allocator.alloc(u8, chunk_size * out.len); defer self.allocator.free(buffer); const results = try 
self.allocator.alloc(fs.File.PReadError!usize, out.len); @@ -27,11 +31,11 @@ pub fn ParallelHasher(comptime Hasher: type) type { defer wg.wait(); for (out, results, 0..) |*out_buf, *result, i| { - const fstart = i * opts.chunk_size; - const fsize = if (fstart + opts.chunk_size > file_size) + const fstart = i * chunk_size; + const fsize = if (fstart + chunk_size > file_size) file_size - fstart else - opts.chunk_size; + chunk_size; wg.start(); try self.thread_pool.spawn(worker, .{ file, diff --git a/src/link/MachO/relocatable.zig b/src/link/MachO/relocatable.zig index 8b47d8eeb5..ecd1d6220b 100644 --- a/src/link/MachO/relocatable.zig +++ b/src/link/MachO/relocatable.zig @@ -262,7 +262,8 @@ fn writeAtoms(macho_file: *MachO) !void { if (atoms.items.len == 0) continue; if (header.isZerofill()) continue; - const code = try gpa.alloc(u8, header.size); + const size = math.cast(usize, header.size) orelse return error.Overflow; + const code = try gpa.alloc(u8, size); defer gpa.free(code); const padding_byte: u8 = if (header.isCode() and cpu_arch == .x86_64) 0xcc else 0; @memset(code, padding_byte); @@ -273,9 +274,11 @@ fn writeAtoms(macho_file: *MachO) !void { for (atoms.items) |atom_index| { const atom = macho_file.getAtom(atom_index).?; assert(atom.flags.alive); - const off = atom.value - header.addr; - @memcpy(code[off..][0..atom.size], atom.getFile(macho_file).object.getAtomData(atom.*)); - try atom.writeRelocs(macho_file, code[off..][0..atom.size], &relocs); + const off = math.cast(usize, atom.value - header.addr) orelse return error.Overflow; + const atom_size = math.cast(usize, atom.size) orelse return error.Overflow; + const atom_data = try atom.getFile(macho_file).object.getAtomData(atom.*); + @memcpy(code[off..][0..atom_size], atom_data); + try atom.writeRelocs(macho_file, code[off..][0..atom_size], &relocs); } assert(relocs.items.len == header.nreloc); @@ -293,7 +296,7 @@ fn writeCompactUnwind(macho_file: *MachO) !void { const gpa = macho_file.base.comp.gpa; const 
header = macho_file.sections.items(.header)[sect_index]; - const nrecs = @divExact(header.size, @sizeOf(macho.compact_unwind_entry)); + const nrecs = math.cast(usize, @divExact(header.size, @sizeOf(macho.compact_unwind_entry))) orelse return error.Overflow; var entries = try std.ArrayList(macho.compact_unwind_entry).initCapacity(gpa, nrecs); defer entries.deinit(); @@ -379,8 +382,9 @@ fn writeEhFrame(macho_file: *MachO) !void { const sect_index = macho_file.eh_frame_sect_index orelse return; const gpa = macho_file.base.comp.gpa; const header = macho_file.sections.items(.header)[sect_index]; + const size = math.cast(usize, header.size) orelse return error.Overflow; - const code = try gpa.alloc(u8, header.size); + const code = try gpa.alloc(u8, size); defer gpa.free(code); var relocs = try std.ArrayList(macho.relocation_info).initCapacity(gpa, header.nreloc); From d7265384941abeb0af7021d2df63f27899a80c82 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 21 Jan 2024 22:20:36 +0100 Subject: [PATCH 122/133] test/link/macho: ensure we do not run testStackSize on foreign host --- test/link/macho.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/test/link/macho.zig b/test/link/macho.zig index f41b0b2361..2659a156d5 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -1402,6 +1402,7 @@ fn testStackSize(b: *Build, opts: Options) *Step { exe.stack_size = 0x100000000; const run = addRunArtifact(exe); + run.expectExitCode(0); test_step.dependOn(&run.step); const check = exe.checkObject(); From c57e77a45553a6570e6e278475ff45408f59d1b4 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 21 Jan 2024 22:21:19 +0100 Subject: [PATCH 123/133] build: bump max_rss for building the compiler again --- build.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.zig b/build.zig index e452c3b622..a430512cdd 100644 --- a/build.zig +++ b/build.zig @@ -623,7 +623,7 @@ fn addCompilerStep(b: *std.Build, options: AddCompilerStepOptions) *std.Build.St 
.root_source_file = .{ .path = "src/main.zig" }, .target = options.target, .optimize = options.optimize, - .max_rss = 7_100_000_000, + .max_rss = 7_500_000_000, .strip = options.strip, .sanitize_thread = options.sanitize_thread, .single_threaded = options.single_threaded, From fe19d1e09b283c8f09afa49abb0f3835fdc40aaa Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 22 Jan 2024 11:25:43 +0100 Subject: [PATCH 124/133] macho: allocate segments in sep step and tweak sorting logic --- src/link/MachO.zig | 151 ++++++++++++++++++++++++++++----------------- 1 file changed, 96 insertions(+), 55 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 00e6b7f235..8c6f0f9d0e 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -588,6 +588,7 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node try self.initSegments(); try self.allocateSections(); + self.allocateSegments(); self.allocateAtoms(); self.allocateSyntheticSymbols(); try self.allocateLinkeditSegment(); @@ -1927,43 +1928,60 @@ fn getSegmentProt(segname: []const u8) macho.vm_prot_t { fn getSegmentRank(segname: []const u8) u8 { if (mem.eql(u8, segname, "__PAGEZERO")) return 0x0; - if (mem.eql(u8, segname, "__TEXT")) return 0x1; - if (mem.eql(u8, segname, "__DATA_CONST")) return 0x2; - if (mem.eql(u8, segname, "__DATA")) return 0x3; - if (mem.indexOf(u8, segname, "ZIG")) |_| return 0xe; if (mem.eql(u8, segname, "__LINKEDIT")) return 0xf; + if (mem.indexOf(u8, segname, "ZIG")) |_| return 0xe; + if (mem.startsWith(u8, segname, "__TEXT")) return 0x1; + if (mem.startsWith(u8, segname, "__DATA_CONST")) return 0x2; + if (mem.startsWith(u8, segname, "__DATA")) return 0x3; return 0x4; } -fn getSectionRank(self: *MachO, sect_index: u8) u8 { - const header = self.sections.items(.header)[sect_index]; - const segment_rank = getSegmentRank(header.segName()); - const section_rank: u4 = blk: { - if (header.isCode()) { - if (mem.eql(u8, "__text", header.sectName())) break :blk 
0x0; - if (header.type() == macho.S_SYMBOL_STUBS) break :blk 0x1; - break :blk 0x2; - } - switch (header.type()) { - macho.S_NON_LAZY_SYMBOL_POINTERS, - macho.S_LAZY_SYMBOL_POINTERS, - => break :blk 0x0, +fn segmentLessThan(ctx: void, lhs: []const u8, rhs: []const u8) bool { + _ = ctx; + const lhs_rank = getSegmentRank(lhs); + const rhs_rank = getSegmentRank(rhs); + if (lhs_rank == rhs_rank) { + return mem.order(u8, lhs, rhs) == .lt; + } + return lhs_rank < rhs_rank; +} - macho.S_MOD_INIT_FUNC_POINTERS => break :blk 0x1, - macho.S_MOD_TERM_FUNC_POINTERS => break :blk 0x2, - macho.S_ZEROFILL => break :blk 0xf, - macho.S_THREAD_LOCAL_REGULAR => break :blk 0xd, - macho.S_THREAD_LOCAL_ZEROFILL => break :blk 0xe, +fn getSectionRank(section: macho.section_64) u8 { + if (section.isCode()) { + if (mem.eql(u8, "__text", section.sectName())) return 0x0; + if (section.type() == macho.S_SYMBOL_STUBS) return 0x1; + return 0x2; + } + switch (section.type()) { + macho.S_NON_LAZY_SYMBOL_POINTERS, + macho.S_LAZY_SYMBOL_POINTERS, + => return 0x0, - else => { - if (mem.eql(u8, "__unwind_info", header.sectName())) break :blk 0xe; - if (mem.eql(u8, "__compact_unwind", header.sectName())) break :blk 0xe; - if (mem.eql(u8, "__eh_frame", header.sectName())) break :blk 0xf; - break :blk 0x3; - }, + macho.S_MOD_INIT_FUNC_POINTERS => return 0x1, + macho.S_MOD_TERM_FUNC_POINTERS => return 0x2, + macho.S_ZEROFILL => return 0xf, + macho.S_THREAD_LOCAL_REGULAR => return 0xd, + macho.S_THREAD_LOCAL_ZEROFILL => return 0xe, + + else => { + if (mem.eql(u8, "__unwind_info", section.sectName())) return 0xe; + if (mem.eql(u8, "__compact_unwind", section.sectName())) return 0xe; + if (mem.eql(u8, "__eh_frame", section.sectName())) return 0xf; + return 0x3; + }, + } +} + +fn sectionLessThan(ctx: void, lhs: macho.section_64, rhs: macho.section_64) bool { + if (mem.eql(u8, lhs.segName(), rhs.segName())) { + const lhs_rank = getSectionRank(lhs); + const rhs_rank = getSectionRank(rhs); + if (lhs_rank == 
rhs_rank) { + return mem.order(u8, lhs.sectName(), rhs.sectName()) == .lt; } - }; - return (@as(u8, @intCast(segment_rank)) << 4) + section_rank; + return lhs_rank < rhs_rank; + } + return segmentLessThan(ctx, lhs.segName(), rhs.segName()); } pub fn sortSections(self: *MachO) !void { @@ -1971,7 +1989,11 @@ pub fn sortSections(self: *MachO) !void { index: u8, pub fn lessThan(macho_file: *MachO, lhs: @This(), rhs: @This()) bool { - return macho_file.getSectionRank(lhs.index) < macho_file.getSectionRank(rhs.index); + return sectionLessThan( + {}, + macho_file.sections.items(.header)[lhs.index], + macho_file.sections.items(.header)[rhs.index], + ); } }; @@ -2235,8 +2257,7 @@ fn initSegments(self: *MachO) !void { // Sort segments const sortFn = struct { fn sortFn(ctx: void, lhs: macho.segment_command_64, rhs: macho.segment_command_64) bool { - _ = ctx; - return getSegmentRank(lhs.segName()) < getSegmentRank(rhs.segName()); + return segmentLessThan(ctx, lhs.segName(), rhs.segName()); } }.sortFn; mem.sort(macho.segment_command_64, self.segments.items, {}, sortFn); @@ -2277,16 +2298,9 @@ fn allocateSections(self: *MachO) !void { self.segments.items[index].vmaddr + self.segments.items[index].vmsize else 0; - - var prev_seg_id: u8 = if (self.pagezero_seg_index) |index| index + 1 else 0; - { - const seg = &self.segments.items[prev_seg_id]; - seg.vmaddr = vmaddr; - seg.fileoff = 0; - } - vmaddr += headerpad; var fileoff = headerpad; + var prev_seg_id: u8 = if (self.pagezero_seg_index) |index| index + 1 else 0; const page_size = self.getPageSize(); const slice = self.sections.slice(); @@ -2296,14 +2310,8 @@ fn allocateSections(self: *MachO) !void { for (slice.items(.header)[0..last_index], slice.items(.segment_id)[0..last_index]) |*header, curr_seg_id| { if (prev_seg_id != curr_seg_id) { - const prev_seg = &self.segments.items[prev_seg_id]; - const curr_seg = &self.segments.items[curr_seg_id]; - prev_seg.vmsize = vmaddr - prev_seg.vmaddr; - prev_seg.filesize = fileoff - 
prev_seg.fileoff; vmaddr = mem.alignForward(u64, vmaddr, page_size); fileoff = mem.alignForward(u32, fileoff, page_size); - curr_seg.vmaddr = vmaddr; - curr_seg.fileoff = fileoff; } const alignment = try math.powi(u32, 2, header.@"align"); @@ -2321,14 +2329,6 @@ fn allocateSections(self: *MachO) !void { prev_seg_id = curr_seg_id; } - { - const prev_seg = &self.segments.items[prev_seg_id]; - prev_seg.vmsize = vmaddr - prev_seg.vmaddr; - prev_seg.filesize = fileoff - prev_seg.fileoff; - } - - // TODO iterate over sections again, but consider only zig sections - // and move them if they are allocated in file below page-aligned fileoff fileoff = mem.alignForward(u32, fileoff, page_size); for (slice.items(.header)[last_index..], slice.items(.segment_id)[last_index..]) |*header, seg_id| { if (header.isZerofill()) continue; @@ -2355,6 +2355,47 @@ fn allocateSections(self: *MachO) !void { } } +/// We allocate segments in a separate step to also consider segments that have no sections. +fn allocateSegments(self: *MachO) void { + const first_index = if (self.pagezero_seg_index) |index| index + 1 else 0; + const last_index = for (self.segments.items, 0..) 
|seg, i| { + if (mem.indexOf(u8, seg.segName(), "ZIG")) |_| break i; + } else self.segments.items.len; + + var vmaddr: u64 = if (self.pagezero_seg_index) |index| + self.segments.items[index].vmaddr + self.segments.items[index].vmsize + else + 0; + var fileoff: u64 = 0; + + const page_size = self.getPageSize(); + const slice = self.sections.slice(); + + var next_sect_id: u8 = 0; + for (self.segments.items[first_index..last_index], first_index..last_index) |*seg, seg_id| { + seg.vmaddr = vmaddr; + seg.fileoff = fileoff; + + while (next_sect_id < slice.items(.header).len) : (next_sect_id += 1) { + const header = slice.items(.header)[next_sect_id]; + const sid = slice.items(.segment_id)[next_sect_id]; + + if (seg_id != sid) break; + + vmaddr = header.addr + header.size; + if (!header.isZerofill()) { + fileoff = header.offset + header.size; + } + } + + seg.vmsize = vmaddr - seg.vmaddr; + seg.filesize = fileoff - seg.fileoff; + + vmaddr = mem.alignForward(u64, vmaddr, page_size); + fileoff = mem.alignForward(u64, fileoff, page_size); + } +} + pub fn allocateAtoms(self: *MachO) void { const slice = self.sections.slice(); for (slice.items(.header), slice.items(.atoms)) |header, atoms| { From 82628dd151c15241cffd17884ed5124808dc4ed2 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 22 Jan 2024 13:29:53 +0100 Subject: [PATCH 125/133] macho: synthesise unwind records from __eh_frame even if no __compact_unwind --- src/link/MachO.zig | 4 +- src/link/MachO/Object.zig | 70 +++++++++++++++++++--------------- src/link/MachO/relocatable.zig | 4 +- 3 files changed, 43 insertions(+), 35 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 8c6f0f9d0e..3ffee5ad73 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1862,14 +1862,14 @@ fn initSyntheticSections(self: *MachO) !void { } const needs_unwind_info = for (self.objects.items) |index| { - if (self.getFile(index).?.object.compact_unwind_sect_index != null) break true; + if 
(self.getFile(index).?.object.hasUnwindRecords()) break true; } else false; if (needs_unwind_info) { self.unwind_info_sect_index = try self.addSection("__TEXT", "__unwind_info", .{}); } const needs_eh_frame = for (self.objects.items) |index| { - if (self.getFile(index).?.object.eh_frame_sect_index != null) break true; + if (self.getFile(index).?.object.hasEhFrameRecords()) break true; } else false; if (needs_eh_frame) { assert(needs_unwind_info); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 3d1984e43f..850120b64b 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -164,6 +164,10 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { try self.initUnwindRecords(index, macho_file); } + if (self.hasUnwindRecords() or self.hasEhFrameRecords()) { + try self.parseUnwindRecords(macho_file); + } + self.initPlatform(); if (self.platform) |platform| { @@ -816,36 +820,9 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { } } } - - if (!macho_file.base.isObject()) try self.synthesiseNullUnwindRecords(macho_file); - - const sortFn = struct { - fn sortFn(ctx: *MachO, lhs_index: UnwindInfo.Record.Index, rhs_index: UnwindInfo.Record.Index) bool { - const lhs = ctx.getUnwindRecord(lhs_index); - const rhs = ctx.getUnwindRecord(rhs_index); - const lhsa = lhs.getAtom(ctx); - const rhsa = rhs.getAtom(ctx); - return lhsa.getInputAddress(ctx) + lhs.atom_offset < rhsa.getInputAddress(ctx) + rhs.atom_offset; - } - }.sortFn; - mem.sort(UnwindInfo.Record.Index, self.unwind_records.items, macho_file, sortFn); - - // Associate unwind records to atoms - var next_cu: u32 = 0; - while (next_cu < self.unwind_records.items.len) { - const start = next_cu; - const rec_index = self.unwind_records.items[start]; - const rec = macho_file.getUnwindRecord(rec_index); - while (next_cu < self.unwind_records.items.len and - macho_file.getUnwindRecord(self.unwind_records.items[next_cu]).atom == rec.atom) : (next_cu += 1) - {} - - 
const atom = rec.getAtom(macho_file); - atom.unwind_records = .{ .pos = start, .len = next_cu - start }; - } } -fn synthesiseNullUnwindRecords(self: *Object, macho_file: *MachO) !void { +fn parseUnwindRecords(self: *Object, macho_file: *MachO) !void { // Synthesise missing unwind records. // The logic here is as follows: // 1. if an atom has unwind info record that is not DWARF, FDE is marked dead @@ -902,12 +879,10 @@ fn synthesiseNullUnwindRecords(self: *Object, macho_file: *MachO) !void { } } else { // Synthesise new unwind info record - const fde_data = fde.getData(macho_file); - const atom_size = mem.readInt(u64, fde_data[16..][0..8], .little); const rec_index = try macho_file.addUnwindRecord(); const rec = macho_file.getUnwindRecord(rec_index); try self.unwind_records.append(gpa, rec_index); - rec.length = @intCast(atom_size); + rec.length = @intCast(meta.size); rec.atom = fde.atom; rec.atom_offset = fde.atom_offset; rec.fde = fde_index; @@ -930,6 +905,31 @@ fn synthesiseNullUnwindRecords(self: *Object, macho_file: *MachO) !void { rec.file = self.index; } } + + const sortFn = struct { + fn sortFn(ctx: *MachO, lhs_index: UnwindInfo.Record.Index, rhs_index: UnwindInfo.Record.Index) bool { + const lhs = ctx.getUnwindRecord(lhs_index); + const rhs = ctx.getUnwindRecord(rhs_index); + const lhsa = lhs.getAtom(ctx); + const rhsa = rhs.getAtom(ctx); + return lhsa.getInputAddress(ctx) + lhs.atom_offset < rhsa.getInputAddress(ctx) + rhs.atom_offset; + } + }.sortFn; + mem.sort(UnwindInfo.Record.Index, self.unwind_records.items, macho_file, sortFn); + + // Associate unwind records to atoms + var next_cu: u32 = 0; + while (next_cu < self.unwind_records.items.len) { + const start = next_cu; + const rec_index = self.unwind_records.items[start]; + const rec = macho_file.getUnwindRecord(rec_index); + while (next_cu < self.unwind_records.items.len and + macho_file.getUnwindRecord(self.unwind_records.items[next_cu]).atom == rec.atom) : (next_cu += 1) + {} + + const atom = 
rec.getAtom(macho_file); + atom.unwind_records = .{ .pos = start, .len = next_cu - start }; + } } fn initPlatform(self: *Object) void { @@ -1566,6 +1566,14 @@ fn getString(self: Object, off: u32) [:0]const u8 { return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.ptr + off)), 0); } +pub fn hasUnwindRecords(self: Object) bool { + return self.unwind_records.items.len > 0; +} + +pub fn hasEhFrameRecords(self: Object) bool { + return self.cies.items.len > 0; +} + /// TODO handle multiple CUs pub fn hasDebugInfo(self: Object) bool { if (self.dwarf_info) |dw| { diff --git a/src/link/MachO/relocatable.zig b/src/link/MachO/relocatable.zig index ecd1d6220b..dcda59d607 100644 --- a/src/link/MachO/relocatable.zig +++ b/src/link/MachO/relocatable.zig @@ -143,7 +143,7 @@ fn initOutputSections(macho_file: *MachO) !void { } const needs_unwind_info = for (macho_file.objects.items) |index| { - if (macho_file.getFile(index).?.object.compact_unwind_sect_index != null) break true; + if (macho_file.getFile(index).?.object.hasUnwindRecords()) break true; } else false; if (needs_unwind_info) { macho_file.unwind_info_sect_index = try macho_file.addSection("__LD", "__compact_unwind", .{ @@ -152,7 +152,7 @@ fn initOutputSections(macho_file: *MachO) !void { } const needs_eh_frame = for (macho_file.objects.items) |index| { - if (macho_file.getFile(index).?.object.eh_frame_sect_index != null) break true; + if (macho_file.getFile(index).?.object.hasEhFrameRecords()) break true; } else false; if (needs_eh_frame) { assert(needs_unwind_info); From f2dce0c33794f363d1b7448ee110ca2ae4bbafac Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 22 Jan 2024 18:54:00 +0100 Subject: [PATCH 126/133] macho: exclude symbols from empty sections when parsing unwind info --- src/link/MachO/Object.zig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 850120b64b..9aecf0a78e 100644 --- a/src/link/MachO/Object.zig +++ 
b/src/link/MachO/Object.zig @@ -156,10 +156,12 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { try self.initSymbolStabs(nlists.items, macho_file); try self.initRelocs(macho_file); + // Parse DWARF __TEXT,__eh_frame section if (self.eh_frame_sect_index) |index| { try self.initEhFrameRecords(index, macho_file); } + // Parse Apple's __LD,__compact_unwind section if (self.compact_unwind_sect_index) |index| { try self.initUnwindRecords(index, macho_file); } @@ -841,7 +843,7 @@ fn parseUnwindRecords(self: *Object, macho_file: *MachO) !void { if (nlist.stab()) continue; if (!nlist.sect()) continue; const sect = self.sections.items(.header)[nlist.n_sect - 1]; - if (sect.isCode()) { + if (sect.isCode() and sect.size > 0) { try superposition.ensureUnusedCapacity(1); const gop = superposition.getOrPutAssumeCapacity(nlist.n_value); if (gop.found_existing) { From 46bc91ade533ad2f51ae32962b057952f90e8d2a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 22 Jan 2024 19:24:58 +0100 Subject: [PATCH 127/133] macho: skip -r when single input object file This is to ensure we don't unnecessarily strip debug info from the final relocatable input file, so just copy the file out for now. --- src/link/MachO/relocatable.zig | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/link/MachO/relocatable.zig b/src/link/MachO/relocatable.zig index dcda59d607..1bcbe1f3ab 100644 --- a/src/link/MachO/relocatable.zig +++ b/src/link/MachO/relocatable.zig @@ -12,6 +12,20 @@ pub fn flush(macho_file: *MachO, comp: *Compilation, module_obj_path: ?[]const u if (module_obj_path) |path| try positionals.append(.{ .path = path }); + if (positionals.items.len == 1) { + // Instead of invoking a full-blown `-r` mode on the input which sadly will strip all + // debug info segments/sections (this is apparently by design by Apple), we copy + // the *only* input file over. 
+ // TODO: in the future, when we implement `dsymutil` alternative directly in the Zig + // compiler, investigate if we can get rid of this `if` prong here. + const path = positionals.items[0].path; + const in_file = try std.fs.cwd().openFile(path, .{}); + const stat = try in_file.stat(); + const amt = try in_file.copyRangeAll(0, macho_file.base.file.?, 0, stat.size); + if (amt != stat.size) return error.InputOutput; // TODO: report an actual user error + return; + } + for (positionals.items) |obj| { macho_file.parsePositional(obj.path, obj.must_link) catch |err| switch (err) { error.MalformedObject, From e69ffcd8c1179f9945b672a7dba5a2d42fa6cf88 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 22 Jan 2024 23:21:43 +0100 Subject: [PATCH 128/133] macho: set filename as ident in code signature --- src/link/MachO.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 3ffee5ad73..05f26746f6 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -674,7 +674,7 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node // The most important here is to have the correct vm and filesize of the __LINKEDIT segment // where the code signature goes into. 
var codesig = CodeSignature.init(self.getPageSize()); - codesig.code_directory.ident = self.base.emit.sub_path; + codesig.code_directory.ident = fs.path.basename(full_out_path); if (self.entitlements) |path| try codesig.addEntitlements(gpa, path); try self.writeCodeSignaturePadding(&codesig); break :blk codesig; From 8fd4c36bf90baf8d2f7614e61b779aa75c66ee90 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 22 Jan 2024 23:21:55 +0100 Subject: [PATCH 129/133] build: bump max_rss for building the compiler again --- build.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.zig b/build.zig index a430512cdd..1b5ca01ea2 100644 --- a/build.zig +++ b/build.zig @@ -623,7 +623,7 @@ fn addCompilerStep(b: *std.Build, options: AddCompilerStepOptions) *std.Build.St .root_source_file = .{ .path = "src/main.zig" }, .target = options.target, .optimize = options.optimize, - .max_rss = 7_500_000_000, + .max_rss = 8_000_000_000, .strip = options.strip, .sanitize_thread = options.sanitize_thread, .single_threaded = options.single_threaded, From 508ff1dd144b86e7e561636ebe4ad4410462981a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 24 Jan 2024 00:32:32 +0100 Subject: [PATCH 130/133] macho: add misc fixes targeting macos 11 --- src/link/MachO.zig | 8 ++---- src/link/MachO/Symbol.zig | 4 +-- src/link/MachO/dyld_info/Rebase.zig | 2 +- src/link/MachO/synthetic.zig | 38 +++++++++++++++++------------ 4 files changed, 28 insertions(+), 24 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 05f26746f6..d4d684cde6 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2162,7 +2162,7 @@ fn calcSectionSizes(self: *MachO) !void { const header = &self.sections.items(.header)[idx]; header.size = self.stubs.size(self); header.@"align" = switch (cpu_arch) { - .x86_64 => 0, + .x86_64 => 1, .aarch64 => 2, else => 0, }; @@ -2171,11 +2171,7 @@ fn calcSectionSizes(self: *MachO) !void { if (self.stubs_helper_sect_index) |idx| { const header = 
&self.sections.items(.header)[idx]; header.size = self.stubs_helper.size(self); - header.@"align" = switch (cpu_arch) { - .x86_64 => 0, - .aarch64 => 2, - else => 0, - }; + header.@"align" = 2; } if (self.la_symbol_ptr_sect_index) |idx| { diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index dfdb81c605..e8a8a561b7 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -223,7 +223,7 @@ pub fn setOutputSym(symbol: Symbol, macho_file: *MachO, out: *macho.nlist_64) vo out.n_type = if (symbol.flags.abs) macho.N_ABS else macho.N_SECT; out.n_sect = if (symbol.flags.abs) 0 else @intCast(symbol.out_n_sect + 1); out.n_desc = 0; - out.n_value = symbol.getAddress(.{}, macho_file); + out.n_value = symbol.getAddress(.{ .stubs = false }, macho_file); switch (symbol.visibility) { .hidden => out.n_type |= macho.N_PEXT, @@ -234,7 +234,7 @@ pub fn setOutputSym(symbol: Symbol, macho_file: *MachO, out: *macho.nlist_64) vo out.n_type = macho.N_EXT; out.n_type |= if (symbol.flags.abs) macho.N_ABS else macho.N_SECT; out.n_sect = if (symbol.flags.abs) 0 else @intCast(symbol.out_n_sect + 1); - out.n_value = symbol.getAddress(.{}, macho_file); + out.n_value = symbol.getAddress(.{ .stubs = false }, macho_file); out.n_desc = 0; if (symbol.flags.weak) { diff --git a/src/link/MachO/dyld_info/Rebase.zig b/src/link/MachO/dyld_info/Rebase.zig index 776d144754..c0cda1584a 100644 --- a/src/link/MachO/dyld_info/Rebase.zig +++ b/src/link/MachO/dyld_info/Rebase.zig @@ -12,7 +12,7 @@ const Allocator = std.mem.Allocator; entries: std.ArrayListUnmanaged(Entry) = .{}, buffer: std.ArrayListUnmanaged(u8) = .{}, -const Entry = struct { +pub const Entry = struct { offset: u64, segment_id: u8, diff --git a/src/link/MachO/synthetic.zig b/src/link/MachO/synthetic.zig index 882db5d414..f8ad741d27 100644 --- a/src/link/MachO/synthetic.zig +++ b/src/link/MachO/synthetic.zig @@ -315,7 +315,7 @@ pub const StubsSection = struct { pub const StubsHelperSection = struct { pub inline fn 
preambleSize(cpu_arch: std.Target.Cpu.Arch) usize { return switch (cpu_arch) { - .x86_64 => 15, + .x86_64 => 16, .aarch64 => 6 * @sizeOf(u32), else => 0, }; @@ -408,6 +408,7 @@ pub const StubsHelperSection = struct { try writer.writeInt(i32, @intCast(dyld_private_addr - sect.addr - 3 - 4), .little); try writer.writeAll(&.{ 0x41, 0x53, 0xff, 0x25 }); try writer.writeInt(i32, @intCast(dyld_stub_binder_addr - sect.addr - 11 - 4), .little); + try writer.writeByte(0x90); }, .aarch64 => { { @@ -460,7 +461,11 @@ pub const LaSymbolPtrSection = struct { for (macho_file.stubs.symbols.items, 0..) |sym_index, idx| { const sym = macho_file.getSymbol(sym_index); const addr = sect.addr + idx * @sizeOf(u64); - const entry = bind.Entry{ + const rebase_entry = Rebase.Entry{ + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + }; + const bind_entry = bind.Entry{ .target = sym_index, .offset = addr - seg.vmaddr, .segment_id = seg_id, @@ -468,20 +473,19 @@ pub const LaSymbolPtrSection = struct { }; if (sym.flags.import) { if (sym.flags.weak) { - try macho_file.bind.entries.append(gpa, entry); - try macho_file.weak_bind.entries.append(gpa, entry); + try macho_file.bind.entries.append(gpa, bind_entry); + try macho_file.weak_bind.entries.append(gpa, bind_entry); } else { - try macho_file.lazy_bind.entries.append(gpa, entry); + try macho_file.lazy_bind.entries.append(gpa, bind_entry); + try macho_file.rebase.entries.append(gpa, rebase_entry); } } else { if (sym.flags.weak) { - try macho_file.rebase.entries.append(gpa, .{ - .offset = addr - seg.vmaddr, - .segment_id = seg_id, - }); - try macho_file.weak_bind.entries.append(gpa, entry); + try macho_file.rebase.entries.append(gpa, rebase_entry); + try macho_file.weak_bind.entries.append(gpa, bind_entry); } else if (sym.flags.interposable) { - try macho_file.lazy_bind.entries.append(gpa, entry); + try macho_file.lazy_bind.entries.append(gpa, bind_entry); + try macho_file.rebase.entries.append(gpa, rebase_entry); } } } @@ -493,15 +497,19 @@ 
pub const LaSymbolPtrSection = struct { _ = laptr; const cpu_arch = macho_file.getTarget().cpu.arch; const sect = macho_file.sections.items(.header)[macho_file.stubs_helper_sect_index.?]; - for (macho_file.stubs.symbols.items, 0..) |sym_index, idx| { + var stub_helper_idx: u32 = 0; + for (macho_file.stubs.symbols.items) |sym_index| { const sym = macho_file.getSymbol(sym_index); const value: u64 = if (sym.flags.@"export") sym.getAddress(.{ .stubs = false }, macho_file) else if (sym.flags.weak) @as(u64, 0) - else - sect.addr + StubsHelperSection.preambleSize(cpu_arch) + - StubsHelperSection.entrySize(cpu_arch) * idx; + else value: { + const value = sect.addr + StubsHelperSection.preambleSize(cpu_arch) + + StubsHelperSection.entrySize(cpu_arch) * stub_helper_idx; + stub_helper_idx += 1; + break :value value; + }; try writer.writeInt(u64, @intCast(value), .little); } } From 47dd8d0cf76b4505c37490b81836b90844c50f8a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 24 Jan 2024 12:40:01 +0100 Subject: [PATCH 131/133] macho: clean up logic for deciding if needs __stub_helper --- src/link/MachO/Atom.zig | 29 +++++++------- src/link/MachO/synthetic.zig | 74 +++++++++++++++++------------------- 2 files changed, 48 insertions(+), 55 deletions(-) diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 5f6671c493..57fb67f505 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -410,7 +410,7 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { switch (rel.type) { .branch => { const symbol = rel.getTargetSymbol(macho_file); - if (symbol.flags.import or (symbol.flags.@"export" and (symbol.flags.weak or symbol.flags.interposable))) { + if (symbol.flags.import or (symbol.flags.@"export" and symbol.flags.weak) or symbol.flags.interposable) { symbol.flags.stubs = true; if (symbol.flags.weak) { macho_file.binds_to_weak = true; @@ -426,7 +426,8 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { => { const symbol = 
rel.getTargetSymbol(macho_file); if (symbol.flags.import or - (symbol.flags.@"export" and (symbol.flags.weak or symbol.flags.interposable)) or + (symbol.flags.@"export" and symbol.flags.weak) or + symbol.flags.interposable or macho_file.getTarget().cpu.arch == .aarch64) // TODO relax on arm64 { symbol.flags.needs_got = true; @@ -456,7 +457,7 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { .{ self.getName(macho_file), symbol.getName(macho_file) }, ); } - if (symbol.flags.import or (symbol.flags.@"export" and (symbol.flags.weak or symbol.flags.interposable))) { + if (symbol.flags.import or (symbol.flags.@"export" and symbol.flags.weak) or symbol.flags.interposable) { symbol.flags.tlv_ptr = true; if (symbol.flags.weak) { macho_file.binds_to_weak = true; @@ -480,13 +481,11 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { } continue; } - if (symbol.flags.@"export") { - if (symbol.flags.weak) { - dynrel_ctx.weak_bind_relocs += 1; - macho_file.binds_to_weak = true; - } else if (symbol.flags.interposable) { - dynrel_ctx.bind_relocs += 1; - } + if (symbol.flags.@"export" and symbol.flags.weak) { + dynrel_ctx.weak_bind_relocs += 1; + macho_file.binds_to_weak = true; + } else if (symbol.flags.interposable) { + dynrel_ctx.bind_relocs += 1; } } dynrel_ctx.rebase_relocs += 1; @@ -635,12 +634,10 @@ fn resolveRelocInner( } return; } - if (sym.flags.@"export") { - if (sym.flags.weak) { - macho_file.weak_bind.entries.appendAssumeCapacity(entry); - } else if (sym.flags.interposable) { - macho_file.bind.entries.appendAssumeCapacity(entry); - } + if (sym.flags.@"export" and sym.flags.weak) { + macho_file.weak_bind.entries.appendAssumeCapacity(entry); + } else if (sym.flags.interposable) { + macho_file.bind.entries.appendAssumeCapacity(entry); } } macho_file.rebase.entries.appendAssumeCapacity(.{ diff --git a/src/link/MachO/synthetic.zig b/src/link/MachO/synthetic.zig index f8ad741d27..774fbe0d27 100644 --- a/src/link/MachO/synthetic.zig +++ 
b/src/link/MachO/synthetic.zig @@ -337,9 +337,8 @@ pub const StubsHelperSection = struct { var s: usize = preambleSize(cpu_arch); for (macho_file.stubs.symbols.items) |sym_index| { const sym = macho_file.getSymbol(sym_index); - if ((sym.flags.import and !sym.flags.weak) or (!sym.flags.weak and sym.flags.interposable)) { - s += entrySize(cpu_arch); - } + if (sym.flags.weak) continue; + s += entrySize(cpu_arch); } return s; } @@ -358,35 +357,34 @@ pub const StubsHelperSection = struct { var idx: usize = 0; for (macho_file.stubs.symbols.items) |sym_index| { const sym = macho_file.getSymbol(sym_index); - if ((sym.flags.import and !sym.flags.weak) or (!sym.flags.weak and sym.flags.interposable)) { - const offset = macho_file.lazy_bind.offsets.items[idx]; - const source: i64 = @intCast(sect.addr + preamble_size + entry_size * idx); - const target: i64 = @intCast(sect.addr); - switch (cpu_arch) { - .x86_64 => { - try writer.writeByte(0x68); - try writer.writeInt(u32, offset, .little); - try writer.writeByte(0xe9); - try writer.writeInt(i32, @intCast(target - source - 6 - 4), .little); - }, - .aarch64 => { - const literal = blk: { - const div_res = try std.math.divExact(u64, entry_size - @sizeOf(u32), 4); - break :blk std.math.cast(u18, div_res) orelse return error.Overflow; - }; - try writer.writeInt(u32, aarch64.Instruction.ldrLiteral( - .w16, - literal, - ).toU32(), .little); - const disp = math.cast(i28, @as(i64, @intCast(target)) - @as(i64, @intCast(source + 4))) orelse - return error.Overflow; - try writer.writeInt(u32, aarch64.Instruction.b(disp).toU32(), .little); - try writer.writeAll(&.{ 0x0, 0x0, 0x0, 0x0 }); - }, - else => unreachable, - } - idx += 1; + if (sym.flags.weak) continue; + const offset = macho_file.lazy_bind.offsets.items[idx]; + const source: i64 = @intCast(sect.addr + preamble_size + entry_size * idx); + const target: i64 = @intCast(sect.addr); + switch (cpu_arch) { + .x86_64 => { + try writer.writeByte(0x68); + try writer.writeInt(u32, offset, 
.little); + try writer.writeByte(0xe9); + try writer.writeInt(i32, @intCast(target - source - 6 - 4), .little); + }, + .aarch64 => { + const literal = blk: { + const div_res = try std.math.divExact(u64, entry_size - @sizeOf(u32), 4); + break :blk std.math.cast(u18, div_res) orelse return error.Overflow; + }; + try writer.writeInt(u32, aarch64.Instruction.ldrLiteral( + .w16, + literal, + ).toU32(), .little); + const disp = math.cast(i28, @as(i64, @intCast(target)) - @as(i64, @intCast(source + 4))) orelse + return error.Overflow; + try writer.writeInt(u32, aarch64.Instruction.b(disp).toU32(), .little); + try writer.writeAll(&.{ 0x0, 0x0, 0x0, 0x0 }); + }, + else => unreachable, } + idx += 1; } } @@ -500,17 +498,15 @@ pub const LaSymbolPtrSection = struct { var stub_helper_idx: u32 = 0; for (macho_file.stubs.symbols.items) |sym_index| { const sym = macho_file.getSymbol(sym_index); - const value: u64 = if (sym.flags.@"export") - sym.getAddress(.{ .stubs = false }, macho_file) - else if (sym.flags.weak) - @as(u64, 0) - else value: { + if (sym.flags.weak) { + const value = sym.getAddress(.{ .stubs = false }, macho_file); + try writer.writeInt(u64, @intCast(value), .little); + } else { const value = sect.addr + StubsHelperSection.preambleSize(cpu_arch) + StubsHelperSection.entrySize(cpu_arch) * stub_helper_idx; stub_helper_idx += 1; - break :value value; - }; - try writer.writeInt(u64, @intCast(value), .little); + try writer.writeInt(u64, @intCast(value), .little); + } } } }; From 5226705b321a2bb6aa63e3b20f5fd272e79441f2 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 24 Jan 2024 12:48:12 +0100 Subject: [PATCH 132/133] std.macho: add segment flags --- lib/std/macho.zig | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/lib/std/macho.zig b/lib/std/macho.zig index 2926b5ca22..fd17e32fb1 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -1240,6 +1240,22 @@ pub const FAT_MAGIC_64 = 0xcafebabf; /// NXSwapLong(FAT_MAGIC_64) pub const FAT_CIGAM_64 
= 0xbfbafeca; +/// Segment flags +/// The file contents for this segment are for the high part of the VM space, the low part +/// is zero filled (for stacks in core files). +pub const SG_HIGHVM = 0x1; +/// This segment is the VM that is allocated by a fixed VM library, for overlap checking in +/// the link editor. +pub const SG_FVMLIB = 0x2; +/// This segment has nothing that was relocated in it and nothing relocated to it, that is +/// it may be safely replaced without relocation. +pub const SG_NORELOC = 0x4; +/// This segment is protected. If the segment starts at file offset 0, the +/// first page of the segment is not protected. All other pages of the segment are protected. +pub const SG_PROTECTED_VERSION_1 = 0x8; +/// This segment is made read-only after fixups +pub const SG_READ_ONLY = 0x10; + /// The flags field of a section structure is separated into two parts a section /// type and section attributes. The section types are mutually exclusive (it /// can only have one type) but the section attributes are not (it may have more From 0fd0b765fa84a40446663928db1d3f9a63b7a98d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 24 Jan 2024 12:48:22 +0100 Subject: [PATCH 133/133] macho: set __DATA_CONST* segments flags to SG_READ_ONLY --- src/link/MachO.zig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index d4d684cde6..c9f655fd19 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2243,7 +2243,8 @@ fn initSegments(self: *MachO) !void { for (slice.items(.header)) |header| { const segname = header.segName(); if (self.getSegmentByName(segname) == null) { - _ = try self.addSegment(segname, .{ .prot = getSegmentProt(segname) }); + const flags: u32 = if (mem.startsWith(u8, segname, "__DATA_CONST")) macho.SG_READ_ONLY else 0; + _ = try self.addSegment(segname, .{ .prot = getSegmentProt(segname), .flags = flags }); } } @@ -3543,6 +3544,7 @@ pub fn addSegment(self: *MachO, name: []const u8, opts:
struct { fileoff: u64 = 0, filesize: u64 = 0, prot: macho.vm_prot_t = macho.PROT.NONE, + flags: u32 = 0, }) error{OutOfMemory}!u8 { const gpa = self.base.comp.gpa; const index = @as(u8, @intCast(self.segments.items.len));