From 44ee42c6bc46b20b1dac1f0b3a44512a8ada9c93 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 2 Jul 2022 08:45:33 +0200 Subject: [PATCH 01/27] cli: parse -dead_strip MachO linker flag --- src/link/MachO.zig | 7 +++++++ src/main.zig | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index d659d994eb..ad0aac94ca 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -565,6 +565,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No man.hash.addOptional(self.base.options.search_strategy); man.hash.addOptional(self.base.options.headerpad_size); man.hash.add(self.base.options.headerpad_max_install_names); + man.hash.add(self.base.options.gc_sections orelse false); man.hash.add(self.base.options.dead_strip_dylibs); man.hash.addListOfBytes(self.base.options.lib_dirs); man.hash.addListOfBytes(self.base.options.framework_dirs); @@ -1003,6 +1004,12 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try argv.append("-headerpad_max_install_names"); } + if (self.base.options.gc_sections) |is_set| { + if (is_set) { + try argv.append("-dead_strip"); + } + } + if (self.base.options.dead_strip_dylibs) { try argv.append("-dead_strip_dylibs"); } diff --git a/src/main.zig b/src/main.zig index 823cbf8757..3af8d48d99 100644 --- a/src/main.zig +++ b/src/main.zig @@ -463,6 +463,7 @@ const usage_build_generic = \\ -search_dylibs_first (Darwin) search `libx.dylib` in each dir in library search paths, then `libx.a` \\ -headerpad [value] (Darwin) set minimum space for future expansion of the load commands in hexadecimal notation \\ -headerpad_max_install_names (Darwin) set enough space as if all paths were MAXPATHLEN + \\ -dead_strip (Darwin) remove function and data that are unreachable by the entry point of exported symbols \\ -dead_strip_dylibs (Darwin) remove dylibs that are unreachable by the entry point or exported symbols \\ --import-memory (WebAssembly) import 
memory from the environment \\ --import-table (WebAssembly) import function table from the host environment @@ -969,6 +970,8 @@ fn buildOutputType( }; } else if (mem.eql(u8, arg, "-headerpad_max_install_names")) { headerpad_max_install_names = true; + } else if (mem.eql(u8, arg, "-dead_strip")) { + linker_gc_sections = true; } else if (mem.eql(u8, arg, "-dead_strip_dylibs")) { dead_strip_dylibs = true; } else if (mem.eql(u8, arg, "-T") or mem.eql(u8, arg, "--script")) { @@ -1764,6 +1767,8 @@ fn buildOutputType( }; } else if (mem.eql(u8, arg, "-headerpad_max_install_names")) { headerpad_max_install_names = true; + } else if (mem.eql(u8, arg, "-dead_strip")) { + linker_gc_sections = true; } else if (mem.eql(u8, arg, "-dead_strip_dylibs")) { dead_strip_dylibs = true; } else if (mem.eql(u8, arg, "--gc-sections")) { From 7ec9a4f382fb73950e4b4c5f7e005c154f6ec294 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 5 Jul 2022 14:42:45 +0200 Subject: [PATCH 02/27] cli: support --gc-sections and --no-gc-sections for Zig sources --- lib/std/build.zig | 7 +++++++ src/main.zig | 8 +++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/lib/std/build.zig b/lib/std/build.zig index 2fb7b0258c..83e30b278f 100644 --- a/lib/std/build.zig +++ b/lib/std/build.zig @@ -1561,6 +1561,10 @@ pub const LibExeObjStep = struct { /// safely garbage-collected during the linking phase. link_function_sections: bool = false, + /// Remove functions and data that are unreachable by the entry point or + /// exported symbols. + link_gc_sections: ?bool = null, + linker_allow_shlib_undefined: ?bool = null, /// Permit read-only relocations in read-only segments. Disallowed by default. 
@@ -2705,6 +2709,9 @@ pub const LibExeObjStep = struct { if (self.link_function_sections) { try zig_args.append("-ffunction-sections"); } + if (self.link_gc_sections) |x| { + try zig_args.append(if (x) "--gc-sections" else "--no-gc-sections"); + } if (self.linker_allow_shlib_undefined) |x| { try zig_args.append(if (x) "-fallow-shlib-undefined" else "-fno-allow-shlib-undefined"); } diff --git a/src/main.zig b/src/main.zig index 3af8d48d99..27682003f2 100644 --- a/src/main.zig +++ b/src/main.zig @@ -446,6 +446,8 @@ const usage_build_generic = \\ --compress-debug-sections=[e] Debug section compression settings \\ none No compression \\ zlib Compression with deflate/inflate + \\ --gc-sections Force removal of functions and data that are unreachable by the entry point or exported symbols + \\ --no-gc-sections Don't force removal of unreachable functions and data \\ --subsystem [subsystem] (Windows) /SUBSYSTEM: to the linker \\ --stack [size] Override default stack size \\ --image-base [addr] Set base address for executable image @@ -463,7 +465,7 @@ const usage_build_generic = \\ -search_dylibs_first (Darwin) search `libx.dylib` in each dir in library search paths, then `libx.a` \\ -headerpad [value] (Darwin) set minimum space for future expansion of the load commands in hexadecimal notation \\ -headerpad_max_install_names (Darwin) set enough space as if all paths were MAXPATHLEN - \\ -dead_strip (Darwin) remove function and data that are unreachable by the entry point of exported symbols + \\ -dead_strip (Darwin) remove functions and data that are unreachable by the entry point or exported symbols \\ -dead_strip_dylibs (Darwin) remove dylibs that are unreachable by the entry point or exported symbols \\ --import-memory (WebAssembly) import memory from the environment \\ --import-table (WebAssembly) import function table from the host environment @@ -1314,6 +1316,10 @@ fn buildOutputType( try linker_export_symbol_names.append(arg["--export=".len..]); } else if 
(mem.eql(u8, arg, "-Bsymbolic")) { linker_bind_global_refs_locally = true; + } else if (mem.eql(u8, arg, "--gc-sections")) { + linker_gc_sections = true; + } else if (mem.eql(u8, arg, "--no-gc-sections")) { + linker_gc_sections = false; } else if (mem.eql(u8, arg, "--debug-compile-errors")) { debug_compile_errors = true; } else if (mem.eql(u8, arg, "--verbose-link")) { From b81c48d9856141a4a380bd8569825919e5bd8cfc Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 3 Jul 2022 10:11:58 +0200 Subject: [PATCH 03/27] macho: read the entire file contents into memory at once --- src/link/MachO.zig | 4 +- src/link/MachO/Atom.zig | 6 +- src/link/MachO/Object.zig | 218 ++++++++++----------- test/cases/recursive_inline_function.0.zig | 2 +- 4 files changed, 105 insertions(+), 125 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index ad0aac94ca..c5ed6cb6ac 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2895,7 +2895,7 @@ fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { log.debug("resolving symbols in '{s}'", .{object.name}); - for (object.symtab.items) |sym, id| { + for (object.symtab) |sym, id| { const sym_id = @intCast(u32, id); const sym_name = object.getString(sym.n_strx); @@ -6600,7 +6600,7 @@ pub fn symbolIsTemp(sym: macho.nlist_64, sym_name: []const u8) bool { return mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L"); } -pub fn findFirst(comptime T: type, haystack: []T, start: usize, predicate: anytype) usize { +pub fn findFirst(comptime T: type, haystack: []const T, start: usize, predicate: anytype) usize { if (!@hasDecl(@TypeOf(predicate), "predicate")) @compileError("Predicate is required to define fn predicate(@This(), T) bool"); diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index ff78b26989..d7c595dbba 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -241,7 +241,7 @@ const RelocContext = struct { macho_file: *MachO, }; -pub fn parseRelocs(self: *Atom, relocs: 
[]macho.relocation_info, context: RelocContext) !void { +pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: RelocContext) !void { const tracy = trace(@src()); defer tracy.end(); @@ -284,7 +284,7 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC } assert(subtractor == null); - const sym = context.object.symtab.items[rel.r_symbolnum]; + const sym = context.object.symtab[rel.r_symbolnum]; if (sym.sect() and !sym.ext()) { subtractor = context.object.symbol_mapping.get(rel.r_symbolnum).?; } else { @@ -350,7 +350,7 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC break :target Relocation.Target{ .local = local_sym_index }; } - const sym = context.object.symtab.items[rel.r_symbolnum]; + const sym = context.object.symtab[rel.r_symbolnum]; const sym_name = context.object.getString(sym.n_strx); if (sym.sect() and !sym.ext()) { diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 03291cefab..d43f6154d9 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -20,9 +20,14 @@ const MachO = @import("../MachO.zig"); file: fs.File, name: []const u8, +/// Data contents of the file. Includes sections, and data of load commands. +/// Excludes the backing memory for the header and load commands. +/// Initialized in `parse`. 
+contents: []const u8 = undefined, + file_offset: ?u32 = null, -header: ?macho.mach_header_64 = null, +header: macho.mach_header_64 = undefined, load_commands: std.ArrayListUnmanaged(macho.LoadCommand) = .{}, @@ -41,9 +46,9 @@ dwarf_debug_line_index: ?u16 = null, dwarf_debug_line_str_index: ?u16 = null, dwarf_debug_ranges_index: ?u16 = null, -symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -strtab: std.ArrayListUnmanaged(u8) = .{}, -data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, +symtab: []const macho.nlist_64 = &.{}, +strtab: []const u8 = &.{}, +data_in_code_entries: []const macho.data_in_code_entry = &.{}, // Debug info debug_info: ?DebugInfo = null, @@ -65,41 +70,41 @@ analyzed: bool = false, const DebugInfo = struct { inner: dwarf.DwarfInfo, - debug_info: []u8, - debug_abbrev: []u8, - debug_str: []u8, - debug_line: []u8, - debug_line_str: []u8, - debug_ranges: []u8, + debug_info: []const u8, + debug_abbrev: []const u8, + debug_str: []const u8, + debug_line: []const u8, + debug_line_str: []const u8, + debug_ranges: []const u8, pub fn parseFromObject(allocator: Allocator, object: *const Object) !?DebugInfo { var debug_info = blk: { const index = object.dwarf_debug_info_index orelse return null; - break :blk try object.readSection(allocator, index); + break :blk object.getSectionContents(index); }; var debug_abbrev = blk: { const index = object.dwarf_debug_abbrev_index orelse return null; - break :blk try object.readSection(allocator, index); + break :blk object.getSectionContents(index); }; var debug_str = blk: { const index = object.dwarf_debug_str_index orelse return null; - break :blk try object.readSection(allocator, index); + break :blk object.getSectionContents(index); }; var debug_line = blk: { const index = object.dwarf_debug_line_index orelse return null; - break :blk try object.readSection(allocator, index); + break :blk object.getSectionContents(index); }; var debug_line_str = blk: { if 
(object.dwarf_debug_line_str_index) |ind| { - break :blk try object.readSection(allocator, ind); + break :blk object.getSectionContents(ind); } - break :blk try allocator.alloc(u8, 0); + break :blk &[0]u8{}; }; var debug_ranges = blk: { if (object.dwarf_debug_ranges_index) |ind| { - break :blk try object.readSection(allocator, ind); + break :blk object.getSectionContents(ind); } - break :blk try allocator.alloc(u8, 0); + break :blk &[0]u8{}; }; var inner: dwarf.DwarfInfo = .{ @@ -125,12 +130,6 @@ const DebugInfo = struct { } pub fn deinit(self: *DebugInfo, allocator: Allocator) void { - allocator.free(self.debug_info); - allocator.free(self.debug_abbrev); - allocator.free(self.debug_str); - allocator.free(self.debug_line); - allocator.free(self.debug_line_str); - allocator.free(self.debug_ranges); self.inner.deinit(allocator); } }; @@ -140,9 +139,7 @@ pub fn deinit(self: *Object, allocator: Allocator) void { lc.deinit(allocator); } self.load_commands.deinit(allocator); - self.data_in_code_entries.deinit(allocator); - self.symtab.deinit(allocator); - self.strtab.deinit(allocator); + allocator.free(self.contents); self.sections_as_symbols.deinit(allocator); self.symbol_mapping.deinit(allocator); self.reverse_symbol_mapping.deinit(allocator); @@ -155,14 +152,6 @@ pub fn deinit(self: *Object, allocator: Allocator) void { if (self.debug_info) |*db| { db.deinit(allocator); } - - if (self.tu_name) |n| { - allocator.free(n); - } - - if (self.tu_comp_dir) |n| { - allocator.free(n); - } } pub fn free(self: *Object, allocator: Allocator, macho_file: *MachO) void { @@ -233,21 +222,28 @@ fn freeAtoms(self: *Object, macho_file: *MachO) void { } pub fn parse(self: *Object, allocator: Allocator, target: std.Target) !void { - const reader = self.file.reader(); - if (self.file_offset) |offset| { - try reader.context.seekTo(offset); + const file_stat = try self.file.stat(); + const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; + self.contents = try 
self.file.readToEndAlloc(allocator, file_size); + + var stream = std.io.fixedBufferStream(self.contents); + const reader = stream.reader(); + + const file_offset = self.file_offset orelse 0; + if (file_offset > 0) { + try reader.context.seekTo(file_offset); } - const header = try reader.readStruct(macho.mach_header_64); - if (header.filetype != macho.MH_OBJECT) { + self.header = try reader.readStruct(macho.mach_header_64); + if (self.header.filetype != macho.MH_OBJECT) { log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_OBJECT, - header.filetype, + self.header.filetype, }); return error.NotObject; } - const this_arch: std.Target.Cpu.Arch = switch (header.cputype) { + const this_arch: std.Target.Cpu.Arch = switch (self.header.cputype) { macho.CPU_TYPE_ARM64 => .aarch64, macho.CPU_TYPE_X86_64 => .x86_64, else => |value| { @@ -260,22 +256,10 @@ pub fn parse(self: *Object, allocator: Allocator, target: std.Target) !void { return error.MismatchedCpuArchitecture; } - self.header = header; - - try self.readLoadCommands(allocator, reader); - try self.parseSymtab(allocator); - try self.parseDataInCode(allocator); - try self.parseDebugInfo(allocator); -} - -pub fn readLoadCommands(self: *Object, allocator: Allocator, reader: anytype) !void { - const header = self.header orelse unreachable; // Unreachable here signifies a fatal unexplored condition. 
- const offset = self.file_offset orelse 0; - - try self.load_commands.ensureUnusedCapacity(allocator, header.ncmds); + try self.load_commands.ensureUnusedCapacity(allocator, self.header.ncmds); var i: u16 = 0; - while (i < header.ncmds) : (i += 1) { + while (i < self.header.ncmds) : (i += 1) { var cmd = try macho.LoadCommand.read(allocator, reader); switch (cmd.cmd()) { .SEGMENT_64 => { @@ -305,18 +289,18 @@ pub fn readLoadCommands(self: *Object, allocator: Allocator, reader: anytype) !v } } - sect.offset += offset; + sect.offset += file_offset; if (sect.reloff > 0) { - sect.reloff += offset; + sect.reloff += file_offset; } } - seg.inner.fileoff += offset; + seg.inner.fileoff += file_offset; }, .SYMTAB => { self.symtab_cmd_index = i; - cmd.symtab.symoff += offset; - cmd.symtab.stroff += offset; + cmd.symtab.symoff += file_offset; + cmd.symtab.stroff += file_offset; }, .DYSYMTAB => { self.dysymtab_cmd_index = i; @@ -326,7 +310,7 @@ pub fn readLoadCommands(self: *Object, allocator: Allocator, reader: anytype) !v }, .DATA_IN_CODE => { self.data_in_code_cmd_index = i; - cmd.linkedit_data.dataoff += offset; + cmd.linkedit_data.dataoff += file_offset; }, else => { log.debug("Unknown load command detected: 0x{x}.", .{cmd.cmd()}); @@ -334,6 +318,10 @@ pub fn readLoadCommands(self: *Object, allocator: Allocator, reader: anytype) !v } self.load_commands.appendAssumeCapacity(cmd); } + + self.parseSymtab(); + self.parseDataInCode(); + try self.parseDebugInfo(allocator); } const NlistWithIndex = struct { @@ -373,7 +361,11 @@ const NlistWithIndex = struct { } }; -fn filterDice(dices: []macho.data_in_code_entry, start_addr: u64, end_addr: u64) []macho.data_in_code_entry { +fn filterDice( + dices: []const macho.data_in_code_entry, + start_addr: u64, + end_addr: u64, +) []const macho.data_in_code_entry { const Predicate = struct { addr: u64, @@ -400,10 +392,10 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! // local < extern defined < undefined. 
Unfortunately, this is not guaranteed! For instance, // the GO compiler does not necessarily respect that therefore we sort immediately by type // and address within. - var sorted_all_nlists = try std.ArrayList(NlistWithIndex).initCapacity(allocator, self.symtab.items.len); + var sorted_all_nlists = try std.ArrayList(NlistWithIndex).initCapacity(allocator, self.symtab.len); defer sorted_all_nlists.deinit(); - for (self.symtab.items) |nlist, index| { + for (self.symtab) |nlist, index| { sorted_all_nlists.appendAssumeCapacity(.{ .nlist = nlist, .index = @intCast(u32, index), @@ -439,16 +431,20 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! continue; }; + const is_zerofill = blk: { + const section_type = sect.type_(); + break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; + }; + // Read section's code - var code = try allocator.alloc(u8, @intCast(usize, sect.size)); - defer allocator.free(code); - _ = try self.file.preadAll(code, sect.offset); + const code: ?[]const u8 = if (!is_zerofill) self.getSectionContents(sect_id) else null; // Read section's list of relocations - var raw_relocs = try allocator.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info)); - defer allocator.free(raw_relocs); - _ = try self.file.preadAll(raw_relocs, sect.reloff); - const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs); + const raw_relocs = self.contents[sect.reloff..][0 .. sect.nreloc * @sizeOf(macho.relocation_info)]; + const relocs = mem.bytesAsSlice( + macho.relocation_info, + @alignCast(@alignOf(macho.relocation_info), raw_relocs), + ); // Symbols within this section only. const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists, sect); @@ -456,7 +452,7 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! 
macho_file.has_dices = macho_file.has_dices or blk: { if (self.text_section_index) |index| { if (index != id) break :blk false; - if (self.data_in_code_entries.items.len == 0) break :blk false; + if (self.data_in_code_entries.len == 0) break :blk false; break :blk true; } break :blk false; @@ -482,16 +478,12 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! const aligned_size = mem.alignForwardGeneric(u64, sect.size, alignment); const atom = try macho_file.createEmptyAtom(atom_local_sym_index, aligned_size, sect.@"align"); - const is_zerofill = blk: { - const section_type = sect.type_(); - break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; - }; - if (!is_zerofill) { - mem.copy(u8, atom.code.items, code); + if (code) |cc| { + assert(!is_zerofill); + mem.copy(u8, atom.code.items, cc); } - // TODO stage2 bug: @alignCast shouldn't be needed - try atom.parseRelocs(@alignCast(@alignOf(macho.relocation_info), relocs), .{ + try atom.parseRelocs(relocs, .{ .base_addr = sect.addr, .allocator = allocator, .object = self, @@ -499,7 +491,7 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! }); if (macho_file.has_dices) { - const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size); + const dices = filterDice(self.data_in_code_entries, sect.addr, sect.addr + sect.size); try atom.dices.ensureTotalCapacity(allocator, dices.len); for (dices) |dice| { @@ -562,20 +554,13 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! 
} } -fn parseSymtab(self: *Object, allocator: Allocator) !void { +fn parseSymtab(self: *Object) void { const index = self.symtab_cmd_index orelse return; - const symtab_cmd = self.load_commands.items[index].symtab; - - var symtab = try allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms); - defer allocator.free(symtab); - _ = try self.file.preadAll(symtab, symtab_cmd.symoff); - const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, symtab)); - try self.symtab.appendSlice(allocator, slice); - - var strtab = try allocator.alloc(u8, symtab_cmd.strsize); - defer allocator.free(strtab); - _ = try self.file.preadAll(strtab, symtab_cmd.stroff); - try self.strtab.appendSlice(allocator, strtab); + const symtab = self.load_commands.items[index].symtab; + const symtab_size = @sizeOf(macho.nlist_64) * symtab.nsyms; + const raw_symtab = self.contents[symtab.symoff..][0..symtab_size]; + self.symtab = mem.bytesAsSlice(macho.nlist_64, @alignCast(@alignOf(macho.nlist_64), raw_symtab)); + self.strtab = self.contents[symtab.stroff..][0..symtab.strsize]; } pub fn parseDebugInfo(self: *Object, allocator: Allocator) !void { @@ -599,8 +584,8 @@ pub fn parseDebugInfo(self: *Object, allocator: Allocator) !void { const comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.comp_dir); self.debug_info = debug_info; - self.tu_name = try allocator.dupe(u8, name); - self.tu_comp_dir = try allocator.dupe(u8, comp_dir); + self.tu_name = name; + self.tu_comp_dir = comp_dir; if (self.mtime == null) { self.mtime = mtime: { @@ -610,34 +595,29 @@ pub fn parseDebugInfo(self: *Object, allocator: Allocator) !void { } } -pub fn parseDataInCode(self: *Object, allocator: Allocator) !void { +pub fn parseDataInCode(self: *Object) void { const index = self.data_in_code_cmd_index orelse return; const data_in_code = self.load_commands.items[index].linkedit_data; - - var buffer = try allocator.alloc(u8, data_in_code.datasize); - defer 
allocator.free(buffer); - - _ = try self.file.preadAll(buffer, data_in_code.dataoff); - - var stream = io.fixedBufferStream(buffer); - var reader = stream.reader(); - while (true) { - const dice = reader.readStruct(macho.data_in_code_entry) catch |err| switch (err) { - error.EndOfStream => break, - }; - try self.data_in_code_entries.append(allocator, dice); - } + const raw_dice = self.contents[data_in_code.dataoff..][0..data_in_code.datasize]; + self.data_in_code_entries = mem.bytesAsSlice( + macho.data_in_code_entry, + @alignCast(@alignOf(macho.data_in_code_entry), raw_dice), + ); } -fn readSection(self: Object, allocator: Allocator, index: u16) ![]u8 { +fn getSectionContents(self: Object, sect_id: u16) []const u8 { const seg = self.load_commands.items[self.segment_cmd_index.?].segment; - const sect = seg.sections.items[index]; - var buffer = try allocator.alloc(u8, @intCast(usize, sect.size)); - _ = try self.file.preadAll(buffer, sect.offset); - return buffer; + const sect = seg.sections.items[sect_id]; + log.debug("getting {s},{s} data at 0x{x} - 0x{x}", .{ + sect.segName(), + sect.sectName(), + sect.offset, + sect.offset + sect.size, + }); + return self.contents[sect.offset..][0..sect.size]; } pub fn getString(self: Object, off: u32) []const u8 { - assert(off < self.strtab.items.len); - return mem.sliceTo(@ptrCast([*:0]const u8, self.strtab.items.ptr + off), 0); + assert(off < self.strtab.len); + return mem.sliceTo(@ptrCast([*:0]const u8, self.strtab.ptr + off), 0); } diff --git a/test/cases/recursive_inline_function.0.zig b/test/cases/recursive_inline_function.0.zig index 45a29a1068..9273db5fd3 100644 --- a/test/cases/recursive_inline_function.0.zig +++ b/test/cases/recursive_inline_function.0.zig @@ -9,5 +9,5 @@ inline fn fibonacci(n: usize) usize { } // run -// target=x86_64-linux,arm-linux,x86_64-macos,wasm32-wasi +// target=x86_64-linux,arm-linux,wasm32-wasi // From d042b88c112aa919386bc76294225d4f7bd9a7b3 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: 
Sun, 3 Jul 2022 21:04:37 +0200 Subject: [PATCH 04/27] macho: put parsing into an atom into a helper --- src/link/MachO/Object.zig | 335 ++++++++++++++++++++++++++++++-------- 1 file changed, 263 insertions(+), 72 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index d43f6154d9..f01f366fdd 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -16,6 +16,7 @@ const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); const MachO = @import("../MachO.zig"); +const MatchingSection = MachO.MatchingSection; file: fs.File, name: []const u8, @@ -421,6 +422,13 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! // We only care about defined symbols, so filter every other out. const sorted_nlists = sorted_all_nlists.items[0..iundefsym]; + const dead_strip = blk: { + const dead_strip = macho_file.base.options.gc_sections orelse break :blk false; + if (dead_strip or macho_file.base.options.optimize_mode != .Debug) + break :blk self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; + break :blk false; + }; + for (seg.sections.items) |sect, id| { const sect_id = @intCast(u8, id); log.debug("putting section '{s},{s}' as an Atom", .{ sect.segName(), sect.sectName() }); @@ -459,85 +467,42 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! }; macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null; - // Since there is no symbol to refer to this atom, we create + if (dead_strip) blk: { + if (filtered_nlists.len == 0) break :blk; // nothing to split + + // If the first nlist does not match the start of the section, + // then we need to encapsulate the memory range [section start, first symbol) + // as a temporary symbol and insert the matching Atom. 
+ const first_nlist = filtered_nlists[0].nlist; + if (first_nlist.n_value > sect.addr) {} + } + + // If there is no symbol to refer to this atom, we create // a temp one, unless we already did that when working out the relocations // of other atoms. - const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { - const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len); + const local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { + const local_sym_index = @intCast(u32, macho_file.locals.items.len); try macho_file.locals.append(allocator, .{ .n_strx = 0, .n_type = macho.N_SECT, .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1), .n_desc = 0, - .n_value = 0, + .n_value = sect.addr, }); - try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index); - break :blk atom_local_sym_index; + try self.sections_as_symbols.putNoClobber(allocator, sect_id, local_sym_index); + break :blk local_sym_index; }; - const alignment = try math.powi(u32, 2, sect.@"align"); - const aligned_size = mem.alignForwardGeneric(u64, sect.size, alignment); - const atom = try macho_file.createEmptyAtom(atom_local_sym_index, aligned_size, sect.@"align"); - - if (code) |cc| { - assert(!is_zerofill); - mem.copy(u8, atom.code.items, cc); - } - - try atom.parseRelocs(relocs, .{ - .base_addr = sect.addr, - .allocator = allocator, - .object = self, - .macho_file = macho_file, - }); - - if (macho_file.has_dices) { - const dices = filterDice(self.data_in_code_entries, sect.addr, sect.addr + sect.size); - try atom.dices.ensureTotalCapacity(allocator, dices.len); - - for (dices) |dice| { - atom.dices.appendAssumeCapacity(.{ - .offset = dice.offset - (math.cast(u32, sect.addr) orelse return error.Overflow), - .length = dice.length, - .kind = dice.kind, - }); - } - } - - // Since this is atom gets a helper local temporary symbol that didn't exist - // in the object file which encompasses the entire section, we need 
traverse - // the filtered symbols and note which symbol is contained within so that - // we can properly allocate addresses down the line. - // While we're at it, we need to update segment,section mapping of each symbol too. - try atom.contained.ensureTotalCapacity(allocator, filtered_nlists.len); - - for (filtered_nlists) |nlist_with_index| { - const nlist = nlist_with_index.nlist; - const local_sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable; - const local = &macho_file.locals.items[local_sym_index]; - local.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1); - - const stab: ?Atom.Stab = if (self.debug_info) |di| blk: { - // TODO there has to be a better to handle this. - for (di.inner.func_list.items) |func| { - if (func.pc_range) |range| { - if (nlist.n_value >= range.start and nlist.n_value < range.end) { - break :blk Atom.Stab{ - .function = range.end - range.start, - }; - } - } - } - // TODO - // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global; - break :blk .static; - } else null; - - atom.contained.appendAssumeCapacity(.{ - .local_sym_index = local_sym_index, - .offset = nlist.n_value - sect.addr, - .stab = stab, - }); - } + const atom = try self.parseIntoAtom( + allocator, + local_sym_index, + sect.size, + sect.@"align", + code, + relocs, + filtered_nlists, + match, + macho_file, + ); if (!self.start_atoms.contains(match)) { try self.start_atoms.putNoClobber(allocator, match, atom); @@ -554,6 +519,232 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! 
} } +// const Context = struct { +// allocator: *Allocator, +// object: *Object, +// macho_file: *MachO, +// match: MachO.MatchingSection, +// }; + +// const AtomParser = struct { +// section: macho.section_64, +// code: []u8, +// relocs: []macho.relocation_info, +// nlists: []NlistWithIndex, +// index: u32 = 0, + +// fn peek(self: AtomParser) ?NlistWithIndex { +// return if (self.index + 1 < self.nlists.len) self.nlists[self.index + 1] else null; +// } + +// fn lessThanBySeniority(context: Context, lhs: NlistWithIndex, rhs: NlistWithIndex) bool { +// if (!MachO.symbolIsExt(rhs.nlist)) { +// return MachO.symbolIsTemp(lhs.nlist, context.object.getString(lhs.nlist.n_strx)); +// } else if (MachO.symbolIsPext(rhs.nlist) or MachO.symbolIsWeakDef(rhs.nlist)) { +// return !MachO.symbolIsExt(lhs.nlist); +// } else { +// return false; +// } +// } + +// pub fn next(self: *AtomParser, context: Context) !?*Atom { +// if (self.index == self.nlists.len) return null; + +// const tracy = trace(@src()); +// defer tracy.end(); + +// var aliases = std.ArrayList(NlistWithIndex).init(context.allocator); +// defer aliases.deinit(); + +// const next_nlist: ?NlistWithIndex = blk: while (true) { +// const curr_nlist = self.nlists[self.index]; +// try aliases.append(curr_nlist); + +// if (self.peek()) |next_nlist| { +// if (curr_nlist.nlist.n_value == next_nlist.nlist.n_value) { +// self.index += 1; +// continue; +// } +// break :blk next_nlist; +// } +// break :blk null; +// } else null; + +// for (aliases.items) |*nlist_with_index| { +// nlist_with_index.index = context.object.symbol_mapping.get(nlist_with_index.index) orelse unreachable; +// } + +// if (aliases.items.len > 1) { +// // Bubble-up senior symbol as the main link to the atom. 
+// sort.sort( +// NlistWithIndex, +// aliases.items, +// context, +// AtomParser.lessThanBySeniority, +// ); +// } + +// const senior_nlist = aliases.pop(); +// const senior_sym = &context.macho_file.locals.items[senior_nlist.index]; +// senior_sym.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(context.match).? + 1); + +// const start_addr = senior_nlist.nlist.n_value - self.section.addr; +// const end_addr = if (next_nlist) |n| n.nlist.n_value - self.section.addr else self.section.size; + +// const code = self.code[start_addr..end_addr]; +// const size = code.len; + +// const max_align = self.section.@"align"; +// const actual_align = if (senior_nlist.nlist.n_value > 0) +// math.min(@ctz(u64, senior_nlist.nlist.n_value), max_align) +// else +// max_align; + +// const stab: ?Atom.Stab = if (context.object.debug_info) |di| blk: { +// // TODO there has to be a better to handle this. +// for (di.inner.func_list.items) |func| { +// if (func.pc_range) |range| { +// if (senior_nlist.nlist.n_value >= range.start and senior_nlist.nlist.n_value < range.end) { +// break :blk Atom.Stab{ +// .function = range.end - range.start, +// }; +// } +// } +// } +// // TODO +// // if (self.macho_file.globals.contains(self.macho_file.getString(senior_sym.strx))) break :blk .global; +// break :blk .static; +// } else null; + +// const atom = try context.macho_file.createEmptyAtom(senior_nlist.index, size, actual_align); +// atom.stab = stab; + +// const is_zerofill = blk: { +// const section_type = commands.sectionType(self.section); +// break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; +// }; +// if (!is_zerofill) { +// mem.copy(u8, atom.code.items, code); +// } + +// try atom.aliases.ensureTotalCapacity(context.allocator, aliases.items.len); +// for (aliases.items) |alias| { +// atom.aliases.appendAssumeCapacity(alias.index); +// const sym = &context.macho_file.locals.items[alias.index]; +// sym.n_sect = @intCast(u8, 
context.macho_file.section_ordinals.getIndex(context.match).? + 1); +// } + +// try atom.parseRelocs(self.relocs, .{ +// .base_addr = self.section.addr, +// .base_offset = start_addr, +// .allocator = context.allocator, +// .object = context.object, +// .macho_file = context.macho_file, +// }); + +// if (context.macho_file.has_dices) { +// const dices = filterDice( +// context.object.data_in_code_entries.items, +// senior_nlist.nlist.n_value, +// senior_nlist.nlist.n_value + size, +// ); +// try atom.dices.ensureTotalCapacity(context.allocator, dices.len); + +// for (dices) |dice| { +// atom.dices.appendAssumeCapacity(.{ +// .offset = dice.offset - try math.cast(u32, senior_nlist.nlist.n_value), +// .length = dice.length, +// .kind = dice.kind, +// }); +// } +// } + +// self.index += 1; + +// return atom; +// } +// }; + +fn parseIntoAtom( + self: *Object, + allocator: Allocator, + local_sym_index: u32, + size: u64, + alignment: u32, + code: ?[]const u8, + relocs: []const macho.relocation_info, + nlists: []const NlistWithIndex, + match: MatchingSection, + macho_file: *MachO, +) !*Atom { + const sym = macho_file.locals.items[local_sym_index]; + const align_pow_2 = try math.powi(u32, 2, alignment); + const aligned_size = mem.alignForwardGeneric(u64, size, align_pow_2); + const atom = try macho_file.createEmptyAtom(local_sym_index, aligned_size, alignment); + + if (code) |cc| { + mem.copy(u8, atom.code.items, cc); + } + + try atom.parseRelocs(relocs, .{ + .base_addr = sym.n_value, + .allocator = allocator, + .object = self, + .macho_file = macho_file, + }); + + if (macho_file.has_dices) { + const dices = filterDice(self.data_in_code_entries, sym.n_value, sym.n_value + size); + try atom.dices.ensureTotalCapacity(allocator, dices.len); + + for (dices) |dice| { + atom.dices.appendAssumeCapacity(.{ + .offset = dice.offset - (math.cast(u32, sym.n_value) orelse return error.Overflow), + .length = dice.length, + .kind = dice.kind, + }); + } + } + + // Since this is atom gets 
a helper local temporary symbol that didn't exist + // in the object file which encompasses the entire section, we need traverse + // the filtered symbols and note which symbol is contained within so that + // we can properly allocate addresses down the line. + // While we're at it, we need to update segment,section mapping of each symbol too. + try atom.contained.ensureTotalCapacity(allocator, nlists.len); + + for (nlists) |nlist_with_index| { + const nlist = nlist_with_index.nlist; + const sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable; + const this_sym = &macho_file.locals.items[sym_index]; + this_sym.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1); + + const stab: ?Atom.Stab = if (self.debug_info) |di| blk: { + // TODO there has to be a better to handle this. + for (di.inner.func_list.items) |func| { + if (func.pc_range) |range| { + if (nlist.n_value >= range.start and nlist.n_value < range.end) { + break :blk Atom.Stab{ + .function = range.end - range.start, + }; + } + } + } + // TODO + // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global; + break :blk .static; + } else null; + + atom.contained.appendAssumeCapacity(.{ + .local_sym_index = sym_index, + .offset = nlist.n_value - sym.n_value, + .stab = stab, + }); + } + + return atom; +} + fn parseSymtab(self: *Object) void { const index = self.symtab_cmd_index orelse return; const symtab = self.load_commands.items[index].symtab; @@ -563,7 +754,7 @@ fn parseSymtab(self: *Object) void { self.strtab = self.contents[symtab.stroff..][0..symtab.strsize]; } -pub fn parseDebugInfo(self: *Object, allocator: Allocator) !void { +fn parseDebugInfo(self: *Object, allocator: Allocator) !void { log.debug("parsing debug info in '{s}'", .{self.name}); var debug_info = blk: { @@ -595,7 +786,7 @@ pub fn parseDebugInfo(self: *Object, allocator: Allocator) !void { } } -pub fn parseDataInCode(self: *Object) void { +fn parseDataInCode(self: *Object) void { 
const index = self.data_in_code_cmd_index orelse return; const data_in_code = self.load_commands.items[index].linkedit_data; const raw_dice = self.contents[data_in_code.dataoff..][0..data_in_code.datasize]; From 03feea0fb200f273dd74bf778997e6a6bead86cc Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 4 Jul 2022 20:40:10 +0200 Subject: [PATCH 05/27] macho: split section into subsections if requested and/or possible --- src/link/MachO.zig | 291 ++++++++++++++++++++--- src/link/MachO/Atom.zig | 64 ++++- src/link/MachO/Object.zig | 388 +++++++++++++++---------------- test/link/macho/objcpp/build.zig | 1 + 4 files changed, 513 insertions(+), 231 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index c5ed6cb6ac..38624ae152 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -57,6 +57,8 @@ const SystemLib = struct { weak: bool = false, }; +const N_DESC_GCED: u16 = @bitCast(u16, @as(i16, -1)); + base: File, /// If this is not null, an object file is created by LLVM and linked with LLD afterwards. @@ -256,6 +258,8 @@ unnamed_const_atoms: UnnamedConstTable = .{}, /// TODO consolidate this. 
decls: std.AutoArrayHashMapUnmanaged(Module.Decl.Index, ?MatchingSection) = .{}, +gc_roots: std.AutoHashMapUnmanaged(*Atom, void) = .{}, + const Entry = struct { target: Atom.Relocation.Target, atom: *Atom, @@ -1165,6 +1169,8 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No const use_llvm = build_options.have_llvm and self.base.options.use_llvm; if (use_llvm or use_stage1) { + self.logAtoms(); + try self.gcAtoms(); try self.pruneAndSortSections(); try self.allocateSegments(); try self.allocateLocals(); @@ -1173,9 +1179,10 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.allocateSpecialSymbols(); try self.allocateGlobals(); - if (build_options.enable_logging) { + if (build_options.enable_logging or true) { self.logSymtab(); self.logSectionOrdinals(); + self.logAtoms(); } if (use_llvm or use_stage1) { @@ -2177,6 +2184,7 @@ pub fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment: try atom.code.resize(self.base.allocator, size_usize); mem.set(u8, atom.code.items, 0); + try self.atom_by_index_table.putNoClobber(self.base.allocator, local_sym_index, atom); try self.managed_atoms.append(self.base.allocator, atom); return atom; } @@ -3298,12 +3306,7 @@ fn resolveDyldStubBinder(self: *MachO) !void { const vaddr = try self.allocateAtom(atom, @sizeOf(u64), 8, match); log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); atom_sym.n_value = vaddr; - } else { - const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].segment; - const sect = &seg.sections.items[self.got_section_index.?]; - sect.size += atom.size; - try self.addAtomToSection(atom, match); - } + } else try self.addAtomToSection(atom, match); atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); } @@ -3564,6 +3567,7 @@ pub fn deinit(self: *MachO) void { self.symbol_resolver.deinit(self.base.allocator); self.unresolved.deinit(self.base.allocator); self.tentatives.deinit(self.base.allocator); + self.gc_roots.deinit(self.base.allocator); for (self.objects.items) |*object| { object.deinit(self.base.allocator); @@ -3916,7 +3920,6 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu const required_alignment = typed_value.ty.abiAlignment(self.base.options.target); const local_sym_index = try self.allocateLocalSymbol(); const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), math.log2(required_alignment)); - try self.atom_by_index_table.putNoClobber(self.base.allocator, local_sym_index, atom); const res = try codegen.generateSymbol(&self.base, decl.srcLoc(), typed_value, &code_buffer, .none, .{ .parent_atom_index = local_sym_index, @@ -5597,7 +5600,7 @@ fn pruneAndSortSectionsInSegment(self: *MachO, maybe_seg_id: *?u16, indices: []* const old_idx = maybe_index.* orelse continue; const sect = sections[old_idx]; if (sect.size == 0) { - log.debug("pruning section {s},{s}", .{ sect.segName(), sect.sectName() }); + log.warn("pruning section {s},{s}", .{ sect.segName(), sect.sectName() }); maybe_index.* = null; seg.inner.cmdsize -= @sizeOf(macho.section_64); seg.inner.nsects -= 1; @@ -5630,7 +5633,7 @@ fn pruneAndSortSectionsInSegment(self: *MachO, maybe_seg_id: *?u16, indices: []* if (seg.inner.nsects == 0 and !mem.eql(u8, "__TEXT", seg.inner.segName())) { // Segment has now become empty, so mark it as such - log.debug("marking segment {s} as dead", .{seg.inner.segName()}); + log.warn("marking segment {s} as dead", .{seg.inner.segName()}); seg.inner.cmd = @intToEnum(macho.LC, 0); maybe_seg_id.* = null; } @@ -5712,6 +5715,189 @@ fn pruneAndSortSections(self: *MachO) !void { self.sections_order_dirty = false; } +fn gcAtoms(self: *MachO) !void { + const dead_strip = self.base.options.gc_sections orelse false; + if 
(!dead_strip) return; + + // Add all exports as GC roots + for (self.globals.items) |sym| { + if (sym.n_type == 0) continue; + const resolv = self.symbol_resolver.get(sym.n_strx).?; + assert(resolv.where == .global); + const gc_root = self.atom_by_index_table.get(resolv.local_sym_index) orelse { + log.warn("skipping {s}", .{self.getString(sym.n_strx)}); + continue; + }; + _ = try self.gc_roots.getOrPut(self.base.allocator, gc_root); + } + + // if (self.tlv_ptrs_section_index) |sect| { + // var atom = self.atoms.get(.{ + // .seg = self.data_segment_cmd_index.?, + // .sect = sect, + // }).?; + + // while (true) { + // _ = try self.gc_roots.getOrPut(self.base.allocator, atom); + + // if (atom.prev) |prev| { + // atom = prev; + // } else break; + // } + // } + + // Add any atom targeting an import as GC root + var atoms_it = self.atoms.iterator(); + while (atoms_it.next()) |entry| { + var atom = entry.value_ptr.*; + + while (true) { + for (atom.relocs.items) |rel| { + if ((try Atom.getTargetAtom(rel, self)) == null) switch (rel.target) { + .local => {}, + .global => |n_strx| { + const resolv = self.symbol_resolver.get(n_strx).?; + switch (resolv.where) { + .global => {}, + .undef => { + _ = try self.gc_roots.getOrPut(self.base.allocator, atom); + break; + }, + } + }, + }; + } + + if (atom.prev) |prev| { + atom = prev; + } else break; + } + } + + var stack = std.ArrayList(*Atom).init(self.base.allocator); + defer stack.deinit(); + try stack.ensureUnusedCapacity(self.gc_roots.count()); + + var retained = std.AutoHashMap(*Atom, void).init(self.base.allocator); + defer retained.deinit(); + try retained.ensureUnusedCapacity(self.gc_roots.count()); + + log.warn("GC roots:", .{}); + var gc_roots_it = self.gc_roots.keyIterator(); + while (gc_roots_it.next()) |gc_root| { + self.logAtom(gc_root.*); + + stack.appendAssumeCapacity(gc_root.*); + retained.putAssumeCapacityNoClobber(gc_root.*, {}); + } + + log.warn("walking tree...", .{}); + while (stack.popOrNull()) |source_atom| { 
+ for (source_atom.relocs.items) |rel| { + if (try Atom.getTargetAtom(rel, self)) |target_atom| { + const gop = try retained.getOrPut(target_atom); + if (!gop.found_existing) { + log.warn(" RETAINED ATOM(%{d}) -> ATOM(%{d})", .{ + source_atom.local_sym_index, + target_atom.local_sym_index, + }); + try stack.append(target_atom); + } + } + } + } + + atoms_it = self.atoms.iterator(); + while (atoms_it.next()) |entry| { + const match = entry.key_ptr.*; + + if (self.text_segment_cmd_index) |seg| { + if (seg == match.seg) { + if (self.eh_frame_section_index) |sect| { + if (sect == match.sect) continue; + } + } + } + + if (self.data_segment_cmd_index) |seg| { + if (seg == match.seg) { + if (self.rustc_section_index) |sect| { + if (sect == match.sect) continue; + } + } + } + + const seg = &self.load_commands.items[match.seg].segment; + const sect = &seg.sections.items[match.sect]; + var atom = entry.value_ptr.*; + + log.warn("GCing atoms in {s},{s}", .{ sect.segName(), sect.sectName() }); + + while (true) { + const orig_prev = atom.prev; + + if (!retained.contains(atom)) { + // Dead atom; remove. 
+ log.warn(" DEAD ATOM(%{d})", .{atom.local_sym_index}); + + const sym = &self.locals.items[atom.local_sym_index]; + sym.n_desc = N_DESC_GCED; + + if (self.symbol_resolver.getPtr(sym.n_strx)) |resolv| { + if (resolv.local_sym_index == atom.local_sym_index) { + const global = &self.globals.items[resolv.where_index]; + global.n_desc = N_DESC_GCED; + } + } + + for (self.got_entries.items) |got_entry| { + if (got_entry.atom == atom) { + _ = self.got_entries_table.swapRemove(got_entry.target); + break; + } + } + + for (self.stubs.items) |stub, i| { + if (stub == atom) { + _ = self.stubs_table.swapRemove(@intCast(u32, i)); + break; + } + } + + for (atom.contained.items) |sym_off| { + const inner = &self.locals.items[sym_off.local_sym_index]; + inner.n_desc = N_DESC_GCED; + + if (self.symbol_resolver.getPtr(inner.n_strx)) |resolv| { + if (resolv.local_sym_index == atom.local_sym_index) { + const global = &self.globals.items[resolv.where_index]; + global.n_desc = N_DESC_GCED; + } + } + } + + log.warn(" BEFORE size = {x}", .{sect.size}); + sect.size -= atom.size; + log.warn(" AFTER size = {x}", .{sect.size}); + if (atom.prev) |prev| { + prev.next = atom.next; + } + if (atom.next) |next| { + next.prev = atom.prev; + } else { + // TODO I think a null would be better here. + // The section will be GCed in the next step. 
+ entry.value_ptr.* = if (atom.prev) |prev| prev else undefined; + } + } + + if (orig_prev) |prev| { + atom = prev; + } else break; + } + } +} + fn updateSectionOrdinals(self: *MachO) !void { if (!self.sections_order_dirty) return; @@ -5776,8 +5962,11 @@ fn writeDyldInfoData(self: *MachO) !void { } const seg = self.load_commands.items[match.seg].segment; + const sect = seg.sections.items[match.sect]; + log.warn("dyld info for {s},{s}", .{ sect.segName(), sect.sectName() }); while (true) { + log.warn(" ATOM %{d}", .{atom.local_sym_index}); const sym = self.locals.items[atom.local_sym_index]; const base_offset = sym.n_value - seg.inner.vmaddr; @@ -6217,10 +6406,19 @@ fn writeSymbolTable(self: *MachO) !void { for (self.locals.items) |sym| { if (sym.n_strx == 0) continue; + if (sym.n_desc == N_DESC_GCED) continue; if (self.symbol_resolver.get(sym.n_strx)) |_| continue; try locals.append(sym); } + var globals = std.ArrayList(macho.nlist_64).init(self.base.allocator); + defer globals.deinit(); + + for (self.globals.items) |sym| { + if (sym.n_desc == N_DESC_GCED) continue; + try globals.append(sym); + } + // TODO How do we handle null global symbols in incremental context? 
var undefs = std.ArrayList(macho.nlist_64).init(self.base.allocator); defer undefs.deinit(); @@ -6291,7 +6489,7 @@ fn writeSymbolTable(self: *MachO) !void { } const nlocals = locals.items.len; - const nexports = self.globals.items.len; + const nexports = globals.items.len; const nundefs = undefs.items.len; const locals_off = symtab.symoff; @@ -6302,7 +6500,7 @@ fn writeSymbolTable(self: *MachO) !void { const exports_off = locals_off + locals_size; const exports_size = nexports * @sizeOf(macho.nlist_64); log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), exports_off); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(globals.items), exports_off); const undefs_off = exports_off + exports_size; const undefs_size = nundefs * @sizeOf(macho.nlist_64); @@ -6898,55 +7096,55 @@ fn snapshotState(self: *MachO) !void { } fn logSymtab(self: MachO) void { - log.debug("locals:", .{}); + log.warn("locals:", .{}); for (self.locals.items) |sym, id| { - log.debug(" {d}: {s}: @{x} in {d}", .{ id, self.getString(sym.n_strx), sym.n_value, sym.n_sect }); + log.warn(" {d}: {s}: @{x} in {d}", .{ id, self.getString(sym.n_strx), sym.n_value, sym.n_sect }); } - log.debug("globals:", .{}); + log.warn("globals:", .{}); for (self.globals.items) |sym, id| { - log.debug(" {d}: {s}: @{x} in {d}", .{ id, self.getString(sym.n_strx), sym.n_value, sym.n_sect }); + log.warn(" {d}: {s}: @{x} in {d}", .{ id, self.getString(sym.n_strx), sym.n_value, sym.n_sect }); } - log.debug("undefs:", .{}); + log.warn("undefs:", .{}); for (self.undefs.items) |sym, id| { - log.debug(" {d}: {s}: in {d}", .{ id, self.getString(sym.n_strx), sym.n_desc }); + log.warn(" {d}: {s}: in {d}", .{ id, self.getString(sym.n_strx), sym.n_desc }); } { - log.debug("resolver:", .{}); + log.warn("resolver:", .{}); var it = self.symbol_resolver.iterator(); while (it.next()) |entry| { - log.debug(" {s} => {}", .{ 
self.getString(entry.key_ptr.*), entry.value_ptr.* }); + log.warn(" {s} => {}", .{ self.getString(entry.key_ptr.*), entry.value_ptr.* }); } } - log.debug("GOT entries:", .{}); + log.warn("GOT entries:", .{}); for (self.got_entries_table.values()) |value| { const key = self.got_entries.items[value].target; const atom = self.got_entries.items[value].atom; const n_value = self.locals.items[atom.local_sym_index].n_value; switch (key) { - .local => |ndx| log.debug(" {d}: @{x}", .{ ndx, n_value }), - .global => |n_strx| log.debug(" {s}: @{x}", .{ self.getString(n_strx), n_value }), + .local => |ndx| log.warn(" {d}: @{x}", .{ ndx, n_value }), + .global => |n_strx| log.warn(" {s}: @{x}", .{ self.getString(n_strx), n_value }), } } - log.debug("__thread_ptrs entries:", .{}); + log.warn("__thread_ptrs entries:", .{}); for (self.tlv_ptr_entries_table.values()) |value| { const key = self.tlv_ptr_entries.items[value].target; const atom = self.tlv_ptr_entries.items[value].atom; const n_value = self.locals.items[atom.local_sym_index].n_value; assert(key == .global); - log.debug(" {s}: @{x}", .{ self.getString(key.global), n_value }); + log.warn(" {s}: @{x}", .{ self.getString(key.global), n_value }); } - log.debug("stubs:", .{}); + log.warn("stubs:", .{}); for (self.stubs_table.keys()) |key| { const value = self.stubs_table.get(key).?; const atom = self.stubs.items[value]; const sym = self.locals.items[atom.local_sym_index]; - log.debug(" {s}: @{x}", .{ self.getString(key), sym.n_value }); + log.warn(" {s}: @{x}", .{ self.getString(key), sym.n_value }); } } @@ -6964,6 +7162,45 @@ fn logSectionOrdinals(self: MachO) void { } } +fn logAtoms(self: MachO) void { + log.warn("atoms:", .{}); + var it = self.atoms.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var atom = entry.value_ptr.*; + + while (atom.prev) |prev| { + atom = prev; + } + + const seg = self.load_commands.items[match.seg].segment; + const sect = seg.sections.items[match.sect]; + 
log.warn("{s},{s}", .{ sect.segName(), sect.sectName() }); + + while (true) { + self.logAtom(atom); + + if (atom.next) |next| { + atom = next; + } else break; + } + } +} + +fn logAtom(self: MachO, atom: *const Atom) void { + const sym = self.locals.items[atom.local_sym_index]; + log.warn(" ATOM(%{d}) @ {x}", .{ atom.local_sym_index, sym.n_value }); + + for (atom.contained.items) |sym_off| { + const inner_sym = self.locals.items[sym_off.local_sym_index]; + log.warn(" %{d} ('{s}') @ {x}", .{ + sym_off.local_sym_index, + self.getString(inner_sym.n_strx), + inner_sym.n_value, + }); + } +} + /// Since `os.copy_file_range` cannot be used when copying overlapping ranges within the same file, /// and since `File.copyRangeAll` uses `os.copy_file_range` under-the-hood, we use heap allocated /// buffers on all hosts except Linux (if `copy_file_range` syscall is available). diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index d7c595dbba..e6adb0cc1c 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -236,6 +236,7 @@ pub fn freeListEligible(self: Atom, macho_file: MachO) bool { const RelocContext = struct { base_addr: u64 = 0, + base_offset: i32 = 0, allocator: Allocator, object: *Object, macho_file: *MachO, @@ -366,7 +367,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: ) orelse unreachable; break :target Relocation.Target{ .global = n_strx }; }; - const offset = @intCast(u32, rel.r_address); + const offset = @intCast(u32, rel.r_address - context.base_offset); switch (arch) { .aarch64 => { @@ -487,7 +488,7 @@ fn addPtrBindingOrRebase( .global => |n_strx| { try self.bindings.append(context.allocator, .{ .n_strx = n_strx, - .offset = @intCast(u32, rel.r_address), + .offset = @intCast(u32, rel.r_address - context.base_offset), }); }, .local => { @@ -529,7 +530,10 @@ fn addPtrBindingOrRebase( }; if (should_rebase) { - try self.rebases.append(context.allocator, @intCast(u32, rel.r_address)); + try 
self.rebases.append( + context.allocator, + @intCast(u32, rel.r_address - context.base_offset), + ); } }, } @@ -650,6 +654,60 @@ fn addStub(target: Relocation.Target, context: RelocContext) !void { context.macho_file.stubs.items[stub_index] = atom; } +pub fn getTargetAtom(rel: Relocation, macho_file: *MachO) !?*Atom { + const is_via_got = got: { + switch (macho_file.base.options.target.cpu.arch) { + .aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, rel.@"type")) { + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .ARM64_RELOC_POINTER_TO_GOT, + => true, + else => false, + }, + .x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, rel.@"type")) { + .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true, + else => false, + }, + else => unreachable, + } + }; + + if (is_via_got) { + const got_index = macho_file.got_entries_table.get(rel.target) orelse { + log.err("expected GOT entry for symbol", .{}); + switch (rel.target) { + .local => |sym_index| log.err(" local @{d}", .{sym_index}), + .global => |n_strx| log.err(" global @'{s}'", .{macho_file.getString(n_strx)}), + } + log.err(" this is an internal linker error", .{}); + return error.FailedToResolveRelocationTarget; + }; + return macho_file.got_entries.items[got_index].atom; + } + + switch (rel.target) { + .local => |sym_index| { + return macho_file.atom_by_index_table.get(sym_index); + }, + .global => |n_strx| { + const resolv = macho_file.symbol_resolver.get(n_strx).?; + switch (resolv.where) { + .global => return macho_file.atom_by_index_table.get(resolv.local_sym_index), + .undef => { + if (macho_file.stubs_table.get(n_strx)) |stub_index| { + return macho_file.stubs.items[stub_index]; + } else { + if (macho_file.tlv_ptr_entries_table.get(rel.target)) |tlv_ptr_index| { + return macho_file.tlv_ptr_entries.items[tlv_ptr_index].atom; + } + return null; + } + }, + } + }, + } +} + pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { const tracy = trace(@src()); defer 
tracy.end(); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index f01f366fdd..305ae25791 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -176,6 +176,13 @@ pub fn free(self: *Object, allocator: Allocator, macho_file: *MachO) void { .n_desc = 0, .n_value = 0, }; + _ = macho_file.atom_by_index_table.remove(atom.local_sym_index); + _ = macho_file.gc_roots.remove(atom); + + for (atom.contained.items) |sym_off| { + _ = macho_file.atom_by_index_table.remove(sym_off.local_sym_index); + } + atom.local_sym_index = 0; } if (atom == last_atom) { @@ -346,7 +353,7 @@ const NlistWithIndex = struct { } } - fn filterInSection(symbols: []NlistWithIndex, sect: macho.section_64) []NlistWithIndex { + fn filterByAddress(symbols: []NlistWithIndex, start_addr: u64, end_addr: u64) []NlistWithIndex { const Predicate = struct { addr: u64, @@ -355,13 +362,36 @@ const NlistWithIndex = struct { } }; - const start = MachO.findFirst(NlistWithIndex, symbols, 0, Predicate{ .addr = sect.addr }); - const end = MachO.findFirst(NlistWithIndex, symbols, start, Predicate{ .addr = sect.addr + sect.size }); + const start = MachO.findFirst(NlistWithIndex, symbols, 0, Predicate{ + .addr = start_addr, + }); + const end = MachO.findFirst(NlistWithIndex, symbols, start, Predicate{ + .addr = end_addr, + }); return symbols[start..end]; } }; +fn filterRelocs( + relocs: []const macho.relocation_info, + start_addr: u64, + end_addr: u64, +) []const macho.relocation_info { + const Predicate = struct { + addr: u64, + + pub fn predicate(self: @This(), rel: macho.relocation_info) bool { + return rel.r_address < self.addr; + } + }; + + const start = MachO.findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr }); + const end = MachO.findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr }); + + return relocs[start..end]; +} + fn filterDice( dices: []const macho.data_in_code_entry, start_addr: u64, @@ -422,16 +452,13 @@ pub fn 
parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! // We only care about defined symbols, so filter every other out. const sorted_nlists = sorted_all_nlists.items[0..iundefsym]; - const dead_strip = blk: { - const dead_strip = macho_file.base.options.gc_sections orelse break :blk false; - if (dead_strip or macho_file.base.options.optimize_mode != .Debug) - break :blk self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; - break :blk false; - }; + const dead_strip = macho_file.base.options.gc_sections orelse false; + const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0 and + (macho_file.base.options.optimize_mode != .Debug or dead_strip); for (seg.sections.items) |sect, id| { const sect_id = @intCast(u8, id); - log.debug("putting section '{s},{s}' as an Atom", .{ sect.segName(), sect.sectName() }); + log.debug("parsing section '{s},{s}' into Atoms", .{ sect.segName(), sect.sectName() }); // Get matching segment/section in the final artifact. const match = (try macho_file.getMatchingSection(sect)) orelse { @@ -455,7 +482,11 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! ); // Symbols within this section only. - const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists, sect); + const filtered_nlists = NlistWithIndex.filterByAddress( + sorted_nlists, + sect.addr, + sect.addr + sect.size, + ); macho_file.has_dices = macho_file.has_dices or blk: { if (self.text_section_index) |index| { @@ -467,204 +498,123 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! 
}; macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null; - if (dead_strip) blk: { - if (filtered_nlists.len == 0) break :blk; // nothing to split - + if (subsections_via_symbols and filtered_nlists.len > 0) { // If the first nlist does not match the start of the section, // then we need to encapsulate the memory range [section start, first symbol) // as a temporary symbol and insert the matching Atom. const first_nlist = filtered_nlists[0].nlist; - if (first_nlist.n_value > sect.addr) {} - } + if (first_nlist.n_value > sect.addr) { + const local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { + const local_sym_index = @intCast(u32, macho_file.locals.items.len); + try macho_file.locals.append(allocator, .{ + .n_strx = 0, + .n_type = macho.N_SECT, + .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1), + .n_desc = 0, + .n_value = sect.addr, + }); + try self.sections_as_symbols.putNoClobber(allocator, sect_id, local_sym_index); + break :blk local_sym_index; + }; + const atom_size = first_nlist.n_value - sect.addr; + const atom_code: ?[]const u8 = if (code) |cc| + cc[0..atom_size] + else + null; + try self.parseIntoAtom( + allocator, + local_sym_index, + atom_size, + sect.@"align", + atom_code, + relocs, + &.{}, + match, + sect, + macho_file, + ); + } - // If there is no symbol to refer to this atom, we create - // a temp one, unless we already did that when working out the relocations - // of other atoms. - const local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { - const local_sym_index = @intCast(u32, macho_file.locals.items.len); - try macho_file.locals.append(allocator, .{ - .n_strx = 0, - .n_type = macho.N_SECT, - .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? 
+ 1), - .n_desc = 0, - .n_value = sect.addr, - }); - try self.sections_as_symbols.putNoClobber(allocator, sect_id, local_sym_index); - break :blk local_sym_index; - }; - const atom = try self.parseIntoAtom( - allocator, - local_sym_index, - sect.size, - sect.@"align", - code, - relocs, - filtered_nlists, - match, - macho_file, - ); + var next_nlist_count: usize = 0; + while (next_nlist_count < filtered_nlists.len) { + const next_nlist = filtered_nlists[next_nlist_count]; + const addr = next_nlist.nlist.n_value; + const atom_nlists = NlistWithIndex.filterByAddress( + filtered_nlists[next_nlist_count..], + addr, + addr + 1, + ); + next_nlist_count += atom_nlists.len; - if (!self.start_atoms.contains(match)) { - try self.start_atoms.putNoClobber(allocator, match, atom); - } + const local_sym_index = @intCast(u32, macho_file.locals.items.len); + try macho_file.locals.append(allocator, .{ + .n_strx = 0, + .n_type = macho.N_SECT, + .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1), + .n_desc = 0, + .n_value = addr, + }); - if (self.end_atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; + const atom_size = blk: { + const end_addr = if (next_nlist_count < filtered_nlists.len) + filtered_nlists[next_nlist_count].nlist.n_value + else + sect.addr + sect.size; + break :blk end_addr - addr; + }; + const atom_code: ?[]const u8 = if (code) |cc| + cc[addr - sect.addr ..][0..atom_size] + else + null; + const atom_align = if (addr > 0) + math.min(@ctz(u64, addr), sect.@"align") + else + sect.@"align"; + try self.parseIntoAtom( + allocator, + local_sym_index, + atom_size, + atom_align, + atom_code, + relocs, + atom_nlists, + match, + sect, + macho_file, + ); + } } else { - try self.end_atoms.putNoClobber(allocator, match, atom); + // If there is no symbol to refer to this atom, we create + // a temp one, unless we already did that when working out the relocations + // of other atoms. 
+ const local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { + const local_sym_index = @intCast(u32, macho_file.locals.items.len); + try macho_file.locals.append(allocator, .{ + .n_strx = 0, + .n_type = macho.N_SECT, + .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1), + .n_desc = 0, + .n_value = sect.addr, + }); + try self.sections_as_symbols.putNoClobber(allocator, sect_id, local_sym_index); + break :blk local_sym_index; + }; + try self.parseIntoAtom( + allocator, + local_sym_index, + sect.size, + sect.@"align", + code, + relocs, + filtered_nlists, + match, + sect, + macho_file, + ); } - try self.contained_atoms.append(allocator, atom); } } -// const Context = struct { -// allocator: *Allocator, -// object: *Object, -// macho_file: *MachO, -// match: MachO.MatchingSection, -// }; - -// const AtomParser = struct { -// section: macho.section_64, -// code: []u8, -// relocs: []macho.relocation_info, -// nlists: []NlistWithIndex, -// index: u32 = 0, - -// fn peek(self: AtomParser) ?NlistWithIndex { -// return if (self.index + 1 < self.nlists.len) self.nlists[self.index + 1] else null; -// } - -// fn lessThanBySeniority(context: Context, lhs: NlistWithIndex, rhs: NlistWithIndex) bool { -// if (!MachO.symbolIsExt(rhs.nlist)) { -// return MachO.symbolIsTemp(lhs.nlist, context.object.getString(lhs.nlist.n_strx)); -// } else if (MachO.symbolIsPext(rhs.nlist) or MachO.symbolIsWeakDef(rhs.nlist)) { -// return !MachO.symbolIsExt(lhs.nlist); -// } else { -// return false; -// } -// } - -// pub fn next(self: *AtomParser, context: Context) !?*Atom { -// if (self.index == self.nlists.len) return null; - -// const tracy = trace(@src()); -// defer tracy.end(); - -// var aliases = std.ArrayList(NlistWithIndex).init(context.allocator); -// defer aliases.deinit(); - -// const next_nlist: ?NlistWithIndex = blk: while (true) { -// const curr_nlist = self.nlists[self.index]; -// try aliases.append(curr_nlist); - -// if (self.peek()) |next_nlist| { 
-// if (curr_nlist.nlist.n_value == next_nlist.nlist.n_value) { -// self.index += 1; -// continue; -// } -// break :blk next_nlist; -// } -// break :blk null; -// } else null; - -// for (aliases.items) |*nlist_with_index| { -// nlist_with_index.index = context.object.symbol_mapping.get(nlist_with_index.index) orelse unreachable; -// } - -// if (aliases.items.len > 1) { -// // Bubble-up senior symbol as the main link to the atom. -// sort.sort( -// NlistWithIndex, -// aliases.items, -// context, -// AtomParser.lessThanBySeniority, -// ); -// } - -// const senior_nlist = aliases.pop(); -// const senior_sym = &context.macho_file.locals.items[senior_nlist.index]; -// senior_sym.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(context.match).? + 1); - -// const start_addr = senior_nlist.nlist.n_value - self.section.addr; -// const end_addr = if (next_nlist) |n| n.nlist.n_value - self.section.addr else self.section.size; - -// const code = self.code[start_addr..end_addr]; -// const size = code.len; - -// const max_align = self.section.@"align"; -// const actual_align = if (senior_nlist.nlist.n_value > 0) -// math.min(@ctz(u64, senior_nlist.nlist.n_value), max_align) -// else -// max_align; - -// const stab: ?Atom.Stab = if (context.object.debug_info) |di| blk: { -// // TODO there has to be a better to handle this. 
-// for (di.inner.func_list.items) |func| { -// if (func.pc_range) |range| { -// if (senior_nlist.nlist.n_value >= range.start and senior_nlist.nlist.n_value < range.end) { -// break :blk Atom.Stab{ -// .function = range.end - range.start, -// }; -// } -// } -// } -// // TODO -// // if (self.macho_file.globals.contains(self.macho_file.getString(senior_sym.strx))) break :blk .global; -// break :blk .static; -// } else null; - -// const atom = try context.macho_file.createEmptyAtom(senior_nlist.index, size, actual_align); -// atom.stab = stab; - -// const is_zerofill = blk: { -// const section_type = commands.sectionType(self.section); -// break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; -// }; -// if (!is_zerofill) { -// mem.copy(u8, atom.code.items, code); -// } - -// try atom.aliases.ensureTotalCapacity(context.allocator, aliases.items.len); -// for (aliases.items) |alias| { -// atom.aliases.appendAssumeCapacity(alias.index); -// const sym = &context.macho_file.locals.items[alias.index]; -// sym.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(context.match).? 
+ 1); -// } - -// try atom.parseRelocs(self.relocs, .{ -// .base_addr = self.section.addr, -// .base_offset = start_addr, -// .allocator = context.allocator, -// .object = context.object, -// .macho_file = context.macho_file, -// }); - -// if (context.macho_file.has_dices) { -// const dices = filterDice( -// context.object.data_in_code_entries.items, -// senior_nlist.nlist.n_value, -// senior_nlist.nlist.n_value + size, -// ); -// try atom.dices.ensureTotalCapacity(context.allocator, dices.len); - -// for (dices) |dice| { -// atom.dices.appendAssumeCapacity(.{ -// .offset = dice.offset - try math.cast(u32, senior_nlist.nlist.n_value), -// .length = dice.length, -// .kind = dice.kind, -// }); -// } -// } - -// self.index += 1; - -// return atom; -// } -// }; - fn parseIntoAtom( self: *Object, allocator: Allocator, @@ -675,8 +625,9 @@ fn parseIntoAtom( relocs: []const macho.relocation_info, nlists: []const NlistWithIndex, match: MatchingSection, + sect: macho.section_64, macho_file: *MachO, -) !*Atom { +) !void { const sym = macho_file.locals.items[local_sym_index]; const align_pow_2 = try math.powi(u32, 2, alignment); const aligned_size = mem.alignForwardGeneric(u64, size, align_pow_2); @@ -686,8 +637,11 @@ fn parseIntoAtom( mem.copy(u8, atom.code.items, cc); } - try atom.parseRelocs(relocs, .{ - .base_addr = sym.n_value, + const base_offset = sym.n_value - sect.addr; + const filtered_relocs = filterRelocs(relocs, base_offset, base_offset + size); + try atom.parseRelocs(filtered_relocs, .{ + .base_addr = sect.addr, + .base_offset = @intCast(i32, base_offset), .allocator = allocator, .object = self, .macho_file = macho_file, @@ -740,9 +694,41 @@ fn parseIntoAtom( .offset = nlist.n_value - sym.n_value, .stab = stab, }); + + try macho_file.atom_by_index_table.putNoClobber(allocator, sym_index, atom); } - return atom; + const is_gc_root = blk: { + if (sect.isDontDeadStrip()) break :blk true; + if (sect.isDontDeadStripIfReferencesLive()) { + // TODO if 
isDontDeadStripIfReferencesLive we should analyse the edges + // before making it a GC root + break :blk true; + } + if (mem.eql(u8, "__StaticInit", sect.sectName())) break :blk true; + switch (sect.type_()) { + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => break :blk true, + else => break :blk false, + } + }; + if (is_gc_root) { + try macho_file.gc_roots.putNoClobber(allocator, atom, {}); + } + + if (!self.start_atoms.contains(match)) { + try self.start_atoms.putNoClobber(allocator, match, atom); + } + + if (self.end_atoms.getPtr(match)) |last| { + last.*.next = atom; + atom.prev = last.*; + last.* = atom; + } else { + try self.end_atoms.putNoClobber(allocator, match, atom); + } + try self.contained_atoms.append(allocator, atom); } fn parseSymtab(self: *Object) void { diff --git a/test/link/macho/objcpp/build.zig b/test/link/macho/objcpp/build.zig index 767578e225..6b9047fbf9 100644 --- a/test/link/macho/objcpp/build.zig +++ b/test/link/macho/objcpp/build.zig @@ -16,6 +16,7 @@ pub fn build(b: *Builder) void { // TODO when we figure out how to ship framework stubs for cross-compilation, // populate paths to the sysroot here. 
exe.linkFramework("Foundation"); + exe.link_gc_sections = true; const run_cmd = exe.run(); run_cmd.expectStdOutEqual("Hello from C++ and Zig"); From 843701d0feb683810f6be3cb5d6406eddb5539d0 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 6 Jul 2022 08:41:13 +0200 Subject: [PATCH 06/27] macho: remove unused fields from Atom --- src/link/MachO.zig | 37 +++++++------------------------------ src/link/MachO/Atom.zig | 11 ----------- 2 files changed, 7 insertions(+), 41 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 38624ae152..33fdbafde0 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2230,13 +2230,6 @@ fn allocateLocals(self: *MachO) !void { base_vaddr, }); - // Update each alias (if any) - for (atom.aliases.items) |index| { - const alias_sym = &self.locals.items[index]; - alias_sym.n_value = base_vaddr; - alias_sym.n_sect = n_sect; - } - // Update each symbol contained within the atom for (atom.contained.items) |sym_at_off| { const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; @@ -2260,11 +2253,6 @@ fn shiftLocalsByOffset(self: *MachO, match: MatchingSection, offset: i64) !void const atom_sym = &self.locals.items[atom.local_sym_index]; atom_sym.n_value = @intCast(u64, @intCast(i64, atom_sym.n_value) + offset); - for (atom.aliases.items) |index| { - const alias_sym = &self.locals.items[index]; - alias_sym.n_value = @intCast(u64, @intCast(i64, alias_sym.n_value) + offset); - } - for (atom.contained.items) |sym_at_off| { const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; contained_sym.n_value = @intCast(u64, @intCast(i64, contained_sym.n_value) + offset); @@ -3463,13 +3451,6 @@ fn parseObjectsIntoAtoms(self: *MachO) !void { atom.alignment, }); - // Update each alias (if any) - for (atom.aliases.items) |index| { - const alias_sym = &self.locals.items[index]; - alias_sym.n_value = base_vaddr; - alias_sym.n_sect = n_sect; - } - // Update each symbol contained within the atom for 
(atom.contained.items) |sym_at_off| { const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; @@ -6463,17 +6444,11 @@ fn writeSymbolTable(self: *MachO) !void { }); for (object.contained_atoms.items) |atom| { - if (atom.stab) |stab| { - const nlists = try stab.asNlists(atom.local_sym_index, self); + for (atom.contained.items) |sym_at_off| { + const stab = sym_at_off.stab orelse continue; + const nlists = try stab.asNlists(sym_at_off.local_sym_index, self); defer self.base.allocator.free(nlists); try locals.appendSlice(nlists); - } else { - for (atom.contained.items) |sym_at_off| { - const stab = sym_at_off.stab orelse continue; - const nlists = try stab.asNlists(sym_at_off.local_sym_index, self); - defer self.base.allocator.free(nlists); - try locals.appendSlice(nlists); - } } } @@ -6929,8 +6904,10 @@ fn snapshotState(self: *MachO) !void { }; var aliases = std.ArrayList([]const u8).init(arena); - for (atom.aliases.items) |loc| { - try aliases.append(self.getString(self.locals.items[loc].n_strx)); + for (atom.contained.items) |sym_off| { + if (sym_off.offset == 0) { + try aliases.append(self.getString(self.locals.items[sym_off.local_sym_index].n_strx)); + } } node.payload.aliases = aliases.toOwnedSlice(); try nodes.append(node); diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index e6adb0cc1c..177d5419fb 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -26,9 +26,6 @@ const StringIndexAdapter = std.hash_map.StringIndexAdapter; /// offset table entry. local_sym_index: u32, -/// List of symbol aliases pointing to the same atom via different nlists -aliases: std.ArrayListUnmanaged(u32) = .{}, - /// List of symbols contained within this atom contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, @@ -62,12 +59,6 @@ lazy_bindings: std.ArrayListUnmanaged(Binding) = .{}, /// List of data-in-code entries. This is currently specific to x86_64 only. 
dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, -/// Stab entry for this atom. This is currently specific to a binary created -/// by linking object files in a traditional sense - in incremental sense, we -/// bypass stabs altogether to produce dSYM bundle directly with fully relocated -/// DWARF sections. -stab: ?Stab = null, - /// Points to the previous and next neighbours next: ?*Atom, prev: ?*Atom, @@ -192,7 +183,6 @@ pub fn deinit(self: *Atom, allocator: Allocator) void { self.rebases.deinit(allocator); self.relocs.deinit(allocator); self.contained.deinit(allocator); - self.aliases.deinit(allocator); self.code.deinit(allocator); } @@ -203,7 +193,6 @@ pub fn clearRetainingCapacity(self: *Atom) void { self.rebases.clearRetainingCapacity(); self.relocs.clearRetainingCapacity(); self.contained.clearRetainingCapacity(); - self.aliases.clearRetainingCapacity(); self.code.clearRetainingCapacity(); } From 9eb7e5182b963366da9415ff7efe7c0fa5b1ad62 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 6 Jul 2022 17:11:39 +0200 Subject: [PATCH 07/27] macho: rework symbol handling to match zld/ELF Now, each object file will store a mutable table of symbols that it defines. Upon symbol resolution between object files, the symbol will be updated with a globally allocated section ordinal and address in virtual memory. If the object defines a globally available symbol, its location only (comprising of the symbol index and object index) will be stored in the globals map for easy access when relocating, etc. This approach cleans up the symbol management significantly, and matches the status quo used in zld/ELF. Additionally, this makes scoping symbol stabs easier too as they are now naturally contained within each object file. 
--- src/arch/aarch64/CodeGen.zig | 16 +- src/arch/aarch64/Emit.zig | 7 +- src/arch/aarch64/Mir.zig | 2 +- src/arch/riscv64/CodeGen.zig | 2 +- src/arch/x86_64/CodeGen.zig | 14 +- src/arch/x86_64/Emit.zig | 5 +- src/arch/x86_64/Mir.zig | 4 +- src/link.zig | 7 +- src/link/MachO.zig | 3298 ++++++++++++++----------------- src/link/MachO/Atom.zig | 639 +++--- src/link/MachO/DebugSymbols.zig | 59 +- src/link/MachO/Object.zig | 445 ++--- src/link/strtab.zig | 113 ++ 13 files changed, 2154 insertions(+), 2457 deletions(-) create mode 100644 src/link/strtab.zig diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index 64d49f2508..f65e19d561 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -3174,7 +3174,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. const func = func_payload.data; const fn_owner_decl = mod.declPtr(func.owner_decl); try self.genSetReg(Type.initTag(.u64), .x30, .{ - .got_load = fn_owner_decl.link.macho.local_sym_index, + .got_load = fn_owner_decl.link.macho.sym_index, }); // blr x30 _ = try self.addInst(.{ @@ -3190,14 +3190,14 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. 
lib_name, }); } - const n_strx = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0)); + const global_index = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0)); _ = try self.addInst(.{ .tag = .call_extern, .data = .{ .extern_fn = .{ - .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.local_sym_index, - .sym_name = n_strx, + .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index, + .global_index = global_index, }, }, }); @@ -4157,7 +4157,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro .data = .{ .payload = try self.addExtra(Mir.LoadMemoryPie{ .register = @enumToInt(src_reg), - .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.local_sym_index, + .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index, .sym_index = sym_index, }), }, @@ -4270,7 +4270,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void .data = .{ .payload = try self.addExtra(Mir.LoadMemoryPie{ .register = @enumToInt(reg), - .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.local_sym_index, + .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index, .sym_index = sym_index, }), }, @@ -4578,8 +4578,8 @@ fn lowerDeclRef(self: *Self, tv: TypedValue, decl_index: Module.Decl.Index) Inne } else if (self.bin_file.cast(link.File.MachO)) |_| { // Because MachO is PIE-always-on, we defer memory address resolution until // the linker has enough info to perform relocations. 
- assert(decl.link.macho.local_sym_index != 0); - return MCValue{ .got_load = decl.link.macho.local_sym_index }; + assert(decl.link.macho.sym_index != 0); + return MCValue{ .got_load = decl.link.macho.sym_index }; } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { const got_addr = coff_file.offset_table_virtual_address + decl.link.coff.offset_table_index * ptr_bytes; return MCValue{ .memory = got_addr }; diff --git a/src/arch/aarch64/Emit.zig b/src/arch/aarch64/Emit.zig index 8ea6ab91e2..7469eaefeb 100644 --- a/src/arch/aarch64/Emit.zig +++ b/src/arch/aarch64/Emit.zig @@ -660,9 +660,10 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) !void { }; // Add relocation to the decl. const atom = macho_file.atom_by_index_table.get(extern_fn.atom_index).?; + const target = macho_file.globals.values()[extern_fn.global_index]; try atom.relocs.append(emit.bin_file.allocator, .{ .offset = offset, - .target = .{ .global = extern_fn.sym_name }, + .target = target, .addend = 0, .subtractor = null, .pcrel = true, @@ -864,7 +865,7 @@ fn mirLoadMemoryPie(emit: *Emit, inst: Mir.Inst.Index) !void { // Page reloc for adrp instruction. try atom.relocs.append(emit.bin_file.allocator, .{ .offset = offset, - .target = .{ .local = data.sym_index }, + .target = .{ .sym_index = data.sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -882,7 +883,7 @@ fn mirLoadMemoryPie(emit: *Emit, inst: Mir.Inst.Index) !void { // Pageoff reloc for adrp instruction. try atom.relocs.append(emit.bin_file.allocator, .{ .offset = offset + 4, - .target = .{ .local = data.sym_index }, + .target = .{ .sym_index = data.sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = false, diff --git a/src/arch/aarch64/Mir.zig b/src/arch/aarch64/Mir.zig index 1d66a69c8e..b162905f36 100644 --- a/src/arch/aarch64/Mir.zig +++ b/src/arch/aarch64/Mir.zig @@ -232,7 +232,7 @@ pub const Inst = struct { /// Index of the containing atom. 
atom_index: u32, /// Index into the linker's string table. - sym_name: u32, + global_index: u32, }, /// A 16-bit immediate value. /// diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index 1d4108a77e..e52dd4ec08 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -2563,7 +2563,7 @@ fn lowerDeclRef(self: *Self, tv: TypedValue, decl_index: Module.Decl.Index) Inne } else if (self.bin_file.cast(link.File.MachO)) |_| { // TODO I'm hacking my way through here by repurposing .memory for storing // index to the GOT target symbol index. - return MCValue{ .memory = decl.link.macho.local_sym_index }; + return MCValue{ .memory = decl.link.macho.sym_index }; } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { const got_addr = coff_file.offset_table_virtual_address + decl.link.coff.offset_table_index * ptr_bytes; return MCValue{ .memory = got_addr }; diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 8616c4ac5c..da35d3b4b6 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2645,7 +2645,7 @@ fn loadMemPtrIntoRegister(self: *Self, reg: Register, ptr_ty: Type, ptr: MCValue }), .data = .{ .load_reloc = .{ - .atom_index = fn_owner_decl.link.macho.local_sym_index, + .atom_index = fn_owner_decl.link.macho.sym_index, .sym_index = sym_index, }, }, @@ -3977,7 +3977,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. const func = func_payload.data; const fn_owner_decl = mod.declPtr(func.owner_decl); try self.genSetReg(Type.initTag(.usize), .rax, .{ - .got_load = fn_owner_decl.link.macho.local_sym_index, + .got_load = fn_owner_decl.link.macho.sym_index, }); // callq *%rax _ = try self.addInst(.{ @@ -3997,14 +3997,14 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. 
lib_name, }); } - const n_strx = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0)); + const global_index = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0)); _ = try self.addInst(.{ .tag = .call_extern, .ops = undefined, .data = .{ .extern_fn = .{ - .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.local_sym_index, - .sym_name = n_strx, + .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index, + .global_index = global_index, }, }, }); @@ -6771,8 +6771,8 @@ fn lowerDeclRef(self: *Self, tv: TypedValue, decl_index: Module.Decl.Index) Inne } else if (self.bin_file.cast(link.File.MachO)) |_| { // Because MachO is PIE-always-on, we defer memory address resolution until // the linker has enough info to perform relocations. - assert(decl.link.macho.local_sym_index != 0); - return MCValue{ .got_load = decl.link.macho.local_sym_index }; + assert(decl.link.macho.sym_index != 0); + return MCValue{ .got_load = decl.link.macho.sym_index }; } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { const got_addr = coff_file.offset_table_virtual_address + decl.link.coff.offset_table_index * ptr_bytes; return MCValue{ .memory = got_addr }; diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index cc5b54fb55..8d91c3d7e6 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -1005,7 +1005,7 @@ fn mirLeaPie(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { log.debug("adding reloc of type {} to local @{d}", .{ reloc_type, load_reloc.sym_index }); try atom.relocs.append(emit.bin_file.allocator, .{ .offset = @intCast(u32, end_offset - 4), - .target = .{ .local = load_reloc.sym_index }, + .target = .{ .sym_index = load_reloc.sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -1127,9 +1127,10 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { if (emit.bin_file.cast(link.File.MachO)) |macho_file| { // Add relocation to the decl. 
const atom = macho_file.atom_by_index_table.get(extern_fn.atom_index).?; + const target = macho_file.globals.values()[extern_fn.global_index]; try atom.relocs.append(emit.bin_file.allocator, .{ .offset = offset, - .target = .{ .global = extern_fn.sym_name }, + .target = target, .addend = 0, .subtractor = null, .pcrel = true, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 74b0ca0d12..91ad9f4d9c 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -443,8 +443,8 @@ pub const Inst = struct { extern_fn: struct { /// Index of the containing atom. atom_index: u32, - /// Index into the linker's string table. - sym_name: u32, + /// Index into the linker's globals table. + global_index: u32, }, /// PIE load relocation. load_reloc: struct { diff --git a/src/link.zig b/src/link.zig index aa37589ff5..a69dcc4c6e 100644 --- a/src/link.zig +++ b/src/link.zig @@ -544,12 +544,7 @@ pub const File = struct { switch (base.tag) { .coff => return @fieldParentPtr(Coff, "base", base).allocateDeclIndexes(decl_index), .elf => return @fieldParentPtr(Elf, "base", base).allocateDeclIndexes(decl_index), - .macho => return @fieldParentPtr(MachO, "base", base).allocateDeclIndexes(decl_index) catch |err| switch (err) { - // remap this error code because we are transitioning away from - // `allocateDeclIndexes`. 
- error.Overflow => return error.OutOfMemory, - error.OutOfMemory => return error.OutOfMemory, - }, + .macho => return @fieldParentPtr(MachO, "base", base).allocateDeclIndexes(decl_index), .wasm => return @fieldParentPtr(Wasm, "base", base).allocateDeclIndexes(decl_index), .plan9 => return @fieldParentPtr(Plan9, "base", base).allocateDeclIndexes(decl_index), .c, .spirv, .nvptx => {}, diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 33fdbafde0..8967e2a3e3 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -35,8 +35,7 @@ const LibStub = @import("tapi.zig").LibStub; const Liveness = @import("../Liveness.zig"); const LlvmObject = @import("../codegen/llvm.zig").Object; const Module = @import("../Module.zig"); -const StringIndexAdapter = std.hash_map.StringIndexAdapter; -const StringIndexContext = std.hash_map.StringIndexContext; +const StringTable = @import("strtab.zig").StringTable; const Trie = @import("MachO/Trie.zig"); const Type = @import("../type.zig").Type; const TypedValue = @import("../TypedValue.zig"); @@ -52,13 +51,13 @@ pub const SearchStrategy = enum { dylibs_first, }; +pub const N_DESC_GCED: u16 = @bitCast(u16, @as(i16, -1)); + const SystemLib = struct { needed: bool = false, weak: bool = false, }; -const N_DESC_GCED: u16 = @bitCast(u16, @as(i16, -1)); - base: File, /// If this is not null, an object file is created by LLVM and linked with LLD afterwards. 
@@ -153,40 +152,28 @@ rustc_section_index: ?u16 = null, rustc_section_size: u64 = 0, locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -globals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -undefs: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -symbol_resolver: std.AutoHashMapUnmanaged(u32, SymbolWithLoc) = .{}, -unresolved: std.AutoArrayHashMapUnmanaged(u32, enum { - none, - stub, - got, -}) = .{}, -tentatives: std.AutoArrayHashMapUnmanaged(u32, void) = .{}, +globals: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, +unresolved: std.AutoArrayHashMapUnmanaged(u32, void) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, -globals_free_list: std.ArrayListUnmanaged(u32) = .{}, dyld_stub_binder_index: ?u32 = null, dyld_private_atom: ?*Atom = null, stub_helper_preamble_atom: ?*Atom = null, -mh_execute_header_sym_index: ?u32 = null, -dso_handle_sym_index: ?u32 = null, - -strtab: std.ArrayListUnmanaged(u8) = .{}, -strtab_dir: std.HashMapUnmanaged(u32, void, StringIndexContext, std.hash_map.default_max_load_percentage) = .{}, +strtab: StringTable(.link) = .{}, tlv_ptr_entries: std.ArrayListUnmanaged(Entry) = .{}, tlv_ptr_entries_free_list: std.ArrayListUnmanaged(u32) = .{}, -tlv_ptr_entries_table: std.AutoArrayHashMapUnmanaged(Atom.Relocation.Target, u32) = .{}, +tlv_ptr_entries_table: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{}, got_entries: std.ArrayListUnmanaged(Entry) = .{}, got_entries_free_list: std.ArrayListUnmanaged(u32) = .{}, -got_entries_table: std.AutoArrayHashMapUnmanaged(Atom.Relocation.Target, u32) = .{}, +got_entries_table: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{}, -stubs: std.ArrayListUnmanaged(*Atom) = .{}, +stubs: std.ArrayListUnmanaged(Entry) = .{}, stubs_free_list: std.ArrayListUnmanaged(u32) = .{}, -stubs_table: std.AutoArrayHashMapUnmanaged(u32, u32) = .{}, +stubs_table: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, @@ -194,12 +181,6 @@ 
load_commands_dirty: bool = false, sections_order_dirty: bool = false, has_dices: bool = false, has_stabs: bool = false, -/// A helper var to indicate if we are at the start of the incremental updates, or -/// already somewhere further along the update-and-run chain. -/// TODO once we add opening a prelinked output binary from file, this will become -/// obsolete as we will carry on where we left off. -cold_start: bool = false, -invalidate_relocs: bool = false, section_ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}, @@ -223,12 +204,10 @@ atom_free_lists: std.AutoHashMapUnmanaged(MatchingSection, std.ArrayListUnmanage /// Pointer to the last allocated atom atoms: std.AutoHashMapUnmanaged(MatchingSection, *Atom) = .{}, -/// List of atoms that are owned directly by the linker. -/// Currently these are only atoms that are the result of linking -/// object files. Atoms which take part in incremental linking are -/// at present owned by Module.Decl. -/// TODO consolidate this. +/// List of atoms that are either synthetic or map directly to the Zig source program. managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, + +/// Table of atoms indexed by the symbol index. atom_by_index_table: std.AutoHashMapUnmanaged(u32, *Atom) = .{}, /// Table of unnamed constants associated with a parent `Decl`. @@ -259,9 +238,10 @@ unnamed_const_atoms: UnnamedConstTable = .{}, decls: std.AutoArrayHashMapUnmanaged(Module.Decl.Index, ?MatchingSection) = .{}, gc_roots: std.AutoHashMapUnmanaged(*Atom, void) = .{}, +gc_sections: std.AutoHashMapUnmanaged(MatchingSection, void) = .{}, const Entry = struct { - target: Atom.Relocation.Target, + target: SymbolWithLoc, atom: *Atom, }; @@ -273,15 +253,12 @@ const PendingUpdate = union(enum) { add_got_entry: u32, }; -const SymbolWithLoc = struct { - // Table where the symbol can be found. 
- where: enum { - global, - undef, - }, - where_index: u32, - local_sym_index: u32 = 0, - file: ?u16 = null, // null means Zig module +pub const SymbolWithLoc = struct { + // Index into the respective symbol table. + sym_index: u32, + + // null means it's a synthetic global. + file: ?u32 = null, }; /// When allocating, the ideal_capacity is calculated by @@ -389,7 +366,7 @@ pub fn openPath(allocator: Allocator, options: link.Options) !*MachO { .n_desc = 0, .n_value = 0, }); - try self.strtab.append(allocator, 0); + try self.strtab.buffer.append(allocator, 0); try self.populateMissingMetadata(); @@ -524,7 +501,6 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; const is_exe_or_dyn_lib = is_dyn_lib or self.base.options.output_mode == .Exe; const stack_size = self.base.options.stack_size_override orelse 0; - const allow_undef = is_dyn_lib and (self.base.options.allow_shlib_undefined orelse false); const id_symlink_basename = "zld.id"; const cache_dir_handle = blk: { @@ -541,7 +517,6 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No defer if (!self.base.options.disable_lld_caching) man.deinit(); var digest: [Cache.hex_digest_len]u8 = undefined; - var needs_full_relink = true; cache: { if ((use_stage1 and self.base.options.disable_lld_caching) or self.base.options.cache_mode == .whole) @@ -610,14 +585,6 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No return; } else { log.debug("MachO Zld digest={s} match", .{std.fmt.fmtSliceHexLower(&digest)}); - if (!self.cold_start) { - log.debug(" no need to relink objects", .{}); - needs_full_relink = false; - } else { - log.debug(" TODO parse prelinked binary and continue linking where we left off", .{}); - // TODO until such time however, perform a full relink of objects. 
- needs_full_relink = true; - } } } log.debug("MachO Zld prev_digest={s} new_digest={s}", .{ @@ -672,441 +639,373 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No .n_desc = 0, .n_value = 0, }); - try self.strtab.append(self.base.allocator, 0); + try self.strtab.buffer.append(self.base.allocator, 0); try self.populateMissingMetadata(); } var lib_not_found = false; var framework_not_found = false; - if (needs_full_relink) { - for (self.objects.items) |*object| { - object.free(self.base.allocator, self); - object.deinit(self.base.allocator); + // Positional arguments to the linker such as object files and static archives. + var positionals = std.ArrayList([]const u8).init(arena); + try positionals.ensureUnusedCapacity(self.base.options.objects.len); + + var must_link_archives = std.StringArrayHashMap(void).init(arena); + try must_link_archives.ensureUnusedCapacity(self.base.options.objects.len); + + for (self.base.options.objects) |obj| { + if (must_link_archives.contains(obj.path)) continue; + if (obj.must_link) { + _ = must_link_archives.getOrPutAssumeCapacity(obj.path); + } else { + _ = positionals.appendAssumeCapacity(obj.path); } - self.objects.clearRetainingCapacity(); + } - for (self.archives.items) |*archive| { - archive.deinit(self.base.allocator); - } - self.archives.clearRetainingCapacity(); + for (comp.c_object_table.keys()) |key| { + try positionals.append(key.status.success.object_path); + } - for (self.dylibs.items) |*dylib| { - dylib.deinit(self.base.allocator); - } - self.dylibs.clearRetainingCapacity(); - self.dylibs_map.clearRetainingCapacity(); - self.referenced_dylibs.clearRetainingCapacity(); + if (module_obj_path) |p| { + try positionals.append(p); + } - { - var to_remove = std.ArrayList(u32).init(self.base.allocator); - defer to_remove.deinit(); - var it = self.symbol_resolver.iterator(); - while (it.next()) |entry| { - const key = entry.key_ptr.*; - const value = entry.value_ptr.*; - if (value.file != null) { 
- try to_remove.append(key); - } - } + if (comp.compiler_rt_lib) |lib| { + try positionals.append(lib.full_object_path); + } - for (to_remove.items) |key| { - if (self.symbol_resolver.fetchRemove(key)) |entry| { - const resolv = entry.value; - switch (resolv.where) { - .global => { - self.globals_free_list.append(self.base.allocator, resolv.where_index) catch {}; - const sym = &self.globals.items[resolv.where_index]; - sym.n_strx = 0; - sym.n_type = 0; - sym.n_value = 0; - }, - .undef => { - const sym = &self.undefs.items[resolv.where_index]; - sym.n_strx = 0; - sym.n_desc = 0; - }, - } - if (self.got_entries_table.get(.{ .global = entry.key })) |i| { - self.got_entries_free_list.append(self.base.allocator, @intCast(u32, i)) catch {}; - self.got_entries.items[i] = .{ .target = .{ .local = 0 }, .atom = undefined }; - _ = self.got_entries_table.swapRemove(.{ .global = entry.key }); - } - if (self.stubs_table.get(entry.key)) |i| { - self.stubs_free_list.append(self.base.allocator, @intCast(u32, i)) catch {}; - self.stubs.items[i] = undefined; - _ = self.stubs_table.swapRemove(entry.key); - } - } - } - } - // Invalidate all relocs - // TODO we only need to invalidate the backlinks to the relinked atoms from - // the relocatable object files. - self.invalidate_relocs = true; + // libc++ dep + if (self.base.options.link_libcpp) { + try positionals.append(comp.libcxxabi_static_lib.?.full_object_path); + try positionals.append(comp.libcxx_static_lib.?.full_object_path); + } - // Positional arguments to the linker such as object files and static archives. - var positionals = std.ArrayList([]const u8).init(arena); - try positionals.ensureUnusedCapacity(self.base.options.objects.len); + // Shared and static libraries passed via `-l` flag. 
+ var candidate_libs = std.StringArrayHashMap(SystemLib).init(arena); - var must_link_archives = std.StringArrayHashMap(void).init(arena); - try must_link_archives.ensureUnusedCapacity(self.base.options.objects.len); - - for (self.base.options.objects) |obj| { - if (must_link_archives.contains(obj.path)) continue; - if (obj.must_link) { - _ = must_link_archives.getOrPutAssumeCapacity(obj.path); - } else { - _ = positionals.appendAssumeCapacity(obj.path); - } + const system_lib_names = self.base.options.system_libs.keys(); + for (system_lib_names) |system_lib_name| { + // By this time, we depend on these libs being dynamically linked libraries and not static libraries + // (the check for that needs to be earlier), but they could be full paths to .dylib files, in which + // case we want to avoid prepending "-l". + if (Compilation.classifyFileExt(system_lib_name) == .shared_library) { + try positionals.append(system_lib_name); + continue; } - for (comp.c_object_table.keys()) |key| { - try positionals.append(key.status.success.object_path); + const system_lib_info = self.base.options.system_libs.get(system_lib_name).?; + try candidate_libs.put(system_lib_name, .{ + .needed = system_lib_info.needed, + .weak = system_lib_info.weak, + }); + } + + var lib_dirs = std.ArrayList([]const u8).init(arena); + for (self.base.options.lib_dirs) |dir| { + if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { + try lib_dirs.append(search_dir); + } else { + log.warn("directory not found for '-L{s}'", .{dir}); } + } - if (module_obj_path) |p| { - try positionals.append(p); - } + var libs = std.StringArrayHashMap(SystemLib).init(arena); - if (comp.compiler_rt_lib) |lib| { - try positionals.append(lib.full_object_path); - } - - // libc++ dep - if (self.base.options.link_libcpp) { - try positionals.append(comp.libcxxabi_static_lib.?.full_object_path); - try positionals.append(comp.libcxx_static_lib.?.full_object_path); - } - - // Shared and static libraries passed 
via `-l` flag. - var candidate_libs = std.StringArrayHashMap(SystemLib).init(arena); - - const system_lib_names = self.base.options.system_libs.keys(); - for (system_lib_names) |system_lib_name| { - // By this time, we depend on these libs being dynamically linked libraries and not static libraries - // (the check for that needs to be earlier), but they could be full paths to .dylib files, in which - // case we want to avoid prepending "-l". - if (Compilation.classifyFileExt(system_lib_name) == .shared_library) { - try positionals.append(system_lib_name); - continue; - } - - const system_lib_info = self.base.options.system_libs.get(system_lib_name).?; - try candidate_libs.put(system_lib_name, .{ - .needed = system_lib_info.needed, - .weak = system_lib_info.weak, - }); - } - - var lib_dirs = std.ArrayList([]const u8).init(arena); - for (self.base.options.lib_dirs) |dir| { - if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { - try lib_dirs.append(search_dir); - } else { - log.warn("directory not found for '-L{s}'", .{dir}); - } - } - - var libs = std.StringArrayHashMap(SystemLib).init(arena); - - // Assume ld64 default -search_paths_first if no strategy specified. - const search_strategy = self.base.options.search_strategy orelse .paths_first; - outer: for (candidate_libs.keys()) |lib_name| { - switch (search_strategy) { - .paths_first => { - // Look in each directory for a dylib (stub first), and then for archive - for (lib_dirs.items) |dir| { - for (&[_][]const u8{ ".tbd", ".dylib", ".a" }) |ext| { - if (try resolveLib(arena, dir, lib_name, ext)) |full_path| { - try libs.put(full_path, candidate_libs.get(lib_name).?); - continue :outer; - } + // Assume ld64 default -search_paths_first if no strategy specified. 
+ const search_strategy = self.base.options.search_strategy orelse .paths_first; + outer: for (candidate_libs.keys()) |lib_name| { + switch (search_strategy) { + .paths_first => { + // Look in each directory for a dylib (stub first), and then for archive + for (lib_dirs.items) |dir| { + for (&[_][]const u8{ ".tbd", ".dylib", ".a" }) |ext| { + if (try resolveLib(arena, dir, lib_name, ext)) |full_path| { + try libs.put(full_path, candidate_libs.get(lib_name).?); + continue :outer; } + } + } else { + log.warn("library not found for '-l{s}'", .{lib_name}); + lib_not_found = true; + } + }, + .dylibs_first => { + // First, look for a dylib in each search dir + for (lib_dirs.items) |dir| { + for (&[_][]const u8{ ".tbd", ".dylib" }) |ext| { + if (try resolveLib(arena, dir, lib_name, ext)) |full_path| { + try libs.put(full_path, candidate_libs.get(lib_name).?); + continue :outer; + } + } + } else for (lib_dirs.items) |dir| { + if (try resolveLib(arena, dir, lib_name, ".a")) |full_path| { + try libs.put(full_path, candidate_libs.get(lib_name).?); } else { log.warn("library not found for '-l{s}'", .{lib_name}); lib_not_found = true; } - }, - .dylibs_first => { - // First, look for a dylib in each search dir - for (lib_dirs.items) |dir| { - for (&[_][]const u8{ ".tbd", ".dylib" }) |ext| { - if (try resolveLib(arena, dir, lib_name, ext)) |full_path| { - try libs.put(full_path, candidate_libs.get(lib_name).?); - continue :outer; - } - } - } else for (lib_dirs.items) |dir| { - if (try resolveLib(arena, dir, lib_name, ".a")) |full_path| { - try libs.put(full_path, candidate_libs.get(lib_name).?); - } else { - log.warn("library not found for '-l{s}'", .{lib_name}); - lib_not_found = true; - } - } - }, + } + }, + } + } + + if (lib_not_found) { + log.warn("Library search paths:", .{}); + for (lib_dirs.items) |dir| { + log.warn(" {s}", .{dir}); + } + } + + // If we were given the sysroot, try to look there first for libSystem.B.{dylib, tbd}. 
+ var libsystem_available = false; + if (self.base.options.sysroot != null) blk: { + // Try stub file first. If we hit it, then we're done as the stub file + // re-exports every single symbol definition. + for (lib_dirs.items) |dir| { + if (try resolveLib(arena, dir, "System", ".tbd")) |full_path| { + try libs.put(full_path, .{ .needed = true }); + libsystem_available = true; + break :blk; } } - - if (lib_not_found) { - log.warn("Library search paths:", .{}); - for (lib_dirs.items) |dir| { - log.warn(" {s}", .{dir}); - } - } - - // If we were given the sysroot, try to look there first for libSystem.B.{dylib, tbd}. - var libsystem_available = false; - if (self.base.options.sysroot != null) blk: { - // Try stub file first. If we hit it, then we're done as the stub file - // re-exports every single symbol definition. - for (lib_dirs.items) |dir| { - if (try resolveLib(arena, dir, "System", ".tbd")) |full_path| { - try libs.put(full_path, .{ .needed = true }); + // If we didn't hit the stub file, try .dylib next. However, libSystem.dylib + // doesn't export libc.dylib which we'll need to resolve subsequently also. + for (lib_dirs.items) |dir| { + if (try resolveLib(arena, dir, "System", ".dylib")) |libsystem_path| { + if (try resolveLib(arena, dir, "c", ".dylib")) |libc_path| { + try libs.put(libsystem_path, .{ .needed = true }); + try libs.put(libc_path, .{ .needed = true }); libsystem_available = true; break :blk; } } - // If we didn't hit the stub file, try .dylib next. However, libSystem.dylib - // doesn't export libc.dylib which we'll need to resolve subsequently also. 
- for (lib_dirs.items) |dir| { - if (try resolveLib(arena, dir, "System", ".dylib")) |libsystem_path| { - if (try resolveLib(arena, dir, "c", ".dylib")) |libc_path| { - try libs.put(libsystem_path, .{ .needed = true }); - try libs.put(libc_path, .{ .needed = true }); - libsystem_available = true; - break :blk; - } - } - } } - if (!libsystem_available) { - const libsystem_name = try std.fmt.allocPrint(arena, "libSystem.{d}.tbd", .{ - self.base.options.target.os.version_range.semver.min.major, - }); - const full_path = try comp.zig_lib_directory.join(arena, &[_][]const u8{ - "libc", "darwin", libsystem_name, - }); - try libs.put(full_path, .{ .needed = true }); - } - - // frameworks - var framework_dirs = std.ArrayList([]const u8).init(arena); - for (self.base.options.framework_dirs) |dir| { - if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { - try framework_dirs.append(search_dir); - } else { - log.warn("directory not found for '-F{s}'", .{dir}); - } - } - - outer: for (self.base.options.frameworks.keys()) |f_name| { - for (framework_dirs.items) |dir| { - for (&[_][]const u8{ ".tbd", ".dylib", "" }) |ext| { - if (try resolveFramework(arena, dir, f_name, ext)) |full_path| { - const info = self.base.options.frameworks.get(f_name).?; - try libs.put(full_path, .{ - .needed = info.needed, - .weak = info.weak, - }); - continue :outer; - } - } - } else { - log.warn("framework not found for '-framework {s}'", .{f_name}); - framework_not_found = true; - } - } - - if (framework_not_found) { - log.warn("Framework search paths:", .{}); - for (framework_dirs.items) |dir| { - log.warn(" {s}", .{dir}); - } - } - - // rpaths - var rpath_table = std.StringArrayHashMap(void).init(arena); - for (self.base.options.rpath_list) |rpath| { - if (rpath_table.contains(rpath)) continue; - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.rpath_command) + rpath.len + 1, - @sizeOf(u64), - )); - var rpath_cmd = 
macho.emptyGenericCommandWithData(macho.rpath_command{ - .cmdsize = cmdsize, - .path = @sizeOf(macho.rpath_command), - }); - rpath_cmd.data = try self.base.allocator.alloc(u8, cmdsize - rpath_cmd.inner.path); - mem.set(u8, rpath_cmd.data, 0); - mem.copy(u8, rpath_cmd.data, rpath); - try self.load_commands.append(self.base.allocator, .{ .rpath = rpath_cmd }); - try rpath_table.putNoClobber(rpath, {}); - self.load_commands_dirty = true; - } - - // code signature and entitlements - if (self.base.options.entitlements) |path| { - if (self.code_signature) |*csig| { - try csig.addEntitlements(self.base.allocator, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - } else { - var csig = CodeSignature.init(self.page_size); - try csig.addEntitlements(self.base.allocator, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - self.code_signature = csig; - } - } - - if (self.base.options.verbose_link) { - var argv = std.ArrayList([]const u8).init(arena); - - try argv.append("zig"); - try argv.append("ld"); - - if (is_exe_or_dyn_lib) { - try argv.append("-dynamic"); - } - - if (is_dyn_lib) { - try argv.append("-dylib"); - - if (self.base.options.install_name) |install_name| { - try argv.append("-install_name"); - try argv.append(install_name); - } - } - - if (self.base.options.sysroot) |syslibroot| { - try argv.append("-syslibroot"); - try argv.append(syslibroot); - } - - for (rpath_table.keys()) |rpath| { - try argv.append("-rpath"); - try argv.append(rpath); - } - - if (self.base.options.pagezero_size) |pagezero_size| { - try argv.append("-pagezero_size"); - try argv.append(try std.fmt.allocPrint(arena, "0x{x}", .{pagezero_size})); - } - - if (self.base.options.search_strategy) |strat| switch (strat) { - .paths_first => try argv.append("-search_paths_first"), - .dylibs_first => try argv.append("-search_dylibs_first"), - }; - - if (self.base.options.headerpad_size) |headerpad_size| { - try argv.append("-headerpad_size"); - try 
argv.append(try std.fmt.allocPrint(arena, "0x{x}", .{headerpad_size})); - } - - if (self.base.options.headerpad_max_install_names) { - try argv.append("-headerpad_max_install_names"); - } - - if (self.base.options.gc_sections) |is_set| { - if (is_set) { - try argv.append("-dead_strip"); - } - } - - if (self.base.options.dead_strip_dylibs) { - try argv.append("-dead_strip_dylibs"); - } - - if (self.base.options.entry) |entry| { - try argv.append("-e"); - try argv.append(entry); - } - - for (self.base.options.objects) |obj| { - try argv.append(obj.path); - } - - for (comp.c_object_table.keys()) |key| { - try argv.append(key.status.success.object_path); - } - - if (module_obj_path) |p| { - try argv.append(p); - } - - if (comp.compiler_rt_lib) |lib| { - try argv.append(lib.full_object_path); - } - - if (self.base.options.link_libcpp) { - try argv.append(comp.libcxxabi_static_lib.?.full_object_path); - try argv.append(comp.libcxx_static_lib.?.full_object_path); - } - - try argv.append("-o"); - try argv.append(full_out_path); - - try argv.append("-lSystem"); - try argv.append("-lc"); - - for (self.base.options.system_libs.keys()) |l_name| { - const info = self.base.options.system_libs.get(l_name).?; - const arg = if (info.needed) - try std.fmt.allocPrint(arena, "-needed-l{s}", .{l_name}) - else if (info.weak) - try std.fmt.allocPrint(arena, "-weak-l{s}", .{l_name}) - else - try std.fmt.allocPrint(arena, "-l{s}", .{l_name}); - try argv.append(arg); - } - - for (self.base.options.lib_dirs) |lib_dir| { - try argv.append(try std.fmt.allocPrint(arena, "-L{s}", .{lib_dir})); - } - - for (self.base.options.frameworks.keys()) |framework| { - const info = self.base.options.frameworks.get(framework).?; - const arg = if (info.needed) - try std.fmt.allocPrint(arena, "-needed_framework {s}", .{framework}) - else if (info.weak) - try std.fmt.allocPrint(arena, "-weak_framework {s}", .{framework}) - else - try std.fmt.allocPrint(arena, "-framework {s}", .{framework}); - try 
argv.append(arg); - } - - for (self.base.options.framework_dirs) |framework_dir| { - try argv.append(try std.fmt.allocPrint(arena, "-F{s}", .{framework_dir})); - } - - if (allow_undef) { - try argv.append("-undefined"); - try argv.append("dynamic_lookup"); - } - - for (must_link_archives.keys()) |lib| { - try argv.append(try std.fmt.allocPrint(arena, "-force_load {s}", .{lib})); - } - - Compilation.dump_argv(argv.items); - } - - var dependent_libs = std.fifo.LinearFifo(struct { - id: Dylib.Id, - parent: u16, - }, .Dynamic).init(self.base.allocator); - defer dependent_libs.deinit(); - try self.parseInputFiles(positionals.items, self.base.options.sysroot, &dependent_libs); - try self.parseAndForceLoadStaticArchives(must_link_archives.keys()); - try self.parseLibs(libs.keys(), libs.values(), self.base.options.sysroot, &dependent_libs); - try self.parseDependentLibs(self.base.options.sysroot, &dependent_libs); } + if (!libsystem_available) { + const libsystem_name = try std.fmt.allocPrint(arena, "libSystem.{d}.tbd", .{ + self.base.options.target.os.version_range.semver.min.major, + }); + const full_path = try comp.zig_lib_directory.join(arena, &[_][]const u8{ + "libc", "darwin", libsystem_name, + }); + try libs.put(full_path, .{ .needed = true }); + } + + // frameworks + var framework_dirs = std.ArrayList([]const u8).init(arena); + for (self.base.options.framework_dirs) |dir| { + if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { + try framework_dirs.append(search_dir); + } else { + log.warn("directory not found for '-F{s}'", .{dir}); + } + } + + outer: for (self.base.options.frameworks.keys()) |f_name| { + for (framework_dirs.items) |dir| { + for (&[_][]const u8{ ".tbd", ".dylib", "" }) |ext| { + if (try resolveFramework(arena, dir, f_name, ext)) |full_path| { + const info = self.base.options.frameworks.get(f_name).?; + try libs.put(full_path, .{ + .needed = info.needed, + .weak = info.weak, + }); + continue :outer; + } + } + } else { + 
log.warn("framework not found for '-framework {s}'", .{f_name}); + framework_not_found = true; + } + } + + if (framework_not_found) { + log.warn("Framework search paths:", .{}); + for (framework_dirs.items) |dir| { + log.warn(" {s}", .{dir}); + } + } + + // rpaths + var rpath_table = std.StringArrayHashMap(void).init(arena); + for (self.base.options.rpath_list) |rpath| { + if (rpath_table.contains(rpath)) continue; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.rpath_command) + rpath.len + 1, + @sizeOf(u64), + )); + var rpath_cmd = macho.emptyGenericCommandWithData(macho.rpath_command{ + .cmdsize = cmdsize, + .path = @sizeOf(macho.rpath_command), + }); + rpath_cmd.data = try self.base.allocator.alloc(u8, cmdsize - rpath_cmd.inner.path); + mem.set(u8, rpath_cmd.data, 0); + mem.copy(u8, rpath_cmd.data, rpath); + try self.load_commands.append(self.base.allocator, .{ .rpath = rpath_cmd }); + try rpath_table.putNoClobber(rpath, {}); + self.load_commands_dirty = true; + } + + // code signature and entitlements + if (self.base.options.entitlements) |path| { + if (self.code_signature) |*csig| { + try csig.addEntitlements(self.base.allocator, path); + csig.code_directory.ident = self.base.options.emit.?.sub_path; + } else { + var csig = CodeSignature.init(self.page_size); + try csig.addEntitlements(self.base.allocator, path); + csig.code_directory.ident = self.base.options.emit.?.sub_path; + self.code_signature = csig; + } + } + + if (self.base.options.verbose_link) { + var argv = std.ArrayList([]const u8).init(arena); + + try argv.append("zig"); + try argv.append("ld"); + + if (is_exe_or_dyn_lib) { + try argv.append("-dynamic"); + } + + if (is_dyn_lib) { + try argv.append("-dylib"); + + if (self.base.options.install_name) |install_name| { + try argv.append("-install_name"); + try argv.append(install_name); + } + } + + if (self.base.options.sysroot) |syslibroot| { + try argv.append("-syslibroot"); + try argv.append(syslibroot); + } + + for 
(rpath_table.keys()) |rpath| { + try argv.append("-rpath"); + try argv.append(rpath); + } + + if (self.base.options.pagezero_size) |pagezero_size| { + try argv.append("-pagezero_size"); + try argv.append(try std.fmt.allocPrint(arena, "0x{x}", .{pagezero_size})); + } + + if (self.base.options.search_strategy) |strat| switch (strat) { + .paths_first => try argv.append("-search_paths_first"), + .dylibs_first => try argv.append("-search_dylibs_first"), + }; + + if (self.base.options.headerpad_size) |headerpad_size| { + try argv.append("-headerpad_size"); + try argv.append(try std.fmt.allocPrint(arena, "0x{x}", .{headerpad_size})); + } + + if (self.base.options.headerpad_max_install_names) { + try argv.append("-headerpad_max_install_names"); + } + + if (self.base.options.gc_sections) |is_set| { + if (is_set) { + try argv.append("-dead_strip"); + } + } + + if (self.base.options.dead_strip_dylibs) { + try argv.append("-dead_strip_dylibs"); + } + + if (self.base.options.entry) |entry| { + try argv.append("-e"); + try argv.append(entry); + } + + for (self.base.options.objects) |obj| { + try argv.append(obj.path); + } + + for (comp.c_object_table.keys()) |key| { + try argv.append(key.status.success.object_path); + } + + if (module_obj_path) |p| { + try argv.append(p); + } + + if (comp.compiler_rt_lib) |lib| { + try argv.append(lib.full_object_path); + } + + if (self.base.options.link_libcpp) { + try argv.append(comp.libcxxabi_static_lib.?.full_object_path); + try argv.append(comp.libcxx_static_lib.?.full_object_path); + } + + try argv.append("-o"); + try argv.append(full_out_path); + + try argv.append("-lSystem"); + try argv.append("-lc"); + + for (self.base.options.system_libs.keys()) |l_name| { + const info = self.base.options.system_libs.get(l_name).?; + const arg = if (info.needed) + try std.fmt.allocPrint(arena, "-needed-l{s}", .{l_name}) + else if (info.weak) + try std.fmt.allocPrint(arena, "-weak-l{s}", .{l_name}) + else + try std.fmt.allocPrint(arena, "-l{s}", 
.{l_name}); + try argv.append(arg); + } + + for (self.base.options.lib_dirs) |lib_dir| { + try argv.append(try std.fmt.allocPrint(arena, "-L{s}", .{lib_dir})); + } + + for (self.base.options.frameworks.keys()) |framework| { + const info = self.base.options.frameworks.get(framework).?; + const arg = if (info.needed) + try std.fmt.allocPrint(arena, "-needed_framework {s}", .{framework}) + else if (info.weak) + try std.fmt.allocPrint(arena, "-weak_framework {s}", .{framework}) + else + try std.fmt.allocPrint(arena, "-framework {s}", .{framework}); + try argv.append(arg); + } + + for (self.base.options.framework_dirs) |framework_dir| { + try argv.append(try std.fmt.allocPrint(arena, "-F{s}", .{framework_dir})); + } + + if (is_dyn_lib and (self.base.options.allow_shlib_undefined orelse false)) { + try argv.append("-undefined"); + try argv.append("dynamic_lookup"); + } + + for (must_link_archives.keys()) |lib| { + try argv.append(try std.fmt.allocPrint(arena, "-force_load {s}", .{lib})); + } + + Compilation.dump_argv(argv.items); + } + + var dependent_libs = std.fifo.LinearFifo(struct { + id: Dylib.Id, + parent: u16, + }, .Dynamic).init(self.base.allocator); + defer dependent_libs.deinit(); + try self.parseInputFiles(positionals.items, self.base.options.sysroot, &dependent_libs); + try self.parseAndForceLoadStaticArchives(must_link_archives.keys()); + try self.parseLibs(libs.keys(), libs.values(), self.base.options.sysroot, &dependent_libs); + try self.parseDependentLibs(self.base.options.sysroot, &dependent_libs); try self.createMhExecuteHeaderSymbol(); for (self.objects.items) |*object, object_id| { - if (object.analyzed) continue; - try self.resolveSymbolsInObject(@intCast(u16, object_id)); + try self.resolveSymbolsInObject(object, @intCast(u16, object_id)); } try self.resolveSymbolsInArchives(); @@ -1116,44 +1015,8 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.resolveSymbolsInDylibs(); try self.createDsoHandleSymbol(); 
try self.addCodeSignatureLC(); + try self.resolveSymbolsAtLoading(); - { - var next_sym: usize = 0; - while (next_sym < self.unresolved.count()) { - const sym = &self.undefs.items[self.unresolved.keys()[next_sym]]; - const sym_name = self.getString(sym.n_strx); - const resolv = self.symbol_resolver.get(sym.n_strx) orelse unreachable; - - if (sym.discarded()) { - sym.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - _ = self.unresolved.swapRemove(resolv.where_index); - continue; - } else if (allow_undef) { - const n_desc = @bitCast( - u16, - macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP * @intCast(i16, macho.N_SYMBOL_RESOLVER), - ); - // TODO allow_shlib_undefined is an ELF flag so figure out macOS specific flags too. - sym.n_type = macho.N_EXT; - sym.n_desc = n_desc; - _ = self.unresolved.swapRemove(resolv.where_index); - continue; - } - - log.err("undefined reference to symbol '{s}'", .{sym_name}); - if (resolv.file) |file| { - log.err(" first referenced in '{s}'", .{self.objects.items[file].name}); - } - - next_sym += 1; - } - } if (self.unresolved.count() > 0) { return error.UndefinedSymbolReference; } @@ -1165,35 +1028,40 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No } try self.createTentativeDefAtoms(); - try self.parseObjectsIntoAtoms(); const use_llvm = build_options.have_llvm and self.base.options.use_llvm; if (use_llvm or use_stage1) { - self.logAtoms(); + for (self.objects.items) |*object, object_id| { + try object.splitIntoAtomsWhole(self, @intCast(u32, object_id)); + } + try self.gcAtoms(); try self.pruneAndSortSections(); try self.allocateSegments(); - try self.allocateLocals(); + try self.allocateSymbols(); + } else { + // TODO incremental mode: parsing objects into atoms } try self.allocateSpecialSymbols(); - try self.allocateGlobals(); - if (build_options.enable_logging or true) { + if (build_options.enable_logging) { self.logSymtab(); self.logSectionOrdinals(); 
self.logAtoms(); } if (use_llvm or use_stage1) { - try self.writeAllAtoms(); + try self.writeAtomsWhole(); } else { - try self.writeAtoms(); + // try self.writeAtoms(); } if (self.rustc_section_index) |id| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].segment; - const sect = &seg.sections.items[id]; + const sect = self.getSectionPtr(.{ + .seg = self.data_segment_cmd_index.?, + .sect = id, + }); sect.size = self.rustc_section_size; } @@ -1234,10 +1102,10 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.writeCodeSignature(csig); // code signing always comes last } - if (build_options.enable_link_snapshots) { - if (self.base.options.enable_link_snapshots) - try self.snapshotState(); - } + // if (build_options.enable_link_snapshots) { + // if (self.base.options.enable_link_snapshots) + // try self.snapshotState(); + // } } cache: { @@ -1256,8 +1124,6 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No // other processes clobbering it. 
self.base.lock = man.toOwnedLock(); } - - self.cold_start = false; } fn resolveSearchDir( @@ -1521,7 +1387,7 @@ fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const .syslibroot = syslibroot, })) continue; - log.warn("unknown filetype for positional input file: '{s}'", .{file_name}); + log.debug("unknown filetype for positional input file: '{s}'", .{file_name}); } } @@ -1536,7 +1402,7 @@ fn parseAndForceLoadStaticArchives(self: *MachO, files: []const []const u8) !voi log.debug("parsing and force loading static archive '{s}'", .{full_path}); if (try self.parseArchive(full_path, true)) continue; - log.warn("unknown filetype: expected static archive: '{s}'", .{file_name}); + log.debug("unknown filetype: expected static archive: '{s}'", .{file_name}); } } @@ -1557,7 +1423,7 @@ fn parseLibs( })) continue; if (try self.parseArchive(lib, false)) continue; - log.warn("unknown filetype for a library: '{s}'", .{lib}); + log.debug("unknown filetype for a library: '{s}'", .{lib}); } } @@ -1601,7 +1467,7 @@ fn parseDependentLibs(self: *MachO, syslibroot: ?[]const u8, dependent_libs: any }); if (did_parse_successfully) break; } else { - log.warn("unable to resolve dependency {s}", .{dep_id.id.name}); + log.debug("unable to resolve dependency {s}", .{dep_id.id.name}); } } } @@ -2172,34 +2038,31 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio return res; } -pub fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment: u32) !*Atom { +pub fn createEmptyAtom(gpa: Allocator, sym_index: u32, size: u64, alignment: u32) !*Atom { const size_usize = math.cast(usize, size) orelse return error.Overflow; - const atom = try self.base.allocator.create(Atom); - errdefer self.base.allocator.destroy(atom); + const atom = try gpa.create(Atom); + errdefer gpa.destroy(atom); atom.* = Atom.empty; - atom.local_sym_index = local_sym_index; + atom.sym_index = sym_index; atom.size = size; atom.alignment = alignment; - try 
atom.code.resize(self.base.allocator, size_usize); + try atom.code.resize(gpa, size_usize); mem.set(u8, atom.code.items, 0); - try self.atom_by_index_table.putNoClobber(self.base.allocator, local_sym_index, atom); - try self.managed_atoms.append(self.base.allocator, atom); return atom; } pub fn writeAtom(self: *MachO, atom: *Atom, match: MatchingSection) !void { - const seg = self.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; - const sym = self.locals.items[atom.local_sym_index]; + const sect = self.getSection(match); + const sym = atom.getSymbol(self); const file_offset = sect.offset + sym.n_value - sect.addr; try atom.resolveRelocs(self); - log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ self.getString(sym.n_strx), file_offset }); + log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ atom.getName(self), file_offset }); try self.base.file.?.pwriteAll(atom.code.items, file_offset); } -fn allocateLocals(self: *MachO) !void { +fn allocateSymbols(self: *MachO) !void { var it = self.atoms.iterator(); while (it.next()) |entry| { const match = entry.key_ptr.*; @@ -2209,30 +2072,25 @@ fn allocateLocals(self: *MachO) !void { atom = prev; } - const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); - const seg = self.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; + const n_sect = self.getSectionOrdinal(match); + const sect = self.getSection(match); var base_vaddr = sect.addr; - log.debug("allocating local symbols in {s},{s}", .{ sect.segName(), sect.sectName() }); + log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ n_sect, sect.segName(), sect.sectName() }); while (true) { const alignment = try math.powi(u32, 2, atom.alignment); base_vaddr = mem.alignForwardGeneric(u64, base_vaddr, alignment); - const sym = &self.locals.items[atom.local_sym_index]; + const sym = atom.getSymbolPtr(self); sym.n_value = base_vaddr; sym.n_sect = n_sect; - log.debug(" {d}: {s} allocated at 0x{x}", .{ - atom.local_sym_index, - self.getString(sym.n_strx), - base_vaddr, - }); + log.debug(" ATOM(%{d}, '{s}') @{x}", .{ atom.sym_index, atom.getName(self), base_vaddr }); // Update each symbol contained within the atom for (atom.contained.items) |sym_at_off| { - const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; + const contained_sym = self.getSymbolPtr(.{ .sym_index = sym_at_off.sym_index, .file = atom.file }); contained_sym.n_value = base_vaddr + sym_at_off.offset; contained_sym.n_sect = n_sect; } @@ -2250,11 +2108,11 @@ fn shiftLocalsByOffset(self: *MachO, match: MatchingSection, offset: i64) !void var atom = self.atoms.get(match) orelse return; while (true) { - const atom_sym = &self.locals.items[atom.local_sym_index]; + const atom_sym = &self.locals.items[atom.sym_index]; atom_sym.n_value = @intCast(u64, @intCast(i64, atom_sym.n_value) + offset); for (atom.contained.items) |sym_at_off| { - const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; + const contained_sym = &self.locals.items[sym_at_off.sym_index]; contained_sym.n_value = @intCast(u64, @intCast(i64, contained_sym.n_value) + offset); } @@ -2265,53 +2123,30 @@ fn shiftLocalsByOffset(self: *MachO, match: MatchingSection, offset: i64) 
!void } fn allocateSpecialSymbols(self: *MachO) !void { - for (&[_]?u32{ - self.mh_execute_header_sym_index, - self.dso_handle_sym_index, - }) |maybe_sym_index| { - const sym_index = maybe_sym_index orelse continue; - const sym = &self.locals.items[sym_index]; + for (&[_][]const u8{ + "___dso_handle", + "__mh_execute_header", + }) |name| { + const global = self.globals.get(name) orelse continue; + const sym = self.getSymbolPtr(global); const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; - sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(.{ + sym.n_sect = self.getSectionOrdinal(.{ .seg = self.text_segment_cmd_index.?, .sect = 0, - }).? + 1); + }); sym.n_value = seg.inner.vmaddr; log.debug("allocating {s} at the start of {s}", .{ - self.getString(sym.n_strx), + name, seg.inner.segName(), }); } } -fn allocateGlobals(self: *MachO) !void { - log.debug("allocating global symbols", .{}); - - var sym_it = self.symbol_resolver.valueIterator(); - while (sym_it.next()) |resolv| { - if (resolv.where != .global) continue; - - assert(resolv.local_sym_index != 0); - const local_sym = self.locals.items[resolv.local_sym_index]; - const sym = &self.globals.items[resolv.where_index]; - sym.n_value = local_sym.n_value; - sym.n_sect = local_sym.n_sect; - - log.debug(" {d}: {s} allocated at 0x{x}", .{ - resolv.where_index, - self.getString(sym.n_strx), - local_sym.n_value, - }); - } -} - -fn writeAllAtoms(self: *MachO) !void { +fn writeAtomsWhole(self: *MachO) !void { var it = self.atoms.iterator(); while (it.next()) |entry| { - const match = entry.key_ptr.*; - const seg = self.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; + const sect = self.getSection(entry.key_ptr.*); var atom: *Atom = entry.value_ptr.*; if (sect.flags == macho.S_ZEROFILL or sect.flags == macho.S_THREAD_LOCAL_ZEROFILL) continue; @@ -2327,20 +2162,28 @@ fn writeAllAtoms(self: *MachO) !void { } while (true) { - const atom_sym = 
self.locals.items[atom.local_sym_index]; + const this_sym = atom.getSymbol(self); const padding_size: usize = if (atom.next) |next| blk: { - const next_sym = self.locals.items[next.local_sym_index]; - const size = next_sym.n_value - (atom_sym.n_value + atom.size); + const next_sym = next.getSymbol(self); + const size = next_sym.n_value - (this_sym.n_value + atom.size); break :blk math.cast(usize, size) orelse return error.Overflow; } else 0; - log.debug(" (adding atom {s} to buffer: {})", .{ self.getString(atom_sym.n_strx), atom_sym }); + log.debug(" (adding ATOM(%{d}, '{s}') from object({d}) to buffer)", .{ + atom.sym_index, + atom.getName(self), + atom.file, + }); + if (padding_size > 0) { + log.debug(" (with padding {x})", .{padding_size}); + } try atom.resolveRelocs(self); buffer.appendSliceAssumeCapacity(atom.code.items); var i: usize = 0; while (i < padding_size) : (i += 1) { + // TODO with NOPs buffer.appendAssumeCapacity(0); } @@ -2388,8 +2231,7 @@ fn writeAtoms(self: *MachO) !void { var it = self.atoms.iterator(); while (it.next()) |entry| { const match = entry.key_ptr.*; - const seg = self.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; + const sect = self.getSection(match); var atom: *Atom = entry.value_ptr.*; // TODO handle zerofill in stage2 @@ -2410,17 +2252,19 @@ fn writeAtoms(self: *MachO) !void { } } -pub fn createGotAtom(self: *MachO, target: Atom.Relocation.Target) !*Atom { - const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.base.allocator, .{ +pub fn createGotAtom(self: *MachO, target: SymbolWithLoc) !*Atom { + const gpa = self.base.allocator; + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, .n_value = 0, }); - const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); - try atom.relocs.append(self.base.allocator, .{ + + const atom = try 
MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); + try atom.relocs.append(gpa, .{ .offset = 0, .target = target, .addend = 0, @@ -2433,35 +2277,59 @@ pub fn createGotAtom(self: *MachO, target: Atom.Relocation.Target) !*Atom { else => unreachable, }, }); - switch (target) { - .local => { - try atom.rebases.append(self.base.allocator, 0); - }, - .global => |n_strx| { - try atom.bindings.append(self.base.allocator, .{ - .n_strx = n_strx, - .offset = 0, - }); - }, + + const target_sym = self.getSymbol(target); + if (target_sym.undf()) { + const global_index = @intCast(u32, self.globals.getIndex(self.getSymbolName(target)).?); + try atom.bindings.append(gpa, .{ + .global_index = global_index, + .offset = 0, + }); + } else { + try atom.rebases.append(gpa, 0); } + + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); + + try self.allocateAtomCommon(atom, .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }); + return atom; } -pub fn createTlvPtrAtom(self: *MachO, target: Atom.Relocation.Target) !*Atom { - const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.base.allocator, .{ +pub fn createTlvPtrAtom(self: *MachO, target: SymbolWithLoc) !*Atom { + const gpa = self.base.allocator; + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, .n_value = 0, }); - const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); - assert(target == .global); - try atom.bindings.append(self.base.allocator, .{ - .n_strx = target.global, + + const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); + const target_sym = self.getSymbol(target); + assert(target_sym.undf()); + const global_index = @intCast(u32, self.globals.getIndex(self.getSymbolName(target)).?); + try atom.bindings.append(gpa, .{ + .global_index = global_index, 
.offset = 0, }); + + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); + + const match = (try self.getMatchingSection(.{ + .segname = makeStaticString("__DATA"), + .sectname = makeStaticString("__thread_ptrs"), + .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, + })).?; + try self.allocateAtomCommon(atom, match); + return atom; } @@ -2469,34 +2337,32 @@ fn createDyldPrivateAtom(self: *MachO) !void { if (self.dyld_stub_binder_index == null) return; if (self.dyld_private_atom != null) return; - const local_sym_index = @intCast(u32, self.locals.items.len); - const sym = try self.locals.addOne(self.base.allocator); - sym.* = .{ + const gpa = self.base.allocator; + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, .n_value = 0, - }; - const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); + }); + const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); self.dyld_private_atom = atom; - const match = MatchingSection{ + + try self.allocateAtomCommon(atom, .{ .seg = self.data_segment_cmd_index.?, .sect = self.data_section_index.?, - }; - if (self.needs_prealloc) { - const vaddr = try self.allocateAtom(atom, @sizeOf(u64), 8, match); - log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); - sym.n_value = vaddr; - } else try self.addAtomToSection(atom, match); + }); - sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); } fn createStubHelperPreambleAtom(self: *MachO) !void { if (self.dyld_stub_binder_index == null) return; if (self.stub_helper_preamble_atom != null) return; + const gpa = self.base.allocator; const arch = self.base.options.target.cpu.arch; const size: u64 = switch (arch) { .x86_64 => 15, @@ -2508,17 +2374,16 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { .aarch64 => 2, else => unreachable, }; - const local_sym_index = @intCast(u32, self.locals.items.len); - const sym = try self.locals.addOne(self.base.allocator); - sym.* = .{ + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, .n_value = 0, - }; - const atom = try self.createEmptyAtom(local_sym_index, size, alignment); - const dyld_private_sym_index = self.dyld_private_atom.?.local_sym_index; + }); + const atom = try MachO.createEmptyAtom(gpa, sym_index, size, alignment); + const dyld_private_sym_index = self.dyld_private_atom.?.sym_index; switch (arch) { .x86_64 => { try atom.relocs.ensureUnusedCapacity(self.base.allocator, 2); @@ -2528,7 +2393,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { atom.code.items[2] = 0x1d; atom.relocs.appendAssumeCapacity(.{ .offset = 3, - .target = .{ .local = dyld_private_sym_index }, + .target = .{ .sym_index = dyld_private_sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -2543,7 +2408,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { atom.code.items[10] = 0x25; atom.relocs.appendAssumeCapacity(.{ .offset = 11, - .target = .{ .global = self.undefs.items[self.dyld_stub_binder_index.?].n_strx }, + .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -2557,7 +2422,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { 
mem.writeIntLittle(u32, atom.code.items[0..][0..4], aarch64.Instruction.adrp(.x17, 0).toU32()); atom.relocs.appendAssumeCapacity(.{ .offset = 0, - .target = .{ .local = dyld_private_sym_index }, + .target = .{ .sym_index = dyld_private_sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -2568,7 +2433,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { mem.writeIntLittle(u32, atom.code.items[4..][0..4], aarch64.Instruction.add(.x17, .x17, 0, false).toU32()); atom.relocs.appendAssumeCapacity(.{ .offset = 4, - .target = .{ .local = dyld_private_sym_index }, + .target = .{ .sym_index = dyld_private_sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = false, @@ -2586,7 +2451,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { mem.writeIntLittle(u32, atom.code.items[12..][0..4], aarch64.Instruction.adrp(.x16, 0).toU32()); atom.relocs.appendAssumeCapacity(.{ .offset = 12, - .target = .{ .global = self.undefs.items[self.dyld_stub_binder_index.?].n_strx }, + .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -2601,7 +2466,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { ).toU32()); atom.relocs.appendAssumeCapacity(.{ .offset = 16, - .target = .{ .global = self.undefs.items[self.dyld_stub_binder_index.?].n_strx }, + .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, .addend = 0, .subtractor = null, .pcrel = false, @@ -2614,22 +2479,18 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { else => unreachable, } self.stub_helper_preamble_atom = atom; - const match = MatchingSection{ + + try self.allocateAtomCommon(atom, .{ .seg = self.text_segment_cmd_index.?, .sect = self.stub_helper_section_index.?, - }; + }); - if (self.needs_prealloc) { - const alignment_pow_2 = try math.powi(u32, 2, atom.alignment); - const vaddr = try self.allocateAtom(atom, atom.size, alignment_pow_2, match); - log.debug("allocated {s} atom 
at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); - sym.n_value = vaddr; - } else try self.addAtomToSection(atom, match); - - sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); } pub fn createStubHelperAtom(self: *MachO) !*Atom { + const gpa = self.base.allocator; const arch = self.base.options.target.cpu.arch; const stub_size: u4 = switch (arch) { .x86_64 => 10, @@ -2641,16 +2502,16 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { .aarch64 => 2, else => unreachable, }; - const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.base.allocator, .{ + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, .n_value = 0, }); - const atom = try self.createEmptyAtom(local_sym_index, stub_size, alignment); - try atom.relocs.ensureTotalCapacity(self.base.allocator, 1); + const atom = try MachO.createEmptyAtom(gpa, sym_index, stub_size, alignment); + try atom.relocs.ensureTotalCapacity(gpa, 1); switch (arch) { .x86_64 => { @@ -2661,7 +2522,7 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { atom.code.items[5] = 0xe9; atom.relocs.appendAssumeCapacity(.{ .offset = 6, - .target = .{ .local = self.stub_helper_preamble_atom.?.local_sym_index }, + .target = .{ .sym_index = self.stub_helper_preamble_atom.?.sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -2683,7 +2544,7 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { mem.writeIntLittle(u32, atom.code.items[4..8], aarch64.Instruction.b(0).toU32()); atom.relocs.appendAssumeCapacity(.{ .offset = 4, - .target = .{ .local = self.stub_helper_preamble_atom.?.local_sym_index }, + .target = .{ .sym_index = self.stub_helper_preamble_atom.?.sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -2695,22 +2556,32 @@ 
pub fn createStubHelperAtom(self: *MachO) !*Atom { else => unreachable, } + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); + + try self.allocateAtomCommon(atom, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }); + return atom; } -pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, n_strx: u32) !*Atom { - const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.base.allocator, .{ +pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, target: SymbolWithLoc) !*Atom { + const gpa = self.base.allocator; + const sym_index = @intCast(u32, self.locals.items.len); + const global_index = @intCast(u32, self.globals.getIndex(self.getSymbolName(target)).?); + try self.locals.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, .n_value = 0, }); - const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); - try atom.relocs.append(self.base.allocator, .{ + const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); + try atom.relocs.append(gpa, .{ .offset = 0, - .target = .{ .local = stub_sym_index }, + .target = .{ .sym_index = stub_sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = false, @@ -2721,15 +2592,25 @@ pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, n_strx: u32) !*A else => unreachable, }, }); - try atom.rebases.append(self.base.allocator, 0); - try atom.lazy_bindings.append(self.base.allocator, .{ - .n_strx = n_strx, + try atom.rebases.append(gpa, 0); + try atom.lazy_bindings.append(gpa, .{ + .global_index = global_index, .offset = 0, }); + + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); + + try self.allocateAtomCommon(atom, .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.la_symbol_ptr_section_index.?, + }); + return atom; } pub fn createStubAtom(self: 
*MachO, laptr_sym_index: u32) !*Atom { + const gpa = self.base.allocator; const arch = self.base.options.target.cpu.arch; const alignment: u2 = switch (arch) { .x86_64 => 0, @@ -2741,23 +2622,23 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { .aarch64 => 3 * @sizeOf(u32), else => unreachable, // unhandled architecture type }; - const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.base.allocator, .{ + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, .n_value = 0, }); - const atom = try self.createEmptyAtom(local_sym_index, stub_size, alignment); + const atom = try MachO.createEmptyAtom(gpa, sym_index, stub_size, alignment); switch (arch) { .x86_64 => { // jmp atom.code.items[0] = 0xff; atom.code.items[1] = 0x25; - try atom.relocs.append(self.base.allocator, .{ + try atom.relocs.append(gpa, .{ .offset = 2, - .target = .{ .local = laptr_sym_index }, + .target = .{ .sym_index = laptr_sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -2766,12 +2647,12 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { }); }, .aarch64 => { - try atom.relocs.ensureTotalCapacity(self.base.allocator, 2); + try atom.relocs.ensureTotalCapacity(gpa, 2); // adrp x16, pages mem.writeIntLittle(u32, atom.code.items[0..4], aarch64.Instruction.adrp(.x16, 0).toU32()); atom.relocs.appendAssumeCapacity(.{ .offset = 0, - .target = .{ .local = laptr_sym_index }, + .target = .{ .sym_index = laptr_sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -2786,7 +2667,7 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { ).toU32()); atom.relocs.appendAssumeCapacity(.{ .offset = 4, - .target = .{ .local = laptr_sym_index }, + .target = .{ .sym_index = laptr_sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = false, @@ -2798,101 +2679,121 @@ pub 
fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { }, else => unreachable, } + + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); + + try self.allocateAtomCommon(atom, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stubs_section_index.?, + }); + return atom; } fn createTentativeDefAtoms(self: *MachO) !void { - if (self.tentatives.count() == 0) return; - // Convert any tentative definition into a regular symbol and allocate - // text blocks for each tentative definition. - while (self.tentatives.popOrNull()) |entry| { + const gpa = self.base.allocator; + + for (self.globals.values()) |global| { + const sym = self.getSymbolPtr(global); + if (!sym.tentative()) continue; + + log.debug("creating tentative definition for ATOM(%{d}, '{s}') in object({d})", .{ + global.sym_index, self.getSymbolName(global), global.file, + }); + + // Convert any tentative definition into a regular symbol and allocate + // text blocks for each tentative definition. const match = MatchingSection{ .seg = self.data_segment_cmd_index.?, .sect = self.bss_section_index.?, }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + _ = try self.section_ordinals.getOrPut(gpa, match); - const global_sym = &self.globals.items[entry.key]; - const size = global_sym.n_value; - const alignment = (global_sym.n_desc >> 8) & 0x0f; + const size = sym.n_value; + const alignment = (sym.n_desc >> 8) & 0x0f; - global_sym.n_value = 0; - global_sym.n_desc = 0; - global_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); - - const local_sym_index = @intCast(u32, self.locals.items.len); - const local_sym = try self.locals.addOne(self.base.allocator); - local_sym.* = .{ - .n_strx = global_sym.n_strx, - .n_type = macho.N_SECT, - .n_sect = global_sym.n_sect, + sym.* = .{ + .n_strx = sym.n_strx, + .n_type = macho.N_SECT | macho.N_EXT, + .n_sect = 0, .n_desc = 0, .n_value = 0, }; - const resolv = self.symbol_resolver.getPtr(local_sym.n_strx) orelse unreachable; - resolv.local_sym_index = local_sym_index; + const atom = try MachO.createEmptyAtom(gpa, global.sym_index, size, alignment); + atom.file = global.file; - const atom = try self.createEmptyAtom(local_sym_index, size, alignment); + try self.allocateAtomCommon(atom, match); - if (self.needs_prealloc) { - const alignment_pow_2 = try math.powi(u32, 2, alignment); - const vaddr = try self.allocateAtom(atom, size, alignment_pow_2, match); - local_sym.n_value = vaddr; - global_sym.n_value = vaddr; - } else try self.addAtomToSection(atom, match); + if (global.file) |file| { + const object = &self.objects.items[file]; + + try atom.contained.append(gpa, .{ + .sym_index = global.sym_index, + .offset = 0, + .stab = if (object.debug_info) |_| .static else null, + }); + + try object.managed_atoms.append(gpa, atom); + try object.atom_by_index_table.putNoClobber(gpa, global.sym_index, atom); + } else { + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, global.sym_index, atom); + } } } -fn createDsoHandleSymbol(self: *MachO) !void { - if (self.dso_handle_sym_index != null) return; +fn createMhExecuteHeaderSymbol(self: *MachO) !void { + if (self.base.options.output_mode != .Exe) return; + if (self.globals.contains("__mh_execute_header")) return; - const n_strx = self.strtab_dir.getKeyAdapted(@as([]const u8, "___dso_handle"), StringIndexAdapter{ - .bytes = &self.strtab, - }) orelse return; - - const resolv = self.symbol_resolver.getPtr(n_strx) orelse return; - if (resolv.where != .undef) return; - - 
const undef = &self.undefs.items[resolv.where_index]; - const local_sym_index = @intCast(u32, self.locals.items.len); - var nlist = macho.nlist_64{ - .n_strx = undef.n_strx, - .n_type = macho.N_SECT, + const gpa = self.base.allocator; + const name = try gpa.dupe(u8, "__mh_execute_header"); + const n_strx = try self.strtab.insert(gpa, name); + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ + .n_strx = n_strx, + .n_type = macho.N_SECT | macho.N_EXT, .n_sect = 0, .n_desc = 0, .n_value = 0, - }; - try self.locals.append(self.base.allocator, nlist); - const global_sym_index = @intCast(u32, self.globals.items.len); - nlist.n_type |= macho.N_EXT; - nlist.n_desc = macho.N_WEAK_DEF; - try self.globals.append(self.base.allocator, nlist); - self.dso_handle_sym_index = local_sym_index; - - assert(self.unresolved.swapRemove(resolv.where_index)); - - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - resolv.* = .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = local_sym_index, - }; + }); + try self.globals.putNoClobber(gpa, name, .{ + .sym_index = sym_index, + .file = null, + }); } -fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { - const object = &self.objects.items[object_id]; +fn createDsoHandleSymbol(self: *MachO) !void { + const global = self.globals.getPtr("___dso_handle") orelse return; + const sym = self.getSymbolPtr(global.*); + if (!sym.undf()) return; + + const gpa = self.base.allocator; + const n_strx = try self.strtab.insert(gpa, "___dso_handle"); + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ + .n_strx = n_strx, + .n_type = macho.N_SECT | macho.N_EXT, + .n_sect = 0, + .n_desc = macho.N_WEAK_DEF, + .n_value = 0, + }); + global.* = .{ + .sym_index = sym_index, + .file = null, + }; + _ = self.unresolved.swapRemove(@intCast(u32, self.globals.getIndex("___dso_handle").?)); +} + +fn 
resolveSymbolsInObject(self: *MachO, object: *Object, object_id: u16) !void { + const gpa = self.base.allocator; log.debug("resolving symbols in '{s}'", .{object.name}); - for (object.symtab) |sym, id| { - const sym_id = @intCast(u32, id); + for (object.symtab.items) |sym, index| { + const sym_index = @intCast(u32, index); const sym_name = object.getString(sym.n_strx); if (sym.stab()) { @@ -2916,170 +2817,81 @@ fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { return error.UnhandledSymbolType; } - if (sym.sect()) { - // Defined symbol regardless of scope lands in the locals symbol table. - const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.base.allocator, .{ - .n_strx = if (symbolIsTemp(sym, sym_name)) 0 else try self.makeString(sym_name), - .n_type = macho.N_SECT, - .n_sect = 0, - .n_desc = 0, - .n_value = sym.n_value, + if (sym.sect() and !sym.ext()) { + log.debug("symbol '{s}' local to object {s}; skipping...", .{ + sym_name, + object.name, }); - try object.symbol_mapping.putNoClobber(self.base.allocator, sym_id, local_sym_index); - try object.reverse_symbol_mapping.putNoClobber(self.base.allocator, local_sym_index, sym_id); - - // If the symbol's scope is not local aka translation unit, then we need work out - // if we should save the symbol as a global, or potentially flag the error. 
- if (!sym.ext()) continue; - - const n_strx = try self.makeString(sym_name); - const local = self.locals.items[local_sym_index]; - const resolv = self.symbol_resolver.getPtr(n_strx) orelse { - const global_sym_index = @intCast(u32, self.globals.items.len); - try self.globals.append(self.base.allocator, .{ - .n_strx = n_strx, - .n_type = sym.n_type, - .n_sect = 0, - .n_desc = sym.n_desc, - .n_value = sym.n_value, - }); - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = local_sym_index, - .file = object_id, - }); - continue; - }; - - switch (resolv.where) { - .global => { - const global = &self.globals.items[resolv.where_index]; - - if (global.tentative()) { - assert(self.tentatives.swapRemove(resolv.where_index)); - } else if (!(sym.weakDef() or sym.pext()) and !(global.weakDef() or global.pext())) { - log.err("symbol '{s}' defined multiple times", .{sym_name}); - if (resolv.file) |file| { - log.err(" first definition in '{s}'", .{self.objects.items[file].name}); - } - log.err(" next definition in '{s}'", .{object.name}); - return error.MultipleSymbolDefinitions; - } else if (sym.weakDef() or sym.pext()) continue; // Current symbol is weak, so skip it. - - // Otherwise, update the resolver and the global symbol. 
- global.n_type = sym.n_type; - resolv.local_sym_index = local_sym_index; - resolv.file = object_id; - - continue; - }, - .undef => { - const undef = &self.undefs.items[resolv.where_index]; - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - assert(self.unresolved.swapRemove(resolv.where_index)); - }, - } - - const global_sym_index = @intCast(u32, self.globals.items.len); - try self.globals.append(self.base.allocator, .{ - .n_strx = local.n_strx, - .n_type = sym.n_type, - .n_sect = 0, - .n_desc = sym.n_desc, - .n_value = sym.n_value, - }); - resolv.* = .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = local_sym_index, - .file = object_id, - }; - } else if (sym.tentative()) { - // Symbol is a tentative definition. - const n_strx = try self.makeString(sym_name); - const resolv = self.symbol_resolver.getPtr(n_strx) orelse { - const global_sym_index = @intCast(u32, self.globals.items.len); - try self.globals.append(self.base.allocator, .{ - .n_strx = try self.makeString(sym_name), - .n_type = sym.n_type, - .n_sect = 0, - .n_desc = sym.n_desc, - .n_value = sym.n_value, - }); - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .global, - .where_index = global_sym_index, - .file = object_id, - }); - _ = try self.tentatives.getOrPut(self.base.allocator, global_sym_index); - continue; - }; - - switch (resolv.where) { - .global => { - const global = &self.globals.items[resolv.where_index]; - if (!global.tentative()) continue; - if (global.n_value >= sym.n_value) continue; - - global.n_desc = sym.n_desc; - global.n_value = sym.n_value; - resolv.file = object_id; - }, - .undef => { - const undef = &self.undefs.items[resolv.where_index]; - const global_sym_index = @intCast(u32, self.globals.items.len); - try self.globals.append(self.base.allocator, .{ - .n_strx = undef.n_strx, - .n_type = sym.n_type, - .n_sect = 0, - .n_desc = sym.n_desc, - .n_value = 
sym.n_value, - }); - _ = try self.tentatives.getOrPut(self.base.allocator, global_sym_index); - assert(self.unresolved.swapRemove(resolv.where_index)); - - resolv.* = .{ - .where = .global, - .where_index = global_sym_index, - .file = object_id, - }; - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - }, - } - } else { - // Symbol is undefined. - const n_strx = try self.makeString(sym_name); - if (self.symbol_resolver.contains(n_strx)) continue; - - const undef_sym_index = @intCast(u32, self.undefs.items.len); - try self.undefs.append(self.base.allocator, .{ - .n_strx = try self.makeString(sym_name), - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = sym.n_desc, - .n_value = 0, - }); - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .undef, - .where_index = undef_sym_index, - .file = object_id, - }); - try self.unresolved.putNoClobber(self.base.allocator, undef_sym_index, .none); + continue; } + + const name = try gpa.dupe(u8, sym_name); + const global_index = @intCast(u32, self.globals.values().len); + const gop = try self.globals.getOrPut(gpa, name); + defer if (gop.found_existing) gpa.free(name); + + if (!gop.found_existing) { + gop.value_ptr.* = .{ + .sym_index = sym_index, + .file = object_id, + }; + if (sym.undf() and !sym.tentative()) { + try self.unresolved.putNoClobber(gpa, global_index, {}); + } + continue; + } + + const global = gop.value_ptr.*; + const global_sym = self.getSymbol(global); + + // Cases to consider: sym vs global_sym + // 1. strong(sym) and strong(global_sym) => error + // 2. strong(sym) and weak(global_sym) => sym + // 3. strong(sym) and tentative(global_sym) => sym + // 4. strong(sym) and undf(global_sym) => sym + // 5. weak(sym) and strong(global_sym) => global_sym + // 6. weak(sym) and tentative(global_sym) => sym + // 7. weak(sym) and undf(global_sym) => sym + // 8. tentative(sym) and strong(global_sym) => global_sym + // 9. 
tentative(sym) and weak(global_sym) => global_sym + // 10. tentative(sym) and tentative(global_sym) => pick larger + // 11. tentative(sym) and undf(global_sym) => sym + // 12. undf(sym) and * => global_sym + // + // Reduces to: + // 1. strong(sym) and strong(global_sym) => error + // 2. * and strong(global_sym) => global_sym + // 3. weak(sym) and weak(global_sym) => global_sym + // 4. tentative(sym) and tentative(global_sym) => pick larger + // 5. undf(sym) and * => global_sym + // 6. else => sym + + const sym_is_strong = sym.sect() and !(sym.weakDef() or sym.pext()); + const global_is_strong = global_sym.sect() and !(global_sym.weakDef() or global_sym.pext()); + const sym_is_weak = sym.sect() and (sym.weakDef() or sym.pext()); + const global_is_weak = global_sym.sect() and (global_sym.weakDef() or global_sym.pext()); + + if (sym_is_strong and global_is_strong) { + log.err("symbol '{s}' defined multiple times", .{sym_name}); + if (global.file) |file| { + log.err(" first definition in '{s}'", .{self.objects.items[file].name}); + } + log.err(" next definition in '{s}'", .{object.name}); + return error.MultipleSymbolDefinitions; + } + if (global_is_strong) continue; + if (sym_is_weak and global_is_weak) continue; + if (sym.tentative() and global_sym.tentative()) { + if (global_sym.n_value >= sym.n_value) continue; + } + if (sym.undf() and !sym.tentative()) continue; + + _ = self.unresolved.swapRemove(@intCast(u32, self.globals.getIndex(name).?)); + + gop.value_ptr.* = .{ + .sym_index = sym_index, + .file = object_id, + }; } } @@ -3088,8 +2900,8 @@ fn resolveSymbolsInArchives(self: *MachO) !void { var next_sym: usize = 0; loop: while (next_sym < self.unresolved.count()) { - const sym = self.undefs.items[self.unresolved.keys()[next_sym]]; - const sym_name = self.getString(sym.n_strx); + const global = self.globals.values()[self.unresolved.keys()[next_sym]]; + const sym_name = self.getSymbolName(global); for (self.archives.items) |archive| { // Check if the entry exists 
in a static archive. @@ -3102,7 +2914,7 @@ fn resolveSymbolsInArchives(self: *MachO) !void { const object_id = @intCast(u16, self.objects.items.len); const object = try self.objects.addOne(self.base.allocator); object.* = try archive.parseObject(self.base.allocator, self.base.options.target, offsets.items[0]); - try self.resolveSymbolsInObject(object_id); + try self.resolveSymbolsInObject(object, object_id); continue :loop; } @@ -3116,8 +2928,10 @@ fn resolveSymbolsInDylibs(self: *MachO) !void { var next_sym: usize = 0; loop: while (next_sym < self.unresolved.count()) { - const sym = self.undefs.items[self.unresolved.keys()[next_sym]]; - const sym_name = self.getString(sym.n_strx); + const global_index = self.unresolved.keys()[next_sym]; + const global = self.globals.values()[global_index]; + const sym = self.getSymbolPtr(global); + const sym_name = self.getSymbolName(global); for (self.dylibs.items) |dylib, id| { if (!dylib.symbols.contains(sym_name)) continue; @@ -3129,69 +2943,14 @@ fn resolveSymbolsInDylibs(self: *MachO) !void { } const ordinal = self.referenced_dylibs.getIndex(dylib_id) orelse unreachable; - const resolv = self.symbol_resolver.getPtr(sym.n_strx) orelse unreachable; - const undef = &self.undefs.items[resolv.where_index]; - undef.n_type |= macho.N_EXT; - undef.n_desc = @intCast(u16, ordinal + 1) * macho.N_SYMBOL_RESOLVER; + sym.n_type |= macho.N_EXT; + sym.n_desc = @intCast(u16, ordinal + 1) * macho.N_SYMBOL_RESOLVER; if (dylib.weak) { - undef.n_desc |= macho.N_WEAK_REF; + sym.n_desc |= macho.N_WEAK_REF; } - if (self.unresolved.fetchSwapRemove(resolv.where_index)) |entry| outer_blk: { - switch (entry.value) { - .none => {}, - .got => return error.TODOGotHint, - .stub => { - if (self.stubs_table.contains(sym.n_strx)) break :outer_blk; - const stub_helper_atom = blk: { - const match = MatchingSection{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }; - const atom = try self.createStubHelperAtom(); - const 
atom_sym = &self.locals.items[atom.local_sym_index]; - const alignment = try math.powi(u32, 2, atom.alignment); - const vaddr = try self.allocateAtom(atom, atom.size, alignment, match); - atom_sym.n_value = vaddr; - atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); - break :blk atom; - }; - const laptr_atom = blk: { - const match = MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.la_symbol_ptr_section_index.?, - }; - const atom = try self.createLazyPointerAtom( - stub_helper_atom.local_sym_index, - sym.n_strx, - ); - const atom_sym = &self.locals.items[atom.local_sym_index]; - const alignment = try math.powi(u32, 2, atom.alignment); - const vaddr = try self.allocateAtom(atom, atom.size, alignment, match); - atom_sym.n_value = vaddr; - atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); - break :blk atom; - }; - const stub_atom = blk: { - const match = MatchingSection{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stubs_section_index.?, - }; - const atom = try self.createStubAtom(laptr_atom.local_sym_index); - const atom_sym = &self.locals.items[atom.local_sym_index]; - const alignment = try math.powi(u32, 2, atom.alignment); - const vaddr = try self.allocateAtom(atom, atom.size, alignment, match); - atom_sym.n_value = vaddr; - atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); - break :blk atom; - }; - const stub_index = @intCast(u32, self.stubs.items.len); - try self.stubs.append(self.base.allocator, stub_atom); - try self.stubs_table.putNoClobber(self.base.allocator, sym.n_strx, stub_index); - }, - } - } + assert(self.unresolved.swapRemove(global_index)); continue :loop; } @@ -3200,39 +2959,46 @@ fn resolveSymbolsInDylibs(self: *MachO) !void { } } -fn createMhExecuteHeaderSymbol(self: *MachO) !void { - if (self.base.options.output_mode != .Exe) return; - if (self.mh_execute_header_sym_index != null) return; +fn resolveSymbolsAtLoading(self: *MachO) !void { + const is_lib = self.base.options.output_mode == .Lib; + const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; + const allow_undef = is_dyn_lib and (self.base.options.allow_shlib_undefined orelse false); - const n_strx = try self.makeString("__mh_execute_header"); - const local_sym_index = @intCast(u32, self.locals.items.len); - var nlist = macho.nlist_64{ - .n_strx = n_strx, - .n_type = macho.N_SECT, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - try self.locals.append(self.base.allocator, nlist); - self.mh_execute_header_sym_index = local_sym_index; + var next_sym: usize = 0; + while (next_sym < self.unresolved.count()) { + const global_index = self.unresolved.keys()[next_sym]; + const global = self.globals.values()[global_index]; + const sym = self.getSymbolPtr(global); + const sym_name = self.getSymbolName(global); - if (self.symbol_resolver.getPtr(n_strx)) |resolv| { - const global = &self.globals.items[resolv.where_index]; - if (!(global.weakDef() or !global.pext())) { - log.err("symbol '__mh_execute_header' defined multiple times", .{}); - return error.MultipleSymbolDefinitions; + if (sym.discarded()) { + sym.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + _ = self.unresolved.swapRemove(global_index); + continue; + } else if (allow_undef) { + const n_desc = @bitCast( + u16, + 
macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP * @intCast(i16, macho.N_SYMBOL_RESOLVER), + ); + // TODO allow_shlib_undefined is an ELF flag so figure out macOS specific flags too. + sym.n_type = macho.N_EXT; + sym.n_desc = n_desc; + _ = self.unresolved.swapRemove(global_index); + continue; } - resolv.local_sym_index = local_sym_index; - } else { - const global_sym_index = @intCast(u32, self.globals.items.len); - nlist.n_type |= macho.N_EXT; - try self.globals.append(self.base.allocator, nlist); - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = local_sym_index, - .file = null, - }); + + log.err("undefined reference to symbol '{s}'", .{sym_name}); + if (global.file) |file| { + log.err(" first referenced in '{s}'", .{self.objects.items[file].name}); + } + + next_sym += 1; } } @@ -3240,21 +3006,20 @@ fn resolveDyldStubBinder(self: *MachO) !void { if (self.dyld_stub_binder_index != null) return; if (self.unresolved.count() == 0) return; // no need for a stub binder if we don't have any imports - const n_strx = try self.makeString("dyld_stub_binder"); - const sym_index = @intCast(u32, self.undefs.items.len); - try self.undefs.append(self.base.allocator, .{ + const gpa = self.base.allocator; + const n_strx = try self.strtab.insert(gpa, "dyld_stub_binder"); + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ .n_strx = n_strx, .n_type = macho.N_UNDF, .n_sect = 0, .n_desc = 0, .n_value = 0, }); - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .undef, - .where_index = sym_index, - }); - const sym = &self.undefs.items[sym_index]; - const sym_name = self.getString(n_strx); + const sym_name = try gpa.dupe(u8, "dyld_stub_binder"); + const global = SymbolWithLoc{ .sym_index = sym_index, .file = null }; + try self.globals.putNoClobber(gpa, sym_name, global); + const sym = &self.locals.items[sym_index]; for 
(self.dylibs.items) |dylib, id| { if (!dylib.symbols.contains(sym_name)) continue; @@ -3279,193 +3044,9 @@ fn resolveDyldStubBinder(self: *MachO) !void { } // Add dyld_stub_binder as the final GOT entry. - const target = Atom.Relocation.Target{ .global = n_strx }; - const atom = try self.createGotAtom(target); - const got_index = @intCast(u32, self.got_entries.items.len); - try self.got_entries.append(self.base.allocator, .{ .target = target, .atom = atom }); - try self.got_entries_table.putNoClobber(self.base.allocator, target, got_index); - const match = MatchingSection{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.got_section_index.?, - }; - const atom_sym = &self.locals.items[atom.local_sym_index]; - - if (self.needs_prealloc) { - const vaddr = try self.allocateAtom(atom, @sizeOf(u64), 8, match); - log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); - atom_sym.n_value = vaddr; - } else try self.addAtomToSection(atom, match); - - atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); -} - -fn parseObjectsIntoAtoms(self: *MachO) !void { - // TODO I need to see if I can simplify this logic, or perhaps split it into two functions: - // one for non-prealloc traditional path, and one for incremental prealloc path. 
- const tracy = trace(@src()); - defer tracy.end(); - - var parsed_atoms = std.AutoArrayHashMap(MatchingSection, *Atom).init(self.base.allocator); - defer parsed_atoms.deinit(); - - var first_atoms = std.AutoArrayHashMap(MatchingSection, *Atom).init(self.base.allocator); - defer first_atoms.deinit(); - - var section_metadata = std.AutoHashMap(MatchingSection, struct { - size: u64, - alignment: u32, - }).init(self.base.allocator); - defer section_metadata.deinit(); - - for (self.objects.items) |*object| { - if (object.analyzed) continue; - - try object.parseIntoAtoms(self.base.allocator, self); - - var it = object.end_atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var atom = entry.value_ptr.*; - - while (atom.prev) |prev| { - atom = prev; - } - - const first_atom = atom; - - const seg = self.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; - const metadata = try section_metadata.getOrPut(match); - if (!metadata.found_existing) { - metadata.value_ptr.* = .{ - .size = sect.size, - .alignment = sect.@"align", - }; - } - - log.debug("{s},{s}", .{ sect.segName(), sect.sectName() }); - - while (true) { - const alignment = try math.powi(u32, 2, atom.alignment); - const curr_size = metadata.value_ptr.size; - const curr_size_aligned = mem.alignForwardGeneric(u64, curr_size, alignment); - metadata.value_ptr.size = curr_size_aligned + atom.size; - metadata.value_ptr.alignment = math.max(metadata.value_ptr.alignment, atom.alignment); - - const sym = self.locals.items[atom.local_sym_index]; - log.debug(" {s}: n_value=0x{x}, size=0x{x}, alignment=0x{x}", .{ - self.getString(sym.n_strx), - sym.n_value, - atom.size, - atom.alignment, - }); - - if (atom.next) |next| { - atom = next; - } else break; - } - - if (parsed_atoms.getPtr(match)) |last| { - last.*.next = first_atom; - first_atom.prev = last.*; - last.* = first_atom; - } - _ = try parsed_atoms.put(match, atom); - - if (!first_atoms.contains(match)) { - 
try first_atoms.putNoClobber(match, first_atom); - } - } - - object.analyzed = true; - } - - var it = section_metadata.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - const metadata = entry.value_ptr.*; - const seg = &self.load_commands.items[match.seg].segment; - const sect = &seg.sections.items[match.sect]; - log.debug("{s},{s} => size: 0x{x}, alignment: 0x{x}", .{ - sect.segName(), - sect.sectName(), - metadata.size, - metadata.alignment, - }); - - sect.@"align" = math.max(sect.@"align", metadata.alignment); - const needed_size = @intCast(u32, metadata.size); - - if (self.needs_prealloc) { - try self.growSection(match, needed_size); - } - sect.size = needed_size; - } - - for (&[_]?u16{ - self.text_segment_cmd_index, - self.data_const_segment_cmd_index, - self.data_segment_cmd_index, - }) |maybe_seg_id| { - const seg_id = maybe_seg_id orelse continue; - const seg = self.load_commands.items[seg_id].segment; - - for (seg.sections.items) |sect, sect_id| { - const match = MatchingSection{ - .seg = seg_id, - .sect = @intCast(u16, sect_id), - }; - if (!section_metadata.contains(match)) continue; - - var base_vaddr = if (self.atoms.get(match)) |last| blk: { - const last_atom_sym = self.locals.items[last.local_sym_index]; - break :blk last_atom_sym.n_value + last.size; - } else sect.addr; - - if (self.atoms.getPtr(match)) |last| { - const first_atom = first_atoms.get(match).?; - last.*.next = first_atom; - first_atom.prev = last.*; - last.* = first_atom; - } - _ = try self.atoms.put(self.base.allocator, match, parsed_atoms.get(match).?); - - if (!self.needs_prealloc) continue; - - const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); - - var atom = first_atoms.get(match).?; - while (true) { - const alignment = try math.powi(u32, 2, atom.alignment); - base_vaddr = mem.alignForwardGeneric(u64, base_vaddr, alignment); - - const sym = &self.locals.items[atom.local_sym_index]; - sym.n_value = base_vaddr; - sym.n_sect = n_sect; - - log.debug(" {s}: start=0x{x}, end=0x{x}, size=0x{x}, alignment=0x{x}", .{ - self.getString(sym.n_strx), - base_vaddr, - base_vaddr + atom.size, - atom.size, - atom.alignment, - }); - - // Update each symbol contained within the atom - for (atom.contained.items) |sym_at_off| { - const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; - contained_sym.n_value = base_vaddr + sym_at_off.offset; - contained_sym.n_sect = n_sect; - } - - base_vaddr += atom.size; - - if (atom.next) |next| { - atom = next; - } else break; - } - } - } + const got_index = try self.allocateGotEntry(global); + const got_atom = try self.createGotAtom(global); + self.got_entries.items[got_index].atom = got_atom; } fn addLoadDylibLC(self: *MachO, id: u16) !void { @@ -3503,15 +3084,11 @@ fn setEntryPoint(self: *MachO) !void { const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; const entry_name = self.base.options.entry orelse "_main"; - const n_strx = self.strtab_dir.getKeyAdapted(entry_name, StringIndexAdapter{ - .bytes = &self.strtab, - }) orelse { + const global = self.globals.get(entry_name) orelse { log.err("entrypoint '{s}' not found", .{entry_name}); return error.MissingMainEntrypoint; }; - const resolv = self.symbol_resolver.get(n_strx) orelse unreachable; - assert(resolv.where == .global); - const sym = self.globals.items[resolv.where_index]; + const sym = self.getSymbol(global); const ec = &self.load_commands.items[self.main_cmd_index.?].main; ec.entryoff = @intCast(u32, sym.n_value - seg.inner.vmaddr); ec.stacksize = self.base.options.stack_size_override orelse 0; @@ -3538,17 +3115,13 @@ pub fn deinit(self: *MachO) void { 
self.stubs.deinit(self.base.allocator); self.stubs_free_list.deinit(self.base.allocator); self.stubs_table.deinit(self.base.allocator); - self.strtab_dir.deinit(self.base.allocator); self.strtab.deinit(self.base.allocator); - self.undefs.deinit(self.base.allocator); self.globals.deinit(self.base.allocator); - self.globals_free_list.deinit(self.base.allocator); self.locals.deinit(self.base.allocator); self.locals_free_list.deinit(self.base.allocator); - self.symbol_resolver.deinit(self.base.allocator); self.unresolved.deinit(self.base.allocator); - self.tentatives.deinit(self.base.allocator); self.gc_roots.deinit(self.base.allocator); + self.gc_sections.deinit(self.base.allocator); for (self.objects.items) |*object| { object.deinit(self.base.allocator); @@ -3662,7 +3235,7 @@ fn freeAtom(self: *MachO, atom: *Atom, match: MatchingSection, owns_atom: bool) if (atom.prev) |prev| { prev.next = atom.next; - if (!already_have_free_list_node and prev.freeListEligible(self.*)) { + if (!already_have_free_list_node and prev.freeListEligible(self)) { // The free list is heuristics, it doesn't have to be perfect, so we can ignore // the OOM here. 
free_list.append(self.base.allocator, prev) catch {}; @@ -3692,9 +3265,9 @@ fn shrinkAtom(self: *MachO, atom: *Atom, new_block_size: u64, match: MatchingSec } fn growAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, match: MatchingSection) !u64 { - const sym = self.locals.items[atom.local_sym_index]; + const sym = self.locals.items[atom.sym_index]; const align_ok = mem.alignBackwardGeneric(u64, sym.n_value, alignment) == sym.n_value; - const need_realloc = !align_ok or new_atom_size > atom.capacity(self.*); + const need_realloc = !align_ok or new_atom_size > atom.capacity(self); if (!need_realloc) return sym.n_value; return self.allocateAtom(atom, new_atom_size, alignment, match); } @@ -3725,7 +3298,7 @@ fn allocateLocalSymbol(self: *MachO) !u32 { return index; } -pub fn allocateGotEntry(self: *MachO, target: Atom.Relocation.Target) !u32 { +pub fn allocateGotEntry(self: *MachO, target: SymbolWithLoc) !u32 { try self.got_entries.ensureUnusedCapacity(self.base.allocator, 1); const index = blk: { @@ -3740,16 +3313,13 @@ pub fn allocateGotEntry(self: *MachO, target: Atom.Relocation.Target) !u32 { } }; - self.got_entries.items[index] = .{ - .target = target, - .atom = undefined, - }; + self.got_entries.items[index] = .{ .target = target, .atom = undefined }; try self.got_entries_table.putNoClobber(self.base.allocator, target, index); return index; } -pub fn allocateStubEntry(self: *MachO, n_strx: u32) !u32 { +pub fn allocateStubEntry(self: *MachO, target: SymbolWithLoc) !u32 { try self.stubs.ensureUnusedCapacity(self.base.allocator, 1); const index = blk: { @@ -3764,13 +3334,13 @@ pub fn allocateStubEntry(self: *MachO, n_strx: u32) !u32 { } }; - self.stubs.items[index] = undefined; - try self.stubs_table.putNoClobber(self.base.allocator, n_strx, index); + self.stubs.items[index] = .{ .target = target, .atom = undefined }; + try self.stubs_table.putNoClobber(self.base.allocator, target, index); return index; } -pub fn allocateTlvPtrEntry(self: *MachO, 
target: Atom.Relocation.Target) !u32 { +pub fn allocateTlvPtrEntry(self: *MachO, target: SymbolWithLoc) !u32 { try self.tlv_ptr_entries.ensureUnusedCapacity(self.base.allocator, 1); const index = blk: { @@ -3794,16 +3364,14 @@ pub fn allocateTlvPtrEntry(self: *MachO, target: Atom.Relocation.Target) !u32 { pub fn allocateDeclIndexes(self: *MachO, decl_index: Module.Decl.Index) !void { if (self.llvm_object) |_| return; const decl = self.base.options.module.?.declPtr(decl_index); - if (decl.link.macho.local_sym_index != 0) return; + if (decl.link.macho.sym_index != 0) return; - decl.link.macho.local_sym_index = try self.allocateLocalSymbol(); - try self.atom_by_index_table.putNoClobber(self.base.allocator, decl.link.macho.local_sym_index, &decl.link.macho); + decl.link.macho.sym_index = try self.allocateLocalSymbol(); + try self.atom_by_index_table.putNoClobber(self.base.allocator, decl.link.macho.sym_index, &decl.link.macho); try self.decls.putNoClobber(self.base.allocator, decl_index, null); - const got_target = .{ .local = decl.link.macho.local_sym_index }; - const got_index = try self.allocateGotEntry(got_target); - const got_atom = try self.createGotAtom(got_target); - self.got_entries.items[got_index].atom = got_atom; + const got_target = .{ .sym_index = decl.link.macho.sym_index, .file = null }; + _ = try self.allocateGotEntry(got_target); } pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liveness: Liveness) !void { @@ -3877,8 +3445,9 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu var code_buffer = std.ArrayList(u8).init(self.base.allocator); defer code_buffer.deinit(); + const gpa = self.base.allocator; const module = self.base.options.module.?; - const gop = try self.unnamed_const_atoms.getOrPut(self.base.allocator, decl_index); + const gop = try self.unnamed_const_atoms.getOrPut(gpa, decl_index); if (!gop.found_existing) { gop.value_ptr.* = .{}; } @@ -3886,24 +3455,32 @@ pub fn 
lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu const decl = module.declPtr(decl_index); const decl_name = try decl.getFullyQualifiedName(module); - defer self.base.allocator.free(decl_name); + defer gpa.free(decl_name); const name_str_index = blk: { const index = unnamed_consts.items.len; - const name = try std.fmt.allocPrint(self.base.allocator, "__unnamed_{s}_{d}", .{ decl_name, index }); - defer self.base.allocator.free(name); - break :blk try self.makeString(name); + const name = try std.fmt.allocPrint(gpa, "__unnamed_{s}_{d}", .{ decl_name, index }); + defer gpa.free(name); + break :blk try self.strtab.insert(gpa, name); }; - const name = self.getString(name_str_index); + const name = self.strtab.get(name_str_index); log.debug("allocating symbol indexes for {s}", .{name}); const required_alignment = typed_value.ty.abiAlignment(self.base.options.target); - const local_sym_index = try self.allocateLocalSymbol(); - const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), math.log2(required_alignment)); + const sym_index = try self.allocateLocalSymbol(); + const atom = try MachO.createEmptyAtom( + gpa, + sym_index, + @sizeOf(u64), + math.log2(required_alignment), + ); + + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); const res = try codegen.generateSymbol(&self.base, decl.srcLoc(), typed_value, &code_buffer, .none, .{ - .parent_atom_index = local_sym_index, + .parent_atom_index = sym_index, }); const code = switch (res) { .externally_managed => |x| x, @@ -3917,7 +3494,7 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu }; atom.code.clearRetainingCapacity(); - try atom.code.appendSlice(self.base.allocator, code); + try atom.code.appendSlice(gpa, code); const match = try self.getMatchingSectionAtom( atom, @@ -3933,18 +3510,18 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu errdefer 
self.freeAtom(atom, match, true); - const symbol = &self.locals.items[atom.local_sym_index]; + const symbol = &self.locals.items[atom.sym_index]; symbol.* = .{ .n_strx = name_str_index, .n_type = macho.N_SECT, - .n_sect = @intCast(u8, self.section_ordinals.getIndex(match).?) + 1, + .n_sect = self.getSectionOrdinal(match), .n_desc = 0, .n_value = addr, }; - try unnamed_consts.append(self.base.allocator, atom); + try unnamed_consts.append(gpa, atom); - return atom.local_sym_index; + return atom.sym_index; } pub fn updateDecl(self: *MachO, module: *Module, decl_index: Module.Decl.Index) !void { @@ -3986,14 +3563,14 @@ pub fn updateDecl(self: *MachO, module: *Module, decl_index: Module.Decl.Index) }, &code_buffer, .{ .dwarf = ds, }, .{ - .parent_atom_index = decl.link.macho.local_sym_index, + .parent_atom_index = decl.link.macho.sym_index, }) else try codegen.generateSymbol(&self.base, decl.srcLoc(), .{ .ty = decl.ty, .val = decl_val, }, &code_buffer, .none, .{ - .parent_atom_index = decl.link.macho.local_sym_index, + .parent_atom_index = decl.link.macho.sym_index, }); const code = blk: { @@ -4168,8 +3745,7 @@ fn getMatchingSectionAtom( .@"align" = align_log_2, })).?; }; - const seg = self.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; + const sect = self.getSection(match); log.debug(" allocating atom '{s}' in '{s},{s}' ({d},{d})", .{ name, sect.segName(), @@ -4184,8 +3760,8 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !*mac const module = self.base.options.module.?; const decl = module.declPtr(decl_index); const required_alignment = decl.getAlignment(self.base.options.target); - assert(decl.link.macho.local_sym_index != 0); // Caller forgot to call allocateDeclIndexes() - const symbol = &self.locals.items[decl.link.macho.local_sym_index]; + assert(decl.link.macho.sym_index != 0); // Caller forgot to call allocateDeclIndexes() + const symbol = &self.locals.items[decl.link.macho.sym_index]; const 
sym_name = try decl.getFullyQualifiedName(module); defer self.base.allocator.free(sym_name); @@ -4203,7 +3779,7 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !*mac const match = decl_ptr.*.?; if (decl.link.macho.size != 0) { - const capacity = decl.link.macho.capacity(self.*); + const capacity = decl.link.macho.capacity(self); const need_realloc = code_len > capacity or !mem.isAlignedGeneric(u64, symbol.n_value, required_alignment); if (need_realloc) { @@ -4217,12 +3793,12 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !*mac decl.link.macho.size = code_len; decl.link.macho.dirty = true; - symbol.n_strx = try self.makeString(sym_name); + symbol.n_strx = try self.strtab.insert(self.base.allocator, sym_name); symbol.n_type = macho.N_SECT; symbol.n_sect = @intCast(u8, self.text_section_index.?) + 1; symbol.n_desc = 0; } else { - const name_str_index = try self.makeString(sym_name); + const name_str_index = try self.strtab.insert(self.base.allocator, sym_name); const addr = try self.allocateAtom(&decl.link.macho, code_len, required_alignment, match); log.debug("allocated atom for {s} at 0x{x}", .{ sym_name, addr }); @@ -4233,22 +3809,18 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !*mac symbol.* = .{ .n_strx = name_str_index, .n_type = macho.N_SECT, - .n_sect = @intCast(u8, self.section_ordinals.getIndex(match).?) 
+ 1, + .n_sect = self.getSectionOrdinal(match), .n_desc = 0, .n_value = addr, }; - const got_index = self.got_entries_table.get(.{ .local = decl.link.macho.local_sym_index }).?; - const got_atom = self.got_entries.items[got_index].atom; - const got_sym = &self.locals.items[got_atom.local_sym_index]; - const vaddr = try self.allocateAtom(got_atom, @sizeOf(u64), 8, .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.got_section_index.?, - }); - got_sym.n_value = vaddr; - got_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(.{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.got_section_index.?, - }).? + 1); + + const got_target = SymbolWithLoc{ + .sym_index = decl.link.macho.sym_index, + .file = null, + }; + const got_index = self.got_entries_table.get(got_target).?; + const got_atom = try self.createGotAtom(got_target); + self.got_entries.items[got_index].atom = got_atom; } return symbol; @@ -4278,8 +3850,8 @@ pub fn updateDeclExports( try self.globals.ensureUnusedCapacity(self.base.allocator, exports.len); const decl = module.declPtr(decl_index); - if (decl.link.macho.local_sym_index == 0) return; - const decl_sym = &self.locals.items[decl.link.macho.local_sym_index]; + if (decl.link.macho.sym_index == 0) return; + const decl_sym = &self.locals.items[decl.link.macho.sym_index]; for (exports) |exp| { const exp_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{exp.options.name}); @@ -4316,46 +3888,47 @@ pub fn updateDeclExports( } const is_weak = exp.options.linkage == .Internal or exp.options.linkage == .Weak; - const n_strx = try self.makeString(exp_name); - if (self.symbol_resolver.getPtr(n_strx)) |resolv| { - switch (resolv.where) { - .global => { - if (resolv.local_sym_index == decl.link.macho.local_sym_index) continue; + _ = is_weak; + const n_strx = try self.strtab.insert(self.base.allocator, exp_name); + // if (self.symbol_resolver.getPtr(n_strx)) |resolv| { + // switch (resolv.where) { + // .global => { + // if 
(resolv.sym_index == decl.link.macho.sym_index) continue; - const sym = &self.globals.items[resolv.where_index]; + // const sym = &self.globals.items[resolv.where_index]; - if (sym.tentative()) { - assert(self.tentatives.swapRemove(resolv.where_index)); - } else if (!is_weak and !(sym.weakDef() or sym.pext())) { - _ = try module.failed_exports.put( - module.gpa, - exp, - try Module.ErrorMsg.create( - self.base.allocator, - decl.srcLoc(), - \\LinkError: symbol '{s}' defined multiple times - \\ first definition in '{s}' - , - .{ exp_name, self.objects.items[resolv.file.?].name }, - ), - ); - continue; - } else if (is_weak) continue; // Current symbol is weak, so skip it. + // if (sym.tentative()) { + // assert(self.tentatives.swapRemove(resolv.where_index)); + // } else if (!is_weak and !(sym.weakDef() or sym.pext())) { + // _ = try module.failed_exports.put( + // module.gpa, + // exp, + // try Module.ErrorMsg.create( + // self.base.allocator, + // decl.srcLoc(), + // \\LinkError: symbol '{s}' defined multiple times + // \\ first definition in '{s}' + // , + // .{ exp_name, self.objects.items[resolv.file.?].name }, + // ), + // ); + // continue; + // } else if (is_weak) continue; // Current symbol is weak, so skip it. - // Otherwise, update the resolver and the global symbol. - sym.n_type = macho.N_SECT | macho.N_EXT; - resolv.local_sym_index = decl.link.macho.local_sym_index; - resolv.file = null; - exp.link.macho.sym_index = resolv.where_index; + // // Otherwise, update the resolver and the global symbol. 
+ // sym.n_type = macho.N_SECT | macho.N_EXT; + // resolv.sym_index = decl.link.macho.sym_index; + // resolv.file = null; + // exp.link.macho.sym_index = resolv.where_index; - continue; - }, - .undef => { - assert(self.unresolved.swapRemove(resolv.where_index)); - _ = self.symbol_resolver.remove(n_strx); - }, - } - } + // continue; + // }, + // .undef => { + // assert(self.unresolved.swapRemove(resolv.where_index)); + // _ = self.symbol_resolver.remove(n_strx); + // }, + // } + // } var n_type: u8 = macho.N_SECT | macho.N_EXT; var n_desc: u16 = 0; @@ -4377,41 +3950,44 @@ pub fn updateDeclExports( else => unreachable, } - const global_sym_index = if (exp.link.macho.sym_index) |i| i else blk: { - const i = if (self.globals_free_list.popOrNull()) |i| i else inner: { - _ = self.globals.addOneAssumeCapacity(); - break :inner @intCast(u32, self.globals.items.len - 1); - }; - break :blk i; - }; - const sym = &self.globals.items[global_sym_index]; + const global_sym_index: u32 = 0; + // const global_sym_index = if (exp.link.macho.sym_index) |i| i else blk: { + // const i = if (self.globals_free_list.popOrNull()) |i| i else inner: { + // _ = self.globals.addOneAssumeCapacity(); + // break :inner @intCast(u32, self.globals.items.len - 1); + // }; + // break :blk i; + // }; + const sym = &self.locals.items[global_sym_index]; sym.* = .{ - .n_strx = try self.makeString(exp_name), + .n_strx = try self.strtab.insert(self.base.allocator, exp_name), .n_type = n_type, .n_sect = @intCast(u8, self.text_section_index.?) 
+ 1, .n_desc = n_desc, .n_value = decl_sym.n_value, }; exp.link.macho.sym_index = global_sym_index; + _ = n_strx; - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = decl.link.macho.local_sym_index, - }); + // try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ + // .where = .global, + // .where_index = global_sym_index, + // .sym_index = decl.link.macho.sym_index, + // }); } } pub fn deleteExport(self: *MachO, exp: Export) void { if (self.llvm_object) |_| return; const sym_index = exp.sym_index orelse return; - self.globals_free_list.append(self.base.allocator, sym_index) catch {}; - const global = &self.globals.items[sym_index]; - log.debug("deleting export '{s}': {}", .{ self.getString(global.n_strx), global }); - assert(self.symbol_resolver.remove(global.n_strx)); - global.n_type = 0; - global.n_strx = 0; - global.n_value = 0; + _ = sym_index; + // self.globals_free_list.append(self.base.allocator, sym_index) catch {}; + // const global = &self.globals.items[sym_index]; + // log.warn("deleting export '{s}': {}", .{ self.getString(global.n_strx), global }); + // assert(self.symbol_resolver.remove(global.n_strx)); + // global.n_type = 0; + // global.n_strx = 0; + // global.n_value = 0; } fn freeUnnamedConsts(self: *MachO, decl_index: Module.Decl.Index) void { @@ -4421,11 +3997,11 @@ fn freeUnnamedConsts(self: *MachO, decl_index: Module.Decl.Index) void { .seg = self.text_segment_cmd_index.?, .sect = self.text_const_section_index.?, }, true); - self.locals_free_list.append(self.base.allocator, atom.local_sym_index) catch {}; - self.locals.items[atom.local_sym_index].n_type = 0; - _ = self.atom_by_index_table.remove(atom.local_sym_index); - log.debug(" adding local symbol index {d} to free list", .{atom.local_sym_index}); - atom.local_sym_index = 0; + self.locals_free_list.append(self.base.allocator, atom.sym_index) catch {}; + 
self.locals.items[atom.sym_index].n_type = 0; + _ = self.atom_by_index_table.remove(atom.sym_index); + log.debug(" adding local symbol index {d} to free list", .{atom.sym_index}); + atom.sym_index = 0; } unnamed_consts.clearAndFree(self.base.allocator); } @@ -4443,29 +4019,30 @@ pub fn freeDecl(self: *MachO, decl_index: Module.Decl.Index) void { self.freeUnnamedConsts(decl_index); } // Appending to free lists is allowed to fail because the free lists are heuristics based anyway. - if (decl.link.macho.local_sym_index != 0) { - self.locals_free_list.append(self.base.allocator, decl.link.macho.local_sym_index) catch {}; + if (decl.link.macho.sym_index != 0) { + self.locals_free_list.append(self.base.allocator, decl.link.macho.sym_index) catch {}; // Try freeing GOT atom if this decl had one - if (self.got_entries_table.get(.{ .local = decl.link.macho.local_sym_index })) |got_index| { + const got_target = SymbolWithLoc{ .sym_index = decl.link.macho.sym_index, .file = null }; + if (self.got_entries_table.get(got_target)) |got_index| { self.got_entries_free_list.append(self.base.allocator, @intCast(u32, got_index)) catch {}; - self.got_entries.items[got_index] = .{ .target = .{ .local = 0 }, .atom = undefined }; - _ = self.got_entries_table.swapRemove(.{ .local = decl.link.macho.local_sym_index }); + self.got_entries.items[got_index] = .{ .target = .{ .sym_index = 0, .file = null }, .atom = undefined }; + _ = self.got_entries_table.swapRemove(got_target); if (self.d_sym) |*d_sym| { - d_sym.swapRemoveRelocs(decl.link.macho.local_sym_index); + d_sym.swapRemoveRelocs(decl.link.macho.sym_index); } log.debug(" adding GOT index {d} to free list (target local@{d})", .{ got_index, - decl.link.macho.local_sym_index, + decl.link.macho.sym_index, }); } - self.locals.items[decl.link.macho.local_sym_index].n_type = 0; - _ = self.atom_by_index_table.remove(decl.link.macho.local_sym_index); - log.debug(" adding local symbol index {d} to free list", .{decl.link.macho.local_sym_index}); 
- decl.link.macho.local_sym_index = 0; + self.locals.items[decl.link.macho.sym_index].n_type = 0; + _ = self.atom_by_index_table.remove(decl.link.macho.sym_index); + log.debug(" adding local symbol index {d} to free list", .{decl.link.macho.sym_index}); + decl.link.macho.sym_index = 0; } if (self.d_sym) |*d_sym| { d_sym.dwarf.freeDecl(decl); @@ -4477,12 +4054,12 @@ pub fn getDeclVAddr(self: *MachO, decl_index: Module.Decl.Index, reloc_info: Fil const decl = mod.declPtr(decl_index); assert(self.llvm_object == null); - assert(decl.link.macho.local_sym_index != 0); + assert(decl.link.macho.sym_index != 0); const atom = self.atom_by_index_table.get(reloc_info.parent_atom_index).?; try atom.relocs.append(self.base.allocator, .{ .offset = @intCast(u32, reloc_info.offset), - .target = .{ .local = decl.link.macho.local_sym_index }, + .target = .{ .sym_index = decl.link.macho.sym_index, .file = null }, .addend = reloc_info.addend, .subtractor = null, .pcrel = false, @@ -5019,8 +4596,6 @@ fn populateMissingMetadata(self: *MachO) !void { }); self.load_commands_dirty = true; } - - self.cold_start = true; } fn calcMinHeaderpad(self: *MachO) u64 { @@ -5121,7 +4696,7 @@ fn allocateSegment(self: *MachO, maybe_index: ?u16, indices: []const ?u16, init_ // Allocate the sections according to their alignment at the beginning of the segment. 
var start = init_size; - for (seg.sections.items) |*sect, sect_id| { + for (seg.sections.items) |*sect| { const is_zerofill = sect.flags == macho.S_ZEROFILL or sect.flags == macho.S_THREAD_LOCAL_ZEROFILL; const use_llvm = build_options.have_llvm and self.base.options.use_llvm; const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; @@ -5129,32 +4704,12 @@ fn allocateSegment(self: *MachO, maybe_index: ?u16, indices: []const ?u16, init_ const start_aligned = mem.alignForwardGeneric(u64, start, alignment); // TODO handle zerofill sections in stage2 - sect.offset = if (is_zerofill and (use_stage1 or use_llvm)) 0 else @intCast(u32, seg.inner.fileoff + start_aligned); + sect.offset = if (is_zerofill and (use_stage1 or use_llvm)) + 0 + else + @intCast(u32, seg.inner.fileoff + start_aligned); sect.addr = seg.inner.vmaddr + start_aligned; - // Recalculate section size given the allocated start address - sect.size = if (self.atoms.get(.{ - .seg = index, - .sect = @intCast(u16, sect_id), - })) |last_atom| blk: { - var atom = last_atom; - while (atom.prev) |prev| { - atom = prev; - } - - var base_addr = sect.addr; - - while (true) { - const atom_alignment = try math.powi(u32, 2, atom.alignment); - base_addr = mem.alignForwardGeneric(u64, base_addr, atom_alignment) + atom.size; - if (atom.next) |next| { - atom = next; - } else break; - } - - break :blk base_addr - sect.addr; - } else 0; - start = start_aligned + sect.size; if (!(is_zerofill and (use_stage1 or use_llvm))) { @@ -5410,12 +4965,30 @@ fn getSectionMaxAlignment(self: *MachO, segment_id: u16, start_sect_id: u16) !u3 return max_alignment; } -fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, match: MatchingSection) !u64 { +fn allocateAtomCommon(self: *MachO, atom: *Atom, match: MatchingSection) !void { + const sym = atom.getSymbolPtr(self); + if (self.needs_prealloc) { + const size = atom.size; + const alignment = try math.powi(u32, 2, atom.alignment); + const vaddr = 
try self.allocateAtom(atom, size, alignment, match); + const sym_name = atom.getName(self); + log.debug("allocated {s} atom at 0x{x}", .{ sym_name, vaddr }); + sym.n_value = vaddr; + } else try self.addAtomToSection(atom, match); + sym.n_sect = self.getSectionOrdinal(match); +} + +fn allocateAtom( + self: *MachO, + atom: *Atom, + new_atom_size: u64, + alignment: u64, + match: MatchingSection, +) !u64 { const tracy = trace(@src()); defer tracy.end(); - const seg = &self.load_commands.items[match.seg].segment; - const sect = &seg.sections.items[match.sect]; + const sect = self.getSectionPtr(match); var free_list = self.atom_free_lists.get(match).?; const needs_padding = match.seg == self.text_segment_cmd_index.? and match.sect == self.text_section_index.?; const new_atom_ideal_capacity = if (needs_padding) padToIdeal(new_atom_size) else new_atom_size; @@ -5436,8 +5009,8 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, m const big_atom = free_list.items[i]; // We now have a pointer to a live atom that has too much capacity. // Is it enough that we could fit this new atom? - const sym = self.locals.items[big_atom.local_sym_index]; - const capacity = big_atom.capacity(self.*); + const sym = self.locals.items[big_atom.sym_index]; + const capacity = big_atom.capacity(self); const ideal_capacity = if (needs_padding) padToIdeal(capacity) else capacity; const ideal_capacity_end_vaddr = math.add(u64, sym.n_value, ideal_capacity) catch ideal_capacity; const capacity_end_vaddr = sym.n_value + capacity; @@ -5447,7 +5020,7 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, m // Additional bookkeeping here to notice if this free list node // should be deleted because the atom that it points to has grown to take up // more of the extra capacity. 
- if (!big_atom.freeListEligible(self.*)) { + if (!big_atom.freeListEligible(self)) { _ = free_list.swapRemove(i); } else { i += 1; @@ -5467,7 +5040,7 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, m } break :blk new_start_vaddr; } else if (self.atoms.get(match)) |last| { - const last_symbol = self.locals.items[last.local_sym_index]; + const last_symbol = self.locals.items[last.sym_index]; const ideal_capacity = if (needs_padding) padToIdeal(last.size) else last.size; const ideal_capacity_end_vaddr = last_symbol.n_value + ideal_capacity; const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, alignment); @@ -5516,7 +5089,7 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, m return vaddr; } -fn addAtomToSection(self: *MachO, atom: *Atom, match: MatchingSection) !void { +pub fn addAtomToSection(self: *MachO, atom: *Atom, match: MatchingSection) !void { if (self.atoms.getPtr(match)) |last| { last.*.next = atom; atom.prev = last.*; @@ -5524,34 +5097,38 @@ fn addAtomToSection(self: *MachO, atom: *Atom, match: MatchingSection) !void { } else { try self.atoms.putNoClobber(self.base.allocator, match, atom); } - const seg = &self.load_commands.items[match.seg].segment; - const sect = &seg.sections.items[match.sect]; - sect.size += atom.size; + const sect = self.getSectionPtr(match); + const atom_alignment = try math.powi(u32, 2, atom.alignment); + const aligned_end_addr = mem.alignForwardGeneric(u64, sect.size, atom_alignment); + const padding = aligned_end_addr - sect.size; + sect.size += padding + atom.size; + sect.@"align" = @maximum(sect.@"align", atom.alignment); } pub fn getGlobalSymbol(self: *MachO, name: []const u8) !u32 { - const sym_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{name}); - defer self.base.allocator.free(sym_name); - const n_strx = try self.makeString(sym_name); + const gpa = self.base.allocator; + const sym_name = try std.fmt.allocPrint(gpa, 
"_{s}", .{name}); + defer gpa.free(sym_name); - if (!self.symbol_resolver.contains(n_strx)) { - log.debug("adding new extern function '{s}'", .{sym_name}); - const sym_index = @intCast(u32, self.undefs.items.len); - try self.undefs.append(self.base.allocator, .{ - .n_strx = n_strx, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .undef, - .where_index = sym_index, - }); - try self.unresolved.putNoClobber(self.base.allocator, sym_index, .stub); + if (self.globals.getIndex(sym_name)) |global_index| { + return @intCast(u32, global_index); } - return n_strx; + const n_strx = try self.strtab.insert(gpa, sym_name); + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ + .n_strx = n_strx, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try self.globals.putNoClobber(gpa, sym_name, .{ + .sym_index = sym_index, + .file = null, + }); + const global_index = self.globals.getIndex(sym_name).?; + return @intCast(u32, global_index); } fn getSegmentAllocBase(self: MachO, indices: []const ?u16) struct { vmaddr: u64, fileoff: u64 } { @@ -5579,15 +5156,44 @@ fn pruneAndSortSectionsInSegment(self: *MachO, maybe_seg_id: *?u16, indices: []* for (indices) |maybe_index| { const old_idx = maybe_index.* orelse continue; - const sect = sections[old_idx]; + const sect = §ions[old_idx]; + + // Recalculate section alignment and size if required. 
+ const match = MatchingSection{ + .seg = seg_id, + .sect = old_idx, + }; + if (self.gc_sections.get(match)) |_| blk: { + sect.@"align" = 0; + sect.size = 0; + + var atom = self.atoms.get(match) orelse break :blk; + + while (atom.prev) |prev| { + atom = prev; + } + + while (true) { + const atom_alignment = try math.powi(u32, 2, atom.alignment); + const aligned_end_addr = mem.alignForwardGeneric(u64, sect.size, atom_alignment); + const padding = aligned_end_addr - sect.size; + sect.size += padding + atom.size; + sect.@"align" = @maximum(sect.@"align", atom.alignment); + + if (atom.next) |next| { + atom = next; + } else break; + } + } + if (sect.size == 0) { - log.warn("pruning section {s},{s}", .{ sect.segName(), sect.sectName() }); + log.debug("pruning section {s},{s}", .{ sect.segName(), sect.sectName() }); maybe_index.* = null; seg.inner.cmdsize -= @sizeOf(macho.section_64); seg.inner.nsects -= 1; } else { maybe_index.* = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sect); + seg.sections.appendAssumeCapacity(sect.*); } try mapping.putNoClobber(old_idx, maybe_index.*); } @@ -5614,7 +5220,7 @@ fn pruneAndSortSectionsInSegment(self: *MachO, maybe_seg_id: *?u16, indices: []* if (seg.inner.nsects == 0 and !mem.eql(u8, "__TEXT", seg.inner.segName())) { // Segment has now become empty, so mark it as such - log.warn("marking segment {s} as dead", .{seg.inner.segName()}); + log.debug("marking segment {s} as dead", .{seg.inner.segName()}); seg.inner.cmd = @intToEnum(macho.LC, 0); maybe_seg_id.* = null; } @@ -5697,36 +5303,22 @@ fn pruneAndSortSections(self: *MachO) !void { } fn gcAtoms(self: *MachO) !void { - const dead_strip = self.base.options.gc_sections orelse false; + const dead_strip = self.base.options.gc_sections orelse return; if (!dead_strip) return; + const gpa = self.base.allocator; + // Add all exports as GC roots - for (self.globals.items) |sym| { - if (sym.n_type == 0) continue; - const resolv = 
self.symbol_resolver.get(sym.n_strx).?; - assert(resolv.where == .global); - const gc_root = self.atom_by_index_table.get(resolv.local_sym_index) orelse { - log.warn("skipping {s}", .{self.getString(sym.n_strx)}); + for (self.globals.values()) |global| { + const sym = self.getSymbol(global); + if (!sym.sect()) continue; + const gc_root = self.getAtomForSymbol(global) orelse { + log.debug("skipping {s}", .{self.getSymbolName(global)}); continue; }; - _ = try self.gc_roots.getOrPut(self.base.allocator, gc_root); + _ = try self.gc_roots.getOrPut(gpa, gc_root); } - // if (self.tlv_ptrs_section_index) |sect| { - // var atom = self.atoms.get(.{ - // .seg = self.data_segment_cmd_index.?, - // .sect = sect, - // }).?; - - // while (true) { - // _ = try self.gc_roots.getOrPut(self.base.allocator, atom); - - // if (atom.prev) |prev| { - // atom = prev; - // } else break; - // } - // } - // Add any atom targeting an import as GC root var atoms_it = self.atoms.iterator(); while (atoms_it.next()) |entry| { @@ -5734,19 +5326,13 @@ fn gcAtoms(self: *MachO) !void { while (true) { for (atom.relocs.items) |rel| { - if ((try Atom.getTargetAtom(rel, self)) == null) switch (rel.target) { - .local => {}, - .global => |n_strx| { - const resolv = self.symbol_resolver.get(n_strx).?; - switch (resolv.where) { - .global => {}, - .undef => { - _ = try self.gc_roots.getOrPut(self.base.allocator, atom); - break; - }, - } - }, - }; + if ((try rel.getTargetAtom(self)) == null) { + const target_sym = self.getSymbol(rel.target); + if (target_sym.undf()) { + _ = try self.gc_roots.getOrPut(gpa, atom); + break; + } + } } if (atom.prev) |prev| { @@ -5755,15 +5341,15 @@ fn gcAtoms(self: *MachO) !void { } } - var stack = std.ArrayList(*Atom).init(self.base.allocator); + var stack = std.ArrayList(*Atom).init(gpa); defer stack.deinit(); try stack.ensureUnusedCapacity(self.gc_roots.count()); - var retained = std.AutoHashMap(*Atom, void).init(self.base.allocator); + var retained = std.AutoHashMap(*Atom, 
void).init(gpa); defer retained.deinit(); try retained.ensureUnusedCapacity(self.gc_roots.count()); - log.warn("GC roots:", .{}); + log.debug("GC roots:", .{}); var gc_roots_it = self.gc_roots.keyIterator(); while (gc_roots_it.next()) |gc_root| { self.logAtom(gc_root.*); @@ -5772,15 +5358,15 @@ fn gcAtoms(self: *MachO) !void { retained.putAssumeCapacityNoClobber(gc_root.*, {}); } - log.warn("walking tree...", .{}); + log.debug("walking tree...", .{}); while (stack.popOrNull()) |source_atom| { for (source_atom.relocs.items) |rel| { - if (try Atom.getTargetAtom(rel, self)) |target_atom| { + if (try rel.getTargetAtom(self)) |target_atom| { const gop = try retained.getOrPut(target_atom); if (!gop.found_existing) { - log.warn(" RETAINED ATOM(%{d}) -> ATOM(%{d})", .{ - source_atom.local_sym_index, - target_atom.local_sym_index, + log.debug(" RETAINED ATOM(%{d}) -> ATOM(%{d})", .{ + source_atom.sym_index, + target_atom.sym_index, }); try stack.append(target_atom); } @@ -5808,58 +5394,38 @@ fn gcAtoms(self: *MachO) !void { } } - const seg = &self.load_commands.items[match.seg].segment; - const sect = &seg.sections.items[match.sect]; + const sect = self.getSectionPtr(match); var atom = entry.value_ptr.*; - log.warn("GCing atoms in {s},{s}", .{ sect.segName(), sect.sectName() }); + log.debug("GCing atoms in {s},{s}", .{ sect.segName(), sect.sectName() }); while (true) { const orig_prev = atom.prev; if (!retained.contains(atom)) { // Dead atom; remove. 
- log.warn(" DEAD ATOM(%{d})", .{atom.local_sym_index}); + log.debug(" DEAD ATOM(%{d})", .{atom.sym_index}); - const sym = &self.locals.items[atom.local_sym_index]; + const sym = atom.getSymbolPtr(self); sym.n_desc = N_DESC_GCED; - if (self.symbol_resolver.getPtr(sym.n_strx)) |resolv| { - if (resolv.local_sym_index == atom.local_sym_index) { - const global = &self.globals.items[resolv.where_index]; - global.n_desc = N_DESC_GCED; - } - } - - for (self.got_entries.items) |got_entry| { - if (got_entry.atom == atom) { - _ = self.got_entries_table.swapRemove(got_entry.target); - break; - } - } - - for (self.stubs.items) |stub, i| { - if (stub == atom) { - _ = self.stubs_table.swapRemove(@intCast(u32, i)); - break; - } - } + // TODO add full bookkeeping here + const global = SymbolWithLoc{ .sym_index = atom.sym_index, .file = atom.file }; + _ = self.got_entries_table.swapRemove(global); + _ = self.stubs_table.swapRemove(global); + _ = self.tlv_ptr_entries_table.swapRemove(global); for (atom.contained.items) |sym_off| { - const inner = &self.locals.items[sym_off.local_sym_index]; + const inner = self.getSymbolPtr(.{ + .sym_index = sym_off.sym_index, + .file = atom.file, + }); inner.n_desc = N_DESC_GCED; - - if (self.symbol_resolver.getPtr(inner.n_strx)) |resolv| { - if (resolv.local_sym_index == atom.local_sym_index) { - const global = &self.globals.items[resolv.where_index]; - global.n_desc = N_DESC_GCED; - } - } } - - log.warn(" BEFORE size = {x}", .{sect.size}); + // If we want to enable GC for incremental codepath, we need to take into + // account any padding that might have been left here. sect.size -= atom.size; - log.warn(" AFTER size = {x}", .{sect.size}); + if (atom.prev) |prev| { prev.next = atom.next; } @@ -5870,6 +5436,8 @@ fn gcAtoms(self: *MachO) !void { // The section will be GCed in the next step. 
entry.value_ptr.* = if (atom.prev) |prev| prev else undefined; } + + _ = try self.gc_sections.getOrPut(gpa, match); } if (orig_prev) |prev| { @@ -5885,7 +5453,11 @@ fn updateSectionOrdinals(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - var ordinal_remap = std.AutoHashMap(u8, u8).init(self.base.allocator); + log.debug("updating section ordinals", .{}); + + const gpa = self.base.allocator; + + var ordinal_remap = std.AutoHashMap(u8, u8).init(gpa); defer ordinal_remap.deinit(); var ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}; @@ -5897,27 +5469,38 @@ fn updateSectionOrdinals(self: *MachO) !void { }) |maybe_index| { const index = maybe_index orelse continue; const seg = self.load_commands.items[index].segment; - for (seg.sections.items) |_, sect_id| { + for (seg.sections.items) |sect, sect_id| { const match = MatchingSection{ .seg = @intCast(u16, index), .sect = @intCast(u16, sect_id), }; - const old_ordinal = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); + const old_ordinal = self.getSectionOrdinal(match); new_ordinal += 1; + log.debug("'{s},{s}': sect({d}, '_,_') => sect({d}, '_,_')", .{ + sect.segName(), + sect.sectName(), + old_ordinal, + new_ordinal, + }); try ordinal_remap.putNoClobber(old_ordinal, new_ordinal); - try ordinals.putNoClobber(self.base.allocator, match, {}); + try ordinals.putNoClobber(gpa, match, {}); } } for (self.locals.items) |*sym| { + if (sym.undf()) continue; if (sym.n_sect == 0) continue; sym.n_sect = ordinal_remap.get(sym.n_sect).?; } - for (self.globals.items) |*sym| { - sym.n_sect = ordinal_remap.get(sym.n_sect).?; + for (self.objects.items) |*object| { + for (object.symtab.items) |*sym| { + if (sym.undf()) continue; + if (sym.n_sect == 0) continue; + sym.n_sect = ordinal_remap.get(sym.n_sect).?; + } } - self.section_ordinals.deinit(self.base.allocator); + self.section_ordinals.deinit(gpa); self.section_ordinals = ordinals; } @@ -5925,11 +5508,13 @@ fn writeDyldInfoData(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - var rebase_pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + const gpa = self.base.allocator; + + var rebase_pointers = std.ArrayList(bind.Pointer).init(gpa); defer rebase_pointers.deinit(); - var bind_pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + var bind_pointers = std.ArrayList(bind.Pointer).init(gpa); defer bind_pointers.deinit(); - var lazy_bind_pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + var lazy_bind_pointers = std.ArrayList(bind.Pointer).init(gpa); defer lazy_bind_pointers.deinit(); { @@ -5942,13 +5527,13 @@ fn writeDyldInfoData(self: *MachO) !void { if (match.seg == seg) continue; // __TEXT is non-writable } - const seg = self.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; - log.warn("dyld info for {s},{s}", .{ sect.segName(), sect.sectName() }); + const seg = self.getSegment(match); + const sect = self.getSection(match); + 
log.debug("dyld info for {s},{s}", .{ sect.segName(), sect.sectName() }); while (true) { - log.warn(" ATOM %{d}", .{atom.local_sym_index}); - const sym = self.locals.items[atom.local_sym_index]; + log.debug(" ATOM %{d}", .{atom.sym_index}); + const sym = atom.getSymbol(self); const base_offset = sym.n_value - seg.inner.vmaddr; for (atom.rebases.items) |offset| { @@ -5959,57 +5544,35 @@ fn writeDyldInfoData(self: *MachO) !void { } for (atom.bindings.items) |binding| { - const resolv = self.symbol_resolver.get(binding.n_strx).?; - switch (resolv.where) { - .global => { - // Turn into a rebase. - try rebase_pointers.append(.{ - .offset = base_offset + binding.offset, - .segment_id = match.seg, - }); - }, - .undef => { - const bind_sym = self.undefs.items[resolv.where_index]; - var flags: u4 = 0; - if (bind_sym.weakRef()) { - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); - } - try bind_pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = match.seg, - .dylib_ordinal = @divTrunc(@bitCast(i16, bind_sym.n_desc), macho.N_SYMBOL_RESOLVER), - .name = self.getString(bind_sym.n_strx), - .bind_flags = flags, - }); - }, + const global = self.globals.values()[binding.global_index]; + const bind_sym = self.getSymbol(global); + var flags: u4 = 0; + if (bind_sym.weakRef()) { + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); } + try bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = match.seg, + .dylib_ordinal = @divTrunc(@bitCast(i16, bind_sym.n_desc), macho.N_SYMBOL_RESOLVER), + .name = self.getSymbolName(global), + .bind_flags = flags, + }); } for (atom.lazy_bindings.items) |binding| { - const resolv = self.symbol_resolver.get(binding.n_strx).?; - switch (resolv.where) { - .global => { - // Turn into a rebase. 
- try rebase_pointers.append(.{ - .offset = base_offset + binding.offset, - .segment_id = match.seg, - }); - }, - .undef => { - const bind_sym = self.undefs.items[resolv.where_index]; - var flags: u4 = 0; - if (bind_sym.weakRef()) { - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); - } - try lazy_bind_pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = match.seg, - .dylib_ordinal = @divTrunc(@bitCast(i16, bind_sym.n_desc), macho.N_SYMBOL_RESOLVER), - .name = self.getString(bind_sym.n_strx), - .bind_flags = flags, - }); - }, + const global = self.globals.values()[binding.global_index]; + const bind_sym = self.getSymbol(global); + var flags: u4 = 0; + if (bind_sym.weakRef()) { + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); } + try lazy_bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = match.seg, + .dylib_ordinal = @divTrunc(@bitCast(i16, bind_sym.n_desc), macho.N_SYMBOL_RESOLVER), + .name = self.getSymbolName(global), + .bind_flags = flags, + }); } if (atom.prev) |prev| { @@ -6020,7 +5583,7 @@ fn writeDyldInfoData(self: *MachO) !void { } var trie: Trie = .{}; - defer trie.deinit(self.base.allocator); + defer trie.deinit(gpa); { // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
@@ -6029,19 +5592,22 @@ fn writeDyldInfoData(self: *MachO) !void { const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].segment; const base_address = text_segment.inner.vmaddr; - for (self.globals.items) |sym| { - if (sym.n_type == 0) continue; - const sym_name = self.getString(sym.n_strx); + for (self.globals.values()) |global| { + const sym = self.getSymbol(global); + if (sym.undf()) continue; + if (!sym.ext()) continue; + if (sym.n_desc == N_DESC_GCED) continue; + const sym_name = self.getSymbolName(global); log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); - try trie.put(self.base.allocator, .{ + try trie.put(gpa, .{ .name = sym_name, .vmaddr_offset = sym.n_value - base_address, .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, }); } - try trie.finalize(self.base.allocator); + try trie.finalize(gpa); } const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; @@ -6086,8 +5652,8 @@ fn writeDyldInfoData(self: *MachO) !void { seg.inner.filesize = dyld_info.export_off + dyld_info.export_size - seg.inner.fileoff; const needed_size = dyld_info.export_off + dyld_info.export_size - dyld_info.rebase_off; - var buffer = try self.base.allocator.alloc(u8, needed_size); - defer self.base.allocator.free(buffer); + var buffer = try gpa.alloc(u8, needed_size); + defer gpa.free(buffer); mem.set(u8, buffer, 0); var stream = std.io.fixedBufferStream(buffer); @@ -6114,10 +5680,12 @@ fn writeDyldInfoData(self: *MachO) !void { try self.populateLazyBindOffsetsInStubHelper( buffer[dyld_info.lazy_bind_off - base_off ..][0..dyld_info.lazy_bind_size], ); + self.load_commands_dirty = true; } fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { + const gpa = self.base.allocator; const text_segment_cmd_index = self.text_segment_cmd_index orelse return; const stub_helper_section_index = self.stub_helper_section_index orelse return; const last_atom = self.atoms.get(.{ @@ -6127,7 +5695,7 
@@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { if (self.stub_helper_preamble_atom == null) return; if (last_atom == self.stub_helper_preamble_atom.?) return; - var table = std.AutoHashMap(i64, *Atom).init(self.base.allocator); + var table = std.AutoHashMap(i64, *Atom).init(gpa); defer table.deinit(); { @@ -6143,7 +5711,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { while (true) { const laptr_off = blk: { - const sym = self.locals.items[laptr_atom.local_sym_index]; + const sym = laptr_atom.getSymbol(self); break :blk @intCast(i64, sym.n_value - base_addr); }; try table.putNoClobber(laptr_off, stub_atom); @@ -6156,7 +5724,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { var stream = std.io.fixedBufferStream(buffer); var reader = stream.reader(); - var offsets = std.ArrayList(struct { sym_offset: i64, offset: u32 }).init(self.base.allocator); + var offsets = std.ArrayList(struct { sym_offset: i64, offset: u32 }).init(gpa); try offsets.append(.{ .sym_offset = undefined, .offset = 0 }); defer offsets.deinit(); var valid_block = false; @@ -6199,10 +5767,10 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { } } - const sect = blk: { - const seg = self.load_commands.items[text_segment_cmd_index].segment; - break :blk seg.sections.items[stub_helper_section_index]; - }; + const sect = self.getSection(.{ + .seg = text_segment_cmd_index, + .sect = stub_helper_section_index, + }); const stub_offset: u4 = switch (self.base.options.target.cpu.arch) { .x86_64 => 1, .aarch64 => 2 * @sizeOf(u32), @@ -6213,79 +5781,63 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { while (offsets.popOrNull()) |bind_offset| { const atom = table.get(bind_offset.sym_offset).?; - const sym = self.locals.items[atom.local_sym_index]; + const sym = atom.getSymbol(self); const file_offset = sect.offset + sym.n_value - sect.addr + 
stub_offset; mem.writeIntLittle(u32, &buf, bind_offset.offset); log.debug("writing lazy bind offset in stub helper of 0x{x} for symbol {s} at offset 0x{x}", .{ bind_offset.offset, - self.getString(sym.n_strx), + atom.getName(self), file_offset, }); try self.base.file.?.pwriteAll(&buf, file_offset); } } +const asc_u64 = std.sort.asc(u64); + fn writeFunctionStarts(self: *MachO) !void { - var atom = self.atoms.get(.{ - .seg = self.text_segment_cmd_index orelse return, - .sect = self.text_section_index orelse return, - }) orelse return; + const text_seg_index = self.text_segment_cmd_index orelse return; + const text_sect_index = self.text_section_index orelse return; + const text_seg = self.load_commands.items[text_seg_index].segment; const tracy = trace(@src()); defer tracy.end(); - while (atom.prev) |prev| { - atom = prev; + const gpa = self.base.allocator; + + // We need to sort by address first + var addresses = std.ArrayList(u64).init(gpa); + defer addresses.deinit(); + try addresses.ensureTotalCapacityPrecise(self.globals.count()); + + for (self.globals.values()) |global| { + const sym = self.getSymbol(global); + if (sym.undf()) continue; + if (sym.n_desc == N_DESC_GCED) continue; + const match = self.getMatchingSectionFromOrdinal(sym.n_sect); + if (match.seg != text_seg_index or match.sect != text_sect_index) continue; + + addresses.appendAssumeCapacity(sym.n_value); } - var offsets = std.ArrayList(u32).init(self.base.allocator); + std.sort.sort(u64, addresses.items, {}, asc_u64); + + var offsets = std.ArrayList(u32).init(gpa); defer offsets.deinit(); + try offsets.ensureTotalCapacityPrecise(addresses.items.len); - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; var last_off: u32 = 0; + for (addresses.items) |addr| { + const offset = @intCast(u32, addr - text_seg.inner.vmaddr); + const diff = offset - last_off; - while (true) { - const atom_sym = self.locals.items[atom.local_sym_index]; + if (diff == 0) continue; - if 
(atom_sym.n_strx != 0) blk: { - if (self.symbol_resolver.get(atom_sym.n_strx)) |resolv| { - assert(resolv.where == .global); - if (resolv.local_sym_index != atom.local_sym_index) break :blk; - } - - const offset = @intCast(u32, atom_sym.n_value - text_seg.inner.vmaddr); - const diff = offset - last_off; - - if (diff == 0) break :blk; - - try offsets.append(diff); - last_off = offset; - } - - for (atom.contained.items) |cont| { - const cont_sym = self.locals.items[cont.local_sym_index]; - - if (cont_sym.n_strx == 0) continue; - if (self.symbol_resolver.get(cont_sym.n_strx)) |resolv| { - assert(resolv.where == .global); - if (resolv.local_sym_index != cont.local_sym_index) continue; - } - - const offset = @intCast(u32, cont_sym.n_value - text_seg.inner.vmaddr); - const diff = offset - last_off; - - if (diff == 0) continue; - - try offsets.append(diff); - last_off = offset; - } - - if (atom.next) |next| { - atom = next; - } else break; + offsets.appendAssumeCapacity(diff); + last_off = offset; } - var buffer = std.ArrayList(u8).init(self.base.allocator); + var buffer = std.ArrayList(u8).init(gpa); defer buffer.deinit(); const max_size = @intCast(usize, offsets.items.len * @sizeOf(u64)); @@ -6331,12 +5883,14 @@ fn writeDices(self: *MachO) !void { atom = prev; } - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const text_sect = text_seg.sections.items[self.text_section_index.?]; + const text_sect = self.getSection(.{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }); while (true) { if (atom.dices.items.len > 0) { - const sym = self.locals.items[atom.local_sym_index]; + const sym = atom.getSymbol(self); const base_off = math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset) orelse return error.Overflow; try buf.ensureUnusedCapacity(atom.dices.items.len * @sizeOf(macho.data_in_code_entry)); @@ -6377,113 +5931,139 @@ fn writeSymbolTable(self: *MachO) !void { const tracy = trace(@src()); defer 
tracy.end(); + const gpa = self.base.allocator; const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; const symoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(macho.nlist_64)); symtab.symoff = @intCast(u32, symoff); - var locals = std.ArrayList(macho.nlist_64).init(self.base.allocator); + var locals = std.ArrayList(macho.nlist_64).init(gpa); defer locals.deinit(); - for (self.locals.items) |sym| { - if (sym.n_strx == 0) continue; - if (sym.n_desc == N_DESC_GCED) continue; - if (self.symbol_resolver.get(sym.n_strx)) |_| continue; + for (self.locals.items) |sym, sym_id| { + if (sym.n_strx == 0) continue; // no name, skip + if (sym.n_desc == N_DESC_GCED) continue; // GCed, skip + const sym_loc = SymbolWithLoc{ .sym_index = @intCast(u32, sym_id), .file = null }; + if (self.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip + if (self.globals.contains(self.getSymbolName(sym_loc))) continue; // global symbol is either an export or import, skip try locals.append(sym); } - var globals = std.ArrayList(macho.nlist_64).init(self.base.allocator); - defer globals.deinit(); + for (self.objects.items) |object, object_id| { + if (self.has_stabs) { + if (object.debug_info) |_| { + // Open scope + try locals.ensureUnusedCapacity(3); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, object.tu_comp_dir.?), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, object.tu_name.?), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, object.name), + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = object.mtime orelse 0, + }); - for (self.globals.items) |sym| { - if (sym.n_desc == N_DESC_GCED) continue; - try 
globals.append(sym); - } + for (object.managed_atoms.items) |atom| { + for (atom.contained.items) |sym_at_off| { + const stab = sym_at_off.stab orelse continue; + const sym_loc = SymbolWithLoc{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }; + const sym = self.getSymbol(sym_loc); + if (sym.n_strx == 0) continue; + if (sym.n_desc == N_DESC_GCED) continue; + if (self.symbolIsTemp(sym_loc)) continue; - // TODO How do we handle null global symbols in incremental context? - var undefs = std.ArrayList(macho.nlist_64).init(self.base.allocator); - defer undefs.deinit(); - var undefs_table = std.AutoHashMap(u32, u32).init(self.base.allocator); - defer undefs_table.deinit(); - try undefs.ensureTotalCapacity(self.undefs.items.len); - try undefs_table.ensureTotalCapacity(@intCast(u32, self.undefs.items.len)); + const nlists = try stab.asNlists(.{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }, self); + defer gpa.free(nlists); - for (self.undefs.items) |sym, i| { - if (sym.n_strx == 0) continue; - const new_index = @intCast(u32, undefs.items.len); - undefs.appendAssumeCapacity(sym); - undefs_table.putAssumeCapacityNoClobber(@intCast(u32, i), new_index); - } - - if (self.has_stabs) { - for (self.objects.items) |object| { - if (object.debug_info == null) continue; - - // Open scope - try locals.ensureUnusedCapacity(3); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.tu_comp_dir.?), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.tu_name.?), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.name), - .n_type = macho.N_OSO, - .n_sect = 0, - .n_desc = 1, - .n_value = object.mtime orelse 0, - }); - - for (object.contained_atoms.items) |atom| { - for (atom.contained.items) |sym_at_off| { - const stab = sym_at_off.stab orelse continue; - 
const nlists = try stab.asNlists(sym_at_off.local_sym_index, self); - defer self.base.allocator.free(nlists); - try locals.appendSlice(nlists); + try locals.appendSlice(nlists); + } } - } - // Close scope - try locals.append(.{ - .n_strx = 0, - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); + // Close scope + try locals.append(.{ + .n_strx = 0, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + } + } + for (object.symtab.items) |sym, sym_id| { + if (sym.n_strx == 0) continue; // no name, skip + if (sym.n_desc == N_DESC_GCED) continue; // GCed, skip + const sym_loc = SymbolWithLoc{ .sym_index = @intCast(u32, sym_id), .file = @intCast(u32, object_id) }; + if (self.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip + if (self.globals.contains(self.getSymbolName(sym_loc))) continue; // global symbol is either an export or import, skip + var out_sym = sym; + out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(sym_loc)); + try locals.append(out_sym); } } + var exports = std.ArrayList(macho.nlist_64).init(gpa); + defer exports.deinit(); + + for (self.globals.values()) |global| { + const sym = self.getSymbol(global); + if (sym.undf()) continue; // import, skip + if (sym.n_desc == N_DESC_GCED) continue; // GCed, skip + var out_sym = sym; + out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(global)); + try exports.append(out_sym); + } + + var imports = std.ArrayList(macho.nlist_64).init(gpa); + defer imports.deinit(); + var imports_table = std.AutoHashMap(SymbolWithLoc, u32).init(gpa); + defer imports_table.deinit(); + + for (self.globals.values()) |global| { + const sym = self.getSymbol(global); + if (sym.n_strx == 0) continue; // no name, skip + if (!sym.undf()) continue; // not an import, skip + const new_index = @intCast(u32, imports.items.len); + var out_sym = sym; + out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(global)); + try imports.append(out_sym); + try 
imports_table.putNoClobber(global, new_index); + } + const nlocals = locals.items.len; - const nexports = globals.items.len; - const nundefs = undefs.items.len; + const nexports = exports.items.len; + const nimports = imports.items.len; + symtab.nsyms = @intCast(u32, nlocals + nexports + nimports); - const locals_off = symtab.symoff; - const locals_size = nlocals * @sizeOf(macho.nlist_64); - log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + try buffer.ensureTotalCapacityPrecise(symtab.nsyms * @sizeOf(macho.nlist_64)); + buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(locals.items)); + buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(exports.items)); + buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(imports.items)); - const exports_off = locals_off + locals_size; - const exports_size = nexports * @sizeOf(macho.nlist_64); - log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(globals.items), exports_off); + log.debug("writing symtab from 0x{x} to 0x{x}", .{ symtab.symoff, symtab.symoff + buffer.items.len }); + try self.base.file.?.pwriteAll(buffer.items, symtab.symoff); - const undefs_off = exports_off + exports_size; - const undefs_size = nundefs * @sizeOf(macho.nlist_64); - log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(undefs.items), undefs_off); - - symtab.nsyms = @intCast(u32, nlocals + nexports + nundefs); - seg.inner.filesize = symtab.symoff + symtab.nsyms * @sizeOf(macho.nlist_64) - seg.inner.fileoff; + seg.inner.filesize = symtab.symoff + buffer.items.len - seg.inner.fileoff; // Update dynamic symbol table. 
const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].dysymtab; @@ -6491,7 +6071,7 @@ fn writeSymbolTable(self: *MachO) !void { dysymtab.iextdefsym = dysymtab.nlocalsym; dysymtab.nextdefsym = @intCast(u32, nexports); dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; - dysymtab.nundefsym = @intCast(u32, nundefs); + dysymtab.nundefsym = @intCast(u32, nimports); const nstubs = @intCast(u32, self.stubs_table.count()); const ngot_entries = @intCast(u32, self.got_entries_table.count()); @@ -6507,55 +6087,53 @@ fn writeSymbolTable(self: *MachO) !void { dysymtab.indirectsymoff + dysymtab.nindirectsyms * @sizeOf(u32), }); - var buf = std.ArrayList(u8).init(self.base.allocator); + var buf = std.ArrayList(u8).init(gpa); defer buf.deinit(); try buf.ensureTotalCapacity(dysymtab.nindirectsyms * @sizeOf(u32)); const writer = buf.writer(); if (self.text_segment_cmd_index) |text_segment_cmd_index| blk: { const stubs_section_index = self.stubs_section_index orelse break :blk; - const text_segment = &self.load_commands.items[text_segment_cmd_index].segment; - const stubs = &text_segment.sections.items[stubs_section_index]; + const stubs = self.getSectionPtr(.{ + .seg = text_segment_cmd_index, + .sect = stubs_section_index, + }); stubs.reserved1 = 0; - for (self.stubs_table.keys()) |key| { - const resolv = self.symbol_resolver.get(key).?; - switch (resolv.where) { - .global => try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL), - .undef => try writer.writeIntLittle(u32, dysymtab.iundefsym + undefs_table.get(resolv.where_index).?), - } + for (self.stubs_table.keys()) |target| { + const sym = self.getSymbol(target); + assert(sym.undf()); + try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(target).?); } } if (self.data_const_segment_cmd_index) |data_const_segment_cmd_index| blk: { const got_section_index = self.got_section_index orelse break :blk; - const data_const_segment = 
&self.load_commands.items[data_const_segment_cmd_index].segment; - const got = &data_const_segment.sections.items[got_section_index]; + const got = self.getSectionPtr(.{ + .seg = data_const_segment_cmd_index, + .sect = got_section_index, + }); got.reserved1 = nstubs; - for (self.got_entries_table.keys()) |key| { - switch (key) { - .local => try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL), - .global => |n_strx| { - const resolv = self.symbol_resolver.get(n_strx).?; - switch (resolv.where) { - .global => try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL), - .undef => try writer.writeIntLittle(u32, dysymtab.iundefsym + undefs_table.get(resolv.where_index).?), - } - }, + for (self.got_entries_table.keys()) |target| { + const sym = self.getSymbol(target); + if (sym.undf()) { + try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(target).?); + } else { + try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); } } } if (self.data_segment_cmd_index) |data_segment_cmd_index| blk: { const la_symbol_ptr_section_index = self.la_symbol_ptr_section_index orelse break :blk; - const data_segment = &self.load_commands.items[data_segment_cmd_index].segment; - const la_symbol_ptr = &data_segment.sections.items[la_symbol_ptr_section_index]; + const la_symbol_ptr = self.getSectionPtr(.{ + .seg = data_segment_cmd_index, + .sect = la_symbol_ptr_section_index, + }); la_symbol_ptr.reserved1 = nstubs + ngot_entries; - for (self.stubs_table.keys()) |key| { - const resolv = self.symbol_resolver.get(key).?; - switch (resolv.where) { - .global => try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL), - .undef => try writer.writeIntLittle(u32, dysymtab.iundefsym + undefs_table.get(resolv.where_index).?), - } + for (self.stubs_table.keys()) |target| { + const sym = self.getSymbol(target); + assert(sym.undf()); + try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(target).?); } } @@ -6572,14 +6150,15 @@ fn writeStringTable(self: 
*MachO) !void { const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; const stroff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - const strsize = self.strtab.items.len; + + const strsize = self.strtab.buffer.items.len; symtab.stroff = @intCast(u32, stroff); symtab.strsize = @intCast(u32, strsize); seg.inner.filesize = symtab.stroff + symtab.strsize - seg.inner.fileoff; log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - try self.base.file.?.pwriteAll(self.strtab.items, symtab.stroff); + try self.base.file.?.pwriteAll(self.strtab.buffer.items, symtab.stroff); self.load_commands_dirty = true; } @@ -6737,42 +6316,81 @@ pub fn makeStaticString(bytes: []const u8) [16]u8 { return buf; } -pub fn makeString(self: *MachO, string: []const u8) !u32 { - const gop = try self.strtab_dir.getOrPutContextAdapted(self.base.allocator, @as([]const u8, string), StringIndexAdapter{ - .bytes = &self.strtab, - }, StringIndexContext{ - .bytes = &self.strtab, - }); - if (gop.found_existing) { - const off = gop.key_ptr.*; - log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off }); - return off; - } - - try self.strtab.ensureUnusedCapacity(self.base.allocator, string.len + 1); - const new_off = @intCast(u32, self.strtab.items.len); - - log.debug("writing new string '{s}' at offset 0x{x}", .{ string, new_off }); - - self.strtab.appendSliceAssumeCapacity(string); - self.strtab.appendAssumeCapacity(0); - - gop.key_ptr.* = new_off; - - return new_off; +pub fn getSectionOrdinal(self: *MachO, match: MatchingSection) u8 { + return @intCast(u8, self.section_ordinals.getIndex(match).?) 
+ 1; } -pub fn getString(self: MachO, off: u32) []const u8 { - assert(off < self.strtab.items.len); - return mem.sliceTo(@ptrCast([*:0]const u8, self.strtab.items.ptr + off), 0); +pub fn getMatchingSectionFromOrdinal(self: *MachO, ord: u8) MatchingSection { + const index = ord - 1; + assert(index < self.section_ordinals.count()); + return self.section_ordinals.keys()[index]; } -pub fn symbolIsTemp(sym: macho.nlist_64, sym_name: []const u8) bool { +pub fn getSegmentPtr(self: *MachO, match: MatchingSection) *macho.SegmentCommand { + assert(match.seg < self.load_commands.items.len); + return &self.load_commands.items[match.seg].segment; +} + +pub fn getSegment(self: *MachO, match: MatchingSection) macho.SegmentCommand { + return self.getSegmentPtr(match).*; +} + +pub fn getSectionPtr(self: *MachO, match: MatchingSection) *macho.section_64 { + const seg = self.getSegmentPtr(match); + assert(match.sect < seg.sections.items.len); + return &seg.sections.items[match.sect]; +} + +pub fn getSection(self: *MachO, match: MatchingSection) macho.section_64 { + return self.getSectionPtr(match).*; +} + +pub fn symbolIsTemp(self: *MachO, sym_with_loc: SymbolWithLoc) bool { + const sym = self.getSymbol(sym_with_loc); if (!sym.sect()) return false; if (sym.ext()) return false; + const sym_name = self.getSymbolName(sym_with_loc); return mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L"); } +/// Returns pointer-to-symbol described by `sym_with_loc` descriptor. +pub fn getSymbolPtr(self: *MachO, sym_with_loc: SymbolWithLoc) *macho.nlist_64 { + if (sym_with_loc.file) |file| { + const object = &self.objects.items[file]; + return &object.symtab.items[sym_with_loc.sym_index]; + } else { + return &self.locals.items[sym_with_loc.sym_index]; + } +} + +/// Returns symbol described by `sym_with_loc` descriptor. 
+pub fn getSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) macho.nlist_64 { + return self.getSymbolPtr(sym_with_loc).*; +} + +/// Returns name of the symbol described by `sym_with_loc` descriptor. +pub fn getSymbolName(self: *MachO, sym_with_loc: SymbolWithLoc) []const u8 { + if (sym_with_loc.file) |file| { + const object = self.objects.items[file]; + const sym = object.symtab.items[sym_with_loc.sym_index]; + return object.getString(sym.n_strx); + } else { + const sym = self.locals.items[sym_with_loc.sym_index]; + return self.strtab.get(sym.n_strx).?; + } +} + +/// Returns atom if there is an atom referenced by the symbol described by `sym_with_loc` descriptor. +/// Returns null on failure. +pub fn getAtomForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?*Atom { + if (sym_with_loc.file) |file| { + const object = self.objects.items[file]; + return object.atom_by_index_table.get(sym_with_loc.sym_index); + } else { + return self.atom_by_index_table.get(sym_with_loc.sym_index); + } +} + pub fn findFirst(comptime T: type, haystack: []const T, start: usize, predicate: anytype) usize { if (!@hasDecl(@TypeOf(predicate), "predicate")) @compileError("Predicate is required to define fn predicate(@This(), T) bool"); @@ -6835,7 +6453,7 @@ fn snapshotState(self: *MachO) !void { const arena = arena_allocator.allocator(); const out_file = try emit.directory.handle.createFile("snapshots.json", .{ - .truncate = self.cold_start, + .truncate = false, .read = true, }); defer out_file.close(); @@ -6855,8 +6473,7 @@ fn snapshotState(self: *MachO) !void { var nodes = std.ArrayList(Snapshot.Node).init(arena); for (self.section_ordinals.keys()) |key| { - const seg = self.load_commands.items[key.seg].segment; - const sect = seg.sections.items[key.sect]; + const sect = self.getSection(key); const sect_name = try std.fmt.allocPrint(arena, "{s},{s}", .{ sect.segName(), sect.sectName() }); try nodes.append(.{ .address = sect.addr, @@ -6878,10 +6495,10 @@ fn snapshotState(self: *MachO) 
!void { } while (true) { - const atom_sym = self.locals.items[atom.local_sym_index]; + const atom_sym = self.locals.items[atom.sym_index]; const should_skip_atom: bool = blk: { if (self.mh_execute_header_index) |index| { - if (index == atom.local_sym_index) break :blk true; + if (index == atom.sym_index) break :blk true; } if (mem.eql(u8, self.getString(atom_sym.n_strx), "___dso_handle")) break :blk true; break :blk false; @@ -6906,7 +6523,7 @@ fn snapshotState(self: *MachO) !void { var aliases = std.ArrayList([]const u8).init(arena); for (atom.contained.items) |sym_off| { if (sym_off.offset == 0) { - try aliases.append(self.getString(self.locals.items[sym_off.local_sym_index].n_strx)); + try aliases.append(self.getString(self.locals.items[sym_off.sym_index].n_strx)); } } node.payload.aliases = aliases.toOwnedSlice(); @@ -6916,7 +6533,7 @@ fn snapshotState(self: *MachO) !void { for (atom.relocs.items) |rel| { const arch = self.base.options.target.cpu.arch; const source_addr = blk: { - const sym = self.locals.items[atom.local_sym_index]; + const sym = self.locals.items[atom.sym_index]; break :blk sym.n_value + rel.offset; }; const target_addr = blk: { @@ -6937,14 +6554,14 @@ fn snapshotState(self: *MachO) !void { if (is_via_got) { const got_index = self.got_entries_table.get(rel.target) orelse break :blk 0; const got_atom = self.got_entries.items[got_index].atom; - break :blk self.locals.items[got_atom.local_sym_index].n_value; + break :blk self.locals.items[got_atom.sym_index].n_value; } switch (rel.target) { .local => |sym_index| { const sym = self.locals.items[sym_index]; const is_tlv = is_tlv: { - const source_sym = self.locals.items[atom.local_sym_index]; + const source_sym = self.locals.items[atom.sym_index]; const match = self.section_ordinals.keys()[source_sym.n_sect - 1]; const match_seg = self.load_commands.items[match.seg].segment; const match_sect = match_seg.sections.items[match.sect]; @@ -6970,7 +6587,7 @@ fn snapshotState(self: *MachO) !void { .undef 
=> { if (self.stubs_table.get(n_strx)) |stub_index| { const stub_atom = self.stubs.items[stub_index]; - break :blk self.locals.items[stub_atom.local_sym_index].n_value; + break :blk self.locals.items[stub_atom.sym_index].n_value; } break :blk 0; }, @@ -6998,7 +6615,7 @@ fn snapshotState(self: *MachO) !void { var last_rel: usize = 0; while (next_i < atom.contained.items.len) : (next_i += 1) { const loc = atom.contained.items[next_i]; - const cont_sym = self.locals.items[loc.local_sym_index]; + const cont_sym = self.locals.items[loc.sym_index]; const cont_sym_name = self.getString(cont_sym.n_strx); var contained_node = Snapshot.Node{ .address = cont_sym.n_value, @@ -7013,7 +6630,7 @@ fn snapshotState(self: *MachO) !void { var inner_aliases = std.ArrayList([]const u8).init(arena); while (true) { if (next_i + 1 >= atom.contained.items.len) break; - const next_sym = self.locals.items[atom.contained.items[next_i + 1].local_sym_index]; + const next_sym = self.locals.items[atom.contained.items[next_i + 1].sym_index]; if (next_sym.n_value != cont_sym.n_value) break; const next_sym_name = self.getString(next_sym.n_strx); if (self.symbol_resolver.contains(next_sym.n_strx)) { @@ -7025,7 +6642,7 @@ fn snapshotState(self: *MachO) !void { } const cont_size = if (next_i + 1 < atom.contained.items.len) - self.locals.items[atom.contained.items[next_i + 1].local_sym_index].n_value - cont_sym.n_value + self.locals.items[atom.contained.items[next_i + 1].sym_index].n_value - cont_sym.n_value else atom_sym.n_value + atom.size - cont_sym.n_value; @@ -7072,75 +6689,117 @@ fn snapshotState(self: *MachO) !void { try writer.writeByte(']'); } -fn logSymtab(self: MachO) void { - log.warn("locals:", .{}); - for (self.locals.items) |sym, id| { - log.warn(" {d}: {s}: @{x} in {d}", .{ id, self.getString(sym.n_strx), sym.n_value, sym.n_sect }); +pub fn logSymAttributes(sym: macho.nlist_64, buf: *[4]u8) []const u8 { + mem.set(u8, buf, '_'); + if (sym.sect()) { + buf[0] = 's'; } - - 
log.warn("globals:", .{}); - for (self.globals.items) |sym, id| { - log.warn(" {d}: {s}: @{x} in {d}", .{ id, self.getString(sym.n_strx), sym.n_value, sym.n_sect }); + if (sym.ext()) { + buf[1] = 'e'; } - - log.warn("undefs:", .{}); - for (self.undefs.items) |sym, id| { - log.warn(" {d}: {s}: in {d}", .{ id, self.getString(sym.n_strx), sym.n_desc }); + if (sym.tentative()) { + buf[2] = 't'; } - - { - log.warn("resolver:", .{}); - var it = self.symbol_resolver.iterator(); - while (it.next()) |entry| { - log.warn(" {s} => {}", .{ self.getString(entry.key_ptr.*), entry.value_ptr.* }); - } - } - - log.warn("GOT entries:", .{}); - for (self.got_entries_table.values()) |value| { - const key = self.got_entries.items[value].target; - const atom = self.got_entries.items[value].atom; - const n_value = self.locals.items[atom.local_sym_index].n_value; - switch (key) { - .local => |ndx| log.warn(" {d}: @{x}", .{ ndx, n_value }), - .global => |n_strx| log.warn(" {s}: @{x}", .{ self.getString(n_strx), n_value }), - } - } - - log.warn("__thread_ptrs entries:", .{}); - for (self.tlv_ptr_entries_table.values()) |value| { - const key = self.tlv_ptr_entries.items[value].target; - const atom = self.tlv_ptr_entries.items[value].atom; - const n_value = self.locals.items[atom.local_sym_index].n_value; - assert(key == .global); - log.warn(" {s}: @{x}", .{ self.getString(key.global), n_value }); - } - - log.warn("stubs:", .{}); - for (self.stubs_table.keys()) |key| { - const value = self.stubs_table.get(key).?; - const atom = self.stubs.items[value]; - const sym = self.locals.items[atom.local_sym_index]; - log.warn(" {s}: @{x}", .{ self.getString(key), sym.n_value }); + if (sym.undf()) { + buf[3] = 'u'; } + return buf[0..]; } -fn logSectionOrdinals(self: MachO) void { - for (self.section_ordinals.keys()) |match, i| { - const seg = self.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; - log.debug("ord {d}: {d},{d} => {s},{s}", .{ - i + 1, - match.seg, - 
match.sect, - sect.segName(), - sect.sectName(), +fn logSymtab(self: *MachO) void { + var buf: [4]u8 = undefined; + + log.debug("symtab:", .{}); + for (self.objects.items) |object, id| { + log.debug(" object({d}): {s}", .{ id, object.name }); + for (object.symtab.items) |sym, sym_id| { + const where = if (sym.undf() and !sym.tentative()) "ord" else "sect"; + const def_index = if (sym.undf() and !sym.tentative()) + @divTrunc(sym.n_desc, macho.N_SYMBOL_RESOLVER) + else + sym.n_sect; + log.debug(" %{d}: {s} @{x} in {s}({d}), {s}", .{ + sym_id, + object.getString(sym.n_strx), + sym.n_value, + where, + def_index, + logSymAttributes(sym, &buf), + }); + } + } + log.debug(" object(null)", .{}); + for (self.locals.items) |sym, sym_id| { + const where = if (sym.undf() and !sym.tentative()) "ord" else "sect"; + const def_index = if (sym.undf() and !sym.tentative()) + @divTrunc(sym.n_desc, macho.N_SYMBOL_RESOLVER) + else + sym.n_sect; + log.debug(" %{d}: {s} @{x} in {s}({d}), {s}", .{ + sym_id, + self.strtab.get(sym.n_strx), + sym.n_value, + where, + def_index, + logSymAttributes(sym, &buf), }); } + + log.debug("globals table:", .{}); + for (self.globals.keys()) |name, id| { + const value = self.globals.values()[id]; + log.debug(" {s} => %{d} in object({d})", .{ name, value.sym_index, value.file }); + } + + log.debug("GOT entries:", .{}); + for (self.got_entries_table.values()) |value| { + const target = self.got_entries.items[value].target; + const target_sym = self.getSymbol(target); + const atom = self.got_entries.items[value].atom; + const atom_sym = atom.getSymbol(self); + + if (target_sym.undf()) { + log.debug(" {d}@{x} => import('{s}')", .{ value, atom_sym.n_value, self.getSymbolName(target) }); + } else { + log.debug(" {d}@{x} => local(%{d}) in object({d})", .{ + value, + atom_sym.n_value, + target.sym_index, + target.file, + }); + } + } + + log.debug("__thread_ptrs entries:", .{}); + for (self.tlv_ptr_entries_table.values()) |value| { + const target = 
self.tlv_ptr_entries.items[value].target; + const target_sym = self.getSymbol(target); + const atom = self.tlv_ptr_entries.items[value].atom; + const atom_sym = atom.getSymbol(self); + assert(target_sym.undf()); + log.debug(" {d}@{x} => import('{s}')", .{ value, atom_sym.n_value, self.getSymbolName(target) }); + } + + log.debug("stubs entries:", .{}); + for (self.stubs_table.values()) |value| { + const target = self.stubs.items[value].target; + const target_sym = self.getSymbol(target); + const atom = self.stubs.items[value].atom; + const atom_sym = atom.getSymbol(self); + assert(target_sym.undf()); + log.debug(" {d}@{x} => import('{s}')", .{ value, atom_sym.n_value, self.getSymbolName(target) }); + } } -fn logAtoms(self: MachO) void { - log.warn("atoms:", .{}); +fn logSectionOrdinals(self: *MachO) void { + for (self.section_ordinals.keys()) |match, i| { + const sect = self.getSection(match); + log.debug("sect({d}, '{s},{s}')", .{ i + 1, sect.segName(), sect.sectName() }); + } +} + +fn logAtoms(self: *MachO) void { + log.debug("atoms:", .{}); var it = self.atoms.iterator(); while (it.next()) |entry| { const match = entry.key_ptr.*; @@ -7150,9 +6809,8 @@ fn logAtoms(self: MachO) void { atom = prev; } - const seg = self.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; - log.warn("{s},{s}", .{ sect.segName(), sect.sectName() }); + const sect = self.getSection(match); + log.debug("{s},{s}", .{ sect.segName(), sect.sectName() }); while (true) { self.logAtom(atom); @@ -7164,16 +6822,28 @@ fn logAtoms(self: MachO) void { } } -fn logAtom(self: MachO, atom: *const Atom) void { - const sym = self.locals.items[atom.local_sym_index]; - log.warn(" ATOM(%{d}) @ {x}", .{ atom.local_sym_index, sym.n_value }); +pub fn logAtom(self: *MachO, atom: *const Atom) void { + const sym = atom.getSymbol(self); + const sym_name = atom.getName(self); + log.debug(" ATOM(%{d}, '{s}') @ {x} in object({d})", .{ + atom.sym_index, + sym_name, + sym.n_value, + 
atom.file, + }); for (atom.contained.items) |sym_off| { - const inner_sym = self.locals.items[sym_off.local_sym_index]; - log.warn(" %{d} ('{s}') @ {x}", .{ - sym_off.local_sym_index, - self.getString(inner_sym.n_strx), + const inner_sym = self.getSymbol(.{ + .sym_index = sym_off.sym_index, + .file = atom.file, + }); + const inner_sym_name = self.getSymbolName(.{ .sym_index = sym_off.sym_index, .file = atom.file }); + log.debug(" (%{d}, '{s}') @ {x} ({x}) in object({d})", .{ + sym_off.sym_index, + inner_sym_name, inner_sym.n_value, + sym_off.offset, + atom.file, }); } } diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 177d5419fb..f5995cbd87 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -16,7 +16,7 @@ const Arch = std.Target.Cpu.Arch; const Dwarf = @import("../Dwarf.zig"); const MachO = @import("../MachO.zig"); const Object = @import("Object.zig"); -const StringIndexAdapter = std.hash_map.StringIndexAdapter; +const SymbolWithLoc = MachO.SymbolWithLoc; /// Each decl always gets a local symbol with the fully qualified name. /// The vaddr and size are found here directly. @@ -24,7 +24,10 @@ const StringIndexAdapter = std.hash_map.StringIndexAdapter; /// the symbol references, and adding that to the file offset of the section. /// If this field is 0, it means the codegen size = 0 and there is no symbol or /// offset table entry. -local_sym_index: u32, +sym_index: u32, + +/// null means symbol defined by Zig source. +file: ?u32, /// List of symbols contained within this atom contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, @@ -45,15 +48,15 @@ alignment: u32, relocs: std.ArrayListUnmanaged(Relocation) = .{}, /// List of offsets contained within this atom that need rebasing by the dynamic -/// loader in presence of ASLR. +/// loader for example in presence of ASLR. 
rebases: std.ArrayListUnmanaged(u64) = .{}, /// List of offsets contained within this atom that will be dynamically bound /// by the dynamic loader and contain pointers to resolved (at load time) extern -/// symbols (aka proxies aka imports) +/// symbols (aka proxies aka imports). bindings: std.ArrayListUnmanaged(Binding) = .{}, -/// List of lazy bindings +/// List of lazy bindings (cf bindings above). lazy_bindings: std.ArrayListUnmanaged(Binding) = .{}, /// List of data-in-code entries. This is currently specific to x86_64 only. @@ -68,12 +71,12 @@ dbg_info_atom: Dwarf.Atom, dirty: bool = true, pub const Binding = struct { - n_strx: u32, + global_index: u32, offset: u64, }; pub const SymbolAtOffset = struct { - local_sym_index: u32, + sym_index: u32, offset: u64, stab: ?Stab = null, }; @@ -83,11 +86,14 @@ pub const Stab = union(enum) { static, global, - pub fn asNlists(stab: Stab, local_sym_index: u32, macho_file: anytype) ![]macho.nlist_64 { - var nlists = std.ArrayList(macho.nlist_64).init(macho_file.base.allocator); + pub fn asNlists(stab: Stab, sym_loc: SymbolWithLoc, macho_file: *MachO) ![]macho.nlist_64 { + const gpa = macho_file.base.allocator; + + var nlists = std.ArrayList(macho.nlist_64).init(gpa); defer nlists.deinit(); - const sym = macho_file.locals.items[local_sym_index]; + const sym = macho_file.getSymbol(sym_loc); + const sym_name = macho_file.getSymbolName(sym_loc); switch (stab) { .function => |size| { try nlists.ensureUnusedCapacity(4); @@ -99,7 +105,7 @@ pub const Stab = union(enum) { .n_value = sym.n_value, }); nlists.appendAssumeCapacity(.{ - .n_strx = sym.n_strx, + .n_strx = try macho_file.strtab.insert(gpa, sym_name), .n_type = macho.N_FUN, .n_sect = sym.n_sect, .n_desc = 0, @@ -122,7 +128,7 @@ pub const Stab = union(enum) { }, .global => { try nlists.append(.{ - .n_strx = sym.n_strx, + .n_strx = try macho_file.strtab.insert(gpa, sym_name), .n_type = macho.N_GSYM, .n_sect = 0, .n_desc = 0, @@ -131,7 +137,7 @@ pub const Stab = union(enum) { 
}, .static => { try nlists.append(.{ - .n_strx = sym.n_strx, + .n_strx = try macho_file.strtab.insert(gpa, sym_name), .n_type = macho.N_STSYM, .n_sect = sym.n_sect, .n_desc = 0, @@ -145,30 +151,66 @@ pub const Stab = union(enum) { }; pub const Relocation = struct { - pub const Target = union(enum) { - local: u32, - global: u32, - }; - /// Offset within the atom's code buffer. /// Note relocation size can be inferred by relocation's kind. offset: u32, - target: Target, + target: MachO.SymbolWithLoc, addend: i64, - subtractor: ?u32, + subtractor: ?MachO.SymbolWithLoc, pcrel: bool, length: u2, @"type": u4, + + pub fn getTargetAtom(self: Relocation, macho_file: *MachO) !?*Atom { + const is_via_got = got: { + switch (macho_file.base.options.target.cpu.arch) { + .aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, self.@"type")) { + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .ARM64_RELOC_POINTER_TO_GOT, + => true, + else => false, + }, + .x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, self.@"type")) { + .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true, + else => false, + }, + else => unreachable, + } + }; + + const target_sym = macho_file.getSymbol(self.target); + if (is_via_got) { + const got_index = macho_file.got_entries_table.get(self.target) orelse { + log.err("expected GOT entry for symbol", .{}); + if (target_sym.undf()) { + log.err(" import('{s}')", .{macho_file.getSymbolName(self.target)}); + } else { + log.err(" local(%{d}) in object({d})", .{ self.target.sym_index, self.target.file }); + } + log.err(" this is an internal linker error", .{}); + return error.FailedToResolveRelocationTarget; + }; + return macho_file.got_entries.items[got_index].atom; + } + + if (macho_file.stubs_table.get(self.target)) |stub_index| { + return macho_file.stubs.items[stub_index].atom; + } else if (macho_file.tlv_ptr_entries_table.get(self.target)) |tlv_ptr_index| { + return macho_file.tlv_ptr_entries.items[tlv_ptr_index].atom; 
+ } else return macho_file.getAtomForSymbol(self.target); + } }; pub const empty = Atom{ - .local_sym_index = 0, + .sym_index = 0, + .file = null, .size = 0, .alignment = 0, .prev = null, @@ -196,13 +238,45 @@ pub fn clearRetainingCapacity(self: *Atom) void { self.code.clearRetainingCapacity(); } +/// Returns symbol referencing this atom. +pub fn getSymbol(self: Atom, macho_file: *MachO) macho.nlist_64 { + return self.getSymbolPtr(macho_file).*; +} + +/// Returns pointer-to-symbol referencing this atom. +pub fn getSymbolPtr(self: Atom, macho_file: *MachO) *macho.nlist_64 { + return macho_file.getSymbolPtr(.{ + .sym_index = self.sym_index, + .file = self.file, + }); +} + +/// Returns true if the symbol pointed at with `sym_loc` is contained within this atom. +/// WARNING this function assumes all atoms have been allocated in the virtual memory. +/// Calling it without allocating with `MachO.allocateSymbols` (or equivalent) will +/// give bogus results. +pub fn isSymbolContained(self: Atom, sym_loc: SymbolWithLoc, macho_file: *MachO) bool { + const sym = macho_file.getSymbol(sym_loc); + if (!sym.sect()) return false; + const self_sym = self.getSymbol(macho_file); + return sym.n_value >= self_sym.n_value and sym.n_value < self_sym.n_value + self.size; +} + +/// Returns the name of this atom. +pub fn getName(self: Atom, macho_file: *MachO) []const u8 { + return macho_file.getSymbolName(.{ + .sym_index = self.sym_index, + .file = self.file, + }); +} + /// Returns how much room there is to grow in virtual address space. /// File offset relocation happens transparently, so it is not included in /// this calculation. 
-pub fn capacity(self: Atom, macho_file: MachO) u64 { - const self_sym = macho_file.locals.items[self.local_sym_index]; +pub fn capacity(self: Atom, macho_file: *MachO) u64 { + const self_sym = self.getSymbol(macho_file); if (self.next) |next| { - const next_sym = macho_file.locals.items[next.local_sym_index]; + const next_sym = next.getSymbol(macho_file); return next_sym.n_value - self_sym.n_value; } else { // We are the last atom. @@ -211,11 +285,11 @@ pub fn capacity(self: Atom, macho_file: MachO) u64 { } } -pub fn freeListEligible(self: Atom, macho_file: MachO) bool { +pub fn freeListEligible(self: Atom, macho_file: *MachO) bool { // No need to keep a free list node for the last atom. const next = self.next orelse return false; - const self_sym = macho_file.locals.items[self.local_sym_index]; - const next_sym = macho_file.locals.items[next.local_sym_index]; + const self_sym = self.getSymbol(macho_file); + const next_sym = next.getSymbol(macho_file); const cap = next_sym.n_value - self_sym.n_value; const ideal_cap = MachO.padToIdeal(self.size); if (cap <= ideal_cap) return false; @@ -224,20 +298,20 @@ pub fn freeListEligible(self: Atom, macho_file: MachO) bool { } const RelocContext = struct { + macho_file: *MachO, base_addr: u64 = 0, base_offset: i32 = 0, - allocator: Allocator, - object: *Object, - macho_file: *MachO, }; pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: RelocContext) !void { const tracy = trace(@src()); defer tracy.end(); + const gpa = context.macho_file.base.allocator; + const arch = context.macho_file.base.options.target.cpu.arch; var addend: i64 = 0; - var subtractor: ?u32 = null; + var subtractor: ?SymbolWithLoc = null; for (relocs) |rel, i| { blk: { @@ -274,20 +348,16 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: } assert(subtractor == null); - const sym = context.object.symtab[rel.r_symbolnum]; + const sym_loc = MachO.SymbolWithLoc{ + .sym_index = rel.r_symbolnum, + .file 
= self.file, + }; + const sym = context.macho_file.getSymbol(sym_loc); if (sym.sect() and !sym.ext()) { - subtractor = context.object.symbol_mapping.get(rel.r_symbolnum).?; + subtractor = sym_loc; } else { - const sym_name = context.object.getString(sym.n_strx); - const n_strx = context.macho_file.strtab_dir.getKeyAdapted( - @as([]const u8, sym_name), - StringIndexAdapter{ - .bytes = &context.macho_file.strtab, - }, - ).?; - const resolv = context.macho_file.symbol_resolver.get(n_strx).?; - assert(resolv.where == .global); - subtractor = resolv.local_sym_index; + const sym_name = context.macho_file.getSymbolName(sym_loc); + subtractor = context.macho_file.globals.get(sym_name).?; } // Verify that *_SUBTRACTOR is followed by *_UNSIGNED. if (relocs.len <= i + 1) { @@ -318,43 +388,40 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: continue; } + const object = &context.macho_file.objects.items[self.file.?]; const target = target: { if (rel.r_extern == 0) { const sect_id = @intCast(u16, rel.r_symbolnum - 1); - const local_sym_index = context.object.sections_as_symbols.get(sect_id) orelse blk: { - const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].segment; - const sect = seg.sections.items[sect_id]; + const sym_index = object.sections_as_symbols.get(sect_id) orelse blk: { + const sect = object.getSection(sect_id); const match = (try context.macho_file.getMatchingSection(sect)) orelse unreachable; - const local_sym_index = @intCast(u32, context.macho_file.locals.items.len); - try context.macho_file.locals.append(context.allocator, .{ + const sym_index = @intCast(u32, object.symtab.items.len); + try object.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(match).? 
+ 1), + .n_sect = context.macho_file.getSectionOrdinal(match), .n_desc = 0, .n_value = 0, }); - try context.object.sections_as_symbols.putNoClobber(context.allocator, sect_id, local_sym_index); - break :blk local_sym_index; + try object.sections_as_symbols.putNoClobber(gpa, sect_id, sym_index); + break :blk sym_index; }; - break :target Relocation.Target{ .local = local_sym_index }; + break :target MachO.SymbolWithLoc{ .sym_index = sym_index, .file = self.file }; } - const sym = context.object.symtab[rel.r_symbolnum]; - const sym_name = context.object.getString(sym.n_strx); + const sym_loc = MachO.SymbolWithLoc{ + .sym_index = rel.r_symbolnum, + .file = self.file, + }; + const sym = context.macho_file.getSymbol(sym_loc); if (sym.sect() and !sym.ext()) { - const sym_index = context.object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; - break :target Relocation.Target{ .local = sym_index }; + break :target sym_loc; + } else { + const sym_name = context.macho_file.getSymbolName(sym_loc); + break :target context.macho_file.globals.get(sym_name).?; } - - const n_strx = context.macho_file.strtab_dir.getKeyAdapted( - @as([]const u8, sym_name), - StringIndexAdapter{ - .bytes = &context.macho_file.strtab, - }, - ) orelse unreachable; - break :target Relocation.Target{ .global = n_strx }; }; const offset = @intCast(u32, rel.r_address - context.base_offset); @@ -378,8 +445,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: else mem.readIntLittle(i32, self.code.items[offset..][0..4]); if (rel.r_extern == 0) { - const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].segment; - const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr; + const target_sect_base_addr = object.getSection(@intCast(u16, rel.r_symbolnum - 1)).addr; addend -= @intCast(i64, target_sect_base_addr); } try self.addPtrBindingOrRebase(rel, target, context); @@ -387,9 +453,7 @@ pub fn parseRelocs(self: *Atom, relocs: 
[]const macho.relocation_info, context: .ARM64_RELOC_TLVP_LOAD_PAGE21, .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, => { - if (target == .global) { - try addTlvPtrEntry(target, context); - } + try addTlvPtrEntry(target, context); }, else => {}, } @@ -413,8 +477,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: else mem.readIntLittle(i32, self.code.items[offset..][0..4]); if (rel.r_extern == 0) { - const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].segment; - const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr; + const target_sect_base_addr = object.getSection(@intCast(u16, rel.r_symbolnum - 1)).addr; addend -= @intCast(i64, target_sect_base_addr); } try self.addPtrBindingOrRebase(rel, target, context); @@ -435,16 +498,13 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: if (rel.r_extern == 0) { // Note for the future self: when r_extern == 0, we should subtract correction from the // addend. 
- const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].segment; - const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr; + const target_sect_base_addr = object.getSection(@intCast(u16, rel.r_symbolnum - 1)).addr; addend += @intCast(i64, context.base_addr + offset + 4) - @intCast(i64, target_sect_base_addr); } }, .X86_64_RELOC_TLV => { - if (target == .global) { - try addTlvPtrEntry(target, context); - } + try addTlvPtrEntry(target, context); }, else => {}, } @@ -452,7 +512,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: else => unreachable, } - try self.relocs.append(context.allocator, .{ + try self.relocs.append(gpa, .{ .offset = offset, .target = target, .addend = addend, @@ -470,338 +530,181 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: fn addPtrBindingOrRebase( self: *Atom, rel: macho.relocation_info, - target: Relocation.Target, + target: MachO.SymbolWithLoc, context: RelocContext, ) !void { - switch (target) { - .global => |n_strx| { - try self.bindings.append(context.allocator, .{ - .n_strx = n_strx, - .offset = @intCast(u32, rel.r_address - context.base_offset), - }); - }, - .local => { - const source_sym = context.macho_file.locals.items[self.local_sym_index]; - const match = context.macho_file.section_ordinals.keys()[source_sym.n_sect - 1]; - const seg = context.macho_file.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; - const sect_type = sect.type_(); + const gpa = context.macho_file.base.allocator; + const sym = context.macho_file.getSymbol(target); + if (sym.undf()) { + const sym_name = context.macho_file.getSymbolName(target); + const global_index = @intCast(u32, context.macho_file.globals.getIndex(sym_name).?); + try self.bindings.append(gpa, .{ + .global_index = global_index, + .offset = @intCast(u32, rel.r_address - context.base_offset), + }); + } else { + const source_sym = 
self.getSymbol(context.macho_file); + const match = context.macho_file.getMatchingSectionFromOrdinal(source_sym.n_sect); + const sect = context.macho_file.getSection(match); + const sect_type = sect.type_(); - const should_rebase = rebase: { - if (rel.r_length != 3) break :rebase false; + const should_rebase = rebase: { + if (rel.r_length != 3) break :rebase false; - // TODO actually, a check similar to what dyld is doing, that is, verifying - // that the segment is writable should be enough here. - const is_right_segment = blk: { - if (context.macho_file.data_segment_cmd_index) |idx| { - if (match.seg == idx) { - break :blk true; - } + // TODO actually, a check similar to what dyld is doing, that is, verifying + // that the segment is writable should be enough here. + const is_right_segment = blk: { + if (context.macho_file.data_segment_cmd_index) |idx| { + if (match.seg == idx) { + break :blk true; } - if (context.macho_file.data_const_segment_cmd_index) |idx| { - if (match.seg == idx) { - break :blk true; - } - } - break :blk false; - }; - - if (!is_right_segment) break :rebase false; - if (sect_type != macho.S_LITERAL_POINTERS and - sect_type != macho.S_REGULAR and - sect_type != macho.S_MOD_INIT_FUNC_POINTERS and - sect_type != macho.S_MOD_TERM_FUNC_POINTERS) - { - break :rebase false; } - - break :rebase true; + if (context.macho_file.data_const_segment_cmd_index) |idx| { + if (match.seg == idx) { + break :blk true; + } + } + break :blk false; }; - if (should_rebase) { - try self.rebases.append( - context.allocator, - @intCast(u32, rel.r_address - context.base_offset), - ); + if (!is_right_segment) break :rebase false; + if (sect_type != macho.S_LITERAL_POINTERS and + sect_type != macho.S_REGULAR and + sect_type != macho.S_MOD_INIT_FUNC_POINTERS and + sect_type != macho.S_MOD_TERM_FUNC_POINTERS) + { + break :rebase false; } - }, + + break :rebase true; + }; + + if (should_rebase) { + try self.rebases.append(gpa, @intCast(u32, rel.r_address - 
context.base_offset)); + } } } -fn addTlvPtrEntry(target: Relocation.Target, context: RelocContext) !void { +fn addTlvPtrEntry(target: MachO.SymbolWithLoc, context: RelocContext) !void { + const target_sym = context.macho_file.getSymbol(target); + if (!target_sym.undf()) return; if (context.macho_file.tlv_ptr_entries_table.contains(target)) return; const index = try context.macho_file.allocateTlvPtrEntry(target); const atom = try context.macho_file.createTlvPtrAtom(target); context.macho_file.tlv_ptr_entries.items[index].atom = atom; - - const match = (try context.macho_file.getMatchingSection(.{ - .segname = MachO.makeStaticString("__DATA"), - .sectname = MachO.makeStaticString("__thread_ptrs"), - .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, - })).?; - if (!context.object.start_atoms.contains(match)) { - try context.object.start_atoms.putNoClobber(context.allocator, match, atom); - } - if (context.object.end_atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try context.object.end_atoms.putNoClobber(context.allocator, match, atom); - } } -fn addGotEntry(target: Relocation.Target, context: RelocContext) !void { +fn addGotEntry(target: MachO.SymbolWithLoc, context: RelocContext) !void { if (context.macho_file.got_entries_table.contains(target)) return; const index = try context.macho_file.allocateGotEntry(target); const atom = try context.macho_file.createGotAtom(target); context.macho_file.got_entries.items[index].atom = atom; - - const match = MachO.MatchingSection{ - .seg = context.macho_file.data_const_segment_cmd_index.?, - .sect = context.macho_file.got_section_index.?, - }; - if (!context.object.start_atoms.contains(match)) { - try context.object.start_atoms.putNoClobber(context.allocator, match, atom); - } - if (context.object.end_atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try context.object.end_atoms.putNoClobber(context.allocator, match, 
atom); - } } -fn addStub(target: Relocation.Target, context: RelocContext) !void { - if (target != .global) return; - if (context.macho_file.stubs_table.contains(target.global)) return; - // If the symbol has been resolved as defined globally elsewhere (in a different translation unit), - // then skip creating stub entry. - // TODO Is this the correct for the incremental? - if (context.macho_file.symbol_resolver.get(target.global).?.where == .global) return; +fn addStub(target: MachO.SymbolWithLoc, context: RelocContext) !void { + const target_sym = context.macho_file.getSymbol(target); + if (!target_sym.undf()) return; + if (context.macho_file.stubs_table.contains(target)) return; - const stub_index = try context.macho_file.allocateStubEntry(target.global); + const stub_index = try context.macho_file.allocateStubEntry(target); + const stub_helper_atom = try context.macho_file.createStubHelperAtom(); + const laptr_atom = try context.macho_file.createLazyPointerAtom(stub_helper_atom.sym_index, target); + const stub_atom = try context.macho_file.createStubAtom(laptr_atom.sym_index); - // TODO clean this up! 
- const stub_helper_atom = atom: { - const atom = try context.macho_file.createStubHelperAtom(); - const match = MachO.MatchingSection{ - .seg = context.macho_file.text_segment_cmd_index.?, - .sect = context.macho_file.stub_helper_section_index.?, - }; - if (!context.object.start_atoms.contains(match)) { - try context.object.start_atoms.putNoClobber(context.allocator, match, atom); - } - if (context.object.end_atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try context.object.end_atoms.putNoClobber(context.allocator, match, atom); - } - break :atom atom; - }; - const laptr_atom = atom: { - const atom = try context.macho_file.createLazyPointerAtom( - stub_helper_atom.local_sym_index, - target.global, - ); - const match = MachO.MatchingSection{ - .seg = context.macho_file.data_segment_cmd_index.?, - .sect = context.macho_file.la_symbol_ptr_section_index.?, - }; - if (!context.object.start_atoms.contains(match)) { - try context.object.start_atoms.putNoClobber(context.allocator, match, atom); - } - if (context.object.end_atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try context.object.end_atoms.putNoClobber(context.allocator, match, atom); - } - break :atom atom; - }; - const atom = try context.macho_file.createStubAtom(laptr_atom.local_sym_index); - const match = MachO.MatchingSection{ - .seg = context.macho_file.text_segment_cmd_index.?, - .sect = context.macho_file.stubs_section_index.?, - }; - if (!context.object.start_atoms.contains(match)) { - try context.object.start_atoms.putNoClobber(context.allocator, match, atom); - } - if (context.object.end_atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try context.object.end_atoms.putNoClobber(context.allocator, match, atom); - } - context.macho_file.stubs.items[stub_index] = atom; -} - -pub fn getTargetAtom(rel: Relocation, macho_file: *MachO) !?*Atom { 
- const is_via_got = got: { - switch (macho_file.base.options.target.cpu.arch) { - .aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, rel.@"type")) { - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12, - .ARM64_RELOC_POINTER_TO_GOT, - => true, - else => false, - }, - .x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, rel.@"type")) { - .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true, - else => false, - }, - else => unreachable, - } - }; - - if (is_via_got) { - const got_index = macho_file.got_entries_table.get(rel.target) orelse { - log.err("expected GOT entry for symbol", .{}); - switch (rel.target) { - .local => |sym_index| log.err(" local @{d}", .{sym_index}), - .global => |n_strx| log.err(" global @'{s}'", .{macho_file.getString(n_strx)}), - } - log.err(" this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; - }; - return macho_file.got_entries.items[got_index].atom; - } - - switch (rel.target) { - .local => |sym_index| { - return macho_file.atom_by_index_table.get(sym_index); - }, - .global => |n_strx| { - const resolv = macho_file.symbol_resolver.get(n_strx).?; - switch (resolv.where) { - .global => return macho_file.atom_by_index_table.get(resolv.local_sym_index), - .undef => { - if (macho_file.stubs_table.get(n_strx)) |stub_index| { - return macho_file.stubs.items[stub_index]; - } else { - if (macho_file.tlv_ptr_entries_table.get(rel.target)) |tlv_ptr_index| { - return macho_file.tlv_ptr_entries.items[tlv_ptr_index].atom; - } - return null; - } - }, - } - }, - } + context.macho_file.stubs.items[stub_index].atom = stub_atom; } pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); + log.debug("ATOM(%{d}, '{s}')", .{ self.sym_index, self.getName(macho_file) }); + for (self.relocs.items) |rel| { - log.debug("relocating {}", .{rel}); const arch = macho_file.base.options.target.cpu.arch; + switch (arch) { + .aarch64 => { + 
log.debug(" RELA({s}) @ {x} => %{d} in object({d})", .{ + @tagName(@intToEnum(macho.reloc_type_arm64, rel.@"type")), + rel.offset, + rel.target.sym_index, + rel.target.file, + }); + }, + .x86_64 => { + log.debug(" RELA({s}) @ {x} => %{d} in object({d})", .{ + @tagName(@intToEnum(macho.reloc_type_x86_64, rel.@"type")), + rel.offset, + rel.target.sym_index, + rel.target.file, + }); + }, + else => unreachable, + } + const source_addr = blk: { - const sym = macho_file.locals.items[self.local_sym_index]; - break :blk sym.n_value + rel.offset; + const source_sym = self.getSymbol(macho_file); + break :blk source_sym.n_value + rel.offset; + }; + const is_tlv = is_tlv: { + const source_sym = self.getSymbol(macho_file); + const match = macho_file.getMatchingSectionFromOrdinal(source_sym.n_sect); + const sect = macho_file.getSection(match); + break :is_tlv sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; }; - var is_via_thread_ptrs: bool = false; const target_addr = blk: { - const is_via_got = got: { - switch (arch) { - .aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, rel.@"type")) { - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12, - .ARM64_RELOC_POINTER_TO_GOT, - => true, - else => false, - }, - .x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, rel.@"type")) { - .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true, - else => false, - }, - else => unreachable, + const target_atom = (try rel.getTargetAtom(macho_file)) orelse { + // If there is no atom for target, we still need to check for special, atom-less + // symbols such as `___dso_handle`. 
+ const target_name = macho_file.getSymbolName(rel.target); + if (macho_file.globals.contains(target_name)) { + const atomless_sym = macho_file.getSymbol(rel.target); + log.debug(" | atomless target '{s}'", .{target_name}); + break :blk atomless_sym.n_value; } + log.debug(" | undef target '{s}'", .{target_name}); + break :blk 0; }; - - if (is_via_got) { - const got_index = macho_file.got_entries_table.get(rel.target) orelse { - log.err("expected GOT entry for symbol", .{}); - switch (rel.target) { - .local => |sym_index| log.err(" local @{d}", .{sym_index}), - .global => |n_strx| log.err(" global @'{s}'", .{macho_file.getString(n_strx)}), + log.debug(" | target ATOM(%{d}, '{s}') in object({d})", .{ + target_atom.sym_index, + target_atom.getName(macho_file), + target_atom.file, + }); + // If `rel.target` is contained within the target atom, pull its address value. + const target_sym = if (target_atom.isSymbolContained(rel.target, macho_file)) + macho_file.getSymbol(rel.target) + else + target_atom.getSymbol(macho_file); + const base_address: u64 = if (is_tlv) base_address: { + // For TLV relocations, the value specified as a relocation is the displacement from the + // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first + // defined TLV template init section in the following order: + // * wrt to __thread_data if defined, then + // * wrt to __thread_bss + const sect_id: u16 = sect_id: { + if (macho_file.tlv_data_section_index) |i| { + break :sect_id i; + } else if (macho_file.tlv_bss_section_index) |i| { + break :sect_id i; + } else { + log.err("threadlocal variables present but no initializer sections found", .{}); + log.err(" __thread_data not found", .{}); + log.err(" __thread_bss not found", .{}); + return error.FailedToResolveRelocationTarget; } - log.err(" this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; }; - const atom = macho_file.got_entries.items[got_index].atom; - break :blk 
macho_file.locals.items[atom.local_sym_index].n_value; - } - - switch (rel.target) { - .local => |sym_index| { - const sym = macho_file.locals.items[sym_index]; - const is_tlv = is_tlv: { - const source_sym = macho_file.locals.items[self.local_sym_index]; - const match = macho_file.section_ordinals.keys()[source_sym.n_sect - 1]; - const seg = macho_file.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; - break :is_tlv sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; - }; - if (is_tlv) { - // For TLV relocations, the value specified as a relocation is the displacement from the - // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first - // defined TLV template init section in the following order: - // * wrt to __thread_data if defined, then - // * wrt to __thread_bss - const seg = macho_file.load_commands.items[macho_file.data_segment_cmd_index.?].segment; - const base_address = inner: { - if (macho_file.tlv_data_section_index) |i| { - break :inner seg.sections.items[i].addr; - } else if (macho_file.tlv_bss_section_index) |i| { - break :inner seg.sections.items[i].addr; - } else { - log.err("threadlocal variables present but no initializer sections found", .{}); - log.err(" __thread_data not found", .{}); - log.err(" __thread_bss not found", .{}); - return error.FailedToResolveRelocationTarget; - } - }; - break :blk sym.n_value - base_address; - } - break :blk sym.n_value; - }, - .global => |n_strx| { - // TODO Still trying to figure out how to possibly use stubs for local symbol indirection with - // branching instructions. If it is not possible, then the best course of action is to - // resurrect the former approach of defering creating synthethic atoms in __got and __la_symbol_ptr - // sections until we resolve the relocations. 
- const resolv = macho_file.symbol_resolver.get(n_strx).?; - switch (resolv.where) { - .global => break :blk macho_file.globals.items[resolv.where_index].n_value, - .undef => { - if (macho_file.stubs_table.get(n_strx)) |stub_index| { - const atom = macho_file.stubs.items[stub_index]; - break :blk macho_file.locals.items[atom.local_sym_index].n_value; - } else { - if (macho_file.tlv_ptr_entries_table.get(rel.target)) |tlv_ptr_index| { - is_via_thread_ptrs = true; - const atom = macho_file.tlv_ptr_entries.items[tlv_ptr_index].atom; - break :blk macho_file.locals.items[atom.local_sym_index].n_value; - } - break :blk 0; - } - }, - } - }, - } + break :base_address macho_file.getSection(.{ + .seg = macho_file.data_segment_cmd_index.?, + .sect = sect_id, + }).addr; + } else 0; + break :blk target_sym.n_value - base_address; }; - log.debug(" | source_addr = 0x{x}", .{source_addr}); - log.debug(" | target_addr = 0x{x}", .{target_addr}); + log.debug(" | source_addr = 0x{x}", .{source_addr}); + log.debug(" | target_addr = 0x{x}", .{target_addr}); switch (arch) { .aarch64 => { @@ -933,7 +836,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { } }; const narrowed = @truncate(u12, @intCast(u64, actual_target_addr)); - var inst = if (is_via_thread_ptrs) blk: { + var inst = if (macho_file.tlv_ptr_entries_table.contains(rel.target)) blk: { const offset = try math.divExact(u12, narrowed, 8); break :blk aarch64.Instruction{ .load_store_register = .{ @@ -966,7 +869,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { .ARM64_RELOC_UNSIGNED => { const result = blk: { if (rel.subtractor) |subtractor| { - const sym = macho_file.locals.items[subtractor]; + const sym = macho_file.getSymbol(subtractor); break :blk @intCast(i64, target_addr) - @intCast(i64, sym.n_value) + rel.addend; } else { break :blk @intCast(i64, target_addr) + rel.addend; @@ -1004,7 +907,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { mem.writeIntLittle(u32, 
self.code.items[rel.offset..][0..4], @bitCast(u32, displacement)); }, .X86_64_RELOC_TLV => { - if (!is_via_thread_ptrs) { + if (!macho_file.tlv_ptr_entries_table.contains(rel.target)) { // We need to rewrite the opcode from movq to leaq. self.code.items[rel.offset - 2] = 0x8d; } @@ -1036,7 +939,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { .X86_64_RELOC_UNSIGNED => { const result = blk: { if (rel.subtractor) |subtractor| { - const sym = macho_file.locals.items[subtractor]; + const sym = macho_file.getSymbol(subtractor); break :blk @intCast(i64, target_addr) - @intCast(i64, sym.n_value) + rel.addend; } else { break :blk @intCast(i64, target_addr) + rel.addend; diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 5e5aca26c1..0ef03c5f32 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -17,6 +17,7 @@ const Allocator = mem.Allocator; const Dwarf = @import("../Dwarf.zig"); const MachO = @import("../MachO.zig"); const Module = @import("../../Module.zig"); +const StringTable = @import("../strtab.zig").StringTable; const TextBlock = MachO.TextBlock; const Type = @import("../../type.zig").Type; @@ -59,6 +60,8 @@ debug_aranges_section_dirty: bool = false, debug_info_header_dirty: bool = false, debug_line_header_dirty: bool = false, +strtab: StringTable(.link) = .{}, + relocs: std.ArrayListUnmanaged(Reloc) = .{}, pub const Reloc = struct { @@ -93,6 +96,7 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: Allocator) !void .strsize = 0, }, }); + try self.strtab.buffer.append(allocator, 0); self.load_commands_dirty = true; } @@ -269,22 +273,30 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti for (self.relocs.items) |*reloc| { const sym = switch (reloc.@"type") { - .direct_load => self.base.locals.items[reloc.target], + .direct_load => self.base.getSymbol(.{ .sym_index = reloc.target, .file = null }), .got_load => blk: { - const got_index = 
self.base.got_entries_table.get(.{ .local = reloc.target }).?; - const got_entry = self.base.got_entries.items[got_index]; - break :blk self.base.locals.items[got_entry.atom.local_sym_index]; + const got_index = self.base.got_entries_table.get(.{ .sym_index = reloc.target, .file = null }).?; + const got_atom = self.base.got_entries.items[got_index].atom; + break :blk got_atom.getSymbol(self.base); }, }; if (sym.n_value == reloc.prev_vaddr) continue; + const sym_name = switch (reloc.@"type") { + .direct_load => self.base.getSymbolName(.{ .sym_index = reloc.target, .file = null }), + .got_load => blk: { + const got_index = self.base.got_entries_table.get(.{ .sym_index = reloc.target, .file = null }).?; + const got_atom = self.base.got_entries.items[got_index].atom; + break :blk got_atom.getName(self.base); + }, + }; const seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; const sect = &seg.sections.items[self.debug_info_section_index.?]; const file_offset = sect.offset + reloc.offset; log.debug("resolving relocation: {d}@{x} ('{s}') at offset {x}", .{ reloc.target, sym.n_value, - self.base.getString(sym.n_strx), + sym_name, file_offset, }); try self.file.pwriteAll(mem.asBytes(&sym.n_value), file_offset); @@ -367,6 +379,7 @@ pub fn deinit(self: *DebugSymbols, allocator: Allocator) void { } self.load_commands.deinit(allocator); self.dwarf.deinit(); + self.strtab.deinit(allocator); self.relocs.deinit(allocator); } @@ -582,21 +595,39 @@ fn writeSymbolTable(self: *DebugSymbols) !void { const tracy = trace(@src()); defer tracy.end(); + const gpa = self.base.base.allocator; const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; symtab.symoff = @intCast(u32, seg.inner.fileoff); - var locals = std.ArrayList(macho.nlist_64).init(self.base.base.allocator); + var locals = std.ArrayList(macho.nlist_64).init(gpa); defer locals.deinit(); - for 
(self.base.locals.items) |sym| { - if (sym.n_strx == 0) continue; - if (self.base.symbol_resolver.get(sym.n_strx)) |_| continue; - try locals.append(sym); + for (self.base.locals.items) |sym, sym_id| { + if (sym.n_strx == 0) continue; // no name, skip + if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip + const sym_loc = MachO.SymbolWithLoc{ .sym_index = @intCast(u32, sym_id), .file = null }; + if (self.base.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip + if (self.base.globals.contains(self.base.getSymbolName(sym_loc))) continue; // global symbol is either an export or import, skip + var out_sym = sym; + out_sym.n_strx = try self.strtab.insert(gpa, self.base.getSymbolName(sym_loc)); + try locals.append(out_sym); + } + + var exports = std.ArrayList(macho.nlist_64).init(gpa); + defer exports.deinit(); + + for (self.base.globals.values()) |global| { + const sym = self.base.getSymbol(global); + if (sym.undf()) continue; // import, skip + if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip + var out_sym = sym; + out_sym.n_strx = try self.strtab.insert(gpa, self.base.getSymbolName(global)); + try exports.append(out_sym); } const nlocals = locals.items.len; - const nexports = self.base.globals.items.len; + const nexports = exports.items.len; const locals_off = symtab.symoff; const locals_size = nlocals * @sizeOf(macho.nlist_64); const exports_off = locals_off + locals_size; @@ -641,7 +672,7 @@ fn writeSymbolTable(self: *DebugSymbols) !void { try self.file.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); - try self.file.pwriteAll(mem.sliceAsBytes(self.base.globals.items), exports_off); + try self.file.pwriteAll(mem.sliceAsBytes(exports.items), exports_off); self.load_commands_dirty = true; } @@ -655,7 +686,7 @@ fn writeStringTable(self: *DebugSymbols) !void { const symtab_size = @intCast(u32, symtab.nsyms * 
@sizeOf(macho.nlist_64)); symtab.stroff = symtab.symoff + symtab_size; - const needed_size = mem.alignForwardGeneric(u64, self.base.strtab.items.len, @alignOf(u64)); + const needed_size = mem.alignForwardGeneric(u64, self.strtab.buffer.items.len, @alignOf(u64)); symtab.strsize = @intCast(u32, needed_size); if (symtab_size + needed_size > seg.inner.filesize) { @@ -692,7 +723,7 @@ fn writeStringTable(self: *DebugSymbols) !void { log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - try self.file.pwriteAll(self.base.strtab.items, symtab.stroff); + try self.file.pwriteAll(self.strtab.buffer.items, symtab.stroff); self.load_commands_dirty = true; } diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 305ae25791..cab5ab63df 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -47,7 +47,7 @@ dwarf_debug_line_index: ?u16 = null, dwarf_debug_line_str_index: ?u16 = null, dwarf_debug_ranges_index: ?u16 = null, -symtab: []const macho.nlist_64 = &.{}, +symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, strtab: []const u8 = &.{}, data_in_code_entries: []const macho.data_in_code_entry = &.{}, @@ -57,17 +57,13 @@ tu_name: ?[]const u8 = null, tu_comp_dir: ?[]const u8 = null, mtime: ?u64 = null, -contained_atoms: std.ArrayListUnmanaged(*Atom) = .{}, -start_atoms: std.AutoHashMapUnmanaged(MachO.MatchingSection, *Atom) = .{}, -end_atoms: std.AutoHashMapUnmanaged(MachO.MatchingSection, *Atom) = .{}, sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{}, -// TODO symbol mapping and its inverse can probably be simple arrays -// instead of hash maps. -symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{}, -reverse_symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{}, +/// List of atoms that map to the symbols parsed from this object file. 
+managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, -analyzed: bool = false, +/// Table of atoms belonging to this object file indexed by the symbol index. +atom_by_index_table: std.AutoHashMapUnmanaged(u32, *Atom) = .{}, const DebugInfo = struct { inner: dwarf.DwarfInfo, @@ -135,97 +131,25 @@ const DebugInfo = struct { } }; -pub fn deinit(self: *Object, allocator: Allocator) void { +pub fn deinit(self: *Object, gpa: Allocator) void { for (self.load_commands.items) |*lc| { - lc.deinit(allocator); + lc.deinit(gpa); } - self.load_commands.deinit(allocator); - allocator.free(self.contents); - self.sections_as_symbols.deinit(allocator); - self.symbol_mapping.deinit(allocator); - self.reverse_symbol_mapping.deinit(allocator); - allocator.free(self.name); + self.load_commands.deinit(gpa); + gpa.free(self.contents); + self.sections_as_symbols.deinit(gpa); + self.atom_by_index_table.deinit(gpa); - self.contained_atoms.deinit(allocator); - self.start_atoms.deinit(allocator); - self.end_atoms.deinit(allocator); + for (self.managed_atoms.items) |atom| { + atom.deinit(gpa); + gpa.destroy(atom); + } + self.managed_atoms.deinit(gpa); + + gpa.free(self.name); if (self.debug_info) |*db| { - db.deinit(allocator); - } -} - -pub fn free(self: *Object, allocator: Allocator, macho_file: *MachO) void { - log.debug("freeObject {*}", .{self}); - - var it = self.end_atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - const first_atom = self.start_atoms.get(match).?; - const last_atom = entry.value_ptr.*; - var atom = first_atom; - - while (true) { - if (atom.local_sym_index != 0) { - macho_file.locals_free_list.append(allocator, atom.local_sym_index) catch {}; - const local = &macho_file.locals.items[atom.local_sym_index]; - local.* = .{ - .n_strx = 0, - .n_type = 0, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - _ = macho_file.atom_by_index_table.remove(atom.local_sym_index); - _ = macho_file.gc_roots.remove(atom); - - for (atom.contained.items) 
|sym_off| { - _ = macho_file.atom_by_index_table.remove(sym_off.local_sym_index); - } - - atom.local_sym_index = 0; - } - if (atom == last_atom) { - break; - } - if (atom.next) |next| { - atom = next; - } else break; - } - } - - self.freeAtoms(macho_file); -} - -fn freeAtoms(self: *Object, macho_file: *MachO) void { - var it = self.end_atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var first_atom: *Atom = self.start_atoms.get(match).?; - var last_atom: *Atom = entry.value_ptr.*; - - if (macho_file.atoms.getPtr(match)) |atom_ptr| { - if (atom_ptr.* == last_atom) { - if (first_atom.prev) |prev| { - // TODO shrink the section size here - atom_ptr.* = prev; - } else { - _ = macho_file.atoms.fetchRemove(match); - } - } - } - - if (first_atom.prev) |prev| { - prev.next = last_atom.next; - } else { - first_atom.prev = null; - } - - if (last_atom.next) |next| { - next.prev = last_atom.prev; - } else { - last_atom.next = null; - } + db.deinit(gpa); } } @@ -327,24 +251,40 @@ pub fn parse(self: *Object, allocator: Allocator, target: std.Target) !void { self.load_commands.appendAssumeCapacity(cmd); } - self.parseSymtab(); + try self.parseSymtab(allocator); self.parseDataInCode(); try self.parseDebugInfo(allocator); } -const NlistWithIndex = struct { - nlist: macho.nlist_64, +const Context = struct { + symtab: []const macho.nlist_64, + strtab: []const u8, +}; + +const SymbolAtIndex = struct { index: u32, - fn lessThan(_: void, lhs: NlistWithIndex, rhs: NlistWithIndex) bool { + fn getSymbol(self: SymbolAtIndex, ctx: Context) macho.nlist_64 { + return ctx.symtab[self.index]; + } + + fn getSymbolName(self: SymbolAtIndex, ctx: Context) []const u8 { + const sym = self.getSymbol(ctx); + if (sym.n_strx == 0) return ""; + return mem.sliceTo(@ptrCast([*:0]const u8, ctx.strtab.ptr + sym.n_strx), 0); + } + + fn lessThan(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool { // We sort by type: defined < undefined, and // afterwards by 
address in each group. Normally, dysymtab should // be enough to guarantee the sort, but turns out not every compiler // is kind enough to specify the symbols in the correct order. - if (lhs.nlist.sect()) { - if (rhs.nlist.sect()) { + const lhs = lhs_index.getSymbol(ctx); + const rhs = rhs_index.getSymbol(ctx); + if (lhs.sect()) { + if (rhs.sect()) { // Same group, sort by address. - return lhs.nlist.n_value < rhs.nlist.n_value; + return lhs.n_value < rhs.n_value; } else { return true; } @@ -352,27 +292,35 @@ const NlistWithIndex = struct { return false; } } - - fn filterByAddress(symbols: []NlistWithIndex, start_addr: u64, end_addr: u64) []NlistWithIndex { - const Predicate = struct { - addr: u64, - - pub fn predicate(self: @This(), symbol: NlistWithIndex) bool { - return symbol.nlist.n_value >= self.addr; - } - }; - - const start = MachO.findFirst(NlistWithIndex, symbols, 0, Predicate{ - .addr = start_addr, - }); - const end = MachO.findFirst(NlistWithIndex, symbols, start, Predicate{ - .addr = end_addr, - }); - - return symbols[start..end]; - } }; +fn filterSymbolsByAddress( + indexes: []SymbolAtIndex, + start_addr: u64, + end_addr: u64, + ctx: Context, +) []SymbolAtIndex { + const Predicate = struct { + addr: u64, + ctx: Context, + + pub fn predicate(pred: @This(), index: SymbolAtIndex) bool { + return index.getSymbol(pred.ctx).n_value >= pred.addr; + } + }; + + const start = MachO.findFirst(SymbolAtIndex, indexes, 0, Predicate{ + .addr = start_addr, + .ctx = ctx, + }); + const end = MachO.findFirst(SymbolAtIndex, indexes, start, Predicate{ + .addr = end_addr, + .ctx = ctx, + }); + + return indexes[start..end]; +} + fn filterRelocs( relocs: []const macho.relocation_info, start_addr: u64, @@ -411,29 +359,32 @@ fn filterDice( return dices[start..end]; } -pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) !void { +/// Splits object into atoms assuming whole cache mode aka traditional linking mode. 
+pub fn splitIntoAtomsWhole(self: *Object, macho_file: *MachO, object_id: u32) !void { const tracy = trace(@src()); defer tracy.end(); + const gpa = macho_file.base.allocator; const seg = self.load_commands.items[self.segment_cmd_index.?].segment; - log.debug("analysing {s}", .{self.name}); + log.debug("splitting object({d}, {s}) into atoms: whole cache mode", .{ object_id, self.name }); // You would expect that the symbol table is at least pre-sorted based on symbol's type: // local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance, // the GO compiler does not necessarily respect that therefore we sort immediately by type // and address within. - var sorted_all_nlists = try std.ArrayList(NlistWithIndex).initCapacity(allocator, self.symtab.len); - defer sorted_all_nlists.deinit(); + const context = Context{ + .symtab = self.getSourceSymtab(), + .strtab = self.strtab, + }; + var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(gpa, context.symtab.len); + defer sorted_all_syms.deinit(); - for (self.symtab) |nlist, index| { - sorted_all_nlists.appendAssumeCapacity(.{ - .nlist = nlist, - .index = @intCast(u32, index), - }); + for (context.symtab) |_, index| { + sorted_all_syms.appendAssumeCapacity(.{ .index = @intCast(u32, index) }); } - sort.sort(NlistWithIndex, sorted_all_nlists.items, {}, NlistWithIndex.lessThan); + sort.sort(SymbolAtIndex, sorted_all_syms.items, context, SymbolAtIndex.lessThan); // Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we // have to infer the start of undef section in the symtab ourselves. @@ -441,30 +392,36 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! 
const dysymtab = self.load_commands.items[cmd_index].dysymtab; break :blk dysymtab.iundefsym; } else blk: { - var iundefsym: usize = sorted_all_nlists.items.len; + var iundefsym: usize = sorted_all_syms.items.len; while (iundefsym > 0) : (iundefsym -= 1) { - const nlist = sorted_all_nlists.items[iundefsym - 1]; - if (nlist.nlist.sect()) break; + const sym = sorted_all_syms.items[iundefsym - 1].getSymbol(context); + if (sym.sect()) break; } break :blk iundefsym; }; // We only care about defined symbols, so filter every other out. - const sorted_nlists = sorted_all_nlists.items[0..iundefsym]; - + const sorted_syms = sorted_all_syms.items[0..iundefsym]; const dead_strip = macho_file.base.options.gc_sections orelse false; const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0 and (macho_file.base.options.optimize_mode != .Debug or dead_strip); + // const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; for (seg.sections.items) |sect, id| { const sect_id = @intCast(u8, id); - log.debug("parsing section '{s},{s}' into Atoms", .{ sect.segName(), sect.sectName() }); + log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() }); // Get matching segment/section in the final artifact. const match = (try macho_file.getMatchingSection(sect)) orelse { - log.debug("unhandled section", .{}); + log.debug(" unhandled section", .{}); continue; }; + const target_sect = macho_file.getSection(match); + log.debug(" output sect({d}, '{s},{s}')", .{ + macho_file.getSectionOrdinal(match), + target_sect.segName(), + target_sect.sectName(), + }); const is_zerofill = blk: { const section_type = sect.type_(); @@ -482,10 +439,11 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! ); // Symbols within this section only. 
- const filtered_nlists = NlistWithIndex.filterByAddress( - sorted_nlists, + const filtered_syms = filterSymbolsByAddress( + sorted_syms, sect.addr, sect.addr + sect.size, + context, ); macho_file.has_dices = macho_file.has_dices or blk: { @@ -498,32 +456,33 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! }; macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null; - if (subsections_via_symbols and filtered_nlists.len > 0) { + if (subsections_via_symbols and filtered_syms.len > 0) { // If the first nlist does not match the start of the section, // then we need to encapsulate the memory range [section start, first symbol) // as a temporary symbol and insert the matching Atom. - const first_nlist = filtered_nlists[0].nlist; - if (first_nlist.n_value > sect.addr) { - const local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { - const local_sym_index = @intCast(u32, macho_file.locals.items.len); - try macho_file.locals.append(allocator, .{ + const first_sym = filtered_syms[0].getSymbol(context); + if (first_sym.n_value > sect.addr) { + const sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { + const sym_index = @intCast(u32, self.symtab.items.len); + try self.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? 
+ 1), + .n_sect = macho_file.getSectionOrdinal(match), .n_desc = 0, .n_value = sect.addr, }); - try self.sections_as_symbols.putNoClobber(allocator, sect_id, local_sym_index); - break :blk local_sym_index; + try self.sections_as_symbols.putNoClobber(gpa, sect_id, sym_index); + break :blk sym_index; }; - const atom_size = first_nlist.n_value - sect.addr; + const atom_size = first_sym.n_value - sect.addr; const atom_code: ?[]const u8 = if (code) |cc| cc[0..atom_size] else null; - try self.parseIntoAtom( - allocator, - local_sym_index, + const atom = try self.createAtomFromSubsection( + macho_file, + object_id, + sym_index, atom_size, sect.@"align", atom_code, @@ -531,33 +490,27 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! &.{}, match, sect, - macho_file, ); + try macho_file.addAtomToSection(atom, match); } - var next_nlist_count: usize = 0; - while (next_nlist_count < filtered_nlists.len) { - const next_nlist = filtered_nlists[next_nlist_count]; - const addr = next_nlist.nlist.n_value; - const atom_nlists = NlistWithIndex.filterByAddress( - filtered_nlists[next_nlist_count..], + var next_sym_count: usize = 0; + while (next_sym_count < filtered_syms.len) { + const next_sym = filtered_syms[next_sym_count].getSymbol(context); + const addr = next_sym.n_value; + const atom_syms = filterSymbolsByAddress( + filtered_syms[next_sym_count..], addr, addr + 1, + context, ); - next_nlist_count += atom_nlists.len; - - const local_sym_index = @intCast(u32, macho_file.locals.items.len); - try macho_file.locals.append(allocator, .{ - .n_strx = 0, - .n_type = macho.N_SECT, - .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? 
+ 1), - .n_desc = 0, - .n_value = addr, - }); + next_sym_count += atom_syms.len; + assert(atom_syms.len > 0); + const sym_index = atom_syms[0].index; const atom_size = blk: { - const end_addr = if (next_nlist_count < filtered_nlists.len) - filtered_nlists[next_nlist_count].nlist.n_value + const end_addr = if (next_sym_count < filtered_syms.len) + filtered_syms[next_sym_count].getSymbol(context).n_value else sect.addr + sect.size; break :blk end_addr - addr; @@ -570,86 +523,91 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! math.min(@ctz(u64, addr), sect.@"align") else sect.@"align"; - try self.parseIntoAtom( - allocator, - local_sym_index, + const atom = try self.createAtomFromSubsection( + macho_file, + object_id, + sym_index, atom_size, atom_align, atom_code, relocs, - atom_nlists, + atom_syms[1..], match, sect, - macho_file, ); + try macho_file.addAtomToSection(atom, match); } } else { // If there is no symbol to refer to this atom, we create // a temp one, unless we already did that when working out the relocations // of other atoms. - const local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { - const local_sym_index = @intCast(u32, macho_file.locals.items.len); - try macho_file.locals.append(allocator, .{ + const sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { + const sym_index = @intCast(u32, self.symtab.items.len); + try self.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? 
+ 1), + .n_sect = macho_file.getSectionOrdinal(match), .n_desc = 0, .n_value = sect.addr, }); - try self.sections_as_symbols.putNoClobber(allocator, sect_id, local_sym_index); - break :blk local_sym_index; + try self.sections_as_symbols.putNoClobber(gpa, sect_id, sym_index); + break :blk sym_index; }; - try self.parseIntoAtom( - allocator, - local_sym_index, + const atom = try self.createAtomFromSubsection( + macho_file, + object_id, + sym_index, sect.size, sect.@"align", code, relocs, - filtered_nlists, + filtered_syms, match, sect, - macho_file, ); + try macho_file.addAtomToSection(atom, match); } } } -fn parseIntoAtom( +fn createAtomFromSubsection( self: *Object, - allocator: Allocator, - local_sym_index: u32, + macho_file: *MachO, + object_id: u32, + sym_index: u32, size: u64, alignment: u32, code: ?[]const u8, relocs: []const macho.relocation_info, - nlists: []const NlistWithIndex, + indexes: []const SymbolAtIndex, match: MatchingSection, sect: macho.section_64, - macho_file: *MachO, -) !void { - const sym = macho_file.locals.items[local_sym_index]; - const align_pow_2 = try math.powi(u32, 2, alignment); - const aligned_size = mem.alignForwardGeneric(u64, size, align_pow_2); - const atom = try macho_file.createEmptyAtom(local_sym_index, aligned_size, alignment); +) !*Atom { + const gpa = macho_file.base.allocator; + const sym = &self.symtab.items[sym_index]; + const atom = try MachO.createEmptyAtom(gpa, sym_index, size, alignment); + atom.file = object_id; + sym.n_sect = macho_file.getSectionOrdinal(match); + + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); + try self.managed_atoms.append(gpa, atom); if (code) |cc| { + assert(size == cc.len); mem.copy(u8, atom.code.items, cc); } const base_offset = sym.n_value - sect.addr; const filtered_relocs = filterRelocs(relocs, base_offset, base_offset + size); try atom.parseRelocs(filtered_relocs, .{ + .macho_file = macho_file, .base_addr = sect.addr, .base_offset = @intCast(i32, base_offset), - 
.allocator = allocator, - .object = self, - .macho_file = macho_file, }); if (macho_file.has_dices) { const dices = filterDice(self.data_in_code_entries, sym.n_value, sym.n_value + size); - try atom.dices.ensureTotalCapacity(allocator, dices.len); + try atom.dices.ensureTotalCapacity(gpa, dices.len); for (dices) |dice| { atom.dices.appendAssumeCapacity(.{ @@ -665,19 +623,14 @@ fn parseIntoAtom( // the filtered symbols and note which symbol is contained within so that // we can properly allocate addresses down the line. // While we're at it, we need to update segment,section mapping of each symbol too. - try atom.contained.ensureTotalCapacity(allocator, nlists.len); - - for (nlists) |nlist_with_index| { - const nlist = nlist_with_index.nlist; - const sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable; - const this_sym = &macho_file.locals.items[sym_index]; - this_sym.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1); + try atom.contained.ensureTotalCapacity(gpa, indexes.len + 1); + { const stab: ?Atom.Stab = if (self.debug_info) |di| blk: { // TODO there has to be a better to handle this. for (di.inner.func_list.items) |func| { if (func.pc_range) |range| { - if (nlist.n_value >= range.start and nlist.n_value < range.end) { + if (sym.n_value >= range.start and sym.n_value < range.end) { break :blk Atom.Stab{ .function = range.end - range.start, }; @@ -690,12 +643,39 @@ fn parseIntoAtom( } else null; atom.contained.appendAssumeCapacity(.{ - .local_sym_index = sym_index, - .offset = nlist.n_value - sym.n_value, + .sym_index = sym_index, + .offset = 0, + .stab = stab, + }); + } + + for (indexes) |inner_sym_index| { + const inner_sym = &self.symtab.items[inner_sym_index.index]; + inner_sym.n_sect = macho_file.getSectionOrdinal(match); + + const stab: ?Atom.Stab = if (self.debug_info) |di| blk: { + // TODO there has to be a better to handle this. 
+ for (di.inner.func_list.items) |func| { + if (func.pc_range) |range| { + if (inner_sym.n_value >= range.start and inner_sym.n_value < range.end) { + break :blk Atom.Stab{ + .function = range.end - range.start, + }; + } + } + } + // TODO + // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global; + break :blk .static; + } else null; + + atom.contained.appendAssumeCapacity(.{ + .sym_index = inner_sym_index.index, + .offset = inner_sym.n_value - sym.n_value, .stab = stab, }); - try macho_file.atom_by_index_table.putNoClobber(allocator, sym_index, atom); + try self.atom_by_index_table.putNoClobber(gpa, inner_sym_index.index, atom); } const is_gc_root = blk: { @@ -714,30 +694,28 @@ fn parseIntoAtom( } }; if (is_gc_root) { - try macho_file.gc_roots.putNoClobber(allocator, atom, {}); + try macho_file.gc_roots.putNoClobber(gpa, atom, {}); } - if (!self.start_atoms.contains(match)) { - try self.start_atoms.putNoClobber(allocator, match, atom); - } - - if (self.end_atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try self.end_atoms.putNoClobber(allocator, match, atom); - } - try self.contained_atoms.append(allocator, atom); + return atom; } -fn parseSymtab(self: *Object) void { +fn parseSymtab(self: *Object, allocator: Allocator) !void { const index = self.symtab_cmd_index orelse return; const symtab = self.load_commands.items[index].symtab; + try self.symtab.appendSlice(allocator, self.getSourceSymtab()); + self.strtab = self.contents[symtab.stroff..][0..symtab.strsize]; +} + +fn getSourceSymtab(self: *Object) []const macho.nlist_64 { + const index = self.symtab_cmd_index orelse return &[0]macho.nlist_64{}; + const symtab = self.load_commands.items[index].symtab; const symtab_size = @sizeOf(macho.nlist_64) * symtab.nsyms; const raw_symtab = self.contents[symtab.symoff..][0..symtab_size]; - self.symtab = mem.bytesAsSlice(macho.nlist_64, @alignCast(@alignOf(macho.nlist_64), raw_symtab)); - self.strtab = 
self.contents[symtab.stroff..][0..symtab.strsize]; + return mem.bytesAsSlice( + macho.nlist_64, + @alignCast(@alignOf(macho.nlist_64), raw_symtab), + ); } fn parseDebugInfo(self: *Object, allocator: Allocator) !void { @@ -783,8 +761,7 @@ fn parseDataInCode(self: *Object) void { } fn getSectionContents(self: Object, sect_id: u16) []const u8 { - const seg = self.load_commands.items[self.segment_cmd_index.?].segment; - const sect = seg.sections.items[sect_id]; + const sect = self.getSection(sect_id); log.debug("getting {s},{s} data at 0x{x} - 0x{x}", .{ sect.segName(), sect.sectName(), @@ -798,3 +775,9 @@ pub fn getString(self: Object, off: u32) []const u8 { assert(off < self.strtab.len); return mem.sliceTo(@ptrCast([*:0]const u8, self.strtab.ptr + off), 0); } + +pub fn getSection(self: Object, n_sect: u16) macho.section_64 { + const seg = self.load_commands.items[self.segment_cmd_index.?].segment; + assert(n_sect < seg.sections.items.len); + return seg.sections.items[n_sect]; +} diff --git a/src/link/strtab.zig b/src/link/strtab.zig new file mode 100644 index 0000000000..ae9b00027e --- /dev/null +++ b/src/link/strtab.zig @@ -0,0 +1,113 @@ +const std = @import("std"); +const mem = std.mem; + +const Allocator = mem.Allocator; +const StringIndexAdapter = std.hash_map.StringIndexAdapter; +const StringIndexContext = std.hash_map.StringIndexContext; + +pub fn StringTable(comptime log_scope: @Type(.EnumLiteral)) type { + return struct { + const Self = @This(); + + const log = std.log.scoped(log_scope); + + buffer: std.ArrayListUnmanaged(u8) = .{}, + table: std.HashMapUnmanaged(u32, bool, StringIndexContext, std.hash_map.default_max_load_percentage) = .{}, + + pub fn deinit(self: *Self, gpa: Allocator) void { + self.buffer.deinit(gpa); + self.table.deinit(gpa); + } + + pub fn toOwnedSlice(self: *Self, gpa: Allocator) []const u8 { + const result = self.buffer.toOwnedSlice(gpa); + self.table.clearRetainingCapacity(); + return result; + } + + pub const PrunedResult = struct { + 
buffer: []const u8, + idx_map: std.AutoHashMap(u32, u32), + }; + + pub fn toPrunedResult(self: *Self, gpa: Allocator) !PrunedResult { + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + try buffer.ensureTotalCapacity(self.buffer.items.len); + buffer.appendAssumeCapacity(0); + + var idx_map = std.AutoHashMap(u32, u32).init(gpa); + errdefer idx_map.deinit(); + try idx_map.ensureTotalCapacity(self.table.count()); + + var it = self.table.iterator(); + while (it.next()) |entry| { + const off = entry.key_ptr.*; + const save = entry.value_ptr.*; + if (!save) continue; + const new_off = @intCast(u32, buffer.items.len); + buffer.appendSliceAssumeCapacity(self.getAssumeExists(off)); + idx_map.putAssumeCapacityNoClobber(off, new_off); + } + + self.buffer.clearRetainingCapacity(); + self.table.clearRetainingCapacity(); + + return PrunedResult{ + .buffer = buffer.toOwnedSlice(), + .idx_map = idx_map, + }; + } + + pub fn insert(self: *Self, gpa: Allocator, string: []const u8) !u32 { + const gop = try self.table.getOrPutContextAdapted(gpa, @as([]const u8, string), StringIndexAdapter{ + .bytes = &self.buffer, + }, StringIndexContext{ + .bytes = &self.buffer, + }); + if (gop.found_existing) { + const off = gop.key_ptr.*; + gop.value_ptr.* = true; + log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off }); + return off; + } + + try self.buffer.ensureUnusedCapacity(gpa, string.len + 1); + const new_off = @intCast(u32, self.buffer.items.len); + + log.debug("writing new string '{s}' at offset 0x{x}", .{ string, new_off }); + + self.buffer.appendSliceAssumeCapacity(string); + self.buffer.appendAssumeCapacity(0); + + gop.key_ptr.* = new_off; + gop.value_ptr.* = true; + + return new_off; + } + + pub fn delete(self: *Self, string: []const u8) void { + const value_ptr = self.table.getPtrAdapted(@as([]const u8, string), StringIndexAdapter{ + .bytes = &self.buffer, + }) orelse return; + value_ptr.* = false; + log.debug("marked '{s}' for deletion", .{string}); + } 
+ + pub fn getOffset(self: *Self, string: []const u8) ?u32 { + return self.table.getKeyAdapted(string, StringIndexAdapter{ + .bytes = &self.buffer, + }); + } + + pub fn get(self: Self, off: u32) ?[]const u8 { + log.debug("getting string at 0x{x}", .{off}); + if (off >= self.buffer.items.len) return null; + return mem.sliceTo(@ptrCast([*:0]const u8, self.buffer.items.ptr + off), 0); + } + + pub fn getAssumeExists(self: Self, off: u32) []const u8 { + return self.get(off) orelse unreachable; + } + }; +} From eeb6d8f0457b42cef560c1e4efeca69c0fe276fe Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 14 Jul 2022 09:05:45 +0200 Subject: [PATCH 08/27] macho: fix compilation issues on 32bit hosts --- src/link/MachO/Object.zig | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index cab5ab63df..7170e8efdb 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -77,29 +77,29 @@ const DebugInfo = struct { pub fn parseFromObject(allocator: Allocator, object: *const Object) !?DebugInfo { var debug_info = blk: { const index = object.dwarf_debug_info_index orelse return null; - break :blk object.getSectionContents(index); + break :blk try object.getSectionContents(index); }; var debug_abbrev = blk: { const index = object.dwarf_debug_abbrev_index orelse return null; - break :blk object.getSectionContents(index); + break :blk try object.getSectionContents(index); }; var debug_str = blk: { const index = object.dwarf_debug_str_index orelse return null; - break :blk object.getSectionContents(index); + break :blk try object.getSectionContents(index); }; var debug_line = blk: { const index = object.dwarf_debug_line_index orelse return null; - break :blk object.getSectionContents(index); + break :blk try object.getSectionContents(index); }; var debug_line_str = blk: { if (object.dwarf_debug_line_str_index) |ind| { - break :blk object.getSectionContents(ind); + 
break :blk try object.getSectionContents(ind); } break :blk &[0]u8{}; }; var debug_ranges = blk: { if (object.dwarf_debug_ranges_index) |ind| { - break :blk object.getSectionContents(ind); + break :blk try object.getSectionContents(ind); } break :blk &[0]u8{}; }; @@ -429,7 +429,7 @@ pub fn splitIntoAtomsWhole(self: *Object, macho_file: *MachO, object_id: u32) !v }; // Read section's code - const code: ?[]const u8 = if (!is_zerofill) self.getSectionContents(sect_id) else null; + const code: ?[]const u8 = if (!is_zerofill) try self.getSectionContents(sect_id) else null; // Read section's list of relocations const raw_relocs = self.contents[sect.reloff..][0 .. sect.nreloc * @sizeOf(macho.relocation_info)]; @@ -475,10 +475,10 @@ pub fn splitIntoAtomsWhole(self: *Object, macho_file: *MachO, object_id: u32) !v break :blk sym_index; }; const atom_size = first_sym.n_value - sect.addr; - const atom_code: ?[]const u8 = if (code) |cc| - cc[0..atom_size] - else - null; + const atom_code: ?[]const u8 = if (code) |cc| blk: { + const size = math.cast(usize, atom_size) orelse return error.Overflow; + break :blk cc[0..size]; + } else null; const atom = try self.createAtomFromSubsection( macho_file, object_id, @@ -515,10 +515,11 @@ pub fn splitIntoAtomsWhole(self: *Object, macho_file: *MachO, object_id: u32) !v sect.addr + sect.size; break :blk end_addr - addr; }; - const atom_code: ?[]const u8 = if (code) |cc| - cc[addr - sect.addr ..][0..atom_size] - else - null; + const atom_code: ?[]const u8 = if (code) |cc| blk: { + const start = math.cast(usize, addr - sect.addr) orelse return error.Overflow; + const size = math.cast(usize, atom_size) orelse return error.Overflow; + break :blk cc[start..][0..size]; + } else null; const atom_align = if (addr > 0) math.min(@ctz(u64, addr), sect.@"align") else @@ -760,15 +761,16 @@ fn parseDataInCode(self: *Object) void { ); } -fn getSectionContents(self: Object, sect_id: u16) []const u8 { +fn getSectionContents(self: Object, sect_id: u16) 
error{Overflow}![]const u8 { const sect = self.getSection(sect_id); + const size = math.cast(usize, sect.size) orelse return error.Overflow; log.debug("getting {s},{s} data at 0x{x} - 0x{x}", .{ sect.segName(), sect.sectName(), sect.offset, sect.offset + sect.size, }); - return self.contents[sect.offset..][0..sect.size]; + return self.contents[sect.offset..][0..size]; } pub fn getString(self: Object, off: u32) []const u8 { From d80fcc8a0b5594a6eb0fb409f4e5e5f949eec2fe Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 12 Jul 2022 23:03:15 +0200 Subject: [PATCH 09/27] macho: rework symbol handling for incremental stage2 builds --- src/link/MachO.zig | 1047 +++++++++++++++++------------- src/link/MachO/Atom.zig | 18 +- src/link/MachO/DebugSymbols.zig | 4 +- src/link/MachO/Object.zig | 30 +- test/link/macho/objcpp/build.zig | 1 - 5 files changed, 640 insertions(+), 460 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 8967e2a3e3..7ae013e6bc 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -70,10 +70,10 @@ d_sym: ?DebugSymbols = null, /// For x86_64 that's 4KB, whereas for aarch64, that's 16KB. page_size: u16, -/// If true, the linker will preallocate several sections and segments before starting the linking -/// process. This is for example true for stage2 debug builds, however, this is false for stage1 -/// and potentially stage2 release builds in the future. -needs_prealloc: bool = true, +/// Mode of operation: incremental - will preallocate segments/sections and is compatible with +/// watch and HCS modes of operation; one_shot - will link relocatables in a traditional, one-shot +/// fashion (default for LLVM backend). +mode: enum { incremental, one_shot }, /// The absolute address of the entry point. 
entry_addr: ?u64 = null, @@ -153,7 +153,7 @@ rustc_section_size: u64 = 0, locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, globals: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, -unresolved: std.AutoArrayHashMapUnmanaged(u32, void) = .{}, +unresolved: std.AutoArrayHashMapUnmanaged(u32, bool) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, @@ -161,7 +161,7 @@ dyld_stub_binder_index: ?u32 = null, dyld_private_atom: ?*Atom = null, stub_helper_preamble_atom: ?*Atom = null, -strtab: StringTable(.link) = .{}, +strtab: StringTable(.strtab) = .{}, tlv_ptr_entries: std.ArrayListUnmanaged(Entry) = .{}, tlv_ptr_entries_free_list: std.ArrayListUnmanaged(u32) = .{}, @@ -182,6 +182,12 @@ sections_order_dirty: bool = false, has_dices: bool = false, has_stabs: bool = false, +/// A helper var to indicate if we are at the start of the incremental updates, or +/// already somewhere further along the update-and-run chain. +/// TODO once we add opening a prelinked output binary from file, this will become +/// obsolete as we will carry on where we left off. +cold_start: bool = true, + section_ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}, /// A list of atoms that have surplus capacity. 
This list can have false @@ -387,7 +393,6 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*MachO { const requires_adhoc_codesig = cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator); const use_llvm = build_options.have_llvm and options.use_llvm; const use_stage1 = build_options.is_stage1 and options.use_stage1; - const needs_prealloc = !(use_stage1 or use_llvm or options.cache_mode == .whole); const self = try gpa.create(MachO); errdefer gpa.destroy(self); @@ -400,8 +405,14 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*MachO { .file = null, }, .page_size = page_size, - .code_signature = if (requires_adhoc_codesig) CodeSignature.init(page_size) else null, - .needs_prealloc = needs_prealloc, + .code_signature = if (requires_adhoc_codesig) + CodeSignature.init(page_size) + else + null, + .mode = if (use_stage1 or use_llvm or options.cache_mode == .whole) + .one_shot + else + .incremental, }; if (use_llvm and !use_stage1) { @@ -429,32 +440,198 @@ pub fn flush(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) !v return error.TODOImplementWritingStaticLibFiles; } } - return self.flushModule(comp, prog_node); + + switch (self.mode) { + .one_shot => return self.linkOneShot(comp, prog_node), + .incremental => return self.flushModule(comp, prog_node), + } } pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) !void { const tracy = trace(@src()); defer tracy.end(); - const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; - - if (build_options.have_llvm and !use_stage1) { + if (build_options.have_llvm) { if (self.llvm_object) |llvm_object| { - try llvm_object.flushModule(comp, prog_node); - - llvm_object.destroy(self.base.allocator); - self.llvm_object = null; - - if (self.base.options.output_mode == .Lib and self.base.options.link_mode == .Static) { - return; - } + return try llvm_object.flushModule(comp, prog_node); } } + var arena_allocator = 
std.heap.ArenaAllocator.init(self.base.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + var sub_prog_node = prog_node.start("MachO Flush", 0); sub_prog_node.activate(); defer sub_prog_node.end(); + const module = self.base.options.module orelse return error.LinkingWithoutZigSourceUnimplemented; + + if (self.d_sym) |*d_sym| { + try d_sym.dwarf.flushModule(&self.base, module); + } + + var libs = std.StringArrayHashMap(SystemLib).init(arena); + try self.resolveLibSystem(arena, comp, &.{}, &libs); + + const id_symlink_basename = "zld.id"; + + const cache_dir_handle = module.zig_cache_artifact_directory.handle; + var man: Cache.Manifest = undefined; + defer if (!self.base.options.disable_lld_caching) man.deinit(); + + var digest: [Cache.hex_digest_len]u8 = undefined; + var cache_miss: bool = self.cold_start; + + if (!self.base.options.disable_lld_caching) { + man = comp.cache_parent.obtain(); + self.base.releaseLock(); + + man.hash.addListOfBytes(libs.keys()); + + _ = try man.hit(); + digest = man.final(); + + var prev_digest_buf: [digest.len]u8 = undefined; + const prev_digest: []u8 = Cache.readSmallFile( + cache_dir_handle, + id_symlink_basename, + &prev_digest_buf, + ) catch |err| blk: { + log.debug("MachO Zld new_digest={s} error: {s}", .{ + std.fmt.fmtSliceHexLower(&digest), + @errorName(err), + }); + // Handle this as a cache miss. + break :blk prev_digest_buf[0..0]; + }; + if (mem.eql(u8, prev_digest, &digest)) { + log.debug("MachO Zld digest={s} match - skipping parsing linker line objects", .{ + std.fmt.fmtSliceHexLower(&digest), + }); + self.base.lock = man.toOwnedLock(); + } else { + log.debug("MachO Zld prev_digest={s} new_digest={s}", .{ + std.fmt.fmtSliceHexLower(prev_digest), + std.fmt.fmtSliceHexLower(&digest), + }); + // We are about to change the output file to be different, so we invalidate the build hash now. 
+ cache_dir_handle.deleteFile(id_symlink_basename) catch |err| switch (err) { + error.FileNotFound => {}, + else => |e| return e, + }; + cache_miss = true; + } + } + + if (cache_miss) { + var dependent_libs = std.fifo.LinearFifo(struct { + id: Dylib.Id, + parent: u16, + }, .Dynamic).init(self.base.allocator); + defer dependent_libs.deinit(); + try self.parseLibs(libs.keys(), libs.values(), self.base.options.sysroot, &dependent_libs); + try self.parseDependentLibs(self.base.options.sysroot, &dependent_libs); + } + + try self.createMhExecuteHeaderSymbol(); + try self.resolveDyldStubBinder(); + try self.createDyldPrivateAtom(); + try self.createStubHelperPreambleAtom(); + try self.resolveSymbolsInDylibs(); + try self.addCodeSignatureLC(); + + if (self.unresolved.count() > 0) { + return error.UndefinedSymbolReference; + } + + try self.allocateSpecialSymbols(); + + if (build_options.enable_logging) { + self.logSymtab(); + self.logSectionOrdinals(); + self.logAtoms(); + } + + try self.writeAtomsIncremental(); + + try self.setEntryPoint(); + try self.updateSectionOrdinals(); + try self.writeLinkeditSegment(); + + if (self.d_sym) |*d_sym| { + // Flush debug symbols bundle. + try d_sym.flushModule(self.base.allocator, self.base.options); + } + + // code signature and entitlements + if (self.base.options.entitlements) |path| { + if (self.code_signature) |*csig| { + try csig.addEntitlements(self.base.allocator, path); + csig.code_directory.ident = self.base.options.emit.?.sub_path; + } else { + var csig = CodeSignature.init(self.page_size); + try csig.addEntitlements(self.base.allocator, path); + csig.code_directory.ident = self.base.options.emit.?.sub_path; + self.code_signature = csig; + } + } + + if (self.code_signature) |*csig| { + csig.clear(self.base.allocator); + csig.code_directory.ident = self.base.options.emit.?.sub_path; + // Preallocate space for the code signature. 
+ // We need to do this at this stage so that we have the load commands with proper values + // written out to the file. + // The most important here is to have the correct vm and filesize of the __LINKEDIT segment + // where the code signature goes into. + try self.writeCodeSignaturePadding(csig); + } + + try self.writeLoadCommands(); + try self.writeHeader(); + + if (self.entry_addr == null and self.base.options.output_mode == .Exe) { + log.debug("flushing. no_entry_point_found = true", .{}); + self.error_flags.no_entry_point_found = true; + } else { + log.debug("flushing. no_entry_point_found = false", .{}); + self.error_flags.no_entry_point_found = false; + } + + assert(!self.load_commands_dirty); + + if (self.code_signature) |*csig| { + try self.writeCodeSignature(csig); // code signing always comes last + } + + if (build_options.enable_link_snapshots) { + if (self.base.options.enable_link_snapshots) + try self.snapshotState(); + } + + if (!self.base.options.disable_lld_caching and cache_miss) { + // Update the file with the digest. If it fails we can continue; it only + // means that the next invocation will have an unnecessary cache miss. + Cache.writeSmallFile(cache_dir_handle, id_symlink_basename, &digest) catch |err| { + log.debug("failed to save linking hash digest file: {s}", .{@errorName(err)}); + }; + // Again failure here only means an unnecessary cache miss. + man.writeManifest() catch |err| { + log.debug("failed to write cache manifest when linking: {s}", .{@errorName(err)}); + }; + // We hang on to this lock so that the output file path can be used without + // other processes clobbering it. 
+ self.base.lock = man.toOwnedLock(); + } + + self.cold_start = false; +} + +fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) !void { + const tracy = trace(@src()); + defer tracy.end(); + var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator); defer arena_allocator.deinit(); const arena = arena_allocator.allocator(); @@ -465,7 +642,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No // If there is no Zig code to compile, then we should skip flushing the output file because it // will not be part of the linker line anyway. const module_obj_path: ?[]const u8 = if (self.base.options.module) |module| blk: { - if (use_stage1) { + if (self.base.options.use_stage1) { const obj_basename = try std.zig.binNameAlloc(arena, .{ .root_name = self.base.options.root_name, .target = self.base.options.target, @@ -482,20 +659,19 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No } } - const obj_basename = self.base.intermediary_basename orelse break :blk null; + try self.flushModule(comp, prog_node); if (fs.path.dirname(full_out_path)) |dirname| { - break :blk try fs.path.join(arena, &.{ dirname, obj_basename }); + break :blk try fs.path.join(arena, &.{ dirname, self.base.intermediary_basename.? 
}); } else { - break :blk obj_basename; + break :blk self.base.intermediary_basename.?; } } else null; - if (self.d_sym) |*d_sym| { - if (self.base.options.module) |module| { - try d_sym.dwarf.flushModule(&self.base, module); - } - } + var sub_prog_node = prog_node.start("MachO Flush", 0); + sub_prog_node.activate(); + sub_prog_node.context.refresh(); + defer sub_prog_node.end(); const is_lib = self.base.options.output_mode == .Lib; const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; @@ -503,25 +679,13 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No const stack_size = self.base.options.stack_size_override orelse 0; const id_symlink_basename = "zld.id"; - const cache_dir_handle = blk: { - if (use_stage1) { - break :blk directory.handle; - } - if (self.base.options.module) |module| { - break :blk module.zig_cache_artifact_directory.handle; - } - break :blk directory.handle; - }; var man: Cache.Manifest = undefined; defer if (!self.base.options.disable_lld_caching) man.deinit(); var digest: [Cache.hex_digest_len]u8 = undefined; - cache: { - if ((use_stage1 and self.base.options.disable_lld_caching) or self.base.options.cache_mode == .whole) - break :cache; - + if (!self.base.options.disable_lld_caching) { man = comp.cache_parent.obtain(); // We are about to obtain this lock, so here we give other processes a chance first. @@ -564,7 +728,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No var prev_digest_buf: [digest.len]u8 = undefined; const prev_digest: []u8 = Cache.readSmallFile( - cache_dir_handle, + directory.handle, id_symlink_basename, &prev_digest_buf, ) catch |err| blk: { @@ -577,15 +741,11 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No }; if (mem.eql(u8, prev_digest, &digest)) { // Hot diggity dog! The output binary is already there. 
- - const use_llvm = build_options.have_llvm and self.base.options.use_llvm; - if (use_llvm or use_stage1) { - log.debug("MachO Zld digest={s} match - skipping invocation", .{std.fmt.fmtSliceHexLower(&digest)}); - self.base.lock = man.toOwnedLock(); - return; - } else { - log.debug("MachO Zld digest={s} match", .{std.fmt.fmtSliceHexLower(&digest)}); - } + log.debug("MachO Zld digest={s} match - skipping invocation", .{ + std.fmt.fmtSliceHexLower(&digest), + }); + self.base.lock = man.toOwnedLock(); + return; } log.debug("MachO Zld prev_digest={s} new_digest={s}", .{ std.fmt.fmtSliceHexLower(prev_digest), @@ -593,7 +753,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No }); // We are about to change the output file to be different, so we invalidate the build hash now. - cache_dir_handle.deleteFile(id_symlink_basename) catch |err| switch (err) { + directory.handle.deleteFile(id_symlink_basename) catch |err| switch (err) { error.FileNotFound => {}, else => |e| return e, }; @@ -624,24 +784,22 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{}); } } else { - if (use_stage1) { - const sub_path = self.base.options.emit.?.sub_path; - self.base.file = try cache_dir_handle.createFile(sub_path, .{ - .truncate = true, - .read = true, - .mode = link.determineMode(self.base.options), - }); - // Index 0 is always a null symbol. - try self.locals.append(self.base.allocator, .{ - .n_strx = 0, - .n_type = 0, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try self.strtab.buffer.append(self.base.allocator, 0); - try self.populateMissingMetadata(); - } + const sub_path = self.base.options.emit.?.sub_path; + self.base.file = try directory.handle.createFile(sub_path, .{ + .truncate = true, + .read = true, + .mode = link.determineMode(self.base.options), + }); + // Index 0 is always a null symbol. 
+ try self.locals.append(self.base.allocator, .{ + .n_strx = 0, + .n_type = 0, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try self.strtab.buffer.append(self.base.allocator, 0); + try self.populateMissingMetadata(); var lib_not_found = false; var framework_not_found = false; @@ -757,40 +915,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No } } - // If we were given the sysroot, try to look there first for libSystem.B.{dylib, tbd}. - var libsystem_available = false; - if (self.base.options.sysroot != null) blk: { - // Try stub file first. If we hit it, then we're done as the stub file - // re-exports every single symbol definition. - for (lib_dirs.items) |dir| { - if (try resolveLib(arena, dir, "System", ".tbd")) |full_path| { - try libs.put(full_path, .{ .needed = true }); - libsystem_available = true; - break :blk; - } - } - // If we didn't hit the stub file, try .dylib next. However, libSystem.dylib - // doesn't export libc.dylib which we'll need to resolve subsequently also. 
- for (lib_dirs.items) |dir| { - if (try resolveLib(arena, dir, "System", ".dylib")) |libsystem_path| { - if (try resolveLib(arena, dir, "c", ".dylib")) |libc_path| { - try libs.put(libsystem_path, .{ .needed = true }); - try libs.put(libc_path, .{ .needed = true }); - libsystem_available = true; - break :blk; - } - } - } - } - if (!libsystem_available) { - const libsystem_name = try std.fmt.allocPrint(arena, "libSystem.{d}.tbd", .{ - self.base.options.target.os.version_range.semver.min.major, - }); - const full_path = try comp.zig_lib_directory.join(arena, &[_][]const u8{ - "libc", "darwin", libsystem_name, - }); - try libs.put(full_path, .{ .needed = true }); - } + try self.resolveLibSystem(arena, comp, lib_dirs.items, &libs); // frameworks var framework_dirs = std.ArrayList([]const u8).init(arena); @@ -1003,7 +1128,6 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.parseLibs(libs.keys(), libs.values(), self.base.options.sysroot, &dependent_libs); try self.parseDependentLibs(self.base.options.sysroot, &dependent_libs); - try self.createMhExecuteHeaderSymbol(); for (self.objects.items) |*object, object_id| { try self.resolveSymbolsInObject(object, @intCast(u16, object_id)); } @@ -1013,6 +1137,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.createDyldPrivateAtom(); try self.createStubHelperPreambleAtom(); try self.resolveSymbolsInDylibs(); + try self.createMhExecuteHeaderSymbol(); try self.createDsoHandleSymbol(); try self.addCodeSignatureLC(); try self.resolveSymbolsAtLoading(); @@ -1029,20 +1154,15 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.createTentativeDefAtoms(); - const use_llvm = build_options.have_llvm and self.base.options.use_llvm; - if (use_llvm or use_stage1) { - for (self.objects.items) |*object, object_id| { - try object.splitIntoAtomsWhole(self, @intCast(u32, object_id)); - } - - try self.gcAtoms(); - try 
self.pruneAndSortSections(); - try self.allocateSegments(); - try self.allocateSymbols(); - } else { - // TODO incremental mode: parsing objects into atoms + for (self.objects.items) |*object, object_id| { + try object.splitIntoAtomsOneShot(self, @intCast(u32, object_id)); } + try self.gcAtoms(); + try self.pruneAndSortSections(); + try self.allocateSegments(); + try self.allocateSymbols(); + try self.allocateSpecialSymbols(); if (build_options.enable_logging) { @@ -1051,11 +1171,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No self.logAtoms(); } - if (use_llvm or use_stage1) { - try self.writeAtomsWhole(); - } else { - // try self.writeAtoms(); - } + try self.writeAtomsOneShot(); if (self.rustc_section_index) |id| { const sect = self.getSectionPtr(.{ @@ -1066,14 +1182,8 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No } try self.setEntryPoint(); - try self.updateSectionOrdinals(); try self.writeLinkeditSegment(); - if (self.d_sym) |*d_sym| { - // Flush debug symbols bundle. - try d_sym.flushModule(self.base.allocator, self.base.options); - } - if (self.code_signature) |*csig| { csig.clear(self.base.allocator); csig.code_directory.ident = self.base.options.emit.?.sub_path; @@ -1088,32 +1198,17 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.writeLoadCommands(); try self.writeHeader(); - if (self.entry_addr == null and self.base.options.output_mode == .Exe) { - log.debug("flushing. no_entry_point_found = true", .{}); - self.error_flags.no_entry_point_found = true; - } else { - log.debug("flushing. 
no_entry_point_found = false", .{}); - self.error_flags.no_entry_point_found = false; - } - assert(!self.load_commands_dirty); if (self.code_signature) |*csig| { try self.writeCodeSignature(csig); // code signing always comes last } - - // if (build_options.enable_link_snapshots) { - // if (self.base.options.enable_link_snapshots) - // try self.snapshotState(); - // } } - cache: { - if ((use_stage1 and self.base.options.disable_lld_caching) or self.base.options.cache_mode == .whole) - break :cache; + if (!self.base.options.disable_lld_caching) { // Update the file with the digest. If it fails we can continue; it only // means that the next invocation will have an unnecessary cache miss. - Cache.writeSmallFile(cache_dir_handle, id_symlink_basename, &digest) catch |err| { + Cache.writeSmallFile(directory.handle, id_symlink_basename, &digest) catch |err| { log.debug("failed to save linking hash digest file: {s}", .{@errorName(err)}); }; // Again failure here only means an unnecessary cache miss. @@ -1126,6 +1221,49 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No } } +fn resolveLibSystem( + self: *MachO, + arena: Allocator, + comp: *Compilation, + search_dirs: []const []const u8, + out_libs: anytype, +) !void { + // If we were given the sysroot, try to look there first for libSystem.B.{dylib, tbd}. + var libsystem_available = false; + if (self.base.options.sysroot != null) blk: { + // Try stub file first. If we hit it, then we're done as the stub file + // re-exports every single symbol definition. + for (search_dirs) |dir| { + if (try resolveLib(arena, dir, "System", ".tbd")) |full_path| { + try out_libs.put(full_path, .{ .needed = true }); + libsystem_available = true; + break :blk; + } + } + // If we didn't hit the stub file, try .dylib next. However, libSystem.dylib + // doesn't export libc.dylib which we'll need to resolve subsequently also. 
+ for (search_dirs) |dir| { + if (try resolveLib(arena, dir, "System", ".dylib")) |libsystem_path| { + if (try resolveLib(arena, dir, "c", ".dylib")) |libc_path| { + try out_libs.put(libsystem_path, .{ .needed = true }); + try out_libs.put(libc_path, .{ .needed = true }); + libsystem_available = true; + break :blk; + } + } + } + } + if (!libsystem_available) { + const libsystem_name = try std.fmt.allocPrint(arena, "libSystem.{d}.tbd", .{ + self.base.options.target.os.version_range.semver.min.major, + }); + const full_path = try comp.zig_lib_directory.join(arena, &[_][]const u8{ + "libc", "darwin", libsystem_name, + }); + try out_libs.put(full_path, .{ .needed = true }); + } +} + fn resolveSearchDir( arena: Allocator, dir: []const u8, @@ -1168,6 +1306,16 @@ fn resolveSearchDir( return null; } +fn resolveSearchDirs(arena: Allocator, dirs: []const []const u8, syslibroot: ?[]const u8, out_dirs: anytype) !void { + for (dirs) |dir| { + if (try resolveSearchDir(arena, dir, syslibroot)) |search_dir| { + try out_dirs.append(search_dir); + } else { + log.warn("directory not found for '-L{s}'", .{dir}); + } + } +} + fn resolveLib( arena: Allocator, search_dir: []const u8, @@ -2128,6 +2276,7 @@ fn allocateSpecialSymbols(self: *MachO) !void { "__mh_execute_header", }) |name| { const global = self.globals.get(name) orelse continue; + if (global.file != null) continue; const sym = self.getSymbolPtr(global); const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; sym.n_sect = self.getSectionOrdinal(.{ @@ -2143,7 +2292,9 @@ fn allocateSpecialSymbols(self: *MachO) !void { } } -fn writeAtomsWhole(self: *MachO) !void { +fn writeAtomsOneShot(self: *MachO) !void { + assert(self.mode == .one_shot); + var it = self.atoms.iterator(); while (it.next()) |entry| { const sect = self.getSection(entry.key_ptr.*); @@ -2227,7 +2378,9 @@ fn writePadding(self: *MachO, match: MatchingSection, size: usize, writer: anyty } } -fn writeAtoms(self: *MachO) !void { +fn 
writeAtomsIncremental(self: *MachO) !void { + assert(self.mode == .incremental); + var it = self.atoms.iterator(); while (it.next()) |entry| { const match = entry.key_ptr.*; @@ -2240,7 +2393,7 @@ fn writeAtoms(self: *MachO) !void { log.debug("writing atoms in {s},{s}", .{ sect.segName(), sect.sectName() }); while (true) { - if (atom.dirty or self.invalidate_relocs) { + if (atom.dirty) { try self.writeAtom(atom, match); atom.dirty = false; } @@ -2746,11 +2899,13 @@ fn createTentativeDefAtoms(self: *MachO) !void { fn createMhExecuteHeaderSymbol(self: *MachO) !void { if (self.base.options.output_mode != .Exe) return; - if (self.globals.contains("__mh_execute_header")) return; + if (self.globals.get("__mh_execute_header")) |global| { + const sym = self.getSymbol(global); + if (!sym.undf() and !(sym.pext() or sym.weakDef())) return; + } const gpa = self.base.allocator; - const name = try gpa.dupe(u8, "__mh_execute_header"); - const n_strx = try self.strtab.insert(gpa, name); + const n_strx = try self.strtab.insert(gpa, "__mh_execute_header"); const sym_index = @intCast(u32, self.locals.items.len); try self.locals.append(gpa, .{ .n_strx = n_strx, @@ -2759,10 +2914,14 @@ fn createMhExecuteHeaderSymbol(self: *MachO) !void { .n_desc = 0, .n_value = 0, }); - try self.globals.putNoClobber(gpa, name, .{ + + const name = try gpa.dupe(u8, "__mh_execute_header"); + const gop = try self.globals.getOrPut(gpa, name); + defer if (gop.found_existing) gpa.free(name); + gop.value_ptr.* = .{ .sym_index = sym_index, .file = null, - }); + }; } fn createDsoHandleSymbol(self: *MachO) !void { @@ -2787,9 +2946,68 @@ fn createDsoHandleSymbol(self: *MachO) !void { _ = self.unresolved.swapRemove(@intCast(u32, self.globals.getIndex("___dso_handle").?)); } -fn resolveSymbolsInObject(self: *MachO, object: *Object, object_id: u16) !void { +fn resolveGlobalSymbol(self: *MachO, current: SymbolWithLoc) !void { const gpa = self.base.allocator; + const sym = self.getSymbol(current); + const sym_name = 
self.getSymbolName(current); + const name = try gpa.dupe(u8, sym_name); + const global_index = @intCast(u32, self.globals.values().len); + const gop = try self.globals.getOrPut(gpa, name); + defer if (gop.found_existing) gpa.free(name); + + if (!gop.found_existing) { + gop.value_ptr.* = current; + if (sym.undf() and !sym.tentative()) { + try self.unresolved.putNoClobber(gpa, global_index, false); + } + return; + } + + const global = gop.value_ptr.*; + const global_sym = self.getSymbol(global); + + // Cases to consider: sym vs global_sym + // 1. strong(sym) and strong(global_sym) => error + // 2. strong(sym) and weak(global_sym) => sym + // 3. strong(sym) and tentative(global_sym) => sym + // 4. strong(sym) and undf(global_sym) => sym + // 5. weak(sym) and strong(global_sym) => global_sym + // 6. weak(sym) and tentative(global_sym) => sym + // 7. weak(sym) and undf(global_sym) => sym + // 8. tentative(sym) and strong(global_sym) => global_sym + // 9. tentative(sym) and weak(global_sym) => global_sym + // 10. tentative(sym) and tentative(global_sym) => pick larger + // 11. tentative(sym) and undf(global_sym) => sym + // 12. undf(sym) and * => global_sym + // + // Reduces to: + // 1. strong(sym) and strong(global_sym) => error + // 2. * and strong(global_sym) => global_sym + // 3. weak(sym) and weak(global_sym) => global_sym + // 4. tentative(sym) and tentative(global_sym) => pick larger + // 5. undf(sym) and * => global_sym + // 6. 
else => sym + + const sym_is_strong = sym.sect() and !(sym.weakDef() or sym.pext()); + const global_is_strong = global_sym.sect() and !(global_sym.weakDef() or global_sym.pext()); + const sym_is_weak = sym.sect() and (sym.weakDef() or sym.pext()); + const global_is_weak = global_sym.sect() and (global_sym.weakDef() or global_sym.pext()); + + if (sym_is_strong and global_is_strong) return error.MultipleSymbolDefinitions; + if (global_is_strong) return; + if (sym_is_weak and global_is_weak) return; + if (sym.tentative() and global_sym.tentative()) { + if (global_sym.n_value >= sym.n_value) return; + } + if (sym.undf() and !sym.tentative()) return; + + _ = self.unresolved.swapRemove(@intCast(u32, self.globals.getIndex(name).?)); + + gop.value_ptr.* = current; +} + +fn resolveSymbolsInObject(self: *MachO, object: *Object, object_id: u16) !void { log.debug("resolving symbols in '{s}'", .{object.name}); for (object.symtab.items) |sym, index| { @@ -2825,72 +3043,18 @@ fn resolveSymbolsInObject(self: *MachO, object: *Object, object_id: u16) !void { continue; } - const name = try gpa.dupe(u8, sym_name); - const global_index = @intCast(u32, self.globals.values().len); - const gop = try self.globals.getOrPut(gpa, name); - defer if (gop.found_existing) gpa.free(name); - - if (!gop.found_existing) { - gop.value_ptr.* = .{ - .sym_index = sym_index, - .file = object_id, - }; - if (sym.undf() and !sym.tentative()) { - try self.unresolved.putNoClobber(gpa, global_index, {}); - } - continue; - } - - const global = gop.value_ptr.*; - const global_sym = self.getSymbol(global); - - // Cases to consider: sym vs global_sym - // 1. strong(sym) and strong(global_sym) => error - // 2. strong(sym) and weak(global_sym) => sym - // 3. strong(sym) and tentative(global_sym) => sym - // 4. strong(sym) and undf(global_sym) => sym - // 5. weak(sym) and strong(global_sym) => global_sym - // 6. weak(sym) and tentative(global_sym) => sym - // 7. weak(sym) and undf(global_sym) => sym - // 8. 
tentative(sym) and strong(global_sym) => global_sym - // 9. tentative(sym) and weak(global_sym) => global_sym - // 10. tentative(sym) and tentative(global_sym) => pick larger - // 11. tentative(sym) and undf(global_sym) => sym - // 12. undf(sym) and * => global_sym - // - // Reduces to: - // 1. strong(sym) and strong(global_sym) => error - // 2. * and strong(global_sym) => global_sym - // 3. weak(sym) and weak(global_sym) => global_sym - // 4. tentative(sym) and tentative(global_sym) => pick larger - // 5. undf(sym) and * => global_sym - // 6. else => sym - - const sym_is_strong = sym.sect() and !(sym.weakDef() or sym.pext()); - const global_is_strong = global_sym.sect() and !(global_sym.weakDef() or global_sym.pext()); - const sym_is_weak = sym.sect() and (sym.weakDef() or sym.pext()); - const global_is_weak = global_sym.sect() and (global_sym.weakDef() or global_sym.pext()); - - if (sym_is_strong and global_is_strong) { - log.err("symbol '{s}' defined multiple times", .{sym_name}); - if (global.file) |file| { - log.err(" first definition in '{s}'", .{self.objects.items[file].name}); - } - log.err(" next definition in '{s}'", .{object.name}); - return error.MultipleSymbolDefinitions; - } - if (global_is_strong) continue; - if (sym_is_weak and global_is_weak) continue; - if (sym.tentative() and global_sym.tentative()) { - if (global_sym.n_value >= sym.n_value) continue; - } - if (sym.undf() and !sym.tentative()) continue; - - _ = self.unresolved.swapRemove(@intCast(u32, self.globals.getIndex(name).?)); - - gop.value_ptr.* = .{ - .sym_index = sym_index, - .file = object_id, + const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = object_id }; + self.resolveGlobalSymbol(sym_loc) catch |err| switch (err) { + error.MultipleSymbolDefinitions => { + const global = self.globals.get(sym_name).?; + log.err("symbol '{s}' defined multiple times", .{sym_name}); + if (global.file) |file| { + log.err(" first definition in '{s}'", .{self.objects.items[file].name}); + } + 
log.err(" next definition in '{s}'", .{self.objects.items[object_id].name}); + return error.MultipleSymbolDefinitions; + }, + else => |e| return e, }; } } @@ -2950,7 +3114,18 @@ fn resolveSymbolsInDylibs(self: *MachO) !void { sym.n_desc |= macho.N_WEAK_REF; } - assert(self.unresolved.swapRemove(global_index)); + if (self.unresolved.fetchSwapRemove(global_index)) |entry| blk: { + if (!entry.value) break :blk; + if (!sym.undf()) break :blk; + if (self.stubs_table.contains(global)) break :blk; + + const stub_index = try self.allocateStubEntry(global); + const stub_helper_atom = try self.createStubHelperAtom(); + const laptr_atom = try self.createLazyPointerAtom(stub_helper_atom.sym_index, global); + const stub_atom = try self.createStubAtom(laptr_atom.sym_index); + + self.stubs.items[stub_index].atom = stub_atom; + } continue :loop; } @@ -3272,7 +3447,7 @@ fn growAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, match return self.allocateAtom(atom, new_atom_size, alignment, match); } -fn allocateLocalSymbol(self: *MachO) !u32 { +fn allocateSymbol(self: *MachO) !u32 { try self.locals.ensureUnusedCapacity(self.base.allocator, 1); const index = blk: { @@ -3366,12 +3541,9 @@ pub fn allocateDeclIndexes(self: *MachO, decl_index: Module.Decl.Index) !void { const decl = self.base.options.module.?.declPtr(decl_index); if (decl.link.macho.sym_index != 0) return; - decl.link.macho.sym_index = try self.allocateLocalSymbol(); + decl.link.macho.sym_index = try self.allocateSymbol(); try self.atom_by_index_table.putNoClobber(self.base.allocator, decl.link.macho.sym_index, &decl.link.macho); try self.decls.putNoClobber(self.base.allocator, decl_index, null); - - const got_target = .{ .sym_index = decl.link.macho.sym_index, .file = null }; - _ = try self.allocateGotEntry(got_target); } pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liveness: Liveness) !void { @@ -3468,7 +3640,7 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: 
TypedValue, decl_index: Modu log.debug("allocating symbol indexes for {s}", .{name}); const required_alignment = typed_value.ty.abiAlignment(self.base.options.target); - const sym_index = try self.allocateLocalSymbol(); + const sym_index = try self.allocateSymbol(); const atom = try MachO.createEmptyAtom( gpa, sym_index, @@ -3787,6 +3959,12 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !*mac log.debug("growing {s} and moving from 0x{x} to 0x{x}", .{ sym_name, symbol.n_value, vaddr }); log.debug(" (required alignment 0x{x})", .{required_alignment}); symbol.n_value = vaddr; + + const got_atom = self.getGotAtomForSymbol(.{ + .sym_index = decl.link.macho.sym_index, + .file = null, + }).?; + got_atom.dirty = true; } else if (code_len < decl.link.macho.size) { self.shrinkAtom(&decl.link.macho, code_len, match); } @@ -3814,11 +3992,8 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !*mac .n_value = addr, }; - const got_target = SymbolWithLoc{ - .sym_index = decl.link.macho.sym_index, - .file = null, - }; - const got_index = self.got_entries_table.get(got_target).?; + const got_target = SymbolWithLoc{ .sym_index = decl.link.macho.sym_index, .file = null }; + const got_index = try self.allocateGotEntry(got_target); const got_atom = try self.createGotAtom(got_target); self.got_entries.items[got_index].atom = got_atom; } @@ -3843,19 +4018,23 @@ pub fn updateDeclExports( @panic("Attempted to compile for object format that was disabled by build configuration"); } if (build_options.have_llvm) { - if (self.llvm_object) |llvm_object| return llvm_object.updateDeclExports(module, decl_index, exports); + if (self.llvm_object) |llvm_object| + return llvm_object.updateDeclExports(module, decl_index, exports); } const tracy = trace(@src()); defer tracy.end(); - try self.globals.ensureUnusedCapacity(self.base.allocator, exports.len); + const gpa = self.base.allocator; + const decl = module.declPtr(decl_index); if 
(decl.link.macho.sym_index == 0) return; - const decl_sym = &self.locals.items[decl.link.macho.sym_index]; + const decl_sym = decl.link.macho.getSymbol(self); for (exports) |exp| { - const exp_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{exp.options.name}); - defer self.base.allocator.free(exp_name); + const exp_name = try std.fmt.allocPrint(gpa, "_{s}", .{exp.options.name}); + defer gpa.free(exp_name); + + log.debug("adding new export '{s}'", .{exp_name}); if (exp.options.section) |section_name| { if (!mem.eql(u8, section_name, "__text")) { @@ -3863,7 +4042,7 @@ pub fn updateDeclExports( module.gpa, exp, try Module.ErrorMsg.create( - self.base.allocator, + gpa, decl.srcLoc(), "Unimplemented: ExportOptions.section", .{}, @@ -3878,7 +4057,7 @@ pub fn updateDeclExports( module.gpa, exp, try Module.ErrorMsg.create( - self.base.allocator, + gpa, decl.srcLoc(), "Unimplemented: GlobalLinkage.LinkOnce", .{}, @@ -3887,107 +4066,84 @@ pub fn updateDeclExports( continue; } - const is_weak = exp.options.linkage == .Internal or exp.options.linkage == .Weak; - _ = is_weak; - const n_strx = try self.strtab.insert(self.base.allocator, exp_name); - // if (self.symbol_resolver.getPtr(n_strx)) |resolv| { - // switch (resolv.where) { - // .global => { - // if (resolv.sym_index == decl.link.macho.sym_index) continue; - - // const sym = &self.globals.items[resolv.where_index]; - - // if (sym.tentative()) { - // assert(self.tentatives.swapRemove(resolv.where_index)); - // } else if (!is_weak and !(sym.weakDef() or sym.pext())) { - // _ = try module.failed_exports.put( - // module.gpa, - // exp, - // try Module.ErrorMsg.create( - // self.base.allocator, - // decl.srcLoc(), - // \\LinkError: symbol '{s}' defined multiple times - // \\ first definition in '{s}' - // , - // .{ exp_name, self.objects.items[resolv.file.?].name }, - // ), - // ); - // continue; - // } else if (is_weak) continue; // Current symbol is weak, so skip it. 
- - // // Otherwise, update the resolver and the global symbol. - // sym.n_type = macho.N_SECT | macho.N_EXT; - // resolv.sym_index = decl.link.macho.sym_index; - // resolv.file = null; - // exp.link.macho.sym_index = resolv.where_index; - - // continue; - // }, - // .undef => { - // assert(self.unresolved.swapRemove(resolv.where_index)); - // _ = self.symbol_resolver.remove(n_strx); - // }, - // } - // } - - var n_type: u8 = macho.N_SECT | macho.N_EXT; - var n_desc: u16 = 0; + const sym_index = exp.link.macho.sym_index orelse blk: { + const sym_index = try self.allocateSymbol(); + exp.link.macho.sym_index = sym_index; + break :blk sym_index; + }; + const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = null }; + const sym = self.getSymbolPtr(sym_loc); + sym.* = .{ + .n_strx = try self.strtab.insert(gpa, exp_name), + .n_type = macho.N_SECT | macho.N_EXT, + .n_sect = self.getSectionOrdinal(.{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, // TODO what if we export a variable? + }), + .n_desc = 0, + .n_value = decl_sym.n_value, + }; switch (exp.options.linkage) { .Internal => { // Symbol should be hidden, or in MachO lingo, private extern. // We should also mark the symbol as Weak: n_desc == N_WEAK_DEF. - // TODO work out when to add N_WEAK_REF. - n_type |= macho.N_PEXT; - n_desc |= macho.N_WEAK_DEF; + sym.n_type |= macho.N_PEXT; + sym.n_desc |= macho.N_WEAK_DEF; }, .Strong => {}, .Weak => { // Weak linkage is specified as part of n_desc field. // Symbol's n_type is like for a symbol with strong linkage. 
- n_desc |= macho.N_WEAK_DEF; + sym.n_desc |= macho.N_WEAK_DEF; }, else => unreachable, } - const global_sym_index: u32 = 0; - // const global_sym_index = if (exp.link.macho.sym_index) |i| i else blk: { - // const i = if (self.globals_free_list.popOrNull()) |i| i else inner: { - // _ = self.globals.addOneAssumeCapacity(); - // break :inner @intCast(u32, self.globals.items.len - 1); - // }; - // break :blk i; - // }; - const sym = &self.locals.items[global_sym_index]; - sym.* = .{ - .n_strx = try self.strtab.insert(self.base.allocator, exp_name), - .n_type = n_type, - .n_sect = @intCast(u8, self.text_section_index.?) + 1, - .n_desc = n_desc, - .n_value = decl_sym.n_value, + self.resolveGlobalSymbol(sym_loc) catch |err| switch (err) { + error.MultipleSymbolDefinitions => { + const global = self.globals.get(exp_name).?; + if (sym_loc.sym_index != global.sym_index and global.file != null) { + _ = try module.failed_exports.put(module.gpa, exp, try Module.ErrorMsg.create( + gpa, + decl.srcLoc(), + \\LinkError: symbol '{s}' defined multiple times + \\ first definition in '{s}' + , + .{ exp_name, self.objects.items[global.file.?].name }, + )); + } + }, + else => |e| return e, }; - exp.link.macho.sym_index = global_sym_index; - _ = n_strx; - - // try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - // .where = .global, - // .where_index = global_sym_index, - // .sym_index = decl.link.macho.sym_index, - // }); } } pub fn deleteExport(self: *MachO, exp: Export) void { if (self.llvm_object) |_| return; const sym_index = exp.sym_index orelse return; - _ = sym_index; - // self.globals_free_list.append(self.base.allocator, sym_index) catch {}; - // const global = &self.globals.items[sym_index]; - // log.warn("deleting export '{s}': {}", .{ self.getString(global.n_strx), global }); - // assert(self.symbol_resolver.remove(global.n_strx)); - // global.n_type = 0; - // global.n_strx = 0; - // global.n_value = 0; + + const gpa = self.base.allocator; + + const 
sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = null }; + const sym = self.getSymbolPtr(sym_loc); + const sym_name = self.getSymbolName(sym_loc); + log.debug("deleting export '{s}'", .{sym_name}); + sym.* = .{ + .n_strx = 0, + .n_type = 0, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + self.locals_free_list.append(gpa, sym_index) catch {}; + + if (self.globals.get(sym_name)) |global| blk: { + if (global.sym_index != sym_index) break :blk; + if (global.file != null) break :blk; + const kv = self.globals.fetchSwapRemove(sym_name); + gpa.free(kv.?.key); + } } fn freeUnnamedConsts(self: *MachO, decl_index: Module.Decl.Index) void { @@ -4026,7 +4182,10 @@ pub fn freeDecl(self: *MachO, decl_index: Module.Decl.Index) void { const got_target = SymbolWithLoc{ .sym_index = decl.link.macho.sym_index, .file = null }; if (self.got_entries_table.get(got_target)) |got_index| { self.got_entries_free_list.append(self.base.allocator, @intCast(u32, got_index)) catch {}; - self.got_entries.items[got_index] = .{ .target = .{ .sym_index = 0, .file = null }, .atom = undefined }; + self.got_entries.items[got_index] = .{ + .target = .{ .sym_index = 0, .file = null }, + .atom = undefined, + }; _ = self.got_entries_table.swapRemove(got_target); if (self.d_sym) |*d_sym| { @@ -4102,7 +4261,7 @@ fn populateMissingMetadata(self: *MachO) !void { if (self.text_segment_cmd_index == null) { self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const needed_size = if (self.needs_prealloc) blk: { + const needed_size = if (self.mode == .incremental) blk: { const headerpad_size = @maximum(self.base.options.headerpad_size orelse 0, default_headerpad_size); const program_code_size_hint = self.base.options.program_code_size_hint; const got_size_hint = @sizeOf(u64) * self.base.options.symbol_count_hint; @@ -4133,7 +4292,7 @@ fn populateMissingMetadata(self: *MachO) !void { .aarch64 => 2, else => unreachable, // unhandled architecture type }; - const needed_size = if 
(self.needs_prealloc) self.base.options.program_code_size_hint else 0; + const needed_size = if (self.mode == .incremental) self.base.options.program_code_size_hint else 0; self.text_section_index = try self.initSection( self.text_segment_cmd_index.?, "__text", @@ -4156,7 +4315,7 @@ fn populateMissingMetadata(self: *MachO) !void { .aarch64 => 3 * @sizeOf(u32), else => unreachable, // unhandled architecture type }; - const needed_size = if (self.needs_prealloc) stub_size * self.base.options.symbol_count_hint else 0; + const needed_size = if (self.mode == .incremental) stub_size * self.base.options.symbol_count_hint else 0; self.stubs_section_index = try self.initSection( self.text_segment_cmd_index.?, "__stubs", @@ -4185,7 +4344,7 @@ fn populateMissingMetadata(self: *MachO) !void { .aarch64 => 3 * @sizeOf(u32), else => unreachable, }; - const needed_size = if (self.needs_prealloc) + const needed_size = if (self.mode == .incremental) stub_size * self.base.options.symbol_count_hint + preamble_size else 0; @@ -4205,7 +4364,7 @@ fn populateMissingMetadata(self: *MachO) !void { var vmaddr: u64 = 0; var fileoff: u64 = 0; var needed_size: u64 = 0; - if (self.needs_prealloc) { + if (self.mode == .incremental) { const base = self.getSegmentAllocBase(&.{self.text_segment_cmd_index.?}); vmaddr = base.vmaddr; fileoff = base.fileoff; @@ -4234,7 +4393,7 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.got_section_index == null) { - const needed_size = if (self.needs_prealloc) + const needed_size = if (self.mode == .incremental) @sizeOf(u64) * self.base.options.symbol_count_hint else 0; @@ -4255,7 +4414,7 @@ fn populateMissingMetadata(self: *MachO) !void { var vmaddr: u64 = 0; var fileoff: u64 = 0; var needed_size: u64 = 0; - if (self.needs_prealloc) { + if (self.mode == .incremental) { const base = self.getSegmentAllocBase(&.{self.data_const_segment_cmd_index.?}); vmaddr = base.vmaddr; fileoff = base.fileoff; @@ -4284,7 +4443,7 @@ fn populateMissingMetadata(self: 
*MachO) !void { } if (self.la_symbol_ptr_section_index == null) { - const needed_size = if (self.needs_prealloc) + const needed_size = if (self.mode == .incremental) @sizeOf(u64) * self.base.options.symbol_count_hint else 0; @@ -4301,7 +4460,10 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.data_section_index == null) { - const needed_size = if (self.needs_prealloc) @sizeOf(u64) * self.base.options.symbol_count_hint else 0; + const needed_size = if (self.mode == .incremental) + @sizeOf(u64) * self.base.options.symbol_count_hint + else + 0; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) self.data_section_index = try self.initSection( self.data_segment_cmd_index.?, @@ -4313,7 +4475,10 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.tlv_section_index == null) { - const needed_size = if (self.needs_prealloc) @sizeOf(u64) * self.base.options.symbol_count_hint else 0; + const needed_size = if (self.mode == .incremental) + @sizeOf(u64) * self.base.options.symbol_count_hint + else + 0; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) self.tlv_section_index = try self.initSection( self.data_segment_cmd_index.?, @@ -4327,7 +4492,10 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.tlv_data_section_index == null) { - const needed_size = if (self.needs_prealloc) @sizeOf(u64) * self.base.options.symbol_count_hint else 0; + const needed_size = if (self.mode == .incremental) + @sizeOf(u64) * self.base.options.symbol_count_hint + else + 0; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) self.tlv_data_section_index = try self.initSection( self.data_segment_cmd_index.?, @@ -4341,7 +4509,10 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.tlv_bss_section_index == null) { - const needed_size = if (self.needs_prealloc) @sizeOf(u64) * self.base.options.symbol_count_hint else 0; + const needed_size = if (self.mode == .incremental) + @sizeOf(u64) * self.base.options.symbol_count_hint + else + 0; const alignment: u16 = 3; // 2^3 = 
@sizeOf(u64) self.tlv_bss_section_index = try self.initSection( self.data_segment_cmd_index.?, @@ -4355,7 +4526,10 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.bss_section_index == null) { - const needed_size = if (self.needs_prealloc) @sizeOf(u64) * self.base.options.symbol_count_hint else 0; + const needed_size = if (self.mode == .incremental) + @sizeOf(u64) * self.base.options.symbol_count_hint + else + 0; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) self.bss_section_index = try self.initSection( self.data_segment_cmd_index.?, @@ -4372,7 +4546,7 @@ fn populateMissingMetadata(self: *MachO) !void { self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); var vmaddr: u64 = 0; var fileoff: u64 = 0; - if (self.needs_prealloc) { + if (self.mode == .incremental) { const base = self.getSegmentAllocBase(&.{self.data_segment_cmd_index.?}); vmaddr = base.vmaddr; fileoff = base.fileoff; @@ -4740,14 +4914,14 @@ fn initSection( var sect = macho.section_64{ .sectname = makeStaticString(sectname), .segname = seg.inner.segname, - .size = if (self.needs_prealloc) @intCast(u32, size) else 0, + .size = if (self.mode == .incremental) @intCast(u32, size) else 0, .@"align" = alignment, .flags = opts.flags, .reserved1 = opts.reserved1, .reserved2 = opts.reserved2, }; - if (self.needs_prealloc) { + if (self.mode == .incremental) { const alignment_pow_2 = try math.powi(u32, 2, alignment); const padding: ?u32 = if (segment_id == self.text_segment_cmd_index.?) 
@maximum(self.base.options.headerpad_size orelse 0, default_headerpad_size) @@ -4967,7 +5141,7 @@ fn getSectionMaxAlignment(self: *MachO, segment_id: u16, start_sect_id: u16) !u3 fn allocateAtomCommon(self: *MachO, atom: *Atom, match: MatchingSection) !void { const sym = atom.getSymbolPtr(self); - if (self.needs_prealloc) { + if (self.mode == .incremental) { const size = atom.size; const alignment = try math.powi(u32, 2, atom.alignment); const vaddr = try self.allocateAtom(atom, size, alignment, match); @@ -5108,27 +5282,29 @@ pub fn addAtomToSection(self: *MachO, atom: *Atom, match: MatchingSection) !void pub fn getGlobalSymbol(self: *MachO, name: []const u8) !u32 { const gpa = self.base.allocator; const sym_name = try std.fmt.allocPrint(gpa, "_{s}", .{name}); - defer gpa.free(sym_name); + const global_index = @intCast(u32, self.globals.values().len); + const gop = try self.globals.getOrPut(gpa, sym_name); + defer if (gop.found_existing) gpa.free(sym_name); - if (self.globals.getIndex(sym_name)) |global_index| { - return @intCast(u32, global_index); + if (gop.found_existing) { + return @intCast(u32, self.globals.getIndex(sym_name).?); } - const n_strx = try self.strtab.insert(gpa, sym_name); const sym_index = @intCast(u32, self.locals.items.len); try self.locals.append(gpa, .{ - .n_strx = n_strx, + .n_strx = try self.strtab.insert(gpa, sym_name), .n_type = macho.N_UNDF, .n_sect = 0, .n_desc = 0, .n_value = 0, }); - try self.globals.putNoClobber(gpa, sym_name, .{ + gop.value_ptr.* = .{ .sym_index = sym_index, .file = null, - }); - const global_index = self.globals.getIndex(sym_name).?; - return @intCast(u32, global_index); + }; + try self.unresolved.putNoClobber(gpa, global_index, true); + + return global_index; } fn getSegmentAllocBase(self: MachO, indices: []const ?u16) struct { vmaddr: u64, fileoff: u64 } { @@ -5927,7 +6103,7 @@ fn writeDices(self: *MachO) !void { self.load_commands_dirty = true; } -fn writeSymbolTable(self: *MachO) !void { +fn writeSymtab(self: 
*MachO) !void { const tracy = trace(@src()); defer tracy.end(); @@ -6143,7 +6319,7 @@ fn writeSymbolTable(self: *MachO) !void { self.load_commands_dirty = true; } -fn writeStringTable(self: *MachO) !void { +fn writeStrtab(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); @@ -6173,8 +6349,8 @@ fn writeLinkeditSegment(self: *MachO) !void { try self.writeDyldInfoData(); try self.writeFunctionStarts(); try self.writeDices(); - try self.writeSymbolTable(); - try self.writeStringTable(); + try self.writeSymtab(); + try self.writeStrtab(); seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); } @@ -6391,6 +6567,27 @@ pub fn getAtomForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?*Atom { } } +/// Returns GOT atom that references `sym_with_loc` if one exists. +/// Returns null otherwise. +pub fn getGotAtomForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?*Atom { + const got_index = self.got_entries_table.get(sym_with_loc) orelse return null; + return self.got_entries.items[got_index].atom; +} + +/// Returns stubs atom that references `sym_with_loc` if one exists. +/// Returns null otherwise. +pub fn getStubsAtomForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?*Atom { + const stubs_index = self.stubs_table.get(sym_with_loc) orelse return null; + return self.stubs.items[stubs_index].atom; +} + +/// Returns TLV pointer atom that references `sym_with_loc` if one exists. +/// Returns null otherwise. 
+pub fn getTlvPtrAtomForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?*Atom { + const tlv_ptr_index = self.tlv_ptr_entries_table.get(sym_with_loc) orelse return null; + return self.tlv_ptr_entries.items[tlv_ptr_index].atom; +} + pub fn findFirst(comptime T: type, haystack: []const T, start: usize, predicate: anytype) usize { if (!@hasDecl(@TypeOf(predicate), "predicate")) @compileError("Predicate is required to define fn predicate(@This(), T) bool"); @@ -6481,6 +6678,8 @@ fn snapshotState(self: *MachO) !void { .payload = .{ .name = sect_name }, }); + const is_tlv = sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; + var atom: *Atom = self.atoms.get(key) orelse { try nodes.append(.{ .address = sect.addr + sect.size, @@ -6495,35 +6694,23 @@ fn snapshotState(self: *MachO) !void { } while (true) { - const atom_sym = self.locals.items[atom.sym_index]; - const should_skip_atom: bool = blk: { - if (self.mh_execute_header_index) |index| { - if (index == atom.sym_index) break :blk true; - } - if (mem.eql(u8, self.getString(atom_sym.n_strx), "___dso_handle")) break :blk true; - break :blk false; - }; - - if (should_skip_atom) { - if (atom.next) |next| { - atom = next; - } else break; - continue; - } - + const atom_sym = atom.getSymbol(self); var node = Snapshot.Node{ .address = atom_sym.n_value, .tag = .atom_start, .payload = .{ - .name = self.getString(atom_sym.n_strx), - .is_global = self.symbol_resolver.contains(atom_sym.n_strx), + .name = atom.getName(self), + .is_global = self.globals.contains(atom.getName(self)), }, }; var aliases = std.ArrayList([]const u8).init(arena); for (atom.contained.items) |sym_off| { if (sym_off.offset == 0) { - try aliases.append(self.getString(self.locals.items[sym_off.sym_index].n_strx)); + try aliases.append(self.getSymbolName(.{ + .sym_index = sym_off.sym_index, + .file = atom.file, + })); } } node.payload.aliases = aliases.toOwnedSlice(); @@ -6531,69 +6718,39 @@ fn snapshotState(self: *MachO) !void { var relocs = try 
std.ArrayList(Snapshot.Node).initCapacity(arena, atom.relocs.items.len); for (atom.relocs.items) |rel| { - const arch = self.base.options.target.cpu.arch; const source_addr = blk: { - const sym = self.locals.items[atom.sym_index]; - break :blk sym.n_value + rel.offset; + const source_sym = atom.getSymbol(self); + break :blk source_sym.n_value + rel.offset; }; const target_addr = blk: { - const is_via_got = got: { - switch (arch) { - .aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, rel.@"type")) { - .ARM64_RELOC_GOT_LOAD_PAGE21, .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => true, - else => false, - }, - .x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, rel.@"type")) { - .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true, - else => false, - }, - else => unreachable, + const target_atom = (try rel.getTargetAtom(self)) orelse { + // If there is no atom for target, we still need to check for special, atom-less + // symbols such as `___dso_handle`. + const target_name = self.getSymbolName(rel.target); + if (self.globals.contains(target_name)) { + const atomless_sym = self.getSymbol(rel.target); + break :blk atomless_sym.n_value; } + break :blk 0; }; - - if (is_via_got) { - const got_index = self.got_entries_table.get(rel.target) orelse break :blk 0; - const got_atom = self.got_entries.items[got_index].atom; - break :blk self.locals.items[got_atom.sym_index].n_value; - } - - switch (rel.target) { - .local => |sym_index| { - const sym = self.locals.items[sym_index]; - const is_tlv = is_tlv: { - const source_sym = self.locals.items[atom.sym_index]; - const match = self.section_ordinals.keys()[source_sym.n_sect - 1]; - const match_seg = self.load_commands.items[match.seg].segment; - const match_sect = match_seg.sections.items[match.sect]; - break :is_tlv match_sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; - }; - if (is_tlv) { - const match_seg = self.load_commands.items[self.data_segment_cmd_index.?].segment; - const base_address = inner: { - if 
(self.tlv_data_section_index) |i| { - break :inner match_seg.sections.items[i].addr; - } else if (self.tlv_bss_section_index) |i| { - break :inner match_seg.sections.items[i].addr; - } else unreachable; - }; - break :blk sym.n_value - base_address; - } - break :blk sym.n_value; - }, - .global => |n_strx| { - const resolv = self.symbol_resolver.get(n_strx).?; - switch (resolv.where) { - .global => break :blk self.globals.items[resolv.where_index].n_value, - .undef => { - if (self.stubs_table.get(n_strx)) |stub_index| { - const stub_atom = self.stubs.items[stub_index]; - break :blk self.locals.items[stub_atom.sym_index].n_value; - } - break :blk 0; - }, - } - }, - } + const target_sym = if (target_atom.isSymbolContained(rel.target, self)) + self.getSymbol(rel.target) + else + target_atom.getSymbol(self); + const base_address: u64 = if (is_tlv) base_address: { + const sect_id: u16 = sect_id: { + if (self.tlv_data_section_index) |i| { + break :sect_id i; + } else if (self.tlv_bss_section_index) |i| { + break :sect_id i; + } else unreachable; + }; + break :base_address self.getSection(.{ + .seg = self.data_segment_cmd_index.?, + .sect = sect_id, + }).addr; + } else 0; + break :blk target_sym.n_value - base_address; }; relocs.appendAssumeCapacity(.{ @@ -6614,15 +6771,18 @@ fn snapshotState(self: *MachO) !void { var next_i: usize = 0; var last_rel: usize = 0; while (next_i < atom.contained.items.len) : (next_i += 1) { - const loc = atom.contained.items[next_i]; - const cont_sym = self.locals.items[loc.sym_index]; - const cont_sym_name = self.getString(cont_sym.n_strx); + const loc = SymbolWithLoc{ + .sym_index = atom.contained.items[next_i].sym_index, + .file = atom.file, + }; + const cont_sym = self.getSymbol(loc); + const cont_sym_name = self.getSymbolName(loc); var contained_node = Snapshot.Node{ .address = cont_sym.n_value, .tag = .atom_start, .payload = .{ .name = cont_sym_name, - .is_global = self.symbol_resolver.contains(cont_sym.n_strx), + .is_global = 
self.globals.contains(cont_sym_name), }, }; @@ -6630,10 +6790,14 @@ fn snapshotState(self: *MachO) !void { var inner_aliases = std.ArrayList([]const u8).init(arena); while (true) { if (next_i + 1 >= atom.contained.items.len) break; - const next_sym = self.locals.items[atom.contained.items[next_i + 1].sym_index]; + const next_sym_loc = SymbolWithLoc{ + .sym_index = atom.contained.items[next_i + 1].sym_index, + .file = atom.file, + }; + const next_sym = self.getSymbol(next_sym_loc); if (next_sym.n_value != cont_sym.n_value) break; - const next_sym_name = self.getString(next_sym.n_strx); - if (self.symbol_resolver.contains(next_sym.n_strx)) { + const next_sym_name = self.getSymbolName(next_sym_loc); + if (self.globals.contains(next_sym_name)) { try inner_aliases.append(contained_node.payload.name); contained_node.payload.name = next_sym_name; contained_node.payload.is_global = true; @@ -6642,7 +6806,10 @@ fn snapshotState(self: *MachO) !void { } const cont_size = if (next_i + 1 < atom.contained.items.len) - self.locals.items[atom.contained.items[next_i + 1].sym_index].n_value - cont_sym.n_value + self.getSymbol(.{ + .sym_index = atom.contained.items[next_i + 1].sym_index, + .file = atom.file, + }).n_value - cont_sym.n_value else atom_sym.n_value + atom.size - cont_sym.n_value; @@ -6695,7 +6862,11 @@ pub fn logSymAttributes(sym: macho.nlist_64, buf: *[4]u8) []const u8 { buf[0] = 's'; } if (sym.ext()) { - buf[1] = 'e'; + if (sym.weakDef() or sym.pext()) { + buf[1] = 'w'; + } else { + buf[1] = 'e'; + } } if (sym.tentative()) { buf[2] = 't'; diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index f5995cbd87..d58e898848 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -187,7 +187,7 @@ pub const Relocation = struct { const target_sym = macho_file.getSymbol(self.target); if (is_via_got) { - const got_index = macho_file.got_entries_table.get(self.target) orelse { + const got_atom = macho_file.getGotAtomForSymbol(self.target) orelse { 
log.err("expected GOT entry for symbol", .{}); if (target_sym.undf()) { log.err(" import('{s}')", .{macho_file.getSymbolName(self.target)}); @@ -197,14 +197,12 @@ pub const Relocation = struct { log.err(" this is an internal linker error", .{}); return error.FailedToResolveRelocationTarget; }; - return macho_file.got_entries.items[got_index].atom; + return got_atom; } - if (macho_file.stubs_table.get(self.target)) |stub_index| { - return macho_file.stubs.items[stub_index].atom; - } else if (macho_file.tlv_ptr_entries_table.get(self.target)) |tlv_ptr_index| { - return macho_file.tlv_ptr_entries.items[tlv_ptr_index].atom; - } else return macho_file.getAtomForSymbol(self.target); + if (macho_file.getStubsAtomForSymbol(self.target)) |stubs_atom| return stubs_atom; + if (macho_file.getTlvPtrAtomForSymbol(self.target)) |tlv_ptr_atom| return tlv_ptr_atom; + return macho_file.getAtomForSymbol(self.target); } }; @@ -402,7 +400,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: .n_type = macho.N_SECT, .n_sect = context.macho_file.getSectionOrdinal(match), .n_desc = 0, - .n_value = 0, + .n_value = sect.addr, }); try object.sections_as_symbols.putNoClobber(gpa, sect_id, sym_index); break :blk sym_index; @@ -499,8 +497,10 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: // Note for the future self: when r_extern == 0, we should subtract correction from the // addend. const target_sect_base_addr = object.getSection(@intCast(u16, rel.r_symbolnum - 1)).addr; + // We need to add base_offset, i.e., offset of this atom wrt to the source + // section. Otherwise, the addend will over-/under-shoot. 
addend += @intCast(i64, context.base_addr + offset + 4) - - @intCast(i64, target_sect_base_addr); + @intCast(i64, target_sect_base_addr) + context.base_offset; } }, .X86_64_RELOC_TLV => { diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 0ef03c5f32..a5c65abed3 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -5,7 +5,7 @@ const build_options = @import("build_options"); const assert = std.debug.assert; const fs = std.fs; const link = @import("../../link.zig"); -const log = std.log.scoped(.link); +const log = std.log.scoped(.dsym); const macho = std.macho; const makeStaticString = MachO.makeStaticString; const math = std.math; @@ -60,7 +60,7 @@ debug_aranges_section_dirty: bool = false, debug_info_header_dirty: bool = false, debug_line_header_dirty: bool = false, -strtab: StringTable(.link) = .{}, +strtab: StringTable(.strtab) = .{}, relocs: std.ArrayListUnmanaged(Reloc) = .{}, diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 7170e8efdb..91e2169b7d 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -270,7 +270,7 @@ const SymbolAtIndex = struct { fn getSymbolName(self: SymbolAtIndex, ctx: Context) []const u8 { const sym = self.getSymbol(ctx); - if (sym.n_strx == 0) return ""; + assert(sym.n_strx < ctx.strtab.len); return mem.sliceTo(@ptrCast([*:0]const u8, ctx.strtab.ptr + sym.n_strx), 0); } @@ -359,15 +359,17 @@ fn filterDice( return dices[start..end]; } -/// Splits object into atoms assuming whole cache mode aka traditional linking mode. -pub fn splitIntoAtomsWhole(self: *Object, macho_file: *MachO, object_id: u32) !void { +/// Splits object into atoms assuming one-shot linking mode. 
+pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) !void { + assert(macho_file.mode == .one_shot); + const tracy = trace(@src()); defer tracy.end(); const gpa = macho_file.base.allocator; const seg = self.load_commands.items[self.segment_cmd_index.?].segment; - log.debug("splitting object({d}, {s}) into atoms: whole cache mode", .{ object_id, self.name }); + log.debug("splitting object({d}, {s}) into atoms: one-shot mode", .{ object_id, self.name }); // You would expect that the symbol table is at least pre-sorted based on symbol's type: // local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance, @@ -416,11 +418,11 @@ pub fn splitIntoAtomsWhole(self: *Object, macho_file: *MachO, object_id: u32) !v log.debug(" unhandled section", .{}); continue; }; - const target_sect = macho_file.getSection(match); + log.debug(" output sect({d}, '{s},{s}')", .{ macho_file.getSectionOrdinal(match), - target_sect.segName(), - target_sect.sectName(), + macho_file.getSection(match).segName(), + macho_file.getSection(match).sectName(), }); const is_zerofill = blk: { @@ -585,10 +587,19 @@ fn createAtomFromSubsection( sect: macho.section_64, ) !*Atom { const gpa = macho_file.base.allocator; - const sym = &self.symtab.items[sym_index]; + const sym = self.symtab.items[sym_index]; const atom = try MachO.createEmptyAtom(gpa, sym_index, size, alignment); atom.file = object_id; - sym.n_sect = macho_file.getSectionOrdinal(match); + self.symtab.items[sym_index].n_sect = macho_file.getSectionOrdinal(match); + + log.debug("creating ATOM(%{d}, '{s}') in sect({d}, '{s},{s}') in object({d})", .{ + sym_index, + self.getString(sym.n_strx), + macho_file.getSectionOrdinal(match), + macho_file.getSection(match).segName(), + macho_file.getSection(match).sectName(), + object_id, + }); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); try self.managed_atoms.append(gpa, atom); @@ -669,7 +680,6 @@ fn createAtomFromSubsection( // if 
(zld.globals.contains(zld.getString(sym.strx))) break :blk .global; break :blk .static; } else null; - atom.contained.appendAssumeCapacity(.{ .sym_index = inner_sym_index.index, .offset = inner_sym.n_value - sym.n_value, diff --git a/test/link/macho/objcpp/build.zig b/test/link/macho/objcpp/build.zig index 6b9047fbf9..767578e225 100644 --- a/test/link/macho/objcpp/build.zig +++ b/test/link/macho/objcpp/build.zig @@ -16,7 +16,6 @@ pub fn build(b: *Builder) void { // TODO when we figure out how to ship framework stubs for cross-compilation, // populate paths to the sysroot here. exe.linkFramework("Foundation"); - exe.link_gc_sections = true; const run_cmd = exe.run(); run_cmd.expectStdOutEqual("Hello from C++ and Zig"); From 35a5a4a0e45b4a92d6f7d0428fe1a4b64815edb9 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 15 Jul 2022 12:58:42 +0200 Subject: [PATCH 10/27] macho: fix marking sections for pruning in GC --- src/link/MachO.zig | 78 +++++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 36 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 7ae013e6bc..9b3911d54a 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -244,7 +244,6 @@ unnamed_const_atoms: UnnamedConstTable = .{}, decls: std.AutoArrayHashMapUnmanaged(Module.Decl.Index, ?MatchingSection) = .{}, gc_roots: std.AutoHashMapUnmanaged(*Atom, void) = .{}, -gc_sections: std.AutoHashMapUnmanaged(MatchingSection, void) = .{}, const Entry = struct { target: SymbolWithLoc, @@ -3296,7 +3295,6 @@ pub fn deinit(self: *MachO) void { self.locals_free_list.deinit(self.base.allocator); self.unresolved.deinit(self.base.allocator); self.gc_roots.deinit(self.base.allocator); - self.gc_sections.deinit(self.base.allocator); for (self.objects.items) |*object| { object.deinit(self.base.allocator); @@ -5333,35 +5331,6 @@ fn pruneAndSortSectionsInSegment(self: *MachO, maybe_seg_id: *?u16, indices: []* for (indices) |maybe_index| { const old_idx = maybe_index.* orelse 
continue;
        const sect = &sections[old_idx];
-
-        // Recalculate section alignment and size if required.
-        const match = MatchingSection{
-            .seg = seg_id,
-            .sect = old_idx,
-        };
-        if (self.gc_sections.get(match)) |_| blk: {
-            sect.@"align" = 0;
-            sect.size = 0;
-
-            var atom = self.atoms.get(match) orelse break :blk;
-
-            while (atom.prev) |prev| {
-                atom = prev;
-            }
-
-            while (true) {
-                const atom_alignment = try math.powi(u32, 2, atom.alignment);
-                const aligned_end_addr = mem.alignForwardGeneric(u64, sect.size, atom_alignment);
-                const padding = aligned_end_addr - sect.size;
-                sect.size += padding + atom.size;
-                sect.@"align" = @maximum(sect.@"align", atom.alignment);
-
-                if (atom.next) |next| {
-                    atom = next;
-                } else break;
-            }
-        }
-
         if (sect.size == 0) {
             log.debug("pruning section {s},{s}", .{ sect.segName(), sect.sectName() });
             maybe_index.* = null;
@@ -5550,6 +5519,11 @@ fn gcAtoms(self: *MachO) !void {
         }
     }
 
+    // Any section that ends up here will be updated, that is,
+    // its size and alignment recalculated.
+    var gc_sections = std.AutoHashMap(MatchingSection, void).init(gpa);
+    defer gc_sections.deinit();
+
     atoms_it = self.atoms.iterator();
     while (atoms_it.next()) |entry| {
         const match = entry.key_ptr.*;
@@ -5602,18 +5576,22 @@ fn gcAtoms(self: *MachO) !void {
             // account any padding that might have been left here.
             sect.size -= atom.size;
 
+            _ = try gc_sections.put(match, {});
+
             if (atom.prev) |prev| {
                 prev.next = atom.next;
             }
            if (atom.next) |next| {
                next.prev = atom.prev;
            } else {
-                // TODO I think a null would be better here.
-                // The section will be GCed in the next step.
-                entry.value_ptr.* = if (atom.prev) |prev| prev else undefined;
+                if (atom.prev) |prev| {
+                    entry.value_ptr.* = prev;
+                } else {
+                    // The section will be GCed in the next step.
+ entry.value_ptr.* = undefined; + sect.size = 0; + } } - - _ = try self.gc_sections.getOrPut(gpa, match); } if (orig_prev) |prev| { @@ -5621,6 +5599,34 @@ fn gcAtoms(self: *MachO) !void { } else break; } } + + var gc_sections_it = gc_sections.iterator(); + while (gc_sections_it.next()) |entry| { + const match = entry.key_ptr.*; + const sect = self.getSectionPtr(match); + if (sect.size == 0) continue; // Pruning happens automatically in next step. + + sect.@"align" = 0; + sect.size = 0; + + var atom = self.atoms.get(match).?; + + while (atom.prev) |prev| { + atom = prev; + } + + while (true) { + const atom_alignment = try math.powi(u32, 2, atom.alignment); + const aligned_end_addr = mem.alignForwardGeneric(u64, sect.size, atom_alignment); + const padding = aligned_end_addr - sect.size; + sect.size += padding + atom.size; + sect.@"align" = @maximum(sect.@"align", atom.alignment); + + if (atom.next) |next| { + atom = next; + } else break; + } + } } fn updateSectionOrdinals(self: *MachO) !void { From 61b4119a7d5862f58fc4a34024456e3feca292a5 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 15 Jul 2022 15:04:45 +0200 Subject: [PATCH 11/27] macho: link atom starting section by orig section id In x86_64 relocs, it can so happen that the compiler refers to the same atom by both the actual assigned symbol and the start of the section. In this case, we need to link the two together so add an alias. --- src/link/MachO/Atom.zig | 11 ++++------- src/link/MachO/Object.zig | 26 ++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index d58e898848..ecf2f10149 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -659,13 +659,10 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { // If there is no atom for target, we still need to check for special, atom-less // symbols such as `___dso_handle`. 
const target_name = macho_file.getSymbolName(rel.target); - if (macho_file.globals.contains(target_name)) { - const atomless_sym = macho_file.getSymbol(rel.target); - log.debug(" | atomless target '{s}'", .{target_name}); - break :blk atomless_sym.n_value; - } - log.debug(" | undef target '{s}'", .{target_name}); - break :blk 0; + assert(macho_file.globals.contains(target_name)); + const atomless_sym = macho_file.getSymbol(rel.target); + log.debug(" | atomless target '{s}'", .{target_name}); + break :blk atomless_sym.n_value; }; log.debug(" | target ATOM(%{d}, '{s}') in object({d})", .{ target_atom.sym_index, diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 91e2169b7d..2737b5e164 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -425,6 +425,7 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) macho_file.getSection(match).sectName(), }); + const arch = macho_file.base.options.target.cpu.arch; const is_zerofill = blk: { const section_type = sect.type_(); break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; @@ -538,6 +539,31 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) match, sect, ); + + if (arch == .x86_64 and addr == sect.addr) { + // In x86_64 relocs, it can so happen that the compiler refers to the same + // atom by both the actual assigned symbol and the start of the section. In this + // case, we need to link the two together so add an alias. 
+ const alias = self.sections_as_symbols.get(sect_id) orelse blk: { + const alias = @intCast(u32, self.symtab.items.len); + try self.symtab.append(gpa, .{ + .n_strx = 0, + .n_type = macho.N_SECT, + .n_sect = macho_file.getSectionOrdinal(match), + .n_desc = 0, + .n_value = addr, + }); + try self.sections_as_symbols.putNoClobber(gpa, sect_id, alias); + break :blk alias; + }; + try atom.contained.append(gpa, .{ + .sym_index = alias, + .offset = 0, + .stab = null, + }); + try self.atom_by_index_table.put(gpa, alias, atom); + } + try macho_file.addAtomToSection(atom, match); } } else { From 817939d20a097c195fc924398081e64556cc0aea Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 15 Jul 2022 17:54:09 +0200 Subject: [PATCH 12/27] macho: don't store GC roots globally Instead, if dead-strip was requested, create a temp container and pass it around. --- src/link/MachO.zig | 62 +++++++++++++++++++++------------------ src/link/MachO/Object.zig | 45 +++++++++++++++++----------- 2 files changed, 62 insertions(+), 45 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 9b3911d54a..08e2805ee8 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -243,8 +243,6 @@ unnamed_const_atoms: UnnamedConstTable = .{}, /// TODO consolidate this. 
decls: std.AutoArrayHashMapUnmanaged(Module.Decl.Index, ?MatchingSection) = .{}, -gc_roots: std.AutoHashMapUnmanaged(*Atom, void) = .{}, - const Entry = struct { target: SymbolWithLoc, atom: *Atom, @@ -631,7 +629,8 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) const tracy = trace(@src()); defer tracy.end(); - var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator); + const gpa = self.base.allocator; + var arena_allocator = std.heap.ArenaAllocator.init(gpa); defer arena_allocator.deinit(); const arena = arena_allocator.allocator(); @@ -676,6 +675,7 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; const is_exe_or_dyn_lib = is_dyn_lib or self.base.options.output_mode == .Exe; const stack_size = self.base.options.stack_size_override orelse 0; + const dead_strip = self.base.options.gc_sections orelse false; const id_symlink_basename = "zld.id"; @@ -707,7 +707,7 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) man.hash.addOptional(self.base.options.search_strategy); man.hash.addOptional(self.base.options.headerpad_size); man.hash.add(self.base.options.headerpad_max_install_names); - man.hash.add(self.base.options.gc_sections orelse false); + man.hash.add(dead_strip); man.hash.add(self.base.options.dead_strip_dylibs); man.hash.addListOfBytes(self.base.options.lib_dirs); man.hash.addListOfBytes(self.base.options.framework_dirs); @@ -790,14 +790,14 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) .mode = link.determineMode(self.base.options), }); // Index 0 is always a null symbol. 
- try self.locals.append(self.base.allocator, .{ + try self.locals.append(gpa, .{ .n_strx = 0, .n_type = 0, .n_sect = 0, .n_desc = 0, .n_value = 0, }); - try self.strtab.buffer.append(self.base.allocator, 0); + try self.strtab.buffer.append(gpa, 0); try self.populateMissingMetadata(); var lib_not_found = false; @@ -964,10 +964,10 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) .cmdsize = cmdsize, .path = @sizeOf(macho.rpath_command), }); - rpath_cmd.data = try self.base.allocator.alloc(u8, cmdsize - rpath_cmd.inner.path); + rpath_cmd.data = try gpa.alloc(u8, cmdsize - rpath_cmd.inner.path); mem.set(u8, rpath_cmd.data, 0); mem.copy(u8, rpath_cmd.data, rpath); - try self.load_commands.append(self.base.allocator, .{ .rpath = rpath_cmd }); + try self.load_commands.append(gpa, .{ .rpath = rpath_cmd }); try rpath_table.putNoClobber(rpath, {}); self.load_commands_dirty = true; } @@ -975,11 +975,11 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) // code signature and entitlements if (self.base.options.entitlements) |path| { if (self.code_signature) |*csig| { - try csig.addEntitlements(self.base.allocator, path); + try csig.addEntitlements(gpa, path); csig.code_directory.ident = self.base.options.emit.?.sub_path; } else { var csig = CodeSignature.init(self.page_size); - try csig.addEntitlements(self.base.allocator, path); + try csig.addEntitlements(gpa, path); csig.code_directory.ident = self.base.options.emit.?.sub_path; self.code_signature = csig; } @@ -1033,10 +1033,8 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try argv.append("-headerpad_max_install_names"); } - if (self.base.options.gc_sections) |is_set| { - if (is_set) { - try argv.append("-dead_strip"); - } + if (dead_strip) { + try argv.append("-dead_strip"); } if (self.base.options.dead_strip_dylibs) { @@ -1120,7 +1118,7 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) var 
dependent_libs = std.fifo.LinearFifo(struct { id: Dylib.Id, parent: u16, - }, .Dynamic).init(self.base.allocator); + }, .Dynamic).init(gpa); defer dependent_libs.deinit(); try self.parseInputFiles(positionals.items, self.base.options.sysroot, &dependent_libs); try self.parseAndForceLoadStaticArchives(must_link_archives.keys()); @@ -1153,11 +1151,21 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try self.createTentativeDefAtoms(); - for (self.objects.items) |*object, object_id| { - try object.splitIntoAtomsOneShot(self, @intCast(u32, object_id)); + if (dead_strip) { + var gc_roots = std.AutoHashMap(*Atom, void).init(gpa); + defer gc_roots.deinit(); + + for (self.objects.items) |*object, object_id| { + try object.splitIntoAtomsOneShot(self, @intCast(u32, object_id), &gc_roots); + } + + try self.gcAtoms(&gc_roots); + } else { + for (self.objects.items) |*object, object_id| { + try object.splitIntoAtomsOneShot(self, @intCast(u32, object_id), null); + } } - try self.gcAtoms(); try self.pruneAndSortSections(); try self.allocateSegments(); try self.allocateSymbols(); @@ -1184,7 +1192,7 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try self.writeLinkeditSegment(); if (self.code_signature) |*csig| { - csig.clear(self.base.allocator); + csig.clear(gpa); csig.code_directory.ident = self.base.options.emit.?.sub_path; // Preallocate space for the code signature. 
// We need to do this at this stage so that we have the load commands with proper values @@ -3294,7 +3302,6 @@ pub fn deinit(self: *MachO) void { self.locals.deinit(self.base.allocator); self.locals_free_list.deinit(self.base.allocator); self.unresolved.deinit(self.base.allocator); - self.gc_roots.deinit(self.base.allocator); for (self.objects.items) |*object| { object.deinit(self.base.allocator); @@ -5447,9 +5454,8 @@ fn pruneAndSortSections(self: *MachO) !void { self.sections_order_dirty = false; } -fn gcAtoms(self: *MachO) !void { - const dead_strip = self.base.options.gc_sections orelse return; - if (!dead_strip) return; +fn gcAtoms(self: *MachO, gc_roots: *std.AutoHashMap(*Atom, void)) !void { + assert(self.base.options.gc_sections.?); const gpa = self.base.allocator; @@ -5461,7 +5467,7 @@ fn gcAtoms(self: *MachO) !void { log.debug("skipping {s}", .{self.getSymbolName(global)}); continue; }; - _ = try self.gc_roots.getOrPut(gpa, gc_root); + _ = try gc_roots.getOrPut(gc_root); } // Add any atom targeting an import as GC root @@ -5474,7 +5480,7 @@ fn gcAtoms(self: *MachO) !void { if ((try rel.getTargetAtom(self)) == null) { const target_sym = self.getSymbol(rel.target); if (target_sym.undf()) { - _ = try self.gc_roots.getOrPut(gpa, atom); + _ = try gc_roots.getOrPut(atom); break; } } @@ -5488,14 +5494,14 @@ fn gcAtoms(self: *MachO) !void { var stack = std.ArrayList(*Atom).init(gpa); defer stack.deinit(); - try stack.ensureUnusedCapacity(self.gc_roots.count()); + try stack.ensureUnusedCapacity(gc_roots.count()); var retained = std.AutoHashMap(*Atom, void).init(gpa); defer retained.deinit(); - try retained.ensureUnusedCapacity(self.gc_roots.count()); + try retained.ensureUnusedCapacity(gc_roots.count()); log.debug("GC roots:", .{}); - var gc_roots_it = self.gc_roots.keyIterator(); + var gc_roots_it = gc_roots.keyIterator(); while (gc_roots_it.next()) |gc_root| { self.logAtom(gc_root.*); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 
2737b5e164..82d872f68c 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -360,7 +360,12 @@ fn filterDice( } /// Splits object into atoms assuming one-shot linking mode. -pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) !void { +pub fn splitIntoAtomsOneShot( + self: *Object, + macho_file: *MachO, + object_id: u32, + gc_roots: ?*std.AutoHashMap(*Atom, void), +) !void { assert(macho_file.mode == .one_shot); const tracy = trace(@src()); @@ -493,6 +498,7 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) &.{}, match, sect, + gc_roots, ); try macho_file.addAtomToSection(atom, match); } @@ -538,6 +544,7 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) atom_syms[1..], match, sect, + gc_roots, ); if (arch == .x86_64 and addr == sect.addr) { @@ -593,6 +600,7 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) filtered_syms, match, sect, + gc_roots, ); try macho_file.addAtomToSection(atom, match); } @@ -611,6 +619,7 @@ fn createAtomFromSubsection( indexes: []const SymbolAtIndex, match: MatchingSection, sect: macho.section_64, + gc_roots: ?*std.AutoHashMap(*Atom, void), ) !*Atom { const gpa = macho_file.base.allocator; const sym = self.symtab.items[sym_index]; @@ -715,23 +724,25 @@ fn createAtomFromSubsection( try self.atom_by_index_table.putNoClobber(gpa, inner_sym_index.index, atom); } - const is_gc_root = blk: { - if (sect.isDontDeadStrip()) break :blk true; - if (sect.isDontDeadStripIfReferencesLive()) { - // TODO if isDontDeadStripIfReferencesLive we should analyse the edges - // before making it a GC root - break :blk true; + if (gc_roots) |gcr| { + const is_gc_root = blk: { + if (sect.isDontDeadStrip()) break :blk true; + if (sect.isDontDeadStripIfReferencesLive()) { + // TODO if isDontDeadStripIfReferencesLive we should analyse the edges + // before making it a GC root + break :blk true; + } + if (mem.eql(u8, 
"__StaticInit", sect.sectName())) break :blk true; + switch (sect.type_()) { + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => break :blk true, + else => break :blk false, + } + }; + if (is_gc_root) { + try gcr.putNoClobber(atom, {}); } - if (mem.eql(u8, "__StaticInit", sect.sectName())) break :blk true; - switch (sect.type_()) { - macho.S_MOD_INIT_FUNC_POINTERS, - macho.S_MOD_TERM_FUNC_POINTERS, - => break :blk true, - else => break :blk false, - } - }; - if (is_gc_root) { - try macho_file.gc_roots.putNoClobber(gpa, atom, {}); } return atom; From b380ed6a729d7f0daa6f0ee2b58bf3b54017cc65 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 15 Jul 2022 20:59:39 +0200 Subject: [PATCH 13/27] macho: deallocate globals keys --- src/link/MachO.zig | 84 +++++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 39 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 08e2805ee8..b1bd7b5817 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3279,71 +3279,77 @@ fn setEntryPoint(self: *MachO) !void { } pub fn deinit(self: *MachO) void { + const gpa = self.base.allocator; + if (build_options.have_llvm) { - if (self.llvm_object) |llvm_object| llvm_object.destroy(self.base.allocator); + if (self.llvm_object) |llvm_object| llvm_object.destroy(gpa); } if (self.d_sym) |*d_sym| { - d_sym.deinit(self.base.allocator); + d_sym.deinit(gpa); } - self.section_ordinals.deinit(self.base.allocator); - self.tlv_ptr_entries.deinit(self.base.allocator); - self.tlv_ptr_entries_free_list.deinit(self.base.allocator); - self.tlv_ptr_entries_table.deinit(self.base.allocator); - self.got_entries.deinit(self.base.allocator); - self.got_entries_free_list.deinit(self.base.allocator); - self.got_entries_table.deinit(self.base.allocator); - self.stubs.deinit(self.base.allocator); - self.stubs_free_list.deinit(self.base.allocator); - self.stubs_table.deinit(self.base.allocator); - self.strtab.deinit(self.base.allocator); - 
self.globals.deinit(self.base.allocator); - self.locals.deinit(self.base.allocator); - self.locals_free_list.deinit(self.base.allocator); - self.unresolved.deinit(self.base.allocator); + self.section_ordinals.deinit(gpa); + self.tlv_ptr_entries.deinit(gpa); + self.tlv_ptr_entries_free_list.deinit(gpa); + self.tlv_ptr_entries_table.deinit(gpa); + self.got_entries.deinit(gpa); + self.got_entries_free_list.deinit(gpa); + self.got_entries_table.deinit(gpa); + self.stubs.deinit(gpa); + self.stubs_free_list.deinit(gpa); + self.stubs_table.deinit(gpa); + self.strtab.deinit(gpa); + self.locals.deinit(gpa); + self.locals_free_list.deinit(gpa); + self.unresolved.deinit(gpa); + + for (self.globals.keys()) |key| { + gpa.free(key); + } + self.globals.deinit(gpa); for (self.objects.items) |*object| { - object.deinit(self.base.allocator); + object.deinit(gpa); } - self.objects.deinit(self.base.allocator); + self.objects.deinit(gpa); for (self.archives.items) |*archive| { - archive.deinit(self.base.allocator); + archive.deinit(gpa); } - self.archives.deinit(self.base.allocator); + self.archives.deinit(gpa); for (self.dylibs.items) |*dylib| { - dylib.deinit(self.base.allocator); + dylib.deinit(gpa); } - self.dylibs.deinit(self.base.allocator); - self.dylibs_map.deinit(self.base.allocator); - self.referenced_dylibs.deinit(self.base.allocator); + self.dylibs.deinit(gpa); + self.dylibs_map.deinit(gpa); + self.referenced_dylibs.deinit(gpa); for (self.load_commands.items) |*lc| { - lc.deinit(self.base.allocator); + lc.deinit(gpa); } - self.load_commands.deinit(self.base.allocator); + self.load_commands.deinit(gpa); for (self.managed_atoms.items) |atom| { - atom.deinit(self.base.allocator); - self.base.allocator.destroy(atom); + atom.deinit(gpa); + gpa.destroy(atom); } - self.managed_atoms.deinit(self.base.allocator); - self.atoms.deinit(self.base.allocator); + self.managed_atoms.deinit(gpa); + self.atoms.deinit(gpa); { var it = self.atom_free_lists.valueIterator(); while (it.next()) 
|free_list| { - free_list.deinit(self.base.allocator); + free_list.deinit(gpa); } - self.atom_free_lists.deinit(self.base.allocator); + self.atom_free_lists.deinit(gpa); } if (self.base.options.module) |mod| { for (self.decls.keys()) |decl_index| { const decl = mod.declPtr(decl_index); - decl.link.macho.deinit(self.base.allocator); + decl.link.macho.deinit(gpa); } - self.decls.deinit(self.base.allocator); + self.decls.deinit(gpa); } else { assert(self.decls.count() == 0); } @@ -3351,15 +3357,15 @@ pub fn deinit(self: *MachO) void { { var it = self.unnamed_const_atoms.valueIterator(); while (it.next()) |atoms| { - atoms.deinit(self.base.allocator); + atoms.deinit(gpa); } - self.unnamed_const_atoms.deinit(self.base.allocator); + self.unnamed_const_atoms.deinit(gpa); } - self.atom_by_index_table.deinit(self.base.allocator); + self.atom_by_index_table.deinit(gpa); if (self.code_signature) |*csig| { - csig.deinit(self.base.allocator); + csig.deinit(gpa); } } From 4658d857de9198e825f13c136b2342d630e19e62 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 16 Jul 2022 10:32:17 +0200 Subject: [PATCH 14/27] macho: fix caching linker line in incremental setting --- src/link/MachO.zig | 81 +++++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b1bd7b5817..b441aaefc1 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -478,50 +478,59 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No defer if (!self.base.options.disable_lld_caching) man.deinit(); var digest: [Cache.hex_digest_len]u8 = undefined; - var cache_miss: bool = self.cold_start; + man = comp.cache_parent.obtain(); + self.base.releaseLock(); - if (!self.base.options.disable_lld_caching) { - man = comp.cache_parent.obtain(); - self.base.releaseLock(); + man.hash.addListOfBytes(libs.keys()); - man.hash.addListOfBytes(libs.keys()); + _ = try man.hit(); + digest = man.final(); - _ = 
try man.hit(); - digest = man.final(); - - var prev_digest_buf: [digest.len]u8 = undefined; - const prev_digest: []u8 = Cache.readSmallFile( - cache_dir_handle, - id_symlink_basename, - &prev_digest_buf, - ) catch |err| blk: { - log.debug("MachO Zld new_digest={s} error: {s}", .{ - std.fmt.fmtSliceHexLower(&digest), - @errorName(err), - }); - // Handle this as a cache miss. - break :blk prev_digest_buf[0..0]; - }; + var prev_digest_buf: [digest.len]u8 = undefined; + const prev_digest: []u8 = Cache.readSmallFile( + cache_dir_handle, + id_symlink_basename, + &prev_digest_buf, + ) catch |err| blk: { + log.debug("MachO Zld new_digest={s} error: {s}", .{ + std.fmt.fmtSliceHexLower(&digest), + @errorName(err), + }); + // Handle this as a cache miss. + break :blk prev_digest_buf[0..0]; + }; + const cache_miss: bool = cache_miss: { if (mem.eql(u8, prev_digest, &digest)) { - log.debug("MachO Zld digest={s} match - skipping parsing linker line objects", .{ + log.debug("MachO Zld digest={s} match", .{ std.fmt.fmtSliceHexLower(&digest), }); - self.base.lock = man.toOwnedLock(); - } else { - log.debug("MachO Zld prev_digest={s} new_digest={s}", .{ - std.fmt.fmtSliceHexLower(prev_digest), - std.fmt.fmtSliceHexLower(&digest), - }); - // We are about to change the output file to be different, so we invalidate the build hash now. - cache_dir_handle.deleteFile(id_symlink_basename) catch |err| switch (err) { - error.FileNotFound => {}, - else => |e| return e, - }; - cache_miss = true; + if (!self.cold_start) { + log.debug(" skipping parsing linker line objects", .{}); + break :cache_miss false; + } else { + log.debug(" TODO parse prelinked binary and continue linking where we left off", .{}); + } } - } + log.debug("MachO Zld prev_digest={s} new_digest={s}", .{ + std.fmt.fmtSliceHexLower(prev_digest), + std.fmt.fmtSliceHexLower(&digest), + }); + // We are about to change the output file to be different, so we invalidate the build hash now. 
+ cache_dir_handle.deleteFile(id_symlink_basename) catch |err| switch (err) { + error.FileNotFound => {}, + else => |e| return e, + }; + break :cache_miss true; + }; if (cache_miss) { + for (self.dylibs.items) |*dylib| { + dylib.deinit(self.base.allocator); + } + self.dylibs.clearRetainingCapacity(); + self.dylibs_map.clearRetainingCapacity(); + self.referenced_dylibs.clearRetainingCapacity(); + var dependent_libs = std.fifo.LinearFifo(struct { id: Dylib.Id, parent: u16, @@ -607,7 +616,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.snapshotState(); } - if (!self.base.options.disable_lld_caching and cache_miss) { + if (cache_miss) { // Update the file with the digest. If it fails we can continue; it only // means that the next invocation will have an unnecessary cache miss. Cache.writeSmallFile(cache_dir_handle, id_symlink_basename, &digest) catch |err| { From 41b91442f4af293d4cbbabbe9e451bd31c5c5679 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 16 Jul 2022 10:53:47 +0200 Subject: [PATCH 15/27] macho: improve logs for dyld info --- src/arch/aarch64/CodeGen.zig | 6 ++-- src/arch/aarch64/Emit.zig | 10 +++--- src/arch/aarch64/Mir.zig | 8 +++-- src/arch/x86_64/CodeGen.zig | 8 ++--- src/arch/x86_64/Emit.zig | 18 +++++----- src/arch/x86_64/Mir.zig | 17 ++++----- src/link/MachO.zig | 69 +++++++++++++++++++++++++++--------- src/link/MachO/Atom.zig | 6 ++-- 8 files changed, 88 insertions(+), 54 deletions(-) diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index f65e19d561..ba7c56e2bd 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -3190,14 +3190,14 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. 
lib_name, }); } - const global_index = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0)); + const sym_index = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0)); _ = try self.addInst(.{ .tag = .call_extern, .data = .{ - .extern_fn = .{ + .relocation = .{ .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index, - .global_index = global_index, + .sym_index = sym_index, }, }, }); diff --git a/src/arch/aarch64/Emit.zig b/src/arch/aarch64/Emit.zig index 7469eaefeb..47a0c08893 100644 --- a/src/arch/aarch64/Emit.zig +++ b/src/arch/aarch64/Emit.zig @@ -649,7 +649,7 @@ fn mirDebugEpilogueBegin(self: *Emit) !void { fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) !void { assert(emit.mir.instructions.items(.tag)[inst] == .call_extern); - const extern_fn = emit.mir.instructions.items(.data)[inst].extern_fn; + const relocation = emit.mir.instructions.items(.data)[inst].relocation; if (emit.bin_file.cast(link.File.MachO)) |macho_file| { const offset = blk: { @@ -659,11 +659,13 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) !void { break :blk offset; }; // Add relocation to the decl. - const atom = macho_file.atom_by_index_table.get(extern_fn.atom_index).?; - const target = macho_file.globals.values()[extern_fn.global_index]; + const atom = macho_file.atom_by_index_table.get(relocation.atom_index).?; try atom.relocs.append(emit.bin_file.allocator, .{ .offset = offset, - .target = target, + .target = .{ + .sym_index = relocation.sym_index, + .file = null, + }, .addend = 0, .subtractor = null, .pcrel = true, diff --git a/src/arch/aarch64/Mir.zig b/src/arch/aarch64/Mir.zig index b162905f36..2fef069f7a 100644 --- a/src/arch/aarch64/Mir.zig +++ b/src/arch/aarch64/Mir.zig @@ -225,14 +225,16 @@ pub const Inst = struct { /// /// Used by e.g. b inst: Index, - /// An extern function + /// Relocation for the linker where: + /// * `atom_index` is the index of the source + /// * `sym_index` is the index of the target /// /// Used by e.g. 
call_extern - extern_fn: struct { + relocation: struct { /// Index of the containing atom. atom_index: u32, /// Index into the linker's string table. - global_index: u32, + sym_index: u32, }, /// A 16-bit immediate value. /// diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index da35d3b4b6..b35db3e97a 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2644,7 +2644,7 @@ fn loadMemPtrIntoRegister(self: *Self, reg: Register, ptr_ty: Type, ptr: MCValue .flags = flags, }), .data = .{ - .load_reloc = .{ + .relocation = .{ .atom_index = fn_owner_decl.link.macho.sym_index, .sym_index = sym_index, }, @@ -3997,14 +3997,14 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. lib_name, }); } - const global_index = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0)); + const sym_index = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0)); _ = try self.addInst(.{ .tag = .call_extern, .ops = undefined, .data = .{ - .extern_fn = .{ + .relocation = .{ .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index, - .global_index = global_index, + .sym_index = sym_index, }, }, }); diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 8d91c3d7e6..52d68e81ec 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -982,7 +982,7 @@ fn mirLeaPie(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const tag = emit.mir.instructions.items(.tag)[inst]; assert(tag == .lea_pie); const ops = emit.mir.instructions.items(.ops)[inst].decode(); - const load_reloc = emit.mir.instructions.items(.data)[inst].load_reloc; + const relocation = emit.mir.instructions.items(.data)[inst].relocation; // lea reg1, [rip + reloc] // RM @@ -1001,11 +1001,11 @@ fn mirLeaPie(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { 0b01 => @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_SIGNED), else => return emit.fail("TODO unused LEA PIE variants 0b10 and 0b11", .{}), }; - const 
atom = macho_file.atom_by_index_table.get(load_reloc.atom_index).?; - log.debug("adding reloc of type {} to local @{d}", .{ reloc_type, load_reloc.sym_index }); + const atom = macho_file.atom_by_index_table.get(relocation.atom_index).?; + log.debug("adding reloc of type {} to local @{d}", .{ reloc_type, relocation.sym_index }); try atom.relocs.append(emit.bin_file.allocator, .{ .offset = @intCast(u32, end_offset - 4), - .target = .{ .sym_index = load_reloc.sym_index, .file = null }, + .target = .{ .sym_index = relocation.sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -1116,7 +1116,7 @@ fn mirCmpFloatAvx(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const tag = emit.mir.instructions.items(.tag)[inst]; assert(tag == .call_extern); - const extern_fn = emit.mir.instructions.items(.data)[inst].extern_fn; + const relocation = emit.mir.instructions.items(.data)[inst].relocation; const offset = blk: { // callq @@ -1126,11 +1126,13 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { if (emit.bin_file.cast(link.File.MachO)) |macho_file| { // Add relocation to the decl. 
- const atom = macho_file.atom_by_index_table.get(extern_fn.atom_index).?; - const target = macho_file.globals.values()[extern_fn.global_index]; + const atom = macho_file.atom_by_index_table.get(relocation.atom_index).?; try atom.relocs.append(emit.bin_file.allocator, .{ .offset = offset, - .target = target, + .target = .{ + .sym_index = relocation.sym_index, + .file = null, + }, .addend = 0, .subtractor = null, .pcrel = true, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 91ad9f4d9c..f67b48a271 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -181,7 +181,7 @@ pub const Inst = struct { /// 0b00 reg1, [rip + reloc] // via GOT emits X86_64_RELOC_GOT relocation /// 0b01 reg1, [rip + reloc] // direct load emits X86_64_RELOC_SIGNED relocation /// Notes: - /// * `Data` contains `load_reloc` + /// * `Data` contains `relocation` lea_pie, /// ops flags: form: @@ -368,7 +368,7 @@ pub const Inst = struct { /// Pseudo-instructions /// call extern function /// Notes: - /// * target of the call is stored as `extern_fn` in `Data` union. + /// * target of the call is stored as `relocation` in `Data` union. call_extern, /// end of prologue @@ -439,15 +439,10 @@ pub const Inst = struct { /// A condition code for use with EFLAGS register. cc: bits.Condition, }, - /// An extern function. - extern_fn: struct { - /// Index of the containing atom. - atom_index: u32, - /// Index into the linker's globals table. - global_index: u32, - }, - /// PIE load relocation. - load_reloc: struct { + /// Relocation for the linker where: + /// * `atom_index` is the index of the source + /// * `sym_index` is the index of the target + relocation: struct { /// Index of the containing atom. atom_index: u32, /// Index into the linker's symbol table. 
diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b441aaefc1..d4fbd14287 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -153,6 +153,13 @@ rustc_section_size: u64 = 0, locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, globals: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, +// FIXME Jakub +// TODO storing index into globals might be dangerous if we delete a global +// while not having everything resolved. Actually, perhaps `unresolved` +// should not be stored at the global scope? Is this possible? +// Otherwise, audit if this can be a problem. +// An alternative, which I still need to investigate for perf reasons is to +// store all global names in an adapted with context strtab. unresolved: std.AutoArrayHashMapUnmanaged(u32, bool) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, @@ -2449,9 +2456,9 @@ pub fn createGotAtom(self: *MachO, target: SymbolWithLoc) !*Atom { const target_sym = self.getSymbol(target); if (target_sym.undf()) { - const global_index = @intCast(u32, self.globals.getIndex(self.getSymbolName(target)).?); + const global = self.globals.get(self.getSymbolName(target)).?; try atom.bindings.append(gpa, .{ - .global_index = global_index, + .target = global, .offset = 0, }); } else { @@ -2483,9 +2490,10 @@ pub fn createTlvPtrAtom(self: *MachO, target: SymbolWithLoc) !*Atom { const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); const target_sym = self.getSymbol(target); assert(target_sym.undf()); - const global_index = @intCast(u32, self.globals.getIndex(self.getSymbolName(target)).?); + + const global = self.globals.get(self.getSymbolName(target)).?; try atom.bindings.append(gpa, .{ - .global_index = global_index, + .target = global, .offset = 0, }); @@ -2739,7 +2747,6 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, target: SymbolWithLoc) !*Atom { const gpa = self.base.allocator; const sym_index = @intCast(u32, 
self.locals.items.len); - const global_index = @intCast(u32, self.globals.getIndex(self.getSymbolName(target)).?); try self.locals.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, @@ -2762,8 +2769,10 @@ pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, target: SymbolWi }, }); try atom.rebases.append(gpa, 0); + + const global = self.globals.get(self.getSymbolName(target)).?; try atom.lazy_bindings.append(gpa, .{ - .global_index = global_index, + .target = global, .offset = 0, }); @@ -4149,6 +4158,7 @@ pub fn deleteExport(self: *MachO, exp: Export) void { const sym = self.getSymbolPtr(sym_loc); const sym_name = self.getSymbolName(sym_loc); log.debug("deleting export '{s}'", .{sym_name}); + assert(sym.sect() and sym.ext()); sym.* = .{ .n_strx = 0, .n_type = 0, @@ -5307,7 +5317,9 @@ pub fn getGlobalSymbol(self: *MachO, name: []const u8) !u32 { defer if (gop.found_existing) gpa.free(sym_name); if (gop.found_existing) { - return @intCast(u32, self.globals.getIndex(sym_name).?); + // TODO audit this: can we ever reference anything from outside the Zig module? 
+ assert(gop.value_ptr.file == null); + return gop.value_ptr.sym_index; } const sym_index = @intCast(u32, self.locals.items.len); @@ -5324,7 +5336,7 @@ pub fn getGlobalSymbol(self: *MachO, name: []const u8) !u32 { }; try self.unresolved.putNoClobber(gpa, global_index, true); - return global_index; + return sym_index; } fn getSegmentAllocBase(self: MachO, indices: []const ?u16) struct { vmaddr: u64, fileoff: u64 } { @@ -5690,6 +5702,8 @@ fn updateSectionOrdinals(self: *MachO) !void { } } + // FIXME Jakub + // TODO no need for duping work here; simply walk the atom graph for (self.locals.items) |*sym| { if (sym.undf()) continue; if (sym.n_sect == 0) continue; @@ -5735,11 +5749,12 @@ fn writeDyldInfoData(self: *MachO) !void { log.debug("dyld info for {s},{s}", .{ sect.segName(), sect.sectName() }); while (true) { - log.debug(" ATOM %{d}", .{atom.sym_index}); + log.debug(" ATOM(%{d}, '{s}')", .{ atom.sym_index, atom.getName(self) }); const sym = atom.getSymbol(self); const base_offset = sym.n_value - seg.inner.vmaddr; for (atom.rebases.items) |offset| { + log.debug(" | rebase at {x}", .{base_offset + offset}); try rebase_pointers.append(.{ .offset = base_offset + offset, .segment_id = match.seg, @@ -5747,33 +5762,53 @@ fn writeDyldInfoData(self: *MachO) !void { } for (atom.bindings.items) |binding| { - const global = self.globals.values()[binding.global_index]; - const bind_sym = self.getSymbol(global); + const bind_sym = self.getSymbol(binding.target); + const bind_sym_name = self.getSymbolName(binding.target); + const dylib_ordinal = @divTrunc( + @bitCast(i16, bind_sym.n_desc), + macho.N_SYMBOL_RESOLVER, + ); var flags: u4 = 0; + log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ + binding.offset + base_offset, + bind_sym_name, + dylib_ordinal, + }); if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); } try bind_pointers.append(.{ .offset = binding.offset + base_offset, 
.segment_id = match.seg, - .dylib_ordinal = @divTrunc(@bitCast(i16, bind_sym.n_desc), macho.N_SYMBOL_RESOLVER), - .name = self.getSymbolName(global), + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, .bind_flags = flags, }); } for (atom.lazy_bindings.items) |binding| { - const global = self.globals.values()[binding.global_index]; - const bind_sym = self.getSymbol(global); + const bind_sym = self.getSymbol(binding.target); + const bind_sym_name = self.getSymbolName(binding.target); + const dylib_ordinal = @divTrunc( + @bitCast(i16, bind_sym.n_desc), + macho.N_SYMBOL_RESOLVER, + ); var flags: u4 = 0; + log.debug(" | lazy bind at {x} import('{s}') ord({d})", .{ + binding.offset + base_offset, + bind_sym_name, + dylib_ordinal, + }); if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); } try lazy_bind_pointers.append(.{ .offset = binding.offset + base_offset, .segment_id = match.seg, - .dylib_ordinal = @divTrunc(@bitCast(i16, bind_sym.n_desc), macho.N_SYMBOL_RESOLVER), - .name = self.getSymbolName(global), + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, .bind_flags = flags, }); } diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index ecf2f10149..e5a940bdda 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -71,7 +71,7 @@ dbg_info_atom: Dwarf.Atom, dirty: bool = true, pub const Binding = struct { - global_index: u32, + target: SymbolWithLoc, offset: u64, }; @@ -536,10 +536,8 @@ fn addPtrBindingOrRebase( const gpa = context.macho_file.base.allocator; const sym = context.macho_file.getSymbol(target); if (sym.undf()) { - const sym_name = context.macho_file.getSymbolName(target); - const global_index = @intCast(u32, context.macho_file.globals.getIndex(sym_name).?); try self.bindings.append(gpa, .{ - .global_index = global_index, + .target = target, .offset = @intCast(u32, rel.r_address - context.base_offset), }); } else { From 
0da8ba816a14aeb43c0c98adc13943336f9525fa Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 17 Jul 2022 23:48:44 +0200 Subject: [PATCH 16/27] macho: do not store stabs; generate on-the-fly instead --- src/link/MachO.zig | 266 +++++++++++++++++++++++++++++--------- src/link/MachO/Atom.zig | 70 ---------- src/link/MachO/Object.zig | 163 ++--------------------- 3 files changed, 214 insertions(+), 285 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index d4fbd14287..bcfbc4bb1c 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -4,6 +4,7 @@ const std = @import("std"); const build_options = @import("build_options"); const builtin = @import("builtin"); const assert = std.debug.assert; +const dwarf = std.dwarf; const fmt = std.fmt; const fs = std.fs; const log = std.log.scoped(.link); @@ -187,7 +188,6 @@ error_flags: File.ErrorFlags = File.ErrorFlags{}, load_commands_dirty: bool = false, sections_order_dirty: bool = false, has_dices: bool = false, -has_stabs: bool = false, /// A helper var to indicate if we are at the start of the incremental updates, or /// already somewhere further along the update-and-run chain. 
@@ -725,6 +725,7 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) man.hash.add(self.base.options.headerpad_max_install_names); man.hash.add(dead_strip); man.hash.add(self.base.options.dead_strip_dylibs); + man.hash.add(self.base.options.strip); man.hash.addListOfBytes(self.base.options.lib_dirs); man.hash.addListOfBytes(self.base.options.framework_dirs); link.hashAddSystemLibs(&man.hash, self.base.options.frameworks); @@ -1388,9 +1389,15 @@ fn parseObject(self: *MachO, path: []const u8) !bool { const name = try self.base.allocator.dupe(u8, path); errdefer self.base.allocator.free(name); + const mtime: u64 = mtime: { + const stat = file.stat() catch break :mtime 0; + break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000)); + }; + var object = Object{ .name = name, .file = file, + .mtime = mtime, }; object.parse(self.base.allocator, self.base.options.target) catch |err| switch (err) { @@ -2910,7 +2917,6 @@ fn createTentativeDefAtoms(self: *MachO) !void { try atom.contained.append(gpa, .{ .sym_index = global.sym_index, .offset = 0, - .stab = if (object.debug_info) |_| .static else null, }); try object.managed_atoms.append(gpa, atom); @@ -6188,64 +6194,6 @@ fn writeSymtab(self: *MachO) !void { } for (self.objects.items) |object, object_id| { - if (self.has_stabs) { - if (object.debug_info) |_| { - // Open scope - try locals.ensureUnusedCapacity(3); - locals.appendAssumeCapacity(.{ - .n_strx = try self.strtab.insert(gpa, object.tu_comp_dir.?), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.strtab.insert(gpa, object.tu_name.?), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.strtab.insert(gpa, object.name), - .n_type = macho.N_OSO, - .n_sect = 0, - .n_desc = 1, - .n_value = object.mtime orelse 0, - }); - - for (object.managed_atoms.items) |atom| { - for 
(atom.contained.items) |sym_at_off| { - const stab = sym_at_off.stab orelse continue; - const sym_loc = SymbolWithLoc{ - .sym_index = sym_at_off.sym_index, - .file = atom.file, - }; - const sym = self.getSymbol(sym_loc); - if (sym.n_strx == 0) continue; - if (sym.n_desc == N_DESC_GCED) continue; - if (self.symbolIsTemp(sym_loc)) continue; - - const nlists = try stab.asNlists(.{ - .sym_index = sym_at_off.sym_index, - .file = atom.file, - }, self); - defer gpa.free(nlists); - - try locals.appendSlice(nlists); - } - } - - // Close scope - try locals.append(.{ - .n_strx = 0, - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - } - } for (object.symtab.items) |sym, sym_id| { if (sym.n_strx == 0) continue; // no name, skip if (sym.n_desc == N_DESC_GCED) continue; // GCed, skip @@ -6256,6 +6204,10 @@ fn writeSymtab(self: *MachO) !void { out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(sym_loc)); try locals.append(out_sym); } + + if (!self.base.options.strip) { + try self.generateSymbolStabs(object, &locals); + } } var exports = std.ArrayList(macho.nlist_64).init(gpa); @@ -6663,6 +6615,200 @@ pub fn findFirst(comptime T: type, haystack: []const T, start: usize, predicate: return i; } +const DebugInfo = struct { + inner: dwarf.DwarfInfo, + debug_info: []const u8, + debug_abbrev: []const u8, + debug_str: []const u8, + debug_line: []const u8, + debug_line_str: []const u8, + debug_ranges: []const u8, + + pub fn parse(allocator: Allocator, object: Object) !?DebugInfo { + var debug_info = blk: { + const index = object.dwarf_debug_info_index orelse return null; + break :blk try object.getSectionContents(index); + }; + var debug_abbrev = blk: { + const index = object.dwarf_debug_abbrev_index orelse return null; + break :blk try object.getSectionContents(index); + }; + var debug_str = blk: { + const index = object.dwarf_debug_str_index orelse return null; + break :blk try object.getSectionContents(index); + }; + var debug_line = blk: { + 
const index = object.dwarf_debug_line_index orelse return null; + break :blk try object.getSectionContents(index); + }; + var debug_line_str = blk: { + if (object.dwarf_debug_line_str_index) |ind| { + break :blk try object.getSectionContents(ind); + } + break :blk &[0]u8{}; + }; + var debug_ranges = blk: { + if (object.dwarf_debug_ranges_index) |ind| { + break :blk try object.getSectionContents(ind); + } + break :blk &[0]u8{}; + }; + + var inner: dwarf.DwarfInfo = .{ + .endian = .Little, + .debug_info = debug_info, + .debug_abbrev = debug_abbrev, + .debug_str = debug_str, + .debug_line = debug_line, + .debug_line_str = debug_line_str, + .debug_ranges = debug_ranges, + }; + try dwarf.openDwarfDebugInfo(&inner, allocator); + + return DebugInfo{ + .inner = inner, + .debug_info = debug_info, + .debug_abbrev = debug_abbrev, + .debug_str = debug_str, + .debug_line = debug_line, + .debug_line_str = debug_line_str, + .debug_ranges = debug_ranges, + }; + } + + pub fn deinit(self: *DebugInfo, allocator: Allocator) void { + self.inner.deinit(allocator); + } +}; + +pub fn generateSymbolStabs( + self: *MachO, + object: Object, + locals: *std.ArrayList(macho.nlist_64), +) !void { + assert(!self.base.options.strip); + + const gpa = self.base.allocator; + + log.debug("parsing debug info in '{s}'", .{object.name}); + + var debug_info = (try DebugInfo.parse(gpa, object)) orelse return; + + // We assume there is only one CU. + const compile_unit = debug_info.inner.findCompileUnit(0x0) catch |err| switch (err) { + error.MissingDebugInfo => { + // TODO audit cases with missing debug info and audit our dwarf.zig module. 
+ log.debug("invalid or missing debug info in {s}; skipping", .{object.name}); + return; + }, + else => |e| return e, + }; + const tu_name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.name); + const tu_comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.comp_dir); + const source_symtab = object.getSourceSymtab(); + + // Open scope + try locals.ensureUnusedCapacity(3); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, tu_comp_dir), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, tu_name), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, object.name), + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = object.mtime, + }); + + for (object.managed_atoms.items) |atom| { + for (atom.contained.items) |sym_at_off| { + const sym_loc = SymbolWithLoc{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }; + const sym = self.getSymbol(sym_loc); + const sym_name = self.getSymbolName(sym_loc); + if (sym.n_strx == 0) continue; + if (sym.n_desc == N_DESC_GCED) continue; + if (self.symbolIsTemp(sym_loc)) continue; + if (sym_at_off.sym_index >= source_symtab.len) continue; // synthetic, linker generated + + const source_sym = source_symtab[sym_at_off.sym_index]; + const size: ?u64 = size: { + if (source_sym.tentative()) break :size null; + for (debug_info.inner.func_list.items) |func| { + if (func.pc_range) |range| { + if (source_sym.n_value >= range.start and source_sym.n_value < range.end) { + break :size range.end - range.start; + } + } + } + break :size null; + }; + + if (size) |ss| { + try locals.ensureUnusedCapacity(4); + locals.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_BNSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }); + 
locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, sym_name), + .n_type = macho.N_FUN, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }); + locals.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_FUN, + .n_sect = 0, + .n_desc = 0, + .n_value = ss, + }); + locals.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_ENSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = ss, + }); + } else { + try locals.append(.{ + .n_strx = try self.strtab.insert(gpa, sym_name), + .n_type = macho.N_STSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }); + } + } + } + + // Close scope + try locals.append(.{ + .n_strx = 0, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); +} + fn snapshotState(self: *MachO) !void { const emit = self.base.options.emit orelse { log.debug("no emit directory found; skipping snapshot...", .{}); diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index e5a940bdda..7aa4e1093a 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -78,76 +78,6 @@ pub const Binding = struct { pub const SymbolAtOffset = struct { sym_index: u32, offset: u64, - stab: ?Stab = null, -}; - -pub const Stab = union(enum) { - function: u64, - static, - global, - - pub fn asNlists(stab: Stab, sym_loc: SymbolWithLoc, macho_file: *MachO) ![]macho.nlist_64 { - const gpa = macho_file.base.allocator; - - var nlists = std.ArrayList(macho.nlist_64).init(gpa); - defer nlists.deinit(); - - const sym = macho_file.getSymbol(sym_loc); - const sym_name = macho_file.getSymbolName(sym_loc); - switch (stab) { - .function => |size| { - try nlists.ensureUnusedCapacity(4); - nlists.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_BNSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }); - nlists.appendAssumeCapacity(.{ - .n_strx = try macho_file.strtab.insert(gpa, sym_name), - .n_type = macho.N_FUN, - .n_sect = sym.n_sect, - .n_desc = 0, - 
.n_value = sym.n_value, - }); - nlists.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_FUN, - .n_sect = 0, - .n_desc = 0, - .n_value = size, - }); - nlists.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_ENSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = size, - }); - }, - .global => { - try nlists.append(.{ - .n_strx = try macho_file.strtab.insert(gpa, sym_name), - .n_type = macho.N_GSYM, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - }, - .static => { - try nlists.append(.{ - .n_strx = try macho_file.strtab.insert(gpa, sym_name), - .n_type = macho.N_STSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }); - }, - } - - return nlists.toOwnedSlice(); - } }; pub const Relocation = struct { diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 82d872f68c..2901b54087 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -3,7 +3,6 @@ const Object = @This(); const std = @import("std"); const build_options = @import("build_options"); const assert = std.debug.assert; -const dwarf = std.dwarf; const fs = std.fs; const io = std.io; const log = std.log.scoped(.link); @@ -17,9 +16,11 @@ const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); const MachO = @import("../MachO.zig"); const MatchingSection = MachO.MatchingSection; +const SymbolWithLoc = MachO.SymbolWithLoc; file: fs.File, name: []const u8, +mtime: u64, /// Data contents of the file. Includes sections, and data of load commands. /// Excludes the backing memory for the header and load commands. 
@@ -51,12 +52,6 @@ symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, strtab: []const u8 = &.{}, data_in_code_entries: []const macho.data_in_code_entry = &.{}, -// Debug info -debug_info: ?DebugInfo = null, -tu_name: ?[]const u8 = null, -tu_comp_dir: ?[]const u8 = null, -mtime: ?u64 = null, - sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{}, /// List of atoms that map to the symbols parsed from this object file. @@ -65,72 +60,6 @@ managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, /// Table of atoms belonging to this object file indexed by the symbol index. atom_by_index_table: std.AutoHashMapUnmanaged(u32, *Atom) = .{}, -const DebugInfo = struct { - inner: dwarf.DwarfInfo, - debug_info: []const u8, - debug_abbrev: []const u8, - debug_str: []const u8, - debug_line: []const u8, - debug_line_str: []const u8, - debug_ranges: []const u8, - - pub fn parseFromObject(allocator: Allocator, object: *const Object) !?DebugInfo { - var debug_info = blk: { - const index = object.dwarf_debug_info_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_abbrev = blk: { - const index = object.dwarf_debug_abbrev_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_str = blk: { - const index = object.dwarf_debug_str_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_line = blk: { - const index = object.dwarf_debug_line_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_line_str = blk: { - if (object.dwarf_debug_line_str_index) |ind| { - break :blk try object.getSectionContents(ind); - } - break :blk &[0]u8{}; - }; - var debug_ranges = blk: { - if (object.dwarf_debug_ranges_index) |ind| { - break :blk try object.getSectionContents(ind); - } - break :blk &[0]u8{}; - }; - - var inner: dwarf.DwarfInfo = .{ - .endian = .Little, - .debug_info = debug_info, - .debug_abbrev = debug_abbrev, - .debug_str = 
debug_str, - .debug_line = debug_line, - .debug_line_str = debug_line_str, - .debug_ranges = debug_ranges, - }; - try dwarf.openDwarfDebugInfo(&inner, allocator); - - return DebugInfo{ - .inner = inner, - .debug_info = debug_info, - .debug_abbrev = debug_abbrev, - .debug_str = debug_str, - .debug_line = debug_line, - .debug_line_str = debug_line_str, - .debug_ranges = debug_ranges, - }; - } - - pub fn deinit(self: *DebugInfo, allocator: Allocator) void { - self.inner.deinit(allocator); - } -}; - pub fn deinit(self: *Object, gpa: Allocator) void { for (self.load_commands.items) |*lc| { lc.deinit(gpa); @@ -147,10 +76,6 @@ pub fn deinit(self: *Object, gpa: Allocator) void { self.managed_atoms.deinit(gpa); gpa.free(self.name); - - if (self.debug_info) |*db| { - db.deinit(gpa); - } } pub fn parse(self: *Object, allocator: Allocator, target: std.Target) !void { @@ -253,7 +178,6 @@ pub fn parse(self: *Object, allocator: Allocator, target: std.Target) !void { try self.parseSymtab(allocator); self.parseDataInCode(); - try self.parseDebugInfo(allocator); } const Context = struct { @@ -462,7 +386,6 @@ pub fn splitIntoAtomsOneShot( } break :blk false; }; - macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null; if (subsections_via_symbols and filtered_syms.len > 0) { // If the first nlist does not match the start of the section, @@ -566,7 +489,6 @@ pub fn splitIntoAtomsOneShot( try atom.contained.append(gpa, .{ .sym_index = alias, .offset = 0, - .stab = null, }); try self.atom_by_index_table.put(gpa, alias, atom); } @@ -671,54 +593,17 @@ fn createAtomFromSubsection( // we can properly allocate addresses down the line. // While we're at it, we need to update segment,section mapping of each symbol too. try atom.contained.ensureTotalCapacity(gpa, indexes.len + 1); - - { - const stab: ?Atom.Stab = if (self.debug_info) |di| blk: { - // TODO there has to be a better to handle this. 
- for (di.inner.func_list.items) |func| { - if (func.pc_range) |range| { - if (sym.n_value >= range.start and sym.n_value < range.end) { - break :blk Atom.Stab{ - .function = range.end - range.start, - }; - } - } - } - // TODO - // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global; - break :blk .static; - } else null; - - atom.contained.appendAssumeCapacity(.{ - .sym_index = sym_index, - .offset = 0, - .stab = stab, - }); - } + atom.contained.appendAssumeCapacity(.{ + .sym_index = sym_index, + .offset = 0, + }); for (indexes) |inner_sym_index| { const inner_sym = &self.symtab.items[inner_sym_index.index]; inner_sym.n_sect = macho_file.getSectionOrdinal(match); - - const stab: ?Atom.Stab = if (self.debug_info) |di| blk: { - // TODO there has to be a better to handle this. - for (di.inner.func_list.items) |func| { - if (func.pc_range) |range| { - if (inner_sym.n_value >= range.start and inner_sym.n_value < range.end) { - break :blk Atom.Stab{ - .function = range.end - range.start, - }; - } - } - } - // TODO - // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global; - break :blk .static; - } else null; atom.contained.appendAssumeCapacity(.{ .sym_index = inner_sym_index.index, .offset = inner_sym.n_value - sym.n_value, - .stab = stab, }); try self.atom_by_index_table.putNoClobber(gpa, inner_sym_index.index, atom); @@ -755,7 +640,7 @@ fn parseSymtab(self: *Object, allocator: Allocator) !void { self.strtab = self.contents[symtab.stroff..][0..symtab.strsize]; } -fn getSourceSymtab(self: *Object) []const macho.nlist_64 { +pub fn getSourceSymtab(self: Object) []const macho.nlist_64 { const index = self.symtab_cmd_index orelse return &[0]macho.nlist_64{}; const symtab = self.load_commands.items[index].symtab; const symtab_size = @sizeOf(macho.nlist_64) * symtab.nsyms; @@ -766,38 +651,6 @@ fn getSourceSymtab(self: *Object) []const macho.nlist_64 { ); } -fn parseDebugInfo(self: *Object, allocator: Allocator) !void { - log.debug("parsing 
debug info in '{s}'", .{self.name}); - - var debug_info = blk: { - var di = try DebugInfo.parseFromObject(allocator, self); - break :blk di orelse return; - }; - - // We assume there is only one CU. - const compile_unit = debug_info.inner.findCompileUnit(0x0) catch |err| switch (err) { - error.MissingDebugInfo => { - // TODO audit cases with missing debug info and audit our dwarf.zig module. - log.debug("invalid or missing debug info in {s}; skipping", .{self.name}); - return; - }, - else => |e| return e, - }; - const name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.name); - const comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.comp_dir); - - self.debug_info = debug_info; - self.tu_name = name; - self.tu_comp_dir = comp_dir; - - if (self.mtime == null) { - self.mtime = mtime: { - const stat = self.file.stat() catch break :mtime 0; - break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000)); - }; - } -} - fn parseDataInCode(self: *Object) void { const index = self.data_in_code_cmd_index orelse return; const data_in_code = self.load_commands.items[index].linkedit_data; @@ -808,7 +661,7 @@ fn parseDataInCode(self: *Object) void { ); } -fn getSectionContents(self: Object, sect_id: u16) error{Overflow}![]const u8 { +pub fn getSectionContents(self: Object, sect_id: u16) error{Overflow}![]const u8 { const sect = self.getSection(sect_id); const size = math.cast(usize, sect.size) orelse return error.Overflow; log.debug("getting {s},{s} data at 0x{x} - 0x{x}", .{ From 0f1b5d45bc9df17fab5c42c22e816797b555be5a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 17 Jul 2022 23:52:32 +0200 Subject: [PATCH 17/27] macho: mark __mh_execute_header as ref'd dynamically --- src/link/MachO.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index bcfbc4bb1c..d96dc83d42 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2942,7 +2942,7 @@ fn 
createMhExecuteHeaderSymbol(self: *MachO) !void { .n_strx = n_strx, .n_type = macho.N_SECT | macho.N_EXT, .n_sect = 0, - .n_desc = 0, + .n_desc = macho.REFERENCED_DYNAMICALLY, .n_value = 0, }); From 2dfc78dc0369052033c8469e00bf599e12e73f52 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 18 Jul 2022 00:25:28 +0200 Subject: [PATCH 18/27] macho: limit export info to entrypoint and mh symbol when executable --- src/link/MachO.zig | 85 +++++++++++++++++++++++++++++++++------------- 1 file changed, 61 insertions(+), 24 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index d96dc83d42..1fb0981dc4 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3289,9 +3289,9 @@ fn setEntryPoint(self: *MachO) !void { if (self.base.options.output_mode != .Exe) return; const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const entry_name = self.base.options.entry orelse "_main"; - const global = self.globals.get(entry_name) orelse { - log.err("entrypoint '{s}' not found", .{entry_name}); + const global = self.getEntryPoint() orelse { + const name = self.base.options.entry orelse "_main"; + log.err("entrypoint '{s}' not found", .{name}); return error.MissingMainEntrypoint; }; const sym = self.getSymbol(global); @@ -5492,15 +5492,27 @@ fn gcAtoms(self: *MachO, gc_roots: *std.AutoHashMap(*Atom, void)) !void { const gpa = self.base.allocator; - // Add all exports as GC roots - for (self.globals.values()) |global| { - const sym = self.getSymbol(global); - if (!sym.sect()) continue; - const gc_root = self.getAtomForSymbol(global) orelse { - log.debug("skipping {s}", .{self.getSymbolName(global)}); - continue; - }; - _ = try gc_roots.getOrPut(gc_root); + if (self.base.options.output_mode == .Exe) { + // Add entrypoint as GC root + if (self.getEntryPoint()) |global| { + if (self.getAtomForSymbol(global)) |gc_root| { + _ = try gc_roots.getOrPut(gc_root); + } else { + log.debug("skipping {s}", .{self.getSymbolName(global)}); + } + } + } 
else { + assert(self.base.options.output_mode == .Lib); + // Add exports as GC roots + for (self.globals.values()) |global| { + const sym = self.getSymbol(global); + if (!sym.sect()) continue; + const gc_root = self.getAtomForSymbol(global) orelse { + log.debug("skipping {s}", .{self.getSymbolName(global)}); + continue; + }; + _ = try gc_roots.getOrPut(gc_root); + } } // Add any atom targeting an import as GC root @@ -5836,19 +5848,37 @@ fn writeDyldInfoData(self: *MachO) !void { const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].segment; const base_address = text_segment.inner.vmaddr; - for (self.globals.values()) |global| { - const sym = self.getSymbol(global); - if (sym.undf()) continue; - if (!sym.ext()) continue; - if (sym.n_desc == N_DESC_GCED) continue; - const sym_name = self.getSymbolName(global); - log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); + if (self.base.options.output_mode == .Exe) { + for (&[_]SymbolWithLoc{ + self.getEntryPoint().?, // We would already errored out if no entrypoint was found. 
+ self.globals.get("__mh_execute_header").?, + }) |global| { + const sym = self.getSymbol(global); + const sym_name = self.getSymbolName(global); + log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); + try trie.put(gpa, .{ + .name = sym_name, + .vmaddr_offset = sym.n_value - base_address, + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + }); + } + } else { + assert(self.base.options.output_mode == .Lib); + for (self.globals.values()) |global| { + const sym = self.getSymbol(global); - try trie.put(gpa, .{ - .name = sym_name, - .vmaddr_offset = sym.n_value - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); + if (sym.undf()) continue; + if (!sym.ext()) continue; + if (sym.n_desc == N_DESC_GCED) continue; + + const sym_name = self.getSymbolName(global); + log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); + try trie.put(gpa, .{ + .name = sym_name, + .vmaddr_offset = sym.n_value - base_address, + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + }); + } } try trie.finalize(gpa); @@ -6602,6 +6632,13 @@ pub fn getTlvPtrAtomForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?*Atom return self.tlv_ptr_entries.items[tlv_ptr_index].atom; } +/// Returns symbol location corresponding to the set entrypoint. +/// Asserts output mode is executable. 
+pub fn getEntryPoint(self: MachO) ?SymbolWithLoc { + const entry_name = self.base.options.entry orelse "_main"; + return self.globals.get(entry_name); +} + pub fn findFirst(comptime T: type, haystack: []const T, start: usize, predicate: anytype) usize { if (!@hasDecl(@TypeOf(predicate), "predicate")) @compileError("Predicate is required to define fn predicate(@This(), T) bool"); From 2c184f9a5fc78be4f38cc74106c203b7bc80deb4 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 18 Jul 2022 00:43:11 +0200 Subject: [PATCH 19/27] link-tests: add checkNotPresent and add -dead_strip smoke test `checkNotPresent` is the inverse of `checkNext` - if the phrase is found in the output, then it fails the test. --- lib/std/build/CheckObjectStep.zig | 35 ++++++++++++++++++-- test/link.zig | 4 +++ test/link/macho/dead_strip/build.zig | 49 ++++++++++++++++++++++++++++ test/link/macho/dead_strip/main.c | 14 ++++++++ 4 files changed, 100 insertions(+), 2 deletions(-) create mode 100644 test/link/macho/dead_strip/build.zig create mode 100644 test/link/macho/dead_strip/main.c diff --git a/lib/std/build/CheckObjectStep.zig b/lib/std/build/CheckObjectStep.zig index cb91f883c9..b807e1de45 100644 --- a/lib/std/build/CheckObjectStep.zig +++ b/lib/std/build/CheckObjectStep.zig @@ -50,7 +50,7 @@ pub fn create(builder: *Builder, source: build.FileSource, obj_format: std.Targe /// For example, if the two extracted values were saved as `vmaddr` and `entryoff` respectively /// they could then be added with this simple program `vmaddr entryoff +`. 
const Action = struct { - tag: enum { match, compute_cmp }, + tag: enum { match, not_present, compute_cmp }, phrase: []const u8, expected: ?ComputeCompareExpected = null, @@ -63,7 +63,7 @@ const Action = struct { /// name {*}libobjc{*}.dylib => will match `name` followed by a token which contains `libobjc` and `.dylib` /// in that order with other letters in between fn match(act: Action, haystack: []const u8, global_vars: anytype) !bool { - assert(act.tag == .match); + assert(act.tag == .match or act.tag == .not_present); var candidate_var: ?struct { name: []const u8, value: u64 } = null; var hay_it = mem.tokenize(u8, mem.trim(u8, haystack, " "), " "); @@ -202,6 +202,13 @@ const Check = struct { }) catch unreachable; } + fn notPresent(self: *Check, phrase: []const u8) void { + self.actions.append(.{ + .tag = .not_present, + .phrase = self.builder.dupe(phrase), + }) catch unreachable; + } + fn computeCmp(self: *Check, phrase: []const u8, expected: ComputeCompareExpected) void { self.actions.append(.{ .tag = .compute_cmp, @@ -226,6 +233,15 @@ pub fn checkNext(self: *CheckObjectStep, phrase: []const u8) void { last.match(phrase); } +/// Adds another searched phrase to the latest created Check with `CheckObjectStep.checkStart(...)` +/// however ensures there is no matching phrase in the output. +/// Asserts at least one check already exists. +pub fn checkNotPresent(self: *CheckObjectStep, phrase: []const u8) void { + assert(self.checks.items.len > 0); + const last = &self.checks.items[self.checks.items.len - 1]; + last.notPresent(phrase); +} + /// Creates a new check checking specifically symbol table parsed and dumped from the object /// file. /// Issuing this check will force parsing and dumping of the symbol table. 
@@ -293,6 +309,21 @@ fn make(step: *Step) !void { return error.TestFailed; } }, + .not_present => { + while (it.next()) |line| { + if (try act.match(line, &vars)) { + std.debug.print( + \\ + \\========= Expected not to find: =================== + \\{s} + \\========= But parsed file does contain it: ======== + \\{s} + \\ + , .{ act.phrase, output }); + return error.TestFailed; + } + } + }, .compute_cmp => { const res = act.computeCmp(gpa, vars) catch |err| switch (err) { error.UnknownVariable => { diff --git a/test/link.zig b/test/link.zig index f7bd70fa66..afab1852eb 100644 --- a/test/link.zig +++ b/test/link.zig @@ -60,6 +60,10 @@ pub fn addCases(cases: *tests.StandaloneContext) void { .build_modes = true, }); + cases.addBuildFile("test/link/macho/dead_strip/build.zig", .{ + .build_modes = false, + }); + cases.addBuildFile("test/link/macho/dead_strip_dylibs/build.zig", .{ .build_modes = true, .requires_macos_sdk = true, diff --git a/test/link/macho/dead_strip/build.zig b/test/link/macho/dead_strip/build.zig new file mode 100644 index 0000000000..5b063308b5 --- /dev/null +++ b/test/link/macho/dead_strip/build.zig @@ -0,0 +1,49 @@ +const std = @import("std"); +const Builder = std.build.Builder; +const LibExeObjectStep = std.build.LibExeObjStep; + +pub fn build(b: *Builder) void { + const mode = b.standardReleaseOptions(); + + const test_step = b.step("test", "Test the program"); + test_step.dependOn(b.getInstallStep()); + + { + // Without -dead_strip, we expect `iAmUnused` symbol present + const exe = createScenario(b, mode); + + const check = exe.checkObject(.macho); + check.checkInSymtab(); + check.checkNext("{*} (__TEXT,__text) external _iAmUnused"); + + test_step.dependOn(&check.step); + + const run_cmd = exe.run(); + run_cmd.expectStdOutEqual("Hello!\n"); + test_step.dependOn(&run_cmd.step); + } + + { + // With -dead_strip, no `iAmUnused` symbol should be present + const exe = createScenario(b, mode); + exe.link_gc_sections = true; + + const check = 
exe.checkObject(.macho); + check.checkInSymtab(); + check.checkNotPresent("{*} (__TEXT,__text) external _iAmUnused"); + + test_step.dependOn(&check.step); + + const run_cmd = exe.run(); + run_cmd.expectStdOutEqual("Hello!\n"); + test_step.dependOn(&run_cmd.step); + } +} + +fn createScenario(b: *Builder, mode: std.builtin.Mode) *LibExeObjectStep { + const exe = b.addExecutable("test", null); + exe.addCSourceFile("main.c", &[0][]const u8{}); + exe.setBuildMode(mode); + exe.linkLibC(); + return exe; +} diff --git a/test/link/macho/dead_strip/main.c b/test/link/macho/dead_strip/main.c new file mode 100644 index 0000000000..4756e2ca13 --- /dev/null +++ b/test/link/macho/dead_strip/main.c @@ -0,0 +1,14 @@ +#include + +void printMe() { + printf("Hello!\n"); +} + +int main(int argc, char* argv[]) { + printMe(); + return 0; +} + +void iAmUnused() { + printf("YOU SHALL NOT PASS!\n"); +} From a089a6dc4ff04a10360019185ecaacd0564eb84c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 18 Jul 2022 12:03:06 +0200 Subject: [PATCH 20/27] macho: parse data-in-code when writing LINKEDIT segment --- src/link/MachO.zig | 88 +++++++++++++++++++++++---------------- src/link/MachO/Atom.zig | 5 --- src/link/MachO/Object.zig | 48 ++------------------- 3 files changed, 56 insertions(+), 85 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 1fb0981dc4..165ff07521 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -187,7 +187,6 @@ error_flags: File.ErrorFlags = File.ErrorFlags{}, load_commands_dirty: bool = false, sections_order_dirty: bool = false, -has_dices: bool = false, /// A helper var to indicate if we are at the start of the incremental updates, or /// already somewhere further along the update-and-run chain. 
@@ -6139,55 +6138,74 @@ fn writeFunctionStarts(self: *MachO) !void { self.load_commands_dirty = true; } -fn writeDices(self: *MachO) !void { - if (!self.has_dices) return; +fn filterDataInCode( + dices: []const macho.data_in_code_entry, + start_addr: u64, + end_addr: u64, +) []const macho.data_in_code_entry { + const Predicate = struct { + addr: u64, + pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool { + return dice.offset >= self.addr; + } + }; + + const start = MachO.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr }); + const end = MachO.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr }); + + return dices[start..end]; +} + +fn writeDataInCode(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - var buf = std.ArrayList(u8).init(self.base.allocator); - defer buf.deinit(); - - var atom: *Atom = self.atoms.get(.{ - .seg = self.text_segment_cmd_index orelse return, - .sect = self.text_section_index orelse return, - }) orelse return; - - while (atom.prev) |prev| { - atom = prev; - } + var out_dice = std.ArrayList(macho.data_in_code_entry).init(self.base.allocator); + defer out_dice.deinit(); const text_sect = self.getSection(.{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, + .seg = self.text_segment_cmd_index orelse return, + .sect = self.text_section_index orelse return, }); - while (true) { - if (atom.dices.items.len > 0) { - const sym = atom.getSymbol(self); - const base_off = math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset) orelse return error.Overflow; + for (self.objects.items) |object| { + const dice = object.parseDataInCode() orelse continue; + const source_symtab = object.getSourceSymtab(); + try out_dice.ensureUnusedCapacity(dice.len); - try buf.ensureUnusedCapacity(atom.dices.items.len * @sizeOf(macho.data_in_code_entry)); - for (atom.dices.items) |dice| { - const rebased_dice = macho.data_in_code_entry{ - .offset = 
base_off + dice.offset, - .length = dice.length, - .kind = dice.kind, - }; - buf.appendSliceAssumeCapacity(mem.asBytes(&rebased_dice)); + for (object.managed_atoms.items) |atom| { + const sym = atom.getSymbol(self); + if (sym.n_desc == N_DESC_GCED) continue; + if (atom.sym_index >= source_symtab.len) continue; // synthetic, linker generated + + const match = self.getMatchingSectionFromOrdinal(sym.n_sect); + if (match.seg != self.text_segment_cmd_index.? and match.sect != self.text_section_index.?) { + continue; + } + + const source_sym = source_symtab[atom.sym_index]; + const source_addr = math.cast(u32, source_sym.n_value) orelse return error.Overflow; + const filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size); + const base = math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset) orelse + return error.Overflow; + + for (filtered_dice) |single| { + const offset = single.offset - source_addr + base; + out_dice.appendAssumeCapacity(.{ + .offset = offset, + .length = single.length, + .kind = single.kind, + }); } } - - if (atom.next) |next| { - atom = next; - } else break; } const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].linkedit_data; const dataoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - const datasize = buf.items.len; + const datasize = out_dice.items.len * @sizeOf(macho.data_in_code_entry); dice_cmd.dataoff = @intCast(u32, dataoff); dice_cmd.datasize = @intCast(u32, datasize); seg.inner.filesize = dice_cmd.dataoff + dice_cmd.datasize - seg.inner.fileoff; @@ -6197,7 +6215,7 @@ fn writeDices(self: *MachO) !void { dice_cmd.dataoff + dice_cmd.datasize, }); - try self.base.file.?.pwriteAll(buf.items, dice_cmd.dataoff); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(out_dice.items), dice_cmd.dataoff); self.load_commands_dirty = true; } @@ -6392,7 +6410,7 @@ fn 
writeLinkeditSegment(self: *MachO) !void { try self.writeDyldInfoData(); try self.writeFunctionStarts(); - try self.writeDices(); + try self.writeDataInCode(); try self.writeSymtab(); try self.writeStrtab(); diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 7aa4e1093a..2db680889a 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -59,9 +59,6 @@ bindings: std.ArrayListUnmanaged(Binding) = .{}, /// List of lazy bindings (cf bindings above). lazy_bindings: std.ArrayListUnmanaged(Binding) = .{}, -/// List of data-in-code entries. This is currently specific to x86_64 only. -dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, - /// Points to the previous and next neighbours next: ?*Atom, prev: ?*Atom, @@ -147,7 +144,6 @@ pub const empty = Atom{ }; pub fn deinit(self: *Atom, allocator: Allocator) void { - self.dices.deinit(allocator); self.lazy_bindings.deinit(allocator); self.bindings.deinit(allocator); self.rebases.deinit(allocator); @@ -157,7 +153,6 @@ pub fn deinit(self: *Atom, allocator: Allocator) void { } pub fn clearRetainingCapacity(self: *Atom) void { - self.dices.clearRetainingCapacity(); self.lazy_bindings.clearRetainingCapacity(); self.bindings.clearRetainingCapacity(); self.rebases.clearRetainingCapacity(); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 2901b54087..07237d31aa 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -177,7 +177,6 @@ pub fn parse(self: *Object, allocator: Allocator, target: std.Target) !void { } try self.parseSymtab(allocator); - self.parseDataInCode(); } const Context = struct { @@ -264,25 +263,6 @@ fn filterRelocs( return relocs[start..end]; } -fn filterDice( - dices: []const macho.data_in_code_entry, - start_addr: u64, - end_addr: u64, -) []const macho.data_in_code_entry { - const Predicate = struct { - addr: u64, - - pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool { - return dice.offset >= self.addr; - } - }; - 
- const start = MachO.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr }); - const end = MachO.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr }); - - return dices[start..end]; -} - /// Splits object into atoms assuming one-shot linking mode. pub fn splitIntoAtomsOneShot( self: *Object, @@ -378,15 +358,6 @@ pub fn splitIntoAtomsOneShot( context, ); - macho_file.has_dices = macho_file.has_dices or blk: { - if (self.text_section_index) |index| { - if (index != id) break :blk false; - if (self.data_in_code_entries.len == 0) break :blk false; - break :blk true; - } - break :blk false; - }; - if (subsections_via_symbols and filtered_syms.len > 0) { // If the first nlist does not match the start of the section, // then we need to encapsulate the memory range [section start, first symbol) @@ -574,19 +545,6 @@ fn createAtomFromSubsection( .base_offset = @intCast(i32, base_offset), }); - if (macho_file.has_dices) { - const dices = filterDice(self.data_in_code_entries, sym.n_value, sym.n_value + size); - try atom.dices.ensureTotalCapacity(gpa, dices.len); - - for (dices) |dice| { - atom.dices.appendAssumeCapacity(.{ - .offset = dice.offset - (math.cast(u32, sym.n_value) orelse return error.Overflow), - .length = dice.length, - .kind = dice.kind, - }); - } - } - // Since this is atom gets a helper local temporary symbol that didn't exist // in the object file which encompasses the entire section, we need traverse // the filtered symbols and note which symbol is contained within so that @@ -651,11 +609,11 @@ pub fn getSourceSymtab(self: Object) []const macho.nlist_64 { ); } -fn parseDataInCode(self: *Object) void { - const index = self.data_in_code_cmd_index orelse return; +pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry { + const index = self.data_in_code_cmd_index orelse return null; const data_in_code = self.load_commands.items[index].linkedit_data; const raw_dice = 
self.contents[data_in_code.dataoff..][0..data_in_code.datasize]; - self.data_in_code_entries = mem.bytesAsSlice( + return mem.bytesAsSlice( macho.data_in_code_entry, @alignCast(@alignOf(macho.data_in_code_entry), raw_dice), ); From 39df241df4ac177503d899dd8b53a632e4e29334 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 19 Jul 2022 15:55:49 +0200 Subject: [PATCH 21/27] macho: do not GC local symbols unless reference dead symbols If a local references another local, we keep it. If it doesn't reference anything, we keep it. Otherwise, we dead strip it. --- src/link/MachO.zig | 425 +++++++++++++++++++------------- src/link/MachO/Atom.zig | 42 ++-- src/link/MachO/DebugSymbols.zig | 18 +- src/link/MachO/Object.zig | 14 +- 4 files changed, 298 insertions(+), 201 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 165ff07521..898ad6732b 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -171,17 +171,19 @@ stub_helper_preamble_atom: ?*Atom = null, strtab: StringTable(.strtab) = .{}, +// TODO I think synthetic tables are a perfect match for some generic refactoring, +// and probably reusable between linker backends too. 
tlv_ptr_entries: std.ArrayListUnmanaged(Entry) = .{}, tlv_ptr_entries_free_list: std.ArrayListUnmanaged(u32) = .{}, -tlv_ptr_entries_table: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{}, +tlv_ptr_entries_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, got_entries: std.ArrayListUnmanaged(Entry) = .{}, got_entries_free_list: std.ArrayListUnmanaged(u32) = .{}, -got_entries_table: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{}, +got_entries_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, stubs: std.ArrayListUnmanaged(Entry) = .{}, stubs_free_list: std.ArrayListUnmanaged(u32) = .{}, -stubs_table: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{}, +stubs_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, @@ -251,7 +253,24 @@ decls: std.AutoArrayHashMapUnmanaged(Module.Decl.Index, ?MatchingSection) = .{}, const Entry = struct { target: SymbolWithLoc, - atom: *Atom, + // Index into the synthetic symbol table (i.e., file == null). 
+ sym_index: u32, + + pub fn getSymbol(entry: Entry, macho_file: *MachO) macho.nlist_64 { + return macho_file.getSymbol(.{ .sym_index = entry.sym_index, .file = null }); + } + + pub fn getSymbolPtr(entry: Entry, macho_file: *MachO) *macho.nlist_64 { + return macho_file.getSymbolPtr(.{ .sym_index = entry.sym_index, .file = null }); + } + + pub fn getAtom(entry: Entry, macho_file: *MachO) *Atom { + return macho_file.getAtomForSymbol(.{ .sym_index = entry.sym_index, .file = null }).?; + } + + pub fn getName(entry: Entry, macho_file: *MachO) []const u8 { + return macho_file.getSymbolName(.{ .sym_index = entry.sym_index, .file = null }); + } }; const UnnamedConstTable = std.AutoHashMapUnmanaged(Module.Decl.Index, std.ArrayListUnmanaged(*Atom)); @@ -1652,6 +1671,15 @@ fn parseDependentLibs(self: *MachO, syslibroot: ?[]const u8, dependent_libs: any pub const MatchingSection = struct { seg: u16, sect: u16, + + pub fn eql(this: MatchingSection, other: struct { + seg: ?u16, + sect: ?u16, + }) bool { + const seg = other.seg orelse return false; + const sect = other.sect orelse return false; + return this.seg == seg and this.sect == sect; + } }; pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSection { @@ -3153,8 +3181,7 @@ fn resolveSymbolsInDylibs(self: *MachO) !void { const stub_helper_atom = try self.createStubHelperAtom(); const laptr_atom = try self.createLazyPointerAtom(stub_helper_atom.sym_index, global); const stub_atom = try self.createStubAtom(laptr_atom.sym_index); - - self.stubs.items[stub_index].atom = stub_atom; + self.stubs.items[stub_index].sym_index = stub_atom.sym_index; } continue :loop; @@ -3251,7 +3278,7 @@ fn resolveDyldStubBinder(self: *MachO) !void { // Add dyld_stub_binder as the final GOT entry. 
const got_index = try self.allocateGotEntry(global); const got_atom = try self.createGotAtom(global); - self.got_entries.items[got_index].atom = got_atom; + self.got_entries.items[got_index].sym_index = got_atom.sym_index; } fn addLoadDylibLC(self: *MachO, id: u16) !void { @@ -3288,11 +3315,7 @@ fn setEntryPoint(self: *MachO) !void { if (self.base.options.output_mode != .Exe) return; const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const global = self.getEntryPoint() orelse { - const name = self.base.options.entry orelse "_main"; - log.err("entrypoint '{s}' not found", .{name}); - return error.MissingMainEntrypoint; - }; + const global = try self.getEntryPoint(); const sym = self.getSymbol(global); const ec = &self.load_commands.items[self.main_cmd_index.?].main; ec.entryoff = @intCast(u32, sym.n_value - seg.inner.vmaddr); @@ -3508,7 +3531,8 @@ fn allocateSymbol(self: *MachO) !u32 { } pub fn allocateGotEntry(self: *MachO, target: SymbolWithLoc) !u32 { - try self.got_entries.ensureUnusedCapacity(self.base.allocator, 1); + const gpa = self.base.allocator; + try self.got_entries.ensureUnusedCapacity(gpa, 1); const index = blk: { if (self.got_entries_free_list.popOrNull()) |index| { @@ -3522,8 +3546,8 @@ pub fn allocateGotEntry(self: *MachO, target: SymbolWithLoc) !u32 { } }; - self.got_entries.items[index] = .{ .target = target, .atom = undefined }; - try self.got_entries_table.putNoClobber(self.base.allocator, target, index); + self.got_entries.items[index] = .{ .target = target, .sym_index = 0 }; + try self.got_entries_table.putNoClobber(gpa, target, index); return index; } @@ -3543,7 +3567,7 @@ pub fn allocateStubEntry(self: *MachO, target: SymbolWithLoc) !u32 { } }; - self.stubs.items[index] = .{ .target = target, .atom = undefined }; + self.stubs.items[index] = .{ .target = target, .sym_index = 0 }; try self.stubs_table.putNoClobber(self.base.allocator, target, index); return index; @@ -3564,7 +3588,7 @@ pub fn 
allocateTlvPtrEntry(self: *MachO, target: SymbolWithLoc) !u32 { } }; - self.tlv_ptr_entries.items[index] = .{ .target = target, .atom = undefined }; + self.tlv_ptr_entries.items[index] = .{ .target = target, .sym_index = 0 }; try self.tlv_ptr_entries_table.putNoClobber(self.base.allocator, target, index); return index; @@ -4029,7 +4053,7 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !*mac const got_target = SymbolWithLoc{ .sym_index = decl.link.macho.sym_index, .file = null }; const got_index = try self.allocateGotEntry(got_target); const got_atom = try self.createGotAtom(got_target); - self.got_entries.items[got_index].atom = got_atom; + self.got_entries.items[got_index].sym_index = got_atom.sym_index; } return symbol; @@ -4219,9 +4243,9 @@ pub fn freeDecl(self: *MachO, decl_index: Module.Decl.Index) void { self.got_entries_free_list.append(self.base.allocator, @intCast(u32, got_index)) catch {}; self.got_entries.items[got_index] = .{ .target = .{ .sym_index = 0, .file = null }, - .atom = undefined, + .sym_index = 0, }; - _ = self.got_entries_table.swapRemove(got_target); + _ = self.got_entries_table.remove(got_target); if (self.d_sym) |*d_sym| { d_sym.swapRemoveRelocs(decl.link.macho.sym_index); @@ -5493,46 +5517,26 @@ fn gcAtoms(self: *MachO, gc_roots: *std.AutoHashMap(*Atom, void)) !void { if (self.base.options.output_mode == .Exe) { // Add entrypoint as GC root - if (self.getEntryPoint()) |global| { - if (self.getAtomForSymbol(global)) |gc_root| { - _ = try gc_roots.getOrPut(gc_root); - } else { - log.debug("skipping {s}", .{self.getSymbolName(global)}); - } - } + const global = try self.getEntryPoint(); + const atom = self.getAtomForSymbol(global).?; // panic here means fatal error + _ = try gc_roots.getOrPut(atom); } else { assert(self.base.options.output_mode == .Lib); // Add exports as GC roots for (self.globals.values()) |global| { const sym = self.getSymbol(global); if (!sym.sect()) continue; - const gc_root = 
self.getAtomForSymbol(global) orelse { + const atom = self.getAtomForSymbol(global) orelse { log.debug("skipping {s}", .{self.getSymbolName(global)}); continue; }; - _ = try gc_roots.getOrPut(gc_root); + _ = try gc_roots.getOrPut(atom); } } - - // Add any atom targeting an import as GC root - var atoms_it = self.atoms.iterator(); - while (atoms_it.next()) |entry| { - var atom = entry.value_ptr.*; - - while (true) { - for (atom.relocs.items) |rel| { - if ((try rel.getTargetAtom(self)) == null) { - const target_sym = self.getSymbol(rel.target); - if (target_sym.undf()) { - _ = try gc_roots.getOrPut(atom); - break; - } - } - } - - if (atom.prev) |prev| { - atom = prev; - } else break; + // TODO just a temp until we learn how to parse unwind records + if (self.globals.get("___gxx_personality_v0")) |global| { + if (self.getAtomForSymbol(global)) |atom| { + _ = try gc_roots.getOrPut(atom); } } @@ -5540,80 +5544,80 @@ fn gcAtoms(self: *MachO, gc_roots: *std.AutoHashMap(*Atom, void)) !void { defer stack.deinit(); try stack.ensureUnusedCapacity(gc_roots.count()); - var retained = std.AutoHashMap(*Atom, void).init(gpa); - defer retained.deinit(); - try retained.ensureUnusedCapacity(gc_roots.count()); + var alive = std.AutoHashMap(*Atom, void).init(gpa); + defer alive.deinit(); + try alive.ensureUnusedCapacity(gc_roots.count()); log.debug("GC roots:", .{}); var gc_roots_it = gc_roots.keyIterator(); while (gc_roots_it.next()) |gc_root| { self.logAtom(gc_root.*); - stack.appendAssumeCapacity(gc_root.*); - retained.putAssumeCapacityNoClobber(gc_root.*, {}); + alive.putAssumeCapacity(gc_root.*, {}); } - log.debug("walking tree...", .{}); while (stack.popOrNull()) |source_atom| { for (source_atom.relocs.items) |rel| { - if (try rel.getTargetAtom(self)) |target_atom| { - const gop = try retained.getOrPut(target_atom); + if (rel.getTargetAtom(self)) |target_atom| { + const gop = try alive.getOrPut(target_atom); if (!gop.found_existing) { - log.debug(" RETAINED ATOM(%{d}) -> 
ATOM(%{d})", .{ - source_atom.sym_index, + log.debug(" retained ATOM(%{d}, '{s}') in object({d})", .{ target_atom.sym_index, + target_atom.getName(self), + target_atom.file, + }); + log.debug(" referenced by ATOM(%{d}, '{s}') in object({d})", .{ + source_atom.sym_index, + source_atom.getName(self), + source_atom.file, }); try stack.append(target_atom); } } } } + // TODO live support // Any section that ends up here will be updated, that is, // its size and alignment recalculated. var gc_sections = std.AutoHashMap(MatchingSection, void).init(gpa); defer gc_sections.deinit(); - atoms_it = self.atoms.iterator(); - while (atoms_it.next()) |entry| { - const match = entry.key_ptr.*; + var loop: bool = true; + while (loop) { + loop = false; - if (self.text_segment_cmd_index) |seg| { - if (seg == match.seg) { - if (self.eh_frame_section_index) |sect| { - if (sect == match.sect) continue; - } - } - } - - if (self.data_segment_cmd_index) |seg| { - if (seg == match.seg) { - if (self.rustc_section_index) |sect| { - if (sect == match.sect) continue; - } - } - } - - const sect = self.getSectionPtr(match); - var atom = entry.value_ptr.*; - - log.debug("GCing atoms in {s},{s}", .{ sect.segName(), sect.sectName() }); - - while (true) { - const orig_prev = atom.prev; - - if (!retained.contains(atom)) { - // Dead atom; remove. 
- log.debug(" DEAD ATOM(%{d})", .{atom.sym_index}); + for (self.objects.items) |object| { + for (object.getSourceSymtab()) |_, source_index| { + const atom = object.getAtomForSymbol(@intCast(u32, source_index)) orelse continue; + if (alive.contains(atom)) continue; + const global = atom.getSymbolWithLoc(); const sym = atom.getSymbolPtr(self); - sym.n_desc = N_DESC_GCED; - // TODO add full bookkeeping here - const global = SymbolWithLoc{ .sym_index = atom.sym_index, .file = atom.file }; - _ = self.got_entries_table.swapRemove(global); - _ = self.stubs_table.swapRemove(global); - _ = self.tlv_ptr_entries_table.swapRemove(global); + if (sym.n_desc == N_DESC_GCED) continue; + if (!sym.ext()) { + for (atom.relocs.items) |rel| { + if (rel.getTargetAtom(self)) |target_atom| { + const target_sym = target_atom.getSymbol(self); + if (target_sym.n_desc == N_DESC_GCED) break; + } + } else continue; + } + + loop = true; + const match = self.getMatchingSectionFromOrdinal(sym.n_sect); + + // TODO don't dedup eh_frame info yet until we actually implement parsing unwind records + if (match.eql(.{ + .seg = self.text_segment_cmd_index, + .sect = self.eh_frame_section_index, + })) continue; + + self.logAtom(atom); + sym.n_desc = N_DESC_GCED; + self.removeAtomFromSection(atom, match); + _ = try gc_sections.put(match, {}); for (atom.contained.items) |sym_off| { const inner = self.getSymbolPtr(.{ @@ -5622,34 +5626,64 @@ fn gcAtoms(self: *MachO, gc_roots: *std.AutoHashMap(*Atom, void)) !void { }); inner.n_desc = N_DESC_GCED; } - // If we want to enable GC for incremental codepath, we need to take into - // account any padding that might have been left here. 
- sect.size -= atom.size; - _ = try gc_sections.put(match, {}); - - if (atom.prev) |prev| { - prev.next = atom.next; + if (self.got_entries_table.contains(global)) { + const got_atom = self.getGotAtomForSymbol(global).?; + const got_sym = got_atom.getSymbolPtr(self); + got_sym.n_desc = N_DESC_GCED; } - if (atom.next) |next| { - next.prev = atom.prev; - } else { - if (atom.prev) |prev| { - entry.value_ptr.* = prev; - } else { - // The section will be GCed in the next step. - entry.value_ptr.* = undefined; - sect.size = 0; - } + + if (self.stubs_table.contains(global)) { + const stubs_atom = self.getStubsAtomForSymbol(global).?; + const stubs_sym = stubs_atom.getSymbolPtr(self); + stubs_sym.n_desc = N_DESC_GCED; + } + + if (self.tlv_ptr_entries_table.contains(global)) { + const tlv_ptr_atom = self.getTlvPtrAtomForSymbol(global).?; + const tlv_ptr_sym = tlv_ptr_atom.getSymbolPtr(self); + tlv_ptr_sym.n_desc = N_DESC_GCED; } } - - if (orig_prev) |prev| { - atom = prev; - } else break; } } + for (self.got_entries.items) |entry| { + const sym = entry.getSymbol(self); + if (sym.n_desc != N_DESC_GCED) continue; + + // TODO tombstone + const atom = entry.getAtom(self); + const match = self.getMatchingSectionFromOrdinal(sym.n_sect); + self.removeAtomFromSection(atom, match); + _ = try gc_sections.put(match, {}); + _ = self.got_entries_table.remove(entry.target); + } + + for (self.stubs.items) |entry| { + const sym = entry.getSymbol(self); + if (sym.n_desc != N_DESC_GCED) continue; + + // TODO tombstone + const atom = entry.getAtom(self); + const match = self.getMatchingSectionFromOrdinal(sym.n_sect); + self.removeAtomFromSection(atom, match); + _ = try gc_sections.put(match, {}); + _ = self.stubs_table.remove(entry.target); + } + + for (self.tlv_ptr_entries.items) |entry| { + const sym = entry.getSymbol(self); + if (sym.n_desc != N_DESC_GCED) continue; + + // TODO tombstone + const atom = entry.getAtom(self); + const match = self.getMatchingSectionFromOrdinal(sym.n_sect); + 
self.removeAtomFromSection(atom, match); + _ = try gc_sections.put(match, {}); + _ = self.tlv_ptr_entries_table.remove(entry.target); + } + var gc_sections_it = gc_sections.iterator(); while (gc_sections_it.next()) |entry| { const match = entry.key_ptr.*; @@ -5679,6 +5713,30 @@ fn gcAtoms(self: *MachO, gc_roots: *std.AutoHashMap(*Atom, void)) !void { } } +fn removeAtomFromSection(self: *MachO, atom: *Atom, match: MatchingSection) void { + const sect = self.getSectionPtr(match); + + // If we want to enable GC for incremental codepath, we need to take into + // account any padding that might have been left here. + sect.size -= atom.size; + + if (atom.prev) |prev| { + prev.next = atom.next; + } + if (atom.next) |next| { + next.prev = atom.prev; + } else { + const last = self.atoms.getPtr(match).?; + if (atom.prev) |prev| { + last.* = prev; + } else { + // The section will be GCed in the next step. + last.* = undefined; + sect.size = 0; + } + } +} + fn updateSectionOrdinals(self: *MachO) !void { if (!self.sections_order_dirty) return; @@ -5849,7 +5907,7 @@ fn writeDyldInfoData(self: *MachO) !void { if (self.base.options.output_mode == .Exe) { for (&[_]SymbolWithLoc{ - self.getEntryPoint().?, // We would already errored out if no entrypoint was found. 
+ try self.getEntryPoint(), self.globals.get("__mh_execute_header").?, }) |global| { const sym = self.getSymbol(global); @@ -6337,10 +6395,13 @@ fn writeSymtab(self: *MachO) !void { .sect = stubs_section_index, }); stubs.reserved1 = 0; - for (self.stubs_table.keys()) |target| { - const sym = self.getSymbol(target); - assert(sym.undf()); - try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(target).?); + for (self.stubs.items) |entry| { + if (entry.sym_index == 0) continue; + const atom_sym = entry.getSymbol(self); + if (atom_sym.n_desc == N_DESC_GCED) continue; + const target_sym = self.getSymbol(entry.target); + assert(target_sym.undf()); + try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(entry.target).?); } } @@ -6351,10 +6412,13 @@ fn writeSymtab(self: *MachO) !void { .sect = got_section_index, }); got.reserved1 = nstubs; - for (self.got_entries_table.keys()) |target| { - const sym = self.getSymbol(target); - if (sym.undf()) { - try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(target).?); + for (self.got_entries.items) |entry| { + if (entry.sym_index == 0) continue; + const atom_sym = entry.getSymbol(self); + if (atom_sym.n_desc == N_DESC_GCED) continue; + const target_sym = self.getSymbol(entry.target); + if (target_sym.undf()) { + try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(entry.target).?); } else { try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); } @@ -6368,10 +6432,13 @@ fn writeSymtab(self: *MachO) !void { .sect = la_symbol_ptr_section_index, }); la_symbol_ptr.reserved1 = nstubs + ngot_entries; - for (self.stubs_table.keys()) |target| { - const sym = self.getSymbol(target); - assert(sym.undf()); - try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(target).?); + for (self.stubs.items) |entry| { + if (entry.sym_index == 0) continue; + const atom_sym = entry.getSymbol(self); + if (atom_sym.n_desc == N_DESC_GCED) continue; + const target_sym = 
self.getSymbol(entry.target); + assert(target_sym.undf()); + try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(entry.target).?); } } @@ -6623,7 +6690,7 @@ pub fn getSymbolName(self: *MachO, sym_with_loc: SymbolWithLoc) []const u8 { pub fn getAtomForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?*Atom { if (sym_with_loc.file) |file| { const object = self.objects.items[file]; - return object.atom_by_index_table.get(sym_with_loc.sym_index); + return object.getAtomForSymbol(sym_with_loc.sym_index); } else { return self.atom_by_index_table.get(sym_with_loc.sym_index); } @@ -6633,28 +6700,32 @@ pub fn getAtomForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?*Atom { /// Returns null otherwise. pub fn getGotAtomForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?*Atom { const got_index = self.got_entries_table.get(sym_with_loc) orelse return null; - return self.got_entries.items[got_index].atom; + return self.got_entries.items[got_index].getAtom(self); } /// Returns stubs atom that references `sym_with_loc` if one exists. /// Returns null otherwise. pub fn getStubsAtomForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?*Atom { const stubs_index = self.stubs_table.get(sym_with_loc) orelse return null; - return self.stubs.items[stubs_index].atom; + return self.stubs.items[stubs_index].getAtom(self); } /// Returns TLV pointer atom that references `sym_with_loc` if one exists. /// Returns null otherwise. pub fn getTlvPtrAtomForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?*Atom { const tlv_ptr_index = self.tlv_ptr_entries_table.get(sym_with_loc) orelse return null; - return self.tlv_ptr_entries.items[tlv_ptr_index].atom; + return self.tlv_ptr_entries.items[tlv_ptr_index].getAtom(self); } /// Returns symbol location corresponding to the set entrypoint. /// Asserts output mode is executable. 
-pub fn getEntryPoint(self: MachO) ?SymbolWithLoc { +pub fn getEntryPoint(self: MachO) error{MissingMainEntrypoint}!SymbolWithLoc { const entry_name = self.base.options.entry orelse "_main"; - return self.globals.get(entry_name); + const global = self.globals.get(entry_name) orelse { + log.err("entrypoint '{s}' not found", .{entry_name}); + return error.MissingMainEntrypoint; + }; + return global; } pub fn findFirst(comptime T: type, haystack: []const T, start: usize, predicate: anytype) usize { @@ -6986,7 +7057,7 @@ fn snapshotState(self: *MachO) !void { break :blk source_sym.n_value + rel.offset; }; const target_addr = blk: { - const target_atom = (try rel.getTargetAtom(self)) orelse { + const target_atom = rel.getTargetAtom(self) orelse { // If there is no atom for target, we still need to check for special, atom-less // symbols such as `___dso_handle`. const target_name = self.getSymbolName(rel.target); @@ -7119,8 +7190,9 @@ fn snapshotState(self: *MachO) !void { try writer.writeByte(']'); } -pub fn logSymAttributes(sym: macho.nlist_64, buf: *[4]u8) []const u8 { - mem.set(u8, buf, '_'); +fn logSymAttributes(sym: macho.nlist_64, buf: *[9]u8) []const u8 { + mem.set(u8, buf[0..4], '_'); + mem.set(u8, buf[4..], ' '); if (sym.sect()) { buf[0] = 's'; } @@ -7137,11 +7209,14 @@ pub fn logSymAttributes(sym: macho.nlist_64, buf: *[4]u8) []const u8 { if (sym.undf()) { buf[3] = 'u'; } + if (sym.n_desc == N_DESC_GCED) { + mem.copy(u8, buf[5..], "DEAD"); + } return buf[0..]; } fn logSymtab(self: *MachO) void { - var buf: [4]u8 = undefined; + var buf: [9]u8 = undefined; log.debug("symtab:", .{}); for (self.objects.items) |object, id| { @@ -7186,42 +7261,50 @@ fn logSymtab(self: *MachO) void { } log.debug("GOT entries:", .{}); - for (self.got_entries_table.values()) |value| { - const target = self.got_entries.items[value].target; - const target_sym = self.getSymbol(target); - const atom = self.got_entries.items[value].atom; - const atom_sym = atom.getSymbol(self); - + for 
(self.got_entries.items) |entry, i| { + const atom_sym = entry.getSymbol(self); + if (atom_sym.n_desc == N_DESC_GCED) continue; + const target_sym = self.getSymbol(entry.target); if (target_sym.undf()) { - log.debug(" {d}@{x} => import('{s}')", .{ value, atom_sym.n_value, self.getSymbolName(target) }); - } else { - log.debug(" {d}@{x} => local(%{d}) in object({d})", .{ - value, + log.debug(" {d}@{x} => import('{s}')", .{ + i, atom_sym.n_value, - target.sym_index, - target.file, + self.getSymbolName(entry.target), + }); + } else { + log.debug(" {d}@{x} => local(%{d}) in object({d}) {s}", .{ + i, + atom_sym.n_value, + entry.target.sym_index, + entry.target.file, + logSymAttributes(target_sym, &buf), }); } } log.debug("__thread_ptrs entries:", .{}); - for (self.tlv_ptr_entries_table.values()) |value| { - const target = self.tlv_ptr_entries.items[value].target; - const target_sym = self.getSymbol(target); - const atom = self.tlv_ptr_entries.items[value].atom; - const atom_sym = atom.getSymbol(self); + for (self.tlv_ptr_entries.items) |entry, i| { + const atom_sym = entry.getSymbol(self); + if (atom_sym.n_desc == N_DESC_GCED) continue; + const target_sym = self.getSymbol(entry.target); assert(target_sym.undf()); - log.debug(" {d}@{x} => import('{s}')", .{ value, atom_sym.n_value, self.getSymbolName(target) }); + log.debug(" {d}@{x} => import('{s}')", .{ + i, + atom_sym.n_value, + self.getSymbolName(entry.target), + }); } log.debug("stubs entries:", .{}); - for (self.stubs_table.values()) |value| { - const target = self.stubs.items[value].target; - const target_sym = self.getSymbol(target); - const atom = self.stubs.items[value].atom; - const atom_sym = atom.getSymbol(self); + for (self.stubs.items) |entry, i| { + const target_sym = self.getSymbol(entry.target); + const atom_sym = entry.getSymbol(self); assert(target_sym.undf()); - log.debug(" {d}@{x} => import('{s}')", .{ value, atom_sym.n_value, self.getSymbolName(target) }); + log.debug(" {d}@{x} => import('{s}')", .{ 
+ i, + atom_sym.n_value, + self.getSymbolName(entry.target), + }); } } @@ -7248,7 +7331,6 @@ fn logAtoms(self: *MachO) void { while (true) { self.logAtom(atom); - if (atom.next) |next| { atom = next; } else break; @@ -7256,14 +7338,17 @@ fn logAtoms(self: *MachO) void { } } -pub fn logAtom(self: *MachO, atom: *const Atom) void { +fn logAtom(self: *MachO, atom: *const Atom) void { const sym = atom.getSymbol(self); const sym_name = atom.getName(self); - log.debug(" ATOM(%{d}, '{s}') @ {x} in object({d})", .{ + log.debug(" ATOM(%{d}, '{s}') @ {x} (sizeof({x}), alignof({x})) in object({d}) in sect({d})", .{ atom.sym_index, sym_name, sym.n_value, + atom.size, + atom.alignment, atom.file, + sym.n_sect, }); for (atom.contained.items) |sym_off| { @@ -7271,13 +7356,15 @@ pub fn logAtom(self: *MachO, atom: *const Atom) void { .sym_index = sym_off.sym_index, .file = atom.file, }); - const inner_sym_name = self.getSymbolName(.{ .sym_index = sym_off.sym_index, .file = atom.file }); - log.debug(" (%{d}, '{s}') @ {x} ({x}) in object({d})", .{ + const inner_sym_name = self.getSymbolName(.{ + .sym_index = sym_off.sym_index, + .file = atom.file, + }); + log.debug(" (%{d}, '{s}') @ {x} ({x})", .{ sym_off.sym_index, inner_sym_name, inner_sym.n_value, sym_off.offset, - atom.file, }); } } diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 2db680889a..acaeab7a88 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -94,7 +94,7 @@ pub const Relocation = struct { @"type": u4, - pub fn getTargetAtom(self: Relocation, macho_file: *MachO) !?*Atom { + pub fn getTargetAtom(self: Relocation, macho_file: *MachO) ?*Atom { const is_via_got = got: { switch (macho_file.base.options.target.cpu.arch) { .aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, self.@"type")) { @@ -112,21 +112,9 @@ pub const Relocation = struct { } }; - const target_sym = macho_file.getSymbol(self.target); if (is_via_got) { - const got_atom = 
macho_file.getGotAtomForSymbol(self.target) orelse { - log.err("expected GOT entry for symbol", .{}); - if (target_sym.undf()) { - log.err(" import('{s}')", .{macho_file.getSymbolName(self.target)}); - } else { - log.err(" local(%{d}) in object({d})", .{ self.target.sym_index, self.target.file }); - } - log.err(" this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; - }; - return got_atom; + return macho_file.getGotAtomForSymbol(self.target).?; // panic means fatal error } - if (macho_file.getStubsAtomForSymbol(self.target)) |stubs_atom| return stubs_atom; if (macho_file.getTlvPtrAtomForSymbol(self.target)) |tlv_ptr_atom| return tlv_ptr_atom; return macho_file.getAtomForSymbol(self.target); @@ -174,6 +162,10 @@ pub fn getSymbolPtr(self: Atom, macho_file: *MachO) *macho.nlist_64 { }); } +pub fn getSymbolWithLoc(self: Atom) SymbolWithLoc { + return .{ .sym_index = self.sym_index, .file = self.file }; +} + /// Returns true if the symbol pointed at with `sym_loc` is contained within this atom. /// WARNING this function assumes all atoms have been allocated in the virtual memory. 
/// Calling it without allocating with `MachO.allocateSymbols` (or equivalent) will @@ -515,7 +507,7 @@ fn addTlvPtrEntry(target: MachO.SymbolWithLoc, context: RelocContext) !void { const index = try context.macho_file.allocateTlvPtrEntry(target); const atom = try context.macho_file.createTlvPtrAtom(target); - context.macho_file.tlv_ptr_entries.items[index].atom = atom; + context.macho_file.tlv_ptr_entries.items[index].sym_index = atom.sym_index; } fn addGotEntry(target: MachO.SymbolWithLoc, context: RelocContext) !void { @@ -523,7 +515,7 @@ fn addGotEntry(target: MachO.SymbolWithLoc, context: RelocContext) !void { const index = try context.macho_file.allocateGotEntry(target); const atom = try context.macho_file.createGotAtom(target); - context.macho_file.got_entries.items[index].atom = atom; + context.macho_file.got_entries.items[index].sym_index = atom.sym_index; } fn addStub(target: MachO.SymbolWithLoc, context: RelocContext) !void { @@ -536,7 +528,7 @@ fn addStub(target: MachO.SymbolWithLoc, context: RelocContext) !void { const laptr_atom = try context.macho_file.createLazyPointerAtom(stub_helper_atom.sym_index, target); const stub_atom = try context.macho_file.createStubAtom(laptr_atom.sym_index); - context.macho_file.stubs.items[stub_index].atom = stub_atom; + context.macho_file.stubs.items[stub_index].sym_index = stub_atom.sym_index; } pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { @@ -578,7 +570,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { break :is_tlv sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; }; const target_addr = blk: { - const target_atom = (try rel.getTargetAtom(macho_file)) orelse { + const target_atom = rel.getTargetAtom(macho_file) orelse { // If there is no atom for target, we still need to check for special, atom-less // symbols such as `___dso_handle`. 
const target_name = macho_file.getSymbolName(rel.target); @@ -597,6 +589,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { macho_file.getSymbol(rel.target) else target_atom.getSymbol(macho_file); + assert(target_sym.n_desc != MachO.N_DESC_GCED); const base_address: u64 = if (is_tlv) base_address: { // For TLV relocations, the value specified as a relocation is the displacement from the // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first @@ -624,12 +617,12 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { }; log.debug(" | source_addr = 0x{x}", .{source_addr}); - log.debug(" | target_addr = 0x{x}", .{target_addr}); switch (arch) { .aarch64 => { switch (@intToEnum(macho.reloc_type_arm64, rel.@"type")) { .ARM64_RELOC_BRANCH26 => { + log.debug(" | target_addr = 0x{x}", .{target_addr}); const displacement = math.cast( i28, @intCast(i64, target_addr) - @intCast(i64, source_addr), @@ -658,6 +651,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { .ARM64_RELOC_TLVP_LOAD_PAGE21, => { const actual_target_addr = @intCast(i64, target_addr) + rel.addend; + log.debug(" | target_addr = 0x{x}", .{actual_target_addr}); const source_page = @intCast(i32, source_addr >> 12); const target_page = @intCast(i32, actual_target_addr >> 12); const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); @@ -675,6 +669,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { .ARM64_RELOC_PAGEOFF12 => { const code = self.code.items[rel.offset..][0..4]; const actual_target_addr = @intCast(i64, target_addr) + rel.addend; + log.debug(" | target_addr = 0x{x}", .{actual_target_addr}); const narrowed = @truncate(u12, @intCast(u64, actual_target_addr)); if (isArithmeticOp(self.code.items[rel.offset..][0..4])) { var inst = aarch64.Instruction{ @@ -712,6 +707,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => { const code = 
self.code.items[rel.offset..][0..4]; const actual_target_addr = @intCast(i64, target_addr) + rel.addend; + log.debug(" | target_addr = 0x{x}", .{actual_target_addr}); const narrowed = @truncate(u12, @intCast(u64, actual_target_addr)); var inst: aarch64.Instruction = .{ .load_store_register = mem.bytesToValue(meta.TagPayload( @@ -726,6 +722,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => { const code = self.code.items[rel.offset..][0..4]; const actual_target_addr = @intCast(i64, target_addr) + rel.addend; + log.debug(" | target_addr = 0x{x}", .{actual_target_addr}); const RegInfo = struct { rd: u5, @@ -783,6 +780,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { mem.writeIntLittle(u32, code, inst.toU32()); }, .ARM64_RELOC_POINTER_TO_GOT => { + log.debug(" | target_addr = 0x{x}", .{target_addr}); const result = math.cast(i32, @intCast(i64, target_addr) - @intCast(i64, source_addr)) orelse return error.Overflow; mem.writeIntLittle(u32, self.code.items[rel.offset..][0..4], @bitCast(u32, result)); }, @@ -795,6 +793,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { break :blk @intCast(i64, target_addr) + rel.addend; } }; + log.debug(" | target_addr = 0x{x}", .{result}); if (rel.length == 3) { mem.writeIntLittle(u64, self.code.items[rel.offset..][0..8], @bitCast(u64, result)); @@ -813,6 +812,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { .x86_64 => { switch (@intToEnum(macho.reloc_type_x86_64, rel.@"type")) { .X86_64_RELOC_BRANCH => { + log.debug(" | target_addr = 0x{x}", .{target_addr}); const displacement = math.cast( i32, @intCast(i64, target_addr) - @intCast(i64, source_addr) - 4 + rel.addend, @@ -820,6 +820,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { mem.writeIntLittle(u32, self.code.items[rel.offset..][0..4], @bitCast(u32, displacement)); }, .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => { + log.debug(" | target_addr = 0x{x}", 
.{target_addr}); const displacement = math.cast( i32, @intCast(i64, target_addr) - @intCast(i64, source_addr) - 4 + rel.addend, @@ -827,6 +828,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { mem.writeIntLittle(u32, self.code.items[rel.offset..][0..4], @bitCast(u32, displacement)); }, .X86_64_RELOC_TLV => { + log.debug(" | target_addr = 0x{x}", .{target_addr}); if (!macho_file.tlv_ptr_entries_table.contains(rel.target)) { // We need to rewrite the opcode from movq to leaq. self.code.items[rel.offset - 2] = 0x8d; @@ -850,6 +852,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { else => unreachable, }; const actual_target_addr = @intCast(i64, target_addr) + rel.addend; + log.debug(" | target_addr = 0x{x}", .{actual_target_addr}); const displacement = math.cast( i32, actual_target_addr - @intCast(i64, source_addr + correction + 4), @@ -865,6 +868,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { break :blk @intCast(i64, target_addr) + rel.addend; } }; + log.debug(" | target_addr = 0x{x}", .{result}); if (rel.length == 3) { mem.writeIntLittle(u64, self.code.items[rel.offset..][0..8], @bitCast(u64, result)); diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index a5c65abed3..4da106eca1 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -275,9 +275,12 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti const sym = switch (reloc.@"type") { .direct_load => self.base.getSymbol(.{ .sym_index = reloc.target, .file = null }), .got_load => blk: { - const got_index = self.base.got_entries_table.get(.{ .sym_index = reloc.target, .file = null }).?; - const got_atom = self.base.got_entries.items[got_index].atom; - break :blk got_atom.getSymbol(self.base); + const got_index = self.base.got_entries_table.get(.{ + .sym_index = reloc.target, + .file = null, + }).?; + const got_entry = self.base.got_entries.items[got_index]; + break :blk 
got_entry.getSymbol(self.base); }, }; if (sym.n_value == reloc.prev_vaddr) continue; @@ -285,9 +288,12 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti const sym_name = switch (reloc.@"type") { .direct_load => self.base.getSymbolName(.{ .sym_index = reloc.target, .file = null }), .got_load => blk: { - const got_index = self.base.got_entries_table.get(.{ .sym_index = reloc.target, .file = null }).?; - const got_atom = self.base.got_entries.items[got_index].atom; - break :blk got_atom.getName(self.base); + const got_index = self.base.got_entries_table.get(.{ + .sym_index = reloc.target, + .file = null, + }).?; + const got_entry = self.base.got_entries.items[got_index]; + break :blk got_entry.getName(self.base); }, }; const seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 07237d31aa..37b7d60e71 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -410,7 +410,9 @@ pub fn splitIntoAtomsOneShot( next_sym_count += atom_syms.len; assert(atom_syms.len > 0); - const sym_index = atom_syms[0].index; + const sym_index = for (atom_syms) |atom_sym| { + if (atom_sym.getSymbol(context).ext()) break atom_sym.index; + } else atom_syms[0].index; const atom_size = blk: { const end_addr = if (next_sym_count < filtered_syms.len) filtered_syms[next_sym_count].getSymbol(context).n_value @@ -570,12 +572,6 @@ fn createAtomFromSubsection( if (gc_roots) |gcr| { const is_gc_root = blk: { if (sect.isDontDeadStrip()) break :blk true; - if (sect.isDontDeadStripIfReferencesLive()) { - // TODO if isDontDeadStripIfReferencesLive we should analyse the edges - // before making it a GC root - break :blk true; - } - if (mem.eql(u8, "__StaticInit", sect.sectName())) break :blk true; switch (sect.type_()) { macho.S_MOD_INIT_FUNC_POINTERS, macho.S_MOD_TERM_FUNC_POINTERS, @@ -641,3 +637,7 @@ pub fn getSection(self: Object, n_sect: u16) macho.section_64 { 
assert(n_sect < seg.sections.items.len); return seg.sections.items[n_sect]; } + +pub fn getAtomForSymbol(self: Object, sym_index: u32) ?*Atom { + return self.atom_by_index_table.get(sym_index); +} From 7345976261d4381ed48807f2003709e7ff609b0c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 21 Jul 2022 09:39:12 +0200 Subject: [PATCH 22/27] macho: sort subsection symbols by seniority --- src/link/MachO.zig | 165 +++++++++++++++++++++----------------- src/link/MachO/Object.zig | 58 ++++++++++---- 2 files changed, 133 insertions(+), 90 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 898ad6732b..49f4c34bb4 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2940,12 +2940,6 @@ fn createTentativeDefAtoms(self: *MachO) !void { if (global.file) |file| { const object = &self.objects.items[file]; - - try atom.contained.append(gpa, .{ - .sym_index = global.sym_index, - .offset = 0, - }); - try object.managed_atoms.append(gpa, atom); try object.atom_by_index_table.putNoClobber(gpa, global.sym_index, atom); } else { @@ -5594,6 +5588,7 @@ fn gcAtoms(self: *MachO, gc_roots: *std.AutoHashMap(*Atom, void)) !void { const global = atom.getSymbolWithLoc(); const sym = atom.getSymbolPtr(self); + const match = self.getMatchingSectionFromOrdinal(sym.n_sect); if (sym.n_desc == N_DESC_GCED) continue; if (!sym.ext()) { @@ -5605,15 +5600,6 @@ fn gcAtoms(self: *MachO, gc_roots: *std.AutoHashMap(*Atom, void)) !void { } else continue; } - loop = true; - const match = self.getMatchingSectionFromOrdinal(sym.n_sect); - - // TODO don't dedup eh_frame info yet until we actually implement parsing unwind records - if (match.eql(.{ - .seg = self.text_segment_cmd_index, - .sect = self.eh_frame_section_index, - })) continue; - self.logAtom(atom); sym.n_desc = N_DESC_GCED; self.removeAtomFromSection(atom, match); @@ -5644,6 +5630,8 @@ fn gcAtoms(self: *MachO, gc_roots: *std.AutoHashMap(*Atom, void)) !void { const tlv_ptr_sym = tlv_ptr_atom.getSymbolPtr(self); 
tlv_ptr_sym.n_desc = N_DESC_GCED; } + + loop = true; } } } @@ -6831,7 +6819,6 @@ pub fn generateSymbolStabs( }; const tu_name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.name); const tu_comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.comp_dir); - const source_symtab = object.getSourceSymtab(); // Open scope try locals.ensureUnusedCapacity(3); @@ -6857,71 +6844,27 @@ pub fn generateSymbolStabs( .n_value = object.mtime, }); + var stabs_buf: [4]macho.nlist_64 = undefined; + for (object.managed_atoms.items) |atom| { + const stabs = try self.generateSymbolStabsForSymbol( + atom.getSymbolWithLoc(), + debug_info, + &stabs_buf, + ); + try locals.appendSlice(stabs); + for (atom.contained.items) |sym_at_off| { const sym_loc = SymbolWithLoc{ .sym_index = sym_at_off.sym_index, .file = atom.file, }; - const sym = self.getSymbol(sym_loc); - const sym_name = self.getSymbolName(sym_loc); - if (sym.n_strx == 0) continue; - if (sym.n_desc == N_DESC_GCED) continue; - if (self.symbolIsTemp(sym_loc)) continue; - if (sym_at_off.sym_index >= source_symtab.len) continue; // synthetic, linker generated - - const source_sym = source_symtab[sym_at_off.sym_index]; - const size: ?u64 = size: { - if (source_sym.tentative()) break :size null; - for (debug_info.inner.func_list.items) |func| { - if (func.pc_range) |range| { - if (source_sym.n_value >= range.start and source_sym.n_value < range.end) { - break :size range.end - range.start; - } - } - } - break :size null; - }; - - if (size) |ss| { - try locals.ensureUnusedCapacity(4); - locals.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_BNSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.strtab.insert(gpa, sym_name), - .n_type = macho.N_FUN, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }); - locals.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_FUN, - .n_sect = 0, - .n_desc = 
0, - .n_value = ss, - }); - locals.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_ENSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = ss, - }); - } else { - try locals.append(.{ - .n_strx = try self.strtab.insert(gpa, sym_name), - .n_type = macho.N_STSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }); - } + const contained_stabs = try self.generateSymbolStabsForSymbol( + sym_loc, + debug_info, + &stabs_buf, + ); + try locals.appendSlice(contained_stabs); } } @@ -6935,6 +6878,78 @@ pub fn generateSymbolStabs( }); } +fn generateSymbolStabsForSymbol( + self: *MachO, + sym_loc: SymbolWithLoc, + debug_info: DebugInfo, + buf: *[4]macho.nlist_64, +) ![]const macho.nlist_64 { + const gpa = self.base.allocator; + const object = self.objects.items[sym_loc.file.?]; + const source_symtab = object.getSourceSymtab(); + const sym = self.getSymbol(sym_loc); + const sym_name = self.getSymbolName(sym_loc); + + if (sym.n_strx == 0) return buf[0..0]; + if (sym.n_desc == N_DESC_GCED) return buf[0..0]; + if (self.symbolIsTemp(sym_loc)) return buf[0..0]; + if (sym_loc.sym_index >= source_symtab.len) return buf[0..0]; // synthetic, linker generated + + const source_sym = source_symtab[sym_loc.sym_index]; + const size: ?u64 = size: { + if (source_sym.tentative()) break :size null; + for (debug_info.inner.func_list.items) |func| { + if (func.pc_range) |range| { + if (source_sym.n_value >= range.start and source_sym.n_value < range.end) { + break :size range.end - range.start; + } + } + } + break :size null; + }; + + if (size) |ss| { + buf[0] = .{ + .n_strx = 0, + .n_type = macho.N_BNSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }; + buf[1] = .{ + .n_strx = try self.strtab.insert(gpa, sym_name), + .n_type = macho.N_FUN, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }; + buf[2] = .{ + .n_strx = 0, + .n_type = macho.N_FUN, + .n_sect = 0, + .n_desc = 0, + .n_value = ss, + }; + buf[3] = .{ + .n_strx = 0, + 
.n_type = macho.N_ENSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = ss, + }; + return buf; + } else { + buf[0] = .{ + .n_strx = try self.strtab.insert(gpa, sym_name), + .n_type = macho.N_STSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }; + return buf[0..1]; + } +} + fn snapshotState(self: *MachO) !void { const emit = self.base.options.emit orelse { log.debug("no emit directory found; skipping snapshot...", .{}); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 37b7d60e71..f6b50cd0ae 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -197,11 +197,9 @@ const SymbolAtIndex = struct { return mem.sliceTo(@ptrCast([*:0]const u8, ctx.strtab.ptr + sym.n_strx), 0); } + /// Returns whether lhs is less than rhs by allocated address in object file. + /// Undefined symbols are pushed to the back (always evaluate to true). fn lessThan(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool { - // We sort by type: defined < undefined, and - // afterwards by address in each group. Normally, dysymtab should - // be enough to guarantee the sort, but turns out not every compiler - // is kind enough to specify the symbols in the correct order. const lhs = lhs_index.getSymbol(ctx); const rhs = rhs_index.getSymbol(ctx); if (lhs.sect()) { @@ -215,6 +213,29 @@ const SymbolAtIndex = struct { return false; } } + + /// Returns whether lhs is less senior than rhs. The rules are: + /// 1. ext + /// 2. weak + /// 3. local + /// 4. temp (local starting with `l` prefix). 
+ fn lessThanBySeniority(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool { + const lhs = lhs_index.getSymbol(ctx); + const rhs = rhs_index.getSymbol(ctx); + if (!rhs.ext()) { + const lhs_name = lhs_index.getSymbolName(ctx); + return mem.startsWith(u8, lhs_name, "l") or mem.startsWith(u8, lhs_name, "L"); + } else if (rhs.pext() or rhs.weakDef()) { + return !lhs.ext(); + } else { + return false; + } + } + + /// Like lessThanBySeniority but negated. + fn greaterThanBySeniority(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool { + return !lessThanBySeniority(ctx, lhs_index, rhs_index); + } }; fn filterSymbolsByAddress( @@ -295,6 +316,10 @@ pub fn splitIntoAtomsOneShot( sorted_all_syms.appendAssumeCapacity(.{ .index = @intCast(u32, index) }); } + // We sort by type: defined < undefined, and + // afterwards by address in each group. Normally, dysymtab should + // be enough to guarantee the sort, but turns out not every compiler + // is kind enough to specify the symbols in the correct order. sort.sort(SymbolAtIndex, sorted_all_syms.items, context, SymbolAtIndex.lessThan); // Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we @@ -409,10 +434,18 @@ pub fn splitIntoAtomsOneShot( ); next_sym_count += atom_syms.len; + // We want to bubble up the first externally defined symbol here. 
assert(atom_syms.len > 0); - const sym_index = for (atom_syms) |atom_sym| { - if (atom_sym.getSymbol(context).ext()) break atom_sym.index; - } else atom_syms[0].index; + var sorted_atom_syms = std.ArrayList(SymbolAtIndex).init(gpa); + defer sorted_atom_syms.deinit(); + try sorted_atom_syms.appendSlice(atom_syms); + sort.sort( + SymbolAtIndex, + sorted_atom_syms.items, + context, + SymbolAtIndex.greaterThanBySeniority, + ); + const atom_size = blk: { const end_addr = if (next_sym_count < filtered_syms.len) filtered_syms[next_sym_count].getSymbol(context).n_value @@ -432,12 +465,12 @@ pub fn splitIntoAtomsOneShot( const atom = try self.createAtomFromSubsection( macho_file, object_id, - sym_index, + sorted_atom_syms.items[0].index, atom_size, atom_align, atom_code, relocs, - atom_syms[1..], + sorted_atom_syms.items[1..], match, sect, gc_roots, @@ -552,12 +585,7 @@ fn createAtomFromSubsection( // the filtered symbols and note which symbol is contained within so that // we can properly allocate addresses down the line. // While we're at it, we need to update segment,section mapping of each symbol too. - try atom.contained.ensureTotalCapacity(gpa, indexes.len + 1); - atom.contained.appendAssumeCapacity(.{ - .sym_index = sym_index, - .offset = 0, - }); - + try atom.contained.ensureTotalCapacity(gpa, indexes.len); for (indexes) |inner_sym_index| { const inner_sym = &self.symtab.items[inner_sym_index.index]; inner_sym.n_sect = macho_file.getSectionOrdinal(match); From ca746566851aa5b12120fc76c69a0a2278a31f4e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 21 Jul 2022 13:30:15 +0200 Subject: [PATCH 23/27] macho: move GC code into dead_strip.zig module Implement marking live atoms that reference other live atoms if required by the compiler (via section attribute). 
--- CMakeLists.txt | 2 + src/link/MachO.zig | 255 ++--------------------------- src/link/MachO/Atom.zig | 8 +- src/link/MachO/Object.zig | 53 ++---- src/link/MachO/dead_strip.zig | 293 ++++++++++++++++++++++++++++++++++ 5 files changed, 327 insertions(+), 284 deletions(-) create mode 100644 src/link/MachO/dead_strip.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index b1e34b6068..0ec714322f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -758,10 +758,12 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig" "${CMAKE_SOURCE_DIR}/src/link/Plan9.zig" "${CMAKE_SOURCE_DIR}/src/link/Plan9/aout.zig" "${CMAKE_SOURCE_DIR}/src/link/Wasm.zig" "${CMAKE_SOURCE_DIR}/src/link/msdos-stub.bin" + "${CMAKE_SOURCE_DIR}/src/link/strtab.zig" "${CMAKE_SOURCE_DIR}/src/link/tapi.zig" "${CMAKE_SOURCE_DIR}/src/link/tapi/Tokenizer.zig" "${CMAKE_SOURCE_DIR}/src/link/tapi/parse.zig" diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 49f4c34bb4..4c344c6260 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -16,6 +16,7 @@ const meta = std.meta; const aarch64 = @import("../arch/aarch64/bits.zig"); const bind = @import("MachO/bind.zig"); const codegen = @import("../codegen.zig"); +const dead_strip = @import("MachO/dead_strip.zig"); const link = @import("../link.zig"); const llvm_backend = @import("../codegen/llvm.zig"); const target_util = @import("../target.zig"); @@ -709,7 +710,7 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; const is_exe_or_dyn_lib = is_dyn_lib or self.base.options.output_mode == .Exe; const stack_size = self.base.options.stack_size_override orelse 0; - const dead_strip = self.base.options.gc_sections orelse false; + const gc_sections = self.base.options.gc_sections orelse false; const 
id_symlink_basename = "zld.id"; @@ -741,7 +742,7 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) man.hash.addOptional(self.base.options.search_strategy); man.hash.addOptional(self.base.options.headerpad_size); man.hash.add(self.base.options.headerpad_max_install_names); - man.hash.add(dead_strip); + man.hash.add(gc_sections); man.hash.add(self.base.options.dead_strip_dylibs); man.hash.add(self.base.options.strip); man.hash.addListOfBytes(self.base.options.lib_dirs); @@ -1068,7 +1069,7 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try argv.append("-headerpad_max_install_names"); } - if (dead_strip) { + if (gc_sections) { try argv.append("-dead_strip"); } @@ -1186,19 +1187,12 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try self.createTentativeDefAtoms(); - if (dead_strip) { - var gc_roots = std.AutoHashMap(*Atom, void).init(gpa); - defer gc_roots.deinit(); + for (self.objects.items) |*object, object_id| { + try object.splitIntoAtomsOneShot(self, @intCast(u32, object_id)); + } - for (self.objects.items) |*object, object_id| { - try object.splitIntoAtomsOneShot(self, @intCast(u32, object_id), &gc_roots); - } - - try self.gcAtoms(&gc_roots); - } else { - for (self.objects.items) |*object, object_id| { - try object.splitIntoAtomsOneShot(self, @intCast(u32, object_id), null); - } + if (gc_sections) { + try dead_strip.gcAtoms(self); } try self.pruneAndSortSections(); @@ -5504,227 +5498,6 @@ fn pruneAndSortSections(self: *MachO) !void { self.sections_order_dirty = false; } -fn gcAtoms(self: *MachO, gc_roots: *std.AutoHashMap(*Atom, void)) !void { - assert(self.base.options.gc_sections.?); - - const gpa = self.base.allocator; - - if (self.base.options.output_mode == .Exe) { - // Add entrypoint as GC root - const global = try self.getEntryPoint(); - const atom = self.getAtomForSymbol(global).?; // panic here means fatal error - _ = try gc_roots.getOrPut(atom); - } 
else { - assert(self.base.options.output_mode == .Lib); - // Add exports as GC roots - for (self.globals.values()) |global| { - const sym = self.getSymbol(global); - if (!sym.sect()) continue; - const atom = self.getAtomForSymbol(global) orelse { - log.debug("skipping {s}", .{self.getSymbolName(global)}); - continue; - }; - _ = try gc_roots.getOrPut(atom); - } - } - // TODO just a temp until we learn how to parse unwind records - if (self.globals.get("___gxx_personality_v0")) |global| { - if (self.getAtomForSymbol(global)) |atom| { - _ = try gc_roots.getOrPut(atom); - } - } - - var stack = std.ArrayList(*Atom).init(gpa); - defer stack.deinit(); - try stack.ensureUnusedCapacity(gc_roots.count()); - - var alive = std.AutoHashMap(*Atom, void).init(gpa); - defer alive.deinit(); - try alive.ensureUnusedCapacity(gc_roots.count()); - - log.debug("GC roots:", .{}); - var gc_roots_it = gc_roots.keyIterator(); - while (gc_roots_it.next()) |gc_root| { - self.logAtom(gc_root.*); - stack.appendAssumeCapacity(gc_root.*); - alive.putAssumeCapacity(gc_root.*, {}); - } - - while (stack.popOrNull()) |source_atom| { - for (source_atom.relocs.items) |rel| { - if (rel.getTargetAtom(self)) |target_atom| { - const gop = try alive.getOrPut(target_atom); - if (!gop.found_existing) { - log.debug(" retained ATOM(%{d}, '{s}') in object({d})", .{ - target_atom.sym_index, - target_atom.getName(self), - target_atom.file, - }); - log.debug(" referenced by ATOM(%{d}, '{s}') in object({d})", .{ - source_atom.sym_index, - source_atom.getName(self), - source_atom.file, - }); - try stack.append(target_atom); - } - } - } - } - // TODO live support - - // Any section that ends up here will be updated, that is, - // its size and alignment recalculated. 
- var gc_sections = std.AutoHashMap(MatchingSection, void).init(gpa); - defer gc_sections.deinit(); - - var loop: bool = true; - while (loop) { - loop = false; - - for (self.objects.items) |object| { - for (object.getSourceSymtab()) |_, source_index| { - const atom = object.getAtomForSymbol(@intCast(u32, source_index)) orelse continue; - if (alive.contains(atom)) continue; - - const global = atom.getSymbolWithLoc(); - const sym = atom.getSymbolPtr(self); - const match = self.getMatchingSectionFromOrdinal(sym.n_sect); - - if (sym.n_desc == N_DESC_GCED) continue; - if (!sym.ext()) { - for (atom.relocs.items) |rel| { - if (rel.getTargetAtom(self)) |target_atom| { - const target_sym = target_atom.getSymbol(self); - if (target_sym.n_desc == N_DESC_GCED) break; - } - } else continue; - } - - self.logAtom(atom); - sym.n_desc = N_DESC_GCED; - self.removeAtomFromSection(atom, match); - _ = try gc_sections.put(match, {}); - - for (atom.contained.items) |sym_off| { - const inner = self.getSymbolPtr(.{ - .sym_index = sym_off.sym_index, - .file = atom.file, - }); - inner.n_desc = N_DESC_GCED; - } - - if (self.got_entries_table.contains(global)) { - const got_atom = self.getGotAtomForSymbol(global).?; - const got_sym = got_atom.getSymbolPtr(self); - got_sym.n_desc = N_DESC_GCED; - } - - if (self.stubs_table.contains(global)) { - const stubs_atom = self.getStubsAtomForSymbol(global).?; - const stubs_sym = stubs_atom.getSymbolPtr(self); - stubs_sym.n_desc = N_DESC_GCED; - } - - if (self.tlv_ptr_entries_table.contains(global)) { - const tlv_ptr_atom = self.getTlvPtrAtomForSymbol(global).?; - const tlv_ptr_sym = tlv_ptr_atom.getSymbolPtr(self); - tlv_ptr_sym.n_desc = N_DESC_GCED; - } - - loop = true; - } - } - } - - for (self.got_entries.items) |entry| { - const sym = entry.getSymbol(self); - if (sym.n_desc != N_DESC_GCED) continue; - - // TODO tombstone - const atom = entry.getAtom(self); - const match = self.getMatchingSectionFromOrdinal(sym.n_sect); - 
self.removeAtomFromSection(atom, match); - _ = try gc_sections.put(match, {}); - _ = self.got_entries_table.remove(entry.target); - } - - for (self.stubs.items) |entry| { - const sym = entry.getSymbol(self); - if (sym.n_desc != N_DESC_GCED) continue; - - // TODO tombstone - const atom = entry.getAtom(self); - const match = self.getMatchingSectionFromOrdinal(sym.n_sect); - self.removeAtomFromSection(atom, match); - _ = try gc_sections.put(match, {}); - _ = self.stubs_table.remove(entry.target); - } - - for (self.tlv_ptr_entries.items) |entry| { - const sym = entry.getSymbol(self); - if (sym.n_desc != N_DESC_GCED) continue; - - // TODO tombstone - const atom = entry.getAtom(self); - const match = self.getMatchingSectionFromOrdinal(sym.n_sect); - self.removeAtomFromSection(atom, match); - _ = try gc_sections.put(match, {}); - _ = self.tlv_ptr_entries_table.remove(entry.target); - } - - var gc_sections_it = gc_sections.iterator(); - while (gc_sections_it.next()) |entry| { - const match = entry.key_ptr.*; - const sect = self.getSectionPtr(match); - if (sect.size == 0) continue; // Pruning happens automatically in next step. - - sect.@"align" = 0; - sect.size = 0; - - var atom = self.atoms.get(match).?; - - while (atom.prev) |prev| { - atom = prev; - } - - while (true) { - const atom_alignment = try math.powi(u32, 2, atom.alignment); - const aligned_end_addr = mem.alignForwardGeneric(u64, sect.size, atom_alignment); - const padding = aligned_end_addr - sect.size; - sect.size += padding + atom.size; - sect.@"align" = @maximum(sect.@"align", atom.alignment); - - if (atom.next) |next| { - atom = next; - } else break; - } - } -} - -fn removeAtomFromSection(self: *MachO, atom: *Atom, match: MatchingSection) void { - const sect = self.getSectionPtr(match); - - // If we want to enable GC for incremental codepath, we need to take into - // account any padding that might have been left here. 
- sect.size -= atom.size; - - if (atom.prev) |prev| { - prev.next = atom.next; - } - if (atom.next) |next| { - next.prev = atom.prev; - } else { - const last = self.atoms.getPtr(match).?; - if (atom.prev) |prev| { - last.* = prev; - } else { - // The section will be GCed in the next step. - last.* = undefined; - sect.size = 0; - } - } -} - fn updateSectionOrdinals(self: *MachO) !void { if (!self.sections_order_dirty) return; @@ -6217,20 +5990,18 @@ fn writeDataInCode(self: *MachO) !void { for (self.objects.items) |object| { const dice = object.parseDataInCode() orelse continue; - const source_symtab = object.getSourceSymtab(); try out_dice.ensureUnusedCapacity(dice.len); for (object.managed_atoms.items) |atom| { const sym = atom.getSymbol(self); if (sym.n_desc == N_DESC_GCED) continue; - if (atom.sym_index >= source_symtab.len) continue; // synthetic, linker generated const match = self.getMatchingSectionFromOrdinal(sym.n_sect); if (match.seg != self.text_segment_cmd_index.? and match.sect != self.text_section_index.?) 
{ continue; } - const source_sym = source_symtab[atom.sym_index]; + const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue; const source_addr = math.cast(u32, source_sym.n_value) orelse return error.Overflow; const filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size); const base = math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset) orelse @@ -6886,16 +6657,14 @@ fn generateSymbolStabsForSymbol( ) ![]const macho.nlist_64 { const gpa = self.base.allocator; const object = self.objects.items[sym_loc.file.?]; - const source_symtab = object.getSourceSymtab(); const sym = self.getSymbol(sym_loc); const sym_name = self.getSymbolName(sym_loc); if (sym.n_strx == 0) return buf[0..0]; if (sym.n_desc == N_DESC_GCED) return buf[0..0]; if (self.symbolIsTemp(sym_loc)) return buf[0..0]; - if (sym_loc.sym_index >= source_symtab.len) return buf[0..0]; // synthetic, linker generated - const source_sym = source_symtab[sym_loc.sym_index]; + const source_sym = object.getSourceSymbol(sym_loc.sym_index) orelse return buf[0..0]; const size: ?u64 = size: { if (source_sym.tentative()) break :size null; for (debug_info.inner.func_list.items) |func| { @@ -7353,7 +7122,7 @@ fn logAtoms(self: *MachO) void { } } -fn logAtom(self: *MachO, atom: *const Atom) void { +pub fn logAtom(self: *MachO, atom: *const Atom) void { const sym = atom.getSymbol(self); const sym_name = atom.getName(self); log.debug(" ATOM(%{d}, '{s}') @ {x} (sizeof({x}), alignof({x})) in object({d}) in sect({d})", .{ diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index acaeab7a88..2f60702423 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -308,7 +308,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: if (rel.r_extern == 0) { const sect_id = @intCast(u16, rel.r_symbolnum - 1); const sym_index = object.sections_as_symbols.get(sect_id) orelse blk: { - const sect = object.getSection(sect_id); + const sect = 
object.getSourceSection(sect_id); const match = (try context.macho_file.getMatchingSection(sect)) orelse unreachable; const sym_index = @intCast(u32, object.symtab.items.len); @@ -360,7 +360,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: else mem.readIntLittle(i32, self.code.items[offset..][0..4]); if (rel.r_extern == 0) { - const target_sect_base_addr = object.getSection(@intCast(u16, rel.r_symbolnum - 1)).addr; + const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr; addend -= @intCast(i64, target_sect_base_addr); } try self.addPtrBindingOrRebase(rel, target, context); @@ -392,7 +392,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: else mem.readIntLittle(i32, self.code.items[offset..][0..4]); if (rel.r_extern == 0) { - const target_sect_base_addr = object.getSection(@intCast(u16, rel.r_symbolnum - 1)).addr; + const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr; addend -= @intCast(i64, target_sect_base_addr); } try self.addPtrBindingOrRebase(rel, target, context); @@ -413,7 +413,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: if (rel.r_extern == 0) { // Note for the future self: when r_extern == 0, we should subtract correction from the // addend. - const target_sect_base_addr = object.getSection(@intCast(u16, rel.r_symbolnum - 1)).addr; + const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr; // We need to add base_offset, i.e., offset of this atom wrt to the source // section. Otherwise, the addend will over-/under-shoot. 
addend += @intCast(i64, context.base_addr + offset + 4) - diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index f6b50cd0ae..5e10c0c0a3 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -285,12 +285,7 @@ fn filterRelocs( } /// Splits object into atoms assuming one-shot linking mode. -pub fn splitIntoAtomsOneShot( - self: *Object, - macho_file: *MachO, - object_id: u32, - gc_roots: ?*std.AutoHashMap(*Atom, void), -) !void { +pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) !void { assert(macho_file.mode == .one_shot); const tracy = trace(@src()); @@ -338,10 +333,7 @@ pub fn splitIntoAtomsOneShot( // We only care about defined symbols, so filter every other out. const sorted_syms = sorted_all_syms.items[0..iundefsym]; - const dead_strip = macho_file.base.options.gc_sections orelse false; - const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0 and - (macho_file.base.options.optimize_mode != .Debug or dead_strip); - // const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; + const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; for (seg.sections.items) |sect, id| { const sect_id = @intCast(u8, id); @@ -417,7 +409,6 @@ pub fn splitIntoAtomsOneShot( &.{}, match, sect, - gc_roots, ); try macho_file.addAtomToSection(atom, match); } @@ -473,7 +464,6 @@ pub fn splitIntoAtomsOneShot( sorted_atom_syms.items[1..], match, sect, - gc_roots, ); if (arch == .x86_64 and addr == sect.addr) { @@ -528,7 +518,6 @@ pub fn splitIntoAtomsOneShot( filtered_syms, match, sect, - gc_roots, ); try macho_file.addAtomToSection(atom, match); } @@ -547,7 +536,6 @@ fn createAtomFromSubsection( indexes: []const SymbolAtIndex, match: MatchingSection, sect: macho.section_64, - gc_roots: ?*std.AutoHashMap(*Atom, void), ) !*Atom { const gpa = macho_file.base.allocator; const sym = self.symtab.items[sym_index]; @@ -597,21 
+585,6 @@ fn createAtomFromSubsection( try self.atom_by_index_table.putNoClobber(gpa, inner_sym_index.index, atom); } - if (gc_roots) |gcr| { - const is_gc_root = blk: { - if (sect.isDontDeadStrip()) break :blk true; - switch (sect.type_()) { - macho.S_MOD_INIT_FUNC_POINTERS, - macho.S_MOD_TERM_FUNC_POINTERS, - => break :blk true, - else => break :blk false, - } - }; - if (is_gc_root) { - try gcr.putNoClobber(atom, {}); - } - } - return atom; } @@ -633,6 +606,18 @@ pub fn getSourceSymtab(self: Object) []const macho.nlist_64 { ); } +pub fn getSourceSymbol(self: Object, index: u32) ?macho.nlist_64 { + const symtab = self.getSourceSymtab(); + if (index >= symtab.len) return null; + return symtab[index]; +} + +pub fn getSourceSection(self: Object, index: u16) macho.section_64 { + const seg = self.load_commands.items[self.segment_cmd_index.?].segment; + assert(index < seg.sections.items.len); + return seg.sections.items[index]; +} + pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry { const index = self.data_in_code_cmd_index orelse return null; const data_in_code = self.load_commands.items[index].linkedit_data; @@ -643,8 +628,8 @@ pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry { ); } -pub fn getSectionContents(self: Object, sect_id: u16) error{Overflow}![]const u8 { - const sect = self.getSection(sect_id); +pub fn getSectionContents(self: Object, index: u16) error{Overflow}![]const u8 { + const sect = self.getSourceSection(index); const size = math.cast(usize, sect.size) orelse return error.Overflow; log.debug("getting {s},{s} data at 0x{x} - 0x{x}", .{ sect.segName(), @@ -660,12 +645,6 @@ pub fn getString(self: Object, off: u32) []const u8 { return mem.sliceTo(@ptrCast([*:0]const u8, self.strtab.ptr + off), 0); } -pub fn getSection(self: Object, n_sect: u16) macho.section_64 { - const seg = self.load_commands.items[self.segment_cmd_index.?].segment; - assert(n_sect < seg.sections.items.len); - return 
seg.sections.items[n_sect]; -} - pub fn getAtomForSymbol(self: Object, sym_index: u32) ?*Atom { return self.atom_by_index_table.get(sym_index); } diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig new file mode 100644 index 0000000000..a953e5bc19 --- /dev/null +++ b/src/link/MachO/dead_strip.zig @@ -0,0 +1,293 @@ +const std = @import("std"); +const assert = std.debug.assert; +const log = std.log.scoped(.dead_strip); +const macho = std.macho; +const math = std.math; +const mem = std.mem; + +const Allocator = mem.Allocator; +const Atom = @import("Atom.zig"); +const MachO = @import("../MachO.zig"); +const MatchingSection = MachO.MatchingSection; + +pub fn gcAtoms(macho_file: *MachO) !void { + assert(macho_file.base.options.gc_sections.?); + + const gpa = macho_file.base.allocator; + var arena_allocator = std.heap.ArenaAllocator.init(gpa); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + var roots = std.AutoHashMap(*Atom, void).init(arena); + try collectRoots(&roots, macho_file); + + var alive = std.AutoHashMap(*Atom, void).init(arena); + try mark(roots, &alive, macho_file); + + try prune(arena, alive, macho_file); +} + +fn removeAtomFromSection(atom: *Atom, match: MatchingSection, macho_file: *MachO) void { + const sect = macho_file.getSectionPtr(match); + + // If we want to enable GC for incremental codepath, we need to take into + // account any padding that might have been left here. + sect.size -= atom.size; + + if (atom.prev) |prev| { + prev.next = atom.next; + } + if (atom.next) |next| { + next.prev = atom.prev; + } else { + const last = macho_file.atoms.getPtr(match).?; + if (atom.prev) |prev| { + last.* = prev; + } else { + // The section will be GCed in the next step. 
+ last.* = undefined; + sect.size = 0; + } + } +} + +fn collectRoots(roots: *std.AutoHashMap(*Atom, void), macho_file: *MachO) !void { + const output_mode = macho_file.base.options.output_mode; + + switch (output_mode) { + .Exe => { + // Add entrypoint as GC root + const global = try macho_file.getEntryPoint(); + const atom = macho_file.getAtomForSymbol(global).?; // panic here means fatal error + _ = try roots.getOrPut(atom); + }, + else => |other| { + assert(other == .Lib); + // Add exports as GC roots + for (macho_file.globals.values()) |global| { + const sym = macho_file.getSymbol(global); + if (!sym.sect()) continue; + const atom = macho_file.getAtomForSymbol(global) orelse { + log.debug("skipping {s}", .{macho_file.getSymbolName(global)}); + continue; + }; + _ = try roots.getOrPut(atom); + log.debug("adding root", .{}); + macho_file.logAtom(atom); + } + }, + } + + // TODO just a temp until we learn how to parse unwind records + if (macho_file.globals.get("___gxx_personality_v0")) |global| { + if (macho_file.getAtomForSymbol(global)) |atom| { + _ = try roots.getOrPut(atom); + log.debug("adding root", .{}); + macho_file.logAtom(atom); + } + } + + for (macho_file.objects.items) |object| { + for (object.managed_atoms.items) |atom| { + const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue; + if (source_sym.tentative()) continue; + const source_sect = object.getSourceSection(source_sym.n_sect - 1); + const is_gc_root = blk: { + if (source_sect.isDontDeadStrip()) break :blk true; + switch (source_sect.type_()) { + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => break :blk true, + else => break :blk false, + } + }; + if (is_gc_root) { + try roots.putNoClobber(atom, {}); + log.debug("adding root", .{}); + macho_file.logAtom(atom); + } + } + } +} + +fn markLive(atom: *Atom, alive: *std.AutoHashMap(*Atom, void), macho_file: *MachO) anyerror!void { + const gop = try alive.getOrPut(atom); + if (gop.found_existing) return; + + 
log.debug("marking live", .{}); + macho_file.logAtom(atom); + + for (atom.relocs.items) |rel| { + const target_atom = rel.getTargetAtom(macho_file) orelse continue; + try markLive(target_atom, alive, macho_file); + } +} + +fn refersLive(atom: *Atom, alive: std.AutoHashMap(*Atom, void), macho_file: *MachO) bool { + for (atom.relocs.items) |rel| { + const target_atom = rel.getTargetAtom(macho_file) orelse continue; + if (alive.contains(target_atom)) return true; + } + return false; +} + +fn refersDead(atom: *Atom, macho_file: *MachO) bool { + for (atom.relocs.items) |rel| { + const target_atom = rel.getTargetAtom(macho_file) orelse continue; + const target_sym = target_atom.getSymbol(macho_file); + if (target_sym.n_desc == MachO.N_DESC_GCED) return true; + } + return false; +} + +fn mark( + roots: std.AutoHashMap(*Atom, void), + alive: *std.AutoHashMap(*Atom, void), + macho_file: *MachO, +) !void { + try alive.ensureUnusedCapacity(roots.count()); + + var it = roots.keyIterator(); + while (it.next()) |root| { + try markLive(root.*, alive, macho_file); + } + + var loop: bool = true; + while (loop) { + loop = false; + + for (macho_file.objects.items) |object| { + for (object.managed_atoms.items) |atom| { + if (alive.contains(atom)) continue; + const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue; + if (source_sym.tentative()) continue; + const source_sect = object.getSourceSection(source_sym.n_sect - 1); + if (source_sect.isDontDeadStripIfReferencesLive() and refersLive(atom, alive.*, macho_file)) { + try markLive(atom, alive, macho_file); + loop = true; + } + } + } + } +} + +fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *MachO) !void { + // Any section that ends up here will be updated, that is, + // its size and alignment recalculated. 
+ var gc_sections = std.AutoHashMap(MatchingSection, void).init(arena); + var loop: bool = true; + while (loop) { + loop = false; + + for (macho_file.objects.items) |object| { + for (object.getSourceSymtab()) |_, source_index| { + const atom = object.getAtomForSymbol(@intCast(u32, source_index)) orelse continue; + if (alive.contains(atom)) continue; + + const global = atom.getSymbolWithLoc(); + const sym = atom.getSymbolPtr(macho_file); + const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + + if (sym.n_desc == MachO.N_DESC_GCED) continue; + if (!sym.ext() and !refersDead(atom, macho_file)) continue; + + macho_file.logAtom(atom); + sym.n_desc = MachO.N_DESC_GCED; + removeAtomFromSection(atom, match, macho_file); + _ = try gc_sections.put(match, {}); + + for (atom.contained.items) |sym_off| { + const inner = macho_file.getSymbolPtr(.{ + .sym_index = sym_off.sym_index, + .file = atom.file, + }); + inner.n_desc = MachO.N_DESC_GCED; + } + + if (macho_file.got_entries_table.contains(global)) { + const got_atom = macho_file.getGotAtomForSymbol(global).?; + const got_sym = got_atom.getSymbolPtr(macho_file); + got_sym.n_desc = MachO.N_DESC_GCED; + } + + if (macho_file.stubs_table.contains(global)) { + const stubs_atom = macho_file.getStubsAtomForSymbol(global).?; + const stubs_sym = stubs_atom.getSymbolPtr(macho_file); + stubs_sym.n_desc = MachO.N_DESC_GCED; + } + + if (macho_file.tlv_ptr_entries_table.contains(global)) { + const tlv_ptr_atom = macho_file.getTlvPtrAtomForSymbol(global).?; + const tlv_ptr_sym = tlv_ptr_atom.getSymbolPtr(macho_file); + tlv_ptr_sym.n_desc = MachO.N_DESC_GCED; + } + + loop = true; + } + } + } + + for (macho_file.got_entries.items) |entry| { + const sym = entry.getSymbol(macho_file); + if (sym.n_desc != MachO.N_DESC_GCED) continue; + + // TODO tombstone + const atom = entry.getAtom(macho_file); + const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + removeAtomFromSection(atom, match, macho_file); + _ = try 
gc_sections.put(match, {}); + _ = macho_file.got_entries_table.remove(entry.target); + } + + for (macho_file.stubs.items) |entry| { + const sym = entry.getSymbol(macho_file); + if (sym.n_desc != MachO.N_DESC_GCED) continue; + + // TODO tombstone + const atom = entry.getAtom(macho_file); + const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + removeAtomFromSection(atom, match, macho_file); + _ = try gc_sections.put(match, {}); + _ = macho_file.stubs_table.remove(entry.target); + } + + for (macho_file.tlv_ptr_entries.items) |entry| { + const sym = entry.getSymbol(macho_file); + if (sym.n_desc != MachO.N_DESC_GCED) continue; + + // TODO tombstone + const atom = entry.getAtom(macho_file); + const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + removeAtomFromSection(atom, match, macho_file); + _ = try gc_sections.put(match, {}); + _ = macho_file.tlv_ptr_entries_table.remove(entry.target); + } + + var gc_sections_it = gc_sections.iterator(); + while (gc_sections_it.next()) |entry| { + const match = entry.key_ptr.*; + const sect = macho_file.getSectionPtr(match); + if (sect.size == 0) continue; // Pruning happens automatically in next step. 
+ + sect.@"align" = 0; + sect.size = 0; + + var atom = macho_file.atoms.get(match).?; + + while (atom.prev) |prev| { + atom = prev; + } + + while (true) { + const atom_alignment = try math.powi(u32, 2, atom.alignment); + const aligned_end_addr = mem.alignForwardGeneric(u64, sect.size, atom_alignment); + const padding = aligned_end_addr - sect.size; + sect.size += padding + atom.size; + sect.@"align" = @maximum(sect.@"align", atom.alignment); + + if (atom.next) |next| { + atom = next; + } else break; + } + } +} From 0901e4805c81873defa0eb595a126473eb09acab Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 21 Jul 2022 22:21:34 +0200 Subject: [PATCH 24/27] macho: fix zig cc and c++ using stage2 llvm --- src/link/MachO.zig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 4c344c6260..987098cd1f 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -432,7 +432,7 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*MachO { CodeSignature.init(page_size) else null, - .mode = if (use_stage1 or use_llvm or options.cache_mode == .whole) + .mode = if (use_stage1 or use_llvm or options.module == null or options.cache_mode == .whole) .one_shot else .incremental, @@ -442,6 +442,8 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*MachO { self.llvm_object = try LlvmObject.create(gpa, options); } + log.debug("selected linker mode '{s}'", .{@tagName(self.mode)}); + return self; } From f8458a549ba7fbd29b3d045019313704f1305e89 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 22 Jul 2022 14:20:28 +0200 Subject: [PATCH 25/27] macho: dead_strip defaults: off for debug, on for release --- src/link/MachO.zig | 3 ++- src/link/MachO/dead_strip.zig | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 987098cd1f..e24d49cc9c 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -712,7 +712,8 @@ fn linkOneShot(self: *MachO, 
comp: *Compilation, prog_node: *std.Progress.Node) const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; const is_exe_or_dyn_lib = is_dyn_lib or self.base.options.output_mode == .Exe; const stack_size = self.base.options.stack_size_override orelse 0; - const gc_sections = self.base.options.gc_sections orelse false; + const is_debug_build = self.base.options.optimize_mode == .Debug; + const gc_sections = self.base.options.gc_sections orelse !is_debug_build; const id_symlink_basename = "zld.id"; diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index a953e5bc19..b3454dda57 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -11,8 +11,6 @@ const MachO = @import("../MachO.zig"); const MatchingSection = MachO.MatchingSection; pub fn gcAtoms(macho_file: *MachO) !void { - assert(macho_file.base.options.gc_sections.?); - const gpa = macho_file.base.allocator; var arena_allocator = std.heap.ArenaAllocator.init(gpa); defer arena_allocator.deinit(); From 600348283fa5ea9646f91997e0a32f4632ca30b8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 22 Jul 2022 21:19:33 +0200 Subject: [PATCH 26/27] macho: include __StaticInit input sections as GC roots --- src/link/MachO/dead_strip.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index b3454dda57..909a0450d6 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -92,6 +92,7 @@ fn collectRoots(roots: *std.AutoHashMap(*Atom, void), macho_file: *MachO) !void const source_sect = object.getSourceSection(source_sym.n_sect - 1); const is_gc_root = blk: { if (source_sect.isDontDeadStrip()) break :blk true; + if (mem.eql(u8, "__StaticInit", source_sect.sectName())) break :blk true; switch (source_sect.type_()) { macho.S_MOD_INIT_FUNC_POINTERS, macho.S_MOD_TERM_FUNC_POINTERS, From cf6cfc830db89e0031200d1a16c93eb7801cb911 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 
23 Jul 2022 00:13:32 +0200 Subject: [PATCH 27/27] macho: fix use-after-move in placeDecl Previously, we would get a pointer to a slot in the symbol table, apply changes to the symbol, and return the pointer. This however didn't take into account that the symbol table may be moved in memory in-between the modification and return from the function (`fn placeDecl`). Prior to my rewrite, this was not possible within the body of the said function. However, my rewrite revamped how we allocate GOT atoms and their matching symtab indexes, which now may cause a move in memory of the container. --- src/link/MachO.zig | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index e24d49cc9c..0f2cbfa844 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2310,11 +2310,11 @@ fn shiftLocalsByOffset(self: *MachO, match: MatchingSection, offset: i64) !void var atom = self.atoms.get(match) orelse return; while (true) { - const atom_sym = &self.locals.items[atom.sym_index]; + const atom_sym = atom.getSymbolPtr(self); atom_sym.n_value = @intCast(u64, @intCast(i64, atom_sym.n_value) + offset); for (atom.contained.items) |sym_at_off| { - const contained_sym = &self.locals.items[sym_at_off.sym_index]; + const contained_sym = self.getSymbolPtr(.{ .sym_index = sym_at_off.sym_index, .file = atom.file }); contained_sym.n_value = @intCast(u64, @intCast(i64, contained_sym.n_value) + offset); } @@ -3488,7 +3488,7 @@ fn shrinkAtom(self: *MachO, atom: *Atom, new_block_size: u64, match: MatchingSec } fn growAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, match: MatchingSection) !u64 { - const sym = self.locals.items[atom.sym_index]; + const sym = atom.getSymbol(self); const align_ok = mem.alignBackwardGeneric(u64, sym.n_value, alignment) == sym.n_value; const need_realloc = !align_ok or new_atom_size > atom.capacity(self); if (!need_realloc) return sym.n_value; @@ -3643,14 +3643,14 @@ pub 
fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv }, } - const symbol = try self.placeDecl(decl_index, decl.link.macho.code.items.len); + const addr = try self.placeDecl(decl_index, decl.link.macho.code.items.len); if (decl_state) |*ds| { try self.d_sym.?.dwarf.commitDeclState( &self.base, module, decl, - symbol.n_value, + addr, decl.link.macho.size, ds, ); @@ -3731,7 +3731,7 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu errdefer self.freeAtom(atom, match, true); - const symbol = &self.locals.items[atom.sym_index]; + const symbol = atom.getSymbolPtr(self); symbol.* = .{ .n_strx = name_str_index, .n_type = macho.N_SECT, @@ -3814,14 +3814,14 @@ pub fn updateDecl(self: *MachO, module: *Module, decl_index: Module.Decl.Index) }, } }; - const symbol = try self.placeDecl(decl_index, code.len); + const addr = try self.placeDecl(decl_index, code.len); if (decl_state) |*ds| { try self.d_sym.?.dwarf.commitDeclState( &self.base, module, decl, - symbol.n_value, + addr, decl.link.macho.size, ds, ); @@ -3977,12 +3977,11 @@ fn getMatchingSectionAtom( return match; } -fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !*macho.nlist_64 { +fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !u64 { const module = self.base.options.module.?; const decl = module.declPtr(decl_index); const required_alignment = decl.getAlignment(self.base.options.target); assert(decl.link.macho.sym_index != 0); // Caller forgot to call allocateDeclIndexes() - const symbol = &self.locals.items[decl.link.macho.sym_index]; const sym_name = try decl.getFullyQualifiedName(module); defer self.base.allocator.free(sym_name); @@ -4000,6 +3999,7 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !*mac const match = decl_ptr.*.?; if (decl.link.macho.size != 0) { + const symbol = decl.link.macho.getSymbolPtr(self); const capacity = decl.link.macho.capacity(self); const 
need_realloc = code_len > capacity or !mem.isAlignedGeneric(u64, symbol.n_value, required_alignment); @@ -4033,6 +4033,7 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !*mac errdefer self.freeAtom(&decl.link.macho, match, false); + const symbol = decl.link.macho.getSymbolPtr(self); symbol.* = .{ .n_strx = name_str_index, .n_type = macho.N_SECT, @@ -4047,7 +4048,7 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !*mac self.got_entries.items[got_index].sym_index = got_atom.sym_index; } - return symbol; + return decl.link.macho.getSymbol(self).n_value; } pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl: *const Module.Decl) !void { @@ -5233,7 +5234,7 @@ fn allocateAtom( const big_atom = free_list.items[i]; // We now have a pointer to a live atom that has too much capacity. // Is it enough that we could fit this new atom? - const sym = self.locals.items[big_atom.sym_index]; + const sym = big_atom.getSymbol(self); const capacity = big_atom.capacity(self); const ideal_capacity = if (needs_padding) padToIdeal(capacity) else capacity; const ideal_capacity_end_vaddr = math.add(u64, sym.n_value, ideal_capacity) catch ideal_capacity; @@ -5264,7 +5265,7 @@ fn allocateAtom( } break :blk new_start_vaddr; } else if (self.atoms.get(match)) |last| { - const last_symbol = self.locals.items[last.sym_index]; + const last_symbol = last.getSymbol(self); const ideal_capacity = if (needs_padding) padToIdeal(last.size) else last.size; const ideal_capacity_end_vaddr = last_symbol.n_value + ideal_capacity; const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, alignment);