From e6891b2422522d8e572cc5f57fe6dbbfc7b34cb5 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 18 Aug 2023 16:47:48 +0200 Subject: [PATCH 01/57] macho: remove dead code --- src/link/MachO.zig | 255 --------------------------------------------- 1 file changed, 255 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 92224da63b..36a95d8ff4 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3794,261 +3794,6 @@ pub fn findFirst(comptime T: type, haystack: []align(1) const T, start: usize, p return i; } -// fn snapshotState(self: *MachO) !void { -// const emit = self.base.options.emit orelse { -// log.debug("no emit directory found; skipping snapshot...", .{}); -// return; -// }; - -// const Snapshot = struct { -// const Node = struct { -// const Tag = enum { -// section_start, -// section_end, -// atom_start, -// atom_end, -// relocation, - -// pub fn jsonStringify( -// tag: Tag, -// options: std.json.StringifyOptions, -// out_stream: anytype, -// ) !void { -// _ = options; -// switch (tag) { -// .section_start => try out_stream.writeAll("\"section_start\""), -// .section_end => try out_stream.writeAll("\"section_end\""), -// .atom_start => try out_stream.writeAll("\"atom_start\""), -// .atom_end => try out_stream.writeAll("\"atom_end\""), -// .relocation => try out_stream.writeAll("\"relocation\""), -// } -// } -// }; -// const Payload = struct { -// name: []const u8 = "", -// aliases: [][]const u8 = &[0][]const u8{}, -// is_global: bool = false, -// target: u64 = 0, -// }; -// address: u64, -// tag: Tag, -// payload: Payload, -// }; -// timestamp: i128, -// nodes: []Node, -// }; - -// var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator); -// defer arena_allocator.deinit(); -// const arena = arena_allocator.allocator(); - -// const out_file = try emit.directory.handle.createFile("snapshots.json", .{ -// .truncate = false, -// .read = true, -// }); -// defer out_file.close(); - -// if (out_file.seekFromEnd(-1)) { -// 
try out_file.writer().writeByte(','); -// } else |err| switch (err) { -// error.Unseekable => try out_file.writer().writeByte('['), -// else => |e| return e, -// } -// const writer = out_file.writer(); - -// var snapshot = Snapshot{ -// .timestamp = std.time.nanoTimestamp(), -// .nodes = undefined, -// }; -// var nodes = std.ArrayList(Snapshot.Node).init(arena); - -// for (self.section_ordinals.keys()) |key| { -// const sect = self.getSection(key); -// const sect_name = try std.fmt.allocPrint(arena, "{s},{s}", .{ sect.segName(), sect.sectName() }); -// try nodes.append(.{ -// .address = sect.addr, -// .tag = .section_start, -// .payload = .{ .name = sect_name }, -// }); - -// const is_tlv = sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; - -// var atom: *Atom = self.atoms.get(key) orelse { -// try nodes.append(.{ -// .address = sect.addr + sect.size, -// .tag = .section_end, -// .payload = .{}, -// }); -// continue; -// }; - -// while (atom.prev) |prev| { -// atom = prev; -// } - -// while (true) { -// const atom_sym = atom.getSymbol(self); -// var node = Snapshot.Node{ -// .address = atom_sym.n_value, -// .tag = .atom_start, -// .payload = .{ -// .name = atom.getName(self), -// .is_global = self.globals.contains(atom.getName(self)), -// }, -// }; - -// var aliases = std.ArrayList([]const u8).init(arena); -// for (atom.contained.items) |sym_off| { -// if (sym_off.offset == 0) { -// try aliases.append(self.getSymbolName(.{ -// .sym_index = sym_off.sym_index, -// .file = atom.file, -// })); -// } -// } -// node.payload.aliases = aliases.toOwnedSlice(); -// try nodes.append(node); - -// var relocs = try std.ArrayList(Snapshot.Node).initCapacity(arena, atom.relocs.items.len); -// for (atom.relocs.items) |rel| { -// const source_addr = blk: { -// const source_sym = atom.getSymbol(self); -// break :blk source_sym.n_value + rel.offset; -// }; -// const target_addr = blk: { -// const target_atom = rel.getTargetAtom(self) orelse { -// // If there is no atom for target, we 
still need to check for special, atom-less -// // symbols such as `___dso_handle`. -// const target_name = self.getSymbolName(rel.target); -// if (self.globals.contains(target_name)) { -// const atomless_sym = self.getSymbol(rel.target); -// break :blk atomless_sym.n_value; -// } -// break :blk 0; -// }; -// const target_sym = if (target_atom.isSymbolContained(rel.target, self)) -// self.getSymbol(rel.target) -// else -// target_atom.getSymbol(self); -// const base_address: u64 = if (is_tlv) base_address: { -// const sect_id: u16 = sect_id: { -// if (self.tlv_data_section_index) |i| { -// break :sect_id i; -// } else if (self.tlv_bss_section_index) |i| { -// break :sect_id i; -// } else unreachable; -// }; -// break :base_address self.getSection(.{ -// .seg = self.data_segment_cmd_index.?, -// .sect = sect_id, -// }).addr; -// } else 0; -// break :blk target_sym.n_value - base_address; -// }; - -// relocs.appendAssumeCapacity(.{ -// .address = source_addr, -// .tag = .relocation, -// .payload = .{ .target = target_addr }, -// }); -// } - -// if (atom.contained.items.len == 0) { -// try nodes.appendSlice(relocs.items); -// } else { -// // Need to reverse iteration order of relocs since by default for relocatable sources -// // they come in reverse. For linking, this doesn't matter in any way, however, for -// // arranging the memoryline for displaying it does. 
-// std.mem.reverse(Snapshot.Node, relocs.items); - -// var next_i: usize = 0; -// var last_rel: usize = 0; -// while (next_i < atom.contained.items.len) : (next_i += 1) { -// const loc = SymbolWithLoc{ -// .sym_index = atom.contained.items[next_i].sym_index, -// .file = atom.file, -// }; -// const cont_sym = self.getSymbol(loc); -// const cont_sym_name = self.getSymbolName(loc); -// var contained_node = Snapshot.Node{ -// .address = cont_sym.n_value, -// .tag = .atom_start, -// .payload = .{ -// .name = cont_sym_name, -// .is_global = self.globals.contains(cont_sym_name), -// }, -// }; - -// // Accumulate aliases -// var inner_aliases = std.ArrayList([]const u8).init(arena); -// while (true) { -// if (next_i + 1 >= atom.contained.items.len) break; -// const next_sym_loc = SymbolWithLoc{ -// .sym_index = atom.contained.items[next_i + 1].sym_index, -// .file = atom.file, -// }; -// const next_sym = self.getSymbol(next_sym_loc); -// if (next_sym.n_value != cont_sym.n_value) break; -// const next_sym_name = self.getSymbolName(next_sym_loc); -// if (self.globals.contains(next_sym_name)) { -// try inner_aliases.append(contained_node.payload.name); -// contained_node.payload.name = next_sym_name; -// contained_node.payload.is_global = true; -// } else try inner_aliases.append(next_sym_name); -// next_i += 1; -// } - -// const cont_size = if (next_i + 1 < atom.contained.items.len) -// self.getSymbol(.{ -// .sym_index = atom.contained.items[next_i + 1].sym_index, -// .file = atom.file, -// }).n_value - cont_sym.n_value -// else -// atom_sym.n_value + atom.size - cont_sym.n_value; - -// contained_node.payload.aliases = inner_aliases.toOwnedSlice(); -// try nodes.append(contained_node); - -// for (relocs.items[last_rel..]) |rel| { -// if (rel.address >= cont_sym.n_value + cont_size) { -// break; -// } -// try nodes.append(rel); -// last_rel += 1; -// } - -// try nodes.append(.{ -// .address = cont_sym.n_value + cont_size, -// .tag = .atom_end, -// .payload = .{}, -// }); -// 
} -// } - -// try nodes.append(.{ -// .address = atom_sym.n_value + atom.size, -// .tag = .atom_end, -// .payload = .{}, -// }); - -// if (atom.next) |next| { -// atom = next; -// } else break; -// } - -// try nodes.append(.{ -// .address = sect.addr + sect.size, -// .tag = .section_end, -// .payload = .{}, -// }); -// } - -// snapshot.nodes = nodes.toOwnedSlice(); - -// try std.json.stringify(snapshot, .{}, writer); -// try writer.writeByte(']'); -// } - pub fn logSections(self: *MachO) void { log.debug("sections:", .{}); for (self.sections.items(.header), 0..) |header, i| { From 69193a4ae421a1d69481addbba03e459df8d2a14 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 18 Aug 2023 17:20:43 +0200 Subject: [PATCH 02/57] macho: remove page_size field as it can be computed when and where required --- src/link/MachO.zig | 52 ++++++++++++++++++--------------- src/link/MachO/DebugSymbols.zig | 18 +++++++----- src/link/MachO/zld.zig | 17 +++++------ 3 files changed, 46 insertions(+), 41 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 36a95d8ff4..8125c27e80 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -103,10 +103,6 @@ llvm_object: ?*LlvmObject = null, /// Debug symbols bundle (or dSym). d_sym: ?DebugSymbols = null, -/// Page size is dependent on the target cpu architecture. -/// For x86_64 that's 4KB, whereas for aarch64, that's 16KB. 
-page_size: u16, - mode: Mode, dyld_info_cmd: macho.dyld_info_command = .{}, @@ -396,7 +392,6 @@ pub fn openPath(allocator: Allocator, options: link.Options) !*MachO { .allocator = allocator, .dwarf = link.File.Dwarf.init(allocator, &self.base, options.target), .file = d_sym_file, - .page_size = self.page_size, }; } @@ -413,16 +408,13 @@ pub fn openPath(allocator: Allocator, options: link.Options) !*MachO { try self.populateMissingMetadata(); if (self.d_sym) |*d_sym| { - try d_sym.populateMissingMetadata(); + try d_sym.populateMissingMetadata(self); } return self; } pub fn createEmpty(gpa: Allocator, options: link.Options) !*MachO { - const cpu_arch = options.target.cpu.arch; - const page_size: u16 = if (cpu_arch == .aarch64) 0x4000 else 0x1000; - const self = try gpa.create(MachO); errdefer gpa.destroy(self); @@ -433,7 +425,6 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*MachO { .allocator = gpa, .file = null, }, - .page_size = page_size, .mode = if (options.use_llvm or options.module == null or options.cache_mode == .whole) .zld else @@ -698,7 +689,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No // written out to the file. // The most important here is to have the correct vm and filesize of the __LINKEDIT segment // where the code signature goes into. - var codesig = CodeSignature.init(self.page_size); + var codesig = CodeSignature.init(getPageSize(self.base.options.target.cpu.arch)); codesig.code_directory.ident = self.base.options.emit.?.sub_path; if (self.base.options.entitlements) |path| { try codesig.addEntitlements(self.base.allocator, path); @@ -2526,7 +2517,7 @@ fn populateMissingMetadata(self: *MachO) !void { // The first __TEXT segment is immovable and covers MachO header and load commands. 
self.header_segment_cmd_index = @as(u8, @intCast(self.segments.items.len)); const ideal_size = @max(self.base.options.headerpad_size orelse 0, default_headerpad_size); - const needed_size = mem.alignForward(u64, padToIdeal(ideal_size), self.page_size); + const needed_size = mem.alignForward(u64, padToIdeal(ideal_size), getPageSize(self.base.options.target.cpu.arch)); log.debug("found __TEXT segment (header-only) free space 0x{x} to 0x{x}", .{ 0, needed_size }); @@ -2663,7 +2654,8 @@ fn populateMissingMetadata(self: *MachO) !void { fn calcPagezeroSize(self: *MachO) u64 { const pagezero_vmsize = self.base.options.pagezero_size orelse default_pagezero_vmsize; - const aligned_pagezero_vmsize = mem.alignBackward(u64, pagezero_vmsize, self.page_size); + const page_size = getPageSize(self.base.options.target.cpu.arch); + const aligned_pagezero_vmsize = mem.alignBackward(u64, pagezero_vmsize, page_size); if (self.base.options.output_mode == .Lib) return 0; if (aligned_pagezero_vmsize == 0) return 0; if (aligned_pagezero_vmsize != pagezero_vmsize) { @@ -2681,17 +2673,18 @@ fn allocateSection(self: *MachO, segname: []const u8, sectname: []const u8, opts reserved2: u32 = 0, }) !u8 { const gpa = self.base.allocator; + const page_size = getPageSize(self.base.options.target.cpu.arch); // In incremental context, we create one section per segment pairing. This way, // we can move the segment in raw file as we please. const segment_id = @as(u8, @intCast(self.segments.items.len)); const section_id = @as(u8, @intCast(self.sections.slice().len)); const vmaddr = blk: { const prev_segment = self.segments.items[segment_id - 1]; - break :blk mem.alignForward(u64, prev_segment.vmaddr + prev_segment.vmsize, self.page_size); + break :blk mem.alignForward(u64, prev_segment.vmaddr + prev_segment.vmsize, page_size); }; // We commit more memory than needed upfront so that we don't have to reallocate too soon. 
- const vmsize = mem.alignForward(u64, opts.size, self.page_size); - const off = self.findFreeSpace(opts.size, self.page_size); + const vmsize = mem.alignForward(u64, opts.size, page_size); + const off = self.findFreeSpace(opts.size, page_size); log.debug("found {s},{s} free space 0x{x} to 0x{x} (0x{x} - 0x{x})", .{ segname, @@ -2740,9 +2733,10 @@ fn growSection(self: *MachO, sect_id: u8, needed_size: u64) !void { const segment = &self.segments.items[segment_index]; const maybe_last_atom_index = self.sections.items(.last_atom_index)[sect_id]; const sect_capacity = self.allocatedSize(header.offset); + const page_size = getPageSize(self.base.options.target.cpu.arch); if (needed_size > sect_capacity) { - const new_offset = self.findFreeSpace(needed_size, self.page_size); + const new_offset = self.findFreeSpace(needed_size, page_size); const current_size = if (maybe_last_atom_index) |last_atom_index| blk: { const last_atom = self.getAtom(last_atom_index); const sym = last_atom.getSymbol(self); @@ -2774,16 +2768,17 @@ fn growSection(self: *MachO, sect_id: u8, needed_size: u64) !void { } header.size = needed_size; - segment.filesize = mem.alignForward(u64, needed_size, self.page_size); - segment.vmsize = mem.alignForward(u64, needed_size, self.page_size); + segment.filesize = mem.alignForward(u64, needed_size, page_size); + segment.vmsize = mem.alignForward(u64, needed_size, page_size); } fn growSectionVirtualMemory(self: *MachO, sect_id: u8, needed_size: u64) !void { + const page_size = getPageSize(self.base.options.target.cpu.arch); const header = &self.sections.items(.header)[sect_id]; const segment = self.getSegmentPtr(sect_id); const increased_size = padToIdeal(needed_size); const old_aligned_end = segment.vmaddr + segment.vmsize; - const new_aligned_end = segment.vmaddr + mem.alignForward(u64, increased_size, self.page_size); + const new_aligned_end = segment.vmaddr + mem.alignForward(u64, increased_size, page_size); const diff = new_aligned_end - old_aligned_end; 
log.debug("shifting every segment after {s},{s} in virtual memory by {x}", .{ header.segName(), @@ -2955,6 +2950,7 @@ fn writeSegmentHeaders(self: *MachO, writer: anytype) !void { } fn writeLinkeditSegmentData(self: *MachO) !void { + const page_size = getPageSize(self.base.options.target.cpu.arch); const seg = self.getLinkeditSegmentPtr(); seg.filesize = 0; seg.vmsize = 0; @@ -2962,17 +2958,17 @@ fn writeLinkeditSegmentData(self: *MachO) !void { for (self.segments.items, 0..) |segment, id| { if (self.linkedit_segment_cmd_index.? == @as(u8, @intCast(id))) continue; if (seg.vmaddr < segment.vmaddr + segment.vmsize) { - seg.vmaddr = mem.alignForward(u64, segment.vmaddr + segment.vmsize, self.page_size); + seg.vmaddr = mem.alignForward(u64, segment.vmaddr + segment.vmsize, page_size); } if (seg.fileoff < segment.fileoff + segment.filesize) { - seg.fileoff = mem.alignForward(u64, segment.fileoff + segment.filesize, self.page_size); + seg.fileoff = mem.alignForward(u64, segment.fileoff + segment.filesize, page_size); } } try self.writeDyldInfoData(); try self.writeSymtabs(); - seg.vmsize = mem.alignForward(u64, seg.filesize, self.page_size); + seg.vmsize = mem.alignForward(u64, seg.filesize, page_size); } fn collectRebaseDataFromTableSection(self: *MachO, sect_id: u8, rebase: *Rebase, table: anytype) !void { @@ -3456,7 +3452,7 @@ fn writeCodeSignaturePadding(self: *MachO, code_sig: *CodeSignature) !void { const offset = mem.alignForward(u64, seg.fileoff + seg.filesize, 16); const needed_size = code_sig.estimateSize(offset); seg.filesize = offset + needed_size - seg.fileoff; - seg.vmsize = mem.alignForward(u64, seg.filesize, self.page_size); + seg.vmsize = mem.alignForward(u64, seg.filesize, getPageSize(self.base.options.target.cpu.arch)); log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); // Pad out the space. We need to do this to calculate valid hashes for everything in the file // except for code signature data. 
@@ -3781,6 +3777,14 @@ pub fn getDebugSymbols(self: *MachO) ?*DebugSymbols { return &self.d_sym.?; } +pub inline fn getPageSize(cpu_arch: std.Target.Cpu.Arch) u16 { + return switch (cpu_arch) { + .aarch64 => 0x4000, + .x86_64 => 0x1000, + else => unreachable, + }; +} + pub fn findFirst(comptime T: type, haystack: []align(1) const T, start: usize, predicate: anytype) usize { if (!@hasDecl(@TypeOf(predicate), "predicate")) @compileError("Predicate is required to define fn predicate(@This(), T) bool"); diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index ade26de920..07bf409836 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -24,7 +24,6 @@ const Type = @import("../../type.zig").Type; allocator: Allocator, dwarf: Dwarf, file: fs.File, -page_size: u16, symtab_cmd: macho.symtab_command = .{}, @@ -62,13 +61,14 @@ pub const Reloc = struct { /// You must call this function *after* `MachO.populateMissingMetadata()` /// has been called to get a viable debug symbols output. 
-pub fn populateMissingMetadata(self: *DebugSymbols) !void { +pub fn populateMissingMetadata(self: *DebugSymbols, macho_file: *MachO) !void { if (self.dwarf_segment_cmd_index == null) { self.dwarf_segment_cmd_index = @as(u8, @intCast(self.segments.items.len)); - const off = @as(u64, @intCast(self.page_size)); + const page_size = MachO.getPageSize(macho_file.base.options.target.cpu.arch); + const off = @as(u64, @intCast(page_size)); const ideal_size: u16 = 200 + 128 + 160 + 250; - const needed_size = mem.alignForward(u64, padToIdeal(ideal_size), self.page_size); + const needed_size = mem.alignForward(u64, padToIdeal(ideal_size), page_size); log.debug("found __DWARF segment free space 0x{x} to 0x{x}", .{ off, off + needed_size }); @@ -355,7 +355,8 @@ fn finalizeDwarfSegment(self: *DebugSymbols, macho_file: *MachO) void { file_size = @max(file_size, header.offset + header.size); } - const aligned_size = mem.alignForward(u64, file_size, self.page_size); + const page_size = MachO.getPageSize(macho_file.base.options.target.cpu.arch); + const aligned_size = mem.alignForward(u64, file_size, page_size); dwarf_segment.vmaddr = base_vmaddr; dwarf_segment.filesize = aligned_size; dwarf_segment.vmsize = aligned_size; @@ -364,12 +365,12 @@ fn finalizeDwarfSegment(self: *DebugSymbols, macho_file: *MachO) void { linkedit.vmaddr = mem.alignForward( u64, dwarf_segment.vmaddr + aligned_size, - self.page_size, + page_size, ); linkedit.fileoff = mem.alignForward( u64, dwarf_segment.fileoff + aligned_size, - self.page_size, + page_size, ); log.debug("found __LINKEDIT segment free space at 0x{x}", .{linkedit.fileoff}); } @@ -457,8 +458,9 @@ fn writeLinkeditSegmentData(self: *DebugSymbols, macho_file: *MachO) !void { try self.writeSymtab(macho_file); try self.writeStrtab(); + const page_size = MachO.getPageSize(macho_file.base.options.target.cpu.arch); const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; - const aligned_size = mem.alignForward(u64, seg.filesize, 
self.page_size); + const aligned_size = mem.alignForward(u64, seg.filesize, page_size); seg.vmsize = aligned_size; } diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index e4dde33453..bfadd064db 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -42,7 +42,6 @@ const Rebase = @import("dyld_info/Rebase.zig"); pub const Zld = struct { gpa: Allocator, file: fs.File, - page_size: u16, options: *const link.Options, dyld_info_cmd: macho.dyld_info_command = .{}, @@ -1208,7 +1207,8 @@ pub const Zld = struct { fn createSegments(self: *Zld) !void { const pagezero_vmsize = self.options.pagezero_size orelse MachO.default_pagezero_vmsize; - const aligned_pagezero_vmsize = mem.alignBackward(u64, pagezero_vmsize, self.page_size); + const page_size = MachO.getPageSize(self.options.target.cpu.arch); + const aligned_pagezero_vmsize = mem.alignBackward(u64, pagezero_vmsize, page_size); if (self.options.output_mode != .Lib and aligned_pagezero_vmsize > 0) { if (aligned_pagezero_vmsize != pagezero_vmsize) { log.warn("requested __PAGEZERO size (0x{x}) is not page aligned", .{pagezero_vmsize}); @@ -1635,8 +1635,9 @@ pub const Zld = struct { segment.vmsize = start; } - segment.filesize = mem.alignForward(u64, segment.filesize, self.page_size); - segment.vmsize = mem.alignForward(u64, segment.vmsize, self.page_size); + const page_size = MachO.getPageSize(self.options.target.cpu.arch); + segment.filesize = mem.alignForward(u64, segment.filesize, page_size); + segment.vmsize = mem.alignForward(u64, segment.vmsize, page_size); } const InitSectionOpts = struct { @@ -1746,7 +1747,7 @@ pub const Zld = struct { try self.writeSymtabs(); const seg = self.getLinkeditSegmentPtr(); - seg.vmsize = mem.alignForward(u64, seg.filesize, self.page_size); + seg.vmsize = mem.alignForward(u64, seg.filesize, MachO.getPageSize(self.options.target.cpu.arch)); } fn collectRebaseDataFromContainer( @@ -2630,7 +2631,7 @@ pub const Zld = struct { const offset = mem.alignForward(u64, 
seg.fileoff + seg.filesize, 16); const needed_size = code_sig.estimateSize(offset); seg.filesize = offset + needed_size - seg.fileoff; - seg.vmsize = mem.alignForward(u64, seg.filesize, self.page_size); + seg.vmsize = mem.alignForward(u64, seg.filesize, MachO.getPageSize(self.options.target.cpu.arch)); log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); // Pad out the space. We need to do this to calculate valid hashes for everything in the file // except for code signature data. @@ -3491,7 +3492,6 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{}); } } else { - const page_size = macho_file.page_size; const sub_path = options.emit.?.sub_path; const file = try directory.handle.createFile(sub_path, .{ @@ -3504,7 +3504,6 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr var zld = Zld{ .gpa = gpa, .file = file, - .page_size = macho_file.page_size, .options = options, }; defer zld.deinit(); @@ -3818,7 +3817,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr // written out to the file. // The most important here is to have the correct vm and filesize of the __LINKEDIT segment // where the code signature goes into. 
- var codesig = CodeSignature.init(page_size); + var codesig = CodeSignature.init(MachO.getPageSize(zld.options.target.cpu.arch)); codesig.code_directory.ident = fs.path.basename(full_out_path); if (options.entitlements) |path| { try codesig.addEntitlements(zld.gpa, path); From 702bcfecf5732eceada9bfbca0804c706c238d49 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 19 Aug 2023 09:07:33 +0200 Subject: [PATCH 03/57] macho: simplify input file parsing for both drivers --- src/link/MachO.zig | 501 +++++++++++++++++++++++++++---------- src/link/MachO/Archive.zig | 36 +-- src/link/MachO/Dylib.zig | 33 +-- src/link/MachO/Object.zig | 41 +-- src/link/MachO/fat.zig | 50 ++-- src/link/MachO/zld.zig | 382 +++++----------------------- 6 files changed, 489 insertions(+), 554 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 8125c27e80..92aae67c22 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -111,6 +111,8 @@ dysymtab_cmd: macho.dysymtab_command = .{}, uuid_cmd: macho.uuid_command = .{}, codesig_cmd: macho.linkedit_data_command = .{ .cmd = .CODE_SIGNATURE }, +objects: std.ArrayListUnmanaged(Object) = .{}, +archives: std.ArrayListUnmanaged(Archive) = .{}, dylibs: std.ArrayListUnmanaged(Dylib) = .{}, dylibs_map: std.StringHashMapUnmanaged(u16) = .{}, referenced_dylibs: std.AutoArrayHashMapUnmanaged(u16, void) = .{}, @@ -586,8 +588,30 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No parent: u16, }, .Dynamic).init(arena); - try self.parseLibs(libs.keys(), libs.values(), self.base.options.sysroot, &dependent_libs); - try self.parseDependentLibs(self.base.options.sysroot, &dependent_libs); + for (libs.keys(), libs.values()) |path, lib| { + const in_file = try std.fs.cwd().openFile(path, .{}); + defer in_file.close(); + + parseLibrary( + self, + self.base.allocator, + in_file, + path, + lib, + false, + &dependent_libs, + &self.base.options, + ) catch |err| { + // TODO convert to error + log.err("{s}: parsing 
library failed with err {s}", .{ path, @errorName(err) }); + continue; + }; + } + + parseDependentLibs(self, self.base.allocator, &dependent_libs, &self.base.options) catch |err| { + // TODO convert to error + log.err("parsing dependent libraries failed with err {s}", .{@errorName(err)}); + }; } if (self.dyld_stub_binder_index == null) { @@ -880,175 +904,373 @@ fn resolveLib( return full_path; } -const ParseDylibError = error{ - OutOfMemory, - EmptyStubFile, - MismatchedCpuArchitecture, - UnsupportedCpuArchitecture, - EndOfStream, -} || fs.File.OpenError || std.os.PReadError || Dylib.Id.ParseError; +pub fn parsePositional( + ctx: anytype, + gpa: Allocator, + file: std.fs.File, + path: []const u8, + must_link: bool, + dependent_libs: anytype, + link_options: *const link.Options, +) !void { + const tracy = trace(@src()); + defer tracy.end(); -const DylibCreateOpts = struct { - syslibroot: ?[]const u8, + if (Object.isObject(file)) { + try parseObject(ctx, gpa, file, path, link_options); + } else { + try parseLibrary(ctx, gpa, file, path, .{ + .path = null, + .needed = false, + .weak = false, + }, must_link, dependent_libs, link_options); + } +} + +fn parseObject( + ctx: anytype, + gpa: Allocator, + file: std.fs.File, + path: []const u8, + link_options: *const link.Options, +) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const mtime: u64 = mtime: { + const stat = file.stat() catch break :mtime 0; + break :mtime @as(u64, @intCast(@divFloor(stat.mtime, 1_000_000_000))); + }; + const file_stat = try file.stat(); + const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; + const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); + + var object = Object{ + .name = try gpa.dupe(u8, path), + .mtime = mtime, + .contents = contents, + }; + errdefer object.deinit(gpa); + try object.parse(gpa); + try ctx.objects.append(gpa, object); + + const cpu_arch: std.Target.Cpu.Arch = switch 
(object.header.cputype) { + macho.CPU_TYPE_ARM64 => .aarch64, + macho.CPU_TYPE_X86_64 => .x86_64, + else => unreachable, + }; + const self_cpu_arch = link_options.target.cpu.arch; + + if (self_cpu_arch != cpu_arch) { + // TODO convert into an error + log.err("{s}: invalid architecture '{s}', expected '{s}'", .{ + path, + @tagName(cpu_arch), + @tagName(self_cpu_arch), + }); + } +} + +pub fn parseLibrary( + ctx: anytype, + gpa: Allocator, + file: std.fs.File, + path: []const u8, + lib: link.SystemLib, + must_link: bool, + dependent_libs: anytype, + link_options: *const link.Options, +) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const cpu_arch = link_options.target.cpu.arch; + + if (fat.isFatLibrary(file)) { + const offset = parseFatLibrary(ctx, file, path, cpu_arch) catch |err| switch (err) { + error.MissingArch => return, + else => |e| return e, + }; + try file.seekTo(offset); + + if (Archive.isArchive(file, offset)) { + try parseArchive(ctx, gpa, path, offset, must_link, cpu_arch); + } else if (Dylib.isDylib(file, offset)) { + try parseDylib(ctx, gpa, file, path, offset, dependent_libs, link_options, .{ + .needed = lib.needed, + .weak = lib.weak, + }); + } else { + // TODO convert into an error + log.err("{s}: unknown file type", .{path}); + return; + } + } else if (Archive.isArchive(file, 0)) { + try parseArchive(ctx, gpa, path, 0, must_link, cpu_arch); + } else if (Dylib.isDylib(file, 0)) { + try parseDylib(ctx, gpa, file, path, 0, dependent_libs, link_options, .{ + .needed = lib.needed, + .weak = lib.weak, + }); + } else { + parseLibStub(ctx, gpa, file, path, dependent_libs, link_options, .{ + .needed = lib.needed, + .weak = lib.weak, + }) catch |err| switch (err) { + error.NotLibStub, error.UnexpectedToken => { + // TODO convert into an error + log.err("{s}: unknown file type", .{path}); + return; + }, + else => |e| return e, + }; + } +} + +pub fn parseFatLibrary( + ctx: anytype, + file: std.fs.File, + path: []const u8, + cpu_arch: 
std.Target.Cpu.Arch, +) !u64 { + _ = ctx; + var buffer: [2]fat.Arch = undefined; + const fat_archs = try fat.parseArchs(file, &buffer); + const offset = for (fat_archs) |arch| { + if (arch.tag == cpu_arch) break arch.offset; + } else { + // TODO convert into an error + log.err("{s}: missing arch in universal file: expected {s}", .{ path, @tagName(cpu_arch) }); + return error.MissingArch; + }; + return offset; +} + +fn parseArchive( + ctx: anytype, + gpa: Allocator, + path: []const u8, + fat_offset: u64, + must_link: bool, + cpu_arch: std.Target.Cpu.Arch, +) !void { + + // We take ownership of the file so that we can store it for the duration of symbol resolution. + // TODO we shouldn't need to do that and could pre-parse the archive like we do for zld/ELF? + const file = try std.fs.cwd().openFile(path, .{}); + errdefer file.close(); + try file.seekTo(fat_offset); + + var archive = Archive{ + .file = file, + .fat_offset = fat_offset, + .name = try gpa.dupe(u8, path), + }; + errdefer archive.deinit(gpa); + + try archive.parse(gpa, file.reader()); + + // Verify arch and platform + if (archive.toc.values().len > 0) { + const offsets = archive.toc.values()[0].items; + assert(offsets.len > 0); + const off = offsets[0]; + var object = try archive.parseObject(gpa, off); // TODO we are doing all this work to pull the header only! 
+ defer object.deinit(gpa); + + const parsed_cpu_arch: std.Target.Cpu.Arch = switch (object.header.cputype) { + macho.CPU_TYPE_ARM64 => .aarch64, + macho.CPU_TYPE_X86_64 => .x86_64, + else => unreachable, + }; + if (cpu_arch != parsed_cpu_arch) { + // TODO convert into an error + log.err("{s}: invalid architecture in archive '{s}', expected '{s}'", .{ + path, + @tagName(parsed_cpu_arch), + @tagName(cpu_arch), + }); + return error.MissingArch; + } + } + + if (must_link) { + // Get all offsets from the ToC + var offsets = std.AutoArrayHashMap(u32, void).init(gpa); + defer offsets.deinit(); + for (archive.toc.values()) |offs| { + for (offs.items) |off| { + _ = try offsets.getOrPut(off); + } + } + for (offsets.keys()) |off| { + const object = try archive.parseObject(gpa, off); + try ctx.objects.append(gpa, object); + } + } else { + try ctx.archives.append(gpa, archive); + } +} + +const DylibOpts = struct { id: ?Dylib.Id = null, dependent: bool = false, needed: bool = false, weak: bool = false, }; -pub fn parseDylib( - self: *MachO, +fn parseDylib( + ctx: anytype, + gpa: Allocator, + file: std.fs.File, path: []const u8, + offset: u64, dependent_libs: anytype, - opts: DylibCreateOpts, -) ParseDylibError!bool { - const gpa = self.base.allocator; - const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { - error.FileNotFound => return false, - else => |e| return e, - }; - defer file.close(); + link_options: *const link.Options, + dylib_options: DylibOpts, +) !void { + const self_cpu_arch = link_options.target.cpu.arch; - const cpu_arch = self.base.options.target.cpu.arch; const file_stat = try file.stat(); var file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; - const reader = file.reader(); - const fat_offset = math.cast(usize, try fat.getLibraryOffset(reader, cpu_arch)) orelse - return error.Overflow; - try file.seekTo(fat_offset); - file_size -= fat_offset; + file_size -= offset; const contents = try file.readToEndAllocOptions(gpa, 
file_size, file_size, @alignOf(u64), null); defer gpa.free(contents); - const dylib_id = @as(u16, @intCast(self.dylibs.items.len)); - var dylib = Dylib{ .weak = opts.weak }; + var dylib = Dylib{ .weak = dylib_options.weak }; + errdefer dylib.deinit(gpa); - dylib.parseFromBinary( + try dylib.parseFromBinary( gpa, - cpu_arch, - dylib_id, + @intCast(ctx.dylibs.items.len), // TODO defer it till later dependent_libs, path, contents, - ) catch |err| switch (err) { - error.EndOfStream, error.NotDylib => { - try file.seekTo(0); + ); - var lib_stub = LibStub.loadFromFile(gpa, file) catch { - dylib.deinit(gpa); - return false; - }; - defer lib_stub.deinit(); + const cpu_arch: std.Target.Cpu.Arch = switch (dylib.header.?.cputype) { + macho.CPU_TYPE_ARM64 => .aarch64, + macho.CPU_TYPE_X86_64 => .x86_64, + else => unreachable, + }; + if (self_cpu_arch != cpu_arch) { + // TODO convert into an error + log.err("{s}: invalid architecture '{s}', expected '{s}'", .{ + path, + @tagName(cpu_arch), + @tagName(self_cpu_arch), + }); + return error.MissingArch; + } - try dylib.parseFromStub( - gpa, - self.base.options.target, - lib_stub, - dylib_id, - dependent_libs, - path, - ); - }, + // TODO verify platform + + addDylib(ctx, gpa, dylib, link_options, .{ + .needed = dylib_options.needed, + .weak = dylib_options.weak, + }) catch |err| switch (err) { + error.DylibAlreadyExists => dylib.deinit(gpa), else => |e| return e, }; +} - if (opts.id) |id| { +fn parseLibStub( + ctx: anytype, + gpa: Allocator, + file: std.fs.File, + path: []const u8, + dependent_libs: anytype, + link_options: *const link.Options, + dylib_options: DylibOpts, +) !void { + var lib_stub = try LibStub.loadFromFile(gpa, file); + defer lib_stub.deinit(); + + if (lib_stub.inner.len == 0) return error.NotLibStub; + + // TODO verify platform + + var dylib = Dylib{ .weak = dylib_options.weak }; + errdefer dylib.deinit(gpa); + + try dylib.parseFromStub( + gpa, + link_options.target, + lib_stub, + @intCast(ctx.dylibs.items.len), 
// TODO defer it till later + dependent_libs, + path, + ); + + addDylib(ctx, gpa, dylib, link_options, .{ + .needed = dylib_options.needed, + .weak = dylib_options.weak, + }) catch |err| switch (err) { + error.DylibAlreadyExists => dylib.deinit(gpa), + else => |e| return e, + }; +} + +fn addDylib( + ctx: anytype, + gpa: Allocator, + dylib: Dylib, + link_options: *const link.Options, + dylib_options: DylibOpts, +) !void { + if (dylib_options.id) |id| { if (dylib.id.?.current_version < id.compatibility_version) { + // TODO convert into an error log.warn("found dylib is incompatible with the required minimum version", .{}); log.warn(" dylib: {s}", .{id.name}); log.warn(" required minimum version: {}", .{id.compatibility_version}); log.warn(" dylib version: {}", .{dylib.id.?.current_version}); - - // TODO maybe this should be an error and facilitate auto-cleanup? - dylib.deinit(gpa); - return false; + return error.IncompatibleDylibVersion; } } - try self.dylibs.append(gpa, dylib); - try self.dylibs_map.putNoClobber(gpa, dylib.id.?.name, dylib_id); + const gop = try ctx.dylibs_map.getOrPut(gpa, dylib.id.?.name); + if (gop.found_existing) return error.DylibAlreadyExists; + + gop.value_ptr.* = @as(u16, @intCast(ctx.dylibs.items.len)); + try ctx.dylibs.append(gpa, dylib); const should_link_dylib_even_if_unreachable = blk: { - if (self.base.options.dead_strip_dylibs and !opts.needed) break :blk false; - break :blk !(opts.dependent or self.referenced_dylibs.contains(dylib_id)); + if (link_options.dead_strip_dylibs and !dylib_options.needed) break :blk false; + break :blk !(dylib_options.dependent or ctx.referenced_dylibs.contains(gop.value_ptr.*)); }; if (should_link_dylib_even_if_unreachable) { - try self.referenced_dylibs.putNoClobber(gpa, dylib_id, {}); - } - - return true; -} - -pub fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const u8, dependent_libs: anytype) !void { - for (files) |file_name| { - const full_path = full_path: { - var 
buffer: [fs.MAX_PATH_BYTES]u8 = undefined; - break :full_path try fs.realpath(file_name, &buffer); - }; - log.debug("parsing input file path '{s}'", .{full_path}); - - if (try self.parseObject(full_path)) continue; - if (try self.parseArchive(full_path, false)) continue; - if (try self.parseDylib(full_path, dependent_libs, .{ - .syslibroot = syslibroot, - })) continue; - - log.debug("unknown filetype for positional input file: '{s}'", .{file_name}); + try ctx.referenced_dylibs.putNoClobber(gpa, gop.value_ptr.*, {}); } } -pub fn parseAndForceLoadStaticArchives(self: *MachO, files: []const []const u8) !void { - for (files) |file_name| { - const full_path = full_path: { - var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; - break :full_path try fs.realpath(file_name, &buffer); - }; - log.debug("parsing and force loading static archive '{s}'", .{full_path}); - - if (try self.parseArchive(full_path, true)) continue; - log.debug("unknown filetype: expected static archive: '{s}'", .{file_name}); - } -} - -pub fn parseLibs( - self: *MachO, - lib_names: []const []const u8, - lib_infos: []const link.SystemLib, - syslibroot: ?[]const u8, +pub fn parseDependentLibs( + ctx: anytype, + gpa: Allocator, dependent_libs: anytype, + link_options: *const link.Options, ) !void { - for (lib_names, 0..) 
|lib, i| { - const lib_info = lib_infos[i]; - log.debug("parsing lib path '{s}'", .{lib}); - if (try self.parseDylib(lib, dependent_libs, .{ - .syslibroot = syslibroot, - .needed = lib_info.needed, - .weak = lib_info.weak, - })) continue; + const tracy = trace(@src()); + defer tracy.end(); - log.debug("unknown filetype for a library: '{s}'", .{lib}); - } -} - -pub fn parseDependentLibs(self: *MachO, syslibroot: ?[]const u8, dependent_libs: anytype) !void { // At this point, we can now parse dependents of dylibs preserving the inclusion order of: // 1) anything on the linker line is parsed first // 2) afterwards, we parse dependents of the included dylibs // TODO this should not be performed if the user specifies `-flat_namespace` flag. // See ld64 manpages. - var arena_alloc = std.heap.ArenaAllocator.init(self.base.allocator); + var arena_alloc = std.heap.ArenaAllocator.init(gpa); const arena = arena_alloc.allocator(); defer arena_alloc.deinit(); - while (dependent_libs.readItem()) |*dep_id| { - defer dep_id.id.deinit(self.base.allocator); + outer: while (dependent_libs.readItem()) |dep_id| { + defer dep_id.id.deinit(gpa); - if (self.dylibs_map.contains(dep_id.id.name)) continue; + if (ctx.dylibs_map.contains(dep_id.id.name)) continue; - const weak = self.dylibs.items[dep_id.parent].weak; + const weak = ctx.dylibs.items[dep_id.parent].weak; const has_ext = blk: { const basename = fs.path.basename(dep_id.id.name); break :blk mem.lastIndexOfScalar(u8, basename, '.') != null; @@ -1061,20 +1283,47 @@ pub fn parseDependentLibs(self: *MachO, syslibroot: ?[]const u8, dependent_libs: for (&[_][]const u8{ extension, ".tbd" }) |ext| { const with_ext = try std.fmt.allocPrint(arena, "{s}{s}", .{ without_ext, ext }); - const full_path = if (syslibroot) |root| try fs.path.join(arena, &.{ root, with_ext }) else with_ext; + const full_path = if (link_options.sysroot) |root| + try fs.path.join(arena, &.{ root, with_ext }) + else + with_ext; + + const file = 
std.fs.cwd().openFile(full_path, .{}) catch |err| switch (err) { + error.FileNotFound => continue, + else => |e| return e, + }; + defer file.close(); log.debug("trying dependency at fully resolved path {s}", .{full_path}); - const did_parse_successfully = try self.parseDylib(full_path, dependent_libs, .{ - .id = dep_id.id, - .syslibroot = syslibroot, - .dependent = true, - .weak = weak, - }); - if (did_parse_successfully) break; - } else { - log.debug("unable to resolve dependency {s}", .{dep_id.id.name}); + const offset: u64 = if (fat.isFatLibrary(file)) blk: { + const offset = parseFatLibrary(ctx, file, full_path, link_options.target.cpu.arch) catch |err| switch (err) { + error.MissingArch => break, + else => |e| return e, + }; + try file.seekTo(offset); + break :blk offset; + } else 0; + + if (Dylib.isDylib(file, offset)) { + try parseDylib(ctx, gpa, file, full_path, offset, dependent_libs, link_options, .{ + .dependent = true, + .weak = weak, + }); + } else { + parseLibStub(ctx, gpa, file, full_path, dependent_libs, link_options, .{ + .dependent = true, + .weak = weak, + }) catch |err| switch (err) { + error.NotLibStub, error.UnexpectedToken => continue, + else => |e| return e, + }; + } + continue :outer; } + + // TODO convert into an error + log.err("{s}: unable to resolve dependency", .{dep_id.id.name}); } } @@ -2517,7 +2766,7 @@ fn populateMissingMetadata(self: *MachO) !void { // The first __TEXT segment is immovable and covers MachO header and load commands. 
self.header_segment_cmd_index = @as(u8, @intCast(self.segments.items.len)); const ideal_size = @max(self.base.options.headerpad_size orelse 0, default_headerpad_size); - const needed_size = mem.alignForward(u64, padToIdeal(ideal_size), getPageSize(self.base.options.target.cpu.arch)); + const needed_size = mem.alignForward(u64, padToIdeal(ideal_size), getPageSize(cpu_arch)); log.debug("found __TEXT segment (header-only) free space 0x{x} to 0x{x}", .{ 0, needed_size }); diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index 5276bf041e..f3922f6ff9 100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -87,6 +87,13 @@ const ar_hdr = extern struct { } }; +pub fn isArchive(file: fs.File, fat_offset: u64) bool { + const reader = file.reader(); + const magic = reader.readBytesNoEof(SARMAG) catch return false; + defer file.seekTo(fat_offset) catch {}; + return mem.eql(u8, &magic, ARMAG); +} + pub fn deinit(self: *Archive, allocator: Allocator) void { self.file.close(); for (self.toc.keys()) |*key| { @@ -100,21 +107,8 @@ pub fn deinit(self: *Archive, allocator: Allocator) void { } pub fn parse(self: *Archive, allocator: Allocator, reader: anytype) !void { - const magic = try reader.readBytesNoEof(SARMAG); - if (!mem.eql(u8, &magic, ARMAG)) { - log.debug("invalid magic: expected '{s}', found '{s}'", .{ ARMAG, magic }); - return error.NotArchive; - } - + _ = try reader.readBytesNoEof(SARMAG); self.header = try reader.readStruct(ar_hdr); - if (!mem.eql(u8, &self.header.ar_fmag, ARFMAG)) { - log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ - ARFMAG, - self.header.ar_fmag, - }); - return error.NotArchive; - } - const name_or_length = try self.header.nameOrLength(); var embedded_name = try parseName(allocator, name_or_length, reader); log.debug("parsing archive '{s}' at '{s}'", .{ embedded_name, self.name }); @@ -182,22 +176,12 @@ fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) ! 
} } -pub fn parseObject( - self: Archive, - gpa: Allocator, - cpu_arch: std.Target.Cpu.Arch, - offset: u32, -) !Object { +pub fn parseObject(self: Archive, gpa: Allocator, offset: u32) !Object { const reader = self.file.reader(); try reader.context.seekTo(self.fat_offset + offset); const object_header = try reader.readStruct(ar_hdr); - if (!mem.eql(u8, &object_header.ar_fmag, ARFMAG)) { - log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, object_header.ar_fmag }); - return error.MalformedArchive; - } - const name_or_length = try object_header.nameOrLength(); const object_name = try parseName(gpa, name_or_length, reader); defer gpa.free(object_name); @@ -227,7 +211,7 @@ pub fn parseObject( .contents = contents, }; - try object.parse(gpa, cpu_arch); + try object.parse(gpa); return object; } diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index 2aacf4009b..c424343a4e 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -20,6 +20,8 @@ const Tbd = tapi.Tbd; id: ?Id = null, weak: bool = false, +/// Header is only set if Dylib is parsed directly from a binary and not a stub file. +header: ?macho.mach_header_64 = null, /// Parsed symbol table represented as hash map of symbols' /// names. 
We can and should defer creating *Symbols until @@ -116,6 +118,13 @@ pub const Id = struct { } }; +pub fn isDylib(file: std.fs.File, fat_offset: u64) bool { + const reader = file.reader(); + const hdr = reader.readStruct(macho.mach_header_64) catch return false; + defer file.seekTo(fat_offset) catch {}; + return hdr.filetype == macho.MH_DYLIB; +} + pub fn deinit(self: *Dylib, allocator: Allocator) void { for (self.symbols.keys()) |key| { allocator.free(key); @@ -129,7 +138,6 @@ pub fn deinit(self: *Dylib, allocator: Allocator) void { pub fn parseFromBinary( self: *Dylib, allocator: Allocator, - cpu_arch: std.Target.Cpu.Arch, dylib_id: u16, dependent_libs: anytype, name: []const u8, @@ -140,27 +148,12 @@ pub fn parseFromBinary( log.debug("parsing shared library '{s}'", .{name}); - const header = try reader.readStruct(macho.mach_header_64); + self.header = try reader.readStruct(macho.mach_header_64); - if (header.filetype != macho.MH_DYLIB) { - log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_DYLIB, header.filetype }); - return error.NotDylib; - } - - const this_arch: std.Target.Cpu.Arch = try fat.decodeArch(header.cputype, true); - - if (this_arch != cpu_arch) { - log.err("mismatched cpu architecture: expected {s}, found {s}", .{ - @tagName(cpu_arch), - @tagName(this_arch), - }); - return error.MismatchedCpuArchitecture; - } - - const should_lookup_reexports = header.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0; + const should_lookup_reexports = self.header.?.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0; var it = LoadCommandIterator{ - .ncmds = header.ncmds, - .buffer = data[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], + .ncmds = self.header.?.ncmds, + .buffer = data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], }; while (it.next()) |cmd| { switch (cmd.cmd()) { diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index bbcfbb7047..fe517d11be 100644 --- a/src/link/MachO/Object.zig +++ 
b/src/link/MachO/Object.zig @@ -91,6 +91,13 @@ const Record = struct { reloc: Entry, }; +pub fn isObject(file: std.fs.File) bool { + const reader = file.reader(); + const hdr = reader.readStruct(macho.mach_header_64) catch return false; + defer file.seekTo(0) catch {}; + return hdr.filetype == macho.MH_OBJECT; +} + pub fn deinit(self: *Object, gpa: Allocator) void { self.atoms.deinit(gpa); self.exec_atoms.deinit(gpa); @@ -118,36 +125,12 @@ pub fn deinit(self: *Object, gpa: Allocator) void { self.data_in_code.deinit(gpa); } -pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) !void { +pub fn parse(self: *Object, allocator: Allocator) !void { var stream = std.io.fixedBufferStream(self.contents); const reader = stream.reader(); self.header = try reader.readStruct(macho.mach_header_64); - if (self.header.filetype != macho.MH_OBJECT) { - log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ - macho.MH_OBJECT, - self.header.filetype, - }); - return error.NotObject; - } - - const this_arch: std.Target.Cpu.Arch = switch (self.header.cputype) { - macho.CPU_TYPE_ARM64 => .aarch64, - macho.CPU_TYPE_X86_64 => .x86_64, - else => |value| { - log.err("unsupported cpu architecture 0x{x}", .{value}); - return error.UnsupportedCpuArchitecture; - }, - }; - if (this_arch != cpu_arch) { - log.err("mismatched cpu architecture: expected {s}, found {s}", .{ - @tagName(cpu_arch), - @tagName(this_arch), - }); - return error.MismatchedCpuArchitecture; - } - var it = LoadCommandIterator{ .ncmds = self.header.ncmds, .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], @@ -437,7 +420,7 @@ pub fn splitRegularSections(self: *Object, zld: *Zld, object_id: u32) !void { // Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we // have to infer the start of undef section in the symtab ourselves. 
const iundefsym = blk: { - const dysymtab = self.parseDysymtab() orelse { + const dysymtab = self.getDysymtab() orelse { var iundefsym: usize = self.in_symtab.?.len; while (iundefsym > 0) : (iundefsym -= 1) { const sym = self.symtab[iundefsym - 1]; @@ -945,16 +928,14 @@ fn diceLessThan(ctx: void, lhs: macho.data_in_code_entry, rhs: macho.data_in_cod return lhs.offset < rhs.offset; } -fn parseDysymtab(self: Object) ?macho.dysymtab_command { +fn getDysymtab(self: Object) ?macho.dysymtab_command { var it = LoadCommandIterator{ .ncmds = self.header.ncmds, .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], }; while (it.next()) |cmd| { switch (cmd.cmd()) { - .DYSYMTAB => { - return cmd.cast(macho.dysymtab_command).?; - }, + .DYSYMTAB => return cmd.cast(macho.dysymtab_command).?, else => {}, } } else return null; diff --git a/src/link/MachO/fat.zig b/src/link/MachO/fat.zig index 81b0685418..751e49f651 100644 --- a/src/link/MachO/fat.zig +++ b/src/link/MachO/fat.zig @@ -1,42 +1,40 @@ const std = @import("std"); +const assert = std.debug.assert; const log = std.log.scoped(.archive); const macho = std.macho; const mem = std.mem; -pub fn decodeArch(cputype: macho.cpu_type_t, comptime logError: bool) !std.Target.Cpu.Arch { - const cpu_arch: std.Target.Cpu.Arch = switch (cputype) { - macho.CPU_TYPE_ARM64 => .aarch64, - macho.CPU_TYPE_X86_64 => .x86_64, - else => { - if (logError) { - log.err("unsupported cpu architecture 0x{x}", .{cputype}); - } - return error.UnsupportedCpuArchitecture; - }, - }; - return cpu_arch; +pub fn isFatLibrary(file: std.fs.File) bool { + const reader = file.reader(); + const hdr = reader.readStructBig(macho.fat_header) catch return false; + defer file.seekTo(0) catch {}; + return hdr.magic == macho.FAT_MAGIC; } -pub fn getLibraryOffset(reader: anytype, cpu_arch: std.Target.Cpu.Arch) !u64 { - const fat_header = try reader.readStructBig(macho.fat_header); - if (fat_header.magic != macho.FAT_MAGIC) return 0; +pub const 
Arch = struct { + tag: std.Target.Cpu.Arch, + offset: u64, +}; +pub fn parseArchs(file: std.fs.File, buffer: *[2]Arch) ![]const Arch { + const reader = file.reader(); + const fat_header = try reader.readStructBig(macho.fat_header); + assert(fat_header.magic == macho.FAT_MAGIC); + + var count: usize = 0; var fat_arch_index: u32 = 0; while (fat_arch_index < fat_header.nfat_arch) : (fat_arch_index += 1) { const fat_arch = try reader.readStructBig(macho.fat_arch); // If we come across an architecture that we do not know how to handle, that's // fine because we can keep looking for one that might match. - const lib_arch = decodeArch(fat_arch.cputype, false) catch |err| switch (err) { - error.UnsupportedCpuArchitecture => continue, + const arch: std.Target.Cpu.Arch = switch (fat_arch.cputype) { + macho.CPU_TYPE_ARM64 => if (fat_arch.cpusubtype == macho.CPU_SUBTYPE_ARM_ALL) .aarch64 else continue, + macho.CPU_TYPE_X86_64 => if (fat_arch.cpusubtype == macho.CPU_SUBTYPE_X86_64_ALL) .x86_64 else continue, + else => continue, }; - if (lib_arch == cpu_arch) { - // We have found a matching architecture! 
- return fat_arch.offset; - } - } else { - log.err("Could not find matching cpu architecture in fat library: expected {s}", .{ - @tagName(cpu_arch), - }); - return error.MismatchedCpuArchitecture; + buffer[count] = .{ .tag = arch, .offset = fat_arch.offset }; + count += 1; } + + return buffer[0..count]; } diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index bfadd064db..6ca227c430 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -89,298 +89,6 @@ pub const Zld = struct { atoms: std.ArrayListUnmanaged(Atom) = .{}, - fn parseObject(self: *Zld, path: []const u8) !bool { - const gpa = self.gpa; - const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { - error.FileNotFound => return false, - else => |e| return e, - }; - defer file.close(); - - const name = try gpa.dupe(u8, path); - errdefer gpa.free(name); - const cpu_arch = self.options.target.cpu.arch; - const mtime: u64 = mtime: { - const stat = file.stat() catch break :mtime 0; - break :mtime @as(u64, @intCast(@divFloor(stat.mtime, 1_000_000_000))); - }; - const file_stat = try file.stat(); - const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; - const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); - - var object = Object{ - .name = name, - .mtime = mtime, - .contents = contents, - }; - - object.parse(gpa, cpu_arch) catch |err| switch (err) { - error.EndOfStream, error.NotObject => { - object.deinit(gpa); - return false; - }, - else => |e| return e, - }; - - try self.objects.append(gpa, object); - - return true; - } - - fn parseArchive(self: *Zld, path: []const u8, force_load: bool) !bool { - const gpa = self.gpa; - const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { - error.FileNotFound => return false, - else => |e| return e, - }; - errdefer file.close(); - - const name = try gpa.dupe(u8, path); - errdefer gpa.free(name); - const cpu_arch = self.options.target.cpu.arch; - const 
reader = file.reader(); - const fat_offset = try fat.getLibraryOffset(reader, cpu_arch); - try reader.context.seekTo(fat_offset); - - var archive = Archive{ - .name = name, - .fat_offset = fat_offset, - .file = file, - }; - - archive.parse(gpa, reader) catch |err| switch (err) { - error.EndOfStream, error.NotArchive => { - archive.deinit(gpa); - return false; - }, - else => |e| return e, - }; - - if (force_load) { - defer archive.deinit(gpa); - // Get all offsets from the ToC - var offsets = std.AutoArrayHashMap(u32, void).init(gpa); - defer offsets.deinit(); - for (archive.toc.values()) |offs| { - for (offs.items) |off| { - _ = try offsets.getOrPut(off); - } - } - for (offsets.keys()) |off| { - const object = try archive.parseObject(gpa, cpu_arch, off); - try self.objects.append(gpa, object); - } - } else { - try self.archives.append(gpa, archive); - } - - return true; - } - - const ParseDylibError = error{ - OutOfMemory, - EmptyStubFile, - MismatchedCpuArchitecture, - UnsupportedCpuArchitecture, - EndOfStream, - } || fs.File.OpenError || std.os.PReadError || Dylib.Id.ParseError; - - const DylibCreateOpts = struct { - syslibroot: ?[]const u8, - id: ?Dylib.Id = null, - dependent: bool = false, - needed: bool = false, - weak: bool = false, - }; - - fn parseDylib( - self: *Zld, - path: []const u8, - dependent_libs: anytype, - opts: DylibCreateOpts, - ) ParseDylibError!bool { - const gpa = self.gpa; - const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { - error.FileNotFound => return false, - else => |e| return e, - }; - defer file.close(); - - const cpu_arch = self.options.target.cpu.arch; - const file_stat = try file.stat(); - var file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; - - const reader = file.reader(); - const fat_offset = math.cast(usize, try fat.getLibraryOffset(reader, cpu_arch)) orelse - return error.Overflow; - try file.seekTo(fat_offset); - file_size -= fat_offset; - - const contents = try 
file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); - defer gpa.free(contents); - - const dylib_id = @as(u16, @intCast(self.dylibs.items.len)); - var dylib = Dylib{ .weak = opts.weak }; - - dylib.parseFromBinary( - gpa, - cpu_arch, - dylib_id, - dependent_libs, - path, - contents, - ) catch |err| switch (err) { - error.EndOfStream, error.NotDylib => { - try file.seekTo(0); - - var lib_stub = LibStub.loadFromFile(gpa, file) catch { - dylib.deinit(gpa); - return false; - }; - defer lib_stub.deinit(); - - try dylib.parseFromStub( - gpa, - self.options.target, - lib_stub, - dylib_id, - dependent_libs, - path, - ); - }, - else => |e| return e, - }; - - if (opts.id) |id| { - if (dylib.id.?.current_version < id.compatibility_version) { - log.warn("found dylib is incompatible with the required minimum version", .{}); - log.warn(" dylib: {s}", .{id.name}); - log.warn(" required minimum version: {}", .{id.compatibility_version}); - log.warn(" dylib version: {}", .{dylib.id.?.current_version}); - - // TODO maybe this should be an error and facilitate auto-cleanup? 
- dylib.deinit(gpa); - return false; - } - } - - try self.dylibs.append(gpa, dylib); - try self.dylibs_map.putNoClobber(gpa, dylib.id.?.name, dylib_id); - - const should_link_dylib_even_if_unreachable = blk: { - if (self.options.dead_strip_dylibs and !opts.needed) break :blk false; - break :blk !(opts.dependent or self.referenced_dylibs.contains(dylib_id)); - }; - - if (should_link_dylib_even_if_unreachable) { - try self.referenced_dylibs.putNoClobber(gpa, dylib_id, {}); - } - - return true; - } - - fn parseInputFiles( - self: *Zld, - files: []const []const u8, - syslibroot: ?[]const u8, - dependent_libs: anytype, - ) !void { - for (files) |file_name| { - const full_path = full_path: { - var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; - break :full_path try fs.realpath(file_name, &buffer); - }; - log.debug("parsing input file path '{s}'", .{full_path}); - - if (try self.parseObject(full_path)) continue; - if (try self.parseArchive(full_path, false)) continue; - if (try self.parseDylib(full_path, dependent_libs, .{ - .syslibroot = syslibroot, - })) continue; - - log.debug("unknown filetype for positional input file: '{s}'", .{file_name}); - } - } - - fn parseAndForceLoadStaticArchives(self: *Zld, files: []const []const u8) !void { - for (files) |file_name| { - const full_path = full_path: { - var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; - break :full_path try fs.realpath(file_name, &buffer); - }; - log.debug("parsing and force loading static archive '{s}'", .{full_path}); - - if (try self.parseArchive(full_path, true)) continue; - log.debug("unknown filetype: expected static archive: '{s}'", .{file_name}); - } - } - - fn parseLibs( - self: *Zld, - lib_names: []const []const u8, - lib_infos: []const link.SystemLib, - syslibroot: ?[]const u8, - dependent_libs: anytype, - ) !void { - for (lib_names, 0..) 
|lib, i| { - const lib_info = lib_infos[i]; - log.debug("parsing lib path '{s}'", .{lib}); - if (try self.parseDylib(lib, dependent_libs, .{ - .syslibroot = syslibroot, - .needed = lib_info.needed, - .weak = lib_info.weak, - })) continue; - if (try self.parseArchive(lib, false)) continue; - - log.debug("unknown filetype for a library: '{s}'", .{lib}); - } - } - - fn parseDependentLibs(self: *Zld, syslibroot: ?[]const u8, dependent_libs: anytype) !void { - // At this point, we can now parse dependents of dylibs preserving the inclusion order of: - // 1) anything on the linker line is parsed first - // 2) afterwards, we parse dependents of the included dylibs - // TODO this should not be performed if the user specifies `-flat_namespace` flag. - // See ld64 manpages. - var arena_alloc = std.heap.ArenaAllocator.init(self.gpa); - const arena = arena_alloc.allocator(); - defer arena_alloc.deinit(); - - while (dependent_libs.readItem()) |*dep_id| { - defer dep_id.id.deinit(self.gpa); - - if (self.dylibs_map.contains(dep_id.id.name)) continue; - - const weak = self.dylibs.items[dep_id.parent].weak; - const has_ext = blk: { - const basename = fs.path.basename(dep_id.id.name); - break :blk mem.lastIndexOfScalar(u8, basename, '.') != null; - }; - const extension = if (has_ext) fs.path.extension(dep_id.id.name) else ""; - const without_ext = if (has_ext) blk: { - const index = mem.lastIndexOfScalar(u8, dep_id.id.name, '.') orelse unreachable; - break :blk dep_id.id.name[0..index]; - } else dep_id.id.name; - - for (&[_][]const u8{ extension, ".tbd" }) |ext| { - const with_ext = try std.fmt.allocPrint(arena, "{s}{s}", .{ without_ext, ext }); - const full_path = if (syslibroot) |root| try fs.path.join(arena, &.{ root, with_ext }) else with_ext; - - log.debug("trying dependency at fully resolved path {s}", .{full_path}); - - const did_parse_successfully = try self.parseDylib(full_path, dependent_libs, .{ - .id = dep_id.id, - .syslibroot = syslibroot, - .dependent = true, - .weak = 
weak, - }); - if (did_parse_successfully) break; - } else { - log.debug("unable to resolve dependency {s}", .{dep_id.id.name}); - } - } - } - pub fn getOutputSection(self: *Zld, sect: macho.section_64) !?u8 { const segname = sect.segName(); const sectname = sect.sectName(); @@ -1009,7 +717,7 @@ pub const Zld = struct { if (self.archives.items.len == 0) return; const gpa = self.gpa; - const cpu_arch = self.options.target.cpu.arch; + var next_sym: usize = 0; loop: while (next_sym < resolver.unresolved.count()) { const global = self.globals.items[resolver.unresolved.keys()[next_sym]]; @@ -1024,13 +732,7 @@ pub const Zld = struct { assert(offsets.items.len > 0); const object_id = @as(u16, @intCast(self.objects.items.len)); - const object = archive.parseObject(gpa, cpu_arch, offsets.items[0]) catch |e| switch (e) { - error.MismatchedCpuArchitecture => { - log.err("CPU architecture mismatch found in {s}", .{archive.name}); - return e; - }, - else => return e, - }; + const object = try archive.parseObject(gpa, offsets.items[0]); try self.objects.append(gpa, object); try self.resolveSymbolsInObject(object_id, resolver); @@ -3512,37 +3214,27 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try zld.strtab.buffer.append(gpa, 0); // Positional arguments to the linker such as object files and static archives. 
- var positionals = std.ArrayList([]const u8).init(arena); + var positionals = std.ArrayList(Compilation.LinkObject).init(arena); try positionals.ensureUnusedCapacity(options.objects.len); - - var must_link_archives = std.StringArrayHashMap(void).init(arena); - try must_link_archives.ensureUnusedCapacity(options.objects.len); - - for (options.objects) |obj| { - if (must_link_archives.contains(obj.path)) continue; - if (obj.must_link) { - _ = must_link_archives.getOrPutAssumeCapacity(obj.path); - } else { - _ = positionals.appendAssumeCapacity(obj.path); - } - } + positionals.appendSliceAssumeCapacity(options.objects); for (comp.c_object_table.keys()) |key| { - try positionals.append(key.status.success.object_path); + try positionals.append(.{ .path = key.status.success.object_path }); } if (module_obj_path) |p| { - try positionals.append(p); + try positionals.append(.{ .path = p }); } if (comp.compiler_rt_lib) |lib| { - try positionals.append(lib.full_object_path); + try positionals.append(.{ .path = lib.full_object_path }); } // libc++ dep if (options.link_libcpp) { - try positionals.append(comp.libcxxabi_static_lib.?.full_object_path); - try positionals.append(comp.libcxx_static_lib.?.full_object_path); + try positionals.ensureUnusedCapacity(2); + positionals.appendAssumeCapacity(.{ .path = comp.libcxxabi_static_lib.?.full_object_path }); + positionals.appendAssumeCapacity(.{ .path = comp.libcxx_static_lib.?.full_object_path }); } var libs = std.StringArrayHashMap(link.SystemLib).init(arena); @@ -3621,6 +3313,9 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr } for (options.objects) |obj| { + if (obj.must_link) { + try argv.append("-force_load"); + } try argv.append(obj.path); } @@ -3682,10 +3377,6 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try argv.append("dynamic_lookup"); } - for (must_link_archives.keys()) |lib| { - try argv.append(try std.fmt.allocPrint(arena, "-force_load {s}", 
.{lib})); - } - Compilation.dump_argv(argv.items); } @@ -3694,10 +3385,49 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr parent: u16, }, .Dynamic).init(arena); - try zld.parseInputFiles(positionals.items, options.sysroot, &dependent_libs); - try zld.parseAndForceLoadStaticArchives(must_link_archives.keys()); - try zld.parseLibs(libs.keys(), libs.values(), options.sysroot, &dependent_libs); - try zld.parseDependentLibs(options.sysroot, &dependent_libs); + for (positionals.items) |obj| { + const in_file = try std.fs.cwd().openFile(obj.path, .{}); + defer in_file.close(); + + MachO.parsePositional( + &zld, + gpa, + in_file, + obj.path, + obj.must_link, + &dependent_libs, + options, + ) catch |err| { + // TODO convert to error + log.err("{s}: parsing positional failed with err {s}", .{ obj.path, @errorName(err) }); + continue; + }; + } + + for (libs.keys(), libs.values()) |path, lib| { + const in_file = try std.fs.cwd().openFile(path, .{}); + defer in_file.close(); + + MachO.parseLibrary( + &zld, + gpa, + in_file, + path, + lib, + false, + &dependent_libs, + options, + ) catch |err| { + // TODO convert to error + log.err("{s}: parsing library failed with err {s}", .{ path, @errorName(err) }); + continue; + }; + } + + MachO.parseDependentLibs(&zld, gpa, &dependent_libs, options) catch |err| { + // TODO convert to error + log.err("parsing dependent libraries failed with err {s}", .{@errorName(err)}); + }; var resolver = SymbolResolver{ .arena = arena, From 7b282dffe68a7187a4fa4b5c11c82f1f67248a96 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 19 Aug 2023 16:15:19 +0200 Subject: [PATCH 04/57] macho: unify concept of SymbolWithLoc across drivers --- src/arch/aarch64/Emit.zig | 8 +-- src/arch/x86_64/Emit.zig | 11 ++-- src/link/MachO.zig | 91 +++++++++++++++++---------------- src/link/MachO/Atom.zig | 15 +++--- src/link/MachO/DebugSymbols.zig | 2 +- src/link/MachO/Object.zig | 21 ++++---- src/link/MachO/UnwindInfo.zig | 3 +- 
src/link/MachO/ZldAtom.zig | 2 +- src/link/MachO/dead_strip.zig | 3 +- src/link/MachO/eh_frame.zig | 3 +- src/link/MachO/thunks.zig | 3 +- src/link/MachO/zld.zig | 53 ++----------------- 12 files changed, 88 insertions(+), 127 deletions(-) diff --git a/src/arch/aarch64/Emit.zig b/src/arch/aarch64/Emit.zig index 8cf2386138..68c28ed8aa 100644 --- a/src/arch/aarch64/Emit.zig +++ b/src/arch/aarch64/Emit.zig @@ -670,7 +670,7 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) !void { if (emit.bin_file.cast(link.File.MachO)) |macho_file| { // Add relocation to the decl. - const atom_index = macho_file.getAtomIndexForSymbol(.{ .sym_index = relocation.atom_index, .file = null }).?; + const atom_index = macho_file.getAtomIndexForSymbol(.{ .sym_index = relocation.atom_index }).?; const target = macho_file.getGlobalByIndex(relocation.sym_index); try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ .type = .branch, @@ -885,9 +885,9 @@ fn mirLoadMemoryPie(emit: *Emit, inst: Mir.Inst.Index) !void { if (emit.bin_file.cast(link.File.MachO)) |macho_file| { const Atom = link.File.MachO.Atom; const Relocation = Atom.Relocation; - const atom_index = macho_file.getAtomIndexForSymbol(.{ .sym_index = data.atom_index, .file = null }).?; + const atom_index = macho_file.getAtomIndexForSymbol(.{ .sym_index = data.atom_index }).?; try Atom.addRelocations(macho_file, atom_index, &[_]Relocation{ .{ - .target = .{ .sym_index = data.sym_index, .file = null }, + .target = .{ .sym_index = data.sym_index }, .offset = offset, .addend = 0, .pcrel = true, @@ -898,7 +898,7 @@ fn mirLoadMemoryPie(emit: *Emit, inst: Mir.Inst.Index) !void { else => unreachable, }, }, .{ - .target = .{ .sym_index = data.sym_index, .file = null }, + .target = .{ .sym_index = data.sym_index }, .offset = offset + 4, .addend = 0, .pcrel = false, diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 12b5294015..050772b2ce 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ 
-43,9 +43,7 @@ pub fn emitMir(emit: *Emit) Error!void { }), .linker_extern_fn => |symbol| if (emit.bin_file.cast(link.File.MachO)) |macho_file| { // Add relocation to the decl. - const atom_index = macho_file.getAtomIndexForSymbol( - .{ .sym_index = symbol.atom_index, .file = null }, - ).?; + const atom_index = macho_file.getAtomIndexForSymbol(.{ .sym_index = symbol.atom_index }).?; const target = macho_file.getGlobalByIndex(symbol.sym_index); try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ .type = .branch, @@ -77,10 +75,7 @@ pub fn emitMir(emit: *Emit) Error!void { .linker_import, .linker_tlv, => |symbol| if (emit.bin_file.cast(link.File.MachO)) |macho_file| { - const atom_index = macho_file.getAtomIndexForSymbol(.{ - .sym_index = symbol.atom_index, - .file = null, - }).?; + const atom_index = macho_file.getAtomIndexForSymbol(.{ .sym_index = symbol.atom_index }).?; try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ .type = switch (lowered_relocs[0].target) { .linker_got => .got, @@ -88,7 +83,7 @@ pub fn emitMir(emit: *Emit) Error!void { .linker_tlv => .tlv, else => unreachable, }, - .target = .{ .sym_index = symbol.sym_index, .file = null }, + .target = .{ .sym_index = symbol.sym_index }, .offset = @as(u32, @intCast(end_offset - 4)), .addend = 0, .pcrel = true, diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 92aae67c22..440c26260a 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -249,20 +249,14 @@ const DeclMetadata = struct { fn getExport(m: DeclMetadata, macho_file: *const MachO, name: []const u8) ?u32 { for (m.exports.items) |exp| { - if (mem.eql(u8, name, macho_file.getSymbolName(.{ - .sym_index = exp, - .file = null, - }))) return exp; + if (mem.eql(u8, name, macho_file.getSymbolName(.{ .sym_index = exp }))) return exp; } return null; } fn getExportPtr(m: *DeclMetadata, macho_file: *MachO, name: []const u8) ?*u32 { for (m.exports.items) |*exp| { - if (mem.eql(u8, name, macho_file.getSymbolName(.{ - 
.sym_index = exp.*, - .file = null, - }))) return exp; + if (mem.eql(u8, name, macho_file.getSymbolName(.{ .sym_index = exp.* }))) return exp; } return null; } @@ -284,21 +278,20 @@ const ResolveAction = struct { }; }; -pub const SymbolWithLoc = struct { +pub const SymbolWithLoc = extern struct { // Index into the respective symbol table. sym_index: u32, - // null means it's a synthetic global. - file: ?u32 = null, + // 0 means it's a synthetic global. + file: u32 = 0, - pub fn eql(this: SymbolWithLoc, other: SymbolWithLoc) bool { - if (this.file == null and other.file == null) { - return this.sym_index == other.sym_index; - } - if (this.file != null and other.file != null) { - return this.sym_index == other.sym_index and this.file.? == other.file.?; - } - return false; + pub fn getFile(self: SymbolWithLoc) ?u32 { + if (self.file == 0) return null; + return self.file - 1; + } + + pub fn eql(self: SymbolWithLoc, other: SymbolWithLoc) bool { + return self.file == other.file and self.sym_index == other.sym_index; } }; @@ -1576,7 +1569,7 @@ pub fn allocateSpecialSymbols(self: *MachO) !void { "__mh_execute_header", }) |name| { const global = self.getGlobal(name) orelse continue; - if (global.file != null) continue; + if (global.getFile() != null) continue; const sym = self.getSymbolPtr(global); const seg = self.getSegment(self.text_section_index.?); sym.n_sect = 1; @@ -1597,7 +1590,7 @@ pub fn createAtom(self: *MachO) !Atom.Index { try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom_index); atom.* = .{ .sym_index = sym_index, - .file = null, + .file = 0, .size = 0, .prev_index = null, .next_index = null, @@ -1664,7 +1657,7 @@ fn createMhExecuteHeaderSymbol(self: *MachO) !void { const gpa = self.base.allocator; const sym_index = try self.allocateSymbol(); - const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = null }; + const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; const sym = self.getSymbolPtr(sym_loc); sym.* = .{ .n_strx = try 
self.strtab.insert(gpa, "__mh_execute_header"), @@ -1684,7 +1677,7 @@ fn createDsoHandleSymbol(self: *MachO) !void { const gpa = self.base.allocator; const sym_index = try self.allocateSymbol(); - const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = null }; + const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; const sym = self.getSymbolPtr(sym_loc); sym.* = .{ .n_strx = try self.strtab.insert(gpa, "___dso_handle"), @@ -1998,10 +1991,7 @@ fn allocateGlobal(self: *MachO) !u32 { } }; - self.globals.items[index] = .{ - .sym_index = 0, - .file = null, - }; + self.globals.items[index] = .{ .sym_index = 0 }; return index; } @@ -2613,7 +2603,7 @@ pub fn updateDeclExports( try decl_metadata.exports.append(gpa, sym_index); break :blk sym_index; }; - const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = null }; + const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; const sym = self.getSymbolPtr(sym_loc); sym.* = .{ .n_strx = try self.strtab.insert(gpa, exp_name), @@ -2643,7 +2633,7 @@ pub fn updateDeclExports( error.MultipleSymbolDefinitions => { // TODO: this needs rethinking const global = self.getGlobal(exp_name).?; - if (sym_loc.sym_index != global.sym_index and global.file != null) { + if (sym_loc.sym_index != global.sym_index and global.getFile() != null) { _ = try mod.failed_exports.put(mod.gpa, exp, try Module.ErrorMsg.create( gpa, decl.srcLoc(mod), @@ -2672,7 +2662,7 @@ pub fn deleteDeclExport( defer gpa.free(exp_name); const sym_index = metadata.getExportPtr(self, exp_name) orelse return; - const sym_loc = SymbolWithLoc{ .sym_index = sym_index.*, .file = null }; + const sym_loc = SymbolWithLoc{ .sym_index = sym_index.* }; const sym = self.getSymbolPtr(sym_loc); log.debug("deleting export '{s}'", .{exp_name}); assert(sym.sect() and sym.ext()); @@ -2688,10 +2678,7 @@ pub fn deleteDeclExport( if (self.resolver.fetchRemove(exp_name)) |entry| { defer gpa.free(entry.key); self.globals_free_list.append(gpa, entry.value) catch {}; - 
self.globals.items[entry.value] = .{ - .sym_index = 0, - .file = null, - }; + self.globals.items[entry.value] = .{ .sym_index = 0 }; } sym_index.* = 0; @@ -2730,10 +2717,10 @@ pub fn getDeclVAddr(self: *MachO, decl_index: Module.Decl.Index, reloc_info: Fil const this_atom_index = try self.getOrCreateAtomForDecl(decl_index); const sym_index = self.getAtom(this_atom_index).getSymbolIndex().?; - const atom_index = self.getAtomIndexForSymbol(.{ .sym_index = reloc_info.parent_atom_index, .file = null }).?; + const atom_index = self.getAtomIndexForSymbol(.{ .sym_index = reloc_info.parent_atom_index }).?; try Atom.addRelocation(self, atom_index, .{ .type = .unsigned, - .target = .{ .sym_index = sym_index, .file = null }, + .target = .{ .sym_index = sym_index }, .offset = @as(u32, @intCast(reloc_info.offset)), .addend = reloc_info.addend, .pcrel = false, @@ -3514,7 +3501,7 @@ fn writeSymtab(self: *MachO) !SymtabCtx { for (self.locals.items, 0..) |sym, sym_id| { if (sym.n_strx == 0) continue; // no name, skip - const sym_loc = SymbolWithLoc{ .sym_index = @as(u32, @intCast(sym_id)), .file = null }; + const sym_loc = SymbolWithLoc{ .sym_index = @as(u32, @intCast(sym_id)) }; if (self.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip if (self.getGlobal(self.getSymbolName(sym_loc)) != null) continue; // global symbol is either an export or import, skip try locals.append(sym); @@ -3934,13 +3921,13 @@ pub fn symbolIsTemp(self: *MachO, sym_with_loc: SymbolWithLoc) bool { /// Returns pointer-to-symbol described by `sym_with_loc` descriptor. pub fn getSymbolPtr(self: *MachO, sym_with_loc: SymbolWithLoc) *macho.nlist_64 { - assert(sym_with_loc.file == null); + assert(sym_with_loc.getFile() == null); return &self.locals.items[sym_with_loc.sym_index]; } /// Returns symbol described by `sym_with_loc` descriptor. 
pub fn getSymbol(self: *const MachO, sym_with_loc: SymbolWithLoc) macho.nlist_64 { - assert(sym_with_loc.file == null); + assert(sym_with_loc.getFile() == null); return self.locals.items[sym_with_loc.sym_index]; } @@ -4006,7 +3993,7 @@ pub fn getAtomPtr(self: *MachO, atom_index: Atom.Index) *Atom { /// Returns atom if there is an atom referenced by the symbol described by `sym_with_loc` descriptor. /// Returns null on failure. pub fn getAtomIndexForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?Atom.Index { - assert(sym_with_loc.file == null); + assert(sym_with_loc.getFile() == null); return self.atom_by_index_table.get(sym_with_loc.sym_index); } @@ -4034,13 +4021,31 @@ pub inline fn getPageSize(cpu_arch: std.Target.Cpu.Arch) u16 { }; } -pub fn findFirst(comptime T: type, haystack: []align(1) const T, start: usize, predicate: anytype) usize { +/// Binary search +pub fn bsearch(comptime T: type, haystack: []align(1) const T, predicate: anytype) usize { if (!@hasDecl(@TypeOf(predicate), "predicate")) @compileError("Predicate is required to define fn predicate(@This(), T) bool"); - if (start == haystack.len) return start; + var min: usize = 0; + var max: usize = haystack.len; + while (min < max) { + const index = (min + max) / 2; + const curr = haystack[index]; + if (predicate.predicate(curr)) { + min = index + 1; + } else { + max = index; + } + } + return min; +} - var i = start; +/// Linear search +pub fn lsearch(comptime T: type, haystack: []align(1) const T, predicate: anytype) usize { + if (!@hasDecl(@TypeOf(predicate), "predicate")) + @compileError("Predicate is required to define fn predicate(@This(), T) bool"); + + var i: usize = 0; while (i < haystack.len) : (i += 1) { if (predicate.predicate(haystack[i])) break; } diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 970371e455..7dc97003aa 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -17,16 +17,19 @@ const MachO = @import("../MachO.zig"); pub const Relocation = 
@import("Relocation.zig"); const SymbolWithLoc = MachO.SymbolWithLoc; -/// Each decl always gets a local symbol with the fully qualified name. -/// The vaddr and size are found here directly. -/// The file offset is found by computing the vaddr offset from the section vaddr -/// the symbol references, and adding that to the file offset of the section. +/// Each Atom always gets a symbol with the fully qualified name. +/// The symbol can reside in any object file context structure in `symtab` array +/// (see `Object`), or if the symbol is a synthetic symbol such as a GOT cell or +/// a stub trampoline, it can be found in the linkers `locals` arraylist. /// If this field is 0, it means the codegen size = 0 and there is no symbol or /// offset table entry. sym_index: u32, -/// null means symbol defined by Zig source. -file: ?u32, +/// 0 means an Atom is a synthetic Atom such as a GOT cell defined by the linker. +/// Otherwise, it is the index into appropriate object file (indexing from 1). +/// Prefer using `getFile()` helper to get the file index out rather than using +/// the field directly. +file: u32, /// Size and alignment of this atom /// Unlike in Elf, we need to store the size of this symbol as part of diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 07bf409836..602ee1ed63 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -475,7 +475,7 @@ fn writeSymtab(self: *DebugSymbols, macho_file: *MachO) !void { for (macho_file.locals.items, 0..) 
|sym, sym_id| { if (sym.n_strx == 0) continue; // no name, skip - const sym_loc = MachO.SymbolWithLoc{ .sym_index = @as(u32, @intCast(sym_id)), .file = null }; + const sym_loc = MachO.SymbolWithLoc{ .sym_index = @as(u32, @intCast(sym_id)) }; if (macho_file.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip if (macho_file.getGlobal(macho_file.getSymbolName(sym_loc)) != null) continue; // global symbol is either an export or import, skip var out_sym = sym; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index fe517d11be..5e67a334c0 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -23,9 +23,10 @@ const Atom = @import("ZldAtom.zig"); const AtomIndex = @import("zld.zig").AtomIndex; const DwarfInfo = @import("DwarfInfo.zig"); const LoadCommandIterator = macho.LoadCommandIterator; -const Zld = @import("zld.zig").Zld; -const SymbolWithLoc = @import("zld.zig").SymbolWithLoc; +const MachO = @import("../MachO.zig"); +const SymbolWithLoc = MachO.SymbolWithLoc; const UnwindInfo = @import("UnwindInfo.zig"); +const Zld = @import("zld.zig").Zld; name: []const u8, mtime: u64, @@ -314,10 +315,10 @@ fn filterSymbolsBySection(symbols: []macho.nlist_64, n_sect: u8) struct { } }; - const index = @import("zld.zig").lsearch(macho.nlist_64, symbols, FirstMatch{ + const index = MachO.lsearch(macho.nlist_64, symbols, FirstMatch{ .n_sect = n_sect, }); - const len = @import("zld.zig").lsearch(macho.nlist_64, symbols[index..], FirstNonMatch{ + const len = MachO.lsearch(macho.nlist_64, symbols[index..], FirstNonMatch{ .n_sect = n_sect, }); @@ -336,10 +337,10 @@ fn filterSymbolsByAddress(symbols: []macho.nlist_64, start_addr: u64, end_addr: } }; - const index = @import("zld.zig").lsearch(macho.nlist_64, symbols, Predicate{ + const index = MachO.lsearch(macho.nlist_64, symbols, Predicate{ .addr = start_addr, }); - const len = @import("zld.zig").lsearch(macho.nlist_64, symbols[index..], Predicate{ + const len = MachO.lsearch(macho.nlist_64, 
symbols[index..], Predicate{ .addr = end_addr, }); @@ -631,8 +632,8 @@ fn filterRelocs( } }; - const start = @import("zld.zig").bsearch(macho.relocation_info, relocs, Predicate{ .addr = end_addr }); - const len = @import("zld.zig").lsearch(macho.relocation_info, relocs[start..], LPredicate{ .addr = start_addr }); + const start = MachO.bsearch(macho.relocation_info, relocs, Predicate{ .addr = end_addr }); + const len = MachO.lsearch(macho.relocation_info, relocs[start..], LPredicate{ .addr = start_addr }); return .{ .start = @as(u32, @intCast(start)), .len = @as(u32, @intCast(len)) }; } @@ -1031,7 +1032,7 @@ pub fn getSymbolByAddress(self: Object, addr: u64, sect_hint: ?u8) u32 { if (sect_hint) |sect_id| { if (self.source_section_index_lookup[sect_id].len > 0) { const lookup = self.source_section_index_lookup[sect_id]; - const target_sym_index = @import("zld.zig").lsearch( + const target_sym_index = MachO.lsearch( i64, self.source_address_lookup[lookup.start..][0..lookup.len], Predicate{ .addr = @as(i64, @intCast(addr)) }, @@ -1046,7 +1047,7 @@ pub fn getSymbolByAddress(self: Object, addr: u64, sect_hint: ?u8) u32 { return self.getSectionAliasSymbolIndex(sect_id); } - const target_sym_index = @import("zld.zig").lsearch(i64, self.source_address_lookup, Predicate{ + const target_sym_index = MachO.lsearch(i64, self.source_address_lookup, Predicate{ .addr = @as(i64, @intCast(addr)), }); assert(target_sym_index > 0); diff --git a/src/link/MachO/UnwindInfo.zig b/src/link/MachO/UnwindInfo.zig index aff1681d38..49a3ab9d01 100644 --- a/src/link/MachO/UnwindInfo.zig +++ b/src/link/MachO/UnwindInfo.zig @@ -15,8 +15,9 @@ const Allocator = mem.Allocator; const Atom = @import("ZldAtom.zig"); const AtomIndex = @import("zld.zig").AtomIndex; const EhFrameRecord = eh_frame.EhFrameRecord; +const MachO = @import("../MachO.zig"); const Object = @import("Object.zig"); -const SymbolWithLoc = @import("zld.zig").SymbolWithLoc; +const SymbolWithLoc = MachO.SymbolWithLoc; const Zld = 
@import("zld.zig").Zld; const N_DEAD = @import("zld.zig").N_DEAD; diff --git a/src/link/MachO/ZldAtom.zig b/src/link/MachO/ZldAtom.zig index f97bfca326..6bcd74ff0c 100644 --- a/src/link/MachO/ZldAtom.zig +++ b/src/link/MachO/ZldAtom.zig @@ -22,7 +22,7 @@ const Arch = std.Target.Cpu.Arch; const AtomIndex = @import("zld.zig").AtomIndex; const Object = @import("Object.zig"); const Relocation = @import("Relocation.zig"); -const SymbolWithLoc = @import("zld.zig").SymbolWithLoc; +const SymbolWithLoc = @import("../MachO.zig").SymbolWithLoc; const Zld = @import("zld.zig").Zld; /// Each Atom always gets a symbol with the fully qualified name. diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 563d68cc89..42d1604a0d 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -11,7 +11,8 @@ const mem = std.mem; const Allocator = mem.Allocator; const AtomIndex = @import("zld.zig").AtomIndex; const Atom = @import("ZldAtom.zig"); -const SymbolWithLoc = @import("zld.zig").SymbolWithLoc; +const MachO = @import("../MachO.zig"); +const SymbolWithLoc = MachO.SymbolWithLoc; const SymbolResolver = @import("zld.zig").SymbolResolver; const UnwindInfo = @import("UnwindInfo.zig"); const Zld = @import("zld.zig").Zld; diff --git a/src/link/MachO/eh_frame.zig b/src/link/MachO/eh_frame.zig index 9a30e863b9..6ef7a79977 100644 --- a/src/link/MachO/eh_frame.zig +++ b/src/link/MachO/eh_frame.zig @@ -9,8 +9,9 @@ const log = std.log.scoped(.eh_frame); const Allocator = mem.Allocator; const AtomIndex = @import("zld.zig").AtomIndex; const Atom = @import("ZldAtom.zig"); +const MachO = @import("../MachO.zig"); const Relocation = @import("Relocation.zig"); -const SymbolWithLoc = @import("zld.zig").SymbolWithLoc; +const SymbolWithLoc = MachO.SymbolWithLoc; const UnwindInfo = @import("UnwindInfo.zig"); const Zld = @import("zld.zig").Zld; diff --git a/src/link/MachO/thunks.zig b/src/link/MachO/thunks.zig index 82d0451225..66ae57e970 100644 --- 
a/src/link/MachO/thunks.zig +++ b/src/link/MachO/thunks.zig @@ -17,8 +17,9 @@ const aarch64 = @import("../../arch/aarch64/bits.zig"); const Allocator = mem.Allocator; const Atom = @import("ZldAtom.zig"); const AtomIndex = @import("zld.zig").AtomIndex; +const MachO = @import("../MachO.zig"); const Relocation = @import("Relocation.zig"); -const SymbolWithLoc = @import("zld.zig").SymbolWithLoc; +const SymbolWithLoc = MachO.SymbolWithLoc; const Zld = @import("zld.zig").Zld; pub const ThunkIndex = u32; diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 6ca227c430..5307a3ac02 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -32,6 +32,7 @@ const Md5 = std.crypto.hash.Md5; const LibStub = @import("../tapi.zig").LibStub; const Object = @import("Object.zig"); const StringTable = @import("../strtab.zig").StringTable; +const SymbolWithLoc = MachO.SymbolWithLoc; const Trie = @import("Trie.zig"); const UnwindInfo = @import("UnwindInfo.zig"); @@ -2038,8 +2039,8 @@ pub const Zld = struct { } }; - const start = lsearch(macho.data_in_code_entry, dices, Predicate{ .addr = start_addr }); - const end = lsearch(macho.data_in_code_entry, dices[start..], Predicate{ .addr = end_addr }) + start; + const start = MachO.lsearch(macho.data_in_code_entry, dices, Predicate{ .addr = start_addr }); + const end = MachO.lsearch(macho.data_in_code_entry, dices[start..], Predicate{ .addr = end_addr }) + start; return dices[start..end]; } @@ -3021,23 +3022,6 @@ const IndirectPointer = struct { } }; -pub const SymbolWithLoc = extern struct { - // Index into the respective symbol table. - sym_index: u32, - - // 0 means it's a synthetic global. 
- file: u32 = 0, - - pub fn getFile(self: SymbolWithLoc) ?u32 { - if (self.file == 0) return null; - return self.file - 1; - } - - pub fn eql(self: SymbolWithLoc, other: SymbolWithLoc) bool { - return self.file == other.file and self.sym_index == other.sym_index; - } -}; - pub const SymbolResolver = struct { arena: Allocator, table: std.StringHashMap(u32), @@ -3636,34 +3620,3 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr macho_file.base.lock = man.toOwnedLock(); } } - -/// Binary search -pub fn bsearch(comptime T: type, haystack: []align(1) const T, predicate: anytype) usize { - if (!@hasDecl(@TypeOf(predicate), "predicate")) - @compileError("Predicate is required to define fn predicate(@This(), T) bool"); - - var min: usize = 0; - var max: usize = haystack.len; - while (min < max) { - const index = (min + max) / 2; - const curr = haystack[index]; - if (predicate.predicate(curr)) { - min = index + 1; - } else { - max = index; - } - } - return min; -} - -/// Linear search -pub fn lsearch(comptime T: type, haystack: []align(1) const T, predicate: anytype) usize { - if (!@hasDecl(@TypeOf(predicate), "predicate")) - @compileError("Predicate is required to define fn predicate(@This(), T) bool"); - - var i: usize = 0; - while (i < haystack.len) : (i += 1) { - if (predicate.predicate(haystack[i])) break; - } - return i; -} From 05c9d6c00babc4ccc7949b3eb0224f70719d12a5 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 19 Aug 2023 16:59:56 +0200 Subject: [PATCH 05/57] macho: add simple error reporting for misc errors --- src/Compilation.zig | 16 ++++++++++++ src/link.zig | 20 +++++++++++++++ src/link/MachO.zig | 48 +++++++++++++++++++++++++++++++++-- src/link/MachO/dead_strip.zig | 2 +- src/link/MachO/zld.zig | 18 ++----------- 5 files changed, 85 insertions(+), 19 deletions(-) diff --git a/src/Compilation.zig b/src/Compilation.zig index 069cc8a2c7..a30d97f1da 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -2609,6 
+2609,9 @@ pub fn totalErrorCount(self: *Compilation) u32 { } total += @intFromBool(self.link_error_flags.missing_libc); + // Misc linker errors + total += self.bin_file.miscErrors().len; + // Compile log errors only count if there are no other errors. if (total == 0) { if (self.bin_file.options.module) |module| { @@ -2759,6 +2762,19 @@ pub fn getAllErrorsAlloc(self: *Compilation) !ErrorBundle { })); } + for (self.bin_file.miscErrors()) |link_err| { + try bundle.addRootErrorMessage(.{ + .msg = try bundle.addString(link_err.msg), + .notes_len = @intCast(link_err.notes.len), + }); + const notes_start = try bundle.reserveNotes(@intCast(link_err.notes.len)); + for (link_err.notes, 0..) |note, i| { + bundle.extra.items[notes_start + i] = @intFromEnum(try bundle.addErrorMessage(.{ + .msg = try bundle.addString(note.msg), + })); + } + } + if (self.bin_file.options.module) |module| { if (bundle.root_list.items.len == 0 and module.compile_log_decls.count() != 0) { const keys = module.compile_log_decls.keys(); diff --git a/src/link.zig b/src/link.zig index 48ad78364c..724c1500f4 100644 --- a/src/link.zig +++ b/src/link.zig @@ -866,6 +866,13 @@ pub const File = struct { } } + pub fn miscErrors(base: *File) []const ErrorMsg { + switch (base.tag) { + .macho => return @fieldParentPtr(MachO, "base", base).misc_errors.items, + else => return &.{}, + } + } + pub const UpdateDeclExportsError = error{ OutOfMemory, AnalysisFail, @@ -1129,6 +1136,19 @@ pub const File = struct { missing_libc: bool = false, }; + pub const ErrorMsg = struct { + msg: []const u8, + notes: []ErrorMsg = &.{}, + + pub fn deinit(self: *ErrorMsg, gpa: Allocator) void { + for (self.notes) |*note| { + note.deinit(gpa); + } + gpa.free(self.notes); + gpa.free(self.msg); + } + }; + pub const LazySymbol = struct { pub const Kind = enum { code, const_data }; diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 440c26260a..2b765ab6b9 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -52,8 +52,8 @@ const 
Value = @import("../value.zig").Value; pub const DebugSymbols = @import("MachO/DebugSymbols.zig"); -const Bind = @import("MachO/dyld_info/bind.zig").Bind(*const MachO, MachO.SymbolWithLoc); -const LazyBind = @import("MachO/dyld_info/bind.zig").LazyBind(*const MachO, MachO.SymbolWithLoc); +const Bind = @import("MachO/dyld_info/bind.zig").Bind(*const MachO, SymbolWithLoc); +const LazyBind = @import("MachO/dyld_info/bind.zig").LazyBind(*const MachO, SymbolWithLoc); const Rebase = @import("MachO/dyld_info/Rebase.zig"); pub const base_tag: File.Tag = File.Tag.macho; @@ -154,6 +154,7 @@ got_table: TableSection(SymbolWithLoc) = .{}, stub_table: TableSection(SymbolWithLoc) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, +misc_errors: std.ArrayListUnmanaged(File.ErrorMsg) = .{}, segment_table_dirty: bool = false, got_table_count_dirty: bool = false, @@ -295,6 +296,12 @@ pub const SymbolWithLoc = extern struct { } }; +pub const SymbolResolver = struct { + arena: Allocator, + table: std.StringHashMap(u32), + unresolved: std.AutoArrayHashMap(u32, void), +}; + const HotUpdateState = struct { mach_task: ?std.os.darwin.MachTask = null, }; @@ -1856,6 +1863,11 @@ pub fn deinit(self: *MachO) void { bindings.deinit(gpa); } self.bindings.deinit(gpa); + + for (self.misc_errors.items) |*err| { + err.deinit(gpa); + } + self.misc_errors.deinit(gpa); } fn freeAtom(self: *MachO, atom_index: Atom.Index) void { @@ -4021,6 +4033,38 @@ pub inline fn getPageSize(cpu_arch: std.Target.Cpu.Arch) u16 { }; } +pub fn reportUndefined(self: *MachO, ctx: anytype, resolver: *const SymbolResolver) !void { + const count = resolver.unresolved.count(); + if (count == 0) return; + + const gpa = self.base.allocator; + + try self.misc_errors.ensureUnusedCapacity(gpa, count); + + for (resolver.unresolved.keys()) |global_index| { + const global = ctx.globals.items[global_index]; + const sym_name = ctx.getSymbolName(global); + + const nnotes: usize = if (global.getFile() == null) @as(usize, 0) else 1; + 
var notes = try std.ArrayList(File.ErrorMsg).initCapacity(gpa, nnotes); + defer notes.deinit(); + + if (global.getFile()) |file| { + const note = try std.fmt.allocPrint(gpa, "referenced in {s}", .{ctx.objects.items[file].name}); + notes.appendAssumeCapacity(.{ .msg = note }); + } + + var err_msg = File.ErrorMsg{ + .msg = try std.fmt.allocPrint(gpa, "undefined reference to symbol {s}", .{sym_name}), + }; + err_msg.notes = try notes.toOwnedSlice(); + + self.misc_errors.appendAssumeCapacity(err_msg); + } + + return error.FlushFailure; +} + /// Binary search pub fn bsearch(comptime T: type, haystack: []align(1) const T, predicate: anytype) usize { if (!@hasDecl(@TypeOf(predicate), "predicate")) diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 42d1604a0d..282e3a21c7 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -13,7 +13,7 @@ const AtomIndex = @import("zld.zig").AtomIndex; const Atom = @import("ZldAtom.zig"); const MachO = @import("../MachO.zig"); const SymbolWithLoc = MachO.SymbolWithLoc; -const SymbolResolver = @import("zld.zig").SymbolResolver; +const SymbolResolver = MachO.SymbolResolver; const UnwindInfo = @import("UnwindInfo.zig"); const Zld = @import("zld.zig").Zld; diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 5307a3ac02..9e2271bccf 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -33,6 +33,7 @@ const LibStub = @import("../tapi.zig").LibStub; const Object = @import("Object.zig"); const StringTable = @import("../strtab.zig").StringTable; const SymbolWithLoc = MachO.SymbolWithLoc; +const SymbolResolver = MachO.SymbolResolver; const Trie = @import("Trie.zig"); const UnwindInfo = @import("UnwindInfo.zig"); @@ -788,7 +789,6 @@ pub const Zld = struct { const global_index = resolver.unresolved.keys()[next_sym]; const global = self.globals.items[global_index]; const sym = self.getSymbolPtr(global); - const sym_name = self.getSymbolName(global); if 
(sym.discarded()) { sym.* = .{ @@ -811,11 +811,6 @@ pub const Zld = struct { continue; } - log.err("undefined reference to symbol '{s}'", .{sym_name}); - if (global.getFile()) |file| { - log.err(" first referenced in '{s}'", .{self.objects.items[file].name}); - } - next_sym += 1; } } @@ -3022,12 +3017,6 @@ const IndirectPointer = struct { } }; -pub const SymbolResolver = struct { - arena: Allocator, - table: std.StringHashMap(u32), - unresolved: std.AutoArrayHashMap(u32, void), -}; - pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) link.File.FlushError!void { const tracy = trace(@src()); defer tracy.end(); @@ -3419,10 +3408,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr .unresolved = std.AutoArrayHashMap(u32, void).init(arena), }; try zld.resolveSymbols(&resolver); - - if (resolver.unresolved.count() > 0) { - return error.UndefinedSymbolReference; - } + try macho_file.reportUndefined(&zld, &resolver); if (options.output_mode == .Exe) { const entry_name = options.entry orelse load_commands.default_entry_point; From e1e0ccb0c7ac32024aeb079a6cb57e237f941473 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 21 Aug 2023 22:17:34 +0200 Subject: [PATCH 06/57] macho: unify Section concept across drivers --- src/link/MachO.zig | 6 ++--- src/link/MachO/Object.zig | 10 +++---- src/link/MachO/dead_strip.zig | 4 +-- src/link/MachO/thunks.zig | 2 +- src/link/MachO/zld.zig | 51 +++++++++++++---------------------- 5 files changed, 27 insertions(+), 46 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 2b765ab6b9..05b6bcef5a 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -69,12 +69,10 @@ pub const Mode = enum { zld, }; -const Section = struct { +pub const Section = struct { header: macho.section_64, segment_index: u8, - - // TODO is null here necessary, or can we do away with tracking via section - // size in incremental context? 
+ first_atom_index: ?Atom.Index = null, last_atom_index: ?Atom.Index = null, /// A list of atoms that have surplus capacity. This list can have false diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 5e67a334c0..e407b83017 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -55,7 +55,7 @@ source_section_index_lookup: []Entry = undefined, /// Can be undefined as set together with in_symtab. strtab_lookup: []u32 = undefined, /// Can be undefined as set together with in_symtab. -atom_by_index_table: []AtomIndex = undefined, +atom_by_index_table: []?AtomIndex = undefined, /// Can be undefined as set together with in_symtab. globals_lookup: []i64 = undefined, /// Can be undefined as set together with in_symtab. @@ -156,7 +156,7 @@ pub fn parse(self: *Object, allocator: Allocator) !void { self.reverse_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); self.globals_lookup = try allocator.alloc(i64, self.in_symtab.?.len); - self.atom_by_index_table = try allocator.alloc(AtomIndex, self.in_symtab.?.len + nsects); + self.atom_by_index_table = try allocator.alloc(?AtomIndex, self.in_symtab.?.len + nsects); self.relocs_lookup = try allocator.alloc(Entry, self.in_symtab.?.len + nsects); // This is wasteful but we need to be able to lookup source symbol address after stripping and // allocating of sections. 
@@ -174,7 +174,7 @@ pub fn parse(self: *Object, allocator: Allocator) !void { } @memset(self.globals_lookup, -1); - @memset(self.atom_by_index_table, 0); + @memset(self.atom_by_index_table, null); @memset(self.source_section_index_lookup, .{}); @memset(self.relocs_lookup, .{}); @@ -1060,9 +1060,7 @@ pub fn getGlobal(self: Object, sym_index: u32) ?u32 { } pub fn getAtomIndexForSymbol(self: Object, sym_index: u32) ?AtomIndex { - const atom_index = self.atom_by_index_table[sym_index]; - if (atom_index == 0) return null; - return atom_index; + return self.atom_by_index_table[sym_index]; } pub fn hasUnwindRecords(self: Object) bool { diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 282e3a21c7..b2031590ed 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -466,8 +466,8 @@ fn prune(zld: *Zld, alive: AtomTable) void { section.last_atom_index = prev_index; } else { assert(section.header.size == 0); - section.first_atom_index = 0; - section.last_atom_index = 0; + section.first_atom_index = null; + section.last_atom_index = null; } } diff --git a/src/link/MachO/thunks.zig b/src/link/MachO/thunks.zig index 66ae57e970..e5ef305077 100644 --- a/src/link/MachO/thunks.zig +++ b/src/link/MachO/thunks.zig @@ -75,7 +75,7 @@ pub fn createThunks(zld: *Zld, sect_id: u8) !void { if (header.size == 0) return; const gpa = zld.gpa; - const first_atom_index = zld.sections.items(.first_atom_index)[sect_id]; + const first_atom_index = zld.sections.items(.first_atom_index)[sect_id].?; header.size = 0; header.@"align" = 0; diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 9e2271bccf..d82e80a20a 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -31,6 +31,7 @@ const MachO = @import("../MachO.zig"); const Md5 = std.crypto.hash.Md5; const LibStub = @import("../tapi.zig").LibStub; const Object = @import("Object.zig"); +const Section = MachO.Section; const StringTable = 
@import("../strtab.zig").StringTable; const SymbolWithLoc = MachO.SymbolWithLoc; const SymbolResolver = MachO.SymbolResolver; @@ -231,7 +232,7 @@ pub const Zld = struct { const sym = self.getSymbol(atom.getSymbolWithLoc()); var section = self.sections.get(sym.n_sect - 1); if (section.header.size > 0) { - const last_atom = self.getAtomPtr(section.last_atom_index); + const last_atom = self.getAtomPtr(section.last_atom_index.?); last_atom.next_index = atom_index; atom.prev_index = section.last_atom_index; } else { @@ -445,7 +446,7 @@ pub const Zld = struct { fn writeLazyPointer(self: *Zld, stub_helper_index: u32, writer: anytype) !void { const target_addr = blk: { const sect_id = self.getSectionByName("__TEXT", "__stub_helper").?; - var atom_index = self.sections.items(.first_atom_index)[sect_id]; + var atom_index = self.sections.items(.first_atom_index)[sect_id].?; var count: u32 = 0; while (count < stub_helper_index + 1) : (count += 1) { const atom = self.getAtom(atom_index); @@ -497,7 +498,7 @@ pub const Zld = struct { const target_addr = blk: { // TODO: cache this at stub atom creation; they always go in pairs anyhow const la_sect_id = self.getSectionByName("__DATA", "__la_symbol_ptr").?; - var la_atom_index = self.sections.items(.first_atom_index)[la_sect_id]; + var la_atom_index = self.sections.items(.first_atom_index)[la_sect_id].?; var count: u32 = 0; while (count < stub_index) : (count += 1) { const la_atom = self.getAtom(la_atom_index); @@ -1012,11 +1013,10 @@ pub const Zld = struct { for (slice.items(.first_atom_index), 0..) 
|first_atom_index, sect_id| { const header = slice.items(.header)[sect_id]; - var atom_index = first_atom_index; - - if (atom_index == 0) continue; if (header.isZerofill()) continue; + var atom_index = first_atom_index orelse continue; + var buffer = std.ArrayList(u8).init(gpa); defer buffer.deinit(); try buffer.ensureTotalCapacity(math.cast(usize, header.size) orelse return error.Overflow); @@ -1129,7 +1129,7 @@ pub const Zld = struct { while (i < slice.len) : (i += 1) { const section = self.sections.get(i); if (section.header.size == 0) { - log.debug("pruning section {s},{s} {d}", .{ + log.debug("pruning section {s},{s} {?d}", .{ section.header.segName(), section.header.sectName(), section.first_atom_index, @@ -1156,8 +1156,7 @@ pub const Zld = struct { if (header.isCode() and !(header.type() == macho.S_SYMBOL_STUBS) and !mem.eql(u8, header.sectName(), "__stub_helper")) continue; } - var atom_index = slice.items(.first_atom_index)[sect_id]; - if (atom_index == 0) continue; + var atom_index = slice.items(.first_atom_index)[sect_id] orelse continue; header.size = 0; header.@"align" = 0; @@ -1195,8 +1194,7 @@ pub const Zld = struct { // We need to do this since our unwind info synthesiser relies on // traversing the symbols when synthesising unwind info and DWARF CFI records. 
for (slice.items(.first_atom_index)) |first_atom_index| { - if (first_atom_index == 0) continue; - var atom_index = first_atom_index; + var atom_index = first_atom_index orelse continue; while (true) { const atom = self.getAtom(atom_index); @@ -1278,8 +1276,9 @@ pub const Zld = struct { @as(u32, @intCast(segment.fileoff + start_aligned)); header.addr = segment.vmaddr + start_aligned; - var atom_index = slice.items(.first_atom_index)[indexes.start + sect_id]; - if (atom_index > 0) { + if (slice.items(.first_atom_index)[indexes.start + sect_id]) |first_atom_index| { + var atom_index = first_atom_index; + log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ n_sect, header.segName(), @@ -1362,8 +1361,6 @@ pub const Zld = struct { .reserved1 = opts.reserved1, .reserved2 = opts.reserved2, }, - .first_atom_index = 0, - .last_atom_index = 0, }); return index; } @@ -1491,7 +1488,7 @@ pub const Zld = struct { if (self.getSectionByName("__DATA", "__la_symbol_ptr")) |sect_id| { const segment_index = slice.items(.segment_index)[sect_id]; const seg = self.getSegment(sect_id); - var atom_index = slice.items(.first_atom_index)[sect_id]; + var atom_index = slice.items(.first_atom_index)[sect_id].?; try rebase.entries.ensureUnusedCapacity(self.gpa, self.stubs.items.len); @@ -1531,8 +1528,7 @@ pub const Zld = struct { log.debug("{s},{s}", .{ header.segName(), header.sectName() }); const cpu_arch = self.options.target.cpu.arch; - var atom_index = slice.items(.first_atom_index)[sect_id]; - if (atom_index == 0) continue; + var atom_index = slice.items(.first_atom_index)[sect_id] orelse continue; while (true) { const atom = self.getAtom(atom_index); @@ -1668,8 +1664,7 @@ pub const Zld = struct { if (segment.maxprot & macho.PROT.WRITE == 0) continue; const cpu_arch = self.options.target.cpu.arch; - var atom_index = slice.items(.first_atom_index)[sect_id]; - if (atom_index == 0) continue; + var atom_index = slice.items(.first_atom_index)[sect_id] orelse continue; 
log.debug("{s},{s}", .{ header.segName(), header.sectName() }); @@ -1757,7 +1752,7 @@ pub const Zld = struct { const slice = self.sections.slice(); const segment_index = slice.items(.segment_index)[sect_id]; const seg = self.getSegment(sect_id); - var atom_index = slice.items(.first_atom_index)[sect_id]; + var atom_index = slice.items(.first_atom_index)[sect_id].?; // TODO: we actually don't need to store lazy pointer atoms as they are synthetically generated by the linker try lazy_bind.entries.ensureUnusedCapacity(self.gpa, self.stubs.items.len); @@ -1920,7 +1915,7 @@ pub const Zld = struct { const section = self.sections.get(stub_helper_section_index); const stub_offset = stub_helpers.calcStubOffsetInStubHelper(self.options.target.cpu.arch); const header = section.header; - var atom_index = section.first_atom_index; + var atom_index = section.first_atom_index.?; atom_index = self.getAtom(atom_index).next_index.?; // skip preamble var index: usize = 0; @@ -2923,9 +2918,7 @@ pub const Zld = struct { log.debug("atoms:", .{}); const slice = self.sections.slice(); for (slice.items(.first_atom_index), 0..) 
|first_atom_index, sect_id| { - var atom_index = first_atom_index; - if (atom_index == 0) continue; - + var atom_index = first_atom_index orelse continue; const header = slice.items(.header)[sect_id]; log.debug("{s},{s}", .{ header.segName(), header.sectName() }); @@ -2990,13 +2983,6 @@ pub const Zld = struct { pub const N_DEAD: u16 = @as(u16, @bitCast(@as(i16, -1))); -const Section = struct { - header: macho.section_64, - segment_index: u8, - first_atom_index: AtomIndex, - last_atom_index: AtomIndex, -}; - pub const AtomIndex = u32; const IndirectPointer = struct { @@ -3183,7 +3169,6 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr }; defer zld.deinit(); - try zld.atoms.append(gpa, Atom.empty); // AtomIndex at 0 is reserved as null atom try zld.strtab.buffer.append(gpa, 0); // Positional arguments to the linker such as object files and static archives. From 85f2df5050fd6cec25bd251159795bb0fa0f8731 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 21 Aug 2023 22:41:40 +0200 Subject: [PATCH 07/57] macho: unify more of the linker state --- src/link/MachO.zig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 05b6bcef5a..e6f7bd4828 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -106,7 +106,9 @@ mode: Mode, dyld_info_cmd: macho.dyld_info_command = .{}, symtab_cmd: macho.symtab_command = .{}, dysymtab_cmd: macho.dysymtab_command = .{}, -uuid_cmd: macho.uuid_command = .{}, +function_starts_cmd: macho.linkedit_data_command = .{ .cmd = .FUNCTION_STARTS }, +data_in_code_cmd: macho.linkedit_data_command = .{ .cmd = .DATA_IN_CODE }, +uuid_cmd: macho.uuid_command = .{ .uuid = [_]u8{0} ** 16 }, codesig_cmd: macho.linkedit_data_command = .{ .cmd = .CODE_SIGNATURE }, objects: std.ArrayListUnmanaged(Object) = .{}, From da9e7e498af411de477a7b59e72ef763b22f4f5a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 22 Aug 2023 14:03:45 +0200 Subject: [PATCH 08/57] 
macho: unify Atom concept between drivers --- CMakeLists.txt | 1 - src/link/MachO.zig | 3 + src/link/MachO/Atom.zig | 967 ++++++++++++++++++++++++++++++- src/link/MachO/Object.zig | 2 +- src/link/MachO/UnwindInfo.zig | 2 +- src/link/MachO/ZldAtom.zig | 1012 --------------------------------- src/link/MachO/dead_strip.zig | 2 +- src/link/MachO/eh_frame.zig | 2 +- src/link/MachO/thunks.zig | 2 +- src/link/MachO/zld.zig | 21 +- 10 files changed, 980 insertions(+), 1034 deletions(-) delete mode 100644 src/link/MachO/ZldAtom.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index d47c020ca8..3b7c1b30b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -588,7 +588,6 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/Relocation.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/UnwindInfo.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/ZldAtom.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/bind.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/Rebase.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig" diff --git a/src/link/MachO.zig b/src/link/MachO.zig index e6f7bd4828..f0db232e4b 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1597,8 +1597,11 @@ pub fn createAtom(self: *MachO) !Atom.Index { try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom_index); atom.* = .{ .sym_index = sym_index, + .inner_sym_index = 0, + .inner_nsyms_trailing = 0, .file = 0, .size = 0, + .alignment = 0, .prev_index = null, .next_index = null, }; diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 7dc97003aa..73d90eb532 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -16,12 +16,13 @@ const Arch = std.Target.Cpu.Arch; const MachO = @import("../MachO.zig"); pub const Relocation = @import("Relocation.zig"); const SymbolWithLoc = MachO.SymbolWithLoc; +const Zld = @import("zld.zig").Zld; /// Each Atom always gets a symbol with the fully qualified name. 
/// The symbol can reside in any object file context structure in `symtab` array /// (see `Object`), or if the symbol is a synthetic symbol such as a GOT cell or /// a stub trampoline, it can be found in the linkers `locals` arraylist. -/// If this field is 0, it means the codegen size = 0 and there is no symbol or +/// If this field is 0 and file is 0, it means the codegen size = 0 and there is no symbol or /// offset table entry. sym_index: u32, @@ -31,11 +32,24 @@ sym_index: u32, /// the field directly. file: u32, +/// If this Atom is not a synthetic Atom, i.e., references a subsection in an +/// Object file, `inner_sym_index` and `inner_nsyms_trailing` tell where and if +/// this Atom contains any additional symbol references that fall within this Atom's +/// address range. These could for example be an alias symbol which can be used +/// internally by the relocation records, or if the Object file couldn't be split +/// into subsections, this Atom may encompass an entire input section. +inner_sym_index: u32, +inner_nsyms_trailing: u32, + /// Size and alignment of this atom /// Unlike in Elf, we need to store the size of this symbol as part of /// the atom since macho.nlist_64 lacks this information. size: u64, +/// Alignment of this atom as a power of 2. +/// For instance, alignment of 0 should be read as 2^0 = 1 byte aligned. +alignment: u32, + /// Points to the previous and next neighbours /// TODO use the same trick as with symbols: reserve index 0 as null atom next_index: ?Index, @@ -48,13 +62,15 @@ pub const Binding = struct { offset: u64, }; -pub const SymbolAtOffset = struct { - sym_index: u32, - offset: u64, -}; +/// Returns `null` if the Atom is a synthetic Atom. +/// Otherwise, returns an index into an array of Objects. 
+pub fn getFile(self: Atom) ?u32 { + if (self.file == 0) return null; + return self.file - 1; +} pub fn getSymbolIndex(self: Atom) ?u32 { - if (self.sym_index == 0) return null; + if (self.getFile() == null and self.sym_index == 0) return null; return self.sym_index; } @@ -66,10 +82,7 @@ pub fn getSymbol(self: Atom, macho_file: *MachO) macho.nlist_64 { /// Returns pointer-to-symbol referencing this atom. pub fn getSymbolPtr(self: Atom, macho_file: *MachO) *macho.nlist_64 { const sym_index = self.getSymbolIndex().?; - return macho_file.getSymbolPtr(.{ - .sym_index = sym_index, - .file = self.file, - }); + return macho_file.getSymbolPtr(.{ .sym_index = sym_index, .file = self.file }); } pub fn getSymbolWithLoc(self: Atom) SymbolWithLoc { @@ -80,10 +93,7 @@ pub fn getSymbolWithLoc(self: Atom) SymbolWithLoc { /// Returns the name of this atom. pub fn getName(self: Atom, macho_file: *MachO) []const u8 { const sym_index = self.getSymbolIndex().?; - return macho_file.getSymbolName(.{ - .sym_index = sym_index, - .file = self.file, - }); + return macho_file.getSymbolName(.{ .sym_index = sym_index, .file = self.file }); } /// Returns how much room there is to grow in virtual address space. @@ -182,3 +192,932 @@ pub fn freeRelocations(macho_file: *MachO, atom_index: Index) void { var removed_bindings = macho_file.bindings.fetchOrderedRemove(atom_index); if (removed_bindings) |*bindings| bindings.value.deinit(gpa); } + +const InnerSymIterator = struct { + sym_index: u32, + nsyms: u32, + file: u32, + pos: u32 = 0, + + pub fn next(it: *@This()) ?SymbolWithLoc { + if (it.pos == it.nsyms) return null; + const res = SymbolWithLoc{ .sym_index = it.sym_index + it.pos, .file = it.file }; + it.pos += 1; + return res; + } +}; + +/// Returns an iterator over potentially contained symbols. +/// Panics when called on a synthetic Atom. 
+pub fn getInnerSymbolsIterator(zld: *Zld, atom_index: Index) InnerSymIterator { + const atom = zld.getAtom(atom_index); + assert(atom.getFile() != null); + return .{ + .sym_index = atom.inner_sym_index, + .nsyms = atom.inner_nsyms_trailing, + .file = atom.file, + }; +} + +/// Returns a section alias symbol if one is defined. +/// An alias symbol is used to represent the start of an input section +/// if there were no symbols defined within that range. +/// Alias symbols are only used on x86_64. +pub fn getSectionAlias(zld: *Zld, atom_index: Index) ?SymbolWithLoc { + const atom = zld.getAtom(atom_index); + assert(atom.getFile() != null); + + const object = zld.objects.items[atom.getFile().?]; + const nbase = @as(u32, @intCast(object.in_symtab.?.len)); + const ntotal = @as(u32, @intCast(object.symtab.len)); + var sym_index: u32 = nbase; + while (sym_index < ntotal) : (sym_index += 1) { + if (object.getAtomIndexForSymbol(sym_index)) |other_atom_index| { + if (other_atom_index == atom_index) return SymbolWithLoc{ + .sym_index = sym_index, + .file = atom.file, + }; + } + } + return null; +} + +/// Given an index into a contained symbol within, calculates an offset wrt +/// the start of this Atom. 
+pub fn calcInnerSymbolOffset(zld: *Zld, atom_index: Index, sym_index: u32) u64 { + const atom = zld.getAtom(atom_index); + assert(atom.getFile() != null); + + if (atom.sym_index == sym_index) return 0; + + const object = zld.objects.items[atom.getFile().?]; + const source_sym = object.getSourceSymbol(sym_index).?; + const base_addr = if (object.getSourceSymbol(atom.sym_index)) |sym| + sym.n_value + else blk: { + const nbase = @as(u32, @intCast(object.in_symtab.?.len)); + const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); + const source_sect = object.getSourceSection(sect_id); + break :blk source_sect.addr; + }; + return source_sym.n_value - base_addr; +} + +pub fn scanAtomRelocs(zld: *Zld, atom_index: Index, relocs: []align(1) const macho.relocation_info) !void { + const arch = zld.options.target.cpu.arch; + const atom = zld.getAtom(atom_index); + assert(atom.getFile() != null); // synthetic atoms do not have relocs + + return switch (arch) { + .aarch64 => scanAtomRelocsArm64(zld, atom_index, relocs), + .x86_64 => scanAtomRelocsX86(zld, atom_index, relocs), + else => unreachable, + }; +} + +const RelocContext = struct { + base_addr: i64 = 0, + base_offset: i32 = 0, +}; + +pub fn getRelocContext(zld: *Zld, atom_index: Index) RelocContext { + const atom = zld.getAtom(atom_index); + assert(atom.getFile() != null); // synthetic atoms do not have relocs + + const object = zld.objects.items[atom.getFile().?]; + if (object.getSourceSymbol(atom.sym_index)) |source_sym| { + const source_sect = object.getSourceSection(source_sym.n_sect - 1); + return .{ + .base_addr = @as(i64, @intCast(source_sect.addr)), + .base_offset = @as(i32, @intCast(source_sym.n_value - source_sect.addr)), + }; + } + const nbase = @as(u32, @intCast(object.in_symtab.?.len)); + const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); + const source_sect = object.getSourceSection(sect_id); + return .{ + .base_addr = @as(i64, @intCast(source_sect.addr)), + .base_offset = 0, + }; +} + +pub fn 
parseRelocTarget(zld: *Zld, ctx: struct { + object_id: u32, + rel: macho.relocation_info, + code: []const u8, + base_addr: i64 = 0, + base_offset: i32 = 0, +}) SymbolWithLoc { + const tracy = trace(@src()); + defer tracy.end(); + + const object = &zld.objects.items[ctx.object_id]; + log.debug("parsing reloc target in object({d}) '{s}' ", .{ ctx.object_id, object.name }); + + const sym_index = if (ctx.rel.r_extern == 0) sym_index: { + const sect_id = @as(u8, @intCast(ctx.rel.r_symbolnum - 1)); + const rel_offset = @as(u32, @intCast(ctx.rel.r_address - ctx.base_offset)); + + const address_in_section = if (ctx.rel.r_pcrel == 0) blk: { + break :blk if (ctx.rel.r_length == 3) + mem.readIntLittle(u64, ctx.code[rel_offset..][0..8]) + else + mem.readIntLittle(u32, ctx.code[rel_offset..][0..4]); + } else blk: { + assert(zld.options.target.cpu.arch == .x86_64); + const correction: u3 = switch (@as(macho.reloc_type_x86_64, @enumFromInt(ctx.rel.r_type))) { + .X86_64_RELOC_SIGNED => 0, + .X86_64_RELOC_SIGNED_1 => 1, + .X86_64_RELOC_SIGNED_2 => 2, + .X86_64_RELOC_SIGNED_4 => 4, + else => unreachable, + }; + const addend = mem.readIntLittle(i32, ctx.code[rel_offset..][0..4]); + const target_address = @as(i64, @intCast(ctx.base_addr)) + ctx.rel.r_address + 4 + correction + addend; + break :blk @as(u64, @intCast(target_address)); + }; + + // Find containing atom + log.debug(" | locating symbol by address @{x} in section {d}", .{ address_in_section, sect_id }); + break :sym_index object.getSymbolByAddress(address_in_section, sect_id); + } else object.reverse_symtab_lookup[ctx.rel.r_symbolnum]; + + const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = ctx.object_id + 1 }; + const sym = zld.getSymbol(sym_loc); + const target = if (sym.sect() and !sym.ext()) + sym_loc + else if (object.getGlobal(sym_index)) |global_index| + zld.globals.items[global_index] + else + sym_loc; + log.debug(" | target %{d} ('{s}') in object({?d})", .{ + target.sym_index, + zld.getSymbolName(target), 
+ target.getFile(), + }); + return target; +} + +pub fn getRelocTargetAtomIndex(zld: *Zld, target: SymbolWithLoc, is_via_got: bool) ?Index { + if (is_via_got) { + return zld.getGotAtomIndexForSymbol(target).?; // panic means fatal error + } + if (zld.getStubsAtomIndexForSymbol(target)) |stubs_atom| return stubs_atom; + if (zld.getTlvPtrAtomIndexForSymbol(target)) |tlv_ptr_atom| return tlv_ptr_atom; + + if (target.getFile() == null) { + const target_sym_name = zld.getSymbolName(target); + if (mem.eql(u8, "__mh_execute_header", target_sym_name)) return null; + if (mem.eql(u8, "___dso_handle", target_sym_name)) return null; + + unreachable; // referenced symbol not found + } + + const object = zld.objects.items[target.getFile().?]; + return object.getAtomIndexForSymbol(target.sym_index); +} + +fn scanAtomRelocsArm64(zld: *Zld, atom_index: Index, relocs: []align(1) const macho.relocation_info) !void { + for (relocs) |rel| { + const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); + + switch (rel_type) { + .ARM64_RELOC_ADDEND, .ARM64_RELOC_SUBTRACTOR => continue, + else => {}, + } + + if (rel.r_extern == 0) continue; + + const atom = zld.getAtom(atom_index); + const object = &zld.objects.items[atom.getFile().?]; + const sym_index = object.reverse_symtab_lookup[rel.r_symbolnum]; + const sym_loc = SymbolWithLoc{ + .sym_index = sym_index, + .file = atom.file, + }; + + const target = if (object.getGlobal(sym_index)) |global_index| + zld.globals.items[global_index] + else + sym_loc; + + switch (rel_type) { + .ARM64_RELOC_BRANCH26 => { + // TODO rewrite relocation + try addStub(zld, target); + }, + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .ARM64_RELOC_POINTER_TO_GOT, + => { + // TODO rewrite relocation + try addGotEntry(zld, target); + }, + .ARM64_RELOC_TLVP_LOAD_PAGE21, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, + => { + try addTlvPtrEntry(zld, target); + }, + else => {}, + } + } +} + +fn scanAtomRelocsX86(zld: *Zld, atom_index: Index, 
relocs: []align(1) const macho.relocation_info) !void { + for (relocs) |rel| { + const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); + + switch (rel_type) { + .X86_64_RELOC_SUBTRACTOR => continue, + else => {}, + } + + if (rel.r_extern == 0) continue; + + const atom = zld.getAtom(atom_index); + const object = &zld.objects.items[atom.getFile().?]; + const sym_index = object.reverse_symtab_lookup[rel.r_symbolnum]; + const sym_loc = SymbolWithLoc{ + .sym_index = sym_index, + .file = atom.file, + }; + + const target = if (object.getGlobal(sym_index)) |global_index| + zld.globals.items[global_index] + else + sym_loc; + + switch (rel_type) { + .X86_64_RELOC_BRANCH => { + // TODO rewrite relocation + try addStub(zld, target); + }, + .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => { + // TODO rewrite relocation + try addGotEntry(zld, target); + }, + .X86_64_RELOC_TLV => { + try addTlvPtrEntry(zld, target); + }, + else => {}, + } + } +} + +fn addTlvPtrEntry(zld: *Zld, target: SymbolWithLoc) !void { + const target_sym = zld.getSymbol(target); + if (!target_sym.undf()) return; + if (zld.tlv_ptr_table.contains(target)) return; + + const gpa = zld.gpa; + const atom_index = try zld.createTlvPtrAtom(); + const tlv_ptr_index = @as(u32, @intCast(zld.tlv_ptr_entries.items.len)); + try zld.tlv_ptr_entries.append(gpa, .{ + .target = target, + .atom_index = atom_index, + }); + try zld.tlv_ptr_table.putNoClobber(gpa, target, tlv_ptr_index); +} + +pub fn addGotEntry(zld: *Zld, target: SymbolWithLoc) !void { + if (zld.got_table.contains(target)) return; + const gpa = zld.gpa; + const atom_index = try zld.createGotAtom(); + const got_index = @as(u32, @intCast(zld.got_entries.items.len)); + try zld.got_entries.append(gpa, .{ + .target = target, + .atom_index = atom_index, + }); + try zld.got_table.putNoClobber(gpa, target, got_index); +} + +pub fn addStub(zld: *Zld, target: SymbolWithLoc) !void { + const target_sym = zld.getSymbol(target); + if (!target_sym.undf()) return; 
+ if (zld.stubs_table.contains(target)) return; + + const gpa = zld.gpa; + _ = try zld.createStubHelperAtom(); + _ = try zld.createLazyPointerAtom(); + const atom_index = try zld.createStubAtom(); + const stubs_index = @as(u32, @intCast(zld.stubs.items.len)); + try zld.stubs.append(gpa, .{ + .target = target, + .atom_index = atom_index, + }); + try zld.stubs_table.putNoClobber(gpa, target, stubs_index); +} + +pub fn resolveRelocs( + zld: *Zld, + atom_index: Index, + atom_code: []u8, + atom_relocs: []align(1) const macho.relocation_info, +) !void { + const arch = zld.options.target.cpu.arch; + const atom = zld.getAtom(atom_index); + assert(atom.getFile() != null); // synthetic atoms do not have relocs + + log.debug("resolving relocations in ATOM(%{d}, '{s}')", .{ + atom.sym_index, + zld.getSymbolName(atom.getSymbolWithLoc()), + }); + + const ctx = getRelocContext(zld, atom_index); + + return switch (arch) { + .aarch64 => resolveRelocsArm64(zld, atom_index, atom_code, atom_relocs, ctx), + .x86_64 => resolveRelocsX86(zld, atom_index, atom_code, atom_relocs, ctx), + else => unreachable, + }; +} + +pub fn getRelocTargetAddress(zld: *Zld, target: SymbolWithLoc, is_via_got: bool, is_tlv: bool) !u64 { + const target_atom_index = getRelocTargetAtomIndex(zld, target, is_via_got) orelse { + // If there is no atom for target, we still need to check for special, atom-less + // symbols such as `___dso_handle`. 
+ const target_name = zld.getSymbolName(target); + const atomless_sym = zld.getSymbol(target); + log.debug(" | atomless target '{s}'", .{target_name}); + return atomless_sym.n_value; + }; + const target_atom = zld.getAtom(target_atom_index); + log.debug(" | target ATOM(%{d}, '{s}') in object({?})", .{ + target_atom.sym_index, + zld.getSymbolName(target_atom.getSymbolWithLoc()), + target_atom.getFile(), + }); + + const target_sym = zld.getSymbol(target_atom.getSymbolWithLoc()); + assert(target_sym.n_desc != @import("zld.zig").N_DEAD); + + // If `target` is contained within the target atom, pull its address value. + const offset = if (target_atom.getFile() != null) blk: { + const object = zld.objects.items[target_atom.getFile().?]; + break :blk if (object.getSourceSymbol(target.sym_index)) |_| + Atom.calcInnerSymbolOffset(zld, target_atom_index, target.sym_index) + else + 0; // section alias + } else 0; + const base_address: u64 = if (is_tlv) base_address: { + // For TLV relocations, the value specified as a relocation is the displacement from the + // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first + // defined TLV template init section in the following order: + // * wrt to __thread_data if defined, then + // * wrt to __thread_bss + const sect_id: u16 = sect_id: { + if (zld.getSectionByName("__DATA", "__thread_data")) |i| { + break :sect_id i; + } else if (zld.getSectionByName("__DATA", "__thread_bss")) |i| { + break :sect_id i; + } else { + log.err("threadlocal variables present but no initializer sections found", .{}); + log.err(" __thread_data not found", .{}); + log.err(" __thread_bss not found", .{}); + return error.FailedToResolveRelocationTarget; + } + }; + break :base_address zld.sections.items(.header)[sect_id].addr; + } else 0; + return target_sym.n_value + offset - base_address; +} + +fn resolveRelocsArm64( + zld: *Zld, + atom_index: Index, + atom_code: []u8, + atom_relocs: []align(1) const macho.relocation_info, 
+ context: RelocContext, +) !void { + const atom = zld.getAtom(atom_index); + const object = zld.objects.items[atom.getFile().?]; + + var addend: ?i64 = null; + var subtractor: ?SymbolWithLoc = null; + + for (atom_relocs) |rel| { + const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); + + switch (rel_type) { + .ARM64_RELOC_ADDEND => { + assert(addend == null); + + log.debug(" RELA({s}) @ {x} => {x}", .{ @tagName(rel_type), rel.r_address, rel.r_symbolnum }); + + addend = rel.r_symbolnum; + continue; + }, + .ARM64_RELOC_SUBTRACTOR => { + assert(subtractor == null); + + log.debug(" RELA({s}) @ {x} => %{d} in object({?d})", .{ + @tagName(rel_type), + rel.r_address, + rel.r_symbolnum, + atom.getFile(), + }); + + subtractor = parseRelocTarget(zld, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = atom_code, + .base_addr = context.base_addr, + .base_offset = context.base_offset, + }); + continue; + }, + else => {}, + } + + const target = parseRelocTarget(zld, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = atom_code, + .base_addr = context.base_addr, + .base_offset = context.base_offset, + }); + const rel_offset = @as(u32, @intCast(rel.r_address - context.base_offset)); + + log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{ + @tagName(rel_type), + rel.r_address, + target.sym_index, + zld.getSymbolName(target), + target.getFile(), + }); + + const source_addr = blk: { + const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); + break :blk source_sym.n_value + rel_offset; + }; + const is_via_got = relocRequiresGot(zld, rel); + const is_tlv = is_tlv: { + const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); + const header = zld.sections.items(.header)[source_sym.n_sect - 1]; + break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; + }; + const target_addr = try getRelocTargetAddress(zld, target, is_via_got, is_tlv); + + log.debug(" | source_addr = 0x{x}", .{source_addr}); + + switch (rel_type) { + 
.ARM64_RELOC_BRANCH26 => { + const actual_target = if (zld.getStubsAtomIndexForSymbol(target)) |stub_atom_index| inner: { + const stub_atom = zld.getAtom(stub_atom_index); + break :inner stub_atom.getSymbolWithLoc(); + } else target; + log.debug(" source {s} (object({?})), target {s} (object({?}))", .{ + zld.getSymbolName(atom.getSymbolWithLoc()), + atom.getFile(), + zld.getSymbolName(target), + zld.getAtom(getRelocTargetAtomIndex(zld, target, is_via_got).?).getFile(), + }); + + const displacement = if (Relocation.calcPcRelativeDisplacementArm64( + source_addr, + zld.getSymbol(actual_target).n_value, + )) |disp| blk: { + log.debug(" | target_addr = 0x{x}", .{zld.getSymbol(actual_target).n_value}); + break :blk disp; + } else |_| blk: { + const thunk_index = zld.thunk_table.get(atom_index).?; + const thunk = zld.thunks.items[thunk_index]; + const thunk_sym = zld.getSymbol(thunk.getTrampolineForSymbol( + zld, + actual_target, + ).?); + log.debug(" | target_addr = 0x{x} (thunk)", .{thunk_sym.n_value}); + break :blk try Relocation.calcPcRelativeDisplacementArm64(source_addr, thunk_sym.n_value); + }; + + const code = atom_code[rel_offset..][0..4]; + var inst = aarch64.Instruction{ + .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.unconditional_branch_immediate, + ), code), + }; + inst.unconditional_branch_immediate.imm26 = @as(u26, @truncate(@as(u28, @bitCast(displacement >> 2)))); + mem.writeIntLittle(u32, code, inst.toU32()); + }, + + .ARM64_RELOC_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_TLVP_LOAD_PAGE21, + => { + const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); + + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + + const pages = @as(u21, @bitCast(Relocation.calcNumberOfPages(source_addr, adjusted_target_addr))); + const code = atom_code[rel_offset..][0..4]; + var inst = aarch64.Instruction{ + .pc_relative_address = 
mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.pc_relative_address, + ), code), + }; + inst.pc_relative_address.immhi = @as(u19, @truncate(pages >> 2)); + inst.pc_relative_address.immlo = @as(u2, @truncate(pages)); + mem.writeIntLittle(u32, code, inst.toU32()); + addend = null; + }, + + .ARM64_RELOC_PAGEOFF12 => { + const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); + + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + + const code = atom_code[rel_offset..][0..4]; + if (Relocation.isArithmeticOp(code)) { + const off = try Relocation.calcPageOffset(adjusted_target_addr, .arithmetic); + var inst = aarch64.Instruction{ + .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), code), + }; + inst.add_subtract_immediate.imm12 = off; + mem.writeIntLittle(u32, code, inst.toU32()); + } else { + var inst = aarch64.Instruction{ + .load_store_register = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code), + }; + const off = try Relocation.calcPageOffset(adjusted_target_addr, switch (inst.load_store_register.size) { + 0 => if (inst.load_store_register.v == 1) + Relocation.PageOffsetInstKind.load_store_128 + else + Relocation.PageOffsetInstKind.load_store_8, + 1 => .load_store_16, + 2 => .load_store_32, + 3 => .load_store_64, + }); + inst.load_store_register.offset = off; + mem.writeIntLittle(u32, code, inst.toU32()); + } + addend = null; + }, + + .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => { + const code = atom_code[rel_offset..][0..4]; + const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); + + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + + const off = try Relocation.calcPageOffset(adjusted_target_addr, .load_store_64); + var inst: aarch64.Instruction = .{ + .load_store_register = 
mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code), + }; + inst.load_store_register.offset = off; + mem.writeIntLittle(u32, code, inst.toU32()); + addend = null; + }, + + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => { + const code = atom_code[rel_offset..][0..4]; + const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); + + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + + const RegInfo = struct { + rd: u5, + rn: u5, + size: u2, + }; + const reg_info: RegInfo = blk: { + if (Relocation.isArithmeticOp(code)) { + const inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), code); + break :blk .{ + .rd = inst.rd, + .rn = inst.rn, + .size = inst.sf, + }; + } else { + const inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code); + break :blk .{ + .rd = inst.rt, + .rn = inst.rn, + .size = inst.size, + }; + } + }; + + var inst = if (zld.tlv_ptr_table.contains(target)) aarch64.Instruction{ + .load_store_register = .{ + .rt = reg_info.rd, + .rn = reg_info.rn, + .offset = try Relocation.calcPageOffset(adjusted_target_addr, .load_store_64), + .opc = 0b01, + .op1 = 0b01, + .v = 0, + .size = reg_info.size, + }, + } else aarch64.Instruction{ + .add_subtract_immediate = .{ + .rd = reg_info.rd, + .rn = reg_info.rn, + .imm12 = try Relocation.calcPageOffset(adjusted_target_addr, .arithmetic), + .sh = 0, + .s = 0, + .op = 0, + .sf = @as(u1, @truncate(reg_info.size)), + }, + }; + mem.writeIntLittle(u32, code, inst.toU32()); + addend = null; + }, + + .ARM64_RELOC_POINTER_TO_GOT => { + log.debug(" | target_addr = 0x{x}", .{target_addr}); + const result = math.cast(i32, @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr))) orelse + return error.Overflow; + mem.writeIntLittle(u32, atom_code[rel_offset..][0..4], @as(u32, @bitCast(result))); + }, 
+ + .ARM64_RELOC_UNSIGNED => { + var ptr_addend = if (rel.r_length == 3) + mem.readIntLittle(i64, atom_code[rel_offset..][0..8]) + else + mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); + + if (rel.r_extern == 0) { + const base_addr = if (target.sym_index >= object.source_address_lookup.len) + @as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr)) + else + object.source_address_lookup[target.sym_index]; + ptr_addend -= base_addr; + } + + const result = blk: { + if (subtractor) |sub| { + const sym = zld.getSymbol(sub); + break :blk @as(i64, @intCast(target_addr)) - @as(i64, @intCast(sym.n_value)) + ptr_addend; + } else { + break :blk @as(i64, @intCast(target_addr)) + ptr_addend; + } + }; + log.debug(" | target_addr = 0x{x}", .{result}); + + if (rel.r_length == 3) { + mem.writeIntLittle(u64, atom_code[rel_offset..][0..8], @as(u64, @bitCast(result))); + } else { + mem.writeIntLittle(u32, atom_code[rel_offset..][0..4], @as(u32, @truncate(@as(u64, @bitCast(result))))); + } + + subtractor = null; + }, + + .ARM64_RELOC_ADDEND => unreachable, + .ARM64_RELOC_SUBTRACTOR => unreachable, + } + } +} + +fn resolveRelocsX86( + zld: *Zld, + atom_index: Index, + atom_code: []u8, + atom_relocs: []align(1) const macho.relocation_info, + context: RelocContext, +) !void { + const atom = zld.getAtom(atom_index); + const object = zld.objects.items[atom.getFile().?]; + + var subtractor: ?SymbolWithLoc = null; + + for (atom_relocs) |rel| { + const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); + + switch (rel_type) { + .X86_64_RELOC_SUBTRACTOR => { + assert(subtractor == null); + + log.debug(" RELA({s}) @ {x} => %{d} in object({?d})", .{ + @tagName(rel_type), + rel.r_address, + rel.r_symbolnum, + atom.getFile(), + }); + + subtractor = parseRelocTarget(zld, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = atom_code, + .base_addr = context.base_addr, + .base_offset = context.base_offset, + }); + continue; + }, + else => 
{}, + } + + const target = parseRelocTarget(zld, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = atom_code, + .base_addr = context.base_addr, + .base_offset = context.base_offset, + }); + const rel_offset = @as(u32, @intCast(rel.r_address - context.base_offset)); + + log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{ + @tagName(rel_type), + rel.r_address, + target.sym_index, + zld.getSymbolName(target), + target.getFile(), + }); + + const source_addr = blk: { + const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); + break :blk source_sym.n_value + rel_offset; + }; + const is_via_got = relocRequiresGot(zld, rel); + const is_tlv = is_tlv: { + const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); + const header = zld.sections.items(.header)[source_sym.n_sect - 1]; + break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; + }; + + log.debug(" | source_addr = 0x{x}", .{source_addr}); + + const target_addr = try getRelocTargetAddress(zld, target, is_via_got, is_tlv); + + switch (rel_type) { + .X86_64_RELOC_BRANCH => { + const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); + const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); + mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp); + }, + + .X86_64_RELOC_GOT, + .X86_64_RELOC_GOT_LOAD, + => { + const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); + const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); + mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp); + }, + + .X86_64_RELOC_TLV => { + const addend = mem.readIntLittle(i32, 
atom_code[rel_offset..][0..4]); + const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); + + if (zld.tlv_ptr_table.get(target) == null) { + // We need to rewrite the opcode from movq to leaq. + atom_code[rel_offset - 2] = 0x8d; + } + + mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp); + }, + + .X86_64_RELOC_SIGNED, + .X86_64_RELOC_SIGNED_1, + .X86_64_RELOC_SIGNED_2, + .X86_64_RELOC_SIGNED_4, + => { + const correction: u3 = switch (rel_type) { + .X86_64_RELOC_SIGNED => 0, + .X86_64_RELOC_SIGNED_1 => 1, + .X86_64_RELOC_SIGNED_2 => 2, + .X86_64_RELOC_SIGNED_4 => 4, + else => unreachable, + }; + var addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]) + correction; + + if (rel.r_extern == 0) { + const base_addr = if (target.sym_index >= object.source_address_lookup.len) + @as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr)) + else + object.source_address_lookup[target.sym_index]; + addend += @as(i32, @intCast(@as(i64, @intCast(context.base_addr)) + rel.r_address + 4 - + @as(i64, @intCast(base_addr)))); + } + + const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); + + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + + const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, correction); + mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp); + }, + + .X86_64_RELOC_UNSIGNED => { + var addend = if (rel.r_length == 3) + mem.readIntLittle(i64, atom_code[rel_offset..][0..8]) + else + mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); + + if (rel.r_extern == 0) { + const base_addr = if (target.sym_index >= object.source_address_lookup.len) + @as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 
1))).addr)) + else + object.source_address_lookup[target.sym_index]; + addend -= base_addr; + } + + const result = blk: { + if (subtractor) |sub| { + const sym = zld.getSymbol(sub); + break :blk @as(i64, @intCast(target_addr)) - @as(i64, @intCast(sym.n_value)) + addend; + } else { + break :blk @as(i64, @intCast(target_addr)) + addend; + } + }; + log.debug(" | target_addr = 0x{x}", .{result}); + + if (rel.r_length == 3) { + mem.writeIntLittle(u64, atom_code[rel_offset..][0..8], @as(u64, @bitCast(result))); + } else { + mem.writeIntLittle(u32, atom_code[rel_offset..][0..4], @as(u32, @truncate(@as(u64, @bitCast(result))))); + } + + subtractor = null; + }, + + .X86_64_RELOC_SUBTRACTOR => unreachable, + } + } +} + +pub fn getAtomCode(zld: *Zld, atom_index: Index) []const u8 { + const atom = zld.getAtom(atom_index); + assert(atom.getFile() != null); // Synthetic atom shouldn't need to inquire for code. + const object = zld.objects.items[atom.getFile().?]; + const source_sym = object.getSourceSymbol(atom.sym_index) orelse { + // If there was no matching symbol present in the source symtab, this means + // we are dealing with either an entire section, or part of it, but also + // starting at the beginning. 
+ const nbase = @as(u32, @intCast(object.in_symtab.?.len)); + const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); + const source_sect = object.getSourceSection(sect_id); + assert(!source_sect.isZerofill()); + const code = object.getSectionContents(source_sect); + const code_len = @as(usize, @intCast(atom.size)); + return code[0..code_len]; + }; + const source_sect = object.getSourceSection(source_sym.n_sect - 1); + assert(!source_sect.isZerofill()); + const code = object.getSectionContents(source_sect); + const offset = @as(usize, @intCast(source_sym.n_value - source_sect.addr)); + const code_len = @as(usize, @intCast(atom.size)); + return code[offset..][0..code_len]; +} + +pub fn getAtomRelocs(zld: *Zld, atom_index: Index) []const macho.relocation_info { + const atom = zld.getAtom(atom_index); + assert(atom.getFile() != null); // Synthetic atom shouldn't need to unique for relocs. + const object = zld.objects.items[atom.getFile().?]; + const cache = object.relocs_lookup[atom.sym_index]; + + const source_sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { + break :blk source_sym.n_sect - 1; + } else blk: { + // If there was no matching symbol present in the source symtab, this means + // we are dealing with either an entire section, or part of it, but also + // starting at the beginning. 
+ const nbase = @as(u32, @intCast(object.in_symtab.?.len)); + const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); + break :blk sect_id; + }; + const source_sect = object.getSourceSection(source_sect_id); + assert(!source_sect.isZerofill()); + const relocs = object.getRelocs(source_sect_id); + return relocs[cache.start..][0..cache.len]; +} + +pub fn relocRequiresGot(zld: *Zld, rel: macho.relocation_info) bool { + switch (zld.options.target.cpu.arch) { + .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .ARM64_RELOC_POINTER_TO_GOT, + => return true, + else => return false, + }, + .x86_64 => switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) { + .X86_64_RELOC_GOT, + .X86_64_RELOC_GOT_LOAD, + => return true, + else => return false, + }, + else => unreachable, + } +} diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index e407b83017..03628e0e35 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -19,7 +19,7 @@ const sort = std.sort; const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; -const Atom = @import("ZldAtom.zig"); +const Atom = @import("Atom.zig"); const AtomIndex = @import("zld.zig").AtomIndex; const DwarfInfo = @import("DwarfInfo.zig"); const LoadCommandIterator = macho.LoadCommandIterator; diff --git a/src/link/MachO/UnwindInfo.zig b/src/link/MachO/UnwindInfo.zig index 49a3ab9d01..4f2ca33638 100644 --- a/src/link/MachO/UnwindInfo.zig +++ b/src/link/MachO/UnwindInfo.zig @@ -12,7 +12,7 @@ const mem = std.mem; const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; -const Atom = @import("ZldAtom.zig"); +const Atom = @import("Atom.zig"); const AtomIndex = @import("zld.zig").AtomIndex; const EhFrameRecord = eh_frame.EhFrameRecord; const MachO = @import("../MachO.zig"); diff --git a/src/link/MachO/ZldAtom.zig b/src/link/MachO/ZldAtom.zig deleted file mode 
100644 index 6bcd74ff0c..0000000000 --- a/src/link/MachO/ZldAtom.zig +++ /dev/null @@ -1,1012 +0,0 @@ -//! An atom is a single smallest unit of measure that will get an -//! allocated virtual memory address in the final linked image. -//! For example, we parse each input section within an input relocatable -//! object file into a set of atoms which are then laid out contiguously -//! as they were defined in the input file. - -const Atom = @This(); - -const std = @import("std"); -const build_options = @import("build_options"); -const aarch64 = @import("../../arch/aarch64/bits.zig"); -const assert = std.debug.assert; -const log = std.log.scoped(.atom); -const macho = std.macho; -const math = std.math; -const mem = std.mem; -const meta = std.meta; -const trace = @import("../../tracy.zig").trace; - -const Allocator = mem.Allocator; -const Arch = std.Target.Cpu.Arch; -const AtomIndex = @import("zld.zig").AtomIndex; -const Object = @import("Object.zig"); -const Relocation = @import("Relocation.zig"); -const SymbolWithLoc = @import("../MachO.zig").SymbolWithLoc; -const Zld = @import("zld.zig").Zld; - -/// Each Atom always gets a symbol with the fully qualified name. -/// The symbol can reside in any object file context structure in `symtab` array -/// (see `Object`), or if the symbol is a synthetic symbol such as a GOT cell or -/// a stub trampoline, it can be found in the linkers `locals` arraylist. -sym_index: u32, - -/// 0 means an Atom is a synthetic Atom such as a GOT cell defined by the linker. -/// Otherwise, it is the index into appropriate object file (indexing from 1). -/// Prefer using `getFile()` helper to get the file index out rather than using -/// the field directly. -file: u32, - -/// If this Atom is not a synthetic Atom, i.e., references a subsection in an -/// Object file, `inner_sym_index` and `inner_nsyms_trailing` tell where and if -/// this Atom contains any additional symbol references that fall within this Atom's -/// address range. 
These could for example be an alias symbol which can be used -/// internally by the relocation records, or if the Object file couldn't be split -/// into subsections, this Atom may encompass an entire input section. -inner_sym_index: u32, -inner_nsyms_trailing: u32, - -/// Size of this atom. -size: u64, - -/// Alignment of this atom as a power of 2. -/// For instance, aligmment of 0 should be read as 2^0 = 1 byte aligned. -alignment: u32, - -/// Points to the previous and next neighbours -next_index: ?AtomIndex, -prev_index: ?AtomIndex, - -pub const empty = Atom{ - .sym_index = 0, - .inner_sym_index = 0, - .inner_nsyms_trailing = 0, - .file = 0, - .size = 0, - .alignment = 0, - .prev_index = null, - .next_index = null, -}; - -/// Returns `null` if the Atom is a synthetic Atom. -/// Otherwise, returns an index into an array of Objects. -pub fn getFile(self: Atom) ?u32 { - if (self.file == 0) return null; - return self.file - 1; -} - -pub inline fn getSymbolWithLoc(self: Atom) SymbolWithLoc { - return .{ - .sym_index = self.sym_index, - .file = self.file, - }; -} - -const InnerSymIterator = struct { - sym_index: u32, - nsyms: u32, - file: u32, - pos: u32 = 0, - - pub fn next(it: *@This()) ?SymbolWithLoc { - if (it.pos == it.nsyms) return null; - const res = SymbolWithLoc{ .sym_index = it.sym_index + it.pos, .file = it.file }; - it.pos += 1; - return res; - } -}; - -/// Returns an iterator over potentially contained symbols. -/// Panics when called on a synthetic Atom. -pub fn getInnerSymbolsIterator(zld: *Zld, atom_index: AtomIndex) InnerSymIterator { - const atom = zld.getAtom(atom_index); - assert(atom.getFile() != null); - return .{ - .sym_index = atom.inner_sym_index, - .nsyms = atom.inner_nsyms_trailing, - .file = atom.file, - }; -} - -/// Returns a section alias symbol if one is defined. -/// An alias symbol is used to represent the start of an input section -/// if there were no symbols defined within that range. -/// Alias symbols are only used on x86_64. 
-pub fn getSectionAlias(zld: *Zld, atom_index: AtomIndex) ?SymbolWithLoc { - const atom = zld.getAtom(atom_index); - assert(atom.getFile() != null); - - const object = zld.objects.items[atom.getFile().?]; - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const ntotal = @as(u32, @intCast(object.symtab.len)); - var sym_index: u32 = nbase; - while (sym_index < ntotal) : (sym_index += 1) { - if (object.getAtomIndexForSymbol(sym_index)) |other_atom_index| { - if (other_atom_index == atom_index) return SymbolWithLoc{ - .sym_index = sym_index, - .file = atom.file, - }; - } - } - return null; -} - -/// Given an index into a contained symbol within, calculates an offset wrt -/// the start of this Atom. -pub fn calcInnerSymbolOffset(zld: *Zld, atom_index: AtomIndex, sym_index: u32) u64 { - const atom = zld.getAtom(atom_index); - assert(atom.getFile() != null); - - if (atom.sym_index == sym_index) return 0; - - const object = zld.objects.items[atom.getFile().?]; - const source_sym = object.getSourceSymbol(sym_index).?; - const base_addr = if (object.getSourceSymbol(atom.sym_index)) |sym| - sym.n_value - else blk: { - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - const source_sect = object.getSourceSection(sect_id); - break :blk source_sect.addr; - }; - return source_sym.n_value - base_addr; -} - -pub fn scanAtomRelocs(zld: *Zld, atom_index: AtomIndex, relocs: []align(1) const macho.relocation_info) !void { - const arch = zld.options.target.cpu.arch; - const atom = zld.getAtom(atom_index); - assert(atom.getFile() != null); // synthetic atoms do not have relocs - - return switch (arch) { - .aarch64 => scanAtomRelocsArm64(zld, atom_index, relocs), - .x86_64 => scanAtomRelocsX86(zld, atom_index, relocs), - else => unreachable, - }; -} - -const RelocContext = struct { - base_addr: i64 = 0, - base_offset: i32 = 0, -}; - -pub fn getRelocContext(zld: *Zld, atom_index: AtomIndex) RelocContext { - 
const atom = zld.getAtom(atom_index); - assert(atom.getFile() != null); // synthetic atoms do not have relocs - - const object = zld.objects.items[atom.getFile().?]; - if (object.getSourceSymbol(atom.sym_index)) |source_sym| { - const source_sect = object.getSourceSection(source_sym.n_sect - 1); - return .{ - .base_addr = @as(i64, @intCast(source_sect.addr)), - .base_offset = @as(i32, @intCast(source_sym.n_value - source_sect.addr)), - }; - } - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - const source_sect = object.getSourceSection(sect_id); - return .{ - .base_addr = @as(i64, @intCast(source_sect.addr)), - .base_offset = 0, - }; -} - -pub fn parseRelocTarget(zld: *Zld, ctx: struct { - object_id: u32, - rel: macho.relocation_info, - code: []const u8, - base_addr: i64 = 0, - base_offset: i32 = 0, -}) SymbolWithLoc { - const tracy = trace(@src()); - defer tracy.end(); - - const object = &zld.objects.items[ctx.object_id]; - log.debug("parsing reloc target in object({d}) '{s}' ", .{ ctx.object_id, object.name }); - - const sym_index = if (ctx.rel.r_extern == 0) sym_index: { - const sect_id = @as(u8, @intCast(ctx.rel.r_symbolnum - 1)); - const rel_offset = @as(u32, @intCast(ctx.rel.r_address - ctx.base_offset)); - - const address_in_section = if (ctx.rel.r_pcrel == 0) blk: { - break :blk if (ctx.rel.r_length == 3) - mem.readIntLittle(u64, ctx.code[rel_offset..][0..8]) - else - mem.readIntLittle(u32, ctx.code[rel_offset..][0..4]); - } else blk: { - assert(zld.options.target.cpu.arch == .x86_64); - const correction: u3 = switch (@as(macho.reloc_type_x86_64, @enumFromInt(ctx.rel.r_type))) { - .X86_64_RELOC_SIGNED => 0, - .X86_64_RELOC_SIGNED_1 => 1, - .X86_64_RELOC_SIGNED_2 => 2, - .X86_64_RELOC_SIGNED_4 => 4, - else => unreachable, - }; - const addend = mem.readIntLittle(i32, ctx.code[rel_offset..][0..4]); - const target_address = @as(i64, @intCast(ctx.base_addr)) + ctx.rel.r_address + 4 + 
correction + addend; - break :blk @as(u64, @intCast(target_address)); - }; - - // Find containing atom - log.debug(" | locating symbol by address @{x} in section {d}", .{ address_in_section, sect_id }); - break :sym_index object.getSymbolByAddress(address_in_section, sect_id); - } else object.reverse_symtab_lookup[ctx.rel.r_symbolnum]; - - const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = ctx.object_id + 1 }; - const sym = zld.getSymbol(sym_loc); - const target = if (sym.sect() and !sym.ext()) - sym_loc - else if (object.getGlobal(sym_index)) |global_index| - zld.globals.items[global_index] - else - sym_loc; - log.debug(" | target %{d} ('{s}') in object({?d})", .{ - target.sym_index, - zld.getSymbolName(target), - target.getFile(), - }); - return target; -} - -pub fn getRelocTargetAtomIndex(zld: *Zld, target: SymbolWithLoc, is_via_got: bool) ?AtomIndex { - if (is_via_got) { - return zld.getGotAtomIndexForSymbol(target).?; // panic means fatal error - } - if (zld.getStubsAtomIndexForSymbol(target)) |stubs_atom| return stubs_atom; - if (zld.getTlvPtrAtomIndexForSymbol(target)) |tlv_ptr_atom| return tlv_ptr_atom; - - if (target.getFile() == null) { - const target_sym_name = zld.getSymbolName(target); - if (mem.eql(u8, "__mh_execute_header", target_sym_name)) return null; - if (mem.eql(u8, "___dso_handle", target_sym_name)) return null; - - unreachable; // referenced symbol not found - } - - const object = zld.objects.items[target.getFile().?]; - return object.getAtomIndexForSymbol(target.sym_index); -} - -fn scanAtomRelocsArm64(zld: *Zld, atom_index: AtomIndex, relocs: []align(1) const macho.relocation_info) !void { - for (relocs) |rel| { - const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); - - switch (rel_type) { - .ARM64_RELOC_ADDEND, .ARM64_RELOC_SUBTRACTOR => continue, - else => {}, - } - - if (rel.r_extern == 0) continue; - - const atom = zld.getAtom(atom_index); - const object = &zld.objects.items[atom.getFile().?]; - const 
sym_index = object.reverse_symtab_lookup[rel.r_symbolnum]; - const sym_loc = SymbolWithLoc{ - .sym_index = sym_index, - .file = atom.file, - }; - - const target = if (object.getGlobal(sym_index)) |global_index| - zld.globals.items[global_index] - else - sym_loc; - - switch (rel_type) { - .ARM64_RELOC_BRANCH26 => { - // TODO rewrite relocation - try addStub(zld, target); - }, - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12, - .ARM64_RELOC_POINTER_TO_GOT, - => { - // TODO rewrite relocation - try addGotEntry(zld, target); - }, - .ARM64_RELOC_TLVP_LOAD_PAGE21, - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, - => { - try addTlvPtrEntry(zld, target); - }, - else => {}, - } - } -} - -fn scanAtomRelocsX86(zld: *Zld, atom_index: AtomIndex, relocs: []align(1) const macho.relocation_info) !void { - for (relocs) |rel| { - const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); - - switch (rel_type) { - .X86_64_RELOC_SUBTRACTOR => continue, - else => {}, - } - - if (rel.r_extern == 0) continue; - - const atom = zld.getAtom(atom_index); - const object = &zld.objects.items[atom.getFile().?]; - const sym_index = object.reverse_symtab_lookup[rel.r_symbolnum]; - const sym_loc = SymbolWithLoc{ - .sym_index = sym_index, - .file = atom.file, - }; - - const target = if (object.getGlobal(sym_index)) |global_index| - zld.globals.items[global_index] - else - sym_loc; - - switch (rel_type) { - .X86_64_RELOC_BRANCH => { - // TODO rewrite relocation - try addStub(zld, target); - }, - .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => { - // TODO rewrite relocation - try addGotEntry(zld, target); - }, - .X86_64_RELOC_TLV => { - try addTlvPtrEntry(zld, target); - }, - else => {}, - } - } -} - -fn addTlvPtrEntry(zld: *Zld, target: SymbolWithLoc) !void { - const target_sym = zld.getSymbol(target); - if (!target_sym.undf()) return; - if (zld.tlv_ptr_table.contains(target)) return; - - const gpa = zld.gpa; - const atom_index = try zld.createTlvPtrAtom(); - const tlv_ptr_index = 
@as(u32, @intCast(zld.tlv_ptr_entries.items.len)); - try zld.tlv_ptr_entries.append(gpa, .{ - .target = target, - .atom_index = atom_index, - }); - try zld.tlv_ptr_table.putNoClobber(gpa, target, tlv_ptr_index); -} - -pub fn addGotEntry(zld: *Zld, target: SymbolWithLoc) !void { - if (zld.got_table.contains(target)) return; - const gpa = zld.gpa; - const atom_index = try zld.createGotAtom(); - const got_index = @as(u32, @intCast(zld.got_entries.items.len)); - try zld.got_entries.append(gpa, .{ - .target = target, - .atom_index = atom_index, - }); - try zld.got_table.putNoClobber(gpa, target, got_index); -} - -pub fn addStub(zld: *Zld, target: SymbolWithLoc) !void { - const target_sym = zld.getSymbol(target); - if (!target_sym.undf()) return; - if (zld.stubs_table.contains(target)) return; - - const gpa = zld.gpa; - _ = try zld.createStubHelperAtom(); - _ = try zld.createLazyPointerAtom(); - const atom_index = try zld.createStubAtom(); - const stubs_index = @as(u32, @intCast(zld.stubs.items.len)); - try zld.stubs.append(gpa, .{ - .target = target, - .atom_index = atom_index, - }); - try zld.stubs_table.putNoClobber(gpa, target, stubs_index); -} - -pub fn resolveRelocs( - zld: *Zld, - atom_index: AtomIndex, - atom_code: []u8, - atom_relocs: []align(1) const macho.relocation_info, -) !void { - const arch = zld.options.target.cpu.arch; - const atom = zld.getAtom(atom_index); - assert(atom.getFile() != null); // synthetic atoms do not have relocs - - log.debug("resolving relocations in ATOM(%{d}, '{s}')", .{ - atom.sym_index, - zld.getSymbolName(atom.getSymbolWithLoc()), - }); - - const ctx = getRelocContext(zld, atom_index); - - return switch (arch) { - .aarch64 => resolveRelocsArm64(zld, atom_index, atom_code, atom_relocs, ctx), - .x86_64 => resolveRelocsX86(zld, atom_index, atom_code, atom_relocs, ctx), - else => unreachable, - }; -} - -pub fn getRelocTargetAddress(zld: *Zld, target: SymbolWithLoc, is_via_got: bool, is_tlv: bool) !u64 { - const target_atom_index = 
getRelocTargetAtomIndex(zld, target, is_via_got) orelse { - // If there is no atom for target, we still need to check for special, atom-less - // symbols such as `___dso_handle`. - const target_name = zld.getSymbolName(target); - const atomless_sym = zld.getSymbol(target); - log.debug(" | atomless target '{s}'", .{target_name}); - return atomless_sym.n_value; - }; - const target_atom = zld.getAtom(target_atom_index); - log.debug(" | target ATOM(%{d}, '{s}') in object({?})", .{ - target_atom.sym_index, - zld.getSymbolName(target_atom.getSymbolWithLoc()), - target_atom.getFile(), - }); - - const target_sym = zld.getSymbol(target_atom.getSymbolWithLoc()); - assert(target_sym.n_desc != @import("zld.zig").N_DEAD); - - // If `target` is contained within the target atom, pull its address value. - const offset = if (target_atom.getFile() != null) blk: { - const object = zld.objects.items[target_atom.getFile().?]; - break :blk if (object.getSourceSymbol(target.sym_index)) |_| - Atom.calcInnerSymbolOffset(zld, target_atom_index, target.sym_index) - else - 0; // section alias - } else 0; - const base_address: u64 = if (is_tlv) base_address: { - // For TLV relocations, the value specified as a relocation is the displacement from the - // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first - // defined TLV template init section in the following order: - // * wrt to __thread_data if defined, then - // * wrt to __thread_bss - const sect_id: u16 = sect_id: { - if (zld.getSectionByName("__DATA", "__thread_data")) |i| { - break :sect_id i; - } else if (zld.getSectionByName("__DATA", "__thread_bss")) |i| { - break :sect_id i; - } else { - log.err("threadlocal variables present but no initializer sections found", .{}); - log.err(" __thread_data not found", .{}); - log.err(" __thread_bss not found", .{}); - return error.FailedToResolveRelocationTarget; - } - }; - break :base_address zld.sections.items(.header)[sect_id].addr; - } else 0; - return 
target_sym.n_value + offset - base_address; -} - -fn resolveRelocsArm64( - zld: *Zld, - atom_index: AtomIndex, - atom_code: []u8, - atom_relocs: []align(1) const macho.relocation_info, - context: RelocContext, -) !void { - const atom = zld.getAtom(atom_index); - const object = zld.objects.items[atom.getFile().?]; - - var addend: ?i64 = null; - var subtractor: ?SymbolWithLoc = null; - - for (atom_relocs) |rel| { - const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); - - switch (rel_type) { - .ARM64_RELOC_ADDEND => { - assert(addend == null); - - log.debug(" RELA({s}) @ {x} => {x}", .{ @tagName(rel_type), rel.r_address, rel.r_symbolnum }); - - addend = rel.r_symbolnum; - continue; - }, - .ARM64_RELOC_SUBTRACTOR => { - assert(subtractor == null); - - log.debug(" RELA({s}) @ {x} => %{d} in object({?d})", .{ - @tagName(rel_type), - rel.r_address, - rel.r_symbolnum, - atom.getFile(), - }); - - subtractor = parseRelocTarget(zld, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = atom_code, - .base_addr = context.base_addr, - .base_offset = context.base_offset, - }); - continue; - }, - else => {}, - } - - const target = parseRelocTarget(zld, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = atom_code, - .base_addr = context.base_addr, - .base_offset = context.base_offset, - }); - const rel_offset = @as(u32, @intCast(rel.r_address - context.base_offset)); - - log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{ - @tagName(rel_type), - rel.r_address, - target.sym_index, - zld.getSymbolName(target), - target.getFile(), - }); - - const source_addr = blk: { - const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); - break :blk source_sym.n_value + rel_offset; - }; - const is_via_got = relocRequiresGot(zld, rel); - const is_tlv = is_tlv: { - const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); - const header = zld.sections.items(.header)[source_sym.n_sect - 1]; - break :is_tlv header.type() == 
macho.S_THREAD_LOCAL_VARIABLES; - }; - const target_addr = try getRelocTargetAddress(zld, target, is_via_got, is_tlv); - - log.debug(" | source_addr = 0x{x}", .{source_addr}); - - switch (rel_type) { - .ARM64_RELOC_BRANCH26 => { - const actual_target = if (zld.getStubsAtomIndexForSymbol(target)) |stub_atom_index| inner: { - const stub_atom = zld.getAtom(stub_atom_index); - break :inner stub_atom.getSymbolWithLoc(); - } else target; - log.debug(" source {s} (object({?})), target {s} (object({?}))", .{ - zld.getSymbolName(atom.getSymbolWithLoc()), - atom.getFile(), - zld.getSymbolName(target), - zld.getAtom(getRelocTargetAtomIndex(zld, target, is_via_got).?).getFile(), - }); - - const displacement = if (Relocation.calcPcRelativeDisplacementArm64( - source_addr, - zld.getSymbol(actual_target).n_value, - )) |disp| blk: { - log.debug(" | target_addr = 0x{x}", .{zld.getSymbol(actual_target).n_value}); - break :blk disp; - } else |_| blk: { - const thunk_index = zld.thunk_table.get(atom_index).?; - const thunk = zld.thunks.items[thunk_index]; - const thunk_sym = zld.getSymbol(thunk.getTrampolineForSymbol( - zld, - actual_target, - ).?); - log.debug(" | target_addr = 0x{x} (thunk)", .{thunk_sym.n_value}); - break :blk try Relocation.calcPcRelativeDisplacementArm64(source_addr, thunk_sym.n_value); - }; - - const code = atom_code[rel_offset..][0..4]; - var inst = aarch64.Instruction{ - .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.unconditional_branch_immediate, - ), code), - }; - inst.unconditional_branch_immediate.imm26 = @as(u26, @truncate(@as(u28, @bitCast(displacement >> 2)))); - mem.writeIntLittle(u32, code, inst.toU32()); - }, - - .ARM64_RELOC_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_TLVP_LOAD_PAGE21, - => { - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); - - log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - - const 
pages = @as(u21, @bitCast(Relocation.calcNumberOfPages(source_addr, adjusted_target_addr))); - const code = atom_code[rel_offset..][0..4]; - var inst = aarch64.Instruction{ - .pc_relative_address = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.pc_relative_address, - ), code), - }; - inst.pc_relative_address.immhi = @as(u19, @truncate(pages >> 2)); - inst.pc_relative_address.immlo = @as(u2, @truncate(pages)); - mem.writeIntLittle(u32, code, inst.toU32()); - addend = null; - }, - - .ARM64_RELOC_PAGEOFF12 => { - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); - - log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - - const code = atom_code[rel_offset..][0..4]; - if (Relocation.isArithmeticOp(code)) { - const off = try Relocation.calcPageOffset(adjusted_target_addr, .arithmetic); - var inst = aarch64.Instruction{ - .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), code), - }; - inst.add_subtract_immediate.imm12 = off; - mem.writeIntLittle(u32, code, inst.toU32()); - } else { - var inst = aarch64.Instruction{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code), - }; - const off = try Relocation.calcPageOffset(adjusted_target_addr, switch (inst.load_store_register.size) { - 0 => if (inst.load_store_register.v == 1) - Relocation.PageOffsetInstKind.load_store_128 - else - Relocation.PageOffsetInstKind.load_store_8, - 1 => .load_store_16, - 2 => .load_store_32, - 3 => .load_store_64, - }); - inst.load_store_register.offset = off; - mem.writeIntLittle(u32, code, inst.toU32()); - } - addend = null; - }, - - .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => { - const code = atom_code[rel_offset..][0..4]; - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); - - log.debug(" | 
target_addr = 0x{x}", .{adjusted_target_addr}); - - const off = try Relocation.calcPageOffset(adjusted_target_addr, .load_store_64); - var inst: aarch64.Instruction = .{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code), - }; - inst.load_store_register.offset = off; - mem.writeIntLittle(u32, code, inst.toU32()); - addend = null; - }, - - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => { - const code = atom_code[rel_offset..][0..4]; - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0))); - - log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - - const RegInfo = struct { - rd: u5, - rn: u5, - size: u2, - }; - const reg_info: RegInfo = blk: { - if (Relocation.isArithmeticOp(code)) { - const inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), code); - break :blk .{ - .rd = inst.rd, - .rn = inst.rn, - .size = inst.sf, - }; - } else { - const inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code); - break :blk .{ - .rd = inst.rt, - .rn = inst.rn, - .size = inst.size, - }; - } - }; - - var inst = if (zld.tlv_ptr_table.contains(target)) aarch64.Instruction{ - .load_store_register = .{ - .rt = reg_info.rd, - .rn = reg_info.rn, - .offset = try Relocation.calcPageOffset(adjusted_target_addr, .load_store_64), - .opc = 0b01, - .op1 = 0b01, - .v = 0, - .size = reg_info.size, - }, - } else aarch64.Instruction{ - .add_subtract_immediate = .{ - .rd = reg_info.rd, - .rn = reg_info.rn, - .imm12 = try Relocation.calcPageOffset(adjusted_target_addr, .arithmetic), - .sh = 0, - .s = 0, - .op = 0, - .sf = @as(u1, @truncate(reg_info.size)), - }, - }; - mem.writeIntLittle(u32, code, inst.toU32()); - addend = null; - }, - - .ARM64_RELOC_POINTER_TO_GOT => { - log.debug(" | target_addr = 0x{x}", .{target_addr}); - const result = 
math.cast(i32, @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr))) orelse - return error.Overflow; - mem.writeIntLittle(u32, atom_code[rel_offset..][0..4], @as(u32, @bitCast(result))); - }, - - .ARM64_RELOC_UNSIGNED => { - var ptr_addend = if (rel.r_length == 3) - mem.readIntLittle(i64, atom_code[rel_offset..][0..8]) - else - mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); - - if (rel.r_extern == 0) { - const base_addr = if (target.sym_index >= object.source_address_lookup.len) - @as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr)) - else - object.source_address_lookup[target.sym_index]; - ptr_addend -= base_addr; - } - - const result = blk: { - if (subtractor) |sub| { - const sym = zld.getSymbol(sub); - break :blk @as(i64, @intCast(target_addr)) - @as(i64, @intCast(sym.n_value)) + ptr_addend; - } else { - break :blk @as(i64, @intCast(target_addr)) + ptr_addend; - } - }; - log.debug(" | target_addr = 0x{x}", .{result}); - - if (rel.r_length == 3) { - mem.writeIntLittle(u64, atom_code[rel_offset..][0..8], @as(u64, @bitCast(result))); - } else { - mem.writeIntLittle(u32, atom_code[rel_offset..][0..4], @as(u32, @truncate(@as(u64, @bitCast(result))))); - } - - subtractor = null; - }, - - .ARM64_RELOC_ADDEND => unreachable, - .ARM64_RELOC_SUBTRACTOR => unreachable, - } - } -} - -fn resolveRelocsX86( - zld: *Zld, - atom_index: AtomIndex, - atom_code: []u8, - atom_relocs: []align(1) const macho.relocation_info, - context: RelocContext, -) !void { - const atom = zld.getAtom(atom_index); - const object = zld.objects.items[atom.getFile().?]; - - var subtractor: ?SymbolWithLoc = null; - - for (atom_relocs) |rel| { - const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); - - switch (rel_type) { - .X86_64_RELOC_SUBTRACTOR => { - assert(subtractor == null); - - log.debug(" RELA({s}) @ {x} => %{d} in object({?d})", .{ - @tagName(rel_type), - rel.r_address, - rel.r_symbolnum, - atom.getFile(), - }); - - 
subtractor = parseRelocTarget(zld, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = atom_code, - .base_addr = context.base_addr, - .base_offset = context.base_offset, - }); - continue; - }, - else => {}, - } - - const target = parseRelocTarget(zld, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = atom_code, - .base_addr = context.base_addr, - .base_offset = context.base_offset, - }); - const rel_offset = @as(u32, @intCast(rel.r_address - context.base_offset)); - - log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{ - @tagName(rel_type), - rel.r_address, - target.sym_index, - zld.getSymbolName(target), - target.getFile(), - }); - - const source_addr = blk: { - const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); - break :blk source_sym.n_value + rel_offset; - }; - const is_via_got = relocRequiresGot(zld, rel); - const is_tlv = is_tlv: { - const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); - const header = zld.sections.items(.header)[source_sym.n_sect - 1]; - break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; - }; - - log.debug(" | source_addr = 0x{x}", .{source_addr}); - - const target_addr = try getRelocTargetAddress(zld, target, is_via_got, is_tlv); - - switch (rel_type) { - .X86_64_RELOC_BRANCH => { - const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); - log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); - mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp); - }, - - .X86_64_RELOC_GOT, - .X86_64_RELOC_GOT_LOAD, - => { - const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); - log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - const disp = try 
Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); - mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp); - }, - - .X86_64_RELOC_TLV => { - const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); - log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); - - if (zld.tlv_ptr_table.get(target) == null) { - // We need to rewrite the opcode from movq to leaq. - atom_code[rel_offset - 2] = 0x8d; - } - - mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp); - }, - - .X86_64_RELOC_SIGNED, - .X86_64_RELOC_SIGNED_1, - .X86_64_RELOC_SIGNED_2, - .X86_64_RELOC_SIGNED_4, - => { - const correction: u3 = switch (rel_type) { - .X86_64_RELOC_SIGNED => 0, - .X86_64_RELOC_SIGNED_1 => 1, - .X86_64_RELOC_SIGNED_2 => 2, - .X86_64_RELOC_SIGNED_4 => 4, - else => unreachable, - }; - var addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]) + correction; - - if (rel.r_extern == 0) { - const base_addr = if (target.sym_index >= object.source_address_lookup.len) - @as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr)) - else - object.source_address_lookup[target.sym_index]; - addend += @as(i32, @intCast(@as(i64, @intCast(context.base_addr)) + rel.r_address + 4 - - @as(i64, @intCast(base_addr)))); - } - - const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); - - log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); - - const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, correction); - mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp); - }, - - .X86_64_RELOC_UNSIGNED => { - var addend = if (rel.r_length == 3) - mem.readIntLittle(i64, atom_code[rel_offset..][0..8]) - else - mem.readIntLittle(i32, 
atom_code[rel_offset..][0..4]); - - if (rel.r_extern == 0) { - const base_addr = if (target.sym_index >= object.source_address_lookup.len) - @as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr)) - else - object.source_address_lookup[target.sym_index]; - addend -= base_addr; - } - - const result = blk: { - if (subtractor) |sub| { - const sym = zld.getSymbol(sub); - break :blk @as(i64, @intCast(target_addr)) - @as(i64, @intCast(sym.n_value)) + addend; - } else { - break :blk @as(i64, @intCast(target_addr)) + addend; - } - }; - log.debug(" | target_addr = 0x{x}", .{result}); - - if (rel.r_length == 3) { - mem.writeIntLittle(u64, atom_code[rel_offset..][0..8], @as(u64, @bitCast(result))); - } else { - mem.writeIntLittle(u32, atom_code[rel_offset..][0..4], @as(u32, @truncate(@as(u64, @bitCast(result))))); - } - - subtractor = null; - }, - - .X86_64_RELOC_SUBTRACTOR => unreachable, - } - } -} - -pub fn getAtomCode(zld: *Zld, atom_index: AtomIndex) []const u8 { - const atom = zld.getAtom(atom_index); - assert(atom.getFile() != null); // Synthetic atom shouldn't need to inquire for code. - const object = zld.objects.items[atom.getFile().?]; - const source_sym = object.getSourceSymbol(atom.sym_index) orelse { - // If there was no matching symbol present in the source symtab, this means - // we are dealing with either an entire section, or part of it, but also - // starting at the beginning. 
- const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - const source_sect = object.getSourceSection(sect_id); - assert(!source_sect.isZerofill()); - const code = object.getSectionContents(source_sect); - const code_len = @as(usize, @intCast(atom.size)); - return code[0..code_len]; - }; - const source_sect = object.getSourceSection(source_sym.n_sect - 1); - assert(!source_sect.isZerofill()); - const code = object.getSectionContents(source_sect); - const offset = @as(usize, @intCast(source_sym.n_value - source_sect.addr)); - const code_len = @as(usize, @intCast(atom.size)); - return code[offset..][0..code_len]; -} - -pub fn getAtomRelocs(zld: *Zld, atom_index: AtomIndex) []const macho.relocation_info { - const atom = zld.getAtom(atom_index); - assert(atom.getFile() != null); // Synthetic atom shouldn't need to unique for relocs. - const object = zld.objects.items[atom.getFile().?]; - const cache = object.relocs_lookup[atom.sym_index]; - - const source_sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { - break :blk source_sym.n_sect - 1; - } else blk: { - // If there was no matching symbol present in the source symtab, this means - // we are dealing with either an entire section, or part of it, but also - // starting at the beginning. 
- const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - break :blk sect_id; - }; - const source_sect = object.getSourceSection(source_sect_id); - assert(!source_sect.isZerofill()); - const relocs = object.getRelocs(source_sect_id); - return relocs[cache.start..][0..cache.len]; -} - -pub fn relocRequiresGot(zld: *Zld, rel: macho.relocation_info) bool { - switch (zld.options.target.cpu.arch) { - .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12, - .ARM64_RELOC_POINTER_TO_GOT, - => return true, - else => return false, - }, - .x86_64 => switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) { - .X86_64_RELOC_GOT, - .X86_64_RELOC_GOT_LOAD, - => return true, - else => return false, - }, - else => unreachable, - } -} diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index b2031590ed..21be34a214 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -10,7 +10,7 @@ const mem = std.mem; const Allocator = mem.Allocator; const AtomIndex = @import("zld.zig").AtomIndex; -const Atom = @import("ZldAtom.zig"); +const Atom = @import("Atom.zig"); const MachO = @import("../MachO.zig"); const SymbolWithLoc = MachO.SymbolWithLoc; const SymbolResolver = MachO.SymbolResolver; diff --git a/src/link/MachO/eh_frame.zig b/src/link/MachO/eh_frame.zig index 6ef7a79977..efbb07ac05 100644 --- a/src/link/MachO/eh_frame.zig +++ b/src/link/MachO/eh_frame.zig @@ -8,7 +8,7 @@ const log = std.log.scoped(.eh_frame); const Allocator = mem.Allocator; const AtomIndex = @import("zld.zig").AtomIndex; -const Atom = @import("ZldAtom.zig"); +const Atom = @import("Atom.zig"); const MachO = @import("../MachO.zig"); const Relocation = @import("Relocation.zig"); const SymbolWithLoc = MachO.SymbolWithLoc; diff --git a/src/link/MachO/thunks.zig b/src/link/MachO/thunks.zig index e5ef305077..b478fb4131 
100644 --- a/src/link/MachO/thunks.zig +++ b/src/link/MachO/thunks.zig @@ -15,7 +15,7 @@ const mem = std.mem; const aarch64 = @import("../../arch/aarch64/bits.zig"); const Allocator = mem.Allocator; -const Atom = @import("ZldAtom.zig"); +const Atom = @import("Atom.zig"); const AtomIndex = @import("zld.zig").AtomIndex; const MachO = @import("../MachO.zig"); const Relocation = @import("Relocation.zig"); diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index d82e80a20a..67620711d3 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -21,7 +21,7 @@ const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; const Archive = @import("Archive.zig"); -const Atom = @import("ZldAtom.zig"); +const Atom = @import("Atom.zig"); const Cache = std.Build.Cache; const CodeSignature = @import("CodeSignature.zig"); const Compilation = @import("../../Compilation.zig"); @@ -247,7 +247,16 @@ pub const Zld = struct { const gpa = self.gpa; const index = @as(AtomIndex, @intCast(self.atoms.items.len)); const atom = try self.atoms.addOne(gpa); - atom.* = Atom.empty; + atom.* = .{ + .sym_index = 0, + .inner_sym_index = 0, + .inner_nsyms_trailing = 0, + .file = 0, + .size = 0, + .alignment = 0, + .prev_index = null, + .next_index = null, + }; atom.sym_index = sym_index; atom.size = size; atom.alignment = alignment; @@ -3169,6 +3178,14 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr }; defer zld.deinit(); + // Index 0 is always a null symbol. + try zld.locals.append(gpa, .{ + .n_strx = 0, + .n_type = 0, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); try zld.strtab.buffer.append(gpa, 0); // Positional arguments to the linker such as object files and static archives. 
From 837114f019a5ba2695165030790fb162ca199151 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 22 Aug 2023 22:00:34 +0200 Subject: [PATCH 09/57] macho: change dyld_private_sym_index to dyld_private_atom_index in zld --- src/link/MachO/zld.zig | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 67620711d3..e8d74695e0 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -73,7 +73,7 @@ pub const Zld = struct { mh_execute_header_index: ?u32 = null, dso_handle_index: ?u32 = null, dyld_stub_binder_index: ?u32 = null, - dyld_private_sym_index: ?u32 = null, + dyld_private_atom_index: ?Atom.Index = null, stub_helper_preamble_sym_index: ?u32 = null, strtab: StringTable(.strtab) = .{}, @@ -324,8 +324,6 @@ pub const Zld = struct { } fn createDyldPrivateAtom(self: *Zld) !void { - if (self.dyld_stub_binder_index == null) return; - const sym_index = try self.allocateSymbol(); const atom_index = try self.createEmptyAtom(sym_index, @sizeOf(u64), 3); const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); @@ -333,8 +331,7 @@ pub const Zld = struct { const sect_id = self.getSectionByName("__DATA", "__data") orelse try self.initSection("__DATA", "__data", .{}); sym.n_sect = sect_id + 1; - - self.dyld_private_sym_index = sym_index; + self.dyld_private_atom_index = atom_index; self.addAtomToSection(atom_index); } @@ -378,7 +375,8 @@ pub const Zld = struct { break :blk sym.n_value; }; const dyld_private_addr = blk: { - const sym = self.getSymbol(.{ .sym_index = self.dyld_private_sym_index.? 
}); + const atom = self.getAtom(self.dyld_private_atom_index.?); + const sym = self.getSymbol(atom.getSymbolWithLoc()); break :blk sym.n_value; }; const dyld_stub_binder_got_addr = blk: { @@ -1054,13 +1052,9 @@ pub const Zld = struct { const offset = buffer.items.len; // TODO: move writing synthetic sections into a separate function - if (atom.getFile() == null) outer: { - if (self.dyld_private_sym_index) |sym_index| { - if (atom.sym_index == sym_index) { - buffer.appendSliceAssumeCapacity(&[_]u8{0} ** @sizeOf(u64)); - break :outer; - } - } + if (atom_index == self.dyld_private_atom_index.?) { + buffer.appendSliceAssumeCapacity(&[_]u8{0} ** @sizeOf(u64)); + } else if (atom.getFile() == null) outer: { switch (header.type()) { macho.S_NON_LAZY_SYMBOL_POINTERS => { try self.writeGotPointer(count, buffer.writer()); @@ -1544,9 +1538,7 @@ pub const Zld = struct { const sym = self.getSymbol(atom.getSymbolWithLoc()); const should_rebase = blk: { - if (self.dyld_private_sym_index) |sym_index| { - if (atom.getFile() == null and atom.sym_index == sym_index) break :blk false; - } + if (atom_index == self.dyld_private_atom_index.?) break :blk false; break :blk !sym.undf(); }; @@ -1684,9 +1676,7 @@ pub const Zld = struct { log.debug(" ATOM({d}, %{d}, '{s}')", .{ atom_index, atom.sym_index, self.getSymbolName(atom.getSymbolWithLoc()) }); const should_bind = blk: { - if (self.dyld_private_sym_index) |sym_index| { - if (atom.getFile() == null and atom.sym_index == sym_index) break :blk false; - } + if (atom_index == self.dyld_private_atom_index.?) 
break :blk false; break :blk true; }; From 04e93dd26572e8d1d69c0f714a6eb06b9435f771 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 24 Aug 2023 10:34:59 +0200 Subject: [PATCH 10/57] macho: use TableSection for GOT entries in zld driver --- src/link/MachO.zig | 48 ++++--- src/link/MachO/Atom.zig | 37 ++---- src/link/MachO/UnwindInfo.zig | 8 +- src/link/MachO/eh_frame.zig | 8 +- src/link/MachO/thunks.zig | 5 +- src/link/MachO/zld.zig | 232 +++++++++++++++------------------- 6 files changed, 154 insertions(+), 184 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index f0db232e4b..d4899aa6b4 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3222,18 +3222,24 @@ fn writeLinkeditSegmentData(self: *MachO) !void { seg.vmsize = mem.alignForward(u64, seg.filesize, page_size); } -fn collectRebaseDataFromTableSection(self: *MachO, sect_id: u8, rebase: *Rebase, table: anytype) !void { - const header = self.sections.items(.header)[sect_id]; - const segment_index = self.sections.items(.segment_index)[sect_id]; - const segment = self.segments.items[segment_index]; +pub fn collectRebaseDataFromTableSection( + gpa: Allocator, + ctx: anytype, + sect_id: u8, + rebase: *Rebase, + table: anytype, +) !void { + const header = ctx.sections.items(.header)[sect_id]; + const segment_index = ctx.sections.items(.segment_index)[sect_id]; + const segment = ctx.segments.items[segment_index]; const base_offset = header.addr - segment.vmaddr; - const is_got = if (self.got_section_index) |index| index == sect_id else false; + const is_got = if (ctx.got_section_index) |index| index == sect_id else false; - try rebase.entries.ensureUnusedCapacity(self.base.allocator, table.entries.items.len); + try rebase.entries.ensureUnusedCapacity(gpa, table.entries.items.len); for (table.entries.items, 0..) 
|entry, i| { if (!table.lookup.contains(entry)) continue; - const sym = self.getSymbol(entry); + const sym = ctx.getSymbol(entry); if (is_got and sym.undf()) continue; const offset = i * @sizeOf(u64); log.debug(" | rebase at {x}", .{base_offset + offset}); @@ -3271,28 +3277,34 @@ fn collectRebaseData(self: *MachO, rebase: *Rebase) !void { } } - try self.collectRebaseDataFromTableSection(self.got_section_index.?, rebase, self.got_table); - try self.collectRebaseDataFromTableSection(self.la_symbol_ptr_section_index.?, rebase, self.stub_table); + try collectRebaseDataFromTableSection(gpa, self, self.got_section_index.?, rebase, self.got_table); + try collectRebaseDataFromTableSection(gpa, self, self.la_symbol_ptr_section_index.?, rebase, self.stub_table); try rebase.finalize(gpa); } -fn collectBindDataFromTableSection(self: *MachO, sect_id: u8, bind: anytype, table: anytype) !void { - const header = self.sections.items(.header)[sect_id]; - const segment_index = self.sections.items(.segment_index)[sect_id]; - const segment = self.segments.items[segment_index]; +pub fn collectBindDataFromTableSection( + gpa: Allocator, + ctx: anytype, + sect_id: u8, + bind: anytype, + table: anytype, +) !void { + const header = ctx.sections.items(.header)[sect_id]; + const segment_index = ctx.sections.items(.segment_index)[sect_id]; + const segment = ctx.segments.items[segment_index]; const base_offset = header.addr - segment.vmaddr; - try bind.entries.ensureUnusedCapacity(self.base.allocator, table.entries.items.len); + try bind.entries.ensureUnusedCapacity(gpa, table.entries.items.len); for (table.entries.items, 0..) 
|entry, i| { if (!table.lookup.contains(entry)) continue; - const bind_sym = self.getSymbol(entry); + const bind_sym = ctx.getSymbol(entry); if (!bind_sym.undf()) continue; const offset = i * @sizeOf(u64); log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ base_offset + offset, - self.getSymbolName(entry), + ctx.getSymbolName(entry), @divTrunc(@as(i16, @bitCast(bind_sym.n_desc)), macho.N_SYMBOL_RESOLVER), }); if (bind_sym.weakRef()) { @@ -3349,12 +3361,12 @@ fn collectBindData(self: *MachO, bind: anytype, raw_bindings: anytype) !void { } // Gather GOT pointers - try self.collectBindDataFromTableSection(self.got_section_index.?, bind, self.got_table); + try collectBindDataFromTableSection(gpa, self, self.got_section_index.?, bind, self.got_table); try bind.finalize(gpa, self); } fn collectLazyBindData(self: *MachO, bind: anytype) !void { - try self.collectBindDataFromTableSection(self.la_symbol_ptr_section_index.?, bind, self.stub_table); + try collectBindDataFromTableSection(self.base.allocator, self, self.la_symbol_ptr_section_index.?, bind, self.stub_table); try bind.finalize(self.base.allocator, self); } diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 73d90eb532..8b6c7638c3 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -358,10 +358,7 @@ pub fn parseRelocTarget(zld: *Zld, ctx: struct { return target; } -pub fn getRelocTargetAtomIndex(zld: *Zld, target: SymbolWithLoc, is_via_got: bool) ?Index { - if (is_via_got) { - return zld.getGotAtomIndexForSymbol(target).?; // panic means fatal error - } +pub fn getRelocTargetAtomIndex(zld: *Zld, target: SymbolWithLoc) ?Index { if (zld.getStubsAtomIndexForSymbol(target)) |stubs_atom| return stubs_atom; if (zld.getTlvPtrAtomIndexForSymbol(target)) |tlv_ptr_atom| return tlv_ptr_atom; @@ -411,7 +408,7 @@ fn scanAtomRelocsArm64(zld: *Zld, atom_index: Index, relocs: []align(1) const ma .ARM64_RELOC_POINTER_TO_GOT, => { // TODO rewrite relocation - try addGotEntry(zld, target); 
+ try zld.addGotEntry(target); }, .ARM64_RELOC_TLVP_LOAD_PAGE21, .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, @@ -454,7 +451,7 @@ fn scanAtomRelocsX86(zld: *Zld, atom_index: Index, relocs: []align(1) const mach }, .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => { // TODO rewrite relocation - try addGotEntry(zld, target); + try zld.addGotEntry(target); }, .X86_64_RELOC_TLV => { try addTlvPtrEntry(zld, target); @@ -479,18 +476,6 @@ fn addTlvPtrEntry(zld: *Zld, target: SymbolWithLoc) !void { try zld.tlv_ptr_table.putNoClobber(gpa, target, tlv_ptr_index); } -pub fn addGotEntry(zld: *Zld, target: SymbolWithLoc) !void { - if (zld.got_table.contains(target)) return; - const gpa = zld.gpa; - const atom_index = try zld.createGotAtom(); - const got_index = @as(u32, @intCast(zld.got_entries.items.len)); - try zld.got_entries.append(gpa, .{ - .target = target, - .atom_index = atom_index, - }); - try zld.got_table.putNoClobber(gpa, target, got_index); -} - pub fn addStub(zld: *Zld, target: SymbolWithLoc) !void { const target_sym = zld.getSymbol(target); if (!target_sym.undf()) return; @@ -532,8 +517,8 @@ pub fn resolveRelocs( }; } -pub fn getRelocTargetAddress(zld: *Zld, target: SymbolWithLoc, is_via_got: bool, is_tlv: bool) !u64 { - const target_atom_index = getRelocTargetAtomIndex(zld, target, is_via_got) orelse { +pub fn getRelocTargetAddress(zld: *Zld, target: SymbolWithLoc, is_tlv: bool) !u64 { + const target_atom_index = getRelocTargetAtomIndex(zld, target) orelse { // If there is no atom for target, we still need to check for special, atom-less // symbols such as `___dso_handle`. const target_name = zld.getSymbolName(target); @@ -656,7 +641,10 @@ fn resolveRelocsArm64( const header = zld.sections.items(.header)[source_sym.n_sect - 1]; break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; }; - const target_addr = try getRelocTargetAddress(zld, target, is_via_got, is_tlv); + const target_addr = if (is_via_got) + zld.getGotEntryAddress(target).? 
+ else + try getRelocTargetAddress(zld, target, is_tlv); log.debug(" | source_addr = 0x{x}", .{source_addr}); @@ -670,7 +658,7 @@ fn resolveRelocsArm64( zld.getSymbolName(atom.getSymbolWithLoc()), atom.getFile(), zld.getSymbolName(target), - zld.getAtom(getRelocTargetAtomIndex(zld, target, is_via_got).?).getFile(), + zld.getAtom(getRelocTargetAtomIndex(zld, target).?).getFile(), }); const displacement = if (Relocation.calcPcRelativeDisplacementArm64( @@ -953,7 +941,10 @@ fn resolveRelocsX86( log.debug(" | source_addr = 0x{x}", .{source_addr}); - const target_addr = try getRelocTargetAddress(zld, target, is_via_got, is_tlv); + const target_addr = if (is_via_got) + zld.getGotEntryAddress(target).? + else + try getRelocTargetAddress(zld, target, is_tlv); switch (rel_type) { .X86_64_RELOC_BRANCH => { diff --git a/src/link/MachO/UnwindInfo.zig b/src/link/MachO/UnwindInfo.zig index 4f2ca33638..be4a071a94 100644 --- a/src/link/MachO/UnwindInfo.zig +++ b/src/link/MachO/UnwindInfo.zig @@ -226,7 +226,7 @@ pub fn scanRelocs(zld: *Zld) !void { .code = mem.asBytes(&record), .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), }); - try Atom.addGotEntry(zld, target); + try zld.addGotEntry(target); } } } @@ -585,10 +585,8 @@ pub fn write(info: *UnwindInfo, zld: *Zld) !void { log.debug("Personalities:", .{}); for (info.personalities[0..info.personalities_count], 0..) 
|target, i| { - const atom_index = zld.getGotAtomIndexForSymbol(target).?; - const atom = zld.getAtom(atom_index); - const sym = zld.getSymbol(atom.getSymbolWithLoc()); - personalities[i] = @as(u32, @intCast(sym.n_value - seg.vmaddr)); + const addr = zld.getGotEntryAddress(target).?; + personalities[i] = @as(u32, @intCast(addr - seg.vmaddr)); log.debug(" {d}: 0x{x} ({s})", .{ i, personalities[i], zld.getSymbolName(target) }); } diff --git a/src/link/MachO/eh_frame.zig b/src/link/MachO/eh_frame.zig index efbb07ac05..83871b3836 100644 --- a/src/link/MachO/eh_frame.zig +++ b/src/link/MachO/eh_frame.zig @@ -267,7 +267,7 @@ pub fn EhFrameRecord(comptime is_mutable: bool) type { source_offset: u32, ) !void { if (rec.getPersonalityPointerReloc(zld, object_id, source_offset)) |target| { - try Atom.addGotEntry(zld, target); + try zld.addGotEntry(target); } } @@ -357,14 +357,14 @@ pub fn EhFrameRecord(comptime is_mutable: bool) type { // Address of the __eh_frame in the source object file }, .ARM64_RELOC_POINTER_TO_GOT => { - const target_addr = try Atom.getRelocTargetAddress(zld, target, true, false); + const target_addr = zld.getGotEntryAddress(target).?; const result = math.cast(i32, @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr))) orelse return error.Overflow; mem.writeIntLittle(i32, rec.data[rel_offset..][0..4], result); }, .ARM64_RELOC_UNSIGNED => { assert(rel.r_extern == 1); - const target_addr = try Atom.getRelocTargetAddress(zld, target, false, false); + const target_addr = try Atom.getRelocTargetAddress(zld, target, false); const result = @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr)); mem.writeIntLittle(i64, rec.data[rel_offset..][0..8], @as(i64, @intCast(result))); }, @@ -375,7 +375,7 @@ pub fn EhFrameRecord(comptime is_mutable: bool) type { const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); switch (rel_type) { .X86_64_RELOC_GOT => { - const target_addr = try Atom.getRelocTargetAddress(zld, target, true, 
false); + const target_addr = zld.getGotEntryAddress(target).?; const addend = mem.readIntLittle(i32, rec.data[rel_offset..][0..4]); const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); diff --git a/src/link/MachO/thunks.zig b/src/link/MachO/thunks.zig index b478fb4131..0e9e3ba48b 100644 --- a/src/link/MachO/thunks.zig +++ b/src/link/MachO/thunks.zig @@ -318,7 +318,10 @@ fn isReachable( const source_addr = source_sym.n_value + @as(u32, @intCast(rel.r_address - base_offset)); const is_via_got = Atom.relocRequiresGot(zld, rel); - const target_addr = Atom.getRelocTargetAddress(zld, target, is_via_got, false) catch unreachable; + const target_addr = if (is_via_got) + zld.getGotEntryAddress(target).? + else + Atom.getRelocTargetAddress(zld, target, false) catch unreachable; _ = Relocation.calcPcRelativeDisplacementArm64(source_addr, target_addr) catch return false; diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index e8d74695e0..4146d0cd07 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -35,6 +35,7 @@ const Section = MachO.Section; const StringTable = @import("../strtab.zig").StringTable; const SymbolWithLoc = MachO.SymbolWithLoc; const SymbolResolver = MachO.SymbolResolver; +const TableSection = @import("../table_section.zig").TableSection; const Trie = @import("Trie.zig"); const UnwindInfo = @import("UnwindInfo.zig"); @@ -66,6 +67,8 @@ pub const Zld = struct { segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, sections: std.MultiArrayList(Section) = .{}, + got_section_index: ?u8 = null, + locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, globals: std.ArrayListUnmanaged(SymbolWithLoc) = .{}, @@ -81,8 +84,7 @@ pub const Zld = struct { tlv_ptr_entries: std.ArrayListUnmanaged(IndirectPointer) = .{}, tlv_ptr_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, - got_entries: 
std.ArrayListUnmanaged(IndirectPointer) = .{}, - got_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, + got_table: TableSection(SymbolWithLoc) = .{}, stubs: std.ArrayListUnmanaged(IndirectPointer) = .{}, stubs_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, @@ -266,32 +268,6 @@ pub const Zld = struct { return index; } - pub fn createGotAtom(self: *Zld) !AtomIndex { - const sym_index = try self.allocateSymbol(); - const atom_index = try self.createEmptyAtom(sym_index, @sizeOf(u64), 3); - const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); - sym.n_type = macho.N_SECT; - - const sect_id = self.getSectionByName("__DATA_CONST", "__got") orelse - try self.initSection("__DATA_CONST", "__got", .{ - .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, - }); - sym.n_sect = sect_id + 1; - - self.addAtomToSection(atom_index); - - return atom_index; - } - - fn writeGotPointer(self: *Zld, got_index: u32, writer: anytype) !void { - const target_addr = blk: { - const entry = self.got_entries.items[got_index]; - const sym = entry.getTargetSymbol(self); - break :blk sym.n_value; - }; - try writer.writeIntLittle(u64, target_addr); - } - pub fn createTlvPtrAtom(self: *Zld) !AtomIndex { const sym_index = try self.allocateSymbol(); const atom_index = try self.createEmptyAtom(sym_index, @sizeOf(u64), 3); @@ -311,16 +287,9 @@ pub const Zld = struct { } fn createDyldStubBinderGotAtom(self: *Zld) !void { - const gpa = self.gpa; const global_index = self.dyld_stub_binder_index orelse return; const target = self.globals.items[global_index]; - const atom_index = try self.createGotAtom(); - const got_index = @as(u32, @intCast(self.got_entries.items.len)); - try self.got_entries.append(gpa, .{ - .target = target, - .atom_index = atom_index, - }); - try self.got_table.putNoClobber(gpa, target, got_index); + try self.addGotEntry(target); } fn createDyldPrivateAtom(self: *Zld) !void { @@ -381,9 +350,7 @@ pub const Zld = struct { }; const dyld_stub_binder_got_addr = blk: { const 
sym_loc = self.globals.items[self.dyld_stub_binder_index.?]; - const index = self.got_table.get(sym_loc).?; - const entry = self.got_entries.items[index]; - break :blk entry.getAtomSymbol(self).n_value; + break :blk self.getGotEntryAddress(sym_loc).?; }; try stub_helpers.writeStubHelperPreambleCode(.{ .cpu_arch = cpu_arch, @@ -876,7 +843,6 @@ pub const Zld = struct { self.tlv_ptr_entries.deinit(gpa); self.tlv_ptr_table.deinit(gpa); - self.got_entries.deinit(gpa); self.got_table.deinit(gpa); self.stubs.deinit(gpa); self.stubs_table.deinit(gpa); @@ -993,6 +959,16 @@ pub const Zld = struct { return global_index; } + pub fn addGotEntry(zld: *Zld, target: SymbolWithLoc) !void { + if (zld.got_table.lookup.contains(target)) return; + _ = try zld.got_table.allocateEntry(zld.gpa, target); + if (zld.got_section_index == null) { + zld.got_section_index = try zld.initSection("__DATA_CONST", "__got", .{ + .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, + }); + } + } + fn allocateSpecialSymbols(self: *Zld) !void { for (&[_]?u32{ self.dso_handle_index, @@ -1056,9 +1032,7 @@ pub const Zld = struct { buffer.appendSliceAssumeCapacity(&[_]u8{0} ** @sizeOf(u64)); } else if (atom.getFile() == null) outer: { switch (header.type()) { - macho.S_NON_LAZY_SYMBOL_POINTERS => { - try self.writeGotPointer(count, buffer.writer()); - }, + macho.S_NON_LAZY_SYMBOL_POINTERS => unreachable, macho.S_LAZY_SYMBOL_POINTERS => { try self.writeLazyPointer(count, buffer.writer()); }, @@ -1113,41 +1087,70 @@ pub const Zld = struct { } } - fn pruneAndSortSections(self: *Zld) !void { - const gpa = self.gpa; + fn writeGotEntries(self: *Zld) !void { + const sect_id = self.got_section_index orelse return; + const header = self.sections.items(.header)[sect_id]; + var buffer = try std.ArrayList(u8).initCapacity(self.gpa, header.size); + defer buffer.deinit(); + for (self.got_table.entries.items) |entry| { + const sym = self.getSymbol(entry); + buffer.writer().writeIntLittle(u64, sym.n_value) catch unreachable; + } + 
log.debug("writing .got contents at file offset 0x{x}", .{header.offset}); + try self.file.pwriteAll(buffer.items, header.offset); + } - const SortSection = struct { - pub fn lessThan(_: void, lhs: Section, rhs: Section) bool { - return getSectionPrecedence(lhs.header) < getSectionPrecedence(rhs.header); + fn pruneAndSortSections(self: *Zld) !void { + const Entry = struct { + index: u8, + + pub fn lessThan(zld: *Zld, lhs: @This(), rhs: @This()) bool { + const lhs_header = zld.sections.items(.header)[lhs.index]; + const rhs_header = zld.sections.items(.header)[rhs.index]; + return getSectionPrecedence(lhs_header) < getSectionPrecedence(rhs_header); } }; - const slice = self.sections.slice(); - var sections = std.ArrayList(Section).init(gpa); - defer sections.deinit(); - try sections.ensureTotalCapacity(slice.len); + const gpa = self.gpa; - { - var i: u8 = 0; - while (i < slice.len) : (i += 1) { - const section = self.sections.get(i); - if (section.header.size == 0) { - log.debug("pruning section {s},{s} {?d}", .{ - section.header.segName(), - section.header.sectName(), - section.first_atom_index, - }); - continue; - } - sections.appendAssumeCapacity(section); + var entries = try std.ArrayList(Entry).initCapacity(gpa, self.sections.slice().len); + defer entries.deinit(); + + for (0..self.sections.slice().len) |index| { + const section = self.sections.get(index); + if (section.header.size == 0) { + log.debug("pruning section {s},{s} {?d}", .{ + section.header.segName(), + section.header.sectName(), + section.first_atom_index, + }); + continue; } + entries.appendAssumeCapacity(.{ .index = @intCast(index) }); } - mem.sort(Section, sections.items, {}, SortSection.lessThan); + mem.sort(Entry, entries.items, self, Entry.lessThan); - self.sections.shrinkRetainingCapacity(0); - for (sections.items) |out| { - self.sections.appendAssumeCapacity(out); + var slice = self.sections.toOwnedSlice(); + defer slice.deinit(gpa); + + const backlinks = try gpa.alloc(u8, slice.len); + 
defer gpa.free(backlinks); + for (entries.items, 0..) |entry, i| { + backlinks[entry.index] = @as(u8, @intCast(i)); + } + + try self.sections.ensureTotalCapacity(gpa, entries.items.len); + for (entries.items) |entry| { + self.sections.appendAssumeCapacity(slice.get(entry.index)); + } + + for (&[_]*?u8{ + &self.got_section_index, + }) |maybe_index| { + if (maybe_index.*) |*index| { + index.* = backlinks[index.*]; + } } } @@ -1227,6 +1230,12 @@ pub const Zld = struct { } else break; } } + + if (self.got_section_index) |sect_id| { + const header = &self.sections.items(.header)[sect_id]; + header.size = self.got_table.count() * @sizeOf(u64); + header.@"align" = 3; + } } fn allocateSegments(self: *Zld) !void { @@ -1448,40 +1457,12 @@ pub const Zld = struct { seg.vmsize = mem.alignForward(u64, seg.filesize, MachO.getPageSize(self.options.target.cpu.arch)); } - fn collectRebaseDataFromContainer( - self: *Zld, - sect_id: u8, - rebase: *Rebase, - container: anytype, - ) !void { - const slice = self.sections.slice(); - const segment_index = slice.items(.segment_index)[sect_id]; - const seg = self.getSegment(sect_id); - - try rebase.entries.ensureUnusedCapacity(self.gpa, container.items.len); - - for (container.items) |entry| { - const target_sym = entry.getTargetSymbol(self); - if (target_sym.undf()) continue; - - const atom_sym = entry.getAtomSymbol(self); - const base_offset = atom_sym.n_value - seg.vmaddr; - - log.debug(" | rebase at {x}", .{base_offset}); - - rebase.entries.appendAssumeCapacity(.{ - .offset = base_offset, - .segment_id = segment_index, - }); - } - } - fn collectRebaseData(self: *Zld, rebase: *Rebase) !void { log.debug("collecting rebase data", .{}); // First, unpack GOT entries - if (self.getSectionByName("__DATA_CONST", "__got")) |sect_id| { - try self.collectRebaseDataFromContainer(sect_id, rebase, self.got_entries); + if (self.got_section_index) |sect_id| { + try MachO.collectRebaseDataFromTableSection(self.gpa, self, sect_id, rebase, self.got_table); 
} const slice = self.sections.slice(); @@ -1543,7 +1524,11 @@ pub const Zld = struct { }; if (should_rebase) { - log.debug(" ATOM({d}, %{d}, '{s}')", .{ atom_index, atom.sym_index, self.getSymbolName(atom.getSymbolWithLoc()) }); + log.debug(" ATOM({d}, %{d}, '{s}')", .{ + atom_index, + atom.sym_index, + self.getSymbolName(atom.getSymbolWithLoc()), + }); const code = Atom.getAtomCode(self, atom_index); const relocs = Atom.getAtomRelocs(self, atom_index); @@ -1639,8 +1624,8 @@ pub const Zld = struct { log.debug("collecting bind data", .{}); // First, unpack GOT section - if (self.getSectionByName("__DATA_CONST", "__got")) |sect_id| { - try self.collectBindDataFromContainer(sect_id, bind, self.got_entries); + if (self.got_section_index) |sect_id| { + try MachO.collectBindDataFromTableSection(self.gpa, self, sect_id, bind, self.got_table); } // Next, unpack TLV pointers section @@ -2237,7 +2222,7 @@ pub const Zld = struct { fn writeDysymtab(self: *Zld, ctx: SymtabCtx) !void { const gpa = self.gpa; const nstubs = @as(u32, @intCast(self.stubs.items.len)); - const ngot_entries = @as(u32, @intCast(self.got_entries.items.len)); + const ngot_entries = @as(u32, @intCast(self.got_table.lookup.count())); const nindirectsyms = nstubs * 2 + ngot_entries; const iextdefsym = ctx.nlocalsym; const iundefsym = iextdefsym + ctx.nextdefsym; @@ -2266,13 +2251,14 @@ pub const Zld = struct { } } - if (self.getSectionByName("__DATA_CONST", "__got")) |sect_id| { + if (self.got_section_index) |sect_id| { const got = &self.sections.items(.header)[sect_id]; got.reserved1 = nstubs; - for (self.got_entries.items) |entry| { - const target_sym = entry.getTargetSymbol(self); + for (self.got_table.entries.items) |entry| { + if (!self.got_table.lookup.contains(entry)) continue; + const target_sym = self.getSymbol(entry); if (target_sym.undf()) { - try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); + try writer.writeIntLittle(u32, iundefsym + 
ctx.imports_table.get(entry).?); } else { try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); } @@ -2496,12 +2482,10 @@ pub const Zld = struct { } } - /// Returns GOT atom that references `sym_with_loc` if one exists. - /// Returns null otherwise. - pub fn getGotAtomIndexForSymbol(self: *Zld, sym_with_loc: SymbolWithLoc) ?AtomIndex { - const index = self.got_table.get(sym_with_loc) orelse return null; - const entry = self.got_entries.items[index]; - return entry.atom_index; + pub fn getGotEntryAddress(self: *Zld, sym_with_loc: SymbolWithLoc) ?u64 { + const index = self.got_table.lookup.get(sym_with_loc) orelse return null; + const header = self.sections.items(.header)[self.got_section_index.?]; + return header.addr + @sizeOf(u64) * index; } /// Returns stubs atom that references `sym_with_loc` if one exists. @@ -2849,26 +2833,7 @@ pub const Zld = struct { } scoped_log.debug("GOT entries:", .{}); - for (self.got_entries.items, 0..) |entry, i| { - const atom_sym = entry.getAtomSymbol(self); - const target_sym = entry.getTargetSymbol(self); - const target_sym_name = entry.getTargetSymbolName(self); - if (target_sym.undf()) { - scoped_log.debug(" {d}@{x} => import('{s}')", .{ - i, - atom_sym.n_value, - target_sym_name, - }); - } else { - scoped_log.debug(" {d}@{x} => local(%{d}) in object({?}) {s}", .{ - i, - atom_sym.n_value, - entry.target.sym_index, - entry.target.file, - logSymAttributes(target_sym, buf[0..4]), - }); - } - } + scoped_log.debug("{}", .{self.got_table}); scoped_log.debug("__thread_ptrs entries:", .{}); for (self.tlv_ptr_entries.items, 0..) 
|entry, i| { @@ -3470,6 +3435,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr } try zld.writeAtoms(); + try zld.writeGotEntries(); try eh_frame.write(&zld, &unwind_info); try unwind_info.write(&zld); try zld.writeLinkeditSegmentData(); From 5750620715bde214778546d2adc6db5fdfad5588 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 24 Aug 2023 13:49:03 +0200 Subject: [PATCH 11/57] macho: use TableSection for TLV pointer entries in zld driver --- src/link/MachO/Atom.zig | 63 ++++++++++--------- src/link/MachO/zld.zig | 135 ++++++++++++---------------------------- 2 files changed, 75 insertions(+), 123 deletions(-) diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 8b6c7638c3..d62b23d6c7 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -360,7 +360,6 @@ pub fn parseRelocTarget(zld: *Zld, ctx: struct { pub fn getRelocTargetAtomIndex(zld: *Zld, target: SymbolWithLoc) ?Index { if (zld.getStubsAtomIndexForSymbol(target)) |stubs_atom| return stubs_atom; - if (zld.getTlvPtrAtomIndexForSymbol(target)) |tlv_ptr_atom| return tlv_ptr_atom; if (target.getFile() == null) { const target_sym_name = zld.getSymbolName(target); @@ -413,7 +412,8 @@ fn scanAtomRelocsArm64(zld: *Zld, atom_index: Index, relocs: []align(1) const ma .ARM64_RELOC_TLVP_LOAD_PAGE21, .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, => { - try addTlvPtrEntry(zld, target); + const sym = zld.getSymbol(target); + if (sym.undf()) try zld.addTlvPtrEntry(target); }, else => {}, } @@ -454,28 +454,14 @@ fn scanAtomRelocsX86(zld: *Zld, atom_index: Index, relocs: []align(1) const mach try zld.addGotEntry(target); }, .X86_64_RELOC_TLV => { - try addTlvPtrEntry(zld, target); + const sym = zld.getSymbol(target); + if (sym.undf()) try zld.addTlvPtrEntry(target); }, else => {}, } } } -fn addTlvPtrEntry(zld: *Zld, target: SymbolWithLoc) !void { - const target_sym = zld.getSymbol(target); - if (!target_sym.undf()) return; - if (zld.tlv_ptr_table.contains(target)) 
return; - - const gpa = zld.gpa; - const atom_index = try zld.createTlvPtrAtom(); - const tlv_ptr_index = @as(u32, @intCast(zld.tlv_ptr_entries.items.len)); - try zld.tlv_ptr_entries.append(gpa, .{ - .target = target, - .atom_index = atom_index, - }); - try zld.tlv_ptr_table.putNoClobber(gpa, target, tlv_ptr_index); -} - pub fn addStub(zld: *Zld, target: SymbolWithLoc) !void { const target_sym = zld.getSymbol(target); if (!target_sym.undf()) return; @@ -641,10 +627,12 @@ fn resolveRelocsArm64( const header = zld.sections.items(.header)[source_sym.n_sect - 1]; break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; }; - const target_addr = if (is_via_got) - zld.getGotEntryAddress(target).? - else - try getRelocTargetAddress(zld, target, is_tlv); + const target_addr = blk: { + if (is_via_got) break :blk zld.getGotEntryAddress(target).?; + if (relocIsTlv(zld, rel) and zld.getSymbol(target).undf()) + break :blk zld.getTlvPtrEntryAddress(target).?; + break :blk try getRelocTargetAddress(zld, target, is_tlv); + }; log.debug(" | source_addr = 0x{x}", .{source_addr}); @@ -802,7 +790,7 @@ fn resolveRelocsArm64( } }; - var inst = if (zld.tlv_ptr_table.contains(target)) aarch64.Instruction{ + var inst = if (zld.tlv_ptr_table.lookup.contains(target)) aarch64.Instruction{ .load_store_register = .{ .rt = reg_info.rd, .rn = reg_info.rn, @@ -938,14 +926,15 @@ fn resolveRelocsX86( const header = zld.sections.items(.header)[source_sym.n_sect - 1]; break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; }; + const target_addr = blk: { + if (is_via_got) break :blk zld.getGotEntryAddress(target).?; + if (relocIsTlv(zld, rel) and zld.getSymbol(target).undf()) + break :blk zld.getTlvPtrEntryAddress(target).?; + break :blk try getRelocTargetAddress(zld, target, is_tlv); + }; log.debug(" | source_addr = 0x{x}", .{source_addr}); - const target_addr = if (is_via_got) - zld.getGotEntryAddress(target).? 
- else - try getRelocTargetAddress(zld, target, is_tlv); - switch (rel_type) { .X86_64_RELOC_BRANCH => { const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); @@ -971,7 +960,7 @@ fn resolveRelocsX86( log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); - if (zld.tlv_ptr_table.get(target) == null) { + if (zld.tlv_ptr_table.lookup.get(target) == null) { // We need to rewrite the opcode from movq to leaq. atom_code[rel_offset - 2] = 0x8d; } @@ -1112,3 +1101,19 @@ pub fn relocRequiresGot(zld: *Zld, rel: macho.relocation_info) bool { else => unreachable, } } + +pub fn relocIsTlv(zld: *Zld, rel: macho.relocation_info) bool { + switch (zld.options.target.cpu.arch) { + .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { + .ARM64_RELOC_TLVP_LOAD_PAGE21, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, + => return true, + else => return false, + }, + .x86_64 => switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) { + .X86_64_RELOC_TLV => return true, + else => return false, + }, + else => unreachable, + } +} diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 4146d0cd07..c15f81c7e3 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -68,6 +68,7 @@ pub const Zld = struct { sections: std.MultiArrayList(Section) = .{}, got_section_index: ?u8 = null, + tlv_ptr_section_index: ?u8 = null, locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, globals: std.ArrayListUnmanaged(SymbolWithLoc) = .{}, @@ -81,9 +82,7 @@ pub const Zld = struct { strtab: StringTable(.strtab) = .{}, - tlv_ptr_entries: std.ArrayListUnmanaged(IndirectPointer) = .{}, - tlv_ptr_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, - + tlv_ptr_table: TableSection(SymbolWithLoc) = .{}, got_table: TableSection(SymbolWithLoc) = .{}, stubs: std.ArrayListUnmanaged(IndirectPointer) = .{}, @@ -268,24 +267,6 @@ pub const Zld = struct { return 
index; } - pub fn createTlvPtrAtom(self: *Zld) !AtomIndex { - const sym_index = try self.allocateSymbol(); - const atom_index = try self.createEmptyAtom(sym_index, @sizeOf(u64), 3); - const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); - sym.n_type = macho.N_SECT; - - const sect_id = (try self.getOutputSection(.{ - .segname = makeStaticString("__DATA"), - .sectname = makeStaticString("__thread_ptrs"), - .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, - })).?; - sym.n_sect = sect_id + 1; - - self.addAtomToSection(atom_index); - - return atom_index; - } - fn createDyldStubBinderGotAtom(self: *Zld) !void { const global_index = self.dyld_stub_binder_index orelse return; const target = self.globals.items[global_index]; @@ -841,7 +822,6 @@ pub const Zld = struct { pub fn deinit(self: *Zld) void { const gpa = self.gpa; - self.tlv_ptr_entries.deinit(gpa); self.tlv_ptr_table.deinit(gpa); self.got_table.deinit(gpa); self.stubs.deinit(gpa); @@ -959,16 +939,26 @@ pub const Zld = struct { return global_index; } - pub fn addGotEntry(zld: *Zld, target: SymbolWithLoc) !void { - if (zld.got_table.lookup.contains(target)) return; - _ = try zld.got_table.allocateEntry(zld.gpa, target); - if (zld.got_section_index == null) { - zld.got_section_index = try zld.initSection("__DATA_CONST", "__got", .{ + pub fn addGotEntry(self: *Zld, target: SymbolWithLoc) !void { + if (self.got_table.lookup.contains(target)) return; + _ = try self.got_table.allocateEntry(self.gpa, target); + if (self.got_section_index == null) { + self.got_section_index = try self.initSection("__DATA_CONST", "__got", .{ .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, }); } } + pub fn addTlvPtrEntry(self: *Zld, target: SymbolWithLoc) !void { + if (self.tlv_ptr_table.lookup.contains(target)) return; + _ = try self.tlv_ptr_table.allocateEntry(self.gpa, target); + if (self.tlv_ptr_section_index == null) { + self.tlv_ptr_section_index = try self.initSection("__DATA", "__thread_ptrs", .{ + .flags = 
macho.S_THREAD_LOCAL_VARIABLE_POINTERS, + }); + } + } + fn allocateSpecialSymbols(self: *Zld) !void { for (&[_]?u32{ self.dso_handle_index, @@ -1033,12 +1023,10 @@ pub const Zld = struct { } else if (atom.getFile() == null) outer: { switch (header.type()) { macho.S_NON_LAZY_SYMBOL_POINTERS => unreachable, + macho.S_THREAD_LOCAL_VARIABLE_POINTERS => unreachable, macho.S_LAZY_SYMBOL_POINTERS => { try self.writeLazyPointer(count, buffer.writer()); }, - macho.S_THREAD_LOCAL_VARIABLE_POINTERS => { - buffer.appendSliceAssumeCapacity(&[_]u8{0} ** @sizeOf(u64)); - }, else => { if (self.stub_helper_preamble_sym_index) |sym_index| { if (sym_index == atom.sym_index) { @@ -1087,12 +1075,11 @@ pub const Zld = struct { } } - fn writeGotEntries(self: *Zld) !void { - const sect_id = self.got_section_index orelse return; + fn writePointerEntries(self: *Zld, sect_id: u8, table: anytype) !void { const header = self.sections.items(.header)[sect_id]; var buffer = try std.ArrayList(u8).initCapacity(self.gpa, header.size); defer buffer.deinit(); - for (self.got_table.entries.items) |entry| { + for (table.entries.items) |entry| { const sym = self.getSymbol(entry); buffer.writer().writeIntLittle(u64, sym.n_value) catch unreachable; } @@ -1147,6 +1134,7 @@ pub const Zld = struct { for (&[_]*?u8{ &self.got_section_index, + &self.tlv_ptr_section_index, }) |maybe_index| { if (maybe_index.*) |*index| { index.* = backlinks[index.*]; @@ -1236,6 +1224,12 @@ pub const Zld = struct { header.size = self.got_table.count() * @sizeOf(u64); header.@"align" = 3; } + + if (self.tlv_ptr_section_index) |sect_id| { + const header = &self.sections.items(.header)[sect_id]; + header.size = self.tlv_ptr_table.count() * @sizeOf(u64); + header.@"align" = 3; + } } fn allocateSegments(self: *Zld) !void { @@ -1579,44 +1573,6 @@ pub const Zld = struct { try rebase.finalize(self.gpa); } - fn collectBindDataFromContainer( - self: *Zld, - sect_id: u8, - bind: *Bind, - container: anytype, - ) !void { - const slice = 
self.sections.slice(); - const segment_index = slice.items(.segment_index)[sect_id]; - const seg = self.getSegment(sect_id); - - try bind.entries.ensureUnusedCapacity(self.gpa, container.items.len); - - for (container.items) |entry| { - const bind_sym_name = entry.getTargetSymbolName(self); - const bind_sym = entry.getTargetSymbol(self); - if (bind_sym.sect()) continue; - - const sym = entry.getAtomSymbol(self); - const base_offset = sym.n_value - seg.vmaddr; - - const dylib_ordinal = @divTrunc(@as(i16, @bitCast(bind_sym.n_desc)), macho.N_SYMBOL_RESOLVER); - log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ - base_offset, - bind_sym_name, - dylib_ordinal, - }); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - } - bind.entries.appendAssumeCapacity(.{ - .target = entry.target, - .offset = base_offset, - .segment_id = segment_index, - .addend = 0, - }); - } - } - fn collectBindData( self: *Zld, bind: *Bind, @@ -1629,8 +1585,8 @@ pub const Zld = struct { } // Next, unpack TLV pointers section - if (self.getSectionByName("__DATA", "__thread_ptrs")) |sect_id| { - try self.collectBindDataFromContainer(sect_id, bind, self.tlv_ptr_entries); + if (self.tlv_ptr_section_index) |sect_id| { + try MachO.collectBindDataFromTableSection(self.gpa, self, sect_id, bind, self.tlv_ptr_table); } // Finally, unpack the rest. @@ -2488,6 +2444,12 @@ pub const Zld = struct { return header.addr + @sizeOf(u64) * index; } + pub fn getTlvPtrEntryAddress(self: *Zld, sym_with_loc: SymbolWithLoc) ?u64 { + const index = self.tlv_ptr_table.lookup.get(sym_with_loc) orelse return null; + const header = self.sections.items(.header)[self.tlv_ptr_section_index.?]; + return header.addr + @sizeOf(u64) * index; + } + /// Returns stubs atom that references `sym_with_loc` if one exists. /// Returns null otherwise. 
pub fn getStubsAtomIndexForSymbol(self: *Zld, sym_with_loc: SymbolWithLoc) ?AtomIndex { @@ -2496,14 +2458,6 @@ pub const Zld = struct { return entry.atom_index; } - /// Returns TLV pointer atom that references `sym_with_loc` if one exists. - /// Returns null otherwise. - pub fn getTlvPtrAtomIndexForSymbol(self: *Zld, sym_with_loc: SymbolWithLoc) ?AtomIndex { - const index = self.tlv_ptr_table.get(sym_with_loc) orelse return null; - const entry = self.tlv_ptr_entries.items[index]; - return entry.atom_index; - } - /// Returns symbol location corresponding to the set entrypoint. /// Asserts output mode is executable. pub fn getEntryPoint(self: Zld) SymbolWithLoc { @@ -2835,18 +2789,8 @@ pub const Zld = struct { scoped_log.debug("GOT entries:", .{}); scoped_log.debug("{}", .{self.got_table}); - scoped_log.debug("__thread_ptrs entries:", .{}); - for (self.tlv_ptr_entries.items, 0..) |entry, i| { - const atom_sym = entry.getAtomSymbol(self); - const target_sym = entry.getTargetSymbol(self); - const target_sym_name = entry.getTargetSymbolName(self); - assert(target_sym.undf()); - scoped_log.debug(" {d}@{x} => import('{s}')", .{ - i, - atom_sym.n_value, - target_sym_name, - }); - } + scoped_log.debug("TLV pointers:", .{}); + scoped_log.debug("{}", .{self.tlv_ptr_table}); scoped_log.debug("stubs entries:", .{}); for (self.stubs.items, 0..) 
|entry, i| { @@ -3435,7 +3379,10 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr } try zld.writeAtoms(); - try zld.writeGotEntries(); + + if (zld.got_section_index) |sect_id| try zld.writePointerEntries(sect_id, &zld.got_table); + if (zld.tlv_ptr_section_index) |sect_id| try zld.writePointerEntries(sect_id, &zld.tlv_ptr_table); + try eh_frame.write(&zld, &unwind_info); try unwind_info.write(&zld); try zld.writeLinkeditSegmentData(); From ee02517bce58063ecf087343639620454b9f387d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 24 Aug 2023 15:13:17 +0200 Subject: [PATCH 12/57] macho: remove obsolete function createDyldStubBinderGotAtom --- src/link/MachO/zld.zig | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index c15f81c7e3..c49e5cf571 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -267,12 +267,6 @@ pub const Zld = struct { return index; } - fn createDyldStubBinderGotAtom(self: *Zld) !void { - const global_index = self.dyld_stub_binder_index orelse return; - const target = self.globals.items[global_index]; - try self.addGotEntry(target); - } - fn createDyldPrivateAtom(self: *Zld) !void { const sym_index = try self.allocateSymbol(); const atom_index = try self.createEmptyAtom(sym_index, @sizeOf(u64), 3); @@ -3354,7 +3348,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try eh_frame.scanRelocs(&zld); try UnwindInfo.scanRelocs(&zld); - try zld.createDyldStubBinderGotAtom(); + if (zld.dyld_stub_binder_index) |index| try zld.addGotEntry(zld.globals.items[index]); try zld.calcSectionSizes(); From 4b934b1f78c57598b5c629cff9d9a02c5e2ffe13 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 24 Aug 2023 23:06:16 +0200 Subject: [PATCH 13/57] macho: use TableSection for stub entries in zld driver Write thunks separately from other atoms - this can still be improved by not using atoms at all, but one thing 
at a time. --- src/link/MachO.zig | 47 ++- src/link/MachO/Atom.zig | 93 ++--- src/link/MachO/Object.zig | 15 +- src/link/MachO/Relocation.zig | 2 +- src/link/MachO/UnwindInfo.zig | 2 +- src/link/MachO/dead_strip.zig | 9 +- src/link/MachO/eh_frame.zig | 1 - src/link/MachO/stubs.zig | 16 +- src/link/MachO/thunks.zig | 169 ++++---- src/link/MachO/zld.zig | 727 ++++++++++++++-------------------- 10 files changed, 469 insertions(+), 612 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index d4899aa6b4..9164d9aae6 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1412,7 +1412,7 @@ fn writeStubHelperPreamble(self: *MachO) !void { const gpa = self.base.allocator; const cpu_arch = self.base.options.target.cpu.arch; - const size = stubs.calcStubHelperPreambleSize(cpu_arch); + const size = stubs.stubHelperPreambleSize(cpu_arch); var buf = try std.ArrayList(u8).initCapacity(gpa, size); defer buf.deinit(); @@ -1442,9 +1442,9 @@ fn writeStubTableEntry(self: *MachO, index: usize) !void { const laptr_sect_id = self.la_symbol_ptr_section_index.?; const cpu_arch = self.base.options.target.cpu.arch; - const stub_entry_size = stubs.calcStubEntrySize(cpu_arch); - const stub_helper_entry_size = stubs.calcStubHelperEntrySize(cpu_arch); - const stub_helper_preamble_size = stubs.calcStubHelperPreambleSize(cpu_arch); + const stub_entry_size = stubs.stubSize(cpu_arch); + const stub_helper_entry_size = stubs.stubHelperSize(cpu_arch); + const stub_helper_preamble_size = stubs.stubHelperPreambleSize(cpu_arch); if (self.stub_table_count_dirty) { // We grow all 3 sections one by one. 
@@ -2800,14 +2800,10 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.stubs_section_index == null) { - const stub_size = stubs.calcStubEntrySize(cpu_arch); + const stub_size = stubs.stubSize(cpu_arch); self.stubs_section_index = try self.allocateSection("__TEXT2", "__stubs", .{ .size = stub_size, - .alignment = switch (cpu_arch) { - .x86_64 => 1, - .aarch64 => @sizeOf(u32), - else => unreachable, // unhandled architecture type - }, + .alignment = stubs.stubAlignment(cpu_arch), .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, .reserved2 = stub_size, .prot = macho.PROT.READ | macho.PROT.EXEC, @@ -3474,7 +3470,12 @@ fn writeDyldInfoData(self: *MachO) !void { }); try self.base.file.?.pwriteAll(buffer, rebase_off); - try self.populateLazyBindOffsetsInStubHelper(lazy_bind); + try populateLazyBindOffsetsInStubHelper( + self, + self.base.options.target.cpu.arch, + self.base.file.?, + lazy_bind, + ); self.dyld_info_cmd.rebase_off = @as(u32, @intCast(rebase_off)); self.dyld_info_cmd.rebase_size = @as(u32, @intCast(rebase_size_aligned)); @@ -3486,18 +3487,22 @@ fn writeDyldInfoData(self: *MachO) !void { self.dyld_info_cmd.export_size = @as(u32, @intCast(export_size_aligned)); } -fn populateLazyBindOffsetsInStubHelper(self: *MachO, lazy_bind: LazyBind) !void { +pub fn populateLazyBindOffsetsInStubHelper( + ctx: anytype, + cpu_arch: std.Target.Cpu.Arch, + file: fs.File, + lazy_bind: anytype, +) !void { if (lazy_bind.size() == 0) return; - const stub_helper_section_index = self.stub_helper_section_index.?; - assert(self.stub_helper_preamble_allocated); + const stub_helper_section_index = ctx.stub_helper_section_index.?; + // assert(ctx.stub_helper_preamble_allocated); - const header = self.sections.items(.header)[stub_helper_section_index]; + const header = ctx.sections.items(.header)[stub_helper_section_index]; - const cpu_arch = self.base.options.target.cpu.arch; - const preamble_size = 
stubs.calcStubHelperPreambleSize(cpu_arch); - const stub_size = stubs.calcStubHelperEntrySize(cpu_arch); - const stub_offset = stubs.calcStubOffsetInStubHelper(cpu_arch); + const preamble_size = stubs.stubHelperPreambleSize(cpu_arch); + const stub_size = stubs.stubHelperSize(cpu_arch); + const stub_offset = stubs.stubOffsetInStubHelper(cpu_arch); const base_offset = header.offset + preamble_size; for (lazy_bind.offsets.items, 0..) |bind_offset, index| { @@ -3505,11 +3510,11 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, lazy_bind: LazyBind) !void log.debug("writing lazy bind offset 0x{x} ({s}) in stub helper at 0x{x}", .{ bind_offset, - self.getSymbolName(lazy_bind.entries.items[index].target), + ctx.getSymbolName(lazy_bind.entries.items[index].target), file_offset, }); - try self.base.file.?.pwriteAll(mem.asBytes(&bind_offset), file_offset); + try file.pwriteAll(mem.asBytes(&bind_offset), file_offset); } } diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index d62b23d6c7..5e1d6bd4cc 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -359,8 +359,6 @@ pub fn parseRelocTarget(zld: *Zld, ctx: struct { } pub fn getRelocTargetAtomIndex(zld: *Zld, target: SymbolWithLoc) ?Index { - if (zld.getStubsAtomIndexForSymbol(target)) |stubs_atom| return stubs_atom; - if (target.getFile() == null) { const target_sym_name = zld.getSymbolName(target); if (mem.eql(u8, "__mh_execute_header", target_sym_name)) return null; @@ -400,7 +398,8 @@ fn scanAtomRelocsArm64(zld: *Zld, atom_index: Index, relocs: []align(1) const ma switch (rel_type) { .ARM64_RELOC_BRANCH26 => { // TODO rewrite relocation - try addStub(zld, target); + const sym = zld.getSymbol(target); + if (sym.undf()) try zld.addStubEntry(target); }, .ARM64_RELOC_GOT_LOAD_PAGE21, .ARM64_RELOC_GOT_LOAD_PAGEOFF12, @@ -447,7 +446,8 @@ fn scanAtomRelocsX86(zld: *Zld, atom_index: Index, relocs: []align(1) const mach switch (rel_type) { .X86_64_RELOC_BRANCH => { // TODO rewrite relocation - 
try addStub(zld, target); + const sym = zld.getSymbol(target); + if (sym.undf()) try zld.addStubEntry(target); }, .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => { // TODO rewrite relocation @@ -462,23 +462,6 @@ fn scanAtomRelocsX86(zld: *Zld, atom_index: Index, relocs: []align(1) const mach } } -pub fn addStub(zld: *Zld, target: SymbolWithLoc) !void { - const target_sym = zld.getSymbol(target); - if (!target_sym.undf()) return; - if (zld.stubs_table.contains(target)) return; - - const gpa = zld.gpa; - _ = try zld.createStubHelperAtom(); - _ = try zld.createLazyPointerAtom(); - const atom_index = try zld.createStubAtom(); - const stubs_index = @as(u32, @intCast(zld.stubs.items.len)); - try zld.stubs.append(gpa, .{ - .target = target, - .atom_index = atom_index, - }); - try zld.stubs_table.putNoClobber(gpa, target, stubs_index); -} - pub fn resolveRelocs( zld: *Zld, atom_index: Index, @@ -621,16 +604,17 @@ fn resolveRelocsArm64( const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); break :blk source_sym.n_value + rel_offset; }; - const is_via_got = relocRequiresGot(zld, rel); - const is_tlv = is_tlv: { - const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); - const header = zld.sections.items(.header)[source_sym.n_sect - 1]; - break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; - }; const target_addr = blk: { - if (is_via_got) break :blk zld.getGotEntryAddress(target).?; + if (relocRequiresGot(zld, rel)) break :blk zld.getGotEntryAddress(target).?; if (relocIsTlv(zld, rel) and zld.getSymbol(target).undf()) break :blk zld.getTlvPtrEntryAddress(target).?; + if (relocIsStub(zld, rel) and zld.getSymbol(target).undf()) + break :blk zld.getStubsEntryAddress(target).?; + const is_tlv = is_tlv: { + const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); + const header = zld.sections.items(.header)[source_sym.n_sect - 1]; + break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; + }; break :blk try getRelocTargetAddress(zld, target, is_tlv); }; @@ 
-638,32 +622,28 @@ fn resolveRelocsArm64( switch (rel_type) { .ARM64_RELOC_BRANCH26 => { - const actual_target = if (zld.getStubsAtomIndexForSymbol(target)) |stub_atom_index| inner: { - const stub_atom = zld.getAtom(stub_atom_index); - break :inner stub_atom.getSymbolWithLoc(); - } else target; - log.debug(" source {s} (object({?})), target {s} (object({?}))", .{ + log.debug(" source {s} (object({?})), target {s}", .{ zld.getSymbolName(atom.getSymbolWithLoc()), atom.getFile(), zld.getSymbolName(target), - zld.getAtom(getRelocTargetAtomIndex(zld, target).?).getFile(), }); const displacement = if (Relocation.calcPcRelativeDisplacementArm64( source_addr, - zld.getSymbol(actual_target).n_value, + target_addr, )) |disp| blk: { - log.debug(" | target_addr = 0x{x}", .{zld.getSymbol(actual_target).n_value}); + log.debug(" | target_addr = 0x{x}", .{target_addr}); break :blk disp; } else |_| blk: { const thunk_index = zld.thunk_table.get(atom_index).?; const thunk = zld.thunks.items[thunk_index]; - const thunk_sym = zld.getSymbol(thunk.getTrampolineForSymbol( - zld, - actual_target, - ).?); - log.debug(" | target_addr = 0x{x} (thunk)", .{thunk_sym.n_value}); - break :blk try Relocation.calcPcRelativeDisplacementArm64(source_addr, thunk_sym.n_value); + const thunk_sym_loc = if (zld.getSymbol(target).undf()) + thunk.getTrampoline(zld, .stub, target).? 
+ else + thunk.getTrampoline(zld, .atom, target).?; + const thunk_addr = zld.getSymbol(thunk_sym_loc).n_value; + log.debug(" | target_addr = 0x{x} (thunk)", .{thunk_addr}); + break :blk try Relocation.calcPcRelativeDisplacementArm64(source_addr, thunk_addr); }; const code = atom_code[rel_offset..][0..4]; @@ -920,16 +900,17 @@ fn resolveRelocsX86( const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); break :blk source_sym.n_value + rel_offset; }; - const is_via_got = relocRequiresGot(zld, rel); - const is_tlv = is_tlv: { - const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); - const header = zld.sections.items(.header)[source_sym.n_sect - 1]; - break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; - }; const target_addr = blk: { - if (is_via_got) break :blk zld.getGotEntryAddress(target).?; + if (relocRequiresGot(zld, rel)) break :blk zld.getGotEntryAddress(target).?; + if (relocIsStub(zld, rel) and zld.getSymbol(target).undf()) + break :blk zld.getStubsEntryAddress(target).?; if (relocIsTlv(zld, rel) and zld.getSymbol(target).undf()) break :blk zld.getTlvPtrEntryAddress(target).?; + const is_tlv = is_tlv: { + const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); + const header = zld.sections.items(.header)[source_sym.n_sect - 1]; + break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; + }; break :blk try getRelocTargetAddress(zld, target, is_tlv); }; @@ -1117,3 +1098,17 @@ pub fn relocIsTlv(zld: *Zld, rel: macho.relocation_info) bool { else => unreachable, } } + +pub fn relocIsStub(zld: *Zld, rel: macho.relocation_info) bool { + switch (zld.options.target.cpu.arch) { + .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { + .ARM64_RELOC_BRANCH26 => return true, + else => return false, + }, + .x86_64 => switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) { + .X86_64_RELOC_BRANCH => return true, + else => return false, + }, + else => unreachable, + } +} diff --git a/src/link/MachO/Object.zig 
b/src/link/MachO/Object.zig index 03628e0e35..8c523779ea 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -20,7 +20,6 @@ const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); -const AtomIndex = @import("zld.zig").AtomIndex; const DwarfInfo = @import("DwarfInfo.zig"); const LoadCommandIterator = macho.LoadCommandIterator; const MachO = @import("../MachO.zig"); @@ -55,7 +54,7 @@ source_section_index_lookup: []Entry = undefined, /// Can be undefined as set together with in_symtab. strtab_lookup: []u32 = undefined, /// Can be undefined as set together with in_symtab. -atom_by_index_table: []?AtomIndex = undefined, +atom_by_index_table: []?Atom.Index = undefined, /// Can be undefined as set together with in_symtab. globals_lookup: []i64 = undefined, /// Can be undefined as set together with in_symtab. @@ -71,8 +70,8 @@ section_relocs_lookup: std.ArrayListUnmanaged(u32) = .{}, /// Data-in-code records sorted by address. 
data_in_code: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, -atoms: std.ArrayListUnmanaged(AtomIndex) = .{}, -exec_atoms: std.ArrayListUnmanaged(AtomIndex) = .{}, +atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, +exec_atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, eh_frame_sect_id: ?u8 = null, eh_frame_relocs_lookup: std.AutoArrayHashMapUnmanaged(u32, Record) = .{}, @@ -156,7 +155,7 @@ pub fn parse(self: *Object, allocator: Allocator) !void { self.reverse_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); self.globals_lookup = try allocator.alloc(i64, self.in_symtab.?.len); - self.atom_by_index_table = try allocator.alloc(?AtomIndex, self.in_symtab.?.len + nsects); + self.atom_by_index_table = try allocator.alloc(?Atom.Index, self.in_symtab.?.len + nsects); self.relocs_lookup = try allocator.alloc(Entry, self.in_symtab.?.len + nsects); // This is wasteful but we need to be able to lookup source symbol address after stripping and // allocating of sections. 
@@ -572,7 +571,7 @@ fn createAtomFromSubsection( size: u64, alignment: u32, out_sect_id: u8, -) !AtomIndex { +) !Atom.Index { const gpa = zld.gpa; const atom_index = try zld.createEmptyAtom(sym_index, size, alignment); const atom = zld.getAtomPtr(atom_index); @@ -652,7 +651,7 @@ fn parseRelocs(self: *Object, gpa: Allocator, sect_id: u8) !void { self.section_relocs_lookup.items[sect_id] = start; } -fn cacheRelocs(self: *Object, zld: *Zld, atom_index: AtomIndex) !void { +fn cacheRelocs(self: *Object, zld: *Zld, atom_index: Atom.Index) !void { const atom = zld.getAtom(atom_index); const source_sect_id = if (self.getSourceSymbol(atom.sym_index)) |source_sym| blk: { @@ -1059,7 +1058,7 @@ pub fn getGlobal(self: Object, sym_index: u32) ?u32 { return @as(u32, @intCast(self.globals_lookup[sym_index])); } -pub fn getAtomIndexForSymbol(self: Object, sym_index: u32) ?AtomIndex { +pub fn getAtomIndexForSymbol(self: Object, sym_index: u32) ?Atom.Index { return self.atom_by_index_table[sym_index]; } diff --git a/src/link/MachO/Relocation.zig b/src/link/MachO/Relocation.zig index b7bbf59cfc..c75eaba855 100644 --- a/src/link/MachO/Relocation.zig +++ b/src/link/MachO/Relocation.zig @@ -62,7 +62,7 @@ pub fn getTargetBaseAddress(self: Relocation, macho_file: *MachO) ?u64 { const index = macho_file.stub_table.lookup.get(self.target) orelse return null; const header = macho_file.sections.items(.header)[macho_file.stubs_section_index.?]; return header.addr + - index * @import("stubs.zig").calcStubEntrySize(macho_file.base.options.target.cpu.arch); + index * @import("stubs.zig").stubSize(macho_file.base.options.target.cpu.arch); } switch (self.type) { .got, .got_page, .got_pageoff => { diff --git a/src/link/MachO/UnwindInfo.zig b/src/link/MachO/UnwindInfo.zig index be4a071a94..07a6b49b77 100644 --- a/src/link/MachO/UnwindInfo.zig +++ b/src/link/MachO/UnwindInfo.zig @@ -577,7 +577,7 @@ pub fn write(info: *UnwindInfo, zld: *Zld) !void { const seg_id = 
zld.sections.items(.segment_index)[sect_id]; const seg = zld.segments.items[seg_id]; - const text_sect_id = zld.getSectionByName("__TEXT", "__text").?; + const text_sect_id = zld.text_section_index.?; const text_sect = zld.sections.items(.header)[text_sect_id]; var personalities: [max_personalities]u32 = undefined; diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 21be34a214..03e63f115a 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -9,7 +9,6 @@ const math = std.math; const mem = std.mem; const Allocator = mem.Allocator; -const AtomIndex = @import("zld.zig").AtomIndex; const Atom = @import("Atom.zig"); const MachO = @import("../MachO.zig"); const SymbolWithLoc = MachO.SymbolWithLoc; @@ -19,7 +18,7 @@ const Zld = @import("zld.zig").Zld; const N_DEAD = @import("zld.zig").N_DEAD; -const AtomTable = std.AutoHashMap(AtomIndex, void); +const AtomTable = std.AutoHashMap(Atom.Index, void); pub fn gcAtoms(zld: *Zld, resolver: *const SymbolResolver) !void { const gpa = zld.gpa; @@ -127,7 +126,7 @@ fn collectRoots(zld: *Zld, roots: *AtomTable, resolver: *const SymbolResolver) ! 
} } -fn markLive(zld: *Zld, atom_index: AtomIndex, alive: *AtomTable) void { +fn markLive(zld: *Zld, atom_index: Atom.Index, alive: *AtomTable) void { if (alive.contains(atom_index)) return; const atom = zld.getAtom(atom_index); @@ -191,7 +190,7 @@ fn markLive(zld: *Zld, atom_index: AtomIndex, alive: *AtomTable) void { } } -fn refersLive(zld: *Zld, atom_index: AtomIndex, alive: AtomTable) bool { +fn refersLive(zld: *Zld, atom_index: Atom.Index, alive: AtomTable) bool { const atom = zld.getAtom(atom_index); const sym_loc = atom.getSymbolWithLoc(); @@ -359,7 +358,7 @@ fn markUnwindRecords(zld: *Zld, object_id: u32, alive: *AtomTable) !void { } } -fn markEhFrameRecords(zld: *Zld, object_id: u32, atom_index: AtomIndex, alive: *AtomTable) !void { +fn markEhFrameRecords(zld: *Zld, object_id: u32, atom_index: Atom.Index, alive: *AtomTable) !void { const cpu_arch = zld.options.target.cpu.arch; const object = &zld.objects.items[object_id]; var it = object.getEhFrameRecordsIterator(); diff --git a/src/link/MachO/eh_frame.zig b/src/link/MachO/eh_frame.zig index 83871b3836..5d267af5ff 100644 --- a/src/link/MachO/eh_frame.zig +++ b/src/link/MachO/eh_frame.zig @@ -7,7 +7,6 @@ const leb = std.leb; const log = std.log.scoped(.eh_frame); const Allocator = mem.Allocator; -const AtomIndex = @import("zld.zig").AtomIndex; const Atom = @import("Atom.zig"); const MachO = @import("../MachO.zig"); const Relocation = @import("Relocation.zig"); diff --git a/src/link/MachO/stubs.zig b/src/link/MachO/stubs.zig index 236ba2cfb8..077d6c9303 100644 --- a/src/link/MachO/stubs.zig +++ b/src/link/MachO/stubs.zig @@ -3,7 +3,7 @@ const aarch64 = @import("../../arch/aarch64/bits.zig"); const Relocation = @import("Relocation.zig"); -pub inline fn calcStubHelperPreambleSize(cpu_arch: std.Target.Cpu.Arch) u5 { +pub inline fn stubHelperPreambleSize(cpu_arch: std.Target.Cpu.Arch) u8 { return switch (cpu_arch) { .x86_64 => 15, .aarch64 => 6 * @sizeOf(u32), @@ -11,7 +11,7 @@ pub inline fn 
calcStubHelperPreambleSize(cpu_arch: std.Target.Cpu.Arch) u5 { }; } -pub inline fn calcStubHelperEntrySize(cpu_arch: std.Target.Cpu.Arch) u4 { +pub inline fn stubHelperSize(cpu_arch: std.Target.Cpu.Arch) u8 { return switch (cpu_arch) { .x86_64 => 10, .aarch64 => 3 * @sizeOf(u32), @@ -19,7 +19,7 @@ pub inline fn calcStubHelperEntrySize(cpu_arch: std.Target.Cpu.Arch) u4 { }; } -pub inline fn calcStubEntrySize(cpu_arch: std.Target.Cpu.Arch) u4 { +pub inline fn stubSize(cpu_arch: std.Target.Cpu.Arch) u8 { return switch (cpu_arch) { .x86_64 => 6, .aarch64 => 3 * @sizeOf(u32), @@ -27,7 +27,15 @@ pub inline fn calcStubEntrySize(cpu_arch: std.Target.Cpu.Arch) u4 { }; } -pub inline fn calcStubOffsetInStubHelper(cpu_arch: std.Target.Cpu.Arch) u4 { +pub inline fn stubAlignment(cpu_arch: std.Target.Cpu.Arch) u8 { + return switch (cpu_arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; +} + +pub inline fn stubOffsetInStubHelper(cpu_arch: std.Target.Cpu.Arch) u8 { return switch (cpu_arch) { .x86_64 => 1, .aarch64 => 2 * @sizeOf(u32), diff --git a/src/link/MachO/thunks.zig b/src/link/MachO/thunks.zig index 0e9e3ba48b..da02074abe 100644 --- a/src/link/MachO/thunks.zig +++ b/src/link/MachO/thunks.zig @@ -16,14 +16,11 @@ const aarch64 = @import("../../arch/aarch64/bits.zig"); const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); -const AtomIndex = @import("zld.zig").AtomIndex; const MachO = @import("../MachO.zig"); const Relocation = @import("Relocation.zig"); const SymbolWithLoc = MachO.SymbolWithLoc; const Zld = @import("zld.zig").Zld; -pub const ThunkIndex = u32; - /// Branch instruction has 26 bits immediate but 4 byte aligned. 
const jump_bits = @bitSizeOf(i28); @@ -36,21 +33,35 @@ const max_distance = (1 << (jump_bits - 1)); const max_allowed_distance = max_distance - 0x500_000; pub const Thunk = struct { - start_index: AtomIndex, + start_index: Atom.Index, len: u32, - lookup: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, AtomIndex) = .{}, + targets: std.MultiArrayList(Target) = .{}, + lookup: std.AutoHashMapUnmanaged(Target, u32) = .{}, + + pub const Tag = enum { + stub, + atom, + }; + + pub const Target = struct { + tag: Tag, + target: SymbolWithLoc, + }; + + pub const Index = u32; pub fn deinit(self: *Thunk, gpa: Allocator) void { + self.targets.deinit(gpa); self.lookup.deinit(gpa); } - pub fn getStartAtomIndex(self: Thunk) AtomIndex { + pub fn getStartAtomIndex(self: Thunk) Atom.Index { assert(self.len != 0); return self.start_index; } - pub fn getEndAtomIndex(self: Thunk) AtomIndex { + pub fn getEndAtomIndex(self: Thunk) Atom.Index { assert(self.len != 0); return self.start_index + self.len - 1; } @@ -63,10 +74,9 @@ pub const Thunk = struct { return @alignOf(u32); } - pub fn getTrampolineForSymbol(self: Thunk, zld: *Zld, target: SymbolWithLoc) ?SymbolWithLoc { - const atom_index = self.lookup.get(target) orelse return null; - const atom = zld.getAtom(atom_index); - return atom.getSymbolWithLoc(); + pub fn getTrampoline(self: Thunk, zld: *Zld, tag: Tag, target: SymbolWithLoc) ?SymbolWithLoc { + const atom_index = self.lookup.get(.{ .tag = tag, .target = target }) orelse return null; + return zld.getAtom(atom_index).getSymbolWithLoc(); } }; @@ -96,7 +106,7 @@ pub fn createThunks(zld: *Zld, sect_id: u8) !void { } } - var allocated = std.AutoHashMap(AtomIndex, void).init(gpa); + var allocated = std.AutoHashMap(Atom.Index, void).init(gpa); defer allocated.deinit(); try allocated.ensureTotalCapacity(atom_count); @@ -180,7 +190,7 @@ pub fn createThunks(zld: *Zld, sect_id: u8) !void { fn allocateThunk( zld: *Zld, - thunk_index: ThunkIndex, + thunk_index: Thunk.Index, base_offset: u64, 
header: *macho.section_64, ) void { @@ -214,10 +224,10 @@ fn allocateThunk( fn scanRelocs( zld: *Zld, - atom_index: AtomIndex, - allocated: std.AutoHashMap(AtomIndex, void), - thunk_index: ThunkIndex, - group_end: AtomIndex, + atom_index: Atom.Index, + allocated: std.AutoHashMap(Atom.Index, void), + thunk_index: Thunk.Index, + group_end: Atom.Index, ) !void { const atom = zld.getAtom(atom_index); const object = zld.objects.items[atom.getFile().?]; @@ -253,42 +263,45 @@ fn scanRelocs( const gpa = zld.gpa; const target_sym = zld.getSymbol(target); - - const actual_target: SymbolWithLoc = if (target_sym.undf()) blk: { - const stub_atom_index = zld.getStubsAtomIndexForSymbol(target).?; - break :blk .{ .sym_index = zld.getAtom(stub_atom_index).sym_index }; - } else target; - const thunk = &zld.thunks.items[thunk_index]; - const gop = try thunk.lookup.getOrPut(gpa, actual_target); + + const tag: Thunk.Tag = if (target_sym.undf()) .stub else .atom; + const thunk_target: Thunk.Target = .{ .tag = tag, .target = target }; + const gop = try thunk.lookup.getOrPut(gpa, thunk_target); if (!gop.found_existing) { - const thunk_atom_index = try createThunkAtom(zld); - gop.value_ptr.* = thunk_atom_index; - - const thunk_atom = zld.getAtomPtr(thunk_atom_index); - const end_atom_index = if (thunk.len == 0) group_end else thunk.getEndAtomIndex(); - const end_atom = zld.getAtomPtr(end_atom_index); - - if (end_atom.next_index) |first_after_index| { - const first_after_atom = zld.getAtomPtr(first_after_index); - first_after_atom.prev_index = thunk_atom_index; - thunk_atom.next_index = first_after_index; - } - - end_atom.next_index = thunk_atom_index; - thunk_atom.prev_index = end_atom_index; - - if (thunk.len == 0) { - thunk.start_index = thunk_atom_index; - } - - thunk.len += 1; + gop.value_ptr.* = try pushThunkAtom(zld, thunk, group_end); + try thunk.targets.append(gpa, thunk_target); } try zld.thunk_table.put(gpa, atom_index, thunk_index); } } +fn pushThunkAtom(zld: *Zld, thunk: 
*Thunk, group_end: Atom.Index) !Atom.Index { + const thunk_atom_index = try createThunkAtom(zld); + + const thunk_atom = zld.getAtomPtr(thunk_atom_index); + const end_atom_index = if (thunk.len == 0) group_end else thunk.getEndAtomIndex(); + const end_atom = zld.getAtomPtr(end_atom_index); + + if (end_atom.next_index) |first_after_index| { + const first_after_atom = zld.getAtomPtr(first_after_index); + first_after_atom.prev_index = thunk_atom_index; + thunk_atom.next_index = first_after_index; + } + + end_atom.next_index = thunk_atom_index; + thunk_atom.prev_index = end_atom_index; + + if (thunk.len == 0) { + thunk.start_index = thunk_atom_index; + } + + thunk.len += 1; + + return thunk_atom_index; +} + inline fn relocNeedsThunk(rel: macho.relocation_info) bool { const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); return rel_type == .ARM64_RELOC_BRANCH26; @@ -296,13 +309,13 @@ inline fn relocNeedsThunk(rel: macho.relocation_info) bool { fn isReachable( zld: *Zld, - atom_index: AtomIndex, + atom_index: Atom.Index, rel: macho.relocation_info, base_offset: i32, target: SymbolWithLoc, - allocated: std.AutoHashMap(AtomIndex, void), + allocated: std.AutoHashMap(Atom.Index, void), ) bool { - if (zld.getStubsAtomIndexForSymbol(target)) |_| return false; + if (zld.stubs_table.lookup.contains(target)) return false; const source_atom = zld.getAtom(atom_index); const source_sym = zld.getSymbol(source_atom.getSymbolWithLoc()); @@ -317,8 +330,7 @@ fn isReachable( if (!allocated.contains(target_atom_index)) return false; const source_addr = source_sym.n_value + @as(u32, @intCast(rel.r_address - base_offset)); - const is_via_got = Atom.relocRequiresGot(zld, rel); - const target_addr = if (is_via_got) + const target_addr = if (Atom.relocRequiresGot(zld, rel)) zld.getGotEntryAddress(target).? 
else Atom.getRelocTargetAddress(zld, target, false) catch unreachable; @@ -328,50 +340,31 @@ fn isReachable( return true; } -fn createThunkAtom(zld: *Zld) !AtomIndex { +fn createThunkAtom(zld: *Zld) !Atom.Index { const sym_index = try zld.allocateSymbol(); const atom_index = try zld.createEmptyAtom(sym_index, @sizeOf(u32) * 3, 2); const sym = zld.getSymbolPtr(.{ .sym_index = sym_index }); sym.n_type = macho.N_SECT; - - const sect_id = zld.getSectionByName("__TEXT", "__text") orelse unreachable; - sym.n_sect = sect_id + 1; - + sym.n_sect = zld.text_section_index.? + 1; return atom_index; } -fn getThunkIndex(zld: *Zld, atom_index: AtomIndex) ?ThunkIndex { - const atom = zld.getAtom(atom_index); - const sym = zld.getSymbol(atom.getSymbolWithLoc()); - for (zld.thunks.items, 0..) |thunk, i| { - if (thunk.len == 0) continue; - - const thunk_atom_index = thunk.getStartAtomIndex(); - const thunk_atom = zld.getAtom(thunk_atom_index); - const thunk_sym = zld.getSymbol(thunk_atom.getSymbolWithLoc()); - const start_addr = thunk_sym.n_value; - const end_addr = start_addr + thunk.getSize(); - - if (start_addr <= sym.n_value and sym.n_value < end_addr) { - return @as(u32, @intCast(i)); - } +pub fn writeThunkCode(zld: *Zld, thunk: *const Thunk, writer: anytype) !void { + const slice = thunk.targets.slice(); + for (thunk.getStartAtomIndex()..thunk.getEndAtomIndex(), 0..) 
|atom_index, target_index| { + const atom = zld.getAtom(@intCast(atom_index)); + const sym = zld.getSymbol(atom.getSymbolWithLoc()); + const source_addr = sym.n_value; + const tag = slice.items(.tag)[target_index]; + const target = slice.items(.target)[target_index]; + const target_addr = switch (tag) { + .stub => zld.getStubsEntryAddress(target).?, + .atom => zld.getSymbol(target).n_value, + }; + const pages = Relocation.calcNumberOfPages(source_addr, target_addr); + try writer.writeIntLittle(u32, aarch64.Instruction.adrp(.x16, pages).toU32()); + const off = try Relocation.calcPageOffset(target_addr, .arithmetic); + try writer.writeIntLittle(u32, aarch64.Instruction.add(.x16, .x16, off, false).toU32()); + try writer.writeIntLittle(u32, aarch64.Instruction.br(.x16).toU32()); } - return null; -} - -pub fn writeThunkCode(zld: *Zld, atom_index: AtomIndex, writer: anytype) !void { - const atom = zld.getAtom(atom_index); - const sym = zld.getSymbol(atom.getSymbolWithLoc()); - const source_addr = sym.n_value; - const thunk = zld.thunks.items[getThunkIndex(zld, atom_index).?]; - const target_addr = for (thunk.lookup.keys()) |target| { - const target_atom_index = thunk.lookup.get(target).?; - if (atom_index == target_atom_index) break zld.getSymbol(target).n_value; - } else unreachable; - - const pages = Relocation.calcNumberOfPages(source_addr, target_addr); - try writer.writeIntLittle(u32, aarch64.Instruction.adrp(.x16, pages).toU32()); - const off = try Relocation.calcPageOffset(target_addr, .arithmetic); - try writer.writeIntLittle(u32, aarch64.Instruction.add(.x16, .x16, off, false).toU32()); - try writer.writeIntLittle(u32, aarch64.Instruction.br(.x16).toU32()); } diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index c49e5cf571..e5815bf9d3 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -15,7 +15,7 @@ const eh_frame = @import("eh_frame.zig"); const fat = @import("fat.zig"); const link = @import("../../link.zig"); const load_commands = 
@import("load_commands.zig"); -const stub_helpers = @import("stubs.zig"); +const stubs = @import("stubs.zig"); const thunks = @import("thunks.zig"); const trace = @import("../../tracy.zig").trace; @@ -67,8 +67,12 @@ pub const Zld = struct { segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, sections: std.MultiArrayList(Section) = .{}, + text_section_index: ?u8 = null, got_section_index: ?u8 = null, tlv_ptr_section_index: ?u8 = null, + stubs_section_index: ?u8 = null, + stub_helper_section_index: ?u8 = null, + la_symbol_ptr_section_index: ?u8 = null, locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, globals: std.ArrayListUnmanaged(SymbolWithLoc) = .{}, @@ -78,17 +82,14 @@ pub const Zld = struct { dso_handle_index: ?u32 = null, dyld_stub_binder_index: ?u32 = null, dyld_private_atom_index: ?Atom.Index = null, - stub_helper_preamble_sym_index: ?u32 = null, strtab: StringTable(.strtab) = .{}, tlv_ptr_table: TableSection(SymbolWithLoc) = .{}, got_table: TableSection(SymbolWithLoc) = .{}, + stubs_table: TableSection(SymbolWithLoc) = .{}, - stubs: std.ArrayListUnmanaged(IndirectPointer) = .{}, - stubs_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, - - thunk_table: std.AutoHashMapUnmanaged(AtomIndex, thunks.ThunkIndex) = .{}, + thunk_table: std.AutoHashMapUnmanaged(Atom.Index, thunks.Thunk.Index) = .{}, thunks: std.ArrayListUnmanaged(thunks.Thunk) = .{}, atoms: std.ArrayListUnmanaged(Atom) = .{}, @@ -113,15 +114,18 @@ pub const Zld = struct { } if (sect.isCode()) { - break :blk self.getSectionByName("__TEXT", "__text") orelse try self.initSection( - "__TEXT", - "__text", - .{ - .flags = macho.S_REGULAR | - macho.S_ATTR_PURE_INSTRUCTIONS | - macho.S_ATTR_SOME_INSTRUCTIONS, - }, - ); + if (self.text_section_index == null) { + self.text_section_index = try self.initSection( + "__TEXT", + "__text", + .{ + .flags = macho.S_REGULAR | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + }, + ); + } + break :blk 
self.text_section_index.?; } if (sect.isDebug()) { @@ -228,7 +232,7 @@ pub const Zld = struct { return res; } - pub fn addAtomToSection(self: *Zld, atom_index: AtomIndex) void { + pub fn addAtomToSection(self: *Zld, atom_index: Atom.Index) void { const atom = self.getAtomPtr(atom_index); const sym = self.getSymbol(atom.getSymbolWithLoc()); var section = self.sections.get(sym.n_sect - 1); @@ -244,9 +248,9 @@ pub const Zld = struct { self.sections.set(sym.n_sect - 1, section); } - pub fn createEmptyAtom(self: *Zld, sym_index: u32, size: u64, alignment: u32) !AtomIndex { + pub fn createEmptyAtom(self: *Zld, sym_index: u32, size: u64, alignment: u32) !Atom.Index { const gpa = self.gpa; - const index = @as(AtomIndex, @intCast(self.atoms.items.len)); + const index = @as(Atom.Index, @intCast(self.atoms.items.len)); const atom = try self.atoms.addOne(gpa); atom.* = .{ .sym_index = 0, @@ -280,190 +284,6 @@ pub const Zld = struct { self.addAtomToSection(atom_index); } - fn createStubHelperPreambleAtom(self: *Zld) !void { - if (self.dyld_stub_binder_index == null) return; - - const cpu_arch = self.options.target.cpu.arch; - const size: u64 = switch (cpu_arch) { - .x86_64 => 15, - .aarch64 => 6 * @sizeOf(u32), - else => unreachable, - }; - const alignment: u32 = switch (cpu_arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, - }; - const sym_index = try self.allocateSymbol(); - const atom_index = try self.createEmptyAtom(sym_index, size, alignment); - const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); - sym.n_type = macho.N_SECT; - - const sect_id = self.getSectionByName("__TEXT", "__stub_helper") orelse - try self.initSection("__TEXT", "__stub_helper", .{ - .flags = macho.S_REGULAR | - macho.S_ATTR_PURE_INSTRUCTIONS | - macho.S_ATTR_SOME_INSTRUCTIONS, - }); - sym.n_sect = sect_id + 1; - - self.stub_helper_preamble_sym_index = sym_index; - - self.addAtomToSection(atom_index); - } - - fn writeStubHelperPreambleCode(self: *Zld, writer: anytype) !void { - 
const cpu_arch = self.options.target.cpu.arch; - const source_addr = blk: { - const sym = self.getSymbol(.{ .sym_index = self.stub_helper_preamble_sym_index.? }); - break :blk sym.n_value; - }; - const dyld_private_addr = blk: { - const atom = self.getAtom(self.dyld_private_atom_index.?); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - break :blk sym.n_value; - }; - const dyld_stub_binder_got_addr = blk: { - const sym_loc = self.globals.items[self.dyld_stub_binder_index.?]; - break :blk self.getGotEntryAddress(sym_loc).?; - }; - try stub_helpers.writeStubHelperPreambleCode(.{ - .cpu_arch = cpu_arch, - .source_addr = source_addr, - .dyld_private_addr = dyld_private_addr, - .dyld_stub_binder_got_addr = dyld_stub_binder_got_addr, - }, writer); - } - - pub fn createStubHelperAtom(self: *Zld) !AtomIndex { - const cpu_arch = self.options.target.cpu.arch; - const stub_size = stub_helpers.calcStubHelperEntrySize(cpu_arch); - const alignment: u2 = switch (cpu_arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, - }; - - const sym_index = try self.allocateSymbol(); - const atom_index = try self.createEmptyAtom(sym_index, stub_size, alignment); - const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); - sym.n_sect = macho.N_SECT; - - const sect_id = self.getSectionByName("__TEXT", "__stub_helper").?; - sym.n_sect = sect_id + 1; - - self.addAtomToSection(atom_index); - - return atom_index; - } - - fn writeStubHelperCode(self: *Zld, atom_index: AtomIndex, writer: anytype) !void { - const cpu_arch = self.options.target.cpu.arch; - const source_addr = blk: { - const atom = self.getAtom(atom_index); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - break :blk sym.n_value; - }; - const target_addr = blk: { - const sym = self.getSymbol(.{ .sym_index = self.stub_helper_preamble_sym_index.? 
}); - break :blk sym.n_value; - }; - try stub_helpers.writeStubHelperCode(.{ - .cpu_arch = cpu_arch, - .source_addr = source_addr, - .target_addr = target_addr, - }, writer); - } - - pub fn createLazyPointerAtom(self: *Zld) !AtomIndex { - const sym_index = try self.allocateSymbol(); - const atom_index = try self.createEmptyAtom(sym_index, @sizeOf(u64), 3); - const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); - sym.n_type = macho.N_SECT; - - const sect_id = self.getSectionByName("__DATA", "__la_symbol_ptr") orelse - try self.initSection("__DATA", "__la_symbol_ptr", .{ - .flags = macho.S_LAZY_SYMBOL_POINTERS, - }); - sym.n_sect = sect_id + 1; - - self.addAtomToSection(atom_index); - - return atom_index; - } - - fn writeLazyPointer(self: *Zld, stub_helper_index: u32, writer: anytype) !void { - const target_addr = blk: { - const sect_id = self.getSectionByName("__TEXT", "__stub_helper").?; - var atom_index = self.sections.items(.first_atom_index)[sect_id].?; - var count: u32 = 0; - while (count < stub_helper_index + 1) : (count += 1) { - const atom = self.getAtom(atom_index); - if (atom.next_index) |next_index| { - atom_index = next_index; - } - } - const atom = self.getAtom(atom_index); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - break :blk sym.n_value; - }; - try writer.writeIntLittle(u64, target_addr); - } - - pub fn createStubAtom(self: *Zld) !AtomIndex { - const cpu_arch = self.options.target.cpu.arch; - const alignment: u2 = switch (cpu_arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - const stub_size = stub_helpers.calcStubEntrySize(cpu_arch); - const sym_index = try self.allocateSymbol(); - const atom_index = try self.createEmptyAtom(sym_index, stub_size, alignment); - const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); - sym.n_type = macho.N_SECT; - - const sect_id = self.getSectionByName("__TEXT", "__stubs") orelse - try self.initSection("__TEXT", "__stubs", .{ - .flags = 
macho.S_SYMBOL_STUBS | - macho.S_ATTR_PURE_INSTRUCTIONS | - macho.S_ATTR_SOME_INSTRUCTIONS, - .reserved2 = stub_size, - }); - sym.n_sect = sect_id + 1; - - self.addAtomToSection(atom_index); - - return atom_index; - } - - fn writeStubCode(self: *Zld, atom_index: AtomIndex, stub_index: u32, writer: anytype) !void { - const cpu_arch = self.options.target.cpu.arch; - const source_addr = blk: { - const atom = self.getAtom(atom_index); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - break :blk sym.n_value; - }; - const target_addr = blk: { - // TODO: cache this at stub atom creation; they always go in pairs anyhow - const la_sect_id = self.getSectionByName("__DATA", "__la_symbol_ptr").?; - var la_atom_index = self.sections.items(.first_atom_index)[la_sect_id].?; - var count: u32 = 0; - while (count < stub_index) : (count += 1) { - const la_atom = self.getAtom(la_atom_index); - la_atom_index = la_atom.next_index.?; - } - const atom = self.getAtom(la_atom_index); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - break :blk sym.n_value; - }; - try stub_helpers.writeStubCode(.{ - .cpu_arch = cpu_arch, - .source_addr = source_addr, - .target_addr = target_addr, - }, writer); - } - fn createTentativeDefAtoms(self: *Zld) !void { const gpa = self.gpa; @@ -818,7 +638,6 @@ pub const Zld = struct { self.tlv_ptr_table.deinit(gpa); self.got_table.deinit(gpa); - self.stubs.deinit(gpa); self.stubs_table.deinit(gpa); self.thunk_table.deinit(gpa); @@ -953,6 +772,27 @@ pub const Zld = struct { } } + pub fn addStubEntry(self: *Zld, target: SymbolWithLoc) !void { + if (self.stubs_table.lookup.contains(target)) return; + _ = try self.stubs_table.allocateEntry(self.gpa, target); + if (self.stubs_section_index == null) { + self.stubs_section_index = try self.initSection("__TEXT", "__stubs", .{ + .flags = macho.S_SYMBOL_STUBS | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + .reserved2 = stubs.stubSize(self.options.target.cpu.arch), + }); + 
self.stub_helper_section_index = try self.initSection("__TEXT", "__stub_helper", .{ + .flags = macho.S_REGULAR | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + }); + self.la_symbol_ptr_section_index = try self.initSection("__DATA", "__la_symbol_ptr", .{ + .flags = macho.S_LAZY_SYMBOL_POINTERS, + }); + } + } + fn allocateSpecialSymbols(self: *Zld) !void { for (&[_]?u32{ self.dso_handle_index, @@ -984,88 +824,85 @@ pub const Zld = struct { var atom_index = first_atom_index orelse continue; - var buffer = std.ArrayList(u8).init(gpa); - defer buffer.deinit(); - try buffer.ensureTotalCapacity(math.cast(usize, header.size) orelse return error.Overflow); + var buffer = try gpa.alloc(u8, math.cast(usize, header.size) orelse return error.Overflow); + defer gpa.free(buffer); + @memset(buffer, 0); // TODO with NOPs log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); - var count: u32 = 0; - while (true) : (count += 1) { + while (true) { const atom = self.getAtom(atom_index); - const this_sym = self.getSymbol(atom.getSymbolWithLoc()); - const padding_size: usize = if (atom.next_index) |next_index| blk: { - const next_sym = self.getSymbol(self.getAtom(next_index).getSymbolWithLoc()); - const size = next_sym.n_value - (this_sym.n_value + atom.size); - break :blk math.cast(usize, size) orelse return error.Overflow; - } else 0; + if (atom.getFile()) |file| { + const this_sym = self.getSymbol(atom.getSymbolWithLoc()); + const padding_size: usize = if (atom.next_index) |next_index| blk: { + const next_sym = self.getSymbol(self.getAtom(next_index).getSymbolWithLoc()); + const size = next_sym.n_value - (this_sym.n_value + atom.size); + break :blk math.cast(usize, size) orelse return error.Overflow; + } else 0; - log.debug(" (adding ATOM(%{d}, '{s}') from object({?}) to buffer)", .{ - atom.sym_index, - self.getSymbolName(atom.getSymbolWithLoc()), - atom.getFile(), - }); - if (padding_size > 0) { - log.debug(" (with padding {x})", 
.{padding_size}); - } - - const offset = buffer.items.len; - - // TODO: move writing synthetic sections into a separate function - if (atom_index == self.dyld_private_atom_index.?) { - buffer.appendSliceAssumeCapacity(&[_]u8{0} ** @sizeOf(u64)); - } else if (atom.getFile() == null) outer: { - switch (header.type()) { - macho.S_NON_LAZY_SYMBOL_POINTERS => unreachable, - macho.S_THREAD_LOCAL_VARIABLE_POINTERS => unreachable, - macho.S_LAZY_SYMBOL_POINTERS => { - try self.writeLazyPointer(count, buffer.writer()); - }, - else => { - if (self.stub_helper_preamble_sym_index) |sym_index| { - if (sym_index == atom.sym_index) { - try self.writeStubHelperPreambleCode(buffer.writer()); - break :outer; - } - } - if (header.type() == macho.S_SYMBOL_STUBS) { - try self.writeStubCode(atom_index, count, buffer.writer()); - } else if (mem.eql(u8, header.sectName(), "__stub_helper")) { - try self.writeStubHelperCode(atom_index, buffer.writer()); - } else if (header.isCode()) { - // A thunk - try thunks.writeThunkCode(self, atom_index, buffer.writer()); - } else unreachable; - }, + log.debug(" (adding ATOM(%{d}, '{s}') from object({d}) to buffer)", .{ + atom.sym_index, + self.getSymbolName(atom.getSymbolWithLoc()), + file, + }); + if (padding_size > 0) { + log.debug(" (with padding {x})", .{padding_size}); } - } else { + + const offset = this_sym.n_value - header.addr; + log.debug(" (at offset 0x{x})", .{offset}); + const code = Atom.getAtomCode(self, atom_index); const relocs = Atom.getAtomRelocs(self, atom_index); const size = math.cast(usize, atom.size) orelse return error.Overflow; - buffer.appendSliceAssumeCapacity(code); + @memcpy(buffer[offset .. 
offset + size], code); try Atom.resolveRelocs( self, atom_index, - buffer.items[offset..][0..size], + buffer[offset..][0..size], relocs, ); } - var i: usize = 0; - while (i < padding_size) : (i += 1) { - // TODO with NOPs - buffer.appendAssumeCapacity(0); - } - if (atom.next_index) |next_index| { atom_index = next_index; - } else { - assert(buffer.items.len == header.size); - log.debug(" (writing at file offset 0x{x})", .{header.offset}); - try self.file.pwriteAll(buffer.items, header.offset); - break; - } + } else break; } + + log.debug(" (writing at file offset 0x{x})", .{header.offset}); + try self.file.pwriteAll(buffer, header.offset); + } + } + + fn writeDyldPrivateAtom(self: *Zld) !void { + const atom_index = self.dyld_private_atom_index orelse return; + const atom = self.getAtom(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + const sect_id = self.getSectionByName("__DATA", "__data").?; + const header = self.sections.items(.header)[sect_id]; + const offset = sym.n_value - header.addr + header.offset; + log.debug("writing __dyld_private at offset 0x{x}", .{offset}); + const buffer: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64); + try self.file.pwriteAll(&buffer, offset); + } + + fn writeThunks(self: *Zld) !void { + assert(self.requiresThunks()); + const gpa = self.gpa; + + const sect_id = self.text_section_index orelse return; + const header = self.sections.items(.header)[sect_id]; + + for (self.thunks.items, 0..) 
|*thunk, i| { + if (thunk.getSize() == 0) continue; + var buffer = try std.ArrayList(u8).initCapacity(gpa, thunk.getSize()); + defer buffer.deinit(); + try thunks.writeThunkCode(self, thunk, buffer.writer()); + const thunk_atom = self.getAtom(thunk.getStartAtomIndex()); + const thunk_sym = self.getSymbol(thunk_atom.getSymbolWithLoc()); + const offset = thunk_sym.n_value - header.addr + header.offset; + log.debug("writing thunk({d}) at offset 0x{x}", .{ i, offset }); + try self.file.pwriteAll(buffer.items, offset); } } @@ -1077,10 +914,94 @@ pub const Zld = struct { const sym = self.getSymbol(entry); buffer.writer().writeIntLittle(u64, sym.n_value) catch unreachable; } - log.debug("writing .got contents at file offset 0x{x}", .{header.offset}); + log.debug("writing __DATA_CONST,__got contents at file offset 0x{x}", .{header.offset}); try self.file.pwriteAll(buffer.items, header.offset); } + fn writeStubs(self: *Zld) !void { + const gpa = self.gpa; + const cpu_arch = self.options.target.cpu.arch; + const stubs_header = self.sections.items(.header)[self.stubs_section_index.?]; + const la_symbol_ptr_header = self.sections.items(.header)[self.la_symbol_ptr_section_index.?]; + + var buffer = try std.ArrayList(u8).initCapacity(gpa, stubs_header.size); + defer buffer.deinit(); + + for (0..self.stubs_table.count()) |index| { + try stubs.writeStubCode(.{ + .cpu_arch = cpu_arch, + .source_addr = stubs_header.addr + stubs.stubSize(cpu_arch) * index, + .target_addr = la_symbol_ptr_header.addr + index * @sizeOf(u64), + }, buffer.writer()); + } + + log.debug("writing __TEXT,__stubs contents at file offset 0x{x}", .{stubs_header.offset}); + try self.file.pwriteAll(buffer.items, stubs_header.offset); + } + + fn writeStubHelpers(self: *Zld) !void { + const gpa = self.gpa; + const cpu_arch = self.options.target.cpu.arch; + const stub_helper_header = self.sections.items(.header)[self.stub_helper_section_index.?]; + + var buffer = try std.ArrayList(u8).initCapacity(gpa, 
stub_helper_header.size); + defer buffer.deinit(); + + { + const dyld_private_addr = blk: { + const atom = self.getAtom(self.dyld_private_atom_index.?); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + break :blk sym.n_value; + }; + const dyld_stub_binder_got_addr = blk: { + const sym_loc = self.globals.items[self.dyld_stub_binder_index.?]; + break :blk self.getGotEntryAddress(sym_loc).?; + }; + try stubs.writeStubHelperPreambleCode(.{ + .cpu_arch = cpu_arch, + .source_addr = stub_helper_header.addr, + .dyld_private_addr = dyld_private_addr, + .dyld_stub_binder_got_addr = dyld_stub_binder_got_addr, + }, buffer.writer()); + } + + for (0..self.stubs_table.count()) |index| { + const source_addr = stub_helper_header.addr + stubs.stubHelperPreambleSize(cpu_arch) + + stubs.stubHelperSize(cpu_arch) * index; + try stubs.writeStubHelperCode(.{ + .cpu_arch = cpu_arch, + .source_addr = source_addr, + .target_addr = stub_helper_header.addr, + }, buffer.writer()); + } + + log.debug("writing __TEXT,__stub_helper contents at file offset 0x{x}", .{ + stub_helper_header.offset, + }); + try self.file.pwriteAll(buffer.items, stub_helper_header.offset); + } + + fn writeLaSymbolPtrs(self: *Zld) !void { + const gpa = self.gpa; + const cpu_arch = self.options.target.cpu.arch; + const la_symbol_ptr_header = self.sections.items(.header)[self.la_symbol_ptr_section_index.?]; + const stub_helper_header = self.sections.items(.header)[self.stub_helper_section_index.?]; + + var buffer = try std.ArrayList(u8).initCapacity(gpa, la_symbol_ptr_header.size); + defer buffer.deinit(); + + for (0..self.stubs_table.count()) |index| { + const target_addr = stub_helper_header.addr + stubs.stubHelperPreambleSize(cpu_arch) + + stubs.stubHelperSize(cpu_arch) * index; + buffer.writer().writeIntLittle(u64, target_addr) catch unreachable; + } + + log.debug("writing __DATA,__la_symbol_ptr contents at file offset 0x{x}", .{ + la_symbol_ptr_header.offset, + }); + try self.file.pwriteAll(buffer.items, 
la_symbol_ptr_header.offset); + } + fn pruneAndSortSections(self: *Zld) !void { const Entry = struct { index: u8, @@ -1105,6 +1026,18 @@ pub const Zld = struct { section.header.sectName(), section.first_atom_index, }); + for (&[_]*?u8{ + &self.text_section_index, + &self.got_section_index, + &self.tlv_ptr_section_index, + &self.stubs_section_index, + &self.stub_helper_section_index, + &self.la_symbol_ptr_section_index, + }) |maybe_index| { + if (maybe_index.* != null and maybe_index.*.? == index) { + maybe_index.* = null; + } + } continue; } entries.appendAssumeCapacity(.{ .index = @intCast(index) }); @@ -1127,8 +1060,12 @@ pub const Zld = struct { } for (&[_]*?u8{ + &self.text_section_index, &self.got_section_index, &self.tlv_ptr_section_index, + &self.stubs_section_index, + &self.stub_helper_section_index, + &self.la_symbol_ptr_section_index, }) |maybe_index| { if (maybe_index.*) |*index| { index.* = backlinks[index.*]; @@ -1140,8 +1077,8 @@ pub const Zld = struct { const slice = self.sections.slice(); for (slice.items(.header), 0..) |*header, sect_id| { if (header.size == 0) continue; - if (self.requiresThunks()) { - if (header.isCode() and !(header.type() == macho.S_SYMBOL_STUBS) and !mem.eql(u8, header.sectName(), "__stub_helper")) continue; + if (self.text_section_index) |txt| { + if (txt == sect_id and self.requiresThunks()) continue; } var atom_index = slice.items(.first_atom_index)[sect_id] orelse continue; @@ -1167,15 +1104,9 @@ pub const Zld = struct { } } - if (self.requiresThunks()) { - for (slice.items(.header), 0..) |header, sect_id| { - if (!header.isCode()) continue; - if (header.type() == macho.S_SYMBOL_STUBS) continue; - if (mem.eql(u8, header.sectName(), "__stub_helper")) continue; - - // Create jump/branch range extenders if needed. - try thunks.createThunks(self, @as(u8, @intCast(sect_id))); - } + if (self.text_section_index != null and self.requiresThunks()) { + // Create jump/branch range extenders if needed. 
+ try thunks.createThunks(self, self.text_section_index.?); } // Update offsets of all symbols contained within each Atom. @@ -1224,6 +1155,27 @@ pub const Zld = struct { header.size = self.tlv_ptr_table.count() * @sizeOf(u64); header.@"align" = 3; } + + const cpu_arch = self.options.target.cpu.arch; + + if (self.stubs_section_index) |sect_id| { + const header = &self.sections.items(.header)[sect_id]; + header.size = self.stubs_table.count() * stubs.stubSize(cpu_arch); + header.@"align" = stubs.stubAlignment(cpu_arch); + } + + if (self.stub_helper_section_index) |sect_id| { + const header = &self.sections.items(.header)[sect_id]; + header.size = self.stubs_table.count() * stubs.stubHelperSize(cpu_arch) + + stubs.stubHelperPreambleSize(cpu_arch); + header.@"align" = stubs.stubAlignment(cpu_arch); + } + + if (self.la_symbol_ptr_section_index) |sect_id| { + const header = &self.sections.items(.header)[sect_id]; + header.size = self.stubs_table.count() * @sizeOf(u64); + header.@"align" = 3; + } } fn allocateSegments(self: *Zld) !void { @@ -1453,36 +1405,13 @@ pub const Zld = struct { try MachO.collectRebaseDataFromTableSection(self.gpa, self, sect_id, rebase, self.got_table); } - const slice = self.sections.slice(); - - // Next, unpact lazy pointers - // TODO: save la_ptr in a container so that we can re-use the helper - if (self.getSectionByName("__DATA", "__la_symbol_ptr")) |sect_id| { - const segment_index = slice.items(.segment_index)[sect_id]; - const seg = self.getSegment(sect_id); - var atom_index = slice.items(.first_atom_index)[sect_id].?; - - try rebase.entries.ensureUnusedCapacity(self.gpa, self.stubs.items.len); - - while (true) { - const atom = self.getAtom(atom_index); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - const base_offset = sym.n_value - seg.vmaddr; - - log.debug(" | rebase at {x}", .{base_offset}); - - rebase.entries.appendAssumeCapacity(.{ - .offset = base_offset, - .segment_id = segment_index, - }); - - if (atom.next_index) 
|next_index| { - atom_index = next_index; - } else break; - } + // Next, unpack __la_symbol_ptr entries + if (self.la_symbol_ptr_section_index) |sect_id| { + try MachO.collectRebaseDataFromTableSection(self.gpa, self, sect_id, rebase, self.stubs_table); } // Finally, unpack the rest. + const slice = self.sections.slice(); for (slice.items(.header), 0..) |header, sect_id| { switch (header.type()) { macho.S_LITERAL_POINTERS, @@ -1679,51 +1608,8 @@ pub const Zld = struct { } fn collectLazyBindData(self: *Zld, lazy_bind: *LazyBind) !void { - const sect_id = self.getSectionByName("__DATA", "__la_symbol_ptr") orelse return; - - log.debug("collecting lazy bind data", .{}); - - const slice = self.sections.slice(); - const segment_index = slice.items(.segment_index)[sect_id]; - const seg = self.getSegment(sect_id); - var atom_index = slice.items(.first_atom_index)[sect_id].?; - - // TODO: we actually don't need to store lazy pointer atoms as they are synthetically generated by the linker - try lazy_bind.entries.ensureUnusedCapacity(self.gpa, self.stubs.items.len); - - var count: u32 = 0; - while (true) : (count += 1) { - const atom = self.getAtom(atom_index); - - log.debug(" ATOM(%{d}, '{s}')", .{ atom.sym_index, self.getSymbolName(atom.getSymbolWithLoc()) }); - - const sym = self.getSymbol(atom.getSymbolWithLoc()); - const base_offset = sym.n_value - seg.vmaddr; - - const stub_entry = self.stubs.items[count]; - const bind_sym = stub_entry.getTargetSymbol(self); - const bind_sym_name = stub_entry.getTargetSymbolName(self); - const dylib_ordinal = @divTrunc(@as(i16, @bitCast(bind_sym.n_desc)), macho.N_SYMBOL_RESOLVER); - log.debug(" | lazy bind at {x}, import('{s}') in dylib({d})", .{ - base_offset, - bind_sym_name, - dylib_ordinal, - }); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - } - lazy_bind.entries.appendAssumeCapacity(.{ - .target = stub_entry.target, - .offset = base_offset, - .segment_id = segment_index, - .addend = 0, - }); - - if 
(atom.next_index) |next_index| { - atom_index = next_index; - } else break; - } - + const sect_id = self.la_symbol_ptr_section_index orelse return; + try MachO.collectBindDataFromTableSection(self.gpa, self, sect_id, lazy_bind, self.stubs_table); try lazy_bind.finalize(self.gpa, self); } @@ -1828,7 +1714,12 @@ pub const Zld = struct { }); try self.file.pwriteAll(buffer, rebase_off); - try self.populateLazyBindOffsetsInStubHelper(lazy_bind); + try MachO.populateLazyBindOffsetsInStubHelper( + self, + self.options.target.cpu.arch, + self.file, + lazy_bind, + ); self.dyld_info_cmd.rebase_off = @as(u32, @intCast(rebase_off)); self.dyld_info_cmd.rebase_size = @as(u32, @intCast(rebase_size_aligned)); @@ -1840,36 +1731,6 @@ pub const Zld = struct { self.dyld_info_cmd.export_size = @as(u32, @intCast(export_size_aligned)); } - fn populateLazyBindOffsetsInStubHelper(self: *Zld, lazy_bind: LazyBind) !void { - if (lazy_bind.size() == 0) return; - - const stub_helper_section_index = self.getSectionByName("__TEXT", "__stub_helper").?; - assert(self.stub_helper_preamble_sym_index != null); - - const section = self.sections.get(stub_helper_section_index); - const stub_offset = stub_helpers.calcStubOffsetInStubHelper(self.options.target.cpu.arch); - const header = section.header; - var atom_index = section.first_atom_index.?; - atom_index = self.getAtom(atom_index).next_index.?; // skip preamble - - var index: usize = 0; - while (true) { - const atom = self.getAtom(atom_index); - const atom_sym = self.getSymbol(atom.getSymbolWithLoc()); - const file_offset = header.offset + atom_sym.n_value - header.addr + stub_offset; - const bind_offset = lazy_bind.offsets.items[index]; - - log.debug("writing lazy bind offset 0x{x} in stub helper at 0x{x}", .{ bind_offset, file_offset }); - - try self.file.pwriteAll(mem.asBytes(&bind_offset), file_offset); - - if (atom.next_index) |next_index| { - atom_index = next_index; - index += 1; - } else break; - } - } - const asc_u64 = std.sort.asc(u64); 
fn addSymbolToFunctionStarts(self: *Zld, sym_loc: SymbolWithLoc, addresses: *std.ArrayList(u64)) !void { @@ -1973,7 +1834,7 @@ pub const Zld = struct { var out_dice = std.ArrayList(macho.data_in_code_entry).init(self.gpa); defer out_dice.deinit(); - const text_sect_id = self.getSectionByName("__TEXT", "__text") orelse return; + const text_sect_id = self.text_section_index orelse return; const text_sect_header = self.sections.items(.header)[text_sect_id]; for (self.objects.items) |object| { @@ -2171,7 +2032,7 @@ pub const Zld = struct { fn writeDysymtab(self: *Zld, ctx: SymtabCtx) !void { const gpa = self.gpa; - const nstubs = @as(u32, @intCast(self.stubs.items.len)); + const nstubs = @as(u32, @intCast(self.stubs_table.lookup.count())); const ngot_entries = @as(u32, @intCast(self.got_table.lookup.count())); const nindirectsyms = nstubs * 2 + ngot_entries; const iextdefsym = ctx.nlocalsym; @@ -2191,19 +2052,20 @@ pub const Zld = struct { try buf.ensureTotalCapacityPrecise(math.cast(usize, needed_size_aligned) orelse return error.Overflow); const writer = buf.writer(); - if (self.getSectionByName("__TEXT", "__stubs")) |sect_id| { - const stubs = &self.sections.items(.header)[sect_id]; - stubs.reserved1 = 0; - for (self.stubs.items) |entry| { - const target_sym = entry.getTargetSymbol(self); + if (self.stubs_section_index) |sect_id| { + const header = &self.sections.items(.header)[sect_id]; + header.reserved1 = 0; + for (self.stubs_table.entries.items) |entry| { + if (!self.stubs_table.lookup.contains(entry)) continue; + const target_sym = self.getSymbol(entry); assert(target_sym.undf()); - try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry).?); } } if (self.got_section_index) |sect_id| { - const got = &self.sections.items(.header)[sect_id]; - got.reserved1 = nstubs; + const header = &self.sections.items(.header)[sect_id]; + header.reserved1 = nstubs; for 
(self.got_table.entries.items) |entry| { if (!self.got_table.lookup.contains(entry)) continue; const target_sym = self.getSymbol(entry); @@ -2215,13 +2077,14 @@ pub const Zld = struct { } } - if (self.getSectionByName("__DATA", "__la_symbol_ptr")) |sect_id| { - const la_symbol_ptr = &self.sections.items(.header)[sect_id]; - la_symbol_ptr.reserved1 = nstubs + ngot_entries; - for (self.stubs.items) |entry| { - const target_sym = entry.getTargetSymbol(self); + if (self.la_symbol_ptr_section_index) |sect_id| { + const header = &self.sections.items(.header)[sect_id]; + header.reserved1 = nstubs + ngot_entries; + for (self.stubs_table.entries.items) |entry| { + if (!self.stubs_table.lookup.contains(entry)) continue; + const target_sym = self.getSymbol(entry); assert(target_sym.undf()); - try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry).?); } } @@ -2343,12 +2206,12 @@ pub const Zld = struct { return buf; } - pub fn getAtomPtr(self: *Zld, atom_index: AtomIndex) *Atom { + pub fn getAtomPtr(self: *Zld, atom_index: Atom.Index) *Atom { assert(atom_index < self.atoms.items.len); return &self.atoms.items[atom_index]; } - pub fn getAtom(self: Zld, atom_index: AtomIndex) Atom { + pub fn getAtom(self: Zld, atom_index: Atom.Index) Atom { assert(atom_index < self.atoms.items.len); return self.atoms.items[atom_index]; } @@ -2444,12 +2307,10 @@ pub const Zld = struct { return header.addr + @sizeOf(u64) * index; } - /// Returns stubs atom that references `sym_with_loc` if one exists. - /// Returns null otherwise. 
- pub fn getStubsAtomIndexForSymbol(self: *Zld, sym_with_loc: SymbolWithLoc) ?AtomIndex { - const index = self.stubs_table.get(sym_with_loc) orelse return null; - const entry = self.stubs.items[index]; - return entry.atom_index; + pub fn getStubsEntryAddress(self: *Zld, sym_with_loc: SymbolWithLoc) ?u64 { + const index = self.stubs_table.lookup.get(sym_with_loc) orelse return null; + const header = self.sections.items(.header)[self.stubs_section_index.?]; + return header.addr + stubs.stubSize(self.options.target.cpu.arch) * index; } /// Returns symbol location corresponding to the set entrypoint. @@ -2581,7 +2442,7 @@ pub const Zld = struct { fn generateSymbolStabsForSymbol( self: *Zld, - atom_index: AtomIndex, + atom_index: Atom.Index, sym_loc: SymbolWithLoc, lookup: ?DwarfInfo.SubprogramLookupByName, buf: *[4]macho.nlist_64, @@ -2787,30 +2648,26 @@ pub const Zld = struct { scoped_log.debug("{}", .{self.tlv_ptr_table}); scoped_log.debug("stubs entries:", .{}); - for (self.stubs.items, 0..) |entry, i| { - const atom_sym = entry.getAtomSymbol(self); - const target_sym = entry.getTargetSymbol(self); - const target_sym_name = entry.getTargetSymbolName(self); - assert(target_sym.undf()); - scoped_log.debug(" {d}@{x} => import('{s}')", .{ - i, - atom_sym.n_value, - target_sym_name, - }); - } + scoped_log.debug("{}", .{self.stubs_table}); scoped_log.debug("thunks:", .{}); for (self.thunks.items, 0..) |thunk, i| { scoped_log.debug(" thunk({d})", .{i}); - for (thunk.lookup.keys(), 0..) |target, j| { - const target_sym = self.getSymbol(target); - const atom = self.getAtom(thunk.lookup.get(target).?); + const slice = thunk.targets.slice(); + for (slice.items(.tag), slice.items(.target), 0..) 
|tag, target, j| { + const atom_index = @as(u32, @intCast(thunk.getStartAtomIndex() + j)); + const atom = self.getAtom(atom_index); const atom_sym = self.getSymbol(atom.getSymbolWithLoc()); - scoped_log.debug(" {d}@{x} => thunk('{s}'@{x})", .{ + const target_addr = switch (tag) { + .stub => self.getStubsEntryAddress(target).?, + .atom => self.getSymbol(target).n_value, + }; + scoped_log.debug(" {d}@{x} => {s}({s}@{x})", .{ j, atom_sym.n_value, + @tagName(tag), self.getSymbolName(target), - target_sym.n_value, + target_addr, }); } } @@ -2836,7 +2693,7 @@ pub const Zld = struct { } } - pub fn logAtom(self: *Zld, atom_index: AtomIndex, logger: anytype) void { + pub fn logAtom(self: *Zld, atom_index: Atom.Index, logger: anytype) void { if (!build_options.enable_logging) return; const atom = self.getAtom(atom_index); @@ -2885,11 +2742,9 @@ pub const Zld = struct { pub const N_DEAD: u16 = @as(u16, @bitCast(@as(i16, -1))); -pub const AtomIndex = u32; - const IndirectPointer = struct { target: SymbolWithLoc, - atom_index: AtomIndex, + atom_index: Atom.Index, pub fn getTargetSymbol(self: @This(), zld: *Zld) macho.nlist_64 { return zld.getSymbol(self.target); @@ -3321,7 +3176,6 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try zld.createDyldPrivateAtom(); try zld.createTentativeDefAtoms(); - try zld.createStubHelperPreambleAtom(); if (zld.options.output_mode == .Exe) { const global = zld.getEntryPoint(); @@ -3329,7 +3183,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr // We do one additional check here in case the entry point was found in one of the dylibs. // (I actually have no idea what this would imply but it is a possible outcome and so we // support it.) 
- try Atom.addStub(&zld, global); + try zld.addStubEntry(global); } } @@ -3373,7 +3227,14 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr } try zld.writeAtoms(); + if (zld.requiresThunks()) try zld.writeThunks(); + try zld.writeDyldPrivateAtom(); + if (zld.stubs_section_index) |_| { + try zld.writeStubs(); + try zld.writeStubHelpers(); + try zld.writeLaSymbolPtrs(); + } if (zld.got_section_index) |sect_id| try zld.writePointerEntries(sect_id, &zld.got_table); if (zld.tlv_ptr_section_index) |sect_id| try zld.writePointerEntries(sect_id, &zld.tlv_ptr_table); @@ -3444,14 +3305,12 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr const global = zld.getEntryPoint(); const sym = zld.getSymbol(global); - const addr: u64 = if (sym.undf()) blk: { + const addr: u64 = if (sym.undf()) // In this case, the symbol has been resolved in one of dylibs and so we point // to the stub as its vmaddr value. - const stub_atom_index = zld.getStubsAtomIndexForSymbol(global).?; - const stub_atom = zld.getAtom(stub_atom_index); - const stub_sym = zld.getSymbol(stub_atom.getSymbolWithLoc()); - break :blk stub_sym.n_value; - } else sym.n_value; + zld.getStubsEntryAddress(global).? 
+ else + sym.n_value; try lc_writer.writeStruct(macho.entry_point_command{ .entryoff = @as(u32, @intCast(addr - seg.vmaddr)), From bf5c35145da5cdaa9290d000728c0b8f307d89df Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 25 Aug 2023 06:47:41 +0200 Subject: [PATCH 14/57] macho: remove dead code --- src/link/MachO.zig | 44 +++++++++++++++++ src/link/MachO/Atom.zig | 2 +- src/link/MachO/UnwindInfo.zig | 6 +-- src/link/MachO/dead_strip.zig | 8 ++-- src/link/MachO/zld.zig | 89 +++++------------------------------ 5 files changed, 63 insertions(+), 86 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 9164d9aae6..a181338f95 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -57,6 +57,7 @@ const LazyBind = @import("MachO/dyld_info/bind.zig").LazyBind(*const MachO, Symb const Rebase = @import("MachO/dyld_info/Rebase.zig"); pub const base_tag: File.Tag = File.Tag.macho; +pub const N_DEAD: u16 = @as(u16, @bitCast(@as(i16, -1))); /// Mode of operation of the linker. 
pub const Mode = enum { @@ -4053,6 +4054,49 @@ pub inline fn getPageSize(cpu_arch: std.Target.Cpu.Arch) u16 { }; } +pub fn getSegmentPrecedence(segname: []const u8) u4 { + if (mem.eql(u8, segname, "__PAGEZERO")) return 0x0; + if (mem.eql(u8, segname, "__TEXT")) return 0x1; + if (mem.eql(u8, segname, "__DATA_CONST")) return 0x2; + if (mem.eql(u8, segname, "__DATA")) return 0x3; + if (mem.eql(u8, segname, "__LINKEDIT")) return 0x5; + return 0x4; +} + +pub fn getSegmentMemoryProtection(segname: []const u8) macho.vm_prot_t { + if (mem.eql(u8, segname, "__PAGEZERO")) return macho.PROT.NONE; + if (mem.eql(u8, segname, "__TEXT")) return macho.PROT.READ | macho.PROT.EXEC; + if (mem.eql(u8, segname, "__LINKEDIT")) return macho.PROT.READ; + return macho.PROT.READ | macho.PROT.WRITE; +} + +pub fn getSectionPrecedence(header: macho.section_64) u8 { + const segment_precedence: u4 = getSegmentPrecedence(header.segName()); + const section_precedence: u4 = blk: { + if (header.isCode()) { + if (mem.eql(u8, "__text", header.sectName())) break :blk 0x0; + if (header.type() == macho.S_SYMBOL_STUBS) break :blk 0x1; + break :blk 0x2; + } + switch (header.type()) { + macho.S_NON_LAZY_SYMBOL_POINTERS, + macho.S_LAZY_SYMBOL_POINTERS, + => break :blk 0x0, + macho.S_MOD_INIT_FUNC_POINTERS => break :blk 0x1, + macho.S_MOD_TERM_FUNC_POINTERS => break :blk 0x2, + macho.S_ZEROFILL => break :blk 0xf, + macho.S_THREAD_LOCAL_REGULAR => break :blk 0xd, + macho.S_THREAD_LOCAL_ZEROFILL => break :blk 0xe, + else => { + if (mem.eql(u8, "__unwind_info", header.sectName())) break :blk 0xe; + if (mem.eql(u8, "__eh_frame", header.sectName())) break :blk 0xf; + break :blk 0x3; + }, + } + }; + return (@as(u8, @intCast(segment_precedence)) << 4) + section_precedence; +} + pub fn reportUndefined(self: *MachO, ctx: anytype, resolver: *const SymbolResolver) !void { const count = resolver.unresolved.count(); if (count == 0) return; diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 
5e1d6bd4cc..4095a1c333 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -503,7 +503,7 @@ pub fn getRelocTargetAddress(zld: *Zld, target: SymbolWithLoc, is_tlv: bool) !u6 }); const target_sym = zld.getSymbol(target_atom.getSymbolWithLoc()); - assert(target_sym.n_desc != @import("zld.zig").N_DEAD); + assert(target_sym.n_desc != MachO.N_DEAD); // If `target` is contained within the target atom, pull its address value. const offset = if (target_atom.getFile() != null) blk: { diff --git a/src/link/MachO/UnwindInfo.zig b/src/link/MachO/UnwindInfo.zig index 07a6b49b77..53d7c149be 100644 --- a/src/link/MachO/UnwindInfo.zig +++ b/src/link/MachO/UnwindInfo.zig @@ -20,8 +20,6 @@ const Object = @import("Object.zig"); const SymbolWithLoc = MachO.SymbolWithLoc; const Zld = @import("zld.zig").Zld; -const N_DEAD = @import("zld.zig").N_DEAD; - gpa: Allocator, /// List of all unwind records gathered from all objects and sorted @@ -301,7 +299,7 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void { break :blk record; } else blk: { const sym = zld.getSymbol(symbol); - if (sym.n_desc == N_DEAD) continue; + if (sym.n_desc == MachO.N_DEAD) continue; if (prev_symbol) |prev_sym| { const prev_addr = object.getSourceSymbol(prev_sym.sym_index).?.n_value; const curr_addr = object.getSourceSymbol(symbol.sym_index).?.n_value; @@ -327,7 +325,7 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void { const atom = zld.getAtom(atom_index); const sym = zld.getSymbol(symbol); - assert(sym.n_desc != N_DEAD); + assert(sym.n_desc != MachO.N_DEAD); const size = if (inner_syms_it.next()) |next_sym| blk: { // All this trouble to account for symbol aliases. 
// TODO I think that remodelling the linker so that a Symbol references an Atom diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 03e63f115a..f79b3c6184 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -16,8 +16,6 @@ const SymbolResolver = MachO.SymbolResolver; const UnwindInfo = @import("UnwindInfo.zig"); const Zld = @import("zld.zig").Zld; -const N_DEAD = @import("zld.zig").N_DEAD; - const AtomTable = std.AutoHashMap(Atom.Index, void); pub fn gcAtoms(zld: *Zld, resolver: *const SymbolResolver) !void { @@ -473,17 +471,17 @@ fn prune(zld: *Zld, alive: AtomTable) void { zld.sections.set(sect_id, section); _ = object.atoms.swapRemove(i); - sym.n_desc = N_DEAD; + sym.n_desc = MachO.N_DEAD; var inner_sym_it = Atom.getInnerSymbolsIterator(zld, atom_index); while (inner_sym_it.next()) |inner| { const inner_sym = zld.getSymbolPtr(inner); - inner_sym.n_desc = N_DEAD; + inner_sym.n_desc = MachO.N_DEAD; } if (Atom.getSectionAlias(zld, atom_index)) |alias| { const alias_sym = zld.getSymbolPtr(alias); - alias_sym.n_desc = N_DEAD; + alias_sym.n_desc = MachO.N_DEAD; } } } diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index e5815bf9d3..60bc1ec67f 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -290,7 +290,7 @@ pub const Zld = struct { for (self.globals.items) |global| { const sym = self.getSymbolPtr(global); if (!sym.tentative()) continue; - if (sym.n_desc == N_DEAD) continue; + if (sym.n_desc == MachO.N_DEAD) continue; log.debug("creating tentative definition for ATOM(%{d}, '{s}') in object({?})", .{ global.sym_index, self.getSymbolName(global), global.file, @@ -688,7 +688,7 @@ pub const Zld = struct { // __TEXT segment is non-optional { - const protection = getSegmentMemoryProtection("__TEXT"); + const protection = MachO.getSegmentMemoryProtection("__TEXT"); try self.segments.append(self.gpa, .{ .cmdsize = @sizeOf(macho.segment_command_64), .segname = makeStaticString("__TEXT"), 
@@ -704,7 +704,7 @@ pub const Zld = struct { const segment_id = self.getSegmentByName(segname) orelse blk: { log.debug("creating segment '{s}'", .{segname}); const segment_id = @as(u8, @intCast(self.segments.items.len)); - const protection = getSegmentMemoryProtection(segname); + const protection = MachO.getSegmentMemoryProtection(segname); try self.segments.append(self.gpa, .{ .cmdsize = @sizeOf(macho.segment_command_64), .segname = makeStaticString(segname), @@ -721,7 +721,7 @@ pub const Zld = struct { // __LINKEDIT always comes last { - const protection = getSegmentMemoryProtection("__LINKEDIT"); + const protection = MachO.getSegmentMemoryProtection("__LINKEDIT"); try self.segments.append(self.gpa, .{ .cmdsize = @sizeOf(macho.segment_command_64), .segname = makeStaticString("__LINKEDIT"), @@ -1009,7 +1009,7 @@ pub const Zld = struct { pub fn lessThan(zld: *Zld, lhs: @This(), rhs: @This()) bool { const lhs_header = zld.sections.items(.header)[lhs.index]; const rhs_header = zld.sections.items(.header)[rhs.index]; - return getSectionPrecedence(lhs_header) < getSectionPrecedence(rhs_header); + return MachO.getSectionPrecedence(lhs_header) < MachO.getSectionPrecedence(rhs_header); } }; @@ -1317,49 +1317,6 @@ pub const Zld = struct { return index; } - fn getSegmentPrecedence(segname: []const u8) u4 { - if (mem.eql(u8, segname, "__PAGEZERO")) return 0x0; - if (mem.eql(u8, segname, "__TEXT")) return 0x1; - if (mem.eql(u8, segname, "__DATA_CONST")) return 0x2; - if (mem.eql(u8, segname, "__DATA")) return 0x3; - if (mem.eql(u8, segname, "__LINKEDIT")) return 0x5; - return 0x4; - } - - fn getSegmentMemoryProtection(segname: []const u8) macho.vm_prot_t { - if (mem.eql(u8, segname, "__PAGEZERO")) return macho.PROT.NONE; - if (mem.eql(u8, segname, "__TEXT")) return macho.PROT.READ | macho.PROT.EXEC; - if (mem.eql(u8, segname, "__LINKEDIT")) return macho.PROT.READ; - return macho.PROT.READ | macho.PROT.WRITE; - } - - fn getSectionPrecedence(header: macho.section_64) u8 { - 
const segment_precedence: u4 = getSegmentPrecedence(header.segName()); - const section_precedence: u4 = blk: { - if (header.isCode()) { - if (mem.eql(u8, "__text", header.sectName())) break :blk 0x0; - if (header.type() == macho.S_SYMBOL_STUBS) break :blk 0x1; - break :blk 0x2; - } - switch (header.type()) { - macho.S_NON_LAZY_SYMBOL_POINTERS, - macho.S_LAZY_SYMBOL_POINTERS, - => break :blk 0x0, - macho.S_MOD_INIT_FUNC_POINTERS => break :blk 0x1, - macho.S_MOD_TERM_FUNC_POINTERS => break :blk 0x2, - macho.S_ZEROFILL => break :blk 0xf, - macho.S_THREAD_LOCAL_REGULAR => break :blk 0xd, - macho.S_THREAD_LOCAL_ZEROFILL => break :blk 0xe, - else => { - if (mem.eql(u8, "__unwind_info", header.sectName())) break :blk 0xe; - if (mem.eql(u8, "__eh_frame", header.sectName())) break :blk 0xf; - break :blk 0x3; - }, - } - }; - return (@as(u8, @intCast(segment_precedence)) << 4) + section_precedence; - } - fn writeSegmentHeaders(self: *Zld, writer: anytype) !void { for (self.segments.items, 0..) |seg, i| { const indexes = self.getSectionIndexes(@as(u8, @intCast(i))); @@ -1626,7 +1583,7 @@ pub const Zld = struct { for (self.globals.items) |global| { const sym = self.getSymbol(global); if (sym.undf()) continue; - if (sym.n_desc == N_DEAD) continue; + if (sym.n_desc == MachO.N_DEAD) continue; const sym_name = self.getSymbolName(global); log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); @@ -1736,7 +1693,7 @@ pub const Zld = struct { fn addSymbolToFunctionStarts(self: *Zld, sym_loc: SymbolWithLoc, addresses: *std.ArrayList(u64)) !void { const sym = self.getSymbol(sym_loc); if (sym.n_strx == 0) return; - if (sym.n_desc == N_DEAD) return; + if (sym.n_desc == MachO.N_DEAD) return; if (self.symbolIsTemp(sym_loc)) return; try addresses.append(sym.n_value); } @@ -1845,7 +1802,7 @@ pub const Zld = struct { for (object.exec_atoms.items) |atom_index| { const atom = self.getAtom(atom_index); const sym = self.getSymbol(atom.getSymbolWithLoc()); - if (sym.n_desc == 
N_DEAD) continue; + if (sym.n_desc == MachO.N_DEAD) continue; const source_addr = if (object.getSourceSymbol(atom.sym_index)) |source_sym| source_sym.n_value @@ -1903,7 +1860,7 @@ pub const Zld = struct { fn addLocalToSymtab(self: *Zld, sym_loc: SymbolWithLoc, locals: *std.ArrayList(macho.nlist_64)) !void { const sym = self.getSymbol(sym_loc); if (sym.n_strx == 0) return; // no name, skip - if (sym.n_desc == N_DEAD) return; // garbage-collected, skip + if (sym.n_desc == MachO.N_DEAD) return; // garbage-collected, skip if (sym.ext()) return; // an export lands in its own symtab section, skip if (self.symbolIsTemp(sym_loc)) return; // local temp symbol, skip @@ -1937,7 +1894,7 @@ pub const Zld = struct { for (self.globals.items) |global| { const sym = self.getSymbol(global); if (sym.undf()) continue; // import, skip - if (sym.n_desc == N_DEAD) continue; + if (sym.n_desc == MachO.N_DEAD) continue; var out_sym = sym; out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(global)); @@ -1952,7 +1909,7 @@ pub const Zld = struct { for (self.globals.items) |global| { const sym = self.getSymbol(global); if (!sym.undf()) continue; // not an import, skip - if (sym.n_desc == N_DEAD) continue; + if (sym.n_desc == MachO.N_DEAD) continue; const new_index = @as(u32, @intCast(imports.items.len)); var out_sym = sym; @@ -2615,7 +2572,7 @@ pub const Zld = struct { for (self.globals.items, 0..) |global, i| { const sym = self.getSymbol(global); if (sym.undf()) continue; - if (sym.n_desc == N_DEAD) continue; + if (sym.n_desc == MachO.N_DEAD) continue; scoped_log.debug(" %{d}: {s} @{x} in sect({d}), {s} (def in object({?}))", .{ i, self.getSymbolName(global), @@ -2630,7 +2587,7 @@ pub const Zld = struct { for (self.globals.items, 0..) 
|global, i| { const sym = self.getSymbol(global); if (!sym.undf()) continue; - if (sym.n_desc == N_DEAD) continue; + if (sym.n_desc == MachO.N_DEAD) continue; const ord = @divTrunc(sym.n_desc, macho.N_SYMBOL_RESOLVER); scoped_log.debug(" %{d}: {s} @{x} in ord({d}), {s}", .{ i, @@ -2740,26 +2697,6 @@ pub const Zld = struct { } }; -pub const N_DEAD: u16 = @as(u16, @bitCast(@as(i16, -1))); - -const IndirectPointer = struct { - target: SymbolWithLoc, - atom_index: Atom.Index, - - pub fn getTargetSymbol(self: @This(), zld: *Zld) macho.nlist_64 { - return zld.getSymbol(self.target); - } - - pub fn getTargetSymbolName(self: @This(), zld: *Zld) []const u8 { - return zld.getSymbolName(self.target); - } - - pub fn getAtomSymbol(self: @This(), zld: *Zld) macho.nlist_64 { - const atom = zld.getAtom(self.atom_index); - return zld.getSymbol(atom.getSymbolWithLoc()); - } -}; - pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) link.File.FlushError!void { const tracy = trace(@src()); defer tracy.end(); From 7c1135555652311fcd069e15e99ecd37e21360be Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 25 Aug 2023 07:09:07 +0200 Subject: [PATCH 15/57] macho: collect rebase data by scanning atoms directly in objects --- src/link/MachO/zld.zig | 134 ++++++++++++++++++----------------------- 1 file changed, 60 insertions(+), 74 deletions(-) diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 60bc1ec67f..862d89783d 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -1368,85 +1368,71 @@ pub const Zld = struct { } // Finally, unpack the rest. - const slice = self.sections.slice(); - for (slice.items(.header), 0..) 
|header, sect_id| { - switch (header.type()) { - macho.S_LITERAL_POINTERS, - macho.S_REGULAR, - macho.S_MOD_INIT_FUNC_POINTERS, - macho.S_MOD_TERM_FUNC_POINTERS, - => {}, - else => continue, - } - - const segment_index = slice.items(.segment_index)[sect_id]; - const segment = self.getSegment(@as(u8, @intCast(sect_id))); - if (segment.maxprot & macho.PROT.WRITE == 0) continue; - - log.debug("{s},{s}", .{ header.segName(), header.sectName() }); - - const cpu_arch = self.options.target.cpu.arch; - var atom_index = slice.items(.first_atom_index)[sect_id] orelse continue; - - while (true) { + const cpu_arch = self.options.target.cpu.arch; + for (self.objects.items) |*object| { + for (object.atoms.items) |atom_index| { const atom = self.getAtom(atom_index); const sym = self.getSymbol(atom.getSymbolWithLoc()); + if (sym.n_desc == MachO.N_DEAD) continue; - const should_rebase = blk: { - if (atom_index == self.dyld_private_atom_index.?) break :blk false; - break :blk !sym.undf(); - }; - - if (should_rebase) { - log.debug(" ATOM({d}, %{d}, '{s}')", .{ - atom_index, - atom.sym_index, - self.getSymbolName(atom.getSymbolWithLoc()), - }); - - const code = Atom.getAtomCode(self, atom_index); - const relocs = Atom.getAtomRelocs(self, atom_index); - const ctx = Atom.getRelocContext(self, atom_index); - - for (relocs) |rel| { - switch (cpu_arch) { - .aarch64 => { - const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); - if (rel_type != .ARM64_RELOC_UNSIGNED) continue; - if (rel.r_length != 3) continue; - }, - .x86_64 => { - const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); - if (rel_type != .X86_64_RELOC_UNSIGNED) continue; - if (rel.r_length != 3) continue; - }, - else => unreachable, - } - const target = Atom.parseRelocTarget(self, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = code, - .base_offset = ctx.base_offset, - .base_addr = ctx.base_addr, - }); - const target_sym = self.getSymbol(target); - if (target_sym.undf()) 
continue; - - const base_offset = @as(i32, @intCast(sym.n_value - segment.vmaddr)); - const rel_offset = rel.r_address - ctx.base_offset; - const offset = @as(u64, @intCast(base_offset + rel_offset)); - log.debug(" | rebase at {x}", .{offset}); - - try rebase.entries.append(self.gpa, .{ - .offset = offset, - .segment_id = segment_index, - }); - } + const sect_id = sym.n_sect - 1; + const section = self.sections.items(.header)[sect_id]; + const segment_id = self.sections.items(.segment_index)[sect_id]; + const segment = self.segments.items[segment_id]; + if (segment.maxprot & macho.PROT.WRITE == 0) continue; + switch (section.type()) { + macho.S_LITERAL_POINTERS, + macho.S_REGULAR, + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => {}, + else => continue, } - if (atom.next_index) |next_index| { - atom_index = next_index; - } else break; + log.debug(" ATOM({d}, %{d}, '{s}')", .{ + atom_index, + atom.sym_index, + self.getSymbolName(atom.getSymbolWithLoc()), + }); + + const code = Atom.getAtomCode(self, atom_index); + const relocs = Atom.getAtomRelocs(self, atom_index); + const ctx = Atom.getRelocContext(self, atom_index); + + for (relocs) |rel| { + switch (cpu_arch) { + .aarch64 => { + const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); + if (rel_type != .ARM64_RELOC_UNSIGNED) continue; + if (rel.r_length != 3) continue; + }, + .x86_64 => { + const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); + if (rel_type != .X86_64_RELOC_UNSIGNED) continue; + if (rel.r_length != 3) continue; + }, + else => unreachable, + } + const target = Atom.parseRelocTarget(self, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = code, + .base_offset = ctx.base_offset, + .base_addr = ctx.base_addr, + }); + const target_sym = self.getSymbol(target); + if (target_sym.undf()) continue; + + const base_offset = @as(i32, @intCast(sym.n_value - segment.vmaddr)); + const rel_offset = rel.r_address - ctx.base_offset; + const offset 
= @as(u64, @intCast(base_offset + rel_offset)); + log.debug(" | rebase at {x}", .{offset}); + + try rebase.entries.append(self.gpa, .{ + .offset = offset, + .segment_id = segment_id, + }); + } } } From e9ad9e04c988cbc5881fe75855bf4cfed16cb716 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 25 Aug 2023 07:22:27 +0200 Subject: [PATCH 16/57] macho: collect bind data by scanning atoms directly in objects --- src/link/MachO/zld.zig | 159 +++++++++++++++++++---------------------- 1 file changed, 75 insertions(+), 84 deletions(-) diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 862d89783d..c86a7221fc 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -1456,94 +1456,85 @@ pub const Zld = struct { } // Finally, unpack the rest. - const slice = self.sections.slice(); - for (slice.items(.header), 0..) |header, sect_id| { - switch (header.type()) { - macho.S_LITERAL_POINTERS, - macho.S_REGULAR, - macho.S_MOD_INIT_FUNC_POINTERS, - macho.S_MOD_TERM_FUNC_POINTERS, - => {}, - else => continue, - } - - const segment_index = slice.items(.segment_index)[sect_id]; - const segment = self.getSegment(@as(u8, @intCast(sect_id))); - if (segment.maxprot & macho.PROT.WRITE == 0) continue; - - const cpu_arch = self.options.target.cpu.arch; - var atom_index = slice.items(.first_atom_index)[sect_id] orelse continue; - - log.debug("{s},{s}", .{ header.segName(), header.sectName() }); - - while (true) { + const cpu_arch = self.options.target.cpu.arch; + for (self.objects.items) |*object| { + for (object.atoms.items) |atom_index| { const atom = self.getAtom(atom_index); const sym = self.getSymbol(atom.getSymbolWithLoc()); + if (sym.n_desc == MachO.N_DEAD) continue; - log.debug(" ATOM({d}, %{d}, '{s}')", .{ atom_index, atom.sym_index, self.getSymbolName(atom.getSymbolWithLoc()) }); - - const should_bind = blk: { - if (atom_index == self.dyld_private_atom_index.?) 
break :blk false; - break :blk true; - }; - - if (should_bind) { - const code = Atom.getAtomCode(self, atom_index); - const relocs = Atom.getAtomRelocs(self, atom_index); - const ctx = Atom.getRelocContext(self, atom_index); - - for (relocs) |rel| { - switch (cpu_arch) { - .aarch64 => { - const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); - if (rel_type != .ARM64_RELOC_UNSIGNED) continue; - if (rel.r_length != 3) continue; - }, - .x86_64 => { - const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); - if (rel_type != .X86_64_RELOC_UNSIGNED) continue; - if (rel.r_length != 3) continue; - }, - else => unreachable, - } - - const global = Atom.parseRelocTarget(self, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = code, - .base_offset = ctx.base_offset, - .base_addr = ctx.base_addr, - }); - const bind_sym_name = self.getSymbolName(global); - const bind_sym = self.getSymbol(global); - if (!bind_sym.undf()) continue; - - const base_offset = sym.n_value - segment.vmaddr; - const rel_offset = @as(u32, @intCast(rel.r_address - ctx.base_offset)); - const offset = @as(u64, @intCast(base_offset + rel_offset)); - const addend = mem.readIntLittle(i64, code[rel_offset..][0..8]); - - const dylib_ordinal = @divTrunc(@as(i16, @bitCast(bind_sym.n_desc)), macho.N_SYMBOL_RESOLVER); - log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ - base_offset, - bind_sym_name, - dylib_ordinal, - }); - log.debug(" | with addend {x}", .{addend}); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - } - try bind.entries.append(self.gpa, .{ - .target = global, - .offset = offset, - .segment_id = segment_index, - .addend = addend, - }); - } + const sect_id = sym.n_sect - 1; + const section = self.sections.items(.header)[sect_id]; + const segment_id = self.sections.items(.segment_index)[sect_id]; + const segment = self.segments.items[segment_id]; + if (segment.maxprot & macho.PROT.WRITE == 0) continue; + switch 
(section.type()) { + macho.S_LITERAL_POINTERS, + macho.S_REGULAR, + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => {}, + else => continue, + } + + log.debug(" ATOM({d}, %{d}, '{s}')", .{ + atom_index, + atom.sym_index, + self.getSymbolName(atom.getSymbolWithLoc()), + }); + + const code = Atom.getAtomCode(self, atom_index); + const relocs = Atom.getAtomRelocs(self, atom_index); + const ctx = Atom.getRelocContext(self, atom_index); + + for (relocs) |rel| { + switch (cpu_arch) { + .aarch64 => { + const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); + if (rel_type != .ARM64_RELOC_UNSIGNED) continue; + if (rel.r_length != 3) continue; + }, + .x86_64 => { + const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); + if (rel_type != .X86_64_RELOC_UNSIGNED) continue; + if (rel.r_length != 3) continue; + }, + else => unreachable, + } + + const global = Atom.parseRelocTarget(self, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = code, + .base_offset = ctx.base_offset, + .base_addr = ctx.base_addr, + }); + const bind_sym_name = self.getSymbolName(global); + const bind_sym = self.getSymbol(global); + if (!bind_sym.undf()) continue; + + const base_offset = sym.n_value - segment.vmaddr; + const rel_offset = @as(u32, @intCast(rel.r_address - ctx.base_offset)); + const offset = @as(u64, @intCast(base_offset + rel_offset)); + const addend = mem.readIntLittle(i64, code[rel_offset..][0..8]); + + const dylib_ordinal = @divTrunc(@as(i16, @bitCast(bind_sym.n_desc)), macho.N_SYMBOL_RESOLVER); + log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ + base_offset, + bind_sym_name, + dylib_ordinal, + }); + log.debug(" | with addend {x}", .{addend}); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + } + try bind.entries.append(self.gpa, .{ + .target = global, + .offset = offset, + .segment_id = segment_id, + .addend = addend, + }); } - if (atom.next_index) |next_index| { - atom_index = 
next_index; - } else break; } } From e54f286c3b78d1848acf08a7b7390cac659de641 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 25 Aug 2023 07:33:27 +0200 Subject: [PATCH 17/57] macho: assert every global is extern in incremental context --- src/link/MachO.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index a181338f95..dd301f8281 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3380,7 +3380,7 @@ fn collectExportData(self: *MachO, trie: *Trie) !void { const sym = self.getSymbol(global); if (sym.undf()) continue; - if (!sym.ext()) continue; + assert(sym.ext()); const sym_name = self.getSymbolName(global); log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); From 19afd794d08bfbebc57ee1f0cb8ddfb8e5601cdd Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 25 Aug 2023 07:38:28 +0200 Subject: [PATCH 18/57] macho: remove dead code --- src/link/MachO.zig | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index dd301f8281..cee5324ac3 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -788,11 +788,6 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try d_sym.flushModule(self); } - // if (build_options.enable_link_snapshots) { - // if (self.base.options.enable_link_snapshots) - // try self.snapshotState(); - // } - if (cache_miss) { // Update the file with the digest. If it fails we can continue; it only // means that the next invocation will have an unnecessary cache miss. 
From b2af2dc8b751e3a784b10f67314303b4e6514bdc Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 25 Aug 2023 08:45:00 +0200 Subject: [PATCH 19/57] macho: move symbol resolver into zld driver's state --- src/link/MachO.zig | 12 ++--- src/link/MachO/dead_strip.zig | 9 ++-- src/link/MachO/zld.zig | 96 +++++++++++++++++------------------ 3 files changed, 54 insertions(+), 63 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index cee5324ac3..412a8ab0e4 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -297,12 +297,6 @@ pub const SymbolWithLoc = extern struct { } }; -pub const SymbolResolver = struct { - arena: Allocator, - table: std.StringHashMap(u32), - unresolved: std.AutoArrayHashMap(u32, void), -}; - const HotUpdateState = struct { mach_task: ?std.os.darwin.MachTask = null, }; @@ -4092,15 +4086,15 @@ pub fn getSectionPrecedence(header: macho.section_64) u8 { return (@as(u8, @intCast(segment_precedence)) << 4) + section_precedence; } -pub fn reportUndefined(self: *MachO, ctx: anytype, resolver: *const SymbolResolver) !void { - const count = resolver.unresolved.count(); +pub fn reportUndefined(self: *MachO, ctx: anytype) !void { + const count = ctx.unresolved.count(); if (count == 0) return; const gpa = self.base.allocator; try self.misc_errors.ensureUnusedCapacity(gpa, count); - for (resolver.unresolved.keys()) |global_index| { + for (ctx.unresolved.keys()) |global_index| { const global = ctx.globals.items[global_index]; const sym_name = ctx.getSymbolName(global); diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index f79b3c6184..42a2e0cbd8 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -12,13 +12,12 @@ const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); const MachO = @import("../MachO.zig"); const SymbolWithLoc = MachO.SymbolWithLoc; -const SymbolResolver = MachO.SymbolResolver; const UnwindInfo = @import("UnwindInfo.zig"); const Zld = @import("zld.zig").Zld; 
const AtomTable = std.AutoHashMap(Atom.Index, void); -pub fn gcAtoms(zld: *Zld, resolver: *const SymbolResolver) !void { +pub fn gcAtoms(zld: *Zld) !void { const gpa = zld.gpa; var arena = std.heap.ArenaAllocator.init(gpa); @@ -30,7 +29,7 @@ pub fn gcAtoms(zld: *Zld, resolver: *const SymbolResolver) !void { var alive = AtomTable.init(arena.allocator()); try alive.ensureTotalCapacity(@as(u32, @intCast(zld.atoms.items.len))); - try collectRoots(zld, &roots, resolver); + try collectRoots(zld, &roots); try mark(zld, roots, &alive); prune(zld, alive); } @@ -48,7 +47,7 @@ fn addRoot(zld: *Zld, roots: *AtomTable, file: u32, sym_loc: SymbolWithLoc) !voi _ = try roots.getOrPut(atom_index); } -fn collectRoots(zld: *Zld, roots: *AtomTable, resolver: *const SymbolResolver) !void { +fn collectRoots(zld: *Zld, roots: *AtomTable) !void { log.debug("collecting roots", .{}); switch (zld.options.output_mode) { @@ -77,7 +76,7 @@ fn collectRoots(zld: *Zld, roots: *AtomTable, resolver: *const SymbolResolver) ! // Add all symbols force-defined by the user. 
for (zld.options.force_undefined_symbols.keys()) |sym_name| { - const global_index = resolver.table.get(sym_name).?; + const global_index = zld.resolver.get(sym_name).?; const global = zld.globals.items[global_index]; const sym = zld.getSymbol(global); assert(!sym.undf()); diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index c86a7221fc..7fa506ce0e 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -34,7 +34,6 @@ const Object = @import("Object.zig"); const Section = MachO.Section; const StringTable = @import("../strtab.zig").StringTable; const SymbolWithLoc = MachO.SymbolWithLoc; -const SymbolResolver = MachO.SymbolResolver; const TableSection = @import("../table_section.zig").TableSection; const Trie = @import("Trie.zig"); const UnwindInfo = @import("UnwindInfo.zig"); @@ -76,6 +75,8 @@ pub const Zld = struct { locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, globals: std.ArrayListUnmanaged(SymbolWithLoc) = .{}, + resolver: std.StringHashMapUnmanaged(u32) = .{}, + unresolved: std.AutoArrayHashMapUnmanaged(u32, void) = .{}, entry_index: ?u32 = null, mh_execute_header_index: ?u32 = null, @@ -327,53 +328,53 @@ pub const Zld = struct { } } - fn addUndefined(self: *Zld, name: []const u8, resolver: *SymbolResolver) !void { + fn addUndefined(self: *Zld, name: []const u8) !void { const sym_index = try self.allocateSymbol(); const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; const sym = self.getSymbolPtr(sym_loc); sym.n_strx = try self.strtab.insert(self.gpa, name); sym.n_type = macho.N_UNDF; const global_index = try self.addGlobal(sym_loc); - try resolver.table.putNoClobber(name, global_index); - try resolver.unresolved.putNoClobber(global_index, {}); + try self.resolver.putNoClobber(self.gpa, name, global_index); + try self.unresolved.putNoClobber(self.gpa, global_index, {}); } - fn resolveSymbols(self: *Zld, resolver: *SymbolResolver) !void { + fn resolveSymbols(self: *Zld) !void { // We add the specified entrypoint as the first 
unresolved symbols so that // we search for it in libraries should there be no object files specified // on the linker line. if (self.options.output_mode == .Exe) { const entry_name = self.options.entry orelse load_commands.default_entry_point; - try self.addUndefined(entry_name, resolver); + try self.addUndefined(entry_name); } // Force resolution of any symbols requested by the user. for (self.options.force_undefined_symbols.keys()) |sym_name| { - try self.addUndefined(sym_name, resolver); + try self.addUndefined(sym_name); } for (self.objects.items, 0..) |_, object_id| { - try self.resolveSymbolsInObject(@as(u32, @intCast(object_id)), resolver); + try self.resolveSymbolsInObject(@as(u32, @intCast(object_id))); } - try self.resolveSymbolsInArchives(resolver); + try self.resolveSymbolsInArchives(); // Finally, force resolution of dyld_stub_binder if there are imports // requested. - if (resolver.unresolved.count() > 0) { - try self.addUndefined("dyld_stub_binder", resolver); + if (self.unresolved.count() > 0) { + try self.addUndefined("dyld_stub_binder"); } - try self.resolveSymbolsInDylibs(resolver); + try self.resolveSymbolsInDylibs(); - self.dyld_stub_binder_index = resolver.table.get("dyld_stub_binder"); + self.dyld_stub_binder_index = self.resolver.get("dyld_stub_binder"); - try self.createMhExecuteHeaderSymbol(resolver); - try self.createDsoHandleSymbol(resolver); - try self.resolveSymbolsAtLoading(resolver); + try self.createMhExecuteHeaderSymbol(); + try self.createDsoHandleSymbol(); + try self.resolveSymbolsAtLoading(); } - fn resolveSymbolsInObject(self: *Zld, object_id: u32, resolver: *SymbolResolver) !void { + fn resolveSymbolsInObject(self: *Zld, object_id: u32) !void { const object = &self.objects.items[object_id]; const in_symtab = object.in_symtab orelse return; @@ -415,11 +416,11 @@ pub const Zld = struct { const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = object_id + 1 }; - const global_index = resolver.table.get(sym_name) orelse { + 
const global_index = self.resolver.get(sym_name) orelse { const global_index = try self.addGlobal(sym_loc); - try resolver.table.putNoClobber(sym_name, global_index); + try self.resolver.putNoClobber(self.gpa, sym_name, global_index); if (sym.undf() and !sym.tentative()) { - try resolver.unresolved.putNoClobber(global_index, {}); + try self.unresolved.putNoClobber(self.gpa, global_index, {}); } continue; }; @@ -477,7 +478,7 @@ pub const Zld = struct { const global_object = &self.objects.items[file]; global_object.globals_lookup[global.sym_index] = global_index; } - _ = resolver.unresolved.swapRemove(resolver.table.get(sym_name).?); + _ = self.unresolved.swapRemove(self.resolver.get(sym_name).?); global.* = sym_loc; } else { object.globals_lookup[sym_index] = global_index; @@ -485,14 +486,14 @@ pub const Zld = struct { } } - fn resolveSymbolsInArchives(self: *Zld, resolver: *SymbolResolver) !void { + fn resolveSymbolsInArchives(self: *Zld) !void { if (self.archives.items.len == 0) return; const gpa = self.gpa; var next_sym: usize = 0; - loop: while (next_sym < resolver.unresolved.count()) { - const global = self.globals.items[resolver.unresolved.keys()[next_sym]]; + loop: while (next_sym < self.unresolved.count()) { + const global = self.globals.items[self.unresolved.keys()[next_sym]]; const sym_name = self.getSymbolName(global); for (self.archives.items) |archive| { @@ -506,7 +507,7 @@ pub const Zld = struct { const object_id = @as(u16, @intCast(self.objects.items.len)); const object = try archive.parseObject(gpa, offsets.items[0]); try self.objects.append(gpa, object); - try self.resolveSymbolsInObject(object_id, resolver); + try self.resolveSymbolsInObject(object_id); continue :loop; } @@ -515,12 +516,12 @@ pub const Zld = struct { } } - fn resolveSymbolsInDylibs(self: *Zld, resolver: *SymbolResolver) !void { + fn resolveSymbolsInDylibs(self: *Zld) !void { if (self.dylibs.items.len == 0) return; var next_sym: usize = 0; - loop: while (next_sym < 
resolver.unresolved.count()) { - const global_index = resolver.unresolved.keys()[next_sym]; + loop: while (next_sym < self.unresolved.count()) { + const global_index = self.unresolved.keys()[next_sym]; const global = self.globals.items[global_index]; const sym = self.getSymbolPtr(global); const sym_name = self.getSymbolName(global); @@ -541,7 +542,7 @@ pub const Zld = struct { sym.n_desc |= macho.N_WEAK_REF; } - assert(resolver.unresolved.swapRemove(global_index)); + assert(self.unresolved.swapRemove(global_index)); continue :loop; } @@ -549,14 +550,14 @@ pub const Zld = struct { } } - fn resolveSymbolsAtLoading(self: *Zld, resolver: *SymbolResolver) !void { + fn resolveSymbolsAtLoading(self: *Zld) !void { const is_lib = self.options.output_mode == .Lib; const is_dyn_lib = self.options.link_mode == .Dynamic and is_lib; const allow_undef = is_dyn_lib and (self.options.allow_shlib_undefined orelse false); var next_sym: usize = 0; - while (next_sym < resolver.unresolved.count()) { - const global_index = resolver.unresolved.keys()[next_sym]; + while (next_sym < self.unresolved.count()) { + const global_index = self.unresolved.keys()[next_sym]; const global = self.globals.items[global_index]; const sym = self.getSymbolPtr(global); @@ -568,7 +569,7 @@ pub const Zld = struct { .n_desc = 0, .n_value = 0, }; - _ = resolver.unresolved.swapRemove(global_index); + _ = self.unresolved.swapRemove(global_index); continue; } else if (allow_undef) { const n_desc = @as( @@ -577,7 +578,7 @@ pub const Zld = struct { ); sym.n_type = macho.N_EXT; sym.n_desc = n_desc; - _ = resolver.unresolved.swapRemove(global_index); + _ = self.unresolved.swapRemove(global_index); continue; } @@ -585,9 +586,9 @@ pub const Zld = struct { } } - fn createMhExecuteHeaderSymbol(self: *Zld, resolver: *SymbolResolver) !void { + fn createMhExecuteHeaderSymbol(self: *Zld) !void { if (self.options.output_mode != .Exe) return; - if (resolver.table.get("__mh_execute_header")) |global_index| { + if 
(self.resolver.get("__mh_execute_header")) |global_index| { const global = self.globals.items[global_index]; const sym = self.getSymbol(global); self.mh_execute_header_index = global_index; @@ -602,7 +603,7 @@ pub const Zld = struct { sym.n_type = macho.N_SECT | macho.N_EXT; sym.n_desc = macho.REFERENCED_DYNAMICALLY; - if (resolver.table.get("__mh_execute_header")) |global_index| { + if (self.resolver.get("__mh_execute_header")) |global_index| { const global = &self.globals.items[global_index]; const global_object = &self.objects.items[global.getFile().?]; global_object.globals_lookup[global.sym_index] = global_index; @@ -613,8 +614,8 @@ pub const Zld = struct { } } - fn createDsoHandleSymbol(self: *Zld, resolver: *SymbolResolver) !void { - const global_index = resolver.table.get("___dso_handle") orelse return; + fn createDsoHandleSymbol(self: *Zld) !void { + const global_index = self.resolver.get("___dso_handle") orelse return; const global = &self.globals.items[global_index]; self.dso_handle_index = global_index; if (!self.getSymbol(global.*).undf()) return; @@ -629,7 +630,7 @@ pub const Zld = struct { const global_object = &self.objects.items[global.getFile().?]; global_object.globals_lookup[global.sym_index] = global_index; - _ = resolver.unresolved.swapRemove(resolver.table.get("___dso_handle").?); + _ = self.unresolved.swapRemove(self.resolver.get("___dso_handle").?); global.* = sym_loc; } @@ -649,6 +650,8 @@ pub const Zld = struct { self.strtab.deinit(gpa); self.locals.deinit(gpa); self.globals.deinit(gpa); + self.resolver.deinit(gpa); + self.unresolved.deinit(gpa); for (self.objects.items) |*object| { object.deinit(gpa); @@ -3066,17 +3069,12 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr log.err("parsing dependent libraries failed with err {s}", .{@errorName(err)}); }; - var resolver = SymbolResolver{ - .arena = arena, - .table = std.StringHashMap(u32).init(arena), - .unresolved = std.AutoArrayHashMap(u32, 
void).init(arena), - }; - try zld.resolveSymbols(&resolver); - try macho_file.reportUndefined(&zld, &resolver); + try zld.resolveSymbols(); + try macho_file.reportUndefined(&zld); if (options.output_mode == .Exe) { const entry_name = options.entry orelse load_commands.default_entry_point; - const global_index = resolver.table.get(entry_name).?; // Error was flagged earlier + const global_index = zld.resolver.get(entry_name).?; // Error was flagged earlier zld.entry_index = global_index; } @@ -3085,7 +3083,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr } if (gc_sections) { - try dead_strip.gcAtoms(&zld, &resolver); + try dead_strip.gcAtoms(&zld); } try zld.createDyldPrivateAtom(); From f29d9ec61cf9d533035749fe614f5071e80bc3d0 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 26 Aug 2023 07:36:13 +0200 Subject: [PATCH 20/57] macho: unify entry point handling --- src/link/MachO.zig | 454 +++++++++++++++++++++++++-------------------- 1 file changed, 249 insertions(+), 205 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 412a8ab0e4..10961d145e 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1,99 +1,3 @@ -const MachO = @This(); - -const std = @import("std"); -const build_options = @import("build_options"); -const builtin = @import("builtin"); -const assert = std.debug.assert; -const dwarf = std.dwarf; -const fs = std.fs; -const log = std.log.scoped(.link); -const macho = std.macho; -const math = std.math; -const mem = std.mem; -const meta = std.meta; - -const aarch64 = @import("../arch/aarch64/bits.zig"); -const calcUuid = @import("MachO/uuid.zig").calcUuid; -const codegen = @import("../codegen.zig"); -const dead_strip = @import("MachO/dead_strip.zig"); -const fat = @import("MachO/fat.zig"); -const link = @import("../link.zig"); -const llvm_backend = @import("../codegen/llvm.zig"); -const load_commands = @import("MachO/load_commands.zig"); -const stubs = @import("MachO/stubs.zig"); -const 
target_util = @import("../target.zig"); -const trace = @import("../tracy.zig").trace; -const zld = @import("MachO/zld.zig"); - -const Air = @import("../Air.zig"); -const Allocator = mem.Allocator; -const Archive = @import("MachO/Archive.zig"); -pub const Atom = @import("MachO/Atom.zig"); -const Cache = std.Build.Cache; -const CodeSignature = @import("MachO/CodeSignature.zig"); -const Compilation = @import("../Compilation.zig"); -const Dwarf = File.Dwarf; -const Dylib = @import("MachO/Dylib.zig"); -const File = link.File; -const Object = @import("MachO/Object.zig"); -const LibStub = @import("tapi.zig").LibStub; -const Liveness = @import("../Liveness.zig"); -const LlvmObject = @import("../codegen/llvm.zig").Object; -const Md5 = std.crypto.hash.Md5; -const Module = @import("../Module.zig"); -const InternPool = @import("../InternPool.zig"); -const Relocation = @import("MachO/Relocation.zig"); -const StringTable = @import("strtab.zig").StringTable; -const TableSection = @import("table_section.zig").TableSection; -const Trie = @import("MachO/Trie.zig"); -const Type = @import("../type.zig").Type; -const TypedValue = @import("../TypedValue.zig"); -const Value = @import("../value.zig").Value; - -pub const DebugSymbols = @import("MachO/DebugSymbols.zig"); - -const Bind = @import("MachO/dyld_info/bind.zig").Bind(*const MachO, SymbolWithLoc); -const LazyBind = @import("MachO/dyld_info/bind.zig").LazyBind(*const MachO, SymbolWithLoc); -const Rebase = @import("MachO/dyld_info/Rebase.zig"); - -pub const base_tag: File.Tag = File.Tag.macho; -pub const N_DEAD: u16 = @as(u16, @bitCast(@as(i16, -1))); - -/// Mode of operation of the linker. -pub const Mode = enum { - /// Incremental mode will preallocate segments/sections and is compatible with - /// watch and HCS modes of operation. - incremental, - /// Zld mode will link relocatables in a traditional, one-shot - /// fashion (default for LLVM backend). It acts as a drop-in replacement for - /// LLD. 
- zld, -}; - -pub const Section = struct { - header: macho.section_64, - segment_index: u8, - first_atom_index: ?Atom.Index = null, - last_atom_index: ?Atom.Index = null, - - /// A list of atoms that have surplus capacity. This list can have false - /// positives, as functions grow and shrink over time, only sometimes being added - /// or removed from the freelist. - /// - /// An atom has surplus capacity when its overcapacity value is greater than - /// padToIdeal(minimum_atom_size). That is, when it has so - /// much extra capacity, that we could fit a small new symbol in it, itself with - /// ideal_capacity or more. - /// - /// Ideal capacity is defined by size + (size / ideal_factor). - /// - /// Overcapacity is measured by actual_capacity - ideal_capacity. Note that - /// overcapacity can be negative. A simple way to have negative overcapacity is to - /// allocate a fresh atom, which will have ideal capacity, and then grow it - /// by 1 byte. It will then have -1 overcapacity. - free_list: std.ArrayListUnmanaged(Atom.Index) = .{}, -}; - base: File, /// If this is not null, an object file is created by LLVM and linked with LLD afterwards. @@ -153,6 +57,10 @@ strtab: StringTable(.strtab) = .{}, got_table: TableSection(SymbolWithLoc) = .{}, stub_table: TableSection(SymbolWithLoc) = .{}, +tlv_ptr_table: TableSection(SymbolWithLoc) = .{}, + +thunk_table: std.AutoHashMapUnmanaged(Atom.Index, thunks.Thunk.Index) = .{}, +thunks: std.ArrayListUnmanaged(thunks.Thunk) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, misc_errors: std.ArrayListUnmanaged(File.ErrorMsg) = .{}, @@ -225,101 +133,6 @@ tlv_table: TlvSymbolTable = .{}, /// Hot-code swapping state. 
hot_state: if (is_hot_update_compatible) HotUpdateState else struct {} = .{}, -const is_hot_update_compatible = switch (builtin.target.os.tag) { - .macos => true, - else => false, -}; - -const LazySymbolTable = std.AutoArrayHashMapUnmanaged(Module.Decl.OptionalIndex, LazySymbolMetadata); - -const LazySymbolMetadata = struct { - const State = enum { unused, pending_flush, flushed }; - text_atom: Atom.Index = undefined, - data_const_atom: Atom.Index = undefined, - text_state: State = .unused, - data_const_state: State = .unused, -}; - -const TlvSymbolTable = std.AutoArrayHashMapUnmanaged(SymbolWithLoc, Atom.Index); - -const DeclMetadata = struct { - atom: Atom.Index, - section: u8, - /// A list of all exports aliases of this Decl. - /// TODO do we actually need this at all? - exports: std.ArrayListUnmanaged(u32) = .{}, - - fn getExport(m: DeclMetadata, macho_file: *const MachO, name: []const u8) ?u32 { - for (m.exports.items) |exp| { - if (mem.eql(u8, name, macho_file.getSymbolName(.{ .sym_index = exp }))) return exp; - } - return null; - } - - fn getExportPtr(m: *DeclMetadata, macho_file: *MachO, name: []const u8) ?*u32 { - for (m.exports.items) |*exp| { - if (mem.eql(u8, name, macho_file.getSymbolName(.{ .sym_index = exp.* }))) return exp; - } - return null; - } -}; - -const BindingTable = std.AutoArrayHashMapUnmanaged(Atom.Index, std.ArrayListUnmanaged(Atom.Binding)); -const UnnamedConstTable = std.AutoArrayHashMapUnmanaged(Module.Decl.Index, std.ArrayListUnmanaged(Atom.Index)); -const RebaseTable = std.AutoArrayHashMapUnmanaged(Atom.Index, std.ArrayListUnmanaged(u32)); -const RelocationTable = std.AutoArrayHashMapUnmanaged(Atom.Index, std.ArrayListUnmanaged(Relocation)); - -const ResolveAction = struct { - kind: Kind, - target: SymbolWithLoc, - - const Kind = enum { - none, - add_got, - add_stub, - }; -}; - -pub const SymbolWithLoc = extern struct { - // Index into the respective symbol table. - sym_index: u32, - - // 0 means it's a synthetic global. 
- file: u32 = 0, - - pub fn getFile(self: SymbolWithLoc) ?u32 { - if (self.file == 0) return null; - return self.file - 1; - } - - pub fn eql(self: SymbolWithLoc, other: SymbolWithLoc) bool { - return self.file == other.file and self.sym_index == other.sym_index; - } -}; - -const HotUpdateState = struct { - mach_task: ?std.os.darwin.MachTask = null, -}; - -/// When allocating, the ideal_capacity is calculated by -/// actual_capacity + (actual_capacity / ideal_factor) -const ideal_factor = 3; - -/// In order for a slice of bytes to be considered eligible to keep metadata pointing at -/// it as a possible place to put new symbols, it must have enough room for this many bytes -/// (plus extra for reserved capacity). -const minimum_text_block_size = 64; -pub const min_text_capacity = padToIdeal(minimum_text_block_size); - -/// Default virtual memory offset corresponds to the size of __PAGEZERO segment and -/// start of __TEXT segment. -pub const default_pagezero_vmsize: u64 = 0x100000000; - -/// We commit 0x1000 = 4096 bytes of space to the header and -/// the table of load commands. This should be plenty for any -/// potential future extensions. -pub const default_headerpad_size: u32 = 0x1000; - pub fn openPath(allocator: Allocator, options: link.Options) !*MachO { assert(options.target.ofmt == .macho); @@ -622,6 +435,10 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No defer actions.deinit(); try self.resolveSymbolsInDylibs(&actions); + if (self.getEntryPoint() == null) { + self.error_flags.no_entry_point_found = true; + } + if (self.unresolved.count() > 0) { for (self.unresolved.keys()) |index| { // TODO: convert into compiler errors. 
@@ -641,6 +458,16 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.createDyldPrivateAtom(); try self.writeStubHelperPreamble(); + if (self.base.options.output_mode == .Exe and self.getEntryPoint() != null) { + const global = self.getEntryPoint().?; + if (self.getSymbol(global).undf()) { + // We do one additional check here in case the entry point was found in one of the dylibs. + // (I actually have no idea what this would imply but it is a possible outcome and so we + // support it.) + try self.addStubEntry(global); + } + } + try self.allocateSpecialSymbols(); for (self.relocs.keys()) |atom_index| { @@ -732,16 +559,18 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No .Exe => blk: { const seg_id = self.header_segment_cmd_index.?; const seg = self.segments.items[seg_id]; - const global = self.getEntryPoint() catch |err| switch (err) { - error.MissingMainEntrypoint => { - self.error_flags.no_entry_point_found = true; - break :blk; - }, - else => |e| return e, - }; + const global = self.getEntryPoint() orelse break :blk; const sym = self.getSymbol(global); + + const addr: u64 = if (sym.undf()) + // In this case, the symbol has been resolved in one of dylibs and so we point + // to the stub as its vmaddr value. + self.getStubsEntryAddress(global).? 
+ else + sym.n_value; + try lc_writer.writeStruct(macho.entry_point_command{ - .entryoff = @as(u32, @intCast(sym.n_value - seg.vmaddr)), + .entryoff = @as(u32, @intCast(addr - seg.vmaddr)), .stacksize = self.base.options.stack_size_override orelse 0, }); }, @@ -1796,6 +1625,14 @@ pub fn deinit(self: *MachO) void { self.got_table.deinit(gpa); self.stub_table.deinit(gpa); + self.tlv_ptr_table.deinit(gpa); + self.thunk_table.deinit(gpa); + + for (self.thunks.items) |*thunk| { + thunk.deinit(gpa); + } + self.thunks.deinit(gpa); + self.strtab.deinit(gpa); self.locals.deinit(gpa); @@ -4019,14 +3856,29 @@ pub fn getAtomIndexForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?Atom.In return self.atom_by_index_table.get(sym_with_loc.sym_index); } -/// Returns symbol location corresponding to the set entrypoint. +pub fn getGotEntryAddress(self: *MachO, sym_with_loc: SymbolWithLoc) ?u64 { + const index = self.got_table.lookup.get(sym_with_loc) orelse return null; + const header = self.sections.items(.header)[self.got_section_index.?]; + return header.addr + @sizeOf(u64) * index; +} + +pub fn getTlvPtrEntryAddress(self: *MachO, sym_with_loc: SymbolWithLoc) ?u64 { + const index = self.tlv_ptr_table.lookup.get(sym_with_loc) orelse return null; + const header = self.sections.items(.header)[self.tlv_ptr_section_index.?]; + return header.addr + @sizeOf(u64) * index; +} + +pub fn getStubsEntryAddress(self: *MachO, sym_with_loc: SymbolWithLoc) ?u64 { + const index = self.stub_table.lookup.get(sym_with_loc) orelse return null; + const header = self.sections.items(.header)[self.stubs_section_index.?]; + return header.addr + stubs.stubSize(self.base.options.target.cpu.arch) * index; +} + +/// Returns symbol location corresponding to the set entrypoint if any. /// Asserts output mode is executable. 
-pub fn getEntryPoint(self: MachO) error{MissingMainEntrypoint}!SymbolWithLoc { +pub fn getEntryPoint(self: MachO) ?SymbolWithLoc { const entry_name = self.base.options.entry orelse load_commands.default_entry_point; - const global = self.getGlobal(entry_name) orelse { - log.err("entrypoint '{s}' not found", .{entry_name}); - return error.MissingMainEntrypoint; - }; + const global = self.getGlobal(entry_name) orelse return null; return global; } @@ -4258,3 +4110,195 @@ pub fn logAtom(self: *MachO, atom_index: Atom.Index) void { sym.n_sect + 1, }); } + +const MachO = @This(); + +const std = @import("std"); +const build_options = @import("build_options"); +const builtin = @import("builtin"); +const assert = std.debug.assert; +const dwarf = std.dwarf; +const fs = std.fs; +const log = std.log.scoped(.link); +const macho = std.macho; +const math = std.math; +const mem = std.mem; +const meta = std.meta; + +const aarch64 = @import("../arch/aarch64/bits.zig"); +const calcUuid = @import("MachO/uuid.zig").calcUuid; +const codegen = @import("../codegen.zig"); +const dead_strip = @import("MachO/dead_strip.zig"); +const fat = @import("MachO/fat.zig"); +const link = @import("../link.zig"); +const llvm_backend = @import("../codegen/llvm.zig"); +const load_commands = @import("MachO/load_commands.zig"); +const stubs = @import("MachO/stubs.zig"); +const target_util = @import("../target.zig"); +const thunks = @import("MachO/thunks.zig"); +const trace = @import("../tracy.zig").trace; +const zld = @import("MachO/zld.zig"); + +const Air = @import("../Air.zig"); +const Allocator = mem.Allocator; +const Archive = @import("MachO/Archive.zig"); +pub const Atom = @import("MachO/Atom.zig"); +const Cache = std.Build.Cache; +const CodeSignature = @import("MachO/CodeSignature.zig"); +const Compilation = @import("../Compilation.zig"); +const Dwarf = File.Dwarf; +const Dylib = @import("MachO/Dylib.zig"); +const File = link.File; +const Object = @import("MachO/Object.zig"); +const LibStub = 
@import("tapi.zig").LibStub; +const Liveness = @import("../Liveness.zig"); +const LlvmObject = @import("../codegen/llvm.zig").Object; +const Md5 = std.crypto.hash.Md5; +const Module = @import("../Module.zig"); +const InternPool = @import("../InternPool.zig"); +const Relocation = @import("MachO/Relocation.zig"); +const StringTable = @import("strtab.zig").StringTable; +const TableSection = @import("table_section.zig").TableSection; +const Trie = @import("MachO/Trie.zig"); +const Type = @import("../type.zig").Type; +const TypedValue = @import("../TypedValue.zig"); +const Value = @import("../value.zig").Value; + +pub const DebugSymbols = @import("MachO/DebugSymbols.zig"); + +const Bind = @import("MachO/dyld_info/bind.zig").Bind(*const MachO, SymbolWithLoc); +const LazyBind = @import("MachO/dyld_info/bind.zig").LazyBind(*const MachO, SymbolWithLoc); +const Rebase = @import("MachO/dyld_info/Rebase.zig"); + +pub const base_tag: File.Tag = File.Tag.macho; +pub const N_DEAD: u16 = @as(u16, @bitCast(@as(i16, -1))); + +/// Mode of operation of the linker. +pub const Mode = enum { + /// Incremental mode will preallocate segments/sections and is compatible with + /// watch and HCS modes of operation. + incremental, + /// Zld mode will link relocatables in a traditional, one-shot + /// fashion (default for LLVM backend). It acts as a drop-in replacement for + /// LLD. + zld, +}; + +pub const Section = struct { + header: macho.section_64, + segment_index: u8, + first_atom_index: ?Atom.Index = null, + last_atom_index: ?Atom.Index = null, + + /// A list of atoms that have surplus capacity. This list can have false + /// positives, as functions grow and shrink over time, only sometimes being added + /// or removed from the freelist. + /// + /// An atom has surplus capacity when its overcapacity value is greater than + /// padToIdeal(minimum_atom_size). 
That is, when it has so + /// much extra capacity, that we could fit a small new symbol in it, itself with + /// ideal_capacity or more. + /// + /// Ideal capacity is defined by size + (size / ideal_factor). + /// + /// Overcapacity is measured by actual_capacity - ideal_capacity. Note that + /// overcapacity can be negative. A simple way to have negative overcapacity is to + /// allocate a fresh atom, which will have ideal capacity, and then grow it + /// by 1 byte. It will then have -1 overcapacity. + free_list: std.ArrayListUnmanaged(Atom.Index) = .{}, +}; + +const is_hot_update_compatible = switch (builtin.target.os.tag) { + .macos => true, + else => false, +}; + +const LazySymbolTable = std.AutoArrayHashMapUnmanaged(Module.Decl.OptionalIndex, LazySymbolMetadata); + +const LazySymbolMetadata = struct { + const State = enum { unused, pending_flush, flushed }; + text_atom: Atom.Index = undefined, + data_const_atom: Atom.Index = undefined, + text_state: State = .unused, + data_const_state: State = .unused, +}; + +const TlvSymbolTable = std.AutoArrayHashMapUnmanaged(SymbolWithLoc, Atom.Index); + +const DeclMetadata = struct { + atom: Atom.Index, + section: u8, + /// A list of all exports aliases of this Decl. + /// TODO do we actually need this at all? 
+ exports: std.ArrayListUnmanaged(u32) = .{}, + + fn getExport(m: DeclMetadata, macho_file: *const MachO, name: []const u8) ?u32 { + for (m.exports.items) |exp| { + if (mem.eql(u8, name, macho_file.getSymbolName(.{ .sym_index = exp }))) return exp; + } + return null; + } + + fn getExportPtr(m: *DeclMetadata, macho_file: *MachO, name: []const u8) ?*u32 { + for (m.exports.items) |*exp| { + if (mem.eql(u8, name, macho_file.getSymbolName(.{ .sym_index = exp.* }))) return exp; + } + return null; + } +}; + +const BindingTable = std.AutoArrayHashMapUnmanaged(Atom.Index, std.ArrayListUnmanaged(Atom.Binding)); +const UnnamedConstTable = std.AutoArrayHashMapUnmanaged(Module.Decl.Index, std.ArrayListUnmanaged(Atom.Index)); +const RebaseTable = std.AutoArrayHashMapUnmanaged(Atom.Index, std.ArrayListUnmanaged(u32)); +const RelocationTable = std.AutoArrayHashMapUnmanaged(Atom.Index, std.ArrayListUnmanaged(Relocation)); + +const ResolveAction = struct { + kind: Kind, + target: SymbolWithLoc, + + const Kind = enum { + none, + add_got, + add_stub, + }; +}; + +pub const SymbolWithLoc = extern struct { + // Index into the respective symbol table. + sym_index: u32, + + // 0 means it's a synthetic global. + file: u32 = 0, + + pub fn getFile(self: SymbolWithLoc) ?u32 { + if (self.file == 0) return null; + return self.file - 1; + } + + pub fn eql(self: SymbolWithLoc, other: SymbolWithLoc) bool { + return self.file == other.file and self.sym_index == other.sym_index; + } +}; + +const HotUpdateState = struct { + mach_task: ?std.os.darwin.MachTask = null, +}; + +/// When allocating, the ideal_capacity is calculated by +/// actual_capacity + (actual_capacity / ideal_factor) +const ideal_factor = 3; + +/// In order for a slice of bytes to be considered eligible to keep metadata pointing at +/// it as a possible place to put new symbols, it must have enough room for this many bytes +/// (plus extra for reserved capacity). 
+const minimum_text_block_size = 64; +pub const min_text_capacity = padToIdeal(minimum_text_block_size); + +/// Default virtual memory offset corresponds to the size of __PAGEZERO segment and +/// start of __TEXT segment. +pub const default_pagezero_vmsize: u64 = 0x100000000; + +/// We commit 0x1000 = 4096 bytes of space to the header and +/// the table of load commands. This should be plenty for any +/// potential future extensions. +pub const default_headerpad_size: u32 = 0x1000; From 180979ee41e7374ee1b8bc941e2281bb44e41dfd Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 26 Aug 2023 07:56:49 +0200 Subject: [PATCH 21/57] macho: move getOutputSection into Atom --- src/link/MachO/Atom.zig | 178 +++++++++++++++++++++++++++++++++----- src/link/MachO/Object.zig | 6 +- src/link/MachO/zld.zig | 150 ++------------------------------ 3 files changed, 166 insertions(+), 168 deletions(-) diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 4095a1c333..eb5e76462f 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -1,23 +1,3 @@ -const Atom = @This(); - -const std = @import("std"); -const build_options = @import("build_options"); -const aarch64 = @import("../../arch/aarch64/bits.zig"); -const assert = std.debug.assert; -const log = std.log.scoped(.link); -const macho = std.macho; -const math = std.math; -const mem = std.mem; -const meta = std.meta; -const trace = @import("../../tracy.zig").trace; - -const Allocator = mem.Allocator; -const Arch = std.Target.Cpu.Arch; -const MachO = @import("../MachO.zig"); -pub const Relocation = @import("Relocation.zig"); -const SymbolWithLoc = MachO.SymbolWithLoc; -const Zld = @import("zld.zig").Zld; - /// Each Atom always gets a symbol with the fully qualified name. 
/// The symbol can reside in any object file context structure in `symtab` array /// (see `Object`), or if the symbol is a synthetic symbol such as a GOT cell or @@ -125,6 +105,144 @@ pub fn freeListEligible(self: Atom, macho_file: *MachO) bool { return surplus >= MachO.min_text_capacity; } +pub fn getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 { + const segname = sect.segName(); + const sectname = sect.sectName(); + const res: ?u8 = blk: { + if (mem.eql(u8, "__LLVM", segname)) { + log.debug("TODO LLVM section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + break :blk null; + } + + // We handle unwind info separately. + if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) { + break :blk null; + } + if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { + break :blk null; + } + + if (sect.isCode()) { + if (zld.text_section_index == null) { + zld.text_section_index = try zld.initSection( + "__TEXT", + "__text", + .{ + .flags = macho.S_REGULAR | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + }, + ); + } + break :blk zld.text_section_index.?; + } + + if (sect.isDebug()) { + break :blk null; + } + + switch (sect.type()) { + macho.S_4BYTE_LITERALS, + macho.S_8BYTE_LITERALS, + macho.S_16BYTE_LITERALS, + => { + break :blk zld.getSectionByName("__TEXT", "__const") orelse try zld.initSection( + "__TEXT", + "__const", + .{}, + ); + }, + macho.S_CSTRING_LITERALS => { + if (mem.startsWith(u8, sectname, "__objc")) { + break :blk zld.getSectionByName(segname, sectname) orelse try zld.initSection( + segname, + sectname, + .{}, + ); + } + break :blk zld.getSectionByName("__TEXT", "__cstring") orelse try zld.initSection( + "__TEXT", + "__cstring", + .{ .flags = macho.S_CSTRING_LITERALS }, + ); + }, + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => { + break :blk zld.getSectionByName("__DATA_CONST", sectname) orelse try zld.initSection( + 
"__DATA_CONST", + sectname, + .{ .flags = sect.flags }, + ); + }, + macho.S_LITERAL_POINTERS, + macho.S_ZEROFILL, + macho.S_THREAD_LOCAL_VARIABLES, + macho.S_THREAD_LOCAL_VARIABLE_POINTERS, + macho.S_THREAD_LOCAL_REGULAR, + macho.S_THREAD_LOCAL_ZEROFILL, + => { + break :blk zld.getSectionByName(segname, sectname) orelse try zld.initSection( + segname, + sectname, + .{ .flags = sect.flags }, + ); + }, + macho.S_COALESCED => { + break :blk zld.getSectionByName(segname, sectname) orelse try zld.initSection( + segname, + sectname, + .{}, + ); + }, + macho.S_REGULAR => { + if (mem.eql(u8, segname, "__TEXT")) { + if (mem.eql(u8, sectname, "__rodata") or + mem.eql(u8, sectname, "__typelink") or + mem.eql(u8, sectname, "__itablink") or + mem.eql(u8, sectname, "__gosymtab") or + mem.eql(u8, sectname, "__gopclntab")) + { + break :blk zld.getSectionByName("__TEXT", sectname) orelse try zld.initSection( + "__TEXT", + sectname, + .{}, + ); + } + } + if (mem.eql(u8, segname, "__DATA")) { + if (mem.eql(u8, sectname, "__const") or + mem.eql(u8, sectname, "__cfstring") or + mem.eql(u8, sectname, "__objc_classlist") or + mem.eql(u8, sectname, "__objc_imageinfo")) + { + break :blk zld.getSectionByName("__DATA_CONST", sectname) orelse try zld.initSection( + "__DATA_CONST", + sectname, + .{}, + ); + } else if (mem.eql(u8, sectname, "__data")) { + break :blk zld.getSectionByName("__DATA", "__data") orelse try zld.initSection( + "__DATA", + "__data", + .{}, + ); + } + } + break :blk zld.getSectionByName(segname, sectname) orelse try zld.initSection( + segname, + sectname, + .{}, + ); + }, + else => break :blk null, + } + }; + return res; +} + pub fn addRelocation(macho_file: *MachO, atom_index: Index, reloc: Relocation) !void { return addRelocations(macho_file, atom_index, &[_]Relocation{reloc}); } @@ -1112,3 +1230,23 @@ pub fn relocIsStub(zld: *Zld, rel: macho.relocation_info) bool { else => unreachable, } } + +const Atom = @This(); + +const std = @import("std"); +const build_options = 
@import("build_options"); +const aarch64 = @import("../../arch/aarch64/bits.zig"); +const assert = std.debug.assert; +const log = std.log.scoped(.link); +const macho = std.macho; +const math = std.math; +const mem = std.mem; +const meta = std.meta; +const trace = @import("../../tracy.zig").trace; + +const Allocator = mem.Allocator; +const Arch = std.Target.Cpu.Arch; +const MachO = @import("../MachO.zig"); +pub const Relocation = @import("Relocation.zig"); +const SymbolWithLoc = MachO.SymbolWithLoc; +const Zld = @import("zld.zig").Zld; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 8c523779ea..c9f32aa4fd 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -377,7 +377,7 @@ pub fn splitRegularSections(self: *Object, zld: *Zld, object_id: u32) !void { const sections = self.getSourceSections(); for (sections, 0..) |sect, id| { if (sect.isDebug()) continue; - const out_sect_id = (try zld.getOutputSection(sect)) orelse { + const out_sect_id = (try Atom.getOutputSection(zld, sect)) orelse { log.debug(" unhandled section '{s},{s}'", .{ sect.segName(), sect.sectName() }); continue; }; @@ -397,7 +397,7 @@ pub fn splitRegularSections(self: *Object, zld: *Zld, object_id: u32) !void { if (self.in_symtab == null) { for (sections, 0..) |sect, id| { if (sect.isDebug()) continue; - const out_sect_id = (try zld.getOutputSection(sect)) orelse continue; + const out_sect_id = (try Atom.getOutputSection(zld, sect)) orelse continue; if (sect.size == 0) continue; const sect_id = @as(u8, @intCast(id)); @@ -456,7 +456,7 @@ pub fn splitRegularSections(self: *Object, zld: *Zld, object_id: u32) !void { log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() }); // Get output segment/section in the final artifact. 
- const out_sect_id = (try zld.getOutputSection(sect)) orelse continue; + const out_sect_id = (try Atom.getOutputSection(zld, sect)) orelse continue; log.debug(" output sect({d}, '{s},{s}')", .{ out_sect_id + 1, diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 7fa506ce0e..d4dd599d2d 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -95,144 +95,6 @@ pub const Zld = struct { atoms: std.ArrayListUnmanaged(Atom) = .{}, - pub fn getOutputSection(self: *Zld, sect: macho.section_64) !?u8 { - const segname = sect.segName(); - const sectname = sect.sectName(); - const res: ?u8 = blk: { - if (mem.eql(u8, "__LLVM", segname)) { - log.debug("TODO LLVM section: type 0x{x}, name '{s},{s}'", .{ - sect.flags, segname, sectname, - }); - break :blk null; - } - - // We handle unwind info separately. - if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) { - break :blk null; - } - if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { - break :blk null; - } - - if (sect.isCode()) { - if (self.text_section_index == null) { - self.text_section_index = try self.initSection( - "__TEXT", - "__text", - .{ - .flags = macho.S_REGULAR | - macho.S_ATTR_PURE_INSTRUCTIONS | - macho.S_ATTR_SOME_INSTRUCTIONS, - }, - ); - } - break :blk self.text_section_index.?; - } - - if (sect.isDebug()) { - break :blk null; - } - - switch (sect.type()) { - macho.S_4BYTE_LITERALS, - macho.S_8BYTE_LITERALS, - macho.S_16BYTE_LITERALS, - => { - break :blk self.getSectionByName("__TEXT", "__const") orelse try self.initSection( - "__TEXT", - "__const", - .{}, - ); - }, - macho.S_CSTRING_LITERALS => { - if (mem.startsWith(u8, sectname, "__objc")) { - break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( - segname, - sectname, - .{}, - ); - } - break :blk self.getSectionByName("__TEXT", "__cstring") orelse try self.initSection( - "__TEXT", - "__cstring", - .{ .flags = macho.S_CSTRING_LITERALS }, - ); - }, - 
macho.S_MOD_INIT_FUNC_POINTERS, - macho.S_MOD_TERM_FUNC_POINTERS, - => { - break :blk self.getSectionByName("__DATA_CONST", sectname) orelse try self.initSection( - "__DATA_CONST", - sectname, - .{ .flags = sect.flags }, - ); - }, - macho.S_LITERAL_POINTERS, - macho.S_ZEROFILL, - macho.S_THREAD_LOCAL_VARIABLES, - macho.S_THREAD_LOCAL_VARIABLE_POINTERS, - macho.S_THREAD_LOCAL_REGULAR, - macho.S_THREAD_LOCAL_ZEROFILL, - => { - break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( - segname, - sectname, - .{ .flags = sect.flags }, - ); - }, - macho.S_COALESCED => { - break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( - segname, - sectname, - .{}, - ); - }, - macho.S_REGULAR => { - if (mem.eql(u8, segname, "__TEXT")) { - if (mem.eql(u8, sectname, "__rodata") or - mem.eql(u8, sectname, "__typelink") or - mem.eql(u8, sectname, "__itablink") or - mem.eql(u8, sectname, "__gosymtab") or - mem.eql(u8, sectname, "__gopclntab")) - { - break :blk self.getSectionByName("__TEXT", sectname) orelse try self.initSection( - "__TEXT", - sectname, - .{}, - ); - } - } - if (mem.eql(u8, segname, "__DATA")) { - if (mem.eql(u8, sectname, "__const") or - mem.eql(u8, sectname, "__cfstring") or - mem.eql(u8, sectname, "__objc_classlist") or - mem.eql(u8, sectname, "__objc_imageinfo")) - { - break :blk self.getSectionByName("__DATA_CONST", sectname) orelse try self.initSection( - "__DATA_CONST", - sectname, - .{}, - ); - } else if (mem.eql(u8, sectname, "__data")) { - break :blk self.getSectionByName("__DATA", "__data") orelse try self.initSection( - "__DATA", - "__data", - .{}, - ); - } - } - break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( - segname, - sectname, - .{}, - ); - }, - else => break :blk null, - } - }; - return res; - } - pub fn addAtomToSection(self: *Zld, atom_index: Atom.Index) void { const atom = self.getAtomPtr(atom_index); const sym = self.getSymbol(atom.getSymbolWithLoc()); @@ -278,7 
+140,8 @@ pub const Zld = struct { const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); sym.n_type = macho.N_SECT; - const sect_id = self.getSectionByName("__DATA", "__data") orelse try self.initSection("__DATA", "__data", .{}); + const sect_id = self.getSectionByName("__DATA", "__data") orelse + try self.initSection("__DATA", "__data", .{}); sym.n_sect = sect_id + 1; self.dyld_private_atom_index = atom_index; @@ -301,16 +164,13 @@ pub const Zld = struct { // text blocks for each tentative definition. const size = sym.n_value; const alignment = (sym.n_desc >> 8) & 0x0f; - const n_sect = (try self.getOutputSection(.{ - .segname = makeStaticString("__DATA"), - .sectname = makeStaticString("__bss"), - .flags = macho.S_ZEROFILL, - })).? + 1; + const sect_id = self.getSectionByName("__DATA", "__bss") orelse + try self.initSection("__DATA", "__bss", .{ .flags = macho.S_ZEROFILL }); sym.* = .{ .n_strx = sym.n_strx, .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = n_sect, + .n_sect = sect_id + 1, .n_desc = 0, .n_value = 0, }; From b2773cd7120f7120410e1635aaeec026c7bbcdd1 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 26 Aug 2023 08:13:41 +0200 Subject: [PATCH 22/57] macho: move initSection into MachO from Zld --- src/link/MachO.zig | 50 ++++++++++++++++++++++++++++----------- src/link/MachO/Atom.zig | 46 ++++++++++++++++++++++++++--------- src/link/MachO/Object.zig | 14 +++++------ src/link/MachO/zld.zig | 42 ++++++-------------------------- 4 files changed, 85 insertions(+), 67 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 10961d145e..467b75c69c 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2739,6 +2739,34 @@ fn calcPagezeroSize(self: *MachO) u64 { return aligned_pagezero_vmsize; } +const InitSectionOpts = struct { + flags: u32 = macho.S_REGULAR, + reserved1: u32 = 0, + reserved2: u32 = 0, +}; + +pub fn initSection( + gpa: Allocator, + ctx: anytype, + segname: []const u8, + sectname: []const u8, + opts: 
InitSectionOpts, +) !u8 { + log.debug("creating section '{s},{s}'", .{ segname, sectname }); + const index = @as(u8, @intCast(ctx.sections.slice().len)); + try ctx.sections.append(gpa, .{ + .segment_index = undefined, // Segments will be created automatically later down the pipeline + .header = .{ + .sectname = makeStaticString(sectname), + .segname = makeStaticString(segname), + .flags = opts.flags, + .reserved1 = opts.reserved1, + .reserved2 = opts.reserved2, + }, + }); + return index; +} + fn allocateSection(self: *MachO, segname: []const u8, sectname: []const u8, opts: struct { size: u64 = 0, alignment: u32 = 0, @@ -2751,7 +2779,6 @@ fn allocateSection(self: *MachO, segname: []const u8, sectname: []const u8, opts // In incremental context, we create one section per segment pairing. This way, // we can move the segment in raw file as we please. const segment_id = @as(u8, @intCast(self.segments.items.len)); - const section_id = @as(u8, @intCast(self.sections.slice().len)); const vmaddr = blk: { const prev_segment = self.segments.items[segment_id - 1]; break :blk mem.alignForward(u64, prev_segment.vmaddr + prev_segment.vmsize, page_size); @@ -2782,23 +2809,18 @@ fn allocateSection(self: *MachO, segname: []const u8, sectname: []const u8, opts .cmdsize = @sizeOf(macho.segment_command_64) + @sizeOf(macho.section_64), }; - var section = macho.section_64{ - .sectname = makeStaticString(sectname), - .segname = makeStaticString(segname), - .addr = mem.alignForward(u64, vmaddr, opts.alignment), - .offset = mem.alignForward(u32, @as(u32, @intCast(off)), opts.alignment), - .size = opts.size, - .@"align" = math.log2(opts.alignment), + const sect_id = try initSection(gpa, self, sectname, segname, .{ .flags = opts.flags, .reserved2 = opts.reserved2, - }; + }); + const section = &self.sections.items(.header)[sect_id]; + section.addr = mem.alignForward(u64, vmaddr, opts.alignment); + section.offset = mem.alignForward(u32, @as(u32, @intCast(off)), opts.alignment); + section.size 
= opts.size; + section.@"align" = math.log2(opts.alignment); assert(!section.isZerofill()); // TODO zerofill sections - try self.sections.append(gpa, .{ - .segment_index = segment_id, - .header = section, - }); - return section_id; + return sect_id; } fn growSection(self: *MachO, sect_id: u8, needed_size: u64) !void { diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index eb5e76462f..fe118e0e3a 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -106,6 +106,7 @@ pub fn freeListEligible(self: Atom, macho_file: *MachO) bool { } pub fn getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 { + const gpa = zld.gpa; const segname = sect.segName(); const sectname = sect.sectName(); const res: ?u8 = blk: { @@ -126,7 +127,9 @@ pub fn getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 { if (sect.isCode()) { if (zld.text_section_index == null) { - zld.text_section_index = try zld.initSection( + zld.text_section_index = try MachO.initSection( + gpa, + zld, "__TEXT", "__text", .{ @@ -148,7 +151,9 @@ pub fn getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 { macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS, => { - break :blk zld.getSectionByName("__TEXT", "__const") orelse try zld.initSection( + break :blk zld.getSectionByName("__TEXT", "__const") orelse try MachO.initSection( + gpa, + zld, "__TEXT", "__const", .{}, @@ -156,13 +161,17 @@ pub fn getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 { }, macho.S_CSTRING_LITERALS => { if (mem.startsWith(u8, sectname, "__objc")) { - break :blk zld.getSectionByName(segname, sectname) orelse try zld.initSection( + break :blk zld.getSectionByName(segname, sectname) orelse try MachO.initSection( + gpa, + zld, segname, sectname, .{}, ); } - break :blk zld.getSectionByName("__TEXT", "__cstring") orelse try zld.initSection( + break :blk zld.getSectionByName("__TEXT", "__cstring") orelse try MachO.initSection( + gpa, + zld, "__TEXT", "__cstring", .{ .flags = macho.S_CSTRING_LITERALS }, @@ 
-171,7 +180,9 @@ pub fn getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 { macho.S_MOD_INIT_FUNC_POINTERS, macho.S_MOD_TERM_FUNC_POINTERS, => { - break :blk zld.getSectionByName("__DATA_CONST", sectname) orelse try zld.initSection( + break :blk zld.getSectionByName("__DATA_CONST", sectname) orelse try MachO.initSection( + gpa, + zld, "__DATA_CONST", sectname, .{ .flags = sect.flags }, @@ -184,14 +195,19 @@ pub fn getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 { macho.S_THREAD_LOCAL_REGULAR, macho.S_THREAD_LOCAL_ZEROFILL, => { - break :blk zld.getSectionByName(segname, sectname) orelse try zld.initSection( + break :blk zld.getSectionByName(segname, sectname) orelse try MachO.initSection( + gpa, + zld, segname, sectname, .{ .flags = sect.flags }, ); }, macho.S_COALESCED => { - break :blk zld.getSectionByName(segname, sectname) orelse try zld.initSection( + break :blk zld.getSectionByName(segname, sectname) orelse try MachO.initSection( + gpa, + zld, + segname, sectname, .{}, @@ -205,7 +221,9 @@ pub fn getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 { mem.eql(u8, sectname, "__gosymtab") or mem.eql(u8, sectname, "__gopclntab")) { - break :blk zld.getSectionByName("__TEXT", sectname) orelse try zld.initSection( + break :blk zld.getSectionByName("__TEXT", sectname) orelse try MachO.initSection( + gpa, + zld, "__TEXT", sectname, .{}, @@ -218,20 +236,26 @@ pub fn getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 { mem.eql(u8, sectname, "__objc_classlist") or mem.eql(u8, sectname, "__objc_imageinfo")) { - break :blk zld.getSectionByName("__DATA_CONST", sectname) orelse try zld.initSection( + break :blk zld.getSectionByName("__DATA_CONST", sectname) orelse try MachO.initSection( + gpa, + zld, "__DATA_CONST", sectname, .{}, ); } else if (mem.eql(u8, sectname, "__data")) { - break :blk zld.getSectionByName("__DATA", "__data") orelse try zld.initSection( + break :blk zld.getSectionByName("__DATA", "__data") orelse try MachO.initSection( + gpa, 
+ zld, "__DATA", "__data", .{}, ); } } - break :blk zld.getSectionByName(segname, sectname) orelse try zld.initSection( + break :blk zld.getSectionByName(segname, sectname) orelse try MachO.initSection( + gpa, + zld, segname, sectname, .{}, diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index c9f32aa4fd..2eee9f5787 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -685,11 +685,12 @@ fn parseEhFrameSection(self: *Object, zld: *Zld, object_id: u32) !void { log.debug("parsing __TEXT,__eh_frame section", .{}); + const gpa = zld.gpa; + if (zld.getSectionByName("__TEXT", "__eh_frame") == null) { - _ = try zld.initSection("__TEXT", "__eh_frame", .{}); + _ = try MachO.initSection(gpa, zld, "__TEXT", "__eh_frame", .{}); } - const gpa = zld.gpa; const cpu_arch = zld.options.target.cpu.arch; try self.parseRelocs(gpa, sect_id); const relocs = self.getRelocs(sect_id); @@ -779,6 +780,8 @@ fn parseEhFrameSection(self: *Object, zld: *Zld, object_id: u32) !void { } fn parseUnwindInfo(self: *Object, zld: *Zld, object_id: u32) !void { + const gpa = zld.gpa; + const cpu_arch = zld.options.target.cpu.arch; const sect_id = self.unwind_info_sect_id orelse { // If it so happens that the object had `__eh_frame` section defined but no `__compact_unwind`, // we will try fully synthesising unwind info records to somewhat match Apple ld's @@ -786,7 +789,7 @@ fn parseUnwindInfo(self: *Object, zld: *Zld, object_id: u32) !void { // we still create the output `__TEXT,__unwind_info` section. 
if (self.hasEhFrameRecords()) { if (zld.getSectionByName("__TEXT", "__unwind_info") == null) { - _ = try zld.initSection("__TEXT", "__unwind_info", .{}); + _ = try MachO.initSection(gpa, zld, "__TEXT", "__unwind_info", .{}); } } return; @@ -794,11 +797,8 @@ fn parseUnwindInfo(self: *Object, zld: *Zld, object_id: u32) !void { log.debug("parsing unwind info in {s}", .{self.name}); - const gpa = zld.gpa; - const cpu_arch = zld.options.target.cpu.arch; - if (zld.getSectionByName("__TEXT", "__unwind_info") == null) { - _ = try zld.initSection("__TEXT", "__unwind_info", .{}); + _ = try MachO.initSection(gpa, zld, "__TEXT", "__unwind_info", .{}); } const unwind_records = self.getUnwindRecords(); diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index d4dd599d2d..5bfff40405 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -141,7 +141,7 @@ pub const Zld = struct { sym.n_type = macho.N_SECT; const sect_id = self.getSectionByName("__DATA", "__data") orelse - try self.initSection("__DATA", "__data", .{}); + try MachO.initSection(self.gpa, self, "__DATA", "__data", .{}); sym.n_sect = sect_id + 1; self.dyld_private_atom_index = atom_index; @@ -165,7 +165,7 @@ pub const Zld = struct { const size = sym.n_value; const alignment = (sym.n_desc >> 8) & 0x0f; const sect_id = self.getSectionByName("__DATA", "__bss") orelse - try self.initSection("__DATA", "__bss", .{ .flags = macho.S_ZEROFILL }); + try MachO.initSection(gpa, self, "__DATA", "__bss", .{ .flags = macho.S_ZEROFILL }); sym.* = .{ .n_strx = sym.n_strx, @@ -619,7 +619,7 @@ pub const Zld = struct { if (self.got_table.lookup.contains(target)) return; _ = try self.got_table.allocateEntry(self.gpa, target); if (self.got_section_index == null) { - self.got_section_index = try self.initSection("__DATA_CONST", "__got", .{ + self.got_section_index = try MachO.initSection(self.gpa, self, "__DATA_CONST", "__got", .{ .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, }); } @@ -629,7 +629,7 @@ pub const Zld = struct 
{ if (self.tlv_ptr_table.lookup.contains(target)) return; _ = try self.tlv_ptr_table.allocateEntry(self.gpa, target); if (self.tlv_ptr_section_index == null) { - self.tlv_ptr_section_index = try self.initSection("__DATA", "__thread_ptrs", .{ + self.tlv_ptr_section_index = try MachO.initSection(self.gpa, self, "__DATA", "__thread_ptrs", .{ .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, }); } @@ -639,18 +639,18 @@ pub const Zld = struct { if (self.stubs_table.lookup.contains(target)) return; _ = try self.stubs_table.allocateEntry(self.gpa, target); if (self.stubs_section_index == null) { - self.stubs_section_index = try self.initSection("__TEXT", "__stubs", .{ + self.stubs_section_index = try MachO.initSection(self.gpa, self, "__TEXT", "__stubs", .{ .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, .reserved2 = stubs.stubSize(self.options.target.cpu.arch), }); - self.stub_helper_section_index = try self.initSection("__TEXT", "__stub_helper", .{ + self.stub_helper_section_index = try MachO.initSection(self.gpa, self, "__TEXT", "__stub_helper", .{ .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, }); - self.la_symbol_ptr_section_index = try self.initSection("__DATA", "__la_symbol_ptr", .{ + self.la_symbol_ptr_section_index = try MachO.initSection(self.gpa, self, "__DATA", "__la_symbol_ptr", .{ .flags = macho.S_LAZY_SYMBOL_POINTERS, }); } @@ -1152,34 +1152,6 @@ pub const Zld = struct { segment.vmsize = mem.alignForward(u64, segment.vmsize, page_size); } - const InitSectionOpts = struct { - flags: u32 = macho.S_REGULAR, - reserved1: u32 = 0, - reserved2: u32 = 0, - }; - - pub fn initSection( - self: *Zld, - segname: []const u8, - sectname: []const u8, - opts: InitSectionOpts, - ) !u8 { - const gpa = self.gpa; - log.debug("creating section '{s},{s}'", .{ segname, sectname }); - const index = @as(u8, @intCast(self.sections.slice().len)); - try self.sections.append(gpa, .{ - 
.segment_index = undefined, // Segments will be created automatically later down the pipeline - .header = .{ - .sectname = makeStaticString(sectname), - .segname = makeStaticString(segname), - .flags = opts.flags, - .reserved1 = opts.reserved1, - .reserved2 = opts.reserved2, - }, - }); - return index; - } - fn writeSegmentHeaders(self: *Zld, writer: anytype) !void { for (self.segments.items, 0..) |seg, i| { const indexes = self.getSectionIndexes(@as(u8, @intCast(i))); From 664b983518f29eed3c60b503cf12bddbb19f3afc Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 26 Aug 2023 18:30:30 +0200 Subject: [PATCH 23/57] macho: unify writeSegmentHeaders func --- src/link/MachO.zig | 21 +++++++++++++++++++-- src/link/MachO/zld.zig | 29 +---------------------------- 2 files changed, 20 insertions(+), 30 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 467b75c69c..52346e7863 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3035,11 +3035,28 @@ pub fn getGlobalSymbol(self: *MachO, name: []const u8, lib_name: ?[]const u8) !u return self.addUndefined(sym_name, .add_stub); } -fn writeSegmentHeaders(self: *MachO, writer: anytype) !void { +pub fn writeSegmentHeaders(self: anytype, writer: anytype) !void { for (self.segments.items, 0..) |seg, i| { const indexes = self.getSectionIndexes(@as(u8, @intCast(i))); - try writer.writeStruct(seg); + var out_seg = seg; + out_seg.cmdsize = @sizeOf(macho.segment_command_64); + out_seg.nsects = 0; + + // Update section headers count; any section with size of 0 is excluded + // since it doesn't have any data in the final binary file. 
for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.size == 0) continue; + out_seg.cmdsize += @sizeOf(macho.section_64); + out_seg.nsects += 1; + } + + if (out_seg.nsects == 0 and + (mem.eql(u8, out_seg.segName(), "__DATA_CONST") or + mem.eql(u8, out_seg.segName(), "__DATA"))) continue; + + try writer.writeStruct(out_seg); + for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.size == 0) continue; try writer.writeStruct(header); } } diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 5bfff40405..61a0898312 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -1152,33 +1152,6 @@ pub const Zld = struct { segment.vmsize = mem.alignForward(u64, segment.vmsize, page_size); } - fn writeSegmentHeaders(self: *Zld, writer: anytype) !void { - for (self.segments.items, 0..) |seg, i| { - const indexes = self.getSectionIndexes(@as(u8, @intCast(i))); - var out_seg = seg; - out_seg.cmdsize = @sizeOf(macho.segment_command_64); - out_seg.nsects = 0; - - // Update section headers count; any section with size of 0 is excluded - // since it doesn't have any data in the final binary file. 
- for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { - if (header.size == 0) continue; - out_seg.cmdsize += @sizeOf(macho.section_64); - out_seg.nsects += 1; - } - - if (out_seg.nsects == 0 and - (mem.eql(u8, out_seg.segName(), "__DATA_CONST") or - mem.eql(u8, out_seg.segName(), "__DATA"))) continue; - - try writer.writeStruct(out_seg); - for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { - if (header.size == 0) continue; - try writer.writeStruct(header); - } - } - } - fn writeLinkeditSegmentData(self: *Zld) !void { try self.writeDyldInfoData(); try self.writeFunctionStarts(); @@ -3035,7 +3008,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr var lc_buffer = std.ArrayList(u8).init(arena); const lc_writer = lc_buffer.writer(); - try zld.writeSegmentHeaders(lc_writer); + try MachO.writeSegmentHeaders(&zld, lc_writer); try lc_writer.writeStruct(zld.dyld_info_cmd); try lc_writer.writeStruct(zld.function_starts_cmd); try lc_writer.writeStruct(zld.data_in_code_cmd); From ef0d35e00cd1320b5f0ffde718422a69be54fe80 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 26 Aug 2023 21:41:13 +0200 Subject: [PATCH 24/57] macho: unify allocating special symbols --- src/link/MachO.zig | 16 ++-- src/link/MachO/zld.zig | 176 ++++++++++++++++++++++++----------------- 2 files changed, 111 insertions(+), 81 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 52346e7863..6b9feeb4a4 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1389,7 +1389,7 @@ fn markRelocsDirtyByAddress(self: *MachO, addr: u64) void { } } -pub fn allocateSpecialSymbols(self: *MachO) !void { +pub fn allocateSpecialSymbols(self: anytype) !void { for (&[_][]const u8{ "___dso_handle", "__mh_execute_header", @@ -1398,11 +1398,13 @@ pub fn allocateSpecialSymbols(self: *MachO) !void { if (global.getFile() != null) continue; const sym = self.getSymbolPtr(global); const seg = 
self.getSegment(self.text_section_index.?); - sym.n_sect = 1; + sym.n_sect = self.text_section_index.? + 1; sym.n_value = seg.vmaddr; - log.debug("allocating {s} at the start of {s}", .{ + log.debug("allocating {s}(@0x{x},sect({d})) at the start of {s}", .{ name, + sym.n_value, + sym.n_sect, seg.segName(), }); } @@ -1479,10 +1481,6 @@ fn createThreadLocalDescriptorAtom(self: *MachO, sym_name: []const u8, target: S fn createMhExecuteHeaderSymbol(self: *MachO) !void { if (self.base.options.output_mode != .Exe) return; - if (self.getGlobal("__mh_execute_header")) |global| { - const sym = self.getSymbol(global); - if (!sym.undf() and !(sym.pext() or sym.weakDef())) return; - } const gpa = self.base.allocator; const sym_index = try self.allocateSymbol(); @@ -3748,9 +3746,7 @@ fn addUndefined(self: *MachO, name: []const u8, action: ResolveAction.Kind) !u32 const gop = try self.getOrPutGlobalPtr(name); const global_index = self.getGlobalIndex(name).?; - if (gop.found_existing) { - return global_index; - } + if (gop.found_existing) return global_index; const sym_index = try self.allocateSymbol(); const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 61a0898312..a9488c81b6 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -78,9 +78,10 @@ pub const Zld = struct { resolver: std.StringHashMapUnmanaged(u32) = .{}, unresolved: std.AutoArrayHashMapUnmanaged(u32, void) = .{}, + locals_free_list: std.ArrayListUnmanaged(u32) = .{}, + globals_free_list: std.ArrayListUnmanaged(u32) = .{}, + entry_index: ?u32 = null, - mh_execute_header_index: ?u32 = null, - dso_handle_index: ?u32 = null, dyld_stub_binder_index: ?u32 = null, dyld_private_atom_index: ?Atom.Index = null, @@ -188,15 +189,23 @@ pub const Zld = struct { } } - fn addUndefined(self: *Zld, name: []const u8) !void { + fn addUndefined(self: *Zld, name: []const u8) !u32 { + const gop = try self.getOrPutGlobalPtr(name); + const global_index = 
self.getGlobalIndex(name).?; + + if (gop.found_existing) return global_index; + const sym_index = try self.allocateSymbol(); const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; + gop.value_ptr.* = sym_loc; + const sym = self.getSymbolPtr(sym_loc); sym.n_strx = try self.strtab.insert(self.gpa, name); sym.n_type = macho.N_UNDF; - const global_index = try self.addGlobal(sym_loc); - try self.resolver.putNoClobber(self.gpa, name, global_index); + try self.unresolved.putNoClobber(self.gpa, global_index, {}); + + return global_index; } fn resolveSymbols(self: *Zld) !void { @@ -205,12 +214,12 @@ pub const Zld = struct { // on the linker line. if (self.options.output_mode == .Exe) { const entry_name = self.options.entry orelse load_commands.default_entry_point; - try self.addUndefined(entry_name); + _ = try self.addUndefined(entry_name); } // Force resolution of any symbols requested by the user. for (self.options.force_undefined_symbols.keys()) |sym_name| { - try self.addUndefined(sym_name); + _ = try self.addUndefined(sym_name); } for (self.objects.items, 0..) |_, object_id| { @@ -222,13 +231,11 @@ pub const Zld = struct { // Finally, force resolution of dyld_stub_binder if there are imports // requested. 
if (self.unresolved.count() > 0) { - try self.addUndefined("dyld_stub_binder"); + self.dyld_stub_binder_index = try self.addUndefined("dyld_stub_binder"); } try self.resolveSymbolsInDylibs(); - self.dyld_stub_binder_index = self.resolver.get("dyld_stub_binder"); - try self.createMhExecuteHeaderSymbol(); try self.createDsoHandleSymbol(); try self.resolveSymbolsAtLoading(); @@ -276,15 +283,16 @@ pub const Zld = struct { const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = object_id + 1 }; - const global_index = self.resolver.get(sym_name) orelse { - const global_index = try self.addGlobal(sym_loc); - try self.resolver.putNoClobber(self.gpa, sym_name, global_index); + const gop = try self.getOrPutGlobalPtr(sym_name); + if (!gop.found_existing) { + gop.value_ptr.* = sym_loc; if (sym.undf() and !sym.tentative()) { - try self.unresolved.putNoClobber(self.gpa, global_index, {}); + try self.unresolved.putNoClobber(self.gpa, self.getGlobalIndex(sym_name).?, {}); } continue; - }; - const global = &self.globals.items[global_index]; + } + const global_index = self.getGlobalIndex(sym_name).?; + const global = gop.value_ptr; const global_sym = self.getSymbol(global.*); // Cases to consider: sym vs global_sym @@ -338,7 +346,7 @@ pub const Zld = struct { const global_object = &self.objects.items[file]; global_object.globals_lookup[global.sym_index] = global_index; } - _ = self.unresolved.swapRemove(self.resolver.get(sym_name).?); + _ = self.unresolved.swapRemove(global_index); global.* = sym_loc; } else { object.globals_lookup[sym_index] = global_index; @@ -448,50 +456,51 @@ pub const Zld = struct { fn createMhExecuteHeaderSymbol(self: *Zld) !void { if (self.options.output_mode != .Exe) return; - if (self.resolver.get("__mh_execute_header")) |global_index| { - const global = self.globals.items[global_index]; - const sym = self.getSymbol(global); - self.mh_execute_header_index = global_index; - if (!sym.undf() and !(sym.pext() or sym.weakDef())) return; - } const gpa = 
self.gpa; const sym_index = try self.allocateSymbol(); const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; const sym = self.getSymbolPtr(sym_loc); - sym.n_strx = try self.strtab.insert(gpa, "__mh_execute_header"); - sym.n_type = macho.N_SECT | macho.N_EXT; - sym.n_desc = macho.REFERENCED_DYNAMICALLY; + sym.* = .{ + .n_strx = try self.strtab.insert(gpa, "__mh_execute_header"), + .n_type = macho.N_SECT | macho.N_EXT, + .n_sect = 0, + .n_desc = macho.REFERENCED_DYNAMICALLY, + .n_value = 0, + }; - if (self.resolver.get("__mh_execute_header")) |global_index| { - const global = &self.globals.items[global_index]; - const global_object = &self.objects.items[global.getFile().?]; - global_object.globals_lookup[global.sym_index] = global_index; - global.* = sym_loc; - self.mh_execute_header_index = global_index; - } else { - self.mh_execute_header_index = try self.addGlobal(sym_loc); + const gop = try self.getOrPutGlobalPtr("__mh_execute_header"); + if (gop.found_existing) { + const global = gop.value_ptr.*; + if (global.getFile()) |file| { + const global_object = &self.objects.items[file]; + global_object.globals_lookup[global.sym_index] = self.getGlobalIndex("__mh_execute_header").?; + } } + gop.value_ptr.* = sym_loc; } fn createDsoHandleSymbol(self: *Zld) !void { - const global_index = self.resolver.get("___dso_handle") orelse return; - const global = &self.globals.items[global_index]; - self.dso_handle_index = global_index; + const global = self.getGlobalPtr("___dso_handle") orelse return; if (!self.getSymbol(global.*).undf()) return; - const gpa = self.gpa; const sym_index = try self.allocateSymbol(); const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; const sym = self.getSymbolPtr(sym_loc); - sym.n_strx = try self.strtab.insert(gpa, "___dso_handle"); - sym.n_type = macho.N_SECT | macho.N_EXT; - sym.n_desc = macho.N_WEAK_DEF; - - const global_object = &self.objects.items[global.getFile().?]; - global_object.globals_lookup[global.sym_index] = global_index; - _ = 
self.unresolved.swapRemove(self.resolver.get("___dso_handle").?); + sym.* = .{ + .n_strx = try self.strtab.insert(self.gpa, "___dso_handle"), + .n_type = macho.N_SECT | macho.N_EXT, + .n_sect = 0, + .n_desc = macho.N_WEAK_DEF, + .n_value = 0, + }; + const global_index = self.getGlobalIndex("___dso_handle").?; + if (global.getFile()) |file| { + const global_object = &self.objects.items[file]; + global_object.globals_lookup[global.sym_index] = global_index; + } global.* = sym_loc; + _ = self.unresolved.swapRemove(global_index); } pub fn deinit(self: *Zld) void { @@ -512,6 +521,8 @@ pub const Zld = struct { self.globals.deinit(gpa); self.resolver.deinit(gpa); self.unresolved.deinit(gpa); + self.locals_free_list.deinit(gpa); + self.globals_free_list.deinit(gpa); for (self.objects.items) |*object| { object.deinit(gpa); @@ -609,10 +620,24 @@ pub const Zld = struct { return index; } - fn addGlobal(self: *Zld, sym_loc: SymbolWithLoc) !u32 { - const global_index = @as(u32, @intCast(self.globals.items.len)); - try self.globals.append(self.gpa, sym_loc); - return global_index; + fn allocateGlobal(self: *Zld) !u32 { + try self.globals.ensureUnusedCapacity(self.gpa, 1); + + const index = blk: { + if (self.globals_free_list.popOrNull()) |index| { + log.debug(" (reusing global index {d})", .{index}); + break :blk index; + } else { + log.debug(" (allocating symbol index {d})", .{self.globals.items.len}); + const index = @as(u32, @intCast(self.globals.items.len)); + _ = self.globals.addOneAssumeCapacity(); + break :blk index; + } + }; + + self.globals.items[index] = .{ .sym_index = 0 }; + + return index; } pub fn addGotEntry(self: *Zld, target: SymbolWithLoc) !void { @@ -656,27 +681,6 @@ pub const Zld = struct { } } - fn allocateSpecialSymbols(self: *Zld) !void { - for (&[_]?u32{ - self.dso_handle_index, - self.mh_execute_header_index, - }) |maybe_index| { - const global_index = maybe_index orelse continue; - const global = self.globals.items[global_index]; - if (global.getFile() 
!= null) continue; - const name = self.getSymbolName(global); - const sym = self.getSymbolPtr(global); - const segment_index = self.getSegmentByName("__TEXT").?; - const seg = self.segments.items[segment_index]; - sym.n_sect = 1; - sym.n_value = seg.vmaddr; - log.debug("allocating {s} at the start of {s}", .{ - name, - seg.segName(), - }); - } - } - fn writeAtoms(self: *Zld) !void { const gpa = self.gpa; const slice = self.sections.slice(); @@ -2037,6 +2041,36 @@ pub const Zld = struct { } } + pub fn getGlobalIndex(self: *const Zld, name: []const u8) ?u32 { + return self.resolver.get(name); + } + + pub fn getGlobalPtr(self: *Zld, name: []const u8) ?*SymbolWithLoc { + const global_index = self.resolver.get(name) orelse return null; + return &self.globals.items[global_index]; + } + + pub fn getGlobal(self: *const Zld, name: []const u8) ?SymbolWithLoc { + const global_index = self.resolver.get(name) orelse return null; + return self.globals.items[global_index]; + } + + const GetOrPutGlobalPtrResult = struct { + found_existing: bool, + value_ptr: *SymbolWithLoc, + }; + + pub fn getOrPutGlobalPtr(self: *Zld, name: []const u8) !GetOrPutGlobalPtrResult { + if (self.getGlobalPtr(name)) |ptr| { + return GetOrPutGlobalPtrResult{ .found_existing = true, .value_ptr = ptr }; + } + const global_index = try self.allocateGlobal(); + const global_name = try self.gpa.dupe(u8, name); + _ = try self.resolver.put(self.gpa, global_name, global_index); + const ptr = &self.globals.items[global_index]; + return GetOrPutGlobalPtrResult{ .found_existing = false, .value_ptr = ptr }; + } + pub fn getGotEntryAddress(self: *Zld, sym_with_loc: SymbolWithLoc) ?u64 { const index = self.got_table.lookup.get(sym_with_loc) orelse return null; const header = self.sections.items(.header)[self.got_section_index.?]; @@ -2934,7 +2968,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try zld.createSegments(); try zld.allocateSegments(); - try zld.allocateSpecialSymbols(); 
+ try MachO.allocateSpecialSymbols(&zld); if (build_options.enable_logging) { zld.logSymtab(); From 7f74b3562deaa0dedfc2945a702fe82dbd103aa2 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 26 Aug 2023 22:05:42 +0200 Subject: [PATCH 25/57] macho: unify creating atoms --- src/link/MachO.zig | 52 +++++++++++++++++++++++---------------- src/link/MachO/Atom.zig | 16 ++++++------ src/link/MachO/Object.zig | 2 +- src/link/MachO/thunks.zig | 2 +- src/link/MachO/zld.zig | 31 +++++++++++------------ 5 files changed, 55 insertions(+), 48 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 6b9feeb4a4..ac07e5c687 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1410,30 +1410,29 @@ pub fn allocateSpecialSymbols(self: anytype) !void { } } -pub fn createAtom(self: *MachO) !Atom.Index { +const CreateAtomOpts = struct { + size: u64 = 0, + alignment: u32 = 0, +}; + +pub fn createAtom(self: *MachO, sym_index: u32, opts: CreateAtomOpts) !Atom.Index { const gpa = self.base.allocator; - const atom_index = @as(Atom.Index, @intCast(self.atoms.items.len)); + const index = @as(Atom.Index, @intCast(self.atoms.items.len)); const atom = try self.atoms.addOne(gpa); - const sym_index = try self.allocateSymbol(); - try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom_index); - atom.* = .{ - .sym_index = sym_index, - .inner_sym_index = 0, - .inner_nsyms_trailing = 0, - .file = 0, - .size = 0, - .alignment = 0, - .prev_index = null, - .next_index = null, - }; - log.debug("creating ATOM(%{d}) at index {d}", .{ sym_index, atom_index }); - return atom_index; + atom.* = .{}; + atom.sym_index = sym_index; + atom.size = opts.size; + atom.alignment = opts.alignment; + log.debug("creating ATOM(%{d}) at index {d}", .{ sym_index, index }); + return index; } fn createDyldPrivateAtom(self: *MachO) !void { if (self.dyld_private_atom_index != null) return; - const atom_index = try self.createAtom(); + const sym_index = try self.allocateSymbol(); + const atom_index = 
try self.createAtom(sym_index, .{}); + try self.atom_by_index_table.putNoClobber(self.base.allocator, sym_index, atom_index); const atom = self.getAtomPtr(atom_index); atom.size = @sizeOf(u64); @@ -1452,7 +1451,9 @@ fn createThreadLocalDescriptorAtom(self: *MachO, sym_name: []const u8, target: S const gpa = self.base.allocator; const size = 3 * @sizeOf(u64); const required_alignment: u32 = 1; - const atom_index = try self.createAtom(); + const sym_index = try self.allocateSymbol(); + const atom_index = try self.createAtom(sym_index, .{}); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom_index); self.getAtomPtr(atom_index).size = size; const sym = self.getAtom(atom_index).getSymbolPtr(self); @@ -1936,7 +1937,9 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu log.debug("allocating symbol indexes for {s}", .{name}); - const atom_index = try self.createAtom(); + const sym_index = try self.allocateSymbol(); + const atom_index = try self.createAtom(sym_index, .{}); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom_index); const res = try codegen.generateSymbol(&self.base, decl.srcLoc(mod), typed_value, &code_buffer, .none, .{ .parent_atom_index = self.getAtom(atom_index).getSymbolIndex().?, @@ -2138,7 +2141,11 @@ pub fn getOrCreateAtomForLazySymbol(self: *MachO, sym: File.LazySymbol) !Atom.In }, }; switch (metadata.state.*) { - .unused => metadata.atom.* = try self.createAtom(), + .unused => { + const sym_index = try self.allocateSymbol(); + metadata.atom.* = try self.createAtom(sym_index, .{}); + try self.atom_by_index_table.putNoClobber(self.base.allocator, sym_index, metadata.atom.*); + }, .pending_flush => return metadata.atom.*, .flushed => {}, } @@ -2250,8 +2257,11 @@ fn updateThreadlocalVariable(self: *MachO, module: *Module, decl_index: Module.D pub fn getOrCreateAtomForDecl(self: *MachO, decl_index: Module.Decl.Index) !Atom.Index { const gop = try self.decls.getOrPut(self.base.allocator, 
decl_index); if (!gop.found_existing) { + const sym_index = try self.allocateSymbol(); + const atom_index = try self.createAtom(sym_index, .{}); + try self.atom_by_index_table.putNoClobber(self.base.allocator, sym_index, atom_index); gop.value_ptr.* = .{ - .atom = try self.createAtom(), + .atom = atom_index, .section = self.getDeclOutputSection(decl_index), .exports = .{}, }; diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index fe118e0e3a..411c42c4dd 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -4,13 +4,13 @@ /// a stub trampoline, it can be found in the linkers `locals` arraylist. /// If this field is 0 and file is 0, it means the codegen size = 0 and there is no symbol or /// offset table entry. -sym_index: u32, +sym_index: u32 = 0, /// 0 means an Atom is a synthetic Atom such as a GOT cell defined by the linker. /// Otherwise, it is the index into appropriate object file (indexing from 1). /// Prefer using `getFile()` helper to get the file index out rather than using /// the field directly. -file: u32, +file: u32 = 0, /// If this Atom is not a synthetic Atom, i.e., references a subsection in an /// Object file, `inner_sym_index` and `inner_nsyms_trailing` tell where and if @@ -18,22 +18,22 @@ file: u32, /// address range. These could for example be an alias symbol which can be used /// internally by the relocation records, or if the Object file couldn't be split /// into subsections, this Atom may encompass an entire input section. -inner_sym_index: u32, -inner_nsyms_trailing: u32, +inner_sym_index: u32 = 0, +inner_nsyms_trailing: u32 = 0, /// Size and alignment of this atom /// Unlike in Elf, we need to store the size of this symbol as part of /// the atom since macho.nlist_64 lacks this information. -size: u64, +size: u64 = 0, /// Alignment of this atom as a power of 2. /// For instance, aligmment of 0 should be read as 2^0 = 1 byte aligned. 
-alignment: u32, +alignment: u32 = 0, /// Points to the previous and next neighbours /// TODO use the same trick as with symbols: reserve index 0 as null atom -next_index: ?Index, -prev_index: ?Index, +next_index: ?Index = null, +prev_index: ?Index = null, pub const Index = u32; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 2eee9f5787..5042fe9849 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -573,7 +573,7 @@ fn createAtomFromSubsection( out_sect_id: u8, ) !Atom.Index { const gpa = zld.gpa; - const atom_index = try zld.createEmptyAtom(sym_index, size, alignment); + const atom_index = try zld.createAtom(sym_index, .{ .size = size, .alignment = alignment }); const atom = zld.getAtomPtr(atom_index); atom.inner_sym_index = inner_sym_index; atom.inner_nsyms_trailing = inner_nsyms_trailing; diff --git a/src/link/MachO/thunks.zig b/src/link/MachO/thunks.zig index da02074abe..c5debcc1fa 100644 --- a/src/link/MachO/thunks.zig +++ b/src/link/MachO/thunks.zig @@ -342,7 +342,7 @@ fn isReachable( fn createThunkAtom(zld: *Zld) !Atom.Index { const sym_index = try zld.allocateSymbol(); - const atom_index = try zld.createEmptyAtom(sym_index, @sizeOf(u32) * 3, 2); + const atom_index = try zld.createAtom(sym_index, .{ .size = @sizeOf(u32) * 3, .alignment = 2 }); const sym = zld.getSymbolPtr(.{ .sym_index = sym_index }); sym.n_type = macho.N_SECT; sym.n_sect = zld.text_section_index.? 
+ 1; diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index a9488c81b6..b9d2d34712 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -112,32 +112,26 @@ pub const Zld = struct { self.sections.set(sym.n_sect - 1, section); } - pub fn createEmptyAtom(self: *Zld, sym_index: u32, size: u64, alignment: u32) !Atom.Index { + const CreateAtomOpts = struct { + size: u64 = 0, + alignment: u32 = 0, + }; + + pub fn createAtom(self: *Zld, sym_index: u32, opts: CreateAtomOpts) !Atom.Index { const gpa = self.gpa; const index = @as(Atom.Index, @intCast(self.atoms.items.len)); const atom = try self.atoms.addOne(gpa); - atom.* = .{ - .sym_index = 0, - .inner_sym_index = 0, - .inner_nsyms_trailing = 0, - .file = 0, - .size = 0, - .alignment = 0, - .prev_index = null, - .next_index = null, - }; + atom.* = .{}; atom.sym_index = sym_index; - atom.size = size; - atom.alignment = alignment; - + atom.size = opts.size; + atom.alignment = opts.alignment; log.debug("creating ATOM(%{d}) at index {d}", .{ sym_index, index }); - return index; } fn createDyldPrivateAtom(self: *Zld) !void { const sym_index = try self.allocateSymbol(); - const atom_index = try self.createEmptyAtom(sym_index, @sizeOf(u64), 3); + const atom_index = try self.createAtom(sym_index, .{ .size = @sizeOf(u64), .alignment = 3 }); const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); sym.n_type = macho.N_SECT; @@ -176,7 +170,10 @@ pub const Zld = struct { .n_value = 0, }; - const atom_index = try self.createEmptyAtom(global.sym_index, size, alignment); + const atom_index = try self.createAtom(global.sym_index, .{ + .size = size, + .alignment = alignment, + }); const atom = self.getAtomPtr(atom_index); atom.file = global.file; From d17867939ee8a9de6bb96748e047b5e561b7606c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 26 Aug 2023 22:22:18 +0200 Subject: [PATCH 26/57] macho: unify writeLinkeditSegmentData func --- src/link/MachO/zld.zig | 36 ++++++++++++++++++++++++++++++++++-- 1 file 
changed, 34 insertions(+), 2 deletions(-) diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index b9d2d34712..a94fc9ea00 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -66,6 +66,13 @@ pub const Zld = struct { segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, sections: std.MultiArrayList(Section) = .{}, + pagezero_segment_cmd_index: ?u8 = null, + header_segment_cmd_index: ?u8 = null, + text_segment_cmd_index: ?u8 = null, + data_const_segment_cmd_index: ?u8 = null, + data_segment_cmd_index: ?u8 = null, + linkedit_segment_cmd_index: ?u8 = null, + text_section_index: ?u8 = null, got_section_index: ?u8 = null, tlv_ptr_section_index: ?u8 = null, @@ -550,6 +557,7 @@ pub const Zld = struct { log.warn("requested __PAGEZERO size (0x{x}) is not page aligned", .{pagezero_vmsize}); log.warn(" rounding down to 0x{x}", .{aligned_pagezero_vmsize}); } + self.pagezero_segment_cmd_index = @intCast(self.segments.items.len); try self.segments.append(self.gpa, .{ .cmdsize = @sizeOf(macho.segment_command_64), .segname = makeStaticString("__PAGEZERO"), @@ -560,6 +568,8 @@ pub const Zld = struct { // __TEXT segment is non-optional { const protection = MachO.getSegmentMemoryProtection("__TEXT"); + self.text_segment_cmd_index = @intCast(self.segments.items.len); + self.header_segment_cmd_index = self.text_segment_cmd_index.?; try self.segments.append(self.gpa, .{ .cmdsize = @sizeOf(macho.segment_command_64), .segname = makeStaticString("__TEXT"), @@ -590,9 +600,18 @@ pub const Zld = struct { self.sections.items(.segment_index)[sect_id] = segment_id; } + if (self.getSegmentByName("__DATA_CONST")) |index| { + self.data_const_segment_cmd_index = index; + } + + if (self.getSegmentByName("__DATA")) |index| { + self.data_segment_cmd_index = index; + } + // __LINKEDIT always comes last { const protection = MachO.getSegmentMemoryProtection("__LINKEDIT"); + self.linkedit_segment_cmd_index = @intCast(self.segments.items.len); try 
self.segments.append(self.gpa, .{ .cmdsize = @sizeOf(macho.segment_command_64), .segname = makeStaticString("__LINKEDIT"), @@ -1154,13 +1173,26 @@ pub const Zld = struct { } fn writeLinkeditSegmentData(self: *Zld) !void { + const page_size = MachO.getPageSize(self.options.target.cpu.arch); + const seg = self.getLinkeditSegmentPtr(); + seg.filesize = 0; + seg.vmsize = 0; + + for (self.segments.items, 0..) |segment, id| { + if (self.linkedit_segment_cmd_index.? == @as(u8, @intCast(id))) continue; + if (seg.vmaddr < segment.vmaddr + segment.vmsize) { + seg.vmaddr = mem.alignForward(u64, segment.vmaddr + segment.vmsize, page_size); + } + if (seg.fileoff < segment.fileoff + segment.filesize) { + seg.fileoff = mem.alignForward(u64, segment.fileoff + segment.filesize, page_size); + } + } try self.writeDyldInfoData(); try self.writeFunctionStarts(); try self.writeDataInCode(); try self.writeSymtabs(); - const seg = self.getLinkeditSegmentPtr(); - seg.vmsize = mem.alignForward(u64, seg.filesize, MachO.getPageSize(self.options.target.cpu.arch)); + seg.vmsize = mem.alignForward(u64, seg.filesize, page_size); } fn collectRebaseData(self: *Zld, rebase: *Rebase) !void { From 7f6973fbae3b01827a71497f647ee27e64845f51 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 26 Aug 2023 22:47:09 +0200 Subject: [PATCH 27/57] macho: unify segment handling --- src/link/MachO/zld.zig | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index a94fc9ea00..1333f33fa9 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -1394,8 +1394,7 @@ pub const Zld = struct { // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
log.debug("collecting export data", .{}); - const segment_index = self.getSegmentByName("__TEXT").?; - const exec_segment = self.segments.items[segment_index]; + const exec_segment = self.segments.items[self.header_segment_cmd_index.?]; const base_address = exec_segment.vmaddr; for (self.globals.items) |global| { @@ -1517,10 +1516,8 @@ pub const Zld = struct { } fn writeFunctionStarts(self: *Zld) !void { - const text_seg_index = self.getSegmentByName("__TEXT") orelse return; - const text_seg = self.segments.items[text_seg_index]; - const gpa = self.gpa; + const seg = self.segments.items[self.header_segment_cmd_index.?]; // We need to sort by address first var addresses = std.ArrayList(u64).init(gpa); @@ -1547,7 +1544,7 @@ pub const Zld = struct { var last_off: u32 = 0; for (addresses.items) |addr| { - const offset = @as(u32, @intCast(addr - text_seg.vmaddr)); + const offset = @as(u32, @intCast(addr - seg.vmaddr)); const diff = offset - last_off; if (diff == 0) continue; @@ -1908,7 +1905,7 @@ pub const Zld = struct { } fn writeCodeSignature(self: *Zld, comp: *const Compilation, code_sig: *CodeSignature) !void { - const seg_id = self.getSegmentByName("__TEXT").?; + const seg_id = self.header_segment_cmd_index.?; const seg = self.segments.items[seg_id]; var buffer = std.ArrayList(u8).init(self.gpa); @@ -3026,7 +3023,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr // that the free space between the end of the last non-zerofill section of __DATA // segment and the beginning of __LINKEDIT segment is zerofilled as the loader will // copy-paste this space into memory for quicker zerofill operation. 
- if (zld.getSegmentByName("__DATA")) |data_seg_id| blk: { + if (zld.data_segment_cmd_index) |data_seg_id| blk: { var physical_zerofill_start: ?u64 = null; const section_indexes = zld.getSectionIndexes(data_seg_id); for (zld.sections.items(.header)[section_indexes.start..section_indexes.end]) |header| { @@ -3080,7 +3077,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try load_commands.writeDylinkerLC(lc_writer); if (zld.options.output_mode == .Exe) { - const seg_id = zld.getSegmentByName("__TEXT").?; + const seg_id = zld.header_segment_cmd_index.?; const seg = zld.segments.items[seg_id]; const global = zld.getEntryPoint(); const sym = zld.getSymbol(global); From 84853c5c56e87a7ee6c5392756b0773b650d283c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 26 Aug 2023 23:05:32 +0200 Subject: [PATCH 28/57] macho: unify resolving globals --- src/link/MachO/zld.zig | 147 +++++++++++++++++++++-------------------- 1 file changed, 77 insertions(+), 70 deletions(-) diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 1333f33fa9..c607c80f45 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -245,6 +245,82 @@ pub const Zld = struct { try self.resolveSymbolsAtLoading(); } + fn resolveGlobalSymbol(self: *Zld, current: SymbolWithLoc) !void { + const gpa = self.gpa; + const sym = self.getSymbol(current); + const sym_name = self.getSymbolName(current); + + const gop = try self.getOrPutGlobalPtr(sym_name); + if (!gop.found_existing) { + gop.value_ptr.* = current; + if (sym.undf() and !sym.tentative()) { + try self.unresolved.putNoClobber(gpa, self.getGlobalIndex(sym_name).?, {}); + } + return; + } + const global_index = self.getGlobalIndex(sym_name).?; + const global = gop.value_ptr.*; + const global_sym = self.getSymbol(global); + + // Cases to consider: sym vs global_sym + // 1. strong(sym) and strong(global_sym) => error + // 2. strong(sym) and weak(global_sym) => sym + // 3. 
strong(sym) and tentative(global_sym) => sym + // 4. strong(sym) and undf(global_sym) => sym + // 5. weak(sym) and strong(global_sym) => global_sym + // 6. weak(sym) and tentative(global_sym) => sym + // 7. weak(sym) and undf(global_sym) => sym + // 8. tentative(sym) and strong(global_sym) => global_sym + // 9. tentative(sym) and weak(global_sym) => global_sym + // 10. tentative(sym) and tentative(global_sym) => pick larger + // 11. tentative(sym) and undf(global_sym) => sym + // 12. undf(sym) and * => global_sym + // + // Reduces to: + // 1. strong(sym) and strong(global_sym) => error + // 2. * and strong(global_sym) => global_sym + // 3. weak(sym) and weak(global_sym) => global_sym + // 4. tentative(sym) and tentative(global_sym) => pick larger + // 5. undf(sym) and * => global_sym + // 6. else => sym + + const sym_is_strong = sym.sect() and !(sym.weakDef() or sym.pext()); + const global_is_strong = global_sym.sect() and !(global_sym.weakDef() or global_sym.pext()); + const sym_is_weak = sym.sect() and (sym.weakDef() or sym.pext()); + const global_is_weak = global_sym.sect() and (global_sym.weakDef() or global_sym.pext()); + + if (sym_is_strong and global_is_strong) { + log.err("symbol '{s}' defined multiple times", .{sym_name}); + if (global.getFile()) |file| { + log.err(" first definition in '{s}'", .{self.objects.items[file].name}); + } + if (current.getFile()) |file| { + log.err(" next definition in '{s}'", .{self.objects.items[file].name}); + } + return error.MultipleSymbolDefinitions; + } + + if (current.getFile()) |file| { + const object = &self.objects.items[file]; + object.globals_lookup[current.sym_index] = global_index; + } + + if (global_is_strong) return; + if (sym_is_weak and global_is_weak) return; + if (sym.tentative() and global_sym.tentative()) { + if (global_sym.n_value >= sym.n_value) return; + } + if (sym.undf() and !sym.tentative()) return; + + if (global.getFile()) |file| { + const global_object = &self.objects.items[file]; + 
global_object.globals_lookup[global.sym_index] = global_index; + } + _ = self.unresolved.swapRemove(global_index); + + gop.value_ptr.* = current; + } + fn resolveSymbolsInObject(self: *Zld, object_id: u32) !void { const object = &self.objects.items[object_id]; const in_symtab = object.in_symtab orelse return; @@ -285,76 +361,7 @@ pub const Zld = struct { continue; } - const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = object_id + 1 }; - - const gop = try self.getOrPutGlobalPtr(sym_name); - if (!gop.found_existing) { - gop.value_ptr.* = sym_loc; - if (sym.undf() and !sym.tentative()) { - try self.unresolved.putNoClobber(self.gpa, self.getGlobalIndex(sym_name).?, {}); - } - continue; - } - const global_index = self.getGlobalIndex(sym_name).?; - const global = gop.value_ptr; - const global_sym = self.getSymbol(global.*); - - // Cases to consider: sym vs global_sym - // 1. strong(sym) and strong(global_sym) => error - // 2. strong(sym) and weak(global_sym) => sym - // 3. strong(sym) and tentative(global_sym) => sym - // 4. strong(sym) and undf(global_sym) => sym - // 5. weak(sym) and strong(global_sym) => global_sym - // 6. weak(sym) and tentative(global_sym) => sym - // 7. weak(sym) and undf(global_sym) => sym - // 8. tentative(sym) and strong(global_sym) => global_sym - // 9. tentative(sym) and weak(global_sym) => global_sym - // 10. tentative(sym) and tentative(global_sym) => pick larger - // 11. tentative(sym) and undf(global_sym) => sym - // 12. undf(sym) and * => global_sym - // - // Reduces to: - // 1. strong(sym) and strong(global_sym) => error - // 2. * and strong(global_sym) => global_sym - // 3. weak(sym) and weak(global_sym) => global_sym - // 4. tentative(sym) and tentative(global_sym) => pick larger - // 5. undf(sym) and * => global_sym - // 6. 
else => sym - - const sym_is_strong = sym.sect() and !(sym.weakDef() or sym.pext()); - const global_is_strong = global_sym.sect() and !(global_sym.weakDef() or global_sym.pext()); - const sym_is_weak = sym.sect() and (sym.weakDef() or sym.pext()); - const global_is_weak = global_sym.sect() and (global_sym.weakDef() or global_sym.pext()); - - if (sym_is_strong and global_is_strong) { - log.err("symbol '{s}' defined multiple times", .{sym_name}); - if (global.getFile()) |file| { - log.err(" first definition in '{s}'", .{self.objects.items[file].name}); - } - log.err(" next definition in '{s}'", .{self.objects.items[object_id].name}); - return error.MultipleSymbolDefinitions; - } - - const update_global = blk: { - if (global_is_strong) break :blk false; - if (sym_is_weak and global_is_weak) break :blk false; - if (sym.tentative() and global_sym.tentative()) { - if (global_sym.n_value >= sym.n_value) break :blk false; - } - if (sym.undf() and !sym.tentative()) break :blk false; - break :blk true; - }; - - if (update_global) { - if (global.getFile()) |file| { - const global_object = &self.objects.items[file]; - global_object.globals_lookup[global.sym_index] = global_index; - } - _ = self.unresolved.swapRemove(global_index); - global.* = sym_loc; - } else { - object.globals_lookup[sym_index] = global_index; - } + try self.resolveGlobalSymbol(.{ .sym_index = sym_index, .file = object_id + 1 }); } } From 42e0850d78e63fcc602dd0e167ac90dfb3cfec02 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 27 Aug 2023 07:31:29 +0200 Subject: [PATCH 29/57] macho: save indexes to all sections of interest --- src/link/MachO.zig | 13 ++++++--- src/link/MachO/Atom.zig | 50 ++++++++++++++++++++++++++++------- src/link/MachO/Object.zig | 18 ++++++++----- src/link/MachO/UnwindInfo.zig | 8 +++--- src/link/MachO/eh_frame.zig | 4 +-- src/link/MachO/zld.zig | 45 +++++++++++++++++++++++++------ 6 files changed, 105 insertions(+), 33 deletions(-) diff --git a/src/link/MachO.zig 
b/src/link/MachO.zig index ac07e5c687..de723639f1 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -33,14 +33,19 @@ data_segment_cmd_index: ?u8 = null, linkedit_segment_cmd_index: ?u8 = null, text_section_index: ?u8 = null, +data_const_section_index: ?u8 = null, +data_section_index: ?u8 = null, +bss_section_index: ?u8 = null, +thread_vars_section_index: ?u8 = null, +thread_data_section_index: ?u8 = null, +thread_bss_section_index: ?u8 = null, +eh_frame_section_index: ?u8 = null, +unwind_info_section_index: ?u8 = null, stubs_section_index: ?u8 = null, stub_helper_section_index: ?u8 = null, got_section_index: ?u8 = null, -data_const_section_index: ?u8 = null, la_symbol_ptr_section_index: ?u8 = null, -data_section_index: ?u8 = null, -thread_vars_section_index: ?u8 = null, -thread_data_section_index: ?u8 = null, +tlv_ptr_section_index: ?u8 = null, locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, globals: std.ArrayListUnmanaged(SymbolWithLoc) = .{}, diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 411c42c4dd..73099184e0 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -244,13 +244,16 @@ pub fn getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 { .{}, ); } else if (mem.eql(u8, sectname, "__data")) { - break :blk zld.getSectionByName("__DATA", "__data") orelse try MachO.initSection( - gpa, - zld, - "__DATA", - "__data", - .{}, - ); + if (zld.data_section_index == null) { + zld.data_section_index = try MachO.initSection( + gpa, + zld, + "__DATA", + "__data", + .{}, + ); + } + break :blk zld.data_section_index.?; } } break :blk zld.getSectionByName(segname, sectname) orelse try MachO.initSection( @@ -264,6 +267,35 @@ pub fn getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 { else => break :blk null, } }; + + // TODO we can do this directly in the selection logic above. + // Or is it not worth it? 
+ if (zld.data_const_section_index == null) { + if (zld.getSectionByName("__DATA_CONST", "__const")) |index| { + zld.data_const_section_index = index; + } + } + if (zld.thread_vars_section_index == null) { + if (zld.getSectionByName("__DATA", "__thread_vars")) |index| { + zld.thread_vars_section_index = index; + } + } + if (zld.thread_data_section_index == null) { + if (zld.getSectionByName("__DATA", "__thread_data")) |index| { + zld.thread_data_section_index = index; + } + } + if (zld.thread_bss_section_index == null) { + if (zld.getSectionByName("__DATA", "__thread_bss")) |index| { + zld.thread_bss_section_index = index; + } + } + if (zld.bss_section_index == null) { + if (zld.getSectionByName("__DATA", "__bss")) |index| { + zld.bss_section_index = index; + } + } + return res; } @@ -662,9 +694,9 @@ pub fn getRelocTargetAddress(zld: *Zld, target: SymbolWithLoc, is_tlv: bool) !u6 // * wrt to __thread_data if defined, then // * wrt to __thread_bss const sect_id: u16 = sect_id: { - if (zld.getSectionByName("__DATA", "__thread_data")) |i| { + if (zld.thread_data_section_index) |i| { break :sect_id i; - } else if (zld.getSectionByName("__DATA", "__thread_bss")) |i| { + } else if (zld.thread_bss_section_index) |i| { break :sect_id i; } else { log.err("threadlocal variables present but no initializer sections found", .{}); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 5042fe9849..3ab62ec191 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -687,8 +687,8 @@ fn parseEhFrameSection(self: *Object, zld: *Zld, object_id: u32) !void { const gpa = zld.gpa; - if (zld.getSectionByName("__TEXT", "__eh_frame") == null) { - _ = try MachO.initSection(gpa, zld, "__TEXT", "__eh_frame", .{}); + if (zld.eh_frame_section_index == null) { + zld.eh_frame_section_index = try MachO.initSection(gpa, zld, "__TEXT", "__eh_frame", .{}); } const cpu_arch = zld.options.target.cpu.arch; @@ -788,8 +788,14 @@ fn parseUnwindInfo(self: *Object, zld: 
*Zld, object_id: u32) !void { // approach. However, we will only synthesise DWARF records and nothing more. For this reason, // we still create the output `__TEXT,__unwind_info` section. if (self.hasEhFrameRecords()) { - if (zld.getSectionByName("__TEXT", "__unwind_info") == null) { - _ = try MachO.initSection(gpa, zld, "__TEXT", "__unwind_info", .{}); + if (zld.unwind_info_section_index == null) { + zld.unwind_info_section_index = try MachO.initSection( + gpa, + zld, + "__TEXT", + "__unwind_info", + .{}, + ); } } return; @@ -797,8 +803,8 @@ fn parseUnwindInfo(self: *Object, zld: *Zld, object_id: u32) !void { log.debug("parsing unwind info in {s}", .{self.name}); - if (zld.getSectionByName("__TEXT", "__unwind_info") == null) { - _ = try MachO.initSection(gpa, zld, "__TEXT", "__unwind_info", .{}); + if (zld.unwind_info_section_index == null) { + zld.unwind_info_section_index = try MachO.initSection(gpa, zld, "__TEXT", "__unwind_info", .{}); } const unwind_records = self.getUnwindRecords(); diff --git a/src/link/MachO/UnwindInfo.zig b/src/link/MachO/UnwindInfo.zig index 53d7c149be..3cd72fd64e 100644 --- a/src/link/MachO/UnwindInfo.zig +++ b/src/link/MachO/UnwindInfo.zig @@ -204,7 +204,7 @@ pub fn deinit(info: *UnwindInfo) void { } pub fn scanRelocs(zld: *Zld) !void { - if (zld.getSectionByName("__TEXT", "__unwind_info") == null) return; + if (zld.unwind_info_section_index == null) return; const cpu_arch = zld.options.target.cpu.arch; for (zld.objects.items, 0..) 
|*object, object_id| { @@ -233,7 +233,7 @@ pub fn scanRelocs(zld: *Zld) !void { } pub fn collect(info: *UnwindInfo, zld: *Zld) !void { - if (zld.getSectionByName("__TEXT", "__unwind_info") == null) return; + if (zld.unwind_info_section_index == null) return; const cpu_arch = zld.options.target.cpu.arch; @@ -551,7 +551,7 @@ fn collectPersonalityFromDwarf( } pub fn calcSectionSize(info: UnwindInfo, zld: *Zld) !void { - const sect_id = zld.getSectionByName("__TEXT", "__unwind_info") orelse return; + const sect_id = zld.unwind_info_section_index orelse return; const sect = &zld.sections.items(.header)[sect_id]; sect.@"align" = 2; sect.size = info.calcRequiredSize(); @@ -570,7 +570,7 @@ fn calcRequiredSize(info: UnwindInfo) usize { } pub fn write(info: *UnwindInfo, zld: *Zld) !void { - const sect_id = zld.getSectionByName("__TEXT", "__unwind_info") orelse return; + const sect_id = zld.unwind_info_section_index orelse return; const sect = &zld.sections.items(.header)[sect_id]; const seg_id = zld.sections.items(.segment_index)[sect_id]; const seg = zld.segments.items[seg_id]; diff --git a/src/link/MachO/eh_frame.zig b/src/link/MachO/eh_frame.zig index 5d267af5ff..2bcf23bff5 100644 --- a/src/link/MachO/eh_frame.zig +++ b/src/link/MachO/eh_frame.zig @@ -46,7 +46,7 @@ pub fn scanRelocs(zld: *Zld) !void { } pub fn calcSectionSize(zld: *Zld, unwind_info: *const UnwindInfo) !void { - const sect_id = zld.getSectionByName("__TEXT", "__eh_frame") orelse return; + const sect_id = zld.eh_frame_section_index orelse return; const sect = &zld.sections.items(.header)[sect_id]; sect.@"align" = 3; sect.size = 0; @@ -97,7 +97,7 @@ pub fn calcSectionSize(zld: *Zld, unwind_info: *const UnwindInfo) !void { } pub fn write(zld: *Zld, unwind_info: *UnwindInfo) !void { - const sect_id = zld.getSectionByName("__TEXT", "__eh_frame") orelse return; + const sect_id = zld.eh_frame_section_index orelse return; const sect = zld.sections.items(.header)[sect_id]; const seg_id = 
zld.sections.items(.segment_index)[sect_id]; const seg = zld.segments.items[seg_id]; diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index c607c80f45..28a8e9b8a8 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -74,6 +74,14 @@ pub const Zld = struct { linkedit_segment_cmd_index: ?u8 = null, text_section_index: ?u8 = null, + data_const_section_index: ?u8 = null, + data_section_index: ?u8 = null, + bss_section_index: ?u8 = null, + thread_vars_section_index: ?u8 = null, + thread_data_section_index: ?u8 = null, + thread_bss_section_index: ?u8 = null, + eh_frame_section_index: ?u8 = null, + unwind_info_section_index: ?u8 = null, got_section_index: ?u8 = null, tlv_ptr_section_index: ?u8 = null, stubs_section_index: ?u8 = null, @@ -142,9 +150,10 @@ pub const Zld = struct { const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); sym.n_type = macho.N_SECT; - const sect_id = self.getSectionByName("__DATA", "__data") orelse - try MachO.initSection(self.gpa, self, "__DATA", "__data", .{}); - sym.n_sect = sect_id + 1; + if (self.data_section_index == null) { + self.data_section_index = try MachO.initSection(self.gpa, self, "__DATA", "__data", .{}); + } + sym.n_sect = self.data_section_index.? + 1; self.dyld_private_atom_index = atom_index; self.addAtomToSection(atom_index); @@ -166,13 +175,17 @@ pub const Zld = struct { // text blocks for each tentative definition. const size = sym.n_value; const alignment = (sym.n_desc >> 8) & 0x0f; - const sect_id = self.getSectionByName("__DATA", "__bss") orelse - try MachO.initSection(gpa, self, "__DATA", "__bss", .{ .flags = macho.S_ZEROFILL }); + + if (self.bss_section_index == null) { + self.bss_section_index = try MachO.initSection(gpa, self, "__DATA", "__bss", .{ + .flags = macho.S_ZEROFILL, + }); + } sym.* = .{ .n_strx = sym.n_strx, .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = sect_id + 1, + .n_sect = self.bss_section_index.? 
+ 1, .n_desc = 0, .n_value = 0, }; @@ -768,7 +781,7 @@ pub const Zld = struct { const atom_index = self.dyld_private_atom_index orelse return; const atom = self.getAtom(atom_index); const sym = self.getSymbol(atom.getSymbolWithLoc()); - const sect_id = self.getSectionByName("__DATA", "__data").?; + const sect_id = self.data_section_index.?; const header = self.sections.items(.header)[sect_id]; const offset = sym.n_value - header.addr + header.offset; log.debug("writing __dyld_private at offset 0x{x}", .{offset}); @@ -918,6 +931,14 @@ pub const Zld = struct { }); for (&[_]*?u8{ &self.text_section_index, + &self.data_const_section_index, + &self.data_section_index, + &self.bss_section_index, + &self.thread_vars_section_index, + &self.thread_data_section_index, + &self.thread_bss_section_index, + &self.eh_frame_section_index, + &self.unwind_info_section_index, &self.got_section_index, &self.tlv_ptr_section_index, &self.stubs_section_index, @@ -951,6 +972,14 @@ pub const Zld = struct { for (&[_]*?u8{ &self.text_section_index, + &self.data_const_section_index, + &self.data_section_index, + &self.bss_section_index, + &self.thread_vars_section_index, + &self.thread_data_section_index, + &self.thread_bss_section_index, + &self.eh_frame_section_index, + &self.unwind_info_section_index, &self.got_section_index, &self.tlv_ptr_section_index, &self.stubs_section_index, @@ -1964,7 +1993,7 @@ pub const Zld = struct { else => unreachable, } - if (self.getSectionByName("__DATA", "__thread_vars")) |sect_id| { + if (self.thread_vars_section_index) |sect_id| { header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; if (self.sections.items(.header)[sect_id].size > 0) { header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; From 2c68fb3d7ce077fba711747ee7b05b2fa0df6bcc Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 27 Aug 2023 09:55:24 +0200 Subject: [PATCH 30/57] macho: merge Zld state with MachO state --- src/link/MachO.zig | 1391 +++++++++-- src/link/MachO/Archive.zig | 24 +- 
src/link/MachO/Atom.zig | 394 ++- src/link/MachO/CodeSignature.zig | 364 +-- src/link/MachO/DebugSymbols.zig | 46 +- src/link/MachO/DwarfInfo.zig | 28 +- src/link/MachO/Dylib.zig | 40 +- src/link/MachO/Object.zig | 155 +- src/link/MachO/Trie.zig | 484 ++-- src/link/MachO/UnwindInfo.zig | 140 +- src/link/MachO/dead_strip.zig | 251 +- src/link/MachO/dyld_info/Rebase.zig | 22 +- src/link/MachO/dyld_info/bind.zig | 18 +- src/link/MachO/eh_frame.zig | 127 +- src/link/MachO/fat.zig | 12 +- src/link/MachO/hasher.zig | 18 +- src/link/MachO/load_commands.zig | 20 +- src/link/MachO/stubs.zig | 10 +- src/link/MachO/thunks.zig | 169 +- src/link/MachO/uuid.zig | 18 +- src/link/MachO/zld.zig | 3426 ++++++--------------------- 21 files changed, 3040 insertions(+), 4117 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index de723639f1..ac7b4af988 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -405,9 +405,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No const in_file = try std.fs.cwd().openFile(path, .{}); defer in_file.close(); - parseLibrary( - self, - self.base.allocator, + self.parseLibrary( in_file, path, lib, @@ -421,24 +419,15 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No }; } - parseDependentLibs(self, self.base.allocator, &dependent_libs, &self.base.options) catch |err| { + self.parseDependentLibs(&dependent_libs, &self.base.options) catch |err| { // TODO convert to error log.err("parsing dependent libraries failed with err {s}", .{@errorName(err)}); }; } - if (self.dyld_stub_binder_index == null) { - self.dyld_stub_binder_index = try self.addUndefined("dyld_stub_binder", .add_got); - } - if (!self.base.options.single_threaded) { - _ = try self.addUndefined("__tlv_bootstrap", .none); - } - - try self.createMhExecuteHeaderSymbol(); - var actions = std.ArrayList(ResolveAction).init(self.base.allocator); defer actions.deinit(); - try self.resolveSymbolsInDylibs(&actions); 
+ try self.resolveSymbols(&actions); if (self.getEntryPoint() == null) { self.error_flags.no_entry_point_found = true; @@ -527,14 +516,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.writeLinkeditSegmentData(); - const target = self.base.options.target; - const requires_codesig = blk: { - if (self.base.options.entitlements) |_| break :blk true; - if (target.cpu.arch == .aarch64 and (target.os.tag == .macos or target.abi == .simulator)) - break :blk true; - break :blk false; - }; - var codesig: ?CodeSignature = if (requires_codesig) blk: { + var codesig: ?CodeSignature = if (self.requiresCodeSignature()) blk: { // Preallocate space for the code signature. // We need to do this at this stage so that we have the load commands with proper values // written out to the file. @@ -596,14 +578,14 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try load_commands.writeLoadDylibLCs(self.dylibs.items, self.referenced_dylibs.keys(), lc_writer); - if (requires_codesig) { + if (codesig != null) { try lc_writer.writeStruct(self.codesig_cmd); } const ncmds = load_commands.calcNumOfLCs(lc_buffer.items); try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64)); try self.writeHeader(ncmds, @as(u32, @intCast(lc_buffer.items.len))); - try self.writeUuid(comp, uuid_cmd_offset, requires_codesig); + try self.writeUuid(comp, uuid_cmd_offset, codesig != null); if (codesig) |*csig| { try self.writeCodeSignature(comp, csig); // code signing always comes last @@ -729,8 +711,7 @@ fn resolveLib( } pub fn parsePositional( - ctx: anytype, - gpa: Allocator, + self: *MachO, file: std.fs.File, path: []const u8, must_link: bool, @@ -741,9 +722,9 @@ pub fn parsePositional( defer tracy.end(); if (Object.isObject(file)) { - try parseObject(ctx, gpa, file, path, link_options); + try self.parseObject(file, path, link_options); } else { - try parseLibrary(ctx, gpa, file, path, .{ + try 
self.parseLibrary(file, path, .{ .path = null, .needed = false, .weak = false, @@ -752,8 +733,7 @@ pub fn parsePositional( } fn parseObject( - ctx: anytype, - gpa: Allocator, + self: *MachO, file: std.fs.File, path: []const u8, link_options: *const link.Options, @@ -761,6 +741,7 @@ fn parseObject( const tracy = trace(@src()); defer tracy.end(); + const gpa = self.base.allocator; const mtime: u64 = mtime: { const stat = file.stat() catch break :mtime 0; break :mtime @as(u64, @intCast(@divFloor(stat.mtime, 1_000_000_000))); @@ -776,7 +757,7 @@ fn parseObject( }; errdefer object.deinit(gpa); try object.parse(gpa); - try ctx.objects.append(gpa, object); + try self.objects.append(gpa, object); const cpu_arch: std.Target.Cpu.Arch = switch (object.header.cputype) { macho.CPU_TYPE_ARM64 => .aarch64, @@ -796,8 +777,7 @@ fn parseObject( } pub fn parseLibrary( - ctx: anytype, - gpa: Allocator, + self: *MachO, file: std.fs.File, path: []const u8, lib: link.SystemLib, @@ -811,16 +791,16 @@ pub fn parseLibrary( const cpu_arch = link_options.target.cpu.arch; if (fat.isFatLibrary(file)) { - const offset = parseFatLibrary(ctx, file, path, cpu_arch) catch |err| switch (err) { + const offset = self.parseFatLibrary(file, path, cpu_arch) catch |err| switch (err) { error.MissingArch => return, else => |e| return e, }; try file.seekTo(offset); if (Archive.isArchive(file, offset)) { - try parseArchive(ctx, gpa, path, offset, must_link, cpu_arch); + try self.parseArchive(path, offset, must_link, cpu_arch); } else if (Dylib.isDylib(file, offset)) { - try parseDylib(ctx, gpa, file, path, offset, dependent_libs, link_options, .{ + try self.parseDylib(file, path, offset, dependent_libs, link_options, .{ .needed = lib.needed, .weak = lib.weak, }); @@ -830,14 +810,14 @@ pub fn parseLibrary( return; } } else if (Archive.isArchive(file, 0)) { - try parseArchive(ctx, gpa, path, 0, must_link, cpu_arch); + try self.parseArchive(path, 0, must_link, cpu_arch); } else if (Dylib.isDylib(file, 0)) { - try 
parseDylib(ctx, gpa, file, path, 0, dependent_libs, link_options, .{ + try self.parseDylib(file, path, 0, dependent_libs, link_options, .{ .needed = lib.needed, .weak = lib.weak, }); } else { - parseLibStub(ctx, gpa, file, path, dependent_libs, link_options, .{ + self.parseLibStub(file, path, dependent_libs, link_options, .{ .needed = lib.needed, .weak = lib.weak, }) catch |err| switch (err) { @@ -852,12 +832,12 @@ pub fn parseLibrary( } pub fn parseFatLibrary( - ctx: anytype, + self: *MachO, file: std.fs.File, path: []const u8, cpu_arch: std.Target.Cpu.Arch, ) !u64 { - _ = ctx; + _ = self; var buffer: [2]fat.Arch = undefined; const fat_archs = try fat.parseArchs(file, &buffer); const offset = for (fat_archs) |arch| { @@ -871,13 +851,13 @@ pub fn parseFatLibrary( } fn parseArchive( - ctx: anytype, - gpa: Allocator, + self: *MachO, path: []const u8, fat_offset: u64, must_link: bool, cpu_arch: std.Target.Cpu.Arch, ) !void { + const gpa = self.base.allocator; // We take ownership of the file so that we can store it for the duration of symbol resolution. // TODO we shouldn't need to do that and could pre-parse the archive like we do for zld/ELF? 
@@ -929,10 +909,10 @@ fn parseArchive( } for (offsets.keys()) |off| { const object = try archive.parseObject(gpa, off); - try ctx.objects.append(gpa, object); + try self.objects.append(gpa, object); } } else { - try ctx.archives.append(gpa, archive); + try self.archives.append(gpa, archive); } } @@ -944,8 +924,7 @@ const DylibOpts = struct { }; fn parseDylib( - ctx: anytype, - gpa: Allocator, + self: *MachO, file: std.fs.File, path: []const u8, offset: u64, @@ -953,6 +932,7 @@ fn parseDylib( link_options: *const link.Options, dylib_options: DylibOpts, ) !void { + const gpa = self.base.allocator; const self_cpu_arch = link_options.target.cpu.arch; const file_stat = try file.stat(); @@ -968,7 +948,7 @@ fn parseDylib( try dylib.parseFromBinary( gpa, - @intCast(ctx.dylibs.items.len), // TODO defer it till later + @intCast(self.dylibs.items.len), // TODO defer it till later dependent_libs, path, contents, @@ -991,7 +971,7 @@ fn parseDylib( // TODO verify platform - addDylib(ctx, gpa, dylib, link_options, .{ + self.addDylib(dylib, link_options, .{ .needed = dylib_options.needed, .weak = dylib_options.weak, }) catch |err| switch (err) { @@ -1001,14 +981,14 @@ fn parseDylib( } fn parseLibStub( - ctx: anytype, - gpa: Allocator, + self: *MachO, file: std.fs.File, path: []const u8, dependent_libs: anytype, link_options: *const link.Options, dylib_options: DylibOpts, ) !void { + const gpa = self.base.allocator; var lib_stub = try LibStub.loadFromFile(gpa, file); defer lib_stub.deinit(); @@ -1023,12 +1003,12 @@ fn parseLibStub( gpa, link_options.target, lib_stub, - @intCast(ctx.dylibs.items.len), // TODO defer it till later + @intCast(self.dylibs.items.len), // TODO defer it till later dependent_libs, path, ); - addDylib(ctx, gpa, dylib, link_options, .{ + self.addDylib(dylib, link_options, .{ .needed = dylib_options.needed, .weak = dylib_options.weak, }) catch |err| switch (err) { @@ -1038,8 +1018,7 @@ fn parseLibStub( } fn addDylib( - ctx: anytype, - gpa: Allocator, + self: 
*MachO, dylib: Dylib, link_options: *const link.Options, dylib_options: DylibOpts, @@ -1055,28 +1034,24 @@ fn addDylib( } } - const gop = try ctx.dylibs_map.getOrPut(gpa, dylib.id.?.name); + const gpa = self.base.allocator; + const gop = try self.dylibs_map.getOrPut(gpa, dylib.id.?.name); if (gop.found_existing) return error.DylibAlreadyExists; - gop.value_ptr.* = @as(u16, @intCast(ctx.dylibs.items.len)); - try ctx.dylibs.append(gpa, dylib); + gop.value_ptr.* = @as(u16, @intCast(self.dylibs.items.len)); + try self.dylibs.append(gpa, dylib); const should_link_dylib_even_if_unreachable = blk: { if (link_options.dead_strip_dylibs and !dylib_options.needed) break :blk false; - break :blk !(dylib_options.dependent or ctx.referenced_dylibs.contains(gop.value_ptr.*)); + break :blk !(dylib_options.dependent or self.referenced_dylibs.contains(gop.value_ptr.*)); }; if (should_link_dylib_even_if_unreachable) { - try ctx.referenced_dylibs.putNoClobber(gpa, gop.value_ptr.*, {}); + try self.referenced_dylibs.putNoClobber(gpa, gop.value_ptr.*, {}); } } -pub fn parseDependentLibs( - ctx: anytype, - gpa: Allocator, - dependent_libs: anytype, - link_options: *const link.Options, -) !void { +pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype, link_options: *const link.Options) !void { const tracy = trace(@src()); defer tracy.end(); @@ -1085,6 +1060,7 @@ pub fn parseDependentLibs( // 2) afterwards, we parse dependents of the included dylibs // TODO this should not be performed if the user specifies `-flat_namespace` flag. // See ld64 manpages. 
+ const gpa = self.base.allocator; var arena_alloc = std.heap.ArenaAllocator.init(gpa); const arena = arena_alloc.allocator(); defer arena_alloc.deinit(); @@ -1092,9 +1068,9 @@ pub fn parseDependentLibs( outer: while (dependent_libs.readItem()) |dep_id| { defer dep_id.id.deinit(gpa); - if (ctx.dylibs_map.contains(dep_id.id.name)) continue; + if (self.dylibs_map.contains(dep_id.id.name)) continue; - const weak = ctx.dylibs.items[dep_id.parent].weak; + const weak = self.dylibs.items[dep_id.parent].weak; const has_ext = blk: { const basename = fs.path.basename(dep_id.id.name); break :blk mem.lastIndexOfScalar(u8, basename, '.') != null; @@ -1121,7 +1097,7 @@ pub fn parseDependentLibs( log.debug("trying dependency at fully resolved path {s}", .{full_path}); const offset: u64 = if (fat.isFatLibrary(file)) blk: { - const offset = parseFatLibrary(ctx, file, full_path, link_options.target.cpu.arch) catch |err| switch (err) { + const offset = self.parseFatLibrary(file, full_path, link_options.target.cpu.arch) catch |err| switch (err) { error.MissingArch => break, else => |e| return e, }; @@ -1130,12 +1106,12 @@ pub fn parseDependentLibs( } else 0; if (Dylib.isDylib(file, offset)) { - try parseDylib(ctx, gpa, file, full_path, offset, dependent_libs, link_options, .{ + try self.parseDylib(file, full_path, offset, dependent_libs, link_options, .{ .dependent = true, .weak = weak, }); } else { - parseLibStub(ctx, gpa, file, full_path, dependent_libs, link_options, .{ + self.parseLibStub(file, full_path, dependent_libs, link_options, .{ .dependent = true, .weak = weak, }) catch |err| switch (err) { @@ -1394,7 +1370,7 @@ fn markRelocsDirtyByAddress(self: *MachO, addr: u64) void { } } -pub fn allocateSpecialSymbols(self: anytype) !void { +pub fn allocateSpecialSymbols(self: *MachO) !void { for (&[_][]const u8{ "___dso_handle", "__mh_execute_header", @@ -1432,24 +1408,82 @@ pub fn createAtom(self: *MachO, sym_index: u32, opts: CreateAtomOpts) !Atom.Inde return index; } +pub fn 
createTentativeDefAtoms(self: *MachO) !void { + const gpa = self.base.allocator; + + for (self.globals.items) |global| { + const sym = self.getSymbolPtr(global); + if (!sym.tentative()) continue; + if (sym.n_desc == N_DEAD) continue; + + log.debug("creating tentative definition for ATOM(%{d}, '{s}') in object({?})", .{ + global.sym_index, self.getSymbolName(global), global.file, + }); + + // Convert any tentative definition into a regular symbol and allocate + // text blocks for each tentative definition. + const size = sym.n_value; + const alignment = (sym.n_desc >> 8) & 0x0f; + + if (self.bss_section_index == null) { + self.bss_section_index = try self.initSection("__DATA", "__bss", .{ + .flags = macho.S_ZEROFILL, + }); + } + + sym.* = .{ + .n_strx = sym.n_strx, + .n_type = macho.N_SECT | macho.N_EXT, + .n_sect = self.bss_section_index.? + 1, + .n_desc = 0, + .n_value = 0, + }; + + const atom_index = try self.createAtom(global.sym_index, .{ + .size = size, + .alignment = alignment, + }); + const atom = self.getAtomPtr(atom_index); + atom.file = global.file; + + self.addAtomToSection(atom_index); + + assert(global.getFile() != null); + const object = &self.objects.items[global.getFile().?]; + try object.atoms.append(gpa, atom_index); + object.atom_by_index_table[global.sym_index] = atom_index; + } +} + fn createDyldPrivateAtom(self: *MachO) !void { if (self.dyld_private_atom_index != null) return; const sym_index = try self.allocateSymbol(); - const atom_index = try self.createAtom(sym_index, .{}); + const atom_index = try self.createAtom(sym_index, .{ + .size = @sizeOf(u64), + .alignment = 3, + }); try self.atom_by_index_table.putNoClobber(self.base.allocator, sym_index, atom_index); - const atom = self.getAtomPtr(atom_index); - atom.size = @sizeOf(u64); + if (self.data_section_index == null) { + self.data_section_index = try self.initSection("__DATA", "__data", .{}); + } + + const atom = self.getAtom(atom_index); const sym = atom.getSymbolPtr(self); sym.n_type = 
macho.N_SECT; sym.n_sect = self.data_section_index.? + 1; self.dyld_private_atom_index = atom_index; - sym.n_value = try self.allocateAtom(atom_index, atom.size, @alignOf(u64)); - log.debug("allocated dyld_private atom at 0x{x}", .{sym.n_value}); - var buffer: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64); - try self.writeAtom(atom_index, &buffer); + switch (self.mode) { + .zld => self.addAtomToSection(atom_index), + .incremental => { + sym.n_value = try self.allocateAtom(atom_index, atom.size, @alignOf(u64)); + log.debug("allocated dyld_private atom at 0x{x}", .{sym.n_value}); + var buffer: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64); + try self.writeAtom(atom_index, &buffer); + }, + } } fn createThreadLocalDescriptorAtom(self: *MachO, sym_name: []const u8, target: SymbolWithLoc) !Atom.Index { @@ -1485,7 +1519,7 @@ fn createThreadLocalDescriptorAtom(self: *MachO, sym_name: []const u8, target: S return atom_index; } -fn createMhExecuteHeaderSymbol(self: *MachO) !void { +pub fn createMhExecuteHeaderSymbol(self: *MachO) !void { if (self.base.options.output_mode != .Exe) return; const gpa = self.base.allocator; @@ -1501,10 +1535,17 @@ fn createMhExecuteHeaderSymbol(self: *MachO) !void { }; const gop = try self.getOrPutGlobalPtr("__mh_execute_header"); + if (gop.found_existing) { + const global = gop.value_ptr.*; + if (global.getFile()) |file| { + const global_object = &self.objects.items[file]; + global_object.globals_lookup[global.sym_index] = self.getGlobalIndex("__mh_execute_header").?; + } + } gop.value_ptr.* = sym_loc; } -fn createDsoHandleSymbol(self: *MachO) !void { +pub fn createDsoHandleSymbol(self: *MachO) !void { const global = self.getGlobalPtr("___dso_handle") orelse return; if (!self.getSymbol(global.*).undf()) return; @@ -1519,10 +1560,51 @@ fn createDsoHandleSymbol(self: *MachO) !void { .n_desc = macho.N_WEAK_DEF, .n_value = 0, }; + const global_index = self.getGlobalIndex("___dso_handle").?; + if (global.getFile()) |file| { + const global_object = 
&self.objects.items[file]; + global_object.globals_lookup[global.sym_index] = global_index; + } global.* = sym_loc; _ = self.unresolved.swapRemove(self.getGlobalIndex("___dso_handle").?); } +pub fn resolveSymbols(self: *MachO, actions: *std.ArrayList(ResolveAction)) !void { + // We add the specified entrypoint as the first unresolved symbols so that + // we search for it in libraries should there be no object files specified + // on the linker line. + if (self.base.options.output_mode == .Exe) { + const entry_name = self.base.options.entry orelse load_commands.default_entry_point; + _ = try self.addUndefined(entry_name, .none); + } + + // Force resolution of any symbols requested by the user. + for (self.base.options.force_undefined_symbols.keys()) |sym_name| { + _ = try self.addUndefined(sym_name, .none); + } + + for (self.objects.items, 0..) |_, object_id| { + try self.resolveSymbolsInObject(@as(u32, @intCast(object_id))); + } + + try self.resolveSymbolsInArchives(); + + // Finally, force resolution of dyld_stub_binder if there are imports + // requested. 
+ if (self.unresolved.count() > 0 and self.dyld_stub_binder_index == null) { + self.dyld_stub_binder_index = try self.addUndefined("dyld_stub_binder", .add_got); + } + if (!self.base.options.single_threaded and self.mode == .incremental) { + _ = try self.addUndefined("__tlv_bootstrap", .none); + } + + try self.resolveSymbolsInDylibs(actions); + + try self.createMhExecuteHeaderSymbol(); + try self.createDsoHandleSymbol(); + try self.resolveSymbolsAtLoading(); +} + fn resolveGlobalSymbol(self: *MachO, current: SymbolWithLoc) !void { const gpa = self.base.allocator; const sym = self.getSymbol(current); @@ -1536,6 +1618,7 @@ fn resolveGlobalSymbol(self: *MachO, current: SymbolWithLoc) !void { } return; } + const global_index = self.getGlobalIndex(sym_name).?; const global = gop.value_ptr.*; const global_sym = self.getSymbol(global); @@ -1566,7 +1649,22 @@ fn resolveGlobalSymbol(self: *MachO, current: SymbolWithLoc) !void { const sym_is_weak = sym.sect() and (sym.weakDef() or sym.pext()); const global_is_weak = global_sym.sect() and (global_sym.weakDef() or global_sym.pext()); - if (sym_is_strong and global_is_strong) return error.MultipleSymbolDefinitions; + if (sym_is_strong and global_is_strong) { + log.err("symbol '{s}' defined multiple times", .{sym_name}); + if (global.getFile()) |file| { + log.err(" first definition in '{s}'", .{self.objects.items[file].name}); + } + if (current.getFile()) |file| { + log.err(" next definition in '{s}'", .{self.objects.items[file].name}); + } + return error.MultipleSymbolDefinitions; + } + + if (current.getFile()) |file| { + const object = &self.objects.items[file]; + object.globals_lookup[current.sym_index] = global_index; + } + if (global_is_strong) return; if (sym_is_weak and global_is_weak) return; if (sym.tentative() and global_sym.tentative()) { @@ -1574,11 +1672,88 @@ fn resolveGlobalSymbol(self: *MachO, current: SymbolWithLoc) !void { } if (sym.undf() and !sym.tentative()) return; - _ = 
self.unresolved.swapRemove(self.getGlobalIndex(sym_name).?); + if (global.getFile()) |file| { + const global_object = &self.objects.items[file]; + global_object.globals_lookup[global.sym_index] = global_index; + } + _ = self.unresolved.swapRemove(global_index); gop.value_ptr.* = current; } +fn resolveSymbolsInObject(self: *MachO, object_id: u32) !void { + const object = &self.objects.items[object_id]; + const in_symtab = object.in_symtab orelse return; + + log.debug("resolving symbols in '{s}'", .{object.name}); + + var sym_index: u32 = 0; + while (sym_index < in_symtab.len) : (sym_index += 1) { + const sym = &object.symtab[sym_index]; + const sym_name = object.getSymbolName(sym_index); + + if (sym.stab()) { + log.err("unhandled symbol type: stab", .{}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name}); + return error.UnhandledSymbolType; + } + + if (sym.indr()) { + log.err("unhandled symbol type: indirect", .{}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name}); + return error.UnhandledSymbolType; + } + + if (sym.abs()) { + log.err("unhandled symbol type: absolute", .{}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name}); + return error.UnhandledSymbolType; + } + + if (sym.sect() and !sym.ext()) { + log.debug("symbol '{s}' local to object {s}; skipping...", .{ + sym_name, + object.name, + }); + continue; + } + + try self.resolveGlobalSymbol(.{ .sym_index = sym_index, .file = object_id + 1 }); + } +} + +fn resolveSymbolsInArchives(self: *MachO) !void { + if (self.archives.items.len == 0) return; + + const gpa = self.base.allocator; + var next_sym: usize = 0; + loop: while (next_sym < self.unresolved.count()) { + const global = self.globals.items[self.unresolved.keys()[next_sym]]; + const sym_name = self.getSymbolName(global); + + for (self.archives.items) |archive| { + // Check if the entry exists in a static archive. 
+ const offsets = archive.toc.get(sym_name) orelse { + // No hit. + continue; + }; + assert(offsets.items.len > 0); + + const object_id = @as(u16, @intCast(self.objects.items.len)); + const object = try archive.parseObject(gpa, offsets.items[0]); + try self.objects.append(gpa, object); + try self.resolveSymbolsInObject(object_id); + + continue :loop; + } + + next_sym += 1; + } +} + fn resolveSymbolsInDylibs(self: *MachO, actions: *std.ArrayList(ResolveAction)) !void { if (self.dylibs.items.len == 0) return; @@ -1608,6 +1783,7 @@ fn resolveSymbolsInDylibs(self: *MachO, actions: *std.ArrayList(ResolveAction)) if (self.unresolved.fetchSwapRemove(global_index)) |entry| blk: { if (!sym.undf()) break :blk; + if (self.mode == .zld) break :blk; try actions.append(.{ .kind = entry.value, .target = global }); } @@ -1618,6 +1794,42 @@ fn resolveSymbolsInDylibs(self: *MachO, actions: *std.ArrayList(ResolveAction)) } } +fn resolveSymbolsAtLoading(self: *MachO) !void { + const is_lib = self.base.options.output_mode == .Lib; + const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; + const allow_undef = is_dyn_lib and (self.base.options.allow_shlib_undefined orelse false); + + var next_sym: usize = 0; + while (next_sym < self.unresolved.count()) { + const global_index = self.unresolved.keys()[next_sym]; + const global = self.globals.items[global_index]; + const sym = self.getSymbolPtr(global); + + if (sym.discarded()) { + sym.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + _ = self.unresolved.swapRemove(global_index); + continue; + } else if (allow_undef) { + const n_desc = @as( + u16, + @bitCast(macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP * @as(i16, @intCast(macho.N_SYMBOL_RESOLVER))), + ); + sym.n_type = macho.N_EXT; + sym.n_desc = n_desc; + _ = self.unresolved.swapRemove(global_index); + continue; + } + + next_sym += 1; + } +} + pub fn deinit(self: *MachO) void { const gpa = self.base.allocator; @@ -1638,7 +1850,6 @@ 
pub fn deinit(self: *MachO) void { self.thunks.deinit(gpa); self.strtab.deinit(gpa); - self.locals.deinit(gpa); self.globals.deinit(gpa); self.locals_free_list.deinit(gpa); @@ -1653,6 +1864,14 @@ pub fn deinit(self: *MachO) void { self.resolver.deinit(gpa); } + for (self.objects.items) |*object| { + object.deinit(gpa); + } + self.objects.deinit(gpa); + for (self.archives.items) |*archive| { + archive.deinit(gpa); + } + self.archives.deinit(gpa); for (self.dylibs.items) |*dylib| { dylib.deinit(gpa); } @@ -1842,20 +2061,55 @@ fn allocateGlobal(self: *MachO) !u32 { return index; } -fn addGotEntry(self: *MachO, target: SymbolWithLoc) !void { +pub fn addGotEntry(self: *MachO, target: SymbolWithLoc) !void { if (self.got_table.lookup.contains(target)) return; const got_index = try self.got_table.allocateEntry(self.base.allocator, target); - try self.writeOffsetTableEntry(got_index); - self.got_table_count_dirty = true; - self.markRelocsDirtyByTarget(target); + if (self.got_section_index == null) { + self.got_section_index = try self.initSection("__DATA_CONST", "__got", .{ + .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, + }); + } + if (self.mode == .incremental) { + try self.writeOffsetTableEntry(got_index); + self.got_table_count_dirty = true; + self.markRelocsDirtyByTarget(target); + } } -fn addStubEntry(self: *MachO, target: SymbolWithLoc) !void { +pub fn addStubEntry(self: *MachO, target: SymbolWithLoc) !void { if (self.stub_table.lookup.contains(target)) return; const stub_index = try self.stub_table.allocateEntry(self.base.allocator, target); - try self.writeStubTableEntry(stub_index); - self.stub_table_count_dirty = true; - self.markRelocsDirtyByTarget(target); + if (self.stubs_section_index == null) { + self.stubs_section_index = try self.initSection("__TEXT", "__stubs", .{ + .flags = macho.S_SYMBOL_STUBS | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + .reserved2 = stubs.stubSize(self.base.options.target.cpu.arch), + }); + 
self.stub_helper_section_index = try self.initSection("__TEXT", "__stub_helper", .{ + .flags = macho.S_REGULAR | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + }); + self.la_symbol_ptr_section_index = try self.initSection("__DATA", "__la_symbol_ptr", .{ + .flags = macho.S_LAZY_SYMBOL_POINTERS, + }); + } + if (self.mode == .incremental) { + try self.writeStubTableEntry(stub_index); + self.stub_table_count_dirty = true; + self.markRelocsDirtyByTarget(target); + } +} + +pub fn addTlvPtrEntry(self: *MachO, target: SymbolWithLoc) !void { + if (self.tlv_ptr_table.lookup.contains(target)) return; + _ = try self.tlv_ptr_table.allocateEntry(self.gpa, target); + if (self.tlv_ptr_section_index == null) { + self.tlv_ptr_section_index = try self.initSection("__DATA", "__thread_ptrs", .{ + .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, + }); + } } pub fn updateFunc(self: *MachO, mod: *Module, func_index: InternPool.Index, air: Air, liveness: Liveness) !void { @@ -2758,16 +3012,10 @@ const InitSectionOpts = struct { reserved2: u32 = 0, }; -pub fn initSection( - gpa: Allocator, - ctx: anytype, - segname: []const u8, - sectname: []const u8, - opts: InitSectionOpts, -) !u8 { +pub fn initSection(self: *MachO, segname: []const u8, sectname: []const u8, opts: InitSectionOpts) !u8 { log.debug("creating section '{s},{s}'", .{ segname, sectname }); - const index = @as(u8, @intCast(ctx.sections.slice().len)); - try ctx.sections.append(gpa, .{ + const index = @as(u8, @intCast(self.sections.slice().len)); + try self.sections.append(self.base.allocator, .{ .segment_index = undefined, // Segments will be created automatically later down the pipeline .header = .{ .sectname = makeStaticString(sectname), @@ -2822,7 +3070,7 @@ fn allocateSection(self: *MachO, segname: []const u8, sectname: []const u8, opts .cmdsize = @sizeOf(macho.segment_command_64) + @sizeOf(macho.section_64), }; - const sect_id = try initSection(gpa, self, sectname, segname, .{ + const sect_id = try 
self.initSection(sectname, segname, .{ .flags = opts.flags, .reserved2 = opts.reserved2, }); @@ -2918,10 +3166,29 @@ fn growSectionVirtualMemory(self: *MachO, sect_id: u8, needed_size: u64) !void { } } +pub fn addAtomToSection(self: *MachO, atom_index: Atom.Index) void { + assert(self.mode == .zld); + const atom = self.getAtomPtr(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + var section = self.sections.get(sym.n_sect - 1); + if (section.header.size > 0) { + const last_atom = self.getAtomPtr(section.last_atom_index.?); + last_atom.next_index = atom_index; + atom.prev_index = section.last_atom_index; + } else { + section.first_atom_index = atom_index; + } + section.last_atom_index = atom_index; + section.header.size += atom.size; + self.sections.set(sym.n_sect - 1, section); +} + fn allocateAtom(self: *MachO, atom_index: Atom.Index, new_atom_size: u64, alignment: u64) !u64 { const tracy = trace(@src()); defer tracy.end(); + assert(self.mode == .incremental); + const atom = self.getAtom(atom_index); const sect_id = atom.getSymbol(self).n_sect - 1; const segment = self.getSegmentPtr(sect_id); @@ -3048,7 +3315,7 @@ pub fn getGlobalSymbol(self: *MachO, name: []const u8, lib_name: ?[]const u8) !u return self.addUndefined(sym_name, .add_stub); } -pub fn writeSegmentHeaders(self: anytype, writer: anytype) !void { +pub fn writeSegmentHeaders(self: *MachO, writer: anytype) !void { for (self.segments.items, 0..) 
|seg, i| { const indexes = self.getSectionIndexes(@as(u8, @intCast(i))); var out_seg = seg; @@ -3075,7 +3342,7 @@ pub fn writeSegmentHeaders(self: anytype, writer: anytype) !void { } } -fn writeLinkeditSegmentData(self: *MachO) !void { +pub fn writeLinkeditSegmentData(self: *MachO) !void { const page_size = getPageSize(self.base.options.target.cpu.arch); const seg = self.getLinkeditSegmentPtr(); seg.filesize = 0; @@ -3092,29 +3359,29 @@ fn writeLinkeditSegmentData(self: *MachO) !void { } try self.writeDyldInfoData(); + // TODO handle this better + if (self.mode == .zld) { + try self.writeFunctionStarts(); + try self.writeDataInCode(); + } try self.writeSymtabs(); seg.vmsize = mem.alignForward(u64, seg.filesize, page_size); } -pub fn collectRebaseDataFromTableSection( - gpa: Allocator, - ctx: anytype, - sect_id: u8, - rebase: *Rebase, - table: anytype, -) !void { - const header = ctx.sections.items(.header)[sect_id]; - const segment_index = ctx.sections.items(.segment_index)[sect_id]; - const segment = ctx.segments.items[segment_index]; +fn collectRebaseDataFromTableSection(self: *MachO, sect_id: u8, rebase: *Rebase, table: anytype) !void { + const gpa = self.base.allocator; + const header = self.sections.items(.header)[sect_id]; + const segment_index = self.sections.items(.segment_index)[sect_id]; + const segment = self.segments.items[segment_index]; const base_offset = header.addr - segment.vmaddr; - const is_got = if (ctx.got_section_index) |index| index == sect_id else false; + const is_got = if (self.got_section_index) |index| index == sect_id else false; try rebase.entries.ensureUnusedCapacity(gpa, table.entries.items.len); for (table.entries.items, 0..) 
|entry, i| { if (!table.lookup.contains(entry)) continue; - const sym = ctx.getSymbol(entry); + const sym = self.getSymbol(entry); if (is_got and sym.undf()) continue; const offset = i * @sizeOf(u64); log.debug(" | rebase at {x}", .{base_offset + offset}); @@ -3152,34 +3419,105 @@ fn collectRebaseData(self: *MachO, rebase: *Rebase) !void { } } - try collectRebaseDataFromTableSection(gpa, self, self.got_section_index.?, rebase, self.got_table); - try collectRebaseDataFromTableSection(gpa, self, self.la_symbol_ptr_section_index.?, rebase, self.stub_table); + // Unpack GOT entries + if (self.got_section_index) |sect_id| { + try self.collectRebaseDataFromTableSection(sect_id, rebase, self.got_table); + } + + // Next, unpack __la_symbol_ptr entries + if (self.la_symbol_ptr_section_index) |sect_id| { + try self.collectRebaseDataFromTableSection(sect_id, rebase, self.stub_table); + } + + // Finally, unpack the rest. + const cpu_arch = self.base.options.target.cpu.arch; + for (self.objects.items) |*object| { + for (object.atoms.items) |atom_index| { + const atom = self.getAtom(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + if (sym.n_desc == N_DEAD) continue; + + const sect_id = sym.n_sect - 1; + const section = self.sections.items(.header)[sect_id]; + const segment_id = self.sections.items(.segment_index)[sect_id]; + const segment = self.segments.items[segment_id]; + if (segment.maxprot & macho.PROT.WRITE == 0) continue; + switch (section.type()) { + macho.S_LITERAL_POINTERS, + macho.S_REGULAR, + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => {}, + else => continue, + } + + log.debug(" ATOM({d}, %{d}, '{s}')", .{ + atom_index, + atom.sym_index, + self.getSymbolName(atom.getSymbolWithLoc()), + }); + + const code = Atom.getAtomCode(self, atom_index); + const relocs = Atom.getAtomRelocs(self, atom_index); + const ctx = Atom.getRelocContext(self, atom_index); + + for (relocs) |rel| { + switch (cpu_arch) { + .aarch64 => { + const 
rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); + if (rel_type != .ARM64_RELOC_UNSIGNED) continue; + if (rel.r_length != 3) continue; + }, + .x86_64 => { + const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); + if (rel_type != .X86_64_RELOC_UNSIGNED) continue; + if (rel.r_length != 3) continue; + }, + else => unreachable, + } + const target = Atom.parseRelocTarget(self, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = code, + .base_offset = ctx.base_offset, + .base_addr = ctx.base_addr, + }); + const target_sym = self.getSymbol(target); + if (target_sym.undf()) continue; + + const base_offset = @as(i32, @intCast(sym.n_value - segment.vmaddr)); + const rel_offset = rel.r_address - ctx.base_offset; + const offset = @as(u64, @intCast(base_offset + rel_offset)); + log.debug(" | rebase at {x}", .{offset}); + + try rebase.entries.append(self.gpa, .{ + .offset = offset, + .segment_id = segment_id, + }); + } + } + } try rebase.finalize(gpa); } -pub fn collectBindDataFromTableSection( - gpa: Allocator, - ctx: anytype, - sect_id: u8, - bind: anytype, - table: anytype, -) !void { - const header = ctx.sections.items(.header)[sect_id]; - const segment_index = ctx.sections.items(.segment_index)[sect_id]; - const segment = ctx.segments.items[segment_index]; +fn collectBindDataFromTableSection(self: *MachO, sect_id: u8, bind: anytype, table: anytype) !void { + const gpa = self.base.allocator; + const header = self.sections.items(.header)[sect_id]; + const segment_index = self.sections.items(.segment_index)[sect_id]; + const segment = self.segments.items[segment_index]; const base_offset = header.addr - segment.vmaddr; try bind.entries.ensureUnusedCapacity(gpa, table.entries.items.len); for (table.entries.items, 0..) 
|entry, i| { if (!table.lookup.contains(entry)) continue; - const bind_sym = ctx.getSymbol(entry); + const bind_sym = self.getSymbol(entry); if (!bind_sym.undf()) continue; const offset = i * @sizeOf(u64); log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ base_offset + offset, - ctx.getSymbolName(entry), + self.getSymbolName(entry), @divTrunc(@as(i16, @bitCast(bind_sym.n_desc)), macho.N_SYMBOL_RESOLVER), }); if (bind_sym.weakRef()) { @@ -3235,13 +3573,105 @@ fn collectBindData(self: *MachO, bind: anytype, raw_bindings: anytype) !void { } } - // Gather GOT pointers - try collectBindDataFromTableSection(gpa, self, self.got_section_index.?, bind, self.got_table); + // Unpack GOT pointers + if (self.got_section_index) |sect_id| { + try self.collectBindDataFromTableSection(sect_id, bind, self.got_table); + } + + // Next, unpack TLV pointers section + if (self.tlv_ptr_section_index) |sect_id| { + try self.collectBindDataFromTableSection(sect_id, bind, self.tlv_ptr_table); + } + + // Finally, unpack the rest. 
+ const cpu_arch = self.base.options.target.cpu.arch; + for (self.objects.items) |*object| { + for (object.atoms.items) |atom_index| { + const atom = self.getAtom(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + if (sym.n_desc == N_DEAD) continue; + + const sect_id = sym.n_sect - 1; + const section = self.sections.items(.header)[sect_id]; + const segment_id = self.sections.items(.segment_index)[sect_id]; + const segment = self.segments.items[segment_id]; + if (segment.maxprot & macho.PROT.WRITE == 0) continue; + switch (section.type()) { + macho.S_LITERAL_POINTERS, + macho.S_REGULAR, + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => {}, + else => continue, + } + + log.debug(" ATOM({d}, %{d}, '{s}')", .{ + atom_index, + atom.sym_index, + self.getSymbolName(atom.getSymbolWithLoc()), + }); + + const code = Atom.getAtomCode(self, atom_index); + const relocs = Atom.getAtomRelocs(self, atom_index); + const ctx = Atom.getRelocContext(self, atom_index); + + for (relocs) |rel| { + switch (cpu_arch) { + .aarch64 => { + const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); + if (rel_type != .ARM64_RELOC_UNSIGNED) continue; + if (rel.r_length != 3) continue; + }, + .x86_64 => { + const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); + if (rel_type != .X86_64_RELOC_UNSIGNED) continue; + if (rel.r_length != 3) continue; + }, + else => unreachable, + } + + const global = Atom.parseRelocTarget(self, .{ + .object_id = atom.getFile().?, + .rel = rel, + .code = code, + .base_offset = ctx.base_offset, + .base_addr = ctx.base_addr, + }); + const bind_sym_name = self.getSymbolName(global); + const bind_sym = self.getSymbol(global); + if (!bind_sym.undf()) continue; + + const base_offset = sym.n_value - segment.vmaddr; + const rel_offset = @as(u32, @intCast(rel.r_address - ctx.base_offset)); + const offset = @as(u64, @intCast(base_offset + rel_offset)); + const addend = mem.readIntLittle(i64, 
code[rel_offset..][0..8]); + + const dylib_ordinal = @divTrunc(@as(i16, @bitCast(bind_sym.n_desc)), macho.N_SYMBOL_RESOLVER); + log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ + base_offset, + bind_sym_name, + dylib_ordinal, + }); + log.debug(" | with addend {x}", .{addend}); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + } + try bind.entries.append(self.gpa, .{ + .target = global, + .offset = offset, + .segment_id = segment_id, + .addend = addend, + }); + } + } + } + try bind.finalize(gpa, self); } fn collectLazyBindData(self: *MachO, bind: anytype) !void { - try collectBindDataFromTableSection(self.base.allocator, self, self.la_symbol_ptr_section_index.?, bind, self.stub_table); + const sect_id = self.la_symbol_ptr_section_index orelse return; + try self.collectBindDataFromTableSection(sect_id, bind, self.stub_table); try bind.finalize(self.base.allocator, self); } @@ -3259,6 +3689,7 @@ fn collectExportData(self: *MachO, trie: *Trie) !void { if (sym.undf()) continue; assert(sym.ext()); + if (sym.n_desc == N_DEAD) continue; const sym_name = self.getSymbolName(global); log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); @@ -3349,12 +3780,7 @@ fn writeDyldInfoData(self: *MachO) !void { }); try self.base.file.?.pwriteAll(buffer, rebase_off); - try populateLazyBindOffsetsInStubHelper( - self, - self.base.options.target.cpu.arch, - self.base.file.?, - lazy_bind, - ); + try self.populateLazyBindOffsetsInStubHelper(lazy_bind); self.dyld_info_cmd.rebase_off = @as(u32, @intCast(rebase_off)); self.dyld_info_cmd.rebase_size = @as(u32, @intCast(rebase_size_aligned)); @@ -3366,19 +3792,15 @@ fn writeDyldInfoData(self: *MachO) !void { self.dyld_info_cmd.export_size = @as(u32, @intCast(export_size_aligned)); } -pub fn populateLazyBindOffsetsInStubHelper( - ctx: anytype, - cpu_arch: std.Target.Cpu.Arch, - file: fs.File, - lazy_bind: anytype, -) !void { +fn populateLazyBindOffsetsInStubHelper(self: *MachO, lazy_bind: 
anytype) !void { if (lazy_bind.size() == 0) return; - const stub_helper_section_index = ctx.stub_helper_section_index.?; + const stub_helper_section_index = self.stub_helper_section_index.?; // assert(ctx.stub_helper_preamble_allocated); - const header = ctx.sections.items(.header)[stub_helper_section_index]; + const header = self.sections.items(.header)[stub_helper_section_index]; + const cpu_arch = self.base.options.target.cpu.arch; const preamble_size = stubs.stubHelperPreambleSize(cpu_arch); const stub_size = stubs.stubHelperSize(cpu_arch); const stub_offset = stubs.stubOffsetInStubHelper(cpu_arch); @@ -3389,14 +3811,175 @@ pub fn populateLazyBindOffsetsInStubHelper( log.debug("writing lazy bind offset 0x{x} ({s}) in stub helper at 0x{x}", .{ bind_offset, - ctx.getSymbolName(lazy_bind.entries.items[index].target), + self.getSymbolName(lazy_bind.entries.items[index].target), file_offset, }); - try file.pwriteAll(mem.asBytes(&bind_offset), file_offset); + try self.base.file.?.pwriteAll(mem.asBytes(&bind_offset), file_offset); } } +const asc_u64 = std.sort.asc(u64); + +fn addSymbolToFunctionStarts(self: *MachO, sym_loc: SymbolWithLoc, addresses: *std.ArrayList(u64)) !void { + const sym = self.getSymbol(sym_loc); + if (sym.n_strx == 0) return; + if (sym.n_desc == MachO.N_DEAD) return; + if (self.symbolIsTemp(sym_loc)) return; + try addresses.append(sym.n_value); +} + +fn writeFunctionStarts(self: *MachO) !void { + const gpa = self.base.allocator; + const seg = self.segments.items[self.header_segment_cmd_index.?]; + + // We need to sort by address first + var addresses = std.ArrayList(u64).init(gpa); + defer addresses.deinit(); + + for (self.objects.items) |object| { + for (object.exec_atoms.items) |atom_index| { + const atom = self.getAtom(atom_index); + const sym_loc = atom.getSymbolWithLoc(); + try self.addSymbolToFunctionStarts(sym_loc, &addresses); + + var it = Atom.getInnerSymbolsIterator(self, atom_index); + while (it.next()) |inner_sym_loc| { + try 
self.addSymbolToFunctionStarts(inner_sym_loc, &addresses); + } + } + } + + mem.sort(u64, addresses.items, {}, asc_u64); + + var offsets = std.ArrayList(u32).init(gpa); + defer offsets.deinit(); + try offsets.ensureTotalCapacityPrecise(addresses.items.len); + + var last_off: u32 = 0; + for (addresses.items) |addr| { + const offset = @as(u32, @intCast(addr - seg.vmaddr)); + const diff = offset - last_off; + + if (diff == 0) continue; + + offsets.appendAssumeCapacity(diff); + last_off = offset; + } + + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + + const max_size = @as(usize, @intCast(offsets.items.len * @sizeOf(u64))); + try buffer.ensureTotalCapacity(max_size); + + for (offsets.items) |offset| { + try std.leb.writeULEB128(buffer.writer(), offset); + } + + const link_seg = self.getLinkeditSegmentPtr(); + const offset = link_seg.fileoff + link_seg.filesize; + assert(mem.isAlignedGeneric(u64, offset, @alignOf(u64))); + const needed_size = buffer.items.len; + const needed_size_aligned = mem.alignForward(u64, needed_size, @alignOf(u64)); + const padding = math.cast(usize, needed_size_aligned - needed_size) orelse return error.Overflow; + if (padding > 0) { + try buffer.ensureUnusedCapacity(padding); + buffer.appendNTimesAssumeCapacity(0, padding); + } + link_seg.filesize = offset + needed_size_aligned - link_seg.fileoff; + + log.debug("writing function starts info from 0x{x} to 0x{x}", .{ offset, offset + needed_size_aligned }); + + try self.base.file.?.pwriteAll(buffer.items, offset); + + self.function_starts_cmd.dataoff = @as(u32, @intCast(offset)); + self.function_starts_cmd.datasize = @as(u32, @intCast(needed_size_aligned)); +} + +fn filterDataInCode( + dices: []const macho.data_in_code_entry, + start_addr: u64, + end_addr: u64, +) []const macho.data_in_code_entry { + const Predicate = struct { + addr: u64, + + pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool { + return dice.offset >= self.addr; + } + }; + + const start = 
MachO.lsearch(macho.data_in_code_entry, dices, Predicate{ .addr = start_addr }); + const end = MachO.lsearch(macho.data_in_code_entry, dices[start..], Predicate{ .addr = end_addr }) + start; + + return dices[start..end]; +} + +pub fn writeDataInCode(self: *MachO) !void { + const gpa = self.base.allocator; + var out_dice = std.ArrayList(macho.data_in_code_entry).init(gpa); + defer out_dice.deinit(); + + const text_sect_id = self.text_section_index orelse return; + const text_sect_header = self.sections.items(.header)[text_sect_id]; + + for (self.objects.items) |object| { + if (!object.hasDataInCode()) continue; + const dice = object.data_in_code.items; + try out_dice.ensureUnusedCapacity(dice.len); + + for (object.exec_atoms.items) |atom_index| { + const atom = self.getAtom(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + if (sym.n_desc == MachO.N_DEAD) continue; + + const source_addr = if (object.getSourceSymbol(atom.sym_index)) |source_sym| + source_sym.n_value + else blk: { + const nbase = @as(u32, @intCast(object.in_symtab.?.len)); + const source_sect_id = @as(u8, @intCast(atom.sym_index - nbase)); + break :blk object.getSourceSection(source_sect_id).addr; + }; + const filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size); + const base = math.cast(u32, sym.n_value - text_sect_header.addr + text_sect_header.offset) orelse + return error.Overflow; + + for (filtered_dice) |single| { + const offset = math.cast(u32, single.offset - source_addr + base) orelse + return error.Overflow; + out_dice.appendAssumeCapacity(.{ + .offset = offset, + .length = single.length, + .kind = single.kind, + }); + } + } + } + + const seg = self.getLinkeditSegmentPtr(); + const offset = seg.fileoff + seg.filesize; + assert(mem.isAlignedGeneric(u64, offset, @alignOf(u64))); + const needed_size = out_dice.items.len * @sizeOf(macho.data_in_code_entry); + const needed_size_aligned = mem.alignForward(u64, needed_size, @alignOf(u64)); + seg.filesize = 
offset + needed_size_aligned - seg.fileoff; + + const buffer = try gpa.alloc(u8, math.cast(usize, needed_size_aligned) orelse return error.Overflow); + defer gpa.free(buffer); + { + const src = mem.sliceAsBytes(out_dice.items); + @memcpy(buffer[0..src.len], src); + @memset(buffer[src.len..], 0); + } + + log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ offset, offset + needed_size_aligned }); + + try self.base.file.?.pwriteAll(buffer, offset); + + self.data_in_code_cmd.dataoff = @as(u32, @intCast(offset)); + self.data_in_code_cmd.datasize = @as(u32, @intCast(needed_size_aligned)); +} + fn writeSymtabs(self: *MachO) !void { var ctx = try self.writeSymtab(); defer ctx.imports_table.deinit(); @@ -3404,18 +3987,38 @@ fn writeSymtabs(self: *MachO) !void { try self.writeStrtab(); } +fn addLocalToSymtab(self: *MachO, sym_loc: SymbolWithLoc, locals: *std.ArrayList(macho.nlist_64)) !void { + const sym = self.getSymbol(sym_loc); + if (sym.n_strx == 0) return; // no name, skip + if (sym.n_desc == MachO.N_DEAD) return; // garbage-collected, skip + if (sym.ext()) return; // an export lands in its own symtab section, skip + if (self.symbolIsTemp(sym_loc)) return; // local temp symbol, skip + var out_sym = sym; + out_sym.n_strx = try self.strtab.insert(self.base.allocator, self.getSymbolName(sym_loc)); + try locals.append(out_sym); +} + fn writeSymtab(self: *MachO) !SymtabCtx { const gpa = self.base.allocator; var locals = std.ArrayList(macho.nlist_64).init(gpa); defer locals.deinit(); - for (self.locals.items, 0..) 
|sym, sym_id| { - if (sym.n_strx == 0) continue; // no name, skip - const sym_loc = SymbolWithLoc{ .sym_index = @as(u32, @intCast(sym_id)) }; - if (self.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip - if (self.getGlobal(self.getSymbolName(sym_loc)) != null) continue; // global symbol is either an export or import, skip - try locals.append(sym); + for (0..self.locals.items) |sym_id| { + try self.addLocalToSymtab(.{ .sym_index = @intCast(sym_id) }); + } + + for (self.objects.items) |object| { + for (object.atoms.items) |atom_index| { + const atom = self.getAtom(atom_index); + const sym_loc = atom.getSymbolWithLoc(); + try self.addLocalToSymtab(sym_loc, &locals); + + var it = Atom.getInnerSymbolsIterator(self, atom_index); + while (it.next()) |inner_sym_loc| { + try self.addLocalToSymtab(inner_sym_loc, &locals); + } + } } var exports = std.ArrayList(macho.nlist_64).init(gpa); @@ -3424,6 +4027,7 @@ fn writeSymtab(self: *MachO) !SymtabCtx { for (self.globals.items) |global| { const sym = self.getSymbol(global); if (sym.undf()) continue; // import, skip + if (sym.n_desc == N_DEAD) continue; var out_sym = sym; out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(global)); try exports.append(out_sym); @@ -3438,6 +4042,7 @@ fn writeSymtab(self: *MachO) !SymtabCtx { const sym = self.getSymbol(global); if (sym.n_strx == 0) continue; // no name, skip if (!sym.undf()) continue; // not an import, skip + if (sym.n_desc == N_DEAD) continue; const new_index = @as(u32, @intCast(imports.items.len)); var out_sym = sym; out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(global)); @@ -3445,6 +4050,15 @@ fn writeSymtab(self: *MachO) !SymtabCtx { try imports_table.putNoClobber(global, new_index); } + // We generate stabs last in order to ensure that the strtab always has debug info + // strings trailing + if (!self.base.options.strip) { + assert(self.d_sym == null); // TODO + for (self.objects.items) |object| { + try self.generateSymbolStabs(object, 
&locals); + } + } + const nlocals = @as(u32, @intCast(locals.items.len)); const nexports = @as(u32, @intCast(exports.items.len)); const nimports = @as(u32, @intCast(imports.items.len)); @@ -3478,7 +4092,218 @@ fn writeSymtab(self: *MachO) !SymtabCtx { }; } -fn writeStrtab(self: *MachO) !void { +fn generateSymbolStabs( + self: *MachO, + object: Object, + locals: *std.ArrayList(macho.nlist_64), +) !void { + log.debug("generating stabs for '{s}'", .{object.name}); + + const gpa = self.base.allocator; + var debug_info = object.parseDwarfInfo(); + + var lookup = DwarfInfo.AbbrevLookupTable.init(gpa); + defer lookup.deinit(); + try lookup.ensureUnusedCapacity(std.math.maxInt(u8)); + + // We assume there is only one CU. + var cu_it = debug_info.getCompileUnitIterator(); + const compile_unit = while (try cu_it.next()) |cu| { + const offset = math.cast(usize, cu.cuh.debug_abbrev_offset) orelse return error.Overflow; + try debug_info.genAbbrevLookupByKind(offset, &lookup); + break cu; + } else { + log.debug("no compile unit found in debug info in {s}; skipping", .{object.name}); + return; + }; + + var abbrev_it = compile_unit.getAbbrevEntryIterator(debug_info); + const cu_entry: DwarfInfo.AbbrevEntry = while (try abbrev_it.next(lookup)) |entry| switch (entry.tag) { + dwarf.TAG.compile_unit => break entry, + else => continue, + } else { + log.debug("missing DWARF_TAG_compile_unit tag in {s}; skipping", .{object.name}); + return; + }; + + var maybe_tu_name: ?[]const u8 = null; + var maybe_tu_comp_dir: ?[]const u8 = null; + var attr_it = cu_entry.getAttributeIterator(debug_info, compile_unit.cuh); + + while (try attr_it.next()) |attr| switch (attr.name) { + dwarf.AT.comp_dir => maybe_tu_comp_dir = attr.getString(debug_info, compile_unit.cuh) orelse continue, + dwarf.AT.name => maybe_tu_name = attr.getString(debug_info, compile_unit.cuh) orelse continue, + else => continue, + }; + + if (maybe_tu_name == null or maybe_tu_comp_dir == null) { + log.debug("missing DWARF_AT_comp_dir 
and DWARF_AT_name attributes {s}; skipping", .{object.name}); + return; + } + + const tu_name = maybe_tu_name.?; + const tu_comp_dir = maybe_tu_comp_dir.?; + + // Open scope + try locals.ensureUnusedCapacity(3); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, tu_comp_dir), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, tu_name), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, object.name), + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = object.mtime, + }); + + var stabs_buf: [4]macho.nlist_64 = undefined; + + var name_lookup: ?DwarfInfo.SubprogramLookupByName = if (object.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS == 0) blk: { + var name_lookup = DwarfInfo.SubprogramLookupByName.init(gpa); + errdefer name_lookup.deinit(); + try name_lookup.ensureUnusedCapacity(@as(u32, @intCast(object.atoms.items.len))); + try debug_info.genSubprogramLookupByName(compile_unit, lookup, &name_lookup); + break :blk name_lookup; + } else null; + defer if (name_lookup) |*nl| nl.deinit(); + + for (object.atoms.items) |atom_index| { + const atom = self.getAtom(atom_index); + const stabs = try self.generateSymbolStabsForSymbol( + atom_index, + atom.getSymbolWithLoc(), + name_lookup, + &stabs_buf, + ); + try locals.appendSlice(stabs); + + var it = Atom.getInnerSymbolsIterator(self, atom_index); + while (it.next()) |sym_loc| { + const contained_stabs = try self.generateSymbolStabsForSymbol( + atom_index, + sym_loc, + name_lookup, + &stabs_buf, + ); + try locals.appendSlice(contained_stabs); + } + } + + // Close scope + try locals.append(.{ + .n_strx = 0, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); +} + +fn generateSymbolStabsForSymbol( + self: *MachO, + atom_index: Atom.Index, + sym_loc: SymbolWithLoc, + lookup: 
?DwarfInfo.SubprogramLookupByName, + buf: *[4]macho.nlist_64, +) ![]const macho.nlist_64 { + const gpa = self.base.allocator; + const object = self.objects.items[sym_loc.getFile().?]; + const sym = self.getSymbol(sym_loc); + const sym_name = self.getSymbolName(sym_loc); + const header = self.sections.items(.header)[sym.n_sect - 1]; + + if (sym.n_strx == 0) return buf[0..0]; + if (self.symbolIsTemp(sym_loc)) return buf[0..0]; + + if (!header.isCode()) { + // Since we are not dealing with machine code, it's either a global or a static depending + // on the linkage scope. + if (sym.sect() and sym.ext()) { + // Global gets an N_GSYM stab type. + buf[0] = .{ + .n_strx = try self.strtab.insert(gpa, sym_name), + .n_type = macho.N_GSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = 0, + }; + } else { + // Local static gets an N_STSYM stab type. + buf[0] = .{ + .n_strx = try self.strtab.insert(gpa, sym_name), + .n_type = macho.N_STSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }; + } + return buf[0..1]; + } + + const size: u64 = size: { + if (object.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0) { + break :size self.getAtom(atom_index).size; + } + + // Since we don't have subsections to work with, we need to infer the size of each function + // the slow way by scanning the debug info for matching symbol names and extracting + // the symbol's DWARF_AT_low_pc and DWARF_AT_high_pc values. 
+ const source_sym = object.getSourceSymbol(sym_loc.sym_index) orelse return buf[0..0]; + const subprogram = lookup.?.get(sym_name[1..]) orelse return buf[0..0]; + + if (subprogram.addr <= source_sym.n_value and source_sym.n_value < subprogram.addr + subprogram.size) { + break :size subprogram.size; + } else { + log.debug("no stab found for {s}", .{sym_name}); + return buf[0..0]; + } + }; + + buf[0] = .{ + .n_strx = 0, + .n_type = macho.N_BNSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }; + buf[1] = .{ + .n_strx = try self.strtab.insert(gpa, sym_name), + .n_type = macho.N_FUN, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }; + buf[2] = .{ + .n_strx = 0, + .n_type = macho.N_FUN, + .n_sect = 0, + .n_desc = 0, + .n_value = size, + }; + buf[3] = .{ + .n_strx = 0, + .n_type = macho.N_ENSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = size, + }; + + return buf; +} + +pub fn writeStrtab(self: *MachO) !void { const gpa = self.base.allocator; const seg = self.getLinkeditSegmentPtr(); const offset = seg.fileoff + seg.filesize; @@ -3507,7 +4332,7 @@ const SymtabCtx = struct { imports_table: std.AutoHashMap(SymbolWithLoc, u32), }; -fn writeDysymtab(self: *MachO, ctx: SymtabCtx) !void { +pub fn writeDysymtab(self: *MachO, ctx: SymtabCtx) !void { const gpa = self.base.allocator; const nstubs = @as(u32, @intCast(self.stub_table.lookup.count())); const ngot_entries = @as(u32, @intCast(self.got_table.lookup.count())); @@ -3582,7 +4407,7 @@ fn writeDysymtab(self: *MachO, ctx: SymtabCtx) !void { self.dysymtab_cmd.nindirectsyms = nindirectsyms; } -fn writeUuid(self: *MachO, comp: *const Compilation, uuid_cmd_offset: u32, has_codesig: bool) !void { +pub fn writeUuid(self: *MachO, comp: *const Compilation, uuid_cmd_offset: u32, has_codesig: bool) !void { const file_size = if (!has_codesig) blk: { const seg = self.getLinkeditSegmentPtr(); break :blk seg.fileoff + seg.filesize; @@ -3592,7 +4417,7 @@ fn writeUuid(self: *MachO, comp: 
*const Compilation, uuid_cmd_offset: u32, has_c try self.base.file.?.pwriteAll(&self.uuid_cmd.uuid, offset); } -fn writeCodeSignaturePadding(self: *MachO, code_sig: *CodeSignature) !void { +pub fn writeCodeSignaturePadding(self: *MachO, code_sig: *CodeSignature) !void { const seg = self.getLinkeditSegmentPtr(); // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file // https://github.com/opensource-apple/cctools/blob/fdb4825f303fd5c0751be524babd32958181b3ed/libstuff/checkout.c#L271 @@ -3609,8 +4434,9 @@ fn writeCodeSignaturePadding(self: *MachO, code_sig: *CodeSignature) !void { self.codesig_cmd.datasize = @as(u32, @intCast(needed_size)); } -fn writeCodeSignature(self: *MachO, comp: *const Compilation, code_sig: *CodeSignature) !void { - const seg = self.getSegment(self.text_section_index.?); +pub fn writeCodeSignature(self: *MachO, comp: *const Compilation, code_sig: *CodeSignature) !void { + const seg_id = self.header_segment_cmd_index.?; + const seg = self.segments.items[seg_id]; const offset = self.codesig_cmd.dataoff; var buffer = std.ArrayList(u8).init(self.base.allocator); @@ -3634,14 +4460,10 @@ fn writeCodeSignature(self: *MachO, comp: *const Compilation, code_sig: *CodeSig } /// Writes Mach-O file header. 
-fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void { +pub fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void { var header: macho.mach_header_64 = .{}; header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL; - if (!self.base.options.single_threaded) { - header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; - } - switch (self.base.options.target.cpu.arch) { .aarch64 => { header.cputype = macho.CPU_TYPE_ARM64; @@ -3666,6 +4488,13 @@ fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void { else => unreachable, } + if (self.thread_vars_section_index) |sect_id| { + header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; + if (self.sections.items(.header)[sect_id].size > 0) { + header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; + } + } + header.ncmds = ncmds; header.sizeofcmds = sizeofcmds; @@ -3830,20 +4659,33 @@ pub fn symbolIsTemp(self: *MachO, sym_with_loc: SymbolWithLoc) bool { /// Returns pointer-to-symbol described by `sym_with_loc` descriptor. pub fn getSymbolPtr(self: *MachO, sym_with_loc: SymbolWithLoc) *macho.nlist_64 { - assert(sym_with_loc.getFile() == null); - return &self.locals.items[sym_with_loc.sym_index]; + if (sym_with_loc.getFile()) |file| { + const object = &self.objects.items[file]; + return &object.symtab[sym_with_loc.sym_index]; + } else { + return &self.locals.items[sym_with_loc.sym_index]; + } } /// Returns symbol described by `sym_with_loc` descriptor. pub fn getSymbol(self: *const MachO, sym_with_loc: SymbolWithLoc) macho.nlist_64 { - assert(sym_with_loc.getFile() == null); - return self.locals.items[sym_with_loc.sym_index]; + if (sym_with_loc.getFile()) |file| { + const object = &self.objects.items[file]; + return object.symtab[sym_with_loc.sym_index]; + } else { + return self.locals.items[sym_with_loc.sym_index]; + } } /// Returns name of the symbol described by `sym_with_loc` descriptor. 
pub fn getSymbolName(self: *const MachO, sym_with_loc: SymbolWithLoc) []const u8 { - const sym = self.getSymbol(sym_with_loc); - return self.strtab.get(sym.n_strx).?; + if (sym_with_loc.getFile()) |file| { + const object = self.objects.items[file]; + return object.getSymbolName(sym_with_loc.sym_index); + } else { + const sym = self.locals.items[sym_with_loc.sym_index]; + return self.strtab.get(sym.n_strx).?; + } } /// Returns pointer to the global entry for `name` if one exists. @@ -3945,6 +4787,19 @@ pub inline fn getPageSize(cpu_arch: std.Target.Cpu.Arch) u16 { }; } +pub inline fn requiresThunks(self: MachO) bool { + return self.base.options.target.cpu.arch == .aarch64; +} + +pub fn requiresCodeSignature(self: MachO) bool { + if (self.base.options.entitlements) |_| return true; + const cpu_arch = self.base.options.target.cpu.arch; + const os_tag = self.base.options.target.os.tag; + const abi = self.base.options.target.abi; + if (cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator)) return true; + return false; +} + pub fn getSegmentPrecedence(segname: []const u8) u4 { if (mem.eql(u8, segname, "__PAGEZERO")) return 0x0; if (mem.eql(u8, segname, "__TEXT")) return 0x1; @@ -3988,24 +4843,26 @@ pub fn getSectionPrecedence(header: macho.section_64) u8 { return (@as(u8, @intCast(segment_precedence)) << 4) + section_precedence; } -pub fn reportUndefined(self: *MachO, ctx: anytype) !void { - const count = ctx.unresolved.count(); +pub fn reportUndefined(self: *MachO) !void { + const count = self.unresolved.count(); if (count == 0) return; const gpa = self.base.allocator; try self.misc_errors.ensureUnusedCapacity(gpa, count); - for (ctx.unresolved.keys()) |global_index| { - const global = ctx.globals.items[global_index]; - const sym_name = ctx.getSymbolName(global); + for (self.unresolved.keys()) |global_index| { + const global = self.globals.items[global_index]; + const sym_name = self.getSymbolName(global); const nnotes: usize = if (global.getFile() == null) 
@as(usize, 0) else 1; var notes = try std.ArrayList(File.ErrorMsg).initCapacity(gpa, nnotes); defer notes.deinit(); if (global.getFile()) |file| { - const note = try std.fmt.allocPrint(gpa, "referenced in {s}", .{ctx.objects.items[file].name}); + const note = try std.fmt.allocPrint(gpa, "referenced in {s}", .{ + self.objects.items[file].name, + }); notes.appendAssumeCapacity(.{ .msg = note }); } @@ -4051,6 +4908,19 @@ pub fn lsearch(comptime T: type, haystack: []align(1) const T, predicate: anytyp return i; } +pub fn logSegments(self: *MachO) void { + log.debug("segments:", .{}); + for (self.segments.items, 0..) |segment, i| { + log.debug(" segment({d}): {s} @{x} ({x}), sizeof({x})", .{ + i, + segment.segName(), + segment.fileoff, + segment.vmaddr, + segment.vmsize, + }); + } +} + pub fn logSections(self: *MachO) void { log.debug("sections:", .{}); for (self.sections.items(.header), 0..) |header, i| { @@ -4065,9 +4935,7 @@ pub fn logSections(self: *MachO) void { } } -fn logSymAttributes(sym: macho.nlist_64, buf: *[4]u8) []const u8 { - @memset(buf[0..4], '_'); - @memset(buf[4..], ' '); +fn logSymAttributes(sym: macho.nlist_64, buf: []u8) []const u8 { if (sym.sect()) { buf[0] = 's'; } @@ -4090,56 +4958,110 @@ fn logSymAttributes(sym: macho.nlist_64, buf: *[4]u8) []const u8 { pub fn logSymtab(self: *MachO) void { var buf: [4]u8 = undefined; - log.debug("symtab:", .{}); + const scoped_log = std.log.scoped(.symtab); + + scoped_log.debug("locals:", .{}); + for (self.objects.items, 0..) |object, id| { + scoped_log.debug(" object({d}): {s}", .{ id, object.name }); + if (object.in_symtab == null) continue; + for (object.symtab, 0..) |sym, sym_id| { + @memset(&buf, '_'); + scoped_log.debug(" %{d}: {s} @{x} in sect({d}), {s}", .{ + sym_id, + object.getSymbolName(@as(u32, @intCast(sym_id))), + sym.n_value, + sym.n_sect, + logSymAttributes(sym, &buf), + }); + } + } + scoped_log.debug(" object(-1)", .{}); for (self.locals.items, 0..) 
|sym, sym_id| { - const where = if (sym.undf() and !sym.tentative()) "ord" else "sect"; - const def_index = if (sym.undf() and !sym.tentative()) - @divTrunc(sym.n_desc, macho.N_SYMBOL_RESOLVER) - else - sym.n_sect + 1; - log.debug(" %{d}: {?s} @{x} in {s}({d}), {s}", .{ + if (sym.undf()) continue; + scoped_log.debug(" %{d}: {s} @{x} in sect({d}), {s}", .{ sym_id, - self.strtab.get(sym.n_strx), + self.strtab.get(sym.n_strx).?, sym.n_value, - where, - def_index, + sym.n_sect, logSymAttributes(sym, &buf), }); } - log.debug("globals table:", .{}); - for (self.globals.items) |global| { - const name = self.getSymbolName(global); - log.debug(" {s} => %{d} in object({?d})", .{ name, global.sym_index, global.file }); + scoped_log.debug("exports:", .{}); + for (self.globals.items, 0..) |global, i| { + const sym = self.getSymbol(global); + if (sym.undf()) continue; + if (sym.n_desc == MachO.N_DEAD) continue; + scoped_log.debug(" %{d}: {s} @{x} in sect({d}), {s} (def in object({?}))", .{ + i, + self.getSymbolName(global), + sym.n_value, + sym.n_sect, + logSymAttributes(sym, &buf), + global.file, + }); } - log.debug("GOT entries:", .{}); - log.debug("{}", .{self.got_table}); + scoped_log.debug("imports:", .{}); + for (self.globals.items, 0..) 
|global, i| { + const sym = self.getSymbol(global); + if (!sym.undf()) continue; + if (sym.n_desc == MachO.N_DEAD) continue; + const ord = @divTrunc(sym.n_desc, macho.N_SYMBOL_RESOLVER); + scoped_log.debug(" %{d}: {s} @{x} in ord({d}), {s}", .{ + i, + self.getSymbolName(global), + sym.n_value, + ord, + logSymAttributes(sym, &buf), + }); + } - log.debug("stubs entries:", .{}); - log.debug("{}", .{self.stub_table}); + scoped_log.debug("GOT entries:", .{}); + scoped_log.debug("{}", .{self.got_table}); + + scoped_log.debug("TLV pointers:", .{}); + scoped_log.debug("{}", .{self.tlv_ptr_table}); + + scoped_log.debug("stubs entries:", .{}); + scoped_log.debug("{}", .{self.stubs_table}); + + scoped_log.debug("thunks:", .{}); + for (self.thunks.items, 0..) |thunk, i| { + scoped_log.debug(" thunk({d})", .{i}); + const slice = thunk.targets.slice(); + for (slice.items(.tag), slice.items(.target), 0..) |tag, target, j| { + const atom_index = @as(u32, @intCast(thunk.getStartAtomIndex() + j)); + const atom = self.getAtom(atom_index); + const atom_sym = self.getSymbol(atom.getSymbolWithLoc()); + const target_addr = switch (tag) { + .stub => self.getStubsEntryAddress(target).?, + .atom => self.getSymbol(target).n_value, + }; + scoped_log.debug(" {d}@{x} => {s}({s}@{x})", .{ + j, + atom_sym.n_value, + @tagName(tag), + self.getSymbolName(target), + target_addr, + }); + } + } } pub fn logAtoms(self: *MachO) void { log.debug("atoms:", .{}); - const slice = self.sections.slice(); - for (slice.items(.last_atom_index), 0..) |last_atom_index, i| { - var atom_index = last_atom_index orelse continue; - const header = slice.items(.header)[i]; - - while (true) { - const atom = self.getAtom(atom_index); - if (atom.prev_index) |prev_index| { - atom_index = prev_index; - } else break; - } + for (slice.items(.first_atom_index), 0..) 
|first_atom_index, sect_id| { + var atom_index = first_atom_index orelse continue; + const header = slice.items(.header)[sect_id]; log.debug("{s},{s}", .{ header.segName(), header.sectName() }); while (true) { - self.logAtom(atom_index); const atom = self.getAtom(atom_index); + self.logAtom(atom_index, log); + if (atom.next_index) |next_index| { atom_index = next_index; } else break; @@ -4147,18 +5069,50 @@ pub fn logAtoms(self: *MachO) void { } } -pub fn logAtom(self: *MachO, atom_index: Atom.Index) void { +pub fn logAtom(self: *MachO, atom_index: Atom.Index, logger: anytype) void { + if (!build_options.enable_logging) return; + const atom = self.getAtom(atom_index); - const sym = atom.getSymbol(self); - const sym_name = atom.getName(self); - log.debug(" ATOM(%{?d}, '{s}') @ {x} sizeof({x}) in object({?d}) in sect({d})", .{ - atom.getSymbolIndex(), + const sym = self.getSymbol(atom.getSymbolWithLoc()); + const sym_name = self.getSymbolName(atom.getSymbolWithLoc()); + logger.debug(" ATOM({d}, %{d}, '{s}') @ {x} (sizeof({x}), alignof({x})) in object({?}) in sect({d})", .{ + atom_index, + atom.sym_index, sym_name, sym.n_value, atom.size, - atom.file, - sym.n_sect + 1, + atom.alignment, + atom.getFile(), + sym.n_sect, }); + + if (atom.getFile() != null) { + var it = Atom.getInnerSymbolsIterator(self, atom_index); + while (it.next()) |sym_loc| { + const inner = self.getSymbol(sym_loc); + const inner_name = self.getSymbolName(sym_loc); + const offset = Atom.calcInnerSymbolOffset(self, atom_index, sym_loc.sym_index); + + logger.debug(" (%{d}, '{s}') @ {x} ({x})", .{ + sym_loc.sym_index, + inner_name, + inner.n_value, + offset, + }); + } + + if (Atom.getSectionAlias(self, atom_index)) |sym_loc| { + const alias = self.getSymbol(sym_loc); + const alias_name = self.getSymbolName(sym_loc); + + logger.debug(" (%{d}, '{s}') @ {x} ({x})", .{ + sym_loc.sym_index, + alias_name, + alias.n_value, + 0, + }); + } + } } const MachO = @This(); @@ -4197,6 +5151,7 @@ const Cache = 
std.Build.Cache; const CodeSignature = @import("MachO/CodeSignature.zig"); const Compilation = @import("../Compilation.zig"); const Dwarf = File.Dwarf; +const DwarfInfo = @import("DwarfInfo.zig"); const Dylib = @import("MachO/Dylib.zig"); const File = link.File; const Object = @import("MachO/Object.zig"); diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index f3922f6ff9..20a191281e 100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -1,15 +1,3 @@ -const Archive = @This(); - -const std = @import("std"); -const assert = std.debug.assert; -const fs = std.fs; -const log = std.log.scoped(.link); -const macho = std.macho; -const mem = std.mem; - -const Allocator = mem.Allocator; -const Object = @import("Object.zig"); - file: fs.File, fat_offset: u64, name: []const u8, @@ -215,3 +203,15 @@ pub fn parseObject(self: Archive, gpa: Allocator, offset: u32) !Object { return object; } + +const Archive = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const fs = std.fs; +const log = std.log.scoped(.link); +const macho = std.macho; +const mem = std.mem; + +const Allocator = mem.Allocator; +const Object = @import("Object.zig"); diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 73099184e0..bde6b09583 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -105,8 +105,7 @@ pub fn freeListEligible(self: Atom, macho_file: *MachO) bool { return surplus >= MachO.min_text_capacity; } -pub fn getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 { - const gpa = zld.gpa; +pub fn getOutputSection(macho_file: *MachO, sect: macho.section_64) !?u8 { const segname = sect.segName(); const sectname = sect.sectName(); const res: ?u8 = blk: { @@ -126,20 +125,14 @@ pub fn getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 { } if (sect.isCode()) { - if (zld.text_section_index == null) { - zld.text_section_index = try MachO.initSection( - gpa, - zld, - "__TEXT", - "__text", - .{ - .flags = 
macho.S_REGULAR | - macho.S_ATTR_PURE_INSTRUCTIONS | - macho.S_ATTR_SOME_INSTRUCTIONS, - }, - ); + if (macho_file.text_section_index == null) { + macho_file.text_section_index = try macho_file.initSection("__TEXT", "__text", .{ + .flags = macho.S_REGULAR | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + }); } - break :blk zld.text_section_index.?; + break :blk macho_file.text_section_index.?; } if (sect.isDebug()) { @@ -151,42 +144,26 @@ pub fn getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 { macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS, => { - break :blk zld.getSectionByName("__TEXT", "__const") orelse try MachO.initSection( - gpa, - zld, - "__TEXT", - "__const", - .{}, - ); + break :blk macho_file.getSectionByName("__TEXT", "__const") orelse + try macho_file.initSection("__TEXT", "__const", .{}); }, macho.S_CSTRING_LITERALS => { if (mem.startsWith(u8, sectname, "__objc")) { - break :blk zld.getSectionByName(segname, sectname) orelse try MachO.initSection( - gpa, - zld, - segname, - sectname, - .{}, - ); + break :blk macho_file.getSectionByName(segname, sectname) orelse + try macho_file.initSection(segname, sectname, .{}); } - break :blk zld.getSectionByName("__TEXT", "__cstring") orelse try MachO.initSection( - gpa, - zld, - "__TEXT", - "__cstring", - .{ .flags = macho.S_CSTRING_LITERALS }, - ); + break :blk macho_file.getSectionByName("__TEXT", "__cstring") orelse + try macho_file.initSection("__TEXT", "__cstring", .{ + .flags = macho.S_CSTRING_LITERALS, + }); }, macho.S_MOD_INIT_FUNC_POINTERS, macho.S_MOD_TERM_FUNC_POINTERS, => { - break :blk zld.getSectionByName("__DATA_CONST", sectname) orelse try MachO.initSection( - gpa, - zld, - "__DATA_CONST", - sectname, - .{ .flags = sect.flags }, - ); + break :blk macho_file.getSectionByName("__DATA_CONST", sectname) orelse + try macho_file.initSection("__DATA_CONST", sectname, .{ + .flags = sect.flags, + }); }, macho.S_LITERAL_POINTERS, macho.S_ZEROFILL, @@ -195,23 +172,14 @@ pub fn 
getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 { macho.S_THREAD_LOCAL_REGULAR, macho.S_THREAD_LOCAL_ZEROFILL, => { - break :blk zld.getSectionByName(segname, sectname) orelse try MachO.initSection( - gpa, - zld, - segname, - sectname, - .{ .flags = sect.flags }, - ); + break :blk macho_file.getSectionByName(segname, sectname) orelse + try macho_file.initSection(segname, sectname, .{ + .flags = sect.flags, + }); }, macho.S_COALESCED => { - break :blk zld.getSectionByName(segname, sectname) orelse try MachO.initSection( - gpa, - zld, - - segname, - sectname, - .{}, - ); + break :blk macho_file.getSectionByName(segname, sectname) orelse + try macho_file.initSection(segname, sectname, .{}); }, macho.S_REGULAR => { if (mem.eql(u8, segname, "__TEXT")) { @@ -221,13 +189,8 @@ pub fn getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 { mem.eql(u8, sectname, "__gosymtab") or mem.eql(u8, sectname, "__gopclntab")) { - break :blk zld.getSectionByName("__TEXT", sectname) orelse try MachO.initSection( - gpa, - zld, - "__TEXT", - sectname, - .{}, - ); + break :blk macho_file.getSectionByName("__TEXT", sectname) orelse + try macho_file.initSection("__TEXT", sectname, .{}); } } if (mem.eql(u8, segname, "__DATA")) { @@ -236,33 +199,17 @@ pub fn getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 { mem.eql(u8, sectname, "__objc_classlist") or mem.eql(u8, sectname, "__objc_imageinfo")) { - break :blk zld.getSectionByName("__DATA_CONST", sectname) orelse try MachO.initSection( - gpa, - zld, - "__DATA_CONST", - sectname, - .{}, - ); + break :blk macho_file.getSectionByName("__DATA_CONST", sectname) orelse + try macho_file.initSection("__DATA_CONST", sectname, .{}); } else if (mem.eql(u8, sectname, "__data")) { - if (zld.data_section_index == null) { - zld.data_section_index = try MachO.initSection( - gpa, - zld, - "__DATA", - "__data", - .{}, - ); + if (macho_file.data_section_index == null) { + macho_file.data_section_index = try macho_file.initSection("__DATA", 
"__data", .{}); } - break :blk zld.data_section_index.?; + break :blk macho_file.data_section_index.?; } } - break :blk zld.getSectionByName(segname, sectname) orelse try MachO.initSection( - gpa, - zld, - segname, - sectname, - .{}, - ); + break :blk macho_file.getSectionByName(segname, sectname) orelse + try macho_file.initSection(segname, sectname, .{}); }, else => break :blk null, } @@ -270,29 +217,29 @@ pub fn getOutputSection(zld: *Zld, sect: macho.section_64) !?u8 { // TODO we can do this directly in the selection logic above. // Or is it not worth it? - if (zld.data_const_section_index == null) { - if (zld.getSectionByName("__DATA_CONST", "__const")) |index| { - zld.data_const_section_index = index; + if (macho_file.data_const_section_index == null) { + if (macho_file.getSectionByName("__DATA_CONST", "__const")) |index| { + macho_file.data_const_section_index = index; } } - if (zld.thread_vars_section_index == null) { - if (zld.getSectionByName("__DATA", "__thread_vars")) |index| { - zld.thread_vars_section_index = index; + if (macho_file.thread_vars_section_index == null) { + if (macho_file.getSectionByName("__DATA", "__thread_vars")) |index| { + macho_file.thread_vars_section_index = index; } } - if (zld.thread_data_section_index == null) { - if (zld.getSectionByName("__DATA", "__thread_data")) |index| { - zld.thread_data_section_index = index; + if (macho_file.thread_data_section_index == null) { + if (macho_file.getSectionByName("__DATA", "__thread_data")) |index| { + macho_file.thread_data_section_index = index; } } - if (zld.thread_bss_section_index == null) { - if (zld.getSectionByName("__DATA", "__thread_bss")) |index| { - zld.thread_bss_section_index = index; + if (macho_file.thread_bss_section_index == null) { + if (macho_file.getSectionByName("__DATA", "__thread_bss")) |index| { + macho_file.thread_bss_section_index = index; } } - if (zld.bss_section_index == null) { - if (zld.getSectionByName("__DATA", "__bss")) |index| { - zld.bss_section_index 
= index; + if (macho_file.bss_section_index == null) { + if (macho_file.getSectionByName("__DATA", "__bss")) |index| { + macho_file.bss_section_index = index; } } @@ -383,8 +330,8 @@ const InnerSymIterator = struct { /// Returns an iterator over potentially contained symbols. /// Panics when called on a synthetic Atom. -pub fn getInnerSymbolsIterator(zld: *Zld, atom_index: Index) InnerSymIterator { - const atom = zld.getAtom(atom_index); +pub fn getInnerSymbolsIterator(macho_file: *MachO, atom_index: Index) InnerSymIterator { + const atom = macho_file.getAtom(atom_index); assert(atom.getFile() != null); return .{ .sym_index = atom.inner_sym_index, @@ -397,11 +344,11 @@ pub fn getInnerSymbolsIterator(zld: *Zld, atom_index: Index) InnerSymIterator { /// An alias symbol is used to represent the start of an input section /// if there were no symbols defined within that range. /// Alias symbols are only used on x86_64. -pub fn getSectionAlias(zld: *Zld, atom_index: Index) ?SymbolWithLoc { - const atom = zld.getAtom(atom_index); +pub fn getSectionAlias(macho_file: *MachO, atom_index: Index) ?SymbolWithLoc { + const atom = macho_file.getAtom(atom_index); assert(atom.getFile() != null); - const object = zld.objects.items[atom.getFile().?]; + const object = macho_file.objects.items[atom.getFile().?]; const nbase = @as(u32, @intCast(object.in_symtab.?.len)); const ntotal = @as(u32, @intCast(object.symtab.len)); var sym_index: u32 = nbase; @@ -418,13 +365,13 @@ pub fn getSectionAlias(zld: *Zld, atom_index: Index) ?SymbolWithLoc { /// Given an index into a contained symbol within, calculates an offset wrt /// the start of this Atom. 
-pub fn calcInnerSymbolOffset(zld: *Zld, atom_index: Index, sym_index: u32) u64 { - const atom = zld.getAtom(atom_index); +pub fn calcInnerSymbolOffset(macho_file: *MachO, atom_index: Index, sym_index: u32) u64 { + const atom = macho_file.getAtom(atom_index); assert(atom.getFile() != null); if (atom.sym_index == sym_index) return 0; - const object = zld.objects.items[atom.getFile().?]; + const object = macho_file.objects.items[atom.getFile().?]; const source_sym = object.getSourceSymbol(sym_index).?; const base_addr = if (object.getSourceSymbol(atom.sym_index)) |sym| sym.n_value @@ -437,14 +384,14 @@ pub fn calcInnerSymbolOffset(zld: *Zld, atom_index: Index, sym_index: u32) u64 { return source_sym.n_value - base_addr; } -pub fn scanAtomRelocs(zld: *Zld, atom_index: Index, relocs: []align(1) const macho.relocation_info) !void { - const arch = zld.options.target.cpu.arch; - const atom = zld.getAtom(atom_index); +pub fn scanAtomRelocs(macho_file: *MachO, atom_index: Index, relocs: []align(1) const macho.relocation_info) !void { + const arch = macho_file.base.options.target.cpu.arch; + const atom = macho_file.getAtom(atom_index); assert(atom.getFile() != null); // synthetic atoms do not have relocs return switch (arch) { - .aarch64 => scanAtomRelocsArm64(zld, atom_index, relocs), - .x86_64 => scanAtomRelocsX86(zld, atom_index, relocs), + .aarch64 => scanAtomRelocsArm64(macho_file, atom_index, relocs), + .x86_64 => scanAtomRelocsX86(macho_file, atom_index, relocs), else => unreachable, }; } @@ -454,11 +401,11 @@ const RelocContext = struct { base_offset: i32 = 0, }; -pub fn getRelocContext(zld: *Zld, atom_index: Index) RelocContext { - const atom = zld.getAtom(atom_index); +pub fn getRelocContext(macho_file: *MachO, atom_index: Index) RelocContext { + const atom = macho_file.getAtom(atom_index); assert(atom.getFile() != null); // synthetic atoms do not have relocs - const object = zld.objects.items[atom.getFile().?]; + const object = 
macho_file.objects.items[atom.getFile().?]; if (object.getSourceSymbol(atom.sym_index)) |source_sym| { const source_sect = object.getSourceSection(source_sym.n_sect - 1); return .{ @@ -475,7 +422,7 @@ pub fn getRelocContext(zld: *Zld, atom_index: Index) RelocContext { }; } -pub fn parseRelocTarget(zld: *Zld, ctx: struct { +pub fn parseRelocTarget(macho_file: *MachO, ctx: struct { object_id: u32, rel: macho.relocation_info, code: []const u8, @@ -485,7 +432,7 @@ pub fn parseRelocTarget(zld: *Zld, ctx: struct { const tracy = trace(@src()); defer tracy.end(); - const object = &zld.objects.items[ctx.object_id]; + const object = &macho_file.objects.items[ctx.object_id]; log.debug("parsing reloc target in object({d}) '{s}' ", .{ ctx.object_id, object.name }); const sym_index = if (ctx.rel.r_extern == 0) sym_index: { @@ -498,7 +445,7 @@ pub fn parseRelocTarget(zld: *Zld, ctx: struct { else mem.readIntLittle(u32, ctx.code[rel_offset..][0..4]); } else blk: { - assert(zld.options.target.cpu.arch == .x86_64); + assert(macho_file.base.options.target.cpu.arch == .x86_64); const correction: u3 = switch (@as(macho.reloc_type_x86_64, @enumFromInt(ctx.rel.r_type))) { .X86_64_RELOC_SIGNED => 0, .X86_64_RELOC_SIGNED_1 => 1, @@ -517,35 +464,39 @@ pub fn parseRelocTarget(zld: *Zld, ctx: struct { } else object.reverse_symtab_lookup[ctx.rel.r_symbolnum]; const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = ctx.object_id + 1 }; - const sym = zld.getSymbol(sym_loc); + const sym = macho_file.getSymbol(sym_loc); const target = if (sym.sect() and !sym.ext()) sym_loc else if (object.getGlobal(sym_index)) |global_index| - zld.globals.items[global_index] + macho_file.globals.items[global_index] else sym_loc; log.debug(" | target %{d} ('{s}') in object({?d})", .{ target.sym_index, - zld.getSymbolName(target), + macho_file.getSymbolName(target), target.getFile(), }); return target; } -pub fn getRelocTargetAtomIndex(zld: *Zld, target: SymbolWithLoc) ?Index { +pub fn 
getRelocTargetAtomIndex(macho_file: *MachO, target: SymbolWithLoc) ?Index { if (target.getFile() == null) { - const target_sym_name = zld.getSymbolName(target); + const target_sym_name = macho_file.getSymbolName(target); if (mem.eql(u8, "__mh_execute_header", target_sym_name)) return null; if (mem.eql(u8, "___dso_handle", target_sym_name)) return null; unreachable; // referenced symbol not found } - const object = zld.objects.items[target.getFile().?]; + const object = macho_file.objects.items[target.getFile().?]; return object.getAtomIndexForSymbol(target.sym_index); } -fn scanAtomRelocsArm64(zld: *Zld, atom_index: Index, relocs: []align(1) const macho.relocation_info) !void { +fn scanAtomRelocsArm64( + macho_file: *MachO, + atom_index: Index, + relocs: []align(1) const macho.relocation_info, +) !void { for (relocs) |rel| { const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); @@ -556,8 +507,8 @@ fn scanAtomRelocsArm64(zld: *Zld, atom_index: Index, relocs: []align(1) const ma if (rel.r_extern == 0) continue; - const atom = zld.getAtom(atom_index); - const object = &zld.objects.items[atom.getFile().?]; + const atom = macho_file.getAtom(atom_index); + const object = &macho_file.objects.items[atom.getFile().?]; const sym_index = object.reverse_symtab_lookup[rel.r_symbolnum]; const sym_loc = SymbolWithLoc{ .sym_index = sym_index, @@ -565,35 +516,39 @@ fn scanAtomRelocsArm64(zld: *Zld, atom_index: Index, relocs: []align(1) const ma }; const target = if (object.getGlobal(sym_index)) |global_index| - zld.globals.items[global_index] + macho_file.globals.items[global_index] else sym_loc; switch (rel_type) { .ARM64_RELOC_BRANCH26 => { // TODO rewrite relocation - const sym = zld.getSymbol(target); - if (sym.undf()) try zld.addStubEntry(target); + const sym = macho_file.getSymbol(target); + if (sym.undf()) try macho_file.addStubEntry(target); }, .ARM64_RELOC_GOT_LOAD_PAGE21, .ARM64_RELOC_GOT_LOAD_PAGEOFF12, .ARM64_RELOC_POINTER_TO_GOT, => { // TODO rewrite 
relocation - try zld.addGotEntry(target); + try macho_file.addGotEntry(target); }, .ARM64_RELOC_TLVP_LOAD_PAGE21, .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, => { - const sym = zld.getSymbol(target); - if (sym.undf()) try zld.addTlvPtrEntry(target); + const sym = macho_file.getSymbol(target); + if (sym.undf()) try macho_file.addTlvPtrEntry(target); }, else => {}, } } } -fn scanAtomRelocsX86(zld: *Zld, atom_index: Index, relocs: []align(1) const macho.relocation_info) !void { +fn scanAtomRelocsX86( + macho_file: *MachO, + atom_index: Index, + relocs: []align(1) const macho.relocation_info, +) !void { for (relocs) |rel| { const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); @@ -604,8 +559,8 @@ fn scanAtomRelocsX86(zld: *Zld, atom_index: Index, relocs: []align(1) const mach if (rel.r_extern == 0) continue; - const atom = zld.getAtom(atom_index); - const object = &zld.objects.items[atom.getFile().?]; + const atom = macho_file.getAtom(atom_index); + const object = &macho_file.objects.items[atom.getFile().?]; const sym_index = object.reverse_symtab_lookup[rel.r_symbolnum]; const sym_loc = SymbolWithLoc{ .sym_index = sym_index, @@ -613,23 +568,23 @@ fn scanAtomRelocsX86(zld: *Zld, atom_index: Index, relocs: []align(1) const mach }; const target = if (object.getGlobal(sym_index)) |global_index| - zld.globals.items[global_index] + macho_file.globals.items[global_index] else sym_loc; switch (rel_type) { .X86_64_RELOC_BRANCH => { // TODO rewrite relocation - const sym = zld.getSymbol(target); - if (sym.undf()) try zld.addStubEntry(target); + const sym = macho_file.getSymbol(target); + if (sym.undf()) try macho_file.addStubEntry(target); }, .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => { // TODO rewrite relocation - try zld.addGotEntry(target); + try macho_file.addGotEntry(target); }, .X86_64_RELOC_TLV => { - const sym = zld.getSymbol(target); - if (sym.undf()) try zld.addTlvPtrEntry(target); + const sym = macho_file.getSymbol(target); + if (sym.undf()) try 
macho_file.addTlvPtrEntry(target); }, else => {}, } @@ -637,53 +592,53 @@ fn scanAtomRelocsX86(zld: *Zld, atom_index: Index, relocs: []align(1) const mach } pub fn resolveRelocs( - zld: *Zld, + macho_file: *MachO, atom_index: Index, atom_code: []u8, atom_relocs: []align(1) const macho.relocation_info, ) !void { - const arch = zld.options.target.cpu.arch; - const atom = zld.getAtom(atom_index); + const arch = macho_file.base.options.target.cpu.arch; + const atom = macho_file.getAtom(atom_index); assert(atom.getFile() != null); // synthetic atoms do not have relocs log.debug("resolving relocations in ATOM(%{d}, '{s}')", .{ atom.sym_index, - zld.getSymbolName(atom.getSymbolWithLoc()), + macho_file.getSymbolName(atom.getSymbolWithLoc()), }); - const ctx = getRelocContext(zld, atom_index); + const ctx = getRelocContext(macho_file, atom_index); return switch (arch) { - .aarch64 => resolveRelocsArm64(zld, atom_index, atom_code, atom_relocs, ctx), - .x86_64 => resolveRelocsX86(zld, atom_index, atom_code, atom_relocs, ctx), + .aarch64 => resolveRelocsArm64(macho_file, atom_index, atom_code, atom_relocs, ctx), + .x86_64 => resolveRelocsX86(macho_file, atom_index, atom_code, atom_relocs, ctx), else => unreachable, }; } -pub fn getRelocTargetAddress(zld: *Zld, target: SymbolWithLoc, is_tlv: bool) !u64 { - const target_atom_index = getRelocTargetAtomIndex(zld, target) orelse { +pub fn getRelocTargetAddress(macho_file: *MachO, target: SymbolWithLoc, is_tlv: bool) !u64 { + const target_atom_index = getRelocTargetAtomIndex(macho_file, target) orelse { // If there is no atom for target, we still need to check for special, atom-less // symbols such as `___dso_handle`. 
- const target_name = zld.getSymbolName(target); - const atomless_sym = zld.getSymbol(target); + const target_name = macho_file.getSymbolName(target); + const atomless_sym = macho_file.getSymbol(target); log.debug(" | atomless target '{s}'", .{target_name}); return atomless_sym.n_value; }; - const target_atom = zld.getAtom(target_atom_index); + const target_atom = macho_file.getAtom(target_atom_index); log.debug(" | target ATOM(%{d}, '{s}') in object({?})", .{ target_atom.sym_index, - zld.getSymbolName(target_atom.getSymbolWithLoc()), + macho_file.getSymbolName(target_atom.getSymbolWithLoc()), target_atom.getFile(), }); - const target_sym = zld.getSymbol(target_atom.getSymbolWithLoc()); + const target_sym = macho_file.getSymbol(target_atom.getSymbolWithLoc()); assert(target_sym.n_desc != MachO.N_DEAD); // If `target` is contained within the target atom, pull its address value. const offset = if (target_atom.getFile() != null) blk: { - const object = zld.objects.items[target_atom.getFile().?]; + const object = macho_file.objects.items[target_atom.getFile().?]; break :blk if (object.getSourceSymbol(target.sym_index)) |_| - Atom.calcInnerSymbolOffset(zld, target_atom_index, target.sym_index) + Atom.calcInnerSymbolOffset(macho_file, target_atom_index, target.sym_index) else 0; // section alias } else 0; @@ -694,9 +649,9 @@ pub fn getRelocTargetAddress(zld: *Zld, target: SymbolWithLoc, is_tlv: bool) !u6 // * wrt to __thread_data if defined, then // * wrt to __thread_bss const sect_id: u16 = sect_id: { - if (zld.thread_data_section_index) |i| { + if (macho_file.thread_data_section_index) |i| { break :sect_id i; - } else if (zld.thread_bss_section_index) |i| { + } else if (macho_file.thread_bss_section_index) |i| { break :sect_id i; } else { log.err("threadlocal variables present but no initializer sections found", .{}); @@ -705,20 +660,20 @@ pub fn getRelocTargetAddress(zld: *Zld, target: SymbolWithLoc, is_tlv: bool) !u6 return error.FailedToResolveRelocationTarget; } }; 
- break :base_address zld.sections.items(.header)[sect_id].addr; + break :base_address macho_file.sections.items(.header)[sect_id].addr; } else 0; return target_sym.n_value + offset - base_address; } fn resolveRelocsArm64( - zld: *Zld, + macho_file: *MachO, atom_index: Index, atom_code: []u8, atom_relocs: []align(1) const macho.relocation_info, context: RelocContext, ) !void { - const atom = zld.getAtom(atom_index); - const object = zld.objects.items[atom.getFile().?]; + const atom = macho_file.getAtom(atom_index); + const object = macho_file.objects.items[atom.getFile().?]; var addend: ?i64 = null; var subtractor: ?SymbolWithLoc = null; @@ -745,7 +700,7 @@ fn resolveRelocsArm64( atom.getFile(), }); - subtractor = parseRelocTarget(zld, .{ + subtractor = parseRelocTarget(macho_file, .{ .object_id = atom.getFile().?, .rel = rel, .code = atom_code, @@ -757,7 +712,7 @@ fn resolveRelocsArm64( else => {}, } - const target = parseRelocTarget(zld, .{ + const target = parseRelocTarget(macho_file, .{ .object_id = atom.getFile().?, .rel = rel, .code = atom_code, @@ -770,26 +725,26 @@ fn resolveRelocsArm64( @tagName(rel_type), rel.r_address, target.sym_index, - zld.getSymbolName(target), + macho_file.getSymbolName(target), target.getFile(), }); const source_addr = blk: { - const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); + const source_sym = macho_file.getSymbol(atom.getSymbolWithLoc()); break :blk source_sym.n_value + rel_offset; }; const target_addr = blk: { - if (relocRequiresGot(zld, rel)) break :blk zld.getGotEntryAddress(target).?; - if (relocIsTlv(zld, rel) and zld.getSymbol(target).undf()) - break :blk zld.getTlvPtrEntryAddress(target).?; - if (relocIsStub(zld, rel) and zld.getSymbol(target).undf()) - break :blk zld.getStubsEntryAddress(target).?; + if (relocRequiresGot(macho_file, rel)) break :blk macho_file.getGotEntryAddress(target).?; + if (relocIsTlv(macho_file, rel) and macho_file.getSymbol(target).undf()) + break :blk 
macho_file.getTlvPtrEntryAddress(target).?; + if (relocIsStub(macho_file, rel) and macho_file.getSymbol(target).undf()) + break :blk macho_file.getStubsEntryAddress(target).?; const is_tlv = is_tlv: { - const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); - const header = zld.sections.items(.header)[source_sym.n_sect - 1]; + const source_sym = macho_file.getSymbol(atom.getSymbolWithLoc()); + const header = macho_file.sections.items(.header)[source_sym.n_sect - 1]; break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; }; - break :blk try getRelocTargetAddress(zld, target, is_tlv); + break :blk try getRelocTargetAddress(macho_file, target, is_tlv); }; log.debug(" | source_addr = 0x{x}", .{source_addr}); @@ -797,9 +752,9 @@ fn resolveRelocsArm64( switch (rel_type) { .ARM64_RELOC_BRANCH26 => { log.debug(" source {s} (object({?})), target {s}", .{ - zld.getSymbolName(atom.getSymbolWithLoc()), + macho_file.getSymbolName(atom.getSymbolWithLoc()), atom.getFile(), - zld.getSymbolName(target), + macho_file.getSymbolName(target), }); const displacement = if (Relocation.calcPcRelativeDisplacementArm64( @@ -809,13 +764,13 @@ fn resolveRelocsArm64( log.debug(" | target_addr = 0x{x}", .{target_addr}); break :blk disp; } else |_| blk: { - const thunk_index = zld.thunk_table.get(atom_index).?; - const thunk = zld.thunks.items[thunk_index]; - const thunk_sym_loc = if (zld.getSymbol(target).undf()) - thunk.getTrampoline(zld, .stub, target).? + const thunk_index = macho_file.thunk_table.get(atom_index).?; + const thunk = macho_file.thunks.items[thunk_index]; + const thunk_sym_loc = if (macho_file.getSymbol(target).undf()) + thunk.getTrampoline(macho_file, .stub, target).? 
else - thunk.getTrampoline(zld, .atom, target).?; - const thunk_addr = zld.getSymbol(thunk_sym_loc).n_value; + thunk.getTrampoline(macho_file, .atom, target).?; + const thunk_addr = macho_file.getSymbol(thunk_sym_loc).n_value; log.debug(" | target_addr = 0x{x} (thunk)", .{thunk_addr}); break :blk try Relocation.calcPcRelativeDisplacementArm64(source_addr, thunk_addr); }; @@ -944,7 +899,7 @@ fn resolveRelocsArm64( } }; - var inst = if (zld.tlv_ptr_table.lookup.contains(target)) aarch64.Instruction{ + var inst = if (macho_file.tlv_ptr_table.lookup.contains(target)) aarch64.Instruction{ .load_store_register = .{ .rt = reg_info.rd, .rn = reg_info.rn, @@ -992,7 +947,7 @@ fn resolveRelocsArm64( const result = blk: { if (subtractor) |sub| { - const sym = zld.getSymbol(sub); + const sym = macho_file.getSymbol(sub); break :blk @as(i64, @intCast(target_addr)) - @as(i64, @intCast(sym.n_value)) + ptr_addend; } else { break :blk @as(i64, @intCast(target_addr)) + ptr_addend; @@ -1016,14 +971,14 @@ fn resolveRelocsArm64( } fn resolveRelocsX86( - zld: *Zld, + macho_file: *MachO, atom_index: Index, atom_code: []u8, atom_relocs: []align(1) const macho.relocation_info, context: RelocContext, ) !void { - const atom = zld.getAtom(atom_index); - const object = zld.objects.items[atom.getFile().?]; + const atom = macho_file.getAtom(atom_index); + const object = macho_file.objects.items[atom.getFile().?]; var subtractor: ?SymbolWithLoc = null; @@ -1041,7 +996,7 @@ fn resolveRelocsX86( atom.getFile(), }); - subtractor = parseRelocTarget(zld, .{ + subtractor = parseRelocTarget(macho_file, .{ .object_id = atom.getFile().?, .rel = rel, .code = atom_code, @@ -1053,7 +1008,7 @@ fn resolveRelocsX86( else => {}, } - const target = parseRelocTarget(zld, .{ + const target = parseRelocTarget(macho_file, .{ .object_id = atom.getFile().?, .rel = rel, .code = atom_code, @@ -1066,26 +1021,26 @@ fn resolveRelocsX86( @tagName(rel_type), rel.r_address, target.sym_index, - zld.getSymbolName(target), + 
macho_file.getSymbolName(target), target.getFile(), }); const source_addr = blk: { - const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); + const source_sym = macho_file.getSymbol(atom.getSymbolWithLoc()); break :blk source_sym.n_value + rel_offset; }; const target_addr = blk: { - if (relocRequiresGot(zld, rel)) break :blk zld.getGotEntryAddress(target).?; - if (relocIsStub(zld, rel) and zld.getSymbol(target).undf()) - break :blk zld.getStubsEntryAddress(target).?; - if (relocIsTlv(zld, rel) and zld.getSymbol(target).undf()) - break :blk zld.getTlvPtrEntryAddress(target).?; + if (relocRequiresGot(macho_file, rel)) break :blk macho_file.getGotEntryAddress(target).?; + if (relocIsStub(macho_file, rel) and macho_file.getSymbol(target).undf()) + break :blk macho_file.getStubsEntryAddress(target).?; + if (relocIsTlv(macho_file, rel) and macho_file.getSymbol(target).undf()) + break :blk macho_file.getTlvPtrEntryAddress(target).?; const is_tlv = is_tlv: { - const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); - const header = zld.sections.items(.header)[source_sym.n_sect - 1]; + const source_sym = macho_file.getSymbol(atom.getSymbolWithLoc()); + const header = macho_file.sections.items(.header)[source_sym.n_sect - 1]; break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; }; - break :blk try getRelocTargetAddress(zld, target, is_tlv); + break :blk try getRelocTargetAddress(macho_file, target, is_tlv); }; log.debug(" | source_addr = 0x{x}", .{source_addr}); @@ -1115,7 +1070,7 @@ fn resolveRelocsX86( log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); - if (zld.tlv_ptr_table.lookup.get(target) == null) { + if (macho_file.tlv_ptr_table.lookup.get(target) == null) { // We need to rewrite the opcode from movq to leaq. 
atom_code[rel_offset - 2] = 0x8d; } @@ -1170,7 +1125,7 @@ fn resolveRelocsX86( const result = blk: { if (subtractor) |sub| { - const sym = zld.getSymbol(sub); + const sym = macho_file.getSymbol(sub); break :blk @as(i64, @intCast(target_addr)) - @as(i64, @intCast(sym.n_value)) + addend; } else { break :blk @as(i64, @intCast(target_addr)) + addend; @@ -1192,10 +1147,10 @@ fn resolveRelocsX86( } } -pub fn getAtomCode(zld: *Zld, atom_index: Index) []const u8 { - const atom = zld.getAtom(atom_index); +pub fn getAtomCode(macho_file: *MachO, atom_index: Index) []const u8 { + const atom = macho_file.getAtom(atom_index); assert(atom.getFile() != null); // Synthetic atom shouldn't need to inquire for code. - const object = zld.objects.items[atom.getFile().?]; + const object = macho_file.objects.items[atom.getFile().?]; const source_sym = object.getSourceSymbol(atom.sym_index) orelse { // If there was no matching symbol present in the source symtab, this means // we are dealing with either an entire section, or part of it, but also @@ -1216,10 +1171,10 @@ pub fn getAtomCode(zld: *Zld, atom_index: Index) []const u8 { return code[offset..][0..code_len]; } -pub fn getAtomRelocs(zld: *Zld, atom_index: Index) []const macho.relocation_info { - const atom = zld.getAtom(atom_index); +pub fn getAtomRelocs(macho_file: *MachO, atom_index: Index) []const macho.relocation_info { + const atom = macho_file.getAtom(atom_index); assert(atom.getFile() != null); // Synthetic atom shouldn't need to unique for relocs. 
- const object = zld.objects.items[atom.getFile().?]; + const object = macho_file.objects.items[atom.getFile().?]; const cache = object.relocs_lookup[atom.sym_index]; const source_sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { @@ -1238,8 +1193,8 @@ pub fn getAtomRelocs(zld: *Zld, atom_index: Index) []const macho.relocation_info return relocs[cache.start..][0..cache.len]; } -pub fn relocRequiresGot(zld: *Zld, rel: macho.relocation_info) bool { - switch (zld.options.target.cpu.arch) { +pub fn relocRequiresGot(macho_file: *MachO, rel: macho.relocation_info) bool { + switch (macho_file.base.options.target.cpu.arch) { .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { .ARM64_RELOC_GOT_LOAD_PAGE21, .ARM64_RELOC_GOT_LOAD_PAGEOFF12, @@ -1257,8 +1212,8 @@ pub fn relocRequiresGot(zld: *Zld, rel: macho.relocation_info) bool { } } -pub fn relocIsTlv(zld: *Zld, rel: macho.relocation_info) bool { - switch (zld.options.target.cpu.arch) { +pub fn relocIsTlv(macho_file: *MachO, rel: macho.relocation_info) bool { + switch (macho_file.base.options.target.cpu.arch) { .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { .ARM64_RELOC_TLVP_LOAD_PAGE21, .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, @@ -1273,8 +1228,8 @@ pub fn relocIsTlv(zld: *Zld, rel: macho.relocation_info) bool { } } -pub fn relocIsStub(zld: *Zld, rel: macho.relocation_info) bool { - switch (zld.options.target.cpu.arch) { +pub fn relocIsStub(macho_file: *MachO, rel: macho.relocation_info) bool { + switch (macho_file.base.options.target.cpu.arch) { .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { .ARM64_RELOC_BRANCH26 => return true, else => return false, @@ -1305,4 +1260,3 @@ const Arch = std.Target.Cpu.Arch; const MachO = @import("../MachO.zig"); pub const Relocation = @import("Relocation.zig"); const SymbolWithLoc = MachO.SymbolWithLoc; -const Zld = @import("zld.zig").Zld; diff --git a/src/link/MachO/CodeSignature.zig 
b/src/link/MachO/CodeSignature.zig index f527ca3581..973d9a2591 100644 --- a/src/link/MachO/CodeSignature.zig +++ b/src/link/MachO/CodeSignature.zig @@ -1,17 +1,175 @@ -const CodeSignature = @This(); +page_size: u16, +code_directory: CodeDirectory, +requirements: ?Requirements = null, +entitlements: ?Entitlements = null, +signature: ?Signature = null, -const std = @import("std"); -const assert = std.debug.assert; -const fs = std.fs; -const log = std.log.scoped(.link); -const macho = std.macho; -const mem = std.mem; -const testing = std.testing; +pub fn init(page_size: u16) CodeSignature { + return .{ + .page_size = page_size, + .code_directory = CodeDirectory.init(page_size), + }; +} -const Allocator = mem.Allocator; -const Compilation = @import("../../Compilation.zig"); -const Hasher = @import("hasher.zig").ParallelHasher; -const Sha256 = std.crypto.hash.sha2.Sha256; +pub fn deinit(self: *CodeSignature, allocator: Allocator) void { + self.code_directory.deinit(allocator); + if (self.requirements) |*req| { + req.deinit(allocator); + } + if (self.entitlements) |*ents| { + ents.deinit(allocator); + } + if (self.signature) |*sig| { + sig.deinit(allocator); + } +} + +pub fn addEntitlements(self: *CodeSignature, allocator: Allocator, path: []const u8) !void { + const file = try fs.cwd().openFile(path, .{}); + defer file.close(); + const inner = try file.readToEndAlloc(allocator, std.math.maxInt(u32)); + self.entitlements = .{ .inner = inner }; +} + +pub const WriteOpts = struct { + file: fs.File, + exec_seg_base: u64, + exec_seg_limit: u64, + file_size: u32, + output_mode: std.builtin.OutputMode, +}; + +pub fn writeAdhocSignature( + self: *CodeSignature, + comp: *const Compilation, + opts: WriteOpts, + writer: anytype, +) !void { + const gpa = comp.gpa; + + var header: macho.SuperBlob = .{ + .magic = macho.CSMAGIC_EMBEDDED_SIGNATURE, + .length = @sizeOf(macho.SuperBlob), + .count = 0, + }; + + var blobs = std.ArrayList(Blob).init(gpa); + defer blobs.deinit(); + + 
self.code_directory.inner.execSegBase = opts.exec_seg_base; + self.code_directory.inner.execSegLimit = opts.exec_seg_limit; + self.code_directory.inner.execSegFlags = if (opts.output_mode == .Exe) macho.CS_EXECSEG_MAIN_BINARY else 0; + self.code_directory.inner.codeLimit = opts.file_size; + + const total_pages = @as(u32, @intCast(mem.alignForward(usize, opts.file_size, self.page_size) / self.page_size)); + + try self.code_directory.code_slots.ensureTotalCapacityPrecise(gpa, total_pages); + self.code_directory.code_slots.items.len = total_pages; + self.code_directory.inner.nCodeSlots = total_pages; + + // Calculate hash for each page (in file) and write it to the buffer + var hasher = Hasher(Sha256){ .allocator = gpa, .thread_pool = comp.thread_pool }; + try hasher.hash(opts.file, self.code_directory.code_slots.items, .{ + .chunk_size = self.page_size, + .max_file_size = opts.file_size, + }); + + try blobs.append(.{ .code_directory = &self.code_directory }); + header.length += @sizeOf(macho.BlobIndex); + header.count += 1; + + var hash: [hash_size]u8 = undefined; + + if (self.requirements) |*req| { + var buf = std.ArrayList(u8).init(gpa); + defer buf.deinit(); + try req.write(buf.writer()); + Sha256.hash(buf.items, &hash, .{}); + self.code_directory.addSpecialHash(req.slotType(), hash); + + try blobs.append(.{ .requirements = req }); + header.count += 1; + header.length += @sizeOf(macho.BlobIndex) + req.size(); + } + + if (self.entitlements) |*ents| { + var buf = std.ArrayList(u8).init(gpa); + defer buf.deinit(); + try ents.write(buf.writer()); + Sha256.hash(buf.items, &hash, .{}); + self.code_directory.addSpecialHash(ents.slotType(), hash); + + try blobs.append(.{ .entitlements = ents }); + header.count += 1; + header.length += @sizeOf(macho.BlobIndex) + ents.size(); + } + + if (self.signature) |*sig| { + try blobs.append(.{ .signature = sig }); + header.count += 1; + header.length += @sizeOf(macho.BlobIndex) + sig.size(); + } + + 
self.code_directory.inner.hashOffset = + @sizeOf(macho.CodeDirectory) + @as(u32, @intCast(self.code_directory.ident.len + 1 + self.code_directory.inner.nSpecialSlots * hash_size)); + self.code_directory.inner.length = self.code_directory.size(); + header.length += self.code_directory.size(); + + try writer.writeIntBig(u32, header.magic); + try writer.writeIntBig(u32, header.length); + try writer.writeIntBig(u32, header.count); + + var offset: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) * @as(u32, @intCast(blobs.items.len)); + for (blobs.items) |blob| { + try writer.writeIntBig(u32, blob.slotType()); + try writer.writeIntBig(u32, offset); + offset += blob.size(); + } + + for (blobs.items) |blob| { + try blob.write(writer); + } +} + +pub fn size(self: CodeSignature) u32 { + var ssize: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size(); + if (self.requirements) |req| { + ssize += @sizeOf(macho.BlobIndex) + req.size(); + } + if (self.entitlements) |ent| { + ssize += @sizeOf(macho.BlobIndex) + ent.size(); + } + if (self.signature) |sig| { + ssize += @sizeOf(macho.BlobIndex) + sig.size(); + } + return ssize; +} + +pub fn estimateSize(self: CodeSignature, file_size: u64) u32 { + var ssize: u64 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size(); + // Approx code slots + const total_pages = mem.alignForward(u64, file_size, self.page_size) / self.page_size; + ssize += total_pages * hash_size; + var n_special_slots: u32 = 0; + if (self.requirements) |req| { + ssize += @sizeOf(macho.BlobIndex) + req.size(); + n_special_slots = @max(n_special_slots, req.slotType()); + } + if (self.entitlements) |ent| { + ssize += @sizeOf(macho.BlobIndex) + ent.size() + hash_size; + n_special_slots = @max(n_special_slots, ent.slotType()); + } + if (self.signature) |sig| { + ssize += @sizeOf(macho.BlobIndex) + sig.size(); + } + ssize += n_special_slots * hash_size; + return @as(u32, @intCast(mem.alignForward(u64, 
ssize, @sizeOf(u64)))); +} + +pub fn clear(self: *CodeSignature, allocator: Allocator) void { + self.code_directory.deinit(allocator); + self.code_directory = CodeDirectory.init(self.page_size); +} const hash_size = Sha256.digest_length; @@ -218,175 +376,17 @@ const Signature = struct { } }; -page_size: u16, -code_directory: CodeDirectory, -requirements: ?Requirements = null, -entitlements: ?Entitlements = null, -signature: ?Signature = null, +const CodeSignature = @This(); -pub fn init(page_size: u16) CodeSignature { - return .{ - .page_size = page_size, - .code_directory = CodeDirectory.init(page_size), - }; -} +const std = @import("std"); +const assert = std.debug.assert; +const fs = std.fs; +const log = std.log.scoped(.link); +const macho = std.macho; +const mem = std.mem; +const testing = std.testing; -pub fn deinit(self: *CodeSignature, allocator: Allocator) void { - self.code_directory.deinit(allocator); - if (self.requirements) |*req| { - req.deinit(allocator); - } - if (self.entitlements) |*ents| { - ents.deinit(allocator); - } - if (self.signature) |*sig| { - sig.deinit(allocator); - } -} - -pub fn addEntitlements(self: *CodeSignature, allocator: Allocator, path: []const u8) !void { - const file = try fs.cwd().openFile(path, .{}); - defer file.close(); - const inner = try file.readToEndAlloc(allocator, std.math.maxInt(u32)); - self.entitlements = .{ .inner = inner }; -} - -pub const WriteOpts = struct { - file: fs.File, - exec_seg_base: u64, - exec_seg_limit: u64, - file_size: u32, - output_mode: std.builtin.OutputMode, -}; - -pub fn writeAdhocSignature( - self: *CodeSignature, - comp: *const Compilation, - opts: WriteOpts, - writer: anytype, -) !void { - const gpa = comp.gpa; - - var header: macho.SuperBlob = .{ - .magic = macho.CSMAGIC_EMBEDDED_SIGNATURE, - .length = @sizeOf(macho.SuperBlob), - .count = 0, - }; - - var blobs = std.ArrayList(Blob).init(gpa); - defer blobs.deinit(); - - self.code_directory.inner.execSegBase = opts.exec_seg_base; - 
self.code_directory.inner.execSegLimit = opts.exec_seg_limit; - self.code_directory.inner.execSegFlags = if (opts.output_mode == .Exe) macho.CS_EXECSEG_MAIN_BINARY else 0; - self.code_directory.inner.codeLimit = opts.file_size; - - const total_pages = @as(u32, @intCast(mem.alignForward(usize, opts.file_size, self.page_size) / self.page_size)); - - try self.code_directory.code_slots.ensureTotalCapacityPrecise(gpa, total_pages); - self.code_directory.code_slots.items.len = total_pages; - self.code_directory.inner.nCodeSlots = total_pages; - - // Calculate hash for each page (in file) and write it to the buffer - var hasher = Hasher(Sha256){ .allocator = gpa, .thread_pool = comp.thread_pool }; - try hasher.hash(opts.file, self.code_directory.code_slots.items, .{ - .chunk_size = self.page_size, - .max_file_size = opts.file_size, - }); - - try blobs.append(.{ .code_directory = &self.code_directory }); - header.length += @sizeOf(macho.BlobIndex); - header.count += 1; - - var hash: [hash_size]u8 = undefined; - - if (self.requirements) |*req| { - var buf = std.ArrayList(u8).init(gpa); - defer buf.deinit(); - try req.write(buf.writer()); - Sha256.hash(buf.items, &hash, .{}); - self.code_directory.addSpecialHash(req.slotType(), hash); - - try blobs.append(.{ .requirements = req }); - header.count += 1; - header.length += @sizeOf(macho.BlobIndex) + req.size(); - } - - if (self.entitlements) |*ents| { - var buf = std.ArrayList(u8).init(gpa); - defer buf.deinit(); - try ents.write(buf.writer()); - Sha256.hash(buf.items, &hash, .{}); - self.code_directory.addSpecialHash(ents.slotType(), hash); - - try blobs.append(.{ .entitlements = ents }); - header.count += 1; - header.length += @sizeOf(macho.BlobIndex) + ents.size(); - } - - if (self.signature) |*sig| { - try blobs.append(.{ .signature = sig }); - header.count += 1; - header.length += @sizeOf(macho.BlobIndex) + sig.size(); - } - - self.code_directory.inner.hashOffset = - @sizeOf(macho.CodeDirectory) + @as(u32, 
@intCast(self.code_directory.ident.len + 1 + self.code_directory.inner.nSpecialSlots * hash_size)); - self.code_directory.inner.length = self.code_directory.size(); - header.length += self.code_directory.size(); - - try writer.writeIntBig(u32, header.magic); - try writer.writeIntBig(u32, header.length); - try writer.writeIntBig(u32, header.count); - - var offset: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) * @as(u32, @intCast(blobs.items.len)); - for (blobs.items) |blob| { - try writer.writeIntBig(u32, blob.slotType()); - try writer.writeIntBig(u32, offset); - offset += blob.size(); - } - - for (blobs.items) |blob| { - try blob.write(writer); - } -} - -pub fn size(self: CodeSignature) u32 { - var ssize: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size(); - if (self.requirements) |req| { - ssize += @sizeOf(macho.BlobIndex) + req.size(); - } - if (self.entitlements) |ent| { - ssize += @sizeOf(macho.BlobIndex) + ent.size(); - } - if (self.signature) |sig| { - ssize += @sizeOf(macho.BlobIndex) + sig.size(); - } - return ssize; -} - -pub fn estimateSize(self: CodeSignature, file_size: u64) u32 { - var ssize: u64 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size(); - // Approx code slots - const total_pages = mem.alignForward(u64, file_size, self.page_size) / self.page_size; - ssize += total_pages * hash_size; - var n_special_slots: u32 = 0; - if (self.requirements) |req| { - ssize += @sizeOf(macho.BlobIndex) + req.size(); - n_special_slots = @max(n_special_slots, req.slotType()); - } - if (self.entitlements) |ent| { - ssize += @sizeOf(macho.BlobIndex) + ent.size() + hash_size; - n_special_slots = @max(n_special_slots, ent.slotType()); - } - if (self.signature) |sig| { - ssize += @sizeOf(macho.BlobIndex) + sig.size(); - } - ssize += n_special_slots * hash_size; - return @as(u32, @intCast(mem.alignForward(u64, ssize, @sizeOf(u64)))); -} - -pub fn clear(self: *CodeSignature, allocator: 
Allocator) void { - self.code_directory.deinit(allocator); - self.code_directory = CodeDirectory.init(self.page_size); -} +const Allocator = mem.Allocator; +const Compilation = @import("../../Compilation.zig"); +const Hasher = @import("hasher.zig").ParallelHasher; +const Sha256 = std.crypto.hash.sha2.Sha256; diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 602ee1ed63..d20f32c14c 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -1,26 +1,3 @@ -const DebugSymbols = @This(); - -const std = @import("std"); -const build_options = @import("build_options"); -const assert = std.debug.assert; -const fs = std.fs; -const link = @import("../../link.zig"); -const load_commands = @import("load_commands.zig"); -const log = std.log.scoped(.dsym); -const macho = std.macho; -const makeStaticString = MachO.makeStaticString; -const math = std.math; -const mem = std.mem; -const padToIdeal = MachO.padToIdeal; -const trace = @import("../../tracy.zig").trace; - -const Allocator = mem.Allocator; -const Dwarf = @import("../Dwarf.zig"); -const MachO = @import("../MachO.zig"); -const Module = @import("../../Module.zig"); -const StringTable = @import("../strtab.zig").StringTable; -const Type = @import("../../type.zig").Type; - allocator: Allocator, dwarf: Dwarf, file: fs.File, @@ -569,3 +546,26 @@ pub fn getSection(self: DebugSymbols, sect: u8) macho.section_64 { assert(sect < self.sections.items.len); return self.sections.items[sect]; } + +const DebugSymbols = @This(); + +const std = @import("std"); +const build_options = @import("build_options"); +const assert = std.debug.assert; +const fs = std.fs; +const link = @import("../../link.zig"); +const load_commands = @import("load_commands.zig"); +const log = std.log.scoped(.dsym); +const macho = std.macho; +const makeStaticString = MachO.makeStaticString; +const math = std.math; +const mem = std.mem; +const padToIdeal = MachO.padToIdeal; +const trace = 
@import("../../tracy.zig").trace; + +const Allocator = mem.Allocator; +const Dwarf = @import("../Dwarf.zig"); +const MachO = @import("../MachO.zig"); +const Module = @import("../../Module.zig"); +const StringTable = @import("../strtab.zig").StringTable; +const Type = @import("../../type.zig").Type; diff --git a/src/link/MachO/DwarfInfo.zig b/src/link/MachO/DwarfInfo.zig index 07d98e8e94..7b0536f60f 100644 --- a/src/link/MachO/DwarfInfo.zig +++ b/src/link/MachO/DwarfInfo.zig @@ -1,17 +1,3 @@ -const DwarfInfo = @This(); - -const std = @import("std"); -const assert = std.debug.assert; -const dwarf = std.dwarf; -const leb = std.leb; -const log = std.log.scoped(.macho); -const math = std.math; -const mem = std.mem; - -const Allocator = mem.Allocator; -pub const AbbrevLookupTable = std.AutoHashMap(u64, struct { pos: usize, len: usize }); -pub const SubprogramLookupByName = std.StringHashMap(struct { addr: u64, size: u64 }); - debug_info: []const u8, debug_abbrev: []const u8, debug_str: []const u8, @@ -501,3 +487,17 @@ fn getString(self: DwarfInfo, off: u64) []const u8 { assert(off < self.debug_str.len); return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.debug_str.ptr + @as(usize, @intCast(off)))), 0); } + +const DwarfInfo = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const dwarf = std.dwarf; +const leb = std.leb; +const log = std.log.scoped(.macho); +const math = std.math; +const mem = std.mem; + +const Allocator = mem.Allocator; +pub const AbbrevLookupTable = std.AutoHashMap(u64, struct { pos: usize, len: usize }); +pub const SubprogramLookupByName = std.StringHashMap(struct { addr: u64, size: u64 }); diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index c424343a4e..19a9eb8cd4 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -1,23 +1,3 @@ -const Dylib = @This(); - -const std = @import("std"); -const assert = std.debug.assert; -const fs = std.fs; -const fmt = std.fmt; -const log = 
std.log.scoped(.link); -const macho = std.macho; -const math = std.math; -const mem = std.mem; -const fat = @import("fat.zig"); -const tapi = @import("../tapi.zig"); - -const Allocator = mem.Allocator; -const CrossTarget = std.zig.CrossTarget; -const LibStub = tapi.LibStub; -const LoadCommandIterator = macho.LoadCommandIterator; -const MachO = @import("../MachO.zig"); -const Tbd = tapi.Tbd; - id: ?Id = null, weak: bool = false, /// Header is only set if Dylib is parsed directly from a binary and not a stub file. @@ -546,3 +526,23 @@ pub fn parseFromStub( } } } + +const Dylib = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const fs = std.fs; +const fmt = std.fmt; +const log = std.log.scoped(.link); +const macho = std.macho; +const math = std.math; +const mem = std.mem; +const fat = @import("fat.zig"); +const tapi = @import("../tapi.zig"); + +const Allocator = mem.Allocator; +const CrossTarget = std.zig.CrossTarget; +const LibStub = tapi.LibStub; +const LoadCommandIterator = macho.LoadCommandIterator; +const MachO = @import("../MachO.zig"); +const Tbd = tapi.Tbd; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 3ab62ec191..4af0c3e7aa 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -2,31 +2,6 @@ //! Each Object is fully loaded into memory for easier //! access into different data within. 
-const Object = @This(); - -const std = @import("std"); -const build_options = @import("build_options"); -const assert = std.debug.assert; -const dwarf = std.dwarf; -const eh_frame = @import("eh_frame.zig"); -const fs = std.fs; -const io = std.io; -const log = std.log.scoped(.link); -const macho = std.macho; -const math = std.math; -const mem = std.mem; -const sort = std.sort; -const trace = @import("../../tracy.zig").trace; - -const Allocator = mem.Allocator; -const Atom = @import("Atom.zig"); -const DwarfInfo = @import("DwarfInfo.zig"); -const LoadCommandIterator = macho.LoadCommandIterator; -const MachO = @import("../MachO.zig"); -const SymbolWithLoc = MachO.SymbolWithLoc; -const UnwindInfo = @import("UnwindInfo.zig"); -const Zld = @import("zld.zig").Zld; - name: []const u8, mtime: u64, contents: []align(@alignOf(u64)) const u8, @@ -359,25 +334,25 @@ fn sectionLessThanByAddress(ctx: void, lhs: SortedSection, rhs: SortedSection) b return lhs.header.addr < rhs.header.addr; } -pub fn splitIntoAtoms(self: *Object, zld: *Zld, object_id: u32) !void { +pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void { log.debug("splitting object({d}, {s}) into atoms", .{ object_id, self.name }); - try self.splitRegularSections(zld, object_id); - try self.parseEhFrameSection(zld, object_id); - try self.parseUnwindInfo(zld, object_id); - try self.parseDataInCode(zld.gpa); + try self.splitRegularSections(macho_file, object_id); + try self.parseEhFrameSection(macho_file, object_id); + try self.parseUnwindInfo(macho_file, object_id); + try self.parseDataInCode(macho_file.base.allocator); } /// Splits input regular sections into Atoms. /// If the Object was compiled with `MH_SUBSECTIONS_VIA_SYMBOLS`, splits section /// into subsections where each subsection then represents an Atom. 
-pub fn splitRegularSections(self: *Object, zld: *Zld, object_id: u32) !void { - const gpa = zld.gpa; +pub fn splitRegularSections(self: *Object, macho_file: *MachO, object_id: u32) !void { + const gpa = macho_file.base.allocator; const sections = self.getSourceSections(); for (sections, 0..) |sect, id| { if (sect.isDebug()) continue; - const out_sect_id = (try Atom.getOutputSection(zld, sect)) orelse { + const out_sect_id = (try Atom.getOutputSection(macho_file, sect)) orelse { log.debug(" unhandled section '{s},{s}'", .{ sect.segName(), sect.sectName() }); continue; }; @@ -397,13 +372,13 @@ pub fn splitRegularSections(self: *Object, zld: *Zld, object_id: u32) !void { if (self.in_symtab == null) { for (sections, 0..) |sect, id| { if (sect.isDebug()) continue; - const out_sect_id = (try Atom.getOutputSection(zld, sect)) orelse continue; + const out_sect_id = (try Atom.getOutputSection(macho_file, sect)) orelse continue; if (sect.size == 0) continue; const sect_id = @as(u8, @intCast(id)); const sym_index = self.getSectionAliasSymbolIndex(sect_id); const atom_index = try self.createAtomFromSubsection( - zld, + macho_file, object_id, sym_index, sym_index, @@ -412,7 +387,7 @@ pub fn splitRegularSections(self: *Object, zld: *Zld, object_id: u32) !void { sect.@"align", out_sect_id, ); - zld.addAtomToSection(atom_index); + macho_file.addAtomToSection(atom_index); } return; } @@ -456,17 +431,17 @@ pub fn splitRegularSections(self: *Object, zld: *Zld, object_id: u32) !void { log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() }); // Get output segment/section in the final artifact. 
- const out_sect_id = (try Atom.getOutputSection(zld, sect)) orelse continue; + const out_sect_id = (try Atom.getOutputSection(macho_file, sect)) orelse continue; log.debug(" output sect({d}, '{s},{s}')", .{ out_sect_id + 1, - zld.sections.items(.header)[out_sect_id].segName(), - zld.sections.items(.header)[out_sect_id].sectName(), + macho_file.sections.items(.header)[out_sect_id].segName(), + macho_file.sections.items(.header)[out_sect_id].sectName(), }); try self.parseRelocs(gpa, section.id); - const cpu_arch = zld.options.target.cpu.arch; + const cpu_arch = macho_file.base.options.target.cpu.arch; const sect_loc = filterSymbolsBySection(symtab[sect_sym_index..], sect_id + 1); const sect_start_index = sect_sym_index + sect_loc.index; @@ -482,7 +457,7 @@ pub fn splitRegularSections(self: *Object, zld: *Zld, object_id: u32) !void { const sym_index = self.getSectionAliasSymbolIndex(sect_id); const atom_size = first_sym.n_value - sect.addr; const atom_index = try self.createAtomFromSubsection( - zld, + macho_file, object_id, sym_index, sym_index, @@ -492,9 +467,9 @@ pub fn splitRegularSections(self: *Object, zld: *Zld, object_id: u32) !void { out_sect_id, ); if (!sect.isZerofill()) { - try self.cacheRelocs(zld, atom_index); + try self.cacheRelocs(macho_file, atom_index); } - zld.addAtomToSection(atom_index); + macho_file.addAtomToSection(atom_index); } var next_sym_index = sect_start_index; @@ -518,7 +493,7 @@ pub fn splitRegularSections(self: *Object, zld: *Zld, object_id: u32) !void { sect.@"align"; const atom_index = try self.createAtomFromSubsection( - zld, + macho_file, object_id, atom_sym_index, atom_sym_index, @@ -537,14 +512,14 @@ pub fn splitRegularSections(self: *Object, zld: *Zld, object_id: u32) !void { self.atom_by_index_table[alias_index] = atom_index; } if (!sect.isZerofill()) { - try self.cacheRelocs(zld, atom_index); + try self.cacheRelocs(macho_file, atom_index); } - zld.addAtomToSection(atom_index); + macho_file.addAtomToSection(atom_index); } } 
else { const alias_index = self.getSectionAliasSymbolIndex(sect_id); const atom_index = try self.createAtomFromSubsection( - zld, + macho_file, object_id, alias_index, sect_start_index, @@ -554,16 +529,16 @@ pub fn splitRegularSections(self: *Object, zld: *Zld, object_id: u32) !void { out_sect_id, ); if (!sect.isZerofill()) { - try self.cacheRelocs(zld, atom_index); + try self.cacheRelocs(macho_file, atom_index); } - zld.addAtomToSection(atom_index); + macho_file.addAtomToSection(atom_index); } } } fn createAtomFromSubsection( self: *Object, - zld: *Zld, + macho_file: *MachO, object_id: u32, sym_index: u32, inner_sym_index: u32, @@ -572,9 +547,9 @@ fn createAtomFromSubsection( alignment: u32, out_sect_id: u8, ) !Atom.Index { - const gpa = zld.gpa; - const atom_index = try zld.createAtom(sym_index, .{ .size = size, .alignment = alignment }); - const atom = zld.getAtomPtr(atom_index); + const gpa = macho_file.base.allocator; + const atom_index = try macho_file.createAtom(sym_index, .{ .size = size, .alignment = alignment }); + const atom = macho_file.getAtomPtr(atom_index); atom.inner_sym_index = inner_sym_index; atom.inner_nsyms_trailing = inner_nsyms_trailing; atom.file = object_id + 1; @@ -584,22 +559,22 @@ fn createAtomFromSubsection( sym_index, self.getSymbolName(sym_index), out_sect_id + 1, - zld.sections.items(.header)[out_sect_id].segName(), - zld.sections.items(.header)[out_sect_id].sectName(), + macho_file.sections.items(.header)[out_sect_id].segName(), + macho_file.sections.items(.header)[out_sect_id].sectName(), object_id, }); try self.atoms.append(gpa, atom_index); self.atom_by_index_table[sym_index] = atom_index; - var it = Atom.getInnerSymbolsIterator(zld, atom_index); + var it = Atom.getInnerSymbolsIterator(macho_file, atom_index); while (it.next()) |sym_loc| { - const inner = zld.getSymbolPtr(sym_loc); + const inner = macho_file.getSymbolPtr(sym_loc); inner.n_sect = out_sect_id + 1; self.atom_by_index_table[sym_loc.sym_index] = atom_index; } - const 
out_sect = zld.sections.items(.header)[out_sect_id]; + const out_sect = macho_file.sections.items(.header)[out_sect_id]; if (out_sect.isCode() and mem.eql(u8, "__TEXT", out_sect.segName()) and mem.eql(u8, "__text", out_sect.sectName())) @@ -651,8 +626,8 @@ fn parseRelocs(self: *Object, gpa: Allocator, sect_id: u8) !void { self.section_relocs_lookup.items[sect_id] = start; } -fn cacheRelocs(self: *Object, zld: *Zld, atom_index: Atom.Index) !void { - const atom = zld.getAtom(atom_index); +fn cacheRelocs(self: *Object, macho_file: *MachO, atom_index: Atom.Index) !void { + const atom = macho_file.getAtom(atom_index); const source_sect_id = if (self.getSourceSymbol(atom.sym_index)) |source_sym| blk: { break :blk source_sym.n_sect - 1; @@ -679,19 +654,19 @@ fn relocGreaterThan(ctx: void, lhs: macho.relocation_info, rhs: macho.relocation return lhs.r_address > rhs.r_address; } -fn parseEhFrameSection(self: *Object, zld: *Zld, object_id: u32) !void { +fn parseEhFrameSection(self: *Object, macho_file: *MachO, object_id: u32) !void { const sect_id = self.eh_frame_sect_id orelse return; const sect = self.getSourceSection(sect_id); log.debug("parsing __TEXT,__eh_frame section", .{}); - const gpa = zld.gpa; + const gpa = macho_file.base.allocator; - if (zld.eh_frame_section_index == null) { - zld.eh_frame_section_index = try MachO.initSection(gpa, zld, "__TEXT", "__eh_frame", .{}); + if (macho_file.eh_frame_section_index == null) { + macho_file.eh_frame_section_index = try macho_file.initSection("__TEXT", "__eh_frame", .{}); } - const cpu_arch = zld.options.target.cpu.arch; + const cpu_arch = macho_file.base.options.target.cpu.arch; try self.parseRelocs(gpa, sect_id); const relocs = self.getRelocs(sect_id); @@ -729,7 +704,7 @@ fn parseEhFrameSection(self: *Object, zld: *Zld, object_id: u32) !void { @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)) == .ARM64_RELOC_UNSIGNED) break rel; } else unreachable; - const target = Atom.parseRelocTarget(zld, .{ + const target = 
Atom.parseRelocTarget(macho_file, .{ .object_id = object_id, .rel = rel, .code = it.data[offset..], @@ -744,7 +719,7 @@ fn parseEhFrameSection(self: *Object, zld: *Zld, object_id: u32) !void { }); const target_sym_index = self.getSymbolByAddress(target_address, null); const target = if (self.getGlobal(target_sym_index)) |global_index| - zld.globals.items[global_index] + macho_file.globals.items[global_index] else SymbolWithLoc{ .sym_index = target_sym_index, .file = object_id + 1 }; break :blk target; @@ -770,7 +745,7 @@ fn parseEhFrameSection(self: *Object, zld: *Zld, object_id: u32) !void { }; log.debug("FDE at offset {x} tracks {s}", .{ offset, - zld.getSymbolName(actual_target), + macho_file.getSymbolName(actual_target), }); try self.eh_frame_records_lookup.putNoClobber(gpa, actual_target, offset); } @@ -779,19 +754,17 @@ fn parseEhFrameSection(self: *Object, zld: *Zld, object_id: u32) !void { } } -fn parseUnwindInfo(self: *Object, zld: *Zld, object_id: u32) !void { - const gpa = zld.gpa; - const cpu_arch = zld.options.target.cpu.arch; +fn parseUnwindInfo(self: *Object, macho_file: *MachO, object_id: u32) !void { + const gpa = macho_file.base.allocator; + const cpu_arch = macho_file.base.options.target.cpu.arch; const sect_id = self.unwind_info_sect_id orelse { // If it so happens that the object had `__eh_frame` section defined but no `__compact_unwind`, // we will try fully synthesising unwind info records to somewhat match Apple ld's // approach. However, we will only synthesise DWARF records and nothing more. For this reason, // we still create the output `__TEXT,__unwind_info` section. 
if (self.hasEhFrameRecords()) { - if (zld.unwind_info_section_index == null) { - zld.unwind_info_section_index = try MachO.initSection( - gpa, - zld, + if (macho_file.unwind_info_section_index == null) { + macho_file.unwind_info_section_index = try macho_file.initSection( "__TEXT", "__unwind_info", .{}, @@ -803,8 +776,8 @@ fn parseUnwindInfo(self: *Object, zld: *Zld, object_id: u32) !void { log.debug("parsing unwind info in {s}", .{self.name}); - if (zld.unwind_info_section_index == null) { - zld.unwind_info_section_index = try MachO.initSection(gpa, zld, "__TEXT", "__unwind_info", .{}); + if (macho_file.unwind_info_section_index == null) { + macho_file.unwind_info_section_index = try macho_file.initSection("__TEXT", "__unwind_info", .{}); } const unwind_records = self.getUnwindRecords(); @@ -839,7 +812,7 @@ fn parseUnwindInfo(self: *Object, zld: *Zld, object_id: u32) !void { // Find function symbol that this record describes const rel = relocs[rel_pos.start..][rel_pos.len - 1]; - const target = Atom.parseRelocTarget(zld, .{ + const target = Atom.parseRelocTarget(macho_file, .{ .object_id = object_id, .rel = rel, .code = mem.asBytes(&record), @@ -863,7 +836,7 @@ fn parseUnwindInfo(self: *Object, zld: *Zld, object_id: u32) !void { }; log.debug("unwind record {d} tracks {s}", .{ record_id, - zld.getSymbolName(actual_target), + macho_file.getSymbolName(actual_target), }); try self.unwind_records_lookup.putNoClobber(gpa, actual_target, @intCast(record_id)); } @@ -1094,3 +1067,27 @@ pub fn getEhFrameRecordsIterator(self: Object) eh_frame.Iterator { pub fn hasDataInCode(self: Object) bool { return self.data_in_code.items.len > 0; } + +const Object = @This(); + +const std = @import("std"); +const build_options = @import("build_options"); +const assert = std.debug.assert; +const dwarf = std.dwarf; +const eh_frame = @import("eh_frame.zig"); +const fs = std.fs; +const io = std.io; +const log = std.log.scoped(.link); +const macho = std.macho; +const math = std.math; +const 
mem = std.mem; +const sort = std.sort; +const trace = @import("../../tracy.zig").trace; + +const Allocator = mem.Allocator; +const Atom = @import("Atom.zig"); +const DwarfInfo = @import("DwarfInfo.zig"); +const LoadCommandIterator = macho.LoadCommandIterator; +const MachO = @import("../MachO.zig"); +const SymbolWithLoc = MachO.SymbolWithLoc; +const UnwindInfo = @import("UnwindInfo.zig"); diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig index 962ead72fa..d86338f84b 100644 --- a/src/link/MachO/Trie.zig +++ b/src/link/MachO/Trie.zig @@ -28,248 +28,6 @@ //! After the optional exported symbol information is a byte of how many edges (0-255) that //! this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of //! the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to. -const Trie = @This(); - -const std = @import("std"); -const mem = std.mem; -const leb = std.leb; -const log = std.log.scoped(.link); -const macho = std.macho; -const testing = std.testing; -const assert = std.debug.assert; -const Allocator = mem.Allocator; - -pub const Node = struct { - base: *Trie, - - /// Terminal info associated with this node. - /// If this node is not a terminal node, info is null. - terminal_info: ?struct { - /// Export flags associated with this exported symbol. - export_flags: u64, - /// VM address offset wrt to the section this symbol is defined against. - vmaddr_offset: u64, - } = null, - - /// Offset of this node in the trie output byte stream. - trie_offset: ?u64 = null, - - /// List of all edges originating from this node. - edges: std.ArrayListUnmanaged(Edge) = .{}, - - node_dirty: bool = true, - - /// Edge connecting to nodes in the trie. 
- pub const Edge = struct { - from: *Node, - to: *Node, - label: []u8, - - fn deinit(self: *Edge, allocator: Allocator) void { - self.to.deinit(allocator); - allocator.destroy(self.to); - allocator.free(self.label); - self.from = undefined; - self.to = undefined; - self.label = undefined; - } - }; - - fn deinit(self: *Node, allocator: Allocator) void { - for (self.edges.items) |*edge| { - edge.deinit(allocator); - } - self.edges.deinit(allocator); - } - - /// Inserts a new node starting from `self`. - fn put(self: *Node, allocator: Allocator, label: []const u8) !*Node { - // Check for match with edges from this node. - for (self.edges.items) |*edge| { - const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to; - if (match == 0) continue; - if (match == edge.label.len) return edge.to.put(allocator, label[match..]); - - // Found a match, need to splice up nodes. - // From: A -> B - // To: A -> C -> B - const mid = try allocator.create(Node); - mid.* = .{ .base = self.base }; - var to_label = try allocator.dupe(u8, edge.label[match..]); - allocator.free(edge.label); - const to_node = edge.to; - edge.to = mid; - edge.label = try allocator.dupe(u8, label[0..match]); - self.base.node_count += 1; - - try mid.edges.append(allocator, .{ - .from = mid, - .to = to_node, - .label = to_label, - }); - - return if (match == label.len) mid else mid.put(allocator, label[match..]); - } - - // Add a new node. - const node = try allocator.create(Node); - node.* = .{ .base = self.base }; - self.base.node_count += 1; - - try self.edges.append(allocator, .{ - .from = self, - .to = node, - .label = try allocator.dupe(u8, label), - }); - - return node; - } - - /// Recursively parses the node from the input byte stream. 
- fn read(self: *Node, allocator: Allocator, reader: anytype) Trie.ReadError!usize { - self.node_dirty = true; - const trie_offset = try reader.context.getPos(); - self.trie_offset = trie_offset; - - var nread: usize = 0; - - const node_size = try leb.readULEB128(u64, reader); - if (node_size > 0) { - const export_flags = try leb.readULEB128(u64, reader); - // TODO Parse special flags. - assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and - export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0); - - const vmaddr_offset = try leb.readULEB128(u64, reader); - - self.terminal_info = .{ - .export_flags = export_flags, - .vmaddr_offset = vmaddr_offset, - }; - } - - const nedges = try reader.readByte(); - self.base.node_count += nedges; - - nread += (try reader.context.getPos()) - trie_offset; - - var i: usize = 0; - while (i < nedges) : (i += 1) { - const edge_start_pos = try reader.context.getPos(); - - const label = blk: { - var label_buf = std.ArrayList(u8).init(allocator); - while (true) { - const next = try reader.readByte(); - if (next == @as(u8, 0)) - break; - try label_buf.append(next); - } - break :blk try label_buf.toOwnedSlice(); - }; - - const seek_to = try leb.readULEB128(u64, reader); - const return_pos = try reader.context.getPos(); - - nread += return_pos - edge_start_pos; - try reader.context.seekTo(seek_to); - - const node = try allocator.create(Node); - node.* = .{ .base = self.base }; - - nread += try node.read(allocator, reader); - try self.edges.append(allocator, .{ - .from = self, - .to = node, - .label = label, - }); - try reader.context.seekTo(return_pos); - } - - return nread; - } - - /// Writes this node to a byte stream. - /// The children of this node *are* not written to the byte stream - /// recursively. To write all nodes to a byte stream in sequence, - /// iterate over `Trie.ordered_nodes` and call this method on each node. - /// This is one of the requirements of the MachO. 
- /// Panics if `finalize` was not called before calling this method. - fn write(self: Node, writer: anytype) !void { - assert(!self.node_dirty); - if (self.terminal_info) |info| { - // Terminal node info: encode export flags and vmaddr offset of this symbol. - var info_buf: [@sizeOf(u64) * 2]u8 = undefined; - var info_stream = std.io.fixedBufferStream(&info_buf); - // TODO Implement for special flags. - assert(info.export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and - info.export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0); - try leb.writeULEB128(info_stream.writer(), info.export_flags); - try leb.writeULEB128(info_stream.writer(), info.vmaddr_offset); - - // Encode the size of the terminal node info. - var size_buf: [@sizeOf(u64)]u8 = undefined; - var size_stream = std.io.fixedBufferStream(&size_buf); - try leb.writeULEB128(size_stream.writer(), info_stream.pos); - - // Now, write them to the output stream. - try writer.writeAll(size_buf[0..size_stream.pos]); - try writer.writeAll(info_buf[0..info_stream.pos]); - } else { - // Non-terminal node is delimited by 0 byte. - try writer.writeByte(0); - } - // Write number of edges (max legal number of edges is 256). - try writer.writeByte(@as(u8, @intCast(self.edges.items.len))); - - for (self.edges.items) |edge| { - // Write edge label and offset to next node in trie. - try writer.writeAll(edge.label); - try writer.writeByte(0); - try leb.writeULEB128(writer, edge.to.trie_offset.?); - } - } - - const FinalizeResult = struct { - /// Current size of this node in bytes. - node_size: u64, - - /// True if the trie offset of this node in the output byte stream - /// would need updating; false otherwise. - updated: bool, - }; - - /// Updates offset of this node in the output byte stream. 
- fn finalize(self: *Node, offset_in_trie: u64) !FinalizeResult { - var stream = std.io.countingWriter(std.io.null_writer); - var writer = stream.writer(); - - var node_size: u64 = 0; - if (self.terminal_info) |info| { - try leb.writeULEB128(writer, info.export_flags); - try leb.writeULEB128(writer, info.vmaddr_offset); - try leb.writeULEB128(writer, stream.bytes_written); - } else { - node_size += 1; // 0x0 for non-terminal nodes - } - node_size += 1; // 1 byte for edge count - - for (self.edges.items) |edge| { - const next_node_offset = edge.to.trie_offset orelse 0; - node_size += edge.label.len + 1; - try leb.writeULEB128(writer, next_node_offset); - } - - const trie_offset = self.trie_offset orelse 0; - const updated = offset_in_trie != trie_offset; - self.trie_offset = offset_in_trie; - self.node_dirty = false; - node_size += stream.bytes_written; - - return FinalizeResult{ .node_size = node_size, .updated = updated }; - } -}; - /// The root node of the trie. root: ?*Node = null, @@ -611,3 +369,245 @@ test "ordering bug" { _ = try trie.write(stream.writer()); try expectEqualHexStrings(&exp_buffer, buffer); } + +pub const Node = struct { + base: *Trie, + + /// Terminal info associated with this node. + /// If this node is not a terminal node, info is null. + terminal_info: ?struct { + /// Export flags associated with this exported symbol. + export_flags: u64, + /// VM address offset wrt to the section this symbol is defined against. + vmaddr_offset: u64, + } = null, + + /// Offset of this node in the trie output byte stream. + trie_offset: ?u64 = null, + + /// List of all edges originating from this node. + edges: std.ArrayListUnmanaged(Edge) = .{}, + + node_dirty: bool = true, + + /// Edge connecting to nodes in the trie. 
+ pub const Edge = struct { + from: *Node, + to: *Node, + label: []u8, + + fn deinit(self: *Edge, allocator: Allocator) void { + self.to.deinit(allocator); + allocator.destroy(self.to); + allocator.free(self.label); + self.from = undefined; + self.to = undefined; + self.label = undefined; + } + }; + + fn deinit(self: *Node, allocator: Allocator) void { + for (self.edges.items) |*edge| { + edge.deinit(allocator); + } + self.edges.deinit(allocator); + } + + /// Inserts a new node starting from `self`. + fn put(self: *Node, allocator: Allocator, label: []const u8) !*Node { + // Check for match with edges from this node. + for (self.edges.items) |*edge| { + const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to; + if (match == 0) continue; + if (match == edge.label.len) return edge.to.put(allocator, label[match..]); + + // Found a match, need to splice up nodes. + // From: A -> B + // To: A -> C -> B + const mid = try allocator.create(Node); + mid.* = .{ .base = self.base }; + var to_label = try allocator.dupe(u8, edge.label[match..]); + allocator.free(edge.label); + const to_node = edge.to; + edge.to = mid; + edge.label = try allocator.dupe(u8, label[0..match]); + self.base.node_count += 1; + + try mid.edges.append(allocator, .{ + .from = mid, + .to = to_node, + .label = to_label, + }); + + return if (match == label.len) mid else mid.put(allocator, label[match..]); + } + + // Add a new node. + const node = try allocator.create(Node); + node.* = .{ .base = self.base }; + self.base.node_count += 1; + + try self.edges.append(allocator, .{ + .from = self, + .to = node, + .label = try allocator.dupe(u8, label), + }); + + return node; + } + + /// Recursively parses the node from the input byte stream. 
+ fn read(self: *Node, allocator: Allocator, reader: anytype) Trie.ReadError!usize { + self.node_dirty = true; + const trie_offset = try reader.context.getPos(); + self.trie_offset = trie_offset; + + var nread: usize = 0; + + const node_size = try leb.readULEB128(u64, reader); + if (node_size > 0) { + const export_flags = try leb.readULEB128(u64, reader); + // TODO Parse special flags. + assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and + export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0); + + const vmaddr_offset = try leb.readULEB128(u64, reader); + + self.terminal_info = .{ + .export_flags = export_flags, + .vmaddr_offset = vmaddr_offset, + }; + } + + const nedges = try reader.readByte(); + self.base.node_count += nedges; + + nread += (try reader.context.getPos()) - trie_offset; + + var i: usize = 0; + while (i < nedges) : (i += 1) { + const edge_start_pos = try reader.context.getPos(); + + const label = blk: { + var label_buf = std.ArrayList(u8).init(allocator); + while (true) { + const next = try reader.readByte(); + if (next == @as(u8, 0)) + break; + try label_buf.append(next); + } + break :blk try label_buf.toOwnedSlice(); + }; + + const seek_to = try leb.readULEB128(u64, reader); + const return_pos = try reader.context.getPos(); + + nread += return_pos - edge_start_pos; + try reader.context.seekTo(seek_to); + + const node = try allocator.create(Node); + node.* = .{ .base = self.base }; + + nread += try node.read(allocator, reader); + try self.edges.append(allocator, .{ + .from = self, + .to = node, + .label = label, + }); + try reader.context.seekTo(return_pos); + } + + return nread; + } + + /// Writes this node to a byte stream. + /// The children of this node *are* not written to the byte stream + /// recursively. To write all nodes to a byte stream in sequence, + /// iterate over `Trie.ordered_nodes` and call this method on each node. + /// This is one of the requirements of the MachO. 
+ /// Panics if `finalize` was not called before calling this method. + fn write(self: Node, writer: anytype) !void { + assert(!self.node_dirty); + if (self.terminal_info) |info| { + // Terminal node info: encode export flags and vmaddr offset of this symbol. + var info_buf: [@sizeOf(u64) * 2]u8 = undefined; + var info_stream = std.io.fixedBufferStream(&info_buf); + // TODO Implement for special flags. + assert(info.export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and + info.export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0); + try leb.writeULEB128(info_stream.writer(), info.export_flags); + try leb.writeULEB128(info_stream.writer(), info.vmaddr_offset); + + // Encode the size of the terminal node info. + var size_buf: [@sizeOf(u64)]u8 = undefined; + var size_stream = std.io.fixedBufferStream(&size_buf); + try leb.writeULEB128(size_stream.writer(), info_stream.pos); + + // Now, write them to the output stream. + try writer.writeAll(size_buf[0..size_stream.pos]); + try writer.writeAll(info_buf[0..info_stream.pos]); + } else { + // Non-terminal node is delimited by 0 byte. + try writer.writeByte(0); + } + // Write number of edges (max legal number of edges is 256). + try writer.writeByte(@as(u8, @intCast(self.edges.items.len))); + + for (self.edges.items) |edge| { + // Write edge label and offset to next node in trie. + try writer.writeAll(edge.label); + try writer.writeByte(0); + try leb.writeULEB128(writer, edge.to.trie_offset.?); + } + } + + const FinalizeResult = struct { + /// Current size of this node in bytes. + node_size: u64, + + /// True if the trie offset of this node in the output byte stream + /// would need updating; false otherwise. + updated: bool, + }; + + /// Updates offset of this node in the output byte stream. 
+ fn finalize(self: *Node, offset_in_trie: u64) !FinalizeResult { + var stream = std.io.countingWriter(std.io.null_writer); + var writer = stream.writer(); + + var node_size: u64 = 0; + if (self.terminal_info) |info| { + try leb.writeULEB128(writer, info.export_flags); + try leb.writeULEB128(writer, info.vmaddr_offset); + try leb.writeULEB128(writer, stream.bytes_written); + } else { + node_size += 1; // 0x0 for non-terminal nodes + } + node_size += 1; // 1 byte for edge count + + for (self.edges.items) |edge| { + const next_node_offset = edge.to.trie_offset orelse 0; + node_size += edge.label.len + 1; + try leb.writeULEB128(writer, next_node_offset); + } + + const trie_offset = self.trie_offset orelse 0; + const updated = offset_in_trie != trie_offset; + self.trie_offset = offset_in_trie; + self.node_dirty = false; + node_size += stream.bytes_written; + + return FinalizeResult{ .node_size = node_size, .updated = updated }; + } +}; + +const Trie = @This(); + +const std = @import("std"); +const mem = std.mem; +const leb = std.leb; +const log = std.log.scoped(.link); +const macho = std.macho; +const testing = std.testing; +const assert = std.debug.assert; +const Allocator = mem.Allocator; diff --git a/src/link/MachO/UnwindInfo.zig b/src/link/MachO/UnwindInfo.zig index 3cd72fd64e..e3612c6948 100644 --- a/src/link/MachO/UnwindInfo.zig +++ b/src/link/MachO/UnwindInfo.zig @@ -1,25 +1,3 @@ -const UnwindInfo = @This(); - -const std = @import("std"); -const assert = std.debug.assert; -const eh_frame = @import("eh_frame.zig"); -const fs = std.fs; -const leb = std.leb; -const log = std.log.scoped(.unwind_info); -const macho = std.macho; -const math = std.math; -const mem = std.mem; -const trace = @import("../../tracy.zig").trace; - -const Allocator = mem.Allocator; -const Atom = @import("Atom.zig"); -const AtomIndex = @import("zld.zig").AtomIndex; -const EhFrameRecord = eh_frame.EhFrameRecord; -const MachO = @import("../MachO.zig"); -const Object = @import("Object.zig"); 
-const SymbolWithLoc = MachO.SymbolWithLoc; -const Zld = @import("zld.zig").Zld; - gpa: Allocator, /// List of all unwind records gathered from all objects and sorted @@ -203,28 +181,28 @@ pub fn deinit(info: *UnwindInfo) void { info.lsdas_lookup.deinit(info.gpa); } -pub fn scanRelocs(zld: *Zld) !void { - if (zld.unwind_info_section_index == null) return; +pub fn scanRelocs(macho_file: *MachO) !void { + if (macho_file.unwind_info_section_index == null) return; - const cpu_arch = zld.options.target.cpu.arch; - for (zld.objects.items, 0..) |*object, object_id| { + const cpu_arch = macho_file.base.options.target.cpu.arch; + for (macho_file.objects.items, 0..) |*object, object_id| { const unwind_records = object.getUnwindRecords(); for (object.exec_atoms.items) |atom_index| { - var inner_syms_it = Atom.getInnerSymbolsIterator(zld, atom_index); + var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); while (inner_syms_it.next()) |sym| { const record_id = object.unwind_records_lookup.get(sym) orelse continue; if (object.unwind_relocs_lookup[record_id].dead) continue; const record = unwind_records[record_id]; if (!UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) { - if (getPersonalityFunctionReloc(zld, @as(u32, @intCast(object_id)), record_id)) |rel| { + if (getPersonalityFunctionReloc(macho_file, @as(u32, @intCast(object_id)), record_id)) |rel| { // Personality function; add GOT pointer. 
- const target = Atom.parseRelocTarget(zld, .{ + const target = Atom.parseRelocTarget(macho_file, .{ .object_id = @as(u32, @intCast(object_id)), .rel = rel, .code = mem.asBytes(&record), .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), }); - try zld.addGotEntry(target); + try macho_file.addGotEntry(target); } } } @@ -232,10 +210,10 @@ pub fn scanRelocs(zld: *Zld) !void { } } -pub fn collect(info: *UnwindInfo, zld: *Zld) !void { - if (zld.unwind_info_section_index == null) return; +pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void { + if (macho_file.unwind_info_section_index == null) return; - const cpu_arch = zld.options.target.cpu.arch; + const cpu_arch = macho_file.base.options.target.cpu.arch; var records = std.ArrayList(macho.compact_unwind_entry).init(info.gpa); defer records.deinit(); @@ -244,7 +222,7 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void { defer sym_indexes.deinit(); // TODO handle dead stripping - for (zld.objects.items, 0..) |*object, object_id| { + for (macho_file.objects.items, 0..) 
|*object, object_id| { log.debug("collecting unwind records in {s} ({d})", .{ object.name, object_id }); const unwind_records = object.getUnwindRecords(); @@ -254,7 +232,7 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void { try sym_indexes.ensureUnusedCapacity(object.exec_atoms.items.len); for (object.exec_atoms.items) |atom_index| { - var inner_syms_it = Atom.getInnerSymbolsIterator(zld, atom_index); + var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); var prev_symbol: ?SymbolWithLoc = null; while (inner_syms_it.next()) |symbol| { var record = if (object.unwind_records_lookup.get(symbol)) |record_id| blk: { @@ -262,14 +240,14 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void { var record = unwind_records[record_id]; if (UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) { - try info.collectPersonalityFromDwarf(zld, @as(u32, @intCast(object_id)), symbol, &record); + try info.collectPersonalityFromDwarf(macho_file, @as(u32, @intCast(object_id)), symbol, &record); } else { if (getPersonalityFunctionReloc( - zld, + macho_file, @as(u32, @intCast(object_id)), record_id, )) |rel| { - const target = Atom.parseRelocTarget(zld, .{ + const target = Atom.parseRelocTarget(macho_file, .{ .object_id = @as(u32, @intCast(object_id)), .rel = rel, .code = mem.asBytes(&record), @@ -286,8 +264,8 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void { UnwindEncoding.setPersonalityIndex(&record.compactUnwindEncoding, personality_index + 1); } - if (getLsdaReloc(zld, @as(u32, @intCast(object_id)), record_id)) |rel| { - const target = Atom.parseRelocTarget(zld, .{ + if (getLsdaReloc(macho_file, @as(u32, @intCast(object_id)), record_id)) |rel| { + const target = Atom.parseRelocTarget(macho_file, .{ .object_id = @as(u32, @intCast(object_id)), .rel = rel, .code = mem.asBytes(&record), @@ -298,7 +276,7 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void { } break :blk record; } else blk: { - const sym = zld.getSymbol(symbol); + const sym = 
macho_file.getSymbol(symbol); if (sym.n_desc == MachO.N_DEAD) continue; if (prev_symbol) |prev_sym| { const prev_addr = object.getSourceSymbol(prev_sym.sym_index).?.n_value; @@ -310,7 +288,7 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void { if (object.eh_frame_records_lookup.get(symbol)) |fde_offset| { if (object.eh_frame_relocs_lookup.get(fde_offset).?.dead) continue; var record = nullRecord(); - try info.collectPersonalityFromDwarf(zld, @as(u32, @intCast(object_id)), symbol, &record); + try info.collectPersonalityFromDwarf(macho_file, @as(u32, @intCast(object_id)), symbol, &record); switch (cpu_arch) { .aarch64 => UnwindEncoding.setMode(&record.compactUnwindEncoding, macho.UNWIND_ARM64_MODE.DWARF), .x86_64 => UnwindEncoding.setMode(&record.compactUnwindEncoding, macho.UNWIND_X86_64_MODE.DWARF), @@ -323,8 +301,8 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void { break :blk nullRecord(); }; - const atom = zld.getAtom(atom_index); - const sym = zld.getSymbol(symbol); + const atom = macho_file.getAtom(atom_index); + const sym = macho_file.getSymbol(symbol); assert(sym.n_desc != MachO.N_DEAD); const size = if (inner_syms_it.next()) |next_sym| blk: { // All this trouble to account for symbol aliases. 
@@ -336,8 +314,8 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void { const curr_addr = object.getSourceSymbol(symbol.sym_index).?.n_value; const next_addr = object.getSourceSymbol(next_sym.sym_index).?.n_value; if (next_addr > curr_addr) break :blk next_addr - curr_addr; - break :blk zld.getSymbol(atom.getSymbolWithLoc()).n_value + atom.size - sym.n_value; - } else zld.getSymbol(atom.getSymbolWithLoc()).n_value + atom.size - sym.n_value; + break :blk macho_file.getSymbol(atom.getSymbolWithLoc()).n_value + atom.size - sym.n_value; + } else macho_file.getSymbol(atom.getSymbolWithLoc()).n_value + atom.size - sym.n_value; record.rangeStart = sym.n_value; record.rangeLength = @as(u32, @intCast(size)); @@ -518,23 +496,23 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void { fn collectPersonalityFromDwarf( info: *UnwindInfo, - zld: *Zld, + macho_file: *MachO, object_id: u32, sym_loc: SymbolWithLoc, record: *macho.compact_unwind_entry, ) !void { - const object = &zld.objects.items[object_id]; + const object = &macho_file.objects.items[object_id]; var it = object.getEhFrameRecordsIterator(); const fde_offset = object.eh_frame_records_lookup.get(sym_loc).?; it.seekTo(fde_offset); const fde = (try it.next()).?; - const cie_ptr = fde.getCiePointerSource(object_id, zld, fde_offset); + const cie_ptr = fde.getCiePointerSource(object_id, macho_file, fde_offset); const cie_offset = fde_offset + 4 - cie_ptr; it.seekTo(cie_offset); const cie = (try it.next()).?; if (cie.getPersonalityPointerReloc( - zld, + macho_file, @as(u32, @intCast(object_id)), cie_offset, )) |target| { @@ -550,9 +528,9 @@ fn collectPersonalityFromDwarf( } } -pub fn calcSectionSize(info: UnwindInfo, zld: *Zld) !void { - const sect_id = zld.unwind_info_section_index orelse return; - const sect = &zld.sections.items(.header)[sect_id]; +pub fn calcSectionSize(info: UnwindInfo, macho_file: *MachO) !void { + const sect_id = macho_file.unwind_info_section_index orelse return; + const sect = 
&macho_file.sections.items(.header)[sect_id]; sect.@"align" = 2; sect.size = info.calcRequiredSize(); } @@ -569,23 +547,23 @@ fn calcRequiredSize(info: UnwindInfo) usize { return total_size; } -pub fn write(info: *UnwindInfo, zld: *Zld) !void { - const sect_id = zld.unwind_info_section_index orelse return; - const sect = &zld.sections.items(.header)[sect_id]; - const seg_id = zld.sections.items(.segment_index)[sect_id]; - const seg = zld.segments.items[seg_id]; +pub fn write(info: *UnwindInfo, macho_file: *MachO) !void { + const sect_id = macho_file.unwind_info_section_index orelse return; + const sect = &macho_file.sections.items(.header)[sect_id]; + const seg_id = macho_file.sections.items(.segment_index)[sect_id]; + const seg = macho_file.segments.items[seg_id]; - const text_sect_id = zld.text_section_index.?; - const text_sect = zld.sections.items(.header)[text_sect_id]; + const text_sect_id = macho_file.text_section_index.?; + const text_sect = macho_file.sections.items(.header)[text_sect_id]; var personalities: [max_personalities]u32 = undefined; - const cpu_arch = zld.options.target.cpu.arch; + const cpu_arch = macho_file.base.options.target.cpu.arch; log.debug("Personalities:", .{}); for (info.personalities[0..info.personalities_count], 0..) 
|target, i| { - const addr = zld.getGotEntryAddress(target).?; + const addr = macho_file.getGotEntryAddress(target).?; personalities[i] = @as(u32, @intCast(addr - seg.vmaddr)); - log.debug(" {d}: 0x{x} ({s})", .{ i, personalities[i], zld.getSymbolName(target) }); + log.debug(" {d}: 0x{x} ({s})", .{ i, personalities[i], macho_file.getSymbolName(target) }); } for (info.records.items) |*rec| { @@ -599,7 +577,7 @@ pub fn write(info: *UnwindInfo, zld: *Zld) !void { if (rec.compactUnwindEncoding > 0 and !UnwindEncoding.isDwarf(rec.compactUnwindEncoding, cpu_arch)) { const lsda_target = @as(SymbolWithLoc, @bitCast(rec.lsda)); if (lsda_target.getFile()) |_| { - const sym = zld.getSymbol(lsda_target); + const sym = macho_file.getSymbol(lsda_target); rec.lsda = sym.n_value - seg.vmaddr; } } @@ -689,11 +667,11 @@ pub fn write(info: *UnwindInfo, zld: *Zld) !void { @memset(buffer.items[offset..], 0); } - try zld.file.pwriteAll(buffer.items, sect.offset); + try macho_file.base.file.?.pwriteAll(buffer.items, sect.offset); } -fn getRelocs(zld: *Zld, object_id: u32, record_id: usize) []const macho.relocation_info { - const object = &zld.objects.items[object_id]; +fn getRelocs(macho_file: *MachO, object_id: u32, record_id: usize) []const macho.relocation_info { + const object = &macho_file.objects.items[object_id]; assert(object.hasUnwindRecords()); const rel_pos = object.unwind_relocs_lookup[record_id].reloc; const relocs = object.getRelocs(object.unwind_info_sect_id.?); @@ -707,11 +685,11 @@ fn isPersonalityFunction(record_id: usize, rel: macho.relocation_info) bool { } pub fn getPersonalityFunctionReloc( - zld: *Zld, + macho_file: *MachO, object_id: u32, record_id: usize, ) ?macho.relocation_info { - const relocs = getRelocs(zld, object_id, record_id); + const relocs = getRelocs(macho_file, object_id, record_id); for (relocs) |rel| { if (isPersonalityFunction(record_id, rel)) return rel; } @@ -735,8 +713,8 @@ fn isLsda(record_id: usize, rel: macho.relocation_info) bool { return 
rel_offset == 24; } -pub fn getLsdaReloc(zld: *Zld, object_id: u32, record_id: usize) ?macho.relocation_info { - const relocs = getRelocs(zld, object_id, record_id); +pub fn getLsdaReloc(macho_file: *MachO, object_id: u32, record_id: usize) ?macho.relocation_info { + const relocs = getRelocs(macho_file, object_id, record_id); for (relocs) |rel| { if (isLsda(record_id, rel)) return rel; } @@ -828,3 +806,23 @@ pub const UnwindEncoding = struct { enc.* |= offset; } }; + +const UnwindInfo = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const eh_frame = @import("eh_frame.zig"); +const fs = std.fs; +const leb = std.leb; +const log = std.log.scoped(.unwind_info); +const macho = std.macho; +const math = std.math; +const mem = std.mem; +const trace = @import("../../tracy.zig").trace; + +const Allocator = mem.Allocator; +const Atom = @import("Atom.zig"); +const EhFrameRecord = eh_frame.EhFrameRecord; +const MachO = @import("../MachO.zig"); +const Object = @import("Object.zig"); +const SymbolWithLoc = MachO.SymbolWithLoc; diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 42a2e0cbd8..5e99ad2270 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -1,89 +1,72 @@ //! An algorithm for dead stripping of unreferenced Atoms. 
-const std = @import("std"); -const assert = std.debug.assert; -const eh_frame = @import("eh_frame.zig"); -const log = std.log.scoped(.dead_strip); -const macho = std.macho; -const math = std.math; -const mem = std.mem; - -const Allocator = mem.Allocator; -const Atom = @import("Atom.zig"); -const MachO = @import("../MachO.zig"); -const SymbolWithLoc = MachO.SymbolWithLoc; -const UnwindInfo = @import("UnwindInfo.zig"); -const Zld = @import("zld.zig").Zld; - -const AtomTable = std.AutoHashMap(Atom.Index, void); - -pub fn gcAtoms(zld: *Zld) !void { - const gpa = zld.gpa; +pub fn gcAtoms(macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; var arena = std.heap.ArenaAllocator.init(gpa); defer arena.deinit(); var roots = AtomTable.init(arena.allocator()); - try roots.ensureUnusedCapacity(@as(u32, @intCast(zld.globals.items.len))); + try roots.ensureUnusedCapacity(@as(u32, @intCast(macho_file.globals.items.len))); var alive = AtomTable.init(arena.allocator()); - try alive.ensureTotalCapacity(@as(u32, @intCast(zld.atoms.items.len))); + try alive.ensureTotalCapacity(@as(u32, @intCast(macho_file.atoms.items.len))); - try collectRoots(zld, &roots); - try mark(zld, roots, &alive); - prune(zld, alive); + try collectRoots(macho_file, &roots); + try mark(macho_file, roots, &alive); + prune(macho_file, alive); } -fn addRoot(zld: *Zld, roots: *AtomTable, file: u32, sym_loc: SymbolWithLoc) !void { - const sym = zld.getSymbol(sym_loc); +fn addRoot(macho_file: *MachO, roots: *AtomTable, file: u32, sym_loc: SymbolWithLoc) !void { + const sym = macho_file.getSymbol(sym_loc); assert(!sym.undf()); - const object = &zld.objects.items[file]; + const object = &macho_file.objects.items[file]; const atom_index = object.getAtomIndexForSymbol(sym_loc.sym_index).?; // panic here means fatal error log.debug("root(ATOM({d}, %{d}, {d}))", .{ atom_index, - zld.getAtom(atom_index).sym_index, + macho_file.getAtom(atom_index).sym_index, file, }); _ = try roots.getOrPut(atom_index); } -fn 
collectRoots(zld: *Zld, roots: *AtomTable) !void { +fn collectRoots(macho_file: *MachO, roots: *AtomTable) !void { log.debug("collecting roots", .{}); - switch (zld.options.output_mode) { + switch (macho_file.base.options.output_mode) { .Exe => { // Add entrypoint as GC root - const global: SymbolWithLoc = zld.getEntryPoint(); + const global: SymbolWithLoc = macho_file.getEntryPoint(); if (global.getFile()) |file| { - try addRoot(zld, roots, file, global); + try addRoot(macho_file, roots, file, global); } else { - assert(zld.getSymbol(global).undf()); // Stub as our entrypoint is in a dylib. + assert(macho_file.getSymbol(global).undf()); // Stub as our entrypoint is in a dylib. } }, else => |other| { assert(other == .Lib); // Add exports as GC roots - for (zld.globals.items) |global| { - const sym = zld.getSymbol(global); + for (macho_file.globals.items) |global| { + const sym = macho_file.getSymbol(global); if (sym.undf()) continue; if (global.getFile()) |file| { - try addRoot(zld, roots, file, global); + try addRoot(macho_file, roots, file, global); } } }, } // Add all symbols force-defined by the user. - for (zld.options.force_undefined_symbols.keys()) |sym_name| { - const global_index = zld.resolver.get(sym_name).?; - const global = zld.globals.items[global_index]; - const sym = zld.getSymbol(global); + for (macho_file.base.options.force_undefined_symbols.keys()) |sym_name| { + const global_index = macho_file.resolver.get(sym_name).?; + const global = macho_file.globals.items[global_index]; + const sym = macho_file.getSymbol(global); assert(!sym.undf()); - try addRoot(zld, roots, global.getFile().?, global); + try addRoot(macho_file, roots, global.getFile().?, global); } - for (zld.objects.items) |object| { + for (macho_file.objects.items) |object| { const has_subsections = object.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; for (object.atoms.items) |atom_index| { @@ -92,7 +75,7 @@ fn collectRoots(zld: *Zld, roots: *AtomTable) !void { // as a root. 
if (!has_subsections) break :blk true; - const atom = zld.getAtom(atom_index); + const atom = macho_file.getAtom(atom_index); const sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| source_sym.n_sect - 1 else sect_id: { @@ -115,39 +98,39 @@ fn collectRoots(zld: *Zld, roots: *AtomTable) !void { log.debug("root(ATOM({d}, %{d}, {?d}))", .{ atom_index, - zld.getAtom(atom_index).sym_index, - zld.getAtom(atom_index).getFile(), + macho_file.getAtom(atom_index).sym_index, + macho_file.getAtom(atom_index).getFile(), }); } } } } -fn markLive(zld: *Zld, atom_index: Atom.Index, alive: *AtomTable) void { +fn markLive(macho_file: *MachO, atom_index: Atom.Index, alive: *AtomTable) void { if (alive.contains(atom_index)) return; - const atom = zld.getAtom(atom_index); + const atom = macho_file.getAtom(atom_index); const sym_loc = atom.getSymbolWithLoc(); log.debug("mark(ATOM({d}, %{d}, {?d}))", .{ atom_index, sym_loc.sym_index, sym_loc.getFile() }); alive.putAssumeCapacityNoClobber(atom_index, {}); - const cpu_arch = zld.options.target.cpu.arch; + const cpu_arch = macho_file.options.target.cpu.arch; - const sym = zld.getSymbol(atom.getSymbolWithLoc()); - const header = zld.sections.items(.header)[sym.n_sect - 1]; + const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); + const header = macho_file.sections.items(.header)[sym.n_sect - 1]; if (header.isZerofill()) return; - const code = Atom.getAtomCode(zld, atom_index); - const relocs = Atom.getAtomRelocs(zld, atom_index); - const ctx = Atom.getRelocContext(zld, atom_index); + const code = Atom.getAtomCode(macho_file, atom_index); + const relocs = Atom.getAtomRelocs(macho_file, atom_index); + const ctx = Atom.getRelocContext(macho_file, atom_index); for (relocs) |rel| { const target = switch (cpu_arch) { .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { .ARM64_RELOC_ADDEND => continue, - else => Atom.parseRelocTarget(zld, .{ + else => Atom.parseRelocTarget(macho_file, .{ .object_id = 
atom.getFile().?, .rel = rel, .code = code, @@ -155,7 +138,7 @@ fn markLive(zld: *Zld, atom_index: Atom.Index, alive: *AtomTable) void { .base_addr = ctx.base_addr, }), }, - .x86_64 => Atom.parseRelocTarget(zld, .{ + .x86_64 => Atom.parseRelocTarget(macho_file, .{ .object_id = atom.getFile().?, .rel = rel, .code = code, @@ -164,50 +147,50 @@ fn markLive(zld: *Zld, atom_index: Atom.Index, alive: *AtomTable) void { }), else => unreachable, }; - const target_sym = zld.getSymbol(target); + const target_sym = macho_file.getSymbol(target); if (target_sym.undf()) continue; if (target.getFile() == null) { - const target_sym_name = zld.getSymbolName(target); + const target_sym_name = macho_file.getSymbolName(target); if (mem.eql(u8, "__mh_execute_header", target_sym_name)) continue; if (mem.eql(u8, "___dso_handle", target_sym_name)) continue; unreachable; // referenced symbol not found } - const object = zld.objects.items[target.getFile().?]; + const object = macho_file.objects.items[target.getFile().?]; const target_atom_index = object.getAtomIndexForSymbol(target.sym_index).?; log.debug(" following ATOM({d}, %{d}, {?d})", .{ target_atom_index, - zld.getAtom(target_atom_index).sym_index, - zld.getAtom(target_atom_index).getFile(), + macho_file.getAtom(target_atom_index).sym_index, + macho_file.getAtom(target_atom_index).getFile(), }); - markLive(zld, target_atom_index, alive); + markLive(macho_file, target_atom_index, alive); } } -fn refersLive(zld: *Zld, atom_index: Atom.Index, alive: AtomTable) bool { - const atom = zld.getAtom(atom_index); +fn refersLive(macho_file: *MachO, atom_index: Atom.Index, alive: AtomTable) bool { + const atom = macho_file.getAtom(atom_index); const sym_loc = atom.getSymbolWithLoc(); log.debug("refersLive(ATOM({d}, %{d}, {?d}))", .{ atom_index, sym_loc.sym_index, sym_loc.getFile() }); - const cpu_arch = zld.options.target.cpu.arch; + const cpu_arch = macho_file.base.options.target.cpu.arch; - const sym = zld.getSymbol(sym_loc); - const header = 
zld.sections.items(.header)[sym.n_sect - 1]; + const sym = macho_file.getSymbol(sym_loc); + const header = macho_file.sections.items(.header)[sym.n_sect - 1]; assert(!header.isZerofill()); - const code = Atom.getAtomCode(zld, atom_index); - const relocs = Atom.getAtomRelocs(zld, atom_index); - const ctx = Atom.getRelocContext(zld, atom_index); + const code = Atom.getAtomCode(macho_file, atom_index); + const relocs = Atom.getAtomRelocs(macho_file, atom_index); + const ctx = Atom.getRelocContext(macho_file, atom_index); for (relocs) |rel| { const target = switch (cpu_arch) { .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) { .ARM64_RELOC_ADDEND => continue, - else => Atom.parseRelocTarget(zld, .{ + else => Atom.parseRelocTarget(macho_file, .{ .object_id = atom.getFile().?, .rel = rel, .code = code, @@ -215,7 +198,7 @@ fn refersLive(zld: *Zld, atom_index: Atom.Index, alive: AtomTable) bool { .base_addr = ctx.base_addr, }), }, - .x86_64 => Atom.parseRelocTarget(zld, .{ + .x86_64 => Atom.parseRelocTarget(macho_file, .{ .object_id = atom.getFile().?, .rel = rel, .code = code, @@ -225,16 +208,16 @@ fn refersLive(zld: *Zld, atom_index: Atom.Index, alive: AtomTable) bool { else => unreachable, }; - const object = zld.objects.items[target.getFile().?]; + const object = macho_file.objects.items[target.getFile().?]; const target_atom_index = object.getAtomIndexForSymbol(target.sym_index) orelse { - log.debug("atom for symbol '{s}' not found; skipping...", .{zld.getSymbolName(target)}); + log.debug("atom for symbol '{s}' not found; skipping...", .{macho_file.getSymbolName(target)}); continue; }; if (alive.contains(target_atom_index)) { log.debug(" refers live ATOM({d}, %{d}, {?d})", .{ target_atom_index, - zld.getAtom(target_atom_index).sym_index, - zld.getAtom(target_atom_index).getFile(), + macho_file.getAtom(target_atom_index).sym_index, + macho_file.getAtom(target_atom_index).getFile(), }); return true; } @@ -243,21 +226,21 @@ fn refersLive(zld: 
*Zld, atom_index: Atom.Index, alive: AtomTable) bool { return false; } -fn mark(zld: *Zld, roots: AtomTable, alive: *AtomTable) !void { +fn mark(macho_file: *MachO, roots: AtomTable, alive: *AtomTable) !void { var it = roots.keyIterator(); while (it.next()) |root| { - markLive(zld, root.*, alive); + markLive(macho_file, root.*, alive); } var loop: bool = true; while (loop) { loop = false; - for (zld.objects.items) |object| { + for (macho_file.objects.items) |object| { for (object.atoms.items) |atom_index| { if (alive.contains(atom_index)) continue; - const atom = zld.getAtom(atom_index); + const atom = macho_file.getAtom(atom_index); const sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| source_sym.n_sect - 1 else blk: { @@ -268,8 +251,8 @@ fn mark(zld: *Zld, roots: AtomTable, alive: *AtomTable) !void { const source_sect = object.getSourceSection(sect_id); if (source_sect.isDontDeadStripIfReferencesLive()) { - if (refersLive(zld, atom_index, alive.*)) { - markLive(zld, atom_index, alive); + if (refersLive(macho_file, atom_index, alive.*)) { + markLive(macho_file, atom_index, alive); loop = true; } } @@ -277,26 +260,26 @@ fn mark(zld: *Zld, roots: AtomTable, alive: *AtomTable) !void { } } - for (zld.objects.items, 0..) |_, object_id| { + for (macho_file.objects.items, 0..) |_, object_id| { // Traverse unwind and eh_frame records noting if the source symbol has been marked, and if so, // marking all references as live. 
- try markUnwindRecords(zld, @as(u32, @intCast(object_id)), alive); + try markUnwindRecords(macho_file, @as(u32, @intCast(object_id)), alive); } } -fn markUnwindRecords(zld: *Zld, object_id: u32, alive: *AtomTable) !void { - const object = &zld.objects.items[object_id]; - const cpu_arch = zld.options.target.cpu.arch; +fn markUnwindRecords(macho_file: *MachO, object_id: u32, alive: *AtomTable) !void { + const object = &macho_file.objects.items[object_id]; + const cpu_arch = macho_file.base.options.target.cpu.arch; const unwind_records = object.getUnwindRecords(); for (object.exec_atoms.items) |atom_index| { - var inner_syms_it = Atom.getInnerSymbolsIterator(zld, atom_index); + var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); if (!object.hasUnwindRecords()) { if (alive.contains(atom_index)) { // Mark references live and continue. - try markEhFrameRecords(zld, object_id, atom_index, alive); + try markEhFrameRecords(macho_file, object_id, atom_index, alive); } else { while (inner_syms_it.next()) |sym| { if (object.eh_frame_records_lookup.get(sym)) |fde_offset| { @@ -322,51 +305,51 @@ fn markUnwindRecords(zld: *Zld, object_id: u32, alive: *AtomTable) !void { const record = unwind_records[record_id]; if (UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) { - try markEhFrameRecords(zld, object_id, atom_index, alive); + try markEhFrameRecords(macho_file, object_id, atom_index, alive); } else { - if (UnwindInfo.getPersonalityFunctionReloc(zld, object_id, record_id)) |rel| { - const target = Atom.parseRelocTarget(zld, .{ + if (UnwindInfo.getPersonalityFunctionReloc(macho_file, object_id, record_id)) |rel| { + const target = Atom.parseRelocTarget(macho_file, .{ .object_id = object_id, .rel = rel, .code = mem.asBytes(&record), .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), }); - const target_sym = zld.getSymbol(target); + const target_sym = macho_file.getSymbol(target); if 
(!target_sym.undf()) { - const target_object = zld.objects.items[target.getFile().?]; + const target_object = macho_file.objects.items[target.getFile().?]; const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index).?; - markLive(zld, target_atom_index, alive); + markLive(macho_file, target_atom_index, alive); } } - if (UnwindInfo.getLsdaReloc(zld, object_id, record_id)) |rel| { - const target = Atom.parseRelocTarget(zld, .{ + if (UnwindInfo.getLsdaReloc(macho_file, object_id, record_id)) |rel| { + const target = Atom.parseRelocTarget(macho_file, .{ .object_id = object_id, .rel = rel, .code = mem.asBytes(&record), .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))), }); - const target_object = zld.objects.items[target.getFile().?]; + const target_object = macho_file.objects.items[target.getFile().?]; const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index).?; - markLive(zld, target_atom_index, alive); + markLive(macho_file, target_atom_index, alive); } } } } } -fn markEhFrameRecords(zld: *Zld, object_id: u32, atom_index: Atom.Index, alive: *AtomTable) !void { - const cpu_arch = zld.options.target.cpu.arch; - const object = &zld.objects.items[object_id]; +fn markEhFrameRecords(macho_file: *MachO, object_id: u32, atom_index: Atom.Index, alive: *AtomTable) !void { + const cpu_arch = macho_file.base.options.target.cpu.arch; + const object = &macho_file.objects.items[object_id]; var it = object.getEhFrameRecordsIterator(); - var inner_syms_it = Atom.getInnerSymbolsIterator(zld, atom_index); + var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); while (inner_syms_it.next()) |sym| { const fde_offset = object.eh_frame_records_lookup.get(sym) orelse continue; // Continue in case we hit a temp symbol alias it.seekTo(fde_offset); const fde = (try it.next()).?; - const cie_ptr = fde.getCiePointerSource(object_id, zld, fde_offset); + const cie_ptr = fde.getCiePointerSource(object_id, 
macho_file, fde_offset); const cie_offset = fde_offset + 4 - cie_ptr; it.seekTo(cie_offset); const cie = (try it.next()).?; @@ -374,20 +357,20 @@ fn markEhFrameRecords(zld: *Zld, object_id: u32, atom_index: Atom.Index, alive: switch (cpu_arch) { .aarch64 => { // Mark FDE references which should include any referenced LSDA record - const relocs = eh_frame.getRelocs(zld, object_id, fde_offset); + const relocs = eh_frame.getRelocs(macho_file, object_id, fde_offset); for (relocs) |rel| { - const target = Atom.parseRelocTarget(zld, .{ + const target = Atom.parseRelocTarget(macho_file, .{ .object_id = object_id, .rel = rel, .code = fde.data, .base_offset = @as(i32, @intCast(fde_offset)) + 4, }); - const target_sym = zld.getSymbol(target); + const target_sym = macho_file.getSymbol(target); if (!target_sym.undf()) blk: { - const target_object = zld.objects.items[target.getFile().?]; + const target_object = macho_file.objects.items[target.getFile().?]; const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index) orelse break :blk; - markLive(zld, target_atom_index, alive); + markLive(macho_file, target_atom_index, alive); } } }, @@ -401,7 +384,7 @@ fn markEhFrameRecords(zld: *Zld, object_id: u32, atom_index: Atom.Index, alive: // Mark LSDA record as live const sym_index = object.getSymbolByAddress(lsda_address, null); const target_atom_index = object.getAtomIndexForSymbol(sym_index).?; - markLive(zld, target_atom_index, alive); + markLive(macho_file, target_atom_index, alive); } }, else => unreachable, @@ -409,20 +392,20 @@ fn markEhFrameRecords(zld: *Zld, object_id: u32, atom_index: Atom.Index, alive: // Mark CIE references which should include any referenced personalities // that are defined locally. 
- if (cie.getPersonalityPointerReloc(zld, object_id, cie_offset)) |target| { - const target_sym = zld.getSymbol(target); + if (cie.getPersonalityPointerReloc(macho_file, object_id, cie_offset)) |target| { + const target_sym = macho_file.getSymbol(target); if (!target_sym.undf()) { - const target_object = zld.objects.items[target.getFile().?]; + const target_object = macho_file.objects.items[target.getFile().?]; const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index).?; - markLive(zld, target_atom_index, alive); + markLive(macho_file, target_atom_index, alive); } } } } -fn prune(zld: *Zld, alive: AtomTable) void { +fn prune(macho_file: *MachO, alive: AtomTable) void { log.debug("pruning dead atoms", .{}); - for (zld.objects.items) |*object| { + for (macho_file.objects.items) |*object| { var i: usize = 0; while (i < object.atoms.items.len) { const atom_index = object.atoms.items[i]; @@ -431,7 +414,7 @@ fn prune(zld: *Zld, alive: AtomTable) void { continue; } - const atom = zld.getAtom(atom_index); + const atom = macho_file.getAtom(atom_index); const sym_loc = atom.getSymbolWithLoc(); log.debug("prune(ATOM({d}, %{d}, {?d}))", .{ @@ -439,15 +422,15 @@ fn prune(zld: *Zld, alive: AtomTable) void { sym_loc.sym_index, sym_loc.getFile(), }); - log.debug(" {s} in {s}", .{ zld.getSymbolName(sym_loc), object.name }); + log.debug(" {s} in {s}", .{ macho_file.getSymbolName(sym_loc), object.name }); - const sym = zld.getSymbolPtr(sym_loc); + const sym = macho_file.getSymbolPtr(sym_loc); const sect_id = sym.n_sect - 1; - var section = zld.sections.get(sect_id); + var section = macho_file.sections.get(sect_id); section.header.size -= atom.size; if (atom.prev_index) |prev_index| { - const prev = zld.getAtomPtr(prev_index); + const prev = macho_file.getAtomPtr(prev_index); prev.next_index = atom.next_index; } else { if (atom.next_index) |next_index| { @@ -455,7 +438,7 @@ fn prune(zld: *Zld, alive: AtomTable) void { } } if (atom.next_index) |next_index| { - 
const next = zld.getAtomPtr(next_index); + const next = macho_file.getAtomPtr(next_index); next.prev_index = atom.prev_index; } else { if (atom.prev_index) |prev_index| { @@ -467,21 +450,37 @@ fn prune(zld: *Zld, alive: AtomTable) void { } } - zld.sections.set(sect_id, section); + macho_file.sections.set(sect_id, section); _ = object.atoms.swapRemove(i); sym.n_desc = MachO.N_DEAD; - var inner_sym_it = Atom.getInnerSymbolsIterator(zld, atom_index); + var inner_sym_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); while (inner_sym_it.next()) |inner| { - const inner_sym = zld.getSymbolPtr(inner); + const inner_sym = macho_file.getSymbolPtr(inner); inner_sym.n_desc = MachO.N_DEAD; } - if (Atom.getSectionAlias(zld, atom_index)) |alias| { - const alias_sym = zld.getSymbolPtr(alias); + if (Atom.getSectionAlias(macho_file, atom_index)) |alias| { + const alias_sym = macho_file.getSymbolPtr(alias); alias_sym.n_desc = MachO.N_DEAD; } } } } + +const std = @import("std"); +const assert = std.debug.assert; +const eh_frame = @import("eh_frame.zig"); +const log = std.log.scoped(.dead_strip); +const macho = std.macho; +const math = std.math; +const mem = std.mem; + +const Allocator = mem.Allocator; +const Atom = @import("Atom.zig"); +const MachO = @import("../MachO.zig"); +const SymbolWithLoc = MachO.SymbolWithLoc; +const UnwindInfo = @import("UnwindInfo.zig"); + +const AtomTable = std.AutoHashMap(Atom.Index, void); diff --git a/src/link/MachO/dyld_info/Rebase.zig b/src/link/MachO/dyld_info/Rebase.zig index 0f3e96b02f..512e23eddb 100644 --- a/src/link/MachO/dyld_info/Rebase.zig +++ b/src/link/MachO/dyld_info/Rebase.zig @@ -1,14 +1,3 @@ -const Rebase = @This(); - -const std = @import("std"); -const assert = std.debug.assert; -const leb = std.leb; -const log = std.log.scoped(.dyld_info); -const macho = std.macho; -const testing = std.testing; - -const Allocator = std.mem.Allocator; - entries: std.ArrayListUnmanaged(Entry) = .{}, buffer: std.ArrayListUnmanaged(u8) = .{}, @@ 
-572,3 +561,14 @@ test "rebase - composite" { macho.REBASE_OPCODE_DONE, }, rebase.buffer.items); } + +const Rebase = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const leb = std.leb; +const log = std.log.scoped(.dyld_info); +const macho = std.macho; +const testing = std.testing; + +const Allocator = std.mem.Allocator; diff --git a/src/link/MachO/dyld_info/bind.zig b/src/link/MachO/dyld_info/bind.zig index f804c6466d..ca4e73a283 100644 --- a/src/link/MachO/dyld_info/bind.zig +++ b/src/link/MachO/dyld_info/bind.zig @@ -1,12 +1,3 @@ -const std = @import("std"); -const assert = std.debug.assert; -const leb = std.leb; -const log = std.log.scoped(.dyld_info); -const macho = std.macho; -const testing = std.testing; - -const Allocator = std.mem.Allocator; - pub fn Bind(comptime Ctx: type, comptime Target: type) type { return struct { entries: std.ArrayListUnmanaged(Entry) = .{}, @@ -738,3 +729,12 @@ test "lazy bind" { macho.BIND_OPCODE_DONE, }, bind.buffer.items); } + +const std = @import("std"); +const assert = std.debug.assert; +const leb = std.leb; +const log = std.log.scoped(.dyld_info); +const macho = std.macho; +const testing = std.testing; + +const Allocator = std.mem.Allocator; diff --git a/src/link/MachO/eh_frame.zig b/src/link/MachO/eh_frame.zig index 2bcf23bff5..332aea08e5 100644 --- a/src/link/MachO/eh_frame.zig +++ b/src/link/MachO/eh_frame.zig @@ -1,68 +1,52 @@ -const std = @import("std"); -const assert = std.debug.assert; -const macho = std.macho; -const math = std.math; -const mem = std.mem; -const leb = std.leb; -const log = std.log.scoped(.eh_frame); +pub fn scanRelocs(macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; -const Allocator = mem.Allocator; -const Atom = @import("Atom.zig"); -const MachO = @import("../MachO.zig"); -const Relocation = @import("Relocation.zig"); -const SymbolWithLoc = MachO.SymbolWithLoc; -const UnwindInfo = @import("UnwindInfo.zig"); -const Zld = @import("zld.zig").Zld; - -pub fn 
scanRelocs(zld: *Zld) !void { - const gpa = zld.gpa; - - for (zld.objects.items, 0..) |*object, object_id| { + for (macho_file.objects.items, 0..) |*object, object_id| { var cies = std.AutoHashMap(u32, void).init(gpa); defer cies.deinit(); var it = object.getEhFrameRecordsIterator(); for (object.exec_atoms.items) |atom_index| { - var inner_syms_it = Atom.getInnerSymbolsIterator(zld, atom_index); + var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); while (inner_syms_it.next()) |sym| { const fde_offset = object.eh_frame_records_lookup.get(sym) orelse continue; if (object.eh_frame_relocs_lookup.get(fde_offset).?.dead) continue; it.seekTo(fde_offset); const fde = (try it.next()).?; - const cie_ptr = fde.getCiePointerSource(@intCast(object_id), zld, fde_offset); + const cie_ptr = fde.getCiePointerSource(@intCast(object_id), macho_file, fde_offset); const cie_offset = fde_offset + 4 - cie_ptr; if (!cies.contains(cie_offset)) { try cies.putNoClobber(cie_offset, {}); it.seekTo(cie_offset); const cie = (try it.next()).?; - try cie.scanRelocs(zld, @as(u32, @intCast(object_id)), cie_offset); + try cie.scanRelocs(macho_file, @as(u32, @intCast(object_id)), cie_offset); } } } } } -pub fn calcSectionSize(zld: *Zld, unwind_info: *const UnwindInfo) !void { - const sect_id = zld.eh_frame_section_index orelse return; - const sect = &zld.sections.items(.header)[sect_id]; +pub fn calcSectionSize(macho_file: *MachO, unwind_info: *const UnwindInfo) !void { + const sect_id = macho_file.eh_frame_section_index orelse return; + const sect = &macho_file.sections.items(.header)[sect_id]; sect.@"align" = 3; sect.size = 0; - const cpu_arch = zld.options.target.cpu.arch; - const gpa = zld.gpa; + const cpu_arch = macho_file.base.options.target.cpu.arch; + const gpa = macho_file.base.allocator; var size: u32 = 0; - for (zld.objects.items, 0..) |*object, object_id| { + for (macho_file.objects.items, 0..) 
|*object, object_id| { var cies = std.AutoHashMap(u32, u32).init(gpa); defer cies.deinit(); var eh_it = object.getEhFrameRecordsIterator(); for (object.exec_atoms.items) |atom_index| { - var inner_syms_it = Atom.getInnerSymbolsIterator(zld, atom_index); + var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); while (inner_syms_it.next()) |sym| { const fde_record_offset = object.eh_frame_records_lookup.get(sym) orelse continue; if (object.eh_frame_relocs_lookup.get(fde_record_offset).?.dead) continue; @@ -77,7 +61,7 @@ pub fn calcSectionSize(zld: *Zld, unwind_info: *const UnwindInfo) !void { eh_it.seekTo(fde_record_offset); const source_fde_record = (try eh_it.next()).?; - const cie_ptr = source_fde_record.getCiePointerSource(@intCast(object_id), zld, fde_record_offset); + const cie_ptr = source_fde_record.getCiePointerSource(@intCast(object_id), macho_file, fde_record_offset); const cie_offset = fde_record_offset + 4 - cie_ptr; const gop = try cies.getOrPut(cie_offset); @@ -96,14 +80,14 @@ pub fn calcSectionSize(zld: *Zld, unwind_info: *const UnwindInfo) !void { } } -pub fn write(zld: *Zld, unwind_info: *UnwindInfo) !void { - const sect_id = zld.eh_frame_section_index orelse return; - const sect = zld.sections.items(.header)[sect_id]; - const seg_id = zld.sections.items(.segment_index)[sect_id]; - const seg = zld.segments.items[seg_id]; +pub fn write(macho_file: *MachO, unwind_info: *UnwindInfo) !void { + const sect_id = macho_file.eh_frame_section_index orelse return; + const sect = macho_file.sections.items(.header)[sect_id]; + const seg_id = macho_file.sections.items(.segment_index)[sect_id]; + const seg = macho_file.segments.items[seg_id]; - const cpu_arch = zld.options.target.cpu.arch; - const gpa = zld.gpa; + const cpu_arch = macho_file.base.options.target.cpu.arch; + const gpa = macho_file.base.allocator; var eh_records = std.AutoArrayHashMap(u32, EhFrameRecord(true)).init(gpa); defer { @@ -115,7 +99,7 @@ pub fn write(zld: *Zld, 
unwind_info: *UnwindInfo) !void { var eh_frame_offset: u32 = 0; - for (zld.objects.items, 0..) |*object, object_id| { + for (macho_file.objects.items, 0..) |*object, object_id| { try eh_records.ensureUnusedCapacity(2 * @as(u32, @intCast(object.exec_atoms.items.len))); var cies = std.AutoHashMap(u32, u32).init(gpa); @@ -124,7 +108,7 @@ pub fn write(zld: *Zld, unwind_info: *UnwindInfo) !void { var eh_it = object.getEhFrameRecordsIterator(); for (object.exec_atoms.items) |atom_index| { - var inner_syms_it = Atom.getInnerSymbolsIterator(zld, atom_index); + var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index); while (inner_syms_it.next()) |target| { const fde_record_offset = object.eh_frame_records_lookup.get(target) orelse continue; if (object.eh_frame_relocs_lookup.get(fde_record_offset).?.dead) continue; @@ -139,7 +123,7 @@ pub fn write(zld: *Zld, unwind_info: *UnwindInfo) !void { eh_it.seekTo(fde_record_offset); const source_fde_record = (try eh_it.next()).?; - const cie_ptr = source_fde_record.getCiePointerSource(@intCast(object_id), zld, fde_record_offset); + const cie_ptr = source_fde_record.getCiePointerSource(@intCast(object_id), macho_file, fde_record_offset); const cie_offset = fde_record_offset + 4 - cie_ptr; const gop = try cies.getOrPut(cie_offset); @@ -147,7 +131,7 @@ pub fn write(zld: *Zld, unwind_info: *UnwindInfo) !void { eh_it.seekTo(cie_offset); const source_cie_record = (try eh_it.next()).?; var cie_record = try source_cie_record.toOwned(gpa); - try cie_record.relocate(zld, @as(u32, @intCast(object_id)), .{ + try cie_record.relocate(macho_file, @as(u32, @intCast(object_id)), .{ .source_offset = cie_offset, .out_offset = eh_frame_offset, .sect_addr = sect.addr, @@ -158,7 +142,7 @@ pub fn write(zld: *Zld, unwind_info: *UnwindInfo) !void { } var fde_record = try source_fde_record.toOwned(gpa); - try fde_record.relocate(zld, @as(u32, @intCast(object_id)), .{ + try fde_record.relocate(macho_file, @as(u32, @intCast(object_id)), .{ 
.source_offset = fde_record_offset, .out_offset = eh_frame_offset, .sect_addr = sect.addr, @@ -169,7 +153,7 @@ pub fn write(zld: *Zld, unwind_info: *UnwindInfo) !void { .aarch64 => {}, // relocs take care of LSDA pointers .x86_64 => { // We need to relocate target symbol address ourselves. - const atom_sym = zld.getSymbol(target); + const atom_sym = macho_file.getSymbol(target); try fde_record.setTargetSymbolAddress(atom_sym.n_value, .{ .base_addr = sect.addr, .base_offset = eh_frame_offset, @@ -229,7 +213,7 @@ pub fn write(zld: *Zld, unwind_info: *UnwindInfo) !void { try buffer.appendSlice(record.data); } - try zld.file.pwriteAll(buffer.items, sect.offset); + try macho_file.base.file.?.pwriteAll(buffer.items, sect.offset); } const EhFrameRecordTag = enum { cie, fde }; @@ -261,12 +245,12 @@ pub fn EhFrameRecord(comptime is_mutable: bool) type { pub fn scanRelocs( rec: Record, - zld: *Zld, + macho_file: *MachO, object_id: u32, source_offset: u32, ) !void { - if (rec.getPersonalityPointerReloc(zld, object_id, source_offset)) |target| { - try zld.addGotEntry(target); + if (rec.getPersonalityPointerReloc(macho_file, object_id, source_offset)) |target| { + try macho_file.addGotEntry(target); } } @@ -290,12 +274,12 @@ pub fn EhFrameRecord(comptime is_mutable: bool) type { pub fn getPersonalityPointerReloc( rec: Record, - zld: *Zld, + macho_file: *MachO, object_id: u32, source_offset: u32, ) ?SymbolWithLoc { - const cpu_arch = zld.options.target.cpu.arch; - const relocs = getRelocs(zld, object_id, source_offset); + const cpu_arch = macho_file.base.options.target.cpu.arch; + const relocs = getRelocs(macho_file, object_id, source_offset); for (relocs) |rel| { switch (cpu_arch) { .aarch64 => { @@ -317,7 +301,7 @@ pub fn EhFrameRecord(comptime is_mutable: bool) type { }, else => unreachable, } - const target = Atom.parseRelocTarget(zld, .{ + const target = Atom.parseRelocTarget(macho_file, .{ .object_id = object_id, .rel = rel, .code = rec.data, @@ -328,18 +312,18 @@ pub fn 
EhFrameRecord(comptime is_mutable: bool) type { return null; } - pub fn relocate(rec: *Record, zld: *Zld, object_id: u32, ctx: struct { + pub fn relocate(rec: *Record, macho_file: *MachO, object_id: u32, ctx: struct { source_offset: u32, out_offset: u32, sect_addr: u64, }) !void { comptime assert(is_mutable); - const cpu_arch = zld.options.target.cpu.arch; - const relocs = getRelocs(zld, object_id, ctx.source_offset); + const cpu_arch = macho_file.base.options.target.cpu.arch; + const relocs = getRelocs(macho_file, object_id, ctx.source_offset); for (relocs) |rel| { - const target = Atom.parseRelocTarget(zld, .{ + const target = Atom.parseRelocTarget(macho_file, .{ .object_id = object_id, .rel = rel, .code = rec.data, @@ -356,14 +340,14 @@ pub fn EhFrameRecord(comptime is_mutable: bool) type { // Address of the __eh_frame in the source object file }, .ARM64_RELOC_POINTER_TO_GOT => { - const target_addr = zld.getGotEntryAddress(target).?; + const target_addr = macho_file.getGotEntryAddress(target).?; const result = math.cast(i32, @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr))) orelse return error.Overflow; mem.writeIntLittle(i32, rec.data[rel_offset..][0..4], result); }, .ARM64_RELOC_UNSIGNED => { assert(rel.r_extern == 1); - const target_addr = try Atom.getRelocTargetAddress(zld, target, false); + const target_addr = try Atom.getRelocTargetAddress(macho_file, target, false); const result = @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr)); mem.writeIntLittle(i64, rec.data[rel_offset..][0..8], @as(i64, @intCast(result))); }, @@ -374,7 +358,7 @@ pub fn EhFrameRecord(comptime is_mutable: bool) type { const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); switch (rel_type) { .X86_64_RELOC_GOT => { - const target_addr = zld.getGotEntryAddress(target).?; + const target_addr = macho_file.getGotEntryAddress(target).?; const addend = mem.readIntLittle(i32, rec.data[rel_offset..][0..4]); const adjusted_target_addr = 
@as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend)); const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); @@ -388,20 +372,20 @@ pub fn EhFrameRecord(comptime is_mutable: bool) type { } } - pub fn getCiePointerSource(rec: Record, object_id: u32, zld: *Zld, offset: u32) u32 { + pub fn getCiePointerSource(rec: Record, object_id: u32, macho_file: *MachO, offset: u32) u32 { assert(rec.tag == .fde); - const cpu_arch = zld.options.target.cpu.arch; + const cpu_arch = macho_file.base.options.target.cpu.arch; const addend = mem.readIntLittle(u32, rec.data[0..4]); switch (cpu_arch) { .aarch64 => { - const relocs = getRelocs(zld, object_id, offset); + const relocs = getRelocs(macho_file, object_id, offset); const maybe_rel = for (relocs) |rel| { if (rel.r_address - @as(i32, @intCast(offset)) == 4 and @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)) == .ARM64_RELOC_SUBTRACTOR) break rel; } else null; const rel = maybe_rel orelse return addend; - const object = &zld.objects.items[object_id]; + const object = &macho_file.objects.items[object_id]; const target_addr = object.in_symtab.?[rel.r_symbolnum].n_value; const sect = object.getSourceSection(object.eh_frame_sect_id.?); return @intCast(sect.addr + offset - target_addr + addend); @@ -583,8 +567,8 @@ pub fn EhFrameRecord(comptime is_mutable: bool) type { }; } -pub fn getRelocs(zld: *Zld, object_id: u32, source_offset: u32) []const macho.relocation_info { - const object = &zld.objects.items[object_id]; +pub fn getRelocs(macho_file: *MachO, object_id: u32, source_offset: u32) []const macho.relocation_info { + const object = &macho_file.objects.items[object_id]; assert(object.hasEhFrameRecords()); const urel = object.eh_frame_relocs_lookup.get(source_offset) orelse return &[0]macho.relocation_info{}; @@ -650,3 +634,18 @@ pub const EH_PE = struct { pub const indirect = 0x80; pub const omit = 0xFF; }; + +const std = @import("std"); +const assert = std.debug.assert; +const 
macho = std.macho; +const math = std.math; +const mem = std.mem; +const leb = std.leb; +const log = std.log.scoped(.eh_frame); + +const Allocator = mem.Allocator; +const Atom = @import("Atom.zig"); +const MachO = @import("../MachO.zig"); +const Relocation = @import("Relocation.zig"); +const SymbolWithLoc = MachO.SymbolWithLoc; +const UnwindInfo = @import("UnwindInfo.zig"); diff --git a/src/link/MachO/fat.zig b/src/link/MachO/fat.zig index 751e49f651..6dd32e2251 100644 --- a/src/link/MachO/fat.zig +++ b/src/link/MachO/fat.zig @@ -1,9 +1,3 @@ -const std = @import("std"); -const assert = std.debug.assert; -const log = std.log.scoped(.archive); -const macho = std.macho; -const mem = std.mem; - pub fn isFatLibrary(file: std.fs.File) bool { const reader = file.reader(); const hdr = reader.readStructBig(macho.fat_header) catch return false; @@ -38,3 +32,9 @@ pub fn parseArchs(file: std.fs.File, buffer: *[2]Arch) ![]const Arch { return buffer[0..count]; } + +const std = @import("std"); +const assert = std.debug.assert; +const log = std.log.scoped(.archive); +const macho = std.macho; +const mem = std.mem; diff --git a/src/link/MachO/hasher.zig b/src/link/MachO/hasher.zig index 40c034c90c..45847689f3 100644 --- a/src/link/MachO/hasher.zig +++ b/src/link/MachO/hasher.zig @@ -1,12 +1,3 @@ -const std = @import("std"); -const assert = std.debug.assert; -const fs = std.fs; -const mem = std.mem; - -const Allocator = mem.Allocator; -const ThreadPool = std.Thread.Pool; -const WaitGroup = std.Thread.WaitGroup; - pub fn ParallelHasher(comptime Hasher: type) type { const hash_size = Hasher.digest_length; @@ -69,3 +60,12 @@ pub fn ParallelHasher(comptime Hasher: type) type { const Self = @This(); }; } + +const std = @import("std"); +const assert = std.debug.assert; +const fs = std.fs; +const mem = std.mem; + +const Allocator = mem.Allocator; +const ThreadPool = std.Thread.Pool; +const WaitGroup = std.Thread.WaitGroup; diff --git a/src/link/MachO/load_commands.zig 
b/src/link/MachO/load_commands.zig index c980a764a2..b548bee2fc 100644 --- a/src/link/MachO/load_commands.zig +++ b/src/link/MachO/load_commands.zig @@ -1,13 +1,3 @@ -const std = @import("std"); -const assert = std.debug.assert; -const link = @import("../../link.zig"); -const log = std.log.scoped(.link); -const macho = std.macho; -const mem = std.mem; - -const Allocator = mem.Allocator; -const Dylib = @import("Dylib.zig"); - /// Default implicit entrypoint symbol name. pub const default_entry_point: []const u8 = "_main"; @@ -374,3 +364,13 @@ test "parseSdkVersion" { try expect(parseSdkVersion("11") == null); } + +const std = @import("std"); +const assert = std.debug.assert; +const link = @import("../../link.zig"); +const log = std.log.scoped(.link); +const macho = std.macho; +const mem = std.mem; + +const Allocator = mem.Allocator; +const Dylib = @import("Dylib.zig"); diff --git a/src/link/MachO/stubs.zig b/src/link/MachO/stubs.zig index 077d6c9303..5478dd15f8 100644 --- a/src/link/MachO/stubs.zig +++ b/src/link/MachO/stubs.zig @@ -1,8 +1,3 @@ -const std = @import("std"); -const aarch64 = @import("../../arch/aarch64/bits.zig"); - -const Relocation = @import("Relocation.zig"); - pub inline fn stubHelperPreambleSize(cpu_arch: std.Target.Cpu.Arch) u8 { return switch (cpu_arch) { .x86_64 => 15, @@ -167,3 +162,8 @@ pub fn writeStubCode(args: struct { else => unreachable, } } + +const std = @import("std"); +const aarch64 = @import("../../arch/aarch64/bits.zig"); + +const Relocation = @import("Relocation.zig"); diff --git a/src/link/MachO/thunks.zig b/src/link/MachO/thunks.zig index c5debcc1fa..726fbdf2a6 100644 --- a/src/link/MachO/thunks.zig +++ b/src/link/MachO/thunks.zig @@ -5,22 +5,6 @@ //! The algorithm works pessimistically and assumes that any reference to an Atom in //! another output section is out of range. 
-const std = @import("std"); -const assert = std.debug.assert; -const log = std.log.scoped(.thunks); -const macho = std.macho; -const math = std.math; -const mem = std.mem; - -const aarch64 = @import("../../arch/aarch64/bits.zig"); - -const Allocator = mem.Allocator; -const Atom = @import("Atom.zig"); -const MachO = @import("../MachO.zig"); -const Relocation = @import("Relocation.zig"); -const SymbolWithLoc = MachO.SymbolWithLoc; -const Zld = @import("zld.zig").Zld; - /// Branch instruction has 26 bits immediate but 4 byte aligned. const jump_bits = @bitSizeOf(i28); @@ -74,18 +58,18 @@ pub const Thunk = struct { return @alignOf(u32); } - pub fn getTrampoline(self: Thunk, zld: *Zld, tag: Tag, target: SymbolWithLoc) ?SymbolWithLoc { + pub fn getTrampoline(self: Thunk, macho_file: *MachO, tag: Tag, target: SymbolWithLoc) ?SymbolWithLoc { const atom_index = self.lookup.get(.{ .tag = tag, .target = target }) orelse return null; - return zld.getAtom(atom_index).getSymbolWithLoc(); + return macho_file.getAtom(atom_index).getSymbolWithLoc(); } }; -pub fn createThunks(zld: *Zld, sect_id: u8) !void { - const header = &zld.sections.items(.header)[sect_id]; +pub fn createThunks(macho_file: *MachO, sect_id: u8) !void { + const header = &macho_file.sections.items(.header)[sect_id]; if (header.size == 0) return; - const gpa = zld.gpa; - const first_atom_index = zld.sections.items(.first_atom_index)[sect_id].?; + const gpa = macho_file.base.allocator; + const first_atom_index = macho_file.sections.items(.first_atom_index)[sect_id].?; header.size = 0; header.@"align" = 0; @@ -95,8 +79,8 @@ pub fn createThunks(zld: *Zld, sect_id: u8) !void { { var atom_index = first_atom_index; while (true) { - const atom = zld.getAtom(atom_index); - const sym = zld.getSymbolPtr(atom.getSymbolWithLoc()); + const atom = macho_file.getAtom(atom_index); + const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc()); sym.n_value = 0; atom_count += 1; @@ -115,24 +99,24 @@ pub fn createThunks(zld: *Zld, 
sect_id: u8) !void { var offset: u64 = 0; while (true) { - const group_start_atom = zld.getAtom(group_start); + const group_start_atom = macho_file.getAtom(group_start); log.debug("GROUP START at {d}", .{group_start}); while (true) { - const atom = zld.getAtom(group_end); + const atom = macho_file.getAtom(group_end); offset = mem.alignForward(u64, offset, try math.powi(u32, 2, atom.alignment)); - const sym = zld.getSymbolPtr(atom.getSymbolWithLoc()); + const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc()); sym.n_value = offset; offset += atom.size; - zld.logAtom(group_end, log); + macho_file.logAtom(group_end, log); header.@"align" = @max(header.@"align", atom.alignment); allocated.putAssumeCapacityNoClobber(group_end, {}); - const group_start_sym = zld.getSymbol(group_start_atom.getSymbolWithLoc()); + const group_start_sym = macho_file.getSymbol(group_start_atom.getSymbolWithLoc()); if (offset - group_start_sym.n_value >= max_allowed_distance) break; if (atom.next_index) |next_index| { @@ -142,15 +126,15 @@ pub fn createThunks(zld: *Zld, sect_id: u8) !void { log.debug("GROUP END at {d}", .{group_end}); // Insert thunk at group_end - const thunk_index = @as(u32, @intCast(zld.thunks.items.len)); - try zld.thunks.append(gpa, .{ .start_index = undefined, .len = 0 }); + const thunk_index = @as(u32, @intCast(macho_file.thunks.items.len)); + try macho_file.thunks.append(gpa, .{ .start_index = undefined, .len = 0 }); // Scan relocs in the group and create trampolines for any unreachable callsite. 
var atom_index = group_start; while (true) { - const atom = zld.getAtom(atom_index); + const atom = macho_file.getAtom(atom_index); try scanRelocs( - zld, + macho_file, atom_index, allocated, thunk_index, @@ -165,19 +149,19 @@ pub fn createThunks(zld: *Zld, sect_id: u8) !void { } offset = mem.alignForward(u64, offset, Thunk.getAlignment()); - allocateThunk(zld, thunk_index, offset, header); - offset += zld.thunks.items[thunk_index].getSize(); + allocateThunk(macho_file, thunk_index, offset, header); + offset += macho_file.thunks.items[thunk_index].getSize(); - const thunk = zld.thunks.items[thunk_index]; + const thunk = macho_file.thunks.items[thunk_index]; if (thunk.len == 0) { - const group_end_atom = zld.getAtom(group_end); + const group_end_atom = macho_file.getAtom(group_end); if (group_end_atom.next_index) |next_index| { group_start = next_index; group_end = next_index; } else break; } else { const thunk_end_atom_index = thunk.getEndAtomIndex(); - const thunk_end_atom = zld.getAtom(thunk_end_atom_index); + const thunk_end_atom = macho_file.getAtom(thunk_end_atom_index); if (thunk_end_atom.next_index) |next_index| { group_start = next_index; group_end = next_index; @@ -189,12 +173,12 @@ pub fn createThunks(zld: *Zld, sect_id: u8) !void { } fn allocateThunk( - zld: *Zld, + macho_file: *MachO, thunk_index: Thunk.Index, base_offset: u64, header: *macho.section_64, ) void { - const thunk = zld.thunks.items[thunk_index]; + const thunk = macho_file.thunks.items[thunk_index]; if (thunk.len == 0) return; const first_atom_index = thunk.getStartAtomIndex(); @@ -203,14 +187,14 @@ fn allocateThunk( var atom_index = first_atom_index; var offset = base_offset; while (true) { - const atom = zld.getAtom(atom_index); + const atom = macho_file.getAtom(atom_index); offset = mem.alignForward(u64, offset, Thunk.getAlignment()); - const sym = zld.getSymbolPtr(atom.getSymbolWithLoc()); + const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc()); sym.n_value = offset; offset += 
atom.size; - zld.logAtom(atom_index, log); + macho_file.logAtom(atom_index, log); header.@"align" = @max(header.@"align", atom.alignment); @@ -223,69 +207,69 @@ fn allocateThunk( } fn scanRelocs( - zld: *Zld, + macho_file: *MachO, atom_index: Atom.Index, allocated: std.AutoHashMap(Atom.Index, void), thunk_index: Thunk.Index, group_end: Atom.Index, ) !void { - const atom = zld.getAtom(atom_index); - const object = zld.objects.items[atom.getFile().?]; + const atom = macho_file.getAtom(atom_index); + const object = macho_file.objects.items[atom.getFile().?]; const base_offset = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { const source_sect = object.getSourceSection(source_sym.n_sect - 1); break :blk @as(i32, @intCast(source_sym.n_value - source_sect.addr)); } else 0; - const code = Atom.getAtomCode(zld, atom_index); - const relocs = Atom.getAtomRelocs(zld, atom_index); - const ctx = Atom.getRelocContext(zld, atom_index); + const code = Atom.getAtomCode(macho_file, atom_index); + const relocs = Atom.getAtomRelocs(macho_file, atom_index); + const ctx = Atom.getRelocContext(macho_file, atom_index); for (relocs) |rel| { if (!relocNeedsThunk(rel)) continue; - const target = Atom.parseRelocTarget(zld, .{ + const target = Atom.parseRelocTarget(macho_file, .{ .object_id = atom.getFile().?, .rel = rel, .code = code, .base_offset = ctx.base_offset, .base_addr = ctx.base_addr, }); - if (isReachable(zld, atom_index, rel, base_offset, target, allocated)) continue; + if (isReachable(macho_file, atom_index, rel, base_offset, target, allocated)) continue; log.debug("{x}: source = {s}@{x}, target = {s}@{x} unreachable", .{ rel.r_address - base_offset, - zld.getSymbolName(atom.getSymbolWithLoc()), - zld.getSymbol(atom.getSymbolWithLoc()).n_value, - zld.getSymbolName(target), - zld.getSymbol(target).n_value, + macho_file.getSymbolName(atom.getSymbolWithLoc()), + macho_file.getSymbol(atom.getSymbolWithLoc()).n_value, + macho_file.getSymbolName(target), + 
macho_file.getSymbol(target).n_value, }); - const gpa = zld.gpa; - const target_sym = zld.getSymbol(target); - const thunk = &zld.thunks.items[thunk_index]; + const gpa = macho_file.base.allocator; + const target_sym = macho_file.getSymbol(target); + const thunk = &macho_file.thunks.items[thunk_index]; const tag: Thunk.Tag = if (target_sym.undf()) .stub else .atom; const thunk_target: Thunk.Target = .{ .tag = tag, .target = target }; const gop = try thunk.lookup.getOrPut(gpa, thunk_target); if (!gop.found_existing) { - gop.value_ptr.* = try pushThunkAtom(zld, thunk, group_end); + gop.value_ptr.* = try pushThunkAtom(macho_file, thunk, group_end); try thunk.targets.append(gpa, thunk_target); } - try zld.thunk_table.put(gpa, atom_index, thunk_index); + try macho_file.thunk_table.put(gpa, atom_index, thunk_index); } } -fn pushThunkAtom(zld: *Zld, thunk: *Thunk, group_end: Atom.Index) !Atom.Index { - const thunk_atom_index = try createThunkAtom(zld); +fn pushThunkAtom(macho_file: *MachO, thunk: *Thunk, group_end: Atom.Index) !Atom.Index { + const thunk_atom_index = try createThunkAtom(macho_file); - const thunk_atom = zld.getAtomPtr(thunk_atom_index); + const thunk_atom = macho_file.getAtomPtr(thunk_atom_index); const end_atom_index = if (thunk.len == 0) group_end else thunk.getEndAtomIndex(); - const end_atom = zld.getAtomPtr(end_atom_index); + const end_atom = macho_file.getAtomPtr(end_atom_index); if (end_atom.next_index) |first_after_index| { - const first_after_atom = zld.getAtomPtr(first_after_index); + const first_after_atom = macho_file.getAtomPtr(first_after_index); first_after_atom.prev_index = thunk_atom_index; thunk_atom.next_index = first_after_index; } @@ -308,58 +292,58 @@ inline fn relocNeedsThunk(rel: macho.relocation_info) bool { } fn isReachable( - zld: *Zld, + macho_file: *MachO, atom_index: Atom.Index, rel: macho.relocation_info, base_offset: i32, target: SymbolWithLoc, allocated: std.AutoHashMap(Atom.Index, void), ) bool { - if 
(zld.stubs_table.lookup.contains(target)) return false; + if (macho_file.stub_table.lookup.contains(target)) return false; - const source_atom = zld.getAtom(atom_index); - const source_sym = zld.getSymbol(source_atom.getSymbolWithLoc()); + const source_atom = macho_file.getAtom(atom_index); + const source_sym = macho_file.getSymbol(source_atom.getSymbolWithLoc()); - const target_object = zld.objects.items[target.getFile().?]; + const target_object = macho_file.objects.items[target.getFile().?]; const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index).?; - const target_atom = zld.getAtom(target_atom_index); - const target_sym = zld.getSymbol(target_atom.getSymbolWithLoc()); + const target_atom = macho_file.getAtom(target_atom_index); + const target_sym = macho_file.getSymbol(target_atom.getSymbolWithLoc()); if (source_sym.n_sect != target_sym.n_sect) return false; if (!allocated.contains(target_atom_index)) return false; const source_addr = source_sym.n_value + @as(u32, @intCast(rel.r_address - base_offset)); - const target_addr = if (Atom.relocRequiresGot(zld, rel)) - zld.getGotEntryAddress(target).? + const target_addr = if (Atom.relocRequiresGot(macho_file, rel)) + macho_file.getGotEntryAddress(target).? 
else - Atom.getRelocTargetAddress(zld, target, false) catch unreachable; + Atom.getRelocTargetAddress(macho_file, target, false) catch unreachable; _ = Relocation.calcPcRelativeDisplacementArm64(source_addr, target_addr) catch return false; return true; } -fn createThunkAtom(zld: *Zld) !Atom.Index { - const sym_index = try zld.allocateSymbol(); - const atom_index = try zld.createAtom(sym_index, .{ .size = @sizeOf(u32) * 3, .alignment = 2 }); - const sym = zld.getSymbolPtr(.{ .sym_index = sym_index }); +fn createThunkAtom(macho_file: *MachO) !Atom.Index { + const sym_index = try macho_file.allocateSymbol(); + const atom_index = try macho_file.createAtom(sym_index, .{ .size = @sizeOf(u32) * 3, .alignment = 2 }); + const sym = macho_file.getSymbolPtr(.{ .sym_index = sym_index }); sym.n_type = macho.N_SECT; - sym.n_sect = zld.text_section_index.? + 1; + sym.n_sect = macho_file.text_section_index.? + 1; return atom_index; } -pub fn writeThunkCode(zld: *Zld, thunk: *const Thunk, writer: anytype) !void { +pub fn writeThunkCode(macho_file: *MachO, thunk: *const Thunk, writer: anytype) !void { const slice = thunk.targets.slice(); for (thunk.getStartAtomIndex()..thunk.getEndAtomIndex(), 0..) 
|atom_index, target_index| { - const atom = zld.getAtom(@intCast(atom_index)); - const sym = zld.getSymbol(atom.getSymbolWithLoc()); + const atom = macho_file.getAtom(@intCast(atom_index)); + const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); const source_addr = sym.n_value; const tag = slice.items(.tag)[target_index]; const target = slice.items(.target)[target_index]; const target_addr = switch (tag) { - .stub => zld.getStubsEntryAddress(target).?, - .atom => zld.getSymbol(target).n_value, + .stub => macho_file.getStubsEntryAddress(target).?, + .atom => macho_file.getSymbol(target).n_value, }; const pages = Relocation.calcNumberOfPages(source_addr, target_addr); try writer.writeIntLittle(u32, aarch64.Instruction.adrp(.x16, pages).toU32()); @@ -368,3 +352,18 @@ pub fn writeThunkCode(zld: *Zld, thunk: *const Thunk, writer: anytype) !void { try writer.writeIntLittle(u32, aarch64.Instruction.br(.x16).toU32()); } } + +const std = @import("std"); +const assert = std.debug.assert; +const log = std.log.scoped(.thunks); +const macho = std.macho; +const math = std.math; +const mem = std.mem; + +const aarch64 = @import("../../arch/aarch64/bits.zig"); + +const Allocator = mem.Allocator; +const Atom = @import("Atom.zig"); +const MachO = @import("../MachO.zig"); +const Relocation = @import("Relocation.zig"); +const SymbolWithLoc = MachO.SymbolWithLoc; diff --git a/src/link/MachO/uuid.zig b/src/link/MachO/uuid.zig index 8cef0693bf..bfd602d62a 100644 --- a/src/link/MachO/uuid.zig +++ b/src/link/MachO/uuid.zig @@ -1,12 +1,3 @@ -const std = @import("std"); -const fs = std.fs; -const mem = std.mem; - -const Allocator = mem.Allocator; -const Compilation = @import("../../Compilation.zig"); -const Md5 = std.crypto.hash.Md5; -const Hasher = @import("hasher.zig").ParallelHasher; - /// Calculates Md5 hash of each chunk in parallel and then hashes all Md5 hashes to produce /// the final digest. 
/// While this is NOT a correct MD5 hash of the contents, this methodology is used by LLVM/LLD @@ -43,3 +34,12 @@ inline fn conform(out: *[Md5.digest_length]u8) void { out[6] = (out[6] & 0x0F) | (3 << 4); out[8] = (out[8] & 0x3F) | 0x80; } + +const std = @import("std"); +const fs = std.fs; +const mem = std.mem; + +const Allocator = mem.Allocator; +const Compilation = @import("../../Compilation.zig"); +const Md5 = std.crypto.hash.Md5; +const Hasher = @import("hasher.zig").ParallelHasher; diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 28a8e9b8a8..86bf14bdb2 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -1,2584 +1,8 @@ -const std = @import("std"); -const build_options = @import("build_options"); -const assert = std.debug.assert; -const dwarf = std.dwarf; -const fs = std.fs; -const log = std.log.scoped(.link); -const macho = std.macho; -const math = std.math; -const mem = std.mem; - -const aarch64 = @import("../../arch/aarch64/bits.zig"); -const calcUuid = @import("uuid.zig").calcUuid; -const dead_strip = @import("dead_strip.zig"); -const eh_frame = @import("eh_frame.zig"); -const fat = @import("fat.zig"); -const link = @import("../../link.zig"); -const load_commands = @import("load_commands.zig"); -const stubs = @import("stubs.zig"); -const thunks = @import("thunks.zig"); -const trace = @import("../../tracy.zig").trace; - -const Allocator = mem.Allocator; -const Archive = @import("Archive.zig"); -const Atom = @import("Atom.zig"); -const Cache = std.Build.Cache; -const CodeSignature = @import("CodeSignature.zig"); -const Compilation = @import("../../Compilation.zig"); -const DwarfInfo = @import("DwarfInfo.zig"); -const Dylib = @import("Dylib.zig"); -const MachO = @import("../MachO.zig"); -const Md5 = std.crypto.hash.Md5; -const LibStub = @import("../tapi.zig").LibStub; -const Object = @import("Object.zig"); -const Section = MachO.Section; -const StringTable = @import("../strtab.zig").StringTable; -const SymbolWithLoc = 
MachO.SymbolWithLoc; -const TableSection = @import("../table_section.zig").TableSection; -const Trie = @import("Trie.zig"); -const UnwindInfo = @import("UnwindInfo.zig"); - -const Bind = @import("dyld_info/bind.zig").Bind(*const Zld, SymbolWithLoc); -const LazyBind = @import("dyld_info/bind.zig").LazyBind(*const Zld, SymbolWithLoc); -const Rebase = @import("dyld_info/Rebase.zig"); - -pub const Zld = struct { - gpa: Allocator, - file: fs.File, - options: *const link.Options, - - dyld_info_cmd: macho.dyld_info_command = .{}, - symtab_cmd: macho.symtab_command = .{}, - dysymtab_cmd: macho.dysymtab_command = .{}, - function_starts_cmd: macho.linkedit_data_command = .{ .cmd = .FUNCTION_STARTS }, - data_in_code_cmd: macho.linkedit_data_command = .{ .cmd = .DATA_IN_CODE }, - uuid_cmd: macho.uuid_command = .{ - .uuid = [_]u8{0} ** 16, - }, - codesig_cmd: macho.linkedit_data_command = .{ .cmd = .CODE_SIGNATURE }, - - objects: std.ArrayListUnmanaged(Object) = .{}, - archives: std.ArrayListUnmanaged(Archive) = .{}, - dylibs: std.ArrayListUnmanaged(Dylib) = .{}, - dylibs_map: std.StringHashMapUnmanaged(u16) = .{}, - referenced_dylibs: std.AutoArrayHashMapUnmanaged(u16, void) = .{}, - - segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, - sections: std.MultiArrayList(Section) = .{}, - - pagezero_segment_cmd_index: ?u8 = null, - header_segment_cmd_index: ?u8 = null, - text_segment_cmd_index: ?u8 = null, - data_const_segment_cmd_index: ?u8 = null, - data_segment_cmd_index: ?u8 = null, - linkedit_segment_cmd_index: ?u8 = null, - - text_section_index: ?u8 = null, - data_const_section_index: ?u8 = null, - data_section_index: ?u8 = null, - bss_section_index: ?u8 = null, - thread_vars_section_index: ?u8 = null, - thread_data_section_index: ?u8 = null, - thread_bss_section_index: ?u8 = null, - eh_frame_section_index: ?u8 = null, - unwind_info_section_index: ?u8 = null, - got_section_index: ?u8 = null, - tlv_ptr_section_index: ?u8 = null, - stubs_section_index: ?u8 = 
null, - stub_helper_section_index: ?u8 = null, - la_symbol_ptr_section_index: ?u8 = null, - - locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, - globals: std.ArrayListUnmanaged(SymbolWithLoc) = .{}, - resolver: std.StringHashMapUnmanaged(u32) = .{}, - unresolved: std.AutoArrayHashMapUnmanaged(u32, void) = .{}, - - locals_free_list: std.ArrayListUnmanaged(u32) = .{}, - globals_free_list: std.ArrayListUnmanaged(u32) = .{}, - - entry_index: ?u32 = null, - dyld_stub_binder_index: ?u32 = null, - dyld_private_atom_index: ?Atom.Index = null, - - strtab: StringTable(.strtab) = .{}, - - tlv_ptr_table: TableSection(SymbolWithLoc) = .{}, - got_table: TableSection(SymbolWithLoc) = .{}, - stubs_table: TableSection(SymbolWithLoc) = .{}, - - thunk_table: std.AutoHashMapUnmanaged(Atom.Index, thunks.Thunk.Index) = .{}, - thunks: std.ArrayListUnmanaged(thunks.Thunk) = .{}, - - atoms: std.ArrayListUnmanaged(Atom) = .{}, - - pub fn addAtomToSection(self: *Zld, atom_index: Atom.Index) void { - const atom = self.getAtomPtr(atom_index); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - var section = self.sections.get(sym.n_sect - 1); - if (section.header.size > 0) { - const last_atom = self.getAtomPtr(section.last_atom_index.?); - last_atom.next_index = atom_index; - atom.prev_index = section.last_atom_index; - } else { - section.first_atom_index = atom_index; - } - section.last_atom_index = atom_index; - section.header.size += atom.size; - self.sections.set(sym.n_sect - 1, section); - } - - const CreateAtomOpts = struct { - size: u64 = 0, - alignment: u32 = 0, - }; - - pub fn createAtom(self: *Zld, sym_index: u32, opts: CreateAtomOpts) !Atom.Index { - const gpa = self.gpa; - const index = @as(Atom.Index, @intCast(self.atoms.items.len)); - const atom = try self.atoms.addOne(gpa); - atom.* = .{}; - atom.sym_index = sym_index; - atom.size = opts.size; - atom.alignment = opts.alignment; - log.debug("creating ATOM(%{d}) at index {d}", .{ sym_index, index }); - return index; - } - 
- fn createDyldPrivateAtom(self: *Zld) !void { - const sym_index = try self.allocateSymbol(); - const atom_index = try self.createAtom(sym_index, .{ .size = @sizeOf(u64), .alignment = 3 }); - const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); - sym.n_type = macho.N_SECT; - - if (self.data_section_index == null) { - self.data_section_index = try MachO.initSection(self.gpa, self, "__DATA", "__data", .{}); - } - sym.n_sect = self.data_section_index.? + 1; - self.dyld_private_atom_index = atom_index; - - self.addAtomToSection(atom_index); - } - - fn createTentativeDefAtoms(self: *Zld) !void { - const gpa = self.gpa; - - for (self.globals.items) |global| { - const sym = self.getSymbolPtr(global); - if (!sym.tentative()) continue; - if (sym.n_desc == MachO.N_DEAD) continue; - - log.debug("creating tentative definition for ATOM(%{d}, '{s}') in object({?})", .{ - global.sym_index, self.getSymbolName(global), global.file, - }); - - // Convert any tentative definition into a regular symbol and allocate - // text blocks for each tentative definition. - const size = sym.n_value; - const alignment = (sym.n_desc >> 8) & 0x0f; - - if (self.bss_section_index == null) { - self.bss_section_index = try MachO.initSection(gpa, self, "__DATA", "__bss", .{ - .flags = macho.S_ZEROFILL, - }); - } - - sym.* = .{ - .n_strx = sym.n_strx, - .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = self.bss_section_index.? 
+ 1, - .n_desc = 0, - .n_value = 0, - }; - - const atom_index = try self.createAtom(global.sym_index, .{ - .size = size, - .alignment = alignment, - }); - const atom = self.getAtomPtr(atom_index); - atom.file = global.file; - - self.addAtomToSection(atom_index); - - assert(global.getFile() != null); - const object = &self.objects.items[global.getFile().?]; - try object.atoms.append(gpa, atom_index); - object.atom_by_index_table[global.sym_index] = atom_index; - } - } - - fn addUndefined(self: *Zld, name: []const u8) !u32 { - const gop = try self.getOrPutGlobalPtr(name); - const global_index = self.getGlobalIndex(name).?; - - if (gop.found_existing) return global_index; - - const sym_index = try self.allocateSymbol(); - const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; - gop.value_ptr.* = sym_loc; - - const sym = self.getSymbolPtr(sym_loc); - sym.n_strx = try self.strtab.insert(self.gpa, name); - sym.n_type = macho.N_UNDF; - - try self.unresolved.putNoClobber(self.gpa, global_index, {}); - - return global_index; - } - - fn resolveSymbols(self: *Zld) !void { - // We add the specified entrypoint as the first unresolved symbols so that - // we search for it in libraries should there be no object files specified - // on the linker line. - if (self.options.output_mode == .Exe) { - const entry_name = self.options.entry orelse load_commands.default_entry_point; - _ = try self.addUndefined(entry_name); - } - - // Force resolution of any symbols requested by the user. - for (self.options.force_undefined_symbols.keys()) |sym_name| { - _ = try self.addUndefined(sym_name); - } - - for (self.objects.items, 0..) |_, object_id| { - try self.resolveSymbolsInObject(@as(u32, @intCast(object_id))); - } - - try self.resolveSymbolsInArchives(); - - // Finally, force resolution of dyld_stub_binder if there are imports - // requested. 
- if (self.unresolved.count() > 0) { - self.dyld_stub_binder_index = try self.addUndefined("dyld_stub_binder"); - } - - try self.resolveSymbolsInDylibs(); - - try self.createMhExecuteHeaderSymbol(); - try self.createDsoHandleSymbol(); - try self.resolveSymbolsAtLoading(); - } - - fn resolveGlobalSymbol(self: *Zld, current: SymbolWithLoc) !void { - const gpa = self.gpa; - const sym = self.getSymbol(current); - const sym_name = self.getSymbolName(current); - - const gop = try self.getOrPutGlobalPtr(sym_name); - if (!gop.found_existing) { - gop.value_ptr.* = current; - if (sym.undf() and !sym.tentative()) { - try self.unresolved.putNoClobber(gpa, self.getGlobalIndex(sym_name).?, {}); - } - return; - } - const global_index = self.getGlobalIndex(sym_name).?; - const global = gop.value_ptr.*; - const global_sym = self.getSymbol(global); - - // Cases to consider: sym vs global_sym - // 1. strong(sym) and strong(global_sym) => error - // 2. strong(sym) and weak(global_sym) => sym - // 3. strong(sym) and tentative(global_sym) => sym - // 4. strong(sym) and undf(global_sym) => sym - // 5. weak(sym) and strong(global_sym) => global_sym - // 6. weak(sym) and tentative(global_sym) => sym - // 7. weak(sym) and undf(global_sym) => sym - // 8. tentative(sym) and strong(global_sym) => global_sym - // 9. tentative(sym) and weak(global_sym) => global_sym - // 10. tentative(sym) and tentative(global_sym) => pick larger - // 11. tentative(sym) and undf(global_sym) => sym - // 12. undf(sym) and * => global_sym - // - // Reduces to: - // 1. strong(sym) and strong(global_sym) => error - // 2. * and strong(global_sym) => global_sym - // 3. weak(sym) and weak(global_sym) => global_sym - // 4. tentative(sym) and tentative(global_sym) => pick larger - // 5. undf(sym) and * => global_sym - // 6. 
else => sym - - const sym_is_strong = sym.sect() and !(sym.weakDef() or sym.pext()); - const global_is_strong = global_sym.sect() and !(global_sym.weakDef() or global_sym.pext()); - const sym_is_weak = sym.sect() and (sym.weakDef() or sym.pext()); - const global_is_weak = global_sym.sect() and (global_sym.weakDef() or global_sym.pext()); - - if (sym_is_strong and global_is_strong) { - log.err("symbol '{s}' defined multiple times", .{sym_name}); - if (global.getFile()) |file| { - log.err(" first definition in '{s}'", .{self.objects.items[file].name}); - } - if (current.getFile()) |file| { - log.err(" next definition in '{s}'", .{self.objects.items[file].name}); - } - return error.MultipleSymbolDefinitions; - } - - if (current.getFile()) |file| { - const object = &self.objects.items[file]; - object.globals_lookup[current.sym_index] = global_index; - } - - if (global_is_strong) return; - if (sym_is_weak and global_is_weak) return; - if (sym.tentative() and global_sym.tentative()) { - if (global_sym.n_value >= sym.n_value) return; - } - if (sym.undf() and !sym.tentative()) return; - - if (global.getFile()) |file| { - const global_object = &self.objects.items[file]; - global_object.globals_lookup[global.sym_index] = global_index; - } - _ = self.unresolved.swapRemove(global_index); - - gop.value_ptr.* = current; - } - - fn resolveSymbolsInObject(self: *Zld, object_id: u32) !void { - const object = &self.objects.items[object_id]; - const in_symtab = object.in_symtab orelse return; - - log.debug("resolving symbols in '{s}'", .{object.name}); - - var sym_index: u32 = 0; - while (sym_index < in_symtab.len) : (sym_index += 1) { - const sym = &object.symtab[sym_index]; - const sym_name = object.getSymbolName(sym_index); - - if (sym.stab()) { - log.err("unhandled symbol type: stab", .{}); - log.err(" symbol '{s}'", .{sym_name}); - log.err(" first definition in '{s}'", .{object.name}); - return error.UnhandledSymbolType; - } - - if (sym.indr()) { - log.err("unhandled symbol 
type: indirect", .{}); - log.err(" symbol '{s}'", .{sym_name}); - log.err(" first definition in '{s}'", .{object.name}); - return error.UnhandledSymbolType; - } - - if (sym.abs()) { - log.err("unhandled symbol type: absolute", .{}); - log.err(" symbol '{s}'", .{sym_name}); - log.err(" first definition in '{s}'", .{object.name}); - return error.UnhandledSymbolType; - } - - if (sym.sect() and !sym.ext()) { - log.debug("symbol '{s}' local to object {s}; skipping...", .{ - sym_name, - object.name, - }); - continue; - } - - try self.resolveGlobalSymbol(.{ .sym_index = sym_index, .file = object_id + 1 }); - } - } - - fn resolveSymbolsInArchives(self: *Zld) !void { - if (self.archives.items.len == 0) return; - - const gpa = self.gpa; - - var next_sym: usize = 0; - loop: while (next_sym < self.unresolved.count()) { - const global = self.globals.items[self.unresolved.keys()[next_sym]]; - const sym_name = self.getSymbolName(global); - - for (self.archives.items) |archive| { - // Check if the entry exists in a static archive. - const offsets = archive.toc.get(sym_name) orelse { - // No hit. - continue; - }; - assert(offsets.items.len > 0); - - const object_id = @as(u16, @intCast(self.objects.items.len)); - const object = try archive.parseObject(gpa, offsets.items[0]); - try self.objects.append(gpa, object); - try self.resolveSymbolsInObject(object_id); - - continue :loop; - } - - next_sym += 1; - } - } - - fn resolveSymbolsInDylibs(self: *Zld) !void { - if (self.dylibs.items.len == 0) return; - - var next_sym: usize = 0; - loop: while (next_sym < self.unresolved.count()) { - const global_index = self.unresolved.keys()[next_sym]; - const global = self.globals.items[global_index]; - const sym = self.getSymbolPtr(global); - const sym_name = self.getSymbolName(global); - - for (self.dylibs.items, 0..) 
|dylib, id| { - if (!dylib.symbols.contains(sym_name)) continue; - - const dylib_id = @as(u16, @intCast(id)); - if (!self.referenced_dylibs.contains(dylib_id)) { - try self.referenced_dylibs.putNoClobber(self.gpa, dylib_id, {}); - } - - const ordinal = self.referenced_dylibs.getIndex(dylib_id) orelse unreachable; - sym.n_type |= macho.N_EXT; - sym.n_desc = @as(u16, @intCast(ordinal + 1)) * macho.N_SYMBOL_RESOLVER; - - if (dylib.weak) { - sym.n_desc |= macho.N_WEAK_REF; - } - - assert(self.unresolved.swapRemove(global_index)); - continue :loop; - } - - next_sym += 1; - } - } - - fn resolveSymbolsAtLoading(self: *Zld) !void { - const is_lib = self.options.output_mode == .Lib; - const is_dyn_lib = self.options.link_mode == .Dynamic and is_lib; - const allow_undef = is_dyn_lib and (self.options.allow_shlib_undefined orelse false); - - var next_sym: usize = 0; - while (next_sym < self.unresolved.count()) { - const global_index = self.unresolved.keys()[next_sym]; - const global = self.globals.items[global_index]; - const sym = self.getSymbolPtr(global); - - if (sym.discarded()) { - sym.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - _ = self.unresolved.swapRemove(global_index); - continue; - } else if (allow_undef) { - const n_desc = @as( - u16, - @bitCast(macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP * @as(i16, @intCast(macho.N_SYMBOL_RESOLVER))), - ); - sym.n_type = macho.N_EXT; - sym.n_desc = n_desc; - _ = self.unresolved.swapRemove(global_index); - continue; - } - - next_sym += 1; - } - } - - fn createMhExecuteHeaderSymbol(self: *Zld) !void { - if (self.options.output_mode != .Exe) return; - - const gpa = self.gpa; - const sym_index = try self.allocateSymbol(); - const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; - const sym = self.getSymbolPtr(sym_loc); - sym.* = .{ - .n_strx = try self.strtab.insert(gpa, "__mh_execute_header"), - .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = 0, - .n_desc = 
macho.REFERENCED_DYNAMICALLY, - .n_value = 0, - }; - - const gop = try self.getOrPutGlobalPtr("__mh_execute_header"); - if (gop.found_existing) { - const global = gop.value_ptr.*; - if (global.getFile()) |file| { - const global_object = &self.objects.items[file]; - global_object.globals_lookup[global.sym_index] = self.getGlobalIndex("__mh_execute_header").?; - } - } - gop.value_ptr.* = sym_loc; - } - - fn createDsoHandleSymbol(self: *Zld) !void { - const global = self.getGlobalPtr("___dso_handle") orelse return; - if (!self.getSymbol(global.*).undf()) return; - - const sym_index = try self.allocateSymbol(); - const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; - const sym = self.getSymbolPtr(sym_loc); - sym.* = .{ - .n_strx = try self.strtab.insert(self.gpa, "___dso_handle"), - .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = 0, - .n_desc = macho.N_WEAK_DEF, - .n_value = 0, - }; - const global_index = self.getGlobalIndex("___dso_handle").?; - if (global.getFile()) |file| { - const global_object = &self.objects.items[file]; - global_object.globals_lookup[global.sym_index] = global_index; - } - global.* = sym_loc; - _ = self.unresolved.swapRemove(global_index); - } - - pub fn deinit(self: *Zld) void { - const gpa = self.gpa; - - self.tlv_ptr_table.deinit(gpa); - self.got_table.deinit(gpa); - self.stubs_table.deinit(gpa); - self.thunk_table.deinit(gpa); - - for (self.thunks.items) |*thunk| { - thunk.deinit(gpa); - } - self.thunks.deinit(gpa); - - self.strtab.deinit(gpa); - self.locals.deinit(gpa); - self.globals.deinit(gpa); - self.resolver.deinit(gpa); - self.unresolved.deinit(gpa); - self.locals_free_list.deinit(gpa); - self.globals_free_list.deinit(gpa); - - for (self.objects.items) |*object| { - object.deinit(gpa); - } - self.objects.deinit(gpa); - for (self.archives.items) |*archive| { - archive.deinit(gpa); - } - self.archives.deinit(gpa); - for (self.dylibs.items) |*dylib| { - dylib.deinit(gpa); - } - self.dylibs.deinit(gpa); - 
self.dylibs_map.deinit(gpa); - self.referenced_dylibs.deinit(gpa); - - self.segments.deinit(gpa); - self.sections.deinit(gpa); - self.atoms.deinit(gpa); - } - - fn createSegments(self: *Zld) !void { - const pagezero_vmsize = self.options.pagezero_size orelse MachO.default_pagezero_vmsize; - const page_size = MachO.getPageSize(self.options.target.cpu.arch); - const aligned_pagezero_vmsize = mem.alignBackward(u64, pagezero_vmsize, page_size); - if (self.options.output_mode != .Lib and aligned_pagezero_vmsize > 0) { - if (aligned_pagezero_vmsize != pagezero_vmsize) { - log.warn("requested __PAGEZERO size (0x{x}) is not page aligned", .{pagezero_vmsize}); - log.warn(" rounding down to 0x{x}", .{aligned_pagezero_vmsize}); - } - self.pagezero_segment_cmd_index = @intCast(self.segments.items.len); - try self.segments.append(self.gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = makeStaticString("__PAGEZERO"), - .vmsize = aligned_pagezero_vmsize, - }); - } - - // __TEXT segment is non-optional - { - const protection = MachO.getSegmentMemoryProtection("__TEXT"); - self.text_segment_cmd_index = @intCast(self.segments.items.len); - self.header_segment_cmd_index = self.text_segment_cmd_index.?; - try self.segments.append(self.gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = makeStaticString("__TEXT"), - .maxprot = protection, - .initprot = protection, - }); - } - - for (self.sections.items(.header), 0..) 
|header, sect_id| { - if (header.size == 0) continue; // empty section - - const segname = header.segName(); - const segment_id = self.getSegmentByName(segname) orelse blk: { - log.debug("creating segment '{s}'", .{segname}); - const segment_id = @as(u8, @intCast(self.segments.items.len)); - const protection = MachO.getSegmentMemoryProtection(segname); - try self.segments.append(self.gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = makeStaticString(segname), - .maxprot = protection, - .initprot = protection, - }); - break :blk segment_id; - }; - const segment = &self.segments.items[segment_id]; - segment.cmdsize += @sizeOf(macho.section_64); - segment.nsects += 1; - self.sections.items(.segment_index)[sect_id] = segment_id; - } - - if (self.getSegmentByName("__DATA_CONST")) |index| { - self.data_const_segment_cmd_index = index; - } - - if (self.getSegmentByName("__DATA")) |index| { - self.data_segment_cmd_index = index; - } - - // __LINKEDIT always comes last - { - const protection = MachO.getSegmentMemoryProtection("__LINKEDIT"); - self.linkedit_segment_cmd_index = @intCast(self.segments.items.len); - try self.segments.append(self.gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = makeStaticString("__LINKEDIT"), - .maxprot = protection, - .initprot = protection, - }); - } - } - - pub fn allocateSymbol(self: *Zld) !u32 { - try self.locals.ensureUnusedCapacity(self.gpa, 1); - log.debug(" (allocating symbol index {d})", .{self.locals.items.len}); - const index = @as(u32, @intCast(self.locals.items.len)); - _ = self.locals.addOneAssumeCapacity(); - self.locals.items[index] = .{ - .n_strx = 0, - .n_type = 0, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - return index; - } - - fn allocateGlobal(self: *Zld) !u32 { - try self.globals.ensureUnusedCapacity(self.gpa, 1); - - const index = blk: { - if (self.globals_free_list.popOrNull()) |index| { - log.debug(" (reusing global index {d})", .{index}); - break :blk index; - } else { - 
log.debug(" (allocating symbol index {d})", .{self.globals.items.len}); - const index = @as(u32, @intCast(self.globals.items.len)); - _ = self.globals.addOneAssumeCapacity(); - break :blk index; - } - }; - - self.globals.items[index] = .{ .sym_index = 0 }; - - return index; - } - - pub fn addGotEntry(self: *Zld, target: SymbolWithLoc) !void { - if (self.got_table.lookup.contains(target)) return; - _ = try self.got_table.allocateEntry(self.gpa, target); - if (self.got_section_index == null) { - self.got_section_index = try MachO.initSection(self.gpa, self, "__DATA_CONST", "__got", .{ - .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, - }); - } - } - - pub fn addTlvPtrEntry(self: *Zld, target: SymbolWithLoc) !void { - if (self.tlv_ptr_table.lookup.contains(target)) return; - _ = try self.tlv_ptr_table.allocateEntry(self.gpa, target); - if (self.tlv_ptr_section_index == null) { - self.tlv_ptr_section_index = try MachO.initSection(self.gpa, self, "__DATA", "__thread_ptrs", .{ - .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, - }); - } - } - - pub fn addStubEntry(self: *Zld, target: SymbolWithLoc) !void { - if (self.stubs_table.lookup.contains(target)) return; - _ = try self.stubs_table.allocateEntry(self.gpa, target); - if (self.stubs_section_index == null) { - self.stubs_section_index = try MachO.initSection(self.gpa, self, "__TEXT", "__stubs", .{ - .flags = macho.S_SYMBOL_STUBS | - macho.S_ATTR_PURE_INSTRUCTIONS | - macho.S_ATTR_SOME_INSTRUCTIONS, - .reserved2 = stubs.stubSize(self.options.target.cpu.arch), - }); - self.stub_helper_section_index = try MachO.initSection(self.gpa, self, "__TEXT", "__stub_helper", .{ - .flags = macho.S_REGULAR | - macho.S_ATTR_PURE_INSTRUCTIONS | - macho.S_ATTR_SOME_INSTRUCTIONS, - }); - self.la_symbol_ptr_section_index = try MachO.initSection(self.gpa, self, "__DATA", "__la_symbol_ptr", .{ - .flags = macho.S_LAZY_SYMBOL_POINTERS, - }); - } - } - - fn writeAtoms(self: *Zld) !void { - const gpa = self.gpa; - const slice = 
self.sections.slice(); - - for (slice.items(.first_atom_index), 0..) |first_atom_index, sect_id| { - const header = slice.items(.header)[sect_id]; - if (header.isZerofill()) continue; - - var atom_index = first_atom_index orelse continue; - - var buffer = try gpa.alloc(u8, math.cast(usize, header.size) orelse return error.Overflow); - defer gpa.free(buffer); - @memset(buffer, 0); // TODO with NOPs - - log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); - - while (true) { - const atom = self.getAtom(atom_index); - if (atom.getFile()) |file| { - const this_sym = self.getSymbol(atom.getSymbolWithLoc()); - const padding_size: usize = if (atom.next_index) |next_index| blk: { - const next_sym = self.getSymbol(self.getAtom(next_index).getSymbolWithLoc()); - const size = next_sym.n_value - (this_sym.n_value + atom.size); - break :blk math.cast(usize, size) orelse return error.Overflow; - } else 0; - - log.debug(" (adding ATOM(%{d}, '{s}') from object({d}) to buffer)", .{ - atom.sym_index, - self.getSymbolName(atom.getSymbolWithLoc()), - file, - }); - if (padding_size > 0) { - log.debug(" (with padding {x})", .{padding_size}); - } - - const offset = this_sym.n_value - header.addr; - log.debug(" (at offset 0x{x})", .{offset}); - - const code = Atom.getAtomCode(self, atom_index); - const relocs = Atom.getAtomRelocs(self, atom_index); - const size = math.cast(usize, atom.size) orelse return error.Overflow; - @memcpy(buffer[offset .. 
offset + size], code); - try Atom.resolveRelocs( - self, - atom_index, - buffer[offset..][0..size], - relocs, - ); - } - - if (atom.next_index) |next_index| { - atom_index = next_index; - } else break; - } - - log.debug(" (writing at file offset 0x{x})", .{header.offset}); - try self.file.pwriteAll(buffer, header.offset); - } - } - - fn writeDyldPrivateAtom(self: *Zld) !void { - const atom_index = self.dyld_private_atom_index orelse return; - const atom = self.getAtom(atom_index); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - const sect_id = self.data_section_index.?; - const header = self.sections.items(.header)[sect_id]; - const offset = sym.n_value - header.addr + header.offset; - log.debug("writing __dyld_private at offset 0x{x}", .{offset}); - const buffer: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64); - try self.file.pwriteAll(&buffer, offset); - } - - fn writeThunks(self: *Zld) !void { - assert(self.requiresThunks()); - const gpa = self.gpa; - - const sect_id = self.text_section_index orelse return; - const header = self.sections.items(.header)[sect_id]; - - for (self.thunks.items, 0..) 
|*thunk, i| { - if (thunk.getSize() == 0) continue; - var buffer = try std.ArrayList(u8).initCapacity(gpa, thunk.getSize()); - defer buffer.deinit(); - try thunks.writeThunkCode(self, thunk, buffer.writer()); - const thunk_atom = self.getAtom(thunk.getStartAtomIndex()); - const thunk_sym = self.getSymbol(thunk_atom.getSymbolWithLoc()); - const offset = thunk_sym.n_value - header.addr + header.offset; - log.debug("writing thunk({d}) at offset 0x{x}", .{ i, offset }); - try self.file.pwriteAll(buffer.items, offset); - } - } - - fn writePointerEntries(self: *Zld, sect_id: u8, table: anytype) !void { - const header = self.sections.items(.header)[sect_id]; - var buffer = try std.ArrayList(u8).initCapacity(self.gpa, header.size); - defer buffer.deinit(); - for (table.entries.items) |entry| { - const sym = self.getSymbol(entry); - buffer.writer().writeIntLittle(u64, sym.n_value) catch unreachable; - } - log.debug("writing __DATA_CONST,__got contents at file offset 0x{x}", .{header.offset}); - try self.file.pwriteAll(buffer.items, header.offset); - } - - fn writeStubs(self: *Zld) !void { - const gpa = self.gpa; - const cpu_arch = self.options.target.cpu.arch; - const stubs_header = self.sections.items(.header)[self.stubs_section_index.?]; - const la_symbol_ptr_header = self.sections.items(.header)[self.la_symbol_ptr_section_index.?]; - - var buffer = try std.ArrayList(u8).initCapacity(gpa, stubs_header.size); - defer buffer.deinit(); - - for (0..self.stubs_table.count()) |index| { - try stubs.writeStubCode(.{ - .cpu_arch = cpu_arch, - .source_addr = stubs_header.addr + stubs.stubSize(cpu_arch) * index, - .target_addr = la_symbol_ptr_header.addr + index * @sizeOf(u64), - }, buffer.writer()); - } - - log.debug("writing __TEXT,__stubs contents at file offset 0x{x}", .{stubs_header.offset}); - try self.file.pwriteAll(buffer.items, stubs_header.offset); - } - - fn writeStubHelpers(self: *Zld) !void { - const gpa = self.gpa; - const cpu_arch = self.options.target.cpu.arch; - 
const stub_helper_header = self.sections.items(.header)[self.stub_helper_section_index.?]; - - var buffer = try std.ArrayList(u8).initCapacity(gpa, stub_helper_header.size); - defer buffer.deinit(); - - { - const dyld_private_addr = blk: { - const atom = self.getAtom(self.dyld_private_atom_index.?); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - break :blk sym.n_value; - }; - const dyld_stub_binder_got_addr = blk: { - const sym_loc = self.globals.items[self.dyld_stub_binder_index.?]; - break :blk self.getGotEntryAddress(sym_loc).?; - }; - try stubs.writeStubHelperPreambleCode(.{ - .cpu_arch = cpu_arch, - .source_addr = stub_helper_header.addr, - .dyld_private_addr = dyld_private_addr, - .dyld_stub_binder_got_addr = dyld_stub_binder_got_addr, - }, buffer.writer()); - } - - for (0..self.stubs_table.count()) |index| { - const source_addr = stub_helper_header.addr + stubs.stubHelperPreambleSize(cpu_arch) + - stubs.stubHelperSize(cpu_arch) * index; - try stubs.writeStubHelperCode(.{ - .cpu_arch = cpu_arch, - .source_addr = source_addr, - .target_addr = stub_helper_header.addr, - }, buffer.writer()); - } - - log.debug("writing __TEXT,__stub_helper contents at file offset 0x{x}", .{ - stub_helper_header.offset, - }); - try self.file.pwriteAll(buffer.items, stub_helper_header.offset); - } - - fn writeLaSymbolPtrs(self: *Zld) !void { - const gpa = self.gpa; - const cpu_arch = self.options.target.cpu.arch; - const la_symbol_ptr_header = self.sections.items(.header)[self.la_symbol_ptr_section_index.?]; - const stub_helper_header = self.sections.items(.header)[self.stub_helper_section_index.?]; - - var buffer = try std.ArrayList(u8).initCapacity(gpa, la_symbol_ptr_header.size); - defer buffer.deinit(); - - for (0..self.stubs_table.count()) |index| { - const target_addr = stub_helper_header.addr + stubs.stubHelperPreambleSize(cpu_arch) + - stubs.stubHelperSize(cpu_arch) * index; - buffer.writer().writeIntLittle(u64, target_addr) catch unreachable; - } - - 
log.debug("writing __DATA,__la_symbol_ptr contents at file offset 0x{x}", .{ - la_symbol_ptr_header.offset, - }); - try self.file.pwriteAll(buffer.items, la_symbol_ptr_header.offset); - } - - fn pruneAndSortSections(self: *Zld) !void { - const Entry = struct { - index: u8, - - pub fn lessThan(zld: *Zld, lhs: @This(), rhs: @This()) bool { - const lhs_header = zld.sections.items(.header)[lhs.index]; - const rhs_header = zld.sections.items(.header)[rhs.index]; - return MachO.getSectionPrecedence(lhs_header) < MachO.getSectionPrecedence(rhs_header); - } - }; - - const gpa = self.gpa; - - var entries = try std.ArrayList(Entry).initCapacity(gpa, self.sections.slice().len); - defer entries.deinit(); - - for (0..self.sections.slice().len) |index| { - const section = self.sections.get(index); - if (section.header.size == 0) { - log.debug("pruning section {s},{s} {?d}", .{ - section.header.segName(), - section.header.sectName(), - section.first_atom_index, - }); - for (&[_]*?u8{ - &self.text_section_index, - &self.data_const_section_index, - &self.data_section_index, - &self.bss_section_index, - &self.thread_vars_section_index, - &self.thread_data_section_index, - &self.thread_bss_section_index, - &self.eh_frame_section_index, - &self.unwind_info_section_index, - &self.got_section_index, - &self.tlv_ptr_section_index, - &self.stubs_section_index, - &self.stub_helper_section_index, - &self.la_symbol_ptr_section_index, - }) |maybe_index| { - if (maybe_index.* != null and maybe_index.*.? == index) { - maybe_index.* = null; - } - } - continue; - } - entries.appendAssumeCapacity(.{ .index = @intCast(index) }); - } - - mem.sort(Entry, entries.items, self, Entry.lessThan); - - var slice = self.sections.toOwnedSlice(); - defer slice.deinit(gpa); - - const backlinks = try gpa.alloc(u8, slice.len); - defer gpa.free(backlinks); - for (entries.items, 0..) 
|entry, i| { - backlinks[entry.index] = @as(u8, @intCast(i)); - } - - try self.sections.ensureTotalCapacity(gpa, entries.items.len); - for (entries.items) |entry| { - self.sections.appendAssumeCapacity(slice.get(entry.index)); - } - - for (&[_]*?u8{ - &self.text_section_index, - &self.data_const_section_index, - &self.data_section_index, - &self.bss_section_index, - &self.thread_vars_section_index, - &self.thread_data_section_index, - &self.thread_bss_section_index, - &self.eh_frame_section_index, - &self.unwind_info_section_index, - &self.got_section_index, - &self.tlv_ptr_section_index, - &self.stubs_section_index, - &self.stub_helper_section_index, - &self.la_symbol_ptr_section_index, - }) |maybe_index| { - if (maybe_index.*) |*index| { - index.* = backlinks[index.*]; - } - } - } - - fn calcSectionSizes(self: *Zld) !void { - const slice = self.sections.slice(); - for (slice.items(.header), 0..) |*header, sect_id| { - if (header.size == 0) continue; - if (self.text_section_index) |txt| { - if (txt == sect_id and self.requiresThunks()) continue; - } - - var atom_index = slice.items(.first_atom_index)[sect_id] orelse continue; - - header.size = 0; - header.@"align" = 0; - - while (true) { - const atom = self.getAtom(atom_index); - const atom_alignment = try math.powi(u32, 2, atom.alignment); - const atom_offset = mem.alignForward(u64, header.size, atom_alignment); - const padding = atom_offset - header.size; - - const sym = self.getSymbolPtr(atom.getSymbolWithLoc()); - sym.n_value = atom_offset; - - header.size += padding + atom.size; - header.@"align" = @max(header.@"align", atom.alignment); - - if (atom.next_index) |next_index| { - atom_index = next_index; - } else break; - } - } - - if (self.text_section_index != null and self.requiresThunks()) { - // Create jump/branch range extenders if needed. - try thunks.createThunks(self, self.text_section_index.?); - } - - // Update offsets of all symbols contained within each Atom. 
- // We need to do this since our unwind info synthesiser relies on - // traversing the symbols when synthesising unwind info and DWARF CFI records. - for (slice.items(.first_atom_index)) |first_atom_index| { - var atom_index = first_atom_index orelse continue; - - while (true) { - const atom = self.getAtom(atom_index); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - - if (atom.getFile() != null) { - // Update each symbol contained within the atom - var it = Atom.getInnerSymbolsIterator(self, atom_index); - while (it.next()) |sym_loc| { - const inner_sym = self.getSymbolPtr(sym_loc); - inner_sym.n_value = sym.n_value + Atom.calcInnerSymbolOffset( - self, - atom_index, - sym_loc.sym_index, - ); - } - - // If there is a section alias, update it now too - if (Atom.getSectionAlias(self, atom_index)) |sym_loc| { - const alias = self.getSymbolPtr(sym_loc); - alias.n_value = sym.n_value; - } - } - - if (atom.next_index) |next_index| { - atom_index = next_index; - } else break; - } - } - - if (self.got_section_index) |sect_id| { - const header = &self.sections.items(.header)[sect_id]; - header.size = self.got_table.count() * @sizeOf(u64); - header.@"align" = 3; - } - - if (self.tlv_ptr_section_index) |sect_id| { - const header = &self.sections.items(.header)[sect_id]; - header.size = self.tlv_ptr_table.count() * @sizeOf(u64); - header.@"align" = 3; - } - - const cpu_arch = self.options.target.cpu.arch; - - if (self.stubs_section_index) |sect_id| { - const header = &self.sections.items(.header)[sect_id]; - header.size = self.stubs_table.count() * stubs.stubSize(cpu_arch); - header.@"align" = stubs.stubAlignment(cpu_arch); - } - - if (self.stub_helper_section_index) |sect_id| { - const header = &self.sections.items(.header)[sect_id]; - header.size = self.stubs_table.count() * stubs.stubHelperSize(cpu_arch) + - stubs.stubHelperPreambleSize(cpu_arch); - header.@"align" = stubs.stubAlignment(cpu_arch); - } - - if (self.la_symbol_ptr_section_index) |sect_id| { - const 
header = &self.sections.items(.header)[sect_id]; - header.size = self.stubs_table.count() * @sizeOf(u64); - header.@"align" = 3; - } - } - - fn allocateSegments(self: *Zld) !void { - for (self.segments.items, 0..) |*segment, segment_index| { - const is_text_segment = mem.eql(u8, segment.segName(), "__TEXT"); - const base_size = if (is_text_segment) try load_commands.calcMinHeaderPad(self.gpa, self.options, .{ - .segments = self.segments.items, - .dylibs = self.dylibs.items, - .referenced_dylibs = self.referenced_dylibs.keys(), - }) else 0; - try self.allocateSegment(@as(u8, @intCast(segment_index)), base_size); - } - } - - fn getSegmentAllocBase(self: Zld, segment_index: u8) struct { vmaddr: u64, fileoff: u64 } { - if (segment_index > 0) { - const prev_segment = self.segments.items[segment_index - 1]; - return .{ - .vmaddr = prev_segment.vmaddr + prev_segment.vmsize, - .fileoff = prev_segment.fileoff + prev_segment.filesize, - }; - } - return .{ .vmaddr = 0, .fileoff = 0 }; - } - - fn allocateSegment(self: *Zld, segment_index: u8, init_size: u64) !void { - const segment = &self.segments.items[segment_index]; - - if (mem.eql(u8, segment.segName(), "__PAGEZERO")) return; // allocated upon creation - - const base = self.getSegmentAllocBase(segment_index); - segment.vmaddr = base.vmaddr; - segment.fileoff = base.fileoff; - segment.filesize = init_size; - segment.vmsize = init_size; - - // Allocate the sections according to their alignment at the beginning of the segment. - const indexes = self.getSectionIndexes(segment_index); - var start = init_size; - - const slice = self.sections.slice(); - for (slice.items(.header)[indexes.start..indexes.end], 0..) 
|*header, sect_id| { - const alignment = try math.powi(u32, 2, header.@"align"); - const start_aligned = mem.alignForward(u64, start, alignment); - const n_sect = @as(u8, @intCast(indexes.start + sect_id + 1)); - - header.offset = if (header.isZerofill()) - 0 - else - @as(u32, @intCast(segment.fileoff + start_aligned)); - header.addr = segment.vmaddr + start_aligned; - - if (slice.items(.first_atom_index)[indexes.start + sect_id]) |first_atom_index| { - var atom_index = first_atom_index; - - log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ - n_sect, - header.segName(), - header.sectName(), - }); - - while (true) { - const atom = self.getAtom(atom_index); - const sym = self.getSymbolPtr(atom.getSymbolWithLoc()); - sym.n_value += header.addr; - sym.n_sect = n_sect; - - log.debug(" ATOM(%{d}, '{s}') @{x}", .{ - atom.sym_index, - self.getSymbolName(atom.getSymbolWithLoc()), - sym.n_value, - }); - - if (atom.getFile() != null) { - // Update each symbol contained within the atom - var it = Atom.getInnerSymbolsIterator(self, atom_index); - while (it.next()) |sym_loc| { - const inner_sym = self.getSymbolPtr(sym_loc); - inner_sym.n_value = sym.n_value + Atom.calcInnerSymbolOffset( - self, - atom_index, - sym_loc.sym_index, - ); - inner_sym.n_sect = n_sect; - } - - // If there is a section alias, update it now too - if (Atom.getSectionAlias(self, atom_index)) |sym_loc| { - const alias = self.getSymbolPtr(sym_loc); - alias.n_value = sym.n_value; - alias.n_sect = n_sect; - } - } - - if (atom.next_index) |next_index| { - atom_index = next_index; - } else break; - } - } - - start = start_aligned + header.size; - - if (!header.isZerofill()) { - segment.filesize = start; - } - segment.vmsize = start; - } - - const page_size = MachO.getPageSize(self.options.target.cpu.arch); - segment.filesize = mem.alignForward(u64, segment.filesize, page_size); - segment.vmsize = mem.alignForward(u64, segment.vmsize, page_size); - } - - fn writeLinkeditSegmentData(self: *Zld) 
!void { - const page_size = MachO.getPageSize(self.options.target.cpu.arch); - const seg = self.getLinkeditSegmentPtr(); - seg.filesize = 0; - seg.vmsize = 0; - - for (self.segments.items, 0..) |segment, id| { - if (self.linkedit_segment_cmd_index.? == @as(u8, @intCast(id))) continue; - if (seg.vmaddr < segment.vmaddr + segment.vmsize) { - seg.vmaddr = mem.alignForward(u64, segment.vmaddr + segment.vmsize, page_size); - } - if (seg.fileoff < segment.fileoff + segment.filesize) { - seg.fileoff = mem.alignForward(u64, segment.fileoff + segment.filesize, page_size); - } - } - try self.writeDyldInfoData(); - try self.writeFunctionStarts(); - try self.writeDataInCode(); - try self.writeSymtabs(); - - seg.vmsize = mem.alignForward(u64, seg.filesize, page_size); - } - - fn collectRebaseData(self: *Zld, rebase: *Rebase) !void { - log.debug("collecting rebase data", .{}); - - // First, unpack GOT entries - if (self.got_section_index) |sect_id| { - try MachO.collectRebaseDataFromTableSection(self.gpa, self, sect_id, rebase, self.got_table); - } - - // Next, unpack __la_symbol_ptr entries - if (self.la_symbol_ptr_section_index) |sect_id| { - try MachO.collectRebaseDataFromTableSection(self.gpa, self, sect_id, rebase, self.stubs_table); - } - - // Finally, unpack the rest. 
- const cpu_arch = self.options.target.cpu.arch; - for (self.objects.items) |*object| { - for (object.atoms.items) |atom_index| { - const atom = self.getAtom(atom_index); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - if (sym.n_desc == MachO.N_DEAD) continue; - - const sect_id = sym.n_sect - 1; - const section = self.sections.items(.header)[sect_id]; - const segment_id = self.sections.items(.segment_index)[sect_id]; - const segment = self.segments.items[segment_id]; - if (segment.maxprot & macho.PROT.WRITE == 0) continue; - switch (section.type()) { - macho.S_LITERAL_POINTERS, - macho.S_REGULAR, - macho.S_MOD_INIT_FUNC_POINTERS, - macho.S_MOD_TERM_FUNC_POINTERS, - => {}, - else => continue, - } - - log.debug(" ATOM({d}, %{d}, '{s}')", .{ - atom_index, - atom.sym_index, - self.getSymbolName(atom.getSymbolWithLoc()), - }); - - const code = Atom.getAtomCode(self, atom_index); - const relocs = Atom.getAtomRelocs(self, atom_index); - const ctx = Atom.getRelocContext(self, atom_index); - - for (relocs) |rel| { - switch (cpu_arch) { - .aarch64 => { - const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); - if (rel_type != .ARM64_RELOC_UNSIGNED) continue; - if (rel.r_length != 3) continue; - }, - .x86_64 => { - const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); - if (rel_type != .X86_64_RELOC_UNSIGNED) continue; - if (rel.r_length != 3) continue; - }, - else => unreachable, - } - const target = Atom.parseRelocTarget(self, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = code, - .base_offset = ctx.base_offset, - .base_addr = ctx.base_addr, - }); - const target_sym = self.getSymbol(target); - if (target_sym.undf()) continue; - - const base_offset = @as(i32, @intCast(sym.n_value - segment.vmaddr)); - const rel_offset = rel.r_address - ctx.base_offset; - const offset = @as(u64, @intCast(base_offset + rel_offset)); - log.debug(" | rebase at {x}", .{offset}); - - try rebase.entries.append(self.gpa, .{ - .offset = 
offset, - .segment_id = segment_id, - }); - } - } - } - - try rebase.finalize(self.gpa); - } - - fn collectBindData( - self: *Zld, - bind: *Bind, - ) !void { - log.debug("collecting bind data", .{}); - - // First, unpack GOT section - if (self.got_section_index) |sect_id| { - try MachO.collectBindDataFromTableSection(self.gpa, self, sect_id, bind, self.got_table); - } - - // Next, unpack TLV pointers section - if (self.tlv_ptr_section_index) |sect_id| { - try MachO.collectBindDataFromTableSection(self.gpa, self, sect_id, bind, self.tlv_ptr_table); - } - - // Finally, unpack the rest. - const cpu_arch = self.options.target.cpu.arch; - for (self.objects.items) |*object| { - for (object.atoms.items) |atom_index| { - const atom = self.getAtom(atom_index); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - if (sym.n_desc == MachO.N_DEAD) continue; - - const sect_id = sym.n_sect - 1; - const section = self.sections.items(.header)[sect_id]; - const segment_id = self.sections.items(.segment_index)[sect_id]; - const segment = self.segments.items[segment_id]; - if (segment.maxprot & macho.PROT.WRITE == 0) continue; - switch (section.type()) { - macho.S_LITERAL_POINTERS, - macho.S_REGULAR, - macho.S_MOD_INIT_FUNC_POINTERS, - macho.S_MOD_TERM_FUNC_POINTERS, - => {}, - else => continue, - } - - log.debug(" ATOM({d}, %{d}, '{s}')", .{ - atom_index, - atom.sym_index, - self.getSymbolName(atom.getSymbolWithLoc()), - }); - - const code = Atom.getAtomCode(self, atom_index); - const relocs = Atom.getAtomRelocs(self, atom_index); - const ctx = Atom.getRelocContext(self, atom_index); - - for (relocs) |rel| { - switch (cpu_arch) { - .aarch64 => { - const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)); - if (rel_type != .ARM64_RELOC_UNSIGNED) continue; - if (rel.r_length != 3) continue; - }, - .x86_64 => { - const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type)); - if (rel_type != .X86_64_RELOC_UNSIGNED) continue; - if (rel.r_length != 3) 
continue; - }, - else => unreachable, - } - - const global = Atom.parseRelocTarget(self, .{ - .object_id = atom.getFile().?, - .rel = rel, - .code = code, - .base_offset = ctx.base_offset, - .base_addr = ctx.base_addr, - }); - const bind_sym_name = self.getSymbolName(global); - const bind_sym = self.getSymbol(global); - if (!bind_sym.undf()) continue; - - const base_offset = sym.n_value - segment.vmaddr; - const rel_offset = @as(u32, @intCast(rel.r_address - ctx.base_offset)); - const offset = @as(u64, @intCast(base_offset + rel_offset)); - const addend = mem.readIntLittle(i64, code[rel_offset..][0..8]); - - const dylib_ordinal = @divTrunc(@as(i16, @bitCast(bind_sym.n_desc)), macho.N_SYMBOL_RESOLVER); - log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ - base_offset, - bind_sym_name, - dylib_ordinal, - }); - log.debug(" | with addend {x}", .{addend}); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - } - try bind.entries.append(self.gpa, .{ - .target = global, - .offset = offset, - .segment_id = segment_id, - .addend = addend, - }); - } - } - } - - try bind.finalize(self.gpa, self); - } - - fn collectLazyBindData(self: *Zld, lazy_bind: *LazyBind) !void { - const sect_id = self.la_symbol_ptr_section_index orelse return; - try MachO.collectBindDataFromTableSection(self.gpa, self, sect_id, lazy_bind, self.stubs_table); - try lazy_bind.finalize(self.gpa, self); - } - - fn collectExportData(self: *Zld, trie: *Trie) !void { - const gpa = self.gpa; - - // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
- log.debug("collecting export data", .{}); - - const exec_segment = self.segments.items[self.header_segment_cmd_index.?]; - const base_address = exec_segment.vmaddr; - - for (self.globals.items) |global| { - const sym = self.getSymbol(global); - if (sym.undf()) continue; - if (sym.n_desc == MachO.N_DEAD) continue; - - const sym_name = self.getSymbolName(global); - log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); - try trie.put(gpa, .{ - .name = sym_name, - .vmaddr_offset = sym.n_value - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); - } - - try trie.finalize(gpa); - } - - fn writeDyldInfoData(self: *Zld) !void { - const gpa = self.gpa; - - var rebase = Rebase{}; - defer rebase.deinit(gpa); - try self.collectRebaseData(&rebase); - - var bind = Bind{}; - defer bind.deinit(gpa); - try self.collectBindData(&bind); - - var lazy_bind = LazyBind{}; - defer lazy_bind.deinit(gpa); - try self.collectLazyBindData(&lazy_bind); - - var trie = Trie{}; - defer trie.deinit(gpa); - try trie.init(gpa); - try self.collectExportData(&trie); - - const link_seg = self.getLinkeditSegmentPtr(); - assert(mem.isAlignedGeneric(u64, link_seg.fileoff, @alignOf(u64))); - const rebase_off = link_seg.fileoff; - const rebase_size = rebase.size(); - const rebase_size_aligned = mem.alignForward(u64, rebase_size, @alignOf(u64)); - log.debug("writing rebase info from 0x{x} to 0x{x}", .{ rebase_off, rebase_off + rebase_size_aligned }); - - const bind_off = rebase_off + rebase_size_aligned; - const bind_size = bind.size(); - const bind_size_aligned = mem.alignForward(u64, bind_size, @alignOf(u64)); - log.debug("writing bind info from 0x{x} to 0x{x}", .{ bind_off, bind_off + bind_size_aligned }); - - const lazy_bind_off = bind_off + bind_size_aligned; - const lazy_bind_size = lazy_bind.size(); - const lazy_bind_size_aligned = mem.alignForward(u64, lazy_bind_size, @alignOf(u64)); - log.debug("writing lazy bind info from 0x{x} to 0x{x}", .{ - 
lazy_bind_off, - lazy_bind_off + lazy_bind_size_aligned, - }); - - const export_off = lazy_bind_off + lazy_bind_size_aligned; - const export_size = trie.size; - const export_size_aligned = mem.alignForward(u64, export_size, @alignOf(u64)); - log.debug("writing export trie from 0x{x} to 0x{x}", .{ export_off, export_off + export_size_aligned }); - - const needed_size = math.cast(usize, export_off + export_size_aligned - rebase_off) orelse - return error.Overflow; - link_seg.filesize = needed_size; - assert(mem.isAlignedGeneric(u64, link_seg.fileoff + link_seg.filesize, @alignOf(u64))); - - var buffer = try gpa.alloc(u8, needed_size); - defer gpa.free(buffer); - @memset(buffer, 0); - - var stream = std.io.fixedBufferStream(buffer); - const writer = stream.writer(); - - try rebase.write(writer); - try stream.seekTo(bind_off - rebase_off); - - try bind.write(writer); - try stream.seekTo(lazy_bind_off - rebase_off); - - try lazy_bind.write(writer); - try stream.seekTo(export_off - rebase_off); - - _ = try trie.write(writer); - - log.debug("writing dyld info from 0x{x} to 0x{x}", .{ - rebase_off, - rebase_off + needed_size, - }); - - try self.file.pwriteAll(buffer, rebase_off); - try MachO.populateLazyBindOffsetsInStubHelper( - self, - self.options.target.cpu.arch, - self.file, - lazy_bind, - ); - - self.dyld_info_cmd.rebase_off = @as(u32, @intCast(rebase_off)); - self.dyld_info_cmd.rebase_size = @as(u32, @intCast(rebase_size_aligned)); - self.dyld_info_cmd.bind_off = @as(u32, @intCast(bind_off)); - self.dyld_info_cmd.bind_size = @as(u32, @intCast(bind_size_aligned)); - self.dyld_info_cmd.lazy_bind_off = @as(u32, @intCast(lazy_bind_off)); - self.dyld_info_cmd.lazy_bind_size = @as(u32, @intCast(lazy_bind_size_aligned)); - self.dyld_info_cmd.export_off = @as(u32, @intCast(export_off)); - self.dyld_info_cmd.export_size = @as(u32, @intCast(export_size_aligned)); - } - - const asc_u64 = std.sort.asc(u64); - - fn addSymbolToFunctionStarts(self: *Zld, sym_loc: SymbolWithLoc, 
addresses: *std.ArrayList(u64)) !void { - const sym = self.getSymbol(sym_loc); - if (sym.n_strx == 0) return; - if (sym.n_desc == MachO.N_DEAD) return; - if (self.symbolIsTemp(sym_loc)) return; - try addresses.append(sym.n_value); - } - - fn writeFunctionStarts(self: *Zld) !void { - const gpa = self.gpa; - const seg = self.segments.items[self.header_segment_cmd_index.?]; - - // We need to sort by address first - var addresses = std.ArrayList(u64).init(gpa); - defer addresses.deinit(); - - for (self.objects.items) |object| { - for (object.exec_atoms.items) |atom_index| { - const atom = self.getAtom(atom_index); - const sym_loc = atom.getSymbolWithLoc(); - try self.addSymbolToFunctionStarts(sym_loc, &addresses); - - var it = Atom.getInnerSymbolsIterator(self, atom_index); - while (it.next()) |inner_sym_loc| { - try self.addSymbolToFunctionStarts(inner_sym_loc, &addresses); - } - } - } - - mem.sort(u64, addresses.items, {}, asc_u64); - - var offsets = std.ArrayList(u32).init(gpa); - defer offsets.deinit(); - try offsets.ensureTotalCapacityPrecise(addresses.items.len); - - var last_off: u32 = 0; - for (addresses.items) |addr| { - const offset = @as(u32, @intCast(addr - seg.vmaddr)); - const diff = offset - last_off; - - if (diff == 0) continue; - - offsets.appendAssumeCapacity(diff); - last_off = offset; - } - - var buffer = std.ArrayList(u8).init(gpa); - defer buffer.deinit(); - - const max_size = @as(usize, @intCast(offsets.items.len * @sizeOf(u64))); - try buffer.ensureTotalCapacity(max_size); - - for (offsets.items) |offset| { - try std.leb.writeULEB128(buffer.writer(), offset); - } - - const link_seg = self.getLinkeditSegmentPtr(); - const offset = link_seg.fileoff + link_seg.filesize; - assert(mem.isAlignedGeneric(u64, offset, @alignOf(u64))); - const needed_size = buffer.items.len; - const needed_size_aligned = mem.alignForward(u64, needed_size, @alignOf(u64)); - const padding = math.cast(usize, needed_size_aligned - needed_size) orelse return error.Overflow; - 
if (padding > 0) { - try buffer.ensureUnusedCapacity(padding); - buffer.appendNTimesAssumeCapacity(0, padding); - } - link_seg.filesize = offset + needed_size_aligned - link_seg.fileoff; - - log.debug("writing function starts info from 0x{x} to 0x{x}", .{ offset, offset + needed_size_aligned }); - - try self.file.pwriteAll(buffer.items, offset); - - self.function_starts_cmd.dataoff = @as(u32, @intCast(offset)); - self.function_starts_cmd.datasize = @as(u32, @intCast(needed_size_aligned)); - } - - fn filterDataInCode( - dices: []const macho.data_in_code_entry, - start_addr: u64, - end_addr: u64, - ) []const macho.data_in_code_entry { - const Predicate = struct { - addr: u64, - - pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool { - return dice.offset >= self.addr; - } - }; - - const start = MachO.lsearch(macho.data_in_code_entry, dices, Predicate{ .addr = start_addr }); - const end = MachO.lsearch(macho.data_in_code_entry, dices[start..], Predicate{ .addr = end_addr }) + start; - - return dices[start..end]; - } - - fn writeDataInCode(self: *Zld) !void { - var out_dice = std.ArrayList(macho.data_in_code_entry).init(self.gpa); - defer out_dice.deinit(); - - const text_sect_id = self.text_section_index orelse return; - const text_sect_header = self.sections.items(.header)[text_sect_id]; - - for (self.objects.items) |object| { - if (!object.hasDataInCode()) continue; - const dice = object.data_in_code.items; - try out_dice.ensureUnusedCapacity(dice.len); - - for (object.exec_atoms.items) |atom_index| { - const atom = self.getAtom(atom_index); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - if (sym.n_desc == MachO.N_DEAD) continue; - - const source_addr = if (object.getSourceSymbol(atom.sym_index)) |source_sym| - source_sym.n_value - else blk: { - const nbase = @as(u32, @intCast(object.in_symtab.?.len)); - const source_sect_id = @as(u8, @intCast(atom.sym_index - nbase)); - break :blk object.getSourceSection(source_sect_id).addr; - }; - const 
filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size); - const base = math.cast(u32, sym.n_value - text_sect_header.addr + text_sect_header.offset) orelse - return error.Overflow; - - for (filtered_dice) |single| { - const offset = math.cast(u32, single.offset - source_addr + base) orelse - return error.Overflow; - out_dice.appendAssumeCapacity(.{ - .offset = offset, - .length = single.length, - .kind = single.kind, - }); - } - } - } - - const seg = self.getLinkeditSegmentPtr(); - const offset = seg.fileoff + seg.filesize; - assert(mem.isAlignedGeneric(u64, offset, @alignOf(u64))); - const needed_size = out_dice.items.len * @sizeOf(macho.data_in_code_entry); - const needed_size_aligned = mem.alignForward(u64, needed_size, @alignOf(u64)); - seg.filesize = offset + needed_size_aligned - seg.fileoff; - - const buffer = try self.gpa.alloc(u8, math.cast(usize, needed_size_aligned) orelse return error.Overflow); - defer self.gpa.free(buffer); - { - const src = mem.sliceAsBytes(out_dice.items); - @memcpy(buffer[0..src.len], src); - @memset(buffer[src.len..], 0); - } - - log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ offset, offset + needed_size_aligned }); - - try self.file.pwriteAll(buffer, offset); - - self.data_in_code_cmd.dataoff = @as(u32, @intCast(offset)); - self.data_in_code_cmd.datasize = @as(u32, @intCast(needed_size_aligned)); - } - - fn writeSymtabs(self: *Zld) !void { - var ctx = try self.writeSymtab(); - defer ctx.imports_table.deinit(); - try self.writeDysymtab(ctx); - try self.writeStrtab(); - } - - fn addLocalToSymtab(self: *Zld, sym_loc: SymbolWithLoc, locals: *std.ArrayList(macho.nlist_64)) !void { - const sym = self.getSymbol(sym_loc); - if (sym.n_strx == 0) return; // no name, skip - if (sym.n_desc == MachO.N_DEAD) return; // garbage-collected, skip - if (sym.ext()) return; // an export lands in its own symtab section, skip - if (self.symbolIsTemp(sym_loc)) return; // local temp symbol, skip - - var out_sym = sym; - 
out_sym.n_strx = try self.strtab.insert(self.gpa, self.getSymbolName(sym_loc)); - try locals.append(out_sym); - } - - fn writeSymtab(self: *Zld) !SymtabCtx { - const gpa = self.gpa; - - var locals = std.ArrayList(macho.nlist_64).init(gpa); - defer locals.deinit(); - - for (self.objects.items) |object| { - for (object.atoms.items) |atom_index| { - const atom = self.getAtom(atom_index); - const sym_loc = atom.getSymbolWithLoc(); - try self.addLocalToSymtab(sym_loc, &locals); - - var it = Atom.getInnerSymbolsIterator(self, atom_index); - while (it.next()) |inner_sym_loc| { - try self.addLocalToSymtab(inner_sym_loc, &locals); - } - } - } - - var exports = std.ArrayList(macho.nlist_64).init(gpa); - defer exports.deinit(); - - for (self.globals.items) |global| { - const sym = self.getSymbol(global); - if (sym.undf()) continue; // import, skip - if (sym.n_desc == MachO.N_DEAD) continue; - - var out_sym = sym; - out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(global)); - try exports.append(out_sym); - } - - var imports = std.ArrayList(macho.nlist_64).init(gpa); - defer imports.deinit(); - - var imports_table = std.AutoHashMap(SymbolWithLoc, u32).init(gpa); - - for (self.globals.items) |global| { - const sym = self.getSymbol(global); - if (!sym.undf()) continue; // not an import, skip - if (sym.n_desc == MachO.N_DEAD) continue; - - const new_index = @as(u32, @intCast(imports.items.len)); - var out_sym = sym; - out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(global)); - try imports.append(out_sym); - try imports_table.putNoClobber(global, new_index); - } - - // We generate stabs last in order to ensure that the strtab always has debug info - // strings trailing - if (!self.options.strip) { - for (self.objects.items) |object| { - try self.generateSymbolStabs(object, &locals); - } - } - - const nlocals = @as(u32, @intCast(locals.items.len)); - const nexports = @as(u32, @intCast(exports.items.len)); - const nimports = @as(u32, 
@intCast(imports.items.len)); - const nsyms = nlocals + nexports + nimports; - - const seg = self.getLinkeditSegmentPtr(); - const offset = seg.fileoff + seg.filesize; - assert(mem.isAlignedGeneric(u64, offset, @alignOf(u64))); - const needed_size = nsyms * @sizeOf(macho.nlist_64); - seg.filesize = offset + needed_size - seg.fileoff; - assert(mem.isAlignedGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64))); - - var buffer = std.ArrayList(u8).init(gpa); - defer buffer.deinit(); - try buffer.ensureTotalCapacityPrecise(needed_size); - buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(locals.items)); - buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(exports.items)); - buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(imports.items)); - - log.debug("writing symtab from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - try self.file.pwriteAll(buffer.items, offset); - - self.symtab_cmd.symoff = @as(u32, @intCast(offset)); - self.symtab_cmd.nsyms = nsyms; - - return SymtabCtx{ - .nlocalsym = nlocals, - .nextdefsym = nexports, - .nundefsym = nimports, - .imports_table = imports_table, - }; - } - - fn writeStrtab(self: *Zld) !void { - const seg = self.getLinkeditSegmentPtr(); - const offset = seg.fileoff + seg.filesize; - assert(mem.isAlignedGeneric(u64, offset, @alignOf(u64))); - const needed_size = self.strtab.buffer.items.len; - const needed_size_aligned = mem.alignForward(u64, needed_size, @alignOf(u64)); - seg.filesize = offset + needed_size_aligned - seg.fileoff; - - log.debug("writing string table from 0x{x} to 0x{x}", .{ offset, offset + needed_size_aligned }); - - const buffer = try self.gpa.alloc(u8, math.cast(usize, needed_size_aligned) orelse return error.Overflow); - defer self.gpa.free(buffer); - @memcpy(buffer[0..self.strtab.buffer.items.len], self.strtab.buffer.items); - @memset(buffer[self.strtab.buffer.items.len..], 0); - - try self.file.pwriteAll(buffer, offset); - - self.symtab_cmd.stroff = @as(u32, @intCast(offset)); - self.symtab_cmd.strsize 
= @as(u32, @intCast(needed_size_aligned)); - } - - const SymtabCtx = struct { - nlocalsym: u32, - nextdefsym: u32, - nundefsym: u32, - imports_table: std.AutoHashMap(SymbolWithLoc, u32), - }; - - fn writeDysymtab(self: *Zld, ctx: SymtabCtx) !void { - const gpa = self.gpa; - const nstubs = @as(u32, @intCast(self.stubs_table.lookup.count())); - const ngot_entries = @as(u32, @intCast(self.got_table.lookup.count())); - const nindirectsyms = nstubs * 2 + ngot_entries; - const iextdefsym = ctx.nlocalsym; - const iundefsym = iextdefsym + ctx.nextdefsym; - - const seg = self.getLinkeditSegmentPtr(); - const offset = seg.fileoff + seg.filesize; - assert(mem.isAlignedGeneric(u64, offset, @alignOf(u64))); - const needed_size = nindirectsyms * @sizeOf(u32); - const needed_size_aligned = mem.alignForward(u64, needed_size, @alignOf(u64)); - seg.filesize = offset + needed_size_aligned - seg.fileoff; - - log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ offset, offset + needed_size_aligned }); - - var buf = std.ArrayList(u8).init(gpa); - defer buf.deinit(); - try buf.ensureTotalCapacityPrecise(math.cast(usize, needed_size_aligned) orelse return error.Overflow); - const writer = buf.writer(); - - if (self.stubs_section_index) |sect_id| { - const header = &self.sections.items(.header)[sect_id]; - header.reserved1 = 0; - for (self.stubs_table.entries.items) |entry| { - if (!self.stubs_table.lookup.contains(entry)) continue; - const target_sym = self.getSymbol(entry); - assert(target_sym.undf()); - try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry).?); - } - } - - if (self.got_section_index) |sect_id| { - const header = &self.sections.items(.header)[sect_id]; - header.reserved1 = nstubs; - for (self.got_table.entries.items) |entry| { - if (!self.got_table.lookup.contains(entry)) continue; - const target_sym = self.getSymbol(entry); - if (target_sym.undf()) { - try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry).?); - } else { - 
try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); - } - } - } - - if (self.la_symbol_ptr_section_index) |sect_id| { - const header = &self.sections.items(.header)[sect_id]; - header.reserved1 = nstubs + ngot_entries; - for (self.stubs_table.entries.items) |entry| { - if (!self.stubs_table.lookup.contains(entry)) continue; - const target_sym = self.getSymbol(entry); - assert(target_sym.undf()); - try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry).?); - } - } - - const padding = math.cast(usize, needed_size_aligned - needed_size) orelse return error.Overflow; - if (padding > 0) { - buf.appendNTimesAssumeCapacity(0, padding); - } - - assert(buf.items.len == needed_size_aligned); - try self.file.pwriteAll(buf.items, offset); - - self.dysymtab_cmd.nlocalsym = ctx.nlocalsym; - self.dysymtab_cmd.iextdefsym = iextdefsym; - self.dysymtab_cmd.nextdefsym = ctx.nextdefsym; - self.dysymtab_cmd.iundefsym = iundefsym; - self.dysymtab_cmd.nundefsym = ctx.nundefsym; - self.dysymtab_cmd.indirectsymoff = @as(u32, @intCast(offset)); - self.dysymtab_cmd.nindirectsyms = nindirectsyms; - } - - fn writeUuid(self: *Zld, comp: *const Compilation, uuid_cmd_offset: u32, has_codesig: bool) !void { - const file_size = if (!has_codesig) blk: { - const seg = self.getLinkeditSegmentPtr(); - break :blk seg.fileoff + seg.filesize; - } else self.codesig_cmd.dataoff; - try calcUuid(comp, self.file, file_size, &self.uuid_cmd.uuid); - const offset = uuid_cmd_offset + @sizeOf(macho.load_command); - try self.file.pwriteAll(&self.uuid_cmd.uuid, offset); - } - - fn writeCodeSignaturePadding(self: *Zld, code_sig: *CodeSignature) !void { - const seg = self.getLinkeditSegmentPtr(); - // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file - // https://github.com/opensource-apple/cctools/blob/fdb4825f303fd5c0751be524babd32958181b3ed/libstuff/checkout.c#L271 - const offset = mem.alignForward(u64, seg.fileoff + seg.filesize, 16); - const needed_size 
= code_sig.estimateSize(offset); - seg.filesize = offset + needed_size - seg.fileoff; - seg.vmsize = mem.alignForward(u64, seg.filesize, MachO.getPageSize(self.options.target.cpu.arch)); - log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - // Pad out the space. We need to do this to calculate valid hashes for everything in the file - // except for code signature data. - try self.file.pwriteAll(&[_]u8{0}, offset + needed_size - 1); - - self.codesig_cmd.dataoff = @as(u32, @intCast(offset)); - self.codesig_cmd.datasize = @as(u32, @intCast(needed_size)); - } - - fn writeCodeSignature(self: *Zld, comp: *const Compilation, code_sig: *CodeSignature) !void { - const seg_id = self.header_segment_cmd_index.?; - const seg = self.segments.items[seg_id]; - - var buffer = std.ArrayList(u8).init(self.gpa); - defer buffer.deinit(); - try buffer.ensureTotalCapacityPrecise(code_sig.size()); - try code_sig.writeAdhocSignature(comp, .{ - .file = self.file, - .exec_seg_base = seg.fileoff, - .exec_seg_limit = seg.filesize, - .file_size = self.codesig_cmd.dataoff, - .output_mode = self.options.output_mode, - }, buffer.writer()); - assert(buffer.items.len == code_sig.size()); - - log.debug("writing code signature from 0x{x} to 0x{x}", .{ - self.codesig_cmd.dataoff, - self.codesig_cmd.dataoff + buffer.items.len, - }); - - try self.file.pwriteAll(buffer.items, self.codesig_cmd.dataoff); - } - - /// Writes Mach-O file header. 
- fn writeHeader(self: *Zld, ncmds: u32, sizeofcmds: u32) !void { - var header: macho.mach_header_64 = .{}; - header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL; - - switch (self.options.target.cpu.arch) { - .aarch64 => { - header.cputype = macho.CPU_TYPE_ARM64; - header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL; - }, - .x86_64 => { - header.cputype = macho.CPU_TYPE_X86_64; - header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL; - }, - else => return error.UnsupportedCpuArchitecture, - } - - switch (self.options.output_mode) { - .Exe => { - header.filetype = macho.MH_EXECUTE; - }, - .Lib => { - // By this point, it can only be a dylib. - header.filetype = macho.MH_DYLIB; - header.flags |= macho.MH_NO_REEXPORTED_DYLIBS; - }, - else => unreachable, - } - - if (self.thread_vars_section_index) |sect_id| { - header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; - if (self.sections.items(.header)[sect_id].size > 0) { - header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; - } - } - - header.ncmds = ncmds; - header.sizeofcmds = sizeofcmds; - - log.debug("writing Mach-O header {}", .{header}); - - try self.file.pwriteAll(mem.asBytes(&header), 0); - } - - pub fn makeStaticString(bytes: []const u8) [16]u8 { - var buf = [_]u8{0} ** 16; - @memcpy(buf[0..bytes.len], bytes); - return buf; - } - - pub fn getAtomPtr(self: *Zld, atom_index: Atom.Index) *Atom { - assert(atom_index < self.atoms.items.len); - return &self.atoms.items[atom_index]; - } - - pub fn getAtom(self: Zld, atom_index: Atom.Index) Atom { - assert(atom_index < self.atoms.items.len); - return self.atoms.items[atom_index]; - } - - fn getSegmentByName(self: Zld, segname: []const u8) ?u8 { - for (self.segments.items, 0..) 
|seg, i| { - if (mem.eql(u8, segname, seg.segName())) return @as(u8, @intCast(i)); - } else return null; - } - - pub fn getSegment(self: Zld, sect_id: u8) macho.segment_command_64 { - const index = self.sections.items(.segment_index)[sect_id]; - return self.segments.items[index]; - } - - pub fn getSegmentPtr(self: *Zld, sect_id: u8) *macho.segment_command_64 { - const index = self.sections.items(.segment_index)[sect_id]; - return &self.segments.items[index]; - } - - pub fn getLinkeditSegmentPtr(self: *Zld) *macho.segment_command_64 { - assert(self.segments.items.len > 0); - const seg = &self.segments.items[self.segments.items.len - 1]; - assert(mem.eql(u8, seg.segName(), "__LINKEDIT")); - return seg; - } - - pub fn getSectionByName(self: Zld, segname: []const u8, sectname: []const u8) ?u8 { - // TODO investigate caching with a hashmap - for (self.sections.items(.header), 0..) |header, i| { - if (mem.eql(u8, header.segName(), segname) and mem.eql(u8, header.sectName(), sectname)) - return @as(u8, @intCast(i)); - } else return null; - } - - pub fn getSectionIndexes(self: Zld, segment_index: u8) struct { start: u8, end: u8 } { - var start: u8 = 0; - const nsects = for (self.segments.items, 0..) |seg, i| { - if (i == segment_index) break @as(u8, @intCast(seg.nsects)); - start += @as(u8, @intCast(seg.nsects)); - } else 0; - return .{ .start = start, .end = start + nsects }; - } - - pub fn symbolIsTemp(self: *Zld, sym_with_loc: SymbolWithLoc) bool { - const sym = self.getSymbol(sym_with_loc); - if (!sym.sect()) return false; - if (sym.ext()) return false; - const sym_name = self.getSymbolName(sym_with_loc); - return mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L"); - } - - /// Returns pointer-to-symbol described by `sym_with_loc` descriptor. 
- pub fn getSymbolPtr(self: *Zld, sym_with_loc: SymbolWithLoc) *macho.nlist_64 { - if (sym_with_loc.getFile()) |file| { - const object = &self.objects.items[file]; - return &object.symtab[sym_with_loc.sym_index]; - } else { - return &self.locals.items[sym_with_loc.sym_index]; - } - } - - /// Returns symbol described by `sym_with_loc` descriptor. - pub fn getSymbol(self: *const Zld, sym_with_loc: SymbolWithLoc) macho.nlist_64 { - if (sym_with_loc.getFile()) |file| { - const object = &self.objects.items[file]; - return object.symtab[sym_with_loc.sym_index]; - } else { - return self.locals.items[sym_with_loc.sym_index]; - } - } - - /// Returns name of the symbol described by `sym_with_loc` descriptor. - pub fn getSymbolName(self: *const Zld, sym_with_loc: SymbolWithLoc) []const u8 { - if (sym_with_loc.getFile()) |file| { - const object = self.objects.items[file]; - return object.getSymbolName(sym_with_loc.sym_index); - } else { - const sym = self.locals.items[sym_with_loc.sym_index]; - return self.strtab.get(sym.n_strx).?; - } - } - - pub fn getGlobalIndex(self: *const Zld, name: []const u8) ?u32 { - return self.resolver.get(name); - } - - pub fn getGlobalPtr(self: *Zld, name: []const u8) ?*SymbolWithLoc { - const global_index = self.resolver.get(name) orelse return null; - return &self.globals.items[global_index]; - } - - pub fn getGlobal(self: *const Zld, name: []const u8) ?SymbolWithLoc { - const global_index = self.resolver.get(name) orelse return null; - return self.globals.items[global_index]; - } - - const GetOrPutGlobalPtrResult = struct { - found_existing: bool, - value_ptr: *SymbolWithLoc, - }; - - pub fn getOrPutGlobalPtr(self: *Zld, name: []const u8) !GetOrPutGlobalPtrResult { - if (self.getGlobalPtr(name)) |ptr| { - return GetOrPutGlobalPtrResult{ .found_existing = true, .value_ptr = ptr }; - } - const global_index = try self.allocateGlobal(); - const global_name = try self.gpa.dupe(u8, name); - _ = try self.resolver.put(self.gpa, global_name, 
global_index); - const ptr = &self.globals.items[global_index]; - return GetOrPutGlobalPtrResult{ .found_existing = false, .value_ptr = ptr }; - } - - pub fn getGotEntryAddress(self: *Zld, sym_with_loc: SymbolWithLoc) ?u64 { - const index = self.got_table.lookup.get(sym_with_loc) orelse return null; - const header = self.sections.items(.header)[self.got_section_index.?]; - return header.addr + @sizeOf(u64) * index; - } - - pub fn getTlvPtrEntryAddress(self: *Zld, sym_with_loc: SymbolWithLoc) ?u64 { - const index = self.tlv_ptr_table.lookup.get(sym_with_loc) orelse return null; - const header = self.sections.items(.header)[self.tlv_ptr_section_index.?]; - return header.addr + @sizeOf(u64) * index; - } - - pub fn getStubsEntryAddress(self: *Zld, sym_with_loc: SymbolWithLoc) ?u64 { - const index = self.stubs_table.lookup.get(sym_with_loc) orelse return null; - const header = self.sections.items(.header)[self.stubs_section_index.?]; - return header.addr + stubs.stubSize(self.options.target.cpu.arch) * index; - } - - /// Returns symbol location corresponding to the set entrypoint. - /// Asserts output mode is executable. - pub fn getEntryPoint(self: Zld) SymbolWithLoc { - assert(self.options.output_mode == .Exe); - const global_index = self.entry_index.?; - return self.globals.items[global_index]; - } - - inline fn requiresThunks(self: Zld) bool { - return self.options.target.cpu.arch == .aarch64; - } - - pub fn generateSymbolStabs(self: *Zld, object: Object, locals: *std.ArrayList(macho.nlist_64)) !void { - log.debug("generating stabs for '{s}'", .{object.name}); - - const gpa = self.gpa; - var debug_info = object.parseDwarfInfo(); - - var lookup = DwarfInfo.AbbrevLookupTable.init(gpa); - defer lookup.deinit(); - try lookup.ensureUnusedCapacity(std.math.maxInt(u8)); - - // We assume there is only one CU. 
- var cu_it = debug_info.getCompileUnitIterator(); - const compile_unit = while (try cu_it.next()) |cu| { - const offset = math.cast(usize, cu.cuh.debug_abbrev_offset) orelse return error.Overflow; - try debug_info.genAbbrevLookupByKind(offset, &lookup); - break cu; - } else { - log.debug("no compile unit found in debug info in {s}; skipping", .{object.name}); - return; - }; - - var abbrev_it = compile_unit.getAbbrevEntryIterator(debug_info); - const cu_entry: DwarfInfo.AbbrevEntry = while (try abbrev_it.next(lookup)) |entry| switch (entry.tag) { - dwarf.TAG.compile_unit => break entry, - else => continue, - } else { - log.debug("missing DWARF_TAG_compile_unit tag in {s}; skipping", .{object.name}); - return; - }; - - var maybe_tu_name: ?[]const u8 = null; - var maybe_tu_comp_dir: ?[]const u8 = null; - var attr_it = cu_entry.getAttributeIterator(debug_info, compile_unit.cuh); - - while (try attr_it.next()) |attr| switch (attr.name) { - dwarf.AT.comp_dir => maybe_tu_comp_dir = attr.getString(debug_info, compile_unit.cuh) orelse continue, - dwarf.AT.name => maybe_tu_name = attr.getString(debug_info, compile_unit.cuh) orelse continue, - else => continue, - }; - - if (maybe_tu_name == null or maybe_tu_comp_dir == null) { - log.debug("missing DWARF_AT_comp_dir and DWARF_AT_name attributes {s}; skipping", .{object.name}); - return; - } - - const tu_name = maybe_tu_name.?; - const tu_comp_dir = maybe_tu_comp_dir.?; - - // Open scope - try locals.ensureUnusedCapacity(3); - locals.appendAssumeCapacity(.{ - .n_strx = try self.strtab.insert(gpa, tu_comp_dir), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.strtab.insert(gpa, tu_name), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.strtab.insert(gpa, object.name), - .n_type = macho.N_OSO, - .n_sect = 0, - .n_desc = 1, - .n_value = object.mtime, - }); - - var 
stabs_buf: [4]macho.nlist_64 = undefined; - - var name_lookup: ?DwarfInfo.SubprogramLookupByName = if (object.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS == 0) blk: { - var name_lookup = DwarfInfo.SubprogramLookupByName.init(gpa); - errdefer name_lookup.deinit(); - try name_lookup.ensureUnusedCapacity(@as(u32, @intCast(object.atoms.items.len))); - try debug_info.genSubprogramLookupByName(compile_unit, lookup, &name_lookup); - break :blk name_lookup; - } else null; - defer if (name_lookup) |*nl| nl.deinit(); - - for (object.atoms.items) |atom_index| { - const atom = self.getAtom(atom_index); - const stabs = try self.generateSymbolStabsForSymbol( - atom_index, - atom.getSymbolWithLoc(), - name_lookup, - &stabs_buf, - ); - try locals.appendSlice(stabs); - - var it = Atom.getInnerSymbolsIterator(self, atom_index); - while (it.next()) |sym_loc| { - const contained_stabs = try self.generateSymbolStabsForSymbol( - atom_index, - sym_loc, - name_lookup, - &stabs_buf, - ); - try locals.appendSlice(contained_stabs); - } - } - - // Close scope - try locals.append(.{ - .n_strx = 0, - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - } - - fn generateSymbolStabsForSymbol( - self: *Zld, - atom_index: Atom.Index, - sym_loc: SymbolWithLoc, - lookup: ?DwarfInfo.SubprogramLookupByName, - buf: *[4]macho.nlist_64, - ) ![]const macho.nlist_64 { - const gpa = self.gpa; - const object = self.objects.items[sym_loc.getFile().?]; - const sym = self.getSymbol(sym_loc); - const sym_name = self.getSymbolName(sym_loc); - const header = self.sections.items(.header)[sym.n_sect - 1]; - - if (sym.n_strx == 0) return buf[0..0]; - if (self.symbolIsTemp(sym_loc)) return buf[0..0]; - - if (!header.isCode()) { - // Since we are not dealing with machine code, it's either a global or a static depending - // on the linkage scope. - if (sym.sect() and sym.ext()) { - // Global gets an N_GSYM stab type. 
- buf[0] = .{ - .n_strx = try self.strtab.insert(gpa, sym_name), - .n_type = macho.N_GSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = 0, - }; - } else { - // Local static gets an N_STSYM stab type. - buf[0] = .{ - .n_strx = try self.strtab.insert(gpa, sym_name), - .n_type = macho.N_STSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }; - } - return buf[0..1]; - } - - const size: u64 = size: { - if (object.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0) { - break :size self.getAtom(atom_index).size; - } - - // Since we don't have subsections to work with, we need to infer the size of each function - // the slow way by scanning the debug info for matching symbol names and extracting - // the symbol's DWARF_AT_low_pc and DWARF_AT_high_pc values. - const source_sym = object.getSourceSymbol(sym_loc.sym_index) orelse return buf[0..0]; - const subprogram = lookup.?.get(sym_name[1..]) orelse return buf[0..0]; - - if (subprogram.addr <= source_sym.n_value and source_sym.n_value < subprogram.addr + subprogram.size) { - break :size subprogram.size; - } else { - log.debug("no stab found for {s}", .{sym_name}); - return buf[0..0]; - } - }; - - buf[0] = .{ - .n_strx = 0, - .n_type = macho.N_BNSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }; - buf[1] = .{ - .n_strx = try self.strtab.insert(gpa, sym_name), - .n_type = macho.N_FUN, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }; - buf[2] = .{ - .n_strx = 0, - .n_type = macho.N_FUN, - .n_sect = 0, - .n_desc = 0, - .n_value = size, - }; - buf[3] = .{ - .n_strx = 0, - .n_type = macho.N_ENSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = size, - }; - - return buf; - } - - fn logSegments(self: *Zld) void { - log.debug("segments:", .{}); - for (self.segments.items, 0..) 
|segment, i| { - log.debug(" segment({d}): {s} @{x} ({x}), sizeof({x})", .{ - i, - segment.segName(), - segment.fileoff, - segment.vmaddr, - segment.vmsize, - }); - } - } - - fn logSections(self: *Zld) void { - log.debug("sections:", .{}); - for (self.sections.items(.header), 0..) |header, i| { - log.debug(" sect({d}): {s},{s} @{x} ({x}), sizeof({x})", .{ - i + 1, - header.segName(), - header.sectName(), - header.offset, - header.addr, - header.size, - }); - } - } - - fn logSymAttributes(sym: macho.nlist_64, buf: []u8) []const u8 { - if (sym.sect()) { - buf[0] = 's'; - } - if (sym.ext()) { - if (sym.weakDef() or sym.pext()) { - buf[1] = 'w'; - } else { - buf[1] = 'e'; - } - } - if (sym.tentative()) { - buf[2] = 't'; - } - if (sym.undf()) { - buf[3] = 'u'; - } - return buf[0..]; - } - - fn logSymtab(self: *Zld) void { - var buf: [4]u8 = undefined; - - const scoped_log = std.log.scoped(.symtab); - - scoped_log.debug("locals:", .{}); - for (self.objects.items, 0..) |object, id| { - scoped_log.debug(" object({d}): {s}", .{ id, object.name }); - if (object.in_symtab == null) continue; - for (object.symtab, 0..) |sym, sym_id| { - @memset(&buf, '_'); - scoped_log.debug(" %{d}: {s} @{x} in sect({d}), {s}", .{ - sym_id, - object.getSymbolName(@as(u32, @intCast(sym_id))), - sym.n_value, - sym.n_sect, - logSymAttributes(sym, &buf), - }); - } - } - scoped_log.debug(" object(-1)", .{}); - for (self.locals.items, 0..) |sym, sym_id| { - if (sym.undf()) continue; - scoped_log.debug(" %{d}: {s} @{x} in sect({d}), {s}", .{ - sym_id, - self.strtab.get(sym.n_strx).?, - sym.n_value, - sym.n_sect, - logSymAttributes(sym, &buf), - }); - } - - scoped_log.debug("exports:", .{}); - for (self.globals.items, 0..) 
|global, i| { - const sym = self.getSymbol(global); - if (sym.undf()) continue; - if (sym.n_desc == MachO.N_DEAD) continue; - scoped_log.debug(" %{d}: {s} @{x} in sect({d}), {s} (def in object({?}))", .{ - i, - self.getSymbolName(global), - sym.n_value, - sym.n_sect, - logSymAttributes(sym, &buf), - global.file, - }); - } - - scoped_log.debug("imports:", .{}); - for (self.globals.items, 0..) |global, i| { - const sym = self.getSymbol(global); - if (!sym.undf()) continue; - if (sym.n_desc == MachO.N_DEAD) continue; - const ord = @divTrunc(sym.n_desc, macho.N_SYMBOL_RESOLVER); - scoped_log.debug(" %{d}: {s} @{x} in ord({d}), {s}", .{ - i, - self.getSymbolName(global), - sym.n_value, - ord, - logSymAttributes(sym, &buf), - }); - } - - scoped_log.debug("GOT entries:", .{}); - scoped_log.debug("{}", .{self.got_table}); - - scoped_log.debug("TLV pointers:", .{}); - scoped_log.debug("{}", .{self.tlv_ptr_table}); - - scoped_log.debug("stubs entries:", .{}); - scoped_log.debug("{}", .{self.stubs_table}); - - scoped_log.debug("thunks:", .{}); - for (self.thunks.items, 0..) |thunk, i| { - scoped_log.debug(" thunk({d})", .{i}); - const slice = thunk.targets.slice(); - for (slice.items(.tag), slice.items(.target), 0..) |tag, target, j| { - const atom_index = @as(u32, @intCast(thunk.getStartAtomIndex() + j)); - const atom = self.getAtom(atom_index); - const atom_sym = self.getSymbol(atom.getSymbolWithLoc()); - const target_addr = switch (tag) { - .stub => self.getStubsEntryAddress(target).?, - .atom => self.getSymbol(target).n_value, - }; - scoped_log.debug(" {d}@{x} => {s}({s}@{x})", .{ - j, - atom_sym.n_value, - @tagName(tag), - self.getSymbolName(target), - target_addr, - }); - } - } - } - - fn logAtoms(self: *Zld) void { - log.debug("atoms:", .{}); - const slice = self.sections.slice(); - for (slice.items(.first_atom_index), 0..) 
|first_atom_index, sect_id| { - var atom_index = first_atom_index orelse continue; - const header = slice.items(.header)[sect_id]; - - log.debug("{s},{s}", .{ header.segName(), header.sectName() }); - - while (true) { - const atom = self.getAtom(atom_index); - self.logAtom(atom_index, log); - - if (atom.next_index) |next_index| { - atom_index = next_index; - } else break; - } - } - } - - pub fn logAtom(self: *Zld, atom_index: Atom.Index, logger: anytype) void { - if (!build_options.enable_logging) return; - - const atom = self.getAtom(atom_index); - const sym = self.getSymbol(atom.getSymbolWithLoc()); - const sym_name = self.getSymbolName(atom.getSymbolWithLoc()); - logger.debug(" ATOM({d}, %{d}, '{s}') @ {x} (sizeof({x}), alignof({x})) in object({?}) in sect({d})", .{ - atom_index, - atom.sym_index, - sym_name, - sym.n_value, - atom.size, - atom.alignment, - atom.getFile(), - sym.n_sect, - }); - - if (atom.getFile() != null) { - var it = Atom.getInnerSymbolsIterator(self, atom_index); - while (it.next()) |sym_loc| { - const inner = self.getSymbol(sym_loc); - const inner_name = self.getSymbolName(sym_loc); - const offset = Atom.calcInnerSymbolOffset(self, atom_index, sym_loc.sym_index); - - logger.debug(" (%{d}, '{s}') @ {x} ({x})", .{ - sym_loc.sym_index, - inner_name, - inner.n_value, - offset, - }); - } - - if (Atom.getSectionAlias(self, atom_index)) |sym_loc| { - const alias = self.getSymbol(sym_loc); - const alias_name = self.getSymbolName(sym_loc); - - logger.debug(" (%{d}, '{s}') @ {x} ({x})", .{ - sym_loc.sym_index, - alias_name, - alias.n_value, - 0, - }); - } - } - } -}; - -pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) link.File.FlushError!void { +pub fn linkWithZld( + macho_file: *MachO, + comp: *Compilation, + prog_node: *std.Progress.Node, +) link.File.FlushError!void { const tracy = trace(@src()); defer tracy.end(); @@ -2611,8 +35,6 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: 
*std.Progr defer sub_prog_node.end(); const cpu_arch = target.cpu.arch; - const os_tag = target.os.tag; - const abi = target.abi; const is_lib = options.output_mode == .Lib; const is_dyn_lib = options.link_mode == .Dynamic and is_lib; const is_exe_or_dyn_lib = is_dyn_lib or options.output_mode == .Exe; @@ -2730,29 +152,26 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr } else { const sub_path = options.emit.?.sub_path; + const old_file = macho_file.base.file; // TODO is this needed at all? + defer macho_file.base.file = old_file; + const file = try directory.handle.createFile(sub_path, .{ .truncate = true, .read = true, .mode = link.determineMode(options.*), }); defer file.close(); - - var zld = Zld{ - .gpa = gpa, - .file = file, - .options = options, - }; - defer zld.deinit(); + macho_file.base.file = file; // Index 0 is always a null symbol. - try zld.locals.append(gpa, .{ + try macho_file.locals.append(gpa, .{ .n_strx = 0, .n_type = 0, .n_sect = 0, .n_desc = 0, .n_value = 0, }); - try zld.strtab.buffer.append(gpa, 0); + try macho_file.strtab.buffer.append(gpa, 0); // Positional arguments to the linker such as object files and static archives. 
var positionals = std.ArrayList(Compilation.LinkObject).init(arena); @@ -2930,9 +349,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr const in_file = try std.fs.cwd().openFile(obj.path, .{}); defer in_file.close(); - MachO.parsePositional( - &zld, - gpa, + macho_file.parsePositional( in_file, obj.path, obj.must_link, @@ -2949,9 +366,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr const in_file = try std.fs.cwd().openFile(path, .{}); defer in_file.close(); - MachO.parseLibrary( - &zld, - gpa, + macho_file.parseLibrary( in_file, path, lib, @@ -2965,198 +380,199 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr }; } - MachO.parseDependentLibs(&zld, gpa, &dependent_libs, options) catch |err| { + macho_file.parseDependentLibs(&dependent_libs, options) catch |err| { // TODO convert to error log.err("parsing dependent libraries failed with err {s}", .{@errorName(err)}); }; - try zld.resolveSymbols(); - try macho_file.reportUndefined(&zld); + var actions = std.ArrayList(MachO.ResolveAction).init(gpa); + defer actions.deinit(); + try macho_file.resolveSymbols(&actions); + try macho_file.reportUndefined(); - if (options.output_mode == .Exe) { - const entry_name = options.entry orelse load_commands.default_entry_point; - const global_index = zld.resolver.get(entry_name).?; // Error was flagged earlier - zld.entry_index = global_index; - } - - for (zld.objects.items, 0..) |*object, object_id| { - try object.splitIntoAtoms(&zld, @as(u32, @intCast(object_id))); + for (macho_file.objects.items, 0..) 
|*object, object_id| { + try object.splitIntoAtoms(macho_file, @as(u32, @intCast(object_id))); } if (gc_sections) { - try dead_strip.gcAtoms(&zld); + try dead_strip.gcAtoms(macho_file); } - try zld.createDyldPrivateAtom(); - try zld.createTentativeDefAtoms(); + try macho_file.createDyldPrivateAtom(); + try macho_file.createTentativeDefAtoms(); - if (zld.options.output_mode == .Exe) { - const global = zld.getEntryPoint(); - if (zld.getSymbol(global).undf()) { + if (macho_file.options.output_mode == .Exe) { + const global = macho_file.getEntryPoint().?; + if (macho_file.getSymbol(global).undf()) { // We do one additional check here in case the entry point was found in one of the dylibs. // (I actually have no idea what this would imply but it is a possible outcome and so we // support it.) - try zld.addStubEntry(global); + try macho_file.addStubEntry(global); } } - for (zld.objects.items) |object| { + for (macho_file.objects.items) |object| { for (object.atoms.items) |atom_index| { - const atom = zld.getAtom(atom_index); - const sym = zld.getSymbol(atom.getSymbolWithLoc()); - const header = zld.sections.items(.header)[sym.n_sect - 1]; + const atom = macho_file.getAtom(atom_index); + const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); + const header = macho_file.sections.items(.header)[sym.n_sect - 1]; if (header.isZerofill()) continue; - const relocs = Atom.getAtomRelocs(&zld, atom_index); - try Atom.scanAtomRelocs(&zld, atom_index, relocs); + const relocs = Atom.getAtomRelocs(macho_file, atom_index); + try Atom.scanAtomRelocs(macho_file, atom_index, relocs); } } - try eh_frame.scanRelocs(&zld); - try UnwindInfo.scanRelocs(&zld); + try eh_frame.scanRelocs(macho_file); + try UnwindInfo.scanRelocs(macho_file); - if (zld.dyld_stub_binder_index) |index| try zld.addGotEntry(zld.globals.items[index]); + if (macho_file.dyld_stub_binder_index) |index| + try macho_file.addGotEntry(macho_file.globals.items[index]); - try zld.calcSectionSizes(); + try 
macho_file.calcSectionSizes(); - var unwind_info = UnwindInfo{ .gpa = zld.gpa }; + var unwind_info = UnwindInfo{ .gpa = gpa }; defer unwind_info.deinit(); - try unwind_info.collect(&zld); + try unwind_info.collect(macho_file); - try eh_frame.calcSectionSize(&zld, &unwind_info); - try unwind_info.calcSectionSize(&zld); + try eh_frame.calcSectionSize(macho_file, &unwind_info); + try unwind_info.calcSectionSize(macho_file); - try zld.pruneAndSortSections(); - try zld.createSegments(); - try zld.allocateSegments(); + try pruneAndSortSections(macho_file); + try createSegments(macho_file); + try allocateSegments(macho_file); - try MachO.allocateSpecialSymbols(&zld); + try macho_file.allocateSpecialSymbols(); if (build_options.enable_logging) { - zld.logSymtab(); - zld.logSegments(); - zld.logSections(); - zld.logAtoms(); + macho_file.logSymtab(); + macho_file.logSegments(); + macho_file.logSections(); + macho_file.logAtoms(); } - try zld.writeAtoms(); - if (zld.requiresThunks()) try zld.writeThunks(); - try zld.writeDyldPrivateAtom(); + try writeAtoms(macho_file); + if (macho_file.requiresThunks()) try writeThunks(macho_file); + try writeDyldPrivateAtom(macho_file); - if (zld.stubs_section_index) |_| { - try zld.writeStubs(); - try zld.writeStubHelpers(); - try zld.writeLaSymbolPtrs(); + if (macho_file.stubs_section_index) |_| { + try writeStubs(macho_file); + try writeStubHelpers(macho_file); + try writeLaSymbolPtrs(macho_file); } - if (zld.got_section_index) |sect_id| try zld.writePointerEntries(sect_id, &zld.got_table); - if (zld.tlv_ptr_section_index) |sect_id| try zld.writePointerEntries(sect_id, &zld.tlv_ptr_table); + if (macho_file.got_section_index) |sect_id| + try macho_file.writePointerEntries(sect_id, &macho_file.got_table); + if (macho_file.tlv_ptr_section_index) |sect_id| + try macho_file.writePointerEntries(sect_id, &macho_file.tlv_ptr_table); - try eh_frame.write(&zld, &unwind_info); - try unwind_info.write(&zld); - try zld.writeLinkeditSegmentData(); + 
try eh_frame.write(macho_file, &unwind_info); + try unwind_info.write(macho_file); + try macho_file.writeLinkeditSegmentData(); // If the last section of __DATA segment is zerofill section, we need to ensure // that the free space between the end of the last non-zerofill section of __DATA // segment and the beginning of __LINKEDIT segment is zerofilled as the loader will // copy-paste this space into memory for quicker zerofill operation. - if (zld.data_segment_cmd_index) |data_seg_id| blk: { + if (macho_file.data_segment_cmd_index) |data_seg_id| blk: { var physical_zerofill_start: ?u64 = null; - const section_indexes = zld.getSectionIndexes(data_seg_id); - for (zld.sections.items(.header)[section_indexes.start..section_indexes.end]) |header| { + const section_indexes = macho_file.getSectionIndexes(data_seg_id); + for (macho_file.sections.items(.header)[section_indexes.start..section_indexes.end]) |header| { if (header.isZerofill() and header.size > 0) break; physical_zerofill_start = header.offset + header.size; } else break :blk; const start = physical_zerofill_start orelse break :blk; - const linkedit = zld.getLinkeditSegmentPtr(); + const linkedit = macho_file.getLinkeditSegmentPtr(); const size = math.cast(usize, linkedit.fileoff - start) orelse return error.Overflow; if (size > 0) { log.debug("zeroing out zerofill area of length {x} at {x}", .{ size, start }); - var padding = try zld.gpa.alloc(u8, size); - defer zld.gpa.free(padding); + var padding = try gpa.alloc(u8, size); + defer gpa.free(padding); @memset(padding, 0); - try zld.file.pwriteAll(padding, start); + try macho_file.base.file.?.pwriteAll(padding, start); } } // Write code signature padding if required - const requires_codesig = blk: { - if (options.entitlements) |_| break :blk true; - if (cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator)) break :blk true; - break :blk false; - }; - var codesig: ?CodeSignature = if (requires_codesig) blk: { + var codesig: ?CodeSignature = if 
(macho_file.requiresCodeSignature()) blk: { // Preallocate space for the code signature. // We need to do this at this stage so that we have the load commands with proper values // written out to the file. // The most important here is to have the correct vm and filesize of the __LINKEDIT segment // where the code signature goes into. - var codesig = CodeSignature.init(MachO.getPageSize(zld.options.target.cpu.arch)); + var codesig = CodeSignature.init(MachO.getPageSize(cpu_arch)); codesig.code_directory.ident = fs.path.basename(full_out_path); if (options.entitlements) |path| { - try codesig.addEntitlements(zld.gpa, path); + try codesig.addEntitlements(gpa, path); } - try zld.writeCodeSignaturePadding(&codesig); + try macho_file.writeCodeSignaturePadding(&codesig); break :blk codesig; } else null; - defer if (codesig) |*csig| csig.deinit(zld.gpa); + defer if (codesig) |*csig| csig.deinit(gpa); // Write load commands var lc_buffer = std.ArrayList(u8).init(arena); const lc_writer = lc_buffer.writer(); - try MachO.writeSegmentHeaders(&zld, lc_writer); - try lc_writer.writeStruct(zld.dyld_info_cmd); - try lc_writer.writeStruct(zld.function_starts_cmd); - try lc_writer.writeStruct(zld.data_in_code_cmd); - try lc_writer.writeStruct(zld.symtab_cmd); - try lc_writer.writeStruct(zld.dysymtab_cmd); + try macho_file.writeSegmentHeaders(lc_writer); + try lc_writer.writeStruct(macho_file.dyld_info_cmd); + try lc_writer.writeStruct(macho_file.function_starts_cmd); + try lc_writer.writeStruct(macho_file.data_in_code_cmd); + try lc_writer.writeStruct(macho_file.symtab_cmd); + try lc_writer.writeStruct(macho_file.dysymtab_cmd); try load_commands.writeDylinkerLC(lc_writer); - if (zld.options.output_mode == .Exe) { - const seg_id = zld.header_segment_cmd_index.?; - const seg = zld.segments.items[seg_id]; - const global = zld.getEntryPoint(); - const sym = zld.getSymbol(global); + switch (macho_file.base.options.output_mode) { + .Exe => blk: { + const seg_id = 
macho_file.header_segment_cmd_index.?; + const seg = macho_file.segments.items[seg_id]; + const global = macho_file.getEntryPoint() orelse break :blk; + const sym = macho_file.getSymbol(global); - const addr: u64 = if (sym.undf()) - // In this case, the symbol has been resolved in one of dylibs and so we point - // to the stub as its vmaddr value. - zld.getStubsEntryAddress(global).? - else - sym.n_value; + const addr: u64 = if (sym.undf()) + // In this case, the symbol has been resolved in one of dylibs and so we point + // to the stub as its vmaddr value. + macho_file.getStubsEntryAddress(global).? + else + sym.n_value; - try lc_writer.writeStruct(macho.entry_point_command{ - .entryoff = @as(u32, @intCast(addr - seg.vmaddr)), - .stacksize = options.stack_size_override orelse 0, - }); - } else { - assert(zld.options.output_mode == .Lib); - try load_commands.writeDylibIdLC(zld.gpa, zld.options, lc_writer); + try lc_writer.writeStruct(macho.entry_point_command{ + .entryoff = @as(u32, @intCast(addr - seg.vmaddr)), + .stacksize = macho_file.base.options.stack_size_override orelse 0, + }); + }, + .Lib => if (macho_file.base.options.link_mode == .Dynamic) { + try load_commands.writeDylibIdLC(gpa, &macho_file.base.options, lc_writer); + }, + else => {}, } - try load_commands.writeRpathLCs(zld.gpa, zld.options, lc_writer); + try load_commands.writeRpathLCs(gpa, macho_file.base.options, lc_writer); try lc_writer.writeStruct(macho.source_version_command{ .version = 0, }); - try load_commands.writeBuildVersionLC(zld.options, lc_writer); + try load_commands.writeBuildVersionLC(macho_file.base.options, lc_writer); const uuid_cmd_offset = @sizeOf(macho.mach_header_64) + @as(u32, @intCast(lc_buffer.items.len)); - try lc_writer.writeStruct(zld.uuid_cmd); + try lc_writer.writeStruct(macho_file.uuid_cmd); - try load_commands.writeLoadDylibLCs(zld.dylibs.items, zld.referenced_dylibs.keys(), lc_writer); + try load_commands.writeLoadDylibLCs( + macho_file.dylibs.items, + 
macho_file.referenced_dylibs.keys(), + lc_writer, + ); - if (requires_codesig) { - try lc_writer.writeStruct(zld.codesig_cmd); + if (codesig != null) { + try lc_writer.writeStruct(macho_file.codesig_cmd); } const ncmds = load_commands.calcNumOfLCs(lc_buffer.items); - try zld.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64)); - try zld.writeHeader(ncmds, @as(u32, @intCast(lc_buffer.items.len))); - try zld.writeUuid(comp, uuid_cmd_offset, requires_codesig); + try macho_file.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64)); + try macho_file.writeHeader(ncmds, @as(u32, @intCast(lc_buffer.items.len))); + try macho_file.writeUuid(comp, uuid_cmd_offset, codesig != null); if (codesig) |*csig| { - try zld.writeCodeSignature(comp, csig); // code signing always comes last - try MachO.invalidateKernelCache(directory.handle, zld.options.emit.?.sub_path); + try macho_file.writeCodeSignature(comp, csig); // code signing always comes last + try MachO.invalidateKernelCache(directory.handle, macho_file.base.options.emit.?.sub_path); } } @@ -3177,3 +593,609 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr macho_file.base.lock = man.toOwnedLock(); } } + +fn createSegments(macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; + const pagezero_vmsize = macho_file.base.options.pagezero_size orelse MachO.default_pagezero_vmsize; + const page_size = MachO.getPageSize(macho_file.base.options.target.cpu.arch); + const aligned_pagezero_vmsize = mem.alignBackward(u64, pagezero_vmsize, page_size); + if (macho_file.base.options.output_mode != .Lib and aligned_pagezero_vmsize > 0) { + if (aligned_pagezero_vmsize != pagezero_vmsize) { + log.warn("requested __PAGEZERO size (0x{x}) is not page aligned", .{pagezero_vmsize}); + log.warn(" rounding down to 0x{x}", .{aligned_pagezero_vmsize}); + } + macho_file.pagezero_segment_cmd_index = @intCast(macho_file.segments.items.len); + try macho_file.segments.append(gpa, .{ + 
.cmdsize = @sizeOf(macho.segment_command_64), + .segname = MachO.makeStaticString("__PAGEZERO"), + .vmsize = aligned_pagezero_vmsize, + }); + } + + // __TEXT segment is non-optional + { + const protection = MachO.getSegmentMemoryProtection("__TEXT"); + macho_file.text_segment_cmd_index = @intCast(macho_file.segments.items.len); + macho_file.header_segment_cmd_index = macho_file.text_segment_cmd_index.?; + try macho_file.segments.append(gpa, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = MachO.makeStaticString("__TEXT"), + .maxprot = protection, + .initprot = protection, + }); + } + + for (macho_file.sections.items(.header), 0..) |header, sect_id| { + if (header.size == 0) continue; // empty section + + const segname = header.segName(); + const segment_id = macho_file.getSegmentByName(segname) orelse blk: { + log.debug("creating segment '{s}'", .{segname}); + const segment_id = @as(u8, @intCast(macho_file.segments.items.len)); + const protection = MachO.getSegmentMemoryProtection(segname); + try macho_file.segments.append(gpa, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = MachO.makeStaticString(segname), + .maxprot = protection, + .initprot = protection, + }); + break :blk segment_id; + }; + const segment = &macho_file.segments.items[segment_id]; + segment.cmdsize += @sizeOf(macho.section_64); + segment.nsects += 1; + macho_file.sections.items(.segment_index)[sect_id] = segment_id; + } + + if (macho_file.getSegmentByName("__DATA_CONST")) |index| { + macho_file.data_const_segment_cmd_index = index; + } + + if (macho_file.getSegmentByName("__DATA")) |index| { + macho_file.data_segment_cmd_index = index; + } + + // __LINKEDIT always comes last + { + const protection = MachO.getSegmentMemoryProtection("__LINKEDIT"); + macho_file.linkedit_segment_cmd_index = @intCast(macho_file.segments.items.len); + try macho_file.segments.append(gpa, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = MachO.makeStaticString("__LINKEDIT"), 
+ .maxprot = protection, + .initprot = protection, + }); + } +} + +fn writeAtoms(macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; + const slice = macho_file.sections.slice(); + + for (slice.items(.first_atom_index), 0..) |first_atom_index, sect_id| { + const header = slice.items(.header)[sect_id]; + if (header.isZerofill()) continue; + + var atom_index = first_atom_index orelse continue; + + var buffer = try gpa.alloc(u8, math.cast(usize, header.size) orelse return error.Overflow); + defer gpa.free(buffer); + @memset(buffer, 0); // TODO with NOPs + + log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); + + while (true) { + const atom = macho_file.getAtom(atom_index); + if (atom.getFile()) |file| { + const this_sym = macho_file.getSymbol(atom.getSymbolWithLoc()); + const padding_size: usize = if (atom.next_index) |next_index| blk: { + const next_sym = macho_file.getSymbol(macho_file.getAtom(next_index).getSymbolWithLoc()); + const size = next_sym.n_value - (this_sym.n_value + atom.size); + break :blk math.cast(usize, size) orelse return error.Overflow; + } else 0; + + log.debug(" (adding ATOM(%{d}, '{s}') from object({d}) to buffer)", .{ + atom.sym_index, + macho_file.getSymbolName(atom.getSymbolWithLoc()), + file, + }); + if (padding_size > 0) { + log.debug(" (with padding {x})", .{padding_size}); + } + + const offset = this_sym.n_value - header.addr; + log.debug(" (at offset 0x{x})", .{offset}); + + const code = Atom.getAtomCode(macho_file, atom_index); + const relocs = Atom.getAtomRelocs(macho_file, atom_index); + const size = math.cast(usize, atom.size) orelse return error.Overflow; + @memcpy(buffer[offset .. 
offset + size], code); + try Atom.resolveRelocs( + macho_file, + atom_index, + buffer[offset..][0..size], + relocs, + ); + } + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + + log.debug(" (writing at file offset 0x{x})", .{header.offset}); + try macho_file.base.file.?.pwriteAll(buffer, header.offset); + } +} + +fn writeDyldPrivateAtom(macho_file: *MachO) !void { + const atom_index = macho_file.dyld_private_atom_index orelse return; + const atom = macho_file.getAtom(atom_index); + const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); + const sect_id = macho_file.data_section_index.?; + const header = macho_file.sections.items(.header)[sect_id]; + const offset = sym.n_value - header.addr + header.offset; + log.debug("writing __dyld_private at offset 0x{x}", .{offset}); + const buffer: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64); + try macho_file.base.file.?.pwriteAll(&buffer, offset); +} + +fn writeThunks(macho_file: *MachO) !void { + assert(macho_file.requiresThunks()); + const gpa = macho_file.base.allocator; + + const sect_id = macho_file.text_section_index orelse return; + const header = macho_file.sections.items(.header)[sect_id]; + + for (macho_file.thunks.items, 0..) 
|*thunk, i| { + if (thunk.getSize() == 0) continue; + var buffer = try std.ArrayList(u8).initCapacity(gpa, thunk.getSize()); + defer buffer.deinit(); + try thunks.writeThunkCode(macho_file, thunk, buffer.writer()); + const thunk_atom = macho_file.getAtom(thunk.getStartAtomIndex()); + const thunk_sym = macho_file.getSymbol(thunk_atom.getSymbolWithLoc()); + const offset = thunk_sym.n_value - header.addr + header.offset; + log.debug("writing thunk({d}) at offset 0x{x}", .{ i, offset }); + try macho_file.base.file.?.pwriteAll(buffer.items, offset); + } +} + +fn writePointerEntries(macho_file: *MachO, sect_id: u8, table: anytype) !void { + const gpa = macho_file.base.allocator; + const header = macho_file.sections.items(.header)[sect_id]; + var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); + defer buffer.deinit(); + for (table.entries.items) |entry| { + const sym = macho_file.getSymbol(entry); + buffer.writer().writeIntLittle(u64, sym.n_value) catch unreachable; + } + log.debug("writing __DATA_CONST,__got contents at file offset 0x{x}", .{header.offset}); + try macho_file.base.file.?.pwriteAll(buffer.items, header.offset); +} + +fn writeStubs(macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; + const cpu_arch = macho_file.base.options.target.cpu.arch; + const stubs_header = macho_file.sections.items(.header)[macho_file.stubs_section_index.?]; + const la_symbol_ptr_header = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_section_index.?]; + + var buffer = try std.ArrayList(u8).initCapacity(gpa, stubs_header.size); + defer buffer.deinit(); + + for (0..macho_file.stub_table.count()) |index| { + try stubs.writeStubCode(.{ + .cpu_arch = cpu_arch, + .source_addr = stubs_header.addr + stubs.stubSize(cpu_arch) * index, + .target_addr = la_symbol_ptr_header.addr + index * @sizeOf(u64), + }, buffer.writer()); + } + + log.debug("writing __TEXT,__stubs contents at file offset 0x{x}", .{stubs_header.offset}); + try 
macho_file.base.file.?.pwriteAll(buffer.items, stubs_header.offset); +} + +fn writeStubHelpers(macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; + const cpu_arch = macho_file.base.options.target.cpu.arch; + const stub_helper_header = macho_file.sections.items(.header)[macho_file.stub_helper_section_index.?]; + + var buffer = try std.ArrayList(u8).initCapacity(gpa, stub_helper_header.size); + defer buffer.deinit(); + + { + const dyld_private_addr = blk: { + const atom = macho_file.getAtom(macho_file.dyld_private_atom_index.?); + const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); + break :blk sym.n_value; + }; + const dyld_stub_binder_got_addr = blk: { + const sym_loc = macho_file.globals.items[macho_file.dyld_stub_binder_index.?]; + break :blk macho_file.getGotEntryAddress(sym_loc).?; + }; + try stubs.writeStubHelperPreambleCode(.{ + .cpu_arch = cpu_arch, + .source_addr = stub_helper_header.addr, + .dyld_private_addr = dyld_private_addr, + .dyld_stub_binder_got_addr = dyld_stub_binder_got_addr, + }, buffer.writer()); + } + + for (0..macho_file.stub_table.count()) |index| { + const source_addr = stub_helper_header.addr + stubs.stubHelperPreambleSize(cpu_arch) + + stubs.stubHelperSize(cpu_arch) * index; + try stubs.writeStubHelperCode(.{ + .cpu_arch = cpu_arch, + .source_addr = source_addr, + .target_addr = stub_helper_header.addr, + }, buffer.writer()); + } + + log.debug("writing __TEXT,__stub_helper contents at file offset 0x{x}", .{ + stub_helper_header.offset, + }); + try macho_file.base.file.?.pwriteAll(buffer.items, stub_helper_header.offset); +} + +fn writeLaSymbolPtrs(macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; + const cpu_arch = macho_file.base.options.target.cpu.arch; + const la_symbol_ptr_header = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_section_index.?]; + const stub_helper_header = macho_file.sections.items(.header)[macho_file.stub_helper_section_index.?]; + + var buffer = try 
std.ArrayList(u8).initCapacity(gpa, la_symbol_ptr_header.size); + defer buffer.deinit(); + + for (0..macho_file.stub_table.count()) |index| { + const target_addr = stub_helper_header.addr + stubs.stubHelperPreambleSize(cpu_arch) + + stubs.stubHelperSize(cpu_arch) * index; + buffer.writer().writeIntLittle(u64, target_addr) catch unreachable; + } + + log.debug("writing __DATA,__la_symbol_ptr contents at file offset 0x{x}", .{ + la_symbol_ptr_header.offset, + }); + try macho_file.base.file.?.pwriteAll(buffer.items, la_symbol_ptr_header.offset); +} + +fn pruneAndSortSections(macho_file: *MachO) !void { + const Entry = struct { + index: u8, + + pub fn lessThan(ctx: *MachO, lhs: @This(), rhs: @This()) bool { + const lhs_header = ctx.sections.items(.header)[lhs.index]; + const rhs_header = ctx.sections.items(.header)[rhs.index]; + return MachO.getSectionPrecedence(lhs_header) < MachO.getSectionPrecedence(rhs_header); + } + }; + + const gpa = macho_file.base.allocator; + + var entries = try std.ArrayList(Entry).initCapacity(gpa, macho_file.sections.slice().len); + defer entries.deinit(); + + for (0..macho_file.sections.slice().len) |index| { + const section = macho_file.sections.get(index); + if (section.header.size == 0) { + log.debug("pruning section {s},{s} {?d}", .{ + section.header.segName(), + section.header.sectName(), + section.first_atom_index, + }); + for (&[_]*?u8{ + &macho_file.text_section_index, + &macho_file.data_const_section_index, + &macho_file.data_section_index, + &macho_file.bss_section_index, + &macho_file.thread_vars_section_index, + &macho_file.thread_data_section_index, + &macho_file.thread_bss_section_index, + &macho_file.eh_frame_section_index, + &macho_file.unwind_info_section_index, + &macho_file.got_section_index, + &macho_file.tlv_ptr_section_index, + &macho_file.stubs_section_index, + &macho_file.stub_helper_section_index, + &macho_file.la_symbol_ptr_section_index, + }) |maybe_index| { + if (maybe_index.* != null and maybe_index.*.? 
== index) { + maybe_index.* = null; + } + } + continue; + } + entries.appendAssumeCapacity(.{ .index = @intCast(index) }); + } + + mem.sort(Entry, entries.items, macho_file, Entry.lessThan); + + var slice = macho_file.sections.toOwnedSlice(); + defer slice.deinit(gpa); + + const backlinks = try gpa.alloc(u8, slice.len); + defer gpa.free(backlinks); + for (entries.items, 0..) |entry, i| { + backlinks[entry.index] = @as(u8, @intCast(i)); + } + + try macho_file.sections.ensureTotalCapacity(gpa, entries.items.len); + for (entries.items) |entry| { + macho_file.sections.appendAssumeCapacity(slice.get(entry.index)); + } + + for (&[_]*?u8{ + &macho_file.text_section_index, + &macho_file.data_const_section_index, + &macho_file.data_section_index, + &macho_file.bss_section_index, + &macho_file.thread_vars_section_index, + &macho_file.thread_data_section_index, + &macho_file.thread_bss_section_index, + &macho_file.eh_frame_section_index, + &macho_file.unwind_info_section_index, + &macho_file.got_section_index, + &macho_file.tlv_ptr_section_index, + &macho_file.stubs_section_index, + &macho_file.stub_helper_section_index, + &macho_file.la_symbol_ptr_section_index, + }) |maybe_index| { + if (maybe_index.*) |*index| { + index.* = backlinks[index.*]; + } + } +} + +fn calcSectionSizes(macho_file: *MachO) !void { + const slice = macho_file.sections.slice(); + for (slice.items(.header), 0..) 
|*header, sect_id| { + if (header.size == 0) continue; + if (macho_file.text_section_index) |txt| { + if (txt == sect_id and macho_file.requiresThunks()) continue; + } + + var atom_index = slice.items(.first_atom_index)[sect_id] orelse continue; + + header.size = 0; + header.@"align" = 0; + + while (true) { + const atom = macho_file.getAtom(atom_index); + const atom_alignment = try math.powi(u32, 2, atom.alignment); + const atom_offset = mem.alignForward(u64, header.size, atom_alignment); + const padding = atom_offset - header.size; + + const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc()); + sym.n_value = atom_offset; + + header.size += padding + atom.size; + header.@"align" = @max(header.@"align", atom.alignment); + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + } + + if (macho_file.text_section_index != null and macho_file.requiresThunks()) { + // Create jump/branch range extenders if needed. + try thunks.createThunks(macho_file, macho_file.text_section_index.?); + } + + // Update offsets of all symbols contained within each Atom. + // We need to do this since our unwind info synthesiser relies on + // traversing the symbols when synthesising unwind info and DWARF CFI records. 
+ for (slice.items(.first_atom_index)) |first_atom_index| { + var atom_index = first_atom_index orelse continue; + + while (true) { + const atom = macho_file.getAtom(atom_index); + const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); + + if (atom.getFile() != null) { + // Update each symbol contained within the atom + var it = Atom.getInnerSymbolsIterator(macho_file, atom_index); + while (it.next()) |sym_loc| { + const inner_sym = macho_file.getSymbolPtr(sym_loc); + inner_sym.n_value = sym.n_value + Atom.calcInnerSymbolOffset( + macho_file, + atom_index, + sym_loc.sym_index, + ); + } + + // If there is a section alias, update it now too + if (Atom.getSectionAlias(macho_file, atom_index)) |sym_loc| { + const alias = macho_file.getSymbolPtr(sym_loc); + alias.n_value = sym.n_value; + } + } + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + } + + if (macho_file.got_section_index) |sect_id| { + const header = &macho_file.sections.items(.header)[sect_id]; + header.size = macho_file.got_table.count() * @sizeOf(u64); + header.@"align" = 3; + } + + if (macho_file.tlv_ptr_section_index) |sect_id| { + const header = &macho_file.sections.items(.header)[sect_id]; + header.size = macho_file.tlv_ptr_table.count() * @sizeOf(u64); + header.@"align" = 3; + } + + const cpu_arch = macho_file.base.options.target.cpu.arch; + + if (macho_file.stubs_section_index) |sect_id| { + const header = &macho_file.sections.items(.header)[sect_id]; + header.size = macho_file.stub_table.count() * stubs.stubSize(cpu_arch); + header.@"align" = stubs.stubAlignment(cpu_arch); + } + + if (macho_file.stub_helper_section_index) |sect_id| { + const header = &macho_file.sections.items(.header)[sect_id]; + header.size = macho_file.stub_table.count() * stubs.stubHelperSize(cpu_arch) + + stubs.stubHelperPreambleSize(cpu_arch); + header.@"align" = stubs.stubAlignment(cpu_arch); + } + + if (macho_file.la_symbol_ptr_section_index) |sect_id| { + const header = 
&macho_file.sections.items(.header)[sect_id]; + header.size = macho_file.stub_table.count() * @sizeOf(u64); + header.@"align" = 3; + } +} + +fn allocateSegments(macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; + for (macho_file.segments.items, 0..) |*segment, segment_index| { + const is_text_segment = mem.eql(u8, segment.segName(), "__TEXT"); + const base_size = if (is_text_segment) try load_commands.calcMinHeaderPad(gpa, macho_file.base.options, .{ + .segments = macho_file.segments.items, + .dylibs = macho_file.dylibs.items, + .referenced_dylibs = macho_file.referenced_dylibs.keys(), + }) else 0; + try allocateSegment(macho_file, @as(u8, @intCast(segment_index)), base_size); + } +} + +fn getSegmentAllocBase(macho_file: *MachO, segment_index: u8) struct { vmaddr: u64, fileoff: u64 } { + if (segment_index > 0) { + const prev_segment = macho_file.segments.items[segment_index - 1]; + return .{ + .vmaddr = prev_segment.vmaddr + prev_segment.vmsize, + .fileoff = prev_segment.fileoff + prev_segment.filesize, + }; + } + return .{ .vmaddr = 0, .fileoff = 0 }; +} + +fn allocateSegment(macho_file: *MachO, segment_index: u8, init_size: u64) !void { + const segment = &macho_file.segments.items[segment_index]; + + if (mem.eql(u8, segment.segName(), "__PAGEZERO")) return; // allocated upon creation + + const base = getSegmentAllocBase(macho_file, segment_index); + segment.vmaddr = base.vmaddr; + segment.fileoff = base.fileoff; + segment.filesize = init_size; + segment.vmsize = init_size; + + // Allocate the sections according to their alignment at the beginning of the segment. + const indexes = macho_file.getSectionIndexes(segment_index); + var start = init_size; + + const slice = macho_file.sections.slice(); + for (slice.items(.header)[indexes.start..indexes.end], 0..) 
|*header, sect_id| { + const alignment = try math.powi(u32, 2, header.@"align"); + const start_aligned = mem.alignForward(u64, start, alignment); + const n_sect = @as(u8, @intCast(indexes.start + sect_id + 1)); + + header.offset = if (header.isZerofill()) + 0 + else + @as(u32, @intCast(segment.fileoff + start_aligned)); + header.addr = segment.vmaddr + start_aligned; + + if (slice.items(.first_atom_index)[indexes.start + sect_id]) |first_atom_index| { + var atom_index = first_atom_index; + + log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ + n_sect, + header.segName(), + header.sectName(), + }); + + while (true) { + const atom = macho_file.getAtom(atom_index); + const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc()); + sym.n_value += header.addr; + sym.n_sect = n_sect; + + log.debug(" ATOM(%{d}, '{s}') @{x}", .{ + atom.sym_index, + macho_file.getSymbolName(atom.getSymbolWithLoc()), + sym.n_value, + }); + + if (atom.getFile() != null) { + // Update each symbol contained within the atom + var it = Atom.getInnerSymbolsIterator(macho_file, atom_index); + while (it.next()) |sym_loc| { + const inner_sym = macho_file.getSymbolPtr(sym_loc); + inner_sym.n_value = sym.n_value + Atom.calcInnerSymbolOffset( + macho_file, + atom_index, + sym_loc.sym_index, + ); + inner_sym.n_sect = n_sect; + } + + // If there is a section alias, update it now too + if (Atom.getSectionAlias(macho_file, atom_index)) |sym_loc| { + const alias = macho_file.getSymbolPtr(sym_loc); + alias.n_value = sym.n_value; + alias.n_sect = n_sect; + } + } + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + } + + start = start_aligned + header.size; + + if (!header.isZerofill()) { + segment.filesize = start; + } + segment.vmsize = start; + } + + const page_size = MachO.getPageSize(macho_file.base.options.target.cpu.arch); + segment.filesize = mem.alignForward(u64, segment.filesize, page_size); + segment.vmsize = mem.alignForward(u64, segment.vmsize, 
page_size); +} + +const std = @import("std"); +const build_options = @import("build_options"); +const assert = std.debug.assert; +const dwarf = std.dwarf; +const fs = std.fs; +const log = std.log.scoped(.link); +const macho = std.macho; +const math = std.math; +const mem = std.mem; + +const aarch64 = @import("../../arch/aarch64/bits.zig"); +const calcUuid = @import("uuid.zig").calcUuid; +const dead_strip = @import("dead_strip.zig"); +const eh_frame = @import("eh_frame.zig"); +const fat = @import("fat.zig"); +const link = @import("../../link.zig"); +const load_commands = @import("load_commands.zig"); +const stubs = @import("stubs.zig"); +const thunks = @import("thunks.zig"); +const trace = @import("../../tracy.zig").trace; + +const Allocator = mem.Allocator; +const Archive = @import("Archive.zig"); +const Atom = @import("Atom.zig"); +const Cache = std.Build.Cache; +const CodeSignature = @import("CodeSignature.zig"); +const Compilation = @import("../../Compilation.zig"); +const Dylib = @import("Dylib.zig"); +const MachO = @import("../MachO.zig"); +const Md5 = std.crypto.hash.Md5; +const LibStub = @import("../tapi.zig").LibStub; +const Object = @import("Object.zig"); +const Section = MachO.Section; +const StringTable = @import("../strtab.zig").StringTable; +const SymbolWithLoc = MachO.SymbolWithLoc; +const TableSection = @import("../table_section.zig").TableSection; +const Trie = @import("Trie.zig"); +const UnwindInfo = @import("UnwindInfo.zig"); From 9d62ebc0ce2c12e991f3016e71a8569e262c26c6 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 27 Aug 2023 11:07:07 +0200 Subject: [PATCH 31/57] macho: fix compilation issues --- src/link/MachO.zig | 29 ++++++++++++++--------------- src/link/MachO/dead_strip.zig | 13 +++++++------ src/link/MachO/zld.zig | 25 ++++++++++++++----------- 3 files changed, 35 insertions(+), 32 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index ac7b4af988..d662266158 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig 
@@ -1455,7 +1455,7 @@ pub fn createTentativeDefAtoms(self: *MachO) !void { } } -fn createDyldPrivateAtom(self: *MachO) !void { +pub fn createDyldPrivateAtom(self: *MachO) !void { if (self.dyld_private_atom_index != null) return; const sym_index = try self.allocateSymbol(); @@ -2015,7 +2015,7 @@ fn growAtom(self: *MachO, atom_index: Atom.Index, new_atom_size: u64, alignment: return self.allocateAtom(atom_index, new_atom_size, alignment); } -fn allocateSymbol(self: *MachO) !u32 { +pub fn allocateSymbol(self: *MachO) !u32 { try self.locals.ensureUnusedCapacity(self.base.allocator, 1); const index = blk: { @@ -2104,7 +2104,7 @@ pub fn addStubEntry(self: *MachO, target: SymbolWithLoc) !void { pub fn addTlvPtrEntry(self: *MachO, target: SymbolWithLoc) !void { if (self.tlv_ptr_table.lookup.contains(target)) return; - _ = try self.tlv_ptr_table.allocateEntry(self.gpa, target); + _ = try self.tlv_ptr_table.allocateEntry(self.base.allocator, target); if (self.tlv_ptr_section_index == null) { self.tlv_ptr_section_index = try self.initSection("__DATA", "__thread_ptrs", .{ .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, @@ -3490,7 +3490,7 @@ fn collectRebaseData(self: *MachO, rebase: *Rebase) !void { const offset = @as(u64, @intCast(base_offset + rel_offset)); log.debug(" | rebase at {x}", .{offset}); - try rebase.entries.append(self.gpa, .{ + try rebase.entries.append(gpa, .{ .offset = offset, .segment_id = segment_id, }); @@ -3656,7 +3656,7 @@ fn collectBindData(self: *MachO, bind: anytype, raw_bindings: anytype) !void { if (bind_sym.weakRef()) { log.debug(" | marking as weak ref ", .{}); } - try bind.entries.append(self.gpa, .{ + try bind.entries.append(gpa, .{ .target = global, .offset = offset, .segment_id = segment_id, @@ -4004,8 +4004,8 @@ fn writeSymtab(self: *MachO) !SymtabCtx { var locals = std.ArrayList(macho.nlist_64).init(gpa); defer locals.deinit(); - for (0..self.locals.items) |sym_id| { - try self.addLocalToSymtab(.{ .sym_index = @intCast(sym_id) }); + for 
(0..self.locals.items.len) |sym_id| { + try self.addLocalToSymtab(.{ .sym_index = @intCast(sym_id) }, &locals); } for (self.objects.items) |object| { @@ -4611,7 +4611,7 @@ pub fn makeStaticString(bytes: []const u8) [16]u8 { return buf; } -fn getSegmentByName(self: MachO, segname: []const u8) ?u8 { +pub fn getSegmentByName(self: MachO, segname: []const u8) ?u8 { for (self.segments.items, 0..) |seg, i| { if (mem.eql(u8, segname, seg.segName())) return @as(u8, @intCast(i)); } else return null; @@ -5024,7 +5024,7 @@ pub fn logSymtab(self: *MachO) void { scoped_log.debug("{}", .{self.tlv_ptr_table}); scoped_log.debug("stubs entries:", .{}); - scoped_log.debug("{}", .{self.stubs_table}); + scoped_log.debug("{}", .{self.stub_table}); scoped_log.debug("thunks:", .{}); for (self.thunks.items, 0..) |thunk, i| { @@ -5151,7 +5151,7 @@ const Cache = std.Build.Cache; const CodeSignature = @import("MachO/CodeSignature.zig"); const Compilation = @import("../Compilation.zig"); const Dwarf = File.Dwarf; -const DwarfInfo = @import("DwarfInfo.zig"); +const DwarfInfo = @import("MachO/DwarfInfo.zig"); const Dylib = @import("MachO/Dylib.zig"); const File = link.File; const Object = @import("MachO/Object.zig"); @@ -5170,10 +5170,9 @@ const TypedValue = @import("../TypedValue.zig"); const Value = @import("../value.zig").Value; pub const DebugSymbols = @import("MachO/DebugSymbols.zig"); - -const Bind = @import("MachO/dyld_info/bind.zig").Bind(*const MachO, SymbolWithLoc); -const LazyBind = @import("MachO/dyld_info/bind.zig").LazyBind(*const MachO, SymbolWithLoc); -const Rebase = @import("MachO/dyld_info/Rebase.zig"); +pub const Bind = @import("MachO/dyld_info/bind.zig").Bind(*const MachO, SymbolWithLoc); +pub const LazyBind = @import("MachO/dyld_info/bind.zig").LazyBind(*const MachO, SymbolWithLoc); +pub const Rebase = @import("MachO/dyld_info/Rebase.zig"); pub const base_tag: File.Tag = File.Tag.macho; pub const N_DEAD: u16 = @as(u16, @bitCast(@as(i16, -1))); @@ -5257,7 +5256,7 @@ const 
UnnamedConstTable = std.AutoArrayHashMapUnmanaged(Module.Decl.Index, std.A const RebaseTable = std.AutoArrayHashMapUnmanaged(Atom.Index, std.ArrayListUnmanaged(u32)); const RelocationTable = std.AutoArrayHashMapUnmanaged(Atom.Index, std.ArrayListUnmanaged(Relocation)); -const ResolveAction = struct { +pub const ResolveAction = struct { kind: Kind, target: SymbolWithLoc, diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 5e99ad2270..26053cb83d 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -36,11 +36,12 @@ fn collectRoots(macho_file: *MachO, roots: *AtomTable) !void { switch (macho_file.base.options.output_mode) { .Exe => { // Add entrypoint as GC root - const global: SymbolWithLoc = macho_file.getEntryPoint(); - if (global.getFile()) |file| { - try addRoot(macho_file, roots, file, global); - } else { - assert(macho_file.getSymbol(global).undf()); // Stub as our entrypoint is in a dylib. + if (macho_file.getEntryPoint()) |global| { + if (global.getFile()) |file| { + try addRoot(macho_file, roots, file, global); + } else { + assert(macho_file.getSymbol(global).undf()); // Stub as our entrypoint is in a dylib. 
+ } } }, else => |other| { @@ -116,7 +117,7 @@ fn markLive(macho_file: *MachO, atom_index: Atom.Index, alive: *AtomTable) void alive.putAssumeCapacityNoClobber(atom_index, {}); - const cpu_arch = macho_file.options.target.cpu.arch; + const cpu_arch = macho_file.base.options.target.cpu.arch; const sym = macho_file.getSymbol(atom.getSymbolWithLoc()); const header = macho_file.sections.items(.header)[sym.n_sect - 1]; diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 86bf14bdb2..a28df3c4e7 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -401,7 +401,7 @@ pub fn linkWithZld( try macho_file.createDyldPrivateAtom(); try macho_file.createTentativeDefAtoms(); - if (macho_file.options.output_mode == .Exe) { + if (macho_file.base.options.output_mode == .Exe) { const global = macho_file.getEntryPoint().?; if (macho_file.getSymbol(global).undf()) { // We do one additional check here in case the entry point was found in one of the dylibs. @@ -429,7 +429,7 @@ pub fn linkWithZld( if (macho_file.dyld_stub_binder_index) |index| try macho_file.addGotEntry(macho_file.globals.items[index]); - try macho_file.calcSectionSizes(); + try calcSectionSizes(macho_file); var unwind_info = UnwindInfo{ .gpa = gpa }; defer unwind_info.deinit(); @@ -461,9 +461,9 @@ pub fn linkWithZld( try writeLaSymbolPtrs(macho_file); } if (macho_file.got_section_index) |sect_id| - try macho_file.writePointerEntries(sect_id, &macho_file.got_table); + try writePointerEntries(macho_file, sect_id, &macho_file.got_table); if (macho_file.tlv_ptr_section_index) |sect_id| - try macho_file.writePointerEntries(sect_id, &macho_file.tlv_ptr_table); + try writePointerEntries(macho_file, sect_id, &macho_file.tlv_ptr_table); try eh_frame.write(macho_file, &unwind_info); try unwind_info.write(macho_file); @@ -546,11 +546,11 @@ pub fn linkWithZld( else => {}, } - try load_commands.writeRpathLCs(gpa, macho_file.base.options, lc_writer); + try load_commands.writeRpathLCs(gpa, 
&macho_file.base.options, lc_writer); try lc_writer.writeStruct(macho.source_version_command{ .version = 0, }); - try load_commands.writeBuildVersionLC(macho_file.base.options, lc_writer); + try load_commands.writeBuildVersionLC(&macho_file.base.options, lc_writer); const uuid_cmd_offset = @sizeOf(macho.mach_header_64) + @as(u32, @intCast(lc_buffer.items.len)); try lc_writer.writeStruct(macho_file.uuid_cmd); @@ -1053,11 +1053,14 @@ fn allocateSegments(macho_file: *MachO) !void { const gpa = macho_file.base.allocator; for (macho_file.segments.items, 0..) |*segment, segment_index| { const is_text_segment = mem.eql(u8, segment.segName(), "__TEXT"); - const base_size = if (is_text_segment) try load_commands.calcMinHeaderPad(gpa, macho_file.base.options, .{ - .segments = macho_file.segments.items, - .dylibs = macho_file.dylibs.items, - .referenced_dylibs = macho_file.referenced_dylibs.keys(), - }) else 0; + const base_size = if (is_text_segment) + try load_commands.calcMinHeaderPad(gpa, &macho_file.base.options, .{ + .segments = macho_file.segments.items, + .dylibs = macho_file.dylibs.items, + .referenced_dylibs = macho_file.referenced_dylibs.keys(), + }) + else + 0; try allocateSegment(macho_file, @as(u8, @intCast(segment_index)), base_size); } } From 5eb6c81d07d2df43f6df5e0a421b93dac10f8fed Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 27 Aug 2023 22:07:39 +0200 Subject: [PATCH 32/57] macho: fix invalid alignment of stubs for x86_64 --- src/link/MachO/stubs.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/link/MachO/stubs.zig b/src/link/MachO/stubs.zig index 5478dd15f8..48878a41ab 100644 --- a/src/link/MachO/stubs.zig +++ b/src/link/MachO/stubs.zig @@ -24,7 +24,7 @@ pub inline fn stubSize(cpu_arch: std.Target.Cpu.Arch) u8 { pub inline fn stubAlignment(cpu_arch: std.Target.Cpu.Arch) u8 { return switch (cpu_arch) { - .x86_64 => 0, + .x86_64 => 1, .aarch64 => 2, else => unreachable, // unhandled architecture type }; From 
0353bfd55ed67fa5c1c1d0e3004fcbae7f139b92 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 27 Aug 2023 22:42:04 +0200 Subject: [PATCH 33/57] macho: fix a few regressions in incremental codepath --- src/link/MachO.zig | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index d662266158..a49999d210 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1650,12 +1650,16 @@ fn resolveGlobalSymbol(self: *MachO, current: SymbolWithLoc) !void { const global_is_weak = global_sym.sect() and (global_sym.weakDef() or global_sym.pext()); if (sym_is_strong and global_is_strong) { - log.err("symbol '{s}' defined multiple times", .{sym_name}); - if (global.getFile()) |file| { - log.err(" first definition in '{s}'", .{self.objects.items[file].name}); - } - if (current.getFile()) |file| { - log.err(" next definition in '{s}'", .{self.objects.items[file].name}); + // TODO redo this logic with corresponding logic in updateDeclExports to avoid this + // ugly check. 
+ if (self.mode == .zld) { + log.err("symbol '{s}' defined multiple times", .{sym_name}); + if (global.getFile()) |file| { + log.err(" first definition in '{s}'", .{self.objects.items[file].name}); + } + if (current.getFile()) |file| { + log.err(" next definition in '{s}'", .{self.objects.items[file].name}); + } } return error.MultipleSymbolDefinitions; } @@ -3079,6 +3083,7 @@ fn allocateSection(self: *MachO, segname: []const u8, sectname: []const u8, opts section.offset = mem.alignForward(u32, @as(u32, @intCast(off)), opts.alignment); section.size = opts.size; section.@"align" = math.log2(opts.alignment); + self.sections.items(.segment_index)[sect_id] = segment_id; assert(!section.isZerofill()); // TODO zerofill sections return sect_id; @@ -4053,8 +4058,8 @@ fn writeSymtab(self: *MachO) !SymtabCtx { // We generate stabs last in order to ensure that the strtab always has debug info // strings trailing if (!self.base.options.strip) { - assert(self.d_sym == null); // TODO for (self.objects.items) |object| { + assert(self.d_sym == null); // TODO try self.generateSymbolStabs(object, &locals); } } From 700b1e38ceeb66416a327e4f31969b1e52ba55ef Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 27 Aug 2023 23:04:27 +0200 Subject: [PATCH 34/57] macho: fix overalignment of stubs on aarch64 --- src/link/MachO.zig | 6 +----- src/link/MachO/stubs.zig | 2 +- src/link/MachO/zld.zig | 4 ++-- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index a49999d210..623c8d36f7 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2912,11 +2912,7 @@ fn populateMissingMetadata(self: *MachO) !void { if (self.stub_helper_section_index == null) { self.stub_helper_section_index = try self.allocateSection("__TEXT3", "__stub_helper", .{ .size = @sizeOf(u32), - .alignment = switch (cpu_arch) { - .x86_64 => 1, - .aarch64 => @sizeOf(u32), - else => unreachable, // unhandled architecture type - }, + .alignment = 
stubs.stubAlignment(cpu_arch), .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, .prot = macho.PROT.READ | macho.PROT.EXEC, }); diff --git a/src/link/MachO/stubs.zig b/src/link/MachO/stubs.zig index 48878a41ab..54502d1a20 100644 --- a/src/link/MachO/stubs.zig +++ b/src/link/MachO/stubs.zig @@ -25,7 +25,7 @@ pub inline fn stubSize(cpu_arch: std.Target.Cpu.Arch) u8 { pub inline fn stubAlignment(cpu_arch: std.Target.Cpu.Arch) u8 { return switch (cpu_arch) { .x86_64 => 1, - .aarch64 => 2, + .aarch64 => 4, else => unreachable, // unhandled architecture type }; } diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index a28df3c4e7..eb788f0fc2 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -1032,14 +1032,14 @@ fn calcSectionSizes(macho_file: *MachO) !void { if (macho_file.stubs_section_index) |sect_id| { const header = &macho_file.sections.items(.header)[sect_id]; header.size = macho_file.stub_table.count() * stubs.stubSize(cpu_arch); - header.@"align" = stubs.stubAlignment(cpu_arch); + header.@"align" = math.log2(stubs.stubAlignment(cpu_arch)); } if (macho_file.stub_helper_section_index) |sect_id| { const header = &macho_file.sections.items(.header)[sect_id]; header.size = macho_file.stub_table.count() * stubs.stubHelperSize(cpu_arch) + stubs.stubHelperPreambleSize(cpu_arch); - header.@"align" = stubs.stubAlignment(cpu_arch); + header.@"align" = math.log2(stubs.stubAlignment(cpu_arch)); } if (macho_file.la_symbol_ptr_section_index) |sect_id| { From 8d1ca8ce7b21e6ae0b396a35f5fc69ac52560074 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 28 Aug 2023 00:00:27 +0200 Subject: [PATCH 35/57] macho: swap sectname with segname typo when allocating sections --- src/link/MachO.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 623c8d36f7..ed77334a12 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3070,7 +3070,7 @@ fn 
allocateSection(self: *MachO, segname: []const u8, sectname: []const u8, opts .cmdsize = @sizeOf(macho.segment_command_64) + @sizeOf(macho.section_64), }; - const sect_id = try self.initSection(sectname, segname, .{ + const sect_id = try self.initSection(segname, sectname, .{ .flags = opts.flags, .reserved2 = opts.reserved2, }); From 8330065a99e057152a7c717066f66adbc622a04b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 28 Aug 2023 07:00:45 +0200 Subject: [PATCH 36/57] macho: report undefined via compiler errors in incremental driver --- src/link/MachO.zig | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index ed77334a12..a66f471766 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -428,21 +428,12 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No var actions = std.ArrayList(ResolveAction).init(self.base.allocator); defer actions.deinit(); try self.resolveSymbols(&actions); + try self.reportUndefined(); if (self.getEntryPoint() == null) { self.error_flags.no_entry_point_found = true; } - if (self.unresolved.count() > 0) { - for (self.unresolved.keys()) |index| { - // TODO: convert into compiler errors. 
- const global = self.globals.items[index]; - const sym_name = self.getSymbolName(global); - log.err("undefined symbol reference '{s}'", .{sym_name}); - } - return error.UndefinedSymbolReference; - } - for (actions.items) |action| switch (action.kind) { .none => {}, .add_got => try self.addGotEntry(action.target), From 0f02a1fcb0ea82c98de509e7ba9b7b8768f0d4ee Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 28 Aug 2023 07:31:44 +0200 Subject: [PATCH 37/57] macho: fix 32bit compilation issues --- src/link/MachO.zig | 4 +--- src/link/MachO/zld.zig | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index a66f471766..d00796c784 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -927,9 +927,7 @@ fn parseDylib( const self_cpu_arch = link_options.target.cpu.arch; const file_stat = try file.stat(); - var file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; - - file_size -= offset; + const file_size = math.cast(usize, file_stat.size - offset) orelse return error.Overflow; const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); defer gpa.free(contents); diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index eb788f0fc2..b5e1eb21c8 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -703,7 +703,8 @@ fn writeAtoms(macho_file: *MachO) !void { log.debug(" (with padding {x})", .{padding_size}); } - const offset = this_sym.n_value - header.addr; + const offset = math.cast(usize, this_sym.n_value - header.addr) orelse + return error.Overflow; log.debug(" (at offset 0x{x})", .{offset}); const code = Atom.getAtomCode(macho_file, atom_index); @@ -749,7 +750,8 @@ fn writeThunks(macho_file: *MachO) !void { for (macho_file.thunks.items, 0..) 
|*thunk, i| { if (thunk.getSize() == 0) continue; - var buffer = try std.ArrayList(u8).initCapacity(gpa, thunk.getSize()); + const thunk_size = math.cast(usize, thunk.getSize()) orelse return error.Overflow; + var buffer = try std.ArrayList(u8).initCapacity(gpa, thunk_size); defer buffer.deinit(); try thunks.writeThunkCode(macho_file, thunk, buffer.writer()); const thunk_atom = macho_file.getAtom(thunk.getStartAtomIndex()); @@ -763,7 +765,8 @@ fn writeThunks(macho_file: *MachO) !void { fn writePointerEntries(macho_file: *MachO, sect_id: u8, table: anytype) !void { const gpa = macho_file.base.allocator; const header = macho_file.sections.items(.header)[sect_id]; - var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); + const capacity = math.cast(usize, header.size) orelse return error.Overflow; + var buffer = try std.ArrayList(u8).initCapacity(gpa, capacity); defer buffer.deinit(); for (table.entries.items) |entry| { const sym = macho_file.getSymbol(entry); @@ -779,7 +782,8 @@ fn writeStubs(macho_file: *MachO) !void { const stubs_header = macho_file.sections.items(.header)[macho_file.stubs_section_index.?]; const la_symbol_ptr_header = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_section_index.?]; - var buffer = try std.ArrayList(u8).initCapacity(gpa, stubs_header.size); + const capacity = math.cast(usize, stubs_header.size) orelse return error.Overflow; + var buffer = try std.ArrayList(u8).initCapacity(gpa, capacity); defer buffer.deinit(); for (0..macho_file.stub_table.count()) |index| { @@ -799,7 +803,8 @@ fn writeStubHelpers(macho_file: *MachO) !void { const cpu_arch = macho_file.base.options.target.cpu.arch; const stub_helper_header = macho_file.sections.items(.header)[macho_file.stub_helper_section_index.?]; - var buffer = try std.ArrayList(u8).initCapacity(gpa, stub_helper_header.size); + const capacity = math.cast(usize, stub_helper_header.size) orelse return error.Overflow; + var buffer = try std.ArrayList(u8).initCapacity(gpa, 
capacity); defer buffer.deinit(); { @@ -842,7 +847,8 @@ fn writeLaSymbolPtrs(macho_file: *MachO) !void { const la_symbol_ptr_header = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_section_index.?]; const stub_helper_header = macho_file.sections.items(.header)[macho_file.stub_helper_section_index.?]; - var buffer = try std.ArrayList(u8).initCapacity(gpa, la_symbol_ptr_header.size); + const capacity = math.cast(usize, la_symbol_ptr_header.size) orelse return error.Overflow; + var buffer = try std.ArrayList(u8).initCapacity(gpa, capacity); defer buffer.deinit(); for (0..macho_file.stub_table.count()) |index| { From 68dc1a3e3fc8ab739cb34ed536c71a02727b3825 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 28 Aug 2023 07:53:03 +0200 Subject: [PATCH 38/57] macho: report symbol collision as compiler error --- src/link/MachO.zig | 63 ++++++++++++++++++++++++++++++------------ src/link/MachO/zld.zig | 5 +++- 2 files changed, 50 insertions(+), 18 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index d00796c784..1bbe2057b7 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -428,11 +428,14 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No var actions = std.ArrayList(ResolveAction).init(self.base.allocator); defer actions.deinit(); try self.resolveSymbols(&actions); - try self.reportUndefined(); if (self.getEntryPoint() == null) { self.error_flags.no_entry_point_found = true; } + if (self.unresolved.count() > 0) { + try self.reportUndefined(); + return error.FlushFailure; + } for (actions.items) |action| switch (action.kind) { .none => {}, @@ -1642,13 +1645,7 @@ fn resolveGlobalSymbol(self: *MachO, current: SymbolWithLoc) !void { // TODO redo this logic with corresponding logic in updateDeclExports to avoid this // ugly check. 
if (self.mode == .zld) { - log.err("symbol '{s}' defined multiple times", .{sym_name}); - if (global.getFile()) |file| { - log.err(" first definition in '{s}'", .{self.objects.items[file].name}); - } - if (current.getFile()) |file| { - log.err(" next definition in '{s}'", .{self.objects.items[file].name}); - } + try self.reportSymbolCollision(global, current); } return error.MultipleSymbolDefinitions; } @@ -1714,7 +1711,13 @@ fn resolveSymbolsInObject(self: *MachO, object_id: u32) !void { continue; } - try self.resolveGlobalSymbol(.{ .sym_index = sym_index, .file = object_id + 1 }); + self.resolveGlobalSymbol(.{ + .sym_index = sym_index, + .file = object_id + 1, + }) catch |err| switch (err) { + error.MultipleSymbolDefinitions => return error.FlushFailure, + else => |e| return e, + }; } } @@ -4833,20 +4836,16 @@ pub fn getSectionPrecedence(header: macho.section_64) u8 { return (@as(u8, @intCast(segment_precedence)) << 4) + section_precedence; } -pub fn reportUndefined(self: *MachO) !void { - const count = self.unresolved.count(); - if (count == 0) return; - +pub fn reportUndefined(self: *MachO) error{OutOfMemory}!void { const gpa = self.base.allocator; - + const count = self.unresolved.count(); try self.misc_errors.ensureUnusedCapacity(gpa, count); for (self.unresolved.keys()) |global_index| { const global = self.globals.items[global_index]; const sym_name = self.getSymbolName(global); - const nnotes: usize = if (global.getFile() == null) @as(usize, 0) else 1; - var notes = try std.ArrayList(File.ErrorMsg).initCapacity(gpa, nnotes); + var notes = try std.ArrayList(File.ErrorMsg).initCapacity(gpa, 1); defer notes.deinit(); if (global.getFile()) |file| { @@ -4863,8 +4862,38 @@ pub fn reportUndefined(self: *MachO) !void { self.misc_errors.appendAssumeCapacity(err_msg); } +} - return error.FlushFailure; +fn reportSymbolCollision( + self: *MachO, + first: SymbolWithLoc, + other: SymbolWithLoc, +) error{OutOfMemory}!void { + const gpa = self.base.allocator; + try 
self.misc_errors.ensureUnusedCapacity(gpa, 1); + + var notes = try std.ArrayList(File.ErrorMsg).initCapacity(gpa, 2); + defer notes.deinit(); + + if (first.getFile()) |file| { + const note = try std.fmt.allocPrint(gpa, "first definition in {s}", .{ + self.objects.items[file].name, + }); + notes.appendAssumeCapacity(.{ .msg = note }); + } + if (other.getFile()) |file| { + const note = try std.fmt.allocPrint(gpa, "next definition in {s}", .{ + self.objects.items[file].name, + }); + notes.appendAssumeCapacity(.{ .msg = note }); + } + + var err_msg = File.ErrorMsg{ .msg = try std.fmt.allocPrint(gpa, "symbol {s} defined multiple times", .{ + self.getSymbolName(first), + }) }; + err_msg.notes = try notes.toOwnedSlice(); + + self.misc_errors.appendAssumeCapacity(err_msg); } /// Binary search diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index b5e1eb21c8..152c276ddd 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -388,7 +388,10 @@ pub fn linkWithZld( var actions = std.ArrayList(MachO.ResolveAction).init(gpa); defer actions.deinit(); try macho_file.resolveSymbols(&actions); - try macho_file.reportUndefined(); + if (macho_file.unresolved.count() > 0) { + try macho_file.reportUndefined(); + return error.FlushFailure; + } for (macho_file.objects.items, 0..) 
|*object, object_id| { try object.splitIntoAtoms(macho_file, @as(u32, @intCast(object_id))); From 1820aed786a2bb61a6526873e7a8ddf47d45e9fd Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 28 Aug 2023 16:19:42 +0200 Subject: [PATCH 39/57] macho: convert log.err when CPU arch is mismatched into actual errors --- src/link/MachO.zig | 164 +++++++++++++++++++---------------------- src/link/MachO/zld.zig | 53 ++++++++++--- 2 files changed, 118 insertions(+), 99 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 1bbe2057b7..73ef01c4e4 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -396,11 +396,17 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No self.dylibs_map.clearRetainingCapacity(); self.referenced_dylibs.clearRetainingCapacity(); + const cpu_arch = self.base.options.target.cpu.arch; var dependent_libs = std.fifo.LinearFifo(struct { id: Dylib.Id, parent: u16, }, .Dynamic).init(arena); + var parse_error_ctx: union { + none: void, + detected_arch: std.Target.Cpu.Arch, + } = .{ .none = {} }; + for (libs.keys(), libs.values()) |path, lib| { const in_file = try std.fs.cwd().openFile(path, .{}); defer in_file.close(); @@ -411,15 +417,28 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No lib, false, &dependent_libs, - &self.base.options, - ) catch |err| { - // TODO convert to error - log.err("{s}: parsing library failed with err {s}", .{ path, @errorName(err) }); - continue; + &parse_error_ctx, + ) catch |err| switch (err) { + error.UnknownFileType => try self.reportParseError(path, "unknown file type", .{}), + error.MissingArchFatLib => try self.reportParseError( + path, + "missing architecture in universal file, expected '{s}'", + .{@tagName(cpu_arch)}, + ), + error.InvalidArch => try self.reportParseError( + path, + "invalid architecture '{s}', expected '{s}'", + .{ @tagName(parse_error_ctx.detected_arch), @tagName(cpu_arch) }, + ), + else => |e| try 
self.reportParseError( + path, + "parsing library failed with error '{s}'", + .{@errorName(e)}, + ), }; } - self.parseDependentLibs(&dependent_libs, &self.base.options) catch |err| { + self.parseDependentLibs(&dependent_libs, &parse_error_ctx) catch |err| { // TODO convert to error log.err("parsing dependent libraries failed with err {s}", .{@errorName(err)}); }; @@ -710,19 +729,19 @@ pub fn parsePositional( path: []const u8, must_link: bool, dependent_libs: anytype, - link_options: *const link.Options, + error_ctx: anytype, ) !void { const tracy = trace(@src()); defer tracy.end(); if (Object.isObject(file)) { - try self.parseObject(file, path, link_options); + try self.parseObject(file, path, error_ctx); } else { try self.parseLibrary(file, path, .{ .path = null, .needed = false, .weak = false, - }, must_link, dependent_libs, link_options); + }, must_link, dependent_libs, error_ctx); } } @@ -730,7 +749,7 @@ fn parseObject( self: *MachO, file: std.fs.File, path: []const u8, - link_options: *const link.Options, + error_ctx: anytype, ) !void { const tracy = trace(@src()); defer tracy.end(); @@ -758,15 +777,11 @@ fn parseObject( macho.CPU_TYPE_X86_64 => .x86_64, else => unreachable, }; - const self_cpu_arch = link_options.target.cpu.arch; + const self_cpu_arch = self.base.options.target.cpu.arch; if (self_cpu_arch != cpu_arch) { - // TODO convert into an error - log.err("{s}: invalid architecture '{s}', expected '{s}'", .{ - path, - @tagName(cpu_arch), - @tagName(self_cpu_arch), - }); + error_ctx.* = .{ .detected_arch = cpu_arch }; + return error.InvalidArch; } } @@ -777,70 +792,50 @@ pub fn parseLibrary( lib: link.SystemLib, must_link: bool, dependent_libs: anytype, - link_options: *const link.Options, + error_ctx: anytype, ) !void { const tracy = trace(@src()); defer tracy.end(); - const cpu_arch = link_options.target.cpu.arch; + const cpu_arch = self.base.options.target.cpu.arch; if (fat.isFatLibrary(file)) { - const offset = self.parseFatLibrary(file, path, 
cpu_arch) catch |err| switch (err) { - error.MissingArch => return, - else => |e| return e, - }; + const offset = try self.parseFatLibrary(file, cpu_arch); try file.seekTo(offset); if (Archive.isArchive(file, offset)) { - try self.parseArchive(path, offset, must_link, cpu_arch); + try self.parseArchive(path, offset, must_link, cpu_arch, error_ctx); } else if (Dylib.isDylib(file, offset)) { - try self.parseDylib(file, path, offset, dependent_libs, link_options, .{ + try self.parseDylib(file, path, offset, dependent_libs, .{ .needed = lib.needed, .weak = lib.weak, - }); - } else { - // TODO convert into an error - log.err("{s}: unknown file type", .{path}); - return; - } + }, error_ctx); + } else return error.UnknownFileType; } else if (Archive.isArchive(file, 0)) { - try self.parseArchive(path, 0, must_link, cpu_arch); + try self.parseArchive(path, 0, must_link, cpu_arch, error_ctx); } else if (Dylib.isDylib(file, 0)) { - try self.parseDylib(file, path, 0, dependent_libs, link_options, .{ + try self.parseDylib(file, path, 0, dependent_libs, .{ .needed = lib.needed, .weak = lib.weak, - }); + }, error_ctx); } else { - self.parseLibStub(file, path, dependent_libs, link_options, .{ + self.parseLibStub(file, path, dependent_libs, .{ .needed = lib.needed, .weak = lib.weak, }) catch |err| switch (err) { - error.NotLibStub, error.UnexpectedToken => { - // TODO convert into an error - log.err("{s}: unknown file type", .{path}); - return; - }, + error.NotLibStub, error.UnexpectedToken => return error.UnknownFileType, else => |e| return e, }; } } -pub fn parseFatLibrary( - self: *MachO, - file: std.fs.File, - path: []const u8, - cpu_arch: std.Target.Cpu.Arch, -) !u64 { +pub fn parseFatLibrary(self: *MachO, file: std.fs.File, cpu_arch: std.Target.Cpu.Arch) !u64 { _ = self; var buffer: [2]fat.Arch = undefined; const fat_archs = try fat.parseArchs(file, &buffer); const offset = for (fat_archs) |arch| { if (arch.tag == cpu_arch) break arch.offset; - } else { - // TODO convert into 
an error - log.err("{s}: missing arch in universal file: expected {s}", .{ path, @tagName(cpu_arch) }); - return error.MissingArch; - }; + } else return error.MissingArchFatLib; return offset; } @@ -850,13 +845,13 @@ fn parseArchive( fat_offset: u64, must_link: bool, cpu_arch: std.Target.Cpu.Arch, + error_ctx: anytype, ) !void { const gpa = self.base.allocator; // We take ownership of the file so that we can store it for the duration of symbol resolution. // TODO we shouldn't need to do that and could pre-parse the archive like we do for zld/ELF? const file = try std.fs.cwd().openFile(path, .{}); - errdefer file.close(); try file.seekTo(fat_offset); var archive = Archive{ @@ -882,13 +877,8 @@ fn parseArchive( else => unreachable, }; if (cpu_arch != parsed_cpu_arch) { - // TODO convert into an error - log.err("{s}: invalid architecture in archive '{s}', expected '{s}'", .{ - path, - @tagName(parsed_cpu_arch), - @tagName(cpu_arch), - }); - return error.MissingArch; + error_ctx.* = .{ .detected_arch = parsed_cpu_arch }; + return error.InvalidArch; } } @@ -923,11 +913,11 @@ fn parseDylib( path: []const u8, offset: u64, dependent_libs: anytype, - link_options: *const link.Options, dylib_options: DylibOpts, + error_ctx: anytype, ) !void { const gpa = self.base.allocator; - const self_cpu_arch = link_options.target.cpu.arch; + const self_cpu_arch = self.base.options.target.cpu.arch; const file_stat = try file.stat(); const file_size = math.cast(usize, file_stat.size - offset) orelse return error.Overflow; @@ -952,18 +942,13 @@ fn parseDylib( else => unreachable, }; if (self_cpu_arch != cpu_arch) { - // TODO convert into an error - log.err("{s}: invalid architecture '{s}', expected '{s}'", .{ - path, - @tagName(cpu_arch), - @tagName(self_cpu_arch), - }); - return error.MissingArch; + error_ctx.* = .{ .detected_arch = cpu_arch }; + return error.InvalidArch; } // TODO verify platform - self.addDylib(dylib, link_options, .{ + self.addDylib(dylib, .{ .needed = 
dylib_options.needed, .weak = dylib_options.weak, }) catch |err| switch (err) { @@ -977,7 +962,6 @@ fn parseLibStub( file: std.fs.File, path: []const u8, dependent_libs: anytype, - link_options: *const link.Options, dylib_options: DylibOpts, ) !void { const gpa = self.base.allocator; @@ -993,14 +977,14 @@ fn parseLibStub( try dylib.parseFromStub( gpa, - link_options.target, + self.base.options.target, lib_stub, @intCast(self.dylibs.items.len), // TODO defer it till later dependent_libs, path, ); - self.addDylib(dylib, link_options, .{ + self.addDylib(dylib, .{ .needed = dylib_options.needed, .weak = dylib_options.weak, }) catch |err| switch (err) { @@ -1009,12 +993,7 @@ fn parseLibStub( }; } -fn addDylib( - self: *MachO, - dylib: Dylib, - link_options: *const link.Options, - dylib_options: DylibOpts, -) !void { +fn addDylib(self: *MachO, dylib: Dylib, dylib_options: DylibOpts) !void { if (dylib_options.id) |id| { if (dylib.id.?.current_version < id.compatibility_version) { // TODO convert into an error @@ -1034,7 +1013,7 @@ fn addDylib( try self.dylibs.append(gpa, dylib); const should_link_dylib_even_if_unreachable = blk: { - if (link_options.dead_strip_dylibs and !dylib_options.needed) break :blk false; + if (self.base.options.dead_strip_dylibs and !dylib_options.needed) break :blk false; break :blk !(dylib_options.dependent or self.referenced_dylibs.contains(gop.value_ptr.*)); }; @@ -1043,7 +1022,7 @@ fn addDylib( } } -pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype, link_options: *const link.Options) !void { +pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype, error_ctx: anytype) !void { const tracy = trace(@src()); defer tracy.end(); @@ -1075,7 +1054,7 @@ pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype, link_options: * for (&[_][]const u8{ extension, ".tbd" }) |ext| { const with_ext = try std.fmt.allocPrint(arena, "{s}{s}", .{ without_ext, ext }); - const full_path = if (link_options.sysroot) |root| + const full_path 
= if (self.base.options.sysroot) |root| try fs.path.join(arena, &.{ root, with_ext }) else with_ext; @@ -1089,21 +1068,18 @@ pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype, link_options: * log.debug("trying dependency at fully resolved path {s}", .{full_path}); const offset: u64 = if (fat.isFatLibrary(file)) blk: { - const offset = self.parseFatLibrary(file, full_path, link_options.target.cpu.arch) catch |err| switch (err) { - error.MissingArch => break, - else => |e| return e, - }; + const offset = try self.parseFatLibrary(file, self.base.options.target.cpu.arch); try file.seekTo(offset); break :blk offset; } else 0; if (Dylib.isDylib(file, offset)) { - try self.parseDylib(file, full_path, offset, dependent_libs, link_options, .{ + try self.parseDylib(file, full_path, offset, dependent_libs, .{ .dependent = true, .weak = weak, - }); + }, error_ctx); } else { - self.parseLibStub(file, full_path, dependent_libs, link_options, .{ + self.parseLibStub(file, full_path, dependent_libs, .{ .dependent = true, .weak = weak, }) catch |err| switch (err) { @@ -4836,6 +4812,18 @@ pub fn getSectionPrecedence(header: macho.section_64) u8 { return (@as(u8, @intCast(segment_precedence)) << 4) + section_precedence; } +pub fn reportParseError(self: *MachO, path: []const u8, comptime format: []const u8, args: anytype) !void { + const gpa = self.base.allocator; + try self.misc_errors.ensureUnusedCapacity(gpa, 1); + var notes = try gpa.alloc(File.ErrorMsg, 1); + errdefer gpa.free(notes); + notes[0] = .{ .msg = try std.fmt.allocPrint(gpa, "while parsing {s}", .{path}) }; + self.misc_errors.appendAssumeCapacity(.{ + .msg = try std.fmt.allocPrint(gpa, format, args), + .notes = notes, + }); +} + pub fn reportUndefined(self: *MachO) error{OutOfMemory}!void { const gpa = self.base.allocator; const count = self.unresolved.count(); diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 152c276ddd..074c61242d 100644 --- a/src/link/MachO/zld.zig +++ 
b/src/link/MachO/zld.zig @@ -345,6 +345,11 @@ pub fn linkWithZld( parent: u16, }, .Dynamic).init(arena); + var parse_error_ctx: union { + none: void, + detected_arch: std.Target.Cpu.Arch, + } = .{ .none = {} }; + for (positionals.items) |obj| { const in_file = try std.fs.cwd().openFile(obj.path, .{}); defer in_file.close(); @@ -354,11 +359,24 @@ pub fn linkWithZld( obj.path, obj.must_link, &dependent_libs, - options, - ) catch |err| { - // TODO convert to error - log.err("{s}: parsing positional failed with err {s}", .{ obj.path, @errorName(err) }); - continue; + &parse_error_ctx, + ) catch |err| switch (err) { + error.UnknownFileType => try macho_file.reportParseError(obj.path, "unknown file type", .{}), + error.MissingArchFatLib => try macho_file.reportParseError( + obj.path, + "missing architecture in universal file, expected '{s}'", + .{@tagName(cpu_arch)}, + ), + error.InvalidArch => try macho_file.reportParseError( + obj.path, + "invalid architecture '{s}', expected '{s}'", + .{ @tagName(parse_error_ctx.detected_arch), @tagName(cpu_arch) }, + ), + else => |e| try macho_file.reportParseError( + obj.path, + "parsing positional argument failed with error '{s}'", + .{@errorName(e)}, + ), }; } @@ -372,15 +390,28 @@ pub fn linkWithZld( lib, false, &dependent_libs, - options, - ) catch |err| { - // TODO convert to error - log.err("{s}: parsing library failed with err {s}", .{ path, @errorName(err) }); - continue; + &parse_error_ctx, + ) catch |err| switch (err) { + error.UnknownFileType => try macho_file.reportParseError(path, "unknown file type", .{}), + error.MissingArchFatLib => try macho_file.reportParseError( + path, + "missing architecture in universal file, expected '{s}'", + .{@tagName(cpu_arch)}, + ), + error.InvalidArch => try macho_file.reportParseError( + path, + "invalid architecture '{s}', expected '{s}'", + .{ @tagName(parse_error_ctx.detected_arch), @tagName(cpu_arch) }, + ), + else => |e| try macho_file.reportParseError( + path, + "parsing library 
failed with error '{s}'", + .{@errorName(e)}, + ), }; } - macho_file.parseDependentLibs(&dependent_libs, options) catch |err| { + macho_file.parseDependentLibs(&dependent_libs, &parse_error_ctx) catch |err| { // TODO convert to error log.err("parsing dependent libraries failed with err {s}", .{@errorName(err)}); }; From 2473ccc3358a33c0827ec7f4ea2ecfe18a1055ec Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 28 Aug 2023 17:34:10 +0200 Subject: [PATCH 40/57] macho: create an explicit error set for parse functions --- src/link/MachO.zig | 52 ++++++++++++++++++++++++---------------- src/link/MachO/Dylib.zig | 2 +- src/link/MachO/zld.zig | 2 ++ src/link/tapi.zig | 10 ++++++-- 4 files changed, 43 insertions(+), 23 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 73ef01c4e4..e7ab05876e 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -419,6 +419,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No &dependent_libs, &parse_error_ctx, ) catch |err| switch (err) { + error.DylibAlreadyExists => {}, error.UnknownFileType => try self.reportParseError(path, "unknown file type", .{}), error.MissingArchFatLib => try self.reportParseError( path, @@ -723,6 +724,22 @@ fn resolveLib( return full_path; } +const ParseError = error{ + UnknownFileType, + MissingArchFatLib, + InvalidArch, + DylibAlreadyExists, + IncompatibleDylibVersion, + OutOfMemory, + Overflow, + InputOutput, + MalformedArchive, + NotLibStub, + EndOfStream, + FileSystem, + NotSupported, +} || std.os.SeekError || std.fs.File.OpenError || std.fs.File.ReadError || tapi.TapiError; + pub fn parsePositional( self: *MachO, file: std.fs.File, @@ -730,7 +747,7 @@ pub fn parsePositional( must_link: bool, dependent_libs: anytype, error_ctx: anytype, -) !void { +) ParseError!void { const tracy = trace(@src()); defer tracy.end(); @@ -750,7 +767,7 @@ fn parseObject( file: std.fs.File, path: []const u8, error_ctx: anytype, -) !void { +) ParseError!void { const 
tracy = trace(@src()); defer tracy.end(); @@ -793,7 +810,7 @@ pub fn parseLibrary( must_link: bool, dependent_libs: anytype, error_ctx: anytype, -) !void { +) ParseError!void { const tracy = trace(@src()); defer tracy.end(); @@ -829,7 +846,7 @@ pub fn parseLibrary( } } -pub fn parseFatLibrary(self: *MachO, file: std.fs.File, cpu_arch: std.Target.Cpu.Arch) !u64 { +pub fn parseFatLibrary(self: *MachO, file: std.fs.File, cpu_arch: std.Target.Cpu.Arch) ParseError!u64 { _ = self; var buffer: [2]fat.Arch = undefined; const fat_archs = try fat.parseArchs(file, &buffer); @@ -846,7 +863,7 @@ fn parseArchive( must_link: bool, cpu_arch: std.Target.Cpu.Arch, error_ctx: anytype, -) !void { +) ParseError!void { const gpa = self.base.allocator; // We take ownership of the file so that we can store it for the duration of symbol resolution. @@ -915,7 +932,7 @@ fn parseDylib( dependent_libs: anytype, dylib_options: DylibOpts, error_ctx: anytype, -) !void { +) ParseError!void { const gpa = self.base.allocator; const self_cpu_arch = self.base.options.target.cpu.arch; @@ -948,13 +965,10 @@ fn parseDylib( // TODO verify platform - self.addDylib(dylib, .{ + try self.addDylib(dylib, .{ .needed = dylib_options.needed, .weak = dylib_options.weak, - }) catch |err| switch (err) { - error.DylibAlreadyExists => dylib.deinit(gpa), - else => |e| return e, - }; + }); } fn parseLibStub( @@ -963,7 +977,7 @@ fn parseLibStub( path: []const u8, dependent_libs: anytype, dylib_options: DylibOpts, -) !void { +) ParseError!void { const gpa = self.base.allocator; var lib_stub = try LibStub.loadFromFile(gpa, file); defer lib_stub.deinit(); @@ -984,16 +998,13 @@ fn parseLibStub( path, ); - self.addDylib(dylib, .{ + try self.addDylib(dylib, .{ .needed = dylib_options.needed, .weak = dylib_options.weak, - }) catch |err| switch (err) { - error.DylibAlreadyExists => dylib.deinit(gpa), - else => |e| return e, - }; + }); } -fn addDylib(self: *MachO, dylib: Dylib, dylib_options: DylibOpts) !void { +fn addDylib(self: 
*MachO, dylib: Dylib, dylib_options: DylibOpts) ParseError!void { if (dylib_options.id) |id| { if (dylib.id.?.current_version < id.compatibility_version) { // TODO convert into an error @@ -1022,7 +1033,7 @@ fn addDylib(self: *MachO, dylib: Dylib, dylib_options: DylibOpts) !void { } } -pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype, error_ctx: anytype) !void { +pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype, error_ctx: anytype) ParseError!void { const tracy = trace(@src()); defer tracy.end(); @@ -5145,6 +5156,7 @@ const link = @import("../link.zig"); const llvm_backend = @import("../codegen/llvm.zig"); const load_commands = @import("MachO/load_commands.zig"); const stubs = @import("MachO/stubs.zig"); +const tapi = @import("tapi.zig"); const target_util = @import("../target.zig"); const thunks = @import("MachO/thunks.zig"); const trace = @import("../tracy.zig").trace; @@ -5162,7 +5174,7 @@ const DwarfInfo = @import("MachO/DwarfInfo.zig"); const Dylib = @import("MachO/Dylib.zig"); const File = link.File; const Object = @import("MachO/Object.zig"); -const LibStub = @import("tapi.zig").LibStub; +const LibStub = tapi.LibStub; const Liveness = @import("../Liveness.zig"); const LlvmObject = @import("../codegen/llvm.zig").Object; const Md5 = std.crypto.hash.Md5; diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index 19a9eb8cd4..bee276881e 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -320,7 +320,7 @@ pub fn parseFromStub( dependent_libs: anytype, name: []const u8, ) !void { - if (lib_stub.inner.len == 0) return error.EmptyStubFile; + if (lib_stub.inner.len == 0) return error.NotLibStub; log.debug("parsing shared library from stub '{s}'", .{name}); diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 074c61242d..1be3579856 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -361,6 +361,7 @@ pub fn linkWithZld( &dependent_libs, &parse_error_ctx, ) catch |err| switch 
(err) { + error.DylibAlreadyExists => {}, error.UnknownFileType => try macho_file.reportParseError(obj.path, "unknown file type", .{}), error.MissingArchFatLib => try macho_file.reportParseError( obj.path, @@ -392,6 +393,7 @@ pub fn linkWithZld( &dependent_libs, &parse_error_ctx, ) catch |err| switch (err) { + error.DylibAlreadyExists => {}, error.UnknownFileType => try macho_file.reportParseError(path, "unknown file type", .{}), error.MissingArchFatLib => try macho_file.reportParseError( path, diff --git a/src/link/tapi.zig b/src/link/tapi.zig index 98ee2ed5dd..f9ffd43d62 100644 --- a/src/link/tapi.zig +++ b/src/link/tapi.zig @@ -2,9 +2,10 @@ const std = @import("std"); const fs = std.fs; const mem = std.mem; const log = std.log.scoped(.tapi); +const yaml = @import("tapi/yaml.zig"); const Allocator = mem.Allocator; -const Yaml = @import("tapi/yaml.zig").Yaml; +const Yaml = yaml.Yaml; const VersionField = union(enum) { string: []const u8, @@ -102,6 +103,11 @@ pub const Tbd = union(enum) { } }; +pub const TapiError = error{ + NotLibStub, + FileTooBig, +} || yaml.YamlError || std.fs.File.ReadError; + pub const LibStub = struct { /// Underlying memory for stub's contents. yaml: Yaml, @@ -109,7 +115,7 @@ pub const LibStub = struct { /// Typed contents of the tbd file. 
inner: []Tbd, - pub fn loadFromFile(allocator: Allocator, file: fs.File) !LibStub { + pub fn loadFromFile(allocator: Allocator, file: fs.File) TapiError!LibStub { const source = try file.readToEndAlloc(allocator, std.math.maxInt(u32)); defer allocator.free(source); From 052984c5ae43a9846f012c3b3b1449c8aef47d2f Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 28 Aug 2023 19:37:09 +0200 Subject: [PATCH 41/57] macho: remove MachO.requiresThunks as it is obsolete --- src/link/MachO.zig | 4 ---- src/link/MachO/zld.zig | 8 ++++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index e7ab05876e..4cca79f994 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -4767,10 +4767,6 @@ pub inline fn getPageSize(cpu_arch: std.Target.Cpu.Arch) u16 { }; } -pub inline fn requiresThunks(self: MachO) bool { - return self.base.options.target.cpu.arch == .aarch64; -} - pub fn requiresCodeSignature(self: MachO) bool { if (self.base.options.entitlements) |_| return true; const cpu_arch = self.base.options.target.cpu.arch; diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 1be3579856..efa23d4641 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -488,7 +488,7 @@ pub fn linkWithZld( } try writeAtoms(macho_file); - if (macho_file.requiresThunks()) try writeThunks(macho_file); + if (macho_file.base.options.target.cpu.arch == .aarch64) try writeThunks(macho_file); try writeDyldPrivateAtom(macho_file); if (macho_file.stubs_section_index) |_| { @@ -778,7 +778,7 @@ fn writeDyldPrivateAtom(macho_file: *MachO) !void { } fn writeThunks(macho_file: *MachO) !void { - assert(macho_file.requiresThunks()); + assert(macho_file.base.options.target.cpu.arch == .aarch64); const gpa = macho_file.base.allocator; const sect_id = macho_file.text_section_index orelse return; @@ -991,7 +991,7 @@ fn calcSectionSizes(macho_file: *MachO) !void { for (slice.items(.header), 0..) 
|*header, sect_id| { if (header.size == 0) continue; if (macho_file.text_section_index) |txt| { - if (txt == sect_id and macho_file.requiresThunks()) continue; + if (txt == sect_id and macho_file.base.options.target.cpu.arch == .aarch64) continue; } var atom_index = slice.items(.first_atom_index)[sect_id] orelse continue; @@ -1017,7 +1017,7 @@ fn calcSectionSizes(macho_file: *MachO) !void { } } - if (macho_file.text_section_index != null and macho_file.requiresThunks()) { + if (macho_file.text_section_index != null and macho_file.base.options.target.cpu.arch == .aarch64) { // Create jump/branch range extenders if needed. try thunks.createThunks(macho_file, macho_file.text_section_index.?); } From ec03619dcfa025442f6a70cbdfeafd5882591b64 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 28 Aug 2023 19:40:10 +0200 Subject: [PATCH 42/57] macho: make MachO.requiresCodeSignature accept link.Options --- src/link/MachO.zig | 12 ++++++------ src/link/MachO/load_commands.zig | 14 +++----------- src/link/MachO/zld.zig | 2 +- 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 4cca79f994..6a1f21778d 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -530,7 +530,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.writeLinkeditSegmentData(); - var codesig: ?CodeSignature = if (self.requiresCodeSignature()) blk: { + var codesig: ?CodeSignature = if (requiresCodeSignature(&self.base.options)) blk: { // Preallocate space for the code signature. // We need to do this at this stage so that we have the load commands with proper values // written out to the file. 
@@ -4767,11 +4767,11 @@ pub inline fn getPageSize(cpu_arch: std.Target.Cpu.Arch) u16 { }; } -pub fn requiresCodeSignature(self: MachO) bool { - if (self.base.options.entitlements) |_| return true; - const cpu_arch = self.base.options.target.cpu.arch; - const os_tag = self.base.options.target.os.tag; - const abi = self.base.options.target.abi; +pub fn requiresCodeSignature(options: *const link.Options) bool { + if (options.entitlements) |_| return true; + const cpu_arch = options.target.cpu.arch; + const os_tag = options.target.os.tag; + const abi = options.target.abi; if (cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator)) return true; return false; } diff --git a/src/link/MachO/load_commands.zig b/src/link/MachO/load_commands.zig index b548bee2fc..d7b13104bf 100644 --- a/src/link/MachO/load_commands.zig +++ b/src/link/MachO/load_commands.zig @@ -91,17 +91,8 @@ fn calcLCsSize(gpa: Allocator, options: *const link.Options, ctx: CalcLCsSizeCtx ); } // LC_CODE_SIGNATURE - { - const target = options.target; - const requires_codesig = blk: { - if (options.entitlements) |_| break :blk true; - if (target.cpu.arch == .aarch64 and (target.os.tag == .macos or target.abi == .simulator)) - break :blk true; - break :blk false; - }; - if (requires_codesig) { - sizeofcmds += @sizeOf(macho.linkedit_data_command); - } + if (MachO.requiresCodeSignature(options)) { + sizeofcmds += @sizeOf(macho.linkedit_data_command); } return @as(u32, @intCast(sizeofcmds)); @@ -374,3 +365,4 @@ const mem = std.mem; const Allocator = mem.Allocator; const Dylib = @import("Dylib.zig"); +const MachO = @import("../MachO.zig"); diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index efa23d4641..011158ba24 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -529,7 +529,7 @@ pub fn linkWithZld( } // Write code signature padding if required - var codesig: ?CodeSignature = if (macho_file.requiresCodeSignature()) blk: { + var codesig: ?CodeSignature = if 
(MachO.requiresCodeSignature(&macho_file.base.options)) blk: { // Preallocate space for the code signature. // We need to do this at this stage so that we have the load commands with proper values // written out to the file. From 2e28ab153c39df77403eb64f282589349f05921d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 28 Aug 2023 20:55:45 +0200 Subject: [PATCH 43/57] macho: parse platform info from each object file into Platform struct --- src/link/MachO.zig | 19 +++- src/link/MachO/Dylib.zig | 8 +- src/link/MachO/Object.zig | 21 ++++ src/link/MachO/load_commands.zig | 172 ++++++++++++++++++++++++++----- src/link/MachO/zld.zig | 22 +++- 5 files changed, 203 insertions(+), 39 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 6a1f21778d..5bde4575c5 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -585,7 +585,18 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try lc_writer.writeStruct(macho.source_version_command{ .version = 0, }); - try load_commands.writeBuildVersionLC(&self.base.options, lc_writer); + { + const platform = load_commands.Platform.fromOptions(&self.base.options); + const sdk_version: ?std.SemanticVersion = self.base.options.darwin_sdk_version orelse blk: { + if (self.base.options.sysroot) |path| break :blk load_commands.inferSdkVersionFromSdkPath(path); + break :blk null; + }; + if (platform.isBuildVersionCompatible()) { + try load_commands.writeBuildVersionLC(platform, sdk_version, lc_writer); + } else { + try load_commands.writeVersionMinLC(platform, sdk_version, lc_writer); + } + } const uuid_cmd_offset = @sizeOf(macho.mach_header_64) + @as(u32, @intCast(lc_buffer.items.len)); try lc_writer.writeStruct(self.uuid_cmd); @@ -797,7 +808,7 @@ fn parseObject( const self_cpu_arch = self.base.options.target.cpu.arch; if (self_cpu_arch != cpu_arch) { - error_ctx.* = .{ .detected_arch = cpu_arch }; + error_ctx.detected_arch = cpu_arch; return error.InvalidArch; } } @@ -894,7 +905,7 @@ 
fn parseArchive( else => unreachable, }; if (cpu_arch != parsed_cpu_arch) { - error_ctx.* = .{ .detected_arch = parsed_cpu_arch }; + error_ctx.detected_arch = parsed_cpu_arch; return error.InvalidArch; } } @@ -959,7 +970,7 @@ fn parseDylib( else => unreachable, }; if (self_cpu_arch != cpu_arch) { - error_ctx.* = .{ .detected_arch = cpu_arch }; + error_ctx.detected_arch = cpu_arch; return error.InvalidArch; } diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index bee276881e..6dd7b6ae96 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -217,7 +217,7 @@ const TargetMatcher = struct { target: CrossTarget, target_strings: std.ArrayListUnmanaged([]const u8) = .{}, - fn init(allocator: Allocator, target: CrossTarget) !TargetMatcher { + pub fn init(allocator: Allocator, target: CrossTarget) !TargetMatcher { var self = TargetMatcher{ .allocator = allocator, .target = target, @@ -239,7 +239,7 @@ const TargetMatcher = struct { return self; } - fn deinit(self: *TargetMatcher) void { + pub fn deinit(self: *TargetMatcher) void { for (self.target_strings.items) |t| { self.allocator.free(t); } @@ -263,7 +263,7 @@ const TargetMatcher = struct { }; } - fn targetToAppleString(allocator: Allocator, target: CrossTarget) ![]const u8 { + pub fn targetToAppleString(allocator: Allocator, target: CrossTarget) ![]const u8 { const cpu_arch = cpuArchToAppleString(target.cpu_arch.?); const os_tag = @tagName(target.os_tag.?); const target_abi = abiToAppleString(target.abi orelse .none); @@ -291,7 +291,7 @@ const TargetMatcher = struct { return hasValue(archs, cpuArchToAppleString(self.target.cpu_arch.?)); } - fn matchesTargetTbd(self: TargetMatcher, tbd: Tbd) !bool { + pub fn matchesTargetTbd(self: TargetMatcher, tbd: Tbd) !bool { var arena = std.heap.ArenaAllocator.init(self.allocator); defer arena.deinit(); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 4af0c3e7aa..92f2899d8b 100644 --- a/src/link/MachO/Object.zig +++ 
b/src/link/MachO/Object.zig @@ -940,6 +940,26 @@ pub fn parseDwarfInfo(self: Object) DwarfInfo { return di; } +/// Returns Options.Platform composed from the first encountered build version type load command: +/// either LC_BUILD_VERSION or LC_VERSION_MIN_*. +pub fn getPlatform(self: Object) ?Platform { + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + }; + while (it.next()) |cmd| { + switch (cmd.cmd()) { + .BUILD_VERSION, + .VERSION_MIN_MACOSX, + .VERSION_MIN_IPHONEOS, + .VERSION_MIN_TVOS, + .VERSION_MIN_WATCHOS, + => return Platform.fromLoadCommand(cmd), + else => {}, + } + } else return null; +} + pub fn getSectionContents(self: Object, sect: macho.section_64) []const u8 { const size = @as(usize, @intCast(sect.size)); return self.contents[sect.offset..][0..size]; @@ -1089,5 +1109,6 @@ const Atom = @import("Atom.zig"); const DwarfInfo = @import("DwarfInfo.zig"); const LoadCommandIterator = macho.LoadCommandIterator; const MachO = @import("../MachO.zig"); +const Platform = @import("load_commands.zig").Platform; const SymbolWithLoc = MachO.SymbolWithLoc; const UnwindInfo = @import("UnwindInfo.zig"); diff --git a/src/link/MachO/load_commands.zig b/src/link/MachO/load_commands.zig index d7b13104bf..6b326c34d3 100644 --- a/src/link/MachO/load_commands.zig +++ b/src/link/MachO/load_commands.zig @@ -76,8 +76,14 @@ fn calcLCsSize(gpa: Allocator, options: *const link.Options, ctx: CalcLCsSizeCtx } // LC_SOURCE_VERSION sizeofcmds += @sizeOf(macho.source_version_command); - // LC_BUILD_VERSION - sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + // LC_BUILD_VERSION or LC_VERSION_MIN_ + if (Platform.fromOptions(options).isBuildVersionCompatible()) { + // LC_BUILD_VERSION + sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + } else { + // LC_VERSION_MIN_ + sizeofcmds += 
@sizeOf(macho.version_min_command); + } // LC_UUID sizeofcmds += @sizeOf(macho.uuid_command); // LC_LOAD_DYLIB @@ -252,33 +258,28 @@ pub fn writeRpathLCs(gpa: Allocator, options: *const link.Options, lc_writer: an } } -pub fn writeBuildVersionLC(options: *const link.Options, lc_writer: anytype) !void { +pub fn writeVersionMinLC(platform: Platform, sdk_version: ?std.SemanticVersion, lc_writer: anytype) !void { + const cmd: macho.LC = switch (platform.os_tag) { + .macos => .VERSION_MIN_MACOSX, + .ios => .VERSION_MIN_IPHONEOS, + .tvos => .VERSION_MIN_TVOS, + .watchos => .VERSION_MIN_WATCHOS, + else => unreachable, + }; + try lc_writer.writeAll(mem.asBytes(&macho.version_min_command{ + .cmd = cmd, + .version = platform.toAppleVersion(), + .sdk = if (sdk_version) |ver| Platform.semanticVersionToAppleVersion(ver) else platform.toAppleVersion(), + })); +} + +pub fn writeBuildVersionLC(platform: Platform, sdk_version: ?std.SemanticVersion, lc_writer: anytype) !void { const cmdsize = @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); - const platform_version = blk: { - const ver = options.target.os.version_range.semver.min; - const platform_version = @as(u32, @intCast(ver.major << 16 | ver.minor << 8)); - break :blk platform_version; - }; - const sdk_version: ?std.SemanticVersion = options.darwin_sdk_version orelse blk: { - if (options.sysroot) |path| break :blk inferSdkVersionFromSdkPath(path); - break :blk null; - }; - const sdk_version_value: u32 = if (sdk_version) |ver| - @intCast(ver.major << 16 | ver.minor << 8) - else - platform_version; - const is_simulator_abi = options.target.abi == .simulator; try lc_writer.writeStruct(macho.build_version_command{ .cmdsize = cmdsize, - .platform = switch (options.target.os.tag) { - .macos => .MACOS, - .ios => if (is_simulator_abi) macho.PLATFORM.IOSSIMULATOR else macho.PLATFORM.IOS, - .watchos => if (is_simulator_abi) macho.PLATFORM.WATCHOSSIMULATOR else macho.PLATFORM.WATCHOS, - .tvos => if 
(is_simulator_abi) macho.PLATFORM.TVOSSIMULATOR else macho.PLATFORM.TVOS, - else => unreachable, - }, - .minos = platform_version, - .sdk = sdk_version_value, + .platform = platform.toApplePlatform(), + .minos = platform.toAppleVersion(), + .sdk = if (sdk_version) |ver| Platform.semanticVersionToAppleVersion(ver) else platform.toAppleVersion(), .ntools = 1, }); try lc_writer.writeAll(mem.asBytes(&macho.build_tool_version{ @@ -301,7 +302,124 @@ pub fn writeLoadDylibLCs(dylibs: []const Dylib, referenced: []u16, lc_writer: an } } -fn inferSdkVersionFromSdkPath(path: []const u8) ?std.SemanticVersion { +pub const Platform = struct { + os_tag: std.Target.Os.Tag, + abi: std.Target.Abi, + version: std.SemanticVersion, + + /// Using Apple's ld64 as our blueprint, `min_version` as well as `sdk_version` are set to + /// the extracted minimum platform version. + pub fn fromLoadCommand(lc: macho.LoadCommandIterator.LoadCommand) Platform { + switch (lc.cmd()) { + .BUILD_VERSION => { + const cmd = lc.cast(macho.build_version_command).?; + return .{ + .os_tag = switch (cmd.platform) { + .MACOS => .macos, + .IOS, .IOSSIMULATOR => .ios, + .TVOS, .TVOSSIMULATOR => .tvos, + .WATCHOS, .WATCHOSSIMULATOR => .watchos, + else => @panic("TODO"), + }, + .abi = switch (cmd.platform) { + .IOSSIMULATOR, + .TVOSSIMULATOR, + .WATCHOSSIMULATOR, + => .simulator, + else => .none, + }, + .version = appleVersionToSemanticVersion(cmd.minos), + }; + }, + .VERSION_MIN_MACOSX, + .VERSION_MIN_IPHONEOS, + .VERSION_MIN_TVOS, + .VERSION_MIN_WATCHOS, + => { + const cmd = lc.cast(macho.version_min_command).?; + return .{ + .os_tag = switch (lc.cmd()) { + .VERSION_MIN_MACOSX => .macos, + .VERSION_MIN_IPHONEOS => .ios, + .VERSION_MIN_TVOS => .tvos, + .VERSION_MIN_WATCHOS => .watchos, + else => unreachable, + }, + .abi = .none, + .version = appleVersionToSemanticVersion(cmd.version), + }; + }, + else => unreachable, + } + } + + pub fn fromOptions(options: *const link.Options) Platform { + return .{ + .os_tag = 
options.target.os.tag, + .abi = options.target.abi, + .version = options.target.os.version_range.semver.min, + }; + } + + pub fn toAppleVersion(plat: Platform) u32 { + return semanticVersionToAppleVersion(plat.version); + } + + pub fn toApplePlatform(plat: Platform) macho.PLATFORM { + return switch (plat.os_tag) { + .macos => .MACOS, + .ios => if (plat.abi == .simulator) .IOSSIMULATOR else .IOS, + .tvos => if (plat.abi == .simulator) .TVOSSIMULATOR else .TVOS, + .watchos => if (plat.abi == .simulator) .WATCHOSSIMULATOR else .WATCHOS, + else => unreachable, + }; + } + + pub fn isBuildVersionCompatible(plat: Platform) bool { + inline for (supported_platforms) |sup_plat| { + if (sup_plat[0] == plat.os_tag and sup_plat[1] == plat.abi) { + return sup_plat[2] <= plat.toAppleVersion(); + } + } + return false; + } + + pub inline fn semanticVersionToAppleVersion(version: std.SemanticVersion) u32 { + const major = version.major; + const minor = version.minor; + const patch = version.patch; + return (@as(u32, @intCast(major)) << 16) | (@as(u32, @intCast(minor)) << 8) | @as(u32, @intCast(patch)); + } + + inline fn appleVersionToSemanticVersion(version: u32) std.SemanticVersion { + return .{ + .major = @as(u16, @truncate(version >> 16)), + .minor = @as(u8, @truncate(version >> 8)), + .patch = @as(u8, @truncate(version)), + }; + } +}; + +const SupportedPlatforms = struct { + std.Target.Os.Tag, + std.Target.Abi, + u32, // Min platform version for which to emit LC_BUILD_VERSION + u32, // Min supported platform version + ?[]const u8, // Env var to look for +}; + +// Source: https://github.com/apple-oss-distributions/ld64/blob/59a99ab60399c5e6c49e6945a9e1049c42b71135/src/ld/PlatformSupport.cpp#L52 +const supported_platforms = [_]SupportedPlatforms{ + .{ .macos, .none, 0xA0E00, 0xA0800, "MACOSX_DEPLOYMENT_TARGET" }, + .{ .ios, .none, 0xC0000, 0x70000, "IPHONEOS_DEPLOYMENT_TARGET" }, + .{ .tvos, .none, 0xC0000, 0x70000, "TVOS_DEPLOYMENT_TARGET" }, + .{ .watchos, .none, 0x50000, 
0x20000, "WATCHOS_DEPLOYMENT_TARGET" }, + .{ .ios, .simulator, 0xD0000, 0x80000, null }, + .{ .tvos, .simulator, 0xD0000, 0x80000, null }, + .{ .watchos, .simulator, 0x60000, 0x20000, null }, +}; + +pub fn inferSdkVersionFromSdkPath(path: []const u8) ?std.SemanticVersion { const stem = std.fs.path.stem(path); const start = for (stem, 0..) |c, i| { if (std.ascii.isDigit(c)) break i; diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 011158ba24..1a850b58a0 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -345,10 +345,13 @@ pub fn linkWithZld( parent: u16, }, .Dynamic).init(arena); - var parse_error_ctx: union { - none: void, + var parse_error_ctx: struct { detected_arch: std.Target.Cpu.Arch, - } = .{ .none = {} }; + detected_os: std.Target.Os.Tag, + } = .{ + .detected_arch = undefined, + .detected_os = undefined, + }; for (positionals.items) |obj| { const in_file = try std.fs.cwd().openFile(obj.path, .{}); @@ -586,7 +589,18 @@ pub fn linkWithZld( try lc_writer.writeStruct(macho.source_version_command{ .version = 0, }); - try load_commands.writeBuildVersionLC(&macho_file.base.options, lc_writer); + { + const platform = load_commands.Platform.fromOptions(&macho_file.base.options); + const sdk_version: ?std.SemanticVersion = macho_file.base.options.darwin_sdk_version orelse blk: { + if (macho_file.base.options.sysroot) |path| break :blk load_commands.inferSdkVersionFromSdkPath(path); + break :blk null; + }; + if (platform.isBuildVersionCompatible()) { + try load_commands.writeBuildVersionLC(platform, sdk_version, lc_writer); + } else { + try load_commands.writeVersionMinLC(platform, sdk_version, lc_writer); + } + } const uuid_cmd_offset = @sizeOf(macho.mach_header_64) + @as(u32, @intCast(lc_buffer.items.len)); try lc_writer.writeStruct(macho_file.uuid_cmd); From 3ece3f83f3457544d58dad5f56dd7ba338f07d9a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 29 Aug 2023 12:00:25 +0200 Subject: [PATCH 44/57] macho: clean up helpers for 
std.SemanticVersion <-> Apple version formatting --- src/link/MachO/load_commands.zig | 34 ++++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/link/MachO/load_commands.zig b/src/link/MachO/load_commands.zig index 6b326c34d3..96b1bd2ac2 100644 --- a/src/link/MachO/load_commands.zig +++ b/src/link/MachO/load_commands.zig @@ -269,7 +269,7 @@ pub fn writeVersionMinLC(platform: Platform, sdk_version: ?std.SemanticVersion, try lc_writer.writeAll(mem.asBytes(&macho.version_min_command{ .cmd = cmd, .version = platform.toAppleVersion(), - .sdk = if (sdk_version) |ver| Platform.semanticVersionToAppleVersion(ver) else platform.toAppleVersion(), + .sdk = if (sdk_version) |ver| semanticVersionToAppleVersion(ver) else platform.toAppleVersion(), })); } @@ -279,7 +279,7 @@ pub fn writeBuildVersionLC(platform: Platform, sdk_version: ?std.SemanticVersion .cmdsize = cmdsize, .platform = platform.toApplePlatform(), .minos = platform.toAppleVersion(), - .sdk = if (sdk_version) |ver| Platform.semanticVersionToAppleVersion(ver) else platform.toAppleVersion(), + .sdk = if (sdk_version) |ver| semanticVersionToAppleVersion(ver) else platform.toAppleVersion(), .ntools = 1, }); try lc_writer.writeAll(mem.asBytes(&macho.build_tool_version{ @@ -383,21 +383,6 @@ pub const Platform = struct { } return false; } - - pub inline fn semanticVersionToAppleVersion(version: std.SemanticVersion) u32 { - const major = version.major; - const minor = version.minor; - const patch = version.patch; - return (@as(u32, @intCast(major)) << 16) | (@as(u32, @intCast(minor)) << 8) | @as(u32, @intCast(patch)); - } - - inline fn appleVersionToSemanticVersion(version: u32) std.SemanticVersion { - return .{ - .major = @as(u16, @truncate(version >> 16)), - .minor = @as(u8, @truncate(version >> 8)), - .patch = @as(u8, @truncate(version)), - }; - } }; const SupportedPlatforms = struct { @@ -419,6 +404,21 @@ const supported_platforms = [_]SupportedPlatforms{ .{ .watchos, 
.simulator, 0x60000, 0x20000, null }, }; +pub inline fn semanticVersionToAppleVersion(version: std.SemanticVersion) u32 { + const major = version.major; + const minor = version.minor; + const patch = version.patch; + return (@as(u32, @intCast(major)) << 16) | (@as(u32, @intCast(minor)) << 8) | @as(u32, @intCast(patch)); +} + +inline fn appleVersionToSemanticVersion(version: u32) std.SemanticVersion { + return .{ + .major = @as(u16, @truncate(version >> 16)), + .minor = @as(u8, @truncate(version >> 8)), + .patch = @as(u8, @truncate(version)), + }; +} + pub fn inferSdkVersionFromSdkPath(path: []const u8) ?std.SemanticVersion { const stem = std.fs.path.stem(path); const start = for (stem, 0..) |c, i| { From 1b01715a73b4156b3cb761d77d09c30791b8027c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 29 Aug 2023 12:02:58 +0200 Subject: [PATCH 45/57] link: remove unused darwin_sdk_version field --- src/link.zig | 1 - src/link/MachO.zig | 8 ++++---- src/link/MachO/zld.zig | 8 ++++---- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/link.zig b/src/link.zig index 724c1500f4..bf27efcc3a 100644 --- a/src/link.zig +++ b/src/link.zig @@ -228,7 +228,6 @@ pub const Options = struct { version: ?std.SemanticVersion, compatibility_version: ?std.SemanticVersion, - darwin_sdk_version: ?std.SemanticVersion = null, libc_installation: ?*const LibCInstallation, dwarf_format: ?std.dwarf.Format, diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 5bde4575c5..f06466b41f 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -587,10 +587,10 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No }); { const platform = load_commands.Platform.fromOptions(&self.base.options); - const sdk_version: ?std.SemanticVersion = self.base.options.darwin_sdk_version orelse blk: { - if (self.base.options.sysroot) |path| break :blk load_commands.inferSdkVersionFromSdkPath(path); - break :blk null; - }; + const sdk_version: ?std.SemanticVersion = if 
(self.base.options.sysroot) |path| + load_commands.inferSdkVersionFromSdkPath(path) + else + null; if (platform.isBuildVersionCompatible()) { try load_commands.writeBuildVersionLC(platform, sdk_version, lc_writer); } else { diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 1a850b58a0..7c1b870ed5 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -591,10 +591,10 @@ pub fn linkWithZld( }); { const platform = load_commands.Platform.fromOptions(&macho_file.base.options); - const sdk_version: ?std.SemanticVersion = macho_file.base.options.darwin_sdk_version orelse blk: { - if (macho_file.base.options.sysroot) |path| break :blk load_commands.inferSdkVersionFromSdkPath(path); - break :blk null; - }; + const sdk_version: ?std.SemanticVersion = if (macho_file.base.options.sysroot) |path| + load_commands.inferSdkVersionFromSdkPath(path) + else + null; if (platform.isBuildVersionCompatible()) { try load_commands.writeBuildVersionLC(platform, sdk_version, lc_writer); } else { From 1cae41bbbb3cb1cf10bfa808ecbe289bfb4d5180 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 29 Aug 2023 12:14:57 +0200 Subject: [PATCH 46/57] macho: clean up array of supported platforms and versions --- src/link/MachO/load_commands.zig | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/link/MachO/load_commands.zig b/src/link/MachO/load_commands.zig index 96b1bd2ac2..5d3a84c87f 100644 --- a/src/link/MachO/load_commands.zig +++ b/src/link/MachO/load_commands.zig @@ -390,19 +390,20 @@ const SupportedPlatforms = struct { std.Target.Abi, u32, // Min platform version for which to emit LC_BUILD_VERSION u32, // Min supported platform version - ?[]const u8, // Env var to look for }; // Source: https://github.com/apple-oss-distributions/ld64/blob/59a99ab60399c5e6c49e6945a9e1049c42b71135/src/ld/PlatformSupport.cpp#L52 +// zig fmt: off const supported_platforms = [_]SupportedPlatforms{ - .{ .macos, .none, 0xA0E00, 0xA0800, 
"MACOSX_DEPLOYMENT_TARGET" }, - .{ .ios, .none, 0xC0000, 0x70000, "IPHONEOS_DEPLOYMENT_TARGET" }, - .{ .tvos, .none, 0xC0000, 0x70000, "TVOS_DEPLOYMENT_TARGET" }, - .{ .watchos, .none, 0x50000, 0x20000, "WATCHOS_DEPLOYMENT_TARGET" }, - .{ .ios, .simulator, 0xD0000, 0x80000, null }, - .{ .tvos, .simulator, 0xD0000, 0x80000, null }, - .{ .watchos, .simulator, 0x60000, 0x20000, null }, + .{ .macos, .none, 0xA0E00, 0xA0800 }, + .{ .ios, .none, 0xC0000, 0x70000 }, + .{ .tvos, .none, 0xC0000, 0x70000 }, + .{ .watchos, .none, 0x50000, 0x20000 }, + .{ .ios, .simulator, 0xD0000, 0x80000 }, + .{ .tvos, .simulator, 0xD0000, 0x80000 }, + .{ .watchos, .simulator, 0x60000, 0x20000 }, }; +// zig fmt: on pub inline fn semanticVersionToAppleVersion(version: std.SemanticVersion) u32 { const major = version.major; From 79b3285aa216350e0c2ff18436a169af69e4570f Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 29 Aug 2023 15:27:44 +0200 Subject: [PATCH 47/57] macho: handle mismatched and missing platform errors --- src/link/MachO.zig | 281 +++++++++++++++++++------------ src/link/MachO/Dylib.zig | 102 +++++------ src/link/MachO/Object.zig | 2 +- src/link/MachO/load_commands.zig | 32 +++- src/link/MachO/zld.zig | 53 ++---- src/link/tapi.zig | 24 +++ 6 files changed, 290 insertions(+), 204 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index f06466b41f..127e9f7027 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -396,16 +396,24 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No self.dylibs_map.clearRetainingCapacity(); self.referenced_dylibs.clearRetainingCapacity(); - const cpu_arch = self.base.options.target.cpu.arch; var dependent_libs = std.fifo.LinearFifo(struct { id: Dylib.Id, parent: u16, }, .Dynamic).init(arena); - var parse_error_ctx: union { - none: void, + var parse_error_ctx: struct { detected_arch: std.Target.Cpu.Arch, - } = .{ .none = {} }; + detected_platform: ?Platform, + detected_stub_targets: []const 
[]const u8, + } = .{ + .detected_arch = undefined, + .detected_platform = null, + .detected_stub_targets = &[0][]const u8{}, + }; + defer { + for (parse_error_ctx.detected_stub_targets) |target| self.base.allocator.free(target); + self.base.allocator.free(parse_error_ctx.detected_stub_targets); + } for (libs.keys(), libs.values()) |path, lib| { const in_file = try std.fs.cwd().openFile(path, .{}); @@ -418,25 +426,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No false, &dependent_libs, &parse_error_ctx, - ) catch |err| switch (err) { - error.DylibAlreadyExists => {}, - error.UnknownFileType => try self.reportParseError(path, "unknown file type", .{}), - error.MissingArchFatLib => try self.reportParseError( - path, - "missing architecture in universal file, expected '{s}'", - .{@tagName(cpu_arch)}, - ), - error.InvalidArch => try self.reportParseError( - path, - "invalid architecture '{s}', expected '{s}'", - .{ @tagName(parse_error_ctx.detected_arch), @tagName(cpu_arch) }, - ), - else => |e| try self.reportParseError( - path, - "parsing library failed with error '{s}'", - .{@errorName(e)}, - ), - }; + ) catch |err| try self.handleAndReportParseError(path, err, parse_error_ctx); } self.parseDependentLibs(&dependent_libs, &parse_error_ctx) catch |err| { @@ -586,7 +576,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No .version = 0, }); { - const platform = load_commands.Platform.fromOptions(&self.base.options); + const platform = Platform.fromTarget(self.base.options.target); const sdk_version: ?std.SemanticVersion = if (self.base.options.sysroot) |path| load_commands.inferSdkVersionFromSdkPath(path) else @@ -738,7 +728,8 @@ fn resolveLib( const ParseError = error{ UnknownFileType, MissingArchFatLib, - InvalidArch, + InvalidTarget, + InvalidLibStubTargets, DylibAlreadyExists, IncompatibleDylibVersion, OutOfMemory, @@ -798,19 +789,24 @@ fn parseObject( }; errdefer object.deinit(gpa); try 
object.parse(gpa); - try self.objects.append(gpa, object); const cpu_arch: std.Target.Cpu.Arch = switch (object.header.cputype) { macho.CPU_TYPE_ARM64 => .aarch64, macho.CPU_TYPE_X86_64 => .x86_64, else => unreachable, }; - const self_cpu_arch = self.base.options.target.cpu.arch; + error_ctx.detected_arch = cpu_arch; - if (self_cpu_arch != cpu_arch) { - error_ctx.detected_arch = cpu_arch; - return error.InvalidArch; + if (object.getPlatform()) |platform| { + error_ctx.detected_platform = platform; } + + if (self.base.options.target.cpu.arch != cpu_arch) return error.InvalidTarget; + if (error_ctx.detected_platform) |platform| { + if (!Platform.fromTarget(self.base.options.target).eqlTarget(platform)) return error.InvalidTarget; + } + + try self.objects.append(gpa, object); } pub fn parseLibrary( @@ -825,14 +821,12 @@ pub fn parseLibrary( const tracy = trace(@src()); defer tracy.end(); - const cpu_arch = self.base.options.target.cpu.arch; - if (fat.isFatLibrary(file)) { - const offset = try self.parseFatLibrary(file, cpu_arch); + const offset = try self.parseFatLibrary(file, self.base.options.target.cpu.arch); try file.seekTo(offset); if (Archive.isArchive(file, offset)) { - try self.parseArchive(path, offset, must_link, cpu_arch, error_ctx); + try self.parseArchive(path, offset, must_link, error_ctx); } else if (Dylib.isDylib(file, offset)) { try self.parseDylib(file, path, offset, dependent_libs, .{ .needed = lib.needed, @@ -840,7 +834,7 @@ pub fn parseLibrary( }, error_ctx); } else return error.UnknownFileType; } else if (Archive.isArchive(file, 0)) { - try self.parseArchive(path, 0, must_link, cpu_arch, error_ctx); + try self.parseArchive(path, 0, must_link, error_ctx); } else if (Dylib.isDylib(file, 0)) { try self.parseDylib(file, path, 0, dependent_libs, .{ .needed = lib.needed, @@ -850,7 +844,7 @@ pub fn parseLibrary( self.parseLibStub(file, path, dependent_libs, .{ .needed = lib.needed, .weak = lib.weak, - }) catch |err| switch (err) { + }, error_ctx) catch 
|err| switch (err) { error.NotLibStub, error.UnexpectedToken => return error.UnknownFileType, else => |e| return e, }; @@ -872,7 +866,6 @@ fn parseArchive( path: []const u8, fat_offset: u64, must_link: bool, - cpu_arch: std.Target.Cpu.Arch, error_ctx: anytype, ) ParseError!void { const gpa = self.base.allocator; @@ -899,14 +892,20 @@ fn parseArchive( var object = try archive.parseObject(gpa, off); // TODO we are doing all this work to pull the header only! defer object.deinit(gpa); - const parsed_cpu_arch: std.Target.Cpu.Arch = switch (object.header.cputype) { + const cpu_arch: std.Target.Cpu.Arch = switch (object.header.cputype) { macho.CPU_TYPE_ARM64 => .aarch64, macho.CPU_TYPE_X86_64 => .x86_64, else => unreachable, }; - if (cpu_arch != parsed_cpu_arch) { - error_ctx.detected_arch = parsed_cpu_arch; - return error.InvalidArch; + error_ctx.detected_arch = cpu_arch; + + if (object.getPlatform()) |platform| { + error_ctx.detected_platform = platform; + } + + if (self.base.options.target.cpu.arch != cpu_arch) return error.InvalidTarget; + if (error_ctx.detected_platform) |platform| { + if (!Platform.fromTarget(self.base.options.target).eqlTarget(platform)) return error.InvalidTarget; } } @@ -945,8 +944,6 @@ fn parseDylib( error_ctx: anytype, ) ParseError!void { const gpa = self.base.allocator; - const self_cpu_arch = self.base.options.target.cpu.arch; - const file_stat = try file.stat(); const file_size = math.cast(usize, file_stat.size - offset) orelse return error.Overflow; @@ -969,12 +966,16 @@ fn parseDylib( macho.CPU_TYPE_X86_64 => .x86_64, else => unreachable, }; - if (self_cpu_arch != cpu_arch) { - error_ctx.detected_arch = cpu_arch; - return error.InvalidArch; + error_ctx.detected_arch = cpu_arch; + + if (dylib.getPlatform(contents)) |platform| { + error_ctx.detected_platform = platform; } - // TODO verify platform + if (self.base.options.target.cpu.arch != cpu_arch) return error.InvalidTarget; + if (error_ctx.detected_platform) |platform| { + if 
(!Platform.fromTarget(self.base.options.target).eqlTarget(platform)) return error.InvalidTarget; + } try self.addDylib(dylib, .{ .needed = dylib_options.needed, @@ -988,6 +989,7 @@ fn parseLibStub( path: []const u8, dependent_libs: anytype, dylib_options: DylibOpts, + error_ctx: anytype, ) ParseError!void { const gpa = self.base.allocator; var lib_stub = try LibStub.loadFromFile(gpa, file); @@ -995,7 +997,20 @@ fn parseLibStub( if (lib_stub.inner.len == 0) return error.NotLibStub; - // TODO verify platform + // Verify target + { + var matcher = try Dylib.TargetMatcher.init(gpa, self.base.options.target); + defer matcher.deinit(); + + const first_tbd = lib_stub.inner[0]; + const targets = try first_tbd.targets(gpa); + if (!matcher.matchesTarget(targets)) { + error_ctx.detected_stub_targets = targets; + return error.InvalidLibStubTargets; + } + for (targets) |t| gpa.free(t); + gpa.free(targets); + } var dylib = Dylib{ .weak = dylib_options.weak }; errdefer dylib.deinit(gpa); @@ -1104,7 +1119,7 @@ pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype, error_ctx: anyt self.parseLibStub(file, full_path, dependent_libs, .{ .dependent = true, .weak = weak, - }) catch |err| switch (err) { + }, error_ctx) catch |err| switch (err) { error.NotLibStub, error.UnexpectedToken => continue, else => |e| return e, }; @@ -4830,6 +4845,53 @@ pub fn getSectionPrecedence(header: macho.section_64) u8 { return (@as(u8, @intCast(segment_precedence)) << 4) + section_precedence; } +pub fn handleAndReportParseError(self: *MachO, path: []const u8, err: ParseError, parse_error_ctx: anytype) !void { + const cpu_arch = self.base.options.target.cpu.arch; + switch (err) { + error.DylibAlreadyExists => {}, + error.UnknownFileType => try self.reportParseError(path, "unknown file type", .{}), + error.MissingArchFatLib => try self.reportParseError( + path, + "missing architecture in universal file, expected '{s}'", + .{@tagName(cpu_arch)}, + ), + error.InvalidTarget => if 
(parse_error_ctx.detected_platform) |platform| { + try self.reportParseError(path, "invalid target '{s}-{}', expected '{s}-{}'", .{ + @tagName(parse_error_ctx.detected_arch), + platform.fmtTarget(), + @tagName(cpu_arch), + Platform.fromTarget(self.base.options.target).fmtTarget(), + }); + } else { + try self.reportParseError( + path, + "invalid architecture '{s}', expected '{s}'", + .{ @tagName(parse_error_ctx.detected_arch), @tagName(cpu_arch) }, + ); + }, + error.InvalidLibStubTargets => { + var targets_string = std.ArrayList(u8).init(self.base.allocator); + defer targets_string.deinit(); + try targets_string.writer().writeAll("("); + for (parse_error_ctx.detected_stub_targets) |t| { + try targets_string.writer().print("{s}, ", .{t}); + } + try targets_string.resize(targets_string.items.len - 2); + try targets_string.writer().writeAll(")"); + try self.reportParseError(path, "invalid targets '{s}', expected '{s}-{}'", .{ + targets_string.items, + @tagName(cpu_arch), + Platform.fromTarget(self.base.options.target).fmtTarget(), + }); + }, + else => |e| try self.reportParseError( + path, + "parsing positional argument failed with error '{s}'", + .{@errorName(e)}, + ), + } +} + pub fn reportParseError(self: *MachO, path: []const u8, comptime format: []const u8, args: anytype) !void { const gpa = self.base.allocator; try self.misc_errors.ensureUnusedCapacity(gpa, 1); @@ -5140,66 +5202,6 @@ pub fn logAtom(self: *MachO, atom_index: Atom.Index, logger: anytype) void { } } -const MachO = @This(); - -const std = @import("std"); -const build_options = @import("build_options"); -const builtin = @import("builtin"); -const assert = std.debug.assert; -const dwarf = std.dwarf; -const fs = std.fs; -const log = std.log.scoped(.link); -const macho = std.macho; -const math = std.math; -const mem = std.mem; -const meta = std.meta; - -const aarch64 = @import("../arch/aarch64/bits.zig"); -const calcUuid = @import("MachO/uuid.zig").calcUuid; -const codegen = @import("../codegen.zig"); 
-const dead_strip = @import("MachO/dead_strip.zig"); -const fat = @import("MachO/fat.zig"); -const link = @import("../link.zig"); -const llvm_backend = @import("../codegen/llvm.zig"); -const load_commands = @import("MachO/load_commands.zig"); -const stubs = @import("MachO/stubs.zig"); -const tapi = @import("tapi.zig"); -const target_util = @import("../target.zig"); -const thunks = @import("MachO/thunks.zig"); -const trace = @import("../tracy.zig").trace; -const zld = @import("MachO/zld.zig"); - -const Air = @import("../Air.zig"); -const Allocator = mem.Allocator; -const Archive = @import("MachO/Archive.zig"); -pub const Atom = @import("MachO/Atom.zig"); -const Cache = std.Build.Cache; -const CodeSignature = @import("MachO/CodeSignature.zig"); -const Compilation = @import("../Compilation.zig"); -const Dwarf = File.Dwarf; -const DwarfInfo = @import("MachO/DwarfInfo.zig"); -const Dylib = @import("MachO/Dylib.zig"); -const File = link.File; -const Object = @import("MachO/Object.zig"); -const LibStub = tapi.LibStub; -const Liveness = @import("../Liveness.zig"); -const LlvmObject = @import("../codegen/llvm.zig").Object; -const Md5 = std.crypto.hash.Md5; -const Module = @import("../Module.zig"); -const InternPool = @import("../InternPool.zig"); -const Relocation = @import("MachO/Relocation.zig"); -const StringTable = @import("strtab.zig").StringTable; -const TableSection = @import("table_section.zig").TableSection; -const Trie = @import("MachO/Trie.zig"); -const Type = @import("../type.zig").Type; -const TypedValue = @import("../TypedValue.zig"); -const Value = @import("../value.zig").Value; - -pub const DebugSymbols = @import("MachO/DebugSymbols.zig"); -pub const Bind = @import("MachO/dyld_info/bind.zig").Bind(*const MachO, SymbolWithLoc); -pub const LazyBind = @import("MachO/dyld_info/bind.zig").LazyBind(*const MachO, SymbolWithLoc); -pub const Rebase = @import("MachO/dyld_info/Rebase.zig"); - pub const base_tag: File.Tag = File.Tag.macho; pub const N_DEAD: u16 = 
@as(u16, @bitCast(@as(i16, -1))); @@ -5332,3 +5334,64 @@ pub const default_pagezero_vmsize: u64 = 0x100000000; /// the table of load commands. This should be plenty for any /// potential future extensions. pub const default_headerpad_size: u32 = 0x1000; + +const MachO = @This(); + +const std = @import("std"); +const build_options = @import("build_options"); +const builtin = @import("builtin"); +const assert = std.debug.assert; +const dwarf = std.dwarf; +const fs = std.fs; +const log = std.log.scoped(.link); +const macho = std.macho; +const math = std.math; +const mem = std.mem; +const meta = std.meta; + +const aarch64 = @import("../arch/aarch64/bits.zig"); +const calcUuid = @import("MachO/uuid.zig").calcUuid; +const codegen = @import("../codegen.zig"); +const dead_strip = @import("MachO/dead_strip.zig"); +const fat = @import("MachO/fat.zig"); +const link = @import("../link.zig"); +const llvm_backend = @import("../codegen/llvm.zig"); +const load_commands = @import("MachO/load_commands.zig"); +const stubs = @import("MachO/stubs.zig"); +const tapi = @import("tapi.zig"); +const target_util = @import("../target.zig"); +const thunks = @import("MachO/thunks.zig"); +const trace = @import("../tracy.zig").trace; +const zld = @import("MachO/zld.zig"); + +const Air = @import("../Air.zig"); +const Allocator = mem.Allocator; +const Archive = @import("MachO/Archive.zig"); +pub const Atom = @import("MachO/Atom.zig"); +const Cache = std.Build.Cache; +const CodeSignature = @import("MachO/CodeSignature.zig"); +const Compilation = @import("../Compilation.zig"); +const Dwarf = File.Dwarf; +const DwarfInfo = @import("MachO/DwarfInfo.zig"); +const Dylib = @import("MachO/Dylib.zig"); +const File = link.File; +const Object = @import("MachO/Object.zig"); +const LibStub = tapi.LibStub; +const Liveness = @import("../Liveness.zig"); +const LlvmObject = @import("../codegen/llvm.zig").Object; +const Md5 = std.crypto.hash.Md5; +const Module = @import("../Module.zig"); +const InternPool = 
@import("../InternPool.zig"); +const Platform = load_commands.Platform; +const Relocation = @import("MachO/Relocation.zig"); +const StringTable = @import("strtab.zig").StringTable; +const TableSection = @import("table_section.zig").TableSection; +const Trie = @import("MachO/Trie.zig"); +const Type = @import("../type.zig").Type; +const TypedValue = @import("../TypedValue.zig"); +const Value = @import("../value.zig").Value; + +pub const DebugSymbols = @import("MachO/DebugSymbols.zig"); +pub const Bind = @import("MachO/dyld_info/bind.zig").Bind(*const MachO, SymbolWithLoc); +pub const LazyBind = @import("MachO/dyld_info/bind.zig").LazyBind(*const MachO, SymbolWithLoc); +pub const Rebase = @import("MachO/dyld_info/Rebase.zig"); diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index 6dd7b6ae96..581f804f13 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -178,6 +178,26 @@ pub fn parseFromBinary( } } +/// Returns Platform composed from the first encountered build version type load command: +/// either LC_BUILD_VERSION or LC_VERSION_MIN_*. 
+pub fn getPlatform(self: Dylib, data: []align(@alignOf(u64)) const u8) ?Platform { + var it = LoadCommandIterator{ + .ncmds = self.header.?.ncmds, + .buffer = data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], + }; + while (it.next()) |cmd| { + switch (cmd.cmd()) { + .BUILD_VERSION, + .VERSION_MIN_MACOSX, + .VERSION_MIN_IPHONEOS, + .VERSION_MIN_TVOS, + .VERSION_MIN_WATCHOS, + => return Platform.fromLoadCommand(cmd), + else => {}, + } + } else return null; +} + fn addObjCClassSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void { const expanded = &[_][]const u8{ try std.fmt.allocPrint(allocator, "_OBJC_CLASS_$_{s}", .{sym_name}), @@ -212,27 +232,27 @@ fn addWeakSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void try self.symbols.putNoClobber(allocator, try allocator.dupe(u8, sym_name), true); } -const TargetMatcher = struct { +pub const TargetMatcher = struct { allocator: Allocator, - target: CrossTarget, + cpu_arch: std.Target.Cpu.Arch, + os_tag: std.Target.Os.Tag, + abi: std.Target.Abi, target_strings: std.ArrayListUnmanaged([]const u8) = .{}, - pub fn init(allocator: Allocator, target: CrossTarget) !TargetMatcher { + pub fn init(allocator: Allocator, target: std.Target) !TargetMatcher { var self = TargetMatcher{ .allocator = allocator, - .target = target, + .cpu_arch = target.cpu.arch, + .os_tag = target.os.tag, + .abi = target.abi, }; - const apple_string = try targetToAppleString(allocator, target); + const apple_string = try toAppleTargetTriple(allocator, self.cpu_arch, self.os_tag, self.abi); try self.target_strings.append(allocator, apple_string); - const abi = target.abi orelse .none; - if (abi == .simulator) { + if (self.abi == .simulator) { // For Apple simulator targets, linking gets tricky as we need to link against the simulator // hosts dylibs too. 
- const host_target = try targetToAppleString(allocator, .{ - .cpu_arch = target.cpu_arch.?, - .os_tag = .macos, - }); + const host_target = try toAppleTargetTriple(allocator, self.cpu_arch, .macos, .none); try self.target_strings.append(allocator, host_target); } @@ -246,7 +266,7 @@ const TargetMatcher = struct { self.target_strings.deinit(self.allocator); } - inline fn cpuArchToAppleString(cpu_arch: std.Target.Cpu.Arch) []const u8 { + inline fn fmtCpuArch(cpu_arch: std.Target.Cpu.Arch) []const u8 { return switch (cpu_arch) { .aarch64 => "arm64", .x86_64 => "x86_64", @@ -254,7 +274,7 @@ const TargetMatcher = struct { }; } - inline fn abiToAppleString(abi: std.Target.Abi) ?[]const u8 { + inline fn fmtAbi(abi: std.Target.Abi) ?[]const u8 { return switch (abi) { .none => null, .simulator => "simulator", @@ -263,14 +283,18 @@ const TargetMatcher = struct { }; } - pub fn targetToAppleString(allocator: Allocator, target: CrossTarget) ![]const u8 { - const cpu_arch = cpuArchToAppleString(target.cpu_arch.?); - const os_tag = @tagName(target.os_tag.?); - const target_abi = abiToAppleString(target.abi orelse .none); - if (target_abi) |abi| { - return std.fmt.allocPrint(allocator, "{s}-{s}-{s}", .{ cpu_arch, os_tag, abi }); + pub fn toAppleTargetTriple( + allocator: Allocator, + cpu_arch: std.Target.Cpu.Arch, + os_tag: std.Target.Os.Tag, + abi: std.Target.Abi, + ) ![]const u8 { + const cpu_arch_s = fmtCpuArch(cpu_arch); + const os_tag_s = @tagName(os_tag); + if (fmtAbi(abi)) |abi_s| { + return std.fmt.allocPrint(allocator, "{s}-{s}-{s}", .{ cpu_arch_s, os_tag_s, abi_s }); } - return std.fmt.allocPrint(allocator, "{s}-{s}", .{ cpu_arch, os_tag }); + return std.fmt.allocPrint(allocator, "{s}-{s}", .{ cpu_arch_s, os_tag_s }); } fn hasValue(stack: []const []const u8, needle: []const u8) bool { @@ -280,7 +304,7 @@ const TargetMatcher = struct { return false; } - fn matchesTarget(self: TargetMatcher, targets: []const []const u8) bool { + pub fn matchesTarget(self: TargetMatcher, 
targets: []const []const u8) bool { for (self.target_strings.items) |t| { if (hasValue(targets, t)) return true; } @@ -288,26 +312,7 @@ const TargetMatcher = struct { } fn matchesArch(self: TargetMatcher, archs: []const []const u8) bool { - return hasValue(archs, cpuArchToAppleString(self.target.cpu_arch.?)); - } - - pub fn matchesTargetTbd(self: TargetMatcher, tbd: Tbd) !bool { - var arena = std.heap.ArenaAllocator.init(self.allocator); - defer arena.deinit(); - - const targets = switch (tbd) { - .v3 => |v3| blk: { - var targets = std.ArrayList([]const u8).init(arena.allocator()); - for (v3.archs) |arch| { - const target = try std.fmt.allocPrint(arena.allocator(), "{s}-{s}", .{ arch, v3.platform }); - try targets.append(target); - } - break :blk targets.items; - }, - .v4 => |v4| v4.targets, - }; - - return self.matchesTarget(targets); + return hasValue(archs, fmtCpuArch(self.cpu_arch)); } }; @@ -342,15 +347,16 @@ pub fn parseFromStub( log.debug(" (install_name '{s}')", .{umbrella_lib.installName()}); - var matcher = try TargetMatcher.init(allocator, .{ - .cpu_arch = target.cpu.arch, - .os_tag = target.os.tag, - .abi = target.abi, - }); + var matcher = try TargetMatcher.init(allocator, target); defer matcher.deinit(); for (lib_stub.inner, 0..) 
|elem, stub_index| { - if (!(try matcher.matchesTargetTbd(elem))) continue; + const targets = try elem.targets(allocator); + defer { + for (targets) |t| allocator.free(t); + allocator.free(targets); + } + if (!matcher.matchesTarget(targets)) continue; if (stub_index > 0) { // TODO I thought that we could switch on presence of `parent-umbrella` map; @@ -541,8 +547,8 @@ const fat = @import("fat.zig"); const tapi = @import("../tapi.zig"); const Allocator = mem.Allocator; -const CrossTarget = std.zig.CrossTarget; const LibStub = tapi.LibStub; const LoadCommandIterator = macho.LoadCommandIterator; const MachO = @import("../MachO.zig"); +const Platform = @import("load_commands.zig").Platform; const Tbd = tapi.Tbd; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 92f2899d8b..43c87cf092 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -940,7 +940,7 @@ pub fn parseDwarfInfo(self: Object) DwarfInfo { return di; } -/// Returns Options.Platform composed from the first encountered build version type load command: +/// Returns Platform composed from the first encountered build version type load command: /// either LC_BUILD_VERSION or LC_VERSION_MIN_*. 
pub fn getPlatform(self: Object) ?Platform { var it = LoadCommandIterator{ diff --git a/src/link/MachO/load_commands.zig b/src/link/MachO/load_commands.zig index 5d3a84c87f..50580d0275 100644 --- a/src/link/MachO/load_commands.zig +++ b/src/link/MachO/load_commands.zig @@ -77,7 +77,7 @@ fn calcLCsSize(gpa: Allocator, options: *const link.Options, ctx: CalcLCsSizeCtx // LC_SOURCE_VERSION sizeofcmds += @sizeOf(macho.source_version_command); // LC_BUILD_VERSION or LC_VERSION_MIN_ - if (Platform.fromOptions(options).isBuildVersionCompatible()) { + if (Platform.fromTarget(options.target).isBuildVersionCompatible()) { // LC_BUILD_VERSION sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); } else { @@ -353,11 +353,11 @@ pub const Platform = struct { } } - pub fn fromOptions(options: *const link.Options) Platform { + pub fn fromTarget(target: std.Target) Platform { return .{ - .os_tag = options.target.os.tag, - .abi = options.target.abi, - .version = options.target.os.version_range.semver.min, + .os_tag = target.os.tag, + .abi = target.abi, + .version = target.os.version_range.semver.min, }; } @@ -383,6 +383,28 @@ pub const Platform = struct { } return false; } + + pub fn fmtTarget(plat: Platform) std.fmt.Formatter(formatTarget) { + return .{ .data = plat }; + } + + pub fn formatTarget( + plat: Platform, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + try writer.print("{s}", .{@tagName(plat.os_tag)}); + if (plat.abi != .none) { + try writer.print("-{s}", .{@tagName(plat.abi)}); + } + } + + pub fn eqlTarget(plat: Platform, other: Platform) bool { + return plat.os_tag == other.os_tag and plat.abi == other.abi; + } }; const SupportedPlatforms = struct { diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 7c1b870ed5..6dbc361c28 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -347,11 +347,17 @@ pub fn 
linkWithZld( var parse_error_ctx: struct { detected_arch: std.Target.Cpu.Arch, - detected_os: std.Target.Os.Tag, + detected_platform: ?Platform, + detected_stub_targets: []const []const u8, } = .{ .detected_arch = undefined, - .detected_os = undefined, + .detected_platform = null, + .detected_stub_targets = &[0][]const u8{}, }; + defer { + for (parse_error_ctx.detected_stub_targets) |t| gpa.free(t); + gpa.free(parse_error_ctx.detected_stub_targets); + } for (positionals.items) |obj| { const in_file = try std.fs.cwd().openFile(obj.path, .{}); @@ -363,25 +369,7 @@ pub fn linkWithZld( obj.must_link, &dependent_libs, &parse_error_ctx, - ) catch |err| switch (err) { - error.DylibAlreadyExists => {}, - error.UnknownFileType => try macho_file.reportParseError(obj.path, "unknown file type", .{}), - error.MissingArchFatLib => try macho_file.reportParseError( - obj.path, - "missing architecture in universal file, expected '{s}'", - .{@tagName(cpu_arch)}, - ), - error.InvalidArch => try macho_file.reportParseError( - obj.path, - "invalid architecture '{s}', expected '{s}'", - .{ @tagName(parse_error_ctx.detected_arch), @tagName(cpu_arch) }, - ), - else => |e| try macho_file.reportParseError( - obj.path, - "parsing positional argument failed with error '{s}'", - .{@errorName(e)}, - ), - }; + ) catch |err| try macho_file.handleAndReportParseError(obj.path, err, parse_error_ctx); } for (libs.keys(), libs.values()) |path, lib| { @@ -395,25 +383,7 @@ pub fn linkWithZld( false, &dependent_libs, &parse_error_ctx, - ) catch |err| switch (err) { - error.DylibAlreadyExists => {}, - error.UnknownFileType => try macho_file.reportParseError(path, "unknown file type", .{}), - error.MissingArchFatLib => try macho_file.reportParseError( - path, - "missing architecture in universal file, expected '{s}'", - .{@tagName(cpu_arch)}, - ), - error.InvalidArch => try macho_file.reportParseError( - path, - "invalid architecture '{s}', expected '{s}'", - .{ @tagName(parse_error_ctx.detected_arch), 
@tagName(cpu_arch) }, - ), - else => |e| try macho_file.reportParseError( - path, - "parsing library failed with error '{s}'", - .{@errorName(e)}, - ), - }; + ) catch |err| try macho_file.handleAndReportParseError(path, err, parse_error_ctx); } macho_file.parseDependentLibs(&dependent_libs, &parse_error_ctx) catch |err| { @@ -590,7 +560,7 @@ pub fn linkWithZld( .version = 0, }); { - const platform = load_commands.Platform.fromOptions(&macho_file.base.options); + const platform = Platform.fromTarget(macho_file.base.options.target); const sdk_version: ?std.SemanticVersion = if (macho_file.base.options.sysroot) |path| load_commands.inferSdkVersionFromSdkPath(path) else @@ -1252,6 +1222,7 @@ const MachO = @import("../MachO.zig"); const Md5 = std.crypto.hash.Md5; const LibStub = @import("../tapi.zig").LibStub; const Object = @import("Object.zig"); +const Platform = load_commands.Platform; const Section = MachO.Section; const StringTable = @import("../strtab.zig").StringTable; const SymbolWithLoc = MachO.SymbolWithLoc; diff --git a/src/link/tapi.zig b/src/link/tapi.zig index f9ffd43d62..6fc62e585d 100644 --- a/src/link/tapi.zig +++ b/src/link/tapi.zig @@ -81,6 +81,30 @@ pub const Tbd = union(enum) { v3: TbdV3, v4: TbdV4, + /// Caller owns memory. 
+ pub fn targets(self: Tbd, gpa: Allocator) error{OutOfMemory}![]const []const u8 { + var out = std.ArrayList([]const u8).init(gpa); + defer out.deinit(); + + switch (self) { + .v3 => |v3| { + try out.ensureTotalCapacityPrecise(v3.archs.len); + for (v3.archs) |arch| { + const target = try std.fmt.allocPrint(gpa, "{s}-{s}", .{ arch, v3.platform }); + out.appendAssumeCapacity(target); + } + }, + .v4 => |v4| { + try out.ensureTotalCapacityPrecise(v4.targets.len); + for (v4.targets) |t| { + out.appendAssumeCapacity(try gpa.dupe(u8, t)); + } + }, + } + + return out.toOwnedSlice(); + } + pub fn currentVersion(self: Tbd) ?VersionField { return switch (self) { .v3 => |v3| v3.current_version, From 7e167537c032133b416f36425e29c028c10a9462 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 29 Aug 2023 22:16:48 +0200 Subject: [PATCH 48/57] macho: simplify handling and reporting parsing errors --- src/link/MachO.zig | 205 +++++++++++++++---------------- src/link/MachO/fat.zig | 14 ++- src/link/MachO/load_commands.zig | 25 +++- src/link/MachO/zld.zig | 28 ++--- 4 files changed, 137 insertions(+), 135 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 127e9f7027..b5eb7910e0 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -401,35 +401,23 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No parent: u16, }, .Dynamic).init(arena); - var parse_error_ctx: struct { - detected_arch: std.Target.Cpu.Arch, - detected_platform: ?Platform, - detected_stub_targets: []const []const u8, - } = .{ - .detected_arch = undefined, - .detected_platform = null, - .detected_stub_targets = &[0][]const u8{}, - }; - defer { - for (parse_error_ctx.detected_stub_targets) |target| self.base.allocator.free(target); - self.base.allocator.free(parse_error_ctx.detected_stub_targets); - } + var parse_ctx = ParseErrorCtx.init(arena); for (libs.keys(), libs.values()) |path, lib| { const in_file = try std.fs.cwd().openFile(path, .{}); defer 
in_file.close(); - + defer parse_ctx.detected_targets.clearRetainingCapacity(); self.parseLibrary( in_file, path, lib, false, &dependent_libs, - &parse_error_ctx, - ) catch |err| try self.handleAndReportParseError(path, err, parse_error_ctx); + &parse_ctx, + ) catch |err| try self.handleAndReportParseError(path, err, &parse_ctx); } - self.parseDependentLibs(&dependent_libs, &parse_error_ctx) catch |err| { + self.parseDependentLibs(&dependent_libs, &parse_ctx) catch |err| { // TODO convert to error log.err("parsing dependent libraries failed with err {s}", .{@errorName(err)}); }; @@ -727,9 +715,7 @@ fn resolveLib( const ParseError = error{ UnknownFileType, - MissingArchFatLib, InvalidTarget, - InvalidLibStubTargets, DylibAlreadyExists, IncompatibleDylibVersion, OutOfMemory, @@ -748,19 +734,19 @@ pub fn parsePositional( path: []const u8, must_link: bool, dependent_libs: anytype, - error_ctx: anytype, + ctx: *ParseErrorCtx, ) ParseError!void { const tracy = trace(@src()); defer tracy.end(); if (Object.isObject(file)) { - try self.parseObject(file, path, error_ctx); + try self.parseObject(file, path, ctx); } else { try self.parseLibrary(file, path, .{ .path = null, .needed = false, .weak = false, - }, must_link, dependent_libs, error_ctx); + }, must_link, dependent_libs, ctx); } } @@ -768,7 +754,7 @@ fn parseObject( self: *MachO, file: std.fs.File, path: []const u8, - error_ctx: anytype, + ctx: *ParseErrorCtx, ) ParseError!void { const tracy = trace(@src()); defer tracy.end(); @@ -790,20 +776,21 @@ fn parseObject( errdefer object.deinit(gpa); try object.parse(gpa); - const cpu_arch: std.Target.Cpu.Arch = switch (object.header.cputype) { + const detected_cpu_arch: std.Target.Cpu.Arch = switch (object.header.cputype) { macho.CPU_TYPE_ARM64 => .aarch64, macho.CPU_TYPE_X86_64 => .x86_64, else => unreachable, }; - error_ctx.detected_arch = cpu_arch; + const detected_platform = object.getPlatform(); + const this_cpu_arch = self.base.options.target.cpu.arch; + const 
this_platform = Platform.fromTarget(self.base.options.target); - if (object.getPlatform()) |platform| { - error_ctx.detected_platform = platform; - } - - if (self.base.options.target.cpu.arch != cpu_arch) return error.InvalidTarget; - if (error_ctx.detected_platform) |platform| { - if (!Platform.fromTarget(self.base.options.target).eqlTarget(platform)) return error.InvalidTarget; + if (this_cpu_arch != detected_cpu_arch or + (detected_platform != null and !detected_platform.?.eqlTarget(this_platform))) + { + const platform = detected_platform orelse this_platform; + try ctx.detected_targets.append(try platform.allocPrintTarget(ctx.arena, detected_cpu_arch)); + return error.InvalidTarget; } try self.objects.append(gpa, object); @@ -816,48 +803,61 @@ pub fn parseLibrary( lib: link.SystemLib, must_link: bool, dependent_libs: anytype, - error_ctx: anytype, + ctx: *ParseErrorCtx, ) ParseError!void { const tracy = trace(@src()); defer tracy.end(); if (fat.isFatLibrary(file)) { - const offset = try self.parseFatLibrary(file, self.base.options.target.cpu.arch); + const offset = try self.parseFatLibrary(file, self.base.options.target.cpu.arch, ctx); try file.seekTo(offset); if (Archive.isArchive(file, offset)) { - try self.parseArchive(path, offset, must_link, error_ctx); + try self.parseArchive(path, offset, must_link, ctx); } else if (Dylib.isDylib(file, offset)) { try self.parseDylib(file, path, offset, dependent_libs, .{ .needed = lib.needed, .weak = lib.weak, - }, error_ctx); + }, ctx); } else return error.UnknownFileType; } else if (Archive.isArchive(file, 0)) { - try self.parseArchive(path, 0, must_link, error_ctx); + try self.parseArchive(path, 0, must_link, ctx); } else if (Dylib.isDylib(file, 0)) { try self.parseDylib(file, path, 0, dependent_libs, .{ .needed = lib.needed, .weak = lib.weak, - }, error_ctx); + }, ctx); } else { self.parseLibStub(file, path, dependent_libs, .{ .needed = lib.needed, .weak = lib.weak, - }, error_ctx) catch |err| switch (err) { + }, 
ctx) catch |err| switch (err) { error.NotLibStub, error.UnexpectedToken => return error.UnknownFileType, else => |e| return e, }; } } -pub fn parseFatLibrary(self: *MachO, file: std.fs.File, cpu_arch: std.Target.Cpu.Arch) ParseError!u64 { - _ = self; - var buffer: [2]fat.Arch = undefined; - const fat_archs = try fat.parseArchs(file, &buffer); +pub fn parseFatLibrary( + self: *MachO, + file: std.fs.File, + cpu_arch: std.Target.Cpu.Arch, + ctx: *ParseErrorCtx, +) ParseError!u64 { + const gpa = self.base.allocator; + + const fat_archs = try fat.parseArchs(gpa, file); + defer gpa.free(fat_archs); + const offset = for (fat_archs) |arch| { if (arch.tag == cpu_arch) break arch.offset; - } else return error.MissingArchFatLib; + } else { + try ctx.detected_targets.ensureTotalCapacityPrecise(fat_archs.len); + for (fat_archs) |arch| { + ctx.detected_targets.appendAssumeCapacity(try ctx.arena.dupe(u8, @tagName(arch.tag))); + } + return error.InvalidTarget; + }; return offset; } @@ -866,7 +866,7 @@ fn parseArchive( path: []const u8, fat_offset: u64, must_link: bool, - error_ctx: anytype, + ctx: *ParseErrorCtx, ) ParseError!void { const gpa = self.base.allocator; @@ -892,20 +892,21 @@ fn parseArchive( var object = try archive.parseObject(gpa, off); // TODO we are doing all this work to pull the header only! 
defer object.deinit(gpa); - const cpu_arch: std.Target.Cpu.Arch = switch (object.header.cputype) { + const detected_cpu_arch: std.Target.Cpu.Arch = switch (object.header.cputype) { macho.CPU_TYPE_ARM64 => .aarch64, macho.CPU_TYPE_X86_64 => .x86_64, else => unreachable, }; - error_ctx.detected_arch = cpu_arch; + const detected_platform = object.getPlatform(); + const this_cpu_arch = self.base.options.target.cpu.arch; + const this_platform = Platform.fromTarget(self.base.options.target); - if (object.getPlatform()) |platform| { - error_ctx.detected_platform = platform; - } - - if (self.base.options.target.cpu.arch != cpu_arch) return error.InvalidTarget; - if (error_ctx.detected_platform) |platform| { - if (!Platform.fromTarget(self.base.options.target).eqlTarget(platform)) return error.InvalidTarget; + if (this_cpu_arch != detected_cpu_arch or + (detected_platform != null and !detected_platform.?.eqlTarget(this_platform))) + { + const platform = detected_platform orelse this_platform; + try ctx.detected_targets.append(try platform.allocPrintTarget(gpa, detected_cpu_arch)); + return error.InvalidTarget; } } @@ -941,7 +942,7 @@ fn parseDylib( offset: u64, dependent_libs: anytype, dylib_options: DylibOpts, - error_ctx: anytype, + ctx: *ParseErrorCtx, ) ParseError!void { const gpa = self.base.allocator; const file_stat = try file.stat(); @@ -961,20 +962,21 @@ fn parseDylib( contents, ); - const cpu_arch: std.Target.Cpu.Arch = switch (dylib.header.?.cputype) { + const detected_cpu_arch: std.Target.Cpu.Arch = switch (dylib.header.?.cputype) { macho.CPU_TYPE_ARM64 => .aarch64, macho.CPU_TYPE_X86_64 => .x86_64, else => unreachable, }; - error_ctx.detected_arch = cpu_arch; + const detected_platform = dylib.getPlatform(contents); + const this_cpu_arch = self.base.options.target.cpu.arch; + const this_platform = Platform.fromTarget(self.base.options.target); - if (dylib.getPlatform(contents)) |platform| { - error_ctx.detected_platform = platform; - } - - if 
(self.base.options.target.cpu.arch != cpu_arch) return error.InvalidTarget; - if (error_ctx.detected_platform) |platform| { - if (!Platform.fromTarget(self.base.options.target).eqlTarget(platform)) return error.InvalidTarget; + if (this_cpu_arch != detected_cpu_arch or + (detected_platform != null and !detected_platform.?.eqlTarget(this_platform))) + { + const platform = detected_platform orelse this_platform; + try ctx.detected_targets.append(try platform.allocPrintTarget(ctx.arena, detected_cpu_arch)); + return error.InvalidTarget; } try self.addDylib(dylib, .{ @@ -989,7 +991,7 @@ fn parseLibStub( path: []const u8, dependent_libs: anytype, dylib_options: DylibOpts, - error_ctx: anytype, + ctx: *ParseErrorCtx, ) ParseError!void { const gpa = self.base.allocator; var lib_stub = try LibStub.loadFromFile(gpa, file); @@ -1004,12 +1006,17 @@ fn parseLibStub( const first_tbd = lib_stub.inner[0]; const targets = try first_tbd.targets(gpa); - if (!matcher.matchesTarget(targets)) { - error_ctx.detected_stub_targets = targets; - return error.InvalidLibStubTargets; + defer { + for (targets) |t| gpa.free(t); + gpa.free(targets); + } + if (!matcher.matchesTarget(targets)) { + try ctx.detected_targets.ensureUnusedCapacity(targets.len); + for (targets) |t| { + ctx.detected_targets.appendAssumeCapacity(try ctx.arena.dupe(u8, t)); + } + return error.InvalidTarget; } - for (targets) |t| gpa.free(t); - gpa.free(targets); } var dylib = Dylib{ .weak = dylib_options.weak }; @@ -1059,7 +1066,7 @@ fn addDylib(self: *MachO, dylib: Dylib, dylib_options: DylibOpts) ParseError!voi } } -pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype, error_ctx: anytype) ParseError!void { +pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype, ctx: *ParseErrorCtx) ParseError!void { const tracy = trace(@src()); defer tracy.end(); @@ -1105,7 +1112,7 @@ pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype, error_ctx: anyt log.debug("trying dependency at fully resolved path 
{s}", .{full_path}); const offset: u64 = if (fat.isFatLibrary(file)) blk: { - const offset = try self.parseFatLibrary(file, self.base.options.target.cpu.arch); + const offset = try self.parseFatLibrary(file, self.base.options.target.cpu.arch, ctx); try file.seekTo(offset); break :blk offset; } else 0; @@ -1114,12 +1121,12 @@ pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype, error_ctx: anyt try self.parseDylib(file, full_path, offset, dependent_libs, .{ .dependent = true, .weak = weak, - }, error_ctx); + }, ctx); } else { self.parseLibStub(file, full_path, dependent_libs, .{ .dependent = true, .weak = weak, - }, error_ctx) catch |err| switch (err) { + }, ctx) catch |err| switch (err) { error.NotLibStub, error.UnexpectedToken => continue, else => |e| return e, }; @@ -4845,50 +4852,40 @@ pub fn getSectionPrecedence(header: macho.section_64) u8 { return (@as(u8, @intCast(segment_precedence)) << 4) + section_precedence; } -pub fn handleAndReportParseError(self: *MachO, path: []const u8, err: ParseError, parse_error_ctx: anytype) !void { +pub const ParseErrorCtx = struct { + arena: Allocator, + detected_targets: std.ArrayList([]const u8), + + pub fn init(arena: Allocator) ParseErrorCtx { + return .{ .arena = arena, .detected_targets = std.ArrayList([]const u8).init(arena) }; + } +}; + +pub fn handleAndReportParseError( + self: *MachO, + path: []const u8, + err: ParseError, + ctx: *const ParseErrorCtx, +) !void { const cpu_arch = self.base.options.target.cpu.arch; switch (err) { error.DylibAlreadyExists => {}, error.UnknownFileType => try self.reportParseError(path, "unknown file type", .{}), - error.MissingArchFatLib => try self.reportParseError( - path, - "missing architecture in universal file, expected '{s}'", - .{@tagName(cpu_arch)}, - ), - error.InvalidTarget => if (parse_error_ctx.detected_platform) |platform| { - try self.reportParseError(path, "invalid target '{s}-{}', expected '{s}-{}'", .{ - @tagName(parse_error_ctx.detected_arch), - 
platform.fmtTarget(), - @tagName(cpu_arch), - Platform.fromTarget(self.base.options.target).fmtTarget(), - }); - } else { - try self.reportParseError( - path, - "invalid architecture '{s}', expected '{s}'", - .{ @tagName(parse_error_ctx.detected_arch), @tagName(cpu_arch) }, - ); - }, - error.InvalidLibStubTargets => { + error.InvalidTarget => { var targets_string = std.ArrayList(u8).init(self.base.allocator); defer targets_string.deinit(); try targets_string.writer().writeAll("("); - for (parse_error_ctx.detected_stub_targets) |t| { + for (ctx.detected_targets.items) |t| { try targets_string.writer().print("{s}, ", .{t}); } try targets_string.resize(targets_string.items.len - 2); try targets_string.writer().writeAll(")"); - try self.reportParseError(path, "invalid targets '{s}', expected '{s}-{}'", .{ + try self.reportParseError(path, "invalid target: expected '{}', but found '{s}'", .{ + Platform.fromTarget(self.base.options.target).fmtTarget(cpu_arch), targets_string.items, - @tagName(cpu_arch), - Platform.fromTarget(self.base.options.target).fmtTarget(), }); }, - else => |e| try self.reportParseError( - path, - "parsing positional argument failed with error '{s}'", - .{@errorName(e)}, - ), + else => |e| try self.reportParseError(path, "{s}: parsing object failed", .{@errorName(e)}), } } diff --git a/src/link/MachO/fat.zig b/src/link/MachO/fat.zig index 6dd32e2251..bc896704b8 100644 --- a/src/link/MachO/fat.zig +++ b/src/link/MachO/fat.zig @@ -10,12 +10,15 @@ pub const Arch = struct { offset: u64, }; -pub fn parseArchs(file: std.fs.File, buffer: *[2]Arch) ![]const Arch { +/// Caller owns the memory. 
+pub fn parseArchs(gpa: Allocator, file: std.fs.File) ![]const Arch { const reader = file.reader(); const fat_header = try reader.readStructBig(macho.fat_header); assert(fat_header.magic == macho.FAT_MAGIC); - var count: usize = 0; + var archs = try std.ArrayList(Arch).initCapacity(gpa, fat_header.nfat_arch); + defer archs.deinit(); + var fat_arch_index: u32 = 0; while (fat_arch_index < fat_header.nfat_arch) : (fat_arch_index += 1) { const fat_arch = try reader.readStructBig(macho.fat_arch); @@ -26,11 +29,11 @@ pub fn parseArchs(file: std.fs.File, buffer: *[2]Arch) ![]const Arch { macho.CPU_TYPE_X86_64 => if (fat_arch.cpusubtype == macho.CPU_SUBTYPE_X86_64_ALL) .x86_64 else continue, else => continue, }; - buffer[count] = .{ .tag = arch, .offset = fat_arch.offset }; - count += 1; + + archs.appendAssumeCapacity(.{ .tag = arch, .offset = fat_arch.offset }); } - return buffer[0..count]; + return archs.toOwnedSlice(); } const std = @import("std"); @@ -38,3 +41,4 @@ const assert = std.debug.assert; const log = std.log.scoped(.archive); const macho = std.macho; const mem = std.mem; +const Allocator = mem.Allocator; diff --git a/src/link/MachO/load_commands.zig b/src/link/MachO/load_commands.zig index 50580d0275..afad9d7884 100644 --- a/src/link/MachO/load_commands.zig +++ b/src/link/MachO/load_commands.zig @@ -384,24 +384,37 @@ pub const Platform = struct { return false; } - pub fn fmtTarget(plat: Platform) std.fmt.Formatter(formatTarget) { - return .{ .data = plat }; + pub fn fmtTarget(plat: Platform, cpu_arch: std.Target.Cpu.Arch) std.fmt.Formatter(formatTarget) { + return .{ .data = .{ .platform = plat, .cpu_arch = cpu_arch } }; } + const FmtCtx = struct { + platform: Platform, + cpu_arch: std.Target.Cpu.Arch, + }; + pub fn formatTarget( - plat: Platform, + ctx: FmtCtx, comptime unused_fmt_string: []const u8, options: std.fmt.FormatOptions, writer: anytype, ) !void { _ = unused_fmt_string; _ = options; - try writer.print("{s}", .{@tagName(plat.os_tag)}); - if 
(plat.abi != .none) { - try writer.print("-{s}", .{@tagName(plat.abi)}); + try writer.print("{s}-{s}", .{ @tagName(ctx.cpu_arch), @tagName(ctx.platform.os_tag) }); + if (ctx.platform.abi != .none) { + try writer.print("-{s}", .{@tagName(ctx.platform.abi)}); } } + /// Caller owns the memory. + pub fn allocPrintTarget(plat: Platform, gpa: Allocator, cpu_arch: std.Target.Cpu.Arch) error{OutOfMemory}![]u8 { + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + try buffer.writer().print("{}", .{plat.fmtTarget(cpu_arch)}); + return buffer.toOwnedSlice(); + } + pub fn eqlTarget(plat: Platform, other: Platform) bool { return plat.os_tag == other.os_tag and plat.abi == other.abi; } diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 6dbc361c28..7b53301eff 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -345,48 +345,36 @@ pub fn linkWithZld( parent: u16, }, .Dynamic).init(arena); - var parse_error_ctx: struct { - detected_arch: std.Target.Cpu.Arch, - detected_platform: ?Platform, - detected_stub_targets: []const []const u8, - } = .{ - .detected_arch = undefined, - .detected_platform = null, - .detected_stub_targets = &[0][]const u8{}, - }; - defer { - for (parse_error_ctx.detected_stub_targets) |t| gpa.free(t); - gpa.free(parse_error_ctx.detected_stub_targets); - } + var parse_ctx = MachO.ParseErrorCtx.init(arena); for (positionals.items) |obj| { const in_file = try std.fs.cwd().openFile(obj.path, .{}); defer in_file.close(); - + defer parse_ctx.detected_targets.clearRetainingCapacity(); macho_file.parsePositional( in_file, obj.path, obj.must_link, &dependent_libs, - &parse_error_ctx, - ) catch |err| try macho_file.handleAndReportParseError(obj.path, err, parse_error_ctx); + &parse_ctx, + ) catch |err| try macho_file.handleAndReportParseError(obj.path, err, &parse_ctx); } for (libs.keys(), libs.values()) |path, lib| { const in_file = try std.fs.cwd().openFile(path, .{}); defer in_file.close(); - + defer 
parse_ctx.detected_targets.clearRetainingCapacity(); macho_file.parseLibrary( in_file, path, lib, false, &dependent_libs, - &parse_error_ctx, - ) catch |err| try macho_file.handleAndReportParseError(path, err, parse_error_ctx); + &parse_ctx, + ) catch |err| try macho_file.handleAndReportParseError(path, err, &parse_ctx); } - macho_file.parseDependentLibs(&dependent_libs, &parse_error_ctx) catch |err| { + macho_file.parseDependentLibs(&dependent_libs, &parse_ctx) catch |err| { // TODO convert to error log.err("parsing dependent libraries failed with err {s}", .{@errorName(err)}); }; From ea9f2513a3b57aa4fda7825ca407672ce8d9da31 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 29 Aug 2023 22:26:58 +0200 Subject: [PATCH 49/57] macho: format parse error for fat libs to include CPU archs only --- src/link/MachO.zig | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b5eb7910e0..59781f38af 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -716,6 +716,7 @@ fn resolveLib( const ParseError = error{ UnknownFileType, InvalidTarget, + InvalidTargetFatLibrary, DylibAlreadyExists, IncompatibleDylibVersion, OutOfMemory, @@ -856,7 +857,7 @@ pub fn parseFatLibrary( for (fat_archs) |arch| { ctx.detected_targets.appendAssumeCapacity(try ctx.arena.dupe(u8, @tagName(arch.tag))); } - return error.InvalidTarget; + return error.InvalidTargetFatLibrary; }; return offset; } @@ -4871,7 +4872,7 @@ pub fn handleAndReportParseError( switch (err) { error.DylibAlreadyExists => {}, error.UnknownFileType => try self.reportParseError(path, "unknown file type", .{}), - error.InvalidTarget => { + error.InvalidTarget, error.InvalidTargetFatLibrary => { var targets_string = std.ArrayList(u8).init(self.base.allocator); defer targets_string.deinit(); try targets_string.writer().writeAll("("); @@ -4880,10 +4881,20 @@ pub fn handleAndReportParseError( } try targets_string.resize(targets_string.items.len - 2); 
try targets_string.writer().writeAll(")"); - try self.reportParseError(path, "invalid target: expected '{}', but found '{s}'", .{ - Platform.fromTarget(self.base.options.target).fmtTarget(cpu_arch), - targets_string.items, - }); + + switch (err) { + error.InvalidTarget => try self.reportParseError( + path, + "invalid target: expected '{}', but found '{s}'", + .{ Platform.fromTarget(self.base.options.target).fmtTarget(cpu_arch), targets_string.items }, + ), + error.InvalidTargetFatLibrary => try self.reportParseError( + path, + "invalid architecture in univeral library: expected '{s}', but found '{s}'", + .{ @tagName(cpu_arch), targets_string.items }, + ), + else => unreachable, + } }, else => |e| try self.reportParseError(path, "{s}: parsing object failed", .{@errorName(e)}), } From f21245f5e773c61a8d1f5fb91309faadf0d2f103 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 30 Aug 2023 12:30:17 +0200 Subject: [PATCH 50/57] macho: refactor resolving and parsing dependent dylibs --- src/link/MachO.zig | 148 ++++++++++++++++++++++----------------- src/link/MachO/Dylib.zig | 2 + src/link/MachO/zld.zig | 18 ++--- 3 files changed, 96 insertions(+), 72 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 59781f38af..937df96009 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -401,26 +401,25 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No parent: u16, }, .Dynamic).init(arena); - var parse_ctx = ParseErrorCtx.init(arena); - for (libs.keys(), libs.values()) |path, lib| { const in_file = try std.fs.cwd().openFile(path, .{}); defer in_file.close(); - defer parse_ctx.detected_targets.clearRetainingCapacity(); + + var parse_ctx = ParseErrorCtx.init(self.base.allocator); + defer parse_ctx.deinit(); + self.parseLibrary( in_file, path, lib, false, + false, &dependent_libs, &parse_ctx, ) catch |err| try self.handleAndReportParseError(path, err, &parse_ctx); } - self.parseDependentLibs(&dependent_libs, &parse_ctx) 
catch |err| { - // TODO convert to error - log.err("parsing dependent libraries failed with err {s}", .{@errorName(err)}); - }; + try self.parseDependentLibs(&dependent_libs); } var actions = std.ArrayList(ResolveAction).init(self.base.allocator); @@ -674,7 +673,7 @@ fn resolveLibSystemInDirs(arena: Allocator, dirs: []const []const u8, out_libs: // Try stub file first. If we hit it, then we're done as the stub file // re-exports every single symbol definition. for (dirs) |dir| { - if (try resolveLib(arena, dir, "System", ".tbd")) |full_path| { + if (try resolveLib(arena, dir, "libSystem", ".tbd")) |full_path| { try out_libs.put(full_path, .{ .needed = true, .weak = false, .path = full_path }); return true; } @@ -682,8 +681,8 @@ fn resolveLibSystemInDirs(arena: Allocator, dirs: []const []const u8, out_libs: // If we didn't hit the stub file, try .dylib next. However, libSystem.dylib // doesn't export libc.dylib which we'll need to resolve subsequently also. for (dirs) |dir| { - if (try resolveLib(arena, dir, "System", ".dylib")) |libsystem_path| { - if (try resolveLib(arena, dir, "c", ".dylib")) |libc_path| { + if (try resolveLib(arena, dir, "libSystem", ".dylib")) |libsystem_path| { + if (try resolveLib(arena, dir, "libc", ".dylib")) |libc_path| { try out_libs.put(libsystem_path, .{ .needed = true, .weak = false, .path = libsystem_path }); try out_libs.put(libc_path, .{ .needed = true, .weak = false, .path = libc_path }); return true; @@ -700,7 +699,7 @@ fn resolveLib( name: []const u8, ext: []const u8, ) !?[]const u8 { - const search_name = try std.fmt.allocPrint(arena, "lib{s}{s}", .{ name, ext }); + const search_name = try std.fmt.allocPrint(arena, "{s}{s}", .{ name, ext }); const full_path = try fs.path.join(arena, &[_][]const u8{ search_dir, search_name }); // Check if the file exists. 
@@ -747,7 +746,7 @@ pub fn parsePositional( .path = null, .needed = false, .weak = false, - }, must_link, dependent_libs, ctx); + }, must_link, false, dependent_libs, ctx); } } @@ -790,7 +789,7 @@ fn parseObject( (detected_platform != null and !detected_platform.?.eqlTarget(this_platform))) { const platform = detected_platform orelse this_platform; - try ctx.detected_targets.append(try platform.allocPrintTarget(ctx.arena, detected_cpu_arch)); + try ctx.detected_targets.append(try platform.allocPrintTarget(ctx.arena(), detected_cpu_arch)); return error.InvalidTarget; } @@ -803,6 +802,7 @@ pub fn parseLibrary( path: []const u8, lib: link.SystemLib, must_link: bool, + is_dependent: bool, dependent_libs: anytype, ctx: *ParseErrorCtx, ) ParseError!void { @@ -819,6 +819,7 @@ pub fn parseLibrary( try self.parseDylib(file, path, offset, dependent_libs, .{ .needed = lib.needed, .weak = lib.weak, + .dependent = is_dependent, }, ctx); } else return error.UnknownFileType; } else if (Archive.isArchive(file, 0)) { @@ -827,11 +828,13 @@ pub fn parseLibrary( try self.parseDylib(file, path, 0, dependent_libs, .{ .needed = lib.needed, .weak = lib.weak, + .dependent = is_dependent, }, ctx); } else { self.parseLibStub(file, path, dependent_libs, .{ .needed = lib.needed, .weak = lib.weak, + .dependent = is_dependent, }, ctx) catch |err| switch (err) { error.NotLibStub, error.UnexpectedToken => return error.UnknownFileType, else => |e| return e, @@ -853,9 +856,9 @@ pub fn parseFatLibrary( const offset = for (fat_archs) |arch| { if (arch.tag == cpu_arch) break arch.offset; } else { - try ctx.detected_targets.ensureTotalCapacityPrecise(fat_archs.len); + try ctx.detected_targets.ensureUnusedCapacity(fat_archs.len); for (fat_archs) |arch| { - ctx.detected_targets.appendAssumeCapacity(try ctx.arena.dupe(u8, @tagName(arch.tag))); + ctx.detected_targets.appendAssumeCapacity(try ctx.arena().dupe(u8, @tagName(arch.tag))); } return error.InvalidTargetFatLibrary; }; @@ -952,7 +955,7 @@ fn 
parseDylib( const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); defer gpa.free(contents); - var dylib = Dylib{ .weak = dylib_options.weak }; + var dylib = Dylib{ .path = try gpa.dupe(u8, path), .weak = dylib_options.weak }; errdefer dylib.deinit(gpa); try dylib.parseFromBinary( @@ -976,7 +979,7 @@ fn parseDylib( (detected_platform != null and !detected_platform.?.eqlTarget(this_platform))) { const platform = detected_platform orelse this_platform; - try ctx.detected_targets.append(try platform.allocPrintTarget(ctx.arena, detected_cpu_arch)); + try ctx.detected_targets.append(try platform.allocPrintTarget(ctx.arena(), detected_cpu_arch)); return error.InvalidTarget; } @@ -1014,13 +1017,13 @@ fn parseLibStub( if (!matcher.matchesTarget(targets)) { try ctx.detected_targets.ensureUnusedCapacity(targets.len); for (targets) |t| { - ctx.detected_targets.appendAssumeCapacity(try ctx.arena.dupe(u8, t)); + ctx.detected_targets.appendAssumeCapacity(try ctx.arena().dupe(u8, t)); } return error.InvalidTarget; } } - var dylib = Dylib{ .weak = dylib_options.weak }; + var dylib = Dylib{ .path = try gpa.dupe(u8, path), .weak = dylib_options.weak }; errdefer dylib.deinit(gpa); try dylib.parseFromStub( @@ -1067,7 +1070,7 @@ fn addDylib(self: *MachO, dylib: Dylib, dylib_options: DylibOpts) ParseError!voi } } -pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype, ctx: *ParseErrorCtx) ParseError!void { +pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype) !void { const tracy = trace(@src()); defer tracy.end(); @@ -1081,12 +1084,13 @@ pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype, ctx: *ParseErro const arena = arena_alloc.allocator(); defer arena_alloc.deinit(); - outer: while (dependent_libs.readItem()) |dep_id| { + while (dependent_libs.readItem()) |dep_id| { defer dep_id.id.deinit(gpa); if (self.dylibs_map.contains(dep_id.id.name)) continue; - const weak = self.dylibs.items[dep_id.parent].weak; + 
const parent = &self.dylibs.items[dep_id.parent]; + const weak = parent.weak; const has_ext = blk: { const basename = fs.path.basename(dep_id.id.name); break :blk mem.lastIndexOfScalar(u8, basename, '.') != null; @@ -1097,46 +1101,50 @@ pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype, ctx: *ParseErro break :blk dep_id.id.name[0..index]; } else dep_id.id.name; - for (&[_][]const u8{ extension, ".tbd" }) |ext| { - const with_ext = try std.fmt.allocPrint(arena, "{s}{s}", .{ without_ext, ext }); - const full_path = if (self.base.options.sysroot) |root| - try fs.path.join(arena, &.{ root, with_ext }) - else - with_ext; - - const file = std.fs.cwd().openFile(full_path, .{}) catch |err| switch (err) { - error.FileNotFound => continue, - else => |e| return e, - }; - defer file.close(); - - log.debug("trying dependency at fully resolved path {s}", .{full_path}); - - const offset: u64 = if (fat.isFatLibrary(file)) blk: { - const offset = try self.parseFatLibrary(file, self.base.options.target.cpu.arch, ctx); - try file.seekTo(offset); - break :blk offset; - } else 0; - - if (Dylib.isDylib(file, offset)) { - try self.parseDylib(file, full_path, offset, dependent_libs, .{ - .dependent = true, - .weak = weak, - }, ctx); - } else { - self.parseLibStub(file, full_path, dependent_libs, .{ - .dependent = true, - .weak = weak, - }, ctx) catch |err| switch (err) { - error.NotLibStub, error.UnexpectedToken => continue, - else => |e| return e, - }; + const maybe_full_path = full_path: { + if (self.base.options.sysroot) |root| { + for (&[_][]const u8{ extension, ".tbd" }) |ext| { + if (try resolveLib(arena, root, without_ext, ext)) |full_path| break :full_path full_path; + } } - continue :outer; - } - // TODO convert into an error - log.err("{s}: unable to resolve dependency", .{dep_id.id.name}); + for (&[_][]const u8{ extension, ".tbd" }) |ext| { + if (try resolveLib(arena, "", without_ext, ext)) |full_path| break :full_path full_path; + } + + break :full_path null; + 
}; + + const full_path = maybe_full_path orelse { + try self.misc_errors.ensureUnusedCapacity(gpa, 1); + var notes = try gpa.alloc(File.ErrorMsg, 1); + errdefer gpa.free(notes); + const parent_name = if (parent.id) |id| id.name else parent.path; + notes[0] = .{ .msg = try std.fmt.allocPrint(gpa, "a dependency of {s}", .{parent_name}) }; + self.misc_errors.appendAssumeCapacity(.{ + .msg = try std.fmt.allocPrint(gpa, "missing dynamic library dependency: '{s}'", .{dep_id.id.name}), + .notes = notes, + }); + continue; + }; + + const file = try std.fs.cwd().openFile(full_path, .{}); + defer file.close(); + + log.debug("parsing dependency {s} at fully resolved path {s}", .{ dep_id.id.name, full_path }); + + var parse_ctx = ParseErrorCtx.init(gpa); + defer parse_ctx.deinit(); + + self.parseLibrary(file, full_path, .{ + .path = null, + .needed = false, + .weak = weak, + }, false, true, dependent_libs, &parse_ctx) catch |err| + try self.handleAndReportParseError(full_path, err, &parse_ctx); + + // TODO I think that it would be nice to rewrite this error to include metadata for failed dependency + // in addition to parsing error } } @@ -4854,11 +4862,23 @@ pub fn getSectionPrecedence(header: macho.section_64) u8 { } pub const ParseErrorCtx = struct { - arena: Allocator, + arena_allocator: std.heap.ArenaAllocator, detected_targets: std.ArrayList([]const u8), - pub fn init(arena: Allocator) ParseErrorCtx { - return .{ .arena = arena, .detected_targets = std.ArrayList([]const u8).init(arena) }; + pub fn init(gpa: Allocator) ParseErrorCtx { + return .{ + .arena_allocator = std.heap.ArenaAllocator.init(gpa), + .detected_targets = std.ArrayList([]const u8).init(gpa), + }; + } + + pub fn deinit(ctx: *ParseErrorCtx) void { + ctx.arena_allocator.deinit(); + ctx.detected_targets.deinit(); + } + + pub fn arena(ctx: *ParseErrorCtx) Allocator { + return ctx.arena_allocator.allocator(); } }; @@ -4890,7 +4910,7 @@ pub fn handleAndReportParseError( ), error.InvalidTargetFatLibrary => try 
self.reportParseError( path, - "invalid architecture in univeral library: expected '{s}', but found '{s}'", + "invalid architecture in universal library: expected '{s}', but found '{s}'", .{ @tagName(cpu_arch), targets_string.items }, ), else => unreachable, diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index 581f804f13..91411dc572 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -1,3 +1,4 @@ +path: []const u8, id: ?Id = null, weak: bool = false, /// Header is only set if Dylib is parsed directly from a binary and not a stub file. @@ -106,6 +107,7 @@ pub fn isDylib(file: std.fs.File, fat_offset: u64) bool { } pub fn deinit(self: *Dylib, allocator: Allocator) void { + allocator.free(self.path); for (self.symbols.keys()) |key| { allocator.free(key); } diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 7b53301eff..e8241ca67e 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -345,12 +345,13 @@ pub fn linkWithZld( parent: u16, }, .Dynamic).init(arena); - var parse_ctx = MachO.ParseErrorCtx.init(arena); - for (positionals.items) |obj| { const in_file = try std.fs.cwd().openFile(obj.path, .{}); defer in_file.close(); - defer parse_ctx.detected_targets.clearRetainingCapacity(); + + var parse_ctx = MachO.ParseErrorCtx.init(gpa); + defer parse_ctx.deinit(); + macho_file.parsePositional( in_file, obj.path, @@ -363,21 +364,22 @@ pub fn linkWithZld( for (libs.keys(), libs.values()) |path, lib| { const in_file = try std.fs.cwd().openFile(path, .{}); defer in_file.close(); - defer parse_ctx.detected_targets.clearRetainingCapacity(); + + var parse_ctx = MachO.ParseErrorCtx.init(gpa); + defer parse_ctx.deinit(); + macho_file.parseLibrary( in_file, path, lib, false, + false, &dependent_libs, &parse_ctx, ) catch |err| try macho_file.handleAndReportParseError(path, err, &parse_ctx); } - macho_file.parseDependentLibs(&dependent_libs, &parse_ctx) catch |err| { - // TODO convert to error - log.err("parsing 
dependent libraries failed with err {s}", .{@errorName(err)}); - }; + try macho_file.parseDependentLibs(&dependent_libs); var actions = std.ArrayList(MachO.ResolveAction).init(gpa); defer actions.deinit(); From 22c81740ef611fe6e3b7ac2390fa9cf058f0ac6b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 30 Aug 2023 12:37:05 +0200 Subject: [PATCH 51/57] macho: convert error.TODOImplementWritingStaticLibFiles into an actual error --- src/link.zig | 1 - src/link/MachO.zig | 11 +++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/link.zig b/src/link.zig index bf27efcc3a..65204762c5 100644 --- a/src/link.zig +++ b/src/link.zig @@ -754,7 +754,6 @@ pub const File = struct { SymbolMismatchingType, TODOImplementPlan9Objs, TODOImplementWritingLibFiles, - TODOImplementWritingStaticLibFiles, UnableToSpawnSelf, UnableToSpawnWasm, UnableToWriteArchive, diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 937df96009..48f4384204 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -267,8 +267,11 @@ pub fn flush(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) li if (build_options.have_llvm) { return self.base.linkAsArchive(comp, prog_node); } else { - log.err("TODO: non-LLVM archiver for MachO object files", .{}); - return error.TODOImplementWritingStaticLibFiles; + try self.misc_errors.ensureUnusedCapacity(self.base.allocator, 1); + self.misc_errors.appendAssumeCapacity(.{ + .msg = try self.base.allocator.dupe(u8, "TODO: non-LLVM archiver for MachO object files"), + }); + return error.FlushFailure; } } @@ -2233,7 +2236,7 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu .fail => |em| { decl.analysis = .codegen_failure; try mod.failed_decls.put(mod.gpa, decl_index, em); - log.err("{s}", .{em.msg}); + log.debug("{s}", .{em.msg}); return error.CodegenFail; }, }; @@ -2388,7 +2391,7 @@ fn updateLazySymbolAtom( const code = switch (res) { .ok => code_buffer.items, .fail => |em| { - log.err("{s}", 
.{em.msg}); + log.debug("{s}", .{em.msg}); return error.CodegenFail; }, }; From 5806e761bb676cdd537308f6c4a197e42228416d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 30 Aug 2023 19:02:25 +0200 Subject: [PATCH 52/57] macho: improve error reporting for re-exports mismatch --- src/link.zig | 2 - src/link/MachO.zig | 99 +++++++++++++++++++++----------- src/link/MachO/load_commands.zig | 4 +- src/link/MachO/zld.zig | 6 +- 4 files changed, 71 insertions(+), 40 deletions(-) diff --git a/src/link.zig b/src/link.zig index 65204762c5..81fa0e2893 100644 --- a/src/link.zig +++ b/src/link.zig @@ -734,8 +734,6 @@ pub const File = struct { MissingEndForBody, MissingEndForExpression, /// TODO: this should be removed from the error set in favor of using ErrorFlags - MissingMainEntrypoint, - /// TODO: this should be removed from the error set in favor of using ErrorFlags MissingSection, MissingSymbol, MissingTableSymbols, diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 48f4384204..3e8cb7dfc6 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -399,10 +399,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No self.dylibs_map.clearRetainingCapacity(); self.referenced_dylibs.clearRetainingCapacity(); - var dependent_libs = std.fifo.LinearFifo(struct { - id: Dylib.Id, - parent: u16, - }, .Dynamic).init(arena); + var dependent_libs = std.fifo.LinearFifo(DylibReExportInfo, .Dynamic).init(arena); for (libs.keys(), libs.values()) |path, lib| { const in_file = try std.fs.cwd().openFile(path, .{}); @@ -417,6 +414,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No lib, false, false, + null, &dependent_libs, &parse_ctx, ) catch |err| try self.handleAndReportParseError(path, err, &parse_ctx); @@ -749,7 +747,7 @@ pub fn parsePositional( .path = null, .needed = false, .weak = false, - }, must_link, false, dependent_libs, ctx); + }, must_link, false, null, dependent_libs, ctx); } } @@ -806,6 +804,7 @@ pub 
fn parseLibrary( lib: link.SystemLib, must_link: bool, is_dependent: bool, + reexport_info: ?DylibReExportInfo, dependent_libs: anytype, ctx: *ParseErrorCtx, ) ParseError!void { @@ -823,6 +822,7 @@ pub fn parseLibrary( .needed = lib.needed, .weak = lib.weak, .dependent = is_dependent, + .reexport_info = reexport_info, }, ctx); } else return error.UnknownFileType; } else if (Archive.isArchive(file, 0)) { @@ -832,12 +832,14 @@ pub fn parseLibrary( .needed = lib.needed, .weak = lib.weak, .dependent = is_dependent, + .reexport_info = reexport_info, }, ctx); } else { self.parseLibStub(file, path, dependent_libs, .{ .needed = lib.needed, .weak = lib.weak, .dependent = is_dependent, + .reexport_info = reexport_info, }, ctx) catch |err| switch (err) { error.NotLibStub, error.UnexpectedToken => return error.UnknownFileType, else => |e| return e, @@ -935,8 +937,13 @@ fn parseArchive( } } +pub const DylibReExportInfo = struct { + id: Dylib.Id, + parent: u16, +}; + const DylibOpts = struct { - id: ?Dylib.Id = null, + reexport_info: ?DylibReExportInfo = null, dependent: bool = false, needed: bool = false, weak: bool = false, @@ -986,10 +993,7 @@ fn parseDylib( return error.InvalidTarget; } - try self.addDylib(dylib, .{ - .needed = dylib_options.needed, - .weak = dylib_options.weak, - }); + try self.addDylib(dylib, dylib_options, ctx); } fn parseLibStub( @@ -1038,20 +1042,17 @@ fn parseLibStub( path, ); - try self.addDylib(dylib, .{ - .needed = dylib_options.needed, - .weak = dylib_options.weak, - }); + try self.addDylib(dylib, dylib_options, ctx); } -fn addDylib(self: *MachO, dylib: Dylib, dylib_options: DylibOpts) ParseError!void { - if (dylib_options.id) |id| { - if (dylib.id.?.current_version < id.compatibility_version) { - // TODO convert into an error - log.warn("found dylib is incompatible with the required minimum version", .{}); - log.warn(" dylib: {s}", .{id.name}); - log.warn(" required minimum version: {}", .{id.compatibility_version}); - log.warn(" dylib version: 
{}", .{dylib.id.?.current_version}); +fn addDylib(self: *MachO, dylib: Dylib, dylib_options: DylibOpts, ctx: *ParseErrorCtx) ParseError!void { + if (dylib_options.reexport_info) |reexport_info| { + if (dylib.id.?.current_version < reexport_info.id.compatibility_version) { + ctx.detected_dylib_id = .{ + .parent = reexport_info.parent, + .required_version = reexport_info.id.compatibility_version, + .found_version = dylib.id.?.current_version, + }; return error.IncompatibleDylibVersion; } } @@ -1119,14 +1120,9 @@ pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype) !void { }; const full_path = maybe_full_path orelse { - try self.misc_errors.ensureUnusedCapacity(gpa, 1); - var notes = try gpa.alloc(File.ErrorMsg, 1); - errdefer gpa.free(notes); const parent_name = if (parent.id) |id| id.name else parent.path; - notes[0] = .{ .msg = try std.fmt.allocPrint(gpa, "a dependency of {s}", .{parent_name}) }; - self.misc_errors.appendAssumeCapacity(.{ - .msg = try std.fmt.allocPrint(gpa, "missing dynamic library dependency: '{s}'", .{dep_id.id.name}), - .notes = notes, + try self.reportDependencyError(parent_name, null, "missing dynamic library dependency: '{s}'", .{ + dep_id.id.name, }); continue; }; @@ -1143,7 +1139,7 @@ pub fn parseDependentLibs(self: *MachO, dependent_libs: anytype) !void { .path = null, .needed = false, .weak = weak, - }, false, true, dependent_libs, &parse_ctx) catch |err| + }, false, true, dep_id, dependent_libs, &parse_ctx) catch |err| try self.handleAndReportParseError(full_path, err, &parse_ctx); // TODO I think that it would be nice to rewrite this error to include metadata for failed dependency @@ -4498,7 +4494,7 @@ pub fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void { header.cputype = macho.CPU_TYPE_X86_64; header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL; }, - else => return error.UnsupportedCpuArchitecture, + else => unreachable, } switch (self.base.options.output_mode) { @@ -4866,11 +4862,17 @@ pub fn 
getSectionPrecedence(header: macho.section_64) u8 { pub const ParseErrorCtx = struct { arena_allocator: std.heap.ArenaAllocator, + detected_dylib_id: struct { + parent: u16, + required_version: u32, + found_version: u32, + }, detected_targets: std.ArrayList([]const u8), pub fn init(gpa: Allocator) ParseErrorCtx { return .{ .arena_allocator = std.heap.ArenaAllocator.init(gpa), + .detected_dylib_id = undefined, .detected_targets = std.ArrayList([]const u8).init(gpa), }; } @@ -4894,6 +4896,18 @@ pub fn handleAndReportParseError( const cpu_arch = self.base.options.target.cpu.arch; switch (err) { error.DylibAlreadyExists => {}, + error.IncompatibleDylibVersion => { + const parent = &self.dylibs.items[ctx.detected_dylib_id.parent]; + try self.reportDependencyError( + if (parent.id) |id| id.name else parent.path, + path, + "incompatible dylib version: expected at least '{}', but found '{}'", + .{ + load_commands.appleVersionToSemanticVersion(ctx.detected_dylib_id.required_version), + load_commands.appleVersionToSemanticVersion(ctx.detected_dylib_id.found_version), + }, + ); + }, error.UnknownFileType => try self.reportParseError(path, "unknown file type", .{}), error.InvalidTarget, error.InvalidTargetFatLibrary => { var targets_string = std.ArrayList(u8).init(self.base.allocator); @@ -4923,7 +4937,28 @@ pub fn handleAndReportParseError( } } -pub fn reportParseError(self: *MachO, path: []const u8, comptime format: []const u8, args: anytype) !void { +fn reportDependencyError( + self: *MachO, + parent: []const u8, + path: ?[]const u8, + comptime format: []const u8, + args: anytype, +) !void { + const gpa = self.base.allocator; + try self.misc_errors.ensureUnusedCapacity(gpa, 1); + var notes = try std.ArrayList(File.ErrorMsg).initCapacity(gpa, 2); + defer notes.deinit(); + if (path) |p| { + notes.appendAssumeCapacity(.{ .msg = try std.fmt.allocPrint(gpa, "while parsing {s}", .{p}) }); + } + notes.appendAssumeCapacity(.{ .msg = try std.fmt.allocPrint(gpa, "a dependency of 
{s}", .{parent}) }); + self.misc_errors.appendAssumeCapacity(.{ + .msg = try std.fmt.allocPrint(gpa, format, args), + .notes = try notes.toOwnedSlice(), + }); +} + +fn reportParseError(self: *MachO, path: []const u8, comptime format: []const u8, args: anytype) !void { const gpa = self.base.allocator; try self.misc_errors.ensureUnusedCapacity(gpa, 1); var notes = try gpa.alloc(File.ErrorMsg, 1); diff --git a/src/link/MachO/load_commands.zig b/src/link/MachO/load_commands.zig index afad9d7884..fa33b88e36 100644 --- a/src/link/MachO/load_commands.zig +++ b/src/link/MachO/load_commands.zig @@ -440,14 +440,14 @@ const supported_platforms = [_]SupportedPlatforms{ }; // zig fmt: on -pub inline fn semanticVersionToAppleVersion(version: std.SemanticVersion) u32 { +inline fn semanticVersionToAppleVersion(version: std.SemanticVersion) u32 { const major = version.major; const minor = version.minor; const patch = version.patch; return (@as(u32, @intCast(major)) << 16) | (@as(u32, @intCast(minor)) << 8) | @as(u32, @intCast(patch)); } -inline fn appleVersionToSemanticVersion(version: u32) std.SemanticVersion { +pub inline fn appleVersionToSemanticVersion(version: u32) std.SemanticVersion { return .{ .major = @as(u16, @truncate(version >> 16)), .minor = @as(u8, @truncate(version >> 8)), diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index e8241ca67e..916b6b9478 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -340,10 +340,7 @@ pub fn linkWithZld( Compilation.dump_argv(argv.items); } - var dependent_libs = std.fifo.LinearFifo(struct { - id: Dylib.Id, - parent: u16, - }, .Dynamic).init(arena); + var dependent_libs = std.fifo.LinearFifo(MachO.DylibReExportInfo, .Dynamic).init(arena); for (positionals.items) |obj| { const in_file = try std.fs.cwd().openFile(obj.path, .{}); @@ -374,6 +371,7 @@ pub fn linkWithZld( lib, false, false, + null, &dependent_libs, &parse_ctx, ) catch |err| try macho_file.handleAndReportParseError(path, err, &parse_ctx); From 
5144132320ae86320e9a6bf335bfbdccf52e2621 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 30 Aug 2023 20:16:37 +0200 Subject: [PATCH 53/57] macho: report formatted error for unhandled symbol types --- src/link.zig | 10 ------- src/link/MachO.zig | 66 ++++++++++++++++++++++++++++++---------------- 2 files changed, 44 insertions(+), 32 deletions(-) diff --git a/src/link.zig b/src/link.zig index 81fa0e2893..4bb30f7a7e 100644 --- a/src/link.zig +++ b/src/link.zig @@ -698,14 +698,12 @@ pub const File = struct { CurrentWorkingDirectoryUnlinked, DivisionByZero, DllImportLibraryNotFound, - EmptyStubFile, ExpectedFuncType, FailedToEmit, FailedToResolveRelocationTarget, FileSystem, FilesOpenedWithWrongFlags, FlushFailure, - FrameworkNotFound, FunctionSignatureMismatch, GlobalTypeMismatch, HotSwapUnavailableOnHostOperatingSystem, @@ -722,14 +720,12 @@ pub const File = struct { LLD_LinkingIsTODO_ForSpirV, LibCInstallationMissingCRTDir, LibCInstallationNotAvailable, - LibraryNotFound, LinkingWithoutZigSourceUnimplemented, MalformedArchive, MalformedDwarf, MalformedSection, MemoryTooBig, MemoryTooSmall, - MismatchedCpuArchitecture, MissAlignment, MissingEndForBody, MissingEndForExpression, @@ -738,9 +734,7 @@ pub const File = struct { MissingSymbol, MissingTableSymbols, ModuleNameMismatch, - MultipleSymbolDefinitions, NoObjectsToLink, - NotObject, NotObjectFile, NotSupported, OutOfMemory, @@ -756,16 +750,12 @@ pub const File = struct { UnableToSpawnWasm, UnableToWriteArchive, UndefinedLocal, - /// TODO: merge with UndefinedSymbolReference UndefinedSymbol, - /// TODO: merge with UndefinedSymbol - UndefinedSymbolReference, Underflow, UnexpectedRemainder, UnexpectedTable, UnexpectedValue, UnhandledDwFormValue, - UnhandledSymbolType, UnknownFeature, Unseekable, UnsupportedCpuArchitecture, diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 3e8cb7dfc6..d74c905487 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1709,26 +1709,14 @@ fn 
resolveSymbolsInObject(self: *MachO, object_id: u32) !void { while (sym_index < in_symtab.len) : (sym_index += 1) { const sym = &object.symtab[sym_index]; const sym_name = object.getSymbolName(sym_index); + const sym_with_loc = SymbolWithLoc{ + .sym_index = sym_index, + .file = object_id + 1, + }; - if (sym.stab()) { - log.err("unhandled symbol type: stab", .{}); - log.err(" symbol '{s}'", .{sym_name}); - log.err(" first definition in '{s}'", .{object.name}); - return error.UnhandledSymbolType; - } - - if (sym.indr()) { - log.err("unhandled symbol type: indirect", .{}); - log.err(" symbol '{s}'", .{sym_name}); - log.err(" first definition in '{s}'", .{object.name}); - return error.UnhandledSymbolType; - } - - if (sym.abs()) { - log.err("unhandled symbol type: absolute", .{}); - log.err(" symbol '{s}'", .{sym_name}); - log.err(" first definition in '{s}'", .{object.name}); - return error.UnhandledSymbolType; + if (sym.stab() or sym.indr() or sym.abs()) { + try self.reportUnhandledSymbolType(sym_with_loc); + continue; } if (sym.sect() and !sym.ext()) { @@ -4892,7 +4880,7 @@ pub fn handleAndReportParseError( path: []const u8, err: ParseError, ctx: *const ParseErrorCtx, -) !void { +) error{OutOfMemory}!void { const cpu_arch = self.base.options.target.cpu.arch; switch (err) { error.DylibAlreadyExists => {}, @@ -4943,7 +4931,7 @@ fn reportDependencyError( path: ?[]const u8, comptime format: []const u8, args: anytype, -) !void { +) error{OutOfMemory}!void { const gpa = self.base.allocator; try self.misc_errors.ensureUnusedCapacity(gpa, 1); var notes = try std.ArrayList(File.ErrorMsg).initCapacity(gpa, 2); @@ -4958,7 +4946,12 @@ fn reportDependencyError( }); } -fn reportParseError(self: *MachO, path: []const u8, comptime format: []const u8, args: anytype) !void { +fn reportParseError( + self: *MachO, + path: []const u8, + comptime format: []const u8, + args: anytype, +) error{OutOfMemory}!void { const gpa = self.base.allocator; try 
self.misc_errors.ensureUnusedCapacity(gpa, 1); var notes = try gpa.alloc(File.ErrorMsg, 1); @@ -5030,6 +5023,35 @@ fn reportSymbolCollision( self.misc_errors.appendAssumeCapacity(err_msg); } +fn reportUnhandledSymbolType(self: *MachO, sym_with_loc: SymbolWithLoc) error{OutOfMemory}!void { + const gpa = self.base.allocator; + try self.misc_errors.ensureUnusedCapacity(gpa, 1); + + const notes = try gpa.alloc(File.ErrorMsg, 1); + errdefer gpa.free(notes); + + const file = sym_with_loc.getFile().?; + notes[0] = .{ .msg = try std.fmt.allocPrint(gpa, "defined in {s}", .{self.objects.items[file].name}) }; + + const sym = self.getSymbol(sym_with_loc); + const sym_type = if (sym.stab()) + "stab" + else if (sym.indr()) + "indirect" + else if (sym.abs()) + "absolute" + else + unreachable; + + self.misc_errors.appendAssumeCapacity(.{ + .msg = try std.fmt.allocPrint(gpa, "unhandled symbol type: '{s}' has type {s}", .{ + self.getSymbolName(sym_with_loc), + sym_type, + }), + .notes = notes, + }); +} + /// Binary search pub fn bsearch(comptime T: type, haystack: []align(1) const T, predicate: anytype) usize { if (!@hasDecl(@TypeOf(predicate), "predicate")) From ba710ec09dd3df4cd0ee8de9a5299aeaed53a847 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 30 Aug 2023 20:34:16 +0200 Subject: [PATCH 54/57] macho: remove obsolete error.FailedToResolveRelocationTarget --- src/link.zig | 1 - src/link/MachO/Archive.zig | 4 ++-- src/link/MachO/Atom.zig | 14 +++++--------- src/link/MachO/eh_frame.zig | 2 +- src/link/MachO/thunks.zig | 2 +- 5 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/link.zig b/src/link.zig index 4bb30f7a7e..90244a44c2 100644 --- a/src/link.zig +++ b/src/link.zig @@ -700,7 +700,6 @@ pub const File = struct { DllImportLibraryNotFound, ExpectedFuncType, FailedToEmit, - FailedToResolveRelocationTarget, FileSystem, FilesOpenedWithWrongFlags, FlushFailure, diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index 20a191281e..6d1c769f04 
100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -128,7 +128,7 @@ fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) ! defer allocator.free(symtab); reader.readNoEof(symtab) catch { - log.err("incomplete symbol table: expected symbol table of length 0x{x}", .{symtab_size}); + log.debug("incomplete symbol table: expected symbol table of length 0x{x}", .{symtab_size}); return error.MalformedArchive; }; @@ -137,7 +137,7 @@ fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) ! defer allocator.free(strtab); reader.readNoEof(strtab) catch { - log.err("incomplete symbol table: expected string table of length 0x{x}", .{strtab_size}); + log.debug("incomplete symbol table: expected string table of length 0x{x}", .{strtab_size}); return error.MalformedArchive; }; diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index bde6b09583..16d318ba2c 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -615,7 +615,7 @@ pub fn resolveRelocs( }; } -pub fn getRelocTargetAddress(macho_file: *MachO, target: SymbolWithLoc, is_tlv: bool) !u64 { +pub fn getRelocTargetAddress(macho_file: *MachO, target: SymbolWithLoc, is_tlv: bool) u64 { const target_atom_index = getRelocTargetAtomIndex(macho_file, target) orelse { // If there is no atom for target, we still need to check for special, atom-less // symbols such as `___dso_handle`. 
@@ -648,17 +648,13 @@ pub fn getRelocTargetAddress(macho_file: *MachO, target: SymbolWithLoc, is_tlv: // defined TLV template init section in the following order: // * wrt to __thread_data if defined, then // * wrt to __thread_bss + // TODO remember to check what the mechanism was prior to HAS_TLV_INITIALIZERS in earlier versions of macOS const sect_id: u16 = sect_id: { if (macho_file.thread_data_section_index) |i| { break :sect_id i; } else if (macho_file.thread_bss_section_index) |i| { break :sect_id i; - } else { - log.err("threadlocal variables present but no initializer sections found", .{}); - log.err(" __thread_data not found", .{}); - log.err(" __thread_bss not found", .{}); - return error.FailedToResolveRelocationTarget; - } + } else break :base_address 0; }; break :base_address macho_file.sections.items(.header)[sect_id].addr; } else 0; @@ -744,7 +740,7 @@ fn resolveRelocsArm64( const header = macho_file.sections.items(.header)[source_sym.n_sect - 1]; break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; }; - break :blk try getRelocTargetAddress(macho_file, target, is_tlv); + break :blk getRelocTargetAddress(macho_file, target, is_tlv); }; log.debug(" | source_addr = 0x{x}", .{source_addr}); @@ -1040,7 +1036,7 @@ fn resolveRelocsX86( const header = macho_file.sections.items(.header)[source_sym.n_sect - 1]; break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; }; - break :blk try getRelocTargetAddress(macho_file, target, is_tlv); + break :blk getRelocTargetAddress(macho_file, target, is_tlv); }; log.debug(" | source_addr = 0x{x}", .{source_addr}); diff --git a/src/link/MachO/eh_frame.zig b/src/link/MachO/eh_frame.zig index 332aea08e5..96b8f5c5a6 100644 --- a/src/link/MachO/eh_frame.zig +++ b/src/link/MachO/eh_frame.zig @@ -347,7 +347,7 @@ pub fn EhFrameRecord(comptime is_mutable: bool) type { }, .ARM64_RELOC_UNSIGNED => { assert(rel.r_extern == 1); - const target_addr = try Atom.getRelocTargetAddress(macho_file, target, false); + const 
target_addr = Atom.getRelocTargetAddress(macho_file, target, false); const result = @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr)); mem.writeIntLittle(i64, rec.data[rel_offset..][0..8], @as(i64, @intCast(result))); }, diff --git a/src/link/MachO/thunks.zig b/src/link/MachO/thunks.zig index 726fbdf2a6..2ee47478f4 100644 --- a/src/link/MachO/thunks.zig +++ b/src/link/MachO/thunks.zig @@ -317,7 +317,7 @@ fn isReachable( const target_addr = if (Atom.relocRequiresGot(macho_file, rel)) macho_file.getGotEntryAddress(target).? else - Atom.getRelocTargetAddress(macho_file, target, false) catch unreachable; + Atom.getRelocTargetAddress(macho_file, target, false); _ = Relocation.calcPcRelativeDisplacementArm64(source_addr, target_addr) catch return false; From ebe371b75769dcc5526cdb7650c875764fb536e4 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 30 Aug 2023 21:48:24 +0200 Subject: [PATCH 55/57] macho: report basic __eh_frame problems as errors --- src/link.zig | 3 --- src/link/MachO.zig | 2 +- src/link/MachO/Object.zig | 15 +++++++++------ src/link/MachO/UnwindInfo.zig | 12 ++++++------ src/link/MachO/dead_strip.zig | 22 +++++++++++----------- src/link/MachO/eh_frame.zig | 28 ++++++++++++++-------------- src/link/MachO/zld.zig | 16 ++++++++++++++-- 7 files changed, 55 insertions(+), 43 deletions(-) diff --git a/src/link.zig b/src/link.zig index 90244a44c2..b57ab6fa7b 100644 --- a/src/link.zig +++ b/src/link.zig @@ -693,7 +693,6 @@ pub const File = struct { /// TODO audit this error set. most of these should be collapsed into one error, /// and ErrorFlags should be updated to convey the meaning to the user. 
pub const FlushError = error{ - BadDwarfCfi, CacheUnavailable, CurrentWorkingDirectoryUnlinked, DivisionByZero, @@ -728,8 +727,6 @@ pub const File = struct { MissAlignment, MissingEndForBody, MissingEndForExpression, - /// TODO: this should be removed from the error set in favor of using ErrorFlags - MissingSection, MissingSymbol, MissingTableSymbols, ModuleNameMismatch, diff --git a/src/link/MachO.zig b/src/link/MachO.zig index d74c905487..10848da7f7 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -4946,7 +4946,7 @@ fn reportDependencyError( }); } -fn reportParseError( +pub fn reportParseError( self: *MachO, path: []const u8, comptime format: []const u8, diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 43c87cf092..ab12ede5d7 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -334,7 +334,14 @@ fn sectionLessThanByAddress(ctx: void, lhs: SortedSection, rhs: SortedSection) b return lhs.header.addr < rhs.header.addr; } -pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void { +pub const SplitIntoAtomsError = error{ + OutOfMemory, + EndOfStream, + MissingEhFrameSection, + BadDwarfCfi, +}; + +pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) SplitIntoAtomsError!void { log.debug("splitting object({d}, {s}) into atoms", .{ object_id, self.name }); try self.splitRegularSections(macho_file, object_id); @@ -788,11 +795,7 @@ fn parseUnwindInfo(self: *Object, macho_file: *MachO, object_id: u32) !void { if (UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) break true; } else false; - if (needs_eh_frame and !self.hasEhFrameRecords()) { - log.err("missing __TEXT,__eh_frame section", .{}); - log.err(" in object {s}", .{self.name}); - return error.MissingSection; - } + if (needs_eh_frame and !self.hasEhFrameRecords()) return error.MissingEhFrameSection; try self.parseRelocs(gpa, sect_id); const relocs = self.getRelocs(sect_id); diff --git 
a/src/link/MachO/UnwindInfo.zig b/src/link/MachO/UnwindInfo.zig index e3612c6948..adb1051301 100644 --- a/src/link/MachO/UnwindInfo.zig +++ b/src/link/MachO/UnwindInfo.zig @@ -240,7 +240,7 @@ pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void { var record = unwind_records[record_id]; if (UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) { - try info.collectPersonalityFromDwarf(macho_file, @as(u32, @intCast(object_id)), symbol, &record); + info.collectPersonalityFromDwarf(macho_file, @as(u32, @intCast(object_id)), symbol, &record); } else { if (getPersonalityFunctionReloc( macho_file, @@ -288,7 +288,7 @@ pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void { if (object.eh_frame_records_lookup.get(symbol)) |fde_offset| { if (object.eh_frame_relocs_lookup.get(fde_offset).?.dead) continue; var record = nullRecord(); - try info.collectPersonalityFromDwarf(macho_file, @as(u32, @intCast(object_id)), symbol, &record); + info.collectPersonalityFromDwarf(macho_file, @as(u32, @intCast(object_id)), symbol, &record); switch (cpu_arch) { .aarch64 => UnwindEncoding.setMode(&record.compactUnwindEncoding, macho.UNWIND_ARM64_MODE.DWARF), .x86_64 => UnwindEncoding.setMode(&record.compactUnwindEncoding, macho.UNWIND_X86_64_MODE.DWARF), @@ -500,16 +500,16 @@ fn collectPersonalityFromDwarf( object_id: u32, sym_loc: SymbolWithLoc, record: *macho.compact_unwind_entry, -) !void { +) void { const object = &macho_file.objects.items[object_id]; var it = object.getEhFrameRecordsIterator(); const fde_offset = object.eh_frame_records_lookup.get(sym_loc).?; it.seekTo(fde_offset); - const fde = (try it.next()).?; + const fde = (it.next() catch return).?; // We don't care about the error since we already handled it const cie_ptr = fde.getCiePointerSource(object_id, macho_file, fde_offset); const cie_offset = fde_offset + 4 - cie_ptr; it.seekTo(cie_offset); - const cie = (try it.next()).?; + const cie = (it.next() catch return).?; // We don't care about the error since 
we already handled it if (cie.getPersonalityPointerReloc( macho_file, @@ -528,7 +528,7 @@ fn collectPersonalityFromDwarf( } } -pub fn calcSectionSize(info: UnwindInfo, macho_file: *MachO) !void { +pub fn calcSectionSize(info: UnwindInfo, macho_file: *MachO) void { const sect_id = macho_file.unwind_info_section_index orelse return; const sect = &macho_file.sections.items(.header)[sect_id]; sect.@"align" = 2; diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 26053cb83d..42cd437564 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -13,7 +13,7 @@ pub fn gcAtoms(macho_file: *MachO) !void { try alive.ensureTotalCapacity(@as(u32, @intCast(macho_file.atoms.items.len))); try collectRoots(macho_file, &roots); - try mark(macho_file, roots, &alive); + mark(macho_file, roots, &alive); prune(macho_file, alive); } @@ -227,7 +227,7 @@ fn refersLive(macho_file: *MachO, atom_index: Atom.Index, alive: AtomTable) bool return false; } -fn mark(macho_file: *MachO, roots: AtomTable, alive: *AtomTable) !void { +fn mark(macho_file: *MachO, roots: AtomTable, alive: *AtomTable) void { var it = roots.keyIterator(); while (it.next()) |root| { markLive(macho_file, root.*, alive); @@ -264,11 +264,11 @@ fn mark(macho_file: *MachO, roots: AtomTable, alive: *AtomTable) !void { for (macho_file.objects.items, 0..) |_, object_id| { // Traverse unwind and eh_frame records noting if the source symbol has been marked, and if so, // marking all references as live. 
- try markUnwindRecords(macho_file, @as(u32, @intCast(object_id)), alive); + markUnwindRecords(macho_file, @as(u32, @intCast(object_id)), alive); } } -fn markUnwindRecords(macho_file: *MachO, object_id: u32, alive: *AtomTable) !void { +fn markUnwindRecords(macho_file: *MachO, object_id: u32, alive: *AtomTable) void { const object = &macho_file.objects.items[object_id]; const cpu_arch = macho_file.base.options.target.cpu.arch; @@ -280,7 +280,7 @@ fn markUnwindRecords(macho_file: *MachO, object_id: u32, alive: *AtomTable) !voi if (!object.hasUnwindRecords()) { if (alive.contains(atom_index)) { // Mark references live and continue. - try markEhFrameRecords(macho_file, object_id, atom_index, alive); + markEhFrameRecords(macho_file, object_id, atom_index, alive); } else { while (inner_syms_it.next()) |sym| { if (object.eh_frame_records_lookup.get(sym)) |fde_offset| { @@ -306,7 +306,7 @@ fn markUnwindRecords(macho_file: *MachO, object_id: u32, alive: *AtomTable) !voi const record = unwind_records[record_id]; if (UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) { - try markEhFrameRecords(macho_file, object_id, atom_index, alive); + markEhFrameRecords(macho_file, object_id, atom_index, alive); } else { if (UnwindInfo.getPersonalityFunctionReloc(macho_file, object_id, record_id)) |rel| { const target = Atom.parseRelocTarget(macho_file, .{ @@ -339,7 +339,7 @@ fn markUnwindRecords(macho_file: *MachO, object_id: u32, alive: *AtomTable) !voi } } -fn markEhFrameRecords(macho_file: *MachO, object_id: u32, atom_index: Atom.Index, alive: *AtomTable) !void { +fn markEhFrameRecords(macho_file: *MachO, object_id: u32, atom_index: Atom.Index, alive: *AtomTable) void { const cpu_arch = macho_file.base.options.target.cpu.arch; const object = &macho_file.objects.items[object_id]; var it = object.getEhFrameRecordsIterator(); @@ -348,12 +348,12 @@ fn markEhFrameRecords(macho_file: *MachO, object_id: u32, atom_index: Atom.Index while (inner_syms_it.next()) |sym| { 
const fde_offset = object.eh_frame_records_lookup.get(sym) orelse continue; // Continue in case we hit a temp symbol alias it.seekTo(fde_offset); - const fde = (try it.next()).?; + const fde = (it.next() catch continue).?; // We don't care about the error at this point since it was already handled const cie_ptr = fde.getCiePointerSource(object_id, macho_file, fde_offset); const cie_offset = fde_offset + 4 - cie_ptr; it.seekTo(cie_offset); - const cie = (try it.next()).?; + const cie = (it.next() catch continue).?; // We don't care about the error at this point since it was already handled switch (cpu_arch) { .aarch64 => { @@ -377,10 +377,10 @@ fn markEhFrameRecords(macho_file: *MachO, object_id: u32, atom_index: Atom.Index }, .x86_64 => { const sect = object.getSourceSection(object.eh_frame_sect_id.?); - const lsda_ptr = try fde.getLsdaPointer(cie, .{ + const lsda_ptr = fde.getLsdaPointer(cie, .{ .base_addr = sect.addr, .base_offset = fde_offset, - }); + }) catch continue; // We don't care about the error at this point since it was already handled if (lsda_ptr) |lsda_address| { // Mark LSDA record as live const sym_index = object.getSymbolByAddress(lsda_address, null); diff --git a/src/link/MachO/eh_frame.zig b/src/link/MachO/eh_frame.zig index 96b8f5c5a6..31b3bb3311 100644 --- a/src/link/MachO/eh_frame.zig +++ b/src/link/MachO/eh_frame.zig @@ -13,7 +13,7 @@ pub fn scanRelocs(macho_file: *MachO) !void { const fde_offset = object.eh_frame_records_lookup.get(sym) orelse continue; if (object.eh_frame_relocs_lookup.get(fde_offset).?.dead) continue; it.seekTo(fde_offset); - const fde = (try it.next()).?; + const fde = (it.next() catch continue).?; // We don't care about this error since we already handled it const cie_ptr = fde.getCiePointerSource(@intCast(object_id), macho_file, fde_offset); const cie_offset = fde_offset + 4 - cie_ptr; @@ -21,7 +21,7 @@ pub fn scanRelocs(macho_file: *MachO) !void { if (!cies.contains(cie_offset)) { try cies.putNoClobber(cie_offset, 
{}); it.seekTo(cie_offset); - const cie = (try it.next()).?; + const cie = (it.next() catch continue).?; // We don't care about this error since we already handled it try cie.scanRelocs(macho_file, @as(u32, @intCast(object_id)), cie_offset); } } @@ -29,7 +29,7 @@ pub fn scanRelocs(macho_file: *MachO) !void { } } -pub fn calcSectionSize(macho_file: *MachO, unwind_info: *const UnwindInfo) !void { +pub fn calcSectionSize(macho_file: *MachO, unwind_info: *const UnwindInfo) error{OutOfMemory}!void { const sect_id = macho_file.eh_frame_section_index orelse return; const sect = &macho_file.sections.items(.header)[sect_id]; sect.@"align" = 3; @@ -59,7 +59,7 @@ pub fn calcSectionSize(macho_file: *MachO, unwind_info: *const UnwindInfo) !void if (!is_dwarf) continue; eh_it.seekTo(fde_record_offset); - const source_fde_record = (try eh_it.next()).?; + const source_fde_record = (eh_it.next() catch continue).?; // We already handled this error const cie_ptr = source_fde_record.getCiePointerSource(@intCast(object_id), macho_file, fde_record_offset); const cie_offset = fde_record_offset + 4 - cie_ptr; @@ -67,7 +67,7 @@ pub fn calcSectionSize(macho_file: *MachO, unwind_info: *const UnwindInfo) !void const gop = try cies.getOrPut(cie_offset); if (!gop.found_existing) { eh_it.seekTo(cie_offset); - const source_cie_record = (try eh_it.next()).?; + const source_cie_record = (eh_it.next() catch continue).?; // We already handled this error gop.value_ptr.* = size; size += source_cie_record.getSize(); } @@ -121,7 +121,7 @@ pub fn write(macho_file: *MachO, unwind_info: *UnwindInfo) !void { if (!is_dwarf) continue; eh_it.seekTo(fde_record_offset); - const source_fde_record = (try eh_it.next()).?; + const source_fde_record = (eh_it.next() catch continue).?; // We already handled this error const cie_ptr = source_fde_record.getCiePointerSource(@intCast(object_id), macho_file, fde_record_offset); const cie_offset = fde_record_offset + 4 - cie_ptr; @@ -129,7 +129,7 @@ pub fn write(macho_file: 
*MachO, unwind_info: *UnwindInfo) !void { const gop = try cies.getOrPut(cie_offset); if (!gop.found_existing) { eh_it.seekTo(cie_offset); - const source_cie_record = (try eh_it.next()).?; + const source_cie_record = (eh_it.next() catch continue).?; // We already handled this error var cie_record = try source_cie_record.toOwned(gpa); try cie_record.relocate(macho_file, @as(u32, @intCast(object_id)), .{ .source_offset = cie_offset, @@ -164,17 +164,17 @@ pub fn write(macho_file: *MachO, unwind_info: *UnwindInfo) !void { eh_frame_offset + 4 - fde_record.getCiePointer(), ).?; const eh_frame_sect = object.getSourceSection(object.eh_frame_sect_id.?); - const source_lsda_ptr = try fde_record.getLsdaPointer(cie_record, .{ + const source_lsda_ptr = fde_record.getLsdaPointer(cie_record, .{ .base_addr = eh_frame_sect.addr, .base_offset = fde_record_offset, - }); + }) catch continue; // We already handled this error if (source_lsda_ptr) |ptr| { const sym_index = object.getSymbolByAddress(ptr, null); const sym = object.symtab[sym_index]; - try fde_record.setLsdaPointer(cie_record, sym.n_value, .{ + fde_record.setLsdaPointer(cie_record, sym.n_value, .{ .base_addr = sect.addr, .base_offset = eh_frame_offset, - }); + }) catch continue; // We already handled this error } }, else => unreachable, @@ -191,10 +191,10 @@ pub fn write(macho_file: *MachO, unwind_info: *UnwindInfo) !void { const cie_record = eh_records.get( eh_frame_offset + 4 - fde_record.getCiePointer(), ).?; - const lsda_ptr = try fde_record.getLsdaPointer(cie_record, .{ + const lsda_ptr = fde_record.getLsdaPointer(cie_record, .{ .base_addr = sect.addr, .base_offset = eh_frame_offset, - }); + }) catch continue; // We already handled this error if (lsda_ptr) |ptr| { record.lsda = ptr - seg.vmaddr; } @@ -588,7 +588,7 @@ pub const Iterator = struct { var size = try reader.readIntLittle(u32); if (size == 0xFFFFFFFF) { - log.err("MachO doesn't support 64bit DWARF CFI __eh_frame records", .{}); + log.debug("MachO doesn't 
support 64bit DWARF CFI __eh_frame records", .{}); return error.BadDwarfCfi; } diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 916b6b9478..2b98bc5ffb 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -388,7 +388,19 @@ pub fn linkWithZld( } for (macho_file.objects.items, 0..) |*object, object_id| { - try object.splitIntoAtoms(macho_file, @as(u32, @intCast(object_id))); + object.splitIntoAtoms(macho_file, @as(u32, @intCast(object_id))) catch |err| switch (err) { + error.MissingEhFrameSection => try macho_file.reportParseError( + object.name, + "missing section: '__TEXT,__eh_frame' is required but could not be found", + .{}, + ), + error.BadDwarfCfi => try macho_file.reportParseError( + object.name, + "invalid DWARF: failed to parse '__TEXT,__eh_frame' section", + .{}, + ), + else => |e| return e, + }; } if (gc_sections) { @@ -433,7 +445,7 @@ pub fn linkWithZld( try unwind_info.collect(macho_file); try eh_frame.calcSectionSize(macho_file, &unwind_info); - try unwind_info.calcSectionSize(macho_file); + unwind_info.calcSectionSize(macho_file); try pruneAndSortSections(macho_file); try createSegments(macho_file); From a7240f0c99426a546b05f7e8bb086805a2766ea9 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 30 Aug 2023 22:29:24 +0200 Subject: [PATCH 56/57] macho: remove error.UnhandledDwFormValue from link.File Eventually, we will validate DWARF info upfront and report errors to the user but this will require a rewrite of several parts of the linker so leaving as a TODO for the near future. 
--- src/link.zig | 1 - src/link/MachO.zig | 32 ++++++++++++++++++++++---------- src/link/MachO/DwarfInfo.zig | 3 ++- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/link.zig b/src/link.zig index b57ab6fa7b..634f9679c4 100644 --- a/src/link.zig +++ b/src/link.zig @@ -751,7 +751,6 @@ pub const File = struct { UnexpectedRemainder, UnexpectedTable, UnexpectedValue, - UnhandledDwFormValue, UnknownFeature, Unseekable, UnsupportedCpuArchitecture, diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 10848da7f7..7cde05cf04 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -4101,6 +4101,9 @@ fn writeSymtab(self: *MachO) !SymtabCtx { }; } +// TODO this function currently skips generating symbol stabs in case errors are encountered in DWARF data. +// I think we should actually report those errors to the user and let them decide if they want to strip debug info +// in that case or not. fn generateSymbolStabs( self: *MachO, object: Object, @@ -4127,10 +4130,14 @@ fn generateSymbolStabs( }; var abbrev_it = compile_unit.getAbbrevEntryIterator(debug_info); - const cu_entry: DwarfInfo.AbbrevEntry = while (try abbrev_it.next(lookup)) |entry| switch (entry.tag) { - dwarf.TAG.compile_unit => break entry, - else => continue, - } else { + const maybe_cu_entry: ?DwarfInfo.AbbrevEntry = blk: { + while (abbrev_it.next(lookup) catch break :blk null) |entry| switch (entry.tag) { + dwarf.TAG.compile_unit => break :blk entry, + else => continue, + } else break :blk null; + }; + + const cu_entry = maybe_cu_entry orelse { log.debug("missing DWARF_TAG_compile_unit tag in {s}; skipping", .{object.name}); return; }; @@ -4139,11 +4146,13 @@ fn generateSymbolStabs( var maybe_tu_comp_dir: ?[]const u8 = null; var attr_it = cu_entry.getAttributeIterator(debug_info, compile_unit.cuh); - while (try attr_it.next()) |attr| switch (attr.name) { - dwarf.AT.comp_dir => maybe_tu_comp_dir = attr.getString(debug_info, compile_unit.cuh) orelse continue, - dwarf.AT.name => 
maybe_tu_name = attr.getString(debug_info, compile_unit.cuh) orelse continue, - else => continue, - }; + blk: { + while (attr_it.next() catch break :blk) |attr| switch (attr.name) { + dwarf.AT.comp_dir => maybe_tu_comp_dir = attr.getString(debug_info, compile_unit.cuh) orelse continue, + dwarf.AT.name => maybe_tu_name = attr.getString(debug_info, compile_unit.cuh) orelse continue, + else => continue, + }; + } if (maybe_tu_name == null or maybe_tu_comp_dir == null) { log.debug("missing DWARF_AT_comp_dir and DWARF_AT_name attributes {s}; skipping", .{object.name}); @@ -4183,7 +4192,10 @@ fn generateSymbolStabs( var name_lookup = DwarfInfo.SubprogramLookupByName.init(gpa); errdefer name_lookup.deinit(); try name_lookup.ensureUnusedCapacity(@as(u32, @intCast(object.atoms.items.len))); - try debug_info.genSubprogramLookupByName(compile_unit, lookup, &name_lookup); + debug_info.genSubprogramLookupByName(compile_unit, lookup, &name_lookup) catch |err| switch (err) { + error.UnhandledDwFormValue => {}, // TODO I don't like the fact we constantly re-iterate and hit this; we should validate once a priori + else => |e| return e, + }; break :blk name_lookup; } else null; defer if (name_lookup) |*nl| nl.deinit(); diff --git a/src/link/MachO/DwarfInfo.zig b/src/link/MachO/DwarfInfo.zig index 7b0536f60f..1f151a1dd5 100644 --- a/src/link/MachO/DwarfInfo.zig +++ b/src/link/MachO/DwarfInfo.zig @@ -444,7 +444,8 @@ fn findFormSize(self: DwarfInfo, form: u64, di_off: usize, cuh: CompileUnit.Head }, else => { - log.err("unhandled DW_FORM_* value with identifier {x}", .{form}); + // TODO figure out how to handle this + log.debug("unhandled DW_FORM_* value with identifier {x}", .{form}); return error.UnhandledDwFormValue; }, } From 19a1332ca140274d03e57d31fda7748a8a3641ba Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 30 Aug 2023 22:30:42 +0200 Subject: [PATCH 57/57] macho: improve formatting of invalid targets in libraries --- src/link/MachO.zig | 15 ++++++++++----- 1 file 
changed, 10 insertions(+), 5 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 7cde05cf04..ff5547cc19 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -4912,12 +4912,17 @@ pub fn handleAndReportParseError( error.InvalidTarget, error.InvalidTargetFatLibrary => { var targets_string = std.ArrayList(u8).init(self.base.allocator); defer targets_string.deinit(); - try targets_string.writer().writeAll("("); - for (ctx.detected_targets.items) |t| { - try targets_string.writer().print("{s}, ", .{t}); + + if (ctx.detected_targets.items.len > 1) { + try targets_string.writer().writeAll("("); + for (ctx.detected_targets.items) |t| { + try targets_string.writer().print("{s}, ", .{t}); + } + try targets_string.resize(targets_string.items.len - 2); + try targets_string.writer().writeAll(")"); + } else { + try targets_string.writer().writeAll(ctx.detected_targets.items[0]); } - try targets_string.resize(targets_string.items.len - 2); - try targets_string.writer().writeAll(")"); switch (err) { error.InvalidTarget => try self.reportParseError(