From 989639efba0a7098819c3eb85130cb50413cbf7c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 3 Jul 2021 10:48:39 +0200 Subject: [PATCH] zld: coalesce symbols on creation --- src/link/MachO/Object.zig | 98 ++------ src/link/MachO/Symbol.zig | 481 +++++++++++--------------------------- src/link/MachO/Zld.zig | 413 +++++++++++++++++--------------- 3 files changed, 385 insertions(+), 607 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 1e169e93eb..c7150c2edc 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -45,9 +45,12 @@ dwarf_debug_str_index: ?u16 = null, dwarf_debug_line_index: ?u16 = null, dwarf_debug_ranges_index: ?u16 = null, +symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +strtab: std.ArrayListUnmanaged(u8) = .{}, + symbols: std.ArrayListUnmanaged(*Symbol) = .{}, stabs: std.ArrayListUnmanaged(*Symbol) = .{}, -initializers: std.ArrayListUnmanaged(*Symbol) = .{}, +initializers: std.ArrayListUnmanaged(u32) = .{}, data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, pub const Section = struct { @@ -216,20 +219,13 @@ pub fn deinit(self: *Object) void { } self.sections.deinit(self.allocator); - for (self.symbols.items) |sym| { - sym.deinit(self.allocator); - self.allocator.destroy(sym); - } self.symbols.deinit(self.allocator); - - for (self.stabs.items) |stab| { - stab.deinit(self.allocator); - self.allocator.destroy(stab); - } self.stabs.deinit(self.allocator); self.data_in_code_entries.deinit(self.allocator); self.initializers.deinit(self.allocator); + self.symtab.deinit(self.allocator); + self.strtab.deinit(self.allocator); if (self.name) |n| { self.allocator.free(n); @@ -271,11 +267,10 @@ pub fn parse(self: *Object) !void { self.header = header; try self.readLoadCommands(reader); - try self.parseSymbols(); try self.parseSections(); + try self.parseSymtab(); try self.parseDataInCode(); try self.parseInitializers(); - try self.parseDebugInfo(); } pub fn readLoadCommands(self: *Object, reader: anytype) !void { @@ -394,14 +389,13 @@ pub fn parseInitializers(self: *Object) !void { const relocs = section.relocs orelse unreachable; try self.initializers.ensureCapacity(self.allocator, relocs.len); for (relocs) |rel| { - const sym = self.symbols.items[rel.target.symbol]; - self.initializers.appendAssumeCapacity(sym); + self.initializers.appendAssumeCapacity(rel.target.symbol); } - mem.reverse(*Symbol, self.initializers.items); + mem.reverse(u32, self.initializers.items); } -pub fn parseSymbols(self: *Object) !void { +fn parseSymtab(self: *Object) !void { const index = self.symtab_cmd_index orelse return; const symtab_cmd = self.load_commands.items[index].Symtab; @@ -409,59 +403,12 @@ pub fn parseSymbols(self: *Object) !void { defer self.allocator.free(symtab); _ = try self.file.?.preadAll(symtab, symtab_cmd.symoff); const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, symtab)); + try self.symtab.appendSlice(self.allocator, slice); var strtab = try self.allocator.alloc(u8, symtab_cmd.strsize); defer self.allocator.free(strtab); _ = try self.file.?.preadAll(strtab, symtab_cmd.stroff); - - for (slice) |sym| { - const sym_name = mem.spanZ(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx)); - - if (Symbol.isStab(sym)) { - log.err("unhandled symbol type: stab {s} in {s}", .{ sym_name, self.name.? }); - return error.UnhandledSymbolType; - } - if (Symbol.isIndr(sym)) { - log.err("unhandled symbol type: indirect {s} in {s}", .{ sym_name, self.name.? }); - return error.UnhandledSymbolType; - } - if (Symbol.isAbs(sym)) { - log.err("unhandled symbol type: absolute {s} in {s}", .{ sym_name, self.name.? }); - return error.UnhandledSymbolType; - } - - const name = try self.allocator.dupe(u8, sym_name); - const symbol: *Symbol = symbol: { - if (Symbol.isSect(sym)) { - const linkage: Symbol.Regular.Linkage = linkage: { - if (!Symbol.isExt(sym)) break :linkage .translation_unit; - if (Symbol.isWeakDef(sym) or Symbol.isPext(sym)) break :linkage .linkage_unit; - break :linkage .global; - }; - break :symbol try Symbol.Regular.new(self.allocator, name, .{ - .linkage = linkage, - .address = sym.n_value, - .section = sym.n_sect - 1, - .weak_ref = Symbol.isWeakRef(sym), - .file = self, - }); - } - - if (sym.n_value != 0) { - break :symbol try Symbol.Tentative.new(self.allocator, name, .{ - .size = sym.n_value, - .alignment = (sym.n_desc >> 8) & 0x0f, - .file = self, - }); - } - - break :symbol try Symbol.Unresolved.new(self.allocator, name, .{ - .file = self, - }); - }; - - try self.symbols.append(self.allocator, symbol); - } + try self.strtab.appendSlice(self.allocator, strtab); } pub fn parseDebugInfo(self: *Object) !void { @@ -555,14 +502,6 @@ pub fn parseDebugInfo(self: *Object) !void { self.stabs.appendAssumeCapacity(delim_stab); } -fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 { - const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; - const sect = seg.sections.items[index]; - var buffer = try allocator.alloc(u8, @intCast(usize, sect.size)); - _ = try self.file.?.preadAll(buffer, sect.offset); - return buffer; -} - pub fn parseDataInCode(self: *Object) !void { const index = self.data_in_code_cmd_index orelse return; const data_in_code = self.load_commands.items[index].LinkeditData; @@ -582,3 +521,16 @@ pub fn parseDataInCode(self: *Object) !void { try self.data_in_code_entries.append(self.allocator, dice); } } + +fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 { + const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; + const sect = seg.sections.items[index]; + var buffer = try allocator.alloc(u8, @intCast(usize, sect.size)); + _ = try self.file.?.preadAll(buffer, sect.offset); + return buffer; +} + +pub fn getString(self: Object, off: u32) []const u8 { + assert(off < self.strtab.items.len); + return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + off)); +} diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 023e2ed7a8..5a0bfe9762 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -1,6 +1,7 @@ const Symbol = @This(); const std = @import("std"); +const assert = std.debug.assert; const macho = std.macho; const mem = std.mem; @@ -9,177 +10,32 @@ const Dylib = @import("Dylib.zig"); const Object = @import("Object.zig"); const StringTable = @import("StringTable.zig"); -pub const Type = enum { - stab, - regular, - proxy, - unresolved, - tentative, -}; - -/// Symbol type. -@"type": Type, - /// Symbol name. Owned slice. name: []const u8, -/// Alias of. -alias: ?*Symbol = null, - /// Index in GOT table for indirection. got_index: ?u32 = null, /// Index in stubs table for late binding. stubs_index: ?u32 = null, -pub const Stab = struct { - base: Symbol, +payload: union(enum) { + regular: Regular, + tentative: Tentative, + proxy: Proxy, + undef: Undefined, - // Symbol kind: function, etc. - kind: Kind, - - // Size of stab. - size: u64, - - // Base regular symbol for this stub if defined. - symbol: ?*Symbol = null, - - // null means self-reference. - file: ?*Object = null, - - pub const base_type: Symbol.Type = .stab; - - pub const Kind = enum { - so, - oso, - function, - global, - static, - }; - - const Opts = struct { - kind: Kind = .so, - size: u64 = 0, - symbol: ?*Symbol = null, - file: ?*Object = null, - }; - - pub fn new(allocator: *Allocator, name: []const u8, opts: Opts) !*Symbol { - const stab = try allocator.create(Stab); - errdefer allocator.destroy(stab); - - stab.* = .{ - .base = .{ - .@"type" = .stab, - .name = try allocator.dupe(u8, name), - }, - .kind = opts.kind, - .size = opts.size, - .symbol = opts.symbol, - .file = opts.file, + pub fn format(self: @This(), comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + return switch (self) { + .regular => |p| p.format(fmt, options, writer), + .tentative => |p| p.format(fmt, options, writer), + .proxy => |p| p.format(fmt, options, writer), + .undef => |p| p.format(fmt, options, writer), }; - - return &stab.base; } - - pub fn asNlists(stab: *Stab, allocator: *Allocator, strtab: *StringTable) ![]macho.nlist_64 { - var out = std.ArrayList(macho.nlist_64).init(allocator); - defer out.deinit(); - if (stab.kind == .so) { - try out.append(.{ - .n_strx = try strtab.getOrPut(stab.base.name), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - } else if (stab.kind == .oso) { - const mtime = mtime: { - const object = stab.file orelse break :mtime 0; - break :mtime object.mtime orelse 0; - }; - try out.append(.{ - .n_strx = try strtab.getOrPut(stab.base.name), - .n_type = macho.N_OSO, - .n_sect = 0, - .n_desc = 1, - .n_value = mtime, - }); - } else outer: { - const symbol = stab.symbol orelse unreachable; - const regular = symbol.getTopmostAlias().cast(Regular) orelse unreachable; - const is_match = blk: { - if (regular.file == null and stab.file == null) break :blk true; - if (regular.file) |f1| { - if (stab.file) |f2| { - if (f1 == f2) break :blk true; - } - } - break :blk false; - }; - if (!is_match) break :outer; - - switch (stab.kind) { - .function => { - try out.ensureUnusedCapacity(4); - out.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_BNSYM, - .n_sect = regular.section, - .n_desc = 0, - .n_value = regular.address, - }); - out.appendAssumeCapacity(.{ - .n_strx = try strtab.getOrPut(stab.base.name), - .n_type = macho.N_FUN, - .n_sect = regular.section, - .n_desc = 0, - .n_value = regular.address, - }); - out.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_FUN, - .n_sect = 0, - .n_desc = 0, - .n_value = stab.size, - }); - out.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_ENSYM, - .n_sect = regular.section, - .n_desc = 0, - .n_value = stab.size, - }); - }, - .global => { - try out.append(.{ - .n_strx = try strtab.getOrPut(stab.base.name), - .n_type = macho.N_GSYM, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - }, - .static => { - try out.append(.{ - .n_strx = try strtab.getOrPut(stab.base.name), - .n_type = macho.N_STSYM, - .n_sect = regular.section, - .n_desc = 0, - .n_value = regular.address, - }); - }, - .so, .oso => unreachable, - } - } - - return out.toOwnedSlice(); - } -}; +}, pub const Regular = struct { - base: Symbol, - /// Linkage type. linkage: Linkage, @@ -196,77 +52,56 @@ pub const Regular = struct { /// null means self-reference. file: ?*Object = null, - /// True if symbol was already committed into the final - /// symbol table. - visited: bool = false, - - pub const base_type: Symbol.Type = .regular; - pub const Linkage = enum { translation_unit, linkage_unit, global, }; - const Opts = struct { - linkage: Linkage = .translation_unit, - address: u64 = 0, - section: u8 = 0, - weak_ref: bool = false, - file: ?*Object = null, - }; - - pub fn new(allocator: *Allocator, name: []const u8, opts: Opts) !*Symbol { - const reg = try allocator.create(Regular); - errdefer allocator.destroy(reg); - - reg.* = .{ - .base = .{ - .@"type" = .regular, - .name = try allocator.dupe(u8, name), - }, - .linkage = opts.linkage, - .address = opts.address, - .section = opts.section, - .weak_ref = opts.weak_ref, - .file = opts.file, - }; - - return ®.base; - } - - pub fn asNlist(regular: *Regular, strtab: *StringTable) !macho.nlist_64 { - const n_strx = try strtab.getOrPut(regular.base.name); - var nlist = macho.nlist_64{ - .n_strx = n_strx, - .n_type = macho.N_SECT, - .n_sect = regular.section, - .n_desc = 0, - .n_value = regular.address, - }; - - if (regular.linkage != .translation_unit) { - nlist.n_type |= macho.N_EXT; - } - if (regular.linkage == .linkage_unit) { - nlist.n_type |= macho.N_PEXT; - nlist.n_desc |= macho.N_WEAK_DEF; - } - - return nlist; - } - - pub fn isTemp(regular: *Regular) bool { + pub fn isTemp(regular: Regular) bool { if (regular.linkage == .translation_unit) { return mem.startsWith(u8, regular.base.name, "l") or mem.startsWith(u8, regular.base.name, "L"); } return false; } + + pub fn format(self: Regular, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + try std.fmt.format(writer, "Regular {{ ", .{}); + try std.fmt.format(writer, ".linkage = {s}, ", .{self.linkage}); + try std.fmt.format(writer, ".address = 0x{x}, ", .{self.address}); + try std.fmt.format(writer, ".section = {}, ", .{self.section}); + if (self.weak_ref) { + try std.fmt.format(writer, ".weak_ref, ", .{}); + } + if (self.file) |file| { + try std.fmt.format(writer, ".file = {s}, ", .{file.name.?}); + } + try std.fmt.format(writer, "}}", .{}); + } +}; + +pub const Tentative = struct { + /// Symbol size. + size: u64, + + /// Symbol alignment as power of two. + alignment: u16, + + /// File where this symbol was referenced. + file: ?*Object = null, + + pub fn format(self: Tentative, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + try std.fmt.format(writer, "Tentative {{ ", .{}); + try std.fmt.format(writer, ".size = 0x{x}, ", .{self.size}); + try std.fmt.format(writer, ".alignment = 0x{x}, ", .{self.alignment}); + if (self.file) |file| { + try std.fmt.format(writer, ".file = {s}, ", .{file.name.?}); + } + try std.fmt.format(writer, "}}", .{}); + } }; pub const Proxy = struct { - base: Symbol, - /// Dynamic binding info - spots within the final /// executable where this proxy is referenced from. bind_info: std.ArrayListUnmanaged(struct { @@ -278,161 +113,123 @@ pub const Proxy = struct { /// null means self-reference. file: ?*Dylib = null, - pub const base_type: Symbol.Type = .proxy; - - const Opts = struct { - file: ?*Dylib = null, - }; - - pub fn new(allocator: *Allocator, name: []const u8, opts: Opts) !*Symbol { - const proxy = try allocator.create(Proxy); - errdefer allocator.destroy(proxy); - - proxy.* = .{ - .base = .{ - .@"type" = .proxy, - .name = try allocator.dupe(u8, name), - }, - .file = opts.file, - }; - - return &proxy.base; - } - - pub fn asNlist(proxy: *Proxy, strtab: *StringTable) !macho.nlist_64 { - const n_strx = try strtab.getOrPut(proxy.base.name); - return macho.nlist_64{ - .n_strx = n_strx, - .n_type = macho.N_UNDF | macho.N_EXT, - .n_sect = 0, - .n_desc = (proxy.dylibOrdinal() * macho.N_SYMBOL_RESOLVER) | macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY, - .n_value = 0, - }; - } - pub fn deinit(proxy: *Proxy, allocator: *Allocator) void { proxy.bind_info.deinit(allocator); } - pub fn dylibOrdinal(proxy: *Proxy) u16 { + pub fn dylibOrdinal(proxy: Proxy) u16 { const dylib = proxy.file orelse return 0; return dylib.ordinal.?; } + + pub fn format(self: Proxy, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + try std.fmt.format(writer, "Proxy {{ ", .{}); + if (self.bind_info.items.len > 0) { + // TODO + try std.fmt.format(writer, ".bind_info = {}, ", .{self.bind_info.items.len}); + } + if (self.file) |file| { + try std.fmt.format(writer, ".file = {s}, ", .{file.name.?}); + } + try std.fmt.format(writer, "}}", .{}); + } }; -pub const Unresolved = struct { - base: Symbol, - +pub const Undefined = struct { /// File where this symbol was referenced. /// null means synthetic, e.g., dyld_stub_binder. file: ?*Object = null, - pub const base_type: Symbol.Type = .unresolved; - - const Opts = struct { - file: ?*Object = null, - }; - - pub fn new(allocator: *Allocator, name: []const u8, opts: Opts) !*Symbol { - const undef = try allocator.create(Unresolved); - errdefer allocator.destroy(undef); - - undef.* = .{ - .base = .{ - .@"type" = .unresolved, - .name = try allocator.dupe(u8, name), - }, - .file = opts.file, - }; - - return &undef.base; - } - - pub fn asNlist(undef: *Unresolved, strtab: *StringTable) !macho.nlist_64 { - const n_strx = try strtab.getOrPut(undef.base.name); - return macho.nlist_64{ - .n_strx = n_strx, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; + pub fn format(self: Undefined, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + try std.fmt.format(writer, "Undefined {{ ", .{}); + if (self.file) |file| { + try std.fmt.format(writer, ".file = {s}, ", .{file.name.?}); + } + try std.fmt.format(writer, "}}", .{}); } }; -pub const Tentative = struct { - base: Symbol, +/// Create new undefined symbol. +pub fn new(allocator: *Allocator, name: []const u8) !*Symbol { + const new_sym = try allocator.create(Symbol); + errdefer allocator.destroy(new_sym); - /// Symbol size. - size: u64, - - /// Symbol alignment as power of two. - alignment: u16, - - /// File where this symbol was referenced. - file: ?*Object = null, - - pub const base_type: Symbol.Type = .tentative; - - const Opts = struct { - size: u64 = 0, - alignment: u16 = 0, - file: ?*Object = null, + new_sym.* = .{ + .name = try allocator.dupe(u8, name), + .payload = .{ + .undef = .{}, + }, }; - pub fn new(allocator: *Allocator, name: []const u8, opts: Opts) !*Symbol { - const tent = try allocator.create(Tentative); - errdefer allocator.destroy(tent); + return new_sym; +} - tent.* = .{ - .base = .{ - .@"type" = .tentative, - .name = try allocator.dupe(u8, name), +pub fn asNlist(symbol: *Symbol, strtab: *StringTable) macho.nlist_64 { + const n_strx = try strtab.getOrPut(symbol.name); + const nlist = nlist: { + switch (symbol.payload) { + .regular => |regular| { + var nlist = macho.nlist_64{ + .n_strx = n_strx, + .n_type = macho.N_SECT, + .n_sect = regular.section, + .n_desc = 0, + .n_value = regular.address, + }; + + if (regular.linkage != .translation_unit) { + nlist.n_type |= macho.N_EXT; + } + if (regular.linkage == .linkage_unit) { + nlist.n_type |= macho.N_PEXT; + nlist.n_desc |= macho.N_WEAK_DEF; + } + + break :nlist nlist; }, - .size = opts.size, - .alignment = opts.alignment, - .file = opts.file, - }; + .tentative => |tentative| { + // TODO + break :nlist macho.nlist_64{ + .n_strx = n_strx, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + }, + .proxy => |proxy| { + break :nlist macho.nlist_64{ + .n_strx = n_strx, + .n_type = macho.N_UNDF | macho.N_EXT, + .n_sect = 0, + .n_desc = (proxy.dylibOrdinal() * macho.N_SYMBOL_RESOLVER) | macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY, + .n_value = 0, + }; + }, + .undef => |undef| { + // TODO + break :nlist macho.nlist_64{ + .n_strx = n_strx, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + }, + } + }; + return nlist; +} - return &tent.base; - } +pub fn deinit(symbol: *Symbol, allocator: *Allocator) void { + allocator.free(symbol.name); - pub fn asNlist(tent: *Tentative, strtab: *StringTable) !macho.nlist_64 { - // TODO - const n_strx = try strtab.getOrPut(tent.base.name); - return macho.nlist_64{ - .n_strx = n_strx, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - } -}; - -pub fn deinit(base: *Symbol, allocator: *Allocator) void { - allocator.free(base.name); - - switch (base.@"type") { - .proxy => @fieldParentPtr(Proxy, "base", base).deinit(allocator), + switch (symbol.payload) { + .proxy => |*proxy| proxy.deinit(allocator), else => {}, } } -pub fn cast(base: *Symbol, comptime T: type) ?*T { - if (base.@"type" != T.base_type) { - return null; - } - return @fieldParentPtr(T, "base", base); -} - -pub fn getTopmostAlias(base: *Symbol) *Symbol { - if (base.alias) |alias| { - return alias.getTopmostAlias(); - } - return base; -} - pub fn isStab(sym: macho.nlist_64) bool { return (macho.N_STAB & sym.n_type) != 0; } diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index b0677f2604..3eeaa3f181 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -102,10 +102,9 @@ objc_selrefs_section_index: ?u16 = null, objc_classrefs_section_index: ?u16 = null, objc_data_section_index: ?u16 = null, +locals: std.ArrayListUnmanaged(*Symbol) = .{}, globals: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, imports: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, -unresolved: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, -tentatives: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, /// Offset into __DATA,__common section. /// Set if the linker found tentative definitions in any of the objects. @@ -173,15 +172,24 @@ pub fn deinit(self: *Zld) void { } self.dylibs.deinit(self.allocator); - for (self.imports.values()) |proxy| { - proxy.deinit(self.allocator); - self.allocator.destroy(proxy); + for (self.imports.values()) |sym| { + sym.deinit(self.allocator); + self.allocator.destroy(sym); } self.imports.deinit(self.allocator); - self.tentatives.deinit(self.allocator); + for (self.globals.values()) |sym| { + sym.deinit(self.allocator); + self.allocator.destroy(sym); + } self.globals.deinit(self.allocator); - self.unresolved.deinit(self.allocator); + + for (self.locals.items) |sym| { + sym.deinit(self.allocator); + self.allocator.destroy(sym); + } + self.locals.deinit(self.allocator); + self.strtab.deinit(); } @@ -221,20 +229,21 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg try self.parseInputFiles(files, args.syslibroot); try self.parseLibs(args.libs, args.syslibroot); try self.resolveSymbols(); - try self.resolveStubsAndGotEntries(); - try self.updateMetadata(); - try self.sortSections(); - try self.addRpaths(args.rpaths); - try self.addDataInCodeLC(); - try self.addCodeSignatureLC(); - try self.allocateTextSegment(); - try self.allocateDataConstSegment(); - try self.allocateDataSegment(); - self.allocateLinkeditSegment(); - try self.allocateSymbols(); - try self.allocateTentativeSymbols(); - try self.allocateProxyBindAddresses(); - try self.flush(); + return error.TODO; + // try self.resolveStubsAndGotEntries(); + // try self.updateMetadata(); + // try self.sortSections(); + // try self.addRpaths(args.rpaths); + // try self.addDataInCodeLC(); + // try self.addCodeSignatureLC(); + // try self.allocateTextSegment(); + // try self.allocateDataConstSegment(); + // try self.allocateDataSegment(); + // self.allocateLinkeditSegment(); + // try self.allocateSymbols(); + // try self.allocateTentativeSymbols(); + // try self.allocateProxyBindAddresses(); + // try self.flush(); } fn parseInputFiles(self: *Zld, files: []const []const u8, syslibroot: ?[]const u8) !void { @@ -1458,92 +1467,100 @@ fn writeStubInStubHelper(self: *Zld, index: u32) !void { fn resolveSymbolsInObject(self: *Zld, object: *Object) !void { log.debug("resolving symbols in '{s}'", .{object.name}); - for (object.symbols.items) |sym| { - if (sym.cast(Symbol.Regular)) |reg| { - if (reg.linkage == .translation_unit) continue; // Symbol local to TU. + for (object.symtab.items) |sym| { + const sym_name = object.getString(sym.n_strx); - if (self.tentatives.fetchSwapRemove(sym.name)) |kv| { - // Create link to the global. - kv.value.alias = sym; - } - if (self.unresolved.fetchSwapRemove(sym.name)) |kv| { - // Create link to the global. - kv.value.alias = sym; - } - const sym_ptr = self.globals.getPtr(sym.name) orelse { - // Put new global symbol into the symbol table. - try self.globals.putNoClobber(self.allocator, sym.name, sym); - continue; + if (Symbol.isStab(sym)) { + log.err("unhandled symbol type: stab {s}", .{sym_name}); + log.err(" | first definition in {s}", .{object.name.?}); + return error.UnhandledSymbolType; + } + + if (Symbol.isIndr(sym)) { + log.err("unhandled symbol type: indirect {s}", .{sym_name}); + log.err(" | first definition in {s}", .{object.name.?}); + return error.UnhandledSymbolType; + } + + if (Symbol.isAbs(sym)) { + log.err("unhandled symbol type: absolute {s}", .{sym_name}); + log.err(" | first definition in {s}", .{object.name.?}); + return error.UnhandledSymbolType; + } + + if (Symbol.isSect(sym) and !Symbol.isExt(sym)) { + // Regular symbol local to translation unit + const symbol = try Symbol.new(self.allocator, sym_name); + symbol.payload = .{ + .regular = .{ + .linkage = .translation_unit, + .address = sym.n_value, + .section = sym.n_sect - 1, + .weak_ref = Symbol.isWeakRef(sym), + .file = object, + }, }; - const g_sym = sym_ptr.*; - const g_reg = g_sym.cast(Symbol.Regular) orelse unreachable; + try self.locals.append(self.allocator, symbol); + try object.symbols.append(self.allocator, symbol); + continue; + } - switch (g_reg.linkage) { - .translation_unit => unreachable, - .linkage_unit => { - if (reg.linkage == .linkage_unit) { - // Create link to the first encountered linkage_unit symbol. - sym.alias = g_sym; - continue; - } - }, - .global => { - if (reg.linkage == .global) { - log.debug("symbol '{s}' defined multiple times", .{reg.base.name}); - return error.MultipleSymbolDefinitions; - } - sym.alias = g_sym; - continue; - }, + const symbol = self.globals.get(sym_name) orelse symbol: { + // Insert new global symbol. + const symbol = try Symbol.new(self.allocator, sym_name); + symbol.payload.undef.file = object; + try self.globals.putNoClobber(self.allocator, symbol.name, symbol); + break :symbol symbol; + }; + + if (Symbol.isSect(sym)) { + // Global symbol + const linkage: Symbol.Regular.Linkage = if (Symbol.isWeakDef(sym) or Symbol.isPext(sym)) + .linkage_unit + else + .global; + + const should_update = if (symbol.payload == .regular) blk: { + if (symbol.payload.regular.linkage == .global and linkage == .global) { + log.err("symbol '{s}' defined multiple times", .{sym_name}); + log.err(" | first definition in {s}", .{symbol.payload.regular.file.?.name.?}); + log.err(" | next definition in {s}", .{object.name.?}); + return error.MultipleSymbolDefinitions; + } + break :blk symbol.payload.regular.linkage != .global; + } else true; + + if (should_update) { + symbol.payload = .{ + .regular = .{ + .linkage = linkage, + .address = sym.n_value, + .section = sym.n_sect - 1, + .weak_ref = Symbol.isWeakRef(sym), + .file = object, + }, + }; } - - g_sym.alias = sym; - sym_ptr.* = sym; - } else if (sym.cast(Symbol.Tentative)) |tent| { - if (self.globals.get(sym.name)) |g_sym| { - sym.alias = g_sym; - continue; - } - - if (self.unresolved.fetchSwapRemove(sym.name)) |kv| { - kv.value.alias = sym; - } - - const sym_ptr = self.tentatives.getPtr(sym.name) orelse { - // Put new tentative definition symbol into symbol table. - try self.tentatives.putNoClobber(self.allocator, sym.name, sym); - continue; + } else if (sym.n_value != 0) { + // Tentative definition + const should_update = switch (symbol.payload) { + .tentative => |tent| tent.size < sym.n_value, + .undef => true, + else => false, }; - // Compare by size and pick the largest tentative definition. - // We model this like a heap where the tentative definition with the - // largest size always washes up on top. - const t_sym = sym_ptr.*; - const t_tent = t_sym.cast(Symbol.Tentative) orelse unreachable; + if (should_update) { + symbol.payload = .{ + .tentative = .{ + .size = sym.n_value, + .alignment = (sym.n_desc >> 8) & 0x0f, + .file = object, + }, + }; + } + } - if (tent.size < t_tent.size) { - sym.alias = t_sym; - continue; - } - - t_sym.alias = sym; - sym_ptr.* = sym; - } else if (sym.cast(Symbol.Unresolved)) |_| { - if (self.globals.get(sym.name)) |g_sym| { - sym.alias = g_sym; - continue; - } - if (self.tentatives.get(sym.name)) |t_sym| { - sym.alias = t_sym; - continue; - } - if (self.unresolved.get(sym.name)) |u_sym| { - sym.alias = u_sym; - continue; - } - - try self.unresolved.putNoClobber(self.allocator, sym.name, sym); - } else unreachable; + try object.symbols.append(self.allocator, symbol); } } @@ -1553,111 +1570,123 @@ fn resolveSymbols(self: *Zld) !void { try self.resolveSymbolsInObject(object); } - // Second pass, resolve symbols in static libraries. - var next_sym: usize = 0; - while (true) { - if (next_sym == self.unresolved.count()) break; + log.warn("globals", .{}); + for (self.globals.values()) |value| { + log.warn(" | {s}: {}", .{ value.name, value.payload }); + } - const sym = self.unresolved.values()[next_sym]; - - var reset: bool = false; - for (self.archives.items) |archive| { - // Check if the entry exists in a static archive. - const offsets = archive.toc.get(sym.name) orelse { - // No hit. - continue; - }; - assert(offsets.items.len > 0); - - const object = try archive.parseObject(offsets.items[0]); - try self.objects.append(self.allocator, object); - try self.resolveSymbolsInObject(object); - - reset = true; - break; - } - - if (reset) { - next_sym = 0; - } else { - next_sym += 1; + for (self.objects.items) |object| { + log.warn("object {s}", .{object.name.?}); + for (object.symbols.items) |sym| { + log.warn(" | {s}: {}", .{ sym.name, sym.payload }); } } - // Third pass, resolve symbols in dynamic libraries. - var unresolved = std.ArrayList(*Symbol).init(self.allocator); - defer unresolved.deinit(); + // // Second pass, resolve symbols in static libraries. + // var next_sym: usize = 0; + // while (true) { + // if (next_sym == self.unresolved.count()) break; - try unresolved.ensureCapacity(self.unresolved.count()); - for (self.unresolved.values()) |value| { - unresolved.appendAssumeCapacity(value); - } - self.unresolved.clearRetainingCapacity(); + // const sym = self.unresolved.values()[next_sym]; - // Put dyld_stub_binder as an unresolved special symbol. - { - const name = try self.allocator.dupe(u8, "dyld_stub_binder"); - errdefer self.allocator.free(name); - const undef = try Symbol.Unresolved.new(self.allocator, name, .{}); - try unresolved.append(undef); - } + // var reset: bool = false; + // for (self.archives.items) |archive| { + // // Check if the entry exists in a static archive. + // const offsets = archive.toc.get(sym.name) orelse { + // // No hit. + // continue; + // }; + // assert(offsets.items.len > 0); - var referenced = std.AutoHashMap(*Dylib, void).init(self.allocator); - defer referenced.deinit(); + // const object = try archive.parseObject(offsets.items[0]); + // try self.objects.append(self.allocator, object); + // try self.resolveSymbolsInObject(object); - loop: while (unresolved.popOrNull()) |undef| { - const proxy = self.imports.get(undef.name) orelse outer: { - const proxy = inner: { - for (self.dylibs.items) |dylib| { - const proxy = (try dylib.createProxy(undef.name)) orelse continue; - try referenced.put(dylib, {}); - break :inner proxy; - } - if (mem.eql(u8, undef.name, "___dso_handle")) { - // TODO this is just a temp patch until I work out what to actually - // do with ___dso_handle and __mh_execute_header symbols which are - // synthetically created by the linker on macOS. - break :inner try Symbol.Proxy.new(self.allocator, undef.name, .{}); - } + // reset = true; + // break; + // } - self.unresolved.putAssumeCapacityNoClobber(undef.name, undef); - continue :loop; - }; + // if (reset) { + // next_sym = 0; + // } else { + // next_sym += 1; + // } + // } - try self.imports.putNoClobber(self.allocator, proxy.name, proxy); - break :outer proxy; - }; - undef.alias = proxy; - } + // // Third pass, resolve symbols in dynamic libraries. + // var unresolved = std.ArrayList(*Symbol).init(self.allocator); + // defer unresolved.deinit(); - // Add LC_LOAD_DYLIB load command for each referenced dylib/stub. - var it = referenced.iterator(); - while (it.next()) |entry| { - const dylib = entry.key_ptr.*; - dylib.ordinal = self.next_dylib_ordinal; - const dylib_id = dylib.id orelse unreachable; - var dylib_cmd = try createLoadDylibCommand( - self.allocator, - dylib_id.name, - dylib_id.timestamp, - dylib_id.current_version, - dylib_id.compatibility_version, - ); - errdefer dylib_cmd.deinit(self.allocator); - try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); - self.next_dylib_ordinal += 1; - } + // try unresolved.ensureCapacity(self.unresolved.count()); + // for (self.unresolved.values()) |value| { + // unresolved.appendAssumeCapacity(value); + // } + // self.unresolved.clearRetainingCapacity(); - if (self.unresolved.count() > 0) { - for (self.unresolved.values()) |undef| { - log.err("undefined reference to symbol '{s}'", .{undef.name}); - if (undef.cast(Symbol.Unresolved).?.file) |file| { - log.err(" | referenced in {s}", .{file.name.?}); - } - } + // // Put dyld_stub_binder as an unresolved special symbol. + // { + // const name = try self.allocator.dupe(u8, "dyld_stub_binder"); + // errdefer self.allocator.free(name); + // const undef = try Symbol.Unresolved.new(self.allocator, name, .{}); + // try unresolved.append(undef); + // } - return error.UndefinedSymbolReference; - } + // var referenced = std.AutoHashMap(*Dylib, void).init(self.allocator); + // defer referenced.deinit(); + + // loop: while (unresolved.popOrNull()) |undef| { + // const proxy = self.imports.get(undef.name) orelse outer: { + // const proxy = inner: { + // for (self.dylibs.items) |dylib| { + // const proxy = (try dylib.createProxy(undef.name)) orelse continue; + // try referenced.put(dylib, {}); + // break :inner proxy; + // } + // if (mem.eql(u8, undef.name, "___dso_handle")) { + // // TODO this is just a temp patch until I work out what to actually + // // do with ___dso_handle and __mh_execute_header symbols which are + // // synthetically created by the linker on macOS. + // break :inner try Symbol.Proxy.new(self.allocator, undef.name, .{}); + // } + + // self.unresolved.putAssumeCapacityNoClobber(undef.name, undef); + // continue :loop; + // }; + + // try self.imports.putNoClobber(self.allocator, proxy.name, proxy); + // break :outer proxy; + // }; + // undef.alias = proxy; + // } + + // // Add LC_LOAD_DYLIB load command for each referenced dylib/stub. + // var it = referenced.iterator(); + // while (it.next()) |entry| { + // const dylib = entry.key_ptr.*; + // dylib.ordinal = self.next_dylib_ordinal; + // const dylib_id = dylib.id orelse unreachable; + // var dylib_cmd = try createLoadDylibCommand( + // self.allocator, + // dylib_id.name, + // dylib_id.timestamp, + // dylib_id.current_version, + // dylib_id.compatibility_version, + // ); + // errdefer dylib_cmd.deinit(self.allocator); + // try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); + // self.next_dylib_ordinal += 1; + // } + + // if (self.unresolved.count() > 0) { + // for (self.unresolved.values()) |undef| { + // log.err("undefined reference to symbol '{s}'", .{undef.name}); + // if (undef.cast(Symbol.Unresolved).?.file) |file| { + // log.err(" | referenced in {s}", .{file.name.?}); + // } + // } + + // return error.UndefinedSymbolReference; + // } } fn resolveStubsAndGotEntries(self: *Zld) !void {