From 1a6d12ea92eb9033aed686c88b53ff406ec2fbe7 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 12 Sep 2023 23:27:11 +0200 Subject: [PATCH] elf: clean up and unify symbol ref handling in relocs Also, this lets us re-enable proper undefined symbols tracking. --- src/link/Elf.zig | 170 +++++++++++++++++-------------------- src/link/Elf/Atom.zig | 43 +++++++++- src/link/Elf/Object.zig | 4 +- src/link/Elf/Symbol.zig | 9 +- src/link/Elf/ZigModule.zig | 25 +++++- src/link/Elf/file.zig | 2 +- 6 files changed, 146 insertions(+), 107 deletions(-) diff --git a/src/link/Elf.zig b/src/link/Elf.zig index 51f909cb50..48209ec3d9 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -6,6 +6,9 @@ ptr_width: PtrWidth, /// If this is not null, an object file is created by LLVM and linked with LLD afterwards. llvm_object: ?*LlvmObject = null, +/// A list of all input files. +/// Index of each input file also encodes the priority or precedence of one input file +/// over another. files: std.MultiArrayList(File.Entry) = .{}, zig_module_index: ?File.Index = null, linker_defined_index: ?File.Index = null, @@ -47,6 +50,7 @@ shstrtab: StringTable(.strtab) = .{}, /// .strtab buffer strtab: StringTable(.strtab) = .{}, +/// Representation of the GOT table as committed to the file. got: GotSection = .{}, text_section_index: ?u16 = null, @@ -86,10 +90,10 @@ rela_iplt_start_index: ?Symbol.Index = null, rela_iplt_end_index: ?Symbol.Index = null, start_stop_indexes: std.ArrayListUnmanaged(u32) = .{}, +/// An array of symbols parsed across all input files. symbols: std.ArrayListUnmanaged(Symbol) = .{}, symbols_extra: std.ArrayListUnmanaged(u32) = .{}, resolver: std.AutoArrayHashMapUnmanaged(u32, Symbol.Index) = .{}, -unresolved: std.AutoArrayHashMapUnmanaged(Symbol.Index, void) = .{}, symbols_free_list: std.ArrayListUnmanaged(Symbol.Index) = .{}, phdr_table_dirty: bool = false, @@ -271,7 +275,6 @@ pub fn deinit(self: *Elf) void { self.symbols_free_list.deinit(gpa); self.got.deinit(gpa); self.resolver.deinit(gpa); - self.unresolved.deinit(gpa); self.start_stop_indexes.deinit(gpa); { @@ -316,7 +319,7 @@ pub fn getDeclVAddr(self: *Elf, decl_index: Module.Decl.Index, reloc_info: link. const parent_atom = self.symbol(reloc_info.parent_atom_index).atom(self).?; try parent_atom.addReloc(self, .{ .r_offset = reloc_info.offset, - .r_info = (@as(u64, @intCast(this_sym_index)) << 32) | elf.R_X86_64_64, + .r_info = (@as(u64, @intCast(this_sym.esym_index)) << 32) | elf.R_X86_64_64, .r_addend = reloc_info.addend, }); @@ -997,12 +1000,16 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node }; _ = compiler_rt_path; - // Parse input files + // Here we will parse input positional and library files (if referenced). + // This will roughly match in any linker backend we support. var positionals = std.ArrayList(Compilation.LinkObject).init(gpa); defer positionals.deinit(); try positionals.ensureUnusedCapacity(self.base.options.objects.len); positionals.appendSliceAssumeCapacity(self.base.options.objects); + // This is a set of object files emitted by clang in a single `build-exe` invocation. + // For instance, the implicit `a.o` as compiled by `zig build-exe a.c` will end up + // in this set. for (comp.c_object_table.keys()) |key| { try positionals.append(.{ .path = key.status.success.object_path }); } @@ -1016,6 +1023,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node try self.handleAndReportParseError(obj.path, err, &parse_ctx); } + // Handle any lazy symbols that were emitted by incremental compilation. if (self.lazy_syms.getPtr(.none)) |metadata| { // Most lazy symbols can be updated on first use, but // anyerror needs to wait for everything to be flushed. @@ -1046,27 +1054,34 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node try dw.flushModule(module); } + // If we haven't already, create a linker-generated input file comprising of + // linker-defined synthetic symbols only such as `_DYNAMIC`, etc. if (self.linker_defined_index == null) { const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); self.files.set(index, .{ .linker_defined = .{ .index = index } }); self.linker_defined_index = index; } - - // Symbol resolution happens here try self.addLinkerDefinedSymbols(); + + // Now, we are ready to resolve the symbols across all input files. + // We will first resolve the files in the ZigModule, next in the parsed + // input Object files. + // Any qualifing unresolved symbol will be upgraded to an absolute, weak + // symbol for potential resolution at load-time. self.resolveSymbols(); self.markImportsExports(); self.claimUnresolved(); - // Scan and create missing synthetic entries such as GOT indirection + // Scan and create missing synthetic entries such as GOT indirection. try self.scanRelocs(); - // Allocate atoms parsed from input object files + // Allocate atoms parsed from input object files, followed by allocating + // linker-defined synthetic symbols. try self.allocateObjects(); self.allocateLinkerDefinedSymbols(); // Beyond this point, everything has been allocated a virtual address and we can resolve - // the relocations. + // the relocations, and commit objects to file. if (self.zig_module_index) |index| { for (self.file(index).?.zig_module.atoms.keys()) |atom_index| { const atom_ptr = self.atom(atom_index).?; @@ -1083,9 +1098,12 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node } try self.writeObjects(); + // Generate and emit the symbol table. try self.updateSymtabSize(); try self.writeSymtab(); + // Dump the state for easy debugging. + // State can be dumped via `--debug-log link_state`. if (build_options.enable_logging) { state_log.debug("{}", .{self.dumpState()}); } @@ -1393,17 +1411,32 @@ fn claimUnresolved(self: *Elf) void { } } +/// In scanRelocs we will go over all live atoms and scan their relocs. +/// This will help us work out what synthetics to emit, GOT indirection, etc. +/// This is also the point where we will report undefined symbols for any +/// alloc sections. fn scanRelocs(self: *Elf) !void { + const gpa = self.base.allocator; + + var undefs = std.AutoHashMap(Symbol.Index, std.ArrayList(Atom.Index)).init(gpa); + defer { + var it = undefs.iterator(); + while (it.next()) |entry| { + entry.value_ptr.deinit(); + } + undefs.deinit(); + } + if (self.zig_module_index) |index| { const zig_module = self.file(index).?.zig_module; - try zig_module.scanRelocs(self); + try zig_module.scanRelocs(self, &undefs); } for (self.objects.items) |index| { const object = self.file(index).?.object; - try object.scanRelocs(self); + try object.scanRelocs(self, &undefs); } - // try self.reportUndefined(); + try self.reportUndefined(&undefs); for (self.symbols.items) |*sym| { if (sym.flags.needs_got) { @@ -1433,8 +1466,10 @@ fn allocateObjects(self: *Elf) !void { for (object.globals()) |global_index| { const global = self.symbol(global_index); + const atom_ptr = global.atom(self) orelse continue; + if (!atom_ptr.alive) continue; if (global.file_index == index) { - global.value = global.atom(self).?.value; + global.value = atom_ptr.value; } } } @@ -2829,21 +2864,23 @@ pub fn updateDeclExports( }; const stt_bits: u8 = @as(u4, @truncate(decl_esym.st_info)); + const name_off = try self.strtab.insert(gpa, exp_name); const sym_index = if (decl_metadata.@"export"(self, exp_name)) |exp_index| exp_index.* else blk: { const sym_index = try zig_module.addGlobalEsym(gpa); - _ = try zig_module.global_symbols.addOne(gpa); + const lookup_gop = try zig_module.globals_lookup.getOrPut(gpa, name_off); + const esym = zig_module.elfSym(sym_index); + esym.st_name = name_off; + lookup_gop.value_ptr.* = sym_index; try decl_metadata.exports.append(gpa, sym_index); + const gop = try self.getOrPutGlobal(name_off); + try zig_module.global_symbols.append(gpa, gop.index); break :blk sym_index; }; - const name_off = try self.strtab.insert(gpa, exp_name); - const esym = &zig_module.global_esyms.items[sym_index]; + const esym = &zig_module.global_esyms.items[sym_index & 0x0fffffff]; esym.st_value = decl_sym.value; esym.st_shndx = decl_sym.atom_index; esym.st_info = (stb_bits << 4) | stt_bits; esym.st_name = name_off; - - const gop = try self.getOrPutGlobal(name_off); - zig_module.global_symbols.items[sym_index] = gop.index; } } @@ -3636,16 +3673,17 @@ pub fn getGlobalSymbol(self: *Elf, name: []const u8, lib_name: ?[]const u8) !u32 _ = lib_name; const gpa = self.base.allocator; const off = try self.strtab.insert(gpa, name); - const gop = try self.getOrPutGlobal(off); const zig_module = self.file(self.zig_module_index.?).?.zig_module; const lookup_gop = try zig_module.globals_lookup.getOrPut(gpa, off); if (!lookup_gop.found_existing) { const esym_index = try zig_module.addGlobalEsym(gpa); - const esym = &zig_module.global_esyms.items[esym_index]; + const esym = zig_module.elfSym(esym_index); esym.st_name = off; lookup_gop.value_ptr.* = esym_index; + const gop = try self.getOrPutGlobal(off); + try zig_module.global_symbols.append(gpa, gop.index); } - return gop.index; + return lookup_gop.value_ptr.*; } const GetOrCreateComdatGroupOwnerResult = struct { @@ -3684,89 +3722,39 @@ pub fn comdatGroupOwner(self: *Elf, index: ComdatGroupOwner.Index) *ComdatGroupO return &self.comdat_groups_owners.items[index]; } -fn reportUndefined(self: *Elf) !void { +fn reportUndefined(self: *Elf, undefs: anytype) !void { const gpa = self.base.allocator; const max_notes = 4; - try self.misc_errors.ensureUnusedCapacity(gpa, self.unresolved.keys().len); + try self.misc_errors.ensureUnusedCapacity(gpa, undefs.count()); - const CollectStruct = struct { - notes: [max_notes]link.File.ErrorMsg = [_]link.File.ErrorMsg{.{ .msg = undefined }} ** max_notes, - notes_len: u3 = 0, - notes_count: usize = 0, - }; - - const collect: []CollectStruct = try gpa.alloc(CollectStruct, self.unresolved.keys().len); - defer gpa.free(collect); - @memset(collect, .{}); - - // Collect all references across all input files - if (self.zig_module_index) |index| { - const zig_module = self.file(index).?.zig_module; - for (zig_module.atoms.keys()) |atom_index| { - const atom_ptr = self.atom(atom_index).?; - if (!atom_ptr.alive) continue; - - for (atom_ptr.relocs(self)) |rel| { - if (self.unresolved.getIndex(rel.r_sym())) |bin_index| { - const note = try std.fmt.allocPrint(gpa, "referenced by {s}:{s}", .{ - zig_module.path, - atom_ptr.name(self), - }); - const bin = &collect[bin_index]; - if (bin.notes_len < max_notes) { - bin.notes[bin.notes_len] = .{ .msg = note }; - bin.notes_len += 1; - } - bin.notes_count += 1; - } - } - } - } - - for (self.objects.items) |index| { - const object = self.file(index).?.object; - for (object.atoms.items) |atom_index| { - const atom_ptr = self.atom(atom_index) orelse continue; - if (!atom_ptr.alive) continue; - - for (atom_ptr.relocs(self)) |rel| { - const sym_index = object.symbols.items[rel.r_sym()]; - if (self.unresolved.getIndex(sym_index)) |bin_index| { - const note = try std.fmt.allocPrint(gpa, "referenced by {}:{s}", .{ - object.fmtPath(), - atom_ptr.name(self), - }); - const bin = &collect[bin_index]; - if (bin.notes_len < max_notes) { - bin.notes[bin.notes_len] = .{ .msg = note }; - bin.notes_len += 1; - } - bin.notes_count += 1; - } - } - } - } - - // Generate error notes - for (self.unresolved.keys(), 0..) |sym_index, bin_index| { - const collected = &collect[bin_index]; + var it = undefs.iterator(); + while (it.next()) |entry| { + const undef_index = entry.key_ptr.*; + const atoms = entry.value_ptr.*.items; + const nnotes = @min(atoms.len, max_notes); var notes = try std.ArrayList(link.File.ErrorMsg).initCapacity(gpa, max_notes + 1); defer notes.deinit(); - for (collected.notes[0..collected.notes_len]) |note| { - notes.appendAssumeCapacity(note); + for (atoms[0..nnotes]) |atom_index| { + const atom_ptr = self.atom(atom_index).?; + const file_ptr = self.file(atom_ptr.file_index).?; + const note = try std.fmt.allocPrint(gpa, "referenced by {s}:{s}", .{ + file_ptr.fmtPath(), + atom_ptr.name(self), + }); + notes.appendAssumeCapacity(.{ .msg = note }); } - if (collected.notes_count > max_notes) { - const remaining = collected.notes_count - max_notes; + if (atoms.len > max_notes) { + const remaining = atoms.len - max_notes; const note = try std.fmt.allocPrint(gpa, "referenced {d} more times", .{remaining}); notes.appendAssumeCapacity(.{ .msg = note }); } var err_msg = link.File.ErrorMsg{ - .msg = try std.fmt.allocPrint(gpa, "undefined symbol: {s}", .{self.symbol(sym_index).name(self)}), + .msg = try std.fmt.allocPrint(gpa, "undefined symbol: {s}", .{self.symbol(undef_index).name(self)}), }; err_msg.notes = try notes.toOwnedSlice(); @@ -3931,7 +3919,7 @@ const DeclMetadata = struct { fn @"export"(m: DeclMetadata, elf_file: *Elf, name: []const u8) ?*u32 { const zig_module = elf_file.file(elf_file.zig_module_index.?).?.zig_module; for (m.exports.items) |*exp| { - const exp_name = elf_file.strtab.getAssumeExists(zig_module.global_esyms.items[exp.*].st_name); + const exp_name = elf_file.strtab.getAssumeExists(zig_module.elfSym(exp.*).st_name); if (mem.eql(u8, name, exp_name)) return exp; } return null; diff --git a/src/link/Elf/Atom.zig b/src/link/Elf/Atom.zig index 871570843d..67686fec41 100644 --- a/src/link/Elf/Atom.zig +++ b/src/link/Elf/Atom.zig @@ -312,7 +312,7 @@ pub fn freeRelocs(self: Atom, elf_file: *Elf) void { zig_module.relocs.items[self.relocs_section_index].clearRetainingCapacity(); } -pub fn scanRelocs(self: Atom, elf_file: *Elf) !void { +pub fn scanRelocs(self: Atom, elf_file: *Elf, undefs: anytype) !void { const file_ptr = elf_file.file(self.file_index).?; const rels = self.relocs(elf_file); var i: usize = 0; @@ -322,11 +322,25 @@ pub fn scanRelocs(self: Atom, elf_file: *Elf) !void { if (rel.r_type() == elf.R_X86_64_NONE) continue; const symbol = switch (file_ptr) { - .zig_module => elf_file.symbol(rel.r_sym()), + .zig_module => |x| elf_file.symbol(x.symbol(rel.r_sym())), .object => |x| elf_file.symbol(x.symbols.items[rel.r_sym()]), else => unreachable, }; + // Check for violation of One Definition Rule for COMDATs. + if (symbol.file(elf_file) == null) { + // TODO convert into an error + log.debug("{}: {s}: {s} refers to a discarded COMDAT section", .{ + file_ptr.fmtPath(), + self.name(elf_file), + symbol.name(elf_file), + }); + continue; + } + + // Report an undefined symbol. + try self.reportUndefined(elf_file, symbol, rel, undefs); + // While traversing relocations, mark symbols that require special handling such as // pointer indirection via GOT, or a stub trampoline via PLT. switch (rel.r_type()) { @@ -363,6 +377,28 @@ pub fn scanRelocs(self: Atom, elf_file: *Elf) !void { } } +// This function will report any undefined non-weak symbols that are not imports. +fn reportUndefined(self: Atom, elf_file: *Elf, sym: *const Symbol, rel: elf.Elf64_Rela, undefs: anytype) !void { + const rel_esym = switch (elf_file.file(self.file_index).?) { + .zig_module => |x| x.elfSym(rel.r_sym()).*, + .object => |x| x.symtab[rel.r_sym()], + else => unreachable, + }; + const esym = sym.elfSym(elf_file); + if (rel_esym.st_shndx == elf.SHN_UNDEF and + rel_esym.st_bind() == elf.STB_GLOBAL and + sym.esym_index > 0 and + !sym.flags.import and + esym.st_shndx == elf.SHN_UNDEF) + { + const gop = try undefs.getOrPut(sym.index); + if (!gop.found_existing) { + gop.value_ptr.* = std.ArrayList(Atom.Index).init(elf_file.base.allocator); + } + try gop.value_ptr.append(self.atom_index); + } +} + /// TODO mark relocs dirty pub fn resolveRelocs(self: Atom, elf_file: *Elf, code: []u8) !void { relocs_log.debug("0x{x}: {s}", .{ self.value, self.name(elf_file) }); @@ -376,7 +412,7 @@ pub fn resolveRelocs(self: Atom, elf_file: *Elf, code: []u8) !void { if (r_type == elf.R_X86_64_NONE) continue; const target = switch (file_ptr) { - .zig_module => elf_file.symbol(rel.r_sym()), + .zig_module => |x| elf_file.symbol(x.symbol(rel.r_sym())), .object => |x| elf_file.symbol(x.symbols.items[rel.r_sym()]), else => unreachable, }; @@ -564,3 +600,4 @@ const Allocator = std.mem.Allocator; const Atom = @This(); const Elf = @import("../Elf.zig"); const File = @import("file.zig").File; +const Symbol = @import("Symbol.zig"); diff --git a/src/link/Elf/Object.zig b/src/link/Elf/Object.zig index 14892302a4..f095a5f99e 100644 --- a/src/link/Elf/Object.zig +++ b/src/link/Elf/Object.zig @@ -392,14 +392,14 @@ fn filterRelocs( return .{ .start = f_start, .len = f_len }; } -pub fn scanRelocs(self: *Object, elf_file: *Elf) !void { +pub fn scanRelocs(self: *Object, elf_file: *Elf, undefs: anytype) !void { for (self.atoms.items) |atom_index| { const atom = elf_file.atom(atom_index) orelse continue; if (!atom.alive) continue; const shdr = atom.inputShdr(elf_file); if (shdr.sh_flags & elf.SHF_ALLOC == 0) continue; if (shdr.sh_type == elf.SHT_NOBITS) continue; - try atom.scanRelocs(elf_file); + try atom.scanRelocs(elf_file, undefs); } for (self.cies.items) |cie| { diff --git a/src/link/Elf/Symbol.zig b/src/link/Elf/Symbol.zig index 18c3e9021e..42b9b81ef9 100644 --- a/src/link/Elf/Symbol.zig +++ b/src/link/Elf/Symbol.zig @@ -43,7 +43,7 @@ pub fn isLocal(symbol: Symbol) bool { return !(symbol.flags.import or symbol.flags.@"export"); } -pub inline fn isIFunc(symbol: Symbol, elf_file: *Elf) bool { +pub fn isIFunc(symbol: Symbol, elf_file: *Elf) bool { return symbol.type(elf_file) == elf.STT_GNU_IFUNC; } @@ -69,12 +69,7 @@ pub fn file(symbol: Symbol, elf_file: *Elf) ?File { pub fn elfSym(symbol: Symbol, elf_file: *Elf) elf.Elf64_Sym { const file_ptr = symbol.file(elf_file).?; switch (file_ptr) { - .zig_module => |x| { - const is_global = symbol.esym_index & 0x10000000 != 0; - const esym_index = symbol.esym_index & 0x0fffffff; - if (is_global) return x.global_esyms.items[esym_index]; - return x.local_esyms.items[esym_index]; - }, + .zig_module => |x| return x.elfSym(symbol.esym_index).*, .linker_defined => |x| return x.symtab.items[symbol.esym_index], .object => |x| return x.symtab[symbol.esym_index], } diff --git a/src/link/Elf/ZigModule.zig b/src/link/Elf/ZigModule.zig index 5e655fc274..46a382abf9 100644 --- a/src/link/Elf/ZigModule.zig +++ b/src/link/Elf/ZigModule.zig @@ -1,3 +1,8 @@ +//! ZigModule encapsulates the state of the incrementally compiled Zig module. +//! It stores the associated input local and global symbols, allocated atoms, +//! and any relocations that may have been emitted. +//! Think about this as fake in-memory Object file for the Zig module. + /// Path is owned by Module and lives as long as *Module. path: []const u8, index: File.Index, @@ -41,7 +46,7 @@ pub fn addGlobalEsym(self: *ZigModule, allocator: Allocator) !Symbol.Index { const esym = self.global_esyms.addOneAssumeCapacity(); esym.* = Elf.null_sym; esym.st_info = elf.STB_GLOBAL << 4; - return index; + return index | 0x10000000; } pub fn addAtom(self: *ZigModule, output_section_index: u16, elf_file: *Elf) !Symbol.Index { @@ -135,11 +140,11 @@ pub fn claimUnresolved(self: *ZigModule, elf_file: *Elf) void { } } -pub fn scanRelocs(self: *ZigModule, elf_file: *Elf) !void { +pub fn scanRelocs(self: *ZigModule, elf_file: *Elf, undefs: anytype) !void { for (self.atoms.keys()) |atom_index| { const atom = elf_file.atom(atom_index) orelse continue; if (!atom.alive) continue; - try atom.scanRelocs(elf_file); + try atom.scanRelocs(elf_file, undefs); } } @@ -197,6 +202,20 @@ pub fn writeSymtab(self: *ZigModule, elf_file: *Elf, ctx: anytype) void { } } +pub fn symbol(self: *ZigModule, index: Symbol.Index) Symbol.Index { + const is_global = index & 0x10000000 != 0; + const actual_index = index & 0x0fffffff; + if (is_global) return self.global_symbols.items[actual_index]; + return self.local_symbols.items[actual_index]; +} + +pub fn elfSym(self: *ZigModule, index: Symbol.Index) *elf.Elf64_Sym { + const is_global = index & 0x10000000 != 0; + const actual_index = index & 0x0fffffff; + if (is_global) return &self.global_esyms.items[actual_index]; + return &self.local_esyms.items[actual_index]; +} + pub fn locals(self: *ZigModule) []const Symbol.Index { return self.local_symbols.items; } diff --git a/src/link/Elf/file.zig b/src/link/Elf/file.zig index e75755949d..2b49f43bf1 100644 --- a/src/link/Elf/file.zig +++ b/src/link/Elf/file.zig @@ -23,7 +23,7 @@ pub const File = union(enum) { _ = unused_fmt_string; _ = options; switch (file) { - .zig_module => try writer.writeAll("(zig module)"), + .zig_module => |x| try writer.print("{s}", .{x.path}), .linker_defined => try writer.writeAll("(linker defined)"), .object => |x| try writer.print("{}", .{x.fmtPath()}), // .shared_object => |x| try writer.writeAll(x.path),