From c575e3daa4cdb39e38cc0b32fc8fe4a917947d34 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 25 Jul 2024 16:47:43 +0200 Subject: [PATCH] elf: resolve COMDATs in more parallel-friendly way --- src/link/Elf.zig | 123 +++++++--------------------- src/link/Elf/Atom.zig | 5 +- src/link/Elf/Object.zig | 71 +++++++++++++--- src/link/Elf/file.zig | 7 ++ src/link/Elf/relocatable.zig | 7 +- src/link/Elf/synthetic_sections.zig | 10 +-- test/link/elf.zig | 44 ++++++++-- 7 files changed, 140 insertions(+), 127 deletions(-) diff --git a/src/link/Elf.zig b/src/link/Elf.zig index d8d62eba10..b3cc9bc601 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -215,11 +215,8 @@ merge_subsections: std.ArrayListUnmanaged(MergeSubsection) = .{}, /// Table of last atom index in a section and matching atom free list if any. last_atom_and_free_list_table: LastAtomAndFreeListTable = .{}, -comdat_groups_owners: std.ArrayListUnmanaged(ComdatGroupOwner) = .{}, -comdat_groups_table: std.ArrayHashMapUnmanaged(ComdatGroupKey, ComdatGroupOwner.Index, ComdatGroupContext, false) = .{}, - /// Global string table used to provide quick access to global symbol resolvers -/// such as `resolver` and `comdat_groups_table`. +/// such as `resolver`. strings: StringTable = .{}, first_eflags: ?elf.Elf64_Word = null, @@ -506,8 +503,6 @@ pub fn deinit(self: *Elf) void { } self.last_atom_and_free_list_table.deinit(gpa); - self.comdat_groups_owners.deinit(gpa); - self.comdat_groups_table.deinit(gpa); self.strings.deinit(gpa); self.got.deinit(gpa); @@ -1305,7 +1300,7 @@ pub fn flushModule(self: *Elf, arena: Allocator, tid: Zcu.PerThread.Id, prog_nod // input Object files. // Any qualifing unresolved symbol will be upgraded to an absolute, weak // symbol for potential resolution at load-time. - self.resolveSymbols(); + try self.resolveSymbols(); self.markEhFrameAtomsDead(); try self.resolveMergeSections(); @@ -1955,7 +1950,7 @@ fn accessLibPath( /// 4. Reset state of all resolved globals since we will redo this bit on the pruned set. /// 5. Remove references to dead objects/shared objects /// 6. Re-run symbol resolution on pruned objects and shared objects sets. -pub fn resolveSymbols(self: *Elf) void { +pub fn resolveSymbols(self: *Elf) !void { // Resolve symbols in the ZigObject. For now, we assume that it's always live. if (self.zigObjectPtr()) |zig_object| zig_object.asFile().resolveSymbols(self); // Resolve symbols on the set of all objects and shared objects (even if some are unneeded). @@ -1986,32 +1981,17 @@ pub fn resolveSymbols(self: *Elf) void { } else i += 1; } - // Dedup comdat groups. - for (self.objects.items) |index| { - const object = self.file(index).?.object; - for (object.comdat_groups.items) |cg| { - const cg_owner = self.comdatGroupOwner(cg.owner); - const owner_file_index = if (self.file(cg_owner.file)) |file_ptr| - file_ptr.object.index - else - std.math.maxInt(File.Index); - cg_owner.file = @min(owner_file_index, index); - } - } + { + // Dedup comdat groups. + var table = std.StringHashMap(Ref).init(self.base.comp.gpa); + defer table.deinit(); - for (self.objects.items) |index| { - const object = self.file(index).?.object; - for (object.comdat_groups.items) |cg| { - const cg_owner = self.comdatGroupOwner(cg.owner); - if (cg_owner.file != index) { - for (cg.comdatGroupMembers(self)) |shndx| { - const atom_index = object.atoms_indexes.items[shndx]; - if (object.atom(atom_index)) |atom_ptr| { - atom_ptr.flags.alive = false; - atom_ptr.markFdesDead(self); - } - } - } + for (self.objects.items) |index| { + try self.file(index).?.object.resolveComdatGroups(self, &table); + } + + for (self.objects.items) |index| { + self.file(index).?.object.markComdatGroupsDead(self); } } @@ -5632,6 +5612,10 @@ pub fn atom(self: *Elf, ref: Ref) ?*Atom { return file_ptr.atom(ref.index); } +pub fn comdatGroup(self: *Elf, ref: Ref) *ComdatGroup { + return self.file(ref.file).?.comdatGroup(ref.index); +} + /// Returns pointer-to-symbol described at sym_index. pub fn symbol(self: *Elf, sym_index: Symbol.Index) *Symbol { return &self.symbols.items[sym_index]; @@ -5737,31 +5721,6 @@ pub fn zigObjectPtr(self: *Elf) ?*ZigObject { return self.file(index).?.zig_object; } -const GetOrCreateComdatGroupOwnerResult = struct { - found_existing: bool, - index: ComdatGroupOwner.Index, -}; - -pub fn getOrCreateComdatGroupOwner(self: *Elf, key: ComdatGroupKey) !GetOrCreateComdatGroupOwnerResult { - const gpa = self.base.comp.gpa; - const gop = try self.comdat_groups_table.getOrPutContext(gpa, key, .{ .elf_file = self }); - if (!gop.found_existing) { - const index: ComdatGroupOwner.Index = @intCast(self.comdat_groups_owners.items.len); - const owner = try self.comdat_groups_owners.addOne(gpa); - owner.* = .{}; - gop.value_ptr.* = index; - } - return .{ - .found_existing = gop.found_existing, - .index = gop.value_ptr.*, - }; -} - -pub fn comdatGroupOwner(self: *Elf, index: ComdatGroupOwner.Index) *ComdatGroupOwner { - assert(index < self.comdat_groups_owners.items.len); - return &self.comdat_groups_owners.items[index]; -} - pub fn addMergeSubsection(self: *Elf) !MergeSubsection.Index { const index: MergeSubsection.Index = @intCast(self.merge_subsections.items.len); const msec = try self.merge_subsections.addOne(self.base.comp.gpa); @@ -6243,48 +6202,24 @@ const default_entry_addr = 0x8000000; pub const base_tag: link.File.Tag = .elf; -const ComdatGroupKey = struct { - /// String table offset. - off: u32, - - /// File index. - file_index: File.Index, - - pub fn get(key: ComdatGroupKey, elf_file: *Elf) [:0]const u8 { - const file_ptr = elf_file.file(key.file_index).?; - return file_ptr.getString(key.off); - } -}; - -const ComdatGroupContext = struct { - elf_file: *Elf, - - pub fn eql(ctx: ComdatGroupContext, a: ComdatGroupKey, b: ComdatGroupKey, b_index: usize) bool { - _ = b_index; - const elf_file = ctx.elf_file; - return mem.eql(u8, a.get(elf_file), b.get(elf_file)); - } - - pub fn hash(ctx: ComdatGroupContext, a: ComdatGroupKey) u32 { - return std.array_hash_map.hashString(a.get(ctx.elf_file)); - } -}; - -const ComdatGroupOwner = struct { - file: File.Index = 0, - - const Index = u32; -}; - pub const ComdatGroup = struct { - owner: ComdatGroupOwner.Index, - file: File.Index, + signature_off: u32, + file_index: File.Index, shndx: u32, members_start: u32, members_len: u32, + alive: bool = true, + + pub fn file(cg: ComdatGroup, elf_file: *Elf) File { + return elf_file.file(cg.file_index).?; + } + + pub fn signature(cg: ComdatGroup, elf_file: *Elf) [:0]const u8 { + return cg.file(elf_file).object.getString(cg.signature_off); + } pub fn comdatGroupMembers(cg: ComdatGroup, elf_file: *Elf) []const u32 { - const object = elf_file.file(cg.file).?.object; + const object = cg.file(elf_file).object; return object.comdat_group_data.items[cg.members_start..][0..cg.members_len]; } diff --git a/src/link/Elf/Atom.zig b/src/link/Elf/Atom.zig index 743603fd47..e08bada260 100644 --- a/src/link/Elf/Atom.zig +++ b/src/link/Elf/Atom.zig @@ -346,7 +346,10 @@ pub fn writeRelocs(self: Atom, elf_file: *Elf, out_relocs: *std.ArrayList(elf.El r_sym = elf_file.sectionSymbolOutputSymtabIndex(msub.mergeSection(elf_file).output_section_index); } else { r_addend += @intCast(target.address(.{}, elf_file)); - r_sym = elf_file.sectionSymbolOutputSymtabIndex(target.outputShndx().?); + r_sym = if (target.outputShndx()) |osec| + elf_file.sectionSymbolOutputSymtabIndex(osec) + else + 0; }, else => { r_sym = target.outputSymtabIndex(elf_file) orelse 0; diff --git a/src/link/Elf/Object.zig b/src/link/Elf/Object.zig index 649c6e0354..1d5a575457 100644 --- a/src/link/Elf/Object.zig +++ b/src/link/Elf/Object.zig @@ -216,27 +216,41 @@ fn initAtoms(self: *Object, allocator: Allocator, handle: std.fs.File, elf_file: const shndx = @as(u32, @intCast(i)); const group_raw_data = try self.preadShdrContentsAlloc(allocator, handle, shndx); defer allocator.free(group_raw_data); - const group_nmembers = @divExact(group_raw_data.len, @sizeOf(u32)); + const group_nmembers = math.divExact(usize, group_raw_data.len, @sizeOf(u32)) catch { + try elf_file.reportParseError2( + self.index, + "corrupt section group: not evenly divisible ", + .{}, + ); + return error.MalformedObject; + }; + if (group_nmembers == 0) { + try elf_file.reportParseError2( + self.index, + "corrupt section group: empty section", + .{}, + ); + return error.MalformedObject; + } const group_members = @as([*]align(1) const u32, @ptrCast(group_raw_data.ptr))[0..group_nmembers]; if (group_members[0] != elf.GRP_COMDAT) { - // TODO convert into an error - log.debug("{}: unknown SHT_GROUP format", .{self.fmtPath()}); - continue; + try elf_file.reportParseError2( + self.index, + "corrupt section group: unknown SHT_GROUP format", + .{}, + ); + return error.MalformedObject; } const group_start = @as(u32, @intCast(self.comdat_group_data.items.len)); try self.comdat_group_data.appendUnalignedSlice(allocator, group_members[1..]); - const gop = try elf_file.getOrCreateComdatGroupOwner(.{ - .off = group_signature, - .file_index = self.index, - }); const comdat_group_index = try self.addComdatGroup(allocator); const comdat_group = self.comdatGroup(comdat_group_index); comdat_group.* = .{ - .owner = gop.index, - .file = self.index, + .signature_off = group_signature, + .file_index = self.index, .shndx = shndx, .members_start = group_start, .members_len = @intCast(group_nmembers - 1), @@ -912,6 +926,37 @@ pub fn convertCommonSymbols(self: *Object, elf_file: *Elf) !void { } } +pub fn resolveComdatGroups(self: *Object, elf_file: *Elf, table: anytype) !void { + for (self.comdat_groups.items, 0..) |*cg, cgi| { + const signature = cg.signature(elf_file); + const gop = try table.getOrPut(signature); + if (!gop.found_existing) { + gop.value_ptr.* = .{ .index = @intCast(cgi), .file = self.index }; + continue; + } + const current = elf_file.comdatGroup(gop.value_ptr.*); + cg.alive = false; + if (self.index < current.file_index) { + current.alive = false; + cg.alive = true; + gop.value_ptr.* = .{ .index = @intCast(cgi), .file = self.index }; + } + } +} + +pub fn markComdatGroupsDead(self: *Object, elf_file: *Elf) void { + for (self.comdat_groups.items) |cg| { + if (cg.alive) continue; + for (cg.comdatGroupMembers(elf_file)) |shndx| { + const atom_index = self.atoms_indexes.items[shndx]; + if (self.atom(atom_index)) |atom_ptr| { + atom_ptr.flags.alive = false; + atom_ptr.markFdesDead(elf_file); + } + } + } +} + pub fn initOutputSections(self: *Object, elf_file: *Elf) !void { for (self.atoms_indexes.items) |atom_index| { const atom_ptr = self.atom(atom_index) orelse continue; @@ -1428,9 +1473,9 @@ fn formatComdatGroups( const elf_file = ctx.elf_file; try writer.writeAll(" COMDAT groups\n"); for (object.comdat_groups.items, 0..) |cg, cg_index| { - const cg_owner = elf_file.comdatGroupOwner(cg.owner); - if (cg_owner.file != object.index) continue; - try writer.print(" COMDAT({d})\n", .{cg_index}); + try writer.print(" COMDAT({d})", .{cg_index}); + if (!cg.alive) try writer.writeAll(" : [*]"); + try writer.writeByte('\n'); const cg_members = cg.comdatGroupMembers(elf_file); for (cg_members) |shndx| { const atom_index = object.atoms_indexes.items[shndx]; diff --git a/src/link/Elf/file.zig b/src/link/Elf/file.zig index df2c59a0f2..9d2560e11b 100644 --- a/src/link/Elf/file.zig +++ b/src/link/Elf/file.zig @@ -137,6 +137,13 @@ pub const File = union(enum) { }; } + pub fn comdatGroup(file: File, ind: Elf.ComdatGroup.Index) *Elf.ComdatGroup { + return switch (file) { + .linker_defined, .shared_object, .zig_object => unreachable, + .object => |x| x.comdatGroup(ind), + }; + } + pub fn symbol(file: File, ind: Symbol.Index) Symbol.Index { return switch (file) { .zig_object => |x| x.symbol(ind), diff --git a/src/link/Elf/relocatable.zig b/src/link/Elf/relocatable.zig index 1776faa9ca..b3c7146169 100644 --- a/src/link/Elf/relocatable.zig +++ b/src/link/Elf/relocatable.zig @@ -190,7 +190,7 @@ pub fn flushObject(elf_file: *Elf, comp: *Compilation, module_obj_path: ?[]const // Now, we are ready to resolve the symbols across all input files. // We will first resolve the files in the ZigObject, next in the parsed // input Object files. - elf_file.resolveSymbols(); + try elf_file.resolveSymbols(); elf_file.markEhFrameAtomsDead(); try elf_file.resolveMergeSections(); try elf_file.addCommentString(); @@ -318,11 +318,8 @@ fn initComdatGroups(elf_file: *Elf) !void { for (elf_file.objects.items) |index| { const object = elf_file.file(index).?.object; - for (object.comdat_groups.items, 0..) |cg, cg_index| { - const cg_owner = elf_file.comdatGroupOwner(cg.owner); - if (cg_owner.file != index) continue; - + if (!cg.alive) continue; const cg_sec = try elf_file.comdat_group_sections.addOne(gpa); cg_sec.* = .{ .shndx = try elf_file.addSection(.{ diff --git a/src/link/Elf/synthetic_sections.zig b/src/link/Elf/synthetic_sections.zig index 25b6833469..78bdd08fed 100644 --- a/src/link/Elf/synthetic_sections.zig +++ b/src/link/Elf/synthetic_sections.zig @@ -1672,12 +1672,6 @@ pub const ComdatGroupSection = struct { shndx: u32, cg_ref: Elf.Ref, - fn ownerFile(cgs: ComdatGroupSection, elf_file: *Elf) ?File { - const cg = cgs.comdatGroup(elf_file); - const cg_owner = elf_file.comdatGroupOwner(cg.owner); - return elf_file.file(cg_owner.file); - } - fn comdatGroup(cgs: ComdatGroupSection, elf_file: *Elf) *Elf.ComdatGroup { const cg_file = elf_file.file(cgs.cg_ref.file).?; return cg_file.object.comdatGroup(cgs.cg_ref.index); @@ -1685,7 +1679,7 @@ pub const ComdatGroupSection = struct { pub fn symbol(cgs: ComdatGroupSection, elf_file: *Elf) Symbol.Index { const cg = cgs.comdatGroup(elf_file); - const object = cgs.ownerFile(elf_file).?.object; + const object = cg.file(elf_file).object; const shdr = object.shdrs.items[cg.shndx]; return object.symbols.items[shdr.sh_info]; } @@ -1698,7 +1692,7 @@ pub const ComdatGroupSection = struct { pub fn write(cgs: ComdatGroupSection, elf_file: *Elf, writer: anytype) !void { const cg = cgs.comdatGroup(elf_file); - const object = cgs.ownerFile(elf_file).?.object; + const object = cg.file(elf_file).object; const members = cg.comdatGroupMembers(elf_file); try writer.writeInt(u32, elf.GRP_COMDAT, .little); for (members) |shndx| { diff --git a/test/link/elf.zig b/test/link/elf.zig index 49291be022..2dc36c6dc2 100644 --- a/test/link/elf.zig +++ b/test/link/elf.zig @@ -404,9 +404,7 @@ fn testComdatElimination(b: *Build, opts: Options) *Step { main_o.linkLibCpp(); { - const exe = addExecutable(b, opts, .{ - .name = "main1", - }); + const exe = addExecutable(b, opts, .{ .name = "main1" }); exe.addObject(a_o); exe.addObject(main_o); exe.linkLibCpp(); @@ -431,9 +429,7 @@ fn testComdatElimination(b: *Build, opts: Options) *Step { } { - const exe = addExecutable(b, opts, .{ - .name = "main2", - }); + const exe = addExecutable(b, opts, .{ .name = "main2" }); exe.addObject(main_o); exe.addObject(a_o); exe.linkLibCpp(); @@ -457,6 +453,42 @@ fn testComdatElimination(b: *Build, opts: Options) *Step { test_step.dependOn(&check.step); } + { + const c_o = addObject(b, opts, .{ .name = "c" }); + c_o.addObject(main_o); + c_o.addObject(a_o); + + const exe = addExecutable(b, opts, .{ .name = "main3" }); + exe.addObject(c_o); + exe.linkLibCpp(); + + const run = addRunArtifact(exe); + run.expectStdOutEqual( + \\calling foo in main + \\calling foo in main + \\ + ); + test_step.dependOn(&run.step); + } + + { + const d_o = addObject(b, opts, .{ .name = "d" }); + d_o.addObject(a_o); + d_o.addObject(main_o); + + const exe = addExecutable(b, opts, .{ .name = "main4" }); + exe.addObject(d_o); + exe.linkLibCpp(); + + const run = addRunArtifact(exe); + run.expectStdOutEqual( + \\calling foo in a + \\calling foo in a + \\ + ); + test_step.dependOn(&run.step); + } + return test_step; }