From 8bd01eb7a914416a772b365dc75d83890067e26c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 12 Feb 2024 23:59:19 +0100 Subject: [PATCH] elf: refactor archive specific object parsing logic --- src/link/Elf.zig | 18 ++++------ src/link/Elf/Object.zig | 65 ++++++++++++++++++---------------- src/link/Elf/relocatable.zig | 67 ++++++++++++++++++++++++++++++------ 3 files changed, 97 insertions(+), 53 deletions(-) diff --git a/src/link/Elf.zig b/src/link/Elf.zig index a21fa8ffa0..9f8c6c1793 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -1075,6 +1075,10 @@ pub fn flushModule(self: *Elf, arena: Allocator, prog_node: *std.Progress.Node) // --verbose-link if (comp.verbose_link) try self.dumpArgv(comp); + if (self.zigObjectPtr()) |zig_object| try zig_object.flushModule(self); + if (self.base.isStaticLib()) return relocatable.flushStaticLib(self, comp, module_obj_path); + if (self.base.isObject()) return relocatable.flushObject(self, comp, module_obj_path); + const csu = try CsuObjects.init(arena, comp); const compiler_rt_path: ?[]const u8 = blk: { if (comp.compiler_rt_lib) |x| break :blk x.full_object_path; @@ -1082,10 +1086,6 @@ pub fn flushModule(self: *Elf, arena: Allocator, prog_node: *std.Progress.Node) break :blk null; }; - if (self.zigObjectPtr()) |zig_object| try zig_object.flushModule(self); - if (self.base.isStaticLib()) return relocatable.flushStaticLib(self, comp, module_obj_path); - if (self.base.isObject()) return relocatable.flushObject(self, comp, module_obj_path); - // Here we will parse input positional and library files (if referenced). // This will roughly match in any linker backend we support. var positionals = std.ArrayList(Compilation.LinkObject).init(arena); @@ -1249,13 +1249,6 @@ pub fn flushModule(self: *Elf, arena: Allocator, prog_node: *std.Progress.Node) if (comp.link_errors.items.len > 0) return error.FlushFailure; - // Init all objects - for (self.objects.items) |index| { - try self.file(index).?.object.init(self); - } - - if (comp.link_errors.items.len > 0) return error.FlushFailure; - // Dedup shared objects { var seen_dsos = std.StringHashMap(void).init(gpa); @@ -1651,7 +1644,7 @@ fn dumpArgv(self: *Elf, comp: *Compilation) !void { Compilation.dump_argv(argv.items); } -const ParseError = error{ +pub const ParseError = error{ MalformedObject, MalformedArchive, InvalidCpuArch, @@ -1662,6 +1655,7 @@ const ParseError = error{ FileSystem, NotSupported, InvalidCharacter, + UnknownFileType, } || LdScript.Error || std.os.AccessError || std.os.SeekError || std.fs.File.OpenError || std.fs.File.ReadError; pub fn parsePositional(self: *Elf, path: []const u8, must_link: bool) ParseError!void { diff --git a/src/link/Elf/Object.zig b/src/link/Elf/Object.zig index 3a9d32f51c..92ac64169a 100644 --- a/src/link/Elf/Object.zig +++ b/src/link/Elf/Object.zig @@ -55,12 +55,26 @@ pub fn deinit(self: *Object, allocator: Allocator) void { pub fn parse(self: *Object, elf_file: *Elf) !void { const gpa = elf_file.base.comp.gpa; - const offset = if (self.archive) |ar| ar.offset else 0; const handle = elf_file.fileHandle(self.file_handle); + + try self.parseCommon(gpa, handle, elf_file); + try self.initAtoms(gpa, handle, elf_file); + try self.initSymtab(gpa, elf_file); + + for (self.shdrs.items, 0..) |shdr, i| { + const atom = elf_file.atom(self.atoms.items[i]) orelse continue; + if (!atom.flags.alive) continue; + if (shdr.sh_type == elf.SHT_X86_64_UNWIND or mem.eql(u8, atom.name(elf_file), ".eh_frame")) + try self.parseEhFrame(gpa, handle, @as(u32, @intCast(i)), elf_file); + } +} + +fn parseCommon(self: *Object, allocator: Allocator, handle: std.fs.File, elf_file: *Elf) !void { + const offset = if (self.archive) |ar| ar.offset else 0; const file_size = (try handle.stat()).size; - const header_buffer = try Elf.preadAllAlloc(gpa, handle, offset, @sizeOf(elf.Elf64_Ehdr)); - defer gpa.free(header_buffer); + const header_buffer = try Elf.preadAllAlloc(allocator, handle, offset, @sizeOf(elf.Elf64_Ehdr)); + defer allocator.free(header_buffer); self.header = @as(*align(1) const elf.Elf64_Ehdr, @ptrCast(header_buffer)).*; const target = elf_file.base.comp.root_mod.resolved_target.result; @@ -87,10 +101,10 @@ pub fn parse(self: *Object, elf_file: *Elf) !void { return error.MalformedObject; } - const shdrs_buffer = try Elf.preadAllAlloc(gpa, handle, offset + shoff, shsize); - defer gpa.free(shdrs_buffer); + const shdrs_buffer = try Elf.preadAllAlloc(allocator, handle, offset + shoff, shsize); + defer allocator.free(shdrs_buffer); const shdrs = @as([*]align(1) const elf.Elf64_Shdr, @ptrCast(shdrs_buffer.ptr))[0..shnum]; - try self.shdrs.appendUnalignedSlice(gpa, shdrs); + try self.shdrs.appendUnalignedSlice(allocator, shdrs); for (self.shdrs.items) |shdr| { if (shdr.sh_type != elf.SHT_NOBITS) { @@ -101,15 +115,15 @@ pub fn parse(self: *Object, elf_file: *Elf) !void { } } - const shstrtab = try self.preadShdrContentsAlloc(gpa, handle, self.header.?.e_shstrndx); - defer gpa.free(shstrtab); + const shstrtab = try self.preadShdrContentsAlloc(allocator, handle, self.header.?.e_shstrndx); + defer allocator.free(shstrtab); for (self.shdrs.items) |shdr| { if (shdr.sh_name >= shstrtab.len) { try elf_file.reportParseError2(self.index, "corrupt section name offset", .{}); return error.MalformedObject; } } - try self.strtab.appendSlice(gpa, shstrtab); + try self.strtab.appendSlice(allocator, shstrtab); const symtab_index = for (self.shdrs.items, 0..) |shdr, i| switch (shdr.sh_type) { elf.SHT_SYMTAB => break @as(u16, @intCast(i)), @@ -120,8 +134,8 @@ pub fn parse(self: *Object, elf_file: *Elf) !void { const shdr = self.shdrs.items[index]; self.first_global = shdr.sh_info; - const raw_symtab = try self.preadShdrContentsAlloc(gpa, handle, index); - defer gpa.free(raw_symtab); + const raw_symtab = try self.preadShdrContentsAlloc(allocator, handle, index); + defer allocator.free(raw_symtab); const nsyms = math.divExact(usize, raw_symtab.len, @sizeOf(elf.Elf64_Sym)) catch { try elf_file.reportParseError2(self.index, "symbol table not evenly divisible", .{}); return error.MalformedObject; @@ -129,11 +143,11 @@ pub fn parse(self: *Object, elf_file: *Elf) !void { const symtab = @as([*]align(1) const elf.Elf64_Sym, @ptrCast(raw_symtab.ptr))[0..nsyms]; const strtab_bias = @as(u32, @intCast(self.strtab.items.len)); - const strtab = try self.preadShdrContentsAlloc(gpa, handle, shdr.sh_link); - defer gpa.free(strtab); - try self.strtab.appendSlice(gpa, strtab); + const strtab = try self.preadShdrContentsAlloc(allocator, handle, shdr.sh_link); + defer allocator.free(strtab); + try self.strtab.appendSlice(allocator, strtab); - try self.symtab.ensureUnusedCapacity(gpa, symtab.len); + try self.symtab.ensureUnusedCapacity(allocator, symtab.len); for (symtab) |sym| { const out_sym = self.symtab.addOneAssumeCapacity(); out_sym.* = sym; @@ -145,21 +159,6 @@ pub fn parse(self: *Object, elf_file: *Elf) !void { } } -pub fn init(self: *Object, elf_file: *Elf) !void { - const gpa = elf_file.base.comp.gpa; - const handle = elf_file.fileHandle(self.file_handle); - - try self.initAtoms(gpa, handle, elf_file); - try self.initSymtab(gpa, elf_file); - - for (self.shdrs.items, 0..) |shdr, i| { - const atom = elf_file.atom(self.atoms.items[i]) orelse continue; - if (!atom.flags.alive) continue; - if (shdr.sh_type == elf.SHT_X86_64_UNWIND or mem.eql(u8, atom.name(elf_file), ".eh_frame")) - try self.parseEhFrame(gpa, handle, @as(u32, @intCast(i)), elf_file); - } -} - fn initAtoms(self: *Object, allocator: Allocator, handle: std.fs.File, elf_file: *Elf) !void { const shdrs = self.shdrs.items; try self.atoms.resize(allocator, shdrs.len); @@ -782,6 +781,12 @@ pub fn addAtomsToRelaSections(self: Object, elf_file: *Elf) !void { } } +pub fn parseAr(self: *Object, elf_file: *Elf) !void { + const gpa = elf_file.base.comp.gpa; + const handle = elf_file.fileHandle(self.file_handle); + try self.parseCommon(gpa, handle, elf_file); +} + pub fn updateArSymtab(self: Object, ar_symtab: *Archive.ArSymtab, elf_file: *Elf) !void { const comp = elf_file.base.comp; const gpa = comp.gpa; diff --git a/src/link/Elf/relocatable.zig b/src/link/Elf/relocatable.zig index f3985582e8..9492c30ab8 100644 --- a/src/link/Elf/relocatable.zig +++ b/src/link/Elf/relocatable.zig @@ -7,18 +7,20 @@ pub fn flushStaticLib(elf_file: *Elf, comp: *Compilation, module_obj_path: ?[]co try positionals.ensureUnusedCapacity(comp.objects.len); positionals.appendSliceAssumeCapacity(comp.objects); - // This is a set of object files emitted by clang in a single `build-exe` invocation. - // For instance, the implicit `a.o` as compiled by `zig build-exe a.c` will end up - // in this set. for (comp.c_object_table.keys()) |key| { try positionals.append(.{ .path = key.status.success.object_path }); } if (module_obj_path) |path| try positionals.append(.{ .path = path }); + if (comp.include_compiler_rt) { + try positionals.append(.{ .path = comp.compiler_rt_obj.?.full_object_path }); + } + for (positionals.items) |obj| { - elf_file.parsePositional(obj.path, obj.must_link) catch |err| switch (err) { + parsePositional(elf_file, obj.path) catch |err| switch (err) { error.MalformedObject, error.MalformedArchive, error.InvalidCpuArch => continue, // already reported + error.UnknownFileType => try elf_file.reportParseError(obj.path, "unknown file type for an object file", .{}), else => |e| try elf_file.reportParseError( obj.path, "unexpected error: parsing input file failed with error {s}", @@ -172,13 +174,6 @@ pub fn flushObject(elf_file: *Elf, comp: *Compilation, module_obj_path: ?[]const if (comp.link_errors.items.len > 0) return error.FlushFailure; - // Init all objects - for (elf_file.objects.items) |index| { - try elf_file.file(index).?.object.init(elf_file); - } - - if (comp.link_errors.items.len > 0) return error.FlushFailure; - // Now, we are ready to resolve the symbols across all input files. // We will first resolve the files in the ZigObject, next in the parsed // input Object files. @@ -214,6 +209,55 @@ pub fn flushObject(elf_file: *Elf, comp: *Compilation, module_obj_path: ?[]const if (comp.link_errors.items.len > 0) return error.FlushFailure; } +fn parsePositional(elf_file: *Elf, path: []const u8) Elf.ParseError!void { + if (try Object.isObject(path)) { + try parseObject(elf_file, path); + } else if (try Archive.isArchive(path)) { + try parseArchive(elf_file, path); + } else return error.UnknownFileType; + // TODO: should we check for LD script? + // Actually, should we even unpack an archive? +} + +fn parseObject(elf_file: *Elf, path: []const u8) Elf.ParseError!void { + const gpa = elf_file.base.comp.gpa; + const handle = try std.fs.cwd().openFile(path, .{}); + const fh = try elf_file.addFileHandle(handle); + + const index = @as(File.Index, @intCast(try elf_file.files.addOne(gpa))); + elf_file.files.set(index, .{ .object = .{ + .path = try gpa.dupe(u8, path), + .file_handle = fh, + .index = index, + } }); + try elf_file.objects.append(gpa, index); + + const object = elf_file.file(index).?.object; + try object.parseAr(elf_file); +} + +fn parseArchive(elf_file: *Elf, path: []const u8) Elf.ParseError!void { + const gpa = elf_file.base.comp.gpa; + const handle = try std.fs.cwd().openFile(path, .{}); + const fh = try elf_file.addFileHandle(handle); + + var archive = Archive{}; + defer archive.deinit(gpa); + try archive.parse(elf_file, path, fh); + + const objects = try archive.objects.toOwnedSlice(gpa); + defer gpa.free(objects); + + for (objects) |extracted| { + const index = @as(File.Index, @intCast(try elf_file.files.addOne(gpa))); + elf_file.files.set(index, .{ .object = extracted }); + const object = &elf_file.files.items(.data)[index].object; + object.index = index; + try object.parseAr(elf_file); + try elf_file.objects.append(gpa, index); + } +} + fn claimUnresolved(elf_file: *Elf) void { if (elf_file.zigObjectPtr()) |zig_object| { zig_object.claimUnresolvedObject(elf_file); @@ -518,3 +562,4 @@ const Archive = @import("Archive.zig"); const Compilation = @import("../../Compilation.zig"); const Elf = @import("../Elf.zig"); const File = @import("file.zig").File; +const Object = @import("Object.zig");