From 8087ec8e8c9e3abf8cf2f3952127aa97749610a5 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 24 Oct 2023 12:12:46 +0200 Subject: [PATCH] elf: improve parsing of ld scripts and actually test linking against them --- src/link/Elf.zig | 89 +++++++++++----------------- src/link/Elf/Archive.zig | 5 +- src/link/Elf/LdScript.zig | 108 +--------------------------------- src/link/Elf/Object.zig | 5 +- src/link/Elf/SharedObject.zig | 5 +- test/link/elf.zig | 33 +++++++++++ 6 files changed, 78 insertions(+), 167 deletions(-) diff --git a/src/link/Elf.zig b/src/link/Elf.zig index b8b233e4b0..421d6f9573 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -1353,10 +1353,8 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node } for (positionals.items) |obj| { - const in_file = try std.fs.cwd().openFile(obj.path, .{}); - defer in_file.close(); var parse_ctx: ParseErrorCtx = .{ .detected_cpu_arch = undefined }; - self.parsePositional(in_file, obj.path, obj.must_link, &parse_ctx) catch |err| + self.parsePositional(obj.path, obj.must_link, &parse_ctx) catch |err| try self.handleAndReportParseError(obj.path, err, &parse_ctx); } @@ -1437,9 +1435,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node for (system_libs.items) |lib| { var parse_ctx: ParseErrorCtx = .{ .detected_cpu_arch = undefined }; - const in_file = try std.fs.cwd().openFile(lib.path, .{}); - defer in_file.close(); - self.parseLibrary(in_file, lib, false, &parse_ctx) catch |err| + self.parseLibrary(lib, false, &parse_ctx) catch |err| try self.handleAndReportParseError(lib.path, err, &parse_ctx); } @@ -1456,10 +1452,8 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node if (csu.crtn) |v| try positionals.append(.{ .path = v }); for (positionals.items) |obj| { - const in_file = try std.fs.cwd().openFile(obj.path, .{}); - defer in_file.close(); var parse_ctx: ParseErrorCtx = .{ .detected_cpu_arch = undefined }; - self.parsePositional(in_file, obj.path, obj.must_link, &parse_ctx) catch |err| + self.parsePositional(obj.path, obj.must_link, &parse_ctx) catch |err| try self.handleAndReportParseError(obj.path, err, &parse_ctx); } @@ -1679,51 +1673,40 @@ const ParseError = error{ InvalidCharacter, } || LdScript.Error || std.os.AccessError || std.os.SeekError || std.fs.File.OpenError || std.fs.File.ReadError; -fn parsePositional( - self: *Elf, - in_file: std.fs.File, - path: []const u8, - must_link: bool, - ctx: *ParseErrorCtx, -) ParseError!void { +fn parsePositional(self: *Elf, path: []const u8, must_link: bool, ctx: *ParseErrorCtx) ParseError!void { const tracy = trace(@src()); defer tracy.end(); - - if (Object.isObject(in_file)) { - try self.parseObject(in_file, path, ctx); + if (try Object.isObject(path)) { + try self.parseObject(path, ctx); } else { - try self.parseLibrary(in_file, .{ .path = path }, must_link, ctx); + try self.parseLibrary(.{ .path = path }, must_link, ctx); } } -fn parseLibrary( - self: *Elf, - in_file: std.fs.File, - lib: SystemLib, - must_link: bool, - ctx: *ParseErrorCtx, -) ParseError!void { +fn parseLibrary(self: *Elf, lib: SystemLib, must_link: bool, ctx: *ParseErrorCtx) ParseError!void { const tracy = trace(@src()); defer tracy.end(); - if (Archive.isArchive(in_file)) { - try self.parseArchive(in_file, lib.path, must_link, ctx); - } else if (SharedObject.isSharedObject(in_file)) { - try self.parseSharedObject(in_file, lib, ctx); + if (try Archive.isArchive(lib.path)) { + try self.parseArchive(lib.path, must_link, ctx); + } else if (try SharedObject.isSharedObject(lib.path)) { + try self.parseSharedObject(lib, ctx); } else { // TODO if the script has a top-level comment identifying it as GNU ld script, // then report parse errors. Otherwise return UnknownFileType. - self.parseLdScript(in_file, lib, ctx) catch |err| switch (err) { + self.parseLdScript(lib, ctx) catch |err| switch (err) { else => return error.UnknownFileType, }; } } -fn parseObject(self: *Elf, in_file: std.fs.File, path: []const u8, ctx: *ParseErrorCtx) ParseError!void { +fn parseObject(self: *Elf, path: []const u8, ctx: *ParseErrorCtx) ParseError!void { const tracy = trace(@src()); defer tracy.end(); const gpa = self.base.allocator; + const in_file = try std.fs.cwd().openFile(path, .{}); + defer in_file.close(); const data = try in_file.readToEndAlloc(gpa, std.math.maxInt(u32)); const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); self.files.set(index, .{ .object = .{ @@ -1740,17 +1723,13 @@ fn parseObject(self: *Elf, in_file: std.fs.File, path: []const u8, ctx: *ParseEr if (ctx.detected_cpu_arch != self.base.options.target.cpu.arch) return error.InvalidCpuArch; } -fn parseArchive( - self: *Elf, - in_file: std.fs.File, - path: []const u8, - must_link: bool, - ctx: *ParseErrorCtx, -) ParseError!void { +fn parseArchive(self: *Elf, path: []const u8, must_link: bool, ctx: *ParseErrorCtx) ParseError!void { const tracy = trace(@src()); defer tracy.end(); const gpa = self.base.allocator; + const in_file = try std.fs.cwd().openFile(path, .{}); + defer in_file.close(); const data = try in_file.readToEndAlloc(gpa, std.math.maxInt(u32)); var archive = Archive{ .path = try gpa.dupe(u8, path), .data = data }; defer archive.deinit(gpa); @@ -1773,16 +1752,13 @@ fn parseArchive( } } -fn parseSharedObject( - self: *Elf, - in_file: std.fs.File, - lib: SystemLib, - ctx: *ParseErrorCtx, -) ParseError!void { +fn parseSharedObject(self: *Elf, lib: SystemLib, ctx: *ParseErrorCtx) ParseError!void { const tracy = trace(@src()); defer tracy.end(); const gpa = self.base.allocator; + const in_file = try std.fs.cwd().openFile(lib.path, .{}); + defer in_file.close(); const data = try in_file.readToEndAlloc(gpa, std.math.maxInt(u32)); const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); self.files.set(index, .{ .shared_object = .{ @@ -1801,11 +1777,13 @@ fn parseSharedObject( if (ctx.detected_cpu_arch != self.base.options.target.cpu.arch) return error.InvalidCpuArch; } -fn parseLdScript(self: *Elf, in_file: std.fs.File, lib: SystemLib, ctx: *ParseErrorCtx) ParseError!void { +fn parseLdScript(self: *Elf, lib: SystemLib, ctx: *ParseErrorCtx) ParseError!void { const tracy = trace(@src()); defer tracy.end(); const gpa = self.base.allocator; + const in_file = try std.fs.cwd().openFile(lib.path, .{}); + defer in_file.close(); const data = try in_file.readToEndAlloc(gpa, std.math.maxInt(u32)); defer gpa.free(data); @@ -1871,11 +1849,8 @@ fn parseLdScript(self: *Elf, in_file: std.fs.File, lib: SystemLib, ctx: *ParseEr } const full_path = test_path.items; - const scr_file = try std.fs.cwd().openFile(full_path, .{}); - defer scr_file.close(); - var scr_ctx: ParseErrorCtx = .{ .detected_cpu_arch = undefined }; - self.parseLibrary(scr_file, .{ + self.parseLibrary(.{ .needed = scr_obj.needed, .path = full_path, }, false, &scr_ctx) catch |err| try self.handleAndReportParseError(full_path, err, &scr_ctx); @@ -1893,14 +1868,16 @@ fn accessLibPath( const sep = fs.path.sep_str; const target = self.base.options.target; test_path.clearRetainingCapacity(); + const prefix = if (link_mode != null) "lib" else ""; + const suffix = if (link_mode) |mode| switch (mode) { + .Static => target.staticLibSuffix(), + .Dynamic => target.dynamicLibSuffix(), + } else ""; try test_path.writer().print("{s}" ++ sep ++ "{s}{s}{s}", .{ lib_dir_path, - target.libPrefix(), + prefix, lib_name, - if (link_mode) |mode| switch (mode) { - .Static => target.staticLibSuffix(), - .Dynamic => target.dynamicLibSuffix(), - } else "", + suffix, }); if (checked_paths) |cpaths| { try cpaths.append(try self.base.allocator.dupe(u8, test_path.items)); diff --git a/src/link/Elf/Archive.zig b/src/link/Elf/Archive.zig index 94529a368c..0eb2f2d404 100644 --- a/src/link/Elf/Archive.zig +++ b/src/link/Elf/Archive.zig @@ -62,10 +62,11 @@ const ar_hdr = extern struct { } }; -pub fn isArchive(file: std.fs.File) bool { +pub fn isArchive(path: []const u8) !bool { + const file = try std.fs.cwd().openFile(path, .{}); + defer file.close(); const reader = file.reader(); const magic = reader.readBytesNoEof(Archive.SARMAG) catch return false; - defer file.seekTo(0) catch {}; if (!mem.eql(u8, &magic, ARMAG)) return false; return true; } diff --git a/src/link/Elf/LdScript.zig b/src/link/Elf/LdScript.zig index c901a8ecba..c85f331d49 100644 --- a/src/link/Elf/LdScript.zig +++ b/src/link/Elf/LdScript.zig @@ -83,7 +83,8 @@ fn doParse(scr: *LdScript, ctx: struct { const cmd = ctx.parser.getCommand(cmd_id); switch (cmd) { .output_format => scr.cpu_arch = try ctx.parser.outputFormat(), - .group => try ctx.parser.group(ctx.args), + // TODO we should verify that group only contains libraries + .input, .group => try ctx.parser.group(ctx.args), else => return error.UnexpectedToken, } } else break; @@ -102,6 +103,7 @@ const LineColumn = struct { const Command = enum { output_format, + input, group, as_needed, @@ -420,110 +422,6 @@ const TokenIterator = struct { } }; -const testing = std.testing; - -fn testExpectedTokens(input: []const u8, expected: []const Token.Id) !void { - var given = std.ArrayList(Token.Id).init(testing.allocator); - defer given.deinit(); - - var tokenizer = Tokenizer{ .source = input }; - while (true) { - const tok = tokenizer.next(); - if (tok.id == .invalid) { - std.debug.print(" {s} => '{s}'\n", .{ @tagName(tok.id), tok.get(input) }); - } - try given.append(tok.id); - if (tok.id == .eof) break; - } - - try testing.expectEqualSlices(Token.Id, expected, given.items); -} - -test "Tokenizer - just comments" { - try testExpectedTokens( - \\/* GNU ld script - \\ Use the shared library, but some functions are only in - \\ the static library, so try that secondarily. */ - , &.{ .comment, .eof }); -} - -test "Tokenizer - comments with a simple command" { - try testExpectedTokens( - \\/* GNU ld script - \\ Use the shared library, but some functions are only in - \\ the static library, so try that secondarily. */ - \\OUTPUT_FORMAT(elf64-x86-64) - , &.{ .comment, .new_line, .command, .lparen, .literal, .rparen, .eof }); -} - -test "Tokenizer - libc.so" { - try testExpectedTokens( - \\/* GNU ld script - \\ Use the shared library, but some functions are only in - \\ the static library, so try that secondarily. */ - \\OUTPUT_FORMAT(elf64-x86-64) - \\GROUP ( /a/b/c.so.6 /a/d/e.a AS_NEEDED ( /f/g/h.so.2 ) ) - , &.{ - .comment, .new_line, // GNU comment - .command, .lparen, .literal, .rparen, .new_line, // output format - .command, .lparen, .literal, .literal, // group start - .command, .lparen, .literal, .rparen, // as needed - .rparen, // group end - .eof, - }); -} - -test "Parser - output format" { - const source = - \\OUTPUT_FORMAT(elf64-x86-64) - ; - var tokenizer = Tokenizer{ .source = source }; - var tokens = std.ArrayList(Token).init(testing.allocator); - defer tokens.deinit(); - while (true) { - const tok = tokenizer.next(); - try testing.expect(tok.id != .invalid); - try tokens.append(tok); - if (tok.id == .eof) break; - } - var it = TokenIterator{ .tokens = tokens.items }; - var parser = Parser{ .source = source, .it = &it }; - const tok_id = try parser.require(.command); - try testing.expectEqual(parser.getCommand(tok_id), .output_format); - const cpu_arch = try parser.outputFormat(); - try testing.expectEqual(cpu_arch, .x86_64); -} - -test "Parser - group with as-needed" { - const source = - \\GROUP ( /a/b/c.so.6 /a/d/e.a AS_NEEDED ( /f/g/h.so.2 ) ) - ; - var tokenizer = Tokenizer{ .source = source }; - var tokens = std.ArrayList(Token).init(testing.allocator); - defer tokens.deinit(); - while (true) { - const tok = tokenizer.next(); - try testing.expect(tok.id != .invalid); - try tokens.append(tok); - if (tok.id == .eof) break; - } - var it = TokenIterator{ .tokens = tokens.items }; - var parser = Parser{ .source = source, .it = &it }; - - var args = std.ArrayList(Elf.LinkObject).init(testing.allocator); - defer args.deinit(); - const tok_id = try parser.require(.command); - try testing.expectEqual(parser.getCommand(tok_id), .group); - try parser.group(&args); - - try testing.expectEqualStrings("/a/b/c.so.6", args.items[0].path); - try testing.expect(args.items[0].needed); - try testing.expectEqualStrings("/a/d/e.a", args.items[1].path); - try testing.expect(args.items[1].needed); - try testing.expectEqualStrings("/f/g/h.so.2", args.items[2].path); - try testing.expect(!args.items[2].needed); -} - const LdScript = @This(); const std = @import("std"); diff --git a/src/link/Elf/Object.zig b/src/link/Elf/Object.zig index c24025cadb..e21d6f161c 100644 --- a/src/link/Elf/Object.zig +++ b/src/link/Elf/Object.zig @@ -22,10 +22,11 @@ num_dynrelocs: u32 = 0, output_symtab_size: Elf.SymtabSize = .{}, -pub fn isObject(file: std.fs.File) bool { +pub fn isObject(path: []const u8) !bool { + const file = try std.fs.cwd().openFile(path, .{}); + defer file.close(); const reader = file.reader(); const header = reader.readStruct(elf.Elf64_Ehdr) catch return false; - defer file.seekTo(0) catch {}; if (!mem.eql(u8, header.e_ident[0..4], "\x7fELF")) return false; if (header.e_ident[elf.EI_VERSION] != 1) return false; if (header.e_type != elf.ET.REL) return false; diff --git a/src/link/Elf/SharedObject.zig b/src/link/Elf/SharedObject.zig index 0ff74edddf..710c025f34 100644 --- a/src/link/Elf/SharedObject.zig +++ b/src/link/Elf/SharedObject.zig @@ -22,10 +22,11 @@ alive: bool, output_symtab_size: Elf.SymtabSize = .{}, -pub fn isSharedObject(file: std.fs.File) bool { +pub fn isSharedObject(path: []const u8) !bool { + const file = try std.fs.cwd().openFile(path, .{}); + defer file.close(); const reader = file.reader(); const header = reader.readStruct(elf.Elf64_Ehdr) catch return false; - defer file.seekTo(0) catch {}; if (!mem.eql(u8, header.e_ident[0..4], "\x7fELF")) return false; if (header.e_ident[elf.EI_VERSION] != 1) return false; if (header.e_type != elf.ET.DYN) return false; diff --git a/test/link/elf.zig b/test/link/elf.zig index eabc0d5f17..d5a62db4cd 100644 --- a/test/link/elf.zig +++ b/test/link/elf.zig @@ -75,6 +75,7 @@ pub fn build(b: *Build) void { elf_step.dependOn(testLargeAlignmentExe(b, .{ .target = glibc_target })); elf_step.dependOn(testLargeBss(b, .{ .target = glibc_target })); elf_step.dependOn(testLinkOrder(b, .{ .target = glibc_target })); + elf_step.dependOn(testLdScript(b, .{ .target = glibc_target })); // https://github.com/ziglang/zig/issues/17451 // elf_step.dependOn(testNoEhFrameHdr(b, .{ .target = glibc_target })); elf_step.dependOn(testPie(b, .{ .target = glibc_target })); @@ -1568,6 +1569,38 @@ fn testLinkOrder(b: *Build, opts: Options) *Step { return test_step; } +fn testLdScript(b: *Build, opts: Options) *Step { + const test_step = addTestStep(b, "ld-script", opts); + + const dso = addSharedLibrary(b, "bar", opts); + addCSourceBytes(dso, "int foo() { return 42; }", &.{}); + + const scripts = WriteFile.create(b); + _ = scripts.add("liba.so", "INPUT(libfoo.so)"); + _ = scripts.add("libfoo.so", "GROUP(AS_NEEDED(-lbar))"); + + const exe = addExecutable(b, "main", opts); + addCSourceBytes(exe, + \\int foo(); + \\int main() { + \\ return foo() - 42; + \\} + , &.{}); + exe.linkSystemLibrary2("a", .{}); + exe.addLibraryPath(scripts.getDirectory()); + exe.addLibraryPath(dso.getEmittedBinDirectory()); + exe.addRPath(dso.getEmittedBinDirectory()); + exe.linkLibC(); + // https://github.com/ziglang/zig/issues/17619 + exe.pie = true; + + const run = addRunArtifact(exe); + run.expectExitCode(0); + test_step.dependOn(&run.step); + + return test_step; +} + fn testLinkingC(b: *Build, opts: Options) *Step { const test_step = addTestStep(b, "linking-c", opts);