elf: improve parsing of ld scripts and actually test linking against them

2026-02-21 16:54:52 +00:00 · 2023-10-24 12:12:46 +02:00 · 2023-10-24 12:12:46 +02:00 · 8087ec8e8c
commit 8087ec8e8c
parent 2f3add4f30
6 changed files with 78 additions and 167 deletions
--- a/src/link/Elf.zig
+++ b/src/link/Elf.zig
@@ -1353,10 +1353,8 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
    }

    for (positionals.items) |obj| {
-        const in_file = try std.fs.cwd().openFile(obj.path, .{});
-        defer in_file.close();
        var parse_ctx: ParseErrorCtx = .{ .detected_cpu_arch = undefined };
-        self.parsePositional(in_file, obj.path, obj.must_link, &parse_ctx) catch |err|
+        self.parsePositional(obj.path, obj.must_link, &parse_ctx) catch |err|
            try self.handleAndReportParseError(obj.path, err, &parse_ctx);
    }

@@ -1437,9 +1435,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node

    for (system_libs.items) |lib| {
        var parse_ctx: ParseErrorCtx = .{ .detected_cpu_arch = undefined };
-        const in_file = try std.fs.cwd().openFile(lib.path, .{});
-        defer in_file.close();
-        self.parseLibrary(in_file, lib, false, &parse_ctx) catch |err|
+        self.parseLibrary(lib, false, &parse_ctx) catch |err|
            try self.handleAndReportParseError(lib.path, err, &parse_ctx);
    }

@@ -1456,10 +1452,8 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
    if (csu.crtn) |v| try positionals.append(.{ .path = v });

    for (positionals.items) |obj| {
-        const in_file = try std.fs.cwd().openFile(obj.path, .{});
-        defer in_file.close();
        var parse_ctx: ParseErrorCtx = .{ .detected_cpu_arch = undefined };
-        self.parsePositional(in_file, obj.path, obj.must_link, &parse_ctx) catch |err|
+        self.parsePositional(obj.path, obj.must_link, &parse_ctx) catch |err|
            try self.handleAndReportParseError(obj.path, err, &parse_ctx);
    }

@@ -1679,51 +1673,40 @@ const ParseError = error{
    InvalidCharacter,
 } || LdScript.Error || std.os.AccessError || std.os.SeekError || std.fs.File.OpenError || std.fs.File.ReadError;

-fn parsePositional(
-    self: *Elf,
-    in_file: std.fs.File,
-    path: []const u8,
-    must_link: bool,
-    ctx: *ParseErrorCtx,
-) ParseError!void {
+fn parsePositional(self: *Elf, path: []const u8, must_link: bool, ctx: *ParseErrorCtx) ParseError!void {
    const tracy = trace(@src());
    defer tracy.end();
-
-    if (Object.isObject(in_file)) {
-        try self.parseObject(in_file, path, ctx);
+    if (try Object.isObject(path)) {
+        try self.parseObject(path, ctx);
    } else {
-        try self.parseLibrary(in_file, .{ .path = path }, must_link, ctx);
+        try self.parseLibrary(.{ .path = path }, must_link, ctx);
    }
 }

-fn parseLibrary(
-    self: *Elf,
-    in_file: std.fs.File,
-    lib: SystemLib,
-    must_link: bool,
-    ctx: *ParseErrorCtx,
-) ParseError!void {
+fn parseLibrary(self: *Elf, lib: SystemLib, must_link: bool, ctx: *ParseErrorCtx) ParseError!void {
    const tracy = trace(@src());
    defer tracy.end();

-    if (Archive.isArchive(in_file)) {
-        try self.parseArchive(in_file, lib.path, must_link, ctx);
-    } else if (SharedObject.isSharedObject(in_file)) {
-        try self.parseSharedObject(in_file, lib, ctx);
+    if (try Archive.isArchive(lib.path)) {
+        try self.parseArchive(lib.path, must_link, ctx);
+    } else if (try SharedObject.isSharedObject(lib.path)) {
+        try self.parseSharedObject(lib, ctx);
    } else {
        // TODO if the script has a top-level comment identifying it as GNU ld script,
        // then report parse errors. Otherwise return UnknownFileType.
-        self.parseLdScript(in_file, lib, ctx) catch |err| switch (err) {
+        self.parseLdScript(lib, ctx) catch |err| switch (err) {
            else => return error.UnknownFileType,
        };
    }
 }

-fn parseObject(self: *Elf, in_file: std.fs.File, path: []const u8, ctx: *ParseErrorCtx) ParseError!void {
+fn parseObject(self: *Elf, path: []const u8, ctx: *ParseErrorCtx) ParseError!void {
    const tracy = trace(@src());
    defer tracy.end();

    const gpa = self.base.allocator;
+    const in_file = try std.fs.cwd().openFile(path, .{});
+    defer in_file.close();
    const data = try in_file.readToEndAlloc(gpa, std.math.maxInt(u32));
    const index = @as(File.Index, @intCast(try self.files.addOne(gpa)));
    self.files.set(index, .{ .object = .{
@@ -1740,17 +1723,13 @@ fn parseObject(self: *Elf, in_file: std.fs.File, path: []const u8, ctx: *ParseEr
    if (ctx.detected_cpu_arch != self.base.options.target.cpu.arch) return error.InvalidCpuArch;
 }

-fn parseArchive(
-    self: *Elf,
-    in_file: std.fs.File,
-    path: []const u8,
-    must_link: bool,
-    ctx: *ParseErrorCtx,
-) ParseError!void {
+fn parseArchive(self: *Elf, path: []const u8, must_link: bool, ctx: *ParseErrorCtx) ParseError!void {
    const tracy = trace(@src());
    defer tracy.end();

    const gpa = self.base.allocator;
+    const in_file = try std.fs.cwd().openFile(path, .{});
+    defer in_file.close();
    const data = try in_file.readToEndAlloc(gpa, std.math.maxInt(u32));
    var archive = Archive{ .path = try gpa.dupe(u8, path), .data = data };
    defer archive.deinit(gpa);
@@ -1773,16 +1752,13 @@ fn parseArchive(
    }
 }

-fn parseSharedObject(
-    self: *Elf,
-    in_file: std.fs.File,
-    lib: SystemLib,
-    ctx: *ParseErrorCtx,
-) ParseError!void {
+fn parseSharedObject(self: *Elf, lib: SystemLib, ctx: *ParseErrorCtx) ParseError!void {
    const tracy = trace(@src());
    defer tracy.end();

    const gpa = self.base.allocator;
+    const in_file = try std.fs.cwd().openFile(lib.path, .{});
+    defer in_file.close();
    const data = try in_file.readToEndAlloc(gpa, std.math.maxInt(u32));
    const index = @as(File.Index, @intCast(try self.files.addOne(gpa)));
    self.files.set(index, .{ .shared_object = .{
@@ -1801,11 +1777,13 @@ fn parseSharedObject(
    if (ctx.detected_cpu_arch != self.base.options.target.cpu.arch) return error.InvalidCpuArch;
 }

-fn parseLdScript(self: *Elf, in_file: std.fs.File, lib: SystemLib, ctx: *ParseErrorCtx) ParseError!void {
+fn parseLdScript(self: *Elf, lib: SystemLib, ctx: *ParseErrorCtx) ParseError!void {
    const tracy = trace(@src());
    defer tracy.end();

    const gpa = self.base.allocator;
+    const in_file = try std.fs.cwd().openFile(lib.path, .{});
+    defer in_file.close();
    const data = try in_file.readToEndAlloc(gpa, std.math.maxInt(u32));
    defer gpa.free(data);

@@ -1871,11 +1849,8 @@ fn parseLdScript(self: *Elf, in_file: std.fs.File, lib: SystemLib, ctx: *ParseEr
        }

        const full_path = test_path.items;
-        const scr_file = try std.fs.cwd().openFile(full_path, .{});
-        defer scr_file.close();
-
        var scr_ctx: ParseErrorCtx = .{ .detected_cpu_arch = undefined };
-        self.parseLibrary(scr_file, .{
+        self.parseLibrary(.{
            .needed = scr_obj.needed,
            .path = full_path,
        }, false, &scr_ctx) catch |err| try self.handleAndReportParseError(full_path, err, &scr_ctx);
@@ -1893,14 +1868,16 @@ fn accessLibPath(
    const sep = fs.path.sep_str;
    const target = self.base.options.target;
    test_path.clearRetainingCapacity();
+    const prefix = if (link_mode != null) "lib" else "";
+    const suffix = if (link_mode) |mode| switch (mode) {
+        .Static => target.staticLibSuffix(),
+        .Dynamic => target.dynamicLibSuffix(),
+    } else "";
    try test_path.writer().print("{s}" ++ sep ++ "{s}{s}{s}", .{
        lib_dir_path,
-        target.libPrefix(),
+        prefix,
        lib_name,
-        if (link_mode) |mode| switch (mode) {
-            .Static => target.staticLibSuffix(),
-            .Dynamic => target.dynamicLibSuffix(),
-        } else "",
+        suffix,
    });
    if (checked_paths) |cpaths| {
        try cpaths.append(try self.base.allocator.dupe(u8, test_path.items));
--- a/src/link/Elf/Archive.zig
+++ b/src/link/Elf/Archive.zig
@@ -62,10 +62,11 @@ const ar_hdr = extern struct {
    }
 };

-pub fn isArchive(file: std.fs.File) bool {
+pub fn isArchive(path: []const u8) !bool {
+    const file = try std.fs.cwd().openFile(path, .{});
+    defer file.close();
    const reader = file.reader();
    const magic = reader.readBytesNoEof(Archive.SARMAG) catch return false;
-    defer file.seekTo(0) catch {};
    if (!mem.eql(u8, &magic, ARMAG)) return false;
    return true;
 }
--- a/src/link/Elf/LdScript.zig
+++ b/src/link/Elf/LdScript.zig
@@ -83,7 +83,8 @@ fn doParse(scr: *LdScript, ctx: struct {
            const cmd = ctx.parser.getCommand(cmd_id);
            switch (cmd) {
                .output_format => scr.cpu_arch = try ctx.parser.outputFormat(),
-                .group => try ctx.parser.group(ctx.args),
+                // TODO we should verify that group only contains libraries
+                .input, .group => try ctx.parser.group(ctx.args),
                else => return error.UnexpectedToken,
            }
        } else break;
@@ -102,6 +103,7 @@ const LineColumn = struct {

 const Command = enum {
    output_format,
+    input,
    group,
    as_needed,

@@ -420,110 +422,6 @@ const TokenIterator = struct {
    }
 };

-const testing = std.testing;
-
-fn testExpectedTokens(input: []const u8, expected: []const Token.Id) !void {
-    var given = std.ArrayList(Token.Id).init(testing.allocator);
-    defer given.deinit();
-
-    var tokenizer = Tokenizer{ .source = input };
-    while (true) {
-        const tok = tokenizer.next();
-        if (tok.id == .invalid) {
-            std.debug.print("  {s} => '{s}'\n", .{ @tagName(tok.id), tok.get(input) });
-        }
-        try given.append(tok.id);
-        if (tok.id == .eof) break;
-    }
-
-    try testing.expectEqualSlices(Token.Id, expected, given.items);
-}
-
-test "Tokenizer - just comments" {
-    try testExpectedTokens(
-        \\/* GNU ld script
-        \\   Use the shared library, but some functions are only in
-        \\   the static library, so try that secondarily.  */
-    , &.{ .comment, .eof });
-}
-
-test "Tokenizer - comments with a simple command" {
-    try testExpectedTokens(
-        \\/* GNU ld script
-        \\   Use the shared library, but some functions are only in
-        \\   the static library, so try that secondarily.  */
-        \\OUTPUT_FORMAT(elf64-x86-64)
-    , &.{ .comment, .new_line, .command, .lparen, .literal, .rparen, .eof });
-}
-
-test "Tokenizer - libc.so" {
-    try testExpectedTokens(
-        \\/* GNU ld script
-        \\   Use the shared library, but some functions are only in
-        \\   the static library, so try that secondarily.  */
-        \\OUTPUT_FORMAT(elf64-x86-64)
-        \\GROUP ( /a/b/c.so.6 /a/d/e.a  AS_NEEDED ( /f/g/h.so.2 ) )
-    , &.{
-        .comment, .new_line, // GNU comment
-        .command, .lparen, .literal, .rparen, .new_line, // output format
-        .command, .lparen, .literal, .literal, // group start
-        .command, .lparen, .literal, .rparen, // as needed
-        .rparen, // group end
-        .eof,
-    });
-}
-
-test "Parser - output format" {
-    const source =
-        \\OUTPUT_FORMAT(elf64-x86-64)
-    ;
-    var tokenizer = Tokenizer{ .source = source };
-    var tokens = std.ArrayList(Token).init(testing.allocator);
-    defer tokens.deinit();
-    while (true) {
-        const tok = tokenizer.next();
-        try testing.expect(tok.id != .invalid);
-        try tokens.append(tok);
-        if (tok.id == .eof) break;
-    }
-    var it = TokenIterator{ .tokens = tokens.items };
-    var parser = Parser{ .source = source, .it = &it };
-    const tok_id = try parser.require(.command);
-    try testing.expectEqual(parser.getCommand(tok_id), .output_format);
-    const cpu_arch = try parser.outputFormat();
-    try testing.expectEqual(cpu_arch, .x86_64);
-}
-
-test "Parser - group with as-needed" {
-    const source =
-        \\GROUP ( /a/b/c.so.6 /a/d/e.a  AS_NEEDED ( /f/g/h.so.2 ) )
-    ;
-    var tokenizer = Tokenizer{ .source = source };
-    var tokens = std.ArrayList(Token).init(testing.allocator);
-    defer tokens.deinit();
-    while (true) {
-        const tok = tokenizer.next();
-        try testing.expect(tok.id != .invalid);
-        try tokens.append(tok);
-        if (tok.id == .eof) break;
-    }
-    var it = TokenIterator{ .tokens = tokens.items };
-    var parser = Parser{ .source = source, .it = &it };
-
-    var args = std.ArrayList(Elf.LinkObject).init(testing.allocator);
-    defer args.deinit();
-    const tok_id = try parser.require(.command);
-    try testing.expectEqual(parser.getCommand(tok_id), .group);
-    try parser.group(&args);
-
-    try testing.expectEqualStrings("/a/b/c.so.6", args.items[0].path);
-    try testing.expect(args.items[0].needed);
-    try testing.expectEqualStrings("/a/d/e.a", args.items[1].path);
-    try testing.expect(args.items[1].needed);
-    try testing.expectEqualStrings("/f/g/h.so.2", args.items[2].path);
-    try testing.expect(!args.items[2].needed);
-}
-
 const LdScript = @This();

 const std = @import("std");
--- a/src/link/Elf/Object.zig
+++ b/src/link/Elf/Object.zig
@@ -22,10 +22,11 @@ num_dynrelocs: u32 = 0,

 output_symtab_size: Elf.SymtabSize = .{},

-pub fn isObject(file: std.fs.File) bool {
+pub fn isObject(path: []const u8) !bool {
+    const file = try std.fs.cwd().openFile(path, .{});
+    defer file.close();
    const reader = file.reader();
    const header = reader.readStruct(elf.Elf64_Ehdr) catch return false;
-    defer file.seekTo(0) catch {};
    if (!mem.eql(u8, header.e_ident[0..4], "\x7fELF")) return false;
    if (header.e_ident[elf.EI_VERSION] != 1) return false;
    if (header.e_type != elf.ET.REL) return false;
--- a/src/link/Elf/SharedObject.zig
+++ b/src/link/Elf/SharedObject.zig
@@ -22,10 +22,11 @@ alive: bool,

 output_symtab_size: Elf.SymtabSize = .{},

-pub fn isSharedObject(file: std.fs.File) bool {
+pub fn isSharedObject(path: []const u8) !bool {
+    const file = try std.fs.cwd().openFile(path, .{});
+    defer file.close();
    const reader = file.reader();
    const header = reader.readStruct(elf.Elf64_Ehdr) catch return false;
-    defer file.seekTo(0) catch {};
    if (!mem.eql(u8, header.e_ident[0..4], "\x7fELF")) return false;
    if (header.e_ident[elf.EI_VERSION] != 1) return false;
    if (header.e_type != elf.ET.DYN) return false;
--- a/test/link/elf.zig
+++ b/test/link/elf.zig
@@ -75,6 +75,7 @@ pub fn build(b: *Build) void {
    elf_step.dependOn(testLargeAlignmentExe(b, .{ .target = glibc_target }));
    elf_step.dependOn(testLargeBss(b, .{ .target = glibc_target }));
    elf_step.dependOn(testLinkOrder(b, .{ .target = glibc_target }));
+    elf_step.dependOn(testLdScript(b, .{ .target = glibc_target }));
    // https://github.com/ziglang/zig/issues/17451
    // elf_step.dependOn(testNoEhFrameHdr(b, .{ .target = glibc_target }));
    elf_step.dependOn(testPie(b, .{ .target = glibc_target }));
@@ -1568,6 +1569,38 @@ fn testLinkOrder(b: *Build, opts: Options) *Step {
    return test_step;
 }

+fn testLdScript(b: *Build, opts: Options) *Step {
+    const test_step = addTestStep(b, "ld-script", opts);
+
+    const dso = addSharedLibrary(b, "bar", opts);
+    addCSourceBytes(dso, "int foo() { return 42; }", &.{});
+
+    const scripts = WriteFile.create(b);
+    _ = scripts.add("liba.so", "INPUT(libfoo.so)");
+    _ = scripts.add("libfoo.so", "GROUP(AS_NEEDED(-lbar))");
+
+    const exe = addExecutable(b, "main", opts);
+    addCSourceBytes(exe,
+        \\int foo();
+        \\int main() {
+        \\  return foo() - 42;
+        \\}
+    , &.{});
+    exe.linkSystemLibrary2("a", .{});
+    exe.addLibraryPath(scripts.getDirectory());
+    exe.addLibraryPath(dso.getEmittedBinDirectory());
+    exe.addRPath(dso.getEmittedBinDirectory());
+    exe.linkLibC();
+    // https://github.com/ziglang/zig/issues/17619
+    exe.pie = true;
+
+    const run = addRunArtifact(exe);
+    run.expectExitCode(0);
+    test_step.dependOn(&run.step);
+
+    return test_step;
+}
+
 fn testLinkingC(b: *Build, opts: Options) *Step {
    const test_step = addTestStep(b, "linking-c", opts);