From 8087ec8e8c9e3abf8cf2f3952127aa97749610a5 Mon Sep 17 00:00:00 2001
From: Jakub Konka <kubkon@jakubkonka.com>
Date: Tue, 24 Oct 2023 12:12:46 +0200
Subject: [PATCH] elf: improve parsing of ld scripts and actually test linking
 against them

---
 src/link/Elf.zig              |  89 +++++++++++-----------------
 src/link/Elf/Archive.zig      |   5 +-
 src/link/Elf/LdScript.zig     | 108 +---------------------------------
 src/link/Elf/Object.zig       |   5 +-
 src/link/Elf/SharedObject.zig |   5 +-
 test/link/elf.zig             |  33 +++++++++++
 6 files changed, 78 insertions(+), 167 deletions(-)

diff --git a/src/link/Elf.zig b/src/link/Elf.zig
index b8b233e4b0..421d6f9573 100644
--- a/src/link/Elf.zig
+++ b/src/link/Elf.zig
@@ -1353,10 +1353,8 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
     }
 
     for (positionals.items) |obj| {
-        const in_file = try std.fs.cwd().openFile(obj.path, .{});
-        defer in_file.close();
         var parse_ctx: ParseErrorCtx = .{ .detected_cpu_arch = undefined };
-        self.parsePositional(in_file, obj.path, obj.must_link, &parse_ctx) catch |err|
+        self.parsePositional(obj.path, obj.must_link, &parse_ctx) catch |err|
             try self.handleAndReportParseError(obj.path, err, &parse_ctx);
     }
 
@@ -1437,9 +1435,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
 
     for (system_libs.items) |lib| {
         var parse_ctx: ParseErrorCtx = .{ .detected_cpu_arch = undefined };
-        const in_file = try std.fs.cwd().openFile(lib.path, .{});
-        defer in_file.close();
-        self.parseLibrary(in_file, lib, false, &parse_ctx) catch |err|
+        self.parseLibrary(lib, false, &parse_ctx) catch |err|
             try self.handleAndReportParseError(lib.path, err, &parse_ctx);
     }
 
@@ -1456,10 +1452,8 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
     if (csu.crtn) |v| try positionals.append(.{ .path = v });
 
     for (positionals.items) |obj| {
-        const in_file = try std.fs.cwd().openFile(obj.path, .{});
-        defer in_file.close();
         var parse_ctx: ParseErrorCtx = .{ .detected_cpu_arch = undefined };
-        self.parsePositional(in_file, obj.path, obj.must_link, &parse_ctx) catch |err|
+        self.parsePositional(obj.path, obj.must_link, &parse_ctx) catch |err|
             try self.handleAndReportParseError(obj.path, err, &parse_ctx);
     }
 
@@ -1679,51 +1673,40 @@ const ParseError = error{
     InvalidCharacter,
 } || LdScript.Error || std.os.AccessError || std.os.SeekError || std.fs.File.OpenError || std.fs.File.ReadError;
 
-fn parsePositional(
-    self: *Elf,
-    in_file: std.fs.File,
-    path: []const u8,
-    must_link: bool,
-    ctx: *ParseErrorCtx,
-) ParseError!void {
+fn parsePositional(self: *Elf, path: []const u8, must_link: bool, ctx: *ParseErrorCtx) ParseError!void {
     const tracy = trace(@src());
     defer tracy.end();
-
-    if (Object.isObject(in_file)) {
-        try self.parseObject(in_file, path, ctx);
+    if (try Object.isObject(path)) { // isObject opens `path` itself; an open failure propagates through `try`
+        try self.parseObject(path, ctx);
     } else {
-        try self.parseLibrary(in_file, .{ .path = path }, must_link, ctx);
+        try self.parseLibrary(.{ .path = path }, must_link, ctx); // not an object: archive, shared object, or ld script
     }
 }
 
-fn parseLibrary(
-    self: *Elf,
-    in_file: std.fs.File,
-    lib: SystemLib,
-    must_link: bool,
-    ctx: *ParseErrorCtx,
-) ParseError!void {
+fn parseLibrary(self: *Elf, lib: SystemLib, must_link: bool, ctx: *ParseErrorCtx) ParseError!void {
     const tracy = trace(@src());
     defer tracy.end();
 
-    if (Archive.isArchive(in_file)) {
-        try self.parseArchive(in_file, lib.path, must_link, ctx);
-    } else if (SharedObject.isSharedObject(in_file)) {
-        try self.parseSharedObject(in_file, lib, ctx);
+    if (try Archive.isArchive(lib.path)) { // magic-byte probe; opens lib.path on its own
+        try self.parseArchive(lib.path, must_link, ctx);
+    } else if (try SharedObject.isSharedObject(lib.path)) { // ELF header probe; opens lib.path on its own
+        try self.parseSharedObject(lib, ctx);
     } else {
         // TODO if the script has a top-level comment identifying it as GNU ld script,
         // then report parse errors. Otherwise return UnknownFileType.
-        self.parseLdScript(in_file, lib, ctx) catch |err| switch (err) {
+        self.parseLdScript(lib, ctx) catch |err| switch (err) { // any failure here is reported as UnknownFileType
             else => return error.UnknownFileType,
         };
     }
 }
 
-fn parseObject(self: *Elf, in_file: std.fs.File, path: []const u8, ctx: *ParseErrorCtx) ParseError!void {
+fn parseObject(self: *Elf, path: []const u8, ctx: *ParseErrorCtx) ParseError!void {
     const tracy = trace(@src());
     defer tracy.end();
 
     const gpa = self.base.allocator;
+    const in_file = try std.fs.cwd().openFile(path, .{});
+    defer in_file.close();
     const data = try in_file.readToEndAlloc(gpa, std.math.maxInt(u32));
     const index = @as(File.Index, @intCast(try self.files.addOne(gpa)));
     self.files.set(index, .{ .object = .{
@@ -1740,17 +1723,13 @@ fn parseObject(self: *Elf, in_file: std.fs.File, path: []const u8, ctx: *ParseEr
     if (ctx.detected_cpu_arch != self.base.options.target.cpu.arch) return error.InvalidCpuArch;
 }
 
-fn parseArchive(
-    self: *Elf,
-    in_file: std.fs.File,
-    path: []const u8,
-    must_link: bool,
-    ctx: *ParseErrorCtx,
-) ParseError!void {
+fn parseArchive(self: *Elf, path: []const u8, must_link: bool, ctx: *ParseErrorCtx) ParseError!void {
     const tracy = trace(@src());
     defer tracy.end();
 
     const gpa = self.base.allocator;
+    const in_file = try std.fs.cwd().openFile(path, .{});
+    defer in_file.close();
     const data = try in_file.readToEndAlloc(gpa, std.math.maxInt(u32));
     var archive = Archive{ .path = try gpa.dupe(u8, path), .data = data };
     defer archive.deinit(gpa);
@@ -1773,16 +1752,13 @@ fn parseArchive(
     }
 }
 
-fn parseSharedObject(
-    self: *Elf,
-    in_file: std.fs.File,
-    lib: SystemLib,
-    ctx: *ParseErrorCtx,
-) ParseError!void {
+fn parseSharedObject(self: *Elf, lib: SystemLib, ctx: *ParseErrorCtx) ParseError!void {
     const tracy = trace(@src());
     defer tracy.end();
 
     const gpa = self.base.allocator;
+    const in_file = try std.fs.cwd().openFile(lib.path, .{});
+    defer in_file.close();
     const data = try in_file.readToEndAlloc(gpa, std.math.maxInt(u32));
     const index = @as(File.Index, @intCast(try self.files.addOne(gpa)));
     self.files.set(index, .{ .shared_object = .{
@@ -1801,11 +1777,13 @@ fn parseSharedObject(
     if (ctx.detected_cpu_arch != self.base.options.target.cpu.arch) return error.InvalidCpuArch;
 }
 
-fn parseLdScript(self: *Elf, in_file: std.fs.File, lib: SystemLib, ctx: *ParseErrorCtx) ParseError!void {
+fn parseLdScript(self: *Elf, lib: SystemLib, ctx: *ParseErrorCtx) ParseError!void {
     const tracy = trace(@src());
     defer tracy.end();
 
     const gpa = self.base.allocator;
+    const in_file = try std.fs.cwd().openFile(lib.path, .{});
+    defer in_file.close();
     const data = try in_file.readToEndAlloc(gpa, std.math.maxInt(u32));
     defer gpa.free(data);
 
@@ -1871,11 +1849,8 @@ fn parseLdScript(self: *Elf, in_file: std.fs.File, lib: SystemLib, ctx: *ParseEr
         }
 
         const full_path = test_path.items;
-        const scr_file = try std.fs.cwd().openFile(full_path, .{});
-        defer scr_file.close();
-
         var scr_ctx: ParseErrorCtx = .{ .detected_cpu_arch = undefined };
-        self.parseLibrary(scr_file, .{
+        self.parseLibrary(.{
             .needed = scr_obj.needed,
             .path = full_path,
         }, false, &scr_ctx) catch |err| try self.handleAndReportParseError(full_path, err, &scr_ctx);
@@ -1893,14 +1868,16 @@ fn accessLibPath(
     const sep = fs.path.sep_str;
     const target = self.base.options.target;
     test_path.clearRetainingCapacity();
+    const prefix = if (link_mode != null) "lib" else "";
+    const suffix = if (link_mode) |mode| switch (mode) {
+        .Static => target.staticLibSuffix(),
+        .Dynamic => target.dynamicLibSuffix(),
+    } else "";
     try test_path.writer().print("{s}" ++ sep ++ "{s}{s}{s}", .{
         lib_dir_path,
-        target.libPrefix(),
+        prefix,
         lib_name,
-        if (link_mode) |mode| switch (mode) {
-            .Static => target.staticLibSuffix(),
-            .Dynamic => target.dynamicLibSuffix(),
-        } else "",
+        suffix,
     });
     if (checked_paths) |cpaths| {
         try cpaths.append(try self.base.allocator.dupe(u8, test_path.items));
diff --git a/src/link/Elf/Archive.zig b/src/link/Elf/Archive.zig
index 94529a368c..0eb2f2d404 100644
--- a/src/link/Elf/Archive.zig
+++ b/src/link/Elf/Archive.zig
@@ -62,10 +62,11 @@ const ar_hdr = extern struct {
     }
 };
 
-pub fn isArchive(file: std.fs.File) bool {
+pub fn isArchive(path: []const u8) !bool { // true when the file at `path` starts with the ARMAG magic
+    const file = try std.fs.cwd().openFile(path, .{}); // opened via cwd(); an open failure is returned to the caller
+    defer file.close(); // the handle is used only for this probe
     const reader = file.reader();
     const magic = reader.readBytesNoEof(Archive.SARMAG) catch return false;
-    defer file.seekTo(0) catch {};
     if (!mem.eql(u8, &magic, ARMAG)) return false;
     return true;
 }
diff --git a/src/link/Elf/LdScript.zig b/src/link/Elf/LdScript.zig
index c901a8ecba..c85f331d49 100644
--- a/src/link/Elf/LdScript.zig
+++ b/src/link/Elf/LdScript.zig
@@ -83,7 +83,8 @@ fn doParse(scr: *LdScript, ctx: struct {
             const cmd = ctx.parser.getCommand(cmd_id);
             switch (cmd) {
                 .output_format => scr.cpu_arch = try ctx.parser.outputFormat(),
-                .group => try ctx.parser.group(ctx.args),
+                // TODO we should verify that group only contains libraries
+                .input, .group => try ctx.parser.group(ctx.args),
                 else => return error.UnexpectedToken,
             }
         } else break;
@@ -102,6 +103,7 @@ const LineColumn = struct {
 
 const Command = enum {
     output_format,
+    input,
     group,
     as_needed,
 
@@ -420,110 +422,6 @@ const TokenIterator = struct {
     }
 };
 
-const testing = std.testing;
-
-fn testExpectedTokens(input: []const u8, expected: []const Token.Id) !void {
-    var given = std.ArrayList(Token.Id).init(testing.allocator);
-    defer given.deinit();
-
-    var tokenizer = Tokenizer{ .source = input };
-    while (true) {
-        const tok = tokenizer.next();
-        if (tok.id == .invalid) {
-            std.debug.print("  {s} => '{s}'\n", .{ @tagName(tok.id), tok.get(input) });
-        }
-        try given.append(tok.id);
-        if (tok.id == .eof) break;
-    }
-
-    try testing.expectEqualSlices(Token.Id, expected, given.items);
-}
-
-test "Tokenizer - just comments" {
-    try testExpectedTokens(
-        \\/* GNU ld script
-        \\   Use the shared library, but some functions are only in
-        \\   the static library, so try that secondarily.  */
-    , &.{ .comment, .eof });
-}
-
-test "Tokenizer - comments with a simple command" {
-    try testExpectedTokens(
-        \\/* GNU ld script
-        \\   Use the shared library, but some functions are only in
-        \\   the static library, so try that secondarily.  */
-        \\OUTPUT_FORMAT(elf64-x86-64)
-    , &.{ .comment, .new_line, .command, .lparen, .literal, .rparen, .eof });
-}
-
-test "Tokenizer - libc.so" {
-    try testExpectedTokens(
-        \\/* GNU ld script
-        \\   Use the shared library, but some functions are only in
-        \\   the static library, so try that secondarily.  */
-        \\OUTPUT_FORMAT(elf64-x86-64)
-        \\GROUP ( /a/b/c.so.6 /a/d/e.a  AS_NEEDED ( /f/g/h.so.2 ) )
-    , &.{
-        .comment, .new_line, // GNU comment
-        .command, .lparen, .literal, .rparen, .new_line, // output format
-        .command, .lparen, .literal, .literal, // group start
-        .command, .lparen, .literal, .rparen, // as needed
-        .rparen, // group end
-        .eof,
-    });
-}
-
-test "Parser - output format" {
-    const source =
-        \\OUTPUT_FORMAT(elf64-x86-64)
-    ;
-    var tokenizer = Tokenizer{ .source = source };
-    var tokens = std.ArrayList(Token).init(testing.allocator);
-    defer tokens.deinit();
-    while (true) {
-        const tok = tokenizer.next();
-        try testing.expect(tok.id != .invalid);
-        try tokens.append(tok);
-        if (tok.id == .eof) break;
-    }
-    var it = TokenIterator{ .tokens = tokens.items };
-    var parser = Parser{ .source = source, .it = &it };
-    const tok_id = try parser.require(.command);
-    try testing.expectEqual(parser.getCommand(tok_id), .output_format);
-    const cpu_arch = try parser.outputFormat();
-    try testing.expectEqual(cpu_arch, .x86_64);
-}
-
-test "Parser - group with as-needed" {
-    const source =
-        \\GROUP ( /a/b/c.so.6 /a/d/e.a  AS_NEEDED ( /f/g/h.so.2 ) )
-    ;
-    var tokenizer = Tokenizer{ .source = source };
-    var tokens = std.ArrayList(Token).init(testing.allocator);
-    defer tokens.deinit();
-    while (true) {
-        const tok = tokenizer.next();
-        try testing.expect(tok.id != .invalid);
-        try tokens.append(tok);
-        if (tok.id == .eof) break;
-    }
-    var it = TokenIterator{ .tokens = tokens.items };
-    var parser = Parser{ .source = source, .it = &it };
-
-    var args = std.ArrayList(Elf.LinkObject).init(testing.allocator);
-    defer args.deinit();
-    const tok_id = try parser.require(.command);
-    try testing.expectEqual(parser.getCommand(tok_id), .group);
-    try parser.group(&args);
-
-    try testing.expectEqualStrings("/a/b/c.so.6", args.items[0].path);
-    try testing.expect(args.items[0].needed);
-    try testing.expectEqualStrings("/a/d/e.a", args.items[1].path);
-    try testing.expect(args.items[1].needed);
-    try testing.expectEqualStrings("/f/g/h.so.2", args.items[2].path);
-    try testing.expect(!args.items[2].needed);
-}
-
 const LdScript = @This();
 
 const std = @import("std");
diff --git a/src/link/Elf/Object.zig b/src/link/Elf/Object.zig
index c24025cadb..e21d6f161c 100644
--- a/src/link/Elf/Object.zig
+++ b/src/link/Elf/Object.zig
@@ -22,10 +22,11 @@ num_dynrelocs: u32 = 0,
 
 output_symtab_size: Elf.SymtabSize = .{},
 
-pub fn isObject(file: std.fs.File) bool {
+pub fn isObject(path: []const u8) !bool {
+    const file = try std.fs.cwd().openFile(path, .{});
+    defer file.close();
     const reader = file.reader();
     const header = reader.readStruct(elf.Elf64_Ehdr) catch return false;
-    defer file.seekTo(0) catch {};
     if (!mem.eql(u8, header.e_ident[0..4], "\x7fELF")) return false;
     if (header.e_ident[elf.EI_VERSION] != 1) return false;
     if (header.e_type != elf.ET.REL) return false;
diff --git a/src/link/Elf/SharedObject.zig b/src/link/Elf/SharedObject.zig
index 0ff74edddf..710c025f34 100644
--- a/src/link/Elf/SharedObject.zig
+++ b/src/link/Elf/SharedObject.zig
@@ -22,10 +22,11 @@ alive: bool,
 
 output_symtab_size: Elf.SymtabSize = .{},
 
-pub fn isSharedObject(file: std.fs.File) bool {
+pub fn isSharedObject(path: []const u8) !bool {
+    const file = try std.fs.cwd().openFile(path, .{});
+    defer file.close();
     const reader = file.reader();
     const header = reader.readStruct(elf.Elf64_Ehdr) catch return false;
-    defer file.seekTo(0) catch {};
     if (!mem.eql(u8, header.e_ident[0..4], "\x7fELF")) return false;
     if (header.e_ident[elf.EI_VERSION] != 1) return false;
     if (header.e_type != elf.ET.DYN) return false;
diff --git a/test/link/elf.zig b/test/link/elf.zig
index eabc0d5f17..d5a62db4cd 100644
--- a/test/link/elf.zig
+++ b/test/link/elf.zig
@@ -75,6 +75,7 @@ pub fn build(b: *Build) void {
     elf_step.dependOn(testLargeAlignmentExe(b, .{ .target = glibc_target }));
     elf_step.dependOn(testLargeBss(b, .{ .target = glibc_target }));
     elf_step.dependOn(testLinkOrder(b, .{ .target = glibc_target }));
+    elf_step.dependOn(testLdScript(b, .{ .target = glibc_target }));
     // https://github.com/ziglang/zig/issues/17451
     // elf_step.dependOn(testNoEhFrameHdr(b, .{ .target = glibc_target }));
     elf_step.dependOn(testPie(b, .{ .target = glibc_target }));
@@ -1568,6 +1569,38 @@ fn testLinkOrder(b: *Build, opts: Options) *Step {
     return test_step;
 }
 
+fn testLdScript(b: *Build, opts: Options) *Step {
+    const step = addTestStep(b, "ld-script", opts);
+    // Shared library `bar` provides the `foo` symbol that the executable calls.
+    const bar_lib = addSharedLibrary(b, "bar", opts);
+    addCSourceBytes(bar_lib, "int foo() { return 42; }", &.{});
+    // Two chained ld scripts: liba.so names libfoo.so, which in turn asks for -lbar.
+    const ld_scripts = WriteFile.create(b);
+    _ = ld_scripts.add("liba.so", "INPUT(libfoo.so)");
+    _ = ld_scripts.add("libfoo.so", "GROUP(AS_NEEDED(-lbar))");
+    // The executable names only library `a`; `foo` has to be found through the scripts.
+    const main_exe = addExecutable(b, "main", opts);
+    addCSourceBytes(main_exe,
+        \\int foo();
+        \\int main() {
+        \\  return foo() - 42;
+        \\}
+    , &.{});
+    main_exe.linkSystemLibrary2("a", .{});
+    main_exe.addLibraryPath(ld_scripts.getDirectory());
+    main_exe.addLibraryPath(bar_lib.getEmittedBinDirectory());
+    main_exe.addRPath(bar_lib.getEmittedBinDirectory());
+    main_exe.linkLibC();
+    // https://github.com/ziglang/zig/issues/17619
+    main_exe.pie = true;
+    // main returns foo() - 42, so exit code 0 means foo() returned 42.
+    const run_main = addRunArtifact(main_exe);
+    run_main.expectExitCode(0);
+    step.dependOn(&run_main.step);
+
+    return step;
+}
+
 fn testLinkingC(b: *Build, opts: Options) *Step {
     const test_step = addTestStep(b, "linking-c", opts);