elf: improve parsing of ld scripts and actually test linking against them

This commit is contained in:
Jakub Konka 2023-10-24 12:12:46 +02:00
parent 2f3add4f30
commit 8087ec8e8c
6 changed files with 78 additions and 167 deletions

View File

@ -1353,10 +1353,8 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
}
for (positionals.items) |obj| {
const in_file = try std.fs.cwd().openFile(obj.path, .{});
defer in_file.close();
var parse_ctx: ParseErrorCtx = .{ .detected_cpu_arch = undefined };
self.parsePositional(in_file, obj.path, obj.must_link, &parse_ctx) catch |err|
self.parsePositional(obj.path, obj.must_link, &parse_ctx) catch |err|
try self.handleAndReportParseError(obj.path, err, &parse_ctx);
}
@ -1437,9 +1435,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
for (system_libs.items) |lib| {
var parse_ctx: ParseErrorCtx = .{ .detected_cpu_arch = undefined };
const in_file = try std.fs.cwd().openFile(lib.path, .{});
defer in_file.close();
self.parseLibrary(in_file, lib, false, &parse_ctx) catch |err|
self.parseLibrary(lib, false, &parse_ctx) catch |err|
try self.handleAndReportParseError(lib.path, err, &parse_ctx);
}
@ -1456,10 +1452,8 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
if (csu.crtn) |v| try positionals.append(.{ .path = v });
for (positionals.items) |obj| {
const in_file = try std.fs.cwd().openFile(obj.path, .{});
defer in_file.close();
var parse_ctx: ParseErrorCtx = .{ .detected_cpu_arch = undefined };
self.parsePositional(in_file, obj.path, obj.must_link, &parse_ctx) catch |err|
self.parsePositional(obj.path, obj.must_link, &parse_ctx) catch |err|
try self.handleAndReportParseError(obj.path, err, &parse_ctx);
}
@ -1679,51 +1673,40 @@ const ParseError = error{
InvalidCharacter,
} || LdScript.Error || std.os.AccessError || std.os.SeekError || std.fs.File.OpenError || std.fs.File.ReadError;
/// Dispatches a positional input: when `Object.isObject` accepts it, the input is handed
/// to `parseObject`; otherwise it is forwarded to `parseLibrary` as a `.{ .path = path }`
/// library together with `must_link`.
/// NOTE(review): this span carries two variants of the signature and of every call, one
/// threading an already-open `in_file` through and one passing only `path`. Presumably
/// these are the before/after of the same change; confirm which variant is live.
fn parsePositional(
self: *Elf,
in_file: std.fs.File,
path: []const u8,
must_link: bool,
ctx: *ParseErrorCtx,
) ParseError!void {
fn parsePositional(self: *Elf, path: []const u8, must_link: bool, ctx: *ParseErrorCtx) ParseError!void {
// Trace zone covering the whole call; it is ended on scope exit.
const tracy = trace(@src());
defer tracy.end();
// Handle-based check: `isObject` is given the open file.
if (Object.isObject(in_file)) {
try self.parseObject(in_file, path, ctx);
// Path-based check: `isObject` can fail here, hence the `try`.
if (try Object.isObject(path)) {
try self.parseObject(path, ctx);
} else {
// Not an object: let `parseLibrary` decide what kind of library input this is.
try self.parseLibrary(in_file, .{ .path = path }, must_link, ctx);
try self.parseLibrary(.{ .path = path }, must_link, ctx);
}
}
/// Classifies a library input and parses it accordingly: archives go to `parseArchive`,
/// shared objects go to `parseSharedObject`, and anything else is attempted as an ld
/// script through `parseLdScript`.
/// Every error coming out of the ld-script attempt is collapsed into
/// `error.UnknownFileType`.
/// NOTE(review): this span carries two variants of the signature and of every call, one
/// threading an already-open `in_file` through and one passing only `lib` / `lib.path`.
/// Presumably these are the before/after of the same change; confirm which variant is live.
fn parseLibrary(
self: *Elf,
in_file: std.fs.File,
lib: SystemLib,
must_link: bool,
ctx: *ParseErrorCtx,
) ParseError!void {
fn parseLibrary(self: *Elf, lib: SystemLib, must_link: bool, ctx: *ParseErrorCtx) ParseError!void {
// Trace zone covering the whole call; it is ended on scope exit.
const tracy = trace(@src());
defer tracy.end();
// Handle-based classification.
if (Archive.isArchive(in_file)) {
try self.parseArchive(in_file, lib.path, must_link, ctx);
} else if (SharedObject.isSharedObject(in_file)) {
try self.parseSharedObject(in_file, lib, ctx);
// Path-based classification: both predicates can fail here, hence the `try`s.
if (try Archive.isArchive(lib.path)) {
try self.parseArchive(lib.path, must_link, ctx);
} else if (try SharedObject.isSharedObject(lib.path)) {
try self.parseSharedObject(lib, ctx);
} else {
// TODO if the script has a top-level comment identifying it as GNU ld script,
// then report parse errors. Otherwise return UnknownFileType.
// Until then, any failure of the ld-script path is reported as an unknown file type.
self.parseLdScript(in_file, lib, ctx) catch |err| switch (err) {
self.parseLdScript(lib, ctx) catch |err| switch (err) {
else => return error.UnknownFileType,
};
}
}
fn parseObject(self: *Elf, in_file: std.fs.File, path: []const u8, ctx: *ParseErrorCtx) ParseError!void {
fn parseObject(self: *Elf, path: []const u8, ctx: *ParseErrorCtx) ParseError!void {
const tracy = trace(@src());
defer tracy.end();
const gpa = self.base.allocator;
const in_file = try std.fs.cwd().openFile(path, .{});
defer in_file.close();
const data = try in_file.readToEndAlloc(gpa, std.math.maxInt(u32));
const index = @as(File.Index, @intCast(try self.files.addOne(gpa)));
self.files.set(index, .{ .object = .{
@ -1740,17 +1723,13 @@ fn parseObject(self: *Elf, in_file: std.fs.File, path: []const u8, ctx: *ParseEr
if (ctx.detected_cpu_arch != self.base.options.target.cpu.arch) return error.InvalidCpuArch;
}
fn parseArchive(
self: *Elf,
in_file: std.fs.File,
path: []const u8,
must_link: bool,
ctx: *ParseErrorCtx,
) ParseError!void {
fn parseArchive(self: *Elf, path: []const u8, must_link: bool, ctx: *ParseErrorCtx) ParseError!void {
const tracy = trace(@src());
defer tracy.end();
const gpa = self.base.allocator;
const in_file = try std.fs.cwd().openFile(path, .{});
defer in_file.close();
const data = try in_file.readToEndAlloc(gpa, std.math.maxInt(u32));
var archive = Archive{ .path = try gpa.dupe(u8, path), .data = data };
defer archive.deinit(gpa);
@ -1773,16 +1752,13 @@ fn parseArchive(
}
}
fn parseSharedObject(
self: *Elf,
in_file: std.fs.File,
lib: SystemLib,
ctx: *ParseErrorCtx,
) ParseError!void {
fn parseSharedObject(self: *Elf, lib: SystemLib, ctx: *ParseErrorCtx) ParseError!void {
const tracy = trace(@src());
defer tracy.end();
const gpa = self.base.allocator;
const in_file = try std.fs.cwd().openFile(lib.path, .{});
defer in_file.close();
const data = try in_file.readToEndAlloc(gpa, std.math.maxInt(u32));
const index = @as(File.Index, @intCast(try self.files.addOne(gpa)));
self.files.set(index, .{ .shared_object = .{
@ -1801,11 +1777,13 @@ fn parseSharedObject(
if (ctx.detected_cpu_arch != self.base.options.target.cpu.arch) return error.InvalidCpuArch;
}
fn parseLdScript(self: *Elf, in_file: std.fs.File, lib: SystemLib, ctx: *ParseErrorCtx) ParseError!void {
fn parseLdScript(self: *Elf, lib: SystemLib, ctx: *ParseErrorCtx) ParseError!void {
const tracy = trace(@src());
defer tracy.end();
const gpa = self.base.allocator;
const in_file = try std.fs.cwd().openFile(lib.path, .{});
defer in_file.close();
const data = try in_file.readToEndAlloc(gpa, std.math.maxInt(u32));
defer gpa.free(data);
@ -1871,11 +1849,8 @@ fn parseLdScript(self: *Elf, in_file: std.fs.File, lib: SystemLib, ctx: *ParseEr
}
const full_path = test_path.items;
const scr_file = try std.fs.cwd().openFile(full_path, .{});
defer scr_file.close();
var scr_ctx: ParseErrorCtx = .{ .detected_cpu_arch = undefined };
self.parseLibrary(scr_file, .{
self.parseLibrary(.{
.needed = scr_obj.needed,
.path = full_path,
}, false, &scr_ctx) catch |err| try self.handleAndReportParseError(full_path, err, &scr_ctx);
@ -1893,14 +1868,16 @@ fn accessLibPath(
const sep = fs.path.sep_str;
const target = self.base.options.target;
test_path.clearRetainingCapacity();
const prefix = if (link_mode != null) "lib" else "";
const suffix = if (link_mode) |mode| switch (mode) {
.Static => target.staticLibSuffix(),
.Dynamic => target.dynamicLibSuffix(),
} else "";
try test_path.writer().print("{s}" ++ sep ++ "{s}{s}{s}", .{
lib_dir_path,
target.libPrefix(),
prefix,
lib_name,
if (link_mode) |mode| switch (mode) {
.Static => target.staticLibSuffix(),
.Dynamic => target.dynamicLibSuffix(),
} else "",
suffix,
});
if (checked_paths) |cpaths| {
try cpaths.append(try self.base.allocator.dupe(u8, test_path.items));

View File

@ -62,10 +62,11 @@ const ar_hdr = extern struct {
}
};
/// Reports whether the input starts with the archive magic: reads `Archive.SARMAG` bytes
/// and compares them against `ARMAG`.
/// A failed read of the magic yields `false` rather than an error; in the path-taking
/// variant a failure to open the file is propagated to the caller.
/// NOTE(review): this span carries two signatures, one taking an open `std.fs.File` and
/// one taking a path. Presumably these are the before/after of the same change; confirm
/// which variant is live.
pub fn isArchive(file: std.fs.File) bool {
pub fn isArchive(path: []const u8) !bool {
// Path variant: open the file locally and close it when this function returns.
const file = try std.fs.cwd().openFile(path, .{});
defer file.close();
const reader = file.reader();
// Any read error is treated as "not an archive".
const magic = reader.readBytesNoEof(Archive.SARMAG) catch return false;
// Seek back to offset 0 on scope exit; a failed seek is deliberately ignored.
defer file.seekTo(0) catch {};
if (!mem.eql(u8, &magic, ARMAG)) return false;
return true;
}

View File

@ -83,7 +83,8 @@ fn doParse(scr: *LdScript, ctx: struct {
const cmd = ctx.parser.getCommand(cmd_id);
switch (cmd) {
.output_format => scr.cpu_arch = try ctx.parser.outputFormat(),
.group => try ctx.parser.group(ctx.args),
// TODO we should verify that group only contains libraries
.input, .group => try ctx.parser.group(ctx.args),
else => return error.UnexpectedToken,
}
} else break;
@ -102,6 +103,7 @@ const LineColumn = struct {
const Command = enum {
output_format,
input,
group,
as_needed,
@ -420,110 +422,6 @@ const TokenIterator = struct {
}
};
const testing = std.testing;
/// Tokenizes `input` until `.eof` and checks that the sequence of produced token ids
/// equals `expected` (the trailing `.eof` included). Each `.invalid` token is printed
/// with its source text to make a mismatch easier to diagnose.
fn testExpectedTokens(input: []const u8, expected: []const Token.Id) !void {
    var lexer = Tokenizer{ .source = input };
    var actual_ids = std.ArrayList(Token.Id).init(testing.allocator);
    defer actual_ids.deinit();

    var reached_eof = false;
    while (!reached_eof) {
        const token = lexer.next();
        const is_invalid = token.id == .invalid;
        if (is_invalid) {
            std.debug.print(" {s} => '{s}'\n", .{ @tagName(token.id), token.get(input) });
        }
        // Record every id, including invalid ones and the final `.eof`.
        try actual_ids.append(token.id);
        reached_eof = token.id == .eof;
    }

    try testing.expectEqualSlices(Token.Id, expected, actual_ids.items);
}
test "Tokenizer - just comments" {
    // A script consisting of one block comment yields a single comment token, then EOF.
    const script =
        \\/* GNU ld script
        \\ Use the shared library, but some functions are only in
        \\ the static library, so try that secondarily. */
    ;
    const expected_ids = [_]Token.Id{ .comment, .eof };
    try testExpectedTokens(script, &expected_ids);
}
test "Tokenizer - comments with a simple command" {
    // A leading block comment followed by one parenthesized command on the next line.
    const script =
        \\/* GNU ld script
        \\ Use the shared library, but some functions are only in
        \\ the static library, so try that secondarily. */
        \\OUTPUT_FORMAT(elf64-x86-64)
    ;
    const expected_ids = [_]Token.Id{
        .comment,
        .new_line,
        .command,
        .lparen,
        .literal,
        .rparen,
        .eof,
    };
    try testExpectedTokens(script, &expected_ids);
}
test "Tokenizer - libc.so" {
    // Comment, OUTPUT_FORMAT, and a GROUP that nests an AS_NEEDED list.
    const script =
        \\/* GNU ld script
        \\ Use the shared library, but some functions are only in
        \\ the static library, so try that secondarily. */
        \\OUTPUT_FORMAT(elf64-x86-64)
        \\GROUP ( /a/b/c.so.6 /a/d/e.a AS_NEEDED ( /f/g/h.so.2 ) )
    ;
    const expected_ids = [_]Token.Id{
        // GNU comment
        .comment, .new_line,
        // output format
        .command, .lparen,
        .literal, .rparen,
        .new_line,
        // group start
        .command,
        .lparen,  .literal,
        .literal,
        // as needed
        .command,
        .lparen,  .literal,
        .rparen,
        // group end
        .rparen,
        .eof,
    };
    try testExpectedTokens(script, &expected_ids);
}
test "Parser - output format" {
    const script =
        \\OUTPUT_FORMAT(elf64-x86-64)
    ;

    // Tokenize the whole script up front; an invalid token fails the test immediately.
    var token_list = std.ArrayList(Token).init(testing.allocator);
    defer token_list.deinit();
    var lexer = Tokenizer{ .source = script };
    while (true) {
        const token = lexer.next();
        try testing.expect(token.id != .invalid);
        try token_list.append(token);
        if (token.id == .eof) break;
    }

    var iter = TokenIterator{ .tokens = token_list.items };
    var parser = Parser{ .source = script, .it = &iter };

    // The first token must be a command, and it must resolve to `output_format`.
    const command_tok = try parser.require(.command);
    try testing.expectEqual(parser.getCommand(command_tok), .output_format);

    // The command's argument is expected to resolve to the x86_64 architecture.
    const detected_arch = try parser.outputFormat();
    try testing.expectEqual(detected_arch, .x86_64);
}
test "Parser - group with as-needed" {
    const script =
        \\GROUP ( /a/b/c.so.6 /a/d/e.a AS_NEEDED ( /f/g/h.so.2 ) )
    ;

    // Tokenize the whole script up front; an invalid token fails the test immediately.
    var token_list = std.ArrayList(Token).init(testing.allocator);
    defer token_list.deinit();
    var lexer = Tokenizer{ .source = script };
    while (true) {
        const token = lexer.next();
        try testing.expect(token.id != .invalid);
        try token_list.append(token);
        if (token.id == .eof) break;
    }

    var iter = TokenIterator{ .tokens = token_list.items };
    var parser = Parser{ .source = script, .it = &iter };

    var objects = std.ArrayList(Elf.LinkObject).init(testing.allocator);
    defer objects.deinit();

    const command_tok = try parser.require(.command);
    try testing.expectEqual(parser.getCommand(command_tok), .group);
    try parser.group(&objects);

    // Entries listed directly in GROUP are expected to be flagged `needed`;
    // the entry wrapped in AS_NEEDED is expected not to be.
    const Expectation = struct { path: []const u8, needed: bool };
    const expectations = [_]Expectation{
        .{ .path = "/a/b/c.so.6", .needed = true },
        .{ .path = "/a/d/e.a", .needed = true },
        .{ .path = "/f/g/h.so.2", .needed = false },
    };
    for (expectations, 0..) |want, i| {
        try testing.expectEqualStrings(want.path, objects.items[i].path);
        try testing.expect(objects.items[i].needed == want.needed);
    }
}
const LdScript = @This();
const std = @import("std");

View File

@ -22,10 +22,11 @@ num_dynrelocs: u32 = 0,
output_symtab_size: Elf.SymtabSize = .{},
pub fn isObject(file: std.fs.File) bool {
pub fn isObject(path: []const u8) !bool {
const file = try std.fs.cwd().openFile(path, .{});
defer file.close();
const reader = file.reader();
const header = reader.readStruct(elf.Elf64_Ehdr) catch return false;
defer file.seekTo(0) catch {};
if (!mem.eql(u8, header.e_ident[0..4], "\x7fELF")) return false;
if (header.e_ident[elf.EI_VERSION] != 1) return false;
if (header.e_type != elf.ET.REL) return false;

View File

@ -22,10 +22,11 @@ alive: bool,
output_symtab_size: Elf.SymtabSize = .{},
pub fn isSharedObject(file: std.fs.File) bool {
pub fn isSharedObject(path: []const u8) !bool {
const file = try std.fs.cwd().openFile(path, .{});
defer file.close();
const reader = file.reader();
const header = reader.readStruct(elf.Elf64_Ehdr) catch return false;
defer file.seekTo(0) catch {};
if (!mem.eql(u8, header.e_ident[0..4], "\x7fELF")) return false;
if (header.e_ident[elf.EI_VERSION] != 1) return false;
if (header.e_type != elf.ET.DYN) return false;

View File

@ -75,6 +75,7 @@ pub fn build(b: *Build) void {
elf_step.dependOn(testLargeAlignmentExe(b, .{ .target = glibc_target }));
elf_step.dependOn(testLargeBss(b, .{ .target = glibc_target }));
elf_step.dependOn(testLinkOrder(b, .{ .target = glibc_target }));
elf_step.dependOn(testLdScript(b, .{ .target = glibc_target }));
// https://github.com/ziglang/zig/issues/17451
// elf_step.dependOn(testNoEhFrameHdr(b, .{ .target = glibc_target }));
elf_step.dependOn(testPie(b, .{ .target = glibc_target }));
@ -1568,6 +1569,38 @@ fn testLinkOrder(b: *Build, opts: Options) *Step {
return test_step;
}
/// Builds an executable that requests system library `a`, where `liba.so` is an ld script
/// chaining through a second script to the real shared library `bar`, then runs the
/// executable and expects exit code 0.
fn testLdScript(b: *Build, opts: Options) *Step {
    const test_step = addTestStep(b, "ld-script", opts);

    // The real shared library that provides `foo`.
    const bar_lib = addSharedLibrary(b, "bar", opts);
    addCSourceBytes(bar_lib, "int foo() { return 42; }", &.{});

    // Two text files named like shared objects: `liba.so` refers to `libfoo.so` through
    // INPUT, and `libfoo.so` requests `-lbar` inside GROUP(AS_NEEDED(...)).
    const ld_scripts = WriteFile.create(b);
    _ = ld_scripts.add("liba.so", "INPUT(libfoo.so)");
    _ = ld_scripts.add("libfoo.so", "GROUP(AS_NEEDED(-lbar))");

    // `main` returns 0 only when `foo` resolves to the implementation above.
    const main_source =
        \\int foo();
        \\int main() {
        \\ return foo() - 42;
        \\}
    ;
    const main_exe = addExecutable(b, "main", opts);
    addCSourceBytes(main_exe, main_source, &.{});
    main_exe.linkSystemLibrary2("a", .{});
    main_exe.addLibraryPath(ld_scripts.getDirectory());
    main_exe.addLibraryPath(bar_lib.getEmittedBinDirectory());
    main_exe.addRPath(bar_lib.getEmittedBinDirectory());
    main_exe.linkLibC();
    // https://github.com/ziglang/zig/issues/17619
    main_exe.pie = true;

    const run_main = addRunArtifact(main_exe);
    run_main.expectExitCode(0);
    test_step.dependOn(&run_main.step);

    return test_step;
}
fn testLinkingC(b: *Build, opts: Options) *Step {
const test_step = addTestStep(b, "linking-c", opts);