zig/src/link/LdScript.zig
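//! Parser for the subset of GNU LD linker scripts that shows up in practice in
//! shared-library stubs (for example, glibc's libc.so is such a script):
//! OUTPUT_FORMAT, INPUT, GROUP, and AS_NEEDED.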

path: Path,
cpu_arch: ?std.Target.Cpu.Arch,
args: []const Arg,
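/// One input file named by the script. `needed = false` marks entries that
/// appeared inside AS_NEEDED(...).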
pub const Arg = struct {
needed: bool = false,
path: []const u8,
};
pub fn deinit(ls: *LdScript, gpa: Allocator) void {
gpa.free(ls.args);
ls.* = undefined;
}
pub const Error = error{
LinkFailure,
UnknownCpuArch,
OutOfMemory,
};
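/// Tokenizes and parses an LD script. Illustrative example (library names are
/// hypothetical): given
///
///   OUTPUT_FORMAT(elf64-x86-64)
///   GROUP ( libgcc_s.so.1 AS_NEEDED ( libm.so.6 ) )
///
/// the result has `cpu_arch == .x86_64` and args
/// `.{ .path = "libgcc_s.so.1", .needed = true }`,
/// `.{ .path = "libm.so.6", .needed = false }`.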
pub fn parse(
gpa: Allocator,
diags: *Diags,
/// For error reporting.
path: Path,
data: []const u8,
) Error!LdScript {
var tokenizer = Tokenizer{ .source = data };
var tokens: std.ArrayListUnmanaged(Token) = .empty;
defer tokens.deinit(gpa);
var line_col: std.ArrayListUnmanaged(LineColumn) = .empty;
defer line_col.deinit(gpa);
var line: usize = 0;
var prev_line_last_col: usize = 0;
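// First pass: tokenize the whole script up front, recording each token's line
// and column so errors can point at an exact source location.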
while (true) {
const tok = tokenizer.next();
try tokens.append(gpa, tok);
const column = tok.start - prev_line_last_col;
try line_col.append(gpa, .{ .line = line, .column = column });
switch (tok.id) {
.invalid => {
return diags.failParse(path, "invalid token in LD script: '{f}' ({d}:{d})", .{
std.fmt.fmtSliceEscapeLower(tok.get(data)), line, column,
});
},
.new_line => {
line += 1;
prev_line_last_col = tok.end;
},
.eof => break,
else => {},
}
}
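// Second pass: recursive-descent parsing over the token stream.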
var it: TokenIterator = .{ .tokens = tokens.items };
var parser: Parser = .{
.gpa = gpa,
.source = data,
.it = &it,
.args = .empty,
.cpu_arch = null,
};
defer parser.args.deinit(gpa);
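// On failure, report the most recently consumed token together with the
// line/column recorded during tokenization.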
parser.start() catch |err| switch (err) {
error.UnexpectedToken => {
const last_token_id = parser.it.pos - 1;
const last_token = parser.it.get(last_token_id);
const lcol = line_col.items[last_token_id];
return diags.failParse(path, "unexpected token in LD script: {s}: '{s}' ({d}:{d})", .{
@tagName(last_token.id),
last_token.get(data),
lcol.line,
lcol.column,
});
},
else => |e| return e,
};
return .{
.path = path,
.cpu_arch = parser.cpu_arch,
.args = try parser.args.toOwnedSlice(gpa),
};
}
const LineColumn = struct {
line: usize,
column: usize,
};
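/// Script keywords; each enum field is the lower-cased spelling of the keyword
/// it matches (e.g. `output_format` for `OUTPUT_FORMAT`).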
const Command = enum {
output_format,
input,
group,
as_needed,
fn fromString(s: []const u8) ?Command {
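// Upper-case the field name at comptime so the comparison below matches the
// script's spelling, e.g. "as_needed" -> "AS_NEEDED".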
inline for (@typeInfo(Command).@"enum".fields) |field| {
const upper_name = n: {
comptime var buf: [field.name.len]u8 = undefined;
inline for (field.name, 0..) |c, i| {
buf[i] = comptime std.ascii.toUpper(c);
}
break :n buf;
};
if (std.mem.eql(u8, &upper_name, s)) return @field(Command, field.name);
}
return null;
}
};
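// A sanity check for the keyword mapping. This test is a hypothetical addition
// (not part of the original file); it exercises only string handling, so it
// needs no linker state.
test "Command.fromString maps upper-cased keywords" {
try std.testing.expectEqual(@as(?Command, .output_format), Command.fromString("OUTPUT_FORMAT"));
try std.testing.expectEqual(@as(?Command, .as_needed), Command.fromString("AS_NEEDED"));
try std.testing.expect(Command.fromString("group") == null);
}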
const Parser = struct {
gpa: Allocator,
source: []const u8,
it: *TokenIterator,
cpu_arch: ?std.Target.Cpu.Arch,
args: std.ArrayListUnmanaged(Arg),
fn start(parser: *Parser) !void {
while (true) {
parser.skipAny(&.{ .comment, .new_line });
if (parser.maybe(.command)) |cmd_id| {
const cmd = parser.getCommand(cmd_id);
switch (cmd) {
.output_format => parser.cpu_arch = try parser.outputFormat(),
// TODO we should verify that group only contains libraries
.input, .group => try parser.group(),
else => return error.UnexpectedToken,
}
} else break;
}
if (parser.it.next()) |tok| switch (tok.id) {
.eof => {},
else => return error.UnexpectedToken,
};
}
fn outputFormat(p: *Parser) !std.Target.Cpu.Arch {
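// Two accepted spellings: `OUTPUT_FORMAT(<bfd-name>)` on a single line, or the
// brace form with the name on the following line:
//   OUTPUT_FORMAT
//   { elf64-x86-64 }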
const value = value: {
if (p.skip(&.{.lparen})) {
const value_id = try p.require(.literal);
const value = p.it.get(value_id);
_ = try p.require(.rparen);
break :value value.get(p.source);
} else if (p.skip(&.{ .new_line, .lbrace })) {
const value_id = try p.require(.literal);
const value = p.it.get(value_id);
_ = p.skip(&.{.new_line});
_ = try p.require(.rbrace);
break :value value.get(p.source);
} else return error.UnexpectedToken;
};
if (std.mem.eql(u8, value, "elf64-x86-64")) return .x86_64;
if (std.mem.eql(u8, value, "elf64-littleaarch64")) return .aarch64;
return error.UnknownCpuArch;
}
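// GROUP/INPUT members: a bare path is recorded with `needed = true`, while
// paths wrapped in AS_NEEDED(...) get `needed = false` (linked only if
// actually referenced, as with `--as-needed`).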
fn group(p: *Parser) !void {
const gpa = p.gpa;
if (!p.skip(&.{.lparen})) return error.UnexpectedToken;
while (true) {
if (p.maybe(.literal)) |tok_id| {
const tok = p.it.get(tok_id);
const path = tok.get(p.source);
try p.args.append(gpa, .{ .path = path, .needed = true });
} else if (p.maybe(.command)) |cmd_id| {
const cmd = p.getCommand(cmd_id);
switch (cmd) {
.as_needed => try p.asNeeded(),
else => return error.UnexpectedToken,
}
} else break;
}
_ = try p.require(.rparen);
}
fn asNeeded(p: *Parser) !void {
const gpa = p.gpa;
if (!p.skip(&.{.lparen})) return error.UnexpectedToken;
while (p.maybe(.literal)) |tok_id| {
const tok = p.it.get(tok_id);
const path = tok.get(p.source);
try p.args.append(gpa, .{ .path = path, .needed = false });
}
_ = try p.require(.rparen);
}
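// Consumes the exact token sequence `ids`, returning true on success; a
// mismatched token rewinds the iterator to its starting position.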
fn skip(p: *Parser, comptime ids: []const Token.Id) bool {
const pos = p.it.pos;
inline for (ids) |id| {
const tok = p.it.next() orelse return false;
if (tok.id != id) {
p.it.seekTo(pos);
return false;
}
}
return true;
}
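// Skips any run of tokens whose ids appear in `ids`, stopping just before the
// first token that matches none of them.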
fn skipAny(p: *Parser, comptime ids: []const Token.Id) void {
outer: while (p.it.next()) |tok| {
inline for (ids) |id| {
if (id == tok.id) continue :outer;
}
p.it.seekBy(-1);
break;
}
}
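// Consumes the next token and returns its index if its id is `id`; otherwise
// backs up and returns null.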
fn maybe(p: *Parser, comptime id: Token.Id) ?Token.Index {
const pos = p.it.pos;
const tok = p.it.next() orelse return null;
if (tok.id == id) return pos;
p.it.seekBy(-1);
return null;
}
fn require(p: *Parser, comptime id: Token.Id) !Token.Index {
return p.maybe(id) orelse return error.UnexpectedToken;
}
fn getCommand(p: *Parser, index: Token.Index) Command {
const tok = p.it.get(index);
assert(tok.id == .command);
return Command.fromString(tok.get(p.source)).?;
}
};
const Token = struct {
id: Id,
start: usize,
end: usize,
const Id = enum {
eof,
invalid,
new_line,
lparen, // (
rparen, // )
lbrace, // {
rbrace, // }
comment, // /* */
command, // literal with special meaning, see Command
literal,
};
const Index = usize;
fn get(tok: Token, source: []const u8) []const u8 {
return source[tok.start..tok.end];
}
};
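/// Single-pass scanner over the raw script text: recognizes parens, braces,
/// `/* */` comments, newlines (including `\r\n`), and bare literals; a literal
/// that spells a known keyword becomes a `.command` token.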
const Tokenizer = struct {
source: []const u8,
index: usize = 0,
fn matchesPattern(comptime pattern: []const u8, slice: []const u8) bool {
comptime var count: usize = 0;
inline while (count < pattern.len) : (count += 1) {
if (count >= slice.len) return false;
const c = slice[count];
if (pattern[count] != c) return false;
}
return true;
}
fn matches(tok: Tokenizer, comptime pattern: []const u8) bool {
return matchesPattern(pattern, tok.source[tok.index..]);
}
fn isCommand(tok: Tokenizer, start: usize, end: usize) bool {
return Command.fromString(tok.source[start..end]) != null;
}
fn next(tok: *Tokenizer) Token {
var result = Token{
.id = .eof,
.start = tok.index,
.end = undefined,
};
var state: enum {
start,
comment,
literal,
} = .start;
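// One token per call: whitespace advances `result.start`, single-character
// tokens break out immediately, and comments/literals remain in their state
// until a terminator; running out of input yields `.eof`.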
while (tok.index < tok.source.len) : (tok.index += 1) {
const c = tok.source[tok.index];
switch (state) {
.start => switch (c) {
' ', '\t' => result.start += 1,
'\n' => {
result.id = .new_line;
tok.index += 1;
break;
},
'\r' => {
if (tok.matches("\r\n")) {
result.id = .new_line;
tok.index += "\r\n".len;
} else {
result.id = .invalid;
tok.index += 1;
}
break;
},
'/' => if (tok.matches("/*")) {
state = .comment;
tok.index += "/*".len;
} else {
state = .literal;
},
'(' => {
result.id = .lparen;
tok.index += 1;
break;
},
')' => {
result.id = .rparen;
tok.index += 1;
break;
},
'{' => {
result.id = .lbrace;
tok.index += 1;
break;
},
'}' => {
result.id = .rbrace;
tok.index += 1;
break;
},
else => state = .literal,
},
.comment => switch (c) {
'*' => if (tok.matches("*/")) {
result.id = .comment;
tok.index += "*/".len;
break;
},
else => {},
},
.literal => switch (c) {
' ', '(', '\n' => {
if (tok.isCommand(result.start, tok.index)) {
result.id = .command;
} else {
result.id = .literal;
}
break;
},
')' => {
result.id = .literal;
break;
},
'\r' => {
if (tok.matches("\r\n")) {
if (tok.isCommand(result.start, tok.index)) {
result.id = .command;
} else {
result.id = .literal;
}
} else {
result.id = .invalid;
tok.index += 1;
}
break;
},
else => {},
},
}
}
result.end = tok.index;
return result;
}
};
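// Hypothetical test (not part of the original file): tokenize a one-line
// script and check the ids the state machine produces.
test "Tokenizer basic tokens" {
var tokenizer: Tokenizer = .{ .source = "GROUP ( libm.so.6 )\n" };
const expected = [_]Token.Id{ .command, .lparen, .literal, .rparen, .new_line, .eof };
for (expected) |id| {
try std.testing.expectEqual(id, tokenizer.next().id);
}
}
/// Cursor over the token slice; `seekTo`/`seekBy` give the parser cheap
/// backtracking.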
const TokenIterator = struct {
tokens: []const Token,
pos: Token.Index = 0,
fn next(it: *TokenIterator) ?Token {
const token = it.peek() orelse return null;
it.pos += 1;
return token;
}
fn peek(it: TokenIterator) ?Token {
if (it.pos >= it.tokens.len) return null;
return it.tokens[it.pos];
}
fn reset(it: *TokenIterator) void {
it.pos = 0;
}
fn seekTo(it: *TokenIterator, pos: Token.Index) void {
it.pos = pos;
}
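// Moves the cursor by `offset`, clamping at zero.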
fn seekBy(it: *TokenIterator, offset: isize) void {
const new_pos = @as(isize, @bitCast(it.pos)) + offset;
if (new_pos < 0) {
it.pos = 0;
} else {
it.pos = @as(usize, @intCast(new_pos));
}
}
fn get(it: *TokenIterator, pos: Token.Index) Token {
assert(pos < it.tokens.len);
return it.tokens[pos];
}
};
const LdScript = @This();
const Diags = @import("../link.zig").Diags;
const std = @import("std");
const assert = std.debug.assert;
const Path = std.Build.Cache.Path;
const Allocator = std.mem.Allocator;