zig/lib/std/build/CheckObjectStep.zig
Jakub Konka 03ddb42b8b link-tests: rename check() to checkStart()
Do not hardcode the symtab label; instead allow each parser to define
its own.

Check for missing extractor value in the matcher when matching `{}`.
2022-06-23 13:16:03 +02:00

393 lines
14 KiB
Zig

const std = @import("../std.zig");
const assert = std.debug.assert;
const build = std.build;
const fs = std.fs;
const macho = std.macho;
const mem = std.mem;
const testing = std.testing;
const CheckObjectStep = @This();
const Allocator = mem.Allocator;
const Builder = build.Builder;
const Step = build.Step;
/// Identifier tag declared for this step type (presumably a `Step.Id` member — confirm).
/// NOTE(review): `create` initializes the step with `.check_file`, not with this value;
/// confirm which of the two is intended.
pub const base_id = .check_obj;
/// Embedded build step; `make` recovers the owning `CheckObjectStep` from a pointer to it.
step: Step,
/// Builder that owns this step; supplies the allocator and string duplication.
builder: *Builder,
/// The object file to parse and check.
source: build.FileSource,
/// Upper bound on the number of bytes read from `source` in `make`.
max_bytes: usize = 20 * 1024 * 1024,
/// Check sequences in the order they were registered.
checks: std.ArrayList(Check),
/// Set by `checkInSymtab`; forwarded to the dumper to request a symbol table dump.
dump_symtab: bool = false,
/// Selects which parser/dumper is used for `source`.
obj_format: std.Target.ObjectFormat,
/// Allocates a new `CheckObjectStep` with the builder's allocator and registers the
/// dependencies of `source` on the embedded step.
/// `source` is duplicated through the builder; the list of checks starts out empty.
/// Allocation failure is treated as unreachable.
pub fn create(builder: *Builder, source: build.FileSource, obj_format: std.Target.ObjectFormat) *CheckObjectStep {
    const allocator = builder.allocator;
    const check_step = allocator.create(CheckObjectStep) catch unreachable;
    check_step.* = CheckObjectStep{
        // NOTE(review): the step is tagged `.check_file` although `base_id` is `.check_obj`;
        // confirm which identifier is intended.
        .step = Step.init(.check_file, "CheckObject", allocator, make),
        .builder = builder,
        .source = source.dupe(builder),
        .checks = std.ArrayList(Check).init(allocator),
        .obj_format = obj_format,
    };
    check_step.source.addStepDependencies(&check_step.step);
    return check_step;
}
/// A single step of a `Check`; `make` runs the actions of a check in order.
const Action = union(enum) {
/// Search the remaining lines of the dumped output for a line matching the needle.
match: MatchAction,
/// Reduce a program over previously extracted variables and compare the result
/// with the value of an expected variable.
compute_eq: ComputeEqAction,
};
/// MatchAction is the main building block of standard matchers with optional eat-all token `{*}`
/// and extractors by name such as `{n_value}`. Please note this action is very simplistic in nature
/// i.e., it won't really handle edge cases/nontrivial examples. But given that we do want to use
/// it mainly to test the output of our object format parser-dumpers when testing the linkers, etc.
/// it should be plenty useful in its current form.
const MatchAction = struct {
    /// Search pattern made of space-separated tokens. Each token is either a plain literal,
    /// a `{name}` extractor, or a literal containing one or more `{*}` wildcards.
    needle: []const u8,

    /// Returns true if every token of `needle` matches the corresponding token of `haystack`.
    /// Both strings are trimmed and split on spaces; tokens of `haystack` beyond the last
    /// needle token are ignored, while a `haystack` with fewer tokens does not match.
    /// Some examples include:
    ///
    /// LC 0                     => will match in its entirety
    /// vmaddr {vmaddr}          => will match `vmaddr` and then extract the following value,
    ///                             parsed as a hexadecimal u64, and save it under the
    ///                             `vmaddr` name in `global_vars`
    /// name {*}libobjc{*}.dylib => will match `name` followed by a token which contains
    ///                             `libobjc` and `.dylib` in that order with other letters
    ///                             in between
    ///
    /// `global_vars` must provide `putNoClobber(name, value)`; a name may therefore be
    /// extracted only once.
    ///
    /// Errors: `MissingClosingBrace`, `ClosingBraceNotLast` and `MissingBraceValue` for a
    /// malformed extractor token, plus whatever `std.fmt.parseInt` and `putNoClobber` return.
    fn match(act: MatchAction, haystack: []const u8, global_vars: anytype) !bool {
        var hay_it = mem.tokenize(u8, mem.trim(u8, haystack, " "), " ");
        var needle_it = mem.tokenize(u8, mem.trim(u8, act.needle, " "), " ");

        while (needle_it.next()) |needle_tok| {
            const hay_tok = hay_it.next() orelse return false;

            if (mem.indexOf(u8, needle_tok, "{*}") != null) {
                // We have fuzzy matchers within the search pattern, so we match substrings.
                if (!matchWildcards(needle_tok, hay_tok)) return false;
            } else if (mem.startsWith(u8, needle_tok, "{")) {
                const closing_brace = mem.indexOf(u8, needle_tok, "}") orelse return error.MissingClosingBrace;
                if (closing_brace != needle_tok.len - 1) return error.ClosingBraceNotLast;

                const name = needle_tok[1..closing_brace];
                if (name.len == 0) return error.MissingBraceValue;

                const value = try std.fmt.parseInt(u64, hay_tok, 16);
                try global_vars.putNoClobber(name, value);
            } else {
                if (!mem.eql(u8, hay_tok, needle_tok)) return false;
            }
        }

        return true;
    }

    /// Returns true if all literal segments of `needle_tok` (the pieces between `{*}`
    /// wildcards, including the piece before the first wildcard) occur in `hay_tok`
    /// from left to right without overlapping. Empty segments match trivially.
    fn matchWildcards(needle_tok: []const u8, hay_tok: []const u8) bool {
        const wildcard = "{*}";
        var pattern = needle_tok;
        var remaining = hay_tok;

        while (true) {
            const wildcard_pos = mem.indexOf(u8, pattern, wildcard);
            const segment = if (wildcard_pos) |pos| pattern[0..pos] else pattern;

            if (segment.len > 0) {
                const found = mem.indexOf(u8, remaining, segment) orelse return false;
                // Continue the search after this segment so that order is enforced.
                remaining = remaining[found + segment.len ..];
            }

            const pos = wildcard_pos orelse return true;
            pattern = pattern[pos + wildcard.len ..];
        }
    }
};
/// ComputeEqAction can be used to perform an operation on the extracted global variables
/// using the MatchAction. It currently only supports an addition. The operation is required
/// to be specified in Reverse Polish Notation to ease in operator-precedence parsing (well,
/// to avoid any parsing really).
/// For example, if the two extracted values were saved as `vmaddr` and `entryoff` respectively
/// they could then be added with this simple program `vmaddr entryoff +`.
const ComputeEqAction = struct {
/// Name of the extracted variable whose value the reduced program is compared against.
expected: []const u8,
/// Names of the operand variables, in the order they appear in the program.
var_stack: std.ArrayList([]const u8),
/// Operators of the program, in the order they appear.
op_stack: std.ArrayList(Op),
/// Supported binary operators.
const Op = enum {
/// Produced by the `+` token of the program.
add,
};
};
/// An ordered sequence of actions that `make` evaluates against the dumped object file.
const Check = struct {
    /// Builder used to duplicate needles and to allocate the action list.
    builder: *Builder,
    /// Actions in the order they were added.
    actions: std.ArrayList(Action),

    /// Returns an empty check whose action list uses the allocator of `b`.
    fn create(b: *Builder) Check {
        return Check{
            .builder = b,
            .actions = std.ArrayList(Action).init(b.allocator),
        };
    }

    /// Appends a match action for a builder-owned copy of `needle`.
    /// Allocation failure is treated as unreachable.
    fn match(self: *Check, needle: []const u8) void {
        const owned_needle = self.builder.dupe(needle);
        const action = Action{ .match = MatchAction{ .needle = owned_needle } };
        self.actions.append(action) catch unreachable;
    }

    /// Appends `act` as a compute-and-compare action.
    /// Allocation failure is treated as unreachable.
    fn computeEq(self: *Check, act: ComputeEqAction) void {
        const action = Action{ .compute_eq = act };
        self.actions.append(action) catch unreachable;
    }
};
/// Begins a new sequence of actions whose first action searches for `phrase`;
/// the phrase acts as the anchor of the sequence.
pub fn checkStart(self: *CheckObjectStep, phrase: []const u8) void {
    // Reserve the slot first, then build the check in place.
    const slot = self.checks.addOne() catch unreachable;
    slot.* = Check.create(self.builder);
    slot.match(phrase);
}
/// Appends `phrase` as a further searched phrase to the most recent check, i.e. the one
/// last created with `CheckObjectStep.checkStart(...)`.
/// Asserts that at least one check already exists.
pub fn checkNext(self: *CheckObjectStep, phrase: []const u8) void {
    const existing = self.checks.items;
    assert(existing.len > 0);
    existing[existing.len - 1].match(phrase);
}
/// Starts a new check anchored at the symbol table section of the dumped output.
/// Calling this forces the symbol table to be parsed and dumped.
/// The anchor label is defined by the parser of the configured object format; only
/// Mach-O is supported so far and any other format panics.
pub fn checkInSymtab(self: *CheckObjectStep) void {
    self.dump_symtab = true;
    switch (self.obj_format) {
        .macho => self.checkStart(MachODumper.symtab_label),
        else => @panic("TODO other parsers"),
    }
}
/// Creates a new standalone, singular check which allows running simple binary operations
/// on the extracted variables. It will then compare the reduced program with the value of
/// the expected variable.
///
/// `program` is a space-separated list of tokens: `+` is recorded as an addition and every
/// other token is recorded as the name of an extracted variable. `expected` is the name of
/// the variable to compare against. Both are copied, so the caller's buffers do not need
/// to outlive this call. Allocation failure is treated as unreachable.
pub fn checkComputeEq(self: *CheckObjectStep, program: []const u8, expected: []const u8) void {
    const gpa = self.builder.allocator;
    var ca = ComputeEqAction{
        // Duplicated like every other stored string (needles, program tokens) so that the
        // check does not keep a pointer into memory owned by the caller.
        .expected = self.builder.dupe(expected),
        .var_stack = std.ArrayList([]const u8).init(gpa),
        .op_stack = std.ArrayList(ComputeEqAction.Op).init(gpa),
    };

    var it = mem.tokenize(u8, program, " ");
    while (it.next()) |next| {
        if (mem.eql(u8, next, "+")) {
            ca.op_stack.append(.add) catch unreachable;
        } else {
            ca.var_stack.append(self.builder.dupe(next)) catch unreachable;
        }
    }

    var new_check = Check.create(self.builder);
    new_check.computeEq(ca);
    self.checks.append(new_check) catch unreachable;
}
/// Step entry point: reads the object file, dumps it to text with the parser selected by
/// `obj_format`, and then runs every registered check against that text.
///
/// Each check scans the dump from its first line. Within a check, every match action
/// continues from the line following the previous match, so phrases must appear in order.
/// Extracted variables are shared between all checks of this step.
///
/// Returns `error.TestFailed` after printing a diagnostic when a phrase is not found, a
/// variable was never extracted, or a program has too few operands; propagates errors from
/// reading and parsing the file and from the final equality expectation.
fn make(step: *Step) !void {
    const self = @fieldParentPtr(CheckObjectStep, "step", step);
    const gpa = self.builder.allocator;
    const src_path = self.source.getPath(self.builder);
    const contents = try fs.cwd().readFileAlloc(gpa, src_path, self.max_bytes);

    const output = switch (self.obj_format) {
        .macho => try MachODumper.parseAndDump(contents, .{
            .gpa = gpa,
            .dump_symtab = self.dump_symtab,
        }),
        .elf => @panic("TODO elf parser"),
        .coff => @panic("TODO coff parser"),
        .wasm => @panic("TODO wasm parser"),
        else => unreachable,
    };

    var vars = std.StringHashMap(u64).init(gpa);

    for (self.checks.items) |chk| {
        var it = mem.tokenize(u8, output, "\r\n");
        for (chk.actions.items) |act| {
            switch (act) {
                .match => |match_act| {
                    while (it.next()) |line| {
                        if (try match_act.match(line, &vars)) break;
                    } else {
                        std.debug.print(
                            \\
                            \\========= Expected to find: ==========================
                            \\{s}
                            \\========= But parsed file does not contain it: =======
                            \\{s}
                            \\
                        , .{ match_act.needle, output });
                        return error.TestFailed;
                    }
                },
                .compute_eq => |c_eq| {
                    var values = std.ArrayList(u64).init(gpa);
                    defer values.deinit();
                    try values.ensureTotalCapacity(c_eq.var_stack.items.len);
                    for (c_eq.var_stack.items) |vv| {
                        const val = vars.get(vv) orelse {
                            std.debug.print(
                                \\
                                \\========= Variable was not extracted: ===========
                                \\{s}
                                \\========= From parsed file: =====================
                                \\{s}
                                \\
                            , .{ vv, output });
                            return error.TestFailed;
                        };
                        values.appendAssumeCapacity(val);
                    }

                    // Every operator consumes one operand on top of the initial one; reject
                    // programs that would otherwise index past the end of `values`.
                    if (values.items.len < c_eq.op_stack.items.len + 1) {
                        std.debug.print(
                            \\
                            \\========= Program is missing operands: ==========
                            \\{d} operator(s) but only {d} variable(s)
                            \\
                        , .{ c_eq.op_stack.items.len, values.items.len });
                        return error.TestFailed;
                    }

                    var op_i: usize = 1;
                    var reduced: u64 = values.items[0];
                    for (c_eq.op_stack.items) |op| {
                        const other = values.items[op_i];
                        switch (op) {
                            .add => {
                                reduced += other;
                            },
                        }
                        // Advance to the operand of the next operator.
                        op_i += 1;
                    }

                    const expected = vars.get(c_eq.expected) orelse {
                        std.debug.print(
                            \\
                            \\========= Variable was not extracted: ===========
                            \\{s}
                            \\========= From parsed file: =====================
                            \\{s}
                            \\
                        , .{ c_eq.expected, output });
                        return error.TestFailed;
                    };
                    // `expectEqual` takes the expected value first.
                    try testing.expectEqual(expected, reduced);
                },
            }
        }
    }
}
/// Options passed to an object file dumper.
const Opts = struct {
/// Allocator for the dumper's output; the Mach-O dumper requires it to be set.
gpa: ?Allocator = null,
/// When true, the symbol table is dumped in addition to the load commands.
dump_symtab: bool = false,
};
/// Parses a 64-bit Mach-O file and renders it as line-oriented text for the matchers.
const MachODumper = struct {
    /// Line that introduces the symbol table section of the dump.
    const symtab_label = "symtab";

    /// Dumps every load command of `bytes` and, when `opts.dump_symtab` is set and a SYMTAB
    /// command is present, the non-stab symbols as `<name> <n_value in hex>` lines following
    /// a `symtab_label` line.
    /// `opts.gpa` must be set. The returned slice is allocated with it and owned by the caller.
    /// Returns `error.InvalidMagicNumber` if the header magic is not `MH_MAGIC_64`, and
    /// `error.InvalidStringTableOffset` if a symbol's name offset lies outside the string
    /// table. The string table and symbol table ranges themselves are sliced directly from
    /// `bytes` without further validation.
    fn parseAndDump(bytes: []const u8, opts: Opts) ![]const u8 {
        const gpa = opts.gpa orelse unreachable; // MachO dumper requires an allocator
        var stream = std.io.fixedBufferStream(bytes);
        const reader = stream.reader();

        const hdr = try reader.readStruct(macho.mach_header_64);
        if (hdr.magic != macho.MH_MAGIC_64) {
            return error.InvalidMagicNumber;
        }

        var output = std.ArrayList(u8).init(gpa);
        // Do not leak the partially written dump when parsing fails midway.
        errdefer output.deinit();
        const writer = output.writer();

        var symtab_cmd: ?macho.symtab_command = null;
        var i: u16 = 0;
        while (i < hdr.ncmds) : (i += 1) {
            // NOTE(review): `LoadCommand.read` takes an allocator; whether the command has
            // to be released afterwards is not visible from here — confirm.
            var cmd = try macho.LoadCommand.read(gpa, reader);

            if (opts.dump_symtab and cmd.cmd() == .SYMTAB) {
                symtab_cmd = cmd.symtab;
            }

            try dumpLoadCommand(cmd, i, writer);
            try writer.writeByte('\n');
        }

        if (symtab_cmd) |cmd| {
            try writer.writeAll(symtab_label ++ "\n");
            const strtab = bytes[cmd.stroff..][0..cmd.strsize];
            const raw_symtab = bytes[cmd.symoff..][0 .. cmd.nsyms * @sizeOf(macho.nlist_64)];
            const symtab = mem.bytesAsSlice(macho.nlist_64, raw_symtab);

            for (symtab) |sym| {
                if (sym.stab()) continue;
                // Keep the name lookup inside the string table instead of following an
                // unbounded sentinel-terminated pointer.
                if (sym.n_strx >= strtab.len) return error.InvalidStringTableOffset;
                const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0);
                try writer.print("{s} {x}\n", .{ sym_name, sym.n_value });
            }
        }

        return output.toOwnedSlice();
    }

    /// Writes the textual form of one load command: a generic header (`LC`, `cmd`,
    /// `cmdsize`) followed by command-specific fields for SEGMENT_64, ID_DYLIB, LOAD_DYLIB,
    /// MAIN and RPATH. Other commands get the header only. No trailing newline is written.
    fn dumpLoadCommand(lc: macho.LoadCommand, index: u16, writer: anytype) !void {
        // print header first
        try writer.print(
            \\LC {d}
            \\cmd {s}
            \\cmdsize {d}
        , .{ index, @tagName(lc.cmd()), lc.cmdsize() });

        switch (lc.cmd()) {
            .SEGMENT_64 => {
                // TODO dump section headers
                const seg = lc.segment.inner;
                try writer.writeByte('\n');
                try writer.print(
                    \\segname {s}
                    \\vmaddr {x}
                    \\vmsize {x}
                    \\fileoff {x}
                    \\filesz {x}
                , .{
                    seg.segName(),
                    seg.vmaddr,
                    seg.vmsize,
                    seg.fileoff,
                    seg.filesize,
                });
            },

            .ID_DYLIB,
            .LOAD_DYLIB,
            => {
                const dylib = lc.dylib.inner.dylib;
                try writer.writeByte('\n');
                try writer.print(
                    \\name {s}
                    \\timestamp {d}
                    \\current version {x}
                    \\compatibility version {x}
                , .{
                    mem.sliceTo(lc.dylib.data, 0),
                    dylib.timestamp,
                    dylib.current_version,
                    dylib.compatibility_version,
                });
            },

            .MAIN => {
                try writer.writeByte('\n');
                try writer.print(
                    \\entryoff {x}
                    \\stacksize {x}
                , .{ lc.main.entryoff, lc.main.stacksize });
            },

            .RPATH => {
                try writer.writeByte('\n');
                try writer.print(
                    \\path {s}
                , .{
                    mem.sliceTo(lc.rpath.data, 0),
                });
            },

            else => {},
        }
    }
};