zig/lib/std/Build/Step/CheckObject.zig
Andrew Kelley 7e2a26c0c4 std.io.Writer.printValue: rework logic
Alignment and fill options only apply to numbers.

Rework the implementation to mainly branch on the format string rather
than the type information. This is more straightforward to maintain and
more straightforward for comptime evaluation.

Enums support being printed as decimal, hexadecimal, octal, and binary.

`formatInteger` is another possible format method that is
unconditionally called when the value type is struct and one of the
integer-printing format specifiers are used.
2025-07-07 22:43:53 -07:00

2682 lines
109 KiB
Zig

const std = @import("std");
const assert = std.debug.assert;
const elf = std.elf;
const fs = std.fs;
const macho = std.macho;
const math = std.math;
const mem = std.mem;
const testing = std.testing;
const Writer = std.io.Writer;
const CheckObject = @This();
const Allocator = mem.Allocator;
const Step = std.Build.Step;
/// Step id used for every `CheckObject` step; passed to `Step.init` in `create`.
pub const base_id: Step.Id = .check_object;
/// The embedded build step. `make` recovers the owning `CheckObject` from a
/// pointer to this field via `@fieldParentPtr`.
step: Step,
/// Path of the file that `make` reads and runs the checks against.
source: std.Build.LazyPath,
/// Size limit, in bytes, handed to the file read in `make`. Defaults to 20 MiB.
max_bytes: usize = 20 * 1024 * 1024,
/// All registered checks, in the order they were added; `make` iterates them in this order.
checks: std.ArrayList(Check),
/// Object format of `source`; `make` and the `checkIn*` helpers switch on it to pick a dumper.
obj_format: std.Target.ObjectFormat,
/// Allocates and initializes a `CheckObject` step owned by `owner`.
///
/// `source` is duplicated with `owner` and `obj_format` is stored as given.
/// The returned step starts with no checks. Panics with "OOM" if the
/// allocation fails.
pub fn create(
    owner: *std.Build,
    source: std.Build.LazyPath,
    obj_format: std.Target.ObjectFormat,
) *CheckObject {
    const allocator = owner.allocator;
    const self = allocator.create(CheckObject) catch @panic("OOM");
    self.* = .{
        .step = Step.init(.{
            .id = base_id,
            .name = "CheckObject",
            .owner = owner,
            .makeFn = make,
        }),
        .source = source.dupe(owner),
        .checks = std.ArrayList(Check).init(allocator),
        .obj_format = obj_format,
    };
    // Let the lazy path register its step dependencies on the new step.
    self.source.addStepDependencies(&self.step);
    return self;
}
/// A phrase to look for in dumped output, optionally completed with a path that
/// is only resolved when `resolve` is called.
const SearchPhrase = struct {
    string: []const u8,
    lazy_path: ?std.Build.LazyPath = null,

    /// Returns `string` unchanged when no `lazy_path` is set. Otherwise returns
    /// `string`, one space, and the result of `lazy_path.getPath2(b, step)`,
    /// formatted with `b.fmt`.
    fn resolve(phrase: SearchPhrase, b: *std.Build, step: *Step) []const u8 {
        if (phrase.lazy_path) |path| {
            const resolved = path.getPath2(b, step);
            return b.fmt("{s} {s}", .{ phrase.string, resolved });
        }
        return phrase.string;
    }
};
/// There are five types of actions currently supported:
/// .exact - will do an exact match against the haystack
/// .contains - will check for existence within the haystack
/// .not_present - will check for non-existence within the haystack
/// .extract - will do an exact match and extract into a variable enclosed within `{name}` braces
/// .compute_cmp - will perform an operation on the extracted global variables
/// using the MatchAction. It currently supports addition (`+`), subtraction (`-`),
/// modulo (`%`) and multiplication (`*`). The operation is required
/// to be specified in Reverse Polish Notation to ease in operator-precedence parsing (well,
/// to avoid any parsing really).
/// For example, if the two extracted values were saved as `vmaddr` and `entryoff` respectively
/// they could then be added with this simple program `vmaddr entryoff +`.
const Action = struct {
tag: enum { exact, contains, not_present, extract, compute_cmp },
phrase: SearchPhrase,
expected: ?ComputeCompareExpected = null,
/// Matches `haystack` against the phrase token by token (tokens are separated by
/// spaces) and captures every `{name}` placeholder.
///
/// A placeholder token must start with `{` and end with its only `}`; the matching
/// haystack token is parsed as a hexadecimal `u64`. Any other phrase token must be
/// byte-equal to the haystack token at the same position. Comparison stops as soon
/// as either side runs out of tokens, so extra tokens on the longer side are ignored.
///
/// Returns `true` only if nothing mismatched and at least one variable was captured;
/// the captures are then stored in `global_vars` with `putNoClobber`. Returns `false`
/// on a mismatch, on a non-hexadecimal value, or when nothing was captured.
/// Errors: `MissingClosingBrace`, `ClosingBraceNotLast`, `MissingBraceValue` for a
/// malformed placeholder, plus allocation failures.
fn extract(
    act: Action,
    b: *std.Build,
    step: *Step,
    haystack: []const u8,
    global_vars: *std.StringHashMap(u64),
) !bool {
    assert(act.tag == .extract);
    const Capture = struct { name: []const u8, value: u64 };

    const line = mem.trim(u8, haystack, " ");
    const pattern = mem.trim(u8, act.phrase.resolve(b, step), " ");

    // Captures are staged here so that a late mismatch leaves `global_vars` untouched.
    var captures: std.ArrayList(Capture) = .init(b.allocator);
    var line_tokens = mem.tokenizeScalar(u8, line, ' ');
    var pattern_tokens = mem.tokenizeScalar(u8, pattern, ' ');

    while (pattern_tokens.next()) |pattern_tok| {
        const line_tok = line_tokens.next() orelse break;

        if (!mem.startsWith(u8, pattern_tok, "{")) {
            // Literal token: must match verbatim.
            if (mem.eql(u8, line_tok, pattern_tok)) continue;
            return false;
        }

        const close = mem.indexOfScalar(u8, pattern_tok, '}') orelse return error.MissingClosingBrace;
        if (close + 1 != pattern_tok.len) return error.ClosingBraceNotLast;
        const var_name = pattern_tok[1..close];
        if (var_name.len == 0) return error.MissingBraceValue;

        const parsed = std.fmt.parseInt(u64, line_tok, 16) catch return false;
        try captures.append(.{ .name = var_name, .value = parsed });
    }

    if (captures.items.len == 0) return false;
    for (captures.items) |capture| {
        try global_vars.putNoClobber(capture.name, capture.value);
    }
    return true;
}
/// Reports whether `haystack` and the resolved phrase are byte-equal once leading
/// and trailing spaces have been trimmed from both.
fn exact(
    act: Action,
    b: *std.Build,
    step: *Step,
    haystack: []const u8,
) bool {
    assert(act.tag == .exact);
    const trimmed_line = mem.trim(u8, haystack, " ");
    const resolved = act.phrase.resolve(b, step);
    const wanted = mem.trim(u8, resolved, " ");
    return mem.eql(u8, trimmed_line, wanted);
}
/// Reports whether the resolved phrase occurs anywhere inside `haystack`.
/// Leading and trailing spaces are trimmed from both before searching.
fn contains(
    act: Action,
    b: *std.Build,
    step: *Step,
    haystack: []const u8,
) bool {
    assert(act.tag == .contains);
    const trimmed_line = mem.trim(u8, haystack, " ");
    const resolved = act.phrase.resolve(b, step);
    const wanted = mem.trim(u8, resolved, " ");
    return if (mem.indexOf(u8, trimmed_line, wanted)) |_| true else false;
}
/// Reports whether the resolved phrase is absent from `haystack`.
/// Implemented as the negation of `contains` on a copy of this action retagged
/// as `.contains`.
fn notPresent(
    act: Action,
    b: *std.Build,
    step: *Step,
    haystack: []const u8,
) bool {
    assert(act.tag == .not_present);
    const as_contains: Action = .{
        .tag = .contains,
        .phrase = act.phrase,
        .expected = act.expected,
    };
    return !as_contains.contains(b, step, haystack);
}
/// Evaluates the action's phrase as a small arithmetic program and compares the
/// result with `act.expected` using `act.expected.?.op`.
///
/// Tokens are separated by spaces. `+`, `-`, `%` and `*` are collected as operators;
/// every other token is an operand: first tried as an integer literal (`parseInt`
/// with base 0), otherwise looked up in `global_vars`. Evaluation starts from the
/// first operand and applies the operators in the order they appear, each one
/// consuming the next operand. Operands beyond those needed are ignored.
///
/// Returns `error.UnknownVariable` (after adding a step error) when an operand or
/// the expected variable was never extracted. Fails the step, instead of indexing
/// out of bounds or trapping, when the program has no operands, has fewer operands
/// than its operators require, or when the arithmetic overflows, underflows or
/// divides by zero. `act.expected` must be non-null.
fn computeCmp(act: Action, b: *std.Build, step: *Step, global_vars: std.StringHashMap(u64)) !bool {
    const gpa = step.owner.allocator;
    const phrase = act.phrase.resolve(b, step);
    var op_stack: std.ArrayList(enum { add, sub, mod, mul }) = .init(gpa);
    var values: std.ArrayList(u64) = .init(gpa);
    var it = mem.tokenizeScalar(u8, phrase, ' ');
    while (it.next()) |next| {
        if (mem.eql(u8, next, "+")) {
            try op_stack.append(.add);
        } else if (mem.eql(u8, next, "-")) {
            try op_stack.append(.sub);
        } else if (mem.eql(u8, next, "%")) {
            try op_stack.append(.mod);
        } else if (mem.eql(u8, next, "*")) {
            try op_stack.append(.mul);
        } else {
            const val = std.fmt.parseInt(u64, next, 0) catch blk: {
                break :blk global_vars.get(next) orelse {
                    try step.addError(
                        \\
                        \\========= variable was not extracted: ===========
                        \\{s}
                        \\=================================================
                    , .{next});
                    return error.UnknownVariable;
                };
            };
            try values.append(val);
        }
    }

    // N operators consume N + 1 operands. Anything less would index past the end
    // of `values` below, so report it as a step failure instead.
    if (values.items.len <= op_stack.items.len) {
        return step.fail(
            "malformed compute-compare program '{s}': {d} operator(s) need at least {d} operand(s), found {d}",
            .{ phrase, op_stack.items.len, op_stack.items.len + 1, values.items.len },
        );
    }

    var reduced: u64 = values.items[0];
    const operands = values.items[1..][0..op_stack.items.len];
    for (op_stack.items, operands) |op, other| {
        // Checked arithmetic: a bad program fails the step rather than trapping.
        const result: ?u64 = switch (op) {
            .add => math.add(u64, reduced, other) catch null,
            .sub => math.sub(u64, reduced, other) catch null,
            .mod => if (other == 0) null else reduced % other,
            .mul => math.mul(u64, reduced, other) catch null,
        };
        reduced = result orelse return step.fail(
            "arithmetic overflow, underflow or division by zero while evaluating '{s}'",
            .{phrase},
        );
    }

    const exp_value = switch (act.expected.?.value) {
        .variable => |name| global_vars.get(name) orelse {
            try step.addError(
                \\
                \\========= variable was not extracted: ===========
                \\{s}
                \\=================================================
            , .{name});
            return error.UnknownVariable;
        },
        .literal => |x| x,
    };
    return math.compare(reduced, act.expected.?.op, exp_value);
}
};
/// The right-hand side of a compute-compare check: a comparison operator plus
/// either a literal value or the name of an extracted variable.
const ComputeCompareExpected = struct {
    op: math.CompareOperator,
    value: union(enum) {
        variable: []const u8,
        literal: u64,
    },

    /// Writes the operator's tag name, one space, then either the variable name
    /// or the literal in lowercase hexadecimal (no `0x` prefix).
    pub fn format(value: ComputeCompareExpected, bw: *Writer) Writer.Error!void {
        try bw.writeAll(@tagName(value.op));
        try bw.writeByte(' ');
        switch (value.value) {
            .literal => |number| try bw.print("{x}", .{number}),
            .variable => |var_name| try bw.writeAll(var_name),
        }
    }
};
/// One check: which dump to produce (`kind` and `payload`) and the ordered list of
/// actions to run against it. `data` is scratch storage referenced by `payload`.
const Check = struct {
    kind: Kind,
    payload: Payload,
    allocator: Allocator,
    data: std.ArrayListUnmanaged(u8),
    actions: std.ArrayListUnmanaged(Action),

    /// Returns an empty check of the given `kind` with no payload and no actions.
    fn create(allocator: Allocator, kind: Kind) Check {
        const fresh: Check = .{
            .kind = kind,
            .payload = .{ .none = {} },
            .allocator = allocator,
            .data = .empty,
            .actions = .empty,
        };
        return fresh;
    }

    /// Returns a `.dump_section` check whose payload is the offset in `data` at
    /// which `name`, followed by a zero byte, is stored. Panics with "OOM" on
    /// allocation failure.
    fn dumpSection(gpa: Allocator, name: [:0]const u8) Check {
        var check: Check = .create(gpa, .dump_section);
        const name_offset: u32 = @intCast(check.data.items.len);
        check.data.print(gpa, "{s}\x00", .{name}) catch @panic("OOM");
        check.payload = .{ .dump_section = name_offset };
        return check;
    }

    /// Appends `action` to this check's list, panicking with "OOM" on allocation failure.
    fn appendAction(check: *Check, action: Action) void {
        check.actions.append(check.allocator, action) catch @panic("OOM");
    }

    /// Appends an `.extract` action for `phrase`.
    fn extract(check: *Check, phrase: SearchPhrase) void {
        check.appendAction(.{ .tag = .extract, .phrase = phrase });
    }

    /// Appends an `.exact` action for `phrase`.
    fn exact(check: *Check, phrase: SearchPhrase) void {
        check.appendAction(.{ .tag = .exact, .phrase = phrase });
    }

    /// Appends a `.contains` action for `phrase`.
    fn contains(check: *Check, phrase: SearchPhrase) void {
        check.appendAction(.{ .tag = .contains, .phrase = phrase });
    }

    /// Appends a `.not_present` action for `phrase`.
    fn notPresent(check: *Check, phrase: SearchPhrase) void {
        check.appendAction(.{ .tag = .not_present, .phrase = phrase });
    }

    /// Appends a `.compute_cmp` action that evaluates `phrase` and compares the
    /// result against `expected`.
    fn computeCmp(check: *Check, phrase: SearchPhrase, expected: ComputeCompareExpected) void {
        check.appendAction(.{
            .tag = .compute_cmp,
            .phrase = phrase,
            .expected = expected,
        });
    }

    const Kind = enum {
        headers,
        symtab,
        indirect_symtab,
        dynamic_symtab,
        archive_symtab,
        dynamic_section,
        dyld_rebase,
        dyld_bind,
        dyld_weak_bind,
        dyld_lazy_bind,
        exports,
        compute_compare,
        dump_section,
    };

    const Payload = union {
        none: void,
        /// Offset into the `data` buffer of a null-terminated string.
        dump_section: u32,
    };
};
/// Appends a new check of the given `kind`, with no actions yet, to the list of
/// checks. Panics with "OOM" on allocation failure.
fn checkStart(check_object: *CheckObject, kind: Check.Kind) void {
    const allocator = check_object.step.owner.allocator;
    check_object.checks.append(Check.create(allocator, kind)) catch @panic("OOM");
}
/// Adds an exact-match action for `phrase` to the most recently created check.
pub fn checkExact(check_object: *CheckObject, phrase: []const u8) void {
    checkExactInner(check_object, phrase, null);
}

/// Same as `checkExact()`, but the phrase is completed with `lazy_path`, which is
/// only resolved once the step runs.
pub fn checkExactPath(check_object: *CheckObject, phrase: []const u8, lazy_path: std.Build.LazyPath) void {
    checkExactInner(check_object, phrase, lazy_path);
}

/// Shared implementation: duplicates `phrase` with the owning build and appends an
/// exact-match action to the last check. Asserts that a check has been started.
fn checkExactInner(check_object: *CheckObject, phrase: []const u8, lazy_path: ?std.Build.LazyPath) void {
    const checks = check_object.checks.items;
    assert(checks.len > 0);
    const owned_phrase = check_object.step.owner.dupe(phrase);
    checks[checks.len - 1].exact(.{ .string = owned_phrase, .lazy_path = lazy_path });
}
/// Adds a substring-match action for `phrase` to the most recently created check.
pub fn checkContains(check_object: *CheckObject, phrase: []const u8) void {
    checkContainsInner(check_object, phrase, null);
}

/// Same as `checkContains()`, but the phrase is completed with `lazy_path`, which
/// is only resolved once the step runs.
pub fn checkContainsPath(
    check_object: *CheckObject,
    phrase: []const u8,
    lazy_path: std.Build.LazyPath,
) void {
    checkContainsInner(check_object, phrase, lazy_path);
}

/// Shared implementation: duplicates `phrase` with the owning build and appends a
/// substring-match action to the last check. Asserts that a check has been started.
fn checkContainsInner(check_object: *CheckObject, phrase: []const u8, lazy_path: ?std.Build.LazyPath) void {
    const checks = check_object.checks.items;
    assert(checks.len > 0);
    const owned_phrase = check_object.step.owner.dupe(phrase);
    checks[checks.len - 1].contains(.{ .string = owned_phrase, .lazy_path = lazy_path });
}
/// Adds a match-and-extract action for `phrase` to the most recently created check.
/// Variables to extract are written as `{name}` tokens inside `phrase`.
pub fn checkExtract(check_object: *CheckObject, phrase: []const u8) void {
    checkExtractInner(check_object, phrase, null);
}

/// Same as `checkExtract()`, but the phrase is completed with `lazy_path`, which
/// is only resolved once the step runs.
pub fn checkExtractLazyPath(check_object: *CheckObject, phrase: []const u8, lazy_path: std.Build.LazyPath) void {
    checkExtractInner(check_object, phrase, lazy_path);
}

/// Shared implementation: duplicates `phrase` with the owning build and appends an
/// extract action to the last check. Asserts that a check has been started.
fn checkExtractInner(check_object: *CheckObject, phrase: []const u8, lazy_path: ?std.Build.LazyPath) void {
    const checks = check_object.checks.items;
    assert(checks.len > 0);
    const owned_phrase = check_object.step.owner.dupe(phrase);
    checks[checks.len - 1].extract(.{ .string = owned_phrase, .lazy_path = lazy_path });
}
/// Adds a negative action to the most recently created check: the step fails if
/// `phrase` is found in the output.
pub fn checkNotPresent(check_object: *CheckObject, phrase: []const u8) void {
    checkNotPresentInner(check_object, phrase, null);
}

/// Same as `checkNotPresent()`, but the phrase is completed with `lazy_path`,
/// which is only resolved once the step runs.
pub fn checkNotPresentLazyPath(check_object: *CheckObject, phrase: []const u8, lazy_path: std.Build.LazyPath) void {
    checkNotPresentInner(check_object, phrase, lazy_path);
}

/// Shared implementation: duplicates `phrase` with the owning build and appends a
/// not-present action to the last check. Asserts that a check has been started.
fn checkNotPresentInner(check_object: *CheckObject, phrase: []const u8, lazy_path: ?std.Build.LazyPath) void {
    const checks = check_object.checks.items;
    assert(checks.len > 0);
    const owned_phrase = check_object.step.owner.dupe(phrase);
    checks[checks.len - 1].notPresent(.{ .string = owned_phrase, .lazy_path = lazy_path });
}
/// Starts a new `.headers` check; subsequent `check*` calls add actions to it.
pub fn checkInHeaders(check_object: *CheckObject) void {
    checkStart(check_object, .headers);
}
/// Starts a new `.symtab` check and immediately adds an exact-match action for the
/// symbol table label of the configured object format.
/// Panics for COFF and for any format other than MachO, ELF or Wasm.
pub fn checkInSymtab(check_object: *CheckObject) void {
    const label: []const u8 = switch (check_object.obj_format) {
        .elf => ElfDumper.symtab_label,
        .macho => MachODumper.symtab_label,
        .wasm => WasmDumper.symtab_label,
        .coff => @panic("TODO symtab for coff"),
        else => @panic("TODO other file formats"),
    };
    checkStart(check_object, .symtab);
    checkExact(check_object, label);
}
/// Starts a new `.dyld_rebase` check and adds an exact-match action for the dyld
/// rebase label. MachO only: panics for every other object format.
pub fn checkInDyldRebase(check_object: *CheckObject) void {
    if (check_object.obj_format != .macho) @panic("Unsupported target platform");
    checkStart(check_object, .dyld_rebase);
    checkExact(check_object, MachODumper.dyld_rebase_label);
}
/// Starts a new `.dyld_bind` check and adds an exact-match action for the dyld
/// bind label. MachO only: panics for every other object format.
pub fn checkInDyldBind(check_object: *CheckObject) void {
    if (check_object.obj_format != .macho) @panic("Unsupported target platform");
    checkStart(check_object, .dyld_bind);
    checkExact(check_object, MachODumper.dyld_bind_label);
}
/// Starts a new `.dyld_weak_bind` check and adds an exact-match action for the
/// dyld weak bind label. MachO only: panics for every other object format.
pub fn checkInDyldWeakBind(check_object: *CheckObject) void {
    if (check_object.obj_format != .macho) @panic("Unsupported target platform");
    checkStart(check_object, .dyld_weak_bind);
    checkExact(check_object, MachODumper.dyld_weak_bind_label);
}
/// Starts a new `.dyld_lazy_bind` check and adds an exact-match action for the
/// dyld lazy bind label. MachO only: panics for every other object format.
pub fn checkInDyldLazyBind(check_object: *CheckObject) void {
    if (check_object.obj_format != .macho) @panic("Unsupported target platform");
    checkStart(check_object, .dyld_lazy_bind);
    checkExact(check_object, MachODumper.dyld_lazy_bind_label);
}
/// Starts a new `.exports` check and adds an exact-match action for the exports
/// label. MachO only: panics for every other object format.
pub fn checkInExports(check_object: *CheckObject) void {
    if (check_object.obj_format != .macho) @panic("Unsupported target platform");
    checkStart(check_object, .exports);
    checkExact(check_object, MachODumper.exports_label);
}
/// Starts a new `.indirect_symtab` check and adds an exact-match action for the
/// indirect symbol table label. MachO only: panics for every other object format.
pub fn checkInIndirectSymtab(check_object: *CheckObject) void {
    if (check_object.obj_format != .macho) @panic("Unsupported target platform");
    checkStart(check_object, .indirect_symtab);
    checkExact(check_object, MachODumper.indirect_symtab_label);
}
/// Starts a new `.dynamic_symtab` check and adds an exact-match action for the
/// dynamic symbol table label. ELF only: panics for every other object format.
pub fn checkInDynamicSymtab(check_object: *CheckObject) void {
    if (check_object.obj_format != .elf) @panic("Unsupported target platform");
    checkStart(check_object, .dynamic_symtab);
    checkExact(check_object, ElfDumper.dynamic_symtab_label);
}
/// Starts a new `.dynamic_section` check and adds an exact-match action for the
/// dynamic section label. ELF only: panics for every other object format.
pub fn checkInDynamicSection(check_object: *CheckObject) void {
    if (check_object.obj_format != .elf) @panic("Unsupported target platform");
    checkStart(check_object, .dynamic_section);
    checkExact(check_object, ElfDumper.dynamic_section_label);
}
/// Starts a new `.archive_symtab` check and adds an exact-match action for the
/// archive symbol table label. Only ELF is handled; other formats panic.
pub fn checkInArchiveSymtab(check_object: *CheckObject) void {
    if (check_object.obj_format != .elf) @panic("TODO other file formats");
    checkStart(check_object, .archive_symtab);
    checkExact(check_object, ElfDumper.archive_symtab_label);
}
/// Appends a `.dump_section` check for the section called `name`.
/// Panics with "OOM" on allocation failure.
pub fn dumpSection(check_object: *CheckObject, name: [:0]const u8) void {
    const allocator = check_object.step.owner.allocator;
    check_object.checks.append(Check.dumpSection(allocator, name)) catch @panic("OOM");
}
/// Appends a standalone `.compute_compare` check holding a single action: evaluate
/// `program` (operators and operands separated by spaces, operands being literals
/// or previously extracted variables) and compare the result against `expected`.
/// `program` is duplicated with the owning build.
pub fn checkComputeCompare(
    check_object: *CheckObject,
    program: []const u8,
    expected: ComputeCompareExpected,
) void {
    const owner = check_object.step.owner;
    var check: Check = .create(owner.allocator, .compute_compare);
    check.computeCmp(.{ .string = owner.dupe(program) }, expected);
    check_object.checks.append(check) catch @panic("OOM");
}
/// Step make function: reads `source` (up to `max_bytes`) and runs every registered
/// check against it, in registration order, failing the step on the first check
/// that does not hold.
///
/// `.compute_compare` checks do not look at the file; they evaluate their single
/// action against the variables extracted by earlier checks. Every other check
/// dumps the file with the dumper for `obj_format` and scans the dump line by
/// line. All actions of one check share a single line iterator, so each action
/// resumes scanning right after the line consumed by the previous action.
fn make(step: *Step, make_options: Step.MakeOptions) !void {
_ = make_options;
const b = step.owner;
const gpa = b.allocator;
const check_object: *CheckObject = @fieldParentPtr("step", step);
try step.singleUnchangingWatchInput(check_object.source);
const src_path = check_object.source.getPath3(b, step);
const contents = src_path.root_dir.handle.readFileAllocOptions(
gpa,
src_path.sub_path,
check_object.max_bytes,
null,
.of(u64),
null,
) catch |err| return step.fail("unable to read '{f}': {s}", .{
std.fmt.alt(src_path, .formatEscapeChar), @errorName(err),
});
// Variables captured by `.extract` actions; shared across all checks of this step.
var vars: std.StringHashMap(u64) = .init(gpa);
for (check_object.checks.items) |chk| {
if (chk.kind == .compute_compare) {
// A compute-compare check always carries exactly one `.compute_cmp` action.
assert(chk.actions.items.len == 1);
const act = chk.actions.items[0];
assert(act.tag == .compute_cmp);
const res = act.computeCmp(b, step, vars) catch |err| switch (err) {
error.UnknownVariable => return step.fail("Unknown variable", .{}),
else => |e| return e,
};
if (!res) {
return step.fail(
\\
\\========= comparison failed for action: ===========
\\{s} {f}
\\===================================================
, .{ act.phrase.resolve(b, step), act.expected.? });
}
continue;
}
// Produce the textual dump for this check with the format-specific dumper.
const output = switch (check_object.obj_format) {
.macho => try MachODumper.parseAndDump(step, chk, contents),
.elf => try ElfDumper.parseAndDump(step, chk, contents),
.coff => return step.fail("TODO coff parser", .{}),
.wasm => try WasmDumper.parseAndDump(step, chk, contents),
else => unreachable,
};
// Depending on whether we requested dumping section verbatim or not,
// we either format message string with escaped codes, or not to aid debugging
// the failed test.
const fmtMessageString = struct {
fn fmtMessageString(kind: Check.Kind, msg: []const u8) std.fmt.Formatter(Ctx, formatMessageString) {
return .{ .data = .{
.kind = kind,
.msg = msg,
} };
}
const Ctx = struct {
kind: Check.Kind,
msg: []const u8,
};
fn formatMessageString(ctx: Ctx, w: *Writer) !void {
switch (ctx.kind) {
.dump_section => try w.print("{f}", .{std.ascii.hexEscape(ctx.msg, .lower)}),
else => try w.writeAll(ctx.msg),
}
}
}.fmtMessageString;
// Lines are split on '\r' and '\n'; the iterator is deliberately declared outside
// the action loop so that actions match in order, each continuing where the
// previous one stopped.
var it = mem.tokenizeAny(u8, output, "\r\n");
for (chk.actions.items) |act| {
switch (act.tag) {
// Advance until a line matches exactly; running out of lines is a failure.
.exact => {
while (it.next()) |line| {
if (act.exact(b, step, line)) break;
} else {
return step.fail(
\\
\\========= expected to find: ==========================
\\{f}
\\========= but parsed file does not contain it: =======
\\{f}
\\========= file path: =================================
\\{f}
, .{
fmtMessageString(chk.kind, act.phrase.resolve(b, step)),
fmtMessageString(chk.kind, output),
src_path,
});
}
},
// Advance until a line contains the phrase; running out of lines is a failure.
.contains => {
while (it.next()) |line| {
if (act.contains(b, step, line)) break;
} else {
return step.fail(
\\
\\========= expected to find: ==========================
\\*{f}*
\\========= but parsed file does not contain it: =======
\\{f}
\\========= file path: =================================
\\{f}
, .{
fmtMessageString(chk.kind, act.phrase.resolve(b, step)),
fmtMessageString(chk.kind, output),
src_path,
});
}
},
// Every remaining line must lack the phrase; this consumes the rest of the
// iterator, so later actions of the same check see no more lines.
.not_present => {
while (it.next()) |line| {
if (act.notPresent(b, step, line)) continue;
return step.fail(
\\
\\========= expected not to find: ===================
\\{f}
\\========= but parsed file does contain it: ========
\\{f}
\\========= file path: ==============================
\\{f}
, .{
fmtMessageString(chk.kind, act.phrase.resolve(b, step)),
fmtMessageString(chk.kind, output),
src_path,
});
}
},
// Advance until a line matches and yields variables, which are stored in `vars`.
.extract => {
while (it.next()) |line| {
if (try act.extract(b, step, line, &vars)) break;
} else {
return step.fail(
\\
\\========= expected to find and extract: ==============
\\{f}
\\========= but parsed file does not contain it: =======
\\{f}
\\========= file path: ==============================
\\{f}
, .{
fmtMessageString(chk.kind, act.phrase.resolve(b, step)),
fmtMessageString(chk.kind, output),
src_path,
});
}
},
// `.compute_cmp` actions only live in `.compute_compare` checks, handled above.
.compute_cmp => unreachable,
}
}
}
}
const MachODumper = struct {
const dyld_rebase_label = "dyld rebase data";
const dyld_bind_label = "dyld bind data";
const dyld_weak_bind_label = "dyld weak bind data";
const dyld_lazy_bind_label = "dyld lazy bind data";
const exports_label = "exports data";
const symtab_label = "symbol table";
const indirect_symtab_label = "indirect symbol table";
/// Entry point used by `make` for MachO input: forwards `bytes` unchanged to
/// `parseAndDumpObject` and returns its result, i.e. the input is always treated
/// as a single object file.
fn parseAndDump(step: *Step, check: Check, bytes: []const u8) ![]const u8 {
// TODO: handle archives and fat files
return parseAndDumpObject(step, check, bytes);
}
const ObjectContext = struct {
gpa: Allocator,
data: []const u8,
header: macho.mach_header_64,
segments: std.ArrayListUnmanaged(macho.segment_command_64) = .empty,
sections: std.ArrayListUnmanaged(macho.section_64) = .empty,
symtab: std.ArrayListUnmanaged(macho.nlist_64) = .empty,
strtab: std.ArrayListUnmanaged(u8) = .empty,
indsymtab: std.ArrayListUnmanaged(u32) = .empty,
imports: std.ArrayListUnmanaged([]const u8) = .empty,
/// Walks every load command once and caches what later lookups need:
/// segments and their sections (`SEGMENT_64`), the symbol and string tables
/// (`SYMTAB`), the indirect symbol table (`DYSYMTAB`) and the path names of
/// loaded, weakly loaded and re-exported dylibs. Other commands are ignored.
///
/// NOTE(review): the `nsyms` / `nindirectsyms` counts are applied to a many-item
/// pointer into `ctx.data`, so they are not bounds-checked against the file size.
fn parse(ctx: *ObjectContext) !void {
    var it = ctx.getLoadCommandIterator();
    while (it.next()) |cmd| {
        switch (cmd.cmd()) {
            .SEGMENT_64 => {
                const seg = cmd.cast(macho.segment_command_64).?;
                try ctx.segments.append(ctx.gpa, seg);
                try ctx.sections.ensureUnusedCapacity(ctx.gpa, seg.nsects);
                for (cmd.getSections()) |sect| {
                    ctx.sections.appendAssumeCapacity(sect);
                }
            },
            .SYMTAB => {
                const lc = cmd.cast(macho.symtab_command).?;
                // The tables may sit at any byte offset, hence the align(1) view.
                const symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(ctx.data[lc.symoff..].ptr))[0..lc.nsyms];
                const strtab = ctx.data[lc.stroff..][0..lc.strsize];
                try ctx.symtab.appendUnalignedSlice(ctx.gpa, symtab);
                try ctx.strtab.appendSlice(ctx.gpa, strtab);
            },
            .DYSYMTAB => {
                const lc = cmd.cast(macho.dysymtab_command).?;
                const indexes = @as([*]align(1) const u32, @ptrCast(ctx.data[lc.indirectsymoff..].ptr))[0..lc.nindirectsyms];
                try ctx.indsymtab.appendUnalignedSlice(ctx.gpa, indexes);
            },
            .LOAD_DYLIB,
            .LOAD_WEAK_DYLIB,
            .REEXPORT_DYLIB,
            => {
                try ctx.imports.append(ctx.gpa, cmd.getDylibPathName());
            },
            else => {},
        }
    }
}
/// Returns the null-terminated string that starts at byte offset `off` of the
/// cached string table. Asserts that `off` lies inside the table.
fn getString(ctx: ObjectContext, off: u32) [:0]const u8 {
    assert(off < ctx.strtab.items.len);
    const start: [*:0]const u8 = @ptrCast(ctx.strtab.items[off..].ptr);
    return mem.sliceTo(start, 0);
}
/// Returns an iterator over the load commands: the `sizeofcmds` bytes that
/// directly follow the `mach_header_64` in `ctx.data`, holding `ncmds` commands.
fn getLoadCommandIterator(ctx: ObjectContext) macho.LoadCommandIterator {
    const cmds_start = @sizeOf(macho.mach_header_64);
    const cmds = ctx.data[cmds_start..][0..ctx.header.sizeofcmds];
    return .{ .ncmds = ctx.header.ncmds, .buffer = cmds };
}
/// Returns the first load command whose kind equals `cmd`, or `null` if the
/// object has none.
fn getLoadCommand(ctx: ObjectContext, cmd: macho.LC) ?macho.LoadCommandIterator.LoadCommand {
    var it = ctx.getLoadCommandIterator();
    while (it.next()) |candidate| {
        if (candidate.cmd() != cmd) continue;
        return candidate;
    }
    return null;
}
/// Returns the first cached segment whose name equals `name`, or `null`.
fn getSegmentByName(ctx: ObjectContext, name: []const u8) ?macho.segment_command_64 {
    for (ctx.segments.items) |segment| {
        if (!mem.eql(u8, segment.segName(), name)) continue;
        return segment;
    }
    return null;
}
/// Returns the first cached section whose segment name equals `segname` and whose
/// section name equals `sectname`, or `null`.
fn getSectionByName(ctx: ObjectContext, segname: []const u8, sectname: []const u8) ?macho.section_64 {
    for (ctx.sections.items) |section| {
        if (!mem.eql(u8, section.segName(), segname)) continue;
        if (mem.eql(u8, section.sectName(), sectname)) return section;
    }
    return null;
}
/// Writes a textual dump of the Mach-O header to `bw`: the literal line `header`,
/// then `cputype`, `filetype`, `ncmds` (decimal), `sizeofcmds` (hex) and a `flags`
/// line listing the name of every set flag, each preceded by a space. CPU types
/// and file types without a name here are printed as `Unknown`.
/// The output ends with a newline.
fn dumpHeader(hdr: macho.mach_header_64, bw: *Writer) !void {
    const cpu_name = switch (hdr.cputype) {
        macho.CPU_TYPE_ARM64 => "ARM64",
        macho.CPU_TYPE_X86_64 => "X86_64",
        else => "Unknown",
    };
    const file_kind = switch (hdr.filetype) {
        macho.MH_OBJECT => "MH_OBJECT",
        macho.MH_EXECUTE => "MH_EXECUTE",
        macho.MH_FVMLIB => "MH_FVMLIB",
        macho.MH_CORE => "MH_CORE",
        macho.MH_PRELOAD => "MH_PRELOAD",
        macho.MH_DYLIB => "MH_DYLIB",
        macho.MH_DYLINKER => "MH_DYLINKER",
        macho.MH_BUNDLE => "MH_BUNDLE",
        macho.MH_DYLIB_STUB => "MH_DYLIB_STUB",
        macho.MH_DSYM => "MH_DSYM",
        macho.MH_KEXT_BUNDLE => "MH_KEXT_BUNDLE",
        else => "Unknown",
    };
    try bw.print(
        \\header
        \\cputype {s}
        \\filetype {s}
        \\ncmds {d}
        \\sizeofcmds {x}
        \\flags
    , .{ cpu_name, file_kind, hdr.ncmds, hdr.sizeofcmds });

    // Flag names in output order; each maps to the `macho.MH_<name>` bit mask.
    const flag_names = [_][]const u8{
        "NOUNDEFS",
        "INCRLINK",
        "DYLDLINK",
        "BINDATLOAD",
        "PREBOUND",
        "SPLIT_SEGS",
        "LAZY_INIT",
        "TWOLEVEL",
        "FORCE_FLAT",
        "NOMULTIDEFS",
        "NOFIXPREBINDING",
        "PREBINDABLE",
        "ALLMODSBOUND",
        "SUBSECTIONS_VIA_SYMBOLS",
        "CANONICAL",
        "WEAK_DEFINES",
        "BINDS_TO_WEAK",
        "ALLOW_STACK_EXECUTION",
        "ROOT_SAFE",
        "SETUID_SAFE",
        "NO_REEXPORTED_DYLIBS",
        "PIE",
        "DEAD_STRIPPABLE_DYLIB",
        "HAS_TLV_DESCRIPTORS",
        "NO_HEAP_EXECUTION",
        "APP_EXTENSION_SAFE",
        "NLIST_OUTOFSYNC_WITH_DYLDINFO",
    };
    if (hdr.flags != 0) {
        inline for (flag_names) |flag_name| {
            const mask = @field(macho, "MH_" ++ flag_name);
            if (hdr.flags & mask != 0) try bw.writeAll(" " ++ flag_name);
        }
    }
    try bw.writeByte('\n');
}
/// Writes a textual dump of the load command `lc` to `bw`.
///
/// Every command gets a common header (`LC <index>`, `cmd <tag name>`,
/// `cmdsize <decimal>`). Commands with a dedicated branch below then add their own
/// `name value` lines; all other commands produce the header only. Each branch
/// writes a newline *before* its fields, so the output never ends with a newline.
/// Unless a format string says `{d}`, numeric fields are printed in hexadecimal
/// without a `0x` prefix.
fn dumpLoadCommand(lc: macho.LoadCommandIterator.LoadCommand, index: usize, bw: *Writer) !void {
// print header first
try bw.print(
\\LC {d}
\\cmd {s}
\\cmdsize {d}
, .{ index, @tagName(lc.cmd()), lc.cmdsize() });
switch (lc.cmd()) {
// Segment fields, followed by one group of lines per section of the segment.
.SEGMENT_64 => {
const seg = lc.cast(macho.segment_command_64).?;
try bw.writeByte('\n');
try bw.print(
\\segname {s}
\\vmaddr {x}
\\vmsize {x}
\\fileoff {x}
\\filesz {x}
, .{
seg.segName(),
seg.vmaddr,
seg.vmsize,
seg.fileoff,
seg.filesize,
});
for (lc.getSections()) |sect| {
try bw.writeByte('\n');
try bw.print(
\\sectname {s}
\\addr {x}
\\size {x}
\\offset {x}
\\align {x}
, .{
sect.sectName(),
sect.addr,
sect.size,
sect.offset,
sect.@"align",
});
}
},
// All dylib-referencing commands share the `dylib_command` layout.
.ID_DYLIB,
.LOAD_DYLIB,
.LOAD_WEAK_DYLIB,
.REEXPORT_DYLIB,
=> {
const dylib = lc.cast(macho.dylib_command).?;
try bw.writeByte('\n');
try bw.print(
\\name {s}
\\timestamp {d}
\\current version {x}
\\compatibility version {x}
, .{
lc.getDylibPathName(),
dylib.dylib.timestamp,
dylib.dylib.current_version,
dylib.dylib.compatibility_version,
});
},
.MAIN => {
const main = lc.cast(macho.entry_point_command).?;
try bw.writeByte('\n');
try bw.print(
\\entryoff {x}
\\stacksize {x}
, .{ main.entryoff, main.stacksize });
},
.RPATH => {
try bw.writeByte('\n');
try bw.print(
\\path {s}
, .{
lc.getRpathPathName(),
});
},
.UUID => {
const uuid = lc.cast(macho.uuid_command).?;
try bw.writeByte('\n');
try bw.print("uuid {x}", .{&uuid.uuid});
},
// These commands are all read through the `linkedit_data_command` layout.
.DATA_IN_CODE,
.FUNCTION_STARTS,
.CODE_SIGNATURE,
=> {
const llc = lc.cast(macho.linkedit_data_command).?;
try bw.writeByte('\n');
try bw.print(
\\dataoff {x}
\\datasize {x}
, .{ llc.dataoff, llc.datasize });
},
.DYLD_INFO_ONLY => {
const dlc = lc.cast(macho.dyld_info_command).?;
try bw.writeByte('\n');
try bw.print(
\\rebaseoff {x}
\\rebasesize {x}
\\bindoff {x}
\\bindsize {x}
\\weakbindoff {x}
\\weakbindsize {x}
\\lazybindoff {x}
\\lazybindsize {x}
\\exportoff {x}
\\exportsize {x}
, .{
dlc.rebase_off,
dlc.rebase_size,
dlc.bind_off,
dlc.bind_size,
dlc.weak_bind_off,
dlc.weak_bind_size,
dlc.lazy_bind_off,
dlc.lazy_bind_size,
dlc.export_off,
dlc.export_size,
});
},
.SYMTAB => {
const slc = lc.cast(macho.symtab_command).?;
try bw.writeByte('\n');
try bw.print(
\\symoff {x}
\\nsyms {x}
\\stroff {x}
\\strsize {x}
, .{
slc.symoff,
slc.nsyms,
slc.stroff,
slc.strsize,
});
},
.DYSYMTAB => {
const dlc = lc.cast(macho.dysymtab_command).?;
try bw.writeByte('\n');
try bw.print(
\\ilocalsym {x}
\\nlocalsym {x}
\\iextdefsym {x}
\\nextdefsym {x}
\\iundefsym {x}
\\nundefsym {x}
\\indirectsymoff {x}
\\nindirectsyms {x}
, .{
dlc.ilocalsym,
dlc.nlocalsym,
dlc.iextdefsym,
dlc.nextdefsym,
dlc.iundefsym,
dlc.nundefsym,
dlc.indirectsymoff,
dlc.nindirectsyms,
});
},
// Versions are printed as three decimal parts: the bits above 16, then the
// second-lowest byte, then the lowest byte of the packed value.
.BUILD_VERSION => {
const blc = lc.cast(macho.build_version_command).?;
try bw.writeByte('\n');
try bw.print(
\\platform {s}
\\minos {d}.{d}.{d}
\\sdk {d}.{d}.{d}
\\ntools {d}
, .{
@tagName(blc.platform),
blc.minos >> 16,
@as(u8, @truncate(blc.minos >> 8)),
@as(u8, @truncate(blc.minos)),
blc.sdk >> 16,
@as(u8, @truncate(blc.sdk >> 8)),
@as(u8, @truncate(blc.sdk)),
blc.ntools,
});
for (lc.getBuildVersionTools()) |tool| {
try bw.writeByte('\n');
// Named tools print their tag name; any other value prints its integer value.
switch (tool.tool) {
.CLANG, .SWIFT, .LD, .LLD, .ZIG => try bw.print("tool {s}\n", .{@tagName(tool.tool)}),
else => |x| try bw.print("tool {d}\n", .{@intFromEnum(x)}),
}
try bw.print(
\\version {d}.{d}.{d}
, .{
tool.version >> 16,
@as(u8, @truncate(tool.version >> 8)),
@as(u8, @truncate(tool.version)),
});
}
},
// Same packed-version decoding as in the BUILD_VERSION branch.
.VERSION_MIN_MACOSX,
.VERSION_MIN_IPHONEOS,
.VERSION_MIN_WATCHOS,
.VERSION_MIN_TVOS,
=> {
const vlc = lc.cast(macho.version_min_command).?;
try bw.writeByte('\n');
try bw.print(
\\version {d}.{d}.{d}
\\sdk {d}.{d}.{d}
, .{
vlc.version >> 16,
@as(u8, @truncate(vlc.version >> 8)),
@as(u8, @truncate(vlc.version)),
vlc.sdk >> 16,
@as(u8, @truncate(vlc.sdk >> 8)),
@as(u8, @truncate(vlc.sdk)),
});
},
// Everything else: header only.
else => {},
}
}
/// Writes `symtab_label` followed by one line per entry of `ctx.symtab`.
///
/// The line format is chosen by the first predicate the symbol satisfies, in
/// this order: `stab()`, `sect()`, `tentative()`, `undf()`. A symbol matching
/// none of them produces no output.
fn dumpSymtab(ctx: ObjectContext, bw: *Writer) !void {
    try bw.writeAll(symtab_label ++ "\n");

    for (ctx.symtab.items) |nlist| {
        const name = ctx.getString(nlist.n_strx);

        if (nlist.stab()) {
            try bw.print("{x}", .{nlist.n_value});
            if (nlist.n_sect > 0) {
                // `n_sect` is used as a 1-based index into `ctx.sections`.
                const owner = ctx.sections.items[nlist.n_sect - 1];
                try bw.print(" ({s},{s})", .{ owner.segName(), owner.sectName() });
            }
            const stab_kind = switch (nlist.n_type) {
                macho.N_SO => "SO",
                macho.N_OSO => "OSO",
                macho.N_BNSYM => "BNSYM",
                macho.N_ENSYM => "ENSYM",
                macho.N_FUN => "FUN",
                macho.N_GSYM => "GSYM",
                macho.N_STSYM => "STSYM",
                else => "UNKNOWN STAB",
            };
            try bw.print(" {s} (stab) {s}\n", .{ stab_kind, name });
            continue;
        }

        if (nlist.sect()) {
            const owner = ctx.sections.items[nlist.n_sect - 1];
            try bw.print("{x} ({s},{s})", .{
                nlist.n_value,
                owner.segName(),
                owner.sectName(),
            });
            if (nlist.n_desc & macho.REFERENCED_DYNAMICALLY != 0) try bw.writeAll(" [referenced dynamically]");
            if (nlist.weakDef()) try bw.writeAll(" weak");
            if (nlist.weakRef()) try bw.writeAll(" weakref");
            if (nlist.ext()) {
                if (nlist.pext()) try bw.writeAll(" private");
                try bw.writeAll(" external");
            } else if (nlist.pext()) try bw.writeAll(" (was private external)");
            try bw.print(" {s}\n", .{name});
            continue;
        }

        if (nlist.tentative()) {
            // Bits 8..11 of `n_desc` are printed as the power-of-two alignment.
            const alignment = (nlist.n_desc >> 8) & 0x0F;
            try bw.print(" 0x{x:0>16} (common) (alignment 2^{d})", .{ nlist.n_value, alignment });
            if (nlist.ext()) try bw.writeAll(" external");
            try bw.print(" {s}\n", .{name});
            continue;
        }

        if (!nlist.undf()) continue;

        // The library ordinal is `n_desc` (as signed) divided by
        // `N_SYMBOL_RESOLVER`, rounded towards negative infinity.
        const ordinal = @divFloor(@as(i16, @bitCast(nlist.n_desc)), macho.N_SYMBOL_RESOLVER);
        const origin: []const u8 = if (ordinal > 0) lib: {
            // Positive ordinals are 1-based indices into `ctx.imports`; the
            // library is reported as its basename without the last extension.
            const full_path = ctx.imports.items[@as(u16, @bitCast(ordinal)) - 1];
            const base = fs.path.basename(full_path);
            assert(base.len > 0);
            const stem_len = mem.lastIndexOfScalar(u8, base, '.') orelse base.len;
            break :lib base[0..stem_len];
        } else if (ordinal == macho.BIND_SPECIAL_DYLIB_SELF)
            "self import"
        else if (ordinal == macho.BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE)
            "main executable"
        else if (ordinal == macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP)
            "flat lookup"
        else
            unreachable;

        try bw.writeAll("(undefined)");
        if (nlist.weakRef()) try bw.writeAll(" weakref");
        if (nlist.ext()) try bw.writeAll(" external");
        try bw.print(" {s} (from {s})\n", .{ name, origin });
    }
}
/// Writes `indirect_symtab_label` and then, for each of the sections
/// `__TEXT,__stubs`, `__DATA_CONST,__got` and `__DATA,__la_symbol_ptr` that
/// exists, the slice of the indirect symbol table belonging to it.
///
/// Sections are processed in ascending `reserved1` order; a section's slice
/// ends where the next section's begins, or at the end of `ctx.indsymtab`.
fn dumpIndirectSymtab(ctx: ObjectContext, bw: *Writer) !void {
    try bw.writeAll(indirect_symtab_label ++ "\n");

    const candidates = [_][2][]const u8{
        .{ "__TEXT", "__stubs" },
        .{ "__DATA_CONST", "__got" },
        .{ "__DATA", "__la_symbol_ptr" },
    };
    var sects_buffer: [candidates.len]macho.section_64 = undefined;
    var found: usize = 0;
    for (candidates) |names| {
        if (ctx.getSectionByName(names[0], names[1])) |sect| {
            sects_buffer[found] = sect;
            found += 1;
        }
    }
    const sects = sects_buffer[0..found];

    const ByFirstIndex = struct {
        fn lessThan(_: void, lhs: macho.section_64, rhs: macho.section_64) bool {
            return lhs.reserved1 < rhs.reserved1;
        }
    };
    mem.sort(macho.section_64, sects, {}, ByFirstIndex.lessThan);

    for (sects, 0..) |sect, i| {
        const start = sect.reserved1;
        const end = if (i + 1 >= sects.len) ctx.indsymtab.items.len else sects[i + 1].reserved1;
        // `__stubs` entries use the size stored in `reserved2`; every other
        // section here is treated as an array of 8-byte entries.
        const entry_size = if (mem.eql(u8, sect.sectName(), "__stubs")) sect.reserved2 else @sizeOf(u64);

        try bw.print("{s},{s}\n", .{ sect.segName(), sect.sectName() });
        try bw.print("nentries {d}\n", .{end - start});
        for (ctx.indsymtab.items[start..end], 0..) |sym_index, slot| {
            const sym = ctx.symtab.items[sym_index];
            const addr = sect.addr + entry_size * slot;
            try bw.print("0x{x} {d} {s}\n", .{ addr, sym_index, ctx.getString(sym.n_strx) });
        }
    }
}
/// Decodes the rebase opcode stream in `data` and writes every rebased
/// address, sorted ascending, as one `0x…` line each.
fn dumpRebaseInfo(ctx: ObjectContext, data: []const u8, bw: *Writer) !void {
    var addresses: std.ArrayList(u64) = .init(ctx.gpa);
    defer addresses.deinit();

    try ctx.parseRebaseInfo(data, &addresses);

    const sorted = addresses.items;
    mem.sort(u64, sorted, {}, std.sort.asc(u64));
    for (sorted) |address| try bw.print("0x{x}\n", .{address});
}
/// Interprets the rebase opcode stream in `data`, appending the absolute
/// address (segment `vmaddr` + running offset) of every rebase to `rebases`.
///
/// Decoding stops at `REBASE_OPCODE_DONE`, at an unrecognised opcode, or when
/// the opcode bytes run out.
fn parseRebaseInfo(ctx: ObjectContext, data: []const u8, rebases: *std.ArrayList(u64)) !void {
    var br: std.io.Reader = .fixed(data);
    var seg_id: ?u8 = null;
    var offset: u64 = 0;

    while (br.takeByte()) |byte| {
        const opc = byte & macho.REBASE_OPCODE_MASK;
        const imm = byte & macho.REBASE_IMMEDIATE_MASK;
        switch (opc) {
            macho.REBASE_OPCODE_DONE => break,
            macho.REBASE_OPCODE_SET_TYPE_IMM => {},
            macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
                seg_id = imm;
                offset = try br.takeLeb128(u64);
            },
            macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED => {
                offset += imm * @sizeOf(u64);
            },
            macho.REBASE_OPCODE_ADD_ADDR_ULEB => {
                offset += try br.takeLeb128(u64);
            },
            macho.REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB => {
                const addend = try br.takeLeb128(u64);
                const base_addr = ctx.segments.items[seg_id.?].vmaddr;
                try rebases.append(base_addr + offset);
                offset += addend + @sizeOf(u64);
            },
            macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES,
            macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES,
            macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB,
            => {
                // The repeat count comes from the immediate or from a ULEB;
                // only the "skipping" form carries an extra stride operand.
                const ntimes: u64 = if (opc == macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES)
                    imm
                else
                    try br.takeLeb128(u64);
                const skip: u64 = if (opc == macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB)
                    try br.takeLeb128(u64)
                else
                    0;

                const base_addr = ctx.segments.items[seg_id.?].vmaddr;
                var done: u64 = 0;
                while (done < ntimes) : (done += 1) {
                    try rebases.append(base_addr + offset);
                    offset += skip + @sizeOf(u64);
                }
            },
            else => break,
        }
    } else |_| {}
}
/// One decoded bind record: the address being bound, the addend, where the
/// symbol is looked up (`tag`, plus `ordinal` when `tag == .ord`) and the
/// symbol name. `name` is owned by the record and released by `deinit`.
const Binding = struct {
    address: u64,
    addend: i64,
    ordinal: u16,
    tag: Tag,
    name: []const u8,

    /// Lookup source of a binding.
    const Tag = enum {
        ord,
        self,
        exe,
        flat,
    };

    /// Frees `name` with `gpa`, which must be the allocator that produced it.
    fn deinit(binding: *Binding, gpa: Allocator) void {
        gpa.free(binding.name);
    }

    /// Sort predicate: orders bindings by ascending `address`.
    fn lessThan(ctx: void, lhs: Binding, rhs: Binding) bool {
        _ = ctx;
        return rhs.address > lhs.address;
    }
};
/// Decodes the bind opcode stream in `data` and writes one line per binding,
/// sorted by address: `0x<addr> [addend: <n>] (<source>) <name>`.
///
/// `<source>` is "self", "main executable", "flat lookup", or the basename of
/// the import selected by the binding's ordinal.
fn dumpBindInfo(ctx: ObjectContext, data: []const u8, bw: *Writer) !void {
    var entries: std.ArrayList(Binding) = .init(ctx.gpa);
    defer {
        for (entries.items) |*entry| entry.deinit(ctx.gpa);
        entries.deinit();
    }

    try ctx.parseBindInfo(data, &entries);
    mem.sort(Binding, entries.items, {}, Binding.lessThan);

    for (entries.items) |entry| {
        try bw.print("0x{x} [addend: {d}]", .{ entry.address, entry.addend });
        try bw.writeAll(" (");
        const source: []const u8 = switch (entry.tag) {
            .self => "self",
            .exe => "main executable",
            .flat => "flat lookup",
            // Ordinals are 1-based indices into the import list.
            .ord => std.fs.path.basename(ctx.imports.items[entry.ordinal - 1]),
        };
        try bw.writeAll(source);
        try bw.print(") {s}\n", .{entry.name});
    }
}
/// Interprets the bind opcode stream in `data` and appends one `Binding` per
/// bound address to `bindings`.
///
/// Each appended binding owns a copy of the current symbol name, allocated
/// with `ctx.gpa`; the caller releases it with `Binding.deinit`.
///
/// Decoding stops at an unrecognised opcode or when the opcode bytes run
/// out. `BIND_OPCODE_DONE` is treated as a no-op, so decoding continues
/// past it.
fn parseBindInfo(ctx: ObjectContext, data: []const u8, bindings: *std.ArrayList(Binding)) !void {
    var br: std.io.Reader = .fixed(data);
    var seg_id: ?u8 = null;
    var tag: Binding.Tag = .self;
    var ordinal: u16 = 0;
    var offset: u64 = 0;
    var addend: i64 = 0;
    var name_buf: std.io.Writer.Allocating = .init(ctx.gpa);
    defer name_buf.deinit();

    while (br.takeByte()) |byte| {
        const opc = byte & macho.BIND_OPCODE_MASK;
        const imm = byte & macho.BIND_IMMEDIATE_MASK;
        switch (opc) {
            macho.BIND_OPCODE_DONE,
            macho.BIND_OPCODE_SET_TYPE_IMM,
            => {},
            macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => {
                tag = .ord;
                ordinal = imm;
            },
            // Ordinals that do not fit in the 4-bit immediate are encoded as
            // a ULEB operand. Without this case the opcode fell through to
            // `else => break` and silently truncated the dump.
            macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => {
                tag = .ord;
                ordinal = try br.takeLeb128(u16);
            },
            macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM => {
                switch (imm) {
                    0 => tag = .self,
                    0xf => tag = .exe,
                    0xe => tag = .flat,
                    else => unreachable,
                }
            },
            macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
                seg_id = imm;
                offset = try br.takeLeb128(u64);
            },
            macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
                name_buf.clearRetainingCapacity();
                // NOTE(review): `streamDelimiterLimit` presumably leaves the
                // terminating 0 unread; the next iteration would then decode
                // it as BIND_OPCODE_DONE, which is a no-op here. Confirm.
                _ = try br.streamDelimiterLimit(&name_buf.writer, 0, .limited(std.math.maxInt(u32)));
                try name_buf.writer.writeByte(0);
            },
            macho.BIND_OPCODE_SET_ADDEND_SLEB => {
                addend = try br.takeLeb128(i64);
            },
            macho.BIND_OPCODE_ADD_ADDR_ULEB => {
                // The operand is reinterpreted as signed so that it can move
                // the offset backwards.
                const x = try br.takeLeb128(u64);
                offset = @intCast(@as(i64, @intCast(offset)) + @as(i64, @bitCast(x)));
            },
            macho.BIND_OPCODE_DO_BIND,
            macho.BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB,
            macho.BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED,
            macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB,
            => {
                var add_addr: u64 = 0;
                var count: u64 = 1;
                var skip: u64 = 0;
                switch (opc) {
                    macho.BIND_OPCODE_DO_BIND => {},
                    macho.BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB => {
                        add_addr = try br.takeLeb128(u64);
                    },
                    macho.BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED => {
                        add_addr = imm * @sizeOf(u64);
                    },
                    macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB => {
                        count = try br.takeLeb128(u64);
                        skip = try br.takeLeb128(u64);
                    },
                    else => unreachable,
                }
                const seg = ctx.segments.items[seg_id.?];
                var i: u64 = 0;
                while (i < count) : (i += 1) {
                    const addr: u64 = @intCast(@as(i64, @intCast(seg.vmaddr + offset)));
                    // Duplicate the name first and guard it with `errdefer`,
                    // so a failing `append` no longer leaks the copy.
                    const name = try ctx.gpa.dupe(u8, name_buf.getWritten());
                    errdefer ctx.gpa.free(name);
                    try bindings.append(.{
                        .address = addr,
                        .addend = addend,
                        .tag = tag,
                        .ordinal = ordinal,
                        .name = name,
                    });
                    offset += skip + @sizeOf(u64) + add_addr;
                }
            },
            else => break,
        }
    } else |_| {}
}
/// Parses the export trie in `data` and writes one line per exported symbol,
/// sorted with `Export.lessThan`.
///
/// Regular exports are prefixed with their address (`__TEXT` vmaddr plus the
/// export's offset) and, for non-regular or weak ones, a bracketed attribute
/// list. Other entries are written as the bare name. Writes nothing when
/// there is no `__TEXT` segment.
fn dumpExportsTrie(ctx: ObjectContext, data: []const u8, bw: *Writer) !void {
    const text_seg = ctx.getSegmentByName("__TEXT") orelse return;

    // All trie allocations (entry list and concatenated names) live in one
    // arena that is dropped when the dump is finished.
    var arena_state = std.heap.ArenaAllocator.init(ctx.gpa);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    var found: std.ArrayList(Export) = .init(arena);
    var br: std.io.Reader = .fixed(data);
    try parseTrieNode(arena, &br, "", &found);
    mem.sort(Export, found.items, {}, Export.lessThan);

    for (found.items) |entry| {
        if (entry.tag == .@"export") {
            const info = entry.data.@"export";
            const annotated = info.kind != .regular or info.weak;
            if (annotated) try bw.writeByte('[');
            switch (info.kind) {
                .regular => {},
                .absolute => try bw.writeAll("ABS, "),
                .tlv => try bw.writeAll("THREAD_LOCAL, "),
            }
            if (info.weak) try bw.writeAll("WEAK");
            if (annotated) try bw.writeAll("] ");
            try bw.print("{x} ", .{text_seg.vmaddr + info.vmoffset});
        }
        try bw.print("{s}\n", .{entry.name});
    }
}
/// One terminal entry of the export trie. `tag` selects which member of
/// `data` is valid.
const Export = struct {
    name: []const u8,
    tag: enum { @"export", reexport, stub_resolver },
    data: union {
        @"export": struct {
            kind: enum { regular, absolute, tlv },
            weak: bool = false,
            vmoffset: u64,
        },
        reexport: u64,
        stub_resolver: struct {
            stub_offset: u64,
            resolver_offset: u64,
        },
    },

    /// Sort rank of the entry's kind: exports (1), re-exports (2), then
    /// stub-and-resolver entries (3).
    inline fn rankByTag(@"export": Export) u3 {
        return @as(u3, @intFromEnum(@"export".tag)) + 1;
    }

    /// Sort predicate: entries are grouped by `rankByTag`; within a group
    /// they are ordered by vm offset, re-export value, or stub offset.
    fn lessThan(ctx: void, lhs: Export, rhs: Export) bool {
        _ = ctx;
        const lhs_rank = lhs.rankByTag();
        const rhs_rank = rhs.rankByTag();
        if (lhs_rank != rhs_rank) return lhs_rank < rhs_rank;
        return switch (lhs.tag) {
            .@"export" => lhs.data.@"export".vmoffset < rhs.data.@"export".vmoffset,
            .reexport => lhs.data.reexport < rhs.data.reexport,
            .stub_resolver => lhs.data.stub_resolver.stub_offset < rhs.data.stub_resolver.stub_offset,
        };
    }
};
/// Parses the trie node at the reader's current position and, recursively,
/// all of its children, appending every terminal entry to `exports`.
///
/// `prefix` is the symbol name accumulated on the path to this node. Child
/// prefixes are allocated from `arena`; names read with `takeSentinel` point
/// into the reader's buffer.
fn parseTrieNode(
    arena: Allocator,
    br: *std.io.Reader,
    prefix: []const u8,
    exports: *std.ArrayList(Export),
) !void {
    // A non-zero size means this node carries export information.
    const terminal_size = try br.takeLeb128(u64);
    if (terminal_size != 0) {
        const flags = try br.takeLeb128(u8);
        if (flags == macho.EXPORT_SYMBOL_FLAGS_REEXPORT) {
            const ord = try br.takeLeb128(u64);
            const name = try br.takeSentinel(0);
            try exports.append(.{
                // An empty re-export name falls back to the trie path.
                .name = if (name.len > 0) name else prefix,
                .tag = .reexport,
                .data = .{ .reexport = ord },
            });
        } else if (flags == macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
            const stub_offset = try br.takeLeb128(u64);
            const resolver_offset = try br.takeLeb128(u64);
            try exports.append(.{
                .name = prefix,
                .tag = .stub_resolver,
                .data = .{ .stub_resolver = .{
                    .stub_offset = stub_offset,
                    .resolver_offset = resolver_offset,
                } },
            });
        } else {
            const vmoff = try br.takeLeb128(u64);
            try exports.append(.{
                .name = prefix,
                .tag = .@"export",
                .data = .{ .@"export" = .{
                    .kind = switch (flags & macho.EXPORT_SYMBOL_FLAGS_KIND_MASK) {
                        macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR => .regular,
                        macho.EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE => .absolute,
                        macho.EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL => .tlv,
                        else => unreachable,
                    },
                    .weak = flags & macho.EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION != 0,
                    .vmoffset = vmoff,
                } },
            });
        }
    }

    var edges_left = try br.takeByte();
    while (edges_left > 0) : (edges_left -= 1) {
        const label = try br.takeSentinel(0);
        const child_off = try br.takeLeb128(usize);
        const child_prefix = try std.fmt.allocPrint(arena, "{s}{s}", .{ prefix, label });
        // Jump to the child node, then come back to continue with the next
        // edge of this node.
        const resume_at = br.seek;
        br.seek = child_off;
        try parseTrieNode(arena, br, child_prefix, exports);
        br.seek = resume_at;
    }
}
/// Writes the raw file contents of `sect` (`sect.size` bytes starting at
/// `sect.offset` in `ctx.data`) to `bw`.
fn dumpSection(ctx: ObjectContext, sect: macho.section_64, bw: *Writer) !void {
    const from_offset = ctx.data[sect.offset..];
    const contents = from_offset[0..sect.size];
    try bw.writeAll(contents);
}
};
/// Parses `bytes` as a 64-bit Mach-O file and returns the textual dump
/// selected by `check.kind`. The returned slice is allocated with the
/// step owner's allocator.
///
/// Returns `error.InvalidMagicNumber` when `bytes` is too short to hold a
/// Mach-O header or does not start with `MH_MAGIC_64`. A missing table, or a
/// check kind that does not apply to Mach-O, is reported through `step.fail`.
fn parseAndDumpObject(step: *Step, check: Check, bytes: []const u8) ![]const u8 {
    const gpa = step.owner.allocator;
    // The header is read through a raw pointer cast, so make sure it is
    // actually there before dereferencing.
    if (bytes.len < @sizeOf(macho.mach_header_64)) return error.InvalidMagicNumber;
    const hdr = @as(*align(1) const macho.mach_header_64, @ptrCast(bytes.ptr)).*;
    if (hdr.magic != macho.MH_MAGIC_64) {
        return error.InvalidMagicNumber;
    }

    var ctx = ObjectContext{ .gpa = gpa, .data = bytes, .header = hdr };
    try ctx.parse();

    var aw: std.io.Writer.Allocating = .init(gpa);
    defer aw.deinit();
    const bw = &aw.writer;

    switch (check.kind) {
        .headers => {
            try ObjectContext.dumpHeader(ctx.header, bw);

            var it = ctx.getLoadCommandIterator();
            var i: usize = 0;
            while (it.next()) |cmd| {
                try ObjectContext.dumpLoadCommand(cmd, i, bw);
                try bw.writeByte('\n');

                i += 1;
            }
        },

        .symtab => if (ctx.symtab.items.len > 0) {
            try ctx.dumpSymtab(bw);
        } else return step.fail("no symbol table found", .{}),

        .indirect_symtab => if (ctx.symtab.items.len > 0 and ctx.indsymtab.items.len > 0) {
            try ctx.dumpIndirectSymtab(bw);
        } else return step.fail("no indirect symbol table found", .{}),

        .dyld_rebase,
        .dyld_bind,
        .dyld_weak_bind,
        .dyld_lazy_bind,
        => {
            // All four tables are described by the DYLD_INFO_ONLY command.
            const cmd = ctx.getLoadCommand(.DYLD_INFO_ONLY) orelse
                return step.fail("no dyld info found", .{});
            const lc = cmd.cast(macho.dyld_info_command).?;

            switch (check.kind) {
                .dyld_rebase => if (lc.rebase_size > 0) {
                    const data = ctx.data[lc.rebase_off..][0..lc.rebase_size];
                    try bw.writeAll(dyld_rebase_label ++ "\n");
                    try ctx.dumpRebaseInfo(data, bw);
                } else return step.fail("no rebase data found", .{}),

                .dyld_bind => if (lc.bind_size > 0) {
                    const data = ctx.data[lc.bind_off..][0..lc.bind_size];
                    try bw.writeAll(dyld_bind_label ++ "\n");
                    try ctx.dumpBindInfo(data, bw);
                } else return step.fail("no bind data found", .{}),

                .dyld_weak_bind => if (lc.weak_bind_size > 0) {
                    const data = ctx.data[lc.weak_bind_off..][0..lc.weak_bind_size];
                    try bw.writeAll(dyld_weak_bind_label ++ "\n");
                    try ctx.dumpBindInfo(data, bw);
                } else return step.fail("no weak bind data found", .{}),

                .dyld_lazy_bind => if (lc.lazy_bind_size > 0) {
                    const data = ctx.data[lc.lazy_bind_off..][0..lc.lazy_bind_size];
                    try bw.writeAll(dyld_lazy_bind_label ++ "\n");
                    try ctx.dumpBindInfo(data, bw);
                } else return step.fail("no lazy bind data found", .{}),

                else => unreachable,
            }
        },

        .exports => blk: {
            if (ctx.getLoadCommand(.DYLD_INFO_ONLY)) |cmd| {
                const lc = cmd.cast(macho.dyld_info_command).?;
                if (lc.export_size > 0) {
                    const data = ctx.data[lc.export_off..][0..lc.export_size];
                    try bw.writeAll(exports_label ++ "\n");
                    try ctx.dumpExportsTrie(data, bw);
                    break :blk;
                }
            }
            return step.fail("no exports data found", .{});
        },

        .dump_section => {
            // The section to dump is stored as a NUL-terminated
            // "segment,section" string inside `check.data`.
            const name = mem.sliceTo(@as([*:0]const u8, @ptrCast(check.data.items[check.payload.dump_section..].ptr)), 0);
            const sep_index = mem.indexOfScalar(u8, name, ',') orelse
                return step.fail("invalid section name: {s}", .{name});
            const segname = name[0..sep_index];
            const sectname = name[sep_index + 1 ..];
            const sect = ctx.getSectionByName(segname, sectname) orelse
                return step.fail("section '{s}' not found", .{name});
            try ctx.dumpSection(sect, bw);
        },

        else => return step.fail("invalid check kind for MachO file format: {s}", .{@tagName(check.kind)}),
    }

    return aw.toOwnedSlice();
}
};
const ElfDumper = struct {
// Header lines of the ELF dumps. Each dump routine writes its label,
// followed by a newline, before the entries it prints.
const symtab_label = "symbol table";
const dynamic_symtab_label = "dynamic symbol table";
const dynamic_section_label = "dynamic section";
const archive_symtab_label = "archive symbol table";
/// Dumps `bytes` as an archive when it starts with the archive magic and as
/// a single ELF object otherwise. Any other archive parsing error is
/// returned unchanged.
fn parseAndDump(step: *Step, check: Check, bytes: []const u8) ![]const u8 {
    const archive_dump = parseAndDumpArchive(step, check, bytes) catch |err| switch (err) {
        // Not an archive: treat the input as a plain object file.
        error.InvalidArchiveMagicNumber => return parseAndDumpObject(step, check, bytes),
        else => |other| return other,
    };
    return archive_dump;
}
/// Parses `bytes` as an archive and returns the dump selected by
/// `check.kind`: the archive symbol table for `.archive_symtab`, otherwise
/// the per-object dump of every member.
///
/// Returns `error.InvalidArchiveMagicNumber` when `bytes` does not start with
/// the archive magic and `error.InvalidArchiveHeaderMagicNumber` when a
/// member header is malformed.
fn parseAndDumpArchive(step: *Step, check: Check, bytes: []const u8) ![]const u8 {
    const gpa = step.owner.allocator;
    var br: std.io.Reader = .fixed(bytes);

    if (!mem.eql(u8, try br.takeArray(elf.ARMAG.len), elf.ARMAG)) return error.InvalidArchiveMagicNumber;

    var ctx: ArchiveContext = .{
        .gpa = gpa,
        .data = bytes,
        .symtab = &.{},
        .strtab = &.{},
        .objects = .empty,
    };
    defer ctx.deinit();

    while (true) {
        // Member headers start on even offsets. Align first and only then
        // test for the end of the file: an odd-sized final member is followed
        // by a single padding byte, which must not be taken for the start of
        // another header.
        const hdr_seek = std.mem.alignForward(usize, br.seek, 2);
        if (hdr_seek >= bytes.len) break;
        br.seek = hdr_seek;

        const hdr = try br.takeStruct(elf.ar_hdr);
        if (!mem.eql(u8, &hdr.ar_fmag, elf.ARFMAG)) return error.InvalidArchiveHeaderMagicNumber;

        const data = try br.take(try hdr.size());

        if (hdr.isSymtab()) {
            try ctx.parseSymtab(data, .p32);
            continue;
        }
        if (hdr.isSymtab64()) {
            try ctx.parseSymtab(data, .p64);
            continue;
        }
        if (hdr.isStrtab()) {
            ctx.strtab = data;
            continue;
        }
        if (hdr.isSymdef() or hdr.isSymdefSorted()) continue;

        // Members are keyed by the offset of their header, which is what the
        // archive symbol table refers to.
        const name = hdr.name() orelse ctx.getString((try hdr.nameOffset()).?);
        try ctx.objects.putNoClobber(gpa, hdr_seek, .{
            .name = name,
            .data = data,
        });
    }

    var aw: std.io.Writer.Allocating = .init(gpa);
    defer aw.deinit();
    const bw = &aw.writer;

    switch (check.kind) {
        .archive_symtab => if (ctx.symtab.len > 0) {
            try ctx.dumpSymtab(bw);
        } else return step.fail("no archive symbol table found", .{}),

        else => if (ctx.objects.count() > 0) {
            try ctx.dumpObjects(step, check, bw);
        } else return step.fail("empty archive", .{}),
    }

    return aw.toOwnedSlice();
}
/// Parsed view of an archive: its symbol table, its long-name string table
/// and the object members found in it, keyed by the offset of their header.
const ArchiveContext = struct {
    gpa: Allocator,
    /// The complete archive bytes.
    data: []const u8,
    /// Allocated with `gpa` by `parseSymtab`; released by `deinit`.
    symtab: []ArSymtabEntry,
    strtab: []const u8,
    objects: std.AutoArrayHashMapUnmanaged(usize, struct { name: []const u8, data: []const u8 }),

    /// Releases the symbol table and the member map.
    fn deinit(ctx: *ArchiveContext) void {
        ctx.gpa.free(ctx.symtab);
        ctx.objects.deinit(ctx.gpa);
    }

    /// Parses the archive symbol table stored in `data` into `ctx.symtab`.
    ///
    /// Layout: a big-endian count `num`, then `num` big-endian member
    /// offsets, then `num` NUL-terminated names. Counts and offsets are 32 or
    /// 64 bits wide depending on `ptr_width`.
    ///
    /// Returns `error.EndOfStream` when the table is truncated and
    /// `error.Overflow` when its size does not fit in `usize`.
    fn parseSymtab(ctx: *ArchiveContext, data: []const u8, ptr_width: enum { p32, p64 }) !void {
        var br: std.io.Reader = .fixed(data);
        const num = switch (ptr_width) {
            .p32 => try br.takeInt(u32, .big),
            .p64 => try br.takeInt(u64, .big),
        };
        const ptr_size: usize = switch (ptr_width) {
            .p32 => @sizeOf(u32),
            .p64 => @sizeOf(u64),
        };

        // Locate the names by slicing `data`, so that the reader stays at
        // the first offset. Skipping ahead with the reader made the loop
        // below read its "offsets" out of the name bytes.
        const count = math.cast(usize, num) orelse return error.Overflow;
        const offsets_len = try math.mul(usize, count, ptr_size);
        const strtab_off = try math.add(usize, ptr_size, offsets_len);
        if (strtab_off > data.len) return error.EndOfStream;
        const strtab = data[strtab_off..];

        assert(ctx.symtab.len == 0);
        ctx.symtab = try ctx.gpa.alloc(ArSymtabEntry, count);

        var stroff: usize = 0;
        for (ctx.symtab) |*entry| {
            const off = switch (ptr_width) {
                .p32 => try br.takeInt(u32, .big),
                .p64 => try br.takeInt(u64, .big),
            };
            // Bounded search: a missing terminator is an error instead of a
            // read past the end of the table.
            const rest = strtab[stroff..];
            const name_len = mem.indexOfScalar(u8, rest, 0) orelse return error.EndOfStream;
            const name: [:0]const u8 = rest[0..name_len :0];
            stroff += name_len + 1;
            entry.* = .{ .off = off, .name = name };
        }
    }

    /// Writes `archive_symtab_label` and then, for every member referenced
    /// by the symbol table, the member name followed by its symbols.
    fn dumpSymtab(ctx: ArchiveContext, bw: *Writer) !void {
        // Group symbol names by the member offset they refer to, keeping
        // first-seen order.
        var symbols: std.AutoArrayHashMap(usize, std.ArrayList([]const u8)) = .init(ctx.gpa);
        defer {
            for (symbols.values()) |*value| value.deinit();
            symbols.deinit();
        }

        for (ctx.symtab) |entry| {
            const gop = try symbols.getOrPut(@intCast(entry.off));
            if (!gop.found_existing) gop.value_ptr.* = .init(ctx.gpa);
            try gop.value_ptr.append(entry.name);
        }

        try bw.print("{s}\n", .{archive_symtab_label});
        for (symbols.keys(), symbols.values()) |off, values| {
            try bw.print("in object {s}\n", .{ctx.objects.get(off).?.name});
            for (values.items) |value| try bw.print("{s}\n", .{value});
        }
    }

    /// Writes, for every member, an `object <name>` line followed by the
    /// dump of that member as produced by `parseAndDumpObject`.
    fn dumpObjects(ctx: ArchiveContext, step: *Step, check: Check, bw: *Writer) !void {
        for (ctx.objects.values()) |object| {
            try bw.print("object {s}\n", .{object.name});
            const output = try parseAndDumpObject(step, check, object.data);
            defer ctx.gpa.free(output);
            try bw.print("{s}\n", .{output});
        }
    }

    /// Returns the long member name stored at `off` in `ctx.strtab`, without
    /// its final byte.
    fn getString(ctx: ArchiveContext, off: u32) []const u8 {
        assert(off < ctx.strtab.len);
        // NOTE(review): presumably each entry is terminated by "/\n", which
        // is why the last byte of the located name is dropped — confirm.
        const name = mem.sliceTo(@as([*:'\n']const u8, @ptrCast(ctx.strtab[off..].ptr)), 0);
        return name[0 .. name.len - 1];
    }

    /// One archive symbol table entry: a symbol name and the offset of the
    /// header of the member it belongs to.
    const ArSymtabEntry = struct {
        name: [:0]const u8,
        off: u64,
    };
};
/// Parses `bytes` as a 64-bit ELF object and returns the textual dump
/// selected by `check.kind`, allocated with the step owner's allocator.
///
/// Returns `error.InvalidMagicNumber` when `bytes` does not start with the
/// ELF magic. A missing table or section, or a check kind that does not apply
/// to ELF, is reported through `step.fail`.
fn parseAndDumpObject(step: *Step, check: Check, bytes: []const u8) ![]const u8 {
    const gpa = step.owner.allocator;
    var br: std.io.Reader = .fixed(bytes);

    const ehdr = try br.takeStruct(elf.Elf64_Ehdr);
    if (!mem.eql(u8, ehdr.e_ident[0..4], "\x7fELF")) return error.InvalidMagicNumber;

    // The header tables are viewed in place inside `bytes`.
    const shdr_base: [*]align(1) const elf.Elf64_Shdr = @ptrCast(bytes[ehdr.e_shoff..].ptr);
    const phdr_base: [*]align(1) const elf.Elf64_Phdr = @ptrCast(bytes[ehdr.e_phoff..].ptr);

    var ctx: ObjectContext = .{
        .gpa = gpa,
        .data = bytes,
        .hdr = ehdr,
        .shdrs = shdr_base[0..ehdr.e_shnum],
        .phdrs = phdr_base[0..ehdr.e_phnum],
        .shstrtab = undefined,
        .symtab = .{},
        .dysymtab = .{},
    };
    ctx.shstrtab = ctx.getSectionContents(ctx.hdr.e_shstrndx);

    // Record the static and the dynamic symbol table, each paired with the
    // string table its section header links to.
    for (ctx.shdrs, 0..) |shdr, shndx| {
        const is_symtab = shdr.sh_type == elf.SHT_SYMTAB;
        const is_dynsym = shdr.sh_type == elf.SHT_DYNSYM;
        if (!is_symtab and !is_dynsym) continue;

        const raw = ctx.getSectionContents(shndx);
        const nsyms = @divExact(raw.len, @sizeOf(elf.Elf64_Sym));
        const sym_base: [*]align(1) const elf.Elf64_Sym = @ptrCast(raw.ptr);
        const table: Symtab = .{
            .symbols = sym_base[0..nsyms],
            .strings = ctx.getSectionContents(shdr.sh_link),
        };
        if (is_symtab) ctx.symtab = table else ctx.dysymtab = table;
    }

    var aw: std.io.Writer.Allocating = .init(gpa);
    defer aw.deinit();
    const bw = &aw.writer;

    switch (check.kind) {
        .headers => {
            try ctx.dumpHeader(bw);
            try ctx.dumpShdrs(bw);
            try ctx.dumpPhdrs(bw);
        },
        .symtab => {
            if (ctx.symtab.symbols.len == 0) return step.fail("no symbol table found", .{});
            try ctx.dumpSymtab(.symtab, bw);
        },
        .dynamic_symtab => {
            if (ctx.dysymtab.symbols.len == 0) return step.fail("no dynamic symbol table found", .{});
            try ctx.dumpSymtab(.dysymtab, bw);
        },
        .dynamic_section => {
            const shndx = ctx.getSectionByName(".dynamic") orelse
                return step.fail("no .dynamic section found", .{});
            try ctx.dumpDynamicSection(shndx, bw);
        },
        .dump_section => {
            // The section name is stored NUL-terminated inside `check.data`.
            const name = mem.sliceTo(@as([*:0]const u8, @ptrCast(check.data.items[check.payload.dump_section..].ptr)), 0);
            const shndx = ctx.getSectionByName(name) orelse return step.fail("no '{s}' section found", .{name});
            try ctx.dumpSection(shndx, bw);
        },
        else => return step.fail("invalid check kind for ELF file format: {s}", .{@tagName(check.kind)}),
    }

    return aw.toOwnedSlice();
}
const ObjectContext = struct {
/// Allocator of the owning build (`step.owner.allocator`).
gpa: Allocator,
/// The complete bytes of the object file being inspected.
data: []const u8,
/// ELF file header, read from the start of `data`.
hdr: *align(1) const elf.Elf64_Ehdr,
/// Section header table: `e_shnum` entries located at `e_shoff` in `data`.
shdrs: []align(1) const elf.Elf64_Shdr,
/// Program header table: `e_phnum` entries located at `e_phoff` in `data`.
phdrs: []align(1) const elf.Elf64_Phdr,
/// Contents of the section at index `e_shstrndx`; section names are looked
/// up in it.
shstrtab: []const u8,
/// Symbols and strings of the `SHT_SYMTAB` section; empty when there is none.
symtab: Symtab,
/// Symbols and strings of the `SHT_DYNSYM` section; empty when there is none.
dysymtab: Symtab,
/// Writes the `header` block: the file type by tag name and the entry point
/// in hexadecimal.
fn dumpHeader(ctx: ObjectContext, bw: *Writer) !void {
    const ehdr = ctx.hdr;
    try bw.writeAll("header\n");
    try bw.print("type {s}\n", .{@tagName(ehdr.e_type)});
    try bw.print("entry {x}\n", .{ehdr.e_entry});
}
/// Writes the `program headers` block with one record per program header.
/// Writes nothing when the file has no program headers.
fn dumpPhdrs(ctx: ObjectContext, bw: *Writer) !void {
    if (ctx.phdrs.len == 0) return;

    // Flag bits in the order they are printed, with the text emitted for each.
    const FlagName = struct { mask: u32, text: []const u8 };
    const flag_names = [_]FlagName{
        .{ .mask = elf.PF_R, .text = "R" },
        .{ .mask = elf.PF_W, .text = "W" },
        .{ .mask = elf.PF_X, .text = "E" },
        .{ .mask = elf.PF_MASKOS, .text = "OS" },
        .{ .mask = elf.PF_MASKPROC, .text = "PROC" },
    };

    try bw.writeAll("program headers\n");

    for (ctx.phdrs, 0..) |phdr, phndx| {
        try bw.print("phdr {d}\n", .{phndx});
        try bw.print("type {f}\n", .{fmtPhType(phdr.p_type)});
        try bw.print("vaddr {x}\n", .{phdr.p_vaddr});
        try bw.print("paddr {x}\n", .{phdr.p_paddr});
        try bw.print("offset {x}\n", .{phdr.p_offset});
        try bw.print("memsz {x}\n", .{phdr.p_memsz});
        try bw.print("filesz {x}\n", .{phdr.p_filesz});
        try bw.print("align {x}\n", .{phdr.p_align});

        try bw.writeAll("flags");
        // The separating space is only written when at least one bit is set.
        if (phdr.p_flags > 0) try bw.writeByte(' ');
        for (flag_names) |flag| {
            if (phdr.p_flags & flag.mask != 0) try bw.writeAll(flag.text);
        }
        try bw.writeByte('\n');
    }
}
/// Writes the `section headers` block with one record per section header.
/// Writes nothing when the file has no section headers.
fn dumpShdrs(ctx: ObjectContext, bw: *Writer) !void {
    const headers = ctx.shdrs;
    if (headers.len == 0) return;

    try bw.writeAll("section headers\n");

    var shndx: usize = 0;
    while (shndx < headers.len) : (shndx += 1) {
        const shdr = headers[shndx];
        try bw.print("shdr {d}\n", .{shndx});
        try bw.print("name {s}\n", .{ctx.getSectionName(shndx)});
        try bw.print("type {f}\n", .{fmtShType(shdr.sh_type)});
        try bw.print("addr {x}\n", .{shdr.sh_addr});
        try bw.print("offset {x}\n", .{shdr.sh_offset});
        try bw.print("size {x}\n", .{shdr.sh_size});
        try bw.print("addralign {x}\n", .{shdr.sh_addralign});
        // TODO dump formatted sh_flags
    }
}
/// Writes `dynamic_section_label` followed by one line per entry of the
/// dynamic section at index `shndx`.
///
/// Each line starts with the tag name ("UNKNOWN" for tags not listed below).
/// The value that follows is printed according to the tag: as a string from
/// the section's linked string table, as hexadecimal, as decimal, as a
/// relocation kind, or as a list of flag names.
fn dumpDynamicSection(ctx: ObjectContext, shndx: usize, bw: *Writer) !void {
const shdr = ctx.shdrs[shndx];
// `sh_link` of the dynamic section selects the string table used for the
// string-valued entries.
const strtab = ctx.getSectionContents(shdr.sh_link);
const data = ctx.getSectionContents(shndx);
const nentries = @divExact(data.len, @sizeOf(elf.Elf64_Dyn));
const entries = @as([*]align(1) const elf.Elf64_Dyn, @ptrCast(data.ptr))[0..nentries];
try bw.writeAll(ElfDumper.dynamic_section_label ++ "\n");
for (entries) |entry| {
const key = @as(u64, @bitCast(entry.d_tag));
const value = entry.d_val;
const key_str = switch (key) {
elf.DT_NEEDED => "NEEDED",
elf.DT_SONAME => "SONAME",
elf.DT_INIT_ARRAY => "INIT_ARRAY",
elf.DT_INIT_ARRAYSZ => "INIT_ARRAYSZ",
elf.DT_FINI_ARRAY => "FINI_ARRAY",
elf.DT_FINI_ARRAYSZ => "FINI_ARRAYSZ",
elf.DT_HASH => "HASH",
elf.DT_GNU_HASH => "GNU_HASH",
elf.DT_STRTAB => "STRTAB",
elf.DT_SYMTAB => "SYMTAB",
elf.DT_STRSZ => "STRSZ",
elf.DT_SYMENT => "SYMENT",
elf.DT_PLTGOT => "PLTGOT",
elf.DT_PLTRELSZ => "PLTRELSZ",
elf.DT_PLTREL => "PLTREL",
elf.DT_JMPREL => "JMPREL",
elf.DT_RELA => "RELA",
elf.DT_RELASZ => "RELASZ",
elf.DT_RELAENT => "RELAENT",
elf.DT_VERDEF => "VERDEF",
elf.DT_VERDEFNUM => "VERDEFNUM",
elf.DT_FLAGS => "FLAGS",
elf.DT_FLAGS_1 => "FLAGS_1",
elf.DT_VERNEED => "VERNEED",
elf.DT_VERNEEDNUM => "VERNEEDNUM",
elf.DT_VERSYM => "VERSYM",
elf.DT_RELACOUNT => "RELACOUNT",
elf.DT_RPATH => "RPATH",
elf.DT_RUNPATH => "RUNPATH",
elf.DT_INIT => "INIT",
elf.DT_FINI => "FINI",
elf.DT_NULL => "NULL",
else => "UNKNOWN",
};
try bw.print("{s}", .{key_str});
switch (key) {
// String-valued entries: the value is an offset into `strtab`.
elf.DT_NEEDED,
elf.DT_SONAME,
elf.DT_RPATH,
elf.DT_RUNPATH,
=> {
const name = getString(strtab, @intCast(value));
try bw.print(" {s}", .{name});
},
// Entries printed in hexadecimal.
elf.DT_INIT_ARRAY,
elf.DT_FINI_ARRAY,
elf.DT_HASH,
elf.DT_GNU_HASH,
elf.DT_STRTAB,
elf.DT_SYMTAB,
elf.DT_PLTGOT,
elf.DT_JMPREL,
elf.DT_RELA,
elf.DT_VERDEF,
elf.DT_VERNEED,
elf.DT_VERSYM,
elf.DT_INIT,
elf.DT_FINI,
elf.DT_NULL,
=> try bw.print(" {x}", .{value}),
// Entries printed in decimal.
elf.DT_INIT_ARRAYSZ,
elf.DT_FINI_ARRAYSZ,
elf.DT_STRSZ,
elf.DT_SYMENT,
elf.DT_PLTRELSZ,
elf.DT_RELASZ,
elf.DT_RELAENT,
elf.DT_RELACOUNT,
=> try bw.print(" {d}", .{value}),
elf.DT_PLTREL => try bw.writeAll(switch (value) {
elf.DT_REL => " REL",
elf.DT_RELA => " RELA",
else => " UNKNOWN",
}),
// Flag words: one name per set bit, in the order tested below.
elf.DT_FLAGS => if (value > 0) {
if (value & elf.DF_ORIGIN != 0) try bw.writeAll(" ORIGIN");
if (value & elf.DF_SYMBOLIC != 0) try bw.writeAll(" SYMBOLIC");
if (value & elf.DF_TEXTREL != 0) try bw.writeAll(" TEXTREL");
if (value & elf.DF_BIND_NOW != 0) try bw.writeAll(" BIND_NOW");
if (value & elf.DF_STATIC_TLS != 0) try bw.writeAll(" STATIC_TLS");
},
elf.DT_FLAGS_1 => if (value > 0) {
if (value & elf.DF_1_NOW != 0) try bw.writeAll(" NOW");
if (value & elf.DF_1_GLOBAL != 0) try bw.writeAll(" GLOBAL");
if (value & elf.DF_1_GROUP != 0) try bw.writeAll(" GROUP");
if (value & elf.DF_1_NODELETE != 0) try bw.writeAll(" NODELETE");
if (value & elf.DF_1_LOADFLTR != 0) try bw.writeAll(" LOADFLTR");
if (value & elf.DF_1_INITFIRST != 0) try bw.writeAll(" INITFIRST");
if (value & elf.DF_1_NOOPEN != 0) try bw.writeAll(" NOOPEN");
if (value & elf.DF_1_ORIGIN != 0) try bw.writeAll(" ORIGIN");
if (value & elf.DF_1_DIRECT != 0) try bw.writeAll(" DIRECT");
if (value & elf.DF_1_TRANS != 0) try bw.writeAll(" TRANS");
if (value & elf.DF_1_INTERPOSE != 0) try bw.writeAll(" INTERPOSE");
if (value & elf.DF_1_NODEFLIB != 0) try bw.writeAll(" NODEFLIB");
if (value & elf.DF_1_NODUMP != 0) try bw.writeAll(" NODUMP");
if (value & elf.DF_1_CONFALT != 0) try bw.writeAll(" CONFALT");
if (value & elf.DF_1_ENDFILTEE != 0) try bw.writeAll(" ENDFILTEE");
if (value & elf.DF_1_DISPRELDNE != 0) try bw.writeAll(" DISPRELDNE");
if (value & elf.DF_1_DISPRELPND != 0) try bw.writeAll(" DISPRELPND");
if (value & elf.DF_1_NODIRECT != 0) try bw.writeAll(" NODIRECT");
if (value & elf.DF_1_IGNMULDEF != 0) try bw.writeAll(" IGNMULDEF");
if (value & elf.DF_1_NOKSYMS != 0) try bw.writeAll(" NOKSYMS");
if (value & elf.DF_1_NOHDR != 0) try bw.writeAll(" NOHDR");
if (value & elf.DF_1_EDITED != 0) try bw.writeAll(" EDITED");
if (value & elf.DF_1_NORELOC != 0) try bw.writeAll(" NORELOC");
if (value & elf.DF_1_SYMINTPOSE != 0) try bw.writeAll(" SYMINTPOSE");
if (value & elf.DF_1_GLOBAUDIT != 0) try bw.writeAll(" GLOBAUDIT");
if (value & elf.DF_1_SINGLETON != 0) try bw.writeAll(" SINGLETON");
if (value & elf.DF_1_STUB != 0) try bw.writeAll(" STUB");
if (value & elf.DF_1_PIE != 0) try bw.writeAll(" PIE");
},
// Every other tag, including unknown ones: raw value in hexadecimal.
else => try bw.print(" {x}", .{value}),
}
try bw.writeByte('\n');
}
}
/// Writes the label of the selected symbol table followed by one line per
/// symbol: value, size, section index, type, binding, visibility and name.
///
/// Section indices, types and bindings that fall into one of the reserved
/// LO..HI ranges are printed relative to the start of the range. The HI
/// bounds are treated as inclusive, as the ELF specification defines them.
/// The `.?` on the name lookup assumes every index in the table resolves.
fn dumpSymtab(ctx: ObjectContext, comptime @"type": enum { symtab, dysymtab }, bw: *Writer) !void {
    const symtab = switch (@"type") {
        .symtab => ctx.symtab,
        .dysymtab => ctx.dysymtab,
    };

    try bw.writeAll(switch (@"type") {
        .symtab => symtab_label,
        .dysymtab => dynamic_symtab_label,
    } ++ "\n");

    for (symtab.symbols, 0..) |sym, index| {
        try bw.print("{x} {x}", .{ sym.st_value, sym.st_size });

        // Section index column.
        {
            if (elf.SHN_LORESERVE <= sym.st_shndx and sym.st_shndx <= elf.SHN_HIRESERVE) {
                if (elf.SHN_LOPROC <= sym.st_shndx and sym.st_shndx <= elf.SHN_HIPROC) {
                    try bw.print(" LO+{d}", .{sym.st_shndx - elf.SHN_LOPROC});
                } else {
                    const sym_ndx = switch (sym.st_shndx) {
                        elf.SHN_ABS => "ABS",
                        elf.SHN_COMMON => "COM",
                        elf.SHN_LIVEPATCH => "LIV",
                        else => "UNK",
                    };
                    try bw.print(" {s}", .{sym_ndx});
                }
            } else if (sym.st_shndx == elf.SHN_UNDEF) {
                try bw.writeAll(" UND");
            } else {
                try bw.print(" {x}", .{sym.st_shndx});
            }
        }

        // Type column.
        blk: {
            const tt = sym.st_type();
            const sym_type = switch (tt) {
                elf.STT_NOTYPE => "NOTYPE",
                elf.STT_OBJECT => "OBJECT",
                elf.STT_FUNC => "FUNC",
                elf.STT_SECTION => "SECTION",
                elf.STT_FILE => "FILE",
                elf.STT_COMMON => "COMMON",
                elf.STT_TLS => "TLS",
                elf.STT_NUM => "NUM",
                elf.STT_GNU_IFUNC => "IFUNC",
                else => if (elf.STT_LOPROC <= tt and tt <= elf.STT_HIPROC) {
                    break :blk try bw.print(" LOPROC+{d}", .{tt - elf.STT_LOPROC});
                } else if (elf.STT_LOOS <= tt and tt <= elf.STT_HIOS) {
                    break :blk try bw.print(" LOOS+{d}", .{tt - elf.STT_LOOS});
                } else "UNK",
            };
            try bw.print(" {s}", .{sym_type});
        }

        // Binding column.
        blk: {
            const bind = sym.st_bind();
            const sym_bind = switch (bind) {
                elf.STB_LOCAL => "LOCAL",
                elf.STB_GLOBAL => "GLOBAL",
                elf.STB_WEAK => "WEAK",
                elf.STB_NUM => "NUM",
                else => if (elf.STB_LOPROC <= bind and bind <= elf.STB_HIPROC) {
                    break :blk try bw.print(" LOPROC+{d}", .{bind - elf.STB_LOPROC});
                } else if (elf.STB_LOOS <= bind and bind <= elf.STB_HIOS) {
                    break :blk try bw.print(" LOOS+{d}", .{bind - elf.STB_LOOS});
                } else "UNKNOWN",
            };
            try bw.print(" {s}", .{sym_bind});
        }

        // Visibility is taken from the low two bits of `st_other`.
        const sym_vis = @as(elf.STV, @enumFromInt(@as(u2, @truncate(sym.st_other))));
        try bw.print(" {s}", .{@tagName(sym_vis)});

        // Section symbols are named after their section.
        const sym_name = switch (sym.st_type()) {
            elf.STT_SECTION => ctx.getSectionName(sym.st_shndx),
            else => symtab.getName(index).?,
        };
        try bw.print(" {s}\n", .{sym_name});
    }
}
/// Writes the raw contents of the section at index `shndx` to `bw`.
fn dumpSection(ctx: ObjectContext, shndx: usize, bw: *Writer) !void {
    const contents = ctx.getSectionContents(shndx);
    try bw.writeAll(contents);
}
/// Returns the name of the section at index `shndx`, looked up in the
/// section header string table.
inline fn getSectionName(ctx: ObjectContext, shndx: usize) []const u8 {
    const name_off = ctx.shdrs[shndx].sh_name;
    return getString(ctx.shstrtab, name_off);
}
/// Returns the bytes of the section at index `shndx` as a view into
/// `ctx.data`. Asserts that the section lies within the file.
fn getSectionContents(ctx: ObjectContext, shndx: usize) []const u8 {
    const shdr = ctx.shdrs[shndx];
    const start = shdr.sh_offset;
    const size = shdr.sh_size;
    assert(start < ctx.data.len);
    assert(start + size <= ctx.data.len);
    return ctx.data[start..][0..size];
}
/// Returns the index of the first section called `name`, or null when no
/// section has that name.
fn getSectionByName(ctx: ObjectContext, name: []const u8) ?usize {
    var shndx: usize = 0;
    while (shndx < ctx.shdrs.len) : (shndx += 1) {
        if (mem.eql(u8, ctx.getSectionName(shndx), name)) return shndx;
    }
    return null;
}
};
/// A symbol table paired with the string table its names are stored in.
/// Both default to empty.
const Symtab = struct {
    symbols: []align(1) const elf.Elf64_Sym = &[0]elf.Elf64_Sym{},
    strings: []const u8 = &[0]u8{},

    /// Returns the symbol at `index`, or null when `index` is out of range.
    fn get(st: Symtab, index: usize) ?elf.Elf64_Sym {
        return if (index < st.symbols.len) st.symbols[index] else null;
    }

    /// Returns the name of the symbol at `index`, or null when `index` is
    /// out of range.
    fn getName(st: Symtab, index: usize) ?[]const u8 {
        if (st.get(index)) |sym| return getString(st.strings, sym.st_name);
        return null;
    }
};
/// Returns the NUL-terminated string that starts at `off` in `strtab`,
/// without the terminator. A terminator must exist at or after `off`.
fn getString(strtab: []const u8, off: u32) []const u8 {
    const end = std.mem.indexOfScalarPos(u8, strtab, off, 0).?;
    return strtab[off..end];
}
/// Wraps `sh_type` so that printing it with `{f}` goes through `formatShType`.
fn fmtShType(sh_type: u32) std.fmt.Formatter(u32, formatShType) {
    const formatter: std.fmt.Formatter(u32, formatShType) = .{ .data = sh_type };
    return formatter;
}
/// Writes a human-readable name for the ELF section type `sh_type` to `w`.
///
/// Types listed explicitly are written by name. Any other value that falls
/// into the OS-, processor- or user-reserved range is written as
/// `LOOS+0x…`, `LOPROC+0x…` or `LOUSER+0x…`, where the hexadecimal number is
/// the offset from the start of that range. Everything else is written as
/// `UNKNOWN`.
fn formatShType(sh_type: u32, w: *Writer) Writer.Error!void {
    const name = switch (sh_type) {
        elf.SHT_NULL => "NULL",
        elf.SHT_PROGBITS => "PROGBITS",
        elf.SHT_SYMTAB => "SYMTAB",
        elf.SHT_STRTAB => "STRTAB",
        elf.SHT_RELA => "RELA",
        elf.SHT_HASH => "HASH",
        elf.SHT_DYNAMIC => "DYNAMIC",
        elf.SHT_NOTE => "NOTE",
        elf.SHT_NOBITS => "NOBITS",
        elf.SHT_REL => "REL",
        elf.SHT_SHLIB => "SHLIB",
        elf.SHT_DYNSYM => "DYNSYM",
        elf.SHT_INIT_ARRAY => "INIT_ARRAY",
        elf.SHT_FINI_ARRAY => "FINI_ARRAY",
        elf.SHT_PREINIT_ARRAY => "PREINIT_ARRAY",
        elf.SHT_GROUP => "GROUP",
        elf.SHT_SYMTAB_SHNDX => "SYMTAB_SHNDX",
        elf.SHT_X86_64_UNWIND => "X86_64_UNWIND",
        elf.SHT_LLVM_ADDRSIG => "LLVM_ADDRSIG",
        elf.SHT_GNU_HASH => "GNU_HASH",
        elf.SHT_GNU_VERDEF => "VERDEF",
        elf.SHT_GNU_VERNEED => "VERNEED",
        elf.SHT_GNU_VERSYM => "VERSYM",
        // The SHT_LO*/SHT_HI* pairs delimit inclusive ranges, so the upper
        // bound itself still belongs to its range.
        else => if (elf.SHT_LOOS <= sh_type and sh_type <= elf.SHT_HIOS) {
            return try w.print("LOOS+0x{x}", .{sh_type - elf.SHT_LOOS});
        } else if (elf.SHT_LOPROC <= sh_type and sh_type <= elf.SHT_HIPROC) {
            return try w.print("LOPROC+0x{x}", .{sh_type - elf.SHT_LOPROC});
        } else if (elf.SHT_LOUSER <= sh_type and sh_type <= elf.SHT_HIUSER) {
            return try w.print("LOUSER+0x{x}", .{sh_type - elf.SHT_LOUSER});
        } else "UNKNOWN",
    };
    try w.writeAll(name);
}
/// Wraps `ph_type` in a formatter that renders it through `formatPhType`.
fn fmtPhType(ph_type: u32) std.fmt.Formatter(u32, formatPhType) {
    const formatter: std.fmt.Formatter(u32, formatPhType) = .{ .data = ph_type };
    return formatter;
}
/// Writes a human-readable name for the ELF program header type `ph_type`
/// to `w`.
///
/// Types listed explicitly are written by name. Any other value that falls
/// into the OS- or processor-reserved range is written as `LOOS+0x…` or
/// `LOPROC+0x…`, where the hexadecimal number is the offset from the start
/// of that range. Everything else is written as `UNKNOWN`.
fn formatPhType(ph_type: u32, w: *Writer) Writer.Error!void {
    const p_type = switch (ph_type) {
        elf.PT_NULL => "NULL",
        elf.PT_LOAD => "LOAD",
        elf.PT_DYNAMIC => "DYNAMIC",
        elf.PT_INTERP => "INTERP",
        elf.PT_NOTE => "NOTE",
        elf.PT_SHLIB => "SHLIB",
        elf.PT_PHDR => "PHDR",
        elf.PT_TLS => "TLS",
        elf.PT_NUM => "NUM",
        elf.PT_GNU_EH_FRAME => "GNU_EH_FRAME",
        elf.PT_GNU_STACK => "GNU_STACK",
        elf.PT_GNU_RELRO => "GNU_RELRO",
        // The PT_LO*/PT_HI* pairs delimit inclusive ranges, so the upper
        // bound itself still belongs to its range.
        else => if (elf.PT_LOOS <= ph_type and ph_type <= elf.PT_HIOS) {
            return try w.print("LOOS+0x{x}", .{ph_type - elf.PT_LOOS});
        } else if (elf.PT_LOPROC <= ph_type and ph_type <= elf.PT_HIPROC) {
            return try w.print("LOPROC+0x{x}", .{ph_type - elf.PT_LOPROC});
        } else "UNKNOWN",
    };
    try w.writeAll(p_type);
}
};
const WasmDumper = struct {
const symtab_label = "symbols";
/// Validates the 8-byte Wasm preamble of `bytes` and returns a textual dump
/// of the module, allocated with `step.owner.allocator`.
///
/// Returns `error.InvalidMagicByte` or `error.UnsupportedWasmVersion` when
/// the preamble does not match `std.wasm.magic` / `std.wasm.version`. If the
/// input ends in the middle of parsing, the dump produced so far is returned
/// with a trailing `<UnexpectedEndOfStream>` marker instead of an error.
fn parseAndDump(step: *Step, check: Check, bytes: []const u8) ![]const u8 {
    var br: std.io.Reader = .fixed(bytes);

    const preamble = try br.takeArray(8);
    const magic = preamble[0..4];
    const version = preamble[4..8];
    if (!mem.eql(u8, magic, &std.wasm.magic)) return error.InvalidMagicByte;
    if (!mem.eql(u8, version, &std.wasm.version)) return error.UnsupportedWasmVersion;

    var aw: std.io.Writer.Allocating = .init(step.owner.allocator);
    defer aw.deinit();
    const bw = &aw.writer;

    parseAndDumpInner(step, check, &br, bw) catch |err| switch (err) {
        // Truncated input is reported inside the dump rather than as an error.
        error.EndOfStream => try bw.writeAll("\n<UnexpectedEndOfStream>"),
        else => |other| return other,
    };
    return aw.toOwnedSlice();
}
/// Walks the sections of the module read from `br` and dumps each one to
/// `bw` via `parseAndDumpSection`.
///
/// Only the `.headers` check kind is supported; any other kind fails the
/// step. Running out of input while reading a section id ends the walk
/// normally; an unrecognised section id fails the step.
fn parseAndDumpInner(
    step: *Step,
    check: Check,
    br: *std.io.Reader,
    bw: *Writer,
) !void {
    switch (check.kind) {
        .headers => {},
        else => return step.fail("invalid check kind for Wasm file format: {s}", .{@tagName(check.kind)}),
    }

    while (true) {
        const section = br.takeEnum(std.wasm.Section, .little) catch |err| switch (err) {
            error.InvalidEnumTag => return step.fail("invalid section id", .{}),
            error.EndOfStream => return,
            else => |other| return other,
        };
        // Each section is parsed through its own reader limited to the
        // section payload.
        const payload_len = try br.takeLeb128(u32);
        var section_br: std.io.Reader = .fixed(try br.take(payload_len));
        try parseAndDumpSection(step, section, &section_br, bw);
    }
}
/// Dumps one section to `bw`: a `Section <name>` / `size <n>` header followed
/// by a section-specific body.
///
/// `br` is expected to be a reader over exactly the section payload; the
/// reported size is the length of its buffer (see `parseAndDumpInner`).
/// Custom sections named "name", "producers" and "target_features" are
/// decoded; other custom sections only get their name printed. Sections not
/// listed in the switch are skipped.
fn parseAndDumpSection(
    step: *Step,
    section: std.wasm.Section,
    br: *std.io.Reader,
    bw: *Writer,
) !void {
    try bw.print("Section {s}\nsize {d}", .{ @tagName(section), br.buffer.len });

    switch (section) {
        .type,
        .import,
        .function,
        .table,
        .memory,
        .global,
        .@"export",
        .element,
        .code,
        .data,
        => {
            // These sections all start with an entry count.
            const entry_count = try br.takeLeb128(u32);
            try bw.print("\nentries {d}\n", .{entry_count});
            try parseSection(step, section, br, entry_count, bw);
        },
        .custom => {
            const name_len = try br.takeLeb128(u32);
            const name = try br.take(name_len);
            try bw.print("\nname {s}\n", .{name});

            if (mem.eql(u8, name, "name")) {
                try parseDumpNames(step, br, bw);
            } else if (mem.eql(u8, name, "producers")) {
                try parseDumpProducers(br, bw);
            } else if (mem.eql(u8, name, "target_features")) {
                try parseDumpFeatures(br, bw);
            }
            // TODO: Implement parsing and dumping other custom sections (such as relocations)
        },
        .start => {
            const start_index = try br.takeLeb128(u32);
            try bw.print("\nstart {d}\n", .{start_index});
        },
        .data_count => {
            const segment_count = try br.takeLeb128(u32);
            try bw.print("\ncount {d}\n", .{segment_count});
        },
        else => {}, // skip unknown sections
    }
}
/// Dumps the `entries` entries of a standard section to `bw`, reading them
/// from `br`.
///
/// `section` must be one of the sections handled in the switch below; any
/// other value hits `unreachable`. Malformed entries fail the step with a
/// descriptive message. The code section is treated as opaque and produces
/// no output.
fn parseSection(step: *Step, section: std.wasm.Section, br: *std.io.Reader, entries: u32, bw: *Writer) !void {
    switch (section) {
        .type => for (0..entries) |_| {
            const form = try br.takeByte();
            if (form != std.wasm.function_type) {
                return step.fail("expected function type, found byte '{d}'", .{form});
            }

            const param_count = try br.takeLeb128(u32);
            try bw.print("params {d}\n", .{param_count});
            for (0..param_count) |_| {
                _ = try parseDumpType(step, std.wasm.Valtype, br, bw);
            }

            const return_count = try br.takeLeb128(u32);
            try bw.print("returns {d}\n", .{return_count});
            for (0..return_count) |_| {
                _ = try parseDumpType(step, std.wasm.Valtype, br, bw);
            }
        },
        .import => for (0..entries) |_| {
            const module_name = try br.take(try br.takeLeb128(u32));
            const field_name = try br.take(try br.takeLeb128(u32));
            const kind = br.takeEnum(std.wasm.ExternalKind, .little) catch |err| switch (err) {
                error.InvalidEnumTag => return step.fail("invalid import kind", .{}),
                else => |other| return other,
            };
            try bw.print("module {s}\nname {s}\nkind {s}", .{ module_name, field_name, @tagName(kind) });
            try bw.writeByte('\n');

            // The remainder of the entry depends on what is being imported.
            switch (kind) {
                .function => {
                    const type_index = try br.takeLeb128(u32);
                    try bw.print("index {d}\n", .{type_index});
                },
                .memory => try parseDumpLimits(br, bw),
                .global => {
                    _ = try parseDumpType(step, std.wasm.Valtype, br, bw);
                    const mutability = try br.takeLeb128(u32);
                    try bw.print("mutable {}\n", .{0x01 == mutability});
                },
                .table => {
                    _ = try parseDumpType(step, std.wasm.RefType, br, bw);
                    try parseDumpLimits(br, bw);
                },
            }
        },
        .function => for (0..entries) |_| {
            const type_index = try br.takeLeb128(u32);
            try bw.print("index {d}\n", .{type_index});
        },
        .table => for (0..entries) |_| {
            _ = try parseDumpType(step, std.wasm.RefType, br, bw);
            try parseDumpLimits(br, bw);
        },
        .memory => for (0..entries) |_| {
            try parseDumpLimits(br, bw);
        },
        .global => for (0..entries) |_| {
            _ = try parseDumpType(step, std.wasm.Valtype, br, bw);
            const mutability = try br.takeLeb128(u1);
            try bw.print("mutable {}\n", .{0x01 == mutability});
            try parseDumpInit(step, br, bw);
        },
        .@"export" => for (0..entries) |_| {
            const export_name = try br.take(try br.takeLeb128(u32));
            const kind = br.takeEnum(std.wasm.ExternalKind, .little) catch |err| switch (err) {
                error.InvalidEnumTag => return step.fail("invalid export kind value", .{}),
                else => |other| return other,
            };
            const export_index = try br.takeLeb128(u32);
            try bw.print("name {s}\nkind {s}\nindex {d}", .{ export_name, @tagName(kind), export_index });
            try bw.writeByte('\n');
        },
        .element => for (0..entries) |_| {
            const table_index = try br.takeLeb128(u32);
            try bw.print("table index {d}\n", .{table_index});
            try parseDumpInit(step, br, bw);

            const function_count = try br.takeLeb128(u32);
            try bw.print("indexes {d}\n", .{function_count});
            for (0..function_count) |_| {
                const function_index = try br.takeLeb128(u32);
                try bw.print("index {d}\n", .{function_index});
            }
        },
        .code => {}, // code section is considered opaque to linker
        .data => for (0..entries) |_| {
            const SegmentFlags = packed struct(u32) {
                passive: bool,
                memidx: bool,
                unused: u30,
            };
            const flags: SegmentFlags = @bitCast(try br.takeLeb128(u32));

            // An explicit memory index is only encoded when `memidx` is set.
            const memory_index = if (flags.memidx) try br.takeLeb128(u32) else 0;
            try bw.print("memory index 0x{x}\n", .{memory_index});
            if (!flags.passive) try parseDumpInit(step, br, bw);

            const size = try br.takeLeb128(u32);
            try bw.print("size {d}\n", .{size});
            _ = try br.discard(.limited(size)); // we do not care about the content of the segments
        },
        else => unreachable,
    }
}
/// Reads one little-endian value of enum type `E` from `br`, writes
/// `type <tag name>` followed by a newline to `bw`, and returns the value.
///
/// Fails the step with "invalid wasm type value" when the value read is not
/// a valid tag of `E`.
fn parseDumpType(step: *Step, comptime E: type, br: *std.io.Reader, bw: *Writer) !E {
    if (br.takeEnum(E, .little)) |tag| {
        try bw.print("type {s}\n", .{@tagName(tag)});
        return tag;
    } else |err| switch (err) {
        error.InvalidEnumTag => return step.fail("invalid wasm type value", .{}),
        else => |other| return other,
    }
}
/// Reads a limits record (flags, minimum, optional maximum) from `br` and
/// writes `min <hex>` and, when present, `max <hex>` to `bw`, one per line.
///
/// The flags byte is a bit set; only its lowest bit signals that a maximum
/// follows. Other bits may be set without a maximum being encoded, so the
/// bit is tested explicitly instead of comparing the whole byte against zero.
fn parseDumpLimits(br: *std.io.Reader, bw: *Writer) !void {
    const has_max_bit: u8 = 0x01;

    const flags = try br.takeLeb128(u8);
    const min = try br.takeLeb128(u32);
    try bw.print("min {x}\n", .{min});
    if (flags & has_max_bit != 0) {
        const max = try br.takeLeb128(u32);
        try bw.print("max {x}\n", .{max});
    }
}
/// Reads an init expression from `br` and dumps it to `bw`.
///
/// The expression must consist of a single `i32.const`, `i64.const`,
/// `f32.const`, `f64.const` or `global.get` instruction followed by the
/// `end` opcode. The operand is printed with the `{x}` format specifier.
/// Any other opcode, or a missing `end`, fails the step: the bytes come from
/// the file being checked, so an unexpected opcode is reported as an error
/// rather than treated as unreachable.
fn parseDumpInit(step: *Step, br: *std.io.Reader, bw: *Writer) !void {
    const opcode = br.takeEnum(std.wasm.Opcode, .little) catch |err| switch (err) {
        error.InvalidEnumTag => return step.fail("invalid wasm opcode", .{}),
        else => |e| return e,
    };
    switch (opcode) {
        .i32_const => try bw.print("i32.const {x}\n", .{try br.takeLeb128(i32)}),
        .i64_const => try bw.print("i64.const {x}\n", .{try br.takeLeb128(i64)}),
        .f32_const => try bw.print("f32.const {x}\n", .{@as(f32, @bitCast(try br.takeInt(u32, .little)))}),
        .f64_const => try bw.print("f64.const {x}\n", .{@as(f64, @bitCast(try br.takeInt(u64, .little)))}),
        .global_get => try bw.print("global.get {x}\n", .{try br.takeLeb128(u32)}),
        else => return step.fail(
            "unsupported opcode 0x{x} in init expression",
            .{@intFromEnum(opcode)},
        ),
    }
    const end_opcode = try br.takeLeb128(u8);
    if (end_opcode != @intFromEnum(std.wasm.Opcode.end)) {
        return step.fail("expected 'end' opcode in init expression", .{});
    }
}
/// https://webassembly.github.io/spec/core/appendix/custom.html
///
/// Dumps the subsections of a custom "name" section, read from `br`, to `bw`.
///
/// Every subsection is printed as `type <kind>` followed by its contents.
/// Each subsection payload is parsed through its own reader covering exactly
/// the declared subsection size; `error.BadSubsectionSize` is returned when
/// the parsed contents do not consume that payload completely. Local-name
/// subsections are not implemented and fail the step, as do subsection kinds
/// not handled below.
fn parseDumpNames(step: *Step, br: *std.io.Reader, bw: *Writer) !void {
    var subsection_br: std.io.Reader = undefined;
    while (br.seek < br.buffer.len) {
        switch (try parseDumpType(step, std.wasm.NameSubsection, br, bw)) {
            // The module name subsection ... consists of a single name
            // that is assigned to the module itself.
            .module => {
                subsection_br = .fixed(try br.take(try br.takeLeb128(u32)));
                const name = try subsection_br.take(try subsection_br.takeLeb128(u32));
                try bw.print(
                    \\name {s}
                    \\
                , .{name});
                if (subsection_br.seek != subsection_br.buffer.len) return error.BadSubsectionSize;
            },

            // The function name subsection ... consists of a name map
            // assigning function names to function indices.
            .function, .global, .data_segment => {
                // `br` is already positioned after this subsection once its
                // payload has been taken, so the name map must be read from
                // `subsection_br`.
                subsection_br = .fixed(try br.take(try br.takeLeb128(u32)));
                const entries = try subsection_br.takeLeb128(u32);
                try bw.print(
                    \\names {d}
                    \\
                , .{entries});
                for (0..entries) |_| {
                    const index = try subsection_br.takeLeb128(u32);
                    const name = try subsection_br.take(try subsection_br.takeLeb128(u32));
                    try bw.print(
                        \\index {d}
                        \\name {s}
                        \\
                    , .{ index, name });
                }
                if (subsection_br.seek != subsection_br.buffer.len) return error.BadSubsectionSize;
            },

            // The local name subsection ... consists of an indirect name
            // map assigning local names to local indices grouped by
            // function indices.
            .local => {
                return step.fail("TODO implement parseDumpNames for local subsections", .{});
            },

            else => |t| return step.fail("invalid subsection type: {s}", .{@tagName(t)}),
        }
    }
}
/// Dumps the contents of a custom "producers" section, read from `br`, to
/// `bw`.
///
/// Prints the number of fields, then for every field its name and value
/// count, and for every value a `value_name` / `version` pair of strings.
fn parseDumpProducers(br: *std.io.Reader, bw: *Writer) !void {
    const field_count = try br.takeLeb128(u32);
    try bw.print("fields {d}\n", .{field_count});

    for (0..field_count) |_| {
        const field_name_len = try br.takeLeb128(u32);
        const field_name = try br.take(field_name_len);
        const value_count = try br.takeLeb128(u32);
        try bw.print("field_name {s}\nvalues {d}\n", .{ field_name, value_count });

        for (0..value_count) |_| {
            const value_len = try br.takeLeb128(u32);
            const value = try br.take(value_len);
            const version_len = try br.takeLeb128(u32);
            const version = try br.take(version_len);
            try bw.print("value_name {s}\nversion {s}\n", .{ value, version });
        }
    }
}
/// Dumps the contents of a custom "target_features" section, read from `br`,
/// to `bw`.
///
/// Prints the number of features, then one line per feature consisting of
/// its prefix byte rendered as a character, a space, and the feature name.
fn parseDumpFeatures(br: *std.io.Reader, bw: *Writer) !void {
    const feature_count = try br.takeLeb128(u32);
    try bw.print("features {d}\n", .{feature_count});

    for (0..feature_count) |_| {
        const prefix = try br.takeLeb128(u8);
        const name_len = try br.takeLeb128(u32);
        const name = try br.take(name_len);
        try bw.print("{c} {s}\n", .{ prefix, name });
    }
}
};