//! zig/lib/std/Build/Step/CheckObject.zig

const std = @import("std");
const assert = std.debug.assert;
const elf = std.elf;
const fs = std.fs;
const macho = std.macho;
const math = std.math;
const mem = std.mem;
const testing = std.testing;
const CheckObject = @This();
const Allocator = mem.Allocator;
const Step = std.Build.Step;
pub const base_id = .check_object;
step: Step,
source: std.Build.FileSource,
max_bytes: usize = 20 * 1024 * 1024,
checks: std.ArrayList(Check),
dump_symtab: bool = false,
obj_format: std.Target.ObjectFormat,
pub fn create(
owner: *std.Build,
source: std.Build.FileSource,
obj_format: std.Target.ObjectFormat,
) *CheckObject {
const gpa = owner.allocator;
const self = gpa.create(CheckObject) catch @panic("OOM");
self.* = .{
.step = Step.init(.{
.id = base_id,
.name = "CheckObject",
.owner = owner,
.makeFn = make,
}),
.source = source.dupe(owner),
.checks = std.ArrayList(Check).init(gpa),
.obj_format = obj_format,
};
self.source.addStepDependencies(&self.step);
return self;
}
const SearchPhrase = struct {
string: []const u8,
file_source: ?std.Build.FileSource = null,
fn resolve(phrase: SearchPhrase, b: *std.Build, step: *Step) []const u8 {
const file_source = phrase.file_source orelse return phrase.string;
return b.fmt("{s} {s}", .{ phrase.string, file_source.getPath2(b, step) });
}
};
/// There are three types of actions currently supported:
/// * `.match` - the main building block of standard matchers, with an optional eat-all token `{*}`
/// and extractors by name such as `{n_value}`. Please note this action is very simplistic in nature,
/// i.e., it won't really handle edge cases or nontrivial examples. But given that we mainly want to
/// use it to test the output of our object format parser-dumpers when testing the linkers, etc.,
/// it should be plenty useful in its current form.
/// * `.not_present` - the inverse of `.match`: the check fails if the phrase *is* found in the output.
/// * `.compute_cmp` - can be used to perform an operation on variables extracted by earlier match
/// actions. It currently supports addition, subtraction, multiplication and modulo. The operation is
/// required to be specified in Reverse Polish Notation to avoid operator-precedence parsing (well,
/// to avoid any parsing really).
/// For example, if two extracted values were saved as `vmaddr` and `entryoff` respectively,
/// they could then be added with the simple program `vmaddr entryoff +`.
const Action = struct {
tag: enum { match, not_present, compute_cmp },
phrase: SearchPhrase,
expected: ?ComputeCompareExpected = null,
/// Will return true if the `phrase` was found in the `haystack`.
/// Some examples include:
///
/// LC 0 => will match in its entirety
/// vmaddr {vmaddr} => will match `vmaddr` and then extract the following value as u64
/// and save under `vmaddr` global name (see `global_vars` param)
/// name {*}libobjc{*}.dylib => will match `name` followed by a token which contains `libobjc` and `.dylib`
/// in that order with other letters in between
fn match(
act: Action,
b: *std.Build,
step: *Step,
haystack: []const u8,
global_vars: anytype,
) !bool {
assert(act.tag == .match or act.tag == .not_present);
const phrase = act.phrase.resolve(b, step);
var candidate_var: ?struct { name: []const u8, value: u64 } = null;
var hay_it = mem.tokenizeScalar(u8, mem.trim(u8, haystack, " "), ' ');
var needle_it = mem.tokenizeScalar(u8, mem.trim(u8, phrase, " "), ' ');
while (needle_it.next()) |needle_tok| {
const hay_tok = hay_it.next() orelse return false;
if (mem.indexOf(u8, needle_tok, "{*}")) |index| {
// We have fuzzy matchers within the search pattern, so we match substrings.
var start = index;
var n_tok = needle_tok;
var h_tok = hay_tok;
while (true) {
n_tok = n_tok[start + 3 ..];
const inner = if (mem.indexOf(u8, n_tok, "{*}")) |sub_end|
n_tok[0..sub_end]
else
n_tok;
if (mem.indexOf(u8, h_tok, inner) == null) return false;
start = mem.indexOf(u8, n_tok, "{*}") orelse break;
}
} else if (mem.startsWith(u8, needle_tok, "{")) {
const closing_brace = mem.indexOf(u8, needle_tok, "}") orelse return error.MissingClosingBrace;
if (closing_brace != needle_tok.len - 1) return error.ClosingBraceNotLast;
const name = needle_tok[1..closing_brace];
if (name.len == 0) return error.MissingBraceValue;
const value = try std.fmt.parseInt(u64, hay_tok, 16);
candidate_var = .{
.name = name,
.value = value,
};
} else {
if (!mem.eql(u8, hay_tok, needle_tok)) return false;
}
}
if (candidate_var) |v| {
try global_vars.putNoClobber(v.name, v.value);
}
return true;
}
/// Will return true if the `phrase` is correctly parsed into an RPN program and
/// its reduced, computed value compares using `op` with the expected value, either
/// a literal or another extracted variable.
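/// As a worked example (hypothetical values): with `vmaddr = 0x100000000` and `entryoff = 0x4000`
/// previously extracted, the program `vmaddr entryoff +` reduces to 0x100004000, which is then
/// compared against the expected value using `op`.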
fn computeCmp(act: Action, b: *std.Build, step: *Step, global_vars: anytype) !bool {
const gpa = step.owner.allocator;
const phrase = act.phrase.resolve(b, step);
var op_stack = std.ArrayList(enum { add, sub, mod, mul }).init(gpa);
var values = std.ArrayList(u64).init(gpa);
var it = mem.tokenizeScalar(u8, phrase, ' ');
while (it.next()) |next| {
if (mem.eql(u8, next, "+")) {
try op_stack.append(.add);
} else if (mem.eql(u8, next, "-")) {
try op_stack.append(.sub);
} else if (mem.eql(u8, next, "%")) {
try op_stack.append(.mod);
} else if (mem.eql(u8, next, "*")) {
try op_stack.append(.mul);
} else {
const val = std.fmt.parseInt(u64, next, 0) catch blk: {
break :blk global_vars.get(next) orelse {
try step.addError(
\\
\\========= variable was not extracted: ===========
\\{s}
\\=================================================
, .{next});
return error.UnknownVariable;
};
};
try values.append(val);
}
}
var op_i: usize = 1;
var reduced: u64 = values.items[0];
for (op_stack.items) |op| {
const other = values.items[op_i];
switch (op) {
.add => {
reduced += other;
},
.sub => {
reduced -= other;
},
.mod => {
reduced %= other;
},
.mul => {
reduced *= other;
},
}
op_i += 1;
}
const exp_value = switch (act.expected.?.value) {
.variable => |name| global_vars.get(name) orelse {
try step.addError(
\\
\\========= variable was not extracted: ===========
\\{s}
\\=================================================
, .{name});
return error.UnknownVariable;
},
.literal => |x| x,
};
return math.compare(reduced, act.expected.?.op, exp_value);
}
};
const ComputeCompareExpected = struct {
op: math.CompareOperator,
value: union(enum) {
variable: []const u8,
literal: u64,
},
pub fn format(
value: @This(),
comptime fmt: []const u8,
options: std.fmt.FormatOptions,
writer: anytype,
) !void {
if (fmt.len != 0) std.fmt.invalidFmtError(fmt, value);
_ = options;
try writer.print("{s} ", .{@tagName(value.op)});
switch (value.value) {
.variable => |name| try writer.writeAll(name),
.literal => |x| try writer.print("{x}", .{x}),
}
}
};
const Check = struct {
actions: std.ArrayList(Action),
fn create(allocator: Allocator) Check {
return .{
.actions = std.ArrayList(Action).init(allocator),
};
}
fn match(self: *Check, phrase: SearchPhrase) void {
self.actions.append(.{
.tag = .match,
.phrase = phrase,
}) catch @panic("OOM");
}
fn notPresent(self: *Check, phrase: SearchPhrase) void {
self.actions.append(.{
.tag = .not_present,
.phrase = phrase,
}) catch @panic("OOM");
}
fn computeCmp(self: *Check, phrase: SearchPhrase, expected: ComputeCompareExpected) void {
self.actions.append(.{
.tag = .compute_cmp,
.phrase = phrase,
.expected = expected,
}) catch @panic("OOM");
}
};
/// Creates a new sequence of actions with `phrase` as the first anchored search phrase.
pub fn checkStart(self: *CheckObject, phrase: []const u8) void {
var new_check = Check.create(self.step.owner.allocator);
new_check.match(.{ .string = self.step.owner.dupe(phrase) });
self.checks.append(new_check) catch @panic("OOM");
}
/// Adds another search phrase to the latest Check created with `CheckObject.checkStart(...)`.
/// Asserts that at least one check already exists.
pub fn checkNext(self: *CheckObject, phrase: []const u8) void {
assert(self.checks.items.len > 0);
const last = &self.checks.items[self.checks.items.len - 1];
last.match(.{ .string = self.step.owner.dupe(phrase) });
}
/// Like `checkNext()` but takes an additional argument `FileSource` which will be
/// resolved to a full search query in `make()`.
pub fn checkNextFileSource(
self: *CheckObject,
phrase: []const u8,
file_source: std.Build.FileSource,
) void {
assert(self.checks.items.len > 0);
const last = &self.checks.items[self.checks.items.len - 1];
last.match(.{ .string = self.step.owner.dupe(phrase), .file_source = file_source });
}
/// Adds another search phrase to the latest Check created with `CheckObject.checkStart(...)`,
/// but requires that the phrase is NOT present in the output.
/// Asserts that at least one check already exists.
pub fn checkNotPresent(self: *CheckObject, phrase: []const u8) void {
assert(self.checks.items.len > 0);
const last = &self.checks.items[self.checks.items.len - 1];
last.notPresent(.{ .string = self.step.owner.dupe(phrase) });
}
/// Creates a new check that specifically targets the symbol table parsed and dumped from the
/// object file.
/// Issuing this check will force parsing and dumping of the symbol table.
pub fn checkInSymtab(self: *CheckObject) void {
self.dump_symtab = true;
const symtab_label = switch (self.obj_format) {
.macho => MachODumper.symtab_label,
else => @panic("TODO other parsers"),
};
self.checkStart(symtab_label);
}
/// Creates a new standalone, singular check which allows running simple binary operations
/// on the extracted variables. It will then compare the program's reduced value against the
/// expected value (a literal or another extracted variable). See the usage sketch below.
pub fn checkComputeCompare(
self: *CheckObject,
program: []const u8,
expected: ComputeCompareExpected,
) void {
var new_check = Check.create(self.step.owner.allocator);
new_check.computeCmp(.{ .string = self.step.owner.dupe(program) }, expected);
self.checks.append(new_check) catch @panic("OOM");
}
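// A minimal usage sketch from a hypothetical build script (`b`, `bin_source`, and `test_step`
// are assumed to exist elsewhere; the phrases and values are illustrative only):
//
//     const check = CheckObject.create(b, bin_source, .macho);
//     check.checkStart("cmd MAIN");
//     check.checkNext("entryoff {entryoff}");
//     check.checkStart("cmd SEGMENT_64");
//     check.checkNext("segname __TEXT");
//     check.checkNext("vmaddr {vmaddr}");
//     check.checkComputeCompare("vmaddr entryoff +", .{
//         .op = .gte,
//         .value = .{ .literal = 0x100000000 },
//     });
//     test_step.dependOn(&check.step);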
fn make(step: *Step, prog_node: *std.Progress.Node) !void {
_ = prog_node;
const b = step.owner;
const gpa = b.allocator;
const self = @fieldParentPtr(CheckObject, "step", step);
const src_path = self.source.getPath(b);
const contents = fs.cwd().readFileAllocOptions(
gpa,
src_path,
self.max_bytes,
null,
@alignOf(u64),
null,
) catch |err| return step.fail("unable to read '{s}': {s}", .{ src_path, @errorName(err) });
const output = switch (self.obj_format) {
.macho => try MachODumper.parseAndDump(step, contents, .{
.dump_symtab = self.dump_symtab,
}),
.elf => try ElfDumper.parseAndDump(step, contents, .{
.dump_symtab = self.dump_symtab,
}),
.coff => @panic("TODO coff parser"),
.wasm => try WasmDumper.parseAndDump(step, contents, .{
.dump_symtab = self.dump_symtab,
}),
else => unreachable,
};
var vars = std.StringHashMap(u64).init(gpa);
for (self.checks.items) |chk| {
var it = mem.tokenizeAny(u8, output, "\r\n");
for (chk.actions.items) |act| {
switch (act.tag) {
.match => {
while (it.next()) |line| {
if (try act.match(b, step, line, &vars)) break;
} else {
return step.fail(
\\
\\========= expected to find: ==========================
\\{s}
\\========= but parsed file does not contain it: =======
\\{s}
\\======================================================
, .{ act.phrase.resolve(b, step), output });
}
},
.not_present => {
while (it.next()) |line| {
if (try act.match(b, step, line, &vars)) {
return step.fail(
\\
\\========= expected not to find: ===================
\\{s}
\\========= but parsed file does contain it: ========
\\{s}
\\===================================================
, .{ act.phrase.resolve(b, step), output });
}
}
},
.compute_cmp => {
const res = act.computeCmp(b, step, vars) catch |err| switch (err) {
error.UnknownVariable => {
return step.fail(
\\========= from parsed file: =====================
\\{s}
\\=================================================
, .{output});
},
else => |e| return e,
};
if (!res) {
return step.fail(
\\
\\========= comparison failed for action: ===========
\\{s} {}
\\========= from parsed file: =======================
\\{s}
\\===================================================
, .{ act.phrase.resolve(b, step), act.expected.?, output });
}
},
}
}
}
}
const Opts = struct {
dump_symtab: bool = false,
};
const MachODumper = struct {
const LoadCommandIterator = macho.LoadCommandIterator;
const symtab_label = "symtab";
fn parseAndDump(step: *Step, bytes: []align(@alignOf(u64)) const u8, opts: Opts) ![]const u8 {
const gpa = step.owner.allocator;
var stream = std.io.fixedBufferStream(bytes);
const reader = stream.reader();
const hdr = try reader.readStruct(macho.mach_header_64);
if (hdr.magic != macho.MH_MAGIC_64) {
return error.InvalidMagicNumber;
}
var output = std.ArrayList(u8).init(gpa);
const writer = output.writer();
var symtab: []const macho.nlist_64 = undefined;
var strtab: []const u8 = undefined;
var sections = std.ArrayList(macho.section_64).init(gpa);
var imports = std.ArrayList([]const u8).init(gpa);
var it = LoadCommandIterator{
.ncmds = hdr.ncmds,
.buffer = bytes[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds],
};
var i: usize = 0;
while (it.next()) |cmd| {
switch (cmd.cmd()) {
.SEGMENT_64 => {
const seg = cmd.cast(macho.segment_command_64).?;
try sections.ensureUnusedCapacity(seg.nsects);
for (cmd.getSections()) |sect| {
sections.appendAssumeCapacity(sect);
}
},
.SYMTAB => if (opts.dump_symtab) {
const lc = cmd.cast(macho.symtab_command).?;
symtab = @as(
[*]const macho.nlist_64,
@ptrCast(@alignCast(&bytes[lc.symoff])),
)[0..lc.nsyms];
strtab = bytes[lc.stroff..][0..lc.strsize];
},
.LOAD_DYLIB,
.LOAD_WEAK_DYLIB,
.REEXPORT_DYLIB,
=> {
try imports.append(cmd.getDylibPathName());
},
else => {},
}
try dumpLoadCommand(cmd, i, writer);
try writer.writeByte('\n');
i += 1;
}
if (opts.dump_symtab) {
try writer.print("{s}\n", .{symtab_label});
for (symtab) |sym| {
if (sym.stab()) continue;
const sym_name = mem.sliceTo(@as([*:0]const u8, @ptrCast(strtab.ptr + sym.n_strx)), 0);
if (sym.sect()) {
const sect = sections.items[sym.n_sect - 1];
try writer.print("{x} ({s},{s})", .{
sym.n_value,
sect.segName(),
sect.sectName(),
});
if (sym.ext()) {
try writer.writeAll(" external");
}
try writer.print(" {s}\n", .{sym_name});
} else if (sym.undf()) {
const ordinal = @divTrunc(@as(i16, @bitCast(sym.n_desc)), macho.N_SYMBOL_RESOLVER);
const import_name = blk: {
if (ordinal <= 0) {
if (ordinal == macho.BIND_SPECIAL_DYLIB_SELF)
break :blk "self import";
if (ordinal == macho.BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE)
break :blk "main executable";
if (ordinal == macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP)
break :blk "flat lookup";
unreachable;
}
const full_path = imports.items[@as(u16, @bitCast(ordinal)) - 1];
const basename = fs.path.basename(full_path);
assert(basename.len > 0);
const ext = mem.lastIndexOfScalar(u8, basename, '.') orelse basename.len;
break :blk basename[0..ext];
};
try writer.writeAll("(undefined)");
if (sym.weakRef()) {
try writer.writeAll(" weak");
}
if (sym.ext()) {
try writer.writeAll(" external");
}
try writer.print(" {s} (from {s})\n", .{
sym_name,
import_name,
});
} else unreachable;
}
}
return output.toOwnedSlice();
}
fn dumpLoadCommand(lc: macho.LoadCommandIterator.LoadCommand, index: usize, writer: anytype) !void {
// print header first
try writer.print(
\\LC {d}
\\cmd {s}
\\cmdsize {d}
, .{ index, @tagName(lc.cmd()), lc.cmdsize() });
switch (lc.cmd()) {
.SEGMENT_64 => {
const seg = lc.cast(macho.segment_command_64).?;
try writer.writeByte('\n');
try writer.print(
\\segname {s}
\\vmaddr {x}
\\vmsize {x}
\\fileoff {x}
\\filesz {x}
, .{
seg.segName(),
seg.vmaddr,
seg.vmsize,
seg.fileoff,
seg.filesize,
});
for (lc.getSections()) |sect| {
try writer.writeByte('\n');
try writer.print(
\\sectname {s}
\\addr {x}
\\size {x}
\\offset {x}
\\align {x}
, .{
sect.sectName(),
sect.addr,
sect.size,
sect.offset,
sect.@"align",
});
}
},
.ID_DYLIB,
.LOAD_DYLIB,
.LOAD_WEAK_DYLIB,
.REEXPORT_DYLIB,
=> {
const dylib = lc.cast(macho.dylib_command).?;
try writer.writeByte('\n');
try writer.print(
\\name {s}
\\timestamp {d}
\\current version {x}
\\compatibility version {x}
, .{
lc.getDylibPathName(),
dylib.dylib.timestamp,
dylib.dylib.current_version,
dylib.dylib.compatibility_version,
});
},
.MAIN => {
const main = lc.cast(macho.entry_point_command).?;
try writer.writeByte('\n');
try writer.print(
\\entryoff {x}
\\stacksize {x}
, .{ main.entryoff, main.stacksize });
},
.RPATH => {
try writer.writeByte('\n');
try writer.print(
\\path {s}
, .{
lc.getRpathPathName(),
});
},
.UUID => {
const uuid = lc.cast(macho.uuid_command).?;
try writer.writeByte('\n');
try writer.print("uuid {x}", .{std.fmt.fmtSliceHexLower(&uuid.uuid)});
},
.DATA_IN_CODE,
.FUNCTION_STARTS,
.CODE_SIGNATURE,
=> {
const llc = lc.cast(macho.linkedit_data_command).?;
try writer.writeByte('\n');
try writer.print(
\\dataoff {x}
\\datasize {x}
, .{ llc.dataoff, llc.datasize });
},
.DYLD_INFO_ONLY => {
const dlc = lc.cast(macho.dyld_info_command).?;
try writer.writeByte('\n');
try writer.print(
\\rebaseoff {x}
\\rebasesize {x}
\\bindoff {x}
\\bindsize {x}
\\weakbindoff {x}
\\weakbindsize {x}
\\lazybindoff {x}
\\lazybindsize {x}
\\exportoff {x}
\\exportsize {x}
, .{
dlc.rebase_off,
dlc.rebase_size,
dlc.bind_off,
dlc.bind_size,
dlc.weak_bind_off,
dlc.weak_bind_size,
dlc.lazy_bind_off,
dlc.lazy_bind_size,
dlc.export_off,
dlc.export_size,
});
},
.SYMTAB => {
const slc = lc.cast(macho.symtab_command).?;
try writer.writeByte('\n');
try writer.print(
\\symoff {x}
\\nsyms {x}
\\stroff {x}
\\strsize {x}
, .{
slc.symoff,
slc.nsyms,
slc.stroff,
slc.strsize,
});
},
.DYSYMTAB => {
const dlc = lc.cast(macho.dysymtab_command).?;
try writer.writeByte('\n');
try writer.print(
\\ilocalsym {x}
\\nlocalsym {x}
\\iextdefsym {x}
\\nextdefsym {x}
\\iundefsym {x}
\\nundefsym {x}
\\indirectsymoff {x}
\\nindirectsyms {x}
, .{
dlc.ilocalsym,
dlc.nlocalsym,
dlc.iextdefsym,
dlc.nextdefsym,
dlc.iundefsym,
dlc.nundefsym,
dlc.indirectsymoff,
dlc.nindirectsyms,
});
},
else => {},
}
}
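// For reference, a SEGMENT_64 load command rendered by this function looks roughly like the
// following (illustrative values):
//
//     LC 1
//     cmd SEGMENT_64
//     cmdsize 232
//     segname __TEXT
//     vmaddr 100000000
//     vmsize 4000
//     fileoff 0
//     filesz 4000
//     sectname __text
//     addr 1000003a0
//     size 2a0
//     offset 3a0
//     align 4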
};
const ElfDumper = struct {
const symtab_label = "symtab";
const Symtab = struct {
symbols: []align(1) const elf.Elf64_Sym,
strings: []const u8,
fn get(st: Symtab, index: usize) ?elf.Elf64_Sym {
if (index >= st.symbols.len) return null;
return st.symbols[index];
}
fn getName(st: Symtab, index: usize) ?[]const u8 {
const sym = st.get(index) orelse return null;
assert(sym.st_name < st.strings.len);
return mem.sliceTo(@as([*:0]const u8, @ptrCast(st.strings.ptr + sym.st_name)), 0);
}
};
const Context = struct {
gpa: Allocator,
data: []const u8,
hdr: elf.Elf64_Ehdr,
shdrs: []align(1) const elf.Elf64_Shdr,
phdrs: []align(1) const elf.Elf64_Phdr,
shstrtab: []const u8,
symtab: ?Symtab = null,
dysymtab: ?Symtab = null,
};
fn parseAndDump(step: *Step, bytes: []const u8, opts: Opts) ![]const u8 {
const gpa = step.owner.allocator;
var stream = std.io.fixedBufferStream(bytes);
const reader = stream.reader();
const hdr = try reader.readStruct(elf.Elf64_Ehdr);
if (!mem.eql(u8, hdr.e_ident[0..4], "\x7fELF")) {
return error.InvalidMagicNumber;
}
const shdrs = @as([*]align(1) const elf.Elf64_Shdr, @ptrCast(bytes.ptr + hdr.e_shoff))[0..hdr.e_shnum];
const phdrs = @as([*]align(1) const elf.Elf64_Phdr, @ptrCast(bytes.ptr + hdr.e_phoff))[0..hdr.e_phnum];
var ctx = Context{
.gpa = gpa,
.data = bytes,
.hdr = hdr,
.shdrs = shdrs,
.phdrs = phdrs,
.shstrtab = undefined,
};
ctx.shstrtab = getSectionContents(ctx, ctx.hdr.e_shstrndx);
if (opts.dump_symtab) {
for (ctx.shdrs, 0..) |shdr, i| switch (shdr.sh_type) {
elf.SHT_SYMTAB, elf.SHT_DYNSYM => {
const raw = getSectionContents(ctx, i);
const nsyms = @divExact(raw.len, @sizeOf(elf.Elf64_Sym));
const symbols = @as([*]align(1) const elf.Elf64_Sym, @ptrCast(raw.ptr))[0..nsyms];
const strings = getSectionContents(ctx, shdr.sh_link);
switch (shdr.sh_type) {
elf.SHT_SYMTAB => {
ctx.symtab = .{
.symbols = symbols,
.strings = strings,
};
},
elf.SHT_DYNSYM => {
ctx.dysymtab = .{
.symbols = symbols,
.strings = strings,
};
},
else => unreachable,
}
},
else => {},
};
}
var output = std.ArrayList(u8).init(gpa);
const writer = output.writer();
try dumpHeader(ctx, writer);
try dumpShdrs(ctx, writer);
try dumpPhdrs(ctx, writer);
return output.toOwnedSlice();
}
fn getSectionName(ctx: Context, shndx: usize) []const u8 {
const shdr = ctx.shdrs[shndx];
assert(shdr.sh_name < ctx.shstrtab.len);
return mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.shstrtab.ptr + shdr.sh_name)), 0);
}
fn getSectionContents(ctx: Context, shndx: usize) []const u8 {
const shdr = ctx.shdrs[shndx];
assert(shdr.sh_offset < ctx.data.len);
assert(shdr.sh_offset + shdr.sh_size <= ctx.data.len);
return ctx.data[shdr.sh_offset..][0..shdr.sh_size];
}
fn dumpHeader(ctx: Context, writer: anytype) !void {
try writer.writeAll("header\n");
try writer.print("type {s}\n", .{@tagName(ctx.hdr.e_type)});
try writer.print("entry {x}\n", .{ctx.hdr.e_entry});
}
fn dumpShdrs(ctx: Context, writer: anytype) !void {
if (ctx.shdrs.len == 0) return;
for (ctx.shdrs, 0..) |shdr, shndx| {
try writer.print("shdr {d}\n", .{shndx});
try writer.print("name {s}\n", .{getSectionName(ctx, shndx)});
try writer.print("type {s}\n", .{fmtShType(shdr.sh_type)});
try writer.print("addr {x}\n", .{shdr.sh_addr});
try writer.print("offset {x}\n", .{shdr.sh_offset});
try writer.print("size {x}\n", .{shdr.sh_size});
try writer.print("addralign {x}\n", .{shdr.sh_addralign});
// TODO dump formatted sh_flags
}
}
fn fmtShType(sh_type: u32) std.fmt.Formatter(formatShType) {
return .{ .data = sh_type };
}
fn formatShType(
sh_type: u32,
comptime unused_fmt_string: []const u8,
options: std.fmt.FormatOptions,
writer: anytype,
) !void {
_ = unused_fmt_string;
_ = options;
if (elf.SHT_LOOS <= sh_type and sh_type < elf.SHT_HIOS) {
try writer.print("LOOS+0x{x}", .{sh_type - elf.SHT_LOOS});
} else if (elf.SHT_LOPROC <= sh_type and sh_type < elf.SHT_HIPROC) {
try writer.print("LOPROC+0x{x}", .{sh_type - elf.SHT_LOPROC});
} else if (elf.SHT_LOUSER <= sh_type and sh_type < elf.SHT_HIUSER) {
try writer.print("LOUSER+0x{x}", .{sh_type - elf.SHT_LOUSER});
} else {
const name = switch (sh_type) {
elf.SHT_NULL => "NULL",
elf.SHT_PROGBITS => "PROGBITS",
elf.SHT_SYMTAB => "SYMTAB",
elf.SHT_STRTAB => "STRTAB",
elf.SHT_RELA => "RELA",
elf.SHT_HASH => "HASH",
elf.SHT_DYNAMIC => "DYNAMIC",
elf.SHT_NOTE => "NOTE",
elf.SHT_NOBITS => "NOBITS",
elf.SHT_REL => "REL",
elf.SHT_SHLIB => "SHLIB",
elf.SHT_DYNSYM => "DYNSYM",
elf.SHT_INIT_ARRAY => "INIT_ARRAY",
elf.SHT_FINI_ARRAY => "FINI_ARRAY",
elf.SHT_PREINIT_ARRAY => "PREINIT_ARRAY",
elf.SHT_GROUP => "GROUP",
elf.SHT_SYMTAB_SHNDX => "SYMTAB_SHNDX",
elf.SHT_X86_64_UNWIND => "X86_64_UNWIND",
elf.SHT_LLVM_ADDRSIG => "LLVM_ADDRSIG",
elf.SHT_GNU_HASH => "GNU_HASH",
elf.SHT_GNU_VERDEF => "VERDEF",
elf.SHT_GNU_VERNEED => "VERNEED",
elf.SHT_GNU_VERSYM => "VERSYM",
else => "UNKNOWN",
};
try writer.writeAll(name);
}
}
fn dumpPhdrs(ctx: Context, writer: anytype) !void {
if (ctx.phdrs.len == 0) return;
for (ctx.phdrs, 0..) |phdr, phndx| {
try writer.print("phdr {d}\n", .{phndx});
try writer.print("type {s}\n", .{fmtPhType(phdr.p_type)});
try writer.print("vaddr {x}\n", .{phdr.p_vaddr});
try writer.print("paddr {x}\n", .{phdr.p_paddr});
try writer.print("offset {x}\n", .{phdr.p_offset});
try writer.print("memsz {x}\n", .{phdr.p_memsz});
try writer.print("filesz {x}\n", .{phdr.p_filesz});
try writer.print("align {x}\n", .{phdr.p_align});
// TODO dump formatted p_flags
}
}
fn fmtPhType(ph_type: u32) std.fmt.Formatter(formatPhType) {
return .{ .data = ph_type };
}
fn formatPhType(
ph_type: u32,
comptime unused_fmt_string: []const u8,
options: std.fmt.FormatOptions,
writer: anytype,
) !void {
_ = unused_fmt_string;
_ = options;
if (elf.PT_LOOS <= ph_type and ph_type < elf.PT_HIOS) {
try writer.print("LOOS+0x{x}", .{ph_type - elf.PT_LOOS});
} else if (elf.PT_LOPROC <= ph_type and ph_type < elf.PT_HIPROC) {
try writer.print("LOPROC+0x{x}", .{ph_type - elf.PT_LOPROC});
} else {
const p_type = switch (ph_type) {
elf.PT_NULL => "NULL",
elf.PT_LOAD => "LOAD",
elf.PT_DYNAMIC => "DYNAMIC",
elf.PT_INTERP => "INTERP",
elf.PT_NOTE => "NOTE",
elf.PT_SHLIB => "SHLIB",
elf.PT_PHDR => "PHDR",
elf.PT_TLS => "TLS",
elf.PT_NUM => "NUM",
elf.PT_GNU_EH_FRAME => "GNU_EH_FRAME",
elf.PT_GNU_STACK => "GNU_STACK",
elf.PT_GNU_RELRO => "GNU_RELRO",
else => "UNKNOWN",
};
try writer.writeAll(p_type);
}
}
};
const WasmDumper = struct {
const symtab_label = "symbols";
fn parseAndDump(step: *Step, bytes: []const u8, opts: Opts) ![]const u8 {
const gpa = step.owner.allocator;
if (opts.dump_symtab) {
@panic("TODO: Implement symbol table parsing and dumping");
}
var fbs = std.io.fixedBufferStream(bytes);
const reader = fbs.reader();
const buf = try reader.readBytesNoEof(8);
if (!mem.eql(u8, buf[0..4], &std.wasm.magic)) {
return error.InvalidMagicByte;
}
if (!mem.eql(u8, buf[4..], &std.wasm.version)) {
return error.UnsupportedWasmVersion;
}
var output = std.ArrayList(u8).init(gpa);
errdefer output.deinit();
const writer = output.writer();
while (reader.readByte()) |current_byte| {
const section = std.meta.intToEnum(std.wasm.Section, current_byte) catch {
return step.fail("Found invalid section id '{d}'", .{current_byte});
};
const section_length = try std.leb.readULEB128(u32, reader);
try parseAndDumpSection(step, section, bytes[fbs.pos..][0..section_length], writer);
fbs.pos += section_length;
} else |_| {} // reached end of stream
return output.toOwnedSlice();
}
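// Each section is rendered with a small header (illustrative values):
//
//     Section type
//     size 11
//     entries 2
//
// For list-like sections an `entries` count follows, then the per-entry fields emitted by
// `dumpSection`; custom sections print their name instead.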
fn parseAndDumpSection(
step: *Step,
section: std.wasm.Section,
data: []const u8,
writer: anytype,
) !void {
var fbs = std.io.fixedBufferStream(data);
const reader = fbs.reader();
try writer.print(
\\Section {s}
\\size {d}
, .{ @tagName(section), data.len });
switch (section) {
.type,
.import,
.function,
.table,
.memory,
.global,
.@"export",
.element,
.code,
.data,
=> {
const entries = try std.leb.readULEB128(u32, reader);
try writer.print("\nentries {d}\n", .{entries});
try dumpSection(step, section, data[fbs.pos..], entries, writer);
},
.custom => {
const name_length = try std.leb.readULEB128(u32, reader);
const name = data[fbs.pos..][0..name_length];
fbs.pos += name_length;
try writer.print("\nname {s}\n", .{name});
if (mem.eql(u8, name, "name")) {
try parseDumpNames(step, reader, writer, data);
} else if (mem.eql(u8, name, "producers")) {
try parseDumpProducers(reader, writer, data);
} else if (mem.eql(u8, name, "target_features")) {
try parseDumpFeatures(reader, writer, data);
}
// TODO: Implement parsing and dumping other custom sections (such as relocations)
},
.start => {
const start = try std.leb.readULEB128(u32, reader);
try writer.print("\nstart {d}\n", .{start});
},
.data_count => {
const count = try std.leb.readULEB128(u32, reader);
try writer.print("\ncount {d}\n", .{count});
},
else => {}, // skip unknown sections
}
}
fn dumpSection(step: *Step, section: std.wasm.Section, data: []const u8, entries: u32, writer: anytype) !void {
var fbs = std.io.fixedBufferStream(data);
const reader = fbs.reader();
switch (section) {
.type => {
var i: u32 = 0;
while (i < entries) : (i += 1) {
const func_type = try reader.readByte();
if (func_type != std.wasm.function_type) {
return step.fail("expected function type, found byte '{d}'", .{func_type});
}
const params = try std.leb.readULEB128(u32, reader);
try writer.print("params {d}\n", .{params});
var index: u32 = 0;
while (index < params) : (index += 1) {
try parseDumpType(step, std.wasm.Valtype, reader, writer);
} else index = 0;
const returns = try std.leb.readULEB128(u32, reader);
try writer.print("returns {d}\n", .{returns});
while (index < returns) : (index += 1) {
try parseDumpType(step, std.wasm.Valtype, reader, writer);
}
}
},
.import => {
var i: u32 = 0;
while (i < entries) : (i += 1) {
const module_name_len = try std.leb.readULEB128(u32, reader);
const module_name = data[fbs.pos..][0..module_name_len];
fbs.pos += module_name_len;
const name_len = try std.leb.readULEB128(u32, reader);
const name = data[fbs.pos..][0..name_len];
fbs.pos += name_len;
const kind = std.meta.intToEnum(std.wasm.ExternalKind, try reader.readByte()) catch {
return step.fail("invalid import kind", .{});
};
try writer.print(
\\module {s}
\\name {s}
\\kind {s}
, .{ module_name, name, @tagName(kind) });
try writer.writeByte('\n');
switch (kind) {
.function => {
try writer.print("index {d}\n", .{try std.leb.readULEB128(u32, reader)});
},
.memory => {
try parseDumpLimits(reader, writer);
},
.global => {
try parseDumpType(step, std.wasm.Valtype, reader, writer);
try writer.print("mutable {}\n", .{0x01 == try std.leb.readULEB128(u32, reader)});
},
.table => {
try parseDumpType(step, std.wasm.RefType, reader, writer);
try parseDumpLimits(reader, writer);
},
}
}
},
.function => {
var i: u32 = 0;
while (i < entries) : (i += 1) {
try writer.print("index {d}\n", .{try std.leb.readULEB128(u32, reader)});
}
},
.table => {
var i: u32 = 0;
while (i < entries) : (i += 1) {
try parseDumpType(step, std.wasm.RefType, reader, writer);
try parseDumpLimits(reader, writer);
}
},
.memory => {
var i: u32 = 0;
while (i < entries) : (i += 1) {
try parseDumpLimits(reader, writer);
}
},
.global => {
var i: u32 = 0;
while (i < entries) : (i += 1) {
try parseDumpType(step, std.wasm.Valtype, reader, writer);
try writer.print("mutable {}\n", .{0x01 == try std.leb.readULEB128(u1, reader)});
try parseDumpInit(step, reader, writer);
}
},
.@"export" => {
var i: u32 = 0;
while (i < entries) : (i += 1) {
const name_len = try std.leb.readULEB128(u32, reader);
const name = data[fbs.pos..][0..name_len];
fbs.pos += name_len;
const kind_byte = try std.leb.readULEB128(u8, reader);
const kind = std.meta.intToEnum(std.wasm.ExternalKind, kind_byte) catch {
return step.fail("invalid export kind value '{d}'", .{kind_byte});
};
const index = try std.leb.readULEB128(u32, reader);
try writer.print(
\\name {s}
\\kind {s}
\\index {d}
, .{ name, @tagName(kind), index });
try writer.writeByte('\n');
}
},
.element => {
var i: u32 = 0;
while (i < entries) : (i += 1) {
try writer.print("table index {d}\n", .{try std.leb.readULEB128(u32, reader)});
try parseDumpInit(step, reader, writer);
const function_indexes = try std.leb.readULEB128(u32, reader);
var function_index: u32 = 0;
try writer.print("indexes {d}\n", .{function_indexes});
while (function_index < function_indexes) : (function_index += 1) {
try writer.print("index {d}\n", .{try std.leb.readULEB128(u32, reader)});
}
}
},
.code => {}, // code section is considered opaque to linker
.data => {
var i: u32 = 0;
while (i < entries) : (i += 1) {
const flags = try std.leb.readULEB128(u32, reader);
const index = if (flags & 0x02 != 0)
try std.leb.readULEB128(u32, reader)
else
0;
try writer.print("memory index 0x{x}\n", .{index});
if (flags == 0) {
try parseDumpInit(step, reader, writer);
}
const size = try std.leb.readULEB128(u32, reader);
try writer.print("size {d}\n", .{size});
try reader.skipBytes(size, .{}); // we do not care about the content of the segments
}
},
else => unreachable,
}
}
fn parseDumpType(step: *Step, comptime WasmType: type, reader: anytype, writer: anytype) !void {
const type_byte = try reader.readByte();
const valtype = std.meta.intToEnum(WasmType, type_byte) catch {
return step.fail("Invalid wasm type value '{d}'", .{type_byte});
};
try writer.print("type {s}\n", .{@tagName(valtype)});
}
fn parseDumpLimits(reader: anytype, writer: anytype) !void {
const flags = try std.leb.readULEB128(u8, reader);
const min = try std.leb.readULEB128(u32, reader);
try writer.print("min {x}\n", .{min});
if (flags != 0) {
try writer.print("max {x}\n", .{try std.leb.readULEB128(u32, reader)});
}
}
fn parseDumpInit(step: *Step, reader: anytype, writer: anytype) !void {
const byte = try reader.readByte();
const opcode = std.meta.intToEnum(std.wasm.Opcode, byte) catch {
return step.fail("invalid wasm opcode '{d}'", .{byte});
};
switch (opcode) {
.i32_const => try writer.print("i32.const {x}\n", .{try std.leb.readILEB128(i32, reader)}),
.i64_const => try writer.print("i64.const {x}\n", .{try std.leb.readILEB128(i64, reader)}),
.f32_const => try writer.print("f32.const {x}\n", .{@as(f32, @bitCast(try reader.readIntLittle(u32)))}),
.f64_const => try writer.print("f64.const {x}\n", .{@as(f64, @bitCast(try reader.readIntLittle(u64)))}),
.global_get => try writer.print("global.get {x}\n", .{try std.leb.readULEB128(u32, reader)}),
else => unreachable,
}
const end_opcode = try std.leb.readULEB128(u8, reader);
if (end_opcode != std.wasm.opcode(.end)) {
return step.fail("expected 'end' opcode in init expression", .{});
}
}
fn parseDumpNames(step: *Step, reader: anytype, writer: anytype, data: []const u8) !void {
while (reader.context.pos < data.len) {
try parseDumpType(step, std.wasm.NameSubsection, reader, writer);
const size = try std.leb.readULEB128(u32, reader);
const entries = try std.leb.readULEB128(u32, reader);
try writer.print(
\\size {d}
\\names {d}
, .{ size, entries });
try writer.writeByte('\n');
var i: u32 = 0;
while (i < entries) : (i += 1) {
const index = try std.leb.readULEB128(u32, reader);
const name_len = try std.leb.readULEB128(u32, reader);
const pos = reader.context.pos;
const name = data[pos..][0..name_len];
reader.context.pos += name_len;
try writer.print(
\\index {d}
\\name {s}
, .{ index, name });
try writer.writeByte('\n');
}
}
}
fn parseDumpProducers(reader: anytype, writer: anytype, data: []const u8) !void {
const field_count = try std.leb.readULEB128(u32, reader);
try writer.print("fields {d}\n", .{field_count});
var current_field: u32 = 0;
while (current_field < field_count) : (current_field += 1) {
const field_name_length = try std.leb.readULEB128(u32, reader);
const field_name = data[reader.context.pos..][0..field_name_length];
reader.context.pos += field_name_length;
const value_count = try std.leb.readULEB128(u32, reader);
try writer.print(
\\field_name {s}
\\values {d}
, .{ field_name, value_count });
try writer.writeByte('\n');
var current_value: u32 = 0;
while (current_value < value_count) : (current_value += 1) {
const value_length = try std.leb.readULEB128(u32, reader);
const value = data[reader.context.pos..][0..value_length];
reader.context.pos += value_length;
const version_length = try std.leb.readULEB128(u32, reader);
const version = data[reader.context.pos..][0..version_length];
reader.context.pos += version_length;
try writer.print(
\\value_name {s}
\\version {s}
, .{ value, version });
try writer.writeByte('\n');
}
}
}
fn parseDumpFeatures(reader: anytype, writer: anytype, data: []const u8) !void {
const feature_count = try std.leb.readULEB128(u32, reader);
try writer.print("features {d}\n", .{feature_count});
var index: u32 = 0;
while (index < feature_count) : (index += 1) {
const prefix_byte = try std.leb.readULEB128(u8, reader);
const name_length = try std.leb.readULEB128(u32, reader);
const feature_name = data[reader.context.pos..][0..name_length];
reader.context.pos += name_length;
try writer.print("{c} {s}\n", .{ prefix_byte, feature_name });
}
}
};