zig/src/link/Wasm/Archive.zig

const Archive = @This();

const std = @import("std");
const assert = std.debug.assert;
const fs = std.fs;
const log = std.log.scoped(.archive);
const mem = std.mem;

const Allocator = mem.Allocator;
const Object = @import("Object.zig");

/// Open handle to the archive on disk. `parse` and `parseObject` read from
/// it, and `deinit` closes it.
file: fs.File,
/// Path of the archive. `parseObject` resolves it with `realpath` for the
/// object's display name and reopens it to hand a separate file handle to the
/// created object.
name: []const u8,

/// Header of the first archive member, filled in by `parse`. Its size field
/// is read as the size of the symbol table.
header: ar_hdr = undefined,

/// A list of long file names, delimited by a LF character (0x0a).
/// This is stored as a single slice of bytes, as the header-names
/// point to the character index of a file name, rather than the index
/// in the list.
///
/// Defaults to an empty slice rather than `undefined` so that `deinit` is
/// safe to call even when `parse` failed before the name table was read
/// (freeing a zero-length slice is a no-op).
long_file_names: []const u8 = &[_]u8{},

/// Parsed table of contents.
/// Each symbol name points to a list of all definition
/// sites within the current static archive.
/// Keys are owned copies of the symbol names; both keys and value lists are
/// released in `deinit`.
toc: std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged(u32)) = .{},

// Archive files start with the ARMAG identifying string. Then follows a
// `struct ar_hdr`, and as many bytes of member file data as its `ar_size`
// member indicates, for each member file.
/// String that begins an archive file.
const ARMAG: *const [SARMAG:0]u8 = "!<arch>\n";
/// Size of that string.
const SARMAG: u4 = 8;

/// String in ar_fmag at the end of each header.
const ARFMAG: *const [2:0]u8 = "`\n";

/// Fixed-size header that precedes every archive member. All fields are
/// ASCII text padded with spaces; the accessors below decode them.
const ar_hdr = extern struct {
    /// Member file name, sometimes / terminated.
    ar_name: [16]u8,

    /// File date, decimal seconds since Epoch.
    ar_date: [12]u8,

    /// User ID, in ASCII format.
    ar_uid: [6]u8,

    /// Group ID, in ASCII format.
    ar_gid: [6]u8,

    /// File mode, in ASCII octal.
    ar_mode: [8]u8,

    /// File size, in ASCII decimal.
    ar_size: [10]u8,

    /// Always contains ARFMAG.
    ar_fmag: [2]u8,

    /// Result of decoding `ar_name`.
    const NameOrIndex = union(enum) {
        /// Name stored inline in the header, including its terminating '/'.
        name: []const u8,
        /// Decimal number that follows the first '/' in the name field.
        index: u32,
    };

    /// Decodes the name field.
    ///
    /// Returns `.name` when the first '/' is the last non-space character,
    /// otherwise parses everything after that '/' as a decimal `.index`.
    /// Fails with `error.MalformedArchive` when the field contains no '/',
    /// or with a `parseInt` error when the text after the '/' is not a
    /// decimal number.
    ///
    /// NOTE(review): the returned `.name` slice points into the `archive`
    /// parameter, which is taken by value. Callers should only use it while
    /// the header they passed in is still alive - confirm this holds for
    /// every call site.
    fn nameOrIndex(archive: ar_hdr) !NameOrIndex {
        const value = getValue(&archive.ar_name);
        const slash_index = mem.indexOfScalar(u8, value, '/') orelse return error.MalformedArchive;
        const len = value.len;
        if (slash_index == len - 1) {
            // Name stored directly
            return NameOrIndex{ .name = value };
        } else {
            // The name does not fit in the header. The digits after the '/'
            // are a number that the caller uses as an offset into the
            // archive's long file name table.
            const index = try std.fmt.parseInt(u32, value[slash_index + 1 ..], 10);
            return NameOrIndex{ .index = index };
        }
    }

    /// Parses the modification date as decimal seconds.
    /// Space padding is accepted on either side of the digits.
    fn date(archive: ar_hdr) !u64 {
        const value = mem.trim(u8, &archive.ar_date, " ");
        return std.fmt.parseInt(u64, value, 10);
    }

    /// Parses the member size in bytes.
    /// The size field can have spaces padded in front as well as at the end,
    /// so both sides are trimmed before parsing the ASCII value.
    fn size(archive: ar_hdr) !u32 {
        const value = mem.trim(u8, &archive.ar_size, " ");
        return std.fmt.parseInt(u32, value, 10);
    }

    /// Strips the trailing space padding from a raw header field.
    fn getValue(raw: []const u8) []const u8 {
        return mem.trimRight(u8, raw, &[_]u8{@as(u8, 0x20)});
    }
};

/// Closes the archive file and releases everything that was allocated with
/// `allocator`: the symbol names and position lists of the table of
/// contents, the table itself, and the long file name table.
pub fn deinit(archive: *Archive, allocator: Allocator) void {
    archive.file.close();

    // Each entry owns both its key (the symbol name) and its value (the list
    // of positions), so release them pairwise before dropping the map.
    var entries = archive.toc.iterator();
    while (entries.next()) |entry| {
        allocator.free(entry.key_ptr.*);
        entry.value_ptr.deinit(allocator);
    }
    archive.toc.deinit(allocator);

    allocator.free(archive.long_file_names);
}

/// Reads the archive from the current position of `archive.file`.
///
/// Verifies the magic string, stores the first member header in
/// `archive.header`, then parses the table of contents followed by the long
/// file name table. Returns `error.NotArchive` when the magic string or the
/// delimiter of the first header does not match.
pub fn parse(archive: *Archive, allocator: Allocator) !void {
    const reader = archive.file.reader();

    const magic = try reader.readBytesNoEof(SARMAG);
    const magic_matches = mem.eql(u8, &magic, ARMAG);
    if (!magic_matches) {
        log.debug("invalid magic: expected '{s}', found '{s}'", .{ ARMAG, magic });
        return error.NotArchive;
    }

    // The header is stored before it is validated, as the table of contents
    // parser reads its size field from `archive.header`.
    archive.header = try reader.readStruct(ar_hdr);
    const delimiter_matches = mem.eql(u8, &archive.header.ar_fmag, ARFMAG);
    if (!delimiter_matches) {
        log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, archive.header.ar_fmag });
        return error.NotArchive;
    }

    try archive.parseTableOfContents(allocator, reader);
    try archive.parseNameTable(allocator, reader);
}

/// Resolves the file name of the member described by `header`.
///
/// A name stored inline in the header is returned directly; otherwise the
/// header holds an offset into `archive.long_file_names`, and the name runs
/// from that offset up to the next LF. In both cases the '/' that terminates
/// the name is stripped.
///
/// Returns `error.MalformedArchive` when the offset does not point inside the
/// long file name table.
///
/// NOTE(review): for inline names the returned slice points into `header`,
/// which is taken by value here; callers should keep their header alive and
/// not retain the slice beyond it - confirm against call sites.
fn parseName(archive: *const Archive, header: ar_hdr) ![]const u8 {
    const name_or_index = try header.nameOrIndex();
    switch (name_or_index) {
        // The inline form still carries its '/' terminator; strip it so both
        // branches yield the bare file name.
        .name => |name| return mem.trimRight(u8, name, "/"),
        .index => |index| {
            // The offset comes straight from the file, so validate it before
            // slicing instead of tripping a bounds check on bad input.
            if (index >= archive.long_file_names.len) {
                log.err("long file name offset 0x{x} lies outside of the name table", .{index});
                return error.MalformedArchive;
            }
            const name = mem.sliceTo(archive.long_file_names[index..], 0x0a);
            return mem.trimRight(u8, name, "/");
        },
    }
}

/// Parses the archive symbol table that follows `archive.header` and fills
/// `archive.toc`.
///
/// The table consists of a big-endian symbol count, one big-endian `u32`
/// position per symbol, and then one NUL-terminated name per symbol. Every
/// non-empty name is recorded in `archive.toc` together with its position;
/// a name that occurs more than once collects all of its positions.
///
/// `reader` must be positioned directly behind the symbol table's header.
/// Returns `error.MalformedArchive` when the sizes in the table contradict
/// each other or the table is truncated. Entries added to `archive.toc`
/// before an error stay owned by the archive and are released by `deinit`.
fn parseTableOfContents(archive: *Archive, allocator: Allocator, reader: anytype) !void {
    // size field can have extra spaces padded in front as well as the end,
    // so we trim those first before parsing the ASCII value.
    const size_trimmed = mem.trim(u8, &archive.header.ar_size, " ");
    const sym_tab_size = try std.fmt.parseInt(u32, size_trimmed, 10);

    // The table must at least hold the 4-byte symbol count.
    if (sym_tab_size < 4) {
        log.err("symbol table of length 0x{x} is too small to hold a symbol count", .{sym_tab_size});
        return error.MalformedArchive;
    }

    const num_symbols = try reader.readIntBig(u32);
    // Both values come from the file. Checking them against each other first
    // keeps the size arithmetic below from overflowing and avoids allocating
    // for a bogus symbol count.
    if (num_symbols > (sym_tab_size - 4) / 4) {
        log.err("symbol count {d} does not fit in symbol table of length 0x{x}", .{ num_symbols, sym_tab_size });
        return error.MalformedArchive;
    }

    const symbol_positions = try allocator.alloc(u32, num_symbols);
    defer allocator.free(symbol_positions);
    for (symbol_positions) |*index| {
        index.* = try reader.readIntBig(u32);
    }

    const sym_tab = try allocator.alloc(u8, sym_tab_size - 4 - (4 * num_symbols));
    defer allocator.free(sym_tab);

    reader.readNoEof(sym_tab) catch {
        log.err("incomplete symbol table: expected symbol table of length 0x{x}", .{sym_tab.len});
        return error.MalformedArchive;
    };

    // Archive members start on even offsets. An odd-sized member is followed
    // by a single padding byte that is not counted in its size field, so skip
    // it to leave the reader at the next member header.
    if (sym_tab_size % 2 != 0) {
        _ = try reader.readByte();
    }

    var i: usize = 0;
    var pos: usize = 0;
    while (i < num_symbols) : (i += 1) {
        if (pos > sym_tab.len) {
            log.err("symbol table holds fewer names than the {d} symbols it declares", .{num_symbols});
            return error.MalformedArchive;
        }
        const string = mem.sliceTo(sym_tab[pos..], 0);
        pos += string.len + 1;
        if (string.len == 0) continue;

        // Known symbol: only a new position has to be recorded. The lookup
        // borrows `string`, so nothing needs freeing on this path.
        if (archive.toc.getPtr(string)) |existing| {
            try existing.append(allocator, symbol_positions[i]);
            continue;
        }

        // New symbol: perform every fallible step before the entry is put
        // into the map, so that a failure never leaves a key in the map that
        // has already been freed.
        try archive.toc.ensureUnusedCapacity(allocator, 1);
        const name = try allocator.dupe(u8, string);
        errdefer allocator.free(name);
        var positions: std.ArrayListUnmanaged(u32) = .{};
        try positions.append(allocator, symbol_positions[i]);
        archive.toc.putAssumeCapacityNoClobber(name, positions);
    }
}

/// Reads the long file name table and stores it in
/// `archive.long_file_names`.
///
/// `reader` must be positioned at the header of the table. The header has to
/// end in `ARFMAG` and its name has to start with "//", otherwise
/// `error.MalformedArchive` is returned. The table is allocated with
/// `allocator`; on success the archive owns it.
fn parseNameTable(archive: *Archive, allocator: Allocator, reader: anytype) !void {
    const table_header: ar_hdr = try reader.readStruct(ar_hdr);

    const delimiter_matches = mem.eql(u8, &table_header.ar_fmag, ARFMAG);
    if (!delimiter_matches) {
        log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, table_header.ar_fmag });
        return error.MalformedArchive;
    }

    const is_name_table = mem.startsWith(u8, &table_header.ar_name, "//");
    if (!is_name_table) {
        log.err("invalid archive. Long name table missing", .{});
        return error.MalformedArchive;
    }

    const names = try allocator.alloc(u8, try table_header.size());
    errdefer allocator.free(names);
    try reader.readNoEof(names);

    archive.long_file_names = names;
}

/// Parses the archive member whose header starts at `file_offset` into an
/// `Object`.
///
/// The member header is read through `archive.file`, whose position is reset
/// to the start of the file afterwards. The object itself is given a freshly
/// opened handle to the archive, positioned right behind the member header.
/// That handle is closed again when a later step in this function fails.
/// Returns `error.MalformedArchive` when the header delimiter is wrong.
pub fn parseObject(archive: Archive, allocator: Allocator, file_offset: u32) !Object {
    // Read the member header and remember where its contents begin.
    try archive.file.seekTo(file_offset);
    const member_header = try archive.file.reader().readStruct(ar_hdr);
    const contents_offset = try archive.file.getPos();
    try archive.file.seekTo(0);

    const delimiter_matches = mem.eql(u8, &member_header.ar_fmag, ARFMAG);
    if (!delimiter_matches) {
        log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, member_header.ar_fmag });
        return error.MalformedArchive;
    }

    const member_name = try archive.parseName(member_header);

    // The name handed to the object has the form "<archive path>(<member>)".
    var path_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
    const archive_path = try std.os.realpath(archive.name, &path_buffer);
    const display_name = try std.fmt.allocPrint(allocator, "{s}({s})", .{ archive_path, member_name });
    defer allocator.free(display_name);

    const member_file = try std.fs.cwd().openFile(archive.name, .{});
    errdefer member_file.close();

    const member_size = try member_header.size();
    try member_file.seekTo(contents_offset);
    return Object.create(allocator, member_file, display_name, member_size);
}