Merge pull request #7368 from kubkon/macho-trie-cleanup
stage2: MachO export trie cleanup
This commit is contained in: commit 23c1b7faee
@@ -1333,6 +1333,15 @@ pub const N_WEAK_DEF: u16 = 0x80;
/// This bit is only available in .o files (MH_OBJECT filetype)
pub const N_SYMBOL_RESOLVER: u16 = 0x100;

// The following are used on the flags byte of a terminal node
// in the export information.
pub const EXPORT_SYMBOL_FLAGS_KIND_MASK: u8 = 0x03;
pub const EXPORT_SYMBOL_FLAGS_KIND_REGULAR: u8 = 0x00;
pub const EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL: u8 = 0x01;
pub const EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE: u8 = 0x02;
pub const EXPORT_SYMBOL_FLAGS_KIND_WEAK_DEFINITION: u8 = 0x04;
pub const EXPORT_SYMBOL_FLAGS_REEXPORT: u8 = 0x08;
pub const EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER: u8 = 0x10;

// Codesign consts and structs taken from:
// https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/kern/cs_blobs.h.auto.html
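These flag constants are what a terminal node of the export trie stores, ULEB128-encoded next to the symbol's VM address offset (see Trie.zig further down in this diff). A minimal sketch of that encoding, assuming a regular-kind export with no re-export or stub-and-resolver info (mirroring the assertions in Trie.zig); the test name and buffer size are illustrative:

const std = @import("std");
const leb = std.leb;
const mem = std.mem;
const macho = std.macho;
const testing = std.testing;

test "terminal node export info encoding (sketch)" {
    var buf: [2 * @sizeOf(u64)]u8 = undefined;
    var stream = std.io.fixedBufferStream(&buf);
    // A regular definition exported at __TEXT + 0x1000.
    try leb.writeULEB128(stream.writer(), @as(u64, macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR));
    try leb.writeULEB128(stream.writer(), @as(u64, 0x1000));
    // 0x00 is the flags byte, 0x80 0x20 is the ULEB128 encoding of 0x1000.
    testing.expect(mem.eql(u8, buf[0..stream.pos], &[_]u8{ 0x0, 0x80, 0x20 }));
}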
@@ -301,7 +301,10 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void {
    const tracy = trace(@src());
    defer tracy.end();

    switch (self.base.options.output_mode) {
    const output_mode = self.base.options.output_mode;
    const target = self.base.options.target;

    switch (output_mode) {
        .Exe => {
            if (self.entry_addr) |addr| {
                // Update LC_MAIN with entry offset.
@@ -312,12 +315,15 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void {
            try self.writeExportTrie();
            try self.writeSymbolTable();
            try self.writeStringTable();
            // Preallocate space for the code signature.
            // We need to do this at this stage so that we have the load commands with proper values
            // written out to the file.
            // The most important thing here is to have the correct vm size and file size of the __LINKEDIT segment,
            // which is where the code signature goes.
            try self.writeCodeSignaturePadding();

            if (target.cpu.arch == .aarch64) {
                // Preallocate space for the code signature.
                // We need to do this at this stage so that we have the load commands with proper values
                // written out to the file.
                // The most important thing here is to have the correct vm size and file size of the __LINKEDIT segment,
                // which is where the code signature goes.
                try self.writeCodeSignaturePadding();
            }
        },
        .Obj => {},
        .Lib => return error.TODOImplementWritingLibFiles,
@@ -339,9 +345,11 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void {

    assert(!self.cmd_table_dirty);

    switch (self.base.options.output_mode) {
        .Exe, .Lib => try self.writeCodeSignature(), // code signing always comes last
        else => {},
    if (target.cpu.arch == .aarch64) {
        switch (output_mode) {
            .Exe, .Lib => try self.writeCodeSignature(), // code signing always comes last
            else => {},
        }
    }
}

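The new aarch64 checks reflect the fact that arm64 macOS refuses to run binaries without at least an ad-hoc code signature, so both the signature padding and the final signature are emitted only for that architecture. A hedged sketch of the same gating pulled out into a helper (the helper name is hypothetical, not part of the linker):

const std = @import("std");

/// Sketch only: mirrors the `target.cpu.arch == .aarch64` checks above.
fn requiresCodeSignature(target: std.Target) bool {
    // arm64 macOS requires at least an ad-hoc code signature; x86_64 does not.
    return target.cpu.arch == .aarch64;
}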
@@ -752,17 +760,15 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void {
    const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
    const text_section = text_segment.sections.items[self.text_section_index.?];
    const after_last_cmd_offset = self.header.?.sizeofcmds + @sizeOf(macho.mach_header_64);
    const needed_size = @sizeOf(macho.linkedit_data_command);
    const needed_size = @sizeOf(macho.linkedit_data_command) * alloc_num / alloc_den;

    if (needed_size + after_last_cmd_offset > text_section.offset) {
        // TODO We are in a position to increase the padding by moving all sections
        // by the required offset, but this requires a little bit more thinking and bookkeeping.
        // For now, return an error informing the user of the problem.
        log.err("Not enough padding between load commands and start of __text section:\n", .{});
        log.err("Offset after last load command: 0x{x}\n", .{after_last_cmd_offset});
        log.err("Beginning of __text section: 0x{x}\n", .{text_section.offset});
        log.err("Needed size: 0x{x}\n", .{needed_size});
        std.log.err("Unable to extend padding between the end of load commands and start of __text section.", .{});
        std.log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size});
        std.log.err("fall back to the system linker by exporting 'ZIG_SYSTEM_LINKER_HACK=1'.", .{});
        return error.NotEnoughPadding;
    }

    const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
    // TODO This is clunky.
    self.linkedit_segment_next_offset = @intCast(u32, mem.alignForwardGeneric(u64, linkedit_segment.inner.fileoff + linkedit_segment.inner.filesize, @sizeOf(u64)));
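The check above is plain arithmetic over the Mach-O header layout: the new LC_CODE_SIGNATURE load command must fit between the end of the existing load commands and the start of __text. A small worked sketch with made-up numbers (only the two @sizeOf values are real):

const std = @import("std");

test "headerpad check (illustrative numbers)" {
    const header_size: u64 = 0x20; // @sizeOf(macho.mach_header_64)
    const needed_size: u64 = 0x10; // @sizeOf(macho.linkedit_data_command)
    const sizeofcmds: u64 = 0x6e8; // hypothetical size of the existing load commands
    const text_offset: u64 = 0x700; // hypothetical file offset of the __text section

    const after_last_cmd_offset = sizeofcmds + header_size; // 0x708
    // 0x10 + 0x708 > 0x700, so the command does not fit and the user is told
    // to re-link with '-headerpad 0x10' or fall back to the system linker.
    std.testing.expect(needed_size + after_last_cmd_offset > text_offset);
}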
@@ -1799,38 +1805,40 @@ fn writeCodeSignature(self: *MachO) !void {
fn writeExportTrie(self: *MachO) !void {
    if (self.global_symbols.items.len == 0) return;

    var trie: Trie = .{};
    defer trie.deinit(self.base.allocator);
    var trie = Trie.init(self.base.allocator);
    defer trie.deinit();

    const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
    for (self.global_symbols.items) |symbol| {
        // TODO figure out if we should put all global symbols into the export trie
        const name = self.getString(symbol.n_strx);
        assert(symbol.n_value >= text_segment.inner.vmaddr);
        try trie.put(self.base.allocator, .{
        try trie.put(.{
            .name = name,
            .vmaddr_offset = symbol.n_value - text_segment.inner.vmaddr,
            .export_flags = 0, // TODO work out creation of export flags
            .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR,
        });
    }

    var buffer: std.ArrayListUnmanaged(u8) = .{};
    defer buffer.deinit(self.base.allocator);

    try trie.writeULEB128Mem(self.base.allocator, &buffer);
    try trie.finalize();
    var buffer = try self.base.allocator.alloc(u8, trie.size);
    defer self.base.allocator.free(buffer);
    var stream = std.io.fixedBufferStream(buffer);
    const nwritten = try trie.write(stream.writer());
    assert(nwritten == trie.size);

    const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
    const export_size = @intCast(u32, mem.alignForward(buffer.items.len, @sizeOf(u64)));
    const export_size = @intCast(u32, mem.alignForward(buffer.len, @sizeOf(u64)));
    dyld_info.export_off = self.linkedit_segment_next_offset.?;
    dyld_info.export_size = export_size;

    log.debug("writing export trie from 0x{x} to 0x{x}\n", .{ dyld_info.export_off, dyld_info.export_off + export_size });

    if (export_size > buffer.items.len) {
    if (export_size > buffer.len) {
        // Pad out to align(8).
        try self.base.file.?.pwriteAll(&[_]u8{0}, dyld_info.export_off + export_size);
    }
    try self.base.file.?.pwriteAll(buffer.items, dyld_info.export_off);
    try self.base.file.?.pwriteAll(buffer, dyld_info.export_off);

    self.linkedit_segment_next_offset = dyld_info.export_off + dyld_info.export_size;
    // Advance size of __LINKEDIT segment
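With the cleanup, writeExportTrie uses the Trie API in a fixed sequence: init, put once per exported symbol, finalize, then write into a caller-owned buffer of trie.size bytes. A minimal, self-contained sketch of that flow (the import path and the symbol values are illustrative):

const std = @import("std");
const macho = std.macho;
const Trie = @import("Trie.zig"); // illustrative path

test "export trie usage (sketch)" {
    var trie = Trie.init(std.testing.allocator);
    defer trie.deinit();

    try trie.put(.{
        .name = "_main",
        .vmaddr_offset = 0x1000, // offset from the __TEXT segment's vmaddr
        .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR,
    });

    try trie.finalize(); // computes node offsets, ordered_nodes and trie.size

    var buffer = try std.testing.allocator.alloc(u8, trie.size);
    defer std.testing.allocator.free(buffer);
    var stream = std.io.fixedBufferStream(buffer);
    const nwritten = try trie.write(stream.writer());
    std.testing.expect(nwritten == trie.size);
}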
@ -1917,7 +1925,9 @@ fn parseFromFile(self: *MachO, file: fs.File) !void {
|
||||
switch (cmd.cmd()) {
|
||||
macho.LC_SEGMENT_64 => {
|
||||
const x = cmd.Segment;
|
||||
if (isSegmentOrSection(&x.inner.segname, "__LINKEDIT")) {
|
||||
if (isSegmentOrSection(&x.inner.segname, "__PAGEZERO")) {
|
||||
self.pagezero_segment_cmd_index = i;
|
||||
} else if (isSegmentOrSection(&x.inner.segname, "__LINKEDIT")) {
|
||||
self.linkedit_segment_cmd_index = i;
|
||||
} else if (isSegmentOrSection(&x.inner.segname, "__TEXT")) {
|
||||
self.text_segment_cmd_index = i;
|
||||
@@ -1926,16 +1936,48 @@ fn parseFromFile(self: *MachO, file: fs.File) !void {
|
||||
self.text_section_index = @intCast(u16, j);
|
||||
}
|
||||
}
|
||||
} else if (isSegmentOrSection(&x.inner.segname, "__DATA")) {
|
||||
self.data_segment_cmd_index = i;
|
||||
}
|
||||
},
|
||||
macho.LC_DYLD_INFO_ONLY => {
|
||||
self.dyld_info_cmd_index = i;
|
||||
},
|
||||
macho.LC_SYMTAB => {
|
||||
self.symtab_cmd_index = i;
|
||||
},
|
||||
macho.LC_DYSYMTAB => {
|
||||
self.dysymtab_cmd_index = i;
|
||||
},
|
||||
macho.LC_LOAD_DYLINKER => {
|
||||
self.dylinker_cmd_index = i;
|
||||
},
|
||||
macho.LC_VERSION_MIN_MACOSX, macho.LC_VERSION_MIN_IPHONEOS, macho.LC_VERSION_MIN_WATCHOS, macho.LC_VERSION_MIN_TVOS => {
|
||||
self.version_min_cmd_index = i;
|
||||
},
|
||||
macho.LC_SOURCE_VERSION => {
|
||||
self.source_version_cmd_index = i;
|
||||
},
|
||||
macho.LC_MAIN => {
|
||||
self.main_cmd_index = i;
|
||||
},
|
||||
macho.LC_LOAD_DYLIB => {
|
||||
self.libsystem_cmd_index = i; // TODO This is incorrect, but we'll fixup later.
|
||||
},
|
||||
macho.LC_FUNCTION_STARTS => {
|
||||
self.function_starts_cmd_index = i;
|
||||
},
|
||||
macho.LC_DATA_IN_CODE => {
|
||||
self.data_in_code_cmd_index = i;
|
||||
},
|
||||
macho.LC_CODE_SIGNATURE => {
|
||||
self.code_signature_cmd_index = i;
|
||||
},
|
||||
// TODO populate more MachO fields
|
||||
else => {},
|
||||
else => {
|
||||
std.log.err("Unknown load command detected: 0x{x}.", .{cmd.cmd()});
|
||||
return error.UnknownLoadCommand;
|
||||
},
|
||||
}
|
||||
self.load_commands.appendAssumeCapacity(cmd);
|
||||
}
|
||||
|
||||
@@ -34,156 +34,218 @@ const std = @import("std");
|
||||
const mem = std.mem;
|
||||
const leb = std.leb;
|
||||
const log = std.log.scoped(.link);
|
||||
const macho = std.macho;
|
||||
const testing = std.testing;
|
||||
const assert = std.debug.assert;
|
||||
const Allocator = mem.Allocator;
|
||||
|
||||
pub const Symbol = struct {
|
||||
name: []const u8,
|
||||
vmaddr_offset: u64,
|
||||
export_flags: u64,
|
||||
};
|
||||
pub const Node = struct {
|
||||
base: *Trie,
|
||||
|
||||
const Edge = struct {
|
||||
from: *Node,
|
||||
to: *Node,
|
||||
label: []const u8,
|
||||
/// Terminal info associated with this node.
|
||||
/// If this node is not a terminal node, info is null.
|
||||
terminal_info: ?struct {
|
||||
/// Export flags associated with this exported symbol.
|
||||
export_flags: u64,
|
||||
/// VM address offset with respect to the section this symbol is defined against.
|
||||
vmaddr_offset: u64,
|
||||
} = null,
|
||||
|
||||
fn deinit(self: *Edge, alloc: *Allocator) void {
|
||||
self.to.deinit(alloc);
|
||||
alloc.destroy(self.to);
|
||||
self.from = undefined;
|
||||
self.to = undefined;
|
||||
}
|
||||
};
|
||||
|
||||
const Node = struct {
|
||||
/// Export flags associated with this exported symbol (if any).
|
||||
export_flags: ?u64 = null,
|
||||
/// VM address offset with respect to the section this symbol is defined against (if any).
|
||||
vmaddr_offset: ?u64 = null,
|
||||
/// Offset of this node in the trie output byte stream.
|
||||
trie_offset: ?usize = null,
|
||||
|
||||
/// List of all edges originating from this node.
|
||||
edges: std.ArrayListUnmanaged(Edge) = .{},
|
||||
|
||||
fn deinit(self: *Node, alloc: *Allocator) void {
|
||||
for (self.edges.items) |*edge| {
|
||||
edge.deinit(alloc);
|
||||
}
|
||||
self.edges.deinit(alloc);
|
||||
}
|
||||
node_dirty: bool = true,
|
||||
|
||||
const PutResult = struct {
|
||||
/// Node reached at this stage of `put` op.
|
||||
node: *Node,
|
||||
/// Count of newly inserted nodes at this stage of `put` op.
|
||||
node_count: usize,
|
||||
/// Edge connecting to nodes in the trie.
|
||||
pub const Edge = struct {
|
||||
from: *Node,
|
||||
to: *Node,
|
||||
label: []u8,
|
||||
|
||||
fn deinit(self: *Edge, allocator: *Allocator) void {
|
||||
self.to.deinit(allocator);
|
||||
allocator.destroy(self.to);
|
||||
allocator.free(self.label);
|
||||
self.from = undefined;
|
||||
self.to = undefined;
|
||||
self.label = undefined;
|
||||
}
|
||||
};
|
||||
|
||||
fn deinit(self: *Node, allocator: *Allocator) void {
|
||||
for (self.edges.items) |*edge| {
|
||||
edge.deinit(allocator);
|
||||
}
|
||||
self.edges.deinit(allocator);
|
||||
}
|
||||
|
||||
/// Inserts a new node starting from `self`.
|
||||
fn put(self: *Node, alloc: *Allocator, label: []const u8, node_count: usize) !PutResult {
|
||||
var curr_node_count = node_count;
|
||||
fn put(self: *Node, allocator: *Allocator, label: []const u8) !*Node {
|
||||
// Check for match with edges from this node.
|
||||
for (self.edges.items) |*edge| {
|
||||
const match = mem.indexOfDiff(u8, edge.label, label) orelse return PutResult{
|
||||
.node = edge.to,
|
||||
.node_count = curr_node_count,
|
||||
};
|
||||
const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to;
|
||||
if (match == 0) continue;
|
||||
if (match == edge.label.len) return edge.to.put(alloc, label[match..], curr_node_count);
|
||||
if (match == edge.label.len) return edge.to.put(allocator, label[match..]);
|
||||
|
||||
// Found a match, need to splice up nodes.
|
||||
// From: A -> B
|
||||
// To: A -> C -> B
|
||||
const mid = try alloc.create(Node);
|
||||
mid.* = .{};
|
||||
const to_label = edge.label;
|
||||
const mid = try allocator.create(Node);
|
||||
mid.* = .{ .base = self.base };
|
||||
var to_label = try allocator.dupe(u8, edge.label[match..]);
|
||||
allocator.free(edge.label);
|
||||
const to_node = edge.to;
|
||||
edge.to = mid;
|
||||
edge.label = label[0..match];
|
||||
curr_node_count += 1;
|
||||
edge.label = try allocator.dupe(u8, label[0..match]);
|
||||
self.base.node_count += 1;
|
||||
|
||||
try mid.edges.append(alloc, .{
|
||||
try mid.edges.append(allocator, .{
|
||||
.from = mid,
|
||||
.to = to_node,
|
||||
.label = to_label[match..],
|
||||
.label = to_label,
|
||||
});
|
||||
|
||||
if (match == label.len) {
|
||||
return PutResult{ .node = to_node, .node_count = curr_node_count };
|
||||
} else {
|
||||
return mid.put(alloc, label[match..], curr_node_count);
|
||||
}
|
||||
return if (match == label.len) to_node else mid.put(allocator, label[match..]);
|
||||
}
|
||||
|
||||
// Add a new node.
|
||||
const node = try alloc.create(Node);
|
||||
node.* = .{};
|
||||
curr_node_count += 1;
|
||||
const node = try allocator.create(Node);
|
||||
node.* = .{ .base = self.base };
|
||||
self.base.node_count += 1;
|
||||
|
||||
try self.edges.append(alloc, .{
|
||||
try self.edges.append(allocator, .{
|
||||
.from = self,
|
||||
.to = node,
|
||||
.label = label,
|
||||
.label = try allocator.dupe(u8, label),
|
||||
});
|
||||
|
||||
return PutResult{ .node = node, .node_count = curr_node_count };
|
||||
return node;
|
||||
}
|
||||
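The three branches above are driven entirely by mem.indexOfDiff, which returns null for identical slices and the length of the shorter slice when one is a prefix of the other. A small sketch of the three cases, reusing labels from the tests at the bottom of this file and the std/mem/testing imports at its top:

test "indexOfDiff cases used by Node.put (sketch)" {
    // Identical label: the symbol is already present, return the existing node.
    testing.expect(mem.indexOfDiff(u8, "_st", "_st") == null);
    // Existing edge label "_st" is a full prefix of "_start": recurse with "art".
    testing.expect(mem.indexOfDiff(u8, "_st", "_start").? == 3);
    // Partial overlap between "_st" and "_main": split the edge after "_",
    // i.e. splice A -> B into A -> C -> B.
    testing.expect(mem.indexOfDiff(u8, "_st", "_main").? == 1);
}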
|
||||
/// This method should only be called *after* updateOffset has been called!
|
||||
/// In case this is not upheld, this method will panic.
|
||||
fn writeULEB128Mem(self: Node, buffer: *std.ArrayListUnmanaged(u8)) !void {
|
||||
assert(self.trie_offset != null); // You need to call updateOffset first.
|
||||
if (self.vmaddr_offset) |offset| {
|
||||
/// Recursively parses the node from the input byte stream.
|
||||
fn read(self: *Node, allocator: *Allocator, reader: anytype) Trie.ReadError!usize {
|
||||
self.node_dirty = true;
|
||||
const trie_offset = try reader.context.getPos();
|
||||
self.trie_offset = trie_offset;
|
||||
|
||||
var nread: usize = 0;
|
||||
|
||||
const node_size = try leb.readULEB128(u64, reader);
|
||||
if (node_size > 0) {
|
||||
const export_flags = try leb.readULEB128(u64, reader);
|
||||
// TODO Parse special flags.
|
||||
assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and
|
||||
export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0);
|
||||
|
||||
const vmaddr_offset = try leb.readULEB128(u64, reader);
|
||||
|
||||
self.terminal_info = .{
|
||||
.export_flags = export_flags,
|
||||
.vmaddr_offset = vmaddr_offset,
|
||||
};
|
||||
}
|
||||
|
||||
const nedges = try reader.readByte();
|
||||
self.base.node_count += nedges;
|
||||
|
||||
nread += (try reader.context.getPos()) - trie_offset;
|
||||
|
||||
var i: usize = 0;
|
||||
while (i < nedges) : (i += 1) {
|
||||
const edge_start_pos = try reader.context.getPos();
|
||||
|
||||
const label = blk: {
|
||||
var label_buf = std.ArrayList(u8).init(allocator);
|
||||
while (true) {
|
||||
const next = try reader.readByte();
|
||||
if (next == @as(u8, 0))
|
||||
break;
|
||||
try label_buf.append(next);
|
||||
}
|
||||
break :blk label_buf.toOwnedSlice();
|
||||
};
|
||||
|
||||
const seek_to = try leb.readULEB128(u64, reader);
|
||||
const return_pos = try reader.context.getPos();
|
||||
|
||||
nread += return_pos - edge_start_pos;
|
||||
try reader.context.seekTo(seek_to);
|
||||
|
||||
const node = try allocator.create(Node);
|
||||
node.* = .{ .base = self.base };
|
||||
|
||||
nread += try node.read(allocator, reader);
|
||||
try self.edges.append(allocator, .{
|
||||
.from = self,
|
||||
.to = node,
|
||||
.label = label,
|
||||
});
|
||||
try reader.context.seekTo(return_pos);
|
||||
}
|
||||
|
||||
return nread;
|
||||
}
|
||||
|
||||
/// Writes this node to a byte stream.
/// The children of this node are *not* written to the byte stream
/// recursively. To write all nodes to a byte stream in sequence,
/// iterate over `Trie.ordered_nodes` and call this method on each node.
/// This is one of the requirements of the Mach-O export trie format.
/// Panics if `finalize` was not called before calling this method.
|
||||
fn write(self: Node, writer: anytype) !void {
|
||||
assert(!self.node_dirty);
|
||||
if (self.terminal_info) |info| {
|
||||
// Terminal node info: encode export flags and vmaddr offset of this symbol.
|
||||
var info_buf_len: usize = 0;
|
||||
var info_buf: [@sizeOf(u64) * 2]u8 = undefined;
|
||||
var info_stream = std.io.fixedBufferStream(&info_buf);
|
||||
try leb.writeULEB128(info_stream.writer(), self.export_flags.?);
|
||||
try leb.writeULEB128(info_stream.writer(), offset);
|
||||
// TODO Implement for special flags.
|
||||
assert(info.export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and
|
||||
info.export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0);
|
||||
try leb.writeULEB128(info_stream.writer(), info.export_flags);
|
||||
try leb.writeULEB128(info_stream.writer(), info.vmaddr_offset);
|
||||
|
||||
// Encode the size of the terminal node info.
|
||||
var size_buf: [@sizeOf(u64)]u8 = undefined;
|
||||
var size_stream = std.io.fixedBufferStream(&size_buf);
|
||||
try leb.writeULEB128(size_stream.writer(), info_stream.pos);
|
||||
|
||||
// Now, write them to the output buffer.
|
||||
buffer.appendSliceAssumeCapacity(size_buf[0..size_stream.pos]);
|
||||
buffer.appendSliceAssumeCapacity(info_buf[0..info_stream.pos]);
|
||||
// Now, write them to the output stream.
|
||||
try writer.writeAll(size_buf[0..size_stream.pos]);
|
||||
try writer.writeAll(info_buf[0..info_stream.pos]);
|
||||
} else {
|
||||
// Non-terminal node is delimited by 0 byte.
|
||||
buffer.appendAssumeCapacity(0);
|
||||
try writer.writeByte(0);
|
||||
}
|
||||
// Write number of edges (max legal number of edges is 256).
|
||||
buffer.appendAssumeCapacity(@intCast(u8, self.edges.items.len));
|
||||
try writer.writeByte(@intCast(u8, self.edges.items.len));
|
||||
|
||||
for (self.edges.items) |edge| {
|
||||
// Write edges labels.
|
||||
buffer.appendSliceAssumeCapacity(edge.label);
|
||||
buffer.appendAssumeCapacity(0);
|
||||
|
||||
var buf: [@sizeOf(u64)]u8 = undefined;
|
||||
var buf_stream = std.io.fixedBufferStream(&buf);
|
||||
try leb.writeULEB128(buf_stream.writer(), edge.to.trie_offset.?);
|
||||
buffer.appendSliceAssumeCapacity(buf[0..buf_stream.pos]);
|
||||
// Write edge label and offset to next node in trie.
|
||||
try writer.writeAll(edge.label);
|
||||
try writer.writeByte(0);
|
||||
try leb.writeULEB128(writer, edge.to.trie_offset.?);
|
||||
}
|
||||
}
|
||||
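As a concrete picture of what write emits, the expected bytes in the tests below encode the terminal node for a symbol at __TEXT + 0x1000 with export_flags 0 as five bytes: ULEB128(terminal info size) = 0x03, ULEB128(export_flags) = 0x00, ULEB128(vmaddr_offset) = 0x80 0x20, then the edge count 0x00. A short sketch of the vmaddr part, using the leb/testing imports at the top of this file:

test "terminal node byte layout matches ULEB128 encoding (sketch)" {
    var buf: [8]u8 = undefined;
    var stream = std.io.fixedBufferStream(&buf);
    try leb.writeULEB128(stream.writer(), @as(u64, 0x1000));
    // vmaddr_offset 0x1000 encodes as 0x80 0x20, so the whole terminal node is
    // { 0x3, 0x0, 0x80, 0x20, 0x0 } as in the expected buffers of the tests below.
    testing.expect(mem.eql(u8, buf[0..stream.pos], &[_]u8{ 0x80, 0x20 }));
}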
|
||||
const UpdateResult = struct {
|
||||
const FinalizeResult = struct {
|
||||
/// Current size of this node in bytes.
|
||||
node_size: usize,
|
||||
|
||||
/// True if the trie offset of this node in the output byte stream
|
||||
/// would need updating; false otherwise.
|
||||
updated: bool,
|
||||
};
|
||||
|
||||
/// Updates offset of this node in the output byte stream.
|
||||
fn updateOffset(self: *Node, offset: usize) UpdateResult {
|
||||
fn finalize(self: *Node, offset_in_trie: usize) FinalizeResult {
|
||||
var node_size: usize = 0;
|
||||
if (self.vmaddr_offset) |vmaddr| {
|
||||
node_size += sizeULEB128Mem(self.export_flags.?);
|
||||
node_size += sizeULEB128Mem(vmaddr);
|
||||
if (self.terminal_info) |info| {
|
||||
node_size += sizeULEB128Mem(info.export_flags);
|
||||
node_size += sizeULEB128Mem(info.vmaddr_offset);
|
||||
node_size += sizeULEB128Mem(node_size);
|
||||
} else {
|
||||
node_size += 1; // 0x0 for non-terminal nodes
|
||||
@@ -196,8 +258,9 @@ const Node = struct {
|
||||
}
|
||||
|
||||
const trie_offset = self.trie_offset orelse 0;
|
||||
const updated = offset != trie_offset;
|
||||
self.trie_offset = offset;
|
||||
const updated = offset_in_trie != trie_offset;
|
||||
self.trie_offset = offset_in_trie;
|
||||
self.node_dirty = false;
|
||||
|
||||
return .{ .node_size = node_size, .updated = updated };
|
||||
}
|
||||
@@ -215,70 +278,146 @@ const Node = struct {
|
||||
}
|
||||
};
|
||||
|
||||
/// Count of nodes in the trie.
|
||||
/// The count is updated at every `put` call.
|
||||
/// The trie always consists of at least a root node, hence
|
||||
/// the count always starts at 1.
|
||||
node_count: usize = 1,
|
||||
/// The root node of the trie.
|
||||
root: Node = .{},
|
||||
root: ?*Node = null,
|
||||
|
||||
allocator: *Allocator,
|
||||
|
||||
/// If you want to access nodes in the order they are written out,
/// you should call `finalize` first, since the nodes
/// in this container may be stale if more insertions
/// took place after the last `finalize` call.
|
||||
ordered_nodes: std.ArrayListUnmanaged(*Node) = .{},
|
||||
|
||||
/// The size of the trie in bytes.
|
||||
/// This value may be outdated if there were additional
|
||||
/// insertions performed after `finalize` was called.
|
||||
/// Call `finalize` before accessing this value to ensure
|
||||
/// it is up-to-date.
|
||||
size: usize = 0,
|
||||
|
||||
/// Number of nodes currently in the trie.
|
||||
node_count: usize = 0,
|
||||
|
||||
trie_dirty: bool = true,
|
||||
|
||||
pub fn init(allocator: *Allocator) Trie {
|
||||
return .{ .allocator = allocator };
|
||||
}
|
||||
|
||||
/// Export symbol that is to be placed in the trie.
|
||||
pub const ExportSymbol = struct {
|
||||
/// Name of the symbol.
|
||||
name: []const u8,
|
||||
|
||||
/// Offset of this symbol's virtual memory address from the beginning
|
||||
/// of the __TEXT segment.
|
||||
vmaddr_offset: u64,
|
||||
|
||||
/// Export flags of this exported symbol.
|
||||
export_flags: u64,
|
||||
};
|
||||
|
||||
/// Insert a symbol into the trie, updating the prefixes in the process.
|
||||
/// This operation may change the layout of the trie by splicing edges in
|
||||
/// certain circumstances.
|
||||
pub fn put(self: *Trie, alloc: *Allocator, symbol: Symbol) !void {
|
||||
const res = try self.root.put(alloc, symbol.name, 0);
|
||||
self.node_count += res.node_count;
|
||||
res.node.vmaddr_offset = symbol.vmaddr_offset;
|
||||
res.node.export_flags = symbol.export_flags;
|
||||
pub fn put(self: *Trie, symbol: ExportSymbol) !void {
|
||||
try self.createRoot();
|
||||
const node = try self.root.?.put(self.allocator, symbol.name);
|
||||
node.terminal_info = .{
|
||||
.vmaddr_offset = symbol.vmaddr_offset,
|
||||
.export_flags = symbol.export_flags,
|
||||
};
|
||||
self.trie_dirty = true;
|
||||
}
|
||||
|
||||
/// Write the trie to a buffer ULEB128 encoded.
|
||||
pub fn writeULEB128Mem(self: *Trie, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) !void {
|
||||
var ordered_nodes: std.ArrayListUnmanaged(*Node) = .{};
|
||||
defer ordered_nodes.deinit(alloc);
|
||||
/// Finalizes this trie for writing to a byte stream.
|
||||
/// This step performs multiple passes through the trie ensuring
|
||||
/// there are no gaps after every `Node` is ULEB128 encoded.
|
||||
/// Call this method before trying to `write` the trie to a byte stream.
|
||||
pub fn finalize(self: *Trie) !void {
|
||||
if (!self.trie_dirty) return;
|
||||
|
||||
try ordered_nodes.ensureCapacity(alloc, self.node_count);
|
||||
walkInOrder(&self.root, &ordered_nodes);
|
||||
self.ordered_nodes.shrinkRetainingCapacity(0);
|
||||
try self.ordered_nodes.ensureCapacity(self.allocator, self.node_count);
|
||||
|
||||
comptime const Fifo = std.fifo.LinearFifo(*Node, .{ .Static = std.math.maxInt(u8) });
|
||||
var fifo = Fifo.init();
|
||||
try fifo.writeItem(self.root.?);
|
||||
|
||||
while (fifo.readItem()) |next| {
|
||||
for (next.edges.items) |*edge| {
|
||||
try fifo.writeItem(edge.to);
|
||||
}
|
||||
self.ordered_nodes.appendAssumeCapacity(next);
|
||||
}
|
||||
|
||||
var offset: usize = 0;
|
||||
var more: bool = true;
|
||||
while (more) {
|
||||
offset = 0;
|
||||
self.size = 0;
|
||||
more = false;
|
||||
for (ordered_nodes.items) |node| {
|
||||
const res = node.updateOffset(offset);
|
||||
offset += res.node_size;
|
||||
for (self.ordered_nodes.items) |node| {
|
||||
const res = node.finalize(self.size);
|
||||
self.size += res.node_size;
|
||||
if (res.updated) more = true;
|
||||
}
|
||||
}
|
||||
|
||||
try buffer.ensureCapacity(alloc, buffer.items.len + offset);
|
||||
for (ordered_nodes.items) |node| {
|
||||
try node.writeULEB128Mem(buffer);
|
||||
}
|
||||
self.trie_dirty = false;
|
||||
}
|
||||
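The while (more) loop above has to iterate to a fixed point because the edge offsets written by Node.write are themselves ULEB128-encoded: when a node's offset crosses a 7-bit boundary its encoding grows by one byte, which shifts every later node and can grow further encodings. A small sketch of that boundary, using the leb/testing imports at the top of this file:

test "ULEB128 offsets grow at 7-bit boundaries (sketch)" {
    var buf: [@sizeOf(u64)]u8 = undefined;

    var one_byte = std.io.fixedBufferStream(&buf);
    try leb.writeULEB128(one_byte.writer(), @as(u64, 127));
    testing.expect(one_byte.pos == 1); // offsets up to 127 fit in a single byte

    var two_bytes = std.io.fixedBufferStream(&buf);
    try leb.writeULEB128(two_bytes.writer(), @as(u64, 128));
    testing.expect(two_bytes.pos == 2); // 128 and above need a second byte
}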
|
||||
/// Walks the trie in DFS order gathering all nodes into a linear stream of nodes.
|
||||
fn walkInOrder(node: *Node, list: *std.ArrayListUnmanaged(*Node)) void {
|
||||
list.appendAssumeCapacity(node);
|
||||
for (node.edges.items) |*edge| {
|
||||
walkInOrder(edge.to, list);
|
||||
}
|
||||
const ReadError = error{
|
||||
OutOfMemory,
|
||||
EndOfStream,
|
||||
Overflow,
|
||||
};
|
||||
|
||||
/// Parse the trie from a byte stream.
|
||||
pub fn read(self: *Trie, reader: anytype) ReadError!usize {
|
||||
try self.createRoot();
|
||||
return self.root.?.read(self.allocator, reader);
|
||||
}
|
||||
|
||||
pub fn deinit(self: *Trie, alloc: *Allocator) void {
|
||||
self.root.deinit(alloc);
|
||||
/// Write the trie to a byte stream.
|
||||
/// Caller owns the memory and needs to free it.
|
||||
/// Panics if the trie was not finalized using `finalize`
|
||||
/// before calling this method.
|
||||
pub fn write(self: Trie, writer: anytype) !usize {
|
||||
assert(!self.trie_dirty);
|
||||
var counting_writer = std.io.countingWriter(writer);
|
||||
for (self.ordered_nodes.items) |node| {
|
||||
try node.write(counting_writer.writer());
|
||||
}
|
||||
return counting_writer.bytes_written;
|
||||
}
|
||||
|
||||
pub fn deinit(self: *Trie) void {
|
||||
if (self.root) |root| {
|
||||
root.deinit(self.allocator);
|
||||
self.allocator.destroy(root);
|
||||
}
|
||||
self.ordered_nodes.deinit(self.allocator);
|
||||
}
|
||||
|
||||
fn createRoot(self: *Trie) !void {
|
||||
if (self.root == null) {
|
||||
const root = try self.allocator.create(Node);
|
||||
root.* = .{ .base = self };
|
||||
self.root = root;
|
||||
self.node_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
test "Trie node count" {
|
||||
var gpa = testing.allocator;
|
||||
var trie: Trie = .{};
|
||||
defer trie.deinit(gpa);
|
||||
var trie = Trie.init(gpa);
|
||||
defer trie.deinit();
|
||||
|
||||
testing.expectEqual(trie.node_count, 1);
|
||||
testing.expectEqual(trie.node_count, 0);
|
||||
testing.expect(trie.root == null);
|
||||
|
||||
try trie.put(gpa, .{
|
||||
try trie.put(.{
|
||||
.name = "_main",
|
||||
.vmaddr_offset = 0,
|
||||
.export_flags = 0,
|
||||
@@ -286,14 +425,14 @@ test "Trie node count" {
|
||||
testing.expectEqual(trie.node_count, 2);
|
||||
|
||||
// Inserting the same node shouldn't update the trie.
|
||||
try trie.put(gpa, .{
|
||||
try trie.put(.{
|
||||
.name = "_main",
|
||||
.vmaddr_offset = 0,
|
||||
.export_flags = 0,
|
||||
});
|
||||
testing.expectEqual(trie.node_count, 2);
|
||||
|
||||
try trie.put(gpa, .{
|
||||
try trie.put(.{
|
||||
.name = "__mh_execute_header",
|
||||
.vmaddr_offset = 0x1000,
|
||||
.export_flags = 0,
|
||||
@@ -301,13 +440,13 @@ test "Trie node count" {
|
||||
testing.expectEqual(trie.node_count, 4);
|
||||
|
||||
// Inserting the same node shouldn't update the trie.
|
||||
try trie.put(gpa, .{
|
||||
try trie.put(.{
|
||||
.name = "__mh_execute_header",
|
||||
.vmaddr_offset = 0x1000,
|
||||
.export_flags = 0,
|
||||
});
|
||||
testing.expectEqual(trie.node_count, 4);
|
||||
try trie.put(gpa, .{
|
||||
try trie.put(.{
|
||||
.name = "_main",
|
||||
.vmaddr_offset = 0,
|
||||
.export_flags = 0,
|
||||
@@ -317,31 +456,28 @@ test "Trie node count" {
|
||||
|
||||
test "Trie basic" {
|
||||
var gpa = testing.allocator;
|
||||
var trie: Trie = .{};
|
||||
defer trie.deinit(gpa);
|
||||
|
||||
// root
|
||||
testing.expect(trie.root.edges.items.len == 0);
|
||||
var trie = Trie.init(gpa);
|
||||
defer trie.deinit();
|
||||
|
||||
// root --- _st ---> node
|
||||
try trie.put(gpa, .{
|
||||
try trie.put(.{
|
||||
.name = "_st",
|
||||
.vmaddr_offset = 0,
|
||||
.export_flags = 0,
|
||||
});
|
||||
testing.expect(trie.root.edges.items.len == 1);
|
||||
testing.expect(mem.eql(u8, trie.root.edges.items[0].label, "_st"));
|
||||
testing.expect(trie.root.?.edges.items.len == 1);
|
||||
testing.expect(mem.eql(u8, trie.root.?.edges.items[0].label, "_st"));
|
||||
|
||||
{
|
||||
// root --- _st ---> node --- art ---> node
|
||||
try trie.put(gpa, .{
|
||||
try trie.put(.{
|
||||
.name = "_start",
|
||||
.vmaddr_offset = 0,
|
||||
.export_flags = 0,
|
||||
});
|
||||
testing.expect(trie.root.edges.items.len == 1);
|
||||
testing.expect(trie.root.?.edges.items.len == 1);
|
||||
|
||||
const nextEdge = &trie.root.edges.items[0];
|
||||
const nextEdge = &trie.root.?.edges.items[0];
|
||||
testing.expect(mem.eql(u8, nextEdge.label, "_st"));
|
||||
testing.expect(nextEdge.to.edges.items.len == 1);
|
||||
testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "art"));
|
||||
@@ -350,14 +486,14 @@ test "Trie basic" {
|
||||
// root --- _ ---> node --- st ---> node --- art ---> node
|
||||
// |
|
||||
// | --- main ---> node
|
||||
try trie.put(gpa, .{
|
||||
try trie.put(.{
|
||||
.name = "_main",
|
||||
.vmaddr_offset = 0,
|
||||
.export_flags = 0,
|
||||
});
|
||||
testing.expect(trie.root.edges.items.len == 1);
|
||||
testing.expect(trie.root.?.edges.items.len == 1);
|
||||
|
||||
const nextEdge = &trie.root.edges.items[0];
|
||||
const nextEdge = &trie.root.?.edges.items[0];
|
||||
testing.expect(mem.eql(u8, nextEdge.label, "_"));
|
||||
testing.expect(nextEdge.to.edges.items.len == 2);
|
||||
testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "st"));
|
||||
@@ -368,72 +504,81 @@ test "Trie basic" {
|
||||
}
|
||||
}
|
||||
|
||||
test "Trie.writeULEB128Mem" {
|
||||
test "write Trie to a byte stream" {
|
||||
var gpa = testing.allocator;
|
||||
var trie: Trie = .{};
|
||||
defer trie.deinit(gpa);
|
||||
var trie = Trie.init(gpa);
|
||||
defer trie.deinit();
|
||||
|
||||
try trie.put(gpa, .{
|
||||
try trie.put(.{
|
||||
.name = "__mh_execute_header",
|
||||
.vmaddr_offset = 0,
|
||||
.export_flags = 0,
|
||||
});
|
||||
try trie.put(gpa, .{
|
||||
try trie.put(.{
|
||||
.name = "_main",
|
||||
.vmaddr_offset = 0x1000,
|
||||
.export_flags = 0,
|
||||
});
|
||||
|
||||
var buffer: std.ArrayListUnmanaged(u8) = .{};
|
||||
defer buffer.deinit(gpa);
|
||||
|
||||
try trie.writeULEB128Mem(gpa, &buffer);
|
||||
try trie.finalize();
|
||||
try trie.finalize(); // Finalizing multiple times is a no-op unless new nodes were added in between.
|
||||
|
||||
const exp_buffer = [_]u8{
|
||||
0x0,
|
||||
0x1,
|
||||
0x5f,
|
||||
0x0,
|
||||
0x5,
|
||||
0x0,
|
||||
0x2,
|
||||
0x5f,
|
||||
0x6d,
|
||||
0x68,
|
||||
0x5f,
|
||||
0x65,
|
||||
0x78,
|
||||
0x65,
|
||||
0x63,
|
||||
0x75,
|
||||
0x74,
|
||||
0x65,
|
||||
0x5f,
|
||||
0x68,
|
||||
0x65,
|
||||
0x61,
|
||||
0x64,
|
||||
0x65,
|
||||
0x72,
|
||||
0x0,
|
||||
0x21,
|
||||
0x6d,
|
||||
0x61,
|
||||
0x69,
|
||||
0x6e,
|
||||
0x0,
|
||||
0x25,
|
||||
0x2,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x3,
|
||||
0x0,
|
||||
0x80,
|
||||
0x20,
|
||||
0x0,
|
||||
0x0, 0x1, // node root
|
||||
0x5f, 0x0, 0x5, // edge '_'
|
||||
0x0, 0x2, // non-terminal node
|
||||
0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header'
|
||||
0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header'
|
||||
0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main'
|
||||
0x2, 0x0, 0x0, 0x0, // terminal node
|
||||
0x3, 0x0, 0x80, 0x20, 0x0, // terminal node
|
||||
};
|
||||
|
||||
testing.expect(buffer.items.len == exp_buffer.len);
|
||||
testing.expect(mem.eql(u8, buffer.items, exp_buffer[0..]));
|
||||
var buffer = try gpa.alloc(u8, trie.size);
|
||||
defer gpa.free(buffer);
|
||||
var stream = std.io.fixedBufferStream(buffer);
|
||||
{
|
||||
const nwritten = try trie.write(stream.writer());
|
||||
testing.expect(nwritten == trie.size);
|
||||
testing.expect(mem.eql(u8, buffer, exp_buffer[0..]));
|
||||
}
|
||||
{
|
||||
// Writing finalized trie again should yield the same result.
|
||||
try stream.seekTo(0);
|
||||
const nwritten = try trie.write(stream.writer());
|
||||
testing.expect(nwritten == trie.size);
|
||||
testing.expect(mem.eql(u8, buffer, exp_buffer[0..]));
|
||||
}
|
||||
}
|
||||
|
||||
test "parse Trie from byte stream" {
|
||||
var gpa = testing.allocator;
|
||||
|
||||
const in_buffer = [_]u8{
|
||||
0x0, 0x1, // node root
|
||||
0x5f, 0x0, 0x5, // edge '_'
|
||||
0x0, 0x2, // non-terminal node
|
||||
0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header'
|
||||
0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header'
|
||||
0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main'
|
||||
0x2, 0x0, 0x0, 0x0, // terminal node
|
||||
0x3, 0x0, 0x80, 0x20, 0x0, // terminal node
|
||||
};
|
||||
|
||||
var in_stream = std.io.fixedBufferStream(in_buffer[0..]);
|
||||
var trie = Trie.init(gpa);
|
||||
defer trie.deinit();
|
||||
const nread = try trie.read(in_stream.reader());
|
||||
|
||||
testing.expect(nread == in_buffer.len);
|
||||
|
||||
try trie.finalize();
|
||||
|
||||
var out_buffer = try gpa.alloc(u8, trie.size);
|
||||
defer gpa.free(out_buffer);
|
||||
var out_stream = std.io.fixedBufferStream(out_buffer);
|
||||
const nwritten = try trie.write(out_stream.writer());
|
||||
|
||||
testing.expect(nwritten == trie.size);
|
||||
testing.expect(mem.eql(u8, in_buffer[0..], out_buffer));
|
||||
}
|
||||
|
||||