//! Represents the export trie used in MachO executables and dynamic libraries.
//! The purpose of an export trie is to encode as compactly as possible all
//! export symbols for the loader `dyld`.
//! The export trie encodes offset and other information using ULEB128
//! encoding, and is part of the __LINKEDIT segment.
//!
//! Description from loader.h:
//!
//! The symbols exported by a dylib are encoded in a trie. This is a compact
//! representation that factors out common prefixes. It also reduces LINKEDIT pages
//! in RAM because it encodes all information (name, address, flags) in one small,
//! contiguous range. The export area is a stream of nodes. The first node sequentially
//! is the start node for the trie.
//!
//! Nodes for a symbol start with a uleb128 that is the length of the exported symbol
//! information for the string so far. If there is no exported symbol, the node starts
//! with a zero byte. If there is exported info, it follows the length.
//!
//! First is a uleb128 containing flags. Normally, it is followed by a uleb128 encoded
//! offset which is the location of the content named by the symbol from the mach_header
//! for the image. If the flags is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags
//! is a uleb128 encoded library ordinal, then a zero terminated UTF8 string. If the string
//! is zero length, then the symbol is re-exported from the specified dylib with the same name.
//! If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following the flags are two
//! uleb128s: the stub offset and the resolver offset. The stub is used by non-lazy pointers.
//! The resolver is used by lazy pointers and must be called to get the actual address to use.
//!
//! After the optional exported symbol information is a byte of how many edges (0-255) that
//! this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of
//! the additional chars in the symbol, followed by a uleb128 offset for the node that edge points to.
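//!
//! As an illustration: a trie exporting only `_main` at offset 0x1000 with no
//! flags would encode as a root node `0x00 0x01` (no export info, one edge),
//! the edge label `"_main\0"` followed by the ULEB128 trie offset of the
//! target node, and a terminal node `0x03 0x00 0x80 0x20 0x00` (info length 3,
//! flags 0, offset 0x1000 as ULEB128, zero outgoing edges). The same
//! terminal-node byte pattern appears for `_main` in the first test below.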

/// The root node of the trie.
root: ?Node.Index = null,
buffer: std.ArrayList(u8) = .empty,
nodes: std.MultiArrayList(Node) = .{},
edges: std.ArrayList(Edge) = .empty,

/// Insert a symbol into the trie, updating the prefixes in the process.
/// This operation may change the layout of the trie by splicing edges in
/// certain circumstances.
fn put(self: *Trie, allocator: Allocator, symbol: ExportSymbol) !void {
    // const tracy = trace(@src());
    // defer tracy.end();

    const node_index = try self.putNode(self.root.?, allocator, symbol.name);
    const slice = self.nodes.slice();
    slice.items(.is_terminal)[node_index] = true;
    slice.items(.vmaddr_offset)[node_index] = symbol.vmaddr_offset;
    slice.items(.export_flags)[node_index] = symbol.export_flags;
}

/// Inserts a new node starting at `node_index`.
fn putNode(self: *Trie, node_index: Node.Index, allocator: Allocator, label: []const u8) !Node.Index {
    // Check for match with edges from this node.
    for (self.nodes.items(.edges)[node_index].items) |edge_index| {
        const edge = &self.edges.items[edge_index];
        const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.node;
        if (match == 0) continue;
        if (match == edge.label.len) return self.putNode(edge.node, allocator, label[match..]);

        // Found a match, need to splice up nodes.
        // From: A -> B
        // To: A -> C -> B
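        // Concretely (see the first test below): after "__mh_execute_header"
        // has been inserted, adding "_main" splits at the shared prefix "_".
        // The existing edge becomes "_" -> new mid node, the mid node gets an
        // edge "_mh_execute_header" back to the old target, and "main" is then
        // inserted under the mid node by the recursive call at the end.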
        const mid_index = try self.addNode(allocator);
        const to_label = edge.label[match..];
        const to_node = edge.node;
        edge.node = mid_index;
        edge.label = label[0..match];

        const new_edge_index = try self.addEdge(allocator);
        const new_edge = &self.edges.items[new_edge_index];
        new_edge.node = to_node;
        new_edge.label = to_label;
        try self.nodes.items(.edges)[mid_index].append(allocator, new_edge_index);

        return if (match == label.len) mid_index else self.putNode(mid_index, allocator, label[match..]);
    }

    // Add a new node.
    const new_node_index = try self.addNode(allocator);
    const new_edge_index = try self.addEdge(allocator);
    const new_edge = &self.edges.items[new_edge_index];
    new_edge.node = new_node_index;
    new_edge.label = label;
    try self.nodes.items(.edges)[node_index].append(allocator, new_edge_index);

    return new_node_index;
}

pub fn updateSize(self: *Trie, macho_file: *MachO) !void {
    const tracy = trace(@src());
    defer tracy.end();

    const gpa = macho_file.base.comp.gpa;

    try self.init(gpa);
    try self.nodes.ensureUnusedCapacity(gpa, macho_file.resolver.values.items.len * 2);
    try self.edges.ensureUnusedCapacity(gpa, macho_file.resolver.values.items.len * 2);
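    // The `* 2` hints above should be safe upper bounds: each inserted symbol
    // adds at most two nodes and two edges (a possible edge-split node plus a
    // new leaf).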

    const seg = macho_file.getTextSegment();
    for (macho_file.resolver.values.items) |ref| {
        if (ref.getFile(macho_file) == null) continue;
        const sym = ref.getSymbol(macho_file).?;
        if (!sym.flags.@"export") continue;
        if (sym.getAtom(macho_file)) |atom| if (!atom.isAlive()) continue;
        var flags: u64 = if (sym.flags.abs)
            macho.EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE
        else if (sym.flags.tlv)
            macho.EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL
        else
            macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR;
        if (sym.flags.weak) {
            flags |= macho.EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
            macho_file.weak_defines.store(true, .seq_cst);
            macho_file.binds_to_weak.store(true, .seq_cst);
        }
        try self.put(gpa, .{
            .name = sym.getName(macho_file),
            .vmaddr_offset = sym.getAddress(.{ .stubs = false }, macho_file) - seg.vmaddr,
            .export_flags = flags,
        });
    }

    try self.finalize(gpa);

    macho_file.dyld_info_cmd.export_size = mem.alignForward(u32, @intCast(self.buffer.items.len), @alignOf(u64));
}

/// Finalizes this trie for writing to a byte stream.
/// This step performs multiple passes through the trie ensuring
/// there are no gaps after every `Node` is ULEB128 encoded.
/// Call this method before trying to `write` the trie to a byte stream.
fn finalize(self: *Trie, allocator: Allocator) !void {
    const tracy = trace(@src());
    defer tracy.end();

    var ordered_nodes = std.array_list.Managed(Node.Index).init(allocator);
    defer ordered_nodes.deinit();
    try ordered_nodes.ensureTotalCapacityPrecise(self.nodes.items(.is_terminal).len);

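    // Collect the nodes in breadth-first order (root first); this is the order
    // in which they are sized and later written out.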
    {
        var fifo: std.ArrayList(Node.Index) = .empty;
        defer fifo.deinit(allocator);

        try fifo.append(allocator, self.root.?);

        var i: usize = 0;
        while (i < fifo.items.len) {
            const next_index = fifo.items[i];
            i += 1;
            const edges = &self.nodes.items(.edges)[next_index];
            for (edges.items) |edge_index| {
                const edge = self.edges.items[edge_index];
                try fifo.append(allocator, edge.node);
            }
            ordered_nodes.appendAssumeCapacity(next_index);
        }
    }

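    // A node's encoded size depends on the ULEB128-encoded trie offsets of its
    // children, and those offsets in turn depend on node sizes, so repeat the
    // sizing pass until no node's offset changes.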
    var more: bool = true;
    var size: u32 = 0;
    while (more) {
        size = 0;
        more = false;
        for (ordered_nodes.items) |node_index| {
            const res = try self.finalizeNode(node_index, size);
            size += res.node_size;
            if (res.updated) more = true;
        }
    }

    try self.buffer.ensureTotalCapacityPrecise(allocator, size);

    var allocating: std.Io.Writer.Allocating = .fromArrayList(allocator, &self.buffer);
    defer self.buffer = allocating.toArrayList();
    const writer = &allocating.writer;

    for (ordered_nodes.items) |node_index| {
        try self.writeNode(node_index, writer);
    }
}

const FinalizeNodeResult = struct {
    /// Current size of this node in bytes.
    node_size: u32,

    /// True if the trie offset of this node in the output byte stream
    /// would need updating; false otherwise.
    updated: bool,
};

/// Updates offset of this node in the output byte stream.
fn finalizeNode(self: *Trie, node_index: Node.Index, offset_in_trie: u32) !FinalizeNodeResult {
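    // The writes below go to a discarding writer; only the produced byte count
    // (`stream.fullCount()`) is used, to measure this node's encoded size.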
    var trash_buffer: [64]u8 = undefined;
    var stream: std.Io.Writer.Discarding = .init(&trash_buffer);
    const writer = &stream.writer;
    const slice = self.nodes.slice();

    var node_size: u32 = 0;
    if (slice.items(.is_terminal)[node_index]) {
        const export_flags = slice.items(.export_flags)[node_index];
        const vmaddr_offset = slice.items(.vmaddr_offset)[node_index];
        try writer.writeUleb128(export_flags);
        try writer.writeUleb128(vmaddr_offset);
        try writer.writeUleb128(stream.fullCount());
    } else {
        node_size += 1; // 0x0 for non-terminal nodes
    }
    node_size += 1; // 1 byte for edge count

    for (slice.items(.edges)[node_index].items) |edge_index| {
        const edge = &self.edges.items[edge_index];
        const next_node_offset = slice.items(.trie_offset)[edge.node];
        node_size += @intCast(edge.label.len + 1);
        try writer.writeUleb128(next_node_offset);
    }

    const trie_offset = slice.items(.trie_offset)[node_index];
    const updated = offset_in_trie != trie_offset;
    slice.items(.trie_offset)[node_index] = offset_in_trie;
    node_size += @intCast(stream.fullCount());

    return .{ .node_size = node_size, .updated = updated };
}

fn init(self: *Trie, allocator: Allocator) !void {
    assert(self.root == null);
    self.root = try self.addNode(allocator);
}

pub fn deinit(self: *Trie, allocator: Allocator) void {
    for (self.nodes.items(.edges)) |*edges| {
        edges.deinit(allocator);
    }
    self.nodes.deinit(allocator);
    self.edges.deinit(allocator);
    self.buffer.deinit(allocator);
}

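/// Writes the encoded trie bytes to `writer`.
/// `finalize` (called by `updateSize`) must have populated `buffer` first;
/// otherwise nothing is written.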
pub fn write(self: Trie, writer: *std.Io.Writer) !void {
    if (self.buffer.items.len == 0) return;
    try writer.writeAll(self.buffer.items);
}

/// Writes this node to a byte stream.
/// The children of this node are *not* written to the byte stream
/// recursively. To write all nodes in sequence, iterate over the
/// breadth-first node order computed in `finalize` and call this method on
/// each node, as `finalize` itself does.
/// This is one of the requirements of the MachO format.
/// Expects `finalize` to have been called first so that trie offsets are valid.
fn writeNode(self: *Trie, node_index: Node.Index, writer: *std.Io.Writer) !void {
    const slice = self.nodes.slice();
    const edges = slice.items(.edges)[node_index];
    const is_terminal = slice.items(.is_terminal)[node_index];
    const export_flags = slice.items(.export_flags)[node_index];
    const vmaddr_offset = slice.items(.vmaddr_offset)[node_index];

    if (is_terminal) {
        // Terminal node info: encode export flags and vmaddr offset of this symbol.
        var info_buf: [@sizeOf(u64) * 2]u8 = undefined;
        var info_stream: std.Io.Writer = .fixed(&info_buf);
        // TODO Implement for special flags.
        assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and
            export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0);
        try info_stream.writeUleb128(export_flags);
        try info_stream.writeUleb128(vmaddr_offset);

        // Encode the size of the terminal node info.
        var size_buf: [@sizeOf(u64)]u8 = undefined;
        var size_stream: std.Io.Writer = .fixed(&size_buf);
        try size_stream.writeUleb128(info_stream.end);

        // Now, write them to the output stream.
        try writer.writeAll(size_buf[0..size_stream.end]);
        try writer.writeAll(info_buf[0..info_stream.end]);
    } else {
        // Non-terminal node is delimited by 0 byte.
        try writer.writeByte(0);
    }
    // Write number of edges (max legal number of edges is 255 since the count is encoded in a single byte).
    try writer.writeByte(@as(u8, @intCast(edges.items.len)));

    for (edges.items) |edge_index| {
        const edge = self.edges.items[edge_index];
        // Write edge label and offset to next node in trie.
        try writer.writeAll(edge.label);
        try writer.writeByte(0);
        try writer.writeUleb128(slice.items(.trie_offset)[edge.node]);
    }
}

fn addNode(self: *Trie, allocator: Allocator) !Node.Index {
    const index: Node.Index = @intCast(try self.nodes.addOne(allocator));
    self.nodes.set(index, .{});
    return index;
}

fn addEdge(self: *Trie, allocator: Allocator) !Edge.Index {
    const index: Edge.Index = @intCast(self.edges.items.len);
    const edge = try self.edges.addOne(allocator);
    edge.* = .{};
    return index;
}

/// Export symbol that is to be placed in the trie.
pub const ExportSymbol = struct {
    /// Name of the symbol.
    name: []const u8,

    /// Offset of this symbol's virtual memory address from the beginning
    /// of the __TEXT segment.
    vmaddr_offset: u64,

    /// Export flags of this exported symbol.
    export_flags: u64,
};

const Node = struct {
    is_terminal: bool = false,

    /// Export flags associated with this exported symbol.
    export_flags: u64 = 0,

    /// VM address offset with respect to the section this symbol is defined against.
    vmaddr_offset: u64 = 0,

    /// Offset of this node in the trie output byte stream.
    trie_offset: u32 = 0,

    /// List of all edges originating from this node.
    edges: std.ArrayList(Edge.Index) = .empty,

    const Index = u32;
};

/// Edge connecting nodes in the trie.
const Edge = struct {
    /// Target node in the trie.
    node: Node.Index = 0,

    /// Matching prefix.
    label: []const u8 = "",

    const Index = u32;
};

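/// Test helper: on mismatch, prints both byte strings as hex with a caret at
/// the first differing position and fails the test.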
fn expectEqualHexStrings(expected: []const u8, given: []const u8) !void {
    assert(expected.len > 0);
    if (mem.eql(u8, expected, given)) return;
    const expected_fmt = try std.fmt.allocPrint(testing.allocator, "{x}", .{expected});
    defer testing.allocator.free(expected_fmt);
    const given_fmt = try std.fmt.allocPrint(testing.allocator, "{x}", .{given});
    defer testing.allocator.free(given_fmt);
    const idx = mem.indexOfDiff(u8, expected_fmt, given_fmt).?;
    const padding = try testing.allocator.alloc(u8, idx + 5);
    defer testing.allocator.free(padding);
    @memset(padding, ' ');
    std.debug.print("\nEXP: {s}\nGIV: {s}\n{s}^ -- first differing byte\n", .{ expected_fmt, given_fmt, padding });
    return error.TestFailed;
}

test "write Trie to a byte stream" {
    const gpa = testing.allocator;
    var trie: Trie = .{};
    defer trie.deinit(gpa);
    try trie.init(gpa);

    try trie.put(gpa, .{
        .name = "__mh_execute_header",
        .vmaddr_offset = 0,
        .export_flags = 0,
    });
    try trie.put(gpa, .{
        .name = "_main",
        .vmaddr_offset = 0x1000,
        .export_flags = 0,
    });

    try trie.finalize(gpa);

    const exp_buffer = [_]u8{
        0x0, 0x1, // node root
        0x5f, 0x0, 0x5, // edge '_'
        0x0, 0x2, // non-terminal node
        0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header'
        0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header'
        0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main'
        0x2, 0x0, 0x0, 0x0, // terminal node
        0x3, 0x0, 0x80, 0x20, 0x0, // terminal node
    };
    try expectEqualHexStrings(&exp_buffer, trie.buffer.items);
}

test "ordering bug" {
    const gpa = testing.allocator;
    var trie: Trie = .{};
    defer trie.deinit(gpa);
    try trie.init(gpa);

    try trie.put(gpa, .{
        .name = "_asStr",
        .vmaddr_offset = 0x558,
        .export_flags = 0,
    });
    try trie.put(gpa, .{
        .name = "_a",
        .vmaddr_offset = 0x8008,
        .export_flags = 0,
    });

    try trie.finalize(gpa);

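    // Decoded for reference: root node (non-terminal, one edge "_a" -> offset 6),
    // node for "_a" (info length 4: flags 0, offset 0x8008 as uleb; one edge "sStr" -> offset 0x12),
    // node for "_asStr" (info length 3: flags 0, offset 0x558 as uleb; no edges).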
    const exp_buffer = [_]u8{
        0x00, 0x01, 0x5F, 0x61, 0x00, 0x06, 0x04, 0x00,
        0x88, 0x80, 0x02, 0x01, 0x73, 0x53, 0x74, 0x72,
        0x00, 0x12, 0x03, 0x00, 0xD8, 0x0A, 0x00,
    };
    try expectEqualHexStrings(&exp_buffer, trie.buffer.items);
}

const assert = std.debug.assert;
const log = std.log.scoped(.macho);
const macho = std.macho;
const mem = std.mem;
const std = @import("std");
const testing = std.testing;
const trace = @import("../../../tracy.zig").trace;

const Allocator = mem.Allocator;
const MachO = @import("../../MachO.zig");
const Trie = @This();