mirror of
https://github.com/ziglang/zig.git
synced 2026-02-12 20:37:54 +00:00
Move trie structure into its own file-module
Signed-off-by: Jakub Konka <kubkon@jakubkonka.com>
This commit is contained in:
parent
b13b36a71d
commit
bdab4f53c1
@ -20,6 +20,8 @@ const File = link.File;
|
||||
const Cache = @import("../Cache.zig");
|
||||
const target_util = @import("../target.zig");
|
||||
|
||||
const Trie = @import("MachO/Trie.zig");
|
||||
|
||||
pub const base_tag: File.Tag = File.Tag.macho;
|
||||
|
||||
const LoadCommand = union(enum) {
|
||||
@ -64,156 +66,6 @@ const LoadCommand = union(enum) {
|
||||
}
|
||||
};
|
||||
|
||||
/// Represents export trie used in MachO executables and dynamic libraries.
/// The purpose of an export trie is to encode as compactly as possible all
/// export symbols for the loader `dyld`.
/// The export trie encodes offset and other information using ULEB128
/// encoding, and is part of the __LINKEDIT segment.
const Trie = struct {
    const Node = struct {
        const Edge = struct {
            /// Node this edge leaves from.
            from: *Node,
            /// Node this edge leads to; it is destroyed by `deinit`.
            to: *Node,
            /// Text attached to the edge. It is not freed by `deinit`.
            label: []const u8,

            /// Deinitializes and destroys the target node, then poisons both
            /// node pointers.
            pub fn deinit(self: *Edge, alloc: *Allocator) void {
                self.to.deinit(alloc);
                alloc.destroy(self.to);
                self.from = undefined;
                self.to = undefined;
            }
        };

        /// Export flags of the symbol ending at this node, if any.
        export_flags: ?u64 = null,
        /// Offset of the symbol ending at this node. A node is written as a
        /// terminal node exactly when this is non-null.
        offset: ?u64 = null,
        /// Outgoing edges; each edge's target node is destroyed by `deinit`.
        edges: std.ArrayListUnmanaged(Edge) = .{},

        /// Recursively destroys every node reachable from this one and frees
        /// the edge list. The node itself is not destroyed.
        pub fn deinit(self: *Node, alloc: *Allocator) void {
            for (self.edges.items) |*edge| {
                edge.deinit(alloc);
            }
            self.edges.deinit(alloc);
        }

        /// Finds or creates the node for `label` below this node and returns it.
        /// `fromEdge` is the edge that was followed to reach this node (null at
        /// the root) and `prefix` is how many leading bytes of `label` that
        /// edge's label shares with it.
        /// On allocation failure the trie is left as it was before the call.
        pub fn put(self: *Node, alloc: *Allocator, fromEdge: ?*Edge, prefix: usize, label: []const u8) !*Node {
            // Traverse all edges.
            for (self.edges.items) |*edge| {
                // Identical labels: the symbol already has a node, and that node
                // is the edge's target rather than this (parent) node.
                const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to;
                // `match > prefix` instead of `match - prefix > 0` so that the
                // unsigned subtraction can never underflow.
                if (match > prefix) {
                    // The edge shares more of the label: advance down the trie.
                    return edge.to.put(alloc, edge, match, label);
                }
            }

            if (fromEdge) |from| {
                if (mem.eql(u8, from.label, label[0..prefix])) {
                    if (prefix == label.len) return self;
                } else {
                    // The incoming edge only partially matches, so splice an
                    // intermediate node between its source and this node.
                    // Is:        A -> B
                    // Should be: A -> C -> B
                    const ends_at_split = prefix == label.len;

                    const mid = try alloc.create(Node);
                    errdefer alloc.destroy(mid);
                    mid.* = .{};
                    errdefer mid.edges.deinit(alloc);

                    // Reserve every slot up front so nothing below can fail
                    // once the trie starts being modified.
                    const needed: usize = if (ends_at_split) 1 else 2;
                    try mid.edges.ensureCapacity(alloc, needed);
                    mid.edges.appendAssumeCapacity(.{
                        .from = mid,
                        .to = self,
                        .label = from.label,
                    });

                    // When the label ends exactly at the split point, the
                    // intermediate node is the node for the label.
                    var result = mid;
                    if (!ends_at_split) {
                        const new_node = try alloc.create(Node);
                        new_node.* = .{};
                        mid.edges.appendAssumeCapacity(.{
                            .from = mid,
                            .to = new_node,
                            .label = label,
                        });
                        result = new_node;
                    }

                    // Infallible from here on: re-point the incoming edge.
                    from.to = mid;
                    from.label = label[0..prefix];
                    return result;
                }
            }

            // Add a new edge.
            const node = try alloc.create(Node);
            errdefer alloc.destroy(node);
            node.* = .{};

            try self.edges.append(alloc, .{
                .from = self,
                .to = node,
                .label = label,
            });

            return node;
        }

        /// Appends this node and, recursively, all nodes below it to `buffer`.
        /// Offsets to child nodes are positions in `buffer`.
        pub fn write(self: Node, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) Trie.WriteError!void {
            // A u64 takes at most ceil(64 / 7) = 10 bytes in ULEB128.
            const max_uleb128_len = 10;
            // Bytes reserved after each edge label for the child's offset.
            const offset_slot_len = @sizeOf(u64);

            if (self.offset) |off| {
                // Terminal node info: encode export flags and vmaddr offset of this symbol.
                var info_buf_len: usize = 0;
                var info_buf: [max_uleb128_len * 2]u8 = undefined;
                info_buf_len += try std.debug.leb.writeULEB128Mem(info_buf[0..], self.export_flags.?);
                info_buf_len += try std.debug.leb.writeULEB128Mem(info_buf[info_buf_len..], off);

                // Encode the size of the terminal node info.
                var size_buf: [@sizeOf(u64)]u8 = undefined;
                const size_buf_len = try std.debug.leb.writeULEB128Mem(size_buf[0..], info_buf_len);

                // Now, write them to the output buffer.
                try buffer.ensureCapacity(alloc, buffer.items.len + info_buf_len + size_buf_len);
                buffer.appendSliceAssumeCapacity(size_buf[0..size_buf_len]);
                buffer.appendSliceAssumeCapacity(info_buf[0..info_buf_len]);
            } else {
                // Non-terminal node is delimited by 0 byte.
                try buffer.append(alloc, 0);
            }
            // The edge count is stored in a single byte, so the cast requires
            // that the node has at most 255 edges.
            try buffer.append(alloc, @intCast(u8, self.edges.items.len));

            // Write edge labels, leaving a zeroed slot after each one that is
            // filled in below with the offset of the node the edge points to.
            const edges_start = buffer.items.len;
            for (self.edges.items) |edge| {
                try buffer.ensureCapacity(alloc, buffer.items.len + edge.label.len + 1 + offset_slot_len); // +1 to account for null-byte
                buffer.appendSliceAssumeCapacity(edge.label);
                buffer.appendAssumeCapacity(0);
                const padding = [_]u8{0} ** offset_slot_len;
                buffer.appendSliceAssumeCapacity(padding[0..]);
            }

            // Slot positions are recomputed from the label lengths, so no
            // per-edge bookkeeping array (and no bound on its size) is needed.
            var cursor = edges_start;
            for (self.edges.items) |edge| {
                cursor += edge.label.len + 1;
                const slot_start = cursor;
                cursor += offset_slot_len;

                const child_offset = buffer.items.len;
                try edge.to.write(alloc, buffer);

                // We can now populate the offset to the node pointed by this edge.
                // `buffer.items` is indexed only after the recursive call, which
                // may have grown the buffer.
                // NOTE(review): the encoded offset is usually shorter than the
                // slot, leaving zero bytes between it and the next edge label;
                // confirm readers of the trie tolerate that.
                var offset_buf: [offset_slot_len]u8 = undefined;
                const offset_buf_len = try std.debug.leb.writeULEB128Mem(offset_buf[0..], child_offset);
                mem.copy(u8, buffer.items[slot_start..], offset_buf[0..offset_buf_len]);
            }
        }
    };

    root: Node,

    /// Finds or creates the node for `word` and returns it so the caller can
    /// fill in `offset` and `export_flags`.
    pub fn put(self: *Trie, alloc: *Allocator, word: []const u8) !*Node {
        return self.root.put(alloc, null, 0, word);
    }

    pub const WriteError = error{ OutOfMemory, NoSpaceLeft };

    /// Appends the encoded trie, starting at the root node, to `buffer`.
    pub fn write(self: Trie, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) WriteError!void {
        return self.root.write(alloc, buffer);
    }

    /// Destroys every node below the root and frees the root's edge list.
    pub fn deinit(self: *Trie, alloc: *Allocator) void {
        self.root.deinit(alloc);
    }
};
|
||||
|
||||
base: File,
|
||||
|
||||
/// Table of all load commands
|
||||
@ -1541,19 +1393,21 @@ fn writeExportTrie(self: *MachO) !void {
|
||||
defer trie.deinit(self.base.allocator);
|
||||
|
||||
const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
|
||||
|
||||
for (self.global_symbols.items) |symbol| {
|
||||
// TODO figure out if we should put all global symbols into the export trie
|
||||
const name = self.getString(symbol.n_strx);
|
||||
const node = try trie.put(self.base.allocator, name);
|
||||
node.offset = symbol.n_value - text_segment.vmaddr;
|
||||
node.export_flags = 0; // TODO workout creation of export flags
|
||||
assert(symbol.n_value >= text_segment.vmaddr);
|
||||
try trie.put(self.base.allocator, .{
|
||||
.name = name,
|
||||
.offset = symbol.n_value - text_segment.vmaddr,
|
||||
.export_flags = 0, // TODO workout creation of export flags
|
||||
});
|
||||
}
|
||||
|
||||
var buffer: std.ArrayListUnmanaged(u8) = .{};
|
||||
defer buffer.deinit(self.base.allocator);
|
||||
|
||||
try trie.write(self.base.allocator, &buffer);
|
||||
try trie.writeULEB128Mem(self.base.allocator, &buffer);
|
||||
|
||||
const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfo;
|
||||
try self.base.file.?.pwriteAll(buffer.items, dyld_info.export_off);
|
||||
@ -1688,48 +1542,3 @@ fn satMul(a: anytype, b: anytype) @TypeOf(a, b) {
|
||||
const T = @TypeOf(a, b);
|
||||
return std.math.mul(T, a, b) catch std.math.maxInt(T);
|
||||
}
|
||||
|
||||
// Checks how the edge layout evolves as "_st", "_start" and "_main" are
// inserted one after another. `Trie.put` returns the node for the inserted
// word; the layout checks below do not need it, so it is discarded explicitly
// (an unused non-void result is a compile error).
test "Trie basic" {
    const testing = @import("std").testing;
    var gpa = testing.allocator;

    var trie: Trie = .{
        .root = .{},
    };
    defer trie.deinit(gpa);

    // root
    testing.expect(trie.root.edges.items.len == 0);

    // root --- _st ---> node
    _ = try trie.put(gpa, "_st");
    testing.expect(trie.root.edges.items.len == 1);
    testing.expect(mem.eql(u8, trie.root.edges.items[0].label, "_st"));

    {
        // root --- _st ---> node --- _start ---> node
        _ = try trie.put(gpa, "_start");
        testing.expect(trie.root.edges.items.len == 1);

        const nextEdge = &trie.root.edges.items[0];
        testing.expect(mem.eql(u8, nextEdge.label, "_st"));
        testing.expect(nextEdge.to.edges.items.len == 1);
        testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "_start"));
    }
    {
        // root --- _ ---> node --- _st ---> node --- _start ---> node
        //                  |
        //                  |   --- _main ---> node
        _ = try trie.put(gpa, "_main");
        testing.expect(trie.root.edges.items.len == 1);

        const nextEdge = &trie.root.edges.items[0];
        testing.expect(mem.eql(u8, nextEdge.label, "_"));
        testing.expect(nextEdge.to.edges.items.len == 2);
        testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "_st"));
        testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "_main"));

        const nextNextEdge = &nextEdge.to.edges.items[0];
        testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "_start"));
    }
}
|
||||
|
||||
259
src/link/MachO/Trie.zig
Normal file
259
src/link/MachO/Trie.zig
Normal file
@ -0,0 +1,259 @@
|
||||
/// Represents export trie used in MachO executables and dynamic libraries.
|
||||
/// The purpose of an export trie is to encode as compactly as possible all
|
||||
/// export symbols for the loader `dyld`.
|
||||
/// The export trie encodes offset and other information using ULEB128
|
||||
/// encoding, and is part of the __LINKEDIT segment.
|
||||
///
|
||||
/// Description from loader.h:
|
||||
///
|
||||
/// The symbols exported by a dylib are encoded in a trie. This is a compact
|
||||
/// representation that factors out common prefixes. It also reduces LINKEDIT pages
|
||||
/// in RAM because it encodes all information (name, address, flags) in one small,
|
||||
/// contiguous range. The export area is a stream of nodes. The first node sequentially
|
||||
/// is the start node for the trie.
|
||||
///
|
||||
/// Nodes for a symbol start with a uleb128 that is the length of the exported symbol
|
||||
/// information for the string so far. If there is no exported symbol, the node starts
|
||||
/// with a zero byte. If there is exported info, it follows the length.
|
||||
///
|
||||
/// First is a uleb128 containing flags. Normally, it is followed by a uleb128 encoded
|
||||
/// offset which is location of the content named by the symbol from the mach_header
|
||||
/// for the image. If the flags is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags
|
||||
/// is a uleb128 encoded library ordinal, then a zero terminated UTF8 string. If the string
|
||||
/// is zero length, then the symbol is re-export from the specified dylib with the same name.
|
||||
/// If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following the flags is two
|
||||
/// uleb128s: the stub offset and the resolver offset. The stub is used by non-lazy pointers.
|
||||
/// The resolver is used by lazy pointers and must be called to get the actual address to use.
|
||||
///
|
||||
/// After the optional exported symbol information is a byte of how many edges (0-255) that
|
||||
/// this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of
|
||||
/// the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to.
|
||||
const Trie = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const mem = std.mem;
|
||||
const leb = std.debug.leb;
|
||||
const log = std.log.scoped(.link);
|
||||
const Allocator = mem.Allocator;
|
||||
|
||||
/// Everything `put` needs in order to record one exported symbol in the trie.
pub const Symbol = struct {
    /// Symbol name; it is used as the label when the symbol is inserted.
    name: []const u8,

    /// Offset stored on the symbol's node and ULEB128-encoded when the trie
    /// is written.
    offset: u64,

    /// Export flags stored on the symbol's node and ULEB128-encoded ahead of
    /// the offset when the trie is written.
    export_flags: u64,
};
|
||||
|
||||
/// Labelled link between two trie nodes.
const Edge = struct {
    /// Node this edge leaves from.
    from: *Node,

    /// Node this edge leads to; it is destroyed by `deinit`.
    to: *Node,

    /// Text attached to the edge. It is not freed by `deinit`.
    label: []const u8,

    /// Deinitializes the target node, hands its memory back to `alloc`, and
    /// then poisons both node pointers so that they are not reused by accident.
    fn deinit(self: *Edge, alloc: *Allocator) void {
        const target = self.to;
        target.deinit(alloc);
        alloc.destroy(target);

        self.to = undefined;
        self.from = undefined;
    }
};
|
||||
|
||||
/// A single node of the export trie.
const Node = struct {
    /// Export flags of the symbol ending at this node, if any.
    export_flags: ?u64 = null,
    /// Offset of the symbol ending at this node. A node is written as a
    /// terminal node exactly when this is non-null.
    offset: ?u64 = null,
    /// Outgoing edges; each edge's target node is destroyed by `deinit`.
    edges: std.ArrayListUnmanaged(Edge) = .{},

    /// Recursively destroys every node reachable from this one and frees the
    /// edge list. The node itself is not destroyed.
    fn deinit(self: *Node, alloc: *Allocator) void {
        for (self.edges.items) |*edge| {
            edge.deinit(alloc);
        }
        self.edges.deinit(alloc);
    }

    /// Finds or creates the node for `label` below this node and returns it.
    /// `fromEdge` is the edge that was followed to reach this node (null at
    /// the root) and `prefix` is how many leading bytes of `label` that edge's
    /// label shares with it.
    /// On allocation failure the trie is left as it was before the call.
    fn put(self: *Node, alloc: *Allocator, fromEdge: ?*Edge, prefix: usize, label: []const u8) !*Node {
        // Traverse all edges.
        for (self.edges.items) |*edge| {
            // Identical labels: the symbol already has a node, and that node is
            // the edge's target rather than this (parent) node.
            const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to;
            // `match > prefix` instead of `match - prefix > 0` so that the
            // unsigned subtraction can never underflow.
            if (match > prefix) {
                // The edge shares more of the label: advance down the trie.
                return edge.to.put(alloc, edge, match, label);
            }
        }

        if (fromEdge) |from| {
            if (mem.eql(u8, from.label, label[0..prefix])) {
                if (prefix == label.len) return self;
            } else {
                // The incoming edge only partially matches, so splice an
                // intermediate node between its source and this node.
                // Is:        A -> B
                // Should be: A -> C -> B
                const ends_at_split = prefix == label.len;

                const mid = try alloc.create(Node);
                errdefer alloc.destroy(mid);
                mid.* = .{};
                errdefer mid.edges.deinit(alloc);

                // Reserve every slot up front so nothing below can fail once
                // the trie starts being modified.
                const needed: usize = if (ends_at_split) 1 else 2;
                try mid.edges.ensureCapacity(alloc, needed);
                mid.edges.appendAssumeCapacity(.{
                    .from = mid,
                    .to = self,
                    .label = from.label,
                });

                // When the label ends exactly at the split point, the
                // intermediate node is the node for the label.
                var result = mid;
                if (!ends_at_split) {
                    const new_node = try alloc.create(Node);
                    new_node.* = .{};
                    mid.edges.appendAssumeCapacity(.{
                        .from = mid,
                        .to = new_node,
                        .label = label,
                    });
                    result = new_node;
                }

                // Infallible from here on: re-point the incoming edge.
                from.to = mid;
                from.label = label[0..prefix];
                return result;
            }
        }

        // Add a new edge.
        const node = try alloc.create(Node);
        errdefer alloc.destroy(node);
        node.* = .{};

        try self.edges.append(alloc, .{
            .from = self,
            .to = node,
            .label = label,
        });

        return node;
    }

    /// Appends this node and, recursively, all nodes below it to `buffer`.
    /// Offsets to child nodes are positions in `buffer`.
    fn writeULEB128Mem(self: Node, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) Trie.WriteError!void {
        // A u64 takes at most ceil(64 / 7) = 10 bytes in ULEB128.
        const max_uleb128_len = 10;
        // Bytes reserved after each edge label for the child's offset.
        const offset_slot_len = @sizeOf(u64);

        if (self.offset) |offset| {
            // Terminal node info: encode export flags and vmaddr offset of this symbol.
            var info_buf_len: usize = 0;
            var info_buf: [max_uleb128_len * 2]u8 = undefined;
            info_buf_len += try leb.writeULEB128Mem(info_buf[0..], self.export_flags.?);
            info_buf_len += try leb.writeULEB128Mem(info_buf[info_buf_len..], offset);

            // Encode the size of the terminal node info.
            var size_buf: [@sizeOf(u64)]u8 = undefined;
            const size_buf_len = try leb.writeULEB128Mem(size_buf[0..], info_buf_len);

            // Now, write them to the output buffer.
            try buffer.ensureCapacity(alloc, buffer.items.len + info_buf_len + size_buf_len);
            buffer.appendSliceAssumeCapacity(size_buf[0..size_buf_len]);
            buffer.appendSliceAssumeCapacity(info_buf[0..info_buf_len]);
        } else {
            // Non-terminal node is delimited by 0 byte.
            try buffer.append(alloc, 0);
        }
        // The edge count is stored in a single byte, so the cast requires that
        // the node has at most 255 edges.
        try buffer.append(alloc, @intCast(u8, self.edges.items.len));

        // Write edge labels, leaving a zeroed slot after each one that is
        // filled in below with the offset of the node the edge points to.
        const edges_start = buffer.items.len;
        for (self.edges.items) |edge| {
            try buffer.ensureCapacity(alloc, buffer.items.len + edge.label.len + 1 + offset_slot_len); // +1 to account for null-byte
            buffer.appendSliceAssumeCapacity(edge.label);
            buffer.appendAssumeCapacity(0);
            const padding = [_]u8{0} ** offset_slot_len;
            buffer.appendSliceAssumeCapacity(padding[0..]);
        }

        // Slot positions are recomputed from the label lengths, so no per-edge
        // bookkeeping array (and no bound on its size) is needed.
        var cursor = edges_start;
        for (self.edges.items) |edge| {
            cursor += edge.label.len + 1;
            const slot_start = cursor;
            cursor += offset_slot_len;

            const child_offset = buffer.items.len;
            try edge.to.writeULEB128Mem(alloc, buffer);

            // We can now populate the offset to the node pointed by this edge.
            // `buffer.items` is indexed only after the recursive call, which
            // may have grown the buffer.
            // TODO this is not the approach taken by `ld64` which does several iterations
            // to close the gap between the space encoding the offset to the node pointed
            // by this edge. However, it seems that as long as we are contiguous, the padding
            // introduced here should not influence the performance of `dyld`. I'm leaving
            // this TODO here though as a reminder to re-investigate in the future and especially
            // when we start working on dylibs in case `dyld` refuses to cooperate and/or the
            // performance is noticeably suffering.
            // Link to official impl: https://opensource.apple.com/source/ld64/ld64-123.2.1/src/abstraction/MachOTrie.hpp
            // NOTE(review): the encoded offset is usually shorter than the slot,
            // leaving zero bytes between it and the next edge label, and edge
            // labels are written in full rather than as only the additional
            // characters; confirm both against what `dyld` expects.
            var offset_buf: [offset_slot_len]u8 = undefined;
            const offset_buf_len = try leb.writeULEB128Mem(offset_buf[0..], child_offset);
            mem.copy(u8, buffer.items[slot_start..], offset_buf[0..offset_buf_len]);
        }
    }
};
|
||||
|
||||
root: Node,
|
||||
|
||||
/// Insert a symbol into the trie, updating the prefixes in the process.
/// This operation may change the layout of the trie by splicing edges in
/// certain circumstances.
/// The symbol's offset and export flags are stored on the node that the
/// root's `put` returns for `symbol.name`.
pub fn put(self: *Trie, alloc: *Allocator, symbol: Symbol) !void {
    const terminal = try self.root.put(alloc, null, 0, symbol.name);
    terminal.export_flags = symbol.export_flags;
    terminal.offset = symbol.offset;
}
|
||||
|
||||
pub const WriteError = error{ OutOfMemory, NoSpaceLeft };
|
||||
|
||||
/// Write the trie to a buffer ULEB128 encoded.
/// Encoding starts at the root node and the bytes are appended to `buffer`,
/// which grows through `alloc` as needed.
pub fn writeULEB128Mem(self: Trie, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) WriteError!void {
    try self.root.writeULEB128Mem(alloc, buffer);
}
|
||||
|
||||
/// Releases the trie by deinitializing its root node with `alloc`.
pub fn deinit(self: *Trie, alloc: *Allocator) void {
    const root = &self.root;
    root.deinit(alloc);
}
|
||||
|
||||
// Checks how the edge layout evolves as "_st", "_start" and "_main" are
// inserted one after another.
test "Trie basic" {
    const expect = std.testing.expect;
    var allocator = std.testing.allocator;

    var trie: Trie = .{ .root = .{} };
    defer trie.deinit(allocator);

    // An empty trie: no edges leave the root.
    expect(trie.root.edges.items.len == 0);

    // root --- _st ---> node
    try trie.put(allocator, .{ .name = "_st", .offset = 0, .export_flags = 0 });
    expect(trie.root.edges.items.len == 1);
    expect(mem.eql(u8, trie.root.edges.items[0].label, "_st"));

    {
        // root --- _st ---> node --- _start ---> node
        try trie.put(allocator, .{ .name = "_start", .offset = 0, .export_flags = 0 });
        expect(trie.root.edges.items.len == 1);

        const root_edge = &trie.root.edges.items[0];
        const below_root = root_edge.to.edges.items;
        expect(mem.eql(u8, root_edge.label, "_st"));
        expect(below_root.len == 1);
        expect(mem.eql(u8, below_root[0].label, "_start"));
    }
    {
        // root --- _ ---> node --- _st ---> node --- _start ---> node
        //                  |
        //                  |   --- _main ---> node
        try trie.put(allocator, .{ .name = "_main", .offset = 0, .export_flags = 0 });
        expect(trie.root.edges.items.len == 1);

        const root_edge = &trie.root.edges.items[0];
        const below_root = root_edge.to.edges.items;
        expect(mem.eql(u8, root_edge.label, "_"));
        expect(below_root.len == 2);
        expect(mem.eql(u8, below_root[0].label, "_st"));
        expect(mem.eql(u8, below_root[1].label, "_main"));

        const st_edge = &below_root[0];
        expect(mem.eql(u8, st_edge.to.edges.items[0].label, "_start"));
    }
}
|
||||
Loading…
x
Reference in New Issue
Block a user