From f0a73df8e72e156bd95fa6c7f4de9512513d01b3 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 5 Oct 2020 21:59:07 +0200 Subject: [PATCH 1/9] Add prototype for export trie generation in MachO linker Signed-off-by: Jakub Konka --- src/link/MachO.zig | 136 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index a1b9484e13..5db71ee1ab 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -64,6 +64,97 @@ const LoadCommand = union(enum) { } }; +/// Represents export trie used in MachO executables and dynamic libraries. +/// The purpose of an export trie is to encode as compactly as possible all +/// export symbols for the loader `dyld`. +/// The export trie encodes offset and other information using ULEB128 +/// encoding, and is part of the __LINKEDIT segment. +const Trie = struct { + const Node = struct { + const Edge = struct { + from: *Node, + to: *Node, + label: []const u8, + + pub fn deinit(self: *Edge, alloc: *Allocator) void { + self.to.deinit(alloc); + alloc.destroy(self.to); + self.from = undefined; + self.to = undefined; + } + }; + + edges: std.ArrayListUnmanaged(Edge) = .{}, + + pub fn deinit(self: *Node, alloc: *Allocator) void { + for (self.edges.items) |*edge| { + edge.deinit(alloc); + } + self.edges.deinit(alloc); + } + + pub fn put(self: *Node, alloc: *Allocator, fromEdge: ?*Edge, prefix: usize, label: []const u8) !void { + // Traverse all edges. + for (self.edges.items) |*edge| { + const match = mem.indexOfDiff(u8, edge.label, label) orelse return; // Got a full match, don't do anything. + if (match - prefix > 0) { + // If we match, we advance further down the trie. + return edge.to.put(alloc, edge, match, label); + } + } + + if (fromEdge) |from| { + if (mem.eql(u8, from.label, label[0..prefix])) { + if (prefix == label.len) return; + } else { + // Fixup nodes. We need to insert an intermediate node between + // from.to and self. + const mid = try alloc.create(Node); + mid.* = .{}; + const to_label = from.label; + from.to = mid; + from.label = label[0..prefix]; + + try mid.edges.append(alloc, .{ + .from = mid, + .to = self, + .label = to_label, + }); + + if (prefix == label.len) return; // We're done. + + const new_node = try alloc.create(Node); + new_node.* = .{}; + return mid.edges.append(alloc, .{ + .from = mid, + .to = new_node, + .label = label, + }); + } + } + + // Add a new edge. + const node = try alloc.create(Node); + node.* = .{}; + return self.edges.append(alloc, .{ + .from = self, + .to = node, + .label = label, + }); + } + }; + + root: Node, + + pub fn put(self: *Trie, alloc: *Allocator, word: []const u8) !void { + return self.root.put(alloc, null, 0, word); + } + + pub fn deinit(self: *Trie, alloc: *Allocator) void { + self.root.deinit(alloc); + } +}; + base: File, /// Table of all load commands @@ -1533,3 +1624,48 @@ fn satMul(a: anytype, b: anytype) @TypeOf(a, b) { const T = @TypeOf(a, b); return std.math.mul(T, a, b) catch std.math.maxInt(T); } + +test "Trie basic" { + const testing = @import("std").testing; + var gpa = testing.allocator; + + var trie: Trie = .{ + .root = .{}, + }; + defer trie.deinit(gpa); + + // root + testing.expect(trie.root.edges.items.len == 0); + + // root --- _st ---> node + try trie.put(gpa, "_st"); + testing.expect(trie.root.edges.items.len == 1); + testing.expect(mem.eql(u8, trie.root.edges.items[0].label, "_st")); + + { + // root --- _st ---> node --- _start ---> node + try trie.put(gpa, "_start"); + testing.expect(trie.root.edges.items.len == 1); + + const nextEdge = &trie.root.edges.items[0]; + testing.expect(mem.eql(u8, nextEdge.label, "_st")); + testing.expect(nextEdge.to.edges.items.len == 1); + testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "_start")); + } + { + // root --- _ ---> node --- _st ---> node --- _start ---> node + // | + // | --- _main ---> node + try trie.put(gpa, "_main"); + testing.expect(trie.root.edges.items.len == 1); + + const nextEdge = &trie.root.edges.items[0]; + testing.expect(mem.eql(u8, nextEdge.label, "_")); + testing.expect(nextEdge.to.edges.items.len == 2); + testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "_st")); + testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "_main")); + + const nextNextEdge = &nextEdge.to.edges.items[0]; + testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "_start")); + } +} From e76fb8d8c82ffc9fdeef2de0a6008c756103811b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 6 Oct 2020 22:34:39 +0200 Subject: [PATCH 2/9] Add incomplete writing of trie to bytes buffer Signed-off-by: Jakub Konka --- src/link/MachO.zig | 101 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 77 insertions(+), 24 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 5db71ee1ab..b522655b13 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -84,6 +84,8 @@ const Trie = struct { } }; + export_flags: ?u64 = null, + offset: ?u64 = null, edges: std.ArrayListUnmanaged(Edge) = .{}, pub fn deinit(self: *Node, alloc: *Allocator) void { @@ -93,10 +95,10 @@ const Trie = struct { self.edges.deinit(alloc); } - pub fn put(self: *Node, alloc: *Allocator, fromEdge: ?*Edge, prefix: usize, label: []const u8) !void { + pub fn put(self: *Node, alloc: *Allocator, fromEdge: ?*Edge, prefix: usize, label: []const u8) !*Node { // Traverse all edges. for (self.edges.items) |*edge| { - const match = mem.indexOfDiff(u8, edge.label, label) orelse return; // Got a full match, don't do anything. + const match = mem.indexOfDiff(u8, edge.label, label) orelse return self; // Got a full match, don't do anything. if (match - prefix > 0) { // If we match, we advance further down the trie. return edge.to.put(alloc, edge, match, label); @@ -105,7 +107,7 @@ const Trie = struct { if (fromEdge) |from| { if (mem.eql(u8, from.label, label[0..prefix])) { - if (prefix == label.len) return; + if (prefix == label.len) return self; } else { // Fixup nodes. We need to insert an intermediate node between // from.to and self. @@ -121,35 +123,86 @@ const Trie = struct { .label = to_label, }); - if (prefix == label.len) return; // We're done. + if (prefix == label.len) return self; // We're done. const new_node = try alloc.create(Node); new_node.* = .{}; - return mid.edges.append(alloc, .{ + + try mid.edges.append(alloc, .{ .from = mid, .to = new_node, .label = label, }); + + return new_node; } } // Add a new edge. const node = try alloc.create(Node); node.* = .{}; - return self.edges.append(alloc, .{ + + try self.edges.append(alloc, .{ .from = self, .to = node, .label = label, }); + + return node; + } + + pub fn write(self: Node, buf: []u8, offset: u64) error{NoSpaceLeft}!usize { + var pos: usize = 0; + if (self.offset) |off| { + var info_buf_pos: usize = 0; + var info_buf: [@sizeOf(u64) * 2]u8 = undefined; + info_buf_pos += try std.debug.leb.writeULEB128Mem(info_buf[0..], self.export_flags.?); + info_buf_pos += try std.debug.leb.writeULEB128Mem(info_buf[info_buf_pos..], off); + log.debug("info_buf = {x}\n", .{info_buf[0..info_buf_pos]}); + pos += try std.debug.leb.writeULEB128Mem(buf[pos..], info_buf_pos); + mem.copy(u8, buf[pos..], info_buf[0..info_buf_pos]); + pos += info_buf_pos; + log.debug("buf = {x}\n", .{buf}); + } else { + buf[pos] = 0; + pos += 1; + } + buf[pos] = @intCast(u8, self.edges.items.len); + pos += 1; + + for (self.edges.items) |edge| { + mem.copy(u8, buf[pos..], edge.label); + pos += edge.label.len; + buf[pos] = 0; + pos += 1; + const curr_offset = pos + offset + 1; + pos += try std.debug.leb.writeULEB128Mem(buf[pos..], curr_offset); + pos += try edge.to.write(buf[pos..], curr_offset); + log.debug("buf = {x}\n", .{buf}); + } + + return pos; } }; root: Node, - pub fn put(self: *Trie, alloc: *Allocator, word: []const u8) !void { + pub fn put(self: *Trie, alloc: *Allocator, word: []const u8) !*Node { return self.root.put(alloc, null, 0, word); } + pub fn write(self: Trie, alloc: *Allocator, file: *fs.File, offset: u64) !void { + // TODO get the actual node count + const count = 10; + const node_size = @sizeOf(u64) * 2; + + var buf = try alloc.alloc(u8, count * node_size); + defer alloc.free(buf); + + const written = try self.root.write(buf, 0); + return file.pwriteAll(buf[0..written], offset); + } + pub fn deinit(self: *Trie, alloc: *Allocator) void { self.root.deinit(alloc); } @@ -347,10 +400,10 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { switch (self.base.options.output_mode) { .Exe => { - if (self.entry_addr) |addr| { - // Write export trie. - try self.writeExportTrie(); + // Write export trie. + try self.writeExportTrie(); + if (self.entry_addr) |addr| { // Update LC_MAIN with entry offset const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const main_cmd = &self.load_commands.items[self.main_cmd_index.?].EntryPoint; @@ -1474,25 +1527,25 @@ fn writeAllUndefSymbols(self: *MachO) !void { } fn writeExportTrie(self: *MachO) !void { - assert(self.entry_addr != null); + if (self.global_symbols.items.len == 0) return; // No exports, nothing to do. - // TODO implement mechanism for generating a prefix tree of the exported symbols - // single branch export trie - var buf = [_]u8{0} ** 24; - buf[0] = 0; // root node - buf[1] = 1; // 1 branch from root - mem.copy(u8, buf[2..], "_start"); - buf[8] = 0; - buf[9] = 9 + 1; + var trie: Trie = .{ + .root = .{}, + }; + defer trie.deinit(self.base.allocator); const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const addr = self.entry_addr.? - text_segment.vmaddr; - const written = try std.debug.leb.writeULEB128Mem(buf[12..], addr); - buf[10] = @intCast(u8, written) + 1; - buf[11] = 0; + + for (self.global_symbols.items) |symbol| { + // TODO figure out if we should put all global symbols into the export trie + const name = self.getString(symbol.n_strx); + const node = try trie.put(self.base.allocator, name); + node.offset = symbol.n_value - text_segment.vmaddr; + node.export_flags = 0; // TODO workout creation of export flags + } const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfo; - try self.base.file.?.pwriteAll(buf[0..], dyld_info.export_off); + try trie.write(self.base.allocator, &self.base.file.?, dyld_info.export_off); } fn writeStringTable(self: *MachO) !void { From b13b36a71d63b6dfe4beda940fa6f9488fb3690a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Oct 2020 00:36:13 +0200 Subject: [PATCH 3/9] Approach using array list for auto mem mgmt Signed-off-by: Jakub Konka --- src/link/MachO.zig | 79 ++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b522655b13..b49d022637 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -151,37 +151,49 @@ const Trie = struct { return node; } - pub fn write(self: Node, buf: []u8, offset: u64) error{NoSpaceLeft}!usize { - var pos: usize = 0; + pub fn write(self: Node, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) Trie.WriteError!void { if (self.offset) |off| { - var info_buf_pos: usize = 0; + // Terminal node info: encode export flags and vmaddr offset of this symbol. + var info_buf_len: usize = 0; var info_buf: [@sizeOf(u64) * 2]u8 = undefined; - info_buf_pos += try std.debug.leb.writeULEB128Mem(info_buf[0..], self.export_flags.?); - info_buf_pos += try std.debug.leb.writeULEB128Mem(info_buf[info_buf_pos..], off); - log.debug("info_buf = {x}\n", .{info_buf[0..info_buf_pos]}); - pos += try std.debug.leb.writeULEB128Mem(buf[pos..], info_buf_pos); - mem.copy(u8, buf[pos..], info_buf[0..info_buf_pos]); - pos += info_buf_pos; - log.debug("buf = {x}\n", .{buf}); + info_buf_len += try std.debug.leb.writeULEB128Mem(info_buf[0..], self.export_flags.?); + info_buf_len += try std.debug.leb.writeULEB128Mem(info_buf[info_buf_len..], off); + + // Encode the size of the terminal node info. + var size_buf: [@sizeOf(u64)]u8 = undefined; + const size_buf_len = try std.debug.leb.writeULEB128Mem(size_buf[0..], info_buf_len); + + // Now, write them to the output buffer. + try buffer.ensureCapacity(alloc, buffer.items.len + info_buf_len + size_buf_len); + buffer.appendSliceAssumeCapacity(size_buf[0..size_buf_len]); + buffer.appendSliceAssumeCapacity(info_buf[0..info_buf_len]); } else { - buf[pos] = 0; - pos += 1; + // Non-terminal node is delimited by 0 byte. + try buffer.append(alloc, 0); } - buf[pos] = @intCast(u8, self.edges.items.len); - pos += 1; + // Write number of edges (max legal number of edges is 256). + try buffer.append(alloc, @intCast(u8, self.edges.items.len)); - for (self.edges.items) |edge| { - mem.copy(u8, buf[pos..], edge.label); - pos += edge.label.len; - buf[pos] = 0; - pos += 1; - const curr_offset = pos + offset + 1; - pos += try std.debug.leb.writeULEB128Mem(buf[pos..], curr_offset); - pos += try edge.to.write(buf[pos..], curr_offset); - log.debug("buf = {x}\n", .{buf}); + var node_offset_info: [@sizeOf(u8)]u64 = undefined; + for (self.edges.items) |edge, i| { + // Write edges labels leaving out space in-between to later populate + // with offsets to each node. + try buffer.ensureCapacity(alloc, buffer.items.len + edge.label.len + 1 + @sizeOf(u64)); // +1 to account for null-byte + buffer.appendSliceAssumeCapacity(edge.label); + buffer.appendAssumeCapacity(0); + node_offset_info[i] = buffer.items.len; + const padding = [_]u8{0} ** @sizeOf(u64); + buffer.appendSliceAssumeCapacity(padding[0..]); } - return pos; + for (self.edges.items) |edge, i| { + const offset = buffer.items.len; + try edge.to.write(alloc, buffer); + // We can now populate the offset to the node pointed by this edge. + var offset_buf: [@sizeOf(u64)]u8 = undefined; + const offset_buf_len = try std.debug.leb.writeULEB128Mem(offset_buf[0..], offset); + mem.copy(u8, buffer.items[node_offset_info[i]..], offset_buf[0..offset_buf_len]); + } } }; @@ -191,16 +203,10 @@ const Trie = struct { return self.root.put(alloc, null, 0, word); } - pub fn write(self: Trie, alloc: *Allocator, file: *fs.File, offset: u64) !void { - // TODO get the actual node count - const count = 10; - const node_size = @sizeOf(u64) * 2; + pub const WriteError = error{ OutOfMemory, NoSpaceLeft }; - var buf = try alloc.alloc(u8, count * node_size); - defer alloc.free(buf); - - const written = try self.root.write(buf, 0); - return file.pwriteAll(buf[0..written], offset); + pub fn write(self: Trie, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) WriteError!void { + return self.root.write(alloc, buffer); } pub fn deinit(self: *Trie, alloc: *Allocator) void { @@ -1544,8 +1550,13 @@ fn writeExportTrie(self: *MachO) !void { node.export_flags = 0; // TODO workout creation of export flags } + var buffer: std.ArrayListUnmanaged(u8) = .{}; + defer buffer.deinit(self.base.allocator); + + try trie.write(self.base.allocator, &buffer); + const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfo; - try trie.write(self.base.allocator, &self.base.file.?, dyld_info.export_off); + try self.base.file.?.pwriteAll(buffer.items, dyld_info.export_off); } fn writeStringTable(self: *MachO) !void { From bdab4f53c1fa614fcd89468f305184fa36520039 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Oct 2020 19:36:50 +0200 Subject: [PATCH 4/9] Move trie structure into its own file-module Signed-off-by: Jakub Konka --- src/link/MachO.zig | 209 ++------------------------------ src/link/MachO/Trie.zig | 259 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 268 insertions(+), 200 deletions(-) create mode 100644 src/link/MachO/Trie.zig diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b49d022637..afc54f8f7b 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -20,6 +20,8 @@ const File = link.File; const Cache = @import("../Cache.zig"); const target_util = @import("../target.zig"); +const Trie = @import("MachO/Trie.zig"); + pub const base_tag: File.Tag = File.Tag.macho; const LoadCommand = union(enum) { @@ -64,156 +66,6 @@ const LoadCommand = union(enum) { } }; -/// Represents export trie used in MachO executables and dynamic libraries. -/// The purpose of an export trie is to encode as compactly as possible all -/// export symbols for the loader `dyld`. -/// The export trie encodes offset and other information using ULEB128 -/// encoding, and is part of the __LINKEDIT segment. -const Trie = struct { - const Node = struct { - const Edge = struct { - from: *Node, - to: *Node, - label: []const u8, - - pub fn deinit(self: *Edge, alloc: *Allocator) void { - self.to.deinit(alloc); - alloc.destroy(self.to); - self.from = undefined; - self.to = undefined; - } - }; - - export_flags: ?u64 = null, - offset: ?u64 = null, - edges: std.ArrayListUnmanaged(Edge) = .{}, - - pub fn deinit(self: *Node, alloc: *Allocator) void { - for (self.edges.items) |*edge| { - edge.deinit(alloc); - } - self.edges.deinit(alloc); - } - - pub fn put(self: *Node, alloc: *Allocator, fromEdge: ?*Edge, prefix: usize, label: []const u8) !*Node { - // Traverse all edges. - for (self.edges.items) |*edge| { - const match = mem.indexOfDiff(u8, edge.label, label) orelse return self; // Got a full match, don't do anything. - if (match - prefix > 0) { - // If we match, we advance further down the trie. - return edge.to.put(alloc, edge, match, label); - } - } - - if (fromEdge) |from| { - if (mem.eql(u8, from.label, label[0..prefix])) { - if (prefix == label.len) return self; - } else { - // Fixup nodes. We need to insert an intermediate node between - // from.to and self. - const mid = try alloc.create(Node); - mid.* = .{}; - const to_label = from.label; - from.to = mid; - from.label = label[0..prefix]; - - try mid.edges.append(alloc, .{ - .from = mid, - .to = self, - .label = to_label, - }); - - if (prefix == label.len) return self; // We're done. - - const new_node = try alloc.create(Node); - new_node.* = .{}; - - try mid.edges.append(alloc, .{ - .from = mid, - .to = new_node, - .label = label, - }); - - return new_node; - } - } - - // Add a new edge. - const node = try alloc.create(Node); - node.* = .{}; - - try self.edges.append(alloc, .{ - .from = self, - .to = node, - .label = label, - }); - - return node; - } - - pub fn write(self: Node, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) Trie.WriteError!void { - if (self.offset) |off| { - // Terminal node info: encode export flags and vmaddr offset of this symbol. - var info_buf_len: usize = 0; - var info_buf: [@sizeOf(u64) * 2]u8 = undefined; - info_buf_len += try std.debug.leb.writeULEB128Mem(info_buf[0..], self.export_flags.?); - info_buf_len += try std.debug.leb.writeULEB128Mem(info_buf[info_buf_len..], off); - - // Encode the size of the terminal node info. - var size_buf: [@sizeOf(u64)]u8 = undefined; - const size_buf_len = try std.debug.leb.writeULEB128Mem(size_buf[0..], info_buf_len); - - // Now, write them to the output buffer. - try buffer.ensureCapacity(alloc, buffer.items.len + info_buf_len + size_buf_len); - buffer.appendSliceAssumeCapacity(size_buf[0..size_buf_len]); - buffer.appendSliceAssumeCapacity(info_buf[0..info_buf_len]); - } else { - // Non-terminal node is delimited by 0 byte. - try buffer.append(alloc, 0); - } - // Write number of edges (max legal number of edges is 256). - try buffer.append(alloc, @intCast(u8, self.edges.items.len)); - - var node_offset_info: [@sizeOf(u8)]u64 = undefined; - for (self.edges.items) |edge, i| { - // Write edges labels leaving out space in-between to later populate - // with offsets to each node. - try buffer.ensureCapacity(alloc, buffer.items.len + edge.label.len + 1 + @sizeOf(u64)); // +1 to account for null-byte - buffer.appendSliceAssumeCapacity(edge.label); - buffer.appendAssumeCapacity(0); - node_offset_info[i] = buffer.items.len; - const padding = [_]u8{0} ** @sizeOf(u64); - buffer.appendSliceAssumeCapacity(padding[0..]); - } - - for (self.edges.items) |edge, i| { - const offset = buffer.items.len; - try edge.to.write(alloc, buffer); - // We can now populate the offset to the node pointed by this edge. - var offset_buf: [@sizeOf(u64)]u8 = undefined; - const offset_buf_len = try std.debug.leb.writeULEB128Mem(offset_buf[0..], offset); - mem.copy(u8, buffer.items[node_offset_info[i]..], offset_buf[0..offset_buf_len]); - } - } - }; - - root: Node, - - pub fn put(self: *Trie, alloc: *Allocator, word: []const u8) !*Node { - return self.root.put(alloc, null, 0, word); - } - - pub const WriteError = error{ OutOfMemory, NoSpaceLeft }; - - pub fn write(self: Trie, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) WriteError!void { - return self.root.write(alloc, buffer); - } - - pub fn deinit(self: *Trie, alloc: *Allocator) void { - self.root.deinit(alloc); - } -}; - base: File, /// Table of all load commands @@ -1541,19 +1393,21 @@ fn writeExportTrie(self: *MachO) !void { defer trie.deinit(self.base.allocator); const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - for (self.global_symbols.items) |symbol| { // TODO figure out if we should put all global symbols into the export trie const name = self.getString(symbol.n_strx); - const node = try trie.put(self.base.allocator, name); - node.offset = symbol.n_value - text_segment.vmaddr; - node.export_flags = 0; // TODO workout creation of export flags + assert(symbol.n_value >= text_segment.vmaddr); + try trie.put(self.base.allocator, .{ + .name = name, + .offset = symbol.n_value - text_segment.vmaddr, + .export_flags = 0, // TODO workout creation of export flags + }); } var buffer: std.ArrayListUnmanaged(u8) = .{}; defer buffer.deinit(self.base.allocator); - try trie.write(self.base.allocator, &buffer); + try trie.writeULEB128Mem(self.base.allocator, &buffer); const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfo; try self.base.file.?.pwriteAll(buffer.items, dyld_info.export_off); @@ -1688,48 +1542,3 @@ fn satMul(a: anytype, b: anytype) @TypeOf(a, b) { const T = @TypeOf(a, b); return std.math.mul(T, a, b) catch std.math.maxInt(T); } - -test "Trie basic" { - const testing = @import("std").testing; - var gpa = testing.allocator; - - var trie: Trie = .{ - .root = .{}, - }; - defer trie.deinit(gpa); - - // root - testing.expect(trie.root.edges.items.len == 0); - - // root --- _st ---> node - try trie.put(gpa, "_st"); - testing.expect(trie.root.edges.items.len == 1); - testing.expect(mem.eql(u8, trie.root.edges.items[0].label, "_st")); - - { - // root --- _st ---> node --- _start ---> node - try trie.put(gpa, "_start"); - testing.expect(trie.root.edges.items.len == 1); - - const nextEdge = &trie.root.edges.items[0]; - testing.expect(mem.eql(u8, nextEdge.label, "_st")); - testing.expect(nextEdge.to.edges.items.len == 1); - testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "_start")); - } - { - // root --- _ ---> node --- _st ---> node --- _start ---> node - // | - // | --- _main ---> node - try trie.put(gpa, "_main"); - testing.expect(trie.root.edges.items.len == 1); - - const nextEdge = &trie.root.edges.items[0]; - testing.expect(mem.eql(u8, nextEdge.label, "_")); - testing.expect(nextEdge.to.edges.items.len == 2); - testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "_st")); - testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "_main")); - - const nextNextEdge = &nextEdge.to.edges.items[0]; - testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "_start")); - } -} diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig new file mode 100644 index 0000000000..24b06c8ba2 --- /dev/null +++ b/src/link/MachO/Trie.zig @@ -0,0 +1,259 @@ +/// Represents export trie used in MachO executables and dynamic libraries. +/// The purpose of an export trie is to encode as compactly as possible all +/// export symbols for the loader `dyld`. +/// The export trie encodes offset and other information using ULEB128 +/// encoding, and is part of the __LINKEDIT segment. +/// +/// Description from loader.h: +/// +/// The symbols exported by a dylib are encoded in a trie. This is a compact +/// representation that factors out common prefixes. It also reduces LINKEDIT pages +/// in RAM because it encodes all information (name, address, flags) in one small, +/// contiguous range. The export area is a stream of nodes. The first node sequentially +/// is the start node for the trie. +/// +/// Nodes for a symbol start with a uleb128 that is the length of the exported symbol +/// information for the string so far. If there is no exported symbol, the node starts +/// with a zero byte. If there is exported info, it follows the length. +/// +/// First is a uleb128 containing flags. Normally, it is followed by a uleb128 encoded +/// offset which is location of the content named by the symbol from the mach_header +/// for the image. If the flags is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags +/// is a uleb128 encoded library ordinal, then a zero terminated UTF8 string. If the string +/// is zero length, then the symbol is re-export from the specified dylib with the same name. +/// If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following the flags is two +/// uleb128s: the stub offset and the resolver offset. The stub is used by non-lazy pointers. +/// The resolver is used by lazy pointers and must be called to get the actual address to use. +/// +/// After the optional exported symbol information is a byte of how many edges (0-255) that +/// this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of +/// the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to. +const Trie = @This(); + +const std = @import("std"); +const mem = std.mem; +const leb = std.debug.leb; +const log = std.log.scoped(.link); +const Allocator = mem.Allocator; + +pub const Symbol = struct { + name: []const u8, + offset: u64, + export_flags: u64, +}; + +const Edge = struct { + from: *Node, + to: *Node, + label: []const u8, + + fn deinit(self: *Edge, alloc: *Allocator) void { + self.to.deinit(alloc); + alloc.destroy(self.to); + self.from = undefined; + self.to = undefined; + } +}; + +const Node = struct { + export_flags: ?u64 = null, + offset: ?u64 = null, + edges: std.ArrayListUnmanaged(Edge) = .{}, + + fn deinit(self: *Node, alloc: *Allocator) void { + for (self.edges.items) |*edge| { + edge.deinit(alloc); + } + self.edges.deinit(alloc); + } + + fn put(self: *Node, alloc: *Allocator, fromEdge: ?*Edge, prefix: usize, label: []const u8) !*Node { + // Traverse all edges. + for (self.edges.items) |*edge| { + const match = mem.indexOfDiff(u8, edge.label, label) orelse return self; // Got a full match, don't do anything. + if (match - prefix > 0) { + // If we match, we advance further down the trie. + return edge.to.put(alloc, edge, match, label); + } + } + + if (fromEdge) |from| { + if (mem.eql(u8, from.label, label[0..prefix])) { + if (prefix == label.len) return self; + } else { + // Fixup nodes. We need to insert an intermediate node between + // from.to and self. + // Is: A -> B + // Should be: A -> C -> B + const mid = try alloc.create(Node); + mid.* = .{}; + const to_label = from.label; + from.to = mid; + from.label = label[0..prefix]; + + try mid.edges.append(alloc, .{ + .from = mid, + .to = self, + .label = to_label, + }); + + if (prefix == label.len) return self; // We're done. + + const new_node = try alloc.create(Node); + new_node.* = .{}; + + try mid.edges.append(alloc, .{ + .from = mid, + .to = new_node, + .label = label, + }); + + return new_node; + } + } + + // Add a new edge. + const node = try alloc.create(Node); + node.* = .{}; + + try self.edges.append(alloc, .{ + .from = self, + .to = node, + .label = label, + }); + + return node; + } + + fn writeULEB128Mem(self: Node, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) Trie.WriteError!void { + if (self.offset) |offset| { + // Terminal node info: encode export flags and vmaddr offset of this symbol. + var info_buf_len: usize = 0; + var info_buf: [@sizeOf(u64) * 2]u8 = undefined; + info_buf_len += try leb.writeULEB128Mem(info_buf[0..], self.export_flags.?); + info_buf_len += try leb.writeULEB128Mem(info_buf[info_buf_len..], offset); + + // Encode the size of the terminal node info. + var size_buf: [@sizeOf(u64)]u8 = undefined; + const size_buf_len = try leb.writeULEB128Mem(size_buf[0..], info_buf_len); + + // Now, write them to the output buffer. + try buffer.ensureCapacity(alloc, buffer.items.len + info_buf_len + size_buf_len); + buffer.appendSliceAssumeCapacity(size_buf[0..size_buf_len]); + buffer.appendSliceAssumeCapacity(info_buf[0..info_buf_len]); + } else { + // Non-terminal node is delimited by 0 byte. + try buffer.append(alloc, 0); + } + // Write number of edges (max legal number of edges is 256). + try buffer.append(alloc, @intCast(u8, self.edges.items.len)); + + var node_offset_info: [@sizeOf(u8)]u64 = undefined; + for (self.edges.items) |edge, i| { + // Write edges labels leaving out space in-between to later populate + // with offsets to each node. + try buffer.ensureCapacity(alloc, buffer.items.len + edge.label.len + 1 + @sizeOf(u64)); // +1 to account for null-byte + buffer.appendSliceAssumeCapacity(edge.label); + buffer.appendAssumeCapacity(0); + node_offset_info[i] = buffer.items.len; + const padding = [_]u8{0} ** @sizeOf(u64); + buffer.appendSliceAssumeCapacity(padding[0..]); + } + + for (self.edges.items) |edge, i| { + const offset = buffer.items.len; + try edge.to.writeULEB128Mem(alloc, buffer); + // We can now populate the offset to the node pointed by this edge. + // TODO this is not the approach taken by `ld64` which does several iterations + // to close the gap between the space encoding the offset to the node pointed + // by this edge. However, it seems that as long as we are contiguous, the padding + // introduced here should not influence the performance of `dyld`. I'm leaving + // this TODO here though as a reminder to re-investigate in the future and especially + // when we start working on dylibs in case `dyld` refuses to cooperate and/or the + // performance is noticably sufferring. + // Link to official impl: https://opensource.apple.com/source/ld64/ld64-123.2.1/src/abstraction/MachOTrie.hpp + var offset_buf: [@sizeOf(u64)]u8 = undefined; + const offset_buf_len = try leb.writeULEB128Mem(offset_buf[0..], offset); + mem.copy(u8, buffer.items[node_offset_info[i]..], offset_buf[0..offset_buf_len]); + } + } +}; + +root: Node, + +/// Insert a symbol into the trie, updating the prefixes in the process. +/// This operation may change the layout of the trie by splicing edges in +/// certain circumstances. +pub fn put(self: *Trie, alloc: *Allocator, symbol: Symbol) !void { + const node = try self.root.put(alloc, null, 0, symbol.name); + node.offset = symbol.offset; + node.export_flags = symbol.export_flags; +} + +pub const WriteError = error{ OutOfMemory, NoSpaceLeft }; + +/// Write the trie to a buffer ULEB128 encoded. +pub fn writeULEB128Mem(self: Trie, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) WriteError!void { + return self.root.writeULEB128Mem(alloc, buffer); +} + +pub fn deinit(self: *Trie, alloc: *Allocator) void { + self.root.deinit(alloc); +} + +test "Trie basic" { + const testing = @import("std").testing; + var gpa = testing.allocator; + + var trie: Trie = .{ + .root = .{}, + }; + defer trie.deinit(gpa); + + // root + testing.expect(trie.root.edges.items.len == 0); + + // root --- _st ---> node + try trie.put(gpa, .{ + .name = "_st", + .offset = 0, + .export_flags = 0, + }); + testing.expect(trie.root.edges.items.len == 1); + testing.expect(mem.eql(u8, trie.root.edges.items[0].label, "_st")); + + { + // root --- _st ---> node --- _start ---> node + try trie.put(gpa, .{ + .name = "_start", + .offset = 0, + .export_flags = 0, + }); + testing.expect(trie.root.edges.items.len == 1); + + const nextEdge = &trie.root.edges.items[0]; + testing.expect(mem.eql(u8, nextEdge.label, "_st")); + testing.expect(nextEdge.to.edges.items.len == 1); + testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "_start")); + } + { + // root --- _ ---> node --- _st ---> node --- _start ---> node + // | + // | --- _main ---> node + try trie.put(gpa, .{ + .name = "_main", + .offset = 0, + .export_flags = 0, + }); + testing.expect(trie.root.edges.items.len == 1); + + const nextEdge = &trie.root.edges.items[0]; + testing.expect(mem.eql(u8, nextEdge.label, "_")); + testing.expect(nextEdge.to.edges.items.len == 2); + testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "_st")); + testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "_main")); + + const nextNextEdge = &nextEdge.to.edges.items[0]; + testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "_start")); + } +} From b5b25d38a8fa4e66e54ff1279c1becee877793f6 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Oct 2020 20:32:02 +0200 Subject: [PATCH 5/9] Fix improper reuse of global symbols in MachO Signed-off-by: Jakub Konka --- src/Module.zig | 15 ++++++++++++--- src/link.zig | 8 ++++++++ src/link/Elf.zig | 4 ++-- src/link/MachO.zig | 24 ++++++++++++++++++++---- 4 files changed, 42 insertions(+), 9 deletions(-) diff --git a/src/Module.zig b/src/Module.zig index 75b6afffcd..0c2a38be28 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -93,7 +93,7 @@ pub const Export = struct { /// Byte offset into the file that contains the export directive. src: usize, /// Represents the position of the export, if any, in the output file. - link: link.File.Elf.Export, + link: link.File.Export, /// The Decl that performs the export. Note that this is *not* the Decl being exported. owner_decl: *Decl, /// The Decl being exported. Note this is *not* the Decl performing the export. @@ -1712,7 +1712,10 @@ fn deleteDeclExports(self: *Module, decl: *Decl) void { } } if (self.comp.bin_file.cast(link.File.Elf)) |elf| { - elf.deleteExport(exp.link); + elf.deleteExport(exp.link.elf); + } + if (self.comp.bin_file.cast(link.File.MachO)) |macho| { + macho.deleteExport(exp.link.macho); } if (self.failed_exports.remove(exp)) |entry| { entry.value.destroy(self.gpa); @@ -1875,7 +1878,13 @@ pub fn analyzeExport(self: *Module, scope: *Scope, src: usize, borrowed_symbol_n new_export.* = .{ .options = .{ .name = symbol_name }, .src = src, - .link = .{}, + .link = switch (self.comp.bin_file.tag) { + .coff => .{ .coff = {} }, + .elf => .{ .elf = link.File.Elf.Export{} }, + .macho => .{ .macho = link.File.MachO.Export{} }, + .c => .{ .c = {} }, + .wasm => .{ .wasm = {} }, + }, .owner_decl = owner_decl, .exported_decl = exported_decl, .status = .in_progress, diff --git a/src/link.zig b/src/link.zig index 139977b3e2..99bca45fbe 100644 --- a/src/link.zig +++ b/src/link.zig @@ -133,6 +133,14 @@ pub const File = struct { wasm: ?Wasm.FnData, }; + pub const Export = union { + elf: Elf.Export, + coff: void, + macho: MachO.Export, + c: void, + wasm: void, + }; + /// For DWARF .debug_info. pub const DbgInfoTypeRelocsTable = std.HashMapUnmanaged(Type, DbgInfoTypeReloc, Type.hash, Type.eql, std.hash_map.DefaultMaxLoadPercentage); diff --git a/src/link/Elf.zig b/src/link/Elf.zig index c62bb29f78..a316a9c19e 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -2588,7 +2588,7 @@ pub fn updateDeclExports( }, }; const stt_bits: u8 = @truncate(u4, decl_sym.st_info); - if (exp.link.sym_index) |i| { + if (exp.link.elf.sym_index) |i| { const sym = &self.global_symbols.items[i]; sym.* = .{ .st_name = try self.updateString(sym.st_name, exp.options.name), @@ -2613,7 +2613,7 @@ pub fn updateDeclExports( .st_size = decl_sym.st_size, }; - exp.link.sym_index = @intCast(u32, i); + exp.link.elf.sym_index = @intCast(u32, i); } } } diff --git a/src/link/MachO.zig b/src/link/MachO.zig index afc54f8f7b..486620804b 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -115,6 +115,9 @@ local_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{}, global_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{}, /// Table of all undefined symbols undef_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{}, + +global_symbol_free_list: std.ArrayListUnmanaged(u32) = .{}, + dyld_stub_binder_index: ?u16 = null, /// Table of symbol names aka the string table. @@ -178,6 +181,10 @@ pub const TextBlock = struct { }; }; +pub const Export = struct { + sym_index: ?u32 = null, +}; + pub const SrcFn = struct { pub const empty = SrcFn{}; }; @@ -713,6 +720,7 @@ pub fn deinit(self: *MachO) void { self.string_table.deinit(self.base.allocator); self.undef_symbols.deinit(self.base.allocator); self.global_symbols.deinit(self.base.allocator); + self.global_symbol_free_list.deinit(self.base.allocator); self.local_symbols.deinit(self.base.allocator); self.sections.deinit(self.base.allocator); self.load_commands.deinit(self.base.allocator); @@ -837,7 +845,7 @@ pub fn updateDeclExports( }, }; const n_type = decl_sym.n_type | macho.N_EXT; - if (exp.link.sym_index) |i| { + if (exp.link.macho.sym_index) |i| { const sym = &self.global_symbols.items[i]; sym.* = .{ .n_strx = try self.updateString(sym.n_strx, exp.options.name), @@ -848,8 +856,10 @@ pub fn updateDeclExports( }; } else { const name_str_index = try self.makeString(exp.options.name); - _ = self.global_symbols.addOneAssumeCapacity(); - const i = self.global_symbols.items.len - 1; + const i = if (self.global_symbol_free_list.popOrNull()) |i| i else blk: { + _ = self.global_symbols.addOneAssumeCapacity(); + break :blk self.global_symbols.items.len - 1; + }; self.global_symbols.items[i] = .{ .n_strx = name_str_index, .n_type = n_type, @@ -858,11 +868,17 @@ pub fn updateDeclExports( .n_value = decl_sym.n_value, }; - exp.link.sym_index = @intCast(u32, i); + exp.link.macho.sym_index = @intCast(u32, i); } } } +pub fn deleteExport(self: *MachO, exp: Export) void { + const sym_index = exp.sym_index orelse return; + self.global_symbol_free_list.append(self.base.allocator, sym_index) catch {}; + self.global_symbols.items[sym_index].n_type = 0; +} + pub fn freeDecl(self: *MachO, decl: *Module.Decl) void {} pub fn getDeclVAddr(self: *MachO, decl: *const Module.Decl) u64 { From ea44d12d1be8eb17a1555f6ab794621da0212171 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Oct 2020 21:19:45 +0200 Subject: [PATCH 6/9] Add writeULEB128Mem test and couple fixes Signed-off-by: Jakub Konka --- src/link/MachO/Trie.zig | 177 ++++++++++++++++++++++++++++------------ 1 file changed, 127 insertions(+), 50 deletions(-) diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig index 24b06c8ba2..f7d37315cf 100644 --- a/src/link/MachO/Trie.zig +++ b/src/link/MachO/Trie.zig @@ -34,6 +34,7 @@ const std = @import("std"); const mem = std.mem; const leb = std.debug.leb; const log = std.log.scoped(.link); +const testing = std.testing; const Allocator = mem.Allocator; pub const Symbol = struct { @@ -67,48 +68,33 @@ const Node = struct { self.edges.deinit(alloc); } - fn put(self: *Node, alloc: *Allocator, fromEdge: ?*Edge, prefix: usize, label: []const u8) !*Node { - // Traverse all edges. + fn put(self: *Node, alloc: *Allocator, label: []const u8) !*Node { + // Check for match with edges from this node. for (self.edges.items) |*edge| { - const match = mem.indexOfDiff(u8, edge.label, label) orelse return self; // Got a full match, don't do anything. - if (match - prefix > 0) { - // If we match, we advance further down the trie. - return edge.to.put(alloc, edge, match, label); - } - } + const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to; + if (match == 0) continue; + if (match == edge.label.len) return edge.to.put(alloc, label[match..]); - if (fromEdge) |from| { - if (mem.eql(u8, from.label, label[0..prefix])) { - if (prefix == label.len) return self; + // Found a match, need to splice up nodes. + // From: A -> B + // To: A -> C -> B + const mid = try alloc.create(Node); + mid.* = .{}; + const to_label = edge.label; + const to_node = edge.to; + edge.to = mid; + edge.label = label[0..match]; + + try mid.edges.append(alloc, .{ + .from = mid, + .to = to_node, + .label = to_label[match..], + }); + + if (match == label.len) { + return to_node; } else { - // Fixup nodes. We need to insert an intermediate node between - // from.to and self. - // Is: A -> B - // Should be: A -> C -> B - const mid = try alloc.create(Node); - mid.* = .{}; - const to_label = from.label; - from.to = mid; - from.label = label[0..prefix]; - - try mid.edges.append(alloc, .{ - .from = mid, - .to = self, - .label = to_label, - }); - - if (prefix == label.len) return self; // We're done. - - const new_node = try alloc.create(Node); - new_node.* = .{}; - - try mid.edges.append(alloc, .{ - .from = mid, - .to = new_node, - .label = label, - }); - - return new_node; + return mid.put(alloc, label[match..]); } } @@ -148,7 +134,7 @@ const Node = struct { // Write number of edges (max legal number of edges is 256). try buffer.append(alloc, @intCast(u8, self.edges.items.len)); - var node_offset_info: [@sizeOf(u8)]u64 = undefined; + var node_offset_info: [std.math.maxInt(u8)]u64 = undefined; for (self.edges.items) |edge, i| { // Write edges labels leaving out space in-between to later populate // with offsets to each node. @@ -185,7 +171,7 @@ root: Node, /// This operation may change the layout of the trie by splicing edges in /// certain circumstances. pub fn put(self: *Trie, alloc: *Allocator, symbol: Symbol) !void { - const node = try self.root.put(alloc, null, 0, symbol.name); + const node = try self.root.put(alloc, symbol.name); node.offset = symbol.offset; node.export_flags = symbol.export_flags; } @@ -202,9 +188,7 @@ pub fn deinit(self: *Trie, alloc: *Allocator) void { } test "Trie basic" { - const testing = @import("std").testing; var gpa = testing.allocator; - var trie: Trie = .{ .root = .{}, }; @@ -223,7 +207,7 @@ test "Trie basic" { testing.expect(mem.eql(u8, trie.root.edges.items[0].label, "_st")); { - // root --- _st ---> node --- _start ---> node + // root --- _st ---> node --- art ---> node try trie.put(gpa, .{ .name = "_start", .offset = 0, @@ -234,12 +218,12 @@ test "Trie basic" { const nextEdge = &trie.root.edges.items[0]; testing.expect(mem.eql(u8, nextEdge.label, "_st")); testing.expect(nextEdge.to.edges.items.len == 1); - testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "_start")); + testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "art")); } { - // root --- _ ---> node --- _st ---> node --- _start ---> node + // root --- _ ---> node --- st ---> node --- art ---> node // | - // | --- _main ---> node + // | --- main ---> node try trie.put(gpa, .{ .name = "_main", .offset = 0, @@ -250,10 +234,103 @@ test "Trie basic" { const nextEdge = &trie.root.edges.items[0]; testing.expect(mem.eql(u8, nextEdge.label, "_")); testing.expect(nextEdge.to.edges.items.len == 2); - testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "_st")); - testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "_main")); + testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "st")); + testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "main")); const nextNextEdge = &nextEdge.to.edges.items[0]; - testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "_start")); + testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "art")); } } + +test "Trie.writeULEB128Mem" { + var gpa = testing.allocator; + var trie: Trie = .{ + .root = .{}, + }; + defer trie.deinit(gpa); + + try trie.put(gpa, .{ + .name = "__mh_execute_header", + .offset = 0, + .export_flags = 0, + }); + try trie.put(gpa, .{ + .name = "_main", + .offset = 0x1000, + .export_flags = 0, + }); + + var buffer: std.ArrayListUnmanaged(u8) = .{}; + defer buffer.deinit(gpa); + + try trie.writeULEB128Mem(gpa, &buffer); + + const exp_buffer = [_]u8{ + 0x0, + 0x1, + 0x5f, + 0x0, + 0xc, + 0x0, + 0x0, + 0x0, + 0x0, + 0x0, + 0x0, + 0x0, + 0x0, + 0x2, + 0x5f, + 0x6d, + 0x68, + 0x5f, + 0x65, + 0x78, + 0x65, + 0x63, + 0x75, + 0x74, + 0x65, + 0x5f, + 0x68, + 0x65, + 0x61, + 0x64, + 0x65, + 0x72, + 0x0, + 0x36, + 0x0, + 0x0, + 0x0, + 0x0, + 0x0, + 0x0, + 0x0, + 0x6d, + 0x61, + 0x69, + 0x6e, + 0x0, + 0x3a, + 0x0, + 0x0, + 0x0, + 0x0, + 0x0, + 0x0, + 0x0, + 0x2, + 0x0, + 0x0, + 0x0, + 0x3, + 0x0, + 0x80, + 0x20, + 0x0, + }; + + testing.expect(buffer.items.len == exp_buffer.len); + testing.expect(mem.eql(u8, buffer.items, exp_buffer[0..])); +} From 5f86505cf79a0ce75e1a02602ae0e9c845024982 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 8 Oct 2020 17:52:08 +0200 Subject: [PATCH 7/9] Fix ULEB128 encoding of trie Use algorithm described in official Apple `ld64` implementation. Link: https://opensource.apple.com/source/ld64/ld64-123.2.1/src/abstraction/MachOTrie.hpp Signed-off-by: Jakub Konka --- src/link/MachO.zig | 2 +- src/link/MachO/Trie.zig | 144 +++++++++++++++++++++++----------------- 2 files changed, 84 insertions(+), 62 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 486620804b..ce9b7d1706 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1415,7 +1415,7 @@ fn writeExportTrie(self: *MachO) !void { assert(symbol.n_value >= text_segment.vmaddr); try trie.put(self.base.allocator, .{ .name = name, - .offset = symbol.n_value - text_segment.vmaddr, + .vmaddr_offset = symbol.n_value - text_segment.vmaddr, .export_flags = 0, // TODO workout creation of export flags }); } diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig index f7d37315cf..4c13262d2d 100644 --- a/src/link/MachO/Trie.zig +++ b/src/link/MachO/Trie.zig @@ -39,7 +39,7 @@ const Allocator = mem.Allocator; pub const Symbol = struct { name: []const u8, - offset: u64, + vmaddr_offset: u64, export_flags: u64, }; @@ -58,7 +58,8 @@ const Edge = struct { const Node = struct { export_flags: ?u64 = null, - offset: ?u64 = null, + vmaddr_offset: ?u64 = null, + trie_offset: usize = 0, edges: std.ArrayListUnmanaged(Edge) = .{}, fn deinit(self: *Node, alloc: *Allocator) void { @@ -111,8 +112,8 @@ const Node = struct { return node; } - fn writeULEB128Mem(self: Node, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) Trie.WriteError!void { - if (self.offset) |offset| { + fn writeULEB128Mem(self: Node, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) !void { + if (self.vmaddr_offset) |offset| { // Terminal node info: encode export flags and vmaddr offset of this symbol. var info_buf_len: usize = 0; var info_buf: [@sizeOf(u64) * 2]u8 = undefined; @@ -134,34 +135,53 @@ const Node = struct { // Write number of edges (max legal number of edges is 256). try buffer.append(alloc, @intCast(u8, self.edges.items.len)); - var node_offset_info: [std.math.maxInt(u8)]u64 = undefined; - for (self.edges.items) |edge, i| { - // Write edges labels leaving out space in-between to later populate - // with offsets to each node. - try buffer.ensureCapacity(alloc, buffer.items.len + edge.label.len + 1 + @sizeOf(u64)); // +1 to account for null-byte + for (self.edges.items) |edge| { + // Write edges labels. + try buffer.ensureCapacity(alloc, buffer.items.len + edge.label.len + 1); // +1 to account for null-byte buffer.appendSliceAssumeCapacity(edge.label); buffer.appendAssumeCapacity(0); - node_offset_info[i] = buffer.items.len; - const padding = [_]u8{0} ** @sizeOf(u64); - buffer.appendSliceAssumeCapacity(padding[0..]); + + var buf: [@sizeOf(u64)]u8 = undefined; + const buf_len = try leb.writeULEB128Mem(buf[0..], edge.to.trie_offset); + try buffer.appendSlice(alloc, buf[0..buf_len]); + } + } + + const UpdateResult = struct { + node_size: usize, + updated: bool, + }; + + fn updateOffset(self: *Node, offset: usize) UpdateResult { + var node_size: usize = 0; + if (self.vmaddr_offset) |vmaddr| { + node_size += sizeULEB128Mem(self.export_flags.?); + node_size += sizeULEB128Mem(vmaddr); + node_size += sizeULEB128Mem(node_size); + } else { + node_size += 1; // 0x0 for non-terminal nodes + } + node_size += 1; // 1 byte for edge count + + for (self.edges.items) |edge| { + node_size += edge.label.len + 1 + sizeULEB128Mem(edge.to.trie_offset); } - for (self.edges.items) |edge, i| { - const offset = buffer.items.len; - try edge.to.writeULEB128Mem(alloc, buffer); - // We can now populate the offset to the node pointed by this edge. - // TODO this is not the approach taken by `ld64` which does several iterations - // to close the gap between the space encoding the offset to the node pointed - // by this edge. However, it seems that as long as we are contiguous, the padding - // introduced here should not influence the performance of `dyld`. I'm leaving - // this TODO here though as a reminder to re-investigate in the future and especially - // when we start working on dylibs in case `dyld` refuses to cooperate and/or the - // performance is noticably sufferring. - // Link to official impl: https://opensource.apple.com/source/ld64/ld64-123.2.1/src/abstraction/MachOTrie.hpp - var offset_buf: [@sizeOf(u64)]u8 = undefined; - const offset_buf_len = try leb.writeULEB128Mem(offset_buf[0..], offset); - mem.copy(u8, buffer.items[node_offset_info[i]..], offset_buf[0..offset_buf_len]); + const updated = offset != self.trie_offset; + self.trie_offset = offset; + + return .{ .node_size = node_size, .updated = updated }; + } + + fn sizeULEB128Mem(value: u64) usize { + var res: usize = 0; + var v = value; + while (true) { + v = v >> 7; + res += 1; + if (v == 0) break; } + return res; } }; @@ -172,15 +192,38 @@ root: Node, /// certain circumstances. pub fn put(self: *Trie, alloc: *Allocator, symbol: Symbol) !void { const node = try self.root.put(alloc, symbol.name); - node.offset = symbol.offset; + node.vmaddr_offset = symbol.vmaddr_offset; node.export_flags = symbol.export_flags; } -pub const WriteError = error{ OutOfMemory, NoSpaceLeft }; - /// Write the trie to a buffer ULEB128 encoded. -pub fn writeULEB128Mem(self: Trie, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) WriteError!void { - return self.root.writeULEB128Mem(alloc, buffer); +pub fn writeULEB128Mem(self: *Trie, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) !void { + var ordered_nodes: std.ArrayListUnmanaged(*Node) = .{}; + defer ordered_nodes.deinit(alloc); + + try walkInOrder(&self.root, alloc, &ordered_nodes); + + var more: bool = true; + while (more) { + var offset: usize = 0; + more = false; + for (ordered_nodes.items) |node| { + const res = node.updateOffset(offset); + offset += res.node_size; + if (res.updated) more = true; + } + } + + for (ordered_nodes.items) |node| { + try node.writeULEB128Mem(alloc, buffer); + } +} + +fn walkInOrder(node: *Node, alloc: *Allocator, list: *std.ArrayListUnmanaged(*Node)) error{OutOfMemory}!void { + try list.append(alloc, node); + for (node.edges.items) |*edge| { + try walkInOrder(edge.to, alloc, list); + } } pub fn deinit(self: *Trie, alloc: *Allocator) void { @@ -200,7 +243,7 @@ test "Trie basic" { // root --- _st ---> node try trie.put(gpa, .{ .name = "_st", - .offset = 0, + .vmaddr_offset = 0, .export_flags = 0, }); testing.expect(trie.root.edges.items.len == 1); @@ -210,7 +253,7 @@ test "Trie basic" { // root --- _st ---> node --- art ---> node try trie.put(gpa, .{ .name = "_start", - .offset = 0, + .vmaddr_offset = 0, .export_flags = 0, }); testing.expect(trie.root.edges.items.len == 1); @@ -226,7 +269,7 @@ test "Trie basic" { // | --- main ---> node try trie.put(gpa, .{ .name = "_main", - .offset = 0, + .vmaddr_offset = 0, .export_flags = 0, }); testing.expect(trie.root.edges.items.len == 1); @@ -251,12 +294,12 @@ test "Trie.writeULEB128Mem" { try trie.put(gpa, .{ .name = "__mh_execute_header", - .offset = 0, + .vmaddr_offset = 0, .export_flags = 0, }); try trie.put(gpa, .{ .name = "_main", - .offset = 0x1000, + .vmaddr_offset = 0x1000, .export_flags = 0, }); @@ -270,14 +313,7 @@ test "Trie.writeULEB128Mem" { 0x1, 0x5f, 0x0, - 0xc, - 0x0, - 0x0, - 0x0, - 0x0, - 0x0, - 0x0, - 0x0, + 0x5, 0x0, 0x2, 0x5f, @@ -299,27 +335,13 @@ test "Trie.writeULEB128Mem" { 0x65, 0x72, 0x0, - 0x36, - 0x0, - 0x0, - 0x0, - 0x0, - 0x0, - 0x0, - 0x0, + 0x21, 0x6d, 0x61, 0x69, 0x6e, 0x0, - 0x3a, - 0x0, - 0x0, - 0x0, - 0x0, - 0x0, - 0x0, - 0x0, + 0x25, 0x2, 0x0, 0x0, From ba41e599bfaff2c614c4edfbe5c7ffe94b437486 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 8 Oct 2020 18:10:32 +0200 Subject: [PATCH 8/9] Clean up writing the trie into ULEB128 byte stream Prealloc as much as possible to improve alloc performance. Signed-off-by: Jakub Konka --- src/link/MachO.zig | 4 +- src/link/MachO/Trie.zig | 144 +++++++++++++++++++++++++++++++--------- 2 files changed, 112 insertions(+), 36 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index ce9b7d1706..697e4f0be3 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1403,9 +1403,7 @@ fn writeAllUndefSymbols(self: *MachO) !void { fn writeExportTrie(self: *MachO) !void { if (self.global_symbols.items.len == 0) return; // No exports, nothing to do. - var trie: Trie = .{ - .root = .{}, - }; + var trie: Trie = .{}; defer trie.deinit(self.base.allocator); const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig index 4c13262d2d..d19914f292 100644 --- a/src/link/MachO/Trie.zig +++ b/src/link/MachO/Trie.zig @@ -35,6 +35,7 @@ const mem = std.mem; const leb = std.debug.leb; const log = std.log.scoped(.link); const testing = std.testing; +const assert = std.debug.assert; const Allocator = mem.Allocator; pub const Symbol = struct { @@ -57,9 +58,13 @@ const Edge = struct { }; const Node = struct { + /// Export flags associated with this exported symbol (if any). export_flags: ?u64 = null, + /// VM address offset wrt to the section this symbol is defined against (if any). vmaddr_offset: ?u64 = null, - trie_offset: usize = 0, + /// Offset of this node in the trie output byte stream. + trie_offset: ?usize = null, + /// List of all edges originating from this node. edges: std.ArrayListUnmanaged(Edge) = .{}, fn deinit(self: *Node, alloc: *Allocator) void { @@ -69,12 +74,24 @@ const Node = struct { self.edges.deinit(alloc); } - fn put(self: *Node, alloc: *Allocator, label: []const u8) !*Node { + const PutResult = struct { + /// Node reached at this stage of `put` op. + node: *Node, + /// Count of newly inserted nodes at this stage of `put` op. + node_count: usize, + }; + + /// Inserts a new node starting from `self`. + fn put(self: *Node, alloc: *Allocator, label: []const u8, node_count: usize) !PutResult { + var curr_node_count = node_count; // Check for match with edges from this node. for (self.edges.items) |*edge| { - const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to; + const match = mem.indexOfDiff(u8, edge.label, label) orelse return PutResult{ + .node = edge.to, + .node_count = curr_node_count, + }; if (match == 0) continue; - if (match == edge.label.len) return edge.to.put(alloc, label[match..]); + if (match == edge.label.len) return edge.to.put(alloc, label[match..], curr_node_count); // Found a match, need to splice up nodes. // From: A -> B @@ -85,6 +102,7 @@ const Node = struct { const to_node = edge.to; edge.to = mid; edge.label = label[0..match]; + curr_node_count += 1; try mid.edges.append(alloc, .{ .from = mid, @@ -93,15 +111,16 @@ const Node = struct { }); if (match == label.len) { - return to_node; + return PutResult{ .node = to_node, .node_count = curr_node_count }; } else { - return mid.put(alloc, label[match..]); + return mid.put(alloc, label[match..], curr_node_count); } } - // Add a new edge. + // Add a new node. const node = try alloc.create(Node); node.* = .{}; + curr_node_count += 1; try self.edges.append(alloc, .{ .from = self, @@ -109,10 +128,13 @@ const Node = struct { .label = label, }); - return node; + return PutResult{ .node = node, .node_count = curr_node_count }; } - fn writeULEB128Mem(self: Node, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) !void { + /// This method should only be called *after* updateOffset has been called! + /// In case this is not upheld, this method will panic. + fn writeULEB128Mem(self: Node, buffer: *std.ArrayListUnmanaged(u8)) !void { + assert(self.trie_offset != null); // You need to call updateOffset first. if (self.vmaddr_offset) |offset| { // Terminal node info: encode export flags and vmaddr offset of this symbol. var info_buf_len: usize = 0; @@ -125,33 +147,35 @@ const Node = struct { const size_buf_len = try leb.writeULEB128Mem(size_buf[0..], info_buf_len); // Now, write them to the output buffer. - try buffer.ensureCapacity(alloc, buffer.items.len + info_buf_len + size_buf_len); buffer.appendSliceAssumeCapacity(size_buf[0..size_buf_len]); buffer.appendSliceAssumeCapacity(info_buf[0..info_buf_len]); } else { // Non-terminal node is delimited by 0 byte. - try buffer.append(alloc, 0); + buffer.appendAssumeCapacity(0); } // Write number of edges (max legal number of edges is 256). - try buffer.append(alloc, @intCast(u8, self.edges.items.len)); + buffer.appendAssumeCapacity(@intCast(u8, self.edges.items.len)); for (self.edges.items) |edge| { // Write edges labels. - try buffer.ensureCapacity(alloc, buffer.items.len + edge.label.len + 1); // +1 to account for null-byte buffer.appendSliceAssumeCapacity(edge.label); buffer.appendAssumeCapacity(0); var buf: [@sizeOf(u64)]u8 = undefined; - const buf_len = try leb.writeULEB128Mem(buf[0..], edge.to.trie_offset); - try buffer.appendSlice(alloc, buf[0..buf_len]); + const buf_len = try leb.writeULEB128Mem(buf[0..], edge.to.trie_offset.?); + buffer.appendSliceAssumeCapacity(buf[0..buf_len]); } } const UpdateResult = struct { + /// Current size of this node in bytes. node_size: usize, + /// True if the trie offset of this node in the output byte stream + /// would need updating; false otherwise. updated: bool, }; + /// Updates offset of this node in the output byte stream. fn updateOffset(self: *Node, offset: usize) UpdateResult { var node_size: usize = 0; if (self.vmaddr_offset) |vmaddr| { @@ -164,15 +188,18 @@ const Node = struct { node_size += 1; // 1 byte for edge count for (self.edges.items) |edge| { - node_size += edge.label.len + 1 + sizeULEB128Mem(edge.to.trie_offset); + const next_node_offset = edge.to.trie_offset orelse 0; + node_size += edge.label.len + 1 + sizeULEB128Mem(next_node_offset); } - const updated = offset != self.trie_offset; + const trie_offset = self.trie_offset orelse 0; + const updated = offset != trie_offset; self.trie_offset = offset; return .{ .node_size = node_size, .updated = updated }; } + /// Calculates number of bytes in ULEB128 encoding of value. fn sizeULEB128Mem(value: u64) usize { var res: usize = 0; var v = value; @@ -185,15 +212,22 @@ const Node = struct { } }; -root: Node, +/// Count of nodes in the trie. +/// The count is updated at every `put` call. +/// The trie always consists of at least a root node, hence +/// the count always starts at 1. +node_count: usize = 1, +/// The root node of the trie. +root: Node = .{}, /// Insert a symbol into the trie, updating the prefixes in the process. /// This operation may change the layout of the trie by splicing edges in /// certain circumstances. pub fn put(self: *Trie, alloc: *Allocator, symbol: Symbol) !void { - const node = try self.root.put(alloc, symbol.name); - node.vmaddr_offset = symbol.vmaddr_offset; - node.export_flags = symbol.export_flags; + const res = try self.root.put(alloc, symbol.name, 0); + self.node_count += res.node_count; + res.node.vmaddr_offset = symbol.vmaddr_offset; + res.node.export_flags = symbol.export_flags; } /// Write the trie to a buffer ULEB128 encoded. @@ -201,11 +235,13 @@ pub fn writeULEB128Mem(self: *Trie, alloc: *Allocator, buffer: *std.ArrayListUnm var ordered_nodes: std.ArrayListUnmanaged(*Node) = .{}; defer ordered_nodes.deinit(alloc); - try walkInOrder(&self.root, alloc, &ordered_nodes); + try ordered_nodes.ensureCapacity(alloc, self.node_count); + walkInOrder(&self.root, &ordered_nodes); + var offset: usize = 0; var more: bool = true; while (more) { - var offset: usize = 0; + offset = 0; more = false; for (ordered_nodes.items) |node| { const res = node.updateOffset(offset); @@ -214,15 +250,17 @@ pub fn writeULEB128Mem(self: *Trie, alloc: *Allocator, buffer: *std.ArrayListUnm } } + try buffer.ensureCapacity(alloc, buffer.items.len + offset); for (ordered_nodes.items) |node| { - try node.writeULEB128Mem(alloc, buffer); + try node.writeULEB128Mem(buffer); } } -fn walkInOrder(node: *Node, alloc: *Allocator, list: *std.ArrayListUnmanaged(*Node)) error{OutOfMemory}!void { - try list.append(alloc, node); +/// Walks the trie in DFS order gathering all nodes into a linear stream of nodes. +fn walkInOrder(node: *Node, list: *std.ArrayListUnmanaged(*Node)) void { + list.appendAssumeCapacity(node); for (node.edges.items) |*edge| { - try walkInOrder(edge.to, alloc, list); + walkInOrder(edge.to, list); } } @@ -230,11 +268,53 @@ pub fn deinit(self: *Trie, alloc: *Allocator) void { self.root.deinit(alloc); } +test "Trie node count" { + var gpa = testing.allocator; + var trie: Trie = .{}; + defer trie.deinit(gpa); + + testing.expectEqual(trie.node_count, 1); + + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + testing.expectEqual(trie.node_count, 2); + + // Inserting the same node shouldn't update the trie. + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + testing.expectEqual(trie.node_count, 2); + + try trie.put(gpa, .{ + .name = "__mh_execute_header", + .vmaddr_offset = 0x1000, + .export_flags = 0, + }); + testing.expectEqual(trie.node_count, 4); + + // Inserting the same node shouldn't update the trie. + try trie.put(gpa, .{ + .name = "__mh_execute_header", + .vmaddr_offset = 0x1000, + .export_flags = 0, + }); + testing.expectEqual(trie.node_count, 4); + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + testing.expectEqual(trie.node_count, 4); +} + test "Trie basic" { var gpa = testing.allocator; - var trie: Trie = .{ - .root = .{}, - }; + var trie: Trie = .{}; defer trie.deinit(gpa); // root @@ -287,9 +367,7 @@ test "Trie basic" { test "Trie.writeULEB128Mem" { var gpa = testing.allocator; - var trie: Trie = .{ - .root = .{}, - }; + var trie: Trie = .{}; defer trie.deinit(gpa); try trie.put(gpa, .{ From 8dc40236153e7c7d1b8378a117d8453e3b262933 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 9 Oct 2020 17:22:39 +0200 Subject: [PATCH 9/9] Apply nitpick: top-level doc comments Signed-off-by: Jakub Konka --- src/link/MachO/Trie.zig | 60 ++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig index d19914f292..e077df101d 100644 --- a/src/link/MachO/Trie.zig +++ b/src/link/MachO/Trie.zig @@ -1,33 +1,33 @@ -/// Represents export trie used in MachO executables and dynamic libraries. -/// The purpose of an export trie is to encode as compactly as possible all -/// export symbols for the loader `dyld`. -/// The export trie encodes offset and other information using ULEB128 -/// encoding, and is part of the __LINKEDIT segment. -/// -/// Description from loader.h: -/// -/// The symbols exported by a dylib are encoded in a trie. This is a compact -/// representation that factors out common prefixes. It also reduces LINKEDIT pages -/// in RAM because it encodes all information (name, address, flags) in one small, -/// contiguous range. The export area is a stream of nodes. The first node sequentially -/// is the start node for the trie. -/// -/// Nodes for a symbol start with a uleb128 that is the length of the exported symbol -/// information for the string so far. If there is no exported symbol, the node starts -/// with a zero byte. If there is exported info, it follows the length. -/// -/// First is a uleb128 containing flags. Normally, it is followed by a uleb128 encoded -/// offset which is location of the content named by the symbol from the mach_header -/// for the image. If the flags is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags -/// is a uleb128 encoded library ordinal, then a zero terminated UTF8 string. If the string -/// is zero length, then the symbol is re-export from the specified dylib with the same name. -/// If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following the flags is two -/// uleb128s: the stub offset and the resolver offset. The stub is used by non-lazy pointers. -/// The resolver is used by lazy pointers and must be called to get the actual address to use. -/// -/// After the optional exported symbol information is a byte of how many edges (0-255) that -/// this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of -/// the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to. +//! Represents export trie used in MachO executables and dynamic libraries. +//! The purpose of an export trie is to encode as compactly as possible all +//! export symbols for the loader `dyld`. +//! The export trie encodes offset and other information using ULEB128 +//! encoding, and is part of the __LINKEDIT segment. +//! +//! Description from loader.h: +//! +//! The symbols exported by a dylib are encoded in a trie. This is a compact +//! representation that factors out common prefixes. It also reduces LINKEDIT pages +//! in RAM because it encodes all information (name, address, flags) in one small, +//! contiguous range. The export area is a stream of nodes. The first node sequentially +//! is the start node for the trie. +//! +//! Nodes for a symbol start with a uleb128 that is the length of the exported symbol +//! information for the string so far. If there is no exported symbol, the node starts +//! with a zero byte. If there is exported info, it follows the length. +//! +//! First is a uleb128 containing flags. Normally, it is followed by a uleb128 encoded +//! offset which is location of the content named by the symbol from the mach_header +//! for the image. If the flags is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags +//! is a uleb128 encoded library ordinal, then a zero terminated UTF8 string. If the string +//! is zero length, then the symbol is re-export from the specified dylib with the same name. +//! If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following the flags is two +//! uleb128s: the stub offset and the resolver offset. The stub is used by non-lazy pointers. +//! The resolver is used by lazy pointers and must be called to get the actual address to use. +//! +//! After the optional exported symbol information is a byte of how many edges (0-255) that +//! this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of +//! the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to. const Trie = @This(); const std = @import("std");