From f0a73df8e72e156bd95fa6c7f4de9512513d01b3 Mon Sep 17 00:00:00 2001
From: Jakub Konka <kubkon@jakubkonka.com>
Date: Mon, 5 Oct 2020 21:59:07 +0200
Subject: [PATCH 1/9] Add prototype for export trie generation in MachO linker

Signed-off-by: Jakub Konka <kubkon@jakubkonka.com>
---
 src/link/MachO.zig | 136 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 136 insertions(+)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index a1b9484e13..5db71ee1ab 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -64,6 +64,97 @@ const LoadCommand = union(enum) {
     }
 };
 
+/// Represents export trie used in MachO executables and dynamic libraries.
+/// The purpose of an export trie is to encode as compactly as possible all
+/// export symbols for the loader `dyld`.
+/// The export trie encodes offset and other information using ULEB128
+/// encoding, and is part of the __LINKEDIT segment.
+const Trie = struct {
+    const Node = struct {
+        const Edge = struct {
+            from: *Node,
+            to: *Node,
+            label: []const u8,
+
+            pub fn deinit(self: *Edge, alloc: *Allocator) void {
+                self.to.deinit(alloc);
+                alloc.destroy(self.to);
+                self.from = undefined;
+                self.to = undefined;
+            }
+        };
+
+        edges: std.ArrayListUnmanaged(Edge) = .{},
+
+        pub fn deinit(self: *Node, alloc: *Allocator) void {
+            for (self.edges.items) |*edge| {
+                edge.deinit(alloc);
+            }
+            self.edges.deinit(alloc);
+        }
+
+        pub fn put(self: *Node, alloc: *Allocator, fromEdge: ?*Edge, prefix: usize, label: []const u8) !void {
+            // Traverse all edges.
+            for (self.edges.items) |*edge| {
+                const match = mem.indexOfDiff(u8, edge.label, label) orelse return; // Got a full match, don't do anything.
+                if (match - prefix > 0) {
+                    // If we match, we advance further down the trie.
+                    return edge.to.put(alloc, edge, match, label);
+                }
+            }
+
+            if (fromEdge) |from| {
+                if (mem.eql(u8, from.label, label[0..prefix])) {
+                    if (prefix == label.len) return;
+                } else {
+                    // Fixup nodes. We need to insert an intermediate node between
+                    // from.to and self.
+                    const mid = try alloc.create(Node);
+                    mid.* = .{};
+                    const to_label = from.label;
+                    from.to = mid;
+                    from.label = label[0..prefix];
+
+                    try mid.edges.append(alloc, .{
+                        .from = mid,
+                        .to = self,
+                        .label = to_label,
+                    });
+
+                    if (prefix == label.len) return; // We're done.
+
+                    const new_node = try alloc.create(Node);
+                    new_node.* = .{};
+                    return mid.edges.append(alloc, .{
+                        .from = mid,
+                        .to = new_node,
+                        .label = label,
+                    });
+                }
+            }
+
+            // Add a new edge.
+            const node = try alloc.create(Node);
+            node.* = .{};
+            return self.edges.append(alloc, .{
+                .from = self,
+                .to = node,
+                .label = label,
+            });
+        }
+    };
+
+    root: Node,
+
+    pub fn put(self: *Trie, alloc: *Allocator, word: []const u8) !void {
+        return self.root.put(alloc, null, 0, word);
+    }
+
+    pub fn deinit(self: *Trie, alloc: *Allocator) void {
+        self.root.deinit(alloc);
+    }
+};
+
 base: File,
 
 /// Table of all load commands
@@ -1533,3 +1624,48 @@ fn satMul(a: anytype, b: anytype) @TypeOf(a, b) {
     const T = @TypeOf(a, b);
     return std.math.mul(T, a, b) catch std.math.maxInt(T);
 }
+
+test "Trie basic" {
+    const testing = @import("std").testing;
+    var gpa = testing.allocator;
+
+    var trie: Trie = .{
+        .root = .{},
+    };
+    defer trie.deinit(gpa);
+
+    // root
+    testing.expect(trie.root.edges.items.len == 0);
+
+    // root --- _st ---> node
+    try trie.put(gpa, "_st");
+    testing.expect(trie.root.edges.items.len == 1);
+    testing.expect(mem.eql(u8, trie.root.edges.items[0].label, "_st"));
+
+    {
+        // root --- _st ---> node --- _start ---> node
+        try trie.put(gpa, "_start");
+        testing.expect(trie.root.edges.items.len == 1);
+
+        const nextEdge = &trie.root.edges.items[0];
+        testing.expect(mem.eql(u8, nextEdge.label, "_st"));
+        testing.expect(nextEdge.to.edges.items.len == 1);
+        testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "_start"));
+    }
+    {
+        // root --- _ ---> node --- _st ---> node --- _start ---> node
+        //                  |
+        //                  |   --- _main ---> node
+        try trie.put(gpa, "_main");
+        testing.expect(trie.root.edges.items.len == 1);
+
+        const nextEdge = &trie.root.edges.items[0];
+        testing.expect(mem.eql(u8, nextEdge.label, "_"));
+        testing.expect(nextEdge.to.edges.items.len == 2);
+        testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "_st"));
+        testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "_main"));
+
+        const nextNextEdge = &nextEdge.to.edges.items[0];
+        testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "_start"));
+    }
+}

From e76fb8d8c82ffc9fdeef2de0a6008c756103811b Mon Sep 17 00:00:00 2001
From: Jakub Konka <kubkon@jakubkonka.com>
Date: Tue, 6 Oct 2020 22:34:39 +0200
Subject: [PATCH 2/9] Add incomplete writing of trie to bytes buffer

Signed-off-by: Jakub Konka <kubkon@jakubkonka.com>
---
 src/link/MachO.zig | 101 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 77 insertions(+), 24 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 5db71ee1ab..b522655b13 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -84,6 +84,8 @@ const Trie = struct {
             }
         };
 
+        export_flags: ?u64 = null,
+        offset: ?u64 = null,
         edges: std.ArrayListUnmanaged(Edge) = .{},
 
         pub fn deinit(self: *Node, alloc: *Allocator) void {
@@ -93,10 +95,10 @@ const Trie = struct {
             self.edges.deinit(alloc);
         }
 
-        pub fn put(self: *Node, alloc: *Allocator, fromEdge: ?*Edge, prefix: usize, label: []const u8) !void {
+        pub fn put(self: *Node, alloc: *Allocator, fromEdge: ?*Edge, prefix: usize, label: []const u8) !*Node {
             // Traverse all edges.
             for (self.edges.items) |*edge| {
-                const match = mem.indexOfDiff(u8, edge.label, label) orelse return; // Got a full match, don't do anything.
+                const match = mem.indexOfDiff(u8, edge.label, label) orelse return self; // Got a full match, don't do anything.
                 if (match - prefix > 0) {
                     // If we match, we advance further down the trie.
                     return edge.to.put(alloc, edge, match, label);
@@ -105,7 +107,7 @@ const Trie = struct {
 
             if (fromEdge) |from| {
                 if (mem.eql(u8, from.label, label[0..prefix])) {
-                    if (prefix == label.len) return;
+                    if (prefix == label.len) return self;
                 } else {
                     // Fixup nodes. We need to insert an intermediate node between
                     // from.to and self.
@@ -121,35 +123,86 @@ const Trie = struct {
                         .label = to_label,
                     });
 
-                    if (prefix == label.len) return; // We're done.
+                    if (prefix == label.len) return self; // We're done.
 
                     const new_node = try alloc.create(Node);
                     new_node.* = .{};
-                    return mid.edges.append(alloc, .{
+
+                    try mid.edges.append(alloc, .{
                         .from = mid,
                         .to = new_node,
                         .label = label,
                     });
+
+                    return new_node;
                 }
             }
 
             // Add a new edge.
             const node = try alloc.create(Node);
             node.* = .{};
-            return self.edges.append(alloc, .{
+
+            try self.edges.append(alloc, .{
                 .from = self,
                 .to = node,
                 .label = label,
             });
+
+            return node;
+        }
+
+        pub fn write(self: Node, buf: []u8, offset: u64) error{NoSpaceLeft}!usize {
+            var pos: usize = 0;
+            if (self.offset) |off| {
+                var info_buf_pos: usize = 0;
+                var info_buf: [@sizeOf(u64) * 2]u8 = undefined;
+                info_buf_pos += try std.debug.leb.writeULEB128Mem(info_buf[0..], self.export_flags.?);
+                info_buf_pos += try std.debug.leb.writeULEB128Mem(info_buf[info_buf_pos..], off);
+                log.debug("info_buf = {x}\n", .{info_buf[0..info_buf_pos]});
+                pos += try std.debug.leb.writeULEB128Mem(buf[pos..], info_buf_pos);
+                mem.copy(u8, buf[pos..], info_buf[0..info_buf_pos]);
+                pos += info_buf_pos;
+                log.debug("buf = {x}\n", .{buf});
+            } else {
+                buf[pos] = 0;
+                pos += 1;
+            }
+            buf[pos] = @intCast(u8, self.edges.items.len);
+            pos += 1;
+
+            for (self.edges.items) |edge| {
+                mem.copy(u8, buf[pos..], edge.label);
+                pos += edge.label.len;
+                buf[pos] = 0;
+                pos += 1;
+                const curr_offset = pos + offset + 1;
+                pos += try std.debug.leb.writeULEB128Mem(buf[pos..], curr_offset);
+                pos += try edge.to.write(buf[pos..], curr_offset);
+                log.debug("buf = {x}\n", .{buf});
+            }
+
+            return pos;
         }
     };
 
     root: Node,
 
-    pub fn put(self: *Trie, alloc: *Allocator, word: []const u8) !void {
+    pub fn put(self: *Trie, alloc: *Allocator, word: []const u8) !*Node {
         return self.root.put(alloc, null, 0, word);
     }
 
+    pub fn write(self: Trie, alloc: *Allocator, file: *fs.File, offset: u64) !void {
+        // TODO get the actual node count
+        const count = 10;
+        const node_size = @sizeOf(u64) * 2;
+
+        var buf = try alloc.alloc(u8, count * node_size);
+        defer alloc.free(buf);
+
+        const written = try self.root.write(buf, 0);
+        return file.pwriteAll(buf[0..written], offset);
+    }
+
     pub fn deinit(self: *Trie, alloc: *Allocator) void {
         self.root.deinit(alloc);
     }
@@ -347,10 +400,10 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void {
 
     switch (self.base.options.output_mode) {
         .Exe => {
-            if (self.entry_addr) |addr| {
-                // Write export trie.
-                try self.writeExportTrie();
+            // Write export trie.
+            try self.writeExportTrie();
 
+            if (self.entry_addr) |addr| {
                 // Update LC_MAIN with entry offset
                 const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
                 const main_cmd = &self.load_commands.items[self.main_cmd_index.?].EntryPoint;
@@ -1474,25 +1527,25 @@ fn writeAllUndefSymbols(self: *MachO) !void {
 }
 
 fn writeExportTrie(self: *MachO) !void {
-    assert(self.entry_addr != null);
+    if (self.global_symbols.items.len == 0) return; // No exports, nothing to do.
 
-    // TODO implement mechanism for generating a prefix tree of the exported symbols
-    // single branch export trie
-    var buf = [_]u8{0} ** 24;
-    buf[0] = 0; // root node
-    buf[1] = 1; // 1 branch from root
-    mem.copy(u8, buf[2..], "_start");
-    buf[8] = 0;
-    buf[9] = 9 + 1;
+    var trie: Trie = .{
+        .root = .{},
+    };
+    defer trie.deinit(self.base.allocator);
 
     const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-    const addr = self.entry_addr.? - text_segment.vmaddr;
-    const written = try std.debug.leb.writeULEB128Mem(buf[12..], addr);
-    buf[10] = @intCast(u8, written) + 1;
-    buf[11] = 0;
+
+    for (self.global_symbols.items) |symbol| {
+        // TODO figure out if we should put all global symbols into the export trie
+        const name = self.getString(symbol.n_strx);
+        const node = try trie.put(self.base.allocator, name);
+        node.offset = symbol.n_value - text_segment.vmaddr;
+        node.export_flags = 0; // TODO workout creation of export flags
+    }
 
     const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfo;
-    try self.base.file.?.pwriteAll(buf[0..], dyld_info.export_off);
+    try trie.write(self.base.allocator, &self.base.file.?, dyld_info.export_off);
 }
 
 fn writeStringTable(self: *MachO) !void {

From b13b36a71d63b6dfe4beda940fa6f9488fb3690a Mon Sep 17 00:00:00 2001
From: Jakub Konka <kubkon@jakubkonka.com>
Date: Wed, 7 Oct 2020 00:36:13 +0200
Subject: [PATCH 3/9] Approach using array list for auto mem mgmt

Signed-off-by: Jakub Konka <kubkon@jakubkonka.com>
---
 src/link/MachO.zig | 79 ++++++++++++++++++++++++++--------------------
 1 file changed, 45 insertions(+), 34 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index b522655b13..b49d022637 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -151,37 +151,49 @@ const Trie = struct {
             return node;
         }
 
-        pub fn write(self: Node, buf: []u8, offset: u64) error{NoSpaceLeft}!usize {
-            var pos: usize = 0;
+        pub fn write(self: Node, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) Trie.WriteError!void {
             if (self.offset) |off| {
-                var info_buf_pos: usize = 0;
+                // Terminal node info: encode export flags and vmaddr offset of this symbol.
+                var info_buf_len: usize = 0;
                 var info_buf: [@sizeOf(u64) * 2]u8 = undefined;
-                info_buf_pos += try std.debug.leb.writeULEB128Mem(info_buf[0..], self.export_flags.?);
-                info_buf_pos += try std.debug.leb.writeULEB128Mem(info_buf[info_buf_pos..], off);
-                log.debug("info_buf = {x}\n", .{info_buf[0..info_buf_pos]});
-                pos += try std.debug.leb.writeULEB128Mem(buf[pos..], info_buf_pos);
-                mem.copy(u8, buf[pos..], info_buf[0..info_buf_pos]);
-                pos += info_buf_pos;
-                log.debug("buf = {x}\n", .{buf});
+                info_buf_len += try std.debug.leb.writeULEB128Mem(info_buf[0..], self.export_flags.?);
+                info_buf_len += try std.debug.leb.writeULEB128Mem(info_buf[info_buf_len..], off);
+
+                // Encode the size of the terminal node info.
+                var size_buf: [@sizeOf(u64)]u8 = undefined;
+                const size_buf_len = try std.debug.leb.writeULEB128Mem(size_buf[0..], info_buf_len);
+
+                // Now, write them to the output buffer.
+                try buffer.ensureCapacity(alloc, buffer.items.len + info_buf_len + size_buf_len);
+                buffer.appendSliceAssumeCapacity(size_buf[0..size_buf_len]);
+                buffer.appendSliceAssumeCapacity(info_buf[0..info_buf_len]);
             } else {
-                buf[pos] = 0;
-                pos += 1;
+                // Non-terminal node is delimited by 0 byte.
+                try buffer.append(alloc, 0);
             }
-            buf[pos] = @intCast(u8, self.edges.items.len);
-            pos += 1;
+            // Write number of edges (max legal number of edges is 256).
+            try buffer.append(alloc, @intCast(u8, self.edges.items.len));
 
-            for (self.edges.items) |edge| {
-                mem.copy(u8, buf[pos..], edge.label);
-                pos += edge.label.len;
-                buf[pos] = 0;
-                pos += 1;
-                const curr_offset = pos + offset + 1;
-                pos += try std.debug.leb.writeULEB128Mem(buf[pos..], curr_offset);
-                pos += try edge.to.write(buf[pos..], curr_offset);
-                log.debug("buf = {x}\n", .{buf});
+            var node_offset_info: [@sizeOf(u8)]u64 = undefined;
+            for (self.edges.items) |edge, i| {
+                // Write edges labels leaving out space in-between to later populate
+                // with offsets to each node.
+                try buffer.ensureCapacity(alloc, buffer.items.len + edge.label.len + 1 + @sizeOf(u64)); // +1 to account for null-byte
+                buffer.appendSliceAssumeCapacity(edge.label);
+                buffer.appendAssumeCapacity(0);
+                node_offset_info[i] = buffer.items.len;
+                const padding = [_]u8{0} ** @sizeOf(u64);
+                buffer.appendSliceAssumeCapacity(padding[0..]);
             }
 
-            return pos;
+            for (self.edges.items) |edge, i| {
+                const offset = buffer.items.len;
+                try edge.to.write(alloc, buffer);
+                // We can now populate the offset to the node pointed by this edge.
+                var offset_buf: [@sizeOf(u64)]u8 = undefined;
+                const offset_buf_len = try std.debug.leb.writeULEB128Mem(offset_buf[0..], offset);
+                mem.copy(u8, buffer.items[node_offset_info[i]..], offset_buf[0..offset_buf_len]);
+            }
         }
     };
 
@@ -191,16 +203,10 @@ const Trie = struct {
         return self.root.put(alloc, null, 0, word);
     }
 
-    pub fn write(self: Trie, alloc: *Allocator, file: *fs.File, offset: u64) !void {
-        // TODO get the actual node count
-        const count = 10;
-        const node_size = @sizeOf(u64) * 2;
+    pub const WriteError = error{ OutOfMemory, NoSpaceLeft };
 
-        var buf = try alloc.alloc(u8, count * node_size);
-        defer alloc.free(buf);
-
-        const written = try self.root.write(buf, 0);
-        return file.pwriteAll(buf[0..written], offset);
+    pub fn write(self: Trie, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) WriteError!void {
+        return self.root.write(alloc, buffer);
     }
 
     pub fn deinit(self: *Trie, alloc: *Allocator) void {
@@ -1544,8 +1550,13 @@ fn writeExportTrie(self: *MachO) !void {
         node.export_flags = 0; // TODO workout creation of export flags
     }
 
+    var buffer: std.ArrayListUnmanaged(u8) = .{};
+    defer buffer.deinit(self.base.allocator);
+
+    try trie.write(self.base.allocator, &buffer);
+
     const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfo;
-    try trie.write(self.base.allocator, &self.base.file.?, dyld_info.export_off);
+    try self.base.file.?.pwriteAll(buffer.items, dyld_info.export_off);
 }
 
 fn writeStringTable(self: *MachO) !void {

From bdab4f53c1fa614fcd89468f305184fa36520039 Mon Sep 17 00:00:00 2001
From: Jakub Konka <kubkon@jakubkonka.com>
Date: Wed, 7 Oct 2020 19:36:50 +0200
Subject: [PATCH 4/9] Move trie structure into its own file-module

Signed-off-by: Jakub Konka <kubkon@jakubkonka.com>
---
 src/link/MachO.zig      | 209 ++------------------------------
 src/link/MachO/Trie.zig | 259 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 268 insertions(+), 200 deletions(-)
 create mode 100644 src/link/MachO/Trie.zig

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index b49d022637..afc54f8f7b 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -20,6 +20,8 @@ const File = link.File;
 const Cache = @import("../Cache.zig");
 const target_util = @import("../target.zig");
 
+const Trie = @import("MachO/Trie.zig");
+
 pub const base_tag: File.Tag = File.Tag.macho;
 
 const LoadCommand = union(enum) {
@@ -64,156 +66,6 @@ const LoadCommand = union(enum) {
     }
 };
 
-/// Represents export trie used in MachO executables and dynamic libraries.
-/// The purpose of an export trie is to encode as compactly as possible all
-/// export symbols for the loader `dyld`.
-/// The export trie encodes offset and other information using ULEB128
-/// encoding, and is part of the __LINKEDIT segment.
-const Trie = struct {
-    const Node = struct {
-        const Edge = struct {
-            from: *Node,
-            to: *Node,
-            label: []const u8,
-
-            pub fn deinit(self: *Edge, alloc: *Allocator) void {
-                self.to.deinit(alloc);
-                alloc.destroy(self.to);
-                self.from = undefined;
-                self.to = undefined;
-            }
-        };
-
-        export_flags: ?u64 = null,
-        offset: ?u64 = null,
-        edges: std.ArrayListUnmanaged(Edge) = .{},
-
-        pub fn deinit(self: *Node, alloc: *Allocator) void {
-            for (self.edges.items) |*edge| {
-                edge.deinit(alloc);
-            }
-            self.edges.deinit(alloc);
-        }
-
-        pub fn put(self: *Node, alloc: *Allocator, fromEdge: ?*Edge, prefix: usize, label: []const u8) !*Node {
-            // Traverse all edges.
-            for (self.edges.items) |*edge| {
-                const match = mem.indexOfDiff(u8, edge.label, label) orelse return self; // Got a full match, don't do anything.
-                if (match - prefix > 0) {
-                    // If we match, we advance further down the trie.
-                    return edge.to.put(alloc, edge, match, label);
-                }
-            }
-
-            if (fromEdge) |from| {
-                if (mem.eql(u8, from.label, label[0..prefix])) {
-                    if (prefix == label.len) return self;
-                } else {
-                    // Fixup nodes. We need to insert an intermediate node between
-                    // from.to and self.
-                    const mid = try alloc.create(Node);
-                    mid.* = .{};
-                    const to_label = from.label;
-                    from.to = mid;
-                    from.label = label[0..prefix];
-
-                    try mid.edges.append(alloc, .{
-                        .from = mid,
-                        .to = self,
-                        .label = to_label,
-                    });
-
-                    if (prefix == label.len) return self; // We're done.
-
-                    const new_node = try alloc.create(Node);
-                    new_node.* = .{};
-
-                    try mid.edges.append(alloc, .{
-                        .from = mid,
-                        .to = new_node,
-                        .label = label,
-                    });
-
-                    return new_node;
-                }
-            }
-
-            // Add a new edge.
-            const node = try alloc.create(Node);
-            node.* = .{};
-
-            try self.edges.append(alloc, .{
-                .from = self,
-                .to = node,
-                .label = label,
-            });
-
-            return node;
-        }
-
-        pub fn write(self: Node, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) Trie.WriteError!void {
-            if (self.offset) |off| {
-                // Terminal node info: encode export flags and vmaddr offset of this symbol.
-                var info_buf_len: usize = 0;
-                var info_buf: [@sizeOf(u64) * 2]u8 = undefined;
-                info_buf_len += try std.debug.leb.writeULEB128Mem(info_buf[0..], self.export_flags.?);
-                info_buf_len += try std.debug.leb.writeULEB128Mem(info_buf[info_buf_len..], off);
-
-                // Encode the size of the terminal node info.
-                var size_buf: [@sizeOf(u64)]u8 = undefined;
-                const size_buf_len = try std.debug.leb.writeULEB128Mem(size_buf[0..], info_buf_len);
-
-                // Now, write them to the output buffer.
-                try buffer.ensureCapacity(alloc, buffer.items.len + info_buf_len + size_buf_len);
-                buffer.appendSliceAssumeCapacity(size_buf[0..size_buf_len]);
-                buffer.appendSliceAssumeCapacity(info_buf[0..info_buf_len]);
-            } else {
-                // Non-terminal node is delimited by 0 byte.
-                try buffer.append(alloc, 0);
-            }
-            // Write number of edges (max legal number of edges is 256).
-            try buffer.append(alloc, @intCast(u8, self.edges.items.len));
-
-            var node_offset_info: [@sizeOf(u8)]u64 = undefined;
-            for (self.edges.items) |edge, i| {
-                // Write edges labels leaving out space in-between to later populate
-                // with offsets to each node.
-                try buffer.ensureCapacity(alloc, buffer.items.len + edge.label.len + 1 + @sizeOf(u64)); // +1 to account for null-byte
-                buffer.appendSliceAssumeCapacity(edge.label);
-                buffer.appendAssumeCapacity(0);
-                node_offset_info[i] = buffer.items.len;
-                const padding = [_]u8{0} ** @sizeOf(u64);
-                buffer.appendSliceAssumeCapacity(padding[0..]);
-            }
-
-            for (self.edges.items) |edge, i| {
-                const offset = buffer.items.len;
-                try edge.to.write(alloc, buffer);
-                // We can now populate the offset to the node pointed by this edge.
-                var offset_buf: [@sizeOf(u64)]u8 = undefined;
-                const offset_buf_len = try std.debug.leb.writeULEB128Mem(offset_buf[0..], offset);
-                mem.copy(u8, buffer.items[node_offset_info[i]..], offset_buf[0..offset_buf_len]);
-            }
-        }
-    };
-
-    root: Node,
-
-    pub fn put(self: *Trie, alloc: *Allocator, word: []const u8) !*Node {
-        return self.root.put(alloc, null, 0, word);
-    }
-
-    pub const WriteError = error{ OutOfMemory, NoSpaceLeft };
-
-    pub fn write(self: Trie, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) WriteError!void {
-        return self.root.write(alloc, buffer);
-    }
-
-    pub fn deinit(self: *Trie, alloc: *Allocator) void {
-        self.root.deinit(alloc);
-    }
-};
-
 base: File,
 
 /// Table of all load commands
@@ -1541,19 +1393,21 @@ fn writeExportTrie(self: *MachO) !void {
     defer trie.deinit(self.base.allocator);
 
     const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-
     for (self.global_symbols.items) |symbol| {
         // TODO figure out if we should put all global symbols into the export trie
         const name = self.getString(symbol.n_strx);
-        const node = try trie.put(self.base.allocator, name);
-        node.offset = symbol.n_value - text_segment.vmaddr;
-        node.export_flags = 0; // TODO workout creation of export flags
+        assert(symbol.n_value >= text_segment.vmaddr);
+        try trie.put(self.base.allocator, .{
+            .name = name,
+            .offset = symbol.n_value - text_segment.vmaddr,
+            .export_flags = 0, // TODO workout creation of export flags
+        });
     }
 
     var buffer: std.ArrayListUnmanaged(u8) = .{};
     defer buffer.deinit(self.base.allocator);
 
-    try trie.write(self.base.allocator, &buffer);
+    try trie.writeULEB128Mem(self.base.allocator, &buffer);
 
     const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfo;
     try self.base.file.?.pwriteAll(buffer.items, dyld_info.export_off);
@@ -1688,48 +1542,3 @@ fn satMul(a: anytype, b: anytype) @TypeOf(a, b) {
     const T = @TypeOf(a, b);
     return std.math.mul(T, a, b) catch std.math.maxInt(T);
 }
-
-test "Trie basic" {
-    const testing = @import("std").testing;
-    var gpa = testing.allocator;
-
-    var trie: Trie = .{
-        .root = .{},
-    };
-    defer trie.deinit(gpa);
-
-    // root
-    testing.expect(trie.root.edges.items.len == 0);
-
-    // root --- _st ---> node
-    try trie.put(gpa, "_st");
-    testing.expect(trie.root.edges.items.len == 1);
-    testing.expect(mem.eql(u8, trie.root.edges.items[0].label, "_st"));
-
-    {
-        // root --- _st ---> node --- _start ---> node
-        try trie.put(gpa, "_start");
-        testing.expect(trie.root.edges.items.len == 1);
-
-        const nextEdge = &trie.root.edges.items[0];
-        testing.expect(mem.eql(u8, nextEdge.label, "_st"));
-        testing.expect(nextEdge.to.edges.items.len == 1);
-        testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "_start"));
-    }
-    {
-        // root --- _ ---> node --- _st ---> node --- _start ---> node
-        //                  |
-        //                  |   --- _main ---> node
-        try trie.put(gpa, "_main");
-        testing.expect(trie.root.edges.items.len == 1);
-
-        const nextEdge = &trie.root.edges.items[0];
-        testing.expect(mem.eql(u8, nextEdge.label, "_"));
-        testing.expect(nextEdge.to.edges.items.len == 2);
-        testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "_st"));
-        testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "_main"));
-
-        const nextNextEdge = &nextEdge.to.edges.items[0];
-        testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "_start"));
-    }
-}
diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig
new file mode 100644
index 0000000000..24b06c8ba2
--- /dev/null
+++ b/src/link/MachO/Trie.zig
@@ -0,0 +1,259 @@
+/// Represents export trie used in MachO executables and dynamic libraries.
+/// The purpose of an export trie is to encode as compactly as possible all
+/// export symbols for the loader `dyld`.
+/// The export trie encodes offset and other information using ULEB128
+/// encoding, and is part of the __LINKEDIT segment.
+///
+/// Description from loader.h:
+///
+/// The symbols exported by a dylib are encoded in a trie. This is a compact
+/// representation that factors out common prefixes. It also reduces LINKEDIT pages
+/// in RAM because it encodes all information (name, address, flags) in one small,
+/// contiguous range. The export area is a stream of nodes. The first node sequentially
+/// is the start node for the trie.
+///
+/// Nodes for a symbol start with a uleb128 that is the length of the exported symbol
+/// information for the string so far. If there is no exported symbol, the node starts
+/// with a zero byte. If there is exported info, it follows the length.
+///
+/// First is a uleb128 containing flags. Normally, it is followed by a uleb128 encoded
+/// offset which is location of the content named by the symbol from the mach_header
+/// for the image. If the flags is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags
+/// is a uleb128 encoded library ordinal, then a zero terminated UTF8 string. If the string
+/// is zero length, then the symbol is re-export from the specified dylib with the same name.
+/// If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following the flags is two
+/// uleb128s: the stub offset and the resolver offset. The stub is used by non-lazy pointers.
+/// The resolver is used by lazy pointers and must be called to get the actual address to use.
+///
+/// After the optional exported symbol information is a byte of how many edges (0-255) that
+/// this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of
+/// the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to.
+const Trie = @This();
+
+const std = @import("std");
+const mem = std.mem;
+const leb = std.debug.leb;
+const log = std.log.scoped(.link);
+const Allocator = mem.Allocator;
+
+pub const Symbol = struct {
+    name: []const u8,
+    offset: u64,
+    export_flags: u64,
+};
+
+const Edge = struct {
+    from: *Node,
+    to: *Node,
+    label: []const u8,
+
+    fn deinit(self: *Edge, alloc: *Allocator) void {
+        self.to.deinit(alloc);
+        alloc.destroy(self.to);
+        self.from = undefined;
+        self.to = undefined;
+    }
+};
+
+const Node = struct {
+    export_flags: ?u64 = null,
+    offset: ?u64 = null,
+    edges: std.ArrayListUnmanaged(Edge) = .{},
+
+    fn deinit(self: *Node, alloc: *Allocator) void {
+        for (self.edges.items) |*edge| {
+            edge.deinit(alloc);
+        }
+        self.edges.deinit(alloc);
+    }
+
+    fn put(self: *Node, alloc: *Allocator, fromEdge: ?*Edge, prefix: usize, label: []const u8) !*Node {
+        // Traverse all edges.
+        for (self.edges.items) |*edge| {
+            const match = mem.indexOfDiff(u8, edge.label, label) orelse return self; // Got a full match, don't do anything.
+            if (match - prefix > 0) {
+                // If we match, we advance further down the trie.
+                return edge.to.put(alloc, edge, match, label);
+            }
+        }
+
+        if (fromEdge) |from| {
+            if (mem.eql(u8, from.label, label[0..prefix])) {
+                if (prefix == label.len) return self;
+            } else {
+                // Fixup nodes. We need to insert an intermediate node between
+                // from.to and self.
+                // Is: A -> B
+                // Should be: A -> C -> B
+                const mid = try alloc.create(Node);
+                mid.* = .{};
+                const to_label = from.label;
+                from.to = mid;
+                from.label = label[0..prefix];
+
+                try mid.edges.append(alloc, .{
+                    .from = mid,
+                    .to = self,
+                    .label = to_label,
+                });
+
+                if (prefix == label.len) return self; // We're done.
+
+                const new_node = try alloc.create(Node);
+                new_node.* = .{};
+
+                try mid.edges.append(alloc, .{
+                    .from = mid,
+                    .to = new_node,
+                    .label = label,
+                });
+
+                return new_node;
+            }
+        }
+
+        // Add a new edge.
+        const node = try alloc.create(Node);
+        node.* = .{};
+
+        try self.edges.append(alloc, .{
+            .from = self,
+            .to = node,
+            .label = label,
+        });
+
+        return node;
+    }
+
+    fn writeULEB128Mem(self: Node, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) Trie.WriteError!void {
+        if (self.offset) |offset| {
+            // Terminal node info: encode export flags and vmaddr offset of this symbol.
+            var info_buf_len: usize = 0;
+            var info_buf: [@sizeOf(u64) * 2]u8 = undefined;
+            info_buf_len += try leb.writeULEB128Mem(info_buf[0..], self.export_flags.?);
+            info_buf_len += try leb.writeULEB128Mem(info_buf[info_buf_len..], offset);
+
+            // Encode the size of the terminal node info.
+            var size_buf: [@sizeOf(u64)]u8 = undefined;
+            const size_buf_len = try leb.writeULEB128Mem(size_buf[0..], info_buf_len);
+
+            // Now, write them to the output buffer.
+            try buffer.ensureCapacity(alloc, buffer.items.len + info_buf_len + size_buf_len);
+            buffer.appendSliceAssumeCapacity(size_buf[0..size_buf_len]);
+            buffer.appendSliceAssumeCapacity(info_buf[0..info_buf_len]);
+        } else {
+            // Non-terminal node is delimited by 0 byte.
+            try buffer.append(alloc, 0);
+        }
+        // Write number of edges (max legal number of edges is 256).
+        try buffer.append(alloc, @intCast(u8, self.edges.items.len));
+
+        var node_offset_info: [@sizeOf(u8)]u64 = undefined;
+        for (self.edges.items) |edge, i| {
+            // Write edges labels leaving out space in-between to later populate
+            // with offsets to each node.
+            try buffer.ensureCapacity(alloc, buffer.items.len + edge.label.len + 1 + @sizeOf(u64)); // +1 to account for null-byte
+            buffer.appendSliceAssumeCapacity(edge.label);
+            buffer.appendAssumeCapacity(0);
+            node_offset_info[i] = buffer.items.len;
+            const padding = [_]u8{0} ** @sizeOf(u64);
+            buffer.appendSliceAssumeCapacity(padding[0..]);
+        }
+
+        for (self.edges.items) |edge, i| {
+            const offset = buffer.items.len;
+            try edge.to.writeULEB128Mem(alloc, buffer);
+            // We can now populate the offset to the node pointed by this edge.
+            // TODO this is not the approach taken by `ld64` which does several iterations
+            // to close the gap between the space encoding the offset to the node pointed
+            // by this edge. However, it seems that as long as we are contiguous, the padding
+            // introduced here should not influence the performance of `dyld`. I'm leaving
+            // this TODO here though as a reminder to re-investigate in the future and especially
+            // when we start working on dylibs in case `dyld` refuses to cooperate and/or the
+            // performance is noticably sufferring.
+            // Link to official impl: https://opensource.apple.com/source/ld64/ld64-123.2.1/src/abstraction/MachOTrie.hpp
+            var offset_buf: [@sizeOf(u64)]u8 = undefined;
+            const offset_buf_len = try leb.writeULEB128Mem(offset_buf[0..], offset);
+            mem.copy(u8, buffer.items[node_offset_info[i]..], offset_buf[0..offset_buf_len]);
+        }
+    }
+};
+
+root: Node,
+
+/// Insert a symbol into the trie, updating the prefixes in the process.
+/// This operation may change the layout of the trie by splicing edges in
+/// certain circumstances.
+pub fn put(self: *Trie, alloc: *Allocator, symbol: Symbol) !void {
+    const node = try self.root.put(alloc, null, 0, symbol.name);
+    node.offset = symbol.offset;
+    node.export_flags = symbol.export_flags;
+}
+
+pub const WriteError = error{ OutOfMemory, NoSpaceLeft };
+
+/// Write the trie to a buffer ULEB128 encoded.
+pub fn writeULEB128Mem(self: Trie, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) WriteError!void {
+    return self.root.writeULEB128Mem(alloc, buffer);
+}
+
+pub fn deinit(self: *Trie, alloc: *Allocator) void {
+    self.root.deinit(alloc);
+}
+
+test "Trie basic" {
+    const testing = @import("std").testing;
+    var gpa = testing.allocator;
+
+    var trie: Trie = .{
+        .root = .{},
+    };
+    defer trie.deinit(gpa);
+
+    // root
+    testing.expect(trie.root.edges.items.len == 0);
+
+    // root --- _st ---> node
+    try trie.put(gpa, .{
+        .name = "_st",
+        .offset = 0,
+        .export_flags = 0,
+    });
+    testing.expect(trie.root.edges.items.len == 1);
+    testing.expect(mem.eql(u8, trie.root.edges.items[0].label, "_st"));
+
+    {
+        // root --- _st ---> node --- _start ---> node
+        try trie.put(gpa, .{
+            .name = "_start",
+            .offset = 0,
+            .export_flags = 0,
+        });
+        testing.expect(trie.root.edges.items.len == 1);
+
+        const nextEdge = &trie.root.edges.items[0];
+        testing.expect(mem.eql(u8, nextEdge.label, "_st"));
+        testing.expect(nextEdge.to.edges.items.len == 1);
+        testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "_start"));
+    }
+    {
+        // root --- _ ---> node --- _st ---> node --- _start ---> node
+        //                  |
+        //                  |   --- _main ---> node
+        try trie.put(gpa, .{
+            .name = "_main",
+            .offset = 0,
+            .export_flags = 0,
+        });
+        testing.expect(trie.root.edges.items.len == 1);
+
+        const nextEdge = &trie.root.edges.items[0];
+        testing.expect(mem.eql(u8, nextEdge.label, "_"));
+        testing.expect(nextEdge.to.edges.items.len == 2);
+        testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "_st"));
+        testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "_main"));
+
+        const nextNextEdge = &nextEdge.to.edges.items[0];
+        testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "_start"));
+    }
+}

From b5b25d38a8fa4e66e54ff1279c1becee877793f6 Mon Sep 17 00:00:00 2001
From: Jakub Konka <kubkon@jakubkonka.com>
Date: Wed, 7 Oct 2020 20:32:02 +0200
Subject: [PATCH 5/9] Fix improper reuse of global symbols in MachO

Signed-off-by: Jakub Konka <kubkon@jakubkonka.com>
---
 src/Module.zig     | 15 ++++++++++++---
 src/link.zig       |  8 ++++++++
 src/link/Elf.zig   |  4 ++--
 src/link/MachO.zig | 24 ++++++++++++++++++++----
 4 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/src/Module.zig b/src/Module.zig
index 75b6afffcd..0c2a38be28 100644
--- a/src/Module.zig
+++ b/src/Module.zig
@@ -93,7 +93,7 @@ pub const Export = struct {
     /// Byte offset into the file that contains the export directive.
     src: usize,
     /// Represents the position of the export, if any, in the output file.
-    link: link.File.Elf.Export,
+    link: link.File.Export,
     /// The Decl that performs the export. Note that this is *not* the Decl being exported.
     owner_decl: *Decl,
     /// The Decl being exported. Note this is *not* the Decl performing the export.
@@ -1712,7 +1712,10 @@ fn deleteDeclExports(self: *Module, decl: *Decl) void {
             }
         }
         if (self.comp.bin_file.cast(link.File.Elf)) |elf| {
-            elf.deleteExport(exp.link);
+            elf.deleteExport(exp.link.elf);
+        }
+        if (self.comp.bin_file.cast(link.File.MachO)) |macho| {
+            macho.deleteExport(exp.link.macho);
         }
         if (self.failed_exports.remove(exp)) |entry| {
             entry.value.destroy(self.gpa);
@@ -1875,7 +1878,13 @@ pub fn analyzeExport(self: *Module, scope: *Scope, src: usize, borrowed_symbol_n
     new_export.* = .{
         .options = .{ .name = symbol_name },
         .src = src,
-        .link = .{},
+        .link = switch (self.comp.bin_file.tag) {
+            .coff => .{ .coff = {} },
+            .elf => .{ .elf = link.File.Elf.Export{} },
+            .macho => .{ .macho = link.File.MachO.Export{} },
+            .c => .{ .c = {} },
+            .wasm => .{ .wasm = {} },
+        },
         .owner_decl = owner_decl,
         .exported_decl = exported_decl,
         .status = .in_progress,
diff --git a/src/link.zig b/src/link.zig
index 139977b3e2..99bca45fbe 100644
--- a/src/link.zig
+++ b/src/link.zig
@@ -133,6 +133,14 @@ pub const File = struct {
         wasm: ?Wasm.FnData,
     };
 
+    pub const Export = union {
+        elf: Elf.Export,
+        coff: void,
+        macho: MachO.Export,
+        c: void,
+        wasm: void,
+    };
+
     /// For DWARF .debug_info.
     pub const DbgInfoTypeRelocsTable = std.HashMapUnmanaged(Type, DbgInfoTypeReloc, Type.hash, Type.eql, std.hash_map.DefaultMaxLoadPercentage);
 
diff --git a/src/link/Elf.zig b/src/link/Elf.zig
index c62bb29f78..a316a9c19e 100644
--- a/src/link/Elf.zig
+++ b/src/link/Elf.zig
@@ -2588,7 +2588,7 @@ pub fn updateDeclExports(
             },
         };
         const stt_bits: u8 = @truncate(u4, decl_sym.st_info);
-        if (exp.link.sym_index) |i| {
+        if (exp.link.elf.sym_index) |i| {
             const sym = &self.global_symbols.items[i];
             sym.* = .{
                 .st_name = try self.updateString(sym.st_name, exp.options.name),
@@ -2613,7 +2613,7 @@ pub fn updateDeclExports(
                 .st_size = decl_sym.st_size,
             };
 
-            exp.link.sym_index = @intCast(u32, i);
+            exp.link.elf.sym_index = @intCast(u32, i);
         }
     }
 }
diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index afc54f8f7b..486620804b 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -115,6 +115,9 @@ local_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{},
 global_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{},
 /// Table of all undefined symbols
 undef_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{},
+
+global_symbol_free_list: std.ArrayListUnmanaged(u32) = .{},
+
 dyld_stub_binder_index: ?u16 = null,
 
 /// Table of symbol names aka the string table.
@@ -178,6 +181,10 @@ pub const TextBlock = struct {
     };
 };
 
+pub const Export = struct {
+    sym_index: ?u32 = null,
+};
+
 pub const SrcFn = struct {
     pub const empty = SrcFn{};
 };
@@ -713,6 +720,7 @@ pub fn deinit(self: *MachO) void {
     self.string_table.deinit(self.base.allocator);
     self.undef_symbols.deinit(self.base.allocator);
     self.global_symbols.deinit(self.base.allocator);
+    self.global_symbol_free_list.deinit(self.base.allocator);
     self.local_symbols.deinit(self.base.allocator);
     self.sections.deinit(self.base.allocator);
     self.load_commands.deinit(self.base.allocator);
@@ -837,7 +845,7 @@ pub fn updateDeclExports(
             },
         };
         const n_type = decl_sym.n_type | macho.N_EXT;
-        if (exp.link.sym_index) |i| {
+        if (exp.link.macho.sym_index) |i| {
             const sym = &self.global_symbols.items[i];
             sym.* = .{
                 .n_strx = try self.updateString(sym.n_strx, exp.options.name),
@@ -848,8 +856,10 @@ pub fn updateDeclExports(
             };
         } else {
             const name_str_index = try self.makeString(exp.options.name);
-            _ = self.global_symbols.addOneAssumeCapacity();
-            const i = self.global_symbols.items.len - 1;
+            const i = if (self.global_symbol_free_list.popOrNull()) |i| i else blk: {
+                _ = self.global_symbols.addOneAssumeCapacity();
+                break :blk self.global_symbols.items.len - 1;
+            };
             self.global_symbols.items[i] = .{
                 .n_strx = name_str_index,
                 .n_type = n_type,
@@ -858,11 +868,17 @@ pub fn updateDeclExports(
                 .n_value = decl_sym.n_value,
             };
 
-            exp.link.sym_index = @intCast(u32, i);
+            exp.link.macho.sym_index = @intCast(u32, i);
         }
     }
 }
 
+pub fn deleteExport(self: *MachO, exp: Export) void {
+    const sym_index = exp.sym_index orelse return;
+    self.global_symbol_free_list.append(self.base.allocator, sym_index) catch {};
+    self.global_symbols.items[sym_index].n_type = 0;
+}
+
 pub fn freeDecl(self: *MachO, decl: *Module.Decl) void {}
 
 pub fn getDeclVAddr(self: *MachO, decl: *const Module.Decl) u64 {

From ea44d12d1be8eb17a1555f6ab794621da0212171 Mon Sep 17 00:00:00 2001
From: Jakub Konka <kubkon@jakubkonka.com>
Date: Wed, 7 Oct 2020 21:19:45 +0200
Subject: [PATCH 6/9] Add writeULEB128Mem test and a couple of fixes

Signed-off-by: Jakub Konka <kubkon@jakubkonka.com>
---
 src/link/MachO/Trie.zig | 177 ++++++++++++++++++++++++++++------------
 1 file changed, 127 insertions(+), 50 deletions(-)

diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig
index 24b06c8ba2..f7d37315cf 100644
--- a/src/link/MachO/Trie.zig
+++ b/src/link/MachO/Trie.zig
@@ -34,6 +34,7 @@ const std = @import("std");
 const mem = std.mem;
 const leb = std.debug.leb;
 const log = std.log.scoped(.link);
+const testing = std.testing;
 const Allocator = mem.Allocator;
 
 pub const Symbol = struct {
@@ -67,48 +68,33 @@ const Node = struct {
         self.edges.deinit(alloc);
     }
 
-    fn put(self: *Node, alloc: *Allocator, fromEdge: ?*Edge, prefix: usize, label: []const u8) !*Node {
-        // Traverse all edges.
+    fn put(self: *Node, alloc: *Allocator, label: []const u8) !*Node {
+        // Check for match with edges from this node.
         for (self.edges.items) |*edge| {
-            const match = mem.indexOfDiff(u8, edge.label, label) orelse return self; // Got a full match, don't do anything.
-            if (match - prefix > 0) {
-                // If we match, we advance further down the trie.
-                return edge.to.put(alloc, edge, match, label);
-            }
-        }
+            const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to;
+            if (match == 0) continue;
+            if (match == edge.label.len) return edge.to.put(alloc, label[match..]);
 
-        if (fromEdge) |from| {
-            if (mem.eql(u8, from.label, label[0..prefix])) {
-                if (prefix == label.len) return self;
+            // Found a match, need to splice up nodes.
+            // From: A -> B
+            // To: A -> C -> B
+            const mid = try alloc.create(Node);
+            mid.* = .{};
+            const to_label = edge.label;
+            const to_node = edge.to;
+            edge.to = mid;
+            edge.label = label[0..match];
+
+            try mid.edges.append(alloc, .{
+                .from = mid,
+                .to = to_node,
+                .label = to_label[match..],
+            });
+
+            if (match == label.len) {
+                return mid; // `label` ends exactly at the split point, so `mid` (not `to_node`) is its node.
             } else {
-                // Fixup nodes. We need to insert an intermediate node between
-                // from.to and self.
-                // Is: A -> B
-                // Should be: A -> C -> B
-                const mid = try alloc.create(Node);
-                mid.* = .{};
-                const to_label = from.label;
-                from.to = mid;
-                from.label = label[0..prefix];
-
-                try mid.edges.append(alloc, .{
-                    .from = mid,
-                    .to = self,
-                    .label = to_label,
-                });
-
-                if (prefix == label.len) return self; // We're done.
-
-                const new_node = try alloc.create(Node);
-                new_node.* = .{};
-
-                try mid.edges.append(alloc, .{
-                    .from = mid,
-                    .to = new_node,
-                    .label = label,
-                });
-
-                return new_node;
+                return mid.put(alloc, label[match..]);
             }
         }
 
@@ -148,7 +134,7 @@ const Node = struct {
         // Write number of edges (max legal number of edges is 256).
         try buffer.append(alloc, @intCast(u8, self.edges.items.len));
 
-        var node_offset_info: [@sizeOf(u8)]u64 = undefined;
+        var node_offset_info: [std.math.maxInt(u8)]u64 = undefined;
         for (self.edges.items) |edge, i| {
             // Write edges labels leaving out space in-between to later populate
             // with offsets to each node.
@@ -185,7 +171,7 @@ root: Node,
 /// This operation may change the layout of the trie by splicing edges in
 /// certain circumstances.
 pub fn put(self: *Trie, alloc: *Allocator, symbol: Symbol) !void {
-    const node = try self.root.put(alloc, null, 0, symbol.name);
+    const node = try self.root.put(alloc, symbol.name);
     node.offset = symbol.offset;
     node.export_flags = symbol.export_flags;
 }
@@ -202,9 +188,7 @@ pub fn deinit(self: *Trie, alloc: *Allocator) void {
 }
 
 test "Trie basic" {
-    const testing = @import("std").testing;
     var gpa = testing.allocator;
-
     var trie: Trie = .{
         .root = .{},
     };
@@ -223,7 +207,7 @@ test "Trie basic" {
     testing.expect(mem.eql(u8, trie.root.edges.items[0].label, "_st"));
 
     {
-        // root --- _st ---> node --- _start ---> node
+        // root --- _st ---> node --- art ---> node
         try trie.put(gpa, .{
             .name = "_start",
             .offset = 0,
@@ -234,12 +218,12 @@ test "Trie basic" {
         const nextEdge = &trie.root.edges.items[0];
         testing.expect(mem.eql(u8, nextEdge.label, "_st"));
         testing.expect(nextEdge.to.edges.items.len == 1);
-        testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "_start"));
+        testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "art"));
     }
     {
-        // root --- _ ---> node --- _st ---> node --- _start ---> node
+        // root --- _ ---> node --- st ---> node --- art ---> node
         //                  |
-        //                  |   --- _main ---> node
+        //                  |   --- main ---> node
         try trie.put(gpa, .{
             .name = "_main",
             .offset = 0,
@@ -250,10 +234,103 @@ test "Trie basic" {
         const nextEdge = &trie.root.edges.items[0];
         testing.expect(mem.eql(u8, nextEdge.label, "_"));
         testing.expect(nextEdge.to.edges.items.len == 2);
-        testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "_st"));
-        testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "_main"));
+        testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "st"));
+        testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "main"));
 
         const nextNextEdge = &nextEdge.to.edges.items[0];
-        testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "_start"));
+        testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "art"));
     }
 }
+
+test "Trie.writeULEB128Mem" {
+    var gpa = testing.allocator;
+    var trie: Trie = .{
+        .root = .{},
+    };
+    defer trie.deinit(gpa);
+
+    try trie.put(gpa, .{
+        .name = "__mh_execute_header",
+        .offset = 0,
+        .export_flags = 0,
+    });
+    try trie.put(gpa, .{
+        .name = "_main",
+        .offset = 0x1000,
+        .export_flags = 0,
+    });
+
+    var buffer: std.ArrayListUnmanaged(u8) = .{};
+    defer buffer.deinit(gpa);
+
+    try trie.writeULEB128Mem(gpa, &buffer);
+
+    const exp_buffer = [_]u8{
+        0x0,
+        0x1,
+        0x5f,
+        0x0,
+        0xc,
+        0x0,
+        0x0,
+        0x0,
+        0x0,
+        0x0,
+        0x0,
+        0x0,
+        0x0,
+        0x2,
+        0x5f,
+        0x6d,
+        0x68,
+        0x5f,
+        0x65,
+        0x78,
+        0x65,
+        0x63,
+        0x75,
+        0x74,
+        0x65,
+        0x5f,
+        0x68,
+        0x65,
+        0x61,
+        0x64,
+        0x65,
+        0x72,
+        0x0,
+        0x36,
+        0x0,
+        0x0,
+        0x0,
+        0x0,
+        0x0,
+        0x0,
+        0x0,
+        0x6d,
+        0x61,
+        0x69,
+        0x6e,
+        0x0,
+        0x3a,
+        0x0,
+        0x0,
+        0x0,
+        0x0,
+        0x0,
+        0x0,
+        0x0,
+        0x2,
+        0x0,
+        0x0,
+        0x0,
+        0x3,
+        0x0,
+        0x80,
+        0x20,
+        0x0,
+    };
+
+    testing.expect(buffer.items.len == exp_buffer.len);
+    testing.expect(mem.eql(u8, buffer.items, exp_buffer[0..]));
+}

From 5f86505cf79a0ce75e1a02602ae0e9c845024982 Mon Sep 17 00:00:00 2001
From: Jakub Konka <kubkon@jakubkonka.com>
Date: Thu, 8 Oct 2020 17:52:08 +0200
Subject: [PATCH 7/9] Fix ULEB128 encoding of trie

Use the algorithm described in the official Apple `ld64` implementation.
Link: https://opensource.apple.com/source/ld64/ld64-123.2.1/src/abstraction/MachOTrie.hpp

Signed-off-by: Jakub Konka <kubkon@jakubkonka.com>
---
 src/link/MachO.zig      |   2 +-
 src/link/MachO/Trie.zig | 144 +++++++++++++++++++++++-----------------
 2 files changed, 84 insertions(+), 62 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 486620804b..ce9b7d1706 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -1415,7 +1415,7 @@ fn writeExportTrie(self: *MachO) !void {
         assert(symbol.n_value >= text_segment.vmaddr);
         try trie.put(self.base.allocator, .{
             .name = name,
-            .offset = symbol.n_value - text_segment.vmaddr,
+            .vmaddr_offset = symbol.n_value - text_segment.vmaddr,
             .export_flags = 0, // TODO workout creation of export flags
         });
     }
diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig
index f7d37315cf..4c13262d2d 100644
--- a/src/link/MachO/Trie.zig
+++ b/src/link/MachO/Trie.zig
@@ -39,7 +39,7 @@ const Allocator = mem.Allocator;
 
 pub const Symbol = struct {
     name: []const u8,
-    offset: u64,
+    vmaddr_offset: u64,
     export_flags: u64,
 };
 
@@ -58,7 +58,8 @@ const Edge = struct {
 
 const Node = struct {
     export_flags: ?u64 = null,
-    offset: ?u64 = null,
+    vmaddr_offset: ?u64 = null,
+    trie_offset: usize = 0,
     edges: std.ArrayListUnmanaged(Edge) = .{},
 
     fn deinit(self: *Node, alloc: *Allocator) void {
@@ -111,8 +112,8 @@ const Node = struct {
         return node;
     }
 
-    fn writeULEB128Mem(self: Node, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) Trie.WriteError!void {
-        if (self.offset) |offset| {
+    fn writeULEB128Mem(self: Node, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) !void {
+        if (self.vmaddr_offset) |offset| {
             // Terminal node info: encode export flags and vmaddr offset of this symbol.
             var info_buf_len: usize = 0;
             var info_buf: [@sizeOf(u64) * 2]u8 = undefined;
@@ -134,34 +135,53 @@ const Node = struct {
         // Write number of edges (max legal number of edges is 256).
         try buffer.append(alloc, @intCast(u8, self.edges.items.len));
 
-        var node_offset_info: [std.math.maxInt(u8)]u64 = undefined;
-        for (self.edges.items) |edge, i| {
-            // Write edges labels leaving out space in-between to later populate
-            // with offsets to each node.
-            try buffer.ensureCapacity(alloc, buffer.items.len + edge.label.len + 1 + @sizeOf(u64)); // +1 to account for null-byte
+        for (self.edges.items) |edge| {
+            // Write edges labels.
+            try buffer.ensureCapacity(alloc, buffer.items.len + edge.label.len + 1); // +1 to account for null-byte
             buffer.appendSliceAssumeCapacity(edge.label);
             buffer.appendAssumeCapacity(0);
-            node_offset_info[i] = buffer.items.len;
-            const padding = [_]u8{0} ** @sizeOf(u64);
-            buffer.appendSliceAssumeCapacity(padding[0..]);
+
+            var buf: [@sizeOf(u64)]u8 = undefined;
+            const buf_len = try leb.writeULEB128Mem(buf[0..], edge.to.trie_offset);
+            try buffer.appendSlice(alloc, buf[0..buf_len]);
+        }
+    }
+
+    const UpdateResult = struct {
+        node_size: usize,
+        updated: bool,
+    };
+
+    fn updateOffset(self: *Node, offset: usize) UpdateResult {
+        var node_size: usize = 0;
+        if (self.vmaddr_offset) |vmaddr| {
+            node_size += sizeULEB128Mem(self.export_flags.?);
+            node_size += sizeULEB128Mem(vmaddr);
+            node_size += sizeULEB128Mem(node_size);
+        } else {
+            node_size += 1; // 0x0 for non-terminal nodes
+        }
+        node_size += 1; // 1 byte for edge count
+
+        for (self.edges.items) |edge| {
+            node_size += edge.label.len + 1 + sizeULEB128Mem(edge.to.trie_offset);
         }
 
-        for (self.edges.items) |edge, i| {
-            const offset = buffer.items.len;
-            try edge.to.writeULEB128Mem(alloc, buffer);
-            // We can now populate the offset to the node pointed by this edge.
-            // TODO this is not the approach taken by `ld64` which does several iterations
-            // to close the gap between the space encoding the offset to the node pointed
-            // by this edge. However, it seems that as long as we are contiguous, the padding
-            // introduced here should not influence the performance of `dyld`. I'm leaving
-            // this TODO here though as a reminder to re-investigate in the future and especially
-            // when we start working on dylibs in case `dyld` refuses to cooperate and/or the
-            // performance is noticably sufferring.
-            // Link to official impl: https://opensource.apple.com/source/ld64/ld64-123.2.1/src/abstraction/MachOTrie.hpp
-            var offset_buf: [@sizeOf(u64)]u8 = undefined;
-            const offset_buf_len = try leb.writeULEB128Mem(offset_buf[0..], offset);
-            mem.copy(u8, buffer.items[node_offset_info[i]..], offset_buf[0..offset_buf_len]);
+        const updated = offset != self.trie_offset;
+        self.trie_offset = offset;
+
+        return .{ .node_size = node_size, .updated = updated };
+    }
+
+    fn sizeULEB128Mem(value: u64) usize {
+        var res: usize = 0;
+        var v = value;
+        while (true) {
+            v = v >> 7;
+            res += 1;
+            if (v == 0) break;
         }
+        return res;
     }
 };
 
@@ -172,15 +192,38 @@ root: Node,
 /// certain circumstances.
 pub fn put(self: *Trie, alloc: *Allocator, symbol: Symbol) !void {
     const node = try self.root.put(alloc, symbol.name);
-    node.offset = symbol.offset;
+    node.vmaddr_offset = symbol.vmaddr_offset;
     node.export_flags = symbol.export_flags;
 }
 
-pub const WriteError = error{ OutOfMemory, NoSpaceLeft };
-
 /// Write the trie to a buffer ULEB128 encoded.
-pub fn writeULEB128Mem(self: Trie, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) WriteError!void {
-    return self.root.writeULEB128Mem(alloc, buffer);
+pub fn writeULEB128Mem(self: *Trie, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) !void {
+    var ordered_nodes: std.ArrayListUnmanaged(*Node) = .{};
+    defer ordered_nodes.deinit(alloc);
+
+    try walkInOrder(&self.root, alloc, &ordered_nodes);
+
+    var more: bool = true;
+    while (more) {
+        var offset: usize = 0;
+        more = false;
+        for (ordered_nodes.items) |node| {
+            const res = node.updateOffset(offset);
+            offset += res.node_size;
+            if (res.updated) more = true;
+        }
+    }
+
+    for (ordered_nodes.items) |node| {
+        try node.writeULEB128Mem(alloc, buffer);
+    }
+}
+
+fn walkInOrder(node: *Node, alloc: *Allocator, list: *std.ArrayListUnmanaged(*Node)) error{OutOfMemory}!void {
+    try list.append(alloc, node);
+    for (node.edges.items) |*edge| {
+        try walkInOrder(edge.to, alloc, list);
+    }
 }
 
 pub fn deinit(self: *Trie, alloc: *Allocator) void {
@@ -200,7 +243,7 @@ test "Trie basic" {
     // root --- _st ---> node
     try trie.put(gpa, .{
         .name = "_st",
-        .offset = 0,
+        .vmaddr_offset = 0,
         .export_flags = 0,
     });
     testing.expect(trie.root.edges.items.len == 1);
@@ -210,7 +253,7 @@ test "Trie basic" {
         // root --- _st ---> node --- art ---> node
         try trie.put(gpa, .{
             .name = "_start",
-            .offset = 0,
+            .vmaddr_offset = 0,
             .export_flags = 0,
         });
         testing.expect(trie.root.edges.items.len == 1);
@@ -226,7 +269,7 @@ test "Trie basic" {
         //                  |   --- main ---> node
         try trie.put(gpa, .{
             .name = "_main",
-            .offset = 0,
+            .vmaddr_offset = 0,
             .export_flags = 0,
         });
         testing.expect(trie.root.edges.items.len == 1);
@@ -251,12 +294,12 @@ test "Trie.writeULEB128Mem" {
 
     try trie.put(gpa, .{
         .name = "__mh_execute_header",
-        .offset = 0,
+        .vmaddr_offset = 0,
         .export_flags = 0,
     });
     try trie.put(gpa, .{
         .name = "_main",
-        .offset = 0x1000,
+        .vmaddr_offset = 0x1000,
         .export_flags = 0,
     });
 
@@ -270,14 +313,7 @@ test "Trie.writeULEB128Mem" {
         0x1,
         0x5f,
         0x0,
-        0xc,
-        0x0,
-        0x0,
-        0x0,
-        0x0,
-        0x0,
-        0x0,
-        0x0,
+        0x5,
         0x0,
         0x2,
         0x5f,
@@ -299,27 +335,13 @@ test "Trie.writeULEB128Mem" {
         0x65,
         0x72,
         0x0,
-        0x36,
-        0x0,
-        0x0,
-        0x0,
-        0x0,
-        0x0,
-        0x0,
-        0x0,
+        0x21,
         0x6d,
         0x61,
         0x69,
         0x6e,
         0x0,
-        0x3a,
-        0x0,
-        0x0,
-        0x0,
-        0x0,
-        0x0,
-        0x0,
-        0x0,
+        0x25,
         0x2,
         0x0,
         0x0,

From ba41e599bfaff2c614c4edfbe5c7ffe94b437486 Mon Sep 17 00:00:00 2001
From: Jakub Konka <kubkon@jakubkonka.com>
Date: Thu, 8 Oct 2020 18:10:32 +0200
Subject: [PATCH 8/9] Clean up writing the trie into ULEB128 byte stream

Prealloc as much as possible to improve alloc performance.

Signed-off-by: Jakub Konka <kubkon@jakubkonka.com>
---
 src/link/MachO.zig      |   4 +-
 src/link/MachO/Trie.zig | 144 +++++++++++++++++++++++++++++++---------
 2 files changed, 112 insertions(+), 36 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index ce9b7d1706..697e4f0be3 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -1403,9 +1403,7 @@ fn writeAllUndefSymbols(self: *MachO) !void {
 fn writeExportTrie(self: *MachO) !void {
     if (self.global_symbols.items.len == 0) return; // No exports, nothing to do.
 
-    var trie: Trie = .{
-        .root = .{},
-    };
+    var trie: Trie = .{};
     defer trie.deinit(self.base.allocator);
 
     const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig
index 4c13262d2d..d19914f292 100644
--- a/src/link/MachO/Trie.zig
+++ b/src/link/MachO/Trie.zig
@@ -35,6 +35,7 @@ const mem = std.mem;
 const leb = std.debug.leb;
 const log = std.log.scoped(.link);
 const testing = std.testing;
+const assert = std.debug.assert;
 const Allocator = mem.Allocator;
 
 pub const Symbol = struct {
@@ -57,9 +58,13 @@ const Edge = struct {
 };
 
 const Node = struct {
+    /// Export flags associated with this exported symbol (if any).
     export_flags: ?u64 = null,
+    /// VM address offset with respect to the section this symbol is defined against (if any).
     vmaddr_offset: ?u64 = null,
-    trie_offset: usize = 0,
+    /// Offset of this node in the trie output byte stream.
+    trie_offset: ?usize = null,
+    /// List of all edges originating from this node.
     edges: std.ArrayListUnmanaged(Edge) = .{},
 
     fn deinit(self: *Node, alloc: *Allocator) void {
@@ -69,12 +74,24 @@ const Node = struct {
         self.edges.deinit(alloc);
     }
 
-    fn put(self: *Node, alloc: *Allocator, label: []const u8) !*Node {
+    const PutResult = struct {
+        /// Node reached at this stage of `put` op.
+        node: *Node,
+        /// Count of newly inserted nodes at this stage of `put` op.
+        node_count: usize,
+    };
+
+    /// Inserts `label` into the subtree rooted at `self`, reusing or splicing existing edges where prefixes match.
+    fn put(self: *Node, alloc: *Allocator, label: []const u8, node_count: usize) !PutResult {
+        var curr_node_count = node_count;
         // Check for match with edges from this node.
         for (self.edges.items) |*edge| {
-            const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to;
+            const match = mem.indexOfDiff(u8, edge.label, label) orelse return PutResult{
+                .node = edge.to,
+                .node_count = curr_node_count,
+            };
             if (match == 0) continue;
-            if (match == edge.label.len) return edge.to.put(alloc, label[match..]);
+            if (match == edge.label.len) return edge.to.put(alloc, label[match..], curr_node_count);
 
             // Found a match, need to splice up nodes.
             // From: A -> B
@@ -85,6 +102,7 @@ const Node = struct {
             const to_node = edge.to;
             edge.to = mid;
             edge.label = label[0..match];
+            curr_node_count += 1;
 
             try mid.edges.append(alloc, .{
                 .from = mid,
@@ -93,15 +111,16 @@ const Node = struct {
             });
 
             if (match == label.len) {
-                return to_node;
+                return PutResult{ .node = to_node, .node_count = curr_node_count };
             } else {
-                return mid.put(alloc, label[match..]);
+                return mid.put(alloc, label[match..], curr_node_count);
             }
         }
 
-        // Add a new edge.
+        // Add a new node.
         const node = try alloc.create(Node);
         node.* = .{};
+        curr_node_count += 1;
 
         try self.edges.append(alloc, .{
             .from = self,
@@ -109,10 +128,13 @@ const Node = struct {
             .label = label,
         });
 
-        return node;
+        return PutResult{ .node = node, .node_count = curr_node_count };
     }
 
-    fn writeULEB128Mem(self: Node, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) !void {
+    /// Must only be called *after* updateOffset has run on this node and on the nodes its edges point to,
+    /// otherwise the assert / `.?` unwrap below trips. `buffer` must already have enough spare capacity.
+    fn writeULEB128Mem(self: Node, buffer: *std.ArrayListUnmanaged(u8)) !void {
+        assert(self.trie_offset != null); // You need to call updateOffset first.
         if (self.vmaddr_offset) |offset| {
             // Terminal node info: encode export flags and vmaddr offset of this symbol.
             var info_buf_len: usize = 0;
@@ -125,33 +147,35 @@ const Node = struct {
             const size_buf_len = try leb.writeULEB128Mem(size_buf[0..], info_buf_len);
 
             // Now, write them to the output buffer.
-            try buffer.ensureCapacity(alloc, buffer.items.len + info_buf_len + size_buf_len);
             buffer.appendSliceAssumeCapacity(size_buf[0..size_buf_len]);
             buffer.appendSliceAssumeCapacity(info_buf[0..info_buf_len]);
         } else {
             // Non-terminal node is delimited by 0 byte.
-            try buffer.append(alloc, 0);
+            buffer.appendAssumeCapacity(0);
         }
         // Write number of edges (max legal number of edges is 256).
-        try buffer.append(alloc, @intCast(u8, self.edges.items.len));
+        buffer.appendAssumeCapacity(@intCast(u8, self.edges.items.len));
 
         for (self.edges.items) |edge| {
             // Write edges labels.
-            try buffer.ensureCapacity(alloc, buffer.items.len + edge.label.len + 1); // +1 to account for null-byte
             buffer.appendSliceAssumeCapacity(edge.label);
             buffer.appendAssumeCapacity(0);
 
             var buf: [@sizeOf(u64)]u8 = undefined;
-            const buf_len = try leb.writeULEB128Mem(buf[0..], edge.to.trie_offset);
-            try buffer.appendSlice(alloc, buf[0..buf_len]);
+            const buf_len = try leb.writeULEB128Mem(buf[0..], edge.to.trie_offset.?);
+            buffer.appendSliceAssumeCapacity(buf[0..buf_len]);
         }
     }
 
     const UpdateResult = struct {
+        /// Current size of this node in bytes.
         node_size: usize,
+        /// True if this call changed the trie offset of this node in the
+        /// output byte stream; false otherwise.
         updated: bool,
     };
 
+    /// Updates offset of this node in the output byte stream.
     fn updateOffset(self: *Node, offset: usize) UpdateResult {
         var node_size: usize = 0;
         if (self.vmaddr_offset) |vmaddr| {
@@ -164,15 +188,18 @@ const Node = struct {
         node_size += 1; // 1 byte for edge count
 
         for (self.edges.items) |edge| {
-            node_size += edge.label.len + 1 + sizeULEB128Mem(edge.to.trie_offset);
+            const next_node_offset = edge.to.trie_offset orelse 0;
+            node_size += edge.label.len + 1 + sizeULEB128Mem(next_node_offset);
         }
 
-        const updated = offset != self.trie_offset;
+        const trie_offset = self.trie_offset orelse 0;
+        const updated = offset != trie_offset;
         self.trie_offset = offset;
 
         return .{ .node_size = node_size, .updated = updated };
     }
 
+    /// Calculates number of bytes in ULEB128 encoding of value.
     fn sizeULEB128Mem(value: u64) usize {
         var res: usize = 0;
         var v = value;
@@ -185,15 +212,22 @@ const Node = struct {
     }
 };
 
-root: Node,
+/// Count of nodes in the trie.
+/// The count is updated at every `put` call.
+/// The trie always consists of at least a root node, hence
+/// the count always starts at 1.
+node_count: usize = 1,
+/// The root node of the trie.
+root: Node = .{},
 
 /// Insert a symbol into the trie, updating the prefixes in the process.
 /// This operation may change the layout of the trie by splicing edges in
 /// certain circumstances.
 pub fn put(self: *Trie, alloc: *Allocator, symbol: Symbol) !void {
-    const node = try self.root.put(alloc, symbol.name);
-    node.vmaddr_offset = symbol.vmaddr_offset;
-    node.export_flags = symbol.export_flags;
+    const res = try self.root.put(alloc, symbol.name, 0);
+    self.node_count += res.node_count;
+    res.node.vmaddr_offset = symbol.vmaddr_offset;
+    res.node.export_flags = symbol.export_flags;
 }
 
 /// Write the trie to a buffer ULEB128 encoded.
@@ -201,11 +235,13 @@ pub fn writeULEB128Mem(self: *Trie, alloc: *Allocator, buffer: *std.ArrayListUnm
     var ordered_nodes: std.ArrayListUnmanaged(*Node) = .{};
     defer ordered_nodes.deinit(alloc);
 
-    try walkInOrder(&self.root, alloc, &ordered_nodes);
+    try ordered_nodes.ensureCapacity(alloc, self.node_count);
+    walkInOrder(&self.root, &ordered_nodes);
 
+    var offset: usize = 0;
     var more: bool = true;
     while (more) {
-        var offset: usize = 0;
+        offset = 0;
         more = false;
         for (ordered_nodes.items) |node| {
             const res = node.updateOffset(offset);
@@ -214,15 +250,17 @@ pub fn writeULEB128Mem(self: *Trie, alloc: *Allocator, buffer: *std.ArrayListUnm
         }
     }
 
+    try buffer.ensureCapacity(alloc, buffer.items.len + offset);
     for (ordered_nodes.items) |node| {
-        try node.writeULEB128Mem(alloc, buffer);
+        try node.writeULEB128Mem(buffer);
     }
 }
 
-fn walkInOrder(node: *Node, alloc: *Allocator, list: *std.ArrayListUnmanaged(*Node)) error{OutOfMemory}!void {
-    try list.append(alloc, node);
+/// Walks the trie depth-first in pre-order (a node before its children), appending every node to `list`, which must have capacity reserved.
+fn walkInOrder(node: *Node, list: *std.ArrayListUnmanaged(*Node)) void {
+    list.appendAssumeCapacity(node);
     for (node.edges.items) |*edge| {
-        try walkInOrder(edge.to, alloc, list);
+        walkInOrder(edge.to, list);
     }
 }
 
@@ -230,11 +268,53 @@ pub fn deinit(self: *Trie, alloc: *Allocator) void {
     self.root.deinit(alloc);
 }
 
+test "Trie node count" {
+    var gpa = testing.allocator;
+    var trie: Trie = .{};
+    defer trie.deinit(gpa);
+
+    testing.expectEqual(trie.node_count, 1);
+
+    try trie.put(gpa, .{
+        .name = "_main",
+        .vmaddr_offset = 0,
+        .export_flags = 0,
+    });
+    testing.expectEqual(trie.node_count, 2);
+
+    // Inserting the same node shouldn't update the trie.
+    try trie.put(gpa, .{
+        .name = "_main",
+        .vmaddr_offset = 0,
+        .export_flags = 0,
+    });
+    testing.expectEqual(trie.node_count, 2);
+
+    try trie.put(gpa, .{
+        .name = "__mh_execute_header",
+        .vmaddr_offset = 0x1000,
+        .export_flags = 0,
+    });
+    testing.expectEqual(trie.node_count, 4);
+
+    // Inserting the same node shouldn't update the trie.
+    try trie.put(gpa, .{
+        .name = "__mh_execute_header",
+        .vmaddr_offset = 0x1000,
+        .export_flags = 0,
+    });
+    testing.expectEqual(trie.node_count, 4);
+    try trie.put(gpa, .{
+        .name = "_main",
+        .vmaddr_offset = 0,
+        .export_flags = 0,
+    });
+    testing.expectEqual(trie.node_count, 4);
+}
+
 test "Trie basic" {
     var gpa = testing.allocator;
-    var trie: Trie = .{
-        .root = .{},
-    };
+    var trie: Trie = .{};
     defer trie.deinit(gpa);
 
     // root
@@ -287,9 +367,7 @@ test "Trie basic" {
 
 test "Trie.writeULEB128Mem" {
     var gpa = testing.allocator;
-    var trie: Trie = .{
-        .root = .{},
-    };
+    var trie: Trie = .{};
     defer trie.deinit(gpa);
 
     try trie.put(gpa, .{

From 8dc40236153e7c7d1b8378a117d8453e3b262933 Mon Sep 17 00:00:00 2001
From: Jakub Konka <kubkon@jakubkonka.com>
Date: Fri, 9 Oct 2020 17:22:39 +0200
Subject: [PATCH 9/9] Apply nitpick: top-level doc comments

Signed-off-by: Jakub Konka <kubkon@jakubkonka.com>
---
 src/link/MachO/Trie.zig | 60 ++++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig
index d19914f292..e077df101d 100644
--- a/src/link/MachO/Trie.zig
+++ b/src/link/MachO/Trie.zig
@@ -1,33 +1,33 @@
-/// Represents export trie used in MachO executables and dynamic libraries.
-/// The purpose of an export trie is to encode as compactly as possible all
-/// export symbols for the loader `dyld`.
-/// The export trie encodes offset and other information using ULEB128
-/// encoding, and is part of the __LINKEDIT segment.
-///
-/// Description from loader.h:
-///
-/// The symbols exported by a dylib are encoded in a trie. This is a compact
-/// representation that factors out common prefixes. It also reduces LINKEDIT pages
-/// in RAM because it encodes all information (name, address, flags) in one small,
-/// contiguous range. The export area is a stream of nodes. The first node sequentially
-/// is the start node for the trie.
-///
-/// Nodes for a symbol start with a uleb128 that is the length of the exported symbol
-/// information for the string so far. If there is no exported symbol, the node starts
-/// with a zero byte. If there is exported info, it follows the length.
-///
-/// First is a uleb128 containing flags. Normally, it is followed by a uleb128 encoded
-/// offset which is location of the content named by the symbol from the mach_header
-/// for the image. If the flags is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags
-/// is a uleb128 encoded library ordinal, then a zero terminated UTF8 string. If the string
-/// is zero length, then the symbol is re-export from the specified dylib with the same name.
-/// If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following the flags is two
-/// uleb128s: the stub offset and the resolver offset. The stub is used by non-lazy pointers.
-/// The resolver is used by lazy pointers and must be called to get the actual address to use.
-///
-/// After the optional exported symbol information is a byte of how many edges (0-255) that
-/// this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of
-/// the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to.
+//! Represents export trie used in MachO executables and dynamic libraries.
+//! The purpose of an export trie is to encode as compactly as possible all
+//! export symbols for the loader `dyld`.
+//! The export trie encodes offset and other information using ULEB128
+//! encoding, and is part of the __LINKEDIT segment.
+//!
+//! Description from loader.h:
+//!
+//! The symbols exported by a dylib are encoded in a trie. This is a compact
+//! representation that factors out common prefixes. It also reduces LINKEDIT pages
+//! in RAM because it encodes all information (name, address, flags) in one small,
+//! contiguous range. The export area is a stream of nodes. The first node sequentially
+//! is the start node for the trie.
+//!
+//! Nodes for a symbol start with a uleb128 that is the length of the exported symbol
+//! information for the string so far. If there is no exported symbol, the node starts
+//! with a zero byte. If there is exported info, it follows the length.
+//!
+//! First is a uleb128 containing flags. Normally, it is followed by a uleb128 encoded
+//! offset which is location of the content named by the symbol from the mach_header
+//! for the image. If the flags is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags
+//! is a uleb128 encoded library ordinal, then a zero terminated UTF8 string. If the string
+//! is zero length, then the symbol is re-export from the specified dylib with the same name.
+//! If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following the flags is two
+//! uleb128s: the stub offset and the resolver offset. The stub is used by non-lazy pointers.
+//! The resolver is used by lazy pointers and must be called to get the actual address to use.
+//!
+//! After the optional exported symbol information is a byte of how many edges (0-255) that
+//! this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of
+//! the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to.
 const Trie = @This();
 
 const std = @import("std");