diff --git a/CMakeLists.txt b/CMakeLists.txt index d4d2675499..843e400922 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -564,7 +564,14 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/Coff.zig" "${CMAKE_SOURCE_DIR}/src/link/Elf.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/Archive.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/CodeSignature.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/DebugSymbols.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/Zld.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/commands.zig" "${CMAKE_SOURCE_DIR}/src/link/Wasm.zig" "${CMAKE_SOURCE_DIR}/src/link/C/zig.h" "${CMAKE_SOURCE_DIR}/src/link/msdos-stub.bin" diff --git a/lib/std/bit_set.zig b/lib/std/bit_set.zig index 29ad0d7963..80cdd5c79c 100644 --- a/lib/std/bit_set.zig +++ b/lib/std/bit_set.zig @@ -176,7 +176,7 @@ pub fn IntegerBitSet(comptime size: u16) type { /// The default options (.{}) will iterate indices of set bits in /// ascending order. Modifications to the underlying bit set may /// or may not be observed by the iterator. - pub fn iterator(self: *const Self, comptime options: IteratorOptions) Iterator(options.direction) { + pub fn iterator(self: *const Self, comptime options: IteratorOptions) Iterator(options) { return .{ .bits_remain = switch (options.kind) { .set => self.mask, @@ -185,7 +185,11 @@ pub fn IntegerBitSet(comptime size: u16) type { }; } - fn Iterator(comptime direction: IteratorOptions.Direction) type { + pub fn Iterator(comptime options: IteratorOptions) type { + return SingleWordIterator(options.direction); + } + + fn SingleWordIterator(comptime direction: IteratorOptions.Direction) type { return struct { const IterSelf = @This(); // all bits which have not yet been iterated over @@ -425,8 +429,12 @@ pub fn ArrayBitSet(comptime MaskIntType: type, comptime size: usize) type { /// The default options (.{}) will iterate indices of set bits in /// ascending order. Modifications to the underlying bit set may /// or may not be observed by the iterator. - pub fn iterator(self: *const Self, comptime options: IteratorOptions) BitSetIterator(MaskInt, options) { - return BitSetIterator(MaskInt, options).init(&self.masks, last_item_mask); + pub fn iterator(self: *const Self, comptime options: IteratorOptions) Iterator(options) { + return Iterator(options).init(&self.masks, last_item_mask); + } + + pub fn Iterator(comptime options: IteratorOptions) type { + return BitSetIterator(MaskInt, options); } fn maskBit(index: usize) MaskInt { @@ -700,11 +708,15 @@ pub const DynamicBitSetUnmanaged = struct { /// ascending order. Modifications to the underlying bit set may /// or may not be observed by the iterator. Resizing the underlying /// bit set invalidates the iterator. 
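The pattern in the bit_set.zig hunks above (and repeated below for DynamicBitSetUnmanaged and DynamicBitSet) is to expose the previously anonymous iterator type as a public `Iterator(options)` declaration instead of an inferred return type of `iterator()`. A minimal sketch of what that makes possible, assuming the API exactly as shown in these hunks; the `Walker` wrapper and `IndexSet` alias are hypothetical examples, not part of the patch:

    const std = @import("std");
    const IndexSet = std.bit_set.IntegerBitSet(64);

    // Because the iterator type can now be named, an in-progress iteration can be
    // stored in a struct field and resumed later, rather than being consumed in a
    // single loop where the type is inferred.
    const Walker = struct {
        it: IndexSet.Iterator(.{}),

        // Construct with: var w = Walker{ .it = my_set.iterator(.{}) };
        fn next(self: *Walker) ?usize {
            return self.it.next();
        }
    };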
- pub fn iterator(self: *const Self, comptime options: IteratorOptions) BitSetIterator(MaskInt, options) { + pub fn iterator(self: *const Self, comptime options: IteratorOptions) Iterator(options) { const num_masks = numMasks(self.bit_length); const padding_bits = num_masks * @bitSizeOf(MaskInt) - self.bit_length; const last_item_mask = (~@as(MaskInt, 0)) >> @intCast(ShiftInt, padding_bits); - return BitSetIterator(MaskInt, options).init(self.masks[0..num_masks], last_item_mask); + return Iterator(options).init(self.masks[0..num_masks], last_item_mask); + } + + pub fn Iterator(comptime options: IteratorOptions) type { + return BitSetIterator(MaskInt, options); } fn maskBit(index: usize) MaskInt { @@ -858,9 +870,11 @@ pub const DynamicBitSet = struct { /// ascending order. Modifications to the underlying bit set may /// or may not be observed by the iterator. Resizing the underlying /// bit set invalidates the iterator. - pub fn iterator(self: *Self, comptime options: IteratorOptions) BitSetIterator(MaskInt, options) { + pub fn iterator(self: *Self, comptime options: IteratorOptions) Iterator(options) { return self.unmanaged.iterator(options); } + + pub const Iterator = DynamicBitSetUnmanaged.Iterator; }; /// Options for configuring an iterator over a bit set diff --git a/lib/std/c/builtins.zig b/lib/std/c/builtins.zig index 2c03c1ceac..99721a150c 100644 --- a/lib/std/c/builtins.zig +++ b/lib/std/c/builtins.zig @@ -140,7 +140,7 @@ pub fn __builtin_object_size(ptr: ?*const c_void, ty: c_int) callconv(.Inline) u // If it is not possible to determine which objects ptr points to at compile time, // __builtin_object_size should return (size_t) -1 for type 0 or 1 and (size_t) 0 // for type 2 or 3. - if (ty == 0 or ty == 1) return @bitCast(usize, -@as(c_long, 1)); + if (ty == 0 or ty == 1) return @bitCast(usize, -@as(isize, 1)); if (ty == 2 or ty == 3) return 0; unreachable; } @@ -188,3 +188,9 @@ pub fn __builtin_memcpy( pub fn __builtin_expect(expr: c_long, c: c_long) callconv(.Inline) c_long { return expr; } + +// __builtin_alloca_with_align is not currently implemented. +// It is used in a run-translated-c test and a test-translate-c test to ensure that non-implemented +// builtins are correctly demoted. If you implement __builtin_alloca_with_align, please update the +// run-translated-c test and the test-translate-c test to use a different non-implemented builtin. 
+// pub fn __builtin_alloca_with_align(size: usize, alignment: usize) callconv(.Inline) *c_void {} diff --git a/lib/std/crypto.zig b/lib/std/crypto.zig index 1ec7674999..457b9130d9 100644 --- a/lib/std/crypto.zig +++ b/lib/std/crypto.zig @@ -24,8 +24,12 @@ pub const aead = struct { pub const Gimli = @import("crypto/gimli.zig").Aead; pub const chacha_poly = struct { - pub const ChaCha20Poly1305 = @import("crypto/chacha20.zig").Chacha20Poly1305; - pub const XChaCha20Poly1305 = @import("crypto/chacha20.zig").XChacha20Poly1305; + pub const ChaCha20Poly1305 = @import("crypto/chacha20.zig").ChaCha20Poly1305; + pub const ChaCha12Poly1305 = @import("crypto/chacha20.zig").ChaCha12Poly1305; + pub const ChaCha8Poly1305 = @import("crypto/chacha20.zig").ChaCha8Poly1305; + pub const XChaCha20Poly1305 = @import("crypto/chacha20.zig").XChaCha20Poly1305; + pub const XChaCha12Poly1305 = @import("crypto/chacha20.zig").XChaCha12Poly1305; + pub const XChaCha8Poly1305 = @import("crypto/chacha20.zig").XChaCha8Poly1305; }; pub const isap = @import("crypto/isap.zig"); @@ -119,8 +123,14 @@ pub const sign = struct { pub const stream = struct { pub const chacha = struct { pub const ChaCha20IETF = @import("crypto/chacha20.zig").ChaCha20IETF; + pub const ChaCha12IETF = @import("crypto/chacha20.zig").ChaCha12IETF; + pub const ChaCha8IETF = @import("crypto/chacha20.zig").ChaCha8IETF; pub const ChaCha20With64BitNonce = @import("crypto/chacha20.zig").ChaCha20With64BitNonce; + pub const ChaCha12With64BitNonce = @import("crypto/chacha20.zig").ChaCha12With64BitNonce; + pub const ChaCha8With64BitNonce = @import("crypto/chacha20.zig").ChaCha8With64BitNonce; pub const XChaCha20IETF = @import("crypto/chacha20.zig").XChaCha20IETF; + pub const XChaCha12IETF = @import("crypto/chacha20.zig").XChaCha12IETF; + pub const XChaCha8IETF = @import("crypto/chacha20.zig").XChaCha8IETF; }; pub const salsa = struct { diff --git a/lib/std/crypto/benchmark.zig b/lib/std/crypto/benchmark.zig index e3ffa62ed1..49d5b15820 100644 --- a/lib/std/crypto/benchmark.zig +++ b/lib/std/crypto/benchmark.zig @@ -202,6 +202,7 @@ pub fn benchmarkBatchSignatureVerification(comptime Signature: anytype, comptime const aeads = [_]Crypto{ Crypto{ .ty = crypto.aead.chacha_poly.ChaCha20Poly1305, .name = "chacha20Poly1305" }, Crypto{ .ty = crypto.aead.chacha_poly.XChaCha20Poly1305, .name = "xchacha20Poly1305" }, + Crypto{ .ty = crypto.aead.chacha_poly.XChaCha8Poly1305, .name = "xchacha8Poly1305" }, Crypto{ .ty = crypto.aead.salsa_poly.XSalsa20Poly1305, .name = "xsalsa20Poly1305" }, Crypto{ .ty = crypto.aead.Gimli, .name = "gimli-aead" }, Crypto{ .ty = crypto.aead.aegis.Aegis128L, .name = "aegis-128l" }, diff --git a/lib/std/crypto/chacha20.zig b/lib/std/crypto/chacha20.zig index 4611923892..e1fe3e232d 100644 --- a/lib/std/crypto/chacha20.zig +++ b/lib/std/crypto/chacha20.zig @@ -15,286 +15,357 @@ const Vector = std.meta.Vector; const Poly1305 = std.crypto.onetimeauth.Poly1305; const Error = std.crypto.Error; +/// IETF-variant of the ChaCha20 stream cipher, as designed for TLS. +pub const ChaCha20IETF = ChaChaIETF(20); + +/// IETF-variant of the ChaCha20 stream cipher, reduced to 12 rounds. +/// Reduced-round versions are faster than the full-round version, but have a lower security margin. +/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds. +pub const ChaCha12IETF = ChaChaIETF(12); + +/// IETF-variant of the ChaCha20 stream cipher, reduced to 8 rounds.
+/// Reduced-round versions are faster than the full-round version, but have a lower security margin. +/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds. +pub const ChaCha8IETF = ChaChaIETF(8); + +/// Original ChaCha20 stream cipher. +pub const ChaCha20With64BitNonce = ChaChaWith64BitNonce(20); + +/// Original ChaCha20 stream cipher, reduced to 12 rounds. +/// Reduced-round versions are faster than the full-round version, but have a lower security margin. +/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds. +pub const ChaCha12With64BitNonce = ChaChaWith64BitNonce(12); + +/// Original ChaCha20 stream cipher, reduced to 8 rounds. +/// Reduced-round versions are faster than the full-round version, but have a lower security margin. +/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds. +pub const ChaCha8With64BitNonce = ChaChaWith64BitNonce(8); + +/// XChaCha20 (nonce-extended version of the IETF ChaCha20 variant) stream cipher +pub const XChaCha20IETF = XChaChaIETF(20); + +/// XChaCha20 (nonce-extended version of the IETF ChaCha20 variant) stream cipher, reduced to 12 rounds +/// Reduced-round versions are faster than the full-round version, but have a lower security margin. +/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds. +pub const XChaCha12IETF = XChaChaIETF(12); + +/// XChaCha20 (nonce-extended version of the IETF ChaCha20 variant) stream cipher, reduced to 8 rounds +/// Reduced-round versions are faster than the full-round version, but have a lower security margin. +/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds. +pub const XChaCha8IETF = XChaChaIETF(8); + +/// ChaCha20-Poly1305 authenticated cipher, as designed for TLS +pub const ChaCha20Poly1305 = ChaChaPoly1305(20); + +/// ChaCha20-Poly1305 authenticated cipher, reduced to 12 rounds +/// Reduced-round versions are faster than the full-round version, but have a lower security margin. +/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds. +pub const ChaCha12Poly1305 = ChaChaPoly1305(12); + +/// ChaCha20-Poly1305 authenticated cipher, reduced to 8 rounds +/// Reduced-round versions are faster than the full-round version, but have a lower security margin. +/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds. +pub const ChaCha8Poly1305 = ChaChaPoly1305(8); + +/// XChaCha20-Poly1305 authenticated cipher +pub const XChaCha20Poly1305 = XChaChaPoly1305(20); + +/// XChaCha20-Poly1305 authenticated cipher, reduced to 12 rounds +/// Reduced-round versions are faster than the full-round version, but have a lower security margin. +/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds. +pub const XChaCha12Poly1305 = XChaChaPoly1305(12); + +/// XChaCha20-Poly1305 authenticated cipher, reduced to 8 rounds +/// Reduced-round versions are faster than the full-round version, but have a lower security margin. +/// However, ChaCha is still believed to have a comfortable security margin even with only 8 rounds.
+pub const XChaCha8Poly1305 = XChaChaPoly1305(8); + // Vectorized implementation of the core function -const ChaCha20VecImpl = struct { - const Lane = Vector(4, u32); - const BlockVec = [4]Lane; +fn ChaChaVecImpl(comptime rounds_nb: usize) type { + return struct { + const Lane = Vector(4, u32); + const BlockVec = [4]Lane; - fn initContext(key: [8]u32, d: [4]u32) BlockVec { - const c = "expand 32-byte k"; - const constant_le = comptime Lane{ - mem.readIntLittle(u32, c[0..4]), - mem.readIntLittle(u32, c[4..8]), - mem.readIntLittle(u32, c[8..12]), - mem.readIntLittle(u32, c[12..16]), - }; - return BlockVec{ - constant_le, - Lane{ key[0], key[1], key[2], key[3] }, - Lane{ key[4], key[5], key[6], key[7] }, - Lane{ d[0], d[1], d[2], d[3] }, - }; - } - - fn chacha20Core(x: *BlockVec, input: BlockVec) callconv(.Inline) void { - x.* = input; - - var r: usize = 0; - while (r < 20) : (r += 2) { - x[0] +%= x[1]; - x[3] ^= x[0]; - x[3] = math.rotl(Lane, x[3], 16); - - x[2] +%= x[3]; - x[1] ^= x[2]; - x[1] = math.rotl(Lane, x[1], 12); - - x[0] +%= x[1]; - x[3] ^= x[0]; - x[0] = @shuffle(u32, x[0], undefined, [_]i32{ 3, 0, 1, 2 }); - x[3] = math.rotl(Lane, x[3], 8); - - x[2] +%= x[3]; - x[3] = @shuffle(u32, x[3], undefined, [_]i32{ 2, 3, 0, 1 }); - x[1] ^= x[2]; - x[2] = @shuffle(u32, x[2], undefined, [_]i32{ 1, 2, 3, 0 }); - x[1] = math.rotl(Lane, x[1], 7); - - x[0] +%= x[1]; - x[3] ^= x[0]; - x[3] = math.rotl(Lane, x[3], 16); - - x[2] +%= x[3]; - x[1] ^= x[2]; - x[1] = math.rotl(Lane, x[1], 12); - - x[0] +%= x[1]; - x[3] ^= x[0]; - x[0] = @shuffle(u32, x[0], undefined, [_]i32{ 1, 2, 3, 0 }); - x[3] = math.rotl(Lane, x[3], 8); - - x[2] +%= x[3]; - x[3] = @shuffle(u32, x[3], undefined, [_]i32{ 2, 3, 0, 1 }); - x[1] ^= x[2]; - x[2] = @shuffle(u32, x[2], undefined, [_]i32{ 3, 0, 1, 2 }); - x[1] = math.rotl(Lane, x[1], 7); + fn initContext(key: [8]u32, d: [4]u32) BlockVec { + const c = "expand 32-byte k"; + const constant_le = comptime Lane{ + mem.readIntLittle(u32, c[0..4]), + mem.readIntLittle(u32, c[4..8]), + mem.readIntLittle(u32, c[8..12]), + mem.readIntLittle(u32, c[12..16]), + }; + return BlockVec{ + constant_le, + Lane{ key[0], key[1], key[2], key[3] }, + Lane{ key[4], key[5], key[6], key[7] }, + Lane{ d[0], d[1], d[2], d[3] }, + }; } - } - fn hashToBytes(out: *[64]u8, x: BlockVec) callconv(.Inline) void { - var i: usize = 0; - while (i < 4) : (i += 1) { - mem.writeIntLittle(u32, out[16 * i + 0 ..][0..4], x[i][0]); - mem.writeIntLittle(u32, out[16 * i + 4 ..][0..4], x[i][1]); - mem.writeIntLittle(u32, out[16 * i + 8 ..][0..4], x[i][2]); - mem.writeIntLittle(u32, out[16 * i + 12 ..][0..4], x[i][3]); + fn chacha20Core(x: *BlockVec, input: BlockVec) callconv(.Inline) void { + x.* = input; + + var r: usize = 0; + while (r < rounds_nb) : (r += 2) { + x[0] +%= x[1]; + x[3] ^= x[0]; + x[3] = math.rotl(Lane, x[3], 16); + + x[2] +%= x[3]; + x[1] ^= x[2]; + x[1] = math.rotl(Lane, x[1], 12); + + x[0] +%= x[1]; + x[3] ^= x[0]; + x[0] = @shuffle(u32, x[0], undefined, [_]i32{ 3, 0, 1, 2 }); + x[3] = math.rotl(Lane, x[3], 8); + + x[2] +%= x[3]; + x[3] = @shuffle(u32, x[3], undefined, [_]i32{ 2, 3, 0, 1 }); + x[1] ^= x[2]; + x[2] = @shuffle(u32, x[2], undefined, [_]i32{ 1, 2, 3, 0 }); + x[1] = math.rotl(Lane, x[1], 7); + + x[0] +%= x[1]; + x[3] ^= x[0]; + x[3] = math.rotl(Lane, x[3], 16); + + x[2] +%= x[3]; + x[1] ^= x[2]; + x[1] = math.rotl(Lane, x[1], 12); + + x[0] +%= x[1]; + x[3] ^= x[0]; + x[0] = @shuffle(u32, x[0], undefined, [_]i32{ 1, 2, 3, 0 }); + x[3] = math.rotl(Lane, x[3], 8); + + x[2] +%= x[3]; + x[3] 
= @shuffle(u32, x[3], undefined, [_]i32{ 2, 3, 0, 1 }); + x[1] ^= x[2]; + x[2] = @shuffle(u32, x[2], undefined, [_]i32{ 3, 0, 1, 2 }); + x[1] = math.rotl(Lane, x[1], 7); + } } - } - fn contextFeedback(x: *BlockVec, ctx: BlockVec) callconv(.Inline) void { - x[0] +%= ctx[0]; - x[1] +%= ctx[1]; - x[2] +%= ctx[2]; - x[3] +%= ctx[3]; - } + fn hashToBytes(out: *[64]u8, x: BlockVec) callconv(.Inline) void { + var i: usize = 0; + while (i < 4) : (i += 1) { + mem.writeIntLittle(u32, out[16 * i + 0 ..][0..4], x[i][0]); + mem.writeIntLittle(u32, out[16 * i + 4 ..][0..4], x[i][1]); + mem.writeIntLittle(u32, out[16 * i + 8 ..][0..4], x[i][2]); + mem.writeIntLittle(u32, out[16 * i + 12 ..][0..4], x[i][3]); + } + } - fn chacha20Xor(out: []u8, in: []const u8, key: [8]u32, counter: [4]u32) void { - var ctx = initContext(key, counter); - var x: BlockVec = undefined; - var buf: [64]u8 = undefined; - var i: usize = 0; - while (i + 64 <= in.len) : (i += 64) { + fn contextFeedback(x: *BlockVec, ctx: BlockVec) callconv(.Inline) void { + x[0] +%= ctx[0]; + x[1] +%= ctx[1]; + x[2] +%= ctx[2]; + x[3] +%= ctx[3]; + } + + fn chacha20Xor(out: []u8, in: []const u8, key: [8]u32, counter: [4]u32) void { + var ctx = initContext(key, counter); + var x: BlockVec = undefined; + var buf: [64]u8 = undefined; + var i: usize = 0; + while (i + 64 <= in.len) : (i += 64) { + chacha20Core(x[0..], ctx); + contextFeedback(&x, ctx); + hashToBytes(buf[0..], x); + + var xout = out[i..]; + const xin = in[i..]; + var j: usize = 0; + while (j < 64) : (j += 1) { + xout[j] = xin[j]; + } + j = 0; + while (j < 64) : (j += 1) { + xout[j] ^= buf[j]; + } + ctx[3][0] += 1; + } + if (i < in.len) { + chacha20Core(x[0..], ctx); + contextFeedback(&x, ctx); + hashToBytes(buf[0..], x); + + var xout = out[i..]; + const xin = in[i..]; + var j: usize = 0; + while (j < in.len % 64) : (j += 1) { + xout[j] = xin[j] ^ buf[j]; + } + } + } + + fn hchacha20(input: [16]u8, key: [32]u8) [32]u8 { + var c: [4]u32 = undefined; + for (c) |_, i| { + c[i] = mem.readIntLittle(u32, input[4 * i ..][0..4]); + } + const ctx = initContext(keyToWords(key), c); + var x: BlockVec = undefined; chacha20Core(x[0..], ctx); - contextFeedback(&x, ctx); - hashToBytes(buf[0..], x); - - var xout = out[i..]; - const xin = in[i..]; - var j: usize = 0; - while (j < 64) : (j += 1) { - xout[j] = xin[j]; - } - j = 0; - while (j < 64) : (j += 1) { - xout[j] ^= buf[j]; - } - ctx[3][0] += 1; + var out: [32]u8 = undefined; + mem.writeIntLittle(u32, out[0..4], x[0][0]); + mem.writeIntLittle(u32, out[4..8], x[0][1]); + mem.writeIntLittle(u32, out[8..12], x[0][2]); + mem.writeIntLittle(u32, out[12..16], x[0][3]); + mem.writeIntLittle(u32, out[16..20], x[3][0]); + mem.writeIntLittle(u32, out[20..24], x[3][1]); + mem.writeIntLittle(u32, out[24..28], x[3][2]); + mem.writeIntLittle(u32, out[28..32], x[3][3]); + return out; } - if (i < in.len) { - chacha20Core(x[0..], ctx); - contextFeedback(&x, ctx); - hashToBytes(buf[0..], x); - - var xout = out[i..]; - const xin = in[i..]; - var j: usize = 0; - while (j < in.len % 64) : (j += 1) { - xout[j] = xin[j] ^ buf[j]; - } - } - } - - fn hchacha20(input: [16]u8, key: [32]u8) [32]u8 { - var c: [4]u32 = undefined; - for (c) |_, i| { - c[i] = mem.readIntLittle(u32, input[4 * i ..][0..4]); - } - const ctx = initContext(keyToWords(key), c); - var x: BlockVec = undefined; - chacha20Core(x[0..], ctx); - var out: [32]u8 = undefined; - mem.writeIntLittle(u32, out[0..4], x[0][0]); - mem.writeIntLittle(u32, out[4..8], x[0][1]); - mem.writeIntLittle(u32, out[8..12], x[0][2]); 
- mem.writeIntLittle(u32, out[12..16], x[0][3]); - mem.writeIntLittle(u32, out[16..20], x[3][0]); - mem.writeIntLittle(u32, out[20..24], x[3][1]); - mem.writeIntLittle(u32, out[24..28], x[3][2]); - mem.writeIntLittle(u32, out[28..32], x[3][3]); - return out; - } -}; + }; +} // Non-vectorized implementation of the core function -const ChaCha20NonVecImpl = struct { - const BlockVec = [16]u32; +fn ChaChaNonVecImpl(comptime rounds_nb: usize) type { + return struct { + const BlockVec = [16]u32; - fn initContext(key: [8]u32, d: [4]u32) BlockVec { - const c = "expand 32-byte k"; - const constant_le = comptime [4]u32{ - mem.readIntLittle(u32, c[0..4]), - mem.readIntLittle(u32, c[4..8]), - mem.readIntLittle(u32, c[8..12]), - mem.readIntLittle(u32, c[12..16]), - }; - return BlockVec{ - constant_le[0], constant_le[1], constant_le[2], constant_le[3], - key[0], key[1], key[2], key[3], - key[4], key[5], key[6], key[7], - d[0], d[1], d[2], d[3], - }; - } + fn initContext(key: [8]u32, d: [4]u32) BlockVec { + const c = "expand 32-byte k"; + const constant_le = comptime [4]u32{ + mem.readIntLittle(u32, c[0..4]), + mem.readIntLittle(u32, c[4..8]), + mem.readIntLittle(u32, c[8..12]), + mem.readIntLittle(u32, c[12..16]), + }; + return BlockVec{ + constant_le[0], constant_le[1], constant_le[2], constant_le[3], + key[0], key[1], key[2], key[3], + key[4], key[5], key[6], key[7], + d[0], d[1], d[2], d[3], + }; + } - const QuarterRound = struct { - a: usize, - b: usize, - c: usize, - d: usize, + const QuarterRound = struct { + a: usize, + b: usize, + c: usize, + d: usize, + }; + + fn Rp(a: usize, b: usize, c: usize, d: usize) QuarterRound { + return QuarterRound{ + .a = a, + .b = b, + .c = c, + .d = d, + }; + } + + fn chacha20Core(x: *BlockVec, input: BlockVec) callconv(.Inline) void { + x.* = input; + + const rounds = comptime [_]QuarterRound{ + Rp(0, 4, 8, 12), + Rp(1, 5, 9, 13), + Rp(2, 6, 10, 14), + Rp(3, 7, 11, 15), + Rp(0, 5, 10, 15), + Rp(1, 6, 11, 12), + Rp(2, 7, 8, 13), + Rp(3, 4, 9, 14), + }; + + comptime var j: usize = 0; + inline while (j < rounds_nb) : (j += 2) { + inline for (rounds) |r| { + x[r.a] +%= x[r.b]; + x[r.d] = math.rotl(u32, x[r.d] ^ x[r.a], @as(u32, 16)); + x[r.c] +%= x[r.d]; + x[r.b] = math.rotl(u32, x[r.b] ^ x[r.c], @as(u32, 12)); + x[r.a] +%= x[r.b]; + x[r.d] = math.rotl(u32, x[r.d] ^ x[r.a], @as(u32, 8)); + x[r.c] +%= x[r.d]; + x[r.b] = math.rotl(u32, x[r.b] ^ x[r.c], @as(u32, 7)); + } + } + } + + fn hashToBytes(out: *[64]u8, x: BlockVec) callconv(.Inline) void { + var i: usize = 0; + while (i < 4) : (i += 1) { + mem.writeIntLittle(u32, out[16 * i + 0 ..][0..4], x[i * 4 + 0]); + mem.writeIntLittle(u32, out[16 * i + 4 ..][0..4], x[i * 4 + 1]); + mem.writeIntLittle(u32, out[16 * i + 8 ..][0..4], x[i * 4 + 2]); + mem.writeIntLittle(u32, out[16 * i + 12 ..][0..4], x[i * 4 + 3]); + } + } + + fn contextFeedback(x: *BlockVec, ctx: BlockVec) callconv(.Inline) void { + var i: usize = 0; + while (i < 16) : (i += 1) { + x[i] +%= ctx[i]; + } + } + + fn chacha20Xor(out: []u8, in: []const u8, key: [8]u32, counter: [4]u32) void { + var ctx = initContext(key, counter); + var x: BlockVec = undefined; + var buf: [64]u8 = undefined; + var i: usize = 0; + while (i + 64 <= in.len) : (i += 64) { + chacha20Core(x[0..], ctx); + contextFeedback(&x, ctx); + hashToBytes(buf[0..], x); + + var xout = out[i..]; + const xin = in[i..]; + var j: usize = 0; + while (j < 64) : (j += 1) { + xout[j] = xin[j]; + } + j = 0; + while (j < 64) : (j += 1) { + xout[j] ^= buf[j]; + } + ctx[12] += 1; + } + if (i < in.len) { + 
chacha20Core(x[0..], ctx); + contextFeedback(&x, ctx); + hashToBytes(buf[0..], x); + + var xout = out[i..]; + const xin = in[i..]; + var j: usize = 0; + while (j < in.len % 64) : (j += 1) { + xout[j] = xin[j] ^ buf[j]; + } + } + } + + fn hchacha20(input: [16]u8, key: [32]u8) [32]u8 { + var c: [4]u32 = undefined; + for (c) |_, i| { + c[i] = mem.readIntLittle(u32, input[4 * i ..][0..4]); + } + const ctx = initContext(keyToWords(key), c); + var x: BlockVec = undefined; + chacha20Core(x[0..], ctx); + var out: [32]u8 = undefined; + mem.writeIntLittle(u32, out[0..4], x[0]); + mem.writeIntLittle(u32, out[4..8], x[1]); + mem.writeIntLittle(u32, out[8..12], x[2]); + mem.writeIntLittle(u32, out[12..16], x[3]); + mem.writeIntLittle(u32, out[16..20], x[12]); + mem.writeIntLittle(u32, out[20..24], x[13]); + mem.writeIntLittle(u32, out[24..28], x[14]); + mem.writeIntLittle(u32, out[28..32], x[15]); + return out; + } }; +} - fn Rp(a: usize, b: usize, c: usize, d: usize) QuarterRound { - return QuarterRound{ - .a = a, - .b = b, - .c = c, - .d = d, - }; - } - - fn chacha20Core(x: *BlockVec, input: BlockVec) callconv(.Inline) void { - x.* = input; - - const rounds = comptime [_]QuarterRound{ - Rp(0, 4, 8, 12), - Rp(1, 5, 9, 13), - Rp(2, 6, 10, 14), - Rp(3, 7, 11, 15), - Rp(0, 5, 10, 15), - Rp(1, 6, 11, 12), - Rp(2, 7, 8, 13), - Rp(3, 4, 9, 14), - }; - - comptime var j: usize = 0; - inline while (j < 20) : (j += 2) { - inline for (rounds) |r| { - x[r.a] +%= x[r.b]; - x[r.d] = math.rotl(u32, x[r.d] ^ x[r.a], @as(u32, 16)); - x[r.c] +%= x[r.d]; - x[r.b] = math.rotl(u32, x[r.b] ^ x[r.c], @as(u32, 12)); - x[r.a] +%= x[r.b]; - x[r.d] = math.rotl(u32, x[r.d] ^ x[r.a], @as(u32, 8)); - x[r.c] +%= x[r.d]; - x[r.b] = math.rotl(u32, x[r.b] ^ x[r.c], @as(u32, 7)); - } - } - } - - fn hashToBytes(out: *[64]u8, x: BlockVec) callconv(.Inline) void { - var i: usize = 0; - while (i < 4) : (i += 1) { - mem.writeIntLittle(u32, out[16 * i + 0 ..][0..4], x[i * 4 + 0]); - mem.writeIntLittle(u32, out[16 * i + 4 ..][0..4], x[i * 4 + 1]); - mem.writeIntLittle(u32, out[16 * i + 8 ..][0..4], x[i * 4 + 2]); - mem.writeIntLittle(u32, out[16 * i + 12 ..][0..4], x[i * 4 + 3]); - } - } - - fn contextFeedback(x: *BlockVec, ctx: BlockVec) callconv(.Inline) void { - var i: usize = 0; - while (i < 16) : (i += 1) { - x[i] +%= ctx[i]; - } - } - - fn chacha20Xor(out: []u8, in: []const u8, key: [8]u32, counter: [4]u32) void { - var ctx = initContext(key, counter); - var x: BlockVec = undefined; - var buf: [64]u8 = undefined; - var i: usize = 0; - while (i + 64 <= in.len) : (i += 64) { - chacha20Core(x[0..], ctx); - contextFeedback(&x, ctx); - hashToBytes(buf[0..], x); - - var xout = out[i..]; - const xin = in[i..]; - var j: usize = 0; - while (j < 64) : (j += 1) { - xout[j] = xin[j]; - } - j = 0; - while (j < 64) : (j += 1) { - xout[j] ^= buf[j]; - } - ctx[12] += 1; - } - if (i < in.len) { - chacha20Core(x[0..], ctx); - contextFeedback(&x, ctx); - hashToBytes(buf[0..], x); - - var xout = out[i..]; - const xin = in[i..]; - var j: usize = 0; - while (j < in.len % 64) : (j += 1) { - xout[j] = xin[j] ^ buf[j]; - } - } - } - - fn hchacha20(input: [16]u8, key: [32]u8) [32]u8 { - var c: [4]u32 = undefined; - for (c) |_, i| { - c[i] = mem.readIntLittle(u32, input[4 * i ..][0..4]); - } - const ctx = initContext(keyToWords(key), c); - var x: BlockVec = undefined; - chacha20Core(x[0..], ctx); - var out: [32]u8 = undefined; - mem.writeIntLittle(u32, out[0..4], x[0]); - mem.writeIntLittle(u32, out[4..8], x[1]); - mem.writeIntLittle(u32, out[8..12], x[2]); - 
mem.writeIntLittle(u32, out[12..16], x[3]); - mem.writeIntLittle(u32, out[16..20], x[12]); - mem.writeIntLittle(u32, out[20..24], x[13]); - mem.writeIntLittle(u32, out[24..28], x[14]); - mem.writeIntLittle(u32, out[28..32], x[15]); - return out; - } -}; - -const ChaCha20Impl = if (std.Target.current.cpu.arch == .x86_64) ChaCha20VecImpl else ChaCha20NonVecImpl; +fn ChaChaImpl(comptime rounds_nb: usize) type { + return if (std.Target.current.cpu.arch == .x86_64) ChaChaVecImpl(rounds_nb) else ChaChaNonVecImpl(rounds_nb); +} fn keyToWords(key: [32]u8) [8]u32 { var k: [8]u32 = undefined; @@ -305,68 +376,239 @@ fn keyToWords(key: [32]u8) [8]u32 { return k; } -/// ChaCha20 avoids the possibility of timing attacks, as there are no branches -/// on secret key data. -/// -/// in and out should be the same length. -/// counter should generally be 0 or 1 -/// -/// ChaCha20 is self-reversing. To decrypt just run the cipher with the same -/// counter, nonce, and key. -pub const ChaCha20IETF = struct { - pub fn xor(out: []u8, in: []const u8, counter: u32, key: [32]u8, nonce: [12]u8) void { - assert(in.len == out.len); - assert((in.len >> 6) + counter <= maxInt(u32)); +fn extend(key: [32]u8, nonce: [24]u8, comptime rounds_nb: usize) struct { key: [32]u8, nonce: [12]u8 } { + var subnonce: [12]u8 = undefined; + mem.set(u8, subnonce[0..4], 0); + mem.copy(u8, subnonce[4..], nonce[16..24]); + return .{ + .key = ChaChaImpl(rounds_nb).hchacha20(nonce[0..16].*, key), + .nonce = subnonce, + }; +} - var c: [4]u32 = undefined; - c[0] = counter; - c[1] = mem.readIntLittle(u32, nonce[0..4]); - c[2] = mem.readIntLittle(u32, nonce[4..8]); - c[3] = mem.readIntLittle(u32, nonce[8..12]); - ChaCha20Impl.chacha20Xor(out, in, keyToWords(key), c); - } -}; +fn ChaChaIETF(comptime rounds_nb: usize) type { + return struct { + /// Nonce length in bytes. + pub const nonce_length = 12; + /// Key length in bytes. + pub const key_length = 32; -/// This is the original ChaCha20 before RFC 7539, which recommends using the -/// orgininal version on applications such as disk or file encryption that might -/// exceed the 256 GiB limit of the 96-bit nonce version. -pub const ChaCha20With64BitNonce = struct { - pub fn xor(out: []u8, in: []const u8, counter: u64, key: [32]u8, nonce: [8]u8) void { - assert(in.len == out.len); - assert(counter +% (in.len >> 6) >= counter); + /// Add the output of the ChaCha20 stream cipher to `in` and stores the result into `out`. + /// WARNING: This function doesn't provide authenticated encryption. + /// Using the AEAD or one of the `box` versions is usually preferred. 
+ pub fn xor(out: []u8, in: []const u8, counter: u32, key: [key_length]u8, nonce: [nonce_length]u8) void { + assert(in.len == out.len); + assert(in.len / 64 <= (1 << 32 - 1) - counter); - var cursor: usize = 0; - const k = keyToWords(key); - var c: [4]u32 = undefined; - c[0] = @truncate(u32, counter); - c[1] = @truncate(u32, counter >> 32); - c[2] = mem.readIntLittle(u32, nonce[0..4]); - c[3] = mem.readIntLittle(u32, nonce[4..8]); + var d: [4]u32 = undefined; + d[0] = counter; + d[1] = mem.readIntLittle(u32, nonce[0..4]); + d[2] = mem.readIntLittle(u32, nonce[4..8]); + d[3] = mem.readIntLittle(u32, nonce[8..12]); + ChaChaImpl(rounds_nb).chacha20Xor(out, in, keyToWords(key), d); + } + }; +} - const block_length = (1 << 6); - // The full block size is greater than the address space on a 32bit machine - const big_block = if (@sizeOf(usize) > 4) (block_length << 32) else maxInt(usize); +fn ChaChaWith64BitNonce(comptime rounds_nb: usize) type { + return struct { + /// Nonce length in bytes. + pub const nonce_length = 8; + /// Key length in bytes. + pub const key_length = 32; - // first partial big block - if (((@intCast(u64, maxInt(u32) - @truncate(u32, counter)) + 1) << 6) < in.len) { - ChaCha20Impl.chacha20Xor(out[cursor..big_block], in[cursor..big_block], k, c); - cursor = big_block - cursor; - c[1] += 1; - if (comptime @sizeOf(usize) > 4) { - // A big block is giant: 256 GiB, but we can avoid this limitation - var remaining_blocks: u32 = @intCast(u32, (in.len / big_block)); - var i: u32 = 0; - while (remaining_blocks > 0) : (remaining_blocks -= 1) { - ChaCha20Impl.chacha20Xor(out[cursor .. cursor + big_block], in[cursor .. cursor + big_block], k, c); - c[1] += 1; // upper 32-bit of counter, generic chacha20Xor() doesn't know about this. - cursor += big_block; + /// Add the output of the ChaCha20 stream cipher to `in` and stores the result into `out`. + /// WARNING: This function doesn't provide authenticated encryption. + /// Using the AEAD or one of the `box` versions is usually preferred. + pub fn xor(out: []u8, in: []const u8, counter: u64, key: [key_length]u8, nonce: [nonce_length]u8) void { + assert(in.len == out.len); + assert(in.len / 64 <= (1 << 64 - 1) - counter); + + var cursor: usize = 0; + const k = keyToWords(key); + var c: [4]u32 = undefined; + c[0] = @truncate(u32, counter); + c[1] = @truncate(u32, counter >> 32); + c[2] = mem.readIntLittle(u32, nonce[0..4]); + c[3] = mem.readIntLittle(u32, nonce[4..8]); + + const block_length = (1 << 6); + // The full block size is greater than the address space on a 32bit machine + const big_block = if (@sizeOf(usize) > 4) (block_length << 32) else maxInt(usize); + + // first partial big block + if (((@intCast(u64, maxInt(u32) - @truncate(u32, counter)) + 1) << 6) < in.len) { + ChaChaImpl(rounds_nb).chacha20Xor(out[cursor..big_block], in[cursor..big_block], k, c); + cursor = big_block - cursor; + c[1] += 1; + if (comptime @sizeOf(usize) > 4) { + // A big block is giant: 256 GiB, but we can avoid this limitation + var remaining_blocks: u32 = @intCast(u32, (in.len / big_block)); + var i: u32 = 0; + while (remaining_blocks > 0) : (remaining_blocks -= 1) { + ChaChaImpl(rounds_nb).chacha20Xor(out[cursor .. cursor + big_block], in[cursor .. cursor + big_block], k, c); + c[1] += 1; // upper 32-bit of counter, generic chacha20Xor() doesn't know about this. 
+ cursor += big_block; + } } } + ChaChaImpl(rounds_nb).chacha20Xor(out[cursor..], in[cursor..], k, c); + } + }; +} + +fn XChaChaIETF(comptime rounds_nb: usize) type { + return struct { + /// Nonce length in bytes. + pub const nonce_length = 24; + /// Key length in bytes. + pub const key_length = 32; + + /// Add the output of the XChaCha20 stream cipher to `in` and stores the result into `out`. + /// WARNING: This function doesn't provide authenticated encryption. + /// Using the AEAD or one of the `box` versions is usually preferred. + pub fn xor(out: []u8, in: []const u8, counter: u32, key: [key_length]u8, nonce: [nonce_length]u8) void { + const extended = extend(key, nonce, rounds_nb); + ChaChaIETF(rounds_nb).xor(out, in, counter, extended.key, extended.nonce); + } + }; +} + +fn ChaChaPoly1305(comptime rounds_nb: usize) type { + return struct { + pub const tag_length = 16; + pub const nonce_length = 12; + pub const key_length = 32; + + /// c: ciphertext: output buffer should be of size m.len + /// tag: authentication tag: output MAC + /// m: message + /// ad: Associated Data + /// npub: public nonce + /// k: private key + pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) void { + assert(c.len == m.len); + + var polyKey = [_]u8{0} ** 32; + ChaChaIETF(rounds_nb).xor(polyKey[0..], polyKey[0..], 0, k, npub); + + ChaChaIETF(rounds_nb).xor(c[0..m.len], m, 1, k, npub); + + var mac = Poly1305.init(polyKey[0..]); + mac.update(ad); + if (ad.len % 16 != 0) { + const zeros = [_]u8{0} ** 16; + const padding = 16 - (ad.len % 16); + mac.update(zeros[0..padding]); + } + mac.update(c[0..m.len]); + if (m.len % 16 != 0) { + const zeros = [_]u8{0} ** 16; + const padding = 16 - (m.len % 16); + mac.update(zeros[0..padding]); + } + var lens: [16]u8 = undefined; + mem.writeIntLittle(u64, lens[0..8], ad.len); + mem.writeIntLittle(u64, lens[8..16], m.len); + mac.update(lens[0..]); + mac.final(tag); } - ChaCha20Impl.chacha20Xor(out[cursor..], in[cursor..], k, c); + /// m: message: output buffer should be of size c.len + /// c: ciphertext + /// tag: authentication tag + /// ad: Associated Data + /// npub: public nonce + /// k: private key + /// NOTE: the check of the authentication tag is currently not done in constant time + pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) Error!void { + assert(c.len == m.len); + + var polyKey = [_]u8{0} ** 32; + ChaChaIETF(rounds_nb).xor(polyKey[0..], polyKey[0..], 0, k, npub); + + var mac = Poly1305.init(polyKey[0..]); + + mac.update(ad); + if (ad.len % 16 != 0) { + const zeros = [_]u8{0} ** 16; + const padding = 16 - (ad.len % 16); + mac.update(zeros[0..padding]); + } + mac.update(c); + if (c.len % 16 != 0) { + const zeros = [_]u8{0} ** 16; + const padding = 16 - (c.len % 16); + mac.update(zeros[0..padding]); + } + var lens: [16]u8 = undefined; + mem.writeIntLittle(u64, lens[0..8], ad.len); + mem.writeIntLittle(u64, lens[8..16], c.len); + mac.update(lens[0..]); + var computedTag: [16]u8 = undefined; + mac.final(computedTag[0..]); + + var acc: u8 = 0; + for (computedTag) |_, i| { + acc |= computedTag[i] ^ tag[i]; + } + if (acc != 0) { + return error.AuthenticationFailed; + } + ChaChaIETF(rounds_nb).xor(m[0..c.len], c, 1, k, npub); + } + }; +} + +fn XChaChaPoly1305(comptime rounds_nb: usize) type { + return struct { + pub const tag_length = 16; + pub const nonce_length = 24; + pub const key_length = 32; + + /// c: ciphertext: output buffer 
should be of size m.len + /// tag: authentication tag: output MAC + /// m: message + /// ad: Associated Data + /// npub: public nonce + /// k: private key + pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) void { + const extended = extend(k, npub, rounds_nb); + return ChaChaPoly1305(rounds_nb).encrypt(c, tag, m, ad, extended.nonce, extended.key); + } + + /// m: message: output buffer should be of size c.len + /// c: ciphertext + /// tag: authentication tag + /// ad: Associated Data + /// npub: public nonce + /// k: private key + pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) Error!void { + const extended = extend(k, npub, rounds_nb); + return ChaChaPoly1305(rounds_nb).decrypt(m, c, tag, ad, extended.nonce, extended.key); + } + }; +} + +test "chacha20 AEAD API" { + const aeads = [_]type{ ChaCha20Poly1305, XChaCha20Poly1305 }; + const m = "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it."; + const ad = "Additional data"; + + inline for (aeads) |aead| { + const key = [_]u8{69} ** aead.key_length; + const nonce = [_]u8{42} ** aead.nonce_length; + var c: [m.len]u8 = undefined; + var tag: [aead.tag_length]u8 = undefined; + var out: [m.len]u8 = undefined; + + aead.encrypt(c[0..], tag[0..], m, ad, nonce, key); + try aead.decrypt(out[0..], c[0..], tag, ad[0..], nonce, key); + testing.expectEqualSlices(u8, out[0..], m); + c[0] += 1; + testing.expectError(error.AuthenticationFailed, aead.decrypt(out[0..], c[0..], tag, ad[0..], nonce, key)); } -}; +} // https://tools.ietf.org/html/rfc7539#section-2.4.2 test "crypto.chacha20 test vector sunscreen" { @@ -387,7 +629,7 @@ test "crypto.chacha20 test vector sunscreen" { 0xb4, 0x0b, 0x8e, 0xed, 0xf2, 0x78, 0x5e, 0x42, 0x87, 0x4d, }; - const input = "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it."; + const m = "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it."; var result: [114]u8 = undefined; const key = [_]u8{ 0, 1, 2, 3, 4, 5, 6, 7, @@ -401,13 +643,12 @@ test "crypto.chacha20 test vector sunscreen" { 0, 0, 0, 0, }; - ChaCha20IETF.xor(result[0..], input[0..], 1, key, nonce); + ChaCha20IETF.xor(result[0..], m[0..], 1, key, nonce); testing.expectEqualSlices(u8, &expected_result, &result); - // Chacha20 is self-reversing. 
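The new `chacha20 AEAD API` test above only exercises the 20-round types; the reduced-round variants produced by the same generics expose an identical `encrypt`/`decrypt` interface. A hedged sketch, assuming it sits alongside the declarations in chacha20.zig so that `ChaCha8Poly1305` and `testing` are in scope; buffer contents and the test name are illustrative, not part of the patch:

    test "ChaCha8Poly1305 has the same interface (illustrative)" {
        const m = "example message";
        const ad = "additional data";
        const key = [_]u8{0x42} ** ChaCha8Poly1305.key_length;
        const nonce = [_]u8{0x24} ** ChaCha8Poly1305.nonce_length;

        // Detached-tag AEAD: ciphertext and tag are written to separate buffers.
        var c: [m.len]u8 = undefined;
        var tag: [ChaCha8Poly1305.tag_length]u8 = undefined;
        ChaCha8Poly1305.encrypt(c[0..], &tag, m, ad, nonce, key);

        var out: [m.len]u8 = undefined;
        try ChaCha8Poly1305.decrypt(out[0..], c[0..], tag, ad, nonce, key);
        testing.expectEqualSlices(u8, out[0..], m);
    }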
- var plaintext: [114]u8 = undefined; - ChaCha20IETF.xor(plaintext[0..], result[0..], 1, key, nonce); - testing.expect(mem.order(u8, input, &plaintext) == .eq); + var m2: [114]u8 = undefined; + ChaCha20IETF.xor(m2[0..], result[0..], 1, key, nonce); + testing.expect(mem.order(u8, m, &m2) == .eq); } // https://tools.ietf.org/html/draft-agl-tls-chacha20poly1305-04#section-7 @@ -422,7 +663,7 @@ test "crypto.chacha20 test vector 1" { 0x6a, 0x43, 0xb8, 0xf4, 0x15, 0x18, 0xa1, 0x1c, 0xc3, 0x87, 0xb6, 0x69, 0xb2, 0xee, 0x65, 0x86, }; - const input = [_]u8{ + const m = [_]u8{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -441,7 +682,7 @@ test "crypto.chacha20 test vector 1" { }; const nonce = [_]u8{ 0, 0, 0, 0, 0, 0, 0, 0 }; - ChaCha20With64BitNonce.xor(result[0..], input[0..], 0, key, nonce); + ChaCha20With64BitNonce.xor(result[0..], m[0..], 0, key, nonce); testing.expectEqualSlices(u8, &expected_result, &result); } @@ -456,7 +697,7 @@ test "crypto.chacha20 test vector 2" { 0x53, 0xd7, 0x92, 0xb1, 0xc4, 0x3f, 0xea, 0x81, 0x7e, 0x9a, 0xd2, 0x75, 0xae, 0x54, 0x69, 0x63, }; - const input = [_]u8{ + const m = [_]u8{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -475,7 +716,7 @@ test "crypto.chacha20 test vector 2" { }; const nonce = [_]u8{ 0, 0, 0, 0, 0, 0, 0, 0 }; - ChaCha20With64BitNonce.xor(result[0..], input[0..], 0, key, nonce); + ChaCha20With64BitNonce.xor(result[0..], m[0..], 0, key, nonce); testing.expectEqualSlices(u8, &expected_result, &result); } @@ -490,7 +731,7 @@ test "crypto.chacha20 test vector 3" { 0x52, 0x77, 0x06, 0x2e, 0xb7, 0xa0, 0x43, 0x3e, 0x44, 0x5f, 0x41, 0xe3, }; - const input = [_]u8{ + const m = [_]u8{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -509,7 +750,7 @@ test "crypto.chacha20 test vector 3" { }; const nonce = [_]u8{ 0, 0, 0, 0, 0, 0, 0, 1 }; - ChaCha20With64BitNonce.xor(result[0..], input[0..], 0, key, nonce); + ChaCha20With64BitNonce.xor(result[0..], m[0..], 0, key, nonce); testing.expectEqualSlices(u8, &expected_result, &result); } @@ -524,7 +765,7 @@ test "crypto.chacha20 test vector 4" { 0x5d, 0xdc, 0x49, 0x7a, 0x0b, 0x46, 0x6e, 0x7d, 0x6b, 0xbd, 0xb0, 0x04, 0x1b, 0x2f, 0x58, 0x6b, }; - const input = [_]u8{ + const m = [_]u8{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -543,7 +784,7 @@ test "crypto.chacha20 test vector 4" { }; const nonce = [_]u8{ 1, 0, 0, 0, 0, 0, 0, 0 }; - ChaCha20With64BitNonce.xor(result[0..], input[0..], 0, key, nonce); + ChaCha20With64BitNonce.xor(result[0..], m[0..], 0, key, nonce); testing.expectEqualSlices(u8, &expected_result, &result); } @@ -585,7 +826,7 @@ test "crypto.chacha20 test vector 5" { 0x87, 0x46, 0xd4, 0x52, 0x4d, 0x38, 0x40, 0x7a, 0x6d, 0xeb, 0x3a, 0xb7, 0x8f, 0xab, 0x78, 0xc9, }; - const input = [_]u8{ + const m = [_]u8{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -615,147 +856,14 @@ test "crypto.chacha20 test vector 5" { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, }; - ChaCha20With64BitNonce.xor(result[0..], input[0..], 0, key, nonce); + ChaCha20With64BitNonce.xor(result[0..], m[0..], 0, key, nonce); 
testing.expectEqualSlices(u8, &expected_result, &result); } -pub const chacha20poly1305_tag_length = 16; - -fn chacha20poly1305SealDetached(ciphertext: []u8, tag: *[chacha20poly1305_tag_length]u8, plaintext: []const u8, data: []const u8, key: [32]u8, nonce: [12]u8) void { - assert(ciphertext.len == plaintext.len); - - // derive poly1305 key - var polyKey = [_]u8{0} ** 32; - ChaCha20IETF.xor(polyKey[0..], polyKey[0..], 0, key, nonce); - - // encrypt plaintext - ChaCha20IETF.xor(ciphertext[0..plaintext.len], plaintext, 1, key, nonce); - - // construct mac - var mac = Poly1305.init(polyKey[0..]); - mac.update(data); - if (data.len % 16 != 0) { - const zeros = [_]u8{0} ** 16; - const padding = 16 - (data.len % 16); - mac.update(zeros[0..padding]); - } - mac.update(ciphertext[0..plaintext.len]); - if (plaintext.len % 16 != 0) { - const zeros = [_]u8{0} ** 16; - const padding = 16 - (plaintext.len % 16); - mac.update(zeros[0..padding]); - } - var lens: [16]u8 = undefined; - mem.writeIntLittle(u64, lens[0..8], data.len); - mem.writeIntLittle(u64, lens[8..16], plaintext.len); - mac.update(lens[0..]); - mac.final(tag); -} - -fn chacha20poly1305Seal(ciphertextAndTag: []u8, plaintext: []const u8, data: []const u8, key: [32]u8, nonce: [12]u8) void { - return chacha20poly1305SealDetached(ciphertextAndTag[0..plaintext.len], ciphertextAndTag[plaintext.len..][0..chacha20poly1305_tag_length], plaintext, data, key, nonce); -} - -/// Verifies and decrypts an authenticated message produced by chacha20poly1305SealDetached. -fn chacha20poly1305OpenDetached(dst: []u8, ciphertext: []const u8, tag: *const [chacha20poly1305_tag_length]u8, data: []const u8, key: [32]u8, nonce: [12]u8) Error!void { - // split ciphertext and tag - assert(dst.len == ciphertext.len); - - // derive poly1305 key - var polyKey = [_]u8{0} ** 32; - ChaCha20IETF.xor(polyKey[0..], polyKey[0..], 0, key, nonce); - - // construct mac - var mac = Poly1305.init(polyKey[0..]); - - mac.update(data); - if (data.len % 16 != 0) { - const zeros = [_]u8{0} ** 16; - const padding = 16 - (data.len % 16); - mac.update(zeros[0..padding]); - } - mac.update(ciphertext); - if (ciphertext.len % 16 != 0) { - const zeros = [_]u8{0} ** 16; - const padding = 16 - (ciphertext.len % 16); - mac.update(zeros[0..padding]); - } - var lens: [16]u8 = undefined; - mem.writeIntLittle(u64, lens[0..8], data.len); - mem.writeIntLittle(u64, lens[8..16], ciphertext.len); - mac.update(lens[0..]); - var computedTag: [16]u8 = undefined; - mac.final(computedTag[0..]); - - // verify mac in constant time - // TODO: we can't currently guarantee that this will run in constant time. - // See https://github.com/ziglang/zig/issues/1776 - var acc: u8 = 0; - for (computedTag) |_, i| { - acc |= computedTag[i] ^ tag[i]; - } - if (acc != 0) { - return error.AuthenticationFailed; - } - - // decrypt ciphertext - ChaCha20IETF.xor(dst[0..ciphertext.len], ciphertext, 1, key, nonce); -} - -/// Verifies and decrypts an authenticated message produced by chacha20poly1305Seal. 
-fn chacha20poly1305Open(dst: []u8, ciphertextAndTag: []const u8, data: []const u8, key: [32]u8, nonce: [12]u8) Error!void { - if (ciphertextAndTag.len < chacha20poly1305_tag_length) { - return error.AuthenticationFailed; - } - const ciphertextLen = ciphertextAndTag.len - chacha20poly1305_tag_length; - return try chacha20poly1305OpenDetached(dst, ciphertextAndTag[0..ciphertextLen], ciphertextAndTag[ciphertextLen..][0..chacha20poly1305_tag_length], data, key, nonce); -} - -fn extend(key: [32]u8, nonce: [24]u8) struct { key: [32]u8, nonce: [12]u8 } { - var subnonce: [12]u8 = undefined; - mem.set(u8, subnonce[0..4], 0); - mem.copy(u8, subnonce[4..], nonce[16..24]); - return .{ - .key = ChaCha20Impl.hchacha20(nonce[0..16].*, key), - .nonce = subnonce, - }; -} - -pub const XChaCha20IETF = struct { - pub fn xor(out: []u8, in: []const u8, counter: u32, key: [32]u8, nonce: [24]u8) void { - const extended = extend(key, nonce); - ChaCha20IETF.xor(out, in, counter, extended.key, extended.nonce); - } -}; - -pub const xchacha20poly1305_tag_length = 16; - -fn xchacha20poly1305SealDetached(ciphertext: []u8, tag: *[chacha20poly1305_tag_length]u8, plaintext: []const u8, data: []const u8, key: [32]u8, nonce: [24]u8) void { - const extended = extend(key, nonce); - return chacha20poly1305SealDetached(ciphertext, tag, plaintext, data, extended.key, extended.nonce); -} - -fn xchacha20poly1305Seal(ciphertextAndTag: []u8, plaintext: []const u8, data: []const u8, key: [32]u8, nonce: [24]u8) void { - const extended = extend(key, nonce); - return chacha20poly1305Seal(ciphertextAndTag, plaintext, data, extended.key, extended.nonce); -} - -/// Verifies and decrypts an authenticated message produced by xchacha20poly1305SealDetached. -fn xchacha20poly1305OpenDetached(plaintext: []u8, ciphertext: []const u8, tag: *const [chacha20poly1305_tag_length]u8, data: []const u8, key: [32]u8, nonce: [24]u8) Error!void { - const extended = extend(key, nonce); - return try chacha20poly1305OpenDetached(plaintext, ciphertext, tag, data, extended.key, extended.nonce); -} - -/// Verifies and decrypts an authenticated message produced by xchacha20poly1305Seal. 
-fn xchacha20poly1305Open(ciphertextAndTag: []u8, msgAndTag: []const u8, data: []const u8, key: [32]u8, nonce: [24]u8) Error!void { - const extended = extend(key, nonce); - return try chacha20poly1305Open(ciphertextAndTag, msgAndTag, data, extended.key, extended.nonce); -} - test "seal" { { - const plaintext = ""; - const data = ""; + const m = ""; + const ad = ""; const key = [_]u8{ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, @@ -764,11 +872,11 @@ test "seal" { const exp_out = [_]u8{ 0xa0, 0x78, 0x4d, 0x7a, 0x47, 0x16, 0xf3, 0xfe, 0xb4, 0xf6, 0x4e, 0x7f, 0x4b, 0x39, 0xbf, 0x4 }; var out: [exp_out.len]u8 = undefined; - chacha20poly1305Seal(out[0..], plaintext, data, key, nonce); + ChaCha20Poly1305.encrypt(out[0..m.len], out[m.len..], m, ad, nonce, key); testing.expectEqualSlices(u8, exp_out[0..], out[0..]); } { - const plaintext = [_]u8{ + const m = [_]u8{ 0x4c, 0x61, 0x64, 0x69, 0x65, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x47, 0x65, 0x6e, 0x74, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x20, 0x6f, 0x66, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x20, 0x6f, 0x66, 0x20, 0x27, 0x39, 0x39, 0x3a, 0x20, 0x49, 0x66, 0x20, 0x49, 0x20, 0x63, @@ -778,7 +886,7 @@ test "seal" { 0x63, 0x72, 0x65, 0x65, 0x6e, 0x20, 0x77, 0x6f, 0x75, 0x6c, 0x64, 0x20, 0x62, 0x65, 0x20, 0x69, 0x74, 0x2e, }; - const data = [_]u8{ 0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7 }; + const ad = [_]u8{ 0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7 }; const key = [_]u8{ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, @@ -797,15 +905,15 @@ test "seal" { }; var out: [exp_out.len]u8 = undefined; - chacha20poly1305Seal(out[0..], plaintext[0..], data[0..], key, nonce); + ChaCha20Poly1305.encrypt(out[0..m.len], out[m.len..], m[0..], ad[0..], nonce, key); testing.expectEqualSlices(u8, exp_out[0..], out[0..]); } } test "open" { { - const ciphertext = [_]u8{ 0xa0, 0x78, 0x4d, 0x7a, 0x47, 0x16, 0xf3, 0xfe, 0xb4, 0xf6, 0x4e, 0x7f, 0x4b, 0x39, 0xbf, 0x4 }; - const data = ""; + const c = [_]u8{ 0xa0, 0x78, 0x4d, 0x7a, 0x47, 0x16, 0xf3, 0xfe, 0xb4, 0xf6, 0x4e, 0x7f, 0x4b, 0x39, 0xbf, 0x4 }; + const ad = ""; const key = [_]u8{ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, @@ -814,11 +922,11 @@ test "open" { const exp_out = ""; var out: [exp_out.len]u8 = undefined; - try chacha20poly1305Open(out[0..], ciphertext[0..], data, key, nonce); + try ChaCha20Poly1305.decrypt(out[0..], c[0..exp_out.len], c[exp_out.len..].*, ad[0..], nonce, key); testing.expectEqualSlices(u8, exp_out[0..], out[0..]); } { - const ciphertext = [_]u8{ + const c = [_]u8{ 0xd3, 0x1a, 0x8d, 0x34, 0x64, 0x8e, 0x60, 0xdb, 0x7b, 0x86, 0xaf, 0xbc, 0x53, 0xef, 0x7e, 0xc2, 0xa4, 0xad, 0xed, 0x51, 0x29, 0x6e, 0x8, 0xfe, 0xa9, 0xe2, 0xb5, 0xa7, 0x36, 0xee, 0x62, 0xd6, 0x3d, 0xbe, 0xa4, 0x5e, 0x8c, 0xa9, 0x67, 0x12, 0x82, 0xfa, 0xfb, 0x69, 0xda, 0x92, 0x72, 0x8b, @@ -829,7 +937,7 @@ test "open" { 0x61, 0x16, 0x1a, 0xe1, 0xb, 0x59, 0x4f, 0x9, 0xe2, 0x6a, 0x7e, 0x90, 0x2e, 0xcb, 0xd0, 0x60, 0x6, 0x91, }; - const data = [_]u8{ 0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7 }; + 
const ad = [_]u8{ 0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7 }; const key = [_]u8{ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, @@ -847,126 +955,45 @@ test "open" { }; var out: [exp_out.len]u8 = undefined; - try chacha20poly1305Open(out[0..], ciphertext[0..], data[0..], key, nonce); + try ChaCha20Poly1305.decrypt(out[0..], c[0..exp_out.len], c[exp_out.len..].*, ad[0..], nonce, key); testing.expectEqualSlices(u8, exp_out[0..], out[0..]); // corrupting the ciphertext, data, key, or nonce should cause a failure - var bad_ciphertext = ciphertext; - bad_ciphertext[0] ^= 1; - testing.expectError(error.AuthenticationFailed, chacha20poly1305Open(out[0..], bad_ciphertext[0..], data[0..], key, nonce)); - var bad_data = data; - bad_data[0] ^= 1; - testing.expectError(error.AuthenticationFailed, chacha20poly1305Open(out[0..], ciphertext[0..], bad_data[0..], key, nonce)); + var bad_c = c; + bad_c[0] ^= 1; + testing.expectError(error.AuthenticationFailed, ChaCha20Poly1305.decrypt(out[0..], bad_c[0..out.len], bad_c[out.len..].*, ad[0..], nonce, key)); + var bad_ad = ad; + bad_ad[0] ^= 1; + testing.expectError(error.AuthenticationFailed, ChaCha20Poly1305.decrypt(out[0..], c[0..out.len], c[out.len..].*, bad_ad[0..], nonce, key)); var bad_key = key; bad_key[0] ^= 1; - testing.expectError(error.AuthenticationFailed, chacha20poly1305Open(out[0..], ciphertext[0..], data[0..], bad_key, nonce)); + testing.expectError(error.AuthenticationFailed, ChaCha20Poly1305.decrypt(out[0..], c[0..out.len], c[out.len..].*, ad[0..], nonce, bad_key)); var bad_nonce = nonce; bad_nonce[0] ^= 1; - testing.expectError(error.AuthenticationFailed, chacha20poly1305Open(out[0..], ciphertext[0..], data[0..], key, bad_nonce)); - - // a short ciphertext should result in a different error - testing.expectError(error.AuthenticationFailed, chacha20poly1305Open(out[0..], "", data[0..], key, bad_nonce)); + testing.expectError(error.AuthenticationFailed, ChaCha20Poly1305.decrypt(out[0..], c[0..out.len], c[out.len..].*, ad[0..], bad_nonce, key)); } } test "crypto.xchacha20" { const key = [_]u8{69} ** 32; const nonce = [_]u8{42} ** 24; - const input = "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it."; + const m = "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it."; { - var ciphertext: [input.len]u8 = undefined; - XChaCha20IETF.xor(ciphertext[0..], input[0..], 0, key, nonce); - var buf: [2 * ciphertext.len]u8 = undefined; - testing.expectEqualStrings(try std.fmt.bufPrint(&buf, "{s}", .{std.fmt.fmtSliceHexUpper(&ciphertext)}), "E0A1BCF939654AFDBDC1746EC49832647C19D891F0D1A81FC0C1703B4514BDEA584B512F6908C2C5E9DD18D5CBC1805DE5803FE3B9CA5F193FB8359E91FAB0C3BB40309A292EB1CF49685C65C4A3ADF4F11DB0CD2B6B67FBC174BC2E860E8F769FD3565BBFAD1C845E05A0FED9BE167C240D"); + var c: [m.len]u8 = undefined; + XChaCha20IETF.xor(c[0..], m[0..], 0, key, nonce); + var buf: [2 * c.len]u8 = undefined; + testing.expectEqualStrings(try std.fmt.bufPrint(&buf, "{s}", .{std.fmt.fmtSliceHexUpper(&c)}), "E0A1BCF939654AFDBDC1746EC49832647C19D891F0D1A81FC0C1703B4514BDEA584B512F6908C2C5E9DD18D5CBC1805DE5803FE3B9CA5F193FB8359E91FAB0C3BB40309A292EB1CF49685C65C4A3ADF4F11DB0CD2B6B67FBC174BC2E860E8F769FD3565BBFAD1C845E05A0FED9BE167C240D"); } { - const data = "Additional data"; - var 
ciphertext: [input.len + xchacha20poly1305_tag_length]u8 = undefined; - xchacha20poly1305Seal(ciphertext[0..], input, data, key, nonce); - var out: [input.len]u8 = undefined; - try xchacha20poly1305Open(out[0..], ciphertext[0..], data, key, nonce); - var buf: [2 * ciphertext.len]u8 = undefined; - testing.expectEqualStrings(try std.fmt.bufPrint(&buf, "{s}", .{std.fmt.fmtSliceHexUpper(&ciphertext)}), "994D2DD32333F48E53650C02C7A2ABB8E018B0836D7175AEC779F52E961780768F815C58F1AA52D211498DB89B9216763F569C9433A6BBFCEFB4D4A49387A4C5207FBB3B5A92B5941294DF30588C6740D39DC16FA1F0E634F7246CF7CDCB978E44347D89381B7A74EB7084F754B90BDE9AAF5A94B8F2A85EFD0B50692AE2D425E234"); - testing.expectEqualSlices(u8, out[0..], input); - ciphertext[0] += 1; - testing.expectError(error.AuthenticationFailed, xchacha20poly1305Open(out[0..], ciphertext[0..], data, key, nonce)); - } -} - -pub const Chacha20Poly1305 = struct { - pub const tag_length = 16; - pub const nonce_length = 12; - pub const key_length = 32; - - /// c: ciphertext: output buffer should be of size m.len - /// tag: authentication tag: output MAC - /// m: message - /// ad: Associated Data - /// npub: public nonce - /// k: private key - pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) void { - assert(c.len == m.len); - return chacha20poly1305SealDetached(c, tag, m, ad, k, npub); - } - - /// m: message: output buffer should be of size c.len - /// c: ciphertext - /// tag: authentication tag - /// ad: Associated Data - /// npub: public nonce - /// k: private key - /// NOTE: the check of the authentication tag is currently not done in constant time - pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) Error!void { - assert(c.len == m.len); - return try chacha20poly1305OpenDetached(m, c, tag[0..], ad, k, npub); - } -}; - -pub const XChacha20Poly1305 = struct { - pub const tag_length = 16; - pub const nonce_length = 24; - pub const key_length = 32; - - /// c: ciphertext: output buffer should be of size m.len - /// tag: authentication tag: output MAC - /// m: message - /// ad: Associated Data - /// npub: public nonce - /// k: private key - pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) void { - assert(c.len == m.len); - return xchacha20poly1305SealDetached(c, tag, m, ad, k, npub); - } - - /// m: message: output buffer should be of size c.len - /// c: ciphertext - /// tag: authentication tag - /// ad: Associated Data - /// npub: public nonce - /// k: private key - /// NOTE: the check of the authentication tag is currently not done in constant time - pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) Error!void { - assert(c.len == m.len); - return try xchacha20poly1305OpenDetached(m, c, tag[0..], ad, k, npub); - } -}; - -test "chacha20 AEAD API" { - const aeads = [_]type{ Chacha20Poly1305, XChacha20Poly1305 }; - const input = "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it."; - const data = "Additional data"; - - inline for (aeads) |aead| { - const key = [_]u8{69} ** aead.key_length; - const nonce = [_]u8{42} ** aead.nonce_length; - var ciphertext: [input.len]u8 = undefined; - var tag: [aead.tag_length]u8 = undefined; - var out: [input.len]u8 = undefined; - - aead.encrypt(ciphertext[0..], tag[0..], input, data, nonce, key); - 
try aead.decrypt(out[0..], ciphertext[0..], tag, data[0..], nonce, key); - testing.expectEqualSlices(u8, out[0..], input); - ciphertext[0] += 1; - testing.expectError(error.AuthenticationFailed, aead.decrypt(out[0..], ciphertext[0..], tag, data[0..], nonce, key)); + const ad = "Additional data"; + var c: [m.len + XChaCha20Poly1305.tag_length]u8 = undefined; + XChaCha20Poly1305.encrypt(c[0..m.len], c[m.len..], m, ad, nonce, key); + var out: [m.len]u8 = undefined; + try XChaCha20Poly1305.decrypt(out[0..], c[0..m.len], c[m.len..].*, ad, nonce, key); + var buf: [2 * c.len]u8 = undefined; + testing.expectEqualStrings(try std.fmt.bufPrint(&buf, "{s}", .{std.fmt.fmtSliceHexUpper(&c)}), "994D2DD32333F48E53650C02C7A2ABB8E018B0836D7175AEC779F52E961780768F815C58F1AA52D211498DB89B9216763F569C9433A6BBFCEFB4D4A49387A4C5207FBB3B5A92B5941294DF30588C6740D39DC16FA1F0E634F7246CF7CDCB978E44347D89381B7A74EB7084F754B90BDE9AAF5A94B8F2A85EFD0B50692AE2D425E234"); + testing.expectEqualSlices(u8, out[0..], m); + c[0] += 1; + testing.expectError(error.AuthenticationFailed, XChaCha20Poly1305.decrypt(out[0..], c[0..m.len], c[m.len..].*, ad, nonce, key)); } } diff --git a/lib/std/crypto/pbkdf2.zig b/lib/std/crypto/pbkdf2.zig index ce1912091b..575fb83006 100644 --- a/lib/std/crypto/pbkdf2.zig +++ b/lib/std/crypto/pbkdf2.zig @@ -20,20 +20,20 @@ const Error = std.crypto.Error; // pseudorandom function. See Appendix B.1 for further discussion.) // PBKDF2 is recommended for new applications. // -// PBKDF2 (P, S, c, dkLen) +// PBKDF2 (P, S, c, dk_len) // -// Options: PRF underlying pseudorandom function (hLen +// Options: PRF underlying pseudorandom function (h_len // denotes the length in octets of the // pseudorandom function output) // // Input: P password, an octet string // S salt, an octet string // c iteration count, a positive integer -// dkLen intended length in octets of the derived +// dk_len intended length in octets of the derived // key, a positive integer, at most -// (2^32 - 1) * hLen +// (2^32 - 1) * h_len // -// Output: DK derived key, a dkLen-octet string +// Output: DK derived key, a dk_len-octet string // Based on Apple's CommonKeyDerivation, based originally on code by Damien Bergamini. @@ -41,7 +41,7 @@ const Error = std.crypto.Error; /// /// PBKDF2 is defined in RFC 2898, and is a recommendation of NIST SP 800-132. /// -/// derivedKey: Slice of appropriate size for generated key. Generally 16 or 32 bytes in length. +/// dk: Slice of appropriate size for generated key. Generally 16 or 32 bytes in length. /// May be uninitialized. All bytes will be overwritten. /// Maximum size is `maxInt(u32) * Hash.digest_length` /// It is a programming error to pass buffer longer than the maximum size. @@ -52,43 +52,38 @@ const Error = std.crypto.Error; /// /// rounds: Iteration count. Must be greater than 0. Common values range from 1,000 to 100,000. /// Larger iteration counts improve security by increasing the time required to compute -/// the derivedKey. It is common to tune this parameter to achieve approximately 100ms. +/// the dk. It is common to tune this parameter to achieve approximately 100ms. /// /// Prf: Pseudo-random function to use. A common choice is `std.crypto.auth.hmac.HmacSha256`. 
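+///
+/// Example (illustrative only; the PRF and the round count here are arbitrary
+/// choices, not part of this change):
+/// ```
+/// var dk: [32]u8 = undefined;
+/// try pbkdf2(&dk, "password", "salt", 100_000, std.crypto.auth.hmac.HmacSha256);
+/// ```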
-pub fn pbkdf2(derivedKey: []u8, password: []const u8, salt: []const u8, rounds: u32, comptime Prf: type) Error!void { +pub fn pbkdf2(dk: []u8, password: []const u8, salt: []const u8, rounds: u32, comptime Prf: type) Error!void { if (rounds < 1) return error.WeakParameters; - const dkLen = derivedKey.len; - const hLen = Prf.mac_length; - comptime std.debug.assert(hLen >= 1); + const dk_len = dk.len; + const h_len = Prf.mac_length; + comptime std.debug.assert(h_len >= 1); // FromSpec: // - // 1. If dkLen > maxInt(u32) * hLen, output "derived key too long" and + // 1. If dk_len > maxInt(u32) * h_len, output "derived key too long" and // stop. // - if (comptime (maxInt(usize) > maxInt(u32) * hLen) and (dkLen > @as(usize, maxInt(u32) * hLen))) { - // If maxInt(usize) is less than `maxInt(u32) * hLen` then dkLen is always inbounds + if (dk_len / h_len >= maxInt(u32)) { + // Counter starts at 1 and is 32 bit, so if we have to return more blocks, we would overflow return error.OutputTooLong; } // FromSpec: // - // 2. Let l be the number of hLen-long blocks of bytes in the derived key, + // 2. Let l be the number of h_len-long blocks of bytes in the derived key, // rounding up, and let r be the number of bytes in the last // block // - // l will not overflow, proof: - // let `L(dkLen, hLen) = (dkLen + hLen - 1) / hLen` - // then `L^-1(l, hLen) = l*hLen - hLen + 1` - // 1) L^-1(maxInt(u32), hLen) <= maxInt(u32)*hLen - // 2) maxInt(u32)*hLen - hLen + 1 <= maxInt(u32)*hLen // subtract maxInt(u32)*hLen + 1 - // 3) -hLen <= -1 // multiply by -1 - // 4) hLen >= 1 - const r_ = dkLen % hLen; - const l = @intCast(u32, (dkLen / hLen) + @as(u1, if (r_ == 0) 0 else 1)); // original: (dkLen + hLen - 1) / hLen - const r = if (r_ == 0) hLen else r_; + const blocks_count = @intCast(u32, std.math.divCeil(usize, dk_len, h_len) catch unreachable); + var r = dk_len % h_len; + if (r == 0) { + r = h_len; + } // FromSpec: // @@ -118,37 +113,38 @@ pub fn pbkdf2(derivedKey: []u8, password: []const u8, salt: []const u8, rounds: // Here, INT (i) is a four-octet encoding of the integer i, most // significant octet first. // - // 4. Concatenate the blocks and extract the first dkLen octets to + // 4. Concatenate the blocks and extract the first dk_len octets to // produce a derived key DK: // // DK = T_1 || T_2 || ... 
|| T_l<0..r-1> - var block: u32 = 0; // Spec limits to u32 - while (block < l) : (block += 1) { - var prevBlock: [hLen]u8 = undefined; - var newBlock: [hLen]u8 = undefined; + + var block: u32 = 0; + while (block < blocks_count) : (block += 1) { + var prev_block: [h_len]u8 = undefined; + var new_block: [h_len]u8 = undefined; // U_1 = PRF (P, S || INT (i)) - const blockIndex = mem.toBytes(mem.nativeToBig(u32, block + 1)); // Block index starts at 0001 + const block_index = mem.toBytes(mem.nativeToBig(u32, block + 1)); // Block index starts at 0001 var ctx = Prf.init(password); ctx.update(salt); - ctx.update(blockIndex[0..]); - ctx.final(prevBlock[0..]); + ctx.update(block_index[0..]); + ctx.final(prev_block[0..]); // Choose portion of DK to write into (T_n) and initialize - const offset = block * hLen; - const blockLen = if (block != l - 1) hLen else r; - const dkBlock: []u8 = derivedKey[offset..][0..blockLen]; - mem.copy(u8, dkBlock, prevBlock[0..dkBlock.len]); + const offset = block * h_len; + const block_len = if (block != blocks_count - 1) h_len else r; + const dk_block: []u8 = dk[offset..][0..block_len]; + mem.copy(u8, dk_block, prev_block[0..dk_block.len]); var i: u32 = 1; while (i < rounds) : (i += 1) { // U_c = PRF (P, U_{c-1}) - Prf.create(&newBlock, prevBlock[0..], password); - mem.copy(u8, prevBlock[0..], newBlock[0..]); + Prf.create(&new_block, prev_block[0..], password); + mem.copy(u8, prev_block[0..], new_block[0..]); // F (P, S, c, i) = U_1 \xor U_2 \xor ... \xor U_c - for (dkBlock) |_, j| { - dkBlock[j] ^= newBlock[j]; + for (dk_block) |_, j| { + dk_block[j] ^= new_block[j]; } } } @@ -158,49 +154,50 @@ const htest = @import("test.zig"); const HmacSha1 = std.crypto.auth.hmac.HmacSha1; // RFC 6070 PBKDF2 HMAC-SHA1 Test Vectors + test "RFC 6070 one iteration" { const p = "password"; const s = "salt"; const c = 1; - const dkLen = 20; + const dk_len = 20; - var derivedKey: [dkLen]u8 = undefined; + var dk: [dk_len]u8 = undefined; - try pbkdf2(&derivedKey, p, s, c, HmacSha1); + try pbkdf2(&dk, p, s, c, HmacSha1); const expected = "0c60c80f961f0e71f3a9b524af6012062fe037a6"; - htest.assertEqual(expected, derivedKey[0..]); + htest.assertEqual(expected, dk[0..]); } test "RFC 6070 two iterations" { const p = "password"; const s = "salt"; const c = 2; - const dkLen = 20; + const dk_len = 20; - var derivedKey: [dkLen]u8 = undefined; + var dk: [dk_len]u8 = undefined; - try pbkdf2(&derivedKey, p, s, c, HmacSha1); + try pbkdf2(&dk, p, s, c, HmacSha1); const expected = "ea6c014dc72d6f8ccd1ed92ace1d41f0d8de8957"; - htest.assertEqual(expected, derivedKey[0..]); + htest.assertEqual(expected, dk[0..]); } test "RFC 6070 4096 iterations" { const p = "password"; const s = "salt"; const c = 4096; - const dkLen = 20; + const dk_len = 20; - var derivedKey: [dkLen]u8 = undefined; + var dk: [dk_len]u8 = undefined; - try pbkdf2(&derivedKey, p, s, c, HmacSha1); + try pbkdf2(&dk, p, s, c, HmacSha1); const expected = "4b007901b765489abead49d926f721d065a429c1"; - htest.assertEqual(expected, derivedKey[0..]); + htest.assertEqual(expected, dk[0..]); } test "RFC 6070 16,777,216 iterations" { @@ -212,48 +209,48 @@ test "RFC 6070 16,777,216 iterations" { const p = "password"; const s = "salt"; const c = 16777216; - const dkLen = 20; + const dk_len = 20; - var derivedKey = [_]u8{0} ** dkLen; + var dk = [_]u8{0} ** dk_len; - try pbkdf2(&derivedKey, p, s, c, HmacSha1); + try pbkdf2(&dk, p, s, c, HmacSha1); const expected = "eefe3d61cd4da4e4e9945b3d6ba2158c2634e984"; - htest.assertEqual(expected, derivedKey[0..]); + 
htest.assertEqual(expected, dk[0..]); } test "RFC 6070 multi-block salt and password" { const p = "passwordPASSWORDpassword"; const s = "saltSALTsaltSALTsaltSALTsaltSALTsalt"; const c = 4096; - const dkLen = 25; + const dk_len = 25; - var derivedKey: [dkLen]u8 = undefined; + var dk: [dk_len]u8 = undefined; - try pbkdf2(&derivedKey, p, s, c, HmacSha1); + try pbkdf2(&dk, p, s, c, HmacSha1); const expected = "3d2eec4fe41c849b80c8d83662c0e44a8b291a964cf2f07038"; - htest.assertEqual(expected, derivedKey[0..]); + htest.assertEqual(expected, dk[0..]); } test "RFC 6070 embedded NUL" { const p = "pass\x00word"; const s = "sa\x00lt"; const c = 4096; - const dkLen = 16; + const dk_len = 16; - var derivedKey: [dkLen]u8 = undefined; + var dk: [dk_len]u8 = undefined; - try pbkdf2(&derivedKey, p, s, c, HmacSha1); + try pbkdf2(&dk, p, s, c, HmacSha1); const expected = "56fa6aa75548099dcc37d7f03425e0c3"; - htest.assertEqual(expected, derivedKey[0..]); + htest.assertEqual(expected, dk[0..]); } -test "Very large dkLen" { +test "Very large dk_len" { // This test allocates 8GB of memory and is expected to take several hours to run. if (true) { return error.SkipZigTest; @@ -261,13 +258,13 @@ test "Very large dkLen" { const p = "password"; const s = "salt"; const c = 1; - const dkLen = 1 << 33; + const dk_len = 1 << 33; - var derivedKey = try std.testing.allocator.alloc(u8, dkLen); + var dk = try std.testing.allocator.alloc(u8, dk_len); defer { - std.testing.allocator.free(derivedKey); + std.testing.allocator.free(dk); } - try pbkdf2(derivedKey, p, s, c, HmacSha1); // Just verify this doesn't crash with an overflow + try pbkdf2(dk, p, s, c, HmacSha1); } diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 74fb95ffa8..a7badf7ed1 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -250,24 +250,6 @@ pub fn panicExtra(trace: ?*const builtin.StackTrace, first_trace_addr: ?usize, c resetSegfaultHandler(); } - if (comptime std.Target.current.isDarwin() and std.Target.current.cpu.arch == .aarch64) - nosuspend { - // As a workaround for not having threadlocal variable support in LLD for this target, - // we have a simpler panic implementation that does not use threadlocal variables. - // TODO https://github.com/ziglang/zig/issues/7527 - const stderr = io.getStdErr().writer(); - if (@atomicRmw(u8, &panicking, .Add, 1, .SeqCst) == 0) { - stderr.print("panic: " ++ format ++ "\n", args) catch os.abort(); - if (trace) |t| { - dumpStackTrace(t.*); - } - dumpCurrentStackTrace(first_trace_addr); - } else { - stderr.print("Panicked during a panic. Aborting.\n", .{}) catch os.abort(); - } - os.abort(); - }; - nosuspend switch (panic_stage) { 0 => { panic_stage = 1; diff --git a/lib/std/enums.zig b/lib/std/enums.zig new file mode 100644 index 0000000000..bddda38c9f --- /dev/null +++ b/lib/std/enums.zig @@ -0,0 +1,1281 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2015-2021 Zig Contributors +// This file is part of [zig](https://ziglang.org/), which is MIT licensed. +// The MIT license requires this copyright notice to be included in all copies +// and substantial portions of the software. + +//! This module contains utilities and data structures for working with enums. + +const std = @import("std.zig"); +const assert = std.debug.assert; +const testing = std.testing; +const EnumField = std.builtin.TypeInfo.EnumField; + +/// Returns a struct with a field matching each unique named enum element. +/// If the enum is extern and has multiple names for the same value, only +/// the first name is used. 
Each field is of type Data and has the provided +/// default, which may be undefined. +pub fn EnumFieldStruct(comptime E: type, comptime Data: type, comptime field_default: ?Data) type { + const StructField = std.builtin.TypeInfo.StructField; + var fields: []const StructField = &[_]StructField{}; + for (uniqueFields(E)) |field, i| { + fields = fields ++ &[_]StructField{.{ + .name = field.name, + .field_type = Data, + .default_value = field_default, + .is_comptime = false, + .alignment = if (@sizeOf(Data) > 0) @alignOf(Data) else 0, + }}; + } + return @Type(.{ .Struct = .{ + .layout = .Auto, + .fields = fields, + .decls = &[_]std.builtin.TypeInfo.Declaration{}, + .is_tuple = false, + }}); +} + +/// Looks up the supplied fields in the given enum type. +/// Uses only the field names, field values are ignored. +/// The result array is in the same order as the input. +pub fn valuesFromFields(comptime E: type, comptime fields: []const EnumField) []const E { + comptime { + var result: [fields.len]E = undefined; + for (fields) |f, i| { + result[i] = @field(E, f.name); + } + return &result; + } +} + +test "std.enums.valuesFromFields" { + const E = extern enum { a, b, c, d = 0 }; + const fields = valuesFromFields(E, &[_]EnumField{ + .{ .name = "b", .value = undefined }, + .{ .name = "a", .value = undefined }, + .{ .name = "a", .value = undefined }, + .{ .name = "d", .value = undefined }, + }); + testing.expectEqual(E.b, fields[0]); + testing.expectEqual(E.a, fields[1]); + testing.expectEqual(E.d, fields[2]); // a == d + testing.expectEqual(E.d, fields[3]); +} + +/// Returns the set of all named values in the given enum, in +/// declaration order. +pub fn values(comptime E: type) []const E { + return comptime valuesFromFields(E, @typeInfo(E).Enum.fields); +} + +test "std.enum.values" { + const E = extern enum { a, b, c, d = 0 }; + testing.expectEqualSlices(E, &.{.a, .b, .c, .d}, values(E)); +} + +/// Returns the set of all unique named values in the given enum, in +/// declaration order. For repeated values in extern enums, only the +/// first name for each value is included. +pub fn uniqueValues(comptime E: type) []const E { + return comptime valuesFromFields(E, uniqueFields(E)); +} + +test "std.enum.uniqueValues" { + const E = extern enum { a, b, c, d = 0, e, f = 3 }; + testing.expectEqualSlices(E, &.{.a, .b, .c, .f}, uniqueValues(E)); + + const F = enum { a, b, c }; + testing.expectEqualSlices(F, &.{.a, .b, .c}, uniqueValues(F)); +} + +/// Returns the set of all unique field values in the given enum, in +/// declaration order. For repeated values in extern enums, only the +/// first name for each value is included. +pub fn uniqueFields(comptime E: type) []const EnumField { + comptime { + const info = @typeInfo(E).Enum; + const raw_fields = info.fields; + // Only extern enums can contain duplicates, + // so fast path other types. + if (info.layout != .Extern) { + return raw_fields; + } + + var unique_fields: []const EnumField = &[_]EnumField{}; + outer: + for (raw_fields) |candidate| { + for (unique_fields) |u| { + if (u.value == candidate.value) + continue :outer; + } + unique_fields = unique_fields ++ &[_]EnumField{candidate}; + } + + return unique_fields; + } +} + +/// Determines the length of a direct-mapped enum array, indexed by +/// @intCast(usize, @enumToInt(enum_value)). The enum must be exhaustive. +/// If the enum contains any fields with values that cannot be represented +/// by usize, a compile error is issued. 
The max_unused_slots parameter limits +/// the total number of items which have no matching enum key (holes in the enum +/// numbering). So for example, if an enum has values 1, 2, 5, and 6, max_unused_slots +/// must be at least 3, to allow unused slots 0, 3, and 4. +fn directEnumArrayLen(comptime E: type, comptime max_unused_slots: comptime_int) comptime_int { + const info = @typeInfo(E).Enum; + if (!info.is_exhaustive) { + @compileError("Cannot create direct array of non-exhaustive enum "++@typeName(E)); + } + + var max_value: comptime_int = -1; + const max_usize: comptime_int = ~@as(usize, 0); + const fields = uniqueFields(E); + for (fields) |f| { + if (f.value < 0) { + @compileError("Cannot create a direct enum array for "++@typeName(E)++", field ."++f.name++" has a negative value."); + } + if (f.value > max_value) { + if (f.value > max_usize) { + @compileError("Cannot create a direct enum array for "++@typeName(E)++", field ."++f.name++" is larger than the max value of usize."); + } + max_value = f.value; + } + } + + const unused_slots = max_value + 1 - fields.len; + if (unused_slots > max_unused_slots) { + const unused_str = std.fmt.comptimePrint("{d}", .{unused_slots}); + const allowed_str = std.fmt.comptimePrint("{d}", .{max_unused_slots}); + @compileError("Cannot create a direct enum array for "++@typeName(E)++". It would have "++unused_str++" unused slots, but only "++allowed_str++" are allowed."); + } + + return max_value + 1; +} + +/// Initializes an array of Data which can be indexed by +/// @intCast(usize, @enumToInt(enum_value)). The enum must be exhaustive. +/// If the enum contains any fields with values that cannot be represented +/// by usize, a compile error is issued. The max_unused_slots parameter limits +/// the total number of items which have no matching enum key (holes in the enum +/// numbering). So for example, if an enum has values 1, 2, 5, and 6, max_unused_slots +/// must be at least 3, to allow unused slots 0, 3, and 4. +/// The init_values parameter must be a struct with field names that match the enum values. +/// If the enum has multiple fields with the same value, the name of the first one must +/// be used. +pub fn directEnumArray( + comptime E: type, + comptime Data: type, + comptime max_unused_slots: comptime_int, + init_values: EnumFieldStruct(E, Data, null), +) [directEnumArrayLen(E, max_unused_slots)]Data { + return directEnumArrayDefault(E, Data, null, max_unused_slots, init_values); +} + +test "std.enums.directEnumArray" { + const E = enum(i4) { a = 4, b = 6, c = 2 }; + var runtime_false: bool = false; + const array = directEnumArray(E, bool, 4, .{ + .a = true, + .b = runtime_false, + .c = true, + }); + + testing.expectEqual([7]bool, @TypeOf(array)); + testing.expectEqual(true, array[4]); + testing.expectEqual(false, array[6]); + testing.expectEqual(true, array[2]); +} + +/// Initializes an array of Data which can be indexed by +/// @intCast(usize, @enumToInt(enum_value)). The enum must be exhaustive. +/// If the enum contains any fields with values that cannot be represented +/// by usize, a compile error is issued. The max_unused_slots parameter limits +/// the total number of items which have no matching enum key (holes in the enum +/// numbering). So for example, if an enum has values 1, 2, 5, and 6, max_unused_slots +/// must be at least 3, to allow unused slots 0, 3, and 4. +/// The init_values parameter must be a struct with field names that match the enum values. 
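+/// Fields may be omitted when a non-null default is provided; omitted fields
+/// are initialized to that default.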
+/// If the enum has multiple fields with the same value, the name of the first one must +/// be used. +pub fn directEnumArrayDefault( + comptime E: type, + comptime Data: type, + comptime default: ?Data, + comptime max_unused_slots: comptime_int, + init_values: EnumFieldStruct(E, Data, default), +) [directEnumArrayLen(E, max_unused_slots)]Data { + const len = comptime directEnumArrayLen(E, max_unused_slots); + var result: [len]Data = if (default) |d| [_]Data{d} ** len else undefined; + inline for (@typeInfo(@TypeOf(init_values)).Struct.fields) |f, i| { + const enum_value = @field(E, f.name); + const index = @intCast(usize, @enumToInt(enum_value)); + result[index] = @field(init_values, f.name); + } + return result; +} + +test "std.enums.directEnumArrayDefault" { + const E = enum(i4) { a = 4, b = 6, c = 2 }; + var runtime_false: bool = false; + const array = directEnumArrayDefault(E, bool, false, 4, .{ + .a = true, + .b = runtime_false, + }); + + testing.expectEqual([7]bool, @TypeOf(array)); + testing.expectEqual(true, array[4]); + testing.expectEqual(false, array[6]); + testing.expectEqual(false, array[2]); +} + +/// Cast an enum literal, value, or string to the enum value of type E +/// with the same name. +pub fn nameCast(comptime E: type, comptime value: anytype) E { + comptime { + const V = @TypeOf(value); + if (V == E) return value; + var name: ?[]const u8 = switch (@typeInfo(V)) { + .EnumLiteral, .Enum => @tagName(value), + .Pointer => if (std.meta.trait.isZigString(V)) value else null, + else => null, + }; + if (name) |n| { + if (@hasField(E, n)) { + return @field(E, n); + } + @compileError("Enum "++@typeName(E)++" has no field named "++n); + } + @compileError("Cannot cast from "++@typeName(@TypeOf(value))++" to "++@typeName(E)); + } +} + +test "std.enums.nameCast" { + const A = enum { a = 0, b = 1 }; + const B = enum { a = 1, b = 0 }; + testing.expectEqual(A.a, nameCast(A, .a)); + testing.expectEqual(A.a, nameCast(A, A.a)); + testing.expectEqual(A.a, nameCast(A, B.a)); + testing.expectEqual(A.a, nameCast(A, "a")); + testing.expectEqual(A.a, nameCast(A, @as(*const[1]u8, "a"))); + testing.expectEqual(A.a, nameCast(A, @as([:0]const u8, "a"))); + testing.expectEqual(A.a, nameCast(A, @as([]const u8, "a"))); + + testing.expectEqual(B.a, nameCast(B, .a)); + testing.expectEqual(B.a, nameCast(B, A.a)); + testing.expectEqual(B.a, nameCast(B, B.a)); + testing.expectEqual(B.a, nameCast(B, "a")); + + testing.expectEqual(B.b, nameCast(B, .b)); + testing.expectEqual(B.b, nameCast(B, A.b)); + testing.expectEqual(B.b, nameCast(B, B.b)); + testing.expectEqual(B.b, nameCast(B, "b")); +} + +/// A set of enum elements, backed by a bitfield. If the enum +/// is not dense, a mapping will be constructed from enum values +/// to dense indices. This type does no dynamic allocation and +/// can be copied by value. +pub fn EnumSet(comptime E: type) type { + const mixin = struct { + fn EnumSetExt(comptime Self: type) type { + const Indexer = Self.Indexer; + return struct { + /// Initializes the set using a struct of bools + pub fn init(init_values: EnumFieldStruct(E, bool, false)) Self { + var result = Self{}; + comptime var i: usize = 0; + inline while (i < Self.len) : (i += 1) { + comptime const key = Indexer.keyForIndex(i); + comptime const tag = @tagName(key); + if (@field(init_values, tag)) { + result.bits.set(i); + } + } + return result; + } + }; + } + }; + return IndexedSet(EnumIndexer(E), mixin.EnumSetExt); +} + +/// A map keyed by an enum, backed by a bitfield and a dense array. 
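+/// For example, `EnumMap(enum { a, b, c }, u32)` can hold up to three `u32`
+/// values, one per tag, and tracks which tags are present.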
+/// If the enum is not dense, a mapping will be constructed from +/// enum values to dense indices. This type does no dynamic +/// allocation and can be copied by value. +pub fn EnumMap(comptime E: type, comptime V: type) type { + const mixin = struct { + fn EnumMapExt(comptime Self: type) type { + const Indexer = Self.Indexer; + return struct { + /// Initializes the map using a sparse struct of optionals + pub fn init(init_values: EnumFieldStruct(E, ?V, @as(?V, null))) Self { + var result = Self{}; + comptime var i: usize = 0; + inline while (i < Self.len) : (i += 1) { + comptime const key = Indexer.keyForIndex(i); + comptime const tag = @tagName(key); + if (@field(init_values, tag)) |*v| { + result.bits.set(i); + result.values[i] = v.*; + } + } + return result; + } + /// Initializes a full mapping with all keys set to value. + /// Consider using EnumArray instead if the map will remain full. + pub fn initFull(value: V) Self { + var result = Self{ + .bits = Self.BitSet.initFull(), + .values = undefined, + }; + std.mem.set(V, &result.values, value); + return result; + } + /// Initializes a full mapping with supplied values. + /// Consider using EnumArray instead if the map will remain full. + pub fn initFullWith(init_values: EnumFieldStruct(E, V, @as(?V, null))) Self { + return initFullWithDefault(@as(?V, null), init_values); + } + /// Initializes a full mapping with a provided default. + /// Consider using EnumArray instead if the map will remain full. + pub fn initFullWithDefault(comptime default: ?V, init_values: EnumFieldStruct(E, V, default)) Self { + var result = Self{ + .bits = Self.BitSet.initFull(), + .values = undefined, + }; + comptime var i: usize = 0; + inline while (i < Self.len) : (i += 1) { + comptime const key = Indexer.keyForIndex(i); + comptime const tag = @tagName(key); + result.values[i] = @field(init_values, tag); + } + return result; + } + }; + } + }; + return IndexedMap(EnumIndexer(E), V, mixin.EnumMapExt); +} + +/// An array keyed by an enum, backed by a dense array. +/// If the enum is not dense, a mapping will be constructed from +/// enum values to dense indices. This type does no dynamic +/// allocation and can be copied by value. +pub fn EnumArray(comptime E: type, comptime V: type) type { + const mixin = struct { + fn EnumArrayExt(comptime Self: type) type { + const Indexer = Self.Indexer; + return struct { + /// Initializes all values in the enum array + pub fn init(init_values: EnumFieldStruct(E, V, @as(?V, null))) Self { + return initDefault(@as(?V, null), init_values); + } + + /// Initializes values in the enum array, with the specified default. + pub fn initDefault(comptime default: ?V, init_values: EnumFieldStruct(E, V, default)) Self { + var result = Self{ .values = undefined }; + comptime var i: usize = 0; + inline while (i < Self.len) : (i += 1) { + const key = comptime Indexer.keyForIndex(i); + const tag = @tagName(key); + result.values[i] = @field(init_values, tag); + } + return result; + } + }; + } + }; + return IndexedArray(EnumIndexer(E), V, mixin.EnumArrayExt); +} + +/// Pass this function as the Ext parameter to Indexed* if you +/// do not want to attach any extensions. This parameter was +/// originally an optional, but optional generic functions +/// seem to be broken at the moment. +/// TODO: Once #8169 is fixed, consider switching this param +/// back to an optional. +pub fn NoExtension(comptime Self: type) type { + return NoExt; +} +const NoExt = struct{}; + +/// A set type with an Indexer mapping from keys to indices. 
+/// Presence or absence is stored as a dense bitfield. This +/// type does no allocation and can be copied by value. +pub fn IndexedSet(comptime I: type, comptime Ext: fn(type)type) type { + comptime ensureIndexer(I); + return struct { + const Self = @This(); + + pub usingnamespace Ext(Self); + + /// The indexing rules for converting between keys and indices. + pub const Indexer = I; + /// The element type for this set. + pub const Key = Indexer.Key; + + const BitSet = std.StaticBitSet(Indexer.count); + + /// The maximum number of items in this set. + pub const len = Indexer.count; + + bits: BitSet = BitSet.initEmpty(), + + /// Returns a set containing all possible keys. + pub fn initFull() Self { + return .{ .bits = BitSet.initFull() }; + } + + /// Returns the number of keys in the set. + pub fn count(self: Self) usize { + return self.bits.count(); + } + + /// Checks if a key is in the set. + pub fn contains(self: Self, key: Key) bool { + return self.bits.isSet(Indexer.indexOf(key)); + } + + /// Puts a key in the set. + pub fn insert(self: *Self, key: Key) void { + self.bits.set(Indexer.indexOf(key)); + } + + /// Removes a key from the set. + pub fn remove(self: *Self, key: Key) void { + self.bits.unset(Indexer.indexOf(key)); + } + + /// Changes the presence of a key in the set to match the passed bool. + pub fn setPresent(self: *Self, key: Key, present: bool) void { + self.bits.setValue(Indexer.indexOf(key), present); + } + + /// Toggles the presence of a key in the set. If the key is in + /// the set, removes it. Otherwise adds it. + pub fn toggle(self: *Self, key: Key) void { + self.bits.toggle(Indexer.indexOf(key)); + } + + /// Toggles the presence of all keys in the passed set. + pub fn toggleSet(self: *Self, other: Self) void { + self.bits.toggleSet(other.bits); + } + + /// Toggles all possible keys in the set. + pub fn toggleAll(self: *Self) void { + self.bits.toggleAll(); + } + + /// Adds all keys in the passed set to this set. + pub fn setUnion(self: *Self, other: Self) void { + self.bits.setUnion(other.bits); + } + + /// Removes all keys which are not in the passed set. + pub fn setIntersection(self: *Self, other: Self) void { + self.bits.setIntersection(other.bits); + } + + /// Returns an iterator over this set, which iterates in + /// index order. Modifications to the set during iteration + /// may or may not be observed by the iterator, but will + /// not invalidate it. + pub fn iterator(self: *Self) Iterator { + return .{ .inner = self.bits.iterator(.{}) }; + } + + pub const Iterator = struct { + inner: BitSet.Iterator(.{}), + + pub fn next(self: *Iterator) ?Key { + return if (self.inner.next()) |index| + Indexer.keyForIndex(index) + else null; + } + }; + }; +} + +/// A map from keys to values, using an index lookup. Uses a +/// bitfield to track presence and a dense array of values. +/// This type does no allocation and can be copied by value. 
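+/// `EnumMap` in this module is a thin wrapper around this type, using
+/// `EnumIndexer(E)` as its Indexer; the Indexer interface is documented
+/// on `ensureIndexer`.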
+pub fn IndexedMap(comptime I: type, comptime V: type, comptime Ext: fn(type)type) type { + comptime ensureIndexer(I); + return struct { + const Self = @This(); + + pub usingnamespace Ext(Self); + + /// The index mapping for this map + pub const Indexer = I; + /// The key type used to index this map + pub const Key = Indexer.Key; + /// The value type stored in this map + pub const Value = V; + /// The number of possible keys in the map + pub const len = Indexer.count; + + const BitSet = std.StaticBitSet(Indexer.count); + + /// Bits determining whether items are in the map + bits: BitSet = BitSet.initEmpty(), + /// Values of items in the map. If the associated + /// bit is zero, the value is undefined. + values: [Indexer.count]Value = undefined, + + /// The number of items in the map. + pub fn count(self: Self) usize { + return self.bits.count(); + } + + /// Checks if the map contains an item. + pub fn contains(self: Self, key: Key) bool { + return self.bits.isSet(Indexer.indexOf(key)); + } + + /// Gets the value associated with a key. + /// If the key is not in the map, returns null. + pub fn get(self: Self, key: Key) ?Value { + const index = Indexer.indexOf(key); + return if (self.bits.isSet(index)) self.values[index] else null; + } + + /// Gets the value associated with a key, which must + /// exist in the map. + pub fn getAssertContains(self: Self, key: Key) Value { + const index = Indexer.indexOf(key); + assert(self.bits.isSet(index)); + return self.values[index]; + } + + /// Gets the address of the value associated with a key. + /// If the key is not in the map, returns null. + pub fn getPtr(self: *Self, key: Key) ?*Value { + const index = Indexer.indexOf(key); + return if (self.bits.isSet(index)) &self.values[index] else null; + } + + /// Gets the address of the const value associated with a key. + /// If the key is not in the map, returns null. + pub fn getPtrConst(self: *const Self, key: Key) ?*const Value { + const index = Indexer.indexOf(key); + return if (self.bits.isSet(index)) &self.values[index] else null; + } + + /// Gets the address of the value associated with a key. + /// The key must be present in the map. + pub fn getPtrAssertContains(self: *Self, key: Key) *Value { + const index = Indexer.indexOf(key); + assert(self.bits.isSet(index)); + return &self.values[index]; + } + + /// Adds the key to the map with the supplied value. + /// If the key is already in the map, overwrites the value. + pub fn put(self: *Self, key: Key, value: Value) void { + const index = Indexer.indexOf(key); + self.bits.set(index); + self.values[index] = value; + } + + /// Adds the key to the map with an undefined value. + /// If the key is already in the map, the value becomes undefined. + /// A pointer to the value is returned, which should be + /// used to initialize the value. + pub fn putUninitialized(self: *Self, key: Key) *Value { + const index = Indexer.indexOf(key); + self.bits.set(index); + self.values[index] = undefined; + return &self.values[index]; + } + + /// Sets the value associated with the key in the map, + /// and returns the old value. If the key was not in + /// the map, returns null. + pub fn fetchPut(self: *Self, key: Key, value: Value) ?Value { + const index = Indexer.indexOf(key); + const result: ?Value = if (self.bits.isSet(index)) self.values[index] else null; + self.bits.set(index); + self.values[index] = value; + return result; + } + + /// Removes a key from the map. If the key was not in the map, + /// does nothing. 
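+        /// Unlike `fetchRemove` below, the removed value is not returned.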
+ pub fn remove(self: *Self, key: Key) void { + const index = Indexer.indexOf(key); + self.bits.unset(index); + self.values[index] = undefined; + } + + /// Removes a key from the map, and returns the old value. + /// If the key was not in the map, returns null. + pub fn fetchRemove(self: *Self, key: Key) ?Value { + const index = Indexer.indexOf(key); + const result: ?Value = if (self.bits.isSet(index)) self.values[index] else null; + self.bits.unset(index); + self.values[index] = undefined; + return result; + } + + /// Returns an iterator over the map, which visits items in index order. + /// Modifications to the underlying map may or may not be observed by + /// the iterator, but will not invalidate it. + pub fn iterator(self: *Self) Iterator { + return .{ + .inner = self.bits.iterator(.{}), + .values = &self.values, + }; + } + + /// An entry in the map. + pub const Entry = struct { + /// The key associated with this entry. + /// Modifying this key will not change the map. + key: Key, + + /// A pointer to the value in the map associated + /// with this key. Modifications through this + /// pointer will modify the underlying data. + value: *Value, + }; + + pub const Iterator = struct { + inner: BitSet.Iterator(.{}), + values: *[Indexer.count]Value, + + pub fn next(self: *Iterator) ?Entry { + return if (self.inner.next()) |index| + Entry{ + .key = Indexer.keyForIndex(index), + .value = &self.values[index], + } + else null; + } + }; + }; +} + +/// A dense array of values, using an indexed lookup. +/// This type does no allocation and can be copied by value. +pub fn IndexedArray(comptime I: type, comptime V: type, comptime Ext: fn(type)type) type { + comptime ensureIndexer(I); + return struct { + const Self = @This(); + + pub usingnamespace Ext(Self); + + /// The index mapping for this map + pub const Indexer = I; + /// The key type used to index this map + pub const Key = Indexer.Key; + /// The value type stored in this map + pub const Value = V; + /// The number of possible keys in the map + pub const len = Indexer.count; + + values: [Indexer.count]Value, + + pub fn initUndefined() Self { + return Self{ .values = undefined }; + } + + pub fn initFill(v: Value) Self { + var self: Self = undefined; + std.mem.set(Value, &self.values, v); + return self; + } + + /// Returns the value in the array associated with a key. + pub fn get(self: Self, key: Key) Value { + return self.values[Indexer.indexOf(key)]; + } + + /// Returns a pointer to the slot in the array associated with a key. + pub fn getPtr(self: *Self, key: Key) *Value { + return &self.values[Indexer.indexOf(key)]; + } + + /// Returns a const pointer to the slot in the array associated with a key. + pub fn getPtrConst(self: *const Self, key: Key) *const Value { + return &self.values[Indexer.indexOf(key)]; + } + + /// Sets the value in the slot associated with a key. + pub fn set(self: *Self, key: Key, value: Value) void { + self.values[Indexer.indexOf(key)] = value; + } + + /// Iterates over the items in the array, in index order. + pub fn iterator(self: *Self) Iterator { + return .{ + .values = &self.values, + }; + } + + /// An entry in the array. + pub const Entry = struct { + /// The key associated with this entry. + /// Modifying this key will not change the array. + key: Key, + + /// A pointer to the value in the array associated + /// with this key. Modifications through this + /// pointer will modify the underlying data. 
+ value: *Value, + }; + + pub const Iterator = struct { + index: usize = 0, + values: *[Indexer.count]Value, + + pub fn next(self: *Iterator) ?Entry { + const index = self.index; + if (index < Indexer.count) { + self.index += 1; + return Entry{ + .key = Indexer.keyForIndex(index), + .value = &self.values[index], + }; + } + return null; + } + }; + }; +} + +/// Verifies that a type is a valid Indexer, providing a helpful +/// compile error if not. An Indexer maps a comptime known set +/// of keys to a dense set of zero-based indices. +/// The indexer interface must look like this: +/// ``` +/// struct { +/// /// The key type which this indexer converts to indices +/// pub const Key: type, +/// /// The number of indexes in the dense mapping +/// pub const count: usize, +/// /// Converts from a key to an index +/// pub fn indexOf(Key) usize; +/// /// Converts from an index to a key +/// pub fn keyForIndex(usize) Key; +/// } +/// ``` +pub fn ensureIndexer(comptime T: type) void { + comptime { + if (!@hasDecl(T, "Key")) @compileError("Indexer must have decl Key: type."); + if (@TypeOf(T.Key) != type) @compileError("Indexer.Key must be a type."); + if (!@hasDecl(T, "count")) @compileError("Indexer must have decl count: usize."); + if (@TypeOf(T.count) != usize) @compileError("Indexer.count must be a usize."); + if (!@hasDecl(T, "indexOf")) @compileError("Indexer.indexOf must be a fn(Key)usize."); + if (@TypeOf(T.indexOf) != fn(T.Key)usize) @compileError("Indexer must have decl indexOf: fn(Key)usize."); + if (!@hasDecl(T, "keyForIndex")) @compileError("Indexer must have decl keyForIndex: fn(usize)Key."); + if (@TypeOf(T.keyForIndex) != fn(usize)T.Key) @compileError("Indexer.keyForIndex must be a fn(usize)Key."); + } +} + +test "std.enums.ensureIndexer" { + ensureIndexer(struct { + pub const Key = u32; + pub const count: usize = 8; + pub fn indexOf(k: Key) usize { + return @intCast(usize, k); + } + pub fn keyForIndex(index: usize) Key { + return @intCast(Key, index); + } + }); +} + +fn ascByValue(ctx: void, comptime a: EnumField, comptime b: EnumField) bool { + return a.value < b.value; +} +pub fn EnumIndexer(comptime E: type) type { + if (!@typeInfo(E).Enum.is_exhaustive) { + @compileError("Cannot create an enum indexer for a non-exhaustive enum."); + } + + const const_fields = uniqueFields(E); + var fields = const_fields[0..const_fields.len].*; + if (fields.len == 0) { + return struct { + pub const Key = E; + pub const count: usize = 0; + pub fn indexOf(e: E) usize { unreachable; } + pub fn keyForIndex(i: usize) E { unreachable; } + }; + } + std.sort.sort(EnumField, &fields, {}, ascByValue); + const min = fields[0].value; + const max = fields[fields.len-1].value; + if (max - min == fields.len-1) { + return struct { + pub const Key = E; + pub const count = fields.len; + pub fn indexOf(e: E) usize { + return @intCast(usize, @enumToInt(e) - min); + } + pub fn keyForIndex(i: usize) E { + // TODO fix addition semantics. This calculation + // gives up some safety to avoid artificially limiting + // the range of signed enum values to max_isize. 
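+                // For example (mirroring the "dense negative" test below): with
+                // enum(i4) { a = -6, c = -4, b = -5 }, min is -6, so keyForIndex(2)
+                // computes @bitCast(isize, 2) +% -6 == -4, which converts back to .c.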
+ const enum_value = if (min < 0) @bitCast(isize, i) +% min else i + min; + return @intToEnum(E, @intCast(std.meta.Tag(E), enum_value)); + } + }; + } + + const keys = valuesFromFields(E, &fields); + + return struct { + pub const Key = E; + pub const count = fields.len; + pub fn indexOf(e: E) usize { + for (keys) |k, i| { + if (k == e) return i; + } + unreachable; + } + pub fn keyForIndex(i: usize) E { + return keys[i]; + } + }; +} + +test "std.enums.EnumIndexer dense zeroed" { + const E = enum{ b = 1, a = 0, c = 2 }; + const Indexer = EnumIndexer(E); + ensureIndexer(Indexer); + testing.expectEqual(E, Indexer.Key); + testing.expectEqual(@as(usize, 3), Indexer.count); + + testing.expectEqual(@as(usize, 0), Indexer.indexOf(.a)); + testing.expectEqual(@as(usize, 1), Indexer.indexOf(.b)); + testing.expectEqual(@as(usize, 2), Indexer.indexOf(.c)); + + testing.expectEqual(E.a, Indexer.keyForIndex(0)); + testing.expectEqual(E.b, Indexer.keyForIndex(1)); + testing.expectEqual(E.c, Indexer.keyForIndex(2)); +} + +test "std.enums.EnumIndexer dense positive" { + const E = enum(u4) { c = 6, a = 4, b = 5 }; + const Indexer = EnumIndexer(E); + ensureIndexer(Indexer); + testing.expectEqual(E, Indexer.Key); + testing.expectEqual(@as(usize, 3), Indexer.count); + + testing.expectEqual(@as(usize, 0), Indexer.indexOf(.a)); + testing.expectEqual(@as(usize, 1), Indexer.indexOf(.b)); + testing.expectEqual(@as(usize, 2), Indexer.indexOf(.c)); + + testing.expectEqual(E.a, Indexer.keyForIndex(0)); + testing.expectEqual(E.b, Indexer.keyForIndex(1)); + testing.expectEqual(E.c, Indexer.keyForIndex(2)); +} + +test "std.enums.EnumIndexer dense negative" { + const E = enum(i4) { a = -6, c = -4, b = -5 }; + const Indexer = EnumIndexer(E); + ensureIndexer(Indexer); + testing.expectEqual(E, Indexer.Key); + testing.expectEqual(@as(usize, 3), Indexer.count); + + testing.expectEqual(@as(usize, 0), Indexer.indexOf(.a)); + testing.expectEqual(@as(usize, 1), Indexer.indexOf(.b)); + testing.expectEqual(@as(usize, 2), Indexer.indexOf(.c)); + + testing.expectEqual(E.a, Indexer.keyForIndex(0)); + testing.expectEqual(E.b, Indexer.keyForIndex(1)); + testing.expectEqual(E.c, Indexer.keyForIndex(2)); +} + +test "std.enums.EnumIndexer sparse" { + const E = enum(i4) { a = -2, c = 6, b = 4 }; + const Indexer = EnumIndexer(E); + ensureIndexer(Indexer); + testing.expectEqual(E, Indexer.Key); + testing.expectEqual(@as(usize, 3), Indexer.count); + + testing.expectEqual(@as(usize, 0), Indexer.indexOf(.a)); + testing.expectEqual(@as(usize, 1), Indexer.indexOf(.b)); + testing.expectEqual(@as(usize, 2), Indexer.indexOf(.c)); + + testing.expectEqual(E.a, Indexer.keyForIndex(0)); + testing.expectEqual(E.b, Indexer.keyForIndex(1)); + testing.expectEqual(E.c, Indexer.keyForIndex(2)); +} + +test "std.enums.EnumIndexer repeats" { + const E = extern enum{ a = -2, c = 6, b = 4, b2 = 4 }; + const Indexer = EnumIndexer(E); + ensureIndexer(Indexer); + testing.expectEqual(E, Indexer.Key); + testing.expectEqual(@as(usize, 3), Indexer.count); + + testing.expectEqual(@as(usize, 0), Indexer.indexOf(.a)); + testing.expectEqual(@as(usize, 1), Indexer.indexOf(.b)); + testing.expectEqual(@as(usize, 2), Indexer.indexOf(.c)); + + testing.expectEqual(E.a, Indexer.keyForIndex(0)); + testing.expectEqual(E.b, Indexer.keyForIndex(1)); + testing.expectEqual(E.c, Indexer.keyForIndex(2)); +} + +test "std.enums.EnumSet" { + const E = extern enum { a, b, c, d, e = 0 }; + const Set = EnumSet(E); + testing.expectEqual(E, Set.Key); + testing.expectEqual(EnumIndexer(E), Set.Indexer); + 
testing.expectEqual(@as(usize, 4), Set.len); + + // Empty sets + const empty = Set{}; + comptime testing.expect(empty.count() == 0); + + var empty_b = Set.init(.{}); + testing.expect(empty_b.count() == 0); + + const empty_c = comptime Set.init(.{}); + comptime testing.expect(empty_c.count() == 0); + + const full = Set.initFull(); + testing.expect(full.count() == Set.len); + + const full_b = comptime Set.initFull(); + comptime testing.expect(full_b.count() == Set.len); + + testing.expectEqual(false, empty.contains(.a)); + testing.expectEqual(false, empty.contains(.b)); + testing.expectEqual(false, empty.contains(.c)); + testing.expectEqual(false, empty.contains(.d)); + testing.expectEqual(false, empty.contains(.e)); + { + var iter = empty_b.iterator(); + testing.expectEqual(@as(?E, null), iter.next()); + } + + var mut = Set.init(.{ + .a=true, .c=true, + }); + testing.expectEqual(@as(usize, 2), mut.count()); + testing.expectEqual(true, mut.contains(.a)); + testing.expectEqual(false, mut.contains(.b)); + testing.expectEqual(true, mut.contains(.c)); + testing.expectEqual(false, mut.contains(.d)); + testing.expectEqual(true, mut.contains(.e)); // aliases a + { + var it = mut.iterator(); + testing.expectEqual(@as(?E, .a), it.next()); + testing.expectEqual(@as(?E, .c), it.next()); + testing.expectEqual(@as(?E, null), it.next()); + } + + mut.toggleAll(); + testing.expectEqual(@as(usize, 2), mut.count()); + testing.expectEqual(false, mut.contains(.a)); + testing.expectEqual(true, mut.contains(.b)); + testing.expectEqual(false, mut.contains(.c)); + testing.expectEqual(true, mut.contains(.d)); + testing.expectEqual(false, mut.contains(.e)); // aliases a + { + var it = mut.iterator(); + testing.expectEqual(@as(?E, .b), it.next()); + testing.expectEqual(@as(?E, .d), it.next()); + testing.expectEqual(@as(?E, null), it.next()); + } + + mut.toggleSet(Set.init(.{ .a=true, .b=true })); + testing.expectEqual(@as(usize, 2), mut.count()); + testing.expectEqual(true, mut.contains(.a)); + testing.expectEqual(false, mut.contains(.b)); + testing.expectEqual(false, mut.contains(.c)); + testing.expectEqual(true, mut.contains(.d)); + testing.expectEqual(true, mut.contains(.e)); // aliases a + + mut.setUnion(Set.init(.{ .a=true, .b=true })); + testing.expectEqual(@as(usize, 3), mut.count()); + testing.expectEqual(true, mut.contains(.a)); + testing.expectEqual(true, mut.contains(.b)); + testing.expectEqual(false, mut.contains(.c)); + testing.expectEqual(true, mut.contains(.d)); + + mut.remove(.c); + mut.remove(.b); + testing.expectEqual(@as(usize, 2), mut.count()); + testing.expectEqual(true, mut.contains(.a)); + testing.expectEqual(false, mut.contains(.b)); + testing.expectEqual(false, mut.contains(.c)); + testing.expectEqual(true, mut.contains(.d)); + + mut.setIntersection(Set.init(.{ .a=true, .b=true })); + testing.expectEqual(@as(usize, 1), mut.count()); + testing.expectEqual(true, mut.contains(.a)); + testing.expectEqual(false, mut.contains(.b)); + testing.expectEqual(false, mut.contains(.c)); + testing.expectEqual(false, mut.contains(.d)); + + mut.insert(.a); + mut.insert(.b); + testing.expectEqual(@as(usize, 2), mut.count()); + testing.expectEqual(true, mut.contains(.a)); + testing.expectEqual(true, mut.contains(.b)); + testing.expectEqual(false, mut.contains(.c)); + testing.expectEqual(false, mut.contains(.d)); + + mut.setPresent(.a, false); + mut.toggle(.b); + mut.toggle(.c); + mut.setPresent(.d, true); + testing.expectEqual(@as(usize, 2), mut.count()); + testing.expectEqual(false, mut.contains(.a)); + 
testing.expectEqual(false, mut.contains(.b)); + testing.expectEqual(true, mut.contains(.c)); + testing.expectEqual(true, mut.contains(.d)); +} + +test "std.enums.EnumArray void" { + const E = extern enum { a, b, c, d, e = 0 }; + const ArrayVoid = EnumArray(E, void); + testing.expectEqual(E, ArrayVoid.Key); + testing.expectEqual(EnumIndexer(E), ArrayVoid.Indexer); + testing.expectEqual(void, ArrayVoid.Value); + testing.expectEqual(@as(usize, 4), ArrayVoid.len); + + const undef = ArrayVoid.initUndefined(); + var inst = ArrayVoid.initFill({}); + const inst2 = ArrayVoid.init(.{ .a = {}, .b = {}, .c = {}, .d = {} }); + const inst3 = ArrayVoid.initDefault({}, .{}); + + _ = inst.get(.a); + _ = inst.getPtr(.b); + _ = inst.getPtrConst(.c); + inst.set(.a, {}); + + var it = inst.iterator(); + testing.expectEqual(E.a, it.next().?.key); + testing.expectEqual(E.b, it.next().?.key); + testing.expectEqual(E.c, it.next().?.key); + testing.expectEqual(E.d, it.next().?.key); + testing.expect(it.next() == null); +} + +test "std.enums.EnumArray sized" { + const E = extern enum { a, b, c, d, e = 0 }; + const Array = EnumArray(E, usize); + testing.expectEqual(E, Array.Key); + testing.expectEqual(EnumIndexer(E), Array.Indexer); + testing.expectEqual(usize, Array.Value); + testing.expectEqual(@as(usize, 4), Array.len); + + const undef = Array.initUndefined(); + var inst = Array.initFill(5); + const inst2 = Array.init(.{ .a = 1, .b = 2, .c = 3, .d = 4 }); + const inst3 = Array.initDefault(6, .{.b = 4, .c = 2}); + + testing.expectEqual(@as(usize, 5), inst.get(.a)); + testing.expectEqual(@as(usize, 5), inst.get(.b)); + testing.expectEqual(@as(usize, 5), inst.get(.c)); + testing.expectEqual(@as(usize, 5), inst.get(.d)); + + testing.expectEqual(@as(usize, 1), inst2.get(.a)); + testing.expectEqual(@as(usize, 2), inst2.get(.b)); + testing.expectEqual(@as(usize, 3), inst2.get(.c)); + testing.expectEqual(@as(usize, 4), inst2.get(.d)); + + testing.expectEqual(@as(usize, 6), inst3.get(.a)); + testing.expectEqual(@as(usize, 4), inst3.get(.b)); + testing.expectEqual(@as(usize, 2), inst3.get(.c)); + testing.expectEqual(@as(usize, 6), inst3.get(.d)); + + testing.expectEqual(&inst.values[0], inst.getPtr(.a)); + testing.expectEqual(&inst.values[1], inst.getPtr(.b)); + testing.expectEqual(&inst.values[2], inst.getPtr(.c)); + testing.expectEqual(&inst.values[3], inst.getPtr(.d)); + + testing.expectEqual(@as(*const usize, &inst.values[0]), inst.getPtrConst(.a)); + testing.expectEqual(@as(*const usize, &inst.values[1]), inst.getPtrConst(.b)); + testing.expectEqual(@as(*const usize, &inst.values[2]), inst.getPtrConst(.c)); + testing.expectEqual(@as(*const usize, &inst.values[3]), inst.getPtrConst(.d)); + + inst.set(.c, 8); + testing.expectEqual(@as(usize, 5), inst.get(.a)); + testing.expectEqual(@as(usize, 5), inst.get(.b)); + testing.expectEqual(@as(usize, 8), inst.get(.c)); + testing.expectEqual(@as(usize, 5), inst.get(.d)); + + var it = inst.iterator(); + const Entry = Array.Entry; + testing.expectEqual(@as(?Entry, Entry{ + .key = .a, + .value = &inst.values[0], + }), it.next()); + testing.expectEqual(@as(?Entry, Entry{ + .key = .b, + .value = &inst.values[1], + }), it.next()); + testing.expectEqual(@as(?Entry, Entry{ + .key = .c, + .value = &inst.values[2], + }), it.next()); + testing.expectEqual(@as(?Entry, Entry{ + .key = .d, + .value = &inst.values[3], + }), it.next()); + testing.expectEqual(@as(?Entry, null), it.next()); +} + +test "std.enums.EnumMap void" { + const E = extern enum { a, b, c, d, e = 0 }; + const Map = 
EnumMap(E, void); + testing.expectEqual(E, Map.Key); + testing.expectEqual(EnumIndexer(E), Map.Indexer); + testing.expectEqual(void, Map.Value); + testing.expectEqual(@as(usize, 4), Map.len); + + const b = Map.initFull({}); + testing.expectEqual(@as(usize, 4), b.count()); + + const c = Map.initFullWith(.{ .a = {}, .b = {}, .c = {}, .d = {} }); + testing.expectEqual(@as(usize, 4), c.count()); + + const d = Map.initFullWithDefault({}, .{ .b = {} }); + testing.expectEqual(@as(usize, 4), d.count()); + + var a = Map.init(.{ .b = {}, .d = {} }); + testing.expectEqual(@as(usize, 2), a.count()); + testing.expectEqual(false, a.contains(.a)); + testing.expectEqual(true, a.contains(.b)); + testing.expectEqual(false, a.contains(.c)); + testing.expectEqual(true, a.contains(.d)); + testing.expect(a.get(.a) == null); + testing.expect(a.get(.b) != null); + testing.expect(a.get(.c) == null); + testing.expect(a.get(.d) != null); + testing.expect(a.getPtr(.a) == null); + testing.expect(a.getPtr(.b) != null); + testing.expect(a.getPtr(.c) == null); + testing.expect(a.getPtr(.d) != null); + testing.expect(a.getPtrConst(.a) == null); + testing.expect(a.getPtrConst(.b) != null); + testing.expect(a.getPtrConst(.c) == null); + testing.expect(a.getPtrConst(.d) != null); + _ = a.getPtrAssertContains(.b); + _ = a.getAssertContains(.d); + + a.put(.a, {}); + a.put(.a, {}); + a.putUninitialized(.c).* = {}; + a.putUninitialized(.c).* = {}; + + testing.expectEqual(@as(usize, 4), a.count()); + testing.expect(a.get(.a) != null); + testing.expect(a.get(.b) != null); + testing.expect(a.get(.c) != null); + testing.expect(a.get(.d) != null); + + a.remove(.a); + _ = a.fetchRemove(.c); + + var iter = a.iterator(); + const Entry = Map.Entry; + testing.expectEqual(E.b, iter.next().?.key); + testing.expectEqual(E.d, iter.next().?.key); + testing.expect(iter.next() == null); +} + +test "std.enums.EnumMap sized" { + const E = extern enum { a, b, c, d, e = 0 }; + const Map = EnumMap(E, usize); + testing.expectEqual(E, Map.Key); + testing.expectEqual(EnumIndexer(E), Map.Indexer); + testing.expectEqual(usize, Map.Value); + testing.expectEqual(@as(usize, 4), Map.len); + + const b = Map.initFull(5); + testing.expectEqual(@as(usize, 4), b.count()); + testing.expect(b.contains(.a)); + testing.expect(b.contains(.b)); + testing.expect(b.contains(.c)); + testing.expect(b.contains(.d)); + testing.expectEqual(@as(?usize, 5), b.get(.a)); + testing.expectEqual(@as(?usize, 5), b.get(.b)); + testing.expectEqual(@as(?usize, 5), b.get(.c)); + testing.expectEqual(@as(?usize, 5), b.get(.d)); + + const c = Map.initFullWith(.{ .a = 1, .b = 2, .c = 3, .d = 4 }); + testing.expectEqual(@as(usize, 4), c.count()); + testing.expect(c.contains(.a)); + testing.expect(c.contains(.b)); + testing.expect(c.contains(.c)); + testing.expect(c.contains(.d)); + testing.expectEqual(@as(?usize, 1), c.get(.a)); + testing.expectEqual(@as(?usize, 2), c.get(.b)); + testing.expectEqual(@as(?usize, 3), c.get(.c)); + testing.expectEqual(@as(?usize, 4), c.get(.d)); + + const d = Map.initFullWithDefault(6, .{ .b = 2, .c = 4 }); + testing.expectEqual(@as(usize, 4), d.count()); + testing.expect(d.contains(.a)); + testing.expect(d.contains(.b)); + testing.expect(d.contains(.c)); + testing.expect(d.contains(.d)); + testing.expectEqual(@as(?usize, 6), d.get(.a)); + testing.expectEqual(@as(?usize, 2), d.get(.b)); + testing.expectEqual(@as(?usize, 4), d.get(.c)); + testing.expectEqual(@as(?usize, 6), d.get(.d)); + + var a = Map.init(.{ .b = 2, .d = 4 }); + testing.expectEqual(@as(usize, 2), 
a.count()); + testing.expectEqual(false, a.contains(.a)); + testing.expectEqual(true, a.contains(.b)); + testing.expectEqual(false, a.contains(.c)); + testing.expectEqual(true, a.contains(.d)); + + testing.expectEqual(@as(?usize, null), a.get(.a)); + testing.expectEqual(@as(?usize, 2), a.get(.b)); + testing.expectEqual(@as(?usize, null), a.get(.c)); + testing.expectEqual(@as(?usize, 4), a.get(.d)); + + testing.expectEqual(@as(?*usize, null), a.getPtr(.a)); + testing.expectEqual(@as(?*usize, &a.values[1]), a.getPtr(.b)); + testing.expectEqual(@as(?*usize, null), a.getPtr(.c)); + testing.expectEqual(@as(?*usize, &a.values[3]), a.getPtr(.d)); + + testing.expectEqual(@as(?*const usize, null), a.getPtrConst(.a)); + testing.expectEqual(@as(?*const usize, &a.values[1]), a.getPtrConst(.b)); + testing.expectEqual(@as(?*const usize, null), a.getPtrConst(.c)); + testing.expectEqual(@as(?*const usize, &a.values[3]), a.getPtrConst(.d)); + + testing.expectEqual(@as(*const usize, &a.values[1]), a.getPtrAssertContains(.b)); + testing.expectEqual(@as(*const usize, &a.values[3]), a.getPtrAssertContains(.d)); + testing.expectEqual(@as(usize, 2), a.getAssertContains(.b)); + testing.expectEqual(@as(usize, 4), a.getAssertContains(.d)); + + a.put(.a, 3); + a.put(.a, 5); + a.putUninitialized(.c).* = 7; + a.putUninitialized(.c).* = 9; + + testing.expectEqual(@as(usize, 4), a.count()); + testing.expectEqual(@as(?usize, 5), a.get(.a)); + testing.expectEqual(@as(?usize, 2), a.get(.b)); + testing.expectEqual(@as(?usize, 9), a.get(.c)); + testing.expectEqual(@as(?usize, 4), a.get(.d)); + + a.remove(.a); + testing.expectEqual(@as(?usize, null), a.fetchRemove(.a)); + testing.expectEqual(@as(?usize, 9), a.fetchRemove(.c)); + a.remove(.c); + + var iter = a.iterator(); + const Entry = Map.Entry; + testing.expectEqual(@as(?Entry, Entry{ + .key = .b, .value = &a.values[1], + }), iter.next()); + testing.expectEqual(@as(?Entry, Entry{ + .key = .d, .value = &a.values[3], + }), iter.next()); + testing.expectEqual(@as(?Entry, null), iter.next()); +} diff --git a/lib/std/fs/path.zig b/lib/std/fs/path.zig index 776cb4040c..0bba522fb6 100644 --- a/lib/std/fs/path.zig +++ b/lib/std/fs/path.zig @@ -92,7 +92,7 @@ pub fn join(allocator: *Allocator, paths: []const []const u8) ![]u8 { /// Naively combines a series of paths with the native path seperator and null terminator. /// Allocates memory for the result, which must be freed by the caller. pub fn joinZ(allocator: *Allocator, paths: []const []const u8) ![:0]u8 { - const out = joinSepMaybeZ(allocator, sep, isSep, paths, true); + const out = try joinSepMaybeZ(allocator, sep, isSep, paths, true); return out[0 .. 
out.len - 1 :0]; } @@ -119,6 +119,16 @@ fn testJoinMaybeZPosix(paths: []const []const u8, expected: []const u8, zero: bo } test "join" { + { + const actual: []u8 = try join(testing.allocator, &[_][]const u8{}); + defer testing.allocator.free(actual); + testing.expectEqualSlices(u8, "", actual); + } + { + const actual: [:0]u8 = try joinZ(testing.allocator, &[_][]const u8{}); + defer testing.allocator.free(actual); + testing.expectEqualSlices(u8, "", actual); + } for (&[_]bool{ false, true }) |zero| { testJoinMaybeZWindows(&[_][]const u8{}, "", zero); testJoinMaybeZWindows(&[_][]const u8{ "c:\\a\\b", "c" }, "c:\\a\\b\\c", zero); diff --git a/lib/std/macho.zig b/lib/std/macho.zig index 6785abffca..f66626bafe 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -1227,6 +1227,24 @@ pub const S_ATTR_EXT_RELOC = 0x200; /// section has local relocation entries pub const S_ATTR_LOC_RELOC = 0x100; +/// template of initial values for TLVs +pub const S_THREAD_LOCAL_REGULAR = 0x11; + +/// template of initial values for TLVs +pub const S_THREAD_LOCAL_ZEROFILL = 0x12; + +/// TLV descriptors +pub const S_THREAD_LOCAL_VARIABLES = 0x13; + +/// pointers to TLV descriptors +pub const S_THREAD_LOCAL_VARIABLE_POINTERS = 0x14; + +/// functions to call to initialize TLV values +pub const S_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15; + +/// 32-bit offsets to initializers +pub const S_INIT_FUNC_OFFSETS = 0x16; + pub const cpu_type_t = integer_t; pub const cpu_subtype_t = integer_t; pub const integer_t = c_int; @@ -1422,6 +1440,14 @@ pub const EXPORT_SYMBOL_FLAGS_KIND_WEAK_DEFINITION: u8 = 0x04; pub const EXPORT_SYMBOL_FLAGS_REEXPORT: u8 = 0x08; pub const EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER: u8 = 0x10; +// An indirect symbol table entry is simply a 32bit index into the symbol table +// to the symbol that the pointer or stub is refering to. Unless it is for a +// non-lazy symbol pointer section for a defined symbol which strip(1) as +// removed. In which case it has the value INDIRECT_SYMBOL_LOCAL. If the +// symbol was also absolute INDIRECT_SYMBOL_ABS is or'ed with that. +pub const INDIRECT_SYMBOL_LOCAL: u32 = 0x80000000; +pub const INDIRECT_SYMBOL_ABS: u32 = 0x40000000; + // Codesign consts and structs taken from: // https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/kern/cs_blobs.h.auto.html @@ -1589,3 +1615,17 @@ pub const GenericBlob = extern struct { /// Total length of blob length: u32, }; + +/// The LC_DATA_IN_CODE load commands uses a linkedit_data_command +/// to point to an array of data_in_code_entry entries. Each entry +/// describes a range of data in a code section. +pub const data_in_code_entry = extern struct { + /// From mach_header to start of data range. + offset: u32, + + /// Number of bytes in data range. + length: u16, + + /// A DICE_KIND value. + kind: u16, +}; diff --git a/lib/std/meta.zig b/lib/std/meta.zig index fd3e03bdbd..cdc93e5d33 100644 --- a/lib/std/meta.zig +++ b/lib/std/meta.zig @@ -888,19 +888,20 @@ pub fn Vector(comptime len: u32, comptime child: type) type { /// Given a type and value, cast the value to the type as c would. /// This is for translate-c and is not intended for general use. 
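+/// For example, `cast(i32, @as(u32, 0x8000_0000))` wraps to `-2147483648` as a
+/// C assignment would, and `cast(*u8, @intToPtr(*const u8, 2))` discards the
+/// `const` qualifier.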
pub fn cast(comptime DestType: type, target: anytype) DestType { - const TargetType = @TypeOf(target); + // this function should behave like transCCast in translate-c, except it's for macros + const SourceType = @TypeOf(target); switch (@typeInfo(DestType)) { - .Pointer => |dest_ptr| { - switch (@typeInfo(TargetType)) { + .Pointer => { + switch (@typeInfo(SourceType)) { .Int, .ComptimeInt => { return @intToPtr(DestType, target); }, - .Pointer => |ptr| { - return @ptrCast(DestType, @alignCast(dest_ptr.alignment, target)); + .Pointer => { + return castPtr(DestType, target); }, .Optional => |opt| { if (@typeInfo(opt.child) == .Pointer) { - return @ptrCast(DestType, @alignCast(dest_ptr.alignment, target)); + return castPtr(DestType, target); } }, else => {}, @@ -908,17 +909,16 @@ pub fn cast(comptime DestType: type, target: anytype) DestType { }, .Optional => |dest_opt| { if (@typeInfo(dest_opt.child) == .Pointer) { - const dest_ptr = @typeInfo(dest_opt.child).Pointer; - switch (@typeInfo(TargetType)) { + switch (@typeInfo(SourceType)) { .Int, .ComptimeInt => { return @intToPtr(DestType, target); }, .Pointer => { - return @ptrCast(DestType, @alignCast(dest_ptr.alignment, target)); + return castPtr(DestType, target); }, .Optional => |target_opt| { if (@typeInfo(target_opt.child) == .Pointer) { - return @ptrCast(DestType, @alignCast(dest_ptr.alignment, target)); + return castPtr(DestType, target); } }, else => {}, @@ -926,25 +926,25 @@ pub fn cast(comptime DestType: type, target: anytype) DestType { } }, .Enum => { - if (@typeInfo(TargetType) == .Int or @typeInfo(TargetType) == .ComptimeInt) { + if (@typeInfo(SourceType) == .Int or @typeInfo(SourceType) == .ComptimeInt) { return @intToEnum(DestType, target); } }, - .Int, .ComptimeInt => { - switch (@typeInfo(TargetType)) { + .Int => { + switch (@typeInfo(SourceType)) { .Pointer => { - return @intCast(DestType, @ptrToInt(target)); + return castInt(DestType, @ptrToInt(target)); }, .Optional => |opt| { if (@typeInfo(opt.child) == .Pointer) { - return @intCast(DestType, @ptrToInt(target)); + return castInt(DestType, @ptrToInt(target)); } }, .Enum => { - return @intCast(DestType, @enumToInt(target)); + return castInt(DestType, @enumToInt(target)); }, - .Int, .ComptimeInt => { - return @intCast(DestType, target); + .Int => { + return castInt(DestType, target); }, else => {}, } @@ -954,6 +954,34 @@ pub fn cast(comptime DestType: type, target: anytype) DestType { return @as(DestType, target); } +fn castInt(comptime DestType: type, target: anytype) DestType { + const dest = @typeInfo(DestType).Int; + const source = @typeInfo(@TypeOf(target)).Int; + + if (dest.bits < source.bits) + return @bitCast(DestType, @truncate(Int(source.signedness, dest.bits), target)) + else + return @bitCast(DestType, @as(Int(source.signedness, dest.bits), target)); +} + +fn castPtr(comptime DestType: type, target: anytype) DestType { + const dest = ptrInfo(DestType); + const source = ptrInfo(@TypeOf(target)); + + if (source.is_const and !dest.is_const or source.is_volatile and !dest.is_volatile) + return @intToPtr(DestType, @ptrToInt(target)) + else + return @ptrCast(DestType, @alignCast(dest.alignment, target)); +} + +fn ptrInfo(comptime PtrType: type) TypeInfo.Pointer { + return switch(@typeInfo(PtrType)){ + .Optional => |opt_info| @typeInfo(opt_info.child).Pointer, + .Pointer => |ptr_info| ptr_info, + else => unreachable, + }; +} + test "std.meta.cast" { const E = enum(u2) { Zero, @@ -977,6 +1005,11 @@ test "std.meta.cast" { testing.expectEqual(@as(u32, 4), cast(u32, 
@intToPtr(?*u32, 4))); testing.expectEqual(@as(u32, 10), cast(u32, @as(u64, 10))); testing.expectEqual(@as(u8, 2), cast(u8, E.Two)); + + testing.expectEqual(@bitCast(i32, @as(u32, 0x8000_0000)), cast(i32, @as(u32, 0x8000_0000))); + + testing.expectEqual(@intToPtr(*u8, 2), cast(*u8, @intToPtr(*const u8, 2))); + testing.expectEqual(@intToPtr(*u8, 2), cast(*u8, @intToPtr(*volatile u8, 2))); } /// Given a value returns its size as C's sizeof operator would. diff --git a/lib/std/meta/trait.zig b/lib/std/meta/trait.zig index e67f9b9bc4..481bfe212b 100644 --- a/lib/std/meta/trait.zig +++ b/lib/std/meta/trait.zig @@ -408,6 +408,84 @@ test "std.meta.trait.isTuple" { testing.expect(isTuple(@TypeOf(t3))); } +/// Returns true if the passed type will coerce to []const u8. +/// Any of the following are considered strings: +/// ``` +/// []const u8, [:S]const u8, *const [N]u8, *const [N:S]u8, +/// []u8, [:S]u8, *[:S]u8, *[N:S]u8. +/// ``` +/// These types are not considered strings: +/// ``` +/// u8, [N]u8, [*]const u8, [*:0]const u8, +/// [*]const [N]u8, []const u16, []const i8, +/// *const u8, ?[]const u8, ?*const [N]u8. +/// ``` +pub fn isZigString(comptime T: type) bool { + comptime { + // Only pointer types can be strings, no optionals + const info = @typeInfo(T); + if (info != .Pointer) return false; + + const ptr = &info.Pointer; + // Check for CV qualifiers that would prevent coerction to []const u8 + if (ptr.is_volatile or ptr.is_allowzero) return false; + + // If it's already a slice, simple check. + if (ptr.size == .Slice) { + return ptr.child == u8; + } + + // Otherwise check if it's an array type that coerces to slice. + if (ptr.size == .One) { + const child = @typeInfo(ptr.child); + if (child == .Array) { + const arr = &child.Array; + return arr.child == u8; + } + } + + return false; + } +} + +test "std.meta.trait.isZigString" { + testing.expect(isZigString([]const u8)); + testing.expect(isZigString([]u8)); + testing.expect(isZigString([:0]const u8)); + testing.expect(isZigString([:0]u8)); + testing.expect(isZigString([:5]const u8)); + testing.expect(isZigString([:5]u8)); + testing.expect(isZigString(*const [0]u8)); + testing.expect(isZigString(*[0]u8)); + testing.expect(isZigString(*const [0:0]u8)); + testing.expect(isZigString(*[0:0]u8)); + testing.expect(isZigString(*const [0:5]u8)); + testing.expect(isZigString(*[0:5]u8)); + testing.expect(isZigString(*const [10]u8)); + testing.expect(isZigString(*[10]u8)); + testing.expect(isZigString(*const [10:0]u8)); + testing.expect(isZigString(*[10:0]u8)); + testing.expect(isZigString(*const [10:5]u8)); + testing.expect(isZigString(*[10:5]u8)); + + testing.expect(!isZigString(u8)); + testing.expect(!isZigString([4]u8)); + testing.expect(!isZigString([4:0]u8)); + testing.expect(!isZigString([*]const u8)); + testing.expect(!isZigString([*]const [4]u8)); + testing.expect(!isZigString([*c]const u8)); + testing.expect(!isZigString([*c]const [4]u8)); + testing.expect(!isZigString([*:0]const u8)); + testing.expect(!isZigString([*:0]const u8)); + testing.expect(!isZigString(*[]const u8)); + testing.expect(!isZigString(?[]const u8)); + testing.expect(!isZigString(?*const [4]u8)); + testing.expect(!isZigString([]allowzero u8)); + testing.expect(!isZigString([]volatile u8)); + testing.expect(!isZigString(*allowzero [4]u8)); + testing.expect(!isZigString(*volatile [4]u8)); +} + pub fn hasDecls(comptime T: type, comptime names: anytype) bool { inline for (names) |name| { if (!@hasDecl(T, name)) diff --git a/lib/std/os.zig b/lib/std/os.zig index 
362a58f7fb..8b552d05a7 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -2879,7 +2879,7 @@ pub fn bind(sock: socket_t, addr: *const sockaddr, len: socklen_t) BindError!voi unreachable; } -const ListenError = error{ +pub const ListenError = error{ /// Another socket is already listening on the same port. /// For Internet domain sockets, the socket referred to by sockfd had not previously /// been bound to an address and, upon attempting to bind it to an ephemeral port, it @@ -5827,7 +5827,7 @@ pub fn tcsetattr(handle: fd_t, optional_action: TCSA, termios_p: termios) Termio } } -const IoCtl_SIOCGIFINDEX_Error = error{ +pub const IoCtl_SIOCGIFINDEX_Error = error{ FileSystem, InterfaceNotFound, } || UnexpectedError; diff --git a/lib/std/std.zig b/lib/std/std.zig index a7e5bcb682..82249af157 100644 --- a/lib/std/std.zig +++ b/lib/std/std.zig @@ -20,6 +20,9 @@ pub const ComptimeStringMap = @import("comptime_string_map.zig").ComptimeStringM pub const DynLib = @import("dynamic_library.zig").DynLib; pub const DynamicBitSet = bit_set.DynamicBitSet; pub const DynamicBitSetUnmanaged = bit_set.DynamicBitSetUnmanaged; +pub const EnumArray = enums.EnumArray; +pub const EnumMap = enums.EnumMap; +pub const EnumSet = enums.EnumSet; pub const HashMap = hash_map.HashMap; pub const HashMapUnmanaged = hash_map.HashMapUnmanaged; pub const MultiArrayList = @import("multi_array_list.zig").MultiArrayList; @@ -54,6 +57,7 @@ pub const cstr = @import("cstr.zig"); pub const debug = @import("debug.zig"); pub const dwarf = @import("dwarf.zig"); pub const elf = @import("elf.zig"); +pub const enums = @import("enums.zig"); pub const event = @import("event.zig"); pub const fifo = @import("fifo.zig"); pub const fmt = @import("fmt.zig"); diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index a83e95e431..2a343a6edc 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -4,6 +4,31 @@ // The MIT license requires this copyright notice to be included in all copies // and substantial portions of the software. +test "zig fmt: respect line breaks in struct field value declaration" { + try testCanonical( + \\const Foo = struct { + \\ bar: u32 = + \\ 42, + \\ bar: u32 = + \\ // a comment + \\ 42, + \\ bar: u32 = + \\ 42, + \\ // a comment + \\ bar: []const u8 = + \\ \\ foo + \\ \\ bar + \\ \\ baz + \\ , + \\ bar: u32 = + \\ blk: { + \\ break :blk 42; + \\ }, + \\}; + \\ + ); +} + // TODO Remove this after zig 0.9.0 is released. 
test "zig fmt: rewrite inline functions as callconv(.Inline)" { try testTransform( @@ -3038,6 +3063,54 @@ test "zig fmt: switch" { \\} \\ ); + + try testTransform( + \\test { + \\ switch (x) { + \\ foo => + \\ "bar", + \\ } + \\} + \\ + , + \\test { + \\ switch (x) { + \\ foo => "bar", + \\ } + \\} + \\ + ); +} + +test "zig fmt: switch multiline string" { + try testCanonical( + \\test "switch multiline string" { + \\ const x: u32 = 0; + \\ const str = switch (x) { + \\ 1 => "one", + \\ 2 => + \\ \\ Comma after the multiline string + \\ \\ is needed + \\ , + \\ 3 => "three", + \\ else => "else", + \\ }; + \\ + \\ const Union = union(enum) { + \\ Int: i64, + \\ Float: f64, + \\ }; + \\ + \\ const str = switch (u) { + \\ Union.Int => |int| + \\ \\ Comma after the multiline string + \\ \\ is needed + \\ , + \\ Union.Float => |*float| unreachable, + \\ }; + \\} + \\ + ); } test "zig fmt: while" { diff --git a/lib/std/zig/render.zig b/lib/std/zig/render.zig index 9fe9b96f00..640f25829a 100644 --- a/lib/std/zig/render.zig +++ b/lib/std/zig/render.zig @@ -1159,8 +1159,29 @@ fn renderContainerField( try renderToken(ais, tree, rparen_token, .space); // ) } const eq_token = tree.firstToken(field.ast.value_expr) - 1; - try renderToken(ais, tree, eq_token, .space); // = - return renderExpressionComma(gpa, ais, tree, field.ast.value_expr, space); // value + const eq_space: Space = if (tree.tokensOnSameLine(eq_token, eq_token + 1)) .space else .newline; + { + ais.pushIndent(); + try renderToken(ais, tree, eq_token, eq_space); // = + ais.popIndent(); + } + + if (eq_space == .space) + return renderExpressionComma(gpa, ais, tree, field.ast.value_expr, space); // value + + const token_tags = tree.tokens.items(.tag); + const maybe_comma = tree.lastToken(field.ast.value_expr) + 1; + + if (token_tags[maybe_comma] == .comma) { + ais.pushIndent(); + try renderExpression(gpa, ais, tree, field.ast.value_expr, .none); // value + ais.popIndent(); + try renderToken(ais, tree, maybe_comma, space); + } else { + ais.pushIndent(); + try renderExpression(gpa, ais, tree, field.ast.value_expr, space); // value + ais.popIndent(); + } } fn renderBuiltinCall( @@ -1423,6 +1444,7 @@ fn renderSwitchCase( switch_case: ast.full.SwitchCase, space: Space, ) Error!void { + const node_tags = tree.nodes.items(.tag); const token_tags = tree.tokens.items(.tag); const trailing_comma = token_tags[switch_case.ast.arrow_token - 1] == .comma; @@ -1445,17 +1467,23 @@ fn renderSwitchCase( } // Render the arrow and everything after it - try renderToken(ais, tree, switch_case.ast.arrow_token, .space); + const pre_target_space = if (node_tags[switch_case.ast.target_expr] == .multiline_string_literal) + // Newline gets inserted when rendering the target expr. 
+ Space.none + else + Space.space; + const after_arrow_space: Space = if (switch_case.payload_token == null) pre_target_space else .space; + try renderToken(ais, tree, switch_case.ast.arrow_token, after_arrow_space); if (switch_case.payload_token) |payload_token| { try renderToken(ais, tree, payload_token - 1, .none); // pipe if (token_tags[payload_token] == .asterisk) { try renderToken(ais, tree, payload_token, .none); // asterisk try renderToken(ais, tree, payload_token + 1, .none); // identifier - try renderToken(ais, tree, payload_token + 2, .space); // pipe + try renderToken(ais, tree, payload_token + 2, pre_target_space); // pipe } else { try renderToken(ais, tree, payload_token, .none); // identifier - try renderToken(ais, tree, payload_token + 1, .space); // pipe + try renderToken(ais, tree, payload_token + 1, pre_target_space); // pipe } } diff --git a/src/BuiltinFn.zig b/src/BuiltinFn.zig index deb1cbfa76..1710169dc7 100644 --- a/src/BuiltinFn.zig +++ b/src/BuiltinFn.zig @@ -477,7 +477,7 @@ pub const list = list: { "@intCast", .{ .tag = .int_cast, - .param_count = 1, + .param_count = 2, }, }, .{ diff --git a/src/clang.zig b/src/clang.zig index 60afa28cf1..0d18ae42b3 100644 --- a/src/clang.zig +++ b/src/clang.zig @@ -537,6 +537,11 @@ pub const FunctionType = opaque { extern fn ZigClangFunctionType_getReturnType(*const FunctionType) QualType; }; +pub const GenericSelectionExpr = opaque { + pub const getResultExpr = ZigClangGenericSelectionExpr_getResultExpr; + extern fn ZigClangGenericSelectionExpr_getResultExpr(*const GenericSelectionExpr) *const Expr; +}; + pub const IfStmt = opaque { pub const getThen = ZigClangIfStmt_getThen; extern fn ZigClangIfStmt_getThen(*const IfStmt) *const Stmt; diff --git a/src/codegen.zig b/src/codegen.zig index 41afaac989..10abc34290 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -2133,9 +2133,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { if (inst.func.value()) |func_value| { if (func_value.castTag(.function)) |func_payload| { const func = func_payload.data; - const text_segment = &macho_file.load_commands.items[macho_file.text_segment_cmd_index.?].Segment; - const got = &text_segment.sections.items[macho_file.got_section_index.?]; - const got_addr = got.addr + func.owner_decl.link.macho.offset_table_index * @sizeOf(u64); + const got_addr = blk: { + const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; + const got = seg.sections.items[macho_file.got_section_index.?]; + break :blk got.addr + func.owner_decl.link.macho.offset_table_index * @sizeOf(u64); + }; + log.debug("got_addr = 0x{x}", .{got_addr}); switch (arch) { .x86_64 => { try self.genSetReg(inst.base.src, Type.initTag(.u32), .rax, .{ .memory = got_addr }); @@ -2153,8 +2156,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const decl = func_payload.data; const decl_name = try std.fmt.allocPrint(self.bin_file.allocator, "_{s}", .{decl.name}); defer self.bin_file.allocator.free(decl_name); - const already_defined = macho_file.extern_lazy_symbols.contains(decl_name); - const symbol: u32 = if (macho_file.extern_lazy_symbols.getIndex(decl_name)) |index| + const already_defined = macho_file.lazy_imports.contains(decl_name); + const symbol: u32 = if (macho_file.lazy_imports.getIndex(decl_name)) |index| @intCast(u32, index) else try macho_file.addExternSymbol(decl_name); @@ -3304,80 +3307,32 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }, .memory => |addr| { if (self.bin_file.options.pie) { - // For MachO, the binary, 
with the exception of object files, has to be a PIE. - // Therefore we cannot load an absolute address. - // Instead, we need to make use of PC-relative addressing. - if (reg.id() == 0) { // x0 is special-cased - // TODO This needs to be optimised in the stack usage (perhaps use a shadow stack - // like described here: - // https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/using-the-stack-in-aarch64-implementing-push-and-pop) - // str x28, [sp, #-16] - mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.str(.x28, Register.sp, .{ - .offset = Instruction.LoadStoreOffset.imm_pre_index(-16), - }).toU32()); - // adr x28, #8 - mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.adr(.x28, 8).toU32()); - if (self.bin_file.cast(link.File.MachO)) |macho_file| { - try macho_file.pie_fixups.append(self.bin_file.allocator, .{ - .address = addr, - .start = self.code.items.len, - .len = 4, - }); - } else { - return self.fail(src, "TODO implement genSetReg for PIE on this platform", .{}); - } - // b [label] - mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.b(0).toU32()); - // mov r, x0 - mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr( - reg, - .xzr, - .x0, - Instruction.Shift.none, - ).toU32()); - // ldr x28, [sp], #16 - mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ldr(.x28, .{ - .register = .{ - .rn = Register.sp, - .offset = Instruction.LoadStoreOffset.imm_post_index(16), - }, - }).toU32()); + // PC-relative displacement to the entry in the GOT table. + // TODO we should come up with our own, backend independent relocation types + // which each backend (Elf, MachO, etc.) would then translate into an actual + // fixup when linking. + // adrp reg, pages + if (self.bin_file.cast(link.File.MachO)) |macho_file| { + try macho_file.pie_fixups.append(self.bin_file.allocator, .{ + .target_addr = addr, + .offset = self.code.items.len, + .size = 4, + }); } else { - // stp x0, x28, [sp, #-16] - mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.stp( - .x0, - .x28, - Register.sp, - Instruction.LoadStorePairOffset.pre_index(-16), - ).toU32()); - // adr x28, #8 - mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.adr(.x28, 8).toU32()); - if (self.bin_file.cast(link.File.MachO)) |macho_file| { - try macho_file.pie_fixups.append(self.bin_file.allocator, .{ - .address = addr, - .start = self.code.items.len, - .len = 4, - }); - } else { - return self.fail(src, "TODO implement genSetReg for PIE on this platform", .{}); - } - // b [label] - mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.b(0).toU32()); - // mov r, x0 - mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr( - reg, - .xzr, - .x0, - Instruction.Shift.none, - ).toU32()); - // ldp x0, x28, [sp, #16] - mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ldp( - .x0, - .x28, - Register.sp, - Instruction.LoadStorePairOffset.post_index(16), - ).toU32()); + return self.fail(src, "TODO implement genSetReg for PIE GOT indirection on this platform", .{}); } + mem.writeIntLittle( + u32, + try self.code.addManyAsArray(4), + Instruction.adrp(reg, 0).toU32(), + ); + // ldr reg, reg, offset + mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ldr(reg, .{ + .register = .{ + .rn = reg, + .offset = Instruction.LoadStoreOffset.imm(0), + }, + }).toU32()); } else { // The value is in memory at a hard-coded address. 
// If the type is a pointer, it means the pointer address is at this memory location. @@ -3561,62 +3516,31 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }, .memory => |x| { if (self.bin_file.options.pie) { - // For MachO, the binary, with the exception of object files, has to be a PIE. - // Therefore, we cannot load an absolute address. - assert(x > math.maxInt(u32)); // 32bit direct addressing is not supported by MachO. - // The plan here is to use unconditional relative jump to GOT entry, where we store - // pre-calculated and stored effective address to load into the target register. - // We leave the actual displacement information empty (0-padded) and fixing it up - // later in the linker. - if (reg.id() == 0) { // %rax is special-cased - try self.code.ensureCapacity(self.code.items.len + 5); - if (self.bin_file.cast(link.File.MachO)) |macho_file| { - try macho_file.pie_fixups.append(self.bin_file.allocator, .{ - .address = x, - .start = self.code.items.len, - .len = 5, - }); - } else { - return self.fail(src, "TODO implement genSetReg for PIE on this platform", .{}); - } - // call [label] - self.code.appendSliceAssumeCapacity(&[_]u8{ - 0xE8, - 0x0, - 0x0, - 0x0, - 0x0, + // RIP-relative displacement to the entry in the GOT table. + // TODO we should come up with our own, backend independent relocation types + // which each backend (Elf, MachO, etc.) would then translate into an actual + // fixup when linking. + if (self.bin_file.cast(link.File.MachO)) |macho_file| { + try macho_file.pie_fixups.append(self.bin_file.allocator, .{ + .target_addr = x, + .offset = self.code.items.len + 3, + .size = 4, }); } else { - try self.code.ensureCapacity(self.code.items.len + 10); - // push %rax - self.code.appendSliceAssumeCapacity(&[_]u8{0x50}); - if (self.bin_file.cast(link.File.MachO)) |macho_file| { - try macho_file.pie_fixups.append(self.bin_file.allocator, .{ - .address = x, - .start = self.code.items.len, - .len = 5, - }); - } else { - return self.fail(src, "TODO implement genSetReg for PIE on this platform", .{}); - } - // call [label] - self.code.appendSliceAssumeCapacity(&[_]u8{ - 0xE8, - 0x0, - 0x0, - 0x0, - 0x0, - }); - // mov %r, %rax - self.code.appendSliceAssumeCapacity(&[_]u8{ - 0x48, - 0x89, - 0xC0 | @as(u8, reg.id()), - }); - // pop %rax - self.code.appendSliceAssumeCapacity(&[_]u8{0x58}); + return self.fail(src, "TODO implement genSetReg for PIE GOT indirection on this platform", .{}); } + try self.code.ensureCapacity(self.code.items.len + 7); + self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended() }); + self.code.appendSliceAssumeCapacity(&[_]u8{ + 0x8D, + 0x05 | (@as(u8, reg.id() & 0b111) << 3), + }); + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), 0); + + try self.code.ensureCapacity(self.code.items.len + 3); + self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended(), .r = reg.isExtended() }); + const RM = (@as(u8, reg.id() & 0b111) << 3) | @truncate(u3, reg.id()); + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, RM }); } else if (x <= math.maxInt(u32)) { // Moving from memory to a register is a variant of `8B /r`. // Since we're using 64-bit moves, we require a REX. 
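// For reference, a sketch of what the PIE branches above now emit; the page bits and
// displacement are left zeroed in the code stream and recorded as pie_fixups for the
// linker to patch (rax is just an example register -- REX/ModRM vary with reg):
//
//   aarch64:  adrp reg, <page of GOT entry>        ; PC-relative page address
//             ldr  reg, [reg, <page offset>]       ; load the pointer stored in the GOT slot
//
//   x86_64:   48 8d 05 00 00 00 00   lea rax, [rip + disp32]   ; address of the GOT entry
//             48 8b 00               mov rax, [rax]            ; load the pointer stored there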
@@ -3779,9 +3703,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return MCValue{ .memory = got_addr }; } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { const decl = payload.data; - const text_segment = &macho_file.load_commands.items[macho_file.text_segment_cmd_index.?].Segment; - const got = &text_segment.sections.items[macho_file.got_section_index.?]; - const got_addr = got.addr + decl.link.macho.offset_table_index * ptr_bytes; + const got_addr = blk: { + const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; + const got = seg.sections.items[macho_file.got_section_index.?]; + break :blk got.addr + decl.link.macho.offset_table_index * ptr_bytes; + }; return MCValue{ .memory = got_addr }; } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { const decl = payload.data; diff --git a/src/codegen/aarch64.zig b/src/codegen/aarch64.zig index 8abc616e2f..d06abeac07 100644 --- a/src/codegen/aarch64.zig +++ b/src/codegen/aarch64.zig @@ -221,7 +221,8 @@ pub const Instruction = union(enum) { offset: u12, opc: u2, op1: u2, - fixed: u4 = 0b111_0, + v: u1, + fixed: u3 = 0b111, size: u2, }, LoadStorePairOfRegisters: packed struct { @@ -505,6 +506,7 @@ pub const Instruction = union(enum) { .offset = offset.toU12(), .opc = opc, .op1 = op1, + .v = 0, .size = 0b10, }, }; @@ -517,6 +519,7 @@ pub const Instruction = union(enum) { .offset = offset.toU12(), .opc = opc, .op1 = op1, + .v = 0, .size = 0b11, }, }; diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index f087957f1d..7233dbdd07 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -219,7 +219,7 @@ pub const LLVMIRModule = struct { var error_message: [*:0]const u8 = undefined; var target: *const llvm.Target = undefined; - if (llvm.Target.getFromTriple(llvm_target_triple.ptr, &target, &error_message)) { + if (llvm.Target.getFromTriple(llvm_target_triple.ptr, &target, &error_message).toBool()) { defer llvm.disposeMessage(error_message); const stderr = std.io.getStdErr().writer(); @@ -303,7 +303,7 @@ pub const LLVMIRModule = struct { // verifyModule always allocs the error_message even if there is no error defer llvm.disposeMessage(error_message); - if (self.llvm_module.verify(.ReturnStatus, &error_message)) { + if (self.llvm_module.verify(.ReturnStatus, &error_message).toBool()) { const stderr = std.io.getStdErr().writer(); try stderr.print("broken LLVM module found: {s}\nThis is a bug in the Zig compiler.", .{error_message}); return error.BrokenLLVMModule; @@ -319,7 +319,7 @@ pub const LLVMIRModule = struct { object_pathZ.ptr, .ObjectFile, &error_message, - )) { + ).toBool()) { defer llvm.disposeMessage(error_message); const stderr = std.io.getStdErr().writer(); @@ -614,7 +614,7 @@ pub const LLVMIRModule = struct { var indices: [2]*const llvm.Value = .{ index_type.constNull(), - index_type.constInt(1, false), + index_type.constInt(1, .False), }; return self.builder.buildLoad(self.builder.buildInBoundsGEP(operand, &indices, 2, ""), ""); @@ -676,7 +676,7 @@ pub const LLVMIRModule = struct { const signed = inst.base.ty.isSignedInt(); // TODO: Should we use intcast here or just a simple bitcast? 
// LLVM does truncation vs bitcast (+signed extension) in the intcast depending on the sizes - return self.builder.buildIntCast2(val, try self.getLLVMType(inst.base.ty, inst.base.src), signed, ""); + return self.builder.buildIntCast2(val, try self.getLLVMType(inst.base.ty, inst.base.src), llvm.Bool.fromBool(signed), ""); } fn genBitCast(self: *LLVMIRModule, inst: *Inst.UnOp) !?*const llvm.Value { @@ -782,7 +782,7 @@ pub const LLVMIRModule = struct { if (bigint.limbs.len != 1) { return self.fail(src, "TODO implement bigger bigint", .{}); } - const llvm_int = llvm_type.constInt(bigint.limbs[0], false); + const llvm_int = llvm_type.constInt(bigint.limbs[0], .False); if (!bigint.positive) { return llvm.constNeg(llvm_int); } @@ -820,7 +820,7 @@ pub const LLVMIRModule = struct { return self.fail(src, "TODO handle other sentinel values", .{}); } else false; - return self.context.constString(payload.data.ptr, @intCast(c_uint, payload.data.len), !zero_sentinel); + return self.context.constString(payload.data.ptr, @intCast(c_uint, payload.data.len), llvm.Bool.fromBool(!zero_sentinel)); } else { return self.fail(src, "TODO handle more array values", .{}); } @@ -836,13 +836,13 @@ pub const LLVMIRModule = struct { llvm_child_type.constNull(), self.context.intType(1).constNull(), }; - return self.context.constStruct(&optional_values, 2, false); + return self.context.constStruct(&optional_values, 2, .False); } else { var optional_values: [2]*const llvm.Value = .{ try self.genTypedValue(src, .{ .ty = child_type, .val = tv.val }), self.context.intType(1).constAllOnes(), }; - return self.context.constStruct(&optional_values, 2, false); + return self.context.constStruct(&optional_values, 2, .False); } } else { return self.fail(src, "TODO implement const of optional pointer", .{}); @@ -882,7 +882,7 @@ pub const LLVMIRModule = struct { try self.getLLVMType(child_type, src), self.context.intType(1), }; - return self.context.structType(&optional_types, 2, false); + return self.context.structType(&optional_types, 2, .False); } else { return self.fail(src, "TODO implement optional pointers as actual pointers", .{}); } @@ -934,7 +934,7 @@ pub const LLVMIRModule = struct { try self.getLLVMType(return_type, src), if (fn_param_len == 0) null else llvm_param.ptr, @intCast(c_uint, fn_param_len), - false, + .False, ); const llvm_fn = self.llvm_module.addFunction(func.name, fn_type); diff --git a/src/codegen/llvm/bindings.zig b/src/codegen/llvm/bindings.zig index ccba3d9973..7217ca381e 100644 --- a/src/codegen/llvm/bindings.zig +++ b/src/codegen/llvm/bindings.zig @@ -1,7 +1,20 @@ //! We do this instead of @cImport because the self-hosted compiler is easier //! to bootstrap if it does not depend on translate-c. -const LLVMBool = bool; +/// Do not compare directly to .True, use toBool() instead. +pub const Bool = enum(c_int) { + False, + True, + _, + + pub fn fromBool(b: bool) Bool { + return @intToEnum(Bool, @boolToInt(b)); + } + + pub fn toBool(b: Bool) bool { + return b != .False; + } +}; pub const AttributeIndex = c_uint; /// Make sure to use the *InContext functions instead of the global ones. 
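// Usage sketch for the Bool wrapper above (identifiers are placeholders): the C API's
// LLVMBool is a plain int where any nonzero value means "true", so callers go through
// toBool() instead of comparing against .True:
//
//     if (llvm.Target.getFromTriple(triple.ptr, &target, &err_msg).toBool()) {
//         // nonzero => the call reported a failure
//     }
//     const is_var_arg = llvm.Bool.fromBool(false); // .False, passed on to the C API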
@@ -22,13 +35,13 @@ pub const Context = opaque { extern fn LLVMVoidTypeInContext(C: *const Context) *const Type; pub const structType = LLVMStructTypeInContext; - extern fn LLVMStructTypeInContext(C: *const Context, ElementTypes: [*]*const Type, ElementCount: c_uint, Packed: LLVMBool) *const Type; + extern fn LLVMStructTypeInContext(C: *const Context, ElementTypes: [*]*const Type, ElementCount: c_uint, Packed: Bool) *const Type; pub const constString = LLVMConstStringInContext; - extern fn LLVMConstStringInContext(C: *const Context, Str: [*]const u8, Length: c_uint, DontNullTerminate: LLVMBool) *const Value; + extern fn LLVMConstStringInContext(C: *const Context, Str: [*]const u8, Length: c_uint, DontNullTerminate: Bool) *const Value; pub const constStruct = LLVMConstStructInContext; - extern fn LLVMConstStructInContext(C: *const Context, ConstantVals: [*]*const Value, Count: c_uint, Packed: LLVMBool) *const Value; + extern fn LLVMConstStructInContext(C: *const Context, ConstantVals: [*]*const Value, Count: c_uint, Packed: Bool) *const Value; pub const createBasicBlock = LLVMCreateBasicBlockInContext; extern fn LLVMCreateBasicBlockInContext(C: *const Context, Name: [*:0]const u8) *const BasicBlock; @@ -59,7 +72,7 @@ pub const Value = opaque { pub const Type = opaque { pub const functionType = LLVMFunctionType; - extern fn LLVMFunctionType(ReturnType: *const Type, ParamTypes: ?[*]*const Type, ParamCount: c_uint, IsVarArg: LLVMBool) *const Type; + extern fn LLVMFunctionType(ReturnType: *const Type, ParamTypes: ?[*]*const Type, ParamCount: c_uint, IsVarArg: Bool) *const Type; pub const constNull = LLVMConstNull; extern fn LLVMConstNull(Ty: *const Type) *const Value; @@ -68,7 +81,7 @@ pub const Type = opaque { extern fn LLVMConstAllOnes(Ty: *const Type) *const Value; pub const constInt = LLVMConstInt; - extern fn LLVMConstInt(IntTy: *const Type, N: c_ulonglong, SignExtend: LLVMBool) *const Value; + extern fn LLVMConstInt(IntTy: *const Type, N: c_ulonglong, SignExtend: Bool) *const Value; pub const constArray = LLVMConstArray; extern fn LLVMConstArray(ElementTy: *const Type, ConstantVals: ?[*]*const Value, Length: c_uint) *const Value; @@ -91,7 +104,7 @@ pub const Module = opaque { extern fn LLVMDisposeModule(*const Module) void; pub const verify = LLVMVerifyModule; - extern fn LLVMVerifyModule(*const Module, Action: VerifierFailureAction, OutMessage: *[*:0]const u8) LLVMBool; + extern fn LLVMVerifyModule(*const Module, Action: VerifierFailureAction, OutMessage: *[*:0]const u8) Bool; pub const addFunction = LLVMAddFunction; extern fn LLVMAddFunction(*const Module, Name: [*:0]const u8, FunctionTy: *const Type) *const Value; @@ -191,7 +204,7 @@ pub const Builder = opaque { extern fn LLVMBuildNUWSub(*const Builder, LHS: *const Value, RHS: *const Value, Name: [*:0]const u8) *const Value; pub const buildIntCast2 = LLVMBuildIntCast2; - extern fn LLVMBuildIntCast2(*const Builder, Val: *const Value, DestTy: *const Type, IsSigned: LLVMBool, Name: [*:0]const u8) *const Value; + extern fn LLVMBuildIntCast2(*const Builder, Val: *const Value, DestTy: *const Type, IsSigned: Bool, Name: [*:0]const u8) *const Value; pub const buildBitCast = LLVMBuildBitCast; extern fn LLVMBuildBitCast(*const Builder, Val: *const Value, DestTy: *const Type, Name: [*:0]const u8) *const Value; @@ -258,7 +271,7 @@ pub const TargetMachine = opaque { Filename: [*:0]const u8, codegen: CodeGenFileType, ErrorMessage: *[*:0]const u8, - ) LLVMBool; + ) Bool; }; pub const CodeMode = extern enum { @@ -295,7 +308,7 @@ pub const 
CodeGenFileType = extern enum { pub const Target = opaque { pub const getFromTriple = LLVMGetTargetFromTriple; - extern fn LLVMGetTargetFromTriple(Triple: [*:0]const u8, T: **const Target, ErrorMessage: *[*:0]const u8) LLVMBool; + extern fn LLVMGetTargetFromTriple(Triple: [*:0]const u8, T: **const Target, ErrorMessage: *[*:0]const u8) Bool; }; extern fn LLVMInitializeAArch64TargetInfo() void; diff --git a/src/link/MachO.zig b/src/link/MachO.zig index c76cea9134..761a4a8e1d 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -11,7 +11,9 @@ const codegen = @import("../codegen.zig"); const aarch64 = @import("../codegen/aarch64.zig"); const math = std.math; const mem = std.mem; +const meta = std.meta; +const bind = @import("MachO/bind.zig"); const trace = @import("../tracy.zig").trace; const build_options = @import("build_options"); const Module = @import("../Module.zig"); @@ -24,9 +26,9 @@ const target_util = @import("../target.zig"); const DebugSymbols = @import("MachO/DebugSymbols.zig"); const Trie = @import("MachO/Trie.zig"); const CodeSignature = @import("MachO/CodeSignature.zig"); +const Zld = @import("MachO/Zld.zig"); usingnamespace @import("MachO/commands.zig"); -usingnamespace @import("MachO/imports.zig"); pub const base_tag: File.Tag = File.Tag.macho; @@ -87,14 +89,12 @@ code_signature_cmd_index: ?u16 = null, /// Index into __TEXT,__text section. text_section_index: ?u16 = null, -/// Index into __TEXT,__ziggot section. -got_section_index: ?u16 = null, /// Index into __TEXT,__stubs section. stubs_section_index: ?u16 = null, /// Index into __TEXT,__stub_helper section. stub_helper_section_index: ?u16 = null, /// Index into __DATA_CONST,__got section. -data_got_section_index: ?u16 = null, +got_section_index: ?u16 = null, /// Index into __DATA,__la_symbol_ptr section. la_symbol_ptr_section_index: ?u16 = null, /// Index into __DATA,__data section. @@ -104,16 +104,16 @@ entry_addr: ?u64 = null, /// Table of all local symbols /// Internally references string table for names (which are optional). -local_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, /// Table of all global symbols -global_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +globals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, /// Table of all extern nonlazy symbols, indexed by name. -extern_nonlazy_symbols: std.StringArrayHashMapUnmanaged(ExternSymbol) = .{}, +nonlazy_imports: std.StringArrayHashMapUnmanaged(Import) = .{}, /// Table of all extern lazy symbols, indexed by name. -extern_lazy_symbols: std.StringArrayHashMapUnmanaged(ExternSymbol) = .{}, +lazy_imports: std.StringArrayHashMapUnmanaged(Import) = .{}, -local_symbol_free_list: std.ArrayListUnmanaged(u32) = .{}, -global_symbol_free_list: std.ArrayListUnmanaged(u32) = .{}, +locals_free_list: std.ArrayListUnmanaged(u32) = .{}, +globals_free_list: std.ArrayListUnmanaged(u32) = .{}, offset_table_free_list: std.ArrayListUnmanaged(u32) = .{}, stub_helper_stubs_start_off: ?u64 = null, @@ -122,8 +122,8 @@ stub_helper_stubs_start_off: ?u64 = null, string_table: std.ArrayListUnmanaged(u8) = .{}, string_table_directory: std.StringHashMapUnmanaged(u32) = .{}, -/// Table of trampolines to the actual symbols in __text section. -offset_table: std.ArrayListUnmanaged(u64) = .{}, +/// Table of GOT entries. 
+offset_table: std.ArrayListUnmanaged(GOTEntry) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, @@ -154,14 +154,19 @@ string_table_needs_relocation: bool = false, /// allocate a fresh text block, which will have ideal capacity, and then grow it /// by 1 byte. It will then have -1 overcapacity. text_block_free_list: std.ArrayListUnmanaged(*TextBlock) = .{}, + /// Pointer to the last allocated text block last_text_block: ?*TextBlock = null, + /// A list of all PIE fixups required for this run of the linker. /// Warning, this is currently NOT thread-safe. See the TODO below. /// TODO Move this list inside `updateDecl` where it should be allocated /// prior to calling `generateSymbol`, and then immediately deallocated /// rather than sitting in the global scope. -pie_fixups: std.ArrayListUnmanaged(PieFixup) = .{}, +/// TODO We should also rewrite this using generic relocations common to all +/// backends. +pie_fixups: std.ArrayListUnmanaged(PIEFixup) = .{}, + /// A list of all stub (extern decls) fixups required for this run of the linker. /// Warning, this is currently NOT thread-safe. See the TODO below. /// TODO Move this list inside `updateDecl` where it should be allocated @@ -169,14 +174,42 @@ pie_fixups: std.ArrayListUnmanaged(PieFixup) = .{}, /// rather than sitting in the global scope. stub_fixups: std.ArrayListUnmanaged(StubFixup) = .{}, -pub const PieFixup = struct { - /// Target address we wanted to address in absolute terms. - address: u64, - /// Where in the byte stream we should perform the fixup. - start: usize, - /// The length of the byte stream. For x86_64, this will be - /// variable. For aarch64, it will be fixed at 4 bytes. - len: usize, +pub const GOTEntry = struct { + /// GOT entry can either be a local pointer or an extern (nonlazy) import. + kind: enum { + Local, + Extern, + }, + + /// Id to the macho.nlist_64 from the respective table: either locals or nonlazy imports. + /// TODO I'm more and more inclined to just manage a single, max two symbol tables + /// rather than 4 as we currently do, but I'll follow up in the future PR. + symbol: u32, + + /// Index of this entry in the GOT. + index: u32, +}; + +pub const Import = struct { + /// MachO symbol table entry. + symbol: macho.nlist_64, + + /// Id of the dynamic library where the specified entries can be found. + dylib_ordinal: i64, + + /// Index of this import within the import list. + index: u32, +}; + +pub const PIEFixup = struct { + /// Target VM address of this relocation. + target_addr: u64, + + /// Offset within the byte stream. + offset: usize, + + /// Size of the relocation. + size: usize, }; pub const StubFixup = struct { @@ -260,9 +293,9 @@ pub const TextBlock = struct { /// File offset relocation happens transparently, so it is not included in /// this calculation. fn capacity(self: TextBlock, macho_file: MachO) u64 { - const self_sym = macho_file.local_symbols.items[self.local_sym_index]; + const self_sym = macho_file.locals.items[self.local_sym_index]; if (self.next) |next| { - const next_sym = macho_file.local_symbols.items[next.local_sym_index]; + const next_sym = macho_file.locals.items[next.local_sym_index]; return next_sym.n_value - self_sym.n_value; } else { // We are the last block. @@ -274,8 +307,8 @@ pub const TextBlock = struct { fn freeListEligible(self: TextBlock, macho_file: MachO) bool { // No need to keep a free list node for the last block. 
const next = self.next orelse return false; - const self_sym = macho_file.local_symbols.items[self.local_sym_index]; - const next_sym = macho_file.local_symbols.items[next.local_sym_index]; + const self_sym = macho_file.locals.items[self.local_sym_index]; + const next_sym = macho_file.locals.items[next.local_sym_index]; const cap = next_sym.n_value - self_sym.n_value; const ideal_cap = padToIdeal(self.size); if (cap <= ideal_cap) return false; @@ -344,7 +377,7 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio }; // Index 0 is always a null symbol. - try self.local_symbols.append(allocator, .{ + try self.locals.append(allocator, .{ .n_strx = 0, .n_type = 0, .n_sect = 0, @@ -600,7 +633,74 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void { if (!mem.eql(u8, the_object_path, full_out_path)) { try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{}); } - } else { + } else outer: { + const use_zld = blk: { + if (self.base.options.is_native_os and self.base.options.system_linker_hack) { + // If the user forces the use of ld64, make sure we are running native! + break :blk false; + } + + if (self.base.options.target.cpu.arch == .aarch64) { + // On aarch64, always use zld. + break :blk true; + } + + if (self.base.options.link_libcpp or + self.base.options.output_mode == .Lib or + self.base.options.linker_script != null) + { + // Fallback to LLD in this handful of cases on x86_64 only. + break :blk false; + } + + break :blk true; + }; + + if (use_zld) { + var zld = Zld.init(self.base.allocator); + defer zld.deinit(); + zld.arch = target.cpu.arch; + + var input_files = std.ArrayList([]const u8).init(self.base.allocator); + defer input_files.deinit(); + // Positional arguments to the linker such as object files. + try input_files.appendSlice(self.base.options.objects); + for (comp.c_object_table.items()) |entry| { + try input_files.append(entry.key.status.success.object_path); + } + if (module_obj_path) |p| { + try input_files.append(p); + } + try input_files.append(comp.compiler_rt_static_lib.?.full_object_path); + // libc++ dep + if (self.base.options.link_libcpp) { + try input_files.append(comp.libcxxabi_static_lib.?.full_object_path); + try input_files.append(comp.libcxx_static_lib.?.full_object_path); + } + + if (self.base.options.verbose_link) { + var argv = std.ArrayList([]const u8).init(self.base.allocator); + defer argv.deinit(); + + try argv.append("zig"); + try argv.append("ld"); + + try argv.ensureCapacity(input_files.items.len); + for (input_files.items) |f| { + argv.appendAssumeCapacity(f); + } + + try argv.append("-o"); + try argv.append(full_out_path); + + Compilation.dump_argv(argv.items); + } + + try zld.link(input_files.items, full_out_path); + + break :outer; + } + // Create an LLD command line and invoke it. var argv = std.ArrayList([]const u8).init(self.base.allocator); defer argv.deinit(); @@ -871,119 +971,6 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void { log.warn("unexpected LLD stderr:\n{s}", .{stderr}); } } - - // At this stage, LLD has done its job. It is time to patch the resultant - // binaries up! 
- const out_file = try directory.handle.openFile(self.base.options.emit.?.sub_path, .{ .write = true }); - try self.parseFromFile(out_file); - - if (self.libsystem_cmd_index == null and self.header.?.filetype == macho.MH_EXECUTE) { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const text_section = text_segment.sections.items[self.text_section_index.?]; - const after_last_cmd_offset = self.header.?.sizeofcmds + @sizeOf(macho.mach_header_64); - const needed_size = padToIdeal(@sizeOf(macho.linkedit_data_command)); - - if (needed_size + after_last_cmd_offset > text_section.offset) { - log.err("Unable to extend padding between the end of load commands and start of __text section.", .{}); - log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size}); - log.err("fall back to the system linker by exporting 'ZIG_SYSTEM_LINKER_HACK=1'.", .{}); - return error.NotEnoughPadding; - } - - // Calculate next available dylib ordinal. - const next_ordinal = blk: { - var ordinal: u32 = 1; - for (self.load_commands.items) |cmd| { - switch (cmd) { - .Dylib => ordinal += 1, - else => {}, - } - } - break :blk ordinal; - }; - - // Add load dylib load command - self.libsystem_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.dylib_command) + mem.lenZ(LIB_SYSTEM_PATH), - @sizeOf(u64), - )); - // TODO Find a way to work out runtime version from the OS version triple stored in std.Target. - // In the meantime, we're gonna hardcode to the minimum compatibility version of 0.0.0. - const min_version = 0x0; - var dylib_cmd = emptyGenericCommandWithData(macho.dylib_command{ - .cmd = macho.LC_LOAD_DYLIB, - .cmdsize = cmdsize, - .dylib = .{ - .name = @sizeOf(macho.dylib_command), - .timestamp = 2, // not sure why not simply 0; this is reverse engineered from Mach-O files - .current_version = min_version, - .compatibility_version = min_version, - }, - }); - dylib_cmd.data = try self.base.allocator.alloc(u8, cmdsize - dylib_cmd.inner.dylib.name); - mem.set(u8, dylib_cmd.data, 0); - mem.copy(u8, dylib_cmd.data, mem.spanZ(LIB_SYSTEM_PATH)); - try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); - self.header_dirty = true; - self.load_commands_dirty = true; - - if (self.symtab_cmd_index == null or self.dysymtab_cmd_index == null) { - log.err("Incomplete Mach-O binary: no LC_SYMTAB or LC_DYSYMTAB load command found!", .{}); - log.err("Without the symbol table, it is not possible to patch up the binary for cross-compilation.", .{}); - return error.NoSymbolTableFound; - } - - // Patch dyld info - try self.fixupBindInfo(next_ordinal); - try self.fixupLazyBindInfo(next_ordinal); - - // Write updated load commands and the header - try self.writeLoadCommands(); - try self.writeHeader(); - - assert(!self.header_dirty); - assert(!self.load_commands_dirty); - } - if (self.code_signature_cmd_index == null) outer: { - if (target.cpu.arch != .aarch64) break :outer; // This is currently needed only for aarch64 targets. 
- const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const text_section = text_segment.sections.items[self.text_section_index.?]; - const after_last_cmd_offset = self.header.?.sizeofcmds + @sizeOf(macho.mach_header_64); - const needed_size = padToIdeal(@sizeOf(macho.linkedit_data_command)); - - if (needed_size + after_last_cmd_offset > text_section.offset) { - log.err("Unable to extend padding between the end of load commands and start of __text section.", .{}); - log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size}); - log.err("fall back to the system linker by exporting 'ZIG_SYSTEM_LINKER_HACK=1'.", .{}); - return error.NotEnoughPadding; - } - - // Add code signature load command - self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .LinkeditData = .{ - .cmd = macho.LC_CODE_SIGNATURE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - self.header_dirty = true; - self.load_commands_dirty = true; - - // Pad out space for code signature - try self.writeCodeSignaturePadding(); - // Write updated load commands and the header - try self.writeLoadCommands(); - try self.writeHeader(); - // Generate adhoc code signature - try self.writeCodeSignature(); - - assert(!self.header_dirty); - assert(!self.load_commands_dirty); - } } } @@ -1019,14 +1006,14 @@ pub fn deinit(self: *MachO) void { if (self.d_sym) |*ds| { ds.deinit(self.base.allocator); } - for (self.extern_lazy_symbols.items()) |*entry| { + for (self.lazy_imports.items()) |*entry| { self.base.allocator.free(entry.key); } - self.extern_lazy_symbols.deinit(self.base.allocator); - for (self.extern_nonlazy_symbols.items()) |*entry| { + self.lazy_imports.deinit(self.base.allocator); + for (self.nonlazy_imports.items()) |*entry| { self.base.allocator.free(entry.key); } - self.extern_nonlazy_symbols.deinit(self.base.allocator); + self.nonlazy_imports.deinit(self.base.allocator); self.pie_fixups.deinit(self.base.allocator); self.stub_fixups.deinit(self.base.allocator); self.text_block_free_list.deinit(self.base.allocator); @@ -1040,10 +1027,10 @@ pub fn deinit(self: *MachO) void { } self.string_table_directory.deinit(self.base.allocator); self.string_table.deinit(self.base.allocator); - self.global_symbols.deinit(self.base.allocator); - self.global_symbol_free_list.deinit(self.base.allocator); - self.local_symbols.deinit(self.base.allocator); - self.local_symbol_free_list.deinit(self.base.allocator); + self.globals.deinit(self.base.allocator); + self.globals_free_list.deinit(self.base.allocator); + self.locals.deinit(self.base.allocator); + self.locals_free_list.deinit(self.base.allocator); for (self.load_commands.items) |*lc| { lc.deinit(self.base.allocator); } @@ -1098,7 +1085,7 @@ fn shrinkTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64) vo } fn growTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, alignment: u64) !u64 { - const sym = self.local_symbols.items[text_block.local_sym_index]; + const sym = self.locals.items[text_block.local_sym_index]; const align_ok = mem.alignBackwardGeneric(u64, sym.n_value, alignment) == sym.n_value; const need_realloc = !align_ok or new_block_size > text_block.capacity(self.*); if (!need_realloc) return sym.n_value; @@ -1108,34 +1095,41 @@ fn growTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, alig pub fn allocateDeclIndexes(self: *MachO, decl: 
*Module.Decl) !void { if (decl.link.macho.local_sym_index != 0) return; - try self.local_symbols.ensureCapacity(self.base.allocator, self.local_symbols.items.len + 1); + try self.locals.ensureCapacity(self.base.allocator, self.locals.items.len + 1); try self.offset_table.ensureCapacity(self.base.allocator, self.offset_table.items.len + 1); - if (self.local_symbol_free_list.popOrNull()) |i| { + if (self.locals_free_list.popOrNull()) |i| { log.debug("reusing symbol index {d} for {s}", .{ i, decl.name }); decl.link.macho.local_sym_index = i; } else { - log.debug("allocating symbol index {d} for {s}", .{ self.local_symbols.items.len, decl.name }); - decl.link.macho.local_sym_index = @intCast(u32, self.local_symbols.items.len); - _ = self.local_symbols.addOneAssumeCapacity(); + log.debug("allocating symbol index {d} for {s}", .{ self.locals.items.len, decl.name }); + decl.link.macho.local_sym_index = @intCast(u32, self.locals.items.len); + _ = self.locals.addOneAssumeCapacity(); } if (self.offset_table_free_list.popOrNull()) |i| { + log.debug("reusing offset table entry index {d} for {s}", .{ i, decl.name }); decl.link.macho.offset_table_index = i; } else { + log.debug("allocating offset table entry index {d} for {s}", .{ self.offset_table.items.len, decl.name }); decl.link.macho.offset_table_index = @intCast(u32, self.offset_table.items.len); _ = self.offset_table.addOneAssumeCapacity(); self.offset_table_count_dirty = true; + self.rebase_info_dirty = true; } - self.local_symbols.items[decl.link.macho.local_sym_index] = .{ + self.locals.items[decl.link.macho.local_sym_index] = .{ .n_strx = 0, .n_type = 0, .n_sect = 0, .n_desc = 0, .n_value = 0, }; - self.offset_table.items[decl.link.macho.offset_table_index] = 0; + self.offset_table.items[decl.link.macho.offset_table_index] = .{ + .kind = .Local, + .symbol = decl.link.macho.local_sym_index, + .index = decl.link.macho.offset_table_index, + }; } pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { @@ -1178,8 +1172,9 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { .externally_managed => |x| x, .appended => code_buffer.items, .fail => |em| { - // Clear any PIE fixups and stub fixups for this decl. + // Clear any PIE fixups for this decl. self.pie_fixups.shrinkRetainingCapacity(0); + // Clear any stub fixups for this decl. 
self.stub_fixups.shrinkRetainingCapacity(0); decl.analysis = .codegen_failure; try module.failed_decls.put(module.gpa, decl, em); @@ -1189,7 +1184,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { const required_alignment = typed_value.ty.abiAlignment(self.base.options.target); assert(decl.link.macho.local_sym_index != 0); // Caller forgot to call allocateDeclIndexes() - const symbol = &self.local_symbols.items[decl.link.macho.local_sym_index]; + const symbol = &self.locals.items[decl.link.macho.local_sym_index]; if (decl.link.macho.size != 0) { const capacity = decl.link.macho.capacity(self.*); @@ -1198,9 +1193,12 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { const vaddr = try self.growTextBlock(&decl.link.macho, code.len, required_alignment); log.debug("growing {s} from 0x{x} to 0x{x}", .{ decl.name, symbol.n_value, vaddr }); if (vaddr != symbol.n_value) { - symbol.n_value = vaddr; log.debug(" (writing new offset table entry)", .{}); - self.offset_table.items[decl.link.macho.offset_table_index] = vaddr; + self.offset_table.items[decl.link.macho.offset_table_index] = .{ + .kind = .Local, + .symbol = decl.link.macho.local_sym_index, + .index = decl.link.macho.offset_table_index, + }; try self.writeOffsetTableEntry(decl.link.macho.offset_table_index); } } else if (code.len < decl.link.macho.size) { @@ -1229,7 +1227,11 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { .n_desc = 0, .n_value = addr, }; - self.offset_table.items[decl.link.macho.offset_table_index] = addr; + self.offset_table.items[decl.link.macho.offset_table_index] = .{ + .kind = .Local, + .symbol = decl.link.macho.local_sym_index, + .index = decl.link.macho.offset_table_index, + }; try self.writeLocalSymbol(decl.link.macho.local_sym_index); if (self.d_sym) |*ds| @@ -1237,30 +1239,48 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { try self.writeOffsetTableEntry(decl.link.macho.offset_table_index); } - // Perform PIE fixups (if any) - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const got_section = text_segment.sections.items[self.got_section_index.?]; + // Calculate displacements to target addr (if any). while (self.pie_fixups.popOrNull()) |fixup| { - const target_addr = fixup.address; - const this_addr = symbol.n_value + fixup.start; + assert(fixup.size == 4); + const this_addr = symbol.n_value + fixup.offset; + const target_addr = fixup.target_addr; + switch (self.base.options.target.cpu.arch) { .x86_64 => { - assert(target_addr >= this_addr + fixup.len); - const displacement = try math.cast(u32, target_addr - this_addr - fixup.len); - var placeholder = code_buffer.items[fixup.start + fixup.len - @sizeOf(u32) ..][0..@sizeOf(u32)]; - mem.writeIntSliceLittle(u32, placeholder, displacement); + const displacement = try math.cast(u32, target_addr - this_addr - 4); + mem.writeIntLittle(u32, code_buffer.items[fixup.offset..][0..4], displacement); }, .aarch64 => { - assert(target_addr >= this_addr); - const displacement = try math.cast(u27, target_addr - this_addr); - var placeholder = code_buffer.items[fixup.start..][0..fixup.len]; - mem.writeIntSliceLittle(u32, placeholder, aarch64.Instruction.b(@as(i28, displacement)).toU32()); + // TODO optimize instruction based on jump length (use ldr(literal) + nop if possible). 
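// Worked example of the fixup below, using hypothetical addresses: if the adrp sits at
// this_addr = 0x100003f80 and the GOT entry is at target_addr = 0x100008010, then
//   this_page = 0x100003, target_page = 0x100008, pages = 5
//   immhi = pages >> 2 = 1, immlo = pages & 3 = 1                  (written into the adrp)
//   narrowed = target_addr & 0xfff = 0x010, offset = 0x010 / 8 = 2 (written into the ldr)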
+ { + const inst = code_buffer.items[fixup.offset..][0..4]; + var parsed = mem.bytesAsValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.PCRelativeAddress, + ), inst); + const this_page = @intCast(i32, this_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); + const pages = @bitCast(u21, @intCast(i21, target_page - this_page)); + parsed.immhi = @truncate(u19, pages >> 2); + parsed.immlo = @truncate(u2, pages); + } + { + const inst = code_buffer.items[fixup.offset + 4 ..][0..4]; + var parsed = mem.bytesAsValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.LoadStoreRegister, + ), inst); + const narrowed = @truncate(u12, target_addr); + const offset = try math.divExact(u12, narrowed, 8); + parsed.offset = offset; + } }, else => unreachable, // unsupported target architecture } } // Resolve stubs (if any) + const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const stubs = text_segment.sections.items[self.stubs_section_index.?]; for (self.stub_fixups.items) |fixup| { const stub_addr = stubs.addr + fixup.symbol * stubs.reserved2; @@ -1285,9 +1305,6 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { try self.writeStubInStubHelper(fixup.symbol); try self.writeLazySymbolPointer(fixup.symbol); - const extern_sym = &self.extern_lazy_symbols.items()[fixup.symbol].value; - extern_sym.segment = self.data_segment_cmd_index.?; - extern_sym.offset = fixup.symbol * @sizeOf(u64); self.rebase_info_dirty = true; self.lazy_binding_info_dirty = true; } @@ -1329,9 +1346,9 @@ pub fn updateDeclExports( const tracy = trace(@src()); defer tracy.end(); - try self.global_symbols.ensureCapacity(self.base.allocator, self.global_symbols.items.len + exports.len); + try self.globals.ensureCapacity(self.base.allocator, self.globals.items.len + exports.len); if (decl.link.macho.local_sym_index == 0) return; - const decl_sym = &self.local_symbols.items[decl.link.macho.local_sym_index]; + const decl_sym = &self.locals.items[decl.link.macho.local_sym_index]; for (exports) |exp| { if (exp.options.section) |section_name| { @@ -1364,7 +1381,7 @@ pub fn updateDeclExports( }; const n_type = decl_sym.n_type | macho.N_EXT; if (exp.link.macho.sym_index) |i| { - const sym = &self.global_symbols.items[i]; + const sym = &self.globals.items[i]; sym.* = .{ .n_strx = try self.updateString(sym.n_strx, exp.options.name), .n_type = n_type, @@ -1374,12 +1391,12 @@ pub fn updateDeclExports( }; } else { const name_str_index = try self.makeString(exp.options.name); - const i = if (self.global_symbol_free_list.popOrNull()) |i| i else blk: { - _ = self.global_symbols.addOneAssumeCapacity(); + const i = if (self.globals_free_list.popOrNull()) |i| i else blk: { + _ = self.globals.addOneAssumeCapacity(); self.export_info_dirty = true; - break :blk self.global_symbols.items.len - 1; + break :blk self.globals.items.len - 1; }; - self.global_symbols.items[i] = .{ + self.globals.items[i] = .{ .n_strx = name_str_index, .n_type = n_type, .n_sect = @intCast(u8, self.text_section_index.?) 
+ 1, @@ -1394,18 +1411,18 @@ pub fn updateDeclExports( pub fn deleteExport(self: *MachO, exp: Export) void { const sym_index = exp.sym_index orelse return; - self.global_symbol_free_list.append(self.base.allocator, sym_index) catch {}; - self.global_symbols.items[sym_index].n_type = 0; + self.globals_free_list.append(self.base.allocator, sym_index) catch {}; + self.globals.items[sym_index].n_type = 0; } pub fn freeDecl(self: *MachO, decl: *Module.Decl) void { // Appending to free lists is allowed to fail because the free lists are heuristics based anyway. self.freeTextBlock(&decl.link.macho); if (decl.link.macho.local_sym_index != 0) { - self.local_symbol_free_list.append(self.base.allocator, decl.link.macho.local_sym_index) catch {}; + self.locals_free_list.append(self.base.allocator, decl.link.macho.local_sym_index) catch {}; self.offset_table_free_list.append(self.base.allocator, decl.link.macho.offset_table_index) catch {}; - self.local_symbols.items[decl.link.macho.local_sym_index].n_type = 0; + self.locals.items[decl.link.macho.local_sym_index].n_type = 0; decl.link.macho.local_sym_index = 0; } @@ -1413,7 +1430,7 @@ pub fn freeDecl(self: *MachO, decl: *Module.Decl) void { pub fn getDeclVAddr(self: *MachO, decl: *const Module.Decl) u64 { assert(decl.link.macho.local_sym_index != 0); - return self.local_symbols.items[decl.link.macho.local_sym_index].n_value; + return self.locals.items[decl.link.macho.local_sym_index].n_value; } pub fn populateMissingMetadata(self: *MachO) !void { @@ -1553,39 +1570,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { self.header_dirty = true; self.load_commands_dirty = true; } - if (self.got_section_index == null) { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.got_section_index = @intCast(u16, text_segment.sections.items.len); - - const alignment: u2 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - const flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS; - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = text_segment.findFreeSpace(needed_size, @alignOf(u64), self.header_pad); - assert(off + needed_size <= text_segment.inner.fileoff + text_segment.inner.filesize); // TODO Must expand __TEXT segment. 
- - log.debug("found __ziggot section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try text_segment.addSection(self.base.allocator, .{ - .sectname = makeStaticString("__ziggot"), - .segname = makeStaticString("__TEXT"), - .addr = text_segment.inner.vmaddr + off, - .size = needed_size, - .offset = @intCast(u32, off), - .@"align" = alignment, - .reloff = 0, - .nreloc = 0, - .flags = flags, - .reserved1 = 0, - .reserved2 = 0, - .reserved3 = 0, - }); - self.header_dirty = true; - self.load_commands_dirty = true; - } if (self.stubs_section_index == null) { const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; self.stubs_section_index = @intCast(u16, text_segment.sections.items.len); @@ -1597,7 +1581,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { }; const stub_size: u4 = switch (self.base.options.target.cpu.arch) { .x86_64 => 6, - .aarch64 => 2 * @sizeOf(u32), + .aarch64 => 3 * @sizeOf(u32), else => unreachable, // unhandled architecture type }; const flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS; @@ -1686,9 +1670,9 @@ pub fn populateMissingMetadata(self: *MachO) !void { self.header_dirty = true; self.load_commands_dirty = true; } - if (self.data_got_section_index == null) { + if (self.got_section_index == null) { const dc_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - self.data_got_section_index = @intCast(u16, dc_segment.sections.items.len); + self.got_section_index = @intCast(u16, dc_segment.sections.items.len); const flags = macho.S_NON_LAZY_SYMBOL_POINTERS; const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; @@ -2060,12 +2044,12 @@ pub fn populateMissingMetadata(self: *MachO) !void { self.header_dirty = true; self.load_commands_dirty = true; } - if (!self.extern_nonlazy_symbols.contains("dyld_stub_binder")) { - const index = @intCast(u32, self.extern_nonlazy_symbols.items().len); + if (!self.nonlazy_imports.contains("dyld_stub_binder")) { + const index = @intCast(u32, self.nonlazy_imports.items().len); const name = try self.base.allocator.dupe(u8, "dyld_stub_binder"); const offset = try self.makeString("dyld_stub_binder"); - try self.extern_nonlazy_symbols.putNoClobber(self.base.allocator, name, .{ - .inner = .{ + try self.nonlazy_imports.putNoClobber(self.base.allocator, name, .{ + .symbol = .{ .n_strx = offset, .n_type = std.macho.N_UNDF | std.macho.N_EXT, .n_sect = 0, @@ -2073,68 +2057,19 @@ pub fn populateMissingMetadata(self: *MachO) !void { .n_value = 0, }, .dylib_ordinal = 1, // TODO this is currently hardcoded. 
- .segment = self.data_const_segment_cmd_index.?, - .offset = index * @sizeOf(u64), + .index = index, }); + const off_index = @intCast(u32, self.offset_table.items.len); + try self.offset_table.append(self.base.allocator, .{ + .kind = .Extern, + .symbol = index, + .index = off_index, + }); + try self.writeOffsetTableEntry(off_index); self.binding_info_dirty = true; } if (self.stub_helper_stubs_start_off == null) { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const data = &data_segment.sections.items[self.data_section_index.?]; - const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_segment.sections.items[self.data_got_section_index.?]; - switch (self.base.options.target.cpu.arch) { - .x86_64 => { - const code_size = 15; - var code: [code_size]u8 = undefined; - // lea %r11, [rip + disp] - code[0] = 0x4c; - code[1] = 0x8d; - code[2] = 0x1d; - { - const displacement = try math.cast(u32, data.addr - stub_helper.addr - 7); - mem.writeIntLittle(u32, code[3..7], displacement); - } - // push %r11 - code[7] = 0x41; - code[8] = 0x53; - // jmp [rip + disp] - code[9] = 0xff; - code[10] = 0x25; - { - const displacement = try math.cast(u32, got.addr - stub_helper.addr - code_size); - mem.writeIntLittle(u32, code[11..], displacement); - } - self.stub_helper_stubs_start_off = stub_helper.offset + code_size; - try self.base.file.?.pwriteAll(&code, stub_helper.offset); - }, - .aarch64 => { - var code: [4 * @sizeOf(u32)]u8 = undefined; - { - const displacement = try math.cast(i21, data.addr - stub_helper.addr); - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32()); - } - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.stp( - .x16, - .x17, - aarch64.Register.sp, - aarch64.Instruction.LoadStorePairOffset.pre_index(-16), - ).toU32()); - { - const displacement = try math.divExact(u64, got.addr - stub_helper.addr - 2 * @sizeOf(u32), 4); - const literal = try math.cast(u19, displacement); - mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - } - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.br(.x16).toU32()); - self.stub_helper_stubs_start_off = stub_helper.offset + 4 * @sizeOf(u32); - try self.base.file.?.pwriteAll(&code, stub_helper.offset); - }, - else => unreachable, - } + try self.writeStubHelperPreamble(); } } @@ -2159,7 +2094,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, const big_block = self.text_block_free_list.items[i]; // We now have a pointer to a live text block that has too much capacity. // Is it enough that we could fit this new text block? 
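// Note that padToIdeal adds roughly an extra size / ideal_factor of slack on top of the measured size,
// so reused blocks keep some headroom and nearby Decls can grow in place before a move becomes necessary.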
- const sym = self.local_symbols.items[big_block.local_sym_index]; + const sym = self.locals.items[big_block.local_sym_index]; const capacity = big_block.capacity(self.*); const ideal_capacity = padToIdeal(capacity); const ideal_capacity_end_vaddr = sym.n_value + ideal_capacity; @@ -2190,7 +2125,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, } break :blk new_start_vaddr; } else if (self.last_text_block) |last| { - const last_symbol = self.local_symbols.items[last.local_sym_index]; + const last_symbol = self.locals.items[last.local_sym_index]; // TODO We should pad out the excess capacity with NOPs. For executables, // no padding seems to be OK, but it will probably not be for objects. const ideal_capacity = padToIdeal(last.size); @@ -2288,12 +2223,12 @@ fn updateString(self: *MachO, old_str_off: u32, new_name: []const u8) !u32 { } pub fn addExternSymbol(self: *MachO, name: []const u8) !u32 { - const index = @intCast(u32, self.extern_lazy_symbols.items().len); + const index = @intCast(u32, self.lazy_imports.items().len); const offset = try self.makeString(name); const sym_name = try self.base.allocator.dupe(u8, name); const dylib_ordinal = 1; // TODO this is now hardcoded, since we only support libSystem. - try self.extern_lazy_symbols.putNoClobber(self.base.allocator, sym_name, .{ - .inner = .{ + try self.lazy_imports.putNoClobber(self.base.allocator, sym_name, .{ + .symbol = .{ .n_strx = offset, .n_type = macho.N_UNDF | macho.N_EXT, .n_sect = 0, @@ -2301,6 +2236,7 @@ pub fn addExternSymbol(self: *MachO, name: []const u8) !u32 { .n_value = 0, }, .dylib_ordinal = dylib_ordinal, + .index = index, }); log.debug("adding new extern symbol '{s}' with dylib ordinal '{}'", .{ name, dylib_ordinal }); return index; @@ -2459,41 +2395,29 @@ fn findFreeSpaceLinkedit(self: *MachO, object_size: u64, min_alignment: u16, sta } fn writeOffsetTableEntry(self: *MachO, index: usize) !void { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const sect = &text_segment.sections.items[self.got_section_index.?]; + const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[self.got_section_index.?]; const off = sect.offset + @sizeOf(u64) * index; - const vmaddr = sect.addr + @sizeOf(u64) * index; if (self.offset_table_count_dirty) { // TODO relocate. 
self.offset_table_count_dirty = false; } - var code: [8]u8 = undefined; - switch (self.base.options.target.cpu.arch) { - .x86_64 => { - const pos_symbol_off = try math.cast(u31, vmaddr - self.offset_table.items[index] + 7); - const symbol_off = @bitCast(u32, @as(i32, pos_symbol_off) * -1); - // lea %rax, [rip - disp] - code[0] = 0x48; - code[1] = 0x8D; - code[2] = 0x5; - mem.writeIntLittle(u32, code[3..7], symbol_off); - // ret - code[7] = 0xC3; - }, - .aarch64 => { - const pos_symbol_off = try math.cast(u20, vmaddr - self.offset_table.items[index]); - const symbol_off = @as(i21, pos_symbol_off) * -1; - // adr x0, #-disp - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x0, symbol_off).toU32()); - // ret x28 - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ret(.x28).toU32()); - }, - else => unreachable, // unsupported target architecture - } - log.debug("writing offset table entry 0x{x} at 0x{x}", .{ self.offset_table.items[index], off }); - try self.base.file.?.pwriteAll(&code, off); + const got_entry = self.offset_table.items[index]; + const sym = blk: { + switch (got_entry.kind) { + .Local => { + break :blk self.locals.items[got_entry.symbol]; + }, + .Extern => { + break :blk self.nonlazy_imports.items()[got_entry.symbol].value.symbol; + }, + } + }; + const sym_name = self.getString(sym.n_strx); + log.debug("writing offset table entry [ 0x{x} => 0x{x} ({s}) ]", .{ off, sym.n_value, sym_name }); + try self.base.file.?.pwriteAll(mem.asBytes(&sym.n_value), off); } fn writeLazySymbolPointer(self: *MachO, index: u32) !void { @@ -2516,6 +2440,133 @@ fn writeLazySymbolPointer(self: *MachO, index: u32) !void { try self.base.file.?.pwriteAll(&buf, off); } +fn writeStubHelperPreamble(self: *MachO) !void { + const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?]; + const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const got = &data_const_segment.sections.items[self.got_section_index.?]; + const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const data = &data_segment.sections.items[self.data_section_index.?]; + + switch (self.base.options.target.cpu.arch) { + .x86_64 => { + const code_size = 15; + var code: [code_size]u8 = undefined; + // lea %r11, [rip + disp] + code[0] = 0x4c; + code[1] = 0x8d; + code[2] = 0x1d; + { + const target_addr = data.addr; + const displacement = try math.cast(u32, target_addr - stub_helper.addr - 7); + mem.writeIntLittle(u32, code[3..7], displacement); + } + // push %r11 + code[7] = 0x41; + code[8] = 0x53; + // jmp [rip + disp] + code[9] = 0xff; + code[10] = 0x25; + { + const displacement = try math.cast(u32, got.addr - stub_helper.addr - code_size); + mem.writeIntLittle(u32, code[11..], displacement); + } + try self.base.file.?.pwriteAll(&code, stub_helper.offset); + self.stub_helper_stubs_start_off = stub_helper.offset + code_size; + }, + .aarch64 => { + var code: [6 * @sizeOf(u32)]u8 = undefined; + + data_blk_outer: { + const this_addr = stub_helper.addr; + const target_addr = data.addr; + data_blk: { + const displacement = math.cast(i21, target_addr - this_addr) catch |_| break :data_blk; + // adr x17, disp + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32()); + // nop + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); + break :data_blk_outer; + } + data_blk: { + const new_this_addr = 
this_addr + @sizeOf(u32); + const displacement = math.cast(i21, target_addr - new_this_addr) catch |_| break :data_blk; + // nop + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); + // adr x17, disp + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32()); + break :data_blk_outer; + } + // Jump is too big, replace adr with adrp and add. + const this_page = @intCast(i32, this_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); + const pages = @intCast(i21, target_page - this_page); + // adrp x17, pages + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32()); + const narrowed = @truncate(u12, target_addr); + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32()); + } + + // stp x16, x17, [sp, #-16]! + mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.stp( + .x16, + .x17, + aarch64.Register.sp, + aarch64.Instruction.LoadStorePairOffset.pre_index(-16), + ).toU32()); + + binder_blk_outer: { + const this_addr = stub_helper.addr + 3 * @sizeOf(u32); + const target_addr = got.addr; + binder_blk: { + const displacement = math.divExact(u64, target_addr - this_addr, 4) catch |_| break :binder_blk; + const literal = math.cast(u18, displacement) catch |_| break :binder_blk; + // ldr x16, label + mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{ + .literal = literal, + }).toU32()); + // nop + mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32()); + break :binder_blk_outer; + } + binder_blk: { + const new_this_addr = this_addr + @sizeOf(u32); + const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch |_| break :binder_blk; + const literal = math.cast(u18, displacement) catch |_| break :binder_blk; + // nop + mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32()); + // ldr x16, label + mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ + .literal = literal, + }).toU32()); + break :binder_blk_outer; + } + // Jump is too big, replace ldr with adrp and ldr(register). 
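+ // The ldr (literal) form tried above only covers a limited, word-scaled displacement, so when the
+ // GOT lies out of range the preamble falls back to adrp (to reach the target's 4 KiB page) followed
+ // by an ldr with an immediate offset, scaled for the 8-byte pointer load.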
+ const this_page = @intCast(i32, this_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); + const pages = @intCast(i21, target_page - this_page); + // adrp x16, pages + mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32()); + const narrowed = @truncate(u12, target_addr); + const offset = try math.divExact(u12, narrowed, 8); + // ldr x16, x16, offset + mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ + .register = .{ + .rn = .x16, + .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), + }, + }).toU32()); + } + + // br x16 + mem.writeIntLittle(u32, code[20..24], aarch64.Instruction.br(.x16).toU32()); + try self.base.file.?.pwriteAll(&code, stub_helper.offset); + self.stub_helper_stubs_start_off = stub_helper.offset + code.len; + }, + else => unreachable, + } +} + fn writeStub(self: *MachO, index: u32) !void { const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const stubs = text_segment.sections.items[self.stubs_section_index.?]; @@ -2525,9 +2576,12 @@ fn writeStub(self: *MachO, index: u32) !void { const stub_off = stubs.offset + index * stubs.reserved2; const stub_addr = stubs.addr + index * stubs.reserved2; const la_ptr_addr = la_symbol_ptr.addr + index * @sizeOf(u64); + log.debug("writing stub at 0x{x}", .{stub_off}); + var code = try self.base.allocator.alloc(u8, stubs.reserved2); defer self.base.allocator.free(code); + switch (self.base.options.target.cpu.arch) { .x86_64 => { assert(la_ptr_addr >= stub_addr + stubs.reserved2); @@ -2539,12 +2593,50 @@ fn writeStub(self: *MachO, index: u32) !void { }, .aarch64 => { assert(la_ptr_addr >= stub_addr); - const displacement = try math.divExact(u64, la_ptr_addr - stub_addr, 4); - const literal = try math.cast(u19, displacement); - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.br(.x16).toU32()); + outer: { + const this_addr = stub_addr; + const target_addr = la_ptr_addr; + inner: { + const displacement = math.divExact(u64, target_addr - this_addr, 4) catch |_| break :inner; + const literal = math.cast(u18, displacement) catch |_| break :inner; + // ldr x16, literal + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.x16, .{ + .literal = literal, + }).toU32()); + // nop + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); + break :outer; + } + inner: { + const new_this_addr = this_addr + @sizeOf(u32); + const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch |_| break :inner; + const literal = math.cast(u18, displacement) catch |_| break :inner; + // nop + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); + // ldr x16, literal + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(.x16, .{ + .literal = literal, + }).toU32()); + break :outer; + } + // Use adrp followed by ldr(register). 
+ const this_page = @intCast(i32, this_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); + const pages = @intCast(i21, target_page - this_page); + // adrp x16, pages + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x16, pages).toU32()); + const narrowed = @truncate(u12, target_addr); + const offset = try math.divExact(u12, narrowed, 8); + // ldr x16, x16, offset + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(.x16, .{ + .register = .{ + .rn = .x16, + .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), + }, + }).toU32()); + } + // br x16 + mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.br(.x16).toU32()); }, else => unreachable, } @@ -2561,8 +2653,10 @@ fn writeStubInStubHelper(self: *MachO, index: u32) !void { else => unreachable, }; const stub_off = self.stub_helper_stubs_start_off.? + index * stub_size; + var code = try self.base.allocator.alloc(u8, stub_size); defer self.base.allocator.free(code); + switch (self.base.options.target.cpu.arch) { .x86_64 => { const displacement = try math.cast( @@ -2577,12 +2671,19 @@ fn writeStubInStubHelper(self: *MachO, index: u32) !void { mem.writeIntLittle(u32, code[6..][0..4], @bitCast(u32, displacement)); }, .aarch64 => { - const displacement = try math.cast(i28, @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - 4); + const literal = blk: { + const div_res = try math.divExact(u64, stub_size - @sizeOf(u32), 4); + break :blk try math.cast(u18, div_res); + }; + // ldr w16, literal mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.w16, .{ - .literal = @divExact(stub_size - @sizeOf(u32), 4), + .literal = literal, }).toU32()); + const displacement = try math.cast(i28, @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - 4); + // b disp mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.b(displacement).toU32()); - mem.writeIntLittle(u32, code[8..12], 0x0); // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. + // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. 
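+ // Each aarch64 stub in the stub helper is therefore three words: an ldr w16 that loads the 32-bit
+ // lazy-bind offset from the trailing placeholder, a branch back to the stub helper preamble, and the
+ // placeholder itself, which populateLazyBindOffsetsInStubHelper fills in afterwards.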
+ mem.writeIntLittle(u32, code[8..12], 0x0); }, else => unreachable, } @@ -2591,9 +2692,9 @@ fn writeStubInStubHelper(self: *MachO, index: u32) !void { fn relocateSymbolTable(self: *MachO) !void { const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - const nlocals = self.local_symbols.items.len; - const nglobals = self.global_symbols.items.len; - const nundefs = self.extern_lazy_symbols.items().len + self.extern_nonlazy_symbols.items().len; + const nlocals = self.locals.items.len; + const nglobals = self.globals.items.len; + const nundefs = self.lazy_imports.items().len + self.nonlazy_imports.items().len; const nsyms = nlocals + nglobals + nundefs; if (symtab.nsyms < nsyms) { @@ -2628,7 +2729,7 @@ fn writeLocalSymbol(self: *MachO, index: usize) !void { const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const off = symtab.symoff + @sizeOf(macho.nlist_64) * index; log.debug("writing local symbol {} at 0x{x}", .{ index, off }); - try self.base.file.?.pwriteAll(mem.asBytes(&self.local_symbols.items[index]), off); + try self.base.file.?.pwriteAll(mem.asBytes(&self.locals.items[index]), off); } fn writeAllGlobalAndUndefSymbols(self: *MachO) !void { @@ -2637,18 +2738,18 @@ fn writeAllGlobalAndUndefSymbols(self: *MachO) !void { try self.relocateSymbolTable(); const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - const nlocals = self.local_symbols.items.len; - const nglobals = self.global_symbols.items.len; + const nlocals = self.locals.items.len; + const nglobals = self.globals.items.len; - const nundefs = self.extern_lazy_symbols.items().len + self.extern_nonlazy_symbols.items().len; + const nundefs = self.lazy_imports.items().len + self.nonlazy_imports.items().len; var undefs = std.ArrayList(macho.nlist_64).init(self.base.allocator); defer undefs.deinit(); try undefs.ensureCapacity(nundefs); - for (self.extern_lazy_symbols.items()) |entry| { - undefs.appendAssumeCapacity(entry.value.inner); + for (self.lazy_imports.items()) |entry| { + undefs.appendAssumeCapacity(entry.value.symbol); } - for (self.extern_nonlazy_symbols.items()) |entry| { - undefs.appendAssumeCapacity(entry.value.inner); + for (self.nonlazy_imports.items()) |entry| { + undefs.appendAssumeCapacity(entry.value.symbol); } const locals_off = symtab.symoff; @@ -2657,7 +2758,7 @@ fn writeAllGlobalAndUndefSymbols(self: *MachO) !void { const globals_off = locals_off + locals_size; const globals_size = nglobals * @sizeOf(macho.nlist_64); log.debug("writing global symbols from 0x{x} to 0x{x}", .{ globals_off, globals_size + globals_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.global_symbols.items), globals_off); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), globals_off); const undefs_off = globals_off + globals_size; const undefs_size = nundefs * @sizeOf(macho.nlist_64); @@ -2683,15 +2784,15 @@ fn writeIndirectSymbolTable(self: *MachO) !void { const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; const stubs = &text_segment.sections.items[self.stubs_section_index.?]; const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_seg.sections.items[self.data_got_section_index.?]; + const got = &data_const_seg.sections.items[self.got_section_index.?]; const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; const dysymtab = 
&self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - const lazy = self.extern_lazy_symbols.items(); - const nonlazy = self.extern_nonlazy_symbols.items(); + const lazy = self.lazy_imports.items(); + const got_entries = self.offset_table.items; const allocated_size = self.allocatedSizeLinkedit(dysymtab.indirectsymoff); - const nindirectsyms = @intCast(u32, lazy.len * 2 + nonlazy.len); + const nindirectsyms = @intCast(u32, lazy.len * 2 + got_entries.len); const needed_size = @intCast(u32, nindirectsyms * @sizeOf(u32)); if (needed_size > allocated_size) { @@ -2710,20 +2811,27 @@ fn writeIndirectSymbolTable(self: *MachO) !void { var writer = stream.writer(); stubs.reserved1 = 0; - for (self.extern_lazy_symbols.items()) |_, i| { + for (lazy) |_, i| { const symtab_idx = @intCast(u32, dysymtab.iundefsym + i); try writer.writeIntLittle(u32, symtab_idx); } const base_id = @intCast(u32, lazy.len); got.reserved1 = base_id; - for (self.extern_nonlazy_symbols.items()) |_, i| { - const symtab_idx = @intCast(u32, dysymtab.iundefsym + i + base_id); - try writer.writeIntLittle(u32, symtab_idx); + for (got_entries) |entry| { + switch (entry.kind) { + .Local => { + try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); + }, + .Extern => { + const symtab_idx = @intCast(u32, dysymtab.iundefsym + entry.index + base_id); + try writer.writeIntLittle(u32, symtab_idx); + }, + } } - la_symbol_ptr.reserved1 = got.reserved1 + @intCast(u32, nonlazy.len); - for (self.extern_lazy_symbols.items()) |_, i| { + la_symbol_ptr.reserved1 = got.reserved1 + @intCast(u32, got_entries.len); + for (lazy) |_, i| { const symtab_idx = @intCast(u32, dysymtab.iundefsym + i); try writer.writeIntLittle(u32, symtab_idx); } @@ -2789,7 +2897,7 @@ fn writeCodeSignature(self: *MachO) !void { fn writeExportTrie(self: *MachO) !void { if (!self.export_info_dirty) return; - if (self.global_symbols.items.len == 0) return; + if (self.globals.items.len == 0) return; const tracy = trace(@src()); defer tracy.end(); @@ -2798,7 +2906,7 @@ fn writeExportTrie(self: *MachO) !void { defer trie.deinit(); const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - for (self.global_symbols.items) |symbol| { + for (self.globals.items) |symbol| { // TODO figure out if we should put all global symbols into the export trie const name = self.getString(symbol.n_strx); assert(symbol.n_value >= text_segment.inner.vmaddr); @@ -2840,14 +2948,48 @@ fn writeRebaseInfoTable(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const size = try rebaseInfoSize(self.extern_lazy_symbols.items()); + var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + defer pointers.deinit(); + + if (self.got_section_index) |idx| { + const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const sect = seg.sections.items[idx]; + const base_offset = sect.addr - seg.inner.vmaddr; + const segment_id = self.data_const_segment_cmd_index.?; + + for (self.offset_table.items) |entry| { + if (entry.kind == .Extern) continue; + try pointers.append(.{ + .offset = base_offset + entry.index * @sizeOf(u64), + .segment_id = segment_id, + }); + } + } + + if (self.la_symbol_ptr_section_index) |idx| { + try pointers.ensureCapacity(pointers.items.len + self.lazy_imports.items().len); + const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = seg.sections.items[idx]; + const base_offset = sect.addr - seg.inner.vmaddr; + const segment_id = self.data_segment_cmd_index.?; + + 
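+ // Each __la_symbol_ptr slot holds a pointer (initially into the stub helper) that dyld must slide at
+ // load time, so every lazy import contributes one rebase entry at its slot's offset within __DATA.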
for (self.lazy_imports.items()) |entry| { + pointers.appendAssumeCapacity(.{ + .offset = base_offset + entry.value.index * @sizeOf(u64), + .segment_id = segment_id, + }); + } + } + + std.sort.sort(bind.Pointer, pointers.items, {}, bind.pointerCmp); + + const size = try bind.rebaseInfoSize(pointers.items); var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); defer self.base.allocator.free(buffer); var stream = std.io.fixedBufferStream(buffer); - try writeRebaseInfo(self.extern_lazy_symbols.items(), stream.writer()); + try bind.writeRebaseInfo(pointers.items, stream.writer()); - const linkedit_segment = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; const allocated_size = self.allocatedSizeLinkedit(dyld_info.rebase_off); const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); @@ -2872,14 +3014,34 @@ fn writeBindingInfoTable(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const size = try bindInfoSize(self.extern_nonlazy_symbols.items()); + var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + defer pointers.deinit(); + + if (self.got_section_index) |idx| { + const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const sect = seg.sections.items[idx]; + const base_offset = sect.addr - seg.inner.vmaddr; + const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); + + for (self.offset_table.items) |entry| { + if (entry.kind == .Local) continue; + const import = self.nonlazy_imports.items()[entry.symbol]; + try pointers.append(.{ + .offset = base_offset + entry.index * @sizeOf(u64), + .segment_id = segment_id, + .dylib_ordinal = import.value.dylib_ordinal, + .name = import.key, + }); + } + } + + const size = try bind.bindInfoSize(pointers.items); var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); defer self.base.allocator.free(buffer); var stream = std.io.fixedBufferStream(buffer); - try writeBindInfo(self.extern_nonlazy_symbols.items(), stream.writer()); + try bind.writeBindInfo(pointers.items, stream.writer()); - const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; const allocated_size = self.allocatedSizeLinkedit(dyld_info.bind_off); const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); @@ -2901,14 +3063,36 @@ fn writeBindingInfoTable(self: *MachO) !void { fn writeLazyBindingInfoTable(self: *MachO) !void { if (!self.lazy_binding_info_dirty) return; - const size = try lazyBindInfoSize(self.extern_lazy_symbols.items()); + const tracy = trace(@src()); + defer tracy.end(); + + var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + defer pointers.deinit(); + + if (self.la_symbol_ptr_section_index) |idx| { + try pointers.ensureCapacity(self.lazy_imports.items().len); + const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = seg.sections.items[idx]; + const base_offset = sect.addr - seg.inner.vmaddr; + const segment_id = @intCast(u16, self.data_segment_cmd_index.?); + + for (self.lazy_imports.items()) |entry| { + pointers.appendAssumeCapacity(.{ + .offset = base_offset + entry.value.index * @sizeOf(u64), + .segment_id = segment_id, + .dylib_ordinal = entry.value.dylib_ordinal, + .name = entry.key, + }); + } + } + + const size = try 
bind.lazyBindInfoSize(pointers.items); var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); defer self.base.allocator.free(buffer); var stream = std.io.fixedBufferStream(buffer); - try writeLazyBindInfo(self.extern_lazy_symbols.items(), stream.writer()); + try bind.writeLazyBindInfo(pointers.items, stream.writer()); - const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; const allocated_size = self.allocatedSizeLinkedit(dyld_info.lazy_bind_off); const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); @@ -2929,7 +3113,7 @@ fn writeLazyBindingInfoTable(self: *MachO) !void { } fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { - if (self.extern_lazy_symbols.items().len == 0) return; + if (self.lazy_imports.items().len == 0) return; var stream = std.io.fixedBufferStream(buffer); var reader = stream.reader(); @@ -2975,7 +3159,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { else => {}, } } - assert(self.extern_lazy_symbols.items().len <= offsets.items.len); + assert(self.lazy_imports.items().len <= offsets.items.len); const stub_size: u4 = switch (self.base.options.target.cpu.arch) { .x86_64 => 10, @@ -2988,7 +3172,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { else => unreachable, }; var buf: [@sizeOf(u32)]u8 = undefined; - for (self.extern_lazy_symbols.items()) |_, i| { + for (self.lazy_imports.items()) |_, i| { const placeholder_off = self.stub_helper_stubs_start_off.? + i * stub_size + off; mem.writeIntLittle(u32, &buf, offsets.items[i]); try self.base.file.?.pwriteAll(&buf, placeholder_off); @@ -3102,177 +3286,6 @@ fn writeHeader(self: *MachO) !void { self.header_dirty = false; } -/// Parse MachO contents from existing binary file. 
-fn parseFromFile(self: *MachO, file: fs.File) !void { - self.base.file = file; - var reader = file.reader(); - const header = try reader.readStruct(macho.mach_header_64); - try self.load_commands.ensureCapacity(self.base.allocator, header.ncmds); - var i: u16 = 0; - while (i < header.ncmds) : (i += 1) { - const cmd = try LoadCommand.read(self.base.allocator, reader); - switch (cmd.cmd()) { - macho.LC_SEGMENT_64 => { - const x = cmd.Segment; - if (parseAndCmpName(&x.inner.segname, "__PAGEZERO")) { - self.pagezero_segment_cmd_index = i; - } else if (parseAndCmpName(&x.inner.segname, "__LINKEDIT")) { - self.linkedit_segment_cmd_index = i; - } else if (parseAndCmpName(&x.inner.segname, "__TEXT")) { - self.text_segment_cmd_index = i; - for (x.sections.items) |sect, j| { - if (parseAndCmpName(§.sectname, "__text")) { - self.text_section_index = @intCast(u16, j); - } - } - } else if (parseAndCmpName(&x.inner.segname, "__DATA")) { - self.data_segment_cmd_index = i; - } else if (parseAndCmpName(&x.inner.segname, "__DATA_CONST")) { - self.data_const_segment_cmd_index = i; - } - }, - macho.LC_DYLD_INFO_ONLY => { - self.dyld_info_cmd_index = i; - }, - macho.LC_SYMTAB => { - self.symtab_cmd_index = i; - }, - macho.LC_DYSYMTAB => { - self.dysymtab_cmd_index = i; - }, - macho.LC_LOAD_DYLINKER => { - self.dylinker_cmd_index = i; - }, - macho.LC_VERSION_MIN_MACOSX, macho.LC_VERSION_MIN_IPHONEOS, macho.LC_VERSION_MIN_WATCHOS, macho.LC_VERSION_MIN_TVOS => { - self.version_min_cmd_index = i; - }, - macho.LC_SOURCE_VERSION => { - self.source_version_cmd_index = i; - }, - macho.LC_UUID => { - self.uuid_cmd_index = i; - }, - macho.LC_MAIN => { - self.main_cmd_index = i; - }, - macho.LC_LOAD_DYLIB => { - const x = cmd.Dylib; - if (parseAndCmpName(x.data, mem.spanZ(LIB_SYSTEM_PATH))) { - self.libsystem_cmd_index = i; - } - }, - macho.LC_FUNCTION_STARTS => { - self.function_starts_cmd_index = i; - }, - macho.LC_DATA_IN_CODE => { - self.data_in_code_cmd_index = i; - }, - macho.LC_CODE_SIGNATURE => { - self.code_signature_cmd_index = i; - }, - else => { - log.warn("Unknown load command detected: 0x{x}.", .{cmd.cmd()}); - }, - } - self.load_commands.appendAssumeCapacity(cmd); - } - self.header = header; -} - -fn parseAndCmpName(name: []const u8, needle: []const u8) bool { - const len = mem.indexOfScalar(u8, name, @as(u8, 0)) orelse name.len; - return mem.eql(u8, name[0..len], needle); -} - -fn parseSymbolTable(self: *MachO) !void { - const symtab = self.load_commands.items[self.symtab_cmd_index.?].Symtab; - const dysymtab = self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - - var buffer = try self.base.allocator.alloc(macho.nlist_64, symtab.nsyms); - defer self.base.allocator.free(buffer); - const nread = try self.base.file.?.preadAll(@ptrCast([*]u8, buffer)[0 .. symtab.nsyms * @sizeOf(macho.nlist_64)], symtab.symoff); - assert(@divExact(nread, @sizeOf(macho.nlist_64)) == buffer.len); - - try self.local_symbols.ensureCapacity(self.base.allocator, dysymtab.nlocalsym); - try self.global_symbols.ensureCapacity(self.base.allocator, dysymtab.nextdefsym); - try self.undef_symbols.ensureCapacity(self.base.allocator, dysymtab.nundefsym); - - self.local_symbols.appendSliceAssumeCapacity(buffer[dysymtab.ilocalsym .. dysymtab.ilocalsym + dysymtab.nlocalsym]); - self.global_symbols.appendSliceAssumeCapacity(buffer[dysymtab.iextdefsym .. dysymtab.iextdefsym + dysymtab.nextdefsym]); - self.undef_symbols.appendSliceAssumeCapacity(buffer[dysymtab.iundefsym .. 
dysymtab.iundefsym + dysymtab.nundefsym]); -} - -fn parseStringTable(self: *MachO) !void { - const symtab = self.load_commands.items[self.symtab_cmd_index.?].Symtab; - - var buffer = try self.base.allocator.alloc(u8, symtab.strsize); - defer self.base.allocator.free(buffer); - const nread = try self.base.file.?.preadAll(buffer, symtab.stroff); - assert(nread == buffer.len); - - try self.string_table.ensureCapacity(self.base.allocator, symtab.strsize); - self.string_table.appendSliceAssumeCapacity(buffer); -} - -fn fixupBindInfo(self: *MachO, dylib_ordinal: u32) !void { - const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - var buffer = try self.base.allocator.alloc(u8, dyld_info.bind_size); - defer self.base.allocator.free(buffer); - const nread = try self.base.file.?.preadAll(buffer, dyld_info.bind_off); - assert(nread == buffer.len); - try self.fixupInfoCommon(buffer, dylib_ordinal); - try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off); -} - -fn fixupLazyBindInfo(self: *MachO, dylib_ordinal: u32) !void { - const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - var buffer = try self.base.allocator.alloc(u8, dyld_info.lazy_bind_size); - defer self.base.allocator.free(buffer); - const nread = try self.base.file.?.preadAll(buffer, dyld_info.lazy_bind_off); - assert(nread == buffer.len); - try self.fixupInfoCommon(buffer, dylib_ordinal); - try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off); -} - -fn fixupInfoCommon(self: *MachO, buffer: []u8, dylib_ordinal: u32) !void { - var stream = std.io.fixedBufferStream(buffer); - var reader = stream.reader(); - - while (true) { - const inst = reader.readByte() catch |err| switch (err) { - error.EndOfStream => break, - else => return err, - }; - const imm: u8 = inst & macho.BIND_IMMEDIATE_MASK; - const opcode: u8 = inst & macho.BIND_OPCODE_MASK; - - switch (opcode) { - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { - var next = try reader.readByte(); - while (next != @as(u8, 0)) { - next = try reader.readByte(); - } - }, - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { - _ = try std.leb.readULEB128(u64, reader); - }, - macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => { - // Perform the fixup. 
- try stream.seekBy(-1);
- var writer = stream.writer();
- try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, dylib_ordinal));
- },
- macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => {
- _ = try std.leb.readULEB128(u64, reader);
- },
- macho.BIND_OPCODE_SET_ADDEND_SLEB => {
- _ = try std.leb.readILEB128(i64, reader);
- },
- else => {},
- }
- }
-}
-
 pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) {
 // TODO https://github.com/ziglang/zig/issues/1284
 return std.math.add(@TypeOf(actual_size), actual_size, actual_size / ideal_factor) catch
diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig
new file mode 100644
index 0000000000..8fa0457a16
--- /dev/null
+++ b/src/link/MachO/Archive.zig
@@ -0,0 +1,256 @@
+const Archive = @This();
+
+const std = @import("std");
+const assert = std.debug.assert;
+const fs = std.fs;
+const log = std.log.scoped(.archive);
+const macho = std.macho;
+const mem = std.mem;
+
+const Allocator = mem.Allocator;
+const Object = @import("Object.zig");
+const parseName = @import("Zld.zig").parseName;
+
+usingnamespace @import("commands.zig");
+
+allocator: *Allocator,
+file: fs.File,
+header: ar_hdr,
+name: []u8,
+
+objects: std.ArrayListUnmanaged(Object) = .{},
+
+// Archive files start with the ARMAG identifying string. Then follows a
+// `struct ar_hdr', and as many bytes of member file data as its `ar_size'
+// member indicates, for each member file.
+/// String that begins an archive file.
+const ARMAG: *const [SARMAG:0]u8 = "!<arch>\n";
+/// Size of that string.
+const SARMAG: u4 = 8;
+
+/// String in ar_fmag at the end of each header.
+const ARFMAG: *const [2:0]u8 = "`\n";
+
+const ar_hdr = extern struct {
+ /// Member file name, sometimes / terminated.
+ ar_name: [16]u8,
+
+ /// File date, decimal seconds since Epoch.
+ ar_date: [12]u8,
+
+ /// User ID, in ASCII format.
+ ar_uid: [6]u8,
+
+ /// Group ID, in ASCII format.
+ ar_gid: [6]u8,
+
+ /// File mode, in ASCII octal.
+ ar_mode: [8]u8,
+
+ /// File size, in ASCII decimal.
+ ar_size: [10]u8,
+
+ /// Always contains ARFMAG.
+ ar_fmag: [2]u8,
+
+ const NameOrLength = union(enum) {
+ Name: []const u8,
+ Length: u64,
+ };
+ pub fn nameOrLength(self: ar_hdr) !NameOrLength {
+ const value = getValue(&self.ar_name);
+ const slash_index = mem.indexOf(u8, value, "/") orelse return error.MalformedArchive;
+ const len = value.len;
+ if (slash_index == len - 1) {
+ // Name stored directly
+ return NameOrLength{ .Name = value };
+ } else {
+ // Name follows the header directly and its length is encoded in
+ // the name field.
+ const length = try std.fmt.parseInt(u64, value[slash_index + 1 ..], 10);
+ return NameOrLength{ .Length = length };
+ }
+ }
+
+ pub fn size(self: ar_hdr) !u64 {
+ const value = getValue(&self.ar_size);
+ return std.fmt.parseInt(u64, value, 10);
+ }
+
+ fn getValue(raw: []const u8) []const u8 {
+ return mem.trimRight(u8, raw, &[_]u8{@as(u8, 0x20)});
+ }
+};
+
+pub fn deinit(self: *Archive) void {
+ self.allocator.free(self.name);
+ for (self.objects.items) |*object| {
+ object.deinit();
+ }
+ self.objects.deinit(self.allocator);
+ self.file.close();
+}
+
+/// Caller owns the returned Archive instance and is responsible for calling
+/// `deinit` to free allocated memory.
+pub fn initFromFile(allocator: *Allocator, arch: std.Target.Cpu.Arch, ar_name: []const u8, file: fs.File) !Archive {
+ var reader = file.reader();
+ var magic = try readMagic(allocator, reader);
+ defer allocator.free(magic);
+
+ if (!mem.eql(u8, magic, ARMAG)) {
+ // Reset file cursor.
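+ // Presumably rewound so the caller can retry parsing this file with a different loader
+ // (for example as a standalone object file) after seeing error.NotArchive.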
+ try file.seekTo(0); + return error.NotArchive; + } + + const header = try reader.readStruct(ar_hdr); + + if (!mem.eql(u8, &header.ar_fmag, ARFMAG)) + return error.MalformedArchive; + + var embedded_name = try getName(allocator, header, reader); + log.debug("parsing archive '{s}' at '{s}'", .{ embedded_name, ar_name }); + defer allocator.free(embedded_name); + + var name = try allocator.dupe(u8, ar_name); + var self = Archive{ + .allocator = allocator, + .file = file, + .header = header, + .name = name, + }; + + var object_offsets = try self.readTableOfContents(reader); + defer self.allocator.free(object_offsets); + + var i: usize = 1; + while (i < object_offsets.len) : (i += 1) { + const offset = object_offsets[i]; + try reader.context.seekTo(offset); + try self.readObject(arch, ar_name, reader); + } + + return self; +} + +fn readTableOfContents(self: *Archive, reader: anytype) ![]u32 { + const symtab_size = try reader.readIntLittle(u32); + var symtab = try self.allocator.alloc(u8, symtab_size); + defer self.allocator.free(symtab); + try reader.readNoEof(symtab); + + const strtab_size = try reader.readIntLittle(u32); + var strtab = try self.allocator.alloc(u8, strtab_size); + defer self.allocator.free(strtab); + try reader.readNoEof(strtab); + + var symtab_stream = std.io.fixedBufferStream(symtab); + var symtab_reader = symtab_stream.reader(); + + var object_offsets = std.ArrayList(u32).init(self.allocator); + try object_offsets.append(0); + var last: usize = 0; + + while (true) { + const n_strx = symtab_reader.readIntLittle(u32) catch |err| switch (err) { + error.EndOfStream => break, + else => |e| return e, + }; + const object_offset = try symtab_reader.readIntLittle(u32); + + // TODO Store the table of contents for later reuse. + + // Here, we assume that symbols are NOT sorted in any way, and + // they point to objects in sequence. + if (object_offsets.items[last] != object_offset) { + try object_offsets.append(object_offset); + last += 1; + } + } + + return object_offsets.toOwnedSlice(); +} + +fn readObject(self: *Archive, arch: std.Target.Cpu.Arch, ar_name: []const u8, reader: anytype) !void { + const object_header = try reader.readStruct(ar_hdr); + + if (!mem.eql(u8, &object_header.ar_fmag, ARFMAG)) + return error.MalformedArchive; + + var object_name = try getName(self.allocator, object_header, reader); + log.debug("extracting object '{s}' from archive '{s}'", .{ object_name, self.name }); + + const offset = @intCast(u32, try reader.context.getPos()); + const header = try reader.readStruct(macho.mach_header_64); + + const this_arch: std.Target.Cpu.Arch = switch (header.cputype) { + macho.CPU_TYPE_ARM64 => .aarch64, + macho.CPU_TYPE_X86_64 => .x86_64, + else => |value| { + log.err("unsupported cpu architecture 0x{x}", .{value}); + return error.UnsupportedCpuArchitecture; + }, + }; + if (this_arch != arch) { + log.err("mismatched cpu architecture: found {s}, expected {s}", .{ this_arch, arch }); + return error.MismatchedCpuArchitecture; + } + + // TODO Implement std.fs.File.clone() or similar. 
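+ // Re-opening the archive by path gives this Object its own file handle and cursor, so that
+ // reads of individual members stay independent of the archive-level reader.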
+ var new_file = try fs.cwd().openFile(ar_name, .{}); + var object = Object{ + .allocator = self.allocator, + .name = object_name, + .ar_name = try mem.dupe(self.allocator, u8, ar_name), + .file = new_file, + .header = header, + }; + + try object.readLoadCommands(reader, .{ .offset = offset }); + + if (object.symtab_cmd_index != null) { + try object.readSymtab(); + try object.readStrtab(); + } + + if (object.data_in_code_cmd_index != null) try object.readDataInCode(); + + log.debug("\n\n", .{}); + log.debug("{s} defines symbols", .{object.name}); + for (object.symtab.items) |sym| { + const symname = object.getString(sym.n_strx); + log.debug("'{s}': {}", .{ symname, sym }); + } + + try self.objects.append(self.allocator, object); +} + +fn readMagic(allocator: *Allocator, reader: anytype) ![]u8 { + var magic = std.ArrayList(u8).init(allocator); + try magic.ensureCapacity(SARMAG); + var i: usize = 0; + while (i < SARMAG) : (i += 1) { + const next = try reader.readByte(); + magic.appendAssumeCapacity(next); + } + return magic.toOwnedSlice(); +} + +fn getName(allocator: *Allocator, header: ar_hdr, reader: anytype) ![]u8 { + const name_or_length = try header.nameOrLength(); + var name: []u8 = undefined; + switch (name_or_length) { + .Name => |n| { + name = try allocator.dupe(u8, n); + }, + .Length => |len| { + var n = try allocator.alloc(u8, len); + defer allocator.free(n); + try reader.readNoEof(n); + const actual_len = mem.indexOfScalar(u8, n, @as(u8, 0)); + name = try allocator.dupe(u8, n[0..actual_len.?]); + }, + } + return name; +} diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 042c1a12cf..a81fd00c0a 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -839,8 +839,8 @@ fn findFreeSpaceLinkedit(self: *DebugSymbols, object_size: u64, min_alignment: u fn relocateSymbolTable(self: *DebugSymbols) !void { const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - const nlocals = self.base.local_symbols.items.len; - const nglobals = self.base.global_symbols.items.len; + const nlocals = self.base.locals.items.len; + const nglobals = self.base.globals.items.len; const nsyms = nlocals + nglobals; if (symtab.nsyms < nsyms) { @@ -875,7 +875,7 @@ pub fn writeLocalSymbol(self: *DebugSymbols, index: usize) !void { const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const off = symtab.symoff + @sizeOf(macho.nlist_64) * index; log.debug("writing dSym local symbol {} at 0x{x}", .{ index, off }); - try self.file.pwriteAll(mem.asBytes(&self.base.local_symbols.items[index]), off); + try self.file.pwriteAll(mem.asBytes(&self.base.locals.items[index]), off); } fn writeStringTable(self: *DebugSymbols) !void { @@ -1057,7 +1057,7 @@ pub fn commitDeclDebugInfo( var dbg_info_buffer = &debug_buffers.dbg_info_buffer; var dbg_info_type_relocs = &debug_buffers.dbg_info_type_relocs; - const symbol = self.base.local_symbols.items[decl.link.macho.local_sym_index]; + const symbol = self.base.locals.items[decl.link.macho.local_sym_index]; const text_block = &decl.link.macho; // If the Decl is a function, we need to update the __debug_line program. 
const typed_value = decl.typed_value.most_recent.typed_value; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig new file mode 100644 index 0000000000..bb3da944bb --- /dev/null +++ b/src/link/MachO/Object.zig @@ -0,0 +1,228 @@ +const Object = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const fs = std.fs; +const io = std.io; +const log = std.log.scoped(.object); +const macho = std.macho; +const mem = std.mem; + +const Allocator = mem.Allocator; +const parseName = @import("Zld.zig").parseName; + +usingnamespace @import("commands.zig"); + +allocator: *Allocator, +file: fs.File, +name: []u8, +ar_name: ?[]u8 = null, + +header: macho.mach_header_64, + +load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, + +segment_cmd_index: ?u16 = null, +symtab_cmd_index: ?u16 = null, +dysymtab_cmd_index: ?u16 = null, +build_version_cmd_index: ?u16 = null, +data_in_code_cmd_index: ?u16 = null, +text_section_index: ?u16 = null, + +// __DWARF segment sections +dwarf_debug_info_index: ?u16 = null, +dwarf_debug_abbrev_index: ?u16 = null, +dwarf_debug_str_index: ?u16 = null, +dwarf_debug_line_index: ?u16 = null, +dwarf_debug_ranges_index: ?u16 = null, + +symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +strtab: std.ArrayListUnmanaged(u8) = .{}, + +data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, + +pub fn deinit(self: *Object) void { + for (self.load_commands.items) |*lc| { + lc.deinit(self.allocator); + } + self.load_commands.deinit(self.allocator); + self.symtab.deinit(self.allocator); + self.strtab.deinit(self.allocator); + self.data_in_code_entries.deinit(self.allocator); + self.allocator.free(self.name); + if (self.ar_name) |v| { + self.allocator.free(v); + } + self.file.close(); +} + +/// Caller owns the returned Object instance and is responsible for calling +/// `deinit` to free allocated memory. +pub fn initFromFile(allocator: *Allocator, arch: std.Target.Cpu.Arch, name: []const u8, file: fs.File) !Object { + var reader = file.reader(); + const header = try reader.readStruct(macho.mach_header_64); + + if (header.filetype != macho.MH_OBJECT) { + // Reset file cursor. 
+ try file.seekTo(0);
+ return error.NotObject;
+ }
+
+ const this_arch: std.Target.Cpu.Arch = switch (header.cputype) {
+ macho.CPU_TYPE_ARM64 => .aarch64,
+ macho.CPU_TYPE_X86_64 => .x86_64,
+ else => |value| {
+ log.err("unsupported cpu architecture 0x{x}", .{value});
+ return error.UnsupportedCpuArchitecture;
+ },
+ };
+ if (this_arch != arch) {
+ log.err("mismatched cpu architecture: found {s}, expected {s}", .{ this_arch, arch });
+ return error.MismatchedCpuArchitecture;
+ }
+
+ var self = Object{
+ .allocator = allocator,
+ .name = try allocator.dupe(u8, name),
+ .file = file,
+ .header = header,
+ };
+
+ try self.readLoadCommands(reader, .{});
+
+ if (self.symtab_cmd_index != null) {
+ try self.readSymtab();
+ try self.readStrtab();
+ }
+
+ if (self.data_in_code_cmd_index != null) try self.readDataInCode();
+
+ log.debug("\n\n", .{});
+ log.debug("{s} defines symbols", .{self.name});
+ for (self.symtab.items) |sym| {
+ const symname = self.getString(sym.n_strx);
+ log.debug("'{s}': {}", .{ symname, sym });
+ }
+
+ return self;
+}
+
+pub const ReadOffset = struct {
+ offset: ?u32 = null,
+};
+
+pub fn readLoadCommands(self: *Object, reader: anytype, offset: ReadOffset) !void {
+ const offset_mod = offset.offset orelse 0;
+ try self.load_commands.ensureCapacity(self.allocator, self.header.ncmds);
+
+ var i: u16 = 0;
+ while (i < self.header.ncmds) : (i += 1) {
+ var cmd = try LoadCommand.read(self.allocator, reader);
+ switch (cmd.cmd()) {
+ macho.LC_SEGMENT_64 => {
+ self.segment_cmd_index = i;
+ var seg = cmd.Segment;
+ for (seg.sections.items) |*sect, j| {
+ const index = @intCast(u16, j);
+ const segname = parseName(&sect.segname);
+ const sectname = parseName(&sect.sectname);
+ if (mem.eql(u8, segname, "__DWARF")) {
+ if (mem.eql(u8, sectname, "__debug_info")) {
+ self.dwarf_debug_info_index = index;
+ } else if (mem.eql(u8, sectname, "__debug_abbrev")) {
+ self.dwarf_debug_abbrev_index = index;
+ } else if (mem.eql(u8, sectname, "__debug_str")) {
+ self.dwarf_debug_str_index = index;
+ } else if (mem.eql(u8, sectname, "__debug_line")) {
+ self.dwarf_debug_line_index = index;
+ } else if (mem.eql(u8, sectname, "__debug_ranges")) {
+ self.dwarf_debug_ranges_index = index;
+ }
+ } else if (mem.eql(u8, segname, "__TEXT")) {
+ if (mem.eql(u8, sectname, "__text")) {
+ self.text_section_index = index;
+ }
+ }
+
+ sect.offset += offset_mod;
+ if (sect.reloff > 0)
+ sect.reloff += offset_mod;
+ }
+
+ seg.inner.fileoff += offset_mod;
+ },
+ macho.LC_SYMTAB => {
+ self.symtab_cmd_index = i;
+ cmd.Symtab.symoff += offset_mod;
+ cmd.Symtab.stroff += offset_mod;
+ },
+ macho.LC_DYSYMTAB => {
+ self.dysymtab_cmd_index = i;
+ },
+ macho.LC_BUILD_VERSION => {
+ self.build_version_cmd_index = i;
+ },
+ macho.LC_DATA_IN_CODE => {
+ self.data_in_code_cmd_index = i;
+ },
+ else => {
+ log.debug("Unknown load command detected: 0x{x}.", .{cmd.cmd()});
+ },
+ }
+ self.load_commands.appendAssumeCapacity(cmd);
+ }
+}
+
+pub fn readSymtab(self: *Object) !void {
+ const symtab_cmd = self.load_commands.items[self.symtab_cmd_index.?].Symtab;
+ var buffer = try self.allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms);
+ defer self.allocator.free(buffer);
+ _ = try self.file.preadAll(buffer, symtab_cmd.symoff);
+ try self.symtab.ensureCapacity(self.allocator, symtab_cmd.nsyms);
+ // TODO this align case should not be needed.
+ // Probably a bug in stage1.
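+ // mem.bytesAsSlice over a plain []u8 buffer yields a slice with 1-byte alignment, so the pointer
+ // has to be @alignCast to nlist_64 alignment before the entries can be copied in.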
+ const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, buffer)); + self.symtab.appendSliceAssumeCapacity(slice); +} + +pub fn readStrtab(self: *Object) !void { + const symtab_cmd = self.load_commands.items[self.symtab_cmd_index.?].Symtab; + var buffer = try self.allocator.alloc(u8, symtab_cmd.strsize); + defer self.allocator.free(buffer); + _ = try self.file.preadAll(buffer, symtab_cmd.stroff); + try self.strtab.ensureCapacity(self.allocator, symtab_cmd.strsize); + self.strtab.appendSliceAssumeCapacity(buffer); +} + +pub fn getString(self: *const Object, str_off: u32) []const u8 { + assert(str_off < self.strtab.items.len); + return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + str_off)); +} + +pub fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 { + const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; + const sect = seg.sections.items[index]; + var buffer = try allocator.alloc(u8, sect.size); + _ = try self.file.preadAll(buffer, sect.offset); + return buffer; +} + +pub fn readDataInCode(self: *Object) !void { + const index = self.data_in_code_cmd_index orelse return; + const data_in_code = self.load_commands.items[index].LinkeditData; + + var buffer = try self.allocator.alloc(u8, data_in_code.datasize); + defer self.allocator.free(buffer); + + _ = try self.file.preadAll(buffer, data_in_code.dataoff); + + var stream = io.fixedBufferStream(buffer); + var reader = stream.reader(); + while (true) { + const dice = reader.readStruct(macho.data_in_code_entry) catch |err| switch (err) { + error.EndOfStream => break, + else => |e| return e, + }; + try self.data_in_code_entries.append(self.allocator, dice); + } +} diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig new file mode 100644 index 0000000000..c98bacc08a --- /dev/null +++ b/src/link/MachO/Zld.zig @@ -0,0 +1,3192 @@ +const Zld = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const dwarf = std.dwarf; +const leb = std.leb; +const mem = std.mem; +const meta = std.meta; +const fs = std.fs; +const macho = std.macho; +const math = std.math; +const log = std.log.scoped(.zld); +const aarch64 = @import("../../codegen/aarch64.zig"); + +const Allocator = mem.Allocator; +const CodeSignature = @import("CodeSignature.zig"); +const Archive = @import("Archive.zig"); +const Object = @import("Object.zig"); +const Trie = @import("Trie.zig"); + +usingnamespace @import("commands.zig"); +usingnamespace @import("bind.zig"); + +allocator: *Allocator, + +arch: ?std.Target.Cpu.Arch = null, +page_size: ?u16 = null, +file: ?fs.File = null, +out_path: ?[]const u8 = null, + +// TODO Eventually, we will want to keep track of the archives themselves to be able to exclude objects +// contained within from landing in the final artifact. For now however, since we don't optimise the binary +// at all, we just move all objects from the archives into the final artifact. 
+objects: std.ArrayListUnmanaged(Object) = .{},
+
+load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
+
+pagezero_segment_cmd_index: ?u16 = null,
+text_segment_cmd_index: ?u16 = null,
+data_const_segment_cmd_index: ?u16 = null,
+data_segment_cmd_index: ?u16 = null,
+linkedit_segment_cmd_index: ?u16 = null,
+dyld_info_cmd_index: ?u16 = null,
+symtab_cmd_index: ?u16 = null,
+dysymtab_cmd_index: ?u16 = null,
+dylinker_cmd_index: ?u16 = null,
+libsystem_cmd_index: ?u16 = null,
+data_in_code_cmd_index: ?u16 = null,
+function_starts_cmd_index: ?u16 = null,
+main_cmd_index: ?u16 = null,
+version_min_cmd_index: ?u16 = null,
+source_version_cmd_index: ?u16 = null,
+uuid_cmd_index: ?u16 = null,
+code_signature_cmd_index: ?u16 = null,
+
+// __TEXT segment sections
+text_section_index: ?u16 = null,
+stubs_section_index: ?u16 = null,
+stub_helper_section_index: ?u16 = null,
+text_const_section_index: ?u16 = null,
+cstring_section_index: ?u16 = null,
+
+// __DATA segment sections
+got_section_index: ?u16 = null,
+tlv_section_index: ?u16 = null,
+tlv_data_section_index: ?u16 = null,
+tlv_bss_section_index: ?u16 = null,
+la_symbol_ptr_section_index: ?u16 = null,
+data_const_section_index: ?u16 = null,
+data_section_index: ?u16 = null,
+bss_section_index: ?u16 = null,
+
+locals: std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged(Symbol)) = .{},
+exports: std.StringArrayHashMapUnmanaged(macho.nlist_64) = .{},
+nonlazy_imports: std.StringArrayHashMapUnmanaged(Import) = .{},
+lazy_imports: std.StringArrayHashMapUnmanaged(Import) = .{},
+tlv_bootstrap: ?Import = null,
+threadlocal_offsets: std.ArrayListUnmanaged(u64) = .{},
+local_rebases: std.ArrayListUnmanaged(Pointer) = .{},
+nonlazy_pointers: std.StringArrayHashMapUnmanaged(GotEntry) = .{},
+
+strtab: std.ArrayListUnmanaged(u8) = .{},
+
+stub_helper_stubs_start_off: ?u64 = null,
+
+mappings: std.AutoHashMapUnmanaged(MappingKey, SectionMapping) = .{},
+unhandled_sections: std.AutoHashMapUnmanaged(MappingKey, u0) = .{},
+
+// TODO this will require scanning the relocations at least once to work out
+// the exact number of local GOT indirections. For the time being, set some
+// default value.
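+// Each local GOT indirection occupies one u64 slot in the __got section (see
+// allocateDataConstSegment), so this placeholder reserves 1000 * @sizeOf(u64) = 8000 bytes on top
+// of the nonlazy imports; doRelocs asserts that the number of local GOT entries stays below it.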
+const max_local_got_indirections: u16 = 1000; + +const GotEntry = struct { + index: u32, + target_addr: u64, +}; + +const MappingKey = struct { + object_id: u16, + source_sect_id: u16, +}; + +const SectionMapping = struct { + source_sect_id: u16, + target_seg_id: u16, + target_sect_id: u16, + offset: u32, +}; + +const Symbol = struct { + inner: macho.nlist_64, + tt: Type, + object_id: u16, + + const Type = enum { + Local, + WeakGlobal, + Global, + }; +}; + +const DebugInfo = struct { + inner: dwarf.DwarfInfo, + debug_info: []u8, + debug_abbrev: []u8, + debug_str: []u8, + debug_line: []u8, + debug_ranges: []u8, + + pub fn parseFromObject(allocator: *Allocator, object: Object) !?DebugInfo { + var debug_info = blk: { + const index = object.dwarf_debug_info_index orelse return null; + break :blk try object.readSection(allocator, index); + }; + var debug_abbrev = blk: { + const index = object.dwarf_debug_abbrev_index orelse return null; + break :blk try object.readSection(allocator, index); + }; + var debug_str = blk: { + const index = object.dwarf_debug_str_index orelse return null; + break :blk try object.readSection(allocator, index); + }; + var debug_line = blk: { + const index = object.dwarf_debug_line_index orelse return null; + break :blk try object.readSection(allocator, index); + }; + var debug_ranges = blk: { + if (object.dwarf_debug_ranges_index) |ind| { + break :blk try object.readSection(allocator, ind); + } + break :blk try allocator.alloc(u8, 0); + }; + + var inner: dwarf.DwarfInfo = .{ + .endian = .Little, + .debug_info = debug_info, + .debug_abbrev = debug_abbrev, + .debug_str = debug_str, + .debug_line = debug_line, + .debug_ranges = debug_ranges, + }; + try dwarf.openDwarfDebugInfo(&inner, allocator); + + return DebugInfo{ + .inner = inner, + .debug_info = debug_info, + .debug_abbrev = debug_abbrev, + .debug_str = debug_str, + .debug_line = debug_line, + .debug_ranges = debug_ranges, + }; + } + + pub fn deinit(self: *DebugInfo, allocator: *Allocator) void { + allocator.free(self.debug_info); + allocator.free(self.debug_abbrev); + allocator.free(self.debug_str); + allocator.free(self.debug_line); + allocator.free(self.debug_ranges); + self.inner.abbrev_table_list.deinit(); + self.inner.compile_unit_list.deinit(); + self.inner.func_list.deinit(); + } +}; + +pub const Import = struct { + /// MachO symbol table entry. + symbol: macho.nlist_64, + + /// Id of the dynamic library where the specified entries can be found. + dylib_ordinal: i64, + + /// Index of this import within the import list. + index: u32, +}; + +/// Default path to dyld +/// TODO instead of hardcoding it, we should probably look through some env vars and search paths +/// instead but this will do for now. +const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld"; + +/// Default lib search path +/// TODO instead of hardcoding it, we should probably look through some env vars and search paths +/// instead but this will do for now. 
+const DEFAULT_LIB_SEARCH_PATH: []const u8 = "/usr/lib"; + +const LIB_SYSTEM_NAME: [*:0]const u8 = "System"; +/// TODO we should search for libSystem and fail if it doesn't exist, instead of hardcoding it +const LIB_SYSTEM_PATH: [*:0]const u8 = DEFAULT_LIB_SEARCH_PATH ++ "/libSystem.B.dylib"; + +pub fn init(allocator: *Allocator) Zld { + return .{ .allocator = allocator }; +} + +pub fn deinit(self: *Zld) void { + self.threadlocal_offsets.deinit(self.allocator); + self.strtab.deinit(self.allocator); + self.local_rebases.deinit(self.allocator); + for (self.lazy_imports.items()) |*entry| { + self.allocator.free(entry.key); + } + self.lazy_imports.deinit(self.allocator); + for (self.nonlazy_imports.items()) |*entry| { + self.allocator.free(entry.key); + } + self.nonlazy_imports.deinit(self.allocator); + for (self.nonlazy_pointers.items()) |*entry| { + self.allocator.free(entry.key); + } + self.nonlazy_pointers.deinit(self.allocator); + for (self.exports.items()) |*entry| { + self.allocator.free(entry.key); + } + self.exports.deinit(self.allocator); + for (self.locals.items()) |*entry| { + self.allocator.free(entry.key); + entry.value.deinit(self.allocator); + } + self.locals.deinit(self.allocator); + for (self.objects.items) |*object| { + object.deinit(); + } + self.objects.deinit(self.allocator); + for (self.load_commands.items) |*lc| { + lc.deinit(self.allocator); + } + self.load_commands.deinit(self.allocator); + self.mappings.deinit(self.allocator); + self.unhandled_sections.deinit(self.allocator); + if (self.file) |*f| f.close(); +} + +pub fn link(self: *Zld, files: []const []const u8, out_path: []const u8) !void { + if (files.len == 0) return error.NoInputFiles; + if (out_path.len == 0) return error.EmptyOutputPath; + + if (self.arch == null) { + // Try inferring the arch from the object files. + self.arch = blk: { + const file = try fs.cwd().openFile(files[0], .{}); + defer file.close(); + var reader = file.reader(); + const header = try reader.readStruct(macho.mach_header_64); + const arch: std.Target.Cpu.Arch = switch (header.cputype) { + macho.CPU_TYPE_X86_64 => .x86_64, + macho.CPU_TYPE_ARM64 => .aarch64, + else => |value| { + log.err("unsupported cpu architecture 0x{x}", .{value}); + return error.UnsupportedCpuArchitecture; + }, + }; + break :blk arch; + }; + } + + self.page_size = switch (self.arch.?) 
{ + .aarch64 => 0x4000, + .x86_64 => 0x1000, + else => unreachable, + }; + self.out_path = out_path; + self.file = try fs.cwd().createFile(out_path, .{ + .truncate = true, + .read = true, + .mode = if (std.Target.current.os.tag == .windows) 0 else 0o777, + }); + + try self.populateMetadata(); + try self.parseInputFiles(files); + try self.sortSections(); + try self.resolveImports(); + try self.allocateTextSegment(); + try self.allocateDataConstSegment(); + try self.allocateDataSegment(); + self.allocateLinkeditSegment(); + try self.writeStubHelperCommon(); + try self.resolveSymbols(); + try self.doRelocs(); + try self.flush(); +} + +fn parseInputFiles(self: *Zld, files: []const []const u8) !void { + for (files) |file_name| { + const file = try fs.cwd().openFile(file_name, .{}); + + try_object: { + var object = Object.initFromFile(self.allocator, self.arch.?, file_name, file) catch |err| switch (err) { + error.NotObject => break :try_object, + else => |e| return e, + }; + const index = @intCast(u16, self.objects.items.len); + try self.objects.append(self.allocator, object); + try self.updateMetadata(index); + continue; + } + + try_archive: { + var archive = Archive.initFromFile(self.allocator, self.arch.?, file_name, file) catch |err| switch (err) { + error.NotArchive => break :try_archive, + else => |e| return e, + }; + defer archive.deinit(); + while (archive.objects.popOrNull()) |object| { + const index = @intCast(u16, self.objects.items.len); + try self.objects.append(self.allocator, object); + try self.updateMetadata(index); + } + continue; + } + + log.err("unexpected file type: expected object '.o' or archive '.a': {s}", .{file_name}); + return error.UnexpectedInputFileType; + } +} + +fn mapAndUpdateSections( + self: *Zld, + object_id: u16, + source_sect_id: u16, + target_seg_id: u16, + target_sect_id: u16, +) !void { + const object = self.objects.items[object_id]; + const source_seg = object.load_commands.items[object.segment_cmd_index.?].Segment; + const source_sect = source_seg.sections.items[source_sect_id]; + const target_seg = &self.load_commands.items[target_seg_id].Segment; + const target_sect = &target_seg.sections.items[target_sect_id]; + + const alignment = try math.powi(u32, 2, target_sect.@"align"); + const offset = mem.alignForwardGeneric(u64, target_sect.size, alignment); + const size = mem.alignForwardGeneric(u64, source_sect.size, alignment); + const key = MappingKey{ + .object_id = object_id, + .source_sect_id = source_sect_id, + }; + try self.mappings.putNoClobber(self.allocator, key, .{ + .source_sect_id = source_sect_id, + .target_seg_id = target_seg_id, + .target_sect_id = target_sect_id, + .offset = @intCast(u32, offset), + }); + log.debug("{s}: {s},{s} mapped to {s},{s} from 0x{x} to 0x{x}", .{ + object.name, + parseName(&source_sect.segname), + parseName(&source_sect.sectname), + parseName(&target_sect.segname), + parseName(&target_sect.sectname), + offset, + offset + size, + }); + + target_sect.size = offset + size; +} + +fn updateMetadata(self: *Zld, object_id: u16) !void { + const object = self.objects.items[object_id]; + const object_seg = object.load_commands.items[object.segment_cmd_index.?].Segment; + const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + + // Create missing metadata + for (object_seg.sections.items) |source_sect, id| { + if (id 
== object.text_section_index.?) continue; + const segname = parseName(&source_sect.segname); + const sectname = parseName(&source_sect.sectname); + const flags = source_sect.flags; + + switch (flags) { + macho.S_REGULAR, macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { + if (mem.eql(u8, segname, "__TEXT")) { + if (self.text_const_section_index != null) continue; + + self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.allocator, .{ + .sectname = makeStaticString("__const"), + .segname = makeStaticString("__TEXT"), + .addr = 0, + .size = 0, + .offset = 0, + .@"align" = 0, + .reloff = 0, + .nreloc = 0, + .flags = macho.S_REGULAR, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + } else if (mem.eql(u8, segname, "__DATA")) { + if (!mem.eql(u8, sectname, "__const")) continue; + if (self.data_const_section_index != null) continue; + + self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.allocator, .{ + .sectname = makeStaticString("__const"), + .segname = makeStaticString("__DATA_CONST"), + .addr = 0, + .size = 0, + .offset = 0, + .@"align" = 0, + .reloff = 0, + .nreloc = 0, + .flags = macho.S_REGULAR, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + } + }, + macho.S_CSTRING_LITERALS => { + if (!mem.eql(u8, segname, "__TEXT")) continue; + if (self.cstring_section_index != null) continue; + + self.cstring_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.allocator, .{ + .sectname = makeStaticString("__cstring"), + .segname = makeStaticString("__TEXT"), + .addr = 0, + .size = 0, + .offset = 0, + .@"align" = 0, + .reloff = 0, + .nreloc = 0, + .flags = macho.S_CSTRING_LITERALS, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + }, + macho.S_ZEROFILL => { + if (!mem.eql(u8, segname, "__DATA")) continue; + if (self.bss_section_index != null) continue; + + self.bss_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.allocator, .{ + .sectname = makeStaticString("__bss"), + .segname = makeStaticString("__DATA"), + .addr = 0, + .size = 0, + .offset = 0, + .@"align" = 0, + .reloff = 0, + .nreloc = 0, + .flags = macho.S_ZEROFILL, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + }, + macho.S_THREAD_LOCAL_VARIABLES => { + if (!mem.eql(u8, segname, "__DATA")) continue; + if (self.tlv_section_index != null) continue; + + self.tlv_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.allocator, .{ + .sectname = makeStaticString("__thread_vars"), + .segname = makeStaticString("__DATA"), + .addr = 0, + .size = 0, + .offset = 0, + .@"align" = 0, + .reloff = 0, + .nreloc = 0, + .flags = macho.S_THREAD_LOCAL_VARIABLES, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + }, + macho.S_THREAD_LOCAL_REGULAR => { + if (!mem.eql(u8, segname, "__DATA")) continue; + if (self.tlv_data_section_index != null) continue; + + self.tlv_data_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.allocator, .{ + .sectname = makeStaticString("__thread_data"), + .segname = makeStaticString("__DATA"), + .addr = 0, + .size = 0, + .offset = 0, + .@"align" = 0, + .reloff = 0, + .nreloc = 0, + .flags = macho.S_THREAD_LOCAL_REGULAR, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + }, + macho.S_THREAD_LOCAL_ZEROFILL => { + if (!mem.eql(u8, segname, "__DATA")) continue; + if 
(self.tlv_bss_section_index != null) continue;
+
+                self.tlv_bss_section_index = @intCast(u16, data_seg.sections.items.len);
+                try data_seg.addSection(self.allocator, .{
+                    .sectname = makeStaticString("__thread_bss"),
+                    .segname = makeStaticString("__DATA"),
+                    .addr = 0,
+                    .size = 0,
+                    .offset = 0,
+                    .@"align" = 0,
+                    .reloff = 0,
+                    .nreloc = 0,
+                    .flags = macho.S_THREAD_LOCAL_ZEROFILL,
+                    .reserved1 = 0,
+                    .reserved2 = 0,
+                    .reserved3 = 0,
+                });
+            },
+            else => {
+                log.debug("unhandled section type 0x{x} for '{s}/{s}'", .{ flags, segname, sectname });
+            },
+        }
+    }
+
+    // Find ideal section alignment.
+    for (object_seg.sections.items) |source_sect, id| {
+        if (self.getMatchingSection(source_sect)) |res| {
+            const target_seg = &self.load_commands.items[res.seg].Segment;
+            const target_sect = &target_seg.sections.items[res.sect];
+            target_sect.@"align" = math.max(target_sect.@"align", source_sect.@"align");
+        }
+    }
+
+    // Update section mappings
+    for (object_seg.sections.items) |source_sect, id| {
+        const source_sect_id = @intCast(u16, id);
+        if (self.getMatchingSection(source_sect)) |res| {
+            try self.mapAndUpdateSections(object_id, source_sect_id, res.seg, res.sect);
+            continue;
+        }
+
+        const segname = parseName(&source_sect.segname);
+        const sectname = parseName(&source_sect.sectname);
+        log.debug("section '{s}/{s}' will be unmapped", .{ segname, sectname });
+        try self.unhandled_sections.putNoClobber(self.allocator, .{
+            .object_id = object_id,
+            .source_sect_id = source_sect_id,
+        }, 0);
+    }
+}
+
+const MatchingSection = struct {
+    seg: u16,
+    sect: u16,
+};
+
+fn getMatchingSection(self: *Zld, section: macho.section_64) ?MatchingSection {
+    const segname = parseName(&section.segname);
+    const sectname = parseName(&section.sectname);
+    const res: ?MatchingSection = blk: {
+        switch (section.flags) {
+            macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => {
+                break :blk .{
+                    .seg = self.text_segment_cmd_index.?,
+                    .sect = self.text_const_section_index.?,
+                };
+            },
+            macho.S_CSTRING_LITERALS => {
+                break :blk .{
+                    .seg = self.text_segment_cmd_index.?,
+                    .sect = self.cstring_section_index.?,
+                };
+            },
+            macho.S_ZEROFILL => {
+                break :blk .{
+                    .seg = self.data_segment_cmd_index.?,
+                    .sect = self.bss_section_index.?,
+                };
+            },
+            macho.S_THREAD_LOCAL_VARIABLES => {
+                break :blk .{
+                    .seg = self.data_segment_cmd_index.?,
+                    .sect = self.tlv_section_index.?,
+                };
+            },
+            macho.S_THREAD_LOCAL_REGULAR => {
+                break :blk .{
+                    .seg = self.data_segment_cmd_index.?,
+                    .sect = self.tlv_data_section_index.?,
+                };
+            },
+            macho.S_THREAD_LOCAL_ZEROFILL => {
+                break :blk .{
+                    .seg = self.data_segment_cmd_index.?,
+                    .sect = self.tlv_bss_section_index.?,
+                };
+            },
+            macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS => {
+                break :blk .{
+                    .seg = self.text_segment_cmd_index.?,
+                    .sect = self.text_section_index.?,
+                };
+            },
+            macho.S_REGULAR => {
+                if (mem.eql(u8, segname, "__TEXT")) {
+                    break :blk .{
+                        .seg = self.text_segment_cmd_index.?,
+                        .sect = self.text_const_section_index.?,
+                    };
+                } else if (mem.eql(u8, segname, "__DATA")) {
+                    if (mem.eql(u8, sectname, "__data")) {
+                        break :blk .{
+                            .seg = self.data_segment_cmd_index.?,
+                            .sect = self.data_section_index.?,
+                        };
+                    } else if (mem.eql(u8, sectname, "__const")) {
+                        break :blk .{
+                            .seg = self.data_const_segment_cmd_index.?,
+                            .sect = self.data_const_section_index.?,
+                        };
+                    }
+                }
+                break :blk null;
+            },
+            else => {
+                break :blk null;
+            },
+        }
+    };
+    return res;
+}
+
+fn sortSections(self: *Zld) !void {
+    var
text_index_mapping = std.AutoHashMap(u16, u16).init(self.allocator); + defer text_index_mapping.deinit(); + var data_const_index_mapping = std.AutoHashMap(u16, u16).init(self.allocator); + defer data_const_index_mapping.deinit(); + var data_index_mapping = std.AutoHashMap(u16, u16).init(self.allocator); + defer data_index_mapping.deinit(); + + { + // __TEXT segment + const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + var sections = seg.sections.toOwnedSlice(self.allocator); + defer self.allocator.free(sections); + try seg.sections.ensureCapacity(self.allocator, sections.len); + + const indices = &[_]*?u16{ + &self.text_section_index, + &self.stubs_section_index, + &self.stub_helper_section_index, + &self.text_const_section_index, + &self.cstring_section_index, + }; + for (indices) |maybe_index| { + const new_index: u16 = if (maybe_index.*) |index| blk: { + const idx = @intCast(u16, seg.sections.items.len); + seg.sections.appendAssumeCapacity(sections[index]); + try text_index_mapping.putNoClobber(index, idx); + break :blk idx; + } else continue; + maybe_index.* = new_index; + } + } + + { + // __DATA_CONST segment + const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + var sections = seg.sections.toOwnedSlice(self.allocator); + defer self.allocator.free(sections); + try seg.sections.ensureCapacity(self.allocator, sections.len); + + const indices = &[_]*?u16{ + &self.got_section_index, + &self.data_const_section_index, + }; + for (indices) |maybe_index| { + const new_index: u16 = if (maybe_index.*) |index| blk: { + const idx = @intCast(u16, seg.sections.items.len); + seg.sections.appendAssumeCapacity(sections[index]); + try data_const_index_mapping.putNoClobber(index, idx); + break :blk idx; + } else continue; + maybe_index.* = new_index; + } + } + + { + // __DATA segment + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + var sections = seg.sections.toOwnedSlice(self.allocator); + defer self.allocator.free(sections); + try seg.sections.ensureCapacity(self.allocator, sections.len); + + // __DATA segment + const indices = &[_]*?u16{ + &self.la_symbol_ptr_section_index, + &self.tlv_section_index, + &self.data_section_index, + &self.tlv_data_section_index, + &self.tlv_bss_section_index, + &self.bss_section_index, + }; + for (indices) |maybe_index| { + const new_index: u16 = if (maybe_index.*) |index| blk: { + const idx = @intCast(u16, seg.sections.items.len); + seg.sections.appendAssumeCapacity(sections[index]); + try data_index_mapping.putNoClobber(index, idx); + break :blk idx; + } else continue; + maybe_index.* = new_index; + } + } + + var it = self.mappings.iterator(); + while (it.next()) |entry| { + const mapping = &entry.value; + if (self.text_segment_cmd_index.? == mapping.target_seg_id) { + const new_index = text_index_mapping.get(mapping.target_sect_id) orelse unreachable; + mapping.target_sect_id = new_index; + } else if (self.data_const_segment_cmd_index.? == mapping.target_seg_id) { + const new_index = data_const_index_mapping.get(mapping.target_sect_id) orelse unreachable; + mapping.target_sect_id = new_index; + } else if (self.data_segment_cmd_index.? 
== mapping.target_seg_id) { + const new_index = data_index_mapping.get(mapping.target_sect_id) orelse unreachable; + mapping.target_sect_id = new_index; + } else unreachable; + } +} + +fn resolveImports(self: *Zld) !void { + var imports = std.StringArrayHashMap(bool).init(self.allocator); + defer imports.deinit(); + + for (self.objects.items) |object| { + for (object.symtab.items) |sym| { + if (isLocal(&sym)) continue; + + const name = object.getString(sym.n_strx); + const res = try imports.getOrPut(name); + if (isExport(&sym)) { + res.entry.value = false; + continue; + } + if (res.found_existing and !res.entry.value) + continue; + res.entry.value = true; + } + } + + for (imports.items()) |entry| { + if (!entry.value) continue; + + const sym_name = entry.key; + const n_strx = try self.makeString(sym_name); + var new_sym: macho.nlist_64 = .{ + .n_strx = n_strx, + .n_type = macho.N_UNDF | macho.N_EXT, + .n_value = 0, + .n_desc = macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | macho.N_SYMBOL_RESOLVER, + .n_sect = 0, + }; + var key = try self.allocator.dupe(u8, sym_name); + // TODO handle symbol resolution from non-libc dylibs. + const dylib_ordinal = 1; + + // TODO need to rework this. Perhaps should create a set of all possible libc + // symbols which are expected to be nonlazy? + if (mem.eql(u8, sym_name, "___stdoutp") or + mem.eql(u8, sym_name, "___stderrp") or + mem.eql(u8, sym_name, "___stdinp") or + mem.eql(u8, sym_name, "___stack_chk_guard") or + mem.eql(u8, sym_name, "_environ") or + mem.eql(u8, sym_name, "__DefaultRuneLocale") or + mem.eql(u8, sym_name, "_mach_task_self_")) + { + log.debug("writing nonlazy symbol '{s}'", .{sym_name}); + const index = @intCast(u32, self.nonlazy_imports.items().len); + try self.nonlazy_imports.putNoClobber(self.allocator, key, .{ + .symbol = new_sym, + .dylib_ordinal = dylib_ordinal, + .index = index, + }); + } else if (mem.eql(u8, sym_name, "__tlv_bootstrap")) { + log.debug("writing threadlocal symbol '{s}'", .{sym_name}); + self.tlv_bootstrap = .{ + .symbol = new_sym, + .dylib_ordinal = dylib_ordinal, + .index = 0, + }; + } else { + log.debug("writing lazy symbol '{s}'", .{sym_name}); + const index = @intCast(u32, self.lazy_imports.items().len); + try self.lazy_imports.putNoClobber(self.allocator, key, .{ + .symbol = new_sym, + .dylib_ordinal = dylib_ordinal, + .index = index, + }); + } + } + + const n_strx = try self.makeString("dyld_stub_binder"); + const name = try self.allocator.dupe(u8, "dyld_stub_binder"); + log.debug("writing nonlazy symbol 'dyld_stub_binder'", .{}); + const index = @intCast(u32, self.nonlazy_imports.items().len); + try self.nonlazy_imports.putNoClobber(self.allocator, name, .{ + .symbol = .{ + .n_strx = n_strx, + .n_type = std.macho.N_UNDF | std.macho.N_EXT, + .n_sect = 0, + .n_desc = std.macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | std.macho.N_SYMBOL_RESOLVER, + .n_value = 0, + }, + .dylib_ordinal = 1, + .index = index, + }); +} + +fn allocateTextSegment(self: *Zld) !void { + const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const nexterns = @intCast(u32, self.lazy_imports.items().len); + + const base_vmaddr = self.load_commands.items[self.pagezero_segment_cmd_index.?].Segment.inner.vmsize; + seg.inner.fileoff = 0; + seg.inner.vmaddr = base_vmaddr; + + // Set stubs and stub_helper sizes + const stubs = &seg.sections.items[self.stubs_section_index.?]; + const stub_helper = &seg.sections.items[self.stub_helper_section_index.?]; + stubs.size += nexterns * stubs.reserved2; + + const stub_size: u4 = switch 
(self.arch.?) { + .x86_64 => 10, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, + }; + stub_helper.size += nexterns * stub_size; + + var sizeofcmds: u64 = 0; + for (self.load_commands.items) |lc| { + sizeofcmds += lc.cmdsize(); + } + + try self.allocateSegment(self.text_segment_cmd_index.?, @sizeOf(macho.mach_header_64) + sizeofcmds); + + // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments. + var min_alignment: u32 = 0; + for (seg.sections.items) |sect| { + const alignment = try math.powi(u32, 2, sect.@"align"); + min_alignment = math.max(min_alignment, alignment); + } + + assert(min_alignment > 0); + const last_sect_idx = seg.sections.items.len - 1; + const last_sect = seg.sections.items[last_sect_idx]; + const shift: u32 = blk: { + const diff = seg.inner.filesize - last_sect.offset - last_sect.size; + const factor = @divTrunc(diff, min_alignment); + break :blk @intCast(u32, factor * min_alignment); + }; + + if (shift > 0) { + for (seg.sections.items) |*sect| { + sect.offset += shift; + sect.addr += shift; + } + } +} + +fn allocateDataConstSegment(self: *Zld) !void { + const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const nonlazy = @intCast(u32, self.nonlazy_imports.items().len); + + const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + seg.inner.fileoff = text_seg.inner.fileoff + text_seg.inner.filesize; + seg.inner.vmaddr = text_seg.inner.vmaddr + text_seg.inner.vmsize; + + // Set got size + const got = &seg.sections.items[self.got_section_index.?]; + // TODO this will require scanning the relocations at least one to work out + // the exact amount of local GOT indirections. For the time being, set some + // default value. + got.size += (max_local_got_indirections + nonlazy) * @sizeOf(u64); + + try self.allocateSegment(self.data_const_segment_cmd_index.?, 0); +} + +fn allocateDataSegment(self: *Zld) !void { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const lazy = @intCast(u32, self.lazy_imports.items().len); + + const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + seg.inner.fileoff = data_const_seg.inner.fileoff + data_const_seg.inner.filesize; + seg.inner.vmaddr = data_const_seg.inner.vmaddr + data_const_seg.inner.vmsize; + + // Set la_symbol_ptr and data size + const la_symbol_ptr = &seg.sections.items[self.la_symbol_ptr_section_index.?]; + const data = &seg.sections.items[self.data_section_index.?]; + la_symbol_ptr.size += lazy * @sizeOf(u64); + data.size += @sizeOf(u64); // TODO when do we need more? + + try self.allocateSegment(self.data_segment_cmd_index.?, 0); +} + +fn allocateLinkeditSegment(self: *Zld) void { + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + seg.inner.fileoff = data_seg.inner.fileoff + data_seg.inner.filesize; + seg.inner.vmaddr = data_seg.inner.vmaddr + data_seg.inner.vmsize; +} + +fn allocateSegment(self: *Zld, index: u16, offset: u64) !void { + const base_vmaddr = self.load_commands.items[self.pagezero_segment_cmd_index.?].Segment.inner.vmsize; + const seg = &self.load_commands.items[index].Segment; + + // Allocate the sections according to their alignment at the beginning of the segment. 
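+    // For example (hypothetical sizes): with offset = 0x20 and two sections of size 0x19 (align = 3)
+    // and 0x100 (align = 4), the first section starts at 0x20 and is padded out to 0x40, the second
+    // occupies 0x40..0x140, and both filesize and vmsize get rounded up to the next page boundary.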
+ var start: u64 = offset; + for (seg.sections.items) |*sect| { + const alignment = try math.powi(u32, 2, sect.@"align"); + const start_aligned = mem.alignForwardGeneric(u64, start, alignment); + const end_aligned = mem.alignForwardGeneric(u64, start_aligned + sect.size, alignment); + sect.offset = @intCast(u32, seg.inner.fileoff + start_aligned); + sect.addr = seg.inner.vmaddr + start_aligned; + start = end_aligned; + } + + const seg_size_aligned = mem.alignForwardGeneric(u64, start, self.page_size.?); + seg.inner.filesize = seg_size_aligned; + seg.inner.vmsize = seg_size_aligned; +} + +fn writeStubHelperCommon(self: *Zld) !void { + const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?]; + const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const got = &data_const_segment.sections.items[self.got_section_index.?]; + const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const data = &data_segment.sections.items[self.data_section_index.?]; + const la_symbol_ptr = data_segment.sections.items[self.la_symbol_ptr_section_index.?]; + + self.stub_helper_stubs_start_off = blk: { + switch (self.arch.?) { + .x86_64 => { + const code_size = 15; + var code: [code_size]u8 = undefined; + // lea %r11, [rip + disp] + code[0] = 0x4c; + code[1] = 0x8d; + code[2] = 0x1d; + { + const target_addr = data.addr + data.size - @sizeOf(u64); + const displacement = try math.cast(u32, target_addr - stub_helper.addr - 7); + mem.writeIntLittle(u32, code[3..7], displacement); + } + // push %r11 + code[7] = 0x41; + code[8] = 0x53; + // jmp [rip + disp] + code[9] = 0xff; + code[10] = 0x25; + { + const dyld_stub_binder = self.nonlazy_imports.get("dyld_stub_binder").?; + const addr = (got.addr + dyld_stub_binder.index * @sizeOf(u64)); + const displacement = try math.cast(u32, addr - stub_helper.addr - code_size); + mem.writeIntLittle(u32, code[11..], displacement); + } + try self.file.?.pwriteAll(&code, stub_helper.offset); + break :blk stub_helper.offset + code_size; + }, + .aarch64 => { + var code: [6 * @sizeOf(u32)]u8 = undefined; + data_blk_outer: { + const this_addr = stub_helper.addr; + const target_addr = data.addr + data.size - @sizeOf(u64); + data_blk: { + const displacement = math.cast(i21, target_addr - this_addr) catch |_| break :data_blk; + // adr x17, disp + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32()); + // nop + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); + break :data_blk_outer; + } + data_blk: { + const new_this_addr = this_addr + @sizeOf(u32); + const displacement = math.cast(i21, target_addr - new_this_addr) catch |_| break :data_blk; + // nop + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); + // adr x17, disp + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32()); + break :data_blk_outer; + } + // Jump is too big, replace adr with adrp and add. + const this_page = @intCast(i32, this_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); + const pages = @intCast(i21, target_page - this_page); + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32()); + const narrowed = @truncate(u12, target_addr); + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32()); + } + // stp x16, x17, [sp, #-16]! 
+ code[8] = 0xf0; + code[9] = 0x47; + code[10] = 0xbf; + code[11] = 0xa9; + binder_blk_outer: { + const dyld_stub_binder = self.nonlazy_imports.get("dyld_stub_binder").?; + const this_addr = stub_helper.addr + 3 * @sizeOf(u32); + const target_addr = (got.addr + dyld_stub_binder.index * @sizeOf(u64)); + binder_blk: { + const displacement = math.divExact(u64, target_addr - this_addr, 4) catch |_| break :binder_blk; + const literal = math.cast(u18, displacement) catch |_| break :binder_blk; + // ldr x16, label + mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{ + .literal = literal, + }).toU32()); + // nop + mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32()); + break :binder_blk_outer; + } + binder_blk: { + const new_this_addr = this_addr + @sizeOf(u32); + const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch |_| break :binder_blk; + const literal = math.cast(u18, displacement) catch |_| break :binder_blk; + log.debug("2: disp=0x{x}, literal=0x{x}", .{ displacement, literal }); + // Pad with nop to please division. + // nop + mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32()); + // ldr x16, label + mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ + .literal = literal, + }).toU32()); + break :binder_blk_outer; + } + // Use adrp followed by ldr(immediate). + const this_page = @intCast(i32, this_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); + const pages = @intCast(i21, target_page - this_page); + mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32()); + const narrowed = @truncate(u12, target_addr); + const offset = try math.divExact(u12, narrowed, 8); + mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ + .register = .{ + .rn = .x16, + .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), + }, + }).toU32()); + } + // br x16 + code[20] = 0x00; + code[21] = 0x02; + code[22] = 0x1f; + code[23] = 0xd6; + try self.file.?.pwriteAll(&code, stub_helper.offset); + break :blk stub_helper.offset + 6 * @sizeOf(u32); + }, + else => unreachable, + } + }; + + for (self.lazy_imports.items()) |_, i| { + const index = @intCast(u32, i); + try self.writeLazySymbolPointer(index); + try self.writeStub(index); + try self.writeStubInStubHelper(index); + } +} + +fn writeLazySymbolPointer(self: *Zld, index: u32) !void { + const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const stub_helper = text_segment.sections.items[self.stub_helper_section_index.?]; + const data_segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const la_symbol_ptr = data_segment.sections.items[self.la_symbol_ptr_section_index.?]; + + const stub_size: u4 = switch (self.arch.?) { + .x86_64 => 10, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, + }; + const stub_off = self.stub_helper_stubs_start_off.? 
+ index * stub_size; + const end = stub_helper.addr + stub_off - stub_helper.offset; + var buf: [@sizeOf(u64)]u8 = undefined; + mem.writeIntLittle(u64, &buf, end); + const off = la_symbol_ptr.offset + index * @sizeOf(u64); + log.debug("writing lazy symbol pointer entry 0x{x} at 0x{x}", .{ end, off }); + try self.file.?.pwriteAll(&buf, off); +} + +fn writeStub(self: *Zld, index: u32) !void { + const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const stubs = text_segment.sections.items[self.stubs_section_index.?]; + const data_segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const la_symbol_ptr = data_segment.sections.items[self.la_symbol_ptr_section_index.?]; + + const stub_off = stubs.offset + index * stubs.reserved2; + const stub_addr = stubs.addr + index * stubs.reserved2; + const la_ptr_addr = la_symbol_ptr.addr + index * @sizeOf(u64); + log.debug("writing stub at 0x{x}", .{stub_off}); + var code = try self.allocator.alloc(u8, stubs.reserved2); + defer self.allocator.free(code); + switch (self.arch.?) { + .x86_64 => { + assert(la_ptr_addr >= stub_addr + stubs.reserved2); + const displacement = try math.cast(u32, la_ptr_addr - stub_addr - stubs.reserved2); + // jmp + code[0] = 0xff; + code[1] = 0x25; + mem.writeIntLittle(u32, code[2..][0..4], displacement); + }, + .aarch64 => { + assert(la_ptr_addr >= stub_addr); + outer: { + const this_addr = stub_addr; + const target_addr = la_ptr_addr; + inner: { + const displacement = math.divExact(u64, target_addr - this_addr, 4) catch |_| break :inner; + const literal = math.cast(u18, displacement) catch |_| break :inner; + // ldr x16, literal + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.x16, .{ + .literal = literal, + }).toU32()); + // nop + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); + break :outer; + } + inner: { + const new_this_addr = this_addr + @sizeOf(u32); + const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch |_| break :inner; + const literal = math.cast(u18, displacement) catch |_| break :inner; + // nop + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); + // ldr x16, literal + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(.x16, .{ + .literal = literal, + }).toU32()); + break :outer; + } + // Use adrp followed by ldr(immediate). + const this_page = @intCast(i32, this_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); + const pages = @intCast(i21, target_page - this_page); + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x16, pages).toU32()); + const narrowed = @truncate(u12, target_addr); + const offset = try math.divExact(u12, narrowed, 8); + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(.x16, .{ + .register = .{ + .rn = .x16, + .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), + }, + }).toU32()); + } + // br x16 + mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.br(.x16).toU32()); + }, + else => unreachable, + } + try self.file.?.pwriteAll(code, stub_off); +} + +fn writeStubInStubHelper(self: *Zld, index: u32) !void { + const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const stub_helper = text_segment.sections.items[self.stub_helper_section_index.?]; + + const stub_size: u4 = switch (self.arch.?) { + .x86_64 => 10, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, + }; + const stub_off = self.stub_helper_stubs_start_off.? 
+ index * stub_size; + var code = try self.allocator.alloc(u8, stub_size); + defer self.allocator.free(code); + switch (self.arch.?) { + .x86_64 => { + const displacement = try math.cast( + i32, + @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - stub_size, + ); + // pushq + code[0] = 0x68; + mem.writeIntLittle(u32, code[1..][0..4], 0x0); // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. + // jmpq + code[5] = 0xe9; + mem.writeIntLittle(u32, code[6..][0..4], @bitCast(u32, displacement)); + }, + .aarch64 => { + const displacement = try math.cast(i28, @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - 4); + const literal = @divExact(stub_size - @sizeOf(u32), 4); + // ldr w16, literal + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.w16, .{ + .literal = literal, + }).toU32()); + // b disp + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.b(displacement).toU32()); + mem.writeIntLittle(u32, code[8..12], 0x0); // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. + }, + else => unreachable, + } + try self.file.?.pwriteAll(code, stub_off); +} + +fn resolveSymbols(self: *Zld) !void { + for (self.objects.items) |object, object_id| { + const seg = object.load_commands.items[object.segment_cmd_index.?].Segment; + log.debug("\n\n", .{}); + log.debug("resolving symbols in {s}", .{object.name}); + + for (object.symtab.items) |sym| { + if (isImport(&sym)) continue; + + const sym_name = object.getString(sym.n_strx); + const out_name = try self.allocator.dupe(u8, sym_name); + const locs = try self.locals.getOrPut(self.allocator, out_name); + defer { + if (locs.found_existing) self.allocator.free(out_name); + } + + if (!locs.found_existing) { + locs.entry.value = .{}; + } + + const tt: Symbol.Type = blk: { + if (isLocal(&sym)) { + break :blk .Local; + } else if (isWeakDef(&sym)) { + break :blk .WeakGlobal; + } else { + break :blk .Global; + } + }; + if (tt == .Global) { + for (locs.entry.value.items) |ss| { + if (ss.tt == .Global) { + log.debug("symbol already defined '{s}'", .{sym_name}); + continue; + // log.err("symbol '{s}' defined multiple times: {}", .{ sym_name, sym }); + // return error.MultipleSymbolDefinitions; + } + } + } + + const source_sect_id = sym.n_sect - 1; + const target_mapping = self.mappings.get(.{ + .object_id = @intCast(u16, object_id), + .source_sect_id = source_sect_id, + }) orelse { + if (self.unhandled_sections.get(.{ + .object_id = @intCast(u16, object_id), + .source_sect_id = source_sect_id, + }) != null) continue; + + log.err("section not mapped for symbol '{s}': {}", .{ sym_name, sym }); + return error.SectionNotMappedForSymbol; + }; + const source_sect = seg.sections.items[source_sect_id]; + const target_seg = self.load_commands.items[target_mapping.target_seg_id].Segment; + const target_sect = target_seg.sections.items[target_mapping.target_sect_id]; + const target_addr = target_sect.addr + target_mapping.offset; + const n_value = sym.n_value - source_sect.addr + target_addr; + + log.debug("resolving '{s}':{} as {s} symbol at 0x{x}", .{ sym_name, sym, tt, n_value }); + + // TODO there might be a more generic way of doing this. 
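+            // n_sect is the 1-based ordinal of the target section counted across the sections of all
+            // segments in load-command order, which is how nlist_64 encodes section membership. For
+            // instance (made-up counts): if __PAGEZERO has no sections, __TEXT has five, and the symbol
+            // maps into the second section of __DATA_CONST, then n_sect = 0 + 5 + 2 = 7.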
+            var n_sect: u16 = 0;
+            for (self.load_commands.items) |cmd, cmd_id| {
+                if (cmd != .Segment) break;
+                if (cmd_id == target_mapping.target_seg_id) {
+                    n_sect += target_mapping.target_sect_id + 1;
+                    break;
+                }
+                n_sect += @intCast(u16, cmd.Segment.sections.items.len);
+            }
+
+            const n_strx = try self.makeString(sym_name);
+            try locs.entry.value.append(self.allocator, .{
+                .inner = .{
+                    .n_strx = n_strx,
+                    .n_value = n_value,
+                    .n_type = macho.N_SECT,
+                    .n_desc = sym.n_desc,
+                    .n_sect = @intCast(u8, n_sect),
+                },
+                .tt = tt,
+                .object_id = @intCast(u16, object_id),
+            });
+        }
+    }
+}
+
+fn doRelocs(self: *Zld) !void {
+    for (self.objects.items) |object, object_id| {
+        log.debug("\n\n", .{});
+        log.debug("relocating object {s}", .{object.name});
+
+        const seg = object.load_commands.items[object.segment_cmd_index.?].Segment;
+
+        for (seg.sections.items) |sect, source_sect_id| {
+            const segname = parseName(&sect.segname);
+            const sectname = parseName(&sect.sectname);
+
+            var code = try self.allocator.alloc(u8, sect.size);
+            _ = try object.file.preadAll(code, sect.offset);
+            defer self.allocator.free(code);
+
+            // Parse relocs (if any)
+            var raw_relocs = try self.allocator.alloc(u8, @sizeOf(macho.relocation_info) * sect.nreloc);
+            defer self.allocator.free(raw_relocs);
+            _ = try object.file.preadAll(raw_relocs, sect.reloff);
+            const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs);
+
+            // Get mapping
+            const target_mapping = self.mappings.get(.{
+                .object_id = @intCast(u16, object_id),
+                .source_sect_id = @intCast(u16, source_sect_id),
+            }) orelse {
+                log.debug("no mapping for {s},{s}; skipping", .{ segname, sectname });
+                continue;
+            };
+            const target_seg = self.load_commands.items[target_mapping.target_seg_id].Segment;
+            const target_sect = target_seg.sections.items[target_mapping.target_sect_id];
+            const target_sect_addr = target_sect.addr + target_mapping.offset;
+            const target_sect_off = target_sect.offset + target_mapping.offset;
+
+            var addend: ?u64 = null;
+            var sub: ?i64 = null;
+
+            for (relocs) |rel| {
+                const off = @intCast(u32, rel.r_address);
+                const this_addr = target_sect_addr + off;
+
+                switch (self.arch.?) {
+                    .aarch64 => {
+                        const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type);
+                        log.debug("{s}", .{rel_type});
+                        log.debug(" | source address 0x{x}", .{this_addr});
+                        log.debug(" | offset 0x{x}", .{off});
+
+                        if (rel_type == .ARM64_RELOC_ADDEND) {
+                            addend = rel.r_symbolnum;
+                            log.debug(" | calculated addend = 0x{x}", .{addend});
+                            // TODO followed by either PAGE21 or PAGEOFF12 only.
+                            continue;
+                        }
+                    },
+                    .x86_64 => {
+                        const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type);
+                        log.debug("{s}", .{rel_type});
+                        log.debug(" | source address 0x{x}", .{this_addr});
+                        log.debug(" | offset 0x{x}", .{off});
+                    },
+                    else => {},
+                }
+
+                const target_addr = try self.relocTargetAddr(@intCast(u16, object_id), rel);
+                log.debug(" | target address 0x{x}", .{target_addr});
+                if (rel.r_extern == 1) {
+                    const target_symname = object.getString(object.symtab.items[rel.r_symbolnum].n_strx);
+                    log.debug(" | target symbol '{s}'", .{target_symname});
+                } else {
+                    const target_sectname = seg.sections.items[rel.r_symbolnum - 1].sectname;
+                    log.debug(" | target section '{s}'", .{parseName(&target_sectname)});
+                }
+
+                switch (self.arch.?)
{ + .x86_64 => { + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + + switch (rel_type) { + .X86_64_RELOC_BRANCH => { + assert(rel.r_length == 2); + const inst = code[off..][0..4]; + const displacement = @bitCast(u32, @intCast(i32, @intCast(i64, target_addr) - @intCast(i64, this_addr) - 4)); + mem.writeIntLittle(u32, inst, displacement); + }, + .X86_64_RELOC_GOT_LOAD => { + assert(rel.r_length == 2); + const inst = code[off..][0..4]; + const displacement = @bitCast(u32, @intCast(i32, @intCast(i64, target_addr) - @intCast(i64, this_addr) - 4)); + + blk: { + const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const got = data_const_seg.sections.items[self.got_section_index.?]; + if (got.addr <= target_addr and target_addr < got.addr + got.size) break :blk; + log.debug(" | rewriting to leaq", .{}); + code[off - 2] = 0x8d; + } + + mem.writeIntLittle(u32, inst, displacement); + }, + .X86_64_RELOC_GOT => { + assert(rel.r_length == 2); + // TODO Instead of referring to the target symbol directly, we refer to it + // indirectly via GOT. Getting actual target address should be done in the + // helper relocTargetAddr function rather than here. + const sym = object.symtab.items[rel.r_symbolnum]; + const sym_name = try self.allocator.dupe(u8, object.getString(sym.n_strx)); + const res = try self.nonlazy_pointers.getOrPut(self.allocator, sym_name); + defer if (res.found_existing) self.allocator.free(sym_name); + + const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const got = data_const_seg.sections.items[self.got_section_index.?]; + + if (!res.found_existing) { + const index = @intCast(u32, self.nonlazy_pointers.items().len) - 1; + assert(index < max_local_got_indirections); // TODO This is just a temp solution. + res.entry.value = .{ + .index = index, + .target_addr = target_addr, + }; + var buf: [@sizeOf(u64)]u8 = undefined; + mem.writeIntLittle(u64, &buf, target_addr); + const got_offset = got.offset + (index + self.nonlazy_imports.items().len) * @sizeOf(u64); + + log.debug(" | GOT off 0x{x}", .{got.offset}); + log.debug(" | writing GOT entry 0x{x} at 0x{x}", .{ target_addr, got_offset }); + + try self.file.?.pwriteAll(&buf, got_offset); + } + + const index = res.entry.value.index + self.nonlazy_imports.items().len; + const actual_target_addr = got.addr + index * @sizeOf(u64); + + log.debug(" | GOT addr 0x{x}", .{got.addr}); + log.debug(" | actual target address in GOT 0x{x}", .{actual_target_addr}); + + const inst = code[off..][0..4]; + const displacement = @bitCast(u32, @intCast(i32, @intCast(i64, actual_target_addr) - @intCast(i64, this_addr) - 4)); + mem.writeIntLittle(u32, inst, displacement); + }, + .X86_64_RELOC_TLV => { + assert(rel.r_length == 2); + // We need to rewrite the opcode from movq to leaq. + code[off - 2] = 0x8d; + // Add displacement. 
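+ // The 4-byte immediate is rip-relative to the end of the instruction, hence the `- 4` below.
+ // E.g. `movq _x@TLVP(%rip), %rdi` encodes as 48 8b 3d <disp32>; patching the opcode byte at
+ // `off - 2` from 0x8b to 0x8d turns it into `leaq`, so the register receives the address of the
+ // TLV descriptor instead of a load through it.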
+ const inst = code[off..][0..4]; + const displacement = @bitCast(u32, @intCast(i32, @intCast(i64, target_addr) - @intCast(i64, this_addr) - 4)); + mem.writeIntLittle(u32, inst, displacement); + }, + .X86_64_RELOC_SIGNED, + .X86_64_RELOC_SIGNED_1, + .X86_64_RELOC_SIGNED_2, + .X86_64_RELOC_SIGNED_4, + => { + assert(rel.r_length == 2); + const inst = code[off..][0..4]; + const offset = @intCast(i64, mem.readIntLittle(i32, inst)); + log.debug(" | calculated addend 0x{x}", .{offset}); + const actual_target_addr = blk: { + if (rel.r_extern == 1) { + break :blk @intCast(i64, target_addr) + offset; + } else { + const correction: i4 = switch (rel_type) { + .X86_64_RELOC_SIGNED => 0, + .X86_64_RELOC_SIGNED_1 => 1, + .X86_64_RELOC_SIGNED_2 => 2, + .X86_64_RELOC_SIGNED_4 => 4, + else => unreachable, + }; + log.debug(" | calculated correction 0x{x}", .{correction}); + + // The value encoded in the instruction is a displacement - 4 - correction. + // To obtain the adjusted target address in the final binary, we need + // calculate the original target address within the object file, establish + // what the offset from the original target section was, and apply this + // offset to the resultant target section with this relocated binary. + const orig_sect_id = @intCast(u16, rel.r_symbolnum - 1); + const target_map = self.mappings.get(.{ + .object_id = @intCast(u16, object_id), + .source_sect_id = orig_sect_id, + }) orelse unreachable; + const orig_seg = object.load_commands.items[object.segment_cmd_index.?].Segment; + const orig_sect = orig_seg.sections.items[orig_sect_id]; + const orig_offset = off + offset + 4 + correction - @intCast(i64, orig_sect.addr); + log.debug(" | original offset 0x{x}", .{orig_offset}); + const adjusted = @intCast(i64, target_addr) + orig_offset; + log.debug(" | adjusted target address 0x{x}", .{adjusted}); + break :blk adjusted - correction; + } + }; + const result = actual_target_addr - @intCast(i64, this_addr) - 4; + const displacement = @bitCast(u32, @intCast(i32, result)); + mem.writeIntLittle(u32, inst, displacement); + }, + .X86_64_RELOC_SUBTRACTOR => { + sub = @intCast(i64, target_addr); + }, + .X86_64_RELOC_UNSIGNED => { + switch (rel.r_length) { + 3 => { + const inst = code[off..][0..8]; + const offset = mem.readIntLittle(i64, inst); + + const result = outer: { + if (rel.r_extern == 1) { + log.debug(" | calculated addend 0x{x}", .{offset}); + if (sub) |s| { + break :outer @intCast(i64, target_addr) - s + offset; + } else { + break :outer @intCast(i64, target_addr) + offset; + } + } else { + // The value encoded in the instruction is an absolute offset + // from the start of MachO header to the target address in the + // object file. To extract the address, we calculate the offset from + // the beginning of the source section to the address, and apply it to + // the target address value. 
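+ // For instance (made-up numbers): if the source section sat at 0x100 inside the object file and
+ // the stored value is 0x128, the offset into that section is 0x28; with that section remapped to
+ // 0x100002000 in the output, the relocated value becomes 0x100002028 (minus a pending
+ // SUBTRACTOR target, if any).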
+ const orig_sect_id = @intCast(u16, rel.r_symbolnum - 1); + const target_map = self.mappings.get(.{ + .object_id = @intCast(u16, object_id), + .source_sect_id = orig_sect_id, + }) orelse unreachable; + const orig_seg = object.load_commands.items[object.segment_cmd_index.?].Segment; + const orig_sect = orig_seg.sections.items[orig_sect_id]; + const orig_offset = offset - @intCast(i64, orig_sect.addr); + const actual_target_addr = inner: { + if (sub) |s| { + break :inner @intCast(i64, target_addr) - s + orig_offset; + } else { + break :inner @intCast(i64, target_addr) + orig_offset; + } + }; + log.debug(" | adjusted target address 0x{x}", .{actual_target_addr}); + break :outer actual_target_addr; + } + }; + mem.writeIntLittle(u64, inst, @bitCast(u64, result)); + sub = null; + + rebases: { + var hit: bool = false; + if (target_mapping.target_seg_id == self.data_segment_cmd_index.?) { + if (self.data_section_index) |index| { + if (index == target_mapping.target_sect_id) hit = true; + } + } + if (target_mapping.target_seg_id == self.data_const_segment_cmd_index.?) { + if (self.data_const_section_index) |index| { + if (index == target_mapping.target_sect_id) hit = true; + } + } + + if (!hit) break :rebases; + + try self.local_rebases.append(self.allocator, .{ + .offset = this_addr - target_seg.inner.vmaddr, + .segment_id = target_mapping.target_seg_id, + }); + } + // TLV is handled via a separate offset mechanism. + // Calculate the offset to the initializer. + if (target_sect.flags == macho.S_THREAD_LOCAL_VARIABLES) tlv: { + assert(rel.r_extern == 1); + const sym = object.symtab.items[rel.r_symbolnum]; + if (isImport(&sym)) break :tlv; + + const base_addr = blk: { + if (self.tlv_data_section_index) |index| { + const tlv_data = target_seg.sections.items[index]; + break :blk tlv_data.addr; + } else { + const tlv_bss = target_seg.sections.items[self.tlv_bss_section_index.?]; + break :blk tlv_bss.addr; + } + }; + // Since we require TLV data to always preceed TLV bss section, we calculate + // offsets wrt to the former if it is defined; otherwise, wrt to the latter. 
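+ // E.g. (made-up addresses): with __thread_data starting at 0x100004000 and this initializer
+ // placed at 0x100004010, the recorded offset is 0x10.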
+ try self.threadlocal_offsets.append(self.allocator, target_addr - base_addr); + } + }, + 2 => { + const inst = code[off..][0..4]; + const offset = mem.readIntLittle(i32, inst); + log.debug(" | calculated addend 0x{x}", .{offset}); + const result = if (sub) |s| + @intCast(i64, target_addr) - s + offset + else + @intCast(i64, target_addr) + offset; + mem.writeIntLittle(u32, inst, @truncate(u32, @bitCast(u64, result))); + sub = null; + }, + else => |len| { + log.err("unexpected relocation length 0x{x}", .{len}); + return error.UnexpectedRelocationLength; + }, + } + }, + } + }, + .aarch64 => { + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + + switch (rel_type) { + .ARM64_RELOC_BRANCH26 => { + assert(rel.r_length == 2); + const inst = code[off..][0..4]; + const displacement = @intCast( + i28, + @intCast(i64, target_addr) - @intCast(i64, this_addr), + ); + var parsed = mem.bytesAsValue( + meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.UnconditionalBranchImmediate, + ), + inst, + ); + parsed.imm26 = @truncate(u26, @bitCast(u28, displacement) >> 2); + }, + .ARM64_RELOC_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_TLVP_LOAD_PAGE21, + => { + assert(rel.r_length == 2); + const inst = code[off..][0..4]; + const ta = if (addend) |a| target_addr + a else target_addr; + const this_page = @intCast(i32, this_addr >> 12); + const target_page = @intCast(i32, ta >> 12); + const pages = @bitCast(u21, @intCast(i21, target_page - this_page)); + log.debug(" | moving by {} pages", .{pages}); + var parsed = mem.bytesAsValue( + meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.PCRelativeAddress, + ), + inst, + ); + parsed.immhi = @truncate(u19, pages >> 2); + parsed.immlo = @truncate(u2, pages); + addend = null; + }, + .ARM64_RELOC_PAGEOFF12, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + => { + const inst = code[off..][0..4]; + if (aarch64IsArithmetic(inst)) { + log.debug(" | detected ADD opcode", .{}); + // add + var parsed = mem.bytesAsValue( + meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.AddSubtractImmediate, + ), + inst, + ); + const ta = if (addend) |a| target_addr + a else target_addr; + const narrowed = @truncate(u12, ta); + parsed.imm12 = narrowed; + } else { + log.debug(" | detected LDR/STR opcode", .{}); + // ldr/str + var parsed = mem.bytesAsValue( + meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.LoadStoreRegister, + ), + inst, + ); + + const ta = if (addend) |a| target_addr + a else target_addr; + const narrowed = @truncate(u12, ta); + log.debug(" | narrowed 0x{x}", .{narrowed}); + log.debug(" | parsed.size 0x{x}", .{parsed.size}); + + if (rel_type == .ARM64_RELOC_GOT_LOAD_PAGEOFF12) blk: { + const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const got = data_const_seg.sections.items[self.got_section_index.?]; + if (got.addr <= target_addr and target_addr < got.addr + got.size) break :blk; + + log.debug(" | rewriting to add", .{}); + mem.writeIntLittle(u32, inst, aarch64.Instruction.add( + @intToEnum(aarch64.Register, parsed.rt), + @intToEnum(aarch64.Register, parsed.rn), + narrowed, + false, + ).toU32()); + addend = null; + continue; + } + + const offset: u12 = blk: { + if (parsed.size == 0) { + if (parsed.v == 1) { + // 128-bit SIMD is scaled by 16. + break :blk try math.divExact(u12, narrowed, 16); + } + // Otherwise, 8-bit SIMD or ldrb. 
+ break :blk narrowed; + } else { + const denom: u4 = try math.powi(u4, 2, parsed.size); + break :blk try math.divExact(u12, narrowed, denom); + } + }; + parsed.offset = offset; + } + addend = null; + }, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => { + const RegInfo = struct { + rd: u5, + rn: u5, + size: u1, + }; + const inst = code[off..][0..4]; + const parsed: RegInfo = blk: { + if (aarch64IsArithmetic(inst)) { + const curr = mem.bytesAsValue( + meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.AddSubtractImmediate, + ), + inst, + ); + break :blk .{ .rd = curr.rd, .rn = curr.rn, .size = curr.sf }; + } else { + const curr = mem.bytesAsValue( + meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.LoadStoreRegister, + ), + inst, + ); + break :blk .{ .rd = curr.rt, .rn = curr.rn, .size = @truncate(u1, curr.size) }; + } + }; + const ta = if (addend) |a| target_addr + a else target_addr; + const narrowed = @truncate(u12, ta); + log.debug(" | rewriting TLV access to ADD opcode", .{}); + // For TLV, we always generate an add instruction. + mem.writeIntLittle(u32, inst, aarch64.Instruction.add( + @intToEnum(aarch64.Register, parsed.rd), + @intToEnum(aarch64.Register, parsed.rn), + narrowed, + false, + ).toU32()); + }, + .ARM64_RELOC_SUBTRACTOR => { + sub = @intCast(i64, target_addr); + }, + .ARM64_RELOC_UNSIGNED => { + switch (rel.r_length) { + 3 => { + const inst = code[off..][0..8]; + const offset = mem.readIntLittle(i64, inst); + log.debug(" | calculated addend 0x{x}", .{offset}); + const result = if (sub) |s| + @intCast(i64, target_addr) - s + offset + else + @intCast(i64, target_addr) + offset; + mem.writeIntLittle(u64, inst, @bitCast(u64, result)); + sub = null; + + rebases: { + var hit: bool = false; + if (target_mapping.target_seg_id == self.data_segment_cmd_index.?) { + if (self.data_section_index) |index| { + if (index == target_mapping.target_sect_id) hit = true; + } + } + if (target_mapping.target_seg_id == self.data_const_segment_cmd_index.?) { + if (self.data_const_section_index) |index| { + if (index == target_mapping.target_sect_id) hit = true; + } + } + + if (!hit) break :rebases; + + try self.local_rebases.append(self.allocator, .{ + .offset = this_addr - target_seg.inner.vmaddr, + .segment_id = target_mapping.target_seg_id, + }); + } + // TLV is handled via a separate offset mechanism. + // Calculate the offset to the initializer. + if (target_sect.flags == macho.S_THREAD_LOCAL_VARIABLES) tlv: { + assert(rel.r_extern == 1); + const sym = object.symtab.items[rel.r_symbolnum]; + if (isImport(&sym)) break :tlv; + + const base_addr = blk: { + if (self.tlv_data_section_index) |index| { + const tlv_data = target_seg.sections.items[index]; + break :blk tlv_data.addr; + } else { + const tlv_bss = target_seg.sections.items[self.tlv_bss_section_index.?]; + break :blk tlv_bss.addr; + } + }; + // Since we require TLV data to always preceed TLV bss section, we calculate + // offsets wrt to the former if it is defined; otherwise, wrt to the latter. 
+ try self.threadlocal_offsets.append(self.allocator, target_addr - base_addr); + } + }, + 2 => { + const inst = code[off..][0..4]; + const offset = mem.readIntLittle(i32, inst); + log.debug(" | calculated addend 0x{x}", .{offset}); + const result = if (sub) |s| + @intCast(i64, target_addr) - s + offset + else + @intCast(i64, target_addr) + offset; + mem.writeIntLittle(u32, inst, @truncate(u32, @bitCast(u64, result))); + sub = null; + }, + else => |len| { + log.err("unexpected relocation length 0x{x}", .{len}); + return error.UnexpectedRelocationLength; + }, + } + }, + .ARM64_RELOC_POINTER_TO_GOT => return error.TODOArm64RelocPointerToGot, + else => unreachable, + } + }, + else => unreachable, + } + } + + log.debug("writing contents of '{s},{s}' section from '{s}' from 0x{x} to 0x{x}", .{ + segname, + sectname, + object.name, + target_sect_off, + target_sect_off + code.len, + }); + + if (target_sect.flags == macho.S_ZEROFILL or + target_sect.flags == macho.S_THREAD_LOCAL_ZEROFILL or + target_sect.flags == macho.S_THREAD_LOCAL_VARIABLES) + { + log.debug("zeroing out '{s},{s}' from 0x{x} to 0x{x}", .{ + parseName(&target_sect.segname), + parseName(&target_sect.sectname), + target_sect_off, + target_sect_off + code.len, + }); + // Zero-out the space + var zeroes = try self.allocator.alloc(u8, code.len); + defer self.allocator.free(zeroes); + mem.set(u8, zeroes, 0); + try self.file.?.pwriteAll(zeroes, target_sect_off); + } else { + try self.file.?.pwriteAll(code, target_sect_off); + } + } + } +} + +fn relocTargetAddr(self: *Zld, object_id: u16, rel: macho.relocation_info) !u64 { + const object = self.objects.items[object_id]; + const seg = object.load_commands.items[object.segment_cmd_index.?].Segment; + const target_addr = blk: { + if (rel.r_extern == 1) { + const sym = object.symtab.items[rel.r_symbolnum]; + if (isLocal(&sym) or isExport(&sym)) { + // Relocate using section offsets only. + const target_mapping = self.mappings.get(.{ + .object_id = object_id, + .source_sect_id = sym.n_sect - 1, + }) orelse unreachable; + const source_sect = seg.sections.items[target_mapping.source_sect_id]; + const target_seg = self.load_commands.items[target_mapping.target_seg_id].Segment; + const target_sect = target_seg.sections.items[target_mapping.target_sect_id]; + const target_sect_addr = target_sect.addr + target_mapping.offset; + log.debug(" | symbol local to object", .{}); + break :blk target_sect_addr + sym.n_value - source_sect.addr; + } else if (isImport(&sym)) { + // Relocate to either the artifact's local symbol, or an import from + // shared library. 
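+ // Lookup order below: a global (or weak global) defined in another object file,
+ // then a lazy import routed through __stubs, then a non-lazy import routed through
+ // __got, and finally the special __tlv_bootstrap symbol.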
+ const sym_name = object.getString(sym.n_strx); + if (self.locals.get(sym_name)) |locs| { + var n_value: ?u64 = null; + for (locs.items) |loc| { + switch (loc.tt) { + .Global => { + n_value = loc.inner.n_value; + break; + }, + .WeakGlobal => { + n_value = loc.inner.n_value; + }, + .Local => {}, + } + } + if (n_value) |v| { + break :blk v; + } + log.err("local symbol export '{s}' not found", .{sym_name}); + return error.LocalSymbolExportNotFound; + } else if (self.lazy_imports.get(sym_name)) |ext| { + const segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const stubs = segment.sections.items[self.stubs_section_index.?]; + break :blk stubs.addr + ext.index * stubs.reserved2; + } else if (self.nonlazy_imports.get(sym_name)) |ext| { + const segment = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const got = segment.sections.items[self.got_section_index.?]; + break :blk got.addr + ext.index * @sizeOf(u64); + } else if (mem.eql(u8, sym_name, "__tlv_bootstrap")) { + const segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const tlv = segment.sections.items[self.tlv_section_index.?]; + break :blk tlv.addr + self.tlv_bootstrap.?.index * @sizeOf(u64); + } else { + log.err("failed to resolve symbol '{s}' as a relocation target", .{sym_name}); + return error.FailedToResolveRelocationTarget; + } + } else { + log.err("unexpected symbol {}, {s}", .{ sym, object.getString(sym.n_strx) }); + return error.UnexpectedSymbolWhenRelocating; + } + } else { + // TODO I think we need to reparse the relocation_info as scattered_relocation_info + // here to get the actual section plus offset into that section of the relocated + // symbol. Unless the fine-grained location is encoded within the cell in the code + // buffer? 
+ const target_mapping = self.mappings.get(.{ + .object_id = object_id, + .source_sect_id = @intCast(u16, rel.r_symbolnum - 1), + }) orelse unreachable; + const target_seg = self.load_commands.items[target_mapping.target_seg_id].Segment; + const target_sect = target_seg.sections.items[target_mapping.target_sect_id]; + break :blk target_sect.addr + target_mapping.offset; + } + }; + return target_addr; +} + +fn populateMetadata(self: *Zld) !void { + if (self.pagezero_segment_cmd_index == null) { + self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.allocator, .{ + .Segment = SegmentCommand.empty(.{ + .cmd = macho.LC_SEGMENT_64, + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString("__PAGEZERO"), + .vmaddr = 0, + .vmsize = 0x100000000, // size always set to 4GB + .fileoff = 0, + .filesize = 0, + .maxprot = 0, + .initprot = 0, + .nsects = 0, + .flags = 0, + }), + }); + } + + if (self.text_segment_cmd_index == null) { + self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.allocator, .{ + .Segment = SegmentCommand.empty(.{ + .cmd = macho.LC_SEGMENT_64, + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString("__TEXT"), + .vmaddr = 0x100000000, // always starts at 4GB + .vmsize = 0, + .fileoff = 0, + .filesize = 0, + .maxprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, + .initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, + .nsects = 0, + .flags = 0, + }), + }); + } + + if (self.text_section_index == null) { + const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + self.text_section_index = @intCast(u16, text_seg.sections.items.len); + const alignment: u2 = switch (self.arch.?) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; + try text_seg.addSection(self.allocator, .{ + .sectname = makeStaticString("__text"), + .segname = makeStaticString("__TEXT"), + .addr = 0, + .size = 0, + .offset = 0, + .@"align" = alignment, + .reloff = 0, + .nreloc = 0, + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + } + + if (self.stubs_section_index == null) { + const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + self.stubs_section_index = @intCast(u16, text_seg.sections.items.len); + const alignment: u2 = switch (self.arch.?) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; + const stub_size: u4 = switch (self.arch.?) { + .x86_64 => 6, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, // unhandled architecture type + }; + try text_seg.addSection(self.allocator, .{ + .sectname = makeStaticString("__stubs"), + .segname = makeStaticString("__TEXT"), + .addr = 0, + .size = 0, + .offset = 0, + .@"align" = alignment, + .reloff = 0, + .nreloc = 0, + .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + .reserved1 = 0, + .reserved2 = stub_size, + .reserved3 = 0, + }); + } + + if (self.stub_helper_section_index == null) { + const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + self.stub_helper_section_index = @intCast(u16, text_seg.sections.items.len); + const alignment: u2 = switch (self.arch.?) 
{ + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; + const stub_helper_size: u6 = switch (self.arch.?) { + .x86_64 => 15, + .aarch64 => 6 * @sizeOf(u32), + else => unreachable, + }; + try text_seg.addSection(self.allocator, .{ + .sectname = makeStaticString("__stub_helper"), + .segname = makeStaticString("__TEXT"), + .addr = 0, + .size = stub_helper_size, + .offset = 0, + .@"align" = alignment, + .reloff = 0, + .nreloc = 0, + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + } + + if (self.data_const_segment_cmd_index == null) { + self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.allocator, .{ + .Segment = SegmentCommand.empty(.{ + .cmd = macho.LC_SEGMENT_64, + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString("__DATA_CONST"), + .vmaddr = 0, + .vmsize = 0, + .fileoff = 0, + .filesize = 0, + .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + .nsects = 0, + .flags = 0, + }), + }); + } + + if (self.got_section_index == null) { + const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + self.got_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.allocator, .{ + .sectname = makeStaticString("__got"), + .segname = makeStaticString("__DATA_CONST"), + .addr = 0, + .size = 0, + .offset = 0, + .@"align" = 3, // 2^3 = @sizeOf(u64) + .reloff = 0, + .nreloc = 0, + .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + } + + if (self.data_segment_cmd_index == null) { + self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.allocator, .{ + .Segment = SegmentCommand.empty(.{ + .cmd = macho.LC_SEGMENT_64, + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString("__DATA"), + .vmaddr = 0, + .vmsize = 0, + .fileoff = 0, + .filesize = 0, + .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + .nsects = 0, + .flags = 0, + }), + }); + } + + if (self.la_symbol_ptr_section_index == null) { + const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + self.la_symbol_ptr_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.allocator, .{ + .sectname = makeStaticString("__la_symbol_ptr"), + .segname = makeStaticString("__DATA"), + .addr = 0, + .size = 0, + .offset = 0, + .@"align" = 3, // 2^3 = @sizeOf(u64) + .reloff = 0, + .nreloc = 0, + .flags = macho.S_LAZY_SYMBOL_POINTERS, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + } + + if (self.data_section_index == null) { + const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + self.data_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.allocator, .{ + .sectname = makeStaticString("__data"), + .segname = makeStaticString("__DATA"), + .addr = 0, + .size = 0, + .offset = 0, + .@"align" = 3, // 2^3 = @sizeOf(u64) + .reloff = 0, + .nreloc = 0, + .flags = macho.S_REGULAR, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + } + + if (self.linkedit_segment_cmd_index == null) { + self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + 
try self.load_commands.append(self.allocator, .{ + .Segment = SegmentCommand.empty(.{ + .cmd = macho.LC_SEGMENT_64, + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString("__LINKEDIT"), + .vmaddr = 0, + .vmsize = 0, + .fileoff = 0, + .filesize = 0, + .maxprot = macho.VM_PROT_READ, + .initprot = macho.VM_PROT_READ, + .nsects = 0, + .flags = 0, + }), + }); + } + + if (self.dyld_info_cmd_index == null) { + self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.allocator, .{ + .DyldInfoOnly = .{ + .cmd = macho.LC_DYLD_INFO_ONLY, + .cmdsize = @sizeOf(macho.dyld_info_command), + .rebase_off = 0, + .rebase_size = 0, + .bind_off = 0, + .bind_size = 0, + .weak_bind_off = 0, + .weak_bind_size = 0, + .lazy_bind_off = 0, + .lazy_bind_size = 0, + .export_off = 0, + .export_size = 0, + }, + }); + } + + if (self.symtab_cmd_index == null) { + self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.allocator, .{ + .Symtab = .{ + .cmd = macho.LC_SYMTAB, + .cmdsize = @sizeOf(macho.symtab_command), + .symoff = 0, + .nsyms = 0, + .stroff = 0, + .strsize = 0, + }, + }); + try self.strtab.append(self.allocator, 0); + } + + if (self.dysymtab_cmd_index == null) { + self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.allocator, .{ + .Dysymtab = .{ + .cmd = macho.LC_DYSYMTAB, + .cmdsize = @sizeOf(macho.dysymtab_command), + .ilocalsym = 0, + .nlocalsym = 0, + .iextdefsym = 0, + .nextdefsym = 0, + .iundefsym = 0, + .nundefsym = 0, + .tocoff = 0, + .ntoc = 0, + .modtaboff = 0, + .nmodtab = 0, + .extrefsymoff = 0, + .nextrefsyms = 0, + .indirectsymoff = 0, + .nindirectsyms = 0, + .extreloff = 0, + .nextrel = 0, + .locreloff = 0, + .nlocrel = 0, + }, + }); + } + + if (self.dylinker_cmd_index == null) { + self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len); + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH), + @sizeOf(u64), + )); + var dylinker_cmd = emptyGenericCommandWithData(macho.dylinker_command{ + .cmd = macho.LC_LOAD_DYLINKER, + .cmdsize = cmdsize, + .name = @sizeOf(macho.dylinker_command), + }); + dylinker_cmd.data = try self.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name); + mem.set(u8, dylinker_cmd.data, 0); + mem.copy(u8, dylinker_cmd.data, mem.spanZ(DEFAULT_DYLD_PATH)); + try self.load_commands.append(self.allocator, .{ .Dylinker = dylinker_cmd }); + } + + if (self.libsystem_cmd_index == null) { + self.libsystem_cmd_index = @intCast(u16, self.load_commands.items.len); + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylib_command) + mem.lenZ(LIB_SYSTEM_PATH), + @sizeOf(u64), + )); + // TODO Find a way to work out runtime version from the OS version triple stored in std.Target. + // In the meantime, we're gonna hardcode to the minimum compatibility version of 0.0.0. 
+ const min_version = 0x0; + var dylib_cmd = emptyGenericCommandWithData(macho.dylib_command{ + .cmd = macho.LC_LOAD_DYLIB, + .cmdsize = cmdsize, + .dylib = .{ + .name = @sizeOf(macho.dylib_command), + .timestamp = 2, // not sure why not simply 0; this is reverse engineered from Mach-O files + .current_version = min_version, + .compatibility_version = min_version, + }, + }); + dylib_cmd.data = try self.allocator.alloc(u8, cmdsize - dylib_cmd.inner.dylib.name); + mem.set(u8, dylib_cmd.data, 0); + mem.copy(u8, dylib_cmd.data, mem.spanZ(LIB_SYSTEM_PATH)); + try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); + } + + if (self.main_cmd_index == null) { + self.main_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.allocator, .{ + .Main = .{ + .cmd = macho.LC_MAIN, + .cmdsize = @sizeOf(macho.entry_point_command), + .entryoff = 0x0, + .stacksize = 0, + }, + }); + } + + if (self.source_version_cmd_index == null) { + self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.allocator, .{ + .SourceVersion = .{ + .cmd = macho.LC_SOURCE_VERSION, + .cmdsize = @sizeOf(macho.source_version_command), + .version = 0x0, + }, + }); + } + + if (self.uuid_cmd_index == null) { + self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); + var uuid_cmd: macho.uuid_command = .{ + .cmd = macho.LC_UUID, + .cmdsize = @sizeOf(macho.uuid_command), + .uuid = undefined, + }; + std.crypto.random.bytes(&uuid_cmd.uuid); + try self.load_commands.append(self.allocator, .{ .Uuid = uuid_cmd }); + } + + if (self.code_signature_cmd_index == null and self.arch.? == .aarch64) { + self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.allocator, .{ + .LinkeditData = .{ + .cmd = macho.LC_CODE_SIGNATURE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = 0, + .datasize = 0, + }, + }); + } + + if (self.data_in_code_cmd_index == null and self.arch.? 
== .x86_64) { + self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.allocator, .{ + .LinkeditData = .{ + .cmd = macho.LC_DATA_IN_CODE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = 0, + .datasize = 0, + }, + }); + } +} + +fn flush(self: *Zld) !void { + if (self.bss_section_index) |index| { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[index]; + sect.offset = 0; + } + + if (self.tlv_bss_section_index) |index| { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[index]; + sect.offset = 0; + } + + if (self.tlv_section_index) |index| { + const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[index]; + + var buffer = try self.allocator.alloc(u8, sect.size); + defer self.allocator.free(buffer); + _ = try self.file.?.preadAll(buffer, sect.offset); + + var stream = std.io.fixedBufferStream(buffer); + var writer = stream.writer(); + + const seek_amt = 2 * @sizeOf(u64); + while (self.threadlocal_offsets.popOrNull()) |offset| { + try writer.context.seekBy(seek_amt); + try writer.writeIntLittle(u64, offset); + } + + try self.file.?.pwriteAll(buffer, sect.offset); + } + + try self.setEntryPoint(); + try self.writeRebaseInfoTable(); + try self.writeBindInfoTable(); + try self.writeLazyBindInfoTable(); + try self.writeExportInfo(); + if (self.arch.? == .x86_64) { + try self.writeDataInCode(); + } + + { + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + } + + try self.writeDebugInfo(); + try self.writeSymbolTable(); + try self.writeDynamicSymbolTable(); + try self.writeStringTable(); + + { + // Seal __LINKEDIT size + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size.?); + } + + if (self.arch.? == .aarch64) { + try self.writeCodeSignaturePadding(); + } + + try self.writeLoadCommands(); + try self.writeHeader(); + + if (self.arch.? == .aarch64) { + try self.writeCodeSignature(); + } + + if (comptime std.Target.current.isDarwin() and std.Target.current.cpu.arch == .aarch64) { + try fs.cwd().copyFile(self.out_path.?, fs.cwd(), self.out_path.?, .{}); + } +} + +fn setEntryPoint(self: *Zld) !void { + // TODO we should respect the -entry flag passed in by the user to set a custom + // entrypoint. For now, assume default of `_main`. 
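+ // Prefer a strong global definition of `_main`; otherwise fall back to the last
+ // weak global definition encountered.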
+ const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const text = seg.sections.items[self.text_section_index.?]; + const entry_syms = self.locals.get("_main") orelse return error.MissingMainEntrypoint; + + var entry_sym: ?macho.nlist_64 = null; + for (entry_syms.items) |es| { + switch (es.tt) { + .Global => { + entry_sym = es.inner; + break; + }, + .WeakGlobal => { + entry_sym = es.inner; + }, + .Local => {}, + } + } + if (entry_sym == null) { + log.err("no (weak) global definition of _main found", .{}); + return error.MissingMainEntrypoint; + } + + const name = try self.allocator.dupe(u8, "_main"); + try self.exports.putNoClobber(self.allocator, name, .{ + .n_strx = entry_sym.?.n_strx, + .n_value = entry_sym.?.n_value, + .n_type = macho.N_SECT | macho.N_EXT, + .n_desc = entry_sym.?.n_desc, + .n_sect = entry_sym.?.n_sect, + }); + + const ec = &self.load_commands.items[self.main_cmd_index.?].Main; + ec.entryoff = @intCast(u32, entry_sym.?.n_value - seg.inner.vmaddr); +} + +fn writeRebaseInfoTable(self: *Zld) !void { + var pointers = std.ArrayList(Pointer).init(self.allocator); + defer pointers.deinit(); + + try pointers.ensureCapacity(pointers.items.len + self.local_rebases.items.len); + pointers.appendSliceAssumeCapacity(self.local_rebases.items); + + if (self.got_section_index) |idx| { + // TODO this should be cleaned up! + try pointers.ensureCapacity(pointers.items.len + self.nonlazy_pointers.items().len); + const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const sect = seg.sections.items[idx]; + const base_offset = sect.addr - seg.inner.vmaddr; + const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); + const index_offset = @intCast(u32, self.nonlazy_imports.items().len); + for (self.nonlazy_pointers.items()) |entry| { + const index = index_offset + entry.value.index; + pointers.appendAssumeCapacity(.{ + .offset = base_offset + index * @sizeOf(u64), + .segment_id = segment_id, + }); + } + } + + if (self.la_symbol_ptr_section_index) |idx| { + try pointers.ensureCapacity(pointers.items.len + self.lazy_imports.items().len); + const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = seg.sections.items[idx]; + const base_offset = sect.addr - seg.inner.vmaddr; + const segment_id = @intCast(u16, self.data_segment_cmd_index.?); + for (self.lazy_imports.items()) |entry| { + pointers.appendAssumeCapacity(.{ + .offset = base_offset + entry.value.index * @sizeOf(u64), + .segment_id = segment_id, + }); + } + } + + std.sort.sort(Pointer, pointers.items, {}, pointerCmp); + + const size = try rebaseInfoSize(pointers.items); + var buffer = try self.allocator.alloc(u8, @intCast(usize, size)); + defer self.allocator.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + try writeRebaseInfo(pointers.items, stream.writer()); + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + dyld_info.rebase_off = @intCast(u32, seg.inner.fileoff); + dyld_info.rebase_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @sizeOf(u64))); + seg.inner.filesize += dyld_info.rebase_size; + + log.debug("writing rebase info from 0x{x} to 0x{x}", .{ dyld_info.rebase_off, dyld_info.rebase_off + dyld_info.rebase_size }); + + try self.file.?.pwriteAll(buffer, dyld_info.rebase_off); +} + +fn writeBindInfoTable(self: *Zld) !void { + var pointers = 
std.ArrayList(Pointer).init(self.allocator); + defer pointers.deinit(); + + if (self.got_section_index) |idx| { + try pointers.ensureCapacity(pointers.items.len + self.nonlazy_imports.items().len); + const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const sect = seg.sections.items[idx]; + const base_offset = sect.addr - seg.inner.vmaddr; + const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); + for (self.nonlazy_imports.items()) |entry| { + pointers.appendAssumeCapacity(.{ + .offset = base_offset + entry.value.index * @sizeOf(u64), + .segment_id = segment_id, + .dylib_ordinal = entry.value.dylib_ordinal, + .name = entry.key, + }); + } + } + + if (self.tlv_section_index) |idx| { + const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = seg.sections.items[idx]; + const base_offset = sect.addr - seg.inner.vmaddr; + const segment_id = @intCast(u16, self.data_segment_cmd_index.?); + try pointers.append(.{ + .offset = base_offset + self.tlv_bootstrap.?.index * @sizeOf(u64), + .segment_id = segment_id, + .dylib_ordinal = self.tlv_bootstrap.?.dylib_ordinal, + .name = "__tlv_bootstrap", + }); + } + + const size = try bindInfoSize(pointers.items); + var buffer = try self.allocator.alloc(u8, @intCast(usize, size)); + defer self.allocator.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + try writeBindInfo(pointers.items, stream.writer()); + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + dyld_info.bind_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + dyld_info.bind_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); + seg.inner.filesize += dyld_info.bind_size; + + log.debug("writing binding info from 0x{x} to 0x{x}", .{ dyld_info.bind_off, dyld_info.bind_off + dyld_info.bind_size }); + + try self.file.?.pwriteAll(buffer, dyld_info.bind_off); +} + +fn writeLazyBindInfoTable(self: *Zld) !void { + var pointers = std.ArrayList(Pointer).init(self.allocator); + defer pointers.deinit(); + try pointers.ensureCapacity(self.lazy_imports.items().len); + + if (self.la_symbol_ptr_section_index) |idx| { + const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = seg.sections.items[idx]; + const base_offset = sect.addr - seg.inner.vmaddr; + const segment_id = @intCast(u16, self.data_segment_cmd_index.?); + for (self.lazy_imports.items()) |entry| { + pointers.appendAssumeCapacity(.{ + .offset = base_offset + entry.value.index * @sizeOf(u64), + .segment_id = segment_id, + .dylib_ordinal = entry.value.dylib_ordinal, + .name = entry.key, + }); + } + } + + const size = try lazyBindInfoSize(pointers.items); + var buffer = try self.allocator.alloc(u8, @intCast(usize, size)); + defer self.allocator.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + try writeLazyBindInfo(pointers.items, stream.writer()); + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + dyld_info.lazy_bind_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + dyld_info.lazy_bind_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); + seg.inner.filesize += dyld_info.lazy_bind_size; + + log.debug("writing lazy binding info from 0x{x} to 0x{x}", .{ dyld_info.lazy_bind_off, 
dyld_info.lazy_bind_off + dyld_info.lazy_bind_size }); + + try self.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off); + try self.populateLazyBindOffsetsInStubHelper(buffer); +} + +fn populateLazyBindOffsetsInStubHelper(self: *Zld, buffer: []const u8) !void { + var stream = std.io.fixedBufferStream(buffer); + var reader = stream.reader(); + var offsets = std.ArrayList(u32).init(self.allocator); + try offsets.append(0); + defer offsets.deinit(); + var valid_block = false; + + while (true) { + const inst = reader.readByte() catch |err| switch (err) { + error.EndOfStream => break, + else => return err, + }; + const imm: u8 = inst & macho.BIND_IMMEDIATE_MASK; + const opcode: u8 = inst & macho.BIND_OPCODE_MASK; + + switch (opcode) { + macho.BIND_OPCODE_DO_BIND => { + valid_block = true; + }, + macho.BIND_OPCODE_DONE => { + if (valid_block) { + const offset = try stream.getPos(); + try offsets.append(@intCast(u32, offset)); + } + valid_block = false; + }, + macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { + var next = try reader.readByte(); + while (next != @as(u8, 0)) { + next = try reader.readByte(); + } + }, + macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { + _ = try leb.readULEB128(u64, reader); + }, + macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => { + _ = try leb.readULEB128(u64, reader); + }, + macho.BIND_OPCODE_SET_ADDEND_SLEB => { + _ = try leb.readILEB128(i64, reader); + }, + else => {}, + } + } + assert(self.lazy_imports.items().len <= offsets.items.len); + + const stub_size: u4 = switch (self.arch.?) { + .x86_64 => 10, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, + }; + const off: u4 = switch (self.arch.?) { + .x86_64 => 1, + .aarch64 => 2 * @sizeOf(u32), + else => unreachable, + }; + var buf: [@sizeOf(u32)]u8 = undefined; + for (self.lazy_imports.items()) |entry| { + const symbol = entry.value; + const placeholder_off = self.stub_helper_stubs_start_off.? 
+ symbol.index * stub_size + off; + mem.writeIntLittle(u32, &buf, offsets.items[symbol.index]); + try self.file.?.pwriteAll(&buf, placeholder_off); + } +} + +fn writeExportInfo(self: *Zld) !void { + var trie = Trie.init(self.allocator); + defer trie.deinit(); + + const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + for (self.exports.items()) |entry| { + const name = entry.key; + const symbol = entry.value; + // TODO figure out if we should put all exports into the export trie + assert(symbol.n_value >= text_segment.inner.vmaddr); + try trie.put(.{ + .name = name, + .vmaddr_offset = symbol.n_value - text_segment.inner.vmaddr, + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + }); + } + + try trie.finalize(); + var buffer = try self.allocator.alloc(u8, @intCast(usize, trie.size)); + defer self.allocator.free(buffer); + var stream = std.io.fixedBufferStream(buffer); + const nwritten = try trie.write(stream.writer()); + assert(nwritten == trie.size); + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + dyld_info.export_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + dyld_info.export_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); + seg.inner.filesize += dyld_info.export_size; + + log.debug("writing export info from 0x{x} to 0x{x}", .{ dyld_info.export_off, dyld_info.export_off + dyld_info.export_size }); + + try self.file.?.pwriteAll(buffer, dyld_info.export_off); +} + +fn writeDebugInfo(self: *Zld) !void { + var stabs = std.ArrayList(macho.nlist_64).init(self.allocator); + defer stabs.deinit(); + + for (self.objects.items) |object, object_id| { + var debug_info = blk: { + var di = try DebugInfo.parseFromObject(self.allocator, object); + break :blk di orelse continue; + }; + defer debug_info.deinit(self.allocator); + + const compile_unit = try debug_info.inner.findCompileUnit(0x0); // We assume there is only one CU. + const name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_name); + const comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_comp_dir); + + { + const tu_path = try std.fs.path.join(self.allocator, &[_][]const u8{ comp_dir, name }); + defer self.allocator.free(tu_path); + const dirname = std.fs.path.dirname(tu_path) orelse "./"; + // Current dir + try stabs.append(.{ + .n_strx = try self.makeString(tu_path[0 .. 
dirname.len + 1]), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + // Artifact name + try stabs.append(.{ + .n_strx = try self.makeString(tu_path[dirname.len + 1 ..]), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + // Path to object file with debug info + var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; + const full_path = blk: { + if (object.ar_name) |prefix| { + const path = try std.os.realpath(prefix, &buffer); + break :blk try std.fmt.allocPrint(self.allocator, "{s}({s})", .{ path, object.name }); + } else { + const path = try std.os.realpath(object.name, &buffer); + break :blk try mem.dupe(self.allocator, u8, path); + } + }; + defer self.allocator.free(full_path); + const stat = try object.file.stat(); + const mtime = @intCast(u64, @divFloor(stat.mtime, 1_000_000_000)); + try stabs.append(.{ + .n_strx = try self.makeString(full_path), + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = mtime, + }); + } + log.debug("analyzing debug info in '{s}'", .{object.name}); + + for (object.symtab.items) |source_sym| { + const symname = object.getString(source_sym.n_strx); + const source_addr = source_sym.n_value; + const target_syms = self.locals.get(symname) orelse continue; + const target_sym: Symbol = blk: { + for (target_syms.items) |ts| { + if (ts.object_id == @intCast(u16, object_id)) break :blk ts; + } else continue; + }; + + const maybe_size = blk: for (debug_info.inner.func_list.items) |func| { + if (func.pc_range) |range| { + if (source_addr >= range.start and source_addr < range.end) { + break :blk range.end - range.start; + } + } + } else null; + + if (maybe_size) |size| { + try stabs.append(.{ + .n_strx = 0, + .n_type = macho.N_BNSYM, + .n_sect = target_sym.inner.n_sect, + .n_desc = 0, + .n_value = target_sym.inner.n_value, + }); + try stabs.append(.{ + .n_strx = target_sym.inner.n_strx, + .n_type = macho.N_FUN, + .n_sect = target_sym.inner.n_sect, + .n_desc = 0, + .n_value = target_sym.inner.n_value, + }); + try stabs.append(.{ + .n_strx = 0, + .n_type = macho.N_FUN, + .n_sect = 0, + .n_desc = 0, + .n_value = size, + }); + try stabs.append(.{ + .n_strx = 0, + .n_type = macho.N_ENSYM, + .n_sect = target_sym.inner.n_sect, + .n_desc = 0, + .n_value = size, + }); + } else { + // TODO need a way to differentiate symbols: global, static, local, etc. + try stabs.append(.{ + .n_strx = target_sym.inner.n_strx, + .n_type = macho.N_STSYM, + .n_sect = target_sym.inner.n_sect, + .n_desc = 0, + .n_value = target_sym.inner.n_value, + }); + } + } + + // Close the source file! + try stabs.append(.{ + .n_strx = 0, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + } + + if (stabs.items.len == 0) return; + + // Write stabs into the symbol table + const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + + symtab.nsyms = @intCast(u32, stabs.items.len); + + const stabs_off = symtab.symoff; + const stabs_size = symtab.nsyms * @sizeOf(macho.nlist_64); + log.debug("writing symbol stabs from 0x{x} to 0x{x}", .{ stabs_off, stabs_size + stabs_off }); + try self.file.?.pwriteAll(mem.sliceAsBytes(stabs.items), stabs_off); + + linkedit.inner.filesize += stabs_size; + + // Update dynamic symbol table. 
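+ // The stabs emitted above are local symbols, so they all count towards nlocalsym.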
+ const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; + dysymtab.nlocalsym = symtab.nsyms; +} + +fn writeSymbolTable(self: *Zld) !void { + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + + var locals = std.ArrayList(macho.nlist_64).init(self.allocator); + defer locals.deinit(); + + for (self.locals.items()) |entries| { + log.debug("'{s}': {} entries", .{ entries.key, entries.value.items.len }); + // var symbol: ?macho.nlist_64 = null; + for (entries.value.items) |entry| { + log.debug(" | {}", .{entry.inner}); + log.debug(" | {}", .{entry.tt}); + log.debug(" | {s}", .{self.objects.items[entry.object_id].name}); + try locals.append(entry.inner); + } + } + const nlocals = locals.items.len; + + const nexports = self.exports.items().len; + var exports = std.ArrayList(macho.nlist_64).init(self.allocator); + defer exports.deinit(); + + try exports.ensureCapacity(nexports); + for (self.exports.items()) |entry| { + exports.appendAssumeCapacity(entry.value); + } + + const has_tlv: bool = self.tlv_bootstrap != null; + + var nundefs = self.lazy_imports.items().len + self.nonlazy_imports.items().len; + if (has_tlv) nundefs += 1; + + var undefs = std.ArrayList(macho.nlist_64).init(self.allocator); + defer undefs.deinit(); + + try undefs.ensureCapacity(nundefs); + for (self.lazy_imports.items()) |entry| { + undefs.appendAssumeCapacity(entry.value.symbol); + } + for (self.nonlazy_imports.items()) |entry| { + undefs.appendAssumeCapacity(entry.value.symbol); + } + if (has_tlv) { + undefs.appendAssumeCapacity(self.tlv_bootstrap.?.symbol); + } + + const locals_off = symtab.symoff + symtab.nsyms * @sizeOf(macho.nlist_64); + const locals_size = nlocals * @sizeOf(macho.nlist_64); + log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); + try self.file.?.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); + + const exports_off = locals_off + locals_size; + const exports_size = nexports * @sizeOf(macho.nlist_64); + log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); + try self.file.?.pwriteAll(mem.sliceAsBytes(exports.items), exports_off); + + const undefs_off = exports_off + exports_size; + const undefs_size = nundefs * @sizeOf(macho.nlist_64); + log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); + try self.file.?.pwriteAll(mem.sliceAsBytes(undefs.items), undefs_off); + + symtab.nsyms += @intCast(u32, nlocals + nexports + nundefs); + seg.inner.filesize += locals_size + exports_size + undefs_size; + + // Update dynamic symbol table. 
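+ // The symbol table was written as locals, then exported definitions, then undefined
+ // imports; the index ranges below mirror that layout.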
+ const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; + dysymtab.nlocalsym += @intCast(u32, nlocals); + dysymtab.iextdefsym = dysymtab.nlocalsym; + dysymtab.nextdefsym = @intCast(u32, nexports); + dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; + dysymtab.nundefsym = @intCast(u32, nundefs); +} + +fn writeDynamicSymbolTable(self: *Zld) !void { + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const stubs = &text_segment.sections.items[self.stubs_section_index.?]; + const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const got = &data_const_segment.sections.items[self.got_section_index.?]; + const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; + const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; + + const lazy = self.lazy_imports.items(); + const nonlazy = self.nonlazy_imports.items(); + const got_locals = self.nonlazy_pointers.items(); + dysymtab.indirectsymoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + dysymtab.nindirectsyms = @intCast(u32, lazy.len * 2 + nonlazy.len + got_locals.len); + const needed_size = dysymtab.nindirectsyms * @sizeOf(u32); + seg.inner.filesize += needed_size; + + log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ + dysymtab.indirectsymoff, + dysymtab.indirectsymoff + needed_size, + }); + + var buf = try self.allocator.alloc(u8, needed_size); + defer self.allocator.free(buf); + var stream = std.io.fixedBufferStream(buf); + var writer = stream.writer(); + + stubs.reserved1 = 0; + for (lazy) |_, i| { + const symtab_idx = @intCast(u32, dysymtab.iundefsym + i); + try writer.writeIntLittle(u32, symtab_idx); + } + + const base_id = @intCast(u32, lazy.len); + got.reserved1 = base_id; + for (nonlazy) |_, i| { + const symtab_idx = @intCast(u32, dysymtab.iundefsym + i + base_id); + try writer.writeIntLittle(u32, symtab_idx); + } + // TODO there should be one common set of GOT entries. + for (got_locals) |_| { + try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); + } + + la_symbol_ptr.reserved1 = got.reserved1 + @intCast(u32, nonlazy.len) + @intCast(u32, got_locals.len); + for (lazy) |_, i| { + const symtab_idx = @intCast(u32, dysymtab.iundefsym + i); + try writer.writeIntLittle(u32, symtab_idx); + } + + try self.file.?.pwriteAll(buf, dysymtab.indirectsymoff); +} + +fn writeStringTable(self: *Zld) !void { + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + symtab.stroff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + symtab.strsize = @intCast(u32, mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64))); + seg.inner.filesize += symtab.strsize; + + log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); + + try self.file.?.pwriteAll(self.strtab.items, symtab.stroff); + + if (symtab.strsize > self.strtab.items.len and self.arch.? == .x86_64) { + // This is the last section, so we need to pad it out. 
+ try self.file.?.pwriteAll(&[_]u8{0}, seg.inner.fileoff + seg.inner.filesize - 1); + } +} + +fn writeDataInCode(self: *Zld) !void { + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData; + const fileoff = seg.inner.fileoff + seg.inner.filesize; + + var buf = std.ArrayList(u8).init(self.allocator); + defer buf.deinit(); + + const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const text_sect = text_seg.sections.items[self.text_section_index.?]; + for (self.objects.items) |object, object_id| { + const source_seg = object.load_commands.items[object.segment_cmd_index.?].Segment; + const source_sect = source_seg.sections.items[object.text_section_index.?]; + const target_mapping = self.mappings.get(.{ + .object_id = @intCast(u16, object_id), + .source_sect_id = object.text_section_index.?, + }) orelse continue; + + try buf.ensureCapacity( + buf.items.len + object.data_in_code_entries.items.len * @sizeOf(macho.data_in_code_entry), + ); + for (object.data_in_code_entries.items) |dice| { + const new_dice: macho.data_in_code_entry = .{ + .offset = text_sect.offset + target_mapping.offset + dice.offset, + .length = dice.length, + .kind = dice.kind, + }; + buf.appendSliceAssumeCapacity(mem.asBytes(&new_dice)); + } + } + const datasize = @intCast(u32, buf.items.len); + + dice_cmd.dataoff = @intCast(u32, fileoff); + dice_cmd.datasize = datasize; + seg.inner.filesize += datasize; + + log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ fileoff, fileoff + datasize }); + + try self.file.?.pwriteAll(buf.items, fileoff); +} + +fn writeCodeSignaturePadding(self: *Zld) !void { + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const code_sig_cmd = &self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData; + const fileoff = seg.inner.fileoff + seg.inner.filesize; + const needed_size = CodeSignature.calcCodeSignaturePaddingSize( + self.out_path.?, + fileoff, + self.page_size.?, + ); + code_sig_cmd.dataoff = @intCast(u32, fileoff); + code_sig_cmd.datasize = needed_size; + + // Advance size of __LINKEDIT segment + seg.inner.filesize += needed_size; + seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size.?); + + log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ fileoff, fileoff + needed_size }); + + // Pad out the space. We need to do this to calculate valid hashes for everything in the file + // except for code signature data. 
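+ // Writing a single zero byte at the final offset is enough to extend the file to the
+ // padded size.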
+ try self.file.?.pwriteAll(&[_]u8{0}, fileoff + needed_size - 1); +} + +fn writeCodeSignature(self: *Zld) !void { + const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const code_sig_cmd = self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData; + + var code_sig = CodeSignature.init(self.allocator, self.page_size.?); + defer code_sig.deinit(); + try code_sig.calcAdhocSignature( + self.file.?, + self.out_path.?, + text_seg.inner, + code_sig_cmd, + .Exe, + ); + + var buffer = try self.allocator.alloc(u8, code_sig.size()); + defer self.allocator.free(buffer); + var stream = std.io.fixedBufferStream(buffer); + try code_sig.write(stream.writer()); + + log.debug("writing code signature from 0x{x} to 0x{x}", .{ code_sig_cmd.dataoff, code_sig_cmd.dataoff + buffer.len }); + + try self.file.?.pwriteAll(buffer, code_sig_cmd.dataoff); +} + +fn writeLoadCommands(self: *Zld) !void { + var sizeofcmds: u32 = 0; + for (self.load_commands.items) |lc| { + sizeofcmds += lc.cmdsize(); + } + + var buffer = try self.allocator.alloc(u8, sizeofcmds); + defer self.allocator.free(buffer); + var writer = std.io.fixedBufferStream(buffer).writer(); + for (self.load_commands.items) |lc| { + try lc.write(writer); + } + + const off = @sizeOf(macho.mach_header_64); + log.debug("writing {} load commands from 0x{x} to 0x{x}", .{ self.load_commands.items.len, off, off + sizeofcmds }); + try self.file.?.pwriteAll(buffer, off); +} + +fn writeHeader(self: *Zld) !void { + var header: macho.mach_header_64 = undefined; + header.magic = macho.MH_MAGIC_64; + + const CpuInfo = struct { + cpu_type: macho.cpu_type_t, + cpu_subtype: macho.cpu_subtype_t, + }; + + const cpu_info: CpuInfo = switch (self.arch.?) { + .aarch64 => .{ + .cpu_type = macho.CPU_TYPE_ARM64, + .cpu_subtype = macho.CPU_SUBTYPE_ARM_ALL, + }, + .x86_64 => .{ + .cpu_type = macho.CPU_TYPE_X86_64, + .cpu_subtype = macho.CPU_SUBTYPE_X86_64_ALL, + }, + else => return error.UnsupportedCpuArchitecture, + }; + header.cputype = cpu_info.cpu_type; + header.cpusubtype = cpu_info.cpu_subtype; + header.filetype = macho.MH_EXECUTE; + header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL; + header.reserved = 0; + + if (self.tlv_section_index) |_| + header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; + + header.ncmds = @intCast(u32, self.load_commands.items.len); + header.sizeofcmds = 0; + for (self.load_commands.items) |cmd| { + header.sizeofcmds += cmd.cmdsize(); + } + log.debug("writing Mach-O header {}", .{header}); + try self.file.?.pwriteAll(mem.asBytes(&header), 0); +} + +pub fn makeStaticString(bytes: []const u8) [16]u8 { + var buf = [_]u8{0} ** 16; + assert(bytes.len <= buf.len); + mem.copy(u8, &buf, bytes); + return buf; +} + +fn makeString(self: *Zld, bytes: []const u8) !u32 { + try self.strtab.ensureCapacity(self.allocator, self.strtab.items.len + bytes.len + 1); + const offset = @intCast(u32, self.strtab.items.len); + log.debug("writing new string '{s}' into string table at offset 0x{x}", .{ bytes, offset }); + self.strtab.appendSliceAssumeCapacity(bytes); + self.strtab.appendAssumeCapacity(0); + return offset; +} + +fn getString(self: *const Zld, str_off: u32) []const u8 { + assert(str_off < self.strtab.items.len); + return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + str_off)); +} + +pub fn parseName(name: *const [16]u8) []const u8 { + const len = mem.indexOfScalar(u8, name, @as(u8, 0)) orelse name.len; + return name[0..len]; +} + +fn isLocal(sym: *const macho.nlist_64) 
callconv(.Inline) bool { + if (isExtern(sym)) return false; + const tt = macho.N_TYPE & sym.n_type; + return tt == macho.N_SECT; +} + +fn isExport(sym: *const macho.nlist_64) callconv(.Inline) bool { + if (!isExtern(sym)) return false; + const tt = macho.N_TYPE & sym.n_type; + return tt == macho.N_SECT; +} + +fn isImport(sym: *const macho.nlist_64) callconv(.Inline) bool { + if (!isExtern(sym)) return false; + const tt = macho.N_TYPE & sym.n_type; + return tt == macho.N_UNDF; +} + +fn isExtern(sym: *const macho.nlist_64) callconv(.Inline) bool { + if ((sym.n_type & macho.N_EXT) == 0) return false; + return (sym.n_type & macho.N_PEXT) == 0; +} + +fn isWeakDef(sym: *const macho.nlist_64) callconv(.Inline) bool { + return (sym.n_desc & macho.N_WEAK_DEF) != 0; +} + +fn aarch64IsArithmetic(inst: *const [4]u8) callconv(.Inline) bool { + const group_decode = @truncate(u5, inst[3]); + return ((group_decode >> 2) == 4); +} diff --git a/src/link/MachO/bind.zig b/src/link/MachO/bind.zig new file mode 100644 index 0000000000..d234fa8242 --- /dev/null +++ b/src/link/MachO/bind.zig @@ -0,0 +1,145 @@ +const std = @import("std"); +const leb = std.leb; +const macho = std.macho; + +pub const Pointer = struct { + offset: u64, + segment_id: u16, + dylib_ordinal: ?i64 = null, + name: ?[]const u8 = null, +}; + +pub fn pointerCmp(context: void, a: Pointer, b: Pointer) bool { + if (a.segment_id < b.segment_id) return true; + if (a.segment_id == b.segment_id) { + return a.offset < b.offset; + } + return false; +} + +pub fn rebaseInfoSize(pointers: []const Pointer) !u64 { + var stream = std.io.countingWriter(std.io.null_writer); + var writer = stream.writer(); + var size: u64 = 0; + + for (pointers) |pointer| { + size += 2; + try leb.writeILEB128(writer, pointer.offset); + size += 1; + } + + size += 1 + stream.bytes_written; + return size; +} + +pub fn writeRebaseInfo(pointers: []const Pointer, writer: anytype) !void { + for (pointers) |pointer| { + try writer.writeByte(macho.REBASE_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.REBASE_TYPE_POINTER)); + try writer.writeByte(macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, pointer.segment_id)); + + try leb.writeILEB128(writer, pointer.offset); + try writer.writeByte(macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | @truncate(u4, 1)); + } + try writer.writeByte(macho.REBASE_OPCODE_DONE); +} + +pub fn bindInfoSize(pointers: []const Pointer) !u64 { + var stream = std.io.countingWriter(std.io.null_writer); + var writer = stream.writer(); + var size: u64 = 0; + + for (pointers) |pointer| { + size += 1; + if (pointer.dylib_ordinal.? > 15) { + try leb.writeULEB128(writer, @bitCast(u64, pointer.dylib_ordinal.?)); + } + size += 1; + + size += 1; + size += pointer.name.?.len; + size += 1; + + size += 1; + + try leb.writeILEB128(writer, pointer.offset); + size += 1; + } + + size += stream.bytes_written + 1; + return size; +} + +pub fn writeBindInfo(pointers: []const Pointer, writer: anytype) !void { + for (pointers) |pointer| { + if (pointer.dylib_ordinal.? > 15) { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); + try leb.writeULEB128(writer, @bitCast(u64, pointer.dylib_ordinal.?)); + } else if (pointer.dylib_ordinal.? 
> 0) { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, pointer.dylib_ordinal.?))); + } else { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, pointer.dylib_ordinal.?))); + } + try writer.writeByte(macho.BIND_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.BIND_TYPE_POINTER)); + + try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags. + try writer.writeAll(pointer.name.?); + try writer.writeByte(0); + + try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, pointer.segment_id)); + + try leb.writeILEB128(writer, pointer.offset); + try writer.writeByte(macho.BIND_OPCODE_DO_BIND); + } + + try writer.writeByte(macho.BIND_OPCODE_DONE); +} + +pub fn lazyBindInfoSize(pointers: []const Pointer) !u64 { + var stream = std.io.countingWriter(std.io.null_writer); + var writer = stream.writer(); + var size: u64 = 0; + + for (pointers) |pointer| { + size += 1; + + try leb.writeILEB128(writer, pointer.offset); + + size += 1; + if (pointer.dylib_ordinal.? > 15) { + try leb.writeULEB128(writer, @bitCast(u64, pointer.dylib_ordinal.?)); + } + + size += 1; + size += pointer.name.?.len; + size += 1; + + size += 2; + } + + size += stream.bytes_written; + return size; +} + +pub fn writeLazyBindInfo(pointers: []const Pointer, writer: anytype) !void { + for (pointers) |pointer| { + try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, pointer.segment_id)); + + try leb.writeILEB128(writer, pointer.offset); + + if (pointer.dylib_ordinal.? > 15) { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); + try leb.writeULEB128(writer, @bitCast(u64, pointer.dylib_ordinal.?)); + } else if (pointer.dylib_ordinal.? > 0) { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, pointer.dylib_ordinal.?))); + } else { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, pointer.dylib_ordinal.?))); + } + + try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags. + try writer.writeAll(pointer.name.?); + try writer.writeByte(0); + + try writer.writeByte(macho.BIND_OPCODE_DO_BIND); + try writer.writeByte(macho.BIND_OPCODE_DONE); + } +} diff --git a/src/link/MachO/imports.zig b/src/link/MachO/imports.zig deleted file mode 100644 index 2bc34d14c5..0000000000 --- a/src/link/MachO/imports.zig +++ /dev/null @@ -1,152 +0,0 @@ -const std = @import("std"); -const leb = std.leb; -const macho = std.macho; -const mem = std.mem; - -const assert = std.debug.assert; -const Allocator = mem.Allocator; - -pub const ExternSymbol = struct { - /// MachO symbol table entry. - inner: macho.nlist_64, - - /// Id of the dynamic library where the specified entries can be found. - /// Id of 0 means self. - /// TODO this should really be an id into the table of all defined - /// dylibs. - dylib_ordinal: i64 = 0, - - /// Id of the segment where this symbol is defined (will have its address - /// resolved). - segment: u16 = 0, - - /// Offset relative to the start address of the `segment`. 
- offset: u32 = 0, -}; - -pub fn rebaseInfoSize(symbols: anytype) !u64 { - var stream = std.io.countingWriter(std.io.null_writer); - var writer = stream.writer(); - var size: u64 = 0; - - for (symbols) |entry| { - size += 2; - try leb.writeILEB128(writer, entry.value.offset); - size += 1; - } - - size += 1 + stream.bytes_written; - return size; -} - -pub fn writeRebaseInfo(symbols: anytype, writer: anytype) !void { - for (symbols) |entry| { - const symbol = entry.value; - try writer.writeByte(macho.REBASE_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.REBASE_TYPE_POINTER)); - try writer.writeByte(macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment)); - try leb.writeILEB128(writer, symbol.offset); - try writer.writeByte(macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | @truncate(u4, 1)); - } - try writer.writeByte(macho.REBASE_OPCODE_DONE); -} - -pub fn bindInfoSize(symbols: anytype) !u64 { - var stream = std.io.countingWriter(std.io.null_writer); - var writer = stream.writer(); - var size: u64 = 0; - - for (symbols) |entry| { - const symbol = entry.value; - - size += 1; - if (symbol.dylib_ordinal > 15) { - try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal)); - } - size += 1; - - size += 1; - size += entry.key.len; - size += 1; - - size += 1; - try leb.writeILEB128(writer, symbol.offset); - size += 2; - } - - size += stream.bytes_written; - return size; -} - -pub fn writeBindInfo(symbols: anytype, writer: anytype) !void { - for (symbols) |entry| { - const symbol = entry.value; - - if (symbol.dylib_ordinal > 15) { - try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); - try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal)); - } else if (symbol.dylib_ordinal > 0) { - try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal))); - } else { - try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal))); - } - try writer.writeByte(macho.BIND_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.BIND_TYPE_POINTER)); - - try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags. 
- try writer.writeAll(entry.key); - try writer.writeByte(0); - - try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment)); - try leb.writeILEB128(writer, symbol.offset); - try writer.writeByte(macho.BIND_OPCODE_DO_BIND); - try writer.writeByte(macho.BIND_OPCODE_DONE); - } -} - -pub fn lazyBindInfoSize(symbols: anytype) !u64 { - var stream = std.io.countingWriter(std.io.null_writer); - var writer = stream.writer(); - var size: u64 = 0; - - for (symbols) |entry| { - const symbol = entry.value; - size += 1; - try leb.writeILEB128(writer, symbol.offset); - size += 1; - if (symbol.dylib_ordinal > 15) { - try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal)); - } - - size += 1; - size += entry.key.len; - size += 1; - - size += 2; - } - - size += stream.bytes_written; - return size; -} - -pub fn writeLazyBindInfo(symbols: anytype, writer: anytype) !void { - for (symbols) |entry| { - const symbol = entry.value; - try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment)); - try leb.writeILEB128(writer, symbol.offset); - - if (symbol.dylib_ordinal > 15) { - try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); - try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal)); - } else if (symbol.dylib_ordinal > 0) { - try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal))); - } else { - try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal))); - } - - try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags. - try writer.writeAll(entry.key); - try writer.writeByte(0); - - try writer.writeByte(macho.BIND_OPCODE_DO_BIND); - try writer.writeByte(macho.BIND_OPCODE_DONE); - } -} diff --git a/src/main.zig b/src/main.zig index bc4f209b45..76f957456a 100644 --- a/src/main.zig +++ b/src/main.zig @@ -2637,6 +2637,50 @@ fn argvCmd(allocator: *Allocator, argv: []const []const u8) ![]u8 { return cmd.toOwnedSlice(); } +fn readSourceFileToEndAlloc(allocator: *mem.Allocator, input: *const fs.File, size_hint: ?usize) ![]const u8 { + const source_code = input.readToEndAllocOptions( + allocator, + max_src_size, + size_hint, + @alignOf(u16), + null, + ) catch |err| switch (err) { + error.ConnectionResetByPeer => unreachable, + error.ConnectionTimedOut => unreachable, + error.NotOpenForReading => unreachable, + else => |e| return e, + }; + errdefer allocator.free(source_code); + + // Detect unsupported file types with their Byte Order Mark + const unsupported_boms = [_][]const u8{ + "\xff\xfe\x00\x00", // UTF-32 little endian + "\xfe\xff\x00\x00", // UTF-32 big endian + "\xfe\xff", // UTF-16 big endian + }; + for (unsupported_boms) |bom| { + if (mem.startsWith(u8, source_code, bom)) { + return error.UnsupportedEncoding; + } + } + + // If the file starts with a UTF-16 little endian BOM, translate it to UTF-8 + if (mem.startsWith(u8, source_code, "\xff\xfe")) { + const source_code_utf16_le = mem.bytesAsSlice(u16, source_code); + const source_code_utf8 = std.unicode.utf16leToUtf8Alloc(allocator, source_code_utf16_le) catch |err| switch (err) { + error.DanglingSurrogateHalf => error.UnsupportedEncoding, + error.ExpectedSecondSurrogateHalf => error.UnsupportedEncoding, + error.UnexpectedSecondSurrogateHalf => error.UnsupportedEncoding, + else => |e| return e, + }; + + allocator.free(source_code); + return source_code_utf8; + } + + return 
source_code; +} + pub const usage_fmt = \\Usage: zig fmt [file]... \\ @@ -2708,9 +2752,10 @@ pub fn cmdFmt(gpa: *Allocator, args: []const []const u8) !void { fatal("cannot use --stdin with positional arguments", .{}); } - const stdin = io.getStdIn().reader(); - - const source_code = try stdin.readAllAlloc(gpa, max_src_size); + const stdin = io.getStdIn(); + const source_code = readSourceFileToEndAlloc(gpa, &stdin, null) catch |err| { + fatal("unable to read stdin: {s}", .{err}); + }; defer gpa.free(source_code); var tree = std.zig.parse(gpa, source_code) catch |err| { @@ -2785,6 +2830,7 @@ const FmtError = error{ EndOfStream, Unseekable, NotOpenForWriting, + UnsupportedEncoding, } || fs.File.OpenError; fn fmtPath(fmt: *Fmt, file_path: []const u8, check_mode: bool, dir: fs.Dir, sub_path: []const u8) FmtError!void { @@ -2850,21 +2896,15 @@ fn fmtPathFile( if (stat.kind == .Directory) return error.IsDir; - const source_code = source_file.readToEndAllocOptions( + const source_code = try readSourceFileToEndAlloc( fmt.gpa, - max_src_size, + &source_file, std.math.cast(usize, stat.size) catch return error.FileTooBig, - @alignOf(u8), - null, - ) catch |err| switch (err) { - error.ConnectionResetByPeer => unreachable, - error.ConnectionTimedOut => unreachable, - error.NotOpenForReading => unreachable, - else => |e| return e, - }; + ); + defer fmt.gpa.free(source_code); + source_file.close(); file_closed = true; - defer fmt.gpa.free(source_code); // Add to set after no longer possible to get error.IsDir. if (try fmt.seen.fetchPut(stat.inode, {})) |_| return; @@ -3241,7 +3281,8 @@ pub const ClangArgIterator = struct { self.zig_equivalent = clang_arg.zig_equivalent; break :find_clang_arg; }, - } else { + } + else { fatal("Unknown Clang option: '{s}'", .{arg}); } } diff --git a/src/stage1/all_types.hpp b/src/stage1/all_types.hpp index 7ad585a524..b1d0bd2ce1 100644 --- a/src/stage1/all_types.hpp +++ b/src/stage1/all_types.hpp @@ -391,6 +391,8 @@ enum LazyValueId { LazyValueIdAlignOf, LazyValueIdSizeOf, LazyValueIdPtrType, + LazyValueIdPtrTypeSimple, + LazyValueIdPtrTypeSimpleConst, LazyValueIdOptType, LazyValueIdSliceType, LazyValueIdFnType, @@ -467,6 +469,13 @@ struct LazyValuePtrType { bool is_allowzero; }; +struct LazyValuePtrTypeSimple { + LazyValue base; + + IrAnalyze *ira; + IrInstGen *elem_type; +}; + struct LazyValueOptType { LazyValue base; @@ -2610,7 +2619,8 @@ enum IrInstSrcId { IrInstSrcIdEnumToInt, IrInstSrcIdIntToErr, IrInstSrcIdErrToInt, - IrInstSrcIdCheckSwitchProngs, + IrInstSrcIdCheckSwitchProngsUnderYes, + IrInstSrcIdCheckSwitchProngsUnderNo, IrInstSrcIdCheckStatementIsVoid, IrInstSrcIdTypeName, IrInstSrcIdDeclRef, @@ -2624,12 +2634,15 @@ enum IrInstSrcId { IrInstSrcIdHasField, IrInstSrcIdSetEvalBranchQuota, IrInstSrcIdPtrType, + IrInstSrcIdPtrTypeSimple, + IrInstSrcIdPtrTypeSimpleConst, IrInstSrcIdAlignCast, IrInstSrcIdImplicitCast, IrInstSrcIdResolveResult, IrInstSrcIdResetResult, IrInstSrcIdSetAlignStack, - IrInstSrcIdArgType, + IrInstSrcIdArgTypeAllowVarFalse, + IrInstSrcIdArgTypeAllowVarTrue, IrInstSrcIdExport, IrInstSrcIdExtern, IrInstSrcIdErrorReturnTrace, @@ -3294,6 +3307,12 @@ struct IrInstSrcArrayType { IrInstSrc *child_type; }; +struct IrInstSrcPtrTypeSimple { + IrInstSrc base; + + IrInstSrc *child_type; +}; + struct IrInstSrcPtrType { IrInstSrc base; @@ -4020,7 +4039,6 @@ struct IrInstSrcCheckSwitchProngs { IrInstSrcCheckSwitchProngsRange *ranges; size_t range_count; AstNode* else_prong; - bool have_underscore_prong; }; struct IrInstSrcCheckStatementIsVoid { @@ -4144,7 
+4162,6 @@ struct IrInstSrcArgType { IrInstSrc *fn_type; IrInstSrc *arg_index; - bool allow_var; }; struct IrInstSrcExport { diff --git a/src/stage1/analyze.cpp b/src/stage1/analyze.cpp index a4e368288e..d46faac2f0 100644 --- a/src/stage1/analyze.cpp +++ b/src/stage1/analyze.cpp @@ -1237,6 +1237,22 @@ Error type_val_resolve_zero_bits(CodeGen *g, ZigValue *type_val, ZigType *parent parent_type_val, is_zero_bits); } } + case LazyValueIdPtrTypeSimple: + case LazyValueIdPtrTypeSimpleConst: { + LazyValuePtrTypeSimple *lazy_ptr_type = reinterpret_cast(type_val->data.x_lazy); + + if (parent_type_val == lazy_ptr_type->elem_type->value) { + // Does a struct which contains a pointer field to itself have bits? Yes. + *is_zero_bits = false; + return ErrorNone; + } else { + if (parent_type_val == nullptr) { + parent_type_val = type_val; + } + return type_val_resolve_zero_bits(g, lazy_ptr_type->elem_type->value, parent_type, + parent_type_val, is_zero_bits); + } + } case LazyValueIdArrayType: { LazyValueArrayType *lazy_array_type = reinterpret_cast(type_val->data.x_lazy); @@ -1285,6 +1301,8 @@ Error type_val_resolve_is_opaque_type(CodeGen *g, ZigValue *type_val, bool *is_o zig_unreachable(); case LazyValueIdSliceType: case LazyValueIdPtrType: + case LazyValueIdPtrTypeSimple: + case LazyValueIdPtrTypeSimpleConst: case LazyValueIdFnType: case LazyValueIdOptType: case LazyValueIdErrUnionType: @@ -1313,6 +1331,11 @@ static ReqCompTime type_val_resolve_requires_comptime(CodeGen *g, ZigValue *type LazyValuePtrType *lazy_ptr_type = reinterpret_cast(type_val->data.x_lazy); return type_val_resolve_requires_comptime(g, lazy_ptr_type->elem_type->value); } + case LazyValueIdPtrTypeSimple: + case LazyValueIdPtrTypeSimpleConst: { + LazyValuePtrTypeSimple *lazy_ptr_type = reinterpret_cast(type_val->data.x_lazy); + return type_val_resolve_requires_comptime(g, lazy_ptr_type->elem_type->value); + } case LazyValueIdOptType: { LazyValueOptType *lazy_opt_type = reinterpret_cast(type_val->data.x_lazy); return type_val_resolve_requires_comptime(g, lazy_opt_type->payload_type->value); @@ -1413,6 +1436,24 @@ start_over: } return ErrorNone; } + case LazyValueIdPtrTypeSimple: + case LazyValueIdPtrTypeSimpleConst: { + LazyValuePtrTypeSimple *lazy_ptr_type = reinterpret_cast(type_val->data.x_lazy); + bool is_zero_bits; + if ((err = type_val_resolve_zero_bits(g, lazy_ptr_type->elem_type->value, nullptr, + nullptr, &is_zero_bits))) + { + return err; + } + if (is_zero_bits) { + *abi_size = 0; + *size_in_bits = 0; + } else { + *abi_size = g->builtin_types.entry_usize->abi_size; + *size_in_bits = g->builtin_types.entry_usize->size_in_bits; + } + return ErrorNone; + } case LazyValueIdFnType: *abi_size = g->builtin_types.entry_usize->abi_size; *size_in_bits = g->builtin_types.entry_usize->size_in_bits; @@ -1449,6 +1490,8 @@ Error type_val_resolve_abi_align(CodeGen *g, AstNode *source_node, ZigValue *typ zig_unreachable(); case LazyValueIdSliceType: case LazyValueIdPtrType: + case LazyValueIdPtrTypeSimple: + case LazyValueIdPtrTypeSimpleConst: case LazyValueIdFnType: *abi_align = g->builtin_types.entry_usize->abi_align; return ErrorNone; @@ -1506,7 +1549,9 @@ static OnePossibleValue type_val_resolve_has_one_possible_value(CodeGen *g, ZigV return OnePossibleValueYes; return type_val_resolve_has_one_possible_value(g, lazy_array_type->elem_type->value); } - case LazyValueIdPtrType: { + case LazyValueIdPtrType: + case LazyValueIdPtrTypeSimple: + case LazyValueIdPtrTypeSimpleConst: { Error err; bool zero_bits; if ((err = 
type_val_resolve_zero_bits(g, type_val, nullptr, nullptr, &zero_bits))) { @@ -5758,6 +5803,8 @@ static bool can_mutate_comptime_var_state(ZigValue *value) { case LazyValueIdAlignOf: case LazyValueIdSizeOf: case LazyValueIdPtrType: + case LazyValueIdPtrTypeSimple: + case LazyValueIdPtrTypeSimpleConst: case LazyValueIdOptType: case LazyValueIdSliceType: case LazyValueIdFnType: diff --git a/src/stage1/ir.cpp b/src/stage1/ir.cpp index 7906df3b0d..16536dc71f 100644 --- a/src/stage1/ir.cpp +++ b/src/stage1/ir.cpp @@ -476,7 +476,8 @@ static void destroy_instruction_src(IrInstSrc *inst) { return heap::c_allocator.destroy(reinterpret_cast(inst)); case IrInstSrcIdErrToInt: return heap::c_allocator.destroy(reinterpret_cast(inst)); - case IrInstSrcIdCheckSwitchProngs: + case IrInstSrcIdCheckSwitchProngsUnderNo: + case IrInstSrcIdCheckSwitchProngsUnderYes: return heap::c_allocator.destroy(reinterpret_cast(inst)); case IrInstSrcIdCheckStatementIsVoid: return heap::c_allocator.destroy(reinterpret_cast(inst)); @@ -486,6 +487,9 @@ static void destroy_instruction_src(IrInstSrc *inst) { return heap::c_allocator.destroy(reinterpret_cast(inst)); case IrInstSrcIdPtrType: return heap::c_allocator.destroy(reinterpret_cast(inst)); + case IrInstSrcIdPtrTypeSimple: + case IrInstSrcIdPtrTypeSimpleConst: + return heap::c_allocator.destroy(reinterpret_cast(inst)); case IrInstSrcIdDeclRef: return heap::c_allocator.destroy(reinterpret_cast(inst)); case IrInstSrcIdPanic: @@ -514,7 +518,8 @@ static void destroy_instruction_src(IrInstSrc *inst) { return heap::c_allocator.destroy(reinterpret_cast(inst)); case IrInstSrcIdSetAlignStack: return heap::c_allocator.destroy(reinterpret_cast(inst)); - case IrInstSrcIdArgType: + case IrInstSrcIdArgTypeAllowVarFalse: + case IrInstSrcIdArgTypeAllowVarTrue: return heap::c_allocator.destroy(reinterpret_cast(inst)); case IrInstSrcIdExport: return heap::c_allocator.destroy(reinterpret_cast(inst)); @@ -1470,10 +1475,6 @@ static constexpr IrInstSrcId ir_inst_id(IrInstSrcErrToInt *) { return IrInstSrcIdErrToInt; } -static constexpr IrInstSrcId ir_inst_id(IrInstSrcCheckSwitchProngs *) { - return IrInstSrcIdCheckSwitchProngs; -} - static constexpr IrInstSrcId ir_inst_id(IrInstSrcCheckStatementIsVoid *) { return IrInstSrcIdCheckStatementIsVoid; } @@ -1546,10 +1547,6 @@ static constexpr IrInstSrcId ir_inst_id(IrInstSrcSetAlignStack *) { return IrInstSrcIdSetAlignStack; } -static constexpr IrInstSrcId ir_inst_id(IrInstSrcArgType *) { - return IrInstSrcIdArgType; -} - static constexpr IrInstSrcId ir_inst_id(IrInstSrcExport *) { return IrInstSrcIdExport; } @@ -2615,11 +2612,35 @@ static IrInstGen *ir_build_br_gen(IrAnalyze *ira, IrInst *source_instr, IrBasicB return &inst->base; } +static IrInstSrc *ir_build_ptr_type_simple(IrBuilderSrc *irb, Scope *scope, AstNode *source_node, + IrInstSrc *child_type, bool is_const) +{ + IrInstSrcPtrTypeSimple *inst = heap::c_allocator.create(); + inst->base.id = is_const ? 
IrInstSrcIdPtrTypeSimpleConst : IrInstSrcIdPtrTypeSimple; + inst->base.base.scope = scope; + inst->base.base.source_node = source_node; + inst->base.base.debug_id = exec_next_debug_id(irb->exec); + inst->base.owner_bb = irb->current_basic_block; + ir_instruction_append(irb->current_basic_block, &inst->base); + + inst->child_type = child_type; + + ir_ref_instruction(child_type, irb->current_basic_block); + + return &inst->base; +} + static IrInstSrc *ir_build_ptr_type(IrBuilderSrc *irb, Scope *scope, AstNode *source_node, IrInstSrc *child_type, bool is_const, bool is_volatile, PtrLen ptr_len, IrInstSrc *sentinel, IrInstSrc *align_value, uint32_t bit_offset_start, uint32_t host_int_bytes, bool is_allow_zero) { + if (!is_volatile && ptr_len == PtrLenSingle && sentinel == nullptr && align_value == nullptr && + bit_offset_start == 0 && host_int_bytes == 0 && is_allow_zero == 0) + { + return ir_build_ptr_type_simple(irb, scope, source_node, child_type, is_const); + } + IrInstSrcPtrType *inst = ir_build_instruction(irb, scope, source_node); inst->sentinel = sentinel; inst->align_value = align_value; @@ -4354,13 +4375,19 @@ static IrInstSrc *ir_build_check_switch_prongs(IrBuilderSrc *irb, Scope *scope, IrInstSrc *target_value, IrInstSrcCheckSwitchProngsRange *ranges, size_t range_count, AstNode* else_prong, bool have_underscore_prong) { - IrInstSrcCheckSwitchProngs *instruction = ir_build_instruction( - irb, scope, source_node); + IrInstSrcCheckSwitchProngs *instruction = heap::c_allocator.create(); + instruction->base.id = have_underscore_prong ? + IrInstSrcIdCheckSwitchProngsUnderYes : IrInstSrcIdCheckSwitchProngsUnderNo; + instruction->base.base.scope = scope; + instruction->base.base.source_node = source_node; + instruction->base.base.debug_id = exec_next_debug_id(irb->exec); + instruction->base.owner_bb = irb->current_basic_block; + ir_instruction_append(irb->current_basic_block, &instruction->base); + instruction->target_value = target_value; instruction->ranges = ranges; instruction->range_count = range_count; instruction->else_prong = else_prong; - instruction->have_underscore_prong = have_underscore_prong; ir_ref_instruction(target_value, irb->current_basic_block); for (size_t i = 0; i < range_count; i += 1) { @@ -4590,10 +4617,17 @@ static IrInstSrc *ir_build_set_align_stack(IrBuilderSrc *irb, Scope *scope, AstN static IrInstSrc *ir_build_arg_type(IrBuilderSrc *irb, Scope *scope, AstNode *source_node, IrInstSrc *fn_type, IrInstSrc *arg_index, bool allow_var) { - IrInstSrcArgType *instruction = ir_build_instruction(irb, scope, source_node); + IrInstSrcArgType *instruction = heap::c_allocator.create(); + instruction->base.id = allow_var ? 
+ IrInstSrcIdArgTypeAllowVarTrue : IrInstSrcIdArgTypeAllowVarFalse; + instruction->base.base.scope = scope; + instruction->base.base.source_node = source_node; + instruction->base.base.debug_id = exec_next_debug_id(irb->exec); + instruction->base.owner_bb = irb->current_basic_block; + ir_instruction_append(irb->current_basic_block, &instruction->base); + instruction->fn_type = fn_type; instruction->arg_index = arg_index; - instruction->allow_var = allow_var; ir_ref_instruction(fn_type, irb->current_basic_block); ir_ref_instruction(arg_index, irb->current_basic_block); @@ -29702,7 +29736,7 @@ static IrInstGen *ir_analyze_instruction_test_comptime(IrAnalyze *ira, IrInstSrc } static IrInstGen *ir_analyze_instruction_check_switch_prongs(IrAnalyze *ira, - IrInstSrcCheckSwitchProngs *instruction) + IrInstSrcCheckSwitchProngs *instruction, bool have_underscore_prong) { IrInstGen *target_value = instruction->target_value->child; ZigType *switch_type = target_value->value->type; @@ -29767,7 +29801,7 @@ static IrInstGen *ir_analyze_instruction_check_switch_prongs(IrAnalyze *ira, bigint_incr(&field_index); } } - if (instruction->have_underscore_prong) { + if (have_underscore_prong) { if (!switch_type->data.enumeration.non_exhaustive) { ir_add_error(ira, &instruction->base.base, buf_sprintf("switch on exhaustive enum has `_` prong")); @@ -30871,6 +30905,24 @@ static IrInstGen *ir_analyze_instruction_ptr_to_int(IrAnalyze *ira, IrInstSrcPtr return ir_build_ptr_to_int_gen(ira, &instruction->base.base, target); } +static IrInstGen *ir_analyze_instruction_ptr_type_simple(IrAnalyze *ira, + IrInstSrcPtrTypeSimple *instruction, bool is_const) +{ + IrInstGen *result = ir_const(ira, &instruction->base.base, ira->codegen->builtin_types.entry_type); + result->value->special = ConstValSpecialLazy; + + LazyValuePtrTypeSimple *lazy_ptr_type = heap::c_allocator.create(); + lazy_ptr_type->ira = ira; ira_ref(ira); + result->value->data.x_lazy = &lazy_ptr_type->base; + lazy_ptr_type->base.id = is_const ? 
LazyValueIdPtrTypeSimpleConst : LazyValueIdPtrTypeSimple; + + lazy_ptr_type->elem_type = instruction->child_type->child; + if (ir_resolve_type_lazy(ira, lazy_ptr_type->elem_type) == nullptr) + return ira->codegen->invalid_inst_gen; + + return result; +} + static IrInstGen *ir_analyze_instruction_ptr_type(IrAnalyze *ira, IrInstSrcPtrType *instruction) { IrInstGen *result = ir_const(ira, &instruction->base.base, ira->codegen->builtin_types.entry_type); result->value->special = ConstValSpecialLazy; @@ -30976,7 +31028,9 @@ static IrInstGen *ir_analyze_instruction_set_align_stack(IrAnalyze *ira, IrInstS return ir_const_void(ira, &instruction->base.base); } -static IrInstGen *ir_analyze_instruction_arg_type(IrAnalyze *ira, IrInstSrcArgType *instruction) { +static IrInstGen *ir_analyze_instruction_arg_type(IrAnalyze *ira, IrInstSrcArgType *instruction, + bool allow_var) +{ IrInstGen *fn_type_inst = instruction->fn_type->child; ZigType *fn_type = ir_resolve_type(ira, fn_type_inst); if (type_is_invalid(fn_type)) @@ -30998,7 +31052,7 @@ static IrInstGen *ir_analyze_instruction_arg_type(IrAnalyze *ira, IrInstSrcArgTy FnTypeId *fn_type_id = &fn_type->data.fn.fn_type_id; if (arg_index >= fn_type_id->param_count) { - if (instruction->allow_var) { + if (allow_var) { // TODO remove this with var args return ir_const_type(ira, &instruction->base.base, ira->codegen->builtin_types.entry_anytype); } @@ -31013,7 +31067,7 @@ static IrInstGen *ir_analyze_instruction_arg_type(IrAnalyze *ira, IrInstSrcArgTy // Args are only unresolved if our function is generic. ir_assert(fn_type->data.fn.is_generic, &instruction->base.base); - if (instruction->allow_var) { + if (allow_var) { return ir_const_type(ira, &instruction->base.base, ira->codegen->builtin_types.entry_anytype); } else { ir_add_error(ira, &arg_index_inst->base, @@ -32341,8 +32395,10 @@ static IrInstGen *ir_analyze_instruction_base(IrAnalyze *ira, IrInstSrc *instruc return ir_analyze_instruction_fn_proto(ira, (IrInstSrcFnProto *)instruction); case IrInstSrcIdTestComptime: return ir_analyze_instruction_test_comptime(ira, (IrInstSrcTestComptime *)instruction); - case IrInstSrcIdCheckSwitchProngs: - return ir_analyze_instruction_check_switch_prongs(ira, (IrInstSrcCheckSwitchProngs *)instruction); + case IrInstSrcIdCheckSwitchProngsUnderNo: + return ir_analyze_instruction_check_switch_prongs(ira, (IrInstSrcCheckSwitchProngs *)instruction, false); + case IrInstSrcIdCheckSwitchProngsUnderYes: + return ir_analyze_instruction_check_switch_prongs(ira, (IrInstSrcCheckSwitchProngs *)instruction, true); case IrInstSrcIdCheckStatementIsVoid: return ir_analyze_instruction_check_statement_is_void(ira, (IrInstSrcCheckStatementIsVoid *)instruction); case IrInstSrcIdDeclRef: @@ -32373,6 +32429,10 @@ static IrInstGen *ir_analyze_instruction_base(IrAnalyze *ira, IrInstSrc *instruc return ir_analyze_instruction_set_eval_branch_quota(ira, (IrInstSrcSetEvalBranchQuota *)instruction); case IrInstSrcIdPtrType: return ir_analyze_instruction_ptr_type(ira, (IrInstSrcPtrType *)instruction); + case IrInstSrcIdPtrTypeSimple: + return ir_analyze_instruction_ptr_type_simple(ira, (IrInstSrcPtrTypeSimple *)instruction, false); + case IrInstSrcIdPtrTypeSimpleConst: + return ir_analyze_instruction_ptr_type_simple(ira, (IrInstSrcPtrTypeSimple *)instruction, true); case IrInstSrcIdAlignCast: return ir_analyze_instruction_align_cast(ira, (IrInstSrcAlignCast *)instruction); case IrInstSrcIdImplicitCast: @@ -32383,8 +32443,10 @@ static IrInstGen *ir_analyze_instruction_base(IrAnalyze *ira, IrInstSrc 
*instruc return ir_analyze_instruction_reset_result(ira, (IrInstSrcResetResult *)instruction); case IrInstSrcIdSetAlignStack: return ir_analyze_instruction_set_align_stack(ira, (IrInstSrcSetAlignStack *)instruction); - case IrInstSrcIdArgType: - return ir_analyze_instruction_arg_type(ira, (IrInstSrcArgType *)instruction); + case IrInstSrcIdArgTypeAllowVarFalse: + return ir_analyze_instruction_arg_type(ira, (IrInstSrcArgType *)instruction, false); + case IrInstSrcIdArgTypeAllowVarTrue: + return ir_analyze_instruction_arg_type(ira, (IrInstSrcArgType *)instruction, true); case IrInstSrcIdExport: return ir_analyze_instruction_export(ira, (IrInstSrcExport *)instruction); case IrInstSrcIdExtern: @@ -32737,12 +32799,15 @@ bool ir_inst_src_has_side_effects(IrInstSrc *instruction) { case IrInstSrcIdMemcpy: case IrInstSrcIdBreakpoint: case IrInstSrcIdOverflowOp: // TODO when we support multiple returns this can be side effect free - case IrInstSrcIdCheckSwitchProngs: + case IrInstSrcIdCheckSwitchProngsUnderNo: + case IrInstSrcIdCheckSwitchProngsUnderYes: case IrInstSrcIdCheckStatementIsVoid: case IrInstSrcIdCheckRuntimeScope: case IrInstSrcIdPanic: case IrInstSrcIdSetEvalBranchQuota: case IrInstSrcIdPtrType: + case IrInstSrcIdPtrTypeSimple: + case IrInstSrcIdPtrTypeSimpleConst: case IrInstSrcIdSetAlignStack: case IrInstSrcIdExport: case IrInstSrcIdExtern: @@ -32826,7 +32891,8 @@ bool ir_inst_src_has_side_effects(IrInstSrc *instruction) { case IrInstSrcIdAlignCast: case IrInstSrcIdImplicitCast: case IrInstSrcIdResolveResult: - case IrInstSrcIdArgType: + case IrInstSrcIdArgTypeAllowVarFalse: + case IrInstSrcIdArgTypeAllowVarTrue: case IrInstSrcIdErrorReturnTrace: case IrInstSrcIdErrorUnion: case IrInstSrcIdFloatOp: @@ -33249,6 +33315,54 @@ static Error ir_resolve_lazy_raw(AstNode *source_node, ZigValue *val) { // We can't free the lazy value here, because multiple other ZigValues might be pointing to it. return ErrorNone; } + case LazyValueIdPtrTypeSimple: { + LazyValuePtrTypeSimple *lazy_ptr_type = reinterpret_cast(val->data.x_lazy); + IrAnalyze *ira = lazy_ptr_type->ira; + + ZigType *elem_type = ir_resolve_type(ira, lazy_ptr_type->elem_type); + if (type_is_invalid(elem_type)) + return ErrorSemanticAnalyzeFail; + + if (elem_type->id == ZigTypeIdUnreachable) { + ir_add_error(ira, &lazy_ptr_type->elem_type->base, + buf_create_from_str("pointer to noreturn not allowed")); + return ErrorSemanticAnalyzeFail; + } + + assert(val->type->id == ZigTypeIdMetaType); + val->data.x_type = get_pointer_to_type_extra2(ira->codegen, elem_type, + false, false, PtrLenSingle, 0, + 0, 0, + false, VECTOR_INDEX_NONE, nullptr, nullptr); + val->special = ConstValSpecialStatic; + + // We can't free the lazy value here, because multiple other ZigValues might be pointing to it. 
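
All of the stage1 changes in this region follow one pattern: a bool field (have_underscore_prong, allow_var, is_const) is dropped from the instruction or lazy-value struct and encoded in the instruction id instead, with the analysis and print functions receiving it back as a parameter. A rough Zig sketch of why that shrinks every instance; the layouts here are hypothetical stand-ins, not the real C++ structs:

const std = @import("std");

// Stand-in for an instruction that stores its flag as a field...
const ArgTypeWithBool = struct {
    base: usize, // stands in for the common IrInstSrc header
    fn_type: usize,
    arg_index: usize,
    allow_var: bool, // alignment pads this out to a full word
};

// ...versus one that encodes the flag in the instruction id (two ids, no field).
const ArgTypeIdEncoded = struct {
    base: usize,
    fn_type: usize,
    arg_index: usize,
};

test "folding the flag into the id shrinks every instance" {
    std.debug.assert(@sizeOf(ArgTypeIdEncoded) < @sizeOf(ArgTypeWithBool));
}
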
+ return ErrorNone; + } + case LazyValueIdPtrTypeSimpleConst: { + LazyValuePtrTypeSimple *lazy_ptr_type = reinterpret_cast(val->data.x_lazy); + IrAnalyze *ira = lazy_ptr_type->ira; + + ZigType *elem_type = ir_resolve_type(ira, lazy_ptr_type->elem_type); + if (type_is_invalid(elem_type)) + return ErrorSemanticAnalyzeFail; + + if (elem_type->id == ZigTypeIdUnreachable) { + ir_add_error(ira, &lazy_ptr_type->elem_type->base, + buf_create_from_str("pointer to noreturn not allowed")); + return ErrorSemanticAnalyzeFail; + } + + assert(val->type->id == ZigTypeIdMetaType); + val->data.x_type = get_pointer_to_type_extra2(ira->codegen, elem_type, + true, false, PtrLenSingle, 0, + 0, 0, + false, VECTOR_INDEX_NONE, nullptr, nullptr); + val->special = ConstValSpecialStatic; + + // We can't free the lazy value here, because multiple other ZigValues might be pointing to it. + return ErrorNone; + } case LazyValueIdArrayType: { LazyValueArrayType *lazy_array_type = reinterpret_cast(val->data.x_lazy); IrAnalyze *ira = lazy_array_type->ira; diff --git a/src/stage1/ir_print.cpp b/src/stage1/ir_print.cpp index 98d349012e..22b6c1ed5f 100644 --- a/src/stage1/ir_print.cpp +++ b/src/stage1/ir_print.cpp @@ -270,8 +270,10 @@ const char* ir_inst_src_type_str(IrInstSrcId id) { return "SrcIntToErr"; case IrInstSrcIdErrToInt: return "SrcErrToInt"; - case IrInstSrcIdCheckSwitchProngs: - return "SrcCheckSwitchProngs"; + case IrInstSrcIdCheckSwitchProngsUnderNo: + return "SrcCheckSwitchProngsUnderNo"; + case IrInstSrcIdCheckSwitchProngsUnderYes: + return "SrcCheckSwitchProngsUnderYes"; case IrInstSrcIdCheckStatementIsVoid: return "SrcCheckStatementIsVoid"; case IrInstSrcIdTypeName: @@ -298,6 +300,10 @@ const char* ir_inst_src_type_str(IrInstSrcId id) { return "SrcSetEvalBranchQuota"; case IrInstSrcIdPtrType: return "SrcPtrType"; + case IrInstSrcIdPtrTypeSimple: + return "SrcPtrTypeSimple"; + case IrInstSrcIdPtrTypeSimpleConst: + return "SrcPtrTypeSimpleConst"; case IrInstSrcIdAlignCast: return "SrcAlignCast"; case IrInstSrcIdImplicitCast: @@ -308,8 +314,10 @@ const char* ir_inst_src_type_str(IrInstSrcId id) { return "SrcResetResult"; case IrInstSrcIdSetAlignStack: return "SrcSetAlignStack"; - case IrInstSrcIdArgType: - return "SrcArgType"; + case IrInstSrcIdArgTypeAllowVarFalse: + return "SrcArgTypeAllowVarFalse"; + case IrInstSrcIdArgTypeAllowVarTrue: + return "SrcArgTypeAllowVarTrue"; case IrInstSrcIdExport: return "SrcExport"; case IrInstSrcIdExtern: @@ -2187,7 +2195,9 @@ static void ir_print_err_to_int(IrPrintGen *irp, IrInstGenErrToInt *instruction) ir_print_other_inst_gen(irp, instruction->target); } -static void ir_print_check_switch_prongs(IrPrintSrc *irp, IrInstSrcCheckSwitchProngs *instruction) { +static void ir_print_check_switch_prongs(IrPrintSrc *irp, IrInstSrcCheckSwitchProngs *instruction, + bool have_underscore_prong) +{ fprintf(irp->f, "@checkSwitchProngs("); ir_print_other_inst_src(irp, instruction->target_value); fprintf(irp->f, ","); @@ -2200,6 +2210,8 @@ static void ir_print_check_switch_prongs(IrPrintSrc *irp, IrInstSrcCheckSwitchPr } const char *have_else_str = instruction->else_prong != nullptr ? "yes" : "no"; fprintf(irp->f, ")else:%s", have_else_str); + const char *have_under_str = have_underscore_prong ? 
"yes" : "no"; + fprintf(irp->f, " _:%s", have_under_str); } static void ir_print_check_statement_is_void(IrPrintSrc *irp, IrInstSrcCheckStatementIsVoid *instruction) { @@ -2237,6 +2249,15 @@ static void ir_print_ptr_type(IrPrintSrc *irp, IrInstSrcPtrType *instruction) { ir_print_other_inst_src(irp, instruction->child_type); } +static void ir_print_ptr_type_simple(IrPrintSrc *irp, IrInstSrcPtrTypeSimple *instruction, + bool is_const) +{ + fprintf(irp->f, "&"); + const char *const_str = is_const ? "const " : ""; + fprintf(irp->f, "*%s", const_str); + ir_print_other_inst_src(irp, instruction->child_type); +} + static void ir_print_decl_ref(IrPrintSrc *irp, IrInstSrcDeclRef *instruction) { const char *ptr_str = (instruction->lval != LValNone) ? "ptr " : ""; fprintf(irp->f, "declref %s%s", ptr_str, buf_ptr(instruction->tld->name)); @@ -2344,11 +2365,17 @@ static void ir_print_set_align_stack(IrPrintSrc *irp, IrInstSrcSetAlignStack *in fprintf(irp->f, ")"); } -static void ir_print_arg_type(IrPrintSrc *irp, IrInstSrcArgType *instruction) { +static void ir_print_arg_type(IrPrintSrc *irp, IrInstSrcArgType *instruction, bool allow_var) { fprintf(irp->f, "@ArgType("); ir_print_other_inst_src(irp, instruction->fn_type); fprintf(irp->f, ","); ir_print_other_inst_src(irp, instruction->arg_index); + fprintf(irp->f, ","); + if (allow_var) { + fprintf(irp->f, "allow_var=true"); + } else { + fprintf(irp->f, "allow_var=false"); + } fprintf(irp->f, ")"); } @@ -2885,8 +2912,11 @@ static void ir_print_inst_src(IrPrintSrc *irp, IrInstSrc *instruction, bool trai case IrInstSrcIdErrToInt: ir_print_err_to_int(irp, (IrInstSrcErrToInt *)instruction); break; - case IrInstSrcIdCheckSwitchProngs: - ir_print_check_switch_prongs(irp, (IrInstSrcCheckSwitchProngs *)instruction); + case IrInstSrcIdCheckSwitchProngsUnderNo: + ir_print_check_switch_prongs(irp, (IrInstSrcCheckSwitchProngs *)instruction, false); + break; + case IrInstSrcIdCheckSwitchProngsUnderYes: + ir_print_check_switch_prongs(irp, (IrInstSrcCheckSwitchProngs *)instruction, true); break; case IrInstSrcIdCheckStatementIsVoid: ir_print_check_statement_is_void(irp, (IrInstSrcCheckStatementIsVoid *)instruction); @@ -2900,6 +2930,12 @@ static void ir_print_inst_src(IrPrintSrc *irp, IrInstSrc *instruction, bool trai case IrInstSrcIdPtrType: ir_print_ptr_type(irp, (IrInstSrcPtrType *)instruction); break; + case IrInstSrcIdPtrTypeSimple: + ir_print_ptr_type_simple(irp, (IrInstSrcPtrTypeSimple *)instruction, false); + break; + case IrInstSrcIdPtrTypeSimpleConst: + ir_print_ptr_type_simple(irp, (IrInstSrcPtrTypeSimple *)instruction, true); + break; case IrInstSrcIdDeclRef: ir_print_decl_ref(irp, (IrInstSrcDeclRef *)instruction); break; @@ -2942,8 +2978,11 @@ static void ir_print_inst_src(IrPrintSrc *irp, IrInstSrc *instruction, bool trai case IrInstSrcIdSetAlignStack: ir_print_set_align_stack(irp, (IrInstSrcSetAlignStack *)instruction); break; - case IrInstSrcIdArgType: - ir_print_arg_type(irp, (IrInstSrcArgType *)instruction); + case IrInstSrcIdArgTypeAllowVarFalse: + ir_print_arg_type(irp, (IrInstSrcArgType *)instruction, false); + break; + case IrInstSrcIdArgTypeAllowVarTrue: + ir_print_arg_type(irp, (IrInstSrcArgType *)instruction, true); break; case IrInstSrcIdExport: ir_print_export(irp, (IrInstSrcExport *)instruction); diff --git a/src/translate_c.zig b/src/translate_c.zig index c5ff56f32a..8aabc30754 100644 --- a/src/translate_c.zig +++ b/src/translate_c.zig @@ -11,6 +11,7 @@ const math = std.math; const ast = @import("translate_c/ast.zig"); const Node = 
ast.Node; const Tag = Node.Tag; +const c_builtins = std.c.builtins; const CallingConvention = std.builtin.CallingConvention; @@ -635,7 +636,7 @@ fn visitVarDecl(c: *Context, var_decl: *const clang.VarDecl, mangled_name: ?[]co if (has_init) trans_init: { if (decl_init) |expr| { const node_or_error = if (expr.getStmtClass() == .StringLiteralClass) - transStringLiteralAsArray(c, scope, @ptrCast(*const clang.StringLiteral, expr), zigArraySize(c, type_node) catch 0) + transStringLiteralInitializer(c, scope, @ptrCast(*const clang.StringLiteral, expr), type_node) else transExprCoercing(c, scope, expr, .used); init_node = node_or_error catch |err| switch (err) { @@ -1058,6 +1059,10 @@ fn transStmt( const compound_literal = @ptrCast(*const clang.CompoundLiteralExpr, stmt); return transExpr(c, scope, compound_literal.getInitializer(), result_used); }, + .GenericSelectionExprClass => { + const gen_sel = @ptrCast(*const clang.GenericSelectionExpr, stmt); + return transExpr(c, scope, gen_sel.getResultExpr(), result_used); + }, else => { return fail(c, error.UnsupportedTranslation, stmt.getBeginLoc(), "TODO implement translation of stmt class {s}", .{@tagName(sc)}); }, @@ -1407,7 +1412,7 @@ fn transDeclStmtOne( var init_node = if (decl_init) |expr| if (expr.getStmtClass() == .StringLiteralClass) - try transStringLiteralAsArray(c, scope, @ptrCast(*const clang.StringLiteral, expr), try zigArraySize(c, type_node)) + try transStringLiteralInitializer(c, scope, @ptrCast(*const clang.StringLiteral, expr), type_node) else try transExprCoercing(c, scope, expr, .used) else @@ -1522,7 +1527,7 @@ fn transImplicitCastExpr( return maybeSuppressResult(c, scope, result_used, ne); }, .BuiltinFnToFnPtr => { - return transExpr(c, scope, sub_expr, result_used); + return transBuiltinFnExpr(c, scope, sub_expr, result_used); }, .ToVoid => { // Should only appear in the rhs and lhs of a ConditionalOperator @@ -1538,6 +1543,22 @@ fn transImplicitCastExpr( } } +fn isBuiltinDefined(name: []const u8) bool { + inline for (std.meta.declarations(c_builtins)) |decl| { + if (std.mem.eql(u8, name, decl.name)) return true; + } + return false; +} + +fn transBuiltinFnExpr(c: *Context, scope: *Scope, expr: *const clang.Expr, used: ResultUsed) TransError!Node { + const node = try transExpr(c, scope, expr, used); + if (node.castTag(.identifier)) |ident| { + const name = ident.data; + if (!isBuiltinDefined(name)) return fail(c, error.UnsupportedTranslation, expr.getBeginLoc(), "TODO implement function '{s}' in std.c.builtins", .{name}); + } + return node; +} + fn transBoolExpr( c: *Context, scope: *Scope, @@ -1582,6 +1603,10 @@ fn exprIsNarrowStringLiteral(expr: *const clang.Expr) bool { const op_expr = @ptrCast(*const clang.ParenExpr, expr).getSubExpr(); return exprIsNarrowStringLiteral(op_expr); }, + .GenericSelectionExprClass => { + const gen_sel = @ptrCast(*const clang.GenericSelectionExpr, expr); + return exprIsNarrowStringLiteral(gen_sel.getResultExpr()); + }, else => return false, } } @@ -1733,6 +1758,20 @@ fn transReturnStmt( return Tag.@"return".create(c.arena, rhs); } +fn transNarrowStringLiteral( + c: *Context, + scope: *Scope, + stmt: *const clang.StringLiteral, + result_used: ResultUsed, +) TransError!Node { + var len: usize = undefined; + const bytes_ptr = stmt.getString_bytes_begin_size(&len); + + const str = try std.fmt.allocPrint(c.arena, "\"{}\"", .{std.zig.fmtEscapes(bytes_ptr[0..len])}); + const node = try Tag.string_literal.create(c.arena, str); + return maybeSuppressResult(c, scope, result_used, node); +} + fn 
transStringLiteral( c: *Context, scope: *Scope, @@ -1741,19 +1780,14 @@ ) TransError!Node { const kind = stmt.getKind(); switch (kind) { - .Ascii, .UTF8 => { - var len: usize = undefined; - const bytes_ptr = stmt.getString_bytes_begin_size(&len); - - const str = try std.fmt.allocPrint(c.arena, "\"{}\"", .{std.zig.fmtEscapes(bytes_ptr[0..len])}); - const node = try Tag.string_literal.create(c.arena, str); - return maybeSuppressResult(c, scope, result_used, node); - }, + .Ascii, .UTF8 => return transNarrowStringLiteral(c, scope, stmt, result_used), .UTF16, .UTF32, .Wide => { const str_type = @tagName(stmt.getKind()); const name = try std.fmt.allocPrint(c.arena, "zig.{s}_string_{d}", .{ str_type, c.getMangle() }); - const lit_array = try transStringLiteralAsArray(c, scope, stmt, stmt.getLength() + 1); + const expr_base = @ptrCast(*const clang.Expr, stmt); + const array_type = try transQualTypeInitialized(c, scope, expr_base.getType(), expr_base, expr_base.getBeginLoc()); + const lit_array = try transStringLiteralInitializer(c, scope, stmt, array_type); const decl = try Tag.var_simple.create(c.arena, .{ .name = name, .init = lit_array }); try scope.appendNode(decl); const node = try Tag.identifier.create(c.arena, name); @@ -1762,52 +1796,67 @@ } } -/// Parse the size of an array back out from an ast Node. -fn zigArraySize(c: *Context, node: Node) TransError!usize { - if (node.castTag(.array_type)) |array| { - return array.data.len; - } - return error.UnsupportedTranslation; +fn getArrayPayload(array_type: Node) ast.Payload.Array.ArrayTypeInfo { + return (array_type.castTag(.array_type) orelse array_type.castTag(.null_sentinel_array_type).?).data; } -/// Translate a string literal to an array of integers. Used when an -/// array is initialized from a string literal. `array_size` is the -/// size of the array being initialized. If the string literal is larger -/// than the array, truncate the string. If the array is larger than the -/// string literal, pad the array with 0's -fn transStringLiteralAsArray( +/// Translate a string literal that is initializing an array. In general narrow string +/// literals become `"<string>".*` or `"<string>"[0..<size>].*` if they need truncation. +/// Wide string literals become an array of integers. Zero-fillers pad out the array to +/// the appropriate length, if necessary. 
+fn transStringLiteralInitializer( c: *Context, scope: *Scope, stmt: *const clang.StringLiteral, - array_size: usize, + array_type: Node, ) TransError!Node { - if (array_size == 0) return error.UnsupportedType; + assert(array_type.tag() == .array_type or array_type.tag() == .null_sentinel_array_type); + + const is_narrow = stmt.getKind() == .Ascii or stmt.getKind() == .UTF8; const str_length = stmt.getLength(); + const payload = getArrayPayload(array_type); + const array_size = payload.len; + const elem_type = payload.elem_type; - const expr_base = @ptrCast(*const clang.Expr, stmt); - const ty = expr_base.getType().getTypePtr(); - const const_arr_ty = @ptrCast(*const clang.ConstantArrayType, ty); + if (array_size == 0) return Tag.empty_array.create(c.arena, elem_type); - const elem_type = try transQualType(c, scope, const_arr_ty.getElementType(), expr_base.getBeginLoc()); - const arr_type = try Tag.array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_type }); - const init_list = try c.arena.alloc(Node, array_size); + const num_inits = math.min(str_length, array_size); + const init_node = if (num_inits > 0) blk: { + if (is_narrow) { + // "string literal".* or "string literal"[0..num_inits].* + var str = try transNarrowStringLiteral(c, scope, stmt, .used); + if (str_length != array_size) str = try Tag.string_slice.create(c.arena, .{ .string = str, .end = num_inits }); + break :blk try Tag.deref.create(c.arena, str); + } else { + const init_list = try c.arena.alloc(Node, num_inits); + var i: c_uint = 0; + while (i < num_inits) : (i += 1) { + init_list[i] = try transCreateCharLitNode(c, false, stmt.getCodeUnit(i)); + } + const init_args = .{ .len = num_inits, .elem_type = elem_type }; + const init_array_type = try if (array_type.tag() == .array_type) Tag.array_type.create(c.arena, init_args) else Tag.null_sentinel_array_type.create(c.arena, init_args); + break :blk try Tag.array_init.create(c.arena, .{ + .cond = init_array_type, + .cases = init_list, + }); + } + } else null; - var i: c_uint = 0; - const kind = stmt.getKind(); - const narrow = kind == .Ascii or kind == .UTF8; - while (i < str_length and i < array_size) : (i += 1) { - const code_unit = stmt.getCodeUnit(i); - init_list[i] = try transCreateCharLitNode(c, narrow, code_unit); - } - while (i < array_size) : (i += 1) { - init_list[i] = try transCreateNodeNumber(c, 0, .int); - } + if (num_inits == array_size) return init_node.?; // init_node is only null if num_inits == 0; but if num_inits == array_size == 0 we've already returned + assert(array_size > str_length); // If array_size <= str_length, `num_inits == array_size` and we've already returned. 
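
Concretely, for narrow literals the function above produces the deref/slice/filler shapes that the updated translate-c test expectations at the end of this diff check for; written out as plain Zig (declaration names purely illustrative):

const std = @import("std");

// char no_null[3] = "abc";   exact fit: plain deref of the literal, no filler.
pub var no_null: [3]u8 = "abc".*;
// char truncated[1] = "abc"; literal longer than the array: slice first, then deref.
pub var truncated: [1]u8 = "abc"[0..1].*;
// char extend[5] = "a";      literal shorter than the array: deref plus a zero filler.
pub var extend: [5]u8 = "a"[0..1].* ++ [1]u8{0} ** 4;

test "initializer shapes" {
    std.debug.assert(no_null[2] == 'c');
    std.debug.assert(truncated[0] == 'a');
    std.debug.assert(extend[0] == 'a' and extend[4] == 0);
}
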
- return Tag.array_init.create(c.arena, .{ - .cond = arr_type, - .cases = init_list, + const filler_node = try Tag.array_filler.create(c.arena, .{ + .type = elem_type, + .filler = Tag.zero_literal.init(), + .count = array_size - str_length, }); + + if (init_node) |some| { + return Tag.array_cat.create(c.arena, .{ .lhs = some, .rhs = filler_node }); + } else { + return filler_node; + } } /// determine whether `stmt` is a "pointer subtraction expression" - a subtraction where @@ -1836,6 +1885,7 @@ fn cIntTypeForEnum(enum_qt: clang.QualType) clang.QualType { return enum_decl.getIntegerType(); } +// when modifying this function, make sure to also update std.meta.cast fn transCCast( c: *Context, scope: *Scope, @@ -2725,6 +2775,10 @@ fn cIsFunctionDeclRef(expr: *const clang.Expr) bool { const opcode = un_op.getOpcode(); return (opcode == .AddrOf or opcode == .Deref) and cIsFunctionDeclRef(un_op.getSubExpr()); }, + .GenericSelectionExprClass => { + const gen_sel = @ptrCast(*const clang.GenericSelectionExpr, expr); + return cIsFunctionDeclRef(gen_sel.getResultExpr()); + }, else => return false, } } @@ -3194,11 +3248,11 @@ fn transFloatingLiteral(c: *Context, scope: *Scope, stmt: *const clang.FloatingL var dbl = stmt.getValueAsApproximateDouble(); const is_negative = dbl < 0; if (is_negative) dbl = -dbl; - const str = try std.fmt.allocPrint(c.arena, "{d}", .{dbl}); - var node = if (dbl == std.math.floor(dbl)) - try Tag.integer_literal.create(c.arena, str) + const str = if (dbl == std.math.floor(dbl)) + try std.fmt.allocPrint(c.arena, "{d}.0", .{dbl}) else - try Tag.float_literal.create(c.arena, str); + try std.fmt.allocPrint(c.arena, "{d}", .{dbl}); + var node = try Tag.float_literal.create(c.arena, str); if (is_negative) node = try Tag.negate.create(c.arena, node); return maybeSuppressResult(c, scope, used, node); } @@ -3312,9 +3366,8 @@ fn addTopLevelDecl(c: *Context, name: []const u8, decl_node: Node) !void { try c.global_scope.nodes.append(decl_node); } -/// Translate a qual type for a variable with an initializer. The initializer -/// only matters for incomplete arrays, since the size of the array is determined -/// by the size of the initializer +/// Translate a qualtype for a variable with an initializer. This only matters +/// for incomplete arrays, since the initializer determines the size of the array. 
fn transQualTypeInitialized( c: *Context, scope: *Scope, @@ -3330,9 +3383,14 @@ fn transQualTypeInitialized( switch (decl_init.getStmtClass()) { .StringLiteralClass => { const string_lit = @ptrCast(*const clang.StringLiteral, decl_init); - const string_lit_size = string_lit.getLength() + 1; // +1 for null terminator + const string_lit_size = string_lit.getLength(); const array_size = @intCast(usize, string_lit_size); - return Tag.array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_ty }); + + // incomplete array initialized with empty string, will be translated as [1]T{0} + // see https://github.com/ziglang/zig/issues/8256 + if (array_size == 0) return Tag.array_type.create(c.arena, .{ .len = 1, .elem_type = elem_ty }); + + return Tag.null_sentinel_array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_ty }); }, .InitListExprClass => { const init_expr = @ptrCast(*const clang.InitListExpr, decl_init); @@ -4746,6 +4804,10 @@ fn parseCPrimaryExprInner(c: *Context, m: *MacroCtx, scope: *Scope) ParseError!N }, .Identifier => { const mangled_name = scope.getAlias(slice); + if (mem.startsWith(u8, mangled_name, "__builtin_") and !isBuiltinDefined(mangled_name)) { + try m.fail(c, "TODO implement function '{s}' in std.c.builtins", .{mangled_name}); + return error.ParseError; + } return Tag.identifier.create(c.arena, builtin_typedef_map.get(mangled_name) orelse mangled_name); }, .LParen => { diff --git a/src/translate_c/ast.zig b/src/translate_c/ast.zig index e5f76cc1de..4b595a7940 100644 --- a/src/translate_c/ast.zig +++ b/src/translate_c/ast.zig @@ -40,6 +40,8 @@ pub const Node = extern union { string_literal, char_literal, enum_literal, + /// "string"[0..end] + string_slice, identifier, @"if", /// if (!operand) break; @@ -176,6 +178,7 @@ pub const Node = extern union { c_pointer, single_pointer, array_type, + null_sentinel_array_type, /// @import("std").meta.sizeof(operand) std_meta_sizeof, @@ -334,7 +337,7 @@ pub const Node = extern union { .std_meta_promoteIntLiteral => Payload.PromoteIntLiteral, .block => Payload.Block, .c_pointer, .single_pointer => Payload.Pointer, - .array_type => Payload.Array, + .array_type, .null_sentinel_array_type => Payload.Array, .arg_redecl, .alias, .fail_decl => Payload.ArgRedecl, .log2_int_type => Payload.Log2IntType, .var_simple, .pub_var_simple => Payload.SimpleVarDecl, @@ -342,6 +345,7 @@ pub const Node = extern union { .array_filler => Payload.ArrayFiller, .pub_inline_fn => Payload.PubInlineFn, .field_access => Payload.FieldAccess, + .string_slice => Payload.StringSlice, }; } @@ -584,10 +588,12 @@ pub const Payload = struct { pub const Array = struct { base: Payload, - data: struct { + data: ArrayTypeInfo, + + pub const ArrayTypeInfo = struct { elem_type: Node, len: usize, - }, + }; }; pub const Pointer = struct { @@ -664,6 +670,14 @@ pub const Payload = struct { radix: Node, }, }; + + pub const StringSlice = struct { + base: Payload, + data: struct { + string: Node, + end: usize, + }, + }; }; /// Converts the nodes into a Zig ast. 
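
The new null_sentinel_array_type node is what lets an incomplete C array keep its NUL terminator in the type rather than as an explicit trailing element: transQualTypeInitialized sizes it by the literal's length and it renders as `[len:0]T`. A small before/after sketch based on the updated test expectations further down:

const std = @import("std");

// char arr1[] = "hello";
// previously translated with an explicit element list:
//     pub var arr1: [6]u8 = [6]u8{ 'h', 'e', 'l', 'l', 'o', 0 };
// now translated with a null-sentinel array type and a simple deref:
pub var arr1: [5:0]u8 = "hello".*;

test "sentinel lives in the type, not as an element" {
    std.debug.assert(arr1.len == 5);
    std.debug.assert(arr1[arr1.len] == 0); // the sentinel is still addressable
}
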
@@ -1015,6 +1029,36 @@ fn renderNode(c: *Context, node: Node) Allocator.Error!NodeIndex { .data = undefined, }); }, + .string_slice => { + const payload = node.castTag(.string_slice).?.data; + + const string = try renderNode(c, payload.string); + const l_bracket = try c.addToken(.l_bracket, "["); + const start = try c.addNode(.{ + .tag = .integer_literal, + .main_token = try c.addToken(.integer_literal, "0"), + .data = undefined, + }); + _ = try c.addToken(.ellipsis2, ".."); + const end = try c.addNode(.{ + .tag = .integer_literal, + .main_token = try c.addTokenFmt(.integer_literal, "{d}", .{payload.end}), + .data = undefined, + }); + _ = try c.addToken(.r_bracket, "]"); + + return c.addNode(.{ + .tag = .slice, + .main_token = l_bracket, + .data = .{ + .lhs = string, + .rhs = try c.addExtra(std.zig.ast.Node.Slice{ + .start = start, + .end = end, + }), + }, + }); + }, .fail_decl => { const payload = node.castTag(.fail_decl).?.data; // pub const name = @compileError(msg); @@ -1581,6 +1625,10 @@ fn renderNode(c: *Context, node: Node) Allocator.Error!NodeIndex { const payload = node.castTag(.array_type).?.data; return renderArrayType(c, payload.len, payload.elem_type); }, + .null_sentinel_array_type => { + const payload = node.castTag(.null_sentinel_array_type).?.data; + return renderNullSentinelArrayType(c, payload.len, payload.elem_type); + }, .array_filler => { const payload = node.castTag(.array_filler).?.data; @@ -1946,6 +1994,36 @@ fn renderArrayType(c: *Context, len: usize, elem_type: Node) !NodeIndex { }); } +fn renderNullSentinelArrayType(c: *Context, len: usize, elem_type: Node) !NodeIndex { + const l_bracket = try c.addToken(.l_bracket, "["); + const len_expr = try c.addNode(.{ + .tag = .integer_literal, + .main_token = try c.addTokenFmt(.integer_literal, "{d}", .{len}), + .data = undefined, + }); + _ = try c.addToken(.colon, ":"); + + const sentinel_expr = try c.addNode(.{ + .tag = .integer_literal, + .main_token = try c.addToken(.integer_literal, "0"), + .data = undefined, + }); + + _ = try c.addToken(.r_bracket, "]"); + const elem_type_expr = try renderNode(c, elem_type); + return c.addNode(.{ + .tag = .array_type_sentinel, + .main_token = l_bracket, + .data = .{ + .lhs = len_expr, + .rhs = try c.addExtra(std.zig.ast.Node.ArrayTypeSentinel { + .sentinel = sentinel_expr, + .elem_type = elem_type_expr, + }), + }, + }); +} + fn addSemicolonIfNeeded(c: *Context, node: Node) !void { switch (node.tag()) { .warning => unreachable, @@ -2014,6 +2092,7 @@ fn renderNodeGrouped(c: *Context, node: Node) !NodeIndex { .integer_literal, .float_literal, .string_literal, + .string_slice, .char_literal, .enum_literal, .identifier, @@ -2035,6 +2114,7 @@ fn renderNodeGrouped(c: *Context, node: Node) !NodeIndex { .func, .call, .array_type, + .null_sentinel_array_type, .bool_to_int, .div_exact, .byte_offset_of, diff --git a/src/zig_clang.cpp b/src/zig_clang.cpp index b789df0764..95e9e390a1 100644 --- a/src/zig_clang.cpp +++ b/src/zig_clang.cpp @@ -2445,6 +2445,11 @@ struct ZigClangQualType ZigClangFunctionType_getReturnType(const struct ZigClang return bitcast(casted->getReturnType()); } +const struct ZigClangExpr *ZigClangGenericSelectionExpr_getResultExpr(const struct ZigClangGenericSelectionExpr *self) { + auto casted = reinterpret_cast(self); + return reinterpret_cast(casted->getResultExpr()); +} + bool ZigClangFunctionProtoType_isVariadic(const struct ZigClangFunctionProtoType *self) { auto casted = reinterpret_cast(self); return casted->isVariadic(); diff --git a/src/zig_clang.h b/src/zig_clang.h 
index 34e2d5afb7..59eacf7587 100644 --- a/src/zig_clang.h +++ b/src/zig_clang.h @@ -1116,6 +1116,8 @@ ZIG_EXTERN_C bool ZigClangFunctionType_getNoReturnAttr(const struct ZigClangFunc ZIG_EXTERN_C enum ZigClangCallingConv ZigClangFunctionType_getCallConv(const struct ZigClangFunctionType *self); ZIG_EXTERN_C struct ZigClangQualType ZigClangFunctionType_getReturnType(const struct ZigClangFunctionType *self); +ZIG_EXTERN_C const struct ZigClangExpr *ZigClangGenericSelectionExpr_getResultExpr(const struct ZigClangGenericSelectionExpr *self); + ZIG_EXTERN_C bool ZigClangFunctionProtoType_isVariadic(const struct ZigClangFunctionProtoType *self); ZIG_EXTERN_C unsigned ZigClangFunctionProtoType_getNumParams(const struct ZigClangFunctionProtoType *self); ZIG_EXTERN_C struct ZigClangQualType ZigClangFunctionProtoType_getParamType(const struct ZigClangFunctionProtoType *self, unsigned i); diff --git a/test/cli.zig b/test/cli.zig index c0702fa54c..dedea67a59 100644 --- a/test/cli.zig +++ b/test/cli.zig @@ -28,6 +28,8 @@ pub fn main() !void { const zig_exe = try fs.path.resolve(a, &[_][]const u8{zig_exe_rel}); const dir_path = try fs.path.join(a, &[_][]const u8{ cache_root, "clitest" }); + defer fs.cwd().deleteTree(dir_path) catch {}; + const TestFn = fn ([]const u8, []const u8) anyerror!void; const test_fns = [_]TestFn{ testZigInitLib, @@ -174,4 +176,13 @@ fn testZigFmt(zig_exe: []const u8, dir_path: []const u8) !void { const run_result3 = try exec(dir_path, true, &[_][]const u8{ zig_exe, "fmt", dir_path }); // both files have been formatted, nothing should change now testing.expect(run_result3.stdout.len == 0); + + // Check UTF-16 decoding + const fmt4_zig_path = try fs.path.join(a, &[_][]const u8{ dir_path, "fmt4.zig" }); + var unformatted_code_utf16 = "\xff\xfe \x00 \x00 \x00 \x00/\x00/\x00 \x00n\x00o\x00 \x00r\x00e\x00a\x00s\x00o\x00n\x00"; + try fs.cwd().writeFile(fmt4_zig_path, unformatted_code_utf16); + + const run_result4 = try exec(dir_path, true, &[_][]const u8{ zig_exe, "fmt", dir_path }); + testing.expect(std.mem.startsWith(u8, run_result4.stdout, fmt4_zig_path)); + testing.expect(run_result4.stdout.len == fmt4_zig_path.len + 1 and run_result4.stdout[run_result4.stdout.len - 1] == '\n'); } diff --git a/test/run_translated_c.zig b/test/run_translated_c.zig index 01df88c852..44c3956a12 100644 --- a/test/run_translated_c.zig +++ b/test/run_translated_c.zig @@ -3,6 +3,17 @@ const tests = @import("tests.zig"); const nl = std.cstr.line_sep; pub fn addCases(cases: *tests.RunTranslatedCContext) void { + cases.add("division of floating literals", + \\#define _NO_CRT_STDIO_INLINE 1 + \\#include + \\#define PI 3.14159265358979323846f + \\#define DEG2RAD (PI/180.0f) + \\int main(void) { + \\ printf("DEG2RAD is: %f\n", DEG2RAD); + \\ return 0; + \\} + , "DEG2RAD is: 0.017453" ++ nl); + cases.add("use global scope for record/enum/typedef type transalation if needed", \\void bar(void); \\void baz(void); @@ -1187,4 +1198,50 @@ pub fn addCases(cases: *tests.RunTranslatedCContext) void { \\ return 0; \\} , ""); + + cases.add("Generic selections", + \\#include + \\#include + \\#include + \\#define my_generic_fn(X) _Generic((X), \ + \\ int: abs, \ + \\ char *: strlen, \ + \\ size_t: malloc, \ + \\ default: free \ + \\)(X) + \\#define my_generic_val(X) _Generic((X), \ + \\ int: 1, \ + \\ const char *: "bar" \ + \\) + \\int main(void) { + \\ if (my_generic_val(100) != 1) abort(); + \\ + \\ const char *foo = "foo"; + \\ const char *bar = my_generic_val(foo); + \\ if (strcmp(bar, "bar") != 0) abort(); + \\ + \\ 
if (my_generic_fn(-42) != 42) abort(); + \\ if (my_generic_fn("hello") != 5) abort(); + \\ + \\ size_t size = 8192; + \\ uint8_t *mem = my_generic_fn(size); + \\ memset(mem, 42, size); + \\ if (mem[size - 1] != 42) abort(); + \\ my_generic_fn(mem); + \\ + \\ return 0; + \\} + , ""); + + // See __builtin_alloca_with_align comment in std.c.builtins + cases.add("use of unimplemented builtin in unused function does not prevent compilation", + \\#include + \\void unused() { + \\ __builtin_alloca_with_align(1, 8); + \\} + \\int main(void) { + \\ if (__builtin_sqrt(1.0) != 1.0) abort(); + \\ return 0; + \\} + , ""); } diff --git a/test/stage2/cbe.zig b/test/stage2/cbe.zig index e9082f57fa..8a8a8ca224 100644 --- a/test/stage2/cbe.zig +++ b/test/stage2/cbe.zig @@ -51,7 +51,7 @@ pub fn addCases(ctx: *TestContext) !void { \\ _ = printf("Hello, %s!\n", "world"); \\ return 0; \\} - , "Hello, world!\n"); + , "Hello, world!" ++ std.cstr.line_sep); } { diff --git a/test/standalone.zig b/test/standalone.zig index 3ad0659f09..d8c08a6b9c 100644 --- a/test/standalone.zig +++ b/test/standalone.zig @@ -9,7 +9,10 @@ pub fn addCases(cases: *tests.StandaloneContext) void { cases.add("test/standalone/main_return_error/error_u8.zig"); cases.add("test/standalone/main_return_error/error_u8_non_zero.zig"); cases.addBuildFile("test/standalone/main_pkg_path/build.zig"); - cases.addBuildFile("test/standalone/shared_library/build.zig"); + if (std.Target.current.os.tag != .macos) { + // TODO zld cannot link shared libraries yet. + cases.addBuildFile("test/standalone/shared_library/build.zig"); + } cases.addBuildFile("test/standalone/mix_o_files/build.zig"); cases.addBuildFile("test/standalone/global_linkage/build.zig"); cases.addBuildFile("test/standalone/static_c_lib/build.zig"); diff --git a/test/translate_c.zig b/test/translate_c.zig index 47d7c5d9eb..2d5d838033 100644 --- a/test/translate_c.zig +++ b/test/translate_c.zig @@ -745,14 +745,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void { \\ static const char v2[] = "2.2.2"; \\} , &[_][]const u8{ - \\const v2: [6]u8 = [6]u8{ - \\ '2', - \\ '.', - \\ '2', - \\ '.', - \\ '2', - \\ 0, - \\}; + \\const v2: [5:0]u8 = "2.2.2".*; \\pub export fn foo() void {} }); @@ -1600,30 +1593,9 @@ pub fn addCases(cases: *tests.TranslateCContext) void { \\static char arr1[] = "hello"; \\char arr2[] = "hello"; , &[_][]const u8{ - \\pub export var arr0: [6]u8 = [6]u8{ - \\ 'h', - \\ 'e', - \\ 'l', - \\ 'l', - \\ 'o', - \\ 0, - \\}; - \\pub var arr1: [6]u8 = [6]u8{ - \\ 'h', - \\ 'e', - \\ 'l', - \\ 'l', - \\ 'o', - \\ 0, - \\}; - \\pub export var arr2: [6]u8 = [6]u8{ - \\ 'h', - \\ 'e', - \\ 'l', - \\ 'l', - \\ 'o', - \\ 0, - \\}; + \\pub export var arr0: [5:0]u8 = "hello".*; + \\pub var arr1: [5:0]u8 = "hello".*; + \\pub export var arr2: [5:0]u8 = "hello".*; }); cases.add("array initializer expr", @@ -2456,7 +2428,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void { \\ b: c_int, \\}; \\pub extern var a: struct_Foo; - \\pub export var b: f32 = 2; + \\pub export var b: f32 = 2.0; \\pub export fn foo() void { \\ var c: [*c]struct_Foo = undefined; \\ _ = a.b; @@ -3020,17 +2992,17 @@ pub fn addCases(cases: *tests.TranslateCContext) void { \\pub extern fn fn_bool(x: bool) void; \\pub extern fn fn_ptr(x: ?*c_void) void; \\pub export fn call() void { - \\ fn_int(@floatToInt(c_int, 3)); - \\ fn_int(@floatToInt(c_int, 3)); - \\ fn_int(@floatToInt(c_int, 3)); + \\ fn_int(@floatToInt(c_int, 3.0)); + \\ fn_int(@floatToInt(c_int, 3.0)); + \\ fn_int(@floatToInt(c_int, 3.0)); \\ 
fn_int(@as(c_int, 1094861636)); \\ fn_f32(@intToFloat(f32, @as(c_int, 3))); \\ fn_f64(@intToFloat(f64, @as(c_int, 3))); \\ fn_char(@bitCast(u8, @truncate(i8, @as(c_int, '3')))); \\ fn_char(@bitCast(u8, @truncate(i8, @as(c_int, '\x01')))); \\ fn_char(@bitCast(u8, @truncate(i8, @as(c_int, 0)))); - \\ fn_f32(3); - \\ fn_f64(3); + \\ fn_f32(3.0); + \\ fn_f64(3.0); \\ fn_bool(@as(c_int, 123) != 0); \\ fn_bool(@as(c_int, 0) != 0); \\ fn_bool(@ptrToInt(fn_int) != 0); @@ -3418,4 +3390,56 @@ pub fn addCases(cases: *tests.TranslateCContext) void { \\pub const MAY_NEED_PROMOTION_HEX = @import("std").meta.promoteIntLiteral(c_int, 0x80000000, .hexadecimal); \\pub const MAY_NEED_PROMOTION_OCT = @import("std").meta.promoteIntLiteral(c_int, 0o20000000000, .octal); }); + + // See __builtin_alloca_with_align comment in std.c.builtins + cases.add("demote un-implemented builtins", + \\#define FOO(X) __builtin_alloca_with_align((X), 8) + , &[_][]const u8{ + \\pub const FOO = @compileError("TODO implement function '__builtin_alloca_with_align' in std.c.builtins"); + }); + + cases.add("null sentinel arrays when initialized from string literal. Issue #8256", + \\#include + \\char zero[0] = "abc"; + \\uint32_t zero_w[0] = U"💯💯💯"; + \\char empty_incomplete[] = ""; + \\uint32_t empty_incomplete_w[] = U""; + \\char empty_constant[100] = ""; + \\uint32_t empty_constant_w[100] = U""; + \\char incomplete[] = "abc"; + \\uint32_t incomplete_w[] = U"💯💯💯"; + \\char truncated[1] = "abc"; + \\uint32_t truncated_w[1] = U"💯💯💯"; + \\char extend[5] = "a"; + \\uint32_t extend_w[5] = U"💯"; + \\char no_null[3] = "abc"; + \\uint32_t no_null_w[3] = U"💯💯💯"; + , &[_][]const u8{ + \\pub export var zero: [0]u8 = [0]u8{}; + \\pub export var zero_w: [0]u32 = [0]u32{}; + \\pub export var empty_incomplete: [1]u8 = [1]u8{0} ** 1; + \\pub export var empty_incomplete_w: [1]u32 = [1]u32{0} ** 1; + \\pub export var empty_constant: [100]u8 = [1]u8{0} ** 100; + \\pub export var empty_constant_w: [100]u32 = [1]u32{0} ** 100; + \\pub export var incomplete: [3:0]u8 = "abc".*; + \\pub export var incomplete_w: [3:0]u32 = [3:0]u32{ + \\ '\u{1f4af}', + \\ '\u{1f4af}', + \\ '\u{1f4af}', + \\}; + \\pub export var truncated: [1]u8 = "abc"[0..1].*; + \\pub export var truncated_w: [1]u32 = [1]u32{ + \\ '\u{1f4af}', + \\}; + \\pub export var extend: [5]u8 = "a"[0..1].* ++ [1]u8{0} ** 4; + \\pub export var extend_w: [5]u32 = [1]u32{ + \\ '\u{1f4af}', + \\} ++ [1]u32{0} ** 4; + \\pub export var no_null: [3]u8 = "abc".*; + \\pub export var no_null_w: [3]u32 = [3]u32{ + \\ '\u{1f4af}', + \\ '\u{1f4af}', + \\ '\u{1f4af}', + \\}; + }); }
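
A final note on the float-literal churn in these tests (`2` becoming `2.0`, `fn_f32(3)` becoming `fn_f32(3.0)`): transFloatingLiteral now prints whole-valued floating constants as float literals rather than integer literals, and the new "division of floating literals" case exercises exactly that through the DEG2RAD macro. A quick arithmetic check of the expected output, offered as a sketch rather than part of the test suite:

const std = @import("std");

// PI / 180.0f from the DEG2RAD test, done directly in Zig: with both operands
// kept floating point, the result matches the expected "0.017453" output.
test "DEG2RAD arithmetic" {
    const pi: f32 = 3.14159265358979323846;
    const deg2rad = pi / 180.0;
    std.debug.assert(deg2rad > 0.017452 and deg2rad < 0.017454);
}
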