diff --git a/ci/azure/pipelines.yml b/ci/azure/pipelines.yml index 45504c5b6e..86f92f5ef6 100644 --- a/ci/azure/pipelines.yml +++ b/ci/azure/pipelines.yml @@ -61,6 +61,7 @@ jobs: - pwsh: | Set-Variable -Name ZIGINSTALLDIR -Value "$(Get-Location)\stage3-release" + Set-Variable -Name ZIGPREFIXPATH -Value "$(Get-Location)\$(ZIG_LLVM_CLANG_LLD_NAME)" function CheckLastExitCode { if (!$?) { @@ -72,8 +73,7 @@ jobs: & "$ZIGINSTALLDIR\bin\zig.exe" build test docs ` --search-prefix "$ZIGPREFIXPATH" ` -Dstatic-llvm ` - -Dskip-non-native ` - -Dskip-stage2-tests + -Dskip-non-native CheckLastExitCode name: test displayName: 'Test' diff --git a/doc/docgen.zig b/doc/docgen.zig index 0f0e212e3c..50000da44c 100644 --- a/doc/docgen.zig +++ b/doc/docgen.zig @@ -1210,7 +1210,7 @@ fn genHtml( var env_map = try process.getEnvMap(allocator); try env_map.put("ZIG_DEBUG_COLOR", "1"); - const host = try std.zig.system.NativeTargetInfo.detect(allocator, .{}); + const host = try std.zig.system.NativeTargetInfo.detect(.{}); const builtin_code = try getBuiltinCode(allocator, &env_map, zig_exe); for (toc.nodes) |node| { @@ -1474,7 +1474,6 @@ fn genHtml( .arch_os_abi = triple, }); const target_info = try std.zig.system.NativeTargetInfo.detect( - allocator, cross_target, ); switch (host.getExternalExecutor(target_info, .{ diff --git a/lib/std/build.zig b/lib/std/build.zig index 4c05586159..f11dba717d 100644 --- a/lib/std/build.zig +++ b/lib/std/build.zig @@ -171,7 +171,7 @@ pub const Builder = struct { const env_map = try allocator.create(EnvMap); env_map.* = try process.getEnvMap(allocator); - const host = try NativeTargetInfo.detect(allocator, .{}); + const host = try NativeTargetInfo.detect(.{}); const self = try allocator.create(Builder); self.* = Builder{ @@ -1798,7 +1798,7 @@ pub const LibExeObjStep = struct { } fn computeOutFileNames(self: *LibExeObjStep) void { - self.target_info = NativeTargetInfo.detect(self.builder.allocator, self.target) catch + self.target_info = NativeTargetInfo.detect(self.target) catch unreachable; const target = self.target_info.target; diff --git a/lib/std/build/EmulatableRunStep.zig b/lib/std/build/EmulatableRunStep.zig index 0479d3a2f0..23bdf5e595 100644 --- a/lib/std/build/EmulatableRunStep.zig +++ b/lib/std/build/EmulatableRunStep.zig @@ -158,7 +158,7 @@ fn warnAboutForeignBinaries(step: *EmulatableRunStep) void { const host_name = builder.host.target.zigTriple(builder.allocator) catch unreachable; const foreign_name = artifact.target.zigTriple(builder.allocator) catch unreachable; - const target_info = std.zig.system.NativeTargetInfo.detect(builder.allocator, artifact.target) catch unreachable; + const target_info = std.zig.system.NativeTargetInfo.detect(artifact.target) catch unreachable; const need_cross_glibc = artifact.target.isGnuLibC() and artifact.is_linking_libc; switch (builder.host.getExternalExecutor(target_info, .{ .qemu_fixes_dl = need_cross_glibc and builder.glibc_runtimes_dir != null, diff --git a/lib/std/fs/file.zig b/lib/std/fs/file.zig index 5de746150b..3792e1c1f2 100644 --- a/lib/std/fs/file.zig +++ b/lib/std/fs/file.zig @@ -990,6 +990,8 @@ pub const File = struct { return index; } + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn pread(self: File, buffer: []u8, offset: u64) PReadError!usize { if (is_windows) { return windows.ReadFile(self.handle, buffer, offset, self.intended_io_mode); @@ -1004,6 +1006,8 @@ pub const File = struct { /// Returns the number of bytes read. 
If the number read is smaller than `buffer.len`, it /// means the file reached the end. Reaching the end of a file is not an error condition. + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn preadAll(self: File, buffer: []u8, offset: u64) PReadError!usize { var index: usize = 0; while (index != buffer.len) { @@ -1058,6 +1062,8 @@ pub const File = struct { } /// See https://github.com/ziglang/zig/issues/7699 + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn preadv(self: File, iovecs: []const os.iovec, offset: u64) PReadError!usize { if (is_windows) { // TODO improve this to use ReadFileScatter @@ -1079,6 +1085,8 @@ pub const File = struct { /// The `iovecs` parameter is mutable because this function needs to mutate the fields in /// order to handle partial reads from the underlying OS layer. /// See https://github.com/ziglang/zig/issues/7699 + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn preadvAll(self: File, iovecs: []os.iovec, offset: u64) PReadError!usize { if (iovecs.len == 0) return 0; @@ -1122,6 +1130,8 @@ pub const File = struct { } } + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn pwrite(self: File, bytes: []const u8, offset: u64) PWriteError!usize { if (is_windows) { return windows.WriteFile(self.handle, bytes, offset, self.intended_io_mode); @@ -1134,6 +1144,8 @@ pub const File = struct { } } + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn pwriteAll(self: File, bytes: []const u8, offset: u64) PWriteError!void { var index: usize = 0; while (index < bytes.len) { @@ -1179,6 +1191,8 @@ pub const File = struct { } /// See https://github.com/ziglang/zig/issues/7699 + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn pwritev(self: File, iovecs: []os.iovec_const, offset: u64) PWriteError!usize { if (is_windows) { // TODO improve this to use WriteFileScatter @@ -1197,6 +1211,8 @@ pub const File = struct { /// The `iovecs` parameter is mutable because this function needs to mutate the fields in /// order to handle partial writes from the underlying OS layer. /// See https://github.com/ziglang/zig/issues/7699 + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn pwritevAll(self: File, iovecs: []os.iovec_const, offset: u64) PWriteError!void { if (iovecs.len == 0) return; diff --git a/lib/std/io.zig b/lib/std/io.zig index 50d134b856..d878afd3ae 100644 --- a/lib/std/io.zig +++ b/lib/std/io.zig @@ -36,6 +36,10 @@ pub const default_mode: ModeOverride = if (is_async) Mode.evented else .blocking fn getStdOutHandle() os.fd_t { if (builtin.os.tag == .windows) { + if (builtin.zig_backend == .stage2_x86_64) { + // TODO: this is just a temporary workaround until we advance x86 backend further along. 
+ return os.windows.GetStdHandle(os.windows.STD_OUTPUT_HANDLE) catch os.windows.INVALID_HANDLE_VALUE; + } return os.windows.peb().ProcessParameters.hStdOutput; } @@ -58,6 +62,10 @@ pub fn getStdOut() File { fn getStdErrHandle() os.fd_t { if (builtin.os.tag == .windows) { + if (builtin.zig_backend == .stage2_x86_64) { + // TODO: this is just a temporary workaround until we advance x86 backend further along. + return os.windows.GetStdHandle(os.windows.STD_ERROR_HANDLE) catch os.windows.INVALID_HANDLE_VALUE; + } return os.windows.peb().ProcessParameters.hStdError; } @@ -80,6 +88,10 @@ pub fn getStdErr() File { fn getStdInHandle() os.fd_t { if (builtin.os.tag == .windows) { + if (builtin.zig_backend == .stage2_x86_64) { + // TODO: this is just a temporary workaround until we advance x86 backend further along. + return os.windows.GetStdHandle(os.windows.STD_INPUT_HANDLE) catch os.windows.INVALID_HANDLE_VALUE; + } return os.windows.peb().ProcessParameters.hStdInput; } diff --git a/lib/std/os/uefi/protocols/block_io_protocol.zig b/lib/std/os/uefi/protocols/block_io_protocol.zig index 938eb930da..45b60eb59e 100644 --- a/lib/std/os/uefi/protocols/block_io_protocol.zig +++ b/lib/std/os/uefi/protocols/block_io_protocol.zig @@ -2,7 +2,7 @@ const std = @import("std"); const uefi = std.os.uefi; const Status = uefi.Status; -const EfiBlockMedia = extern struct { +pub const EfiBlockMedia = extern struct { /// The current media ID. If the media changes, this value is changed. media_id: u32, @@ -38,7 +38,7 @@ const EfiBlockMedia = extern struct { optimal_transfer_length_granularity: u32, }; -const BlockIoProtocol = extern struct { +pub const BlockIoProtocol = extern struct { const Self = @This(); revision: u64, diff --git a/lib/std/os/windows/kernel32.zig b/lib/std/os/windows/kernel32.zig index 9e6f5df97b..8d146def7f 100644 --- a/lib/std/os/windows/kernel32.zig +++ b/lib/std/os/windows/kernel32.zig @@ -348,7 +348,13 @@ pub extern "kernel32" fn WriteFile( in_out_lpOverlapped: ?*OVERLAPPED, ) callconv(WINAPI) BOOL; -pub extern "kernel32" fn WriteFileEx(hFile: HANDLE, lpBuffer: [*]const u8, nNumberOfBytesToWrite: DWORD, lpOverlapped: *OVERLAPPED, lpCompletionRoutine: LPOVERLAPPED_COMPLETION_ROUTINE) callconv(WINAPI) BOOL; +pub extern "kernel32" fn WriteFileEx( + hFile: HANDLE, + lpBuffer: [*]const u8, + nNumberOfBytesToWrite: DWORD, + lpOverlapped: *OVERLAPPED, + lpCompletionRoutine: LPOVERLAPPED_COMPLETION_ROUTINE, +) callconv(WINAPI) BOOL; pub extern "kernel32" fn LoadLibraryW(lpLibFileName: [*:0]const u16) callconv(WINAPI) ?HMODULE; diff --git a/lib/std/simd.zig b/lib/std/simd.zig index b2655758c0..972bf136e9 100644 --- a/lib/std/simd.zig +++ b/lib/std/simd.zig @@ -9,7 +9,7 @@ const builtin = @import("builtin"); pub fn suggestVectorSizeForCpu(comptime T: type, comptime cpu: std.Target.Cpu) ?usize { // This is guesswork, if you have better suggestions can add it or edit the current here // This can run in comptime only, but stage 1 fails at it, stage 2 can understand it - const element_bit_size = @maximum(8, std.math.ceilPowerOfTwo(T, @bitSizeOf(T)) catch unreachable); + const element_bit_size = @maximum(8, std.math.ceilPowerOfTwo(u16, @bitSizeOf(T)) catch unreachable); const vector_bit_size: u16 = blk: { if (cpu.arch.isX86()) { if (T == bool and std.Target.x86.featureSetHas(.prefer_mask_registers)) return 64; @@ -57,6 +57,15 @@ pub fn suggestVectorSize(comptime T: type) ?usize { return suggestVectorSizeForCpu(T, builtin.cpu); } +test "suggestVectorSizeForCpu works with signed and unsigned values" { + 
comptime var cpu = std.Target.Cpu.baseline(std.Target.Cpu.Arch.x86_64); + comptime cpu.features.addFeature(@enumToInt(std.Target.x86.Feature.avx512f)); + const signed_integer_size = suggestVectorSizeForCpu(i32, cpu).?; + const unsigned_integer_size = suggestVectorSizeForCpu(u32, cpu).?; + try std.testing.expectEqual(@as(usize, 16), unsigned_integer_size); + try std.testing.expectEqual(@as(usize, 16), signed_integer_size); +} + fn vectorLength(comptime VectorType: type) comptime_int { return switch (@typeInfo(VectorType)) { .Vector => |info| info.len, diff --git a/lib/std/start.zig b/lib/std/start.zig index 49094ab02d..9f70cce1ea 100644 --- a/lib/std/start.zig +++ b/lib/std/start.zig @@ -36,6 +36,10 @@ comptime { if (@typeInfo(@TypeOf(root.main)).Fn.calling_convention != .C) { @export(main2, .{ .name = "main" }); } + } else if (builtin.os.tag == .windows) { + if (!@hasDecl(root, "wWinMainCRTStartup") and !@hasDecl(root, "mainCRTStartup")) { + @export(wWinMainCRTStartup2, .{ .name = "wWinMainCRTStartup" }); + } } else if (builtin.os.tag == .wasi and @hasDecl(root, "main")) { @export(wasiMain2, .{ .name = "_start" }); } else { diff --git a/lib/std/zig/system/NativeTargetInfo.zig b/lib/std/zig/system/NativeTargetInfo.zig index 824a1a26b6..73f76b11b7 100644 --- a/lib/std/zig/system/NativeTargetInfo.zig +++ b/lib/std/zig/system/NativeTargetInfo.zig @@ -28,6 +28,7 @@ pub const DetectError = error{ SystemFdQuotaExceeded, DeviceBusy, OSVersionDetectionFail, + Unexpected, }; /// Given a `CrossTarget`, which specifies in detail which parts of the target should be detected @@ -36,8 +37,7 @@ pub const DetectError = error{ /// relative to that. /// Any resources this function allocates are released before returning, and so there is no /// deinitialization method. -/// TODO Remove the Allocator requirement from this function. -pub fn detect(allocator: Allocator, cross_target: CrossTarget) DetectError!NativeTargetInfo { +pub fn detect(cross_target: CrossTarget) DetectError!NativeTargetInfo { var os = cross_target.getOsTag().defaultVersionRange(cross_target.getCpuArch()); if (cross_target.os_tag == null) { switch (builtin.target.os.tag) { @@ -198,7 +198,7 @@ pub fn detect(allocator: Allocator, cross_target: CrossTarget) DetectError!Nativ } orelse backup_cpu_detection: { break :backup_cpu_detection Target.Cpu.baseline(cpu_arch); }; - var result = try detectAbiAndDynamicLinker(allocator, cpu, os, cross_target); + var result = try detectAbiAndDynamicLinker(cpu, os, cross_target); // For x86, we need to populate some CPU feature flags depending on architecture // and mode: // * 16bit_mode => if the abi is code16 @@ -235,13 +235,20 @@ pub fn detect(allocator: Allocator, cross_target: CrossTarget) DetectError!Nativ return result; } -/// First we attempt to use the executable's own binary. If it is dynamically -/// linked, then it should answer both the C ABI question and the dynamic linker question. -/// If it is statically linked, then we try /usr/bin/env (or the file it references in shebang). If that does not provide the answer, then -/// we fall back to the defaults. -/// TODO Remove the Allocator requirement from this function. +/// In the past, this function attempted to use the executable's own binary if it was dynamically +/// linked to answer both the C ABI question and the dynamic linker question. 
However, this +/// could be problematic on a system that uses a RUNPATH for the compiler binary, locking +/// it to an older glibc version, while system binaries such as /usr/bin/env use a newer glibc +/// version. The problem is that libc.so.6 glibc version will match that of the system while +/// the dynamic linker will match that of the compiler binary. Executables with these versions +/// mismatching will fail to run. +/// +/// Therefore, this function works the same regardless of whether the compiler binary is +/// dynamically or statically linked. It inspects `/usr/bin/env` as an ELF file to find the +/// answer to these questions, or if there is a shebang line, then it chases the referenced +/// file recursively. If that does not provide the answer, then the function falls back to +/// defaults. fn detectAbiAndDynamicLinker( - allocator: Allocator, cpu: Target.Cpu, os: Target.Os, cross_target: CrossTarget, @@ -279,8 +286,8 @@ fn detectAbiAndDynamicLinker( const ofmt = cross_target.ofmt orelse Target.ObjectFormat.default(os.tag, cpu.arch); for (all_abis) |abi| { - // This may be a nonsensical parameter. We detect this with error.UnknownDynamicLinkerPath and - // skip adding it to `ld_info_list`. + // This may be a nonsensical parameter. We detect this with + // error.UnknownDynamicLinkerPath and skip adding it to `ld_info_list`. const target: Target = .{ .cpu = cpu, .os = os, @@ -300,64 +307,6 @@ fn detectAbiAndDynamicLinker( // Best case scenario: the executable is dynamically linked, and we can iterate // over our own shared objects and find a dynamic linker. - self_exe: { - const lib_paths = try std.process.getSelfExeSharedLibPaths(allocator); - defer { - for (lib_paths) |lib_path| { - allocator.free(lib_path); - } - allocator.free(lib_paths); - } - - var found_ld_info: LdInfo = undefined; - var found_ld_path: [:0]const u8 = undefined; - - // Look for dynamic linker. - // This is O(N^M) but typical case here is N=2 and M=10. - find_ld: for (lib_paths) |lib_path| { - for (ld_info_list) |ld_info| { - const standard_ld_basename = fs.path.basename(ld_info.ld.get().?); - if (std.mem.endsWith(u8, lib_path, standard_ld_basename)) { - found_ld_info = ld_info; - found_ld_path = lib_path; - break :find_ld; - } - } - } else break :self_exe; - - // Look for glibc version. - var os_adjusted = os; - if (builtin.target.os.tag == .linux and found_ld_info.abi.isGnu() and - cross_target.glibc_version == null) - { - for (lib_paths) |lib_path| { - if (std.mem.endsWith(u8, lib_path, glibc_so_basename)) { - os_adjusted.version_range.linux.glibc = glibcVerFromSO(lib_path) catch |err| switch (err) { - error.UnrecognizedGnuLibCFileName => continue, - error.InvalidGnuLibCVersion => continue, - error.GnuLibCVersionUnavailable => continue, - else => |e| return e, - }; - break; - } - } - } - - var result: NativeTargetInfo = .{ - .target = .{ - .cpu = cpu, - .os = os_adjusted, - .abi = cross_target.abi orelse found_ld_info.abi, - .ofmt = cross_target.ofmt orelse Target.ObjectFormat.default(os_adjusted.tag, cpu.arch), - }, - .dynamic_linker = if (cross_target.dynamic_linker.get() == null) - DynamicLinker.init(found_ld_path) - else - cross_target.dynamic_linker, - }; - return result; - } - const elf_file = blk: { // This block looks for a shebang line in /usr/bin/env, // if it finds one, then instead of using /usr/bin/env as the ELF file to examine, it uses the file it references instead, @@ -369,7 +318,7 @@ fn detectAbiAndDynamicLinker( // #! 
(2) + 255 (max length of shebang line since Linux 5.1) + \n (1) var buffer: [258]u8 = undefined; while (true) { - const file = std.fs.openFileAbsolute(file_name, .{}) catch |err| switch (err) { + const file = fs.openFileAbsolute(file_name, .{}) catch |err| switch (err) { error.NoSpaceLeft => unreachable, error.NameTooLong => unreachable, error.PathAlreadyExists => unreachable, @@ -390,44 +339,35 @@ fn detectAbiAndDynamicLinker( error.FileTooBig, error.Unexpected, => |e| { - std.log.warn("Encoutered error: {s}, falling back to default ABI and dynamic linker.\n", .{@errorName(e)}); + std.log.warn("Encountered error: {s}, falling back to default ABI and dynamic linker.\n", .{@errorName(e)}); return defaultAbiAndDynamicLinker(cpu, os, cross_target); }, else => |e| return e, }; + errdefer file.close(); - const line = file.reader().readUntilDelimiter(&buffer, '\n') catch |err| switch (err) { - error.IsDir => unreachable, // Handled before - error.AccessDenied => unreachable, - error.WouldBlock => unreachable, // Did not request blocking mode - error.OperationAborted => unreachable, // Windows-only - error.BrokenPipe => unreachable, - error.ConnectionResetByPeer => unreachable, - error.ConnectionTimedOut => unreachable, - error.InputOutput => unreachable, - error.Unexpected => unreachable, - - error.StreamTooLong, - error.EndOfStream, - error.NotOpenForReading, + const len = preadMin(file, &buffer, 0, buffer.len) catch |err| switch (err) { + error.UnexpectedEndOfFile, + error.UnableToReadElfFile, => break :blk file, - else => |e| { - file.close(); - return e; - }, + else => |e| return e, }; + const newline = mem.indexOfScalar(u8, buffer[0..len], '\n') orelse break :blk file; + const line = buffer[0..newline]; if (!mem.startsWith(u8, line, "#!")) break :blk file; - var it = std.mem.tokenize(u8, line[2..], " "); - file.close(); + var it = mem.tokenize(u8, line[2..], " "); file_name = it.next() orelse return defaultAbiAndDynamicLinker(cpu, os, cross_target); + file.close(); } }; defer elf_file.close(); // If Zig is statically linked, such as via distributed binary static builds, the above // trick (block self_exe) won't work. The next thing we fall back to is the same thing, but for elf_file. + // TODO: inline this function and combine the buffer we already read above to find + // the possible shebang line with the buffer we use for the ELF header. return abiAndDynamicLinkerFromFile(elf_file, cpu, os, ld_info_list, cross_target) catch |err| switch (err) { error.FileSystem, error.SystemResources, @@ -447,31 +387,196 @@ fn detectAbiAndDynamicLinker( error.NameTooLong, // Finally, we fall back on the standard path. 
=> |e| { - std.log.warn("Encoutered error: {s}, falling back to default ABI and dynamic linker.\n", .{@errorName(e)}); + std.log.warn("Encountered error: {s}, falling back to default ABI and dynamic linker.\n", .{@errorName(e)}); return defaultAbiAndDynamicLinker(cpu, os, cross_target); }, }; } -const glibc_so_basename = "libc.so.6"; - -fn glibcVerFromSO(so_path: [:0]const u8) !std.builtin.Version { - var link_buf: [std.os.PATH_MAX]u8 = undefined; - const link_name = std.os.readlinkZ(so_path.ptr, &link_buf) catch |err| switch (err) { - error.AccessDenied => return error.GnuLibCVersionUnavailable, - error.FileSystem => return error.FileSystem, - error.SymLinkLoop => return error.SymLinkLoop, +fn glibcVerFromRPath(rpath: []const u8) !std.builtin.Version { + var dir = fs.cwd().openDir(rpath, .{}) catch |err| switch (err) { + error.NameTooLong => unreachable, + error.InvalidUtf8 => unreachable, + error.BadPathName => unreachable, + error.DeviceBusy => unreachable, + + error.FileNotFound, + error.NotDir, + error.InvalidHandle, + error.AccessDenied, + error.NoDevice, + => return error.GLibCNotFound, + + error.ProcessFdQuotaExceeded, + error.SystemFdQuotaExceeded, + error.SystemResources, + error.SymLinkLoop, + error.Unexpected, + => |e| return e, + }; + defer dir.close(); + + // Now we have a candidate for the path to libc shared object. In + // the past, we used readlink() here because the link name would + // reveal the glibc version. However, in more recent GNU/Linux + // installations, there is no symlink. Thus we instead use a more + // robust check of opening the libc shared object and looking at the + // .dynstr section, and finding the max version number of symbols + // that start with "GLIBC_2.". + const glibc_so_basename = "libc.so.6"; + var f = dir.openFile(glibc_so_basename, .{}) catch |err| switch (err) { error.NameTooLong => unreachable, - error.NotLink => return error.GnuLibCVersionUnavailable, - error.FileNotFound => return error.GnuLibCVersionUnavailable, - error.SystemResources => return error.SystemResources, - error.NotDir => return error.GnuLibCVersionUnavailable, - error.Unexpected => return error.GnuLibCVersionUnavailable, error.InvalidUtf8 => unreachable, // Windows only error.BadPathName => unreachable, // Windows only - error.UnsupportedReparsePointType => unreachable, // Windows only + error.PipeBusy => unreachable, // Windows-only + error.SharingViolation => unreachable, // Windows-only + error.FileLocksNotSupported => unreachable, // No lock requested. 
+ error.NoSpaceLeft => unreachable, // read-only + error.PathAlreadyExists => unreachable, // read-only + error.DeviceBusy => unreachable, // read-only + error.FileBusy => unreachable, // read-only + error.InvalidHandle => unreachable, // should not be in the error set + error.WouldBlock => unreachable, // not using O_NONBLOCK + error.NoDevice => unreachable, // not asking for a special device + + error.AccessDenied, + error.FileNotFound, + error.NotDir, + error.IsDir, + => return error.GLibCNotFound, + + error.FileTooBig => return error.Unexpected, + + error.ProcessFdQuotaExceeded, + error.SystemFdQuotaExceeded, + error.SystemResources, + error.SymLinkLoop, + error.Unexpected, + => |e| return e, }; - return glibcVerFromLinkName(link_name, "libc-"); + defer f.close(); + + return glibcVerFromSoFile(f) catch |err| switch (err) { + error.InvalidElfMagic, + error.InvalidElfEndian, + error.InvalidElfClass, + error.InvalidElfFile, + error.InvalidElfVersion, + error.InvalidGnuLibCVersion, + error.UnexpectedEndOfFile, + => return error.GLibCNotFound, + + error.SystemResources, + error.UnableToReadElfFile, + error.Unexpected, + error.FileSystem, + => |e| return e, + }; +} + +fn glibcVerFromSoFile(file: fs.File) !std.builtin.Version { + var hdr_buf: [@sizeOf(elf.Elf64_Ehdr)]u8 align(@alignOf(elf.Elf64_Ehdr)) = undefined; + _ = try preadMin(file, &hdr_buf, 0, hdr_buf.len); + const hdr32 = @ptrCast(*elf.Elf32_Ehdr, &hdr_buf); + const hdr64 = @ptrCast(*elf.Elf64_Ehdr, &hdr_buf); + if (!mem.eql(u8, hdr32.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; + const elf_endian: std.builtin.Endian = switch (hdr32.e_ident[elf.EI_DATA]) { + elf.ELFDATA2LSB => .Little, + elf.ELFDATA2MSB => .Big, + else => return error.InvalidElfEndian, + }; + const need_bswap = elf_endian != native_endian; + if (hdr32.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; + + const is_64 = switch (hdr32.e_ident[elf.EI_CLASS]) { + elf.ELFCLASS32 => false, + elf.ELFCLASS64 => true, + else => return error.InvalidElfClass, + }; + const shstrndx = elfInt(is_64, need_bswap, hdr32.e_shstrndx, hdr64.e_shstrndx); + var shoff = elfInt(is_64, need_bswap, hdr32.e_shoff, hdr64.e_shoff); + const shentsize = elfInt(is_64, need_bswap, hdr32.e_shentsize, hdr64.e_shentsize); + const str_section_off = shoff + @as(u64, shentsize) * @as(u64, shstrndx); + var sh_buf: [16 * @sizeOf(elf.Elf64_Shdr)]u8 align(@alignOf(elf.Elf64_Shdr)) = undefined; + if (sh_buf.len < shentsize) return error.InvalidElfFile; + + _ = try preadMin(file, &sh_buf, str_section_off, shentsize); + const shstr32 = @ptrCast(*elf.Elf32_Shdr, @alignCast(@alignOf(elf.Elf32_Shdr), &sh_buf)); + const shstr64 = @ptrCast(*elf.Elf64_Shdr, @alignCast(@alignOf(elf.Elf64_Shdr), &sh_buf)); + const shstrtab_off = elfInt(is_64, need_bswap, shstr32.sh_offset, shstr64.sh_offset); + const shstrtab_size = elfInt(is_64, need_bswap, shstr32.sh_size, shstr64.sh_size); + var strtab_buf: [4096:0]u8 = undefined; + const shstrtab_len = std.math.min(shstrtab_size, strtab_buf.len); + const shstrtab_read_len = try preadMin(file, &strtab_buf, shstrtab_off, shstrtab_len); + const shstrtab = strtab_buf[0..shstrtab_read_len]; + const shnum = elfInt(is_64, need_bswap, hdr32.e_shnum, hdr64.e_shnum); + var sh_i: u16 = 0; + const dynstr: struct { offset: u64, size: u64 } = find_dyn_str: while (sh_i < shnum) { + // Reserve some bytes so that we can deref the 64-bit struct fields + // even when the ELF file is 32-bits. 
+ const sh_reserve: usize = @sizeOf(elf.Elf64_Shdr) - @sizeOf(elf.Elf32_Shdr); + const sh_read_byte_len = try preadMin( + file, + sh_buf[0 .. sh_buf.len - sh_reserve], + shoff, + shentsize, + ); + var sh_buf_i: usize = 0; + while (sh_buf_i < sh_read_byte_len and sh_i < shnum) : ({ + sh_i += 1; + shoff += shentsize; + sh_buf_i += shentsize; + }) { + const sh32 = @ptrCast( + *elf.Elf32_Shdr, + @alignCast(@alignOf(elf.Elf32_Shdr), &sh_buf[sh_buf_i]), + ); + const sh64 = @ptrCast( + *elf.Elf64_Shdr, + @alignCast(@alignOf(elf.Elf64_Shdr), &sh_buf[sh_buf_i]), + ); + const sh_name_off = elfInt(is_64, need_bswap, sh32.sh_name, sh64.sh_name); + // TODO this pointer cast should not be necessary + const sh_name = mem.sliceTo(std.meta.assumeSentinel(shstrtab[sh_name_off..].ptr, 0), 0); + if (mem.eql(u8, sh_name, ".dynstr")) { + break :find_dyn_str .{ + .offset = elfInt(is_64, need_bswap, sh32.sh_offset, sh64.sh_offset), + .size = elfInt(is_64, need_bswap, sh32.sh_size, sh64.sh_size), + }; + } + } + } else return error.InvalidGnuLibCVersion; + + // Here we loop over all the strings in the dynstr string table, assuming that any + // strings that start with "GLIBC_2." indicate the existence of such a glibc version, + // and furthermore, that the system-installed glibc is at minimum that version. + + // Empirically, glibc 2.34 libc.so .dynstr section is 32441 bytes on my system. + // Here I use double this value plus some headroom. This makes it only need + // a single read syscall here. + var buf: [80000]u8 = undefined; + if (buf.len < dynstr.size) return error.InvalidGnuLibCVersion; + + const dynstr_size = @intCast(usize, dynstr.size); + const dynstr_bytes = buf[0..dynstr_size]; + _ = try preadMin(file, dynstr_bytes, dynstr.offset, dynstr_bytes.len); + var it = mem.split(u8, dynstr_bytes, &.{0}); + var max_ver: std.builtin.Version = .{ .major = 2, .minor = 2, .patch = 5 }; + while (it.next()) |s| { + if (mem.startsWith(u8, s, "GLIBC_2.")) { + const chopped = s["GLIBC_".len..]; + const ver = std.builtin.Version.parse(chopped) catch |err| switch (err) { + error.Overflow => return error.InvalidGnuLibCVersion, + error.InvalidCharacter => return error.InvalidGnuLibCVersion, + error.InvalidVersion => return error.InvalidGnuLibCVersion, + }; + switch (ver.order(max_ver)) { + .gt => max_ver = ver, + .lt, .eq => continue, + } + } + } + return max_ver; } fn glibcVerFromLinkName(link_name: []const u8, prefix: []const u8) !std.builtin.Version { @@ -641,65 +746,65 @@ pub fn abiAndDynamicLinkerFromFile( if (builtin.target.os.tag == .linux and result.target.isGnuLibC() and cross_target.glibc_version == null) { - if (rpath_offset) |rpoff| { - const shstrndx = elfInt(is_64, need_bswap, hdr32.e_shstrndx, hdr64.e_shstrndx); + const shstrndx = elfInt(is_64, need_bswap, hdr32.e_shstrndx, hdr64.e_shstrndx); - var shoff = elfInt(is_64, need_bswap, hdr32.e_shoff, hdr64.e_shoff); - const shentsize = elfInt(is_64, need_bswap, hdr32.e_shentsize, hdr64.e_shentsize); - const str_section_off = shoff + @as(u64, shentsize) * @as(u64, shstrndx); + var shoff = elfInt(is_64, need_bswap, hdr32.e_shoff, hdr64.e_shoff); + const shentsize = elfInt(is_64, need_bswap, hdr32.e_shentsize, hdr64.e_shentsize); + const str_section_off = shoff + @as(u64, shentsize) * @as(u64, shstrndx); - var sh_buf: [16 * @sizeOf(elf.Elf64_Shdr)]u8 align(@alignOf(elf.Elf64_Shdr)) = undefined; - if (sh_buf.len < shentsize) return error.InvalidElfFile; + var sh_buf: [16 * @sizeOf(elf.Elf64_Shdr)]u8 align(@alignOf(elf.Elf64_Shdr)) = undefined; + if (sh_buf.len < 
shentsize) return error.InvalidElfFile; - _ = try preadMin(file, &sh_buf, str_section_off, shentsize); - const shstr32 = @ptrCast(*elf.Elf32_Shdr, @alignCast(@alignOf(elf.Elf32_Shdr), &sh_buf)); - const shstr64 = @ptrCast(*elf.Elf64_Shdr, @alignCast(@alignOf(elf.Elf64_Shdr), &sh_buf)); - const shstrtab_off = elfInt(is_64, need_bswap, shstr32.sh_offset, shstr64.sh_offset); - const shstrtab_size = elfInt(is_64, need_bswap, shstr32.sh_size, shstr64.sh_size); - var strtab_buf: [4096:0]u8 = undefined; - const shstrtab_len = std.math.min(shstrtab_size, strtab_buf.len); - const shstrtab_read_len = try preadMin(file, &strtab_buf, shstrtab_off, shstrtab_len); - const shstrtab = strtab_buf[0..shstrtab_read_len]; + _ = try preadMin(file, &sh_buf, str_section_off, shentsize); + const shstr32 = @ptrCast(*elf.Elf32_Shdr, @alignCast(@alignOf(elf.Elf32_Shdr), &sh_buf)); + const shstr64 = @ptrCast(*elf.Elf64_Shdr, @alignCast(@alignOf(elf.Elf64_Shdr), &sh_buf)); + const shstrtab_off = elfInt(is_64, need_bswap, shstr32.sh_offset, shstr64.sh_offset); + const shstrtab_size = elfInt(is_64, need_bswap, shstr32.sh_size, shstr64.sh_size); + var strtab_buf: [4096:0]u8 = undefined; + const shstrtab_len = std.math.min(shstrtab_size, strtab_buf.len); + const shstrtab_read_len = try preadMin(file, &strtab_buf, shstrtab_off, shstrtab_len); + const shstrtab = strtab_buf[0..shstrtab_read_len]; - const shnum = elfInt(is_64, need_bswap, hdr32.e_shnum, hdr64.e_shnum); - var sh_i: u16 = 0; - const dynstr: ?struct { offset: u64, size: u64 } = find_dyn_str: while (sh_i < shnum) { - // Reserve some bytes so that we can deref the 64-bit struct fields - // even when the ELF file is 32-bits. - const sh_reserve: usize = @sizeOf(elf.Elf64_Shdr) - @sizeOf(elf.Elf32_Shdr); - const sh_read_byte_len = try preadMin( - file, - sh_buf[0 .. sh_buf.len - sh_reserve], - shoff, - shentsize, + const shnum = elfInt(is_64, need_bswap, hdr32.e_shnum, hdr64.e_shnum); + var sh_i: u16 = 0; + const dynstr: ?struct { offset: u64, size: u64 } = find_dyn_str: while (sh_i < shnum) { + // Reserve some bytes so that we can deref the 64-bit struct fields + // even when the ELF file is 32-bits. + const sh_reserve: usize = @sizeOf(elf.Elf64_Shdr) - @sizeOf(elf.Elf32_Shdr); + const sh_read_byte_len = try preadMin( + file, + sh_buf[0 .. 
sh_buf.len - sh_reserve], + shoff, + shentsize, + ); + var sh_buf_i: usize = 0; + while (sh_buf_i < sh_read_byte_len and sh_i < shnum) : ({ + sh_i += 1; + shoff += shentsize; + sh_buf_i += shentsize; + }) { + const sh32 = @ptrCast( + *elf.Elf32_Shdr, + @alignCast(@alignOf(elf.Elf32_Shdr), &sh_buf[sh_buf_i]), ); - var sh_buf_i: usize = 0; - while (sh_buf_i < sh_read_byte_len and sh_i < shnum) : ({ - sh_i += 1; - shoff += shentsize; - sh_buf_i += shentsize; - }) { - const sh32 = @ptrCast( - *elf.Elf32_Shdr, - @alignCast(@alignOf(elf.Elf32_Shdr), &sh_buf[sh_buf_i]), - ); - const sh64 = @ptrCast( - *elf.Elf64_Shdr, - @alignCast(@alignOf(elf.Elf64_Shdr), &sh_buf[sh_buf_i]), - ); - const sh_name_off = elfInt(is_64, need_bswap, sh32.sh_name, sh64.sh_name); - // TODO this pointer cast should not be necessary - const sh_name = mem.sliceTo(std.meta.assumeSentinel(shstrtab[sh_name_off..].ptr, 0), 0); - if (mem.eql(u8, sh_name, ".dynstr")) { - break :find_dyn_str .{ - .offset = elfInt(is_64, need_bswap, sh32.sh_offset, sh64.sh_offset), - .size = elfInt(is_64, need_bswap, sh32.sh_size, sh64.sh_size), - }; - } + const sh64 = @ptrCast( + *elf.Elf64_Shdr, + @alignCast(@alignOf(elf.Elf64_Shdr), &sh_buf[sh_buf_i]), + ); + const sh_name_off = elfInt(is_64, need_bswap, sh32.sh_name, sh64.sh_name); + // TODO this pointer cast should not be necessary + const sh_name = mem.sliceTo(std.meta.assumeSentinel(shstrtab[sh_name_off..].ptr, 0), 0); + if (mem.eql(u8, sh_name, ".dynstr")) { + break :find_dyn_str .{ + .offset = elfInt(is_64, need_bswap, sh32.sh_offset, sh64.sh_offset), + .size = elfInt(is_64, need_bswap, sh32.sh_size, sh64.sh_size), + }; } - } else null; + } + } else null; - if (dynstr) |ds| { + if (dynstr) |ds| { + if (rpath_offset) |rpoff| { // TODO this pointer cast should not be necessary const rpoff_usize = std.math.cast(usize, rpoff) orelse return error.InvalidElfFile; if (rpoff_usize > ds.size) return error.InvalidElfFile; @@ -713,64 +818,31 @@ pub fn abiAndDynamicLinkerFromFile( const rpath_list = mem.sliceTo(std.meta.assumeSentinel(strtab.ptr, 0), 0); var it = mem.tokenize(u8, rpath_list, ":"); while (it.next()) |rpath| { - var dir = fs.cwd().openDir(rpath, .{}) catch |err| switch (err) { - error.NameTooLong => unreachable, - error.InvalidUtf8 => unreachable, - error.BadPathName => unreachable, - error.DeviceBusy => unreachable, - - error.FileNotFound, - error.NotDir, - error.InvalidHandle, - error.AccessDenied, - error.NoDevice, - => continue, - - error.ProcessFdQuotaExceeded, - error.SystemFdQuotaExceeded, - error.SystemResources, - error.SymLinkLoop, - error.Unexpected, - => |e| return e, - }; - defer dir.close(); - - var link_buf: [std.os.PATH_MAX]u8 = undefined; - const link_name = std.os.readlinkatZ( - dir.fd, - glibc_so_basename, - &link_buf, - ) catch |err| switch (err) { - error.NameTooLong => unreachable, - error.InvalidUtf8 => unreachable, // Windows only - error.BadPathName => unreachable, // Windows only - error.UnsupportedReparsePointType => unreachable, // Windows only - - error.AccessDenied, - error.FileNotFound, - error.NotLink, - error.NotDir, - => continue, - - error.SystemResources, - error.FileSystem, - error.SymLinkLoop, - error.Unexpected, - => |e| return e, - }; - result.target.os.version_range.linux.glibc = glibcVerFromLinkName( - link_name, - "libc-", - ) catch |err| switch (err) { - error.UnrecognizedGnuLibCFileName, - error.InvalidGnuLibCVersion, - => continue, - }; - break; + if (glibcVerFromRPath(rpath)) |ver| { + result.target.os.version_range.linux.glibc = ver; + 
return result; + } else |err| switch (err) { + error.GLibCNotFound => continue, + else => |e| return e, + } } } - } else if (result.dynamic_linker.get()) |dl_path| glibc_ver: { - // There is no DT_RUNPATH but we can try to see if the information is + } + + if (result.dynamic_linker.get()) |dl_path| glibc_ver: { + // There is no DT_RUNPATH so we try to find libc.so.6 inside the same + // directory as the dynamic linker. + if (fs.path.dirname(dl_path)) |rpath| { + if (glibcVerFromRPath(rpath)) |ver| { + result.target.os.version_range.linux.glibc = ver; + return result; + } else |err| switch (err) { + error.GLibCNotFound => {}, + else => |e| return e, + } + } + + // So far, no luck. Next we try to see if the information is // present in the symlink data for the dynamic linker path. var link_buf: [std.os.PATH_MAX]u8 = undefined; const link_name = std.os.readlink(dl_path, &link_buf) catch |err| switch (err) { @@ -799,6 +871,36 @@ pub fn abiAndDynamicLinkerFromFile( error.InvalidGnuLibCVersion, => break :glibc_ver, }; + return result; + } + + // Nothing worked so far. Finally we fall back to hard-coded search paths. + // Some distros such as Debian keep their libc.so.6 in `/lib/$triple/`. + var path_buf: [std.os.PATH_MAX]u8 = undefined; + var index: usize = 0; + const prefix = "/lib/"; + const cpu_arch = @tagName(result.target.cpu.arch); + const os_tag = @tagName(result.target.os.tag); + const abi = @tagName(result.target.abi); + mem.copy(u8, path_buf[index..], prefix); + index += prefix.len; + mem.copy(u8, path_buf[index..], cpu_arch); + index += cpu_arch.len; + path_buf[index] = '-'; + index += 1; + mem.copy(u8, path_buf[index..], os_tag); + index += os_tag.len; + path_buf[index] = '-'; + index += 1; + mem.copy(u8, path_buf[index..], abi); + index += abi.len; + const rpath = path_buf[0..index]; + if (glibcVerFromRPath(rpath)) |ver| { + result.target.os.version_range.linux.glibc = ver; + return result; + } else |err| switch (err) { + error.GLibCNotFound => {}, + else => |e| return e, } } diff --git a/src/Compilation.zig b/src/Compilation.zig index c1321e40cf..597f5cffff 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -878,6 +878,9 @@ pub const InitOptions = struct { linker_shared_memory: bool = false, linker_global_base: ?u64 = null, linker_export_symbol_names: []const []const u8 = &.{}, + linker_print_gc_sections: bool = false, + linker_print_icf_sections: bool = false, + linker_print_map: bool = false, each_lib_rpath: ?bool = null, build_id: ?bool = null, disable_c_depfile: bool = false, @@ -1727,6 +1730,9 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { .shared_memory = options.linker_shared_memory, .global_base = options.linker_global_base, .export_symbol_names = options.linker_export_symbol_names, + .print_gc_sections = options.linker_print_gc_sections, + .print_icf_sections = options.linker_print_icf_sections, + .print_map = options.linker_print_map, .z_nodelete = options.linker_z_nodelete, .z_notext = options.linker_z_notext, .z_defs = options.linker_z_defs, diff --git a/src/Module.zig b/src/Module.zig index c63fe43158..ea89225537 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -345,6 +345,15 @@ pub const CaptureScope = struct { /// During sema, this map is backed by the gpa. Once sema completes, /// it is reallocated using the value_arena. 
captures: std.AutoHashMapUnmanaged(Zir.Inst.Index, TypedValue) = .{}, + + pub fn failed(noalias self: *const @This()) bool { + return self.captures.available == 0 and self.captures.size == std.math.maxInt(u32); + } + + pub fn fail(noalias self: *@This()) void { + self.captures.available = 0; + self.captures.size = std.math.maxInt(u32); + } }; pub const WipCaptureScope = struct { @@ -383,6 +392,7 @@ pub const WipCaptureScope = struct { pub fn deinit(noalias self: *@This()) void { if (!self.finalized) { self.scope.captures.deinit(self.gpa); + self.scope.fail(); } self.* = undefined; } @@ -4274,11 +4284,14 @@ pub fn ensureFuncBodyAnalyzed(mod: *Module, func: *Fn) SemaError!void { const comp = mod.comp; - if (comp.bin_file.options.emit == null and + const no_bin_file = (comp.bin_file.options.emit == null and comp.emit_asm == null and comp.emit_llvm_ir == null and - comp.emit_llvm_bc == null) - { + comp.emit_llvm_bc == null); + + const dump_air = builtin.mode == .Debug and comp.verbose_air; + + if (no_bin_file and !dump_air) { return; } @@ -4286,7 +4299,7 @@ pub fn ensureFuncBodyAnalyzed(mod: *Module, func: *Fn) SemaError!void { var liveness = try Liveness.analyze(gpa, air); defer liveness.deinit(gpa); - if (builtin.mode == .Debug and comp.verbose_air) { + if (dump_air) { const fqn = try decl.getFullyQualifiedName(mod); defer mod.gpa.free(fqn); @@ -4295,6 +4308,10 @@ pub fn ensureFuncBodyAnalyzed(mod: *Module, func: *Fn) SemaError!void { std.debug.print("# End Function AIR: {s}\n\n", .{fqn}); } + if (no_bin_file) { + return; + } + comp.bin_file.updateFunc(mod, func, air, liveness) catch |err| switch (err) { error.OutOfMemory => return error.OutOfMemory, error.AnalysisFail => { diff --git a/src/Sema.zig b/src/Sema.zig index fb1638bc2a..15e891ef87 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -5956,7 +5956,6 @@ fn analyzeCall( error.NeededSourceLocation => { _ = sema.inst_map.remove(inst); const decl = sema.mod.declPtr(block.src_decl); - child_block.src_decl = block.src_decl; try sema.analyzeInlineCallArg( block, &child_block, @@ -13740,6 +13739,16 @@ fn zirClosureGet( const tv = while (true) { // Note: We don't need to add a dependency here, because // decls always depend on their lexical parents. + + // Fail this decl if a scope it depended on failed. 
+ if (scope.failed()) { + if (sema.owner_func) |owner_func| { + owner_func.state = .dependency_failure; + } else { + sema.owner_decl.analysis = .dependency_failure; + } + return error.AnalysisFail; + } if (scope.captures.getPtr(inst_data.inst)) |tv| { break tv; } @@ -18076,8 +18085,8 @@ fn bitOffsetOf(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!u6 const target = sema.mod.getTarget(); try sema.resolveTypeLayout(block, lhs_src, ty); - switch (ty.tag()) { - .@"struct", .tuple, .anon_struct => {}, + switch (ty.zigTypeTag()) { + .Struct => {}, else => { const msg = msg: { const msg = try sema.errMsg(block, lhs_src, "expected struct type, found '{}'", .{ty.fmt(sema.mod)}); @@ -19617,28 +19626,19 @@ fn zirMemcpy(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void const dest_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = inst_data.src_node }; const src_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = inst_data.src_node }; const len_src: LazySrcLoc = .{ .node_offset_builtin_call_arg2 = inst_data.src_node }; - const dest_ptr = try sema.resolveInst(extra.dest); - const dest_ptr_ty = sema.typeOf(dest_ptr); + const uncasted_dest_ptr = try sema.resolveInst(extra.dest); - try sema.checkPtrOperand(block, dest_src, dest_ptr_ty); - if (dest_ptr_ty.isConstPtr()) { - return sema.fail(block, dest_src, "cannot store through const pointer '{}'", .{dest_ptr_ty.fmt(sema.mod)}); - } + // TODO AstGen's coerced_ty cannot handle volatile here + var dest_ptr_info = Type.initTag(.manyptr_u8).ptrInfo().data; + dest_ptr_info.@"volatile" = sema.typeOf(uncasted_dest_ptr).isVolatilePtr(); + const dest_ptr_ty = try Type.ptr(sema.arena, sema.mod, dest_ptr_info); + const dest_ptr = try sema.coerce(block, dest_ptr_ty, uncasted_dest_ptr, dest_src); const uncasted_src_ptr = try sema.resolveInst(extra.source); - const uncasted_src_ptr_ty = sema.typeOf(uncasted_src_ptr); - try sema.checkPtrOperand(block, src_src, uncasted_src_ptr_ty); - const src_ptr_info = uncasted_src_ptr_ty.ptrInfo().data; - const wanted_src_ptr_ty = try Type.ptr(sema.arena, sema.mod, .{ - .pointee_type = dest_ptr_ty.elemType2(), - .@"align" = src_ptr_info.@"align", - .@"addrspace" = src_ptr_info.@"addrspace", - .mutable = false, - .@"allowzero" = src_ptr_info.@"allowzero", - .@"volatile" = src_ptr_info.@"volatile", - .size = .Many, - }); - const src_ptr = try sema.coerce(block, wanted_src_ptr_ty, uncasted_src_ptr, src_src); + var src_ptr_info = Type.initTag(.manyptr_const_u8).ptrInfo().data; + src_ptr_info.@"volatile" = sema.typeOf(uncasted_src_ptr).isVolatilePtr(); + const src_ptr_ty = try Type.ptr(sema.arena, sema.mod, src_ptr_info); + const src_ptr = try sema.coerce(block, src_ptr_ty, uncasted_src_ptr, src_src); const len = try sema.coerce(block, Type.usize, try sema.resolveInst(extra.byte_count), len_src); const runtime_src = if (try sema.resolveDefinedValue(block, dest_src, dest_ptr)) |dest_ptr_val| rs: { @@ -19674,14 +19674,15 @@ fn zirMemset(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void const dest_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = inst_data.src_node }; const value_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = inst_data.src_node }; const len_src: LazySrcLoc = .{ .node_offset_builtin_call_arg2 = inst_data.src_node }; - const dest_ptr = try sema.resolveInst(extra.dest); - const dest_ptr_ty = sema.typeOf(dest_ptr); - try sema.checkPtrOperand(block, dest_src, dest_ptr_ty); - if (dest_ptr_ty.isConstPtr()) { - return sema.fail(block, dest_src, "cannot store through 
const pointer '{}'", .{dest_ptr_ty.fmt(sema.mod)}); - } - const elem_ty = dest_ptr_ty.elemType2(); - const value = try sema.coerce(block, elem_ty, try sema.resolveInst(extra.byte), value_src); + const uncasted_dest_ptr = try sema.resolveInst(extra.dest); + + // TODO AstGen's coerced_ty cannot handle volatile here + var ptr_info = Type.initTag(.manyptr_u8).ptrInfo().data; + ptr_info.@"volatile" = sema.typeOf(uncasted_dest_ptr).isVolatilePtr(); + const dest_ptr_ty = try Type.ptr(sema.arena, sema.mod, ptr_info); + const dest_ptr = try sema.coerce(block, dest_ptr_ty, uncasted_dest_ptr, dest_src); + + const value = try sema.coerce(block, Type.u8, try sema.resolveInst(extra.byte), value_src); const len = try sema.coerce(block, Type.usize, try sema.resolveInst(extra.byte_count), len_src); const runtime_src = if (try sema.resolveDefinedValue(block, dest_src, dest_ptr)) |ptr_val| rs: { @@ -26013,6 +26014,7 @@ fn analyzeDeclRef(sema: *Sema, decl_index: Decl.Index) CompileError!Air.Inst.Ref .pointee_type = decl_tv.ty, .mutable = false, .@"addrspace" = decl.@"addrspace", + .@"align" = decl.@"align", }), try Value.Tag.decl_ref.create(sema.arena, decl_index), ); diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index b9637bf8e3..a72ae6a423 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -666,6 +666,10 @@ pub fn deinit(self: *Self) void { self.locals.deinit(self.gpa); self.mir_instructions.deinit(self.gpa); self.mir_extra.deinit(self.gpa); + self.free_locals_i32.deinit(self.gpa); + self.free_locals_i64.deinit(self.gpa); + self.free_locals_f32.deinit(self.gpa); + self.free_locals_f64.deinit(self.gpa); self.* = undefined; } diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index e5d47e589a..25e8695e82 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -32,11 +32,6 @@ const abi = @import("abi.zig"); const errUnionPayloadOffset = codegen.errUnionPayloadOffset; const errUnionErrorOffset = codegen.errUnionErrorOffset; -const callee_preserved_regs = abi.callee_preserved_regs; -const caller_preserved_regs = abi.caller_preserved_regs; -const c_abi_int_param_regs = abi.c_abi_int_param_regs; -const c_abi_int_return_regs = abi.c_abi_int_return_regs; - const Condition = bits.Condition; const RegisterManager = abi.RegisterManager; const RegisterLock = RegisterManager.RegisterLock; @@ -137,6 +132,7 @@ pub const MCValue = union(enum) { /// If the type is a pointer, it means the pointer is referenced indirectly via GOT. /// When lowered, linker will emit a relocation of type X86_64_RELOC_GOT. got_load: u32, + imports_load: u32, /// The value is in memory referenced directly via symbol index. /// If the type is a pointer, it means the pointer is referenced directly via symbol index. /// When lowered, linker will emit a relocation of type X86_64_RELOC_SIGNED. 
@@ -156,6 +152,7 @@ pub const MCValue = union(enum) { .ptr_stack_offset, .direct_load, .got_load, + .imports_load, => true, else => false, }; @@ -203,6 +200,42 @@ const Branch = struct { self.inst_table.deinit(gpa); self.* = undefined; } + + const FormatContext = struct { + insts: []const Air.Inst.Index, + mcvs: []const MCValue, + }; + + fn fmt( + ctx: FormatContext, + comptime unused_format_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) @TypeOf(writer).Error!void { + _ = options; + comptime assert(unused_format_string.len == 0); + try writer.writeAll("Branch {\n"); + for (ctx.insts) |inst, i| { + const mcv = ctx.mcvs[i]; + try writer.print(" %{d} => {}\n", .{ inst, mcv }); + } + try writer.writeAll("}"); + } + + fn format(branch: Branch, comptime unused_format_string: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = branch; + _ = unused_format_string; + _ = options; + _ = writer; + @compileError("do not format Branch directly; use ty.fmtDebug()"); + } + + fn fmtDebug(self: @This()) std.fmt.Formatter(fmt) { + return .{ .data = .{ + .insts = self.inst_table.keys(), + .mcvs = self.inst_table.values(), + } }; + } }; const StackAllocation = struct { @@ -235,7 +268,7 @@ const BigTomb = struct { fn finishAir(bt: *BigTomb, result: MCValue) void { const is_used = !bt.function.liveness.isUnused(bt.inst); if (is_used) { - log.debug("%{d} => {}", .{ bt.inst, result }); + log.debug(" (saving %{d} => {})", .{ bt.inst, result }); const branch = &bt.function.branch_stack.items[bt.function.branch_stack.items.len - 1]; branch.inst_table.putAssumeCapacityNoClobber(bt.inst, result); } @@ -406,16 +439,17 @@ fn gen(self: *Self) InnerError!void { }); if (self.ret_mcv == .stack_offset) { - // The address where to store the return value for the caller is in `.rdi` + // The address where to store the return value for the caller is in a // register which the callee is free to clobber. Therefore, we purposely // spill it to stack immediately. const stack_offset = mem.alignForwardGeneric(u32, self.next_stack_offset + 8, 8); self.next_stack_offset = stack_offset; self.max_end_stack = @maximum(self.max_end_stack, self.next_stack_offset); - try self.genSetStack(Type.usize, @intCast(i32, stack_offset), MCValue{ .register = .rdi }, .{}); + const ret_reg = abi.getCAbiIntParamRegs(self.target.*)[0]; + try self.genSetStack(Type.usize, @intCast(i32, stack_offset), MCValue{ .register = ret_reg }, .{}); self.ret_mcv = MCValue{ .stack_offset = @intCast(i32, stack_offset) }; - log.debug("gen: spilling .rdi to stack at offset {}", .{stack_offset}); + log.debug("gen: spilling {s} to stack at offset {}", .{ @tagName(ret_reg), stack_offset }); } _ = try self.addInst(.{ @@ -446,10 +480,11 @@ fn gen(self: *Self) InnerError!void { // Create list of registers to save in the prologue. 
// TODO handle register classes - var reg_list: Mir.RegisterList(Register, &callee_preserved_regs) = .{}; - inline for (callee_preserved_regs) |reg| { + var reg_list = Mir.RegisterList{}; + const callee_preserved_regs = abi.getCalleePreservedRegs(self.target.*); + for (callee_preserved_regs) |reg| { if (self.register_manager.isRegAllocated(reg)) { - reg_list.push(reg); + reg_list.push(callee_preserved_regs, reg); } } const saved_regs_stack_space: u32 = reg_list.count() * 8; @@ -797,6 +832,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { fn processDeath(self: *Self, inst: Air.Inst.Index) void { const air_tags = self.air.instructions.items(.tag); if (air_tags[inst] == .constant) return; // Constants are immortal. + log.debug("%{d} => {}", .{ inst, MCValue{ .dead = {} } }); // When editing this function, note that the logic must synchronize with `reuseOperand`. const prev_value = self.getResolvedInstValue(inst); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; @@ -2274,6 +2310,7 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { .memory, .got_load, .direct_load, + .imports_load, => { try self.loadMemPtrIntoRegister(addr_reg, Type.usize, array); }, @@ -2618,6 +2655,7 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo .memory, .got_load, .direct_load, + .imports_load, => { const reg = try self.copyToTmpRegister(ptr_ty, ptr); try self.load(dst_mcv, .{ .register = reg }, ptr_ty); @@ -2655,6 +2693,7 @@ fn loadMemPtrIntoRegister(self: *Self, reg: Register, ptr_ty: Type, ptr: MCValue switch (ptr) { .got_load, .direct_load, + .imports_load, => |sym_index| { const abi_size = @intCast(u32, ptr_ty.abiSize(self.target.*)); const mod = self.bin_file.options.module.?; @@ -2666,6 +2705,7 @@ fn loadMemPtrIntoRegister(self: *Self, reg: Register, ptr_ty: Type, ptr: MCValue const flags: u2 = switch (ptr) { .got_load => 0b00, .direct_load => 0b01, + .imports_load => 0b10, else => unreachable, }; _ = try self.addInst(.{ @@ -2763,6 +2803,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type }, .got_load, .direct_load, + .imports_load, .memory, .stack_offset, => { @@ -2783,6 +2824,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type }, .got_load, .direct_load, + .imports_load, .memory, => { const value_lock: ?RegisterLock = switch (value) { @@ -2854,6 +2896,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type }, .got_load, .direct_load, + .imports_load, .memory, => { if (abi_size <= 8) { @@ -3565,6 +3608,7 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu .memory, .got_load, .direct_load, + .imports_load, .eflags, => { assert(abi_size <= 8); @@ -3650,7 +3694,10 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu => { return self.fail("TODO implement x86 ADD/SUB/CMP source memory", .{}); }, - .got_load, .direct_load => { + .got_load, + .direct_load, + .imports_load, + => { return self.fail("TODO implement x86 ADD/SUB/CMP source symbol at index in linker", .{}); }, .eflags => { @@ -3661,7 +3708,10 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu .memory => { return self.fail("TODO implement x86 ADD/SUB/CMP destination memory", .{}); }, - .got_load, .direct_load => { + .got_load, + .direct_load, + .imports_load, + => { return self.fail("TODO implement x86 ADD/SUB/CMP destination symbol at index", .{}); }, } @@ -3729,7 
+3779,10 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M .memory => { return self.fail("TODO implement x86 multiply source memory", .{}); }, - .got_load, .direct_load => { + .got_load, + .direct_load, + .imports_load, + => { return self.fail("TODO implement x86 multiply source symbol at index in linker", .{}); }, .eflags => { @@ -3773,7 +3826,10 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M .memory, .stack_offset => { return self.fail("TODO implement x86 multiply source memory", .{}); }, - .got_load, .direct_load => { + .got_load, + .direct_load, + .imports_load, + => { return self.fail("TODO implement x86 multiply source symbol at index in linker", .{}); }, .eflags => { @@ -3784,7 +3840,10 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M .memory => { return self.fail("TODO implement x86 multiply destination memory", .{}); }, - .got_load, .direct_load => { + .got_load, + .direct_load, + .imports_load, + => { return self.fail("TODO implement x86 multiply destination symbol at index in linker", .{}); }, } @@ -3898,11 +3957,11 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. try self.spillEflagsIfOccupied(); - for (caller_preserved_regs) |reg| { + for (abi.getCallerPreservedRegs(self.target.*)) |reg| { try self.register_manager.getReg(reg, null); } - const rdi_lock: ?RegisterLock = blk: { + const ret_reg_lock: ?RegisterLock = blk: { if (info.return_value == .stack_offset) { const ret_ty = fn_ty.fnReturnType(); const ret_abi_size = @intCast(u32, ret_ty.abiSize(self.target.*)); @@ -3910,17 +3969,18 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. const stack_offset = @intCast(i32, try self.allocMem(inst, ret_abi_size, ret_abi_align)); log.debug("airCall: return value on stack at offset {}", .{stack_offset}); - try self.register_manager.getReg(.rdi, null); - try self.genSetReg(Type.usize, .rdi, .{ .ptr_stack_offset = stack_offset }); - const rdi_lock = self.register_manager.lockRegAssumeUnused(.rdi); + const ret_reg = abi.getCAbiIntParamRegs(self.target.*)[0]; + try self.register_manager.getReg(ret_reg, null); + try self.genSetReg(Type.usize, ret_reg, .{ .ptr_stack_offset = stack_offset }); + const ret_reg_lock = self.register_manager.lockRegAssumeUnused(ret_reg); info.return_value.stack_offset = stack_offset; - break :blk rdi_lock; + break :blk ret_reg_lock; } break :blk null; }; - defer if (rdi_lock) |lock| self.register_manager.unlockReg(lock); + defer if (ret_reg_lock) |lock| self.register_manager.unlockReg(lock); for (args) |arg, arg_i| { const mc_arg = info.args[arg_i]; @@ -3948,6 +4008,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. .memory => unreachable, .got_load => unreachable, .direct_load => unreachable, + .imports_load => unreachable, .eflags => unreachable, .register_overflow => unreachable, } @@ -3999,7 +4060,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. .data = undefined, }); } - } else if (self.bin_file.cast(link.File.Coff)) |_| { + } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { if (self.air.value(callee)) |func_value| { if (func_value.castTag(.function)) |func_payload| { const func = func_payload.data; @@ -4015,8 +4076,27 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. 
}), .data = undefined, }); - } else if (func_value.castTag(.extern_fn)) |_| { - return self.fail("TODO implement calling extern functions", .{}); + } else if (func_value.castTag(.extern_fn)) |func_payload| { + const extern_fn = func_payload.data; + const decl_name = mod.declPtr(extern_fn.owner_decl).name; + if (extern_fn.lib_name) |lib_name| { + log.debug("TODO enforce that '{s}' is expected in '{s}' library", .{ + decl_name, + lib_name, + }); + } + const sym_index = try coff_file.getGlobalSymbol(mem.sliceTo(decl_name, 0)); + try self.genSetReg(Type.initTag(.usize), .rax, .{ + .imports_load = sym_index, + }); + _ = try self.addInst(.{ + .tag = .call, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = .rax, + .flags = 0b01, + }), + .data = undefined, + }); } else { return self.fail("TODO implement calling bitcasted functions", .{}); } @@ -4425,7 +4505,11 @@ fn genVarDbgInfo( leb128.writeILEB128(dbg_info.writer(), -off) catch unreachable; dbg_info.items[fixup] += @intCast(u8, dbg_info.items.len - fixup - 2); }, - .memory, .got_load, .direct_load => { + .memory, + .got_load, + .direct_load, + .imports_load, + => { const ptr_width = @intCast(u8, @divExact(self.target.cpu.arch.ptrBitWidth(), 8)); const is_ptr = switch (tag) { .dbg_var_ptr => true, @@ -4456,7 +4540,10 @@ fn genVarDbgInfo( try dbg_info.append(DW.OP.deref); } switch (mcv) { - .got_load, .direct_load => |index| try dw.addExprlocReloc(index, offset, is_ptr), + .got_load, + .direct_load, + .imports_load, + => |index| try dw.addExprlocReloc(index, offset, is_ptr), else => {}, } }, @@ -4626,15 +4713,17 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { // Revert to the previous register and stack allocation state. - var saved_then_branch = self.branch_stack.pop(); - defer saved_then_branch.deinit(self.gpa); + var then_branch = self.branch_stack.pop(); + defer then_branch.deinit(self.gpa); self.revertState(saved_state); try self.performReloc(reloc); - const else_branch = self.branch_stack.addOneAssumeCapacity(); - else_branch.* = .{}; + try self.branch_stack.append(.{}); + errdefer { + _ = self.branch_stack.pop(); + } try self.ensureProcessDeathCapacity(liveness_condbr.else_deaths.len); for (liveness_condbr.else_deaths) |operand| { @@ -4642,6 +4731,9 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { } try self.genBody(else_body); + var else_branch = self.branch_stack.pop(); + defer else_branch.deinit(self.gpa); + // At this point, each branch will possibly have conflicting values for where // each instruction is stored. They agree, however, on which instructions are alive/dead. // We use the first ("then") branch as canonical, and here emit @@ -4650,74 +4742,17 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { // that we can use all the code emitting abstractions. This is why at the bottom we // assert that parent_branch.free_registers equals the saved_then_branch.free_registers // rather than assigning it. - const parent_branch = &self.branch_stack.items[self.branch_stack.items.len - 2]; - try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, else_branch.inst_table.count()); - - const else_slice = else_branch.inst_table.entries.slice(); - const else_keys = else_slice.items(.key); - const else_values = else_slice.items(.value); - for (else_keys) |else_key, else_idx| { - const else_value = else_values[else_idx]; - const canon_mcv = if (saved_then_branch.inst_table.fetchSwapRemove(else_key)) |then_entry| blk: { - // The instruction's MCValue is overridden in both branches. 
- parent_branch.inst_table.putAssumeCapacity(else_key, then_entry.value); - if (else_value == .dead) { - assert(then_entry.value == .dead); - continue; - } - break :blk then_entry.value; - } else blk: { - if (else_value == .dead) - continue; - // The instruction is only overridden in the else branch. - var i: usize = self.branch_stack.items.len - 2; - while (true) { - i -= 1; // If this overflows, the question is: why wasn't the instruction marked dead? - if (self.branch_stack.items[i].inst_table.get(else_key)) |mcv| { - assert(mcv != .dead); - break :blk mcv; - } - } - }; - log.debug("consolidating else_entry {d} {}=>{}", .{ else_key, else_value, canon_mcv }); - // TODO make sure the destination stack offset / register does not already have something - // going on there. - try self.setRegOrMem(self.air.typeOfIndex(else_key), canon_mcv, else_value); - // TODO track the new register / stack allocation - } - try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, saved_then_branch.inst_table.count()); - const then_slice = saved_then_branch.inst_table.entries.slice(); - const then_keys = then_slice.items(.key); - const then_values = then_slice.items(.value); - for (then_keys) |then_key, then_idx| { - const then_value = then_values[then_idx]; - // We already deleted the items from this table that matched the else_branch. - // So these are all instructions that are only overridden in the then branch. - parent_branch.inst_table.putAssumeCapacity(then_key, then_value); - log.debug("then_value = {}", .{then_value}); - if (then_value == .dead) - continue; - const parent_mcv = blk: { - var i: usize = self.branch_stack.items.len - 2; - while (true) { - i -= 1; - if (self.branch_stack.items[i].inst_table.get(then_key)) |mcv| { - assert(mcv != .dead); - break :blk mcv; - } - } - }; - log.debug("consolidating then_entry {d} {}=>{}", .{ then_key, parent_mcv, then_value }); - // TODO make sure the destination stack offset / register does not already have something - // going on there. - try self.setRegOrMem(self.air.typeOfIndex(then_key), parent_mcv, then_value); - // TODO track the new register / stack allocation + log.debug("airCondBr: %{d}", .{inst}); + log.debug("Upper branches:", .{}); + for (self.branch_stack.items) |bs| { + log.debug("{}", .{bs.fmtDebug()}); } - { - var item = self.branch_stack.pop(); - item.deinit(self.gpa); - } + log.debug("Then branch: {}", .{then_branch.fmtDebug()}); + log.debug("Else branch: {}", .{else_branch.fmtDebug()}); + + const parent_branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + try self.canonicaliseBranches(parent_branch, &then_branch, &else_branch); // We already took care of pl_op.operand earlier, so we're going // to pass .none here @@ -5102,6 +5137,15 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { } } + var branch_stack = std.ArrayList(Branch).init(self.gpa); + defer { + for (branch_stack.items) |*bs| { + bs.deinit(self.gpa); + } + branch_stack.deinit(); + } + try branch_stack.ensureTotalCapacityPrecise(switch_br.data.cases_len + 1); + while (case_i < switch_br.data.cases_len) : (case_i += 1) { const case = self.air.extraData(Air.SwitchBr.Case, extra_index); const items = @ptrCast([]const Air.Inst.Ref, self.air.extra[case.end..][0..case.data.items_len]); @@ -5131,10 +5175,9 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { try self.genBody(case_body); - // Revert to the previous register and stack allocation state. 
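// The airCondBr hunk above now pushes the scratch else-branch with `append` and
// drops it again with `errdefer`, so a failing genBody cannot strand an entry on
// branch_stack. A minimal standalone sketch of that ownership pattern, using a
// stand-in Branch type rather than the compiler's own:
const std = @import("std");

const Branch = struct {
    inst_table: std.AutoArrayHashMapUnmanaged(u32, u32) = .{},
};

fn withScratchBranch(stack: *std.ArrayList(Branch)) !Branch {
    try stack.append(.{});
    errdefer _ = stack.pop();
    // ... body generation would run here and may fail ...
    return stack.pop(); // on success the caller owns the popped branch and must deinit it
}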
- var saved_case_branch = self.branch_stack.pop(); - defer saved_case_branch.deinit(self.gpa); + branch_stack.appendAssumeCapacity(self.branch_stack.pop()); + // Revert to the previous register and stack allocation state. self.revertState(saved_state); for (relocs) |reloc| { @@ -5144,10 +5187,13 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { if (switch_br.data.else_body_len > 0) { const else_body = self.air.extra[extra_index..][0..switch_br.data.else_body_len]; + + // Capture the state of register and stack allocation state so that we can revert to it. + const saved_state = try self.captureState(); + try self.branch_stack.append(.{}); - defer { - var item = self.branch_stack.pop(); - item.deinit(self.gpa); + errdefer { + _ = self.branch_stack.pop(); } const else_deaths = liveness.deaths.len - 1; @@ -5158,8 +5204,30 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { try self.genBody(else_body); - // TODO consolidate returned MCValues between prongs and else branch like we do - // in airCondBr. + branch_stack.appendAssumeCapacity(self.branch_stack.pop()); + + // Revert to the previous register and stack allocation state. + self.revertState(saved_state); + } + + // Consolidate returned MCValues between prongs and else branch like we do + // in airCondBr. + log.debug("airSwitch: %{d}", .{inst}); + log.debug("Upper branches:", .{}); + for (self.branch_stack.items) |bs| { + log.debug("{}", .{bs.fmtDebug()}); + } + for (branch_stack.items) |bs, i| { + log.debug("Case-{d} branch: {}", .{ i, bs.fmtDebug() }); + } + + // TODO: can we reduce the complexity of this algorithm? + const parent_branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + var i: usize = branch_stack.items.len; + while (i > 1) : (i -= 1) { + const canon_branch = &branch_stack.items[i - 2]; + const target_branch = &branch_stack.items[i - 1]; + try self.canonicaliseBranches(parent_branch, canon_branch, target_branch); } // We already took care of pl_op.operand earlier, so we're going @@ -5167,6 +5235,72 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, .unreach, .{ .none, .none, .none }); } +fn canonicaliseBranches(self: *Self, parent_branch: *Branch, canon_branch: *Branch, target_branch: *Branch) !void { + try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, target_branch.inst_table.count()); + + const target_slice = target_branch.inst_table.entries.slice(); + const target_keys = target_slice.items(.key); + const target_values = target_slice.items(.value); + + for (target_keys) |target_key, target_idx| { + const target_value = target_values[target_idx]; + const canon_mcv = if (canon_branch.inst_table.fetchSwapRemove(target_key)) |canon_entry| blk: { + // The instruction's MCValue is overridden in both branches. + parent_branch.inst_table.putAssumeCapacity(target_key, canon_entry.value); + if (target_value == .dead) { + assert(canon_entry.value == .dead); + continue; + } + break :blk canon_entry.value; + } else blk: { + if (target_value == .dead) + continue; + // The instruction is only overridden in the else branch. + var i: usize = self.branch_stack.items.len - 1; + while (true) { + i -= 1; // If this overflows, the question is: why wasn't the instruction marked dead? 
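// The airSwitch hunk above folds the collected prong branches pairwise, always
// treating the lower-indexed branch as canonical, so the lowest-indexed branch
// acts as the final canonical state. A standalone sketch of just that index walk
// (stand-in data, no MCValues involved):
const std = @import("std");

test "pairwise prong consolidation order" {
    const prong_count: usize = 4;
    var pairs: [3][2]usize = undefined;
    var n: usize = 0;

    var i: usize = prong_count;
    while (i > 1) : (i -= 1) {
        pairs[n] = .{ i - 2, i - 1 }; // (canonical branch, target branch)
        n += 1;
    }

    // Visits (2,3), (1,2), (0,1): every later prong is folded into the one before it.
    try std.testing.expectEqual(@as(usize, 3), n);
    try std.testing.expectEqual(@as(usize, 0), pairs[2][0]);
}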
+ if (self.branch_stack.items[i].inst_table.get(target_key)) |mcv| { + assert(mcv != .dead); + break :blk mcv; + } + } + }; + log.debug("consolidating target_entry {d} {}=>{}", .{ target_key, target_value, canon_mcv }); + // TODO make sure the destination stack offset / register does not already have something + // going on there. + try self.setRegOrMem(self.air.typeOfIndex(target_key), canon_mcv, target_value); + // TODO track the new register / stack allocation + } + try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, canon_branch.inst_table.count()); + const canon_slice = canon_branch.inst_table.entries.slice(); + const canon_keys = canon_slice.items(.key); + const canon_values = canon_slice.items(.value); + for (canon_keys) |canon_key, canon_idx| { + const canon_value = canon_values[canon_idx]; + // We already deleted the items from this table that matched the target_branch. + // So these are all instructions that are only overridden in the canon branch. + parent_branch.inst_table.putAssumeCapacity(canon_key, canon_value); + log.debug("canon_value = {}", .{canon_value}); + if (canon_value == .dead) + continue; + const parent_mcv = blk: { + var i: usize = self.branch_stack.items.len - 1; + while (true) { + i -= 1; + if (self.branch_stack.items[i].inst_table.get(canon_key)) |mcv| { + assert(mcv != .dead); + break :blk mcv; + } + } + }; + log.debug("consolidating canon_entry {d} {}=>{}", .{ canon_key, parent_mcv, canon_value }); + // TODO make sure the destination stack offset / register does not already have something + // going on there. + try self.setRegOrMem(self.air.typeOfIndex(canon_key), parent_mcv, canon_value); + // TODO track the new register / stack allocation + } +} + fn performReloc(self: *Self, reloc: Mir.Inst.Index) !void { const next_inst = @intCast(u32, self.mir_instructions.len); switch (self.mir_instructions.items(.tag)[reloc]) { @@ -5196,7 +5330,7 @@ fn br(self: *Self, block: Air.Inst.Index, operand: Air.Inst.Ref) !void { block_data.mcv = switch (operand_mcv) { .none, .dead, .unreach => unreachable, .register, .stack_offset, .memory => operand_mcv, - .eflags, .immediate => blk: { + .eflags, .immediate, .ptr_stack_offset => blk: { const new_mcv = try self.allocRegOrMem(block, true); try self.setRegOrMem(self.air.typeOfIndex(block), new_mcv, operand_mcv); break :blk new_mcv; @@ -5456,6 +5590,7 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE .memory, .direct_load, .got_load, + .imports_load, => { if (abi_size <= 8) { const reg = try self.copyToTmpRegister(ty, mcv); @@ -5703,6 +5838,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl .memory, .got_load, .direct_load, + .imports_load, => { if (abi_size <= 8) { const reg = try self.copyToTmpRegister(ty, mcv); @@ -5796,7 +5932,6 @@ const InlineMemcpyOpts = struct { dest_stack_base: ?Register = null, }; -/// Spills .rax and .rcx. 
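// The genInlineMemcpy hunk that follows stops reserving .rax/.rcx up front and
// instead asks the register manager for five scratch registers (destination
// address, source address, index, count, and a byte-sized temporary). The loop it
// emits behaves like this plain-Zig model, assuming a simple forward byte copy:
fn inlineMemcpyModel(dst: [*]u8, src: [*]const u8, len: usize) void {
    var index: usize = 0; // index_reg starts at 0
    while (index < len) : (index += 1) { // count_reg holds len
        dst[index] = src[index]; // tmp_reg carries one byte per iteration
    }
}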
fn genInlineMemcpy( self: *Self, dst_ptr: MCValue, @@ -5804,15 +5939,6 @@ fn genInlineMemcpy( len: MCValue, opts: InlineMemcpyOpts, ) InnerError!void { - // TODO preserve contents of .rax and .rcx if not free, and then restore - try self.register_manager.getReg(.rax, null); - try self.register_manager.getReg(.rcx, null); - - const reg_locks = self.register_manager.lockRegsAssumeUnused(2, .{ .rax, .rcx }); - defer for (reg_locks) |lock| { - self.register_manager.unlockReg(lock); - }; - const ssbase_lock: ?RegisterLock = if (opts.source_stack_base) |reg| self.register_manager.lockReg(reg) else @@ -5825,11 +5951,18 @@ fn genInlineMemcpy( null; defer if (dsbase_lock) |lock| self.register_manager.unlockReg(lock); - const dst_addr_reg = try self.register_manager.allocReg(null, gp); + const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null }, gp); + const dst_addr_reg = regs[0]; + const src_addr_reg = regs[1]; + const index_reg = regs[2].to64(); + const count_reg = regs[3].to64(); + const tmp_reg = regs[4].to8(); + switch (dst_ptr) { .memory, .got_load, .direct_load, + .imports_load, => { try self.loadMemPtrIntoRegister(dst_addr_reg, Type.usize, dst_ptr); }, @@ -5857,14 +5990,12 @@ fn genInlineMemcpy( return self.fail("TODO implement memcpy for setting stack when dest is {}", .{dst_ptr}); }, } - const dst_addr_reg_lock = self.register_manager.lockRegAssumeUnused(dst_addr_reg); - defer self.register_manager.unlockReg(dst_addr_reg_lock); - const src_addr_reg = try self.register_manager.allocReg(null, gp); switch (src_ptr) { .memory, .got_load, .direct_load, + .imports_load, => { try self.loadMemPtrIntoRegister(src_addr_reg, Type.usize, src_ptr); }, @@ -5892,26 +6023,13 @@ fn genInlineMemcpy( return self.fail("TODO implement memcpy for setting stack when src is {}", .{src_ptr}); }, } - const src_addr_reg_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg); - defer self.register_manager.unlockReg(src_addr_reg_lock); - - const regs = try self.register_manager.allocRegs(2, .{ null, null }, gp); - const count_reg = regs[0].to64(); - const tmp_reg = regs[1].to8(); try self.genSetReg(Type.usize, count_reg, len); - // mov rcx, 0 + // mov index_reg, 0 _ = try self.addInst(.{ .tag = .mov, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rcx }), - .data = .{ .imm = 0 }, - }); - - // mov rax, 0 - _ = try self.addInst(.{ - .tag = .mov, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }), .data = .{ .imm = 0 }, }); @@ -5933,37 +6051,30 @@ fn genInlineMemcpy( } }, }); - // mov tmp, [addr + rcx] + // mov tmp, [addr + index_reg] _ = try self.addInst(.{ .tag = .mov_scale_src, .ops = Mir.Inst.Ops.encode(.{ .reg1 = tmp_reg.to8(), .reg2 = src_addr_reg, }), - .data = .{ .imm = 0 }, + .data = .{ .payload = try self.addExtra(Mir.IndexRegisterDisp.encode(index_reg, 0)) }, }); - // mov [stack_offset + rax], tmp + // mov [stack_offset + index_reg], tmp _ = try self.addInst(.{ .tag = .mov_scale_dst, .ops = Mir.Inst.Ops.encode(.{ .reg1 = dst_addr_reg, .reg2 = tmp_reg.to8(), }), - .data = .{ .imm = 0 }, + .data = .{ .payload = try self.addExtra(Mir.IndexRegisterDisp.encode(index_reg, 0)) }, }); - // add rcx, 1 + // add index_reg, 1 _ = try self.addInst(.{ .tag = .add, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rcx }), - .data = .{ .imm = 1 }, - }); - - // add rax, 1 - _ = try self.addInst(.{ - .tag = .add, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }), .data = .{ .imm = 1 }, }); @@ -5985,7 
+6096,6 @@ fn genInlineMemcpy( try self.performReloc(loop_reloc); } -/// Spills .rax register. fn genInlineMemset( self: *Self, dst_ptr: MCValue, @@ -5993,16 +6103,27 @@ fn genInlineMemset( len: MCValue, opts: InlineMemcpyOpts, ) InnerError!void { - // TODO preserve contents of .rax and then restore - try self.register_manager.getReg(.rax, null); - const rax_lock = self.register_manager.lockRegAssumeUnused(.rax); - defer self.register_manager.unlockReg(rax_lock); + const ssbase_lock: ?RegisterLock = if (opts.source_stack_base) |reg| + self.register_manager.lockReg(reg) + else + null; + defer if (ssbase_lock) |reg| self.register_manager.unlockReg(reg); + + const dsbase_lock: ?RegisterLock = if (opts.dest_stack_base) |reg| + self.register_manager.lockReg(reg) + else + null; + defer if (dsbase_lock) |lock| self.register_manager.unlockReg(lock); + + const regs = try self.register_manager.allocRegs(2, .{ null, null }, gp); + const addr_reg = regs[0]; + const index_reg = regs[1].to64(); - const addr_reg = try self.register_manager.allocReg(null, gp); switch (dst_ptr) { .memory, .got_load, .direct_load, + .imports_load, => { try self.loadMemPtrIntoRegister(addr_reg, Type.usize, dst_ptr); }, @@ -6030,17 +6151,15 @@ fn genInlineMemset( return self.fail("TODO implement memcpy for setting stack when dest is {}", .{dst_ptr}); }, } - const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg); - defer self.register_manager.unlockReg(addr_reg_lock); - try self.genSetReg(Type.usize, .rax, len); - try self.genBinOpMir(.sub, Type.usize, .{ .register = .rax }, .{ .immediate = 1 }); + try self.genSetReg(Type.usize, index_reg, len); + try self.genBinOpMir(.sub, Type.usize, .{ .register = index_reg }, .{ .immediate = 1 }); // loop: - // cmp rax, -1 + // cmp index_reg, -1 const loop_start = try self.addInst(.{ .tag = .cmp, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }), .data = .{ .imm = @bitCast(u32, @as(i32, -1)) }, }); @@ -6059,24 +6178,20 @@ fn genInlineMemset( if (x > math.maxInt(i32)) { return self.fail("TODO inline memset for value immediate larger than 32bits", .{}); } - // mov byte ptr [rbp + rax + stack_offset], imm - const payload = try self.addExtra(Mir.ImmPair{ - .dest_off = 0, - .operand = @truncate(u32, x), - }); + // mov byte ptr [rbp + index_reg + stack_offset], imm _ = try self.addInst(.{ .tag = .mov_mem_index_imm, .ops = Mir.Inst.Ops.encode(.{ .reg1 = addr_reg }), - .data = .{ .payload = payload }, + .data = .{ .payload = try self.addExtra(Mir.IndexRegisterDispImm.encode(index_reg, 0, @truncate(u32, x))) }, }); }, else => return self.fail("TODO inline memset for value of type {}", .{value}), } - // sub rax, 1 + // sub index_reg, 1 _ = try self.addInst(.{ .tag = .sub, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }), .data = .{ .imm = 1 }, }); @@ -6243,6 +6358,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void }, .direct_load, .got_load, + .imports_load, => { switch (ty.zigTypeTag()) { .Float => { @@ -6637,7 +6753,11 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void { // TODO Is this the only condition for pointer dereference for memcpy? 
const src: MCValue = blk: { switch (src_ptr) { - .got_load, .direct_load, .memory => { + .got_load, + .direct_load, + .imports_load, + .memory, + => { const reg = try self.register_manager.allocReg(null, gp); try self.loadMemPtrIntoRegister(reg, src_ty, src_ptr); _ = try self.addInst(.{ @@ -6901,7 +7021,7 @@ fn lowerUnnamedConst(self: *Self, tv: TypedValue) InnerError!MCValue { } else if (self.bin_file.cast(link.File.MachO)) |_| { return MCValue{ .direct_load = local_sym_index }; } else if (self.bin_file.cast(link.File.Coff)) |_| { - return self.fail("TODO lower unnamed const in COFF", .{}); + return MCValue{ .direct_load = local_sym_index }; } else if (self.bin_file.cast(link.File.Plan9)) |_| { return self.fail("TODO lower unnamed const in Plan9", .{}); } else { @@ -7066,7 +7186,82 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { result.stack_align = 1; return result; }, - .Unspecified, .C => { + .C => { + // Return values + if (ret_ty.zigTypeTag() == .NoReturn) { + result.return_value = .{ .unreach = {} }; + } else if (!ret_ty.hasRuntimeBitsIgnoreComptime() and !ret_ty.isError()) { + // TODO: is this even possible for C calling convention? + result.return_value = .{ .none = {} }; + } else { + const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*)); + if (ret_ty_size == 0) { + assert(ret_ty.isError()); + result.return_value = .{ .immediate = 0 }; + } else if (ret_ty_size <= 8) { + const aliased_reg = registerAlias(abi.getCAbiIntReturnRegs(self.target.*)[0], ret_ty_size); + result.return_value = .{ .register = aliased_reg }; + } else { + // TODO: return argument cell should go first + result.return_value = .{ .stack_offset = 0 }; + } + } + + // Input params + var next_stack_offset: u32 = switch (result.return_value) { + .stack_offset => |off| @intCast(u32, off), + else => 0, + }; + + for (param_types) |ty, i| { + assert(ty.hasRuntimeBits()); + + const classes: []const abi.Class = switch (self.target.os.tag) { + .windows => &[1]abi.Class{abi.classifyWindows(ty, self.target.*)}, + else => mem.sliceTo(&abi.classifySystemV(ty, self.target.*), .none), + }; + if (classes.len > 1) { + return self.fail("TODO handle multiple classes per type", .{}); + } + switch (classes[0]) { + .integer => blk: { + if (i >= abi.getCAbiIntParamRegs(self.target.*).len) break :blk; // fallthrough + result.args[i] = .{ .register = abi.getCAbiIntParamRegs(self.target.*)[i] }; + continue; + }, + .memory => {}, // fallthrough + else => |class| return self.fail("TODO handle calling convention class {s}", .{ + @tagName(class), + }), + } + + const param_size = @intCast(u32, ty.abiSize(self.target.*)); + const param_align = @intCast(u32, ty.abiAlignment(self.target.*)); + const offset = mem.alignForwardGeneric(u32, next_stack_offset + param_size, param_align); + result.args[i] = .{ .stack_offset = @intCast(i32, offset) }; + next_stack_offset = offset; + } + + // Align the stack to 16bytes before allocating shadow stack space (if any). + const aligned_next_stack_offset = mem.alignForwardGeneric(u32, next_stack_offset, 16); + const padding = aligned_next_stack_offset - next_stack_offset; + if (padding > 0) { + for (result.args) |*arg| { + if (arg.isRegister()) continue; + arg.stack_offset += @intCast(i32, padding); + } + } + + const shadow_stack_space: u32 = switch (self.target.os.tag) { + .windows => @intCast(u32, 4 * @sizeOf(u64)), + else => 0, + }; + + // alignment padding | args ... 
| shadow stack space (if any) | ret addr | $rbp | + result.stack_byte_count = aligned_next_stack_offset + shadow_stack_space; + result.stack_align = 16; + }, + .Unspecified => { // Return values if (ret_ty.zigTypeTag() == .NoReturn) { result.return_value = .{ .unreach = {} }; @@ -7078,84 +7273,32 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { assert(ret_ty.isError()); result.return_value = .{ .immediate = 0 }; } else if (ret_ty_size <= 8) { - const aliased_reg = registerAlias(c_abi_int_return_regs[0], ret_ty_size); + const aliased_reg = registerAlias(abi.getCAbiIntReturnRegs(self.target.*)[0], ret_ty_size); result.return_value = .{ .register = aliased_reg }; } else { // We simply make the return MCValue a stack offset. However, the actual value // for the offset will be populated later. We will also push the stack offset - // value into .rdi register when we resolve the offset. + // value into an appropriate register when we resolve the offset. result.return_value = .{ .stack_offset = 0 }; } } // Input params - // First, split into args that can be passed via registers. - // This will make it easier to then push the rest of args in reverse - // order on the stack. - var next_int_reg: usize = 0; - var by_reg = std.AutoHashMap(usize, usize).init(self.bin_file.allocator); - defer by_reg.deinit(); - - // If we want debug output, we store all args on stack for better liveness of args - // in debugging contexts such as previewing the args in the debugger anywhere in - // the procedure. Passing the args via registers can lead to reusing the register - // for local ops thus clobbering the input arg forever. - // This of course excludes C ABI calls. - const omit_args_in_registers = blk: { - if (cc == .C) break :blk false; - switch (self.bin_file.options.optimize_mode) { - .Debug => break :blk true, - else => break :blk false, - } - }; - if (!omit_args_in_registers) { - for (param_types) |ty, i| { - if (!ty.hasRuntimeBits()) continue; - const param_size = @intCast(u32, ty.abiSize(self.target.*)); - // For simplicity of codegen, slices and other types are always pushed onto the stack. - // TODO: look into optimizing this by passing things as registers sometimes, - // such as ptr and len of slices as separate registers. - // TODO: also we need to honor the C ABI for relevant types rather than passing on - // the stack here. 
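// The .C calling-convention hunk above lays the caller's frame out as
//   alignment padding | args ... | shadow stack space (Win64 only) | ret addr | rbp
// A standalone sketch of the same arithmetic, assuming every argument is passed on
// the stack (the register-class handling is omitted):
const std = @import("std");

fn cAbiStackBytes(arg_sizes: []const u32, arg_aligns: []const u32, is_windows: bool) u32 {
    var next_offset: u32 = 0;
    for (arg_sizes) |size, i| {
        next_offset = std.mem.alignForwardGeneric(u32, next_offset + size, arg_aligns[i]);
    }
    // The argument area is padded out to 16 bytes before any shadow space is added.
    const aligned = std.mem.alignForwardGeneric(u32, next_offset, 16);
    const shadow: u32 = if (is_windows) 4 * @sizeOf(u64) else 0;
    return aligned + shadow;
}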
- const pass_in_reg = switch (ty.zigTypeTag()) { - .Bool => true, - .Int, .Enum => param_size <= 8, - .Pointer => ty.ptrSize() != .Slice, - .Optional => ty.isPtrLikeOptional(), - else => false, - }; - if (pass_in_reg) { - if (next_int_reg >= c_abi_int_param_regs.len) break; - try by_reg.putNoClobber(i, next_int_reg); - next_int_reg += 1; - } - } - } - var next_stack_offset: u32 = switch (result.return_value) { .stack_offset => |off| @intCast(u32, off), else => 0, }; - var count: usize = param_types.len; - while (count > 0) : (count -= 1) { - const i = count - 1; - const ty = param_types[i]; + + for (param_types) |ty, i| { if (!ty.hasRuntimeBits()) { - assert(cc != .C); result.args[i] = .{ .none = {} }; continue; } const param_size = @intCast(u32, ty.abiSize(self.target.*)); const param_align = @intCast(u32, ty.abiAlignment(self.target.*)); - if (by_reg.get(i)) |int_reg| { - const aliased_reg = registerAlias(c_abi_int_param_regs[int_reg], param_size); - result.args[i] = .{ .register = aliased_reg }; - next_int_reg += 1; - } else { - const offset = mem.alignForwardGeneric(u32, next_stack_offset + param_size, param_align); - result.args[i] = .{ .stack_offset = @intCast(i32, offset) }; - next_stack_offset = offset; - } + const offset = mem.alignForwardGeneric(u32, next_stack_offset + param_size, param_align); + result.args[i] = .{ .stack_offset = @intCast(i32, offset) }; + next_stack_offset = offset; } result.stack_align = 16; diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 12f3e9118f..45e58be972 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -283,10 +283,11 @@ fn mirPushPopRegisterList(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerErro const ops = emit.mir.instructions.items(.ops)[inst].decode(); const payload = emit.mir.instructions.items(.data)[inst].payload; const save_reg_list = emit.mir.extraData(Mir.SaveRegisterList, payload).data; - const reg_list = Mir.RegisterList(Register, &abi.callee_preserved_regs).fromInt(save_reg_list.register_list); var disp: i32 = -@intCast(i32, save_reg_list.stack_end); - inline for (abi.callee_preserved_regs) |reg| { - if (reg_list.isSet(reg)) { + const reg_list = Mir.RegisterList.fromInt(save_reg_list.register_list); + const callee_preserved_regs = abi.getCalleePreservedRegs(emit.target.*); + for (callee_preserved_regs) |reg| { + if (reg_list.isSet(callee_preserved_regs, reg)) { switch (tag) { .push => try lowerToMrEnc(.mov, RegisterOrMemory.mem(.qword_ptr, .{ .disp = @bitCast(u32, disp), @@ -614,14 +615,15 @@ inline fn immOpSize(u_imm: u32) u6 { fn mirArithScaleSrc(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { const ops = emit.mir.instructions.items(.ops)[inst].decode(); const scale = ops.flags; - const imm = emit.mir.instructions.items(.data)[inst].imm; - // OP reg1, [reg2 + scale*rcx + imm32] + const payload = emit.mir.instructions.items(.data)[inst].payload; + const index_reg_disp = emit.mir.extraData(Mir.IndexRegisterDisp, payload).data.decode(); + // OP reg1, [reg2 + scale*index + imm32] const scale_index = ScaleIndex{ .scale = scale, - .index = .rcx, + .index = index_reg_disp.index, }; return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg1.size()), .{ - .disp = imm, + .disp = index_reg_disp.disp, .base = ops.reg2, .scale_index = scale_index, }), emit.code); @@ -630,22 +632,16 @@ fn mirArithScaleSrc(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void fn mirArithScaleDst(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { const ops = 
emit.mir.instructions.items(.ops)[inst].decode(); const scale = ops.flags; - const imm = emit.mir.instructions.items(.data)[inst].imm; + const payload = emit.mir.instructions.items(.data)[inst].payload; + const index_reg_disp = emit.mir.extraData(Mir.IndexRegisterDisp, payload).data.decode(); const scale_index = ScaleIndex{ .scale = scale, - .index = .rax, + .index = index_reg_disp.index, }; - if (ops.reg2 == .none) { - // OP qword ptr [reg1 + scale*rax + 0], imm32 - return lowerToMiEnc(tag, RegisterOrMemory.mem(.qword_ptr, .{ - .disp = 0, - .base = ops.reg1, - .scale_index = scale_index, - }), imm, emit.code); - } - // OP [reg1 + scale*rax + imm32], reg2 + assert(ops.reg2 != .none); + // OP [reg1 + scale*index + imm32], reg2 return lowerToMrEnc(tag, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg2.size()), .{ - .disp = imm, + .disp = index_reg_disp.disp, .base = ops.reg1, .scale_index = scale_index, }), ops.reg2, emit.code); @@ -655,24 +651,24 @@ fn mirArithScaleImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void const ops = emit.mir.instructions.items(.ops)[inst].decode(); const scale = ops.flags; const payload = emit.mir.instructions.items(.data)[inst].payload; - const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data; + const index_reg_disp_imm = emit.mir.extraData(Mir.IndexRegisterDispImm, payload).data.decode(); const scale_index = ScaleIndex{ .scale = scale, - .index = .rax, + .index = index_reg_disp_imm.index, }; - // OP qword ptr [reg1 + scale*rax + imm32], imm32 + // OP qword ptr [reg1 + scale*index + imm32], imm32 return lowerToMiEnc(tag, RegisterOrMemory.mem(.qword_ptr, .{ - .disp = imm_pair.dest_off, + .disp = index_reg_disp_imm.disp, .base = ops.reg1, .scale_index = scale_index, - }), imm_pair.operand, emit.code); + }), index_reg_disp_imm.imm, emit.code); } fn mirArithMemIndexImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { const ops = emit.mir.instructions.items(.ops)[inst].decode(); assert(ops.reg2 == .none); const payload = emit.mir.instructions.items(.data)[inst].payload; - const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data; + const index_reg_disp_imm = emit.mir.extraData(Mir.IndexRegisterDispImm, payload).data.decode(); const ptr_size: Memory.PtrSize = switch (ops.flags) { 0b00 => .byte_ptr, 0b01 => .word_ptr, @@ -681,14 +677,14 @@ fn mirArithMemIndexImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!v }; const scale_index = ScaleIndex{ .scale = 0, - .index = .rax, + .index = index_reg_disp_imm.index, }; - // OP ptr [reg1 + rax*1 + imm32], imm32 + // OP ptr [reg1 + index + imm32], imm32 return lowerToMiEnc(tag, RegisterOrMemory.mem(ptr_size, .{ - .disp = imm_pair.dest_off, + .disp = index_reg_disp_imm.disp, .base = ops.reg1, .scale_index = scale_index, - }), imm_pair.operand, emit.code); + }), index_reg_disp_imm.imm, emit.code); } fn mirMovSignExtend(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { @@ -956,18 +952,19 @@ fn mirLea(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { mem.writeIntLittle(i32, emit.code.items[end_offset - 4 ..][0..4], disp); }, 0b10 => { - // lea reg, [rbp + rcx + imm32] - const imm = emit.mir.instructions.items(.data)[inst].imm; + // lea reg, [rbp + index + imm32] + const payload = emit.mir.instructions.items(.data)[inst].payload; + const index_reg_disp = emit.mir.extraData(Mir.IndexRegisterDisp, payload).data.decode(); const src_reg: ?Register = if (ops.reg2 != .none) ops.reg2 else null; const scale_index = ScaleIndex{ .scale = 0, - .index = .rcx, + .index = 
index_reg_disp.index, }; return lowerToRmEnc( .lea, ops.reg1, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg1.size()), .{ - .disp = imm, + .disp = index_reg_disp.disp, .base = src_reg, .scale_index = scale_index, }), @@ -985,8 +982,8 @@ fn mirLeaPic(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const relocation = emit.mir.instructions.items(.data)[inst].relocation; switch (ops.flags) { - 0b00, 0b01 => {}, - else => return emit.fail("TODO unused LEA PIC variants 0b10 and 0b11", .{}), + 0b00, 0b01, 0b10 => {}, + else => return emit.fail("TODO unused LEA PIC variant 0b11", .{}), } // lea reg1, [rip + reloc] @@ -1024,6 +1021,7 @@ fn mirLeaPic(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { .@"type" = switch (ops.flags) { 0b00 => .got, 0b01 => .direct, + 0b10 => .imports, else => unreachable, }, .target = .{ .sym_index = relocation.sym_index, .file = null }, @@ -1031,7 +1029,6 @@ fn mirLeaPic(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { .addend = 0, .pcrel = true, .length = 2, - .prev_vaddr = atom.getSymbol(coff_file).value, }); } else { return emit.fail("TODO implement lea reg, [rip + reloc] for linking backends different than MachO", .{}); @@ -1157,6 +1154,17 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { .length = 2, .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), }); + } else if (emit.bin_file.cast(link.File.Coff)) |coff_file| { + // Add relocation to the decl. + const atom = coff_file.atom_by_index_table.get(relocation.atom_index).?; + try atom.addRelocation(coff_file, .{ + .@"type" = .direct, + .target = .{ .sym_index = relocation.sym_index, .file = null }, + .offset = offset, + .addend = 0, + .pcrel = true, + .length = 2, + }); } else { return emit.fail("TODO implement call_extern for linking backends different than MachO", .{}); } @@ -2241,6 +2249,7 @@ fn lowerToMxEnc(tag: Tag, reg_or_mem: RegisterOrMemory, enc: Encoding, code: *st encoder.rex(.{ .w = wide, .b = base.isExtended(), + .x = if (mem_op.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2346,10 +2355,12 @@ fn lowerToMiXEnc( encoder.rex(.{ .w = dst_mem.ptr_size == .qword_ptr, .b = base.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } else { encoder.rex(.{ .w = dst_mem.ptr_size == .qword_ptr, + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2401,11 +2412,13 @@ fn lowerToRmEnc( .w = setRexWRegister(reg), .r = reg.isExtended(), .b = base.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } else { encoder.rex(.{ .w = setRexWRegister(reg), .r = reg.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2446,11 +2459,13 @@ fn lowerToMrEnc( .w = dst_mem.ptr_size == .qword_ptr or setRexWRegister(reg), .r = reg.isExtended(), .b = base.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } else { encoder.rex(.{ .w = dst_mem.ptr_size == .qword_ptr or setRexWRegister(reg), .r = reg.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2490,11 +2505,13 @@ fn lowerToRmiEnc( .w = setRexWRegister(reg), .r = reg.isExtended(), .b = base.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } else { encoder.rex(.{ .w = setRexWRegister(reg), .r = reg.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() 
else false, }); } opc.encode(encoder); @@ -2531,10 +2548,12 @@ fn lowerToVmEnc( vex.rex(.{ .r = reg.isExtended(), .b = base.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } else { vex.rex(.{ .r = reg.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } encoder.vex(enc.prefix); @@ -2571,10 +2590,12 @@ fn lowerToMvEnc( vex.rex(.{ .r = reg.isExtended(), .b = base.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } else { vex.rex(.{ .r = reg.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } encoder.vex(enc.prefix); diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 71aecc5e85..182f3267a6 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -44,25 +44,28 @@ pub const Inst = struct { /// 0b01 word ptr [reg1 + imm32], imm16 /// 0b10 dword ptr [reg1 + imm32], imm32 /// 0b11 qword ptr [reg1 + imm32], imm32 (sign-extended to imm64) + /// Notes: + /// * Uses `ImmPair` as payload adc_mem_imm, - /// form: reg1, [reg2 + scale*rcx + imm32] - /// ops flags scale - /// 0b00 1 - /// 0b01 2 - /// 0b10 4 - /// 0b11 8 - adc_scale_src, - - /// form: [reg1 + scale*rax + imm32], reg2 - /// form: [reg1 + scale*rax + 0], imm32 + /// form: reg1, [reg2 + scale*index + imm32] /// ops flags scale /// 0b00 1 /// 0b01 2 /// 0b10 4 /// 0b11 8 /// Notes: - /// * If reg2 is `none` then it means Data field `imm` is used as the immediate. + /// * Uses `IndexRegisterDisp` as payload + adc_scale_src, + + /// form: [reg1 + scale*index + imm32], reg2 + /// ops flags scale + /// 0b00 1 + /// 0b01 2 + /// 0b10 4 + /// 0b11 8 + /// Notes: + /// * Uses `IndexRegisterDisp` payload. adc_scale_dst, /// form: [reg1 + scale*rax + imm32], imm32 @@ -72,14 +75,16 @@ pub const Inst = struct { /// 0b10 4 /// 0b11 8 /// Notes: - /// * Data field `payload` points at `ImmPair`. + /// * Uses `IndexRegisterDispImm` payload. adc_scale_imm, /// ops flags: form: - /// 0b00 byte ptr [reg1 + rax + imm32], imm8 - /// 0b01 word ptr [reg1 + rax + imm32], imm16 - /// 0b10 dword ptr [reg1 + rax + imm32], imm32 - /// 0b11 qword ptr [reg1 + rax + imm32], imm32 (sign-extended to imm64) + /// 0b00 byte ptr [reg1 + index + imm32], imm8 + /// 0b01 word ptr [reg1 + index + imm32], imm16 + /// 0b10 dword ptr [reg1 + index + imm32], imm32 + /// 0b11 qword ptr [reg1 + index + imm32], imm32 (sign-extended to imm64) + /// Notes: + /// * Uses `IndexRegisterDispImm` payload. adc_mem_index_imm, // The following instructions all have the same encoding as `adc`. 
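// The scale/index addressing forms above no longer hard-code .rcx or .rax; the
// index register now travels in the `IndexRegisterDisp`/`IndexRegisterDispImm`
// extra payloads introduced further down in this file. A simplified standalone
// round trip, with a stand-in register enum instead of the backend's `Register`:
const std = @import("std");

const Reg = enum(u32) { rax, rcx, rdx, rbx };

const IndexRegisterDispModel = struct {
    index: u32,
    disp: u32,

    fn encode(index: Reg, disp: u32) IndexRegisterDispModel {
        return .{ .index = @enumToInt(index), .disp = disp };
    }

    fn decode(this: IndexRegisterDispModel) struct { index: Reg, disp: u32 } {
        return .{ .index = @intToEnum(Reg, this.index), .disp = this.disp };
    }
};

test "index register survives the extra-data round trip" {
    const dec = IndexRegisterDispModel.encode(.rdx, 0x10).decode();
    try std.testing.expectEqual(Reg.rdx, dec.index);
    try std.testing.expectEqual(@as(u32, 0x10), dec.disp);
}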
@@ -174,12 +179,15 @@ pub const Inst = struct { /// 0b00 reg1, [reg2 + imm32] /// 0b00 reg1, [ds:imm32] /// 0b01 reg1, [rip + imm32] - /// 0b10 reg1, [reg2 + rcx + imm32] + /// 0b10 reg1, [reg2 + index + imm32] + /// Notes: + /// * 0b10 uses `IndexRegisterDisp` payload lea, /// ops flags: form: /// 0b00 reg1, [rip + reloc] // via GOT PIC /// 0b01 reg1, [rip + reloc] // direct load PIC + /// 0b10 reg1, [rip + reloc] // via imports table PIC /// Notes: /// * `Data` contains `relocation` lea_pic, @@ -460,46 +468,103 @@ pub const Inst = struct { } }; -pub fn RegisterList(comptime Reg: type, comptime registers: []const Reg) type { - assert(registers.len <= @bitSizeOf(u32)); - return struct { - bitset: RegBitSet = RegBitSet.initEmpty(), +pub const IndexRegisterDisp = struct { + /// Index register to use with SIB-based encoding + index: u32, - const RegBitSet = IntegerBitSet(registers.len); - const Self = @This(); + /// Displacement value + disp: u32, - fn getIndexForReg(reg: Reg) RegBitSet.MaskInt { - inline for (registers) |cpreg, i| { - if (reg.id() == cpreg.id()) return i; - } - unreachable; // register not in input register list! + pub fn encode(index: Register, disp: u32) IndexRegisterDisp { + return .{ + .index = @enumToInt(index), + .disp = disp, + }; + } + + pub fn decode(this: IndexRegisterDisp) struct { + index: Register, + disp: u32, + } { + return .{ + .index = @intToEnum(Register, this.index), + .disp = this.disp, + }; + } +}; + +/// TODO: would it be worth making `IndexRegisterDisp` and `IndexRegisterDispImm` a variable length list +/// instead of having two structs, one a superset of the other one? +pub const IndexRegisterDispImm = struct { + /// Index register to use with SIB-based encoding + index: u32, + + /// Displacement value + disp: u32, + + /// Immediate + imm: u32, + + pub fn encode(index: Register, disp: u32, imm: u32) IndexRegisterDispImm { + return .{ + .index = @enumToInt(index), + .disp = disp, + .imm = imm, + }; + } + + pub fn decode(this: IndexRegisterDispImm) struct { + index: Register, + disp: u32, + imm: u32, + } { + return .{ + .index = @intToEnum(Register, this.index), + .disp = this.disp, + .imm = this.imm, + }; + } +}; + +/// Used in conjunction with `SaveRegisterList` payload to transfer a list of used registers +/// in a compact manner. +pub const RegisterList = struct { + bitset: BitSet = BitSet.initEmpty(), + + const BitSet = IntegerBitSet(@ctz(@as(u32, 0))); + const Self = @This(); + + fn getIndexForReg(registers: []const Register, reg: Register) BitSet.MaskInt { + for (registers) |cpreg, i| { + if (reg.id() == cpreg.id()) return @intCast(u32, i); } + unreachable; // register not in input register list! 
+ } - pub fn push(self: *Self, reg: Reg) void { - const index = getIndexForReg(reg); - self.bitset.set(index); - } + pub fn push(self: *Self, registers: []const Register, reg: Register) void { + const index = getIndexForReg(registers, reg); + self.bitset.set(index); + } - pub fn isSet(self: Self, reg: Reg) bool { - const index = getIndexForReg(reg); - return self.bitset.isSet(index); - } + pub fn isSet(self: Self, registers: []const Register, reg: Register) bool { + const index = getIndexForReg(registers, reg); + return self.bitset.isSet(index); + } - pub fn asInt(self: Self) u32 { - return self.bitset.mask; - } + pub fn asInt(self: Self) u32 { + return self.bitset.mask; + } - pub fn fromInt(mask: u32) Self { - return .{ - .bitset = RegBitSet{ .mask = @intCast(RegBitSet.MaskInt, mask) }, - }; - } + pub fn fromInt(mask: u32) Self { + return .{ + .bitset = BitSet{ .mask = @intCast(BitSet.MaskInt, mask) }, + }; + } - pub fn count(self: Self) u32 { - return @intCast(u32, self.bitset.count()); - } - }; -} + pub fn count(self: Self) u32 { + return @intCast(u32, self.bitset.count()); + } +}; pub const SaveRegisterList = struct { /// Use `RegisterList` to populate. diff --git a/src/arch/x86_64/abi.zig b/src/arch/x86_64/abi.zig index 344fe235f3..298fc6656f 100644 --- a/src/arch/x86_64/abi.zig +++ b/src/arch/x86_64/abi.zig @@ -392,23 +392,69 @@ pub fn classifySystemV(ty: Type, target: Target) [8]Class { } } -/// Note that .rsp and .rbp also belong to this set, however, we never expect to use them -/// for anything else but stack offset tracking therefore we exclude them from this set. -pub const callee_preserved_regs = [_]Register{ .rbx, .r12, .r13, .r14, .r15 }; -/// These registers need to be preserved (saved on the stack) and restored by the caller before -/// the caller relinquishes control to a subroutine via call instruction (or similar). -/// In other words, these registers are free to use by the callee. -pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11 }; +pub const SysV = struct { + /// Note that .rsp and .rbp also belong to this set, however, we never expect to use them + /// for anything else but stack offset tracking therefore we exclude them from this set. + pub const callee_preserved_regs = [_]Register{ .rbx, .r12, .r13, .r14, .r15 }; + /// These registers need to be preserved (saved on the stack) and restored by the caller before + /// the caller relinquishes control to a subroutine via call instruction (or similar). + /// In other words, these registers are free to use by the callee. + pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11 }; -pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; -pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx }; + pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; + pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx }; +}; +pub const Win64 = struct { + /// Note that .rsp and .rbp also belong to this set, however, we never expect to use them + /// for anything else but stack offset tracking therefore we exclude them from this set. + pub const callee_preserved_regs = [_]Register{ .rbx, .rsi, .rdi, .r12, .r13, .r14, .r15 }; + /// These registers need to be preserved (saved on the stack) and restored by the caller before + /// the caller relinquishes control to a subroutine via call instruction (or similar). + /// In other words, these registers are free to use by the callee. 
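// `RegisterList` is now a plain bit mask whose positions are interpreted against
// whatever register slice the caller supplies, so one u32 can describe either the
// SysV or the Win64 callee-preserved set defined in this abi.zig hunk. A
// simplified standalone model with a stand-in register enum:
const std = @import("std");

const Reg = enum { rbx, rsi, rdi, r12, r13, r14, r15 };

fn maskFor(registers: []const Reg, used: []const Reg) u32 {
    var mask: u32 = 0;
    for (registers) |reg, i| {
        for (used) |candidate| {
            if (candidate == reg) mask |= @as(u32, 1) << @intCast(u5, i);
        }
    }
    return mask;
}

test "one mask format, two register sets" {
    const sysv = [_]Reg{ .rbx, .r12, .r13, .r14, .r15 };
    const win64 = [_]Reg{ .rbx, .rsi, .rdi, .r12, .r13, .r14, .r15 };
    // .r12 lands on a different bit depending on which set the mask is read against.
    try std.testing.expectEqual(@as(u32, 0b00010), maskFor(&sysv, &.{.r12}));
    try std.testing.expectEqual(@as(u32, 0b01000), maskFor(&win64, &.{.r12}));
}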
+ pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .r8, .r9, .r10, .r11 }; + + pub const c_abi_int_param_regs = [_]Register{ .rcx, .rdx, .r8, .r9 }; + pub const c_abi_int_return_regs = [_]Register{.rax}; +}; + +pub fn getCalleePreservedRegs(target: Target) []const Register { + return switch (target.os.tag) { + .windows => &Win64.callee_preserved_regs, + else => &SysV.callee_preserved_regs, + }; +} + +pub fn getCallerPreservedRegs(target: Target) []const Register { + return switch (target.os.tag) { + .windows => &Win64.caller_preserved_regs, + else => &SysV.caller_preserved_regs, + }; +} + +pub fn getCAbiIntParamRegs(target: Target) []const Register { + return switch (target.os.tag) { + .windows => &Win64.c_abi_int_param_regs, + else => &SysV.c_abi_int_param_regs, + }; +} + +pub fn getCAbiIntReturnRegs(target: Target) []const Register { + return switch (target.os.tag) { + .windows => &Win64.c_abi_int_return_regs, + else => &SysV.c_abi_int_return_regs, + }; +} + +const gp_regs = [_]Register{ + .rbx, .r12, .r13, .r14, .r15, .rax, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11, +}; const sse_avx_regs = [_]Register{ .ymm0, .ymm1, .ymm2, .ymm3, .ymm4, .ymm5, .ymm6, .ymm7, .ymm8, .ymm9, .ymm10, .ymm11, .ymm12, .ymm13, .ymm14, .ymm15, }; -const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs ++ sse_avx_regs; -pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_registers); +const allocatable_regs = gp_regs ++ sse_avx_regs; +pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_regs); // Register classes const RegisterBitSet = RegisterManager.RegisterBitSet; @@ -417,15 +463,15 @@ pub const RegisterClass = struct { var set = RegisterBitSet.initEmpty(); set.setRangeValue(.{ .start = 0, - .end = caller_preserved_regs.len + callee_preserved_regs.len, + .end = gp_regs.len, }, true); break :blk set; }; pub const sse: RegisterBitSet = blk: { var set = RegisterBitSet.initEmpty(); set.setRangeValue(.{ - .start = caller_preserved_regs.len + callee_preserved_regs.len, - .end = allocatable_registers.len, + .start = gp_regs.len, + .end = allocatable_regs.len, }, true); break :blk set; }; diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index dc43548b80..d20b6f2ab6 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -3912,7 +3912,7 @@ pub const DeclGen = struct { var b: usize = 0; for (parent_ty.structFields().values()[0..field_index]) |field| { if (field.is_comptime or !field.ty.hasRuntimeBitsIgnoreComptime()) continue; - b += field.ty.bitSize(target); + b += @intCast(usize, field.ty.bitSize(target)); } break :b b; }; @@ -9385,6 +9385,12 @@ pub const FuncGen = struct { return self.builder.buildBitCast(truncated_int, elem_llvm_ty, ""); } + if (info.pointee_type.isPtrAtRuntime()) { + const same_size_int = self.context.intType(elem_bits); + const truncated_int = self.builder.buildTrunc(shifted_value, same_size_int, ""); + return self.builder.buildIntToPtr(truncated_int, elem_llvm_ty, ""); + } + return self.builder.buildTrunc(shifted_value, elem_llvm_ty, ""); } @@ -9416,7 +9422,10 @@ pub const FuncGen = struct { // Convert to equally-sized integer type in order to perform the bit // operations on the value to store const value_bits_type = self.context.intType(elem_bits); - const value_bits = self.builder.buildBitCast(elem, value_bits_type, ""); + const value_bits = if (elem_ty.isPtrAtRuntime()) + self.builder.buildPtrToInt(elem, value_bits_type, "") + else + 
self.builder.buildBitCast(elem, value_bits_type, ""); var mask_val = value_bits_type.constAllOnes(); mask_val = mask_val.constZExt(containing_int_ty); diff --git a/src/link.zig b/src/link.zig index 1a0689381b..fe7891a439 100644 --- a/src/link.zig +++ b/src/link.zig @@ -166,6 +166,9 @@ pub const Options = struct { version_script: ?[]const u8, soname: ?[]const u8, llvm_cpu_features: ?[*:0]const u8, + print_gc_sections: bool, + print_icf_sections: bool, + print_map: bool, objects: []Compilation.LinkObject, framework_dirs: []const []const u8, @@ -476,7 +479,7 @@ pub const File = struct { log.debug("getGlobalSymbol '{s}'", .{name}); switch (base.tag) { // zig fmt: off - .coff => unreachable, + .coff => return @fieldParentPtr(Coff, "base", base).getGlobalSymbol(name), .elf => unreachable, .macho => return @fieldParentPtr(MachO, "base", base).getGlobalSymbol(name), .plan9 => unreachable, diff --git a/src/link/Coff.zig b/src/link/Coff.zig index e302571671..49263df225 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -30,7 +30,6 @@ const TypedValue = @import("../TypedValue.zig"); pub const base_tag: link.File.Tag = .coff; const msdos_stub = @embedFile("msdos-stub.bin"); -const N_DATA_DIRS: u5 = 16; /// If this is not null, an object file is created by LLVM and linked with LLD afterwards. llvm_object: ?*LlvmObject = null, @@ -44,24 +43,33 @@ page_size: u32, objects: std.ArrayListUnmanaged(Object) = .{}, sections: std.MultiArrayList(Section) = .{}, -data_directories: [N_DATA_DIRS]coff.ImageDataDirectory, +data_directories: [coff.IMAGE_NUMBEROF_DIRECTORY_ENTRIES]coff.ImageDataDirectory, text_section_index: ?u16 = null, got_section_index: ?u16 = null, rdata_section_index: ?u16 = null, data_section_index: ?u16 = null, reloc_section_index: ?u16 = null, +idata_section_index: ?u16 = null, locals: std.ArrayListUnmanaged(coff.Symbol) = .{}, -globals: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, +globals: std.ArrayListUnmanaged(SymbolWithLoc) = .{}, +resolver: std.StringHashMapUnmanaged(u32) = .{}, +unresolved: std.AutoArrayHashMapUnmanaged(u32, bool) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, +globals_free_list: std.ArrayListUnmanaged(u32) = .{}, strtab: StringTable(.strtab) = .{}, strtab_offset: ?u32 = null, -got_entries: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{}, +got_entries: std.ArrayListUnmanaged(Entry) = .{}, got_entries_free_list: std.ArrayListUnmanaged(u32) = .{}, +got_entries_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, + +imports: std.ArrayListUnmanaged(Entry) = .{}, +imports_free_list: std.ArrayListUnmanaged(u32) = .{}, +imports_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, /// Virtual address of the entry point procedure relative to image base. entry_addr: ?u32 = null, @@ -109,17 +117,33 @@ relocs: RelocTable = .{}, /// this will be a table indexed by index into the list of Atoms. base_relocs: BaseRelocationTable = .{}, +const Entry = struct { + target: SymbolWithLoc, + // Index into the synthetic symbol table (i.e., file == null). + sym_index: u32, +}; + pub const Reloc = struct { @"type": enum { got, direct, + imports, }, target: SymbolWithLoc, offset: u32, addend: u32, pcrel: bool, length: u2, - prev_vaddr: u32, + dirty: bool = true, + + /// Returns an Atom which is the target node of this relocation edge (if any). 
+ fn getTargetAtom(self: Reloc, coff_file: *Coff) ?*Atom { + switch (self.@"type") { + .got => return coff_file.getGotAtomForSymbol(self.target), + .direct => return coff_file.getAtomForSymbol(self.target), + .imports => return coff_file.getImportAtomForSymbol(self.target), + } + } }; const RelocTable = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(Reloc)); @@ -180,6 +204,16 @@ pub const SymbolWithLoc = struct { // null means it's a synthetic global or Zig source. file: ?u32 = null, + + pub fn eql(this: SymbolWithLoc, other: SymbolWithLoc) bool { + if (this.file == null and other.file == null) { + return this.sym_index == other.sym_index; + } + if (this.file != null and other.file != null) { + return this.sym_index == other.sym_index and this.file.? == other.file.?; + } + return false; + } }; /// When allocating, the ideal_capacity is calculated by @@ -234,7 +268,7 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*Coff { }, .ptr_width = ptr_width, .page_size = page_size, - .data_directories = comptime mem.zeroes([N_DATA_DIRS]coff.ImageDataDirectory), + .data_directories = comptime mem.zeroes([coff.IMAGE_NUMBEROF_DIRECTORY_ENTRIES]coff.ImageDataDirectory), }; const use_llvm = build_options.have_llvm and options.use_llvm; @@ -269,10 +303,24 @@ pub fn deinit(self: *Coff) void { self.locals.deinit(gpa); self.globals.deinit(gpa); + + { + var it = self.resolver.keyIterator(); + while (it.next()) |key_ptr| { + gpa.free(key_ptr.*); + } + self.resolver.deinit(gpa); + } + + self.unresolved.deinit(gpa); self.locals_free_list.deinit(gpa); self.strtab.deinit(gpa); self.got_entries.deinit(gpa); self.got_entries_free_list.deinit(gpa); + self.got_entries_table.deinit(gpa); + self.imports.deinit(gpa); + self.imports_free_list.deinit(gpa); + self.imports_table.deinit(gpa); self.decls.deinit(gpa); self.atom_by_index_table.deinit(gpa); @@ -305,145 +353,76 @@ fn populateMissingMetadata(self: *Coff) !void { assert(self.llvm_object == null); const gpa = self.base.allocator; - if (self.text_section_index == null) { - self.text_section_index = @intCast(u16, self.sections.slice().len); - const file_size = @intCast(u32, self.base.options.program_code_size_hint); - const off = self.findFreeSpace(file_size, self.page_size); // TODO we are over-aligning in file; we should track both in file and in memory pointers - log.debug("found .text free space 0x{x} to 0x{x}", .{ off, off + file_size }); - var header = coff.SectionHeader{ - .name = undefined, - .virtual_size = file_size, - .virtual_address = off, - .size_of_raw_data = file_size, - .pointer_to_raw_data = off, - .pointer_to_relocations = 0, - .pointer_to_linenumbers = 0, - .number_of_relocations = 0, - .number_of_linenumbers = 0, - .flags = .{ - .CNT_CODE = 1, - .MEM_EXECUTE = 1, - .MEM_READ = 1, - }, - }; - try self.setSectionName(&header, ".text"); - try self.sections.append(gpa, .{ .header = header }); - } - - if (self.got_section_index == null) { - self.got_section_index = @intCast(u16, self.sections.slice().len); - const file_size = @intCast(u32, self.base.options.symbol_count_hint) * self.ptr_width.abiSize(); - const off = self.findFreeSpace(file_size, self.page_size); - log.debug("found .got free space 0x{x} to 0x{x}", .{ off, off + file_size }); - var header = coff.SectionHeader{ - .name = undefined, - .virtual_size = file_size, - .virtual_address = off, - .size_of_raw_data = file_size, - .pointer_to_raw_data = off, - .pointer_to_relocations = 0, - .pointer_to_linenumbers = 0, - .number_of_relocations = 0, - .number_of_linenumbers = 0, 
- .flags = .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_READ = 1, - }, - }; - try self.setSectionName(&header, ".got"); - try self.sections.append(gpa, .{ .header = header }); - } - - if (self.rdata_section_index == null) { - self.rdata_section_index = @intCast(u16, self.sections.slice().len); - const file_size: u32 = 1024; - const off = self.findFreeSpace(file_size, self.page_size); - log.debug("found .rdata free space 0x{x} to 0x{x}", .{ off, off + file_size }); - var header = coff.SectionHeader{ - .name = undefined, - .virtual_size = file_size, - .virtual_address = off, - .size_of_raw_data = file_size, - .pointer_to_raw_data = off, - .pointer_to_relocations = 0, - .pointer_to_linenumbers = 0, - .number_of_relocations = 0, - .number_of_linenumbers = 0, - .flags = .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_READ = 1, - }, - }; - try self.setSectionName(&header, ".rdata"); - try self.sections.append(gpa, .{ .header = header }); - } - - if (self.data_section_index == null) { - self.data_section_index = @intCast(u16, self.sections.slice().len); - const file_size: u32 = 1024; - const off = self.findFreeSpace(file_size, self.page_size); - log.debug("found .data free space 0x{x} to 0x{x}", .{ off, off + file_size }); - var header = coff.SectionHeader{ - .name = undefined, - .virtual_size = file_size, - .virtual_address = off, - .size_of_raw_data = file_size, - .pointer_to_raw_data = off, - .pointer_to_relocations = 0, - .pointer_to_linenumbers = 0, - .number_of_relocations = 0, - .number_of_linenumbers = 0, - .flags = .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_READ = 1, - .MEM_WRITE = 1, - }, - }; - try self.setSectionName(&header, ".data"); - try self.sections.append(gpa, .{ .header = header }); - } - - if (self.reloc_section_index == null) { - self.reloc_section_index = @intCast(u16, self.sections.slice().len); - const file_size = @intCast(u32, self.base.options.symbol_count_hint) * @sizeOf(coff.BaseRelocation); - const off = self.findFreeSpace(file_size, self.page_size); - log.debug("found .reloc free space 0x{x} to 0x{x}", .{ off, off + file_size }); - var header = coff.SectionHeader{ - .name = undefined, - .virtual_size = file_size, - .virtual_address = off, - .size_of_raw_data = file_size, - .pointer_to_raw_data = off, - .pointer_to_relocations = 0, - .pointer_to_linenumbers = 0, - .number_of_relocations = 0, - .number_of_linenumbers = 0, - .flags = .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_PURGEABLE = 1, - .MEM_READ = 1, - }, - }; - try self.setSectionName(&header, ".reloc"); - try self.sections.append(gpa, .{ .header = header }); - } - - if (self.strtab_offset == null) { - try self.strtab.buffer.append(gpa, 0); - self.strtab_offset = self.findFreeSpace(@intCast(u32, self.strtab.len()), 1); - log.debug("found strtab free space 0x{x} to 0x{x}", .{ self.strtab_offset.?, self.strtab_offset.? + self.strtab.len() }); - } + try self.strtab.buffer.ensureUnusedCapacity(gpa, @sizeOf(u32)); + self.strtab.buffer.appendNTimesAssumeCapacity(0, @sizeOf(u32)); // Index 0 is always a null symbol. 
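// The per-section boilerplate removed above is replaced by the `allocateSection`
// helper added further down, which gives each new section the first page-aligned
// virtual address after the previous section. A standalone sketch of that address
// calculation, assuming a 4 KiB page size:
const std = @import("std");

fn nextSectionVAddr(prev_vaddr: u32, prev_vsize: u32, page_size: u32) u32 {
    return std.mem.alignForwardGeneric(u32, prev_vaddr + prev_vsize, page_size);
}

test "sections are laid out back to back on page boundaries" {
    // A section at 0x1000 spanning 0x1800 bytes pushes the next one to 0x3000.
    try std.testing.expectEqual(@as(u32, 0x3000), nextSectionVAddr(0x1000, 0x1800, 0x1000));
}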
try self.locals.append(gpa, .{ .name = [_]u8{0} ** 8, .value = 0, - .section_number = @intToEnum(coff.SectionNumber, 0), + .section_number = .UNDEFINED, .@"type" = .{ .base_type = .NULL, .complex_type = .NULL }, .storage_class = .NULL, .number_of_aux_symbols = 0, }); + if (self.text_section_index == null) { + const file_size = @intCast(u32, self.base.options.program_code_size_hint); + self.text_section_index = try self.allocateSection(".text", file_size, .{ + .CNT_CODE = 1, + .MEM_EXECUTE = 1, + .MEM_READ = 1, + }); + } + + if (self.got_section_index == null) { + const file_size = @intCast(u32, self.base.options.symbol_count_hint) * self.ptr_width.abiSize(); + self.got_section_index = try self.allocateSection(".got", file_size, .{ + .CNT_INITIALIZED_DATA = 1, + .MEM_READ = 1, + }); + } + + if (self.rdata_section_index == null) { + const file_size: u32 = self.page_size; + self.rdata_section_index = try self.allocateSection(".rdata", file_size, .{ + .CNT_INITIALIZED_DATA = 1, + .MEM_READ = 1, + }); + } + + if (self.data_section_index == null) { + const file_size: u32 = self.page_size; + self.data_section_index = try self.allocateSection(".data", file_size, .{ + .CNT_INITIALIZED_DATA = 1, + .MEM_READ = 1, + .MEM_WRITE = 1, + }); + } + + if (self.idata_section_index == null) { + const file_size = @intCast(u32, self.base.options.symbol_count_hint) * self.ptr_width.abiSize(); + self.idata_section_index = try self.allocateSection(".idata", file_size, .{ + .CNT_INITIALIZED_DATA = 1, + .MEM_READ = 1, + }); + } + + if (self.reloc_section_index == null) { + const file_size = @intCast(u32, self.base.options.symbol_count_hint) * @sizeOf(coff.BaseRelocation); + self.reloc_section_index = try self.allocateSection(".reloc", file_size, .{ + .CNT_INITIALIZED_DATA = 1, + .MEM_DISCARDABLE = 1, + .MEM_READ = 1, + }); + } + + if (self.strtab_offset == null) { + const file_size = @intCast(u32, self.strtab.len()); + self.strtab_offset = self.findFreeSpace(file_size, @alignOf(u32)); // 4bytes aligned seems like a good idea here + log.debug("found strtab free space 0x{x} to 0x{x}", .{ self.strtab_offset.?, self.strtab_offset.? + file_size }); + } + { // We need to find out what the max file offset is according to section headers. // Otherwise, we may end up with an COFF binary with file size not matching the final section's @@ -459,6 +438,72 @@ fn populateMissingMetadata(self: *Coff) !void { } } +fn allocateSection(self: *Coff, name: []const u8, size: u32, flags: coff.SectionHeaderFlags) !u16 { + const index = @intCast(u16, self.sections.slice().len); + const off = self.findFreeSpace(size, default_file_alignment); + // Memory is always allocated in sequence + // TODO: investigate if we can allocate .text last; this way it would never need to grow in memory! + const vaddr = blk: { + if (index == 0) break :blk self.page_size; + const prev_header = self.sections.items(.header)[index - 1]; + break :blk mem.alignForwardGeneric(u32, prev_header.virtual_address + prev_header.virtual_size, self.page_size); + }; + // We commit more memory than needed upfront so that we don't have to reallocate too soon. 
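+    // Only `size` bytes are reserved in the file itself (size_of_raw_data below); the
+    // overcommitted value is used for the section's virtual size only.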
+ const memsz = mem.alignForwardGeneric(u32, size, self.page_size) * 100; + log.debug("found {s} free space 0x{x} to 0x{x} (0x{x} - 0x{x})", .{ + name, + off, + off + size, + vaddr, + vaddr + size, + }); + var header = coff.SectionHeader{ + .name = undefined, + .virtual_size = memsz, + .virtual_address = vaddr, + .size_of_raw_data = size, + .pointer_to_raw_data = off, + .pointer_to_relocations = 0, + .pointer_to_linenumbers = 0, + .number_of_relocations = 0, + .number_of_linenumbers = 0, + .flags = flags, + }; + try self.setSectionName(&header, name); + try self.sections.append(self.base.allocator, .{ .header = header }); + return index; +} + +fn growSectionVM(self: *Coff, sect_id: u32, needed_size: u32) !void { + const header = &self.sections.items(.header)[sect_id]; + const increased_size = padToIdeal(needed_size); + const old_aligned_end = header.virtual_address + mem.alignForwardGeneric(u32, header.virtual_size, self.page_size); + const new_aligned_end = header.virtual_address + mem.alignForwardGeneric(u32, increased_size, self.page_size); + const diff = new_aligned_end - old_aligned_end; + log.debug("growing {s} in virtual memory by {x}", .{ self.getSectionName(header), diff }); + + // TODO: enforce order by increasing VM addresses in self.sections container. + // This is required by the loader anyhow as far as I can tell. + for (self.sections.items(.header)[sect_id + 1 ..]) |*next_header, next_sect_id| { + const maybe_last_atom = &self.sections.items(.last_atom)[sect_id + 1 + next_sect_id]; + next_header.virtual_address += diff; + + if (maybe_last_atom.*) |last_atom| { + var atom = last_atom; + while (true) { + const sym = atom.getSymbolPtr(self); + sym.value += diff; + + if (atom.prev) |prev| { + atom = prev; + } else break; + } + } + } + + header.virtual_size = increased_size; +} + pub fn allocateDeclIndexes(self: *Coff, decl_index: Module.Decl.Index) !void { if (self.llvm_object) |_| return; const decl = self.base.options.module.?.declPtr(decl_index); @@ -542,16 +587,33 @@ fn allocateAtom(self: *Coff, atom: *Atom, new_atom_size: u32, alignment: u32) !u const sect_capacity = self.allocatedSize(header.pointer_to_raw_data); const needed_size: u32 = (vaddr + new_atom_size) - header.virtual_address; if (needed_size > sect_capacity) { - @panic("TODO move section"); + const new_offset = self.findFreeSpace(needed_size, default_file_alignment); + const current_size = if (maybe_last_atom.*) |last_atom| blk: { + const sym = last_atom.getSymbol(self); + break :blk (sym.value + last_atom.size) - header.virtual_address; + } else 0; + log.debug("moving {s} from 0x{x} to 0x{x}", .{ self.getSectionName(header), header.pointer_to_raw_data, new_offset }); + const amt = try self.base.file.?.copyRangeAll( + header.pointer_to_raw_data, + self.base.file.?, + new_offset, + current_size, + ); + if (amt != current_size) return error.InputOutput; + header.pointer_to_raw_data = new_offset; } + + const sect_vm_capacity = self.allocatedVirtualSize(header.virtual_address); + if (needed_size > sect_vm_capacity) { + try self.growSectionVM(sect_id, needed_size); + self.markRelocsDirtyByAddress(header.virtual_address + needed_size); + } + + header.virtual_size = @maximum(header.virtual_size, needed_size); + header.size_of_raw_data = needed_size; maybe_last_atom.* = atom; - // header.virtual_size = needed_size; - // header.size_of_raw_data = mem.alignForwardGeneric(u32, needed_size, default_file_alignment); } - // if (header.getAlignment().? 
< alignment) { - // header.setAlignment(alignment); - // } atom.size = new_atom_size; atom.alignment = alignment; @@ -596,7 +658,7 @@ fn allocateSymbol(self: *Coff) !u32 { self.locals.items[index] = .{ .name = [_]u8{0} ** 8, .value = 0, - .section_number = @intToEnum(coff.SectionNumber, 0), + .section_number = .UNDEFINED, .@"type" = .{ .base_type = .NULL, .complex_type = .NULL }, .storage_class = .NULL, .number_of_aux_symbols = 0, @@ -605,24 +667,71 @@ fn allocateSymbol(self: *Coff) !u32 { return index; } -pub fn allocateGotEntry(self: *Coff, target: SymbolWithLoc) !u32 { +fn allocateGlobal(self: *Coff) !u32 { const gpa = self.base.allocator; - try self.got_entries.ensureUnusedCapacity(gpa, 1); - const index: u32 = blk: { - if (self.got_entries_free_list.popOrNull()) |index| { - log.debug(" (reusing GOT entry index {d})", .{index}); - if (self.got_entries.getIndex(target)) |existing| { - assert(existing == index); - } + try self.globals.ensureUnusedCapacity(gpa, 1); + + const index = blk: { + if (self.globals_free_list.popOrNull()) |index| { + log.debug(" (reusing global index {d})", .{index}); break :blk index; } else { - log.debug(" (allocating GOT entry at index {d})", .{self.got_entries.keys().len}); - const index = @intCast(u32, self.got_entries.keys().len); - self.got_entries.putAssumeCapacityNoClobber(target, 0); + log.debug(" (allocating global index {d})", .{self.globals.items.len}); + const index = @intCast(u32, self.globals.items.len); + _ = self.globals.addOneAssumeCapacity(); break :blk index; } }; - self.got_entries.keys()[index] = target; + + self.globals.items[index] = .{ + .sym_index = 0, + .file = null, + }; + + return index; +} + +pub fn allocateGotEntry(self: *Coff, target: SymbolWithLoc) !u32 { + const gpa = self.base.allocator; + try self.got_entries.ensureUnusedCapacity(gpa, 1); + + const index: u32 = blk: { + if (self.got_entries_free_list.popOrNull()) |index| { + log.debug(" (reusing GOT entry index {d})", .{index}); + break :blk index; + } else { + log.debug(" (allocating GOT entry at index {d})", .{self.got_entries.items.len}); + const index = @intCast(u32, self.got_entries.items.len); + _ = self.got_entries.addOneAssumeCapacity(); + break :blk index; + } + }; + + self.got_entries.items[index] = .{ .target = target, .sym_index = 0 }; + try self.got_entries_table.putNoClobber(gpa, target, index); + + return index; +} + +pub fn allocateImportEntry(self: *Coff, target: SymbolWithLoc) !u32 { + const gpa = self.base.allocator; + try self.imports.ensureUnusedCapacity(gpa, 1); + + const index: u32 = blk: { + if (self.imports_free_list.popOrNull()) |index| { + log.debug(" (reusing import entry index {d})", .{index}); + break :blk index; + } else { + log.debug(" (allocating import entry at index {d})", .{self.imports.items.len}); + const index = @intCast(u32, self.imports.items.len); + _ = self.imports.addOneAssumeCapacity(); + break :blk index; + } + }; + + self.imports.items[index] = .{ .target = target, .sym_index = 0 }; + try self.imports_table.putNoClobber(gpa, target, index); + return index; } @@ -637,7 +746,6 @@ fn createGotAtom(self: *Coff, target: SymbolWithLoc) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, atom.sym_index, atom); - self.got_entries.getPtr(target).?.* = atom.sym_index; const sym = atom.getSymbolPtr(self); sym.section_number = @intToEnum(coff.SectionNumber, self.got_section_index.? 
+ 1); @@ -652,7 +760,6 @@ fn createGotAtom(self: *Coff, target: SymbolWithLoc) !*Atom { .addend = 0, .pcrel = false, .length = 3, - .prev_vaddr = sym.value, }); const target_sym = self.getSymbol(target); @@ -666,6 +773,27 @@ fn createGotAtom(self: *Coff, target: SymbolWithLoc) !*Atom { return atom; } +fn createImportAtom(self: *Coff) !*Atom { + const gpa = self.base.allocator; + const atom = try gpa.create(Atom); + errdefer gpa.destroy(atom); + atom.* = Atom.empty; + atom.sym_index = try self.allocateSymbol(); + atom.size = @sizeOf(u64); + atom.alignment = @alignOf(u64); + + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, atom.sym_index, atom); + + const sym = atom.getSymbolPtr(self); + sym.section_number = @intToEnum(coff.SectionNumber, self.idata_section_index.? + 1); + sym.value = try self.allocateAtom(atom, atom.size, atom.alignment); + + log.debug("allocated import atom at 0x{x}", .{sym.value}); + + return atom; +} + fn growAtom(self: *Coff, atom: *Atom, new_atom_size: u32, alignment: u32) !u32 { const sym = atom.getSymbol(self); const align_ok = mem.alignBackwardGeneric(u32, sym.value, alignment) == sym.value; @@ -686,12 +814,12 @@ fn writeAtom(self: *Coff, atom: *Atom, code: []const u8) !void { const sym = atom.getSymbol(self); const section = self.sections.get(@enumToInt(sym.section_number) - 1); const file_offset = section.header.pointer_to_raw_data + sym.value - section.header.virtual_address; - log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ atom.getName(self), file_offset }); + log.debug("writing atom for symbol {s} at file offset 0x{x} to 0x{x}", .{ atom.getName(self), file_offset, file_offset + code.len }); try self.base.file.?.pwriteAll(code, file_offset); try self.resolveRelocs(atom); } -fn writeGotAtom(self: *Coff, atom: *Atom) !void { +fn writePtrWidthAtom(self: *Coff, atom: *Atom) !void { switch (self.ptr_width) { .p32 => { var buffer: [@sizeOf(u32)]u8 = [_]u8{0} ** @sizeOf(u32); @@ -704,6 +832,29 @@ fn writeGotAtom(self: *Coff, atom: *Atom) !void { } } +fn markRelocsDirtyByTarget(self: *Coff, target: SymbolWithLoc) void { + // TODO: reverse-lookup might come in handy here + var it = self.relocs.valueIterator(); + while (it.next()) |relocs| { + for (relocs.items) |*reloc| { + if (!reloc.target.eql(target)) continue; + reloc.dirty = true; + } + } +} + +fn markRelocsDirtyByAddress(self: *Coff, addr: u32) void { + var it = self.relocs.valueIterator(); + while (it.next()) |relocs| { + for (relocs.items) |*reloc| { + const target_atom = reloc.getTargetAtom(self) orelse continue; + const target_sym = target_atom.getSymbol(self); + if (target_sym.value < addr) continue; + reloc.dirty = true; + } + } +} + fn resolveRelocs(self: *Coff, atom: *Atom) !void { const relocs = self.relocs.get(atom) orelse return; const source_sym = atom.getSymbol(self); @@ -713,29 +864,28 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { log.debug("relocating '{s}'", .{atom.getName(self)}); for (relocs.items) |*reloc| { - const target_vaddr = switch (reloc.@"type") { - .got => blk: { - const got_atom = self.getGotAtomForSymbol(reloc.target) orelse continue; - break :blk got_atom.getSymbol(self).value; - }, - .direct => self.getSymbol(reloc.target).value, - }; + if (!reloc.dirty) continue; + + const target_atom = reloc.getTargetAtom(self) orelse continue; + const target_vaddr = target_atom.getSymbol(self).value; const target_vaddr_with_addend = target_vaddr + reloc.addend; - if (target_vaddr_with_addend == reloc.prev_vaddr) continue; - - 
log.debug(" ({x}: [() => 0x{x} ({s})) ({s})", .{ - reloc.offset, + log.debug(" ({x}: [() => 0x{x} ({s})) ({s}) (in file at 0x{x})", .{ + source_sym.value + reloc.offset, target_vaddr_with_addend, self.getSymbolName(reloc.target), @tagName(reloc.@"type"), + file_offset + reloc.offset, }); + reloc.dirty = false; + if (reloc.pcrel) { const source_vaddr = source_sym.value + reloc.offset; - const disp = target_vaddr_with_addend - source_vaddr - 4; - try self.base.file.?.pwriteAll(mem.asBytes(&@intCast(u32, disp)), file_offset + reloc.offset); - return; + const disp = + @intCast(i32, target_vaddr_with_addend) - @intCast(i32, source_vaddr) - 4; + try self.base.file.?.pwriteAll(mem.asBytes(&disp), file_offset + reloc.offset); + continue; } switch (self.ptr_width) { @@ -755,14 +905,15 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { else => unreachable, }, } - - reloc.prev_vaddr = target_vaddr_with_addend; } } fn freeAtom(self: *Coff, atom: *Atom) void { log.debug("freeAtom {*}", .{atom}); + // Remove any relocs and base relocs associated with this Atom + self.freeRelocationsForAtom(atom); + const sym = atom.getSymbol(self); const sect_id = @enumToInt(sym.section_number) - 1; const free_list = &self.sections.items(.free_list)[sect_id]; @@ -825,11 +976,14 @@ pub fn updateFunc(self: *Coff, module: *Module, func: *Module.Fn, air: Air, live const tracy = trace(@src()); defer tracy.end(); + const decl_index = func.owner_decl; + const decl = module.declPtr(decl_index); + self.freeUnnamedConsts(decl_index); + self.freeRelocationsForAtom(&decl.link.coff); + var code_buffer = std.ArrayList(u8).init(self.base.allocator); defer code_buffer.deinit(); - const decl_index = func.owner_decl; - const decl = module.declPtr(decl_index); const res = try codegen.generateFunction( &self.base, decl.srcLoc(), @@ -856,10 +1010,67 @@ pub fn updateFunc(self: *Coff, module: *Module, func: *Module.Fn, air: Air, live } pub fn lowerUnnamedConst(self: *Coff, tv: TypedValue, decl_index: Module.Decl.Index) !u32 { - _ = self; - _ = tv; - _ = decl_index; - @panic("TODO lowerUnnamedConst"); + const gpa = self.base.allocator; + var code_buffer = std.ArrayList(u8).init(gpa); + defer code_buffer.deinit(); + + const mod = self.base.options.module.?; + const decl = mod.declPtr(decl_index); + + const gop = try self.unnamed_const_atoms.getOrPut(gpa, decl_index); + if (!gop.found_existing) { + gop.value_ptr.* = .{}; + } + const unnamed_consts = gop.value_ptr; + + const atom = try gpa.create(Atom); + errdefer gpa.destroy(atom); + atom.* = Atom.empty; + + atom.sym_index = try self.allocateSymbol(); + const sym = atom.getSymbolPtr(self); + const sym_name = blk: { + const decl_name = try decl.getFullyQualifiedName(mod); + defer gpa.free(decl_name); + + const index = unnamed_consts.items.len; + break :blk try std.fmt.allocPrint(gpa, "__unnamed_{s}_{d}", .{ decl_name, index }); + }; + defer gpa.free(sym_name); + try self.setSymbolName(sym, sym_name); + sym.section_number = @intToEnum(coff.SectionNumber, self.rdata_section_index.? 
+ 1); + + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, atom.sym_index, atom); + + const res = try codegen.generateSymbol(&self.base, decl.srcLoc(), tv, &code_buffer, .none, .{ + .parent_atom_index = atom.sym_index, + }); + const code = switch (res) { + .externally_managed => |x| x, + .appended => code_buffer.items, + .fail => |em| { + decl.analysis = .codegen_failure; + try mod.failed_decls.put(mod.gpa, decl_index, em); + log.err("{s}", .{em.msg}); + return error.AnalysisFail; + }, + }; + + const required_alignment = tv.ty.abiAlignment(self.base.options.target); + atom.alignment = required_alignment; + atom.size = @intCast(u32, code.len); + sym.value = try self.allocateAtom(atom, atom.size, atom.alignment); + errdefer self.freeAtom(atom); + + try unnamed_consts.append(gpa, atom); + + log.debug("allocated atom for {s} at 0x{x}", .{ sym_name, sym.value }); + log.debug(" (required alignment 0x{x})", .{required_alignment}); + + try self.writeAtom(atom, code); + + return atom.sym_index; } pub fn updateDecl(self: *Coff, module: *Module, decl_index: Module.Decl.Index) !void { @@ -884,6 +1095,8 @@ pub fn updateDecl(self: *Coff, module: *Module, decl_index: Module.Decl.Index) ! } } + self.freeRelocationsForAtom(&decl.link.coff); + var code_buffer = std.ArrayList(u8).init(self.base.allocator); defer code_buffer.deinit(); @@ -892,7 +1105,7 @@ pub fn updateDecl(self: *Coff, module: *Module, decl_index: Module.Decl.Index) ! .ty = decl.ty, .val = decl_val, }, &code_buffer, .none, .{ - .parent_atom_index = 0, + .parent_atom_index = decl.link.coff.sym_index, }); const code = switch (res) { .externally_managed => |x| x, @@ -970,8 +1183,10 @@ fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, if (vaddr != sym.value) { sym.value = vaddr; log.debug(" (updating GOT entry)", .{}); - const got_atom = self.getGotAtomForSymbol(.{ .sym_index = atom.sym_index, .file = null }).?; - try self.writeGotAtom(got_atom); + const got_target = SymbolWithLoc{ .sym_index = atom.sym_index, .file = null }; + const got_atom = self.getGotAtomForSymbol(got_target).?; + self.markRelocsDirtyByTarget(got_target); + try self.writePtrWidthAtom(got_atom); } } else if (code_len < atom.size) { self.shrinkAtom(atom, code_len); @@ -990,14 +1205,35 @@ fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, sym.value = vaddr; const got_target = SymbolWithLoc{ .sym_index = atom.sym_index, .file = null }; - _ = try self.allocateGotEntry(got_target); + const got_index = try self.allocateGotEntry(got_target); const got_atom = try self.createGotAtom(got_target); - try self.writeGotAtom(got_atom); + self.got_entries.items[got_index].sym_index = got_atom.sym_index; + try self.writePtrWidthAtom(got_atom); } + self.markRelocsDirtyByTarget(atom.getSymbolWithLoc()); try self.writeAtom(atom, code); } +fn freeRelocationsForAtom(self: *Coff, atom: *Atom) void { + _ = self.relocs.remove(atom); + _ = self.base_relocs.remove(atom); +} + +fn freeUnnamedConsts(self: *Coff, decl_index: Module.Decl.Index) void { + const gpa = self.base.allocator; + const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; + for (unnamed_consts.items) |atom| { + self.freeAtom(atom); + self.locals_free_list.append(gpa, atom.sym_index) catch {}; + self.locals.items[atom.sym_index].section_number = .UNDEFINED; + _ = self.atom_by_index_table.remove(atom.sym_index); + log.debug(" adding local symbol index {d} to free list", .{atom.sym_index}); + atom.sym_index = 0; + 
} + unnamed_consts.clearAndFree(gpa); +} + pub fn freeDecl(self: *Coff, decl_index: Module.Decl.Index) void { if (build_options.have_llvm) { if (self.llvm_object) |llvm_object| return llvm_object.freeDecl(decl_index); @@ -1011,6 +1247,7 @@ pub fn freeDecl(self: *Coff, decl_index: Module.Decl.Index) void { const kv = self.decls.fetchRemove(decl_index); if (kv.?.value) |_| { self.freeAtom(&decl.link.coff); + self.freeUnnamedConsts(decl_index); } // Appending to free lists is allowed to fail because the free lists are heuristics based anyway. @@ -1021,14 +1258,20 @@ pub fn freeDecl(self: *Coff, decl_index: Module.Decl.Index) void { // Try freeing GOT atom if this decl had one const got_target = SymbolWithLoc{ .sym_index = sym_index, .file = null }; - if (self.got_entries.getIndex(got_target)) |got_index| { + if (self.got_entries_table.get(got_target)) |got_index| { self.got_entries_free_list.append(gpa, @intCast(u32, got_index)) catch {}; - self.got_entries.values()[got_index] = 0; + self.got_entries.items[got_index] = .{ + .target = .{ .sym_index = 0, .file = null }, + .sym_index = 0, + }; + _ = self.got_entries_table.remove(got_target); + log.debug(" adding GOT index {d} to free list (target local@{d})", .{ got_index, sym_index }); } - self.locals.items[sym_index].section_number = @intToEnum(coff.SectionNumber, 0); + self.locals.items[sym_index].section_number = .UNDEFINED; _ = self.atom_by_index_table.remove(sym_index); + log.debug(" adding local symbol index {d} to free list", .{sym_index}); decl.link.coff.sym_index = 0; } } @@ -1154,44 +1397,49 @@ pub fn deleteExport(self: *Coff, exp: Export) void { const sym = self.getSymbolPtr(sym_loc); const sym_name = self.getSymbolName(sym_loc); log.debug("deleting export '{s}'", .{sym_name}); - assert(sym.storage_class == .EXTERNAL); + assert(sym.storage_class == .EXTERNAL and sym.section_number != .UNDEFINED); sym.* = .{ .name = [_]u8{0} ** 8, .value = 0, - .section_number = @intToEnum(coff.SectionNumber, 0), + .section_number = .UNDEFINED, .@"type" = .{ .base_type = .NULL, .complex_type = .NULL }, .storage_class = .NULL, .number_of_aux_symbols = 0, }; self.locals_free_list.append(gpa, sym_index) catch {}; - if (self.globals.get(sym_name)) |global| blk: { - if (global.sym_index != sym_index) break :blk; - if (global.file != null) break :blk; - const kv = self.globals.fetchSwapRemove(sym_name); - gpa.free(kv.?.key); + if (self.resolver.fetchRemove(sym_name)) |entry| { + defer gpa.free(entry.key); + self.globals_free_list.append(gpa, entry.value) catch {}; + self.globals.items[entry.value] = .{ + .sym_index = 0, + .file = null, + }; } } fn resolveGlobalSymbol(self: *Coff, current: SymbolWithLoc) !void { const gpa = self.base.allocator; const sym = self.getSymbol(current); - _ = sym; const sym_name = self.getSymbolName(current); - const name = try gpa.dupe(u8, sym_name); - const global_index = @intCast(u32, self.globals.values().len); - _ = global_index; - const gop = try self.globals.getOrPut(gpa, name); - defer if (gop.found_existing) gpa.free(name); - - if (!gop.found_existing) { - gop.value_ptr.* = current; - // TODO undef + tentative + const global_index = self.resolver.get(sym_name) orelse { + const name = try gpa.dupe(u8, sym_name); + const global_index = try self.allocateGlobal(); + self.globals.items[global_index] = current; + try self.resolver.putNoClobber(gpa, name, global_index); + if (sym.section_number == .UNDEFINED) { + try self.unresolved.putNoClobber(gpa, global_index, false); + } return; - } + }; log.debug("TODO finish 
resolveGlobalSymbols implementation", .{}); + + if (sym.section_number == .UNDEFINED) return; + + _ = self.unresolved.swapRemove(global_index); + self.globals.items[global_index] = current; } pub fn flush(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Node) !void { @@ -1227,6 +1475,17 @@ pub fn flushModule(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod sub_prog_node.activate(); defer sub_prog_node.end(); + while (self.unresolved.popOrNull()) |entry| { + assert(entry.value); // We only expect imports generated by the incremental linker for now. + const global = self.globals.items[entry.key]; + if (self.imports_table.contains(global)) continue; + + const import_index = try self.allocateImportEntry(global); + const import_atom = try self.createImportAtom(); + self.imports.items[import_index].sym_index = import_atom.sym_index; + try self.writePtrWidthAtom(import_atom); + } + if (build_options.enable_logging) { self.logSymtab(); } @@ -1237,6 +1496,7 @@ pub fn flushModule(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod try self.resolveRelocs(atom.*); } } + try self.writeImportTable(); try self.writeBaseRelocations(); if (self.getEntryPoint()) |entry_sym_loc| { @@ -1262,10 +1522,47 @@ pub fn getDeclVAddr( decl_index: Module.Decl.Index, reloc_info: link.File.RelocInfo, ) !u64 { - _ = self; - _ = decl_index; - _ = reloc_info; - @panic("TODO getDeclVAddr"); + const mod = self.base.options.module.?; + const decl = mod.declPtr(decl_index); + + assert(self.llvm_object == null); + assert(decl.link.coff.sym_index != 0); + + const atom = self.atom_by_index_table.get(reloc_info.parent_atom_index).?; + const target = SymbolWithLoc{ .sym_index = decl.link.coff.sym_index, .file = null }; + try atom.addRelocation(self, .{ + .@"type" = .direct, + .target = target, + .offset = @intCast(u32, reloc_info.offset), + .addend = reloc_info.addend, + .pcrel = false, + .length = 3, + }); + try atom.addBaseRelocation(self, @intCast(u32, reloc_info.offset)); + + return 0; +} + +pub fn getGlobalSymbol(self: *Coff, name: []const u8) !u32 { + if (self.resolver.get(name)) |global_index| { + return self.globals.items[global_index].sym_index; + } + + const gpa = self.base.allocator; + const sym_index = try self.allocateSymbol(); + const global_index = try self.allocateGlobal(); + const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = null }; + self.globals.items[global_index] = sym_loc; + + const sym_name = try gpa.dupe(u8, name); + const sym = self.getSymbolPtr(sym_loc); + try self.setSymbolName(sym, sym_name); + sym.storage_class = .EXTERNAL; + + try self.resolver.putNoClobber(gpa, sym_name, global_index); + try self.unresolved.putNoClobber(gpa, global_index, true); + + return sym_index; } pub fn updateDeclLineNumber(self: *Coff, module: *Module, decl: *Module.Decl) !void { @@ -1342,7 +1639,25 @@ fn writeBaseRelocations(self: *Coff) !void { const header = &self.sections.items(.header)[self.reloc_section_index.?]; const sect_capacity = self.allocatedSize(header.pointer_to_raw_data); const needed_size = @intCast(u32, buffer.items.len); - assert(needed_size < sect_capacity); // TODO expand .reloc section + if (needed_size > sect_capacity) { + const new_offset = self.findFreeSpace(needed_size, default_file_alignment); + log.debug("writing {s} at 0x{x} to 0x{x} (0x{x} - 0x{x})", .{ + self.getSectionName(header), + header.pointer_to_raw_data, + header.pointer_to_raw_data + needed_size, + new_offset, + new_offset + needed_size, + }); + header.pointer_to_raw_data = new_offset; + + const 
sect_vm_capacity = self.allocatedVirtualSize(header.virtual_address); + if (needed_size > sect_vm_capacity) { + // TODO: we want to enforce .reloc after every alloc section. + try self.growSectionVM(self.reloc_section_index.?, needed_size); + } + } + header.virtual_size = @maximum(header.virtual_size, needed_size); + header.size_of_raw_data = needed_size; try self.base.file.?.pwriteAll(buffer.items, header.pointer_to_raw_data); @@ -1352,17 +1667,111 @@ fn writeBaseRelocations(self: *Coff) !void { }; } +fn writeImportTable(self: *Coff) !void { + if (self.idata_section_index == null) return; + + const gpa = self.base.allocator; + + const section = self.sections.get(self.idata_section_index.?); + const last_atom = section.last_atom orelse return; + + const iat_rva = section.header.virtual_address; + const iat_size = last_atom.getSymbol(self).value + last_atom.size * 2 - iat_rva; // account for sentinel zero pointer + + const dll_name = "KERNEL32.dll"; + + var import_dir_entry = coff.ImportDirectoryEntry{ + .import_lookup_table_rva = @sizeOf(coff.ImportDirectoryEntry) * 2, + .time_date_stamp = 0, + .forwarder_chain = 0, + .name_rva = 0, + .import_address_table_rva = iat_rva, + }; + + // TODO: we currently assume there's only one (implicit) DLL - ntdll + var lookup_table = std.ArrayList(coff.ImportLookupEntry64.ByName).init(gpa); + defer lookup_table.deinit(); + + var names_table = std.ArrayList(u8).init(gpa); + defer names_table.deinit(); + + // TODO: check if import is still valid + for (self.imports.items) |entry| { + const target_name = self.getSymbolName(entry.target); + const start = names_table.items.len; + mem.writeIntLittle(u16, try names_table.addManyAsArray(2), 0); // TODO: currently, hint is set to 0 as we haven't yet parsed any DLL + try names_table.appendSlice(target_name); + try names_table.append(0); + const end = names_table.items.len; + if (!mem.isAlignedGeneric(usize, end - start, @sizeOf(u16))) { + try names_table.append(0); + } + try lookup_table.append(.{ .name_table_rva = @intCast(u31, start) }); + } + try lookup_table.append(.{ .name_table_rva = 0 }); // the sentinel + + const dir_entry_size = @sizeOf(coff.ImportDirectoryEntry) + lookup_table.items.len * @sizeOf(coff.ImportLookupEntry64.ByName) + names_table.items.len + dll_name.len + 1; + const needed_size = iat_size + dir_entry_size + @sizeOf(coff.ImportDirectoryEntry); + const sect_capacity = self.allocatedSize(section.header.pointer_to_raw_data); + assert(needed_size < sect_capacity); // TODO: implement expanding .idata section + + // Fixup offsets + const base_rva = iat_rva + iat_size; + import_dir_entry.import_lookup_table_rva += base_rva; + import_dir_entry.name_rva = @intCast(u32, base_rva + dir_entry_size + @sizeOf(coff.ImportDirectoryEntry) - dll_name.len - 1); + + for (lookup_table.items[0 .. 
lookup_table.items.len - 1]) |*lk| { + lk.name_table_rva += @intCast(u31, base_rva + @sizeOf(coff.ImportDirectoryEntry) * 2 + lookup_table.items.len * @sizeOf(coff.ImportLookupEntry64.ByName)); + } + + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + try buffer.ensureTotalCapacity(dir_entry_size + @sizeOf(coff.ImportDirectoryEntry)); + buffer.appendSliceAssumeCapacity(mem.asBytes(&import_dir_entry)); + buffer.appendNTimesAssumeCapacity(0, @sizeOf(coff.ImportDirectoryEntry)); // the sentinel; TODO: I think doing all of the above on bytes directly might be cleaner + buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(lookup_table.items)); + buffer.appendSliceAssumeCapacity(names_table.items); + buffer.appendSliceAssumeCapacity(dll_name); + buffer.appendAssumeCapacity(0); + + try self.base.file.?.pwriteAll(buffer.items, section.header.pointer_to_raw_data + iat_size); + // Override the IAT atoms + // TODO: we should rewrite only dirtied atoms, but that's for way later + try self.base.file.?.pwriteAll(mem.sliceAsBytes(lookup_table.items), section.header.pointer_to_raw_data); + + self.data_directories[@enumToInt(coff.DirectoryEntry.IMPORT)] = .{ + .virtual_address = iat_rva + iat_size, + .size = @intCast(u32, @sizeOf(coff.ImportDirectoryEntry) * 2), + }; + + self.data_directories[@enumToInt(coff.DirectoryEntry.IAT)] = .{ + .virtual_address = iat_rva, + .size = iat_size, + }; +} + fn writeStrtab(self: *Coff) !void { + if (self.strtab_offset == null) return; + const allocated_size = self.allocatedSize(self.strtab_offset.?); const needed_size = @intCast(u32, self.strtab.len()); if (needed_size > allocated_size) { self.strtab_offset = null; - self.strtab_offset = @intCast(u32, self.findFreeSpace(needed_size, 1)); + self.strtab_offset = @intCast(u32, self.findFreeSpace(needed_size, @alignOf(u32))); } log.debug("writing strtab from 0x{x} to 0x{x}", .{ self.strtab_offset.?, self.strtab_offset.? + needed_size }); - try self.base.file.?.pwriteAll(self.strtab.buffer.items, self.strtab_offset.?); + + var buffer = std.ArrayList(u8).init(self.base.allocator); + defer buffer.deinit(); + try buffer.ensureTotalCapacityPrecise(needed_size); + buffer.appendSliceAssumeCapacity(self.strtab.items()); + // Here, we do a trick in that we do not commit the size of the strtab to strtab buffer, instead + // we write the length of the strtab to a temporary buffer that goes to file. 
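+    // The 4 bytes reserved at the start of the table in populateMissingMetadata receive the
+    // total length in this temporary copy.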
+ mem.writeIntLittle(u32, buffer.items[0..4], @intCast(u32, self.strtab.len())); + + try self.base.file.?.pwriteAll(buffer.items, self.strtab_offset.?); } fn writeSectionHeaders(self: *Coff) !void { @@ -1527,14 +1936,15 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { } fn detectAllocCollision(self: *Coff, start: u32, size: u32) ?u32 { - const headers_size = self.getSizeOfHeaders(); + const headers_size = @maximum(self.getSizeOfHeaders(), self.page_size); if (start < headers_size) return headers_size; - const end = start + size; + const end = start + padToIdeal(size); if (self.strtab_offset) |off| { - const increased_size = @intCast(u32, self.strtab.len()); + const tight_size = @intCast(u32, self.strtab.len()); + const increased_size = padToIdeal(tight_size); const test_end = off + increased_size; if (end > off and start < test_end) { return test_end; @@ -1542,7 +1952,8 @@ fn detectAllocCollision(self: *Coff, start: u32, size: u32) ?u32 { } for (self.sections.items(.header)) |header| { - const increased_size = header.size_of_raw_data; + const tight_size = header.size_of_raw_data; + const increased_size = padToIdeal(tight_size); const test_end = header.pointer_to_raw_data + increased_size; if (end > header.pointer_to_raw_data and start < test_end) { return test_end; @@ -1552,7 +1963,7 @@ fn detectAllocCollision(self: *Coff, start: u32, size: u32) ?u32 { return null; } -pub fn allocatedSize(self: *Coff, start: u32) u32 { +fn allocatedSize(self: *Coff, start: u32) u32 { if (start == 0) return 0; var min_pos: u32 = std.math.maxInt(u32); @@ -1566,7 +1977,7 @@ pub fn allocatedSize(self: *Coff, start: u32) u32 { return min_pos - start; } -pub fn findFreeSpace(self: *Coff, object_size: u32, min_alignment: u32) u32 { +fn findFreeSpace(self: *Coff, object_size: u32, min_alignment: u32) u32 { var start: u32 = 0; while (self.detectAllocCollision(start, object_size)) |item_end| { start = mem.alignForwardGeneric(u32, item_end, min_alignment); @@ -1574,6 +1985,17 @@ pub fn findFreeSpace(self: *Coff, object_size: u32, min_alignment: u32) u32 { return start; } +fn allocatedVirtualSize(self: *Coff, start: u32) u32 { + if (start == 0) + return 0; + var min_pos: u32 = std.math.maxInt(u32); + for (self.sections.items(.header)) |header| { + if (header.virtual_address <= start) continue; + if (header.virtual_address < min_pos) min_pos = header.virtual_address; + } + return min_pos - start; +} + inline fn getSizeOfHeaders(self: Coff) u32 { const msdos_hdr_size = msdos_stub.len + 4; return @intCast(u32, msdos_hdr_size + @sizeOf(coff.CoffHeader) + self.getOptionalHeaderSize() + @@ -1614,23 +2036,24 @@ inline fn getSizeOfImage(self: Coff) u32 { /// Returns symbol location corresponding to the set entrypoint (if any). pub fn getEntryPoint(self: Coff) ?SymbolWithLoc { - const entry_name = self.base.options.entry orelse "_start"; // TODO this is incomplete - return self.globals.get(entry_name); + const entry_name = self.base.options.entry orelse "wWinMainCRTStartup"; // TODO this is incomplete + const global_index = self.resolver.get(entry_name) orelse return null; + return self.globals.items[global_index]; } -/// Returns pointer-to-symbol described by `sym_with_loc` descriptor. +/// Returns pointer-to-symbol described by `sym_loc` descriptor. pub fn getSymbolPtr(self: *Coff, sym_loc: SymbolWithLoc) *coff.Symbol { assert(sym_loc.file == null); // TODO linking object files return &self.locals.items[sym_loc.sym_index]; } -/// Returns symbol described by `sym_with_loc` descriptor. 
+/// Returns symbol described by `sym_loc` descriptor. pub fn getSymbol(self: *const Coff, sym_loc: SymbolWithLoc) *const coff.Symbol { assert(sym_loc.file == null); // TODO linking object files return &self.locals.items[sym_loc.sym_index]; } -/// Returns name of the symbol described by `sym_with_loc` descriptor. +/// Returns name of the symbol described by `sym_loc` descriptor. pub fn getSymbolName(self: *const Coff, sym_loc: SymbolWithLoc) []const u8 { assert(sym_loc.file == null); // TODO linking object files const sym = self.getSymbol(sym_loc); @@ -1638,18 +2061,27 @@ pub fn getSymbolName(self: *const Coff, sym_loc: SymbolWithLoc) []const u8 { return self.strtab.get(offset).?; } -/// Returns atom if there is an atom referenced by the symbol described by `sym_with_loc` descriptor. +/// Returns atom if there is an atom referenced by the symbol described by `sym_loc` descriptor. /// Returns null on failure. pub fn getAtomForSymbol(self: *Coff, sym_loc: SymbolWithLoc) ?*Atom { assert(sym_loc.file == null); // TODO linking with object files return self.atom_by_index_table.get(sym_loc.sym_index); } -/// Returns GOT atom that references `sym_with_loc` if one exists. +/// Returns GOT atom that references `sym_loc` if one exists. /// Returns null otherwise. pub fn getGotAtomForSymbol(self: *Coff, sym_loc: SymbolWithLoc) ?*Atom { - const got_index = self.got_entries.get(sym_loc) orelse return null; - return self.atom_by_index_table.get(got_index); + const got_index = self.got_entries_table.get(sym_loc) orelse return null; + const got_entry = self.got_entries.items[got_index]; + return self.getAtomForSymbol(.{ .sym_index = got_entry.sym_index, .file = null }); +} + +/// Returns import atom that references `sym_loc` if one exists. +/// Returns null otherwise. 
+pub fn getImportAtomForSymbol(self: *Coff, sym_loc: SymbolWithLoc) ?*Atom { + const imports_index = self.imports_table.get(sym_loc) orelse return null; + const imports_entry = self.imports.items[imports_index]; + return self.getAtomForSymbol(.{ .sym_index = imports_entry.sym_index, .file = null }); } fn setSectionName(self: *Coff, header: *coff.SectionHeader, name: []const u8) !void { @@ -1663,6 +2095,14 @@ fn setSectionName(self: *Coff, header: *coff.SectionHeader, name: []const u8) !v mem.set(u8, header.name[name_offset.len..], 0); } +fn getSectionName(self: *const Coff, header: *const coff.SectionHeader) []const u8 { + if (header.getName()) |name| { + return name; + } + const offset = header.getNameOffset().?; + return self.strtab.get(offset).?; +} + fn setSymbolName(self: *Coff, symbol: *coff.Symbol, name: []const u8) !void { if (name.len <= 8) { mem.copy(u8, &symbol.name, name); @@ -1725,29 +2165,42 @@ fn logSymtab(self: *Coff) void { } log.debug("globals table:", .{}); - for (self.globals.keys()) |name, id| { - const value = self.globals.values()[id]; - log.debug(" {s} => %{d} in object({?d})", .{ name, value.sym_index, value.file }); + for (self.globals.items) |sym_loc| { + const sym_name = self.getSymbolName(sym_loc); + log.debug(" {s} => %{d} in object({?d})", .{ sym_name, sym_loc.sym_index, sym_loc.file }); } log.debug("GOT entries:", .{}); - for (self.got_entries.keys()) |target, i| { - const got_sym = self.getSymbol(.{ .sym_index = self.got_entries.values()[i], .file = null }); - const target_sym = self.getSymbol(target); + for (self.got_entries.items) |entry, i| { + const got_sym = self.getSymbol(.{ .sym_index = entry.sym_index, .file = null }); + const target_sym = self.getSymbol(entry.target); if (target_sym.section_number == .UNDEFINED) { log.debug(" {d}@{x} => import('{s}')", .{ i, got_sym.value, - self.getSymbolName(target), + self.getSymbolName(entry.target), }); } else { log.debug(" {d}@{x} => local(%{d}) in object({?d}) {s}", .{ i, got_sym.value, - target.sym_index, - target.file, + entry.target.sym_index, + entry.target.file, logSymAttributes(target_sym, &buf), }); } } } + +fn logSections(self: *Coff) void { + log.debug("sections:", .{}); + for (self.sections.items(.header)) |*header| { + log.debug(" {s}: VM({x}, {x}) FILE({x}, {x})", .{ + self.getSectionName(header), + header.virtual_address, + header.virtual_address + header.virtual_size, + header.pointer_to_raw_data, + header.pointer_to_raw_data + header.size_of_raw_data, + }); + } +} diff --git a/src/link/Coff/Atom.zig b/src/link/Coff/Atom.zig index a7608d9a34..ffd8fe45e6 100644 --- a/src/link/Coff/Atom.zig +++ b/src/link/Coff/Atom.zig @@ -4,8 +4,6 @@ const std = @import("std"); const coff = std.coff; const log = std.log.scoped(.link); -const Allocator = std.mem.Allocator; - const Coff = @import("../Coff.zig"); const Reloc = Coff.Reloc; const SymbolWithLoc = Coff.SymbolWithLoc; @@ -41,11 +39,6 @@ pub const empty = Atom{ .next = null, }; -pub fn deinit(self: *Atom, gpa: Allocator) void { - _ = self; - _ = gpa; -} - /// Returns symbol referencing this atom. 
pub fn getSymbol(self: Atom, coff_file: *const Coff) *const coff.Symbol { return coff_file.getSymbol(.{ @@ -118,3 +111,13 @@ pub fn addBaseRelocation(self: *Atom, coff_file: *Coff, offset: u32) !void { } try gop.value_ptr.append(gpa, offset); } + +pub fn addBinding(self: *Atom, coff_file: *Coff, target: SymbolWithLoc) !void { + const gpa = coff_file.base.allocator; + log.debug(" (adding binding to target %{d} in %{d})", .{ target.sym_index, self.sym_index }); + const gop = try coff_file.bindings.getOrPut(gpa, self); + if (!gop.found_existing) { + gop.value_ptr.* = .{}; + } + try gop.value_ptr.append(gpa, target); +} diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index 3ae151491f..474c822ae6 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -861,7 +861,8 @@ pub fn commitDeclState( }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - writeDbgLineNopsBuffered(wasm_file.debug_line.items, src_fn.off, 0, &.{}, src_fn.len); + const debug_line = wasm_file.debug_line_atom.?.code; + writeDbgLineNopsBuffered(debug_line.items, src_fn.off, 0, &.{}, src_fn.len); }, else => unreachable, } @@ -972,23 +973,21 @@ pub fn commitDeclState( }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = try wasm_file.getDebugLineIndex(); - const segment = &wasm_file.segments.items[segment_index]; - const debug_line = &wasm_file.debug_line; - if (needed_size != segment.size) { + const atom = wasm_file.debug_line_atom.?; + const debug_line = &atom.code; + const segment_size = debug_line.items.len; + if (needed_size != segment_size) { log.debug(" needed size does not equal allocated size: {d}", .{needed_size}); - if (needed_size > segment.size) { - log.debug(" allocating {d} bytes for 'debug line' information", .{needed_size - segment.size}); + if (needed_size > segment_size) { + log.debug(" allocating {d} bytes for 'debug line' information", .{needed_size - segment_size}); try debug_line.resize(self.allocator, needed_size); - mem.set(u8, debug_line.items[segment.size..], 0); + mem.set(u8, debug_line.items[segment_size..], 0); } - segment.size = needed_size; debug_line.items.len = needed_size; } - const offset = segment.offset + src_fn.off; writeDbgLineNopsBuffered( debug_line.items, - offset, + src_fn.off, prev_padding_size, dbg_line_buffer.items, next_padding_size, @@ -1146,10 +1145,8 @@ fn updateDeclDebugInfoAllocation(self: *Dwarf, file: *File, atom: *Atom, len: u3 }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = try wasm_file.getDebugInfoIndex(); - const segment = &wasm_file.segments.items[segment_index]; - const offset = segment.offset + atom.off; - try writeDbgInfoNopsToArrayList(gpa, &wasm_file.debug_info, offset, 0, &.{0}, atom.len, false); + const debug_info = &wasm_file.debug_info_atom.?.code; + try writeDbgInfoNopsToArrayList(gpa, debug_info, atom.off, 0, &.{0}, atom.len, false); }, else => unreachable, } @@ -1276,27 +1273,25 @@ fn writeDeclDebugInfo(self: *Dwarf, file: *File, atom: *Atom, dbg_info_buf: []co }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = try wasm_file.getDebugInfoIndex(); - const segment = &wasm_file.segments.items[segment_index]; - const debug_info = &wasm_file.debug_info; - if (needed_size != segment.size) { + const info_atom = wasm_file.debug_info_atom.?; + const debug_info = &info_atom.code; + const segment_size = debug_info.items.len; + if (needed_size != segment_size) { log.debug(" needed size does not equal allocated size: {d}", .{needed_size}); - if (needed_size > segment.size) 
{ - log.debug(" allocating {d} bytes for 'debug info' information", .{needed_size - segment.size}); + if (needed_size > segment_size) { + log.debug(" allocating {d} bytes for 'debug info' information", .{needed_size - segment_size}); try debug_info.resize(self.allocator, needed_size); - mem.set(u8, debug_info.items[segment.size..], 0); + mem.set(u8, debug_info.items[segment_size..], 0); } - segment.size = needed_size; debug_info.items.len = needed_size; } - const offset = segment.offset + atom.off; log.debug(" writeDbgInfoNopsToArrayList debug_info_len={d} offset={d} content_len={d} next_padding_size={d}", .{ - debug_info.items.len, offset, dbg_info_buf.len, next_padding_size, + debug_info.items.len, atom.off, dbg_info_buf.len, next_padding_size, }); try writeDbgInfoNopsToArrayList( gpa, debug_info, - offset, + atom.off, prev_padding_size, dbg_info_buf, next_padding_size, @@ -1337,10 +1332,9 @@ pub fn updateDeclLineNumber(self: *Dwarf, file: *File, decl: *const Module.Decl) }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - const segment_index = wasm_file.getDebugLineIndex() catch unreachable; - const segment = wasm_file.segments.items[segment_index]; - const offset = segment.offset + decl.fn_link.wasm.src_fn.off + self.getRelocDbgLineOff(); - mem.copy(u8, wasm_file.debug_line.items[offset..], &data); + const offset = decl.fn_link.wasm.src_fn.off + self.getRelocDbgLineOff(); + const atom = wasm_file.debug_line_atom.?; + mem.copy(u8, atom.code.items[offset..], &data); }, else => unreachable, } @@ -1576,8 +1570,9 @@ pub fn writeDbgAbbrev(self: *Dwarf, file: *File) !void { }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - try wasm_file.debug_abbrev.resize(wasm_file.base.allocator, needed_size); - mem.copy(u8, wasm_file.debug_abbrev.items, &abbrev_buf); + const debug_abbrev = &wasm_file.debug_abbrev_atom.?.code; + try debug_abbrev.resize(wasm_file.base.allocator, needed_size); + mem.copy(u8, debug_abbrev.items, &abbrev_buf); }, else => unreachable, } @@ -1687,7 +1682,8 @@ pub fn writeDbgInfoHeader(self: *Dwarf, file: *File, module: *Module, low_pc: u6 }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - try writeDbgInfoNopsToArrayList(self.allocator, &wasm_file.debug_info, 0, 0, di_buf.items, jmp_amt, false); + const debug_info = &wasm_file.debug_info_atom.?.code; + try writeDbgInfoNopsToArrayList(self.allocator, debug_info, 0, 0, di_buf.items, jmp_amt, false); }, else => unreachable, } @@ -2016,8 +2012,9 @@ pub fn writeDbgAranges(self: *Dwarf, file: *File, addr: u64, size: u64) !void { }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - try wasm_file.debug_aranges.resize(wasm_file.base.allocator, needed_size); - mem.copy(u8, wasm_file.debug_aranges.items, di_buf.items); + const debug_ranges = &wasm_file.debug_ranges_atom.?.code; + try debug_ranges.resize(wasm_file.base.allocator, needed_size); + mem.copy(u8, debug_ranges.items, di_buf.items); }, else => unreachable, } @@ -2139,7 +2136,8 @@ pub fn writeDbgLineHeader(self: *Dwarf, file: *File, module: *Module) !void { }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - writeDbgLineNopsBuffered(wasm_file.debug_line.items, 0, 0, di_buf.items, jmp_amt); + const debug_line = wasm_file.debug_line_atom.?.code; + writeDbgLineNopsBuffered(debug_line.items, 0, 0, di_buf.items, jmp_amt); }, else => unreachable, } @@ -2287,7 +2285,8 @@ pub fn flushModule(self: *Dwarf, file: *File, module: *Module) !void { }, .wasm => { const wasm_file = file.cast(File.Wasm).?; - mem.copy(u8, wasm_file.debug_info.items[reloc.atom.off + 
reloc.offset ..], &buf); + const debug_info = wasm_file.debug_info_atom.?.code; + mem.copy(u8, debug_info.items[reloc.atom.off + reloc.offset ..], &buf); }, else => unreachable, } diff --git a/src/link/Elf.zig b/src/link/Elf.zig index 0326bffe36..a70473fe07 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -1482,6 +1482,18 @@ fn linkWithLLD(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node) !v try argv.append("--gc-sections"); } + if (self.base.options.print_gc_sections) { + try argv.append("--print-gc-sections"); + } + + if (self.base.options.print_icf_sections) { + try argv.append("--print-icf-sections"); + } + + if (self.base.options.print_map) { + try argv.append("--print-map"); + } + if (self.base.options.eh_frame_hdr) { try argv.append("--eh-frame-hdr"); } diff --git a/src/link/MachO.zig b/src/link/MachO.zig index af25441066..429bf64eb2 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -793,11 +793,13 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) } } else { const sub_path = self.base.options.emit.?.sub_path; - self.base.file = try directory.handle.createFile(sub_path, .{ - .truncate = true, - .read = true, - .mode = link.determineMode(self.base.options), - }); + if (self.base.file == null) { + self.base.file = try directory.handle.createFile(sub_path, .{ + .truncate = true, + .read = true, + .mode = link.determineMode(self.base.options), + }); + } // Index 0 is always a null symbol. try self.locals.append(gpa, .{ .n_strx = 0, @@ -1155,6 +1157,29 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) var ncmds: u32 = 0; try self.writeLinkeditSegmentData(&ncmds, lc_writer); + + // If the last section of __DATA segment is zerofill section, we need to ensure + // that the free space between the end of the last non-zerofill section of __DATA + // segment and the beginning of __LINKEDIT segment is zerofilled as the loader will + // copy-paste this space into memory for quicker zerofill operation. 
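+    // If __DATA contains no non-empty zerofill section, the loop below exits through its
+    // `else` branch and no padding is written.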
+ if (self.data_segment_cmd_index) |data_seg_id| blk: { + var physical_zerofill_start: u64 = 0; + const section_indexes = self.getSectionIndexes(data_seg_id); + for (self.sections.items(.header)[section_indexes.start..section_indexes.end]) |header| { + if (header.isZerofill() and header.size > 0) break; + physical_zerofill_start = header.offset + header.size; + } else break :blk; + const linkedit = self.segments.items[self.linkedit_segment_cmd_index.?]; + const physical_zerofill_size = math.cast(usize, linkedit.fileoff - physical_zerofill_start) orelse + return error.Overflow; + if (physical_zerofill_size > 0) { + var padding = try self.base.allocator.alloc(u8, physical_zerofill_size); + defer self.base.allocator.free(padding); + mem.set(u8, padding, 0); + try self.base.file.?.pwriteAll(padding, physical_zerofill_start); + } + } + try writeDylinkerLC(&ncmds, lc_writer); try self.writeMainLC(&ncmds, lc_writer); try self.writeDylibIdLC(&ncmds, lc_writer); @@ -1435,7 +1460,6 @@ fn parseArchive(self: *MachO, path: []const u8, force_load: bool) !bool { if (force_load) { defer archive.deinit(gpa); - defer file.close(); // Get all offsets from the ToC var offsets = std.AutoArrayHashMap(u32, void).init(gpa); defer offsets.deinit(); @@ -3086,15 +3110,6 @@ pub fn deinit(self: *MachO) void { self.atom_by_index_table.deinit(gpa); } -pub fn closeFiles(self: MachO) void { - for (self.archives.items) |archive| { - archive.file.close(); - } - if (self.d_sym) |ds| { - ds.file.close(); - } -} - fn freeAtom(self: *MachO, atom: *Atom, sect_id: u8, owns_atom: bool) void { log.debug("freeAtom {*}", .{atom}); if (!owns_atom) { @@ -5698,8 +5713,10 @@ fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void { else => unreachable, } - if (self.getSectionByName("__DATA", "__thread_vars")) |_| { - header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; + if (self.getSectionByName("__DATA", "__thread_vars")) |sect_id| { + if (self.sections.items(.header)[sect_id].size > 0) { + header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; + } } header.ncmds = ncmds; diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index 054f75fff3..59a956534e 100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -88,6 +88,7 @@ const ar_hdr = extern struct { }; pub fn deinit(self: *Archive, allocator: Allocator) void { + self.file.close(); for (self.toc.keys()) |*key| { allocator.free(key.*); } diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index a7dc6391c2..ffff0fe5f8 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -306,6 +306,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti } pub fn deinit(self: *DebugSymbols, allocator: Allocator) void { + self.file.close(); self.segments.deinit(allocator); self.sections.deinit(allocator); self.dwarf.deinit(); diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 68eb3b0aee..34af955a5f 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -67,6 +67,18 @@ code_section_index: ?u32 = null, debug_info_index: ?u32 = null, /// The index of the segment representing the custom '.debug_line' section. debug_line_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_loc' section. +debug_loc_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_ranges' section. +debug_ranges_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_pubnames' section. 
+debug_pubnames_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_pubtypes' section. +debug_pubtypes_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_pubtypes' section. +debug_str_index: ?u32 = null, +/// The index of the segment representing the custom '.debug_pubtypes' section. +debug_abbrev_index: ?u32 = null, /// The count of imported functions. This number will be appended /// to the function indexes as their index starts at the lowest non-extern function. imported_functions_count: u32 = 0, @@ -83,24 +95,15 @@ imports: std.AutoHashMapUnmanaged(SymbolLoc, types.Import) = .{}, segments: std.ArrayListUnmanaged(Segment) = .{}, /// Maps a data segment key (such as .rodata) to the index into `segments`. data_segments: std.StringArrayHashMapUnmanaged(u32) = .{}, -/// A list of `types.Segment` which provide meta data -/// about a data symbol such as its name -segment_info: std.ArrayListUnmanaged(types.Segment) = .{}, +/// A table of `types.Segment` which provide meta data +/// about a data symbol such as its name where the key is +/// the segment index, which can be found from `data_segments` +segment_info: std.AutoArrayHashMapUnmanaged(u32, types.Segment) = .{}, /// Deduplicated string table for strings used by symbols, imports and exports. string_table: StringTable = .{}, /// Debug information for wasm dwarf: ?Dwarf = null, -// *debug information* // -/// Contains all bytes for the '.debug_info' section -debug_info: std.ArrayListUnmanaged(u8) = .{}, -/// Contains all bytes for the '.debug_line' section -debug_line: std.ArrayListUnmanaged(u8) = .{}, -/// Contains all bytes for the '.debug_abbrev' section -debug_abbrev: std.ArrayListUnmanaged(u8) = .{}, -/// Contains all bytes for the '.debug_ranges' section -debug_aranges: std.ArrayListUnmanaged(u8) = .{}, - // Output sections /// Output type section func_types: std.ArrayListUnmanaged(wasm.Type) = .{}, @@ -156,6 +159,19 @@ export_names: std.AutoHashMapUnmanaged(SymbolLoc, u32) = .{}, /// The actual table is populated during `flush`. error_table_symbol: ?u32 = null, +// Debug section atoms. These are only set when the current compilation +// unit contains Zig code. The lifetime of these atoms are extended +// until the end of the compiler's lifetime. Meaning they're not freed +// during `flush()` in incremental-mode. +debug_info_atom: ?*Atom = null, +debug_line_atom: ?*Atom = null, +debug_loc_atom: ?*Atom = null, +debug_ranges_atom: ?*Atom = null, +debug_abbrev_atom: ?*Atom = null, +debug_str_atom: ?*Atom = null, +debug_pubnames_atom: ?*Atom = null, +debug_pubtypes_atom: ?*Atom = null, + pub const Segment = struct { alignment: u32, size: u32, @@ -209,6 +225,18 @@ pub const SymbolLoc = struct { } return wasm_bin.string_table.get(wasm_bin.symbols.items[self.index].name); } + + /// From a given symbol location, returns the final location. + /// e.g. when a symbol was resolved and replaced by the symbol + /// in a different file, this will return said location. + /// If the symbol wasn't replaced by another, this will return + /// the given location itself. 
+ pub fn finalLoc(self: SymbolLoc, wasm_bin: *const Wasm) SymbolLoc { + if (wasm_bin.discarded.get(self)) |new_loc| { + return new_loc.finalLoc(wasm_bin); + } + return self; + } }; /// Generic string table that duplicates strings @@ -335,6 +363,7 @@ pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Option }; } + try wasm_bin.initDebugSections(); return wasm_bin; } @@ -363,6 +392,24 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*Wasm { return self; } +/// Initializes symbols and atoms for the debug sections +/// Initialization is only done when compiling Zig code. +/// When Zig is invoked as a linker instead, the atoms +/// and symbols come from the object files instead. +pub fn initDebugSections(self: *Wasm) !void { + if (self.dwarf == null) return; // not compiling Zig code, so no need to pre-initialize debug sections + assert(self.debug_info_index == null); + // this will create an Atom and set the index for us. + self.debug_info_atom = try self.createDebugSectionForIndex(&self.debug_info_index, ".debug_info"); + self.debug_line_atom = try self.createDebugSectionForIndex(&self.debug_line_index, ".debug_line"); + self.debug_loc_atom = try self.createDebugSectionForIndex(&self.debug_loc_index, ".debug_loc"); + self.debug_abbrev_atom = try self.createDebugSectionForIndex(&self.debug_abbrev_index, ".debug_abbrev"); + self.debug_ranges_atom = try self.createDebugSectionForIndex(&self.debug_ranges_index, ".debug_ranges"); + self.debug_str_atom = try self.createDebugSectionForIndex(&self.debug_str_index, ".debug_str"); + self.debug_pubnames_atom = try self.createDebugSectionForIndex(&self.debug_pubnames_index, ".debug_pubnames"); + self.debug_pubtypes_atom = try self.createDebugSectionForIndex(&self.debug_pubtypes_index, ".debug_pubtypes"); +} + fn parseInputFiles(self: *Wasm, files: []const []const u8) !void { for (files) |path| { if (try self.parseObjectFile(path)) continue; @@ -644,16 +691,14 @@ pub fn deinit(self: *Wasm) void { for (self.func_types.items) |*func_type| { func_type.deinit(gpa); } - for (self.segment_info.items) |segment_info| { + for (self.segment_info.values()) |segment_info| { gpa.free(segment_info.name); } for (self.objects.items) |*object| { - object.file.?.close(); object.deinit(gpa); } for (self.archives.items) |*archive| { - archive.file.close(); archive.deinit(gpa); } @@ -692,11 +737,6 @@ pub fn deinit(self: *Wasm) void { if (self.dwarf) |*dwarf| { dwarf.deinit(); } - - self.debug_info.deinit(gpa); - self.debug_line.deinit(gpa); - self.debug_abbrev.deinit(gpa); - self.debug_aranges.deinit(gpa); } pub fn allocateDeclIndexes(self: *Wasm, decl_index: Module.Decl.Index) !void { @@ -1337,16 +1377,7 @@ fn parseAtom(self: *Wasm, atom: *Atom, kind: Kind) !void { const index = gop.value_ptr.*; self.segments.items[index].size += atom.size; - // segment indexes can be off by 1 due to also containing a segment - // for the code section, so we must check if the existing segment - // is larger than that of the code section, and substract the index by 1 in such case. 
- var info_add = if (self.code_section_index) |idx| blk: { - if (idx < index) break :blk @as(u32, 1); - break :blk 0; - } else @as(u32, 0); - if (self.debug_info_index != null) info_add += 1; - if (self.debug_line_index != null) info_add += 1; - symbol.index = index - info_add; + symbol.index = @intCast(u32, self.segment_info.getIndex(index).?); // segment info already exists, so free its memory self.base.allocator.free(segment_name); break :result index; @@ -1359,8 +1390,8 @@ fn parseAtom(self: *Wasm, atom: *Atom, kind: Kind) !void { }); gop.value_ptr.* = index; - const info_index = @intCast(u32, self.segment_info.items.len); - try self.segment_info.append(self.base.allocator, segment_info); + const info_index = @intCast(u32, self.segment_info.count()); + try self.segment_info.put(self.base.allocator, index, segment_info); symbol.index = info_index; break :result index; } @@ -1370,18 +1401,54 @@ fn parseAtom(self: *Wasm, atom: *Atom, kind: Kind) !void { const segment: *Segment = &self.segments.items[final_index]; segment.alignment = std.math.max(segment.alignment, atom.alignment); - if (self.atoms.getPtr(final_index)) |last| { + try self.appendAtomAtIndex(final_index, atom); +} + +/// From a given index, append the given `Atom` at the back of the linked list. +/// Simply inserts it into the map of atoms when it doesn't exist yet. +pub fn appendAtomAtIndex(self: *Wasm, index: u32, atom: *Atom) !void { + if (self.atoms.getPtr(index)) |last| { last.*.next = atom; atom.prev = last.*; last.* = atom; } else { - try self.atoms.putNoClobber(self.base.allocator, final_index, atom); + try self.atoms.putNoClobber(self.base.allocator, index, atom); } } +/// Allocates debug atoms into their respective debug sections +/// to merge them with maybe-existing debug atoms from object files. 
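+/// When a debug section has no segment yet, a dummy segment is appended
+/// for it first and its index is stored in the matching `debug_*_index` field.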
+fn allocateDebugAtoms(self: *Wasm) !void { + if (self.dwarf == null) return; + + const allocAtom = struct { + fn f(bin: *Wasm, maybe_index: *?u32, atom: *Atom) !void { + const index = maybe_index.* orelse idx: { + const index = @intCast(u32, bin.segments.items.len); + try bin.appendDummySegment(); + maybe_index.* = index; + break :idx index; + }; + atom.size = @intCast(u32, atom.code.items.len); + bin.symbols.items[atom.sym_index].index = index; + try bin.appendAtomAtIndex(index, atom); + } + }.f; + + try allocAtom(self, &self.debug_info_index, self.debug_info_atom.?); + try allocAtom(self, &self.debug_line_index, self.debug_line_atom.?); + try allocAtom(self, &self.debug_loc_index, self.debug_loc_atom.?); + try allocAtom(self, &self.debug_str_index, self.debug_str_atom.?); + try allocAtom(self, &self.debug_ranges_index, self.debug_ranges_atom.?); + try allocAtom(self, &self.debug_abbrev_index, self.debug_abbrev_atom.?); + try allocAtom(self, &self.debug_pubnames_index, self.debug_pubnames_atom.?); + try allocAtom(self, &self.debug_pubtypes_index, self.debug_pubtypes_atom.?); +} + fn allocateAtoms(self: *Wasm) !void { // first sort the data segments try sortDataSegments(self); + try allocateDebugAtoms(self); var it = self.atoms.iterator(); while (it.next()) |entry| { @@ -1399,7 +1466,7 @@ fn allocateAtoms(self: *Wasm) !void { atom.size, }); offset += atom.size; - self.symbol_atom.putAssumeCapacity(atom.symbolLoc(), atom); // Update atom pointers + try self.symbol_atom.put(self.base.allocator, atom.symbolLoc(), atom); // Update atom pointers atom = atom.next orelse break; } segment.size = std.mem.alignForwardGeneric(u32, offset, segment.alignment); @@ -1753,7 +1820,7 @@ fn setupMemory(self: *Wasm) !void { /// From a given object's index and the index of the segment, returns the corresponding /// index of the segment within the final data section. When the segment does not yet /// exist, a new one will be initialized and appended. The new index will be returned in that case. 
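+/// Returns `null` when the relocatable data belongs to an unrecognized debug
+/// section, in which case the caller is expected to skip it.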
-pub fn getMatchingSegment(self: *Wasm, object_index: u16, relocatable_index: u32) !u32 {
+pub fn getMatchingSegment(self: *Wasm, object_index: u16, relocatable_index: u32) !?u32 {
     const object: Object = self.objects.items[object_index];
     const relocatable_data = object.relocatable_data[relocatable_index];
     const index = @intCast(u32, self.segments.items.len);
@@ -1765,27 +1832,83 @@ pub fn getMatchingSegment(self: *Wasm, object_index: u16, relocatable_index: u32
             const result = try self.data_segments.getOrPut(self.base.allocator, segment_info.outputName(merge_segment));
             if (!result.found_existing) {
                 result.value_ptr.* = index;
-                try self.segments.append(self.base.allocator, .{
-                    .alignment = 1,
-                    .size = 0,
-                    .offset = 0,
-                });
+                try self.appendDummySegment();
                 return index;
             } else return result.value_ptr.*;
         },
         .code => return self.code_section_index orelse blk: {
             self.code_section_index = index;
-            try self.segments.append(self.base.allocator, .{
-                .alignment = 1,
-                .size = 0,
-                .offset = 0,
-            });
+            try self.appendDummySegment();
             break :blk index;
         },
-        .custom => return error.@"TODO: Custom section relocations for wasm",
+        .debug => {
+            const debug_name = object.getDebugName(relocatable_data);
+            if (mem.eql(u8, debug_name, ".debug_info")) {
+                return self.debug_info_index orelse blk: {
+                    self.debug_info_index = index;
+                    try self.appendDummySegment();
+                    break :blk index;
+                };
+            } else if (mem.eql(u8, debug_name, ".debug_line")) {
+                return self.debug_line_index orelse blk: {
+                    self.debug_line_index = index;
+                    try self.appendDummySegment();
+                    break :blk index;
+                };
+            } else if (mem.eql(u8, debug_name, ".debug_loc")) {
+                return self.debug_loc_index orelse blk: {
+                    self.debug_loc_index = index;
+                    try self.appendDummySegment();
+                    break :blk index;
+                };
+            } else if (mem.eql(u8, debug_name, ".debug_ranges")) {
+                return self.debug_ranges_index orelse blk: {
+                    self.debug_ranges_index = index;
+                    try self.appendDummySegment();
+                    break :blk index;
+                };
+            } else if (mem.eql(u8, debug_name, ".debug_pubnames")) {
+                return self.debug_pubnames_index orelse blk: {
+                    self.debug_pubnames_index = index;
+                    try self.appendDummySegment();
+                    break :blk index;
+                };
+            } else if (mem.eql(u8, debug_name, ".debug_pubtypes")) {
+                return self.debug_pubtypes_index orelse blk: {
+                    self.debug_pubtypes_index = index;
+                    try self.appendDummySegment();
+                    break :blk index;
+                };
+            } else if (mem.eql(u8, debug_name, ".debug_abbrev")) {
+                return self.debug_abbrev_index orelse blk: {
+                    self.debug_abbrev_index = index;
+                    try self.appendDummySegment();
+                    break :blk index;
+                };
+            } else if (mem.eql(u8, debug_name, ".debug_str")) {
+                return self.debug_str_index orelse blk: {
+                    self.debug_str_index = index;
+                    try self.appendDummySegment();
+                    break :blk index;
+                };
+            } else {
+                log.warn("found unknown debug section '{s}'", .{debug_name});
+                log.warn(" debug section will be skipped", .{});
+                return null;
+            }
+        },
     }
 }
+/// Appends a new segment with default field values.
+fn appendDummySegment(self: *Wasm) !void {
+    try self.segments.append(self.base.allocator, .{
+        .alignment = 1,
+        .size = 0,
+        .offset = 0,
+    });
+}
+
 /// Returns the symbol index of the error name table.
 ///
 /// When the symbol does not yet exist, it will create a new one instead.
@@ -1903,50 +2026,52 @@ fn populateErrorNameTable(self: *Wasm) !void { try self.parseAtom(names_atom, .{ .data = .read_only }); } -pub fn getDebugInfoIndex(self: *Wasm) !u32 { - assert(self.dwarf != null); - return self.debug_info_index orelse { - self.debug_info_index = @intCast(u32, self.segments.items.len); - const segment = try self.segments.addOne(self.base.allocator); - segment.* = .{ - .size = 0, - .offset = 0, - // debug sections always have alignment '1' - .alignment = 1, - }; - return self.debug_info_index.?; - }; -} +/// From a given index variable, creates a new debug section. +/// This initializes the index, appends a new segment, +/// and finally, creates a managed `Atom`. +pub fn createDebugSectionForIndex(self: *Wasm, index: *?u32, name: []const u8) !*Atom { + const new_index = @intCast(u32, self.segments.items.len); + index.* = new_index; + try self.appendDummySegment(); + // _ = index; -pub fn getDebugLineIndex(self: *Wasm) !u32 { - assert(self.dwarf != null); - return self.debug_line_index orelse { - self.debug_line_index = @intCast(u32, self.segments.items.len); - const segment = try self.segments.addOne(self.base.allocator); - segment.* = .{ - .size = 0, - .offset = 0, - .alignment = 1, - }; - return self.debug_line_index.?; + const sym_index = self.symbols_free_list.popOrNull() orelse idx: { + const tmp_index = @intCast(u32, self.symbols.items.len); + _ = try self.symbols.addOne(self.base.allocator); + break :idx tmp_index; }; + self.symbols.items[sym_index] = .{ + .tag = .section, + .name = try self.string_table.put(self.base.allocator, name), + .index = 0, + .flags = @enumToInt(Symbol.Flag.WASM_SYM_BINDING_LOCAL), + }; + + const atom = try self.base.allocator.create(Atom); + atom.* = Atom.empty; + atom.alignment = 1; // debug sections are always 1-byte-aligned + atom.sym_index = sym_index; + try self.managed_atoms.append(self.base.allocator, atom); + try self.symbol_atom.put(self.base.allocator, atom.symbolLoc(), atom); + return atom; } fn resetState(self: *Wasm) void { - for (self.segment_info.items) |*segment_info| { + for (self.segment_info.values()) |segment_info| { self.base.allocator.free(segment_info.name); } - const mod = self.base.options.module.?; - var decl_it = self.decls.keyIterator(); - while (decl_it.next()) |decl_index_ptr| { - const decl = mod.declPtr(decl_index_ptr.*); - const atom = &decl.link.wasm; - atom.next = null; - atom.prev = null; + if (self.base.options.module) |mod| { + var decl_it = self.decls.keyIterator(); + while (decl_it.next()) |decl_index_ptr| { + const decl = mod.declPtr(decl_index_ptr.*); + const atom = &decl.link.wasm; + atom.next = null; + atom.prev = null; - for (atom.locals.items) |*local_atom| { - local_atom.next = null; - local_atom.prev = null; + for (atom.locals.items) |*local_atom| { + local_atom.next = null; + local_atom.prev = null; + } } } self.functions.clearRetainingCapacity(); @@ -1959,6 +2084,12 @@ fn resetState(self: *Wasm) void { self.code_section_index = null; self.debug_info_index = null; self.debug_line_index = null; + self.debug_loc_index = null; + self.debug_str_index = null; + self.debug_ranges_index = null; + self.debug_abbrev_index = null; + self.debug_pubnames_index = null; + self.debug_pubtypes_index = null; } pub fn flush(self: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) !void { @@ -2036,29 +2167,34 @@ pub fn flushModule(self: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod defer self.resetState(); try self.setupStart(); try self.setupImports(); - const mod = 
self.base.options.module.?; - var decl_it = self.decls.keyIterator(); - while (decl_it.next()) |decl_index_ptr| { - const decl = mod.declPtr(decl_index_ptr.*); - if (decl.isExtern()) continue; - const atom = &decl.*.link.wasm; - if (decl.ty.zigTypeTag() == .Fn) { - try self.parseAtom(atom, .{ .function = decl.fn_link.wasm }); - } else if (decl.getVariable()) |variable| { - if (!variable.is_mutable) { - try self.parseAtom(atom, .{ .data = .read_only }); - } else if (variable.init.isUndefDeep()) { - try self.parseAtom(atom, .{ .data = .uninitialized }); + if (self.base.options.module) |mod| { + var decl_it = self.decls.keyIterator(); + while (decl_it.next()) |decl_index_ptr| { + const decl = mod.declPtr(decl_index_ptr.*); + if (decl.isExtern()) continue; + const atom = &decl.*.link.wasm; + if (decl.ty.zigTypeTag() == .Fn) { + try self.parseAtom(atom, .{ .function = decl.fn_link.wasm }); + } else if (decl.getVariable()) |variable| { + if (!variable.is_mutable) { + try self.parseAtom(atom, .{ .data = .read_only }); + } else if (variable.init.isUndefDeep()) { + try self.parseAtom(atom, .{ .data = .uninitialized }); + } else { + try self.parseAtom(atom, .{ .data = .initialized }); + } } else { - try self.parseAtom(atom, .{ .data = .initialized }); + try self.parseAtom(atom, .{ .data = .read_only }); + } + + // also parse atoms for a decl's locals + for (atom.locals.items) |*local_atom| { + try self.parseAtom(local_atom, .{ .data = .read_only }); } - } else { - try self.parseAtom(atom, .{ .data = .read_only }); } - // also parse atoms for a decl's locals - for (atom.locals.items) |*local_atom| { - try self.parseAtom(local_atom, .{ .data = .read_only }); + if (self.dwarf) |*dwarf| { + try dwarf.flushModule(&self.base, self.base.options.module.?); } } @@ -2066,9 +2202,6 @@ pub fn flushModule(self: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod try object.parseIntoAtoms(self.base.allocator, @intCast(u16, object_index), self); } - if (self.dwarf) |*dwarf| { - try dwarf.flushModule(&self.base, self.base.options.module.?); - } try self.allocateAtoms(); try self.setupMemory(); self.mapFunctionTable(); @@ -2424,19 +2557,44 @@ pub fn flushModule(self: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod } } else if (!self.base.options.strip) { if (self.dwarf) |*dwarf| { - if (self.debug_info_index != null) { - try dwarf.writeDbgAbbrev(&self.base); - // for debug info and ranges, the address is always 0, - // as locations are always offsets relative to 'code' section. - try dwarf.writeDbgInfoHeader(&self.base, mod, 0, code_section_size); - try dwarf.writeDbgAranges(&self.base, 0, code_section_size); - try dwarf.writeDbgLineHeader(&self.base, mod); + const mod = self.base.options.module.?; + try dwarf.writeDbgAbbrev(&self.base); + // for debug info and ranges, the address is always 0, + // as locations are always offsets relative to 'code' section. 
+ try dwarf.writeDbgInfoHeader(&self.base, mod, 0, code_section_size); + try dwarf.writeDbgAranges(&self.base, 0, code_section_size); + try dwarf.writeDbgLineHeader(&self.base, mod); + } - try emitDebugSection(file, self.debug_info.items, ".debug_info"); - try emitDebugSection(file, self.debug_aranges.items, ".debug_ranges"); - try emitDebugSection(file, self.debug_abbrev.items, ".debug_abbrev"); - try emitDebugSection(file, self.debug_line.items, ".debug_line"); - try emitDebugSection(file, dwarf.strtab.items, ".debug_str"); + var debug_bytes = std.ArrayList(u8).init(self.base.allocator); + defer debug_bytes.deinit(); + + const DebugSection = struct { + name: []const u8, + index: ?u32, + }; + + const debug_sections: []const DebugSection = &.{ + .{ .name = ".debug_info", .index = self.debug_info_index }, + .{ .name = ".debug_pubtypes", .index = self.debug_pubtypes_index }, + .{ .name = ".debug_abbrev", .index = self.debug_abbrev_index }, + .{ .name = ".debug_line", .index = self.debug_line_index }, + .{ .name = ".debug_str", .index = self.debug_str_index }, + .{ .name = ".debug_pubnames", .index = self.debug_pubnames_index }, + .{ .name = ".debug_loc", .index = self.debug_loc_index }, + .{ .name = ".debug_ranges", .index = self.debug_ranges_index }, + }; + + for (debug_sections) |item| { + if (item.index) |index| { + var atom = self.atoms.get(index).?.getFirst(); + while (true) { + atom.resolveRelocs(self); + try debug_bytes.appendSlice(atom.code.items); + atom = atom.next orelse break; + } + try emitDebugSection(file, debug_bytes.items, item.name); + debug_bytes.clearRetainingCapacity(); } } try self.emitNameSection(file, arena); @@ -2444,6 +2602,7 @@ pub fn flushModule(self: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod } fn emitDebugSection(file: fs.File, data: []const u8, name: []const u8) !void { + if (data.len == 0) return; const header_offset = try reserveCustomSectionHeader(file); const writer = file.writer(); try leb.writeULEB128(writer, @intCast(u32, name.len)); @@ -3057,14 +3216,26 @@ fn writeVecSectionHeader(file: fs.File, offset: u64, section: wasm.Section, size buf[0] = @enumToInt(section); leb.writeUnsignedFixed(5, buf[1..6], size); leb.writeUnsignedFixed(5, buf[6..], items); - try file.pwriteAll(&buf, offset); + + if (builtin.target.os.tag == .windows) { + // https://github.com/ziglang/zig/issues/12783 + const curr_pos = try file.getPos(); + try file.pwriteAll(&buf, offset); + try file.seekTo(curr_pos); + } else try file.pwriteAll(&buf, offset); } fn writeCustomSectionHeader(file: fs.File, offset: u64, size: u32) !void { var buf: [1 + 5]u8 = undefined; buf[0] = 0; // 0 = 'custom' section leb.writeUnsignedFixed(5, buf[1..6], size); - try file.pwriteAll(&buf, offset); + + if (builtin.target.os.tag == .windows) { + // https://github.com/ziglang/zig/issues/12783 + const curr_pos = try file.getPos(); + try file.pwriteAll(&buf, offset); + try file.seekTo(curr_pos); + } else try file.pwriteAll(&buf, offset); } fn emitLinkSection(self: *Wasm, file: fs.File, arena: Allocator, symbol_table: *std.AutoArrayHashMap(SymbolLoc, u32)) !void { @@ -3149,8 +3320,8 @@ fn emitSegmentInfo(self: *Wasm, file: fs.File, arena: Allocator) !void { var payload = std.ArrayList(u8).init(arena); const writer = payload.writer(); try leb.writeULEB128(file.writer(), @enumToInt(types.SubsectionType.WASM_SEGMENT_INFO)); - try leb.writeULEB128(writer, @intCast(u32, self.segment_info.items.len)); - for (self.segment_info.items) |segment_info| { + try leb.writeULEB128(writer, @intCast(u32, 
self.segment_info.count())); + for (self.segment_info.values()) |segment_info| { log.debug("Emit segment: {s} align({d}) flags({b})", .{ segment_info.name, @ctz(segment_info.alignment), diff --git a/src/link/Wasm/Archive.zig b/src/link/Wasm/Archive.zig index c80d26d17d..b1cce15b1d 100644 --- a/src/link/Wasm/Archive.zig +++ b/src/link/Wasm/Archive.zig @@ -95,6 +95,7 @@ const ar_hdr = extern struct { }; pub fn deinit(archive: *Archive, allocator: Allocator) void { + archive.file.close(); for (archive.toc.keys()) |*key| { allocator.free(key.*); } diff --git a/src/link/Wasm/Atom.zig b/src/link/Wasm/Atom.zig index 9e7f7a5a76..3e288fa018 100644 --- a/src/link/Wasm/Atom.zig +++ b/src/link/Wasm/Atom.zig @@ -90,6 +90,19 @@ pub fn getFirst(self: *Atom) *Atom { return tmp; } +/// Unlike `getFirst` this returns the first `*Atom` that was +/// produced from Zig code, rather than an object file. +/// This is useful for debug sections where we want to extend +/// the bytes, and don't want to overwrite existing Atoms. +pub fn getFirstZigAtom(self: *Atom) *Atom { + if (self.file == null) return self; + var tmp = self; + return while (tmp.prev) |prev| { + if (prev.file == null) break prev; + tmp = prev; + } else unreachable; // must allocate an Atom first! +} + /// Returns the location of the symbol that represents this `Atom` pub fn symbolLoc(self: Atom) Wasm.SymbolLoc { return .{ .file = self.file, .index = self.sym_index }; @@ -145,7 +158,7 @@ pub fn resolveRelocs(self: *Atom, wasm_bin: *const Wasm) void { /// All values will be represented as a `u64` as all values can fit within it. /// The final value must be casted to the correct size. fn relocationValue(self: Atom, relocation: types.Relocation, wasm_bin: *const Wasm) u64 { - const target_loc: Wasm.SymbolLoc = .{ .file = self.file, .index = relocation.index }; + const target_loc = (Wasm.SymbolLoc{ .file = self.file, .index = relocation.index }).finalLoc(wasm_bin); const symbol = target_loc.getSymbol(wasm_bin).*; switch (relocation.relocation_type) { .R_WASM_FUNCTION_INDEX_LEB => return symbol.index, @@ -174,19 +187,34 @@ fn relocationValue(self: Atom, relocation: types.Relocation, wasm_bin: *const Wa => { std.debug.assert(symbol.tag == .data and !symbol.isUndefined()); const merge_segment = wasm_bin.base.options.output_mode != .Obj; - const target_atom_loc = wasm_bin.discarded.get(target_loc) orelse target_loc; - const target_atom = wasm_bin.symbol_atom.get(target_atom_loc).?; + const target_atom = wasm_bin.symbol_atom.get(target_loc).?; const segment_info = if (target_atom.file) |object_index| blk: { break :blk wasm_bin.objects.items[object_index].segment_info; - } else wasm_bin.segment_info.items; + } else wasm_bin.segment_info.values(); const segment_name = segment_info[symbol.index].outputName(merge_segment); const segment_index = wasm_bin.data_segments.get(segment_name).?; const segment = wasm_bin.segments.items[segment_index]; return target_atom.offset + segment.offset + (relocation.addend orelse 0); }, .R_WASM_EVENT_INDEX_LEB => return symbol.index, - .R_WASM_SECTION_OFFSET_I32, - .R_WASM_FUNCTION_OFFSET_I32, - => return relocation.offset, + .R_WASM_SECTION_OFFSET_I32 => { + const target_atom = wasm_bin.symbol_atom.get(target_loc).?; + return target_atom.offset + (relocation.addend orelse 0); + }, + .R_WASM_FUNCTION_OFFSET_I32 => { + const target_atom = wasm_bin.symbol_atom.get(target_loc).?; + var atom = target_atom.getFirst(); + var offset: u32 = 0; + // TODO: Calculate this during atom allocation, rather than + // this linear calculation. 
For now it's done here as atoms
+            // are being sorted after atom allocation, as functions aren't
+            // merged until later.
+            while (true) {
+                offset += 5; // each atom uses 5 bytes to store its body's size
+                if (atom == target_atom) break;
+                atom = atom.next.?;
+            }
+            return target_atom.offset + offset + (relocation.addend orelse 0);
+        },
     }
 }
diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig
index 50827ca9fb..808608fac5 100644
--- a/src/link/Wasm/Object.zig
+++ b/src/link/Wasm/Object.zig
@@ -63,16 +63,21 @@ relocatable_data: []const RelocatableData = &.{},
 /// import name, module name and export names. Each string will be deduplicated
 /// and returns an offset into the table.
 string_table: Wasm.StringTable = .{},
+/// All the names of each debug section found in the current object file.
+/// Each name is null-terminated. The name can be found
+/// from the `index` offset within the `RelocatableData`.
+debug_names: [:0]const u8,
 /// Represents a single item within a section (depending on its `type`)
 const RelocatableData = struct {
     /// The type of the relocatable data
-    type: enum { data, code, custom },
+    type: enum { data, code, debug },
     /// Pointer to the data of the segment, where its length is written to `size`
     data: [*]u8,
     /// The size in bytes of the data representing the segment within the section
     size: u32,
-    /// The index within the section itself
+    /// The index within the section itself, or in case of a debug section,
+    /// the offset within the `string_table`.
     index: u32,
     /// The offset within the section where the data starts
     offset: u32,
@@ -96,9 +101,16 @@ const RelocatableData = struct {
         return switch (self.type) {
             .data => .data,
             .code => .function,
-            .custom => .section,
+            .debug => .section,
         };
     }
+
+    /// Returns the index within a section itself, or in case of a debug section,
+    /// returns the section index within the object file.
+    pub fn getIndex(self: RelocatableData) u32 {
+        if (self.type == .debug) return self.section_index;
+        return self.index;
+    }
 };
 pub const InitError = error{NotObjectFile} || ParseError || std.fs.File.ReadError;
@@ -111,6 +123,7 @@ pub fn create(gpa: Allocator, file: std.fs.File, name: []const u8, maybe_max_siz
     var object: Object = .{
         .file = file,
         .name = try gpa.dupe(u8, name),
+        .debug_names = &.{},
     };
     var is_object_file: bool = false;
@@ -141,6 +154,9 @@ pub fn create(gpa: Allocator, file: std.fs.File, name: []const u8, maybe_max_siz
 /// Frees all memory of `Object` at once. The given `Allocator` must be
 /// the same allocator that was used when `init` was called.
 pub fn deinit(self: *Object, gpa: Allocator) void {
+    if (self.file) |file| {
+        file.close();
+    }
     for (self.func_types) |func_ty| {
         gpa.free(func_ty.params);
         gpa.free(func_ty.returns);
@@ -197,6 +213,11 @@ pub fn importedCountByKind(self: *const Object, kind: std.wasm.ExternalKind) u32
     } else i;
 }
+/// From a given `RelocatableData`, find the corresponding debug section name.
+pub fn getDebugName(self: *const Object, relocatable_data: RelocatableData) []const u8 {
+    return self.string_table.get(relocatable_data.index);
+}
+
 /// Checks if the object file is an MVP version.
 /// When that's the case, we check if there's an import table definiton with its name
 /// set to '__indirect_function_table".
When that's also the case, @@ -328,10 +349,15 @@ fn Parser(comptime ReaderType: type) type { self.object.version = version; var relocatable_data = std.ArrayList(RelocatableData).init(gpa); + var debug_names = std.ArrayList(u8).init(gpa); - errdefer while (relocatable_data.popOrNull()) |rel_data| { - gpa.free(rel_data.data[0..rel_data.size]); - } else relocatable_data.deinit(); + errdefer { + while (relocatable_data.popOrNull()) |rel_data| { + gpa.free(rel_data.data[0..rel_data.size]); + } else relocatable_data.deinit(); + gpa.free(debug_names.items); + debug_names.deinit(); + } var section_index: u32 = 0; while (self.reader.reader().readByte()) |byte| : (section_index += 1) { @@ -347,11 +373,26 @@ fn Parser(comptime ReaderType: type) type { if (std.mem.eql(u8, name, "linking")) { is_object_file.* = true; + self.object.relocatable_data = relocatable_data.items; // at this point no new relocatable sections will appear so we're free to store them. try self.parseMetadata(gpa, @intCast(usize, reader.context.bytes_left)); } else if (std.mem.startsWith(u8, name, "reloc")) { try self.parseRelocations(gpa); } else if (std.mem.eql(u8, name, "target_features")) { try self.parseFeatures(gpa); + } else if (std.mem.startsWith(u8, name, ".debug")) { + const debug_size = @intCast(u32, reader.context.bytes_left); + const debug_content = try gpa.alloc(u8, debug_size); + errdefer gpa.free(debug_content); + try reader.readNoEof(debug_content); + + try relocatable_data.append(.{ + .type = .debug, + .data = debug_content.ptr, + .size = debug_size, + .index = try self.object.string_table.put(gpa, name), + .offset = 0, // debug sections only contain 1 entry, so no need to calculate offset + .section_index = section_index, + }); } else { try reader.skipBytes(reader.context.bytes_left, .{}); } @@ -737,7 +778,12 @@ fn Parser(comptime ReaderType: type) type { }, .section => { symbol.index = try leb.readULEB128(u32, reader); - symbol.name = try self.object.string_table.put(gpa, @tagName(symbol.tag)); + for (self.object.relocatable_data) |data| { + if (data.section_index == symbol.index) { + symbol.name = data.index; + break; + } + } }, else => { symbol.index = try leb.readULEB128(u32, reader); @@ -827,7 +873,6 @@ fn assertEnd(reader: anytype) !void { /// Parses an object file into atoms, for code and data sections pub fn parseIntoAtoms(self: *Object, gpa: Allocator, object_index: u16, wasm_bin: *Wasm) !void { - log.debug("Parsing data section into atoms", .{}); const Key = struct { kind: Symbol.Tag, index: u32, @@ -839,7 +884,7 @@ pub fn parseIntoAtoms(self: *Object, gpa: Allocator, object_index: u16, wasm_bin for (self.symtable) |symbol, symbol_index| { switch (symbol.tag) { - .function, .data => if (!symbol.isUndefined()) { + .function, .data, .section => if (!symbol.isUndefined()) { const gop = try symbol_for_segment.getOrPut(.{ .kind = symbol.tag, .index = symbol.index }); const sym_idx = @intCast(u32, symbol_index); if (!gop.found_existing) { @@ -852,12 +897,9 @@ pub fn parseIntoAtoms(self: *Object, gpa: Allocator, object_index: u16, wasm_bin } for (self.relocatable_data) |relocatable_data, index| { - const symbols = symbol_for_segment.getPtr(.{ - .kind = relocatable_data.getSymbolKind(), - .index = @intCast(u32, relocatable_data.index), - }) orelse continue; // encountered a segment we do not create an atom for - const sym_index = symbols.pop(); - const final_index = try wasm_bin.getMatchingSegment(object_index, @intCast(u32, index)); + const final_index = (try wasm_bin.getMatchingSegment(object_index, 
@intCast(u32, index))) orelse { + continue; // found unknown section, so skip parsing into atom as we do not know how to handle it. + }; const atom = try gpa.create(Atom); atom.* = Atom.empty; @@ -870,7 +912,6 @@ pub fn parseIntoAtoms(self: *Object, gpa: Allocator, object_index: u16, wasm_bin atom.file = object_index; atom.size = relocatable_data.size; atom.alignment = relocatable_data.getAlignment(self); - atom.sym_index = sym_index; const relocations: []types.Relocation = self.relocations.get(relocatable_data.section_index) orelse &.{}; for (relocations) |relocation| { @@ -892,28 +933,31 @@ pub fn parseIntoAtoms(self: *Object, gpa: Allocator, object_index: u16, wasm_bin try atom.code.appendSlice(gpa, relocatable_data.data[0..relocatable_data.size]); - // symbols referencing the same atom will be added as alias - // or as 'parent' when they are global. - while (symbols.popOrNull()) |idx| { - const alias_symbol = self.symtable[idx]; - const symbol = self.symtable[atom.sym_index]; - if (alias_symbol.isGlobal() and symbol.isLocal()) { - atom.sym_index = idx; + if (symbol_for_segment.getPtr(.{ + .kind = relocatable_data.getSymbolKind(), + .index = relocatable_data.getIndex(), + })) |symbols| { + atom.sym_index = symbols.pop(); + + // symbols referencing the same atom will be added as alias + // or as 'parent' when they are global. + while (symbols.popOrNull()) |idx| { + const alias_symbol = self.symtable[idx]; + const symbol = self.symtable[atom.sym_index]; + if (alias_symbol.isGlobal() and symbol.isLocal()) { + atom.sym_index = idx; + } } + try wasm_bin.symbol_atom.putNoClobber(gpa, atom.symbolLoc(), atom); } - try wasm_bin.symbol_atom.putNoClobber(gpa, atom.symbolLoc(), atom); const segment: *Wasm.Segment = &wasm_bin.segments.items[final_index]; - segment.alignment = std.math.max(segment.alignment, atom.alignment); - - if (wasm_bin.atoms.getPtr(final_index)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try wasm_bin.atoms.putNoClobber(gpa, final_index, atom); + if (relocatable_data.type == .data) { //code section and debug sections are 1-byte aligned + segment.alignment = std.math.max(segment.alignment, atom.alignment); } - log.debug("Parsed into atom: '{s}'", .{self.string_table.get(self.symtable[atom.sym_index].name)}); + + try wasm_bin.appendAtomAtIndex(final_index, atom); + log.debug("Parsed into atom: '{s}' at segment index {d}", .{ self.string_table.get(self.symtable[atom.sym_index].name), final_index }); } } diff --git a/src/link/strtab.zig b/src/link/strtab.zig index 8e314f189f..abb58defef 100644 --- a/src/link/strtab.zig +++ b/src/link/strtab.zig @@ -110,6 +110,10 @@ pub fn StringTable(comptime log_scope: @Type(.EnumLiteral)) type { return self.get(off) orelse unreachable; } + pub fn items(self: Self) []const u8 { + return self.buffer.items; + } + pub fn len(self: Self) usize { return self.buffer.items.len; } diff --git a/src/main.zig b/src/main.zig index 6263a6a402..aaea682c7b 100644 --- a/src/main.zig +++ b/src/main.zig @@ -268,7 +268,7 @@ pub fn mainArgs(gpa: Allocator, arena: Allocator, args: []const []const u8) !voi } else if (mem.eql(u8, cmd, "init-lib")) { return cmdInit(gpa, arena, cmd_args, .Lib); } else if (mem.eql(u8, cmd, "targets")) { - const info = try detectNativeTargetInfo(arena, .{}); + const info = try detectNativeTargetInfo(.{}); const stdout = io.getStdOut().writer(); return @import("print_targets.zig").cmdTargets(arena, cmd_args, stdout, info.target); } else if (mem.eql(u8, cmd, "version")) { @@ -691,6 +691,9 @@ fn 
buildOutputType( var linker_max_memory: ?u64 = null; var linker_shared_memory: bool = false; var linker_global_base: ?u64 = null; + var linker_print_gc_sections: bool = false; + var linker_print_icf_sections: bool = false; + var linker_print_map: bool = false; var linker_z_nodelete = false; var linker_z_notext = false; var linker_z_defs = false; @@ -1816,6 +1819,12 @@ fn buildOutputType( linker_gc_sections = true; } else if (mem.eql(u8, arg, "--no-gc-sections")) { linker_gc_sections = false; + } else if (mem.eql(u8, arg, "--print-gc-sections")) { + linker_print_gc_sections = true; + } else if (mem.eql(u8, arg, "--print-icf-sections")) { + linker_print_icf_sections = true; + } else if (mem.eql(u8, arg, "--print-map")) { + linker_print_map = true; } else if (mem.eql(u8, arg, "--allow-shlib-undefined") or mem.eql(u8, arg, "-allow-shlib-undefined")) { @@ -2258,7 +2267,7 @@ fn buildOutputType( } const cross_target = try parseCrossTargetOrReportFatalError(arena, target_parse_options); - const target_info = try detectNativeTargetInfo(gpa, cross_target); + const target_info = try detectNativeTargetInfo(cross_target); if (target_info.target.os.tag != .freestanding) { if (ensure_libc_on_non_freestanding) @@ -2911,6 +2920,9 @@ fn buildOutputType( .linker_initial_memory = linker_initial_memory, .linker_max_memory = linker_max_memory, .linker_shared_memory = linker_shared_memory, + .linker_print_gc_sections = linker_print_gc_sections, + .linker_print_icf_sections = linker_print_icf_sections, + .linker_print_map = linker_print_map, .linker_global_base = linker_global_base, .linker_export_symbol_names = linker_export_symbol_names.items, .linker_z_nodelete = linker_z_nodelete, @@ -3271,7 +3283,7 @@ fn runOrTest( if (std.process.can_execv and arg_mode == .run and !watch) { // execv releases the locks; no need to destroy the Compilation here. const err = std.process.execv(gpa, argv.items); - try warnAboutForeignBinaries(gpa, arena, arg_mode, target_info, link_libc); + try warnAboutForeignBinaries(arena, arg_mode, target_info, link_libc); const cmd = try std.mem.join(arena, " ", argv.items); fatal("the following command failed to execve with '{s}':\n{s}", .{ @errorName(err), cmd }); } else if (std.process.can_spawn) { @@ -3288,7 +3300,7 @@ fn runOrTest( } const term = child.spawnAndWait() catch |err| { - try warnAboutForeignBinaries(gpa, arena, arg_mode, target_info, link_libc); + try warnAboutForeignBinaries(arena, arg_mode, target_info, link_libc); const cmd = try std.mem.join(arena, " ", argv.items); fatal("the following command failed with '{s}':\n{s}", .{ @errorName(err), cmd }); }; @@ -3902,7 +3914,7 @@ pub fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !voi gimmeMoreOfThoseSweetSweetFileDescriptors(); const cross_target: std.zig.CrossTarget = .{}; - const target_info = try detectNativeTargetInfo(gpa, cross_target); + const target_info = try detectNativeTargetInfo(cross_target); const exe_basename = try std.zig.binNameAlloc(arena, .{ .root_name = "build", @@ -4944,8 +4956,8 @@ test "fds" { gimmeMoreOfThoseSweetSweetFileDescriptors(); } -fn detectNativeTargetInfo(gpa: Allocator, cross_target: std.zig.CrossTarget) !std.zig.system.NativeTargetInfo { - return std.zig.system.NativeTargetInfo.detect(gpa, cross_target); +fn detectNativeTargetInfo(cross_target: std.zig.CrossTarget) !std.zig.system.NativeTargetInfo { + return std.zig.system.NativeTargetInfo.detect(cross_target); } /// Indicate that we are now terminating with a successful exit code. 
@@ -5308,14 +5320,13 @@ fn parseIntSuffix(arg: []const u8, prefix_len: usize) u64 { } fn warnAboutForeignBinaries( - gpa: Allocator, arena: Allocator, arg_mode: ArgMode, target_info: std.zig.system.NativeTargetInfo, link_libc: bool, ) !void { const host_cross_target: std.zig.CrossTarget = .{}; - const host_target_info = try detectNativeTargetInfo(gpa, host_cross_target); + const host_target_info = try detectNativeTargetInfo(host_cross_target); switch (host_target_info.getExternalExecutor(target_info, .{ .link_libc = link_libc })) { .native => return, diff --git a/src/test.zig b/src/test.zig index babded13f9..f26c65f3f8 100644 --- a/src/test.zig +++ b/src/test.zig @@ -177,6 +177,8 @@ const TestManifestConfigDefaults = struct { inline for (&[_][]const u8{ "x86_64", "aarch64" }) |arch| { defaults = defaults ++ arch ++ "-macos" ++ ","; } + // Windows + defaults = defaults ++ "x86_64-windows" ++ ","; // Wasm defaults = defaults ++ "wasm32-wasi"; return defaults; @@ -1211,7 +1213,7 @@ pub const TestContext = struct { } fn run(self: *TestContext) !void { - const host = try std.zig.system.NativeTargetInfo.detect(self.gpa, .{}); + const host = try std.zig.system.NativeTargetInfo.detect(.{}); var progress = std.Progress{}; const root_node = progress.start("compiler", self.cases.items.len); @@ -1300,7 +1302,7 @@ pub const TestContext = struct { global_cache_directory: Compilation.Directory, host: std.zig.system.NativeTargetInfo, ) !void { - const target_info = try std.zig.system.NativeTargetInfo.detect(allocator, case.target); + const target_info = try std.zig.system.NativeTargetInfo.detect(case.target); const target = target_info.target; var arena_allocator = std.heap.ArenaAllocator.init(allocator); @@ -1546,6 +1548,12 @@ pub const TestContext = struct { .self_exe_path = std.testing.zig_exe_path, // TODO instead of turning off color, pass in a std.Progress.Node .color = .off, + // TODO: force self-hosted linkers with stage2 backend to avoid LLD creeping in + // until the auto-select mechanism deems them worthy + .use_lld = switch (case.backend) { + .stage2 => false, + else => null, + }, }); defer comp.destroy(); diff --git a/src/translate_c.zig b/src/translate_c.zig index faa8a456f5..014f6b1934 100644 --- a/src/translate_c.zig +++ b/src/translate_c.zig @@ -1167,7 +1167,7 @@ fn transRecordDecl(c: *Context, scope: *Scope, record_decl: *const clang.RecordD } if (!c.zig_is_stage1 and is_packed) { - return failDecl(c, record_loc, bare_name, "cannot translate packed record union", .{}); + return failDecl(c, record_loc, name, "cannot translate packed record union", .{}); } const record_payload = try c.arena.create(ast.Payload.Record); @@ -5799,7 +5799,7 @@ fn zigifyEscapeSequences(ctx: *Context, m: *MacroCtx) ![]const u8 { } } for (source) |c| { - if (c == '\\') { + if (c == '\\' or c == '\t') { break; } } else return source; @@ -5876,6 +5876,13 @@ fn zigifyEscapeSequences(ctx: *Context, m: *MacroCtx) ![]const u8 { state = .Start; }, .Start => { + if (c == '\t') { + bytes[i] = '\\'; + i += 1; + bytes[i] = 't'; + i += 1; + continue; + } if (c == '\\') { state = .Escape; } diff --git a/test/behavior.zig b/test/behavior.zig index 4b55913af5..db107bcbb1 100644 --- a/test/behavior.zig +++ b/test/behavior.zig @@ -86,6 +86,7 @@ test { _ = @import("behavior/bugs/12430.zig"); _ = @import("behavior/bugs/12486.zig"); _ = @import("behavior/bugs/12680.zig"); + _ = @import("behavior/bugs/12776.zig"); _ = @import("behavior/byteswap.zig"); _ = @import("behavior/byval_arg_var.zig"); _ = @import("behavior/call.zig"); diff 
--git a/test/behavior/bugs/12776.zig b/test/behavior/bugs/12776.zig new file mode 100644 index 0000000000..e8fe106ac7 --- /dev/null +++ b/test/behavior/bugs/12776.zig @@ -0,0 +1,42 @@ +const std = @import("std"); +const builtin = @import("builtin"); + +const RAM = struct { + data: [0xFFFF + 1]u8, + fn new() !RAM { + return RAM{ .data = [_]u8{0} ** 0x10000 }; + } + fn get(self: *RAM, addr: u16) u8 { + return self.data[addr]; + } +}; + +const CPU = packed struct { + interrupts: bool, + ram: *RAM, + fn new(ram: *RAM) !CPU { + return CPU{ + .ram = ram, + .interrupts = false, + }; + } + fn tick(self: *CPU) !void { + var queued_interrupts = self.ram.get(0xFFFF) & self.ram.get(0xFF0F); + if (self.interrupts and queued_interrupts != 0) { + self.interrupts = false; + } + } +}; + +test { + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; + + var ram = try RAM.new(); + var cpu = try CPU.new(&ram); + try cpu.tick(); + try std.testing.expect(cpu.interrupts == false); +} diff --git a/test/behavior/pointers.zig b/test/behavior/pointers.zig index adbc308742..dcdea1ff80 100644 --- a/test/behavior/pointers.zig +++ b/test/behavior/pointers.zig @@ -486,3 +486,14 @@ test "array slicing to slice" { try S.doTheTest(); comptime try S.doTheTest(); } + +test "pointer to constant decl preserves alignment" { + const S = struct { + a: u8, + b: u8, + const aligned align(8) = @This(){ .a = 3, .b = 4 }; + }; + + const alignment = @typeInfo(@TypeOf(&S.aligned)).Pointer.alignment; + try std.testing.expect(alignment == 8); +} diff --git a/test/behavior/translate_c_macros.h b/test/behavior/translate_c_macros.h index 222a7ded6c..439577fecc 100644 --- a/test/behavior/translate_c_macros.h +++ b/test/behavior/translate_c_macros.h @@ -50,3 +50,5 @@ typedef _Bool uintptr_t; #define CAST_TO_UINTPTR(X) (uintptr_t)(X) #define LARGE_INT 18446744073709550592 + +#define EMBEDDED_TAB "hello " diff --git a/test/behavior/translate_c_macros.zig b/test/behavior/translate_c_macros.zig index d670e0cbd4..314a9028df 100644 --- a/test/behavior/translate_c_macros.zig +++ b/test/behavior/translate_c_macros.zig @@ -2,6 +2,7 @@ const builtin = @import("builtin"); const std = @import("std"); const expect = std.testing.expect; const expectEqual = std.testing.expectEqual; +const expectEqualStrings = std.testing.expectEqualStrings; const h = @cImport(@cInclude("behavior/translate_c_macros.h")); @@ -123,3 +124,13 @@ test "large integer macro" { try expectEqual(@as(c_ulonglong, 18446744073709550592), h.LARGE_INT); } + +test "string literal macro with embedded tab character" { + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO + + try expectEqualStrings("hello\t", h.EMBEDDED_TAB); +} diff --git a/test/cases/aarch64-macos/hello_world_with_updates.0.zig b/test/cases/aarch64-macos/hello_world_with_updates.0.zig index dc65cd8279..3c7a494180 100644 --- a/test/cases/aarch64-macos/hello_world_with_updates.0.zig +++ 
b/test/cases/aarch64-macos/hello_world_with_updates.0.zig @@ -2,5 +2,5 @@ // output_mode=Exe // target=aarch64-macos // -// :105:9: error: struct 'tmp.tmp' has no member named 'main' +// :109:9: error: struct 'tmp.tmp' has no member named 'main' // :7:1: note: struct declared here diff --git a/test/cases/compile_errors/closure_get_depends_on_failed_decl.zig b/test/cases/compile_errors/closure_get_depends_on_failed_decl.zig new file mode 100644 index 0000000000..ccdbf67713 --- /dev/null +++ b/test/cases/compile_errors/closure_get_depends_on_failed_decl.zig @@ -0,0 +1,26 @@ +pub inline fn instanceRequestAdapter() void {} + +pub inline fn requestAdapter( + comptime callbackArg: fn () callconv(.Inline) void, +) void { + _ = (struct { + pub fn callback() callconv(.C) void { + callbackArg(); + } + }).callback; + instanceRequestAdapter(undefined); // note wrong number of arguments here +} + +inline fn foo() void {} + +pub export fn entry() void { + requestAdapter(foo); +} + +// error +// backend=stage2 +// target=native +// +// :11:5: error: expected 0 argument(s), found 1 +// :1:12: note: function declared here +// :17:19: note: called from here diff --git a/test/cases/compile_errors/closure_get_in_param_ty_instantiate_incorrectly.zig b/test/cases/compile_errors/closure_get_in_param_ty_instantiate_incorrectly.zig new file mode 100644 index 0000000000..dc533442fb --- /dev/null +++ b/test/cases/compile_errors/closure_get_in_param_ty_instantiate_incorrectly.zig @@ -0,0 +1,24 @@ +fn Observable(comptime T: type) type { + return struct { + fn map(Src: T, Dst: anytype, function: fn (T) Dst) Dst { + _ = Src; + _ = function; + return Observable(Dst); + } + }; +} + +fn u32Tou64(x: u32) u64 { + _ = x; + return 0; +} + +pub export fn entry() void { + Observable(u32).map(u32, u64, u32Tou64(0)); +} + +// error +// backend=stage2 +// target=native +// +// :17:25: error: expected type 'u32', found 'type' diff --git a/test/cases/compile_errors/incorrect_type_to_memset_memcpy.zig b/test/cases/compile_errors/incorrect_type_to_memset_memcpy.zig new file mode 100644 index 0000000000..d3a6b7cc4b --- /dev/null +++ b/test/cases/compile_errors/incorrect_type_to_memset_memcpy.zig @@ -0,0 +1,19 @@ +pub export fn entry() void { + var buf: [5]u8 = .{ 1, 2, 3, 4, 5 }; + var slice: []u8 = &buf; + const a: u32 = 1234; + @memcpy(slice, @ptrCast([*]const u8, &a), 4); +} +pub export fn entry1() void { + var buf: [5]u8 = .{ 1, 2, 3, 4, 5 }; + var ptr: *u8 = &buf[0]; + @memcpy(ptr, 0, 4); +} + +// error +// backend=stage2 +// target=native +// +// :5:13: error: expected type '[*]u8', found '[]u8' +// :10:13: error: expected type '[*]u8', found '*u8' +// :10:13: note: a single pointer cannot cast into a many pointer diff --git a/test/cases/x86_64-linux/hello_world_with_updates.0.zig b/test/cases/x86_64-linux/hello_world_with_updates.0.zig index 795f4f2991..c9c94442d0 100644 --- a/test/cases/x86_64-linux/hello_world_with_updates.0.zig +++ b/test/cases/x86_64-linux/hello_world_with_updates.0.zig @@ -2,5 +2,5 @@ // output_mode=Exe // target=x86_64-linux // -// :105:9: error: struct 'tmp.tmp' has no member named 'main' +// :109:9: error: struct 'tmp.tmp' has no member named 'main' // :7:1: note: struct declared here diff --git a/test/cases/x86_64-macos/hello_world_with_updates.0.zig b/test/cases/x86_64-macos/hello_world_with_updates.0.zig index 9839371e31..5860c9c0f6 100644 --- a/test/cases/x86_64-macos/hello_world_with_updates.0.zig +++ b/test/cases/x86_64-macos/hello_world_with_updates.0.zig @@ -2,5 +2,5 @@ // output_mode=Exe // 
target=x86_64-macos // -// :105:9: error: struct 'tmp.tmp' has no member named 'main' +// :109:9: error: struct 'tmp.tmp' has no member named 'main' // :7:1: note: struct declared here diff --git a/test/cases/x86_64-windows/hello_world_with_updates.0.zig b/test/cases/x86_64-windows/hello_world_with_updates.0.zig new file mode 100644 index 0000000000..142699b9da --- /dev/null +++ b/test/cases/x86_64-windows/hello_world_with_updates.0.zig @@ -0,0 +1,6 @@ +// error +// output_mode=Exe +// target=x86_64-windows +// +// :130:9: error: struct 'tmp.tmp' has no member named 'main' +// :7:1: note: struct declared here diff --git a/test/cases/x86_64-windows/hello_world_with_updates.1.zig b/test/cases/x86_64-windows/hello_world_with_updates.1.zig new file mode 100644 index 0000000000..e18a4c6a1e --- /dev/null +++ b/test/cases/x86_64-windows/hello_world_with_updates.1.zig @@ -0,0 +1,6 @@ +pub export fn main() noreturn {} + +// error +// +// :1:32: error: function declared 'noreturn' returns +// :1:22: note: 'noreturn' declared here diff --git a/test/cases/x86_64-windows/hello_world_with_updates.2.zig b/test/cases/x86_64-windows/hello_world_with_updates.2.zig new file mode 100644 index 0000000000..6c2fd5b24e --- /dev/null +++ b/test/cases/x86_64-windows/hello_world_with_updates.2.zig @@ -0,0 +1,16 @@ +const std = @import("std"); + +pub fn main() void { + print(); +} + +fn print() void { + const msg = "Hello, World!\n"; + const stdout = std.io.getStdOut(); + stdout.writeAll(msg) catch unreachable; +} + +// run +// +// Hello, World! +// diff --git a/test/link.zig b/test/link.zig index b68353122c..d1dcbbc292 100644 --- a/test/link.zig +++ b/test/link.zig @@ -28,11 +28,22 @@ pub fn addCases(cases: *tests.StandaloneContext) void { } fn addWasmCases(cases: *tests.StandaloneContext) void { + cases.addBuildFile("test/link/wasm/archive/build.zig", .{ + .build_modes = true, + .requires_stage2 = true, + }); + cases.addBuildFile("test/link/wasm/bss/build.zig", .{ .build_modes = true, .requires_stage2 = true, }); + cases.addBuildFile("test/link/wasm/extern/build.zig", .{ + .build_modes = true, + .requires_stage2 = true, + .use_emulation = true, + }); + cases.addBuildFile("test/link/wasm/segments/build.zig", .{ .build_modes = true, .requires_stage2 = true, @@ -47,17 +58,6 @@ fn addWasmCases(cases: *tests.StandaloneContext) void { .build_modes = true, .requires_stage2 = true, }); - - cases.addBuildFile("test/link/wasm/archive/build.zig", .{ - .build_modes = true, - .requires_stage2 = true, - }); - - cases.addBuildFile("test/link/wasm/extern/build.zig", .{ - .build_modes = true, - .requires_stage2 = true, - .use_emulation = true, - }); } fn addMachOCases(cases: *tests.StandaloneContext) void { diff --git a/test/tests.zig b/test/tests.zig index a329233199..53e58156a4 100644 --- a/test/tests.zig +++ b/test/tests.zig @@ -108,6 +108,14 @@ const test_targets = blk: { }, .backend = .stage2_x86_64, }, + .{ + .target = .{ + .cpu_arch = .x86_64, + .os_tag = .windows, + .abi = .gnu, + }, + .backend = .stage2_x86_64, + }, .{ .target = .{ @@ -693,6 +701,8 @@ pub fn addPkgTests( else => { these_tests.use_stage1 = false; these_tests.use_llvm = false; + // TODO: force self-hosted linkers to avoid LLD creeping in until the auto-select mechanism deems them worthy + these_tests.use_lld = false; }, };